diff --git a/.gitignore b/.gitignore index 9cf345e49..1c6ec4b11 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ !/.devcontainer/ !/.github/ !/.gitignore +!/.npmrc !/.rspec !/.vscode/ !/CLAUDE.md @@ -17,6 +18,8 @@ !/gems/** !/LICENSE !/ONE-PAGER.md +!/package-lock.json +!/package.json !/README.md !/TODO.md !/clear @@ -159,3 +162,6 @@ transpile-tests/fuzz/*.cht # Generated architecture reports gems/espalier/architecture.yml + +# Decomplex native Rust build artifacts +gems/decomplex/rust/target/ diff --git a/.npmrc b/.npmrc new file mode 100644 index 000000000..521a9f7c0 --- /dev/null +++ b/.npmrc @@ -0,0 +1 @@ +legacy-peer-deps=true diff --git a/Gemfile b/Gemfile index 186513287..0084b635d 100644 --- a/Gemfile +++ b/Gemfile @@ -8,6 +8,7 @@ group :development do gem 'minitest', '~> 5.25' gem 'rspec' gem 'parallel_rspec' + gem "parallel_tests", "~> 5.7", require: false gem 'tty-cursor', require: false gem 'tty-reader', require: false gem 'tty-screen', require: false diff --git a/Gemfile.lock b/Gemfile.lock index 9d3a35aa8..f9afad6c6 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -124,6 +124,8 @@ GEM parallel_rspec (3.0.0) rake (> 10.0) rspec + parallel_tests (5.7.0) + parallel parlour (9.1.2) commander (~> 5.0) parser @@ -298,6 +300,7 @@ DEPENDENCIES mutant-rspec nil-kill! parallel_rspec + parallel_tests (~> 5.7) reek rspec rubocop diff --git a/docs/agents/TODO.md b/docs/agents/TODO.md new file mode 100644 index 000000000..4c2fa743b --- /dev/null +++ b/docs/agents/TODO.md @@ -0,0 +1,46 @@ +# Launch Checklist: CLEAR v0.1 & Generalized Gems + +## Phase 1: The "Secret Sauce" Launch (Weeks 1-6) + +### 1. Lineage (The Backbone) +- [ ] Finalize Rust `lineage` crate with VCS Trait support (Git/JJ/Hg). +- [ ] Implement Sentry/Stack-trace ingestion with verification anchors. +- [ ] Implement Coverage-Delta ingestion (Aggregates only). +- [ ] Build the Local UI Server (Rust/Axum + React/Monaco) with gutters. + +### 2. 
Boobytrap & SlopCop (The Integrity Wall) +- [ ] Generalize SlopCop regexes into language-neutral providers. +- [ ] Implement Systems-Test Coverage detection (Atomics -> Loom, etc.) for Zig. +- [ ] Add `--format sarif` output for native GitHub Check Annotations. +- [ ] Update Boobytrap to use Lineage SQLite DB for function-level history. + +### 3. Nil-Kill & Auto-Type (The Repair Engine) +- [ ] Complete the extraction of `auto-type` from `nil-kill`. +- [ ] Implement the `auto-type` Provider Registry (Template/LLM/AST tiers). +- [ ] Abstract Nil-Kill Z3 evidence extraction into language providers. +- [ ] Launch "Hidden Enum Discovery" as a flagship AI-refactoring feature. + +## Phase 2: CLEAR v0.1 Architectural Preview (Weeks 7-8) + +### 4. Compiler Hardening +- [ ] **Must Build:** Promote the "Memory Brains" (`Type`, `CleanupClassifier`, `EscapeAnalysis`) to Hard-Gated mutation status. +- [ ] Ensure 100% of safety invariants in `CLAUDE.md` are killed by transpile-mutants. +- [ ] Conduct a final parity run: Tree-sitter-Ruby vs. Prism-Ruby facts. + +### 5. Launch Artifacts +- [ ] Finalize the "Language Tour" featuring the MiniVM (`_bc_runner.cht`) as proof of logic. +- [ ] Release the 3-week "Decomplex Expansion" narrative (Ruby -> Python -> Universal). + +## Phase 3: CLEAR v0.2 Self-Hosted Release (Weeks 9-10+) + +### 6. The Great Migration +- [ ] Execute the "Narrowing the Funnel" refactor: move Ruby source to "Spiritual CLEAR". +- [ ] Run the S2S Script (Ruby-to-CLEAR) on the Kernel (AST/Type/Annotator). +- [ ] Achieve first successful self-compiled "Hello World". +- [ ] Achieve full self-hosting of the MIR Lowering and Checker passes. + +## Phase 4: Enterprise & Scalability + +- [ ] Port the Lineage UI to a standalone **Tauri** Desktop Application. +- [ ] Design the "Cloud Fact-Store" for aggregate team risk (hosting-safe). +- [ ] Finalize per-seat license model for the "Systems Integrity Platform". 
diff --git a/docs/agents/self-host.md b/docs/agents/self-host.md new file mode 100644 index 000000000..1799a62b2 --- /dev/null +++ b/docs/agents/self-host.md @@ -0,0 +1,67 @@ +# Self-Hosting Plan: The "Boiling Frog" Transpilation + +This document outlines a phased bootstrapping approach to self-hosting the CLEAR compiler. Rather than a single "big bang" transpilation, we build the transpiler incrementally to handle the specific Ruby features used in each compiler pass. + +## Core Strategy + +1. **Surgical Ruby Refactoring (Phase 0)**: Eliminate dynamic hazards (e.g., `send`, `instance_variable_get`) in the Ruby source to simplify the transpiler and ensure idiomatic CLEAR output. +2. **Phase-Locked Development**: Build the transpiler logic required for Pass N, then transpile Pass N. +3. **Surgical Manual Intervention**: Complex Ruby idioms (e.g., dynamic regex generation in the Lexer, or complex metaprogramming in the Annotator) are manually converted during each pass. +4. **Fact-Driven Memory Safety**: Use `decomplex` to detect aliasing and ownership facts in the Ruby source to drive correct CLEAR capability selection (GIVE vs. Borrow vs. Shared). 
+ +## Phase Estimates + +| Phase | Component | Ruby LOC | Transpiler LOC (Cumulative) | Manual Work | +| :--- | :--- | :---: | :---: | :---: | +| **P0** | **Source Refactor** | N/A | 0 | 0% | +| **P1** | **Lexer** | 360 | 1,000 | 0% | +| **P2** | **Parser & AST** | 18,500 | 3,000 | 5% | +| **P3** | **Type Inference (Annotator)** | 11,000 | 5,000 | 10% | +| **P4** | **Cap & Effect Tracking** | 11,200 | 6,500 | 15% | +| **P5** | **Escape Analysis** | 4,100 | 7,500 | 15% | +| **P6** | **AST/MIR Re-writing** | 1,500 | 8,500 | 10% | +| **P7** | **Thunk Conversion** | 3,300 | 9,500 | 20% | +| **P8** | **FSM Conversion** | 7,500 | 10,500 | 25% | +| **P9** | **MIR Lowering** | 22,000 | 11,500 | 15% | +| **P10** | **MIR Safety Verification** | 3,000 | 12,000 | 5% | +| **P11** | **Zig Emission** | 4,200 | 12,500 | 5% | +| **P12** | **Test Suite (spec/)** | 100,000+ | 16,000 | 15% | + +### Phase Details + +#### P0: Surgical Refactor +- **Goal**: Remove dynamic Ruby features that are difficult to transpile. +- **Actions**: Replace `send` with explicit interfaces; replace `instance_variable_get` with getters; simplify RSpec mocks to use structural doubles. +- **Benefit**: Reduces transpiler complexity by ~2,000 LOC and ensures the output follows CLEAR "Fortress Architecture" principles. + +#### P1-P2: Frontend (Lexer & Parser) +- **Ruby Surface**: String scanning, recursive descent, large case/match blocks, AST node instantiation. +- **Transpiler Goal**: Map Ruby `StringScanner` to CLEAR `Scanner`, and `case` to CLEAR `MATCH`. +- **Manual Work**: Complex regex-driven tokenization rules that don't map 1:1 to Zig's regex engine. + +#### P3-P4: Semantic Analysis (Annotator) +- **Ruby Surface**: Symbol tables, recursive tree walks, Sorbet `sig` blocks, `T::Hash`, `T::Set`. +- **Transpiler Goal**: Robust mapping of Sorbet types to CLEAR types; mapping Ruby `Hash/Set` to CLEAR `@map/@set`. 
+- **Manual Work**: Deeply nested type-inference edge cases and circular dependency resolution in the declaration index. + +#### P5-P8: Middle-End (Semantic & Transforms) +- **Ruby Surface**: Flow-sensitive analysis, graph traversal, tree-to-tree transformations (Rewriters), closure/thunk generation. +- **Transpiler Goal**: Implementing a "Data-Flow Bridge" in `decomplex` to detect aliasing hazards. +- **Manual Work**: FSM conversion logic is the most complex Ruby in the codebase, requiring careful manual verification of the generated state machines. + +#### P9-P11: Backend (Lowering & Emission) +- **Ruby Surface**: Explicit memory decision logic, cleanup classification, Zig template strings. +- **Transpiler Goal**: High-fidelity mapping of Ruby logic to CLEAR's ownership markers (`Cleanup`, `MoveMark`). +- **Manual Work**: Very low; these passes are already designed with "CLEAR-like" semantics (mechanical and fact-driven). + +#### P12: Test Suite (spec/) +- **Ruby Surface**: RSpec DSL (`expect`, `it`, `describe`), dynamic doubles, `send` for white-box testing. +- **Transpiler Goal**: Map RSpec DSL to CLEAR `TEST` and `ASSERT` blocks. +- **Manual Work**: High (~15%) due to the highly dynamic nature of Ruby test mocking. + +## Success Criteria + +A phase is considered complete when: +1. The CLEAR-transpiled version of Pass N passes all unit tests when driven by the Ruby versions of Passes 0..(N-1). +2. Decomplex reports 0 "Encapsulation Breaches" in the generated CLEAR code. +3. The binary size and performance of the self-hosted pass are within 20% of the Ruby baseline (targeting 2-5x faster eventually). diff --git a/gems/decomplex/CONTRIBUTING.md b/gems/decomplex/CONTRIBUTING.md index 2cf473eaf..4b2dbab82 100644 --- a/gems/decomplex/CONTRIBUTING.md +++ b/gems/decomplex/CONTRIBUTING.md @@ -36,6 +36,20 @@ Prefer one clear signal over a broad blended score. If a metric is noisy, mark it tier 3 or make it supporting evidence for convergence/root-cause clusters. 
+## Running Tests + +Run the Decomplex Minitest suite in parallel with: + +```bash +bundle exec parallel_test gems/decomplex/test +``` + +For a smaller local run while debugging output, cap workers explicitly: + +```bash +bundle exec parallel_test gems/decomplex/test -n 4 --serialize-stdout +``` + ## Language Support New language work should go through the syntax/profile boundary: diff --git a/gems/decomplex/benchmark.rb b/gems/decomplex/benchmark.rb new file mode 100644 index 000000000..499e6df07 --- /dev/null +++ b/gems/decomplex/benchmark.rb @@ -0,0 +1,16 @@ +require_relative "lib/decomplex" +require "benchmark" + +files = ["lib/decomplex/ast.rb"] +detectors = Decomplex::DetectorRunner::DETECTORS.keys + +Benchmark.bm(40) do |x| + detectors.each do |det| + x.report("#{det} (ruby)") do + Decomplex::DetectorRunner.run(det, files, engine: "ruby") + end + x.report("#{det} (rust)") do + Decomplex::DetectorRunner.run(det, files, engine: "rust") + end + end +end diff --git a/gems/decomplex/benchmark_dir.rb b/gems/decomplex/benchmark_dir.rb new file mode 100644 index 000000000..fb50c25c9 --- /dev/null +++ b/gems/decomplex/benchmark_dir.rb @@ -0,0 +1,18 @@ +require_relative "lib/decomplex" +require "benchmark" + +files = Dir.glob("lib/decomplex/**/*.rb") +detectors = Decomplex::DetectorRunner::DETECTORS.keys + +puts "Benchmarking across #{files.size} files in lib/decomplex/" + +Benchmark.bm(40) do |x| + detectors.each do |det| + x.report("#{det} (ruby)") do + Decomplex::DetectorRunner.run(det, files, engine: "ruby") + end + x.report("#{det} (rust)") do + Decomplex::DetectorRunner.run(det, files, engine: "rust") + end + end +end diff --git a/gems/decomplex/docs/agents/aliasing-complexity-metrics.md b/gems/decomplex/docs/agents/aliasing-complexity-metrics.md new file mode 100644 index 000000000..cb421bd54 --- /dev/null +++ b/gems/decomplex/docs/agents/aliasing-complexity-metrics.md @@ -0,0 +1,44 @@ +# Aliasing and Ownership Complexity Metrics + +This document outlines the 
expansion of Decomplex to include pointer-aliasing and ownership detection. These metrics transform Decomplex from a heuristic structural analyzer into a semantic fact-engine capable of driving high-accuracy transpilation to CLEAR's affine ownership model. + +## Metric Tiers + +### Tier 1: High Confidence / Structural Hazards +**Metric: Encapsulation Breach** +- **Description**: Detects when a class returns a mutable reference to an internal state field (`@ivar`) without a copy (`.dup` / `.clone`) or conversion. +- **Architectural Risk**: Violates "Fortress Architecture" principles. It allows external callers to bypass class invariants and validation by mutating state "from the outside." +- **CLEAR Impact**: Identifies sites where CLEAR must either enforce a `COPY` or wrap the field in a read-only borrow. + +### Tier 2: Design Pressure / Structural Risk +**Metric: Aliasing Tangle (Action-at-a-Distance)** +- **Description**: Identifies single objects that are aliased across three or more disparate modules/subsystems. +- **Architectural Risk**: Creates "tangled webs" where mutation in one module causes unpredictable behavior in another. This is the primary driver of "locality drag"—where a developer must understand 5 files to change 1. +- **CLEAR Impact**: Signals that a resource requires a Group 1 capability (e.g., `@shared:locked` or `@shared:writeLocked`) rather than simple affine move semantics. + +### Tier 3: Strategic / Project Context +**Metric: Entanglement Density** +- **Description**: An aggregate heatmap quantifying the ratio of aliased references to total references within a file or directory. +- **Architectural Risk**: High-density files are objectively harder to refactor, test, and reason about. They represent the "dark matter" of the codebase where most regressions occur. +- **CLEAR Impact**: Prioritization metric. Files with low entanglement density are "low-hanging fruit" for 98% automated transpilation. 
High-density files require manual architectural review before migration. + +## Implementation Strategy + +The implementation leverages a two-pass semantic analyzer within `gems/decomplex/lib/decomplex/`: + +1. **Escape & Reachability Pass**: + - Extends `LocalFlow` to track def-use chains across method boundaries. + - Builds a **Reachability Graph** to determine if an object allocated in Scope A can reach Scope B via return, argument passing, or field assignment. + +2. **Ownership Synthesis**: + - Aggregates escape facts to classify bindings as `Unique`, `Borrowed`, or `Shared`. + - Detects "Reification Misses" where a developer intended for an object to be private but allowed it to escape via a getter. + +## Transpiler Bridge + +When the CLEAR transpiler processes Ruby code, it queries the Decomplex fact-graph: + +- **Fact: Unique** -> Transpile to `GIVE` (Move). +- **Fact: Borrowed** -> Transpile to `WITH ... AS alias` (Borrow). +- **Fact: Shared** -> Transpile to `@shared:locked` (Arc/RwLock). +- **Fact: Escaped Field** -> Transpile to `RETURN COPY field` or `@indirect` wrapper. diff --git a/gems/decomplex/docs/agents/architectural-issues.md b/gems/decomplex/docs/agents/architectural-issues.md new file mode 100644 index 000000000..b9202ff7f --- /dev/null +++ b/gems/decomplex/docs/agents/architectural-issues.md @@ -0,0 +1,544 @@ +# Architectural Issues in `ast.rb` + +This is a gap analysis of the current `gems/decomplex/lib/decomplex/ast.rb` +Tree-sitter normalization layer. Line references below refer to the current +file state at the time of this analysis. + +## Executive Summary + +`ast.rb` is not only an AST facade. It currently blends three separate jobs: + +1. Tree-sitter grammar adaptation. +2. Cross-language semantic normalization. +3. Ruby AST compatibility and Ruby-specific scope semantics. + +That mix defeats the intended architecture. 
If Decomplex has a parser facade, +a Tree-sitter normalizer, and per-language adapters, then grammar-specific +quirks must be owned by the adapters. The shared normalizer should consume +already-classified semantic facts, not mine native grammar tokens for every +language. + +The current design still centralizes language knowledge in one giant shared +normalizer. Adding a language means editing shared dispatch tables, shared +punctuation checks, and Ruby-shaped AST output logic. That is brittle, hard to +test, and likely to regress existing languages. + +## Quantitative Signals + +Current `ast.rb` size: + +- `gems/decomplex/lib/decomplex/ast.rb`: 4,023 lines. +- Rough static scan: 439 method definitions. +- Rough static scan: 129 methods contain `rescue StandardError`. +- Rough static scan: at least 10 trivial hook methods return only `false`, + `nil`, or `true`. + +The trivial hooks found by the scan are: + +- `TreeSitterNormalizationAdapter#ruby?` at line 145 returns `false`. +- `TreeSitterNormalizationAdapter#super_statement?` at line 156 returns `false`. +- `TreeSitterNormalizationAdapter#member_assignment_target?` at line 222 returns `false`. +- `TreeSitterNormalizationAdapter#identifier_text_node?` at line 226 returns `false`. +- `TreeSitterNormalizationAdapter#case_argument_list?` at line 266 returns `false`. +- `TreeSitterNormalizationAdapter#case_else_arm?` at line 293 returns `false`. +- `TreeSitterNormalizationAdapter#ensure_clause_body` at line 489 returns `nil`. +- `TreeSitterNormalizationAdapter#heredoc_call_for_body?` at line 612 returns `false`. +- `TreeSitterNormalizationAdapter#zero_child_identifier_call?` at line 642 returns `false`. +- `RubyTreeSitterNormalizationAdapter#ruby?` at line 838 returns `true`. + +Some no-op hooks are reasonable when they are an explicit adapter contract. 
+Here they are mixed into a large base adapter that also contains many concrete +language heuristics, so it is not clear which methods are deliberate extension +points and which are unimplemented behavior. + +## Primary Architectural Gaps + +### 1. Shared Normalizer Owns Language Dispatch + +`TreeSitterNormalizationAdapter.for` selects an adapter at lines 128-135, but +the selected adapter does not actually own the language boundary. The base +adapter above it still contains cross-language constants and grammar knowledge: + +- Function/class kinds at lines 49 and 70-73. +- Assignment operator tables at lines 50-56. +- Case/when/else grammar tables at lines 64-69. +- Wrapper and statement shape tables throughout lines 82-125. + +Then `TreeSitterNormalizer#normalize_node` at lines 1524-1658 performs one +large global dispatch across all languages. It checks assignment, infix, +dotted calls, unary operators, functions, classes, modules, loops, cases, +hashes, arrays, element references, rescue, ensure, calls, identifiers, nil, +strings, and symbols in one ordering-dependent chain. + +This makes the adapter layer incomplete. A new grammar still has to be wired +into shared lists and shared branch ordering. That is the exact direction the +architecture was supposed to avoid. + +Expected direction: + +- Each language adapter should classify native Tree-sitter nodes into a small + canonical set of semantic categories. +- Shared code should normalize canonical facts, not native grammar nodes. +- Adding a language should mostly mean adding or updating one adapter/profile, + plus tests for that language. + +### 2. Ruby AST Vocabulary Is Treated as Language-Neutral + +The comment at lines 1436-1441 says the target is "portable structural facts, +not Ruby semantics", but the output vocabulary is heavily Ruby-shaped: + +- `DEFN`, `DEFS`, `SCOPE`, `VCALL`, `FCALL`, `ITER`, `DASGN`, `DVAR`. +- `IASGN`, `GASGN`, `GVAR`, `NTH_REF`. 
+- `ATTRASGN`, `OP_ASGN1`, `OP_ASGN2`, `OP_ASGN_OR`, `OP_ASGN_AND`. +- `MATCH3`, `BLOCK_PASS`, `RESBODY`, `SCLASS`. + +Those are not neutral structural facts. They encode Ruby parser concepts and +Ruby name-resolution semantics. Forcing Python, Lua, TypeScript, Rust, C, Zig, +Swift, Kotlin, and Java into that vocabulary will either lose information or +invent false equivalences. + +Expected direction: + +- Decide whether this layer is a Ruby AST compatibility layer or a + language-neutral Decomplex IR. +- If Ruby compatibility is still required, keep it as a Ruby-specific output + adapter. +- Detectors should consume language-neutral concepts such as function, call, + assignment, branch, loop, literal, member access, block, return, and scope. + +### 3. `ruby?` Branches in Shared Code Prove the Normalizer Is Not Shared + +`TreeSitterNormalizer` delegates `ruby?` to the adapter at lines 2688-2690, +then uses it throughout shared normalization: + +- Root normalization enters Ruby scope tracking at lines 1512-1518. +- Ruby `yield` identifier handling appears at lines 1638-1643. +- Ruby `=~` handling appears at lines 1811-1815 and 2019-2023. +- Ruby `self[]` call rewriting appears at lines 1824-1826 and 3397-3399. +- Ruby hash key shorthand handling appears at lines 1915-1918. +- Ruby argument-list call normalization is gated at lines 2050-2073. +- Ruby argument-list element references are gated at lines 2168-2173. +- Ruby logical assignment lowering is gated at lines 2799-2808. +- Ruby local/vcall scope tracking lives at lines 2659-2686 and 2820-2910. +- Ruby parameter normalization is gated at lines 3024-3050. +- Ruby inline `def` handling lives at lines 3726-3793. +- Ruby tail return and implicit nil elision live at lines 3804-3858. +- Ruby inline parameter marker handling lives at lines 3860-3896. + +This is adapter logic living in the shared normalizer. It also means the base +normalizer cannot be reasoned about independently from Ruby. 
+ +Expected direction: + +- Remove language predicates from shared normalization. +- Move Ruby scope, vcall/fcall, inline def, tail-return elision, implicit nil, + and Ruby-specific assignment lowering into a Ruby adapter or Ruby normalizer. +- Other languages should have their own scope/name-resolution rules or should + explicitly opt out of name-resolution at this layer. + +### 4. Broad `rescue StandardError` Masks Contract Failures + +The file repeatedly uses `rescue StandardError` to return `false`, `nil`, or +empty arrays. The base adapter alone has many examples in the first few hundred +lines, including: + +- `yield_statement?` at lines 149-153. +- `lambda_expression?` at lines 190-193. +- `literal_fragment_assignment_context?` at lines 230-239. +- `named_field` at lines 246-249. +- `safe_navigation_call?` at lines 252-255. +- `case_else_node` at lines 276-290. +- `leading_owner_statement?` at lines 319-327. +- `leading_if_statement?` at lines 336-346. + +Later helper methods do the same for sibling and parent access at lines +3177-3198, and for shape detection such as `infix_statement_parts` at +lines 2545-2566. + +This hides missing optional values, wrong node shapes, facade bugs, and adapter +contract violations. A parser shape that should fail a test instead degrades +into "not this construct", which looks like partial language support rather +than a bug. + +Expected direction: + +- Provide safe node access helpers with explicit nil behavior. +- Rescue only known parser/facade exceptions at the parser boundary. +- Make adapter contracts explicit: a method should either return a documented + optional value or raise a meaningful unsupported-shape error in tests. + +### 5. Raw Token and Source-Text Mining Is Used for Semantic Decisions + +Many semantic decisions are made by checking token text or raw node source: + +- Safe navigation checks raw `&.` at lines 252-255. 
+- Leading function detection checks the first child kind against `"def"` at + lines 303-305 and 722-726. +- Ternary detection checks raw `?` and `:` tokens at lines 709-718. +- Dotted calls check raw `.` and `&.` at lines 3254-3262. +- Argument-list element reference checks raw `[` and `]` at lines 2160-2165. +- Hash pairs check raw `=>` at lines 1907-1912. +- Operator assignment parses raw token text at lines 2785-2790 and 3629-3647. +- Inline def handling checks source text for `"def "` at lines 3726-3737. +- Hidden match detection checks `node.text` for `"match "` at lines 3969-3973. + +This is not portable. Tree-sitter grammars expose punctuation and keywords +differently. Some grammars make punctuation anonymous, some name it, some hide +it behind fields, and some represent a construct as a dedicated node. Source +text also fails as soon as whitespace, comments, macro syntax, generated +facade text, or language-specific tokenization changes. + +Expected direction: + +- Adapters should use grammar fields and native node kinds to identify + language constructs. +- Shared normalization should receive facts such as `safe_navigation_call`, + `function_decl`, `ternary`, `member_access`, and `subscript`, not discover + them with punctuation scans. + +### 6. `safe_navigation_call?` Is in the Wrong Layer + +The base implementation at lines 252-255 checks for Ruby's `&.` token. The +TypeScript override at lines 1304-1308 adds `optional_chain`/`?.` checks and +recursive call-expression scanning. + +This should not be a shared base behavior. It is inherently grammar-specific: + +- Ruby uses `&.`. +- TypeScript and JavaScript use `?.`. +- C# and Swift have their own optional chaining syntax. +- Kotlin has `?.` but a different grammar. +- Python has no equivalent built-in operator. +- Rust, C, C++, Zig, Go, Java, and Lua do not have the same concept in the + same form. 
+ +Expected direction: + +- Each adapter should expose optional-call/member-access semantics for its + grammar. +- Languages without this feature should explicitly return "unsupported" or + "not applicable", not inherit a Ruby token scan. + +### 7. Leading Statement Helpers Assume Keyword Tokens + +`leading_function_statement?` defaults to `def` at lines 303-305, and the +generic helper checks `node.children.first&.kind.to_s == keyword` at lines +722-726. Python overrides with another `"def"` check; Lua overrides with +`"function"`. + +That is still keyword-token mining. It cannot scale to languages where +function declarations are identified by node kind, declarator shape, receiver, +macro item, annotations/modifiers, or field names rather than a first keyword +token. + +Expected direction: + +- Adapter methods should answer "this wrapper contains a leading function + declaration" by using that grammar's function node and field structure. +- The shared normalizer should not know the keyword string. + +### 8. Assignment and Operator Tables Are Global, Incomplete, and Unsafe + +The base adapter defines assignment operators for Ruby, Python, Lua, and +TypeScript at lines 50-56. The fallback `assignment_operators` method returns +only `COMMON_ASSIGNMENT_OPERATORS` at lines 671-674. + +That silently misclassifies or ignores languages with different assignment +forms or operators: + +- Rust: `=`, `+=`, `-=`, `*=`, `/=`, `%=` plus bitwise/shift variants. +- C/C++/Java/C#/Go/Zig/Kotlin/Swift: overlapping but not identical augmented + assignment sets. +- Languages with declaration assignment, walrus-like operators, or pattern + assignment need grammar-specific handling. + +Expected direction: + +- Assignment/operator classification belongs in the language adapter/profile. +- Shared code should ask the adapter for an assignment semantic object, not + infer assignment by checking sibling punctuation. + +### 9. 
Scope and Local Resolution Are Ruby-Only but Central + +The normalizer tracks Ruby locals with `@local_stack`, `with_ruby_scope`, +`ruby_scope_locals`, `collect_ruby_scope_locals`, `ruby_assignment_node?`, and +related helpers at lines 2820-2910. It uses that to decide whether identifiers +become `LVAR`, `DVAR`, `VCALL`, or `FCALL`. + +That logic is Ruby-specific. Other languages have different scoping rules: + +- Python has local/global/nonlocal behavior and lexical scopes. +- JavaScript/TypeScript have `var`, `let`, `const`, function scope, block + scope, imports, and destructuring. +- Lua has globals by default and `local`. +- Rust, C, C++, Java, Kotlin, Swift, Zig, and Go have declaration forms and + block/module scopes unlike Ruby. + +Expected direction: + +- Either remove name-resolution from this normalization layer, or delegate it + to per-language scope adapters. +- The shared normalizer should not decide call-vs-local from Ruby local rules. + +### 10. Parameter Normalization Is Ruby-Gated + +`normalize_parameters` returns `nil` unless `ruby?` at lines 3024-3037. +`normalize_block_parameters` also returns `nil` unless `ruby?` at lines +3039-3050. + +That means non-Ruby function parameters, defaults, destructuring, and block or +lambda parameters are mostly unavailable through this AST contract. This is a +large parity gap because many Decomplex detectors need parameters to +distinguish state, local data flow, receiver conventions, and trivial wrappers. + +Expected direction: + +- Language adapters should emit canonical parameter facts. +- Parameter normalization should exist for every supported language with + explicit capability gaps. + +### 11. Control-Flow Semantics Are Flattened Into Ruby Names + +`RETURN_KINDS` at lines 1488-1497 maps `"continue_statement"` to `:NEXT` and +Ruby `next` also to `:NEXT`. `LOOP_KINDS` at lines 1454-1462 maps native loop +kinds into Ruby-ish symbols. 
Rescue/ensure normalization maps Python and +TypeScript exception constructs into `RESCUE`, `RESBODY`, and `ENSURE` shapes. + +This may be acceptable for a Ruby compatibility mode, but it is not a neutral +model. `continue`, Ruby `next`, `break`, `return`, `throw`, `raise`, `panic`, +and exception/finally constructs do not have identical semantics across +languages. + +Expected direction: + +- Use neutral control-flow facts: `return`, `break`, `continue`, + `exception_handler`, `finally`, `throw`, and language-specific termination + signals where needed. +- Only convert to Ruby names at the Ruby compatibility boundary. + +### 12. Literal Semantics Are Conflated Across Languages + +`NIL_KINDS` at line 1487 conflates `nil`, `none`, and `null`. Terminal +statement handling at lines 3557-3575 hard-codes Ruby spellings such as +`nil`, `true`, `false`, symbols, instance variables, globals, and `[]`. +Scalar argument handling repeats similar text matching at lines 3910-3927. + +That loses important distinctions: + +- Python `None`, JavaScript `null`, JavaScript `undefined`, Ruby `nil`, Swift + `nil`, Zig `null`, and Go `nil` are not always equivalent in analysis. +- Ruby symbols do not exist in most target languages. +- Ruby globals and numbered captures are not portable. + +Expected direction: + +- Adapters should classify literals into canonical literal facts with original + language and spelling preserved. +- Detectors should decide which literal classes are equivalent for a specific + metric. + +### 13. Member Access and Calls Are Guessed by Shared Heuristics + +`MEMBER_KINDS`, `CALL_KINDS`, `IDENTIFIER_KINDS`, and `CONST_KINDS` live in the +shared normalizer at lines 1474-1482. Member parsing is then guessed in +`member_parts` at lines 2912-2929 by trying several field names and falling +back to child order. + +That is unsafe across languages. Member access differs for: + +- Ruby calls without parentheses. +- JavaScript optional chaining and private fields. 
+- C/C++ pointer member access. +- Rust paths, method calls, and associated functions. +- Go selectors. +- Swift/Kotlin null-safe access. +- Python attributes and calls. + +Expected direction: + +- Each adapter should expose a canonical call/member/subscript shape. +- Shared code should not infer receiver and method name by trying a long list + of field names from unrelated grammars. + +### 14. Unsupported Languages Silently Use the Generic Adapter + +`TreeSitterNormalizationAdapter.for` falls back to `new(document)` at line +134. That means unsupported languages appear to work using generic heuristics. +The result is worse than a clean unsupported error because detectors can +publish partial, misleading findings. + +This is especially risky because `syntax.rb` already has `LANGUAGE_PROFILES` +for many languages at lines 2510-2598, while `ast.rb` only selects dedicated +normalization adapters for Ruby, Python, Lua, TypeScript, and JavaScript at +lines 128-135. + +Expected direction: + +- Require an explicit normalization adapter/profile for every language that + flows through `Ast.parse`. +- If a language is only partially supported, expose a capability matrix and + skip unsupported detector paths explicitly. + +### 15. There Are Two Adapter Systems That Can Drift + +`syntax.rb` already defines `TreeSitterLanguageAdapter` and language profiles +starting at lines 271 and 2510, respectively. Those profiles contain language lexicons, +function extraction, owner extraction, state reads/writes, call targets, +parameters, and branch facts. + +`ast.rb` defines a separate `TreeSitterNormalizationAdapter` starting at line +45 with its own function kinds, owner kinds, assignment operators, branch +heuristics, safe navigation logic, parameters, rescue/ensure handling, and +language subclasses. + +That is duplicated ownership. A language feature can be fixed in one adapter +layer and remain broken in the other. This is likely why language-specific +logic keeps reappearing in the wrong file. 
+ +Expected direction: + +- Unify adapter ownership, or make one adapter explicitly depend on the other. +- There should be one place where language grammar knowledge is defined. +- `ast.rb` should not maintain its own parallel language universe. + +### 16. Source Span Utilities Are Mixed With Semantic Rewrites + +`wrap`, `source_before_child`, `source_from_nodes`, and +`source_from_normalized_nodes` at lines 3087-3171 construct spans and source +text while the same class performs semantic rewrites. + +This increases coupling. Transform code has to know how spans are rebuilt, +and span code has to handle both Tree-sitter nodes and already-normalized +nodes. + +Expected direction: + +- Move span/source helpers behind a small source mapping utility. +- Keep semantic normalization focused on semantic shape. + +### 17. Dispatch Ordering Is an Implicit Contract + +`normalize_node` at lines 1524-1658 and `normalize_body` at lines 2316-2359 +both contain long, order-sensitive dispatch chains. The same conceptual +constructs are checked in several places: leading functions, leading branches, +rescue/ensure bodies, calls with blocks, infix statements, unary operations, +element references, arrays, hashes, and terminal statements. + +Adding a new language or construct requires knowing exactly where it belongs +in two large branch chains. A new check can shadow an older one globally. + +Expected direction: + +- Classify once into a semantic category. +- Dispatch on that category with a small table or polymorphic handler. +- Keep body normalization and expression normalization separate where the + language actually distinguishes statements and expressions. + +## Cross-Language Incompatibilities + +These are representative examples of logic that cannot be correct across +languages while living in shared code. + +| Current behavior | Why it is not portable | Better owner | +|---|---|---| +| `safe_navigation_call?` checks `&.` in the base adapter. 
| Optional chaining is language-specific and absent in many languages. | Per-language adapter. | +| `leading_function_statement?` searches for `"def"` or `"function"` keyword tokens. | Function declarations are grammar-specific and often declarator-based. | Per-language adapter. | +| `ruby?` gates shared normalization. | Shared code changes behavior by language instead of using polymorphism. | Ruby normalizer or adapter. | +| `NIL_KINDS = %w[nil none null]`. | Nil/null/None/undefined have different semantics. | Literal classifier per language. | +| `RETURN_KINDS` maps `continue_statement` to `NEXT`. | Ruby `next` and non-Ruby `continue` are not the same abstraction. | Neutral control-flow IR. | +| `self_node?` maps `self` and `this` together. | `self`, `this`, receiver, class/static context, and module context differ. | Language scope/receiver adapter. | +| `member_parts` guesses receiver/member from many possible field names. | Member grammar differs widely and includes pointer, path, optional, private, and static forms. | Per-language call/member adapter. | +| `assignment_lhs?` checks sibling token text. | Assignment shape is not reliably represented by adjacent punctuation. | Per-language assignment classifier. | +| `normalize_parameters` is Ruby-only. | Non-Ruby functions lose parameter facts. | Per-language parameter adapter. | +| `normalize_pair` assumes Ruby hash semantics and symbol shorthand. | Object literals, dictionaries, tables, maps, and hashes differ. | Per-language literal/container adapter. | +| `vcall_identifier?` and `ruby_vcall_identifier?` decide local vs call. | Bare identifier semantics differ by language. | Per-language scope adapter or detector layer. | +| Rescue/ensure are normalized as Ruby `RESCUE`/`ENSURE`. | Exceptions/finally/defer/panic/error returns differ substantially. | Neutral exception/control-flow IR. 
| + +## Recommended Remediation Plan + +### P0: Stop the Architectural Bleeding + +- Do not add new language support by extending shared constants in `ast.rb`. +- Remove or isolate `ruby?` checks from `TreeSitterNormalizer`. +- Stop silent fallback to the generic normalization adapter for unsupported + languages. +- Replace broad `rescue StandardError` in hot-path shape checks with explicit + nil-safe accessors and documented adapter contracts. +- Move Ruby-only behavior out of the shared normalizer first: local/vcall + scope, inline def, implicit nil, tail return elision, Ruby argument-list + calls, Ruby hash shorthand, and Ruby `=~`. + +### P1: Define the Adapter Contract + +- Define the canonical facts a language adapter must provide: + function declaration, class/owner declaration, call, member access, + assignment, parameter, branch, loop, case arm, return/break/continue, + literal, string interpolation, exception handler, finally/ensure, and block. +- Make capability gaps explicit. A language should say "I do not support this + fact yet" rather than returning `false` from inherited generic heuristics. +- Pull punctuation and keyword-token checks into language adapters. +- Add adapter-level fixture tests per language that assert canonical facts, + not Ruby AST node names. + +### P2: Separate Ruby Compatibility From Decomplex Semantics + +- Introduce a language-neutral semantic IR for detector input. +- Keep Ruby AST-compatible node names only as a compatibility adapter for + legacy Ruby detector code. +- Migrate detectors toward semantic facts and away from Ruby parser node names. +- Preserve source spans as a separate utility so semantic normalization is not + responsible for source reconstruction. + +### P3: Unify `syntax.rb` and `ast.rb` Language Ownership + +- `syntax.rb` already has language profiles and structural fact extraction. +- `ast.rb` should either consume those profiles or be refactored so profile + ownership lives in one place. 
+- Avoid parallel adapter hierarchies with overlapping function, owner, branch, + assignment, call, and state semantics. + +## Desired End State + +The ideal architecture should look like this: + +1. `Syntax.parse` produces a Tree-sitter document with a known language + profile. +2. The language adapter owns grammar-specific queries and token quirks. +3. The adapter emits canonical semantic facts or canonical syntax nodes. +4. The shared normalizer only maps canonical facts into Decomplex's detector + model. +5. Ruby AST compatibility, where still required, is a Ruby-specific adapter, + not the shared representation. + +In that design, adding Rust, Zig, Go, C, C++, Java, Swift, Kotlin, or any other +language does not require stuffing more native grammar names into +`TreeSitterNormalizer#normalize_node`. It requires implementing that language's +adapter contract and proving it with language-specific fixtures. + +## Current Remediation Notes + +The Ruby production detector path has moved in this direction: + +- `FalseSimplicity` now consumes `Syntax::SemanticEffectSite` facts and owner / + function facts. Ruby-specific effect lexicons and grammar quirks live under + `lib/decomplex/syntax/ruby_effects.rb`. +- `OrderedProtocolMine` now consumes `Syntax::ProtocolMethodEffect` and + `Syntax::ProtocolMethodPath` facts. Ruby branch/case/lambda path semantics and + state-effect extraction live under `lib/decomplex/syntax/ruby_protocols.rb`. +- `SequenceMine` and `OversizedPredicate` now consume `Syntax` call and decision + facts directly instead of `Ast.parse_semantic`. +- `Syntax` no longer requires the `Ast` facade; the dependency now points from + compatibility parsing toward `Syntax`, not from Syntax back into Ast. +- Ruby structural/local/path helper behavior has been split out of `syntax.rb` + into `lib/decomplex/syntax/ruby.rb`; Ruby effect and protocol quirks live in + `ruby_effects.rb` and `ruby_protocols.rb`. 
+- A production detector grep no longer finds `Ast.parse`, `Ast.parse_semantic`, + or legacy Ruby AST node names outside the `ast.rb` compatibility facade. + +Remaining architectural debt: + +- `ast/legacy_normalizer.rb` still exists as a Ruby-shaped compatibility layer. +- Non-Ruby profile behavior in `syntax.rb` should continue moving into + language-specific profile files as those languages are made first-class. +- Rust still needs to mirror the Ruby architecture with minimal changes after + Ruby verification is complete. diff --git a/gems/decomplex/docs/agents/cross-language-fixture-commit-audit.md b/gems/decomplex/docs/agents/cross-language-fixture-commit-audit.md new file mode 100644 index 000000000..285959b6f --- /dev/null +++ b/gems/decomplex/docs/agents/cross-language-fixture-commit-audit.md @@ -0,0 +1,69 @@ +# Cross-Language Fixture Commit Audit + +Audited commit: `cda67cd87` (`Add cross-language decomplex oracle fixtures`). + +Status: resolved in the current working tree. + +## Architecture Guardrails Added + +- `spec/decomplex_architecture_invariants_spec.rb` adds a root RSpec static + architecture guard matching the repo's existing invariant style. +- `gems/decomplex/test/architecture_invariants_test.rb` adds the same guard to + the Decomplex minitest suite. +- The guards fail if detector files use raw Tree-sitter node APIs such as + `children`, `named_children`, `child_by_field_name`, byte/point offsets, + `TreeSitter*` classes, or raw node duck typing. +- The guards fail if `syntax.rb` starts hosting detector-specific syntax + extension facts such as clone candidates, dispatch sites, nil guard facts, + or local complexity facts. +- The guards fail if concrete language adapter implementations move back into + `syntax.rb`, or if language profiles instantiate the base + `TreeSitterLanguageAdapter` directly. 
+ +## Burned Down Architecture Items + +- `FlaySimilarity` now consumes `document.clone_candidates`; parser-specific + clone fingerprinting lives in `syntax/clone_similarity.rb`. +- `WeightedInlinedCognitiveComplexity` and `LocalityDrag` now consume + `document.local_complexity_scores`; local scoring lives in + `syntax/complexity.rb`. +- `RedundantNilGuard` now consumes `document.redundant_nil_guard_findings`; + nil-guard parsing lives in `syntax/nil_guards.rb`. +- `DecisionPressure` now gets local assignment contracts through + `document.local_contract_assignments`; contract extraction lives in + `syntax/contracts.rb`. +- `FatUnion` now consumes `document.dispatch_sites`; dispatch extraction lives + in `syntax/dispatch.rb`. +- Concrete language adapter behavior has moved from `syntax.rb` into + `syntax/ruby.rb` and `syntax/adapters.rb`. + +## Oracle Strength Restored + +The shared example oracle now asserts detector-specific normalized content +instead of mere finding presence for the previously weak detectors: + +- `decision-pressure`: contract, decision count, essential count, method count. +- `miner`: conjunction members, support, scatter, neglected-condition pattern. +- `semantic-alias`: normalized canonical predicate and reification miss count. +- `flay-similarity`: clone type, node kind, site count. +- `temporal-ordering-pressure`: owner, method counts, writer count, orderings, + state fields, shared fields. +- `state-branch-density`: normalized method name, decisions, state refs. +- `state-mesh`: total fields/writes/reads/re-derivations and field names. +- `implicit-control-flow`: protocol pair, dependency, support, observed/missing + calls, states. +- `path-condition`: normalized pattern, support, missing guard, action. +- `function-lcom`: mode, component count, local count, statement count, + terminal join. +- `fat-union`: common members, variant members, degeneracy, support, scatter, + variant set. 
+- `structural-topology`: method count and exact normalized edge rows. + +## Verification + +- `bundle exec rspec spec/decomplex_architecture_invariants_spec.rb` +- `bundle exec ruby -I gems/decomplex/test gems/decomplex/test/architecture_invariants_test.rb` +- `bundle exec ruby -I gems/decomplex/test gems/decomplex/test/examples_oracle_test.rb` +- `bundle exec ruby -I gems/decomplex/test -I gems/decomplex/lib -e 'Dir["gems/decomplex/test/*_test.rb"].sort.each { |path| require File.expand_path(path) }'` + +Current result: all pass, including the full Decomplex suite with 0 skips. diff --git a/gems/decomplex/docs/agents/cross-system-fact-oracle-design.md b/gems/decomplex/docs/agents/cross-system-fact-oracle-design.md new file mode 100644 index 000000000..75960ac4c --- /dev/null +++ b/gems/decomplex/docs/agents/cross-system-fact-oracle-design.md @@ -0,0 +1,143 @@ +# Cross-System Fact Oracle Design + +Status: WIP design for the Ruby-vs-Rust Decomplex parity work. + +## Problem + +The current test stack lets Rust drift into detector-owned fact generation. That is an architectural failure. Detectors must consume already-normalized facts. If a detector needs to walk raw Tree-sitter nodes, normalized AST roots, language profiles, or language-specific syntax, the required fact is missing from the syntax layer and must be added there first. + +The test suite must prove fact generation before it proves detector scoring. A detector oracle that only checks the final finding is too late and too coarse; it can hide incorrect or missing facts, duplicated mining code, and detector-specific language hacks. + +## Required Oracle Layers + +Blocking rule: do not continue detector parity, report parity, SARIF parity, or real-repo end-to-end parity until source-level fact generation integration tests exist for every fact consumed by detectors and those tests run against both Ruby Decomplex and Rust Decomplex. + +1. Source fact oracle + - Input: source file in a real language. 
+ - Engines: Ruby Decomplex and Rust Decomplex. + - Output: exact canonical fact projection. + - Purpose: prove that adapters and syntax modules generate the same facts from source. + +2. Normalized fact JSON oracle + - Input: language-neutral JSON fact set. + - Engines: Ruby detector/report pipeline and Rust detector/report pipeline. + - Output: exact detector/report/SARIF/root-cause/convergence projection. + - Purpose: prove detector consumers behave the same once facts are correct. + +3. End-to-end repository parity + - Input: real repos. + - Engines: Ruby full pipeline and Rust full pipeline. + - Output: byte-for-byte report/json/SARIF where supported. + - Purpose: acceptance only. This must not be the primary way bugs are discovered. + +## Fact Generation Contract + +Every fact that any detector consumes must have source-level oracle coverage: + +- `function_defs` +- `owner_defs` +- `call_sites` +- `state_reads` +- `state_writes` +- `state_declarations` +- `state_param_origins` +- `decision_sites` +- `branch_decisions` +- `dispatch_sites` +- `comparison_uses` +- `semantic_effect_sites` +- `predicate_defs` +- `path_condition_sites` +- `local_methods` +- `local_complexity_scores` +- `clone_candidates` +- `protocol_method_effects` +- `protocol_call_paths` +- `redundant_nil_guard_findings` +- language-specific optional contract facts such as immutable reader/type alias facts + +If a detector needs a new input, the change order is: + +1. Add or extend the syntax fact type. +2. Add source fixtures for at least Ruby and any language being touched. +3. Add exact Ruby-vs-Rust source fact oracle assertions. +4. Update the detector to consume that fact. +5. Add or extend normalized fact JSON detector oracles. + +No detector may add fallback fact mining. + +## Ruby Source Fixtures Needed First + +These Ruby fixtures should live under `gems/decomplex/examples/source-facts/ruby/` and each should have an exact oracle under `examples/source-facts/oracles/`. 
+ +- `state_reads.rb`: receivers, chained receivers, self reads, globals, constants that must not become state, safe navigation when represented. +- `state_writes.rb`: instance/global writes, indexed writes, field writes, operator assignment, local writes that must not become state. +- `visibility.rb`: public/protected/private declarations, standalone visibility, symbol-list visibility, default public. +- `semantic_effects.rb`: hidden IO, dynamic dispatch, callback inversion, metaprogramming, context reads, `[]=`, `<<`, method hooks. +- `block_receiver_calls.rb`: block parameter receiver calls, nested block calls, iterator control metadata, without unrelated mutation noise. +- `locals_not_state.rb`: params, locals, `ENV[key]`, indexed local assignment receiver reads, assertion commands, block-local values, outer locals. +- `local_flow.rb`: reads, writes, dependencies, co-uses, boundaries, destructuring, loops, nested scopes, indexed/member writes. +- `nil_guards.rb`: prior non-nil proof, redundant `nil?`, safe navigation, branch dominance, termination. +- `path_conditions.rb`: nested guards, `&&`, modifier conditionals, case/when, guarded actions. +- `clone_candidates.rb`: function bodies, owner bodies, DSL wrapper bodies, fingerprint/mass behavior. +- `protocols.rb`: receiverless Ruby calls, bare readers, internal call paths, method effects, mutating calls, declarative/DSL calls that must not become protocol events. + +The Ruby source-fact oracle should not collapse these to counts. It should assert the exact relevant rows and fields for each section under test. 
+ +## Cross-Language Happy Path Matrix + +For each supported language, add at least one fixture per fact bucket that proves the language adapter emits the shared fact shape: + +- functions/owners/calls +- state reads/writes +- local methods +- branch/path facts +- semantic effects +- clone candidates +- protocol facts where the language has receiverless calls or implicit receiver calls +- nil/null guard facts where the language supports the detector + +Languages where function calls require `()` should not need Ruby-style bare-call protocol heuristics. Languages that allow omitted call delimiters or implicit receiver calls must solve that ambiguity in the adapter and prove it with source-fact fixtures. + +## Normalized Fact JSON Path + +The JSON fact fixtures under `gems/decomplex/examples/facts/` should cover detector consumers after normalization. These fixtures are language-neutral and should be shared by Ruby and Rust. + +Required groups: + +- `facts/local-flow/`: derived-state, locality-drag, function-LCOM, operational discontinuity, inconsistent rename clone, decision pressure. +- `facts/detectors/`: detectors that consume simpler direct facts. +- `facts/root-cause/`: root-cause ranking from a full detector fact set. +- `facts/convergence/`: convergence output from the same full detector fact set. +- `facts/report/`: markdown and JSON report output from the same full detector fact set. +- `facts/sarif/`: SARIF output from the same full detector fact set. + +The normalized fact JSON must include the full fact set needed by the downstream stage, not a detector-specific stub that proves only that the current code repeats itself. 
+ +## Architecture Invariants + +Rust must mirror Ruby's architectural guardrails: + +- production detector modules must not import `tree_sitter` +- production detector modules must not import `syntax::adapters` +- production detector modules must not call `language_profile` +- production detector modules must not inspect `document.language` +- production detector modules must not read `document.root` or `document.normalized_root` +- production detector modules must not use `RawNode` +- production detector modules must not branch on `Language::Ruby`, `Language::Python`, or any other concrete language + +If one of these invariants blocks a detector fix, the fix belongs in syntax/adapters or in a new fact type. + +## CI Gates + +The minimum CI gate before end-to-end repo parity work: + +- Ruby architecture invariants pass. +- Rust architecture invariants pass. +- Ruby source-fact oracle passes for Ruby and Rust engines. +- Rust integration source-fact oracle passes without shelling through Ruby test assertions. +- Normalized fact JSON detector oracles pass for Ruby and Rust. +- Report/root-cause/convergence/SARIF JSON-input oracles pass for Ruby and Rust. +- No skips for supported fact buckets. Unsupported language/fact combinations must be explicit `unsupported` entries in the matrix, not skipped tests. + +End-to-end repo parity should start only after these gates are green. diff --git a/gems/decomplex/docs/agents/fixture-coverage-gap-analysis.md b/gems/decomplex/docs/agents/fixture-coverage-gap-analysis.md new file mode 100644 index 000000000..0c1d47a1d --- /dev/null +++ b/gems/decomplex/docs/agents/fixture-coverage-gap-analysis.md @@ -0,0 +1,88 @@ +# Decomplex fixture and coverage gap analysis + +Date: 2026-06-20 + +## Current measured state + +The shared detector examples now run in both places: + +- Ruby: `gems/decomplex/test/examples_oracle_test.rb` +- Rust: `gems/decomplex/rust/tests/examples_oracle.rs` + +Current shared fixture grid: + +- 15 languages. 
+- 24 detectors. +- 360 detector/language fixture cells. +- 0 missing fixture cells. + +Current Rust coverage from `cargo llvm-cov`, with Rust test code excluded from the line counts: + +- Rust production: 68,796 / 84,602 executable lines, 81.32%. +- Rust detectors: 5,728 / 6,725 executable lines, 85.17%. + +The largest earlier false signal was stale Rust-only detector code. Several low-coverage detector paths were not missing fixture coverage; they were code paths Ruby no longer owns in detectors: + +- `state_branch_density`: removed the normalized-AST fallback scanner. Ruby consumes mined `branch_decisions`. +- `fat_union`: removed the normalized-root fallback scanner. Ruby consumes dispatch facts. +- `false_simplicity`: moved semantic-effect classification into syntax facts. The detector now consumes `semantic_effect_sites`. +- `state_mesh`: removed normalized-root read/write fallback behavior. The detector now consumes state facts. +- `temporal_ordering_pressure`: now discovers owners from both owner and function facts like Ruby. +- `weighted_inlined_cognitive_complexity` and `locality_drag`: moved local complexity scoring to `Document#local_complexity_scores`, matching Ruby's syntax fact boundary. + +## Detectors below 90% Rust LoC coverage + +These are the remaining detector implementation files below 90% after the architecture cleanup: + +| Detector | Coverage | Primary gap | +| --- | ---: | --- | +| `sequence_mine` | 62.07% | One fixture hits only the positive pair. It misses ignored/declarative calls, nested protocol events, confidence filters, denominator branches, and sort tie-breaks. | +| `derived_state` | 65.38% | Fixture hits one stale derived variable. It misses multi-write ordering, self-dependency exclusion, no-reassignment, and recomputed-derived negatives. | +| `redundant_nil_guard` | 69.57% | Fixture is too narrow for guard shapes. Needs safe navigation, explicit nil checks, chained guards, local reassignment, and negative useful guards. 
| +| `decision_pressure` | 79.01% | Fixture hits local contract assignment only. It misses essential dispatch, rescue-nil, receiver/index/local contract canonicalization, conditional assignment rejection, and ranking. | +| `state_branch_density` | 79.44% | Fixture hits one non-nested state predicate. It misses wrapper suppression for nested branches and multi-row ranking. | +| `false_simplicity` | 79.59% | Oracle asserts only `kind`. It misses detail/support/scatter, top-level effects, monkeypatch/core owner cases, reopen cases, and grouping/ranking. | +| `state_mesh` | 81.35% | Fixture has one field. It misses multi-field percentiles, semantic-alias re-derivations, custom fields, and graph details. | +| `path_condition` | 84.38% | Fixture hits one neglected condition. It misses action/guard extraction variants, support/confidence filters, span containment, and negative paths. | +| `weighted_inlined_cognitive_complexity` | 84.81% | Architecture is now correct; fixture still needs multi-finding ranking, shared public step weighting, cycle/visited guard, and missing-callee branches. | +| `structural_topology` | 84.85% | Fixture misses self-call exclusion, singleton/static scoped names, multi-line source spans, hidden Ruby owner wrappers, and enclosing-span helper branches. | +| `local_flow` | 86.93% | The oracle is stronger than before but still not broad enough for all syntax categories. Needs local-flow semantic cases by grammar feature. | +| `locality_drag` | 89.89% | Needs one more case for low-complexity/short-gap negatives, rewrite-before-use, related gap expansion, and ranking. | + +## Fixture strategy + +The plan is sound, with one correction: do not write fixtures to cover code that should not exist in detectors. First delete or move misplaced detector-owned syntax work, then expand fixtures around the remaining legitimate detector behavior. + +Use these fixture layers: + +1. Keep the existing `examples/<detector>/<language>.<ext>` files as smoke tests. +2.
Add case fixtures where one file per detector is not enough. Preferred layout: + - `examples/<detector>/<case>/<language>.<ext>` + - `examples/oracles/<detector>/<case>.json` +3. Keep oracles shared across languages. Only scrub location/SARIF fields; do not collapse semantic fields to counts when the detector behavior depends on the omitted fields. +4. Run both engines against the same oracle projection: + - Ruby for cross-engine parity. + - Rust integration tests for Rust CI truth and Rust LCOV coverage. + +## Immediate fixture expansion order + +Highest leverage order: + +1. `sequence_mine`: add support/confidence negative cases and nested protocol events. +2. `derived_state`: add stale, recomputed, self-dependent, and multi-write cases. +3. `redundant_nil_guard`: add guard-shape matrix and useful-guard negatives. +4. `state_branch_density`: add nested wrapper suppression and multi-row ranking. +5. `decision_pressure`: add essential dispatch and rescue-nil cases. +6. `false_simplicity`: strengthen projection to include `kind`, `detail`, `support`, and `scatter`, then add effect and monkeypatch cases. +7. `state_mesh`: add multi-field/re-derivation/custom-field cases. +8. `local_flow`: add syntax-facts-style fixtures for reads/writes/dependencies/co-uses across declarations, destructuring, member/index writes, loops, closures, and cleanup blocks. + +## Root-cause/report/SARIF plan + +Ignoring reporting for the current detector pass is reasonable. The downstream plan should still be: + +1. Create a shared facts JSON oracle containing detector outputs and syntax facts. +2. Feed that JSON into Ruby and Rust root-cause code and compare a stable projected output. +3. Reuse the same facts JSON for report and SARIF snapshot tests later. + +That gives coverage for root cause, convergence, report, and SARIF without multiplying language fixtures. The JSON should contain full facts, not a detector-specific subset, so later stages can share it.
diff --git a/gems/decomplex/docs/agents/ruby-first-cross-language-ast-design.md b/gems/decomplex/docs/agents/ruby-first-cross-language-ast-design.md new file mode 100644 index 000000000..939fc003a --- /dev/null +++ b/gems/decomplex/docs/agents/ruby-first-cross-language-ast-design.md @@ -0,0 +1,621 @@ +# Ruby-First Cross-Language AST Architecture + +Status: Ruby implementation complete for the production detector architecture. +Rust mirror work is pending. The legacy AST normalizer remains quarantined as a +compatibility layer, not as detector infrastructure. + +Related analysis: `gems/decomplex/docs/agents/architectural-issues.md`. + +## Implementation Status + +Completed so far: + +- `ast.rb` has been reduced to a small facade. +- AST infrastructure has been split into `ast/node.rb`, `ast/cache.rb`, + `ast/source_map.rb`, and adapter files. +- `Ast.parse_semantic` and `SemanticNode` still exist as compatibility + infrastructure, but production detectors should consume `Syntax` facts + directly. +- `TreeSitterNormalizationAdapter.for` now fails loudly for unsupported AST + compatibility languages instead of silently falling back to a generic + adapter. +- Ruby-specific defaults for `yield`, `&.`, leading `def`, heredoc handling, + and Ruby variable text checks have been moved out of the base AST adapter + into `adapters/ruby.rb`. +- `RubySyntaxAdapter` owns Ruby method visibility markers and singleton + method receiver naming for structural facts. +- `PythonSyntaxAdapter` owns Python receiverless adjacent-call syntax. +- `Syntax::SemanticEffectSite` and Ruby effect adapters now expose + False-Simplicity-style semantic effects such as Ruby dynamic dispatch, + command literals, `yield`, singleton-class metaprogramming, globals, + receiver mutation, callbacks, and core-class reopen support. 
+- `Syntax::ProtocolMethodEffect` and `Syntax::ProtocolMethodPath` now expose + Ruby ordered-protocol method effects and path-separated internal call + sequences, including branch/case separation and lambda-body exclusion. +- `Syntax` no longer requires the `Ast` facade; the dependency direction is + compatibility-only (`Ast` may call into `Syntax`, not the reverse). +- Ruby structural/local/path helper behavior has been split out of `syntax.rb` + into `syntax/ruby.rb`; `syntax.rb` now keeps only the shared profile and + dispatcher layer plus a Ruby adapter stub. +- These detectors now avoid `Ast.parse` and `Ast.parse_semantic` in production + and consume `Syntax` facts: + - `SequenceMine` + - `OversizedPredicate` + - `StructuralTopology` + - `TemporalOrderingPressure` + - `StateBranchDensity` + - `StateMesh` write/read discovery + - `PredicateAlias` + - `SemanticAlias` + - `LocalFlow` + - `DerivedState` + - `FatUnion` + - `DecisionPressure` + - `PathCondition` + - `InconsistentRenameClone` + - `WeightedInlinedCognitiveComplexity` + - `RedundantNilGuard` + - `FalseSimplicity` + - `OrderedProtocolMine` +- A production detector search now leaves legacy Ruby AST node names only in + `ast.rb`, the explicit compatibility facade. + +Remaining follow-up work: + +- `ast/legacy_normalizer.rb` is still a large Ruby-shaped compatibility + normalizer. It is no longer production detector infrastructure, but it should + eventually shrink or become Ruby-only compatibility code. +- The base `TreeSitterLanguageAdapter` in `syntax.rb` still contains broad + cross-language heuristic tables; non-Ruby language work should continue to + move behavior into explicit language profiles. +- The semantic model still does not expose exception-flow details or a full + expression tree. Current Ruby detector coverage does not require those facts, + but future detectors must add adapter-owned facts rather than reviving the + legacy AST model. 
+- Rust has not yet been mirrored to the Ruby architecture; current Rust parity + is preserved for the migrated Ruby detector fixtures. + +## Goal + +Make Decomplex's Ruby AST/normalization implementation architecturally correct +first, then mirror that architecture in Rust with minimal behavioral drift. + +The correct end state is not "one Ruby AST shape that every language pretends +to be." The correct end state is: + +1. Language adapters own Tree-sitter grammar quirks. +2. A shared semantic model represents facts detectors can use across + languages. +3. Ruby parser compatibility exists only at a Ruby boundary. +4. Unsupported language features are explicit capability gaps, not silent + generic fallbacks. + +## Non-Goals + +- Do not add another layer of string matching to the shared normalizer. +- Do not preserve `ruby?` as a shared-code branch mechanism. +- Do not make Rust lead the architecture. Rust mirrors Ruby after Ruby is + correct. +- Do not claim cross-language support because tests produce Ruby AST node + names for non-Ruby code. +- Do not keep expanding `ast.rb` as a universal normalization file. + +## Current Problem + +`gems/decomplex/lib/decomplex/ast.rb` is 4,023 lines and currently combines: + +- AST facade helpers. +- Tree-sitter grammar adaptation. +- Ruby AST compatibility output. +- Ruby local/scope semantics. +- Shared cross-language normalization. +- Source span reconstruction. + +`gems/decomplex/rust/src/decomplex/ast.rs` is 8,642 lines and mirrors the same +architectural mistake. Rust currently has `syntax/tree_sitter_adapter.rs` with +a `LanguageProfile` trait, but AST normalization itself is still a single +large enum-driven file. + +The first implementation target is Ruby because Ruby owns the legacy behavior +and the existing detector contracts. Once Ruby has a clean boundary, Rust can +mirror the structure without copying the monolith. 
+ +## Target Ruby File Layout + +The Ruby implementation should move toward this structure: + +```text +gems/decomplex/lib/decomplex/ast.rb +gems/decomplex/lib/decomplex/ast/node.rb +gems/decomplex/lib/decomplex/ast/span.rb +gems/decomplex/lib/decomplex/ast/source_map.rb +gems/decomplex/lib/decomplex/ast/semantic_node.rb +gems/decomplex/lib/decomplex/ast/semantic_normalizer.rb +gems/decomplex/lib/decomplex/ast/ruby_compat.rb +gems/decomplex/lib/decomplex/ast/adapters/base.rb +gems/decomplex/lib/decomplex/ast/adapters/ruby.rb +gems/decomplex/lib/decomplex/ast/adapters/python.rb +gems/decomplex/lib/decomplex/ast/adapters/lua.rb +gems/decomplex/lib/decomplex/ast/adapters/typescript.rb +``` + +`ast.rb` should become a facade and compatibility entry point. It should not +contain language-specific grammar tables or semantic rewrites. + +## Target Rust File Layout + +Rust should mirror Ruby after the Ruby boundary is correct: + +```text +gems/decomplex/rust/src/decomplex/ast/mod.rs +gems/decomplex/rust/src/decomplex/ast/node.rs +gems/decomplex/rust/src/decomplex/ast/span.rs +gems/decomplex/rust/src/decomplex/ast/source_map.rs +gems/decomplex/rust/src/decomplex/ast/semantic_node.rs +gems/decomplex/rust/src/decomplex/ast/semantic_normalizer.rs +gems/decomplex/rust/src/decomplex/ast/ruby_compat.rs +gems/decomplex/rust/src/decomplex/ast/adapters/mod.rs +gems/decomplex/rust/src/decomplex/ast/adapters/ruby.rs +gems/decomplex/rust/src/decomplex/ast/adapters/python.rs +gems/decomplex/rust/src/decomplex/ast/adapters/lua.rs +gems/decomplex/rust/src/decomplex/ast/adapters/typescript.rs +``` + +Rust should not receive a large redesign before Ruby is stabilized. The Rust +work is a mirror step, not an independent architecture experiment. + +## Line-of-Code Budgets + +These budgets are guardrails. They are not strict limits, but exceeding them +should trigger review. 
+ +| Component | Target LoC | +|---|---:| +| `ast.rb` facade | 50-150 | +| `node.rb` | 50-120 | +| `span.rb` / `source_map.rb` | 100-250 total | +| `semantic_node.rb` | 100-250 | +| `semantic_normalizer.rb` | 400-800 | +| `ruby_compat.rb` | 400-900 | +| Base adapter contract | 150-300 | +| Ruby adapter | 400-700 | +| Python adapter | 250-400 | +| TypeScript/JavaScript adapter | 250-450 | +| Lua adapter | 150-300 | +| Each later language adapter | 200-500 | + +If a language adapter grows past roughly 700 lines, either the shared semantic +contract is too weak or detector logic has leaked into the adapter. If the +shared normalizer grows past roughly 800 lines, it is probably becoming the new +monolith. + +## Semantic Model + +The detector-facing model must not be Ruby AST names. It should represent +cross-language concepts directly. + +Minimum semantic node/fact types: + +- `Root` +- `Owner` +- `Function` +- `Parameter` +- `Block` +- `Call` +- `MemberAccess` +- `Subscript` +- `Assignment` +- `Identifier` +- `Literal` +- `Branch` +- `Loop` +- `Case` +- `CaseArm` +- `Return` +- `Break` +- `Continue` +- `BooleanOp` +- `Comparison` +- `UnaryOp` +- `BinaryOp` +- `Lambda` +- `ExceptionHandler` +- `Finally` +- `Unknown` + +Every semantic node should carry: + +- `type` +- `children` +- `span` +- `text` +- `language` +- optional metadata, such as `name`, `receiver`, `message`, `operator`, + `parameters`, `visibility`, `owner`, `control`, or `capability_gap`. + +The shared semantic model can preserve source text, but it should not depend +on source text to discover language constructs. + +## Adapter Contract + +Each language adapter should classify native Tree-sitter nodes into semantic +facts. The shared normalizer should ask the adapter for meaning instead of +matching grammar strings directly. 
+ +Required adapter methods: + +```ruby +function_definition(node) +owner_definition(node) +parameters(node) +call(node) +member_access(node) +subscript(node) +assignment(node) +branch(node) +loop(node) +case_expression(node) +case_arm(node) +return_statement(node) +break_statement(node) +continue_statement(node) +literal(node) +identifier(node) +boolean_operation(node) +comparison(node) +unary_operation(node) +binary_operation(node) +lambda_expression(node) +exception_handler(node) +finally_clause(node) +block(node) +ignored_node?(node) +``` + +Each method returns either: + +- a semantic descriptor, +- `nil` when the node is not that construct, +- or a capability-gap object when the language construct is recognized but not + implemented yet. + +The base adapter should not contain Ruby token checks, shared operator tables, +or broad fallback grammar heuristics. It should mostly define the contract, +safe node access helpers, and common descriptor structs. + +## Ruby Adapter Responsibilities + +The Ruby adapter owns Ruby grammar and Ruby semantics: + +- `def`, singleton methods, inline `def`. +- `class`, `module`, singleton class. +- `yield`, `super`, `block_argument`. +- `&.` safe navigation. +- Ruby block and lambda syntax. +- Ruby `case`/`when`. +- Ruby `rescue`/`ensure`. +- Ruby local variable discovery and bare-call resolution. +- `VCALL`, `FCALL`, `DVAR`, `DASGN`, and `SCOPE` if needed for compatibility. +- Ruby symbols, globals, instance variables, class variables. +- Ruby hash key shorthand. +- Ruby `=~` behavior. +- Implicit nil and tail-return elision. +- Visibility calls such as `private`, `protected`, `public`, + `module_function`, and `private_class_method`. + +None of these should live in shared normalizer code. 
+ +## Ruby Compatibility Boundary + +Existing Ruby detectors currently depend on Ruby AST-like node names such as: + +- `DEFN`, `DEFS`, `SCOPE` +- `CALL`, `QCALL`, `FCALL`, `VCALL`, `OPCALL` +- `LASGN`, `IASGN`, `DASGN` +- `LVAR`, `DVAR`, `IVAR`, `GVAR` +- `IF`, `UNLESS`, `CASE`, `WHEN` +- `RETURN`, `BREAK`, `NEXT` + +Those names can remain temporarily, but only behind a Ruby compatibility +adapter: + +```text +Tree-sitter Ruby nodes + -> Ruby adapter descriptors + -> semantic nodes + -> Ruby compatibility nodes for legacy detectors +``` + +New or migrated cross-language detectors should consume semantic nodes/facts: + +```text +Tree-sitter language nodes + -> language adapter descriptors + -> semantic nodes + -> detector facts +``` + +This separation is what makes the system truly cross-language. + +## Implementation Phases + +### Phase 1: Split Non-Language Infrastructure + +Create these files without changing behavior: + +- `ast/node.rb` +- `ast/span.rb` +- `ast/source_map.rb` +- `ast/adapters/base.rb` + +Move only mechanical infrastructure: + +- `Node` +- `node?` +- `slice` +- source span construction +- parent/child safe access helpers +- normalized cache helpers + +Acceptance criteria: + +- Ruby tests still pass. +- `ast.rb` becomes a facade for existing behavior. +- No semantic changes yet. + +### Phase 2: Extract Ruby Adapter + +Move Ruby-specific syntax and semantics out of `TreeSitterNormalizer` into +`ast/adapters/ruby.rb`. + +Initial Ruby adapter methods should cover: + +- functions and singleton functions +- owners +- calls and safe calls +- assignments +- identifiers and locals +- blocks/lambdas +- branch/case/loop +- rescue/ensure +- literals +- parameters + +Acceptance criteria: + +- No `ruby?` branch remains in shared normalizer. +- Ruby-specific token checks are in `RubyAdapter`. +- Ruby tests pass. +- Existing Ruby detector output is unchanged. 
+ +### Phase 3: Introduce Semantic Nodes + +Add `ast/semantic_node.rb` and `ast/semantic_normalizer.rb`. + +The semantic normalizer should: + +- walk Tree-sitter nodes, +- ask the adapter for descriptors, +- emit semantic nodes, +- preserve spans and text, +- avoid language-specific grammar strings. + +Acceptance criteria: + +- Ruby semantic fixtures pass. +- Ruby compatibility output can be generated from semantic nodes. +- Shared semantic code contains no Ruby-specific behavior. + +### Phase 4: Move Legacy Ruby AST Output Behind Compatibility + +Create `ast/ruby_compat.rb`. + +This layer converts Ruby semantic nodes to the legacy Ruby AST-like nodes +needed by existing detectors. + +Acceptance criteria: + +- `Ast.parse(file)` still returns the legacy shape for Ruby until detectors + migrate. +- Internally, Ruby Tree-sitter nodes no longer flow through a shared + Ruby-shaped normalizer. +- All current Ruby detector tests pass. + +### Phase 5: Add Detector-Facing Semantic API + +Add a new API alongside `Ast.parse`: + +```ruby +Ast.parse_semantic(file, language: nil) +``` + +or equivalent through `Syntax.parse`. + +Acceptance criteria: + +- Cross-language detectors can use semantic facts without Ruby compatibility + nodes. +- At least one detector is ported to the semantic API as proof. +- Semantic facts include source spans and file/method context. + +### Phase 6: Extract Existing Non-Ruby Adapters + +Move the current Python, Lua, and TypeScript logic into adapter files. During +this phase, do not try to make every detector perfect for every language. +Focus on correct adapter ownership. + +Acceptance criteria: + +- Python/Lua/TypeScript grammar quirks are not in shared normalizer code. +- Unsupported features are explicit capability gaps. +- Existing non-Ruby smoke tests either pass or fail with intentional, + documented unsupported-feature assertions. 
+ +### Phase 7: Rust Mirror + +After Ruby is correct, mirror the structure in Rust: + +- split `ast.rs`, +- replace `TreeSitterNormalizationAdapter` enum with an adapter trait, +- move language logic to `ast/adapters/*.rs`, +- keep Rust behavior matched to Ruby fixtures. + +Acceptance criteria: + +- Rust remains behaviorally equivalent for Ruby. +- Rust test files are separate from implementation files. +- Rust adapter files follow the same contract as Ruby. + +## Detector Migration Strategy + +Detectors fall into three categories. + +### Category A: Can Move to Semantic Facts Early + +These mostly need functions, branches, calls, assignments, and spans: + +- weighted inlined cognitive complexity +- structural topology +- local flow +- temporal ordering pressure +- state branch density +- sequence mining +- path condition +- oversized predicate + +### Category B: Needs Ruby Compatibility During Migration + +These depend on Ruby-specific node names or Ruby semantics: + +- predicate alias +- semantic alias +- redundant nil guard +- false simplicity +- ordered protocol mining +- derived state +- decision pressure +- state mesh +- fat union + +### Category C: Should Stay Ruby-Specific Unless Redesigned + +Any detector relying on Ruby-only language semantics should explicitly declare +Ruby-only support until it is redesigned. + +Examples: + +- Ruby visibility wrappers. +- Ruby metaprogramming shapes. +- Ruby `nil?` and safe-navigation-specific analyses. +- Ruby local-vs-call semantics. 
+ +## Salvage Plan for `ast.rb` + +Expected salvage from the current 4,023 lines: + +| Portion | Approximate fate | +|---|---| +| `Node`, cache, `slice`, `node?` | Keep, move to small files | +| Source span helpers | Keep, move to `source_map.rb` | +| `flatten_and`, `def_push`, `body_stmts`, `canon_polarity` | Keep temporarily, then migrate to semantic helpers | +| Ruby scope/local/vcall logic | Keep only in Ruby adapter or Ruby compatibility | +| Ruby inline def/tail return/implicit nil | Keep only in Ruby compatibility | +| Python/Lua/TypeScript shape helpers | Move to adapter files, then rewrite where token mining is unsafe | +| Giant `normalize_node` dispatch | Delete/rewrite | +| Global grammar kind tables | Delete/move into adapters | +| `ruby?` predicate model | Delete | +| Generic fallback adapter | Delete | +| Broad `rescue StandardError` shape checks | Replace with explicit nil-safe helpers | + +Realistically: + +- 10-15% is directly reusable cross-language infrastructure. +- 25-35% is salvageable Ruby compatibility behavior. +- 15-20% is reusable as adapter seeds. +- 50% or more should be deleted or rewritten. + +## Testing Requirements + +### Ruby Must Stay Byte-for-Byte Compatible Where Legacy Requires It + +Before changing behavior, capture current Ruby detector output fixtures for: + +- report sections, +- state branch density, +- structural topology, +- weighted inlined cognitive complexity, +- redundant nil guard, +- false simplicity, +- local flow, +- temporal ordering pressure. + +Ruby compatibility output should remain unchanged until a detector is +explicitly migrated. + +### Semantic Fixtures + +Add language-independent semantic fixtures for: + +- function with parameters, +- method/member call, +- receiverless call, +- assignment, +- branch, +- loop, +- case/match/switch, +- boolean and comparison operations, +- return/break/continue, +- exception/finally, +- lambda/block, +- subscript, +- literal families. 
+ +Each fixture should assert semantic facts, not Ruby AST node names. + +### Adapter Ownership Tests + +Add tests that fail if shared normalizer code learns language-specific tokens. +Examples: + +- no `ruby?` in shared normalizer, +- no `"def"`/`"function"` keyword checks in shared normalizer, +- no `&.`/`?.` checks in shared normalizer, +- no language assignment-operator tables in shared normalizer, +- no silent default adapter for supported languages. + +## Completion Criteria + +The Ruby implementation is complete only when all of these are true: + +- `ast.rb` is a small facade, not a monolith. +- Ruby-specific grammar and semantic behavior live in `adapters/ruby.rb` or + `ruby_compat.rb`. +- Shared normalizer code has no `ruby?` branches. +- Shared normalizer code does not inspect Ruby keyword/operator tokens. +- There is an explicit semantic model for detector-facing cross-language + support. +- `Ast.parse` is compatibility-only; production detectors do not call it. +- `Ast.parse_semantic` is compatibility-only; production detectors consume + `Syntax` facts directly. +- Ruby production detectors consume semantic facts instead of Ruby AST node + names. +- Unsupported language features are represented as explicit capability gaps. +- Ruby tests pass. +- Relevant cross-language semantic fixtures pass. +- Rust has not diverged; it is either unchanged pending mirror work or updated + minimally to match the Ruby architecture. + +Do not report the Ruby implementation as finished before these criteria are +satisfied. + +## Reporting Protocol + +During implementation, report status by phase: + +- completed files, +- behavior preserved, +- tests run, +- remaining architectural blockers. + +Only report "Ruby implementation complete" when the completion criteria above +are satisfied. Until then, report partial progress as partial progress. 
diff --git a/gems/decomplex/docs/agents/scaling.md b/gems/decomplex/docs/agents/scaling.md new file mode 100644 index 000000000..c6cdc38f5 --- /dev/null +++ b/gems/decomplex/docs/agents/scaling.md @@ -0,0 +1,63 @@ +# Decomplex Native Scaling Notes + +## Current Strategy + +The Rust port parallelizes at the `Document` boundary: + +```text +scan_files -> syntax::parse_files -> scan_documents +``` + +`syntax::parse_files` parses and normalizes files in parallel, while detectors still consume a deterministic `Vec<Document>` in input order. This is intentional. It keeps the Rust code close to the Ruby architecture so detectors and language normalizers can be ported file-for-file instead of redesigned around detector-specific map/reduce pipelines. + +Parallelism is controlled with: + +- `--jobs=N` on `decomplex detector ... --engine=rust` +- `--jobs=N` on the native `decomplex-rust` command +- `DECOMPLEX_RUST_JOBS` +- `DECOMPLEX_JOBS` + +## Measured Scaling + +Measured on `src/` with 162 Ruby files, using the release native binary.
+ +| Detector | Jobs | Elapsed | Speedup | Efficiency | +|---|---:|---:|---:|---:| +| `co-update` | 1 | 2.125s | 1.00x | 100.0% | +| `co-update` | 2 | 1.217s | 1.75x | 87.3% | +| `co-update` | 4 | 0.732s | 2.90x | 72.6% | +| `co-update` | 8 | 0.491s | 4.33x | 54.1% | +| `co-update` | 16 | 0.424s | 5.01x | 31.3% | +| `co-update` | 32 | 0.446s | 4.77x | 14.9% | +| `predicate-alias` | 1 | 2.097s | 1.00x | 100.0% | +| `predicate-alias` | 2 | 1.220s | 1.72x | 86.0% | +| `predicate-alias` | 4 | 0.716s | 2.93x | 73.2% | +| `predicate-alias` | 8 | 0.486s | 4.32x | 53.9% | +| `predicate-alias` | 16 | 0.383s | 5.47x | 34.2% | +| `predicate-alias` | 32 | 0.462s | 4.54x | 14.2% | +| `structural-similarity` | 1 | 4.265s | 1.00x | 100.0% | +| `structural-similarity` | 2 | 3.480s | 1.23x | 61.3% | +| `structural-similarity` | 4 | 3.010s | 1.42x | 35.4% | +| `structural-similarity` | 8 | 2.756s | 1.55x | 19.3% | +| `structural-similarity` | 16 | 2.740s | 1.56x | 9.7% | +| `structural-similarity` | 32 | 2.761s | 1.54x | 4.8% | + +## Interpretation + +The current implementation does not scale well to 32 jobs on this workload. + +`co-update` and `predicate-alias` are parse-heavy enough to benefit substantially from parallel document construction, peaking around 16 jobs. `structural-similarity` has more serial detector aggregation after parsing, so it barely improves beyond 4-8 jobs. + +For now, the best practical default is `--jobs=8` or `--jobs=16`, not `--jobs=32`. + +## Why Not Deeper Parallelism Yet? + +The immediate goal is a sustainable Ruby-to-Rust migration: + +1. Port Ruby `Syntax`/`Document` shape to Rust. +2. Port each detector as a direct `scan_documents` translation. +3. Port each language normalizer into the shared `Document` abstraction. + +Detector-specific map/reduce aggregation could improve some metrics later, but it would also force architectural drift while the port is still incomplete. 
The current boundary gives useful speedups without making future detector and language migrations harder. + +Once all detectors and language normalizers are ported, deeper parallel aggregation can be added selectively where profiling shows a decisive win. diff --git a/gems/decomplex/docs/agents/superfluous-state.md b/gems/decomplex/docs/agents/superfluous-state.md new file mode 100644 index 000000000..671479e75 --- /dev/null +++ b/gems/decomplex/docs/agents/superfluous-state.md @@ -0,0 +1,361 @@ +# Superfluous State (Tier 1) -- state that can be eliminated entirely + +## Why this exists + +StateMesh answers "where is state and how messy is it?" TemporalOrderingPressure +answers "does this owner expose an implicit state machine?" Superfluous State +answers the natural next question: **"could this field simply be removed?"** + +Most codebases accumulate fields that are not really state at all. They are +transit data that happens to be stored in an ivar because the developer +needed to pass a value between two methods and an ivar was the path of least +resistance. The field looks like state -- it lives on the object, it persists +between calls -- but it's actually a local variable that escaped its method +body. + +This detector finds those fields and ranks them by eliminability. + +## What it detects + +### Pattern 1: Intra-method pass-through (eliminable with near-certainty) + +A field that is **written and read within the same method body**. The value +never escapes the stack frame. The ivar is purely a local variable that was +promoted for no reason. + +```ruby +def checkout(user, cart) + @total = cart.items.sum(&:price) # <-- written + charge(user, @total) # <-- read + @total # <-- read again +end +``` + +`@total` is written once, read twice, all inside `checkout`. No other method +ever touches it. It should be a local variable `total`. Detection requires +zero opinion -- the writer span and reader spans are all within the same +DEFN boundary. 
+ +### Pattern 2: Adjacent-call pass-through (eliminable with high confidence) + +A field with **exactly one writer method and exactly one reader method**, +where every observed call site places the writer immediately before the +reader. + +```ruby +class BillingService + def set_user(user) + @user = user # <-- only writer + end + + def validate + return unless @user # <-- only reader + end +end + +# Every observed call site: +# service.set_user(u) +# service.validate +``` + +`@user` is transit data: `set_user` produces it, `validate` consumes it. It +can be eliminated by converting `set_user` to return the value and `validate` +to accept it as a parameter: `user = acquire_user(...); validate(user)`. + +False positives can occur when the writer genuinely mutates object state that +other methods depend on. This is guarded by the "exactly one reader" and +"adjacent calls" constraints. If `@user` is read in three other methods, or +if calls are not consistently adjacent, the score drops below the report +threshold. + +### Pattern 3: Derived cache (eliminable with medium confidence, user-gated) + +A field that is computed from other fields and never independently mutated. +Its value is always derivable from the source fields. + +```ruby +def initialize(cart) + @cart = cart + @total = @cart.total # <-- derived, never written elsewhere +end +``` + +`@total` is a cache of `@cart.total`. It can be eliminated by recomputing on +read. The tradeoff depends on recomputation cost: `@cart.total` with a +10,000-item collection is different from `@user.name`. This detector flags +derived caches and leaves the recomputation decision to the human. 
+ +## Score formula + +For each field, compute: + +``` +eliminability_score = + (1.0 / max(1, reader_method_count)) × # fewer reader methods = easier to eliminate + (1.0 / max(1, writer_method_count)) × # fewer writer methods = fewer refactor sites + intra_method_bonus × # × 10 if all reads and writes are in the same method + adjacent_call_bonus × # × 5 if writer-reader is adjacent at every callsite + (1.0 - rederivation_penalty) # penalize if this field gates other re-derivations +``` + +### Terms + +| Term | Range | Definition | +|---|---|---| +| `reader_method_count` | ≥ 1 | Number of distinct (file, defn) pairs that **read** this field | +| `writer_method_count` | ≥ 1 | Number of distinct (file, defn) pairs that **write** this field | +| `intra_method_bonus` | 1.0 or 10.0 | 10.0 if all reads AND writes are in the same method body; 1.0 otherwise | +| `adjacent_call_bonus` | 1.0 or 5.0 | 5.0 if writer_method_count == 1 AND reader_method_count == 1 AND every callsite sequence is writer-then-reader adjacent; 1.0 otherwise | +| `rederivation_penalty` | 0.0 -- 1.0 | Fraction of re-derivation sites that depend on this field. If this field is an input to N re-derivations out of total T tracked re-derivations, penalty = N / T (capped at 1.0). Gives a weight penalty for "this field's value is used to derive other computed state." | + +### Thresholds + +| Score range | Classification | Action | +|---|---|---| +| > 0.5 | Almost certainly eliminable | Remove the field; convert to local variable or parameter | +| 0.1 -- 0.5 | Probably eliminable with moderate refactor | Adjust call signatures, inline the write | +| < 0.1 | Genuinely stateful or gating complex re-derivations | Do not report (below noise floor) | + +**Only scores > 0.1 are reported.** This avoids surfacing fields that are +legitimate persistent state. + +## Relationship to other metrics + +| Metric | Question | Superfluous State adds | +|---|---|---| +| StateMesh | "Where is state and how messy?" 
| "Which fields don't need to exist at all?" | +| TemporalOrderingPressure | "Does this owner expose an implicit state machine?" | "Can we eliminate the fields that create the machine?" | +| DecisionPressure | "Which contracts drive defensive code?" | "Can removing a field reduce contracts that need defending?" | + +StateMesh and TemporalOrderingPressure show the *problem*. Superfluous State +shows the *fix*. + +## Implementation + +### Input facts + +All required facts already exist in Decomplex. Superfluous State is a +**post-analyzer** -- it reads StateMesh and ImplicitControlFlow output, +scores each field, and emits a ranked list. No new AST walks. + +| Fact | Source | Needed for | +|---|---|---| +| Field read sites (per file, defn, line) | `StateMesh#reads` | `reader_method_count` | +| Field write sites (per file, defn, line) | `StateMesh#writes` | `writer_method_count` | +| Method boundaries (DEFN/DEFS spans) | `StateMesh` AST root | `intra_method_bonus` | +| Re-derivation chains | `StateMesh#re_derivations` | `rederivation_penalty` | +| Call adjacency per field pair | `ImplicitControlFlow` sequences | `adjacent_call_bonus` | +| Field names (normalized) | `StateMesh` known fields | Identity | + +### Phases + +**Phase 1: Group reads and writes by field.** + +For each normalized field name in StateMesh: +- Collect all `Write` sites into `writers = Map<(file, defn) → [Write]>` +- Collect all `Read` sites into `readers = Map<(file, defn) → [Read]>` +- Compute `writer_method_count = writers.keys.uniq.size` +- Compute `reader_method_count = readers.keys.uniq.size` + +**Phase 2: Detect intra-method pass-through.** + +A field is intra-method if `writer_method_count == 1` AND +`reader_method_count == 1` AND the single writer and single reader +(file, defn, line) spans are both within the same DEFN/DEFS body. + +Implementation: StateMesh already tracks file and defn per site. +Compare the `defn` field of the writer and reader. 
If they match and +the total read count within that defn >= 1, the field is intra-method +pass-through. + +**Phase 3: Detect adjacent-call pass-through.** + +A field is adjacent-call pass-through if: +- `writer_method_count == 1` AND `reader_method_count == 1` +- NOT intra-method (distinct methods) +- For every `ImplicitControlFlow::MethodSequence` that contains the + writer method: the reader method immediately follows it in the + observed call order. + +Adjacency is directional: `set_user → validate` is adjacent; `validate +→ set_user` is not. If at least one callsite reverses the order +(reader before writer), the field does NOT qualify for the +adjacent-call bonus. + +If no callsites are found for the pair (the writer is tested alone or +called from unknown sites), the bonus is NOT applied -- adjacency +cannot be proven. This is the conservative default. + +**Phase 4: Compute re-derivation penalty.** + +A field gates re-derivations if it appears as an input in StateMesh +re-derivation chains. For each re-derivation: +- If `re_derivation.field == this_field`, count it. +- `rederivation_penalty = this_field_rederivations / max(1, total_rederivations)` + +This prevents flagging a field like `@storage` that feeds 12 other +derived fields as "eliminable." + +**Phase 5: Score and rank.** + +Apply the formula above for each known field. Sort descending by score. +Emit only fields with score > 0.1. 
+ +### Output schema + +```ruby +{ + field: "@total", # ivar name + normalized: "total", # without @ prefix + score: 0.92, # eliminability score + classification: "intra_method", # "intra_method" | "adjacent_call" | "derived_cache" + writer_method_count: 1, + reader_method_count: 1, + write_sites: [ # all write locations + { file: "app/services/billing.rb", defn: "checkout", line: 4 } + ], + read_sites: [ # all read locations + { file: "app/services/billing.rb", defn: "checkout", line: 5 }, + { file: "app/services/billing.rb", defn: "checkout", line: 6 } + ], + rederivations_gated: 0, # how many re-derivations depend on this field + adjacent_callsites: nil, # for adjacent_call patterns: [caller, callee, file, line] + recommendation: "Replace @total with a local variable in checkout." +} +``` + +### Test fixtures + +**Fixture A: Intra-method pass-through** + +```ruby +class Example + def checkout(cart) + @total = cart.total # write + format(@total) # read + end +end +``` + +Expected: `@total` score > 0.5, classification `intra_method`. + +**Fixture B: Adjacent-call pass-through** + +```ruby +class Billing + def set_user(user) + @user = user + end + + def validate + return unless @user + charge(@user) + end + + def process + set_user(find_user) + validate + end +end +``` + +Expected: `@user` score > 0.5, classification `adjacent_call`, +`adjacent_callsites` includes `process` line. + +**Fixture C: Adjacent-call with reversed order (NOT eliminable)** + +```ruby +class Billing + def set_user(user) + @user = user + end + + def validate + return unless @user + end + + def process + validate # reader BEFORE writer -- order is reversed + set_user(find_user) + end +end +``` + +Expected: `@user` score < 0.1 (no adjacent-call bonus, reader not +adjacent after writer), **NOT reported**. 
+ +**Fixture D: Derived cache** + +```ruby +class Cart + def initialize(items) + @items = items + @total = items.sum(&:price) # derived from @items + end +end +``` + +Expected: `@total` score 0.1--0.5, classification `derived_cache`. + +**Fixture E: Genuine state (NOT reported)** + +```ruby +class Cart + def initialize + @items = [] + end + + def add(item) + @items << item # multiple writers + @total = @items.sum(&:price) + end + + def remove(item) + @items.delete(item) # multiple writers + @total = @items.sum(&:price) + end + + def empty? + @items.empty? # multiple readers + end + + def total + @total # multiple readers + end +end +``` + +Expected: `@items` and `@total` both score < 0.1 (`writer_method_count` and +`reader_method_count` are both > 1), **NOT reported**. + +## Language portability + +This metric is fully language-agnostic. Every language has fields/properties/ +members/ivars with write sites and read sites. The facts are: + +- **Field identity**: name within owner +- **Writer locations**: (file, function, line) +- **Reader locations**: (file, function, line) +- **Call adjacency**: (caller, callee, file, line) +- **Re-derivation dependency**: field X is an input to derived field Y + +None of these are Ruby-specific. The TreeSitter fact extraction layer +(`syntax/<language>.rs`) needs to emit `StateWrite`, `StateRead`, and +`CallSite` facts per language. Superfluous State, like StateMesh and +TemporalOrderingPressure, consumes those language-agnostic facts. + +## Non-goals + +- **Do not** recommend refactoring actions. The detector says "this field + is probably eliminable." The human decides whether to inline it, convert + it to a return value, or keep it. +- **Do not** compute recomputation cost for derived caches. Flag them, + let the user decide. +- **Do not** attempt cross-file call adjacency. Adjacent-call detection + is intra-file only (same as ImplicitControlFlow's current scope). +- **Do not** analyze initialization-only fields differently.
A field + written in `initialize` and read once can be pass-through if the read + is in a single-adjacent-call method. But `initialize` → caller is + implicit; the adjacent-call bonus does not apply across an + initialization boundary unless the caller method directly follows + construction in observed sequences. diff --git a/gems/decomplex/docs/agents/syntax-adapter-decomposition-design.md b/gems/decomplex/docs/agents/syntax-adapter-decomposition-design.md new file mode 100644 index 000000000..4b30104d8 --- /dev/null +++ b/gems/decomplex/docs/agents/syntax-adapter-decomposition-design.md @@ -0,0 +1,96 @@ +# Syntax Adapter Decomposition Design + +## Goal + +`Decomplex::Syntax` should be a cross-language fact model and Tree-sitter facade. It should not know Ruby, PHP, Java, Rust, Zig, or any other concrete grammar beyond the registry that maps a language key to an adapter. + +Language adapters own: + +- parser package metadata and extensions +- lexicon regexes +- concrete Tree-sitter node kind names +- grammar-specific hidden constructs +- source-text conventions that cannot be represented generically + +The base `TreeSitterLanguageAdapter` owns: + +- traversal +- shared fact emission +- generic algorithms over adapter-provided grammar shapes +- empty defaults for optional language-specific fact providers + +## Current Issues + +`syntax.rb` still contains a large union of concrete grammar node names. That makes new language support look easy until a language differs, then the generic code grows another special case. The result is brittle cross-language support because unrelated languages inherit grammar assumptions they do not share. + +The main areas are: + +- function and owner detection +- parameter, body, and local-flow discovery +- assignment and declaration recognition +- branch, case, loop, and hidden branch detection +- call, member-access, and state-target discovery + +## Target Shape + +Each adapter exposes declarative grammar-shape methods. 
The base adapter uses those methods instead of hard-coded language unions: + +- `function_node_kinds` +- `method_node_kinds` +- `owner_node_kinds` +- `loop_node_kinds` +- `if_node_kinds` +- `case_node_kinds` +- `hidden_if_wrapper_kinds` +- `hidden_case_wrapper_kinds` +- `case_arm_node_kinds` +- `function_body_node_kinds` +- `parameter_list_node_kinds` +- `assignment_node_kinds` +- `declaration_node_kinds` +- `field_declaration_node_kinds` +- `identifier_node_kinds` +- `field_like_node_kinds` +- `call_node_kinds` +- `adjacent_call_node_kinds` +- `argument_list_node_kinds` +- `comment_prefixes` + +Adapters override only the shapes they need. When a language needs real logic instead of vocabulary, it overrides the semantic method directly, such as `function_name`, `call_target`, `state_target`, or `case_arm_patterns`. + +## Migration Plan + +1. Move source-text language quirks out of `syntax.rb`. + - Generic defaults return empty facts. + - Ruby owns Sorbet `T::Struct`, `const`, and `T.type_alias` parsing. + +2. Move lexicons beside adapters. + - `LanguageLexicon` remains shared. + - Concrete `*_LEXICON` constants live in the files that define their adapters. + +3. Introduce grammar-shape methods in the base adapter. + - Start with one area at a time. + - Replace each concrete node-kind union with an adapter method call. + - Keep behavior stable while moving the data boundary. + +4. Push concrete node kinds down into adapters. + - The base adapter may retain only truly generic names if they are part of a documented normalized adapter contract. + - Otherwise, adapters provide the language-specific kind sets. + +5. Add architecture invariants. + - `syntax.rb` must not define language lexicons. + - `syntax.rb` must not contain Sorbet or Ruby source-text patterns. + - Detectors must not call Tree-sitter APIs directly. + - New concrete grammar kind lists in `syntax.rb` should fail review unless backed by a documented generic adapter contract. 
+ +## Verification + +For each migration step: + +- run the examples oracle tests +- run the full Decomplex Ruby test suite +- run architecture invariant tests +- run `decomplex report` on `gems/decomplex/lib/decomplex` before and after +- compare whether reported issues are stable or whether differences reflect reduced self-findings in `syntax.rb` + +The expected direction is a smaller `syntax.rb`, fewer language names and source-level quirks in shared code, and no loss of detector oracle specificity. diff --git a/gems/decomplex/docs/agents/tree-sitter-migration.md b/gems/decomplex/docs/agents/tree-sitter-migration.md index 70c0f37c9..a3a31e8a1 100644 --- a/gems/decomplex/docs/agents/tree-sitter-migration.md +++ b/gems/decomplex/docs/agents/tree-sitter-migration.md @@ -200,6 +200,36 @@ Example capability flags: Detectors should skip unsupported sub-signals rather than infer them from unrelated syntax. +## Native Rust Port Contract + +The Rust implementation is a performance port, not a new Decomplex. +It must stay structurally symmetric with the Ruby implementation so the +remaining detectors and languages can be migrated mechanically. + +Rules: + +- Port Ruby files file-for-file and function-for-function unless a + later optimization is proven after parity. +- Keep the normalized AST API aligned with `lib/decomplex/ast.rb`: + `parse`, `node`, `slice`, `body_stmts`, `def_push`, + `canon_polarity`, `flatten_and`, and the `Node` vocabulary. +- Keep language adapters responsible for syntax normalization, not + detector decisions. Detectors should consume the same normalized AST + or the same syntax facts their Ruby counterpart consumes. +- Do not hide AST drift by sorting, filtering, or reshaping detector + results. Fix the normalizer or the detector port so the canonical + JSON matches Ruby output. +- Every native detector needs an engine-parity test and a real `src/` + parity smoke before it is treated as migrated. 
+ +Current split: + +- `DecisionPressure`, `PredicateAlias`, and `SemanticAlias` are + AST-backed ports and compare byte-for-byte with Ruby on `src/`. +- `CoUpdate` and `Miner` consume syntax facts because their Ruby + counterparts consume `Syntax.parse` / `SiteExtractor` facts. +- `FlaySimilarity` consumes `Syntax.parse` in both Ruby and Rust. + ## Preserving Output Ruby migration must be gated by exact-output tests before Tree-sitter @@ -490,3 +520,57 @@ That slice protects the current consumers, proves the output discipline on the highest-value detectors, and creates the extension point needed for Python/JavaScript/TypeScript/Go/Rust/Zig profiles without forcing a full rewrite. + +## Native Rust Detector Migration + +Status: in progress. The native Rust port must stay a structural mirror +of the Ruby implementation: shared syntax/AST facts first, detector +reducers second. Do not add detector-specific Tree-sitter walkers. + +Migration order follows the Decomplex Metrics Expo tiers. Tier 1 +detectors move first because they carry the highest signal and should +benefit earliest from native speed. + +Benchmarks below use `src/` on this repository through: + +``` +ruby gems/decomplex/exe/decomplex detector DETECTOR --engine=ruby --json src/ +ruby gems/decomplex/exe/decomplex detector DETECTOR --engine=rust --json --jobs=8 src/ +``` + +The JSON outputs are canonical detector-only payloads and are +byte-for-byte compared before recording a detector as migrated. + +| Tier | Detector / section | Native status | Ruby | Rust | Speedup | Notes | +|---|---|---:|---:|---:|---:|---| +| 1 | Missing Abstractions | migrated | 13.02s | 0.64s | 20.3x | Implemented by `miner`; consumes shared `DecisionSite` facts, matching Ruby `SiteExtractor`. | +| 1 | Semantic Predicate Aliases | migrated | 86.41s | 2.60s | 33.2x | AST-backed file/function port of `SemanticAlias`. | +| 1 | Reification Misses | migrated | 86.41s | 2.60s | 33.2x | Same AST-backed native pass as semantic aliases. 
| +| 1 | Exact Predicate Aliases | migrated | 85.50s | 2.58s | 33.1x | AST-backed file/function port of `PredicateAlias`. | +| 1 | Decision Pressure | migrated | 84.45s | 2.77s | 30.5x | AST-backed file/function port of `DecisionPressure`. | +| 1 | Redundant Nil Guards | pending | - | - | - | Needs local dominance/null-check normalized AST facts. | +| 1 | State Heatmap | pending | - | - | - | Needs shared `StateRead`, `StateWrite`, and semantic re-derivation facts. | +| 1 | State-Based Branch Density | pending | - | - | - | Needs branch decision facts with state refs. | +| 1 | Temporal Ordering Pressure | pending | - | - | - | Needs owner/method visibility plus state read/write facts. | +| 2 | Structural Similarity (Type-2/3) | migrated | 85.34s | 2.88s | 29.6x | File/function port of structural fingerprinting over shared `RawNode`. | +| 2 | Neglected Updates | migrated | 43.90s | 0.62s | 70.8x | Same native pass as co-update. | +| 2 | Neglected Conditions | migrated | 13.02s | 0.64s | 20.3x | Implemented by `miner`; consumes shared `DecisionSite` facts, matching Ruby `SiteExtractor`. | +| 2 | Derived-State Staleness | pending | - | - | - | Needs local write/read/dependency facts or Rust normalized AST. | +| 2 | Inconsistent Rename Clones | pending | - | - | - | Can likely share structural clone tokenization with Rust AST facade. | +| 2 | Implicit Control Flow | pending | - | - | - | Needs topology/path protocol and state effect facts. | +| 2 | Weighted Inlined Cognitive Complexity | pending | - | - | - | Needs topology plus local cognitive scorer. | +| 2 | Locality Drag | pending | - | - | - | Needs local flow summaries and boundaries. | +| 2/3 | Operational Discontinuity | pending | - | - | - | Needs local flow summaries and boundaries. | +| 3 | Neglected Path Conditions | pending | - | - | - | Needs path-condition facts over normalized branch syntax. | +| 3 | Oversized Predicates | pending | - | - | - | Needs normalized boolean atom counting. 
| +| 3 | Broken Protocols | pending | - | - | - | Needs call-sequence mining facts. | +| 3 | Function LCOM | pending | - | - | - | Needs local flow summaries. | +| 3 | False Simplicity | pending | - | - | - | Needs language lexicons plus call/mutation/reopen facts. | +| 3 | Fat Unions | pending | - | - | - | Needs class/variant dispatch and member-use facts. | + +Earlier single-thread / pre-architecture-correction timings recorded before the +AST-backed alias and decision-pressure ports: + +- co-update: Ruby 43.205838s, Rust 2.144622s, 20.1x. +- predicate-alias: Ruby 81.583126s, Rust 2.136387s, 38.2x. +- structural-similarity: Ruby 85.163481s, Rust 4.331976s, 19.7x. diff --git a/gems/decomplex/examples/c/co-update.c b/gems/decomplex/examples/c/co-update.c new file mode 100644 index 000000000..e498d0bfa --- /dev/null +++ b/gems/decomplex/examples/c/co-update.c @@ -0,0 +1,4 @@ +void stable_one(Node node) { node.storage = 1; node.provenance = 1; } +void stable_two(Node node) { node.storage = 1; node.provenance = 1; } +void stable_three(Node node) { node.storage = 1; node.provenance = 1; } +void misses_provenance(Node node) { node.storage = 1; } diff --git a/gems/decomplex/examples/c/decision-pressure.c b/gems/decomplex/examples/c/decision-pressure.c new file mode 100644 index 000000000..df529b6ce --- /dev/null +++ b/gems/decomplex/examples/c/decision-pressure.c @@ -0,0 +1 @@ +bool scan(Node node) { Value value = node.symbol; return value.isNull(); } diff --git a/gems/decomplex/examples/c/derived-state.c b/gems/decomplex/examples/c/derived-state.c new file mode 100644 index 000000000..8c51ff74a --- /dev/null +++ b/gems/decomplex/examples/c/derived-state.c @@ -0,0 +1 @@ +void check(int input) { int cached = input + 1; input = 2; print(cached); } diff --git a/gems/decomplex/examples/c/false-simplicity.c b/gems/decomplex/examples/c/false-simplicity.c new file mode 100644 index 000000000..e7a038a09 --- /dev/null +++ b/gems/decomplex/examples/c/false-simplicity.c @@ -0,0 +1 
@@ +void hack() { print("hidden IO"); } diff --git a/gems/decomplex/examples/c/fat-union.c b/gems/decomplex/examples/c/fat-union.c new file mode 100644 index 000000000..0e18ef013 --- /dev/null +++ b/gems/decomplex/examples/c/fat-union.c @@ -0,0 +1,7 @@ +void handle(Node *self) { + switch (self) { + case AST_Call: self->line(); self->col(); self->ty(); self->span(); self->parent(); self->recv(); break; + case AST_Func: self->line(); self->col(); self->ty(); self->span(); self->parent(); self->name(); break; + case AST_Lit: self->line(); self->col(); self->ty(); self->span(); self->parent(); self->value(); break; + } +} diff --git a/gems/decomplex/examples/c/flay-similarity.c b/gems/decomplex/examples/c/flay-similarity.c new file mode 100644 index 000000000..74d8ffd5c --- /dev/null +++ b/gems/decomplex/examples/c/flay-similarity.c @@ -0,0 +1,2 @@ +int first_clone(Node node) { let total = 0; int value1 = node.part1; if (value1.ready() && value1.enabled()) { total += value1.amount; } int value2 = node.part2; if (value2.ready() && value2.enabled()) { total += value2.amount; } int value3 = node.part3; if (value3.ready() && value3.enabled()) { total += value3.amount; } int value4 = node.part4; if (value4.ready() && value4.enabled()) { total += value4.amount; } int value5 = node.part5; if (value5.ready() && value5.enabled()) { total += value5.amount; } int value6 = node.part6; if (value6.ready() && value6.enabled()) { total += value6.amount; } int value7 = node.part7; if (value7.ready() && value7.enabled()) { total += value7.amount; } int value8 = node.part8; if (value8.ready() && value8.enabled()) { total += value8.amount; } return total; } +int second_clone(Node entry) { let total = 0; int item1 = entry.part1; if (item1.ready() && item1.enabled()) { total += item1.amount; } int item2 = entry.part2; if (item2.ready() && item2.enabled()) { total += item2.amount; } int item3 = entry.part3; if (item3.ready() && item3.enabled()) { total += item3.amount; } int item4 = 
entry.part4; if (item4.ready() && item4.enabled()) { total += item4.amount; } int item5 = entry.part5; if (item5.ready() && item5.enabled()) { total += item5.amount; } int item6 = entry.part6; if (item6.ready() && item6.enabled()) { total += item6.amount; } int item7 = entry.part7; if (item7.ready() && item7.enabled()) { total += item7.amount; } int item8 = entry.part8; if (item8.ready() && item8.enabled()) { total += item8.amount; } return total; } diff --git a/gems/decomplex/examples/c/function-lcom.c b/gems/decomplex/examples/c/function-lcom.c new file mode 100644 index 000000000..80f5b6338 --- /dev/null +++ b/gems/decomplex/examples/c/function-lcom.c @@ -0,0 +1,12 @@ +Result mixed(int price, int tax, Logger logger) { + int subtotal = price + tax; + int total = subtotal * 2; + int rounded = total.round(); + + int timestamp = now(); + Buffer buffer = Buffer_init(); + buffer.push(timestamp); + logger.info(buffer); + + return Result_init(rounded, buffer); +} diff --git a/gems/decomplex/examples/c/implicit-control-flow.c b/gems/decomplex/examples/c/implicit-control-flow.c new file mode 100644 index 000000000..328266712 --- /dev/null +++ b/gems/decomplex/examples/c/implicit-control-flow.c @@ -0,0 +1,9 @@ +typedef struct FlowExample { int status; int valid; int done; } FlowExample; +void prepare(FlowExample *self) { self->status = 1; } +void validate(FlowExample *self) { self->valid = self->status == 1; } +void commit(FlowExample *self) { self->done = self->valid; } +void ok1(FlowExample *self) { prepare(self); validate(self); commit(self); } +void ok2(FlowExample *self) { prepare(self); validate(self); commit(self); } +void ok3(FlowExample *self) { prepare(self); validate(self); commit(self); } +void ok4(FlowExample *self) { prepare(self); validate(self); commit(self); } +void drift(FlowExample *self) { validate(self); prepare(self); commit(self); } diff --git a/gems/decomplex/examples/c/inconsistent-rename-clone.c 
b/gems/decomplex/examples/c/inconsistent-rename-clone.c new file mode 100644 index 000000000..581bfd232 --- /dev/null +++ b/gems/decomplex/examples/c/inconsistent-rename-clone.c @@ -0,0 +1,2 @@ +void original() { int src = fetch(1); check(src); store(src); finalize(src); } +void pasted() { int dst = fetch(2); check(dst); store(src); finalize(dst); } diff --git a/gems/decomplex/examples/c/local-flow.c b/gems/decomplex/examples/c/local-flow.c new file mode 100644 index 000000000..c03515f6e --- /dev/null +++ b/gems/decomplex/examples/c/local-flow.c @@ -0,0 +1,9 @@ +Result mixed(int price, int tax) { + int subtotal = price + tax; + int total = subtotal.round(); + + int timestamp = now(); + Buffer buffer = Buffer_init(); + buffer.push(timestamp); + return Result_init(total, buffer); +} diff --git a/gems/decomplex/examples/c/locality-drag.c b/gems/decomplex/examples/c/locality-drag.c new file mode 100644 index 000000000..d7a892e5a --- /dev/null +++ b/gems/decomplex/examples/c/locality-drag.c @@ -0,0 +1,27 @@ +void run(User user, Cart cart, Logger logger) { + int receipt_id = user.id; + + int total = cart.total; + if (total > 100) { + if (cart.discountable()) { + int discount = 10; + } + } + if (cart.taxable()) { + if (cart.region) { + int tax = total * 2; + } + } + if (logger.enabled()) { + if (logger.debug()) { + logger.info(total); + } + } + if (cart.valid()) { + if (cart.ready()) { + int status = 1; + } + } + + emit(receipt_id); +} diff --git a/gems/decomplex/examples/c/miner.c b/gems/decomplex/examples/c/miner.c new file mode 100644 index 000000000..a48eaf209 --- /dev/null +++ b/gems/decomplex/examples/c/miner.c @@ -0,0 +1,4 @@ +bool one(bool a, bool b, bool c) { return a && b && c; } +bool two(bool a, bool b, bool c) { return a && b && c; } +bool three(bool a, bool b, bool c) { return a && b && c; } +bool broken(bool a, bool b) { return a && b; } diff --git a/gems/decomplex/examples/c/operational-discontinuity.c 
b/gems/decomplex/examples/c/operational-discontinuity.c new file mode 100644 index 000000000..d011d0237 --- /dev/null +++ b/gems/decomplex/examples/c/operational-discontinuity.c @@ -0,0 +1,9 @@ +void phase_shift() { + int a = 1; + int b = 2; + + // Phase 2 + int x = 3; + int y = 4; + print(x); print(y); +} diff --git a/gems/decomplex/examples/c/oversized-predicate.c b/gems/decomplex/examples/c/oversized-predicate.c new file mode 100644 index 000000000..9c9197082 --- /dev/null +++ b/gems/decomplex/examples/c/oversized-predicate.c @@ -0,0 +1 @@ +void complex_check(bool a, bool b, bool c, bool d) { if (a && b && c && d) { print("too big"); } } diff --git a/gems/decomplex/examples/c/path-condition.c b/gems/decomplex/examples/c/path-condition.c new file mode 100644 index 000000000..9fc2f74f2 --- /dev/null +++ b/gems/decomplex/examples/c/path-condition.c @@ -0,0 +1,4 @@ +void one(X x, Y y, Z z) { if (x.p() && y.q() && z.r()) { go(x); } } +void two(X x, Y y, Z z) { if (x.p() && y.q() && z.r()) { go(x); } } +void three(X x, Y y, Z z) { if (x.p() && y.q() && z.r()) { go(x); } } +void bug(X x, Y y, Z z) { if (x.p() && y.q()) { go(x); } } diff --git a/gems/decomplex/examples/c/predicate-alias.c b/gems/decomplex/examples/c/predicate-alias.c new file mode 100644 index 000000000..b812862c9 --- /dev/null +++ b/gems/decomplex/examples/c/predicate-alias.c @@ -0,0 +1,3 @@ +bool first() { return true; } +bool second() { return true; } +bool other() { return false; } diff --git a/gems/decomplex/examples/c/redundant-nil-guard.c b/gems/decomplex/examples/c/redundant-nil-guard.c new file mode 100644 index 000000000..6cab7e8de --- /dev/null +++ b/gems/decomplex/examples/c/redundant-nil-guard.c @@ -0,0 +1 @@ +void check(Value *value) { if (value->isSome()) { value->isNull(); } } diff --git a/gems/decomplex/examples/c/semantic-alias.c b/gems/decomplex/examples/c/semantic-alias.c new file mode 100644 index 000000000..60c1db51a --- /dev/null +++ b/gems/decomplex/examples/c/semantic-alias.c @@ 
-0,0 +1,4 @@ +bool frame(Node node) { return node.provenance == FRAME; } +bool is_frame(Node node) { return provenance == FRAME; } +bool heap(Node node) { return node.provenance == HEAP; } +int somewhere(Node node) { if (node.provenance == FRAME) { return 1; } return 0; } diff --git a/gems/decomplex/examples/c/sequence-mine.c b/gems/decomplex/examples/c/sequence-mine.c new file mode 100644 index 000000000..e05be841f --- /dev/null +++ b/gems/decomplex/examples/c/sequence-mine.c @@ -0,0 +1,5 @@ +void one() { alloc_mark(x); body1(); cleanup(x); } +void two() { alloc_mark(y); body2(); cleanup(y); } +void three() { alloc_mark(z); body3(); cleanup(z); } +void four() { alloc_mark(w); body4(); cleanup(w); } +void leak() { alloc_mark(q); use_value(q); } diff --git a/gems/decomplex/examples/c/state-branch-density.c b/gems/decomplex/examples/c/state-branch-density.c new file mode 100644 index 000000000..4595b4dc8 --- /dev/null +++ b/gems/decomplex/examples/c/state-branch-density.c @@ -0,0 +1,2 @@ +typedef struct StateBranchChecker { int checked; } StateBranchChecker; +void check(StateBranchChecker *self, bool admin, const char *name) { if (admin) { self->checked = true; } if (self->checked && name == "admin") { print("hello"); } } diff --git a/gems/decomplex/examples/c/state-mesh.c b/gems/decomplex/examples/c/state-mesh.c new file mode 100644 index 000000000..506cae880 --- /dev/null +++ b/gems/decomplex/examples/c/state-mesh.c @@ -0,0 +1,5 @@ +typedef struct StateMeshExample { int a; int b; } StateMeshExample; +void initialize(StateMeshExample *self) { self->a = 1; self->b = 2; } +void writer(StateMeshExample *self) { self->a = 3; } +int reader(StateMeshExample *self) { return self->a + self->b; } +int a_alias(StateMeshExample *self) { return self->a; } diff --git a/gems/decomplex/examples/c/structural-topology.c b/gems/decomplex/examples/c/structural-topology.c new file mode 100644 index 000000000..af1c3ef29 --- /dev/null +++ b/gems/decomplex/examples/c/structural-topology.c 
@@ -0,0 +1,6 @@ +typedef struct Worker { int ready_flag; } Worker; +void run(Worker *self, Items items) { prepare(self); if (ready(self)) { validate(self); } for (int item = 0; item < items.count; item++) { helper(self, item); } } +void prepare(Worker *self) {} +bool ready(Worker *self) { return true; } +void validate(Worker *self) {} +void helper(Worker *self, Item item) { item.use(); } diff --git a/gems/decomplex/examples/c/temporal-ordering-pressure.c b/gems/decomplex/examples/c/temporal-ordering-pressure.c new file mode 100644 index 000000000..49e98ca09 --- /dev/null +++ b/gems/decomplex/examples/c/temporal-ordering-pressure.c @@ -0,0 +1,5 @@ +typedef struct TemporalOrderExample { int a; int b; } TemporalOrderExample; +void one(TemporalOrderExample *self) { self->a = 1; } +void two(TemporalOrderExample *self) { self->a = 2; self->b = 3; } +void three(TemporalOrderExample *self) { self->b = 4; } +int reader(TemporalOrderExample *self) { return self->a; } diff --git a/gems/decomplex/examples/c/weighted-inlined-complexity.c b/gems/decomplex/examples/c/weighted-inlined-complexity.c new file mode 100644 index 000000000..47be86e74 --- /dev/null +++ b/gems/decomplex/examples/c/weighted-inlined-complexity.c @@ -0,0 +1,5 @@ +void checkout(User user, Cart cart) { validate_user(user); apply_discount(cart); process_payment(user, cart); audit_cart(cart); } +bool validate_user(User user) { if (user.active() && !user.suspended()) { if (user.profile.complete()) { return true; } else { return false; } } else { return false; } } +int apply_discount(Cart cart) { if (cart.total > 100 && eligible()) { if (holiday()) { return 20; } else if (loyalty_month()) { return 15; } else { return 10; } } return 0; } +void process_payment(User user, Cart cart) { if (gateway.ready()) { if (cart.total > 0 && user.active()) { if (fraud_check(user)) { charge(user, cart); } else { decline(user); } } } } +void audit_cart(Cart cart) { for (int item = 0; item < cart.count; item++) { if 
(item_taxable(item)) { if (item_region(item) && item_amount(item) > 0) { record_tax(item); } } } } diff --git a/gems/decomplex/examples/cpp/co-update.cpp b/gems/decomplex/examples/cpp/co-update.cpp new file mode 100644 index 000000000..e498d0bfa --- /dev/null +++ b/gems/decomplex/examples/cpp/co-update.cpp @@ -0,0 +1,4 @@ +void stable_one(Node node) { node.storage = 1; node.provenance = 1; } +void stable_two(Node node) { node.storage = 1; node.provenance = 1; } +void stable_three(Node node) { node.storage = 1; node.provenance = 1; } +void misses_provenance(Node node) { node.storage = 1; } diff --git a/gems/decomplex/examples/cpp/decision-pressure.cpp b/gems/decomplex/examples/cpp/decision-pressure.cpp new file mode 100644 index 000000000..7b6370558 --- /dev/null +++ b/gems/decomplex/examples/cpp/decision-pressure.cpp @@ -0,0 +1 @@ +bool scan(Node node) { auto value = node.symbol; return value.isNull(); } diff --git a/gems/decomplex/examples/cpp/derived-state.cpp b/gems/decomplex/examples/cpp/derived-state.cpp new file mode 100644 index 000000000..02357b90a --- /dev/null +++ b/gems/decomplex/examples/cpp/derived-state.cpp @@ -0,0 +1 @@ +void check(int input) { auto cached = input + 1; input = 2; print(cached); } diff --git a/gems/decomplex/examples/cpp/false-simplicity.cpp b/gems/decomplex/examples/cpp/false-simplicity.cpp new file mode 100644 index 000000000..faa1ea232 --- /dev/null +++ b/gems/decomplex/examples/cpp/false-simplicity.cpp @@ -0,0 +1 @@ +class FalseSimplicityExample { void hack() { print("hidden IO"); } }; diff --git a/gems/decomplex/examples/cpp/fat-union.cpp b/gems/decomplex/examples/cpp/fat-union.cpp new file mode 100644 index 000000000..5cf051bde --- /dev/null +++ b/gems/decomplex/examples/cpp/fat-union.cpp @@ -0,0 +1 @@ +void handle(Node node) { switch (node) { case AST::Call: node.line(); node.col(); node.ty(); node.span(); node.parent(); node.recv(); break; case AST::Func: node.line(); node.col(); node.ty(); node.span(); node.parent(); 
node.name(); break; case AST::Lit: node.line(); node.col(); node.ty(); node.span(); node.parent(); node.value(); break; } } diff --git a/gems/decomplex/examples/cpp/flay-similarity.cpp b/gems/decomplex/examples/cpp/flay-similarity.cpp new file mode 100644 index 000000000..86f50f2fa --- /dev/null +++ b/gems/decomplex/examples/cpp/flay-similarity.cpp @@ -0,0 +1,2 @@ +int first_clone(Node node) { let total = 0; auto value1 = node.part1; if (value1.ready() && value1.enabled()) { total += value1.amount; } auto value2 = node.part2; if (value2.ready() && value2.enabled()) { total += value2.amount; } auto value3 = node.part3; if (value3.ready() && value3.enabled()) { total += value3.amount; } auto value4 = node.part4; if (value4.ready() && value4.enabled()) { total += value4.amount; } auto value5 = node.part5; if (value5.ready() && value5.enabled()) { total += value5.amount; } auto value6 = node.part6; if (value6.ready() && value6.enabled()) { total += value6.amount; } auto value7 = node.part7; if (value7.ready() && value7.enabled()) { total += value7.amount; } auto value8 = node.part8; if (value8.ready() && value8.enabled()) { total += value8.amount; } return total; } +int second_clone(Node entry) { let total = 0; auto item1 = entry.part1; if (item1.ready() && item1.enabled()) { total += item1.amount; } auto item2 = entry.part2; if (item2.ready() && item2.enabled()) { total += item2.amount; } auto item3 = entry.part3; if (item3.ready() && item3.enabled()) { total += item3.amount; } auto item4 = entry.part4; if (item4.ready() && item4.enabled()) { total += item4.amount; } auto item5 = entry.part5; if (item5.ready() && item5.enabled()) { total += item5.amount; } auto item6 = entry.part6; if (item6.ready() && item6.enabled()) { total += item6.amount; } auto item7 = entry.part7; if (item7.ready() && item7.enabled()) { total += item7.amount; } auto item8 = entry.part8; if (item8.ready() && item8.enabled()) { total += item8.amount; } return total; } diff --git 
a/gems/decomplex/examples/cpp/function-lcom.cpp b/gems/decomplex/examples/cpp/function-lcom.cpp new file mode 100644 index 000000000..aaa03424a --- /dev/null +++ b/gems/decomplex/examples/cpp/function-lcom.cpp @@ -0,0 +1,12 @@ +Result mixed(int price, int tax, Logger logger) { + auto subtotal = price + tax; + auto total = subtotal * 2; + auto rounded = total.round(); + + auto timestamp = now(); + auto buffer = Buffer.init(); + buffer.push(timestamp); + logger.info(buffer); + + return Result.init(rounded, buffer); +} diff --git a/gems/decomplex/examples/cpp/implicit-control-flow.cpp b/gems/decomplex/examples/cpp/implicit-control-flow.cpp new file mode 100644 index 000000000..cfd71512a --- /dev/null +++ b/gems/decomplex/examples/cpp/implicit-control-flow.cpp @@ -0,0 +1 @@ +class FlowExample { public: int status; bool valid; bool done; void prepare() { this->status = 1; } void validate() { this->valid = this->status == 1; } void commit() { this->done = this->valid; } void ok1() { prepare(); validate(); commit(); } void ok2() { prepare(); validate(); commit(); } void ok3() { prepare(); validate(); commit(); } void ok4() { prepare(); validate(); commit(); } void drift() { validate(); prepare(); commit(); } }; diff --git a/gems/decomplex/examples/cpp/inconsistent-rename-clone.cpp b/gems/decomplex/examples/cpp/inconsistent-rename-clone.cpp new file mode 100644 index 000000000..c24e9b89c --- /dev/null +++ b/gems/decomplex/examples/cpp/inconsistent-rename-clone.cpp @@ -0,0 +1,2 @@ +void original() { auto src = fetch(1); check(src); store(src); finalize(src); } +void pasted() { auto dst = fetch(2); check(dst); store(src); finalize(dst); } diff --git a/gems/decomplex/examples/cpp/local-flow.cpp b/gems/decomplex/examples/cpp/local-flow.cpp new file mode 100644 index 000000000..107b3840b --- /dev/null +++ b/gems/decomplex/examples/cpp/local-flow.cpp @@ -0,0 +1,9 @@ +Result mixed(int price, int tax) { + auto subtotal = price + tax; + auto total = subtotal.round(); + + auto 
timestamp = now(); + auto buffer = Buffer.init(); + buffer.push(timestamp); + return Result.init(total, buffer); +} diff --git a/gems/decomplex/examples/cpp/locality-drag.cpp b/gems/decomplex/examples/cpp/locality-drag.cpp new file mode 100644 index 000000000..e2730e079 --- /dev/null +++ b/gems/decomplex/examples/cpp/locality-drag.cpp @@ -0,0 +1,27 @@ +void run(User user, Cart cart, Logger logger) { + auto receipt_id = user.id; + + auto total = cart.total; + if (total > 100) { + if (cart.discountable()) { + auto discount = 10; + } + } + if (cart.taxable()) { + if (cart.region) { + auto tax = total * 2; + } + } + if (logger.enabled()) { + if (logger.debug()) { + logger.info(total); + } + } + if (cart.valid()) { + if (cart.ready()) { + auto status = 1; + } + } + + emit(receipt_id); +} diff --git a/gems/decomplex/examples/cpp/miner.cpp b/gems/decomplex/examples/cpp/miner.cpp new file mode 100644 index 000000000..a48eaf209 --- /dev/null +++ b/gems/decomplex/examples/cpp/miner.cpp @@ -0,0 +1,4 @@ +bool one(bool a, bool b, bool c) { return a && b && c; } +bool two(bool a, bool b, bool c) { return a && b && c; } +bool three(bool a, bool b, bool c) { return a && b && c; } +bool broken(bool a, bool b) { return a && b; } diff --git a/gems/decomplex/examples/cpp/operational-discontinuity.cpp b/gems/decomplex/examples/cpp/operational-discontinuity.cpp new file mode 100644 index 000000000..d011d0237 --- /dev/null +++ b/gems/decomplex/examples/cpp/operational-discontinuity.cpp @@ -0,0 +1,9 @@ +void phase_shift() { + int a = 1; + int b = 2; + + // Phase 2 + int x = 3; + int y = 4; + print(x); print(y); +} diff --git a/gems/decomplex/examples/cpp/oversized-predicate.cpp b/gems/decomplex/examples/cpp/oversized-predicate.cpp new file mode 100644 index 000000000..9c9197082 --- /dev/null +++ b/gems/decomplex/examples/cpp/oversized-predicate.cpp @@ -0,0 +1 @@ +void complex_check(bool a, bool b, bool c, bool d) { if (a && b && c && d) { print("too big"); } } diff --git 
a/gems/decomplex/examples/cpp/path-condition.cpp b/gems/decomplex/examples/cpp/path-condition.cpp new file mode 100644 index 000000000..9fc2f74f2 --- /dev/null +++ b/gems/decomplex/examples/cpp/path-condition.cpp @@ -0,0 +1,4 @@ +void one(X x, Y y, Z z) { if (x.p() && y.q() && z.r()) { go(x); } } +void two(X x, Y y, Z z) { if (x.p() && y.q() && z.r()) { go(x); } } +void three(X x, Y y, Z z) { if (x.p() && y.q() && z.r()) { go(x); } } +void bug(X x, Y y, Z z) { if (x.p() && y.q()) { go(x); } } diff --git a/gems/decomplex/examples/cpp/predicate-alias.cpp b/gems/decomplex/examples/cpp/predicate-alias.cpp new file mode 100644 index 000000000..b812862c9 --- /dev/null +++ b/gems/decomplex/examples/cpp/predicate-alias.cpp @@ -0,0 +1,3 @@ +bool first() { return true; } +bool second() { return true; } +bool other() { return false; } diff --git a/gems/decomplex/examples/cpp/redundant-nil-guard.cpp b/gems/decomplex/examples/cpp/redundant-nil-guard.cpp new file mode 100644 index 000000000..6cab7e8de --- /dev/null +++ b/gems/decomplex/examples/cpp/redundant-nil-guard.cpp @@ -0,0 +1 @@ +void check(Value *value) { if (value->isSome()) { value->isNull(); } } diff --git a/gems/decomplex/examples/cpp/semantic-alias.cpp b/gems/decomplex/examples/cpp/semantic-alias.cpp new file mode 100644 index 000000000..60c1db51a --- /dev/null +++ b/gems/decomplex/examples/cpp/semantic-alias.cpp @@ -0,0 +1,4 @@ +bool frame(Node node) { return node.provenance == FRAME; } +bool is_frame(Node node) { return provenance == FRAME; } +bool heap(Node node) { return node.provenance == HEAP; } +int somewhere(Node node) { if (node.provenance == FRAME) { return 1; } return 0; } diff --git a/gems/decomplex/examples/cpp/sequence-mine.cpp b/gems/decomplex/examples/cpp/sequence-mine.cpp new file mode 100644 index 000000000..e05be841f --- /dev/null +++ b/gems/decomplex/examples/cpp/sequence-mine.cpp @@ -0,0 +1,5 @@ +void one() { alloc_mark(x); body1(); cleanup(x); } +void two() { alloc_mark(y); body2(); cleanup(y); 
} +void three() { alloc_mark(z); body3(); cleanup(z); } +void four() { alloc_mark(w); body4(); cleanup(w); } +void leak() { alloc_mark(q); use_value(q); } diff --git a/gems/decomplex/examples/cpp/state-branch-density.cpp b/gems/decomplex/examples/cpp/state-branch-density.cpp new file mode 100644 index 000000000..1bee1354f --- /dev/null +++ b/gems/decomplex/examples/cpp/state-branch-density.cpp @@ -0,0 +1 @@ +class StateBranchChecker { public: bool checked; void check(bool admin, string name) { if (admin) { this->checked = true; } if (this->checked && name == "admin") { print("hello"); } } }; diff --git a/gems/decomplex/examples/cpp/state-mesh.cpp b/gems/decomplex/examples/cpp/state-mesh.cpp new file mode 100644 index 000000000..423482f33 --- /dev/null +++ b/gems/decomplex/examples/cpp/state-mesh.cpp @@ -0,0 +1 @@ +class StateMeshExample { public: int a; int b; void initialize() { this->a = 1; this->b = 2; } void writer() { this->a = 3; } int reader() { return this->a + this->b; } int a_alias() { return this->a; } }; diff --git a/gems/decomplex/examples/cpp/structural-topology.cpp b/gems/decomplex/examples/cpp/structural-topology.cpp new file mode 100644 index 000000000..f39dde337 --- /dev/null +++ b/gems/decomplex/examples/cpp/structural-topology.cpp @@ -0,0 +1 @@ +class Worker { public: void run(Items items) { prepare(); if (ready()) { validate(); } for (auto item : items) { helper(item); } } private: void prepare() {} bool ready() { return true; } public: void validate() {} private: void helper(Item item) { item.use(); } }; diff --git a/gems/decomplex/examples/cpp/temporal-ordering-pressure.cpp b/gems/decomplex/examples/cpp/temporal-ordering-pressure.cpp new file mode 100644 index 000000000..4b565e3ed --- /dev/null +++ b/gems/decomplex/examples/cpp/temporal-ordering-pressure.cpp @@ -0,0 +1 @@ +class TemporalOrderExample { public: int a; int b; void one() { this->a = 1; } void two() { this->a = 2; this->b = 3; } void three() { this->b = 4; } int reader() { return 
this->a; } }; diff --git a/gems/decomplex/examples/cpp/weighted-inlined-complexity.cpp b/gems/decomplex/examples/cpp/weighted-inlined-complexity.cpp new file mode 100644 index 000000000..4fd497081 --- /dev/null +++ b/gems/decomplex/examples/cpp/weighted-inlined-complexity.cpp @@ -0,0 +1,5 @@ +void checkout(User user, Cart cart) { validate_user(user); apply_discount(cart); process_payment(user, cart); audit_cart(cart); } +bool validate_user(User user) { if (user.active() && !user.suspended()) { if (user.profile.complete()) { return true; } else { return false; } } else { return false; } } +int apply_discount(Cart cart) { if (cart.total > 100 && eligible()) { if (holiday()) { return 20; } else if (loyalty_month()) { return 15; } else { return 10; } } return 0; } +void process_payment(User user, Cart cart) { if (gateway.ready()) { if (cart.total > 0 && user.active()) { if (fraud_check(user)) { charge(user, cart); } else { decline(user); } } } } +void audit_cart(Cart cart) { for (auto item : cart.items) { if (item.taxable()) { if (item.region && item.amount > 0) { record_tax(item); } } } } diff --git a/gems/decomplex/examples/csharp/co-update.cs b/gems/decomplex/examples/csharp/co-update.cs new file mode 100644 index 000000000..093c07fbe --- /dev/null +++ b/gems/decomplex/examples/csharp/co-update.cs @@ -0,0 +1,6 @@ +class Example { + static void stable_one(Node node) { node.storage = 1; node.provenance = 1; } + static void stable_two(Node node) { node.storage = 1; node.provenance = 1; } + static void stable_three(Node node) { node.storage = 1; node.provenance = 1; } + static void misses_provenance(Node node) { node.storage = 1; } +} diff --git a/gems/decomplex/examples/csharp/decision-pressure.cs b/gems/decomplex/examples/csharp/decision-pressure.cs new file mode 100644 index 000000000..2be54cf91 --- /dev/null +++ b/gems/decomplex/examples/csharp/decision-pressure.cs @@ -0,0 +1 @@ +class Example { static bool scan(Node node) { var value = node.symbol; return 
value.isNull(); } } diff --git a/gems/decomplex/examples/csharp/derived-state.cs b/gems/decomplex/examples/csharp/derived-state.cs new file mode 100644 index 000000000..1170f1db7 --- /dev/null +++ b/gems/decomplex/examples/csharp/derived-state.cs @@ -0,0 +1 @@ +class Example { static void check(int input) { var cached = input + 1; input = 2; print(cached); } } diff --git a/gems/decomplex/examples/csharp/false-simplicity.cs b/gems/decomplex/examples/csharp/false-simplicity.cs new file mode 100644 index 000000000..78fb3897c --- /dev/null +++ b/gems/decomplex/examples/csharp/false-simplicity.cs @@ -0,0 +1 @@ +class FalseSimplicityExample { void hack() { print("hidden IO"); } } diff --git a/gems/decomplex/examples/csharp/fat-union.cs b/gems/decomplex/examples/csharp/fat-union.cs new file mode 100644 index 000000000..ad280adb7 --- /dev/null +++ b/gems/decomplex/examples/csharp/fat-union.cs @@ -0,0 +1 @@ +class Example { static void handle(Node node) { switch (node) { case AST.Call: node.line(); node.col(); node.ty(); node.span(); node.parent(); node.recv(); break; case AST.Func: node.line(); node.col(); node.ty(); node.span(); node.parent(); node.name(); break; case AST.Lit: node.line(); node.col(); node.ty(); node.span(); node.parent(); node.value(); break; } } } diff --git a/gems/decomplex/examples/csharp/flay-similarity.cs b/gems/decomplex/examples/csharp/flay-similarity.cs new file mode 100644 index 000000000..b6ca69339 --- /dev/null +++ b/gems/decomplex/examples/csharp/flay-similarity.cs @@ -0,0 +1,75 @@ +class Example { + static int first_clone(Node node) { + var total = 0; + var value1 = node.part1; + if (value1.ready() && value1.enabled()) { + total += value1.amount; + } + var value2 = node.part2; + if (value2.ready() && value2.enabled()) { + total += value2.amount; + } + var value3 = node.part3; + if (value3.ready() && value3.enabled()) { + total += value3.amount; + } + var value4 = node.part4; + if (value4.ready() && value4.enabled()) { + total += 
value4.amount; + } + var value5 = node.part5; + if (value5.ready() && value5.enabled()) { + total += value5.amount; + } + var value6 = node.part6; + if (value6.ready() && value6.enabled()) { + total += value6.amount; + } + var value7 = node.part7; + if (value7.ready() && value7.enabled()) { + total += value7.amount; + } + var value8 = node.part8; + if (value8.ready() && value8.enabled()) { + total += value8.amount; + } + return total; + } + + static int second_clone(Node entry) { + var total = 0; + var item1 = entry.part1; + if (item1.ready() && item1.enabled()) { + total += item1.amount; + } + var item2 = entry.part2; + if (item2.ready() && item2.enabled()) { + total += item2.amount; + } + var item3 = entry.part3; + if (item3.ready() && item3.enabled()) { + total += item3.amount; + } + var item4 = entry.part4; + if (item4.ready() && item4.enabled()) { + total += item4.amount; + } + var item5 = entry.part5; + if (item5.ready() && item5.enabled()) { + total += item5.amount; + } + var item6 = entry.part6; + if (item6.ready() && item6.enabled()) { + total += item6.amount; + } + var item7 = entry.part7; + if (item7.ready() && item7.enabled()) { + total += item7.amount; + } + var item8 = entry.part8; + if (item8.ready() && item8.enabled()) { + total += item8.amount; + } + return total; + } +} diff --git a/gems/decomplex/examples/csharp/function-lcom.cs b/gems/decomplex/examples/csharp/function-lcom.cs new file mode 100644 index 000000000..433cc611c --- /dev/null +++ b/gems/decomplex/examples/csharp/function-lcom.cs @@ -0,0 +1,12 @@ +class Example { static Result mixed(int price, int tax, Logger logger) { + var subtotal = price + tax; + var total = subtotal * 2; + var rounded = total.round(); + + var timestamp = now(); + var buffer = Buffer.init(); + buffer.push(timestamp); + logger.info(buffer); + + return Result.init(rounded, buffer); +} } diff --git a/gems/decomplex/examples/csharp/implicit-control-flow.cs b/gems/decomplex/examples/csharp/implicit-control-flow.cs new 
file mode 100644 index 000000000..f5e7319f7 --- /dev/null +++ b/gems/decomplex/examples/csharp/implicit-control-flow.cs @@ -0,0 +1 @@ +class FlowExample { int status; bool valid; bool done; void prepare() { this.status = 1; } void validate() { this.valid = this.status == 1; } void commit() { this.done = this.valid; } void ok1() { prepare(); validate(); commit(); } void ok2() { prepare(); validate(); commit(); } void ok3() { prepare(); validate(); commit(); } void ok4() { prepare(); validate(); commit(); } void drift() { validate(); prepare(); commit(); } } diff --git a/gems/decomplex/examples/csharp/inconsistent-rename-clone.cs b/gems/decomplex/examples/csharp/inconsistent-rename-clone.cs new file mode 100644 index 000000000..6ec491226 --- /dev/null +++ b/gems/decomplex/examples/csharp/inconsistent-rename-clone.cs @@ -0,0 +1 @@ +class Example { static void original() { var src = fetch(1); check(src); store(src); finalize(src); } static void pasted() { var dst = fetch(2); check(dst); store(src); finalize(dst); } } diff --git a/gems/decomplex/examples/csharp/local-flow.cs b/gems/decomplex/examples/csharp/local-flow.cs new file mode 100644 index 000000000..a5fa7f235 --- /dev/null +++ b/gems/decomplex/examples/csharp/local-flow.cs @@ -0,0 +1,9 @@ +class Example { static Result mixed(int price, int tax) { + var subtotal = price + tax; + var total = subtotal.round(); + + var timestamp = now(); + var buffer = Buffer.init(); + buffer.push(timestamp); + return Result.init(total, buffer); +} } diff --git a/gems/decomplex/examples/csharp/locality-drag.cs b/gems/decomplex/examples/csharp/locality-drag.cs new file mode 100644 index 000000000..8d31b0dcb --- /dev/null +++ b/gems/decomplex/examples/csharp/locality-drag.cs @@ -0,0 +1,29 @@ +class Example { + static void run(User user, Cart cart, Logger logger) { + var receipt_id = user.id; + + var total = cart.total; + if (total > 100) { + if (cart.discountable()) { + var discount = 10; + } + } + if (cart.taxable()) { + if 
(cart.region) { + var tax = total * 2; + } + } + if (logger.enabled()) { + if (logger.debug()) { + logger.info(total); + } + } + if (cart.valid()) { + if (cart.ready()) { + var status = 1; + } + } + + emit(receipt_id); + } +} diff --git a/gems/decomplex/examples/csharp/miner.cs b/gems/decomplex/examples/csharp/miner.cs new file mode 100644 index 000000000..4d471d44c --- /dev/null +++ b/gems/decomplex/examples/csharp/miner.cs @@ -0,0 +1 @@ +class Example { static bool one(bool a, bool b, bool c) { return a && b && c; } static bool two(bool a, bool b, bool c) { return a && b && c; } static bool three(bool a, bool b, bool c) { return a && b && c; } static bool broken(bool a, bool b) { return a && b; } } diff --git a/gems/decomplex/examples/csharp/operational-discontinuity.cs b/gems/decomplex/examples/csharp/operational-discontinuity.cs new file mode 100644 index 000000000..d2dfba674 --- /dev/null +++ b/gems/decomplex/examples/csharp/operational-discontinuity.cs @@ -0,0 +1,9 @@ +class Example { static void phase_shift() { + var a = 1; + var b = 2; + + // Phase 2 + var x = 3; + var y = 4; + print(x); print(y); +} } diff --git a/gems/decomplex/examples/csharp/oversized-predicate.cs b/gems/decomplex/examples/csharp/oversized-predicate.cs new file mode 100644 index 000000000..917339ced --- /dev/null +++ b/gems/decomplex/examples/csharp/oversized-predicate.cs @@ -0,0 +1 @@ +class Example { static void complex_check(bool a, bool b, bool c, bool d) { if (a && b && c && d) { print("too big"); } } } diff --git a/gems/decomplex/examples/csharp/path-condition.cs b/gems/decomplex/examples/csharp/path-condition.cs new file mode 100644 index 000000000..941f756f7 --- /dev/null +++ b/gems/decomplex/examples/csharp/path-condition.cs @@ -0,0 +1 @@ +class Example { static void one(X x,Y y,Z z) { if (x.p() && y.q() && z.r()) { go(x); } } static void two(X x,Y y,Z z) { if (x.p() && y.q() && z.r()) { go(x); } } static void three(X x,Y y,Z z) { if (x.p() && y.q() && z.r()) { go(x); } } 
static void bug(X x,Y y,Z z) { if (x.p() && y.q()) { go(x); } } } diff --git a/gems/decomplex/examples/csharp/predicate-alias.cs b/gems/decomplex/examples/csharp/predicate-alias.cs new file mode 100644 index 000000000..c853e98a4 --- /dev/null +++ b/gems/decomplex/examples/csharp/predicate-alias.cs @@ -0,0 +1 @@ +class Example { static bool first() { return true; } static bool second() { return true; } static bool other() { return false; } } diff --git a/gems/decomplex/examples/csharp/redundant-nil-guard.cs b/gems/decomplex/examples/csharp/redundant-nil-guard.cs new file mode 100644 index 000000000..32f12a734 --- /dev/null +++ b/gems/decomplex/examples/csharp/redundant-nil-guard.cs @@ -0,0 +1 @@ +class Example { static void check(Value value) { if (value.isSome()) { value.isNull(); } } } diff --git a/gems/decomplex/examples/csharp/semantic-alias.cs b/gems/decomplex/examples/csharp/semantic-alias.cs new file mode 100644 index 000000000..75c330865 --- /dev/null +++ b/gems/decomplex/examples/csharp/semantic-alias.cs @@ -0,0 +1 @@ +class Example { static bool frame(Node node) { return node.provenance == FRAME; } static bool is_frame(Node node) { return provenance == FRAME; } static bool heap(Node node) { return node.provenance == HEAP; } static int somewhere(Node node) { if (node.provenance == FRAME) { return 1; } return 0; } } diff --git a/gems/decomplex/examples/csharp/sequence-mine.cs b/gems/decomplex/examples/csharp/sequence-mine.cs new file mode 100644 index 000000000..629558214 --- /dev/null +++ b/gems/decomplex/examples/csharp/sequence-mine.cs @@ -0,0 +1 @@ +class Example { static void one() { alloc_mark(x); body1(); cleanup(x); } static void two() { alloc_mark(y); body2(); cleanup(y); } static void three() { alloc_mark(z); body3(); cleanup(z); } static void four() { alloc_mark(w); body4(); cleanup(w); } static void leak() { alloc_mark(q); use_value(q); } } diff --git a/gems/decomplex/examples/csharp/state-branch-density.cs 
b/gems/decomplex/examples/csharp/state-branch-density.cs new file mode 100644 index 000000000..10f872540 --- /dev/null +++ b/gems/decomplex/examples/csharp/state-branch-density.cs @@ -0,0 +1 @@ +class StateBranchChecker { bool checked; void check(bool admin, string name) { if (admin) { this.checked = true; } if (this.checked && name == "admin") { print("hello"); } } } diff --git a/gems/decomplex/examples/csharp/state-mesh.cs b/gems/decomplex/examples/csharp/state-mesh.cs new file mode 100644 index 000000000..ed01f5ad9 --- /dev/null +++ b/gems/decomplex/examples/csharp/state-mesh.cs @@ -0,0 +1 @@ +class StateMeshExample { int a; int b; void initialize() { this.a = 1; this.b = 2; } void writer() { this.a = 3; } int reader() { return this.a + this.b; } int a_alias() { return this.a; } } diff --git a/gems/decomplex/examples/csharp/structural-topology.cs b/gems/decomplex/examples/csharp/structural-topology.cs new file mode 100644 index 000000000..1af56a088 --- /dev/null +++ b/gems/decomplex/examples/csharp/structural-topology.cs @@ -0,0 +1 @@ +class Worker { public void run(Items items) { prepare(); if (ready()) { validate(); } foreach (var item in items) { helper(item); } } private void prepare() {} private bool ready() { return true; } public void validate() {} private void helper(Item item) { item.use(); } } diff --git a/gems/decomplex/examples/csharp/temporal-ordering-pressure.cs b/gems/decomplex/examples/csharp/temporal-ordering-pressure.cs new file mode 100644 index 000000000..83a07d855 --- /dev/null +++ b/gems/decomplex/examples/csharp/temporal-ordering-pressure.cs @@ -0,0 +1 @@ +class TemporalOrderExample { int a; int b; public void one() { this.a = 1; } public void two() { this.a = 2; this.b = 3; } public void three() { this.b = 4; } public int reader() { return this.a; } } diff --git a/gems/decomplex/examples/csharp/weighted-inlined-complexity.cs b/gems/decomplex/examples/csharp/weighted-inlined-complexity.cs new file mode 100644 index 000000000..e78470393 --- 
/dev/null +++ b/gems/decomplex/examples/csharp/weighted-inlined-complexity.cs @@ -0,0 +1 @@ +class Example { static void checkout(User user, Cart cart) { validate_user(user); apply_discount(cart); process_payment(user, cart); audit_cart(cart); } static bool validate_user(User user) { if (user.active() && !user.suspended()) { if (user.profile.complete()) { return true; } else { return false; } } else { return false; } } static int apply_discount(Cart cart) { if (cart.total > 100 && eligible()) { if (holiday()) { return 20; } else if (loyalty_month()) { return 15; } else { return 10; } } return 0; } static void process_payment(User user, Cart cart) { if (gateway.ready()) { if (cart.total > 0 && user.active()) { if (fraud_check(user)) { charge(user, cart); } else { decline(user); } } } } static void audit_cart(Cart cart) { foreach (var item in cart.items) { if (item.taxable()) { if (item.region && item.amount > 0) { record_tax(item); } } } } } diff --git a/gems/decomplex/examples/facts/detectors/co-update-rich.json b/gems/decomplex/examples/facts/detectors/co-update-rich.json new file mode 100644 index 000000000..aaebc703f --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/co-update-rich.json @@ -0,0 +1,67 @@ +{ + "detector": "co-update", + "input": { + "documents": [ + { + "file": "facts/co_update.rb", + "language": "ruby", + "state_writes": [ + {"field": "status", "receiver": "self", "file": "facts/co_update.rb", "function": "create", "line": 1, "span": [1, 0, 1, 14], "owner": "Fixture"}, + {"field": "version", "receiver": "self", "file": "facts/co_update.rb", "function": "create", "line": 2, "span": [2, 0, 2, 15], "owner": "Fixture"}, + {"field": "status", "receiver": "self", "file": "facts/co_update.rb", "function": "replace", "line": 4, "span": [4, 0, 4, 14], "owner": "Fixture"}, + {"field": "version", "receiver": "self", "file": "facts/co_update.rb", "function": "replace", "line": 5, "span": [5, 0, 5, 15], "owner": "Fixture"}, + {"field": "status", 
"receiver": "self", "file": "facts/co_update.rb", "function": "restore", "line": 7, "span": [7, 0, 7, 14], "owner": "Fixture"}, + {"field": "version", "receiver": "self", "file": "facts/co_update.rb", "function": "restore", "line": 8, "span": [8, 0, 8, 15], "owner": "Fixture"}, + {"field": "status", "receiver": "self", "file": "facts/co_update.rb", "function": "touch_status", "line": 10, "span": [10, 0, 10, 14], "owner": "Fixture"}, + {"field": "version", "receiver": "self", "file": "facts/co_update.rb", "function": "touch_version", "line": 12, "span": [12, 0, 12, 15], "owner": "Fixture"} + ] + } + ] + }, + "expected": { + "co_written_pairs": [ + { + "pair": [ + "status", + "version" + ], + "sites": [ + "facts/co_update.rb:create", + "facts/co_update.rb:replace", + "facts/co_update.rb:restore" + ], + "support": 3 + } + ], + "neglected_updates": [ + { + "at": "facts/co_update.rb:touch_status:10", + "has": "status", + "missing": "version", + "pair": [ + "status", + "version" + ], + "recv": "self", + "spans": { + "facts/co_update.rb:touch_status:10": [10, 0, 10, 14] + }, + "support": 3 + }, + { + "at": "facts/co_update.rb:touch_version:12", + "has": "version", + "missing": "status", + "pair": [ + "status", + "version" + ], + "recv": "self", + "spans": { + "facts/co_update.rb:touch_version:12": [12, 0, 12, 15] + }, + "support": 3 + } + ] + } +} diff --git a/gems/decomplex/examples/facts/detectors/decision-pressure.json b/gems/decomplex/examples/facts/detectors/decision-pressure.json new file mode 100644 index 000000000..38e872e35 --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/decision-pressure.json @@ -0,0 +1,188 @@ +{ + "detector": "decision-pressure", + "input": { + "documents": [ + { + "file": "facts/decision.rb", + "language": "ruby", + "local_contract_assignments": { + "check": { + "candidate": "input.user" + } + }, + "local_methods": [ + { + "id": "Fixture#check", + "owner": "Fixture", + "name": "check", + "file": "facts/decision.rb", + "line": 1, + 
"span": [ + 1, + 0, + 20, + 3 + ], + "statements": [ + { + "index": 0, + "line": 1, + "end_line": 1, + "span": [ + 1, + 0, + 1, + 22 + ], + "source": "candidate = input.user", + "reads": [ + "input" + ], + "writes": [ + "candidate" + ], + "dependencies": [ + [ + "candidate", + "input" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 1, + "line": 2, + "end_line": 2, + "span": [ + 2, + 0, + 2, + 25 + ], + "source": "candidate.name rescue nil", + "reads": [ + "candidate" + ], + "writes": [ + + ], + "dependencies": [ + + ], + "co_uses": [ + + ] + } + ], + "boundaries": [ + + ] + } + ], + "call_sites": [ + { + "receiver": "candidate", + "message": "nil?", + "file": "facts/decision.rb", + "function": "check", + "owner": "Fixture", + "line": 3, + "span": [ + 3, + 2, + 3, + 16 + ], + "conditional": false, + "arguments": [ + + ], + "control": null, + "safe_navigation": false, + "block": false + }, + { + "receiver": "candidate", + "message": "ready?", + "file": "facts/decision.rb", + "function": "check", + "owner": "Fixture", + "line": 4, + "span": [ + 4, + 2, + 4, + 18 + ], + "conditional": false, + "arguments": [ + + ], + "control": null, + "safe_navigation": false, + "block": false + }, + { + "receiver": "candidate", + "message": "name", + "file": "facts/decision.rb", + "function": "check", + "owner": "Fixture", + "line": 2, + "span": [ + 2, + 0, + 2, + 20 + ], + "conditional": false, + "arguments": [ + + ], + "control": null, + "safe_navigation": false, + "block": false + } + ] + } + ] + }, + "expected": [ + { + "contract": ".user", + "decisions": 1, + "essential": 1, + "methods": 1, + "sites": [ + "facts/decision.rb:check:3" + ], + "spans": { + "facts/decision.rb:check:3": [ + 3, + 2, + 3, + 16 + ] + } + }, + { + "contract": ".name", + "decisions": 1, + "essential": 0, + "methods": 1, + "sites": [ + "facts/decision.rb:check:2" + ], + "spans": { + "facts/decision.rb:check:2": [ + 2, + 0, + 2, + 25 + ] + } + } + ] +} diff --git 
a/gems/decomplex/examples/facts/detectors/derived-state-branches.json b/gems/decomplex/examples/facts/detectors/derived-state-branches.json new file mode 100644 index 000000000..7326c37e5 --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/derived-state-branches.json @@ -0,0 +1,353 @@ +{ + "detector": "derived-state", + "input": { + "documents": [ + { + "file": "facts/derived_branches.rb", + "language": "ruby", + "local_methods": [ + { + "id": "Fixture#ordered", + "owner": "Fixture", + "name": "ordered", + "file": "facts/derived_branches.rb", + "line": 1, + "span": [ + 1, + 0, + 20, + 3 + ], + "statements": [ + { + "index": 0, + "line": 1, + "end_line": 1, + "span": [ + 1, + 0, + 1, + 20 + ], + "source": "z = input; a = input", + "reads": [ + "input" + ], + "writes": [ + "z", + "a" + ], + "dependencies": [ + [ + "z", + "input" + ], + [ + "a", + "input" + ] + ], + "co_uses": [ + + ] + } + ], + "boundaries": [ + + ] + }, + { + "id": "Fixture#self_dep", + "owner": "Fixture", + "name": "self_dep", + "file": "facts/derived_branches.rb", + "line": 3, + "span": [ + 1, + 0, + 20, + 3 + ], + "statements": [ + { + "index": 0, + "line": 3, + "end_line": 3, + "span": [ + 3, + 0, + 3, + 11 + ], + "source": "same = same", + "reads": [ + "same" + ], + "writes": [ + "same" + ], + "dependencies": [ + [ + "same", + "same" + ] + ], + "co_uses": [ + + ] + } + ], + "boundaries": [ + + ] + }, + { + "id": "Fixture#stale", + "owner": "Fixture", + "name": "stale", + "file": "facts/derived_branches.rb", + "line": 9, + "span": [ + 1, + 0, + 20, + 3 + ], + "statements": [ + { + "index": 0, + "line": 9, + "end_line": 9, + "span": [ + 9, + 0, + 9, + 13 + ], + "source": "source = load", + "reads": [ + "load" + ], + "writes": [ + "source" + ], + "dependencies": [ + [ + "source", + "load" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 1, + "line": 10, + "end_line": 10, + "span": [ + 10, + 0, + 10, + 16 + ], + "source": "derived = source", + "reads": [ + "source" + ], + "writes": [ + 
"derived" + ], + "dependencies": [ + [ + "derived", + "source" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 2, + "line": 13, + "end_line": 13, + "span": [ + 13, + 0, + 13, + 15 + ], + "source": "source = reload", + "reads": [ + "reload" + ], + "writes": [ + "source" + ], + "dependencies": [ + [ + "source", + "reload" + ] + ], + "co_uses": [ + + ] + } + ], + "boundaries": [ + + ] + }, + { + "id": "Fixture#recomputed", + "owner": "Fixture", + "name": "recomputed", + "file": "facts/derived_branches.rb", + "line": 5, + "span": [ + 1, + 0, + 20, + 3 + ], + "statements": [ + { + "index": 0, + "line": 5, + "end_line": 5, + "span": [ + 5, + 0, + 5, + 13 + ], + "source": "source = load", + "reads": [ + "load" + ], + "writes": [ + "source" + ], + "dependencies": [ + [ + "source", + "load" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 1, + "line": 6, + "end_line": 6, + "span": [ + 6, + 0, + 6, + 16 + ], + "source": "derived = source", + "reads": [ + "source" + ], + "writes": [ + "derived" + ], + "dependencies": [ + [ + "derived", + "source" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 2, + "line": 7, + "end_line": 7, + "span": [ + 7, + 0, + 7, + 15 + ], + "source": "source = reload", + "reads": [ + "reload" + ], + "writes": [ + "source" + ], + "dependencies": [ + [ + "source", + "reload" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 3, + "line": 8, + "end_line": 8, + "span": [ + 8, + 0, + 8, + 16 + ], + "source": "derived = source", + "reads": [ + "source" + ], + "writes": [ + "derived" + ], + "dependencies": [ + [ + "derived", + "source" + ] + ], + "co_uses": [ + + ] + } + ], + "boundaries": [ + + ] + } + ] + } + ] + }, + "expected": [ + { + "at": "facts/derived_branches.rb:stale:10", + "defn": "stale", + "derived": "derived", + "derived_at": 10, + "file": "facts/derived_branches.rb", + "gap": 3, + "source": "source", + "source_reassigned_at": 13, + "spans": { + "facts/derived_branches.rb:stale:10": [ + 10, + 0, + 10, + 16 + ] + } + } + ] +} diff --git 
a/gems/decomplex/examples/facts/detectors/derived-state.json b/gems/decomplex/examples/facts/detectors/derived-state.json new file mode 100644 index 000000000..0e521875d --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/derived-state.json @@ -0,0 +1,156 @@ +{ + "detector": "derived-state", + "input": { + "documents": [ + { + "file": "facts/derived.rb", + "language": "ruby", + "local_methods": [ + { + "id": "Fixture#refresh", + "owner": "Fixture", + "name": "refresh", + "file": "facts/derived.rb", + "line": 1, + "span": [ + 1, + 0, + 20, + 3 + ], + "statements": [ + { + "index": 0, + "line": 1, + "end_line": 1, + "span": [ + 1, + 0, + 1, + 13 + ], + "source": "source = load", + "reads": [ + "load" + ], + "writes": [ + "source" + ], + "dependencies": [ + [ + "source", + "load" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 1, + "line": 2, + "end_line": 2, + "span": [ + 2, + 0, + 2, + 16 + ], + "source": "derived = source", + "reads": [ + "source" + ], + "writes": [ + "derived" + ], + "dependencies": [ + [ + "derived", + "source" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 2, + "line": 7, + "end_line": 7, + "span": [ + 7, + 0, + 7, + 15 + ], + "source": "source = reload", + "reads": [ + "reload" + ], + "writes": [ + "source" + ], + "dependencies": [ + [ + "source", + "reload" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 3, + "line": 9, + "end_line": 9, + "span": [ + 9, + 0, + 9, + 12 + ], + "source": "use(derived)", + "reads": [ + "derived" + ], + "writes": [ + + ], + "dependencies": [ + + ], + "co_uses": [ + + ] + } + ], + "boundaries": [ + + ] + } + ] + } + ] + }, + "expected": [ + { + "at": "facts/derived.rb:refresh:2", + "defn": "refresh", + "derived": "derived", + "derived_at": 2, + "file": "facts/derived.rb", + "gap": 5, + "source": "source", + "source_reassigned_at": 7, + "spans": { + "facts/derived.rb:refresh:2": [ + 2, + 0, + 2, + 16 + ] + } + } + ] +} diff --git 
a/gems/decomplex/examples/facts/detectors/false-simplicity-core-top-level.json b/gems/decomplex/examples/facts/detectors/false-simplicity-core-top-level.json new file mode 100644 index 000000000..0d6722792 --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/false-simplicity-core-top-level.json @@ -0,0 +1,61 @@ +{ + "detector": "false-simplicity", + "input": { + "documents": [ + { + "file": "facts/false_core.rb", + "language": "ruby", + "source": "", + "lines": [], + "root": {"kind": "program", "text": "", "span": [1, 0, 1, 0], "named": true, "field_name": null, "children": []}, + "normalized_root": {"type": "ROOT", "children": [], "first_lineno": 1, "first_column": 0, "last_lineno": 1, "last_column": 0, "text": ""}, + "function_defs": [ + {"file": "facts/false_core.rb", "name": "scrub", "owner": "String", "line": 2, "span": [2, 2, 2, 20], "body": {"kind": "body_statement", "text": "", "span": [2, 2, 2, 20], "named": true, "field_name": null, "children": []}, "visibility": null, "params": []} + ], + "owner_defs": [ + {"file": "facts/false_core.rb", "name": "String", "kind": "class", "line": 1, "span": [1, 0, 3, 3]} + ], + "call_sites": [], + "state_reads": [], + "state_writes": [], + "decision_sites": [], + "branch_decisions": [], + "dispatch_sites": [], + "semantic_effect_sites": [ + {"kind": "hidden_context", "detail": "Dir.chdir", "file": "facts/false_core.rb", "function": "", "owner": "", "line": 5, "span": [5, 0, 5, 18]} + ], + "local_complexity_scores": {}, + "predicate_aliases": [], + "comparison_uses": [] + } + ] + }, + "expected": [ + { + "at": "facts/false_core.rb:(top-level):5", + "detail": "Dir.chdir", + "kind": "hidden_context", + "scatter": 1, + "sites": [ + "facts/false_core.rb:(top-level):5" + ], + "spans": { + "facts/false_core.rb:(top-level):5": [5, 0, 5, 18] + }, + "support": 1 + }, + { + "at": "facts/false_core.rb:String:1", + "detail": "String", + "kind": "monkeypatch", + "scatter": 1, + "sites": [ + "facts/false_core.rb:String:1" + ], + 
"spans": { + "facts/false_core.rb:String:1": [1, 0, 3, 3] + }, + "support": 1 + } + ] +} diff --git a/gems/decomplex/examples/facts/detectors/false-simplicity.json b/gems/decomplex/examples/facts/detectors/false-simplicity.json new file mode 100644 index 000000000..c7acd3a3c --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/false-simplicity.json @@ -0,0 +1,87 @@ +{ + "detector": "false-simplicity", + "input": { + "documents": [ + { + "file": "facts/false_a.rb", + "language": "ruby", + "source": "", + "lines": [], + "root": {"kind": "program", "text": "", "span": [1, 0, 1, 0], "named": true, "field_name": null, "children": []}, + "normalized_root": {"type": "ROOT", "children": [], "first_lineno": 1, "first_column": 0, "last_lineno": 1, "last_column": 0, "text": ""}, + "function_defs": [ + {"file": "facts/false_a.rb", "name": "run", "owner": "Project::Thing", "line": 2, "span": [2, 0, 2, 8], "body": {"kind": "body_statement", "text": "", "span": [2, 0, 2, 8], "named": true, "field_name": null, "children": []}, "visibility": null, "params": []} + ], + "owner_defs": [ + {"file": "facts/false_a.rb", "name": "Project::Thing", "kind": "class", "line": 1, "span": [1, 0, 1, 20]} + ], + "call_sites": [], + "state_reads": [], + "state_writes": [], + "decision_sites": [], + "branch_decisions": [], + "dispatch_sites": [], + "semantic_effect_sites": [ + {"kind": "hidden_io", "detail": "puts", "file": "facts/false_a.rb", "function": "run", "line": 3, "span": [3, 2, 3, 6]} + ], + "local_complexity_scores": {}, + "predicate_aliases": [], + "comparison_uses": [] + }, + { + "file": "facts/false_b.rb", + "language": "ruby", + "source": "", + "lines": [], + "root": {"kind": "program", "text": "", "span": [1, 0, 1, 0], "named": true, "field_name": null, "children": []}, + "normalized_root": {"type": "ROOT", "children": [], "first_lineno": 1, "first_column": 0, "last_lineno": 1, "last_column": 0, "text": ""}, + "function_defs": [ + {"file": "facts/false_b.rb", "name": "again", 
"owner": "Project::Thing", "line": 2, "span": [2, 0, 2, 10], "body": {"kind": "body_statement", "text": "", "span": [2, 0, 2, 10], "named": true, "field_name": null, "children": []}, "visibility": null, "params": []} + ], + "owner_defs": [ + {"file": "facts/false_b.rb", "name": "Project::Thing", "kind": "class", "line": 1, "span": [1, 0, 1, 20]} + ], + "call_sites": [], + "state_reads": [], + "state_writes": [], + "decision_sites": [], + "branch_decisions": [], + "dispatch_sites": [], + "semantic_effect_sites": [], + "local_complexity_scores": {}, + "predicate_aliases": [], + "comparison_uses": [] + } + ] + }, + "expected": [ + { + "at": "facts/false_a.rb:Project::Thing:1", + "detail": "reopen Project::Thing", + "kind": "monkeypatch", + "scatter": 2, + "sites": [ + "facts/false_a.rb:Project::Thing:1", + "facts/false_b.rb:Project::Thing:1" + ], + "spans": { + "facts/false_a.rb:Project::Thing:1": [1, 0, 1, 20], + "facts/false_b.rb:Project::Thing:1": [1, 0, 1, 20] + }, + "support": 2 + }, + { + "at": "facts/false_a.rb:run:3", + "detail": "puts", + "kind": "hidden_io", + "scatter": 1, + "sites": [ + "facts/false_a.rb:run:3" + ], + "spans": { + "facts/false_a.rb:run:3": [3, 2, 3, 6] + }, + "support": 1 + } + ] +} diff --git a/gems/decomplex/examples/facts/detectors/flay-similarity-facts.json b/gems/decomplex/examples/facts/detectors/flay-similarity-facts.json new file mode 100644 index 000000000..698835931 --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/flay-similarity-facts.json @@ -0,0 +1,234 @@ +{ + "detector": "flay-similarity", + "options": { + "mass": 1, + "fuzzy": 1 + }, + "input": { + "documents": [ + { + "file": "facts/flay.rb", + "language": "ruby", + "root": {"kind": "program", "text": "", "span": [1, 0, 1, 0], "named": true, "field_name": null, "children": []}, + "function_defs": [ + { + "file": "facts/flay.rb", + "name": "alpha", + "owner": "Fixture", + "line": 1, + "span": [1, 0, 5, 3], + "visibility": "public", + "params": [], + "body": { + 
"kind": "body_statement", + "text": "left = source.fetch(:left)\nright = source.fetch(:right)\ncommit(left, right)", + "span": [2, 2, 4, 21], + "named": true, + "field_name": null, + "children": [ + {"kind": "assignment", "text": "left = source.fetch(:left)", "span": [2, 2, 2, 28], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "left", "span": [2, 2, 2, 6], "named": true, "field_name": null, "children": []}, + {"kind": "=", "text": "=", "span": [2, 7, 2, 8], "named": false, "field_name": null, "children": []}, + {"kind": "call", "text": "source.fetch(:left)", "span": [2, 9, 2, 28], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "source", "span": [2, 9, 2, 15], "named": true, "field_name": null, "children": []}, + {"kind": "identifier", "text": "fetch", "span": [2, 16, 2, 21], "named": true, "field_name": null, "children": []}, + {"kind": "argument_list", "text": "(:left)", "span": [2, 21, 2, 28], "named": true, "field_name": null, "children": [ + {"kind": "symbol", "text": ":left", "span": [2, 22, 2, 27], "named": true, "field_name": null, "children": []} + ]} + ]} + ]}, + {"kind": "assignment", "text": "right = source.fetch(:right)", "span": [3, 2, 3, 30], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "right", "span": [3, 2, 3, 7], "named": true, "field_name": null, "children": []}, + {"kind": "=", "text": "=", "span": [3, 8, 3, 9], "named": false, "field_name": null, "children": []}, + {"kind": "call", "text": "source.fetch(:right)", "span": [3, 10, 3, 30], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "source", "span": [3, 10, 3, 16], "named": true, "field_name": null, "children": []}, + {"kind": "identifier", "text": "fetch", "span": [3, 17, 3, 22], "named": true, "field_name": null, "children": []}, + {"kind": "argument_list", "text": "(:right)", "span": [3, 22, 3, 30], "named": true, "field_name": null, "children": [ + 
{"kind": "symbol", "text": ":right", "span": [3, 23, 3, 29], "named": true, "field_name": null, "children": []} + ]} + ]} + ]}, + {"kind": "call", "text": "commit(left, right)", "span": [4, 2, 4, 21], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "commit", "span": [4, 2, 4, 8], "named": true, "field_name": null, "children": []}, + {"kind": "argument_list", "text": "(left, right)", "span": [4, 8, 4, 21], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "left", "span": [4, 9, 4, 13], "named": true, "field_name": null, "children": []}, + {"kind": "identifier", "text": "right", "span": [4, 15, 4, 20], "named": true, "field_name": null, "children": []} + ]} + ]} + ] + } + }, + { + "file": "facts/flay.rb", + "name": "beta", + "owner": "Fixture", + "line": 7, + "span": [7, 0, 11, 3], + "visibility": "public", + "params": [], + "body": { + "kind": "body_statement", + "text": "first = source.fetch(:first)\nsecond = source.fetch(:second)\ncommit(first, second)", + "span": [8, 2, 10, 25], + "named": true, + "field_name": null, + "children": [ + {"kind": "assignment", "text": "first = source.fetch(:first)", "span": [8, 2, 8, 30], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "first", "span": [8, 2, 8, 7], "named": true, "field_name": null, "children": []}, + {"kind": "=", "text": "=", "span": [8, 8, 8, 9], "named": false, "field_name": null, "children": []}, + {"kind": "call", "text": "source.fetch(:first)", "span": [8, 10, 8, 30], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "source", "span": [8, 10, 8, 16], "named": true, "field_name": null, "children": []}, + {"kind": "identifier", "text": "fetch", "span": [8, 17, 8, 22], "named": true, "field_name": null, "children": []}, + {"kind": "argument_list", "text": "(:first)", "span": [8, 22, 8, 30], "named": true, "field_name": null, "children": [ + {"kind": "symbol", "text": ":first", 
"span": [8, 23, 8, 29], "named": true, "field_name": null, "children": []} + ]} + ]} + ]}, + {"kind": "assignment", "text": "second = source.fetch(:second)", "span": [9, 2, 9, 32], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "second", "span": [9, 2, 9, 8], "named": true, "field_name": null, "children": []}, + {"kind": "=", "text": "=", "span": [9, 9, 9, 10], "named": false, "field_name": null, "children": []}, + {"kind": "call", "text": "source.fetch(:second)", "span": [9, 11, 9, 32], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "source", "span": [9, 11, 9, 17], "named": true, "field_name": null, "children": []}, + {"kind": "identifier", "text": "fetch", "span": [9, 18, 9, 23], "named": true, "field_name": null, "children": []}, + {"kind": "argument_list", "text": "(:second)", "span": [9, 23, 9, 32], "named": true, "field_name": null, "children": [ + {"kind": "symbol", "text": ":second", "span": [9, 24, 9, 31], "named": true, "field_name": null, "children": []} + ]} + ]} + ]}, + {"kind": "call", "text": "commit(first, second)", "span": [10, 2, 10, 25], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "commit", "span": [10, 2, 10, 8], "named": true, "field_name": null, "children": []}, + {"kind": "argument_list", "text": "(first, second)", "span": [10, 8, 10, 25], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "first", "span": [10, 9, 10, 14], "named": true, "field_name": null, "children": []}, + {"kind": "identifier", "text": "second", "span": [10, 16, 10, 22], "named": true, "field_name": null, "children": []} + ]} + ]} + ] + } + }, + { + "file": "facts/flay.rb", + "name": "gamma", + "owner": "Fixture", + "line": 13, + "span": [13, 0, 17, 3], + "visibility": "public", + "params": [], + "body": { + "kind": "body_statement", + "text": "value = source.fetch(:value)\nsave(value)\nlog(value)", + "span": [14, 2, 16, 12], + 
"named": true, + "field_name": null, + "children": [ + {"kind": "assignment", "text": "value = source.fetch(:value)", "span": [14, 2, 14, 30], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "value", "span": [14, 2, 14, 7], "named": true, "field_name": null, "children": []}, + {"kind": "=", "text": "=", "span": [14, 8, 14, 9], "named": false, "field_name": null, "children": []}, + {"kind": "call", "text": "source.fetch(:value)", "span": [14, 10, 14, 30], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "source", "span": [14, 10, 14, 16], "named": true, "field_name": null, "children": []}, + {"kind": "identifier", "text": "fetch", "span": [14, 17, 14, 22], "named": true, "field_name": null, "children": []}, + {"kind": "argument_list", "text": "(:value)", "span": [14, 22, 14, 30], "named": true, "field_name": null, "children": [ + {"kind": "symbol", "text": ":value", "span": [14, 23, 14, 29], "named": true, "field_name": null, "children": []} + ]} + ]} + ]}, + {"kind": "call", "text": "save(value)", "span": [15, 2, 15, 13], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "save", "span": [15, 2, 15, 6], "named": true, "field_name": null, "children": []}, + {"kind": "argument_list", "text": "(value)", "span": [15, 6, 15, 13], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "value", "span": [15, 7, 15, 12], "named": true, "field_name": null, "children": []} + ]} + ]}, + {"kind": "call", "text": "log(value)", "span": [16, 2, 16, 12], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "log", "span": [16, 2, 16, 5], "named": true, "field_name": null, "children": []}, + {"kind": "argument_list", "text": "(value)", "span": [16, 5, 16, 12], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "value", "span": [16, 6, 16, 11], "named": true, "field_name": null, "children": []} + ]} + 
]} + ] + } + }, + { + "file": "facts/flay.rb", + "name": "delta", + "owner": "Fixture", + "line": 19, + "span": [19, 0, 23, 3], + "visibility": "public", + "params": [], + "body": { + "kind": "body_statement", + "text": "item = source.fetch(:item)\nsave(item)\naudit(item)", + "span": [20, 2, 22, 14], + "named": true, + "field_name": null, + "children": [ + {"kind": "assignment", "text": "item = source.fetch(:item)", "span": [20, 2, 20, 28], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "item", "span": [20, 2, 20, 6], "named": true, "field_name": null, "children": []}, + {"kind": "=", "text": "=", "span": [20, 7, 20, 8], "named": false, "field_name": null, "children": []}, + {"kind": "call", "text": "source.fetch(:item)", "span": [20, 9, 20, 28], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "source", "span": [20, 9, 20, 15], "named": true, "field_name": null, "children": []}, + {"kind": "identifier", "text": "fetch", "span": [20, 16, 20, 21], "named": true, "field_name": null, "children": []}, + {"kind": "argument_list", "text": "(:item)", "span": [20, 21, 20, 28], "named": true, "field_name": null, "children": [ + {"kind": "symbol", "text": ":item", "span": [20, 22, 20, 27], "named": true, "field_name": null, "children": []} + ]} + ]} + ]}, + {"kind": "call", "text": "save(item)", "span": [21, 2, 21, 12], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "save", "span": [21, 2, 21, 6], "named": true, "field_name": null, "children": []}, + {"kind": "argument_list", "text": "(item)", "span": [21, 6, 21, 12], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "item", "span": [21, 7, 21, 11], "named": true, "field_name": null, "children": []} + ]} + ]}, + {"kind": "call", "text": "audit(item)", "span": [22, 2, 22, 14], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "audit", "span": [22, 2, 22, 7], 
"named": true, "field_name": null, "children": []}, + {"kind": "argument_list", "text": "(item)", "span": [22, 7, 22, 14], "named": true, "field_name": null, "children": [ + {"kind": "identifier", "text": "item", "span": [22, 8, 22, 12], "named": true, "field_name": null, "children": []} + ]} + ]} + ] + } + } + ] + } + ] + }, + "expected": { + "findings": [ + { + "at": "facts/flay.rb:alpha:2", + "clone_type": "type2", + "locations": [ + "facts/flay.rb:2", + "facts/flay.rb:8" + ], + "mass": 22, + "node": "defn", + "sites": [ + "facts/flay.rb:alpha:2", + "facts/flay.rb:beta:8" + ], + "spans": { + "facts/flay.rb:alpha:2": [2, 0, 4, 1], + "facts/flay.rb:beta:8": [8, 0, 10, 1] + } + }, + { + "at": "facts/flay.rb:delta:20", + "clone_type": "type3", + "locations": [ + "facts/flay.rb:14", + "facts/flay.rb:20" + ], + "mass": 12, + "node": "defn", + "sites": [ + "facts/flay.rb:delta:20", + "facts/flay.rb:gamma:14" + ], + "spans": { + "facts/flay.rb:delta:20": [20, 0, 22, 1], + "facts/flay.rb:gamma:14": [14, 0, 16, 1] + } + } + ] + } +} diff --git a/gems/decomplex/examples/facts/detectors/function-lcom.json b/gems/decomplex/examples/facts/detectors/function-lcom.json new file mode 100644 index 000000000..b871c2d57 --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/function-lcom.json @@ -0,0 +1,248 @@ +{ + "detector": "function-lcom", + "input": { + "documents": [ + { + "file": "facts/lcom.rb", + "language": "ruby", + "local_methods": [ + { + "id": "Fixture#mixed", + "owner": "Fixture", + "name": "mixed", + "file": "facts/lcom.rb", + "line": 1, + "span": [ + 1, + 0, + 20, + 3 + ], + "statements": [ + { + "index": 0, + "line": 1, + "end_line": 1, + "span": [ + 1, + 0, + 1, + 11 + ], + "source": "a = input_a", + "reads": [ + "input_a" + ], + "writes": [ + "a" + ], + "dependencies": [ + [ + "a", + "input_a" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 1, + "line": 2, + "end_line": 2, + "span": [ + 2, + 0, + 2, + 16 + ], + "source": "b = normalize(a)", + "reads": [ + 
"a" + ], + "writes": [ + "b" + ], + "dependencies": [ + [ + "b", + "a" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 2, + "line": 3, + "end_line": 3, + "span": [ + 3, + 0, + 3, + 11 + ], + "source": "c = input_c", + "reads": [ + "input_c" + ], + "writes": [ + "c" + ], + "dependencies": [ + [ + "c", + "input_c" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 3, + "line": 4, + "end_line": 4, + "span": [ + 4, + 0, + 4, + 16 + ], + "source": "d = normalize(c)", + "reads": [ + "c" + ], + "writes": [ + "d" + ], + "dependencies": [ + [ + "d", + "c" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 4, + "line": 5, + "end_line": 5, + "span": [ + 5, + 0, + 5, + 11 + ], + "source": "e = input_e", + "reads": [ + "input_e" + ], + "writes": [ + "e" + ], + "dependencies": [ + [ + "e", + "input_e" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 5, + "line": 6, + "end_line": 6, + "span": [ + 6, + 0, + 6, + 16 + ], + "source": "f = normalize(e)", + "reads": [ + "e" + ], + "writes": [ + "f" + ], + "dependencies": [ + [ + "f", + "e" + ] + ], + "co_uses": [ + + ] + } + ], + "boundaries": [ + + ] + } + ] + } + ] + }, + "expected": [ + { + "at": "facts/lcom.rb:mixed:1", + "component_lines": [ + [ + 1, + 2 + ], + [ + 3, + 4 + ], + [ + 5, + 6 + ] + ], + "component_vars": [ + [ + "a", + "b", + "input_a" + ], + [ + "c", + "d", + "input_c" + ], + [ + "e", + "f", + "input_e" + ] + ], + "components": 3, + "defn": "mixed", + "file": "facts/lcom.rb", + "line": 1, + "locals": 9, + "method": "mixed", + "mode": "disjoint", + "owner": "Fixture", + "score": 45, + "spans": { + "facts/lcom.rb:mixed:1": [ + 1, + 0, + 20, + 3 + ] + }, + "statements": 6, + "terminal_join": false + } + ] +} diff --git a/gems/decomplex/examples/facts/detectors/local-flow-go-receiver.json b/gems/decomplex/examples/facts/detectors/local-flow-go-receiver.json new file mode 100644 index 000000000..8e3bd0767 --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/local-flow-go-receiver.json @@ -0,0 +1,755 @@ +{ + 
"detector": "local-flow", + "input": { + "documents": [ + { + "file": "facts/local_flow_receiver.go", + "language": "go", + "source": "package main\n\ntype Worker struct{}\n\nfunc (w *Worker) Handle(price int, tax int) Result {\n subtotal := price + tax\n return Result_init(w, subtotal)\n}\n", + "lines": [ + "package main", + "", + "type Worker struct{}", + "", + "func (w *Worker) Handle(price int, tax int) Result {", + " subtotal := price + tax", + " return Result_init(w, subtotal)", + "}" + ], + "function_defs": [ + { + "file": "facts/local_flow_receiver.go", + "name": "Handle", + "owner": "Worker", + "line": 5, + "span": [ + 5, + 0, + 8, + 1 + ], + "body": { + "kind": "method_declaration", + "text": "func (w *Worker) Handle(price int, tax int) Result {\n subtotal := price + tax\n return Result_init(w, subtotal)\n}", + "span": [ + 5, + 0, + 8, + 1 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "func", + "text": "func", + "span": [ + 5, + 0, + 5, + 4 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "parameter_list", + "text": "(w *Worker)", + "span": [ + 5, + 5, + 5, + 16 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "(", + "text": "(", + "span": [ + 5, + 5, + 5, + 6 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "parameter_declaration", + "text": "w *Worker", + "span": [ + 5, + 6, + 5, + 15 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "w", + "span": [ + 5, + 6, + 5, + 7 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "pointer_type", + "text": "*Worker", + "span": [ + 5, + 8, + 5, + 15 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "*", + "text": "*", + "span": [ + 5, + 8, + 5, + 9 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "type_identifier", + "text": "Worker", + "span": [ + 5, 
+ 9, + 5, + 15 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + }, + { + "kind": ")", + "text": ")", + "span": [ + 5, + 15, + 5, + 16 + ], + "named": false, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "field_identifier", + "text": "Handle", + "span": [ + 5, + 17, + 5, + 23 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "parameter_list", + "text": "(price int, tax int)", + "span": [ + 5, + 23, + 5, + 43 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "(", + "text": "(", + "span": [ + 5, + 23, + 5, + 24 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "parameter_declaration", + "text": "price int", + "span": [ + 5, + 24, + 5, + 33 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "price", + "span": [ + 5, + 24, + 5, + 29 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "type_identifier", + "text": "int", + "span": [ + 5, + 30, + 5, + 33 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": ",", + "text": ",", + "span": [ + 5, + 33, + 5, + 34 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "parameter_declaration", + "text": "tax int", + "span": [ + 5, + 35, + 5, + 42 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "tax", + "span": [ + 5, + 35, + 5, + 38 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "type_identifier", + "text": "int", + "span": [ + 5, + 39, + 5, + 42 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": ")", + "text": ")", + "span": [ + 5, + 42, + 5, + 43 + ], + "named": false, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "type_identifier", + "text": "Result", + "span": [ + 5, + 44, 
+ 5, + 50 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "block", + "text": "{\n subtotal := price + tax\n return Result_init(w, subtotal)\n}", + "span": [ + 5, + 51, + 8, + 1 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "{", + "text": "{", + "span": [ + 5, + 51, + 5, + 52 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "statement_list", + "text": "subtotal := price + tax\n return Result_init(w, subtotal)\n", + "span": [ + 6, + 2, + 8, + 0 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "short_var_declaration", + "text": "subtotal := price + tax", + "span": [ + 6, + 2, + 6, + 25 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "expression_list", + "text": "subtotal", + "span": [ + 6, + 2, + 6, + 10 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ":=", + "text": ":=", + "span": [ + 6, + 11, + 6, + 13 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "expression_list", + "text": "price + tax", + "span": [ + 6, + 14, + 6, + 25 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "price", + "span": [ + 6, + 14, + 6, + 19 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "+", + "text": "+", + "span": [ + 6, + 20, + 6, + 21 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "tax", + "span": [ + 6, + 22, + 6, + 25 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + }, + { + "kind": "return_statement", + "text": "return Result_init(w, subtotal)", + "span": [ + 7, + 2, + 7, + 33 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "return", + "text": "return", + "span": [ + 7, + 2, + 7, + 8 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, 
+ { + "kind": "expression_list", + "text": "Result_init(w, subtotal)", + "span": [ + 7, + 9, + 7, + 33 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "Result_init", + "span": [ + 7, + 9, + 7, + 20 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "argument_list", + "text": "(w, subtotal)", + "span": [ + 7, + 20, + 7, + 33 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "(", + "text": "(", + "span": [ + 7, + 20, + 7, + 21 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "w", + "span": [ + 7, + 21, + 7, + 22 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ",", + "text": ",", + "span": [ + 7, + 22, + 7, + 23 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "subtotal", + "span": [ + 7, + 24, + 7, + 32 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ")", + "text": ")", + "span": [ + 7, + 32, + 7, + 33 + ], + "named": false, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + } + ] + } + ] + }, + { + "kind": "}", + "text": "}", + "span": [ + 8, + 0, + 8, + 1 + ], + "named": false, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + }, + "visibility": "public", + "params": [ + "price", + "tax" + ] + } + ] + } + ] + }, + "expected": [ + { + "boundaries": [ + + ], + "file": "facts/local_flow_receiver.go", + "id": "Worker#Handle", + "line": 5, + "name": "Handle", + "owner": "Worker", + "span": [ + 5, + 0, + 8, + 1 + ], + "statements": [ + { + "co_uses": [ + [ + "price", + "tax" + ] + ], + "dependencies": [ + [ + "subtotal", + "price" + ], + [ + "subtotal", + "tax" + ] + ], + "end_line": 6, + "index": 0, + "line": 6, + "reads": [ + "price", + "tax" + ], + "source": "subtotal := price + tax", + "span": [ + 6, + 2, + 6, + 25 + ], + "writes": [ + "subtotal" 
+ ] + }, + { + "co_uses": [ + [ + "subtotal", + "w" + ] + ], + "dependencies": [ + + ], + "end_line": 7, + "index": 1, + "line": 7, + "reads": [ + "subtotal", + "w" + ], + "source": "return Result_init(w, subtotal)", + "span": [ + 7, + 2, + 7, + 33 + ], + "writes": [ + + ] + } + ] + } + ] +} diff --git a/gems/decomplex/examples/facts/detectors/local-flow-python-raw.json b/gems/decomplex/examples/facts/detectors/local-flow-python-raw.json new file mode 100644 index 000000000..82a2b8db1 --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/local-flow-python-raw.json @@ -0,0 +1,2834 @@ +{ + "detector": "local-flow", + "input": { + "documents": [ + { + "file": "facts/local_flow_python.py", + "language": "python", + "source": "class Worker:\n def handle(self, price, tax, items, mapping, target):\n subtotal = price + tax\n running: int = subtotal\n annotated: int\n annotated = running\n if (chosen := mapping.get(\"key\")):\n running = running + chosen\n for item in items:\n running = running + item\n for other in items:\n running = running + other\n with open(\"log\") as handle:\n buffer = handle.read()\n target.slot = running\n target[price] = buffer\n mapping = {\"key\": buffer}\n return Result(running, buffer, target)\n", + "lines": [ + "class Worker:", + " def handle(self, price, tax, items, mapping, target):", + " subtotal = price + tax", + " running: int = subtotal", + " annotated: int", + " annotated = running", + " if (chosen := mapping.get(\"key\")):", + " running = running + chosen", + " for item in items:", + " running = running + item", + " for other in items:", + " running = running + other", + " with open(\"log\") as handle:", + " buffer = handle.read()", + " target.slot = running", + " target[price] = buffer", + " mapping = {\"key\": buffer}", + " return Result(running, buffer, target)" + ], + "function_defs": [ + { + "file": "facts/local_flow_python.py", + "name": "handle", + "owner": "Worker", + "line": 2, + "span": [ + 2, + 4, + 18, + 46 + ], + 
"body": { + "kind": "block", + "text": "def handle(self, price, tax, items, mapping, target):\n subtotal = price + tax\n running: int = subtotal\n annotated: int\n annotated = running\n if (chosen := mapping.get(\"key\")):\n running = running + chosen\n for item in items:\n running = running + item\n for other in items:\n running = running + other\n with open(\"log\") as handle:\n buffer = handle.read()\n target.slot = running\n target[price] = buffer\n mapping = {\"key\": buffer}\n return Result(running, buffer, target)", + "span": [ + 2, + 4, + 18, + 46 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "def", + "text": "def", + "span": [ + 2, + 4, + 2, + 7 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "handle", + "span": [ + 2, + 8, + 2, + 14 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "parameters", + "text": "(self, price, tax, items, mapping, target)", + "span": [ + 2, + 14, + 2, + 56 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "(", + "text": "(", + "span": [ + 2, + 14, + 2, + 15 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "self", + "span": [ + 2, + 15, + 2, + 19 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ",", + "text": ",", + "span": [ + 2, + 19, + 2, + 20 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "price", + "span": [ + 2, + 21, + 2, + 26 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ",", + "text": ",", + "span": [ + 2, + 26, + 2, + 27 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "tax", + "span": [ + 2, + 28, + 2, + 31 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ",", + "text": ",", + 
"span": [ + 2, + 31, + 2, + 32 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "items", + "span": [ + 2, + 33, + 2, + 38 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ",", + "text": ",", + "span": [ + 2, + 38, + 2, + 39 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "mapping", + "span": [ + 2, + 40, + 2, + 47 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ",", + "text": ",", + "span": [ + 2, + 47, + 2, + 48 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "target", + "span": [ + 2, + 49, + 2, + 55 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ")", + "text": ")", + "span": [ + 2, + 55, + 2, + 56 + ], + "named": false, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": ":", + "text": ":", + "span": [ + 2, + 56, + 2, + 57 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "block", + "text": "subtotal = price + tax\n running: int = subtotal\n annotated: int\n annotated = running\n if (chosen := mapping.get(\"key\")):\n running = running + chosen\n for item in items:\n running = running + item\n for other in items:\n running = running + other\n with open(\"log\") as handle:\n buffer = handle.read()\n target.slot = running\n target[price] = buffer\n mapping = {\"key\": buffer}\n return Result(running, buffer, target)", + "span": [ + 3, + 8, + 18, + 46 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "expression_statement", + "text": "subtotal = price + tax", + "span": [ + 3, + 8, + 3, + 30 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "subtotal", + "span": [ + 3, + 8, + 3, + 16 + ], + "named": true, + "field_name": null, + "children": [ + + ] + 
}, + { + "kind": "=", + "text": "=", + "span": [ + 3, + 17, + 3, + 18 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "binary_operator", + "text": "price + tax", + "span": [ + 3, + 19, + 3, + 30 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "price", + "span": [ + 3, + 19, + 3, + 24 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "+", + "text": "+", + "span": [ + 3, + 25, + 3, + 26 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "tax", + "span": [ + 3, + 27, + 3, + 30 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + }, + { + "kind": "expression_statement", + "text": "running: int = subtotal", + "span": [ + 4, + 8, + 4, + 31 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "running", + "span": [ + 4, + 8, + 4, + 15 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ":", + "text": ":", + "span": [ + 4, + 15, + 4, + 16 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "type", + "text": "int", + "span": [ + 4, + 17, + 4, + 20 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "=", + "text": "=", + "span": [ + 4, + 21, + 4, + 22 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "subtotal", + "span": [ + 4, + 23, + 4, + 31 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "expression_statement", + "text": "annotated: int", + "span": [ + 5, + 8, + 5, + 22 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "annotated", + "span": [ + 5, + 8, + 5, + 17 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ":", + "text": ":", + 
"span": [ + 5, + 17, + 5, + 18 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "type", + "text": "int", + "span": [ + 5, + 19, + 5, + 22 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "expression_statement", + "text": "annotated = running", + "span": [ + 6, + 8, + 6, + 27 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "annotated", + "span": [ + 6, + 8, + 6, + 17 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "=", + "text": "=", + "span": [ + 6, + 18, + 6, + 19 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "running", + "span": [ + 6, + 20, + 6, + 27 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "if_statement", + "text": "if (chosen := mapping.get(\"key\")):\n running = running + chosen", + "span": [ + 7, + 8, + 8, + 38 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "if", + "text": "if", + "span": [ + 7, + 8, + 7, + 10 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "parenthesized_expression", + "text": "(chosen := mapping.get(\"key\"))", + "span": [ + 7, + 11, + 7, + 41 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "(", + "text": "(", + "span": [ + 7, + 11, + 7, + 12 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "named_expression", + "text": "chosen := mapping.get(\"key\")", + "span": [ + 7, + 12, + 7, + 40 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "chosen", + "span": [ + 7, + 12, + 7, + 18 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ":=", + "text": ":=", + "span": [ + 7, + 19, + 7, + 21 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { 
+ "kind": "call", + "text": "mapping.get(\"key\")", + "span": [ + 7, + 22, + 7, + 40 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "attribute", + "text": "mapping.get", + "span": [ + 7, + 22, + 7, + 33 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "mapping", + "span": [ + 7, + 22, + 7, + 29 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ".", + "text": ".", + "span": [ + 7, + 29, + 7, + 30 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "get", + "span": [ + 7, + 30, + 7, + 33 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "argument_list", + "text": "(\"key\")", + "span": [ + 7, + 33, + 7, + 40 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "(", + "text": "(", + "span": [ + 7, + 33, + 7, + 34 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "string", + "text": "\"key\"", + "span": [ + 7, + 34, + 7, + 39 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "string_start", + "text": "\"", + "span": [ + 7, + 34, + 7, + 35 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "string_content", + "text": "key", + "span": [ + 7, + 35, + 7, + 38 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "string_end", + "text": "\"", + "span": [ + 7, + 38, + 7, + 39 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": ")", + "text": ")", + "span": [ + 7, + 39, + 7, + 40 + ], + "named": false, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + } + ] + }, + { + "kind": ")", + "text": ")", + "span": [ + 7, + 40, + 7, + 41 + ], + "named": false, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": ":", + "text": ":", + "span": [ + 7, + 41, + 7, + 42 
+ ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "block", + "text": "running = running + chosen", + "span": [ + 8, + 12, + 8, + 38 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "running", + "span": [ + 8, + 12, + 8, + 19 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "=", + "text": "=", + "span": [ + 8, + 20, + 8, + 21 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "binary_operator", + "text": "running + chosen", + "span": [ + 8, + 22, + 8, + 38 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "running", + "span": [ + 8, + 22, + 8, + 29 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "+", + "text": "+", + "span": [ + 8, + 30, + 8, + 31 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "chosen", + "span": [ + 8, + 32, + 8, + 38 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + } + ] + }, + { + "kind": "for_statement", + "text": "for item in items:\n running = running + item", + "span": [ + 9, + 8, + 10, + 36 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "for", + "text": "for", + "span": [ + 9, + 8, + 9, + 11 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "item", + "span": [ + 9, + 12, + 9, + 16 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "in", + "text": "in", + "span": [ + 9, + 17, + 9, + 19 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "items", + "span": [ + 9, + 20, + 9, + 25 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ":", + "text": ":", + "span": [ + 9, + 25, + 9, + 26 + ], + "named": 
false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "block", + "text": "running = running + item", + "span": [ + 10, + 12, + 10, + 36 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "running", + "span": [ + 10, + 12, + 10, + 19 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "=", + "text": "=", + "span": [ + 10, + 20, + 10, + 21 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "binary_operator", + "text": "running + item", + "span": [ + 10, + 22, + 10, + 36 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "running", + "span": [ + 10, + 22, + 10, + 29 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "+", + "text": "+", + "span": [ + 10, + 30, + 10, + 31 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "item", + "span": [ + 10, + 32, + 10, + 36 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + } + ] + }, + { + "kind": "for_statement", + "text": "for other in items:\n running = running + other", + "span": [ + 11, + 8, + 12, + 37 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "for", + "text": "for", + "span": [ + 11, + 8, + 11, + 11 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "other", + "span": [ + 11, + 12, + 11, + 17 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "in", + "text": "in", + "span": [ + 11, + 18, + 11, + 20 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "items", + "span": [ + 11, + 21, + 11, + 26 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ":", + "text": ":", + "span": [ + 11, + 26, + 11, + 27 + ], + "named": 
false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "block", + "text": "running = running + other", + "span": [ + 12, + 12, + 12, + 37 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "running", + "span": [ + 12, + 12, + 12, + 19 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "=", + "text": "=", + "span": [ + 12, + 20, + 12, + 21 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "binary_operator", + "text": "running + other", + "span": [ + 12, + 22, + 12, + 37 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "running", + "span": [ + 12, + 22, + 12, + 29 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "+", + "text": "+", + "span": [ + 12, + 30, + 12, + 31 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "other", + "span": [ + 12, + 32, + 12, + 37 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + } + ] + }, + { + "kind": "with_statement", + "text": "with open(\"log\") as handle:\n buffer = handle.read()", + "span": [ + 13, + 8, + 14, + 34 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "with", + "text": "with", + "span": [ + 13, + 8, + 13, + 12 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "with_clause", + "text": "open(\"log\") as handle", + "span": [ + 13, + 13, + 13, + 34 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "call", + "text": "open(\"log\")", + "span": [ + 13, + 13, + 13, + 24 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "open", + "span": [ + 13, + 13, + 13, + 17 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "argument_list", + "text": "(\"log\")", + "span": 
[ + 13, + 17, + 13, + 24 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "(", + "text": "(", + "span": [ + 13, + 17, + 13, + 18 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "string", + "text": "\"log\"", + "span": [ + 13, + 18, + 13, + 23 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "string_start", + "text": "\"", + "span": [ + 13, + 18, + 13, + 19 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "string_content", + "text": "log", + "span": [ + 13, + 19, + 13, + 22 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "string_end", + "text": "\"", + "span": [ + 13, + 22, + 13, + 23 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": ")", + "text": ")", + "span": [ + 13, + 23, + 13, + 24 + ], + "named": false, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + }, + { + "kind": "as", + "text": "as", + "span": [ + 13, + 25, + 13, + 27 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "as_pattern_target", + "text": "handle", + "span": [ + 13, + 28, + 13, + 34 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": ":", + "text": ":", + "span": [ + 13, + 34, + 13, + 35 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "block", + "text": "buffer = handle.read()", + "span": [ + 14, + 12, + 14, + 34 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "buffer", + "span": [ + 14, + 12, + 14, + 18 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "=", + "text": "=", + "span": [ + 14, + 19, + 14, + 20 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "call", + "text": "handle.read()", + "span": [ + 14, + 21, + 14, + 34 + ], + "named": true, + 
"field_name": null, + "children": [ + { + "kind": "attribute", + "text": "handle.read", + "span": [ + 14, + 21, + 14, + 32 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "handle", + "span": [ + 14, + 21, + 14, + 27 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ".", + "text": ".", + "span": [ + 14, + 27, + 14, + 28 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "read", + "span": [ + 14, + 28, + 14, + 32 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "argument_list", + "text": "()", + "span": [ + 14, + 32, + 14, + 34 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "(", + "text": "(", + "span": [ + 14, + 32, + 14, + 33 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ")", + "text": ")", + "span": [ + 14, + 33, + 14, + 34 + ], + "named": false, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + } + ] + } + ] + }, + { + "kind": "expression_statement", + "text": "target.slot = running", + "span": [ + 15, + 8, + 15, + 29 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "attribute", + "text": "target.slot", + "span": [ + 15, + 8, + 15, + 19 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "target", + "span": [ + 15, + 8, + 15, + 14 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ".", + "text": ".", + "span": [ + 15, + 14, + 15, + 15 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "slot", + "span": [ + 15, + 15, + 15, + 19 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "=", + "text": "=", + "span": [ + 15, + 20, + 15, + 21 + ], + "named": false, + "field_name": null, + "children": [ + + ] + 
}, + { + "kind": "identifier", + "text": "running", + "span": [ + 15, + 22, + 15, + 29 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "expression_statement", + "text": "target[price] = buffer", + "span": [ + 16, + 8, + 16, + 30 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "subscript", + "text": "target[price]", + "span": [ + 16, + 8, + 16, + 21 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "target", + "span": [ + 16, + 8, + 16, + 14 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "[", + "text": "[", + "span": [ + 16, + 14, + 16, + 15 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "price", + "span": [ + 16, + 15, + 16, + 20 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "]", + "text": "]", + "span": [ + 16, + 20, + 16, + 21 + ], + "named": false, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "=", + "text": "=", + "span": [ + 16, + 22, + 16, + 23 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "buffer", + "span": [ + 16, + 24, + 16, + 30 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "expression_statement", + "text": "mapping = {\"key\": buffer}", + "span": [ + 17, + 8, + 17, + 33 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "mapping", + "span": [ + 17, + 8, + 17, + 15 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "=", + "text": "=", + "span": [ + 17, + 16, + 17, + 17 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "dictionary", + "text": "{\"key\": buffer}", + "span": [ + 17, + 18, + 17, + 33 + ], + "named": true, + "field_name": null, + 
"children": [ + { + "kind": "{", + "text": "{", + "span": [ + 17, + 18, + 17, + 19 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "pair", + "text": "\"key\": buffer", + "span": [ + 17, + 19, + 17, + 32 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "string", + "text": "\"key\"", + "span": [ + 17, + 19, + 17, + 24 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "string_start", + "text": "\"", + "span": [ + 17, + 19, + 17, + 20 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "string_content", + "text": "key", + "span": [ + 17, + 20, + 17, + 23 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "string_end", + "text": "\"", + "span": [ + 17, + 23, + 17, + 24 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": ":", + "text": ":", + "span": [ + 17, + 24, + 17, + 25 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "buffer", + "span": [ + 17, + 26, + 17, + 32 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "}", + "text": "}", + "span": [ + 17, + 32, + 17, + 33 + ], + "named": false, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + }, + { + "kind": "return_statement", + "text": "return Result(running, buffer, target)", + "span": [ + 18, + 8, + 18, + 46 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "return", + "text": "return", + "span": [ + 18, + 8, + 18, + 14 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "call", + "text": "Result(running, buffer, target)", + "span": [ + 18, + 15, + 18, + 46 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "Result", + "span": [ + 18, + 15, + 18, + 21 + ], + "named": true, + "field_name": null, + "children": 
[ + + ] + }, + { + "kind": "argument_list", + "text": "(running, buffer, target)", + "span": [ + 18, + 21, + 18, + 46 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "(", + "text": "(", + "span": [ + 18, + 21, + 18, + 22 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "running", + "span": [ + 18, + 22, + 18, + 29 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ",", + "text": ",", + "span": [ + 18, + 29, + 18, + 30 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "buffer", + "span": [ + 18, + 31, + 18, + 37 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ",", + "text": ",", + "span": [ + 18, + 37, + 18, + 38 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "target", + "span": [ + 18, + 39, + 18, + 45 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ")", + "text": ")", + "span": [ + 18, + 45, + 18, + 46 + ], + "named": false, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, + "visibility": "public", + "params": [ + "self", + "price", + "tax", + "items", + "mapping", + "target" + ] + } + ] + } + ] + }, + "expected": [ + { + "boundaries": [ + + ], + "file": "facts/local_flow_python.py", + "id": "Worker#handle", + "line": 2, + "name": "handle", + "owner": "Worker", + "span": [ + 2, + 4, + 18, + 46 + ], + "statements": [ + { + "co_uses": [ + [ + "price", + "tax" + ] + ], + "dependencies": [ + [ + "subtotal", + "price" + ], + [ + "subtotal", + "tax" + ] + ], + "end_line": 3, + "index": 0, + "line": 3, + "reads": [ + "price", + "tax" + ], + "source": "subtotal = price + tax", + "span": [ + 3, + 8, + 3, + 30 + ], + "writes": [ + "subtotal" + ] + }, + { + "co_uses": [ + + ], + "dependencies": [ + [ + "running", + 
"subtotal" + ] + ], + "end_line": 4, + "index": 1, + "line": 4, + "reads": [ + "subtotal" + ], + "source": "running: int = subtotal", + "span": [ + 4, + 8, + 4, + 31 + ], + "writes": [ + "running" + ] + }, + { + "co_uses": [ + + ], + "dependencies": [ + + ], + "end_line": 5, + "index": 2, + "line": 5, + "reads": [ + + ], + "source": "annotated: int", + "span": [ + 5, + 8, + 5, + 22 + ], + "writes": [ + "annotated" + ] + }, + { + "co_uses": [ + + ], + "dependencies": [ + [ + "annotated", + "running" + ] + ], + "end_line": 6, + "index": 3, + "line": 6, + "reads": [ + "running" + ], + "source": "annotated = running", + "span": [ + 6, + 8, + 6, + 27 + ], + "writes": [ + "annotated" + ] + }, + { + "co_uses": [ + [ + "chosen", + "mapping" + ], + [ + "chosen", + "running" + ], + [ + "mapping", + "running" + ] + ], + "dependencies": [ + [ + "chosen", + "mapping" + ], + [ + "running", + "mapping" + ] + ], + "end_line": 8, + "index": 4, + "line": 7, + "reads": [ + "chosen", + "mapping", + "running" + ], + "source": "if (chosen := mapping.get(\"key\")): running = running + chosen", + "span": [ + 7, + 8, + 8, + 38 + ], + "writes": [ + "chosen", + "running" + ] + }, + { + "co_uses": [ + [ + "item", + "items" + ], + [ + "item", + "running" + ], + [ + "items", + "running" + ] + ], + "dependencies": [ + [ + "item", + "items" + ], + [ + "running", + "items" + ] + ], + "end_line": 10, + "index": 5, + "line": 9, + "reads": [ + "item", + "items", + "running" + ], + "source": "for item in items: running = running + item", + "span": [ + 9, + 8, + 10, + 36 + ], + "writes": [ + "item", + "running" + ] + }, + { + "co_uses": [ + [ + "items", + "other" + ], + [ + "items", + "running" + ], + [ + "other", + "running" + ] + ], + "dependencies": [ + [ + "other", + "items" + ], + [ + "running", + "items" + ] + ], + "end_line": 12, + "index": 6, + "line": 11, + "reads": [ + "items", + "other", + "running" + ], + "source": "for other in items: running = running + other", + "span": [ + 11, + 8, + 
12, + 37 + ], + "writes": [ + "other", + "running" + ] + }, + { + "co_uses": [ + + ], + "dependencies": [ + + ], + "end_line": 14, + "index": 7, + "line": 13, + "reads": [ + "handle" + ], + "source": "with open(\"log\") as handle: buffer = handle.read()", + "span": [ + 13, + 8, + 14, + 34 + ], + "writes": [ + "buffer", + "handle" + ] + }, + { + "co_uses": [ + [ + "running", + "target" + ] + ], + "dependencies": [ + + ], + "end_line": 15, + "index": 8, + "line": 15, + "reads": [ + "running", + "target" + ], + "source": "target.slot = running", + "span": [ + 15, + 8, + 15, + 29 + ], + "writes": [ + + ] + }, + { + "co_uses": [ + [ + "buffer", + "price" + ], + [ + "buffer", + "target" + ], + [ + "price", + "target" + ] + ], + "dependencies": [ + + ], + "end_line": 16, + "index": 9, + "line": 16, + "reads": [ + "buffer", + "price", + "target" + ], + "source": "target[price] = buffer", + "span": [ + 16, + 8, + 16, + 30 + ], + "writes": [ + + ] + }, + { + "co_uses": [ + + ], + "dependencies": [ + [ + "mapping", + "buffer" + ] + ], + "end_line": 17, + "index": 10, + "line": 17, + "reads": [ + "buffer" + ], + "source": "mapping = {\"key\": buffer}", + "span": [ + 17, + 8, + 17, + 33 + ], + "writes": [ + "mapping" + ] + }, + { + "co_uses": [ + [ + "buffer", + "running" + ], + [ + "buffer", + "target" + ], + [ + "running", + "target" + ] + ], + "dependencies": [ + + ], + "end_line": 18, + "index": 11, + "line": 18, + "reads": [ + "buffer", + "running", + "target" + ], + "source": "return Result(running, buffer, target)", + "span": [ + 18, + 8, + 18, + 46 + ], + "writes": [ + + ] + } + ] + } + ] +} diff --git a/gems/decomplex/examples/facts/detectors/locality-drag.json b/gems/decomplex/examples/facts/detectors/locality-drag.json new file mode 100644 index 000000000..e4b7a9c5e --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/locality-drag.json @@ -0,0 +1,260 @@ +{ + "detector": "locality-drag", + "input": { + "documents": [ + { + "file": "facts/locality.rb", + 
"language": "ruby", + "local_complexity_scores": { + "Fixture#assemble": { + "score": 18.0, + "signals": { + "branches": 2 + } + } + }, + "local_methods": [ + { + "id": "Fixture#assemble", + "owner": "Fixture", + "name": "assemble", + "file": "facts/locality.rb", + "line": 1, + "span": [ + 1, + 0, + 20, + 3 + ], + "statements": [ + { + "index": 0, + "line": 1, + "end_line": 1, + "span": [ + 1, + 0, + 1, + 23 + ], + "source": "payload = build_payload", + "reads": [ + "build_payload" + ], + "writes": [ + "payload" + ], + "dependencies": [ + [ + "payload", + "build_payload" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 1, + "line": 3, + "end_line": 3, + "span": [ + 3, + 0, + 3, + 18 + ], + "source": "alpha = load_alpha", + "reads": [ + "load_alpha" + ], + "writes": [ + "alpha" + ], + "dependencies": [ + [ + "alpha", + "load_alpha" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 2, + "line": 5, + "end_line": 5, + "span": [ + 5, + 0, + 5, + 16 + ], + "source": "beta = load_beta", + "reads": [ + "load_beta" + ], + "writes": [ + "beta" + ], + "dependencies": [ + [ + "beta", + "load_beta" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 3, + "line": 7, + "end_line": 7, + "span": [ + 7, + 0, + 7, + 18 + ], + "source": "gamma = load_gamma", + "reads": [ + "load_gamma" + ], + "writes": [ + "gamma" + ], + "dependencies": [ + [ + "gamma", + "load_gamma" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 4, + "line": 9, + "end_line": 9, + "span": [ + 9, + 0, + 9, + 18 + ], + "source": "delta = load_delta", + "reads": [ + "load_delta" + ], + "writes": [ + "delta" + ], + "dependencies": [ + [ + "delta", + "load_delta" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 5, + "line": 13, + "end_line": 13, + "span": [ + 13, + 0, + 13, + 16 + ], + "source": "consume(payload)", + "reads": [ + "payload" + ], + "writes": [ + + ], + "dependencies": [ + + ], + "co_uses": [ + + ] + } + ], + "boundaries": [ + { + "before_index": 0, + "after_index": 5, + "line": 11, + "kind": "comment", + 
"text": "# phase 2" + } + ] + } + ] + } + ] + }, + "expected": [ + { + "at": "facts/locality.rb:assemble:1", + "boundaries": [ + { + "line": 11, + "marker": "# phase 2" + } + ], + "boundary_crossings": 1, + "defined_at": 1, + "definition_deps": [ + "build_payload" + ], + "defn": "assemble", + "examples": [ + { + "line": 3, + "source": "alpha = load_alpha" + }, + { + "line": 5, + "source": "beta = load_beta" + }, + { + "line": 7, + "source": "gamma = load_gamma" + } + ], + "file": "facts/locality.rb", + "gap_lines": 12, + "gap_statements": 4, + "line": 1, + "local_complexity": 18.0, + "method": "assemble", + "owner": "Fixture", + "reason": "`payload` is initialized 12 line(s) before first use; 4 unrelated intervening statement(s); 1 structural boundary crossing(s); method local complexity 18.0", + "related_statements": 0, + "score": 63, + "setup_statements": 0, + "spans": { + "facts/locality.rb:assemble:1": [ + 1, + 0, + 20, + 3 + ] + }, + "unrelated_statements": 4, + "use_reads": [ + "payload" + ], + "used_at": 13, + "variable": "payload" + } + ] +} diff --git a/gems/decomplex/examples/facts/detectors/operational-discontinuity.json b/gems/decomplex/examples/facts/detectors/operational-discontinuity.json new file mode 100644 index 000000000..b3e9a0f49 --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/operational-discontinuity.json @@ -0,0 +1,245 @@ +{ + "detector": "operational-discontinuity", + "input": { + "documents": [ + { + "file": "facts/operational.rb", + "language": "ruby", + "local_methods": [ + { + "id": "Fixture#process", + "owner": "Fixture", + "name": "process", + "file": "facts/operational.rb", + "line": 1, + "span": [ + 1, + 0, + 20, + 3 + ], + "statements": [ + { + "index": 0, + "line": 1, + "end_line": 1, + "span": [ + 1, + 0, + 1, + 9 + ], + "source": "a = first", + "reads": [ + "first" + ], + "writes": [ + "a" + ], + "dependencies": [ + [ + "a", + "first" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 1, + "line": 2, + "end_line": 2, 
+ "span": [ + 2, + 0, + 2, + 10 + ], + "source": "b = second", + "reads": [ + "second" + ], + "writes": [ + "b" + ], + "dependencies": [ + [ + "b", + "second" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 2, + "line": 3, + "end_line": 3, + "span": [ + 3, + 0, + 3, + 9 + ], + "source": "use(a, b)", + "reads": [ + "a", + "b" + ], + "writes": [ + + ], + "dependencies": [ + + ], + "co_uses": [ + + ] + }, + { + "index": 3, + "line": 7, + "end_line": 7, + "span": [ + 7, + 0, + 7, + 9 + ], + "source": "c = third", + "reads": [ + "third" + ], + "writes": [ + "c" + ], + "dependencies": [ + [ + "c", + "third" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 4, + "line": 8, + "end_line": 8, + "span": [ + 8, + 0, + 8, + 10 + ], + "source": "d = fourth", + "reads": [ + "fourth" + ], + "writes": [ + "d" + ], + "dependencies": [ + [ + "d", + "fourth" + ] + ], + "co_uses": [ + + ] + }, + { + "index": 5, + "line": 9, + "end_line": 9, + "span": [ + 9, + 0, + 9, + 9 + ], + "source": "use(c, d)", + "reads": [ + "c", + "d" + ], + "writes": [ + + ], + "dependencies": [ + + ], + "co_uses": [ + + ] + } + ], + "boundaries": [ + { + "before_index": 2, + "after_index": 3, + "line": 6, + "kind": "comment", + "text": "# phase 2" + } + ] + } + ] + } + ] + }, + "expected": [ + { + "at": "facts/operational.rb:process:1", + "confidence": "high", + "confidence_reasons": [ + "explicit_phase_marker" + ], + "dead_total": 4, + "defn": "process", + "file": "facts/operational.rb", + "line": 1, + "method": "process", + "new_total": 4, + "owner": "Fixture", + "reset_points": [ + { + "after_statement": 3, + "before_statement": 2, + "continuing": [ + + ], + "dead": [ + "a", + "b", + "first", + "second" + ], + "kind": "comment", + "line": 6, + "new": [ + "c", + "d", + "fourth", + "third" + ], + "text": "# phase 2" + } + ], + "resets": 1, + "score": 16, + "spans": { + "facts/operational.rb:process:1": [ + 1, + 0, + 20, + 3 + ] + } + } + ] +} diff --git 
a/gems/decomplex/examples/facts/detectors/path-condition-derived.json b/gems/decomplex/examples/facts/detectors/path-condition-derived.json new file mode 100644 index 000000000..204612d34 --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/path-condition-derived.json @@ -0,0 +1,444 @@ +{ + "detector": "path-condition", + "input": { + "documents": [ + { + "file": "facts/path_derived.rb", + "language": "ruby", + "lines": [ + "def paths", + " if a", + " if b", + " if c", + " commit_one", + " end", + " if c", + " commit_two", + " end", + " if c", + " commit_three", + " end", + " commit_four", + " end", + " end", + "end" + ], + "function_defs": [ + { + "file": "facts/path_derived.rb", + "name": "paths", + "owner": "Fixture", + "line": 1, + "span": [ + 1, + 0, + 16, + 3 + ], + "body": { + "kind": "method", + "text": "def paths", + "span": [ + 1, + 0, + 16, + 3 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "paths", + "span": [ + 1, + 4, + 1, + 9 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "body_statement", + "text": "", + "span": [ + 2, + 2, + 15, + 5 + ], + "named": true, + "field_name": "body", + "children": [ + { + "kind": "if", + "text": " if a\n if b\n if c\n commit_one\n end\n if c\n commit_two\n end\n if c\n commit_three\n end\n commit_four\n end\n end", + "span": [ + 2, + 2, + 15, + 5 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "a", + "span": [ + 2, + 5, + 2, + 6 + ], + "named": true, + "field_name": "condition", + "children": [ + + ] + }, + { + "kind": "then", + "text": "", + "span": [ + 3, + 4, + 14, + 7 + ], + "named": true, + "field_name": "consequence", + "children": [ + { + "kind": "if", + "text": " if b\n if c\n commit_one\n end\n if c\n commit_two\n end\n if c\n commit_three\n end\n commit_four\n end", + "span": [ + 3, + 4, + 14, + 7 + ], + "named": true, + "field_name": null, + "children": [ + { + 
"kind": "identifier", + "text": "b", + "span": [ + 3, + 7, + 3, + 8 + ], + "named": true, + "field_name": "condition", + "children": [ + + ] + }, + { + "kind": "then", + "text": "", + "span": [ + 4, + 6, + 13, + 17 + ], + "named": true, + "field_name": "consequence", + "children": [ + { + "kind": "if", + "text": "if c\n commit_one\nend", + "span": [ + 4, + 6, + 6, + 9 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "c", + "span": [ + 4, + 9, + 4, + 10 + ], + "named": true, + "field_name": "condition", + "children": [ + + ] + }, + { + "kind": "then", + "text": "commit_one", + "span": [ + 5, + 8, + 5, + 18 + ], + "named": true, + "field_name": "consequence", + "children": [ + { + "kind": "call", + "text": "commit_one", + "span": [ + 5, + 8, + 5, + 18 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "commit_one", + "span": [ + 5, + 8, + 5, + 18 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + } + ] + }, + { + "kind": "if", + "text": "if c\n commit_two\nend", + "span": [ + 7, + 6, + 9, + 9 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "c", + "span": [ + 7, + 9, + 7, + 10 + ], + "named": true, + "field_name": "condition", + "children": [ + + ] + }, + { + "kind": "then", + "text": "commit_two", + "span": [ + 8, + 8, + 8, + 18 + ], + "named": true, + "field_name": "consequence", + "children": [ + { + "kind": "call", + "text": "commit_two", + "span": [ + 8, + 8, + 8, + 18 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "commit_two", + "span": [ + 8, + 8, + 8, + 18 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + } + ] + }, + { + "kind": "if", + "text": "if c\n commit_three\nend", + "span": [ + 10, + 6, + 12, + 9 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + 
"text": "c", + "span": [ + 10, + 9, + 10, + 10 + ], + "named": true, + "field_name": "condition", + "children": [ + + ] + }, + { + "kind": "then", + "text": "commit_three", + "span": [ + 11, + 8, + 11, + 20 + ], + "named": true, + "field_name": "consequence", + "children": [ + { + "kind": "call", + "text": "commit_three", + "span": [ + 11, + 8, + 11, + 20 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "commit_three", + "span": [ + 11, + 8, + 11, + 20 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + } + ] + }, + { + "kind": "call", + "text": "commit_four", + "span": [ + 13, + 6, + 13, + 17 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "commit_four", + "span": [ + 13, + 6, + 13, + 17 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, + "visibility": "public", + "params": [ + + ] + } + ] + } + ] + }, + "expected": { + "neglected": [ + { + "action": "commit_four", + "at": "facts/path_derived.rb:paths:13", + "missing": "c", + "pattern": [ + "a", + "b", + "c" + ], + "spans": { + "facts/path_derived.rb:paths:13": [ + 13, + 6, + 13, + 17 + ] + }, + "support": 3 + } + ] + } +} diff --git a/gems/decomplex/examples/facts/detectors/path-condition-raw.json b/gems/decomplex/examples/facts/detectors/path-condition-raw.json new file mode 100644 index 000000000..350369ec4 --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/path-condition-raw.json @@ -0,0 +1,274 @@ +{ + "detector": "path-condition", + "input": { + "documents": [ + { + "file": "facts/path_raw.rb", + "language": "ruby", + "function_defs": [ + { + "file": "facts/path_raw.rb", + "name": "one", + "owner": "Fixture", + "line": 1, + "span": [ + 1, + 0, + 5, + 3 + ], + "body": { + "kind": "method", + "text": "def one", + "span": [ + 1, + 0, + 5, + 3 + ], + "named": true, + "field_name": null, + "children": 
[ + { + "kind": "identifier", + "text": "one", + "span": [ + 1, + 0, + 1, + 3 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "body_statement", + "text": "", + "span": [ + 1, + 0, + 5, + 3 + ], + "named": true, + "field_name": "body", + "children": [ + { + "kind": "if", + "text": "if a\n if b\n commit\n end\nend", + "span": [ + 1, + 0, + 5, + 3 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "a", + "span": [ + 1, + 3, + 1, + 4 + ], + "named": true, + "field_name": "condition", + "children": [ + + ] + }, + { + "kind": "body_statement", + "text": "if b", + "span": [ + 2, + 2, + 4, + 5 + ], + "named": true, + "field_name": "consequence", + "children": [ + { + "kind": "if", + "text": "if b\n commit\nend", + "span": [ + 2, + 2, + 4, + 5 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "b", + "span": [ + 2, + 5, + 2, + 6 + ], + "named": true, + "field_name": "condition", + "children": [ + + ] + }, + { + "kind": "body_statement", + "text": "commit", + "span": [ + 3, + 4, + 3, + 10 + ], + "named": true, + "field_name": "consequence", + "children": [ + { + "kind": "call", + "text": "commit", + "span": [ + 3, + 4, + 3, + 10 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "commit", + "span": [ + 3, + 4, + 3, + 10 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, + "visibility": "public", + "params": [ + + ] + } + ], + "path_condition_sites": [ + { + "guards": [ + "a", + "b" + ], + "action": "commit", + "file": "facts/path_raw.rb", + "function": "one", + "line": 3, + "span": [ + 3, + 4, + 3, + 10 + ] + }, + { + "guards": [ + "a", + "b" + ], + "action": "commit", + "file": "facts/path_raw.rb", + "function": "two", + "line": 6, + "span": [ + 6, + 0, + 6, + 6 + ] + }, + { + "guards": [ + "a", + "b" + ], + "action": 
"commit", + "file": "facts/path_raw.rb", + "function": "three", + "line": 7, + "span": [ + 7, + 0, + 7, + 6 + ] + }, + { + "guards": [ + "a" + ], + "action": "commit", + "file": "facts/path_raw.rb", + "function": "four", + "line": 8, + "span": [ + 8, + 0, + 8, + 6 + ] + } + ] + } + ] + }, + "expected": { + "neglected": [ + { + "action": "commit", + "at": "facts/path_raw.rb:four:8", + "missing": "b", + "pattern": [ + "a", + "b" + ], + "spans": { + "facts/path_raw.rb:four:8": [ + 8, + 0, + 8, + 6 + ] + }, + "support": 3 + } + ] + } +} diff --git a/gems/decomplex/examples/facts/detectors/path-condition.json b/gems/decomplex/examples/facts/detectors/path-condition.json new file mode 100644 index 000000000..4ab29bd07 --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/path-condition.json @@ -0,0 +1,98 @@ +{ + "detector": "path-condition", + "input": { + "documents": [ + { + "file": "facts/path.rb", + "language": "ruby", + "path_condition_sites": [ + { + "guards": [ + "a", + "b" + ], + "action": "commit", + "file": "facts/path.rb", + "function": "one", + "line": 1, + "span": [ + 1, + 0, + 1, + 6 + ] + }, + { + "guards": [ + "a", + "b" + ], + "action": "commit", + "file": "facts/path.rb", + "function": "two", + "line": 2, + "span": [ + 2, + 0, + 2, + 6 + ] + }, + { + "guards": [ + "a", + "b" + ], + "action": "commit", + "file": "facts/path.rb", + "function": "three", + "line": 3, + "span": [ + 3, + 0, + 3, + 6 + ] + }, + { + "guards": [ + "a" + ], + "action": "commit", + "file": "facts/path.rb", + "function": "four", + "line": 4, + "span": [ + 4, + 0, + 4, + 6 + ] + } + ] + } + ] + }, + "expected": { + "neglected": [ + { + "action": "commit", + "at": "facts/path.rb:four:4", + "missing": "b", + "pattern": [ + "a", + "b" + ], + "spans": { + "facts/path.rb:four:4": [ + 4, + 0, + 4, + 6 + ] + }, + "support": 3 + } + ] + } +} diff --git a/gems/decomplex/examples/facts/detectors/redundant-nil-guard-facts.json 
b/gems/decomplex/examples/facts/detectors/redundant-nil-guard-facts.json new file mode 100644 index 000000000..1e80a6a15 --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/redundant-nil-guard-facts.json @@ -0,0 +1,402 @@ +{ + "detector": "redundant-nil-guard", + "input": { + "documents": [ + { + "file": "facts/nil_guard.rb", + "language": "ruby", + "lines": [ + "def check", + " if x != nil", + " x.nil?", + " end", + "end" + ], + "function_defs": [ + { + "file": "facts/nil_guard.rb", + "name": "check", + "owner": "Fixture", + "line": 1, + "span": [ + 1, + 0, + 5, + 3 + ], + "body": { + "kind": "method", + "text": "def check", + "span": [ + 1, + 0, + 5, + 3 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "check", + "span": [ + 1, + 4, + 1, + 9 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "body_statement", + "text": "", + "span": [ + 2, + 2, + 4, + 5 + ], + "named": true, + "field_name": "body", + "children": [ + { + "kind": "if", + "text": "if x != nil\n x.nil?\nend", + "span": [ + 2, + 2, + 4, + 5 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "binary", + "text": "x != nil", + "span": [ + 2, + 5, + 2, + 13 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "x", + "span": [ + 2, + 5, + 2, + 6 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "!=", + "text": "!=", + "span": [ + 2, + 7, + 2, + 9 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "nil", + "text": "nil", + "span": [ + 2, + 10, + 2, + 13 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "then", + "text": "x.nil?", + "span": [ + 3, + 4, + 3, + 10 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "call", + "text": "x.nil?", + "span": [ + 3, + 4, + 3, + 10 + ], + "named": true, + "field_name": null, + 
"children": [ + { + "kind": "identifier", + "text": "x", + "span": [ + 3, + 4, + 3, + 5 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "nil?", + "span": [ + 3, + 6, + 3, + 10 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, + "visibility": "public", + "params": [ + + ] + } + ], + "normalized_root": { + "type": "ROOT", + "children": [ + { + "Node": { + "type": "DEFN", + "children": [ + { + "Symbol": "check" + }, + { + "Node": { + "type": "SCOPE", + "children": [ + { + "Nil": null + }, + { + "Nil": null + }, + { + "Node": { + "type": "BLOCK", + "children": [ + { + "Node": { + "type": "IF", + "children": [ + { + "Node": { + "type": "OPCALL", + "children": [ + { + "Node": { + "type": "LVAR", + "children": [ + { + "String": "x" + } + ], + "first_lineno": 2, + "first_column": 5, + "last_lineno": 2, + "last_column": 6, + "text": "x" + } + }, + { + "Symbol": "!=" + }, + { + "Node": { + "type": "LIST", + "children": [ + { + "Node": { + "type": "NIL", + "children": [ + + ], + "first_lineno": 2, + "first_column": 10, + "last_lineno": 2, + "last_column": 13, + "text": "nil" + } + } + ], + "first_lineno": 2, + "first_column": 10, + "last_lineno": 2, + "last_column": 13, + "text": "nil" + } + } + ], + "first_lineno": 2, + "first_column": 5, + "last_lineno": 2, + "last_column": 13, + "text": "x != nil" + } + }, + { + "Node": { + "type": "BLOCK", + "children": [ + { + "Node": { + "type": "CALL", + "children": [ + { + "Node": { + "type": "LVAR", + "children": [ + { + "String": "x" + } + ], + "first_lineno": 3, + "first_column": 4, + "last_lineno": 3, + "last_column": 5, + "text": "x" + } + }, + { + "Symbol": "nil?" + } + ], + "first_lineno": 3, + "first_column": 4, + "last_lineno": 3, + "last_column": 10, + "text": "x.nil?" + } + } + ], + "first_lineno": 3, + "first_column": 4, + "last_lineno": 3, + "last_column": 10, + "text": "x.nil?" 
+ } + }, + { + "Nil": null + } + ], + "first_lineno": 2, + "first_column": 2, + "last_lineno": 4, + "last_column": 5, + "text": "if x != nil\n x.nil?\nend" + } + } + ], + "first_lineno": 2, + "first_column": 2, + "last_lineno": 4, + "last_column": 5, + "text": "if x != nil\n x.nil?\nend" + } + } + ], + "first_lineno": 1, + "first_column": 0, + "last_lineno": 5, + "last_column": 3, + "text": "def check" + } + } + ], + "first_lineno": 1, + "first_column": 0, + "last_lineno": 5, + "last_column": 3, + "text": "def check" + } + } + ], + "first_lineno": 1, + "first_column": 0, + "last_lineno": 5, + "last_column": 3, + "text": "def check" + } + } + ] + }, + "expected": [ + { + "at": "facts/nil_guard.rb:check:3", + "defn": "check", + "file": "facts/nil_guard.rb", + "guard": "x.nil?", + "line": 3, + "local": "x", + "proof": "x is already proven non-nil on this path", + "span": [ + 3, + 4, + 3, + 10 + ], + "spans": { + "facts/nil_guard.rb:check:3": [ + 3, + 4, + 3, + 10 + ] + } + } + ] +} diff --git a/gems/decomplex/examples/facts/detectors/redundant-nil-guard-rich.json b/gems/decomplex/examples/facts/detectors/redundant-nil-guard-rich.json new file mode 100644 index 000000000..baf8387f3 --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/redundant-nil-guard-rich.json @@ -0,0 +1,2209 @@ +{ + "detector": "redundant-nil-guard", + "input": { + "documents": [ + { + "file": "facts/nil_guard_rich.rb", + "language": "ruby", + "lines": [ + "def rich", + " if x != nil && y.present?", + " x.nil?", + " y.present?", + " x&.foo", + " end", + " if z.nil?", + " return", + " else", + " z.nil?", + " end", + " unless w.nil?", + " w.nil?", + " end", + " if obj&.ready", + " obj&.name", + " end", + " if b != nil", + " b.nil?", + " else", + " return", + " end", + " if a != nil", + " return", + " else", + " abort", + " end", + "end" + ], + "function_defs": [ + { + "file": "facts/nil_guard_rich.rb", + "name": "rich", + "owner": "Fixture", + "line": 1, + "span": [ + 1, + 0, + 28, + 3 + ], + 
"body": { + "kind": "method", + "text": "def rich", + "span": [ + 1, + 0, + 28, + 3 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "rich", + "span": [ + 1, + 4, + 1, + 8 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "body_statement", + "text": "", + "span": [ + 2, + 2, + 27, + 5 + ], + "named": true, + "field_name": "body", + "children": [ + { + "kind": "if", + "text": "", + "span": [ + 2, + 2, + 6, + 5 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "binary", + "text": "x != nil && y.present?", + "span": [ + 2, + 5, + 2, + 27 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "binary", + "text": "x != nil", + "span": [ + 2, + 5, + 2, + 14 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "x", + "span": [ + 2, + 5, + 2, + 6 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "!=", + "text": "!=", + "span": [ + 2, + 7, + 2, + 9 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "nil", + "text": "nil", + "span": [ + 2, + 10, + 2, + 13 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "&&", + "text": "&&", + "span": [ + 2, + 14, + 2, + 16 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "call", + "text": "y.present?", + "span": [ + 2, + 17, + 2, + 27 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "y", + "span": [ + 2, + 17, + 2, + 18 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ".", + "text": ".", + "span": [ + 2, + 18, + 2, + 19 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "present?", + "span": [ + 2, + 19, + 2, + 27 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } 
+ ] + } + ] + }, + { + "kind": "then", + "text": "x.nil?\ny.present?\nx&.foo", + "span": [ + 3, + 4, + 5, + 10 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "call", + "text": "x.nil?", + "span": [ + 3, + 4, + 3, + 10 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "x", + "span": [ + 3, + 4, + 3, + 5 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ".", + "text": ".", + "span": [ + 3, + 5, + 3, + 6 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "nil?", + "span": [ + 3, + 6, + 3, + 10 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "call", + "text": "y.present?", + "span": [ + 4, + 4, + 4, + 14 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "y", + "span": [ + 4, + 4, + 4, + 5 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ".", + "text": ".", + "span": [ + 4, + 5, + 4, + 6 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "present?", + "span": [ + 4, + 6, + 4, + 14 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "call", + "text": "x&.foo", + "span": [ + 5, + 4, + 5, + 10 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "x", + "span": [ + 5, + 4, + 5, + 5 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "&.", + "text": "&.", + "span": [ + 5, + 5, + 5, + 7 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "foo", + "span": [ + 5, + 7, + 5, + 10 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + } + ] + }, + { + "kind": "if", + "text": "", + "span": [ + 7, + 2, + 11, + 5 + ], + "named": 
true, + "field_name": null, + "children": [ + { + "kind": "call", + "text": "z.nil?", + "span": [ + 7, + 5, + 7, + 11 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "z", + "span": [ + 7, + 5, + 7, + 6 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ".", + "text": ".", + "span": [ + 7, + 6, + 7, + 7 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "nil?", + "span": [ + 7, + 7, + 7, + 11 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "then", + "text": "return", + "span": [ + 8, + 4, + 8, + 10 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "return", + "text": "return", + "span": [ + 8, + 4, + 8, + 10 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "else", + "text": "z.nil?", + "span": [ + 10, + 4, + 10, + 10 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "call", + "text": "z.nil?", + "span": [ + 10, + 4, + 10, + 10 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "z", + "span": [ + 10, + 4, + 10, + 5 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ".", + "text": ".", + "span": [ + 10, + 5, + 10, + 6 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "nil?", + "span": [ + 10, + 6, + 10, + 10 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + } + ] + }, + { + "kind": "unless", + "text": "", + "span": [ + 12, + 2, + 14, + 5 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "call", + "text": "w.nil?", + "span": [ + 12, + 9, + 12, + 15 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "w", + "span": [ + 12, + 9, + 12, + 10 + ], + "named": 
true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ".", + "text": ".", + "span": [ + 12, + 10, + 12, + 11 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "nil?", + "span": [ + 12, + 11, + 12, + 15 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "then", + "text": "w.nil?", + "span": [ + 13, + 4, + 13, + 10 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "call", + "text": "w.nil?", + "span": [ + 13, + 4, + 13, + 10 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "w", + "span": [ + 13, + 4, + 13, + 5 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ".", + "text": ".", + "span": [ + 13, + 5, + 13, + 6 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "nil?", + "span": [ + 13, + 6, + 13, + 10 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + } + ] + }, + { + "kind": "if", + "text": "", + "span": [ + 15, + 2, + 17, + 5 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "call", + "text": "obj&.ready", + "span": [ + 15, + 5, + 15, + 15 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "obj", + "span": [ + 15, + 5, + 15, + 8 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "&.", + "text": "&.", + "span": [ + 15, + 8, + 15, + 10 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "ready", + "span": [ + 15, + 10, + 15, + 15 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "then", + "text": "obj&.name", + "span": [ + 16, + 4, + 16, + 13 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "call", + "text": "obj&.name", + 
"span": [ + 16, + 4, + 16, + 13 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "obj", + "span": [ + 16, + 4, + 16, + 7 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "&.", + "text": "&.", + "span": [ + 16, + 7, + 16, + 9 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "name", + "span": [ + 16, + 9, + 16, + 13 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + } + ] + }, + { + "kind": "if", + "text": "", + "span": [ + 18, + 2, + 22, + 5 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "binary", + "text": "b != nil", + "span": [ + 18, + 5, + 18, + 14 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "b", + "span": [ + 18, + 5, + 18, + 6 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "!=", + "text": "!=", + "span": [ + 18, + 7, + 18, + 9 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "nil", + "text": "nil", + "span": [ + 18, + 10, + 18, + 13 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "then", + "text": "b.nil?", + "span": [ + 19, + 4, + 19, + 10 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "call", + "text": "b.nil?", + "span": [ + 19, + 4, + 19, + 10 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "b", + "span": [ + 19, + 4, + 19, + 5 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": ".", + "text": ".", + "span": [ + 19, + 5, + 19, + 6 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "identifier", + "text": "nil?", + "span": [ + 19, + 6, + 19, + 10 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + }, + { + 
"kind": "else", + "text": "return", + "span": [ + 21, + 4, + 21, + 10 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "return", + "text": "return", + "span": [ + 21, + 4, + 21, + 10 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + }, + { + "kind": "if", + "text": "", + "span": [ + 23, + 2, + 27, + 5 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "binary", + "text": "a != nil", + "span": [ + 23, + 5, + 23, + 14 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "a", + "span": [ + 23, + 5, + 23, + 6 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "!=", + "text": "!=", + "span": [ + 23, + 7, + 23, + 9 + ], + "named": false, + "field_name": null, + "children": [ + + ] + }, + { + "kind": "nil", + "text": "nil", + "span": [ + 23, + 10, + 23, + 13 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "then", + "text": "return", + "span": [ + 24, + 4, + 24, + 10 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "return", + "text": "return", + "span": [ + 24, + 4, + 24, + 10 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + }, + { + "kind": "else", + "text": "abort", + "span": [ + 26, + 4, + 26, + 9 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "call", + "text": "abort", + "span": [ + 26, + 4, + 26, + 9 + ], + "named": true, + "field_name": null, + "children": [ + { + "kind": "identifier", + "text": "abort", + "span": [ + 26, + 4, + 26, + 9 + ], + "named": true, + "field_name": null, + "children": [ + + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, + "visibility": "public", + "params": [ + + ] + } + ], + "normalized_root": { + "type": "ROOT", + "children": [ + { + "Node": { + "type": "DEFN", + "children": [ + { + "Symbol": "rich" + }, + { + "Node": { + "type": "SCOPE", + "children": [ + { 
+ "Nil": null + }, + { + "Nil": null + }, + { + "Node": { + "type": "BLOCK", + "children": [ + { + "Node": { + "type": "IF", + "children": [ + { + "Node": { + "type": "AND", + "children": [ + { + "Node": { + "type": "OPCALL", + "children": [ + { + "Node": { + "type": "LVAR", + "children": [ + { + "String": "x" + } + ], + "first_lineno": 2, + "first_column": 5, + "last_lineno": 2, + "last_column": 6, + "text": "x" + } + }, + { + "Symbol": "!=" + }, + { + "Node": { + "type": "LIST", + "children": [ + { + "Node": { + "type": "NIL", + "children": [ + + ], + "first_lineno": 2, + "first_column": 10, + "last_lineno": 2, + "last_column": 13, + "text": "nil" + } + } + ], + "first_lineno": 2, + "first_column": 10, + "last_lineno": 2, + "last_column": 13, + "text": "nil" + } + } + ], + "first_lineno": 2, + "first_column": 5, + "last_lineno": 2, + "last_column": 14, + "text": "x != nil" + } + }, + { + "Node": { + "type": "CALL", + "children": [ + { + "Node": { + "type": "LVAR", + "children": [ + { + "String": "y" + } + ], + "first_lineno": 2, + "first_column": 17, + "last_lineno": 2, + "last_column": 18, + "text": "y" + } + }, + { + "Symbol": "present?" + } + ], + "first_lineno": 2, + "first_column": 17, + "last_lineno": 2, + "last_column": 27, + "text": "y.present?" + } + } + ], + "first_lineno": 2, + "first_column": 5, + "last_lineno": 2, + "last_column": 27, + "text": "x != nil && y.present?" + } + }, + { + "Node": { + "type": "BLOCK", + "children": [ + { + "Node": { + "type": "CALL", + "children": [ + { + "Node": { + "type": "LVAR", + "children": [ + { + "String": "x" + } + ], + "first_lineno": 3, + "first_column": 4, + "last_lineno": 3, + "last_column": 5, + "text": "x" + } + }, + { + "Symbol": "nil?" + } + ], + "first_lineno": 3, + "first_column": 4, + "last_lineno": 3, + "last_column": 10, + "text": "x.nil?" 
+ } + }, + { + "Node": { + "type": "CALL", + "children": [ + { + "Node": { + "type": "LVAR", + "children": [ + { + "String": "y" + } + ], + "first_lineno": 4, + "first_column": 4, + "last_lineno": 4, + "last_column": 5, + "text": "y" + } + }, + { + "Symbol": "present?" + } + ], + "first_lineno": 4, + "first_column": 4, + "last_lineno": 4, + "last_column": 14, + "text": "y.present?" + } + }, + { + "Node": { + "type": "QCALL", + "children": [ + { + "Node": { + "type": "LVAR", + "children": [ + { + "String": "x" + } + ], + "first_lineno": 5, + "first_column": 4, + "last_lineno": 5, + "last_column": 5, + "text": "x" + } + }, + { + "Symbol": "foo" + } + ], + "first_lineno": 5, + "first_column": 4, + "last_lineno": 5, + "last_column": 10, + "text": "x&.foo" + } + } + ], + "first_lineno": 3, + "first_column": 4, + "last_lineno": 5, + "last_column": 10, + "text": "x.nil?\ny.present?\nx&.foo" + } + }, + { + "Nil": null + } + ], + "first_lineno": 2, + "first_column": 2, + "last_lineno": 6, + "last_column": 5, + "text": "" + } + }, + { + "Node": { + "type": "IF", + "children": [ + { + "Node": { + "type": "CALL", + "children": [ + { + "Node": { + "type": "LVAR", + "children": [ + { + "String": "z" + } + ], + "first_lineno": 7, + "first_column": 5, + "last_lineno": 7, + "last_column": 6, + "text": "z" + } + }, + { + "Symbol": "nil?" + } + ], + "first_lineno": 7, + "first_column": 5, + "last_lineno": 7, + "last_column": 11, + "text": "z.nil?" 
+ } + }, + { + "Node": { + "type": "BLOCK", + "children": [ + { + "Node": { + "type": "RETURN", + "children": [ + + ], + "first_lineno": 8, + "first_column": 4, + "last_lineno": 8, + "last_column": 10, + "text": "return" + } + } + ], + "first_lineno": 8, + "first_column": 4, + "last_lineno": 8, + "last_column": 10, + "text": "return" + } + }, + { + "Node": { + "type": "BLOCK", + "children": [ + { + "Node": { + "type": "CALL", + "children": [ + { + "Node": { + "type": "LVAR", + "children": [ + { + "String": "z" + } + ], + "first_lineno": 10, + "first_column": 4, + "last_lineno": 10, + "last_column": 5, + "text": "z" + } + }, + { + "Symbol": "nil?" + } + ], + "first_lineno": 10, + "first_column": 4, + "last_lineno": 10, + "last_column": 10, + "text": "z.nil?" + } + } + ], + "first_lineno": 10, + "first_column": 4, + "last_lineno": 10, + "last_column": 10, + "text": "z.nil?" + } + } + ], + "first_lineno": 7, + "first_column": 2, + "last_lineno": 11, + "last_column": 5, + "text": "" + } + }, + { + "Node": { + "type": "UNLESS", + "children": [ + { + "Node": { + "type": "CALL", + "children": [ + { + "Node": { + "type": "LVAR", + "children": [ + { + "String": "w" + } + ], + "first_lineno": 12, + "first_column": 9, + "last_lineno": 12, + "last_column": 10, + "text": "w" + } + }, + { + "Symbol": "nil?" + } + ], + "first_lineno": 12, + "first_column": 9, + "last_lineno": 12, + "last_column": 15, + "text": "w.nil?" + } + }, + { + "Node": { + "type": "BLOCK", + "children": [ + { + "Node": { + "type": "CALL", + "children": [ + { + "Node": { + "type": "LVAR", + "children": [ + { + "String": "w" + } + ], + "first_lineno": 13, + "first_column": 4, + "last_lineno": 13, + "last_column": 5, + "text": "w" + } + }, + { + "Symbol": "nil?" + } + ], + "first_lineno": 13, + "first_column": 4, + "last_lineno": 13, + "last_column": 10, + "text": "w.nil?" + } + } + ], + "first_lineno": 13, + "first_column": 4, + "last_lineno": 13, + "last_column": 10, + "text": "w.nil?" 
+ } + }, + { + "Nil": null + } + ], + "first_lineno": 12, + "first_column": 2, + "last_lineno": 14, + "last_column": 5, + "text": "" + } + }, + { + "Node": { + "type": "IF", + "children": [ + { + "Node": { + "type": "QCALL", + "children": [ + { + "Node": { + "type": "LVAR", + "children": [ + { + "String": "obj" + } + ], + "first_lineno": 15, + "first_column": 5, + "last_lineno": 15, + "last_column": 8, + "text": "obj" + } + }, + { + "Symbol": "ready" + } + ], + "first_lineno": 15, + "first_column": 5, + "last_lineno": 15, + "last_column": 15, + "text": "obj&.ready" + } + }, + { + "Node": { + "type": "BLOCK", + "children": [ + { + "Node": { + "type": "QCALL", + "children": [ + { + "Node": { + "type": "LVAR", + "children": [ + { + "String": "obj" + } + ], + "first_lineno": 16, + "first_column": 4, + "last_lineno": 16, + "last_column": 7, + "text": "obj" + } + }, + { + "Symbol": "name" + } + ], + "first_lineno": 16, + "first_column": 4, + "last_lineno": 16, + "last_column": 13, + "text": "obj&.name" + } + } + ], + "first_lineno": 16, + "first_column": 4, + "last_lineno": 16, + "last_column": 13, + "text": "obj&.name" + } + }, + { + "Nil": null + } + ], + "first_lineno": 15, + "first_column": 2, + "last_lineno": 17, + "last_column": 5, + "text": "" + } + }, + { + "Node": { + "type": "IF", + "children": [ + { + "Node": { + "type": "OPCALL", + "children": [ + { + "Node": { + "type": "LVAR", + "children": [ + { + "String": "b" + } + ], + "first_lineno": 18, + "first_column": 5, + "last_lineno": 18, + "last_column": 6, + "text": "b" + } + }, + { + "Symbol": "!=" + }, + { + "Node": { + "type": "LIST", + "children": [ + { + "Node": { + "type": "NIL", + "children": [ + + ], + "first_lineno": 18, + "first_column": 10, + "last_lineno": 18, + "last_column": 13, + "text": "nil" + } + } + ], + "first_lineno": 18, + "first_column": 10, + "last_lineno": 18, + "last_column": 13, + "text": "nil" + } + } + ], + "first_lineno": 18, + "first_column": 5, + "last_lineno": 18, + 
"last_column": 14, + "text": "b != nil" + } + }, + { + "Node": { + "type": "BLOCK", + "children": [ + { + "Node": { + "type": "CALL", + "children": [ + { + "Node": { + "type": "LVAR", + "children": [ + { + "String": "b" + } + ], + "first_lineno": 19, + "first_column": 4, + "last_lineno": 19, + "last_column": 5, + "text": "b" + } + }, + { + "Symbol": "nil?" + } + ], + "first_lineno": 19, + "first_column": 4, + "last_lineno": 19, + "last_column": 10, + "text": "b.nil?" + } + } + ], + "first_lineno": 19, + "first_column": 4, + "last_lineno": 19, + "last_column": 10, + "text": "b.nil?" + } + }, + { + "Node": { + "type": "BLOCK", + "children": [ + { + "Node": { + "type": "RETURN", + "children": [ + + ], + "first_lineno": 21, + "first_column": 4, + "last_lineno": 21, + "last_column": 10, + "text": "return" + } + } + ], + "first_lineno": 21, + "first_column": 4, + "last_lineno": 21, + "last_column": 10, + "text": "return" + } + } + ], + "first_lineno": 18, + "first_column": 2, + "last_lineno": 22, + "last_column": 5, + "text": "" + } + }, + { + "Node": { + "type": "IF", + "children": [ + { + "Node": { + "type": "OPCALL", + "children": [ + { + "Node": { + "type": "LVAR", + "children": [ + { + "String": "a" + } + ], + "first_lineno": 23, + "first_column": 5, + "last_lineno": 23, + "last_column": 6, + "text": "a" + } + }, + { + "Symbol": "!=" + }, + { + "Node": { + "type": "LIST", + "children": [ + { + "Node": { + "type": "NIL", + "children": [ + + ], + "first_lineno": 23, + "first_column": 10, + "last_lineno": 23, + "last_column": 13, + "text": "nil" + } + } + ], + "first_lineno": 23, + "first_column": 10, + "last_lineno": 23, + "last_column": 13, + "text": "nil" + } + } + ], + "first_lineno": 23, + "first_column": 5, + "last_lineno": 23, + "last_column": 14, + "text": "a != nil" + } + }, + { + "Node": { + "type": "BLOCK", + "children": [ + { + "Node": { + "type": "RETURN", + "children": [ + + ], + "first_lineno": 24, + "first_column": 4, + "last_lineno": 24, + 
"last_column": 10, + "text": "return" + } + } + ], + "first_lineno": 24, + "first_column": 4, + "last_lineno": 24, + "last_column": 10, + "text": "return" + } + }, + { + "Node": { + "type": "BLOCK", + "children": [ + { + "Node": { + "type": "VCALL", + "children": [ + { + "Symbol": "abort" + } + ], + "first_lineno": 26, + "first_column": 4, + "last_lineno": 26, + "last_column": 9, + "text": "abort" + } + } + ], + "first_lineno": 26, + "first_column": 4, + "last_lineno": 26, + "last_column": 9, + "text": "abort" + } + } + ], + "first_lineno": 23, + "first_column": 2, + "last_lineno": 27, + "last_column": 5, + "text": "" + } + } + ], + "first_lineno": 2, + "first_column": 4, + "last_lineno": 27, + "last_column": 5, + "text": "\n\n\n\n\n" + } + } + ], + "first_lineno": 1, + "first_column": 0, + "last_lineno": 28, + "last_column": 3, + "text": "def rich" + } + } + ], + "first_lineno": 1, + "first_column": 0, + "last_lineno": 28, + "last_column": 3, + "text": "def rich" + } + } + ], + "first_lineno": 1, + "first_column": 0, + "last_lineno": 28, + "last_column": 3, + "text": "def rich" + } + } + ] + }, + "expected": [ + { + "at": "facts/nil_guard_rich.rb:rich:3", + "defn": "rich", + "file": "facts/nil_guard_rich.rb", + "guard": "x.nil?", + "line": 3, + "local": "x", + "proof": "x is already proven non-nil on this path", + "span": [ + 3, + 4, + 3, + 10 + ], + "spans": { + "facts/nil_guard_rich.rb:rich:3": [ + 3, + 4, + 3, + 10 + ] + } + }, + { + "at": "facts/nil_guard_rich.rb:rich:4", + "defn": "rich", + "file": "facts/nil_guard_rich.rb", + "guard": "y.present?", + "line": 4, + "local": "y", + "proof": "y is already proven non-nil on this path", + "span": [ + 4, + 4, + 4, + 14 + ], + "spans": { + "facts/nil_guard_rich.rb:rich:4": [ + 4, + 4, + 4, + 14 + ] + } + }, + { + "at": "facts/nil_guard_rich.rb:rich:5", + "defn": "rich", + "file": "facts/nil_guard_rich.rb", + "guard": "x&.foo", + "line": 5, + "local": "x", + "proof": "x is already proven non-nil on this path", + 
"span": [ + 5, + 4, + 5, + 10 + ], + "spans": { + "facts/nil_guard_rich.rb:rich:5": [ + 5, + 4, + 5, + 10 + ] + } + }, + { + "at": "facts/nil_guard_rich.rb:rich:10", + "defn": "rich", + "file": "facts/nil_guard_rich.rb", + "guard": "z.nil?", + "line": 10, + "local": "z", + "proof": "z is already proven non-nil on this path", + "span": [ + 10, + 4, + 10, + 10 + ], + "spans": { + "facts/nil_guard_rich.rb:rich:10": [ + 10, + 4, + 10, + 10 + ] + } + }, + { + "at": "facts/nil_guard_rich.rb:rich:13", + "defn": "rich", + "file": "facts/nil_guard_rich.rb", + "guard": "w.nil?", + "line": 13, + "local": "w", + "proof": "w is already proven non-nil on this path", + "span": [ + 13, + 4, + 13, + 10 + ], + "spans": { + "facts/nil_guard_rich.rb:rich:13": [ + 13, + 4, + 13, + 10 + ] + } + }, + { + "at": "facts/nil_guard_rich.rb:rich:16", + "defn": "rich", + "file": "facts/nil_guard_rich.rb", + "guard": "obj&.name", + "line": 16, + "local": "obj", + "proof": "obj is already proven non-nil on this path", + "span": [ + 16, + 4, + 16, + 13 + ], + "spans": { + "facts/nil_guard_rich.rb:rich:16": [ + 16, + 4, + 16, + 13 + ] + } + }, + { + "at": "facts/nil_guard_rich.rb:rich:19", + "defn": "rich", + "file": "facts/nil_guard_rich.rb", + "guard": "b.nil?", + "line": 19, + "local": "b", + "proof": "b is already proven non-nil on this path", + "span": [ + 19, + 4, + 19, + 10 + ], + "spans": { + "facts/nil_guard_rich.rb:rich:19": [ + 19, + 4, + 19, + 10 + ] + } + } + ] +} diff --git a/gems/decomplex/examples/facts/detectors/sequence-mine-nested.json b/gems/decomplex/examples/facts/detectors/sequence-mine-nested.json new file mode 100644 index 000000000..2c8e742e9 --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/sequence-mine-nested.json @@ -0,0 +1,193 @@ +{ + "detector": "sequence-mine", + "input": { + "documents": [ + { + "file": "facts/sequence_nested.rb", + "language": "ruby", + "lines": [ + "sig { acquire release }", + "sig { acquire release }", + "sig { acquire release }", + 
"sig { acquire release }", + "sig { acquire }", + "sig { release }" + ], + "call_sites": [ + { + "receiver": "", + "message": "sig", + "file": "facts/sequence_nested.rb", + "function": "paired_0", + "owner": "Fixture", + "line": 1, + "span": [ + 1, + 0, + 1, + 23 + ], + "conditional": false, + "arguments": [ + "acquire", + "release" + ], + "control": null, + "safe_navigation": false, + "block": false + }, + { + "receiver": "", + "message": "sig", + "file": "facts/sequence_nested.rb", + "function": "paired_1", + "owner": "Fixture", + "line": 2, + "span": [ + 2, + 0, + 2, + 23 + ], + "conditional": false, + "arguments": [ + "acquire", + "release" + ], + "control": null, + "safe_navigation": false, + "block": false + }, + { + "receiver": "", + "message": "sig", + "file": "facts/sequence_nested.rb", + "function": "paired_2", + "owner": "Fixture", + "line": 3, + "span": [ + 3, + 0, + 3, + 23 + ], + "conditional": false, + "arguments": [ + "acquire", + "release" + ], + "control": null, + "safe_navigation": false, + "block": false + }, + { + "receiver": "", + "message": "sig", + "file": "facts/sequence_nested.rb", + "function": "paired_3", + "owner": "Fixture", + "line": 4, + "span": [ + 4, + 0, + 4, + 23 + ], + "conditional": false, + "arguments": [ + "acquire", + "release" + ], + "control": null, + "safe_navigation": false, + "block": false + }, + { + "receiver": "", + "message": "sig", + "file": "facts/sequence_nested.rb", + "function": "missing_release", + "owner": "Fixture", + "line": 5, + "span": [ + 5, + 0, + 5, + 15 + ], + "conditional": false, + "arguments": [ + "acquire" + ], + "control": null, + "safe_navigation": false, + "block": false + }, + { + "receiver": "", + "message": "sig", + "file": "facts/sequence_nested.rb", + "function": "missing_acquire", + "owner": "Fixture", + "line": 6, + "span": [ + 6, + 0, + 6, + 15 + ], + "conditional": false, + "arguments": [ + "release" + ], + "control": null, + "safe_navigation": false, + "block": false + } + ] + } + ] + 
}, + "expected": { + "broken": [ + { + "at": "facts/sequence_nested.rb:missing_release:5", + "confidence": 0.8, + "has": "acquire", + "missing": "release", + "pair": [ + "acquire", + "release" + ], + "spans": { + "facts/sequence_nested.rb:missing_release:5": [ + 5, + 0, + 5, + 15 + ] + }, + "support": 4 + }, + { + "at": "facts/sequence_nested.rb:missing_acquire:6", + "confidence": 0.8, + "has": "release", + "missing": "acquire", + "pair": [ + "acquire", + "release" + ], + "spans": { + "facts/sequence_nested.rb:missing_acquire:6": [ + 6, + 0, + 6, + 15 + ] + }, + "support": 4 + } + ] + } +} diff --git a/gems/decomplex/examples/facts/detectors/sequence-mine-rich.json b/gems/decomplex/examples/facts/detectors/sequence-mine-rich.json new file mode 100644 index 000000000..7bf5c4d21 --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/sequence-mine-rich.json @@ -0,0 +1,220 @@ +{ + "detector": "sequence-mine", + "input": { + "documents": [ + { + "file": "facts/sequence_rich.rb", + "language": "ruby", + "lines": [ + "sig {", + " acquire", + " release", + "}", + "sig { acquire release }", + "sig { acquire release }", + "sig { acquire release }", + "sig { acquire }", + "sig { release }", + "sig { Acquire release }", + "sig { acquire release$ }" + ], + "call_sites": [ + { + "receiver": "", + "message": "sig", + "file": "facts/sequence_rich.rb", + "function": "paired_multiline", + "owner": "Fixture", + "line": 1, + "span": [ + 1, + 0, + 4, + 1 + ], + "conditional": false, + "arguments": [ + + ], + "control": null, + "safe_navigation": false, + "block": false + }, + { + "receiver": "", + "message": "sig", + "file": "facts/sequence_rich.rb", + "function": "paired_1", + "owner": "Fixture", + "line": 5, + "span": [ + 5, + 0, + 5, + 23 + ], + "conditional": false, + "arguments": [ + "acquire", + "release" + ], + "control": null, + "safe_navigation": false, + "block": false + }, + { + "receiver": "", + "message": "sig", + "file": "facts/sequence_rich.rb", + "function": 
"paired_2", + "owner": "Fixture", + "line": 6, + "span": [ + 6, + 0, + 6, + 23 + ], + "conditional": false, + "arguments": [ + "acquire", + "release" + ], + "control": null, + "safe_navigation": false, + "block": false + }, + { + "receiver": "", + "message": "sig", + "file": "facts/sequence_rich.rb", + "function": "paired_3", + "owner": "Fixture", + "line": 7, + "span": [ + 7, + 0, + 7, + 23 + ], + "conditional": false, + "arguments": [ + "acquire", + "release" + ], + "control": null, + "safe_navigation": false, + "block": false + }, + { + "receiver": "", + "message": "sig", + "file": "facts/sequence_rich.rb", + "function": "missing_release", + "owner": "Fixture", + "line": 8, + "span": [ + 8, + 0, + 8, + 15 + ], + "conditional": false, + "arguments": [ + "acquire" + ], + "control": null, + "safe_navigation": false, + "block": false + }, + { + "receiver": "", + "message": "sig", + "file": "facts/sequence_rich.rb", + "function": "missing_acquire", + "owner": "Fixture", + "line": 9, + "span": [ + 9, + 0, + 9, + 15 + ], + "conditional": false, + "arguments": [ + "release" + ], + "control": null, + "safe_navigation": false, + "block": false + }, + { + "receiver": "", + "message": "sig", + "file": "facts/sequence_rich.rb", + "function": "ignored_caps", + "owner": "Fixture", + "line": 10, + "span": [ + 10, + 0, + 10, + 23 + ], + "conditional": false, + "arguments": [ + + ], + "control": null, + "safe_navigation": false, + "block": false + }, + { + "receiver": "", + "message": "sig", + "file": "facts/sequence_rich.rb", + "function": "ignored_symbol", + "owner": "Fixture", + "line": 11, + "span": [ + 11, + 0, + 11, + 24 + ], + "conditional": false, + "arguments": [ + + ], + "control": null, + "safe_navigation": false, + "block": false + } + ] + } + ] + }, + "expected": { + "broken": [ + { + "at": "facts/sequence_rich.rb:missing_release:8", + "confidence": 0.83, + "has": "acquire", + "missing": "release", + "pair": [ + "acquire", + "release" + ], + "spans": { + 
"facts/sequence_rich.rb:missing_release:8": [ + 8, + 0, + 8, + 15 + ] + }, + "support": 5 + } + ] + } +} diff --git a/gems/decomplex/examples/facts/detectors/sequence-mine.json b/gems/decomplex/examples/facts/detectors/sequence-mine.json new file mode 100644 index 000000000..197cf03fc --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/sequence-mine.json @@ -0,0 +1,79 @@ +{ + "detector": "sequence-mine", + "input": { + "documents": [ + { + "file": "facts/sequence.rb", + "language": "ruby", + "source": "", + "lines": [], + "root": { + "kind": "program", + "text": "", + "span": [1, 0, 1, 0], + "named": true, + "field_name": null, + "children": [] + }, + "normalized_root": { + "type": "ROOT", + "children": [], + "first_lineno": 1, + "first_column": 0, + "last_lineno": 1, + "last_column": 0, + "text": "" + }, + "function_defs": [], + "owner_defs": [], + "call_sites": [ + {"receiver": "", "message": "acquire", "file": "facts/sequence.rb", "function": "paired_0", "owner": "", "line": 1, "span": [1, 0, 1, 7], "conditional": false, "arguments": [], "control": null, "safe_navigation": false, "block": false}, + {"receiver": "", "message": "release", "file": "facts/sequence.rb", "function": "paired_0", "owner": "", "line": 2, "span": [2, 0, 2, 7], "conditional": false, "arguments": [], "control": null, "safe_navigation": false, "block": false}, + {"receiver": "", "message": "acquire", "file": "facts/sequence.rb", "function": "paired_1", "owner": "", "line": 3, "span": [3, 0, 3, 7], "conditional": false, "arguments": [], "control": null, "safe_navigation": false, "block": false}, + {"receiver": "", "message": "release", "file": "facts/sequence.rb", "function": "paired_1", "owner": "", "line": 4, "span": [4, 0, 4, 7], "conditional": false, "arguments": [], "control": null, "safe_navigation": false, "block": false}, + {"receiver": "", "message": "acquire", "file": "facts/sequence.rb", "function": "paired_2", "owner": "", "line": 5, "span": [5, 0, 5, 7], "conditional": 
false, "arguments": [], "control": null, "safe_navigation": false, "block": false}, + {"receiver": "", "message": "release", "file": "facts/sequence.rb", "function": "paired_2", "owner": "", "line": 6, "span": [6, 0, 6, 7], "conditional": false, "arguments": [], "control": null, "safe_navigation": false, "block": false}, + {"receiver": "", "message": "acquire", "file": "facts/sequence.rb", "function": "paired_3", "owner": "", "line": 7, "span": [7, 0, 7, 7], "conditional": false, "arguments": [], "control": null, "safe_navigation": false, "block": false}, + {"receiver": "", "message": "release", "file": "facts/sequence.rb", "function": "paired_3", "owner": "", "line": 8, "span": [8, 0, 8, 7], "conditional": false, "arguments": [], "control": null, "safe_navigation": false, "block": false}, + {"receiver": "", "message": "acquire", "file": "facts/sequence.rb", "function": "missing_release", "owner": "", "line": 9, "span": [9, 0, 9, 7], "conditional": false, "arguments": [], "control": null, "safe_navigation": false, "block": false}, + {"receiver": "", "message": "release", "file": "facts/sequence.rb", "function": "missing_acquire", "owner": "", "line": 10, "span": [10, 0, 10, 7], "conditional": false, "arguments": [], "control": null, "safe_navigation": false, "block": false} + ], + "state_reads": [], + "state_writes": [], + "decision_sites": [], + "branch_decisions": [], + "dispatch_sites": [], + "semantic_effect_sites": [], + "local_complexity_scores": {}, + "predicate_aliases": [], + "comparison_uses": [] + } + ] + }, + "expected": { + "broken": [ + { + "at": "facts/sequence.rb:missing_release:9", + "confidence": 0.8, + "has": "acquire", + "missing": "release", + "pair": ["acquire", "release"], + "spans": { + "facts/sequence.rb:missing_release:9": [9, 0, 9, 7] + }, + "support": 4 + }, + { + "at": "facts/sequence.rb:missing_acquire:10", + "confidence": 0.8, + "has": "release", + "missing": "acquire", + "pair": ["acquire", "release"], + "spans": { + 
"facts/sequence.rb:missing_acquire:10": [10, 0, 10, 7] + }, + "support": 4 + } + ] + } +} diff --git a/gems/decomplex/examples/facts/detectors/state-branch-density.json b/gems/decomplex/examples/facts/detectors/state-branch-density.json new file mode 100644 index 000000000..a1e4072f9 --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/state-branch-density.json @@ -0,0 +1,86 @@ +{ + "detector": "state-branch-density", + "input": { + "documents": [ + { + "file": "facts/state_branch.rb", + "language": "ruby", + "source": "", + "lines": [], + "root": { + "kind": "program", + "text": "", + "span": [1, 0, 1, 0], + "named": true, + "field_name": null, + "children": [] + }, + "normalized_root": { + "type": "ROOT", + "children": [], + "first_lineno": 1, + "first_column": 0, + "last_lineno": 1, + "last_column": 0, + "text": "" + }, + "function_defs": [], + "owner_defs": [], + "call_sites": [], + "state_reads": [], + "state_writes": [], + "decision_sites": [], + "branch_decisions": [ + { + "file": "facts/state_branch.rb", + "function": "check", + "line": 1, + "span": [1, 0, 5, 3], + "predicate": "if ready", + "state_refs": ["ready"] + }, + { + "file": "facts/state_branch.rb", + "function": "check", + "line": 2, + "span": [2, 2, 2, 20], + "predicate": "ready?", + "state_refs": ["ready"] + }, + { + "file": "facts/state_branch.rb", + "function": "check", + "line": 6, + "span": [6, 0, 6, 20], + "predicate": "unless stale", + "state_refs": ["stale"] + } + ], + "dispatch_sites": [], + "semantic_effect_sites": [], + "local_complexity_scores": {}, + "predicate_aliases": [], + "comparison_uses": [] + } + ] + }, + "expected": [ + { + "at": "facts/state_branch.rb:check:2", + "decisions": 2, + "file": "facts/state_branch.rb", + "method": "check", + "predicate": "ready?", + "score": 4, + "sites": [ + "facts/state_branch.rb:check:2", + "facts/state_branch.rb:check:6" + ], + "spans": { + "facts/state_branch.rb:check:2": [2, 2, 2, 20], + "facts/state_branch.rb:check:6": [6, 0, 6, 20] 
+ }, + "state_refs": ["ready", "stale"] + } + ] +} diff --git a/gems/decomplex/examples/facts/detectors/state-mesh-rich.json b/gems/decomplex/examples/facts/detectors/state-mesh-rich.json new file mode 100644 index 000000000..2263f19e5 --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/state-mesh-rich.json @@ -0,0 +1,408 @@ +{ + "detector": "state-mesh", + "input": { + "documents": [ + { + "file": "facts/state_mesh_rich.rb", + "language": "ruby", + "state_writes": [ + { + "field": "ready", + "receiver": "self", + "file": "facts/state_mesh_rich.rb", + "function": "first", + "line": 1, + "span": [ + 1, + 0, + 1, + 11 + ], + "owner": "Fixture" + }, + { + "field": "ready", + "receiver": "self", + "file": "facts/state_mesh_rich.rb", + "function": "second", + "line": 2, + "span": [ + 2, + 0, + 2, + 11 + ], + "owner": "Fixture" + }, + { + "field": "stale", + "receiver": "self", + "file": "facts/state_mesh_rich.rb", + "function": "third", + "line": 3, + "span": [ + 3, + 0, + 3, + 11 + ], + "owner": "Fixture" + }, + { + "field": "stale", + "receiver": "self", + "file": "facts/state_mesh_rich.rb", + "function": "fourth", + "line": 4, + "span": [ + 4, + 0, + 4, + 11 + ], + "owner": "Fixture" + } + ], + "state_reads": [ + { + "field": "ready", + "receiver": "self", + "file": "facts/state_mesh_rich.rb", + "function": "check", + "line": 5, + "span": [ + 5, + 0, + 5, + 10 + ], + "owner": "Fixture" + }, + { + "field": "stale", + "receiver": "self", + "file": "facts/state_mesh_rich.rb", + "function": "report", + "line": 6, + "span": [ + 6, + 0, + 6, + 10 + ], + "owner": "Fixture" + } + ], + "predicate_defs": [ + { + "name": "ready?", + "body": "ready == true", + "file": "facts/state_mesh_rich.rb", + "line": 7, + "span": [ + 7, + 0, + 7, + 20 + ] + } + ], + "comparison_sites": [ + { + "source": "ready == true", + "file": "facts/state_mesh_rich.rb", + "function": "inline", + "line": 8, + "span": [ + 8, + 0, + 8, + 13 + ] + } + ], + "predicate_aliases": [ + { + "name": 
"ready?", + "body": "ready == true", + "file": "facts/state_mesh_rich.rb", + "defn": "ready?", + "line": 7, + "span": [ + 7, + 0, + 7, + 20 + ] + } + ], + "comparison_uses": [ + { + "canon_source": "ready == true", + "raw": "ready == true", + "file": "facts/state_mesh_rich.rb", + "function": "inline", + "line": 8, + "span": [ + 8, + 0, + 8, + 13 + ], + "enclosing_span": [ + 8, + 0, + 8, + 13 + ] + } + ] + } + ] + }, + "expected": { + "fields": { + "ready": { + "messiness": 16.0, + "metrics": { + "fix_churn": 1.0, + "percentiles": { + "messiness": 100, + "pressure": 100, + "re_derivations": 100, + "reads": 100, + "scatter": 100, + "writes": 100 + }, + "pressure": 1, + "re_derivations": 1, + "read_scatter": 1, + "reads": 1, + "receiver_types": 1, + "scatter": 4, + "write_scatter": 2, + "writes": 2 + }, + "rank": 1, + "re_derivations": [ + { + "canon": "ready == true", + "defn": "inline", + "file": "facts/state_mesh_rich.rb", + "line": 8, + "predicate": "ready?", + "raw": "ready == true" + } + ], + "readers": [ + { + "defn": "check", + "file": "facts/state_mesh_rich.rb", + "line": 5, + "recv": "self", + "span": [ + 5, + 0, + 5, + 10 + ] + } + ], + "writers": [ + { + "defn": "first", + "file": "facts/state_mesh_rich.rb", + "line": 1, + "recv": "self", + "span": [ + 1, + 0, + 1, + 11 + ] + }, + { + "defn": "second", + "file": "facts/state_mesh_rich.rb", + "line": 2, + "recv": "self", + "span": [ + 2, + 0, + 2, + 11 + ] + } + ] + }, + "stale": { + "messiness": 9.0, + "metrics": { + "fix_churn": 1.0, + "percentiles": { + "messiness": 50, + "pressure": 100, + "re_derivations": 50, + "reads": 100, + "scatter": 50, + "writes": 100 + }, + "pressure": 1, + "re_derivations": 0, + "read_scatter": 1, + "reads": 1, + "receiver_types": 1, + "scatter": 3, + "write_scatter": 2, + "writes": 2 + }, + "rank": 2, + "re_derivations": [ + + ], + "readers": [ + { + "defn": "report", + "file": "facts/state_mesh_rich.rb", + "line": 6, + "recv": "self", + "span": [ + 6, + 0, + 6, + 10 + ] + } 
+ ], + "writers": [ + { + "defn": "third", + "file": "facts/state_mesh_rich.rb", + "line": 3, + "recv": "self", + "span": [ + 3, + 0, + 3, + 11 + ] + }, + { + "defn": "fourth", + "file": "facts/state_mesh_rich.rb", + "line": 4, + "recv": "self", + "span": [ + 4, + 0, + 4, + 11 + ] + } + ] + } + }, + "hierarchy": [ + { + "files": [ + { + "defns": [ + { + "fields": { + "read": [ + "ready" + ], + "written": [ + + ] + }, + "name": "check", + "readers": 1, + "writers": 0 + }, + { + "fields": { + "read": [ + + ], + "written": [ + "ready" + ] + }, + "name": "first", + "readers": 0, + "writers": 1 + }, + { + "fields": { + "read": [ + + ], + "written": [ + "stale" + ] + }, + "name": "fourth", + "readers": 0, + "writers": 1 + }, + { + "fields": { + "read": [ + "stale" + ], + "written": [ + + ] + }, + "name": "report", + "readers": 1, + "writers": 0 + }, + { + "fields": { + "read": [ + + ], + "written": [ + "ready" + ] + }, + "name": "second", + "readers": 0, + "writers": 1 + }, + { + "fields": { + "read": [ + + ], + "written": [ + "stale" + ] + }, + "name": "third", + "readers": 0, + "writers": 1 + } + ], + "name": "state_mesh_rich.rb", + "readers": 2, + "writers": 4 + } + ], + "name": "facts", + "readers": 2, + "writers": 4 + } + ], + "state_mesh": { + "custom_fields": null, + "min_writes": 2, + "total_fields": 2, + "total_re_derivations": 1, + "total_reads": 2, + "total_writes": 4 + } + } +} diff --git a/gems/decomplex/examples/facts/detectors/state-mesh.json b/gems/decomplex/examples/facts/detectors/state-mesh.json new file mode 100644 index 000000000..94418d1de --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/state-mesh.json @@ -0,0 +1,223 @@ +{ + "detector": "state-mesh", + "input": { + "documents": [ + { + "file": "facts/state_mesh.rb", + "language": "ruby", + "state_writes": [ + { + "field": "ready", + "receiver": "self", + "file": "facts/state_mesh.rb", + "function": "first", + "line": 1, + "span": [ + 1, + 0, + 1, + 11 + ], + "owner": "Fixture" + }, + { + 
"field": "ready", + "receiver": "self", + "file": "facts/state_mesh.rb", + "function": "second", + "line": 2, + "span": [ + 2, + 0, + 2, + 11 + ], + "owner": "Fixture" + } + ], + "state_reads": [ + { + "field": "ready", + "receiver": "self", + "file": "facts/state_mesh.rb", + "function": "check", + "line": 4, + "span": [ + 4, + 0, + 4, + 10 + ], + "owner": "Fixture" + }, + { + "field": "ready", + "receiver": "self", + "file": "facts/state_mesh.rb", + "function": "report", + "line": 5, + "span": [ + 5, + 0, + 5, + 10 + ], + "owner": "Fixture" + } + ] + } + ] + }, + "expected": { + "fields": { + "ready": { + "messiness": 16.0, + "metrics": { + "fix_churn": 1.0, + "percentiles": { + }, + "pressure": 2, + "re_derivations": 0, + "read_scatter": 2, + "reads": 2, + "receiver_types": 1, + "scatter": 4, + "write_scatter": 2, + "writes": 2 + }, + "rank": 1, + "re_derivations": [ + + ], + "readers": [ + { + "defn": "check", + "file": "facts/state_mesh.rb", + "line": 4, + "recv": "self", + "span": [ + 4, + 0, + 4, + 10 + ] + }, + { + "defn": "report", + "file": "facts/state_mesh.rb", + "line": 5, + "recv": "self", + "span": [ + 5, + 0, + 5, + 10 + ] + } + ], + "writers": [ + { + "defn": "first", + "file": "facts/state_mesh.rb", + "line": 1, + "recv": "self", + "span": [ + 1, + 0, + 1, + 11 + ] + }, + { + "defn": "second", + "file": "facts/state_mesh.rb", + "line": 2, + "recv": "self", + "span": [ + 2, + 0, + 2, + 11 + ] + } + ] + } + }, + "hierarchy": [ + { + "files": [ + { + "defns": [ + { + "fields": { + "read": [ + "ready" + ], + "written": [ + + ] + }, + "name": "check", + "readers": 1, + "writers": 0 + }, + { + "fields": { + "read": [ + + ], + "written": [ + "ready" + ] + }, + "name": "first", + "readers": 0, + "writers": 1 + }, + { + "fields": { + "read": [ + "ready" + ], + "written": [ + + ] + }, + "name": "report", + "readers": 1, + "writers": 0 + }, + { + "fields": { + "read": [ + + ], + "written": [ + "ready" + ] + }, + "name": "second", + "readers": 0, + "writers": 
1 + } + ], + "name": "state_mesh.rb", + "readers": 2, + "writers": 2 + } + ], + "name": "facts", + "readers": 2, + "writers": 2 + } + ], + "state_mesh": { + "custom_fields": null, + "min_writes": 2, + "total_fields": 1, + "total_re_derivations": 0, + "total_reads": 2, + "total_writes": 2 + } + } +} diff --git a/gems/decomplex/examples/facts/detectors/structural-topology-rich.json b/gems/decomplex/examples/facts/detectors/structural-topology-rich.json new file mode 100644 index 000000000..35079b6cd --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/structural-topology-rich.json @@ -0,0 +1,195 @@ +{ + "detector": "structural-topology", + "input": { + "documents": [ + { + "file": "facts/topology_rich.rb", + "language": "ruby", + "lines": [ + "class Fixture", + " def self.entry", + " self.", + " helper", + " self.entry", + " end", + " def self.helper", + " end", + "end" + ], + "function_defs": [ + { + "file": "facts/topology_rich.rb", + "name": "self.entry", + "owner": "Fixture", + "line": 2, + "span": [ + 2, + 2, + 6, + 5 + ], + "body": { + "kind": "body_statement", + "text": "", + "span": [ + 2, + 2, + 6, + 5 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + "visibility": "public", + "params": [ + + ] + }, + { + "file": "facts/topology_rich.rb", + "name": "self.helper", + "owner": "Fixture", + "line": 7, + "span": [ + 7, + 2, + 8, + 5 + ], + "body": { + "kind": "body_statement", + "text": "", + "span": [ + 7, + 2, + 8, + 5 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + "visibility": "public", + "params": [ + + ] + } + ], + "owner_defs": [ + { + "file": "facts/topology_rich.rb", + "name": "Fixture", + "kind": "class", + "line": 1, + "span": [ + 1, + 0, + 9, + 3 + ] + } + ], + "call_sites": [ + { + "receiver": "self", + "message": "helper", + "file": "facts/topology_rich.rb", + "function": "self.entry", + "owner": "Fixture", + "line": 3, + "span": [ + 3, + 4, + 4, + 12 + ], + "conditional": false, + "arguments": 
[ + + ], + "control": null, + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "entry", + "file": "facts/topology_rich.rb", + "function": "self.entry", + "owner": "Fixture", + "line": 5, + "span": [ + 5, + 4, + 5, + 14 + ], + "conditional": false, + "arguments": [ + + ], + "control": null, + "safe_navigation": false, + "block": false + } + ] + } + ] + }, + "expected": { + "edges": [ + { + "callee": "Fixture#self.helper", + "callee_name": "self.helper", + "caller": "Fixture#self.entry", + "caller_name": "self.entry", + "confidence": "high", + "file": "facts/topology_rich.rb", + "kind": "direct_self", + "line": 3, + "span": [ + 3, + 4, + 4, + 12 + ], + "type": "always" + } + ], + "methods": [ + { + "file": "facts/topology_rich.rb", + "id": "Fixture#self.entry", + "line": 2, + "name": "self.entry", + "owner": "Fixture", + "span": [ + 2, + 2, + 6, + 5 + ], + "visibility": "public" + }, + { + "file": "facts/topology_rich.rb", + "id": "Fixture#self.helper", + "line": 7, + "name": "self.helper", + "owner": "Fixture", + "span": [ + 7, + 2, + 8, + 5 + ], + "visibility": "public" + } + ] + } +} diff --git a/gems/decomplex/examples/facts/detectors/structural-topology.json b/gems/decomplex/examples/facts/detectors/structural-topology.json new file mode 100644 index 000000000..a64b9a9cd --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/structural-topology.json @@ -0,0 +1,171 @@ +{ + "detector": "structural-topology", + "input": { + "documents": [ + { + "file": "facts/topology.rb", + "language": "ruby", + "lines": [ + "class Fixture", + " def entry", + " self.helper", + " end", + " private def helper; end", + "end" + ], + "function_defs": [ + { + "file": "facts/topology.rb", + "name": "entry", + "owner": "Fixture", + "line": 2, + "span": [ + 2, + 2, + 4, + 5 + ], + "body": { + "kind": "program", + "text": "", + "span": [ + 1, + 0, + 1, + 0 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + "visibility": 
"public", + "params": [ + + ] + }, + { + "file": "facts/topology.rb", + "name": "helper", + "owner": "Fixture", + "line": 5, + "span": [ + 5, + 2, + 5, + 25 + ], + "body": { + "kind": "program", + "text": "", + "span": [ + 1, + 0, + 1, + 0 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + "visibility": "private", + "params": [ + + ] + } + ], + "owner_defs": [ + { + "file": "facts/topology.rb", + "name": "Fixture", + "kind": "class", + "line": 1, + "span": [ + 1, + 0, + 6, + 3 + ] + } + ], + "call_sites": [ + { + "receiver": "self", + "message": "helper", + "file": "facts/topology.rb", + "function": "entry", + "owner": "Fixture", + "line": 3, + "span": [ + 3, + 4, + 3, + 15 + ], + "conditional": false, + "arguments": [ + + ], + "control": "conditional", + "safe_navigation": false, + "block": false + } + ] + } + ] + }, + "expected": { + "edges": [ + { + "callee": "Fixture#helper", + "callee_name": "helper", + "caller": "Fixture#entry", + "caller_name": "entry", + "confidence": "high", + "file": "facts/topology.rb", + "kind": "direct_self", + "line": 3, + "span": [ + 3, + 4, + 3, + 15 + ], + "type": "conditional" + } + ], + "methods": [ + { + "file": "facts/topology.rb", + "id": "Fixture#entry", + "line": 2, + "name": "entry", + "owner": "Fixture", + "span": [ + 2, + 2, + 4, + 5 + ], + "visibility": "public" + }, + { + "file": "facts/topology.rb", + "id": "Fixture#helper", + "line": 5, + "name": "helper", + "owner": "Fixture", + "span": [ + 5, + 2, + 5, + 25 + ], + "visibility": "private" + } + ] + } +} diff --git a/gems/decomplex/examples/facts/detectors/weighted-inlined-complexity-edges.json b/gems/decomplex/examples/facts/detectors/weighted-inlined-complexity-edges.json new file mode 100644 index 000000000..578983ace --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/weighted-inlined-complexity-edges.json @@ -0,0 +1,167 @@ +{ + "detector": "weighted-inlined-complexity", + "input": { + "documents": [ + { + "file": 
"facts/weighted_edges.rb", + "language": "ruby", + "lines": [ + "class Fixture", + " def entry", + " self.helper", + " self.helper", + " self.shared", + " self.missing", + " end", + " def helper", + " self.leaf", + " end", + " def leaf", + " self.deep", + " end", + " def deep", + " end", + " def shared", + " end", + " def hub", + " self.shared", + " end", + " def other", + " self.shared", + " end", + "end" + ], + "function_defs": [ + {"file": "facts/weighted_edges.rb", "name": "entry", "owner": "Fixture", "line": 2, "span": [2, 2, 7, 5], "body": {"kind": "body_statement", "text": "", "span": [2, 2, 7, 5], "named": true, "field_name": null, "children": []}, "visibility": "public", "params": []}, + {"file": "facts/weighted_edges.rb", "name": "helper", "owner": "Fixture", "line": 8, "span": [8, 2, 10, 5], "body": {"kind": "body_statement", "text": "", "span": [8, 2, 10, 5], "named": true, "field_name": null, "children": []}, "visibility": "private", "params": []}, + {"file": "facts/weighted_edges.rb", "name": "leaf", "owner": "Fixture", "line": 11, "span": [11, 2, 13, 5], "body": {"kind": "body_statement", "text": "", "span": [11, 2, 13, 5], "named": true, "field_name": null, "children": []}, "visibility": "private", "params": []}, + {"file": "facts/weighted_edges.rb", "name": "deep", "owner": "Fixture", "line": 14, "span": [14, 2, 15, 5], "body": {"kind": "body_statement", "text": "", "span": [14, 2, 15, 5], "named": true, "field_name": null, "children": []}, "visibility": "private", "params": []}, + {"file": "facts/weighted_edges.rb", "name": "shared", "owner": "Fixture", "line": 16, "span": [16, 2, 17, 5], "body": {"kind": "body_statement", "text": "", "span": [16, 2, 17, 5], "named": true, "field_name": null, "children": []}, "visibility": "public", "params": []}, + {"file": "facts/weighted_edges.rb", "name": "hub", "owner": "Fixture", "line": 18, "span": [18, 2, 20, 5], "body": {"kind": "body_statement", "text": "", "span": [18, 2, 20, 5], "named": true, 
"field_name": null, "children": []}, "visibility": "public", "params": []}, + {"file": "facts/weighted_edges.rb", "name": "other", "owner": "Fixture", "line": 21, "span": [21, 2, 23, 5], "body": {"kind": "body_statement", "text": "", "span": [21, 2, 23, 5], "named": true, "field_name": null, "children": []}, "visibility": "public", "params": []} + ], + "owner_defs": [ + {"file": "facts/weighted_edges.rb", "name": "Fixture", "kind": "class", "line": 1, "span": [1, 0, 24, 3]} + ], + "call_sites": [ + {"receiver": "self", "message": "helper", "file": "facts/weighted_edges.rb", "function": "entry", "owner": "Fixture", "line": 3, "span": [3, 4, 3, 15], "conditional": false, "arguments": [], "control": "conditional", "safe_navigation": false, "block": false}, + {"receiver": "self", "message": "helper", "file": "facts/weighted_edges.rb", "function": "entry", "owner": "Fixture", "line": 4, "span": [4, 4, 4, 15], "conditional": false, "arguments": [], "control": "iterates", "safe_navigation": false, "block": false}, + {"receiver": "self", "message": "shared", "file": "facts/weighted_edges.rb", "function": "entry", "owner": "Fixture", "line": 5, "span": [5, 4, 5, 15], "conditional": false, "arguments": [], "control": null, "safe_navigation": false, "block": false}, + {"receiver": "self", "message": "missing", "file": "facts/weighted_edges.rb", "function": "entry", "owner": "Fixture", "line": 6, "span": [6, 4, 6, 16], "conditional": false, "arguments": [], "control": null, "safe_navigation": false, "block": false}, + {"receiver": "self", "message": "leaf", "file": "facts/weighted_edges.rb", "function": "helper", "owner": "Fixture", "line": 9, "span": [9, 4, 9, 13], "conditional": false, "arguments": [], "control": "iterates", "safe_navigation": false, "block": false}, + {"receiver": "self", "message": "deep", "file": "facts/weighted_edges.rb", "function": "leaf", "owner": "Fixture", "line": 12, "span": [12, 4, 12, 13], "conditional": false, "arguments": [], "control": null, 
"safe_navigation": false, "block": false}, + {"receiver": "self", "message": "shared", "file": "facts/weighted_edges.rb", "function": "hub", "owner": "Fixture", "line": 19, "span": [19, 4, 19, 15], "conditional": false, "arguments": [], "control": null, "safe_navigation": false, "block": false}, + {"receiver": "self", "message": "shared", "file": "facts/weighted_edges.rb", "function": "other", "owner": "Fixture", "line": 22, "span": [22, 4, 22, 15], "conditional": false, "arguments": [], "control": null, "safe_navigation": false, "block": false} + ], + "local_methods": [ + {"id": "Fixture#entry", "owner": "Fixture", "name": "entry", "file": "facts/weighted_edges.rb", "line": 2, "span": [2, 2, 7, 5], "statements": [], "boundaries": []}, + {"id": "Fixture#helper", "owner": "Fixture", "name": "helper", "file": "facts/weighted_edges.rb", "line": 8, "span": [8, 2, 10, 5], "statements": [], "boundaries": []}, + {"id": "Fixture#leaf", "owner": "Fixture", "name": "leaf", "file": "facts/weighted_edges.rb", "line": 11, "span": [11, 2, 13, 5], "statements": [], "boundaries": []}, + {"id": "Fixture#deep", "owner": "Fixture", "name": "deep", "file": "facts/weighted_edges.rb", "line": 14, "span": [14, 2, 15, 5], "statements": [], "boundaries": []}, + {"id": "Fixture#shared", "owner": "Fixture", "name": "shared", "file": "facts/weighted_edges.rb", "line": 16, "span": [16, 2, 17, 5], "statements": [], "boundaries": []}, + {"id": "Fixture#hub", "owner": "Fixture", "name": "hub", "file": "facts/weighted_edges.rb", "line": 18, "span": [18, 2, 20, 5], "statements": [], "boundaries": []}, + {"id": "Fixture#other", "owner": "Fixture", "name": "other", "file": "facts/weighted_edges.rb", "line": 21, "span": [21, 2, 23, 5], "statements": [], "boundaries": []} + ], + "local_complexity_scores": { + "Fixture#entry": {"score": 2.0, "signals": {"branches": 1}}, + "Fixture#helper": {"score": 10.0, "signals": {"branches": 3}}, + "Fixture#leaf": {"score": 8.0, "signals": {"branches": 2}}, + 
"Fixture#deep": {"score": 12.0, "signals": {"branches": 2}}, + "Fixture#shared": {"score": 80.0, "signals": {"branches": 4}}, + "Fixture#hub": {"score": 1.0, "signals": {"branches": 1}}, + "Fixture#other": {"score": 1.0, "signals": {"branches": 1}} + } + } + ] + }, + "expected": [ + { + "at": "facts/weighted_edges.rb:entry:2", + "call_chain": [ + "entry", + "shared" + ], + "depth": 2, + "hidden": 34.6, + "inlined": 36.6, + "local": 2.0, + "method": "entry", + "owner": "Fixture", + "reason": "1 single-caller helper(s) add 34.6 weighted cognitive points", + "signals": { + "branches": 1 + }, + "single_caller_callees": [ + "helper" + ], + "spans": { + "facts/weighted_edges.rb:entry:2": [2, 2, 7, 5] + } + }, + { + "at": "facts/weighted_edges.rb:helper:8", + "call_chain": [ + "helper", + "leaf" + ], + "depth": 2, + "hidden": 17.5, + "inlined": 27.5, + "local": 10.0, + "method": "helper", + "owner": "Fixture", + "reason": "1 single-caller helper(s) add 17.5 weighted cognitive points", + "signals": { + "branches": 3 + }, + "single_caller_callees": [ + "leaf" + ], + "spans": { + "facts/weighted_edges.rb:helper:8": [8, 2, 10, 5] + } + }, + { + "at": "facts/weighted_edges.rb:hub:18", + "call_chain": [ + "hub", + "shared" + ], + "depth": 1, + "hidden": 16.8, + "inlined": 17.8, + "local": 1.0, + "method": "hub", + "owner": "Fixture", + "reason": "same-owner call chain adds 16.8 weighted cognitive points", + "signals": { + "branches": 1 + }, + "single_caller_callees": [], + "spans": { + "facts/weighted_edges.rb:hub:18": [18, 2, 20, 5] + } + }, + { + "at": "facts/weighted_edges.rb:other:21", + "call_chain": [ + "other", + "shared" + ], + "depth": 1, + "hidden": 16.8, + "inlined": 17.8, + "local": 1.0, + "method": "other", + "owner": "Fixture", + "reason": "same-owner call chain adds 16.8 weighted cognitive points", + "signals": { + "branches": 1 + }, + "single_caller_callees": [], + "spans": { + "facts/weighted_edges.rb:other:21": [21, 2, 23, 5] + } + } + ] +} diff --git 
a/gems/decomplex/examples/facts/detectors/weighted-inlined-complexity-rich.json b/gems/decomplex/examples/facts/detectors/weighted-inlined-complexity-rich.json new file mode 100644 index 000000000..2435c065e --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/weighted-inlined-complexity-rich.json @@ -0,0 +1,443 @@ +{ + "detector": "weighted-inlined-complexity", + "input": { + "documents": [ + { + "file": "facts/weighted_rich.rb", + "language": "ruby", + "lines": [ + "class Fixture", + " def entry", + " self.helper", + " self.shared", + " end", + " def helper", + " self.leaf", + " end", + " def leaf; end", + " def shared; end", + " def other", + " self.shared", + " end", + "end" + ], + "function_defs": [ + { + "file": "facts/weighted_rich.rb", + "name": "entry", + "owner": "Fixture", + "line": 2, + "span": [ + 2, + 2, + 5, + 5 + ], + "body": { + "kind": "body_statement", + "text": "", + "span": [ + 2, + 2, + 5, + 5 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + "visibility": "public", + "params": [ + + ] + }, + { + "file": "facts/weighted_rich.rb", + "name": "helper", + "owner": "Fixture", + "line": 6, + "span": [ + 6, + 2, + 8, + 5 + ], + "body": { + "kind": "body_statement", + "text": "", + "span": [ + 6, + 2, + 8, + 5 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + "visibility": "private", + "params": [ + + ] + }, + { + "file": "facts/weighted_rich.rb", + "name": "leaf", + "owner": "Fixture", + "line": 9, + "span": [ + 9, + 2, + 9, + 15 + ], + "body": { + "kind": "body_statement", + "text": "", + "span": [ + 9, + 2, + 9, + 15 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + "visibility": "private", + "params": [ + + ] + }, + { + "file": "facts/weighted_rich.rb", + "name": "shared", + "owner": "Fixture", + "line": 10, + "span": [ + 10, + 2, + 10, + 17 + ], + "body": { + "kind": "body_statement", + "text": "", + "span": [ + 10, + 2, + 10, + 17 + ], + "named": true, + "field_name": 
null, + "children": [ + + ] + }, + "visibility": "public", + "params": [ + + ] + }, + { + "file": "facts/weighted_rich.rb", + "name": "other", + "owner": "Fixture", + "line": 11, + "span": [ + 11, + 2, + 13, + 5 + ], + "body": { + "kind": "body_statement", + "text": "", + "span": [ + 11, + 2, + 13, + 5 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + "visibility": "public", + "params": [ + + ] + } + ], + "owner_defs": [ + { + "file": "facts/weighted_rich.rb", + "name": "Fixture", + "kind": "class", + "line": 1, + "span": [ + 1, + 0, + 14, + 3 + ] + } + ], + "call_sites": [ + { + "receiver": "self", + "message": "helper", + "file": "facts/weighted_rich.rb", + "function": "entry", + "owner": "Fixture", + "line": 3, + "span": [ + 3, + 4, + 3, + 15 + ], + "conditional": false, + "arguments": [ + + ], + "control": "conditional", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "shared", + "file": "facts/weighted_rich.rb", + "function": "entry", + "owner": "Fixture", + "line": 4, + "span": [ + 4, + 4, + 4, + 15 + ], + "conditional": false, + "arguments": [ + + ], + "control": null, + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "leaf", + "file": "facts/weighted_rich.rb", + "function": "helper", + "owner": "Fixture", + "line": 7, + "span": [ + 7, + 4, + 7, + 13 + ], + "conditional": false, + "arguments": [ + + ], + "control": "iterates", + "safe_navigation": false, + "block": false + }, + { + "receiver": "self", + "message": "shared", + "file": "facts/weighted_rich.rb", + "function": "other", + "owner": "Fixture", + "line": 12, + "span": [ + 12, + 4, + 12, + 15 + ], + "conditional": false, + "arguments": [ + + ], + "control": null, + "safe_navigation": false, + "block": false + } + ], + "local_methods": [ + { + "id": "Fixture#entry", + "owner": "Fixture", + "name": "entry", + "file": "facts/weighted_rich.rb", + "line": 2, + "span": [ + 2, + 2, + 5, + 5 + ], + 
"statements": [ + + ], + "boundaries": [ + + ] + }, + { + "id": "Fixture#helper", + "owner": "Fixture", + "name": "helper", + "file": "facts/weighted_rich.rb", + "line": 6, + "span": [ + 6, + 2, + 8, + 5 + ], + "statements": [ + + ], + "boundaries": [ + + ] + }, + { + "id": "Fixture#leaf", + "owner": "Fixture", + "name": "leaf", + "file": "facts/weighted_rich.rb", + "line": 9, + "span": [ + 9, + 2, + 9, + 15 + ], + "statements": [ + + ], + "boundaries": [ + + ] + }, + { + "id": "Fixture#shared", + "owner": "Fixture", + "name": "shared", + "file": "facts/weighted_rich.rb", + "line": 10, + "span": [ + 10, + 2, + 10, + 17 + ], + "statements": [ + + ], + "boundaries": [ + + ] + }, + { + "id": "Fixture#other", + "owner": "Fixture", + "name": "other", + "file": "facts/weighted_rich.rb", + "line": 11, + "span": [ + 11, + 2, + 13, + 5 + ], + "statements": [ + + ], + "boundaries": [ + + ] + } + ], + "local_complexity_scores": { + "Fixture#entry": { + "score": 4.0, + "signals": { + "branches": 1 + } + }, + "Fixture#helper": { + "score": 12.0, + "signals": { + "branches": 3 + } + }, + "Fixture#leaf": { + "score": 11.0, + "signals": { + "branches": 2 + } + }, + "Fixture#shared": { + "score": 10.0, + "signals": { + "branches": 2 + } + }, + "Fixture#other": { + "score": 3.0, + "signals": { + "branches": 1 + } + } + } + } + ] + }, + "expected": [ + { + "at": "facts/weighted_rich.rb:entry:2", + "call_chain": [ + "entry", + "helper" + ], + "depth": 2, + "hidden": 16.8, + "inlined": 20.8, + "local": 4.0, + "method": "entry", + "owner": "Fixture", + "reason": "1 single-caller helper(s) add 16.8 weighted cognitive points", + "signals": { + "branches": 1 + }, + "single_caller_callees": [ + "helper" + ], + "spans": { + "facts/weighted_rich.rb:entry:2": [ + 2, + 2, + 5, + 5 + ] + } + } + ] +} diff --git a/gems/decomplex/examples/facts/detectors/weighted-inlined-complexity.json b/gems/decomplex/examples/facts/detectors/weighted-inlined-complexity.json new file mode 100644 index 
000000000..ca507b48d --- /dev/null +++ b/gems/decomplex/examples/facts/detectors/weighted-inlined-complexity.json @@ -0,0 +1,205 @@ +{ + "detector": "weighted-inlined-complexity", + "input": { + "documents": [ + { + "file": "facts/weighted.rb", + "language": "ruby", + "lines": [ + "class Fixture", + " def entry", + " self.helper", + " end", + " def helper", + " end", + "end" + ], + "function_defs": [ + { + "file": "facts/weighted.rb", + "name": "entry", + "owner": "Fixture", + "line": 2, + "span": [ + 2, + 2, + 4, + 5 + ], + "body": { + "kind": "program", + "text": "", + "span": [ + 1, + 0, + 1, + 0 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + "visibility": "public", + "params": [ + + ] + }, + { + "file": "facts/weighted.rb", + "name": "helper", + "owner": "Fixture", + "line": 5, + "span": [ + 5, + 2, + 6, + 5 + ], + "body": { + "kind": "program", + "text": "", + "span": [ + 1, + 0, + 1, + 0 + ], + "named": true, + "field_name": null, + "children": [ + + ] + }, + "visibility": "private", + "params": [ + + ] + } + ], + "owner_defs": [ + { + "file": "facts/weighted.rb", + "name": "Fixture", + "kind": "class", + "line": 1, + "span": [ + 1, + 0, + 7, + 3 + ] + } + ], + "call_sites": [ + { + "receiver": "self", + "message": "helper", + "file": "facts/weighted.rb", + "function": "entry", + "owner": "Fixture", + "line": 3, + "span": [ + 3, + 4, + 3, + 15 + ], + "conditional": false, + "arguments": [ + + ], + "control": null, + "safe_navigation": false, + "block": false + } + ], + "local_methods": [ + { + "id": "Fixture#entry", + "owner": "Fixture", + "name": "entry", + "file": "facts/weighted.rb", + "line": 2, + "span": [ + 2, + 2, + 4, + 5 + ], + "statements": [ + + ], + "boundaries": [ + + ] + }, + { + "id": "Fixture#helper", + "owner": "Fixture", + "name": "helper", + "file": "facts/weighted.rb", + "line": 5, + "span": [ + 5, + 2, + 6, + 5 + ], + "statements": [ + + ], + "boundaries": [ + + ] + } + ], + "local_complexity_scores": { + 
"Fixture#entry": { + "score": 3.0, + "signals": { + "branches": 1 + } + }, + "Fixture#helper": { + "score": 16.0, + "signals": { + "branches": 4 + } + } + } + } + ] + }, + "expected": [ + { + "at": "facts/weighted.rb:entry:2", + "call_chain": [ + "entry", + "helper" + ], + "depth": 1, + "hidden": 16.0, + "inlined": 19.0, + "local": 3.0, + "method": "entry", + "owner": "Fixture", + "reason": "1 single-caller helper(s) add 16.0 weighted cognitive points", + "signals": { + "branches": 1 + }, + "single_caller_callees": [ + "helper" + ], + "spans": { + "facts/weighted.rb:entry:2": [ + 2, + 2, + 4, + 5 + ] + } + } + ] +} diff --git a/gems/decomplex/examples/facts/local-flow/consumers.json b/gems/decomplex/examples/facts/local-flow/consumers.json new file mode 100644 index 000000000..2d8568754 --- /dev/null +++ b/gems/decomplex/examples/facts/local-flow/consumers.json @@ -0,0 +1,343 @@ +{ + "input": { + "documents": [ + { + "file": "facts/derived.rb", + "language": "ruby", + "local_methods": [ + { + "id": "Fixture#refresh", + "owner": "Fixture", + "name": "refresh", + "file": "facts/derived.rb", + "line": 1, + "span": [1, 0, 20, 3], + "statements": [ + { + "index": 0, + "line": 1, + "end_line": 1, + "span": [1, 0, 1, 13], + "source": "source = load", + "reads": ["load"], + "writes": ["source"], + "dependencies": [["source", "load"]], + "co_uses": [] + }, + { + "index": 1, + "line": 2, + "end_line": 2, + "span": [2, 0, 2, 16], + "source": "derived = source", + "reads": ["source"], + "writes": ["derived"], + "dependencies": [["derived", "source"]], + "co_uses": [] + }, + { + "index": 2, + "line": 7, + "end_line": 7, + "span": [7, 0, 7, 15], + "source": "source = reload", + "reads": ["reload"], + "writes": ["source"], + "dependencies": [["source", "reload"]], + "co_uses": [] + }, + { + "index": 3, + "line": 9, + "end_line": 9, + "span": [9, 0, 9, 12], + "source": "use(derived)", + "reads": ["derived"], + "writes": [], + "dependencies": [], + "co_uses": [] + } + ], + 
"boundaries": [] + } + ] + }, + { + "file": "facts/locality.rb", + "language": "ruby", + "local_complexity_scores": { + "Fixture#assemble": { + "score": 18.0, + "signals": { + "branches": 2 + } + } + }, + "local_methods": [ + { + "id": "Fixture#assemble", + "owner": "Fixture", + "name": "assemble", + "file": "facts/locality.rb", + "line": 1, + "span": [1, 0, 20, 3], + "statements": [ + { + "index": 0, + "line": 1, + "end_line": 1, + "span": [1, 0, 1, 23], + "source": "payload = build_payload", + "reads": ["build_payload"], + "writes": ["payload"], + "dependencies": [["payload", "build_payload"]], + "co_uses": [] + }, + { + "index": 1, + "line": 3, + "end_line": 3, + "span": [3, 0, 3, 18], + "source": "alpha = load_alpha", + "reads": ["load_alpha"], + "writes": ["alpha"], + "dependencies": [["alpha", "load_alpha"]], + "co_uses": [] + }, + { + "index": 2, + "line": 5, + "end_line": 5, + "span": [5, 0, 5, 16], + "source": "beta = load_beta", + "reads": ["load_beta"], + "writes": ["beta"], + "dependencies": [["beta", "load_beta"]], + "co_uses": [] + }, + { + "index": 3, + "line": 7, + "end_line": 7, + "span": [7, 0, 7, 18], + "source": "gamma = load_gamma", + "reads": ["load_gamma"], + "writes": ["gamma"], + "dependencies": [["gamma", "load_gamma"]], + "co_uses": [] + }, + { + "index": 4, + "line": 9, + "end_line": 9, + "span": [9, 0, 9, 18], + "source": "delta = load_delta", + "reads": ["load_delta"], + "writes": ["delta"], + "dependencies": [["delta", "load_delta"]], + "co_uses": [] + }, + { + "index": 5, + "line": 13, + "end_line": 13, + "span": [13, 0, 13, 16], + "source": "consume(payload)", + "reads": ["payload"], + "writes": [], + "dependencies": [], + "co_uses": [] + } + ], + "boundaries": [ + { + "before_index": 0, + "after_index": 5, + "line": 11, + "kind": "comment", + "text": "# phase 2" + } + ] + } + ] + }, + { + "file": "facts/lcom.rb", + "language": "ruby", + "local_methods": [ + { + "id": "Fixture#mixed", + "owner": "Fixture", + "name": "mixed", + 
"file": "facts/lcom.rb", + "line": 1, + "span": [1, 0, 20, 3], + "statements": [ + { + "index": 0, + "line": 1, + "end_line": 1, + "span": [1, 0, 1, 11], + "source": "a = input_a", + "reads": ["input_a"], + "writes": ["a"], + "dependencies": [["a", "input_a"]], + "co_uses": [] + }, + { + "index": 1, + "line": 2, + "end_line": 2, + "span": [2, 0, 2, 16], + "source": "b = normalize(a)", + "reads": ["a"], + "writes": ["b"], + "dependencies": [["b", "a"]], + "co_uses": [] + }, + { + "index": 2, + "line": 3, + "end_line": 3, + "span": [3, 0, 3, 11], + "source": "c = input_c", + "reads": ["input_c"], + "writes": ["c"], + "dependencies": [["c", "input_c"]], + "co_uses": [] + }, + { + "index": 3, + "line": 4, + "end_line": 4, + "span": [4, 0, 4, 16], + "source": "d = normalize(c)", + "reads": ["c"], + "writes": ["d"], + "dependencies": [["d", "c"]], + "co_uses": [] + }, + { + "index": 4, + "line": 5, + "end_line": 5, + "span": [5, 0, 5, 11], + "source": "e = input_e", + "reads": ["input_e"], + "writes": ["e"], + "dependencies": [["e", "input_e"]], + "co_uses": [] + }, + { + "index": 5, + "line": 6, + "end_line": 6, + "span": [6, 0, 6, 16], + "source": "f = normalize(e)", + "reads": ["e"], + "writes": ["f"], + "dependencies": [["f", "e"]], + "co_uses": [] + } + ], + "boundaries": [] + } + ] + } + ] + }, + "expected": { + "derived-state": [ + { + "at": "facts/derived.rb:refresh:2", + "defn": "refresh", + "derived": "derived", + "derived_at": 2, + "file": "facts/derived.rb", + "gap": 5, + "source": "source", + "source_reassigned_at": 7, + "spans": { + "facts/derived.rb:refresh:2": [2, 0, 2, 16] + } + } + ], + "locality-drag": [ + { + "at": "facts/locality.rb:assemble:1", + "boundaries": [ + { + "line": 11, + "marker": "# phase 2" + } + ], + "boundary_crossings": 1, + "defined_at": 1, + "definition_deps": ["build_payload"], + "defn": "assemble", + "examples": [ + { + "line": 3, + "source": "alpha = load_alpha" + }, + { + "line": 5, + "source": "beta = load_beta" + }, + { + 
"line": 7, + "source": "gamma = load_gamma" + } + ], + "file": "facts/locality.rb", + "gap_lines": 12, + "gap_statements": 4, + "line": 1, + "local_complexity": 18.0, + "method": "assemble", + "owner": "Fixture", + "reason": "`payload` is initialized 12 line(s) before first use; 4 unrelated intervening statement(s); 1 structural boundary crossing(s); method local complexity 18.0", + "related_statements": 0, + "score": 63, + "setup_statements": 0, + "spans": { + "facts/locality.rb:assemble:1": [1, 0, 20, 3] + }, + "unrelated_statements": 4, + "use_reads": ["payload"], + "used_at": 13, + "variable": "payload" + } + ], + "function-lcom": [ + { + "at": "facts/lcom.rb:mixed:1", + "component_lines": [ + [1, 2], + [3, 4], + [5, 6] + ], + "component_vars": [ + ["a", "b", "input_a"], + ["c", "d", "input_c"], + ["e", "f", "input_e"] + ], + "components": 3, + "defn": "mixed", + "file": "facts/lcom.rb", + "line": 1, + "locals": 9, + "method": "mixed", + "mode": "disjoint", + "owner": "Fixture", + "score": 45, + "spans": { + "facts/lcom.rb:mixed:1": [1, 0, 20, 3] + }, + "statements": 6, + "terminal_join": false + } + ], + "operational-discontinuity": [] + } +} diff --git a/gems/decomplex/examples/facts/report/postprocess.json b/gems/decomplex/examples/facts/report/postprocess.json new file mode 100644 index 000000000..f39a99842 --- /dev/null +++ b/gems/decomplex/examples/facts/report/postprocess.json @@ -0,0 +1,392 @@ +{ + "input": { + "format": "decomplex.report-facts.v1", + "files": ["facts/report.rb"], + "detectors": { + "miner": { + "missing_abstractions": [ + { + "kind": "conjunction", + "members": ["ready", "valid"], + "support": 2, + "scatter": 2, + "at": "facts/report.rb:checkout:10", + "sites": ["facts/report.rb:checkout:10", "facts/report.rb:refund:30"], + "spans": { + "facts/report.rb:checkout:10": [10, 2, 10, 20] + } + } + ], + "neglected_conditions": [ + { + "pattern": ["ready", "valid"], + "support": 2, + "missing": "valid", + "at": "facts/report.rb:checkout:11", 
+ "spans": { + "facts/report.rb:checkout:11": [11, 2, 11, 18] + } + } + ] + }, + "co_update": { + "co_written_pairs": [ + { + "pair": ["provenance", "storage"], + "support": 3, + "sites": ["facts/report.rb:prepare:4", "facts/report.rb:checkout:12"] + } + ], + "neglected_updates": [ + { + "pair": ["provenance", "storage"], + "support": 3, + "has": "storage", + "missing": "provenance", + "recv": "order", + "at": "facts/report.rb:checkout:12", + "spans": { + "facts/report.rb:checkout:12": [12, 2, 12, 20] + } + } + ] + }, + "predicate_alias": { + "alias_clusters": [] + }, + "semantic_alias": { + "alias_clusters": [ + { + "canon": "storage == READY", + "names": ["ready?", "prepared?"], + "at": "facts/report.rb:checkout:13", + "sites": ["facts/report.rb:checkout:13"], + "spans": { + "facts/report.rb:checkout:13": [13, 2, 13, 18] + } + } + ], + "reification_misses": [ + { + "predicate": "ready?", + "raw": "storage == READY", + "canon": "storage == READY", + "at": "facts/report.rb:checkout:14", + "spans": { + "facts/report.rb:checkout:14": [14, 2, 14, 22] + } + } + ] + }, + "path_condition": { + "neglected": [ + { + "pattern": ["ready", "valid"], + "support": 2, + "missing": "valid", + "action": "ship(order)", + "at": "facts/report.rb:checkout:15", + "spans": { + "facts/report.rb:checkout:15": [15, 2, 15, 22] + } + } + ], + "scattered": [] + }, + "sequence_mine": { + "broken_protocol": [ + { + "pair": ["open", "close"], + "support": 4, + "confidence": 0.8, + "has": "open", + "missing": "close", + "at": "facts/report.rb:checkout:16", + "spans": { + "facts/report.rb:checkout:16": [16, 2, 16, 10] + } + } + ] + }, + "implicit_control_flow": { + "ordered_protocols": [] + }, + "derived_state": [ + { + "file": "facts/report.rb", + "defn": "checkout", + "derived": "storage", + "source": "provenance", + "derived_at": 17, + "source_reassigned_at": 22, + "gap": 5, + "at": "facts/report.rb:checkout:17", + "spans": { + "facts/report.rb:checkout:17": [17, 2, 17, 24] + } + } + ], + 
"inconsistent_rename_clone": [], + "flay_similarity": [], + "decision_pressure": [ + { + "contract": ".storage", + "decisions": 2, + "essential": 1, + "methods": 1, + "sites": ["facts/report.rb:checkout:18"], + "spans": { + "facts/report.rb:checkout:18": [18, 2, 18, 24] + } + } + ], + "redundant_nil_guard": [], + "false_simplicity": [ + { + "kind": "hidden_mutation", + "detail": "storage=", + "support": 1, + "scatter": 1, + "at": "facts/report.rb:checkout:19", + "sites": ["facts/report.rb:checkout:19"], + "spans": { + "facts/report.rb:checkout:19": [19, 2, 19, 14] + } + } + ], + "oversized_predicate": [], + "fat_union": { + "fat_unions": [] + }, + "state_heatmap": [], + "state_branch_density": [ + { + "at": "facts/report.rb:checkout:20", + "file": "facts/report.rb", + "method": "checkout", + "decisions": 1, + "state_refs": ["storage"], + "predicate": "storage.ready?", + "score": 1, + "sites": ["facts/report.rb:checkout:20"], + "spans": { + "facts/report.rb:checkout:20": [20, 2, 20, 24] + } + } + ], + "temporal_ordering_pressure": [], + "weighted_inlined_complexity": [], + "locality_drag": [], + "function_lcom": [], + "operational_discontinuity": [] + } + }, + "expected": { + "convergence": [ + { + "file": "facts/report.rb", + "method": "checkout", + "detectors": [ + "Broken Protocols", + "Decision Pressure", + "Derived-State Staleness", + "False Simplicity", + "Missing Abstractions", + "Neglected Conditions", + "Neglected Path Conditions", + "Neglected Updates", + "Reification Misses", + "Semantic Predicate Aliases", + "State-Based Branch Density" + ], + "n_detectors": 11, + "score": 24, + "findings": 15, + "at": "facts/report.rb:checkout:18" + } + ], + "root_clusters": [ + { + "kind": "name", + "token": "storage", + "detectors": [ + "Decision Pressure", + "Derived-State Staleness", + "False Simplicity", + "Neglected Updates", + "Reification Misses", + "Semantic Predicate Aliases", + "State-Based Branch Density" + ], + "n_detectors": 7, + "support": 7, + "scatter": 
1, + "score": 17, + "fat_union": false, + "fix": "single-source this state (one stamp, or recompute on write) -- the invariant-#16 desync shape", + "sites": [ + "facts/report.rb:checkout:18", + "facts/report.rb:checkout:20", + "facts/report.rb:checkout:14", + "facts/report.rb:checkout:13", + "facts/report.rb:checkout:12", + "facts/report.rb:checkout:17", + "facts/report.rb:checkout:19" + ] + }, + { + "kind": "name", + "token": "ready", + "detectors": [ + "Reification Misses", + "Semantic Predicate Aliases", + "State-Based Branch Density" + ], + "n_detectors": 3, + "support": 3, + "scatter": 1, + "score": 9, + "fat_union": false, + "fix": "reify ONE named predicate/decision and call it everywhere", + "sites": [ + "facts/report.rb:checkout:20", + "facts/report.rb:checkout:14", + "facts/report.rb:checkout:13" + ] + }, + { + "kind": "tuple", + "token": "ready | valid", + "detectors": [ + "Missing Abstractions", + "Neglected Conditions", + "Neglected Path Conditions" + ], + "n_detectors": 3, + "support": 3, + "scatter": 2, + "score": 6, + "fat_union": false, + "fix": "reify ONE named predicate/decision and call it everywhere", + "sites": [ + "facts/report.rb:checkout:10", + "facts/report.rb:refund:30", + "facts/report.rb:checkout:11", + "facts/report.rb:checkout:15" + ] + }, + { + "kind": "name", + "token": "READY", + "detectors": [ + "Reification Misses", + "Semantic Predicate Aliases" + ], + "n_detectors": 2, + "support": 2, + "scatter": 1, + "score": 6, + "fat_union": false, + "fix": "reify ONE named predicate/decision and call it everywhere", + "sites": [ + "facts/report.rb:checkout:14", + "facts/report.rb:checkout:13" + ] + }, + { + "kind": "name", + "token": "provenance", + "detectors": [ + "Derived-State Staleness", + "Neglected Updates" + ], + "n_detectors": 2, + "support": 2, + "scatter": 1, + "score": 4, + "fat_union": false, + "fix": "single-source this state (one stamp, or recompute on write) -- the invariant-#16 desync shape", + "sites": [ + 
"facts/report.rb:checkout:12", + "facts/report.rb:checkout:17" + ] + }, + { + "kind": "name", + "token": "valid", + "detectors": [ + "Neglected Conditions", + "Neglected Path Conditions" + ], + "n_detectors": 2, + "support": 2, + "scatter": 1, + "score": 3, + "fat_union": false, + "fix": "converging structural debt -- resolve once at the named entity", + "sites": [ + "facts/report.rb:checkout:11", + "facts/report.rb:checkout:15" + ] + } + ], + "sarif": { + "rule_count": 25, + "result_count": 8, + "rule_ids": [ + "decomplex.decision-pressure", + "decomplex.missing-abstractions", + "decomplex.reification-misses", + "decomplex.semantic-predicate-aliases", + "decomplex.state-based-branch-density", + "decomplex.derived-state-staleness", + "decomplex.neglected-conditions", + "decomplex.neglected-updates" + ], + "messages": [ + "Decision Pressure: `.storage` creates 2 eliminable guard decision(s) across 1 method(s)", + "Missing Abstractions: guard tuple `ready | valid` repeats in 2 site(s) with scatter=2", + "Reification Misses: predicate `ready?` is reinvented inline as `storage == READY`", + "Semantic Predicate Aliases: predicate aliases `ready? 
= prepared?` for `storage == READY`", + "State-Based Branch Density: 1 state-based branch decision(s) over `storage`; example predicate `storage.ready?`", + "Derived-State Staleness: `storage` derived from `provenance` at line 17; `provenance` reassigned at line 22 but `storage` is not recomputed", + "Neglected Conditions: missing condition `valid` from `ready | valid` (support=2)", + "Neglected Updates: writes `.storage` but not co-written `.provenance` on receiver `order` (support=3)" + ], + "locations": [ + { + "uri": "facts/report.rb", + "startLine": 18 + }, + { + "uri": "facts/report.rb", + "startLine": 10 + }, + { + "uri": "facts/report.rb", + "startLine": 14 + }, + { + "uri": "facts/report.rb", + "startLine": 13 + }, + { + "uri": "facts/report.rb", + "startLine": 20 + }, + { + "uri": "facts/report.rb", + "startLine": 17 + }, + { + "uri": "facts/report.rb", + "startLine": 11 + }, + { + "uri": "facts/report.rb", + "startLine": 12 + } + ] + } + } +} diff --git a/gems/decomplex/examples/facts/report/postprocess.md b/gems/decomplex/examples/facts/report/postprocess.md new file mode 100644 index 000000000..ff44d4644 --- /dev/null +++ b/gems/decomplex/examples/facts/report/postprocess.md @@ -0,0 +1,222 @@ +# Decomplex Report + +> Decision-level duplication and neglected-condition analysis. +> Every entry is a ranked **candidate** (Engler's discipline), +> never a verdict -- *POSSIBLE* findings, triaged by a human. +> Sections are ordered by SIGNAL TIER (1 = lowest false +> positive), not by volume. Items within a section are +> frequency-ranked. Triage tier 1, top-of-list, first. 
+ +## Table of Contents +- [Project Prioritization](#project-prioritization) +- [Cross-Detector Convergence (1)](#cross-detector-convergence-1) +- [Root-Cause Clusters (6)](#root-cause-clusters-6) +- [Decision Pressure (1)](#decision-pressure-1) +- [Redundant Nil Guards (0)](#redundant-nil-guards-0) +- [State Heatmap (0)](#state-heatmap-0) +- [State-Based Branch Density (1)](#statebased-branch-density-1) +- [Temporal Ordering Pressure (0)](#temporal-ordering-pressure-0) +- [Missing Abstractions (1)](#missing-abstractions-1) +- [Reification Misses (1)](#reification-misses-1) +- [Semantic Predicate Aliases (1)](#semantic-predicate-aliases-1) +- [Exact Predicate Aliases (0)](#exact-predicate-aliases-0) +- [Inconsistent Rename Clones (0)](#inconsistent-rename-clones-0) +- [Structural Similarity (Type-2/3) (0)](#structural-similarity-type23-0) +- [Neglected Updates (1)](#neglected-updates-1) +- [Derived-State Staleness (1)](#derivedstate-staleness-1) +- [Neglected Conditions (1)](#neglected-conditions-1) +- [Neglected Path Conditions (1)](#neglected-path-conditions-1) +- [Oversized Predicates (0)](#oversized-predicates-0) +- [Broken Protocols (1)](#broken-protocols-1) +- [Implicit Control Flow (0)](#implicit-control-flow-0) +- [Weighted Inlined Cognitive Complexity (0)](#weighted-inlined-cognitive-complexity-0) +- [Locality Drag (0)](#locality-drag-0) +- [Operational Discontinuity (High Confidence) (0)](#operational-discontinuity-high-confidence-0) +- [Function LCOM (0)](#function-lcom-0) +- [Operational Discontinuity (0)](#operational-discontinuity-0) +- [False Simplicity (1)](#false-simplicity-1) +- [Fat Unions (0)](#fat-unions-0) +- [Run Summary](#run-summary) + +## Project Prioritization +_Ordered by signal tier (1 = highest signal / lowest FP), then by volume._ + +- **[tier 1]** [Decision Pressure (1)](#decision-pressure-1): ELIMINABLE guard-pressure per loose contract (nil/is_a?/respond_to?/safe-nav/rescue-nil) -> tighten the contract once / nil-kill: DELETE. 
essential dispatch + pure c-uses are split out, NEVER summed (Rapps-Weyuker p-use; McCabe) +- **[tier 1]** [State-Based Branch Density (1)](#statebased-branch-density-1): branch decisions over mutable/object state -- state + control-flow pressure +- **[tier 1]** [Missing Abstractions (1)](#missing-abstractions-1): guard tuple recomputed across >=2 decision units +- **[tier 1]** [Reification Misses (1)](#reification-misses-1): an existing predicate reinvented inline -- invariant #16 +- **[tier 1]** [Semantic Predicate Aliases (1)](#semantic-predicate-aliases-1): one decision, multiple names (receiver/polarity folded) +- **[tier 2]** [Neglected Updates (1)](#neglected-updates-1): co-written state, one write missing -- *POSSIBLE* redundant-state desync +- **[tier 2]** [Derived-State Staleness (1)](#derivedstate-staleness-1): b = f(a); a later reassigned, b not recomputed -- *POSSIBLE* bug +- **[tier 2]** [Neglected Conditions (1)](#neglected-conditions-1): dispatch/conjunction minus one element -- *POSSIBLE* bug +- **[tier 3]** [Neglected Path Conditions (1)](#neglected-path-conditions-1): nested-if/&& guard set minus one atom -- *POSSIBLE* bug (noisy) +- **[tier 3]** [Broken Protocols (1)](#broken-protocols-1): co-called pair, one site does A without B -- *POSSIBLE* bug (noisy) +- **[tier 3]** [False Simplicity (1)](#false-simplicity-1): looks simple, behaves non-locally: hidden dispatch/mutation/IO/context/reflection/reopen -- *POSSIBLE* (noisy) + +## Cross-Detector Convergence (1) +_(file, method) units flagged by >=2 INDEPENDENT detectors -- the strongest triage signal: agreement outranks any single detector's volume. Tier-weighted (1=3, 2=2, 3=1). 
**Start here.**_ + +- `facts/report.rb:18` (checkout) -- **11 detectors** [score 24, 15 findings]: Broken Protocols, Decision Pressure, Derived-State Staleness, False Simplicity, Missing Abstractions, Neglected Conditions, Neglected Path Conditions, Neglected Updates, Reification Misses, Semantic Predicate Aliases, State-Based Branch Density + +### By file +- `facts/report.rb` -- 11 detectors across 1 method(s): Broken Protocols, Decision Pressure, Derived-State Staleness, False Simplicity, Missing Abstractions, Neglected Conditions, Neglected Path Conditions, Neglected Updates, Reification Misses, Semantic Predicate Aliases, State-Based Branch Density + +## Root-Cause Clusters (6) +_Findings across >=2 INDEPENDENT detectors that name the SAME entity -- 'N findings are really one invariant'. Convergence says where to look; this says **what one fix collapses the cluster**. Ranked candidate, not a verdict._ + +- **[name]** `storage` -- **7 detectors** [score 17] across 1 unit(s), 7 findings: Decision Pressure, Derived-State Staleness, False Simplicity, Neglected Updates, Reification Misses, Semantic Predicate Aliases, State-Based Branch Density + - FIX: single-source this state (one stamp, or recompute on write) -- the invariant-#16 desync shape + - `facts/report.rb:18` (checkout) ; `facts/report.rb:20` (checkout) ; `facts/report.rb:14` (checkout) ; `facts/report.rb:13` (checkout) +- **[name]** `ready` -- **3 detectors** [score 9] across 1 unit(s), 3 findings: Reification Misses, Semantic Predicate Aliases, State-Based Branch Density + - FIX: reify ONE named predicate/decision and call it everywhere + - `facts/report.rb:20` (checkout) ; `facts/report.rb:14` (checkout) ; `facts/report.rb:13` (checkout) +- **[tuple]** `ready | valid` -- **3 detectors** [score 6] across 2 unit(s), 3 findings: Missing Abstractions, Neglected Conditions, Neglected Path Conditions + - FIX: reify ONE named predicate/decision and call it everywhere + - `facts/report.rb:10` (checkout) ; 
`facts/report.rb:30` (refund) ; `facts/report.rb:11` (checkout) ; `facts/report.rb:15` (checkout) +- **[name]** `READY` -- **2 detectors** [score 6] across 1 unit(s), 2 findings: Reification Misses, Semantic Predicate Aliases + - FIX: reify ONE named predicate/decision and call it everywhere + - `facts/report.rb:14` (checkout) ; `facts/report.rb:13` (checkout) +- **[name]** `provenance` -- **2 detectors** [score 4] across 1 unit(s), 2 findings: Derived-State Staleness, Neglected Updates + - FIX: single-source this state (one stamp, or recompute on write) -- the invariant-#16 desync shape + - `facts/report.rb:12` (checkout) ; `facts/report.rb:17` (checkout) +- **[name]** `valid` -- **2 detectors** [score 3] across 1 unit(s), 2 findings: Neglected Conditions, Neglected Path Conditions + - FIX: converging structural debt -- resolve once at the named entity + - `facts/report.rb:11` (checkout) ; `facts/report.rb:15` (checkout) + +## Decision Pressure (1) +_ELIMINABLE guard-pressure per loose contract (nil/is_a?/respond_to?/safe-nav/rescue-nil) -> tighten the contract once / nil-kill: DELETE. essential dispatch + pure c-uses are split out, NEVER summed (Rapps-Weyuker p-use; McCabe)_ + +- `.storage` -- ELIMINABLE guard-pressure **2** across 1 method(s) -> tighten contract / nil-kill: DELETE (+1 essential dispatch on this contract -- legitimate; leave unless Fat-Union/Missing-Abstractions says re-derived) + - `facts/report.rb:18` (checkout) + +## Redundant Nil Guards (0) +_nil checks / safe-nav dominated by an earlier non-nil proof -- delete repeated control flow or tighten the type_ + +None. + +## State Heatmap (0) +_state fields ranked by write/read/re-derivation scatter -- tangled mutable state should get one owner_ + +None. 
+ +## State-Based Branch Density (1) +_branch decisions over mutable/object state -- state + control-flow pressure_ + +- `facts/report.rb:20` (checkout) -- **1** state-based branch decision(s), refs=`storage` score=1 + - example predicate: `storage.ready?` + +## Temporal Ordering Pressure (0) +_public mutable lifecycle surfaces that create implicit state-machine ordering_ + +None. + +## Missing Abstractions (1) +_guard tuple recomputed across >=2 decision units_ + +- **[conjunction]** support=2 scatter=2 rank= + - tuple: `ready | valid` + - `facts/report.rb:10` (checkout) ; `facts/report.rb:30` (refund) + +## Reification Misses (1) +_an existing predicate reinvented inline -- invariant #16_ + +- predicate `ready?` reinvented inline at `facts/report.rb:14` (checkout) (`storage == READY`) + +## Semantic Predicate Aliases (1) +_one decision, multiple names (receiver/polarity folded)_ + +- `ready? = prepared?` == `storage == READY` + - `facts/report.rb:13` (checkout) + +## Exact Predicate Aliases (0) +_identical one-line predicate body under >=2 names_ + +None. + +## Inconsistent Rename Clones (0) +_pasted block with inconsistent identifier mapping -- *POSSIBLE* missed rename bug_ + +None. + +## Structural Similarity (Type-2/3) (0) +_Tree-sitter structural clone pressure: Type-2 renamed clones and Type-3 fuzzy clones -- refactor pressure, not a verdict_ + +None. 
+ +## Neglected Updates (1) +_co-written state, one write missing -- *POSSIBLE* redundant-state desync_ + +- *POSSIBLE* (support=3) `facts/report.rb:12` (checkout) writes `.storage` but NOT `.provenance` (recv `order`) + +## Derived-State Staleness (1) +_b = f(a); a later reassigned, b not recomputed -- *POSSIBLE* bug_ + +- *POSSIBLE* `facts/report.rb:17` (checkout): `storage` derived from `provenance` (line 17); `provenance` reassigned line 22, `storage` not recomputed + +## Neglected Conditions (1) +_dispatch/conjunction minus one element -- *POSSIBLE* bug_ + +- *POSSIBLE* (support=2) `facts/report.rb:11` (checkout) -- MISSING `valid` from `ready | valid` + +## Neglected Path Conditions (1) +_nested-if/&& guard set minus one atom -- *POSSIBLE* bug (noisy)_ + +- *POSSIBLE* (support=2) `facts/report.rb:15` (checkout) -- MISSING `valid` from `ready | valid` + +## Oversized Predicates (0) +_predicate with >3 condition atoms -- use an existing helper or extract a named predicate_ + +None. + +## Broken Protocols (1) +_co-called pair, one site does A without B -- *POSSIBLE* bug (noisy)_ + +- *POSSIBLE* conf=0.8 support=4 `facts/report.rb:16` (checkout) does `open` without `close` + +## Implicit Control Flow (0) +_state-dependent internal call order exists -- hidden lifecycle/control-flow pressure_ + +None. + +## Weighted Inlined Cognitive Complexity (0) +_same-owner helper chain hides cognitive load behind a low-looking orchestration method_ + +None. + +## Locality Drag (0) +_local initialized far before first use while unrelated work runs -- move setup closer or extract a private phase_ + +None. + +## Operational Discontinuity (High Confidence) (0) +_strong blank/comment phase boundary where local variable lifetimes reset -- likely implicit sub-function boundary_ + +None. + +## Function LCOM (0) +_independent local data-flow components inside one method -- *POSSIBLE* mixed concerns_ + +None. 
+ +## Operational Discontinuity (0) +_blank/comment phase boundary where local variable lifetimes reset -- *POSSIBLE* implicit sub-function boundary_ + +None. + +## False Simplicity (1) +_looks simple, behaves non-locally: hidden dispatch/mutation/IO/context/reflection/reopen -- *POSSIBLE* (noisy)_ + +- *POSSIBLE* [hidden_mutation] scatter=1 support=1 `storage=` -- `facts/report.rb:19` (checkout) + +## Fat Unions (0) +_case dispatch over class consts whose arms read mostly variant-invariant members -- product-vs-sum decomposition candidate (extraction -> nil-kill) -- *POSSIBLE*_ + +None. + +## Run Summary +- Files analyzed: 1 +- Detectors: 25 (all shipped, self-tested) +- Convergence: 1 unit(s) flagged by >=2 independent detectors +- Root-cause clusters: 6 (one fix collapses each) +- Total candidates: 11 +- Method: stdlib AST only, intra-procedural, zero deps, no CFG / no points-to; Type-2/3 similarity uses Tree-sitter structural fingerprints (see docs/agents/design.md) diff --git a/gems/decomplex/examples/go/co-update.go b/gems/decomplex/examples/go/co-update.go new file mode 100644 index 000000000..7bc6047a7 --- /dev/null +++ b/gems/decomplex/examples/go/co-update.go @@ -0,0 +1,5 @@ +package main +func stable_one(node Node) { node.storage = 1; node.provenance = 1 } +func stable_two(node Node) { node.storage = 1; node.provenance = 1 } +func stable_three(node Node) { node.storage = 1; node.provenance = 1 } +func misses_provenance(node Node) { node.storage = 1 } diff --git a/gems/decomplex/examples/go/decision-pressure.go b/gems/decomplex/examples/go/decision-pressure.go new file mode 100644 index 000000000..84f811f68 --- /dev/null +++ b/gems/decomplex/examples/go/decision-pressure.go @@ -0,0 +1,2 @@ +package main +func scan(node Node) bool { value := node.symbol; return value.isNull() } diff --git a/gems/decomplex/examples/go/derived-state.go b/gems/decomplex/examples/go/derived-state.go new file mode 100644 index 000000000..1c6997b53 --- /dev/null +++ 
b/gems/decomplex/examples/go/derived-state.go @@ -0,0 +1,2 @@ +package main +func check(input int) { cached := input + 1; input = 2; print(cached) } diff --git a/gems/decomplex/examples/go/false-simplicity.go b/gems/decomplex/examples/go/false-simplicity.go new file mode 100644 index 000000000..3ce4c70a3 --- /dev/null +++ b/gems/decomplex/examples/go/false-simplicity.go @@ -0,0 +1,3 @@ +package main +type FalseSimplicityExample struct {} +func (self FalseSimplicityExample) hack() { print("hidden IO") } diff --git a/gems/decomplex/examples/go/fat-union.go b/gems/decomplex/examples/go/fat-union.go new file mode 100644 index 000000000..d88b1deee --- /dev/null +++ b/gems/decomplex/examples/go/fat-union.go @@ -0,0 +1,8 @@ +package main +func handle(node Node) { + switch node { + case AST.Call: node.line(); node.col(); node.ty(); node.span(); node.parent(); node.recv() + case AST.Func: node.line(); node.col(); node.ty(); node.span(); node.parent(); node.name() + case AST.Lit: node.line(); node.col(); node.ty(); node.span(); node.parent(); node.value() + } +} diff --git a/gems/decomplex/examples/go/flay-similarity.go b/gems/decomplex/examples/go/flay-similarity.go new file mode 100644 index 000000000..e6fa6e4db --- /dev/null +++ b/gems/decomplex/examples/go/flay-similarity.go @@ -0,0 +1,3 @@ +package main +func first_clone(node Node) int { total := 0; value1 := node.part1; if value1.ready() && value1.enabled() { total += value1.amount } value2 := node.part2; if value2.ready() && value2.enabled() { total += value2.amount } value3 := node.part3; if value3.ready() && value3.enabled() { total += value3.amount } value4 := node.part4; if value4.ready() && value4.enabled() { total += value4.amount } value5 := node.part5; if value5.ready() && value5.enabled() { total += value5.amount } value6 := node.part6; if value6.ready() && value6.enabled() { total += value6.amount } value7 := node.part7; if value7.ready() && value7.enabled() { total += value7.amount } value8 := node.part8; 
if value8.ready() && value8.enabled() { total += value8.amount } return total } +func second_clone(entry Node) int { total := 0; item1 := entry.part1; if item1.ready() && item1.enabled() { total += item1.amount } item2 := entry.part2; if item2.ready() && item2.enabled() { total += item2.amount } item3 := entry.part3; if item3.ready() && item3.enabled() { total += item3.amount } item4 := entry.part4; if item4.ready() && item4.enabled() { total += item4.amount } item5 := entry.part5; if item5.ready() && item5.enabled() { total += item5.amount } item6 := entry.part6; if item6.ready() && item6.enabled() { total += item6.amount } item7 := entry.part7; if item7.ready() && item7.enabled() { total += item7.amount } item8 := entry.part8; if item8.ready() && item8.enabled() { total += item8.amount } return total } diff --git a/gems/decomplex/examples/go/function-lcom.go b/gems/decomplex/examples/go/function-lcom.go new file mode 100644 index 000000000..0a6d644fc --- /dev/null +++ b/gems/decomplex/examples/go/function-lcom.go @@ -0,0 +1,13 @@ +package main +func mixed(price int, tax int, logger Logger) Result { + subtotal := price + tax + total := subtotal * 2 + rounded := total.round() + + timestamp := now() + buffer := Buffer_init() + buffer.push(timestamp) + logger.info(buffer) + + return Result_init(rounded, buffer) +} diff --git a/gems/decomplex/examples/go/implicit-control-flow.go b/gems/decomplex/examples/go/implicit-control-flow.go new file mode 100644 index 000000000..3aae75762 --- /dev/null +++ b/gems/decomplex/examples/go/implicit-control-flow.go @@ -0,0 +1,10 @@ +package main +type FlowExample struct { status int; valid bool; done bool } +func (self *FlowExample) prepare() { self.status = 1 } +func (self *FlowExample) validate() { self.valid = self.status == 1 } +func (self *FlowExample) commit() { self.done = self.valid } +func (self *FlowExample) ok1() { self.prepare(); self.validate(); self.commit() } +func (self *FlowExample) ok2() { self.prepare(); 
self.validate(); self.commit() } +func (self *FlowExample) ok3() { self.prepare(); self.validate(); self.commit() } +func (self *FlowExample) ok4() { self.prepare(); self.validate(); self.commit() } +func (self *FlowExample) drift() { self.validate(); self.prepare(); self.commit() } diff --git a/gems/decomplex/examples/go/inconsistent-rename-clone.go b/gems/decomplex/examples/go/inconsistent-rename-clone.go new file mode 100644 index 000000000..5c55f1b9b --- /dev/null +++ b/gems/decomplex/examples/go/inconsistent-rename-clone.go @@ -0,0 +1,3 @@ +package main +func original() { src := fetch(1); check(src); store(src); finalize(src) } +func pasted() { dst := fetch(2); check(dst); store(src); finalize(dst) } diff --git a/gems/decomplex/examples/go/local-flow.go b/gems/decomplex/examples/go/local-flow.go new file mode 100644 index 000000000..63d08007e --- /dev/null +++ b/gems/decomplex/examples/go/local-flow.go @@ -0,0 +1,10 @@ +package main +func mixed(price int, tax int) Result { + subtotal := price + tax + total := subtotal.round() + + timestamp := now() + buffer := Buffer_init() + buffer.push(timestamp) + return Result_init(total, buffer) +} diff --git a/gems/decomplex/examples/go/locality-drag.go b/gems/decomplex/examples/go/locality-drag.go new file mode 100644 index 000000000..0ea9ebeef --- /dev/null +++ b/gems/decomplex/examples/go/locality-drag.go @@ -0,0 +1,31 @@ +package main +func run(user User, cart Cart, logger Logger) { + receipt_id := user.id + + total := cart.total + if total > 100 { + if cart.discountable() { + discount := 10 + _ = discount + } + } + if cart.taxable() { + if cart.region { + tax := total * 2 + _ = tax + } + } + if logger.enabled() { + if logger.debug() { + logger.info(total) + } + } + if cart.valid() { + if cart.ready() { + status := 1 + _ = status + } + } + + emit(receipt_id) +} diff --git a/gems/decomplex/examples/go/miner.go b/gems/decomplex/examples/go/miner.go new file mode 100644 index 000000000..0613414c5 --- /dev/null +++ 
b/gems/decomplex/examples/go/miner.go @@ -0,0 +1,5 @@ +package main +func one(a bool,b bool,c bool) bool { return a && b && c } +func two(a bool,b bool,c bool) bool { return a && b && c } +func three(a bool,b bool,c bool) bool { return a && b && c } +func broken(a bool,b bool) bool { return a && b } diff --git a/gems/decomplex/examples/go/operational-discontinuity.go b/gems/decomplex/examples/go/operational-discontinuity.go new file mode 100644 index 000000000..f577d642c --- /dev/null +++ b/gems/decomplex/examples/go/operational-discontinuity.go @@ -0,0 +1,11 @@ +package main +func phase_shift() { + a := 1 + b := 2 + _ = a; _ = b + + // Phase 2 + x := 3 + y := 4 + print(x); print(y) +} diff --git a/gems/decomplex/examples/go/oversized-predicate.go b/gems/decomplex/examples/go/oversized-predicate.go new file mode 100644 index 000000000..98feb9fb3 --- /dev/null +++ b/gems/decomplex/examples/go/oversized-predicate.go @@ -0,0 +1,2 @@ +package main +func complex_check(a bool,b bool,c bool,d bool) { if a && b && c && d { print("too big") } } diff --git a/gems/decomplex/examples/go/path-condition.go b/gems/decomplex/examples/go/path-condition.go new file mode 100644 index 000000000..60ac5b8a1 --- /dev/null +++ b/gems/decomplex/examples/go/path-condition.go @@ -0,0 +1,5 @@ +package main +func one(x X,y Y,z Z) { if x.p() && y.q() && z.r() { go(x) } } +func two(x X,y Y,z Z) { if x.p() && y.q() && z.r() { go(x) } } +func three(x X,y Y,z Z) { if x.p() && y.q() && z.r() { go(x) } } +func bug(x X,y Y,z Z) { if x.p() && y.q() { go(x) } } diff --git a/gems/decomplex/examples/go/predicate-alias.go b/gems/decomplex/examples/go/predicate-alias.go new file mode 100644 index 000000000..86b2ce5eb --- /dev/null +++ b/gems/decomplex/examples/go/predicate-alias.go @@ -0,0 +1,4 @@ +package main +func first() bool { return true } +func second() bool { return true } +func other() bool { return false } diff --git a/gems/decomplex/examples/go/redundant-nil-guard.go 
b/gems/decomplex/examples/go/redundant-nil-guard.go new file mode 100644 index 000000000..939e32535 --- /dev/null +++ b/gems/decomplex/examples/go/redundant-nil-guard.go @@ -0,0 +1,2 @@ +package main +func check(value Value) { if value.isSome() { value.isNull() } } diff --git a/gems/decomplex/examples/go/semantic-alias.go b/gems/decomplex/examples/go/semantic-alias.go new file mode 100644 index 000000000..dc9d43838 --- /dev/null +++ b/gems/decomplex/examples/go/semantic-alias.go @@ -0,0 +1,5 @@ +package main +func frame(node Node) bool { return node.provenance == FRAME } +func is_frame(node Node) bool { return provenance == FRAME } +func heap(node Node) bool { return node.provenance == HEAP } +func somewhere(node Node) int { if node.provenance == FRAME { return 1 }; return 0 } diff --git a/gems/decomplex/examples/go/sequence-mine.go b/gems/decomplex/examples/go/sequence-mine.go new file mode 100644 index 000000000..6f2c7d83a --- /dev/null +++ b/gems/decomplex/examples/go/sequence-mine.go @@ -0,0 +1,6 @@ +package main +func one() { alloc_mark(x); body1(); cleanup(x) } +func two() { alloc_mark(y); body2(); cleanup(y) } +func three() { alloc_mark(z); body3(); cleanup(z) } +func four() { alloc_mark(w); body4(); cleanup(w) } +func leak() { alloc_mark(q); use_value(q) } diff --git a/gems/decomplex/examples/go/state-branch-density.go b/gems/decomplex/examples/go/state-branch-density.go new file mode 100644 index 000000000..460cc9a17 --- /dev/null +++ b/gems/decomplex/examples/go/state-branch-density.go @@ -0,0 +1,3 @@ +package main +type StateBranchChecker struct { checked bool } +func (self *StateBranchChecker) check(admin bool, name string) { if admin { self.checked = true } if self.checked && name == "admin" { print("hello") } } diff --git a/gems/decomplex/examples/go/state-mesh.go b/gems/decomplex/examples/go/state-mesh.go new file mode 100644 index 000000000..14ede1069 --- /dev/null +++ b/gems/decomplex/examples/go/state-mesh.go @@ -0,0 +1,6 @@ +package main +type 
StateMeshExample struct { a int; b int } +func (self *StateMeshExample) initialize() { self.a = 1; self.b = 2 } +func (self *StateMeshExample) writer() { self.a = 3 } +func (self *StateMeshExample) reader() int { return self.a + self.b } +func (self *StateMeshExample) a_alias() int { return self.a } diff --git a/gems/decomplex/examples/go/structural-topology.go b/gems/decomplex/examples/go/structural-topology.go new file mode 100644 index 000000000..c3b5805f8 --- /dev/null +++ b/gems/decomplex/examples/go/structural-topology.go @@ -0,0 +1,7 @@ +package main +type Worker struct {} +func (self *Worker) run(items Items) { self.prepare(); if self.ready() { self.validate() }; for _, item := range items { self.helper(item) } } +func (self *Worker) prepare() {} +func (self *Worker) ready() bool { return true } +func (self *Worker) validate() {} +func (self *Worker) helper(item Item) { item.use() } diff --git a/gems/decomplex/examples/go/temporal-ordering-pressure.go b/gems/decomplex/examples/go/temporal-ordering-pressure.go new file mode 100644 index 000000000..5b29ea4e4 --- /dev/null +++ b/gems/decomplex/examples/go/temporal-ordering-pressure.go @@ -0,0 +1,6 @@ +package main +type TemporalOrderExample struct { a int; b int } +func (self *TemporalOrderExample) One() { self.a = 1 } +func (self *TemporalOrderExample) Two() { self.a = 2; self.b = 3 } +func (self *TemporalOrderExample) Three() { self.b = 4 } +func (self *TemporalOrderExample) Reader() int { return self.a } diff --git a/gems/decomplex/examples/go/weighted-inlined-complexity.go b/gems/decomplex/examples/go/weighted-inlined-complexity.go new file mode 100644 index 000000000..75881fb16 --- /dev/null +++ b/gems/decomplex/examples/go/weighted-inlined-complexity.go @@ -0,0 +1,6 @@ +package main +func checkout(user User, cart Cart) { validate_user(user); apply_discount(cart); process_payment(user, cart); audit_cart(cart) } +func validate_user(user User) bool { if user.active() && !user.suspended() { if 
user.profile.complete() { return true } else { return false } } else { return false } } +func apply_discount(cart Cart) int { if cart.total > 100 && eligible() { if holiday() { return 20 } else if loyalty_month() { return 15 } else { return 10 } }; return 0 } +func process_payment(user User, cart Cart) { if gateway.ready() { if cart.total > 0 && user.active() { if fraud_check(user) { charge(user, cart) } else { decline(user) } } } } +func audit_cart(cart Cart) { for _, item := range cart.items { if item.taxable() { if item.region && item.amount > 0 { record_tax(item) } } } } diff --git a/gems/decomplex/examples/java/co-update.java b/gems/decomplex/examples/java/co-update.java new file mode 100644 index 000000000..093c07fbe --- /dev/null +++ b/gems/decomplex/examples/java/co-update.java @@ -0,0 +1,6 @@ +class Example { + static void stable_one(Node node) { node.storage = 1; node.provenance = 1; } + static void stable_two(Node node) { node.storage = 1; node.provenance = 1; } + static void stable_three(Node node) { node.storage = 1; node.provenance = 1; } + static void misses_provenance(Node node) { node.storage = 1; } +} diff --git a/gems/decomplex/examples/java/decision-pressure.java b/gems/decomplex/examples/java/decision-pressure.java new file mode 100644 index 000000000..f86ec54a1 --- /dev/null +++ b/gems/decomplex/examples/java/decision-pressure.java @@ -0,0 +1 @@ +class Example { static boolean scan(Node node) { Value value = node.symbol; return value.isNull(); } } diff --git a/gems/decomplex/examples/java/derived-state.java b/gems/decomplex/examples/java/derived-state.java new file mode 100644 index 000000000..a514b815a --- /dev/null +++ b/gems/decomplex/examples/java/derived-state.java @@ -0,0 +1 @@ +class Example { static void check(int input) { int cached = input + 1; input = 2; print(cached); } } diff --git a/gems/decomplex/examples/java/false-simplicity.java b/gems/decomplex/examples/java/false-simplicity.java new file mode 100644 index 
000000000..78fb3897c --- /dev/null +++ b/gems/decomplex/examples/java/false-simplicity.java @@ -0,0 +1 @@ +class FalseSimplicityExample { void hack() { print("hidden IO"); } } diff --git a/gems/decomplex/examples/java/fat-union.java b/gems/decomplex/examples/java/fat-union.java new file mode 100644 index 000000000..ad280adb7 --- /dev/null +++ b/gems/decomplex/examples/java/fat-union.java @@ -0,0 +1 @@ +class Example { static void handle(Node node) { switch (node) { case AST.Call: node.line(); node.col(); node.ty(); node.span(); node.parent(); node.recv(); break; case AST.Func: node.line(); node.col(); node.ty(); node.span(); node.parent(); node.name(); break; case AST.Lit: node.line(); node.col(); node.ty(); node.span(); node.parent(); node.value(); break; } } } diff --git a/gems/decomplex/examples/java/flay-similarity.java b/gems/decomplex/examples/java/flay-similarity.java new file mode 100644 index 000000000..a1f4beee8 --- /dev/null +++ b/gems/decomplex/examples/java/flay-similarity.java @@ -0,0 +1,75 @@ +class Example { + static int first_clone(Node node) { + int total = 0; + var value1 = node.part1; + if (value1.ready() && value1.enabled()) { + total += value1.amount; + } + var value2 = node.part2; + if (value2.ready() && value2.enabled()) { + total += value2.amount; + } + var value3 = node.part3; + if (value3.ready() && value3.enabled()) { + total += value3.amount; + } + var value4 = node.part4; + if (value4.ready() && value4.enabled()) { + total += value4.amount; + } + var value5 = node.part5; + if (value5.ready() && value5.enabled()) { + total += value5.amount; + } + var value6 = node.part6; + if (value6.ready() && value6.enabled()) { + total += value6.amount; + } + var value7 = node.part7; + if (value7.ready() && value7.enabled()) { + total += value7.amount; + } + var value8 = node.part8; + if (value8.ready() && value8.enabled()) { + total += value8.amount; + } + return total; + } + + static int second_clone(Node entry) { + int total = 0; + var item1 = 
entry.part1; + if (item1.ready() && item1.enabled()) { + total += item1.amount; + } + var item2 = entry.part2; + if (item2.ready() && item2.enabled()) { + total += item2.amount; + } + var item3 = entry.part3; + if (item3.ready() && item3.enabled()) { + total += item3.amount; + } + var item4 = entry.part4; + if (item4.ready() && item4.enabled()) { + total += item4.amount; + } + var item5 = entry.part5; + if (item5.ready() && item5.enabled()) { + total += item5.amount; + } + var item6 = entry.part6; + if (item6.ready() && item6.enabled()) { + total += item6.amount; + } + var item7 = entry.part7; + if (item7.ready() && item7.enabled()) { + total += item7.amount; + } + var item8 = entry.part8; + if (item8.ready() && item8.enabled()) { + total += item8.amount; + } + return total; + } +} diff --git a/gems/decomplex/examples/java/function-lcom.java b/gems/decomplex/examples/java/function-lcom.java new file mode 100644 index 000000000..433cc611c --- /dev/null +++ b/gems/decomplex/examples/java/function-lcom.java @@ -0,0 +1,12 @@ +class Example { static Result mixed(int price, int tax, Logger logger) { + var subtotal = price + tax; + var total = subtotal * 2; + var rounded = total.round(); + + var timestamp = now(); + var buffer = Buffer.init(); + buffer.push(timestamp); + logger.info(buffer); + + return Result.init(rounded, buffer); +} } diff --git a/gems/decomplex/examples/java/implicit-control-flow.java b/gems/decomplex/examples/java/implicit-control-flow.java new file mode 100644 index 000000000..4bdd60844 --- /dev/null +++ b/gems/decomplex/examples/java/implicit-control-flow.java @@ -0,0 +1 @@ +class FlowExample { int status; boolean valid; boolean done; void prepare() { this.status = 1; } void validate() { this.valid = this.status == 1; } void commit() { this.done = this.valid; } void ok1() { prepare(); validate(); commit(); } void ok2() { prepare(); validate(); commit(); } void ok3() { prepare(); validate(); commit(); } void ok4() { prepare(); validate(); commit(); } 
void drift() { validate(); prepare(); commit(); } } diff --git a/gems/decomplex/examples/java/inconsistent-rename-clone.java b/gems/decomplex/examples/java/inconsistent-rename-clone.java new file mode 100644 index 000000000..6ec491226 --- /dev/null +++ b/gems/decomplex/examples/java/inconsistent-rename-clone.java @@ -0,0 +1 @@ +class Example { static void original() { var src = fetch(1); check(src); store(src); finalize(src); } static void pasted() { var dst = fetch(2); check(dst); store(src); finalize(dst); } } diff --git a/gems/decomplex/examples/java/local-flow.java b/gems/decomplex/examples/java/local-flow.java new file mode 100644 index 000000000..a5fa7f235 --- /dev/null +++ b/gems/decomplex/examples/java/local-flow.java @@ -0,0 +1,9 @@ +class Example { static Result mixed(int price, int tax) { + var subtotal = price + tax; + var total = subtotal.round(); + + var timestamp = now(); + var buffer = Buffer.init(); + buffer.push(timestamp); + return Result.init(total, buffer); +} } diff --git a/gems/decomplex/examples/java/locality-drag.java b/gems/decomplex/examples/java/locality-drag.java new file mode 100644 index 000000000..8d31b0dcb --- /dev/null +++ b/gems/decomplex/examples/java/locality-drag.java @@ -0,0 +1,29 @@ +class Example { + static void run(User user, Cart cart, Logger logger) { + var receipt_id = user.id; + + var total = cart.total; + if (total > 100) { + if (cart.discountable()) { + var discount = 10; + } + } + if (cart.taxable()) { + if (cart.region) { + var tax = total * 2; + } + } + if (logger.enabled()) { + if (logger.debug()) { + logger.info(total); + } + } + if (cart.valid()) { + if (cart.ready()) { + var status = 1; + } + } + + emit(receipt_id); + } +} diff --git a/gems/decomplex/examples/java/miner.java b/gems/decomplex/examples/java/miner.java new file mode 100644 index 000000000..749991f98 --- /dev/null +++ b/gems/decomplex/examples/java/miner.java @@ -0,0 +1 @@ +class Example { static boolean one(boolean a, boolean b, boolean c) { 
return a && b && c; } static boolean two(boolean a, boolean b, boolean c) { return a && b && c; } static boolean three(boolean a, boolean b, boolean c) { return a && b && c; } static boolean broken(boolean a, boolean b) { return a && b; } } diff --git a/gems/decomplex/examples/java/operational-discontinuity.java b/gems/decomplex/examples/java/operational-discontinuity.java new file mode 100644 index 000000000..d2dfba674 --- /dev/null +++ b/gems/decomplex/examples/java/operational-discontinuity.java @@ -0,0 +1,9 @@ +class Example { static void phase_shift() { + var a = 1; + var b = 2; + + // Phase 2 + var x = 3; + var y = 4; + print(x); print(y); +} } diff --git a/gems/decomplex/examples/java/oversized-predicate.java b/gems/decomplex/examples/java/oversized-predicate.java new file mode 100644 index 000000000..4a60c2466 --- /dev/null +++ b/gems/decomplex/examples/java/oversized-predicate.java @@ -0,0 +1 @@ +class Example { static void complex_check(boolean a, boolean b, boolean c, boolean d) { if (a && b && c && d) { print("too big"); } } } diff --git a/gems/decomplex/examples/java/path-condition.java b/gems/decomplex/examples/java/path-condition.java new file mode 100644 index 000000000..941f756f7 --- /dev/null +++ b/gems/decomplex/examples/java/path-condition.java @@ -0,0 +1 @@ +class Example { static void one(X x,Y y,Z z) { if (x.p() && y.q() && z.r()) { go(x); } } static void two(X x,Y y,Z z) { if (x.p() && y.q() && z.r()) { go(x); } } static void three(X x,Y y,Z z) { if (x.p() && y.q() && z.r()) { go(x); } } static void bug(X x,Y y,Z z) { if (x.p() && y.q()) { go(x); } } } diff --git a/gems/decomplex/examples/java/predicate-alias.java b/gems/decomplex/examples/java/predicate-alias.java new file mode 100644 index 000000000..891938723 --- /dev/null +++ b/gems/decomplex/examples/java/predicate-alias.java @@ -0,0 +1 @@ +class Example { static boolean first() { return true; } static boolean second() { return true; } static boolean other() { return false; } } diff 
--git a/gems/decomplex/examples/java/redundant-nil-guard.java b/gems/decomplex/examples/java/redundant-nil-guard.java new file mode 100644 index 000000000..32f12a734 --- /dev/null +++ b/gems/decomplex/examples/java/redundant-nil-guard.java @@ -0,0 +1 @@ +class Example { static void check(Value value) { if (value.isSome()) { value.isNull(); } } } diff --git a/gems/decomplex/examples/java/semantic-alias.java b/gems/decomplex/examples/java/semantic-alias.java new file mode 100644 index 000000000..29895156a --- /dev/null +++ b/gems/decomplex/examples/java/semantic-alias.java @@ -0,0 +1 @@ +class Example { static boolean frame(Node node) { return node.provenance == FRAME; } static boolean is_frame(Node node) { return provenance == FRAME; } static boolean heap(Node node) { return node.provenance == HEAP; } static int somewhere(Node node) { if (node.provenance == FRAME) { return 1; } return 0; } } diff --git a/gems/decomplex/examples/java/sequence-mine.java b/gems/decomplex/examples/java/sequence-mine.java new file mode 100644 index 000000000..629558214 --- /dev/null +++ b/gems/decomplex/examples/java/sequence-mine.java @@ -0,0 +1 @@ +class Example { static void one() { alloc_mark(x); body1(); cleanup(x); } static void two() { alloc_mark(y); body2(); cleanup(y); } static void three() { alloc_mark(z); body3(); cleanup(z); } static void four() { alloc_mark(w); body4(); cleanup(w); } static void leak() { alloc_mark(q); use_value(q); } } diff --git a/gems/decomplex/examples/java/state-branch-density.java b/gems/decomplex/examples/java/state-branch-density.java new file mode 100644 index 000000000..64436338f --- /dev/null +++ b/gems/decomplex/examples/java/state-branch-density.java @@ -0,0 +1 @@ +class StateBranchChecker { boolean checked; void check(boolean admin, String name) { if (admin) { this.checked = true; } if (this.checked && name == "admin") { print("hello"); } } } diff --git a/gems/decomplex/examples/java/state-mesh.java 
b/gems/decomplex/examples/java/state-mesh.java new file mode 100644 index 000000000..ed01f5ad9 --- /dev/null +++ b/gems/decomplex/examples/java/state-mesh.java @@ -0,0 +1 @@ +class StateMeshExample { int a; int b; void initialize() { this.a = 1; this.b = 2; } void writer() { this.a = 3; } int reader() { return this.a + this.b; } int a_alias() { return this.a; } } diff --git a/gems/decomplex/examples/java/structural-topology.java b/gems/decomplex/examples/java/structural-topology.java new file mode 100644 index 000000000..550e21e7f --- /dev/null +++ b/gems/decomplex/examples/java/structural-topology.java @@ -0,0 +1 @@ +class Worker { public void run(Items items) { prepare(); if (ready()) { validate(); } for (Item item : items) { helper(item); } } private void prepare() {} private boolean ready() { return true; } public void validate() {} private void helper(Item item) { item.use(); } } diff --git a/gems/decomplex/examples/java/temporal-ordering-pressure.java b/gems/decomplex/examples/java/temporal-ordering-pressure.java new file mode 100644 index 000000000..83a07d855 --- /dev/null +++ b/gems/decomplex/examples/java/temporal-ordering-pressure.java @@ -0,0 +1 @@ +class TemporalOrderExample { int a; int b; public void one() { this.a = 1; } public void two() { this.a = 2; this.b = 3; } public void three() { this.b = 4; } public int reader() { return this.a; } } diff --git a/gems/decomplex/examples/java/weighted-inlined-complexity.java b/gems/decomplex/examples/java/weighted-inlined-complexity.java new file mode 100644 index 000000000..9efea4524 --- /dev/null +++ b/gems/decomplex/examples/java/weighted-inlined-complexity.java @@ -0,0 +1 @@ +class Example { static void checkout(User user, Cart cart) { validate_user(user); apply_discount(cart); process_payment(user, cart); audit_cart(cart); } static boolean validate_user(User user) { if (user.active() && !user.suspended()) { if (user.profile.complete()) { return true; } else { return false; } } else { return false; } } 
static int apply_discount(Cart cart) { if (cart.total > 100 && eligible()) { if (holiday()) { return 20; } else if (loyalty_month()) { return 15; } else { return 10; } } return 0; } static void process_payment(User user, Cart cart) { if (gateway.ready()) { if (cart.total > 0 && user.active()) { if (fraud_check(user)) { charge(user, cart); } else { decline(user); } } } } static void audit_cart(Cart cart) { for (Item item : cart.items) { if (item.taxable()) { if (item.region && item.amount > 0) { record_tax(item); } } } } } diff --git a/gems/decomplex/examples/javascript/co-update.js b/gems/decomplex/examples/javascript/co-update.js new file mode 100644 index 000000000..0fbb808be --- /dev/null +++ b/gems/decomplex/examples/javascript/co-update.js @@ -0,0 +1,4 @@ +function stable_one(node) { node.storage = 1; node.provenance = 1; } +function stable_two(node) { node.storage = 1; node.provenance = 1; } +function stable_three(node) { node.storage = 1; node.provenance = 1; } +function misses_provenance(node) { node.storage = 1; } diff --git a/gems/decomplex/examples/javascript/decision-pressure.js b/gems/decomplex/examples/javascript/decision-pressure.js new file mode 100644 index 000000000..41b0455cc --- /dev/null +++ b/gems/decomplex/examples/javascript/decision-pressure.js @@ -0,0 +1 @@ +function scan(node) { const value = node.symbol; return value.isNull(); } diff --git a/gems/decomplex/examples/javascript/derived-state.js b/gems/decomplex/examples/javascript/derived-state.js new file mode 100644 index 000000000..a5ec31614 --- /dev/null +++ b/gems/decomplex/examples/javascript/derived-state.js @@ -0,0 +1 @@ +function check(input) { const cached = input + 1; input = 2; print(cached); } diff --git a/gems/decomplex/examples/javascript/false-simplicity.js b/gems/decomplex/examples/javascript/false-simplicity.js new file mode 100644 index 000000000..4bd9d8e03 --- /dev/null +++ b/gems/decomplex/examples/javascript/false-simplicity.js @@ -0,0 +1 @@ +class 
FalseSimplicityExample { hack() { print("hidden IO"); } } diff --git a/gems/decomplex/examples/javascript/fat-union.js b/gems/decomplex/examples/javascript/fat-union.js new file mode 100644 index 000000000..5b52db663 --- /dev/null +++ b/gems/decomplex/examples/javascript/fat-union.js @@ -0,0 +1 @@ +function handle(node) { switch (node) { case AST.Call: node.line(); node.col(); node.ty(); node.span(); node.parent(); node.recv(); break; case AST.Func: node.line(); node.col(); node.ty(); node.span(); node.parent(); node.name(); break; case AST.Lit: node.line(); node.col(); node.ty(); node.span(); node.parent(); node.value(); break; } } diff --git a/gems/decomplex/examples/javascript/flay-similarity.js b/gems/decomplex/examples/javascript/flay-similarity.js new file mode 100644 index 000000000..17ffc6b6a --- /dev/null +++ b/gems/decomplex/examples/javascript/flay-similarity.js @@ -0,0 +1,2 @@ +function first_clone(node) { let total = 0; const value1 = node.part1; if (value1.ready() && value1.enabled()) { total += value1.amount; } const value2 = node.part2; if (value2.ready() && value2.enabled()) { total += value2.amount; } const value3 = node.part3; if (value3.ready() && value3.enabled()) { total += value3.amount; } const value4 = node.part4; if (value4.ready() && value4.enabled()) { total += value4.amount; } const value5 = node.part5; if (value5.ready() && value5.enabled()) { total += value5.amount; } const value6 = node.part6; if (value6.ready() && value6.enabled()) { total += value6.amount; } const value7 = node.part7; if (value7.ready() && value7.enabled()) { total += value7.amount; } const value8 = node.part8; if (value8.ready() && value8.enabled()) { total += value8.amount; } return total; } +function second_clone(entry) { let total = 0; const item1 = entry.part1; if (item1.ready() && item1.enabled()) { total += item1.amount; } const item2 = entry.part2; if (item2.ready() && item2.enabled()) { total += item2.amount; } const item3 = entry.part3; if (item3.ready() 
&& item3.enabled()) { total += item3.amount; } const item4 = entry.part4; if (item4.ready() && item4.enabled()) { total += item4.amount; } const item5 = entry.part5; if (item5.ready() && item5.enabled()) { total += item5.amount; } const item6 = entry.part6; if (item6.ready() && item6.enabled()) { total += item6.amount; } const item7 = entry.part7; if (item7.ready() && item7.enabled()) { total += item7.amount; } const item8 = entry.part8; if (item8.ready() && item8.enabled()) { total += item8.amount; } return total; } diff --git a/gems/decomplex/examples/javascript/function-lcom.js b/gems/decomplex/examples/javascript/function-lcom.js new file mode 100644 index 000000000..6e550446b --- /dev/null +++ b/gems/decomplex/examples/javascript/function-lcom.js @@ -0,0 +1,12 @@ +function mixed(price, tax, logger) { + const subtotal = price + tax; + const total = subtotal * 2; + const rounded = total.round(); + + const timestamp = now(); + const buffer = Buffer.init(); + buffer.push(timestamp); + logger.info(buffer); + + return Result.init(rounded, buffer); +} diff --git a/gems/decomplex/examples/javascript/implicit-control-flow.js b/gems/decomplex/examples/javascript/implicit-control-flow.js new file mode 100644 index 000000000..7878c7cf7 --- /dev/null +++ b/gems/decomplex/examples/javascript/implicit-control-flow.js @@ -0,0 +1 @@ +class FlowExample { prepare() { this.status = 1; } validate() { this.valid = this.status == 1; } commit() { this.done = this.valid; } ok1() { this.prepare(); this.validate(); this.commit(); } ok2() { this.prepare(); this.validate(); this.commit(); } ok3() { this.prepare(); this.validate(); this.commit(); } ok4() { this.prepare(); this.validate(); this.commit(); } drift() { this.validate(); this.prepare(); this.commit(); } } diff --git a/gems/decomplex/examples/javascript/inconsistent-rename-clone.js b/gems/decomplex/examples/javascript/inconsistent-rename-clone.js new file mode 100644 index 000000000..67430611b --- /dev/null +++ 
b/gems/decomplex/examples/javascript/inconsistent-rename-clone.js @@ -0,0 +1,2 @@ +function original() { const src = fetch(1); check(src); store(src); finalize(src); } +function pasted() { const dst = fetch(2); check(dst); store(src); finalize(dst); } diff --git a/gems/decomplex/examples/javascript/local-flow.js b/gems/decomplex/examples/javascript/local-flow.js new file mode 100644 index 000000000..28856eb43 --- /dev/null +++ b/gems/decomplex/examples/javascript/local-flow.js @@ -0,0 +1,9 @@ +function mixed(price, tax) { + const subtotal = price + tax; + const total = subtotal.round(); + + const timestamp = now(); + const buffer = Buffer.init(); + buffer.push(timestamp); + return Result.init(total, buffer); +} diff --git a/gems/decomplex/examples/javascript/locality-drag.js b/gems/decomplex/examples/javascript/locality-drag.js new file mode 100644 index 000000000..21de7e8ae --- /dev/null +++ b/gems/decomplex/examples/javascript/locality-drag.js @@ -0,0 +1,27 @@ +function run(user, cart, logger) { + const receipt_id = user.id; + + const total = cart.total; + if (total > 100) { + if (cart.discountable()) { + const discount = 10; + } + } + if (cart.taxable()) { + if (cart.region) { + const tax = total * 2; + } + } + if (logger.enabled()) { + if (logger.debug()) { + logger.info(total); + } + } + if (cart.valid()) { + if (cart.ready()) { + const status = 1; + } + } + + emit(receipt_id); +} diff --git a/gems/decomplex/examples/javascript/miner.js b/gems/decomplex/examples/javascript/miner.js new file mode 100644 index 000000000..c7a3f8430 --- /dev/null +++ b/gems/decomplex/examples/javascript/miner.js @@ -0,0 +1,4 @@ +function one(a,b,c) { return a && b && c; } +function two(a,b,c) { return a && b && c; } +function three(a,b,c) { return a && b && c; } +function broken(a,b) { return a && b; } diff --git a/gems/decomplex/examples/javascript/operational-discontinuity.js b/gems/decomplex/examples/javascript/operational-discontinuity.js new file mode 100644 index 
000000000..629c5e9a2 --- /dev/null +++ b/gems/decomplex/examples/javascript/operational-discontinuity.js @@ -0,0 +1,9 @@ +function phase_shift() { + const a = 1; + const b = 2; + + // Phase 2 + const x = 3; + const y = 4; + print(x); print(y); +} diff --git a/gems/decomplex/examples/javascript/oversized-predicate.js b/gems/decomplex/examples/javascript/oversized-predicate.js new file mode 100644 index 000000000..bb7a5f359 --- /dev/null +++ b/gems/decomplex/examples/javascript/oversized-predicate.js @@ -0,0 +1 @@ +function complex_check(a,b,c,d) { if (a && b && c && d) { print("too big"); } } diff --git a/gems/decomplex/examples/javascript/path-condition.js b/gems/decomplex/examples/javascript/path-condition.js new file mode 100644 index 000000000..b717ad99a --- /dev/null +++ b/gems/decomplex/examples/javascript/path-condition.js @@ -0,0 +1,4 @@ +function one(x,y,z) { if (x.p() && y.q() && z.r()) { go(x); } } +function two(x,y,z) { if (x.p() && y.q() && z.r()) { go(x); } } +function three(x,y,z) { if (x.p() && y.q() && z.r()) { go(x); } } +function bug(x,y,z) { if (x.p() && y.q()) { go(x); } } diff --git a/gems/decomplex/examples/javascript/predicate-alias.js b/gems/decomplex/examples/javascript/predicate-alias.js new file mode 100644 index 000000000..62b2c9b57 --- /dev/null +++ b/gems/decomplex/examples/javascript/predicate-alias.js @@ -0,0 +1,3 @@ +function first() { return true; } +function second() { return true; } +function other() { return false; } diff --git a/gems/decomplex/examples/javascript/redundant-nil-guard.js b/gems/decomplex/examples/javascript/redundant-nil-guard.js new file mode 100644 index 000000000..95d1802fa --- /dev/null +++ b/gems/decomplex/examples/javascript/redundant-nil-guard.js @@ -0,0 +1 @@ +function check(value) { if (value.isSome()) { value.isNull(); } } diff --git a/gems/decomplex/examples/javascript/semantic-alias.js b/gems/decomplex/examples/javascript/semantic-alias.js new file mode 100644 index 000000000..8f3d0de18 --- /dev/null 
+++ b/gems/decomplex/examples/javascript/semantic-alias.js @@ -0,0 +1,4 @@ +function frame(node) { return node.provenance == FRAME; } +function is_frame(node) { return provenance == FRAME; } +function heap(node) { return node.provenance == HEAP; } +function somewhere(node) { if (node.provenance == FRAME) { return 1; } return 0; } diff --git a/gems/decomplex/examples/javascript/sequence-mine.js b/gems/decomplex/examples/javascript/sequence-mine.js new file mode 100644 index 000000000..72ee4959a --- /dev/null +++ b/gems/decomplex/examples/javascript/sequence-mine.js @@ -0,0 +1,5 @@ +function one() { alloc_mark(x); body1(); cleanup(x); } +function two() { alloc_mark(y); body2(); cleanup(y); } +function three() { alloc_mark(z); body3(); cleanup(z); } +function four() { alloc_mark(w); body4(); cleanup(w); } +function leak() { alloc_mark(q); use_value(q); } diff --git a/gems/decomplex/examples/javascript/state-branch-density.js b/gems/decomplex/examples/javascript/state-branch-density.js new file mode 100644 index 000000000..8c4153da3 --- /dev/null +++ b/gems/decomplex/examples/javascript/state-branch-density.js @@ -0,0 +1 @@ +class StateBranchChecker { check(admin, name) { if (admin) { this.checked = true; } if (this.checked && name == "admin") { print("hello"); } } } diff --git a/gems/decomplex/examples/javascript/state-mesh.js b/gems/decomplex/examples/javascript/state-mesh.js new file mode 100644 index 000000000..3e6271f94 --- /dev/null +++ b/gems/decomplex/examples/javascript/state-mesh.js @@ -0,0 +1 @@ +class StateMeshExample { initialize() { this.a = 1; this.b = 2; } writer() { this.a = 3; } reader() { return this.a + this.b; } a_alias() { return this.a; } } diff --git a/gems/decomplex/examples/javascript/structural-topology.js b/gems/decomplex/examples/javascript/structural-topology.js new file mode 100644 index 000000000..b7559a669 --- /dev/null +++ b/gems/decomplex/examples/javascript/structural-topology.js @@ -0,0 +1 @@ +class Worker { run(items) { 
this.prepare(); if (this.ready()) { this.validate(); } for (const item of items) { this.helper(item); } } prepare() {} ready() { return true; } validate() {} helper(item) { return item; } } diff --git a/gems/decomplex/examples/javascript/temporal-ordering-pressure.js b/gems/decomplex/examples/javascript/temporal-ordering-pressure.js new file mode 100644 index 000000000..0901e3c2e --- /dev/null +++ b/gems/decomplex/examples/javascript/temporal-ordering-pressure.js @@ -0,0 +1 @@ +class TemporalOrderExample { one() { this.a = 1; } two() { this.a = 2; this.b = 3; } three() { this.b = 4; } reader() { return this.a; } } diff --git a/gems/decomplex/examples/javascript/weighted-inlined-complexity.js b/gems/decomplex/examples/javascript/weighted-inlined-complexity.js new file mode 100644 index 000000000..e7ae5abc4 --- /dev/null +++ b/gems/decomplex/examples/javascript/weighted-inlined-complexity.js @@ -0,0 +1,5 @@ +function checkout(user, cart) { validate_user(user); apply_discount(cart); process_payment(user, cart); audit_cart(cart); } +function validate_user(user) { if (user.active() && !user.suspended()) { if (user.profile.complete()) { return true; } else { return false; } } else { return false; } } +function apply_discount(cart) { if (cart.total > 100 && eligible()) { if (holiday()) { return 20; } else if (loyalty_month()) { return 15; } else { return 10; } } return 0; } +function process_payment(user, cart) { if (gateway.ready()) { if (cart.total > 0 && user.active()) { if (fraud_check(user)) { charge(user, cart); } else { decline(user); } } } } +function audit_cart(cart) { for (const item of cart.items) { if (item.taxable()) { if (item.region && item.amount > 0) { record_tax(item); } } } } diff --git a/gems/decomplex/examples/kotlin/co-update.kt b/gems/decomplex/examples/kotlin/co-update.kt new file mode 100644 index 000000000..7880a0e3c --- /dev/null +++ b/gems/decomplex/examples/kotlin/co-update.kt @@ -0,0 +1,4 @@ +fun stable_one(node: Node) { node.storage = 1; 
node.provenance = 1 } +fun stable_two(node: Node) { node.storage = 1; node.provenance = 1 } +fun stable_three(node: Node) { node.storage = 1; node.provenance = 1 } +fun misses_provenance(node: Node) { node.storage = 1 } diff --git a/gems/decomplex/examples/kotlin/decision-pressure.kt b/gems/decomplex/examples/kotlin/decision-pressure.kt new file mode 100644 index 000000000..320c10e64 --- /dev/null +++ b/gems/decomplex/examples/kotlin/decision-pressure.kt @@ -0,0 +1 @@ +fun scan(node: Node): Boolean { val value = node.symbol; return value.isNull() } diff --git a/gems/decomplex/examples/kotlin/derived-state.kt b/gems/decomplex/examples/kotlin/derived-state.kt new file mode 100644 index 000000000..ee30f74e4 --- /dev/null +++ b/gems/decomplex/examples/kotlin/derived-state.kt @@ -0,0 +1 @@ +fun check(input_value: Int) { var input = input_value; val cached = input + 1; input = 2; print(cached) } diff --git a/gems/decomplex/examples/kotlin/false-simplicity.kt b/gems/decomplex/examples/kotlin/false-simplicity.kt new file mode 100644 index 000000000..f5be1a591 --- /dev/null +++ b/gems/decomplex/examples/kotlin/false-simplicity.kt @@ -0,0 +1 @@ +class FalseSimplicityExample { fun hack() { print("hidden IO") } } diff --git a/gems/decomplex/examples/kotlin/fat-union.kt b/gems/decomplex/examples/kotlin/fat-union.kt new file mode 100644 index 000000000..159f08ac5 --- /dev/null +++ b/gems/decomplex/examples/kotlin/fat-union.kt @@ -0,0 +1,7 @@ +fun handle(node: Node) { + when (node) { + AST.Call -> { node.line(); node.col(); node.ty(); node.span(); node.parent(); node.recv() } + AST.Func -> { node.line(); node.col(); node.ty(); node.span(); node.parent(); node.name() } + AST.Lit -> { node.line(); node.col(); node.ty(); node.span(); node.parent(); node.value() } + } +} diff --git a/gems/decomplex/examples/kotlin/flay-similarity.kt b/gems/decomplex/examples/kotlin/flay-similarity.kt new file mode 100644 index 000000000..63ff60cda --- /dev/null +++ 
b/gems/decomplex/examples/kotlin/flay-similarity.kt @@ -0,0 +1,73 @@ +fun first_clone(node: Node): Int { + var total = 0 + val value1 = node.part1 + if (value1.ready() && value1.enabled()) { + total += value1.amount + } + val value2 = node.part2 + if (value2.ready() && value2.enabled()) { + total += value2.amount + } + val value3 = node.part3 + if (value3.ready() && value3.enabled()) { + total += value3.amount + } + val value4 = node.part4 + if (value4.ready() && value4.enabled()) { + total += value4.amount + } + val value5 = node.part5 + if (value5.ready() && value5.enabled()) { + total += value5.amount + } + val value6 = node.part6 + if (value6.ready() && value6.enabled()) { + total += value6.amount + } + val value7 = node.part7 + if (value7.ready() && value7.enabled()) { + total += value7.amount + } + val value8 = node.part8 + if (value8.ready() && value8.enabled()) { + total += value8.amount + } + return total +} + +fun second_clone(entry: Node): Int { + var total = 0 + val item1 = entry.part1 + if (item1.ready() && item1.enabled()) { + total += item1.amount + } + val item2 = entry.part2 + if (item2.ready() && item2.enabled()) { + total += item2.amount + } + val item3 = entry.part3 + if (item3.ready() && item3.enabled()) { + total += item3.amount + } + val item4 = entry.part4 + if (item4.ready() && item4.enabled()) { + total += item4.amount + } + val item5 = entry.part5 + if (item5.ready() && item5.enabled()) { + total += item5.amount + } + val item6 = entry.part6 + if (item6.ready() && item6.enabled()) { + total += item6.amount + } + val item7 = entry.part7 + if (item7.ready() && item7.enabled()) { + total += item7.amount + } + val item8 = entry.part8 + if (item8.ready() && item8.enabled()) { + total += item8.amount + } + return total +} diff --git a/gems/decomplex/examples/kotlin/function-lcom.kt b/gems/decomplex/examples/kotlin/function-lcom.kt new file mode 100644 index 000000000..24c32f01b --- /dev/null +++ b/gems/decomplex/examples/kotlin/function-lcom.kt 
@@ -0,0 +1,12 @@ +fun mixed(price: Int, tax: Int, logger: Logger): Result { + val subtotal = price + tax + val total = subtotal * 2 + val rounded = total.round() + + val timestamp = now() + val buffer = Buffer.init() + buffer.push(timestamp) + logger.info(buffer) + + return Result.init(rounded, buffer) +} diff --git a/gems/decomplex/examples/kotlin/implicit-control-flow.kt b/gems/decomplex/examples/kotlin/implicit-control-flow.kt new file mode 100644 index 000000000..8ba4b59ac --- /dev/null +++ b/gems/decomplex/examples/kotlin/implicit-control-flow.kt @@ -0,0 +1,13 @@ +class FlowExample { + var status = 0 + var valid = false + var done = false + fun prepare() { this.status = 1 } + fun validate() { this.valid = this.status == 1 } + fun commit() { this.done = this.valid } + fun ok1() { this.prepare(); this.validate(); this.commit() } + fun ok2() { this.prepare(); this.validate(); this.commit() } + fun ok3() { this.prepare(); this.validate(); this.commit() } + fun ok4() { this.prepare(); this.validate(); this.commit() } + fun drift() { this.validate(); this.prepare(); this.commit() } +} diff --git a/gems/decomplex/examples/kotlin/inconsistent-rename-clone.kt b/gems/decomplex/examples/kotlin/inconsistent-rename-clone.kt new file mode 100644 index 000000000..df533ba97 --- /dev/null +++ b/gems/decomplex/examples/kotlin/inconsistent-rename-clone.kt @@ -0,0 +1,2 @@ +fun original() { val src = fetch(1); check(src); store(src); finalize(src) } +fun pasted() { val dst = fetch(2); check(dst); store(src); finalize(dst) } diff --git a/gems/decomplex/examples/kotlin/local-flow.kt b/gems/decomplex/examples/kotlin/local-flow.kt new file mode 100644 index 000000000..3b1c13723 --- /dev/null +++ b/gems/decomplex/examples/kotlin/local-flow.kt @@ -0,0 +1,9 @@ +fun mixed(price: Int, tax: Int): Result { + val subtotal = price + tax + val total = subtotal.round() + + val timestamp = now() + val buffer = Buffer.init() + buffer.push(timestamp) + return Result.init(total, buffer) +} diff 
--git a/gems/decomplex/examples/kotlin/locality-drag.kt b/gems/decomplex/examples/kotlin/locality-drag.kt new file mode 100644 index 000000000..7bc0d1e1a --- /dev/null +++ b/gems/decomplex/examples/kotlin/locality-drag.kt @@ -0,0 +1,27 @@ +fun run(user: User, cart: Cart, logger: Logger) { + val receipt_id = user.id + + val total = cart.total + if (total > 100) { + if (cart.discountable()) { + val discount = 10 + } + } + if (cart.taxable()) { + if (cart.region) { + val tax = total * 2 + } + } + if (logger.enabled()) { + if (logger.debug()) { + logger.info(total) + } + } + if (cart.valid()) { + if (cart.ready()) { + val status = 1 + } + } + + emit(receipt_id) +} diff --git a/gems/decomplex/examples/kotlin/miner.kt b/gems/decomplex/examples/kotlin/miner.kt new file mode 100644 index 000000000..12ba782f7 --- /dev/null +++ b/gems/decomplex/examples/kotlin/miner.kt @@ -0,0 +1,4 @@ +fun one(a: Boolean,b: Boolean,c: Boolean): Boolean { return a && b && c } +fun two(a: Boolean,b: Boolean,c: Boolean): Boolean { return a && b && c } +fun three(a: Boolean,b: Boolean,c: Boolean): Boolean { return a && b && c } +fun broken(a: Boolean,b: Boolean): Boolean { return a && b } diff --git a/gems/decomplex/examples/kotlin/operational-discontinuity.kt b/gems/decomplex/examples/kotlin/operational-discontinuity.kt new file mode 100644 index 000000000..7ddfb33e1 --- /dev/null +++ b/gems/decomplex/examples/kotlin/operational-discontinuity.kt @@ -0,0 +1,9 @@ +fun phase_shift() { + val a = 1 + val b = 2 + + // Phase 2 + val x = 3 + val y = 4 + print(x); print(y) +} diff --git a/gems/decomplex/examples/kotlin/oversized-predicate.kt b/gems/decomplex/examples/kotlin/oversized-predicate.kt new file mode 100644 index 000000000..948ea9889 --- /dev/null +++ b/gems/decomplex/examples/kotlin/oversized-predicate.kt @@ -0,0 +1 @@ +fun complex_check(a: Boolean,b: Boolean,c: Boolean,d: Boolean) { if (a && b && c && d) { print("too big") } } diff --git a/gems/decomplex/examples/kotlin/path-condition.kt 
b/gems/decomplex/examples/kotlin/path-condition.kt new file mode 100644 index 000000000..4037d0a9d --- /dev/null +++ b/gems/decomplex/examples/kotlin/path-condition.kt @@ -0,0 +1,4 @@ +fun one(x: X,y: Y,z: Z) { if (x.p() && y.q() && z.r()) { go(x) } } +fun two(x: X,y: Y,z: Z) { if (x.p() && y.q() && z.r()) { go(x) } } +fun three(x: X,y: Y,z: Z) { if (x.p() && y.q() && z.r()) { go(x) } } +fun bug(x: X,y: Y,z: Z) { if (x.p() && y.q()) { go(x) } } diff --git a/gems/decomplex/examples/kotlin/predicate-alias.kt b/gems/decomplex/examples/kotlin/predicate-alias.kt new file mode 100644 index 000000000..e6b3081cd --- /dev/null +++ b/gems/decomplex/examples/kotlin/predicate-alias.kt @@ -0,0 +1,3 @@ +fun first(): Boolean { return true } +fun second(): Boolean { return true } +fun other(): Boolean { return false } diff --git a/gems/decomplex/examples/kotlin/redundant-nil-guard.kt b/gems/decomplex/examples/kotlin/redundant-nil-guard.kt new file mode 100644 index 000000000..d9659325f --- /dev/null +++ b/gems/decomplex/examples/kotlin/redundant-nil-guard.kt @@ -0,0 +1 @@ +fun check(value: Value) { if (value.isSome()) { value.isNull() } } diff --git a/gems/decomplex/examples/kotlin/semantic-alias.kt b/gems/decomplex/examples/kotlin/semantic-alias.kt new file mode 100644 index 000000000..576612c58 --- /dev/null +++ b/gems/decomplex/examples/kotlin/semantic-alias.kt @@ -0,0 +1,4 @@ +fun frame(node: Node): Boolean { return node.provenance == FRAME } +fun is_frame(node: Node): Boolean { return provenance == FRAME } +fun heap(node: Node): Boolean { return node.provenance == HEAP } +fun somewhere(node: Node): Int { if (node.provenance == FRAME) { return 1 }; return 0 } diff --git a/gems/decomplex/examples/kotlin/sequence-mine.kt b/gems/decomplex/examples/kotlin/sequence-mine.kt new file mode 100644 index 000000000..fe2c778cd --- /dev/null +++ b/gems/decomplex/examples/kotlin/sequence-mine.kt @@ -0,0 +1,5 @@ +fun one() { alloc_mark(x); body1(); cleanup(x) } +fun two() { alloc_mark(y); 
body2(); cleanup(y) } +fun three() { alloc_mark(z); body3(); cleanup(z) } +fun four() { alloc_mark(w); body4(); cleanup(w) } +fun leak() { alloc_mark(q); use_value(q) } diff --git a/gems/decomplex/examples/kotlin/state-branch-density.kt b/gems/decomplex/examples/kotlin/state-branch-density.kt new file mode 100644 index 000000000..2e75fd989 --- /dev/null +++ b/gems/decomplex/examples/kotlin/state-branch-density.kt @@ -0,0 +1,13 @@ +class StateBranchChecker { + var checked = false + + fun check(admin: Boolean, name: String) { + if (admin) { + this.checked = true + } + + if (this.checked && name == "admin") { + print("hello") + } + } +} diff --git a/gems/decomplex/examples/kotlin/state-mesh.kt b/gems/decomplex/examples/kotlin/state-mesh.kt new file mode 100644 index 000000000..63ce4525a --- /dev/null +++ b/gems/decomplex/examples/kotlin/state-mesh.kt @@ -0,0 +1,21 @@ +class StateMeshExample { + var a = 0 + var b = 0 + + fun initialize() { + this.a = 1 + this.b = 2 + } + + fun writer() { + this.a = 3 + } + + fun reader(): Int { + return this.a + this.b + } + + fun a_alias(): Int { + return this.a + } +} diff --git a/gems/decomplex/examples/kotlin/structural-topology.kt b/gems/decomplex/examples/kotlin/structural-topology.kt new file mode 100644 index 000000000..f30679d5f --- /dev/null +++ b/gems/decomplex/examples/kotlin/structural-topology.kt @@ -0,0 +1,16 @@ +class Worker { + fun run(items: Items) { + this.prepare() + if (this.ready()) { + this.validate() + } + for (item in items) { + this.helper(item) + } + } + + private fun prepare() {} + private fun ready(): Boolean { return true } + fun validate() {} + private fun helper(item: Item) { item.use() } +} diff --git a/gems/decomplex/examples/kotlin/temporal-ordering-pressure.kt b/gems/decomplex/examples/kotlin/temporal-ordering-pressure.kt new file mode 100644 index 000000000..6b94a5132 --- /dev/null +++ b/gems/decomplex/examples/kotlin/temporal-ordering-pressure.kt @@ -0,0 +1,21 @@ +class TemporalOrderExample { + var 
a = 0 + var b = 0 + + fun one() { + this.a = 1 + } + + fun two() { + this.a = 2 + this.b = 3 + } + + fun three() { + this.b = 4 + } + + fun reader(): Int { + return this.a + } +} diff --git a/gems/decomplex/examples/kotlin/weighted-inlined-complexity.kt b/gems/decomplex/examples/kotlin/weighted-inlined-complexity.kt new file mode 100644 index 000000000..00c5543e7 --- /dev/null +++ b/gems/decomplex/examples/kotlin/weighted-inlined-complexity.kt @@ -0,0 +1,5 @@ +fun checkout(user: User, cart: Cart) { validate_user(user); apply_discount(cart); process_payment(user, cart); audit_cart(cart) } +fun validate_user(user: User): Boolean { if (user.active() && !user.suspended()) { if (user.profile.complete()) { return true } else { return false } } else { return false } } +fun apply_discount(cart: Cart): Int { if (cart.total > 100 && eligible()) { if (holiday()) { return 20 } else if (loyalty_month()) { return 15 } else { return 10 } }; return 0 } +fun process_payment(user: User, cart: Cart) { if (gateway.ready()) { if (cart.total > 0 && user.active()) { if (fraud_check(user)) { charge(user, cart) } else { decline(user) } } } } +fun audit_cart(cart: Cart) { for (item in cart.items) { if (item.taxable()) { if (item.region && item.amount > 0) { record_tax(item) } } } } diff --git a/gems/decomplex/examples/lua/co-update.lua b/gems/decomplex/examples/lua/co-update.lua new file mode 100644 index 000000000..1497b28e7 --- /dev/null +++ b/gems/decomplex/examples/lua/co-update.lua @@ -0,0 +1,15 @@ +function stable_one(node) + node.storage = 1 + node.provenance = 1 +end +function stable_two(node) + node.storage = 1 + node.provenance = 1 +end +function stable_three(node) + node.storage = 1 + node.provenance = 1 +end +function misses_provenance(node) + node.storage = 1 +end diff --git a/gems/decomplex/examples/lua/decision-pressure.lua b/gems/decomplex/examples/lua/decision-pressure.lua new file mode 100644 index 000000000..206ffbcea --- /dev/null +++ 
b/gems/decomplex/examples/lua/decision-pressure.lua @@ -0,0 +1,4 @@ +function scan(node) + local value = node.symbol + return value.isNull() +end diff --git a/gems/decomplex/examples/lua/derived-state.lua b/gems/decomplex/examples/lua/derived-state.lua new file mode 100644 index 000000000..aaee214a8 --- /dev/null +++ b/gems/decomplex/examples/lua/derived-state.lua @@ -0,0 +1 @@ +function check(input) local cached = input + 1; input = 2; print(cached) end diff --git a/gems/decomplex/examples/lua/false-simplicity.lua b/gems/decomplex/examples/lua/false-simplicity.lua new file mode 100644 index 000000000..e222554dc --- /dev/null +++ b/gems/decomplex/examples/lua/false-simplicity.lua @@ -0,0 +1,2 @@ +FalseSimplicityExample = {} +function FalseSimplicityExample:hack() print("hidden IO") end diff --git a/gems/decomplex/examples/lua/fat-union.lua b/gems/decomplex/examples/lua/fat-union.lua new file mode 100644 index 000000000..7a87c60de --- /dev/null +++ b/gems/decomplex/examples/lua/fat-union.lua @@ -0,0 +1,5 @@ +function handle(node) + if node == AST.Call then node.line(); node.col(); node.ty(); node.span(); node.parent(); node.recv() end + if node == AST.Func then node.line(); node.col(); node.ty(); node.span(); node.parent(); node.name() end + if node == AST.Lit then node.line(); node.col(); node.ty(); node.span(); node.parent(); node.value() end +end diff --git a/gems/decomplex/examples/lua/flay-similarity.lua b/gems/decomplex/examples/lua/flay-similarity.lua new file mode 100644 index 000000000..d64d5c2c4 --- /dev/null +++ b/gems/decomplex/examples/lua/flay-similarity.lua @@ -0,0 +1,2 @@ +function first_clone(node) local total = 0; local value1 = node.part1; if value1.ready() and value1.enabled() then total = total + value1.amount end local value2 = node.part2; if value2.ready() and value2.enabled() then total = total + value2.amount end local value3 = node.part3; if value3.ready() and value3.enabled() then total = total + value3.amount end local value4 = 
node.part4; if value4.ready() and value4.enabled() then total = total + value4.amount end local value5 = node.part5; if value5.ready() and value5.enabled() then total = total + value5.amount end local value6 = node.part6; if value6.ready() and value6.enabled() then total = total + value6.amount end local value7 = node.part7; if value7.ready() and value7.enabled() then total = total + value7.amount end local value8 = node.part8; if value8.ready() and value8.enabled() then total = total + value8.amount end return total end +function second_clone(entry) local total = 0; local item1 = entry.part1; if item1.ready() and item1.enabled() then total = total + item1.amount end local item2 = entry.part2; if item2.ready() and item2.enabled() then total = total + item2.amount end local item3 = entry.part3; if item3.ready() and item3.enabled() then total = total + item3.amount end local item4 = entry.part4; if item4.ready() and item4.enabled() then total = total + item4.amount end local item5 = entry.part5; if item5.ready() and item5.enabled() then total = total + item5.amount end local item6 = entry.part6; if item6.ready() and item6.enabled() then total = total + item6.amount end local item7 = entry.part7; if item7.ready() and item7.enabled() then total = total + item7.amount end local item8 = entry.part8; if item8.ready() and item8.enabled() then total = total + item8.amount end return total end diff --git a/gems/decomplex/examples/lua/function-lcom.lua b/gems/decomplex/examples/lua/function-lcom.lua new file mode 100644 index 000000000..eb1c84c69 --- /dev/null +++ b/gems/decomplex/examples/lua/function-lcom.lua @@ -0,0 +1,12 @@ +function mixed(price, tax, logger) + local subtotal = price + tax + local total = subtotal * 2 + local rounded = total.round() + + local timestamp = now() + local buffer = Buffer.init() + buffer.push(timestamp) + logger.info(buffer) + + return Result.init(rounded, buffer) +end diff --git a/gems/decomplex/examples/lua/implicit-control-flow.lua 
b/gems/decomplex/examples/lua/implicit-control-flow.lua new file mode 100644 index 000000000..603b2d6d4 --- /dev/null +++ b/gems/decomplex/examples/lua/implicit-control-flow.lua @@ -0,0 +1,9 @@ +FlowExample = {} +function FlowExample:prepare() self.status = 1 end +function FlowExample:validate() self.valid = self.status == 1 end +function FlowExample:commit() self.done = self.valid end +function FlowExample:ok1() self.prepare(); self.validate(); self.commit() end +function FlowExample:ok2() self.prepare(); self.validate(); self.commit() end +function FlowExample:ok3() self.prepare(); self.validate(); self.commit() end +function FlowExample:ok4() self.prepare(); self.validate(); self.commit() end +function FlowExample:drift() self.validate(); self.prepare(); self.commit() end diff --git a/gems/decomplex/examples/lua/inconsistent-rename-clone.lua b/gems/decomplex/examples/lua/inconsistent-rename-clone.lua new file mode 100644 index 000000000..f7c681c86 --- /dev/null +++ b/gems/decomplex/examples/lua/inconsistent-rename-clone.lua @@ -0,0 +1,2 @@ +function original() local src = fetch(1); check(src); store(src); finalize(src) end +function pasted() local dst = fetch(2); check(dst); store(src); finalize(dst) end diff --git a/gems/decomplex/examples/lua/local-flow.lua b/gems/decomplex/examples/lua/local-flow.lua new file mode 100644 index 000000000..3627024e2 --- /dev/null +++ b/gems/decomplex/examples/lua/local-flow.lua @@ -0,0 +1,9 @@ +function mixed(price, tax) + local subtotal = price + tax + local total = subtotal.round() + + local timestamp = now() + local buffer = Buffer.init() + buffer.push(timestamp) + return Result.init(total, buffer) +end diff --git a/gems/decomplex/examples/lua/locality-drag.lua b/gems/decomplex/examples/lua/locality-drag.lua new file mode 100644 index 000000000..1cca002ab --- /dev/null +++ b/gems/decomplex/examples/lua/locality-drag.lua @@ -0,0 +1,27 @@ +function run(user, cart, logger) + local receipt_id = user.id + + local total = 
cart.total + if total > 100 then + if cart.discountable() then + local discount = 10 + end + end + if cart.taxable() then + if cart.region then + local tax = total * 2 + end + end + if logger.enabled() then + if logger.debug() then + logger.info(total) + end + end + if cart.valid() then + if cart.ready() then + local status = 1 + end + end + + emit(receipt_id) +end diff --git a/gems/decomplex/examples/lua/miner.lua b/gems/decomplex/examples/lua/miner.lua new file mode 100644 index 000000000..e188986dc --- /dev/null +++ b/gems/decomplex/examples/lua/miner.lua @@ -0,0 +1,4 @@ +function one(a,b,c) return a and b and c end +function two(a,b,c) return a and b and c end +function three(a,b,c) return a and b and c end +function broken(a,b) return a and b end diff --git a/gems/decomplex/examples/lua/operational-discontinuity.lua b/gems/decomplex/examples/lua/operational-discontinuity.lua new file mode 100644 index 000000000..af2e143f7 --- /dev/null +++ b/gems/decomplex/examples/lua/operational-discontinuity.lua @@ -0,0 +1,9 @@ +function phase_shift() + local a = 1 + local b = 2 + + -- Phase 2 + local x = 3 + local y = 4 + print(x); print(y) +end diff --git a/gems/decomplex/examples/lua/oversized-predicate.lua b/gems/decomplex/examples/lua/oversized-predicate.lua new file mode 100644 index 000000000..28e204f1b --- /dev/null +++ b/gems/decomplex/examples/lua/oversized-predicate.lua @@ -0,0 +1 @@ +function complex_check(a,b,c,d) if a and b and c and d then print("too big") end end diff --git a/gems/decomplex/examples/lua/path-condition.lua b/gems/decomplex/examples/lua/path-condition.lua new file mode 100644 index 000000000..fb94f2cf9 --- /dev/null +++ b/gems/decomplex/examples/lua/path-condition.lua @@ -0,0 +1,4 @@ +function one(x,y,z) if x.p() and y.q() and z.r() then go(x) end end +function two(x,y,z) if x.p() and y.q() and z.r() then go(x) end end +function three(x,y,z) if x.p() and y.q() and z.r() then go(x) end end +function bug(x,y,z) if x.p() and y.q() then go(x) end 
end diff --git a/gems/decomplex/examples/lua/predicate-alias.lua b/gems/decomplex/examples/lua/predicate-alias.lua new file mode 100644 index 000000000..3acd4b64d --- /dev/null +++ b/gems/decomplex/examples/lua/predicate-alias.lua @@ -0,0 +1,3 @@ +function first() return true end +function second() return true end +function other() return false end diff --git a/gems/decomplex/examples/lua/redundant-nil-guard.lua b/gems/decomplex/examples/lua/redundant-nil-guard.lua new file mode 100644 index 000000000..9aeb9b3d1 --- /dev/null +++ b/gems/decomplex/examples/lua/redundant-nil-guard.lua @@ -0,0 +1 @@ +function check(value) if value.isSome() then value.isNull() end end diff --git a/gems/decomplex/examples/lua/semantic-alias.lua b/gems/decomplex/examples/lua/semantic-alias.lua new file mode 100644 index 000000000..7206ed542 --- /dev/null +++ b/gems/decomplex/examples/lua/semantic-alias.lua @@ -0,0 +1,4 @@ +function frame(node) return node.provenance == FRAME end +function is_frame(node) return provenance == FRAME end +function heap(node) return node.provenance == HEAP end +function somewhere(node) if node.provenance == FRAME then return 1 end return 0 end diff --git a/gems/decomplex/examples/lua/sequence-mine.lua b/gems/decomplex/examples/lua/sequence-mine.lua new file mode 100644 index 000000000..c17077f23 --- /dev/null +++ b/gems/decomplex/examples/lua/sequence-mine.lua @@ -0,0 +1,5 @@ +function one() alloc_mark(x); body1(); cleanup(x) end +function two() alloc_mark(y); body2(); cleanup(y) end +function three() alloc_mark(z); body3(); cleanup(z) end +function four() alloc_mark(w); body4(); cleanup(w) end +function leak() alloc_mark(q); use_value(q) end diff --git a/gems/decomplex/examples/lua/state-branch-density.lua b/gems/decomplex/examples/lua/state-branch-density.lua new file mode 100644 index 000000000..7fb2235ee --- /dev/null +++ b/gems/decomplex/examples/lua/state-branch-density.lua @@ -0,0 +1,2 @@ +StateBranchChecker = {} +function 
StateBranchChecker:check(admin, name) if admin then self.checked = true end if self.checked and name == "admin" then print("hello") end end diff --git a/gems/decomplex/examples/lua/state-mesh.lua b/gems/decomplex/examples/lua/state-mesh.lua new file mode 100644 index 000000000..6725a32df --- /dev/null +++ b/gems/decomplex/examples/lua/state-mesh.lua @@ -0,0 +1,5 @@ +StateMeshExample = {} +function StateMeshExample:initialize() self.a = 1; self.b = 2 end +function StateMeshExample:writer() self.a = 3 end +function StateMeshExample:reader() return self.a + self.b end +function StateMeshExample:a_alias() return self.a end diff --git a/gems/decomplex/examples/lua/structural-topology.lua b/gems/decomplex/examples/lua/structural-topology.lua new file mode 100644 index 000000000..ee4cd50f3 --- /dev/null +++ b/gems/decomplex/examples/lua/structural-topology.lua @@ -0,0 +1,6 @@ +Worker = {} +function Worker:run(items) self.prepare(); if self.ready() then self.validate() end for item in items do self.helper(item) end end +function Worker:prepare() end +function Worker:ready() return true end +function Worker:validate() end +function Worker:helper(item) return item end diff --git a/gems/decomplex/examples/lua/temporal-ordering-pressure.lua b/gems/decomplex/examples/lua/temporal-ordering-pressure.lua new file mode 100644 index 000000000..5a7eb8295 --- /dev/null +++ b/gems/decomplex/examples/lua/temporal-ordering-pressure.lua @@ -0,0 +1,5 @@ +TemporalOrderExample = {} +function TemporalOrderExample:one() self.a = 1 end +function TemporalOrderExample:two() self.a = 2; self.b = 3 end +function TemporalOrderExample:three() self.b = 4 end +function TemporalOrderExample:reader() return self.a end diff --git a/gems/decomplex/examples/lua/weighted-inlined-complexity.lua b/gems/decomplex/examples/lua/weighted-inlined-complexity.lua new file mode 100644 index 000000000..e2e454bee --- /dev/null +++ b/gems/decomplex/examples/lua/weighted-inlined-complexity.lua @@ -0,0 +1,5 @@ +function 
checkout(user, cart) validate_user(user); apply_discount(cart); process_payment(user, cart); audit_cart(cart) end +function validate_user(user) if user.active() and not user.suspended() then if user.profile.complete() then return true else return false end else return false end end +function apply_discount(cart) if cart.total > 100 and eligible() then if holiday() then return 20 elseif loyalty_month() then return 15 else return 10 end end return 0 end +function process_payment(user, cart) if gateway.ready() then if cart.total > 0 and user.active() then if fraud_check(user) then charge(user, cart) else decline(user) end end end end +function audit_cart(cart) for item in cart.items do if item.taxable() then if item.region and item.amount > 0 then record_tax(item) end end end end diff --git a/gems/decomplex/examples/oracles/co-update.json b/gems/decomplex/examples/oracles/co-update.json new file mode 100644 index 000000000..5ebc590a0 --- /dev/null +++ b/gems/decomplex/examples/oracles/co-update.json @@ -0,0 +1,27 @@ +{ + "detector": "co-update", + "options": { + }, + "expected": { + "co_written_pairs": [ + { + "pair": [ + "provenance", + "storage" + ], + "support": 3 + } + ], + "neglected_updates": [ + { + "pair": [ + "provenance", + "storage" + ], + "support": 3, + "has": "storage", + "missing": "provenance" + } + ] + } +} diff --git a/gems/decomplex/examples/oracles/decision-pressure.json b/gems/decomplex/examples/oracles/decision-pressure.json new file mode 100644 index 000000000..f7550f8d8 --- /dev/null +++ b/gems/decomplex/examples/oracles/decision-pressure.json @@ -0,0 +1,13 @@ +{ + "detector": "decision-pressure", + "options": { + }, + "expected": [ + { + "contract": ".symbol", + "decisions": 1, + "essential": 0, + "methods": 1 + } + ] +} diff --git a/gems/decomplex/examples/oracles/derived-state.json b/gems/decomplex/examples/oracles/derived-state.json new file mode 100644 index 000000000..1dd101977 --- /dev/null +++ 
b/gems/decomplex/examples/oracles/derived-state.json @@ -0,0 +1,11 @@ +{ + "detector": "derived-state", + "options": { + }, + "expected": [ + { + "derived": "cached", + "source": "input" + } + ] +} diff --git a/gems/decomplex/examples/oracles/false-simplicity.json b/gems/decomplex/examples/oracles/false-simplicity.json new file mode 100644 index 000000000..807166a32 --- /dev/null +++ b/gems/decomplex/examples/oracles/false-simplicity.json @@ -0,0 +1,10 @@ +{ + "detector": "false-simplicity", + "options": { + }, + "expected": [ + { + "kind": "hidden_io" + } + ] +} diff --git a/gems/decomplex/examples/oracles/fat-union.json b/gems/decomplex/examples/oracles/fat-union.json new file mode 100644 index 000000000..855427eee --- /dev/null +++ b/gems/decomplex/examples/oracles/fat-union.json @@ -0,0 +1,29 @@ +{ + "detector": "fat-union", + "options": { + }, + "expected": [ + { + "common": [ + "col", + "line", + "parent", + "span", + "ty" + ], + "variant": [ + "name", + "recv", + "value" + ], + "degenerate": false, + "support": 1, + "scatter": 1, + "variant_set": [ + "AST.Call", + "AST.Func", + "AST.Lit" + ] + } + ] +} diff --git a/gems/decomplex/examples/oracles/flay-similarity.json b/gems/decomplex/examples/oracles/flay-similarity.json new file mode 100644 index 000000000..0aa527fb4 --- /dev/null +++ b/gems/decomplex/examples/oracles/flay-similarity.json @@ -0,0 +1,12 @@ +{ + "detector": "flay-similarity", + "options": { + }, + "expected": [ + { + "clone_type": "type2", + "node": "defn", + "site_count": 2 + } + ] +} diff --git a/gems/decomplex/examples/oracles/function-lcom.json b/gems/decomplex/examples/oracles/function-lcom.json new file mode 100644 index 000000000..7dfcf5103 --- /dev/null +++ b/gems/decomplex/examples/oracles/function-lcom.json @@ -0,0 +1,14 @@ +{ + "detector": "function-lcom", + "options": { + }, + "expected": [ + { + "mode": "late_join", + "components": 2, + "locals": 8, + "statements": 8, + "terminal_join": true + } + ] +} diff --git 
a/gems/decomplex/examples/oracles/implicit-control-flow.json b/gems/decomplex/examples/oracles/implicit-control-flow.json new file mode 100644 index 000000000..ce5649f3e --- /dev/null +++ b/gems/decomplex/examples/oracles/implicit-control-flow.json @@ -0,0 +1,91 @@ +{ + "detector": "implicit-control-flow", + "options": { + }, + "expected": { + "ordered_protocols": [ + { + "protocol": [ + "prepare", + "validate" + ], + "dependency": [ + "write_read" + ], + "support": 4, + "observed": [ + "prepare", + "validate" + ], + "missing": [ + + ], + "states": [ + "status" + ] + }, + { + "protocol": [ + "validate", + "commit" + ], + "dependency": [ + "write_read" + ], + "support": 4, + "observed": [ + "validate", + "commit" + ], + "missing": [ + + ], + "states": [ + "valid" + ] + }, + { + "protocol": [ + "validate", + "prepare" + ], + "dependency": [ + "read_write" + ], + "support": 1, + "observed": [ + "validate", + "prepare" + ], + "missing": [ + + ], + "states": [ + "status" + ] + } + ], + "order_drift": [ + { + "protocol": [ + "prepare", + "validate" + ], + "dependency": [ + "write_read" + ], + "support": 4, + "observed": [ + "validate", + "prepare" + ], + "missing": [ + + ], + "states": [ + "status" + ] + } + ] + } +} diff --git a/gems/decomplex/examples/oracles/inconsistent-rename-clone.json b/gems/decomplex/examples/oracles/inconsistent-rename-clone.json new file mode 100644 index 000000000..487a93610 --- /dev/null +++ b/gems/decomplex/examples/oracles/inconsistent-rename-clone.json @@ -0,0 +1,11 @@ +{ + "detector": "inconsistent-rename-clone", + "options": { + }, + "expected": [ + { + "ref_name": "src", + "divergent_count": 2 + } + ] +} diff --git a/gems/decomplex/examples/oracles/local-flow.json b/gems/decomplex/examples/oracles/local-flow.json new file mode 100644 index 000000000..3fb7a8586 --- /dev/null +++ b/gems/decomplex/examples/oracles/local-flow.json @@ -0,0 +1,114 @@ +{ + "detector": "local-flow", + "options": { + }, + "expected": [ + { + "method": "mixed", + 
"statements": [ + { + "reads": [ + "price", + "tax" + ], + "writes": [ + "subtotal" + ], + "dependencies": [ + [ + "subtotal", + "price" + ], + [ + "subtotal", + "tax" + ] + ], + "co_uses": [ + [ + "price", + "tax" + ] + ] + }, + { + "reads": [ + "subtotal" + ], + "writes": [ + "total" + ], + "dependencies": [ + [ + "total", + "subtotal" + ] + ], + "co_uses": [ + ] + }, + { + "reads": [ + ], + "writes": [ + "timestamp" + ], + "dependencies": [ + ], + "co_uses": [ + ] + }, + { + "reads": [ + ], + "writes": [ + "buffer" + ], + "dependencies": [ + ], + "co_uses": [ + ] + }, + { + "reads": [ + "buffer", + "timestamp" + ], + "writes": [ + ], + "dependencies": [ + ], + "co_uses": [ + [ + "buffer", + "timestamp" + ] + ] + }, + { + "reads": [ + "buffer", + "total" + ], + "writes": [ + ], + "dependencies": [ + ], + "co_uses": [ + [ + "buffer", + "total" + ] + ] + } + ], + "boundaries": [ + { + "before_index": 1, + "after_index": 2, + "kind": "blank" + } + ] + } + ] +} diff --git a/gems/decomplex/examples/oracles/locality-drag.json b/gems/decomplex/examples/oracles/locality-drag.json new file mode 100644 index 000000000..918b9c1bd --- /dev/null +++ b/gems/decomplex/examples/oracles/locality-drag.json @@ -0,0 +1,10 @@ +{ + "detector": "locality-drag", + "options": { + }, + "expected": [ + { + "variable": "receipt_id" + } + ] +} diff --git a/gems/decomplex/examples/oracles/miner.json b/gems/decomplex/examples/oracles/miner.json new file mode 100644 index 000000000..6ccddd467 --- /dev/null +++ b/gems/decomplex/examples/oracles/miner.json @@ -0,0 +1,30 @@ +{ + "detector": "miner", + "options": { + }, + "expected": { + "missing_abstractions": [ + { + "kind": "conjunction", + "members": [ + "a", + "b", + "c" + ], + "support": 3, + "scatter": 3 + } + ], + "neglected_conditions": [ + { + "pattern": [ + "a", + "b", + "c" + ], + "support": 3, + "missing": "c" + } + ] + } +} diff --git a/gems/decomplex/examples/oracles/operational-discontinuity.json 
b/gems/decomplex/examples/oracles/operational-discontinuity.json new file mode 100644 index 000000000..1fbbe2f4a --- /dev/null +++ b/gems/decomplex/examples/oracles/operational-discontinuity.json @@ -0,0 +1,11 @@ +{ + "detector": "operational-discontinuity", + "options": { + }, + "expected": [ + { + "resets": 1, + "confidence": "high" + } + ] +} diff --git a/gems/decomplex/examples/oracles/oversized-predicate.json b/gems/decomplex/examples/oracles/oversized-predicate.json new file mode 100644 index 000000000..a3901ad4b --- /dev/null +++ b/gems/decomplex/examples/oracles/oversized-predicate.json @@ -0,0 +1,11 @@ +{ + "detector": "oversized-predicate", + "options": { + }, + "expected": [ + { + "count": 4, + "atom_count": 4 + } + ] +} diff --git a/gems/decomplex/examples/oracles/path-condition.json b/gems/decomplex/examples/oracles/path-condition.json new file mode 100644 index 000000000..b26ed9dbc --- /dev/null +++ b/gems/decomplex/examples/oracles/path-condition.json @@ -0,0 +1,17 @@ +{ + "detector": "path-condition", + "options": { + }, + "expected": [ + { + "pattern": [ + "x.p", + "y.q", + "z.r" + ], + "support": 3, + "missing": "z.r", + "action": "go(x)" + } + ] +} diff --git a/gems/decomplex/examples/oracles/predicate-alias.json b/gems/decomplex/examples/oracles/predicate-alias.json new file mode 100644 index 000000000..ab4f95343 --- /dev/null +++ b/gems/decomplex/examples/oracles/predicate-alias.json @@ -0,0 +1,12 @@ +{ + "detector": "predicate-alias", + "options": { + }, + "expected": { + "alias_clusters": [ + { + "name_count": 2 + } + ] + } +} diff --git a/gems/decomplex/examples/oracles/redundant-nil-guard.json b/gems/decomplex/examples/oracles/redundant-nil-guard.json new file mode 100644 index 000000000..9bb46a735 --- /dev/null +++ b/gems/decomplex/examples/oracles/redundant-nil-guard.json @@ -0,0 +1,10 @@ +{ + "detector": "redundant-nil-guard", + "options": { + }, + "expected": [ + { + "local": "value" + } + ] +} diff --git 
a/gems/decomplex/examples/oracles/semantic-alias.json b/gems/decomplex/examples/oracles/semantic-alias.json new file mode 100644 index 000000000..678e710b5 --- /dev/null +++ b/gems/decomplex/examples/oracles/semantic-alias.json @@ -0,0 +1,14 @@ +{ + "detector": "semantic-alias", + "options": { + }, + "expected": { + "alias_clusters": [ + { + "canon": "provenance == FRAME", + "name_count": 2 + } + ], + "reification_miss_count": 1 + } +} diff --git a/gems/decomplex/examples/oracles/sequence-mine.json b/gems/decomplex/examples/oracles/sequence-mine.json new file mode 100644 index 000000000..ca294ef5b --- /dev/null +++ b/gems/decomplex/examples/oracles/sequence-mine.json @@ -0,0 +1,16 @@ +{ + "detector": "sequence-mine", + "options": { + }, + "expected": [ + { + "pair": [ + "alloc_mark", + "cleanup" + ], + "support": 4, + "has": "alloc_mark", + "missing": "cleanup" + } + ] +} diff --git a/gems/decomplex/examples/oracles/state-branch-density.json b/gems/decomplex/examples/oracles/state-branch-density.json new file mode 100644 index 000000000..395a31a46 --- /dev/null +++ b/gems/decomplex/examples/oracles/state-branch-density.json @@ -0,0 +1,14 @@ +{ + "detector": "state-branch-density", + "options": { + }, + "expected": [ + { + "decisions": 1, + "method": "check", + "state_refs": [ + "checked" + ] + } + ] +} diff --git a/gems/decomplex/examples/oracles/state-mesh.json b/gems/decomplex/examples/oracles/state-mesh.json new file mode 100644 index 000000000..7d4e4d024 --- /dev/null +++ b/gems/decomplex/examples/oracles/state-mesh.json @@ -0,0 +1,16 @@ +{ + "detector": "state-mesh", + "options": { + }, + "expected": { + "state_mesh": { + "total_fields": 1, + "total_writes": 3, + "total_reads": 2, + "total_re_derivations": 0 + }, + "field_names": [ + "a" + ] + } +} diff --git a/gems/decomplex/examples/oracles/structural-topology.json b/gems/decomplex/examples/oracles/structural-topology.json new file mode 100644 index 000000000..e437e4606 --- /dev/null +++ 
b/gems/decomplex/examples/oracles/structural-topology.json @@ -0,0 +1,30 @@ +{ + "detector": "structural-topology", + "options": { + }, + "expected": { + "method_count": 5, + "edges": [ + { + "caller_name": "run", + "callee_name": "prepare", + "type": "always" + }, + { + "caller_name": "run", + "callee_name": "ready", + "type": "conditional" + }, + { + "caller_name": "run", + "callee_name": "validate", + "type": "conditional" + }, + { + "caller_name": "run", + "callee_name": "helper", + "type": "iterates" + } + ] + } +} diff --git a/gems/decomplex/examples/oracles/temporal-ordering-pressure.json b/gems/decomplex/examples/oracles/temporal-ordering-pressure.json new file mode 100644 index 000000000..beba2d751 --- /dev/null +++ b/gems/decomplex/examples/oracles/temporal-ordering-pressure.json @@ -0,0 +1,22 @@ +{ + "detector": "temporal-ordering-pressure", + "options": { + }, + "expected": [ + { + "owner": "TemporalOrderExample", + "public_methods": 4, + "state_methods": 4, + "writers": 3, + "orderings": "4!", + "state_fields": [ + "a", + "b" + ], + "shared_fields": [ + "a", + "b" + ] + } + ] +} diff --git a/gems/decomplex/examples/oracles/weighted-inlined-complexity.json b/gems/decomplex/examples/oracles/weighted-inlined-complexity.json new file mode 100644 index 000000000..3a0c82850 --- /dev/null +++ b/gems/decomplex/examples/oracles/weighted-inlined-complexity.json @@ -0,0 +1,12 @@ +{ + "detector": "weighted-inlined-complexity", + "options": { + }, + "expected": [ + { + "method": "checkout", + "depth": 1, + "callee_count": 4 + } + ] +} diff --git a/gems/decomplex/examples/php/co-update.php b/gems/decomplex/examples/php/co-update.php new file mode 100644 index 000000000..24674bb9b --- /dev/null +++ b/gems/decomplex/examples/php/co-update.php @@ -0,0 +1,19 @@ +storage = HEAP; + $node->provenance = HEAP; +} + +function stable_two($node) { + $node->storage = HEAP; + $node->provenance = HEAP; +} + +function stable_three($node) { + $node->storage = HEAP; + 
$node->provenance = HEAP; +} + +function misses_provenance($node) { + $node->storage = HEAP; +} diff --git a/gems/decomplex/examples/php/decision-pressure.php b/gems/decomplex/examples/php/decision-pressure.php new file mode 100644 index 000000000..6fa0480b7 --- /dev/null +++ b/gems/decomplex/examples/php/decision-pressure.php @@ -0,0 +1,5 @@ +symbol; + return $value->isNull(); +} diff --git a/gems/decomplex/examples/php/derived-state.php b/gems/decomplex/examples/php/derived-state.php new file mode 100644 index 000000000..bb81ac362 --- /dev/null +++ b/gems/decomplex/examples/php/derived-state.php @@ -0,0 +1,6 @@ +line(); + $node->col(); + $node->ty(); + $node->span(); + $node->parent(); + $node->recv(); + break; + case AST::Func: + $node->line(); + $node->col(); + $node->ty(); + $node->span(); + $node->parent(); + $node->name(); + break; + case AST::Lit: + $node->line(); + $node->col(); + $node->ty(); + $node->span(); + $node->parent(); + $node->value(); + break; + } +} diff --git a/gems/decomplex/examples/php/flay-similarity.php b/gems/decomplex/examples/php/flay-similarity.php new file mode 100644 index 000000000..0a97d128c --- /dev/null +++ b/gems/decomplex/examples/php/flay-similarity.php @@ -0,0 +1,42 @@ +part1; + if ($value1->ready() && $value1->enabled()) { $total += $value1->amount; } + $value2 = $node->part2; + if ($value2->ready() && $value2->enabled()) { $total += $value2->amount; } + $value3 = $node->part3; + if ($value3->ready() && $value3->enabled()) { $total += $value3->amount; } + $value4 = $node->part4; + if ($value4->ready() && $value4->enabled()) { $total += $value4->amount; } + $value5 = $node->part5; + if ($value5->ready() && $value5->enabled()) { $total += $value5->amount; } + $value6 = $node->part6; + if ($value6->ready() && $value6->enabled()) { $total += $value6->amount; } + $value7 = $node->part7; + if ($value7->ready() && $value7->enabled()) { $total += $value7->amount; } + $value8 = $node->part8; + if ($value8->ready() && 
$value8->enabled()) { $total += $value8->amount; } + return $total; +} + +function second_clone($entry) { + $total = 0; + $item1 = $entry->part1; + if ($item1->ready() && $item1->enabled()) { $total += $item1->amount; } + $item2 = $entry->part2; + if ($item2->ready() && $item2->enabled()) { $total += $item2->amount; } + $item3 = $entry->part3; + if ($item3->ready() && $item3->enabled()) { $total += $item3->amount; } + $item4 = $entry->part4; + if ($item4->ready() && $item4->enabled()) { $total += $item4->amount; } + $item5 = $entry->part5; + if ($item5->ready() && $item5->enabled()) { $total += $item5->amount; } + $item6 = $entry->part6; + if ($item6->ready() && $item6->enabled()) { $total += $item6->amount; } + $item7 = $entry->part7; + if ($item7->ready() && $item7->enabled()) { $total += $item7->amount; } + $item8 = $entry->part8; + if ($item8->ready() && $item8->enabled()) { $total += $item8->amount; } + return $total; +} diff --git a/gems/decomplex/examples/php/function-lcom.php b/gems/decomplex/examples/php/function-lcom.php new file mode 100644 index 000000000..30cf1ee1e --- /dev/null +++ b/gems/decomplex/examples/php/function-lcom.php @@ -0,0 +1,13 @@ +round(); + + $timestamp = now(); + $buffer = Buffer::init(); + $buffer->push($timestamp); + $logger->info($buffer); + + return Result::init($rounded, $buffer); +} diff --git a/gems/decomplex/examples/php/implicit-control-flow.php b/gems/decomplex/examples/php/implicit-control-flow.php new file mode 100644 index 000000000..b3c0ebaeb --- /dev/null +++ b/gems/decomplex/examples/php/implicit-control-flow.php @@ -0,0 +1,12 @@ +status = READY; } + public function validate() { $this->valid = $this->status == READY; } + public function commit() { $this->done = $this->valid; } + + public function ok1() { $this->prepare(); $this->validate(); $this->commit(); } + public function ok2() { $this->prepare(); $this->validate(); $this->commit(); } + public function ok3() { $this->prepare(); $this->validate(); $this->commit(); 
} + public function ok4() { $this->prepare(); $this->validate(); $this->commit(); } + public function drift() { $this->validate(); $this->prepare(); $this->commit(); } +} diff --git a/gems/decomplex/examples/php/inconsistent-rename-clone.php b/gems/decomplex/examples/php/inconsistent-rename-clone.php new file mode 100644 index 000000000..cf838cd66 --- /dev/null +++ b/gems/decomplex/examples/php/inconsistent-rename-clone.php @@ -0,0 +1,14 @@ +round(); + + $timestamp = now(); + $buffer = Buffer::init(); + $buffer->push($timestamp); + return Result::init($total, $buffer); +} diff --git a/gems/decomplex/examples/php/locality-drag.php b/gems/decomplex/examples/php/locality-drag.php new file mode 100644 index 000000000..8e11e0d0f --- /dev/null +++ b/gems/decomplex/examples/php/locality-drag.php @@ -0,0 +1,28 @@ +id; + + $total = $cart->total; + if ($total > 100) { + if ($cart->discountable()) { + $discount = 10; + } + } + if ($cart->taxable()) { + if ($cart->region) { + $tax = $total * 0.2; + } + } + if ($logger->enabled()) { + if ($logger->debug()) { + $logger->info($total); + } + } + if ($cart->valid()) { + if ($cart->ready()) { + $status = READY; + } + } + + emit($receipt_id); +} diff --git a/gems/decomplex/examples/php/miner.php b/gems/decomplex/examples/php/miner.php new file mode 100644 index 000000000..da59083db --- /dev/null +++ b/gems/decomplex/examples/php/miner.php @@ -0,0 +1,5 @@ +p() && $y->q() && $z->r()) { go($x); } +} + +function two($x, $y, $z) { + if ($x->p() && $y->q() && $z->r()) { go($x); } +} + +function three($x, $y, $z) { + if ($x->p() && $y->q() && $z->r()) { go($x); } +} + +function bug($x, $y, $z) { + if ($x->p() && $y->q()) { go($x); } +} diff --git a/gems/decomplex/examples/php/predicate-alias.php b/gems/decomplex/examples/php/predicate-alias.php new file mode 100644 index 000000000..e139bbd5b --- /dev/null +++ b/gems/decomplex/examples/php/predicate-alias.php @@ -0,0 +1,4 @@ +isSome()) { + $value->isNull(); + } +} diff --git 
a/gems/decomplex/examples/php/semantic-alias.php b/gems/decomplex/examples/php/semantic-alias.php new file mode 100644 index 000000000..0ff8fcc64 --- /dev/null +++ b/gems/decomplex/examples/php/semantic-alias.php @@ -0,0 +1,8 @@ +provenance == FRAME; } +function is_frame($node) { return $node->provenance == FRAME; } +function heap_pred($node) { return $node->provenance == HEAP; } + +function somewhere($node) { + if ($node->provenance == FRAME) { return 1; } +} diff --git a/gems/decomplex/examples/php/sequence-mine.php b/gems/decomplex/examples/php/sequence-mine.php new file mode 100644 index 000000000..e11013b62 --- /dev/null +++ b/gems/decomplex/examples/php/sequence-mine.php @@ -0,0 +1,6 @@ +checked = true; + } + + if ($this->checked && $name == "admin") { + print("hello"); + } + } +} diff --git a/gems/decomplex/examples/php/state-mesh.php b/gems/decomplex/examples/php/state-mesh.php new file mode 100644 index 000000000..27a5d3101 --- /dev/null +++ b/gems/decomplex/examples/php/state-mesh.php @@ -0,0 +1,19 @@ +a = 1; + $this->b = 2; + } + + public function writer() { + $this->a = 3; + } + + public function reader() { + return $this->a + $this->b; + } + + public function a_alias() { + return $this->a; + } +} diff --git a/gems/decomplex/examples/php/structural-topology.php b/gems/decomplex/examples/php/structural-topology.php new file mode 100644 index 000000000..e23623abb --- /dev/null +++ b/gems/decomplex/examples/php/structural-topology.php @@ -0,0 +1,17 @@ +prepare(); + if ($this->ready()) { + $this->validate(); + } + foreach ($items as $item) { + $this->helper($item); + } + } + + private function prepare() {} + private function ready() { return true; } + public function validate() {} + private function helper($item) { return $item; } +} diff --git a/gems/decomplex/examples/php/temporal-ordering-pressure.php b/gems/decomplex/examples/php/temporal-ordering-pressure.php new file mode 100644 index 000000000..e3456b4d9 --- /dev/null +++ 
b/gems/decomplex/examples/php/temporal-ordering-pressure.php @@ -0,0 +1,19 @@ +a = 1; + } + + public function two() { + $this->a = 2; + $this->b = 3; + } + + public function three() { + $this->b = 4; + } + + public function reader() { + return $this->a; + } +} diff --git a/gems/decomplex/examples/php/weighted-inlined-complexity.php b/gems/decomplex/examples/php/weighted-inlined-complexity.php new file mode 100644 index 000000000..e8dfba599 --- /dev/null +++ b/gems/decomplex/examples/php/weighted-inlined-complexity.php @@ -0,0 +1,45 @@ +validate_user($user); + $this->apply_discount($cart); + $this->process_payment($user, $cart); + $this->audit_cart($cart); + } + + private function validate_user($user) { + if (!$user) { return false; } + if ($user->active() && !$user->suspended()) { + if ($user->profile->complete()) { return true; } + return false; + } + return false; + } + + private function apply_discount($cart) { + if ($cart->total > 100 && $this->eligible()) { + if ($this->holiday()) { return 20; } + if ($this->loyalty_month()) { return 15; } + return 10; + } + } + + private function process_payment($user, $cart) { + if ($this->gateway->ready()) { + if ($cart->total > 0 && $user->active()) { + if ($this->fraud_check($user)) { $this->charge($user, $cart); } + else { $this->decline($user); } + } + } + } + + private function audit_cart($cart) { + foreach ($cart->items as $item) { + if ($item->taxable()) { + if ($item->region && $item->amount > 0) { + $this->record_tax($item); + } + } + } + } +} diff --git a/gems/decomplex/examples/python/co-update.py b/gems/decomplex/examples/python/co-update.py new file mode 100644 index 000000000..450322d79 --- /dev/null +++ b/gems/decomplex/examples/python/co-update.py @@ -0,0 +1,14 @@ +def stable_one(node): + node.storage = 1 + node.provenance = 1 + +def stable_two(node): + node.storage = 1 + node.provenance = 1 + +def stable_three(node): + node.storage = 1 + node.provenance = 1 + +def misses_provenance(node): + node.storage = 1 
diff --git a/gems/decomplex/examples/python/decision-pressure.py b/gems/decomplex/examples/python/decision-pressure.py new file mode 100644 index 000000000..637b935de --- /dev/null +++ b/gems/decomplex/examples/python/decision-pressure.py @@ -0,0 +1,3 @@ +def scan(node): + value = node.symbol + return value.isNull() diff --git a/gems/decomplex/examples/python/derived-state.py b/gems/decomplex/examples/python/derived-state.py new file mode 100644 index 000000000..99799bd0e --- /dev/null +++ b/gems/decomplex/examples/python/derived-state.py @@ -0,0 +1,4 @@ +def check(input): + cached = input + 1 + input = 2 + print(cached) diff --git a/gems/decomplex/examples/python/false-simplicity.py b/gems/decomplex/examples/python/false-simplicity.py new file mode 100644 index 000000000..a8601cbb2 --- /dev/null +++ b/gems/decomplex/examples/python/false-simplicity.py @@ -0,0 +1,3 @@ +class FalseSimplicityExample: + def hack(self): + print("hidden IO") diff --git a/gems/decomplex/examples/python/fat-union.py b/gems/decomplex/examples/python/fat-union.py new file mode 100644 index 000000000..990a35126 --- /dev/null +++ b/gems/decomplex/examples/python/fat-union.py @@ -0,0 +1,8 @@ +def handle(node): + match node: + case AST.Call: + node.line(); node.col(); node.ty(); node.span(); node.parent(); node.recv() + case AST.Func: + node.line(); node.col(); node.ty(); node.span(); node.parent(); node.name() + case AST.Lit: + node.line(); node.col(); node.ty(); node.span(); node.parent(); node.value() diff --git a/gems/decomplex/examples/python/flay-similarity.py b/gems/decomplex/examples/python/flay-similarity.py new file mode 100644 index 000000000..81a886050 --- /dev/null +++ b/gems/decomplex/examples/python/flay-similarity.py @@ -0,0 +1,55 @@ +def first_clone(node): + total = 0 + value1 = node.part1 + if value1.ready() and value1.enabled(): + total += value1.amount + value2 = node.part2 + if value2.ready() and value2.enabled(): + total += value2.amount + value3 = node.part3 + if 
value3.ready() and value3.enabled(): + total += value3.amount + value4 = node.part4 + if value4.ready() and value4.enabled(): + total += value4.amount + value5 = node.part5 + if value5.ready() and value5.enabled(): + total += value5.amount + value6 = node.part6 + if value6.ready() and value6.enabled(): + total += value6.amount + value7 = node.part7 + if value7.ready() and value7.enabled(): + total += value7.amount + value8 = node.part8 + if value8.ready() and value8.enabled(): + total += value8.amount + return total + +def second_clone(entry): + total = 0 + item1 = entry.part1 + if item1.ready() and item1.enabled(): + total += item1.amount + item2 = entry.part2 + if item2.ready() and item2.enabled(): + total += item2.amount + item3 = entry.part3 + if item3.ready() and item3.enabled(): + total += item3.amount + item4 = entry.part4 + if item4.ready() and item4.enabled(): + total += item4.amount + item5 = entry.part5 + if item5.ready() and item5.enabled(): + total += item5.amount + item6 = entry.part6 + if item6.ready() and item6.enabled(): + total += item6.amount + item7 = entry.part7 + if item7.ready() and item7.enabled(): + total += item7.amount + item8 = entry.part8 + if item8.ready() and item8.enabled(): + total += item8.amount + return total diff --git a/gems/decomplex/examples/python/function-lcom.py b/gems/decomplex/examples/python/function-lcom.py new file mode 100644 index 000000000..cf1fe2c61 --- /dev/null +++ b/gems/decomplex/examples/python/function-lcom.py @@ -0,0 +1,11 @@ +def mixed(price, tax, logger): + subtotal = price + tax + total = subtotal * 2 + rounded = total.round() + + timestamp = now() + buffer = Buffer() + buffer.push(timestamp) + logger.info(buffer) + + return Result(rounded, buffer) diff --git a/gems/decomplex/examples/python/implicit-control-flow.py b/gems/decomplex/examples/python/implicit-control-flow.py new file mode 100644 index 000000000..2fef8d307 --- /dev/null +++ b/gems/decomplex/examples/python/implicit-control-flow.py @@ -0,0 
+1,9 @@ +class FlowExample: + def prepare(self): self.status = 1 + def validate(self): self.valid = self.status == 1 + def commit(self): self.done = self.valid + def ok1(self): self.prepare(); self.validate(); self.commit() + def ok2(self): self.prepare(); self.validate(); self.commit() + def ok3(self): self.prepare(); self.validate(); self.commit() + def ok4(self): self.prepare(); self.validate(); self.commit() + def drift(self): self.validate(); self.prepare(); self.commit() diff --git a/gems/decomplex/examples/python/inconsistent-rename-clone.py b/gems/decomplex/examples/python/inconsistent-rename-clone.py new file mode 100644 index 000000000..1b9a28c9f --- /dev/null +++ b/gems/decomplex/examples/python/inconsistent-rename-clone.py @@ -0,0 +1,11 @@ +def original(): + src = fetch(1) + check(src) + store(src) + finalize(src) + +def pasted(): + dst = fetch(2) + check(dst) + store(src) + finalize(dst) diff --git a/gems/decomplex/examples/python/local-flow.py b/gems/decomplex/examples/python/local-flow.py new file mode 100644 index 000000000..8862fc626 --- /dev/null +++ b/gems/decomplex/examples/python/local-flow.py @@ -0,0 +1,8 @@ +def mixed(price, tax): + subtotal = price + tax + total = subtotal.round() + + timestamp = now() + buffer = Buffer() + buffer.push(timestamp) + return Result(total, buffer) diff --git a/gems/decomplex/examples/python/locality-drag.py b/gems/decomplex/examples/python/locality-drag.py new file mode 100644 index 000000000..9ae444fc9 --- /dev/null +++ b/gems/decomplex/examples/python/locality-drag.py @@ -0,0 +1,14 @@ +def run(user, cart, logger): + receipt_id = user.id + + total = cart.total + if total > 100: + if cart.discountable(): discount = 10 + if cart.taxable(): + if cart.region: tax = total * 2 + if logger.enabled(): + if logger.debug(): logger.info(total) + if cart.valid(): + if cart.ready(): status = 1 + + emit(receipt_id) diff --git a/gems/decomplex/examples/python/miner.py b/gems/decomplex/examples/python/miner.py new file mode 
100644 index 000000000..298e46ddd --- /dev/null +++ b/gems/decomplex/examples/python/miner.py @@ -0,0 +1,4 @@ +def one(a,b,c): return a and b and c +def two(a,b,c): return a and b and c +def three(a,b,c): return a and b and c +def broken(a,b): return a and b diff --git a/gems/decomplex/examples/python/operational-discontinuity.py b/gems/decomplex/examples/python/operational-discontinuity.py new file mode 100644 index 000000000..2ae5319b7 --- /dev/null +++ b/gems/decomplex/examples/python/operational-discontinuity.py @@ -0,0 +1,8 @@ +def phase_shift(): + a = 1 + b = 2 + + # Phase 2 + x = 3 + y = 4 + print(x, y) diff --git a/gems/decomplex/examples/python/oversized-predicate.py b/gems/decomplex/examples/python/oversized-predicate.py new file mode 100644 index 000000000..886c6d71c --- /dev/null +++ b/gems/decomplex/examples/python/oversized-predicate.py @@ -0,0 +1,3 @@ +def complex_check(a,b,c,d): + if a and b and c and d: + print("too big") diff --git a/gems/decomplex/examples/python/path-condition.py b/gems/decomplex/examples/python/path-condition.py new file mode 100644 index 000000000..9749a124c --- /dev/null +++ b/gems/decomplex/examples/python/path-condition.py @@ -0,0 +1,8 @@ +def one(x,y,z): + if x.p() and y.q() and z.r(): go(x) +def two(x,y,z): + if x.p() and y.q() and z.r(): go(x) +def three(x,y,z): + if x.p() and y.q() and z.r(): go(x) +def bug(x,y,z): + if x.p() and y.q(): go(x) diff --git a/gems/decomplex/examples/python/predicate-alias.py b/gems/decomplex/examples/python/predicate-alias.py new file mode 100644 index 000000000..42320b895 --- /dev/null +++ b/gems/decomplex/examples/python/predicate-alias.py @@ -0,0 +1,3 @@ +def first(): return True +def second(): return True +def other(): return False diff --git a/gems/decomplex/examples/python/redundant-nil-guard.py b/gems/decomplex/examples/python/redundant-nil-guard.py new file mode 100644 index 000000000..8fe538750 --- /dev/null +++ b/gems/decomplex/examples/python/redundant-nil-guard.py @@ -0,0 +1,3 
@@ +def check(value): + if value.isSome(): + value.isNull() diff --git a/gems/decomplex/examples/python/semantic-alias.py b/gems/decomplex/examples/python/semantic-alias.py new file mode 100644 index 000000000..3c093f443 --- /dev/null +++ b/gems/decomplex/examples/python/semantic-alias.py @@ -0,0 +1,7 @@ +def frame(node): return node.provenance == FRAME +def is_frame(node): return provenance == FRAME +def heap(node): return node.provenance == HEAP +def somewhere(node): + if node.provenance == FRAME: + return 1 + return 0 diff --git a/gems/decomplex/examples/python/sequence-mine.py b/gems/decomplex/examples/python/sequence-mine.py new file mode 100644 index 000000000..9e3bdd379 --- /dev/null +++ b/gems/decomplex/examples/python/sequence-mine.py @@ -0,0 +1,5 @@ +def one(): alloc_mark(x); body1(); cleanup(x) +def two(): alloc_mark(y); body2(); cleanup(y) +def three(): alloc_mark(z); body3(); cleanup(z) +def four(): alloc_mark(w); body4(); cleanup(w) +def leak(): alloc_mark(q); use_value(q) diff --git a/gems/decomplex/examples/python/state-branch-density.py b/gems/decomplex/examples/python/state-branch-density.py new file mode 100644 index 000000000..b4dfdf071 --- /dev/null +++ b/gems/decomplex/examples/python/state-branch-density.py @@ -0,0 +1,6 @@ +class StateBranchChecker: + def check(self, admin, name): + if admin: + self.checked = True + if self.checked and name == "admin": + print("hello") diff --git a/gems/decomplex/examples/python/state-mesh.py b/gems/decomplex/examples/python/state-mesh.py new file mode 100644 index 000000000..85396e9fd --- /dev/null +++ b/gems/decomplex/examples/python/state-mesh.py @@ -0,0 +1,10 @@ +class StateMeshExample: + def initialize(self): + self.a = 1 + self.b = 2 + def writer(self): + self.a = 3 + def reader(self): + return self.a + self.b + def a_alias(self): + return self.a diff --git a/gems/decomplex/examples/python/structural-topology.py b/gems/decomplex/examples/python/structural-topology.py new file mode 100644 index 
000000000..6652682c0 --- /dev/null +++ b/gems/decomplex/examples/python/structural-topology.py @@ -0,0 +1,11 @@ +class Worker: + def run(self, items): + self.prepare() + if self.ready(): + self.validate() + for item in items: + self.helper(item) + def prepare(self): pass + def ready(self): return True + def validate(self): pass + def helper(self, item): return item diff --git a/gems/decomplex/examples/python/temporal-ordering-pressure.py b/gems/decomplex/examples/python/temporal-ordering-pressure.py new file mode 100644 index 000000000..b26e60e63 --- /dev/null +++ b/gems/decomplex/examples/python/temporal-ordering-pressure.py @@ -0,0 +1,5 @@ +class TemporalOrderExample: + def one(self): self.a = 1 + def two(self): self.a = 2; self.b = 3 + def three(self): self.b = 4 + def reader(self): return self.a diff --git a/gems/decomplex/examples/python/weighted-inlined-complexity.py b/gems/decomplex/examples/python/weighted-inlined-complexity.py new file mode 100644 index 000000000..25aa61456 --- /dev/null +++ b/gems/decomplex/examples/python/weighted-inlined-complexity.py @@ -0,0 +1,30 @@ +def checkout(user, cart): + validate_user(user) + apply_discount(cart) + process_payment(user, cart) + audit_cart(cart) + +def validate_user(user): + if user.active() and not user.suspended(): + if user.profile.complete(): return True + else: return False + else: return False + +def apply_discount(cart): + if cart.total > 100 and eligible(): + if holiday(): return 20 + elif loyalty_month(): return 15 + else: return 10 + return 0 + +def process_payment(user, cart): + if gateway.ready(): + if cart.total > 0 and user.active(): + if fraud_check(user): charge(user, cart) + else: decline(user) + +def audit_cart(cart): + for item in cart.items: + if item.taxable(): + if item.region and item.amount > 0: + record_tax(item) diff --git a/gems/decomplex/examples/ruby/co-update.rb b/gems/decomplex/examples/ruby/co-update.rb new file mode 100644 index 000000000..a7b2a38bb --- /dev/null +++ 
b/gems/decomplex/examples/ruby/co-update.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +def stable_one(node) + node.storage = :heap + node.provenance = :heap +end + +def stable_two(node) + node.storage = :heap + node.provenance = :heap +end + +def stable_three(node) + node.storage = :heap + node.provenance = :heap +end + +def misses_provenance(node) + node.storage = :heap +end diff --git a/gems/decomplex/examples/ruby/decision-pressure.rb b/gems/decomplex/examples/ruby/decision-pressure.rb new file mode 100644 index 000000000..193ffe609 --- /dev/null +++ b/gems/decomplex/examples/ruby/decision-pressure.rb @@ -0,0 +1,6 @@ +# frozen_string_literal: true + +def scan(node) + value = node.symbol + value.nil? +end diff --git a/gems/decomplex/examples/ruby/derived-state.rb b/gems/decomplex/examples/ruby/derived-state.rb new file mode 100644 index 000000000..f1a9e1cda --- /dev/null +++ b/gems/decomplex/examples/ruby/derived-state.rb @@ -0,0 +1,7 @@ +# frozen_string_literal: true + +def check(input) + cached = input + 1 + input = 2 + puts cached +end diff --git a/gems/decomplex/examples/ruby/false-simplicity.rb b/gems/decomplex/examples/ruby/false-simplicity.rb new file mode 100644 index 000000000..2b709e756 --- /dev/null +++ b/gems/decomplex/examples/ruby/false-simplicity.rb @@ -0,0 +1,7 @@ +# frozen_string_literal: true + +class FalseSimplicityExample + def hack + print "hidden IO" + end +end diff --git a/gems/decomplex/examples/ruby/fat-union.rb b/gems/decomplex/examples/ruby/fat-union.rb new file mode 100644 index 000000000..7fd78ebd6 --- /dev/null +++ b/gems/decomplex/examples/ruby/fat-union.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +def handle(node) + case node + when AST::Call + node.line + node.col + node.ty + node.span + node.parent + node.recv + when AST::Func + node.line + node.col + node.ty + node.span + node.parent + node.name + when AST::Lit + node.line + node.col + node.ty + node.span + node.parent + node.value + end +end diff --git 
a/gems/decomplex/examples/ruby/flay-similarity.rb b/gems/decomplex/examples/ruby/flay-similarity.rb new file mode 100644 index 000000000..e3515ad97 --- /dev/null +++ b/gems/decomplex/examples/ruby/flay-similarity.rb @@ -0,0 +1,75 @@ +# frozen_string_literal: true + +def first_clone(node) + total = 0 + value1 = node.part1 + if value1.ready? && value1.enabled? + total += value1.amount + end + value2 = node.part2 + if value2.ready? && value2.enabled? + total += value2.amount + end + value3 = node.part3 + if value3.ready? && value3.enabled? + total += value3.amount + end + value4 = node.part4 + if value4.ready? && value4.enabled? + total += value4.amount + end + value5 = node.part5 + if value5.ready? && value5.enabled? + total += value5.amount + end + value6 = node.part6 + if value6.ready? && value6.enabled? + total += value6.amount + end + value7 = node.part7 + if value7.ready? && value7.enabled? + total += value7.amount + end + value8 = node.part8 + if value8.ready? && value8.enabled? + total += value8.amount + end + total +end + +def second_clone(entry) + total = 0 + item1 = entry.part1 + if item1.ready? && item1.enabled? + total += item1.amount + end + item2 = entry.part2 + if item2.ready? && item2.enabled? + total += item2.amount + end + item3 = entry.part3 + if item3.ready? && item3.enabled? + total += item3.amount + end + item4 = entry.part4 + if item4.ready? && item4.enabled? + total += item4.amount + end + item5 = entry.part5 + if item5.ready? && item5.enabled? + total += item5.amount + end + item6 = entry.part6 + if item6.ready? && item6.enabled? + total += item6.amount + end + item7 = entry.part7 + if item7.ready? && item7.enabled? + total += item7.amount + end + item8 = entry.part8 + if item8.ready? && item8.enabled? 
+ total += item8.amount + end + total +end diff --git a/gems/decomplex/examples/ruby/function-lcom.rb b/gems/decomplex/examples/ruby/function-lcom.rb new file mode 100644 index 000000000..722342983 --- /dev/null +++ b/gems/decomplex/examples/ruby/function-lcom.rb @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +class Billing + def mixed(price, tax, logger) + subtotal = price + tax + total = subtotal * 2 + rounded = total.round + + timestamp = Time.now + buffer = [] + buffer << timestamp + logger.info(buffer) + + [rounded, buffer] + end +end diff --git a/gems/decomplex/examples/ruby/implicit-control-flow.rb b/gems/decomplex/examples/ruby/implicit-control-flow.rb new file mode 100644 index 000000000..edb727b4f --- /dev/null +++ b/gems/decomplex/examples/ruby/implicit-control-flow.rb @@ -0,0 +1,13 @@ +# frozen_string_literal: true + +class FlowExample + def prepare; self.status = :ready; end + def validate; @valid = status == :ready; end + def commit; self.done = @valid; end + + def ok1; prepare; validate; commit; end + def ok2; prepare; validate; commit; end + def ok3; prepare; validate; commit; end + def ok4; prepare; validate; commit; end + def drift; validate; prepare; commit; end +end diff --git a/gems/decomplex/examples/ruby/inconsistent-rename-clone.rb b/gems/decomplex/examples/ruby/inconsistent-rename-clone.rb new file mode 100644 index 000000000..9409249b7 --- /dev/null +++ b/gems/decomplex/examples/ruby/inconsistent-rename-clone.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +def original + src = fetch(1) + check(src) + store(src) + finalize(src) +end + +def pasted + dst = fetch(2) + check(dst) + store(src) + finalize(dst) +end diff --git a/gems/decomplex/examples/ruby/local-flow.rb b/gems/decomplex/examples/ruby/local-flow.rb new file mode 100644 index 000000000..f4c3de6d5 --- /dev/null +++ b/gems/decomplex/examples/ruby/local-flow.rb @@ -0,0 +1,13 @@ +# frozen_string_literal: true + +class Billing + def mixed(price, tax) + subtotal = price + tax + 
total = subtotal.round + + timestamp = Time.now + buffer = [] + buffer << timestamp + [total, buffer] + end +end diff --git a/gems/decomplex/examples/ruby/locality-drag.rb b/gems/decomplex/examples/ruby/locality-drag.rb new file mode 100644 index 000000000..1b87ae6ba --- /dev/null +++ b/gems/decomplex/examples/ruby/locality-drag.rb @@ -0,0 +1,31 @@ +# frozen_string_literal: true + +class Importer + def run(user, cart, logger) + receipt_id = user.id + + total = cart.total + if total > 100 + if cart.discountable? + discount = 10 + end + end + if cart.taxable? + if cart.region + tax = total * 0.2 + end + end + if logger.enabled? + if logger.debug? + logger.info(total) + end + end + if cart.valid? + if cart.ready? + status = :ready + end + end + + emit(receipt_id) + end +end diff --git a/gems/decomplex/examples/ruby/miner.rb b/gems/decomplex/examples/ruby/miner.rb new file mode 100644 index 000000000..4213623bc --- /dev/null +++ b/gems/decomplex/examples/ruby/miner.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +def one(a, b, c) + a && b && c +end + +def two(a, b, c) + a && b && c +end + +def three(a, b, c) + a && b && c +end + +def broken(a, b) + a && b +end diff --git a/gems/decomplex/examples/ruby/operational-discontinuity.rb b/gems/decomplex/examples/ruby/operational-discontinuity.rb new file mode 100644 index 000000000..3979ac0ba --- /dev/null +++ b/gems/decomplex/examples/ruby/operational-discontinuity.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +def phase_shift + a = 1 + b = 2 + + # Phase 2 + x = 3 + y = 4 + puts x, y +end diff --git a/gems/decomplex/examples/ruby/oversized-predicate.rb b/gems/decomplex/examples/ruby/oversized-predicate.rb new file mode 100644 index 000000000..b1d8e005c --- /dev/null +++ b/gems/decomplex/examples/ruby/oversized-predicate.rb @@ -0,0 +1,7 @@ +# frozen_string_literal: true + +def complex_check + if a && b && c && d + puts "too big" + end +end diff --git a/gems/decomplex/examples/ruby/path-condition.rb 
b/gems/decomplex/examples/ruby/path-condition.rb new file mode 100644 index 000000000..3dde8c231 --- /dev/null +++ b/gems/decomplex/examples/ruby/path-condition.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +def one(x, y, z) + go(x) if x.p? && y.q? && z.r? +end + +def two(x, y, z) + go(x) if x.p? && y.q? && z.r? +end + +def three(x, y, z) + go(x) if x.p? && y.q? && z.r? +end + +def bug(x, y, z) + go(x) if x.p? && y.q? +end diff --git a/gems/decomplex/examples/ruby/predicate-alias.rb b/gems/decomplex/examples/ruby/predicate-alias.rb new file mode 100644 index 000000000..e187f130f --- /dev/null +++ b/gems/decomplex/examples/ruby/predicate-alias.rb @@ -0,0 +1,13 @@ +# frozen_string_literal: true + +def first + true +end + +def second + true +end + +def other + false +end diff --git a/gems/decomplex/examples/ruby/redundant-nil-guard.rb b/gems/decomplex/examples/ruby/redundant-nil-guard.rb new file mode 100644 index 000000000..5a66bac58 --- /dev/null +++ b/gems/decomplex/examples/ruby/redundant-nil-guard.rb @@ -0,0 +1,7 @@ +# frozen_string_literal: true + +def check(value) + if value + value.nil? 
+ end +end diff --git a/gems/decomplex/examples/ruby/semantic-alias.rb b/gems/decomplex/examples/ruby/semantic-alias.rb new file mode 100644 index 000000000..cdbe2734c --- /dev/null +++ b/gems/decomplex/examples/ruby/semantic-alias.rb @@ -0,0 +1,9 @@ +# frozen_string_literal: true + +def frame?; @provenance == :frame; end +def is_frame?; provenance == :frame; end +def heap?; @provenance == :heap; end + +def somewhere(node) + return 1 if node.provenance == :frame +end diff --git a/gems/decomplex/examples/ruby/sequence-mine.rb b/gems/decomplex/examples/ruby/sequence-mine.rb new file mode 100644 index 000000000..eb43e797a --- /dev/null +++ b/gems/decomplex/examples/ruby/sequence-mine.rb @@ -0,0 +1,7 @@ +# frozen_string_literal: true + +def one; alloc_mark(x); body1; cleanup(x); end +def two; alloc_mark(y); body2; cleanup(y); end +def three; alloc_mark(z); body3; cleanup(z); end +def four; alloc_mark(w); body4; cleanup(w); end +def leak; alloc_mark(q); use(q); end diff --git a/gems/decomplex/examples/ruby/state-branch-density.rb b/gems/decomplex/examples/ruby/state-branch-density.rb new file mode 100644 index 000000000..8c15b796a --- /dev/null +++ b/gems/decomplex/examples/ruby/state-branch-density.rb @@ -0,0 +1,13 @@ +# frozen_string_literal: true + +class StateBranchChecker + def check(admin, name) + if admin + @checked = true + end + + if @checked && name == "admin" + puts "hello" + end + end +end diff --git a/gems/decomplex/examples/ruby/state-mesh.rb b/gems/decomplex/examples/ruby/state-mesh.rb new file mode 100644 index 000000000..9efd86ef2 --- /dev/null +++ b/gems/decomplex/examples/ruby/state-mesh.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +class StateMeshExample + def initialize + @a = 1 + @b = 2 + end + + def writer + @a = 3 + end + + def reader + @a + @b + end + + def a_alias + @a + end +end diff --git a/gems/decomplex/examples/ruby/structural-topology.rb b/gems/decomplex/examples/ruby/structural-topology.rb new file mode 100644 index 
000000000..ced19bfeb --- /dev/null +++ b/gems/decomplex/examples/ruby/structural-topology.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true + +class Worker + def run(items) + prepare + if ready() + validate + end + items.each do |item| + helper(item) + end + end + + private + + def prepare; end + def ready; true; end + def validate; end + def helper(item); item; end + + public :validate +end diff --git a/gems/decomplex/examples/ruby/temporal-ordering-pressure.rb b/gems/decomplex/examples/ruby/temporal-ordering-pressure.rb new file mode 100644 index 000000000..afa9d0d1c --- /dev/null +++ b/gems/decomplex/examples/ruby/temporal-ordering-pressure.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +class TemporalOrderExample + def one + @a = 1 + end + + def two + @a = 2 + @b = 3 + end + + def three + @b = 4 + end + + def reader + @a + end +end diff --git a/gems/decomplex/examples/ruby/weighted-inlined-complexity.rb b/gems/decomplex/examples/ruby/weighted-inlined-complexity.rb new file mode 100644 index 000000000..c9f82e75a --- /dev/null +++ b/gems/decomplex/examples/ruby/weighted-inlined-complexity.rb @@ -0,0 +1,59 @@ +# frozen_string_literal: true + +class WeightedInlineExample + def checkout(user, cart) + validate_user(user) + apply_discount(cart) + process_payment(user, cart) + audit_cart(cart) + end + + private + + def validate_user(user) + return false unless user + if user.active? && !user.suspended? + if user.profile.complete? + true + else + false + end + else + false + end + end + + def apply_discount(cart) + if cart.total > 100 && eligible? + if holiday? + 20 + elsif loyalty_month? + 15 + else + 10 + end + end + end + + def process_payment(user, cart) + if gateway.ready? + if cart.total > 0 && user.active? + if fraud_check(user) + charge(user, cart) + else + decline(user) + end + end + end + end + + def audit_cart(cart) + cart.items.each do |item| + if item.taxable? 
+ if item.region && item.amount > 0 + record_tax(item) + end + end + end + end +end diff --git a/gems/decomplex/examples/rust/co-update.rs b/gems/decomplex/examples/rust/co-update.rs new file mode 100644 index 000000000..6afaea6b6 --- /dev/null +++ b/gems/decomplex/examples/rust/co-update.rs @@ -0,0 +1,23 @@ +struct Node { + storage: i32, + provenance: i32, +} + +fn stable_one(mut node: Node) { + node.storage = 1; + node.provenance = 1; +} + +fn stable_two(mut node: Node) { + node.storage = 1; + node.provenance = 1; +} + +fn stable_three(mut node: Node) { + node.storage = 1; + node.provenance = 1; +} + +fn misses_provenance(mut node: Node) { + node.storage = 1; +} diff --git a/gems/decomplex/examples/rust/decision-pressure.rs b/gems/decomplex/examples/rust/decision-pressure.rs new file mode 100644 index 000000000..3a8401f04 --- /dev/null +++ b/gems/decomplex/examples/rust/decision-pressure.rs @@ -0,0 +1,3 @@ +fn scan(node: Node) -> bool { + node.symbol.is_none() +} diff --git a/gems/decomplex/examples/rust/derived-state.rs b/gems/decomplex/examples/rust/derived-state.rs new file mode 100644 index 000000000..aebef251b --- /dev/null +++ b/gems/decomplex/examples/rust/derived-state.rs @@ -0,0 +1,5 @@ +fn check(mut input: i32) { + let cached = input + 1; + input = 2; + print(cached); +} diff --git a/gems/decomplex/examples/rust/false-simplicity.rs b/gems/decomplex/examples/rust/false-simplicity.rs new file mode 100644 index 000000000..fb7937db7 --- /dev/null +++ b/gems/decomplex/examples/rust/false-simplicity.rs @@ -0,0 +1,7 @@ +struct FalseSimplicityExample; + +impl FalseSimplicityExample { + fn hack(&self) { + print("hidden IO"); + } +} diff --git a/gems/decomplex/examples/rust/fat-union.rs b/gems/decomplex/examples/rust/fat-union.rs new file mode 100644 index 000000000..aca570ca6 --- /dev/null +++ b/gems/decomplex/examples/rust/fat-union.rs @@ -0,0 +1,28 @@ +fn handle(node: Ast) { + match node { + AST::Call => { + node.line(); + node.col(); + node.ty(); + 
node.span(); + node.parent(); + node.recv(); + } + AST::Func => { + node.line(); + node.col(); + node.ty(); + node.span(); + node.parent(); + node.name(); + } + AST::Lit => { + node.line(); + node.col(); + node.ty(); + node.span(); + node.parent(); + node.value(); + } + } +} diff --git a/gems/decomplex/examples/rust/flay-similarity.rs b/gems/decomplex/examples/rust/flay-similarity.rs new file mode 100644 index 000000000..ab464e914 --- /dev/null +++ b/gems/decomplex/examples/rust/flay-similarity.rs @@ -0,0 +1,41 @@ +fn first_clone(node: Node) -> i32 { + let mut total = 0; + let value1 = node.part1; + if value1.ready() && value1.enabled() { total += value1.amount; } + let value2 = node.part2; + if value2.ready() && value2.enabled() { total += value2.amount; } + let value3 = node.part3; + if value3.ready() && value3.enabled() { total += value3.amount; } + let value4 = node.part4; + if value4.ready() && value4.enabled() { total += value4.amount; } + let value5 = node.part5; + if value5.ready() && value5.enabled() { total += value5.amount; } + let value6 = node.part6; + if value6.ready() && value6.enabled() { total += value6.amount; } + let value7 = node.part7; + if value7.ready() && value7.enabled() { total += value7.amount; } + let value8 = node.part8; + if value8.ready() && value8.enabled() { total += value8.amount; } + total +} + +fn second_clone(entry: Node) -> i32 { + let mut total = 0; + let item1 = entry.part1; + if item1.ready() && item1.enabled() { total += item1.amount; } + let item2 = entry.part2; + if item2.ready() && item2.enabled() { total += item2.amount; } + let item3 = entry.part3; + if item3.ready() && item3.enabled() { total += item3.amount; } + let item4 = entry.part4; + if item4.ready() && item4.enabled() { total += item4.amount; } + let item5 = entry.part5; + if item5.ready() && item5.enabled() { total += item5.amount; } + let item6 = entry.part6; + if item6.ready() && item6.enabled() { total += item6.amount; } + let item7 = entry.part7; + if 
item7.ready() && item7.enabled() { total += item7.amount; } + let item8 = entry.part8; + if item8.ready() && item8.enabled() { total += item8.amount; } + total +} diff --git a/gems/decomplex/examples/rust/function-lcom.rs b/gems/decomplex/examples/rust/function-lcom.rs new file mode 100644 index 000000000..ed4a5cb70 --- /dev/null +++ b/gems/decomplex/examples/rust/function-lcom.rs @@ -0,0 +1,12 @@ +fn mixed(price: i32, tax: i32, logger: Logger) -> (i32, Buffer) { + let subtotal = price + tax; + let total = subtotal * 2; + let rounded = total.round(); + + let timestamp = now(); + let mut buffer = Buffer::new(); + buffer.push(timestamp); + logger.info(buffer); + + (rounded, buffer) +} diff --git a/gems/decomplex/examples/rust/implicit-control-flow.rs b/gems/decomplex/examples/rust/implicit-control-flow.rs new file mode 100644 index 000000000..f8a00efe4 --- /dev/null +++ b/gems/decomplex/examples/rust/implicit-control-flow.rs @@ -0,0 +1,17 @@ +struct FlowExample { + status: i32, + valid: bool, + done: bool, +} + +impl FlowExample { + fn prepare(&mut self) { self.status = 1; } + fn validate(&mut self) { self.valid = self.status == 1; } + fn commit(&mut self) { self.done = self.valid; } + + fn ok1(&mut self) { self.prepare(); self.validate(); self.commit(); } + fn ok2(&mut self) { self.prepare(); self.validate(); self.commit(); } + fn ok3(&mut self) { self.prepare(); self.validate(); self.commit(); } + fn ok4(&mut self) { self.prepare(); self.validate(); self.commit(); } + fn drift(&mut self) { self.validate(); self.prepare(); self.commit(); } +} diff --git a/gems/decomplex/examples/rust/inconsistent-rename-clone.rs b/gems/decomplex/examples/rust/inconsistent-rename-clone.rs new file mode 100644 index 000000000..8e8a3f819 --- /dev/null +++ b/gems/decomplex/examples/rust/inconsistent-rename-clone.rs @@ -0,0 +1,13 @@ +fn original() { + let src = fetch(1); + check(src); + store(src); + finalize(src); +} + +fn pasted() { + let dst = fetch(2); + check(dst); + store(src); + 
finalize(dst); +} diff --git a/gems/decomplex/examples/rust/local-flow.rs b/gems/decomplex/examples/rust/local-flow.rs new file mode 100644 index 000000000..d027c1e92 --- /dev/null +++ b/gems/decomplex/examples/rust/local-flow.rs @@ -0,0 +1,9 @@ +fn mixed(price: i32, tax: i32) -> (i32, Buffer) { + let subtotal = price + tax; + let total = subtotal.round(); + + let timestamp = now(); + let mut buffer = Buffer::new(); + buffer.push(timestamp); + (total, buffer) +} diff --git a/gems/decomplex/examples/rust/locality-drag.rs b/gems/decomplex/examples/rust/locality-drag.rs new file mode 100644 index 000000000..1d71f299f --- /dev/null +++ b/gems/decomplex/examples/rust/locality-drag.rs @@ -0,0 +1,27 @@ +fn run(user: User, cart: Cart, logger: Logger) { + let receipt_id = user.id; + + let total = cart.total; + if total > 100 { + if cart.discountable() { + let discount = 10; + } + } + if cart.taxable() { + if cart.region { + let tax = total * 2; + } + } + if logger.enabled() { + if logger.debug() { + logger.info(total); + } + } + if cart.valid() { + if cart.ready() { + let status = 1; + } + } + + emit(receipt_id); +} diff --git a/gems/decomplex/examples/rust/miner.rs b/gems/decomplex/examples/rust/miner.rs new file mode 100644 index 000000000..4f5b7fcab --- /dev/null +++ b/gems/decomplex/examples/rust/miner.rs @@ -0,0 +1,15 @@ +fn one(a: bool, b: bool, c: bool) -> bool { + a && b && c +} + +fn two(a: bool, b: bool, c: bool) -> bool { + a && b && c +} + +fn three(a: bool, b: bool, c: bool) -> bool { + a && b && c +} + +fn broken(a: bool, b: bool) -> bool { + a && b +} diff --git a/gems/decomplex/examples/rust/operational-discontinuity.rs b/gems/decomplex/examples/rust/operational-discontinuity.rs new file mode 100644 index 000000000..401589871 --- /dev/null +++ b/gems/decomplex/examples/rust/operational-discontinuity.rs @@ -0,0 +1,10 @@ +fn phase_shift() { + let a = 1; + let b = 2; + + // Phase 2 + let x = 3; + let y = 4; + print(x); + print(y); +} diff --git 
a/gems/decomplex/examples/rust/oversized-predicate.rs b/gems/decomplex/examples/rust/oversized-predicate.rs new file mode 100644 index 000000000..40129fdde --- /dev/null +++ b/gems/decomplex/examples/rust/oversized-predicate.rs @@ -0,0 +1,5 @@ +fn complex_check(a: bool, b: bool, c: bool, d: bool) { + if a && b && c && d { + print("too big"); + } +} diff --git a/gems/decomplex/examples/rust/path-condition.rs b/gems/decomplex/examples/rust/path-condition.rs new file mode 100644 index 000000000..343d2af0b --- /dev/null +++ b/gems/decomplex/examples/rust/path-condition.rs @@ -0,0 +1,15 @@ +fn one(x: X, y: Y, z: Z) { + if x.p() && y.q() && z.r() { go(x); } +} + +fn two(x: X, y: Y, z: Z) { + if x.p() && y.q() && z.r() { go(x); } +} + +fn three(x: X, y: Y, z: Z) { + if x.p() && y.q() && z.r() { go(x); } +} + +fn bug(x: X, y: Y, z: Z) { + if x.p() && y.q() { go(x); } +} diff --git a/gems/decomplex/examples/rust/predicate-alias.rs b/gems/decomplex/examples/rust/predicate-alias.rs new file mode 100644 index 000000000..5d70e6588 --- /dev/null +++ b/gems/decomplex/examples/rust/predicate-alias.rs @@ -0,0 +1,11 @@ +fn first() -> bool { + true +} + +fn second() -> bool { + true +} + +fn other() -> bool { + false +} diff --git a/gems/decomplex/examples/rust/redundant-nil-guard.rs b/gems/decomplex/examples/rust/redundant-nil-guard.rs new file mode 100644 index 000000000..33e2f1184 --- /dev/null +++ b/gems/decomplex/examples/rust/redundant-nil-guard.rs @@ -0,0 +1,5 @@ +fn check(value: Option) { + if value.is_some() { + value.is_none(); + } +} diff --git a/gems/decomplex/examples/rust/semantic-alias.rs b/gems/decomplex/examples/rust/semantic-alias.rs new file mode 100644 index 000000000..2b707286d --- /dev/null +++ b/gems/decomplex/examples/rust/semantic-alias.rs @@ -0,0 +1,8 @@ +fn frame(node: Node) -> bool { node.provenance == FRAME } +fn is_frame(node: Node) -> bool { provenance == FRAME } +fn heap(node: Node) -> bool { node.provenance == HEAP } + +fn somewhere(node: Node) -> i32 
{ + if node.provenance == FRAME { return 1; } + 0 +} diff --git a/gems/decomplex/examples/rust/sequence-mine.rs b/gems/decomplex/examples/rust/sequence-mine.rs new file mode 100644 index 000000000..412047b5d --- /dev/null +++ b/gems/decomplex/examples/rust/sequence-mine.rs @@ -0,0 +1,5 @@ +fn one() { alloc_mark(x); body1(); cleanup(x); } +fn two() { alloc_mark(y); body2(); cleanup(y); } +fn three() { alloc_mark(z); body3(); cleanup(z); } +fn four() { alloc_mark(w); body4(); cleanup(w); } +fn leak() { alloc_mark(q); use_value(q); } diff --git a/gems/decomplex/examples/rust/state-branch-density.rs b/gems/decomplex/examples/rust/state-branch-density.rs new file mode 100644 index 000000000..7575342c9 --- /dev/null +++ b/gems/decomplex/examples/rust/state-branch-density.rs @@ -0,0 +1,15 @@ +struct StateBranchChecker { + checked: bool, +} + +impl StateBranchChecker { + fn check(&mut self, admin: bool, name: String) { + if admin { + self.checked = true; + } + + if self.checked && name == "admin" { + print("hello"); + } + } +} diff --git a/gems/decomplex/examples/rust/state-mesh.rs b/gems/decomplex/examples/rust/state-mesh.rs new file mode 100644 index 000000000..a7d28e1d1 --- /dev/null +++ b/gems/decomplex/examples/rust/state-mesh.rs @@ -0,0 +1,23 @@ +struct StateMeshExample { + a: i32, + b: i32, +} + +impl StateMeshExample { + fn initialize(&mut self) { + self.a = 1; + self.b = 2; + } + + fn writer(&mut self) { + self.a = 3; + } + + fn reader(&self) -> i32 { + self.a + self.b + } + + fn a_alias(&self) -> i32 { + self.a + } +} diff --git a/gems/decomplex/examples/rust/structural-topology.rs b/gems/decomplex/examples/rust/structural-topology.rs new file mode 100644 index 000000000..bb16bd86a --- /dev/null +++ b/gems/decomplex/examples/rust/structural-topology.rs @@ -0,0 +1,18 @@ +struct Worker; + +impl Worker { + pub fn run(&self, items: Items) { + self.prepare(); + if self.ready() { + self.validate(); + } + for item in items { + self.helper(item); + } + } + + fn 
prepare(&self) {} + fn ready(&self) -> bool { true } + pub fn validate(&self) {} + fn helper(&self, item: Item) { item; } +} diff --git a/gems/decomplex/examples/rust/temporal-ordering-pressure.rs b/gems/decomplex/examples/rust/temporal-ordering-pressure.rs new file mode 100644 index 000000000..cb845cfe9 --- /dev/null +++ b/gems/decomplex/examples/rust/temporal-ordering-pressure.rs @@ -0,0 +1,23 @@ +pub struct TemporalOrderExample { + a: i32, + b: i32, +} + +impl TemporalOrderExample { + pub fn one(&mut self) { + self.a = 1; + } + + pub fn two(&mut self) { + self.a = 2; + self.b = 3; + } + + pub fn three(&mut self) { + self.b = 4; + } + + pub fn reader(&self) -> i32 { + self.a + } +} diff --git a/gems/decomplex/examples/rust/weighted-inlined-complexity.rs b/gems/decomplex/examples/rust/weighted-inlined-complexity.rs new file mode 100644 index 000000000..8cc8c9e8e --- /dev/null +++ b/gems/decomplex/examples/rust/weighted-inlined-complexity.rs @@ -0,0 +1,44 @@ +struct WeightedInlineExample; + +impl WeightedInlineExample { + fn checkout(&self, user: User, cart: Cart) { + self.validate_user(user); + self.apply_discount(cart); + self.process_payment(user, cart); + self.audit_cart(cart); + } + + fn validate_user(&self, user: User) -> bool { + if user.active() && !user.suspended() { + if user.profile.complete() { true } else { false } + } else { + false + } + } + + fn apply_discount(&self, cart: Cart) -> i32 { + if cart.total > 100 && eligible() { + if holiday() { 20 } else if loyalty_month() { 15 } else { 10 } + } else { + 0 + } + } + + fn process_payment(&self, user: User, cart: Cart) { + if gateway.ready() { + if cart.total > 0 && user.active() { + if fraud_check(user) { charge(user, cart); } else { decline(user); } + } + } + } + + fn audit_cart(&self, cart: Cart) { + for item in cart.items { + if item.taxable() { + if item.region && item.amount > 0 { + record_tax(item); + } + } + } + } +} diff --git 
a/gems/decomplex/examples/source-facts/oracles/ruby-block_receiver_calls.json b/gems/decomplex/examples/source-facts/oracles/ruby-block_receiver_calls.json new file mode 100644 index 000000000..ced1a727c --- /dev/null +++ b/gems/decomplex/examples/source-facts/oracles/ruby-block_receiver_calls.json @@ -0,0 +1,161 @@ +{ + "syntax": { + "functions": [ + { + "name": "collect", + "owner": "SourceFactBlockReceiverCalls", + "line": 4, + "visibility": "public", + "params": [ + "items" + ] + } + ], + "calls": [ + { + "receiver": "self", + "message": "names", + "function": "collect", + "line": 10, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "item", + "message": "name", + "function": "collect", + "line": 7, + "conditional": true, + "control": "iterates", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "item", + "message": "children", + "function": "collect", + "line": 8, + "conditional": true, + "control": "iterates", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "child", + "message": "name", + "function": "collect", + "line": 8, + "conditional": true, + "control": "iterates", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "items", + "message": "flat_map", + "function": "collect", + "line": 6, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": true, + "arguments": [] + }, + { + "receiver": "item.children", + "message": "each", + "function": "collect", + "line": 8, + "conditional": true, + "control": "iterates", + "safe_navigation": false, + "block": true, + "arguments": [] + } + ], + "state_reads": [ + { + "receiver": "item", + "field": "children", + "function": "collect", + "line": 8 + }, + { + "receiver": "item.children", + "field": "each", + "function": "collect", + "line": 8 + }, + { + "receiver": "items", + "field": "flat_map", + 
"function": "collect", + "line": 6 + }, + { + "receiver": "item", + "field": "name", + "function": "collect", + "line": 7 + }, + { + "receiver": "child", + "field": "name", + "function": "collect", + "line": 8 + } + ], + "semantic_effects": [ + { + "kind": "hidden_mutation", + "detail": "<<", + "function": "collect", + "line": 7 + } + ] + }, + "local_flow": [ + { + "method": "collect", + "statements": [ + { + "reads": [], + "writes": [ + "names" + ], + "dependencies": [], + "co_uses": [] + }, + { + "reads": [ + "items", + "names" + ], + "writes": [], + "dependencies": [], + "co_uses": [ + [ + "items", + "names" + ] + ] + }, + { + "reads": [ + "names" + ], + "writes": [], + "dependencies": [], + "co_uses": [] + } + ], + "boundaries": [] + } + ] +} diff --git a/gems/decomplex/examples/source-facts/oracles/ruby-branch_predicate_paths.json b/gems/decomplex/examples/source-facts/oracles/ruby-branch_predicate_paths.json new file mode 100644 index 000000000..52d79177e --- /dev/null +++ b/gems/decomplex/examples/source-facts/oracles/ruby-branch_predicate_paths.json @@ -0,0 +1,271 @@ +{ + "syntax": { + "functions": [ + { + "name": "ready?", + "owner": "SourceFactBranchPredicatePaths", + "line": 4, + "visibility": "public", + "params": [] + }, + { + "name": "route", + "owner": "SourceFactBranchPredicatePaths", + "line": 8, + "visibility": "public", + "params": [ + "user" + ] + } + ], + "calls": [ + { + "receiver": "self", + "message": "publish", + "function": "route", + "line": 10, + "conditional": true, + "control": "conditional", + "safe_navigation": false, + "block": false, + "arguments": [ + ":ready" + ] + }, + { + "receiver": "self", + "message": "warn", + "function": "route", + "line": 12, + "conditional": true, + "control": "conditional", + "safe_navigation": false, + "block": false, + "arguments": [ + "\"not ready\"" + ] + }, + { + "receiver": "self", + "message": "audit", + "function": "route", + "line": 17, + "conditional": true, + "control": "conditional", + 
"safe_navigation": false, + "block": false, + "arguments": [ + "user" + ] + }, + { + "receiver": "self", + "message": "fallback", + "function": "route", + "line": 19, + "conditional": true, + "control": "conditional", + "safe_navigation": false, + "block": false, + "arguments": [ + "user" + ] + }, + { + "receiver": "self", + "message": "default", + "function": "route", + "line": 21, + "conditional": true, + "control": "conditional", + "safe_navigation": false, + "block": false, + "arguments": [ + "user" + ] + }, + { + "receiver": "user", + "message": "role", + "function": "route", + "line": 15, + "conditional": true, + "control": "conditional", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "user", + "message": "active?", + "function": "route", + "line": 9, + "conditional": true, + "control": "conditional", + "safe_navigation": false, + "block": false, + "arguments": [] + } + ], + "state_reads": [ + { + "receiver": "self", + "field": "@status", + "function": "ready?", + "line": 5 + }, + { + "receiver": "self", + "field": "@status", + "function": "route", + "line": 9 + }, + { + "receiver": "user", + "field": "active?", + "function": "route", + "line": 9 + }, + { + "receiver": "user", + "field": "role", + "function": "route", + "line": 15 + } + ], + "decisions": [ + { + "kind": "case_dispatch", + "members": [ + "\"admin\"", + "\"guest\"" + ], + "function": "route", + "line": 15, + "predicate": "user.role" + }, + { + "kind": "conjunction", + "members": [ + "@status == :ready", + "user.active?" + ], + "function": "route", + "line": 9, + "predicate": "@status == :ready && user.active?" + } + ], + "branch_decisions": [ + { + "function": "route", + "line": 15, + "predicate": "user.role", + "state_refs": [ + "user.role" + ] + }, + { + "function": "route", + "line": 9, + "predicate": "@status == :ready && user.active?", + "state_refs": [ + "@status", + "user.active?" 
+ ] + } + ], + "branch_arms": [ + { + "function": "route", + "kind": "case", + "line": 16, + "decision_line": 15, + "predicate": "user.role", + "member": "\"admin\"", + "body": "audit(user)" + }, + { + "function": "route", + "kind": "if", + "line": 11, + "decision_line": 9, + "predicate": "@status == :ready && user.active?", + "member": "else", + "body": "else warn(\"not ready\")" + }, + { + "function": "route", + "kind": "case", + "line": 18, + "decision_line": 15, + "predicate": "user.role", + "member": "\"guest\"", + "body": "fallback(user)" + }, + { + "function": "route", + "kind": "if", + "line": 9, + "decision_line": 9, + "predicate": "@status == :ready && user.active?", + "member": "then", + "body": "publish(:ready)" + } + ], + "dispatch_sites": [], + "predicate_bodies": [ + { + "name": "ready?", + "owner": "SourceFactBranchPredicatePaths", + "body": "@status == :ready", + "line": 4 + } + ], + "comparisons": [ + { + "source": "@status == :ready", + "raw": "@status == :ready", + "canon_source": "status == :ready", + "operator": "==", + "function": "ready?", + "line": 5 + }, + { + "source": "@status == :ready", + "raw": "@status == :ready", + "canon_source": "status == :ready", + "operator": "==", + "function": "route", + "line": 9 + } + ], + "path_conditions": [ + { + "guards": [ + "@status == :ready", + "user.active?" + ], + "action": "publish(:ready)", + "function": "route", + "line": 10 + }, + { + "guards": [ + "!@status == :ready", + "!user.active?" 
+ ], + "action": "warn(\"not ready\")", + "function": "route", + "line": 12 + } + ], + "semantic_effects": [ + { + "kind": "hidden_io", + "detail": "warn", + "function": "route", + "line": 12 + } + ] + } +} diff --git a/gems/decomplex/examples/source-facts/oracles/ruby-local_flow_edges.json b/gems/decomplex/examples/source-facts/oracles/ruby-local_flow_edges.json new file mode 100644 index 000000000..07d396bb9 --- /dev/null +++ b/gems/decomplex/examples/source-facts/oracles/ruby-local_flow_edges.json @@ -0,0 +1,254 @@ +{ + "syntax": { + "state_reads": [ + { + "receiver": "findings", + "field": "each", + "function": "build", + "line": 11 + }, + { + "receiver": "sections", + "field": "each", + "function": "build", + "line": 8 + }, + { + "receiver": "file", + "field": "empty?", + "function": "build", + "line": 13 + }, + { + "receiver": "meth", + "field": "empty?", + "function": "build", + "line": 13 + }, + { + "receiver": "audit", + "field": "findings", + "function": "build", + "line": 16 + }, + { + "receiver": "audit", + "field": "findings", + "function": "build", + "line": 20 + }, + { + "receiver": "finding", + "field": "loc", + "function": "build", + "line": 12 + }, + { + "receiver": "audit.findings", + "field": "size", + "function": "build", + "line": 16 + } + ], + "semantic_effects": [ + { + "kind": "hidden_mutation", + "detail": "<<", + "function": "build", + "line": 16 + }, + { + "kind": "hidden_io", + "detail": "File.file?", + "function": "build", + "line": 6 + } + ], + "path_conditions": [ + { + "guards": [ + "!File.file?(grammar)", + "!grammar" + ], + "action": "skip \"missing grammar\"", + "function": "build", + "line": 6 + } + ], + "local_complexity_scores": [ + { + "id": "SourceFactLocalFlowEdges#build", + "score": 14.7, + "signals": { + "boolean_ops": 8, + "branches": 3, + "early_exits": 2, + "loops": 2, + "nested": 3 + } + } + ] + }, + "local_flow": [ + { + "method": "build", + "statements": [ + { + "reads": [ + + ], + "writes": [ + "rows" + ], + 
"dependencies": [ + + ], + "co_uses": [ + + ] + }, + { + "reads": [ + "grammar" + ], + "writes": [ + + ], + "dependencies": [ + + ], + "co_uses": [ + + ] + }, + { + "reads": [ + "audit", + "file", + "key", + "meth", + "rows", + "sections" + ], + "writes": [ + "file", + "key", + "meth" + ], + "dependencies": [ + [ + "key", + "file" + ], + [ + "key", + "meth" + ] + ], + "co_uses": [ + [ + "audit", + "file" + ], + [ + "audit", + "key" + ], + [ + "audit", + "meth" + ], + [ + "audit", + "rows" + ], + [ + "audit", + "sections" + ], + [ + "file", + "key" + ], + [ + "file", + "meth" + ], + [ + "file", + "rows" + ], + [ + "file", + "sections" + ], + [ + "key", + "meth" + ], + [ + "key", + "rows" + ], + [ + "key", + "sections" + ], + [ + "meth", + "rows" + ], + [ + "meth", + "sections" + ], + [ + "rows", + "sections" + ] + ] + }, + { + "reads": [ + + ], + "writes": [ + + ], + "dependencies": [ + + ], + "co_uses": [ + + ] + }, + { + "reads": [ + "rows" + ], + "writes": [ + + ], + "dependencies": [ + + ], + "co_uses": [ + + ] + } + ], + "boundaries": [ + { + "before_index": 1, + "after_index": 2, + "kind": "blank" + }, + { + "before_index": 2, + "after_index": 3, + "kind": "blank" + } + ] + } + ] +} diff --git a/gems/decomplex/examples/source-facts/oracles/ruby-local_methods_contracts.json b/gems/decomplex/examples/source-facts/oracles/ruby-local_methods_contracts.json new file mode 100644 index 000000000..4ded02cd3 --- /dev/null +++ b/gems/decomplex/examples/source-facts/oracles/ruby-local_methods_contracts.json @@ -0,0 +1,308 @@ +{ + "syntax": { + "functions": [ + { + "name": "process", + "owner": "SourceFactLocalMethodsContracts", + "line": 4, + "visibility": "public", + "params": [ + "user", + "items" + ] + } + ], + "calls": [ + { + "receiver": "user", + "message": "profile", + "function": "process", + "line": 5, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "names", + "message": 
"any?", + "function": "process", + "line": 12, + "conditional": true, + "control": "conditional", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "profile", + "message": "name", + "function": "process", + "line": 12, + "conditional": true, + "control": "conditional", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "item", + "message": "ready?", + "function": "process", + "line": 8, + "conditional": true, + "control": "conditional", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "item", + "message": "name", + "function": "process", + "line": 9, + "conditional": true, + "control": "conditional", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "items", + "message": "each", + "function": "process", + "line": 7, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": true, + "arguments": [] + } + ], + "state_reads": [ + { + "receiver": "names", + "field": "any?", + "function": "process", + "line": 12 + }, + { + "receiver": "items", + "field": "each", + "function": "process", + "line": 7 + }, + { + "receiver": "profile", + "field": "name", + "function": "process", + "line": 12 + }, + { + "receiver": "item", + "field": "name", + "function": "process", + "line": 9 + }, + { + "receiver": "user", + "field": "profile", + "function": "process", + "line": 5 + }, + { + "receiver": "item", + "field": "ready?", + "function": "process", + "line": 8 + } + ], + "state_writes": [], + "semantic_effects": [ + { + "kind": "hidden_mutation", + "detail": "<<", + "function": "process", + "line": 9 + } + ], + "local_methods": [ + { + "id": "SourceFactLocalMethodsContracts#process", + "owner": "SourceFactLocalMethodsContracts", + "name": "process", + "line": 4, + "statements": [ + { + "co_uses": [], + "dependencies": [ + [ + "profile", + "user" + ] + ], + "end_line": 5, + "index": 0, + "line": 5, + "reads": [ 
+ "user" + ], + "source": "profile = user.profile", + "span": [ + 5, + 4, + 5, + 26 + ], + "writes": [ + "profile" + ] + }, + { + "co_uses": [], + "dependencies": [], + "end_line": 6, + "index": 1, + "line": 6, + "reads": [], + "source": "names = []", + "span": [ + 6, + 4, + 6, + 14 + ], + "writes": [ + "names" + ] + }, + { + "co_uses": [ + [ + "items", + "names" + ] + ], + "dependencies": [], + "end_line": 11, + "index": 2, + "line": 7, + "reads": [ + "items", + "names" + ], + "source": "items.each do |item| if item.ready? names << item.name end end", + "span": [ + 7, + 4, + 11, + 7 + ], + "writes": [] + }, + { + "co_uses": [ + [ + "names", + "profile" + ] + ], + "dependencies": [], + "end_line": 12, + "index": 3, + "line": 12, + "reads": [ + "names", + "profile" + ], + "source": "profile.name if names.any?", + "span": [ + 12, + 4, + 12, + 30 + ], + "writes": [] + } + ], + "boundaries": [], + "local_contract_assignments": { + "names": "[]", + "profile": "user.profile" + } + } + ], + "local_complexity_scores": [ + { + "id": "SourceFactLocalMethodsContracts#process", + "score": 4.3, + "signals": { + "boolean_ops": 0, + "branches": 2, + "loops": 1, + "nested": 1 + } + } + ] + }, + "local_flow": [ + { + "method": "process", + "statements": [ + { + "reads": [ + "user" + ], + "writes": [ + "profile" + ], + "dependencies": [ + [ + "profile", + "user" + ] + ], + "co_uses": [] + }, + { + "reads": [], + "writes": [ + "names" + ], + "dependencies": [], + "co_uses": [] + }, + { + "reads": [ + "items", + "names" + ], + "writes": [], + "dependencies": [], + "co_uses": [ + [ + "items", + "names" + ] + ] + }, + { + "reads": [ + "names", + "profile" + ], + "writes": [], + "dependencies": [], + "co_uses": [ + [ + "names", + "profile" + ] + ] + } + ], + "boundaries": [] + } + ] +} diff --git a/gems/decomplex/examples/source-facts/oracles/ruby-locals_not_state.json b/gems/decomplex/examples/source-facts/oracles/ruby-locals_not_state.json new file mode 100644 index 
000000000..0abef7a20 --- /dev/null +++ b/gems/decomplex/examples/source-facts/oracles/ruby-locals_not_state.json @@ -0,0 +1,163 @@ +{ + "syntax": { + "functions": [ + { + "name": "build", + "owner": "SourceFactLocalsNotState", + "line": 4, + "visibility": "public", + "params": [ + "values", + "config" + ] + } + ], + "calls": [ + { + "receiver": "self", + "message": "assert_empty", + "function": "build", + "line": 12, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [ + "values" + ] + }, + { + "receiver": "values", + "message": "each", + "function": "build", + "line": 8, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": true, + "arguments": [] + } + ], + "state_reads": [ + { + "receiver": "values", + "field": "each", + "function": "build", + "line": 8 + } + ], + "state_writes": [], + "semantic_effects": [ + { + "kind": "context_dependency", + "detail": "ENV", + "function": "build", + "line": 6 + }, + { + "kind": "hidden_mutation", + "detail": "[]=", + "function": "build", + "line": 11 + } + ] + }, + "local_flow": [ + { + "method": "build", + "statements": [ + { + "reads": [], + "writes": [ + "key" + ], + "dependencies": [], + "co_uses": [] + }, + { + "reads": [ + "key" + ], + "writes": [ + "path" + ], + "dependencies": [ + [ + "path", + "key" + ] + ], + "co_uses": [] + }, + { + "reads": [], + "writes": [ + "total" + ], + "dependencies": [], + "co_uses": [] + }, + { + "reads": [ + "total", + "values" + ], + "writes": [ + "total" + ], + "dependencies": [], + "co_uses": [ + [ + "total", + "values" + ] + ] + }, + { + "reads": [ + "config", + "path" + ], + "writes": [], + "dependencies": [], + "co_uses": [ + [ + "config", + "path" + ] + ] + }, + { + "reads": [], + "writes": [], + "dependencies": [], + "co_uses": [] + }, + { + "reads": [ + "config", + "path", + "total" + ], + "writes": [], + "dependencies": [], + "co_uses": [ + [ + "config", + "path" + ], + [ + "config", + 
"total" + ], + [ + "path", + "total" + ] + ] + } + ], + "boundaries": [] + } + ] +} diff --git a/gems/decomplex/examples/source-facts/oracles/ruby-protocols_nil_clone.json b/gems/decomplex/examples/source-facts/oracles/ruby-protocols_nil_clone.json new file mode 100644 index 000000000..ed78fcd3a --- /dev/null +++ b/gems/decomplex/examples/source-facts/oracles/ruby-protocols_nil_clone.json @@ -0,0 +1,568 @@ +{ + "syntax": { + "functions": [ + { + "name": "run", + "owner": "SourceFactProtocolsNilClone", + "line": 12, + "visibility": "public", + "params": [ + "item" + ] + }, + { + "name": "guard", + "owner": "SourceFactProtocolsNilClone", + "line": 17, + "visibility": "public", + "params": [ + "value" + ] + }, + { + "name": "clone_left", + "owner": "SourceFactProtocolsNilClone", + "line": 23, + "visibility": "public", + "params": [ + "user" + ] + }, + { + "name": "clone_right", + "owner": "SourceFactProtocolsNilClone", + "line": 29, + "visibility": "public", + "params": [ + "account" + ] + }, + { + "name": "open", + "owner": "SourceFactProtocolsNilClone", + "line": 4, + "visibility": "public", + "params": [] + }, + { + "name": "close", + "owner": "SourceFactProtocolsNilClone", + "line": 8, + "visibility": "public", + "params": [] + } + ], + "calls": [ + { + "receiver": "self", + "message": "audit", + "function": "clone_left", + "line": 25, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [ + "data" + ] + }, + { + "receiver": "self", + "message": "audit", + "function": "clone_right", + "line": 31, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [ + "data" + ] + }, + { + "receiver": "user.profile", + "message": "name", + "function": "clone_left", + "line": 24, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "user", + "message": "profile", + "function": "clone_left", + 
"line": 24, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "self", + "message": "data", + "function": "clone_left", + "line": 26, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "account.profile", + "message": "name", + "function": "clone_right", + "line": 30, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "account", + "message": "profile", + "function": "clone_right", + "line": 30, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "self", + "message": "data", + "function": "clone_right", + "line": 32, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "value", + "message": "name", + "function": "guard", + "line": 20, + "conditional": false, + "control": "always", + "safe_navigation": true, + "block": false, + "arguments": [] + }, + { + "receiver": "self", + "message": "open", + "function": "run", + "line": 13, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "self", + "message": "value", + "function": "guard", + "line": 18, + "conditional": true, + "control": "conditional", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "self", + "message": "close", + "function": "run", + "line": 14, + "conditional": true, + "control": "conditional", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "item", + "message": "ready?", + "function": "run", + "line": 14, + "conditional": true, + "control": "conditional", + "safe_navigation": false, + "block": false, + "arguments": [] + } + ], + "state_reads": 
[ + { + "receiver": "user.profile", + "field": "name", + "function": "clone_left", + "line": 24 + }, + { + "receiver": "account.profile", + "field": "name", + "function": "clone_right", + "line": 30 + }, + { + "receiver": "value", + "field": "name", + "function": "guard", + "line": 20 + }, + { + "receiver": "user", + "field": "profile", + "function": "clone_left", + "line": 24 + }, + { + "receiver": "account", + "field": "profile", + "function": "clone_right", + "line": 30 + }, + { + "receiver": "item", + "field": "ready?", + "function": "run", + "line": 14 + } + ], + "state_writes": [ + { + "receiver": "self", + "field": "@opened", + "function": "close", + "line": 9 + }, + { + "receiver": "self", + "field": "@opened", + "function": "open", + "line": 5 + } + ], + "semantic_effects": [ + { + "kind": "hidden_io", + "detail": "open", + "function": "run", + "line": 13 + } + ], + "protocol_method_effects": [ + { + "owner": "SourceFactProtocolsNilClone", + "name": "run", + "line": 12, + "reads": [ + "close", + "open" + ], + "writes": [] + }, + { + "owner": "SourceFactProtocolsNilClone", + "name": "guard", + "line": 17, + "reads": [], + "writes": [] + }, + { + "owner": "SourceFactProtocolsNilClone", + "name": "clone_left", + "line": 23, + "reads": [], + "writes": [] + }, + { + "owner": "SourceFactProtocolsNilClone", + "name": "clone_right", + "line": 29, + "reads": [], + "writes": [] + }, + { + "owner": "SourceFactProtocolsNilClone", + "name": "open", + "line": 4, + "reads": [], + "writes": [ + "opened" + ] + }, + { + "owner": "SourceFactProtocolsNilClone", + "name": "close", + "line": 8, + "reads": [], + "writes": [ + "opened" + ] + } + ], + "protocol_call_paths": [ + { + "owner": "SourceFactProtocolsNilClone", + "name": "guard", + "line": 17, + "calls": [] + }, + { + "owner": "SourceFactProtocolsNilClone", + "name": "guard", + "line": 17, + "calls": [] + }, + { + "owner": "SourceFactProtocolsNilClone", + "name": "open", + "line": 4, + "calls": [] + }, + { + "owner": 
"SourceFactProtocolsNilClone", + "name": "close", + "line": 8, + "calls": [] + }, + { + "owner": "SourceFactProtocolsNilClone", + "name": "run", + "line": 12, + "calls": [ + { + "line": 13, + "mid": "open", + "span": [ + 13, + 4, + 13, + 8 + ] + }, + { + "line": 14, + "mid": "close", + "span": [ + 14, + 4, + 14, + 9 + ] + } + ] + }, + { + "owner": "SourceFactProtocolsNilClone", + "name": "run", + "line": 12, + "calls": [ + { + "line": 13, + "mid": "open", + "span": [ + 13, + 4, + 13, + 8 + ] + } + ] + }, + { + "owner": "SourceFactProtocolsNilClone", + "name": "clone_left", + "line": 23, + "calls": [ + { + "line": 25, + "mid": "audit", + "span": [ + 25, + 4, + 25, + 15 + ] + } + ] + }, + { + "owner": "SourceFactProtocolsNilClone", + "name": "clone_right", + "line": 29, + "calls": [ + { + "line": 31, + "mid": "audit", + "span": [ + 31, + 4, + 31, + 15 + ] + } + ] + } + ], + "clone_candidates": [ + { + "method_name": "clone_left", + "node_name": "defn", + "line": 23, + "mass": 26, + "fingerprint": "method(id id method_parameters((:( id ):)) body_statement(assignment(id =:= call(call(id .:. id) .:. id)) call(id argument_list((:( id ):))) id) id)", + "child_fingerprints": [ + "assignment(id =:= call(call(id .:. id) .:. id))", + "call(id argument_list((:( id ):)))" + ], + "child_masses": [ + 10, + 6 + ] + }, + { + "method_name": "clone_right", + "node_name": "defn", + "line": 29, + "mass": 26, + "fingerprint": "method(id id method_parameters((:( id ):)) body_statement(assignment(id =:= call(call(id .:. id) .:. id)) call(id argument_list((:( id ):))) id) id)", + "child_fingerprints": [ + "assignment(id =:= call(call(id .:. id) .:. id))", + "call(id argument_list((:( id ):)))" + ], + "child_masses": [ + 10, + 6 + ] + }, + { + "method_name": "clone_left", + "node_name": "assignment", + "line": 24, + "mass": 10, + "fingerprint": "assignment(id =:= call(call(id .:. id) .:. id))", + "child_fingerprints": [ + "call(call(id .:. id) .:. 
id)" + ], + "child_masses": [ + 7 + ] + }, + { + "method_name": "clone_right", + "node_name": "assignment", + "line": 30, + "mass": 10, + "fingerprint": "assignment(id =:= call(call(id .:. id) .:. id))", + "child_fingerprints": [ + "call(call(id .:. id) .:. id)" + ], + "child_masses": [ + 7 + ] + }, + { + "method_name": "run", + "node_name": "defn", + "line": 12, + "mass": 17, + "fingerprint": "method(id id method_parameters((:( id ):)) body_statement(id if_modifier(id id call(id .:. id))) id)", + "child_fingerprints": [ + "if_modifier(id id call(id .:. id))" + ], + "child_masses": [ + 7 + ] + }, + { + "method_name": "open", + "node_name": "defn", + "line": 4, + "mass": 8, + "fingerprint": "method(id id body_statement(instance_variable:@opened =:= bool) id)", + "child_fingerprints": [ + "instance_variable:@opened", + "bool" + ], + "child_masses": [ + 1, + 1 + ] + }, + { + "method_name": "close", + "node_name": "defn", + "line": 8, + "mass": 8, + "fingerprint": "method(id id body_statement(instance_variable:@opened =:= bool) id)", + "child_fingerprints": [ + "instance_variable:@opened", + "bool" + ], + "child_masses": [ + 1, + 1 + ] + }, + { + "method_name": "(top-level)", + "node_name": "class", + "line": 3, + "mass": 107, + "fingerprint": "class(id id body_statement(method(id id body_statement(instance_variable:@opened =:= bool) id) method(id id body_statement(instance_variable:@opened =:= bool) id) method(id id method_parameters((:( id ):)) body_statement(id if_modifier(id id call(id .:. id))) id) method(id id method_parameters((:( id ):)) body_statement(unless_modifier(id id id) call(id &.:&. id)) id) method(id id method_parameters((:( id ):)) body_statement(assignment(id =:= call(call(id .:. id) .:. id)) call(id argument_list((:( id ):))) id) id) method(id id method_parameters((:( id ):)) body_statement(assignment(id =:= call(call(id .:. id) .:. 
id)) call(id argument_list((:( id ):))) id) id)) id)", + "child_fingerprints": [ + "method(id id body_statement(instance_variable:@opened =:= bool) id)", + "method(id id body_statement(instance_variable:@opened =:= bool) id)", + "method(id id method_parameters((:( id ):)) body_statement(id if_modifier(id id call(id .:. id))) id)", + "method(id id method_parameters((:( id ):)) body_statement(unless_modifier(id id id) call(id &.:&. id)) id)", + "method(id id method_parameters((:( id ):)) body_statement(assignment(id =:= call(call(id .:. id) .:. id)) call(id argument_list((:( id ):))) id) id)", + "method(id id method_parameters((:( id ):)) body_statement(assignment(id =:= call(call(id .:. id) .:. id)) call(id argument_list((:( id ):))) id) id)" + ], + "child_masses": [ + 8, + 8, + 17, + 17, + 26, + 26 + ] + }, + { + "method_name": "guard", + "node_name": "defn", + "line": 17, + "mass": 17, + "fingerprint": "method(id id method_parameters((:( id ):)) body_statement(unless_modifier(id id id) call(id &.:&. id)) id)", + "child_fingerprints": [ + "unless_modifier(id id id)", + "call(id &.:&. 
id)" + ], + "child_masses": [ + 4, + 4 + ] + } + ], + "redundant_nil_guards": [ + { + "defn": "guard", + "line": 20, + "local": "value", + "guard": "value&.name", + "proof": "value is already proven non-nil on this path" + } + ] + } +} diff --git a/gems/decomplex/examples/source-facts/oracles/ruby-semantic_effects.json b/gems/decomplex/examples/source-facts/oracles/ruby-semantic_effects.json new file mode 100644 index 000000000..c3fe62e89 --- /dev/null +++ b/gems/decomplex/examples/source-facts/oracles/ruby-semantic_effects.json @@ -0,0 +1,199 @@ +{ + "syntax": { + "functions": [ + { + "name": "mutate", + "owner": "SourceFactSemanticEffects", + "line": 11, + "visibility": "public", + "params": [ + "target", + "value" + ] + }, + { + "name": "shape_hash", + "owner": "SourceFactSemanticEffects", + "line": 16, + "visibility": "public", + "params": [ + "data" + ] + }, + { + "name": "perform", + "owner": "SourceFactSemanticEffects", + "line": 4, + "visibility": "public", + "params": [ + "callback", + "name" + ] + } + ], + "calls": [ + { + "receiver": "Hash", + "message": "new", + "function": "shape_hash", + "line": 19, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [ + "0" + ] + }, + { + "receiver": "self", + "message": "send", + "function": "perform", + "line": 7, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [ + ":audit", + "name" + ] + }, + { + "receiver": "self", + "message": "puts", + "function": "perform", + "line": 5, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [ + "name" + ] + }, + { + "receiver": "callback", + "message": "call", + "function": "perform", + "line": 6, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [ + "name" + ] + }, + { + "receiver": "target", + "message": "items", + "function": "mutate", + "line": 13, 
+ "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [ + + ] + }, + { + "receiver": "Hash", + "message": "new", + "function": "shape_hash", + "line": 18, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": true, + "arguments": [ + + ] + }, + { + "receiver": "data", + "message": "each", + "function": "shape_hash", + "line": 20, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": true, + "arguments": [ + + ] + } + ], + "state_reads": [ + { + "receiver": "self", + "field": "$source_fact_seen", + "function": "perform", + "line": 8 + }, + { + "receiver": "data", + "field": "each", + "function": "shape_hash", + "line": 20 + }, + { + "receiver": "target", + "field": "items", + "function": "mutate", + "line": 13 + } + ], + "semantic_effects": [ + { + "kind": "context_dependency", + "detail": "$source_fact_seen", + "function": "perform", + "line": 8 + }, + { + "kind": "hidden_mutation", + "detail": "<<", + "function": "mutate", + "line": 13 + }, + { + "kind": "hidden_mutation", + "detail": "[]=", + "function": "mutate", + "line": 12 + }, + { + "kind": "hidden_mutation", + "detail": "[]=", + "function": "shape_hash", + "line": 18 + }, + { + "kind": "dynamic_dispatch", + "detail": "callback.call", + "function": "perform", + "line": 6 + }, + { + "kind": "hidden_mutation", + "detail": "op-assign", + "function": "shape_hash", + "line": 20 + }, + { + "kind": "hidden_io", + "detail": "puts", + "function": "perform", + "line": 5 + }, + { + "kind": "dynamic_dispatch", + "detail": "send", + "function": "perform", + "line": 7 + } + ] + } +} diff --git a/gems/decomplex/examples/source-facts/oracles/ruby-sequence_call_edges.json b/gems/decomplex/examples/source-facts/oracles/ruby-sequence_call_edges.json new file mode 100644 index 000000000..723b9cfc2 --- /dev/null +++ b/gems/decomplex/examples/source-facts/oracles/ruby-sequence_call_edges.json @@ -0,0 +1,344 @@ +{ 
+ "syntax": { + "calls": [ + { + "receiver": "arms", + "message": "map", + "function": "symbol_proc_maps", + "line": 16, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [ + "&:category" + ] + }, + { + "receiver": "C.classify_file(rsf.path, f.path)", + "message": "map", + "function": "symbol_proc_maps", + "line": 17, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [ + "&:category" + ] + }, + { + "receiver": "C.classify_file(rsf.path, f.path, diagnostic_mids: [:report_invalid_input!])", + "message": "map", + "function": "symbol_proc_maps", + "line": 18, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [ + "&:category" + ] + }, + { + "receiver": "self", + "message": "assert_empty", + "function": "assertion_single_call_argument", + "line": 10, + "conditional": true, + "control": "iterates", + "safe_navigation": false, + "block": false, + "arguments": [ + "C", + "classify_file", + "(coverage, file, root: dir)" + ] + }, + { + "receiver": "self", + "message": "assert_nil", + "function": "assertion_single_call_argument", + "line": 11, + "conditional": true, + "control": "iterates", + "safe_navigation": false, + "block": false, + "arguments": [ + "SlopCop::DecomplexVerdict", + "lookup", + "(v, path, \"plain\", 2)" + ] + }, + { + "receiver": "self", + "message": "with_env", + "function": "assertion_single_call_argument", + "line": 9, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": true, + "arguments": [ + "\"DECOMPLEX_PARSER\"", + "\"tree_sitter\"" + ] + }, + { + "receiver": "result", + "message": "dig", + "function": "assertion_argument_calls", + "line": 5, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [ + "\"locations\"", + "0", + "\"physicalLocation\"", + "\"artifactLocation\"", + "\"uri\"" + ] + 
}, + { + "receiver": "self", + "message": "assert_equal", + "function": "assertion_argument_calls", + "line": 5, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [ + "\"uri\"", + "result.dig(\"locations\", 0, \"physicalLocation\", \"artifactLocation\", \"uri\")" + ] + }, + { + "receiver": "File", + "message": "file?", + "function": "chained_multiline", + "line": 26, + "conditional": true, + "control": "conditional", + "safe_navigation": false, + "block": false, + "arguments": [ + "abs" + ] + }, + { + "receiver": "self", + "message": "overlay_arm", + "function": "chained_multiline", + "line": 34, + "conditional": true, + "control": "iterates", + "safe_navigation": false, + "block": false, + "arguments": [ + "arm", + "repo" + ] + }, + { + "receiver": "self", + "message": "repo_relative", + "function": "chained_multiline", + "line": 24, + "conditional": true, + "control": "iterates", + "safe_navigation": false, + "block": false, + "arguments": [ + "file", + "repo" + ] + }, + { + "receiver": "self", + "message": "Array", + "function": "chained_multiline", + "line": 23, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [ + "files" + ] + }, + { + "receiver": "File", + "message": "expand_path", + "function": "chained_multiline", + "line": 25, + "conditional": true, + "control": "iterates", + "safe_navigation": false, + "block": false, + "arguments": [ + "rel", + "repo" + ] + }, + { + "receiver": "Classifier", + "message": "classify_file", + "function": "chained_multiline", + "line": 28, + "conditional": true, + "control": "iterates", + "safe_navigation": false, + "block": false, + "arguments": [ + "resultset", + "abs", + "root: repo", + "ffi_boundary: ffi_boundary", + "diagnostic_mids: diagnostic_mids" + ] + }, + { + "receiver": "C", + "message": "classify_file", + "function": "symbol_proc_maps", + "line": 18, + "conditional": false, + "control": "always", 
+ "safe_navigation": false, + "block": false, + "arguments": [ + "rsf.path", + "f.path", + "diagnostic_mids: [:report_invalid_input!]" + ] + }, + { + "receiver": "C", + "message": "classify_file", + "function": "symbol_proc_maps", + "line": 17, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [ + "rsf.path", + "f.path" + ] + }, + { + "receiver": "Array(files).flat_map do |file| rel = repo_relative(file, repo) abs = File.expand_path(rel, repo) next [] unless File.file?(abs) Classifier.classify_file( resultset, abs, root: repo, ffi_boundary: ffi_boundary, diagnostic_mids: diagnostic_mids ).map { |arm| overlay_arm(arm, repo) } end", + "message": "compact", + "function": "chained_multiline", + "line": 23, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "self", + "message": "arms", + "function": "chained_multiline", + "line": 38, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "f", + "message": "path", + "function": "symbol_proc_maps", + "line": 17, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "rsf", + "message": "path", + "function": "symbol_proc_maps", + "line": 17, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "f", + "message": "path", + "function": "symbol_proc_maps", + "line": 18, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "rsf", + "message": "path", + "function": "symbol_proc_maps", + "line": 18, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "arm[\"line\"]", + "message": "to_i", + 
"function": "chained_multiline", + "line": 36, + "conditional": true, + "control": "iterates", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "Array(files)", + "message": "flat_map", + "function": "chained_multiline", + "line": 23, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": true, + "arguments": [] + }, + { + "receiver": "Array(files).flat_map do |file| rel = repo_relative(file, repo) abs = File.expand_path(rel, repo) next [] unless File.file?(abs) Classifier.classify_file( resultset, abs, root: repo, ffi_boundary: ffi_boundary, diagnostic_mids: diagnostic_mids ).map { |arm| overlay_arm(arm, repo) } end.compact", + "message": "sort_by", + "function": "chained_multiline", + "line": 23, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": true, + "arguments": [] + }, + { + "receiver": "Classifier.classify_file( resultset, abs, root: repo, ffi_boundary: ffi_boundary, diagnostic_mids: diagnostic_mids )", + "message": "map", + "function": "chained_multiline", + "line": 28, + "conditional": true, + "control": "iterates", + "safe_navigation": false, + "block": true, + "arguments": [] + } + ] + } +} diff --git a/gems/decomplex/examples/source-facts/oracles/ruby-slopcop_parity_edges.json b/gems/decomplex/examples/source-facts/oracles/ruby-slopcop_parity_edges.json new file mode 100644 index 000000000..edbb71a4c --- /dev/null +++ b/gems/decomplex/examples/source-facts/oracles/ruby-slopcop_parity_edges.json @@ -0,0 +1,285 @@ +{ + "syntax": { + "calls": [ + { + "receiver": "gaps", + "message": "first", + "function": "report", + "line": 7, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [ + "@top" + ] + }, + { + "receiver": "self", + "message": "source_path?", + "function": "scan", + "line": 13, + "conditional": true, + "control": "conditional", + "safe_navigation": false, + "block": false, + "arguments": 
[ + "path" + ] + }, + { + "receiver": "self", + "message": "Array", + "function": "scan", + "line": 12, + "conditional": true, + "control": "conditional", + "safe_navigation": false, + "block": false, + "arguments": [ + "paths" + ] + }, + { + "receiver": "self", + "message": "Array", + "function": "scan", + "line": 13, + "conditional": true, + "control": "conditional", + "safe_navigation": false, + "block": false, + "arguments": [ + "paths" + ] + }, + { + "receiver": "self", + "message": "emit", + "function": "guarded_emit", + "line": 23, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [ + "source" + ] + }, + { + "receiver": "x[:detectors]", + "message": "to_a", + "function": "report", + "line": 5, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "dup", + "message": "positive?", + "function": "report", + "line": 8, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "gaps", + "message": "size", + "function": "report", + "line": 8, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "self", + "message": "source", + "function": "guarded_emit", + "line": 20, + "conditional": true, + "control": "conditional", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "evidence", + "message": "covered?", + "function": "guarded_emit", + "line": 21, + "conditional": true, + "control": "conditional", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "Array(paths)", + "message": "empty?", + "function": "scan", + "line": 12, + "conditional": true, + "control": "conditional", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "gaps", + "message": "reject", + "function": 
"report", + "line": 5, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": true, + "arguments": [] + }, + { + "receiver": "flagged", + "message": "count", + "function": "report", + "line": 6, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": true, + "arguments": [] + }, + { + "receiver": "gaps.first(@top)", + "message": "map", + "function": "report", + "line": 7, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": true, + "arguments": [] + }, + { + "receiver": "Array(paths)", + "message": "select", + "function": "scan", + "line": 13, + "conditional": true, + "control": "conditional", + "safe_navigation": false, + "block": true, + "arguments": [] + } + ], + "state_reads": [ + { + "receiver": "self", + "field": "@top", + "function": "report", + "line": 7 + }, + { + "receiver": "flagged", + "field": "count", + "function": "report", + "line": 6 + }, + { + "receiver": "evidence", + "field": "covered?", + "function": "guarded_emit", + "line": 21 + }, + { + "receiver": "Array(paths)", + "field": "empty?", + "function": "scan", + "line": 12 + }, + { + "receiver": "gaps.first(@top)", + "field": "map", + "function": "report", + "line": 7 + }, + { + "receiver": "dup", + "field": "positive?", + "function": "report", + "line": 8 + }, + { + "receiver": "gaps", + "field": "reject", + "function": "report", + "line": 5 + }, + { + "receiver": "Array(paths)", + "field": "select", + "function": "scan", + "line": 13 + }, + { + "receiver": "gaps", + "field": "size", + "function": "report", + "line": 8 + }, + { + "receiver": "x[:detectors]", + "field": "to_a", + "function": "report", + "line": 5 + } + ], + "branch_decisions": [ + { + "function": "guarded_emit", + "line": 21, + "predicate": "evidence.covered?", + "state_refs": [ + "evidence.covered?" + ] + }, + { + "function": "scan", + "line": 12, + "predicate": "paths && !Array(paths).empty?", + "state_refs": [ + "Array(paths).empty?" 
+ ] + } + ], + "local_complexity_scores": [ + { + "id": "SourceFactSlopcopParityEdges#guarded_emit", + "score": 3.7, + "signals": { + "boolean_ops": 0, + "branches": 2, + "early_exits": 2 + } + }, + { + "id": "SourceFactSlopcopParityEdges#report", + "score": 0.0, + "signals": {} + }, + { + "id": "SourceFactSlopcopParityEdges#scan", + "score": 1.9, + "signals": { + "boolean_ops": 2, + "branches": 1 + } + } + ] + } +} diff --git a/gems/decomplex/examples/source-facts/oracles/ruby-state_reads.json b/gems/decomplex/examples/source-facts/oracles/ruby-state_reads.json new file mode 100644 index 000000000..f6c517f7e --- /dev/null +++ b/gems/decomplex/examples/source-facts/oracles/ruby-state_reads.json @@ -0,0 +1,93 @@ +{ + "syntax": { + "functions": [ + { + "name": "initialize", + "owner": "SourceFactStateReads", + "line": 4, + "visibility": "public", + "params": [ + "user" + ] + }, + { + "name": "inspect_profile", + "owner": "SourceFactStateReads", + "line": 9, + "visibility": "public", + "params": [ + "account" + ] + } + ], + "calls": [ + { + "receiver": "@user.profile", + "message": "name", + "function": "inspect_profile", + "line": 10, + "conditional": false, + "control": "always", + "safe_navigation": true, + "block": false, + "arguments": [] + }, + { + "receiver": "@user", + "message": "profile", + "function": "inspect_profile", + "line": 10, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "account", + "message": "active?", + "function": "inspect_profile", + "line": 12, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + } + ], + "state_reads": [ + { + "receiver": "self", + "field": "@status", + "function": "inspect_profile", + "line": 11 + }, + { + "receiver": "self", + "field": "@user", + "function": "inspect_profile", + "line": 10 + }, + { + "receiver": "account", + "field": "active?", + "function": "inspect_profile", 
+ "line": 12 + } + ], + "state_writes": [ + { + "receiver": "self", + "field": "@status", + "function": "initialize", + "line": 6 + }, + { + "receiver": "self", + "field": "@user", + "function": "initialize", + "line": 5 + } + ] + } +} diff --git a/gems/decomplex/examples/source-facts/oracles/ruby-visibility.json b/gems/decomplex/examples/source-facts/oracles/ruby-visibility.json new file mode 100644 index 000000000..b879d956b --- /dev/null +++ b/gems/decomplex/examples/source-facts/oracles/ruby-visibility.json @@ -0,0 +1,86 @@ +{ + "syntax": { + "functions": [ + { + "name": "prepare", + "owner": "SourceFactVisibility", + "line": 10, + "visibility": "private", + "params": [] + }, + { + "name": "inline_guard", + "owner": "SourceFactVisibility", + "line": 14, + "visibility": "private", + "params": [] + }, + { + "name": "public_step", + "owner": "SourceFactVisibility", + "line": 4, + "visibility": "public", + "params": [] + } + ], + "calls": [ + { + "receiver": "self", + "message": "private", + "function": "(top-level)", + "line": 18, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [ + ":inline_guard" + ] + }, + { + "receiver": "self", + "message": "protected", + "function": "(top-level)", + "line": 14, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [ + "inline_guard", + "true" + ] + }, + { + "receiver": "self", + "message": "private", + "function": "(top-level)", + "line": 8, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + }, + { + "receiver": "self", + "message": "prepare", + "function": "public_step", + "line": 5, + "conditional": false, + "control": "always", + "safe_navigation": false, + "block": false, + "arguments": [] + } + ], + "state_writes": [ + { + "receiver": "self", + "field": "@ready", + "function": "prepare", + "line": 11 + } + ] + } +} diff --git 
a/gems/decomplex/examples/source-facts/ruby/block_receiver_calls.rb b/gems/decomplex/examples/source-facts/ruby/block_receiver_calls.rb new file mode 100644 index 000000000..d120c2039 --- /dev/null +++ b/gems/decomplex/examples/source-facts/ruby/block_receiver_calls.rb @@ -0,0 +1,12 @@ +# frozen_string_literal: true + +class SourceFactBlockReceiverCalls + def collect(items) + names = [] + items.flat_map do |item| + names << item.name + item.children.each { |child| child.name } + end + names + end +end diff --git a/gems/decomplex/examples/source-facts/ruby/branch_predicate_paths.rb b/gems/decomplex/examples/source-facts/ruby/branch_predicate_paths.rb new file mode 100644 index 000000000..34f163cbb --- /dev/null +++ b/gems/decomplex/examples/source-facts/ruby/branch_predicate_paths.rb @@ -0,0 +1,24 @@ +# frozen_string_literal: true + +class SourceFactBranchPredicatePaths + def ready? + @status == :ready + end + + def route(user) + if @status == :ready && user.active? + publish(:ready) + else + warn("not ready") + end + + case user.role + when "admin" + audit(user) + when "guest" + fallback(user) + else + default(user) + end + end +end diff --git a/gems/decomplex/examples/source-facts/ruby/local_flow_edges.rb b/gems/decomplex/examples/source-facts/ruby/local_flow_edges.rb new file mode 100644 index 000000000..6fb8dbeac --- /dev/null +++ b/gems/decomplex/examples/source-facts/ruby/local_flow_edges.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +class SourceFactLocalFlowEdges + def build(sections, audit, grammar) + rows = [] + skip "missing grammar" unless grammar && File.file?(grammar) + + sections.each do |title, findings| + next unless findings + + findings.each do |finding| + file, meth, = parse_loc(finding.loc) + next unless file && !file.empty? && meth && !meth.empty? 
+ + key = [file, meth] + rows << "| #{key.join(":")} | #{audit.findings.size} |" + end + end + + assert_empty audit.findings + rows + end +end diff --git a/gems/decomplex/examples/source-facts/ruby/local_methods_contracts.rb b/gems/decomplex/examples/source-facts/ruby/local_methods_contracts.rb new file mode 100644 index 000000000..308a2062f --- /dev/null +++ b/gems/decomplex/examples/source-facts/ruby/local_methods_contracts.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +class SourceFactLocalMethodsContracts + def process(user, items) + profile = user.profile + names = [] + items.each do |item| + if item.ready? + names << item.name + end + end + profile.name if names.any? + end +end diff --git a/gems/decomplex/examples/source-facts/ruby/locals_not_state.rb b/gems/decomplex/examples/source-facts/ruby/locals_not_state.rb new file mode 100644 index 000000000..a4cf11cc9 --- /dev/null +++ b/gems/decomplex/examples/source-facts/ruby/locals_not_state.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +class SourceFactLocalsNotState + def build(values, config) + key = "HOME" + path = ENV[key] + total = 0 + values.each do |value| + total = total + value + end + config[:path] = path + assert_empty values + [path, total, config] + end +end diff --git a/gems/decomplex/examples/source-facts/ruby/protocols_nil_clone.rb b/gems/decomplex/examples/source-facts/ruby/protocols_nil_clone.rb new file mode 100644 index 000000000..4560a1323 --- /dev/null +++ b/gems/decomplex/examples/source-facts/ruby/protocols_nil_clone.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +class SourceFactProtocolsNilClone + def open + @opened = true + end + + def close + @opened = false + end + + def run(item) + open + close if item.ready? 
+ end + + def guard(value) + return unless value + + value&.name + end + + def clone_left(user) + data = user.profile.name + audit(data) + data + end + + def clone_right(account) + data = account.profile.name + audit(data) + data + end +end diff --git a/gems/decomplex/examples/source-facts/ruby/semantic_effects.rb b/gems/decomplex/examples/source-facts/ruby/semantic_effects.rb new file mode 100644 index 000000000..315a4cc9e --- /dev/null +++ b/gems/decomplex/examples/source-facts/ruby/semantic_effects.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +class SourceFactSemanticEffects + def perform(callback, name) + puts name + callback.call(name) + send(:audit, name) + $source_fact_seen + end + + def mutate(target, value) + target[:name] = value + target.items << value + end + + def shape_hash(data) + schema = { "$schema" => "https://example.test/schema.json" } + buckets = Hash.new { |hash, key| hash[key] = [] } + totals = Hash.new(0) + data.each { |key, count| totals[key] += count } + [schema, buckets, totals] + end +end diff --git a/gems/decomplex/examples/source-facts/ruby/sequence_call_edges.rb b/gems/decomplex/examples/source-facts/ruby/sequence_call_edges.rb new file mode 100644 index 000000000..2dd7a9226 --- /dev/null +++ b/gems/decomplex/examples/source-facts/ruby/sequence_call_edges.rb @@ -0,0 +1,40 @@ +# frozen_string_literal: true + +class SourceFactSequenceCallEdges + def assertion_argument_calls(result) + assert_equal "uri", result.dig("locations", 0, "physicalLocation", "artifactLocation", "uri") + end + + def assertion_single_call_argument(coverage, file, dir, v, path) + with_env("DECOMPLEX_PARSER", "tree_sitter") do + assert_empty C.classify_file(coverage, file, root: dir) + assert_nil SlopCop::DecomplexVerdict.lookup(v, path, "plain", 2) + end + end + + def symbol_proc_maps(arms, rsf, f) + arms.map(&:category) + C.classify_file(rsf.path, f.path).map(&:category) + C.classify_file(rsf.path, f.path, + diagnostic_mids: 
[:report_invalid_input!]).map(&:category) + end + + def chained_multiline(files, resultset, repo, ffi_boundary, diagnostic_mids) + arms = Array(files).flat_map do |file| + rel = repo_relative(file, repo) + abs = File.expand_path(rel, repo) + next [] unless File.file?(abs) + + Classifier.classify_file( + resultset, + abs, + root: repo, + ffi_boundary: ffi_boundary, + diagnostic_mids: diagnostic_mids + ).map { |arm| overlay_arm(arm, repo) } + end.compact.sort_by do |arm| + [arm["file"], arm["line"].to_i, arm["method"].to_s, arm["arm_category"].to_s] + end + arms + end +end diff --git a/gems/decomplex/examples/source-facts/ruby/slopcop_parity_edges.rb b/gems/decomplex/examples/source-facts/ruby/slopcop_parity_edges.rb new file mode 100644 index 000000000..91aa19ce4 --- /dev/null +++ b/gems/decomplex/examples/source-facts/ruby/slopcop_parity_edges.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +class SourceFactSlopcopParityEdges + def report(gaps) + flagged = gaps.reject { |x| x[:detectors].to_a.empty? } + dup = flagged.count { |x| x[:coarse_dup] } + gaps.first(@top).map { |x| x[:file] } + "#{gaps.size} #{dup.positive? ? @top : 0}" + end + + def scan(paths) + if paths && !Array(paths).empty? + Array(paths).select { |path| source_path?(path) } + else + [] + end + end + + def guarded_emit(source, evidence) + return unless source + return if evidence.covered? + + emit(source) + end +end diff --git a/gems/decomplex/examples/source-facts/ruby/state_reads.rb b/gems/decomplex/examples/source-facts/ruby/state_reads.rb new file mode 100644 index 000000000..7cd6cd6c7 --- /dev/null +++ b/gems/decomplex/examples/source-facts/ruby/state_reads.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +class SourceFactStateReads + def initialize(user) + @user = user + @status = :idle + end + + def inspect_profile(account) + name = @user.profile&.name + status = @status + account.active? 
&& status == :idle && name + end +end diff --git a/gems/decomplex/examples/source-facts/ruby/visibility.rb b/gems/decomplex/examples/source-facts/ruby/visibility.rb new file mode 100644 index 000000000..96e21a95c --- /dev/null +++ b/gems/decomplex/examples/source-facts/ruby/visibility.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +class SourceFactVisibility + def public_step + prepare + end + + private + + def prepare + @ready = true + end + + protected def inline_guard + true + end + + private :inline_guard +end diff --git a/gems/decomplex/examples/swift/co-update.swift b/gems/decomplex/examples/swift/co-update.swift new file mode 100644 index 000000000..389d85727 --- /dev/null +++ b/gems/decomplex/examples/swift/co-update.swift @@ -0,0 +1,4 @@ +func stable_one(node: Node) { node.storage = 1; node.provenance = 1 } +func stable_two(node: Node) { node.storage = 1; node.provenance = 1 } +func stable_three(node: Node) { node.storage = 1; node.provenance = 1 } +func misses_provenance(node: Node) { node.storage = 1 } diff --git a/gems/decomplex/examples/swift/decision-pressure.swift b/gems/decomplex/examples/swift/decision-pressure.swift new file mode 100644 index 000000000..2edc2cfda --- /dev/null +++ b/gems/decomplex/examples/swift/decision-pressure.swift @@ -0,0 +1 @@ +func scan(node: Node) -> Bool { let value = node.symbol; return value.isNull() } diff --git a/gems/decomplex/examples/swift/derived-state.swift b/gems/decomplex/examples/swift/derived-state.swift new file mode 100644 index 000000000..53abac1f0 --- /dev/null +++ b/gems/decomplex/examples/swift/derived-state.swift @@ -0,0 +1 @@ +func check(inputValue: Int) { var input = inputValue; let cached = input + 1; input = 2; print(cached) } diff --git a/gems/decomplex/examples/swift/false-simplicity.swift b/gems/decomplex/examples/swift/false-simplicity.swift new file mode 100644 index 000000000..7afa201cf --- /dev/null +++ b/gems/decomplex/examples/swift/false-simplicity.swift @@ -0,0 +1 @@ +class 
FalseSimplicityExample { func hack() { print("hidden IO") } } diff --git a/gems/decomplex/examples/swift/fat-union.swift b/gems/decomplex/examples/swift/fat-union.swift new file mode 100644 index 000000000..c55414822 --- /dev/null +++ b/gems/decomplex/examples/swift/fat-union.swift @@ -0,0 +1,7 @@ +func handle(node: Node) { + switch node { + case AST.Call: node.line(); node.col(); node.ty(); node.span(); node.parent(); node.recv() + case AST.Func: node.line(); node.col(); node.ty(); node.span(); node.parent(); node.name() + case AST.Lit: node.line(); node.col(); node.ty(); node.span(); node.parent(); node.value() + } +} diff --git a/gems/decomplex/examples/swift/flay-similarity.swift b/gems/decomplex/examples/swift/flay-similarity.swift new file mode 100644 index 000000000..e731d0d82 --- /dev/null +++ b/gems/decomplex/examples/swift/flay-similarity.swift @@ -0,0 +1,73 @@ +func first_clone(node: Node) -> Int { + var total = 0 + let value1 = node.part1 + if value1.ready() && value1.enabled() { + total += value1.amount + } + let value2 = node.part2 + if value2.ready() && value2.enabled() { + total += value2.amount + } + let value3 = node.part3 + if value3.ready() && value3.enabled() { + total += value3.amount + } + let value4 = node.part4 + if value4.ready() && value4.enabled() { + total += value4.amount + } + let value5 = node.part5 + if value5.ready() && value5.enabled() { + total += value5.amount + } + let value6 = node.part6 + if value6.ready() && value6.enabled() { + total += value6.amount + } + let value7 = node.part7 + if value7.ready() && value7.enabled() { + total += value7.amount + } + let value8 = node.part8 + if value8.ready() && value8.enabled() { + total += value8.amount + } + return total +} + +func second_clone(entry: Node) -> Int { + var total = 0 + let item1 = entry.part1 + if item1.ready() && item1.enabled() { + total += item1.amount + } + let item2 = entry.part2 + if item2.ready() && item2.enabled() { + total += item2.amount + } + let item3 = 
entry.part3 + if item3.ready() && item3.enabled() { + total += item3.amount + } + let item4 = entry.part4 + if item4.ready() && item4.enabled() { + total += item4.amount + } + let item5 = entry.part5 + if item5.ready() && item5.enabled() { + total += item5.amount + } + let item6 = entry.part6 + if item6.ready() && item6.enabled() { + total += item6.amount + } + let item7 = entry.part7 + if item7.ready() && item7.enabled() { + total += item7.amount + } + let item8 = entry.part8 + if item8.ready() && item8.enabled() { + total += item8.amount + } + return total +} diff --git a/gems/decomplex/examples/swift/function-lcom.swift b/gems/decomplex/examples/swift/function-lcom.swift new file mode 100644 index 000000000..d2adc99af --- /dev/null +++ b/gems/decomplex/examples/swift/function-lcom.swift @@ -0,0 +1,12 @@ +func mixed(price: Int, tax: Int, logger: Logger) -> Result { + let subtotal = price + tax + let total = subtotal * 2 + let rounded = total.round() + + let timestamp = now() + let buffer = Buffer.init() + buffer.push(timestamp) + logger.info(buffer) + + return Result.init(rounded, buffer) +} diff --git a/gems/decomplex/examples/swift/implicit-control-flow.swift b/gems/decomplex/examples/swift/implicit-control-flow.swift new file mode 100644 index 000000000..fcfb6ba52 --- /dev/null +++ b/gems/decomplex/examples/swift/implicit-control-flow.swift @@ -0,0 +1,13 @@ +class FlowExample { + var status = 0 + var valid = false + var done = false + func prepare() { self.status = 1 } + func validate() { self.valid = self.status == 1 } + func commit() { self.done = self.valid } + func ok1() { self.prepare(); self.validate(); self.commit() } + func ok2() { self.prepare(); self.validate(); self.commit() } + func ok3() { self.prepare(); self.validate(); self.commit() } + func ok4() { self.prepare(); self.validate(); self.commit() } + func drift() { self.validate(); self.prepare(); self.commit() } +} diff --git a/gems/decomplex/examples/swift/inconsistent-rename-clone.swift 
b/gems/decomplex/examples/swift/inconsistent-rename-clone.swift new file mode 100644 index 000000000..696feba3a --- /dev/null +++ b/gems/decomplex/examples/swift/inconsistent-rename-clone.swift @@ -0,0 +1,2 @@ +func original() { let src = fetch(1); check(src); store(src); finalize(src) } +func pasted() { let dst = fetch(2); check(dst); store(src); finalize(dst) } diff --git a/gems/decomplex/examples/swift/local-flow.swift b/gems/decomplex/examples/swift/local-flow.swift new file mode 100644 index 000000000..375727f9d --- /dev/null +++ b/gems/decomplex/examples/swift/local-flow.swift @@ -0,0 +1,9 @@ +func mixed(price: Int, tax: Int) -> Result { + let subtotal = price + tax + let total = subtotal.round() + + let timestamp = now() + let buffer = Buffer.init() + buffer.push(timestamp) + return Result.init(total, buffer) +} diff --git a/gems/decomplex/examples/swift/locality-drag.swift b/gems/decomplex/examples/swift/locality-drag.swift new file mode 100644 index 000000000..d73827fe1 --- /dev/null +++ b/gems/decomplex/examples/swift/locality-drag.swift @@ -0,0 +1,27 @@ +func run(user: User, cart: Cart, logger: Logger) { + let receipt_id = user.id + + let total = cart.total + if total > 100 { + if cart.discountable() { + let discount = 10 + } + } + if cart.taxable() { + if cart.region { + let tax = total * 2 + } + } + if logger.enabled() { + if logger.debug() { + logger.info(total) + } + } + if cart.valid() { + if cart.ready() { + let status = 1 + } + } + + emit(receipt_id) +} diff --git a/gems/decomplex/examples/swift/miner.swift b/gems/decomplex/examples/swift/miner.swift new file mode 100644 index 000000000..2c4f863ad --- /dev/null +++ b/gems/decomplex/examples/swift/miner.swift @@ -0,0 +1,4 @@ +func one(a: Bool,b: Bool,c: Bool) -> Bool { return a && b && c } +func two(a: Bool,b: Bool,c: Bool) -> Bool { return a && b && c } +func three(a: Bool,b: Bool,c: Bool) -> Bool { return a && b && c } +func broken(a: Bool,b: Bool) -> Bool { return a && b } diff --git 
a/gems/decomplex/examples/swift/operational-discontinuity.swift b/gems/decomplex/examples/swift/operational-discontinuity.swift new file mode 100644 index 000000000..837d4b336 --- /dev/null +++ b/gems/decomplex/examples/swift/operational-discontinuity.swift @@ -0,0 +1,9 @@ +func phase_shift() { + let a = 1 + let b = 2 + + // Phase 2 + let x = 3 + let y = 4 + print(x); print(y) +} diff --git a/gems/decomplex/examples/swift/oversized-predicate.swift b/gems/decomplex/examples/swift/oversized-predicate.swift new file mode 100644 index 000000000..e0128fcc0 --- /dev/null +++ b/gems/decomplex/examples/swift/oversized-predicate.swift @@ -0,0 +1 @@ +func complex_check(a: Bool,b: Bool,c: Bool,d: Bool) { if a && b && c && d { print("too big") } } diff --git a/gems/decomplex/examples/swift/path-condition.swift b/gems/decomplex/examples/swift/path-condition.swift new file mode 100644 index 000000000..4f0e26ef0 --- /dev/null +++ b/gems/decomplex/examples/swift/path-condition.swift @@ -0,0 +1,4 @@ +func one(x: X,y: Y,z: Z) { if x.p() && y.q() && z.r() { go(x) } } +func two(x: X,y: Y,z: Z) { if x.p() && y.q() && z.r() { go(x) } } +func three(x: X,y: Y,z: Z) { if x.p() && y.q() && z.r() { go(x) } } +func bug(x: X,y: Y,z: Z) { if x.p() && y.q() { go(x) } } diff --git a/gems/decomplex/examples/swift/predicate-alias.swift b/gems/decomplex/examples/swift/predicate-alias.swift new file mode 100644 index 000000000..d00c8d1dd --- /dev/null +++ b/gems/decomplex/examples/swift/predicate-alias.swift @@ -0,0 +1,3 @@ +func first() -> Bool { return true } +func second() -> Bool { return true } +func other() -> Bool { return false } diff --git a/gems/decomplex/examples/swift/redundant-nil-guard.swift b/gems/decomplex/examples/swift/redundant-nil-guard.swift new file mode 100644 index 000000000..b288620f3 --- /dev/null +++ b/gems/decomplex/examples/swift/redundant-nil-guard.swift @@ -0,0 +1 @@ +func check(value: Value) { if value.isSome() { value.isNull() } } diff --git 
a/gems/decomplex/examples/swift/semantic-alias.swift b/gems/decomplex/examples/swift/semantic-alias.swift new file mode 100644 index 000000000..178244e76 --- /dev/null +++ b/gems/decomplex/examples/swift/semantic-alias.swift @@ -0,0 +1,4 @@ +func frame(node: Node) -> Bool { return node.provenance == FRAME } +func is_frame(node: Node) -> Bool { return provenance == FRAME } +func heap(node: Node) -> Bool { return node.provenance == HEAP } +func somewhere(node: Node) -> Int { if node.provenance == FRAME { return 1 }; return 0 } diff --git a/gems/decomplex/examples/swift/sequence-mine.swift b/gems/decomplex/examples/swift/sequence-mine.swift new file mode 100644 index 000000000..bd72413da --- /dev/null +++ b/gems/decomplex/examples/swift/sequence-mine.swift @@ -0,0 +1,5 @@ +func one() { alloc_mark(x); body1(); cleanup(x) } +func two() { alloc_mark(y); body2(); cleanup(y) } +func three() { alloc_mark(z); body3(); cleanup(z) } +func four() { alloc_mark(w); body4(); cleanup(w) } +func leak() { alloc_mark(q); use_value(q) } diff --git a/gems/decomplex/examples/swift/state-branch-density.swift b/gems/decomplex/examples/swift/state-branch-density.swift new file mode 100644 index 000000000..34451cf30 --- /dev/null +++ b/gems/decomplex/examples/swift/state-branch-density.swift @@ -0,0 +1,13 @@ +class StateBranchChecker { + var checked = false + + func check(admin: Bool, name: String) { + if admin { + self.checked = true + } + + if self.checked && name == "admin" { + print("hello") + } + } +} diff --git a/gems/decomplex/examples/swift/state-mesh.swift b/gems/decomplex/examples/swift/state-mesh.swift new file mode 100644 index 000000000..0faaf547a --- /dev/null +++ b/gems/decomplex/examples/swift/state-mesh.swift @@ -0,0 +1 @@ +class StateMeshExample { var a = 0; var b = 0; func initialize() { self.a = 1; self.b = 2 } func writer() { self.a = 3 } func reader() -> Int { return self.a + self.b } func a_alias() -> Int { return self.a } } diff --git 
a/gems/decomplex/examples/swift/structural-topology.swift b/gems/decomplex/examples/swift/structural-topology.swift new file mode 100644 index 000000000..b12f247d8 --- /dev/null +++ b/gems/decomplex/examples/swift/structural-topology.swift @@ -0,0 +1,16 @@ +class Worker { + func run(items: Items) { + self.prepare() + if self.ready() { + self.validate() + } + for item in items { + self.helper(item: item) + } + } + + private func prepare() {} + private func ready() -> Bool { return true } + func validate() {} + private func helper(item: Item) { item.use() } +} diff --git a/gems/decomplex/examples/swift/temporal-ordering-pressure.swift b/gems/decomplex/examples/swift/temporal-ordering-pressure.swift new file mode 100644 index 000000000..775acd72d --- /dev/null +++ b/gems/decomplex/examples/swift/temporal-ordering-pressure.swift @@ -0,0 +1 @@ +class TemporalOrderExample { var a = 0; var b = 0; func one() { self.a = 1 } func two() { self.a = 2; self.b = 3 } func three() { self.b = 4 } func reader() -> Int { return self.a } } diff --git a/gems/decomplex/examples/swift/weighted-inlined-complexity.swift b/gems/decomplex/examples/swift/weighted-inlined-complexity.swift new file mode 100644 index 000000000..98e6579b2 --- /dev/null +++ b/gems/decomplex/examples/swift/weighted-inlined-complexity.swift @@ -0,0 +1,5 @@ +func checkout(user: User, cart: Cart) { validate_user(user: user); apply_discount(cart: cart); process_payment(user: user, cart: cart); audit_cart(cart: cart) } +func validate_user(user: User) -> Bool { if user.active() && !user.suspended() { if user.profile.complete() { return true } else { return false } } else { return false } } +func apply_discount(cart: Cart) -> Int { if cart.total > 100 && eligible() { if holiday() { return 20 } else if loyalty_month() { return 15 } else { return 10 } }; return 0 } +func process_payment(user: User, cart: Cart) { if gateway.ready() { if cart.total > 0 && user.active() { if fraud_check(user) { charge(user, cart) } else { 
decline(user) } } } } +func audit_cart(cart: Cart) { for item in cart.items { if item.taxable() { if item.region && item.amount > 0 { record_tax(item) } } } } diff --git a/gems/decomplex/examples/syntax-facts/c/core.c b/gems/decomplex/examples/syntax-facts/c/core.c new file mode 100644 index 000000000..cde7856d5 --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/c/core.c @@ -0,0 +1,51 @@ +typedef enum { + STATUS_IDLE, + STATUS_BUSY +} Status; + +typedef struct CSyntaxFactsCore { + Status status; + int count; + Sink *sink; +} CSyntaxFactsCore; + +void CSyntaxFactsCore_process(CSyntaxFactsCore *self, User *user, Item **items, int item_count, Callback callback) { + const char *name = user->profile->name; + Account account = make_account(name, user->active); + callback(&account); + + switch (user->role) { + case ROLE_OWNER: + case ROLE_ADMIN: + escalate(self, user); + break; + case ROLE_GUEST: + fallback(self, user); + break; + default: + default_case(self, user); + break; + } + + if (self->status == STATUS_IDLE && user->ready) { + self->count += 1; + publish(self, STATUS_BUSY); + } else { + warn("not ready"); + } + + for (int i = 0; i < item_count; i++) { + item_children(items[i]); + } +} + +static Status CSyntaxFactsCore_audit(CSyntaxFactsCore *self, const char *name) { + puts(name); + sink_send(self->sink, "record", name); + return self->status; +} + +int CSyntaxFactsCore_ready(CSyntaxFactsCore *self) { + return self->count > 0; +} + diff --git a/gems/decomplex/examples/syntax-facts/cpp/core.cpp b/gems/decomplex/examples/syntax-facts/cpp/core.cpp new file mode 100644 index 000000000..965fa11b4 --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/cpp/core.cpp @@ -0,0 +1,61 @@ +#include +#include + +enum class Status { + Idle, + Busy +}; + +class CppSyntaxFactsCore { + Status status; + int count; + Sink *sink; + +public: + explicit CppSyntaxFactsCore(Status status, Sink *sink) + : status(status), count(0), sink(sink) {} + + std::string process(User &user, 
std::vector &items, Callback callback) { + std::string name = user.profile().name(); + Account account{name, user.active()}; + callback(account); + + switch (user.role()) { + case Role::Owner: + case Role::Admin: + escalate(user); + break; + case Role::Guest: + fallback(user); + break; + default: + defaultCase(user); + break; + } + + if (status == Status::Idle && user.ready()) { + count += 1; + publish(Status::Busy); + } else { + warn("not ready"); + } + + for (auto &item : items) { + item.children(); + } + + return name; + } + +private: + Status audit(const std::string &name) { + std::cout << name; + sink->send("record", name); + return status; + } + + bool ready() const { + return count > 0; + } +}; + diff --git a/gems/decomplex/examples/syntax-facts/csharp/core.cs b/gems/decomplex/examples/syntax-facts/csharp/core.cs new file mode 100644 index 000000000..168611ffe --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/csharp/core.cs @@ -0,0 +1,67 @@ +using System; +using System.Collections.Generic; + +class CSharpSyntaxFactsCore +{ + private Status status; + private int count; + private Sink sink; + + public CSharpSyntaxFactsCore(Status status, Sink sink) + { + this.status = status; + this.count = 0; + this.sink = sink; + } + + public string Process(User user, IEnumerable items, Action callback) + { + var name = user.Profile.Name; + var account = new Account(name, user.Active); + callback(account); + + switch (user.Role) + { + case "owner": + case "admin": + Escalate(user); + break; + case "guest": + Fallback(user); + break; + default: + DefaultCase(user); + break; + } + + if (this.status == Status.Idle && user.Ready) + { + this.count += 1; + Publish(Status.Busy); + } + else + { + Console.WriteLine("not ready"); + } + + foreach (var item in items) + { + item.Children(); + } + + return name; + } + + private Status Audit(string name) + { + Console.WriteLine(name); + sink.Send("record", name); + return status; + } + + private bool Ready() + { + return count > 0; 
+ } +} + diff --git a/gems/decomplex/examples/syntax-facts/go/core.go b/gems/decomplex/examples/syntax-facts/go/core.go new file mode 100644 index 000000000..c33e2db33 --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/go/core.go @@ -0,0 +1,81 @@ +package syntaxfacts + +type Status int + +const ( + Idle Status = iota + Busy +) + +type Profile struct { + Name string +} + +type User struct { + Role string + Ready bool + Active bool + Profile Profile +} + +type Account struct { + Name string + Active bool +} + +type GoSyntaxFactsCore struct { + status Status + count int + lookup map[string]int +} + +func NewGoSyntaxFactsCore(status Status) *GoSyntaxFactsCore { + return &GoSyntaxFactsCore{status: status, lookup: map[string]int{}} +} + +func (c *GoSyntaxFactsCore) Process(user User, items []string, callback func(Account)) string { + var first, second int = 1, 2 + _ = first + _ = second + + name := user.Profile.Name + account := Account{Name: name, Active: user.Active} + callback(account) + + switch user.Role { + case "owner", "admin": + c.escalate(user) + case "guest": + c.fallback(user) + default: + c.defaultCase(user) + } + + if c.status == Idle && user.Ready { + c.count += 1 + c.publish(Busy) + } else { + c.warn("not ready") + } + + for _, item := range items { + c.children(item) + } + + c.lookup[name] = c.count + go c.audit(name) + defer c.audit(name) + + return name +} + +func (c *GoSyntaxFactsCore) audit(name string) { + println(name) + c.send("record", name) + _ = c.status +} + +func (c GoSyntaxFactsCore) Ready() bool { + return c.count > 0 +} + diff --git a/gems/decomplex/examples/syntax-facts/java/core.java b/gems/decomplex/examples/syntax-facts/java/core.java new file mode 100644 index 000000000..d3b220bfc --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/java/core.java @@ -0,0 +1,53 @@ +package syntaxfacts; + +class JavaSyntaxFactsCore { + private Status status; + private int count; + + public JavaSyntaxFactsCore(Status status) { + this.status = 
status; + this.count = 0; + } + + public String process(User user, Iterable items, Callback callback) { + String name = user.profile().name(); + Account account = new Account(name, user.active()); + callback.call(account); + + switch (user.role()) { + case "owner": + case "admin": + this.escalate(user); + break; + case "guest": + this.fallback(user); + break; + default: + this.defaultCase(user); + } + + if (this.status == Status.IDLE && user.ready()) { + this.count += 1; + this.publish(Status.BUSY); + } else { + System.err.println("not ready"); + } + + for (Item item : items) { + item.children(); + } + + return name; + } + + private void audit(String name) { + System.out.println(name); + this.send("record", name); + this.status.name(); + } + + boolean ready() { + return this.count > 0; + } +} + diff --git a/gems/decomplex/examples/syntax-facts/javascript/core.js b/gems/decomplex/examples/syntax-facts/javascript/core.js new file mode 100644 index 000000000..8f06a5326 --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/javascript/core.js @@ -0,0 +1,55 @@ +export class JavaScriptSyntaxFactsCore { + #status; + + constructor(status, sink) { + this.#status = status; + this.count = 0; + this.sink = sink; + } + + process(user, items, callback) { + const name = user?.profile?.name; + const account = { name, active: user.active }; + callback(account); + + switch (user.role) { + case "owner": + case "admin": + this.escalate(user); + break; + case "guest": + this.fallback(user); + break; + default: + this.defaultCase(user); + } + + if (this.#status === "idle" && user.ready) { + this.count += 1; + this.publish("busy"); + } else { + console.warn("not ready"); + } + + for (const index in items) { + this.#audit(items[index]); + } + + return name ?? null; + } + + #audit(name) { + console.log(name); + this.sink.send("record", name); + return this.#status; + } + + ready() { + return this.count > 0; + } +} + +export function normalizeValue(input) { + return input ?? 
null; +} + diff --git a/gems/decomplex/examples/syntax-facts/kotlin/core.kt b/gems/decomplex/examples/syntax-facts/kotlin/core.kt new file mode 100644 index 000000000..e02085ff8 --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/kotlin/core.kt @@ -0,0 +1,46 @@ +package syntaxfacts + +class KotlinSyntaxFactsCore(private var status: Status, private val sink: Sink) { + private var count = 0 + + fun process(user: User, items: List, callback: (Account) -> Unit): String? { + val name = user.profile?.name + val account = Account(name, user.active) + callback(account) + + when (user.role) { + "owner", "admin" -> escalate(user) + "guest" -> fallback(user) + else -> defaultCase(user) + } + + if (status == Status.IDLE && user.ready) { + count += 1 + publish(Status.BUSY) + } else { + println("not ready") + } + + for (item in items) { + item.children() + } + + return name ?: "missing" + } + + private fun audit(name: String): Status { + println(name) + sink.send("record", name) + return status + } + + fun ready(): Boolean { + return count > 0 + } +} + +enum class Status { + IDLE, + BUSY +} + diff --git a/gems/decomplex/examples/syntax-facts/lua/core.lua b/gems/decomplex/examples/syntax-facts/lua/core.lua new file mode 100644 index 000000000..84ef82b3e --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/lua/core.lua @@ -0,0 +1,51 @@ +local LuaSyntaxFactsCore = {} +LuaSyntaxFactsCore.__index = LuaSyntaxFactsCore + +function LuaSyntaxFactsCore.new(status, sink) + local instance = { + status = status, + count = 0, + sink = sink + } + return setmetatable(instance, LuaSyntaxFactsCore) +end + +function LuaSyntaxFactsCore:process(user, items, callback) + local name = user.profile.name + local account = { name = name, active = user.active } + callback(account) + + if user.role == "owner" or user.role == "admin" then + self:escalate(user) + elseif user.role == "guest" then + self:fallback(user) + else + self:default_case(user) + end + + if self.status == "idle" and user.ready 
then + self.count = self.count + 1 + self:publish("busy") + else + print("not ready") + end + + for _, item in ipairs(items) do + item:children() + end + + return name or "missing" +end + +function LuaSyntaxFactsCore:audit(name) + print(name) + self.sink:send("record", name) + return self.status +end + +function LuaSyntaxFactsCore:ready() + return self.count > 0 +end + +return LuaSyntaxFactsCore + diff --git a/gems/decomplex/examples/syntax-facts/oracles/c-core.json b/gems/decomplex/examples/syntax-facts/oracles/c-core.json new file mode 100644 index 000000000..b5ea57a4e --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/oracles/c-core.json @@ -0,0 +1,555 @@ +{ + "format": "decomplex.syntax-facts.v1", + "documents": [ + { + "file": "gems/decomplex/examples/syntax-facts/c/core.c", + "language": "c", + "functions": [ + { + "line": 12, + "name": "CSyntaxFactsCore_process", + "owner": "CSyntaxFactsCore", + "params": [ + "self", + "user", + "items", + "item_count", + "callback" + ], + "span": [ + 12, + 0, + 40, + 1 + ], + "visibility": "public" + }, + { + "line": 42, + "name": "CSyntaxFactsCore_audit", + "owner": "CSyntaxFactsCore", + "params": [ + "self", + "name" + ], + "span": [ + 42, + 0, + 46, + 1 + ], + "visibility": "private" + }, + { + "line": 48, + "name": "CSyntaxFactsCore_ready", + "owner": "CSyntaxFactsCore", + "params": [ + "self" + ], + "span": [ + 48, + 0, + 50, + 1 + ], + "visibility": "public" + } + ], + "owners": [ + { + "kind": "struct", + "line": 6, + "name": "CSyntaxFactsCore", + "span": [ + 6, + 8, + 10, + 1 + ] + } + ], + "calls": [ + { + "arguments": [ + "&account" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "CSyntaxFactsCore_process", + "line": 15, + "message": "callback", + "owner": "CSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 15, + 2, + 15, + 20 + ] + }, + { + "arguments": [ + "\"not ready\"" + ], + "block": false, + "conditional": true, + "control": 
"conditional", + "function": "CSyntaxFactsCore_process", + "line": 34, + "message": "warn", + "owner": "CSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 34, + 4, + 34, + 21 + ] + }, + { + "arguments": [ + "items[i]" + ], + "block": false, + "conditional": true, + "control": "iterates", + "function": "CSyntaxFactsCore_process", + "line": 38, + "message": "item_children", + "owner": "CSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 38, + 4, + 38, + 27 + ] + }, + { + "arguments": [ + "name", + "user->active" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "CSyntaxFactsCore_process", + "line": 14, + "message": "make_account", + "owner": "CSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 14, + 20, + 14, + 52 + ] + }, + { + "arguments": [ + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "CSyntaxFactsCore_audit", + "line": 43, + "message": "puts", + "owner": "CSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 43, + 2, + 43, + 12 + ] + }, + { + "arguments": [ + "self", + "STATUS_BUSY" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "CSyntaxFactsCore_process", + "line": 32, + "message": "publish", + "owner": "CSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 32, + 4, + 32, + 30 + ] + }, + { + "arguments": [ + "self", + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "CSyntaxFactsCore_process", + "line": 20, + "message": "escalate", + "owner": "CSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 20, + 6, + 20, + 26 + ] + }, + { + "arguments": [ + "self", + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "CSyntaxFactsCore_process", + "line": 23, + "message": "fallback", + 
"owner": "CSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 23, + 6, + 23, + 26 + ] + }, + { + "arguments": [ + "self", + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "CSyntaxFactsCore_process", + "line": 26, + "message": "default_case", + "owner": "CSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 26, + 6, + 26, + 30 + ] + }, + { + "arguments": [ + "self->sink", + "\"record\"", + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "CSyntaxFactsCore_audit", + "line": 44, + "message": "sink_send", + "owner": "CSyntaxFactsCore", + "receiver": "self.sink", + "safe_navigation": false, + "span": [ + 44, + 2, + 44, + 39 + ] + } + ], + "state_reads": [ + { + "field": "active", + "function": "CSyntaxFactsCore_process", + "line": 14, + "owner": "CSyntaxFactsCore", + "receiver": "user", + "span": [ + 14, + 39, + 14, + 51 + ] + }, + { + "field": "count", + "function": "CSyntaxFactsCore_ready", + "line": 49, + "owner": "CSyntaxFactsCore", + "receiver": "self", + "span": [ + 49, + 9, + 49, + 20 + ] + }, + { + "field": "name", + "function": "CSyntaxFactsCore_process", + "line": 13, + "owner": "CSyntaxFactsCore", + "receiver": "user->profile", + "span": [ + 13, + 21, + 13, + 40 + ] + }, + { + "field": "profile", + "function": "CSyntaxFactsCore_process", + "line": 13, + "owner": "CSyntaxFactsCore", + "receiver": "user", + "span": [ + 13, + 21, + 13, + 34 + ] + }, + { + "field": "ready", + "function": "CSyntaxFactsCore_process", + "line": 30, + "owner": "CSyntaxFactsCore", + "receiver": "user", + "span": [ + 30, + 37, + 30, + 48 + ] + }, + { + "field": "role", + "function": "CSyntaxFactsCore_process", + "line": 17, + "owner": "CSyntaxFactsCore", + "receiver": "user", + "span": [ + 17, + 10, + 17, + 20 + ] + }, + { + "field": "sink", + "function": "CSyntaxFactsCore_audit", + "line": 44, + "owner": "CSyntaxFactsCore", + "receiver": 
"self", + "span": [ + 44, + 12, + 44, + 22 + ] + }, + { + "field": "status", + "function": "CSyntaxFactsCore_audit", + "line": 45, + "owner": "CSyntaxFactsCore", + "receiver": "self", + "span": [ + 45, + 9, + 45, + 21 + ] + }, + { + "field": "status", + "function": "CSyntaxFactsCore_process", + "line": 30, + "owner": "CSyntaxFactsCore", + "receiver": "self", + "span": [ + 30, + 6, + 30, + 18 + ] + } + ], + "state_writes": [ + { + "field": "count", + "function": "CSyntaxFactsCore_process", + "line": 31, + "owner": "CSyntaxFactsCore", + "receiver": "self", + "span": [ + 31, + 4, + 31, + 20 + ] + } + ], + "decisions": [ + { + "enclosing_span": [ + 17, + 2, + 28, + 3 + ], + "function": "CSyntaxFactsCore_process", + "kind": "case_dispatch", + "line": 17, + "members": [ + "ROLE_ADMIN", + "ROLE_GUEST", + "ROLE_OWNER" + ], + "predicate": "user->role", + "span": [ + 17, + 2, + 28, + 3 + ] + }, + { + "enclosing_span": [ + 30, + 2, + 35, + 3 + ], + "function": "CSyntaxFactsCore_process", + "kind": "conjunction", + "line": 30, + "members": [ + "self->status == STATUS_IDLE", + "user->ready" + ], + "predicate": "self->status == STATUS_IDLE && user->ready", + "span": [ + 30, + 6, + 30, + 48 + ] + } + ], + "branch_decisions": [ + { + "function": "CSyntaxFactsCore_process", + "line": 17, + "predicate": "(user->role)", + "span": [ + 17, + 2, + 28, + 3 + ], + "state_refs": [ + "user.role" + ] + }, + { + "function": "CSyntaxFactsCore_process", + "line": 30, + "predicate": "(self->status == STATUS_IDLE && user->ready)", + "span": [ + 30, + 2, + 35, + 3 + ], + "state_refs": [ + "status", + "user.ready" + ] + } + ], + "dispatch_sites": [ + { + "arm_members": { + "ROLE_ADMIN": [ + + ], + "ROLE_GUEST": [ + + ], + "ROLE_OWNER": [ + + ] + }, + "function": "CSyntaxFactsCore_process", + "line": 17, + "outside": [ + + ], + "span": [ + 17, + 2, + 28, + 3 + ], + "variant_set": [ + "ROLE_ADMIN", + "ROLE_GUEST", + "ROLE_OWNER" + ] + } + ], + "semantic_effects": [ + { + "detail": "callback", + 
"function": "CSyntaxFactsCore_process", + "kind": "callback_inversion", + "line": 15, + "span": [ + 15, + 2, + 15, + 20 + ] + }, + { + "detail": "puts", + "function": "CSyntaxFactsCore_audit", + "kind": "hidden_io", + "line": 43, + "span": [ + 43, + 2, + 43, + 12 + ] + } + ], + "predicate_bodies": [ + + ] + } + ] +} diff --git a/gems/decomplex/examples/syntax-facts/oracles/cpp-core.json b/gems/decomplex/examples/syntax-facts/oracles/cpp-core.json new file mode 100644 index 000000000..b0c7783da --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/oracles/cpp-core.json @@ -0,0 +1,637 @@ +{ + "format": "decomplex.syntax-facts.v1", + "documents": [ + { + "file": "gems/decomplex/examples/syntax-facts/cpp/core.cpp", + "language": "cpp", + "functions": [ + { + "line": 15, + "name": "CppSyntaxFactsCore", + "owner": "CppSyntaxFactsCore", + "params": [ + "status", + "sink" + ], + "span": [ + 15, + 2, + 16, + 47 + ], + "visibility": "public" + }, + { + "line": 18, + "name": "process", + "owner": "CppSyntaxFactsCore", + "params": [ + "user", + "items", + "callback" + ], + "span": [ + 18, + 2, + 48, + 3 + ], + "visibility": "public" + }, + { + "line": 51, + "name": "audit", + "owner": "CppSyntaxFactsCore", + "params": [ + "name" + ], + "span": [ + 51, + 2, + 55, + 3 + ], + "visibility": "private" + }, + { + "line": 57, + "name": "ready", + "owner": "CppSyntaxFactsCore", + "params": [ + + ], + "span": [ + 57, + 2, + 59, + 3 + ], + "visibility": "private" + } + ], + "owners": [ + { + "kind": "class", + "line": 9, + "name": "CppSyntaxFactsCore", + "span": [ + 9, + 0, + 60, + 1 + ] + } + ], + "calls": [ + { + "arguments": [ + "Status::Busy" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 38, + "message": "publish", + "owner": "CppSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 38, + 6, + 38, + 27 + ] + }, + { + "arguments": [ + "\"not ready\"" + ], + "block": false, + "conditional": 
true, + "control": "conditional", + "function": "process", + "line": 40, + "message": "warn", + "owner": "CppSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 40, + 6, + 40, + 23 + ] + }, + { + "arguments": [ + "\"record\"", + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "audit", + "line": 53, + "message": "send", + "owner": "CppSyntaxFactsCore", + "receiver": "sink", + "safe_navigation": false, + "span": [ + 53, + 4, + 53, + 30 + ] + }, + { + "arguments": [ + "account" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 21, + "message": "callback", + "owner": "CppSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 21, + 4, + 21, + 21 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 26, + "message": "escalate", + "owner": "CppSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 26, + 6, + 26, + 20 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 29, + "message": "fallback", + "owner": "CppSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 29, + 6, + 29, + 20 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 32, + "message": "defaultCase", + "owner": "CppSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 32, + 6, + 32, + 23 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 19, + "message": "name", + "owner": "CppSyntaxFactsCore", + "receiver": "user.profile()", + "safe_navigation": false, + "span": [ + 19, + 23, + 19, + 44 + ] + }, + { + 
"arguments": [ + + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 19, + "message": "profile", + "owner": "CppSyntaxFactsCore", + "receiver": "user", + "safe_navigation": false, + "span": [ + 19, + 23, + 19, + 37 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 20, + "message": "active", + "owner": "CppSyntaxFactsCore", + "receiver": "user", + "safe_navigation": false, + "span": [ + 20, + 26, + 20, + 39 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 23, + "message": "role", + "owner": "CppSyntaxFactsCore", + "receiver": "user", + "safe_navigation": false, + "span": [ + 23, + 12, + 23, + 23 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 36, + "message": "ready", + "owner": "CppSyntaxFactsCore", + "receiver": "user", + "safe_navigation": false, + "span": [ + 36, + 34, + 36, + 46 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": true, + "control": "iterates", + "function": "process", + "line": 44, + "message": "children", + "owner": "CppSyntaxFactsCore", + "receiver": "item", + "safe_navigation": false, + "span": [ + 44, + 6, + 44, + 21 + ] + } + ], + "state_reads": [ + { + "field": "active", + "function": "process", + "line": 20, + "owner": "CppSyntaxFactsCore", + "receiver": "user", + "span": [ + 20, + 26, + 20, + 37 + ] + }, + { + "field": "children", + "function": "process", + "line": 44, + "owner": "CppSyntaxFactsCore", + "receiver": "item", + "span": [ + 44, + 6, + 44, + 19 + ] + }, + { + "field": "count", + "function": "CppSyntaxFactsCore", + "line": 16, + "owner": "CppSyntaxFactsCore", + "receiver": "self", + "span": [ + 16, + 24, + 16, + 29 + ] + }, + { + "field": "count", + "function": "ready", + "line": 58, + 
"owner": "CppSyntaxFactsCore", + "receiver": "self", + "span": [ + 58, + 11, + 58, + 16 + ] + }, + { + "field": "name", + "function": "process", + "line": 19, + "owner": "CppSyntaxFactsCore", + "receiver": "user.profile()", + "span": [ + 19, + 23, + 19, + 42 + ] + }, + { + "field": "profile", + "function": "process", + "line": 19, + "owner": "CppSyntaxFactsCore", + "receiver": "user", + "span": [ + 19, + 23, + 19, + 35 + ] + }, + { + "field": "ready", + "function": "process", + "line": 36, + "owner": "CppSyntaxFactsCore", + "receiver": "user", + "span": [ + 36, + 34, + 36, + 44 + ] + }, + { + "field": "role", + "function": "process", + "line": 23, + "owner": "CppSyntaxFactsCore", + "receiver": "user", + "span": [ + 23, + 12, + 23, + 21 + ] + }, + { + "field": "send", + "function": "audit", + "line": 53, + "owner": "CppSyntaxFactsCore", + "receiver": "sink", + "span": [ + 53, + 4, + 53, + 14 + ] + }, + { + "field": "sink", + "function": "audit", + "line": 53, + "owner": "CppSyntaxFactsCore", + "receiver": "self", + "span": [ + 53, + 4, + 53, + 8 + ] + }, + { + "field": "status", + "function": "audit", + "line": 54, + "owner": "CppSyntaxFactsCore", + "receiver": "self", + "span": [ + 54, + 11, + 54, + 17 + ] + }, + { + "field": "status", + "function": "process", + "line": 36, + "owner": "CppSyntaxFactsCore", + "receiver": "self", + "span": [ + 36, + 8, + 36, + 14 + ] + } + ], + "state_writes": [ + { + "field": "count", + "function": "process", + "line": 37, + "owner": "CppSyntaxFactsCore", + "receiver": "self", + "span": [ + 37, + 6, + 37, + 11 + ] + } + ], + "decisions": [ + { + "enclosing_span": [ + 23, + 4, + 34, + 5 + ], + "function": "process", + "kind": "case_dispatch", + "line": 23, + "members": [ + "Role::Admin", + "Role::Guest", + "Role::Owner" + ], + "predicate": "user.role()", + "span": [ + 23, + 4, + 34, + 5 + ] + }, + { + "enclosing_span": [ + 36, + 4, + 41, + 5 + ], + "function": "process", + "kind": "conjunction", + "line": 36, + "members": [ + "status 
== Status::Idle", + "user.ready()" + ], + "predicate": "status == Status::Idle && user.ready()", + "span": [ + 36, + 8, + 36, + 46 + ] + } + ], + "branch_decisions": [ + { + "function": "process", + "line": 23, + "predicate": "(user.role())", + "span": [ + 23, + 4, + 34, + 5 + ], + "state_refs": [ + "user.role" + ] + }, + { + "function": "process", + "line": 36, + "predicate": "(status == Status::Idle && user.ready())", + "span": [ + 36, + 4, + 41, + 5 + ], + "state_refs": [ + "user.ready" + ] + } + ], + "dispatch_sites": [ + { + "arm_members": { + "Role::Admin": [ + + ], + "Role::Guest": [ + + ], + "Role::Owner": [ + + ] + }, + "function": "process", + "line": 23, + "outside": [ + + ], + "span": [ + 23, + 4, + 34, + 5 + ], + "variant_set": [ + "Role::Admin", + "Role::Guest", + "Role::Owner" + ] + } + ], + "semantic_effects": [ + + ], + "predicate_bodies": [ + + ] + } + ] +} diff --git a/gems/decomplex/examples/syntax-facts/oracles/csharp-core.json b/gems/decomplex/examples/syntax-facts/oracles/csharp-core.json new file mode 100644 index 000000000..3fc20be16 --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/oracles/csharp-core.json @@ -0,0 +1,563 @@ +{ + "format": "decomplex.syntax-facts.v1", + "documents": [ + { + "file": "gems/decomplex/examples/syntax-facts/csharp/core.cs", + "language": "csharp", + "functions": [ + { + "line": 17, + "name": "Process", + "owner": "CSharpSyntaxFactsCore", + "params": [ + "user", + "items", + "callback" + ], + "span": [ + 17, + 4, + 53, + 5 + ], + "visibility": "public" + }, + { + "line": 55, + "name": "Audit", + "owner": "CSharpSyntaxFactsCore", + "params": [ + "name" + ], + "span": [ + 55, + 4, + 60, + 5 + ], + "visibility": "private" + }, + { + "line": 62, + "name": "Ready", + "owner": "CSharpSyntaxFactsCore", + "params": [ + + ], + "span": [ + 62, + 4, + 65, + 5 + ], + "visibility": "private" + } + ], + "owners": [ + { + "kind": "class", + "line": 4, + "name": "CSharpSyntaxFactsCore", + "span": [ + 4, + 0, + 66, + 1 + ] 
+ } + ], + "calls": [ + { + "arguments": [ + "Status.Busy" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "Process", + "line": 40, + "message": "Publish", + "owner": "CSharpSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 40, + 12, + 40, + 32 + ] + }, + { + "arguments": [ + "\"not ready\"" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "Process", + "line": 44, + "message": "WriteLine", + "owner": "CSharpSyntaxFactsCore", + "receiver": "Console", + "safe_navigation": false, + "span": [ + 44, + 12, + 44, + 42 + ] + }, + { + "arguments": [ + "\"record\"", + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "Audit", + "line": 58, + "message": "Send", + "owner": "CSharpSyntaxFactsCore", + "receiver": "sink", + "safe_navigation": false, + "span": [ + 58, + 8, + 58, + 33 + ] + }, + { + "arguments": [ + "account" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "Process", + "line": 21, + "message": "callback", + "owner": "CSharpSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 21, + 8, + 21, + 25 + ] + }, + { + "arguments": [ + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "Audit", + "line": 57, + "message": "WriteLine", + "owner": "CSharpSyntaxFactsCore", + "receiver": "Console", + "safe_navigation": false, + "span": [ + 57, + 8, + 57, + 31 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "Process", + "line": 27, + "message": "Escalate", + "owner": "CSharpSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 27, + 16, + 27, + 30 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "Process", + "line": 30, + "message": "Fallback", + 
"owner": "CSharpSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 30, + 16, + 30, + 30 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "Process", + "line": 33, + "message": "DefaultCase", + "owner": "CSharpSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 33, + 16, + 33, + 33 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": true, + "control": "iterates", + "function": "Process", + "line": 49, + "message": "Children", + "owner": "CSharpSyntaxFactsCore", + "receiver": "item", + "safe_navigation": false, + "span": [ + 49, + 12, + 49, + 27 + ] + } + ], + "state_reads": [ + { + "field": "Active", + "function": "Process", + "line": 20, + "owner": "CSharpSyntaxFactsCore", + "receiver": "user", + "span": [ + 20, + 40, + 20, + 51 + ] + }, + { + "field": "Children", + "function": "Process", + "line": 49, + "owner": "CSharpSyntaxFactsCore", + "receiver": "item", + "span": [ + 49, + 12, + 49, + 25 + ] + }, + { + "field": "Name", + "function": "Process", + "line": 19, + "owner": "CSharpSyntaxFactsCore", + "receiver": "user.Profile", + "span": [ + 19, + 19, + 19, + 36 + ] + }, + { + "field": "Profile", + "function": "Process", + "line": 19, + "owner": "CSharpSyntaxFactsCore", + "receiver": "user", + "span": [ + 19, + 19, + 19, + 31 + ] + }, + { + "field": "Ready", + "function": "Process", + "line": 37, + "owner": "CSharpSyntaxFactsCore", + "receiver": "user", + "span": [ + 37, + 42, + 37, + 52 + ] + }, + { + "field": "Role", + "function": "Process", + "line": 23, + "owner": "CSharpSyntaxFactsCore", + "receiver": "user", + "span": [ + 23, + 16, + 23, + 25 + ] + }, + { + "field": "Send", + "function": "Audit", + "line": 58, + "owner": "CSharpSyntaxFactsCore", + "receiver": "sink", + "span": [ + 58, + 8, + 58, + 17 + ] + }, + { + "field": "count", + "function": "Ready", + "line": 64, + "owner": "CSharpSyntaxFactsCore", 
+ "receiver": "self", + "span": [ + 64, + 15, + 64, + 20 + ] + }, + { + "field": "sink", + "function": "Audit", + "line": 58, + "owner": "CSharpSyntaxFactsCore", + "receiver": "self", + "span": [ + 58, + 8, + 58, + 12 + ] + }, + { + "field": "status", + "function": "Audit", + "line": 59, + "owner": "CSharpSyntaxFactsCore", + "receiver": "self", + "span": [ + 59, + 15, + 59, + 21 + ] + }, + { + "field": "status", + "function": "Process", + "line": 37, + "owner": "CSharpSyntaxFactsCore", + "receiver": "self", + "span": [ + 37, + 12, + 37, + 23 + ] + } + ], + "state_writes": [ + { + "field": "count", + "function": "(top-level)", + "line": 13, + "owner": "CSharpSyntaxFactsCore", + "receiver": "self", + "span": [ + 13, + 8, + 13, + 22 + ] + }, + { + "field": "count", + "function": "Process", + "line": 39, + "owner": "CSharpSyntaxFactsCore", + "receiver": "self", + "span": [ + 39, + 12, + 39, + 27 + ] + }, + { + "field": "sink", + "function": "(top-level)", + "line": 14, + "owner": "CSharpSyntaxFactsCore", + "receiver": "self", + "span": [ + 14, + 8, + 14, + 24 + ] + }, + { + "field": "status", + "function": "(top-level)", + "line": 12, + "owner": "CSharpSyntaxFactsCore", + "receiver": "self", + "span": [ + 12, + 8, + 12, + 28 + ] + } + ], + "decisions": [ + { + "enclosing_span": [ + 23, + 8, + 35, + 9 + ], + "function": "Process", + "kind": "case_dispatch", + "line": 23, + "members": [ + "\"admin\"", + "\"guest\"", + "\"owner\"" + ], + "predicate": "user.Role", + "span": [ + 23, + 8, + 35, + 9 + ] + }, + { + "enclosing_span": [ + 37, + 8, + 45, + 9 + ], + "function": "Process", + "kind": "conjunction", + "line": 37, + "members": [ + "this.status == Status.Idle", + "user.Ready" + ], + "predicate": "this.status == Status.Idle && user.Ready", + "span": [ + 37, + 12, + 37, + 52 + ] + } + ], + "branch_decisions": [ + { + "function": "Process", + "line": 23, + "predicate": "user.Role", + "span": [ + 23, + 8, + 35, + 9 + ], + "state_refs": [ + "user.Role" + ] + }, + { + 
"function": "Process", + "line": 37, + "predicate": "this.status == Status.Idle && user.Ready", + "span": [ + 37, + 8, + 45, + 9 + ], + "state_refs": [ + "this.status", + "user.Ready" + ] + } + ], + "dispatch_sites": [ + + ], + "semantic_effects": [ + { + "detail": "Console.WriteLine", + "function": "Audit", + "kind": "hidden_io", + "line": 57, + "span": [ + 57, + 8, + 57, + 31 + ] + }, + { + "detail": "Console.WriteLine", + "function": "Process", + "kind": "hidden_io", + "line": 44, + "span": [ + 44, + 12, + 44, + 42 + ] + } + ], + "predicate_bodies": [ + + ] + } + ] +} diff --git a/gems/decomplex/examples/syntax-facts/oracles/go-core.json b/gems/decomplex/examples/syntax-facts/oracles/go-core.json new file mode 100644 index 000000000..32f611a0c --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/oracles/go-core.json @@ -0,0 +1,727 @@ +{ + "format": "decomplex.syntax-facts.v1", + "documents": [ + { + "file": "gems/decomplex/examples/syntax-facts/go/core.go", + "language": "go", + "functions": [ + { + "line": 32, + "name": "NewGoSyntaxFactsCore", + "owner": "core", + "params": [ + "status" + ], + "span": [ + 32, + 0, + 34, + 1 + ], + "visibility": "public" + }, + { + "line": 36, + "name": "Process", + "owner": "GoSyntaxFactsCore", + "params": [ + "user", + "items", + "callback" + ], + "span": [ + 36, + 0, + 70, + 1 + ], + "visibility": "public" + }, + { + "line": 72, + "name": "audit", + "owner": "GoSyntaxFactsCore", + "params": [ + "name" + ], + "span": [ + 72, + 0, + 76, + 1 + ], + "visibility": "private" + }, + { + "line": 78, + "name": "Ready", + "owner": "GoSyntaxFactsCore", + "params": [ + + ], + "span": [ + 78, + 0, + 80, + 1 + ], + "visibility": "public" + } + ], + "owners": [ + { + "kind": "owner", + "line": 10, + "name": "Profile", + "span": [ + 10, + 5, + 12, + 1 + ] + }, + { + "kind": "owner", + "line": 14, + "name": "User", + "span": [ + 14, + 5, + 19, + 1 + ] + }, + { + "kind": "owner", + "line": 21, + "name": "Account", + "span": [ + 21, + 5, + 
24, + 1 + ] + }, + { + "kind": "owner", + "line": 26, + "name": "GoSyntaxFactsCore", + "span": [ + 26, + 5, + 30, + 1 + ] + }, + { + "kind": "owner", + "line": 3, + "name": "Status", + "span": [ + 3, + 5, + 3, + 15 + ] + } + ], + "calls": [ + { + "arguments": [ + "Busy" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "Process", + "line": 56, + "message": "publish", + "owner": "GoSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 56, + 2, + 56, + 17 + ] + }, + { + "arguments": [ + "\"not ready\"" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "Process", + "line": 58, + "message": "warn", + "owner": "GoSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 58, + 2, + 58, + 21 + ] + }, + { + "arguments": [ + "\"record\"", + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "audit", + "line": 74, + "message": "send", + "owner": "GoSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 74, + 1, + 74, + 23 + ] + }, + { + "arguments": [ + "account" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "Process", + "line": 43, + "message": "callback", + "owner": "GoSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 43, + 1, + 43, + 18 + ] + }, + { + "arguments": [ + "item" + ], + "block": false, + "conditional": true, + "control": "iterates", + "function": "Process", + "line": 62, + "message": "children", + "owner": "GoSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 62, + 2, + 62, + 18 + ] + }, + { + "arguments": [ + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "Process", + "line": 66, + "message": "audit", + "owner": "GoSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 66, + 4, + 66, + 17 + ] + }, + 
{ + "arguments": [ + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "Process", + "line": 67, + "message": "audit", + "owner": "GoSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 67, + 7, + 67, + 20 + ] + }, + { + "arguments": [ + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "audit", + "line": 73, + "message": "println", + "owner": "GoSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 73, + 1, + 73, + 14 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "Process", + "line": 47, + "message": "escalate", + "owner": "GoSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 47, + 2, + 47, + 18 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "Process", + "line": 49, + "message": "fallback", + "owner": "GoSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 49, + 2, + 49, + 18 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "Process", + "line": 51, + "message": "defaultCase", + "owner": "GoSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 51, + 2, + 51, + 21 + ] + } + ], + "state_reads": [ + { + "field": "Active", + "function": "Process", + "line": 42, + "owner": "GoSyntaxFactsCore", + "receiver": "user", + "span": [ + 42, + 40, + 42, + 51 + ] + }, + { + "field": "Name", + "function": "Process", + "line": 41, + "owner": "GoSyntaxFactsCore", + "receiver": "user.Profile", + "span": [ + 41, + 9, + 41, + 26 + ] + }, + { + "field": "Profile", + "function": "Process", + "line": 41, + "owner": "GoSyntaxFactsCore", + "receiver": "user", + "span": [ + 41, + 9, + 41, + 21 + ] + }, + { + "field": "Ready", + 
"function": "Process", + "line": 54, + "owner": "GoSyntaxFactsCore", + "receiver": "user", + "span": [ + 54, + 24, + 54, + 34 + ] + }, + { + "field": "Role", + "function": "Process", + "line": 45, + "owner": "GoSyntaxFactsCore", + "receiver": "user", + "span": [ + 45, + 8, + 45, + 17 + ] + }, + { + "field": "audit", + "function": "Process", + "line": 66, + "owner": "GoSyntaxFactsCore", + "receiver": "self", + "span": [ + 66, + 4, + 66, + 11 + ] + }, + { + "field": "audit", + "function": "Process", + "line": 67, + "owner": "GoSyntaxFactsCore", + "receiver": "self", + "span": [ + 67, + 7, + 67, + 14 + ] + }, + { + "field": "children", + "function": "Process", + "line": 62, + "owner": "GoSyntaxFactsCore", + "receiver": "self", + "span": [ + 62, + 2, + 62, + 12 + ] + }, + { + "field": "count", + "function": "Process", + "line": 65, + "owner": "GoSyntaxFactsCore", + "receiver": "self", + "span": [ + 65, + 18, + 65, + 25 + ] + }, + { + "field": "count", + "function": "Ready", + "line": 79, + "owner": "GoSyntaxFactsCore", + "receiver": "self", + "span": [ + 79, + 8, + 79, + 15 + ] + }, + { + "field": "defaultCase", + "function": "Process", + "line": 51, + "owner": "GoSyntaxFactsCore", + "receiver": "self", + "span": [ + 51, + 2, + 51, + 15 + ] + }, + { + "field": "escalate", + "function": "Process", + "line": 47, + "owner": "GoSyntaxFactsCore", + "receiver": "self", + "span": [ + 47, + 2, + 47, + 12 + ] + }, + { + "field": "fallback", + "function": "Process", + "line": 49, + "owner": "GoSyntaxFactsCore", + "receiver": "self", + "span": [ + 49, + 2, + 49, + 12 + ] + }, + { + "field": "lookup", + "function": "Process", + "line": 65, + "owner": "GoSyntaxFactsCore", + "receiver": "self", + "span": [ + 65, + 1, + 65, + 9 + ] + }, + { + "field": "publish", + "function": "Process", + "line": 56, + "owner": "GoSyntaxFactsCore", + "receiver": "self", + "span": [ + 56, + 2, + 56, + 11 + ] + }, + { + "field": "send", + "function": "audit", + "line": 74, + "owner": 
"GoSyntaxFactsCore", + "receiver": "self", + "span": [ + 74, + 1, + 74, + 7 + ] + }, + { + "field": "status", + "function": "Process", + "line": 54, + "owner": "GoSyntaxFactsCore", + "receiver": "self", + "span": [ + 54, + 4, + 54, + 12 + ] + }, + { + "field": "status", + "function": "audit", + "line": 75, + "owner": "GoSyntaxFactsCore", + "receiver": "self", + "span": [ + 75, + 5, + 75, + 13 + ] + }, + { + "field": "warn", + "function": "Process", + "line": 58, + "owner": "GoSyntaxFactsCore", + "receiver": "self", + "span": [ + 58, + 2, + 58, + 8 + ] + } + ], + "state_writes": [ + { + "field": "count", + "function": "Process", + "line": 55, + "owner": "GoSyntaxFactsCore", + "receiver": "self", + "span": [ + 55, + 2, + 55, + 14 + ] + }, + { + "field": "lookup", + "function": "Process", + "line": 65, + "owner": "GoSyntaxFactsCore", + "receiver": "self", + "span": [ + 65, + 1, + 65, + 25 + ] + } + ], + "decisions": [ + { + "enclosing_span": [ + 45, + 1, + 52, + 2 + ], + "function": "Process", + "kind": "case_dispatch", + "line": 45, + "members": [ + "\"guest\"", + "\"owner\", \"admin\"" + ], + "predicate": "user.Role", + "span": [ + 45, + 1, + 52, + 2 + ] + }, + { + "enclosing_span": [ + 54, + 1, + 59, + 2 + ], + "function": "Process", + "kind": "conjunction", + "line": 54, + "members": [ + "c.status == Idle", + "user.Ready" + ], + "predicate": "c.status == Idle && user.Ready", + "span": [ + 54, + 4, + 54, + 34 + ] + } + ], + "branch_decisions": [ + { + "function": "Process", + "line": 45, + "predicate": "user.Role", + "span": [ + 45, + 1, + 52, + 2 + ], + "state_refs": [ + "user.Role" + ] + }, + { + "function": "Process", + "line": 54, + "predicate": "c.status == Idle && user.Ready", + "span": [ + 54, + 1, + 59, + 2 + ], + "state_refs": [ + "c.status", + "user.Ready" + ] + } + ], + "dispatch_sites": [ + + ], + "semantic_effects": [ + { + "detail": "println", + "function": "audit", + "kind": "hidden_io", + "line": 73, + "span": [ + 73, + 1, + 73, + 14 + ] + } + ], + 
"predicate_bodies": [ + + ] + } + ] +} diff --git a/gems/decomplex/examples/syntax-facts/oracles/java-core.json b/gems/decomplex/examples/syntax-facts/oracles/java-core.json new file mode 100644 index 000000000..ca8fdb2f6 --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/oracles/java-core.json @@ -0,0 +1,563 @@ +{ + "format": "decomplex.syntax-facts.v1", + "documents": [ + { + "file": "gems/decomplex/examples/syntax-facts/java/core.java", + "language": "java", + "functions": [ + { + "line": 12, + "name": "process", + "owner": "JavaSyntaxFactsCore", + "params": [ + "user", + "items", + "callback" + ], + "span": [ + 12, + 2, + 41, + 3 + ], + "visibility": "public" + }, + { + "line": 43, + "name": "audit", + "owner": "JavaSyntaxFactsCore", + "params": [ + "name" + ], + "span": [ + 43, + 2, + 47, + 3 + ], + "visibility": "private" + }, + { + "line": 49, + "name": "ready", + "owner": "JavaSyntaxFactsCore", + "params": [ + + ], + "span": [ + 49, + 2, + 51, + 3 + ], + "visibility": null + } + ], + "owners": [ + { + "kind": "class", + "line": 3, + "name": "JavaSyntaxFactsCore", + "span": [ + 3, + 0, + 52, + 1 + ] + } + ], + "calls": [ + { + "arguments": [ + "Status.BUSY" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 31, + "message": "this", + "owner": "JavaSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 31, + 6, + 31, + 31 + ] + }, + { + "arguments": [ + "\"not ready\"" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 33, + "message": "err", + "owner": "JavaSyntaxFactsCore", + "receiver": "System", + "safe_navigation": false, + "span": [ + 33, + 6, + 33, + 37 + ] + }, + { + "arguments": [ + "\"record\"", + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "audit", + "line": 45, + "message": "this", + "owner": "JavaSyntaxFactsCore", + "receiver": "self", + 
"safe_navigation": false, + "span": [ + 45, + 4, + 45, + 29 + ] + }, + { + "arguments": [ + "account" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 15, + "message": "call", + "owner": "JavaSyntaxFactsCore", + "receiver": "callback", + "safe_navigation": false, + "span": [ + 15, + 4, + 15, + 26 + ] + }, + { + "arguments": [ + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "audit", + "line": 44, + "message": "out", + "owner": "JavaSyntaxFactsCore", + "receiver": "System", + "safe_navigation": false, + "span": [ + 44, + 4, + 44, + 28 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 20, + "message": "this", + "owner": "JavaSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 20, + 8, + 20, + 27 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 23, + "message": "this", + "owner": "JavaSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 23, + 8, + 23, + 27 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 26, + "message": "this", + "owner": "JavaSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 26, + 8, + 26, + 30 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": false, + "control": "always", + "function": "audit", + "line": 46, + "message": "status", + "owner": "JavaSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 46, + 4, + 46, + 22 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 13, + "message": "profile", + "owner": "JavaSyntaxFactsCore", + 
"receiver": "user", + "safe_navigation": false, + "span": [ + 13, + 18, + 13, + 32 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 13, + "message": "profile()", + "owner": "JavaSyntaxFactsCore", + "receiver": "user", + "safe_navigation": false, + "span": [ + 13, + 18, + 13, + 39 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 14, + "message": "active", + "owner": "JavaSyntaxFactsCore", + "receiver": "user", + "safe_navigation": false, + "span": [ + 14, + 40, + 14, + 53 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 17, + "message": "role", + "owner": "JavaSyntaxFactsCore", + "receiver": "user", + "safe_navigation": false, + "span": [ + 17, + 12, + 17, + 23 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 29, + "message": "ready", + "owner": "JavaSyntaxFactsCore", + "receiver": "user", + "safe_navigation": false, + "span": [ + 29, + 38, + 29, + 50 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": true, + "control": "iterates", + "function": "process", + "line": 37, + "message": "children", + "owner": "JavaSyntaxFactsCore", + "receiver": "item", + "safe_navigation": false, + "span": [ + 37, + 6, + 37, + 21 + ] + } + ], + "state_reads": [ + { + "field": "count", + "function": "ready", + "line": 50, + "owner": "JavaSyntaxFactsCore", + "receiver": "self", + "span": [ + 50, + 11, + 50, + 21 + ] + }, + { + "field": "status", + "function": "audit", + "line": 46, + "owner": "JavaSyntaxFactsCore", + "receiver": "self", + "span": [ + 46, + 4, + 46, + 15 + ] + }, + { + "field": "status", + "function": "process", + "line": 29, + "owner": "JavaSyntaxFactsCore", + "receiver": "self", + "span": [ + 29, 
+ 8, + 29, + 19 + ] + } + ], + "state_writes": [ + { + "field": "count", + "function": "(top-level)", + "line": 9, + "owner": "JavaSyntaxFactsCore", + "receiver": "self", + "span": [ + 9, + 4, + 9, + 18 + ] + }, + { + "field": "count", + "function": "process", + "line": 30, + "owner": "JavaSyntaxFactsCore", + "receiver": "self", + "span": [ + 30, + 6, + 30, + 21 + ] + }, + { + "field": "status", + "function": "(top-level)", + "line": 8, + "owner": "JavaSyntaxFactsCore", + "receiver": "self", + "span": [ + 8, + 4, + 8, + 24 + ] + } + ], + "decisions": [ + { + "enclosing_span": [ + 17, + 4, + 27, + 5 + ], + "function": "process", + "kind": "case_dispatch", + "line": 17, + "members": [ + "case \"admin\"", + "case \"guest\"", + "case \"owner\"" + ], + "predicate": "user.role()", + "span": [ + 17, + 4, + 27, + 5 + ] + }, + { + "enclosing_span": [ + 29, + 4, + 34, + 5 + ], + "function": "process", + "kind": "conjunction", + "line": 29, + "members": [ + "this.status == Status.IDLE", + "user.ready()" + ], + "predicate": "this.status == Status.IDLE && user.ready()", + "span": [ + 29, + 8, + 29, + 50 + ] + } + ], + "branch_decisions": [ + { + "function": "process", + "line": 29, + "predicate": "(this.status == Status.IDLE && user.ready())", + "span": [ + 29, + 4, + 34, + 5 + ], + "state_refs": [ + "this.status" + ] + } + ], + "dispatch_sites": [ + + ], + "semantic_effects": [ + { + "detail": "System.err", + "function": "process", + "kind": "hidden_io", + "line": 33, + "span": [ + 33, + 6, + 33, + 37 + ] + }, + { + "detail": "System.out", + "function": "audit", + "kind": "hidden_io", + "line": 44, + "span": [ + 44, + 4, + 44, + 28 + ] + }, + { + "detail": "callback.call", + "function": "process", + "kind": "dynamic_dispatch", + "line": 15, + "span": [ + 15, + 4, + 15, + 26 + ] + } + ], + "predicate_bodies": [ + + ] + } + ] +} diff --git a/gems/decomplex/examples/syntax-facts/oracles/javascript-core.json b/gems/decomplex/examples/syntax-facts/oracles/javascript-core.json new 
file mode 100644 index 000000000..a44e8c524 --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/oracles/javascript-core.json @@ -0,0 +1,695 @@ +{ + "format": "decomplex.syntax-facts.v1", + "documents": [ + { + "file": "gems/decomplex/examples/syntax-facts/javascript/core.js", + "language": "javascript", + "functions": [ + { + "line": 10, + "name": "process", + "owner": "JavaScriptSyntaxFactsCore", + "params": [ + "user", + "items", + "callback" + ], + "span": [ + 10, + 2, + 39, + 3 + ], + "visibility": "public" + }, + { + "line": 4, + "name": "constructor", + "owner": "JavaScriptSyntaxFactsCore", + "params": [ + "status", + "sink" + ], + "span": [ + 4, + 2, + 8, + 3 + ], + "visibility": "public" + }, + { + "line": 41, + "name": "#audit", + "owner": "JavaScriptSyntaxFactsCore", + "params": [ + "name" + ], + "span": [ + 41, + 2, + 45, + 3 + ], + "visibility": "private" + }, + { + "line": 47, + "name": "ready", + "owner": "JavaScriptSyntaxFactsCore", + "params": [ + + ], + "span": [ + 47, + 2, + 49, + 3 + ], + "visibility": "public" + }, + { + "line": 52, + "name": "normalizeValue", + "owner": "core", + "params": [ + "input" + ], + "span": [ + 52, + 7, + 54, + 1 + ], + "visibility": "public" + } + ], + "owners": [ + { + "kind": "class", + "line": 1, + "name": "JavaScriptSyntaxFactsCore", + "span": [ + 1, + 7, + 50, + 1 + ] + } + ], + "calls": [ + { + "arguments": [ + "\"busy\"" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 29, + "message": "publish", + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 29, + 6, + 29, + 26 + ] + }, + { + "arguments": [ + "\"not ready\"" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 31, + "message": "warn", + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "console", + "safe_navigation": false, + "span": [ + 31, + 6, + 31, + 31 + ] + }, + { + 
"arguments": [ + "\"record\"", + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "#audit", + "line": 43, + "message": "send", + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "this.sink", + "safe_navigation": false, + "span": [ + 43, + 4, + 43, + 34 + ] + }, + { + "arguments": [ + "account" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 13, + "message": "callback", + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 13, + 4, + 13, + 21 + ] + }, + { + "arguments": [ + "items[index]" + ], + "block": false, + "conditional": true, + "control": "iterates", + "function": "process", + "line": 35, + "message": "#audit", + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 35, + 6, + 35, + 31 + ] + }, + { + "arguments": [ + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "#audit", + "line": 42, + "message": "log", + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "console", + "safe_navigation": false, + "span": [ + 42, + 4, + 42, + 21 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 18, + "message": "escalate", + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 18, + 8, + 18, + 27 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 21, + "message": "fallback", + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 21, + 8, + 21, + 27 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 24, + "message": "defaultCase", + "owner": 
"JavaScriptSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 24, + 8, + 24, + 30 + ] + } + ], + "state_reads": [ + { + "field": "#audit", + "function": "process", + "line": 35, + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 35, + 6, + 35, + 17 + ] + }, + { + "field": "#status", + "function": "#audit", + "line": 44, + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 44, + 11, + 44, + 23 + ] + }, + { + "field": "#status", + "function": "process", + "line": 27, + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 27, + 8, + 27, + 20 + ] + }, + { + "field": "active", + "function": "process", + "line": 12, + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "user", + "span": [ + 12, + 36, + 12, + 47 + ] + }, + { + "field": "count", + "function": "ready", + "line": 48, + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 48, + 11, + 48, + 21 + ] + }, + { + "field": "defaultCase", + "function": "process", + "line": 24, + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 24, + 8, + 24, + 24 + ] + }, + { + "field": "escalate", + "function": "process", + "line": 18, + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 18, + 8, + 18, + 21 + ] + }, + { + "field": "fallback", + "function": "process", + "line": 21, + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 21, + 8, + 21, + 21 + ] + }, + { + "field": "log", + "function": "#audit", + "line": 42, + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "console", + "span": [ + 42, + 4, + 42, + 15 + ] + }, + { + "field": "name", + "function": "process", + "line": 11, + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "user?.profile", + "span": [ + 11, + 17, + 11, + 36 + ] + }, + { + "field": "profile", + "function": "process", + "line": 11, + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "user", + "span": [ + 11, + 
17, + 11, + 30 + ] + }, + { + "field": "publish", + "function": "process", + "line": 29, + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 29, + 6, + 29, + 18 + ] + }, + { + "field": "ready", + "function": "process", + "line": 27, + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "user", + "span": [ + 27, + 35, + 27, + 45 + ] + }, + { + "field": "role", + "function": "process", + "line": 15, + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "user", + "span": [ + 15, + 12, + 15, + 21 + ] + }, + { + "field": "send", + "function": "#audit", + "line": 43, + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "this.sink", + "span": [ + 43, + 4, + 43, + 18 + ] + }, + { + "field": "sink", + "function": "#audit", + "line": 43, + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 43, + 4, + 43, + 13 + ] + }, + { + "field": "warn", + "function": "process", + "line": 31, + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "console", + "span": [ + 31, + 6, + 31, + 18 + ] + } + ], + "state_writes": [ + { + "field": "#status", + "function": "constructor", + "line": 5, + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 5, + 4, + 5, + 25 + ] + }, + { + "field": "count", + "function": "constructor", + "line": 6, + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 6, + 4, + 6, + 18 + ] + }, + { + "field": "count", + "function": "process", + "line": 28, + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 28, + 6, + 28, + 21 + ] + }, + { + "field": "sink", + "function": "constructor", + "line": 7, + "owner": "JavaScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 7, + 4, + 7, + 20 + ] + } + ], + "decisions": [ + { + "enclosing_span": [ + 15, + 4, + 25, + 5 + ], + "function": "process", + "kind": "case_dispatch", + "line": 15, + "members": [ + "\"admin\"", + "\"guest\"", + "\"owner\"" + ], + "predicate": "user.role", + "span": [ + 15, + 4, + 25, + 
5 + ] + }, + { + "enclosing_span": [ + 27, + 4, + 32, + 5 + ], + "function": "process", + "kind": "conjunction", + "line": 27, + "members": [ + "this.#status === \"idle\"", + "user.ready" + ], + "predicate": "this.#status === \"idle\" && user.ready", + "span": [ + 27, + 8, + 27, + 45 + ] + } + ], + "branch_decisions": [ + { + "function": "process", + "line": 15, + "predicate": "(user.role)", + "span": [ + 15, + 4, + 25, + 5 + ], + "state_refs": [ + "user.role" + ] + }, + { + "function": "process", + "line": 27, + "predicate": "(this.#status === \"idle\" && user.ready)", + "span": [ + 27, + 4, + 32, + 5 + ], + "state_refs": [ + "this.#status", + "user.ready" + ] + } + ], + "dispatch_sites": [ + + ], + "semantic_effects": [ + { + "detail": "console.log", + "function": "#audit", + "kind": "hidden_io", + "line": 42, + "span": [ + 42, + 4, + 42, + 21 + ] + }, + { + "detail": "console.warn", + "function": "process", + "kind": "hidden_io", + "line": 31, + "span": [ + 31, + 6, + 31, + 31 + ] + } + ], + "predicate_bodies": [ + { + "body": "input ?? null", + "line": 52, + "name": "normalizeValue", + "owner": "core", + "span": [ + 52, + 7, + 54, + 1 + ] + }, + { + "body": "name ?? 
null", + "line": 10, + "name": "process", + "owner": "JavaScriptSyntaxFactsCore", + "span": [ + 10, + 2, + 39, + 3 + ] + } + ] + } + ] +} diff --git a/gems/decomplex/examples/syntax-facts/oracles/kotlin-core.json b/gems/decomplex/examples/syntax-facts/oracles/kotlin-core.json new file mode 100644 index 000000000..70499c795 --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/oracles/kotlin-core.json @@ -0,0 +1,491 @@ +{ + "format": "decomplex.syntax-facts.v1", + "documents": [ + { + "file": "gems/decomplex/examples/syntax-facts/kotlin/core.kt", + "language": "kotlin", + "functions": [ + { + "line": 31, + "name": "audit", + "owner": "KotlinSyntaxFactsCore", + "params": [ + "name" + ], + "span": [ + 31, + 4, + 35, + 5 + ], + "visibility": "private" + }, + { + "line": 37, + "name": "ready", + "owner": "KotlinSyntaxFactsCore", + "params": [ + + ], + "span": [ + 37, + 4, + 39, + 5 + ], + "visibility": null + }, + { + "line": 6, + "name": "process", + "owner": "KotlinSyntaxFactsCore", + "params": [ + "user", + "items", + "callback" + ], + "span": [ + 6, + 4, + 29, + 5 + ], + "visibility": null + } + ], + "owners": [ + { + "kind": "class", + "line": 3, + "name": "KotlinSyntaxFactsCore", + "span": [ + 3, + 0, + 40, + 1 + ] + }, + { + "kind": "class", + "line": 42, + "name": "Status", + "span": [ + 42, + 0, + 45, + 1 + ] + } + ], + "calls": [ + { + "arguments": [ + "Status.BUSY" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 19, + "message": "publish", + "owner": "KotlinSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 19, + 12, + 19, + 32 + ] + }, + { + "arguments": [ + "\"not ready\"" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 21, + "message": "println", + "owner": "KotlinSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 21, + 12, + 21, + 19 + ] + }, + { + "arguments": [ + 
"\"record\"", + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "audit", + "line": 33, + "message": "send", + "owner": "KotlinSyntaxFactsCore", + "receiver": "sink", + "safe_navigation": false, + "span": [ + 33, + 8, + 33, + 33 + ] + }, + { + "arguments": [ + "account" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 9, + "message": "callback", + "owner": "KotlinSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 9, + 8, + 9, + 25 + ] + }, + { + "arguments": [ + "name", + "user.active" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 8, + "message": "Account", + "owner": "KotlinSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 8, + 22, + 8, + 48 + ] + }, + { + "arguments": [ + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "audit", + "line": 32, + "message": "println", + "owner": "KotlinSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 32, + 8, + 32, + 21 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 12, + "message": "escalate", + "owner": "KotlinSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 12, + 32, + 12, + 40 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 13, + "message": "fallback", + "owner": "KotlinSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 13, + 23, + 13, + 31 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 14, + "message": "defaultCase", + "owner": "KotlinSyntaxFactsCore", + "receiver": "self", + 
"safe_navigation": false, + "span": [ + 14, + 20, + 14, + 31 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": true, + "control": "iterates", + "function": "process", + "line": 25, + "message": "children", + "owner": "KotlinSyntaxFactsCore", + "receiver": "item", + "safe_navigation": false, + "span": [ + 25, + 12, + 25, + 25 + ] + } + ], + "state_reads": [ + { + "field": "active", + "function": "process", + "line": 8, + "owner": "KotlinSyntaxFactsCore", + "receiver": "user", + "span": [ + 8, + 36, + 8, + 47 + ] + }, + { + "field": "children", + "function": "process", + "line": 25, + "owner": "KotlinSyntaxFactsCore", + "receiver": "item", + "span": [ + 25, + 12, + 25, + 25 + ] + }, + { + "field": "name", + "function": "process", + "line": 7, + "owner": "KotlinSyntaxFactsCore", + "receiver": "user.profile", + "span": [ + 7, + 19, + 7, + 37 + ] + }, + { + "field": "profile", + "function": "process", + "line": 7, + "owner": "KotlinSyntaxFactsCore", + "receiver": "user", + "span": [ + 7, + 19, + 7, + 31 + ] + }, + { + "field": "ready", + "function": "process", + "line": 17, + "owner": "KotlinSyntaxFactsCore", + "receiver": "user", + "span": [ + 17, + 37, + 17, + 47 + ] + }, + { + "field": "role", + "function": "process", + "line": 11, + "owner": "KotlinSyntaxFactsCore", + "receiver": "user", + "span": [ + 11, + 14, + 11, + 23 + ] + }, + { + "field": "send", + "function": "audit", + "line": 33, + "owner": "KotlinSyntaxFactsCore", + "receiver": "sink", + "span": [ + 33, + 8, + 33, + 17 + ] + } + ], + "state_writes": [ + + ], + "decisions": [ + { + "enclosing_span": [ + 11, + 8, + 15, + 9 + ], + "function": "process", + "kind": "case_dispatch", + "line": 11, + "members": [ + "\"admin\"", + "\"guest\"", + "\"owner\"" + ], + "predicate": "user.role", + "span": [ + 11, + 8, + 15, + 9 + ] + }, + { + "enclosing_span": [ + 17, + 8, + 22, + 9 + ], + "function": "process", + "kind": "conjunction", + "line": 17, + "members": [ + "status == Status.IDLE", + 
"user.ready" + ], + "predicate": "status == Status.IDLE && user.ready", + "span": [ + 17, + 12, + 17, + 47 + ] + } + ], + "branch_decisions": [ + { + "function": "process", + "line": 11, + "predicate": "(user.role)", + "span": [ + 11, + 8, + 15, + 9 + ], + "state_refs": [ + "user.role" + ] + }, + { + "function": "process", + "line": 17, + "predicate": "status == Status.IDLE && user.ready", + "span": [ + 17, + 8, + 22, + 9 + ], + "state_refs": [ + "user.ready" + ] + } + ], + "dispatch_sites": [ + + ], + "semantic_effects": [ + { + "detail": "println", + "function": "audit", + "kind": "hidden_io", + "line": 32, + "span": [ + 32, + 8, + 32, + 21 + ] + }, + { + "detail": "println", + "function": "process", + "kind": "hidden_io", + "line": 21, + "span": [ + 21, + 12, + 21, + 19 + ] + } + ], + "predicate_bodies": [ + + ] + } + ] +} diff --git a/gems/decomplex/examples/syntax-facts/oracles/lua-core.json b/gems/decomplex/examples/syntax-facts/oracles/lua-core.json new file mode 100644 index 000000000..9791f7048 --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/oracles/lua-core.json @@ -0,0 +1,597 @@ +{ + "format": "decomplex.syntax-facts.v1", + "documents": [ + { + "file": "gems/decomplex/examples/syntax-facts/lua/core.lua", + "language": "lua", + "functions": [ + { + "line": 13, + "name": "process", + "owner": "LuaSyntaxFactsCore", + "params": [ + "user", + "items", + "callback" + ], + "span": [ + 13, + 0, + 38, + 3 + ], + "visibility": null + }, + { + "line": 4, + "name": "LuaSyntaxFactsCore.new", + "owner": "core", + "params": [ + "status", + "sink" + ], + "span": [ + 4, + 0, + 11, + 3 + ], + "visibility": null + }, + { + "line": 40, + "name": "audit", + "owner": "LuaSyntaxFactsCore", + "params": [ + "name" + ], + "span": [ + 40, + 0, + 44, + 3 + ], + "visibility": null + }, + { + "line": 46, + "name": "ready", + "owner": "LuaSyntaxFactsCore", + "params": [ + + ], + "span": [ + 46, + 0, + 48, + 3 + ], + "visibility": null + } + ], + "owners": [ + + ], + "calls": 
[ + { + "arguments": [ + "\"busy\"" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 28, + "message": "publish", + "owner": "LuaSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 28, + 4, + 28, + 24 + ] + }, + { + "arguments": [ + "\"not ready\"" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 30, + "message": "print", + "owner": "LuaSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 30, + 4, + 30, + 9 + ] + }, + { + "arguments": [ + "\"record\"", + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "audit", + "line": 42, + "message": "send", + "owner": "LuaSyntaxFactsCore", + "receiver": "self.sink", + "safe_navigation": false, + "span": [ + 42, + 2, + 42, + 32 + ] + }, + { + "arguments": [ + "account" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 16, + "message": "callback", + "owner": "LuaSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 16, + 2, + 16, + 19 + ] + }, + { + "arguments": [ + "instance", + "LuaSyntaxFactsCore" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "LuaSyntaxFactsCore.new", + "line": 10, + "message": "setmetatable", + "owner": "core", + "receiver": "self", + "safe_navigation": false, + "span": [ + 10, + 9, + 10, + 21 + ] + }, + { + "arguments": [ + "items" + ], + "block": false, + "conditional": true, + "control": "iterates", + "function": "process", + "line": 33, + "message": "ipairs", + "owner": "LuaSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 33, + 17, + 33, + 23 + ] + }, + { + "arguments": [ + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "audit", + "line": 41, + "message": "print", + "owner": 
"LuaSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 41, + 2, + 41, + 13 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 19, + "message": "escalate", + "owner": "LuaSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 19, + 4, + 19, + 17 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 21, + "message": "fallback", + "owner": "LuaSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 21, + 4, + 21, + 17 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 23, + "message": "default_case", + "owner": "LuaSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 23, + 4, + 23, + 21 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": true, + "control": "iterates", + "function": "process", + "line": 34, + "message": "children", + "owner": "LuaSyntaxFactsCore", + "receiver": "item", + "safe_navigation": false, + "span": [ + 34, + 4, + 34, + 17 + ] + } + ], + "state_reads": [ + { + "field": "active", + "function": "process", + "line": 15, + "owner": "LuaSyntaxFactsCore", + "receiver": "user", + "span": [ + 15, + 42, + 15, + 53 + ] + }, + { + "field": "count", + "function": "process", + "line": 27, + "owner": "LuaSyntaxFactsCore", + "receiver": "self", + "span": [ + 27, + 17, + 27, + 27 + ] + }, + { + "field": "count", + "function": "ready", + "line": 47, + "owner": "LuaSyntaxFactsCore", + "receiver": "self", + "span": [ + 47, + 9, + 47, + 19 + ] + }, + { + "field": "name", + "function": "process", + "line": 14, + "owner": "LuaSyntaxFactsCore", + "receiver": "user.profile", + "span": [ + 14, + 15, + 14, + 32 + ] + }, + { + "field": "profile", + 
"function": "process", + "line": 14, + "owner": "LuaSyntaxFactsCore", + "receiver": "user", + "span": [ + 14, + 15, + 14, + 27 + ] + }, + { + "field": "ready", + "function": "process", + "line": 26, + "owner": "LuaSyntaxFactsCore", + "receiver": "user", + "span": [ + 26, + 31, + 26, + 41 + ] + }, + { + "field": "role", + "function": "process", + "line": 18, + "owner": "LuaSyntaxFactsCore", + "receiver": "user", + "span": [ + 18, + 29, + 18, + 38 + ] + }, + { + "field": "role", + "function": "process", + "line": 18, + "owner": "LuaSyntaxFactsCore", + "receiver": "user", + "span": [ + 18, + 5, + 18, + 14 + ] + }, + { + "field": "role", + "function": "process", + "line": 20, + "owner": "LuaSyntaxFactsCore", + "receiver": "user", + "span": [ + 20, + 9, + 20, + 18 + ] + }, + { + "field": "sink", + "function": "audit", + "line": 42, + "owner": "LuaSyntaxFactsCore", + "receiver": "self", + "span": [ + 42, + 2, + 42, + 11 + ] + }, + { + "field": "status", + "function": "audit", + "line": 43, + "owner": "LuaSyntaxFactsCore", + "receiver": "self", + "span": [ + 43, + 9, + 43, + 20 + ] + }, + { + "field": "status", + "function": "process", + "line": 26, + "owner": "LuaSyntaxFactsCore", + "receiver": "self", + "span": [ + 26, + 5, + 26, + 16 + ] + } + ], + "state_writes": [ + { + "field": "__index", + "function": "(top-level)", + "line": 2, + "owner": "core", + "receiver": "LuaSyntaxFactsCore", + "span": [ + 2, + 0, + 2, + 47 + ] + }, + { + "field": "count", + "function": "process", + "line": 27, + "owner": "LuaSyntaxFactsCore", + "receiver": "self", + "span": [ + 27, + 4, + 27, + 31 + ] + } + ], + "decisions": [ + { + "enclosing_span": [ + 26, + 2, + 31, + 5 + ], + "function": "process", + "kind": "conjunction", + "line": 26, + "members": [ + "self.status == \"idle\"", + "user.ready" + ], + "predicate": "self.status == \"idle\" and user.ready", + "span": [ + 26, + 5, + 26, + 41 + ] + } + ], + "branch_decisions": [ + { + "function": "process", + "line": 18, + "predicate": 
"user.role == \"owner\" or user.role == \"admin\"", + "span": [ + 18, + 2, + 24, + 5 + ], + "state_refs": [ + "user.role" + ] + }, + { + "function": "process", + "line": 26, + "predicate": "self.status == \"idle\" and user.ready", + "span": [ + 26, + 2, + 31, + 5 + ], + "state_refs": [ + "status", + "user.ready" + ] + } + ], + "dispatch_sites": [ + + ], + "semantic_effects": [ + { + "detail": "print", + "function": "audit", + "kind": "hidden_io", + "line": 41, + "span": [ + 41, + 2, + 41, + 13 + ] + }, + { + "detail": "print", + "function": "process", + "kind": "hidden_io", + "line": 30, + "span": [ + 30, + 4, + 30, + 9 + ] + }, + { + "detail": "setmetatable", + "function": "LuaSyntaxFactsCore.new", + "kind": "metaprogramming", + "line": 10, + "span": [ + 10, + 9, + 10, + 21 + ] + } + ], + "predicate_bodies": [ + { + "body": "name or \"missing\"", + "line": 13, + "name": "process", + "owner": "LuaSyntaxFactsCore", + "span": [ + 13, + 0, + 38, + 3 + ] + } + ] + } + ] +} diff --git a/gems/decomplex/examples/syntax-facts/oracles/php-core.json b/gems/decomplex/examples/syntax-facts/oracles/php-core.json new file mode 100644 index 000000000..b89836f8f --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/oracles/php-core.json @@ -0,0 +1,629 @@ +{ + "format": "decomplex.syntax-facts.v1", + "documents": [ + { + "file": "gems/decomplex/examples/syntax-facts/php/core.php", + "language": "php", + "functions": [ + { + "line": 15, + "name": "process", + "owner": "PhpSyntaxFactsCore", + "params": [ + "user", + "items", + "callback" + ], + "span": [ + 15, + 4, + 46, + 5 + ], + "visibility": "public" + }, + { + "line": 48, + "name": "audit", + "owner": "PhpSyntaxFactsCore", + "params": [ + "name" + ], + "span": [ + 48, + 4, + 53, + 5 + ], + "visibility": "private" + }, + { + "line": 55, + "name": "ready", + "owner": "PhpSyntaxFactsCore", + "params": [ + + ], + "span": [ + 55, + 4, + 58, + 5 + ], + "visibility": "public" + }, + { + "line": 9, + "name": "__construct", + "owner": 
"PhpSyntaxFactsCore", + "params": [ + "status", + "sink" + ], + "span": [ + 9, + 4, + 13, + 5 + ], + "visibility": "public" + } + ], + "owners": [ + { + "kind": "class", + "line": 3, + "name": "PhpSyntaxFactsCore", + "span": [ + 3, + 0, + 59, + 1 + ] + } + ], + "calls": [ + { + "arguments": [ + "\"busy\"" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 36, + "message": "publish", + "owner": "PhpSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 36, + 12, + 36, + 34 + ] + }, + { + "arguments": [ + "\"not ready\"" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 38, + "message": "print", + "owner": "PhpSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 38, + 12, + 38, + 29 + ] + }, + { + "arguments": [ + "\"record\"", + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "audit", + "line": 51, + "message": "send", + "owner": "PhpSyntaxFactsCore", + "receiver": "this.sink", + "safe_navigation": false, + "span": [ + 51, + 8, + 51, + 42 + ] + }, + { + "arguments": [ + "account" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 19, + "message": "callback", + "owner": "PhpSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 19, + 8, + 19, + 27 + ] + }, + { + "arguments": [ + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "audit", + "line": 50, + "message": "print", + "owner": "PhpSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 50, + 8, + 50, + 20 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 24, + "message": "escalate", + "owner": "PhpSyntaxFactsCore", + "receiver": "self", + "safe_navigation": 
false, + "span": [ + 24, + 16, + 24, + 38 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 27, + "message": "fallback", + "owner": "PhpSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 27, + 16, + 27, + 38 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 30, + "message": "defaultCase", + "owner": "PhpSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 30, + 16, + 30, + 41 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": true, + "control": "iterates", + "function": "process", + "line": 42, + "message": "children", + "owner": "PhpSyntaxFactsCore", + "receiver": "item", + "safe_navigation": false, + "span": [ + 42, + 12, + 42, + 29 + ] + } + ], + "state_reads": [ + { + "field": "active", + "function": "process", + "line": 18, + "owner": "PhpSyntaxFactsCore", + "receiver": "user", + "span": [ + 18, + 38, + 18, + 51 + ] + }, + { + "field": "children", + "function": "process", + "line": 42, + "owner": "PhpSyntaxFactsCore", + "receiver": "item", + "span": [ + 42, + 12, + 42, + 29 + ] + }, + { + "field": "count", + "function": "ready", + "line": 57, + "owner": "PhpSyntaxFactsCore", + "receiver": "self", + "span": [ + 57, + 15, + 57, + 27 + ] + }, + { + "field": "defaultCase", + "function": "process", + "line": 30, + "owner": "PhpSyntaxFactsCore", + "receiver": "self", + "span": [ + 30, + 16, + 30, + 41 + ] + }, + { + "field": "escalate", + "function": "process", + "line": 24, + "owner": "PhpSyntaxFactsCore", + "receiver": "self", + "span": [ + 24, + 16, + 24, + 38 + ] + }, + { + "field": "fallback", + "function": "process", + "line": 27, + "owner": "PhpSyntaxFactsCore", + "receiver": "self", + "span": [ + 27, + 16, + 27, + 38 + ] + }, + { + "field": "name", + "function": "process", + "line": 17, 
+ "owner": "PhpSyntaxFactsCore", + "receiver": "user?.profile", + "span": [ + 17, + 16, + 17, + 38 + ] + }, + { + "field": "profile", + "function": "process", + "line": 17, + "owner": "PhpSyntaxFactsCore", + "receiver": "user", + "span": [ + 17, + 16, + 17, + 31 + ] + }, + { + "field": "publish", + "function": "process", + "line": 36, + "owner": "PhpSyntaxFactsCore", + "receiver": "self", + "span": [ + 36, + 12, + 36, + 34 + ] + }, + { + "field": "ready", + "function": "process", + "line": 34, + "owner": "PhpSyntaxFactsCore", + "receiver": "user", + "span": [ + 34, + 40, + 34, + 52 + ] + }, + { + "field": "role", + "function": "process", + "line": 21, + "owner": "PhpSyntaxFactsCore", + "receiver": "user", + "span": [ + 21, + 16, + 21, + 27 + ] + }, + { + "field": "send", + "function": "audit", + "line": 51, + "owner": "PhpSyntaxFactsCore", + "receiver": "this.sink", + "span": [ + 51, + 8, + 51, + 42 + ] + }, + { + "field": "sink", + "function": "audit", + "line": 51, + "owner": "PhpSyntaxFactsCore", + "receiver": "self", + "span": [ + 51, + 8, + 51, + 19 + ] + }, + { + "field": "status", + "function": "audit", + "line": 52, + "owner": "PhpSyntaxFactsCore", + "receiver": "self", + "span": [ + 52, + 15, + 52, + 28 + ] + }, + { + "field": "status", + "function": "process", + "line": 34, + "owner": "PhpSyntaxFactsCore", + "receiver": "self", + "span": [ + 34, + 12, + 34, + 25 + ] + } + ], + "state_writes": [ + { + "field": "count", + "function": "process", + "line": 35, + "owner": "PhpSyntaxFactsCore", + "receiver": "self", + "span": [ + 35, + 12, + 35, + 29 + ] + }, + { + "field": "sink", + "function": "__construct", + "line": 12, + "owner": "PhpSyntaxFactsCore", + "receiver": "self", + "span": [ + 12, + 8, + 12, + 27 + ] + }, + { + "field": "status", + "function": "__construct", + "line": 11, + "owner": "PhpSyntaxFactsCore", + "receiver": "self", + "span": [ + 11, + 8, + 11, + 31 + ] + } + ], + "decisions": [ + { + "enclosing_span": [ + 21, + 8, + 32, + 9 + ], + 
"function": "process", + "kind": "case_dispatch", + "line": 21, + "members": [ + "\"admin\"", + "\"guest\"", + "\"owner\"" + ], + "predicate": "user.role", + "span": [ + 21, + 8, + 32, + 9 + ] + }, + { + "enclosing_span": [ + 34, + 8, + 39, + 9 + ], + "function": "process", + "kind": "conjunction", + "line": 34, + "members": [ + "this.status === \"idle\"", + "user.ready" + ], + "predicate": "this.status === \"idle\" && user.ready", + "span": [ + 34, + 12, + 34, + 52 + ] + } + ], + "branch_decisions": [ + { + "function": "process", + "line": 21, + "predicate": "(user.role)", + "span": [ + 21, + 8, + 32, + 9 + ], + "state_refs": [ + "user.role" + ] + }, + { + "function": "process", + "line": 34, + "predicate": "(this.status === \"idle\" && user.ready)", + "span": [ + 34, + 8, + 39, + 9 + ], + "state_refs": [ + "status", + "user.ready" + ] + } + ], + "dispatch_sites": [ + + ], + "semantic_effects": [ + { + "detail": "print", + "function": "audit", + "kind": "hidden_io", + "line": 50, + "span": [ + 50, + 8, + 50, + 20 + ] + }, + { + "detail": "print", + "function": "process", + "kind": "hidden_io", + "line": 38, + "span": [ + 38, + 12, + 38, + 29 + ] + } + ], + "predicate_bodies": [ + { + "body": "name ?? 
null", + "line": 15, + "name": "process", + "owner": "PhpSyntaxFactsCore", + "span": [ + 15, + 4, + 46, + 5 + ] + } + ] + } + ] +} diff --git a/gems/decomplex/examples/syntax-facts/oracles/python-core.json b/gems/decomplex/examples/syntax-facts/oracles/python-core.json new file mode 100644 index 000000000..8f12ef7e8 --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/oracles/python-core.json @@ -0,0 +1,569 @@ +{ + "format": "decomplex.syntax-facts.v1", + "documents": [ + { + "file": "gems/decomplex/examples/syntax-facts/python/core.py", + "language": "python", + "functions": [ + { + "line": 12, + "name": "process", + "owner": "PythonSyntaxFactsCore", + "params": [ + "self", + "user", + "items", + "callback" + ], + "span": [ + 12, + 4, + 52, + 41 + ], + "visibility": "public" + }, + { + "line": 54, + "name": "_normalize", + "owner": "PythonSyntaxFactsCore", + "params": [ + "self", + "value" + ], + "span": [ + 54, + 4, + 56, + 22 + ], + "visibility": "private" + }, + { + "line": 58, + "name": "generator", + "owner": "PythonSyntaxFactsCore", + "params": [ + "self", + "values" + ], + "span": [ + 58, + 4, + 60, + 23 + ], + "visibility": "public" + }, + { + "line": 62, + "name": "simple_with", + "owner": "PythonSyntaxFactsCore", + "params": [ + "self", + "resource" + ], + "span": [ + 62, + 4, + 64, + 16 + ], + "visibility": "public" + }, + { + "line": 7, + "name": "__init__", + "owner": "PythonSyntaxFactsCore", + "params": [ + "self", + "lock", + "resource" + ], + "span": [ + 7, + 4, + 10, + 22 + ], + "visibility": "public" + } + ], + "owners": [ + { + "kind": "class", + "line": 6, + "name": "PythonSyntaxFactsCore", + "span": [ + 6, + 0, + 64, + 16 + ] + } + ], + "calls": [ + { + "arguments": [ + "\"x\"" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 28, + "message": "startswith", + "owner": "PythonSyntaxFactsCore", + "receiver": "item", + "safe_navigation": false, + "span": [ + 28, + 30, + 28, + 50 + ] + 
}, + { + "arguments": [ + "item" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 29, + "message": "callback", + "owner": "PythonSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 29, + 16, + 29, + 30 + ] + }, + { + "arguments": [ + "item" + ], + "block": false, + "conditional": true, + "control": "iterates", + "function": "process", + "line": 37, + "message": "append", + "owner": "PythonSyntaxFactsCore", + "receiver": "result", + "safe_navigation": false, + "span": [ + 37, + 12, + 37, + 31 + ] + }, + { + "arguments": [ + "result" + ], + "block": false, + "conditional": true, + "control": "iterates", + "function": "process", + "line": 40, + "message": "len", + "owner": "PythonSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 40, + 22, + 40, + 33 + ] + }, + { + "arguments": [ + "result[index]" + ], + "block": false, + "conditional": true, + "control": "iterates", + "function": "process", + "line": 45, + "message": "audit", + "owner": "PythonSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 45, + 16, + 45, + 41 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 33, + "message": "escalate", + "owner": "PythonSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 33, + 20, + 33, + 39 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 35, + "message": "default", + "owner": "PythonSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 35, + 20, + 35, + 38 + ] + }, + { + "arguments": [ + "user.path" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 21, + "message": "open", + "owner": "PythonSyntaxFactsCore", + "receiver": 
"self", + "safe_navigation": false, + "span": [ + 21, + 13, + 21, + 28 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": false, + "control": "always", + "function": "_normalize", + "line": 55, + "message": "strip", + "owner": "PythonSyntaxFactsCore", + "receiver": "value", + "safe_navigation": false, + "span": [ + 55, + 18, + 55, + 31 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 22, + "message": "read", + "owner": "PythonSyntaxFactsCore", + "receiver": "handle", + "safe_navigation": false, + "span": [ + 22, + 19, + 22, + 32 + ] + } + ], + "state_reads": [ + { + "field": "append", + "function": "process", + "line": 37, + "owner": "PythonSyntaxFactsCore", + "receiver": "result", + "span": [ + 37, + 12, + 37, + 25 + ] + }, + { + "field": "audit", + "function": "process", + "line": 45, + "owner": "PythonSyntaxFactsCore", + "receiver": "self", + "span": [ + 45, + 16, + 45, + 26 + ] + }, + { + "field": "default", + "function": "process", + "line": 35, + "owner": "PythonSyntaxFactsCore", + "receiver": "self", + "span": [ + 35, + 20, + 35, + 32 + ] + }, + { + "field": "escalate", + "function": "process", + "line": 33, + "owner": "PythonSyntaxFactsCore", + "receiver": "self", + "span": [ + 33, + 20, + 33, + 33 + ] + }, + { + "field": "name", + "function": "process", + "line": 13, + "owner": "PythonSyntaxFactsCore", + "receiver": "user.profile", + "span": [ + 13, + 20, + 13, + 37 + ] + }, + { + "field": "path", + "function": "process", + "line": 21, + "owner": "PythonSyntaxFactsCore", + "receiver": "user", + "span": [ + 21, + 18, + 21, + 27 + ] + }, + { + "field": "profile", + "function": "process", + "line": 13, + "owner": "PythonSyntaxFactsCore", + "receiver": "user", + "span": [ + 13, + 20, + 13, + 32 + ] + }, + { + "field": "read", + "function": "process", + "line": 22, + "owner": "PythonSyntaxFactsCore", + "receiver": "handle", + "span": [ + 22, + 19, + 22, 
+ 30 + ] + }, + { + "field": "ready", + "function": "process", + "line": 28, + "owner": "PythonSyntaxFactsCore", + "receiver": "user", + "span": [ + 28, + 15, + 28, + 25 + ] + }, + { + "field": "startswith", + "function": "process", + "line": 28, + "owner": "PythonSyntaxFactsCore", + "receiver": "item", + "span": [ + 28, + 30, + 28, + 45 + ] + }, + { + "field": "strip", + "function": "_normalize", + "line": 55, + "owner": "PythonSyntaxFactsCore", + "receiver": "value", + "span": [ + 55, + 18, + 55, + 29 + ] + } + ], + "state_writes": [ + { + "field": "_lock", + "function": "__init__", + "line": 8, + "owner": "PythonSyntaxFactsCore", + "receiver": "self", + "span": [ + 8, + 8, + 8, + 25 + ] + }, + { + "field": "count", + "function": "__init__", + "line": 10, + "owner": "PythonSyntaxFactsCore", + "receiver": "self", + "span": [ + 10, + 8, + 10, + 22 + ] + }, + { + "field": "count", + "function": "process", + "line": 19, + "owner": "PythonSyntaxFactsCore", + "receiver": "self", + "span": [ + 19, + 12, + 19, + 27 + ] + }, + { + "field": "resource", + "function": "__init__", + "line": 9, + "owner": "PythonSyntaxFactsCore", + "receiver": "self", + "span": [ + 9, + 8, + 9, + 32 + ] + } + ], + "decisions": [ + { + "enclosing_span": [ + 28, + 12, + 29, + 30 + ], + "function": "process", + "kind": "conjunction", + "line": 28, + "members": [ + "item.startswith(\"x\")", + "user.ready" + ], + "predicate": "user.ready and item.startswith(\"x\")", + "span": [ + 28, + 15, + 28, + 50 + ] + } + ], + "branch_decisions": [ + { + "function": "process", + "line": 28, + "predicate": "user.ready and item.startswith(\"x\")", + "span": [ + 28, + 12, + 29, + 30 + ], + "state_refs": [ + "item.startswith", + "user.ready" + ] + } + ], + "dispatch_sites": [ + + ], + "semantic_effects": [ + { + "detail": "open", + "function": "process", + "kind": "hidden_io", + "line": 21, + "span": [ + 21, + 13, + 21, + 28 + ] + } + ], + "predicate_bodies": [ + + ] + } + ] +} diff --git 
a/gems/decomplex/examples/syntax-facts/oracles/ruby-core.json b/gems/decomplex/examples/syntax-facts/oracles/ruby-core.json new file mode 100644 index 000000000..3a90a9349 --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/oracles/ruby-core.json @@ -0,0 +1,1189 @@ +{ + "format": "decomplex.syntax-facts.v1", + "documents": [ + { + "file": "gems/decomplex/examples/syntax-facts/ruby/core.rb", + "language": "ruby", + "functions": [ + { + "line": 14, + "name": "self.build", + "owner": "RubySyntaxFactsCore", + "params": [ + "source" + ], + "span": [ + 14, + 2, + 16, + 5 + ], + "visibility": "public" + }, + { + "line": 19, + "name": "initialize", + "owner": "RubySyntaxFactsCore", + "params": [ + "source" + ], + "span": [ + 19, + 2, + 23, + 5 + ], + "visibility": "public" + }, + { + "line": 26, + "name": "process", + "owner": "RubySyntaxFactsCore", + "params": [ + "user", + "items", + "callback" + ], + "span": [ + 26, + 2, + 53, + 5 + ], + "visibility": "public" + }, + { + "line": 57, + "name": "audit", + "owner": "RubySyntaxFactsCore", + "params": [ + "name" + ], + "span": [ + 57, + 2, + 62, + 5 + ], + "visibility": "private" + }, + { + "line": 64, + "name": "inline_private", + "owner": "RubySyntaxFactsCore", + "params": [ + "value" + ], + "span": [ + 64, + 10, + 66, + 5 + ], + "visibility": "private" + }, + { + "line": 68, + "name": "ready?", + "owner": "RubySyntaxFactsCore", + "params": [ + + ], + "span": [ + 68, + 2, + 70, + 5 + ], + "visibility": "private" + }, + { + "line": 72, + "name": "loaded?", + "owner": "RubySyntaxFactsCore", + "params": [ + + ], + "span": [ + 72, + 2, + 72, + 33 + ], + "visibility": "private" + } + ], + "owners": [ + { + "kind": "class", + "line": 3, + "name": "Account", + "span": [ + 3, + 0, + 6, + 3 + ] + }, + { + "kind": "class", + "line": 8, + "name": "RubySyntaxFactsCore", + "span": [ + 8, + 0, + 73, + 3 + ] + } + ], + "calls": [ + { + "arguments": [ + "0", + "Integer" + ], + "block": false, + "conditional": false, + "control": 
"always", + "function": "initialize", + "line": 21, + "message": "let", + "owner": "RubySyntaxFactsCore", + "receiver": "T", + "safe_navigation": false, + "span": [ + 21, + 13, + 21, + 30 + ] + }, + { + "arguments": [ + ":active", + "T::Boolean" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "(top-level)", + "line": 5, + "message": "prop", + "owner": "Account", + "receiver": "self", + "safe_navigation": false, + "span": [ + 5, + 2, + 5, + 26 + ] + }, + { + "arguments": [ + ":count" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "(top-level)", + "line": 12, + "message": "attr_reader", + "owner": "RubySyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 12, + 2, + 12, + 20 + ] + }, + { + "arguments": [ + ":idle", + "Status" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "initialize", + "line": 22, + "message": "let", + "owner": "RubySyntaxFactsCore", + "receiver": "T", + "safe_navigation": false, + "span": [ + 22, + 14, + 22, + 34 + ] + }, + { + "arguments": [ + ":name", + "String" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "(top-level)", + "line": 4, + "message": "const", + "owner": "Account", + "receiver": "self", + "safe_navigation": false, + "span": [ + 4, + 2, + 4, + 21 + ] + }, + { + "arguments": [ + ":ready" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 43, + "message": "publish", + "owner": "RubySyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 43, + 6, + 43, + 21 + ] + }, + { + "arguments": [ + ":record", + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "audit", + "line": 59, + "message": "send", + "owner": "RubySyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 59, + 4, + 59, + 23 + ] + }, + { + "arguments": [ + 
"\"not ready\"" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 45, + "message": "warn", + "owner": "RubySyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 45, + 6, + 45, + 23 + ] + }, + { + "arguments": [ + "account" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 30, + "message": "call", + "owner": "RubySyntaxFactsCore", + "receiver": "callback", + "safe_navigation": false, + "span": [ + 30, + 4, + 30, + 22 + ] + }, + { + "arguments": [ + "inline_private", + "(value)", + "helper(value)" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "(top-level)", + "line": 64, + "message": "private", + "owner": "RubySyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 64, + 2, + 66, + 5 + ] + }, + { + "arguments": [ + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "audit", + "line": 58, + "message": "puts", + "owner": "RubySyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 58, + 4, + 58, + 14 + ] + }, + { + "arguments": [ + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 29, + "message": "audit", + "owner": "RubySyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 29, + 4, + 29, + 15 + ] + }, + { + "arguments": [ + "name: name", + "active: user.active?" 
+ ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 28, + "message": "new", + "owner": "RubySyntaxFactsCore", + "receiver": "Account", + "safe_navigation": false, + "span": [ + 28, + 14, + 28, + 59 + ] + }, + { + "arguments": [ + "source" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "self.build", + "line": 15, + "message": "new", + "owner": "RubySyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 15, + 4, + 15, + 15 + ] + }, + { + "arguments": [ + "source: Object" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "(top-level)", + "line": 18, + "message": "params", + "owner": "RubySyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 18, + 8, + 18, + 30 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 34, + "message": "escalate", + "owner": "RubySyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 34, + 6, + 34, + 20 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 36, + "message": "fallback", + "owner": "RubySyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 36, + 6, + 36, + 20 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 38, + "message": "default", + "owner": "RubySyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 38, + 6, + 38, + 19 + ] + }, + { + "arguments": [ + "user: Object", + "items: Array", + "callback: Proc" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "(top-level)", + "line": 25, + "message": "params", + "owner": "RubySyntaxFactsCore", + "receiver": 
"self", + "safe_navigation": false, + "span": [ + 25, + 8, + 25, + 58 + ] + }, + { + "arguments": [ + "value" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "inline_private", + "line": 65, + "message": "helper", + "owner": "RubySyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 65, + 4, + 65, + 17 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": false, + "control": "always", + "function": "(top-level)", + "line": 55, + "message": "private", + "owner": "RubySyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 55, + 2, + 55, + 9 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": false, + "control": "always", + "function": "(top-level)", + "line": 9, + "message": "freeze", + "owner": "RubySyntaxFactsCore", + "receiver": "%w[owner admin]", + "safe_navigation": false, + "span": [ + 9, + 16, + 9, + 38 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 27, + "message": "name", + "owner": "RubySyntaxFactsCore", + "receiver": "user&.profile", + "safe_navigation": true, + "span": [ + 27, + 11, + 27, + 30 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 27, + "message": "profile", + "owner": "RubySyntaxFactsCore", + "receiver": "user", + "safe_navigation": true, + "span": [ + 27, + 11, + 27, + 24 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 28, + "message": "active?", + "owner": "RubySyntaxFactsCore", + "receiver": "user", + "safe_navigation": false, + "span": [ + 28, + 46, + 28, + 58 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 32, + "message": "role", + "owner": 
"RubySyntaxFactsCore", + "receiver": "user", + "safe_navigation": false, + "span": [ + 32, + 9, + 32, + 18 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 41, + "message": "ready?", + "owner": "RubySyntaxFactsCore", + "receiver": "user", + "safe_navigation": false, + "span": [ + 41, + 27, + 41, + 38 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": true, + "control": "iterates", + "function": "process", + "line": 49, + "message": "children", + "owner": "RubySyntaxFactsCore", + "receiver": "item", + "safe_navigation": false, + "span": [ + 49, + 6, + 49, + 19 + ] + }, + { + "arguments": [ + + ], + "block": true, + "conditional": false, + "control": "always", + "function": "(top-level)", + "line": 10, + "message": "type_alias", + "owner": "RubySyntaxFactsCore", + "receiver": "T", + "safe_navigation": false, + "span": [ + 10, + 11, + 10, + 34 + ] + }, + { + "arguments": [ + + ], + "block": true, + "conditional": false, + "control": "always", + "function": "(top-level)", + "line": 18, + "message": "sig", + "owner": "RubySyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 18, + 2, + 18, + 37 + ] + }, + { + "arguments": [ + + ], + "block": true, + "conditional": false, + "control": "always", + "function": "(top-level)", + "line": 25, + "message": "sig", + "owner": "RubySyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 25, + 2, + 25, + 76 + ] + }, + { + "arguments": [ + + ], + "block": true, + "conditional": false, + "control": "always", + "function": "process", + "line": 48, + "message": "flat_map", + "owner": "RubySyntaxFactsCore", + "receiver": "items", + "safe_navigation": false, + "span": [ + 48, + 4, + 50, + 7 + ] + } + ], + "state_reads": [ + { + "field": "$GLOBAL_STATE", + "function": "audit", + "line": 60, + "owner": "RubySyntaxFactsCore", + "receiver": "self", + "span": [ + 60, + 4, + 60, 
+ 17 + ] + }, + { + "field": "@count", + "function": "ready?", + "line": 69, + "owner": "RubySyntaxFactsCore", + "receiver": "self", + "span": [ + 69, + 4, + 69, + 10 + ] + }, + { + "field": "@source", + "function": "audit", + "line": 61, + "owner": "RubySyntaxFactsCore", + "receiver": "self", + "span": [ + 61, + 4, + 61, + 11 + ] + }, + { + "field": "@status", + "function": "loaded?", + "line": 72, + "owner": "RubySyntaxFactsCore", + "receiver": "self", + "span": [ + 72, + 16, + 72, + 23 + ] + }, + { + "field": "@status", + "function": "process", + "line": 41, + "owner": "RubySyntaxFactsCore", + "receiver": "self", + "span": [ + 41, + 7, + 41, + 14 + ] + }, + { + "field": "@status", + "function": "process", + "line": 52, + "owner": "RubySyntaxFactsCore", + "receiver": "self", + "span": [ + 52, + 4, + 52, + 11 + ] + }, + { + "field": "active?", + "function": "process", + "line": 28, + "owner": "RubySyntaxFactsCore", + "receiver": "user", + "span": [ + 28, + 46, + 28, + 58 + ] + }, + { + "field": "children", + "function": "process", + "line": 49, + "owner": "RubySyntaxFactsCore", + "receiver": "item", + "span": [ + 49, + 6, + 49, + 19 + ] + }, + { + "field": "flat_map", + "function": "process", + "line": 48, + "owner": "RubySyntaxFactsCore", + "receiver": "items", + "span": [ + 48, + 4, + 50, + 7 + ] + }, + { + "field": "freeze", + "function": "(top-level)", + "line": 9, + "owner": "RubySyntaxFactsCore", + "receiver": "%w[owner admin]", + "span": [ + 9, + 16, + 9, + 38 + ] + }, + { + "field": "name", + "function": "process", + "line": 27, + "owner": "RubySyntaxFactsCore", + "receiver": "user&.profile", + "span": [ + 27, + 11, + 27, + 30 + ] + }, + { + "field": "profile", + "function": "process", + "line": 27, + "owner": "RubySyntaxFactsCore", + "receiver": "user", + "span": [ + 27, + 11, + 27, + 24 + ] + }, + { + "field": "ready?", + "function": "process", + "line": 41, + "owner": "RubySyntaxFactsCore", + "receiver": "user", + "span": [ + 41, + 27, + 41, + 38 + ] + 
}, + { + "field": "role", + "function": "process", + "line": 32, + "owner": "RubySyntaxFactsCore", + "receiver": "user", + "span": [ + 32, + 9, + 32, + 18 + ] + } + ], + "state_writes": [ + { + "field": "@count", + "function": "initialize", + "line": 21, + "owner": "RubySyntaxFactsCore", + "receiver": "self", + "span": [ + 21, + 4, + 21, + 30 + ] + }, + { + "field": "@count", + "function": "process", + "line": 42, + "owner": "RubySyntaxFactsCore", + "receiver": "self", + "span": [ + 42, + 6, + 42, + 17 + ] + }, + { + "field": "@source", + "function": "initialize", + "line": 20, + "owner": "RubySyntaxFactsCore", + "receiver": "self", + "span": [ + 20, + 4, + 20, + 20 + ] + }, + { + "field": "@status", + "function": "initialize", + "line": 22, + "owner": "RubySyntaxFactsCore", + "receiver": "self", + "span": [ + 22, + 4, + 22, + 34 + ] + } + ], + "decisions": [ + { + "enclosing_span": [ + 32, + 4, + 39, + 7 + ], + "function": "process", + "kind": "case_dispatch", + "line": 32, + "members": [ + "\"owner\"", + "ADMIN_ROLES", + "nil" + ], + "predicate": "user.role", + "span": [ + 32, + 4, + 39, + 7 + ] + }, + { + "enclosing_span": [ + 41, + 4, + 46, + 7 + ], + "function": "process", + "kind": "conjunction", + "line": 41, + "members": [ + "@status == :idle", + "user.ready?" + ], + "predicate": "@status == :idle && user.ready?", + "span": [ + 41, + 7, + 41, + 38 + ] + } + ], + "branch_decisions": [ + { + "function": "process", + "line": 32, + "predicate": "user.role", + "span": [ + 32, + 4, + 39, + 7 + ], + "state_refs": [ + "user.role" + ] + }, + { + "function": "process", + "line": 41, + "predicate": "@status == :idle && user.ready?", + "span": [ + 41, + 4, + 46, + 7 + ], + "state_refs": [ + "@status", + "user.ready?" 
+ ] + } + ], + "dispatch_sites": [ + + ], + "semantic_effects": [ + { + "detail": "$GLOBAL_STATE", + "function": "audit", + "kind": "context_dependency", + "line": 60, + "span": [ + 60, + 4, + 60, + 17 + ] + }, + { + "detail": "callback.call", + "function": "process", + "kind": "dynamic_dispatch", + "line": 30, + "span": [ + 30, + 4, + 30, + 22 + ] + }, + { + "detail": "puts", + "function": "audit", + "kind": "hidden_io", + "line": 58, + "span": [ + 58, + 4, + 58, + 14 + ] + }, + { + "detail": "send", + "function": "audit", + "kind": "dynamic_dispatch", + "line": 59, + "span": [ + 59, + 4, + 59, + 23 + ] + }, + { + "detail": "warn", + "function": "process", + "kind": "hidden_io", + "line": 45, + "span": [ + 45, + 6, + 45, + 23 + ] + } + ], + "predicate_bodies": [ + { + "body": "@status == :ready", + "line": 72, + "name": "loaded?", + "owner": "RubySyntaxFactsCore", + "span": [ + 72, + 2, + 72, + 33 + ] + } + ] + } + ] +} diff --git a/gems/decomplex/examples/syntax-facts/oracles/rust-core.json b/gems/decomplex/examples/syntax-facts/oracles/rust-core.json new file mode 100644 index 000000000..d8d73466b --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/oracles/rust-core.json @@ -0,0 +1,784 @@ +{ + "format": "decomplex.syntax-facts.v1", + "documents": [ + { + "file": "gems/decomplex/examples/syntax-facts/rust/core.rs", + "language": "rust", + "functions": [ + { + "line": 12, + "name": "new", + "owner": "RustSyntaxFactsCore", + "params": [ + "status" + ], + "span": [ + 12, + 4, + 14, + 5 + ], + "visibility": "public" + }, + { + "line": 16, + "name": "process", + "owner": "RustSyntaxFactsCore", + "params": [ + "&mut self", + "user", + "items", + "callback" + ], + "span": [ + 16, + 4, + 44, + 5 + ], + "visibility": "public" + }, + { + "line": 46, + "name": "audit", + "owner": "RustSyntaxFactsCore", + "params": [ + "&self", + "name" + ], + "span": [ + 46, + 4, + 50, + 5 + ], + "visibility": "private" + }, + { + "line": 52, + "name": "ready", + "owner": 
"RustSyntaxFactsCore", + "params": [ + "&self" + ], + "span": [ + 52, + 4, + 54, + 5 + ], + "visibility": "private" + } + ], + "owners": [ + { + "kind": "impl", + "line": 11, + "name": "RustSyntaxFactsCore", + "span": [ + 11, + 0, + 55, + 1 + ] + }, + { + "kind": "struct", + "line": 1, + "name": "RustSyntaxFactsCore", + "span": [ + 1, + 0, + 4, + 1 + ] + } + ], + "calls": [ + { + "arguments": [ + "&account" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 24, + "message": "callback", + "owner": "RustSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 24, + 8, + 24, + 26 + ] + }, + { + "arguments": [ + "Status::Busy" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 34, + "message": "publish", + "owner": "RustSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 34, + 12, + 34, + 38 + ] + }, + { + "arguments": [ + "\"not ready\"" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 36, + "message": "warn", + "owner": "RustSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 36, + 12, + 36, + 34 + ] + }, + { + "arguments": [ + "\"record\"", + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "audit", + "line": 48, + "message": "send", + "owner": "RustSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 48, + 8, + 48, + 33 + ] + }, + { + "arguments": [ + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 43, + "message": "Some", + "owner": "RustSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 43, + 8, + 43, + 18 + ] + }, + { + "arguments": [ + "name.clone()", + "user.active()" + ], + "block": false, + "conditional": false, + "control": "always", + 
"function": "process", + "line": 23, + "message": "new", + "owner": "RustSyntaxFactsCore", + "receiver": "Account", + "safe_navigation": false, + "span": [ + 23, + 22, + 23, + 63 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 27, + "message": "escalate", + "owner": "RustSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 27, + 41, + 27, + 60 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 28, + "message": "fallback", + "owner": "RustSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 28, + 27, + 28, + 46 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 29, + "message": "default_case", + "owner": "RustSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 29, + 17, + 29, + 40 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": false, + "control": "always", + "function": "audit", + "line": 49, + "message": "status", + "owner": "RustSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 49, + 8, + 49, + 21 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 22, + "message": "name", + "owner": "RustSyntaxFactsCore", + "receiver": "user.profile()", + "safe_navigation": false, + "span": [ + 22, + 19, + 22, + 40 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 22, + "message": "profile", + "owner": "RustSyntaxFactsCore", + "receiver": "user", + "safe_navigation": false, + "span": [ + 22, + 19, + 22, + 33 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": 
false, + "control": "always", + "function": "process", + "line": 22, + "message": "to_string", + "owner": "RustSyntaxFactsCore", + "receiver": "user.profile().name()", + "safe_navigation": false, + "span": [ + 22, + 19, + 22, + 52 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 23, + "message": "active", + "owner": "RustSyntaxFactsCore", + "receiver": "user", + "safe_navigation": false, + "span": [ + 23, + 49, + 23, + 62 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 23, + "message": "clone", + "owner": "RustSyntaxFactsCore", + "receiver": "name", + "safe_navigation": false, + "span": [ + 23, + 35, + 23, + 47 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 26, + "message": "role", + "owner": "RustSyntaxFactsCore", + "receiver": "user", + "safe_navigation": false, + "span": [ + 26, + 14, + 26, + 25 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 32, + "message": "ready", + "owner": "RustSyntaxFactsCore", + "receiver": "user", + "safe_navigation": false, + "span": [ + 32, + 50, + 32, + 62 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": true, + "control": "iterates", + "function": "process", + "line": 40, + "message": "children", + "owner": "RustSyntaxFactsCore", + "receiver": "item", + "safe_navigation": false, + "span": [ + 40, + 12, + 40, + 27 + ] + } + ], + "state_reads": [ + { + "field": "active", + "function": "process", + "line": 23, + "owner": "RustSyntaxFactsCore", + "receiver": "user", + "span": [ + 23, + 49, + 23, + 60 + ] + }, + { + "field": "children", + "function": "process", + "line": 40, + "owner": "RustSyntaxFactsCore", + "receiver": "item", + "span": [ + 40, + 12, + 
40, + 25 + ] + }, + { + "field": "clone", + "function": "process", + "line": 23, + "owner": "RustSyntaxFactsCore", + "receiver": "name", + "span": [ + 23, + 35, + 23, + 45 + ] + }, + { + "field": "count", + "function": "ready", + "line": 53, + "owner": "RustSyntaxFactsCore", + "receiver": "self", + "span": [ + 53, + 8, + 53, + 18 + ] + }, + { + "field": "default_case", + "function": "process", + "line": 29, + "owner": "RustSyntaxFactsCore", + "receiver": "self", + "span": [ + 29, + 17, + 29, + 34 + ] + }, + { + "field": "escalate", + "function": "process", + "line": 27, + "owner": "RustSyntaxFactsCore", + "receiver": "self", + "span": [ + 27, + 41, + 27, + 54 + ] + }, + { + "field": "fallback", + "function": "process", + "line": 28, + "owner": "RustSyntaxFactsCore", + "receiver": "self", + "span": [ + 28, + 27, + 28, + 40 + ] + }, + { + "field": "name", + "function": "process", + "line": 22, + "owner": "RustSyntaxFactsCore", + "receiver": "user.profile()", + "span": [ + 22, + 19, + 22, + 38 + ] + }, + { + "field": "profile", + "function": "process", + "line": 22, + "owner": "RustSyntaxFactsCore", + "receiver": "user", + "span": [ + 22, + 19, + 22, + 31 + ] + }, + { + "field": "publish", + "function": "process", + "line": 34, + "owner": "RustSyntaxFactsCore", + "receiver": "self", + "span": [ + 34, + 12, + 34, + 24 + ] + }, + { + "field": "ready", + "function": "process", + "line": 32, + "owner": "RustSyntaxFactsCore", + "receiver": "user", + "span": [ + 32, + 50, + 32, + 60 + ] + }, + { + "field": "role", + "function": "process", + "line": 26, + "owner": "RustSyntaxFactsCore", + "receiver": "user", + "span": [ + 26, + 14, + 26, + 23 + ] + }, + { + "field": "send", + "function": "audit", + "line": 48, + "owner": "RustSyntaxFactsCore", + "receiver": "self", + "span": [ + 48, + 8, + 48, + 17 + ] + }, + { + "field": "status", + "function": "audit", + "line": 49, + "owner": "RustSyntaxFactsCore", + "receiver": "self", + "span": [ + 49, + 8, + 49, + 19 + ] + }, + { + 
"field": "to_string", + "function": "process", + "line": 22, + "owner": "RustSyntaxFactsCore", + "receiver": "user.profile().name()", + "span": [ + 22, + 19, + 22, + 50 + ] + }, + { + "field": "warn", + "function": "process", + "line": 36, + "owner": "RustSyntaxFactsCore", + "receiver": "self", + "span": [ + 36, + 12, + 36, + 21 + ] + } + ], + "state_writes": [ + { + "field": "count", + "function": "process", + "line": 33, + "owner": "RustSyntaxFactsCore", + "receiver": "self", + "span": [ + 33, + 12, + 33, + 27 + ] + } + ], + "decisions": [ + { + "enclosing_span": [ + 26, + 8, + 30, + 9 + ], + "function": "process", + "kind": "case_dispatch", + "line": 26, + "members": [ + "Role::Guest", + "Role::Owner | Role::Admin" + ], + "predicate": "user.role()", + "span": [ + 26, + 8, + 30, + 9 + ] + }, + { + "enclosing_span": [ + 32, + 8, + 37, + 9 + ], + "function": "process", + "kind": "conjunction", + "line": 32, + "members": [ + "matches!(self.status, Status::Idle)", + "user.ready()" + ], + "predicate": "matches!(self.status, Status::Idle) && user.ready()", + "span": [ + 32, + 11, + 32, + 62 + ] + } + ], + "branch_decisions": [ + { + "function": "process", + "line": 26, + "predicate": "user.role()", + "span": [ + 26, + 8, + 30, + 9 + ], + "state_refs": [ + "user.role" + ] + }, + { + "function": "process", + "line": 32, + "predicate": "matches!(self.status, Status::Idle) && user.ready()", + "span": [ + 32, + 8, + 37, + 9 + ], + "state_refs": [ + "user.ready" + ] + } + ], + "dispatch_sites": [ + + ], + "semantic_effects": [ + { + "detail": "callback", + "function": "process", + "kind": "callback_inversion", + "line": 24, + "span": [ + 24, + 8, + 24, + 26 + ] + } + ], + "predicate_bodies": [ + + ] + } + ] +} diff --git a/gems/decomplex/examples/syntax-facts/oracles/swift-core.json b/gems/decomplex/examples/syntax-facts/oracles/swift-core.json new file mode 100644 index 000000000..fec3e06d3 --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/oracles/swift-core.json @@ 
-0,0 +1,595 @@ +{ + "format": "decomplex.syntax-facts.v1", + "documents": [ + { + "file": "gems/decomplex/examples/syntax-facts/swift/core.swift", + "language": "swift", + "functions": [ + { + "line": 16, + "name": "process", + "owner": "SwiftSyntaxFactsCore", + "params": [ + "user", + "items", + "callback" + ], + "span": [ + 16, + 2, + 42, + 3 + ], + "visibility": null + }, + { + "line": 44, + "name": "audit", + "owner": "SwiftSyntaxFactsCore", + "params": [ + "name" + ], + "span": [ + 44, + 2, + 48, + 3 + ], + "visibility": "private" + }, + { + "line": 50, + "name": "ready", + "owner": "SwiftSyntaxFactsCore", + "params": [ + + ], + "span": [ + 50, + 2, + 52, + 3 + ], + "visibility": null + } + ], + "owners": [ + { + "kind": "class", + "line": 1, + "name": "Status", + "span": [ + 1, + 0, + 4, + 1 + ] + }, + { + "kind": "class", + "line": 6, + "name": "SwiftSyntaxFactsCore", + "span": [ + 6, + 0, + 53, + 1 + ] + } + ], + "calls": [ + { + "arguments": [ + ".busy" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 32, + "message": "publish", + "owner": "SwiftSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 32, + 6, + 32, + 25 + ] + }, + { + "arguments": [ + "\"not ready\"" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 34, + "message": "print", + "owner": "SwiftSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 34, + 6, + 34, + 24 + ] + }, + { + "arguments": [ + "\"record\"", + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "audit", + "line": 46, + "message": "send", + "owner": "SwiftSyntaxFactsCore", + "receiver": "sink", + "safe_navigation": false, + "span": [ + 46, + 4, + 46, + 29 + ] + }, + { + "arguments": [ + "account" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 19, + 
"message": "callback", + "owner": "SwiftSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 19, + 4, + 19, + 21 + ] + }, + { + "arguments": [ + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "audit", + "line": 45, + "message": "print", + "owner": "SwiftSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 45, + 4, + 45, + 15 + ] + }, + { + "arguments": [ + "name: name", + "active: user.active" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 18, + "message": "Account", + "owner": "SwiftSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 18, + 18, + 18, + 58 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 23, + "message": "escalate", + "owner": "SwiftSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 23, + 6, + 23, + 25 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 25, + "message": "fallback", + "owner": "SwiftSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 25, + 6, + 25, + 19 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 27, + "message": "defaultCase", + "owner": "SwiftSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 27, + 6, + 27, + 28 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": true, + "control": "iterates", + "function": "process", + "line": 38, + "message": "children", + "owner": "SwiftSyntaxFactsCore", + "receiver": "item", + "safe_navigation": false, + "span": [ + 38, + 6, + 38, + 21 + ] + } + ], + "state_reads": [ + { + "field": "active", + "function": 
"process", + "line": 18, + "owner": "SwiftSyntaxFactsCore", + "receiver": "user", + "span": [ + 18, + 46, + 18, + 57 + ] + }, + { + "field": "children", + "function": "process", + "line": 38, + "owner": "SwiftSyntaxFactsCore", + "receiver": "item", + "span": [ + 38, + 6, + 38, + 19 + ] + }, + { + "field": "defaultCase", + "function": "process", + "line": 27, + "owner": "SwiftSyntaxFactsCore", + "receiver": "self", + "span": [ + 27, + 6, + 27, + 22 + ] + }, + { + "field": "escalate", + "function": "process", + "line": 23, + "owner": "SwiftSyntaxFactsCore", + "receiver": "self", + "span": [ + 23, + 6, + 23, + 19 + ] + }, + { + "field": "fallback", + "function": "process", + "line": 25, + "owner": "SwiftSyntaxFactsCore", + "receiver": "self", + "span": [ + 25, + 6, + 25, + 19 + ] + }, + { + "field": "name", + "function": "process", + "line": 17, + "owner": "SwiftSyntaxFactsCore", + "receiver": "user.profile", + "span": [ + 17, + 15, + 17, + 33 + ] + }, + { + "field": "profile", + "function": "process", + "line": 17, + "owner": "SwiftSyntaxFactsCore", + "receiver": "user", + "span": [ + 17, + 15, + 17, + 27 + ] + }, + { + "field": "publish", + "function": "process", + "line": 32, + "owner": "SwiftSyntaxFactsCore", + "receiver": "self", + "span": [ + 32, + 6, + 32, + 18 + ] + }, + { + "field": "ready", + "function": "process", + "line": 30, + "owner": "SwiftSyntaxFactsCore", + "receiver": "user", + "span": [ + 30, + 31, + 30, + 41 + ] + }, + { + "field": "role", + "function": "process", + "line": 21, + "owner": "SwiftSyntaxFactsCore", + "receiver": "user", + "span": [ + 21, + 11, + 21, + 20 + ] + }, + { + "field": "send", + "function": "audit", + "line": 46, + "owner": "SwiftSyntaxFactsCore", + "receiver": "sink", + "span": [ + 46, + 4, + 46, + 13 + ] + }, + { + "field": "status", + "function": "process", + "line": 30, + "owner": "SwiftSyntaxFactsCore", + "receiver": "self", + "span": [ + 30, + 7, + 30, + 18 + ] + } + ], + "state_writes": [ + { + "field": "count", + 
"function": "process", + "line": 31, + "owner": "SwiftSyntaxFactsCore", + "receiver": "self", + "span": [ + 31, + 6, + 31, + 21 + ] + }, + { + "field": "sink", + "function": "(top-level)", + "line": 13, + "owner": "SwiftSyntaxFactsCore", + "receiver": "self", + "span": [ + 13, + 4, + 13, + 20 + ] + }, + { + "field": "status", + "function": "(top-level)", + "line": 12, + "owner": "SwiftSyntaxFactsCore", + "receiver": "self", + "span": [ + 12, + 4, + 12, + 24 + ] + } + ], + "decisions": [ + { + "enclosing_span": [ + 21, + 4, + 28, + 5 + ], + "function": "process", + "kind": "case_dispatch", + "line": 21, + "members": [ + "\"admin\"", + "\"guest\"", + "\"owner\"" + ], + "predicate": "user.role", + "span": [ + 21, + 4, + 28, + 5 + ] + }, + { + "enclosing_span": [ + 30, + 4, + 35, + 5 + ], + "function": "process", + "kind": "conjunction", + "line": 30, + "members": [ + ".idle", + "user.ready" + ], + "predicate": ".idle && user.ready", + "span": [ + 30, + 22, + 30, + 41 + ] + } + ], + "branch_decisions": [ + { + "function": "process", + "line": 21, + "predicate": "user.role", + "span": [ + 21, + 4, + 28, + 5 + ], + "state_refs": [ + "user.role" + ] + }, + { + "function": "process", + "line": 30, + "predicate": "self.status == .idle && user.ready", + "span": [ + 30, + 4, + 35, + 5 + ], + "state_refs": [ + "status", + "user.ready" + ] + } + ], + "dispatch_sites": [ + + ], + "semantic_effects": [ + { + "detail": "print", + "function": "audit", + "kind": "hidden_io", + "line": 45, + "span": [ + 45, + 4, + 45, + 15 + ] + }, + { + "detail": "print", + "function": "process", + "kind": "hidden_io", + "line": 34, + "span": [ + 34, + 6, + 34, + 24 + ] + } + ], + "predicate_bodies": [ + + ] + } + ] +} diff --git a/gems/decomplex/examples/syntax-facts/oracles/typescript-core.json b/gems/decomplex/examples/syntax-facts/oracles/typescript-core.json new file mode 100644 index 000000000..e8d47251d --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/oracles/typescript-core.json @@ 
-0,0 +1,646 @@ +{ + "format": "decomplex.syntax-facts.v1", + "documents": [ + { + "file": "gems/decomplex/examples/syntax-facts/typescript/core.ts", + "language": "typescript", + "functions": [ + { + "line": 19, + "name": "constructor", + "owner": "TypeScriptSyntaxFactsCore", + "params": [ + "status", + "sink" + ], + "span": [ + 19, + 2, + 21, + 3 + ], + "visibility": "public" + }, + { + "line": 23, + "name": "process", + "owner": "TypeScriptSyntaxFactsCore", + "params": [ + "user", + "items", + "callback" + ], + "span": [ + 23, + 2, + 52, + 3 + ], + "visibility": "public" + }, + { + "line": 54, + "name": "audit", + "owner": "TypeScriptSyntaxFactsCore", + "params": [ + "name" + ], + "span": [ + 54, + 2, + 58, + 3 + ], + "visibility": "private" + }, + { + "line": 60, + "name": "ready", + "owner": "TypeScriptSyntaxFactsCore", + "params": [ + + ], + "span": [ + 60, + 2, + 62, + 3 + ], + "visibility": "public" + }, + { + "line": 65, + "name": "normalizeValue", + "owner": "core", + "params": [ + "input" + ], + "span": [ + 65, + 7, + 67, + 1 + ], + "visibility": "public" + } + ], + "owners": [ + { + "kind": "class", + "line": 15, + "name": "TypeScriptSyntaxFactsCore", + "span": [ + 15, + 7, + 63, + 1 + ] + } + ], + "calls": [ + { + "arguments": [ + "\"busy\"" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 42, + "message": "publish", + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 42, + 6, + 42, + 26 + ] + }, + { + "arguments": [ + "\"not ready\"" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 44, + "message": "warn", + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "console", + "safe_navigation": false, + "span": [ + 44, + 6, + 44, + 31 + ] + }, + { + "arguments": [ + "\"record\"", + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "audit", + 
"line": 56, + "message": "send", + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "this.sink", + "safe_navigation": false, + "span": [ + 56, + 4, + 56, + 34 + ] + }, + { + "arguments": [ + "account" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 26, + "message": "callback", + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 26, + 4, + 26, + 21 + ] + }, + { + "arguments": [ + "items[index]" + ], + "block": false, + "conditional": true, + "control": "iterates", + "function": "process", + "line": 48, + "message": "audit", + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 48, + 6, + 48, + 30 + ] + }, + { + "arguments": [ + "name" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "audit", + "line": 55, + "message": "log", + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "console", + "safe_navigation": false, + "span": [ + 55, + 4, + 55, + 21 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 31, + "message": "escalate", + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 31, + 8, + 31, + 27 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 34, + "message": "fallback", + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 34, + 8, + 34, + 27 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 37, + "message": "defaultCase", + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 37, + 8, + 37, + 30 + ] + } + ], + "state_reads": [ + { 
+ "field": "active", + "function": "process", + "line": 25, + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "user", + "span": [ + 25, + 45, + 25, + 56 + ] + }, + { + "field": "audit", + "function": "process", + "line": 48, + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 48, + 6, + 48, + 16 + ] + }, + { + "field": "count", + "function": "ready", + "line": 61, + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 61, + 11, + 61, + 21 + ] + }, + { + "field": "defaultCase", + "function": "process", + "line": 37, + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 37, + 8, + 37, + 24 + ] + }, + { + "field": "escalate", + "function": "process", + "line": 31, + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 31, + 8, + 31, + 21 + ] + }, + { + "field": "fallback", + "function": "process", + "line": 34, + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 34, + 8, + 34, + 21 + ] + }, + { + "field": "log", + "function": "audit", + "line": 55, + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "console", + "span": [ + 55, + 4, + 55, + 15 + ] + }, + { + "field": "name", + "function": "process", + "line": 24, + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "user.profile", + "span": [ + 24, + 17, + 24, + 35 + ] + }, + { + "field": "profile", + "function": "process", + "line": 24, + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "user", + "span": [ + 24, + 17, + 24, + 29 + ] + }, + { + "field": "publish", + "function": "process", + "line": 42, + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 42, + 6, + 42, + 18 + ] + }, + { + "field": "ready", + "function": "process", + "line": 40, + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "user", + "span": [ + 40, + 34, + 40, + 44 + ] + }, + { + "field": "role", + "function": "process", + "line": 28, + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "user", + 
"span": [ + 28, + 12, + 28, + 21 + ] + }, + { + "field": "send", + "function": "audit", + "line": 56, + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "this.sink", + "span": [ + 56, + 4, + 56, + 18 + ] + }, + { + "field": "sink", + "function": "audit", + "line": 56, + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 56, + 4, + 56, + 13 + ] + }, + { + "field": "status", + "function": "audit", + "line": 57, + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 57, + 11, + 57, + 22 + ] + }, + { + "field": "status", + "function": "process", + "line": 40, + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 40, + 8, + 40, + 19 + ] + }, + { + "field": "warn", + "function": "process", + "line": 44, + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "console", + "span": [ + 44, + 6, + 44, + 18 + ] + } + ], + "state_writes": [ + { + "field": "count", + "function": "process", + "line": 41, + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 41, + 6, + 41, + 21 + ] + }, + { + "field": "status", + "function": "constructor", + "line": 20, + "owner": "TypeScriptSyntaxFactsCore", + "receiver": "self", + "span": [ + 20, + 4, + 20, + 24 + ] + } + ], + "decisions": [ + { + "enclosing_span": [ + 28, + 4, + 38, + 5 + ], + "function": "process", + "kind": "case_dispatch", + "line": 28, + "members": [ + "\"admin\"", + "\"guest\"", + "\"owner\"" + ], + "predicate": "user.role", + "span": [ + 28, + 4, + 38, + 5 + ] + }, + { + "enclosing_span": [ + 40, + 4, + 45, + 5 + ], + "function": "process", + "kind": "conjunction", + "line": 40, + "members": [ + "this.status === \"idle\"", + "user.ready" + ], + "predicate": "this.status === \"idle\" && user.ready", + "span": [ + 40, + 8, + 40, + 44 + ] + } + ], + "branch_decisions": [ + { + "function": "process", + "line": 28, + "predicate": "(user.role)", + "span": [ + 28, + 4, + 38, + 5 + ], + "state_refs": [ + "user.role" + ] + }, + { + 
"function": "process", + "line": 40, + "predicate": "(this.status === \"idle\" && user.ready)", + "span": [ + 40, + 4, + 45, + 5 + ], + "state_refs": [ + "this.status", + "user.ready" + ] + } + ], + "dispatch_sites": [ + + ], + "semantic_effects": [ + { + "detail": "console.log", + "function": "audit", + "kind": "hidden_io", + "line": 55, + "span": [ + 55, + 4, + 55, + 21 + ] + }, + { + "detail": "console.warn", + "function": "process", + "kind": "hidden_io", + "line": 44, + "span": [ + 44, + 6, + 44, + 31 + ] + } + ], + "predicate_bodies": [ + + ] + } + ] +} diff --git a/gems/decomplex/examples/syntax-facts/oracles/zig-core.json b/gems/decomplex/examples/syntax-facts/oracles/zig-core.json new file mode 100644 index 000000000..9518741c5 --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/oracles/zig-core.json @@ -0,0 +1,572 @@ +{ + "format": "decomplex.syntax-facts.v1", + "documents": [ + { + "file": "gems/decomplex/examples/syntax-facts/zig/core.zig", + "language": "zig", + "functions": [ + { + "line": 11, + "name": "process", + "owner": "ZigSyntaxFactsCore", + "params": [ + "self", + "user", + "items", + "callback" + ], + "span": [ + 11, + 4, + 36, + 5 + ], + "visibility": "public" + }, + { + "line": 38, + "name": "audit", + "owner": "ZigSyntaxFactsCore", + "params": [ + "self", + "name" + ], + "span": [ + 38, + 4, + 41, + 5 + ], + "visibility": "private" + }, + { + "line": 43, + "name": "ready", + "owner": "ZigSyntaxFactsCore", + "params": [ + "self" + ], + "span": [ + 43, + 4, + 45, + 5 + ], + "visibility": "private" + }, + { + "line": 7, + "name": "init", + "owner": "ZigSyntaxFactsCore", + "params": [ + "status" + ], + "span": [ + 7, + 4, + 9, + 5 + ], + "visibility": "public" + } + ], + "owners": [ + { + "kind": "struct", + "line": 3, + "name": "ZigSyntaxFactsCore", + "span": [ + 3, + 31, + 46, + 1 + ] + }, + { + "kind": "struct", + "line": 53, + "name": "Item", + "span": [ + 53, + 13, + 55, + 1 + ] + } + ], + "calls": [ + { + "arguments": [ + ".busy" + 
], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 25, + "message": "publish", + "owner": "ZigSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 25, + 12, + 25, + 31 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": false, + "control": "always", + "function": "process", + "line": 15, + "message": "callback", + "owner": "ZigSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 15, + 8, + 15, + 22 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 18, + "message": "escalate", + "owner": "ZigSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 18, + 30, + 18, + 49 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 19, + "message": "fallback", + "owner": "ZigSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 19, + 22, + 19, + 41 + ] + }, + { + "arguments": [ + "user" + ], + "block": false, + "conditional": true, + "control": "conditional", + "function": "process", + "line": 20, + "message": "defaultCase", + "owner": "ZigSyntaxFactsCore", + "receiver": "self", + "safe_navigation": false, + "span": [ + 20, + 20, + 20, + 42 + ] + }, + { + "arguments": [ + + ], + "block": false, + "conditional": true, + "control": "iterates", + "function": "process", + "line": 31, + "message": "children", + "owner": "ZigSyntaxFactsCore", + "receiver": "item", + "safe_navigation": false, + "span": [ + 31, + 16, + 31, + 31 + ] + } + ], + "state_reads": [ + { + "field": "admin", + "function": "process", + "line": 18, + "owner": "ZigSyntaxFactsCore", + "receiver": ".literal", + "span": [ + 18, + 20, + 18, + 26 + ] + }, + { + "field": "busy", + "function": "process", + "line": 25, + "owner": 
"ZigSyntaxFactsCore", + "receiver": ".literal", + "span": [ + 25, + 25, + 25, + 30 + ] + }, + { + "field": "children", + "function": "process", + "line": 31, + "owner": "ZigSyntaxFactsCore", + "receiver": "item", + "span": [ + 31, + 16, + 31, + 29 + ] + }, + { + "field": "count", + "function": "ready", + "line": 44, + "owner": "ZigSyntaxFactsCore", + "receiver": "self", + "span": [ + 44, + 15, + 44, + 25 + ] + }, + { + "field": "defaultCase", + "function": "process", + "line": 20, + "owner": "ZigSyntaxFactsCore", + "receiver": "self", + "span": [ + 20, + 20, + 20, + 36 + ] + }, + { + "field": "escalate", + "function": "process", + "line": 18, + "owner": "ZigSyntaxFactsCore", + "receiver": "self", + "span": [ + 18, + 30, + 18, + 43 + ] + }, + { + "field": "fallback", + "function": "process", + "line": 19, + "owner": "ZigSyntaxFactsCore", + "receiver": "self", + "span": [ + 19, + 22, + 19, + 35 + ] + }, + { + "field": "guest", + "function": "process", + "line": 19, + "owner": "ZigSyntaxFactsCore", + "receiver": ".literal", + "span": [ + 19, + 12, + 19, + 18 + ] + }, + { + "field": "idle", + "function": "process", + "line": 23, + "owner": "ZigSyntaxFactsCore", + "receiver": ".literal", + "span": [ + 23, + 27, + 23, + 32 + ] + }, + { + "field": "name", + "function": "process", + "line": 12, + "owner": "ZigSyntaxFactsCore", + "receiver": "user.profile", + "span": [ + 12, + 21, + 12, + 38 + ] + }, + { + "field": "owner", + "function": "process", + "line": 18, + "owner": "ZigSyntaxFactsCore", + "receiver": ".literal", + "span": [ + 18, + 12, + 18, + 18 + ] + }, + { + "field": "profile", + "function": "process", + "line": 12, + "owner": "ZigSyntaxFactsCore", + "receiver": "user", + "span": [ + 12, + 21, + 12, + 33 + ] + }, + { + "field": "publish", + "function": "process", + "line": 25, + "owner": "ZigSyntaxFactsCore", + "receiver": "self", + "span": [ + 25, + 12, + 25, + 24 + ] + }, + { + "field": "ready", + "function": "process", + "line": 23, + "owner": 
"ZigSyntaxFactsCore", + "receiver": "user", + "span": [ + 23, + 37, + 23, + 47 + ] + }, + { + "field": "role", + "function": "process", + "line": 17, + "owner": "ZigSyntaxFactsCore", + "receiver": "user", + "span": [ + 17, + 16, + 17, + 25 + ] + }, + { + "field": "status", + "function": "audit", + "line": 40, + "owner": "ZigSyntaxFactsCore", + "receiver": "self", + "span": [ + 40, + 12, + 40, + 23 + ] + }, + { + "field": "status", + "function": "process", + "line": 23, + "owner": "ZigSyntaxFactsCore", + "receiver": "self", + "span": [ + 23, + 12, + 23, + 23 + ] + } + ], + "state_writes": [ + { + "field": "count", + "function": "init", + "line": 8, + "owner": "ZigSyntaxFactsCore", + "receiver": ".literal", + "span": [ + 8, + 53, + 8, + 63 + ] + }, + { + "field": "count", + "function": "process", + "line": 24, + "owner": "ZigSyntaxFactsCore", + "receiver": "self", + "span": [ + 24, + 12, + 24, + 22 + ] + }, + { + "field": "status", + "function": "init", + "line": 8, + "owner": "ZigSyntaxFactsCore", + "receiver": ".literal", + "span": [ + 8, + 35, + 8, + 51 + ] + } + ], + "decisions": [ + { + "enclosing_span": [ + 17, + 8, + 21, + 9 + ], + "function": "process", + "kind": "case_dispatch", + "line": 17, + "members": [ + ".guest", + ".owner" + ], + "predicate": "user.role", + "span": [ + 17, + 8, + 21, + 9 + ] + }, + { + "enclosing_span": [ + 23, + 8, + 28, + 9 + ], + "function": "process", + "kind": "conjunction", + "line": 23, + "members": [ + "self.status == .idle", + "user.ready" + ], + "predicate": "self.status == .idle and user.ready", + "span": [ + 23, + 12, + 23, + 47 + ] + } + ], + "branch_decisions": [ + { + "function": "process", + "line": 17, + "predicate": "user.role", + "span": [ + 17, + 8, + 21, + 9 + ], + "state_refs": [ + "user.role" + ] + }, + { + "function": "process", + "line": 23, + "predicate": "self.status == .idle and user.ready", + "span": [ + 23, + 8, + 28, + 9 + ], + "state_refs": [ + ".literal.idle", + "status", + "user.ready" + ] + } + ], + 
"dispatch_sites": [ + + ], + "semantic_effects": [ + + ], + "predicate_bodies": [ + + ] + } + ] +} diff --git a/gems/decomplex/examples/syntax-facts/php/core.php b/gems/decomplex/examples/syntax-facts/php/core.php new file mode 100644 index 000000000..ed925a380 --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/php/core.php @@ -0,0 +1,60 @@ +status = $status; + $this->sink = $sink; + } + + public function process($user, array $items, callable $callback): ?string + { + $name = $user?->profile?->name; + $account = new Account($name, $user->active); + $callback($account); + + switch ($user->role) { + case "owner": + case "admin": + $this->escalate($user); + break; + case "guest": + $this->fallback($user); + break; + default: + $this->defaultCase($user); + break; + } + + if ($this->status === "idle" && $user->ready) { + $this->count += 1; + $this->publish("busy"); + } else { + print "not ready"; + } + + foreach ($items as $item) { + $item->children(); + } + + return $name ?? null; + } + + private function audit(string $name): string + { + print($name); + $this->sink->send("record", $name); + return $this->status; + } + + public function ready(): bool + { + return $this->count > 0; + } +} + diff --git a/gems/decomplex/examples/syntax-facts/python/core.py b/gems/decomplex/examples/syntax-facts/python/core.py new file mode 100644 index 000000000..5ef1ef474 --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/python/core.py @@ -0,0 +1,65 @@ +from __future__ import annotations + +import os.path + + +class PythonSyntaxFactsCore: + def __init__(self, lock, resource): + self._lock = lock + self.resource = resource + self.count = 0 + + def process(self, user: "User", items: list[str], callback): + name: str = user.profile.name + pending: str + result = [] + marker = "\\n" + + with self._lock: + self.count += 1 + + with open(user.path) as handle: + data = handle.read() + + for item in items: + if item is None: + continue + + if user.ready and item.startswith("x"): + 
callback(item) + + match item: + case "owner" | "admin": + self.escalate(user) + case _: + self.default(user) + + result.append(item) + + index = 0 + while index < len(result): + if result[index] == "stop": + break + + try: + self.audit(result[index]) + except ValueError: + continue + + index += 1 + + assert result + return data if result else marker + + def _normalize(self, value: str | None = None): + cleaned = value.strip() if value is not None else "missing" + return cleaned + + def generator(self, values): + for value in values: + yield value + + def simple_with(self, resource): + with resource: + pass + diff --git a/gems/decomplex/examples/syntax-facts/ruby/core.rb b/gems/decomplex/examples/syntax-facts/ruby/core.rb new file mode 100644 index 000000000..0115a0e2f --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/ruby/core.rb @@ -0,0 +1,73 @@ +# frozen_string_literal: true + +class Account < T::Struct + const :name, String + prop :active, T::Boolean +end + +class RubySyntaxFactsCore + ADMIN_ROLES = %w[owner admin].freeze + Status = T.type_alias { Symbol } + + attr_reader :count + + def self.build(source) + new(source) + end + + sig { params(source: Object).void } + def initialize(source) + @source = source + @count = T.let(0, Integer) + @status = T.let(:idle, Status) + end + + sig { params(user: Object, items: Array, callback: Proc).returns(Symbol) } + def process(user, items, callback) + name = user&.profile&.name + account = Account.new(name: name, active: user.active?) + audit(name) + callback.(account) + + case user.role + when "owner", *ADMIN_ROLES + escalate(user) + when nil + fallback(user) + else + default(user) + end + + if @status == :idle && user.ready? 
+ @count += 1 + publish(:ready) + else + warn("not ready") + end + + items.flat_map do |item| + item.children + end + + @status + end + + private + + def audit(name) + puts(name) + send(:record, name) + $GLOBAL_STATE + @source + end + + private def inline_private(value) + helper(value) + end + + def ready? + @count > 0 + end + + def loaded? = @status == :ready +end diff --git a/gems/decomplex/examples/syntax-facts/rust/core.rs b/gems/decomplex/examples/syntax-facts/rust/core.rs new file mode 100644 index 000000000..461089b93 --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/rust/core.rs @@ -0,0 +1,56 @@ +pub struct RustSyntaxFactsCore { + status: Status, + count: usize, +} + +pub enum Status { + Idle, + Busy, +} + +impl RustSyntaxFactsCore { + pub fn new(status: Status) -> Self { + Self { status, count: 0 } + } + + pub fn process( + &mut self, + user: &User, + items: Vec, + callback: fn(&Account), + ) -> Option { + let name = user.profile().name().to_string(); + let account = Account::new(name.clone(), user.active()); + callback(&account); + + match user.role() { + Role::Owner | Role::Admin => self.escalate(user), + Role::Guest => self.fallback(user), + _ => self.default_case(user), + } + + if matches!(self.status, Status::Idle) && user.ready() { + self.count += 1; + self.publish(Status::Busy); + } else { + self.warn("not ready"); + } + + for item in items { + item.children(); + } + + Some(name) + } + + fn audit(&self, name: &str) { + println!("{}", name); + self.send("record", name); + self.status(); + } + + fn ready(&self) -> bool { + self.count > 0 + } +} + diff --git a/gems/decomplex/examples/syntax-facts/swift/core.swift b/gems/decomplex/examples/syntax-facts/swift/core.swift new file mode 100644 index 000000000..769e94e4d --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/swift/core.swift @@ -0,0 +1,54 @@ +enum Status { + case idle + case busy +} + +class SwiftSyntaxFactsCore { + private var status: Status + private var count = 0 + private let 
sink: Sink + + init(status: Status, sink: Sink) { + self.status = status + self.sink = sink + } + + func process(user: User, items: [Item], callback: (Account) -> Void) -> String? { + let name = user.profile?.name + let account = Account(name: name, active: user.active) + callback(account) + + switch user.role { + case "owner", "admin": + self.escalate(user) + case "guest": + self.fallback(user) + default: + self.defaultCase(user) + } + + if self.status == .idle && user.ready { + self.count += 1 + self.publish(.busy) + } else { + print("not ready") + } + + for item in items { + item.children() + } + + return name ?? "missing" + } + + private func audit(name: String) -> Status { + print(name) + sink.send("record", name) + return status + } + + func ready() -> Bool { + return count > 0 + } +} + diff --git a/gems/decomplex/examples/syntax-facts/typescript/core.ts b/gems/decomplex/examples/syntax-facts/typescript/core.ts new file mode 100644 index 000000000..1e1c6fdbb --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/typescript/core.ts @@ -0,0 +1,72 @@ +type Status = "idle" | "busy"; + +interface User { + role: string; + ready: boolean; + active: boolean; + profile?: { name?: string }; +} + +interface Account { + name: string | undefined; + active: boolean; +} + +export class TypeScriptSyntaxFactsCore { + private status: Status; + private count = 0; + + constructor(status: Status, private sink: Sink) { + this.status = status; + } + + process(user: User, items: string[], callback: (account: Account) => void): string | undefined { + const name = user.profile?.name; + const account: Account = { name, active: user.active }; + callback(account); + + switch (user.role) { + case "owner": + case "admin": + this.escalate(user); + break; + case "guest": + this.fallback(user); + break; + default: + this.defaultCase(user); + } + + if (this.status === "idle" && user.ready) { + this.count += 1; + this.publish("busy"); + } else { + console.warn("not ready"); + } + + for (const 
index in items) { + this.audit(items[index]); + } + + return name ?? undefined; + } + + private audit(name: string): Status { + console.log(name); + this.sink.send("record", name); + return this.status; + } + + ready(): boolean { + return this.count > 0; + } +} + +export function normalizeValue(input?: string): string | undefined { + return input ?? undefined; +} + +interface Sink { + send(kind: string, value: string): void; +} + diff --git a/gems/decomplex/examples/syntax-facts/zig/core.zig b/gems/decomplex/examples/syntax-facts/zig/core.zig new file mode 100644 index 000000000..af8fc2b74 --- /dev/null +++ b/gems/decomplex/examples/syntax-facts/zig/core.zig @@ -0,0 +1,56 @@ +const std = @import("std"); + +pub const ZigSyntaxFactsCore = struct { + status: Status, + count: usize, + + pub fn init(status: Status) ZigSyntaxFactsCore { + return ZigSyntaxFactsCore{ .status = status, .count = 0 }; + } + + pub fn process(self: *ZigSyntaxFactsCore, user: anytype, items: []const Item, callback: anytype) ?[]const u8 { + const name = user.profile.name; + var result: ?[]const u8 = null; + + callback(user); + + switch (user.role) { + .owner, .admin => self.escalate(user), + .guest => self.fallback(user), + else => self.defaultCase(user), + } + + if (self.status == .idle and user.ready) { + self.count += 1; + self.publish(.busy); + } else { + std.debug.print("not ready", .{}); + } + + for (items) |item| { + _ = item.children(); + } + + result = name; + return result; + } + + fn audit(self: *ZigSyntaxFactsCore, name: []const u8) void { + std.debug.print("{s}", .{name}); + _ = self.status; + } + + fn ready(self: *ZigSyntaxFactsCore) bool { + return self.count > 0; + } +}; + +const Status = enum { + idle, + busy, +}; + +const Item = struct { + value: []const u8, +}; + diff --git a/gems/decomplex/examples/typescript/co-update.ts b/gems/decomplex/examples/typescript/co-update.ts new file mode 100644 index 000000000..7ced4120f --- /dev/null +++ 
b/gems/decomplex/examples/typescript/co-update.ts @@ -0,0 +1,4 @@ +function stable_one(node: Node) { node.storage = 1; node.provenance = 1; } +function stable_two(node: Node) { node.storage = 1; node.provenance = 1; } +function stable_three(node: Node) { node.storage = 1; node.provenance = 1; } +function misses_provenance(node: Node) { node.storage = 1; } diff --git a/gems/decomplex/examples/typescript/decision-pressure.ts b/gems/decomplex/examples/typescript/decision-pressure.ts new file mode 100644 index 000000000..3356de7d5 --- /dev/null +++ b/gems/decomplex/examples/typescript/decision-pressure.ts @@ -0,0 +1 @@ +function scan(node: Node): boolean { const value = node.symbol; return value.isNull(); } diff --git a/gems/decomplex/examples/typescript/derived-state.ts b/gems/decomplex/examples/typescript/derived-state.ts new file mode 100644 index 000000000..de02aa6d3 --- /dev/null +++ b/gems/decomplex/examples/typescript/derived-state.ts @@ -0,0 +1 @@ +function check(input: number) { const cached = input + 1; input = 2; print(cached); } diff --git a/gems/decomplex/examples/typescript/false-simplicity.ts b/gems/decomplex/examples/typescript/false-simplicity.ts new file mode 100644 index 000000000..4bd9d8e03 --- /dev/null +++ b/gems/decomplex/examples/typescript/false-simplicity.ts @@ -0,0 +1 @@ +class FalseSimplicityExample { hack() { print("hidden IO"); } } diff --git a/gems/decomplex/examples/typescript/fat-union.ts b/gems/decomplex/examples/typescript/fat-union.ts new file mode 100644 index 000000000..68ee3bff9 --- /dev/null +++ b/gems/decomplex/examples/typescript/fat-union.ts @@ -0,0 +1 @@ +function handle(node: Node) { switch (node) { case AST.Call: node.line(); node.col(); node.ty(); node.span(); node.parent(); node.recv(); break; case AST.Func: node.line(); node.col(); node.ty(); node.span(); node.parent(); node.name(); break; case AST.Lit: node.line(); node.col(); node.ty(); node.span(); node.parent(); node.value(); break; } } diff --git 
a/gems/decomplex/examples/typescript/flay-similarity.ts b/gems/decomplex/examples/typescript/flay-similarity.ts new file mode 100644 index 000000000..b838287c8 --- /dev/null +++ b/gems/decomplex/examples/typescript/flay-similarity.ts @@ -0,0 +1,2 @@ +function first_clone(node: Node): number { let total = 0; const value1 = node.part1; if (value1.ready() && value1.enabled()) { total += value1.amount; } const value2 = node.part2; if (value2.ready() && value2.enabled()) { total += value2.amount; } const value3 = node.part3; if (value3.ready() && value3.enabled()) { total += value3.amount; } const value4 = node.part4; if (value4.ready() && value4.enabled()) { total += value4.amount; } const value5 = node.part5; if (value5.ready() && value5.enabled()) { total += value5.amount; } const value6 = node.part6; if (value6.ready() && value6.enabled()) { total += value6.amount; } const value7 = node.part7; if (value7.ready() && value7.enabled()) { total += value7.amount; } const value8 = node.part8; if (value8.ready() && value8.enabled()) { total += value8.amount; } return total; } +function second_clone(entry: Node): number { let total = 0; const item1 = entry.part1; if (item1.ready() && item1.enabled()) { total += item1.amount; } const item2 = entry.part2; if (item2.ready() && item2.enabled()) { total += item2.amount; } const item3 = entry.part3; if (item3.ready() && item3.enabled()) { total += item3.amount; } const item4 = entry.part4; if (item4.ready() && item4.enabled()) { total += item4.amount; } const item5 = entry.part5; if (item5.ready() && item5.enabled()) { total += item5.amount; } const item6 = entry.part6; if (item6.ready() && item6.enabled()) { total += item6.amount; } const item7 = entry.part7; if (item7.ready() && item7.enabled()) { total += item7.amount; } const item8 = entry.part8; if (item8.ready() && item8.enabled()) { total += item8.amount; } return total; } diff --git a/gems/decomplex/examples/typescript/function-lcom.ts 
b/gems/decomplex/examples/typescript/function-lcom.ts new file mode 100644 index 000000000..42c2f141e --- /dev/null +++ b/gems/decomplex/examples/typescript/function-lcom.ts @@ -0,0 +1,12 @@ +function mixed(price: number, tax: number, logger: Logger) { + const subtotal = price + tax; + const total = subtotal * 2; + const rounded = total.round(); + + const timestamp = now(); + const buffer = Buffer.init(); + buffer.push(timestamp); + logger.info(buffer); + + return Result.init(rounded, buffer); +} diff --git a/gems/decomplex/examples/typescript/implicit-control-flow.ts b/gems/decomplex/examples/typescript/implicit-control-flow.ts new file mode 100644 index 000000000..7878c7cf7 --- /dev/null +++ b/gems/decomplex/examples/typescript/implicit-control-flow.ts @@ -0,0 +1 @@ +class FlowExample { prepare() { this.status = 1; } validate() { this.valid = this.status == 1; } commit() { this.done = this.valid; } ok1() { this.prepare(); this.validate(); this.commit(); } ok2() { this.prepare(); this.validate(); this.commit(); } ok3() { this.prepare(); this.validate(); this.commit(); } ok4() { this.prepare(); this.validate(); this.commit(); } drift() { this.validate(); this.prepare(); this.commit(); } } diff --git a/gems/decomplex/examples/typescript/inconsistent-rename-clone.ts b/gems/decomplex/examples/typescript/inconsistent-rename-clone.ts new file mode 100644 index 000000000..67430611b --- /dev/null +++ b/gems/decomplex/examples/typescript/inconsistent-rename-clone.ts @@ -0,0 +1,2 @@ +function original() { const src = fetch(1); check(src); store(src); finalize(src); } +function pasted() { const dst = fetch(2); check(dst); store(src); finalize(dst); } diff --git a/gems/decomplex/examples/typescript/local-flow.ts b/gems/decomplex/examples/typescript/local-flow.ts new file mode 100644 index 000000000..65c38cad3 --- /dev/null +++ b/gems/decomplex/examples/typescript/local-flow.ts @@ -0,0 +1,9 @@ +function mixed(price: number, tax: number) { + const subtotal = price + tax; + 
const total = subtotal.round(); + + const timestamp = now(); + const buffer = Buffer.init(); + buffer.push(timestamp); + return Result.init(total, buffer); +} diff --git a/gems/decomplex/examples/typescript/locality-drag.ts b/gems/decomplex/examples/typescript/locality-drag.ts new file mode 100644 index 000000000..581fc1db1 --- /dev/null +++ b/gems/decomplex/examples/typescript/locality-drag.ts @@ -0,0 +1,27 @@ +function run(user: User, cart: Cart, logger: Logger) { + const receipt_id = user.id; + + const total = cart.total; + if (total > 100) { + if (cart.discountable()) { + const discount = 10; + } + } + if (cart.taxable()) { + if (cart.region) { + const tax = total * 2; + } + } + if (logger.enabled()) { + if (logger.debug()) { + logger.info(total); + } + } + if (cart.valid()) { + if (cart.ready()) { + const status = 1; + } + } + + emit(receipt_id); +} diff --git a/gems/decomplex/examples/typescript/miner.ts b/gems/decomplex/examples/typescript/miner.ts new file mode 100644 index 000000000..384e6c1ba --- /dev/null +++ b/gems/decomplex/examples/typescript/miner.ts @@ -0,0 +1,4 @@ +function one(a: boolean,b: boolean,c: boolean): boolean { return a && b && c; } +function two(a: boolean,b: boolean,c: boolean): boolean { return a && b && c; } +function three(a: boolean,b: boolean,c: boolean): boolean { return a && b && c; } +function broken(a: boolean,b: boolean): boolean { return a && b; } diff --git a/gems/decomplex/examples/typescript/operational-discontinuity.ts b/gems/decomplex/examples/typescript/operational-discontinuity.ts new file mode 100644 index 000000000..629c5e9a2 --- /dev/null +++ b/gems/decomplex/examples/typescript/operational-discontinuity.ts @@ -0,0 +1,9 @@ +function phase_shift() { + const a = 1; + const b = 2; + + // Phase 2 + const x = 3; + const y = 4; + print(x); print(y); +} diff --git a/gems/decomplex/examples/typescript/oversized-predicate.ts b/gems/decomplex/examples/typescript/oversized-predicate.ts new file mode 100644 index 
000000000..0563b6ef1 --- /dev/null +++ b/gems/decomplex/examples/typescript/oversized-predicate.ts @@ -0,0 +1 @@ +function complex_check(a: boolean,b: boolean,c: boolean,d: boolean) { if (a && b && c && d) { print("too big"); } } diff --git a/gems/decomplex/examples/typescript/path-condition.ts b/gems/decomplex/examples/typescript/path-condition.ts new file mode 100644 index 000000000..72a739b13 --- /dev/null +++ b/gems/decomplex/examples/typescript/path-condition.ts @@ -0,0 +1,4 @@ +function one(x: X,y: Y,z: Z) { if (x.p() && y.q() && z.r()) { go(x); } } +function two(x: X,y: Y,z: Z) { if (x.p() && y.q() && z.r()) { go(x); } } +function three(x: X,y: Y,z: Z) { if (x.p() && y.q() && z.r()) { go(x); } } +function bug(x: X,y: Y,z: Z) { if (x.p() && y.q()) { go(x); } } diff --git a/gems/decomplex/examples/typescript/predicate-alias.ts b/gems/decomplex/examples/typescript/predicate-alias.ts new file mode 100644 index 000000000..6d32d956a --- /dev/null +++ b/gems/decomplex/examples/typescript/predicate-alias.ts @@ -0,0 +1,3 @@ +function first(): boolean { return true; } +function second(): boolean { return true; } +function other(): boolean { return false; } diff --git a/gems/decomplex/examples/typescript/redundant-nil-guard.ts b/gems/decomplex/examples/typescript/redundant-nil-guard.ts new file mode 100644 index 000000000..ab1fcae19 --- /dev/null +++ b/gems/decomplex/examples/typescript/redundant-nil-guard.ts @@ -0,0 +1 @@ +function check(value: Value) { if (value.isSome()) { value.isNull(); } } diff --git a/gems/decomplex/examples/typescript/semantic-alias.ts b/gems/decomplex/examples/typescript/semantic-alias.ts new file mode 100644 index 000000000..9161d9f3c --- /dev/null +++ b/gems/decomplex/examples/typescript/semantic-alias.ts @@ -0,0 +1,4 @@ +function frame(node: Node): boolean { return node.provenance == FRAME; } +function is_frame(node: Node): boolean { return provenance == FRAME; } +function heap(node: Node): boolean { return node.provenance == HEAP; } 
+function somewhere(node: Node): number { if (node.provenance == FRAME) { return 1; } return 0; } diff --git a/gems/decomplex/examples/typescript/sequence-mine.ts b/gems/decomplex/examples/typescript/sequence-mine.ts new file mode 100644 index 000000000..72ee4959a --- /dev/null +++ b/gems/decomplex/examples/typescript/sequence-mine.ts @@ -0,0 +1,5 @@ +function one() { alloc_mark(x); body1(); cleanup(x); } +function two() { alloc_mark(y); body2(); cleanup(y); } +function three() { alloc_mark(z); body3(); cleanup(z); } +function four() { alloc_mark(w); body4(); cleanup(w); } +function leak() { alloc_mark(q); use_value(q); } diff --git a/gems/decomplex/examples/typescript/state-branch-density.ts b/gems/decomplex/examples/typescript/state-branch-density.ts new file mode 100644 index 000000000..57427b860 --- /dev/null +++ b/gems/decomplex/examples/typescript/state-branch-density.ts @@ -0,0 +1 @@ +class StateBranchChecker { check(admin: boolean, name: string) { if (admin) { this.checked = true; } if (this.checked && name == "admin") { print("hello"); } } } diff --git a/gems/decomplex/examples/typescript/state-mesh.ts b/gems/decomplex/examples/typescript/state-mesh.ts new file mode 100644 index 000000000..3e6271f94 --- /dev/null +++ b/gems/decomplex/examples/typescript/state-mesh.ts @@ -0,0 +1 @@ +class StateMeshExample { initialize() { this.a = 1; this.b = 2; } writer() { this.a = 3; } reader() { return this.a + this.b; } a_alias() { return this.a; } } diff --git a/gems/decomplex/examples/typescript/structural-topology.ts b/gems/decomplex/examples/typescript/structural-topology.ts new file mode 100644 index 000000000..b7559a669 --- /dev/null +++ b/gems/decomplex/examples/typescript/structural-topology.ts @@ -0,0 +1 @@ +class Worker { run(items) { this.prepare(); if (this.ready()) { this.validate(); } for (const item of items) { this.helper(item); } } prepare() {} ready() { return true; } validate() {} helper(item) { return item; } } diff --git 
a/gems/decomplex/examples/typescript/temporal-ordering-pressure.ts b/gems/decomplex/examples/typescript/temporal-ordering-pressure.ts new file mode 100644 index 000000000..0901e3c2e --- /dev/null +++ b/gems/decomplex/examples/typescript/temporal-ordering-pressure.ts @@ -0,0 +1 @@ +class TemporalOrderExample { one() { this.a = 1; } two() { this.a = 2; this.b = 3; } three() { this.b = 4; } reader() { return this.a; } } diff --git a/gems/decomplex/examples/typescript/weighted-inlined-complexity.ts b/gems/decomplex/examples/typescript/weighted-inlined-complexity.ts new file mode 100644 index 000000000..8dde50019 --- /dev/null +++ b/gems/decomplex/examples/typescript/weighted-inlined-complexity.ts @@ -0,0 +1,5 @@ +function checkout(user: User, cart: Cart) { validate_user(user); apply_discount(cart); process_payment(user, cart); audit_cart(cart); } +function validate_user(user: User) { if (user.active() && !user.suspended()) { if (user.profile.complete()) { return true; } else { return false; } } else { return false; } } +function apply_discount(cart: Cart) { if (cart.total > 100 && eligible()) { if (holiday()) { return 20; } else if (loyalty_month()) { return 15; } else { return 10; } } return 0; } +function process_payment(user: User, cart: Cart) { if (gateway.ready()) { if (cart.total > 0 && user.active()) { if (fraud_check(user)) { charge(user, cart); } else { decline(user); } } } } +function audit_cart(cart: Cart) { for (const item of cart.items) { if (item.taxable()) { if (item.region && item.amount > 0) { record_tax(item); } } } } diff --git a/gems/decomplex/examples/zig/co-update.zig b/gems/decomplex/examples/zig/co-update.zig new file mode 100644 index 000000000..2170aaa99 --- /dev/null +++ b/gems/decomplex/examples/zig/co-update.zig @@ -0,0 +1,23 @@ +const Node = struct { + storage: i32, + provenance: i32, +}; + +pub fn stable_one(node: *Node) void { + node.storage = 1; + node.provenance = 1; +} + +pub fn stable_two(node: *Node) void { + node.storage = 1; + 
node.provenance = 1; +} + +pub fn stable_three(node: *Node) void { + node.storage = 1; + node.provenance = 1; +} + +pub fn misses_provenance(node: *Node) void { + node.storage = 1; +} diff --git a/gems/decomplex/examples/zig/decision-pressure.zig b/gems/decomplex/examples/zig/decision-pressure.zig new file mode 100644 index 000000000..c7bfb6725 --- /dev/null +++ b/gems/decomplex/examples/zig/decision-pressure.zig @@ -0,0 +1,3 @@ +pub fn scan(node: Node) bool { + return node.symbol.isNull(); +} diff --git a/gems/decomplex/examples/zig/derived-state.zig b/gems/decomplex/examples/zig/derived-state.zig new file mode 100644 index 000000000..47904c4ad --- /dev/null +++ b/gems/decomplex/examples/zig/derived-state.zig @@ -0,0 +1,6 @@ +pub fn check(input_value: i32) void { + var input = input_value; + const cached = input + 1; + input = 2; + print(cached); +} diff --git a/gems/decomplex/examples/zig/false-simplicity.zig b/gems/decomplex/examples/zig/false-simplicity.zig new file mode 100644 index 000000000..dc9f857b0 --- /dev/null +++ b/gems/decomplex/examples/zig/false-simplicity.zig @@ -0,0 +1,6 @@ +const FalseSimplicityExample = struct { + pub fn hack(self: *FalseSimplicityExample) void { + _ = self; + print("hidden IO"); + } +}; diff --git a/gems/decomplex/examples/zig/fat-union.zig b/gems/decomplex/examples/zig/fat-union.zig new file mode 100644 index 000000000..7ac30e8a2 --- /dev/null +++ b/gems/decomplex/examples/zig/fat-union.zig @@ -0,0 +1,28 @@ +pub fn handle(node: Ast) void { + switch (node) { + AST.Call => { + node.line(); + node.col(); + node.ty(); + node.span(); + node.parent(); + node.recv(); + }, + AST.Func => { + node.line(); + node.col(); + node.ty(); + node.span(); + node.parent(); + node.name(); + }, + AST.Lit => { + node.line(); + node.col(); + node.ty(); + node.span(); + node.parent(); + node.value(); + }, + } +} diff --git a/gems/decomplex/examples/zig/flay-similarity.zig b/gems/decomplex/examples/zig/flay-similarity.zig new file mode 100644 index 
000000000..72a75ff2e --- /dev/null +++ b/gems/decomplex/examples/zig/flay-similarity.zig @@ -0,0 +1,41 @@ +pub fn first_clone(node: Node) i32 { + var total = 0; + const value1 = node.part1; + if (value1.ready() and value1.enabled()) { total += value1.amount; } + const value2 = node.part2; + if (value2.ready() and value2.enabled()) { total += value2.amount; } + const value3 = node.part3; + if (value3.ready() and value3.enabled()) { total += value3.amount; } + const value4 = node.part4; + if (value4.ready() and value4.enabled()) { total += value4.amount; } + const value5 = node.part5; + if (value5.ready() and value5.enabled()) { total += value5.amount; } + const value6 = node.part6; + if (value6.ready() and value6.enabled()) { total += value6.amount; } + const value7 = node.part7; + if (value7.ready() and value7.enabled()) { total += value7.amount; } + const value8 = node.part8; + if (value8.ready() and value8.enabled()) { total += value8.amount; } + return total; +} + +pub fn second_clone(entry: Node) i32 { + var total = 0; + const item1 = entry.part1; + if (item1.ready() and item1.enabled()) { total += item1.amount; } + const item2 = entry.part2; + if (item2.ready() and item2.enabled()) { total += item2.amount; } + const item3 = entry.part3; + if (item3.ready() and item3.enabled()) { total += item3.amount; } + const item4 = entry.part4; + if (item4.ready() and item4.enabled()) { total += item4.amount; } + const item5 = entry.part5; + if (item5.ready() and item5.enabled()) { total += item5.amount; } + const item6 = entry.part6; + if (item6.ready() and item6.enabled()) { total += item6.amount; } + const item7 = entry.part7; + if (item7.ready() and item7.enabled()) { total += item7.amount; } + const item8 = entry.part8; + if (item8.ready() and item8.enabled()) { total += item8.amount; } + return total; +} diff --git a/gems/decomplex/examples/zig/function-lcom.zig b/gems/decomplex/examples/zig/function-lcom.zig new file mode 100644 index 000000000..48bd1c086 --- 
/dev/null +++ b/gems/decomplex/examples/zig/function-lcom.zig @@ -0,0 +1,12 @@ +pub fn mixed(price: i32, tax: i32, logger: Logger) Result { + const subtotal = price + tax; + const total = subtotal * 2; + const rounded = total.round(); + + const timestamp = now(); + var buffer = Buffer.init(); + buffer.push(timestamp); + logger.info(buffer); + + return Result.init(rounded, buffer); +} diff --git a/gems/decomplex/examples/zig/implicit-control-flow.zig b/gems/decomplex/examples/zig/implicit-control-flow.zig new file mode 100644 index 000000000..3fc7dab36 --- /dev/null +++ b/gems/decomplex/examples/zig/implicit-control-flow.zig @@ -0,0 +1,15 @@ +const FlowExample = struct { + status: i32, + valid: bool, + done: bool, + + pub fn prepare(self: *FlowExample) void { self.status = 1; } + pub fn validate(self: *FlowExample) void { self.valid = self.status == 1; } + pub fn commit(self: *FlowExample) void { self.done = self.valid; } + + pub fn ok1(self: *FlowExample) void { self.prepare(); self.validate(); self.commit(); } + pub fn ok2(self: *FlowExample) void { self.prepare(); self.validate(); self.commit(); } + pub fn ok3(self: *FlowExample) void { self.prepare(); self.validate(); self.commit(); } + pub fn ok4(self: *FlowExample) void { self.prepare(); self.validate(); self.commit(); } + pub fn drift(self: *FlowExample) void { self.validate(); self.prepare(); self.commit(); } +}; diff --git a/gems/decomplex/examples/zig/inconsistent-rename-clone.zig b/gems/decomplex/examples/zig/inconsistent-rename-clone.zig new file mode 100644 index 000000000..e482bdd60 --- /dev/null +++ b/gems/decomplex/examples/zig/inconsistent-rename-clone.zig @@ -0,0 +1,13 @@ +pub fn original() void { + const src = fetch(1); + check(src); + store(src); + finalize(src); +} + +pub fn pasted() void { + const dst = fetch(2); + check(dst); + store(src); + finalize(dst); +} diff --git a/gems/decomplex/examples/zig/local-flow.zig b/gems/decomplex/examples/zig/local-flow.zig new file mode 100644 index 
000000000..a0eb2b536 --- /dev/null +++ b/gems/decomplex/examples/zig/local-flow.zig @@ -0,0 +1,9 @@ +pub fn mixed(price: i32, tax: i32) Result { + const subtotal = price + tax; + const total = subtotal.round(); + + const timestamp = now(); + var buffer = Buffer.init(); + buffer.push(timestamp); + return Result.init(total, buffer); +} diff --git a/gems/decomplex/examples/zig/locality-drag.zig b/gems/decomplex/examples/zig/locality-drag.zig new file mode 100644 index 000000000..67276fa64 --- /dev/null +++ b/gems/decomplex/examples/zig/locality-drag.zig @@ -0,0 +1,30 @@ +pub fn run(user: User, cart: Cart, logger: Logger) void { + const receipt_id = user.id; + + const total = cart.total; + if (total > 100) { + if (cart.discountable()) { + const discount = 10; + _ = discount; + } + } + if (cart.taxable()) { + if (cart.region) { + const tax = total * 2; + _ = tax; + } + } + if (logger.enabled()) { + if (logger.debug()) { + logger.info(total); + } + } + if (cart.valid()) { + if (cart.ready()) { + const status = 1; + _ = status; + } + } + + emit(receipt_id); +} diff --git a/gems/decomplex/examples/zig/miner.zig b/gems/decomplex/examples/zig/miner.zig new file mode 100644 index 000000000..e89739c2f --- /dev/null +++ b/gems/decomplex/examples/zig/miner.zig @@ -0,0 +1,15 @@ +pub fn one(a: bool, b: bool, c: bool) bool { + return a and b and c; +} + +pub fn two(a: bool, b: bool, c: bool) bool { + return a and b and c; +} + +pub fn three(a: bool, b: bool, c: bool) bool { + return a and b and c; +} + +pub fn broken(a: bool, b: bool) bool { + return a and b; +} diff --git a/gems/decomplex/examples/zig/operational-discontinuity.zig b/gems/decomplex/examples/zig/operational-discontinuity.zig new file mode 100644 index 000000000..c63ac551d --- /dev/null +++ b/gems/decomplex/examples/zig/operational-discontinuity.zig @@ -0,0 +1,12 @@ +pub fn phase_shift() void { + const a = 1; + const b = 2; + _ = a; + _ = b; + + // Phase 2 + const x = 3; + const y = 4; + print(x); + print(y); +} diff 
--git a/gems/decomplex/examples/zig/oversized-predicate.zig b/gems/decomplex/examples/zig/oversized-predicate.zig new file mode 100644 index 000000000..d9afdb4f2 --- /dev/null +++ b/gems/decomplex/examples/zig/oversized-predicate.zig @@ -0,0 +1,5 @@ +pub fn complex_check(a: bool, b: bool, c: bool, d: bool) void { + if (a and b and c and d) { + print("too big"); + } +} diff --git a/gems/decomplex/examples/zig/path-condition.zig b/gems/decomplex/examples/zig/path-condition.zig new file mode 100644 index 000000000..df71c9ca2 --- /dev/null +++ b/gems/decomplex/examples/zig/path-condition.zig @@ -0,0 +1,15 @@ +pub fn one(x: X, y: Y, z: Z) void { + if (x.p() and y.q() and z.r()) { go(x); } +} + +pub fn two(x: X, y: Y, z: Z) void { + if (x.p() and y.q() and z.r()) { go(x); } +} + +pub fn three(x: X, y: Y, z: Z) void { + if (x.p() and y.q() and z.r()) { go(x); } +} + +pub fn bug(x: X, y: Y, z: Z) void { + if (x.p() and y.q()) { go(x); } +} diff --git a/gems/decomplex/examples/zig/predicate-alias.zig b/gems/decomplex/examples/zig/predicate-alias.zig new file mode 100644 index 000000000..db6b4bd96 --- /dev/null +++ b/gems/decomplex/examples/zig/predicate-alias.zig @@ -0,0 +1,11 @@ +pub fn first() bool { + return true; +} + +pub fn second() bool { + return true; +} + +pub fn other() bool { + return false; +} diff --git a/gems/decomplex/examples/zig/redundant-nil-guard.zig b/gems/decomplex/examples/zig/redundant-nil-guard.zig new file mode 100644 index 000000000..7b247b668 --- /dev/null +++ b/gems/decomplex/examples/zig/redundant-nil-guard.zig @@ -0,0 +1,5 @@ +pub fn check(value: OptionalItem) void { + if (value.isSome()) { + value.isNull(); + } +} diff --git a/gems/decomplex/examples/zig/semantic-alias.zig b/gems/decomplex/examples/zig/semantic-alias.zig new file mode 100644 index 000000000..f0256494c --- /dev/null +++ b/gems/decomplex/examples/zig/semantic-alias.zig @@ -0,0 +1,8 @@ +pub fn frame(node: Node) bool { return node.provenance == FRAME; } +pub fn is_frame(node: 
Node) bool { return provenance == FRAME; } +pub fn heap(node: Node) bool { return node.provenance == HEAP; } + +pub fn somewhere(node: Node) i32 { + if (node.provenance == FRAME) { return 1; } + return 0; +} diff --git a/gems/decomplex/examples/zig/sequence-mine.zig b/gems/decomplex/examples/zig/sequence-mine.zig new file mode 100644 index 000000000..3b0b73680 --- /dev/null +++ b/gems/decomplex/examples/zig/sequence-mine.zig @@ -0,0 +1,5 @@ +pub fn one() void { alloc_mark(x); body1(); cleanup(x); } +pub fn two() void { alloc_mark(y); body2(); cleanup(y); } +pub fn three() void { alloc_mark(z); body3(); cleanup(z); } +pub fn four() void { alloc_mark(w); body4(); cleanup(w); } +pub fn leak() void { alloc_mark(q); use_value(q); } diff --git a/gems/decomplex/examples/zig/state-branch-density.zig b/gems/decomplex/examples/zig/state-branch-density.zig new file mode 100644 index 000000000..d9af8376c --- /dev/null +++ b/gems/decomplex/examples/zig/state-branch-density.zig @@ -0,0 +1,13 @@ +const StateBranchChecker = struct { + checked: bool, + + pub fn check(self: *StateBranchChecker, admin: bool, name: []const u8) void { + if (admin) { + self.checked = true; + } + + if (self.checked and name == "admin") { + print("hello"); + } + } +}; diff --git a/gems/decomplex/examples/zig/state-mesh.zig b/gems/decomplex/examples/zig/state-mesh.zig new file mode 100644 index 000000000..b8118c136 --- /dev/null +++ b/gems/decomplex/examples/zig/state-mesh.zig @@ -0,0 +1,21 @@ +const StateMeshExample = struct { + a: i32, + b: i32, + + pub fn initialize(self: *StateMeshExample) void { + self.a = 1; + self.b = 2; + } + + pub fn writer(self: *StateMeshExample) void { + self.a = 3; + } + + pub fn reader(self: *StateMeshExample) i32 { + return self.a + self.b; + } + + pub fn a_alias(self: *StateMeshExample) i32 { + return self.a; + } +}; diff --git a/gems/decomplex/examples/zig/structural-topology.zig b/gems/decomplex/examples/zig/structural-topology.zig new file mode 100644 index 
000000000..833286c3d --- /dev/null +++ b/gems/decomplex/examples/zig/structural-topology.zig @@ -0,0 +1,16 @@ +const Worker = struct { + pub fn run(self: *Worker, items: Items) void { + self.prepare(); + if (self.ready()) { + self.validate(); + } + for (items) |item| { + self.helper(item); + } + } + + fn prepare(self: *Worker) void { _ = self; } + fn ready(self: *Worker) bool { _ = self; return true; } + pub fn validate(self: *Worker) void { _ = self; } + fn helper(self: *Worker, item: Item) void { _ = self; _ = item; } +}; diff --git a/gems/decomplex/examples/zig/temporal-ordering-pressure.zig b/gems/decomplex/examples/zig/temporal-ordering-pressure.zig new file mode 100644 index 000000000..858b7ffd2 --- /dev/null +++ b/gems/decomplex/examples/zig/temporal-ordering-pressure.zig @@ -0,0 +1,21 @@ +const TemporalOrderExample = struct { + a: i32, + b: i32, + + pub fn one(self: *TemporalOrderExample) void { + self.a = 1; + } + + pub fn two(self: *TemporalOrderExample) void { + self.a = 2; + self.b = 3; + } + + pub fn three(self: *TemporalOrderExample) void { + self.b = 4; + } + + pub fn reader(self: *TemporalOrderExample) i32 { + return self.a; + } +}; diff --git a/gems/decomplex/examples/zig/weighted-inlined-complexity.zig b/gems/decomplex/examples/zig/weighted-inlined-complexity.zig new file mode 100644 index 000000000..56087431e --- /dev/null +++ b/gems/decomplex/examples/zig/weighted-inlined-complexity.zig @@ -0,0 +1,45 @@ +const WeightedInlineExample = struct { + pub fn checkout(self: *WeightedInlineExample, user: User, cart: Cart) void { + self.validate_user(user); + self.apply_discount(cart); + self.process_payment(user, cart); + self.audit_cart(cart); + } + + fn validate_user(self: *WeightedInlineExample, user: User) bool { + _ = self; + if (user.active() and !user.suspended()) { + if (user.profile.complete()) { return true; } else { return false; } + } else { + return false; + } + } + + fn apply_discount(self: *WeightedInlineExample, cart: Cart) i32 { + _ = 
self; + if (cart.total > 100 and eligible()) { + if (holiday()) { return 20; } else if (loyalty_month()) { return 15; } else { return 10; } + } + return 0; + } + + fn process_payment(self: *WeightedInlineExample, user: User, cart: Cart) void { + _ = self; + if (gateway.ready()) { + if (cart.total > 0 and user.active()) { + if (fraud_check(user)) { charge(user, cart); } else { decline(user); } + } + } + } + + fn audit_cart(self: *WeightedInlineExample, cart: Cart) void { + _ = self; + for (cart.items) |item| { + if (item.taxable()) { + if (item.region and item.amount > 0) { + record_tax(item); + } + } + } + } +}; diff --git a/gems/decomplex/exe/decomplex b/gems/decomplex/exe/decomplex index e0a7616fa..67fbce8cc 100755 --- a/gems/decomplex/exe/decomplex +++ b/gems/decomplex/exe/decomplex @@ -60,6 +60,163 @@ if ARGV[0] == "delta" exit 0 end +if ARGV[0] == "facts" + require_relative "../lib/decomplex/report_facts" + args = ARGV[1..] + engine = "ruby" + out_path = nil + jobs = nil + benchmark = false + args = args.reject do |arg| + case arg + when /\A--engine=(.+)\z/ + engine = Regexp.last_match(1) + true + when /\A--output=(.+)\z/ + out_path = Regexp.last_match(1) + true + when /\A--jobs=(\d+)\z/ + jobs = Integer(Regexp.last_match(1)) + true + when "--benchmark" + benchmark = true + true + else + false + end + end + files = collect_files(args) + abort no_files_message if files.empty? 
+ started = Process.clock_gettime(Process::CLOCK_MONOTONIC) + facts = Decomplex::ReportFacts.from_files(files, engine: engine, jobs: jobs) + elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started + output = Decomplex::ReportFacts.to_json(facts) + if out_path + File.write(out_path, output) + warn "wrote #{out_path} (#{files.size} files)" + else + puts output + end + if benchmark + warn format("decomplex facts engine=%s files=%d elapsed=%.6fs", + engine, files.size, elapsed) + end + exit 0 +end + +if ARGV[0] == "render-report" + require_relative "../lib/decomplex/report" + args = ARGV[1..] + format = "markdown" + out_path = nil + input_path = nil + args = args.reject do |arg| + case arg + when "--from-stdin" + true + when /\A--input=(.+)\z/ + input_path = Regexp.last_match(1) + true + when /\A--format=(.+)\z/ + format = Regexp.last_match(1) + true + when /\A--output=(.+)\z/ + out_path = Regexp.last_match(1) + true + else + false + end + end + abort "usage: decomplex render-report --from-stdin|--input=FILE [--format=markdown|sarif|json] [--output=FILE]" unless args.empty? + + payload = input_path ? File.read(input_path) : STDIN.read + abort "render-report requires facts JSON on stdin or --input=FILE" if payload.to_s.strip.empty? + + report = Decomplex::Report.from_facts(payload) + output = + case format.to_s + when "markdown", "md" + report.to_markdown + when "sarif", "json" + report.to_sarif + else + abort "unsupported render-report format: #{format}" + end + if out_path + File.write(out_path, output) + warn "wrote #{out_path}" + else + puts output + end + exit 0 +end + +if ARGV[0] == "detector" + args = ARGV[1..] + detector = args&.shift + abort "usage: decomplex detector DETECTOR --engine=ruby|rust --json [--jobs=N] FILE..." 
unless detector + + engine = "ruby" + json = false + compare = false + benchmark = false + detector_options = {} + args = args.reject do |arg| + case arg + when /\A--engine=(.+)\z/ + engine = Regexp.last_match(1) + true + when "--json" + json = true + true + when "--compare-engines" + compare = true + true + when "--benchmark" + benchmark = true + true + when /\A--mass=(\d+)\z/ + detector_options[:mass] = Integer(Regexp.last_match(1)) + true + when /\A--fuzzy=(\d+)\z/ + detector_options[:fuzzy] = Integer(Regexp.last_match(1)) + true + when /\A--jobs=(\d+)\z/ + detector_options[:jobs] = Integer(Regexp.last_match(1)) + true + else + false + end + end + files = collect_files(args) + abort no_files_message if files.empty? + + if compare + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare(detector, files, **detector_options) + unless ok + warn "decomplex detector #{detector} output differs between ruby and rust engines" + warn "--- ruby" + warn ruby_json + warn "--- rust" + warn rust_json + exit 1 + end + puts ruby_json + elsif json + started = Process.clock_gettime(Process::CLOCK_MONOTONIC) + output = Decomplex::DetectorRunner.canonical_json(detector, files, engine: engine, **detector_options) + elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started + if benchmark + warn format("decomplex detector=%s engine=%s files=%d elapsed=%.6fs", + detector, engine, files.size, elapsed) + end + print output + else + abort "decomplex detector currently requires --json or --compare-engines" + end + exit 0 +end + if ARGV[0] == "state-mesh" require_relative "../lib/decomplex/state_mesh" args = ARGV[1..] @@ -234,6 +391,10 @@ if ARGV.empty? || ARGV[0] == "-h" || ARGV[0] == "--help" decomplex FILE_OR_DIR [FILE_OR_DIR ...] decomplex report [--output=FILE] [--emit-json=FILE] [--sarif=FILE] [--exclude=GLOB] FILE_OR_DIR ... + decomplex facts [--engine=ruby|rust] [--output=FILE] [--benchmark] [--jobs=N] FILE_OR_DIR ... 
+ decomplex render-report --from-stdin|--input=FILE [--format=markdown|sarif|json] [--output=FILE] + decomplex detector DETECTOR --engine=ruby|rust --json [--benchmark] [--mass=N] [--fuzzy=N] [--jobs=N] FILE_OR_DIR ... + decomplex detector DETECTOR --compare-engines [--mass=N] [--fuzzy=N] [--jobs=N] FILE_OR_DIR ... decomplex state-mesh [--output=FILE] [--exclude=GLOB] FILE_OR_DIR ... decomplex state-branches [--output=FILE] [--exclude=GLOB] FILE_OR_DIR ... decomplex temporal-ordering [--output=FILE] [--exclude=GLOB] FILE_OR_DIR ... @@ -247,6 +408,10 @@ if ARGV.empty? || ARGV[0] == "-h" || ARGV[0] == "--help" Subcommands: report Full markdown report with all detectors + facts Emit report-ready detector facts before convergence/root-cause/rendering + render-report + Render a precomputed facts payload without parsing source or running detectors + detector Single-detector canonical JSON for migration/benchmarking state-mesh JSON graph of reader/writer hierarchy by field state-branches Markdown table of branches whose predicate reads state diff --git a/gems/decomplex/lib/decomplex.rb b/gems/decomplex/lib/decomplex.rb index 263910c35..526377294 100644 --- a/gems/decomplex/lib/decomplex.rb +++ b/gems/decomplex/lib/decomplex.rb @@ -9,6 +9,7 @@ require_relative "decomplex/site_extractor" require_relative "decomplex/miner" require_relative "decomplex/co_update" +require_relative "decomplex/detector_runner" require_relative "decomplex/predicate_alias" require_relative "decomplex/path_condition" require_relative "decomplex/semantic_alias" @@ -32,6 +33,8 @@ require_relative "decomplex/convergence" require_relative "decomplex/root_cause" require_relative "decomplex/delta" +require_relative "decomplex/report_facts" +require_relative "decomplex/syntax_oracle" # Decomplex: decision-level duplication + neglected-condition detector. # See decomplex.gemspec for the rationale. 
v0 scope is exact-match diff --git a/gems/decomplex/lib/decomplex/ast.rb b/gems/decomplex/lib/decomplex/ast.rb index 38b1d891a..24715b753 100644 --- a/gems/decomplex/lib/decomplex/ast.rb +++ b/gems/decomplex/lib/decomplex/ast.rb @@ -1,6 +1,11 @@ # frozen_string_literal: true require "set" +require_relative "ast/node" +require_relative "ast/cache" +require_relative "ast/source_map" +require_relative "ast/semantic_node" +require_relative "ast/semantic_normalizer" module Decomplex # Shared AST primitives for the v1 detectors. Kept separate from the @@ -8,12 +13,6 @@ module Decomplex # so adding it cannot destabilise them (design principle 3); they # will be migrated onto this once it has proven itself. module Ast - Node = Struct.new( - :type, :children, :first_lineno, :first_column, :last_lineno, :last_column, - :text, - keyword_init: true - ) - module_function def parse(file) @@ -25,2422 +24,16 @@ def parse(file) end end - def normalized_cache - @normalized_cache ||= {} - end - - def node?(n) - n.is_a?(Node) - end - - # Exact source text of a node, trivial formatting normalised. - def slice(node, _lines) - return "" unless node?(node) - - node.text.to_s.strip.gsub(/\s+/, " ") - end - - # Tree-sitter exposes each grammar's native node names. Decomplex's - # detectors share a small language-neutral AST vocabulary, so this - # adapter normalizes common syntax categories into that vocabulary: - # DEFN, CLASS, IF, CASE/WHEN, AND/OR, CALL, LASGN, ATTRASGN, IVAR, - # LVAR, and friends. The goal is portable structural facts, not - # Ruby semantics. 
- class TreeSitterNormalizer - FUNCTION_KINDS = %w[ - method function_definition function_declaration method_definition - method_declaration function_item singleton_method - ].freeze - CLASS_KINDS = %w[class class_definition class_declaration].freeze - MODULE_KINDS = %w[module].freeze - BLOCK_KINDS = %w[ - block body_statement statement_block statement_list class_body - switch_body match_block then block_body - ].freeze - IF_KINDS = %w[if if_statement if_modifier unless unless_modifier if_expression conditional].freeze - LOOP_KINDS = { - "while" => :WHILE, - "while_statement" => :WHILE, - "while_modifier" => :WHILE, - "until_modifier" => :UNTIL, - "for" => :FOR, - "for_statement" => :FOR, - "for_in_clause" => :FOR - }.freeze - CASE_KINDS = %w[ - case switch_statement expression_switch_statement switch_expression match_statement match_expression - ].freeze - WHEN_KINDS = %w[when switch_case case_clause expression_case match_arm].freeze - ASSIGNMENT_KINDS = %w[ - assignment assignment_expression assignment_statement augmented_assignment - ].freeze - MEMBER_KINDS = %w[ - call attribute member_expression field selector_expression field_expression expression_list - ].freeze - CALL_KINDS = %w[call call_expression method_call method_call_expression].freeze - IDENTIFIER_KINDS = %w[ - identifier property_identifier field_identifier shorthand_property_identifier - ].freeze - CONST_KINDS = %w[constant scope_resolution type_identifier scoped_type_identifier].freeze - STRING_KINDS = %w[ - string string_content string_literal interpreted_string_literal raw_string_literal - ].freeze - SYMBOL_KINDS = %w[symbol simple_symbol].freeze - NIL_KINDS = %w[nil none null].freeze - RETURN_KINDS = { - "return" => :RETURN, - "return_statement" => :RETURN, - "return_expression" => :RETURN, - "break" => :BREAK, - "break_statement" => :BREAK, - "break_expression" => :BREAK, - "next" => :NEXT, - "continue_statement" => :NEXT - }.freeze - COMPARISON_OPERATORS = %w[== != === !== < <= > >=].freeze - 
OPERATOR_CALL_OPERATORS = %w[+ - * / % ** | & ^ << >> =~ !~].freeze - INFIX_STATEMENT_OPERATORS = (OPERATOR_CALL_OPERATORS + COMPARISON_OPERATORS).freeze - INLINE_DEF_WRAPPER_MIDS = %w[ - public protected private private_class_method module_function - ].freeze - - def initialize(document) - @document = document - @local_stack = [] - end - - def normalize - children = - if ruby? - with_ruby_scope(@document.root, reset: true) { normalize_children(@document.root) } - else - normalize_children(@document.root) - end - wrap(:ROOT, children: children, source: @document.root) - end - - private - - def normalize_node(node) - return nil unless ts_node?(node) - return nil if node.kind == "comment" - return normalize_assignment_lhs(node) if assignment_lhs?(node) - return normalize_infix_statement(node) if infix_statement?(node) - return normalize_dotted_expression(node) if dotted_expression?(node) - return normalize_unary_not_statement(node) if unary_not_statement?(node) - - if leading_function_statement?(node) - normalize_leading_function_statement(node) - elsif modifier_statement?(node) - normalize_modifier_statement(node) - elsif ternary_statement?(node) - normalize_ternary_statement(node) - elsif statement_call_with_block?(node) - normalize_statement_call_with_block(node) - elsif command_call_statement?(node) - normalize_command_call_statement(node) - elsif FUNCTION_KINDS.include?(node.kind) - normalize_function(node) - elsif class_node?(node) - normalize_class(node) - elsif module_node?(node) - normalize_module(node) - elsif node.kind == "impl_item" - normalize_impl(node) - elsif node.kind == "elsif" - normalize_elsif(node) - elsif IF_KINDS.include?(node.kind) - normalize_if(node) - elsif LOOP_KINDS.key?(node.kind) - normalize_loop(node) - elsif CASE_KINDS.include?(node.kind) || hidden_match?(node) - normalize_case(node) - elsif node.kind == "element_reference" - normalize_element_reference(node) - elsif node.kind == "rescue_modifier" - normalize_rescue_modifier(node) - 
elsif node.kind == "ensure" - normalize_ensure_clause(node) - elsif node.kind == "begin" - normalize_begin(node) - elsif node.kind == "operator_assignment" - normalize_operator_assignment(node) - elsif ASSIGNMENT_KINDS.include?(node.kind) - normalize_assignment(node) - elsif node.kind == "subshell" - normalize_subshell(node) - elsif node.kind == "block_argument" - normalize_block_argument(node) - elsif node.kind == "pair" - normalize_pair(node) - elsif node.kind == "singleton_class" - normalize_singleton_class(node) - elsif node.kind == "lambda" - normalize_lambda(node) - elsif node.kind == "yield" - normalize_yield(node) - elsif yield_argument_list?(node) - normalize_yield_argument_list(node) - elsif node.kind == "heredoc_beginning" - normalize_heredoc_beginning(node) - elsif node.kind == "chained_string" - normalize_chained_string(node) - elsif node.kind == "interpolation" - normalize_interpolation(node) - elsif unary_minus_expression?(node) - normalize_unary_minus(node) - elsif unary_not_expression?(node) - normalize_unary_not(node) - elsif boolean_expression?(node) - normalize_boolean(node) - elsif operator_call_expression?(node) - normalize_operator_call(node) - elsif comparison_expression?(node) - normalize_comparison(node) - elsif CALL_KINDS.include?(node.kind) - normalize_call(node) - elsif member_read_node?(node) - normalize_member_read(node) - elsif BLOCK_KINDS.include?(node.kind) - wrap(:BLOCK, children: normalize_children(node), source: node) - elsif unwrap_node?(node) - normalize_node(node.named_children.first) - elsif RETURN_KINDS.key?(node.kind) - normalize_return(node) - elsif self_node?(node) - wrap(:SELF, children: [], source: node) - elsif instance_variable?(node) - wrap(:IVAR, children: [node.text.to_s], source: node) - elsif global_variable?(node) - normalize_global_variable(node) - elsif const_node?(node) - normalize_const(node) - elsif ruby? 
&& IDENTIFIER_KINDS.include?(node.kind) && node.text.to_s == "yield" - wrap(:YIELD, children: [nil], source: node) - elsif ruby_vcall_identifier?(node) - return wrap(:YIELD, children: [nil], source: node) if node.text.to_s == "yield" - - wrap(:VCALL, children: [node.text.to_s.to_sym], source: node) - elsif vcall_identifier?(node) - wrap(:VCALL, children: [node.text.to_s.to_sym], source: node) - elsif local_identifier?(node) - wrap(:LVAR, children: [node.text.to_s], source: node) - elsif NIL_KINDS.include?(node.kind) - wrap(:NIL, children: [], source: node) - elsif interpolated_string?(node) - normalize_interpolated_string(node) - elsif STRING_KINDS.include?(node.kind) - wrap(:STR, children: [node.text.to_s], source: node) - elsif SYMBOL_KINDS.include?(node.kind) - wrap(:LIT, children: [node.text.to_s.sub(/\A:/, "").to_sym], source: node) - else - wrap(kind_type(node.kind), children: normalize_children(node), source: node) - end - end - - def normalize_function(node) - return normalize_singleton_function(node) if node.kind == "singleton_method" - - name = function_name(node) - args = normalize_parameters(named_field(node, "parameters")) - body = with_ruby_scope(node, reset: true) do - elide_implicit_nil_body( - prepend_inline_parameter_begin( - node, - elide_tail_returns(normalize_body(named_field(node, "body") || block_child(node))) - ) - ) - end - wrap(:DEFN, children: [name, scope(body, args: args)], source: node) - end - - def normalize_singleton_function(node) - receiver = singleton_receiver(node) - name = singleton_name(node) - args = normalize_parameters(named_field(node, "parameters")) - body = with_ruby_scope(node, reset: true) do - elide_implicit_nil_body( - prepend_inline_parameter_begin( - node, - elide_tail_returns(normalize_body(named_field(node, "body") || block_child(node))) - ) - ) - end - wrap(:DEFS, children: [normalize_node(receiver), name, scope(body, args: args)], source: node) - end - - def normalize_class(node) - name = 
const_for(named_field(node, "name") || first_named(node)) - body = normalize_body(named_field(node, "body") || block_child(node)) - wrap(:CLASS, children: [name, nil, scope(body)], source: node) - end - - def normalize_module(node) - name = const_for(named_field(node, "name") || first_named(node)) - body = normalize_body(named_field(node, "body") || block_child(node)) - wrap(:MODULE, children: [name, scope(body)], source: node) - end - - def normalize_impl(node) - type_node = named_field(node, "type") || - node.named_children.find do |child| - %w[type_identifier scoped_type_identifier identifier].include?(child.kind) - end - name = const_for(type_node || node) - body = normalize_body(named_field(node, "body") || block_child(node) || node) - wrap(:CLASS, children: [name, nil, scope(body)], source: node) - end - - def normalize_if(node) - if %w[if_modifier unless_modifier].include?(node.kind) - action, cond_raw = node.named_children - type = node.kind.start_with?("unless") ? :UNLESS : :IF - return wrap(type, children: [normalize_node(cond_raw), normalize_modifier_action(action), nil], source: node) - end - - cond_raw = named_field(node, "condition") || named_field(node, "predicate") || first_named(node) - cond = normalize_node(cond_raw) - positive_raw = named_field(node, "consequence") || named_field(node, "body") || - node.named_children.find { |child| child.kind == "then" } || - branch_child(node, cond_raw, 0) - negative_raw = named_field(node, "alternative") || - explicit_alternative(node) || - (branch_child(node, cond_raw, 1) unless ruby?) - positive = normalize_body(positive_raw) - negative = normalize_else_or_branch(negative_raw) - type = node.kind.start_with?("unless") ? 
:UNLESS : :IF - wrap(type, children: [cond, positive, negative], source: node) - end - - def normalize_elsif(node) - cond = node.named_children.find { |child| !%w[comment then elsif else].include?(child.kind) } - positive = node.named_children.find { |child| child.kind == "then" } - negative = node.named_children.find { |child| %w[elsif else].include?(child.kind) } - wrap(:IF, children: [normalize_node(cond), normalize_body(positive), normalize_else_or_branch(negative)], - source: node) - end - - def normalize_loop(node) - if %w[while_modifier until_modifier].include?(node.kind) - action, cond = node.named_children - return wrap(LOOP_KINDS.fetch(node.kind), children: [normalize_node(cond), normalize_modifier_action(action), true], - source: node) - end - - cond = normalize_node(named_field(node, "condition") || first_named(node)) - body = normalize_body(named_field(node, "body") || named_field(node, "consequence") || block_child(node)) - wrap(LOOP_KINDS.fetch(node.kind), children: [cond, body], source: node) - end - - def normalize_case(node) - value_raw = case_value(node) - value = normalize_node(value_raw) - whens = case_arms(node).map { |arm| normalize_when(arm) }.compact - fallback = case_else_body(node) - chain = link_when_chain(whens, fallback) - return wrap(:CASE2, children: [chain], source: node) unless value_raw - - wrap(:CASE, children: [value, chain], source: node) - end - - def normalize_when(node) - patterns = normalize_patterns(node) - body = normalize_body(when_body(node)) - wrap(:WHEN, children: [list(patterns, source: node), body, nil], source: node) - end - - def normalize_assignment(node) - left = assignment_left(node) - right = normalize_node(assignment_right(node)) - return normalize_multiple_assignment(left, right, node) if left&.kind == "left_assignment_list" - return assignment_target(left, right, source: node) if assignment_target(left, right, source: node) - - wrap(:LASGN, children: [target_name(left), right], source: node) - end - - def 
normalize_multiple_assignment(left, right, node) - targets = left.named_children.map do |child| - type = global_variable?(child) ? :GASGN : :LASGN - wrap(type, children: [target_name(child), nil], source: child) - end - wrap(:MASGN, children: [right, list(targets, source: left)], source: node) - end - - def normalize_boolean(node) - type = boolean_operator(node) == "or" ? :OR : :AND - operands = node.named_children.map { |child| normalize_node(child) }.compact - operands = operands.flat_map { |child| Ast.node?(child) && child.type == type ? child.children : [child] } - wrap(type, children: operands, source: node) - end - - def normalize_comparison(node) - operands = node.named_children - left = normalize_node(operands[0]) - right = normalize_node(operands[1]) - wrap(:OPCALL, children: [left, comparison_operator(node).to_sym, list([right], source: operands[1] || node)], - source: node) - end - - def normalize_operator_call(node) - operands = node.named_children - left = normalize_node(operands[0]) - right = normalize_node(operands[1]) - if ruby? && binary_operator(node) == "=~" && regex_literal?(operands[1]) - return wrap(:MATCH3, children: [right, left], source: node) - elsif ruby? && binary_operator(node) == "=~" - return wrap(:CALL, children: [left, :=~, list([right], source: operands[1] || node)], source: node) - end - - wrap(:OPCALL, children: [left, binary_operator(node).to_sym, list([right], source: operands[1] || node)], - source: node) - end - - def normalize_element_reference(node) - recv = node.named_children.first - args = node.named_children.drop(1).map { |child| normalize_node(child) }.compact - if ruby? 
&& self_node?(recv) - return wrap(:FCALL, children: [:[], list(args, source: node)], source: node) - end - - wrap(:CALL, children: [normalize_node(recv), :[], list(args, source: node)], source: node) - end - - def normalize_rescue_modifier(node) - body = normalize_node(node.named_children.first) - handler = normalize_node(node.named_children[1]) - resbody = wrap(:RESBODY, children: [nil, handler, nil], source: node) - wrap(:RESCUE, children: [body, resbody, nil], source: node) - end - - def normalize_ensure_clause(node) - normalize_body_nodes(node.named_children, source: node) - end - - def normalize_begin(node) - rescue_nodes = node.named_children.select { |child| child.kind == "rescue" } - ensure_node = node.named_children.find { |child| child.kind == "ensure" } - if rescue_nodes.empty? - return wrap(:BEGIN, children: normalize_children(node), source: node) unless ensure_node - - body_nodes = node.named_children.take_while { |child| child.kind != "ensure" } - body = normalize_body_nodes(body_nodes, source: body_nodes.first || node) - ensure_body = normalize_body(ensure_node) - source = source_from_nodes(body_nodes.first || node, ensure_node.named_children.last || ensure_node) - return wrap(:ENSURE, children: [body, ensure_body], source: source) - end - - body_nodes = node.named_children.take_while { |child| child.kind != "rescue" } - body = normalize_body_nodes(body_nodes, source: body_nodes.first || node) - resbodies = rescue_nodes.map { |child| normalize_rescue_clause(child) } - source = source_from_nodes(body_nodes.first || node, rescue_source_end(rescue_nodes.last) || rescue_nodes.last || node) - rescued = wrap(:RESCUE, children: [body, link_rescue_chain(resbodies), nil], source: source) - return rescued unless ensure_node - - ensure_body = normalize_body(ensure_node) - ensure_source = source_from_nodes(body_nodes.first || node, ensure_node.named_children.last || ensure_node) - wrap(:ENSURE, children: [rescued, ensure_body], source: ensure_source) - end - - 
def normalize_operator_assignment(node) - left = assignment_left(node) - right_raw = assignment_right(node) - right = normalize_node(right_raw) - operator = operator_assignment_operator(node) - - if left&.kind == "element_reference" - recv = left.named_children.first - args = left.named_children.drop(1).map { |child| normalize_node(child) }.compact - return wrap(:OP_ASGN1, children: [normalize_node(recv), operator, list(args, source: left), right], - source: node) - end - - if member_read_node?(left) - recv, mid = member_parts(left) - return wrap(:OP_ASGN2, children: [normalize_node(recv), false, mid.to_sym, operator, right], source: node) - end - - logical = normalize_logical_operator_assignment(left, operator, right, source: node) - return logical if logical - if instance_variable?(left) || global_variable?(left) - return assignment_target(left, augmented_assignment_value(left, operator, right_raw, node), source: node) - end - - assignment_target(left, right, source: node) || - wrap(:LASGN, children: [target_name(left), augmented_assignment_value(left, operator, right_raw, node)], - source: node) - end - - def normalize_subshell(node) - children = node.named_children.filter_map do |child| - case child.kind - when "interpolation" then normalize_interpolation(child) - when "string_content" then wrap(:STR, children: [child.text.to_s], source: child) - end - end - type = children.any? { |child| child.is_a?(Node) && child.type == :EVSTR } ? :DXSTR : :XSTR - wrap(type, children: children, source: node) - end - - def normalize_pair(node) - key = node.named_children.first - value = node.named_children[1] - if node.children.any? { |child| !child.named? && child.text == "=>" } - return wrap(:HASH, children: [normalize_node(key), normalize_node(value)].compact, source: node) - end - - key_lit = wrap(:LIT, children: [key.text.to_s.to_sym], source: key || node) - if ruby? && key&.kind == "hash_key_symbol" && value.nil? 
- name = key.text.to_s - return wrap(:HASH, children: [key_lit, local_or_call_for_name(name, key)], source: node) - end - - wrap(:HASH, children: [key_lit, normalize_node(value)].compact, source: node) - end - - def normalize_block_argument(node) - value = normalize_node(node.named_children.first) - wrap(:BLOCK_PASS, children: [nil, value], source: node) - end - - def normalize_singleton_class(node) - recv = normalize_node(node.named_children.first) - body = normalize_body(node.named_children[1]) - wrap(:SCLASS, children: [recv, scope(body)], source: node) - end - - def normalize_lambda(node) - body_node = named_field(node, "body") || block_child(node) || node.named_children.last - body = with_ruby_scope(node) do - dynamic_scope(normalize_body(body_node)) - end - wrap(:LAMBDA, children: [scope(body)], source: node) - end - - def normalize_yield(node) - args_node = node.named_children.find { |child| child.kind == "argument_list" } - args = args_node ? yield_argument_nodes(args_node) : yield_inline_arguments(node) - wrap(:YIELD, children: [list(args, source: args_node || node)], source: node) - end - - def yield_statement?(node) - %w[body_statement block block_body statement].include?(node.kind) && - node.children.first&.text == "yield" - rescue StandardError - false - end - - def normalize_yield_statement(node) - args_node = node.named_children.find { |child| child.kind == "argument_list" } - args = args_node ? 
yield_argument_nodes(args_node) : yield_inline_arguments(node) - wrap(:YIELD, children: [list(args, source: args_node || node)], source: node) - end - - def yield_argument_list?(node) - node.kind == "argument_list" && parent_node(node)&.children&.first&.text == "yield" - rescue StandardError - false - end - - def normalize_yield_argument_list(node) - args = yield_argument_nodes(node) - source = parent_node(node) || node - wrap(:YIELD, children: [list(args, source: node)], source: source) - end - - def yield_inline_arguments(node) - node.named_children.reject { |child| child.kind == "yield" }.map { |child| normalize_node(child) }.compact - end - - def yield_argument_nodes(node) - return [scalar_argument_list_value(node)].compact if node.named_children.empty? - - node.named_children.map { |child| normalize_node(child) }.compact - end - - def super_statement?(node) - %w[body_statement block block_body statement].include?(node.kind) && - node.named_children.first&.kind == "super" && - node.named_children.drop(1).all? { |child| child.kind == "argument_list" } - rescue StandardError - false - end - - def normalize_super_statement(node) - args_node = node.named_children.find { |child| child.kind == "argument_list" } - args = args_node ? 
args_node.named_children.map { |child| normalize_node(child) }.compact : [] - wrap(:SUPER, children: [list(args, source: args_node || node)], source: node) - end - - def normalize_unary_not(node) - operand = node.named_children.first - wrap(:OPCALL, children: [normalize_node(operand), :!, nil], source: node) - end - - def normalize_unary_not_statement(node) - operand = node.named_children.first - wrap(:OPCALL, children: [normalize_node(operand), :!, nil], source: node) - end - - def normalize_unary_minus(node) - operand = node.named_children.first - if ts_node?(operand) && operand.kind == "integer" - return wrap(:INTEGER, children: [-operand.text.to_i], source: operand) - end - - wrap(:OPCALL, children: [normalize_node(operand), :-@, nil], source: node) - end - - def normalize_infix_statement(node) - left, operator, right = infix_statement_parts(node) - if ruby? && operator == "=~" && regex_literal?(right) - return wrap(:MATCH3, children: [normalize_node(right), normalize_node(left)], source: node) - elsif ruby? && operator == "=~" - return wrap(:CALL, children: [normalize_node(left), :=~, list([normalize_node(right)].compact, source: right)], - source: node) - end - - wrap(:OPCALL, children: [normalize_node(left), operator.to_sym, list([normalize_node(right)].compact, source: right)], - source: node) - end - - def normalize_dotted_expression(node) - block = call_block(node) - call = normalize_dotted_call_expression(node, source: block ? source_before_child(node, block) : node) - return call unless block - - args = normalize_block_parameters(block) - body = with_ruby_scope(block) do - dynamic_scope(normalize_body(named_field(block, "body") || block_child(block) || block)) - end - wrap(:ITER, children: [call, scope(body, args: args)], source: node) - end - - def normalize_dotted_call_expression(node, source: node) - recv, mid = dotted_call_parts(node) - args = call_arguments(node, nil) - type = safe_navigation_call?(node) ? 
:QCALL : :CALL - wrap(type, children: [normalize_node(recv), mid.to_sym, list(args, source: source)], source: source) - end - - def normalize_argument_list_call_with_block(node) - block = call_block(node) - call = normalize_argument_list_call(node) - args = normalize_block_parameters(block) - body = with_ruby_scope(block) do - dynamic_scope(normalize_body(named_field(block, "body") || block_child(block) || block)) - end - wrap(:ITER, children: [call, scope(body, args: args)], source: node) - end - - def normalize_argument_list_call(node) - function = node.named_children.first - args_node = node.named_children.find { |child| child.kind == "argument_list" } - args = args_node ? args_node.named_children.map { |child| normalize_node(child) }.compact : [] - wrap(:FCALL, children: [function.text.to_sym, list(args, source: args_node || node)], source: node) - end - - def normalize_call(node) - return normalize_zero_child_call(node) if zero_child_identifier_call?(node) - return normalize_call_with_block(node) if call_block(node) - return normalize_visibility_inline_def(node) if visibility_inline_def_call?(node) - - if named_field(node, "receiver") && named_field(node, "method") - recv, mid = member_parts(node) - args = call_arguments(node, nil) - type = safe_navigation_call?(node) ? :QCALL : :CALL - return wrap(type, children: [normalize_node(recv), mid.to_sym, list(args, source: node)], source: node) - end - - function = named_field(node, "function") || named_field(node, "call") || node.named_children.first - args = call_arguments(node, function) - return wrap(:YIELD, children: [list(args, source: node)], source: node) if ruby? && function&.text == "yield" - - if member_read_node?(function) - recv, mid = member_parts(function) - return wrap(:CALL, children: [normalize_node(recv), mid.to_sym, list(args, source: node)], source: node) - end - - if function && IDENTIFIER_KINDS.include?(function.kind) - type = args.empty? ? 
:VCALL : :FCALL - return wrap(type, children: [function.text.to_sym, list(args, source: node)], source: node) - end - - if ruby? && function && const_node?(function) - return wrap(:FCALL, children: [function.text.to_sym, list(args, source: node)], source: node) - end - - wrap(:CALL, children: [normalize_node(function), :call, list(args, source: node)], source: node) - end - - def normalize_return(node) - normalize_return_node(node, elide_symbol: false) - end - - def normalize_return_node(node, elide_symbol:) - children = node.named_children.map { |child| normalize_return_value(child) }.compact - return children.first if elide_symbol && ruby? && children.size == 1 && symbol_literal_node?(children.first) - - wrap(RETURN_KINDS.fetch(node.kind), children: children, source: node) - end - - def normalize_return_value(node) - return normalize_node(node) unless ts_node?(node) && node.kind == "argument_list" - return scalar_argument_list_value(node) if node.named_children.empty? - return normalize_argument_list_element_reference(node) if argument_list_element_reference?(node) - return normalize_boolean(node) if boolean_expression?(node) - return normalize_ternary_statement(node) if ternary_statement?(node) - return normalize_case(node) if case_argument_list?(node) - return normalize_argument_list_call_with_block(node) if argument_list_call_with_block?(node) - return normalize_dotted_expression(node) if dotted_expression?(node) - return normalize_argument_list_unary_not(node) if argument_list_unary_not?(node) - return normalize_infix_statement(node) if infix_statement?(node) - - function = node.named_children.first - nested_args = node.named_children[1] - if function && IDENTIFIER_KINDS.include?(function.kind) && nested_args&.kind == "argument_list" - args = nested_args.named_children.map { |child| normalize_node(child) }.compact - return wrap(:FCALL, children: [function.text.to_sym, list(args, source: nested_args)], source: node) - end - - values = node.named_children.map { 
|child| normalize_node(child) }.compact - return values.first if values.size == 1 - - list(values, source: node) - end - - def argument_list_element_reference?(node) - node.kind == "argument_list" && - node.children.first&.text != "[" && - node.children.any? { |child| !child.named? && child.text == "[" } && - node.children.any? { |child| !child.named? && child.text == "]" } && - node.named_children.size >= 2 && - node.named_children.none? { |child| %w[block do_block].include?(child.kind) } - end - - def normalize_argument_list_element_reference(node) - recv = node.named_children.first - args = node.named_children.drop(1).map { |child| normalize_node(child) }.compact - wrap(:CALL, children: [normalize_node(recv), :[], list(args, source: node)], source: node) - end - - def normalize_call_with_block(node) - block = call_block(node) - call = normalize_call_without_block(node, block) - args = normalize_block_parameters(block) - body = with_ruby_scope(block) do - dynamic_scope(normalize_body(named_field(block, "body") || block_child(block) || block)) - end - wrap(:ITER, children: [call, scope(body, args: args)], source: node) - end - - def normalize_call_without_block(node, block) - call_source = block ? source_before_child(node, block) : node - if dotted_call?(node) - recv, mid = dotted_call_parts(node) - args = call_arguments(node, nil) - arg_list = args.empty? ? nil : list(args, source: call_source) - type = safe_navigation_call?(node) ? :QCALL : :CALL - return wrap(type, children: [normalize_node(recv), mid.to_sym, arg_list], source: call_source) - end - - function = named_field(node, "function") || named_field(node, "call") || - node.named_children.find { |child| !same_ts_node?(child, block) } - args = call_arguments(node, function) - - if function && IDENTIFIER_KINDS.include?(function.kind) - return wrap(:FCALL, children: [function.text.to_sym, list(args, source: call_source)], source: call_source) - end - - if ruby? 
&& function && const_node?(function) - return wrap(:FCALL, children: [function.text.to_sym, list(args, source: call_source)], source: call_source) - end - - if member_read_node?(function) - recv, mid = member_parts(function) - type = safe_navigation_call?(function) ? :QCALL : :CALL - return wrap(type, children: [normalize_node(recv), mid.to_sym, list(args, source: call_source)], source: call_source) - end - - wrap(:CALL, children: [normalize_node(function), :call, list(args, source: call_source)], source: call_source) - end - - def normalize_visibility_inline_def(node) - message = node.named_children.first&.text.to_s - args = node.named_children.find { |child| child.kind == "argument_list" } - method = inline_def_from_argument_list(args) - wrap(:FCALL, children: [message.to_sym, list([method].compact, source: args || node)], source: node) - end - - def normalize_modifier_statement(node) - keyword = modifier_keyword(node) - action, cond = modifier_parts(node) - type = - case keyword - when "unless" then :UNLESS - when "while" then :WHILE - when "until" then :UNTIL - else :IF - end - normalized_action = normalize_modifier_action(action) - children = %i[WHILE UNTIL].include?(type) ? [normalize_node(cond), normalized_action, true] : - [normalize_node(cond), normalized_action, nil] - wrap(type, children: children, source: node) - end - - def normalize_modifier_action(node) - modifier_return_action?(node) ? 
normalize_return_node(node, elide_symbol: false) : normalize_node(node) - end - - def modifier_return_action?(node) - ts_node?(node) && RETURN_KINDS.key?(node.kind) - end - - def normalize_command_call_statement(node) - function = node.named_children.first - if visibility_inline_def_statement?(node, function) - method = inline_def_from_statement(node) - return wrap(:FCALL, children: [function.text.to_sym, list([method].compact, source: node)], source: node) - end - - args_node = node.named_children.find { |child| %w[argument_list arguments].include?(child.kind) } - args = args_node ? command_arguments(args_node) : [] - block = call_block(node) - call_source = block ? source_before_child(node, block) : node - if ruby? && function&.text == "yield" - return wrap(:YIELD, children: [list(args, source: args_node || call_source)], source: call_source) - end - - call = wrap(args.empty? ? :VCALL : :FCALL, - children: [function.text.to_sym, list(args, source: args_node || call_source)], - source: call_source) - return call unless block - - block_args = normalize_block_parameters(block) - body = with_ruby_scope(block) do - dynamic_scope(normalize_body(named_field(block, "body") || block_child(block) || block)) - end - wrap(:ITER, children: [call, scope(body, args: block_args)], source: node) - end - - def dynamic_scope(node) - return node unless node.is_a?(Node) - return node if %i[DEFN DEFS CLASS MODULE SCLASS LAMBDA].include?(node.type) - - node.type = :DASGN if node.type == :LASGN - node.type = :DVAR if node.type == :LVAR - node.children = node.children.map { |child| dynamic_scope(child) } - node - end - - def normalize_zero_child_call(node) - wrap(:VCALL, children: [node.text.to_s.to_sym], source: node) - end - - def normalize_member_read(node) - recv, mid = member_parts(node) - return wrap(kind_type(node.kind), children: normalize_children(node), source: node) unless recv && mid - - wrap(:CALL, children: [normalize_node(recv), mid.to_sym, nil], source: node) - end - - 
def normalize_const(node) - if %w[scope_resolution scoped_type_identifier].include?(node.kind) - parts = node.named_children - base = normalize_const(parts[0]) if parts[0] - name = (named_field(node, "name") || parts[-1])&.text.to_s - return wrap(:COLON2, children: [base, name.to_sym], source: node) - end - - wrap(:CONST, children: [node.text.to_s.to_sym], source: node) - end - - def normalize_children(node) - node.named_children.filter_map do |child| - next if assignment_rhs?(child) - - normalize_node(child) - end - end - - def normalize_body(node) - return nil unless ts_node?(node) - return normalize_leading_function_statement(node) if leading_function_statement?(node) - return normalize_leading_owner_statement(node) if leading_owner_statement?(node) - return normalize_leading_case_statement(node) if leading_case_statement?(node) - return normalize_ensure_body_statement(node) if ensure_body_statement?(node) - return normalize_rescue_body_statement(node) if rescue_body_statement?(node) - return normalize_heredoc_body_statement(node) if heredoc_body_statement?(node) - return normalize_leading_loop_statement(node) if leading_loop_statement?(node) - return normalize_leading_if_statement(node) if leading_if_statement?(node) - return normalize_elsif(node) if node.kind == "elsif" - return normalize_yield_statement(node) if yield_statement?(node) - return normalize_super_statement(node) if super_statement?(node) - return normalize_unary_not_statement(node) if unary_not_statement?(node) - return normalize_operator_assignment_statement(node) if operator_assignment_statement?(node) - return normalize_element_reference_statement(node) if element_reference_statement?(node) - return normalize_hash_literal_statement(node) if hash_literal_statement?(node) - return normalize_array_literal_statement(node) if array_literal_statement?(node) - return normalize_concatenated_string_statement(node) if concatenated_string_statement?(node) - return normalize_interpolated_statement(node) 
if interpolated_statement?(node) - return nil if empty_body_statement?(node) - return normalize_terminal_statement(node) if terminal_statement?(node) - return normalize_modifier_statement(node) if modifier_statement?(node) - return normalize_ternary_statement(node) if ternary_statement?(node) - return normalize_statement_call_with_block(node) if statement_call_with_block?(node) - return normalize_command_call_statement(node) if command_call_statement?(node) - return normalize_infix_statement(node) if infix_statement?(node) - return normalize_boolean(node) if boolean_expression?(node) - return normalize_dotted_expression(node) if dotted_expression?(node) - - if BLOCK_KINDS.include?(node.kind) - children = normalize_children(node) - if children.empty? && bare_identifier_text?(node.text) - return wrap(:VCALL, children: [node.text.to_s.strip.to_sym], source: node) - end - return nil if children.empty? - return children.first if children.size == 1 - - return wrap(:BLOCK, children: children, source: node) - end - - normalize_node(node) - end - - def normalize_body_nodes(nodes, source:) - children = nodes.map { |child| normalize_body(child) }.compact - return nil if children.empty? - return children.first if children.size == 1 - - wrap(:BLOCK, children: children, source: source) - end - - def normalize_patterns(node) - patterns = node.named_children.select do |child| - %w[pattern case_pattern match_pattern].include?(child.kind) - end - patterns = [named_field(node, "value")].compact if patterns.empty? - patterns = [node.named_children.find { |child| !BLOCK_KINDS.include?(child.kind) && !statement_node?(child) }].compact if patterns.empty? 
- - patterns.flat_map do |pattern| - if pattern.text.to_s.include?("::") - [wrap(:CONST, children: [pattern.text.to_s.to_sym], source: pattern)] - elsif %w[pattern case_pattern match_pattern expression_list].include?(pattern.kind) - pattern.named_children.map { |child| normalize_node(child) }.compact - else - [normalize_node(pattern)].compact - end - end - end - - def assignment_target(left, right, source: nil) - return nil unless ts_node?(left) - source ||= left - - if instance_variable?(left) - return wrap(:IASGN, children: [left.text.to_s, right], source: source) - end - - if global_variable?(left) - return wrap(:GASGN, children: [left.text.to_s, right], source: source) - end - - if left.kind == "element_reference" - recv = left.named_children.first - args = left.named_children.drop(1).map { |child| normalize_node(child) }.compact - return wrap(:ATTRASGN, children: [normalize_node(recv), :[]=, list(args + [right], source: left)], - source: source) - end - - if member_read_node?(left) - recv, mid = member_parts(left) - writer = left.text.to_s.include?("&.") ? 
mid.to_sym : "#{mid}=".to_sym - return wrap(:ATTRASGN, children: [normalize_node(recv), writer, list([right], source: left)], - source: source) - end - - return assignment_target(left.named_children.first, right, source: source) if left.kind == "expression_list" - - nil - end - - def normalize_assignment_lhs(node) - right = normalize_node(next_named_sibling(node)) - source = parent_node(node) || node - assignment_target(node, right, source: source) || - wrap(:LASGN, children: [target_name(node), right], source: node) - end - - def target_name(left) - return left.text.to_s.sub(/\A\*/, "") if ts_node?(left) && IDENTIFIER_KINDS.include?(left.kind) - return left.text.to_s.sub(/\A\*/, "") if ts_node?(left) && %w[splat splat_parameter rest_assignment].include?(left.kind) - return left.text.to_s if ts_node?(left) - - Ast.slice(normalize_node(left), @document.lines) - end - - def case_value(node) - named_field(node, "value") || named_field(node, "subject") || - named_field(node, "condition") || - node.named_children.find do |child| - !WHEN_KINDS.include?(child.kind) && !BLOCK_KINDS.include?(child.kind) && child.kind != "else" - end - end - - def case_arms(node) - arms = [] - stack = node.named_children.dup - until stack.empty? 
- child = stack.shift - next unless ts_node?(child) - - if WHEN_KINDS.include?(child.kind) - arms << child - else - stack.concat(child.named_children) unless FUNCTION_KINDS.include?(child.kind) - end - end - arms - end - - def when_body(node) - named_field(node, "body") || named_field(node, "consequence") || - named_field(node, "value") || - node.named_children.reverse.find { |child| BLOCK_KINDS.include?(child.kind) || statement_node?(child) } - end - - def link_when_chain(whens, fallback = nil) - whens.reverse.inject(fallback) do |next_when, current| - current.children[2] = next_when - current - end - end - - def case_else_body(node) - else_node = node.named_children.find { |child| child.kind == "else" } - normalize_else_or_branch(else_node) - end - - def normalize_else_or_branch(node) - return nil unless ts_node?(node) - return normalize_body(node) unless node.kind == "else" - - normalize_body_nodes(node.named_children, source: node) - end - - def link_rescue_chain(resbodies) - resbodies.reverse.inject(nil) do |next_rescue, current| - current.children[2] = next_rescue - current - end - end - - def boolean_expression?(node) - (%w[binary binary_expression boolean_operator].include?(node.kind) || boolean_statement?(node)) && - %w[and or].include?(boolean_operator(node)) - end - - def boolean_statement?(node) - return false unless %w[body_statement block_body statement argument_list].include?(node.kind) - return false unless %w[&& || and or].include?(binary_operator(node)) - return false if node.named_children.size < 2 - - node.children.all? do |child| - child.named? 
|| %w[&& || and or ( )].include?(child.text.to_s) - end - end - - def operator_call_expression?(node) - %w[binary binary_expression].include?(node.kind) && - OPERATOR_CALL_OPERATORS.include?(binary_operator(node)) - end - - def infix_statement?(node) - left, operator, right = infix_statement_parts(node) - left && right && INFIX_STATEMENT_OPERATORS.include?(operator) - end - - def dotted_expression?(node) - %w[body_statement block_body statement argument_list].include?(node.kind) && dotted_call?(node) - end - - def argument_list_call_with_block?(node) - return false unless node.kind == "argument_list" - return false if dotted_call?(node) - return false unless call_block(node) - - IDENTIFIER_KINDS.include?(node.named_children.first&.kind) - end - - def infix_statement_parts(node) - return [nil, nil, nil] unless %w[body_statement block_body statement argument_list].include?(node.kind) - - named_index = 0 - left = nil - right = nil - operator = nil - node.children.each do |child| - if child.named? - left ||= child - right = child if operator - named_index += 1 - elsif INFIX_STATEMENT_OPERATORS.include?(child.text.to_s) - operator = child.text.to_s - end - end - return [nil, nil, nil] unless named_index == 2 && operator - - [left, operator, right] - rescue StandardError - [nil, nil, nil] - end - - def argument_list_unary_not?(node) - node.kind == "argument_list" && - node.children.first&.text == "!" && - node.named_children.size == 1 - rescue StandardError - false - end - - def unary_not_statement?(node) - %w[body_statement block_body statement argument_list].include?(node.kind) && - node.children.first&.text == "!" 
&& - node.named_children.size == 1 - rescue StandardError - false - end - - def normalize_argument_list_unary_not(node) - operand = node.named_children.first - wrap(:OPCALL, children: [normalize_node(operand), :!, nil], source: node) - end - - def comparison_expression?(node) - %w[binary binary_expression comparison_operator].include?(node.kind) && - COMPARISON_OPERATORS.include?(comparison_operator(node)) - end - - def regex_literal?(node) - ts_node?(node) && %w[regex regex_literal].include?(node.kind) - end - - def unary_not_expression?(node) - %w[unary unary_expression].include?(node.kind) && node.text.to_s.lstrip.start_with?("!") - end - - def unary_minus_expression?(node) - %w[unary unary_expression].include?(node.kind) && node.text.to_s.lstrip.start_with?("-") + def parse_semantic(file, language: nil) + require_relative "syntax" + document = Syntax.parse(file, language: language, parser: "tree_sitter") + key = [:semantic_tree_sitter, document.object_id] + normalized_cache.fetch(key) do + normalized_cache[key] = [SemanticNormalizer.new(document).normalize, document.lines] end + end - def boolean_operator(node) - direct = binary_operator(node) - return "and" if %w[&& and].include?(direct) - return "or" if %w[|| or].include?(direct) - return nil if ts_node?(node) - - text = spaced_text(node) - return "and" if text.include?("&&") || text.match?(/\band\b/) - return "or" if text.include?("||") || text.match?(/\bor\b/) - - nil - end - - def comparison_operator(node) - binary_operator(node) || spaced_text(node)[/(===|!==|==|!=|<=|>=|<|>)/, 1] - end - - def binary_operator(node) - node.children.find { |child| !child.named? 
&& !%w[( )].include?(child.text.to_s) }&.text.to_s - end - - def spaced_text(node) - " #{node.text} " - end - - def class_node?(node) - CLASS_KINDS.include?(node.kind) - end - - def module_node?(node) - MODULE_KINDS.include?(node.kind) && named_field(node, "name") - end - - def unwrap_node?(node) - %w[ - parenthesized_expression parenthesized_statements expression_statement statement - case_pattern match_pattern pattern - ].include?(node.kind) && node.named_children.size == 1 - end - - def statement_node?(node) - node.kind.end_with?("_statement") || node.kind.end_with?("_expression") || - %w[return break next].include?(node.kind) - end - - def local_identifier?(node) - IDENTIFIER_KINDS.include?(node.kind) - end - - def ruby_vcall_identifier?(node) - return false unless ruby? - return false unless IDENTIFIER_KINDS.include?(node.kind) - return false if assignment_lhs?(node) - return false if ruby_definition_identifier?(node) - - !ruby_local_name?(node.text.to_s) - end - - def ruby_definition_identifier?(node) - parent = parent_node(node) - return false unless ts_node?(parent) - - if %w[method singleton_method].include?(parent.kind) - name = named_field(parent, "name") || - parent.named_children.find { |child| IDENTIFIER_KINDS.include?(child.kind) } - return same_ts_node?(name, node) - end - - %w[ - method_parameters block_parameters lambda_parameters - optional_parameter keyword_parameter block_parameter - ].include?(parent.kind) - end - - def ruby_local_name?(name) - @local_stack.reverse.any? { |scope| scope.include?(name) } - end - - def ruby? - @document.language == :ruby - end - - def interpolated_string?(node) - node.kind == "string" && node.named_children.any? { |child| child.kind == "interpolation" } - end - - def normalize_interpolated_string(node) - wrap(:DSTR, children: normalize_children(node), source: node) - end - - def vcall_identifier?(node) - return false unless local_identifier?(node) - return false if ruby? 
&& ruby_local_name?(node.text.to_s) - - parent = parent_node(node) - return false unless ts_node?(parent) - return false if %w[method method_parameters parameter_list argument_list arguments].include?(parent.kind) - return false if member_read_node?(parent) - return false if assignment_lhs?(node) || assignment_rhs?(node) - - return true if %w[body_statement block_body then].include?(parent.kind) && parent_named_child?(parent, node) - return true if %w[if_modifier unless_modifier].include?(parent.kind) && same_ts_node?(parent.named_children.first, node) - - false - end - - def const_node?(node) - CONST_KINDS.include?(node.kind) - end - - def self_node?(node) - %w[self this].include?(node.kind) || node.text == "self" || node.text == "this" - end - - def instance_variable?(node) - node.kind == "instance_variable" || node.text.to_s.match?(/\A@[A-Za-z_]\w*[!?=]?\z/) - end - - def global_variable?(node) - node.kind == "global_variable" || node.text.to_s.match?(/\A\$[A-Za-z_]\w*[!?=]?\z/) - end - - def member_read_node?(node) - ts_node?(node) && MEMBER_KINDS.include?(node.kind) && member_parts(node).all? 
- end - - def assignment_lhs?(node) - return false if prev_sibling(node)&.text == ":" - return false if literal_fragment_assignment_context?(node) - - sibling = next_sibling(node) - sibling && assignment_operator?(sibling.text) - end - - def assignment_rhs?(node) - sibling = prev_sibling(node) - sibling && assignment_operator?(sibling.text) - end - - def literal_fragment_assignment_context?(node) - parent = parent_node(node) - return false unless ts_node?(parent) - return true if %w[string delimited_symbol regex regex_literal].include?(parent.kind) - - %w[string_content escape_sequence interpolation].include?(node.kind) && - ts_node?(parent_node(parent)) && - %w[string delimited_symbol regex regex_literal].include?(parent_node(parent).kind) - end - - def assignment_operator?(text) - %w[= += -= *= /= %= &&= ||=].include?(text.to_s) - end - - def operator_assignment_operator(node) - raw = node.children.find { |child| !child.named? && child.text.to_s.end_with?("=") }&.text.to_s - op = raw.sub(/=\z/, "") - op = "||" if raw == "||=" - op = "&&" if raw == "&&=" - op.to_sym - end - - def augmented_assignment_value(left, operator, right_raw, source) - receiver = assignment_receiver(left) - right = normalize_node(right_raw) - wrap(:CALL, children: [receiver, operator, list([right].compact, source: right_raw || left)], source: source) - end - - def normalize_logical_operator_assignment(left, operator, right, source:) - return nil unless ruby? && [:"||", :"&&"].include?(operator) - return nil unless ts_node?(left) && IDENTIFIER_KINDS.include?(left.kind) - - name = target_name(left) - type = operator == :"||" ? 
:OP_ASGN_OR : :OP_ASGN_AND - receiver = wrap(:LVAR, children: [name], source: left) - assignment = wrap(:LASGN, children: [name, right], source: source) - wrap(type, children: [receiver, operator, assignment], source: source) - end - - def assignment_receiver(left) - return nil unless ts_node?(left) - return wrap(:LVAR, children: [left.text.to_s], source: left) if IDENTIFIER_KINDS.include?(left.kind) - return wrap(:IVAR, children: [left.text.to_s], source: left) if instance_variable?(left) - return normalize_const(left) if const_node?(left) - - normalize_node(left) - end - - def with_ruby_scope(node, reset: false) - return yield unless ruby? - - previous = @local_stack - @local_stack = [] if reset - @local_stack = @local_stack + [ruby_scope_locals(node)] - yield - ensure - @local_stack = previous if ruby? - end - - def ruby_scope_locals(node) - locals = Set.new - collect_ruby_scope_locals(node, locals, root: true) - locals - end - - def collect_ruby_scope_locals(node, locals, root: false) - return unless ts_node?(node) - return if !root && ruby_scope_boundary?(node) - - collect_ruby_parameter_locals(node, locals) - collect_ruby_assignment_locals(node, locals) - - node.named_children.each do |child| - next if ruby_scope_child_boundary?(child) - - collect_ruby_scope_locals(child, locals) - end - end - - def collect_ruby_parameter_locals(node, locals) - return unless %w[method_parameters block_parameters lambda_parameters].include?(node.kind) - - node.named_children.each do |child| - collect_identifier_names(child, locals) - end - end - - def collect_ruby_assignment_locals(node, locals) - if node.kind == "exception_variable" - collect_identifier_names(node, locals) - return - end - - return unless ruby_assignment_node?(node) - - left = assignment_left(node) - collect_assignment_target_names(left, locals) - end - - def ruby_assignment_node?(node) - return false unless ts_node?(node) - return true if %w[assignment operator_assignment].include?(node.kind) - return true 
if node.kind == "pattern" && node.children.any? { |child| !child.named? && child.text == "=" } - - %w[body_statement statement].include?(node.kind) && - node.children.any? { |child| !child.named? && assignment_operator?(child.text) } - end - - def collect_assignment_target_names(node, locals) - return unless ts_node?(node) - - if IDENTIFIER_KINDS.include?(node.kind) - locals.add(node.text.to_s.sub(/\A\*/, "")) - return - end - - return unless %w[left_assignment_list expression_list splat splat_parameter rest_assignment].include?(node.kind) - - node.named_children.each { |child| collect_assignment_target_names(child, locals) } - end - - def collect_identifier_names(node, locals) - return unless ts_node?(node) - - locals.add(node.text.to_s.sub(/\A\*/, "")) if IDENTIFIER_KINDS.include?(node.kind) - node.named_children.each { |child| collect_identifier_names(child, locals) } - end - - def ruby_scope_boundary?(node) - return false if %w[block do_block].include?(node.kind) && parent_node(node)&.kind == "lambda" - - FUNCTION_KINDS.include?(node.kind) || class_node?(node) || module_node?(node) || - %w[singleton_class lambda block do_block].include?(node.kind) - end - - def ruby_scope_child_boundary?(node) - ruby_scope_boundary?(node) - end - - def member_parts(node) - return [nil, nil] if node.kind == "expression_list" && - !(named_field(node, "operand") && named_field(node, "field")) - - return dotted_call_parts(node) if dotted_call?(node) - - recv = named_field(node, "receiver") || named_field(node, "object") || - named_field(node, "operand") || named_field(node, "value") || - node.named_children.first - mid = named_field(node, "method") || named_field(node, "field") || - named_field(node, "property") || node.named_children.reject { |child| %w[block do_block argument_list arguments].include?(child.kind) }.last - return [nil, nil] unless recv && mid && recv != mid - - [recv, mid.text.to_s.sub(/=\z/, "")] - end - - def call_arguments(node, function) - args = 
named_field(node, "arguments") || named_field(node, "argument") || - node.named_children.find { |child| %w[argument_list arguments].include?(child.kind) } - return [] unless args - - children = args.named_children.reject { |child| function && child == function } - return [normalize_dotted_expression(args)] if dotted_expression?(args) - if children.empty? - scalar = scalar_argument_list_value(args) - return [scalar] if scalar - - return literal_arguments_from_text(args) - end - return [normalize_infix_statement(args)] if infix_statement?(args) - - children.map { |child| normalize_node(child) }.compact - end - - def assignment_left(node) - named_field(node, "left") || node.named_children.first - end - - def assignment_right(node) - named_field(node, "right") || node.named_children[1] - end - - def function_name(node) - return singleton_name(node) if node.kind == "singleton_method" - - name = named_field(node, "name") || - node.named_children.find do |child| - IDENTIFIER_KINDS.include?(child.kind) || child.kind == "constant" - end - name&.text.to_s.to_sym - end - - def singleton_receiver(node) - named_field(node, "receiver") || - node.named_children.find { |child| child.kind != "identifier" } || - node.named_children.first - end - - def singleton_name(node) - name = named_field(node, "name")&.text || - node.named_children.reverse.find { |child| IDENTIFIER_KINDS.include?(child.kind) }&.text.to_s - name.to_s.to_sym - end - - def first_named(node) - node.named_children.first - end - - def block_child(node) - node.named_children.find { |child| BLOCK_KINDS.include?(child.kind) || %w[block do_block].include?(child.kind) } - end - - def branch_child(node, cond, index) - node.named_children.reject { |child| child == cond || %w[comment else elsif].include?(child.kind) }[index] - end - - def explicit_alternative(node) - node.named_children.find { |child| %w[elsif else].include?(child.kind) } - end - - def const_for(node) - return wrap(:CONST, children: ["(anonymous)".to_sym], 
source: @document.root) unless ts_node?(node) - return normalize_const(node) if const_node?(node) - - wrap(:CONST, children: [node.text.to_s.to_sym], source: node) - end - - def normalize_parameters(node) - return nil unless ruby? && ts_node?(node) - - defaults = node.named_children.filter_map do |param| - name = named_field(param, "name") - value = named_field(param, "value") - next unless name && value - - wrap(:LASGN, children: [name.text.to_sym, normalize_node(value)], source: param) - end - return nil if defaults.empty? - - wrap(:ARGS, children: defaults, source: node) - end - - def normalize_block_parameters(block) - return nil unless ruby? && ts_node?(block) - - params = block.named_children.find { |child| child.kind == "block_parameters" } - return nil unless params - - destructured = params.named_children.select { |child| child.kind == "destructured_parameter" } - pre_init = destructured.map { |param| normalize_destructured_block_parameter(param) }.compact - return nil if pre_init.empty? - - wrap(:ARGS, children: pre_init, source: params) - end - - def normalize_destructured_block_parameter(param) - targets = [] - param.named_children.each { |child| collect_destructured_parameter_targets(child, targets) } - return nil if targets.empty? - - wrap(:MASGN, - children: [ - wrap(:DVAR, children: [nil], source: param), - list(targets, source: param), - nil, - ], - source: param) - end - - def collect_destructured_parameter_targets(node, targets) - return unless ts_node?(node) - - if IDENTIFIER_KINDS.include?(node.kind) - targets << wrap(:DASGN, children: [node.text.to_s, nil], source: node) - return - end - - node.named_children.each { |child| collect_destructured_parameter_targets(child, targets) } - end - - def scope(body, args: nil) - wrap(:SCOPE, children: [nil, args, body], source: body || args || @document.root) - end - - def list(children, source:) - return nil if children.nil? || children.empty? 
- - wrap(:LIST, children: children, source: source) - end - - def wrap(type, children:, source:) - if source.respond_to?(:start_point) - first_lineno = source.start_point.row + 1 - first_column = source.start_point.column - last_lineno = source.end_point.row + 1 - last_column = source.end_point.column - text = source.text.to_s - else - first_lineno = source.first_lineno - first_column = source.first_column - last_lineno = source.last_lineno - last_column = source.last_column - text = source.text.to_s - end - - Node.new( - type: type, - children: children, - first_lineno: first_lineno, - first_column: first_column, - last_lineno: last_lineno, - last_column: last_column, - text: text - ) - end - - def source_before_child(node, child) - text = @document.source.byteslice(node.start_byte...child.start_byte).to_s.rstrip - return node if text.empty? - - lines = text.lines - last_lineno = node.start_point.row + lines.size - last_column = - if lines.size <= 1 - node.start_point.column + text.length - else - lines.last.to_s.chomp.length - end - Node.new( - type: :SOURCE, - children: [], - first_lineno: node.start_point.row + 1, - first_column: node.start_point.column, - last_lineno: last_lineno, - last_column: last_column, - text: text - ) - end - - def source_from_nodes(first_node, last_node) - return first_node unless ts_node?(first_node) && ts_node?(last_node) - - text = @document.source.byteslice(first_node.start_byte...last_node.end_byte).to_s - Node.new( - type: :SOURCE, - children: [], - first_lineno: first_node.start_point.row + 1, - first_column: first_node.start_point.column, - last_lineno: last_node.end_point.row + 1, - last_column: last_node.end_point.column, - text: text - ) - end - - def source_from_normalized_nodes(first_node, last_node) - return first_node unless first_node.is_a?(Node) && last_node.is_a?(Node) - - text = - if first_node.first_lineno == last_node.last_lineno - @document.lines[first_node.first_lineno - 
1].to_s.byteslice(first_node.first_column...last_node.last_column) - else - ([@document.lines[first_node.first_lineno - 1].to_s.byteslice(first_node.first_column..)] + - @document.lines[first_node.first_lineno...(last_node.last_lineno - 1)] + - [@document.lines[last_node.last_lineno - 1].to_s.byteslice(0...last_node.last_column)]).join - end - Node.new( - type: :SOURCE, - children: [], - first_lineno: first_node.first_lineno, - first_column: first_node.first_column, - last_lineno: last_node.last_lineno, - last_column: last_node.last_column, - text: text.to_s - ) - end - - def named_field(node, name) - node.child_by_field_name(name) - rescue StandardError - nil - end - - def parent_node(node) - node.parent - rescue StandardError - nil - end - - def next_sibling(node) - node.next_sibling - rescue StandardError - nil - end - - def prev_sibling(node) - node.prev_sibling - rescue StandardError - nil - end - - def next_named_sibling(node) - node.next_named_sibling - rescue StandardError - nil - end - - def modifier_statement?(node) - %w[body_statement block_body statement].include?(node.kind) && - modifier_keyword(node) && - node.named_children.size >= 2 - end - - def ternary_statement?(node) - %w[body_statement block_body statement argument_list].include?(node.kind) && - node.named_children.size >= 3 && - node.children.any? { |child| !child.named? && child.text == "?" } && - node.children.any? { |child| !child.named? && child.text == ":" } - rescue StandardError - false - end - - def normalize_ternary_statement(node) - cond, positive, negative = node.named_children - wrap(:IF, children: [normalize_node(cond), normalize_node(positive), normalize_node(negative)], source: node) - end - - def case_argument_list?(node) - node.kind == "argument_list" && - node.children.any? { |child| !child.named? && child.kind == "case" } && - node.named_children.any? 
{ |child| WHEN_KINDS.include?(child.kind) } - rescue StandardError - false - end - - def leading_function_statement?(node) - %w[body_statement statement].include?(node.kind) && - node.children.first&.kind.to_s == "def" && - node.named_children.any? { |child| IDENTIFIER_KINDS.include?(child.kind) } - rescue StandardError - false - end - - def normalize_leading_function_statement(node) - name = node.named_children.find { |child| IDENTIFIER_KINDS.include?(child.kind) }&.text.to_s.to_sym - body = node.named_children.reverse.find { |child| child.kind == "body_statement" } - normalized_body = with_ruby_scope(node, reset: true) do - elide_tail_returns(normalize_body(body)) - end - wrap(:DEFN, children: [name, scope(normalized_body)], source: node) - end - - def command_call_statement?(node) - return false unless %w[body_statement block block_body statement].include?(node.kind) - return false if dotted_call?(node) - return false unless node.named_children.first&.kind == "identifier" - - node.named_children.any? { |child| %w[argument_list arguments].include?(child.kind) } || - call_block(node) - end - - def zero_child_identifier_call?(node) - node.kind == "call" && node.named_children.empty? && - node.text.to_s.match?(/\A[A-Za-z_]\w*[!?=]?\z/) - end - - def dotted_call?(node) - return false unless ts_node?(node) - return false unless node.children.any? { |child| child.text == "." || child.text == "&." } - - callable = node.named_children.reject { |child| %w[block do_block argument_list arguments].include?(child.kind) } - return false if callable.any? { |child| %w[string_content interpolation].include?(child.kind) } - - callable.size >= 2 - end - - def safe_navigation_call?(node) - ts_node?(node) && node.children.any? { |child| !child.named? && child.text == "&." 
} - end - - def dotted_call_parts(node) - callable = node.named_children.reject { |child| %w[block do_block argument_list arguments].include?(child.kind) } - [callable.first, callable[1].text.to_s.sub(/=\z/, "")] - end - - def leading_if_statement?(node) - %w[body_statement block block_body statement].include?(node.kind) && - %w[if unless].include?(node.children.first&.kind.to_s) && - node.named_children.size >= 2 && - !IF_KINDS.include?(node.named_children.first.kind) - rescue StandardError - false - end - - def leading_case_statement?(node) - %w[body_statement block_body statement].include?(node.kind) && - node.children.first&.kind.to_s == "case" && - node.named_children.any? { |child| WHEN_KINDS.include?(child.kind) } - rescue StandardError - false - end - - def normalize_leading_case_statement(node) - value = normalize_node(case_value(node)) - whens = case_arms(node).map { |arm| normalize_when(arm) }.compact - wrap(:CASE, children: [value, link_when_chain(whens, case_else_body(node))], source: node) - end - - def leading_loop_statement?(node) - %w[body_statement block_body statement].include?(node.kind) && - !node.children.first&.named? && - %w[while until].include?(node.children.first&.kind.to_s) && - node.named_children.size >= 2 - rescue StandardError - false - end - - def rescue_body_statement?(node) - %w[body_statement block_body statement].include?(node.kind) && - node.named_children.any? 
{ |child| child.kind == "rescue" } - rescue StandardError - false - end - - def normalize_rescue_body_statement(node) - named = node.named_children - rescue_index = named.index { |child| child.kind == "rescue" } - body = normalize_body_nodes(named[0...rescue_index], source: node) - rescue_nodes = named[rescue_index..].select { |child| child.kind == "rescue" } - resbodies = rescue_nodes.map { |child| normalize_rescue_clause(child) } - source = source_from_nodes(named.first || node, rescue_source_end(rescue_nodes.last) || rescue_nodes.last || node) - wrap(:RESCUE, children: [body, link_rescue_chain(resbodies), nil], source: source) - end - - def normalize_rescue_clause(node) - exceptions = node.named_children.find { |child| child.kind == "exceptions" } - exception_nodes = exceptions ? exceptions.named_children.map { |child| normalize_node(child) }.compact : [] - exception_variable = rescue_exception_variable(node) - handler = node.named_children.reverse.find do |child| - !%w[exceptions exception_variable comment].include?(child.kind) - end - body = prepend_rescue_exception_assignment(normalize_body(handler), exception_variable) - wrap(:RESBODY, children: [list(exception_nodes, source: exceptions || node), body, nil], - source: node) - end - - def rescue_source_end(node) - return nil unless ts_node?(node) - - handler = node.named_children.reverse.find do |child| - !%w[exceptions exception_variable comment].include?(child.kind) - end - return handler.named_children.last || handler if ts_node?(handler) - - node.named_children.reverse.find { |child| !%w[comment].include?(child.kind) } || node - end - - def rescue_exception_variable(node) - var = node.named_children.find { |child| child.kind == "exception_variable" } - name = var&.named_children&.find { |child| IDENTIFIER_KINDS.include?(child.kind) } - return nil unless name - - wrap(:LASGN, children: [name.text.to_s, wrap(:ERRINFO, children: [], source: var)], source: var) - end - - def 
prepend_rescue_exception_assignment(body, assignment) - return body unless assignment - return assignment unless body.is_a?(Node) - - if body.type == :BLOCK - body.children = [assignment] + body.children.compact - body - else - wrap(:BLOCK, children: [assignment, body], source: source_from_normalized_nodes(assignment, body)) - end - end - - def ensure_body_statement?(node) - %w[body_statement block_body statement].include?(node.kind) && - node.named_children.any? { |child| child.kind == "ensure" } - rescue StandardError - false - end - - def normalize_ensure_body_statement(node) - named = node.named_children - ensure_index = named.index { |child| child.kind == "ensure" } - body = normalize_body_nodes(named[0...ensure_index], source: node) - ensure_body = normalize_body(named[ensure_index]) - wrap(:ENSURE, children: [body, ensure_body], source: body || node) - end - - def array_literal_statement?(node) - %w[body_statement block_body statement argument_list].include?(node.kind) && - node.children.first&.text == "[" && - node.children.last&.text == "]" - rescue StandardError - false - end - - def element_reference_statement?(node) - %w[body_statement block_body statement].include?(node.kind) && - node.children.first&.text != "[" && - node.children.any? { |child| !child.named? && child.text == "[" } && - node.children.any? { |child| !child.named? 
&& child.text == "]" } && - node.named_children.size >= 2 - rescue StandardError - false - end - - def normalize_element_reference_statement(node) - recv = node.named_children.first - args = node.named_children.drop(1).map { |child| normalize_node(child) }.compact - wrap(:CALL, children: [normalize_node(recv), :[], list(args, source: node)], source: node) - end - - def hash_literal_statement?(node) - %w[body_statement block_body statement argument_list].include?(node.kind) && - node.children.first&.text == "{" && - node.children.last&.text == "}" - rescue StandardError - false - end - - def normalize_hash_literal_statement(node) - wrap(:HASH, children: normalize_children(node), source: node) - end - - def normalize_array_literal_statement(node) - values = node.named_children.map { |child| normalize_node(child) }.compact - return wrap(:ZLIST, children: [], source: node) if values.empty? - - list(values, source: node) - end - - def empty_body_statement?(node) - %w[body_statement block_body statement].include?(node.kind) && - node.named_children.empty? && - node.text.to_s.strip.empty? - end - - def heredoc_body_statement?(node) - %w[body_statement block_body statement then].include?(node.kind) && - node.named_children.any? { |child| child.kind == "heredoc_body" } - rescue StandardError - false - end - - def normalize_heredoc_body_statement(node) - heredoc_bodies = node.named_children.select { |child| child.kind == "heredoc_body" } - children = node.named_children.filter_map do |child| - next if child.kind == "heredoc_body" - - if heredoc_call_for_body?(child) - with_current_heredoc_body(heredoc_bodies.shift) { normalize_node(child) } - else - normalize_body(child) - end - end - return nil if children.empty? - return children.first if children.size == 1 - - wrap(:BLOCK, children: children, source: node) - end - - def heredoc_call_for_body?(node) - return false unless ts_node?(node) - return true if node.kind == "heredoc_beginning" - - node.named_children.any? 
do |child| - next false if child.named_children.any? { |grandchild| grandchild.kind == "heredoc_body" } - - heredoc_call_for_body?(child) - end - end - - def with_current_heredoc_body(body) - previous = @current_heredoc_body - @current_heredoc_body = body - yield - ensure - @current_heredoc_body = previous - end - - def normalize_heredoc_beginning(node) - body = @current_heredoc_body - children = body ? normalize_heredoc_children(body) : [] - wrap(:DSTR, children: children, source: node) - end - - def normalize_heredoc_children(node) - node.named_children.filter_map do |child| - case child.kind - when "interpolation" - normalize_interpolation(child) - when "heredoc_content" - text = child.text.to_s - text.empty? ? nil : wrap(:STR, children: [text], source: child) - else - nil - end - end - end - - def normalize_interpolation(node) - exprs = node.named_children.map { |child| normalize_node(child) }.compact - body = exprs.size == 1 ? exprs.first : list(exprs, source: node) - wrap(:EVSTR, children: [body].compact, source: node) - end - - def interpolated_statement?(node) - %w[body_statement block_body statement argument_list].include?(node.kind) && - node.named_children.any? { |child| child.kind == "interpolation" } - rescue StandardError - false - end - - def normalize_interpolated_statement(node) - wrap(:DSTR, children: normalize_children(node), source: node) - end - - def concatenated_string_statement?(node) - %w[body_statement block_body statement argument_list].include?(node.kind) && - node.named_children.size > 1 && - node.named_children.all? { |child| child.kind == "string" } - rescue StandardError - false - end - - def normalize_concatenated_string_statement(node) - normalized = node.named_children.map { |child| [child, normalize_node(child)] } - parts = normalized.flat_map do |_child, child_node| - child_node.is_a?(Node) && child_node.type == :DSTR ? 
child_node.children : [child_node] - end.compact - wrap(:DSTR, children: parts, source: dynamic_string_source(normalized) || node.named_children.first) - end - - def normalize_chained_string(node) - normalized = node.named_children.map { |child| [child, normalize_node(child)] } - parts = normalized.flat_map do |_child, child_node| - child_node.is_a?(Node) && child_node.type == :DSTR ? child_node.children : [child_node] - end.compact - wrap(:DSTR, children: parts, source: dynamic_string_source(normalized) || node.named_children.first || node) - end - - def dynamic_string_source(normalized_children) - normalized_children.find do |_child, child_node| - child_node.is_a?(Node) && child_node.type == :DSTR && - child_node.children.any? { |part| part.is_a?(Node) && part.type == :EVSTR } - end&.first - end - - def terminal_statement?(node) - %w[body_statement block_body statement argument_list].include?(node.kind) && - node.named_children.empty? && - !node.text.to_s.strip.empty? - end - - def normalize_terminal_statement(node) - text = node.text.to_s.strip - return wrap(:YIELD, children: [nil], source: node) if ruby? && text == "yield" - return wrap(:IVAR, children: [text], source: node) if text.match?(/\A@[A-Za-z_]\w*[!?=]?\z/) - return normalize_global_variable(node) if text.match?(/\A\$/) - return wrap(:NIL, children: [], source: node) if text == "nil" - return wrap(:TRUE, children: [], source: node) if text == "true" - return wrap(:FALSE, children: [], source: node) if text == "false" - return wrap(:LIT, children: [text.delete_prefix(":").to_sym], source: node) if text.match?(/\A:[A-Za-z_]\w*[!?=]?\z/) - return wrap(:INTEGER, children: [text.to_i], source: node) if text.match?(/\A-?\d+\z/) - return wrap(:ZLIST, children: [], source: node) if text == "[]" - - if bare_identifier_text?(text) - return wrap(:VCALL, children: [text.to_sym], source: node) if ruby? 
&& !ruby_local_name?(text) - - return wrap(:LVAR, children: [text], source: node) - end - - wrap(kind_type(node.kind), children: [], source: node) - end - - def normalize_global_variable(node) - text = node.text.to_s - return wrap(:NTH_REF, children: [text.delete_prefix("$").to_i], source: node) if text.match?(/\A\$[1-9]\d*\z/) - - wrap(:GVAR, children: [text], source: node) - end - - def normalize_leading_loop_statement(node) - keyword = node.children.first.kind - cond = normalize_node(node.named_children.first) - body = normalize_body(node.named_children[1]) - wrap(keyword == "until" ? :UNTIL : :WHILE, children: [cond, body], source: node) - end - - def operator_assignment_statement?(node) - %w[body_statement block_body statement].include?(node.kind) && - operator_assignment_statement_parts(node)[1] - rescue StandardError - false - end - - def normalize_operator_assignment_statement(node) - left, operator, right_raw = operator_assignment_statement_parts(node) - right = normalize_node(right_raw) - - if left&.kind == "element_reference" - recv = left.named_children.first - args = left.named_children.drop(1).map { |child| normalize_node(child) }.compact - return wrap(:OP_ASGN1, children: [normalize_node(recv), operator, list(args, source: left), right], - source: node) - end - - if member_read_node?(left) - recv, mid = member_parts(left) - return wrap(:OP_ASGN2, children: [normalize_node(recv), false, mid.to_sym, operator, right], source: node) - end - - logical = normalize_logical_operator_assignment(left, operator, right, source: node) - return logical if logical - if instance_variable?(left) || global_variable?(left) - return assignment_target(left, augmented_assignment_value(left, operator, right_raw, node), source: node) - end - - assignment_target(left, right, source: node) || - wrap(:LASGN, children: [target_name(left), augmented_assignment_value(left, operator, right_raw, node)], - source: node) - end - - def operator_assignment_statement_parts(node) - left 
= nil - operator = nil - right = nil - node.children.each do |child| - if child.named? - left ||= child - right = child if operator - elsif child.text.to_s.match?(/\A(?:[+\-*\/%&|^]|\|\||&&)=\z/) - raw = child.text.to_s - operator = raw.sub(/=\z/, "") - operator = "||" if raw == "||=" - operator = "&&" if raw == "&&=" - end - end - return [nil, nil, nil] unless left && operator && right - - [left, operator.to_sym, right] - end - - def leading_owner_statement?(node) - %w[body_statement statement].include?(node.kind) && - %w[class module].include?(node.children.first&.kind.to_s) && - node.named_children.size >= 2 && - !OWNER_STATEMENT_NESTED_KIND.include?(node.named_children.first.kind) - rescue StandardError - false - end - - OWNER_STATEMENT_NESTED_KIND = %w[class class_definition class_declaration module].freeze - - def normalize_leading_owner_statement(node) - keyword = node.children.first.kind - name = const_for(node.named_children.first) - body_node = named_field(node, "body") || - node.named_children.reverse.find { |child| BLOCK_KINDS.include?(child.kind) } - body = normalize_body(body_node) - if keyword == "module" - wrap(:MODULE, children: [name, scope(body)], source: node) - else - wrap(:CLASS, children: [name, nil, scope(body)], source: node) - end - end - - def normalize_leading_if_statement(node) - keyword = node.children.first.kind - cond = node.named_children.find { |child| !%w[comment then elsif else].include?(child.kind) } - consequence = node.named_children.find { |child| child.kind == "then" } || - branch_child(node, cond, 0) - alternative = explicit_alternative(node) - type = keyword == "unless" ? :UNLESS : :IF - wrap(type, children: [normalize_node(cond), normalize_body(consequence), normalize_else_or_branch(alternative)], - source: node) - end - - def modifier_keyword(node) - seen_named = false - node.children.each do |child| - seen_named ||= child.named? - return child.kind if seen_named && !child.named? 
&& %w[if unless while until].include?(child.kind) - end - nil - rescue StandardError - nil - end - - def modifier_parts(node) - [node.named_children.first, node.named_children.last] - end - - def call_block(node) - node.named_children.find { |child| %w[block do_block].include?(child.kind) } - end - - def statement_call_with_block?(node) - %w[body_statement block_body statement].include?(node.kind) && - call_block(node) && - statement_block_call(node) - end - - def statement_block_call(node) - return node if dotted_call?(node) - - block = call_block(node) - node.named_children.find do |child| - !same_ts_node?(child, block) && (CALL_KINDS.include?(child.kind) || member_read_node?(child)) - end - end - - def normalize_statement_call_with_block(node) - block = call_block(node) - call = normalize_call_without_block(statement_block_call(node), block) - args = normalize_block_parameters(block) - body = with_ruby_scope(block) do - dynamic_scope(normalize_body(named_field(block, "body") || block_child(block) || block)) - end - wrap(:ITER, children: [call, scope(body, args: args)], source: node) - end - - def visibility_inline_def_call?(node) - return false unless node.kind == "call" - - message = node.named_children.first&.text.to_s - return false unless INLINE_DEF_WRAPPER_MIDS.include?(message) - - args = node.named_children.find { |child| child.kind == "argument_list" } - args&.text.to_s.lstrip.start_with?("def ") - end - - def visibility_inline_def_statement?(node, function) - INLINE_DEF_WRAPPER_MIDS.include?(function&.text.to_s) && node.text.to_s.include?("def ") - end - - def inline_def_from_argument_list(args) - return nil unless ts_node?(args) - - inline_def_from_source(args) - end - - def inline_def_from_statement(node) - source = node.named_children.find { |child| child.kind == "argument_list" } || node - inline_def_from_source(source) - end - - def inline_def_from_source(source) - body = inline_def_body(source) - receiver = inline_def_receiver(source) - 
normalized_body = with_ruby_scope(source, reset: true) do - elide_tail_returns(normalize_body(body)) - end - if receiver - name = inline_def_name_after_receiver(source, receiver) - return nil if name.to_s.empty? - - return wrap(:DEFS, children: [normalize_node(receiver), name.to_sym, scope(normalized_body)], - source: source) - end - - name = source.named_children.find { |child| IDENTIFIER_KINDS.include?(child.kind) }&.text.to_s - return nil if name.to_s.empty? - - wrap(:DEFN, children: [name.to_sym, scope(normalized_body)], source: source) - end - - def inline_def_receiver(source) - return nil unless source.text.to_s.match?(/\bdef\s+[^.\s]+\./) - - source.named_children.find { |child| self_node?(child) || const_node?(child) } - end - - def inline_def_name_after_receiver(source, receiver) - index = source.named_children.index { |child| same_ts_node?(child, receiver) } - source.named_children[(index.to_i + 1)..]&.find { |child| IDENTIFIER_KINDS.include?(child.kind) }&.text.to_s - end - - def inline_def_body(node) - stack = node.named_children.reverse - until stack.empty? - child = stack.shift - return child if child.kind == "body_statement" - - stack.concat(child.named_children.reverse) - end - nil - end - - def literal_arguments_from_text(args) - args.text.to_s.scan(/:([A-Za-z_]\w*[!?=]?)/).map do |name| - wrap(:LIT, children: [name.first.to_sym], source: args) - end - end - - def elide_tail_returns(node) - return node unless ruby? - return node unless node.is_a?(Node) - return node if %i[DEFN DEFS CLASS MODULE SCLASS LAMBDA ITER].include?(node.type) - return node.children.first if node.type == :RETURN - - case node.type - when :BLOCK - children = node.children.dup - children[-1] = elide_tail_returns(children[-1]) if children.any? 
- node.children = children - when :SCOPE - children = node.children.dup - children[2] = elide_tail_returns(children[2]) - node.children = children - when :IF, :UNLESS - children = node.children.dup - children[1] = elide_tail_returns(children[1]) - children[2] = elide_tail_returns(children[2]) if children.size > 2 - node.children = children - when :CASE - children = node.children.dup - children[1] = elide_tail_returns(children[1]) - node.children = children - when :CASE2 - children = node.children.dup - children[0] = elide_tail_returns(children[0]) - node.children = children - when :WHEN - children = node.children.dup - children[1] = elide_tail_returns(children[1]) - children[2] = elide_tail_returns(children[2]) if children.size > 2 - node.children = children - when :RESCUE - children = node.children.dup - children[0] = elide_tail_returns(children[0]) - children[1] = elide_tail_returns(children[1]) - node.children = children - when :RESBODY - children = node.children.dup - children[1] = elide_tail_returns(children[1]) - children[2] = elide_tail_returns(children[2]) if children.size > 2 - node.children = children - end - - node - end - - def elide_implicit_nil_body(node) - return node unless ruby? - node = drop_trailing_nil_statement(node) - return nil if node.is_a?(Node) && node.type == :NIL - - node - end - - def prepend_inline_parameter_begin(function_node, body) - marker = inline_parameter_begin_marker(function_node) - return body unless marker - - children = body.is_a?(Node) && body.type == :BLOCK ? body.children.compact : [body].compact - return nil if children.empty? - - if body.is_a?(Node) && body.type == :BLOCK - body.children = [marker] + children - body - else - wrap(:BLOCK, children: [marker] + children, source: function_node) - end - end - - def inline_parameter_begin_marker(function_node) - return nil unless ruby? 
- - params = named_field(function_node, "parameters") || - function_node.named_children.find { |child| child.kind == "method_parameters" } - return nil unless params - - semicolon = params.next_sibling - return nil unless semicolon && !semicolon.named? && semicolon.text == ";" - - Node.new( - type: :BEGIN, - children: [nil], - first_lineno: semicolon.start_point.row + 1, - first_column: semicolon.start_point.column, - last_lineno: semicolon.start_point.row + 1, - last_column: semicolon.start_point.column, - text: "" - ) - rescue StandardError - nil - end - - def drop_trailing_nil_statement(node) - return node unless node.is_a?(Node) && node.type == :BLOCK - - children = node.children.compact - children.pop while children.last.is_a?(Node) && children.last.type == :NIL - return nil if children.empty? - return children.first if children.size == 1 - - node.children = children - node - end - - def scalar_argument_list_value(node) - text = node.text.to_s.strip - return wrap(:YIELD, children: [nil], source: node) if ruby? && text == "yield" - return wrap(:NIL, children: [], source: node) if text == "nil" - return wrap(:TRUE, children: [], source: node) if text == "true" - return wrap(:FALSE, children: [], source: node) if text == "false" - return wrap(:LIT, children: [text.delete_prefix(":").to_sym], source: node) if text.match?(/\A:[A-Za-z_]\w*[!?=]?\z/) - if text.match?(/\A-?\d+\z/) - return wrap(:INTEGER, children: [text.to_i], source: node) - end - return nil unless bare_identifier_text?(text) - - if ruby? && !ruby_local_name?(text) - wrap(:VCALL, children: [text.to_sym], source: node) - else - wrap(:LVAR, children: [text], source: node) - end - end - - def local_or_call_for_name(name, source) - if ruby? 
&& !ruby_local_name?(name) - wrap(:VCALL, children: [name.to_sym], source: source) - else - wrap(:LVAR, children: [name], source: source) - end - end - - def symbol_literal_node?(node) - node.is_a?(Node) && node.type == :LIT && node.children.first.is_a?(Symbol) - end - - def command_arguments(args) - return [scalar_argument_list_value(args)].compact if args.named_children.empty? - return [normalize_infix_statement(args)] if infix_statement?(args) - return [normalize_dotted_expression(args)] if dotted_expression?(args) - - args.named_children.map { |child| normalize_node(child) }.compact - end - - def parent_named_child?(parent, node) - parent.named_children.any? { |child| same_ts_node?(child, node) } - end - - def same_ts_node?(left, right) - left.kind == right.kind && left.start_byte == right.start_byte && left.end_byte == right.end_byte - rescue StandardError - false - end - - def bare_identifier_text?(text) - text.to_s.strip.match?(/\A[A-Za-z_]\w*[!?=]?\z/) - end - - def hidden_match?(node) - node.kind == "expression_statement" && - node.text.to_s.lstrip.start_with?("match ") && - node.named_children.any? { |child| child.kind == "match_block" } - end - - def kind_type(kind) - kind.to_s.upcase.gsub(/[^A-Z0-9]+/, "_").to_sym - end - - def ts_node?(node) - node && node.respond_to?(:kind) && node.respond_to?(:named_children) - end - end + require_relative "ast/legacy_normalizer" # Flatten a && chain (binary-nested OR n-ary, version dependent). def flatten_and(node) diff --git a/gems/decomplex/lib/decomplex/ast/adapters/base.rb b/gems/decomplex/lib/decomplex/ast/adapters/base.rb new file mode 100644 index 000000000..46caeb593 --- /dev/null +++ b/gems/decomplex/lib/decomplex/ast/adapters/base.rb @@ -0,0 +1,791 @@ +# frozen_string_literal: true + +module Decomplex + module Ast + UnsupportedLanguageError = Class.new(StandardError) + + # Language-specific syntax-shape decisions live here, before nodes + # are converted into Decomplex's shared AST vocabulary. 
+ class TreeSitterNormalizationAdapter + BINARY_WRAPPER_KINDS = %w[ + binary binary_expression binary_operator boolean_operator comparison_operator + ].freeze + CLASS_KINDS = %w[class class_definition class_declaration class_specifier].freeze + COMMON_ASSIGNMENT_OPERATORS = %w[= += -= *= /= %=].freeze + RUBY_ASSIGNMENT_OPERATORS = (COMMON_ASSIGNMENT_OPERATORS + %w[**= &&= ||= &= |= ^= <<= >>=]).freeze + PYTHON_ASSIGNMENT_OPERATORS = (COMMON_ASSIGNMENT_OPERATORS + %w[//= **= @= &= |= ^= <<= >>= :=]).freeze + LUA_ASSIGNMENT_OPERATORS = %w[=].freeze + TYPESCRIPT_ASSIGNMENT_OPERATORS = ( + COMMON_ASSIGNMENT_OPERATORS + %w[**= <<= >>= >>>= &= |= ^= &&= ||= ??=] + ).freeze + OPERATOR_CALL_OPERATORS = %w[+ - * / % ** | & ^ << >> =~ !~].freeze + BOOLEAN_EXPRESSION_KINDS = %w[binary binary_expression boolean_operator].freeze + COMPARISON_EXPRESSION_KINDS = %w[binary binary_expression comparison_operator].freeze + DOTTED_EXPRESSION_WRAPPER_KINDS = %w[body_statement block_body statement argument_list].freeze + PYTHON_DOTTED_EXPRESSION_WRAPPER_KINDS = (DOTTED_EXPRESSION_WRAPPER_KINDS + %w[expression_statement]).freeze + LITERAL_CONTAINER_KINDS = %w[string delimited_symbol regex regex_literal].freeze + LITERAL_FRAGMENT_KINDS = %w[string_content escape_sequence interpolation string_fragment].freeze + CASE_ARGUMENT_WHEN_KINDS = %w[ + when switch_case case_clause expression_case case_statement switch_section + switch_block_statement_group switch_entry when_entry match_arm + ].freeze + CASE_ELSE_KINDS = %w[else switch_default].freeze + CASE_DEFAULT_PATTERN_KINDS = %w[case_pattern match_pattern pattern].freeze + ADAPTER_FUNCTION_KINDS = %w[ + method function_definition function_declaration method_definition + method_declaration function_item singleton_method + ].freeze + STATEMENT_BLOCK_PARENT_KINDS = %w[ + method_declaration constructor_declaration function_declaration function_body + if_statement while_statement for_statement enhanced_for_statement try_statement + catch_clause 
finally_clause do_statement lambda_expression + ].freeze + IDENTIFIER_KINDS = %w[ + identifier simple_identifier property_identifier field_identifier shorthand_property_identifier + ].freeze + LEADING_FUNCTION_WRAPPER_KINDS = %w[body_statement statement].freeze + PYTHON_LEADING_FUNCTION_WRAPPER_KINDS = %w[block].freeze + LUA_LEADING_FUNCTION_WRAPPER_KINDS = %w[block].freeze + OWNER_STATEMENT_NESTED_KIND = %w[class class_definition class_declaration module].freeze + LEADING_OWNER_WRAPPER_KINDS = %w[body_statement statement].freeze + PYTHON_LEADING_OWNER_WRAPPER_KINDS = %w[block].freeze + IF_NODE_KINDS = %w[if if_statement if_modifier unless unless_modifier if_expression conditional].freeze + LEADING_IF_WRAPPER_KINDS = %w[body_statement block block_body statement].freeze + PYTHON_LEADING_IF_WRAPPER_KINDS = %w[block].freeze + LUA_LEADING_IF_WRAPPER_KINDS = %w[block].freeze + LEADING_CASE_WRAPPER_KINDS = %w[body_statement block block_body statement].freeze + LEADING_LOOP_WRAPPER_KINDS = %w[body_statement block block_body statement].freeze + RESCUE_BODY_WRAPPER_KINDS = %w[body_statement block_body statement].freeze + ENSURE_BODY_WRAPPER_KINDS = %w[body_statement block_body statement].freeze + ARRAY_LITERAL_WRAPPER_KINDS = %w[ + body_statement block block_body statement argument_list expression_statement + ].freeze + ARRAY_LITERAL_NODE_KINDS = %w[array list].freeze + ELEMENT_REFERENCE_WRAPPER_KINDS = %w[ + body_statement block block_body statement expression_statement expression_list + ].freeze + ELEMENT_REFERENCE_NODE_KINDS = %w[ + element_reference subscript subscript_expression bracket_index_expression + ].freeze + HASH_LITERAL_WRAPPER_KINDS = %w[ + body_statement block block_body statement argument_list expression_statement parenthesized_expression + ].freeze + HASH_LITERAL_NODE_KINDS = %w[hash dictionary object table_constructor].freeze + EMPTY_BODY_WRAPPER_KINDS = %w[body_statement block block_body statement].freeze + HEREDOC_BODY_WRAPPER_KINDS = %w[body_statement 
block_body statement then].freeze + INTERPOLATED_STATEMENT_WRAPPER_KINDS = %w[body_statement block_body statement argument_list].freeze + CONCATENATED_STRING_WRAPPER_KINDS = %w[body_statement block_body statement argument_list].freeze + PYTHON_CONCATENATED_STRING_WRAPPER_KINDS = (CONCATENATED_STRING_WRAPPER_KINDS + %w[block expression_statement]).freeze + CONCATENATED_STRING_NODE_KINDS = %w[chained_string concatenated_string].freeze + UNWRAP_KINDS = %w[ + parenthesized_expression parenthesized_statements expression_statement statement + case_pattern match_pattern pattern + ].freeze + PYTHON_BODY_FIELD_KINDS = %w[ + elif_clause else_clause for_statement function_definition if_statement + try_statement while_statement with_statement + ].freeze + QUESTION_COLON_TERNARY_KINDS = %w[body_statement block_body statement argument_list conditional].freeze + TYPESCRIPT_TERNARY_KINDS = (QUESTION_COLON_TERNARY_KINDS + %w[ternary_expression]).freeze + + class << self + def for(document) + case document&.language&.to_sym + when :ruby then RubyTreeSitterNormalizationAdapter.new(document) + when :python then PythonTreeSitterNormalizationAdapter.new(document) + when :lua then LuaTreeSitterNormalizationAdapter.new(document) + when :typescript, :javascript then TypeScriptTreeSitterNormalizationAdapter.new(document) + when :rust then RustTreeSitterNormalizationAdapter.new(document) + else + raise UnsupportedLanguageError, + "unsupported AST normalization language #{document&.language.inspect}" + end + end + end + + attr_reader :document + + def initialize(document) + @document = document + end + + def ruby? 
+ false + end + + def yield_statement?(_node) + false + end + + def super_statement?(_node) + false + end + + def explicit_alternative(node) + node.named_children.find { |child| %w[else else_clause else_statement].include?(child.kind) } + rescue StandardError + nil + end + + def unary_not_expression?(node) + %w[unary unary_expression].include?(node.kind) && node.text.to_s.lstrip.start_with?("!") + end + + def unary_minus_expression?(node) + %w[unary unary_expression].include?(node.kind) && node.text.to_s.lstrip.start_with?("-") + end + + def binary_operator(node) + direct_binary_operator(node).to_s + end + + def class_node?(node) + CLASS_KINDS.include?(node.kind) + end + + def unwrap_node?(node) + UNWRAP_KINDS.include?(node.kind) && node.named_children.size == 1 + end + + def interpolated_string?(node) + node.kind == "string" && node.named_children.any? { |child| child.kind == "interpolation" } + end + + def lambda_expression?(node) + !lambda_target(node).nil? + rescue StandardError + false + end + + def lambda_target(node) + return node if node.kind == "lambda" + + nil + rescue StandardError + nil + end + + def interpolation_node?(node) + node.kind == "interpolation" + rescue StandardError + false + end + + def instance_variable?(node) + node.kind == "instance_variable" + rescue StandardError + false + end + + def global_variable?(node) + node.kind == "global_variable" + rescue StandardError + false + end + + def member_assignment_target?(_node) + false + end + + def identifier_text_node?(_node) + false + end + + def literal_fragment_assignment_context?(node) + parent = node.parent + return false unless parent.respond_to?(:kind) + return true if literal_container_kind?(parent) + + literal_fragment_kind?(node) && + parent.parent.respond_to?(:kind) && + literal_container_kind?(parent.parent) + rescue StandardError + false + end + + def assignment_operator?(text) + assignment_operators.include?(text.to_s) + end + + def named_field(node, name) + 
node.child_by_field_name(name) + rescue StandardError + nil + end + + def safe_navigation_call?(_node) + false + end + + def ternary_statement?(node) + !ternary_parts(node).nil? + end + + def ternary_parts(node) + question_colon_ternary_parts(node, QUESTION_COLON_TERNARY_KINDS) + end + + def case_argument_list?(_node) + false + end + + def case_arm?(node) + case_arm_kind?(node) && !case_else_arm?(node) + rescue StandardError + false + end + + def case_else_node(node) + stack = node.named_children.dup + until stack.empty? + child = stack.shift + next unless child.respond_to?(:kind) + + return child if case_else_node?(child) + next if case_arm_kind?(child) + + stack.concat(child.named_children) unless adapter_function_kind?(child) + end + + nil + rescue StandardError + nil + end + + def case_else_arm?(_node) + false + end + + def case_else_node?(node) + CASE_ELSE_KINDS.include?(node&.kind) || case_else_arm?(node) + rescue StandardError + false + end + + def leading_function_statement?(_node) + false + end + + def leading_function_name(node) + node.named_children.find { |child| identifier_kind?(child) }&.text + rescue StandardError + nil + end + + def leading_function_body(node) + node.named_children.reverse.find { |child| child.kind == "body_statement" } + rescue StandardError + nil + end + + def leading_owner_statement?(node) + target = leading_owner_target(node) + return false unless target + + %w[class module].include?(target.children.first&.kind.to_s) && + target.named_children.size >= 2 && + !OWNER_STATEMENT_NESTED_KIND.include?(target.named_children.first.kind) + rescue StandardError + false + end + + def leading_owner_target(node) + node if LEADING_OWNER_WRAPPER_KINDS.include?(node.kind) + rescue StandardError + nil + end + + def leading_if_statement?(node) + target = leading_if_target(node) + return false unless target + + !!( + %w[if unless].include?(target.children.first&.kind.to_s) && + target.named_children.size >= 2 && + 
!IF_NODE_KINDS.include?(target.named_children.first.kind) + ) + rescue StandardError + false + end + + def leading_if_target(node) + node if LEADING_IF_WRAPPER_KINDS.include?(node.kind) + rescue StandardError + nil + end + + def leading_case_statement?(node) + target = leading_case_target(node) + return false unless target + + %w[case match switch].include?(target.children.first&.kind.to_s) && case_arm_descendant?(target) + rescue StandardError + false + end + + def leading_case_target(node) + node if LEADING_CASE_WRAPPER_KINDS.include?(node.kind) + rescue StandardError + nil + end + + def leading_loop_statement?(node) + target = leading_loop_target(node) + return false unless target + + !target.children.first&.named? && + %w[while until].include?(target.children.first&.kind.to_s) && + target.named_children.size >= 2 + rescue StandardError + false + end + + def leading_loop_target(node) + node if LEADING_LOOP_WRAPPER_KINDS.include?(node.kind) + rescue StandardError + nil + end + + def rescue_body_statement?(node) + rescue_clauses(node).any? + rescue StandardError + false + end + + def rescue_body_target(node) + node if RESCUE_BODY_WRAPPER_KINDS.include?(node.kind) + rescue StandardError + nil + end + + def rescue_body_nodes(node) + target = rescue_body_target(node) || node + named = target.named_children + rescue_index = named.index { |child| rescue_clause?(child) } + return [] unless rescue_index + + named[0...rescue_index] + rescue StandardError + [] + end + + def rescue_clauses(node) + target = rescue_body_target(node) + return [] unless target + + target.named_children.select { |child| rescue_clause?(child) } + rescue StandardError + [] + end + + def rescue_clause_exceptions(node) + exceptions = node.named_children.find { |child| child.kind == "exceptions" } + return [] unless exceptions + return [exceptions] if exceptions.text.to_s.match?(/\A[A-Z]\w*(?:::\w+)*\z/) + return [exceptions] if exceptions.named_children.empty? && !exceptions.text.to_s.strip.empty? 
+ + exceptions.named_children + rescue StandardError + [] + end + + def rescue_clause_exceptions_source(node) + node.named_children.find { |child| child.kind == "exceptions" } + rescue StandardError + nil + end + + def rescue_clause_exception_variable_name(node) + var = node.named_children.find { |child| child.kind == "exception_variable" } + var&.named_children&.find { |child| identifier_kind?(child) } + rescue StandardError + nil + end + + def rescue_clause_exception_variable_source(node) + node.named_children.find { |child| child.kind == "exception_variable" } + rescue StandardError + nil + end + + def rescue_clause_handler(node) + node.named_children.reverse.find do |child| + !%w[exceptions exception_variable comment].include?(child.kind) + end + rescue StandardError + nil + end + + def ensure_body_statement?(node) + !ensure_clause(node).nil? + rescue StandardError + false + end + + def ensure_body_target(node) + node if ENSURE_BODY_WRAPPER_KINDS.include?(node.kind) + rescue StandardError + nil + end + + def ensure_body_nodes(node) + target = ensure_body_target(node) || node + named = target.named_children + ensure_index = named.index { |child| ensure_clause?(child) } + return [] unless ensure_index + + named[0...ensure_index] + rescue StandardError + [] + end + + def ensure_clause(node) + target = ensure_body_target(node) + return nil unless target + + target.named_children.find { |child| ensure_clause?(child) } + rescue StandardError + nil + end + + def ensure_clause_body(_node) + nil + end + + def array_literal_statement?(node) + !array_literal_target(node).nil? 
+ rescue StandardError + false + end + + def array_literal_target(node) + return node if ARRAY_LITERAL_NODE_KINDS.include?(node.kind) + return nil unless ARRAY_LITERAL_WRAPPER_KINDS.include?(node.kind) + return node if bracketed?(node, "[", "]") + + child = exact_single_named_child(node, kinds: ARRAY_LITERAL_NODE_KINDS) + return child if child + + named = node.named_children + return nil unless named.size == 1 && ARRAY_LITERAL_NODE_KINDS.include?(named.first.kind) + + child = named.first + stripped = node.text.to_s.strip + child if stripped == child.text.to_s || stripped == "#{child.text};" + rescue StandardError + nil + end + + def array_literal_values(node) + target = array_literal_target(node) || node + target.named_children + rescue StandardError + [] + end + + def element_reference_statement?(node) + !element_reference_target(node).nil? + rescue StandardError + false + end + + def element_reference_target(node) + return node if ELEMENT_REFERENCE_NODE_KINDS.include?(node.kind) + return nil unless ELEMENT_REFERENCE_WRAPPER_KINDS.include?(node.kind) + + named = node.named_children + if named.size == 1 && ELEMENT_REFERENCE_NODE_KINDS.include?(named.first.kind) + stripped = node.text.to_s.strip + child = named.first + return child if stripped == child.text.to_s || stripped == "#{child.text};" + end + + node if element_reference_shape?(node) + rescue StandardError + nil + end + + def element_reference_receiver(node) + target = element_reference_target(node) || node + target.named_children.first + rescue StandardError + nil + end + + def element_reference_arguments(node) + target = element_reference_target(node) || node + target.named_children.drop(1) + rescue StandardError + [] + end + + def hash_literal_statement?(node) + !hash_literal_target(node).nil? 
+ rescue StandardError + false + end + + def hash_literal_target(node) + return node if HASH_LITERAL_NODE_KINDS.include?(node.kind) + return nil unless HASH_LITERAL_WRAPPER_KINDS.include?(node.kind) + return nil if statement_block_wrapper?(node) + return node if bracketed?(node, "{", "}") + + named = node.named_children + return nil unless named.size == 1 + + child = named.first + return hash_literal_target(child) if node.kind == "parenthesized_expression" + + stripped = node.text.to_s.strip + if stripped == child.text.to_s || stripped == "#{child.text};" + return child if HASH_LITERAL_NODE_KINDS.include?(child.kind) + return hash_literal_target(child) if HASH_LITERAL_WRAPPER_KINDS.include?(child.kind) + end + + nil + rescue StandardError + nil + end + + def hash_literal_values(node) + target = hash_literal_target(node) || node + target.named_children + rescue StandardError + [] + end + + def empty_body_statement?(node) + EMPTY_BODY_WRAPPER_KINDS.include?(node.kind) && + node.named_children.empty? && + node.text.to_s.strip.empty? + rescue StandardError + false + end + + def heredoc_body_statement?(node) + HEREDOC_BODY_WRAPPER_KINDS.include?(node.kind) && + node.named_children.any? { |child| child.kind == "heredoc_body" } + rescue StandardError + false + end + + def heredoc_call_for_body?(_node) + false + end + + def interpolated_statement?(node) + INTERPOLATED_STATEMENT_WRAPPER_KINDS.include?(node.kind) && + node.named_children.any? { |child| child.kind == "interpolation" } + rescue StandardError + false + end + + def concatenated_string_statement?(node) + !concatenated_string_target(node).nil? + rescue StandardError + false + end + + def concatenated_string_target(node) + return node if concatenated_string_node?(node) + return nil unless concatenated_string_wrapper_kinds.include?(node.kind) + + named = node.named_children + return node if named.size > 1 && named.all? 
{ |child| child.kind == "string" } + return named.first if named.size == 1 && concatenated_string_node?(named.first) + + nil + rescue StandardError + nil + end + + def zero_child_identifier_call?(_node) + false + end + + def operator_call_expression?(node) + operator_call_expression_kinds.include?(node.kind) && + OPERATOR_CALL_OPERATORS.include?(binary_operator(node)) + rescue StandardError + false + end + + def boolean_expression_kind?(node) + boolean_expression_kinds.include?(node.kind) + rescue StandardError + false + end + + def comparison_expression_kind?(node) + comparison_expression_kinds.include?(node.kind) + rescue StandardError + false + end + + def dotted_expression_wrapper?(node) + dotted_expression_wrapper_kinds.include?(node.kind) + rescue StandardError + false + end + + private + + def assignment_operators + COMMON_ASSIGNMENT_OPERATORS + end + + def operator_call_expression_kinds + %w[binary binary_expression] + end + + def boolean_expression_kinds + BOOLEAN_EXPRESSION_KINDS + end + + def comparison_expression_kinds + COMPARISON_EXPRESSION_KINDS + end + + def dotted_expression_wrapper_kinds + DOTTED_EXPRESSION_WRAPPER_KINDS + end + + def concatenated_string_wrapper_kinds + CONCATENATED_STRING_WRAPPER_KINDS + end + + def concatenated_string_node?(node) + CONCATENATED_STRING_NODE_KINDS.include?(node&.kind) && + node.named_children.size > 1 && + node.named_children.all? { |child| child.kind == "string" } + end + + def direct_binary_operator(node) + node.children.find { |child| !child.named? && !%w[( )].include?(child.text.to_s) }&.text + rescue StandardError + nil + end + + def question_colon_ternary_parts(node, kinds) + return nil unless kinds.include?(node.kind) + return nil unless node.children.any? { |child| !child.named? && child.text == "?" } + return nil unless node.children.any? { |child| !child.named? 
&& child.text == ":" } + + children = node.named_children + return nil unless children.size >= 3 + + children.first(3) + rescue StandardError + nil + end + + def leading_function_statement_with_keyword?(node, keyword, wrapper_kinds) + wrapper_kinds.include?(node.kind) && + node.children.first&.kind.to_s == keyword && + node.named_children.any? { |child| identifier_kind?(child) } + rescue StandardError + false + end + + def identifier_kind?(node) + IDENTIFIER_KINDS.include?(node&.kind) + end + + def exact_single_named_child(node, kinds:) + children = node.named_children + return nil unless children.size == 1 + + child = children.first + return nil unless kinds.include?(child.kind) + return nil unless node.text.to_s == child.text.to_s + + child + rescue StandardError + nil + end + + def case_arm_kind?(node) + CASE_ARGUMENT_WHEN_KINDS.include?(node&.kind) + end + + def default_case_pattern?(node) + pattern = node.named_children.find { |child| CASE_DEFAULT_PATTERN_KINDS.include?(child.kind) } + pattern&.text.to_s.strip == "_" + rescue StandardError + false + end + + def adapter_function_kind?(node) + ADAPTER_FUNCTION_KINDS.include?(node&.kind) + end + + def statement_block_wrapper?(node) + node.kind == "block" && STATEMENT_BLOCK_PARENT_KINDS.include?(node.parent&.kind) + rescue StandardError + false + end + + def case_arm_descendant?(node) + stack = node.named_children.dup + until stack.empty? 
+ child = stack.shift + next unless child.respond_to?(:kind) + return true if CASE_ARGUMENT_WHEN_KINDS.include?(child.kind) + + stack.concat(child.named_children) + end + + false + rescue StandardError + false + end + + def literal_container_kind?(node) + LITERAL_CONTAINER_KINDS.include?(node&.kind) + end + + def literal_fragment_kind?(node) + LITERAL_FRAGMENT_KINDS.include?(node&.kind) + end + + def rescue_clause?(node) + node&.kind == "rescue" + end + + def ensure_clause?(node) + node&.kind == "ensure" + end + + def bracketed?(node, opening, closing) + node.children.first&.text == opening && node.children.last&.text == closing + rescue StandardError + false + end + + def element_reference_shape?(node) + node.children.first&.text != "[" && + node.children.any? { |child| !child.named? && child.text == "[" } && + node.children.any? { |child| !child.named? && child.text == "]" } && + node.named_children.size >= 2 && + node.named_children.none? { |child| %w[block do_block].include?(child.kind) } + rescue StandardError + false + end + + def descendant(node, kinds:) + stack = node&.named_children.to_a + until stack.empty? 
+ child = stack.shift + next unless child.respond_to?(:kind) + return child if kinds.include?(child.kind) + + stack.concat(child.named_children) + end + + nil + end + end + + end +end diff --git a/gems/decomplex/lib/decomplex/ast/adapters/lua.rb b/gems/decomplex/lib/decomplex/ast/adapters/lua.rb new file mode 100644 index 000000000..b4e1c50b4 --- /dev/null +++ b/gems/decomplex/lib/decomplex/ast/adapters/lua.rb @@ -0,0 +1,184 @@ +# frozen_string_literal: true + +require_relative "base" + +module Decomplex + module Ast + class LuaTreeSitterNormalizationAdapter < TreeSitterNormalizationAdapter + def explicit_alternative(node) + node.named_children.find { |child| %w[elseif_statement else else_statement].include?(child.kind) } + rescue StandardError + nil + end + + def unary_minus_expression?(node) + super || + (node.kind == "expression_list" && node.children.first&.text == "-" && node.named_children.size == 1) + rescue StandardError + false + end + + def binary_operator(node) + direct = direct_binary_operator(node) + return direct.to_s if direct + + child = exact_single_named_child(node, kinds: BINARY_WRAPPER_KINDS) + child ? 
binary_operator(child) : "" + end + + def unwrap_node?(node) + super || + (node.kind == "expression_list" && + node.named_children.size == 1 && + node.children.first&.text == "(" && + node.children.last&.text == ")") + rescue StandardError + false + end + + def leading_function_statement?(node) + leading_function_statement_with_keyword?(node, "function", LUA_LEADING_FUNCTION_WRAPPER_KINDS) + end + + def leading_function_body(node) + node.named_children.reverse.find { |child| child.kind == "block" } + rescue StandardError + nil + end + + def leading_if_target(node) + if LUA_LEADING_IF_WRAPPER_KINDS.include?(node.kind) + child = exact_single_named_child(node, kinds: %w[if_statement]) + return child if child + end + + super + end + + def array_literal_target(node) + if node.kind == "block" + named = node.named_children + if named.size == 2 && named.first.kind == "identifier" && named.first.text.to_s.empty? + target = lua_positional_table_arguments(named[1]) + return target if target + end + end + + target = lua_positional_table_arguments(node) + return target if target + + super + rescue StandardError + nil + end + + def hash_literal_target(node) + target = lua_keyed_table_arguments(node) + return target if target + + super + rescue StandardError + nil + end + + def hash_literal_values(node) + target = hash_literal_target(node) || node + return target.named_children if target.kind == "arguments" + + super + rescue StandardError + [] + end + + def identifier_text_node?(node) + %w[variable_list expression_list].include?(node.kind) && + node.text.to_s.match?(/\A[A-Za-z_]\w*\z/) + rescue StandardError + false + end + + def member_assignment_target?(node) + return false unless node.kind == "variable_list" + + node.named_children.size == 2 && + node.children.any? { |child| !child.named? && child.text == "." 
} + rescue StandardError + false + end + + def literal_fragment_assignment_context?(node) + return true if super + + literal_fragment_kind?(node) && node.parent&.kind == "expression_list" + rescue StandardError + false + end + + def lambda_target(node) + return node if node.kind == "function_definition" + + if node.kind == "expression_list" + return node if node.children.first&.kind == "function" && + node.named_children.any? { |child| child.kind == "block" } + + named = node.named_children + return named.first if named.size == 1 && named.first.kind == "function_definition" + end + + super + rescue StandardError + nil + end + + private + + def lua_positional_table_arguments(node) + return nil unless node&.kind == "arguments" + return nil unless bracketed?(node, "{", "}") + + fields = node.named_children + return nil if fields.empty? + return nil unless fields.all? { |field| field.kind == "field" && field.named_children.size <= 1 } + + node + end + + def lua_keyed_table_arguments(node) + if node&.kind == "block" + named = node.named_children + if named.size == 2 && named.first.kind == "identifier" && named.first.text.to_s.empty? + return lua_keyed_table_arguments(named[1]) + end + end + + return nil unless node&.kind == "arguments" + return nil unless bracketed?(node, "{", "}") + + fields = node.named_children + return node if fields.empty? + return nil if fields.all? 
{ |field| field.kind == "field" && field.named_children.size <= 1 } + + node + end + + private + + def assignment_operators + LUA_ASSIGNMENT_OPERATORS + end + + def operator_call_expression_kinds + super + %w[expression_list] + end + + def boolean_expression_kinds + super + %w[expression_list] + end + + def comparison_expression_kinds + super + %w[expression_list] + end + end + + end +end diff --git a/gems/decomplex/lib/decomplex/ast/adapters/python.rb b/gems/decomplex/lib/decomplex/ast/adapters/python.rb new file mode 100644 index 000000000..e0efda06f --- /dev/null +++ b/gems/decomplex/lib/decomplex/ast/adapters/python.rb @@ -0,0 +1,224 @@ +# frozen_string_literal: true + +require_relative "base" + +module Decomplex + module Ast + class PythonTreeSitterNormalizationAdapter < TreeSitterNormalizationAdapter + def yield_statement?(node) + (%w[body_statement block block_body expression_statement statement].include?(node.kind) && + node.children.first&.text == "yield") + rescue StandardError + false + end + + def explicit_alternative(node) + node.named_children.find { |child| %w[elif_clause else else_clause].include?(child.kind) } + rescue StandardError + nil + end + + def case_else_arm?(node) + node.kind == "case_clause" && default_case_pattern?(node) + rescue StandardError + false + end + + def named_field(node, name) + super || python_body_field(node, name) + end + + def leading_function_statement?(node) + leading_function_statement_with_keyword?(node, "def", PYTHON_LEADING_FUNCTION_WRAPPER_KINDS) + end + + def leading_function_body(node) + node.named_children.reverse.find { |child| child.kind == "block" } + rescue StandardError + nil + end + + def leading_owner_target(node) + return node if PYTHON_LEADING_OWNER_WRAPPER_KINDS.include?(node.kind) + + super + rescue StandardError + nil + end + + def leading_if_target(node) + if PYTHON_LEADING_IF_WRAPPER_KINDS.include?(node.kind) + child = exact_single_named_child(node, kinds: %w[if_statement]) + return child if child 
+ end + + super + end + + def rescue_body_target(node) + return node if node.kind == "try_statement" + return node if flattened_try_block?(node, clauses: %w[except_clause]) + + if node.kind == "block" + child = exact_single_named_child(node, kinds: %w[try_statement]) + return child if child + end + + super + rescue StandardError + nil + end + + def rescue_body_nodes(node) + target = rescue_body_target(node) || node + return super unless target.kind == "try_statement" || flattened_try_block?(target, clauses: %w[except_clause]) + + target.named_children.take_while { |child| !%w[except_clause finally_clause].include?(child.kind) } + rescue StandardError + [] + end + + def rescue_clauses(node) + target = rescue_body_target(node) + return [] unless target + + target.named_children.select { |child| child.kind == "except_clause" } + rescue StandardError + [] + end + + def rescue_clause_exceptions(node) + pattern = node.named_children.find { |child| !%w[block comment].include?(child.kind) } + return [] unless pattern + return [pattern] unless pattern.kind == "as_pattern" + + exception = pattern.named_children.find { |child| child.kind != "as_pattern_target" } + exception ? 
[exception] : [] + rescue StandardError + [] + end + + def rescue_clause_exceptions_source(node) + rescue_clause_exceptions(node).first + rescue StandardError + nil + end + + def rescue_clause_exception_variable_name(node) + pattern = node.named_children.find { |child| child.kind == "as_pattern" } + descendant(pattern, kinds: %w[as_pattern_target]) + rescue StandardError + nil + end + + def rescue_clause_exception_variable_source(node) + rescue_clause_exception_variable_name(node) + rescue StandardError + nil + end + + def rescue_clause_handler(node) + node.named_children.reverse.find { |child| child.kind == "block" } + rescue StandardError + nil + end + + def ensure_body_target(node) + return node if node.kind == "try_statement" + return node if flattened_try_block?(node, clauses: %w[finally_clause]) + + if node.kind == "block" + child = exact_single_named_child(node, kinds: %w[try_statement]) + return child if child + end + + super + rescue StandardError + nil + end + + def ensure_body_nodes(node) + target = ensure_body_target(node) || node + return super unless target.kind == "try_statement" || flattened_try_block?(target, clauses: %w[finally_clause]) + + target.named_children.take_while { |child| child.kind != "finally_clause" } + rescue StandardError + [] + end + + def ensure_clause(node) + target = ensure_body_target(node) + return nil unless target + + target.named_children.find { |child| child.kind == "finally_clause" } + rescue StandardError + nil + end + + def ensure_clause_body(node) + node.named_children.reverse.find { |child| child.kind == "block" } + rescue StandardError + nil + end + + def ternary_parts(node) + return nil unless node.kind == "conditional_expression" + + children = node.named_children + return nil unless children.size >= 3 + + [children[1], children[0], children[2]] + rescue StandardError + nil + end + + def unary_minus_expression?(node) + (%w[unary unary_expression unary_operator].include?(node.kind) && 
node.text.to_s.lstrip.start_with?("-")) + end + + def empty_body_statement?(node) + super || + (node.kind == "block" && node.named_children.empty? && node.text.to_s.strip == "pass") || + node.kind == "pass_statement" + rescue StandardError + false + end + + private + + def flattened_try_block?(node, clauses:) + node.kind == "block" && + node.children.first&.text == "try" && + node.named_children.any? { |child| clauses.include?(child.kind) } + rescue StandardError + false + end + + def python_body_field(node, name) + return nil unless %w[body consequence].include?(name.to_s) + return nil unless PYTHON_BODY_FIELD_KINDS.include?(node.kind) + + node.named_children.find { |child| child.kind == "block" } + rescue StandardError + nil + end + + def assignment_operators + PYTHON_ASSIGNMENT_OPERATORS + end + + def operator_call_expression_kinds + super + %w[binary_operator] + end + + def concatenated_string_wrapper_kinds + PYTHON_CONCATENATED_STRING_WRAPPER_KINDS + end + + def dotted_expression_wrapper_kinds + PYTHON_DOTTED_EXPRESSION_WRAPPER_KINDS + end + end + + end +end diff --git a/gems/decomplex/lib/decomplex/ast/adapters/ruby.rb b/gems/decomplex/lib/decomplex/ast/adapters/ruby.rb new file mode 100644 index 000000000..07719416a --- /dev/null +++ b/gems/decomplex/lib/decomplex/ast/adapters/ruby.rb @@ -0,0 +1,101 @@ +# frozen_string_literal: true + +require_relative "base" + +module Decomplex + module Ast + class RubyTreeSitterNormalizationAdapter < TreeSitterNormalizationAdapter + def ruby? + true + end + + def yield_statement?(node) + %w[body_statement block block_body statement].include?(node.kind) && + node.children.first&.text == "yield" + rescue StandardError + false + end + + def super_statement?(node) + %w[body_statement block block_body statement].include?(node.kind) && + (node.text.to_s.strip == "super" || + (node.named_children.first&.kind == "super" && + node.named_children.drop(1).all? 
{ |child| child.kind == "argument_list" })) + rescue StandardError + false + end + + def explicit_alternative(node) + node.named_children.find { |child| %w[elsif else].include?(child.kind) } + rescue StandardError + nil + end + + def instance_variable?(node) + node.kind == "instance_variable" || ruby_instance_variable_text?(node.text) + rescue StandardError + false + end + + def global_variable?(node) + node.kind == "global_variable" || ruby_global_variable_text?(node.text) + rescue StandardError + false + end + + def case_argument_list?(node) + node.kind == "argument_list" && + node.children.any? { |child| !child.named? && child.kind == "case" } && + node.named_children.any? { |child| CASE_ARGUMENT_WHEN_KINDS.include?(child.kind) } + rescue StandardError + false + end + + def safe_navigation_call?(node) + node.children.any? { |child| !child.named? && child.text == "&." } + rescue StandardError + false + end + + def leading_function_statement?(node) + leading_function_statement_with_keyword?(node, "def", LEADING_FUNCTION_WRAPPER_KINDS) + end + + def zero_child_identifier_call?(node) + node.kind == "call" && node.named_children.empty? && + node.text.to_s.match?(/\A[A-Za-z_]\w*[!?=]?\z/) + rescue StandardError + false + end + + def heredoc_call_for_body?(node) + return true if node.kind == "heredoc_beginning" + return true if %w[call argument_list].include?(node.kind) && + node.text.to_s.match?(/(?:\A|[\s(,])<<[-~]?[A-Za-z_]\w*/) + + node.named_children.any? do |child| + next false if child.named_children.any? 
{ |grandchild| grandchild.kind == "heredoc_body" } + + heredoc_call_for_body?(child) + end + rescue StandardError + false + end + + private + + def assignment_operators + RUBY_ASSIGNMENT_OPERATORS + end + + def ruby_instance_variable_text?(text) + text.to_s.match?(/\A@[A-Za-z_]\w*[!?=]?\z/) + end + + def ruby_global_variable_text?(text) + text.to_s.match?(/\A\$[A-Za-z_]\w*[!?=]?\z/) + end + end + + end +end diff --git a/gems/decomplex/lib/decomplex/ast/adapters/rust.rb b/gems/decomplex/lib/decomplex/ast/adapters/rust.rb new file mode 100644 index 000000000..50cd53d86 --- /dev/null +++ b/gems/decomplex/lib/decomplex/ast/adapters/rust.rb @@ -0,0 +1,10 @@ +# frozen_string_literal: true + +require_relative "base" + +module Decomplex + module Ast + class RustTreeSitterNormalizationAdapter < TreeSitterNormalizationAdapter + end + end +end diff --git a/gems/decomplex/lib/decomplex/ast/adapters/typescript.rb b/gems/decomplex/lib/decomplex/ast/adapters/typescript.rb new file mode 100644 index 000000000..3859fb4a6 --- /dev/null +++ b/gems/decomplex/lib/decomplex/ast/adapters/typescript.rb @@ -0,0 +1,147 @@ +# frozen_string_literal: true + +require_relative "base" + +module Decomplex + module Ast + class TypeScriptTreeSitterNormalizationAdapter < TreeSitterNormalizationAdapter + def explicit_alternative(node) + node.named_children.find { |child| %w[else else_clause].include?(child.kind) } + rescue StandardError + nil + end + + def safe_navigation_call?(node) + super || + node.children.any? { |child| child.kind == "optional_chain" && child.text.to_s == "?." } || + (node.kind == "call_expression" && node.named_children.any? { |child| safe_navigation_call?(child) }) + rescue StandardError + false + end + + def ternary_parts(node) + question_colon_ternary_parts(node, TYPESCRIPT_TERNARY_KINDS) + end + + def interpolated_string?(node) + super || + (node.kind == "template_string" && + node.named_children.any? 
{ |child| child.kind == "template_substitution" }) + end + + def lambda_target(node) + return node if %w[arrow_function function_expression].include?(node.kind) + + super + rescue StandardError + nil + end + + def interpolation_node?(node) + super || node.kind == "template_substitution" + rescue StandardError + false + end + + def rescue_body_target(node) + return node if node.kind == "try_statement" + + if node.kind == "statement_block" + child = exact_single_named_child(node, kinds: %w[try_statement]) + return child if child + end + + super + rescue StandardError + nil + end + + def rescue_body_nodes(node) + target = rescue_body_target(node) || node + return super unless target.kind == "try_statement" + + target.named_children.take_while { |child| !%w[catch_clause finally_clause].include?(child.kind) } + rescue StandardError + [] + end + + def rescue_clauses(node) + target = rescue_body_target(node) + return [] unless target + + target.named_children.select { |child| child.kind == "catch_clause" } + rescue StandardError + [] + end + + def rescue_clause_exception_variable_name(node) + node.named_children.find { |child| IDENTIFIER_KINDS.include?(child.kind) } + rescue StandardError + nil + end + + def rescue_clause_exception_variable_source(node) + rescue_clause_exception_variable_name(node) + rescue StandardError + nil + end + + def rescue_clause_handler(node) + node.named_children.reverse.find { |child| child.kind == "statement_block" } + rescue StandardError + nil + end + + def ensure_body_target(node) + return node if node.kind == "try_statement" + + if node.kind == "statement_block" + child = exact_single_named_child(node, kinds: %w[try_statement]) + return child if child + end + + super + rescue StandardError + nil + end + + def ensure_body_nodes(node) + target = ensure_body_target(node) || node + return super unless target.kind == "try_statement" + + target.named_children.take_while { |child| child.kind != "finally_clause" } + rescue StandardError + [] + end 
+ + def ensure_clause(node) + target = ensure_body_target(node) + return nil unless target + + target.named_children.find { |child| child.kind == "finally_clause" } + rescue StandardError + nil + end + + def ensure_clause_body(node) + node.named_children.reverse.find { |child| child.kind == "statement_block" } + rescue StandardError + nil + end + + def empty_body_statement?(node) + super || + (node.kind == "statement_block" && node.named_children.empty? && node.text.to_s.strip == "{}") + rescue StandardError + false + end + + private + + def assignment_operators + TYPESCRIPT_ASSIGNMENT_OPERATORS + end + end + + end +end diff --git a/gems/decomplex/lib/decomplex/ast/cache.rb b/gems/decomplex/lib/decomplex/ast/cache.rb new file mode 100644 index 000000000..8339211fe --- /dev/null +++ b/gems/decomplex/lib/decomplex/ast/cache.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +module Decomplex + module Ast + module_function + + def normalized_cache + @normalized_cache ||= {} + end + end +end diff --git a/gems/decomplex/lib/decomplex/ast/legacy_normalizer.rb b/gems/decomplex/lib/decomplex/ast/legacy_normalizer.rb new file mode 100644 index 000000000..449f6fc6c --- /dev/null +++ b/gems/decomplex/lib/decomplex/ast/legacy_normalizer.rb @@ -0,0 +1,2564 @@ +# frozen_string_literal: true + +require "set" +require_relative "node" +require_relative "source_map" +require_relative "adapters/base" +require_relative "adapters/ruby" +require_relative "adapters/python" +require_relative "adapters/lua" +require_relative "adapters/typescript" +require_relative "adapters/rust" + +module Decomplex + module Ast + # Tree-sitter exposes each grammar's native node names. Decomplex's + # detectors share a small language-neutral AST vocabulary, so this + # normalizer converts common syntax categories into that vocabulary: + # DEFN, CLASS, IF, CASE/WHEN, AND/OR, CALL, LASGN, ATTRASGN, IVAR, + # LVAR, and friends. The goal is portable structural facts, not + # Ruby semantics. 
+ class TreeSitterNormalizer + FUNCTION_KINDS = %w[ + method function_definition function_declaration method_definition + method_declaration function_item singleton_method + ].freeze + CLASS_KINDS = %w[class class_definition class_declaration class_specifier].freeze + MODULE_KINDS = %w[module].freeze + BLOCK_KINDS = %w[ + block body_statement statement_block statement_list class_body + switch_body match_block then block_body control_structure_body function_body + ].freeze + IF_KINDS = %w[if if_statement if_modifier unless unless_modifier if_expression conditional].freeze + LOOP_KINDS = { + "while" => :WHILE, + "while_statement" => :WHILE, + "while_modifier" => :WHILE, + "until_modifier" => :UNTIL, + "for" => :FOR, + "for_statement" => :FOR, + "for_in_clause" => :FOR + }.freeze + CASE_KINDS = %w[ + case switch_statement expression_switch_statement switch_expression match_statement match_expression + when_expression + ].freeze + WHEN_KINDS = %w[ + when switch_case case_clause expression_case case_statement switch_section + switch_block_statement_group switch_entry when_entry match_arm + ].freeze + ASSIGNMENT_KINDS = %w[ + assignment assignment_expression assignment_statement augmented_assignment + ].freeze + MEMBER_KINDS = %w[ + call attribute member_expression member_access_expression field field_access selector_expression field_expression + navigation_expression directly_assignable_expression expression_list + ].freeze + CALL_KINDS = %w[call call_expression method_call method_call_expression].freeze + IDENTIFIER_KINDS = %w[ + identifier simple_identifier property_identifier field_identifier shorthand_property_identifier + ].freeze + CONST_KINDS = %w[constant scope_resolution type_identifier scoped_type_identifier].freeze + STRING_KINDS = %w[ + string string_content string_literal interpreted_string_literal raw_string_literal + ].freeze + SYMBOL_KINDS = %w[symbol simple_symbol].freeze + NIL_KINDS = %w[nil none null].freeze + RETURN_KINDS = { + "return" => :RETURN, + 
"return_statement" => :RETURN, + "return_expression" => :RETURN, + "break" => :BREAK, + "break_statement" => :BREAK, + "break_expression" => :BREAK, + "next" => :NEXT, + "continue_statement" => :NEXT + }.freeze + COMPARISON_OPERATORS = %w[== != === !== < <= > >=].freeze + OPERATOR_CALL_OPERATORS = TreeSitterNormalizationAdapter::OPERATOR_CALL_OPERATORS + INFIX_STATEMENT_OPERATORS = (OPERATOR_CALL_OPERATORS + COMPARISON_OPERATORS).freeze + INLINE_DEF_WRAPPER_MIDS = %w[ + public protected private private_class_method module_function + ].freeze + + def initialize(document) + @document = document + @normalization_adapter = TreeSitterNormalizationAdapter.for(document) + @local_stack = [] + @normalizing = Set.new + end + + def normalize + children = + if ruby? + with_ruby_scope(@document.root, reset: true) { normalize_children(@document.root) } + else + normalize_children(@document.root) + end + wrap(:ROOT, children: children, source: @document.root) + end + + private + + def normalize_node(node) + return nil unless ts_node?(node) + key = node_key(node) + return nil if @normalizing.include?(key) + + @normalizing << key + begin + return nil if node.kind == "comment" + return normalize_assignment_lhs(node) if assignment_lhs?(node) + return normalize_infix_statement(node) if infix_statement?(node) + return normalize_dotted_expression(node) if dotted_expression?(node) + return normalize_unary_not_statement(node) if unary_not_statement?(node) + return normalize_wrapped_return_statement(node) if wrapped_return_statement?(node) + + if leading_function_statement?(node) + normalize_leading_function_statement(node) + elsif leading_if_statement?(node) + normalize_leading_if_statement(node) + elsif ensure_body_statement?(node) + normalize_ensure_body_statement(node) + elsif rescue_body_statement?(node) + normalize_rescue_body_statement(node) + elsif modifier_statement?(node) + normalize_modifier_statement(node) + elsif ternary_statement?(node) + normalize_ternary_statement(node) + 
elsif statement_call_with_block?(node) + normalize_statement_call_with_block(node) + elsif command_call_statement?(node) + normalize_command_call_statement(node) + elsif lambda_expression?(node) + normalize_lambda(node) + elsif FUNCTION_KINDS.include?(node.kind) + normalize_function(node) + elsif class_node?(node) + normalize_class(node) + elsif module_node?(node) + normalize_module(node) + elsif node.kind == "impl_item" + normalize_impl(node) + elsif node.kind == "elsif" + normalize_elsif(node) + elsif IF_KINDS.include?(node.kind) + normalize_if(node) + elsif LOOP_KINDS.key?(node.kind) + normalize_loop(node) + elsif CASE_KINDS.include?(node.kind) || hidden_match?(node) + normalize_case(node) + elsif hash_literal_statement?(node) + normalize_hash_literal_statement(node) + elsif array_literal_statement?(node) + normalize_array_literal_statement(node) + elsif element_reference_statement?(node) + normalize_element_reference_statement(node) + elsif node.kind == "element_reference" + normalize_element_reference(node) + elsif node.kind == "rescue_modifier" + normalize_rescue_modifier(node) + elsif node.kind == "ensure" + normalize_ensure_clause(node) + elsif node.kind == "begin" + normalize_begin(node) + elsif node.kind == "operator_assignment" + normalize_operator_assignment(node) + elsif ASSIGNMENT_KINDS.include?(node.kind) + normalize_assignment(node) + elsif node.kind == "subshell" + normalize_subshell(node) + elsif node.kind == "block_argument" + normalize_block_argument(node) + elsif node.kind == "pair" + normalize_pair(node) + elsif node.kind == "singleton_class" + normalize_singleton_class(node) + elsif node.kind == "yield" + normalize_yield(node) + elsif yield_statement?(node) + normalize_yield_statement(node) + elsif yield_argument_list?(node) + normalize_yield_argument_list(node) + elsif node.kind == "heredoc_beginning" + normalize_heredoc_beginning(node) + elsif node.kind == "chained_string" + normalize_chained_string(node) + elsif interpolation_node?(node) + 
normalize_interpolation(node) + elsif unary_minus_expression?(node) + normalize_unary_minus(node) + elsif unary_not_expression?(node) + normalize_unary_not(node) + elsif boolean_expression?(node) + normalize_boolean(node) + elsif operator_call_expression?(node) + normalize_operator_call(node) + elsif comparison_expression?(node) + normalize_comparison(node) + elsif CALL_KINDS.include?(node.kind) + normalize_call(node) + elsif member_read_node?(node) + normalize_member_read(node) + elsif BLOCK_KINDS.include?(node.kind) + wrap(:BLOCK, children: normalize_children(node), source: node) + elsif unwrap_node?(node) + normalize_node(node.named_children.first) + elsif RETURN_KINDS.key?(node.kind) + normalize_return(node) + elsif self_node?(node) + wrap(:SELF, children: [], source: node) + elsif instance_variable?(node) + wrap(:IVAR, children: [node.text.to_s], source: node) + elsif global_variable?(node) + normalize_global_variable(node) + elsif const_node?(node) + normalize_const(node) + elsif ruby? 
&& IDENTIFIER_KINDS.include?(node.kind) && node.text.to_s == "yield" + wrap(:YIELD, children: [nil], source: node) + elsif ruby_vcall_identifier?(node) + return wrap(:YIELD, children: [nil], source: node) if node.text.to_s == "yield" + + wrap(:VCALL, children: [node.text.to_s.to_sym], source: node) + elsif vcall_identifier?(node) + wrap(:VCALL, children: [node.text.to_s.to_sym], source: node) + elsif local_identifier?(node) + wrap(:LVAR, children: [node.text.to_s], source: node) + elsif NIL_KINDS.include?(node.kind) + wrap(:NIL, children: [], source: node) + elsif interpolated_string?(node) + normalize_interpolated_string(node) + elsif STRING_KINDS.include?(node.kind) + wrap(:STR, children: [node.text.to_s], source: node) + elsif SYMBOL_KINDS.include?(node.kind) + wrap(:LIT, children: [node.text.to_s.sub(/\A:/, "").to_sym], source: node) + else + wrap(kind_type(node.kind), children: normalize_children(node), source: node) + end + ensure + @normalizing.delete(key) + end + end + + def normalize_function(node) + return normalize_singleton_function(node) if node.kind == "singleton_method" + + name = function_name(node) + args = normalize_parameters(named_field(node, "parameters")) + body = with_ruby_scope(node, reset: true) do + elide_implicit_nil_body( + prepend_inline_parameter_begin( + node, + elide_tail_returns(normalize_body(named_field(node, "body") || block_child(node))) + ) + ) + end + wrap(:DEFN, children: [name, scope(body, args: args, source: node)], source: node) + end + + def normalize_singleton_function(node) + receiver = singleton_receiver(node) + name = singleton_name(node) + args = normalize_parameters(named_field(node, "parameters")) + body = with_ruby_scope(node, reset: true) do + elide_implicit_nil_body( + prepend_inline_parameter_begin( + node, + elide_tail_returns(normalize_body(named_field(node, "body") || block_child(node))) + ) + ) + end + wrap(:DEFS, children: [normalize_node(receiver), name, scope(body, args: args, source: node)], source: 
node) + end + + def normalize_class(node) + name = const_for(named_field(node, "name") || first_named(node)) + body = normalize_body(named_field(node, "body") || block_child(node)) + wrap(:CLASS, children: [name, nil, scope(body, source: node)], source: node) + end + + def normalize_module(node) + name = const_for(named_field(node, "name") || first_named(node)) + body = normalize_body(named_field(node, "body") || block_child(node)) + wrap(:MODULE, children: [name, scope(body, source: node)], source: node) + end + + def normalize_impl(node) + type_node = named_field(node, "type") || + node.named_children.find do |child| + %w[type_identifier scoped_type_identifier identifier].include?(child.kind) + end + name = const_for(type_node || node) + body = normalize_body(named_field(node, "body") || block_child(node) || node) + wrap(:CLASS, children: [name, nil, scope(body, source: node)], source: node) + end + + def normalize_if(node) + if %w[if_modifier unless_modifier].include?(node.kind) + action, cond_raw = node.named_children + type = node.kind.start_with?("unless") ? :UNLESS : :IF + return wrap(type, children: [normalize_node(cond_raw), normalize_modifier_action(action), nil], source: node) + end + + cond_raw = named_field(node, "condition") || named_field(node, "predicate") || first_named(node) + cond = normalize_node(cond_raw) + positive_raw = named_field(node, "consequence") || named_field(node, "body") || + node.named_children.find { |child| child.kind == "then" } || + branch_child(node, cond_raw, 0) + negative_raw = named_field(node, "alternative") || + explicit_alternative(node) || + (branch_child(node, cond_raw, 1) unless ruby?) + positive = normalize_body(positive_raw) + negative = normalize_else_or_branch(negative_raw) + type = node.kind.start_with?("unless") ? 
:UNLESS : :IF + wrap(type, children: [cond, positive, negative], source: node) + end + + def normalize_elsif(node) + cond = node.named_children.find { |child| !%w[comment then elsif else].include?(child.kind) } + positive = node.named_children.find { |child| child.kind == "then" } + negative = node.named_children.find { |child| %w[elsif else].include?(child.kind) } + wrap(:IF, children: [normalize_node(cond), normalize_body(positive), normalize_else_or_branch(negative)], + source: node) + end + + def normalize_loop(node) + if %w[while_modifier until_modifier].include?(node.kind) + action, cond = node.named_children + return wrap(LOOP_KINDS.fetch(node.kind), children: [normalize_node(cond), normalize_modifier_action(action), true], + source: node) + end + + cond = normalize_node(named_field(node, "condition") || first_named(node)) + body = normalize_body(named_field(node, "body") || named_field(node, "consequence") || block_child(node)) + wrap(LOOP_KINDS.fetch(node.kind), children: [cond, body], source: node) + end + + def normalize_case(node) + value_raw = case_value(node) + value = normalize_node(value_raw) + whens = case_arms(node).map { |arm| normalize_when(arm) }.compact + fallback = case_else_body(node) + chain = link_when_chain(whens, fallback) + return wrap(:CASE2, children: [chain], source: node) unless value_raw + + wrap(:CASE, children: [value, chain], source: node) + end + + def normalize_when(node) + patterns = normalize_patterns(node) + body = normalize_body(when_body(node)) + wrap(:WHEN, children: [list(patterns, source: node), body, nil], source: node) + end + + def normalize_assignment(node) + left = assignment_left(node) + right = normalize_node(assignment_right(node)) + return normalize_multiple_assignment(left, right, node) if left&.kind == "left_assignment_list" + return assignment_target(left, right, source: node) if assignment_target(left, right, source: node) + + wrap(:LASGN, children: [target_name(left), right], source: node) + end + + def 
normalize_multiple_assignment(left, right, node) + targets = left.named_children.map do |child| + type = global_variable?(child) ? :GASGN : :LASGN + wrap(type, children: [target_name(child), nil], source: child) + end + wrap(:MASGN, children: [right, list(targets, source: left)], source: node) + end + + def normalize_boolean(node) + type = boolean_operator(node) == "or" ? :OR : :AND + operands = node.named_children.map { |child| normalize_node(child) }.compact + operands = operands.flat_map { |child| Ast.node?(child) && child.type == type ? child.children : [child] } + wrap(type, children: operands, source: node) + end + + def normalize_comparison(node) + operands = node.named_children + left = normalize_node(operands[0]) + right = normalize_node(operands[1]) + wrap(:OPCALL, children: [left, comparison_operator(node).to_sym, list([right], source: operands[1] || node)], + source: node) + end + + def normalize_operator_call(node) + operands = node.named_children + left = normalize_node(operands[0]) + right = normalize_node(operands[1]) + if ruby? && binary_operator(node) == "=~" && regex_literal?(operands[1]) + return wrap(:MATCH3, children: [right, left], source: node) + elsif ruby? && binary_operator(node) == "=~" + return wrap(:CALL, children: [left, :=~, list([right], source: operands[1] || node)], source: node) + end + + wrap(:OPCALL, children: [left, binary_operator(node).to_sym, list([right], source: operands[1] || node)], + source: node) + end + + def normalize_element_reference(node) + recv = node.named_children.first + args = node.named_children.drop(1).map { |child| normalize_node(child) }.compact + if ruby? 
&& self_node?(recv) + return wrap(:FCALL, children: [:[], list(args, source: node)], source: node) + end + + wrap(:CALL, children: [normalize_node(recv), :[], list(args, source: node)], source: node) + end + + def normalize_rescue_modifier(node) + body = normalize_node(node.named_children.first) + handler = normalize_node(node.named_children[1]) + resbody = wrap(:RESBODY, children: [nil, handler, nil], source: node) + wrap(:RESCUE, children: [body, resbody, nil], source: node) + end + + def normalize_ensure_clause(node) + normalize_body_nodes(node.named_children, source: node) + end + + def normalize_begin(node) + rescue_nodes = node.named_children.select { |child| child.kind == "rescue" } + ensure_node = node.named_children.find { |child| child.kind == "ensure" } + if rescue_nodes.empty? + return wrap(:BEGIN, children: normalize_children(node), source: node) unless ensure_node + + body_nodes = node.named_children.take_while { |child| child.kind != "ensure" } + body = normalize_body_nodes(body_nodes, source: body_nodes.first || node) + ensure_body = normalize_body(ensure_node) + source = source_from_nodes(body_nodes.first || node, ensure_node.named_children.last || ensure_node) + return wrap(:ENSURE, children: [body, ensure_body], source: source) + end + + body_nodes = node.named_children.take_while { |child| child.kind != "rescue" } + body = normalize_body_nodes(body_nodes, source: body_nodes.first || node) + resbodies = rescue_nodes.map { |child| normalize_rescue_clause(child) } + source = source_from_nodes(body_nodes.first || node, rescue_source_end(rescue_nodes.last) || rescue_nodes.last || node) + rescued = wrap(:RESCUE, children: [body, link_rescue_chain(resbodies), nil], source: source) + return rescued unless ensure_node + + ensure_body = normalize_body(ensure_node) + ensure_source = source_from_nodes(body_nodes.first || node, ensure_node.named_children.last || ensure_node) + wrap(:ENSURE, children: [rescued, ensure_body], source: ensure_source) + end + + 
def normalize_operator_assignment(node) + left = assignment_left(node) + right_raw = assignment_right(node) + right = normalize_node(right_raw) + operator = operator_assignment_operator(node) + + if left&.kind == "element_reference" + recv = left.named_children.first + args = left.named_children.drop(1).map { |child| normalize_node(child) }.compact + return wrap(:OP_ASGN1, children: [normalize_node(recv), operator, list(args, source: left), right], + source: node) + end + + if member_read_node?(left) + recv, mid = member_parts(left) + return wrap(:OP_ASGN2, children: [normalize_node(recv), false, mid.to_sym, operator, right], source: node) + end + + logical = normalize_logical_operator_assignment(left, operator, right, source: node) + return logical if logical + if instance_variable?(left) || global_variable?(left) + return assignment_target(left, augmented_assignment_value(left, operator, right_raw, node), source: node) + end + + assignment_target(left, right, source: node) || + wrap(:LASGN, children: [target_name(left), augmented_assignment_value(left, operator, right_raw, node)], + source: node) + end + + def normalize_subshell(node) + children = node.named_children.filter_map do |child| + case child.kind + when "interpolation" then normalize_interpolation(child) + when "string_content" then wrap(:STR, children: [child.text.to_s], source: child) + end + end + type = children.any? { |child| child.is_a?(Node) && child.type == :EVSTR } ? :DXSTR : :XSTR + wrap(type, children: children, source: node) + end + + def normalize_pair(node) + key = node.named_children.first + value = node.named_children[1] + if node.children.any? { |child| !child.named? && child.text == "=>" } + return wrap(:HASH, children: [normalize_node(key), normalize_node(value)].compact, source: node) + end + + key_lit = wrap(:LIT, children: [key.text.to_s.to_sym], source: key || node) + if ruby? && key&.kind == "hash_key_symbol" && value.nil? 
+ name = key.text.to_s + return wrap(:HASH, children: [key_lit, local_or_call_for_name(name, key)], source: node) + end + + wrap(:HASH, children: [key_lit, normalize_node(value)].compact, source: node) + end + + def normalize_block_argument(node) + value = normalize_node(node.named_children.first) + wrap(:BLOCK_PASS, children: [nil, value], source: node) + end + + def normalize_singleton_class(node) + recv = normalize_node(node.named_children.first) + body = normalize_body(node.named_children[1]) + wrap(:SCLASS, children: [recv, scope(body, source: node)], source: node) + end + + def normalize_lambda(node) + target = lambda_target(node) || node + body_node = named_field(target, "body") || block_child(target) || target.named_children.last + body = with_ruby_scope(target) do + dynamic_scope(normalize_body(body_node)) + end + wrap(:LAMBDA, children: [scope(body, source: target)], source: target) + end + + def normalize_yield(node) + args_node = node.named_children.find { |child| child.kind == "argument_list" } + args = args_node ? yield_argument_nodes(args_node) : yield_inline_arguments(node) + wrap(:YIELD, children: [list(args, source: args_node || node)], source: node) + end + + def yield_statement?(node) + normalization_adapter.yield_statement?(node) + end + + def normalize_yield_statement(node) + args_node = node.named_children.find { |child| child.kind == "argument_list" } + args = args_node ? 
yield_argument_nodes(args_node) : yield_inline_arguments(node) + wrap(:YIELD, children: [list(args, source: args_node || node)], source: node) + end + + def yield_argument_list?(node) + node.kind == "argument_list" && parent_node(node)&.children&.first&.text == "yield" + rescue StandardError + false + end + + def normalize_yield_argument_list(node) + args = yield_argument_nodes(node) + source = parent_node(node) || node + wrap(:YIELD, children: [list(args, source: node)], source: source) + end + + def yield_inline_arguments(node) + node.named_children.reject { |child| child.kind == "yield" }.map { |child| normalize_node(child) }.compact + end + + def yield_argument_nodes(node) + return [scalar_argument_list_value(node)].compact if node.named_children.empty? + + node.named_children.map { |child| normalize_node(child) }.compact + end + + def super_statement?(node) + normalization_adapter.super_statement?(node) + end + + def normalize_super_statement(node) + args_node = node.named_children.find { |child| child.kind == "argument_list" } + args = + if args_node && args_node.named_children.empty? 
+ [scalar_argument_list_value(args_node)].compact + elsif args_node + args_node.named_children.map { |child| normalize_node(child) }.compact + else + [] + end + wrap(:SUPER, children: [list(args, source: args_node || node)], source: node) + end + + def normalize_unary_not(node) + operand = node.named_children.first + wrap(:OPCALL, children: [normalize_node(operand), :!, nil], source: node) + end + + def normalize_unary_not_statement(node) + operand = node.named_children.first + wrap(:OPCALL, children: [normalize_node(operand), :!, nil], source: node) + end + + def normalize_unary_minus(node) + operand = node.named_children.first + if ts_node?(operand) && operand.kind == "integer" + return wrap(:INTEGER, children: [-operand.text.to_i], source: operand) + end + + wrap(:OPCALL, children: [normalize_node(operand), :-@, nil], source: node) + end + + def normalize_infix_statement(node) + left, operator, right = infix_statement_parts(node) + if ruby? && operator == "=~" && regex_literal?(right) + return wrap(:MATCH3, children: [normalize_node(right), normalize_node(left)], source: node) + elsif ruby? && operator == "=~" + return wrap(:CALL, children: [normalize_node(left), :=~, list([normalize_node(right)].compact, source: right)], + source: node) + end + + wrap(:OPCALL, children: [normalize_node(left), operator.to_sym, list([normalize_node(right)].compact, source: right)], + source: node) + end + + def normalize_dotted_expression(node) + block = call_block(node) + call = normalize_dotted_call_expression(node, source: block ? 
source_before_child(node, block) : node) + return call unless block + + args = normalize_block_parameters(block) + body = with_ruby_scope(block) do + dynamic_scope(normalize_body(named_field(block, "body") || block_child(block) || block)) + end + wrap(:ITER, children: [call, scope(body, args: args, source: node)], source: node) + end + + def normalize_dotted_call_expression(node, source: node) + target = dotted_call_target(node) || node + recv, mid = dotted_call_parts(target) + args = call_arguments(target, nil) + type = safe_navigation_call?(target) ? :QCALL : :CALL + wrap(type, children: [normalize_node(recv), mid.to_sym, list(args, source: source)], source: source) + end + + def normalize_argument_list_call_with_block(node) + return nil unless ruby? && ts_node?(node) && node.kind == "argument_list" + + block = call_block(node) + return nil unless block + + call = normalize_argument_list_call(node) + return nil unless call + + args = normalize_block_parameters(block) + body = with_ruby_scope(block) do + dynamic_scope(normalize_body(named_field(block, "body") || block_child(block) || block)) + end + wrap(:ITER, children: [call, scope(body, args: args, source: node)], source: node) + end + + def normalize_argument_list_call(node) + return nil unless ruby? && ts_node?(node) && node.kind == "argument_list" + + function = node.named_children.first + args_node = node.named_children.find { |child| child.kind == "argument_list" } + args = args_node ? 
args_node.named_children.map { |child| normalize_node(child) }.compact : [] + wrap(:FCALL, children: [function.text.to_sym, list(args, source: args_node || node)], source: node) + end + + def normalize_call(node) + return normalize_zero_child_call(node) if zero_child_identifier_call?(node) + return normalize_call_with_block(node) if call_block(node) + return normalize_visibility_inline_def(node) if visibility_inline_def_call?(node) + + if named_field(node, "receiver") && named_field(node, "method") + recv, mid = member_parts(node) + args = call_arguments(node, nil) + type = safe_navigation_call?(node) ? :QCALL : :CALL + return wrap(type, children: [normalize_node(recv), mid.to_sym, list(args, source: node)], source: node) + end + + function = named_field(node, "function") || named_field(node, "call") || node.named_children.first + args = call_arguments(node, function) + return wrap(:YIELD, children: [list(args, source: node)], source: node) if ruby? && function&.text == "yield" + + if member_read_node?(function) + recv, mid = member_parts(function) + return wrap(:CALL, children: [normalize_node(recv), mid.to_sym, list(args, source: node)], source: node) + end + + if function && IDENTIFIER_KINDS.include?(function.kind) + type = args.empty? ? :VCALL : :FCALL + return wrap(type, children: [function.text.to_sym, list(args, source: node)], source: node) + end + + if ruby? && function && const_node?(function) + return wrap(:FCALL, children: [function.text.to_sym, list(args, source: node)], source: node) + end + + wrap(:CALL, children: [normalize_node(function), :call, list(args, source: node)], source: node) + end + + def normalize_return(node) + normalize_return_node(node, elide_symbol: false) + end + + def wrapped_return_statement?(node) + return false unless ts_node?(node) + return false unless %w[body_statement block_body statement block].include?(node.kind) + return false if node.text.to_s.include?("\n") + + keyword = node.children.first + keyword && !keyword.named? 
&& RETURN_KINDS.key?(keyword.kind) + end + + def normalize_wrapped_return_statement(node) + keyword = node.children.first + children = node.named_children.map { |child| normalize_return_value(child) }.compact + wrap(RETURN_KINDS.fetch(keyword.kind), children: children, source: node) + end + + def normalize_return_node(node, elide_symbol:) + children = node.named_children.map { |child| normalize_return_value(child) }.compact + return children.first if elide_symbol && ruby? && children.size == 1 && symbol_literal_node?(children.first) + + wrap(RETURN_KINDS.fetch(node.kind), children: children, source: node) + end + + def normalize_return_value(node) + return normalize_node(node) unless ts_node?(node) && node.kind == "argument_list" + return scalar_argument_list_value(node) if node.named_children.empty? + return normalize_argument_list_element_reference(node) if argument_list_element_reference?(node) + return normalize_boolean(node) if boolean_expression?(node) + return normalize_ternary_statement(node) if ternary_statement?(node) + return normalize_case(node) if case_argument_list?(node) + return normalize_argument_list_call_with_block(node) if argument_list_call_with_block?(node) + return normalize_dotted_expression(node) if dotted_expression?(node) + return normalize_argument_list_unary_not(node) if argument_list_unary_not?(node) + return normalize_infix_statement(node) if infix_statement?(node) + + function = node.named_children.first + nested_args = node.named_children[1] + if function && IDENTIFIER_KINDS.include?(function.kind) && nested_args&.kind == "argument_list" + args = nested_args.named_children.map { |child| normalize_node(child) }.compact + return wrap(:FCALL, children: [function.text.to_sym, list(args, source: nested_args)], source: node) + end + + values = node.named_children.map { |child| normalize_node(child) }.compact + return values.first if values.size == 1 + + list(values, source: node) + end + + def argument_list_element_reference?(node) + 
node.kind == "argument_list" && + node.children.first&.text != "[" && + node.children.any? { |child| !child.named? && child.text == "[" } && + node.children.any? { |child| !child.named? && child.text == "]" } && + node.named_children.size >= 2 && + node.named_children.none? { |child| %w[block do_block].include?(child.kind) } + end + + def normalize_argument_list_element_reference(node) + return nil unless ruby? && ts_node?(node) && argument_list_element_reference?(node) + + recv = node.named_children.first + args = node.named_children.drop(1).map { |child| normalize_node(child) }.compact + wrap(:CALL, children: [normalize_node(recv), :[], list(args, source: node)], source: node) + end + + def normalize_call_with_block(node) + block = call_block(node) + call = normalize_call_without_block(node, block) + args = normalize_block_parameters(block) + body = with_ruby_scope(block) do + dynamic_scope(normalize_body(named_field(block, "body") || block_child(block) || block)) + end + wrap(:ITER, children: [call, scope(body, args: args, source: node)], source: node) + end + + def normalize_call_without_block(node, block) + call_source = block ? source_before_child(node, block) : node + if dotted_call?(node) + recv, mid = dotted_call_parts(node) + args = call_arguments(node, nil) + arg_list = args.empty? ? nil : list(args, source: call_source) + type = safe_navigation_call?(node) ? :QCALL : :CALL + return wrap(type, children: [normalize_node(recv), mid.to_sym, arg_list], source: call_source) + end + + function = named_field(node, "function") || named_field(node, "call") || + node.named_children.find { |child| !same_ts_node?(child, block) } + args = call_arguments(node, function) + + if function && IDENTIFIER_KINDS.include?(function.kind) + return wrap(:FCALL, children: [function.text.to_sym, list(args, source: call_source)], source: call_source) + end + + if ruby? 
&& function && const_node?(function) + return wrap(:FCALL, children: [function.text.to_sym, list(args, source: call_source)], source: call_source) + end + + if member_read_node?(function) + recv, mid = member_parts(function) + type = safe_navigation_call?(function) ? :QCALL : :CALL + return wrap(type, children: [normalize_node(recv), mid.to_sym, list(args, source: call_source)], source: call_source) + end + + wrap(:CALL, children: [normalize_node(function), :call, list(args, source: call_source)], source: call_source) + end + + def normalize_visibility_inline_def(node) + message = node.named_children.first&.text.to_s + args = node.named_children.find { |child| child.kind == "argument_list" } + method = inline_def_from_argument_list(args) + wrap(:FCALL, children: [message.to_sym, list([method].compact, source: args || node)], source: node) + end + + def normalize_modifier_statement(node) + keyword = modifier_keyword(node) + action, cond = modifier_parts(node) + type = + case keyword + when "unless" then :UNLESS + when "while" then :WHILE + when "until" then :UNTIL + else :IF + end + normalized_action = normalize_modifier_action(action) + children = %i[WHILE UNTIL].include?(type) ? [normalize_node(cond), normalized_action, true] : + [normalize_node(cond), normalized_action, nil] + wrap(type, children: children, source: node) + end + + def normalize_modifier_action(node) + modifier_return_action?(node) ? 
normalize_return_node(node, elide_symbol: false) : normalize_node(node) + end + + def modifier_return_action?(node) + ts_node?(node) && RETURN_KINDS.key?(node.kind) + end + + def normalize_command_call_statement(node) + function = node.named_children.first + if visibility_inline_def_statement?(node, function) + method = inline_def_from_statement(node) + return wrap(:FCALL, children: [function.text.to_sym, list([method].compact, source: node)], source: node) + end + + args_node = node.named_children.find { |child| %w[argument_list arguments].include?(child.kind) } + args = args_node ? command_arguments(args_node) : [] + block = call_block(node) + call_source = block ? source_before_child(node, block) : node + if ruby? && function&.text == "yield" + return wrap(:YIELD, children: [list(args, source: args_node || call_source)], source: call_source) + end + + call = wrap(args.empty? ? :VCALL : :FCALL, + children: [function.text.to_sym, list(args, source: args_node || call_source)], + source: call_source) + return call unless block + + block_args = normalize_block_parameters(block) + body = with_ruby_scope(block) do + dynamic_scope(normalize_body(named_field(block, "body") || block_child(block) || block)) + end + wrap(:ITER, children: [call, scope(body, args: block_args, source: node)], source: node) + end + + def dynamic_scope(node) + return node unless node.is_a?(Node) + return node if %i[DEFN DEFS CLASS MODULE SCLASS LAMBDA].include?(node.type) + + node.type = :DASGN if node.type == :LASGN + node.type = :DVAR if node.type == :LVAR + node.children = node.children.map { |child| dynamic_scope(child) } + node + end + + def normalize_zero_child_call(node) + wrap(:VCALL, children: [node.text.to_s.to_sym], source: node) + end + + def normalize_member_read(node) + recv, mid = member_parts(node) + return wrap(kind_type(node.kind), children: normalize_children(node), source: node) unless recv && mid + + wrap(:CALL, children: [normalize_node(recv), mid.to_sym, nil], source: 
node) + end + + def normalize_const(node) + if %w[scope_resolution scoped_type_identifier].include?(node.kind) + parts = node.named_children + base = normalize_const(parts[0]) if parts[0] + name = (named_field(node, "name") || parts[-1])&.text.to_s + return wrap(:COLON2, children: [base, name.to_sym], source: node) + end + + wrap(:CONST, children: [node.text.to_s.to_sym], source: node) + end + + def normalize_children(node) + node.named_children.filter_map do |child| + next if child.kind == "heredoc_body" + next if assignment_rhs?(child) + + normalize_node(child) + end + end + + def normalize_body(node) + return nil unless ts_node?(node) + return normalize_leading_function_statement(node) if leading_function_statement?(node) + return normalize_leading_owner_statement(node) if leading_owner_statement?(node) + return normalize_leading_case_statement(node) if leading_case_statement?(node) + return normalize_ensure_body_statement(node) if ensure_body_statement?(node) + return normalize_rescue_body_statement(node) if rescue_body_statement?(node) + return normalize_heredoc_body_statement(node) if heredoc_body_statement?(node) + return normalize_leading_loop_statement(node) if leading_loop_statement?(node) + return normalize_leading_if_statement(node) if leading_if_statement?(node) + return normalize_elsif(node) if node.kind == "elsif" + return normalize_wrapped_return_statement(node) if wrapped_return_statement?(node) + return normalize_yield_statement(node) if yield_statement?(node) + return normalize_super_statement(node) if super_statement?(node) + return normalize_unary_not_statement(node) if unary_not_statement?(node) + return normalize_operator_assignment_statement(node) if operator_assignment_statement?(node) + return normalize_element_reference_statement(node) if element_reference_statement?(node) + return normalize_hash_literal_statement(node) if hash_literal_statement?(node) + return normalize_array_literal_statement(node) if array_literal_statement?(node) + 
return normalize_concatenated_string_statement(node) if concatenated_string_statement?(node) + return normalize_interpolated_statement(node) if interpolated_statement?(node) + return nil if empty_body_statement?(node) + return normalize_terminal_statement(node) if terminal_statement?(node) + return normalize_modifier_statement(node) if modifier_statement?(node) + return normalize_ternary_statement(node) if ternary_statement?(node) + return normalize_statement_call_with_block(node) if statement_call_with_block?(node) + return normalize_command_call_statement(node) if command_call_statement?(node) + return normalize_infix_statement(node) if infix_statement?(node) + return normalize_boolean(node) if boolean_expression?(node) + return normalize_dotted_expression(node) if dotted_expression?(node) + + if BLOCK_KINDS.include?(node.kind) + children = normalize_children(node) + if children.empty? && bare_identifier_text?(node.text) + return wrap(:VCALL, children: [node.text.to_s.strip.to_sym], source: node) + end + return nil if children.empty? + return children.first if children.size == 1 + + return wrap(:BLOCK, children: children, source: node) + end + + normalize_node(node) + end + + def normalize_body_nodes(nodes, source:) + children = nodes.map { |child| normalize_body(child) }.compact + return nil if children.empty? + return children.first if children.size == 1 + + wrap(:BLOCK, children: children, source: source) + end + + def normalize_patterns(node) + patterns = node.named_children.select do |child| + %w[pattern case_pattern match_pattern switch_pattern when_condition].include?(child.kind) + end + patterns = [named_field(node, "value")].compact if patterns.empty? + patterns = [node.named_children.find { |child| !BLOCK_KINDS.include?(child.kind) && !statement_node?(child) }].compact if patterns.empty? 
+ + patterns.flat_map do |pattern| + pattern_text = pattern.text.to_s + pattern_children = pattern.named_children + if pattern_text.include?("::") + [wrap(:CONST, children: [pattern_text.to_sym], source: pattern)] + elsif %w[pattern case_pattern match_pattern switch_pattern when_condition expression_list].include?(pattern.kind) && + pattern_children.empty? && pattern_text.match?(/\A-?\d+\z/) + [wrap(:INTEGER, children: [], source: pattern)] + elsif ruby? && %w[pattern case_pattern match_pattern switch_pattern when_condition expression_list].include?(pattern.kind) && + pattern_children.empty? && pattern_text.match?(/\A[A-Z]\w*\z/) + [wrap(:CONST, children: [pattern_text.to_sym], source: pattern)] + elsif ruby? && %w[pattern case_pattern match_pattern switch_pattern when_condition expression_list].include?(pattern.kind) && + pattern_children.empty? && pattern_text.match?(/\A[A-Za-z_]\w*[!?=]?\z/) + [local_or_call_for_name(pattern_text, pattern)] + elsif %w[pattern case_pattern match_pattern switch_pattern when_condition expression_list].include?(pattern.kind) + pattern_children.map { |child| normalize_node(child) }.compact + else + [normalize_node(pattern)].compact + end + end + end + + def assignment_target(left, right, source: nil) + return nil unless ts_node?(left) + source ||= left + + if instance_variable?(left) + return wrap(:IASGN, children: [left.text.to_s, right], source: source) + end + + if global_variable?(left) + return wrap(:GASGN, children: [left.text.to_s, right], source: source) + end + + if left.kind == "element_reference" + recv = left.named_children.first + args = left.named_children.drop(1).map { |child| normalize_node(child) }.compact + return wrap(:ATTRASGN, children: [normalize_node(recv), :[]=, list(args + [right], source: left)], + source: source) + end + + if member_read_node?(left) || normalization_adapter.member_assignment_target?(left) + recv, mid = member_parts(left) + writer = left.text.to_s.include?("&.") ? 
mid.to_sym : "#{mid}=".to_sym + return wrap(:ATTRASGN, children: [normalize_node(recv), writer, list([right], source: left)], + source: source) + end + + return assignment_target(left.named_children.first, right, source: source) if left.kind == "expression_list" + + nil + end + + def normalize_assignment_lhs(node) + right = normalize_node(next_named_sibling(node)) + source = parent_node(node) || node + assignment_target(node, right, source: source) || + wrap(:LASGN, children: [target_name(node), right], source: source) + end + + def target_name(left) + return left.text.to_s.sub(/\A\*/, "") if ts_node?(left) && IDENTIFIER_KINDS.include?(left.kind) + return left.text.to_s.sub(/\A\*/, "") if ts_node?(left) && %w[splat splat_parameter rest_assignment].include?(left.kind) + return left.text.to_s if ts_node?(left) + + Ast.slice(normalize_node(left), @document.lines) + end + + def case_value(node) + named_field(node, "value") || named_field(node, "subject") || + named_field(node, "condition") || + node.named_children.find do |child| + !WHEN_KINDS.include?(child.kind) && !BLOCK_KINDS.include?(child.kind) && child.kind != "else" + end + end + + def case_arms(node) + arms = [] + stack = node.named_children.dup + until stack.empty? 
+ child = stack.shift + next unless ts_node?(child) + + if normalization_adapter.case_arm?(child) + arms << child + elsif normalization_adapter.case_else_node?(child) + next + else + stack.concat(child.named_children) unless FUNCTION_KINDS.include?(child.kind) + end + end + arms + end + + def when_body(node) + named_field(node, "body") || named_field(node, "consequence") || + named_field(node, "value") || + node.named_children.reverse.find { |child| BLOCK_KINDS.include?(child.kind) || statement_node?(child) } + end + + def link_when_chain(whens, fallback = nil) + whens.reverse.inject(fallback) do |next_when, current| + current.children[2] = next_when + current + end + end + + def case_else_body(node) + else_node = normalization_adapter.case_else_node(node) + return nil unless else_node + + if normalization_adapter.case_else_arm?(else_node) || else_node.kind == "switch_default" + body = when_body(else_node) + return normalize_body(body) if body + end + + normalize_else_or_branch(else_node) + end + + def normalize_else_or_branch(node) + return nil unless ts_node?(node) + return normalize_body(node) unless node.kind == "else" + + normalize_body_nodes(node.named_children, source: node) + end + + def link_rescue_chain(resbodies) + resbodies.reverse.inject(nil) do |next_rescue, current| + current.children[2] = next_rescue + current + end + end + + def boolean_expression?(node) + (normalization_adapter.boolean_expression_kind?(node) || boolean_statement?(node)) && + %w[and or].include?(boolean_operator(node)) + end + + def boolean_statement?(node) + return false unless %w[body_statement block_body statement argument_list].include?(node.kind) + return false unless %w[&& || and or].include?(binary_operator(node)) + return false if node.named_children.size < 2 + + node.children.all? do |child| + child.named? 
|| %w[&& || and or ( )].include?(child.text.to_s) + end + end + + def operator_call_expression?(node) + normalization_adapter.operator_call_expression?(node) + end + + def infix_statement?(node) + left, operator, right = infix_statement_parts(node) + left && right && INFIX_STATEMENT_OPERATORS.include?(operator) + end + + def dotted_expression?(node) + normalization_adapter.dotted_expression_wrapper?(node) && dotted_call?(node) + end + + def argument_list_call_with_block?(node) + return false unless node.kind == "argument_list" + return false if dotted_call?(node) + return false unless call_block(node) + + IDENTIFIER_KINDS.include?(node.named_children.first&.kind) + end + + def infix_statement_parts(node) + return [nil, nil, nil] unless %w[body_statement block_body statement argument_list].include?(node.kind) + + named_index = 0 + left = nil + right = nil + operator = nil + node.children.each do |child| + if child.named? + left ||= child + right = child if operator + named_index += 1 + elsif INFIX_STATEMENT_OPERATORS.include?(child.text.to_s) + operator = child.text.to_s + end + end + return [nil, nil, nil] unless named_index == 2 && operator + + [left, operator, right] + rescue StandardError + [nil, nil, nil] + end + + def argument_list_unary_not?(node) + node.kind == "argument_list" && + node.children.first&.text == "!" && + node.named_children.size == 1 + rescue StandardError + false + end + + def unary_not_statement?(node) + %w[body_statement block_body statement argument_list].include?(node.kind) && + node.children.first&.text == "!" && + node.named_children.size == 1 + rescue StandardError + false + end + + def normalize_argument_list_unary_not(node) + return nil unless ruby? 
&& ts_node?(node) && argument_list_unary_not?(node) + + operand = node.named_children.first + wrap(:OPCALL, children: [normalize_node(operand), :!, nil], source: node) + end + + def comparison_expression?(node) + return false if literal_fragment_expression_list?(node) + + normalization_adapter.comparison_expression_kind?(node) && + COMPARISON_OPERATORS.include?(comparison_operator(node)) + end + + def regex_literal?(node) + ts_node?(node) && %w[regex regex_literal].include?(node.kind) + end + + def unary_not_expression?(node) + normalization_adapter.unary_not_expression?(node) + end + + def unary_minus_expression?(node) + normalization_adapter.unary_minus_expression?(node) + end + + def boolean_operator(node) + direct = binary_operator(node) + return "and" if %w[&& and].include?(direct) + return "or" if %w[|| or].include?(direct) + return nil if ts_node?(node) + + text = spaced_text(node) + return "and" if text.include?("&&") || text.match?(/\band\b/) + return "or" if text.include?("||") || text.match?(/\bor\b/) + + nil + end + + def comparison_operator(node) + direct = binary_operator(node) + return direct if COMPARISON_OPERATORS.include?(direct) + + spaced_text(node)[/(===|!==|==|!=|<=|>=|<|>)/, 1] + end + + def binary_operator(node) + normalization_adapter.binary_operator(node) + end + + def spaced_text(node) + " #{node.text} " + end + + def class_node?(node) + normalization_adapter.class_node?(node) + end + + def module_node?(node) + MODULE_KINDS.include?(node.kind) && named_field(node, "name") + end + + def unwrap_node?(node) + normalization_adapter.unwrap_node?(node) + end + + def statement_node?(node) + node.kind.end_with?("_statement") || node.kind.end_with?("_expression") || + %w[return break next].include?(node.kind) + end + + def local_identifier?(node) + IDENTIFIER_KINDS.include?(node.kind) + end + + def ruby_vcall_identifier?(node) + return false unless ruby? 
+ return false unless IDENTIFIER_KINDS.include?(node.kind) + return false if assignment_lhs?(node) + return false if ruby_definition_identifier?(node) + + !ruby_local_name?(node.text.to_s) + end + + def ruby_definition_identifier?(node) + parent = parent_node(node) + return false unless ts_node?(parent) + + if %w[method singleton_method].include?(parent.kind) + name = named_field(parent, "name") || + parent.named_children.find { |child| IDENTIFIER_KINDS.include?(child.kind) } + return same_ts_node?(name, node) + end + + %w[ + method_parameters block_parameters lambda_parameters + optional_parameter keyword_parameter block_parameter + ].include?(parent.kind) + end + + def ruby_local_name?(name) + @local_stack.reverse.any? { |scope| scope.include?(name) } + end + + def ruby? + normalization_adapter.ruby? + end + + def normalization_adapter + @normalization_adapter ||= TreeSitterNormalizationAdapter.for(@document) + end + + def interpolated_string?(node) + normalization_adapter.interpolated_string?(node) + end + + def lambda_expression?(node) + normalization_adapter.lambda_expression?(node) + end + + def lambda_target(node) + normalization_adapter.lambda_target(node) + end + + def interpolation_node?(node) + normalization_adapter.interpolation_node?(node) + end + + def normalize_interpolated_string(node) + wrap(:DSTR, children: normalize_children(node), source: node) + end + + def vcall_identifier?(node) + return false unless local_identifier?(node) + return false if ruby? 
&& ruby_local_name?(node.text.to_s) + + parent = parent_node(node) + return false unless ts_node?(parent) + return false if %w[method method_parameters parameter_list argument_list arguments].include?(parent.kind) + return false if member_read_node?(parent) + return false if dotted_expression?(parent) + return false if assignment_lhs?(node) || assignment_rhs?(node) + + return true if %w[body_statement block_body then].include?(parent.kind) && parent_named_child?(parent, node) + return true if %w[if_modifier unless_modifier].include?(parent.kind) && same_ts_node?(parent.named_children.first, node) + + false + end + + def const_node?(node) + CONST_KINDS.include?(node.kind) + end + + def self_node?(node) + %w[self this].include?(node.kind) || node.text == "self" || node.text == "this" + end + + def instance_variable?(node) + normalization_adapter.instance_variable?(node) + end + + def global_variable?(node) + normalization_adapter.global_variable?(node) + end + + def member_read_node?(node) + ts_node?(node) && MEMBER_KINDS.include?(node.kind) && member_parts(node).all? 
+ end + + def assignment_lhs?(node) + return false if prev_sibling(node)&.text == ":" + return false if literal_fragment_assignment_context?(node) + + sibling = next_sibling(node) + sibling && assignment_operator?(sibling.text) + end + + def assignment_rhs?(node) + return false if literal_fragment_assignment_context?(node) + + sibling = prev_sibling(node) + sibling && assignment_operator?(sibling.text) + end + + def literal_fragment_assignment_context?(node) + normalization_adapter.literal_fragment_assignment_context?(node) + end + + def literal_fragment_expression_list?(node) + return false unless ts_node?(node) && node.kind == "expression_list" + + named = node.named_children + named.size == 1 && literal_fragment_assignment_context?(named.first) + rescue StandardError + false + end + + def assignment_operator?(text) + normalization_adapter.assignment_operator?(text) + end + + def operator_assignment_operator(node) + raw = node.children.find { |child| !child.named? && child.text.to_s.end_with?("=") }&.text.to_s + op = raw.sub(/=\z/, "") + op = "||" if raw == "||=" + op = "&&" if raw == "&&=" + op.to_sym + end + + def augmented_assignment_value(left, operator, right_raw, source) + receiver = assignment_receiver(left) + right = normalize_node(right_raw) + wrap(:CALL, children: [receiver, operator, list([right].compact, source: right_raw || left)], source: source) + end + + def normalize_logical_operator_assignment(left, operator, right, source:) + return nil unless ruby? && [:"||", :"&&"].include?(operator) + return nil unless ts_node?(left) && IDENTIFIER_KINDS.include?(left.kind) + + name = target_name(left) + type = operator == :"||" ? 
:OP_ASGN_OR : :OP_ASGN_AND + receiver = wrap(:LVAR, children: [name], source: left) + assignment = wrap(:LASGN, children: [name, right], source: source) + wrap(type, children: [receiver, operator, assignment], source: source) + end + + def assignment_receiver(left) + return nil unless ts_node?(left) + return wrap(:LVAR, children: [left.text.to_s], source: left) if IDENTIFIER_KINDS.include?(left.kind) + return wrap(:IVAR, children: [left.text.to_s], source: left) if instance_variable?(left) + return normalize_global_variable(left) if global_variable?(left) + return normalize_const(left) if const_node?(left) + + normalize_node(left) + end + + def with_ruby_scope(node, reset: false) + return yield unless ruby? + + previous = @local_stack + @local_stack = [] if reset + @local_stack = @local_stack + [ruby_scope_locals(node)] + yield + ensure + @local_stack = previous if ruby? + end + + def ruby_scope_locals(node) + locals = Set.new + collect_ruby_scope_locals(node, locals, root: true) + locals + end + + def collect_ruby_scope_locals(node, locals, root: false) + return unless ts_node?(node) + return if !root && ruby_scope_boundary?(node) + + collect_ruby_parameter_locals(node, locals) + collect_ruby_assignment_locals(node, locals) + + node.named_children.each do |child| + next if ruby_scope_child_boundary?(child) + + collect_ruby_scope_locals(child, locals) + end + end + + def collect_ruby_parameter_locals(node, locals) + return unless %w[method_parameters block_parameters lambda_parameters].include?(node.kind) + + node.named_children.each do |child| + collect_identifier_names(child, locals) + end + end + + def collect_ruby_assignment_locals(node, locals) + if node.kind == "exception_variable" + collect_identifier_names(node, locals) + return + end + + return unless ruby_assignment_node?(node) + + left = assignment_left(node) + collect_assignment_target_names(left, locals) + end + + def ruby_assignment_node?(node) + return false unless ts_node?(node) + return true if 
%w[assignment operator_assignment].include?(node.kind) + return true if node.kind == "pattern" && node.children.any? { |child| !child.named? && child.text == "=" } + + %w[body_statement block_body statement].include?(node.kind) && + node.children.any? { |child| !child.named? && assignment_operator?(child.text) } + end + + def collect_assignment_target_names(node, locals) + return unless ts_node?(node) + + if IDENTIFIER_KINDS.include?(node.kind) + locals.add(node.text.to_s.sub(/\A\*/, "")) + return + end + + return unless %w[left_assignment_list expression_list splat splat_parameter rest_assignment].include?(node.kind) + + node.named_children.each { |child| collect_assignment_target_names(child, locals) } + end + + def collect_identifier_names(node, locals) + return unless ts_node?(node) + + locals.add(node.text.to_s.sub(/\A\*/, "")) if IDENTIFIER_KINDS.include?(node.kind) + locals.add(node.text.to_s) if normalization_adapter.identifier_text_node?(node) + node.children.select(&:named?).each { |child| collect_identifier_names(child, locals) } + end + + def ruby_scope_boundary?(node) + return false if %w[block do_block].include?(node.kind) && parent_node(node)&.kind == "lambda" + + FUNCTION_KINDS.include?(node.kind) || class_node?(node) || module_node?(node) || + %w[singleton_class lambda block do_block].include?(node.kind) + end + + def ruby_scope_child_boundary?(node) + ruby_scope_boundary?(node) + end + + def member_parts(node) + return [nil, nil] if node.kind == "expression_list" && + !(named_field(node, "operand") && named_field(node, "field")) + + return dotted_call_parts(node) if dotted_call?(node) + + recv = named_field(node, "receiver") || named_field(node, "object") || + named_field(node, "operand") || named_field(node, "value") || + named_field(node, "expression") || + node.named_children.find { |child| child.kind != "navigation_suffix" } + mid = named_field(node, "method") || named_field(node, "field") || + named_field(node, "property") || 
named_field(node, "suffix") || + node.named_children.find { |child| child.kind == "navigation_suffix" } || + node.named_children.reject { |child| %w[block do_block argument_list arguments].include?(child.kind) }.last + return [nil, nil] unless recv && mid && recv != mid + + [recv, member_name(mid).sub(/=\z/, "")] + end + + def member_name(node) + return "" unless ts_node?(node) + + if node.kind == "navigation_suffix" + suffix = named_field(node, "suffix") || + node.named_children.find { |child| IDENTIFIER_KINDS.include?(child.kind) } || + node.named_children.last + return suffix&.text.to_s.sub(/\A[.?]+/, "") + end + + node.text.to_s.sub(/\A[.?]+/, "") + end + + def call_arguments(node, function) + args = named_field(node, "arguments") || named_field(node, "argument") || + node.named_children.find { |child| %w[argument_list arguments].include?(child.kind) } + return [] unless args + + children = args.named_children.reject { |child| function && child == function } + return [normalize_dotted_expression(args)] if dotted_expression?(args) + if children.empty? 
+ scalar = scalar_argument_list_value(args) + return [scalar] if scalar + + return literal_arguments_from_text(args) + end + return [normalize_infix_statement(args)] if infix_statement?(args) + + children.map { |child| normalize_node(child) }.compact + end + + def assignment_left(node) + named_field(node, "left") || node.named_children.first + end + + def assignment_right(node) + named_field(node, "right") || node.named_children[1] + end + + def function_name(node) + return singleton_name(node) if node.kind == "singleton_method" + + name = named_field(node, "name") || + node.named_children.find do |child| + IDENTIFIER_KINDS.include?(child.kind) || child.kind == "constant" + end + name&.text.to_s.to_sym + end + + def singleton_receiver(node) + receiver = named_field(node, "receiver") + return receiver if receiver + + name = named_field(node, "name") || + node.named_children.reverse.find { |child| IDENTIFIER_KINDS.include?(child.kind) } + parameters = named_field(node, "parameters") + body = named_field(node, "body") || block_child(node) + node.named_children.find do |child| + !same_ts_node?(child, name) && + !same_ts_node?(child, parameters) && + !same_ts_node?(child, body) + end + end + + def singleton_name(node) + name = named_field(node, "name")&.text || + node.named_children.reverse.find { |child| IDENTIFIER_KINDS.include?(child.kind) }&.text.to_s + name.to_s.to_sym + end + + def first_named(node) + node.named_children.first + end + + def block_child(node) + node.named_children.find { |child| BLOCK_KINDS.include?(child.kind) || %w[block do_block].include?(child.kind) } + end + + def branch_child(node, cond, index) + node.named_children.reject { |child| child == cond || %w[comment else elsif].include?(child.kind) }[index] + end + + def explicit_alternative(node) + normalization_adapter.explicit_alternative(node) + end + + def const_for(node) + return wrap(:CONST, children: ["(anonymous)".to_sym], source: @document.root) unless ts_node?(node) + return 
normalize_const(node) if const_node?(node) + + wrap(:CONST, children: [node.text.to_s.to_sym], source: node) + end + + def normalize_parameters(node) + return nil unless ruby? && ts_node?(node) + + defaults = node.named_children.filter_map do |param| + name = named_field(param, "name") + value = named_field(param, "value") + next unless name && value + + wrap(:LASGN, children: [name.text.to_sym, normalize_node(value)], source: param) + end + return nil if defaults.empty? + + wrap(:ARGS, children: defaults, source: node) + end + + def normalize_block_parameters(block) + return nil unless ruby? && ts_node?(block) + + params = block.named_children.find { |child| child.kind == "block_parameters" } + return nil unless params + + destructured = params.named_children.select { |child| child.kind == "destructured_parameter" } + pre_init = destructured.map { |param| normalize_destructured_block_parameter(param) }.compact + return nil if pre_init.empty? + + wrap(:ARGS, children: pre_init, source: params) + end + + def normalize_destructured_block_parameter(param) + targets = [] + param.named_children.each { |child| collect_destructured_parameter_targets(child, targets) } + return nil if targets.empty? + + wrap(:MASGN, + children: [ + wrap(:DVAR, children: [nil], source: param), + list(targets, source: param), + nil, + ], + source: param) + end + + def collect_destructured_parameter_targets(node, targets) + return unless ts_node?(node) + + if IDENTIFIER_KINDS.include?(node.kind) + targets << wrap(:DASGN, children: [node.text.to_s, nil], source: node) + return + end + + node.named_children.each { |child| collect_destructured_parameter_targets(child, targets) } + end + + def scope(body, args: nil, source: nil) + wrap(:SCOPE, children: [nil, args, body], source: body || args || source || @document.root) + end + + def list(children, source:) + return nil if children.nil? || children.empty? 
+ + wrap(:LIST, children: children, source: source) + end + + def wrap(type, children:, source:) + if source.respond_to?(:start_point) + first_lineno = source.start_point.row + 1 + first_column = source.start_point.column + last_lineno = source.end_point.row + 1 + last_column = source.end_point.column + text = source.text.to_s + else + first_lineno = source.first_lineno + first_column = source.first_column + last_lineno = source.last_lineno + last_column = source.last_column + text = source.text.to_s + end + + Node.new( + type: type, + children: children, + first_lineno: first_lineno, + first_column: first_column, + last_lineno: last_lineno, + last_column: last_column, + text: text + ) + end + + def source_before_child(node, child) + text = @document.source.byteslice(node.start_byte...child.start_byte).to_s.rstrip + return node if text.empty? + + lines = text.lines + last_lineno = node.start_point.row + lines.size + last_column = + if lines.size <= 1 + node.start_point.column + text.length + else + lines.last.to_s.chomp.length + end + Node.new( + type: :SOURCE, + children: [], + first_lineno: node.start_point.row + 1, + first_column: node.start_point.column, + last_lineno: last_lineno, + last_column: last_column, + text: text + ) + end + + def source_from_nodes(first_node, last_node) + return first_node unless ts_node?(first_node) && ts_node?(last_node) + + text = @document.source.byteslice(first_node.start_byte...last_node.end_byte).to_s + Node.new( + type: :SOURCE, + children: [], + first_lineno: first_node.start_point.row + 1, + first_column: first_node.start_point.column, + last_lineno: last_node.end_point.row + 1, + last_column: last_node.end_point.column, + text: text + ) + end + + def source_from_normalized_nodes(first_node, last_node) + return first_node unless first_node.is_a?(Node) && last_node.is_a?(Node) + + text = + if first_node.first_lineno == last_node.last_lineno + @document.lines[first_node.first_lineno - 
1].to_s.byteslice(first_node.first_column...last_node.last_column) + else + ([@document.lines[first_node.first_lineno - 1].to_s.byteslice(first_node.first_column..)] + + @document.lines[first_node.first_lineno...(last_node.last_lineno - 1)] + + [@document.lines[last_node.last_lineno - 1].to_s.byteslice(0...last_node.last_column)]).join + end + Node.new( + type: :SOURCE, + children: [], + first_lineno: first_node.first_lineno, + first_column: first_node.first_column, + last_lineno: last_node.last_lineno, + last_column: last_node.last_column, + text: text.to_s + ) + end + + def named_field(node, name) + normalization_adapter.named_field(node, name) + end + + def parent_node(node) + node.parent + rescue StandardError + nil + end + + def next_sibling(node) + node.next_sibling + rescue StandardError + nil + end + + def prev_sibling(node) + node.prev_sibling + rescue StandardError + nil + end + + def next_named_sibling(node) + node.next_named_sibling + rescue StandardError + nil + end + + def modifier_statement?(node) + %w[body_statement block_body statement].include?(node.kind) && + modifier_keyword(node) && + node.named_children.size >= 2 + end + + def ternary_statement?(node) + normalization_adapter.ternary_statement?(node) + end + + def normalize_ternary_statement(node) + cond, positive, negative = normalization_adapter.ternary_parts(node) + wrap(:IF, children: [normalize_node(cond), normalize_node(positive), normalize_node(negative)], source: node) + end + + def case_argument_list?(node) + normalization_adapter.case_argument_list?(node) + end + + def leading_function_statement?(node) + normalization_adapter.leading_function_statement?(node) + end + + def normalize_leading_function_statement(node) + name = normalization_adapter.leading_function_name(node).to_s.to_sym + body = normalization_adapter.leading_function_body(node) + normalized_body = with_ruby_scope(node, reset: true) do + elide_tail_returns(normalize_body(body)) + end + wrap(:DEFN, children: [name, 
scope(normalized_body, source: node)], source: node) + end + + def command_call_statement?(node) + return false unless %w[body_statement block block_body statement].include?(node.kind) + return false if dotted_call?(node) + return false unless node.named_children.first&.kind == "identifier" + + node.named_children.any? { |child| %w[argument_list arguments].include?(child.kind) } || + call_block(node) + end + + def zero_child_identifier_call?(node) + normalization_adapter.zero_child_identifier_call?(node) + end + + def dotted_call?(node) + return false unless ts_node?(node) + target = dotted_call_target(node) + return true if target && dotted_call_node?(target) + + dotted_call_node?(node) + end + + def dotted_call_node?(node) + return false unless ts_node?(node) + return false unless node.children.any? { |child| child.text == "." || child.text == "&." } + + callable = dotted_callable_children(node) + return false if callable.any? { |child| %w[string_content interpolation].include?(child.kind) } + + callable.size >= 2 + end + + def dotted_call_target(node) + return nil unless ts_node?(node) + + named = node.named_children + return nil unless named.size == 1 + + child = named.first + dotted_call_node?(child) ? 
child : nil + rescue StandardError + nil + end + + def dotted_callable_children(node) + node.named_children.reject { |child| %w[block do_block argument_list arguments].include?(child.kind) } + end + + def safe_navigation_call?(node) + ts_node?(node) && normalization_adapter.safe_navigation_call?(node) + end + + def dotted_call_parts(node) + target = dotted_call_target(node) || node + callable = dotted_callable_children(target) + [callable.first, callable[1].text.to_s.sub(/=\z/, "")] + end + + def leading_if_statement?(node) + normalization_adapter.leading_if_statement?(node) + end + + def leading_case_statement?(node) + normalization_adapter.leading_case_statement?(node) + end + + def normalize_leading_case_statement(node) + target = normalization_adapter.leading_case_target(node) || node + value = normalize_node(case_value(target)) + whens = case_arms(target).map { |arm| normalize_when(arm) }.compact + wrap(:CASE, children: [value, link_when_chain(whens, case_else_body(target))], source: target) + end + + def leading_loop_statement?(node) + normalization_adapter.leading_loop_statement?(node) + end + + def rescue_body_statement?(node) + normalization_adapter.rescue_body_statement?(node) + end + + def normalize_rescue_body_statement(node) + target = normalization_adapter.rescue_body_target(node) || node + body_nodes = normalization_adapter.rescue_body_nodes(target) + body = normalize_body_nodes(body_nodes, source: target) + rescue_nodes = normalization_adapter.rescue_clauses(target) + resbodies = rescue_nodes.map { |child| normalize_rescue_clause(child) } + source = source_from_nodes(body_nodes.first || target, rescue_source_end(rescue_nodes.last) || rescue_nodes.last || target) + wrap(:RESCUE, children: [body, link_rescue_chain(resbodies), nil], source: source) + end + + def normalize_rescue_clause(node) + exceptions = normalization_adapter.rescue_clause_exceptions(node) + exception_nodes = exceptions.map do |child| + if child.kind == "exceptions" && 
child.text.to_s.match?(/\A[A-Z]\w*(?:::\w+)*\z/) + normalize_const(child) + else + normalize_node(child) + end + end.compact + exception_source = normalization_adapter.rescue_clause_exceptions_source(node) + exception_variable = rescue_exception_variable(node) + handler = normalization_adapter.rescue_clause_handler(node) + body = prepend_rescue_exception_assignment(normalize_body(handler), exception_variable) + wrap(:RESBODY, children: [list(exception_nodes, source: exception_source || node), body, nil], + source: node) + end + + def rescue_source_end(node) + return nil unless ts_node?(node) + + handler = normalization_adapter.rescue_clause_handler(node) + return handler.named_children.last || handler if ts_node?(handler) + + node.named_children.reverse.find { |child| !%w[comment].include?(child.kind) } || node + end + + def rescue_exception_variable(node) + name = normalization_adapter.rescue_clause_exception_variable_name(node) + return nil unless name + + source = normalization_adapter.rescue_clause_exception_variable_source(node) || name + wrap(:LASGN, children: [name.text.to_s, wrap(:ERRINFO, children: [], source: source)], source: source) + end + + def prepend_rescue_exception_assignment(body, assignment) + return body unless assignment + return assignment unless body.is_a?(Node) + + if body.type == :BLOCK + body.children = [assignment] + body.children.compact + body + else + wrap(:BLOCK, children: [assignment, body], source: source_from_normalized_nodes(assignment, body)) + end + end + + def ensure_body_statement?(node) + normalization_adapter.ensure_body_statement?(node) + end + + def normalize_ensure_body_statement(node) + target = normalization_adapter.ensure_body_target(node) || node + body = if rescue_body_statement?(target) + normalize_rescue_body_statement(target) + else + normalize_body_nodes(normalization_adapter.ensure_body_nodes(target), source: target) + end + ensure_node = normalization_adapter.ensure_clause(target) + ensure_body = 
normalize_body(normalization_adapter.ensure_clause_body(ensure_node) || ensure_node) + wrap(:ENSURE, children: [body, ensure_body], source: body || node) + end + + def array_literal_statement?(node) + normalization_adapter.array_literal_statement?(node) + end + + def element_reference_statement?(node) + normalization_adapter.element_reference_statement?(node) + end + + def normalize_element_reference_statement(node) + target = normalization_adapter.element_reference_target(node) || node + recv = normalization_adapter.element_reference_receiver(target) + args = normalization_adapter.element_reference_arguments(target).map { |child| normalize_node(child) }.compact + if ruby? && self_node?(recv) + return wrap(:FCALL, children: [:[], list(args, source: target)], source: target) + end + + wrap(:CALL, children: [normalize_node(recv), :[], list(args, source: target)], source: target) + end + + def hash_literal_statement?(node) + normalization_adapter.hash_literal_statement?(node) + end + + def normalize_hash_literal_statement(node) + target = normalization_adapter.hash_literal_target(node) || node + children = normalization_adapter.hash_literal_values(target).map do |child| + normalize_hash_literal_value(child) + end.compact + wrap(:HASH, children: children, source: target) + end + + def normalize_hash_literal_value(node) + if node.kind == "field" + named = node.named_children + if named.size >= 2 + key = named.first + value = named[1] + key_lit = wrap(:LIT, children: [key.text.to_s.to_sym], source: key || node) + return wrap(:HASH, children: [key_lit, normalize_node(value)].compact, source: node) + end + end + + normalize_node(node) + end + + def normalize_array_literal_statement(node) + target = normalization_adapter.array_literal_target(node) || node + values = normalization_adapter.array_literal_values(target).map do |child| + normalize_array_literal_value(child) + end.compact + return wrap(:ZLIST, children: [], source: target) if values.empty? 
+ + list(values, source: target) + end + + def normalize_array_literal_value(node) + if node.kind == "field" + named = node.named_children + return normalize_node(named.first) if named.size == 1 + return normalize_terminal_statement(node) if named.empty? + end + + normalize_node(node) + end + + def empty_body_statement?(node) + normalization_adapter.empty_body_statement?(node) + end + + def heredoc_body_statement?(node) + normalization_adapter.heredoc_body_statement?(node) + end + + def normalize_heredoc_body_statement(node) + heredoc_bodies = node.named_children.select { |child| child.kind == "heredoc_body" } + children = node.named_children.filter_map do |child| + next if child.kind == "heredoc_body" + + if heredoc_call_for_body?(child) + with_current_heredoc_body(heredoc_bodies.shift) { normalize_node(child) } + else + normalize_body(child) + end + end + return nil if children.empty? + return children.first if children.size == 1 + + wrap(:BLOCK, children: children, source: node) + end + + def heredoc_call_for_body?(node) + return false unless ts_node?(node) + + normalization_adapter.heredoc_call_for_body?(node) + end + + def with_current_heredoc_body(body) + previous = @current_heredoc_body + @current_heredoc_body = body + yield + ensure + @current_heredoc_body = previous + end + + def normalize_heredoc_beginning(node) + body = @current_heredoc_body || + parent_node(parent_node(node))&.named_children&.find { |child| child.kind == "heredoc_body" } + children = body ? normalize_heredoc_children(body) : [] + wrap(:DSTR, children: children, source: node) + end + + def normalize_heredoc_children(node) + node.named_children.filter_map do |child| + case child.kind + when "interpolation" + normalize_interpolation(child) + when "heredoc_content" + text = child.text.to_s + text.empty? ? 
# Normalizes a concatenated-string statement into a single :DSTR node.
#
# The flattening rules are the same as normalize_chained_string: every named
# child is normalized, children of nested :DSTR results are spliced in, and
# the source is the first child whose :DSTR contains an :EVSTR part, else the
# first named child.
#
# Delegating removes the duplicated body and also picks up the final
# `|| node` source fallback the duplicate was missing, so a node without any
# named children no longer passes a nil source to wrap.
def normalize_concatenated_string_statement(node)
  normalize_chained_string(node)
end
# Normalizes a global-variable reference.
#
# `$1`, `$2`, ... (a `$` followed by a positive integer without a leading
# zero) become :NTH_REF nodes carrying the integer; every other text becomes
# a :GVAR node carrying the variable text.
#
# The text is stripped first: normalize_terminal_statement decides to call
# this method based on the stripped text, so matching against the raw text
# here would misclassify a padded `$1` as a :GVAR and leak the padding into
# the node's children.
def normalize_global_variable(node)
  text = node.text.to_s.strip
  if text.match?(/\A\$[1-9]\d*\z/)
    return wrap(:NTH_REF, children: [text.delete_prefix("$").to_i], source: node)
  end

  wrap(:GVAR, children: [text], source: node)
end
# Splits a compound-assignment statement into [left, operator, right].
#
# Walks the node's direct children in order:
# * the first named child is the left-hand side;
# * an unnamed child whose text is a compound-assignment token sets the
#   operator (the token without its trailing "=");
# * any named child seen once an operator is known becomes the right-hand
#   side (the last one wins).
#
# Recognised tokens: += -= *= /= %= &= |= ^= ||= &&= and additionally
# **= <<= >>=, which were previously not matched.
#
# Returns [left, operator_symbol, right], or [nil, nil, nil] when any of the
# three parts is missing.
def operator_assignment_statement_parts(node)
  left = nil
  operator = nil
  right = nil
  node.children.each do |child|
    if child.named?
      left ||= child
      right = child if operator
    else
      token = child.text.to_s
      # Multi-character operators come first in the alternation; the match
      # is fully anchored so "==", "<=", ">=" and "!=" never qualify.
      if token.match?(/\A(?:\*\*|<<|>>|\|\||&&|[+\-*\/%&|^])=\z/)
        operator = token.delete_suffix("=")
      end
    end
  end
  return [nil, nil, nil] unless left && operator && right

  [left, operator.to_sym, right]
end
# Tells whether a "call" node is a wrapper call (one of
# INLINE_DEF_WRAPPER_MIDS) whose argument list text begins with "def ".
#
# Returns false when the node is not a call or its first named child is not
# a wrapper name, nil when the call has no "argument_list" child, and
# otherwise the boolean result of the "def " prefix check.
def visibility_inline_def_call?(node)
  return false if node.kind != "call"

  callee = node.named_children.first
  return false unless callee && INLINE_DEF_WRAPPER_MIDS.include?(callee.text.to_s)

  argument_list = node.named_children.find { |child| child.kind == "argument_list" }
  return nil unless argument_list

  argument_list.text.to_s.lstrip.start_with?("def ")
end
# Replaces `return x` in tail position with plain `x` (Ruby sources only).
#
# * Non-Ruby input and non-Node values are returned untouched.
# * Definition-like nodes (DEFN, DEFS, CLASS, MODULE, SCLASS, LAMBDA, ITER)
#   are returned untouched.
# * A :RETURN node is replaced by its first child.
# * For the container types below, the listed child slots are rewritten
#   recursively and the node's children are reassigned in place:
#     BLOCK                    -> last child (only if any child is truthy)
#     SCOPE                    -> slot 2
#     CASE                     -> slot 1
#     CASE2                    -> slot 0
#     RESCUE                   -> slots 0 and 1
#     IF, UNLESS, WHEN, RESBODY -> slot 1, plus slot 2 when more than two
#                                  children exist
# * Every other node type is returned as-is.
def elide_tail_returns(node)
  return node unless ruby?
  return node unless node.is_a?(Node)

  case node.type
  when :DEFN, :DEFS, :CLASS, :MODULE, :SCLASS, :LAMBDA, :ITER
    return node
  when :RETURN
    return node.children.first
  end

  kids = node.children
  tail_indexes =
    case node.type
    when :BLOCK then kids.any? ? [-1] : []
    when :SCOPE then [2]
    when :CASE then [1]
    when :CASE2 then [0]
    when :RESCUE then [0, 1]
    when :IF, :UNLESS, :WHEN, :RESBODY then kids.size > 2 ? [1, 2] : [1]
    end
  return node unless tail_indexes

  rewritten = kids.dup
  tail_indexes.each { |index| rewritten[index] = elide_tail_returns(rewritten[index]) }
  node.children = rewritten
  node
end
# Removes trailing :NIL statements from a :BLOCK node.
#
# Anything that is not a :BLOCK Node is returned unchanged. For a block, nil
# children are dropped and trailing :NIL nodes are popped; the result is
# nil when nothing remains, the sole remaining statement when exactly one
# remains, and otherwise the same block node with its children replaced.
def drop_trailing_nil_statement(node)
  return node if !node.is_a?(Node) || node.type != :BLOCK

  statements = node.children.compact
  nil_statement = ->(candidate) { candidate.is_a?(Node) && candidate.type == :NIL }
  statements.pop while nil_statement.call(statements.last)

  case statements.size
  when 0 then nil
  when 1 then statements.first
  else
    node.children = statements
    node
  end
end
# Tells whether two syntax nodes denote the same node: equal kind, equal
# start byte and equal end byte.
#
# Any StandardError raised while reading those attributes (for example when
# either argument is nil) is treated as "not the same" and yields false.
def same_ts_node?(left, right)
  return false unless left.kind == right.kind
  return false unless left.start_byte == right.start_byte

  left.end_byte == right.end_byte
rescue StandardError
  false
end
module Decomplex
  module Ast
    # A language-neutral fact node: a type tag, child nodes, a source span,
    # the source text, the language, and a metadata hash.
    SemanticNode = Struct.new(
      :type, :children, :span, :text, :language, :metadata,
      keyword_init: true
    ) do
      # Strict metadata lookup. Note that this replaces Struct#[]: it reads
      # from +metadata+, not from the struct members, and raises KeyError
      # for a missing key.
      def [](key)
        metadata.fetch(key)
      end

      # Metadata lookup with Hash#fetch semantics: an optional positional
      # fallback or a block computing the fallback. The block is forwarded;
      # previously it was silently dropped, so `fetch(:k) { ... }` raised
      # KeyError instead of yielding.
      def fetch(key, *fallback, &block)
        metadata.fetch(key, *fallback, &block)
      end

      # Pre-order traversal: yields this node, then recurses into every
      # child that responds to #walk. Without a block, returns an
      # Enumerator. A node built without children (nil) is treated as a
      # leaf instead of raising NoMethodError.
      def walk(&block)
        return enum_for(:walk) unless block

        block.call(self)
        children&.each { |child| child.walk(&block) if child.respond_to?(:walk) }
      end
    end

    module_function

    # True when +node+ is a SemanticNode.
    def semantic_node?(node)
      node.is_a?(SemanticNode)
    end
  end
end
# Builds the display text for a fact from its metadata; semantic_fact uses
# it when no source text could be sliced for the fact.
#
#   :function     -> signature, else the name
#   :call         -> "receiver.message" (blank parts omitted)
#   :decision     -> predicate
#   :branch_arm   -> body
#   :state_*      -> "receiver.field" (blank parts omitted)
#   anything else -> name
def fact_text(type, metadata)
  dotted = ->(member) { compact_text(metadata[:receiver], metadata[member]).join(".") }

  return metadata[:signature] || metadata[:name].to_s if type == :function
  return dotted.call(:message) if type == :call
  return metadata[:predicate].to_s if type == :decision
  return metadata[:body].to_s if type == :branch_arm

  state_types = %i[state_read state_write state_declaration state_param_origin]
  return dotted.call(:field) if state_types.include?(type)

  metadata[:name].to_s
end
# Slices the document text covered by +span+.
#
# +span+ is [first_line, first_column, last_line, last_column] with 1-based
# line numbers; the end column is exclusive. Returns "" for a missing span.
# Lines that do not exist in the document contribute an empty string.
def source_text(span)
  return "" unless span

  start_line, start_column, end_line, end_column = span
  lines = document.lines
  opening = lines[start_line - 1].to_s
  return opening[start_column...end_column].to_s if start_line == end_line

  # Whole lines strictly between the first and last line of the span.
  middle = lines[start_line...(end_line - 1)] || []
  closing = lines[end_line - 1].to_s
  [opening[start_column..].to_s, *middle, closing[0...end_column].to_s].join
end
# Returns the text recorded on +node+ as a single line: surrounding
# whitespace removed and every internal whitespace run collapsed to one
# space. Returns "" when +node+ is not an Ast node. The second argument is
# accepted but unused.
def slice(node, _lines)
  return "" unless node?(node)

  recorded_text = node.text.to_s
  recorded_text.strip.gsub(/\s+/, " ")
end
Shown - # as a per-contract context count, never summed into the headline. - # - # Pressure is decomplex-scoped: intra-procedural only (a local is - # resolved to the accessor it was assigned from IN THE SAME METHOD). - # Cross-procedure pressure is nil-kill's, by the recorded boundary. class DecisionPressure - GUARD_MIDS = %i[is_a? kind_of? instance_of? nil? respond_to?].freeze - TRANSIENT_NOARG_MIDS = %i[pop shift].freeze + GUARD_MIDS = %w[ + is_a? kind_of? instance_of? nil? respond_to? + is_none is_some is_null isNull + ].freeze + TRANSIENT_NOARG_MIDS = %w[pop shift].freeze Hit = Struct.new(:contract, :file, :defn, :line, :span, keyword_init: true) def self.scan(files) guard = [] dispatch = [] - files.each do |f| - root, lines = Ast.parse(f) - e = new(f, lines) - e.walk(root, [], {}) - guard.concat(e.guard_hits) - dispatch.concat(e.dispatch_hits) + files.each do |file| + document = Syntax.parse(file, parser: "tree_sitter") + assignment_maps = document.local_methods.to_h do |method| + [method.name, build_assignment_map(document, method)] + end + + document.call_sites.each do |call| + next if call.receiver.to_s.empty? + + asgmap = assignment_maps.fetch(call.function, {}) + if eliminable_guard?(call) + contract = contract_of(call.receiver, asgmap) + guard << hit(contract, call) if contract + elsif essential_dispatch?(call) + contract = contract_of(call.receiver, asgmap) + dispatch << hit(contract, call) if contract + end + end + + guard.concat(rescue_nil_hits(document, assignment_maps)) end + guard.uniq! 
# A call counts as an eliminable guard when its message is one of
# GUARD_MIDS; otherwise the call's safe_navigation flag decides (and is
# returned as-is).
def self.eliminable_guard?(call)
  return true if GUARD_MIDS.include?(call.message.to_s)

  call.safe_navigation
end
# Maps a receiver expression (source text) to its canonical contract label.
#
# Resolution order:
# 1. blank text, or depth >= 8            -> nil
# 2. text found in +assignment_map+       -> the mapped contract
# 3. text starting with "@"               -> the text itself
# 4. `name[key]` / `self[key]`            -> "[key]"
# 5. a bare identifier                    -> "~local"
# 6. a member access `recv.member(args)`  -> ".member", unless member is one
#    of TRANSIENT_NOARG_MIDS (then nil)
# 7. anything else                        -> nil
#
# For step 6 the member is whatever follows the last dot that sits outside
# any (), [] or {} pair. Splitting on every dot mistook dots inside an
# argument list for the member separator (`foo.bar(a.b)` produced ".b)").
def self.contract_of(receiver, assignment_map, depth = 0)
  source = receiver.to_s.strip
  return nil if source.empty? || depth >= 8

  mapped = assignment_map[source]
  return mapped if mapped
  return source if source.start_with?("@")

  if (match = source.match(/\A(?:[A-Za-z_]\w*|self)\s*\[(.+)\]\z/))
    return "[#{match[1].strip}]"
  end

  return "~local" if source.match?(/\A[A-Za-z_]\w*\z/)

  # Locate the last dot that is not nested inside brackets.
  nesting = 0
  separator = nil
  source.each_char.with_index do |char, index|
    case char
    when "(", "[", "{" then nesting += 1
    when ")", "]", "}" then nesting -= 1
    when "." then separator = index if nesting.zero?
    end
  end
  return nil unless separator

  # Drop a trailing argument list so `fetch(:a)` is reported as `fetch`.
  member = source[(separator + 1)..].to_s.sub(/\(.*\)\z/m, "")
  return nil if member.empty? || TRANSIENT_NOARG_MIDS.include?(member)

  ".#{member}"
end
# Tells whether span +inner+ lies within span +outer+ (bounds inclusive).
# Spans are [first_line, first_column, last_line, last_column]; positions
# are compared line first, then column. Returns false if either is missing.
def self.inside_span?(inner, outer)
  return false unless inner && outer

  inner_start = [inner[0], inner[1]]
  outer_start = [outer[0], outer[1]]
  return false if (inner_start <=> outer_start).negative?

  inner_finish = [inner[2], inner[3]]
  outer_finish = [outer[2], outer[3]]
  (inner_finish <=> outer_finish) <= 0
end
If a is then reassigned later in the same method but b - # is NOT recomputed, every later use of b is stale -- the exact - # "field copied from elsewhere then used for similar decisions" bug. + # is NOT recomputed, every later use of b is stale. class DerivedState - Asgn = Struct.new(:name, :deps, :line, :span, keyword_init: true) + Asgn = Struct.new(:name, :deps, :line, :span, :statement_index, keyword_init: true) def self.scan(files) - out = [] - files.each do |f| - root, lines = Ast.parse(f) - new(f, lines).each_method(root) do |defn, stmts| - out.concat(analyze(f, defn, stmts)) - end - end - out.sort_by { |h| -h[:gap] } - end - - def initialize(file, lines) - @file = file - @lines = lines + LocalFlow.scan(files).flat_map do |method| + analyze(method.file, method.name, assignments(method)) + end.sort_by { |h| -h[:gap] } end - def each_method(node, defstack = [], &blk) - return unless Ast.node?(node) - - if %i[DEFN DEFS].include?(node.type) - name = node.children[node.type == :DEFS ? 1 : 0].to_s - yield name, Ast.body_stmts(node) - end - node.children.each { |c| each_method(c, defstack, &blk) } - end - - # RHS constructs whose nested LASGNs are BRANCH-LOCAL initialization - # of the binding being assigned -- not later method-scope sequential - # reassignments. Recursing into them is the dominant DSS false - # positive (`x = if c; y = ...; use y; end` flattens `y` into the - # ordered list, so `analyze` mis-reads it as "y reassigned after x"). - BRANCH_RHS = %i[IF CASE CASE2 CASE3 AND OR WHILE UNTIL - RESCUE ENSURE].freeze - - # Flatten statements (incl. inside simple blocks) to ordered LASGNs. - # - # Fail-safe scoping: when an LASGN's VALUE child is a branch - # construct, record the LASGN itself but DO NOT descend into the - # conditional RHS. 
A genuine method-scope reassignment is always a - # top-level statement (an LASGN whose parent is the method body, not - # the value child of another LASGN), so it still enters the list -> - # the real `b = f(a); a = ...; use b` desync is still caught (no - # false negative). Non-branch values still recurse (`a = b = c`). - def self.lasgns(stmts) - acc = [] - walk = lambda do |n| - return unless Ast.node?(n) - - if n.type == :LASGN - acc << n - val = n.children[1] - if Ast.node?(val) && BRANCH_RHS.include?(val.type) - # branch-local RHS: do not flatten its inner assignments - else - n.children.each { |c| walk.call(c) } - end - else - n.children.each { |c| walk.call(c) } + def self.assignments(method) + method.statements.flat_map do |statement| + statement.writes.map do |name| + Asgn.new( + name: name, + deps: dependencies_for(statement, name), + line: statement.line, + span: statement.span, + statement_index: statement.index + ) end end - stmts.each { |s| walk.call(s) } - acc end - def self.lvars(node, acc = []) - return acc unless Ast.node?(node) - - acc << node.children[0].to_s if node.type == :LVAR - node.children.each { |c| lvars(c, acc) } - acc + def self.dependencies_for(statement, name) + statement.dependencies.filter_map do |left, right| + right.to_s if left.to_s == name.to_s + end.uniq end - def self.analyze(file, defn, stmts) - asgns = lasgns(stmts).map do |n| - Asgn.new(name: n.children[0].to_s, - deps: lvars(n.children[1]).uniq, - line: n.first_lineno, - span: [n.first_lineno, n.first_column, - n.last_lineno, n.last_column]) - end + def self.analyze(file, defn, asgns) out = [] asgns.each_with_index do |b, i| next if b.deps.empty? @@ -100,13 +46,13 @@ def self.analyze(file, defn, stmts) b.deps.each do |a| next if a == b.name - # a reassigned strictly after b's definition? 
- reasn = asgns[(i + 1)..].find { |x| x.name == a } + reasn = asgns[(i + 1)..].find do |x| + x.name == a && x.statement_index > b.statement_index + end next unless reasn - # b recomputed at or after a's reassignment? recomputed = asgns[(i + 1)..].any? do |x| - x.name == b.name && x.line >= reasn.line + x.name == b.name && x.statement_index >= reasn.statement_index end next if recomputed diff --git a/gems/decomplex/lib/decomplex/detector_runner.rb b/gems/decomplex/lib/decomplex/detector_runner.rb new file mode 100644 index 000000000..83ccd218f --- /dev/null +++ b/gems/decomplex/lib/decomplex/detector_runner.rb @@ -0,0 +1,727 @@ +# frozen_string_literal: true + +require "json" +require "ostruct" +require_relative "co_update" +require_relative "flay_similarity" +require_relative "local_flow" +require_relative "structural_topology" +require_relative "native/co_update" +require_relative "native/decision_pressure" +require_relative "native/predicate_aliases" +require_relative "native/flay_similarity" +require_relative "native/miner" +require_relative "native/semantic_aliases" +require_relative "native/local_flow" +require_relative "native/structural_topology" +require_relative "miner" +require_relative "decision_pressure" +require_relative "predicate_alias" +require_relative "semantic_alias" +require_relative "state_mesh" +require_relative "state_branch_density" +require_relative "temporal_ordering_pressure" +require_relative "redundant_nil_guard" +require_relative "inconsistent_rename_clone" +require_relative "derived_state" +require_relative "ordered_protocol_mine" +require_relative "weighted_inlined_cognitive_complexity" +require_relative "locality_drag" +require_relative "operational_discontinuity" +require_relative "oversized_predicate" +require_relative "path_condition" +require_relative "sequence_mine" +require_relative "function_lcom" +require_relative "false_simplicity" +require_relative "fat_union" + +module Decomplex + # Runs one detector in isolation and 
emits deterministic machine output. + # + # This is intentionally narrower than Report: it gives parser/runtime + # migration work an apples-to-apples target that excludes report wording, + # timing, SARIF metadata, and other nondeterministic details. + module DetectorRunner + DETECTORS = { + "co-update" => :co_update, + "decision-pressure" => :decision_pressure, + "predicate-alias" => :predicate_alias, + "predicate-aliases" => :predicate_alias, + "miner" => :miner, + "decision-miner" => :miner, + "missing-abstractions" => :miner, + "neglected-conditions" => :miner, + "semantic-alias" => :semantic_alias, + "semantic-aliases" => :semantic_alias, + "semantic-predicate-aliases" => :semantic_alias, + "reification-misses" => :semantic_alias, + "flay-similarity" => :flay_similarity, + "structural-similarity" => :flay_similarity, + "temporal-ordering-pressure" => :temporal_ordering_pressure, + "state-branch-density" => :state_branch_density, + "redundant-nil-guard" => :redundant_nil_guard, + "state-mesh" => :state_mesh, + "state-heatmap" => :state_mesh, + "inconsistent-rename-clone" => :inconsistent_rename_clone, + "derived-state" => :derived_state, + "implicit-control-flow" => :implicit_control_flow, + "weighted-inlined-complexity" => :weighted_inlined_complexity, + "locality-drag" => :locality_drag, + "operational-discontinuity" => :operational_discontinuity, + "oversized-predicate" => :oversized_predicate, + "path-condition" => :path_condition, + "broken-protocol" => :sequence_mine, + "sequence-mine" => :sequence_mine, + "function-lcom" => :function_lcom, + "false-simplicity" => :false_simplicity, + "fat-union" => :fat_union, + "local-flow" => :local_flow, + "structural-topology" => :structural_topology + }.freeze + ENGINES = %w[ruby rust].freeze + + module_function + + def run(detector, files, engine: "ruby", mass: FlaySimilarity::DEFAULT_MASS, fuzzy: FlaySimilarity::DEFAULT_FUZZY, jobs: nil) + canonical = canonical_detector(detector) + validate_engine!(engine) + + 
case canonical + when :co_update + co_update(files, engine: engine, jobs: jobs) + when :decision_pressure + decision_pressure(files, engine: engine, jobs: jobs) + when :predicate_alias + predicate_alias(files, engine: engine, jobs: jobs) + when :miner + miner(files, engine: engine, jobs: jobs) + when :semantic_alias + semantic_alias(files, engine: engine, jobs: jobs) + when :flay_similarity + flay_similarity(files, engine: engine, mass: mass, fuzzy: fuzzy, jobs: jobs) + when :temporal_ordering_pressure + temporal_ordering_pressure(files, engine: engine, jobs: jobs) + when :state_branch_density + state_branch_density(files, engine: engine, jobs: jobs) + when :redundant_nil_guard + redundant_nil_guard(files, engine: engine, jobs: jobs) + when :state_mesh + state_mesh(files, engine: engine, jobs: jobs) + when :inconsistent_rename_clone + inconsistent_rename_clone(files, engine: engine, jobs: jobs) + when :derived_state + derived_state(files, engine: engine, jobs: jobs) + when :implicit_control_flow + implicit_control_flow(files, engine: engine, jobs: jobs) + when :weighted_inlined_complexity + weighted_inlined_complexity(files, engine: engine, jobs: jobs) + when :locality_drag + locality_drag(files, engine: engine, jobs: jobs) + when :operational_discontinuity + operational_discontinuity(files, engine: engine, jobs: jobs) + when :oversized_predicate + oversized_predicate(files, engine: engine, jobs: jobs) + when :path_condition + path_condition(files, engine: engine, jobs: jobs) + when :sequence_mine + sequence_mine(files, engine: engine, jobs: jobs) + when :function_lcom + function_lcom(files, engine: engine, jobs: jobs) + when :false_simplicity + false_simplicity(files, engine: engine, jobs: jobs) + when :fat_union + fat_union(files, engine: engine, jobs: jobs) + when :local_flow + local_flow(files, engine: engine, jobs: jobs) + when :structural_topology + structural_topology(files, engine: engine, jobs: jobs) + else + raise ArgumentError, "unsupported decomplex 
detector: #{detector}" + end + end + + def canonical_json(detector, files, engine: "ruby", **options) + JSON.generate(canonicalize(run(detector, files, engine: engine, **options))) << "\n" + end + + def run_fact_fixture(path, engine: "ruby") + fixture = JSON.parse(File.read(path.to_s)) + detector = fixture.fetch("detector") + + case engine.to_s + when "ruby" + documents = fact_documents(fixture.fetch("input").fetch("documents")) + options = symbolize_options(fixture.fetch("options", {})) + with_fact_documents(documents) do + run(detector, documents.map(&:file), engine: "ruby", **options) + end + when "rust" + JSON.parse(Native::Command.run("detector-facts", "--input", path.to_s)) + else + raise ArgumentError, "unsupported decomplex detector engine: #{engine}" + end + end + + def canonical_json_from_fact_fixture(path, engine: "ruby") + JSON.generate(canonicalize(run_fact_fixture(path, engine: engine))) << "\n" + end + + def compare(detector, files, **options) + ruby_json = canonical_json(detector, files, engine: "ruby", **options) + rust_json = canonical_json(detector, files, engine: "rust", **options) + [ruby_json == rust_json, ruby_json, rust_json] + end + + def compare_fact_fixture(path) + ruby_json = canonical_json_from_fact_fixture(path, engine: "ruby") + rust_json = canonical_json_from_fact_fixture(path, engine: "rust") + [ruby_json == rust_json, ruby_json, rust_json] + end + + def detector_names + DETECTORS.keys + end + + private_class_method def self.canonical_detector(detector) + DETECTORS.fetch(detector.to_s) do + raise ArgumentError, "unsupported decomplex detector: #{detector}" + end + end + + private_class_method def self.validate_engine!(engine) + return if ENGINES.include?(engine.to_s) + + raise ArgumentError, "unsupported decomplex detector engine: #{engine}" + end + + private_class_method def self.symbolize_options(options) + options.each_with_object({}) { |(key, value), out| out[key.to_sym] = value } + end + + private_class_method def 
self.fact_documents(rows) + Array(rows).map { |row| FactDocument.new(row) } + end + + private_class_method def self.with_fact_documents(documents) + by_file = documents.to_h { |document| [document.file.to_s, document] } + original_parse = Syntax.method(:parse) + Syntax.define_singleton_method(:parse) do |file, **kwargs| + by_file.fetch(file.to_s) { original_parse.call(file, **kwargs) } + end + yield + ensure + Syntax.define_singleton_method(:parse, original_parse) + end + + class FactDocument + attr_reader :file, :language, :source, :lines + + FACT_ARRAYS = %w[ + branch_arms branch_decisions call_sites comparison_sites decision_sites + dispatch_sites function_defs local_methods owner_defs path_condition_sites + predicate_aliases predicate_defs semantic_effect_sites state_declarations + state_param_origins state_reads state_writes + ].freeze + + def initialize(row) + @row = row + @file = row.fetch("file") + @language = row.fetch("language", "ruby").to_sym + @source = row.fetch("source", "") + @lines = row.fetch("lines", @source.lines) + @root = objectify(row.fetch("root", empty_fact_node("program"))) + @normalized_root = objectify(row.fetch("normalized_root", { + "type" => "ROOT", + "children" => [], + "first_lineno" => 1, + "first_column" => 0, + "last_lineno" => 1, + "last_column" => 0, + "text" => "" + })) + @immutable_struct_readers = object_hash(row.fetch("immutable_struct_readers", {})) + @immutable_struct_reader_types = object_hash(row.fetch("immutable_struct_reader_types", {})) + @type_aliases = object_hash(row.fetch("type_aliases", {})) + @local_complexity_scores = row.fetch("local_complexity_scores", {}).to_h do |id, score| + [id.to_s, symbolized_value(score)] + end + @local_contract_assignments = row.fetch("local_contract_assignments", {}) + + FACT_ARRAYS.each do |name| + instance_variable_set("@#{name}", fact_array(row.fetch(name, []))) + end + end + + FACT_ARRAYS.each do |name| + define_method(name) { instance_variable_get("@#{name}") } + end + + 
attr_reader :root, :normalized_root + + def clone_candidates + Syntax.language_profile(language).clone_candidates(self) + end + + def local_methods + return @local_methods if @row.key?("local_methods") + + Syntax.language_profile(language).local_methods(self) + end + + def path_condition_sites + return @path_condition_sites if @row.key?("path_condition_sites") + + Syntax.language_profile(language).path_condition_sites(self) + end + + def branch_decisions(immutable_readers:, immutable_reader_types:, type_aliases:) + @branch_decisions + end + + def immutable_struct_readers + @immutable_struct_readers + end + + def immutable_struct_reader_types + @immutable_struct_reader_types + end + + def type_aliases + @type_aliases + end + + def local_complexity_scores + @local_complexity_scores + end + + def local_contract_assignments(method) + @local_contract_assignments.fetch(method.name.to_s, {}) + end + + def redundant_nil_guard_findings + Syntax::NilGuardAnalyzer.new(self).scan + end + + private + + def fact_array(value) + Array(value).map { |item| objectify(item) } + end + + def empty_fact_node(kind) + { + "kind" => kind, + "text" => "", + "span" => [1, 0, 1, 0], + "named" => true, + "field_name" => nil, + "children" => [] + } + end + + def object_hash(value) + value.to_h { |key, child| [key.to_s, child] } + end + + def objectify(value) + case value + when Hash + if value.key?("kind") && value.key?("span") && value.key?("children") + return FactNode.new(value, method(:objectify_field)) + end + + OpenStruct.new(value.to_h { |key, child| [key.to_s, objectify_field(key.to_s, child)] }) + when Array + value.map { |child| objectify(child) } + else + value + end + end + + def objectify_field(key, value) + if key == "control" && %w[conditional iterates].include?(value.to_s) + return value.to_sym + end + if key == "visibility" && %w[public protected private].include?(value.to_s) + return value.to_sym + end + + objectify(value) + end + + def symbolized_value(value) + case value + 
when Hash + value.to_h { |key, child| [key.to_sym, symbolized_value(child)] } + when Array + value.map { |child| symbolized_value(child) } + else + value + end + end + end + + class FactPoint + attr_reader :row, :column + + def initialize(row, column) + @row = row + @column = column + end + end + + class FactNode + attr_reader :kind, :text, :span, :field_name, :children, :start_point, :end_point + attr_reader :start_byte, :end_byte + attr_accessor :parent, :prev_sibling, :next_sibling + + def initialize(row, objectifier) + @kind = row.fetch("kind") + @text = row.fetch("text", "") + @span = row.fetch("span") + @field_name = row["field_name"] + @named = row.fetch("named", true) + @start_byte = row.fetch("start_byte", byte_offset(@span[0], @span[1])) + @end_byte = row.fetch("end_byte", byte_offset(@span[2], @span[3])) + @children = Array(row.fetch("children", [])).map { |child| objectifier.call("node", child) } + @children.each { |child| child.parent = self if child.respond_to?(:parent=) } + @children.each_cons(2) do |left, right| + left.next_sibling = right if left.respond_to?(:next_sibling=) + right.prev_sibling = left if right.respond_to?(:prev_sibling=) + end + @start_point = FactPoint.new(@span[0].to_i - 1, @span[1].to_i) + @end_point = FactPoint.new(@span[2].to_i - 1, @span[3].to_i) + end + + def named? + @named + end + + def child_count + @children.length + end + + def named_children + @children.select { |child| child.respond_to?(:named?) && child.named? 
} + end + + def named_child_count + named_children.length + end + + def child_by_field_name(name) + @children.find { |child| child.respond_to?(:field_name) && child.field_name.to_s == name.to_s } + end + + private + + def byte_offset(line, column) + ((line.to_i - 1) * 1_000_000) + column.to_i + end + end + + private_class_method def self.co_update(files, engine:, jobs:) + return Native::CoUpdate.scan(files, jobs: jobs) if engine.to_s == "rust" + + report = CoUpdate.scan(files) + + { + "co_written_pairs" => report.co_written_pairs, + "neglected_updates" => report.neglected_updates + } + end + + private_class_method def self.decision_pressure(files, engine:, jobs:) + return Native::DecisionPressure.scan(files, jobs: jobs) if engine.to_s == "rust" + + DecisionPressure.scan(files).ranked + end + + private_class_method def self.predicate_alias(files, engine:, jobs:) + return Native::PredicateAliases.scan(files, jobs: jobs) if engine.to_s == "rust" + + report = PredicateAlias.scan(files) + + { "alias_clusters" => report.alias_clusters } + end + + private_class_method def self.miner(files, engine:, jobs:) + return Native::Miner.scan(files, jobs: jobs) if engine.to_s == "rust" + + report = Miner.scan(files) + + { + "missing_abstractions" => report.missing_abstractions, + "neglected_conditions" => report.neglected_conditions + } + end + + private_class_method def self.semantic_alias(files, engine:, jobs:) + return Native::SemanticAliases.scan(files, jobs: jobs) if engine.to_s == "rust" + + report = SemanticAlias.scan(files) + + { + "alias_clusters" => report.alias_clusters, + "reification_misses" => report.reification_misses + } + end + + private_class_method def self.flay_similarity(files, engine:, mass:, fuzzy:, jobs:) + findings = + if engine.to_s == "rust" + Native::FlaySimilarity.scan(files, mass: mass, fuzzy: fuzzy, jobs: jobs) + else + FlaySimilarity.scan(files, mass: mass, fuzzy: fuzzy) + end + + { "findings" => findings } + end + + private_class_method def 
self.temporal_ordering_pressure(files, engine:, jobs:) + if engine.to_s == "rust" + require_relative "native/temporal_ordering_pressure" + return Native::TemporalOrderingPressure.scan(files, jobs: jobs) + end + + TemporalOrderingPressure.scan(files) + end + + private_class_method def self.state_branch_density(files, engine:, jobs:) + if engine.to_s == "rust" + require_relative "native/state_branch_density" + return Native::StateBranchDensity.scan(files, jobs: jobs) + end + + StateBranchDensity.scan(files).findings + end + + private_class_method def self.redundant_nil_guard(files, engine:, jobs:) + if engine.to_s == "rust" + require_relative "native/redundant_nil_guard" + return Native::RedundantNilGuard.scan(files, jobs: jobs) + end + + RedundantNilGuard.scan(files) + end + + private_class_method def self.state_mesh(files, engine:, jobs:) + if engine.to_s == "rust" + require_relative "native/state_mesh" + return Native::StateMesh.scan(files, jobs: jobs) + end + + StateMesh.scan(files).tap(&:run).to_json_graph + end + + private_class_method def self.inconsistent_rename_clone(files, engine:, jobs:) + if engine.to_s == "rust" + require_relative "native/inconsistent_rename_clone" + return Native::InconsistentRenameClone.scan(files, jobs: jobs) + end + + InconsistentRenameClone.scan(files) + end + + private_class_method def self.derived_state(files, engine:, jobs:) + if engine.to_s == "rust" + require_relative "native/derived_state" + return Native::DerivedState.scan(files, jobs: jobs) + end + + DerivedState.scan(files) + end + + private_class_method def self.implicit_control_flow(files, engine:, jobs:) + if engine.to_s == "rust" + require_relative "native/implicit_control_flow" + return Native::ImplicitControlFlow.scan(files, jobs: jobs) + end + + report = ImplicitControlFlow.scan(files) + { + "ordered_protocols" => report.ordered_protocols, + "order_drift" => report.drift + } + end + + private_class_method def self.weighted_inlined_complexity(files, engine:, jobs:) + 
if engine.to_s == "rust" + require_relative "native/weighted_inlined_complexity" + return Native::WeightedInlinedComplexity.scan(files, jobs: jobs) + end + + WeightedInlinedCognitiveComplexity.scan(files) + end + + private_class_method def self.locality_drag(files, engine:, jobs:) + if engine.to_s == "rust" + require_relative "native/locality_drag" + return Native::LocalityDrag.scan(files, jobs: jobs) + end + + LocalityDrag.scan(files) + end + + private_class_method def self.operational_discontinuity(files, engine:, jobs:) + if engine.to_s == "rust" + require_relative "native/operational_discontinuity" + return Native::OperationalDiscontinuity.scan(files, jobs: jobs) + end + + OperationalDiscontinuity.scan(files) + end + + private_class_method def self.oversized_predicate(files, engine:, jobs:) + if engine.to_s == "rust" + require_relative "native/oversized_predicate" + return Native::OversizedPredicate.scan(files, jobs: jobs) + end + + { "findings" => OversizedPredicate.scan(files).findings } + end + + private_class_method def self.path_condition(files, engine:, jobs:) + if engine.to_s == "rust" + require_relative "native/path_condition" + return Native::PathCondition.scan(files, jobs: jobs) + end + + report = PathCondition.scan(files) + { "neglected" => report.neglected } + end + + private_class_method def self.sequence_mine(files, engine:, jobs:) + if engine.to_s == "rust" + require_relative "native/sequence_mine" + return Native::SequenceMine.scan(files, jobs: jobs) + end + + report = SequenceMine.scan(files) + { "broken" => report.broken_protocol } + end + + private_class_method def self.function_lcom(files, engine:, jobs:) + if engine.to_s == "rust" + require_relative "native/function_lcom" + return Native::FunctionLcom.scan(files, jobs: jobs) + end + + FunctionLCOM.scan(files) + end + + private_class_method def self.false_simplicity(files, engine:, jobs:) + if engine.to_s == "rust" + require_relative "native/false_simplicity" + return 
Native::FalseSimplicity.scan(files, jobs: jobs) + end + + FalseSimplicity.scan(files).findings + end + + private_class_method def self.fat_union(files, engine:, jobs:) + if engine.to_s == "rust" + require_relative "native/fat_union" + return Native::FatUnion.scan(files, jobs: jobs) + end + + { "fat_unions" => FatUnion.scan(files).fat_unions } + end + + private_class_method def self.local_flow(files, engine:, jobs:) + return Native::LocalFlow.scan(files, jobs: jobs) if engine.to_s == "rust" + + LocalFlow.scan(files).map { |summary| local_flow_summary(summary) } + end + + private_class_method def self.structural_topology(files, engine:, jobs:) + return Native::StructuralTopology.scan(files, jobs: jobs) if engine.to_s == "rust" + + graph = StructuralTopology.scan(files) + { + "methods" => graph.methods.map { |method| structural_method(method) }, + "edges" => graph.edges.map { |edge| structural_edge(edge) } + } + end + + private_class_method def self.local_flow_summary(summary) + { + "id" => summary.id, + "owner" => summary.owner, + "name" => summary.name, + "file" => summary.file, + "line" => summary.line, + "span" => summary.span, + "statements" => summary.statements.map { |statement| local_flow_statement(statement) }, + "boundaries" => summary.boundaries.map { |boundary| local_flow_boundary(boundary) } + } + end + + private_class_method def self.local_flow_statement(statement) + { + "index" => statement.index, + "line" => statement.line, + "end_line" => statement.end_line, + "span" => statement.span, + "source" => statement.source, + "reads" => statement.reads.to_a.sort, + "writes" => statement.writes.to_a.sort, + "dependencies" => statement.dependencies.map { |edge| Array(edge).map(&:to_s) }.sort, + "co_uses" => statement.co_uses.map { |edge| Array(edge).map(&:to_s).sort }.sort + } + end + + private_class_method def self.local_flow_boundary(boundary) + { + "before_index" => boundary.before_index, + "after_index" => boundary.after_index, + "line" => boundary.line, + 
"kind" => boundary.kind.to_s, + "text" => boundary.text + } + end + + private_class_method def self.structural_method(method) + { + "id" => method.id, + "owner" => method.owner, + "name" => method.name, + "file" => method.file, + "line" => method.line, + "span" => method.span, + "visibility" => method.visibility.to_s + } + end + + private_class_method def self.structural_edge(edge) + { + "caller" => edge.caller, + "callee" => edge.callee, + "caller_name" => edge.caller_name, + "callee_name" => edge.callee_name, + "file" => edge.file, + "line" => edge.line, + "span" => edge.span, + "type" => edge.type.to_s, + "kind" => edge.kind.to_s, + "confidence" => edge.confidence.to_s + } + end + + private_class_method def self.canonicalize(value) + case value + when Hash + value.keys.map(&:to_s).sort.each_with_object({}) do |key, out| + original = value.key?(key) ? key : value.keys.find { |candidate| candidate.to_s == key } + out[key] = canonicalize(value.fetch(original)) + end + when Array + value.map { |item| canonicalize(item) } + when Symbol + value.to_s + else + value + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/false_simplicity.rb b/gems/decomplex/lib/decomplex/false_simplicity.rb index ea2ab7059..fd5505547 100644 --- a/gems/decomplex/lib/decomplex/false_simplicity.rb +++ b/gems/decomplex/lib/decomplex/false_simplicity.rb @@ -1,381 +1,74 @@ # frozen_string_literal: true -require_relative "ast" +require "set" require_relative "syntax" module Decomplex - # False simplicity: code whose local syntax understates its non-local - # behaviour -- hidden dynamic dispatch, hidden mutation, hidden - # global/context dependency, hidden IO/effects, callback/control - # inversion, runtime reflection, monkeypatch/reopen. Seven - # sub-detectors, one category, ranked support x scatter (same - # blast-radius thesis as Missing Abstractions: one trigger reinvented - # across N methods is one missing abstraction). 
+ # False simplicity: code whose local syntax understates non-local behavior. # - # #8 (protocol-pair names: open/close, lock/unlock) is NOT here -- it - # is already Broken Protocols (SequenceMine, Engler co-call mining). - # - # Pure normalized syntax-tree matching. No dataflow, no CFG, no points-to. - # Language lexicons are provider data: Ruby's was mined from - # RuboCop/Reek/stdlib, while other languages use their own effectful - # runtime surfaces instead of inheriting Ruby's. - # See docs/agents/false-simplicity.md. + # The detector does not mine language grammar directly. Production scanning + # consumes Syntax::Document semantic effect sites and owner/function facts; + # language adapters own language-specific effect lexicons and syntax quirks. class FalseSimplicity Hit = Struct.new(:kind, :detail, :file, :defn, :line, :span, keyword_init: true) ClassRec = Struct.new(:name, :file, :line, :core, :span, keyword_init: true) - Lexicon = Struct.new( - :dispatch_mids, :meta_mids, :method_obj_mids, :io_consts, - :io_bare, :dir_context, :context_pairs, :context_bare, - :callback_set, :core_consts, - keyword_init: true - ) - - EMPTY_PAIRS = {}.freeze - GENERIC_LEXICON = Lexicon.new( - dispatch_mids: %w[eval reflect Reflect Proxy getattr setattr].freeze, - meta_mids: %w[eval exec].freeze, - method_obj_mids: %i[method].freeze, - io_consts: %w[Console console fs process subprocess socket Deno Bun].freeze, - io_bare: %w[print println open system exec spawn sleep].freeze, - dir_context: [].freeze, - context_pairs: EMPTY_PAIRS, - context_bare: %w[rand random].freeze, - callback_set: %w[transaction synchronize lock with_lock unlock mutex atomic subscribe callback hook].freeze, - core_consts: [].freeze - ).freeze - RUBY_LEXICON = Lexicon.new( - dispatch_mids: %w[send __send__ public_send const_get constantize - instance_variable_get].freeze, - meta_mids: %w[define_method define_singleton_method alias_method - class_eval module_eval instance_eval class_exec - module_exec 
instance_exec eval const_set - instance_variable_set remove_method undef_method - prepend singleton_class binding].freeze, - method_obj_mids: %i[method public_method instance_method].freeze, - io_consts: %w[File IO Dir FileUtils Open3 Socket TCPSocket UDPSocket - TCPServer UNIXSocket Tempfile Pathname Marshal].freeze, - io_bare: %w[puts print warn gets readline readlines system - exec spawn fork sleep open abort exit exit!].freeze, - dir_context: %w[pwd getwd home].freeze, - context_pairs: { - "Time" => %w[now current], "Date" => %w[today current], - "DateTime" => %w[now current], "Process" => %w[pid ppid uid gid euid], - "Thread" => %w[current list main], "Fiber" => %w[current], - "Random" => %w[rand bytes], "GC" => %w[stat count], - "ObjectSpace" => %w[each_object count_objects] - }.freeze, - context_bare: %w[rand srand].freeze, - callback_set: %w[transaction synchronize lock with_lock unlock - mutex atomic reentrant subscribe callback hook].freeze, - core_consts: %w[String Symbol Integer Float Numeric Rational Complex - Array Hash Set Range Struct Object BasicObject Kernel - Module Class Comparable Enumerable Enumerator Proc Method - UnboundMethod NilClass TrueClass FalseClass Exception - StandardError RuntimeError ArgumentError TypeError - NameError NoMethodError IO File Dir Time Date DateTime - Regexp MatchData Thread Mutex Fiber Process Math GC - ObjectSpace Marshal Random Encoding].freeze - ).freeze - PYTHON_LEXICON = Lexicon.new( - dispatch_mids: %w[getattr setattr hasattr __getattr__ __setattr__ import_module].freeze, - meta_mids: %w[eval exec compile type globals locals vars setattr delattr].freeze, - method_obj_mids: %i[method].freeze, - io_consts: %w[Path pathlib os sys subprocess socket shutil].freeze, - io_bare: %w[print input open exec eval].freeze, - dir_context: %w[getcwd home].freeze, - context_pairs: { - "time" => %w[time monotonic perf_counter], - "datetime" => %w[now today utcnow], - "random" => %w[random randint randrange choice] - }.freeze, - 
context_bare: %w[random randint randrange].freeze, - callback_set: GENERIC_LEXICON.callback_set, - core_consts: [].freeze - ).freeze - JS_LEXICON = Lexicon.new( - dispatch_mids: %w[eval Function call apply bind].freeze, - meta_mids: %w[eval Function defineProperty defineProperties setPrototypeOf].freeze, - method_obj_mids: %i[method].freeze, - io_consts: %w[console Console fs process Deno Bun].freeze, - io_bare: %w[setTimeout setInterval fetch require import].freeze, - dir_context: [].freeze, - context_pairs: { - "Date" => %w[now], - "Math" => %w[random], - "performance" => %w[now] - }.freeze, - context_bare: [].freeze, - callback_set: GENERIC_LEXICON.callback_set, - core_consts: [].freeze - ).freeze - LANGUAGE_LEXICONS = { - ruby: RUBY_LEXICON, - python: PYTHON_LEXICON, - javascript: JS_LEXICON, - typescript: JS_LEXICON - }.freeze - - # Compatibility aliases for tests and downstream code that inspect - # detector constants directly. - DISPATCH_MIDS = RUBY_LEXICON.dispatch_mids - META_MIDS = RUBY_LEXICON.meta_mids - METHOD_OBJ_MIDS = RUBY_LEXICON.method_obj_mids - IO_CONSTS = RUBY_LEXICON.io_consts - # bare `p`/`pp` deliberately excluded: single/double-letter, too - # often a local-var bareword (VCALL) to flag as Kernel#p. 
- IO_BARE = RUBY_LEXICON.io_bare - DIR_CONTEXT = RUBY_LEXICON.dir_context - CONTEXT_PAIRS = RUBY_LEXICON.context_pairs - CONTEXT_BARE = RUBY_LEXICON.context_bare - CALLBACK_SET = RUBY_LEXICON.callback_set - CORE = RUBY_LEXICON.core_consts def self.scan(files) hits = [] recs = [] - files.each do |f| - root, lines = Ast.parse(f) - e = new(f, lines, language: Syntax.language_for(f)) - e.walk(root, [], []) - hits.concat(e.hits) - recs.concat(e.classrecs) + files.each do |file| + document = Syntax.parse(file, parser: "tree_sitter") + hits.concat(hits_for_document(document)) + doc_recs, doc_hits = class_records_for_document(document) + recs.concat(doc_recs) + hits.concat(doc_hits) end Report.new(hits, recs) end - attr_reader :hits, :classrecs - - def initialize(file, lines, language: :ruby, lexicon: nil) - @file = file - @lines = lines - @language = language.to_sym - @lexicon = lexicon || self.class.lexicon_for(@language) - @hits = [] - @classrecs = [] - end - - def self.lexicon_for(language) - LANGUAGE_LEXICONS.fetch(language.to_sym, GENERIC_LEXICON) - end - - def walk(node, defs, cls) - return unless Ast.node?(node) - - case node.type - when :CLASS, :MODULE - return walk_class(node, defs, cls) - when :SCLASS - return unless @language == :ruby - - recv = node.children[0] - emit(:metaprogramming, "class << #{Ast.slice(recv, @lines)}", - dn(defs), node) unless recv.type == :SELF - when :DEFN, :DEFS - nm = (node.type == :DEFN ? 
node.children[0] : node.children[1]) - emit(:metaprogramming, "def #{nm}", dn(defs), node) \ - if @language == :ruby && %i[method_missing respond_to_missing?].include?(nm) - nd = Ast.def_push(node, defs) - return node.children.each { |c| walk(c, nd, cls) } - when :CALL, :FCALL, :VCALL, :OPCALL - classify_call(node, defs) - when :ATTRASGN - emit(:hidden_mutation, node.children[1].to_s, dn(defs), node) - when :OP_ASGN1, :OP_ASGN2 - emit(:hidden_mutation, "op-assign", dn(defs), node) - when :GVAR, :GASGN - emit(:context_dependency, node.children[0].to_s, dn(defs), node) if @language == :ruby - when :XSTR, :DXSTR - emit(:hidden_io, "backtick", dn(defs), node) if @language == :ruby - when :YIELD - emit(:dynamic_dispatch, "yield", dn(defs), node) if @language == :ruby - when :ITER - cm = callee_mid(node.children[0]) - emit(:callback_inversion, cm.to_s, dn(defs), node) \ - if cm && callback?(cm.to_s) && !@lexicon.meta_mids.include?(cm.to_s) - end - - node.children.each { |c| walk(c, defs, cls) } - end - - private - - def dn(defs) - defs.last || "(top-level)" - end - - # Takes the triggering node so line AND span come from one place. - def emit(kind, detail, defn, node) - @hits << Hit.new(kind: kind, detail: detail, file: @file, - defn: defn, line: node.first_lineno, - span: [node.first_lineno, node.first_column, - node.last_lineno, node.last_column]) - end - - def walk_class(node, defs, cls) - cpath = node.children[0] - body = node.children[node.type == :CLASS ? 2 : 1] - simple = const_simple(cpath) - based = Ast.node?(cpath) && cpath.type == :COLON2 && - !cpath.children[0].nil? && !cpath.text.to_s.start_with?("::") - fqn = (cls + [const_text(cpath)]).join("::") - if has_def?(body) - core = cls.empty? 
&& !based && @lexicon.core_consts.include?(simple) - @classrecs << ClassRec.new(name: fqn, file: @file, - line: node.first_lineno, core: core, - span: [node.first_lineno, node.first_column, - node.last_lineno, node.last_column]) - emit(:monkeypatch, simple, simple, node) if core - end - newcls = cls + [const_text(cpath)] - node.children.each { |c| walk(c, defs, newcls) } - end - - # Exactly one hit per call node, highest-signal kind first, so - # counts are not inflated by a node matching two lexicons. - def classify_call(call, defs) - recv, mid = - case call.type - when :CALL, :OPCALL then [call.children[0], call.children[1]] - else [nil, call.children[0]] - end - m = mid.to_s - - if block_pass?(call) && callback?(m) && !@lexicon.meta_mids.include?(m) - return emit(:callback_inversion, m, dn(defs), call) - end - return emit(:metaprogramming, m, dn(defs), call) if @lexicon.meta_mids.include?(m) - return emit(:dynamic_dispatch, m, dn(defs), call) if @lexicon.dispatch_mids.include?(m) - - if m == "call" && recv - return emit(:dynamic_dispatch, "method(...).call", dn(defs), call) \ - if method_obj?(recv) - return emit(:dynamic_dispatch, "#{Ast.slice(recv, @lines)}.call", - dn(defs), call) if var_recv?(recv) - end - - cp = const_recv(recv) - if cp - base = cp.sub(/\A::/, "").split("::").first - if base == "Dir" && @lexicon.dir_context.include?(m) - return emit(:context_dependency, "Dir.#{m}", dn(defs), call) - end - if @lexicon.io_consts.include?(base) || (@language == :ruby && cp.start_with?("Net::")) - return emit(:hidden_io, "#{cp}.#{m}", dn(defs), call) - end - if @language == :ruby - return emit(:hidden_io, "URI.open", dn(defs), call) \ - if base == "URI" && m == "open" - return emit(:context_dependency, "ENV", dn(defs), call) if cp == "ENV" - end - if @lexicon.context_pairs[base]&.include?(m) - return emit(:context_dependency, "#{base}.#{m}", dn(defs), call) - end - end - - if recv.nil? 
- return emit(:hidden_io, m, dn(defs), call) if @lexicon.io_bare.include?(m) - return emit(:context_dependency, m, dn(defs), call) \ - if @lexicon.context_bare.include?(m) - end - - if m.length > 1 && m.end_with?("!") && !%w[!= !~].include?(m) - return emit(:hidden_mutation, m, dn(defs), call) - end - emit(:hidden_mutation, "<<", dn(defs), call) \ - if call.type == :OPCALL && m == "<<" - end - - def callback?(str) - @lexicon.callback_set.include?(str) || - str =~ /\A(with_|around_|on_|before_|after_)/ || - str =~ /_hook\z/ - end - - def callee_mid(call) - return nil unless Ast.node?(call) - - case call.type - when :CALL, :OPCALL then call.children[1] - when :FCALL, :VCALL then call.children[0] + def self.hits_for_document(document) + document.semantic_effect_sites.map do |site| + defn = site.function.to_s.empty? ? "(top-level)" : site.function + Hit.new(kind: site.kind, detail: site.detail, file: site.file, + defn: defn, line: site.line, + span: site.span) end end - def block_pass?(call) - args = - case call.type - when :CALL, :OPCALL then call.children[2] - when :FCALL then call.children[1] - end - return false unless Ast.node?(args) - # `f(&b)` -> args IS the BLOCK_PASS; `f(a, &b)` -> LIST[..., BLOCK_PASS]. - return true if args.type == :BLOCK_PASS - - args.type == :LIST && - args.children.any? { |c| Ast.node?(c) && c.type == :BLOCK_PASS } - end - - def method_obj?(recv) - Ast.node?(recv) && %i[CALL FCALL].include?(recv.type) && - @lexicon.method_obj_mids.include?( - recv.type == :CALL ? 
recv.children[1] : recv.children[0] - ) - end - - def var_recv?(recv) - Ast.node?(recv) && - %i[VCALL LVAR DVAR IVAR CVAR GVAR].include?(recv.type) - end - - def const_recv(recv) - return nil unless Ast.node?(recv) && - %i[CONST COLON2 COLON3].include?(recv.type) - - const_text(recv) - end - - def const_text(n) - return n.to_s unless Ast.node?(n) - - case n.type - when :CONST then n.children[0].to_s - when :COLON3 then "::#{n.children[0]}" - when :COLON2 - return "::#{n.children[1]}" if n.text.to_s.start_with?("::") - - b = n.children[0] - b ? "#{const_text(b)}::#{n.children[1]}" : n.children[1].to_s - else Ast.slice(n, @lines) - end - end - - def const_simple(n) - return n.to_s unless Ast.node?(n) - - case n.type - when :CONST, :COLON3 then n.children[0].to_s - when :COLON2 then n.children[1].to_s - else const_text(n) + def self.class_records_for_document(document) + function_owners = document.function_defs.map(&:owner).compact.to_set + core_names = core_owner_names(document.language) + recs = [] + hits = [] + document.owner_defs.each do |owner| + canonical = owner.name.to_s.sub(/\A::/, "") + next if canonical.empty? + next unless function_owners.include?(owner.name) || function_owners.include?(canonical) + + simple = canonical.split("::").last + core = !canonical.include?("::") && core_names.include?(simple) + rec = ClassRec.new(name: canonical, file: owner.file, line: owner.line, + core: core, span: owner.span) + recs << rec + next unless core + + hits << Hit.new(kind: :monkeypatch, detail: simple, file: owner.file, + defn: simple, line: owner.line, span: owner.span) end + [recs, hits] end - # A def reachable without crossing a nested namespace -- methods - # added to THIS class/module. SCLASS is descended (its defs attach - # to the enclosing object); CLASS/MODULE prune (separate namespace). - def has_def?(n) - return false unless Ast.node?(n) - return true if %i[DEFN DEFS].include?(n.type) - return false if %i[CLASS MODULE].include?(n.type) - - n.children.any? 
{ |c| has_def?(c) } + def self.core_owner_names(language) + Syntax.core_owner_names(language) end # Groups hits by [kind, detail] and ranks by blast radius: # scatter = distinct (file, method) units, support = occurrences. # Cross-file project-class reopen (same FQN with methods in >=2 # files) becomes monkeypatch hits here; core reopens were already - # emitted per occurrence during the walk. + # emitted per occurrence during the scan. class Report def initialize(hits, classrecs) @hits = hits.dup @@ -383,10 +76,10 @@ def initialize(hits, classrecs) next if recs.first.core next if recs.map(&:file).uniq.size < 2 - recs.each do |r| - @hits << Hit.new(kind: :monkeypatch, detail: "reopen #{r.name}", - file: r.file, defn: r.name, line: r.line, - span: r.span) + recs.each do |rec| + @hits << Hit.new(kind: :monkeypatch, detail: "reopen #{rec.name}", + file: rec.file, defn: rec.name, line: rec.line, + span: rec.span) end end end @@ -394,14 +87,14 @@ def initialize(hits, classrecs) attr_reader :hits def findings - @hits.group_by { |h| [h.kind, h.detail] }.map do |(kind, detail), hs| - units = hs.map { |h| [h.file, h.defn] }.uniq - sites = hs.map { |h| "#{h.file}:#{h.defn}:#{h.line}" }.uniq + @hits.group_by { |hit| [hit.kind, hit.detail] }.map do |(kind, detail), hits| + units = hits.map { |hit| [hit.file, hit.defn] }.uniq + sites = hits.map { |hit| "#{hit.file}:#{hit.defn}:#{hit.line}" }.uniq spans = {} - hs.each { |h| spans["#{h.file}:#{h.defn}:#{h.line}"] ||= h.span } - { kind: kind, detail: detail, support: hs.size, + hits.each { |hit| spans["#{hit.file}:#{hit.defn}:#{hit.line}"] ||= hit.span } + { kind: kind, detail: detail, support: hits.size, scatter: units.size, at: sites.first, sites: sites, spans: spans } - end.sort_by { |h| [-h[:scatter], -h[:support], h[:kind].to_s, h[:detail]] } + end.sort_by { |hit| [-hit[:scatter], -hit[:support], hit[:kind].to_s, hit[:detail]] } end end end diff --git a/gems/decomplex/lib/decomplex/fat_union.rb 
b/gems/decomplex/lib/decomplex/fat_union.rb index 37d9d9d98..7d87accec 100644 --- a/gems/decomplex/lib/decomplex/fat_union.rb +++ b/gems/decomplex/lib/decomplex/fat_union.rb @@ -1,149 +1,23 @@ # frozen_string_literal: true -require_relative "ast" +require_relative "syntax" module Decomplex # Fat-union detector: Missing-Abstractions for product-vs-sum # decomposition. A `case when ClassA when ClassB ...` # dispatch where the arms read mostly the SAME members of - # (and/or members are read OUTSIDE the dispatch in the same - # method) is a union whose common core should be a struct, with a - # SMALL union for the genuinely-varying part. Every such fat union - # is a Neglected-Updates / Missing-Abstractions generator (the - # storage/provenance invariant-#16 shape at the type level). - # - # decomplex MEASURES and ranks the use-site cohesion evidence; the - # extraction is value-object work, nil-kill's owned territory - # (design.md nil-kill boundary). Output routes there. - # - # v1 scope (principle 4, exact before semantic): `case` over CLASS - # CONSTANTS only. `is_a?` if/elsif chains and `:kind`-tagged hashes - # are a documented future scope limit, reported absent not - # approximated. Zero deps, AST only, no points-to. + # is a union whose common core should be a struct, with a small union + # for the genuinely-varying part. 
class FatUnion - CONST_TYPES = %i[CONST COLON2 COLON3].freeze - Site = Struct.new(:variant_set, :arm_members, :outside, :file, - :defn, :line, :span, keyword_init: true) - def self.scan(files, min_variants: 3, min_common: 2, ratio: 0.6) - sites = [] - files.each do |f| - root, lines = Ast.parse(f) - e = new(f, lines) - e.walk(root, "(top-level)", nil) - sites.concat(e.sites) + sites = files.flat_map do |file| + document = Syntax.parse(file, parser: "tree_sitter") + document.dispatch_sites end Report.new(sites, min_variants: min_variants, min_common: min_common, ratio: ratio) end - attr_reader :sites - - def initialize(file, lines) - @file = file - @lines = lines - @sites = [] - end - - # Carries the enclosing def NAME and NODE so "accessed outside the - # dispatch but in the same method" (the strongest common-core - # tell) is computable by pruning the case subtree. - def walk(node, defn, defn_node) - return unless Ast.node?(node) - - case node.type - when :DEFN then defn = node.children[0].to_s; defn_node = node - when :DEFS then defn = node.children[1].to_s; defn_node = node - when :CASE - s = record_case(node, defn, defn_node) - @sites << s if s - end - node.children.each { |c| walk(c, defn, defn_node) } - end - - private - - def record_case(node, defn, defn_node) - disc = node.children[0] - return nil unless Ast.node?(disc) # predicate-less = if-chain - - disc_txt = Ast.slice(disc, @lines) - arms = {} # "ClassName" => [member, ...] - whenn = node.children[1] - while Ast.node?(whenn) && whenn.type == :WHEN - consts = const_patterns(whenn.children[0]) - unless consts.empty? 
# class-constant dispatch only (v1 scope) - mem = subtree_members(whenn.children[1], disc_txt) - consts.each { |c| (arms[c] ||= []).concat(mem) } - end - whenn = whenn.children[2] - end - return nil if arms.size < 2 - - arms.transform_values!(&:uniq) - Site.new(variant_set: arms.keys.sort, arm_members: arms, - outside: outside_members(defn_node, node, disc_txt), - file: @file, defn: defn, line: node.first_lineno, - span: [node.first_lineno, node.first_column, - node.last_lineno, node.last_column]) - end - - # disc-members read in the enclosing method but NOT inside this - # case. Pruned by the case's LINE SPAN, not object identity; adapters - # are free to materialize fresh wrapper nodes per traversal. Empty - # for a top-level case (no enclosing method) -- documented limit. - def outside_members(defn_node, case_node, disc_txt) - return [] unless Ast.node?(defn_node) - - acc = [] - collect(defn_node, disc_txt, case_node.first_lineno, - case_node.last_lineno, acc) - acc.uniq - end - - def collect(node, disc_txt, cfl, cll, acc) - return unless Ast.node?(node) - # entire subtree lies within the case -> it is inside, skip. - return if node.first_lineno >= cfl && node.last_lineno <= cll - - m = member_access(node, disc_txt) - acc << m if m - node.children.each { |c| collect(c, disc_txt, cfl, cll, acc) } - end - - def subtree_members(body, disc_txt) - acc = [] - stack = [body] - until stack.empty? - n = stack.pop - next unless Ast.node?(n) - - m = member_access(n, disc_txt) - acc << m if m - n.children.each { |c| stack << c } - end - acc.uniq - end - - # `.foo` / `.foo(..)` / ` << x` / `.foo = x` - # -> "foo" / "<<" / "foo". nil otherwise. 
- def member_access(n, disc_txt) - return nil unless %i[CALL OPCALL ATTRASGN].include?(n.type) - - recv, mid, = n.children - return nil unless Ast.node?(recv) && Ast.slice(recv, @lines) == disc_txt - - mid.to_s.sub(/=\z/, "") - end - - def const_patterns(plist) - return [] unless Ast.node?(plist) - - plist.children.filter_map do |p| - Ast.slice(p, @lines) if Ast.node?(p) && CONST_TYPES.include?(p.type) - end - end - class Report def initialize(sites, min_variants:, min_common:, ratio:) @sites = sites @@ -159,18 +33,12 @@ def fat_unions v = vset.size next if v < @min_variants - # member -> distinct variant-classes accessing it, across - # every dispatch site of this variant-set. vcls = Hash.new { |h, k| h[k] = {} } outside = {} group.each do |s| s.arm_members.each { |cls, ms| ms.each { |m| vcls[m][cls] = true } } s.outside.each { |m| outside[m] = true } end - # member universe = accessed in an arm OR only outside the - # dispatch (a member read ONLY outside is the strongest - # 'belongs in the common struct' signal -- it must not be - # dropped just because no arm names it). keys = vcls.keys | outside.keys common = keys.select do |m| outside[m] || (vcls[m] && vcls[m].size >= v) @@ -182,16 +50,16 @@ def fat_unions next if common.size < @min_common next if total.zero? || common.size.to_f / total < @ratio - locs = group.map { |s| "#{s.file}:#{s.defn}:#{s.line}" } + locs = group.map { |s| "#{s.file}:#{s.function}:#{s.line}" } { variant_set: vset, common: common.sort, variant: variant.sort, degenerate: variant.empty?, support: group.size, - scatter: group.map { |s| [s.file, s.defn] }.uniq.size, + scatter: group.map { |s| [s.file, s.function] }.uniq.size, rank: group.size * common.size, kind: :case_dispatch, members: vset, at: locs.first, sites: locs.uniq, - spans: group.to_h { |s| ["#{s.file}:#{s.defn}:#{s.line}", s.span] } + spans: group.to_h { |s| ["#{s.file}:#{s.function}:#{s.line}", s.span] } } end.sort_by { |h| [h[:degenerate] ? 
0 : 1, -h[:rank]] } end diff --git a/gems/decomplex/lib/decomplex/flay_similarity.rb b/gems/decomplex/lib/decomplex/flay_similarity.rb index 64a169985..39d92333c 100644 --- a/gems/decomplex/lib/decomplex/flay_similarity.rb +++ b/gems/decomplex/lib/decomplex/flay_similarity.rb @@ -4,53 +4,15 @@ require_relative "syntax" module Decomplex - # Tree-sitter structural similarity scanner for Type-2 / Type-3 clone pressure. + # Structural similarity scanner for Type-2 / Type-3 clone pressure. # - # The public class name is retained for report compatibility. The detector no - # longer shells through the flay gem: it builds language-neutral structural - # fingerprints from Tree-sitter node kinds, normalizing identifiers/literals - # so renamed-but-isomorphic code groups as Type-2. Type-3 uses a small fuzzy - # signature over child statements, matching functions/subtrees with a missing - # or inserted child within the configured fuzzy budget. + # Parser-specific structural fingerprinting is owned by Syntax adapters. This + # detector ranks already-normalized clone candidates and emits report rows. 
class FlaySimilarity DEFAULT_MASS = 32 DEFAULT_FUZZY = 1 MAX_FUZZY_CHILDREN = 14 - MethodSpan = Struct.new(:name, :first_line, :last_line, keyword_init: true) - Candidate = Struct.new(:file, :line, :span, :method_name, :node_name, :mass, - :fingerprint, :raw, :child_fingerprints, - :child_masses, keyword_init: true) - - IDENTIFIER_KINDS = %w[ - identifier constant type_identifier field_identifier property_identifier - shorthand_property_identifier_pattern variable_name - ].freeze - LITERAL_KINDS = %w[ - string string_content string_literal interpreted_string_literal raw_string_literal - integer float int number rational imaginary character char_literal - symbol simple_symbol true false nil none null - ].freeze - SKIP_CANDIDATE_KINDS = %w[ - comment identifier constant type_identifier field_identifier property_identifier - parameters formal_parameters parameter_list argument_list arguments - block_parameters method_parameters - scope_resolution - ].freeze - CLONE_CANDIDATE_KINDS = %w[ - array assignment assignment_statement block case case_clause class - class_definition class_declaration do_block enum_declaration for - for_statement hash if if_statement match_expression match_statement - method method_definition module operator_assignment singleton_method - struct_declaration switch_case switch_expression switch_statement - unless until while while_statement - ].freeze - BODY_KINDS = %w[ - body block body_statement declaration_list statement_block compound_statement - suite do_block - ].freeze - CALL_KINDS = %w[call call_expression method_invocation invocation_expression].freeze - def self.scan(files, mass: DEFAULT_MASS, fuzzy: DEFAULT_FUZZY) new(files, mass: mass, fuzzy: fuzzy).scan end @@ -59,7 +21,6 @@ def initialize(files, mass:, fuzzy:) @files = files @mass = mass @fuzzy = fuzzy - @method_spans = {} end def scan @@ -77,68 +38,18 @@ def scan def candidates_for_file(file) return [] unless Syntax.supported_source?(file, parser: "tree_sitter") - doc = 
Syntax.parse(file, parser: "tree_sitter") - @method_spans[file] = collect_method_spans(doc) - out = [] - seen = Set.new - - doc.function_defs.each do |fn| - candidate = candidate_for(file, fn.body, node_name: "defn") - add_candidate(out, seen, candidate) if candidate + Syntax.parse(file, parser: "tree_sitter").clone_candidates.select do |candidate| + candidate.mass >= effective_mass_floor end - - walk(doc.root) do |node| - next unless candidate_node?(node) - - add_candidate(out, seen, candidate_for(file, node)) - end - - out rescue StandardError [] end - def add_candidate(out, seen, candidate) - return unless candidate - return if candidate.mass < effective_mass_floor - return if typed_struct_schema_text?(candidate.raw) - - key = [candidate.file, candidate.line, candidate.span, candidate.node_name, candidate.fingerprint] - return if seen.include?(key) - - seen << key - out << candidate - end - - def candidate_for(file, node, node_name: nil) - fp, mass = fingerprint(node) - return nil if fp.to_s.empty? - - line = line(node) - method = method_span_for(file, line) - children = fuzzy_children_for(node) - child_data = children.map { |child| fingerprint(child) }.reject { |child_fp, child_mass| child_fp.to_s.empty? || child_mass.zero? 
} - - Candidate.new( - file: file, - line: line, - span: span(node), - method_name: method.name, - node_name: node_name || flay_node_name(node), - mass: mass, - fingerprint: fp, - raw: normalize_text(node.text), - child_fingerprints: child_data.map(&:first), - child_masses: child_data.map(&:last) - ) - end - def type2_findings(candidates) candidates.group_by(&:fingerprint).values.filter_map do |cluster| cluster = uniq_sites(cluster) next if cluster.size < 2 next if cluster.map(&:raw).uniq.size < 2 - next if typed_struct_schema_cluster?(cluster) finding_for(cluster, clone_type: :type2, mass: cluster.map(&:mass).min) end @@ -161,7 +72,6 @@ def type3_findings(candidates) cluster = uniq_sites(rows.map(&:first)) next if cluster.size < 2 next if cluster.map(&:fingerprint).uniq.size < 2 - next if typed_struct_schema_cluster?(cluster) key = cluster.map { |candidate| [candidate.file, candidate.line, candidate.node_name] }.sort next if seen.include?(key) @@ -185,8 +95,12 @@ def finding_for(cluster, clone_type:, mass:) end def prune_nested_findings(findings) + defn_site_sets = findings.select { |finding| finding[:node].to_s == "defn" } + .map { |finding| [finding[:clone_type], site_identities(finding)] } kept = [] findings.each do |finding| + next if finding[:node].to_s != "defn" && + defn_site_sets.include?([finding[:clone_type], site_identities(finding)]) next if kept.any? 
{ |larger| nested_finding?(finding, larger) } kept << finding @@ -219,12 +133,18 @@ def site_file(site) parts[0...-2].join(":") end + def site_identities(finding) + Array(finding[:sites]).map do |site| + parts = site.to_s.split(":") + [parts[0...-2].join(":"), parts[-2]] + end.sort + end + def spans_for(cluster) cluster.each_with_object({}) do |candidate, out| out[site_for(candidate)] = if candidate.node_name == "defn" - method = method_span_for(candidate.file, candidate.line) - [method.first_line, 0, method.last_line, 1] + [candidate.span[0], 0, candidate.span[2], 1] else candidate.span end @@ -236,7 +156,7 @@ def site_for(candidate) end def uniq_sites(candidates) - candidates.uniq { |candidate| [candidate.file, candidate.line, candidate.node_name] } + candidates.uniq { |candidate| [candidate.file, candidate.line, candidate.span, candidate.node_name] } end def fuzzy_signatures(candidate) @@ -263,185 +183,8 @@ def fuzzy_signatures(candidate) signatures end - def candidate_node?(node) - return false unless ts_node?(node) - return false unless node.named? - return false if SKIP_CANDIDATE_KINDS.include?(node.kind) - return false unless CLONE_CANDIDATE_KINDS.include?(node.kind) - return false if typed_struct_schema_text?(node.text) - - node.named_child_count.positive? - end - def effective_mass_floor @effective_mass_floor ||= [@mass, (@mass * 23.0 / 8.0).ceil].max end - - def fuzzy_children_for(node) - body = body_node(node) - source = body || node - children = source.named_children - children = node.named_children if children.empty? 
- children.reject { |child| SKIP_CANDIDATE_KINDS.include?(child.kind) || typed_struct_schema_text?(child.text) } - end - - def body_node(node) - named_field(node, "body") || - node.named_children.find { |child| BODY_KINDS.include?(child.kind) } - end - - def fingerprint(node) - return ["", 0] unless ts_node?(node) - return ["", 0] if node.kind == "comment" - return fingerprint_call(node) if CALL_KINDS.include?(node.kind) && call_message(node) - - if node.child_count.zero? - token = terminal_token(node) - return ["", 0] if token.empty? - - return [token, 1] - end - - child_parts = [] - mass = 1 - node.children.each do |child| - child_fp, child_mass = fingerprint(child) - next if child_fp.empty? - - child_parts << child_fp - mass += child_mass - end - - return [terminal_token(node), 1] if child_parts.empty? - - ["#{node.kind}(#{child_parts.join(' ')})", mass] - end - - def fingerprint_call(node) - message = call_message(node) - child_parts = [] - mass = 1 - node.children.each do |child| - child_fp, child_mass = fingerprint(child) - next if child_fp.empty? - - child_parts << child_fp - mass += child_mass - end - ["#{node.kind}<#{message}>(#{child_parts.join(' ')})", mass] - end - - def call_message(node) - return nil unless node.children.any? { |child| %w[argument_list arguments].include?(child.kind) } - - callee = named_field(node, "function") || named_field(node, "callee") - return callee_message(callee) if callee - - argument_node = node.children.find { |child| %w[argument_list arguments].include?(child.kind) } - named_before_args = node.named_children.select do |child| - argument_node.nil? 
|| child.start_byte < argument_node.start_byte - end - callee_message(named_before_args.last) - end - - def callee_message(node) - return nil unless ts_node?(node) - return node.text if IDENTIFIER_KINDS.include?(node.kind) - - leaf = node.named_children.reverse.find { |child| IDENTIFIER_KINDS.include?(child.kind) } - leaf&.text - end - - def terminal_token(node) - kind = node.kind.to_s - return "id" if IDENTIFIER_KINDS.include?(kind) - return literal_token(kind) if LITERAL_KINDS.include?(kind) - - text = normalize_text(node.text) - return "" if text.empty? - return "id" if text.match?(/\A[A-Za-z_]\w*[!?=]?\z/) - return "lit" if text.match?(/\A(?::[A-Za-z_]\w*|[-+]?\d+(?:\.\d+)?|".*"|'.*')\z/) - - "#{kind}:#{text}" - end - - def literal_token(kind) - case kind - when "true", "false" then "bool" - when "nil", "none", "null" then "nil" - else "lit" - end - end - - def flay_node_name(node) - return "defn" if %w[method function_definition function_declaration method_definition function_item].include?(node.kind) - return "defs" if node.kind == "singleton_method" - - node.kind - end - - def typed_struct_schema_cluster?(cluster) - cluster.all? { |candidate| typed_struct_schema_line?(candidate.file, candidate.line) || typed_struct_schema_text?(candidate.raw) } - end - - def typed_struct_schema_line?(file, line_no) - source_line(file, line_no).match?(/\A\s*(?:const|prop)\s+:[A-Za-z_]\w*\b/) - end - - def typed_struct_schema_text?(text) - text.to_s.match?(/<\s*T::Struct\b/) || - text.to_s.lines.all? { |line| line.strip.empty? 
|| line.match?(/\A\s*(?:const|prop)\s+:[A-Za-z_]\w*\b/) } - end - - def source_line(file, line_no) - (@source_lines ||= {}) - (@source_lines[file] ||= File.readlines(file))[line_no - 1].to_s - rescue StandardError - "" - end - - def collect_method_spans(document) - document.function_defs.map do |fn| - MethodSpan.new(name: fn.name.to_s, first_line: fn.span[0], last_line: fn.span[2]) - end.sort_by { |span| [span.first_line, -span.last_line] } - rescue StandardError - [] - end - - def method_span_for(file, line_no) - spans = @method_spans[file] || [] - spans.find { |span| span.first_line <= line_no && line_no <= span.last_line } || - MethodSpan.new(name: "(top-level)", first_line: line_no, last_line: line_no) - end - - def walk(node, &block) - return unless ts_node?(node) - - yield node - node.children.each { |child| walk(child, &block) } - end - - def named_field(node, name) - node.child_by_field_name(name) - rescue StandardError - nil - end - - def ts_node?(node) - node && node.respond_to?(:kind) && node.respond_to?(:children) - end - - def span(node) - [node.start_point.row + 1, node.start_point.column, - node.end_point.row + 1, node.end_point.column] - end - - def line(node) - node.start_point.row + 1 - end - - def normalize_text(text) - text.to_s.strip.gsub(/\s+/, " ") - end end end diff --git a/gems/decomplex/lib/decomplex/inconsistent_rename_clone.rb b/gems/decomplex/lib/decomplex/inconsistent_rename_clone.rb index 59c91cb9b..0a8bcd730 100644 --- a/gems/decomplex/lib/decomplex/inconsistent_rename_clone.rb +++ b/gems/decomplex/lib/decomplex/inconsistent_rename_clone.rb @@ -1,86 +1,62 @@ # frozen_string_literal: true -require_relative "ast" +require_relative "local_flow" module Decomplex # Narrow clone bug detector: a pasted block was renamed, but one # occurrence kept the old spelling. This is intentionally not a # general Type-2/Type-3 clone detector; the structural similarity # scanner owns that broader signal. 
- # - # The important false-positive guard is cross-method evidence. Local - # branch symmetry inside one method often has the same skeleton with - # different receiver/container variables, but that is not a pasted - # rename bug. class InconsistentRenameClone Block = Struct.new(:skeleton, :names, :file, :defn, :line, :span, keyword_init: true) - HOLE_TYPES = %i[LVAR DVAR IVAR LASGN DASGN IASGN].freeze MIN_TOKENS = 8 def self.scan(files) - blocks = [] - files.each do |f| - root, = Ast.parse(f) - new(f).collect(root, [], blocks) - end - Report.new(blocks).inconsistent_renames - end - - def initialize(file) - @file = file - end - - def collect(node, defstack, blocks) - return unless Ast.node?(node) + blocks = LocalFlow.scan(files).filter_map do |method| + next if method.statements.size < 3 - defstack = Ast.def_push(node, defstack) - if node.type == :BLOCK - stmts = node.children.compact - add_block(stmts, defstack, blocks) if stmts.size >= 3 + new.add_block(method) end - node.children.each { |child| collect(child, defstack, blocks) } + Report.new(blocks).inconsistent_renames end - private - - def add_block(stmts, defstack, blocks) + def add_block(method) skeleton = [] names = [] - stmts.each { |stmt| tokenize(stmt, skeleton, names) } - return if skeleton.size < MIN_TOKENS - - blocks << Block.new(skeleton: skeleton, names: names, file: @file, - defn: defstack.last || "(top-level)", - line: stmts.first.first_lineno, - span: [stmts.first.first_lineno, - stmts.first.first_column, - stmts.last.last_lineno, - stmts.last.last_column]) + method.statements.each { |statement| tokenize(statement.source, skeleton, names) } + return nil if skeleton.size < MIN_TOKENS + + Block.new( + skeleton: skeleton, + names: names, + file: method.file, + defn: method.name, + line: method.statements.first.line, + span: [ + method.statements.first.span[0], + method.statements.first.span[1], + method.statements.last.span[2], + method.statements.last.span[3] + ] + ) end - def tokenize(node, 
skeleton, names) - return unless Ast.node?(node) - - case node.type - when *HOLE_TYPES - skeleton << :ID - names << node.children[0].to_s - when :VCALL - skeleton << :ID - names << node.children[0].to_s - when :CALL, :FCALL - skeleton << node.type - mid = node.children[node.type == :CALL ? 1 : 0] - skeleton << :MID - names << mid.to_s - when :LIT, :STR, :SYM, :INTEGER, :FLOAT - skeleton << node.type - else - skeleton << node.type + private + + def tokenize(source, skeleton, names) + source.to_s.scan(/[A-Za-z_]\w*[!?=]?|@\w+|\d+(?:\.\d+)?|:[A-Za-z_]\w*|\"[^\"]*\"|'[^']*'|\S/) do |token| + case token + when /\A[@A-Za-z_]\w*[!?=]?\z/ + skeleton << :ID + names << token.delete_prefix("@").delete_suffix("=") + when /\A(?::[A-Za-z_]\w*|\d+(?:\.\d+)?|\"[^\"]*\"|'[^']*')\z/ + skeleton << :LIT + else + skeleton << token + end end - node.children.each { |child| tokenize(child, skeleton, names) } end class Report diff --git a/gems/decomplex/lib/decomplex/local_flow.rb b/gems/decomplex/lib/decomplex/local_flow.rb index 10c9ef2c6..135aae474 100644 --- a/gems/decomplex/lib/decomplex/local_flow.rb +++ b/gems/decomplex/lib/decomplex/local_flow.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true require "set" -require_relative "ast" +require_relative "syntax" module Decomplex # Conservative intra-procedural local data-flow support for function-level @@ -19,222 +19,50 @@ class LocalFlow :before_index, :after_index, :line, :kind, :text, keyword_init: true ) - OWNER_TYPES = %i[CLASS MODULE].freeze - METHOD_TYPES = %i[DEFN DEFS].freeze - SKIP_NESTED_TYPES = %i[CLASS MODULE DEFN DEFS LAMBDA].freeze - LOCAL_READ_TYPES = %i[LVAR DVAR].freeze - LOCAL_WRITE_TYPES = %i[LASGN DASGN].freeze - def self.scan(files) files.flat_map do |file| - root, lines = Ast.parse(file) - new(file, lines).scan(root) - end - end - - def initialize(file, lines) - @file = file - @lines = lines - end - - def scan(root) - out = [] - collect_methods(root, [], out) - out - end - - private - - def collect_methods(node, 
# Parses each file with the tree-sitter backed Syntax layer and returns one
# MethodSummary per local method, in file order.
#
# +files+ may be a single path or a list of paths: it is wrapped with Array()
# so a lone path works here too, matching LocalityDrag.scan and the native
# wrappers, which all pass their argument through Array(files).
def self.scan(files)
  Array(files).flat_map do |file|
    document = Syntax.parse(file, parser: "tree_sitter")
    document.local_methods.map { |method| method_summary(method) }
  end
end

# Copies a Syntax local-method record into a MethodSummary, converting its
# statements and boundaries into this class's own structs.
private_class_method def self.method_summary(method)
  statements = method.statements.map { |statement| statement_summary(statement) }
  boundaries = method.boundaries.map { |boundary| boundary_summary(boundary) }
  MethodSummary.new(
    id: method.id,
    owner: method.owner,
    name: method.name,
    file: method.file,
    line: method.line,
    span: method.span,
    node: method.node,
    statements: statements,
    boundaries: boundaries
  )
end

# Copies one statement record into a Statement. Reads and writes are
# converted to Sets; every other field is passed through unchanged.
private_class_method def self.statement_summary(statement)
  Statement.new(
    index: statement.index,
    line: statement.line,
    end_line: statement.end_line,
    span: statement.span,
    source: statement.source,
    reads: statement.reads.to_set,
    writes: statement.writes.to_set,
    dependencies: statement.dependencies,
    co_uses: statement.co_uses
  )
end

# Copies one boundary record (the marker between two adjacent statements)
# into a Boundary, field for field.
private_class_method def self.boundary_summary(boundary)
  Boundary.new(
    before_index: boundary.before_index,
    after_index: boundary.after_index,
    line: boundary.line,
    kind: boundary.kind,
    text: boundary.text
  )
end
b/gems/decomplex/lib/decomplex/locality_drag.rb @@ -2,7 +2,7 @@ require "set" require_relative "local_flow" -require_relative "weighted_inlined_cognitive_complexity" +require_relative "syntax" module Decomplex # Finds locals that are initialized substantially before their first use @@ -28,8 +28,17 @@ def self.scan( min_score: DEFAULT_MIN_SCORE, max_findings_per_method: DEFAULT_MAX_FINDINGS_PER_METHOD ) + summaries = LocalFlow.scan(files) + complexity_scores = Array(files).each_with_object({}) do |file, scores| + document = Syntax.parse(file, parser: "tree_sitter") + document.local_methods.each do |method| + scores[complexity_key(method)] = + document.local_complexity_scores.fetch(method.id, { score: 0.0 }) + end + end new( - LocalFlow.scan(files), + summaries, + complexity_scores: complexity_scores, min_unrelated_statements: min_unrelated_statements, min_gap_lines: min_gap_lines, min_local_complexity: min_local_complexity, @@ -40,6 +49,7 @@ def self.scan( def initialize( summaries, + complexity_scores:, min_unrelated_statements:, min_gap_lines:, min_local_complexity:, @@ -52,7 +62,7 @@ def initialize( @min_local_complexity = min_local_complexity.to_f @min_score = min_score.to_i @max_findings_per_method = max_findings_per_method.to_i - @scorer = WeightedInlinedCognitiveComplexity::LocalScorer.new + @complexity_scores = complexity_scores end def findings @@ -68,7 +78,7 @@ def findings def findings_for(summary) return [] if summary.statements.size < @min_unrelated_statements + 2 - local_complexity = @scorer.score(summary.node)[:score].to_f + local_complexity = @complexity_scores.fetch(complexity_key(summary), { score: 0.0 })[:score].to_f return [] if local_complexity < @min_local_complexity findings = summary.statements.each_with_index.flat_map do |statement, index| @@ -272,5 +282,13 @@ def reason_for(variable, unrelated, gap_lines, boundaries, local_complexity) def round(value) (value * 10).round / 10.0 end + + def self.complexity_key(method) + [method.file, 
method.line, method.name] + end + + def complexity_key(method) + self.class.complexity_key(method) + end end end diff --git a/gems/decomplex/lib/decomplex/mutability_pressure.rb b/gems/decomplex/lib/decomplex/mutability_pressure.rb new file mode 100644 index 000000000..b91eaadd1 --- /dev/null +++ b/gems/decomplex/lib/decomplex/mutability_pressure.rb @@ -0,0 +1,180 @@ +# frozen_string_literal: true + +require_relative "state_mesh" + +module Decomplex + # MutabilityPressure -- rank fields by how many methods participate in + # writing them, and classify each field by lifecycle pattern. + # + # Post-analyzer over StateMesh. No new AST walks. + # + # Classifications: + # immutable_convention -- written once in initialize, never mutated. + # Also catches memos (same-method write+read with read-first pattern). + # pass_through -- written and read in a SINGLE method body. + # shadow_state -- always written in strict subset of another + # field's write methods. Zero operational autonomy. Coupled state. + # one_way_state -- written in >=2 methods, read in <=1. + # mutable_entity -- written in >=2 methods, read in >=2. + # dead_state -- written but never read. 
class MutabilityPressure
  Finding = Struct.new(:field, :classification, :write_spread,
                       :read_spread, :total_writes, :total_reads,
                       :write_sites, :read_sites, :shadowed_by,
                       keyword_init: true) do
    # Hash form of the finding; :shadowed_by is present only when it is set.
    def to_h
      summary = {
        field: field,
        classification: classification,
        write_spread: write_spread,
        read_spread: read_spread,
        total_writes: total_writes,
        total_reads: total_reads,
        write_sites: write_sites,
        read_sites: read_sites
      }
      summary[:shadowed_by] = shadowed_by if shadowed_by
      summary
    end
  end

  # Tie-break order used when two findings have the same write and read
  # spread; an unknown classification sorts after all of these.
  CLASSIFICATION_ORDER = %w[
    dead_state shadow_state one_way_state mutable_entity pass_through immutable_convention
  ].freeze

  # Runs StateMesh over +files+ (min_writes: 1) and classifies its facts.
  def self.scan(files)
    sm = StateMesh.scan(files, min_writes: 1)
    sm.run
    new(sm).scan
  end

  # +state_mesh+ must respond to #writes and #reads; each entry must respond
  # to #recv, #norm, #file, #defn and #line.
  def initialize(state_mesh)
    @sm = state_mesh
  end

  # Returns the Findings sorted by write spread (descending), read spread
  # (descending), classification order, then field name.
  def scan
    group_by_field
    classify_and_rank
  end

  private

  # Only accesses through `self` take part in the classification.
  def group_by_field
    @writes_by = self_accesses(@sm.writes)
    @reads_by = self_accesses(@sm.reads)
  end

  def self_accesses(accesses)
    accesses.select { |access| access.recv == "self" }.group_by(&:norm)
  end

  def classify_and_rank
    norms = (@writes_by.keys + @reads_by.keys).uniq
    shadows = shadowing_fields(norms)
    # Built once: every field read through any receiver. The dead-state check
    # previously rescanned all reads for every field.
    read_anywhere = @sm.reads.each_with_object({}) { |read, seen| seen[read.norm] = true }

    findings = norms.filter_map do |norm|
      writers = @writes_by.fetch(norm, [])
      next if writers.empty? # read-only fields are not reported

      readers = @reads_by.fetch(norm, [])
      if readers.empty?
        # Written on self but read through another receiver: not dead, skip.
        next if read_anywhere.key?(norm)

        next dead_finding(norm, writers)
      end

      live_finding(norm, writers, readers, shadows.fetch(norm, []).first)
    end

    findings.sort_by do |finding|
      rank = CLASSIFICATION_ORDER.index(finding.classification) || CLASSIFICATION_ORDER.size
      [-finding.write_spread, -finding.read_spread, rank, finding.field]
    end
  end

  # Maps each field written in two or more methods to the fields whose writer
  # methods form a strict superset of its own.
  def shadowing_fields(norms)
    signatures = norms.to_h { |norm| [norm, method_keys(@writes_by.fetch(norm, []))] }
    signatures.each_with_object({}) do |(field, methods), shadows|
      next if methods.size <= 1

      owners = signatures.filter_map do |other, other_methods|
        next if other == field || other_methods.size <= methods.size

        other if (methods - other_methods).empty?
      end
      shadows[field] = owners unless owners.empty?
    end
  end

  def dead_finding(norm, writers)
    Finding.new(
      field: norm, classification: "dead_state",
      write_spread: method_keys(writers).size, read_spread: 0,
      total_writes: writers.size, total_reads: 0,
      write_sites: sites(writers), read_sites: []
    )
  end

  def live_finding(norm, writers, readers, shadow)
    write_methods = method_keys(writers)
    read_methods = method_keys(readers)
    Finding.new(
      field: norm,
      classification: classify(writers, readers, write_methods, read_methods, shadow),
      write_spread: write_methods.size,
      read_spread: read_methods.size,
      total_writes: writers.size,
      total_reads: readers.size,
      write_sites: sites(writers),
      read_sites: sites(readers),
      shadowed_by: shadow
    )
  end

  # Lifecycle label for a field that has at least one self write and one
  # self read. The checks run in the order listed, and the first match wins.
  def classify(writers, readers, write_methods, read_methods, shadow)
    return "immutable_convention" if write_methods.size == 1 && write_methods.first[1] == "initialize"

    if (write_methods + read_methods).uniq.size == 1
      # Single method: read-before-write is the memo pattern.
      first_read = readers.map(&:line).min
      first_write = writers.map(&:line).min
      memo = first_read && first_write && first_read <= first_write
      return memo ? "immutable_convention" : "pass_through"
    end

    return "shadow_state" if shadow
    return "immutable_convention" if write_methods.size < 2

    read_methods.size <= 1 ? "one_way_state" : "mutable_entity"
  end

  # Distinct [file, method] pairs among +accesses+.
  def method_keys(accesses)
    accesses.map { |access| [access.file, access.defn] }.uniq
  end

  # Distinct "file:method:line" labels among +accesses+.
  def sites(accesses)
    accesses.map { |access| "#{access.file}:#{access.defn}:#{access.line}" }.uniq
  end
end
require "json"
require "open3"

module Decomplex
  module Native
    # gems/decomplex/lib/decomplex/native/co_update.rb
    # Bridge to the native `co-update` subcommand.
    module CoUpdate
      module_function

      # Returns the parsed JSON output for +files+ (one path or a list).
      # The language is taken from the first path only.
      def scan(files, jobs: nil)
        paths = Array(files).map(&:to_s)
        language = Command.language_for(paths.first)
        JSON.parse(Command.run("co-update", "--language", language, *Command.jobs_args(jobs), *paths))
      end
    end

    # gems/decomplex/lib/decomplex/native/command.rb
    # Shared launcher for the native Decomplex migration slices.
    module Command
      module_function

      # Runs the native binary (or `cargo run` when no fresh binary exists)
      # with +args+ and returns its stdout.
      #
      # Raises RuntimeError with stderr (or stdout when stderr is empty) on a
      # non-zero exit, and when the executable cannot be found.
      def run(*args)
        stdout, stderr, status = Open3.capture3(*native_command(args))
        return stdout if status.success?

        raise "decomplex rust #{args.first} failed: #{stderr.empty? ? stdout : stderr}"
      rescue Errno::ENOENT => e
        raise "decomplex rust #{args.first} requires cargo or DECOMPLEX_RUST_BIN: #{e.message}"
      end

      # DECOMPLEX_RUST_BIN when set and non-empty, else the release binary
      # under the crate's target directory.
      def binary_path
        env = ENV["DECOMPLEX_RUST_BIN"]
        return env if env && !env.empty?

        exe = Gem.win_platform? ? "decomplex-rust.exe" : "decomplex-rust"
        File.join(crate_root, "target", "release", exe)
      end

      # Directory of the Rust crate, resolved relative to this file.
      def crate_root
        File.expand_path("../../../rust", __dir__)
      end

      # CLI arguments for the worker count: [] when +jobs+ is nil, else
      # ["--jobs", "N"]. Raises ArgumentError when N is not positive.
      def jobs_args(jobs)
        return [] if jobs.nil?

        count = Integer(jobs)
        raise ArgumentError, "jobs must be greater than zero" if count <= 0

        ["--jobs", count.to_s]
      end

      # Value for the native `--language` flag, chosen by file extension;
      # unknown extensions fall back to "ruby".
      #
      # +path+ may be nil: every wrapper passes `paths.first`, which is nil
      # for an empty file list, and File.extname(nil) raises TypeError. A nil
      # path now takes the same "ruby" fallback as an unknown extension.
      def language_for(path)
        case File.extname(path.to_s)
        when ".rb" then "ruby"
        when ".py" then "python"
        when ".js", ".jsx", ".mjs", ".cjs" then "javascript"
        when ".ts", ".tsx" then "typescript"
        when ".java" then "java"
        when ".swift" then "swift"
        when ".kt", ".kts" then "kotlin"
        when ".go" then "go"
        when ".rs" then "rust"
        when ".zig" then "zig"
        when ".lua" then "lua"
        when ".c", ".h" then "c"
        when ".cpp", ".cc", ".cxx", ".hpp", ".hh", ".hxx" then "cpp"
        when ".cs" then "csharp"
        when ".php" then "php"
        else "ruby"
        end
      end

      # argv for Open3: the binary itself when it is usable, else `cargo run`.
      private_class_method def self.native_command(args)
        binary = binary_path
        return [binary, *args] if fresh_binary?(binary)

        ["cargo", "run", "--quiet", "--release", "--manifest-path",
         File.join(crate_root, "Cargo.toml"), "--bin", "decomplex-rust", "--", *args]
      end

      # True when +path+ is executable and either comes from
      # DECOMPLEX_RUST_BIN or is at least as new as every crate source file.
      private_class_method def self.fresh_binary?(path)
        return false unless File.executable?(path)
        return true if ENV["DECOMPLEX_RUST_BIN"] && !ENV["DECOMPLEX_RUST_BIN"].empty?

        binary_mtime = File.mtime(path)
        rust_sources.all? { |source| File.mtime(source) <= binary_mtime }
      end

      # Cargo.toml plus every .rs file under the crate's src directory.
      private_class_method def self.rust_sources
        Dir[File.join(crate_root, "Cargo.toml"), File.join(crate_root, "src", "**", "*.rs")]
      end
    end

    # gems/decomplex/lib/decomplex/native/decision_pressure.rb
    # Bridge to the native `decision-pressure` subcommand.
    module DecisionPressure
      module_function

      # Returns the parsed JSON output for +files+ (one path or a list).
      def scan(files, jobs: nil)
        paths = Array(files).map(&:to_s)
        language = Command.language_for(paths.first)
        JSON.parse(Command.run("decision-pressure", "--language", language, *Command.jobs_args(jobs), *paths))
      end
    end

    # gems/decomplex/lib/decomplex/native/derived_state.rb
    # Bridge to the native `derived-state` subcommand.
    module DerivedState
      module_function

      # Returns the parsed JSON output for +files+ (one path or a list).
      def scan(files, jobs: nil)
        paths = Array(files).map(&:to_s)
        language = Command.language_for(paths.first)
        JSON.parse(Command.run("derived-state", "--language", language, *Command.jobs_args(jobs), *paths))
      end
    end
  end
end
require "json"

module Decomplex
  module Native
    # gems/decomplex/lib/decomplex/native/false_simplicity.rb
    # Bridge to the native `false-simplicity` subcommand.
    module FalseSimplicity
      module_function

      # Decoded JSON output for +files+ (one path or a list); the language is
      # derived from the first path.
      def scan(files, jobs: nil)
        targets = Array(files).map(&:to_s)
        argv = ["false-simplicity", "--language", Command.language_for(targets.first),
                *Command.jobs_args(jobs), *targets]
        JSON.parse(Command.run(*argv))
      end
    end

    # gems/decomplex/lib/decomplex/native/fat_union.rb
    # Bridge to the native `fat-union` subcommand.
    module FatUnion
      module_function

      # Decoded JSON output for +files+ (one path or a list).
      def scan(files, jobs: nil)
        targets = Array(files).map(&:to_s)
        argv = ["fat-union", "--language", Command.language_for(targets.first),
                *Command.jobs_args(jobs), *targets]
        JSON.parse(Command.run(*argv))
      end
    end

    # gems/decomplex/lib/decomplex/native/flay_similarity.rb
    # Bridge to the native `flay-similarity` subcommand.
    module FlaySimilarity
      module_function

      # Findings for +files+ with symbol keys. +mass+ and +fuzzy+ are sent as
      # integers. Each finding's :clone_type becomes a Symbol and every span
      # becomes an array of Integers.
      def scan(files, mass:, fuzzy:, jobs: nil)
        targets = Array(files).map(&:to_s)
        argv = ["flay-similarity", "--language", Command.language_for(targets.first),
                *Command.jobs_args(jobs),
                "--mass", mass.to_i.to_s,
                "--fuzzy", fuzzy.to_i.to_s,
                *targets]
        findings = JSON.parse(Command.run(*argv), symbolize_names: true)
        findings.map { |finding| normalize_finding(finding) }
      end

      private_class_method def self.normalize_finding(finding)
        clone_type = finding.fetch(:clone_type).to_sym
        spans = finding.fetch(:spans).transform_values { |span| Array(span).map(&:to_i) }
        finding.merge(clone_type: clone_type, spans: spans)
      end
    end
  end
end
require "json"

module Decomplex
  module Native
    # gems/decomplex/lib/decomplex/native/function_lcom.rb
    # Bridge to the native `function-lcom` subcommand.
    module FunctionLcom
      module_function

      # Decoded JSON output for +files+ (one path or a list); the language is
      # derived from the first path.
      def scan(files, jobs: nil)
        targets = Array(files).map(&:to_s)
        argv = ["function-lcom", "--language", Command.language_for(targets.first),
                *Command.jobs_args(jobs), *targets]
        JSON.parse(Command.run(*argv))
      end
    end

    # gems/decomplex/lib/decomplex/native/implicit_control_flow.rb
    # Bridge to the native `implicit-control-flow` subcommand.
    module ImplicitControlFlow
      module_function

      # Decoded JSON output for +files+ (one path or a list).
      def scan(files, jobs: nil)
        targets = Array(files).map(&:to_s)
        argv = ["implicit-control-flow", "--language", Command.language_for(targets.first),
                *Command.jobs_args(jobs), *targets]
        JSON.parse(Command.run(*argv))
      end
    end

    # gems/decomplex/lib/decomplex/native/inconsistent_rename_clone.rb
    # Bridge to the native `inconsistent-rename-clone` subcommand.
    module InconsistentRenameClone
      module_function

      # Decoded JSON output for +files+ (one path or a list).
      def scan(files, jobs: nil)
        targets = Array(files).map(&:to_s)
        argv = ["inconsistent-rename-clone", "--language", Command.language_for(targets.first),
                *Command.jobs_args(jobs), *targets]
        JSON.parse(Command.run(*argv))
      end
    end
  end
end
require "json"

module Decomplex
  module Native
    # gems/decomplex/lib/decomplex/native/local_flow.rb
    # Bridge to the native `local-flow` subcommand.
    module LocalFlow
      module_function

      # Decoded JSON output for +files+ (one path or a list); the language is
      # derived from the first path.
      def scan(files, jobs: nil)
        targets = Array(files).map(&:to_s)
        argv = ["local-flow", "--language", Command.language_for(targets.first),
                *Command.jobs_args(jobs), *targets]
        JSON.parse(Command.run(*argv))
      end
    end

    # gems/decomplex/lib/decomplex/native/locality_drag.rb
    # Bridge to the native `locality-drag` subcommand.
    module LocalityDrag
      module_function

      # Decoded JSON output for +files+ (one path or a list).
      def scan(files, jobs: nil)
        targets = Array(files).map(&:to_s)
        argv = ["locality-drag", "--language", Command.language_for(targets.first),
                *Command.jobs_args(jobs), *targets]
        JSON.parse(Command.run(*argv))
      end
    end

    # gems/decomplex/lib/decomplex/native/miner.rb
    # Bridge to the native `miner` subcommand.
    module Miner
      module_function

      # Decoded JSON output for +files+ (one path or a list).
      def scan(files, jobs: nil)
        targets = Array(files).map(&:to_s)
        argv = ["miner", "--language", Command.language_for(targets.first),
                *Command.jobs_args(jobs), *targets]
        JSON.parse(Command.run(*argv))
      end
    end
  end
end
require "json"

module Decomplex
  module Native
    # gems/decomplex/lib/decomplex/native/operational_discontinuity.rb
    # Bridge to the native `operational-discontinuity` subcommand.
    module OperationalDiscontinuity
      module_function

      # Decoded JSON output for +files+ (one path or a list); the language is
      # derived from the first path.
      def scan(files, jobs: nil)
        targets = Array(files).map(&:to_s)
        argv = ["operational-discontinuity", "--language", Command.language_for(targets.first),
                *Command.jobs_args(jobs), *targets]
        JSON.parse(Command.run(*argv))
      end
    end

    # gems/decomplex/lib/decomplex/native/oversized_predicate.rb
    # Bridge to the native `oversized-predicate` subcommand.
    module OversizedPredicate
      module_function

      # Decoded JSON output for +files+ (one path or a list).
      def scan(files, jobs: nil)
        targets = Array(files).map(&:to_s)
        argv = ["oversized-predicate", "--language", Command.language_for(targets.first),
                *Command.jobs_args(jobs), *targets]
        JSON.parse(Command.run(*argv))
      end
    end

    # gems/decomplex/lib/decomplex/native/path_condition.rb
    # Bridge to the native `path-condition` subcommand.
    module PathCondition
      module_function

      # Returns only the "neglected" entry of the native payload (an empty
      # list when the key is absent), wrapped in a one-key Hash.
      def scan(files, jobs: nil)
        targets = Array(files).map(&:to_s)
        argv = ["path-condition", "--language", Command.language_for(targets.first),
                *Command.jobs_args(jobs), *targets]
        report = JSON.parse(Command.run(*argv))
        { "neglected" => report.fetch("neglected", []) }
      end
    end
  end
end
require "json"

module Decomplex
  module Native
    # gems/decomplex/lib/decomplex/native/predicate_aliases.rb
    # Bridge to the native `predicate-aliases` subcommand.
    module PredicateAliases
      module_function

      # Decoded JSON output for +files+ (one path or a list); the language is
      # derived from the first path.
      def scan(files, jobs: nil)
        targets = Array(files).map(&:to_s)
        argv = ["predicate-aliases", "--language", Command.language_for(targets.first),
                *Command.jobs_args(jobs), *targets]
        JSON.parse(Command.run(*argv))
      end
    end

    # gems/decomplex/lib/decomplex/native/redundant_nil_guard.rb
    # Bridge to the native `redundant-nil-guard` subcommand.
    module RedundantNilGuard
      module_function

      # Decoded JSON output for +files+ (one path or a list).
      def scan(files, jobs: nil)
        targets = Array(files).map(&:to_s)
        argv = ["redundant-nil-guard", "--language", Command.language_for(targets.first),
                *Command.jobs_args(jobs), *targets]
        JSON.parse(Command.run(*argv))
      end
    end

    # gems/decomplex/lib/decomplex/native/report_facts.rb
    # Bridge to the native `facts` subcommand.
    module ReportFacts
      module_function

      # Decoded JSON output of `facts` for +files+. Unlike the detector
      # wrappers, no `--language` flag is passed.
      def collect(files, jobs: nil)
        targets = Array(files).map(&:to_s)
        argv = ["facts", *Command.jobs_args(jobs), *targets]
        JSON.parse(Command.run(*argv))
      end
    end
  end
end
require "json"

module Decomplex
  module Native
    # gems/decomplex/lib/decomplex/native/semantic_aliases.rb
    # Bridge to the native `semantic-aliases` subcommand.
    module SemanticAliases
      module_function

      # Decoded JSON output for +files+ (one path or a list); the language is
      # derived from the first path.
      def scan(files, jobs: nil)
        targets = Array(files).map(&:to_s)
        argv = ["semantic-aliases", "--language", Command.language_for(targets.first),
                *Command.jobs_args(jobs), *targets]
        JSON.parse(Command.run(*argv))
      end
    end

    # gems/decomplex/lib/decomplex/native/sequence_mine.rb
    # Bridge to the native `sequence-mine` subcommand.
    module SequenceMine
      module_function

      # Decoded JSON output for +files+ (one path or a list).
      def scan(files, jobs: nil)
        targets = Array(files).map(&:to_s)
        argv = ["sequence-mine", "--language", Command.language_for(targets.first),
                *Command.jobs_args(jobs), *targets]
        JSON.parse(Command.run(*argv))
      end
    end

    # gems/decomplex/lib/decomplex/native/state_branch_density.rb
    # Bridge to the native `state-branch-density` subcommand.
    module StateBranchDensity
      module_function

      # Decoded JSON output for +files+ (one path or a list).
      def scan(files, jobs: nil)
        targets = Array(files).map(&:to_s)
        argv = ["state-branch-density", "--language", Command.language_for(targets.first),
                *Command.jobs_args(jobs), *targets]
        JSON.parse(Command.run(*argv))
      end
    end
  end
end
require "json"

module Decomplex
  module Native
    # gems/decomplex/lib/decomplex/native/state_mesh.rb
    # Bridge to the native `state-mesh` subcommand.
    module StateMesh
      module_function

      # Returns the parsed JSON output for +files+ (one path or a list).
      def scan(files, jobs: nil)
        paths = Array(files).map(&:to_s)
        language = Command.language_for(paths.first)
        JSON.parse(Command.run("state-mesh", "--language", language, *Command.jobs_args(jobs), *paths))
      end
    end

    # gems/decomplex/lib/decomplex/native/state_writes.rb
    # Bridge from the Ruby detector layer to the native Decomplex state-write
    # fact extractor. The full co-update detector now runs in native Rust too;
    # this module remains for focused fact debugging.
    module StateWrites
      module_function

      # Returns one Decomplex::CoUpdate::Write per row emitted by the native
      # `state-writes` subcommand.
      #
      # The constant is written with its full path on purpose. From inside
      # Decomplex::Native a bare `CoUpdate` resolves lexically to
      # Decomplex::Native::CoUpdate (the native bridge module) whenever that
      # file is loaded, and that module has no Write constant.
      def extract(files)
        paths = Array(files).map(&:to_s)
        JSON.parse(run_native(paths)).map do |row|
          ::Decomplex::CoUpdate::Write.new(
            attr: row.fetch("field"),
            recv: row.fetch("receiver"),
            file: row.fetch("file"),
            defn: row.fetch("function"),
            line: row.fetch("line"),
            span: row.fetch("span")
          )
        end
      end

      # Raw JSON text from `state-writes`; no --jobs flag is passed here.
      private_class_method def self.run_native(paths)
        language = Command.language_for(paths.first)
        Command.run("state-writes", "--language", language, *paths)
      end
    end

    # gems/decomplex/lib/decomplex/native/structural_topology.rb
    # Bridge to the native `structural-topology` subcommand.
    module StructuralTopology
      module_function

      # Returns the parsed JSON output for +files+ (one path or a list).
      def scan(files, jobs: nil)
        paths = Array(files).map(&:to_s)
        language = Command.language_for(paths.first)
        JSON.parse(Command.run("structural-topology", "--language", language, *Command.jobs_args(jobs), *paths))
      end
    end

    # gems/decomplex/lib/decomplex/native/temporal_ordering_pressure.rb
    # Bridge to the native `temporal-ordering-pressure` subcommand.
    module TemporalOrderingPressure
      module_function

      # Returns the parsed JSON output for +files+ (one path or a list).
      def scan(files, jobs: nil)
        paths = Array(files).map(&:to_s)
        language = Command.language_for(paths.first)
        JSON.parse(Command.run("temporal-ordering-pressure", "--language", language, *Command.jobs_args(jobs), *paths))
      end
    end
  end
end
a/gems/decomplex/lib/decomplex/native/weighted_inlined_complexity.rb b/gems/decomplex/lib/decomplex/native/weighted_inlined_complexity.rb new file mode 100644 index 000000000..7abd769b4 --- /dev/null +++ b/gems/decomplex/lib/decomplex/native/weighted_inlined_complexity.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +require "json" +require_relative "command" + +module Decomplex + module Native + module WeightedInlinedComplexity + module_function + + def scan(files, jobs: nil) + paths = Array(files).map(&:to_s) + language = Command.language_for(paths.first) + JSON.parse(Command.run("weighted-inlined-complexity", "--language", language, *Command.jobs_args(jobs), *paths)) + end + + end + end +end diff --git a/gems/decomplex/lib/decomplex/operational_discontinuity.rb b/gems/decomplex/lib/decomplex/operational_discontinuity.rb index d1500353f..4e487ddc2 100644 --- a/gems/decomplex/lib/decomplex/operational_discontinuity.rb +++ b/gems/decomplex/lib/decomplex/operational_discontinuity.rb @@ -12,7 +12,7 @@ class OperationalDiscontinuity DEFAULT_MAX_CONTINUING = 1 DEFAULT_MIN_SCORE = 12 DEFAULT_HIGH_CONFIDENCE_MIN_SCORE = 20 - PHASE_COMMENT_PATTERN = /\A#\s*(?:\d+[a-z]?\s*[.)]|(?:phase|step|stage)\b)/i + PHASE_COMMENT_PATTERN = %r{\A(?:#|//|--)\s*(?:\d+[a-z]?\s*[.)]|(?:phase|step|stage)\b)}i GRAMMAR_METHOD_PATTERN = /\Aparse(?:_|$)/ RangeInfo = Struct.new(:first, :last, keyword_init: true) diff --git a/gems/decomplex/lib/decomplex/ordered_protocol_mine.rb b/gems/decomplex/lib/decomplex/ordered_protocol_mine.rb index f61c16bf8..cb9c81bd8 100644 --- a/gems/decomplex/lib/decomplex/ordered_protocol_mine.rb +++ b/gems/decomplex/lib/decomplex/ordered_protocol_mine.rb @@ -1,213 +1,54 @@ # frozen_string_literal: true require "set" -require_relative "ast" +require_relative "syntax" module Decomplex # ImplicitControlFlow finds internal call order where order is state-dependent, # e.g. `prepare; validate` when `prepare` writes state that `validate` reads. 
# Generic call-order repetition is intentionally ignored. class ImplicitControlFlow - MethodEffect = Struct.new(:file, :owner, :name, :line, :reads, :writes, keyword_init: true) + MethodEffect = Syntax::ProtocolMethodEffect Call = Struct.new(:mid, :file, :owner, :defn, :line, :span, :reads, :writes, keyword_init: true) MethodSequence = Struct.new(:file, :owner, :defn, :line, :calls, keyword_init: true) - Path = Struct.new(:calls, :terminal, keyword_init: true) - PATH_LIMIT = 64 - - DECLARATIVE_MIDS = %w[ - abstract! alias_method any attr_accessor attr_reader attr_writer bind - cast checked enum extend final include interface! let must must_because - nilable override overridable params prepend private private_class_method - protected public require require_relative requires_ancestor sealed! sig - type_member type_template untyped unsafe void - ].freeze - TEST_DSL_MIDS = %w[ - a_kind_of after around before be be_a be_an be_empty be_falsey be_nil - be_truthy change contain_exactly context describe eq eql equal expect - have_attributes have_key have_received it match not_to raise_error - receive subject to - ].freeze + + DECLARATIVE_MIDS = Syntax::RUBY_PROTOCOL_DECLARATIVE_MIDS + TEST_DSL_MIDS = Syntax::RUBY_PROTOCOL_TEST_DSL_MIDS IGNORED_MIDS = (DECLARATIVE_MIDS + TEST_DSL_MIDS).freeze - OPTIONAL_DIAGNOSTIC_MIDS = %w[ - error! fixable! read_interpolated_string warn! - ].freeze - MUTATING_MIDS = %w[ - << []= add append clear collect! compact! concat declare delete delete_if - each_key= fill filter! keep_if mark merge! move push reject! replace - resolve shift stamp store unshift update write - ].freeze - NON_MUTATING_OPERATOR_MIDS = %w[! 
!= !~].freeze - MUTATING_SUFFIXES = %w[!].freeze + OPTIONAL_DIAGNOSTIC_MIDS = Syntax::RUBY_PROTOCOL_OPTIONAL_DIAGNOSTIC_MIDS + MUTATING_MIDS = Syntax::RUBY_PROTOCOL_MUTATING_MIDS + NON_MUTATING_OPERATOR_MIDS = Syntax::RUBY_PROTOCOL_NON_MUTATING_OPERATOR_MIDS + MUTATING_SUFFIXES = Syntax::RUBY_PROTOCOL_MUTATING_SUFFIXES def self.scan(files) - parsed = files.each_with_object({}) do |file, out| - out[file] = Ast.parse(file) - end - effect_index = EffectIndex.build(parsed) - sequences = [] - parsed.each do |file, (root, lines)| - miner = new(file, lines, effect_index) - miner.walk(root, []) - sequences.concat(miner.sequences) - end + documents = files.map { |file| Syntax.parse(file, parser: "tree_sitter") } + effect_index = EffectIndex.new(documents.flat_map(&:protocol_method_effects)) + sequences = documents.flat_map { |document| sequences_for_document(document, effect_index) } Report.new(sequences) end - attr_reader :sequences - - def initialize(file, lines, effect_index) - @file = file - @lines = lines - @effect_index = effect_index - @sequences = [] - end - - def walk(node, owners) - return unless Ast.node?(node) - - case node.type - when :CLASS, :MODULE - owners = owners + [owner_name(node)] - when :DEFN, :DEFS - record_method_paths(node, owners.join("::")) - return - end - - node.children.each { |child| walk(child, owners) } - end - - private - - def record_method_paths(node, owner) - defn = method_name(node) - method_paths(node).each do |path| - calls = path.calls.map { |call| call_for(call, owner, defn) } + def self.sequences_for_document(document, effect_index) + document.protocol_call_paths.filter_map do |path| + calls = path.calls.map { |call| call_for_path(call, path, effect_index) } next if calls.count { |call| stateful_call?(call) } < 2 - @sequences << MethodSequence.new( - file: @file, - owner: owner, - defn: defn, - line: node.first_lineno, + MethodSequence.new( + file: path.file, + owner: path.owner, + defn: path.name, + line: path.line, calls: calls ) 
end end - def method_paths(node) - paths_for_statements(Ast.body_stmts(node)) - end - - def paths_for_statements(statements) - statements.compact.each_with_object([empty_path]) do |statement, paths| - next if Ast.node?(statement) && statement.type == :BEGIN - - statement_paths = paths_for(statement) - paths.replace(append_statement_paths(paths, statement_paths)) - end - end - - def append_statement_paths(paths, statement_paths) - combine_path_lists(paths, statement_paths) - end - - def combine_path_lists(left_paths, right_paths) - combined = left_paths.flat_map do |path| - if path.terminal - [path] - else - right_paths.map do |right_path| - Path.new(calls: path.calls + right_path.calls, terminal: right_path.terminal) - end - end - end - combined.first(PATH_LIMIT) - end - - def paths_for(node) - return [empty_path] unless Ast.node?(node) - - case node.type - when :BLOCK - paths_for_statements(node.children) - when :SCOPE - paths_for(scope_body(node)) - when :IF, :UNLESS - branch_paths(node) - when :CASE, :CASE2 - case_paths(node) - when :RETURN, :BREAK, :NEXT, :REDO, :RETRY - generic_paths(node).map { |path| Path.new(calls: path.calls, terminal: true) } - else - generic_paths(node) - end - end - - def branch_paths(node) - condition = node.children[0] - positive = node.children[1] - negative = node.children[2] - alternatives = paths_for(positive) + (negative ? 
paths_for(negative) : [empty_path]) - combine_path_lists(paths_for(condition), alternatives) - end - - def case_paths(node) - condition, first_when = case_parts(node) - combine_path_lists(paths_for(condition), when_paths(first_when)) - end - - def case_parts(node) - return [nil, node.children[0]] if node.type == :CASE2 - - [node.children[0], node.children[1]] - end - - def when_paths(node) - return [empty_path] unless Ast.node?(node) - - return paths_for(node) unless node.type == :WHEN - - patterns = node.children[0] - body = node.children[1] - next_node = node.children[2] - current_branch = combine_path_lists(paths_for(patterns), paths_for(body)) - (current_branch + when_paths(next_node)).first(PATH_LIMIT) - end - - def generic_paths(node) - return [empty_path] unless Ast.node?(node) - return [empty_path] if %i[CLASS MODULE DEFN DEFS LAMBDA].include?(node.type) - - child_paths = node.children.each_with_object([empty_path]) do |child, paths| - paths.replace(combine_path_lists(paths, paths_for(child))) - end - - internal_mid = internal_protocol_call(node) - return child_paths unless internal_mid - - combine_path_lists([Path.new(calls: [raw_call(internal_mid, node)], terminal: false)], child_paths) - end - - def raw_call(mid, node) - Call.new( - mid: mid, - file: @file, - owner: nil, - defn: nil, - line: node.first_lineno, - span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], - reads: [], - writes: [] - ) - end - - def call_for(call, owner, defn) - effect = @effect_index.effect_for(owner, call.mid) + def self.call_for_path(call, path, effect_index) + effect = effect_index.effect_for(path.owner, call.mid) Call.new( mid: call.mid, - file: call.file, - owner: owner, - defn: defn, + file: path.file, + owner: path.owner, + defn: path.name, line: call.line, span: call.span, reads: effect ? 
effect.reads : [], @@ -215,58 +56,11 @@ def call_for(call, owner, defn) ) end - def stateful_call?(call) + def self.stateful_call?(call) !(call.reads + call.writes).empty? end - def empty_path - Path.new(calls: [], terminal: false) - end - - def scope_body(node) - node.children[2] - end - - def owner_name(node) - Ast.slice(node.children[0], @lines).to_s.empty? ? "(anonymous)" : Ast.slice(node.children[0], @lines) - end - - def method_name(node) - node.children[node.type == :DEFS ? 1 : 0].to_s - end - - def internal_protocol_call(node) - mid = call_mid(node) - return nil unless mid - return nil if IGNORED_MIDS.include?(mid) - return nil unless internal_receiver?(node) - - mid - end - - def call_mid(node) - case node.type - when :CALL, :OPCALL, :ATTRASGN then node.children[1].to_s - when :FCALL, :VCALL then node.children[0].to_s - end - end - - def internal_receiver?(node) - return true if %i[FCALL VCALL].include?(node.type) - - receiver = node.children[0] - Ast.node?(receiver) && receiver.type == :SELF - end - class EffectIndex - def self.build(parsed) - effects = [] - parsed.each do |file, (root, lines)| - effects.concat(EffectCollector.new(file, lines).scan(root)) - end - new(effects) - end - def initialize(effects) @by_owner_name = effects.to_h { |effect| [[effect.owner, effect.name], effect] } @by_name = effects.group_by(&:name) @@ -289,182 +83,6 @@ def effect_stateful?(effect) end end - class EffectCollector - def initialize(file, lines) - @file = file - @lines = lines - end - - def scan(root) - out = [] - walk(root, [], out) - out - end - - private - - def walk(node, owners, out) - return unless Ast.node?(node) - - case node.type - when :CLASS, :MODULE - owners = owners + [owner_name(node)] - when :DEFN, :DEFS - out << method_effect(node, owners.join("::")) - return - end - - node.children.each { |child| walk(child, owners, out) } - end - - def method_effect(node, owner) - reads = Set.new - writes = Set.new - collect_state_access(node, reads, writes) - 
MethodEffect.new( - file: @file, - owner: owner, - name: method_name(node), - line: node.first_lineno, - reads: reads.to_a.sort, - writes: writes.to_a.sort - ) - end - - def collect_state_access(node, reads, writes) - return unless Ast.node?(node) - return if %i[CLASS MODULE DEFN DEFS LAMBDA].include?(node.type) && !%i[DEFN DEFS].include?(node.type) - - case node.type - when :IASGN - writes << normalize_state(node.children[0].to_s) - when :LASGN - collect_index_write(node, writes) - when :IVAR - reads << normalize_state(node.children[0].to_s) - when :ATTRASGN - collect_attr_write(node, writes) - when :CALL, :OPCALL - collect_bare_reader_comparison(node, reads) - collect_receiver_mutation(node, writes) - collect_self_reader(node, reads) - when :VCALL, :FCALL - collect_self_reader(node, reads) - end - - node.children.each { |child| collect_state_access(child, reads, writes) } - end - - def collect_attr_write(node, writes) - receiver, mid = node.children - attr = mid.to_s.sub(/=$/, "") - if mid == :[]= - writes << state_receiver_token(receiver) if state_receiver_token(receiver) - elsif self_receiver?(receiver) - writes << normalize_state(attr) - elsif (receiver_token = state_receiver_token(receiver)) - writes << "#{receiver_token}.#{attr}" - end - end - - def collect_index_write(node, writes) - name = node.children[0].to_s - return unless name.match?(/\A@?[A-Za-z_]\w*\[/) - - writes << normalize_state(name.sub(/\[.*\]\z/, "")) - end - - def collect_bare_reader_comparison(node, reads) - receiver, mid = node.children - return unless %w[== != === < <= > >=].include?(mid.to_s) - return unless Ast.node?(receiver) && receiver.type == :LVAR - - reads << normalize_state(receiver.children[0].to_s) - end - - def collect_receiver_mutation(node, writes) - receiver, mid = node.children - return unless mutating_mid?(mid.to_s) - - token = state_receiver_token(receiver) - writes << token if token - end - - def collect_self_reader(node, reads) - mid = call_mid(node) - return unless 
mid - return if mutating_mid?(mid) - return if IGNORED_MIDS.include?(mid) - return unless no_args?(node) - return if node.type == :CALL && !self_receiver?(node.children[0]) - - reads << normalize_state(mid) - end - - def mutating_mid?(mid) - return false if NON_MUTATING_OPERATOR_MIDS.include?(mid) - - MUTATING_MIDS.include?(mid) || MUTATING_SUFFIXES.any? { |suffix| mid.end_with?(suffix) } - end - - def no_args?(node) - case node.type - when :CALL, :OPCALL - node.children[2].nil? - when :VCALL - true - when :FCALL - node.children[1].nil? - else - false - end - end - - def state_receiver_token(node) - return nil unless Ast.node?(node) - - case node.type - when :IVAR - normalize_state(node.children[0].to_s) - when :SELF - "self" - when :VCALL, :FCALL - normalize_state(node.children[0].to_s) - when :LVAR - normalize_state(node.children[0].to_s) - when :CALL - return nil unless no_args?(node) - - normalize_state(node.children[1].to_s) - else - nil - end - end - - def self_receiver?(node) - Ast.node?(node) && node.type == :SELF - end - - def call_mid(node) - case node.type - when :CALL, :OPCALL, :ATTRASGN then node.children[1].to_s - when :FCALL, :VCALL then node.children[0].to_s - end - end - - def owner_name(node) - Ast.slice(node.children[0], @lines).to_s.empty? ? "(anonymous)" : Ast.slice(node.children[0], @lines) - end - - def method_name(node) - node.children[node.type == :DEFS ? 
1 : 0].to_s - end - - def normalize_state(name) - name.to_s.sub(/\A@/, "").sub(/=\z/, "") - end - end - class Report def initialize(sequences) @sequences = sequences diff --git a/gems/decomplex/lib/decomplex/oversized_predicate.rb b/gems/decomplex/lib/decomplex/oversized_predicate.rb index 366f3cafa..358aeb0b4 100644 --- a/gems/decomplex/lib/decomplex/oversized_predicate.rb +++ b/gems/decomplex/lib/decomplex/oversized_predicate.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -require_relative "ast" +require_relative "syntax" module Decomplex # Flags boolean predicates with too many independent condition atoms. @@ -11,14 +11,13 @@ module Decomplex # AST preserves the same AND/OR tree either way. class OversizedPredicate LIMIT = 3 - PREDICATE_NODES = %i[IF WHILE UNTIL].freeze def self.scan(files, limit: LIMIT) findings = [] files.each do |file| - root, lines = Ast.parse(file) - new(file, lines, limit).tap do |scanner| - scanner.walk(root, []) + document = Syntax.parse(file, parser: "tree_sitter") + new(file, limit).tap do |scanner| + scanner.collect(document) findings.concat(scanner.findings) end end @@ -29,55 +28,40 @@ def self.scan(files, limit: LIMIT) attr_reader :findings - def initialize(file, lines, limit) + def initialize(file, limit) @file = file - @lines = lines @limit = limit @findings = [] end - def walk(node, defstack) - return unless Ast.node?(node) - - defstack = Ast.def_push(node, defstack) - record_predicate(node, defstack) - node.children.each { |child| walk(child, defstack) } + def collect(document) + document.decision_sites.each { |site| record_predicate(site) } end private - def record_predicate(node, defstack) - return unless PREDICATE_NODES.include?(node.type) - return if predicate_helper?(defstack.last) - - cond = node.children[0] - return unless Ast.node?(cond) + def record_predicate(site) + return if predicate_helper?(site.function) - atoms = condition_atoms(cond) + atoms = condition_atoms(site.predicate) return unless atoms.size > @limit - 
defn = defstack.last || "" - at = "#{@file}:#{defn}:#{node.first_lineno}" + defn = site.function || "" + at = "#{@file}:#{defn}:#{site.line}" @findings << { at: at, count: atoms.size, - predicate: Ast.slice(cond, @lines), - atoms: atoms.map { |atom| Ast.slice(atom, @lines) }, - spans: { at => [node.first_lineno, node.first_column, node.last_lineno, node.last_column] }, + predicate: site.predicate, + atoms: atoms, + spans: { at => site.enclosing_span || site.span }, } end - def condition_atoms(node) - return [] unless Ast.node?(node) - - case node.type - when :AND, :OR - node.children.flat_map { |child| condition_atoms(child) } - when :NOT - condition_atoms(node.children[0]) - else - [node] - end + def condition_atoms(predicate) + predicate.to_s + .split(/\s*(?:&&|\|\||\band\b|\bor\b)\s*/) + .map { |atom| atom.gsub(/[()]/, "").strip } + .reject(&:empty?) end def predicate_helper?(name) diff --git a/gems/decomplex/lib/decomplex/path_condition.rb b/gems/decomplex/lib/decomplex/path_condition.rb index 24aee0223..300c37ff6 100644 --- a/gems/decomplex/lib/decomplex/path_condition.rb +++ b/gems/decomplex/lib/decomplex/path_condition.rb @@ -1,90 +1,31 @@ # frozen_string_literal: true -require_relative "ast" +require_relative "syntax" module Decomplex - # Path-condition normal form. `if x; if y; act` and `if x && y; act` - # and `act if x && y` all reduce to the same guarded action with - # path condition {x, y}. Mining the PATH CONDITION (not the syntactic - # if) is what makes nested control flow and flat conjunction the same - # decision -- the user's "is `if x / if y` related to `if x && y`?". - # - # A site = an effectful leaf (call / assignment) reached under >= 2 - # guard atoms. Scatter = the same guard set reached in >= 2 (file, - # def) units. Neglected = a guarded action that is a high-support - # guard set minus exactly one atom. + # Path-condition normal form. 
`if x; if y; act` and + # `act if x && y` reduce to the same guarded action with path + # condition {x, y}. class PathCondition Site = Struct.new(:guards, :action, :file, :defn, :line, :span, keyword_init: true) def self.scan(files) - sites = [] - files.each do |f| - root, lines = Ast.parse(f) - e = new(f, lines) - e.walk(root, [], []) - sites.concat(e.sites) + sites = files.flat_map do |file| + Syntax.parse(file, parser: "tree_sitter").path_condition_sites.map do |site| + Site.new( + guards: site.guards, + action: site.action, + file: site.file, + defn: site.function, + line: site.line, + span: site.span + ) + end end Report.new(sites) end - attr_reader :sites - - def initialize(file, lines) - @file = file - @lines = lines - @sites = [] - end - - # guards: array of [text, negated?] atoms currently in scope. - def walk(node, defstack, guards) - return unless Ast.node?(node) - - defstack = Ast.def_push(node, defstack) - - case node.type - when :IF, :UNLESS - cond, a, b = node.children - atoms = cond_atoms(cond) - then_g = node.type == :IF ? atoms : negate(atoms) - else_g = node.type == :IF ? negate(atoms) : atoms - walk(a, defstack, guards + then_g) if a - walk(b, defstack, guards + else_g) if b - # the condition itself may contain nested constructs - walk(cond, defstack, guards) - return - when :CALL, :FCALL, :VCALL, :ATTRASGN, :LASGN, :IASGN, :OPCALL - record(node, defstack, guards) if guards.size >= 2 - end - - node.children.each { |c| walk(c, defstack, guards) } - end - - private - - def cond_atoms(cond) - Ast.flatten_and(cond).map do |a| - t = Ast.slice(a, @lines) - text, neg = Ast.canon_polarity(t) - [text, neg] - end - end - - def negate(atoms) - atoms.map { |t, n| [t, !n] } - end - - def record(node, defstack, guards) - members = guards.map { |t, n| (n ? "!" 
: "") + t }.uniq.sort - return if members.size < 2 - - @sites << Site.new(guards: members, action: Ast.slice(node, @lines)[0, 80], - file: @file, defn: defstack.last || "(top-level)", - line: node.first_lineno, - span: [node.first_lineno, node.first_column, - node.last_lineno, node.last_column]) - end - class Report def initialize(sites) @sites = sites diff --git a/gems/decomplex/lib/decomplex/predicate_alias.rb b/gems/decomplex/lib/decomplex/predicate_alias.rb index 9832d35c6..092768ca5 100644 --- a/gems/decomplex/lib/decomplex/predicate_alias.rb +++ b/gems/decomplex/lib/decomplex/predicate_alias.rb @@ -1,6 +1,5 @@ # frozen_string_literal: true -require_relative "ast" require_relative "syntax" module Decomplex @@ -22,49 +21,20 @@ class PredicateAlias def self.scan(files) preds = [] files.each do |f| - root, lines = Ast.parse(f) - new(f, lines).tap { |p| p.walk(root) }.preds.each { |p| preds << p } + Syntax.parse(f, parser: "tree_sitter").predicate_defs.each do |predicate| + preds << Pred.new( + name: predicate.name, + body: predicate.body, + file: predicate.file, + defn: predicate.name, + line: predicate.line, + span: predicate.span + ) + end end Report.new(preds) end - attr_reader :preds - - def initialize(file, lines) - @file = file - @lines = lines - @preds = [] - end - - def walk(node) - return unless Ast.node?(node) - - record_def(node) if node.type == :DEFN - node.children.each { |c| walk(c) } - end - - private - - # Single-expression boolean-ish method: `def x?(...) end`. - # The scope node's body is one statement (not a BLOCK of many). - def record_def(node) - name = node.children[0].to_s - scope = node.children[1] - return unless Ast.node?(scope) && scope.type == :SCOPE - - body = scope.children[2] - return unless Ast.node?(body) - return if body.type == :BLOCK # multi-statement => not a pure predicate - - txt = Ast.slice(body, @lines) - return if txt.empty? 
|| txt.length > 200 - - @preds << Pred.new(name: name, body: txt, file: @file, - defn: name, line: node.first_lineno, - span: [node.first_lineno, node.first_column, - node.last_lineno, node.last_column]) - end - class Report def initialize(preds) @preds = preds diff --git a/gems/decomplex/lib/decomplex/redundant_nil_guard.rb b/gems/decomplex/lib/decomplex/redundant_nil_guard.rb index 98ff88a42..f0214cd9a 100644 --- a/gems/decomplex/lib/decomplex/redundant_nil_guard.rb +++ b/gems/decomplex/lib/decomplex/redundant_nil_guard.rb @@ -1,289 +1,26 @@ # frozen_string_literal: true -require "set" -require_relative "ast" +require_relative "syntax" module Decomplex - # Redundant nil-guard detector. Finds nil checks or safe-navigation - # performed after the same stable subject is already proven non-nil - # on the current intra-method path. - # - # Conservative by design: local variables and no-arg accessor-style - # subjects only, no loop reasoning, no interprocedural facts. Local - # reassignment invalidates the proof. + # Finds nil checks or safe-navigation performed after the same stable subject + # is already proven non-nil on the current intra-method path. 
class RedundantNilGuard - Finding = Struct.new(:file, :defn, :line, :span, :local, :guard, - :proof, keyword_init: true) do - def to_h - loc = "#{file}:#{defn}:#{line}" - super.merge(at: loc, spans: { loc => span }) - end - end - Flow = Struct.new(:known, :terminated, keyword_init: true) - NilFact = Struct.new(:local, :non_nil_when_true, keyword_init: true) - - TERMINATING_CALLS = %i[raise fail abort exit exit!].freeze - def self.scan(files) - files.flat_map do |file| - root, lines = Ast.parse(file) - new(file, lines).tap { |scanner| scanner.walk(root, []) }.findings - end.sort_by { |f| [f.file, f.line, f.local, f.guard] }.map(&:to_h) - end - - attr_reader :findings - - def initialize(file, lines) - @file = file - @lines = lines - @findings = [] - end - - def walk(node, defstack) - return unless Ast.node?(node) - - if %i[DEFN DEFS].include?(node.type) - name = node.children[node.type == :DEFS ? 1 : 0].to_s - process_block(Ast.body_stmts(node), defstack + [name], Set.new) - return - end - - node.children.each { |child| walk(child, defstack) } - end - - private - - def process_block(stmts, defstack, known) - current = known.dup - stmts.each do |stmt| - flow = process_stmt(stmt, defstack, current) - current = flow.known - return flow if flow.terminated + findings = files.flat_map do |file| + Syntax.parse(file, parser: "tree_sitter").redundant_nil_guard_findings end - Flow.new(known: current, terminated: false) + dedupe(findings) + .sort_by { |finding| [finding.file, finding.line, finding.local, finding.guard] } + .map(&:to_h) end - def process_stmt(node, defstack, known) - return Flow.new(known: known.dup, terminated: false) unless Ast.node?(node) - - case node.type - when :IF, :UNLESS - process_branch(node, defstack, known) - when :LASGN - inspect_node(node.children[1], defstack, known) - next_known = known.dup - next_known.delete(node.children[0].to_s) - Flow.new(known: next_known, terminated: false) - else - inspect_node(node, defstack, known) - Flow.new(known: 
known.dup, terminated: terminating?(node)) + def self.dedupe(findings) + findings.group_by do |finding| + [finding.file, finding.defn, finding.line, finding.local, finding.guard.to_s.delete_suffix("()")] + end.values.map do |group| + group.max_by { |finding| finding.guard.to_s.length } end end - - def process_branch(node, defstack, known) - cond, then_body, else_body = node.children - inspect_node(cond, defstack, known) - - then_known = known_for_branch(node.type, true, cond, known) - else_known = known_for_branch(node.type, false, cond, known) - then_flow = process_block(stmts_for(then_body), defstack, then_known) - else_flow = process_block(stmts_for(else_body), defstack, else_known) - - if then_flow.terminated && else_flow.terminated - Flow.new(known: Set.new, terminated: true) - elsif then_flow.terminated - Flow.new(known: else_flow.known, terminated: false) - elsif else_flow.terminated - Flow.new(known: then_flow.known, terminated: false) - else - Flow.new(known: then_flow.known & else_flow.known, terminated: false) - end - end - - def known_for_branch(node_type, body_branch, cond, known) - next_known = known.dup - cond_true_branch = - if node_type == :IF - body_branch - else - !body_branch - end - branch_nil_facts(cond, cond_true_branch).each { |fact| next_known.add(fact.local) } - next_known - end - - def inspect_node(node, defstack, known) - return unless Ast.node?(node) - - recorded = record_redundant(node, defstack, known) - return if %i[DEFN DEFS].include?(node.type) - return if recorded && node.type == :OPCALL - - node.children.each { |child| inspect_node(child, defstack, known) } - end - - def record_redundant(node, defstack, known) - local = redundant_nil_subject(node, known) - return false unless local - - @findings << Finding.new( - file: @file, - defn: defstack.last || "(top-level)", - line: node.first_lineno, - span: span(node), - local: local, - guard: Ast.slice(node, @lines), - proof: "#{local} is already proven non-nil on this path" - ) - true 
- end - - def redundant_nil_subject(node, known) - return qcall_subject(node, known) if node.type == :QCALL - - fact = nil_fact(node) - return nil unless fact && known.include?(fact.local) - - fact.local - end - - def nil_fact(node) - return nil unless Ast.node?(node) - - case node.type - when :CALL - recv, mid, args = node.children - return nil unless mid == :nil? && args.nil? - - subject = subject_key(recv) - subject ? NilFact.new(local: subject, non_nil_when_true: false) : nil - when :OPCALL - recv, mid, args = node.children - return negated_nil_fact(recv) if mid == :! - return comparison_nil_fact(recv, mid, args) if %i[== !=].include?(mid) - - nil - else - nil - end - end - - def branch_nil_facts(node, cond_truth) - return [] unless Ast.node?(node) - - if node.type == :AND - return [] unless cond_truth - - return Ast.flatten_and(node).flat_map { |child| branch_nil_facts(child, true) } - end - - if node.type == :OPCALL && node.children[1] == :! - return branch_nil_facts(node.children[0], !cond_truth) - end - - safe_receiver = safe_nav_receiver_fact(node) - return [safe_receiver] if safe_receiver && cond_truth - - fact = nil_fact(node) - return [fact] if fact && cond_truth == fact.non_nil_when_true - - truthy = truthy_subject_fact(node) - truthy && cond_truth ? [truthy] : [] - end - - def safe_nav_receiver_fact(node) - return nil unless Ast.node?(node) && node.type == :QCALL - - subject = subject_key(node.children[0]) - subject ? 
NilFact.new(local: subject, non_nil_when_true: true) : nil - end - - def truthy_subject_fact(node) - subject = subject_key(node) - return nil unless subject - - NilFact.new(local: subject, non_nil_when_true: true) - end - - def negated_nil_fact(node) - fact = nil_fact(node) - return nil unless fact - - NilFact.new(local: fact.local, - non_nil_when_true: !fact.non_nil_when_true) - end - - def comparison_nil_fact(recv, mid, args) - subject = subject_key(recv) - return nil unless subject && nil_arg?(args) - - NilFact.new(local: subject, non_nil_when_true: mid == :!=) - end - - def qcall_subject(node, known) - recv = node.children[0] - subject = subject_key(recv) - subject if subject && known.include?(subject) - end - - def subject_key(node) - return nil unless Ast.node?(node) - - case node.type - when :LVAR, :DVAR - node.children[0].to_s - when :VCALL - node.children[0].to_s - when :CALL - recv, mid, args = node.children - return nil unless args.nil? && stable_reader_name?(mid) - return "self.#{mid}" if recv&.type == :SELF - - recv_key = subject_key(recv) - recv_key ? "#{recv_key}.#{mid}" : nil - else - nil - end - end - - def stable_reader_name?(mid) - name = mid.to_s - !(name.end_with?("=", "!") || name == "[]") - end - - def local_name(node) - return nil unless Ast.node?(node) && %i[LVAR DVAR].include?(node.type) - - node.children[0].to_s - end - - def nil_arg?(args) - return false unless Ast.node?(args) - - args.children.any? { |child| Ast.node?(child) && child.type == :NIL } - end - - def stmts_for(node) - return [] unless Ast.node?(node) - - node.type == :BLOCK ? 
node.children.compact : [node] - end - - def terminating?(node) - return false unless Ast.node?(node) - return true if %i[RETURN NEXT BREAK].include?(node.type) - return false unless %i[FCALL VCALL CALL].include?(node.type) - - mid = if node.type == :CALL - node.children[1] - else - node.children[0] - end - TERMINATING_CALLS.include?(mid) - end - - def span(node) - [node.first_lineno, node.first_column, node.last_lineno, node.last_column] - end end end diff --git a/gems/decomplex/lib/decomplex/report.rb b/gems/decomplex/lib/decomplex/report.rb index 6d10b6d86..a842f6b93 100644 --- a/gems/decomplex/lib/decomplex/report.rb +++ b/gems/decomplex/lib/decomplex/report.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require_relative "../decomplex" +require_relative "report_facts" module Decomplex # Aggregates every detector over a file set and renders a single @@ -8,134 +9,68 @@ module Decomplex # prioritisation, per-detector sections, run summary). Every number # is a ranked CANDIDATE count, never a verdict. class Report - def initialize(files) + def initialize(files, facts: nil) @files = files - run + facts ? 
apply_facts(facts) : run + end + + def self.from_facts(facts) + normalized = ReportFacts.normalize(facts) + new(normalized.fetch(:files), facts: normalized) end def run - m = Miner.scan(@files) - @miss = m.missing_abstractions - @negc = m.neglected_conditions - cu = CoUpdate.scan(@files) - @negu = cu.neglected_updates - @copair = cu.co_written_pairs - pa = PredicateAlias.scan(@files) - @palias = pa.alias_clusters - sa = SemanticAlias.scan(@files) - @salias = sa.alias_clusters - @reif = sa.reification_misses - pc = PathCondition.scan(@files) - @pcneg = pc.neglected - @pcsc = pc.scattered - sm = SequenceMine.scan(@files) - @broken = sm.broken_protocol - icf = ImplicitControlFlow.scan(@files) - @implicit_control_flow = icf.ordered_protocols( - min_support: Integer(ENV.fetch("DECOMPLEX_ICF_MIN_SUPPORT", "1")) - ) - @derived = DerivedState.scan(@files) - @rename_clones = InconsistentRenameClone.scan(@files) - @similarity = FlaySimilarity.scan( - @files, - mass: Integer(ENV.fetch("DECOMPLEX_SIMILARITY_MASS", - ENV.fetch("DECOMPLEX_FLAY_MASS", FlaySimilarity::DEFAULT_MASS))), - fuzzy: Integer(ENV.fetch("DECOMPLEX_SIMILARITY_FUZZY", - ENV.fetch("DECOMPLEX_FLAY_FUZZY", FlaySimilarity::DEFAULT_FUZZY))) - ) - @pressure = DecisionPressure.scan(@files).ranked - @redundant_nil = RedundantNilGuard.scan(@files) - @fsimple = FalseSimplicity.scan(@files).findings - @oversized_predicates = OversizedPredicate.scan(@files).findings - @fatu = FatUnion.scan(@files).fat_unions - state_mesh = StateMesh.scan(@files, min_writes: 1) - state_mesh.run - @state_heat = state_mesh.findings - @state_branch = StateBranchDensity.scan(@files).findings - @temporal_ordering = TemporalOrderingPressure.scan(@files) - @weighted_inlined_complexity = WeightedInlinedCognitiveComplexity.scan( - @files, - min_score: Float(ENV.fetch( - "DECOMPLEX_WICC_MIN_SCORE", - WeightedInlinedCognitiveComplexity::DEFAULT_MIN_SCORE - )), - min_hidden: Float(ENV.fetch( - "DECOMPLEX_WICC_MIN_HIDDEN", - 
WeightedInlinedCognitiveComplexity::DEFAULT_MIN_HIDDEN - )), - max_depth: Integer(ENV.fetch( - "DECOMPLEX_WICC_MAX_DEPTH", - WeightedInlinedCognitiveComplexity::DEFAULT_MAX_DEPTH - )) - ) - @locality_drag = LocalityDrag.scan( - @files, - min_unrelated_statements: Integer(ENV.fetch( - "DECOMPLEX_LOCALITY_DRAG_MIN_UNRELATED_STATEMENTS", - LocalityDrag::DEFAULT_MIN_UNRELATED_STATEMENTS - )), - min_gap_lines: Integer(ENV.fetch( - "DECOMPLEX_LOCALITY_DRAG_MIN_GAP_LINES", - LocalityDrag::DEFAULT_MIN_GAP_LINES - )), - min_local_complexity: Float(ENV.fetch( - "DECOMPLEX_LOCALITY_DRAG_MIN_LOCAL_COMPLEXITY", - LocalityDrag::DEFAULT_MIN_LOCAL_COMPLEXITY - )), - min_score: Integer(ENV.fetch( - "DECOMPLEX_LOCALITY_DRAG_MIN_SCORE", - LocalityDrag::DEFAULT_MIN_SCORE - )), - max_findings_per_method: Integer(ENV.fetch( - "DECOMPLEX_LOCALITY_DRAG_MAX_FINDINGS_PER_METHOD", - LocalityDrag::DEFAULT_MAX_FINDINGS_PER_METHOD - )) - ) - @function_lcom = FunctionLCOM.scan( - @files, - min_components: Integer(ENV.fetch( - "DECOMPLEX_FUNCTION_LCOM_MIN_COMPONENTS", - FunctionLCOM::DEFAULT_MIN_COMPONENTS - )), - min_locals: Integer(ENV.fetch( - "DECOMPLEX_FUNCTION_LCOM_MIN_LOCALS", - FunctionLCOM::DEFAULT_MIN_LOCALS - )), - min_statements: Integer(ENV.fetch( - "DECOMPLEX_FUNCTION_LCOM_MIN_STATEMENTS", - FunctionLCOM::DEFAULT_MIN_STATEMENTS - )), - min_score: Integer(ENV.fetch( - "DECOMPLEX_FUNCTION_LCOM_MIN_SCORE", - FunctionLCOM::DEFAULT_MIN_SCORE - )) - ) - operational_discontinuity = OperationalDiscontinuity.scan( - @files, - min_dead: Integer(ENV.fetch( - "DECOMPLEX_OPERATIONAL_DISCONTINUITY_MIN_DEAD", - OperationalDiscontinuity::DEFAULT_MIN_DEAD - )), - min_new: Integer(ENV.fetch( - "DECOMPLEX_OPERATIONAL_DISCONTINUITY_MIN_NEW", - OperationalDiscontinuity::DEFAULT_MIN_NEW - )), - max_continuing: Integer(ENV.fetch( - "DECOMPLEX_OPERATIONAL_DISCONTINUITY_MAX_CONTINUING", - OperationalDiscontinuity::DEFAULT_MAX_CONTINUING - )), - min_score: Integer(ENV.fetch( - 
"DECOMPLEX_OPERATIONAL_DISCONTINUITY_MIN_SCORE", - OperationalDiscontinuity::DEFAULT_MIN_SCORE - )) - ) + apply_facts(ReportFacts.from_files(@files, engine: "ruby")) + end + + def apply_facts(facts) + normalized = ReportFacts.normalize(facts) + @files = normalized.fetch(:files) + detectors = normalized.fetch(:detectors) + + miner = detectors.fetch(:miner) + @miss = miner.fetch(:missing_abstractions, []) + @negc = miner.fetch(:neglected_conditions, []) + + co_update = detectors.fetch(:co_update) + @negu = co_update.fetch(:neglected_updates, []) + @copair = co_update.fetch(:co_written_pairs, []) + + @palias = detectors.fetch(:predicate_alias).fetch(:alias_clusters, []) + + semantic_alias = detectors.fetch(:semantic_alias) + @salias = semantic_alias.fetch(:alias_clusters, []) + @reif = semantic_alias.fetch(:reification_misses, []) + + path_condition = detectors.fetch(:path_condition) + @pcneg = path_condition.fetch(:neglected, []) + @pcsc = path_condition.fetch(:scattered, []) + + @broken = detectors.fetch(:sequence_mine).fetch(:broken_protocol, []) + @implicit_control_flow = detectors.fetch(:implicit_control_flow).fetch(:ordered_protocols, []) + @derived = detectors.fetch(:derived_state, []) + @rename_clones = detectors.fetch(:inconsistent_rename_clone, []) + @similarity = detectors.fetch(:flay_similarity, []) + @pressure = detectors.fetch(:decision_pressure, []) + @redundant_nil = detectors.fetch(:redundant_nil_guard, []) + @fsimple = detectors.fetch(:false_simplicity, []) + @oversized_predicates = detectors.fetch(:oversized_predicate, []) + @fatu = detectors.fetch(:fat_union).fetch(:fat_unions, []) + @state_heat = detectors.fetch(:state_heatmap, []) + @state_branch = detectors.fetch(:state_branch_density, []) + @temporal_ordering = detectors.fetch(:temporal_ordering_pressure, []) + @weighted_inlined_complexity = detectors.fetch(:weighted_inlined_complexity, []) + @locality_drag = detectors.fetch(:locality_drag, []) + @function_lcom = detectors.fetch(:function_lcom, 
[]) + operational_discontinuity = detectors.fetch(:operational_discontinuity, []) @operational_discontinuity_high_confidence, @operational_discontinuity = - operational_discontinuity.partition { |finding| OperationalDiscontinuity.high_confidence?(finding) } + operational_discontinuity.partition { |finding| finding[:confidence].to_s == "high" } # sections_data also asserts the span contract -- running it on # the normal report path keeps that tripwire live. sd = sections_data @convergence = Convergence.rollup(sd) @root = RootCause.cluster(sd) + self end # tier = signal quality (1 = highest signal / lowest false-positive, @@ -371,8 +306,7 @@ def to_json(*_args) private def sarif_rules - SECTIONS.reject { |title, *_| CONVERGENCE_EXCLUDED_SECTIONS.include?(title) } - .map do |title, _ivar, tier, desc| + sarif_sections_data(include_findings: false).map do |title, tier, _findings, desc| Decomplex::Sarif.rule( id: sarif_rule_id(title), name: title, @@ -397,7 +331,7 @@ def ranked_sarif_results(results) end def sarif_results(include_finding_payload: true) - sections_data.flat_map do |title, tier, findings| + sarif_sections_data.flat_map do |title, tier, findings, _desc| Array(findings).flat_map do |finding| sarif_locations_for_finding(finding).map do |location| properties = { @@ -432,12 +366,102 @@ def sarif_rule_id(title) end def sarif_message(title, finding, location) + detail = sarif_message_detail(title, finding) + return "#{title}: #{detail}" unless detail.to_s.empty? 
+ subject = location[:method] || finding[:method] || finding[:name] || finding[:field] || finding[:contract] || finding[:owner] || finding[:token] || finding[:kind] [title, subject].compact.join(": ") end + def sarif_message_detail(title, finding) + case title + when "Decision Pressure" + "`#{finding[:contract]}` creates #{finding[:decisions]} eliminable guard decision(s) across " \ + "#{finding[:methods]} method(s)" + when "Redundant Nil Guards" + "`#{finding[:local]}` is nil-guarded by `#{finding[:guard]}` after proof `#{finding[:proof]}`" + when "State Heatmap" + writers = Array(finding[:top_writers]).first(3).join(" | ") + readers = Array(finding[:top_readers]).first(3).join(" | ") + "state `#{finding[:field]}` has pressure=#{finding[:pressure]}, messiness=#{finding[:messiness]} " \ + "(writes=#{finding[:writes]}, reads=#{finding[:reads]}, re-derived=#{finding[:re_derivations]}, " \ + "scatter=#{finding[:scatter]}); writers #{writers}; readers #{readers}" + when "Missing Abstractions" + "guard tuple `#{Array(finding[:members]).join(' | ')}` repeats in #{finding[:support]} site(s) " \ + "with scatter=#{finding[:scatter]}" + when "State-Based Branch Density" + refs = Array(finding[:state_refs]).first(8).join(" | ") + "#{finding[:decisions]} state-based branch decision(s) over `#{refs}`; " \ + "example predicate `#{finding[:predicate]}`" + when "Temporal Ordering Pressure" + "`#{finding[:owner]}` exposes mutable lifecycle pressure score=#{finding[:score]} " \ + "(public=#{finding[:public_methods]}, state_methods=#{finding[:state_methods]}, " \ + "writers=#{finding[:writers]})" + when "Neglected Conditions", "Neglected Path Conditions" + "missing condition `#{finding[:missing]}` from `#{Array(finding[:pattern] || finding[:guards]).join(' | ')}` " \ + "(support=#{finding[:support]})" + when "Oversized Predicates" + "#{finding[:count]} condition atoms in predicate `#{finding[:predicate]}`" + when "Neglected Updates" + "writes `.#{finding[:has]}` but not co-written 
`.#{finding[:missing]}` on receiver `#{finding[:recv]}` " \ + "(support=#{finding[:support]})" + when "Semantic Predicate Aliases", "Exact Predicate Aliases" + "predicate aliases `#{Array(finding[:names]).join(' = ')}` for `#{finding[:canon] || finding[:body]}`" + when "Reification Misses" + "predicate `#{finding[:predicate]}` is reinvented inline as `#{finding[:raw]}`" + when "Broken Protocols" + "does `#{finding[:has]}` without co-called `#{finding[:missing]}` " \ + "(support=#{finding[:support]}, confidence=#{finding[:confidence]})" + when "Implicit Control Flow" + sarif_implicit_control_flow_detail(finding) + when "Weighted Inlined Cognitive Complexity" + "inlined=#{finding[:inlined]} (local=#{finding[:local]}, hidden=#{finding[:hidden]}, " \ + "depth=#{finding[:depth]}); chain `#{Array(finding[:call_chain]).join(' -> ')}`" + when "Locality Drag" + "`#{finding[:variable]}` is initialized at line #{finding[:defined_at]} but first used at line " \ + "#{finding[:used_at]} after #{finding[:unrelated_statements]} unrelated statement(s)" + when "Function LCOM" + mode = finding[:mode] == :late_join ? 
"late_join" : "disjoint" + "#{mode} local data-flow: score=#{finding[:score]}, components=#{finding[:components]}, " \ + "locals=#{finding[:locals]}, statements=#{finding[:statements]}" + when "Operational Discontinuity", "Operational Discontinuity (High Confidence)" + "score=#{finding[:score]}, reset_boundaries=#{finding[:resets]}, dead=#{finding[:dead_total]}, " \ + "new=#{finding[:new_total]}, confidence=#{finding[:confidence] || :review}" + when "False Simplicity" + "[#{finding[:kind]}] `#{finding[:detail]}` support=#{finding[:support]}, scatter=#{finding[:scatter]}" + when "Fat Unions" + "union `#{Array(finding[:variant_set]).join(' | ')}` has #{Array(finding[:common]).size} common and " \ + "#{Array(finding[:variant]).size} variant member(s), scatter=#{finding[:scatter]}" + when "Derived-State Staleness" + "`#{finding[:derived]}` derived from `#{finding[:source]}` at line #{finding[:derived_at]}; " \ + "`#{finding[:source]}` reassigned at line #{finding[:source_reassigned_at]} but " \ + "`#{finding[:derived]}` is not recomputed" + when "Inconsistent Rename Clones" + "clone of #{finding[:ref_at]}: reference variable `#{finding[:ref_name]}` diverges as " \ + "#{Array(finding[:divergent]).inspect}" + when "Structural Similarity (Type-2/3)" + "[#{finding[:clone_type]}] mass=#{finding[:mass]} node=`#{finding[:node]}` across " \ + "#{Array(finding[:sites]).size} site(s)" + else + nil + end + end + + def sarif_implicit_control_flow_detail(finding) + protocol = Array(finding[:protocol]).join(" -> ") + dependency = Array(finding[:dependency]).join("|") + states = Array(finding[:states]).join(" | ") + if finding[:kind] == :order_drift + observed = Array(finding[:observed]).join(" -> ") + return "[order_drift] observed `#{observed}` against protocol `#{protocol}` " \ + "(#{dependency} state=`#{states}`)" + end + + "[protocol_pressure] protocol `#{protocol}` (#{dependency} state=`#{states}`), support=#{finding[:support]}" + end + def sarif_locations_for_finding(finding) 
spans = finding[:spans] if spans.is_a?(Hash) && !spans.empty? @@ -477,6 +501,13 @@ def parse_sarif_loc(loc) } end + def sarif_sections_data(include_findings: true) + SECTIONS.map do |title, ivar, tier, desc| + findings = include_findings ? instance_variable_get(ivar) : nil + [title, tier, findings, desc] + end + end + def zero_based_column_to_sarif(value) return nil if value.nil? diff --git a/gems/decomplex/lib/decomplex/report_facts.rb b/gems/decomplex/lib/decomplex/report_facts.rb new file mode 100644 index 000000000..d8871d64c --- /dev/null +++ b/gems/decomplex/lib/decomplex/report_facts.rb @@ -0,0 +1,270 @@ +# frozen_string_literal: true + +require "json" +require_relative "miner" +require_relative "co_update" +require_relative "predicate_alias" +require_relative "semantic_alias" +require_relative "path_condition" +require_relative "sequence_mine" +require_relative "ordered_protocol_mine" +require_relative "derived_state" +require_relative "inconsistent_rename_clone" +require_relative "flay_similarity" +require_relative "decision_pressure" +require_relative "redundant_nil_guard" +require_relative "false_simplicity" +require_relative "oversized_predicate" +require_relative "fat_union" +require_relative "state_mesh" +require_relative "state_branch_density" +require_relative "temporal_ordering_pressure" +require_relative "weighted_inlined_cognitive_complexity" +require_relative "locality_drag" +require_relative "function_lcom" +require_relative "operational_discontinuity" +require_relative "native/report_facts" + +module Decomplex + # Stable boundary between analysis and reporting. + # + # ReportFacts contains the report-ready detector outputs before + # Convergence, RootCause, Markdown, or SARIF post-processing runs. 
+ module ReportFacts + FORMAT = "decomplex.report-facts.v1" + ENUM_KEYS = %i[kind mode confidence clone_type].freeze + + module_function + + def from_files(files, engine: "ruby", jobs: nil) + paths = Array(files).map(&:to_s) + case engine.to_s + when "ruby" + { + "format" => FORMAT, + "files" => paths, + "detectors" => json_safe(ruby_detector_facts(paths)) + } + when "rust" + Native::ReportFacts.collect(paths, jobs: jobs) + else + raise ArgumentError, "unsupported decomplex facts engine: #{engine}" + end + end + + def to_json(facts, pretty: true) + pretty ? JSON.pretty_generate(json_safe(facts)) : JSON.generate(json_safe(facts)) + end + + def normalize(payload) + raw = payload.is_a?(String) ? JSON.parse(payload) : payload + deep_hydrate(raw) + end + + def json_safe(value) + case value + when Hash + value.to_h { |key, child| [key.to_s, json_safe(child)] } + when Array + value.map { |child| json_safe(child) } + when Symbol + value.to_s + else + value + end + end + + def state_heatmap_findings_from_graph(graph, limit_sites: 12) + fields = graph.fetch("fields", {}) + fields.map do |field, row| + writers = Array(row["writers"]) + readers = Array(row["readers"]) + re_derivations = Array(row["re_derivations"]) + metrics = row.fetch("metrics", {}) + sites = site_locations(writers + readers) + + re_derivations.map { |site| site_location(site) } + spans = (writers + readers).each_with_object({}) do |site, out| + out[site_location(site)] = site["span"] + end + + { + "at" => sites.first, + "field" => field, + "writes" => metrics.fetch("writes", 0), + "reads" => metrics.fetch("reads", 0), + "re_derivations" => metrics.fetch("re_derivations", 0), + "scatter" => metrics.fetch("scatter", 0), + "write_scatter" => metrics.fetch("write_scatter", 0), + "read_scatter" => metrics.fetch("read_scatter", 0), + "receiver_types" => metrics.fetch("receiver_types", 0), + "messiness" => row.fetch("messiness", 0), + "pressure" => metrics.fetch("pressure", 0), + "top_writers" => 
site_locations(writers.first(4)), + "top_readers" => site_locations(readers.first(4)), + "sites" => sites.first(limit_sites), + "spans" => spans + } + end + end + + def ruby_detector_facts(files) + m = Miner.scan(files) + cu = CoUpdate.scan(files) + pa = PredicateAlias.scan(files) + sa = SemanticAlias.scan(files) + pc = PathCondition.scan(files) + sm = SequenceMine.scan(files) + icf = ImplicitControlFlow.scan(files) + state_mesh = StateMesh.scan(files, min_writes: 1) + state_mesh.run + operational_discontinuity = OperationalDiscontinuity.scan( + files, + min_dead: Integer(ENV.fetch( + "DECOMPLEX_OPERATIONAL_DISCONTINUITY_MIN_DEAD", + OperationalDiscontinuity::DEFAULT_MIN_DEAD + )), + min_new: Integer(ENV.fetch( + "DECOMPLEX_OPERATIONAL_DISCONTINUITY_MIN_NEW", + OperationalDiscontinuity::DEFAULT_MIN_NEW + )), + max_continuing: Integer(ENV.fetch( + "DECOMPLEX_OPERATIONAL_DISCONTINUITY_MAX_CONTINUING", + OperationalDiscontinuity::DEFAULT_MAX_CONTINUING + )), + min_score: Integer(ENV.fetch( + "DECOMPLEX_OPERATIONAL_DISCONTINUITY_MIN_SCORE", + OperationalDiscontinuity::DEFAULT_MIN_SCORE + )) + ) + + { + miner: { + missing_abstractions: m.missing_abstractions, + neglected_conditions: m.neglected_conditions + }, + co_update: { + co_written_pairs: cu.co_written_pairs, + neglected_updates: cu.neglected_updates + }, + predicate_alias: { alias_clusters: pa.alias_clusters }, + semantic_alias: { + alias_clusters: sa.alias_clusters, + reification_misses: sa.reification_misses + }, + path_condition: { + neglected: pc.neglected, + scattered: pc.scattered + }, + sequence_mine: { broken_protocol: sm.broken_protocol }, + implicit_control_flow: { + ordered_protocols: icf.ordered_protocols( + min_support: Integer(ENV.fetch("DECOMPLEX_ICF_MIN_SUPPORT", "1")) + ) + }, + derived_state: DerivedState.scan(files), + inconsistent_rename_clone: InconsistentRenameClone.scan(files), + flay_similarity: FlaySimilarity.scan( + files, + mass: Integer(ENV.fetch( + "DECOMPLEX_SIMILARITY_MASS", + 
ENV.fetch("DECOMPLEX_FLAY_MASS", FlaySimilarity::DEFAULT_MASS) + )), + fuzzy: Integer(ENV.fetch( + "DECOMPLEX_SIMILARITY_FUZZY", + ENV.fetch("DECOMPLEX_FLAY_FUZZY", FlaySimilarity::DEFAULT_FUZZY) + )) + ), + decision_pressure: DecisionPressure.scan(files).ranked, + redundant_nil_guard: RedundantNilGuard.scan(files), + false_simplicity: FalseSimplicity.scan(files).findings, + oversized_predicate: OversizedPredicate.scan(files).findings, + fat_union: { fat_unions: FatUnion.scan(files).fat_unions }, + state_heatmap: state_mesh.findings, + state_branch_density: StateBranchDensity.scan(files).findings, + temporal_ordering_pressure: TemporalOrderingPressure.scan(files), + weighted_inlined_complexity: WeightedInlinedCognitiveComplexity.scan( + files, + min_score: Float(ENV.fetch( + "DECOMPLEX_WICC_MIN_SCORE", + WeightedInlinedCognitiveComplexity::DEFAULT_MIN_SCORE + )), + min_hidden: Float(ENV.fetch( + "DECOMPLEX_WICC_MIN_HIDDEN", + WeightedInlinedCognitiveComplexity::DEFAULT_MIN_HIDDEN + )), + max_depth: Integer(ENV.fetch( + "DECOMPLEX_WICC_MAX_DEPTH", + WeightedInlinedCognitiveComplexity::DEFAULT_MAX_DEPTH + )) + ), + locality_drag: LocalityDrag.scan( + files, + min_unrelated_statements: Integer(ENV.fetch( + "DECOMPLEX_LOCALITY_DRAG_MIN_UNRELATED_STATEMENTS", + LocalityDrag::DEFAULT_MIN_UNRELATED_STATEMENTS + )), + min_gap_lines: Integer(ENV.fetch( + "DECOMPLEX_LOCALITY_DRAG_MIN_GAP_LINES", + LocalityDrag::DEFAULT_MIN_GAP_LINES + )), + min_local_complexity: Float(ENV.fetch( + "DECOMPLEX_LOCALITY_DRAG_MIN_LOCAL_COMPLEXITY", + LocalityDrag::DEFAULT_MIN_LOCAL_COMPLEXITY + )), + min_score: Integer(ENV.fetch( + "DECOMPLEX_LOCALITY_DRAG_MIN_SCORE", + LocalityDrag::DEFAULT_MIN_SCORE + )), + max_findings_per_method: Integer(ENV.fetch( + "DECOMPLEX_LOCALITY_DRAG_MAX_FINDINGS_PER_METHOD", + LocalityDrag::DEFAULT_MAX_FINDINGS_PER_METHOD + )) + ), + function_lcom: FunctionLCOM.scan( + files, + min_components: Integer(ENV.fetch( + "DECOMPLEX_FUNCTION_LCOM_MIN_COMPONENTS", + 
FunctionLCOM::DEFAULT_MIN_COMPONENTS + )), + min_locals: Integer(ENV.fetch( + "DECOMPLEX_FUNCTION_LCOM_MIN_LOCALS", + FunctionLCOM::DEFAULT_MIN_LOCALS + )), + min_statements: Integer(ENV.fetch( + "DECOMPLEX_FUNCTION_LCOM_MIN_STATEMENTS", + FunctionLCOM::DEFAULT_MIN_STATEMENTS + )), + min_score: Integer(ENV.fetch( + "DECOMPLEX_FUNCTION_LCOM_MIN_SCORE", + FunctionLCOM::DEFAULT_MIN_SCORE + )) + ), + operational_discontinuity: operational_discontinuity + } + end + + def deep_hydrate(value, key: nil) + case value + when Hash + value.each_with_object({}) do |(child_key, child), out| + hydrated_key = key == :spans ? child_key.to_s : child_key.to_s.to_sym + out[hydrated_key] = deep_hydrate(child, key: hydrated_key) + end + when Array + value.map { |child| deep_hydrate(child, key: key) } + when String + ENUM_KEYS.include?(key) ? value.to_sym : value + else + value + end + end + + def site_locations(sites) + Array(sites).map { |site| site_location(site) } + end + + def site_location(site) + "#{site.fetch('file')}:#{site.fetch('defn')}:#{site.fetch('line')}" + end + end +end diff --git a/gems/decomplex/lib/decomplex/semantic_alias.rb b/gems/decomplex/lib/decomplex/semantic_alias.rb index 1a8ca6aff..b88c1679b 100644 --- a/gems/decomplex/lib/decomplex/semantic_alias.rb +++ b/gems/decomplex/lib/decomplex/semantic_alias.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -require_relative "ast" +require_relative "syntax" module Decomplex # Semantic predicate alias. 
The exact-text PredicateAlias misses the @@ -26,73 +26,66 @@ def self.scan(files) preds = [] uses = [] files.each do |f| - root, lines = Ast.parse(f) - e = new(f, lines) - e.walk(root, []) - preds.concat(e.preds) - uses.concat(e.uses) + document = Syntax.parse(f, parser: "tree_sitter") + document.predicate_defs.each do |predicate| + next unless semantic_predicate_definition?(predicate) + + preds << Pred.new( + name: predicate.name, + canon: canon(predicate.body), + file: predicate.file, + line: predicate.line, + span: predicate.span + ) + end + document.comparison_sites.each do |comparison| + uses << Use.new( + canon: canon(comparison.source), + file: comparison.file, + defn: comparison.function, + line: comparison.line, + raw: comparison.source, + span: comparison.span + ) + end + document.branch_arms.each do |arm| + next unless arm.predicate.to_s.match?(/(?:==|!=)/) + + uses << Use.new( + canon: canon(arm.predicate), + file: arm.file, + defn: arm.function, + line: arm.decision_line, + raw: arm.predicate, + span: arm.decision_span + ) + end end + uses.uniq! 
{ |use| [use.file, use.defn, use.line, use.canon, use.raw] } Report.new(preds, uses) end - attr_reader :preds, :uses - - def initialize(file, lines) - @file = file - @lines = lines - @preds = [] - @uses = [] - end - - def walk(node, defstack) - return unless Ast.node?(node) - - defstack = Ast.def_push(node, defstack) - record_pred(node) if node.type == :DEFN - if %i[CALL OPCALL].include?(node.type) && comparison?(node) - c = canon(Ast.slice(node, @lines)) - @uses << Use.new(canon: c, file: @file, - defn: defstack.last || "(top-level)", - line: node.first_lineno, - raw: Ast.slice(node, @lines), - span: [node.first_lineno, node.first_column, - node.last_lineno, node.last_column]) - end - node.children.each { |ch| walk(ch, defstack) } + def self.semantic_predicate_definition?(predicate) + predicate.name.to_s.end_with?("?") || + predicate.body.to_s.match?(/(?:==|!=|&&|\|\||\band\b|\bor\b)/) end # Canonical predicate form: drop a leading `!`, strip a leading # receiver chain (`a.b.`, `@`, `self.`) before the final # `name OP value`, collapse spaces. Pure syntactic folding. def self.canon(text) - t, = Ast.canon_polarity(text) + t, = canon_polarity(text) t = t.sub(/\Aself\./, "").sub(/\A@/, "") # strip a single receiver hop: `recv.attr == :v` -> `attr == :v` t = t.sub(/\A[A-Za-z_]\w*(?:\([^)]*\))?\.(?=[A-Za-z_]\w*\s*(==|!=|\.))/, "") t.gsub(/\s+/, " ").strip end - private - - def canon(text) = self.class.canon(text) - - def comparison?(node) - mid = node.children[node.type == :OPCALL ? 1 : 1] - %i[== != nil?].include?(mid) || - (node.type == :CALL && node.children[1] == :nil?) 
- end - - def record_pred(node) - name = node.children[0].to_s - return unless name.end_with?("?") - - stmts = Ast.body_stmts(node) - return unless stmts.size == 1 + def self.canon_polarity(text) + source = text.to_s.strip + return [source[1..].to_s.strip, true] if source.start_with?("!") - @preds << Pred.new(name: name, canon: canon(Ast.slice(stmts.first, @lines)), - file: @file, line: node.first_lineno, - span: [node.first_lineno, node.first_column, - node.last_lineno, node.last_column]) + [source, false] end class Report @@ -122,7 +115,7 @@ def reification_misses @uses.filter_map do |u| ps = bycanon[u.canon] next unless ps && !ps.empty? - next if u.defn.end_with?("?") && ps.any? { |p| p.name == u.defn } + next if ps.any? { |p| p.name == u.defn } { predicate: ps.first.name, canon: u.canon, at: "#{u.file}:#{u.defn}:#{u.line}", diff --git a/gems/decomplex/lib/decomplex/sequence_mine.rb b/gems/decomplex/lib/decomplex/sequence_mine.rb index 99994d3c4..87f60680f 100644 --- a/gems/decomplex/lib/decomplex/sequence_mine.rb +++ b/gems/decomplex/lib/decomplex/sequence_mine.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -require_relative "ast" +require_relative "syntax" module Decomplex # Guarded-pair / protocol mining (Engler "Bugs as Deviant Behavior", @@ -10,8 +10,8 @@ module Decomplex # deviant -- the "similar path, one missing the step" plague that is # the literal shape of bugs #1/#2/#9. # - # Unit = the SET of distinct call message-names in a method (FCALL / - # CALL mid). Domain-agnostic (Engler): no name heuristics, mine all + # Unit = the SET of distinct semantic call message-names in a method. + # Domain-agnostic (Engler): no name heuristics, mine all # pairs, rank by support, accept FP. Same proven shape as co_update, # over calls instead of assigned attributes. 
class SequenceMine @@ -45,9 +45,9 @@ class SequenceMine def self.scan(files) calls = [] files.each do |f| - root, lines = Ast.parse(f) - e = new(f) - e.walk(root, []) + document = Syntax.parse(f, parser: "tree_sitter") + e = new(f, document) + e.collect calls.concat(e.calls) end Report.new(calls) @@ -55,49 +55,54 @@ def self.scan(files) attr_reader :calls - def initialize(file) + def initialize(file, document) @file = file + @document = document @calls = [] end - def walk(node, defstack) - return unless Ast.node?(node) - - defstack = Ast.def_push(node, defstack) - if %i[CALL FCALL VCALL].include?(node.type) - mid = node.children[node.type == :CALL ? 1 : 0] - if protocol_event?(node, mid.to_s) - @calls << Call.new(mid: mid.to_s, file: @file, - defn: defstack.last || "(top-level)", - line: node.first_lineno, - span: [node.first_lineno, node.first_column, - node.last_lineno, node.last_column]) + def collect + @document.call_sites.each do |call| + mid = call.message.to_s + nested_protocol_events(call).each do |nested_mid| + @calls << Call.new(mid: nested_mid, file: @file, + defn: call.function || "(top-level)", + line: call.line, + span: call.span) + end + if protocol_event?(call, mid) + @calls << Call.new(mid: mid, file: @file, + defn: call.function || "(top-level)", + line: call.line, + span: call.span) end end - node.children.each { |c| walk(c, defstack) } end private - def protocol_event?(node, mid) + def protocol_event?(call, mid) return false if IGNORED_MIDS.include?(mid) - return false if passive_reader_call?(node, mid) + return false if passive_reader_call?(call, mid) true end - def passive_reader_call?(node, mid) + def passive_reader_call?(call, mid) return false if zero_arg_action_name?(mid) - case node.type - when :CALL - node.children[2].nil? - when :VCALL - true - when :FCALL - node.children[1].nil? - else - false + return false unless call.arguments.to_a.empty? 
+ + true + end + + def nested_protocol_events(call) + return [] unless IGNORED_MIDS.include?(call.message.to_s) + + candidates = call.arguments.to_a + candidates += source_text(call.span).scan(/\b[a-z_]\w*[!?]?\b/) + candidates.uniq.select do |candidate| + !IGNORED_MIDS.include?(candidate) && zero_arg_action_name?(candidate) end end @@ -110,6 +115,21 @@ def zero_arg_action_name?(mid) end end + def source_text(span) + return "" unless span + + first_line, first_column, last_line, last_column = span + if first_line == last_line + return @document.lines[first_line - 1].to_s[first_column...last_column].to_s + end + + parts = [] + parts << @document.lines[first_line - 1].to_s[first_column..].to_s + parts.concat(@document.lines[first_line...(last_line - 1)] || []) + parts << @document.lines[last_line - 1].to_s[0...last_column].to_s + parts.join + end + class Report # No frequency blocklist: a pervasive protocol (alloc_mark + # cleanup in every method) is exactly the high-frequency case we diff --git a/gems/decomplex/lib/decomplex/state_branch_density.rb b/gems/decomplex/lib/decomplex/state_branch_density.rb index 4deb8cbf6..03a8ef636 100644 --- a/gems/decomplex/lib/decomplex/state_branch_density.rb +++ b/gems/decomplex/lib/decomplex/state_branch_density.rb @@ -1,7 +1,6 @@ # frozen_string_literal: true require "set" -require_relative "ast" require_relative "syntax" module Decomplex @@ -9,260 +8,97 @@ module Decomplex # object-owned state. This is the "state + control flow" surface: # branch decisions over ivars, globals, or receiver attributes. class StateBranchDensity - BRANCH_TYPES = %i[IF UNLESS WHILE UNTIL].freeze - NOISE_MIDS = %i[! 
!= == === < <= > >= [] []= to_s inspect class].freeze Decision = Struct.new(:file, :defn, :line, :span, :predicate, :state_refs, keyword_init: true) def self.scan(files) - decisions = [] - parsed = files.to_h { |file| [file, Ast.parse(file)] } - global_immutable_readers = Hash.new { |h, k| h[k] = Set.new } - global_immutable_reader_types = Hash.new { |h, k| h[k] = {} } - global_type_aliases = {} - parsed.each_value do |_root, lines| - scanner = new(nil, lines) - scanner.immutable_struct_readers(lines).each do |name, readers| - global_immutable_readers[name].merge(readers) + documents = files.to_h do |file| + [file, Syntax.parse(file, parser: "tree_sitter")] + end + immutable_readers = Hash.new { |h, k| h[k] = Set.new } + immutable_reader_types = Hash.new { |h, k| h[k] = {} } + type_aliases = {} + + documents.each_value do |document| + document.immutable_struct_readers.each do |name, readers| + immutable_readers[name].merge(readers) end - scanner.immutable_struct_reader_types(lines).each do |name, readers| - global_immutable_reader_types[name].merge!(readers) + document.immutable_struct_reader_types.each do |name, readers| + immutable_reader_types[name].merge!(readers) end - global_type_aliases.merge!(scanner.type_aliases(lines)) + type_aliases.merge!(document.type_aliases) end - parsed.each do |file, (root, lines)| - scanner = new( + + decisions = documents.flat_map do |file, document| + new( file, - lines, - immutable_readers: global_immutable_readers, - immutable_reader_types: global_immutable_reader_types, - type_aliases: global_type_aliases - ) - scanner.walk(root, []) - decisions.concat(scanner.decisions) + document, + immutable_readers: immutable_readers, + immutable_reader_types: immutable_reader_types, + type_aliases: type_aliases + ).decisions end Report.new(decisions) end attr_reader :decisions - def initialize(file, lines, immutable_readers: nil, immutable_reader_types: nil, type_aliases: nil) + def initialize(file, document, immutable_readers:, 
immutable_reader_types:, type_aliases:) @file = file - @lines = lines - @decisions = [] - @totals = Hash.new(0) - @immutable_readers = immutable_readers || immutable_struct_readers(lines) - @immutable_reader_types = immutable_reader_types || immutable_struct_reader_types(lines) - @type_aliases = type_aliases || type_aliases(lines) - @method_param_types = method_param_types(lines) - end - - def walk(node, defstack) - return unless Ast.node?(node) - - defstack = Ast.def_push(node, defstack) - record_branch(node, defstack) - node.children.each { |child| walk(child, defstack) } - end - - def record_branch(node, defstack) - cond = - case node.type - when *BRANCH_TYPES - node.children[0] - when :CASE - node.children[0] - else - nil - end - return unless Ast.node?(cond) - - defn = defstack.last || "(top-level)" - @totals[[@file, defn]] += 1 - refs = state_refs(cond, defn) - return if refs.empty? - - @decisions << Decision.new( - file: @file, - defn: defn, - line: node.first_lineno, - span: [node.first_lineno, node.first_column, - node.last_lineno, node.last_column], - predicate: Ast.slice(cond, @lines), - state_refs: refs.uniq.sort + @document = document + @decisions = semantic_decisions( + immutable_readers: immutable_readers, + immutable_reader_types: immutable_reader_types, + type_aliases: type_aliases ) end - def state_refs(node, defn) - refs = [] - collect_state_refs(node, refs, defn) - refs - end - - def collect_state_refs(node, refs, defn) - return unless Ast.node?(node) - - case node.type - when :IVAR - refs << node.children[0].to_s - when :GVAR - refs << node.children[0].to_s - when :CALL, :QCALL, :OPCALL - recv, mid, args = node.children - if state_attr_read?(recv, mid, args, defn) - refs << "#{Ast.slice(recv, @lines)}.#{mid}" - end - end - node.children.each { |child| collect_state_refs(child, refs, defn) } - end - - def state_attr_read?(recv, mid, args, defn) - return false unless recv - return false if NOISE_MIDS.include?(mid) - return false unless args.nil? 
|| empty_arg_list?(args) - return false if immutable_struct_const_read?(recv, mid, defn) - - # `user.admin?`, `user.name`, `@cart.empty?`, `config.enabled` - # are state-derived decisions. `a == 0` has no no-arg receiver - # read and is deliberately not counted. - true - end - - def immutable_struct_const_read?(recv, mid, defn) - owner_type = immutable_receiver_type(recv, defn) - return false unless owner_type - - immutable_reader?(owner_type, mid) - end - - def immutable_receiver_type(recv, defn) - return false unless Ast.node?(recv) - - if %i[CALL QCALL OPCALL].include?(recv.type) - recv_recv, recv_mid, recv_args = recv.children - return immutable_reader_result_type(recv_recv, recv_mid, recv_args, defn) - end - return false unless recv.type == :LVAR - - param_types = @method_param_types[defn] - return false unless param_types - - param_types[recv.children[0].to_s] - end - - def immutable_reader?(type_name, mid) - return false unless type_name - - resolved_type_name = resolve_type_alias(type_name) - readers = if @immutable_readers.key?(resolved_type_name) - @immutable_readers[resolved_type_name] - else - @immutable_readers[resolved_type_name.split("::").last] - end - readers&.include?(mid) || false - end - - def immutable_reader_result_type(recv, mid, args, defn) - return nil unless args.nil? || empty_arg_list?(args) + private - owner_type = immutable_receiver_type(recv, defn) - return nil unless owner_type - - resolved_type_name = resolve_type_alias(owner_type) - reader_types = if @immutable_reader_types.key?(resolved_type_name) - @immutable_reader_types[resolved_type_name] - else - @immutable_reader_types[resolved_type_name.split("::").last] - end - reader_types[mid] - end - - def empty_arg_list?(args) - Ast.node?(args) && args.type == :LIST && args.children.compact.empty? 
- end - - def immutable_struct_readers(lines) - readers = Hash.new { |h, k| h[k] = Set.new } - class_stack = [] - lines.each do |line| - if (match = line.match(/\A\s*class\s+([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\s*<\s*T::Struct\b/)) - class_stack << match[1] - next - end - if class_stack.any? && (match = line.match(/\A\s*const\s+:([A-Za-z_]\w*)\b/)) - readers[class_stack.last].add(match[1].to_sym) - next - end - class_stack.pop if class_stack.any? && line.match?(/\A\s*end\s*(?:#.*)?\z/) + def semantic_decisions(immutable_readers:, immutable_reader_types:, type_aliases:) + branch_decisions = @document.branch_decisions( + immutable_readers: immutable_readers, + immutable_reader_types: immutable_reader_types, + type_aliases: type_aliases + ) + filter_wrapper_decisions(branch_decisions).map do |decision| + Decision.new( + file: @file, + defn: decision.function, + line: decision.line, + span: decision.span, + predicate: decision.predicate, + state_refs: decision.state_refs.uniq.sort + ) end - readers end - def immutable_struct_reader_types(lines) - reader_types = Hash.new { |h, k| h[k] = {} } - class_stack = [] - lines.each do |line| - if (match = line.match(/\A\s*class\s+([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\s*<\s*T::Struct\b/)) - class_stack << match[1] - next - end - if class_stack.any? && (match = line.match(/\A\s*const\s+:([A-Za-z_]\w*)\s*,\s*([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\b/)) - reader_types[class_stack.last][match[1].to_sym] = match[2] - next - end - class_stack.pop if class_stack.any? 
&& line.match?(/\A\s*end\s*(?:#.*)?\z/) + def filter_wrapper_decisions(decisions) + decisions.reject do |decision| + wrapper_predicate?(decision.predicate) && nested_state_decision?(decision, decisions) end - reader_types end - def type_aliases(lines) - aliases = {} - lines.each do |line| - if (match = line.match(/\A\s*([A-Z]\w*)\s*=\s*T\.type_alias\s*\{\s*([A-Z]\w*(?:::[A-Z]\w*)*)\s*\}/)) - aliases[match[1]] = match[2] - elsif (match = line.match(/\A\s*([A-Z]\w*)\s*=\s*([A-Z]\w*(?:::[A-Z]\w*)*)\b/)) - aliases[match[1]] = match[2] - end - end - aliases + def wrapper_predicate?(predicate) + predicate.to_s.match?(/\A(?:if|unless|while|until)\b/) end - def resolve_type_alias(type_name) - seen = Set.new - current = type_name - loop do - break current if seen.include?(current) - - seen.add(current) - target = @type_aliases[current] || @type_aliases[current.split("::").last] - break current unless target + def nested_state_decision?(decision, decisions) + decisions.any? do |candidate| + next false if candidate.equal?(decision) + next false unless candidate.function == decision.function + next false unless encloses?(decision.span, candidate.span) - current = target + (Array(candidate.state_refs) - Array(decision.state_refs)).empty? end end - def method_param_types(lines) - types_by_method = {} - pending_sig = +"" - lines.each do |line| - pending_sig << line if pending_sig_active?(line, pending_sig) - if (match = line.match(/\A\s*def\s+([A-Za-z_]\w*[!?=]?)(?:\s|\(|$)/)) - types_by_method[match[1]] = sig_param_types(pending_sig) - pending_sig = +"" - end - end - types_by_method - end - - def pending_sig_active?(line, pending_sig) - !pending_sig.empty? 
|| line.match?(/\A\s*sig\b/) - end - - def sig_param_types(sig_source) - match = sig_source.match(/params\s*\((.*?)\)/m) - return {} unless match + def encloses?(outer, inner) + return false unless outer && inner - match[1].scan(/([A-Za-z_]\w*)\s*:\s*([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)/).to_h + starts_before = outer[0] < inner[0] || (outer[0] == inner[0] && outer[1] <= inner[1]) + ends_after = outer[2] > inner[2] || (outer[2] == inner[2] && outer[3] >= inner[3]) + starts_before && ends_after end class Report diff --git a/gems/decomplex/lib/decomplex/state_mesh.rb b/gems/decomplex/lib/decomplex/state_mesh.rb index edc476998..1ac809619 100644 --- a/gems/decomplex/lib/decomplex/state_mesh.rb +++ b/gems/decomplex/lib/decomplex/state_mesh.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -require_relative "ast" +require_relative "syntax" require_relative "semantic_alias" require "json" @@ -12,9 +12,9 @@ module Decomplex # organized by dir -> file -> function. # # Phases: - # 1. Discover state fields (ATTRASGN + IASGN with >= min_writes) - # 2. Find all write sites (reusing CoUpdate's walk logic) - # 3. Find all read sites (new walker: CALL/IVAR matching field names) + # 1. Discover state fields from Syntax state-write facts + # 2. Find all write sites from Syntax state-write facts + # 3. Find all read sites from Syntax state-read facts # 4. Find re-derivation sites via SemanticAlias reification misses # 5. Compute messiness per field # 6. Render hierarchical JSON graph @@ -32,16 +32,14 @@ class StateMesh # `custom_fields` overrides field discovery with an explicit list. # `min_writes` is the threshold for auto-discovered fields (default 2). 
def self.scan(files, min_writes: 2, custom_fields: nil) - src_map = {} - files.each do |f| - root, lines = Ast.parse(f) - src_map[f] = { root: root, lines: lines } + documents = files.to_h do |file| + [file, Syntax.parse(file, parser: "tree_sitter")] end - new(src_map, min_writes: min_writes, custom_fields: custom_fields) + new(documents, min_writes: min_writes, custom_fields: custom_fields) end - def initialize(src_map, min_writes: 2, custom_fields: nil) - @src_map = src_map + def initialize(documents, min_writes: 2, custom_fields: nil) + @documents = documents @min_writes = min_writes @custom_fields = custom_fields @writes = [] @@ -52,44 +50,19 @@ def initialize(src_map, min_writes: 2, custom_fields: nil) # ---- Phase 1+2: discover fields and walk write sites --------------- def discover_fields! - @src_map.each do |file, data| - walk_writes(data[:root], data[:lines], [], file) - end - end - - def walk_writes(node, lines, defstack, file) - return unless Ast.node?(node) - - case node.type - when :DEFN then defstack = defstack + [node.children[0].to_s] - when :DEFS then defstack = defstack + [node.children[1].to_s] - when :ATTRASGN - recv, msg, = node.children - if msg == :[]= - node.children.each { |c| walk_writes(c, lines, defstack, file) } - return + @documents.each do |file, document| + document.state_writes.each do |write| + @writes << Write.new( + attr: write.field, + norm: normalize(write.field), + recv: write.receiver, + file: file, + defn: write.function, + line: write.line, + span: write.span + ) end - attr = msg.to_s.sub(/=$/, "") - norm = normalize(attr) - span = [node.first_lineno, node.first_column, - node.last_lineno, node.last_column] - @writes << Write.new(attr: attr, norm: norm, - recv: recv_slice(node.children[0], lines), - file: file, - defn: defstack.last || "(top-level)", - line: node.first_lineno, span: span) - when :IASGN - attr = node.children[0].to_s # "@storage" - norm = normalize(attr) - span = [node.first_lineno, node.first_column, - 
node.last_lineno, node.last_column] - @writes << Write.new(attr: attr, norm: norm, recv: "self", - file: file, - defn: defstack.last || "(top-level)", - line: node.first_lineno, span: span) end - - node.children.each { |c| walk_writes(c, lines, defstack, file) } end # ---- Phase 3: walk read sites ------------------------------------- @@ -98,50 +71,23 @@ def find_reads! # Build the set of normalized field names we care about. field_norms = known_field_norms - @src_map.each do |file, data| - walk_reads(data[:root], data[:lines], [], file, field_norms) - end - end - - def walk_reads(node, lines, defstack, file, field_norms) - return unless Ast.node?(node) - - case node.type - when :DEFN then defstack = defstack + [node.children[0].to_s] - when :DEFS then defstack = defstack + [node.children[1].to_s] - when :CALL, :OPCALL - # CALL(recv, :method, args) - attribute reads have no args - recv = node.children[0] - mid = node.children[1] - args = node.children[2] - - # Skip if called with arguments (it's a method call, not attr read) - if args.nil? || (Ast.node?(args) && args.type == :LIST && args.children.compact.empty?) - name = mid.to_s - if field_norms.include?(name) - span = [node.first_lineno, node.first_column, - node.last_lineno, node.last_column] - @reads << Read.new(attr: name, norm: name, - recv: recv_slice(recv, lines), - file: file, - defn: defstack.last || "(top-level)", - line: node.first_lineno, span: span) - end - end - when :IVAR - name = node.children[0].to_s # e.g. 
"@storage" - norm = normalize(name) - if field_norms.include?(norm) - span = [node.first_lineno, node.first_column, - node.last_lineno, node.last_column] - @reads << Read.new(attr: name, norm: norm, recv: "self", - file: file, - defn: defstack.last || "(top-level)", - line: node.first_lineno, span: span) + @documents.each do |file, document| + document.state_reads.each do |read| + norm = normalize(read.field) + next unless field_norms.include?(norm) + next if write_target_read?(read) + + @reads << Read.new( + attr: read.field, + norm: norm, + recv: read.receiver, + file: file, + defn: read.function, + line: read.line, + span: read.span + ) end end - - node.children.each { |c| walk_reads(c, lines, defstack, file, field_norms) } end # ---- Phase 4: re-derivation sites --------------------------------- @@ -152,7 +98,7 @@ def find_re_derivations!(reification_misses = nil) # Accept pre-computed misses (for testing) or compute them. if reification_misses.nil? - files = @src_map.keys + files = @documents.keys sa = SemanticAlias.scan(files) reification_misses = sa.reification_misses end @@ -428,13 +374,21 @@ def known_field_norms end end - def recv_slice(node, lines) - return "?" unless Ast.node?(node) + def write_target_read?(read) + @writes.any? do |write| + write.file == read.file && + write.defn == read.function && + write.recv == read.receiver && + write.attr == read.field && + write.line == read.line && + same_start?(write.span, read.span) + end + end + + def same_start?(write_span, read_span) + return false unless write_span && read_span - sl = node.first_lineno - el = node.last_lineno - t = sl == el ? lines[sl - 1][node.first_column...node.last_column] : lines[sl - 1][node.first_column..] 
- t.to_s.strip.gsub(/\s+/, " ") + write_span[0] == read_span[0] && write_span[1] == read_span[1] end end end diff --git a/gems/decomplex/lib/decomplex/structural_topology.rb b/gems/decomplex/lib/decomplex/structural_topology.rb index cdd53c120..e0f6692ee 100644 --- a/gems/decomplex/lib/decomplex/structural_topology.rb +++ b/gems/decomplex/lib/decomplex/structural_topology.rb @@ -1,11 +1,11 @@ # frozen_string_literal: true -require_relative "ast" +require_relative "syntax" module Decomplex # StructuralTopology is Decomplex's conservative static model of method - # ownership and direct internal calls over the normalized Tree-sitter AST. - # It deliberately resolves only same-owner bare/self calls; dynamic dispatch + # ownership and direct internal calls over Syntax structural facts. It + # deliberately resolves only same-owner bare/self calls; dynamic dispatch # belongs to higher-recall detectors. class StructuralTopology Method = Struct.new(:id, :owner, :name, :file, :line, :span, :visibility, keyword_init: true) @@ -14,26 +14,18 @@ class StructuralTopology keyword_init: true ) - VISIBILITY_MIDS = %i[public protected private].freeze - OWNER_TYPES = %i[CLASS MODULE].freeze - METHOD_TYPES = %i[DEFN DEFS].freeze - SKIP_NESTED_TYPES = %i[CLASS MODULE DEFN DEFS LAMBDA].freeze - CONDITIONAL_TYPES = %i[IF UNLESS CASE CASE2].freeze - ITERATION_TYPES = %i[ITER FOR WHILE UNTIL].freeze - def self.scan(files) - methods = [] - parsed = files.each_with_object({}) do |file, out| - out[file] = Ast.parse(file) + documents = files.to_h do |file| + [file, Syntax.parse(file, parser: "tree_sitter")] end - parsed.each do |file, (root, lines)| - methods.concat(MethodCollector.new(file, lines).scan(root)) + methods = documents.flat_map do |file, document| + MethodFacts.new(file, document).methods end - - edges = parsed.flat_map do |file, (root, lines)| - EdgeCollector.new(file, lines, methods).scan(root) + edges = documents.flat_map do |file, document| + EdgeFacts.new(file, document, 
methods).edges end + edges.uniq! { |edge| [edge.caller, edge.callee, edge.type] } Graph.new(methods, edges) end @@ -102,237 +94,61 @@ def call_sites(id) end end - class MethodCollector - def initialize(file, lines) + class MethodFacts + def initialize(file, document) @file = file - @lines = lines - end - - def scan(root) - out = [] - top_level_methods(root).each { |method| out << method } - walk(root, [], out) - out - end - - private - - def top_level_methods(root) - methods_from_statements(top_level_statements(root), top_level_owner) - end - - def walk(node, owners, out) - return unless Ast.node?(node) - - if OWNER_TYPES.include?(node.type) - owner = full_owner_name(owners, node) - owner_methods(node, owner).each { |method| out << method } - node.children.each { |child| walk(child, owners + [owner_segment(node)], out) } - else - node.children.each { |child| walk(child, owners, out) } - end - end - - def owner_methods(owner_node, owner) - body = owner_body(owner_node) - return [] unless Ast.node?(body) - - methods_from_statements(owner_statements(body), owner) - end - - def methods_from_statements(statements, owner) - methods = [] - visibility = :public - statements.each do |stmt| - next unless Ast.node?(stmt) - - if bare_visibility_marker?(stmt) - visibility = stmt.children[0].to_sym - elsif visibility_call?(stmt) - visibility = handle_visibility_call(stmt, owner, visibility, methods) - elsif METHOD_TYPES.include?(stmt.type) - methods << method_record(stmt, owner, visibility) - end - end - methods - end - - def handle_visibility_call(stmt, owner, current_visibility, methods) - visibility = stmt.children[0].to_sym - args = stmt.children[1] - return visibility unless Ast.node?(args) - - each_arg(args) do |arg| - if METHOD_TYPES.include?(arg.type) - methods << method_record(arg, owner, visibility) - elsif (name = literal_method_name(arg)) - method = methods.reverse.find { |row| row.name == name } - method.visibility = visibility if method - end - end - - 
current_visibility - end - - def owner_body(owner_node) - scope = owner_node.children[owner_node.type == :CLASS ? 2 : 1] - return nil unless Ast.node?(scope) && scope.type == :SCOPE - - scope.children[2] - end - - def owner_statements(body) - body.type == :BLOCK ? body.children.compact : [body] - end - - def top_level_statements(root) - return [] unless Ast.node?(root) - - root.children.compact.flat_map do |child| - Ast.node?(child) && child.type == :BLOCK ? child.children.compact : [child] - end - end - - def bare_visibility_marker?(node) - node.type == :VCALL && VISIBILITY_MIDS.include?(node.children[0]) - end - - def visibility_call?(node) - node.type == :FCALL && VISIBILITY_MIDS.include?(node.children[0]) - end - - def each_arg(args) - args.children.compact.each do |arg| - yield arg if Ast.node?(arg) - end - end - - def literal_method_name(node) - return node.children[0].to_s if node.type == :LIT && node.children[0].is_a?(Symbol) - return node.children[0].to_s if %i[STR DSTR].include?(node.type) - - nil - end - - def method_record(node, owner, visibility) - name = method_name(node) - Method.new( - id: "#{owner}##{name}", - owner: owner, - name: name, - file: @file, - line: node.first_lineno, - span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], - visibility: node.type == :DEFS ? :public : visibility - ) - end - - def method_name(node) - if node.type == :DEFS - receiver = node.children[0] - prefix = Ast.node?(receiver) && receiver.type == :SELF ? 
"self" : Ast.slice(receiver, @lines) - "#{prefix}.#{node.children[1]}" - else - node.children[0].to_s + @document = document + end + + def methods + @document.function_defs.map do |function| + owner = owner_for_fact(function) + Method.new( + id: "#{owner}##{function.name}", + owner: owner, + name: function.name, + file: @file, + line: function.line, + span: function.span, + visibility: function.visibility || :public + ) end end - def full_owner_name(owners, node) - (owners + [owner_segment(node)]).join("::") - end - - def top_level_owner - "(top-level:#{@file})" - end + private - def owner_segment(node) - text = Ast.slice(node.children[0], @lines) - text.empty? ? "(anonymous)" : text + def owner_for_fact(fact) + TopLevelOwner.new(@file, @document).owner_for(fact) end end - class EdgeCollector - def initialize(file, lines, methods) + class EdgeFacts + def initialize(file, document, methods) @file = file - @lines = lines + @document = document @method_by_id = methods.to_h { |method| [method.id, method] } + @owner_mapper = TopLevelOwner.new(file, document) end - def scan(root) - out = [] - top_level_methods(root).each do |method_node| - method = @method_by_id["#{top_level_owner}##{method_name(method_node)}"] - collect_calls(method_node, method, [], out) if method + def edges + @document.call_sites.filter_map do |call| + edge_for_call(call) end - walk(root, [], out) - out end private - def top_level_methods(root) - top_level_statements(root).select { |stmt| Ast.node?(stmt) && METHOD_TYPES.include?(stmt.type) } - end - - def walk(node, owners, out) - return unless Ast.node?(node) - - if OWNER_TYPES.include?(node.type) - owner = (owners + [owner_segment(node)]).join("::") - owner_methods(node).each do |method_node| - method = @method_by_id["#{owner}##{method_name(method_node)}"] - collect_calls(method_node, method, [], out) if method - end - node.children.each { |child| walk(child, owners + [owner_segment(node)], out) } - else - node.children.each { |child| walk(child, 
owners, out) } - end - end - - def owner_methods(owner_node) - body = owner_body(owner_node) - return [] unless Ast.node?(body) - - owner_statements(body).flat_map do |stmt| - next [] unless Ast.node?(stmt) - - if METHOD_TYPES.include?(stmt.type) - [stmt] - elsif visibility_call?(stmt) - inline_methods(stmt) - else - [] - end - end - end + def edge_for_call(call) + return nil unless call.receiver.to_s == "self" - def inline_methods(stmt) - args = stmt.children[1] - return [] unless Ast.node?(args) + owner = @owner_mapper.owner_for(call) + caller = @method_by_id["#{owner}##{call.function}"] + return nil unless caller - args.children.compact.select { |arg| Ast.node?(arg) && METHOD_TYPES.include?(arg.type) } - end - - def collect_calls(node, caller, context_stack, out) - return unless Ast.node?(node) - return if SKIP_NESTED_TYPES.include?(node.type) && !METHOD_TYPES.include?(node.type) - - context_stack = context_stack + [:conditional] if CONDITIONAL_TYPES.include?(node.type) - context_stack = context_stack + [:iterates] if ITERATION_TYPES.include?(node.type) - - if (edge = internal_edge(node, caller, context_stack)) - out << edge unless edge.caller == edge.callee - end - - node.children.each { |child| collect_calls(child, caller, context_stack, out) } - end - - def internal_edge(node, caller, context_stack) - call = internal_call_name(node, caller) - return nil unless call - - callee = @method_by_id["#{caller.owner}##{call[:name]}"] + callee_name = scoped_name(caller, call.message) + callee = @method_by_id["#{owner}##{callee_name}"] return nil unless callee + return nil if caller.id == callee.id Edge.new( caller: caller.id, @@ -340,70 +156,74 @@ def internal_edge(node, caller, context_stack) caller_name: caller.name, callee_name: callee.name, file: @file, - line: node.first_lineno, - span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], - type: edge_type(context_stack), - kind: call[:kind], - confidence: call[:confidence] + line: 
call.line, + span: call.span, + type: edge_type(call.control), + kind: call_kind(call), + confidence: :high ) end - def internal_call_name(node, caller) - case node.type - when :FCALL, :VCALL - { name: scoped_name(caller, node.children[0]), kind: :bare_internal, confidence: :high } - when :CALL, :OPCALL - receiver, mid = node.children - return nil unless Ast.node?(receiver) && receiver.type == :SELF - - { name: scoped_name(caller, mid), kind: :direct_self, confidence: :high } - end + def scoped_name(caller, message) + caller.name.to_s.start_with?("self.") ? "self.#{message}" : message.to_s end - def scoped_name(caller, mid) - caller.name.start_with?("self.") ? "self.#{mid}" : mid.to_s + def edge_type(control) + %i[conditional iterates].include?(control) ? control : :always end - def edge_type(context_stack) - context_stack.last || :always + def call_kind(call) + source_text(call.span).lstrip.start_with?("self.") ? :direct_self : :bare_internal end - def owner_body(owner_node) - scope = owner_node.children[owner_node.type == :CLASS ? 2 : 1] - return nil unless Ast.node?(scope) && scope.type == :SCOPE + def source_text(span) + return "" unless span - scope.children[2] + first_line, first_column, last_line, last_column = span + if first_line == last_line + return @document.lines[first_line - 1].to_s[first_column...last_column].to_s + end + + parts = [] + parts << @document.lines[first_line - 1].to_s[first_column..].to_s + parts.concat(@document.lines[first_line...(last_line - 1)] || []) + parts << @document.lines[last_line - 1].to_s[0...last_column].to_s + parts.join end + end - def owner_statements(body) - body.type == :BLOCK ? 
body.children.compact : [body] + class TopLevelOwner + def initialize(file, document) + @file = file + @document = document end - def top_level_statements(root) - return [] unless Ast.node?(root) + def owner_for(fact) + owner = fact.owner.to_s + return owner unless owner == file_owner + return owner if enclosed_by_matching_owner?(fact) - root.children.compact.flat_map do |child| - Ast.node?(child) && child.type == :BLOCK ? child.children.compact : [child] - end + top_level_owner end - def visibility_call?(node) - node.type == :FCALL && VISIBILITY_MIDS.include?(node.children[0]) - end + private - def method_name(node) - if node.type == :DEFS - receiver = node.children[0] - prefix = Ast.node?(receiver) && receiver.type == :SELF ? "self" : Ast.slice(receiver, @lines) - "#{prefix}.#{node.children[1]}" - else - node.children[0].to_s + def enclosed_by_matching_owner?(fact) + @document.owner_defs.any? do |owner| + owner.name.to_s == fact.owner.to_s && encloses?(owner.span, fact.span) end end - def owner_segment(node) - text = Ast.slice(node.children[0], @lines) - text.empty? ? "(anonymous)" : text + def encloses?(outer, inner) + return false unless outer && inner + + starts_before = outer[0] < inner[0] || (outer[0] == inner[0] && outer[1] <= inner[1]) + ends_after = outer[2] > inner[2] || (outer[2] == inner[2] && outer[3] >= inner[3]) + starts_before && ends_after + end + + def file_owner + File.basename(@file.to_s, File.extname(@file.to_s)) end def top_level_owner diff --git a/gems/decomplex/lib/decomplex/superfluous_state.rb b/gems/decomplex/lib/decomplex/superfluous_state.rb new file mode 100644 index 000000000..93a0c7505 --- /dev/null +++ b/gems/decomplex/lib/decomplex/superfluous_state.rb @@ -0,0 +1,209 @@ +# frozen_string_literal: true + +require "set" + +module Decomplex + # SuperfluousState -- fields that could be eliminated entirely. + # + # Post-analyzer over StateMesh + ImplicitControlFlow. Does no new AST + # walks. 
Detects four eliminability patterns: + # + # 1. Dead state -- written but never read. The field captures a + # value that is never used. Provably removable. + # + # 2. Intra-method pass-through -- field written and read within the + # same method body. The value never escapes the stack frame. + # Memoized cache accessors (read-before-write pattern) are + # disqualified. + # + # 3. Adjacent-call pass-through -- single-writer single-reader where + # every observed callsite has writer immediately preceding reader. + # + # 4. Derived cache -- computed from other fields, never independently + # mutated. Includes memoized accessors and constructor-set config. + # + # Noise gating: + # - Only self-state (@ivar and self.attr); ignores other.attr. + # - Read-before-write within same method disqualifies intra-method + # (memo pattern, not pass-through). + # - Constructor-set fields get a 0.33x penalty. + # - Fields read only via hash/eql?/to_s/inspect are flagged as + # identity-only (may be eligible for structural replacement). 
class SuperfluousState
  # One eliminability candidate for a single normalized field name.
  Finding = Struct.new(:field, :score, :classification,
                       :writer_method_count, :reader_method_count,
                       :write_sites, :read_sites,
                       :writer_methods, :reader_methods,
                       :ctorset, :adjacent_sites,
                       keyword_init: true) do
    # Plain-hash form of the finding; the score is rounded to three decimals.
    def to_h
      {
        field: field,
        score: score.round(3),
        classification: classification,
        writer_method_count: writer_method_count,
        reader_method_count: reader_method_count,
        write_sites: write_sites,
        read_sites: read_sites,
        writer_methods: writer_methods,
        reader_methods: reader_methods,
        ctorset: ctorset,
        adjacent_sites: adjacent_sites
      }
    end
  end

  # Runs StateMesh (min_writes: 1) over `files`, builds the adjacent-call
  # lookup from ImplicitControlFlow, and returns the ranked findings.
  def self.scan(files)
    sm = StateMesh.scan(files, min_writes: 1)
    sm.run

    new(sm, build_adjacent_pairs(files)).scan
  end

  # state_mesh     -- object responding to `writes` and `reads`; each entry
  #                   is read via `norm`, `recv`, `file`, `defn` and `line`.
  # adjacent_pairs -- Hash of [writer_method, reader_method] => collection of
  #                   field norms. A plain `{}` is accepted.
  def initialize(state_mesh, adjacent_pairs = {})
    @sm = state_mesh
    @adjacent_pairs = adjacent_pairs
  end

  # Returns an Array of Finding sorted by descending score.
  def scan
    group_by_field
    score_and_rank
  end

  private

  # Buckets self-receiver writes and reads by normalized field name.
  # Accesses on any other receiver (other.attr) are ignored here.
  def group_by_field
    @writes_by = Hash.new { |h, k| h[k] = [] }
    @reads_by = Hash.new { |h, k| h[k] = [] }

    @sm.writes.each { |w| @writes_by[w.norm] << w if w.recv == "self" }
    @sm.reads.each { |r| @reads_by[r.norm] << r if r.recv == "self" }
  end

  # Collects dead-state findings first, then pattern 2-4 findings, and
  # orders the combined list by descending score.
  def score_and_rank
    all_norms = (@writes_by.keys + @reads_by.keys).uniq
    results = dead_state_findings(all_norms) + eliminability_findings(all_norms)
    results.sort_by { |r| -r.score }
  end

  # Pattern 1: fields with self-writes and no reads at all. Fixed score 0.85.
  def dead_state_findings(all_norms)
    all_norms.filter_map do |norm|
      next unless @writes_by.key?(norm) && !@reads_by.key?(norm)
      # Reject if StateMesh has ANY read (including non-self reads),
      # not just the self-filtered reads grouped above.
      next if @sm.reads.any? { |r| r.norm == norm }

      writers = @writes_by[norm]
      Finding.new(
        field: norm, score: 0.85, classification: "dead_state",
        writer_method_count: writers.map { |w| [w.file, w.defn] }.uniq.size,
        reader_method_count: 0,
        write_sites: site_labels(writers),
        read_sites: [],
        writer_methods: writers.map(&:defn).uniq,
        reader_methods: [],
        ctorset: writers.all? { |w| w.defn == "initialize" },
        adjacent_sites: nil
      )
    end
  end

  # Patterns 2-4: fields that are both written and read on self.
  # score = 1 / (writers * readers + 1) * intra bonus (10x)
  #         * adjacent-call bonus (5x) * constructor-set penalty (0.33x);
  # candidates scoring below 0.1 are dropped.
  def eliminability_findings(all_norms)
    all_norms.filter_map do |norm|
      # fetch avoids materializing empty buckets through the default proc.
      writers = @writes_by.fetch(norm, [])
      readers = @reads_by.fetch(norm, [])
      next if writers.empty? || readers.empty?

      writer_methods = writers.map { |w| [w.file, w.defn] }.uniq
      reader_methods = readers.map { |r| [r.file, r.defn] }.uniq
      wc = writer_methods.size
      rc = reader_methods.size

      intra = intra_method?(writers, readers, writer_methods, reader_methods)
      ctorset = wc == 1 && writer_methods[0][1] == "initialize"

      adj_sites = nil
      if wc == 1 && rc == 1 && !intra
        adj_sites = adjacent_fields(writer_methods[0][1], reader_methods[0][1], norm)
      end

      score = 1.0 / (wc * rc + 1)
      score *= 10.0 if intra
      score *= 5.0 if adj_sites
      score *= 0.33 if ctorset
      next if score < 0.1

      classification = if intra
                         "intra_method"
                       elsif adj_sites
                         "adjacent_call"
                       else
                         "derived_cache"
                       end

      Finding.new(
        field: norm,
        score: score,
        classification: classification,
        writer_method_count: wc,
        reader_method_count: rc,
        write_sites: site_labels(writers),
        read_sites: site_labels(readers),
        writer_methods: writer_methods.map(&:last).uniq,
        reader_methods: reader_methods.map(&:last).uniq,
        ctorset: ctorset,
        adjacent_sites: adj_sites
      )
    end
  end

  # True when every write and read lives in one (file, method) pair and no
  # read sits on a line before the earliest write. A read that precedes the
  # first write means the field carries state in from outside the method
  # (memo / read-modify-write), so it is not a pass-through.
  def intra_method?(writers, readers, writer_methods, reader_methods)
    return false unless (writer_methods + reader_methods).uniq.size == 1

    first_write_line = writers.map(&:line).min
    readers.none? { |r| r.line < first_write_line }
  end

  # Field list recorded for the (writer, reader) method pair when it
  # contains `norm`; nil otherwise. Uses fetch so that a plain `{}` lookup
  # (the constructor default and the build_adjacent_pairs fallback) yields
  # nil instead of raising, and so a default-proc hash is not mutated.
  def adjacent_fields(writer_name, reader_name, norm)
    fields = @adjacent_pairs.fetch([writer_name, reader_name], nil)
    fields.to_a if fields&.include?(norm)
  end

  # "file:method:line" labels, de-duplicated, in first-seen order.
  def site_labels(sites)
    sites.map { |s| "#{s.file}:#{s.defn}:#{s.line}" }.uniq
  end

  # Build a lookup: (writer_method, reader_method) -> Set[field_norm]
  # from ImplicitControlFlow's ordered protocol facts.
  #
  # Best effort: any StandardError is reported with `warn` and an empty Hash
  # is returned. `private` above does not apply to `def self.` methods, so
  # this stays publicly callable.
  def self.build_adjacent_pairs(files)
    pairs = Hash.new { |h, k| h[k] = Set.new }
    report = ImplicitControlFlow.scan(files)
    report.ordered_protocols.each do |proto|
      next unless proto[:dependency] == "write_read"

      writer, reader = proto[:protocol]
      fields = proto[:states]
      fields.each { |f| pairs[[writer, reader]].add(f) }
    end
    pairs
  rescue StandardError => e
    warn "SuperfluousState: ImplicitControlFlow unavailable: #{e.message}"
    {}
  end
end
:decision_line, :decision_span, :predicate, :member, :body, keyword_init: true) + PredicateDef = Struct.new(:file, :name, :owner, :body, :line, :span, keyword_init: true) + ComparisonSite = Struct.new(:file, :function, :line, :span, :source, :operator, keyword_init: true) + LocalMethod = Struct.new(:id, :owner, :name, :file, :line, :span, :node, + :statements, :boundaries, keyword_init: true) + LocalStatement = Struct.new(:index, :line, :end_line, :span, :source, :reads, + :writes, :dependencies, :co_uses, keyword_init: true) + LocalBoundary = Struct.new(:before_index, :after_index, :line, :kind, :text, keyword_init: true) + PathConditionSite = Struct.new(:guards, :action, :file, :function, :line, :span, keyword_init: true) LanguageLexicon = Struct.new( :type_guard_patterns, :diagnostic_patterns, :trivial_patterns, :nil_literal_patterns, @@ -57,822 +66,1223 @@ def call_name?(source, names) end end - GENERIC_LEXICON = LanguageLexicon.new( - nil_literal_patterns: [/\b(?:nil|null|none|undefined)\b/i].freeze, - type_guard_patterns: [ - /\b(?:isinstance|typeof|typeid|instanceof)\b/, - /(?:\?\.|&\.)/, - /@typeInfo\b/, - /\bkind\s*(?:==|!=)/ - ].freeze, - diagnostic_patterns: [ - /\b(?:throw|panic|abort|unreachable)\b/, - /\breturn\s+error[.\w]*/ - ].freeze, - trivial_patterns: [ - /\A(?:nil|null|None|undefined|true|false|0|1|break|continue|unreachable)\s*;?\z/, - /\Areturn\s+(?:nil|null|None|undefined|true|false|0|1)\s*;?\z/ - ].freeze - ).freeze - RUBY_LEXICON = LanguageLexicon.new( - nil_literal_patterns: [/\bnil\b/].freeze, - type_guard_patterns: [ - /(?:\A|[^\w!?])(?:nil\?|is_a\?|kind_of\?|instance_of\?|respond_to\?)(?:\s*\(|\b)/, - /&\./ - ].freeze, - diagnostic_patterns: [ - /(?:\A|[^\w!?])(?:raise|fail|abort)[!?]?(?:\s*\(|\b)/ - ].freeze, - trivial_patterns: [ - /\A(?:nil|true|false|0|1|break|next)\s*;?\z/, - /\Areturn\s+(?:nil|true|false|0|1)\s*;?\z/ - ].freeze - ).freeze - PYTHON_LEXICON = LanguageLexicon.new( - nil_literal_patterns: [/\bNone\b/].freeze, - 
type_guard_patterns: [ - /\b(?:isinstance|issubclass|hasattr)\s*\(/, - /\bis\s+(?:not\s+)?None\b/, - /\btype\s*\([^)]*\)\s*(?:==|is)\s*/ - ].freeze, - diagnostic_patterns: [ - /\braise\b/, - /\bassert\b/, - /\bsys\.exit\s*\(/ - ].freeze, - trivial_patterns: [ - /\A(?:None|True|False|0|1|break|continue|pass)\s*;?\z/, - /\Areturn\s+(?:None|True|False|0|1)\s*;?\z/ - ].freeze - ).freeze - JAVASCRIPT_LEXICON = LanguageLexicon.new( - nil_literal_patterns: [/\b(?:null|undefined)\b/].freeze, - type_guard_patterns: [ - /\btypeof\b/, - /\binstanceof\b/, - /(?:\?\.|\b(?:==|!=|===|!==)\s*(?:null|undefined)\b)/ - ].freeze, - diagnostic_patterns: [ - /\bthrow\b/, - /\bprocess\.exit\s*\(/ - ].freeze, - trivial_patterns: [ - /\A(?:null|undefined|true|false|0|1|break|continue)\s*;?\z/, - /\Areturn\s+(?:null|undefined|true|false|0|1)\s*;?\z/ - ].freeze - ).freeze - GO_LEXICON = LanguageLexicon.new( - nil_literal_patterns: [/\bnil\b/].freeze, - type_guard_patterns: [ - /\bnil\b/, - /\.\(type\)/, - /\.\([A-Za-z_]\w*(?:\.[A-Za-z_]\w*)*\)/ - ].freeze, - diagnostic_patterns: [ - /\bpanic\s*\(/, - /\breturn\s+error[.\w]*/ - ].freeze, - trivial_patterns: [ - /\A(?:nil|true|false|0|1|break|continue|fallthrough)\s*;?\z/, - /\Areturn\s+(?:nil|true|false|0|1)\s*;?\z/ - ].freeze - ).freeze - RUST_LEXICON = LanguageLexicon.new( - nil_literal_patterns: [/\bNone\b/].freeze, - type_guard_patterns: [ - /\b(?:is_some|is_none)\s*\(/, - /\b(?:Some|None)\b/, - /\bmatches!\s*\(/ - ].freeze, - diagnostic_patterns: [ - /\b(?:panic|unreachable|todo|unimplemented)!\s*\(/, - /\breturn\s+Err\s*\(/ - ].freeze, - trivial_patterns: [ - /\A(?:None|true|false|0|1|break|continue|unreachable!)\s*;?\z/, - /\Areturn\s+(?:None|true|false|0|1)\s*;?\z/ - ].freeze - ).freeze - ZIG_LEXICON = LanguageLexicon.new( - nil_literal_patterns: [/\bnull\b/].freeze, - type_guard_patterns: [ - /\bnull\b/, - /@typeInfo\b/, - /\bif\s*\([^)]*\)\s*\|/ - ].freeze, - diagnostic_patterns: [ - /@panic\s*\(/, - /\bunreachable\b/, - 
/\breturn\s+error[.\w]*/ - ].freeze, - trivial_patterns: [ - /\A(?:null|true|false|0|1|break|continue|unreachable)\s*;?\z/, - /\Areturn\s+(?:null|true|false|0|1)\s*;?\z/ - ].freeze - ).freeze - LANGUAGE_LEXICONS = { - ruby: RUBY_LEXICON, - python: PYTHON_LEXICON, - javascript: JAVASCRIPT_LEXICON, - typescript: JAVASCRIPT_LEXICON, - go: GO_LEXICON, - rust: RUST_LEXICON, - zig: ZIG_LEXICON - }.freeze - - module_function + class TreeSitterLanguageAdapter + EMPTY_NODE_KINDS = [].freeze + ADAPTER_KIND_METHODS = { + function_node_kinds: :FUNCTION_NODE_KINDS, + class_owner_node_kinds: :CLASS_OWNER_NODE_KINDS, + module_owner_node_kinds: :MODULE_OWNER_NODE_KINDS, + generic_owner_node_kinds: :GENERIC_OWNER_NODE_KINDS, + impl_owner_node_kinds: :IMPL_OWNER_NODE_KINDS, + struct_owner_node_kinds: :STRUCT_OWNER_NODE_KINDS, + union_owner_node_kinds: :UNION_OWNER_NODE_KINDS, + enum_owner_node_kinds: :ENUM_OWNER_NODE_KINDS, + anonymous_owner_node_kinds: :ANONYMOUS_OWNER_NODE_KINDS, + call_node_kinds: :CALL_NODE_KINDS, + adjacent_call_node_kinds: :ADJACENT_CALL_NODE_KINDS, + parameter_list_node_kinds: :PARAMETER_LIST_NODE_KINDS, + method_parameter_list_node_kinds: :METHOD_PARAMETER_LIST_NODE_KINDS, + inline_parameter_node_kinds: :INLINE_PARAMETER_NODE_KINDS, + function_body_node_kinds: :FUNCTION_BODY_NODE_KINDS, + nested_statement_wrapper_node_kinds: :NESTED_STATEMENT_WRAPPER_NODE_KINDS, + identifier_node_kinds: :IDENTIFIER_NODE_KINDS, + local_identifier_wrapper_node_kinds: :LOCAL_IDENTIFIER_WRAPPER_NODE_KINDS, + indexed_lhs_node_kinds: :INDEXED_LHS_NODE_KINDS, + assignment_node_kinds: :ASSIGNMENT_NODE_KINDS, + assignment_operator_tokens: :ASSIGNMENT_OPERATOR_TOKENS, + local_declaration_node_kinds: :LOCAL_DECLARATION_NODE_KINDS, + short_variable_declaration_node_kinds: :SHORT_VARIABLE_DECLARATION_NODE_KINDS, + variable_declaration_node_kinds: :VARIABLE_DECLARATION_NODE_KINDS, + declaration_assignment_node_kinds: :DECLARATION_ASSIGNMENT_NODE_KINDS, + path_action_node_kinds: 
:PATH_ACTION_NODE_KINDS, + simple_action_wrapper_node_kinds: :SIMPLE_ACTION_WRAPPER_NODE_KINDS, + comparison_node_kinds: :COMPARISON_NODE_KINDS, + branch_node_kinds: :BRANCH_NODE_KINDS, + loop_node_kinds: :LOOP_NODE_KINDS, + text_loop_node_kinds: :TEXT_LOOP_NODE_KINDS, + labeled_loop_node_kinds: :LABELED_LOOP_NODE_KINDS, + case_node_kinds: :CASE_NODE_KINDS, + hidden_case_wrapper_node_kinds: :HIDDEN_CASE_WRAPPER_NODE_KINDS, + hidden_match_node_kinds: :HIDDEN_MATCH_NODE_KINDS, + branch_loop_node_kinds: :BRANCH_LOOP_NODE_KINDS, + branch_case_node_kinds: :BRANCH_CASE_NODE_KINDS, + if_node_kinds: :IF_NODE_KINDS, + hidden_if_token_kinds: :HIDDEN_IF_TOKEN_KINDS, + hidden_case_token_kinds: :HIDDEN_CASE_TOKEN_KINDS, + case_arm_node_kinds: :CASE_ARM_NODE_KINDS, + when_case_arm_node_kinds: :WHEN_CASE_ARM_NODE_KINDS, + switch_case_arm_node_kinds: :SWITCH_CASE_ARM_NODE_KINDS, + case_pattern_node_kinds: :CASE_PATTERN_NODE_KINDS, + case_subject_node_kinds: :CASE_SUBJECT_NODE_KINDS, + case_container_stop_node_kinds: :CASE_CONTAINER_STOP_NODE_KINDS, + case_subject_skip_node_kinds: :CASE_SUBJECT_SKIP_NODE_KINDS, + default_case_patterns: :DEFAULT_CASE_PATTERNS, + boolean_and_operators: :BOOLEAN_AND_OPERATORS, + boolean_container_node_kinds: :BOOLEAN_CONTAINER_NODE_KINDS, + boolean_wrapper_node_kinds: :BOOLEAN_WRAPPER_NODE_KINDS, + parenthesized_wrapper_node_kinds: :PARENTHESIZED_WRAPPER_NODE_KINDS, + parenthesized_pattern_node_kinds: :PARENTHESIZED_PATTERN_NODE_KINDS, + hidden_if_wrapper_node_kinds: :HIDDEN_IF_WRAPPER_NODE_KINDS, + local_variable_declarator_node_kinds: :LOCAL_VARIABLE_DECLARATOR_NODE_KINDS, + field_declaration_node_kinds: :FIELD_DECLARATION_NODE_KINDS, + declaration_site_parent_node_kinds: :DECLARATION_SITE_PARENT_NODE_KINDS, + receiver_type_node_kinds: :RECEIVER_TYPE_NODE_KINDS, + method_receiver_node_kinds: :METHOD_RECEIVER_NODE_KINDS, + receiver_parameter_node_kinds: :RECEIVER_PARAMETER_NODE_KINDS, + first_argument_receiver_type_node_kinds: 
:FIRST_ARGUMENT_RECEIVER_TYPE_NODE_KINDS, + first_argument_receiver_name_node_kinds: :FIRST_ARGUMENT_RECEIVER_NAME_NODE_KINDS, + bound_container_wrapper_node_kinds: :BOUND_CONTAINER_WRAPPER_NODE_KINDS, + bound_container_parent_node_kinds: :BOUND_CONTAINER_PARENT_NODE_KINDS, + bound_container_name_node_kinds: :BOUND_CONTAINER_NAME_NODE_KINDS, + adjacent_method_invocation_node_kinds: :ADJACENT_METHOD_INVOCATION_NODE_KINDS, + argument_list_node_kinds: :ARGUMENT_LIST_NODE_KINDS, + self_call_identifier_node_kinds: :SELF_CALL_IDENTIFIER_NODE_KINDS, + self_receiver_names: :SELF_RECEIVER_NAMES, + field_identifier_node_kinds: :FIELD_IDENTIFIER_NODE_KINDS, + declarator_node_kinds: :DECLARATOR_NODE_KINDS, + assignment_state_declaration_node_kinds: :ASSIGNMENT_STATE_DECLARATION_NODE_KINDS, + accessor_call_node_kinds: :ACCESSOR_CALL_NODE_KINDS, + expression_list_node_kinds: :EXPRESSION_LIST_NODE_KINDS, + navigation_suffix_node_kinds: :NAVIGATION_SUFFIX_NODE_KINDS, + literal_field_expression_node_kinds: :LITERAL_FIELD_EXPRESSION_NODE_KINDS, + block_argument_node_kinds: :BLOCK_ARGUMENT_NODE_KINDS, + parameter_identifier_node_kinds: :PARAMETER_IDENTIFIER_NODE_KINDS, + member_access_operator_tokens: :MEMBER_ACCESS_OPERATOR_TOKENS, + public_visibility_tokens: :PUBLIC_VISIBILITY_TOKENS, + field_like_node_kinds: :FIELD_LIKE_NODE_KINDS + }.freeze - def parse(file, language: nil, parser: ENV.fetch("DECOMPLEX_PARSER", "tree_sitter")) - normalized_parser = parser.to_s.tr("-", "_") - lang = (language || language_for(file)).to_sym - key = document_cache_key(file, lang, normalized_parser) - document_cache.fetch(key) do - document_cache[key] = - case normalized_parser - when "", "tree_sitter", "treesitter" - TreeSitterAdapter.new.parse(file, language: lang) - else - raise ArgumentError, "unknown decomplex parser #{parser.inspect}" - end + ADAPTER_KIND_METHODS.each do |method_name, constant_name| + define_method(method_name) { adapter_node_kinds(constant_name) } end - end - def document_cache 
- @document_cache ||= {} - end - - def document_cache_key(file, language, parser) - stat = File.stat(file) - [File.expand_path(file), language, parser, stat.size, stat.mtime.to_f] - end + attr_reader :language, :extensions, :lexicon, :package, :grammar_names, + :tree_sitter_language_name - def parse_uncached(file, language: nil, parser: ENV.fetch("DECOMPLEX_PARSER", "tree_sitter")) - case parser.to_s.tr("-", "_") - when "", "tree_sitter", "treesitter" - TreeSitterAdapter.new.parse(file, language: language) - else - raise ArgumentError, "unknown decomplex parser #{parser.inspect}" + def initialize(language:, extensions:, lexicon:, package:, grammar_names: nil, + tree_sitter_language_name: nil, first_argument_receiver: false) + @language = language.to_sym + @extensions = Array(extensions).freeze + @lexicon = lexicon + @package = package + @grammar_names = Array(grammar_names || language.to_s).freeze + @tree_sitter_language_name = tree_sitter_language_name || language.to_s + @first_argument_receiver = first_argument_receiver end - end - - def parser - ENV.fetch("DECOMPLEX_PARSER", "tree_sitter").to_s.tr("-", "_") - end - - def tree_sitter? - %w[tree_sitter treesitter].include?(parser) - end - def language_for(file) - case File.extname(file).downcase - when ".rb" then :ruby - when ".py", ".pyi" then :python - when ".js", ".jsx", ".mjs", ".cjs" then :javascript - when ".ts", ".tsx" then :typescript - when ".go" then :go - when ".rs" then :rust - when ".zig" then :zig - else :ruby + def first_argument_receiver? + @first_argument_receiver end - end - def supported_exts(parser: self.parser) - case parser.to_s.tr("-", "_") - when "", "tree_sitter", "treesitter" - %w[.rb .py .pyi .js .jsx .mjs .cjs .ts .tsx .go .rs .zig] - else - [] + def adapter_node_kinds(constant_name) + self.class.const_defined?(constant_name) ? 
self.class.const_get(constant_name) : EMPTY_NODE_KINDS end - end - - def supported_source?(file, parser: self.parser) - supported_exts(parser: parser).include?(File.extname(file).downcase) - end - - def language_lexicon(language) - key = language.to_s.empty? ? nil : language.to_sym - LANGUAGE_LEXICONS.fetch(key, GENERIC_LEXICON) - end - class Document - attr_reader :file, :language, :source, :lines, :root, :adapter + def function_name(node) + return nil unless function_node_kinds.include?(node.kind) - def initialize(file:, language:, source:, lines:, root:, adapter:) - @file = file - @language = language - @source = source - @lines = lines - @root = root - @adapter = adapter + named_field(node, "name")&.text || + declarator_name(named_field(node, "declarator")) || + first_named_text(node, identifier_node_kinds + field_identifier_node_kinds) end - def decision_sites - @decision_sites ||= adapter.decision_sites(self) + def function_kind(_document, node, stack) + owner_for_node(nil, node, stack: stack) ? 
:method : :function end - def state_writes - @state_writes ||= adapter.state_writes(self) + def visibility(_document, node) + modifier_visibility(node) end - def state_reads - @state_reads ||= adapter.state_reads(self) + def owner_name_from_declaration(document, node) + if (class_owner_node_kinds + module_owner_node_kinds).include?(node.kind) + named_field(node, "name")&.text || + first_named_text(node, identifier_node_kinds + field_identifier_node_kinds) + elsif generic_owner_node_kinds.include?(node.kind) + named_field(node, "name")&.text || + first_named_text(node, identifier_node_kinds + field_identifier_node_kinds) + elsif impl_owner_node_kinds.include?(node.kind) + impl_owner_name(node) + elsif struct_owner_node_kinds.include?(node.kind) + named_field(node, "name")&.text || + first_named_text(node, identifier_node_kinds + field_identifier_node_kinds) + elsif anonymous_owner_node_kinds.include?(node.kind) + bound_container_name(node) || + returned_container_owner(document, node) || + anonymous_owner_name(document, node) + end end - def branch_decisions(immutable_readers:, immutable_reader_types:, type_aliases:) - adapter.branch_decisions( - self, - immutable_readers: immutable_readers, - immutable_reader_types: immutable_reader_types, - type_aliases: type_aliases - ) + def owner_kind(node) + if class_owner_node_kinds.include?(node.kind) + :class + elsif module_owner_node_kinds.include?(node.kind) + :module + elsif impl_owner_node_kinds.include?(node.kind) + :impl + elsif union_owner_node_kinds.include?(node.kind) + :union + elsif enum_owner_node_kinds.include?(node.kind) + :enum + elsif (struct_owner_node_kinds + anonymous_owner_node_kinds).include?(node.kind) + :struct + else :owner + end end - def function_defs - @function_defs ||= adapter.function_defs(self) + def function_receiver_name(node, stack) + receiver_param = method_receiver_param_node(node) + receiver_param&.text || + receiver_convention_param_name(node, stack: stack) end - def owner_defs - 
@owner_defs ||= adapter.owner_defs(self) - end + def receiver_convention_owner_name(node, **_context) + return nil unless first_argument_receiver? + return nil unless function_node_kinds.include?(node.kind) - def call_sites - @call_sites ||= adapter.call_sites(self) - end + receiver = first_argument_receiver_parameter(node) + return nil unless receiver - def state_declarations - @state_declarations ||= adapter.state_declarations(self) - end + type = normalize_type_owner(receiver[:type]) + name = function_name(node).to_s + return nil if type.empty? || name.empty? - def state_param_origins - @state_param_origins ||= adapter.state_param_origins(self) + prefix = snake_case_type_name(type) + name.start_with?("#{prefix}_") ? type : nil end - def branch_arms - @branch_arms ||= adapter.branch_arms(self) - end + def receiver_convention_param_name(node, **_context) + return nil unless first_argument_receiver? - def immutable_struct_readers - adapter.immutable_struct_readers(lines) + first_argument_receiver_parameter(node)&.fetch(:name, nil) end - def immutable_struct_reader_types - adapter.immutable_struct_reader_types(lines) + def generated_prelude?(_document, _node) + false end - def type_aliases - adapter.type_aliases(lines) + def call_target(document, node) + if call_node_kinds.include?(node.kind) + generic_call_target(document, node) + elsif adjacent_call_node_kinds.include?(node.kind) + adjacent_argument_call_target(node) + end end - end - module SourceTextHelpers - module_function - - def immutable_struct_readers(lines) - readers = Hash.new { |h, k| h[k] = Set.new } - class_stack = [] - lines.each do |line| - if (match = line.match(/\A\s*class\s+([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\s*<\s*T::Struct\b/)) - class_stack << match[1] - next - end - if class_stack.any? && (match = line.match(/\A\s*const\s+:([A-Za-z_]\w*)\b/)) - readers[class_stack.last].add(match[1].to_sym) - next - end - class_stack.pop if class_stack.any? 
&& line.match?(/\A\s*end\s*(?:#.*)?\z/) - end - readers + def state_declaration(node) + generic_state_declaration(node) end - def immutable_struct_reader_types(lines) - reader_types = Hash.new { |h, k| h[k] = {} } - class_stack = [] - lines.each do |line| - if (match = line.match(/\A\s*class\s+([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\s*<\s*T::Struct\b/)) - class_stack << match[1] - next - end - if class_stack.any? && (match = line.match(/\A\s*const\s+:([A-Za-z_]\w*)\s*,\s*([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\b/)) - reader_types[class_stack.last][match[1].to_sym] = match[2] - next - end - class_stack.pop if class_stack.any? && line.match?(/\A\s*end\s*(?:#.*)?\z/) - end - reader_types + def state_read_target(node) + generic_state_read_target(node) end - def type_aliases(lines) - aliases = {} - lines.each do |line| - if (match = line.match(/\A\s*([A-Z]\w*)\s*=\s*T\.type_alias\s*\{\s*([A-Z]\w*(?:::[A-Z]\w*)*)\s*\}/)) - aliases[match[1]] = match[2] - elsif (match = line.match(/\A\s*([A-Z]\w*)\s*=\s*([A-Z]\w*(?:::[A-Z]\w*)*)\b/)) - aliases[match[1]] = match[2] - end - end - aliases + def state_target(lhs) + generic_state_target(lhs) end end - class TreeSitterAdapter - BRANCH_KINDS = %w[if unless if_statement if_modifier unless_modifier if_expression - while until while_statement for for_statement - case switch_statement expression_switch_statement switch_expression - match_statement match_expression].freeze + class TreeSitterLanguageAdapter + COMPARISON_OPERATORS = %w[== !=].freeze NOISE_MESSAGES = %w[! 
!= == === < <= > >= [] []= to_s inspect class].freeze - LANGUAGE_PACKAGES = { - ruby: "tree-sitter-ruby", - python: "tree-sitter-python", - javascript: "tree-sitter-javascript", - typescript: "tree-sitter-typescript", - go: "tree-sitter-go", - rust: "tree-sitter-rust", - zig: "@tree-sitter-grammars/tree-sitter-zig" - }.freeze - def parse(file, language: nil) - lang = (language || Syntax.language_for(file)).to_sym - source = File.read(file) - parser = parser_for(lang) - tree = parser.parse(source) - raise "tree-sitter parse timed out for #{file}" unless tree + def initial_stack(document) + [{ file_owner: file_owner(document.file), language: document.language }] + end - Document.new( - file: file, - language: lang, - source: source, - lines: source.lines, - root: tree.root_node, - adapter: self - ) + def push_context(document, stack, node) + next_stack = push_owner_context(document, stack, node) + name = function_name(node) + next_stack = name ? next_stack + [function_context(node, next_stack)] : next_stack + control = control_context(node) + control ? 
next_stack + [{ control: control }] : next_stack end - def decision_sites(document) - out = [] - walk(document.root, []) do |node, stack| - record_decision_site(document, node, stack, out) - end + def structural_facts_for_node(document, node, stack) + out = { + function_defs: [], + owner_defs: [], + call_sites: [], + state_declarations: [], + state_param_origins: [], + state_reads: [], + state_writes: [] + } + record_function_def(document, node, stack, out[:function_defs]) + record_owner_def(document, node, stack, out[:owner_defs]) + record_call_site(document, node, stack, out[:call_sites]) + record_state_declaration(document, node, stack, out[:state_declarations]) + record_state_param_origin(document, node, stack, out[:state_param_origins]) + record_state_read(document, node, stack, out[:state_reads]) + record_state_write(document, node, stack, out[:state_writes]) out end - def state_writes(document) - structural_facts(document).fetch(:state_writes) + def descend_into_children?(_node, _stack) + true end - def state_reads(document) - structural_facts(document).fetch(:state_reads) + def after_structural_facts(document, out) + record_implicit_state_accesses(document, out) if implicit_state_accesses? 
end - def branch_decisions(document, immutable_readers:, immutable_reader_types:, type_aliases:) + def decision_site_facts(document, node, stack) out = [] - walk(document.root, []) do |node, stack| - record_branch_decision( - document, - node, - stack, + record_decision_site(document, node, stack, out) + out + end + + def branch_decision_facts(document, node, stack, immutable_readers:, immutable_reader_types:, type_aliases:) + return [] if branch_decision_wrapper_for_real_branch?(node) + + out = [] + record_branch_decision( + document, + node, + stack, out, immutable_readers: immutable_readers, immutable_reader_types: immutable_reader_types, type_aliases: type_aliases, - method_param_types: method_param_types(document.lines) + method_param_types: method_param_types(document) ) + out end + + def branch_arm_facts(document, node, stack) + return [] if branch_decision_wrapper_for_real_branch?(node) + + out = [] + record_branch_arm(document, node, stack, out) out end - def function_defs(document) - structural_facts(document).fetch(:function_defs) + def comparison_site_facts(document, node, stack) + target = comparison_target(node) + return [] unless target + + [ + ComparisonSite.new( + file: document.file, + function: current_function(stack), + line: line(node), + span: span(node), + source: target[:source], + operator: target[:operator] + ) + ] end - def owner_defs(document) - structural_facts(document).fetch(:owner_defs) + def implicit_state_accesses? + false end - def call_sites(document) - structural_facts(document).fetch(:call_sites) + def function_params(node) + params = if method_parameter_list_node_kinds.any? && function_node_kinds.include?(node.kind) + lists = node.named_children.select { |child| method_parameter_list_node_kinds.include?(child.kind) } + lists.size > 1 ? 
lists[1] : lists.first + else + named_field(node, "parameters") || + node.named_children.find do |child| + parameter_list_node_kinds.include?(child.kind) + end + end + params ||= node.named_children.select { |child| inline_parameter_node_kinds.include?(child.kind) } + return [] unless params + + Array(params.respond_to?(:named_children) ? params.named_children : params).filter_map do |param| + parameter_name(param) + end.uniq end - def state_declarations(document) - structural_facts(document).fetch(:state_declarations) + def function_signature(document, node) + body = named_field(node, "body") + text = + if body + document.source.byteslice(node.start_byte, body.start_byte - node.start_byte).to_s.strip + else + line_text(document, node).strip + end + normalize_text(text.empty? ? line_text(document, node) : text) + rescue StandardError + normalize_text(line_text(document, node)) end - def state_param_origins(document) - structural_facts(document).fetch(:state_param_origins) + def method_param_types(_document) + {} end - def structural_facts(document) - @structural_fact_cache ||= {} - @structural_fact_cache[document.object_id] ||= begin - out = { - function_defs: [], - owner_defs: [], - call_sites: [], - state_declarations: [], - state_param_origins: [], - state_reads: [], - state_writes: [] - } - walk(document.root, [{ file_owner: file_owner(document.file) }]) do |node, stack| - record_function_def(document, node, stack, out[:function_defs]) - record_owner_def(document, node, stack, out[:owner_defs]) - record_call_site(document, node, stack, out[:call_sites]) - record_state_declaration(document, node, stack, out[:state_declarations]) - record_state_param_origin(document, node, stack, out[:state_param_origins]) - record_state_read(document, node, stack, out[:state_reads]) - record_state_write(document, node, stack, out[:state_writes]) + def immutable_struct_readers(_document) + {} + end + + def immutable_struct_reader_types(_document) + {} + end + + def 
type_aliases(_document) + {} + end + + def predicate_def(_document, function_def) + body = generic_predicate_body(function_def.body) + return nil unless body + + PredicateDef.new( + file: function_def.file, + name: function_def.name, + owner: function_def.owner, + body: body, + line: function_def.line, + span: function_def.span + ) + end + + def local_methods(document) + document.function_defs.map do |function_def| + statements = generic_function_body_statements(function_def.body) + local_names = generic_local_names(function_def, statements) + local_statements = statements.each_with_index.map do |statement, index| + generic_local_statement(statement, index, local_names) end - out[:function_defs].uniq! { |fn| [fn.file, fn.owner, fn.name, fn.line] } - out[:owner_defs].uniq! { |owner| [owner.file, owner.name, owner.kind] } - out[:call_sites].uniq! { |call| [call.file, call.owner, call.function, call.line, call.receiver, call.message] } - out[:state_declarations].uniq! { |decl| [decl.file, decl.owner, decl.field] } - out[:state_param_origins].uniq! { |origin| [origin.file, origin.owner, origin.function, origin.field, origin.param] } - out[:state_reads].uniq! { |read| [read.file, read.owner, read.function, read.line, read.receiver, read.field] } - out[:state_writes].uniq! 
{ |write| [write.file, write.owner, write.function, write.line, write.receiver, write.field] } - out + owner = local_method_owner(document, function_def.owner) + + LocalMethod.new( + id: "#{owner}##{function_def.name}", + owner: owner, + name: function_def.name, + file: function_def.file, + line: function_def.line, + span: function_def.span, + node: function_def.body, + statements: local_statements, + boundaries: generic_structural_boundaries(document, local_statements) + ) end end - def branch_arms(document) + def path_condition_sites(document) out = [] - walk(document.root, []) do |node, stack| - record_branch_arm(document, node, stack, out) + document.function_defs.each do |function_def| + generic_function_body_statements(function_def.body).each do |statement| + generic_path_walk(document, statement, function_def.name, [], out) + end end out end - def immutable_struct_readers(lines) - SourceTextHelpers.immutable_struct_readers(lines) - end + private - def immutable_struct_reader_types(lines) - SourceTextHelpers.immutable_struct_reader_types(lines) - end + def local_method_owner(document, owner) + file_owner_name = file_owner(document.file) + owner_name = owner.to_s + return "(top-level)" if owner_name == file_owner_name - def type_aliases(lines) - SourceTextHelpers.type_aliases(lines) + owner_name.sub(/\A#{Regexp.escape(file_owner_name)}::/, "") end - private + def generic_predicate_body(node) + body = generic_function_body_node(node) + return nil unless body - def parser_for(language) - require_tree_sitter - lang_name = language.to_s - register_language(lang_name, grammar_path(language)) - ::TreeSitter::Parser.new.tap { |parser| parser.language = lang_name } - end + statement = generic_function_body_statements(node).last || body + source = normalize_text(statement.text) + source = source.sub(/\Areturn\s+/, "").sub(/;\z/, "").strip + return nil if source.empty? 
|| source.length > 200 + return nil unless source.match?(/\A(?:true|false)\z|\b(?:true|false|null|nil)\b|(?:==|!=|&&|\|\||\band\b|\bor\b)/i) - def require_tree_sitter - gem "tree_sitter", "~> 0.1" - require "tree_sitter" - rescue Gem::LoadError, LoadError => e - raise LoadError, "DECOMPLEX_PARSER=tree_sitter requires the tree_sitter gem: #{e.message}" + source end - def register_language(name, path) - @registered ||= {} - return if @registered[name] + def generic_function_body_node(node) + return nil unless ts_node?(node) - ::TreeSitter.register_language(name, path) - @registered[name] = true + named_field(node, "body") || + node.named_children.reverse.find do |child| + function_body_node_kinds.include?(child.kind) + end end - def grammar_path(language) - env_name = "DECOMPLEX_TS_#{language.to_s.upcase}_PATH" - return ENV.fetch(env_name) if ENV[env_name] && File.file?(ENV[env_name]) + def generic_function_body_statements(node) + body = generic_function_body_node(node) + return [] unless body - candidates = grammar_candidates(language) - found = candidates.find { |path| File.file?(path) } - return found if found + named = body.named_children.reject { |child| comment_node?(child) } + if named.size == 1 && nested_statement_wrapper_node_kinds.include?(named.first.kind) + return [named.first] if branch_node?(named.first) - raise LoadError, - "missing Tree-sitter grammar for #{language}. Set #{env_name} " \ - "to a parser shared library (.so/.dylib/.node). Checked: #{candidates.join(', ')}" + named = named.first.named_children.reject { |child| comment_node?(child) } + end + return [] if named.empty? && body.text.to_s.strip.empty? + return [body] if branch_node?(body) + return [body] if generic_assignment_statement?(body) + return [body] if named.empty? 
+ + named end - def grammar_candidates(language) - pkg = LANGUAGE_PACKAGES.fetch(language) - names = ["#{language}.so", "tree-sitter-#{language}.so", - "libtree-sitter-#{language}.so", "#{language}.node", - "tree-sitter-#{language}.node", - "@tree-sitter-grammars+tree-sitter-#{language}.node"] - roots = [ - File.expand_path("../../vendor/tree-sitter", __dir__), - File.expand_path("../../vendor/tree-sitter/#{language}", __dir__), - File.expand_path("../../node_modules/#{pkg}", __dir__), - File.expand_path("../../../../node_modules/#{pkg}", __dir__), - File.expand_path("../../../../../node_modules/#{pkg}", __dir__) - ] - all_prebuilds = roots.flat_map do |root| - Dir.glob(File.join(root, "prebuilds", "*", "*tree-sitter-#{language}.node")) + def generic_local_names(function_def, statements) + names = Set.new(function_def.params.to_a.map(&:to_s)) + if method_receiver_node_kinds.include?(function_def.body.kind) && + (receiver = function_receiver_name(function_def.body, [])) + names.add(receiver) end - prebuilds = platform_prebuilds(all_prebuilds) - roots.product(names).map { |root, name| File.join(root, name) } + prebuilds + statements.each do |statement| + names.merge(generic_local_writes(statement)) + end + names + end + + def generic_local_statement(node, index, local_names) + declaration_target_keys = generic_declaration_target_keys(node) + assignment_lhs_read_target_keys = generic_assignment_lhs_read_target_keys(node) + assignment_lhs_target_keys = generic_assignment_lhs_target_keys(node) + reads = generic_local_reads( + node, + local_names, + declaration_target_keys: declaration_target_keys, + assignment_lhs_target_keys: assignment_lhs_read_target_keys + ).uniq + writes = generic_local_writes( + node, + declaration_target_keys: declaration_target_keys, + assignment_lhs_target_keys: assignment_lhs_target_keys + ).uniq + LocalStatement.new( + index: index, + line: line(node), + end_line: span(node)[2], + span: span(node), + source: normalize_text(node.text), + 
reads: reads.to_set, + writes: writes.to_set, + dependencies: generic_assignment_dependencies( + node, + local_names, + declaration_target_keys: declaration_target_keys, + assignment_lhs_read_target_keys: assignment_lhs_read_target_keys, + assignment_lhs_target_keys: assignment_lhs_target_keys + ), + co_uses: reads.sort.combination(2).map { |left, right| [left, right] } + ) end - def platform_prebuilds(paths) - os = host_os - arch = host_arch - return paths if os.nil? || arch.nil? - - paths.select { |path| path.include?("/#{os}-#{arch}/") } + def generic_local_reads(node, local_names, declaration_target_keys: nil, assignment_lhs_target_keys: nil) + declaration_target_keys ||= generic_declaration_target_keys(node) + assignment_lhs_target_keys ||= generic_assignment_lhs_read_target_keys(node) + reads = [] + generic_walk_local(node) do |child| + name = generic_local_identifier_text(child) + next unless name + next unless local_names.include?(name) + next if generic_local_write_node?(child) + next if assignment_lhs_target_keys.include?(node_key(child)) + next if declaration_target_keys.include?(node_key(child)) + next if generic_declaration_name?(child) + next if generic_member_name?(child) + next if skip_local_read_identifier?(child) + + reads << name + end + reads end - def host_os - case RbConfig::CONFIG["host_os"] - when /linux/i then "linux" - when /darwin/i then "darwin" - when /mswin|mingw|cygwin/i then "win32" + def generic_local_writes(node, declaration_target_keys: nil, assignment_lhs_target_keys: nil) + declaration_target_keys ||= generic_declaration_target_keys(node) + assignment_lhs_target_keys ||= generic_assignment_lhs_target_keys(node) + writes = [] + writes.concat(generic_local_declaration_names(node)) + writes.concat(generic_assignment_lhs_names(node)) + + generic_walk_local(node) do |child| + name = generic_local_identifier_text(child) + next unless name + next unless generic_local_write_node?(child) || + 
declaration_target_keys.include?(node_key(child)) || + assignment_lhs_target_keys.include?(node_key(child)) + + writes << name + end + writes + end + + def generic_assignment_dependencies( + node, + local_names, + declaration_target_keys: nil, + assignment_lhs_read_target_keys: nil, + assignment_lhs_target_keys: nil + ) + declaration_target_keys ||= generic_declaration_target_keys(node) + assignment_lhs_read_target_keys ||= generic_assignment_lhs_read_target_keys(node) + assignment_lhs_target_keys ||= generic_assignment_lhs_target_keys(node) + lhs_names = generic_local_writes( + node, + declaration_target_keys: declaration_target_keys, + assignment_lhs_target_keys: assignment_lhs_target_keys + ) + return [] if lhs_names.empty? + + reads = generic_local_reads( + node, + local_names, + declaration_target_keys: declaration_target_keys, + assignment_lhs_target_keys: assignment_lhs_read_target_keys + ) - lhs_names + lhs_names.product(reads).reject { |left, right| left == right }.uniq + end + + def generic_structural_boundaries(document, statements) + statements.each_cons(2).filter_map do |left, right| + boundary = generic_source_boundary(document, left.end_line + 1, right.line - 1) + next unless boundary + + LocalBoundary.new( + before_index: left.index, + after_index: right.index, + line: boundary[:line], + kind: boundary[:kind], + text: boundary[:text] + ) end end - def host_arch - case RbConfig::CONFIG["host_cpu"] - when /x86_64|amd64/i then "x64" - when /aarch64|arm64/i then "arm64" + def generic_source_boundary(document, first_line, last_line) + return nil if first_line > last_line + + blank = nil + (first_line..last_line).each do |line_number| + text = document.lines[line_number - 1].to_s + stripped = text.strip + return { line: line_number, kind: :comment, text: stripped } if stripped.start_with?("#", "//", "--") + + blank ||= { line: line_number, kind: :blank, text: stripped } if stripped.empty? 
end + blank end - def walk(node, stack, &block) + def generic_walk_local(node, &block) return unless ts_node?(node) - stack = push_context(stack, node) - yield node, stack - node.children.each { |child| walk(child, stack, &block) } + stack = [node] + until stack.empty? + current = stack.pop + next unless ts_node?(current) + next if current != node && generic_nested_local_scope?(current) + + yield current + current.named_children.reverse_each { |child| stack << child } + end end - def push_context(stack, node) - next_stack = push_owner_context(stack, node) - name = function_name(node) - next_stack = name ? next_stack + [function_context(node, next_stack)] : next_stack - control = control_context(node) - control ? next_stack + [{ control: control }] : next_stack + def generic_nested_local_scope?(node) + function_name(node) || owner_name_from_declaration(nil, node) end - def push_owner_context(stack, node) - owner = owner_name_from_declaration(nil, node) - return stack unless owner + def generic_identifier?(node) + ts_node?(node) && identifier_node_kinds.include?(node.kind) + end - parent_owner = current_owner_from_stack(stack) - full_owner = if parent_owner && parent_owner != owner && !owner.include?("::") - "#{parent_owner}::#{owner}" - else - owner - end - stack + [{ owner: full_owner, owner_declaration: true, owner_kind: owner_kind(node) }] + def generic_local_identifier_text(node) + return node.text.to_s if generic_identifier?(node) + return nil unless ts_node?(node) + return nil unless local_identifier_wrapper_node_kinds.include?(node.kind) + return nil unless node.named_children.empty? + + text = node.text.to_s + simple_identifier_text?(text) ? text : nil end - def current_function(stack) - entry = stack.reverse.find { |item| item.is_a?(Hash) && item[:function] } - entry ? entry[:function] : "(top-level)" + def generic_assignment_statement?(node) + ts_node?(node) && + (assignment_node_kinds.include?(node.kind) || + node.children.any? { |child| !child.named? 
&& assignment_operator_tokens.include?(child.text.to_s) }) end - def current_owner(document, stack) - current_owner_from_stack(stack) || file_owner(document.file) + def generic_local_write_node?(node) + return false unless generic_identifier?(node) + + parent = parent_node(node) + return false unless parent + return false if generic_member_name?(node) + return true if generic_declaration_name?(node) + + if assignment_node_kinds.include?(parent.kind) + lhs = named_field(parent, "left") || parent.named_children.first + return lhs == node + end + + assignment_lhs?(node) end - def current_owner_from_stack(stack) - entry = stack.reverse.find { |item| item.is_a?(Hash) && item[:owner] } - entry && entry[:owner] + def generic_declaration_name?(node) + parent = parent_node(node) + return false unless parent + + generic_local_declaration_name_nodes(parent).any? { |candidate| candidate == node } end - def conditional_context?(stack) - stack.any? { |item| item.is_a?(Hash) && %i[conditional iterates].include?(item[:control]) } + def generic_declaration_name_in_tree?(root, target) + generic_local_declaration_name_nodes(root).any? { |candidate| candidate == target } || + root.named_children.any? { |child| generic_declaration_name_in_tree?(child, target) } end - def current_control(stack) - entry = stack.reverse.find { |item| item.is_a?(Hash) && item[:control] } - entry ? 
entry[:control] : :always + def generic_declaration_target_keys(root) + keys = Set.new + generic_walk_local(root) do |node| + generic_local_declaration_name_nodes(node).each { |target| keys << node_key(target) } + end + keys end - def function_context(node, stack) - { - function: function_name(node), - owner: function_owner_name(node, stack), - params: function_params(node), - receiver: function_receiver_name(node) - } + def generic_local_declaration_name(node) + generic_local_declaration_name_nodes(node).filter_map { |child| generic_local_declaration_text(child) }.first end - def function_owner_name(node, stack) - receiver_owner_name(node) || current_owner_from_stack(stack) + def generic_local_declaration_names(node) + generic_local_declaration_name_nodes(node).filter_map { |child| generic_local_declaration_text(child) } end - def function_name(node) - case node.kind - when "body_statement" - hidden_ruby_method_name(node) - when "method", "function_definition", "function_declaration", - "method_definition", "function_item" - named_field(node, "name")&.text || first_named_text(node, %w[identifier constant property_identifier]) - when "singleton_method" - name = named_field(node, "name")&.text || - node.named_children.reverse.find { |child| %w[identifier field_identifier property_identifier].include?(child.kind) }&.text - name && "self.#{name}" - when "argument_list" - inline_def_name(node) - when "method_declaration" - named_field(node, "name")&.text || first_named_text(node, %w[field_identifier identifier]) + def generic_local_declaration_text(node) + generic_local_identifier_text(node) || (simple_identifier_text?(node&.text) ? 
node.text.to_s : nil) + end + + def generic_local_declaration_name_node(node) + generic_local_declaration_name_nodes(node).first + end + + def generic_local_declaration_name_nodes(node) + return [] unless ts_node?(node) + return [] unless local_declaration_node_kinds.include?(node.kind) + + if short_variable_declaration_node_kinds.include?(node.kind) + left = node.named_children.find { |child| variable_declaration_node_kinds.include?(child.kind) } + if left + identifiers = left.named_children.select { |child| generic_local_identifier_text(child) } + return identifiers unless identifiers.empty? + return [left] if simple_identifier_text?(left.text) + end + return [] + end + + variable = node.named_children.find { |child| variable_declaration_node_kinds.include?(child.kind) } + return [variable] if simple_identifier_text?(variable&.text) + + declaration_assignment = node.named_children.find { |child| declaration_assignment_node_kinds.include?(child.kind) } + if declaration_assignment + lhs = declaration_assignment.named_children.first + identifier = lhs&.named_children&.find { |child| generic_identifier?(child) } + return [identifier] if identifier + return [lhs] if simple_identifier_text?(lhs&.text) end + + candidate = named_field(node, "pattern") || + named_field(node, "name") || + node.named_children.find { |child| local_identifier_wrapper_node_kinds.include?(child.kind) } || + node.named_children.find { |child| variable_declaration_node_kinds.include?(child.kind) }&.named_children&.find { |child| generic_identifier?(child) } || + node.named_children.find { |child| generic_identifier?(child) } + candidate ? 
[candidate] : [] end - def function_kind(node, stack) - return :method if owner_for_node(nil, node, stack: stack) + def generic_assignment_lhs_names(node) + return [] unless ts_node?(node) + return [] unless assignment_node_kinds.include?(node.kind) - :function + lhs = named_field(node, "left") || node.named_children.first + collect_generic_assignment_lhs_names(lhs) end - def visibility_for(node) - return ruby_inline_def_visibility(node) if inline_def_argument_list?(node) - return :public if node.children.any? { |child| child.text == "pub" } + def collect_generic_assignment_lhs_names(lhs) + return [] unless ts_node?(lhs) - nil + if indexed_lhs_node?(lhs) + object = lhs.named_children.first + return collect_generic_assignment_lhs_names(object) + end + + if expression_list_node_kinds.include?(lhs.kind) + return [lhs.text] if lhs.named_children.empty? && generic_local_identifier_text(lhs) + + return lhs.named_children.flat_map { |child| collect_generic_assignment_lhs_names(child) } + end + + if indexed_lhs_node_kinds.include?(lhs.kind) + object = lhs.named_children.first + return collect_generic_assignment_lhs_names(object) + end + + if (name = generic_local_identifier_text(lhs)) + return [name] + end + if field_assignment_writes_receiver? 
&& field_like_node?(lhs) + receiver = lhs.named_children.first + return collect_generic_assignment_lhs_names(receiver) + end + return [] if generic_identifier?(lhs) + return [] if generic_member_name?(lhs) + return [lhs.text] if simple_identifier_text?(lhs.text) + + lhs.named_children.filter_map { |child| child.text if generic_identifier?(child) } end - def function_params(node) - return hidden_ruby_method_params(node) if hidden_ruby_method_definition?(node) + def generic_assignment_lhs_in_tree?(root, target) + return false unless ts_node?(root) - params = if node.kind == "method_declaration" - node.named_children.select { |child| child.kind == "parameter_list" }[1] - else - named_field(node, "parameters") || - node.named_children.find { |child| %w[parameters formal_parameters parameter_list].include?(child.kind) } - end - params ||= node.named_children.find { |child| child.kind == "method_parameters" } if inline_def_argument_list?(node) - return [] unless params + if assignment_node_kinds.include?(root.kind) + lhs = named_field(root, "left") || root.named_children.first + return generic_assignment_lhs_read_target?(lhs, target) + end - params.named_children.filter_map do |param| - parameter_name(param) - end.uniq + root.named_children.any? { |child| generic_assignment_lhs_in_tree?(child, target) } end - def parameter_name(param) - return nil unless ts_node?(param) - return param.text if %w[identifier shorthand_property_identifier_pattern].include?(param.kind) + def generic_assignment_lhs_read_target_keys(root) + keys = Set.new + generic_walk_local(root) do |node| + next unless assignment_node_kinds.include?(node.kind) - name = named_field(param, "name") || - param.named_children.find do |child| - %w[identifier field_identifier property_identifier].include?(child.kind) - end - text = name&.text.to_s - return nil if text.empty? 
|| text == "_" + lhs = named_field(node, "left") || node.named_children.first + collect_generic_assignment_lhs_read_target_keys(lhs, keys) + end + keys + end - text + def generic_assignment_lhs_target_keys(root) + keys = Set.new + generic_walk_local(root) do |node| + next unless assignment_node_kinds.include?(node.kind) + + lhs = named_field(node, "left") || node.named_children.first + collect_generic_assignment_lhs_target_keys(lhs, keys) + end + keys end - def function_signature(document, node) - if hidden_ruby_method_definition?(node) - return normalize_text(hidden_ruby_method_signature(document, node)) + def collect_generic_assignment_lhs_read_target_keys(lhs, keys) + return unless ts_node?(lhs) + + if indexed_lhs_node?(lhs) + lhs.named_children.each { |child| collect_generic_assignment_lhs_read_target_keys(child, keys) } + return end - if document.language == :ruby - signature = preceding_ruby_signature(document, node) - return signature unless signature.empty? + + if expression_list_node_kinds.include?(lhs.kind) + if lhs.named_children.empty? && generic_local_identifier_text(lhs) + keys << node_key(lhs) + return + end + + lhs.named_children.each { |child| collect_generic_assignment_lhs_read_target_keys(child, keys) } + return end - body = named_field(node, "body") - text = - if body - document.source.byteslice(node.start_byte, body.start_byte - node.start_byte).to_s.strip - else - line_text(document, node).strip + if field_like_node?(lhs) + if suppress_field_receiver_lhs_reads? + receiver = lhs.named_children.first + collect_generic_assignment_lhs_read_target_keys(receiver, keys) end - normalize_text(text.empty? ? 
line_text(document, node) : text) - rescue StandardError - normalize_text(line_text(document, node)) + return + end + + if generic_identifier?(lhs) || generic_local_identifier_text(lhs) + keys << node_key(lhs) + return + end + + lhs.named_children.each { |child| collect_generic_assignment_lhs_read_target_keys(child, keys) } end - def preceding_ruby_signature(document, node) - cursor = line(node) - 2 - lines = document.lines - cursor -= 1 while cursor >= 0 && lines[cursor].to_s.strip.empty? - return "" if cursor.negative? + def collect_generic_assignment_lhs_target_keys(lhs, keys) + return unless ts_node?(lhs) - stripped = lines[cursor].to_s.strip - if stripped == "end" - start = cursor - while start >= 0 - text = lines[start].to_s.strip - return normalize_text(lines[start..cursor].join("\n")) if text == "sig do" - return "" if start != cursor && text.match?(/\A(?:def|class|module)\b/) + if indexed_lhs_node?(lhs) + object = lhs.named_children.first + collect_generic_assignment_lhs_target_keys(object, keys) + return + end - start -= 1 + if expression_list_node_kinds.include?(lhs.kind) + if lhs.named_children.empty? && generic_local_identifier_text(lhs) + keys << node_key(lhs) + return end - return "" if start.negative? + + lhs.named_children.each { |child| collect_generic_assignment_lhs_target_keys(child, keys) } + return end - return normalize_text(stripped) if stripped.start_with?("sig ") - return "" unless stripped == "}" || stripped.end_with?("}") + if field_like_node?(lhs) + return unless field_assignment_writes_receiver? 
- start = cursor - while start >= 0 - text = lines[start].to_s.strip - return normalize_text(lines[start..cursor].join("\n")) if text.start_with?("sig ") - return "" if text.match?(/\A(?:def|class|module)\b/) + receiver = lhs.named_children.first + collect_generic_assignment_lhs_target_keys(receiver, keys) + return + end - start -= 1 + if generic_identifier?(lhs) || generic_local_identifier_text(lhs) + keys << node_key(lhs) + return end - "" + + lhs.named_children.each { |child| collect_generic_assignment_lhs_target_keys(child, keys) } end - def line_text(document, node) - document.lines[line(node) - 1].to_s + def generic_assignment_lhs_target?(lhs, target) + generic_assignment_lhs_read_target?(lhs, target) end - def control_context(node) - return :iterates if %w[while until while_statement for for_statement for_in_statement - loop_expression do_block].include?(node.kind) - return :conditional if branch_node?(node) + def generic_assignment_lhs_read_target?(lhs, target) + return false unless ts_node?(lhs) - nil + return ts_node_contains?(lhs, target) if indexed_lhs_node?(lhs) + + if expression_list_node_kinds.include?(lhs.kind) + return lhs == target if lhs.named_children.empty? && generic_local_identifier_text(lhs) + + return lhs.named_children.any? { |child| generic_assignment_lhs_read_target?(child, target) } + end + + if field_like_node?(lhs) + return suppress_field_receiver_lhs_reads? && + generic_assignment_lhs_read_target?(lhs.named_children.first, target) + end + + return lhs == target if generic_identifier?(lhs) + + ts_node_contains?(lhs, target) end - def record_decision_site(document, node, stack, out) - if boolean_container?(node) && boolean_and?(node) - record_conjunction_decision(document, node, stack, out) - return + def indexed_lhs_node?(node) + ts_node?(node) && indexed_lhs_node_kinds.include?(node.kind) + end + + def suppress_field_receiver_lhs_reads? + false + end + + def field_assignment_writes_receiver? 
+ false + end + + def ts_node_contains?(root, target) + return false unless ts_node?(root) + return true if root == target + + root.named_children.any? { |child| ts_node_contains?(child, target) } + end + + def simple_identifier_text?(text) + text.to_s.match?(/\A[A-Za-z_]\w*\z/) + end + + def generic_member_name?(node) + parent = parent_node(node) + if parent&.kind == "navigation_suffix" + owner = parent_node(parent) + return true if owner && field_like_node?(owner) end + return false if parent && expression_list_node_kinds.include?(parent.kind) && !member_expression_list?(parent) + return false unless parent && field_like_node?(parent) - case node.kind - when "case", "switch_statement", "expression_switch_statement", "switch_expression", - "match_statement", "match_expression" - return if ruby_predicate_less_case?(node) + field = named_field(parent, "field") || named_field(parent, "property") || + named_field(parent, "name") || named_field(parent, "suffix") || + parent.named_children.last + field == node + end - patterns = case_patterns(node) - return if patterns.size < 2 + def skip_local_read_identifier?(_node) + false + end - out << DecisionSite.new( - kind: :case_dispatch, - members: patterns, - file: document.file, - function: current_function(stack), - line: line(node), - span: span(node), - predicate: decision_predicate(node) - ) - when "body_statement", "block_body", "argument_list" - return unless hidden_case?(node) - return if node.named_children.any? { |child| child.kind == "case" } - return if ruby_predicate_less_case?(node) + def generic_call_name?(node) + parent = parent_node(node) + return false unless parent + return false if field_like_node?(parent) - patterns = case_patterns(node) - return if patterns.size < 2 + if adjacent_method_invocation_node_kinds.include?(parent.kind) + names = parent.named_children.select { |child| generic_identifier?(child) } + return names.size >= 2 ? 
names.last == node : parent.named_children.first == node + end - out << DecisionSite.new( - kind: :case_dispatch, - members: patterns, - file: document.file, - function: current_function(stack), - line: line(node), - span: span(node), - predicate: decision_predicate(node) - ) - when "expression_statement" - return unless hidden_match?(node) + call_node_kinds.include?(parent.kind) && + (named_field(parent, "function") == node || parent.named_children.first == node) + end - patterns = case_patterns(node) - return if patterns.size < 2 + def generic_path_walk(document, node, function, guards, out) + return unless ts_node?(node) + return if generic_nested_local_scope?(node) - out << DecisionSite.new( - kind: :case_dispatch, - members: patterns, + if branch_node?(node) + condition = generic_branch_condition(node) + atoms = generic_path_condition_atoms(condition) + generic_branch_body_nodes(node).each do |child| + generic_path_walk(document, child, function, guards + atoms, out) + end + return + end + + if guards.size >= 2 && generic_path_action_node?(node) + out << PathConditionSite.new( + guards: guards.uniq.sort, + action: normalize_text(node.text), file: document.file, - function: current_function(stack), + function: function, line: line(node), - span: span(node), - predicate: decision_predicate(node) + span: span(node) ) + return end + + node.named_children.each { |child| generic_path_walk(document, child, function, guards, out) } end - def record_conjunction_decision(document, node, stack, out) - from_wrapper = parenthesized_wrapper?(node) - return if from_wrapper && - ts_node?(node.parent) && - boolean_container?(node.parent) && - boolean_and?(node.parent) + def generic_branch_condition(node) + named_field(node, "condition") || named_field(node, "value") || + named_field(node, "subject") || node.named_children.first + end - node = node.named_children.first if from_wrapper - return if !from_wrapper && - ts_node?(node.parent) && - boolean_container?(node.parent) && - 
boolean_and?(node.parent) && - !same_span?(node.parent, node) + def generic_branch_body_nodes(node) + bodies = [ + named_field(node, "consequence"), + named_field(node, "body"), + named_field(node, "alternative") + ].compact + bodies = node.named_children.drop(1) if bodies.empty? + bodies.flat_map do |body| + next [body] if simple_action_wrapper?(body) - members = flatten_boolean_and(node).map { |child| decision_member_text(child) }.uniq.sort - return if members.size < 2 + children = body.named_children.reject { |child| comment_node?(child) } + children.empty? ? [body] : children + end + end - out << DecisionSite.new( - kind: :conjunction, - members: members, - file: document.file, - function: current_function(stack), - line: conjunction_span(node)[0], - span: conjunction_span(node), - predicate: normalize_text(node.text) - ) + def comment_node?(node) + node.kind.to_s.include?("comment") end - def record_function_def(document, node, stack, out) - name = function_name(node) + def generic_path_condition_atoms(condition) + return [] unless ts_node?(condition) + + if boolean_container?(condition) && boolean_and?(condition) + flatten_boolean_and(condition).map { |child| decision_member_text(child) }.uniq.sort + else + [decision_member_text(condition)] + end + end + + def generic_path_action_node?(node) + return false unless ts_node?(node) + return false if branch_node?(node) + + return true if simple_action_wrapper?(node) + + generic_assignment_statement?(node) || + path_action_node_kinds.include?(node.kind) + end + + def simple_action_wrapper?(node) + return false unless simple_action_wrapper_node_kinds.include?(node.kind) + + normalize_text(node.text).match?(/\A[A-Za-z_]\w*(?:\.[A-Za-z_]\w*)?\s*\([^{};]*\)\s*;?\z/) + end + + def comparison_target(node) + return nil unless comparison_node_kinds.include?(node.kind) + + operator = direct_operator(node) + return nil unless COMPARISON_OPERATORS.include?(operator) + + { source: normalize_text(node.text), operator: operator 
} + end + + def push_owner_context(document, stack, node) + owner = owner_name_from_declaration(document, node) + return stack unless owner + + parent_owner = current_owner_from_stack(stack) + parent_owner ||= current_file_owner_from_stack(stack) \ + if current_language(stack) == :python && current_function_entry?(stack) + full_owner = if parent_owner && parent_owner != owner && !owner.include?("::") + "#{parent_owner}::#{owner}" + else + owner + end + stack + [{ owner: full_owner, owner_declaration: true, owner_kind: owner_kind(node) }] + end + + def current_function(stack) + entry = stack.reverse.find { |item| item.is_a?(Hash) && item[:function] } + entry ? entry[:function] : "(top-level)" + end + + def current_owner(document, stack) + current_owner_from_stack(stack) || file_owner(document.file) + end + + def current_owner_from_stack(stack) + entry = stack.reverse.find { |item| item.is_a?(Hash) && item[:owner] } + entry && entry[:owner] + end + + def current_file_owner_from_stack(stack) + entry = stack.reverse.find { |item| item.is_a?(Hash) && item[:file_owner] } + entry && entry[:file_owner] + end + + def current_function_entry?(stack) + stack.reverse.any? { |item| item.is_a?(Hash) && item[:function] } + end + + def current_language(stack) + entry = stack.reverse.find { |item| item.is_a?(Hash) && item[:language] } + entry && entry[:language] + end + + def conditional_context?(stack) + stack.any? { |item| item.is_a?(Hash) && %i[conditional iterates].include?(item[:control]) } + end + + def current_control(stack) + entry = stack.reverse.find { |item| item.is_a?(Hash) && item[:control] } + entry ? 
entry[:control] : :always + end + + def function_context(node, stack) + { + function: function_name(node), + owner: function_owner_name(node, stack), + params: function_params(node), + receiver: function_receiver_name(node, stack) + } + end + + def function_owner_name(node, stack) + receiver_owner_name(node) || + current_owner_from_stack(stack) || + receiver_convention_owner_name(node, stack: stack) + end + + def line_text(document, node) + document.lines[line(node) - 1].to_s + end + + def control_context(node) + return :iterates if loop_node_kinds.include?(node.kind) + return :iterates if text_loop_node_kinds.include?(node.kind) && node.text.to_s.lstrip.match?(/\A(?:for|while|loop)\b/) + return :iterates if labeled_loop_node_kinds.include?(node.kind) && node.text.to_s.lstrip.start_with?("for ") + return :conditional if branch_node?(node) + + nil + end + + def record_decision_site(document, node, stack, out) + return if generated_prelude?(document, node) + + if boolean_container?(node) && boolean_and?(node) + record_conjunction_decision(document, node, stack, out) + return + end + + if case_node_kinds.include?(node.kind) + return if predicate_less_case?(node) + + patterns = case_patterns(node) + return if patterns.size < 2 + + out << DecisionSite.new( + kind: :case_dispatch, + members: patterns, + file: document.file, + function: current_function(stack), + line: line(node), + span: span(node), + predicate: decision_predicate(node), + enclosing_span: span(node) + ) + elsif hidden_case_wrapper_node_kinds.include?(node.kind) + return unless hidden_case?(node) + return if node.named_children.any? 
{ |child| case_node_kinds.include?(child.kind) } + return if predicate_less_case?(node) + + patterns = case_patterns(node) + return if patterns.size < 2 + + out << DecisionSite.new( + kind: :case_dispatch, + members: patterns, + file: document.file, + function: current_function(stack), + line: line(node), + span: span(node), + predicate: decision_predicate(node), + enclosing_span: span(node) + ) + elsif hidden_match_node_kinds.include?(node.kind) + return unless hidden_match?(node) + + patterns = case_patterns(node) + return if patterns.size < 2 + + out << DecisionSite.new( + kind: :case_dispatch, + members: patterns, + file: document.file, + function: current_function(stack), + line: line(node), + span: span(node), + predicate: decision_predicate(node), + enclosing_span: span(node) + ) + end + end + + def record_conjunction_decision(document, node, stack, out) + from_wrapper = parenthesized_wrapper?(node) + return if from_wrapper && + ts_node?(node.parent) && + boolean_container?(node.parent) && + boolean_and?(node.parent) + + node = node.named_children.first if from_wrapper + return if !from_wrapper && + ts_node?(node.parent) && + boolean_container?(node.parent) && + boolean_and?(node.parent) && + !same_span?(node.parent, node) + + members = flatten_boolean_and(node).map { |child| decision_member_text(child) }.uniq.sort + return if members.size < 2 + + out << DecisionSite.new( + kind: :conjunction, + members: members, + file: document.file, + function: current_function(stack), + line: conjunction_span(node)[0], + span: conjunction_span(node), + predicate: conjunction_predicate(node), + enclosing_span: decision_enclosing_span(node) + ) + end + + def conjunction_predicate(node) + normalize_text(node.text) + end + + def decision_enclosing_span(node) + parent = parent_node(node) + seen = Set.new + while ts_node?(parent) && !seen.include?(node_key(parent)) + seen << node_key(parent) + return span(parent) if branch_node?(parent) || loop_node_kinds.include?(parent.kind) 
+ + parent = parent_node(parent) + end + span(node) + end + + def record_function_def(document, node, stack, out) + name = function_name(node) return unless name out << FunctionDef.new( @@ -882,10 +1292,10 @@ def record_function_def(document, node, stack, out) line: line(node), span: span(node), body: node, - visibility: visibility_for(node), + visibility: visibility(document, node), params: function_params(node), signature: function_signature(document, node), - kind: function_kind(node, stack) + kind: function_kind(document, node, stack) ) end @@ -909,17 +1319,20 @@ def record_call_site(document, node, stack, out) target = normalize_target_receiver(target, stack) return if noise_call?(target) + source_node = target[:source_node] || node out << CallSite.new( receiver: target[:receiver], message: target[:message], file: document.file, function: current_function(stack), owner: current_owner(document, stack), - line: line(node), - span: span(node), + line: line(source_node), + span: span(source_node), conditional: conditional_context?(stack), arguments: target[:arguments], - control: current_control(stack) + control: current_control(stack), + safe_navigation: target[:safe_navigation] || false, + block: target[:block] || call_has_block?(source_node) ) end @@ -937,244 +1350,72 @@ def record_state_declaration(document, node, stack, out) ) end - def case_patterns(node) - case_arms(node).flat_map do |child| - case_arm_patterns(child).reject { |normalized| default_case_pattern?(normalized) } - end.uniq.sort - end - - def case_arm_patterns(child) - case child.kind - when "when", "match_arm" - patterns = child.named_children.select { |node| %w[pattern case_pattern match_pattern].include?(node.kind) } - patterns = [named_field(child, "pattern") || child.named_children.first].compact if patterns.empty? 
- ruby_when_pattern_texts(patterns) - when "switch_case", "case_clause", "expression_case" - return [] if child.text.to_s.lstrip.start_with?("else") + def record_state_write(document, node, stack, out) + return if skip_state_write_node?(node) - value = named_field(child, "value") || child.named_children.first - value && value.kind !~ /statement|block/ ? [normalize_text(value.text)] : [] - else - [] - end - end + lhs = + if assignment_node_kinds.include?(node.kind) + named_field(node, "left") || node.named_children.first + elsif assignment_lhs?(node) + node + end + return unless lhs - def case_arm_pattern(child) - patterns = case_arm_patterns(child) - return nil if patterns.empty? + target = state_target(lhs) + return unless target + target = normalize_target_receiver(target, stack) + return if skip_state_write_target?(target) - patterns.join(", ") + source_node = state_write_source_node(node) + out << StateWrite.new( + field: target[:field], + receiver: target[:receiver], + file: document.file, + function: current_function(stack), + line: line(source_node), + span: span(source_node), + owner: current_owner(document, stack) + ) end - def ruby_when_pattern_texts(patterns) - return [] if patterns.empty? - - texts = patterns.map { |pattern| normalize_text(pattern.text) } - return texts unless texts.any? { |text| text.start_with?("*") } + def skip_state_write_node?(node) + parent = parent_node(node) + return false unless parent - out = [] - pending_plain = [] - texts.each_with_index do |text, index| - splat = text.start_with?("*") - if splat - out << pending_plain.join(", ") unless pending_plain.empty? - pending_plain = [] - out << if texts.size == 1 || index.positive? - text.delete_prefix("*") - else - text - end - else - pending_plain << text - end - end - out << pending_plain.join(", ") unless pending_plain.empty? 
- out + assignment_lhs?(node) && + assignment_node_kinds.include?(parent.kind) end - def case_arm_body(child) - pattern = named_field(child, "pattern") || named_field(child, "value") || child.named_children.first - members = child.named_children - body = members.drop_while { |node| node == pattern }.drop(1) - body = members[1..] if body.empty? - Array(body).map(&:text).join(" ") + def skip_state_write_target?(target) + target[:field] == "[]" end - def case_arms(node) - arms = [] - stack = node.named_children.dup - until stack.empty? - child = stack.shift - next unless ts_node?(child) - - if %w[when switch_case case_clause expression_case match_arm].include?(child.kind) - arms << child - elsif !%w[method function_definition function_declaration method_definition - method_declaration function_item class class_definition - class_declaration].include?(child.kind) - stack.concat(child.named_children) - end - end - arms + def state_write_source_node(node) + node end - def decision_predicate(node) - return normalize_text(modifier_condition(node).text) if hidden_modifier_if?(node) && modifier_condition(node) + def record_state_read(document, node, stack, out) + return if assignment_lhs?(node) - target = decision_subject(node) - normalize_text(target ? target.text : node.text) - end + target = state_read_target(node) + return unless target + target = normalize_target_receiver(target, stack) - def decision_subject(node) - named_field(node, "value") || named_field(node, "subject") || - named_field(node, "condition") || - node.named_children.find do |child| - !%w[when switch_case case_clause expression_case match_arm else then comment].include?(child.kind) - end - end - - def ruby_predicate_less_case?(node) - return false unless node.kind == "case" || hidden_case?(node) - - !decision_subject(node) - end - - def default_case_pattern?(text) - text.nil? 
|| %w[_ default].include?(text) - end - - def boolean_and?(node) - if parenthesized_wrapper?(node) - child = node.named_children.first - return boolean_and?(child) - end - - %w[&& and].include?(direct_operator(node)) - end - - def flatten_boolean_and(node) - return [node] unless ts_node?(node) && - boolean_container?(node) && - boolean_and?(node) - return flatten_boolean_and(node.named_children.first) if parenthesized_wrapper?(node) - - node.named_children.flat_map { |child| flatten_boolean_and(child) } - end - - def boolean_container?(node) - return false unless ts_node?(node) - return true if %w[binary binary_expression boolean_operator].include?(node.kind) - return boolean_container?(node.named_children.first) if parenthesized_wrapper?(node) - return false unless %w[body_statement block_body statement pattern argument_list].include?(node.kind) - return false unless %w[&& and].include?(direct_operator(node)) - return false if node.named_children.size < 2 - - node.children.all? do |child| - child.named? 
|| %w[&& and ( )].include?(child.text.to_s) - end - end - - def same_span?(left, right) - span(left) == span(right) - end - - def conjunction_span(node) - base = span(node) - if node.kind == "pattern" && node.text.to_s.lstrip.start_with?("(") - base = base.dup - base[1] += 1 - end - base - end - - def parenthesized_wrapper?(node) - ts_node?(node) && %w[parenthesized_statements parenthesized_expression].include?(node.kind) && - node.named_children.size == 1 - end - - def decision_member_text(node) - normalize_text(strip_enclosing_parentheses(node.text)) - end - - def strip_enclosing_parentheses(text) - value = text.to_s.strip - loop do - break value unless value.start_with?("(") && value.end_with?(")") - break value unless enclosing_parentheses_wrap_all?(value) - - value = value[1...-1].strip - end - value - end - - def enclosing_parentheses_wrap_all?(text) - depth = 0 - text.each_char.with_index do |char, index| - depth += 1 if char == "(" - depth -= 1 if char == ")" - return false if depth.zero? && index < text.length - 1 - return false if depth.negative? - end - depth.zero? - end - - def direct_operator(node) - node.children.find { |child| !child.named? 
&& !%w[( )].include?(child.text.to_s) }&.text.to_s - rescue StandardError - "" - end - - def record_state_write(document, node, stack, out) - return if document.language == :ruby && node.kind == "operator_assignment" - return if document.language == :ruby && assignment_lhs?(node) && next_sibling(node)&.text.to_s != "=" && - !instance_variable_node?(node) - - lhs = - if %w[assignment assignment_expression augmented_assignment assignment_statement operator_assignment].include?(node.kind) - named_field(node, "left") || node.named_children.first - elsif assignment_lhs?(node) - node - end - return unless lhs - - target = state_target(lhs) - return unless target - target = normalize_target_receiver(target, stack) - return if target[:field] == "[]" - return if document.language == :ruby && target[:field].to_s.start_with?("$") - - source_node = document.language == :ruby && assignment_lhs?(node) ? (parent_node(node) || node) : node - out << StateWrite.new( - field: target[:field], - receiver: target[:receiver], - file: document.file, - function: current_function(stack), - line: line(source_node), - span: span(source_node), - owner: current_owner(document, stack) - ) - end - - def record_state_read(document, node, stack, out) - target = state_read_target(node) - return unless target - target = normalize_target_receiver(target, stack) - - out << StateRead.new( - field: target[:field], - receiver: target[:receiver], - file: document.file, - function: current_function(stack), - line: line(node), - span: span(node), - owner: current_owner(document, stack) - ) + out << StateRead.new( + field: target[:field], + receiver: target[:receiver], + file: document.file, + function: current_function(stack), + line: line(node), + span: span(node), + owner: current_owner(document, stack) + ) end def record_state_param_origin(document, node, stack, out) lhs = nil rhs = nil - if %w[assignment assignment_expression augmented_assignment assignment_statement].include?(node.kind) + if 
assignment_node_kinds.include?(node.kind) lhs = named_field(node, "left") || node.named_children.first rhs = named_field(node, "right") || named_field(node, "value") || node.named_children[1] elsif assignment_lhs?(node) @@ -1224,7 +1465,8 @@ def record_branch_decision(document, node, stack, out, immutable_readers:, immut immutable_readers: immutable_readers, immutable_reader_types: immutable_reader_types, type_aliases: type_aliases, - method_param_types: method_param_types + method_param_types: method_param_types, + params: current_params(stack) ) refs.uniq! refs.sort! @@ -1235,23 +1477,28 @@ def record_branch_decision(document, node, stack, out, immutable_readers:, immut function: current_function(stack), line: line(node), span: span(node), - predicate: normalize_text(cond.text), + predicate: branch_predicate(cond), state_refs: refs ) end + def branch_predicate(node) + normalize_text(node.text) + end + def record_branch_arm(document, node, stack, out) + return if generated_prelude?(document, node) + if if_node?(node) record_if_arms(document, node, stack, out) return end - case node.kind - when "while", "until", "while_statement", "for", "for_statement" + if branch_loop_node_kinds.include?(node.kind) record_loop_arm(document, node, stack, out) - when "case", "body_statement", "switch_statement", "expression_switch_statement", "switch_expression", - "match_statement", "match_expression" - return if node.kind == "body_statement" && !hidden_case?(node) + elsif branch_case_node_kinds.include?(node.kind) + return if hidden_case_wrapper_node_kinds.include?(node.kind) && !hidden_case?(node) + return if hidden_match_node_kinds.include?(node.kind) && !hidden_match?(node) record_case_arms(document, node, stack, out) end @@ -1326,156 +1573,368 @@ def record_case_arms(document, node, stack, out) end end - def branch_node?(node) - BRANCH_KINDS.include?(node.kind) || hidden_match?(node) || hidden_if?(node) || - hidden_modifier_if?(node) || hidden_case?(node) - end - - def 
if_node?(node) - %w[if unless if_statement if_expression if_modifier unless_modifier].include?(node.kind) || - hidden_if?(node) || hidden_modifier_if?(node) - end + def record_implicit_state_accesses(document, out) + declared = declared_state_index(out[:state_declarations]) + return if declared.empty? - def hidden_if?(node) - return false unless ts_node?(node) - return false unless %w[expression_statement block body_statement].include?(node.kind) + locals = local_declaration_index(document) + params = function_param_index(out[:function_defs]) + TreeSitterAdapter.walk_document(document, initial_stack(document), self) do |node, stack| + next unless implicit_state_identifier?(node) - %w[if unless].include?(first_token_kind(node)) - end + owner = current_owner(document, stack) + function = current_function(stack) + next if function == "(top-level)" - def hidden_modifier_if?(node) - return false unless ts_node?(node) - return false unless node.kind == "body_statement" + field = node.text.to_s + next unless declared[owner].include?(field) + next if params[[owner, function]].include?(field) + next if locals[[owner, function]].include?(field) + next if identifier_declaration_site?(node) + next if member_message_identifier?(node) - seen_named = false - node.children.any? do |child| - seen_named ||= child.named? - seen_named && !child.named? 
&& %w[if unless].include?(child.kind) + if implicit_assignment_lhs?(node) + out[:state_writes] << StateWrite.new( + field: field, + receiver: "self", + file: document.file, + function: function, + line: line(node), + span: span(node), + owner: owner + ) + else + out[:state_reads] << StateRead.new( + field: field, + receiver: "self", + file: document.file, + function: function, + line: line(node), + span: span(node), + owner: owner + ) + end end end - def modifier_condition(node) - node.named_children.last + def case_patterns(node) + case_arms(node).flat_map do |child| + case_arm_patterns(child).reject { |normalized| default_case_pattern?(normalized) } + end.uniq.sort end - def hidden_case?(node) - return false unless ts_node?(node) - return false unless %w[body_statement block_body argument_list].include?(node.kind) - - first_token_kind(node) == "case" - end + def case_arm_patterns(child) + if when_case_arm_node_kinds.include?(child.kind) + patterns = child.named_children.select { |node| case_pattern_node_kinds.include?(node.kind) } + patterns = [named_field(child, "pattern") || child.named_children.first].compact if patterns.empty? + case_pattern_texts(patterns) + elsif switch_case_arm_node_kinds.include?(child.kind) + return [] if child.text.to_s.lstrip.start_with?("else") - def first_token_kind(node) - node.children.first&.kind.to_s - end + patterns = child.named_children.select { |node| case_pattern_node_kinds.include?(node.kind) } + return case_pattern_texts(patterns) unless patterns.empty? - def collect_state_refs(node, refs, defn:, immutable_readers:, immutable_reader_types:, type_aliases:, - method_param_types:) - if node.kind == "instance_variable" || node.kind == "global_variable" - refs << node.text - elsif (target = state_read_target(node)) - unless namespace_receiver?(target[:receiver]) - unless immutable_state_read?(target, defn, immutable_readers, immutable_reader_types, type_aliases, method_param_types) - refs << (target[:receiver] == "self" ? 
target[:field] : "#{target[:receiver]}.#{target[:field]}") - end - end - end - node.children.each do |child| - collect_state_refs( - child, - refs, - defn: defn, - immutable_readers: immutable_readers, - immutable_reader_types: immutable_reader_types, - type_aliases: type_aliases, - method_param_types: method_param_types - ) if ts_node?(child) + value = named_field(child, "value") || named_field(child, "pattern") || + child.named_children.find { |candidate| candidate.kind == "when_condition" } || + child.named_children.find { |candidate| candidate.kind == "switch_pattern" } || + child.named_children.first + value && value.kind !~ /statement|block/ ? [normalize_text(value.text)] : [] + else + [] end end - def immutable_state_read?(target, defn, immutable_readers, immutable_reader_types, type_aliases, method_param_types) - receiver = target[:receiver].to_s - field = target[:field].to_sym - return false if receiver.empty? || receiver == "self" - - parts = receiver.split(".") - param = parts.shift - type = method_param_types.fetch(defn, {})[param] - return false unless type + def case_arm_pattern(child) + patterns = case_arm_patterns(child) + return nil if patterns.empty? - parts.each do |reader| - type = immutable_reader_result_type(type, reader.to_sym, immutable_reader_types, type_aliases) - return false unless type - end - immutable_reader?(type, field, immutable_readers, type_aliases) + patterns.join(", ") end - def immutable_reader?(type_name, field, immutable_readers, type_aliases) - resolved = resolve_type_alias(type_name, type_aliases) - short = resolved.to_s.split("::").last - readers = if immutable_readers.key?(resolved) - immutable_readers[resolved] - else - immutable_readers[short] - end - readers&.include?(field) || false - end + def case_pattern_texts(patterns) + return [] if patterns.empty? 
- def immutable_reader_result_type(type_name, field, immutable_reader_types, type_aliases) - resolved = resolve_type_alias(type_name, type_aliases) - short = resolved.to_s.split("::").last - reader_types = if immutable_reader_types.key?(resolved) - immutable_reader_types[resolved] - else - immutable_reader_types[short] - end - reader_types && reader_types[field] + patterns.map { |pattern| normalize_text(pattern.text) } end - def resolve_type_alias(type_name, type_aliases) - seen = Set.new - current = type_name.to_s - loop do - break current if seen.include?(current) - - seen.add(current) - target = type_aliases[current] || type_aliases[current.split("::").last] - break current unless target - - current = target - end + def case_arm_body(child) + pattern = named_field(child, "pattern") || named_field(child, "value") || child.named_children.first + members = child.named_children + body = members.drop_while { |node| node == pattern }.drop(1) + body = members[1..] if body.empty? + Array(body).map(&:text).join(" ") end - def method_param_types(lines) - types_by_method = {} - pending_sig = +"" - lines.each do |line| - pending_sig << line if pending_sig_active?(line, pending_sig) - if (match = line.match(/\A\s*def\s+([A-Za-z_]\w*[!?=]?)(?:\s|\(|$)/)) - types_by_method[match[1]] = sig_param_types(pending_sig) - pending_sig = +"" + def case_arms(node) + arms = [] + stack = node.named_children.dup + until stack.empty? + child = stack.shift + next unless ts_node?(child) + + if case_arm_node_kinds.include?(child.kind) + arms << child + elsif !case_container_stop_node_kinds.include?(child.kind) + stack.concat(child.named_children) end end - types_by_method + arms end - def pending_sig_active?(line, pending_sig) - !pending_sig.empty? 
|| line.match?(/\A\s*sig\b/) + def decision_predicate(node) + return normalize_text(modifier_condition(node).text) if hidden_modifier_if?(node) && modifier_condition(node) + + target = decision_subject(node) + strip_enclosing_parentheses(normalize_text(target ? target.text : node.text)) end - def sig_param_types(sig_source) - match = sig_source.match(/params\s*\((.*?)\)/m) - return {} unless match + def decision_subject(node) + named_field(node, "value") || named_field(node, "subject") || + node.named_children.find { |child| case_subject_node_kinds.include?(child.kind) } || + named_field(node, "condition") || + node.named_children.find do |child| + !case_subject_skip_node_kinds.include?(child.kind) + end + end - match[1].scan(/([A-Za-z_]\w*)\s*:\s*([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)/).to_h + def predicate_less_case?(node) + (case_node_kinds.include?(node.kind) || hidden_case?(node)) && !decision_subject(node) end - def current_params(stack) - entry = stack.reverse.find { |item| item.is_a?(Hash) && item[:params] } - Array(entry && entry[:params]) + def default_case_pattern?(text) + text.nil? 
|| default_case_patterns.include?(text) end - def rhs_param_names(node, params) + def boolean_and?(node) + if parenthesized_wrapper?(node) + child = node.named_children.first + return boolean_and?(child) + end + + boolean_and_operators.include?(direct_operator(node)) + end + + def flatten_boolean_and(node) + return [node] unless ts_node?(node) && + boolean_container?(node) && + boolean_and?(node) + return flatten_boolean_and(node.named_children.first) if parenthesized_wrapper?(node) + + node.named_children.flat_map { |child| flatten_boolean_and(child) } + end + + def boolean_container?(node) + return false unless ts_node?(node) + return true if boolean_container_node_kinds.include?(node.kind) + return boolean_container?(node.named_children.first) if parenthesized_wrapper?(node) + return false unless boolean_wrapper_node_kinds.include?(node.kind) + return false unless boolean_and_operators.include?(direct_operator(node)) + return false if node.named_children.size < 2 + + node.children.all? do |child| + child.named? 
|| (boolean_and_operators + %w[( )]).include?(child.text.to_s) + end + end + + def same_span?(left, right) + span(left) == span(right) + end + + def conjunction_span(node) + base = span(node) + if parenthesized_pattern_node_kinds.include?(node.kind) && node.text.to_s.lstrip.start_with?("(") + base = base.dup + base[1] += 1 + end + base + end + + def parenthesized_wrapper?(node) + ts_node?(node) && parenthesized_wrapper_node_kinds.include?(node.kind) && + node.named_children.size == 1 + end + + def decision_member_text(node) + normalize_text(strip_enclosing_parentheses(node.text)) + end + + def strip_enclosing_parentheses(text) + value = text.to_s.strip + loop do + break value unless value.start_with?("(") && value.end_with?(")") + break value unless enclosing_parentheses_wrap_all?(value) + + value = value[1...-1].strip + end + value + end + + def enclosing_parentheses_wrap_all?(text) + depth = 0 + text.each_char.with_index do |char, index| + depth += 1 if char == "(" + depth -= 1 if char == ")" + return false if depth.zero? && index < text.length - 1 + return false if depth.negative? + end + depth.zero? + end + + def direct_operator(node) + node.children.find { |child| !child.named? 
&& !%w[( )].include?(child.text.to_s) }&.text.to_s + rescue StandardError + "" + end + + def branch_node?(node) + branch_node_kinds.include?(node.kind) || hidden_match?(node) || hidden_if?(node) || + hidden_modifier_if?(node) || hidden_case?(node) + end + + def branch_decision_wrapper_for_real_branch?(node) + return false unless ts_node?(node) + return false if branch_node_kinds.include?(node.kind) || hidden_match?(node) || hidden_case?(node) + return false unless hidden_if?(node) || hidden_modifier_if?(node) + + first_named = node.named_children.first + ts_node?(first_named) && branch_node?(first_named) + end + + def if_node?(node) + if_node_kinds.include?(node.kind) || + hidden_if?(node) || hidden_modifier_if?(node) + end + + def hidden_if?(node) + return false unless ts_node?(node) + return true if hidden_match_node_kinds.include?(node.kind) && node.text.to_s.lstrip.start_with?("if ") + return false unless hidden_if_wrapper_node_kinds.include?(node.kind) + + first_token = node.children.first + first_token && !first_token.named? && hidden_if_token_kinds.include?(first_token.kind.to_s) + end + + def hidden_modifier_if?(node) + false + end + + def modifier_condition(node) + node.named_children.last + end + + def hidden_case?(node) + return false unless ts_node?(node) + return false unless hidden_case_wrapper_node_kinds.include?(node.kind) + + first_token = node.children.first + first_token && !first_token.named? 
&& hidden_case_token_kinds.include?(first_token.kind.to_s) + end + + def hidden_match?(node) + ts_node?(node) && + hidden_match_node_kinds.include?(node.kind) && + node.text.to_s.lstrip.start_with?("match ") + end + + def first_token_kind(node) + node.children.first&.kind.to_s + end + + def collect_state_refs(node, refs, defn:, immutable_readers:, immutable_reader_types:, type_aliases:, + method_param_types:, params:) + if (ref = direct_state_ref(node)) + refs << ref + elsif (target = state_read_target(node)) + unless namespace_receiver?(target[:receiver]) + unless branch_local_param_ref?(node, target, params) || + immutable_state_read?(target, defn, immutable_readers, immutable_reader_types, type_aliases, method_param_types) + refs << (target[:receiver] == "self" ? target[:field] : "#{target[:receiver]}.#{target[:field]}") + end + end + end + node.children.each do |child| + collect_state_refs( + child, + refs, + defn: defn, + immutable_readers: immutable_readers, + immutable_reader_types: immutable_reader_types, + type_aliases: type_aliases, + method_param_types: method_param_types, + params: params + ) if ts_node?(child) + end + end + + def branch_local_param_ref?(node, target, params) + field = target[:field].to_s + return false unless params.include?(field) + + receiver = target[:receiver].to_s + return false unless receiver.empty? || receiver == "self" + + normalize_text(node.text) == field + end + + def immutable_state_read?(target, defn, immutable_readers, immutable_reader_types, type_aliases, method_param_types) + receiver = target[:receiver].to_s + field = target[:field].to_sym + return false if receiver.empty? 
|| receiver == "self" + + parts = receiver.split(".") + param = parts.shift + type = method_param_types.fetch(defn, {})[param] + return false unless type + + parts.each do |reader| + type = immutable_reader_result_type(type, reader.to_sym, immutable_reader_types, type_aliases) + return false unless type + end + immutable_reader?(type, field, immutable_readers, type_aliases) + end + + def immutable_reader?(type_name, field, immutable_readers, type_aliases) + resolved = resolve_type_alias(type_name, type_aliases) + short = resolved.to_s.split("::").last + readers = if immutable_readers.key?(resolved) + immutable_readers[resolved] + else + immutable_readers[short] + end + readers&.include?(field) || false + end + + def immutable_reader_result_type(type_name, field, immutable_reader_types, type_aliases) + resolved = resolve_type_alias(type_name, type_aliases) + short = resolved.to_s.split("::").last + reader_types = if immutable_reader_types.key?(resolved) + immutable_reader_types[resolved] + else + immutable_reader_types[short] + end + reader_types && reader_types[field] + end + + def resolve_type_alias(type_name, type_aliases) + seen = Set.new + current = type_name.to_s + loop do + break current if seen.include?(current) + + seen.add(current) + target = type_aliases[current] || type_aliases[current.split("::").last] + break current unless target + + current = target + end + end + + def current_params(stack) + entry = stack.reverse.find { |item| item.is_a?(Hash) && item[:params] } + Array(entry && entry[:params]) + end + + def rhs_param_names(node, params) found = [] collect_identifiers(node, found) found & params @@ -1484,13 +1943,109 @@ def rhs_param_names(node, params) def collect_identifiers(node, out) return unless ts_node?(node) - out << node.text if node.kind == "identifier" - node.children.each { |child| collect_identifiers(child, out) } + pending = [node] + seen = Set.new + until pending.empty? 
+ current = pending.pop + next unless ts_node?(current) + key = node_key(current) + next if seen.include?(key) + + seen << key + out << current.text if current.kind == "identifier" + current.children.reverse_each { |child| pending << child } + end + end + + def declared_state_index(declarations) + declarations.each_with_object(Hash.new { |h, k| h[k] = Set.new }) do |decl, index| + index[decl.owner.to_s].add(decl.field.to_s) + end + end + + def function_param_index(functions) + functions.each_with_object(Hash.new { |h, k| h[k] = Set.new }) do |fn, index| + index[[fn.owner.to_s, fn.name.to_s]].merge(Array(fn.params).map(&:to_s)) + end + end + + def local_declaration_index(document) + index = Hash.new { |h, k| h[k] = Set.new } + TreeSitterAdapter.walk_document(document, initial_stack(document), self) do |node, stack| + next unless local_variable_declarator?(node) + + owner = current_owner(document, stack) + function = current_function(stack) + next if function == "(top-level)" + + local_name_node(node)&.then { |name| index[[owner, function]].add(name.text.to_s) } + end + index + end + + def local_variable_declarator?(node) + return false unless ts_node?(node) + return false unless local_variable_declarator_node_kinds.include?(node.kind) + + !inside_kind?(node, field_declaration_node_kinds) + end + + def local_name_node(node) + named_field(node, "name") || + node.named_children.find { |child| (identifier_node_kinds + field_identifier_node_kinds).include?(child.kind) } + end + + def implicit_state_identifier?(node) + ts_node?(node) && (identifier_node_kinds + field_identifier_node_kinds).include?(node.kind) + end + + def identifier_declaration_site?(node) + parent = parent_node(node) + return false unless parent + return true if declaration_site_parent_node_kinds.include?(parent.kind) + return true if inside_kind?(node, field_declaration_node_kinds) + + false + end + + def member_message_identifier?(node) + parent = parent_node(node) + return false unless parent && 
field_like_node?(parent) + + field = named_field(parent, "field") || named_field(parent, "property") || + named_field(parent, "name") || parent.named_children.last + field == node + end + + def implicit_assignment_lhs?(node) + parent = parent_node(node) + return false unless parent + + if assignment_node_kinds.include?(parent.kind) + lhs = named_field(parent, "left") || parent.named_children.first + return lhs == node + end + + assignment_lhs?(node) + end + + def inside_kind?(node, kinds) + parent = parent_node(node) + seen = Set.new + while parent && !seen.include?(node_key(parent)) + seen << node_key(parent) + return true if kinds.include?(parent.kind) + + parent = parent_node(parent) + end + false end def owner_for_node(document, node, stack: nil) receiver_owner = receiver_owner_name(node) return receiver_owner if receiver_owner + convention_owner = receiver_convention_owner_name(node) + return convention_owner if convention_owner stacked_owner = current_owner_from_stack(Array(stack)) return stacked_owner if stacked_owner @@ -1521,37 +2076,6 @@ def owner_chain_for_node(document, node) chain.reverse end - def owner_name_from_declaration(document, node) - if hidden_ruby_owner_declaration?(node) - return hidden_ruby_owner_name(node) - end - - case node.kind - when "class", "class_definition", "class_declaration", "module" - named_field(node, "name")&.text || first_named_text(node, %w[constant identifier type_identifier]) - when "impl_item", "impl_block" - impl_owner_name(node) - when "struct_item", "struct_spec", "type_spec", "type_declaration" - named_field(node, "name")&.text || first_named_text(node, %w[type_identifier identifier]) - when "struct_declaration", "union_declaration", "enum_declaration" - bound_container_name(node) || returned_container_owner(node) || anonymous_owner_name(document, node) - end - end - - def owner_kind(node) - return hidden_ruby_owner_kind(node) if hidden_ruby_owner_declaration?(node) - - case node.kind - when "class", 
"class_definition", "class_declaration" then :class - when "module" then :module - when "impl_item", "impl_block" then :impl - when "struct_declaration", "struct_item", "struct_spec" then :struct - when "union_declaration" then :union - when "enum_declaration" then :enum - else :owner - end - end - def impl_owner_name(node) type = named_field(node, "type") || node.named_children.find { |child| child.kind.match?(/type|identifier/) } @@ -1563,17 +2087,12 @@ def receiver_owner_name(node) receiver_type && normalize_type_owner(receiver_type.text) end - def function_receiver_name(node) - receiver_param = method_receiver_param_node(node) - receiver_param&.text - end - def method_receiver_type_node(node) declaration = method_receiver_declaration(node) return nil unless declaration declaration.named_children.reverse.find do |child| - %w[pointer_type type_identifier qualified_type generic_type scoped_type_identifier].include?(child.kind) + receiver_type_node_kinds.include?(child.kind) end end @@ -1581,171 +2100,214 @@ def method_receiver_param_node(node) declaration = method_receiver_declaration(node) return nil unless declaration - declaration.named_children.find { |child| child.kind == "identifier" } + declaration.named_children.find { |child| identifier_node_kinds.include?(child.kind) } end def method_receiver_declaration(node) - return nil unless ts_node?(node) && node.kind == "method_declaration" + return nil unless ts_node?(node) && method_receiver_node_kinds.include?(node.kind) - receiver_params = node.named_children.find { |child| child.kind == "parameter_list" } - receiver_params&.named_children&.find { |child| child.kind == "parameter_declaration" } + receiver_params = node.named_children.find { |child| method_parameter_list_node_kinds.include?(child.kind) } + receiver_params&.named_children&.find { |child| receiver_parameter_node_kinds.include?(child.kind) } end - def bound_container_name(node) + def first_argument_receiver_parameter(node) + params = 
named_field(named_field(node, "declarator"), "parameters") || + named_field(node, "parameters") || + node.named_children.find { |child| parameter_list_node_kinds.include?(child.kind) } || + named_field(node, "declarator")&.named_children&.find { |child| parameter_list_node_kinds.include?(child.kind) } + first = params&.named_children&.find { |child| receiver_parameter_node_kinds.include?(child.kind) } + return nil unless first + + type_node = first.named_children.find do |child| + first_argument_receiver_type_node_kinds.include?(child.kind) + end + name_node = first.named_children.reverse.find do |child| + first_argument_receiver_name_node_kinds.include?(child.kind) + end + name_node ||= nested_receiver_name_node(first) + name_node ||= declarator_name(first) + return nil unless type_node && name_node + + name = ts_node?(name_node) ? name_node.text : name_node.to_s + { type: type_node.text, name: name } + end + + def nested_receiver_name_node(node) + node.named_children.reverse_each do |child| + next unless ts_node?(child) + + direct = child.named_children.reverse.find do |grandchild| + first_argument_receiver_name_node_kinds.include?(grandchild.kind) + end + return direct if direct + end + nil + end + + def snake_case_type_name(type) + type.to_s + .split("::").last + .gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2') + .gsub(/([a-z\d])([A-Z])/, '\1_\2') + .downcase + end + + def bound_container_name(node) parent = parent_node(node) seen_nodes = Set.new while parent && !seen_nodes.include?(node_key(parent)) && - %w[ERROR expression_statement return_expression].include?(parent.kind) + bound_container_wrapper_node_kinds.include?(parent.kind) seen_nodes << node_key(parent) parent = parent_node(parent) end return nil unless parent - if %w[variable_declaration const_declaration lexical_declaration public_field_definition - field_declaration property_declaration].include?(parent.kind) + if bound_container_parent_node_kinds.include?(parent.kind) name = named_field(parent, "name") || - 
parent.named_children.find { |child| %w[identifier field_identifier property_identifier type_identifier].include?(child.kind) } + parent.named_children.find { |child| bound_container_name_node_kinds.include?(child.kind) } return name.text if name end nil end - def returned_container_owner(node) + def returned_container_owner(document, node) parent = parent_node(node) seen_nodes = Set.new while parent && !seen_nodes.include?(node_key(parent)) seen_nodes << node_key(parent) - return function_name(parent) if function_name(parent) + if (name = function_name(parent)) + return name + end + parent = parent_node(parent) end nil end - def node_key(node) - [node.kind, node.start_byte, node.end_byte] - rescue StandardError - node.object_id - end - def anonymous_owner_name(document, node) return nil unless document "#{file_owner(document.file)}::anonymous@#{line(node)}" end - def file_owner(file) - base = File.basename(file.to_s, File.extname(file.to_s)) - base.empty? ? "(file)" : base - end - - def call_target(document, node) - case node.kind - when "call" - ruby_call_target(node) - when "body_statement" - ruby_bare_body_call_target(document, node) - when "identifier" - ruby_bare_call_target(document, node) - when "call_expression", "method_invocation", "invocation_expression" - generic_call_target(node) - when "attribute", "selector_expression", "field", "member_expression", - "field_expression", "expression_list" - adjacent_argument_call_target(node) + def generic_call_target(document, node) + if adjacent_method_invocation_node_kinds.include?(node.kind) + adjacent = generic_adjacent_method_invocation_target(node) + return adjacent if adjacent end - end - def ruby_call_target(node) - receiver = named_field(node, "receiver") - method = named_field(node, "method") - message = method&.text || first_named_text(node, %w[identifier constant]) - message ||= normalize_text(node.text) if receiver.nil? 
&& ruby_simple_call_text?(node.text) - return nil unless message + callee = named_field(node, "function") || named_field(node, "callee") || node.named_children.first + return nil unless callee + return nil if callee.kind == "builtin_function" || callee.text.to_s.start_with?("@") - { - receiver: receiver ? normalize_text(receiver.text) : "self", - message: message, - arguments: ruby_argument_texts(node) - } + target = target_from_callee(callee).merge( + arguments: call_argument_nodes(node).map { |argument| normalize_text(argument.text) } + ) + first_argument_receiver_call_target(document, node, target) || target + rescue NoMethodError + nil end - def ruby_bare_call_target(document, node) - return nil unless document.language == :ruby - return nil unless ruby_bare_call_identifier?(node) + def generic_adjacent_method_invocation_target(node) + names = node.named_children.select { |child| identifier_node_kinds.include?(child.kind) } + return nil unless names.size >= 2 + args = node.named_children.find { |child| argument_list_node_kinds.include?(child.kind) } { - receiver: "self", - message: node.text, - arguments: [] + receiver: normalize_text(names.first.text), + message: names[1].text, + arguments: Array(args&.named_children).map { |child| normalize_text(child.text) } } end - def ruby_bare_body_call_target(document, node) - return nil unless document.language == :ruby - return nil if hidden_ruby_method_definition?(node) || hidden_ruby_owner_declaration?(node) + def first_argument_receiver_call_target(_document, node, target) + return nil unless first_argument_receiver? 
+ return nil unless target[:receiver] == "self" - explicit = ruby_explicit_receiver_body_call_target(node) - return explicit if explicit + first_arg = call_argument_nodes(node).first + return nil unless first_arg - message = node.text.to_s.strip - return nil unless ruby_simple_call_text?(message) - return nil if %w[true false nil self].include?(message) + arg_target = state_read_target(first_arg) + return nil unless arg_target { - receiver: "self", - message: message, - arguments: [] + receiver: "#{arg_target[:receiver]}.#{arg_target[:field]}", + message: target[:message], + arguments: target[:arguments] } end - def ruby_explicit_receiver_body_call_target(node) - receiver, message = node.named_children - return nil unless receiver && message - return nil unless %w[self constant identifier].include?(receiver.kind) - return nil unless %w[identifier constant].include?(message.kind) - - { - receiver: normalize_text(receiver.text), - message: message.text, - arguments: [] - } - end + def call_argument_nodes(node) + args = named_field(node, "arguments") || + node.named_children.find { |child| argument_list_node_kinds.include?(child.kind) } + return Array(args&.named_children) if args + return [] unless call_node_kinds.include?(node.kind) - def ruby_simple_call_text?(text) - text.to_s.strip.match?(/\A[a-z_]\w*[!?=]?\z/) + callee = named_field(node, "function") || named_field(node, "callee") || node.named_children.first + node.named_children.reject { |child| child == callee } end - def generic_call_target(node) - callee = named_field(node, "function") || named_field(node, "callee") || node.named_children.first - return nil unless callee - return nil if callee.kind == "builtin_function" || callee.text.to_s.start_with?("@") + def adjacent_argument_call_target(node) + return nil if generic_member_name?(node) && !member_message_identifier?(node) + return nil if call_node_ancestor?(node) + + callee = node + args = nil + if member_message_identifier?(node) + parent = 
parent_node(node) + if parent && field_like_node?(parent) + parent_args = next_sibling(parent) + if argument_list_node_kinds.include?(parent_args&.kind) + callee = parent + args = parent_args + elsif argument_list_node_kinds.include?(next_sibling(node)&.kind) + callee = parent + args = next_sibling(node) + end + end + end + args ||= next_sibling(callee) + return nil unless argument_list_node_kinds.include?(args&.kind) - target_from_callee(callee).merge(arguments: []) + target_from_callee(callee).merge(arguments: args.named_children.map { |child| normalize_text(child.text) }) rescue NoMethodError nil end - def adjacent_argument_call_target(node) - return nil unless next_sibling(node)&.kind == "argument_list" + def call_node_ancestor?(node) + parent = parent_node(node) + seen = Set.new + while parent + key = node_key(parent) + return false if seen.include?(key) + return true if call_node_kinds.include?(parent.kind) - target_from_callee(node).merge(arguments: []) + seen << key + parent = parent_node(parent) + end + false rescue NoMethodError - nil + false end def target_from_callee(callee) if field_like_node?(callee) object = named_field(callee, "object") || named_field(callee, "receiver") || named_field(callee, "operand") || named_field(callee, "value") || - callee.named_children.first + named_field(callee, "expression") || + callee.named_children.find { |child| child.kind != "navigation_suffix" } field = named_field(callee, "field") || named_field(callee, "property") || + named_field(callee, "suffix") || + callee.named_children.find { |child| navigation_suffix_node_kinds.include?(child.kind) } || callee.named_children.last - return nil unless object && field + field_text = member_field_text(field) + return nil unless object && field_text { receiver: normalize_text(object.text).sub(/\A\*/, ""), - message: field.text + message: field_text } - elsif %w[identifier field_identifier property_identifier constant type_identifier].include?(callee.kind) + elsif 
self_call_identifier_node_kinds.include?(callee.kind) { receiver: "self", message: callee.text @@ -1777,330 +2339,1132 @@ def noise_call?(target) return true if message.start_with?("@") return true if receiver.match?(/\A(?:std|builtin|build_options)(?:\.|\z)/) - false + false + end + + def generic_state_declaration(node) + if assignment_state_declaration_node_kinds.include?(node.kind) + assignment_state_declaration(node) + elsif field_declaration_node_kinds.include?(node.kind) + generic_field_declaration(node) + end + end + + def generic_field_declaration(node) + name = field_declaration_name_node(node) + return nil unless name + + { field: name.text, type: declared_type_text(node, name) } + end + + def field_declaration_name_node(node) + named_field(node, "name") || + variable_declarator_name(node) || + node.named_children.find { |child| field_identifier_node_kinds.include?(child.kind) } || + node.named_children.reverse.find { |child| identifier_node_kinds.include?(child.kind) } + end + + def variable_declarator_name(node) + pending = node.named_children.dup + seen = Set.new + until pending.empty? 
+ current = pending.shift + next unless ts_node?(current) + key = node_key(current) + next if seen.include?(key) + + seen << key + if declarator_node_kinds.include?(current.kind) + direct_name = named_field(current, "name") || + current.named_children.find do |child| + (identifier_node_kinds + field_identifier_node_kinds).include?(child.kind) + end + return direct_name if direct_name + return current if local_variable_declarator_node_kinds.include?(current.kind) && current.text.match?(/\A[A-Za-z_]\w*\z/) + elsif local_variable_declarator_node_kinds.include?(current.kind) + return named_field(current, "name") || + current.named_children.find do |child| + (identifier_node_kinds + field_identifier_node_kinds).include?(child.kind) + end + end + pending.concat(current.named_children) + end + nil + end + + def declared_type_text(node, name_node) + text = node.text.to_s + after_name = text[(name_node.end_byte - node.start_byte)..].to_s + if (match = after_name.match(/\A\s*:\s*([^=,\n]+)/)) + normalize_text(match[1]) + elsif (match = text.match(/\A\s*(?:pub\s+)?(?:const|var)\s+\w+\s*:\s*([^=;\n]+)/)) + normalize_text(match[1]) + elsif (match = after_name.match(/\A\s+([^=;,\n]+)/)) + normalize_text(match[1]) + elsif (type = declared_type_before_name(text, node, name_node)) + type + end + rescue StandardError + nil + end + + def declared_type_before_name(text, node, name_node) + before_name = text[0...(name_node.start_byte - node.start_byte)].to_s + before_name = before_name.gsub(/\b(?:public|private|protected|internal|static|readonly|const|pub|mut|var|let)\b/, " ") + before_name = before_name.gsub(/[;,{].*\z/m, " ") + before_name = normalize_text(before_name) + return nil if before_name.empty? + + tokens = before_name.split(/\s+/).reject { |token| token.match?(/\A[*&]+\z/) } + candidate = tokens.last.to_s.delete_suffix("*").delete_suffix("&") + return nil if candidate.empty? 
+ + candidate + end + + def assignment_state_declaration(node) + lhs = named_field(node, "left") || node.named_children.first + rhs = named_field(node, "right") || named_field(node, "value") || node.named_children[1] + target = state_target(lhs) + return nil unless target + return nil unless self_receiver_names.include?(target[:receiver].to_s) + + type = inferred_assignment_type(rhs) + return nil unless type + + { field: target[:field], type: type } + end + + def inferred_assignment_type(node) + return nil unless ts_node?(node) + + text = normalize_text(node.text) + patterns = [ + /\Anew\s+([A-Z][A-Za-z0-9_:]*)\s*(?:[({<]|$)/, + /\A([A-Z][A-Za-z0-9_:]*)\s*(?:[({<]|$)/ + ] + match = patterns.filter_map { |pattern| text.match(pattern) }.first + match && match[1] + end + + def generic_state_read_target(node) + if accessor_call_node_kinds.include?(node.kind) + receiver = named_field(node, "receiver") + method = named_field(node, "method") + return nil unless receiver && method + return nil if namespace_receiver?(receiver.text) + return nil if NOISE_MESSAGES.include?(method.text) + return nil if named_field(node, "arguments") + + { receiver: normalize_text(receiver.text), field: method.text } + elsif field_like_node?(node) + return nil if expression_list_node_kinds.include?(node.kind) && !(named_field(node, "operand") && named_field(node, "field")) + + object = named_field(node, "object") || named_field(node, "receiver") || + named_field(node, "expression") || + named_field(node, "operand") || named_field(node, "value") || + named_field(node, "argument") || + node.named_children.find { |child| child.kind != "navigation_suffix" } + field = named_field(node, "field") || named_field(node, "property") || + named_field(node, "name") || named_field(node, "suffix") || + node.named_children.find { |child| navigation_suffix_node_kinds.include?(child.kind) } || + node.named_children.last + if literal_field_expression_node_kinds.include?(node.kind) && 
node.text.to_s.start_with?(".") + field = node.named_children.find { |child| identifier_node_kinds.include?(child.kind) } || field + return { receiver: ".literal", field: field.text } if field + end + field_text = member_field_text(field) + return nil unless object && field_text + return nil if namespace_receiver?(object.text) + return nil if NOISE_MESSAGES.include?(field_text) + + { receiver: normalize_text(object.text), field: field_text } + end + end + + def generic_state_target(lhs) + return nil unless ts_node?(lhs) + return nil if prev_sibling(lhs)&.text == ":" + + if accessor_call_node_kinds.include?(lhs.kind) + receiver = named_field(lhs, "receiver") + method = named_field(lhs, "method") + return nil unless receiver && method + + { receiver: normalize_text(receiver.text), field: method.text.sub(/=\z/, "") } + elsif field_like_node?(lhs) + if expression_list_node_kinds.include?(lhs.kind) && !(named_field(lhs, "operand") && named_field(lhs, "field")) + return generic_state_target(lhs.named_children.first) + end + + object = named_field(lhs, "object") || named_field(lhs, "receiver") || + named_field(lhs, "expression") || + named_field(lhs, "operand") || named_field(lhs, "value") || + named_field(lhs, "argument") || + lhs.named_children.find { |child| child.kind != "navigation_suffix" } + field = named_field(lhs, "field") || named_field(lhs, "property") || + named_field(lhs, "name") || named_field(lhs, "suffix") || + lhs.named_children.find { |child| navigation_suffix_node_kinds.include?(child.kind) } || + lhs.named_children.last + if literal_field_expression_node_kinds.include?(lhs.kind) && lhs.text.to_s.start_with?(".") + field = lhs.named_children.find { |child| identifier_node_kinds.include?(child.kind) } || field + return { receiver: ".literal", field: field.text.sub(/=\z/, "") } if field + end + field_text = member_field_text(field) + return nil unless object && field_text + + { receiver: normalize_text(object.text), field: field_text.sub(/=\z/, "") } + 
end + end + + def assignment_lhs?(node) + return false if prev_sibling(node)&.text == ":" + + sibling = next_sibling(node) + sibling && assignment_operator_tokens.include?(sibling.text.to_s) + end + + def direct_state_ref(_node) + nil + end + + def call_has_block?(node) + ts_node?(node) && + node.named_children.any? { |child| block_argument_node_kinds.include?(child.kind) } + end + + def next_sibling(node) + node.next_sibling + rescue StandardError + nil + end + + def prev_sibling(node) + node.prev_sibling + rescue StandardError + nil + end + + def namespace_receiver?(text) + receiver = text.to_s + return true if receiver.match?(/\A(?:std|builtin|build_options)(?:\.|\z)/) + return true if receiver.start_with?("@") + + receiver.match?(/\A[A-Z][A-Za-z0-9_]*(?:\.[A-Z][A-Za-z0-9_]*)*\z/) + end + + def named_field(node, name) + node.child_by_field_name(name) + rescue StandardError + nil + end + + def parent_node(node) + node.parent + rescue StandardError + nil + end + + def field_like_node?(node) + field_like_node_kinds.include?(node.kind) + end + + def member_expression_list?(node) + return false unless expression_list_node_kinds.include?(node.kind) + return true if named_field(node, "operand") && named_field(node, "field") + + node.children.any? do |child| + !child.named? && member_access_operator_tokens.include?(child.text.to_s) + end + end + + def member_field_text(field) + return nil unless ts_node?(field) + + if navigation_suffix_node_kinds.include?(field.kind) + suffix = named_field(field, "suffix") || + field.named_children.find { |child| (identifier_node_kinds + field_identifier_node_kinds).include?(child.kind) } || + field.named_children.last + text = suffix&.text.to_s + return nil if text.empty? 
+ + return text.sub(/\A[.?]+/, "") + end + + field.text.to_s.sub(/\A[.?]+/, "") + end + + def normalize_type_owner(text) + value = text.to_s.strip + value = value.sub(/\A[&*]+/, "") + value = value.gsub(/\b(?:const|mut|var)\b/, "").strip + value.split(/[({<\s]/).first.to_s.split(".").last + end + + def first_named_text(node, kinds) + child = node.named_children.find { |c| kinds.include?(c.kind) } + child&.text + end + + def declarator_name(node) + return nil unless ts_node?(node) + + pending = [node] + seen = Set.new + until pending.empty? + current = pending.pop + next unless ts_node?(current) + key = node_key(current) + next if seen.include?(key) + + seen << key + return current.text if (identifier_node_kinds + field_identifier_node_kinds).include?(current.kind) + + current.named_children.reverse_each { |child| pending << child } + end + nil + end + + def exported_name_visibility(name) + text = name.to_s + return nil if text.empty? + + text.match?(/\A[A-Z]/) ? :public : :private + end + + def modifier_visibility(node) + return :private if node.children.any? { |child| child.text == "private" } + return :protected if node.children.any? { |child| child.text == "protected" } + return :public if node.children.any? { |child| public_visibility_tokens.include?(child.text) } + + nil + end + + def parameter_name(param) + return nil unless ts_node?(param) + return param.text if parameter_identifier_node_kinds.include?(param.kind) + + name = named_field(param, "name") || + param.named_children.select do |child| + parameter_identifier_node_kinds.include?(child.kind) + end.last + text = name&.text.to_s + return nil if text.empty? 
|| text == "_" + + text + end + + def normalize_target_receiver(target, stack) + receiver = target[:receiver].to_s + return target.merge(receiver: "self") if self_receiver_names.include?(receiver) + + current_receiver = current_receiver_name(stack) + return target unless current_receiver + return target.merge(receiver: "self") if receiver == current_receiver + + if receiver.start_with?("#{current_receiver}.") + return target.merge(receiver: "self.#{receiver.delete_prefix("#{current_receiver}.")}") + end + + target + end + + def current_receiver_name(stack) + entry = stack.reverse.find { |item| item.is_a?(Hash) && item[:receiver] } + entry && entry[:receiver] + end + + def file_owner(file) + base = File.basename(file.to_s, File.extname(file.to_s)) + base.empty? ? "(file)" : base + end + + def node_key(node) + [node.kind, node.start_byte, node.end_byte] + rescue StandardError + node.object_id + end + + def ts_node?(node) + node && node.respond_to?(:kind) && node.respond_to?(:children) + end + + def span(node) + [node.start_point.row + 1, node.start_point.column, + node.end_point.row + 1, node.end_point.column] + end + + def line(node) + node.start_point.row + 1 + end + + def normalize_text(text) + text.to_s.tr("\u00A0", " ").strip.gsub(/\s+/, " ") + end + end + + require_relative "syntax/adapters" + + LanguageProfile = TreeSitterLanguageAdapter + + LANGUAGE_PROFILES = { + ruby: RubySyntaxAdapter.new( + language: :ruby, + extensions: %w[.rb], + lexicon: RUBY_LEXICON, + package: "tree-sitter-ruby" + ), + python: PythonSyntaxAdapter.new( + language: :python, + extensions: %w[.py .pyi], + lexicon: PYTHON_LEXICON, + package: "tree-sitter-python" + ), + javascript: JavaScriptSyntaxAdapter.new( + language: :javascript, + extensions: %w[.js .jsx .mjs .cjs], + lexicon: JAVASCRIPT_LEXICON, + package: "tree-sitter-javascript" + ), + typescript: TypeScriptSyntaxAdapter.new( + language: :typescript, + extensions: %w[.ts .tsx], + lexicon: TYPESCRIPT_LEXICON, + package: 
"tree-sitter-typescript" + ), + go: GoSyntaxAdapter.new( + language: :go, + extensions: %w[.go], + lexicon: GO_LEXICON, + package: "tree-sitter-go" + ), + rust: RustSyntaxAdapter.new( + language: :rust, + extensions: %w[.rs], + lexicon: RUST_LEXICON, + package: "tree-sitter-rust" + ), + zig: ZigSyntaxAdapter.new( + language: :zig, + extensions: %w[.zig], + lexicon: ZIG_LEXICON, + package: "@tree-sitter-grammars/tree-sitter-zig" + ), + lua: LuaSyntaxAdapter.new( + language: :lua, + extensions: %w[.lua], + lexicon: LUA_LEXICON, + package: "@tree-sitter-grammars/tree-sitter-lua" + ), + c: CSyntaxAdapter.new( + language: :c, + extensions: %w[.c .h], + lexicon: C_LEXICON, + package: "tree-sitter-c", + first_argument_receiver: true + ), + cpp: CppSyntaxAdapter.new( + language: :cpp, + extensions: %w[.cc .cpp .cxx .hh .hpp .hxx], + lexicon: CPP_LEXICON, + package: "tree-sitter-cpp" + ), + csharp: CSharpSyntaxAdapter.new( + language: :csharp, + extensions: %w[.cs], + lexicon: CSHARP_LEXICON, + package: "tree-sitter-c-sharp", + grammar_names: %w[c-sharp csharp], + tree_sitter_language_name: "c_sharp" + ), + java: JavaSyntaxAdapter.new( + language: :java, + extensions: %w[.java], + lexicon: JAVA_LEXICON, + package: "tree-sitter-java" + ), + swift: SwiftSyntaxAdapter.new( + language: :swift, + extensions: %w[.swift], + lexicon: SWIFT_LEXICON, + package: "tree-sitter-swift" + ), + kotlin: KotlinSyntaxAdapter.new( + language: :kotlin, + extensions: %w[.kt .kts], + lexicon: KOTLIN_LEXICON, + package: "tree-sitter-kotlin" + ), + php: PhpSyntaxAdapter.new( + language: :php, + extensions: %w[.php], + lexicon: PHP_LEXICON, + package: "tree-sitter-php" + ) + }.freeze + + LANGUAGE_BY_EXTENSION = LANGUAGE_PROFILES.values.each_with_object({}) do |profile, index| + profile.extensions.each { |extension| index[extension] ||= profile.language } + end.freeze + + module_function + + def parse(file, language: nil, parser: ENV.fetch("DECOMPLEX_PARSER", "tree_sitter")) + normalized_parser = 
parser.to_s.tr("-", "_") + lang = (language || language_for(file)).to_sym + key = document_cache_key(file, lang, normalized_parser) + document_cache.fetch(key) do + document_cache[key] = + case normalized_parser + when "", "tree_sitter", "treesitter" + TreeSitterAdapter.new.parse(file, language: lang) + else + raise ArgumentError, "unknown decomplex parser #{parser.inspect}" + end + end + end + + def document_cache + @document_cache ||= {} + end + + def document_cache_key(file, language, parser) + stat = File.stat(file) + [File.expand_path(file), language, parser, stat.size, stat.mtime.to_f] + end + + def parse_uncached(file, language: nil, parser: ENV.fetch("DECOMPLEX_PARSER", "tree_sitter")) + case parser.to_s.tr("-", "_") + when "", "tree_sitter", "treesitter" + TreeSitterAdapter.new.parse(file, language: language) + else + raise ArgumentError, "unknown decomplex parser #{parser.inspect}" + end + end + + def parser + ENV.fetch("DECOMPLEX_PARSER", "tree_sitter").to_s.tr("-", "_") + end + + def tree_sitter? + %w[tree_sitter treesitter].include?(parser) + end + + def language_for(file) + forced = ENV["DECOMPLEX_FORCE_LANGUAGE"].to_s.strip + return forced.tr("-", "_").to_sym unless forced.empty? + + LANGUAGE_BY_EXTENSION.fetch(File.extname(file).downcase, :ruby) + end + + def supported_exts(parser: self.parser) + case parser.to_s.tr("-", "_") + when "", "tree_sitter", "treesitter" + LANGUAGE_PROFILES.values.flat_map(&:extensions).uniq + else + [] + end + end + + def supported_source?(file, parser: self.parser) + supported_exts(parser: parser).include?(File.extname(file).downcase) + end + + def language_lexicon(language) + language_profile(language).lexicon + end + + def language_profile(language) + key = language.to_s.empty? ? 
nil : language.to_sym + raise ArgumentError, "missing Syntax language profile" unless key + + LANGUAGE_PROFILES.fetch(key) + rescue KeyError + raise ArgumentError, "unsupported Syntax language profile: #{language.inspect}" + end + + class Document + attr_reader :file, :language, :source, :lines, :root, :adapter + + def initialize(file:, language:, source:, lines:, root:, adapter:) + @file = file + @language = language + @source = source + @lines = lines + @tree_sitter_facade = TreeSitterFacadeContext.new(root) + @root = @tree_sitter_facade.root + @adapter = adapter + end + + def decision_sites + @decision_sites ||= adapter.decision_sites(self) + end + + def state_writes + @state_writes ||= adapter.state_writes(self) + end + + def state_reads + @state_reads ||= adapter.state_reads(self) + end + + def branch_decisions(immutable_readers:, immutable_reader_types:, type_aliases:) + adapter.branch_decisions( + self, + immutable_readers: immutable_readers, + immutable_reader_types: immutable_reader_types, + type_aliases: type_aliases + ) + end + + def function_defs + @function_defs ||= adapter.function_defs(self) + end + + def owner_defs + @owner_defs ||= adapter.owner_defs(self) + end + + def call_sites + @call_sites ||= adapter.call_sites(self) + end + + def state_declarations + @state_declarations ||= adapter.state_declarations(self) + end + + def state_param_origins + @state_param_origins ||= adapter.state_param_origins(self) + end + + def branch_arms + @branch_arms ||= adapter.branch_arms(self) + end + + def predicate_defs + @predicate_defs ||= adapter.predicate_defs(self) + end + + def comparison_sites + @comparison_sites ||= adapter.comparison_sites(self) + end + + def local_methods + @local_methods ||= adapter.local_methods(self) + end + + def path_condition_sites + @path_condition_sites ||= adapter.path_condition_sites(self) + end + + def immutable_struct_readers + adapter.immutable_struct_readers(self) + end + + def immutable_struct_reader_types + 
adapter.immutable_struct_reader_types(self) + end + + def type_aliases + adapter.type_aliases(self) + end + end + + class TreeSitterFacadeContext + attr_reader :root + + def initialize(raw_root) + @wrappers = {} + @children_cache = {} + @named_children_cache = {} + @named_field_cache = {} + @parent_cache = {} + @prev_sibling_cache = {} + @next_sibling_cache = {} + @prev_named_sibling_cache = {} + @next_named_sibling_cache = {} + @root = wrap(raw_root) + index_tree(raw_root) + end + + def wrap(raw) + return nil unless raw + return raw if raw.is_a?(TreeSitterNodeFacade) + + key = node_key(raw) + @wrappers[key] ||= TreeSitterNodeFacade.new(self, raw, key) + end + + def children(raw) + node = unwrap(raw) + @children_cache.fetch(node_key(node)) { [] } + end + + def named_children(raw) + node = unwrap(raw) + @named_children_cache.fetch(node_key(node)) { [] } + end + + def child_by_field_name(raw, name) + node = unwrap(raw) + key = [node_key(node), name.to_s] + return @named_field_cache[key] if @named_field_cache.key?(key) + + @named_field_cache[key] = wrap(node.child_by_field_name(name)) + rescue StandardError + nil + end + + def parent(raw) + @parent_cache[node_key(unwrap(raw))] + end + + def prev_sibling(raw) + @prev_sibling_cache[node_key(unwrap(raw))] + end + + def next_sibling(raw) + @next_sibling_cache[node_key(unwrap(raw))] + end + + def prev_named_sibling(raw) + @prev_named_sibling_cache[node_key(unwrap(raw))] + end + + def next_named_sibling(raw) + @next_named_sibling_cache[node_key(unwrap(raw))] end - def state_declaration(node) - case node.kind - when "assignment" - ruby_t_let_state_declaration(node) - when "container_field" - zig_container_field_declaration(node) - when "property_declaration", "public_field_definition", "field_definition", "field_declaration" - generic_field_declaration(node) - else - nil - end + def node_key(raw) + node = unwrap(raw) + [node.kind, node.start_byte, node.end_byte, node.named?] 
end - def zig_container_field_declaration(node) - name = node.named_children.find { |child| child.kind == "identifier" } - return nil unless name + private - { field: name.text, type: declared_type_text(node, name) } + def unwrap(raw) + raw.is_a?(TreeSitterNodeFacade) ? raw.raw : raw end - def generic_field_declaration(node) - name = named_field(node, "name") || - node.named_children.find { |child| %w[identifier field_identifier property_identifier].include?(child.kind) } - return nil unless name + def index_tree(raw_root) + pending = [raw_root] + until pending.empty? + raw = pending.pop + key = node_key(raw) + raw_children = Array(raw.children) + wrapped_children = raw_children.map { |child| wrap(child) } + @children_cache[key] = wrapped_children + @named_children_cache[key] = wrapped_children.select(&:named?) - { field: name.text, type: declared_type_text(node, name) } + raw_children.each do |child| + child_key = node_key(child) + @parent_cache[child_key] = wrap(raw) + end + + index_siblings(raw_children, @prev_sibling_cache, @next_sibling_cache) + index_siblings(raw_children.select(&:named?), @prev_named_sibling_cache, @next_named_sibling_cache) + + pending.concat(raw_children.reverse) + end end - def declared_type_text(node, name_node) - text = node.text.to_s - after_name = text[(name_node.end_byte - node.start_byte)..].to_s - if (match = after_name.match(/\A\s*:\s*([^=,\n]+)/)) - normalize_text(match[1]) - elsif (match = text.match(/\A\s*(?:pub\s+)?(?:const|var)\s+\w+\s*:\s*([^=;\n]+)/)) - normalize_text(match[1]) + def index_siblings(raw_children, prev_cache, next_cache) + raw_children.each_with_index do |child, index| + key = node_key(child) + prev_cache[key] = wrap(raw_children[index - 1]) if index.positive? 
+ next_cache[key] = wrap(raw_children[index + 1]) if index + 1 < raw_children.length end - rescue StandardError - nil end + end - def state_read_target(node) - case node.kind - when "call" - receiver = named_field(node, "receiver") - method = named_field(node, "method") - return nil unless receiver && method - return nil if namespace_receiver?(receiver.text) - return nil if NOISE_MESSAGES.include?(method.text) - return nil if named_field(node, "arguments") + class TreeSitterNodeFacade + attr_reader :context, :raw - { receiver: normalize_text(receiver.text), field: method.text } - when "field", "selector_expression", "member_expression", "attribute", - "field_expression", "expression_list" - return nil if node.kind == "expression_list" && !(named_field(node, "operand") && named_field(node, "field")) + def initialize(context, raw, key) + @context = context + @raw = raw + @key = key + end - object = named_field(node, "object") || named_field(node, "receiver") || - named_field(node, "operand") || named_field(node, "value") - field = named_field(node, "field") || named_field(node, "property") || node.named_children.last - if node.kind == "field_expression" && node.text.to_s.start_with?(".") - field = node.named_children.find { |child| child.kind == "identifier" } || field - return { receiver: ".literal", field: field.text } if field - end - return nil unless object && field - return nil if namespace_receiver?(object.text) - return nil if NOISE_MESSAGES.include?(field.text) + def kind + @kind ||= raw.kind + end - { receiver: normalize_text(object.text), field: field.text } - when "instance_variable", "global_variable" - { receiver: "self", field: node.text } - end + def text + @text ||= raw.text.to_s end - def state_target(lhs) - return nil unless ts_node?(lhs) - return nil if prev_sibling(lhs)&.text == ":" + def start_byte + raw.start_byte + end - case lhs.kind - when "call" - receiver = named_field(lhs, "receiver") - method = named_field(lhs, "method") - return nil 
unless receiver && method + def end_byte + raw.end_byte + end - { receiver: normalize_text(receiver.text), field: method.text.sub(/=\z/, "") } - when "field", "selector_expression", "member_expression", "attribute", - "field_expression", "expression_list" - if lhs.kind == "expression_list" && !(named_field(lhs, "operand") && named_field(lhs, "field")) - return state_target(lhs.named_children.first) - end + def start_point + raw.start_point + end - object = named_field(lhs, "object") || named_field(lhs, "receiver") || - named_field(lhs, "operand") || named_field(lhs, "value") - field = named_field(lhs, "field") || named_field(lhs, "property") || lhs.named_children.last - if lhs.kind == "field_expression" && lhs.text.to_s.start_with?(".") - field = lhs.named_children.find { |child| child.kind == "identifier" } || field - return { receiver: ".literal", field: field.text.sub(/=\z/, "") } if field - end - return nil unless object && field + def end_point + raw.end_point + end - { receiver: normalize_text(object.text), field: field.text.sub(/=\z/, "") } - when "instance_variable", "global_variable" - { receiver: "self", field: lhs.text } - end + def named? + raw.named? end - def hidden_match?(node) - node.kind == "expression_statement" && - first_token_kind(node) == "match" && - node.named_children.any? { |child| child.kind == "match_block" } + def has_error? + raw.respond_to?(:has_error?) && raw.has_error? 
end - def assignment_lhs?(node) - return false if prev_sibling(node)&.text == ":" + def children + context.children(self) + end - sibling = next_sibling(node) - sibling && %w[= += -= *= /= %= &&= ||=].include?(sibling.text.to_s) + def child_count + children.length end - def instance_variable_node?(node) - ts_node?(node) && node.kind == "instance_variable" + def named_children + context.named_children(self) end - def next_sibling(node) - node.next_sibling - rescue StandardError - nil + def named_child_count + named_children.length end - def prev_sibling(node) - node.prev_sibling - rescue StandardError - nil + def child_by_field_name(name) + context.child_by_field_name(self, name) end - def namespace_receiver?(text) - receiver = text.to_s - return true if receiver.match?(/\A(?:std|builtin|build_options)(?:\.|\z)/) - return true if receiver.start_with?("@") + def parent + context.parent(self) + end - receiver.match?(/\A[A-Z][A-Za-z0-9_]*(?:\.[A-Z][A-Za-z0-9_]*)*\z/) + def prev_sibling + context.prev_sibling(self) end - def named_field(node, name) - node.child_by_field_name(name) - rescue StandardError - nil + def next_sibling + context.next_sibling(self) end - def parent_node(node) - node.parent - rescue StandardError - nil + def prev_named_sibling + context.prev_named_sibling(self) end - def field_like_node?(node) - %w[field selector_expression member_expression attribute field_expression - expression_list scoped_identifier].include?(node.kind) + def next_named_sibling + context.next_named_sibling(self) end - def normalize_type_owner(text) - value = text.to_s.strip - value = value.sub(/\A[&*]+/, "") - value = value.gsub(/\b(?:const|mut|var)\b/, "").strip - value.split(/[({<\s]/).first.to_s.split(".").last + def ==(other) + other = other.raw if other.is_a?(TreeSitterNodeFacade) + other.respond_to?(:kind) && + kind == other.kind && + start_byte == other.start_byte && + end_byte == other.end_byte && + named? == other.named? 
end - def first_named_text(node, kinds) - child = node.named_children.find { |c| kinds.include?(c.kind) } - child&.text + alias eql? == + + def hash + @key.hash end - def inline_def_argument_list?(node) - ts_node?(node) && node.kind == "argument_list" && node.children.first&.kind.to_s == "def" + def inspect + "#<#{self.class} kind=#{kind.inspect} start_byte=#{start_byte} end_byte=#{end_byte}>" end + end + + class TreeSitterAdapter + def self.walk_document(document, stack, profile, &block) + node = document.root + return unless tree_sitter_node?(node) - def inline_def_name(node) - return nil unless inline_def_argument_list?(node) + pending = [[node, stack]] + seen = Set.new + until pending.empty? + current, current_stack = pending.pop + next unless tree_sitter_node?(current) + key = node_key(current) + next if seen.include?(key) + + seen << key + + next_stack = profile.push_context(document, current_stack, current) + yield current, next_stack + next unless profile.descend_into_children?(current, current_stack) + + current.children.reverse_each { |child| pending << [child, next_stack] } + end + end - receiver_index = node.named_children.index { |child| child.kind == "self" || child.kind == "constant" } - search = receiver_index ? node.named_children[(receiver_index + 1)..] : node.named_children - name = search&.find { |child| %w[identifier field_identifier property_identifier].include?(child.kind) }&.text - receiver_index ? 
"self.#{name}" : name + def self.tree_sitter_node?(node) + node && node.respond_to?(:kind) && node.respond_to?(:children) end - def hidden_ruby_method_definition?(node) - ts_node?(node) && node.kind == "body_statement" && node.children.first&.kind.to_s == "def" + def self.node_key(node) + [node.kind, node.start_byte, node.end_byte] + rescue StandardError + node.object_id end - def hidden_ruby_method_name(node) - return nil unless hidden_ruby_method_definition?(node) + def parse(file, language: nil) + lang = (language || Syntax.language_for(file)).to_sym + source = File.read(file) + parser = parser_for(lang) + tree = parser.parse(source) + raise "tree-sitter parse timed out for #{file}" unless tree + + Document.new( + file: file, + language: lang, + source: source, + lines: source.lines, + root: tree.root_node, + adapter: self + ) + end - receiver_index = node.named_children.index { |child| child.kind == "self" || child.kind == "constant" } - search = receiver_index ? node.named_children[(receiver_index + 1)..] : node.named_children - name = search&.find { |child| %w[identifier field_identifier property_identifier].include?(child.kind) }&.text - receiver_index ? "self.#{name}" : name + def decision_sites(document) + profile = syntax_profile(document.language) + out = [] + walk(document, profile) do |node, stack| + out.concat(profile.decision_site_facts(document, node, stack)) + end + out end - def hidden_ruby_method_params(node) - params = node.named_children.find { |child| child.kind == "method_parameters" } - return [] unless params + def state_writes(document) + structural_facts(document).fetch(:state_writes) + end - params.named_children.filter_map { |param| parameter_name(param) }.uniq + def state_reads(document) + structural_facts(document).fetch(:state_reads) end - def hidden_ruby_method_signature(document, node) - body = node.named_children.find { |child| child.kind == "body_statement" } - end_byte = body ? 
body.start_byte : node.end_byte - document.source.byteslice(node.start_byte, end_byte - node.start_byte).to_s.strip.sub(/;+\z/, "") - rescue StandardError - line_text(document, node).strip + def branch_decisions(document, immutable_readers:, immutable_reader_types:, type_aliases:) + profile = syntax_profile(document.language) + out = [] + walk(document, profile) do |node, stack| + out.concat(profile.branch_decision_facts( + document, + node, + stack, + immutable_readers: immutable_readers, + immutable_reader_types: immutable_reader_types, + type_aliases: type_aliases + )) + end + out end - def hidden_ruby_owner_declaration?(node) - return false unless ts_node?(node) - return false unless node.kind == "body_statement" + def function_defs(document) + structural_facts(document).fetch(:function_defs) + end + + def owner_defs(document) + structural_facts(document).fetch(:owner_defs) + end - %w[class module].include?(node.children.first&.kind.to_s) + def call_sites(document) + structural_facts(document).fetch(:call_sites) end - def hidden_ruby_owner_name(node) - node.named_children.find { |child| %w[constant identifier type_identifier].include?(child.kind) }&.text + def state_declarations(document) + structural_facts(document).fetch(:state_declarations) end - def hidden_ruby_owner_kind(node) - node.children.first&.kind.to_s == "module" ? 
:module : :class + def state_param_origins(document) + structural_facts(document).fetch(:state_param_origins) end - def ruby_inline_def_visibility(node) - parent = parent_node(node) - return nil unless parent&.kind == "call" + def structural_facts(document) + @structural_fact_cache ||= {} + @structural_fact_cache[document.object_id] ||= begin + profile = syntax_profile(document.language) + out = { + function_defs: [], + owner_defs: [], + call_sites: [], + state_declarations: [], + state_param_origins: [], + state_reads: [], + state_writes: [] + } + walk(document, profile) do |node, stack| + facts = profile.structural_facts_for_node(document, node, stack) + facts.each do |key, values| + out.fetch(key).concat(values) + end + end + profile.after_structural_facts(document, out) + out[:function_defs].uniq! { |fn| [fn.file, fn.owner, fn.name, fn.line] } + out[:owner_defs].uniq! { |owner| [owner.file, owner.name, owner.kind] } + out[:call_sites].uniq! { |call| [call.file, call.owner, call.function, call.span, call.receiver, call.message] } + out[:state_declarations].uniq! { |decl| [decl.file, decl.owner, decl.field] } + out[:state_param_origins].uniq! { |origin| [origin.file, origin.owner, origin.function, origin.field, origin.param] } + out[:state_reads].uniq! { |read| [read.file, read.owner, read.function, read.span, read.receiver, read.field] } + out[:state_writes].uniq! { |write| [write.file, write.owner, write.function, write.span, write.receiver, write.field] } + out + end + end - target = ruby_call_target(parent) - visibility = target && target[:receiver] == "self" && target[:message]&.to_sym - %i[private protected public].include?(visibility) ? 
visibility : nil + def branch_arms(document) + profile = syntax_profile(document.language) + out = [] + walk(document, profile) do |node, stack| + out.concat(profile.branch_arm_facts(document, node, stack)) + end + out end - def ruby_bare_call_identifier?(node) - parent = parent_node(node) - return false unless parent - return false if ruby_declaration_name?(node, parent) - return false if %w[method_parameters block_parameters argument_list assignment].include?(parent.kind) - if parent.kind == "call" - return false if named_field(parent, "receiver") + def predicate_defs(document) + profile = syntax_profile(document.language) + document.function_defs.filter_map { |function_def| profile.predicate_def(document, function_def) } + end - first = parent.named_children.first - return first == node && next_sibling(node)&.kind == "argument_list" + def comparison_sites(document) + profile = syntax_profile(document.language) + out = [] + walk(document, profile) do |node, stack| + out.concat(profile.comparison_site_facts(document, node, stack)) end - return false if next_sibling(node)&.text == "=" || prev_sibling(node)&.text == "=" - return false if next_sibling(node)&.text == "." || prev_sibling(node)&.text == "." 
+ out + end - %w[body_statement then else elsif ensure rescue].include?(parent.kind) || - next_sibling(node)&.kind == "argument_list" + def local_methods(document) + syntax_profile(document.language).local_methods(document) end - def ruby_declaration_name?(node, parent) - return true if hidden_ruby_method_definition?(parent) - return true if hidden_ruby_owner_declaration?(parent) - return true if %w[method singleton_method class module].include?(parent.kind) + def path_condition_sites(document) + syntax_profile(document.language).path_condition_sites(document) + end - false + def immutable_struct_readers(document) + syntax_profile(document.language).immutable_struct_readers(document) + end + + def immutable_struct_reader_types(document) + syntax_profile(document.language).immutable_struct_reader_types(document) end - def ruby_argument_texts(node) - args = named_field(node, "arguments") || node.named_children.find { |child| child.kind == "argument_list" } - return [] unless args + def type_aliases(document) + syntax_profile(document.language).type_aliases(document) + end + + private - values = args.named_children.map { |child| normalize_text(child.text) } - return values unless values.empty? + def syntax_profile(language) + raise ArgumentError, "missing Syntax language profile context" if language.nil? - text = args.text.to_s.strip - text = text[1...-1] if text.start_with?("(") && text.end_with?(")") - text.split(/\s*,\s*/).map { |arg| normalize_text(arg) }.reject(&:empty?) 
+ Syntax.language_profile(language) end - def normalize_target_receiver(target, stack) - receiver = target[:receiver].to_s - current_receiver = current_receiver_name(stack) - return target unless current_receiver && receiver == current_receiver + def parser_for(language) + require_tree_sitter + lang_name = Syntax.language_profile(language).tree_sitter_language_name + register_language(lang_name, grammar_path(language)) + ::TreeSitter::Parser.new.tap { |parser| parser.language = lang_name } + end - target.merge(receiver: "self") + def require_tree_sitter + gem "tree_sitter", "~> 0.1" + require "tree_sitter" + rescue Gem::LoadError, LoadError => e + raise LoadError, "DECOMPLEX_PARSER=tree_sitter requires the tree_sitter gem: #{e.message}" end - def current_receiver_name(stack) - entry = stack.reverse.find { |item| item.is_a?(Hash) && item[:receiver] } - entry && entry[:receiver] + def register_language(name, path) + @registered ||= {} + return if @registered[name] + + ::TreeSitter.register_language(name, path) + @registered[name] = true end - def ruby_t_let_state_declaration(node) - lhs = named_field(node, "left") || node.named_children.first - rhs = named_field(node, "right") || named_field(node, "value") || node.named_children[1] - target = state_target(lhs) - return nil unless target && target[:receiver] == "self" && target[:field].to_s.start_with?("@") - return nil unless rhs&.kind == "call" + def grammar_path(language) + env_name = "DECOMPLEX_TS_#{language.to_s.upcase}_PATH" + return ENV.fetch(env_name) if ENV[env_name] && File.file?(ENV[env_name]) - receiver = named_field(rhs, "receiver") || rhs.named_children.first - method = named_field(rhs, "method") || rhs.named_children.find { |child| child.kind == "identifier" } - return nil unless receiver&.text == "T" && method&.text == "let" + candidates = grammar_candidates(language) + found = candidates.find { |path| File.file?(path) } + return found if found - args = named_field(rhs, "arguments") || 
rhs.named_children.find { |child| child.kind == "argument_list" } - type = args&.named_children&.[](1)&.text - return nil if type.to_s.empty? + raise LoadError, + "missing Tree-sitter grammar for #{language}. Set #{env_name} " \ + "to a parser shared library (.so/.dylib/.node). Checked: #{candidates.join(', ')}" + end - { field: target[:field], type: normalize_text(type) } + def grammar_candidates(language) + profile = Syntax.language_profile(language) + pkg = profile.package + stems = profile.grammar_names + names = stems.flat_map do |stem| + ["#{stem}.so", "tree-sitter-#{stem}.so", + "libtree-sitter-#{stem}.so", "#{stem}.node", + "tree-sitter-#{stem}.node", + "#{stem}_binding.node", + "tree_sitter_#{stem.tr('-', '_')}_binding.node", + "@tree-sitter-grammars+tree-sitter-#{stem}.node"] + end + roots = [ + File.expand_path("../../vendor/tree-sitter", __dir__), + File.expand_path("../../vendor/tree-sitter/#{language}", __dir__), + File.expand_path("../../node_modules/#{pkg}", __dir__), + File.expand_path("../../node_modules/#{pkg}/build/Release", __dir__), + File.expand_path("../../../../node_modules/#{pkg}", __dir__), + File.expand_path("../../../../node_modules/#{pkg}/build/Release", __dir__), + File.expand_path("../../../../../node_modules/#{pkg}", __dir__), + File.expand_path("../../../../../node_modules/#{pkg}/build/Release", __dir__) + ] + all_prebuilds = roots.flat_map do |root| + stems.flat_map do |stem| + Dir.glob(File.join(root, "prebuilds", "*", "*tree-sitter-#{stem}.node")) + end + end + prebuilds = platform_prebuilds(all_prebuilds) + roots.product(names).map { |root, name| File.join(root, name) } + prebuilds end - def ts_node?(node) - node && node.respond_to?(:kind) && node.respond_to?(:children) + def platform_prebuilds(paths) + os = host_os + arch = host_arch + return paths if os.nil? || arch.nil? 
+ + paths.select { |path| path.include?("/#{os}-#{arch}/") } end - def span(node) - [node.start_point.row + 1, node.start_point.column, - node.end_point.row + 1, node.end_point.column] + def host_os + case RbConfig::CONFIG["host_os"] + when /linux/i then "linux" + when /darwin/i then "darwin" + when /mswin|mingw|cygwin/i then "win32" + end end - def line(node) - node.start_point.row + 1 + def host_arch + case RbConfig::CONFIG["host_cpu"] + when /x86_64|amd64/i then "x64" + when /aarch64|arm64/i then "arm64" + end end - def normalize_text(text) - text.to_s.strip.gsub(/\s+/, " ") + def walk(document, profile, &block) + self.class.walk_document(document, profile.initial_stack(document), profile, &block) end + end end end + +require_relative "syntax/effects" +require_relative "syntax/protocols" +require_relative "syntax/contracts" +require_relative "syntax/dispatch" +require_relative "syntax/clone_similarity" +require_relative "syntax/complexity" +require_relative "syntax/nil_guards" diff --git a/gems/decomplex/lib/decomplex/syntax/adapters.rb b/gems/decomplex/lib/decomplex/syntax/adapters.rb new file mode 100644 index 000000000..ed7648ed5 --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax/adapters.rb @@ -0,0 +1,68 @@ +# frozen_string_literal: true + +module Decomplex + module Syntax + class TreeSitterLanguageAdapter + private + + def c_family_function_params(node) + return nil unless node.kind == "function_definition" + + declarator = named_field(node, "declarator") || + node.named_children.find { |child| child.kind == "function_declarator" } + params = declarator&.named_children&.find { |child| child.kind == "parameter_list" } + return nil unless params + + params.named_children.filter_map { |param| c_family_parameter_name(param) || parameter_name(param) }.uniq + end + + def c_family_parameter_name(param) + declarator = param.named_children.reverse.find { |child| child.kind.end_with?("_declarator") } + name = c_family_declarator_name_node(declarator) + return 
name.text if name + + direct = param.named_children.select do |child| + parameter_identifier_node_kinds.include?(child.kind) + end.last + direct&.text + end + + def c_family_declarator_name_node(node) + return nil unless ts_node?(node) + return node if parameter_identifier_node_kinds.include?(node.kind) + + node.named_children.reverse_each do |child| + nested = c_family_declarator_name_node(child) + return nested if nested + end + nil + end + + def boolean_expression_list?(node, operator) + return false unless node.kind == "expression_list" + return false unless direct_operator(node) == operator + return false if node.named_children.size < 2 + + node.children.all? do |child| + child.named? || [operator, "(", ")"].include?(child.text.to_s) + end + end + end + end +end + +require_relative "ruby" +require_relative "python" +require_relative "javascript" +require_relative "typescript" +require_relative "go" +require_relative "rust" +require_relative "zig" +require_relative "lua" +require_relative "c" +require_relative "cpp" +require_relative "csharp" +require_relative "java" +require_relative "swift" +require_relative "kotlin" +require_relative "php" diff --git a/gems/decomplex/lib/decomplex/syntax/c.rb b/gems/decomplex/lib/decomplex/syntax/c.rb new file mode 100644 index 000000000..5d0c51e0c --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax/c.rb @@ -0,0 +1,97 @@ +# frozen_string_literal: true + +module Decomplex + module Syntax + C_LEXICON = LanguageLexicon.new( + nil_literal_patterns: [/\bNULL\b/].freeze, + type_guard_patterns: [ + /\bNULL\b/, + /\bsizeof\s*\(/, + /\b_Generic\s*\(/ + ].freeze, + diagnostic_patterns: [ + /\b(?:assert|abort|exit)\s*\(/, + /\breturn\s+errno\b/ + ].freeze, + trivial_patterns: [ + /\A(?:NULL|true|false|0|1|break|continue)\s*;?\z/, + /\Areturn\s+(?:NULL|true|false|0|1)\s*;?\z/ + ].freeze + ).freeze + + class CSyntaxAdapter < TreeSitterLanguageAdapter + FUNCTION_NODE_KINDS = %w[function_definition].freeze + CALL_NODE_KINDS = 
%w[call_expression].freeze + CLASS_OWNER_NODE_KINDS = [].freeze + STRUCT_OWNER_NODE_KINDS = %w[struct_specifier].freeze + UNION_OWNER_NODE_KINDS = %w[union_declaration].freeze + ENUM_OWNER_NODE_KINDS = %w[enum_declaration].freeze + ANONYMOUS_OWNER_NODE_KINDS = %w[struct_declaration union_declaration enum_declaration].freeze + PARAMETER_LIST_NODE_KINDS = %w[parameter_list].freeze + FUNCTION_BODY_NODE_KINDS = %w[compound_statement].freeze + IDENTIFIER_NODE_KINDS = %w[identifier].freeze + FIELD_IDENTIFIER_NODE_KINDS = %w[field_identifier].freeze + PARAMETER_IDENTIFIER_NODE_KINDS = %w[identifier field_identifier].freeze + LOCAL_DECLARATION_NODE_KINDS = %w[declaration init_declarator].freeze + LOCAL_VARIABLE_DECLARATOR_NODE_KINDS = %w[init_declarator].freeze + FIELD_DECLARATION_NODE_KINDS = %w[field_declaration].freeze + DECLARATION_SITE_PARENT_NODE_KINDS = %w[parameter_declaration init_declarator function_declarator struct_specifier].freeze + FIRST_ARGUMENT_RECEIVER_TYPE_NODE_KINDS = %w[type_identifier primitive_type qualified_identifier scoped_type_identifier].freeze + FIRST_ARGUMENT_RECEIVER_NAME_NODE_KINDS = %w[identifier field_identifier].freeze + RECEIVER_PARAMETER_NODE_KINDS = %w[parameter_declaration].freeze + BOUND_CONTAINER_WRAPPER_NODE_KINDS = %w[ERROR expression_statement return_expression].freeze + BOUND_CONTAINER_PARENT_NODE_KINDS = %w[declaration field_declaration].freeze + BOUND_CONTAINER_NAME_NODE_KINDS = %w[identifier field_identifier type_identifier].freeze + ASSIGNMENT_NODE_KINDS = %w[assignment_expression].freeze + ASSIGNMENT_STATE_DECLARATION_NODE_KINDS = %w[assignment_expression].freeze + ASSIGNMENT_OPERATOR_TOKENS = %w[= += -= *= /= %=].freeze + PATH_ACTION_NODE_KINDS = %w[call_expression expression_statement return_statement].freeze + SIMPLE_ACTION_WRAPPER_NODE_KINDS = %w[compound_statement].freeze + COMPARISON_NODE_KINDS = %w[binary_expression].freeze + BRANCH_NODE_KINDS = %w[if_statement for_statement switch_statement].freeze + LOOP_NODE_KINDS 
= %w[for_statement].freeze + BRANCH_LOOP_NODE_KINDS = LOOP_NODE_KINDS + CASE_NODE_KINDS = %w[switch_statement].freeze + BRANCH_CASE_NODE_KINDS = %w[switch_statement].freeze + IF_NODE_KINDS = %w[if_statement].freeze + CASE_ARM_NODE_KINDS = %w[case_statement].freeze + SWITCH_CASE_ARM_NODE_KINDS = %w[case_statement].freeze + CASE_CONTAINER_STOP_NODE_KINDS = %w[function_definition struct_specifier].freeze + CASE_SUBJECT_SKIP_NODE_KINDS = %w[case_statement else comment].freeze + DEFAULT_CASE_PATTERNS = %w[_ default].freeze + BOOLEAN_AND_OPERATORS = %w[&& and].freeze + BOOLEAN_CONTAINER_NODE_KINDS = %w[binary_expression].freeze + PARENTHESIZED_WRAPPER_NODE_KINDS = %w[parenthesized_expression].freeze + ARGUMENT_LIST_NODE_KINDS = %w[argument_list].freeze + SELF_CALL_IDENTIFIER_NODE_KINDS = %w[identifier field_identifier type_identifier].freeze + SELF_RECEIVER_NAMES = %w[self].freeze + ACCESSOR_CALL_NODE_KINDS = [].freeze + FIELD_LIKE_NODE_KINDS = %w[field_expression].freeze + BLOCK_ARGUMENT_NODE_KINDS = [].freeze + + def visibility(_document, node) + c_visibility(node) + end + + def function_params(node) + c_family_function_params(node) || super + end + + private + + def receiver_convention_owner_name(node, **_context) + return nil unless first_argument_receiver? + return nil unless node.kind == "function_definition" + + receiver = first_argument_receiver_parameter(node) + return nil unless receiver && receiver[:name] == "self" + + normalize_type_owner(receiver[:type]) + end + + def c_visibility(node) + node.children.any? { |child| child.text == "static" } ? 
:private : :public + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/syntax/clone_similarity.rb b/gems/decomplex/lib/decomplex/syntax/clone_similarity.rb new file mode 100644 index 000000000..733785a95 --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax/clone_similarity.rb @@ -0,0 +1,275 @@ +# frozen_string_literal: true + +require "set" + +module Decomplex + module Syntax + CloneCandidate = Struct.new( + :file, :line, :span, :method_name, :node_name, :mass, + :fingerprint, :raw, :child_fingerprints, :child_masses, + keyword_init: true + ) + + class Document + def clone_candidates + @clone_candidates ||= adapter.clone_candidates(self) + end + end + + class TreeSitterAdapter + def clone_candidates(document) + syntax_profile(document.language).clone_candidates(document) + end + end + + class TreeSitterLanguageAdapter + CLONE_IDENTIFIER_KINDS = %w[ + identifier constant type_identifier field_identifier property_identifier + shorthand_property_identifier_pattern simple_identifier variable_name + ].freeze + CLONE_LITERAL_KINDS = %w[ + string string_content string_literal interpreted_string_literal raw_string_literal + integer float int number rational imaginary character char_literal + symbol simple_symbol true false nil none null + ].freeze + CLONE_SKIP_KINDS = %w[ + comment identifier constant type_identifier field_identifier property_identifier + parameters formal_parameters parameter_list argument_list arguments + block_parameters call_suffix function_value_parameters method_parameters value_argument + scope_resolution + ].freeze + CLONE_CANDIDATE_KINDS = %w[ + array assignment assignment_statement block case case_clause class + class_definition class_declaration compound_statement conjunction_expression control_structure_body + do_block enum_declaration for for_statement function_body hash if if_statement match_expression + match_statement method method_definition module operator_assignment singleton_method statements + struct_declaration 
switch_case switch_expression switch_statement + unless until while while_statement + ].freeze + CLONE_BODY_KINDS = %w[ + body block body_statement declaration_list statement_block compound_statement + function_body statements suite do_block + ].freeze + CLONE_CALL_KINDS = %w[ + call call_expression function_call method_call method_invocation invocation_expression + ].freeze + + def clone_candidates(document) + out = [] + seen = Set.new + + document.function_defs.each do |fn| + candidate = clone_candidate_for(document, fn.body, node_name: "defn", function_name: fn.name) + clone_add_candidate(out, seen, candidate) if candidate + end + + clone_walk(document.root) do |node| + next unless clone_candidate_node?(node) + + function = clone_method_span_for(document, line(node)) + clone_add_candidate(out, seen, clone_candidate_for(document, node, function_name: function&.name)) + end + + out + rescue StandardError + [] + end + + private + + def clone_add_candidate(out, seen, candidate) + return unless candidate + return if clone_typed_struct_schema_text?(candidate.raw) + + key = [candidate.file, candidate.line, candidate.span, candidate.node_name, candidate.fingerprint] + return if seen.include?(key) + + seen << key + out << candidate + end + + def clone_candidate_for(document, node, node_name: nil, function_name: nil) + fp, mass = clone_fingerprint(node) + return nil if fp.to_s.empty? + + line_no = line(node) + method = clone_method_span_for(document, line_no) + children = clone_fuzzy_children_for(node) + child_data = children.map { |child| clone_fingerprint(child) } + .reject { |child_fp, child_mass| child_fp.to_s.empty? || child_mass.zero? 
} + + CloneCandidate.new( + file: document.file, + line: line_no, + span: span(node), + method_name: function_name || method&.name || "(top-level)", + node_name: node_name || clone_node_name(node), + mass: mass, + fingerprint: fp, + raw: normalize_text(node.text), + child_fingerprints: child_data.map(&:first), + child_masses: child_data.map(&:last) + ) + end + + def clone_candidate_node?(node) + return false unless ts_node?(node) + return false unless node.named? + return false if CLONE_SKIP_KINDS.include?(node.kind) + return false unless CLONE_CANDIDATE_KINDS.include?(node.kind) + return false if clone_typed_struct_schema_text?(node.text) + + node.named_child_count.positive? + end + + def clone_fuzzy_children_for(node) + body = clone_body_node(node) + source = body || node + children = source.named_children + children = node.named_children if children.empty? + children.reject { |child| CLONE_SKIP_KINDS.include?(child.kind) || clone_typed_struct_schema_text?(child.text) } + end + + def clone_body_node(node) + named_field(node, "body") || + node.named_children.find { |child| CLONE_BODY_KINDS.include?(child.kind) } + end + + def clone_fingerprint(node, active = nil) + return ["", 0] unless ts_node?(node) + + active ||= Set.new + key = node_key(node) + return ["", 0] if active.include?(key) + + active << key + begin + return ["", 0] if node.kind == "comment" + return clone_fingerprint_call(node, active) if CLONE_CALL_KINDS.include?(node.kind) && clone_call_message(node) + + if node.child_count.zero? + token = clone_terminal_token(node) + return ["", 0] if token.empty? + + return [token, 1] + end + + child_parts = [] + mass = 1 + node.children.each do |child| + child_fp, child_mass = clone_fingerprint(child, active) + next if child_fp.empty? + + child_parts << child_fp + mass += child_mass + end + + return [clone_terminal_token(node), 1] if child_parts.empty? 
+ + ["#{node.kind}(#{child_parts.join(' ')})", mass] + ensure + active.delete(key) + end + end + + def clone_fingerprint_call(node, active) + message = clone_call_message(node) + child_parts = [] + mass = 1 + node.children.each do |child| + child_fp, child_mass = clone_fingerprint(child, active) + next if child_fp.empty? + + child_parts << child_fp + mass += child_mass + end + ["#{node.kind}<#{message}>(#{child_parts.join(' ')})", mass] + end + + def clone_call_message(node) + return nil unless node.children.any? { |child| %w[argument_list arguments call_suffix].include?(child.kind) } + + callee = named_field(node, "function") || named_field(node, "callee") + return clone_callee_message(callee) if callee + + argument_node = node.children.find { |child| %w[argument_list arguments call_suffix].include?(child.kind) } + named_before_args = node.named_children.select do |child| + argument_node.nil? || child.start_byte < argument_node.start_byte + end + clone_callee_message(named_before_args.last) + end + + def clone_callee_message(node) + return nil unless ts_node?(node) + return node.text if CLONE_IDENTIFIER_KINDS.include?(node.kind) + return clone_navigation_suffix_message(node) if %w[navigation_expression directly_assignable_expression].include?(node.kind) + + leaf = node.named_children.reverse.find { |child| CLONE_IDENTIFIER_KINDS.include?(child.kind) } + leaf&.text + end + + def clone_navigation_suffix_message(node) + suffix = node.named_children.reverse.find { |child| child.kind == "navigation_suffix" } + leaf = suffix&.named_children&.reverse&.find { |child| CLONE_IDENTIFIER_KINDS.include?(child.kind) } + leaf&.text + end + + def clone_terminal_token(node) + kind = node.kind.to_s + return "id" if CLONE_IDENTIFIER_KINDS.include?(kind) + return clone_literal_token(kind) if CLONE_LITERAL_KINDS.include?(kind) + + text = normalize_text(node.text) + return "" if text.empty? 
+ return "id" if text.match?(/\A[A-Za-z_]\w*[!?=]?\z/) + return "lit" if text.match?(/\A(?::[A-Za-z_]\w*|[-+]?\d+(?:\.\d+)?|".*"|'.*')\z/) + + "#{kind}:#{text}" + end + + def clone_literal_token(kind) + case kind + when "true", "false" then "bool" + when "nil", "none", "null" then "nil" + else "lit" + end + end + + def clone_node_name(node) + return "defn" if %w[method function_definition function_declaration method_definition function_item].include?(node.kind) + return "defs" if node.kind == "singleton_method" + + node.kind + end + + def clone_typed_struct_schema_text?(text) + text.to_s.match?(/<\s*T::Struct\b/) || + text.to_s.lines.all? { |line| line.strip.empty? || line.match?(/\A\s*(?:const|prop)\s+:[A-Za-z_]\w*\b/) } + end + + def clone_method_span_for(document, line_no) + document.function_defs.find { |fn| fn.span[0] <= line_no && line_no <= fn.span[2] } + rescue StandardError + nil + end + + def clone_walk(node, &block) + return unless ts_node?(node) + + pending = [node] + seen = Set.new + until pending.empty? 
+ current = pending.pop + next unless ts_node?(current) + + key = node_key(current) + next if seen.include?(key) + + seen << key + yield current + current.children.reverse_each { |child| pending << child } + end + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/syntax/complexity.rb b/gems/decomplex/lib/decomplex/syntax/complexity.rb new file mode 100644 index 000000000..5f32993e0 --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax/complexity.rb @@ -0,0 +1,187 @@ +# frozen_string_literal: true + +module Decomplex + module Syntax + class Document + def local_complexity_scores + @local_complexity_scores ||= adapter.local_complexity_scores(self) + end + end + + class TreeSitterAdapter + def local_complexity_scores(document) + profile = syntax_profile(document.language) + document.local_methods.to_h do |method| + [method.id, profile.local_complexity_score(method.node)] + end + end + end + + class TreeSitterLanguageAdapter + def local_complexity_score(method_node) + LocalComplexityScorer.new.score(method_node) + end + + class LocalComplexityScorer + def score(method_node) + signals = Hash.new(0) + { + score: round(score_node(method_node, nesting: 0, signals: signals)), + signals: signals.to_h + } + end + + private + + def score_node(node, nesting:, signals:) + return 0.0 unless tree_sitter_node?(node) + return 0.0 if skip_nested?(node) + + if branch?(node) + signals[:branches] += 1 + signals[:nested] += 1 if nesting.positive? + return branch_cost(nesting) + + predicate_cost(node, signals) + + score_children(node, nesting: nesting + 1, signals: signals) + end + + if loop?(node) + signals[:loops] += 1 + signals[:nested] += 1 if nesting.positive? 
+ return branch_cost(nesting) + + score_children(node, nesting: nesting + 1, signals: signals) + end + + if case?(node) + signals[:cases] += 1 + return 0.5 + score_children(node, nesting: nesting + 1, signals: signals) + end + + if rescue?(node) + signals[:rescues] += 1 + return branch_cost(nesting) + + score_children(node, nesting: nesting + 1, signals: signals) + end + + if early_exit?(node) + signals[:early_exits] += 1 + exit_cost = nesting.positive? ? 0.5 + (nesting * 0.25) : 0.0 + return exit_cost + score_children(node, nesting: nesting, signals: signals) + end + + if boolean_node?(node) + signals[:boolean_ops] += 1 + return 0.25 + score_children(node, nesting: nesting, signals: signals) + end + + score_children(node, nesting: nesting, signals: signals) + end + + def score_children(node, nesting:, signals:) + node.children.sum { |child| score_node(child, nesting: nesting, signals: signals) } + end + + def predicate_cost(node, signals) + predicate = condition_node(node) + bools = boolean_count(predicate) + signals[:boolean_ops] += bools + bools * 0.5 + end + + def condition_node(node) + return node.named_children.last if modifier_if?(node) + return node.named_children.first if node.kind == "body_statement" + + node.named_children.first + end + + def boolean_count(node) + return 0 unless tree_sitter_node?(node) + + own = boolean_node?(node) ? 1 : 0 + own + node.children.sum { |child| boolean_count(child) } + end + + def boolean_node?(node) + tree_sitter_node?(node) && + %w[binary binary_expression boolean_operator conjunction_expression disjunction_expression].include?(node.kind) && + node.children.any? { |child| !child.named? && %w[&& || and or].include?(child.text.to_s) } + end + + def branch?(node) + return false unless tree_sitter_node?(node) + return true if %w[if unless if_statement if_expression if_modifier unless_modifier].include?(node.kind) && + node.named_children.any? 
+ + hidden_if?(node) || modifier_if?(node) + end + + def hidden_if?(node) + return true if node.kind == "expression_statement" && node.text.to_s.lstrip.start_with?("if ") + return false unless %w[body_statement block statements statement_list].include?(node.kind) + + first_token = node.children.first + first_token && !first_token.named? && %w[if unless].include?(first_token.kind.to_s) + end + + def modifier_if?(node) + return true if %w[if_modifier unless_modifier].include?(node.kind) + return false unless node.kind == "body_statement" + + seen_named = false + node.children.any? do |child| + seen_named ||= child.named? + seen_named && !child.named? && %w[if unless].include?(child.kind.to_s) + end + end + + def loop?(node) + return false unless tree_sitter_node?(node) + return true if %w[while until while_statement for for_statement for_in_statement do_block].include?(node.kind) + return true if hidden_loop?(node) + + (node.kind == "expression_statement" && node.text.to_s.lstrip.match?(/\A(?:for|while|loop)\b/)) || + (node.kind == "labeled_statement" && node.text.to_s.lstrip.start_with?("for ")) + end + + def hidden_loop?(node) + %w[body_statement block statements statement_list].include?(node.kind) && + node.children.first && + !node.children.first.named? 
&& + %w[for while loop].include?(node.children.first.kind.to_s) + end + + def case?(node) + tree_sitter_node?(node) && + (%w[case switch_statement switch_expression match_statement match_expression].include?(node.kind) || + (node.kind == "expression_statement" && node.text.to_s.lstrip.start_with?("match "))) + end + + def rescue?(node) + tree_sitter_node?(node) && %w[rescue rescue_modifier rescue_clause rescue_body].include?(node.kind) + end + + def early_exit?(node) + tree_sitter_node?(node) && + %w[return break next redo retry return_statement break_statement continue_statement].include?(node.kind) + end + + def skip_nested?(node) + %w[class module lambda].include?(node.kind) + end + + def tree_sitter_node?(node) + node.respond_to?(:kind) && node.respond_to?(:children) + end + + def branch_cost(nesting) + 1.1 + nesting + end + + def round(value) + (value * 10).round / 10.0 + end + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/syntax/contracts.rb b/gems/decomplex/lib/decomplex/syntax/contracts.rb new file mode 100644 index 000000000..be6200669 --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax/contracts.rb @@ -0,0 +1,40 @@ +# frozen_string_literal: true + +module Decomplex + module Syntax + class Document + def local_contract_assignments(method) + adapter.local_contract_assignments(self, method) + end + end + + class TreeSitterLanguageAdapter + def local_contract_assignments(_document, method) + method.statements.each_with_object({}) do |statement, map| + next unless statement.writes.size == 1 + + name = statement.writes.first.to_s + map[name] ||= local_contract_source(name, statement.source) + end.compact + end + + private + + def local_contract_source(name, source) + match = source.to_s.match(/\b#{Regexp.escape(name)}\b\s*(?::=|=)\s*(.+?)\s*;?\z/m) + return nil unless match + + rhs = match[1].strip + return nil if rhs.match?(/\s(?:if|unless|rescue)\s|\?|:/) + + rhs + end + end + + class TreeSitterAdapter + def 
local_contract_assignments(document, method) + syntax_profile(document.language).local_contract_assignments(document, method) + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/syntax/cpp.rb b/gems/decomplex/lib/decomplex/syntax/cpp.rb new file mode 100644 index 000000000..4276a9cbb --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax/cpp.rb @@ -0,0 +1,133 @@ +# frozen_string_literal: true + +module Decomplex + module Syntax + CPP_LEXICON = LanguageLexicon.new( + nil_literal_patterns: [/\b(?:nullptr|NULL)\b/].freeze, + type_guard_patterns: [ + /\b(?:nullptr|NULL)\b/, + /\b(?:dynamic_cast|typeid)\s*[<(]/, + /\bstd::(?:get_if|holds_alternative)\s*[<(]/ + ].freeze, + diagnostic_patterns: [ + /\bthrow\b/, + /\b(?:assert|abort|exit)\s*\(/, + /\bstd::terminate\s*\(/ + ].freeze, + trivial_patterns: [ + /\A(?:nullptr|NULL|true|false|0|1|break|continue)\s*;?\z/, + /\Areturn\s+(?:nullptr|NULL|true|false|0|1)\s*;?\z/ + ].freeze + ).freeze + + class CppSyntaxAdapter < TreeSitterLanguageAdapter + FUNCTION_NODE_KINDS = %w[function_definition].freeze + CALL_NODE_KINDS = %w[call_expression].freeze + CLASS_OWNER_NODE_KINDS = %w[class_specifier].freeze + STRUCT_OWNER_NODE_KINDS = %w[struct_specifier].freeze + PARAMETER_LIST_NODE_KINDS = %w[parameter_list].freeze + FUNCTION_BODY_NODE_KINDS = %w[compound_statement].freeze + IDENTIFIER_NODE_KINDS = %w[identifier type_identifier qualified_identifier namespace_identifier].freeze + FIELD_IDENTIFIER_NODE_KINDS = %w[field_identifier].freeze + PARAMETER_IDENTIFIER_NODE_KINDS = %w[identifier field_identifier].freeze + LOCAL_DECLARATION_NODE_KINDS = %w[declaration init_declarator].freeze + LOCAL_VARIABLE_DECLARATOR_NODE_KINDS = %w[init_declarator].freeze + FIELD_DECLARATION_NODE_KINDS = %w[field_declaration].freeze + DECLARATION_SITE_PARENT_NODE_KINDS = %w[parameter_declaration init_declarator function_declarator class_specifier struct_specifier].freeze + RECEIVER_TYPE_NODE_KINDS = %w[type_identifier qualified_identifier 
scoped_type_identifier].freeze + FIRST_ARGUMENT_RECEIVER_TYPE_NODE_KINDS = %w[type_identifier primitive_type qualified_identifier scoped_type_identifier].freeze + FIRST_ARGUMENT_RECEIVER_NAME_NODE_KINDS = %w[identifier field_identifier].freeze + RECEIVER_PARAMETER_NODE_KINDS = %w[parameter_declaration].freeze + ASSIGNMENT_NODE_KINDS = %w[assignment_expression].freeze + ASSIGNMENT_STATE_DECLARATION_NODE_KINDS = %w[assignment_expression].freeze + ASSIGNMENT_OPERATOR_TOKENS = %w[= += -= *= /= %=].freeze + PATH_ACTION_NODE_KINDS = %w[call_expression expression_statement return_statement].freeze + SIMPLE_ACTION_WRAPPER_NODE_KINDS = %w[compound_statement].freeze + COMPARISON_NODE_KINDS = %w[binary_expression].freeze + BRANCH_NODE_KINDS = %w[if_statement for_range_loop switch_statement].freeze + LOOP_NODE_KINDS = %w[for_range_loop].freeze + BRANCH_LOOP_NODE_KINDS = LOOP_NODE_KINDS + CASE_NODE_KINDS = %w[switch_statement].freeze + BRANCH_CASE_NODE_KINDS = %w[switch_statement].freeze + IF_NODE_KINDS = %w[if_statement].freeze + CASE_ARM_NODE_KINDS = %w[case_statement].freeze + SWITCH_CASE_ARM_NODE_KINDS = %w[case_statement].freeze + CASE_CONTAINER_STOP_NODE_KINDS = %w[function_definition class_specifier struct_specifier].freeze + CASE_SUBJECT_SKIP_NODE_KINDS = %w[case_statement else comment].freeze + DEFAULT_CASE_PATTERNS = %w[_ default].freeze + BOOLEAN_AND_OPERATORS = %w[&& and].freeze + BOOLEAN_CONTAINER_NODE_KINDS = %w[binary_expression].freeze + PARENTHESIZED_WRAPPER_NODE_KINDS = %w[condition_clause parenthesized_expression].freeze + ARGUMENT_LIST_NODE_KINDS = %w[argument_list].freeze + SELF_CALL_IDENTIFIER_NODE_KINDS = %w[identifier type_identifier field_identifier qualified_identifier].freeze + SELF_RECEIVER_NAMES = %w[this self].freeze + PUBLIC_VISIBILITY_TOKENS = %w[public pub].freeze + ACCESSOR_CALL_NODE_KINDS = [].freeze + FIELD_LIKE_NODE_KINDS = %w[field_expression].freeze + BLOCK_ARGUMENT_NODE_KINDS = [].freeze + + def visibility(_document, node) + 
modifier_visibility(node) || cpp_visibility(node) + end + + def function_params(node) + c_family_function_params(node) || super + end + + def implicit_state_accesses? + true + end + + def field_declaration_name_node(node) + declarator = node.named_children.reverse.find { |child| child.kind.end_with?("_declarator") } + name = declarator&.named_children&.reverse&.find do |child| + (identifier_node_kinds + field_identifier_node_kinds).include?(child.kind) + end + return name if name + + super + end + + private + + def control_context(node) + return :iterates if node.kind == "for_range_loop" + + super + end + + def cpp_visibility(node) + visibility = previous_cpp_access_specifier(node) + return visibility if visibility + + owner = nearest_owner_declaration(node) + return :public if owner&.kind == "struct_specifier" + + :private + end + + def previous_cpp_access_specifier(node) + sibling = prev_sibling(node) + while sibling + return sibling.text.to_sym if sibling.kind == "access_specifier" && + %w[public private protected].include?(sibling.text) + + sibling = prev_sibling(sibling) + end + nil + end + + def nearest_owner_declaration(node) + parent = parent_node(node) + seen = Set.new + while parent && !seen.include?(node_key(parent)) + seen << node_key(parent) + return parent if %w[class_specifier struct_specifier class class_definition class_declaration].include?(parent.kind) + + parent = parent_node(parent) + end + nil + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/syntax/csharp.rb b/gems/decomplex/lib/decomplex/syntax/csharp.rb new file mode 100644 index 000000000..d8937acb5 --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax/csharp.rb @@ -0,0 +1,107 @@ +# frozen_string_literal: true + +module Decomplex + module Syntax + CSHARP_LEXICON = LanguageLexicon.new( + nil_literal_patterns: [/\bnull\b/].freeze, + type_guard_patterns: [ + /\bnull\b/, + /(?:\?\.|\?\?)/, + /\b(?:is|as|typeof)\b/ + ].freeze, + diagnostic_patterns: [ + /\bthrow\b/, + 
/\b(?:Debug\.Assert|Trace\.Assert|Environment\.Exit)\s*\(/ + ].freeze, + trivial_patterns: [ + /\A(?:null|true|false|0|1|break|continue)\s*;?\z/, + /\Areturn\s+(?:null|true|false|0|1)\s*;?\z/ + ].freeze + ).freeze + + class CSharpSyntaxAdapter < TreeSitterLanguageAdapter + FUNCTION_NODE_KINDS = %w[method_declaration].freeze + CALL_NODE_KINDS = %w[invocation_expression].freeze + CLASS_OWNER_NODE_KINDS = %w[class_declaration].freeze + PARAMETER_LIST_NODE_KINDS = %w[parameter_list].freeze + FUNCTION_BODY_NODE_KINDS = %w[block declaration_list].freeze + IDENTIFIER_NODE_KINDS = %w[identifier].freeze + FIELD_IDENTIFIER_NODE_KINDS = [].freeze + PARAMETER_IDENTIFIER_NODE_KINDS = %w[identifier].freeze + LOCAL_IDENTIFIER_WRAPPER_NODE_KINDS = %w[argument].freeze + LOCAL_DECLARATION_NODE_KINDS = %w[local_declaration_statement variable_declaration variable_declarator].freeze + VARIABLE_DECLARATION_NODE_KINDS = %w[variable_declaration].freeze + LOCAL_VARIABLE_DECLARATOR_NODE_KINDS = %w[variable_declarator].freeze + DECLARATOR_NODE_KINDS = %w[variable_declaration variable_declarator].freeze + FIELD_DECLARATION_NODE_KINDS = %w[field_declaration].freeze + DECLARATION_SITE_PARENT_NODE_KINDS = %w[parameter variable_declarator method_declaration class_declaration].freeze + ASSIGNMENT_NODE_KINDS = %w[assignment_expression].freeze + ASSIGNMENT_STATE_DECLARATION_NODE_KINDS = %w[assignment_expression].freeze + ASSIGNMENT_OPERATOR_TOKENS = %w[= += -= *= /= %=].freeze + PATH_ACTION_NODE_KINDS = %w[invocation_expression expression_statement return_statement].freeze + SIMPLE_ACTION_WRAPPER_NODE_KINDS = %w[block].freeze + COMPARISON_NODE_KINDS = %w[binary_expression].freeze + BRANCH_NODE_KINDS = %w[if_statement foreach_statement switch_statement].freeze + LOOP_NODE_KINDS = %w[foreach_statement].freeze + BRANCH_LOOP_NODE_KINDS = LOOP_NODE_KINDS + CASE_NODE_KINDS = %w[switch_statement].freeze + BRANCH_CASE_NODE_KINDS = %w[switch_statement].freeze + IF_NODE_KINDS = %w[if_statement].freeze + 
CASE_ARM_NODE_KINDS = %w[switch_section].freeze + SWITCH_CASE_ARM_NODE_KINDS = %w[switch_section].freeze + CASE_CONTAINER_STOP_NODE_KINDS = %w[method_declaration class_declaration].freeze + CASE_SUBJECT_SKIP_NODE_KINDS = %w[switch_section else comment].freeze + DEFAULT_CASE_PATTERNS = %w[_ default].freeze + BOOLEAN_AND_OPERATORS = %w[&& and].freeze + BOOLEAN_CONTAINER_NODE_KINDS = %w[binary_expression].freeze + PARENTHESIZED_WRAPPER_NODE_KINDS = %w[parenthesized_expression].freeze + ADJACENT_METHOD_INVOCATION_NODE_KINDS = %w[invocation_expression].freeze + ARGUMENT_LIST_NODE_KINDS = %w[argument_list].freeze + SELF_CALL_IDENTIFIER_NODE_KINDS = %w[identifier].freeze + SELF_RECEIVER_NAMES = %w[this self].freeze + PUBLIC_VISIBILITY_TOKENS = %w[public pub].freeze + ACCESSOR_CALL_NODE_KINDS = [].freeze + FIELD_LIKE_NODE_KINDS = %w[member_access_expression].freeze + BLOCK_ARGUMENT_NODE_KINDS = [].freeze + + def visibility(_document, node) + modifier_visibility(node) || :private + end + + def implicit_state_accesses? 
+ true + end + + def field_declaration_name_node(node) + declaration = node.named_children.find { |child| child.kind == "variable_declaration" } + declarator = declaration&.named_children&.find { |child| child.kind == "variable_declarator" } + return named_field(declarator, "name") || declarator if declarator + + super + end + + def state_read_target(node) + if node.kind == "argument" + object = named_field(node, "expression") + field = named_field(node, "name") + field_text = member_field_text(field) + return nil unless object && field_text + return nil if namespace_receiver?(object.text) + return nil if NOISE_MESSAGES.include?(field_text) + + return { receiver: normalize_text(object.text), field: field_text } + end + + super + end + + private + + def control_context(node) + return :iterates if node.kind == "foreach_statement" + + super + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/syntax/dispatch.rb b/gems/decomplex/lib/decomplex/syntax/dispatch.rb new file mode 100644 index 000000000..d847e0a13 --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax/dispatch.rb @@ -0,0 +1,148 @@ +# frozen_string_literal: true + +module Decomplex + module Syntax + DispatchSite = Struct.new(:variant_set, :arm_members, :outside, :file, + :function, :line, :span, keyword_init: true) + + class Document + def dispatch_sites + @dispatch_sites ||= adapter.dispatch_sites(self) + end + end + + class TreeSitterAdapter + def dispatch_sites(document) + syntax_profile(document.language).dispatch_sites(document) + end + end + + class TreeSitterLanguageAdapter + DISPATCH_CONSTANT_PATTERN = /\A[A-Z]\w*(?:(?:::|\.|_)[A-Z]\w*)*\z/ + IF_DISPATCH_PATTERN = /\A(?.+?)\s*(?:==|===)\s*(?[A-Z]\w*(?:(?:::|\.|_)[A-Z]\w*)*)\z/ + + def dispatch_sites(document) + arms = document.branch_arms + case_dispatch_sites(document, arms) + if_dispatch_sites(document, arms) + end + + private + + def case_dispatch_sites(document, arms) + arms.select { |arm| arm.kind == :case } + .group_by { |arm| 
[arm.file, arm.function, arm.decision_span, arm.predicate] } + .filter_map { |_key, case_arms| record_case_dispatch_site(document, case_arms) } + end + + def record_case_dispatch_site(document, arms) + predicate = arms.first.predicate.to_s + return nil if predicate.empty? + + arm_members = {} + arms.each do |arm| + variants = dispatch_constant_patterns(arm.member) + next if variants.empty? + + members = dispatch_members_inside(document, predicate, arm.function, arm.span) + variants.each { |variant| (arm_members[variant] ||= []).concat(members) } + end + return nil if arm_members.size < 2 + + arm_members.transform_values!(&:uniq) + DispatchSite.new( + variant_set: arm_members.keys.sort, + arm_members: arm_members, + outside: dispatch_members_outside(document, predicate, arms.first.function, arms.first.decision_span), + file: arms.first.file, + function: arms.first.function, + line: arms.first.decision_line, + span: arms.first.decision_span + ) + end + + def if_dispatch_sites(document, arms) + arms.select { |arm| arm.kind == :if && arm.member == "then" } + .filter_map { |arm| [arm, if_dispatch_match(arm.predicate)] } + .reject { |_arm, match| match.nil? 
} + .group_by { |arm, match| [arm.file, arm.function, match[:subject]] } + .filter_map { |_key, matched| record_if_dispatch_site(document, matched) } + end + + def record_if_dispatch_site(document, matched) + predicate = matched.first[1][:subject] + arm_members = {} + matched.each do |arm, match| + members = dispatch_members_inside(document, predicate, arm.function, arm.span) + (arm_members[match[:variant]] ||= []).concat(members) + end + return nil if arm_members.size < 2 + + arm_members.transform_values!(&:uniq) + DispatchSite.new( + variant_set: arm_members.keys.sort, + arm_members: arm_members, + outside: dispatch_members_outside_spans(document, predicate, matched.first[0].function, matched.map { |arm, _match| arm.span }), + file: matched.first[0].file, + function: matched.first[0].function, + line: matched.first[0].decision_line, + span: matched.first[0].decision_span + ) + end + + def if_dispatch_match(predicate) + source = predicate.to_s.strip + source = source[1...-1].strip if source.start_with?("(") && source.end_with?(")") + match = source.match(IF_DISPATCH_PATTERN) + return nil unless match + + { subject: match[:subject].strip, variant: match[:variant].strip } + end + + def dispatch_members_inside(document, predicate, function, span) + dispatch_member_calls(document, predicate, function) + .select { |call| dispatch_inside_span?(call.span, span) } + .map { |call| dispatch_member_name(call) } + .uniq + end + + def dispatch_members_outside(document, predicate, function, decision_span) + dispatch_member_calls(document, predicate, function) + .reject { |call| dispatch_inside_span?(call.span, decision_span) } + .map { |call| dispatch_member_name(call) } + .uniq + end + + def dispatch_members_outside_spans(document, predicate, function, spans) + dispatch_member_calls(document, predicate, function) + .reject { |call| spans.any? 
{ |span| dispatch_inside_span?(call.span, span) } } + .map { |call| dispatch_member_name(call) } + .uniq + end + + def dispatch_member_calls(document, predicate, function) + document.call_sites.select do |call| + call.function == function && + call.receiver.to_s == predicate && + !call.message.to_s.empty? + end + end + + def dispatch_member_name(call) + call.message.to_s.sub(/=\z/, "") + end + + def dispatch_constant_patterns(member) + member.to_s.split(/\s*,\s*/).map { |pattern| pattern.sub(/\Acase\s+/, "") } + .select { |pattern| pattern.match?(DISPATCH_CONSTANT_PATTERN) } + end + + def dispatch_inside_span?(inner, outer) + return false unless inner && outer + + starts_after_or_at = (inner[0] > outer[0]) || (inner[0] == outer[0] && inner[1] >= outer[1]) + ends_before_or_at = (inner[2] < outer[2]) || (inner[2] == outer[2] && inner[3] <= outer[3]) + starts_after_or_at && ends_before_or_at + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/syntax/effects.rb b/gems/decomplex/lib/decomplex/syntax/effects.rb new file mode 100644 index 000000000..6b834a444 --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax/effects.rb @@ -0,0 +1,531 @@ +# frozen_string_literal: true + +module Decomplex + module Syntax + SemanticEffectSite = Struct.new(:kind, :detail, :file, :function, :owner, :line, :span, + keyword_init: true) + EffectLexicon = Struct.new( + :dispatch_mids, :meta_mids, :method_obj_mids, :io_consts, + :io_bare, :dir_context, :context_pairs, :context_bare, + :callback_set, :core_consts, + keyword_init: true + ) + + def self.core_owner_names(language) + profile = language_profile(language) + return [] unless profile.respond_to?(:effect_lexicon, true) + + lexicon = profile.send(:effect_lexicon) + Array(lexicon&.core_consts) + rescue ArgumentError + [] + end + + class Document + def semantic_effect_sites + @semantic_effect_sites ||= adapter.semantic_effect_sites(self) + end + end + + class TreeSitterLanguageAdapter + def semantic_effect_sites(document) + 
semantic_effect_sites_from_calls(document) + end + + private + + def effect_lexicon + nil + end + + def semantic_effect_sites_from_calls(document) + return [] unless effect_lexicon + + by_operation = {} + document.call_sites.each do |call| + site = semantic_effect_site_for_call(call) + next unless site + + key = [site.kind, site.detail, site.file, site.function, site.owner, + site.line, call.receiver, call.message, call.arguments] + current = by_operation[key] + if current.nil? || span_width(site.span) > span_width(current.span) + by_operation[key] = site + end + end + by_operation.values + end + + def span_width(span_value) + ((span_value[2] - span_value[0]) * 100_000) + (span_value[3] - span_value[1]) + end + + def semantic_effect_site_for_call(call) + lexicon = effect_lexicon + message = call.message.to_s + + if effect_callback_call?(call, message) + return semantic_effect_site_from_call(call, :callback_inversion, message) + end + return semantic_effect_site_from_call(call, :metaprogramming, message) if lexicon.meta_mids.include?(message) + return semantic_effect_site_from_call(call, :dynamic_dispatch, message) if lexicon.dispatch_mids.include?(message) + + if message == "call" && !call.receiver.to_s.empty? + return semantic_effect_site_from_call(call, :dynamic_dispatch, "method(...).call") if method_object_receiver?(call.receiver) + return semantic_effect_site_from_call(call, :dynamic_dispatch, "#{call.receiver}.call") if variable_receiver?(call.receiver) + end + + const_effect_site_for_call(call, message) || + bare_effect_site_for_call(call, message) || + mutation_effect_site_for_call(call, message) + end + + def const_effect_site_for_call(call, message) + receiver = call.receiver.to_s + return nil if receiver.empty? 
|| receiver == "self" + + lexicon = effect_lexicon + base = receiver.sub(/\A::/, "").split("::").first + return semantic_effect_site_from_call(call, :context_dependency, "Dir.#{message}") \ + if base == "Dir" && lexicon.dir_context.include?(message) + + if lexicon.io_consts.include?(base) || ruby_net_receiver?(receiver) + return semantic_effect_site_from_call(call, :hidden_io, "#{receiver.sub(/\A::/, "")}.#{message}") + end + return semantic_effect_site_from_call(call, :context_dependency, "ENV") if receiver == "ENV" + + if lexicon.context_pairs[base]&.include?(message) + return semantic_effect_site_from_call(call, :context_dependency, "#{base}.#{message}") + end + + nil + end + + def bare_effect_site_for_call(call, message) + return nil unless call.receiver.to_s == "self" + + lexicon = effect_lexicon + return semantic_effect_site_from_call(call, :hidden_io, message) \ + if lexicon.io_bare.include?(message) || GENERIC_SYSTEM_IO_BARE.include?(message) + return semantic_effect_site_from_call(call, :context_dependency, message) if lexicon.context_bare.include?(message) + + nil + end + + def mutation_effect_site_for_call(call, message) + return semantic_effect_site_from_call(call, :hidden_mutation, message) \ + if message.length > 1 && message.end_with?("!") && !%w[!= !~].include?(message) + + nil + end + + def effect_callback_call?(call, message) + (call.block || call.arguments.to_a.any? { |arg| arg.to_s.start_with?("&") }) && + effect_callback_name?(message) && + !effect_lexicon.meta_mids.include?(message) + end + + def effect_callback_name?(message) + effect_lexicon.callback_set.include?(message) || + message.match?(/\A(with_|around_|on_|before_|after_)/) || + message.match?(/_hook\z/) + end + + def method_object_receiver?(receiver) + names = effect_lexicon.method_obj_mids.map(&:to_s).map { |name| Regexp.escape(name) } + return false if names.empty? 
+ + receiver.to_s.match?(/(?:\A|\.)(?:#{names.join("|")})\s*\(/) + end + + def variable_receiver?(receiver) + receiver.to_s.match?(/\A(?:[a-z_]\w*|[@$][A-Za-z_]\w*)\z/) + end + + def ruby_net_receiver?(_receiver) + false + end + + def semantic_effect_site_from_call(call, kind, detail) + SemanticEffectSite.new( + kind: kind, + detail: detail, + file: call.file, + function: call.function, + owner: call.owner, + line: call.line, + span: call.span + ) + end + + def semantic_effect_site(document, node, stack, kind, detail) + SemanticEffectSite.new( + kind: kind, + detail: detail, + file: document.file, + function: current_function(stack), + owner: current_owner(document, stack), + line: line(node), + span: span(node) + ) + end + end + + class TreeSitterAdapter + def semantic_effect_sites(document) + syntax_profile(document.language).semantic_effect_sites(document) + end + end + + GENERIC_SYSTEM_IO_BARE = %w[print println eprintln printf puts panic].freeze + COMMON_CALLBACK_SET = %w[transaction synchronize lock with_lock unlock + mutex atomic subscribe callback hook].freeze + + GENERIC_SYSTEM_EFFECT_LEXICON = EffectLexicon.new( + dispatch_mids: [].freeze, + meta_mids: [].freeze, + method_obj_mids: [].freeze, + io_consts: [].freeze, + io_bare: GENERIC_SYSTEM_IO_BARE, + dir_context: [].freeze, + context_pairs: {}.freeze, + context_bare: [].freeze, + callback_set: [].freeze, + core_consts: [].freeze + ).freeze + + PYTHON_EFFECT_LEXICON = EffectLexicon.new( + dispatch_mids: %w[getattr setattr hasattr __getattr__ __setattr__ import_module].freeze, + meta_mids: %w[eval exec compile type globals locals vars setattr delattr].freeze, + method_obj_mids: %w[method].freeze, + io_consts: %w[Path pathlib os sys subprocess socket shutil].freeze, + io_bare: %w[print input open exec eval].freeze, + dir_context: %w[getcwd home].freeze, + context_pairs: { + "time" => %w[time monotonic perf_counter], + "datetime" => %w[now today utcnow], + "random" => %w[random randint randrange choice] + 
}.freeze, + context_bare: %w[random randint randrange].freeze, + callback_set: COMMON_CALLBACK_SET, + core_consts: [].freeze + ).freeze + + JAVASCRIPT_EFFECT_LEXICON = EffectLexicon.new( + dispatch_mids: %w[eval Function call apply bind].freeze, + meta_mids: %w[eval Function defineProperty defineProperties setPrototypeOf].freeze, + method_obj_mids: %w[method].freeze, + io_consts: %w[console Console fs process Deno Bun].freeze, + io_bare: %w[setTimeout setInterval fetch require import].freeze, + dir_context: [].freeze, + context_pairs: { + "Date" => %w[now], + "Math" => %w[random], + "performance" => %w[now] + }.freeze, + context_bare: [].freeze, + callback_set: COMMON_CALLBACK_SET, + core_consts: [].freeze + ).freeze + + GO_EFFECT_LEXICON = EffectLexicon.new( + dispatch_mids: %w[Call CallSlice Method MethodByName ValueOf TypeOf].freeze, + meta_mids: %w[Call CallSlice MethodByName New MakeFunc].freeze, + method_obj_mids: %w[method].freeze, + io_consts: %w[os io ioutil fs net http exec syscall].freeze, + io_bare: %w[panic print println recover].freeze, + dir_context: %w[Getwd UserHomeDir].freeze, + context_pairs: { + "time" => %w[Now Since Until], + "rand" => %w[Int Intn Float64 Read] + }.freeze, + context_bare: [].freeze, + callback_set: (COMMON_CALLBACK_SET + %w[Lock Unlock RLock RUnlock Do Go Add Done Wait]).freeze, + core_consts: [].freeze + ).freeze + + RUST_EFFECT_LEXICON = EffectLexicon.new( + dispatch_mids: %w[downcast downcast_ref downcast_mut call call_mut call_once].freeze, + meta_mids: %w[transmute from_raw_parts from_raw_parts_mut].freeze, + method_obj_mids: %w[method].freeze, + io_consts: %w[std tokio fs env process net io].freeze, + io_bare: %w[panic todo unimplemented unreachable].freeze, + dir_context: %w[current_dir home_dir].freeze, + context_pairs: { + "SystemTime" => %w[now], + "Instant" => %w[now] + }.freeze, + context_bare: [].freeze, + callback_set: (COMMON_CALLBACK_SET + %w[read write spawn await]).freeze, + core_consts: [].freeze + ).freeze 
+ + ZIG_EFFECT_LEXICON = EffectLexicon.new( + dispatch_mids: %w[field fieldParentPtr ptrCast alignCast call].freeze, + meta_mids: %w[typeInfo TypeOf ptrCast intFromPtr ptrFromInt eval].freeze, + method_obj_mids: %w[method].freeze, + io_consts: %w[std os fs process net Thread Mutex Atomic].freeze, + io_bare: %w[panic unreachable].freeze, + dir_context: [].freeze, + context_pairs: { + "time" => %w[timestamp nanoTimestamp milliTimestamp] + }.freeze, + context_bare: [].freeze, + callback_set: (COMMON_CALLBACK_SET + %w[spawn wait signal]).freeze, + core_consts: [].freeze + ).freeze + + LUA_EFFECT_LEXICON = EffectLexicon.new( + dispatch_mids: %w[load loadfile dofile require rawget rawset].freeze, + meta_mids: %w[setmetatable getmetatable debug eval load loadfile].freeze, + method_obj_mids: %w[method].freeze, + io_consts: %w[io os debug package].freeze, + io_bare: %w[print error assert require collectgarbage].freeze, + dir_context: [].freeze, + context_pairs: { + "os" => %w[time clock date getenv], + "math" => %w[random] + }.freeze, + context_bare: [].freeze, + callback_set: COMMON_CALLBACK_SET, + core_consts: [].freeze + ).freeze + + C_EFFECT_LEXICON = EffectLexicon.new( + dispatch_mids: %w[dlsym dlopen GetProcAddress].freeze, + meta_mids: %w[setjmp longjmp va_start va_arg].freeze, + method_obj_mids: %w[method].freeze, + io_consts: %w[FILE DIR pthread mutex atomic].freeze, + io_bare: %w[printf fprintf fopen open read write close system exec abort exit assert].freeze, + dir_context: %w[getcwd getenv].freeze, + context_pairs: {}.freeze, + context_bare: %w[rand time clock].freeze, + callback_set: (COMMON_CALLBACK_SET + %w[pthread_mutex_lock pthread_mutex_unlock]).freeze, + core_consts: [].freeze + ).freeze + + CPP_EFFECT_LEXICON = EffectLexicon.new( + dispatch_mids: %w[dynamic_cast typeid any_cast get_if visit invoke].freeze, + meta_mids: %w[reinterpret_cast const_cast dlsym dlopen].freeze, + method_obj_mids: %w[method].freeze, + io_consts: %w[std filesystem fstream 
iostream thread mutex atomic].freeze, + io_bare: %w[throw abort exit assert system].freeze, + dir_context: %w[current_path].freeze, + context_pairs: { + "chrono" => %w[now], + "random_device" => %w[operator()] + }.freeze, + context_bare: [].freeze, + callback_set: (COMMON_CALLBACK_SET + %w[try_lock wait notify_one notify_all]).freeze, + core_consts: [].freeze + ).freeze + + CSHARP_EFFECT_LEXICON = EffectLexicon.new( + dispatch_mids: %w[Invoke GetMethod GetProperty GetField Activator CreateInstance].freeze, + meta_mids: %w[Invoke GetType Reflection Emit DynamicMethod].freeze, + method_obj_mids: %w[method].freeze, + io_consts: %w[Console File Directory Path Process Socket HttpClient Environment].freeze, + io_bare: %w[throw].freeze, + dir_context: %w[CurrentDirectory GetEnvironmentVariable].freeze, + context_pairs: { + "DateTime" => %w[Now UtcNow Today], + "Guid" => %w[NewGuid], + "Random" => %w[Next NextDouble] + }.freeze, + context_bare: [].freeze, + callback_set: (COMMON_CALLBACK_SET + %w[Lock Monitor Enter Exit Wait Pulse]).freeze, + core_consts: [].freeze + ).freeze + + JAVA_EFFECT_LEXICON = EffectLexicon.new( + dispatch_mids: %w[invoke getMethod getDeclaredMethod getField getDeclaredField forName].freeze, + meta_mids: %w[invoke setAccessible newInstance Proxy].freeze, + method_obj_mids: %w[method].freeze, + io_consts: %w[System File Files Paths ProcessBuilder Socket HttpClient Thread Lock AtomicReference].freeze, + io_bare: %w[throw].freeze, + dir_context: %w[getProperty getenv].freeze, + context_pairs: { + "System" => %w[currentTimeMillis nanoTime getenv getProperty], + "Instant" => %w[now], + "UUID" => %w[randomUUID], + "Math" => %w[random] + }.freeze, + context_bare: [].freeze, + callback_set: (COMMON_CALLBACK_SET + %w[wait notify notifyAll submit execute]).freeze, + core_consts: [].freeze + ).freeze + + SWIFT_EFFECT_LEXICON = EffectLexicon.new( + dispatch_mids: %w[perform value setValue selector NSClassFromString].freeze, + meta_mids: %w[Mirror unsafeBitCast 
withUnsafePointer withUnsafeBytes].freeze, + method_obj_mids: %w[method].freeze, + io_consts: %w[FileManager Process URLSession DispatchQueue Thread Lock NSLock].freeze, + io_bare: %w[print fatalError preconditionFailure assertionFailure].freeze, + dir_context: %w[currentDirectoryPath homeDirectoryForCurrentUser].freeze, + context_pairs: { + "Date" => %w[now], + "UUID" => %w[init] + }.freeze, + context_bare: [].freeze, + callback_set: (COMMON_CALLBACK_SET + %w[async sync]).freeze, + core_consts: [].freeze + ).freeze + + KOTLIN_EFFECT_LEXICON = EffectLexicon.new( + dispatch_mids: %w[invoke call callBy memberProperties declaredMemberFunctions].freeze, + meta_mids: %w[reflection javaClass Class forName setAccessible].freeze, + method_obj_mids: %w[method].freeze, + io_consts: %w[System File Files Paths ProcessBuilder Socket HttpClient Thread Mutex AtomicReference].freeze, + io_bare: %w[println print error check require TODO].freeze, + dir_context: %w[getProperty getenv].freeze, + context_pairs: { + "System" => %w[currentTimeMillis nanoTime getenv getProperty], + "Instant" => %w[now], + "UUID" => %w[randomUUID], + "Random" => %w[nextInt nextLong nextDouble] + }.freeze, + context_bare: [].freeze, + callback_set: (COMMON_CALLBACK_SET + %w[synchronized launch async await]).freeze, + core_consts: [].freeze + ).freeze + + PHP_EFFECT_LEXICON = EffectLexicon.new( + dispatch_mids: %w[call_user_func call_user_func_array __call __callStatic].freeze, + meta_mids: %w[eval ReflectionClass ReflectionMethod ReflectionFunction class_alias].freeze, + method_obj_mids: %w[Closure fromCallable].freeze, + io_consts: %w[FilesystemIterator DirectoryIterator PDO mysqli].freeze, + io_bare: %w[print printf fopen file_get_contents file_put_contents exec shell_exec system passthru die exit trigger_error].freeze, + dir_context: %w[getcwd getenv].freeze, + context_pairs: { + "DateTime" => %w[createFromFormat], + "DateTimeImmutable" => %w[createFromFormat], + "random_int" => %w[call] + }.freeze, + 
context_bare: %w[time microtime random_int rand mt_rand].freeze, + callback_set: COMMON_CALLBACK_SET, + core_consts: [].freeze + ).freeze + + class TreeSitterLanguageAdapter + private + + def effect_lexicon + GENERIC_SYSTEM_EFFECT_LEXICON + end + end + + class RustSyntaxAdapter + private + + def effect_lexicon + RUST_EFFECT_LEXICON + end + end + + class ZigSyntaxAdapter + private + + def effect_lexicon + ZIG_EFFECT_LEXICON + end + end + + class PythonSyntaxAdapter + private + + def effect_lexicon + PYTHON_EFFECT_LEXICON + end + end + + class JavaScriptSyntaxAdapter + private + + def effect_lexicon + JAVASCRIPT_EFFECT_LEXICON + end + end + + class TypeScriptSyntaxAdapter + private + + def effect_lexicon + JAVASCRIPT_EFFECT_LEXICON + end + end + + class GoSyntaxAdapter + private + + def effect_lexicon + GO_EFFECT_LEXICON + end + end + + class LuaSyntaxAdapter + private + + def effect_lexicon + LUA_EFFECT_LEXICON + end + end + + class CSyntaxAdapter + private + + def effect_lexicon + C_EFFECT_LEXICON + end + end + + class CppSyntaxAdapter + private + + def effect_lexicon + CPP_EFFECT_LEXICON + end + end + + class CSharpSyntaxAdapter + private + + def effect_lexicon + CSHARP_EFFECT_LEXICON + end + end + + class JavaSyntaxAdapter + private + + def effect_lexicon + JAVA_EFFECT_LEXICON + end + end + + class SwiftSyntaxAdapter + private + + def effect_lexicon + SWIFT_EFFECT_LEXICON + end + end + + class KotlinSyntaxAdapter + private + + def effect_lexicon + KOTLIN_EFFECT_LEXICON + end + end + + class PhpSyntaxAdapter + private + + def effect_lexicon + PHP_EFFECT_LEXICON + end + end + end +end + +require_relative "ruby_effects" diff --git a/gems/decomplex/lib/decomplex/syntax/go.rb b/gems/decomplex/lib/decomplex/syntax/go.rb new file mode 100644 index 000000000..c6b0b96f7 --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax/go.rb @@ -0,0 +1,224 @@ +# frozen_string_literal: true + +module Decomplex + module Syntax + GO_LEXICON = LanguageLexicon.new( + 
nil_literal_patterns: [/\bnil\b/].freeze, + type_guard_patterns: [ + /\bnil\b/, + /\.\(type\)/, + /\.\([A-Za-z_]\w*(?:\.[A-Za-z_]\w*)*\)/ + ].freeze, + diagnostic_patterns: [ + /\bpanic\s*\(/, + /\breturn\s+error[.\w]*/ + ].freeze, + trivial_patterns: [ + /\A(?:nil|true|false|0|1|break|continue|fallthrough)\s*;?\z/, + /\Areturn\s+(?:nil|true|false|0|1)\s*;?\z/ + ].freeze + ).freeze + + class GoSyntaxAdapter < TreeSitterLanguageAdapter + FUNCTION_NODE_KINDS = %w[function_declaration method_declaration].freeze + CALL_NODE_KINDS = %w[call_expression].freeze + ADJACENT_CALL_NODE_KINDS = %w[selector_expression identifier].freeze + GENERIC_OWNER_NODE_KINDS = %w[type_spec].freeze + PARAMETER_LIST_NODE_KINDS = %w[parameter_list].freeze + METHOD_PARAMETER_LIST_NODE_KINDS = %w[parameter_list].freeze + METHOD_RECEIVER_NODE_KINDS = %w[method_declaration].freeze + FUNCTION_BODY_NODE_KINDS = %w[block statement_list].freeze + NESTED_STATEMENT_WRAPPER_NODE_KINDS = %w[statement_list].freeze + IDENTIFIER_NODE_KINDS = %w[identifier].freeze + FIELD_IDENTIFIER_NODE_KINDS = %w[field_identifier].freeze + PARAMETER_IDENTIFIER_NODE_KINDS = %w[identifier field_identifier].freeze + LOCAL_IDENTIFIER_WRAPPER_NODE_KINDS = %w[expression_list literal_element].freeze + INDEXED_LHS_NODE_KINDS = %w[index_expression slice_expression].freeze + LOCAL_DECLARATION_NODE_KINDS = %w[short_var_declaration range_clause var_declaration variable_declaration].freeze + SHORT_VARIABLE_DECLARATION_NODE_KINDS = %w[short_var_declaration range_clause].freeze + VARIABLE_DECLARATION_NODE_KINDS = %w[expression_list var_spec variable_declaration].freeze + LOCAL_VARIABLE_DECLARATOR_NODE_KINDS = [].freeze + FIELD_DECLARATION_NODE_KINDS = %w[field_declaration].freeze + DECLARATION_SITE_PARENT_NODE_KINDS = %w[parameter_declaration function_declaration method_declaration type_spec].freeze + RECEIVER_TYPE_NODE_KINDS = %w[pointer_type type_identifier].freeze + RECEIVER_PARAMETER_NODE_KINDS = %w[parameter_declaration].freeze + 
FIRST_ARGUMENT_RECEIVER_TYPE_NODE_KINDS = %w[type_identifier pointer_type].freeze + FIRST_ARGUMENT_RECEIVER_NAME_NODE_KINDS = %w[identifier field_identifier].freeze + ASSIGNMENT_NODE_KINDS = %w[assignment_statement].freeze + ASSIGNMENT_STATE_DECLARATION_NODE_KINDS = %w[assignment_statement].freeze + ASSIGNMENT_OPERATOR_TOKENS = %w[= += -= *= /= %=].freeze + PATH_ACTION_NODE_KINDS = %w[call_expression expression_statement return_statement].freeze + SIMPLE_ACTION_WRAPPER_NODE_KINDS = %w[block statement_list].freeze + COMPARISON_NODE_KINDS = %w[binary_expression].freeze + BRANCH_NODE_KINDS = %w[if_statement for_statement expression_switch_statement].freeze + LOOP_NODE_KINDS = %w[for_statement].freeze + BRANCH_LOOP_NODE_KINDS = LOOP_NODE_KINDS + CASE_NODE_KINDS = %w[expression_switch_statement].freeze + BRANCH_CASE_NODE_KINDS = %w[expression_switch_statement].freeze + IF_NODE_KINDS = %w[if_statement].freeze + HIDDEN_IF_WRAPPER_NODE_KINDS = %w[block statement_list].freeze + HIDDEN_IF_TOKEN_KINDS = %w[if].freeze + CASE_ARM_NODE_KINDS = %w[expression_case].freeze + SWITCH_CASE_ARM_NODE_KINDS = %w[expression_case].freeze + CASE_PATTERN_NODE_KINDS = [].freeze + CASE_SUBJECT_NODE_KINDS = [].freeze + CASE_CONTAINER_STOP_NODE_KINDS = %w[function_declaration method_declaration type_spec].freeze + CASE_SUBJECT_SKIP_NODE_KINDS = %w[expression_case else comment].freeze + DEFAULT_CASE_PATTERNS = %w[_ default].freeze + BOOLEAN_AND_OPERATORS = %w[&& and].freeze + BOOLEAN_CONTAINER_NODE_KINDS = %w[binary_expression].freeze + BOOLEAN_WRAPPER_NODE_KINDS = %w[expression_list].freeze + PARENTHESIZED_WRAPPER_NODE_KINDS = %w[parenthesized_expression].freeze + ARGUMENT_LIST_NODE_KINDS = %w[argument_list].freeze + SELF_CALL_IDENTIFIER_NODE_KINDS = %w[identifier field_identifier type_identifier].freeze + SELF_RECEIVER_NAMES = %w[self].freeze + PUBLIC_VISIBILITY_TOKENS = %w[public pub].freeze + ACCESSOR_CALL_NODE_KINDS = [].freeze + EXPRESSION_LIST_NODE_KINDS = %w[expression_list].freeze + 
FIELD_LIKE_NODE_KINDS = %w[selector_expression expression_list].freeze + BLOCK_ARGUMENT_NODE_KINDS = [].freeze + + def visibility(_document, node) + exported_name_visibility(function_name(node)) + end + + def function_params(node) + lists = node.named_children.select { |child| child.kind == "parameter_list" } + params = node.kind == "method_declaration" ? lists[1] : lists.first + return super unless params + + params.named_children.filter_map { |param| parameter_name(param) }.uniq + end + + def call_target(document, node) + return generic_call_target(document, node) if call_node_kinds.include?(node.kind) + return go_adjacent_call_target(node) if adjacent_call_node_kinds.include?(node.kind) + + nil + end + + def state_read_target(node) + go_literal_element_member_target(node) || super + end + + def generic_function_body_statements(node) + body = generic_function_body_node(node) + return super unless body + + named = body.named_children.reject { |child| comment_node?(child) } + if named.size == 1 && named.first.kind == "statement_list" && go_adjacent_call_statement?(named.first) + return [named.first] + end + + super + end + + def generic_local_identifier_text(node) + name = super + name == "_" ? nil : name + end + + def generic_local_declaration_text(node) + return nil if node.text == "_" + + super + end + + def generic_local_write_node?(node) + go_update_statement_target?(node) || super + end + + def skip_local_read_identifier?(node) + go_keyed_element_key?(node) || super + end + + def generic_local_declaration_name_nodes(node) + return go_var_spec_name_nodes(node) if node.kind == "var_declaration" + + super + end + + def indexed_lhs_node?(node) + super || (node.kind == "expression_list" && node.children.any? { |child| !child.named? && child.text == "[" }) + end + + def suppress_field_receiver_lhs_reads? + true + end + + def field_assignment_writes_receiver? 
+ true + end + + private + + def boolean_container?(node) + return true if boolean_expression_list?(node, "&&") + + super + end + + def go_update_statement_target?(node) + parent = parent_node(node) + return false unless parent && %w[inc_statement dec_statement].include?(parent.kind) + + parent.named_children.first == node + end + + def go_adjacent_call_statement?(node) + named = node.named_children.reject { |child| comment_node?(child) } + named.size == 2 && + adjacent_call_node_kinds.include?(named.first.kind) && + argument_list_node_kinds.include?(named.last.kind) + end + + def go_adjacent_call_target(node) + target = adjacent_argument_call_target(node) + return nil unless target + + args = next_sibling(node) || next_sibling(parent_node(node)) + source = go_adjacent_call_source_node(node, args) + target.merge(source_node: source) + end + + def go_adjacent_call_source_node(node, args) + parent = parent_node(node) + return node unless parent && args + + call_text = "#{node.text}#{args.text}" + parent.text.to_s.include?(call_text) ? parent : node + end + + def go_keyed_element_key?(node) + parent = parent_node(node) + return false unless parent&.kind == "keyed_element" + + parent.named_children.first == node + end + + def go_literal_element_member_target(node) + return nil unless node.kind == "literal_element" + return nil if go_keyed_element_key?(node) + + receiver, field = node.named_children + return nil unless receiver && field + return nil unless generic_identifier?(receiver) && field_identifier_node_kinds.include?(field.kind) + + { receiver: normalize_text(receiver.text), field: field.text } + end + + def go_var_spec_name_nodes(node) + go_var_spec_nodes(node).flat_map do |spec| + names = spec.named_children.take_while { |child| child.kind == "identifier" } + names.empty? ? 
[] : names + end + end + + def go_var_spec_nodes(node) + return [node] if node.kind == "var_spec" + + node.named_children.flat_map { |child| go_var_spec_nodes(child) } + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/syntax/java.rb b/gems/decomplex/lib/decomplex/syntax/java.rb new file mode 100644 index 000000000..d63befb78 --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax/java.rb @@ -0,0 +1,86 @@ +# frozen_string_literal: true + +module Decomplex + module Syntax + JAVA_LEXICON = LanguageLexicon.new( + nil_literal_patterns: [/\bnull\b/].freeze, + type_guard_patterns: [ + /\bnull\b/, + /\binstanceof\b/, + /\bObjects\.(?:isNull|nonNull|requireNonNull)\s*\(/ + ].freeze, + diagnostic_patterns: [ + /\bthrow\b/, + /\bassert\b/, + /\bSystem\.exit\s*\(/ + ].freeze, + trivial_patterns: [ + /\A(?:null|true|false|0|1|break|continue)\s*;?\z/, + /\Areturn\s+(?:null|true|false|0|1)\s*;?\z/ + ].freeze + ).freeze + + class JavaSyntaxAdapter < TreeSitterLanguageAdapter + FUNCTION_NODE_KINDS = %w[method_declaration].freeze + CALL_NODE_KINDS = %w[method_invocation].freeze + CLASS_OWNER_NODE_KINDS = %w[class_declaration].freeze + PARAMETER_LIST_NODE_KINDS = %w[formal_parameters].freeze + FUNCTION_BODY_NODE_KINDS = %w[block].freeze + IDENTIFIER_NODE_KINDS = %w[identifier type_identifier].freeze + FIELD_IDENTIFIER_NODE_KINDS = [].freeze + PARAMETER_IDENTIFIER_NODE_KINDS = %w[identifier].freeze + LOCAL_DECLARATION_NODE_KINDS = %w[local_variable_declaration variable_declarator].freeze + VARIABLE_DECLARATION_NODE_KINDS = %w[variable_declarator].freeze + LOCAL_VARIABLE_DECLARATOR_NODE_KINDS = %w[variable_declarator].freeze + FIELD_DECLARATION_NODE_KINDS = %w[field_declaration].freeze + DECLARATION_SITE_PARENT_NODE_KINDS = %w[formal_parameter variable_declarator method_declaration class_declaration].freeze + ASSIGNMENT_NODE_KINDS = %w[assignment_expression].freeze + ASSIGNMENT_STATE_DECLARATION_NODE_KINDS = %w[assignment_expression].freeze + ASSIGNMENT_OPERATOR_TOKENS = 
%w[= += -= *= /= %=].freeze + PATH_ACTION_NODE_KINDS = %w[method_invocation expression_statement return_statement].freeze + SIMPLE_ACTION_WRAPPER_NODE_KINDS = %w[block].freeze + COMPARISON_NODE_KINDS = %w[binary_expression].freeze + BRANCH_NODE_KINDS = %w[if_statement enhanced_for_statement switch_expression].freeze + LOOP_NODE_KINDS = %w[enhanced_for_statement].freeze + BRANCH_LOOP_NODE_KINDS = LOOP_NODE_KINDS + CASE_NODE_KINDS = %w[switch_expression].freeze + BRANCH_CASE_NODE_KINDS = %w[switch_expression].freeze + IF_NODE_KINDS = %w[if_statement].freeze + CASE_ARM_NODE_KINDS = %w[switch_block_statement_group].freeze + SWITCH_CASE_ARM_NODE_KINDS = %w[switch_block_statement_group].freeze + CASE_CONTAINER_STOP_NODE_KINDS = %w[method_declaration class_declaration].freeze + CASE_SUBJECT_SKIP_NODE_KINDS = %w[switch_block_statement_group else line_comment].freeze + DEFAULT_CASE_PATTERNS = %w[_ default].freeze + BOOLEAN_AND_OPERATORS = %w[&& and].freeze + BOOLEAN_CONTAINER_NODE_KINDS = %w[binary_expression].freeze + PARENTHESIZED_WRAPPER_NODE_KINDS = %w[parenthesized_expression].freeze + ADJACENT_METHOD_INVOCATION_NODE_KINDS = %w[method_invocation].freeze + ARGUMENT_LIST_NODE_KINDS = %w[argument_list].freeze + SELF_CALL_IDENTIFIER_NODE_KINDS = %w[identifier type_identifier].freeze + SELF_RECEIVER_NAMES = %w[this self].freeze + PUBLIC_VISIBILITY_TOKENS = %w[public pub].freeze + ACCESSOR_CALL_NODE_KINDS = [].freeze + FIELD_LIKE_NODE_KINDS = %w[field_access].freeze + BLOCK_ARGUMENT_NODE_KINDS = [].freeze + + def function_params(node) + return super unless node.kind == "method_declaration" + + params = node.named_children.find { |child| child.kind == "formal_parameters" } + return super unless params + + params.named_children.filter_map { |param| parameter_name(param) }.uniq + end + end + + class JavaSyntaxAdapter + private + + def control_context(node) + return :iterates if node.kind == "enhanced_for_statement" + + super + end + end + end +end diff --git 
a/gems/decomplex/lib/decomplex/syntax/javascript.rb b/gems/decomplex/lib/decomplex/syntax/javascript.rb new file mode 100644 index 000000000..d802dce93 --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax/javascript.rb @@ -0,0 +1,82 @@ +# frozen_string_literal: true + +module Decomplex + module Syntax + JAVASCRIPT_LEXICON = LanguageLexicon.new( + nil_literal_patterns: [/\b(?:null|undefined)\b/].freeze, + type_guard_patterns: [ + /\btypeof\b/, + /\binstanceof\b/, + /(?:\?\.|\b(?:==|!=|===|!==)\s*(?:null|undefined)\b)/ + ].freeze, + diagnostic_patterns: [ + /\bthrow\b/, + /\bprocess\.exit\s*\(/ + ].freeze, + trivial_patterns: [ + /\A(?:null|undefined|true|false|0|1|break|continue)\s*;?\z/, + /\Areturn\s+(?:null|undefined|true|false|0|1)\s*;?\z/ + ].freeze + ).freeze + + class JavaScriptSyntaxAdapter < TreeSitterLanguageAdapter + FUNCTION_NODE_KINDS = %w[function_declaration method_definition].freeze + CALL_NODE_KINDS = %w[call_expression].freeze + ADJACENT_CALL_NODE_KINDS = %w[member_expression identifier property_identifier].freeze + CLASS_OWNER_NODE_KINDS = %w[class_declaration].freeze + PARAMETER_LIST_NODE_KINDS = %w[formal_parameters].freeze + FUNCTION_BODY_NODE_KINDS = %w[statement_block].freeze + NESTED_STATEMENT_WRAPPER_NODE_KINDS = [].freeze + IDENTIFIER_NODE_KINDS = %w[identifier].freeze + FIELD_IDENTIFIER_NODE_KINDS = %w[property_identifier].freeze + PARAMETER_IDENTIFIER_NODE_KINDS = %w[identifier].freeze + LOCAL_DECLARATION_NODE_KINDS = %w[lexical_declaration variable_declarator].freeze + VARIABLE_DECLARATION_NODE_KINDS = %w[variable_declarator].freeze + LOCAL_VARIABLE_DECLARATOR_NODE_KINDS = %w[variable_declarator].freeze + FIELD_DECLARATION_NODE_KINDS = %w[public_field_definition].freeze + DECLARATION_SITE_PARENT_NODE_KINDS = %w[formal_parameters variable_declarator method_definition function_declaration class_declaration].freeze + ASSIGNMENT_NODE_KINDS = %w[assignment_expression augmented_assignment_expression].freeze + 
ASSIGNMENT_STATE_DECLARATION_NODE_KINDS = %w[assignment_expression].freeze + ASSIGNMENT_OPERATOR_TOKENS = %w[= += -= *= /= %= &&= ||=].freeze + PATH_ACTION_NODE_KINDS = %w[call_expression expression_statement return_statement].freeze + SIMPLE_ACTION_WRAPPER_NODE_KINDS = %w[statement_block].freeze + COMPARISON_NODE_KINDS = %w[binary_expression].freeze + BRANCH_NODE_KINDS = %w[if_statement for_in_statement switch_statement].freeze + LOOP_NODE_KINDS = %w[for_in_statement].freeze + BRANCH_LOOP_NODE_KINDS = LOOP_NODE_KINDS + CASE_NODE_KINDS = %w[switch_statement].freeze + BRANCH_CASE_NODE_KINDS = %w[switch_statement].freeze + IF_NODE_KINDS = %w[if_statement].freeze + CASE_ARM_NODE_KINDS = %w[switch_case].freeze + SWITCH_CASE_ARM_NODE_KINDS = %w[switch_case].freeze + CASE_PATTERN_NODE_KINDS = [].freeze + CASE_CONTAINER_STOP_NODE_KINDS = %w[function_declaration method_definition class_declaration].freeze + CASE_SUBJECT_SKIP_NODE_KINDS = %w[switch_case else comment].freeze + DEFAULT_CASE_PATTERNS = %w[_ default].freeze + BOOLEAN_AND_OPERATORS = %w[&& and].freeze + BOOLEAN_CONTAINER_NODE_KINDS = %w[binary_expression].freeze + PARENTHESIZED_WRAPPER_NODE_KINDS = %w[parenthesized_expression].freeze + BOUND_CONTAINER_PARENT_NODE_KINDS = %w[lexical_declaration public_field_definition].freeze + BOUND_CONTAINER_NAME_NODE_KINDS = %w[identifier property_identifier].freeze + ADJACENT_METHOD_INVOCATION_NODE_KINDS = [].freeze + ARGUMENT_LIST_NODE_KINDS = %w[arguments].freeze + SELF_CALL_IDENTIFIER_NODE_KINDS = %w[identifier property_identifier].freeze + SELF_RECEIVER_NAMES = %w[this self].freeze + PUBLIC_VISIBILITY_TOKENS = %w[public pub].freeze + ACCESSOR_CALL_NODE_KINDS = [].freeze + EXPRESSION_LIST_NODE_KINDS = [].freeze + FIELD_LIKE_NODE_KINDS = %w[member_expression].freeze + BLOCK_ARGUMENT_NODE_KINDS = [].freeze + + def visibility(_document, node) + modifier_visibility(node) || private_name_visibility(node) + end + + private + + def private_name_visibility(node) + 
function_name(node).to_s.start_with?("#") ? :private : :public + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/syntax/kotlin.rb b/gems/decomplex/lib/decomplex/syntax/kotlin.rb new file mode 100644 index 000000000..be46b957e --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax/kotlin.rb @@ -0,0 +1,92 @@ +# frozen_string_literal: true + +module Decomplex + module Syntax + KOTLIN_LEXICON = LanguageLexicon.new( + nil_literal_patterns: [/\bnull\b/].freeze, + type_guard_patterns: [ + /\bnull\b/, + /(?:\?\.|\?\?)/, + /\b(?:is|as\?)(?:\s|$)/ + ].freeze, + diagnostic_patterns: [ + /\bthrow\b/, + /\b(?:error|require|check|assert|TODO)\s*\(/ + ].freeze, + trivial_patterns: [ + /\A(?:null|true|false|0|1|break|continue)\s*;?\z/, + /\Areturn\s+(?:null|true|false|0|1)\s*;?\z/ + ].freeze + ).freeze + + class KotlinSyntaxAdapter < TreeSitterLanguageAdapter + FUNCTION_NODE_KINDS = %w[function_declaration].freeze + CALL_NODE_KINDS = %w[call_expression].freeze + ADJACENT_CALL_NODE_KINDS = %w[navigation_expression directly_assignable_expression simple_identifier].freeze + CLASS_OWNER_NODE_KINDS = %w[class_declaration].freeze + PARAMETER_LIST_NODE_KINDS = %w[function_value_parameters].freeze + FUNCTION_BODY_NODE_KINDS = %w[function_body statements].freeze + NESTED_STATEMENT_WRAPPER_NODE_KINDS = %w[statements].freeze + IDENTIFIER_NODE_KINDS = %w[simple_identifier type_identifier].freeze + FIELD_IDENTIFIER_NODE_KINDS = [].freeze + PARAMETER_IDENTIFIER_NODE_KINDS = %w[simple_identifier].freeze + LOCAL_IDENTIFIER_WRAPPER_NODE_KINDS = %w[directly_assignable_expression value_argument].freeze + LOCAL_DECLARATION_NODE_KINDS = %w[property_declaration variable_declaration].freeze + VARIABLE_DECLARATION_NODE_KINDS = %w[variable_declaration directly_assignable_expression].freeze + LOCAL_VARIABLE_DECLARATOR_NODE_KINDS = [].freeze + FIELD_DECLARATION_NODE_KINDS = %w[property_declaration].freeze + DECLARATION_SITE_PARENT_NODE_KINDS = %w[parameter variable_declaration 
property_declaration function_declaration class_declaration].freeze + ASSIGNMENT_NODE_KINDS = %w[assignment].freeze + ASSIGNMENT_STATE_DECLARATION_NODE_KINDS = %w[assignment].freeze + ASSIGNMENT_OPERATOR_TOKENS = %w[= += -= *= /= %=].freeze + PATH_ACTION_NODE_KINDS = %w[call_expression jump_expression].freeze + SIMPLE_ACTION_WRAPPER_NODE_KINDS = %w[statements control_structure_body function_body].freeze + COMPARISON_NODE_KINDS = %w[equality_expression comparison_expression conjunction_expression additive_expression multiplicative_expression].freeze + BRANCH_NODE_KINDS = %w[if_expression for_statement when_expression].freeze + LOOP_NODE_KINDS = %w[for_statement].freeze + BRANCH_LOOP_NODE_KINDS = LOOP_NODE_KINDS + CASE_NODE_KINDS = %w[when_expression].freeze + HIDDEN_CASE_WRAPPER_NODE_KINDS = %w[statements].freeze + HIDDEN_CASE_TOKEN_KINDS = %w[when].freeze + BRANCH_CASE_NODE_KINDS = %w[when_expression statements].freeze + IF_NODE_KINDS = %w[if_expression].freeze + HIDDEN_IF_WRAPPER_NODE_KINDS = %w[statements].freeze + HIDDEN_IF_TOKEN_KINDS = %w[if].freeze + CASE_ARM_NODE_KINDS = %w[when_entry].freeze + SWITCH_CASE_ARM_NODE_KINDS = %w[when_entry].freeze + CASE_PATTERN_NODE_KINDS = %w[when_condition pattern].freeze + CASE_SUBJECT_NODE_KINDS = %w[when_subject].freeze + CASE_CONTAINER_STOP_NODE_KINDS = %w[function_declaration class_declaration].freeze + CASE_SUBJECT_SKIP_NODE_KINDS = %w[when_entry else line_comment].freeze + DEFAULT_CASE_PATTERNS = %w[_ default else].freeze + BOOLEAN_AND_OPERATORS = %w[&& and].freeze + BOOLEAN_CONTAINER_NODE_KINDS = %w[conjunction_expression equality_expression comparison_expression].freeze + BOOLEAN_WRAPPER_NODE_KINDS = %w[statements pattern].freeze + ARGUMENT_LIST_NODE_KINDS = %w[call_suffix value_argument].freeze + SELF_CALL_IDENTIFIER_NODE_KINDS = %w[simple_identifier type_identifier].freeze + SELF_RECEIVER_NAMES = %w[this self].freeze + ACCESSOR_CALL_NODE_KINDS = [].freeze + NAVIGATION_SUFFIX_NODE_KINDS = 
%w[navigation_suffix].freeze + FIELD_LIKE_NODE_KINDS = %w[navigation_expression directly_assignable_expression].freeze + BLOCK_ARGUMENT_NODE_KINDS = [].freeze + + def state_read_target(node) + kotlin_value_argument_state_target(node) || super + end + + private + + def kotlin_value_argument_state_target(node) + return nil unless ts_node?(node) && node.kind == "value_argument" + + suffix = node.named_children.find { |child| navigation_suffix_node_kinds.include?(child.kind) } + receiver = node.named_children.find { |child| child != suffix } + field = member_field_text(suffix) + return nil unless receiver && field + return nil if namespace_receiver?(receiver.text) + + { receiver: normalize_text(receiver.text), field: field } + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/syntax/lua.rb b/gems/decomplex/lib/decomplex/syntax/lua.rb new file mode 100644 index 000000000..4d66e0cc6 --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax/lua.rb @@ -0,0 +1,209 @@ +# frozen_string_literal: true + +module Decomplex + module Syntax + LUA_LEXICON = LanguageLexicon.new( + nil_literal_patterns: [/\bnil\b/].freeze, + type_guard_patterns: [ + /\btype\s*\(/, + /\bnil\b/, + /\b(?:pcall|xpcall)\s*\(/ + ].freeze, + diagnostic_patterns: [ + /\berror\s*\(/, + /\bassert\s*\(/ + ].freeze, + trivial_patterns: [ + /\A(?:nil|true|false|0|1|break)\s*;?\z/, + /\Areturn\s+(?:nil|true|false|0|1)\s*;?\z/ + ].freeze + ).freeze + + class LuaSyntaxAdapter < TreeSitterLanguageAdapter + FUNCTION_NODE_KINDS = %w[function_declaration].freeze + CALL_NODE_KINDS = %w[function_call method_call].freeze + ADJACENT_CALL_NODE_KINDS = %w[dot_index_expression method_index_expression identifier expression_list variable_list].freeze + PARAMETER_LIST_NODE_KINDS = %w[parameters].freeze + FUNCTION_BODY_NODE_KINDS = %w[block].freeze + NESTED_STATEMENT_WRAPPER_NODE_KINDS = %w[block].freeze + IDENTIFIER_NODE_KINDS = %w[identifier].freeze + FIELD_IDENTIFIER_NODE_KINDS = [].freeze + 
PARAMETER_IDENTIFIER_NODE_KINDS = %w[identifier].freeze + LOCAL_DECLARATION_NODE_KINDS = %w[variable_declaration].freeze + VARIABLE_DECLARATION_NODE_KINDS = %w[variable_declaration variable_list].freeze + DECLARATION_ASSIGNMENT_NODE_KINDS = %w[assignment_statement].freeze + LOCAL_VARIABLE_DECLARATOR_NODE_KINDS = [].freeze + DECLARATION_SITE_PARENT_NODE_KINDS = %w[parameters variable_declaration function_declaration].freeze + ASSIGNMENT_NODE_KINDS = %w[assignment_statement].freeze + ASSIGNMENT_STATE_DECLARATION_NODE_KINDS = %w[assignment_statement].freeze + ASSIGNMENT_OPERATOR_TOKENS = %w[= += -= *= /= %=].freeze + PATH_ACTION_NODE_KINDS = %w[function_call expression_list return_statement].freeze + SIMPLE_ACTION_WRAPPER_NODE_KINDS = %w[block].freeze + COMPARISON_NODE_KINDS = %w[binary_expression].freeze + BRANCH_NODE_KINDS = %w[if_statement for_statement].freeze + LOOP_NODE_KINDS = %w[for_statement].freeze + BRANCH_LOOP_NODE_KINDS = LOOP_NODE_KINDS + IF_NODE_KINDS = %w[if_statement].freeze + HIDDEN_IF_WRAPPER_NODE_KINDS = %w[block].freeze + HIDDEN_IF_TOKEN_KINDS = %w[if].freeze + DEFAULT_CASE_PATTERNS = %w[_ default].freeze + BOOLEAN_AND_OPERATORS = %w[and &&].freeze + BOOLEAN_CONTAINER_NODE_KINDS = %w[binary_expression].freeze + BOOLEAN_WRAPPER_NODE_KINDS = %w[expression_list].freeze + ARGUMENT_LIST_NODE_KINDS = %w[arguments].freeze + SELF_CALL_IDENTIFIER_NODE_KINDS = %w[identifier].freeze + SELF_RECEIVER_NAMES = %w[self].freeze + PUBLIC_VISIBILITY_TOKENS = %w[pub public].freeze + ACCESSOR_CALL_NODE_KINDS = [].freeze + EXPRESSION_LIST_NODE_KINDS = %w[expression_list].freeze + FIELD_LIKE_NODE_KINDS = %w[dot_index_expression variable_list].freeze + BLOCK_ARGUMENT_NODE_KINDS = [].freeze + + def function_name(node) + lua_method_name(node) || super + end + + def receiver_owner_name(node) + lua_method_owner_name(node) || super + end + + def call_target(document, node) + lua_method_call_target(node) || + lua_expression_list_call_target(node) || + 
lua_adjacent_member_call_target(node) || + super + end + + def state_read_target(node) + target = lua_expression_list_member_target(node) || lua_single_return_member_target(node) || super + return nil if target && target[:receiver] == "_" && target[:field] == "_" + + target + end + + def generated_prelude?(document, node) + return false unless line(node) == 1 + + first_line = document.lines.first.to_s + first_line.include?("_tl_compat") && first_line.include?("compat53.module") + end + + private + + def boolean_container?(node) + return true if boolean_expression_list?(node, "and") + + super + end + + def lua_method_name(node) + method = lua_method_index_expression(node) + return nil unless method + + method.named_children.last&.text + end + + def lua_method_owner_name(node) + method = lua_method_index_expression(node) + return nil unless method + + method.named_children.first&.text + end + + def lua_method_index_expression(node) + return nil unless node.kind == "function_declaration" + + node.named_children.find { |child| child.kind == "method_index_expression" } + end + + def lua_expression_list_call_target(node) + return nil unless node.kind == "expression_list" + + callee = node.named_children.find { |child| field_like_node?(child) } + args = node.named_children.find { |child| child.kind == "arguments" } + return nil unless callee && args + + target_from_callee(callee).merge(arguments: args.named_children.map { |child| normalize_text(child.text) }) + rescue StandardError + nil + end + + def lua_method_call_target(node) + if node.kind == "function_call" + callee = node.named_children.find { |child| child.kind == "method_index_expression" } + args = node.named_children.find { |child| child.kind == "arguments" } + return nil unless callee && args + + return lua_method_target(callee, args) + end + + return nil if call_node_ancestor?(node) + return nil unless node.kind == "method_index_expression" + + args = next_sibling(node) + return nil unless args&.kind == 
"arguments" + + lua_method_target(node, args) + rescue StandardError + nil + end + + def lua_method_target(callee, args) + receiver = callee.named_children.first + message = callee.named_children.last + return nil unless receiver && message + + { + receiver: normalize_text(receiver.text), + message: normalize_text(message.text), + arguments: args.named_children.map { |child| normalize_text(child.text) } + } + end + + def lua_expression_list_member_target(node) + return nil unless node.kind == "expression_list" + + children = node.named_children + return nil unless children.size == 2 + return nil unless field_like_node?(children.first) && identifier_node_kinds.include?(children.last.kind) + + { receiver: normalize_text(children.first.text), field: children.last.text } + end + + def lua_adjacent_member_call_target(node) + return nil if call_node_ancestor?(node) + return nil unless node.kind == "identifier" + + args = next_sibling(node) + return nil unless args&.kind == "arguments" + + parent = parent_node(node) + return nil unless parent && field_like_node?(parent) + + target_from_callee(parent).merge(arguments: args.named_children.map { |child| normalize_text(child.text) }) + rescue StandardError + nil + end + + def lua_single_return_member_target(node) + return nil unless node.kind == "expression_list" + + text = normalize_text(node.text) + if (match = text.match(/\A([A-Za-z_]\w*)\.([A-Za-z_]\w*)\z/)) + return { receiver: match[1], field: match[2] } + end + + parent = parent_node(node) + return nil unless parent&.kind == "block" + return nil unless prev_sibling(node)&.kind.to_s == "return" || + parent.children.first&.kind.to_s == "return" + + return nil unless node.named_children.size == 1 + child = node.named_children.first + return nil unless field_like_node?(child) + + generic_state_read_target(child) + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/syntax/nil_guards.rb b/gems/decomplex/lib/decomplex/syntax/nil_guards.rb new file mode 100644 
index 000000000..d61ac5835 --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax/nil_guards.rb @@ -0,0 +1,549 @@ +# frozen_string_literal: true + +require "set" + +module Decomplex + module Syntax + NilGuardFinding = Struct.new(:file, :defn, :line, :span, :local, :guard, + :proof, keyword_init: true) do + def to_h + loc = "#{file}:#{defn}:#{line}" + super.merge(at: loc, spans: { loc => span }) + end + end + + class Document + def redundant_nil_guard_findings + @redundant_nil_guard_findings ||= adapter.redundant_nil_guard_findings(self) + end + end + + class TreeSitterLanguageAdapter + def redundant_nil_guard_findings(document) + NilGuardAnalyzer.new(document).scan + end + end + + class TreeSitterAdapter + def redundant_nil_guard_findings(document) + syntax_profile(document.language).redundant_nil_guard_findings(document) + end + end + + class NilGuardAnalyzer + Flow = Struct.new(:known, :terminated, keyword_init: true) + NilFact = Struct.new(:local, :non_nil_when_true, keyword_init: true) + + TERMINATING_CALLS = %w[raise fail abort exit exit!].freeze + + attr_reader :document, :findings + + def initialize(document) + @document = document + @findings = [] + end + + def scan + document.function_defs.each do |function| + process_block(method_statements(function.body), function.name, Set.new) + end + findings + end + + private + + def process_block(stmts, function, known) + current = known.dup + stmts.each do |stmt| + flow = process_stmt(stmt, function, current) + current = flow.known + return flow if flow.terminated + end + Flow.new(known: current, terminated: false) + end + + def process_stmt(node, function, known) + return Flow.new(known: known.dup, terminated: false) unless ts_node?(node) + + if if_node?(node) + process_branch(node, function, known) + elsif assignment_node?(node) + inspect_node(assignment_rhs(node), function, known) + next_known = known.dup + next_known.delete(assignment_lhs_name(node).to_s) + Flow.new(known: next_known, terminated: false) + else 
+ inspect_node(node, function, known) + Flow.new(known: known.dup, terminated: terminating?(node)) + end + end + + def process_branch(node, function, known) + cond = branch_condition(node) + inspect_node(cond, function, known) + + then_known = known_for_branch(node, true, cond, known) + else_known = known_for_branch(node, false, cond, known) + then_flow = process_block(stmts_for(branch_then_body(node)), function, then_known) + else_flow = process_block(stmts_for(branch_else_body(node)), function, else_known) + + if then_flow.terminated && else_flow.terminated + Flow.new(known: Set.new, terminated: true) + elsif then_flow.terminated + Flow.new(known: else_flow.known, terminated: false) + elsif else_flow.terminated + Flow.new(known: then_flow.known, terminated: false) + else + Flow.new(known: then_flow.known & else_flow.known, terminated: false) + end + end + + def known_for_branch(node, body_branch, cond, known) + next_known = known.dup + cond_true_branch = unless_node?(node) ? !body_branch : body_branch + branch_nil_facts(cond, cond_true_branch).each { |fact| next_known.add(fact.local) } + next_known + end + + def inspect_node(node, function, known) + return unless ts_node?(node) + + recorded = record_redundant(node, function, known) + return if recorded && safe_navigation_call?(node) + return if method_like_node?(node) + + node.children.each { |child| inspect_node(child, function, known) } + end + + def record_redundant(node, function, known) + local = redundant_nil_subject(node, known) + return false unless local + + @findings << NilGuardFinding.new( + file: document.file, + defn: function, + line: line(node), + span: span(node), + local: local, + guard: normalize_text(node.text), + proof: "#{local} is already proven non-nil on this path" + ) + true + end + + def redundant_nil_subject(node, known) + subject = safe_navigation_subject(node) + return subject if subject && known.include?(subject) + + fact = nil_fact(node) + return nil unless fact && 
known.include?(fact.local) + + fact.local + end + + def nil_fact(node) + return nil unless ts_node?(node) + return nil_fact(node.named_children.first) if parenthesized_wrapper?(node) + + if nil_predicate_call?(node) + subject = subject_key(call_receiver_node(node)) + return subject ? NilFact.new(local: subject, non_nil_when_true: false) : nil + end + if non_nil_predicate_call?(node) + subject = subject_key(call_receiver_node(node)) + return subject ? NilFact.new(local: subject, non_nil_when_true: true) : nil + end + + return negated_nil_fact(node.named_children.first) if unary_not?(node) + + comparison_nil_fact(node) + end + + def branch_nil_facts(node, cond_truth) + return [] unless ts_node?(node) + return branch_nil_facts(node.named_children.first, cond_truth) if parenthesized_wrapper?(node) + + if boolean_and?(node) + return [] unless cond_truth + + return flatten_boolean_and(node).flat_map { |child| branch_nil_facts(child, true) } + end + + return branch_nil_facts(node.named_children.first, !cond_truth) if unary_not?(node) + + safe_receiver = safe_nav_receiver_fact(node) + return [safe_receiver] if safe_receiver && cond_truth + + fact = nil_fact(node) + return [fact] if fact && cond_truth == fact.non_nil_when_true + + truthy = truthy_subject_fact(node) + truthy && cond_truth ? [truthy] : [] + end + + def safe_nav_receiver_fact(node) + subject = safe_navigation_subject(node) + subject ? 
NilFact.new(local: subject, non_nil_when_true: true) : nil + end + + def truthy_subject_fact(node) + subject = subject_key(node) + return nil unless subject + + NilFact.new(local: subject, non_nil_when_true: true) + end + + def negated_nil_fact(node) + fact = nil_fact(node) + return nil unless fact + + NilFact.new(local: fact.local, + non_nil_when_true: !fact.non_nil_when_true) + end + + def comparison_nil_fact(node) + return nil unless ts_node?(node) && node.kind == "binary" + + operator = direct_operator(node) + return nil unless %w[== !=].include?(operator) + + left, right = node.named_children + subject = nil + if nil_literal?(right) + subject = subject_key(left) + elsif nil_literal?(left) + subject = subject_key(right) + end + return nil unless subject + + NilFact.new(local: subject, non_nil_when_true: operator == "!=") + end + + def method_statements(node) + body = method_body_node(node) + return [] unless body + + stmts_for(body) + end + + def method_body_node(node) + return nil unless ts_node?(node) + + case node.kind + when "method", "singleton_method", "argument_list", "function_definition", "function_item", + "function_declaration", "method_declaration" + node.named_children.reverse.find do |child| + %w[body_statement block compound_statement function_body statement_block].include?(child.kind) + end + when "body_statement", "block", "compound_statement", "function_body", "statement_block" + if method_like_node?(node) + node.named_children.reverse.find do |child| + %w[body_statement block compound_statement function_body statement_block].include?(child.kind) + end + else + node + end + end + end + + def stmts_for(node) + return [] unless ts_node?(node) + return [node] if if_node?(node) + return [node] if assignment_node?(node) + return [node] if call_node?(node) + + named = node.named_children.reject { |child| child.kind == "comment" } + if named.size == 1 && %w[statements statement_list].include?(named.first.kind) + return [named.first] if 
if_node?(named.first) + + named = named.first.named_children.reject { |child| child.kind == "comment" } + end + return [node] if named.empty? && !node.text.to_s.strip.empty? + + named + end + + def if_node?(node) + return false unless ts_node?(node) + return true if %w[if unless if_statement if_expression if_modifier unless_modifier].include?(node.kind) && node.named_children.any? + return true if node.kind == "expression_statement" && node.text.to_s.lstrip.start_with?("if ") + return false unless %w[body_statement block statements statement_list].include?(node.kind) + + first_token = node.children.first + return true if first_token && !first_token.named? && %w[if unless].include?(first_token.kind.to_s) + + seen_named = false + node.children.any? do |child| + seen_named ||= child.named? + seen_named && !child.named? && %w[if unless].include?(child.kind.to_s) + end + end + + def unless_node?(node) + node.kind.to_s.include?("unless") || first_token_kind(node) == "unless" + end + + def modifier_if_node?(node) + return true if %w[if_modifier unless_modifier].include?(node.kind) + return false unless %w[body_statement block statements statement_list].include?(node.kind) + + seen_named = false + node.children.any? do |child| + seen_named ||= child.named? + seen_named && !child.named? && %w[if unless].include?(child.kind.to_s) + end + end + + def branch_condition(node) + modifier_if_node?(node) ? 
node.named_children.last : node.named_children.first + end + + def branch_then_body(node) + if modifier_if_node?(node) + node.named_children.first + else + node.named_children.find { |child| child.kind == "then" } || node.named_children[1] + end + end + + def branch_else_body(node) + return nil if modifier_if_node?(node) + + node.named_children.find { |child| %w[else elsif].include?(child.kind) } || node.named_children[2] + end + + def assignment_node?(node) + ts_node?(node) && (%w[assignment assignment_expression assignment_statement].include?(node.kind) || flat_assignment_statement?(node)) + end + + def assignment_lhs_name(node) + assignment_lhs(node)&.text + end + + def assignment_lhs(node) + node.named_children.first if assignment_node?(node) + end + + def assignment_rhs(node) + node.named_children[1] if assignment_node?(node) + end + + def flat_assignment_statement?(node) + return false unless ts_node?(node) && node.kind == "body_statement" + + node.children.count { |child| !child.named? && child.text == "=" } == 1 && + node.named_children.size >= 2 + end + + def nil_predicate_call?(node) + call_node?(node) && %w[nil? is_none is_null isNull].include?(call_message(node).to_s) + end + + def non_nil_predicate_call?(node) + call_node?(node) && %w[is_some isSome present?].include?(call_message(node).to_s) + end + + def safe_navigation_call?(node) + ts_node?(node) && node.kind == "call" && + node.children.any? { |child| !child.named? && child.text == "&." 
} + end + + def safe_navigation_subject(node) + return nil unless safe_navigation_call?(node) + + subject_key(call_receiver_node(node)) + end + + def call_receiver_node(node) + return nil unless call_node?(node) + + if adjacent_field_call?(node) + return named_field(node, "object") || named_field(node, "receiver") || + named_field(node, "expression") || named_field(node, "operand") || + node.named_children.first + end + + if %w[call call_expression function_call invocation_expression method_invocation method_call].include?(node.kind) + if node.kind == "call" + names = node.named_children.select { |child| %w[identifier simple_identifier].include?(child.kind) } + return names.first if names.size >= 2 + end + + if %w[invocation_expression method_invocation].include?(node.kind) + names = node.named_children.select { |child| %w[identifier simple_identifier].include?(child.kind) } + return names.first if names.size >= 2 + end + + callee = named_field(node, "function") || named_field(node, "callee") || node.named_children.first + if field_like_node?(callee) + return named_field(callee, "object") || named_field(callee, "receiver") || + named_field(callee, "expression") || named_field(callee, "operand") || + callee.named_children.first + end + end + + node.named_children.first + end + + def call_message(node) + return nil unless call_node?(node) + + if adjacent_field_call?(node) + field = named_field(node, "field") || named_field(node, "property") || + named_field(node, "name") || named_field(node, "suffix") || + node.named_children.last + return field&.text.to_s.sub(/\A[.?]+/, "") + end + + if %w[call call_expression function_call invocation_expression method_invocation method_call].include?(node.kind) + if node.kind == "call" + names = node.named_children.select { |child| %w[identifier simple_identifier].include?(child.kind) } + return names.last.text if names.size >= 2 + end + + if %w[invocation_expression method_invocation].include?(node.kind) + names = 
node.named_children.select { |child| %w[identifier simple_identifier].include?(child.kind) } + return names[1].text if names.size >= 2 + end + + callee = named_field(node, "function") || named_field(node, "callee") || node.named_children.first + if field_like_node?(callee) + field = named_field(callee, "field") || named_field(callee, "property") || + named_field(callee, "name") || named_field(callee, "suffix") || + callee.named_children.last + return field&.text.to_s.sub(/\A[.?]+/, "") + end + return callee.text if %w[identifier simple_identifier].include?(callee&.kind) + end + + node.named_children.reverse.find { |child| %w[identifier simple_identifier].include?(child.kind) }&.text + end + + def call_has_arguments?(node) + ts_node?(node) && + (node.named_children.any? { |child| %w[argument_list arguments call_suffix].include?(child.kind) } || + %w[argument_list arguments call_suffix].include?(next_sibling(node)&.kind)) + end + + def subject_key(node) + return nil unless ts_node?(node) + + case node.kind + when "identifier", "simple_identifier" + node.text + when "self", "this" + "self" + when "call", "call_expression", "function_call", "method_invocation", "invocation_expression", "method_call" + return nil if call_has_arguments?(node) + + receiver = call_receiver_node(node) + message = call_message(node) + return nil unless message && stable_reader_name?(message) + return "self.#{message}" if receiver&.kind == "self" + + recv_key = subject_key(receiver) + recv_key ? "#{recv_key}.#{message}" : nil + else + nil + end + end + + def stable_reader_name?(name) + text = name.to_s + !(text.end_with?("=", "!") || text == "[]") + end + + def nil_literal?(node) + ts_node?(node) && node.kind == "nil" + end + + def unary_not?(node) + ts_node?(node) && node.kind == "unary" && + node.children.any? { |child| !child.named? && child.text == "!" 
} + end + + def parenthesized_wrapper?(node) + ts_node?(node) && %w[condition_clause parenthesized_expression parenthesized_statements].include?(node.kind) && + node.named_children.size == 1 + end + + def boolean_and?(node) + ts_node?(node) && node.kind == "binary" && direct_operator(node) == "&&" + end + + def flatten_boolean_and(node) + return [node] unless boolean_and?(node) + + node.named_children.flat_map { |child| flatten_boolean_and(child) } + end + + def direct_operator(node) + node.children.find { |child| !child.named? && !%w[( )].include?(child.text.to_s) }&.text.to_s + end + + def terminating?(node) + return false unless ts_node?(node) + return true if %w[return break next].include?(node.kind) + return true if node.text.to_s.strip.match?(/\A(?:return|break|next)\b/) + return true if node.kind == "identifier" && TERMINATING_CALLS.include?(node.text.to_s) + + call_node?(node) && TERMINATING_CALLS.include?(call_message(node).to_s) + end + + def method_like_node?(node) + ts_node?(node) && %w[method singleton_method function_definition function_item function_declaration method_declaration].include?(node.kind) + end + + def call_node?(node) + ts_node?(node) && + (%w[call argument_list call_expression function_call invocation_expression method_invocation method_call].include?(node.kind) || + adjacent_field_call?(node)) + end + + def adjacent_field_call?(node) + field_like_node?(node) && %w[argument_list arguments call_suffix].include?(next_sibling(node)&.kind) + end + + def next_sibling(node) + node.next_sibling + rescue StandardError + nil + end + + def first_token_kind(node) + node.children.find { |child| !child.named? 
}&.kind.to_s + end + + def line(node) + node.start_point.row + 1 + end + + def span(node) + [node.start_point.row + 1, node.start_point.column, node.end_point.row + 1, node.end_point.column] + end + + def normalize_text(text) + text.to_s.lines.map(&:strip).reject(&:empty?).join(" ") + end + + def named_field(node, name) + node.child_by_field_name(name) + rescue StandardError + nil + end + + def field_like_node?(node) + ts_node?(node) && + %w[ + attribute directly_assignable_expression dot_index_expression expression_list field field_access + field_expression member_access_expression member_expression navigation_expression scoped_identifier + selector_expression variable_list + ].include?(node.kind) + end + + def ts_node?(node) + node && node.respond_to?(:kind) && node.respond_to?(:children) + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/syntax/php.rb b/gems/decomplex/lib/decomplex/syntax/php.rb new file mode 100644 index 000000000..f3624c247 --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax/php.rb @@ -0,0 +1,525 @@ +# frozen_string_literal: true + +module Decomplex + module Syntax + PHP_LEXICON = LanguageLexicon.new( + nil_literal_patterns: [/\bnull\b/i].freeze, + type_guard_patterns: [ + /\bnull\b/i, + /\b(?:is_null|isset|empty|is_a|instanceof)\s*(?:\(|\b)/ + ].freeze, + diagnostic_patterns: [ + /\bthrow\b/, + /\b(?:die|exit|trigger_error)\s*\(/ + ].freeze, + trivial_patterns: [ + /\A(?:null|true|false|0|1|break|continue)\s*;?\z/i, + /\Areturn\s+(?:null|true|false|0|1)\s*;?\z/i + ].freeze + ).freeze + + class PhpSyntaxAdapter < TreeSitterLanguageAdapter + FUNCTION_NODE_KINDS = %w[function_definition method_declaration].freeze + CALL_NODE_KINDS = %w[function_call_expression member_call_expression scoped_call_expression print_intrinsic].freeze + CLASS_OWNER_NODE_KINDS = %w[class_declaration].freeze + PARAMETER_LIST_NODE_KINDS = %w[formal_parameters].freeze + FUNCTION_BODY_NODE_KINDS = %w[compound_statement declaration_list].freeze + 
IDENTIFIER_NODE_KINDS = %w[name variable_name].freeze + FIELD_IDENTIFIER_NODE_KINDS = [].freeze + PARAMETER_IDENTIFIER_NODE_KINDS = %w[name variable_name simple_parameter].freeze + LOCAL_DECLARATION_NODE_KINDS = [].freeze + LOCAL_VARIABLE_DECLARATOR_NODE_KINDS = [].freeze + FIELD_DECLARATION_NODE_KINDS = %w[property_declaration].freeze + DECLARATION_SITE_PARENT_NODE_KINDS = %w[simple_parameter method_declaration function_definition class_declaration].freeze + ASSIGNMENT_NODE_KINDS = %w[assignment_expression augmented_assignment_expression].freeze + ASSIGNMENT_STATE_DECLARATION_NODE_KINDS = %w[assignment_expression].freeze + ASSIGNMENT_OPERATOR_TOKENS = %w[= += -= *= /= %=].freeze + PATH_ACTION_NODE_KINDS = %w[function_call_expression member_call_expression scoped_call_expression expression_statement return_statement print_intrinsic].freeze + SIMPLE_ACTION_WRAPPER_NODE_KINDS = %w[compound_statement declaration_list].freeze + COMPARISON_NODE_KINDS = %w[binary_expression].freeze + BRANCH_NODE_KINDS = %w[if_statement foreach_statement switch_statement].freeze + LOOP_NODE_KINDS = %w[foreach_statement].freeze + BRANCH_LOOP_NODE_KINDS = LOOP_NODE_KINDS + CASE_NODE_KINDS = %w[switch_statement].freeze + BRANCH_CASE_NODE_KINDS = %w[switch_statement].freeze + IF_NODE_KINDS = %w[if_statement].freeze + CASE_ARM_NODE_KINDS = %w[case_statement].freeze + SWITCH_CASE_ARM_NODE_KINDS = %w[case_statement].freeze + CASE_CONTAINER_STOP_NODE_KINDS = %w[function_definition method_declaration class_declaration].freeze + CASE_SUBJECT_SKIP_NODE_KINDS = %w[case_statement else comment].freeze + DEFAULT_CASE_PATTERNS = %w[_ default].freeze + BOOLEAN_AND_OPERATORS = %w[&& and].freeze + BOOLEAN_CONTAINER_NODE_KINDS = %w[binary_expression].freeze + PARENTHESIZED_WRAPPER_NODE_KINDS = %w[parenthesized_expression].freeze + ARGUMENT_LIST_NODE_KINDS = %w[arguments argument].freeze + SELF_CALL_IDENTIFIER_NODE_KINDS = %w[name variable_name].freeze + SELF_RECEIVER_NAMES = %w[$this this self].freeze + 
ACCESSOR_CALL_NODE_KINDS = [].freeze + FIELD_LIKE_NODE_KINDS = %w[ + member_access_expression nullsafe_member_access_expression member_call_expression + class_constant_access_expression + ].freeze + BLOCK_ARGUMENT_NODE_KINDS = [].freeze + + def function_name(node) + return php_name_text(named_field(node, "name") || node.named_children.find { |child| child.kind == "name" }) if %w[ + function_definition method_declaration + ].include?(node.kind) + + super + end + + def owner_name_from_declaration(document, node) + return php_name_text(named_field(node, "name") || node.named_children.find { |child| child.kind == "name" }) if node.kind == "class_declaration" + + super + end + + def visibility(_document, node) + modifier = node.named_children.find { |child| child.kind == "visibility_modifier" } + return modifier.text.to_sym if modifier && %w[public private protected].include?(modifier.text) + + :public + end + + def function_params(node) + params = named_field(node, "parameters") || + node.named_children.find { |child| child.kind == "formal_parameters" } + return super unless params + + params.named_children.filter_map { |param| php_parameter_name(param) }.uniq + end + + def call_target(document, node) + php_call_target(node) || super + end + + def state_read_target(node) + return nil if php_assignment_lhs?(node) + + php_argument_member_target(node) || super + end + + def state_declaration(node) + php_property_declaration(node) || super + end + + def predicate_def(document, function_def) + predicate = super + return nil unless predicate + + PredicateDef.new( + file: predicate.file, + name: predicate.name, + owner: predicate.owner, + body: php_normalize_source(predicate.body), + line: predicate.line, + span: predicate.span + ) + end + + def path_condition_sites(document) + super.map do |site| + PathConditionSite.new( + guards: site.guards.map { |guard| php_normalize_source(guard) }, + action: php_normalize_source(site.action), + file: site.file, + function: 
site.function, + line: site.line, + span: site.span + ) + end + end + + def local_contract_assignments(document, method) + super.transform_values { |source| php_normalize_source(source) } + end + + def redundant_nil_guard_findings(document) + findings = [] + document.function_defs.each do |function_def| + php_nil_guard_walk(document, function_def.body, function_def.name, Set.new, findings) + end + findings + end + + private + + def php_call_target(node) + return php_print_target(node) if node.kind == "print_intrinsic" + return nil unless %w[function_call_expression member_call_expression scoped_call_expression].include?(node.kind) + + names = node.named_children.select do |child| + php_name_node?(child) || child.kind == "variable_name" || child.kind == "member_access_expression" + end + args = node.named_children.find { |child| child.kind == "arguments" } + + case node.kind + when "member_call_expression" + receiver = php_member_receiver(node) || names.first + message = php_member_name(node) || names[1] + return nil unless receiver && message + + { + receiver: php_normalize_receiver(php_identifier_text(receiver)), + message: php_name_text(message), + arguments: php_argument_texts(args) + } + when "scoped_call_expression" + receiver = names.first + message = names[1] + return nil unless receiver && message + + { + receiver: php_name_text(receiver), + message: php_name_text(message), + arguments: php_argument_texts(args) + } + when "function_call_expression" + name = names.first + return nil unless name + + { + receiver: "self", + message: php_name_text(name), + arguments: php_argument_texts(args) + } + end + end + + def php_print_target(node) + { + receiver: "self", + message: "print", + arguments: node.named_children.map { |child| php_print_argument_text(child) } + } + end + + def conjunction_predicate(node) + php_normalize_source(super) + end + + def branch_predicate(node) + php_normalize_source(super) + end + + def php_property_declaration(node) + return nil 
unless node.kind == "property_declaration" + + property = node.named_children.find { |child| child.kind == "property_element" } + name = property&.named_children&.find { |child| child.kind == "variable_name" } + return nil unless name + + { field: php_identifier_text(name), type: declared_type_text(node, name) } + end + + def php_parameter_name(param) + variable = param.named_children.find { |child| child.kind == "variable_name" } + php_identifier_text(variable) || php_identifier_text(param) + end + + def generic_identifier?(node) + super || (ts_node?(node) && %w[name variable_name].include?(node.kind)) + end + + def generic_local_identifier_text(node) + return php_identifier_text(node) if ts_node?(node) && node.kind == "variable_name" + + super + end + + def generic_member_name?(node) + return true if parent_node(node)&.kind == "variable_name" + return false if node.kind == "variable_name" + + super + end + + def generic_local_writes(node, **kwargs) + (super(node, **kwargs) + php_local_write_names(node)).map { |name| php_identifier_text_value(name) }.uniq + end + + def generic_local_write_node?(node) + return true if ts_node?(node) && node.kind == "variable_name" && php_assignment_lhs?(node) + + super + end + + def decision_member_text(node) + php_normalize_source(super) + end + + def decision_predicate(node) + php_normalize_source(super) + end + + def comparison_target(node) + target = super + return nil unless target + + target.merge(source: php_normalize_source(target[:source])) + end + + def control_context(node) + return :iterates if node.kind == "foreach_statement" + + super + end + + def target_from_callee(callee) + target = super + return target unless target + + target.merge(receiver: php_normalize_receiver(target[:receiver])) + end + + def generic_state_read_target(node) + target = super + return target unless target + + target.merge(receiver: php_normalize_receiver(target[:receiver])) + end + + def generic_state_target(lhs) + target = super + return 
target unless target + + target.merge(receiver: php_normalize_receiver(target[:receiver])) + end + + def member_field_text(field) + php_name_text(field) + end + + def simple_identifier_text?(text) + php_identifier_text_value(text).match?(/\A[A-Za-z_]\w*\z/) + end + + def php_name_node?(node) + ts_node?(node) && %w[name qualified_name].include?(node.kind) + end + + def php_assignment_lhs?(node) + parent = parent_node(node) + return false unless parent + + %w[assignment_expression augmented_assignment_expression].include?(parent.kind) && + parent.named_children.first == node + end + + def php_local_write_names(node) + writes = [] + generic_walk_local(node) do |child| + next unless ts_node?(child) && child.kind == "variable_name" + next unless php_assignment_lhs?(child) + + writes << php_identifier_text(child) + end + writes.compact + end + + def php_argument_texts(args) + Array(args&.named_children).map { |child| php_normalize_source(child.text) } + end + + def php_print_argument_text(node) + value = php_unwrap_parenthesized(node) + php_normalize_source(value&.text || node.text) + end + + def php_argument_member_target(node) + return nil unless ts_node?(node) && node.kind == "argument" + return nil unless node.text.to_s.include?("->") || node.text.to_s.include?("::") + return nil if node.text.to_s.include?("(") + + parts = php_normalize_source(node.text).split(".") + return nil unless parts.size >= 2 + + { + receiver: php_normalize_receiver(parts[0...-1].join(".")), + field: php_identifier_text_value(parts.last) + } + end + + def php_member_receiver(node) + return nil unless ts_node?(node) + + named_field(node, "object") || named_field(node, "receiver") || + named_field(node, "expression") || node.named_children.first + end + + def php_member_name(node) + return nil unless ts_node?(node) + + named_field(node, "name") || named_field(node, "field") || + node.named_children.reverse.find { |child| php_name_node?(child) } + end + + def php_identifier_text(node) + text = 
php_identifier_text_value(node&.text) + text.empty? ? nil : text + end + + def php_name_text(node) + text = php_identifier_text_value(node&.text) + text.empty? ? nil : text + end + + def php_identifier_text_value(text) + text.to_s.sub(/\A\$/, "") + end + + def php_normalize_receiver(receiver) + value = php_normalize_source(php_identifier_text_value(receiver)) + value == "this" ? "self" : value + end + + def php_normalize_source(source) + source.to_s + .gsub(/\$([A-Za-z_]\w*)/, '\1') + .gsub(/->|::/, ".") + end + + def php_nil_guard_walk(document, node, function, known, findings) + return unless ts_node?(node) + return if generic_nested_local_scope?(node) && function_name(node) != function + + if node.kind == "if_statement" + php_process_nil_guard_if(document, node, function, known, findings) + return + end + + php_record_redundant_nil_guard(document, node, function, known, findings) + node.named_children.each do |child| + php_nil_guard_walk(document, child, function, known, findings) + end + end + + def php_process_nil_guard_if(document, node, function, known, findings) + condition = named_field(node, "condition") || node.named_children.first + body = named_field(node, "body") || node.named_children[1] + branch_known = known.dup + php_non_nil_facts(condition).each { |local| branch_known.add(local) } + php_nil_guard_walk(document, body, function, branch_known, findings) + end + + def php_record_redundant_nil_guard(document, node, function, known, findings) + subject = php_nil_guard_subject(node) + return unless subject && known.include?(subject) + + findings << NilGuardFinding.new( + file: document.file, + defn: function, + line: line(node), + span: span(node), + local: subject, + guard: php_normalize_source(node.text), + proof: "#{subject} is already proven non-nil on this path" + ) + end + + def php_non_nil_facts(node) + node = php_unwrap_parenthesized(node) + return [] unless ts_node?(node) + + subject = php_subject_key(node) + return [subject] if subject + + 
call = php_member_call_parts(node) + return [call[:receiver]] if call && %w[isSome is_some present].include?(call[:message]) + + comparison = php_nil_comparison(node) + return [comparison[:subject]] if comparison && %w[!== !=].include?(comparison[:operator]) + + [] + end + + def php_nil_guard_subject(node) + node = php_unwrap_parenthesized(node) + return nil unless ts_node?(node) + + call = php_member_call_parts(node) + return call[:receiver] if call && %w[isNull is_null nil is_none].include?(call[:message]) + + comparison = php_nil_comparison(node) + return comparison[:subject] if comparison && %w[=== ==].include?(comparison[:operator]) + + function_call = php_function_call_parts(node) + return function_call[:arguments].first if function_call && %w[is_null].include?(function_call[:message]) + + nil + end + + def php_nil_comparison(node) + return nil unless ts_node?(node) && node.kind == "binary_expression" + + operator = direct_operator(node) + return nil unless %w[=== !== == !=].include?(operator) + + left, right = node.named_children + if php_null_literal?(right) + subject = php_subject_key(left) + elsif php_null_literal?(left) + subject = php_subject_key(right) + end + subject ? { subject: subject, operator: operator } : nil + end + + def php_member_call_parts(node) + node = php_unwrap_parenthesized(node) + return nil unless ts_node?(node) && node.kind == "member_call_expression" + + access = node.named_children.find { |child| child.kind == "member_access_expression" } + receiver_node = access ? php_member_receiver(access) : node.named_children.find { |child| child.kind == "variable_name" } + message_node = access ? 
php_member_name(access) : node.named_children.find { |child| php_name_node?(child) } + receiver = php_subject_key(receiver_node) + message = php_name_text(message_node) + return nil unless receiver && message + + { receiver: receiver, message: message } + end + + def php_function_call_parts(node) + node = php_unwrap_parenthesized(node) + return nil unless ts_node?(node) && node.kind == "function_call_expression" + + name = node.named_children.find { |child| php_name_node?(child) } + args = node.named_children.find { |child| child.kind == "arguments" } + { + message: php_name_text(name), + arguments: Array(args&.named_children).filter_map { |child| php_subject_key(child) } + } + end + + def php_subject_key(node) + node = php_unwrap_parenthesized(node) + return nil unless ts_node?(node) + + case node.kind + when "variable_name", "name" + php_identifier_text(node) + when "member_access_expression" + receiver = php_subject_key(php_member_receiver(node)) + message = php_name_text(php_member_name(node)) + receiver && message ? 
"#{receiver}.#{message}" : nil + else + nil + end + end + + def php_unwrap_parenthesized(node) + current = node + while ts_node?(current) && + %w[parenthesized_expression parenthesized_statements].include?(current.kind) && + current.named_children.size == 1 + current = current.named_children.first + end + current + end + + def php_null_literal?(node) + ts_node?(node) && node.kind == "null" + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/syntax/protocols.rb b/gems/decomplex/lib/decomplex/syntax/protocols.rb new file mode 100644 index 000000000..355138940 --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax/protocols.rb @@ -0,0 +1,82 @@ +# frozen_string_literal: true + +module Decomplex + module Syntax + ProtocolMethodEffect = Struct.new(:file, :owner, :name, :line, :reads, :writes, + keyword_init: true) + ProtocolCall = Struct.new(:mid, :file, :owner, :defn, :line, :span, keyword_init: true) + ProtocolMethodPath = Struct.new(:file, :owner, :name, :line, :calls, keyword_init: true) + ProtocolPath = Struct.new(:calls, :terminal, keyword_init: true) + + class Document + def protocol_method_effects + @protocol_method_effects ||= adapter.protocol_method_effects(self) + end + + def protocol_call_paths + @protocol_call_paths ||= adapter.protocol_call_paths(self) + end + end + + class TreeSitterLanguageAdapter + def protocol_method_effects(document) + document.function_defs.map do |function_def| + reads = document.state_reads.select do |read| + read.owner == function_def.owner && read.function == function_def.name + end.map(&:field).uniq.sort + writes = document.state_writes.select do |write| + write.owner == function_def.owner && write.function == function_def.name + end.map(&:field).uniq.sort + + ProtocolMethodEffect.new( + file: function_def.file, + owner: function_def.owner, + name: function_def.name.to_s.split(/[.:]/).last, + line: function_def.line, + reads: reads, + writes: writes + ) + end + end + + def protocol_call_paths(document) + 
document.function_defs.map do |function_def| + calls = document.call_sites.select do |call| + call.owner == function_def.owner && + call.function == function_def.name && + call.receiver.to_s == "self" + end.map do |call| + ProtocolCall.new( + mid: call.message.to_s.split(/[.:]/).last, + file: call.file, + owner: call.owner, + defn: call.function, + line: call.line, + span: call.span + ) + end + + ProtocolMethodPath.new( + file: function_def.file, + owner: function_def.owner, + name: function_def.name.to_s.split(/[.:]/).last, + line: function_def.line, + calls: calls + ) + end + end + end + + class TreeSitterAdapter + def protocol_method_effects(document) + syntax_profile(document.language).protocol_method_effects(document) + end + + def protocol_call_paths(document) + syntax_profile(document.language).protocol_call_paths(document) + end + end + end +end + +require_relative "ruby_protocols" diff --git a/gems/decomplex/lib/decomplex/syntax/python.rb b/gems/decomplex/lib/decomplex/syntax/python.rb new file mode 100644 index 000000000..5cf827a4e --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax/python.rb @@ -0,0 +1,429 @@ +# frozen_string_literal: true + +module Decomplex + module Syntax + PYTHON_LEXICON = LanguageLexicon.new( + nil_literal_patterns: [/\bNone\b/].freeze, + type_guard_patterns: [ + /\b(?:isinstance|issubclass|hasattr)\s*\(/, + /\bis\s+(?:not\s+)?None\b/, + /\btype\s*\([^)]*\)\s*(?:==|is)\s*/ + ].freeze, + diagnostic_patterns: [ + /\braise\b/, + /\bassert\b/, + /\bsys\.exit\s*\(/ + ].freeze, + trivial_patterns: [ + /\A(?:None|True|False|0|1|break|continue|pass)\s*;?\z/, + /\Areturn\s+(?:None|True|False|0|1)\s*;?\z/ + ].freeze + ).freeze + + class PythonSyntaxAdapter < TreeSitterLanguageAdapter + PythonSyntheticStatement = Struct.new(:kind, :children, :text, :start_point, :end_point, keyword_init: true) do + def named? + true + end + + def named_children + children.select { |child| child.respond_to?(:named?) && child.named? 
} + end + end + + FUNCTION_NODE_KINDS = %w[function_definition].freeze + CALL_NODE_KINDS = %w[call].freeze + ADJACENT_CALL_NODE_KINDS = %w[attribute identifier].freeze + CLASS_OWNER_NODE_KINDS = %w[class_definition].freeze + PARAMETER_LIST_NODE_KINDS = %w[parameters].freeze + FUNCTION_BODY_NODE_KINDS = %w[block].freeze + NESTED_STATEMENT_WRAPPER_NODE_KINDS = %w[block].freeze + IDENTIFIER_NODE_KINDS = %w[identifier].freeze + FIELD_IDENTIFIER_NODE_KINDS = [].freeze + PARAMETER_IDENTIFIER_NODE_KINDS = %w[identifier].freeze + ASSIGNMENT_NODE_KINDS = %w[assignment augmented_assignment].freeze + ASSIGNMENT_OPERATOR_TOKENS = %w[= += -= *= /= %= :=].freeze + PATH_ACTION_NODE_KINDS = %w[call expression_statement return_statement].freeze + SIMPLE_ACTION_WRAPPER_NODE_KINDS = %w[block].freeze + COMPARISON_NODE_KINDS = %w[comparison_operator binary_operator boolean_operator].freeze + BRANCH_NODE_KINDS = %w[if_statement for_statement match_statement].freeze + LOOP_NODE_KINDS = %w[for_statement while_statement].freeze + TEXT_LOOP_NODE_KINDS = %w[block].freeze + BRANCH_LOOP_NODE_KINDS = LOOP_NODE_KINDS + CASE_NODE_KINDS = %w[match_statement].freeze + HIDDEN_CASE_WRAPPER_NODE_KINDS = %w[block].freeze + HIDDEN_CASE_TOKEN_KINDS = %w[match case].freeze + BRANCH_CASE_NODE_KINDS = %w[match_statement block].freeze + IF_NODE_KINDS = %w[if_statement].freeze + HIDDEN_IF_WRAPPER_NODE_KINDS = %w[block statement_list].freeze + HIDDEN_IF_TOKEN_KINDS = %w[if].freeze + CASE_ARM_NODE_KINDS = %w[case_clause].freeze + SWITCH_CASE_ARM_NODE_KINDS = %w[case_clause].freeze + CASE_PATTERN_NODE_KINDS = %w[case_pattern pattern].freeze + CASE_SUBJECT_NODE_KINDS = [].freeze + CASE_CONTAINER_STOP_NODE_KINDS = %w[function_definition class_definition].freeze + CASE_SUBJECT_SKIP_NODE_KINDS = %w[case_clause else comment].freeze + DEFAULT_CASE_PATTERNS = %w[_ default].freeze + BOOLEAN_AND_OPERATORS = %w[and &&].freeze + BOOLEAN_CONTAINER_NODE_KINDS = %w[binary_operator boolean_operator comparison_operator].freeze 
+ BOOLEAN_WRAPPER_NODE_KINDS = %w[block].freeze + PARENTHESIZED_WRAPPER_NODE_KINDS = %w[parenthesized_expression].freeze + LOCAL_VARIABLE_DECLARATOR_NODE_KINDS = [].freeze + LOCAL_IDENTIFIER_WRAPPER_NODE_KINDS = %w[with_clause].freeze + FIELD_DECLARATION_NODE_KINDS = [].freeze + DECLARATION_SITE_PARENT_NODE_KINDS = %w[parameters].freeze + ADJACENT_METHOD_INVOCATION_NODE_KINDS = [].freeze + ARGUMENT_LIST_NODE_KINDS = %w[argument_list].freeze + SELF_CALL_IDENTIFIER_NODE_KINDS = %w[identifier].freeze + SELF_RECEIVER_NAMES = %w[self].freeze + ACCESSOR_CALL_NODE_KINDS = %w[call].freeze + FIELD_LIKE_NODE_KINDS = %w[attribute].freeze + BLOCK_ARGUMENT_NODE_KINDS = [].freeze + + def function_name(node) + hidden_python_function_name(node) || super + end + + def visibility(_document, node) + name = function_name(node).to_s + return :private if name.start_with?("_") && !name.start_with?("__") + + :public + end + + def parameter_name(param) + name = super + return name if name + + python_nested_parameter_identifier(param)&.text + end + + def call_target(document, node) + return nil if node.kind == "identifier" && parent_node(node)&.kind == "attribute" + + python_adjacent_call_target(node) || super + end + + def state_read_target(node) + return nil if python_hidden_assignment_parts(node) || python_annotation_lhs?(node) + + super + end + + def record_state_write(document, node, stack, out) + parts = python_hidden_assignment_parts(node) + unless parts + super + return + end + + target = state_target(parts.fetch(:lhs)) + return unless target + target = normalize_target_receiver(target, stack) + return if skip_state_write_target?(target) + + out << StateWrite.new( + field: target[:field], + receiver: target[:receiver], + file: document.file, + function: current_function(stack), + line: line(parts.fetch(:lhs)), + span: python_assignment_span(parts.fetch(:lhs), parts.fetch(:rhs)), + owner: current_owner(document, stack) + ) + end + + def record_state_param_origin(document, node, 
# Returns the function name for a Python `def` whose pieces sit directly
# under a "block" node instead of under their own definition node.
#
# The node qualifies only when its kind is "block" and its very first child
# is the `def` token; the name is the text of the first named child of kind
# "identifier".
#
# @param node [Object] syntax node responding to kind, children and
#   named_children
# @return [String, nil] the name, or nil when the node is not such a block
#   or has no identifier child
def hidden_python_function_name(node)
  if node.kind == "block" && node.children.first&.kind.to_s == "def"
    name_node = node.named_children.find { |candidate| candidate.kind == "identifier" }
    name_node&.text
  end
end
# Lists the statements that make up a Python function body.
#
# The body is the node's "body" field or, failing that, its first named
# child of kind "block". The body's children are grouped by
# python_statement_child_groups and each group is wrapped by
# python_synthetic_statement.
#
# @param node [Object] function node
# @param document [Object] handed through to python_synthetic_statement
# @return [Array] [] when there is no body or the body is blank, [body] when
#   the body has text but produces no groups, otherwise one entry per group
def python_function_body_statements(node, document)
  suite = named_field(node, "body")
  suite ||= node.named_children.find { |candidate| candidate.kind == "block" }
  return [] unless suite

  statement_groups = python_statement_child_groups(suite)
  if statement_groups.empty?
    # Nothing to group: keep a non-blank body as a single opaque statement.
    return suite.text.to_s.strip.empty? ? [] : [suite]
  end

  statement_groups.map { |members| python_synthetic_statement(document, members) }
end
# Recognises an assignment whose tokens are siblings of +node+ rather than
# children of a dedicated assignment node, and returns its two sides.
#
# Two shapes are accepted:
# - `lhs <op> rhs`, where <op> is one of assignment_operator_tokens and
#   python_statement_assignment_context?(node) holds;
# - `lhs : <type> = rhs`, an annotated assignment that carries a value.
#
# @param node [Object] candidate left-hand side
# @return [Hash{Symbol=>Object}, nil] { lhs:, rhs: }, or nil when +node+ is
#   not the left-hand side of such an assignment
def python_hidden_assignment_parts(node)
  return nil unless ts_node?(node)

  operator = next_sibling(node)
  return nil unless operator

  operator_text = operator.text.to_s
  if assignment_operator_tokens.include?(operator_text)
    return nil unless python_statement_assignment_context?(node)

    rhs = next_sibling(operator)
    return { lhs: node, rhs: rhs } if rhs
  elsif operator_text == ":"
    type_node = next_sibling(operator)
    return nil unless type_node&.kind == "type"

    # A bare annotation (`x: int`) has nothing after the type. Confirm the
    # `=` token first so next_sibling is never asked about a missing node.
    equal = next_sibling(type_node)
    return nil unless equal&.text.to_s == "="

    rhs = next_sibling(equal)
    return { lhs: node, rhs: rhs } if rhs
  end

  nil
end
# Builds the span of an assignment from the start of its left-hand side to
# the end of its right-hand side.
#
# Rows are shifted by one (the points are zero-based, the span rows are
# one-based); columns are passed through unchanged.
#
# @param lhs [Object] node responding to start_point
# @param rhs [Object] node responding to end_point
# @return [Array<Integer>] [start_line, start_column, end_line, end_column]
def python_assignment_span(lhs, rhs)
  opening = lhs.start_point
  closing = rhs.end_point
  [opening.row + 1, opening.column, closing.row + 1, closing.column]
end
# Extracts the source text between two points from the document's lines.
#
# @param document [Object] responds to lines (indexable by zero-based row)
# @param start_point [Object] responds to row and column; inclusive start
# @param end_point [Object] responds to row and column; exclusive end
# @return [String] the text in between, or "" when the rows are out of range
#
# NOTE(review): columns are applied as character indexes. If the points carry
# byte offsets, lines containing multi-byte characters would be sliced at the
# wrong place - confirm against the node implementation.
def python_source_slice(document, start_point, end_point)
  if start_point.row == end_point.row
    return document.lines[start_point.row].to_s[start_point.column...end_point.column].to_s
  end

  lines = document.lines[start_point.row..end_point.row].to_a
  return "" if lines.empty?

  lines[0] = lines[0].to_s[start_point.column..].to_s
  # Exclusive range from 0: an end column of 0 must contribute nothing. The
  # inclusive form `[..column - 1]` turns into `[..-1]` there and would pull
  # in the entire last line.
  lines[-1] = lines[-1].to_s[0...end_point.column].to_s
  lines.join
end
%w[call].freeze + CLASS_OWNER_NODE_KINDS = %w[class].freeze + MODULE_OWNER_NODE_KINDS = %w[module].freeze + PARAMETER_LIST_NODE_KINDS = %w[method_parameters].freeze + FUNCTION_BODY_NODE_KINDS = %w[body_statement do_block].freeze + NESTED_STATEMENT_WRAPPER_NODE_KINDS = %w[body_statement].freeze + IDENTIFIER_NODE_KINDS = %w[identifier constant].freeze + FIELD_IDENTIFIER_NODE_KINDS = [].freeze + PARAMETER_IDENTIFIER_NODE_KINDS = %w[identifier].freeze + LOCAL_IDENTIFIER_WRAPPER_NODE_KINDS = %w[pattern].freeze + ASSIGNMENT_NODE_KINDS = %w[assignment operator_assignment].freeze + ASSIGNMENT_OPERATOR_TOKENS = %w[= += -= *= /= %= &&= ||=].freeze + PATH_ACTION_NODE_KINDS = %w[call return].freeze + SIMPLE_ACTION_WRAPPER_NODE_KINDS = %w[body_statement].freeze + COMPARISON_NODE_KINDS = %w[binary].freeze + BRANCH_NODE_KINDS = %w[if unless if_modifier unless_modifier case while until for].freeze + LOOP_NODE_KINDS = %w[while until for do_block].freeze + BRANCH_LOOP_NODE_KINDS = LOOP_NODE_KINDS + CASE_NODE_KINDS = %w[case].freeze + BRANCH_CASE_NODE_KINDS = %w[case body_statement].freeze + IF_NODE_KINDS = %w[if unless if_modifier unless_modifier].freeze + HIDDEN_IF_WRAPPER_NODE_KINDS = %w[body_statement].freeze + HIDDEN_CASE_WRAPPER_NODE_KINDS = %w[body_statement].freeze + HIDDEN_IF_TOKEN_KINDS = %w[if unless].freeze + HIDDEN_CASE_TOKEN_KINDS = %w[case when].freeze + CASE_ARM_NODE_KINDS = %w[when].freeze + WHEN_CASE_ARM_NODE_KINDS = %w[when].freeze + CASE_PATTERN_NODE_KINDS = %w[pattern].freeze + CASE_CONTAINER_STOP_NODE_KINDS = %w[method class module].freeze + CASE_SUBJECT_SKIP_NODE_KINDS = %w[when else then comment].freeze + DEFAULT_CASE_PATTERNS = %w[_ default else].freeze + BOOLEAN_AND_OPERATORS = %w[&& and].freeze + BOOLEAN_CONTAINER_NODE_KINDS = %w[binary].freeze + BOOLEAN_WRAPPER_NODE_KINDS = %w[body_statement pattern argument_list].freeze + PARENTHESIZED_PATTERN_NODE_KINDS = %w[pattern].freeze + ACCESSOR_CALL_NODE_KINDS = %w[call].freeze + BLOCK_ARGUMENT_NODE_KINDS = 
# Resolves the display name of a Ruby method definition node.
#
# - "body_statement"   -> delegated to hidden_ruby_method_name
# - "argument_list"    -> delegated to inline_def_name
# - "singleton_method" -> "<receiver>.<name>", with a missing or `self`
#   receiver rendered as "self"
# - any other kind     -> the superclass implementation
#
# @param node [Object] syntax node
# @return [String, nil] nil when a singleton method exposes no name node
def function_name(node)
  kind = node.kind
  return hidden_ruby_method_name(node) if kind == "body_statement"
  return inline_def_name(node) if kind == "argument_list"
  return super unless kind == "singleton_method"

  receiver_kinds = %w[self constant identifier]
  name_kinds = %w[identifier field_identifier property_identifier]

  owner = named_field(node, "receiver") ||
          node.named_children.find { |child| receiver_kinds.include?(child.kind) }
  # Without a "name" field, fall back to the LAST name-like child so the
  # receiver (which comes first) is not mistaken for the method name.
  method_name = named_field(node, "name")&.text ||
                node.named_children.reverse.find { |child| name_kinds.include?(child.kind) }&.text

  owner_text = owner&.text.to_s
  # An empty receiver text becomes "self"; a literal `self` receiver already
  # reads "self", so this single check covers both cases.
  prefix = owner_text.empty? ? "self" : owner_text
  method_name && "#{prefix}.#{method_name}"
end
# Decides whether the structural walk should continue below +node+.
#
# The walk stops at lambdas (kind "lambda" or anything accepted by
# ruby_stabby_lambda_node?) and at nested local scopes once some frame on
# the stack already carries a :function entry.
#
# @param node [Object] node about to be descended into
# @param stack [Array<Hash>] enclosing frames; a truthy frame[:function]
#   marks a frame that belongs to a function
# @return [Boolean]
def descend_into_children?(node, stack)
  return false if node.kind == "lambda" || ruby_stabby_lambda_node?(node)

  !ruby_nested_local_scope?(node) || stack.none? { |frame| frame[:function] }
end
# Collects the path-condition sites of every function in the document.
#
# Each body statement of each function is handed to ruby_path_walk with an
# empty guard list; the walk appends sites to a shared accumulator. Sites
# that agree on guards, action, file, function and line are reported once
# (the first occurrence is kept).
#
# @param document [Object] responds to function_defs
# @return [Array] de-duplicated sites in discovery order
def path_condition_sites(document)
  collected = document.function_defs.each_with_object([]) do |definition, sites|
    ruby_function_body_statements(definition.body).each do |statement|
      ruby_path_walk(document, statement, definition.name, [], sites)
    end
  end

  collected.uniq do |site|
    [site.guards, site.action, site.file, site.function, site.line]
  end
end
# Returns the method name of a `def` whose tokens sit directly under a
# "body_statement" node (see hidden_ruby_method_definition?).
#
# When a `self` or constant child is present it is treated as the receiver:
# the name is searched only among the children after it and is reported as
# "self.<name>". Otherwise the first name-like child is returned as is.
#
# @param node [Object] syntax node
# @return [String, nil] the name, or nil when +node+ is not such a
#   definition or has no identifier-like name child
def hidden_ruby_method_name(node)
  return nil unless hidden_ruby_method_definition?(node)

  named = node.named_children
  receiver_index = named.index { |child| child.kind == "self" || child.kind == "constant" }
  candidates = receiver_index ? named[(receiver_index + 1)..] : named
  name = candidates&.find do |child|
    %w[identifier field_identifier property_identifier].include?(child.kind)
  end&.text
  # No recognisable name (for instance an operator method): report nil in
  # both forms rather than the meaningless string "self.".
  return nil unless name

  receiver_index ? "self.#{name}" : name
end
# Returns the single expression a method consists of, if there is one.
#
# When ruby_method_body_wrapper finds no body wrapper, the result comes from
# ruby_endless_method_expression; otherwise it is whatever
# ruby_single_expression_body_child extracts from the wrapper.
#
# @param node [Object] method definition node (or its body)
# @return [Object, nil] the expression node, or nil when none is found
def ruby_single_expression_function_body(node)
  body = ruby_method_body_wrapper(node)
  return ruby_endless_method_expression(node) unless body

  ruby_single_expression_body_child(body)
end
# Heuristic: does this source text look like a boolean expression?
#
# It does when it mentions (case-insensitively, as a substring) one of
# true / false / null / nil or the spaced words " and " / " or ", or when it
# contains one of the operators == != && ||.
#
# @param source [#to_s] normalized body text
# @return [Boolean]
def predicate_body?(source)
  raw = source.to_s
  folded = raw.downcase

  keywords = ["true", "false", "null", "nil", " and ", " or "]
  return true if keywords.any? { |keyword| folded.include?(keyword) }

  %w[== != && ||].any? { |operator| raw.include?(operator) }
end
# Derives [written, read] pairs: for every assignment inside +node+, one
# pair per distinct local read on its right-hand side. Self-references
# (`x = x + 1`) are left out and duplicate pairs are collapsed.
#
# A flat assignment statement (see ruby_flat_assignment_statement?) is
# handled directly from its first two named children. Otherwise every
# "assignment" node reached by ruby_walk_local is inspected, and only those
# whose target is a plain identifier and that have a value are used.
#
# @param node [Object] statement node
# @param local_names [#include?] names treated as locals
# @return [Array<Array(String, String)>]
def ruby_assignment_dependencies(node, local_names)
  edges_for = lambda do |target, value|
    written = target.text.to_s
    ruby_local_reads(value, local_names).uniq
                                        .reject { |read| written == read }
                                        .map { |read| [written, read] }
  end

  if ruby_flat_assignment_statement?(node)
    operands = node.named_children
    return edges_for.call(operands.first, operands[1]).uniq
  end

  edges = []
  ruby_walk_local(node) do |child|
    next unless child.kind == "assignment"

    operands = child.named_children
    target = operands.first
    value = operands[1]
    next unless target&.kind == "identifier" && value

    edges.concat(edges_for.call(target, value))
  end
  edges.uniq
end
# Looks for a visual separator in the source lines between two statements.
#
# Scans the one-based, inclusive line range in order. The first line whose
# stripped text starts with "#" wins and is reported as a :comment. If no
# comment line exists, the first empty line is reported as :blank.
#
# @param document [Object] responds to lines (zero-based array of text)
# @param first_line [Integer] first line to inspect (one-based)
# @param last_line [Integer] last line to inspect (one-based)
# @return [Hash{Symbol=>Object}, nil] { line:, kind:, text: } or nil when
#   the range is empty or holds neither a comment nor a blank line
def ruby_source_boundary(document, first_line, last_line)
  return nil if first_line > last_line

  first_blank = nil
  first_line.upto(last_line) do |number|
    content = document.lines[number - 1].to_s.strip
    if content.start_with?("#")
      return { line: number, kind: :comment, text: content }
    elsif first_blank.nil? && content.empty?
      first_blank = { line: number, kind: :blank, text: content }
    end
  end
  first_blank
end
# Recognises an "argument_list" node that is really a member access written
# without parentheses (`receiver.field`) and returns it as a state target.
#
# The node must not start with "(", must contain an unnamed "." token, and
# must have exactly two named children that are each an identifier or a
# constant.
#
# @param node [Object] syntax node
# @return [Hash{Symbol=>String}, nil] { receiver:, field: } or nil
def ruby_unparenthesized_member_argument_target(node)
  return nil if node.kind != "argument_list" || node.text.to_s.strip.start_with?("(")

  dotted = node.children.any? { |token| !token.named? && token.text == "." }
  return nil unless dotted

  operands = node.named_children
  simple_pair = operands.size == 2 &&
                operands.all? { |operand| %w[identifier constant].include?(operand.kind) }
  return nil unless simple_pair

  { receiver: normalize_text(operands.first.text), field: operands.last.text }
end
# Walks both arms of an if/unless node, extending the guard list with the
# atoms of its condition.
#
# For `if`, the then-arm receives the atoms as written and the else-arm the
# negated atoms; for `unless` (see ruby_unless_node?) the two are swapped.
# After both arms, the condition itself is walked with the unchanged guards.
#
# @param document [Object] passed through to ruby_path_walk
# @param node [Object] the if/unless node
# @param function [Object] enclosing function name, passed through
# @param guards [Array] guards accumulated so far (not mutated)
# @param out [Array] accumulator, passed through to ruby_path_walk
# @return [void]
def ruby_path_walk_if(document, node, function, guards, out)
  condition = ruby_path_condition(node)
  positive = ruby_path_condition_atoms(condition)
  negative = ruby_negate_guards(positive)
  then_guards, else_guards = ruby_unless_node?(node) ? [negative, positive] : [positive, negative]

  walk_arm = lambda do |arm, arm_guards|
    ruby_path_body_nodes(arm).each do |child|
      ruby_path_walk(document, child, function, guards + arm_guards, out)
    end
  end

  # Order matters for the accumulator: then-arm, else-arm, then condition.
  walk_arm.call(ruby_path_then_body(node), then_guards)
  walk_arm.call(ruby_path_else_body(node), else_guards)
  ruby_path_walk(document, condition, function, guards, out)
end
# Picks the condition expression out of an if/unless node.
#
# Modifier forms (`action if cond`, including the flattened variant detected
# by hidden_modifier_if?) place the condition last among the named children;
# every other form places it first.
#
# @param node [Object] if/unless node
# @return [Object, nil] the condition node, or nil when there are no named
#   children
def ruby_path_condition(node)
  if hidden_modifier_if?(node) || %w[if_modifier unless_modifier].include?(node.kind)
    node.named_children.last
  else
    node.named_children.first
  end
end
# Reads the visibility of an inline `def` from the call that wraps it, as in
# `private def helper ... end`.
#
# The parent must be a "call" whose target (per ruby_call_target) has
# receiver "self" and a message of private, protected or public.
#
# @param node [Object] the argument list holding the inline def
# @return [Symbol, nil] :private, :protected, :public, or nil when the
#   wrapper is not one of those calls
def ruby_inline_def_visibility(node)
  wrapper = parent_node(node)
  return nil unless wrapper&.kind == "call"

  target = ruby_call_target(wrapper)
  return nil unless target && target[:receiver] == "self"

  modifier = target[:message]&.to_sym
  modifier if %i[private protected public].include?(modifier)
end
# Interprets a body node whose first two named children look like
# `receiver message` as a call with an explicit receiver and no arguments.
#
# The receiver must be `self`, a constant or an identifier; the message must
# be an identifier or a constant. Named children beyond the second are
# ignored.
#
# @param node [Object] body node
# @return [Hash{Symbol=>Object}, nil] { receiver:, message:, arguments: [] }
#   or nil when the shape does not match
def ruby_explicit_receiver_body_call_target(node)
  owner, selector = node.named_children
  return nil if owner.nil? || selector.nil?

  shape_matches = %w[self constant identifier].include?(owner.kind) &&
                  %w[identifier constant].include?(selector.kind)
  return nil unless shape_matches

  { receiver: normalize_text(owner.text), message: selector.text, arguments: [] }
end
# Returns the normalized source text of each argument of a call-like node.
#
# The argument list is taken from the "arguments" field, or failing that from
# the first named child of kind "argument_list". When that list exposes named
# children their texts are used; otherwise the raw list text is stripped of
# one pair of surrounding parentheses and split on commas.
def ruby_argument_texts(node)
  argument_list = named_field(node, "arguments")
  argument_list ||= node.named_children.find { |child| child.kind == "argument_list" }
  return [] unless argument_list

  from_children = argument_list.named_children.map { |child| normalize_text(child.text) }
  return from_children unless from_children.empty?

  # Fallback for argument lists without named children: work on the raw text.
  raw = argument_list.text.to_s.strip
  raw = raw[1...-1] if raw.start_with?("(") && raw.end_with?(")")
  raw.split(/\s*,\s*/).filter_map do |piece|
    normalized = normalize_text(piece)
    normalized unless normalized.empty?
  end
end
# Recognizes an assignment of the shape `@field = T.let(value, Type)`.
#
# Returns `{ field:, type: }` when the left side resolves (via state_target)
# to an "@"-prefixed field on self and the right side is a call whose receiver
# text is "T" and whose method text is "let" with a second named argument.
# Returns nil in every other case.
def ruby_t_let_state_declaration(node)
  left_side = named_field(node, "left") || node.named_children.first
  right_side = named_field(node, "right") || named_field(node, "value") || node.named_children[1]

  target = state_target(left_side)
  return nil unless target
  return nil unless target[:receiver] == "self"
  return nil unless target[:field].to_s.start_with?("@")
  return nil unless right_side&.kind == "call"

  call_receiver = named_field(right_side, "receiver") || right_side.named_children.first
  call_method = named_field(right_side, "method") ||
                right_side.named_children.find { |child| child.kind == "identifier" }
  return nil unless call_receiver&.text == "T"
  return nil unless call_method&.text == "let"

  arguments = named_field(right_side, "arguments") ||
              right_side.named_children.find { |child| child.kind == "argument_list" }
  # The declared type is the second named argument of T.let.
  declared_type = arguments&.named_children&.[](1)&.text
  return nil if declared_type.to_s.empty?

  { field: target[:field], type: normalize_text(declared_type) }
end
# Post-processes the pattern texts produced by the superclass when at least
# one of them starts with "*".
#
# Runs of consecutive non-splat texts are joined with ", " into one entry.
# A splat text loses its leading "*" when it is the only text or is not the
# first text; a splat in first position among several texts is kept verbatim.
def case_pattern_texts(patterns)
  pattern_texts = super
  return pattern_texts if pattern_texts.none? { |entry| entry.start_with?("*") }

  sole_pattern = pattern_texts.size == 1
  grouped = []
  plain_run = []
  pattern_texts.each_with_index do |entry, position|
    unless entry.start_with?("*")
      plain_run << entry
      next
    end

    # A splat closes the current run of plain patterns.
    grouped << plain_run.join(", ") unless plain_run.empty?
    plain_run = []
    grouped << ((sole_pattern || position.positive?) ? entry.delete_prefix("*") : entry)
  end
  grouped << plain_run.join(", ") unless plain_run.empty?
  grouped
end
# Extracts a `{ "param" => "Type" }` map from the text of a Sorbet-style sig.
#
# The argument list of the first `params(...)` call is located by balancing
# parentheses, so a parenthesised type such as `T.nilable(String)` no longer
# ends the list early and hides the parameters that follow it.
#
# Only a leading constant path is captured as the type, so
# `x: T.nilable(String)` is recorded as "x" => "T".
#
# Returns {} when there is no `params(` call or its parentheses never close.
def sig_param_types(sig_source)
  source = sig_source.to_s
  opener = source.match(/params\s*\(/)
  return {} unless opener

  depth = 1
  param_list = +""
  source[opener.end(0)..].each_char do |char|
    depth += 1 if char == "("
    depth -= 1 if char == ")"
    break if depth.zero?

    param_list << char
  end
  return {} unless depth.zero?

  param_list.scan(/([A-Za-z_]\w*)\s*:\s*([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)/).to_h
end
# Scans source lines for constant-to-constant aliases and returns them as
# `{ "Alias" => "Target" }`.
#
# Two shapes are recognised:
#   Alias = T.type_alias { Some::Constant }
#   Alias = Some::Constant
#
# A `T.type_alias` whose body is not a bare constant path (for example
# `T.type_alias { T.nilable(Foo) }`) is skipped. Previously such a line fell
# through to the plain-constant pattern and was recorded as an alias for "T".
def ruby_type_aliases(lines)
  lines.each_with_object({}) do |line, aliases|
    if (match = line.match(/\A\s*([A-Z]\w*)\s*=\s*T\.type_alias\s*\{\s*([A-Z]\w*(?:::[A-Z]\w*)*)\s*\}/))
      aliases[match[1]] = match[2]
    elsif line.match?(/\A\s*[A-Z]\w*\s*=\s*T\.type_alias\b/)
      # Alias body is a type expression, not a constant: nothing to record.
      next
    elsif (match = line.match(/\A\s*([A-Z]\w*)\s*=\s*([A-Z]\w*(?:::[A-Z]\w*)*)\b/))
      aliases[match[1]] = match[2]
    end
  end
end
# Reduces the source text of a visibility-call argument to a bare method
# name: surrounding whitespace, one leading ":" and one pair of double or
# single quotes are removed, in that order.
def ruby_visibility_arg_name(arg)
  name = arg.to_s.strip
  name = name.delete_prefix(":")
  ["\"", "'"].each do |quote|
    name = name.delete_prefix(quote).delete_suffix(quote)
  end
  name
end
# True when the receiver text, after dropping one leading "::", starts with
# the "Net::" namespace prefix.
def ruby_net_receiver?(receiver)
  constant_path = receiver.to_s.sub(/\A::/, "")
  constant_path.start_with?("Net::")
end
# True when the text that follows a global-variable reference on its source
# line is a plain assignment (`$name = ...`).
#
# Only a lone "=" counts. The previous `start_with?("=")` check also accepted
# `==`, `===`, `=~` and `=>`, which are reads of the global, so those
# references were wrongly treated as assignments.
#
# NOTE(review): assumes read.span[3] is the column just past the variable on
# read.line — confirm against the span producer.
def ruby_global_assignment_read?(document, read)
  line_text = document.lines[read.line - 1].to_s
  following = line_text[read.span[3]..].to_s.lstrip
  following.match?(/\A=(?![=~>])/)
end
# True when the left side of an operator assignment is missing or is a plain
# variable node (kind "identifier", "instance_variable" or
# "global_variable"). Callers use this to skip such assignments.
def ruby_local_operator_assignment_lhs?(lhs)
  !lhs || %w[identifier instance_variable global_variable].include?(lhs.kind)
end
# Message names that ruby_protocol_mutating_mid? treats as mutating their
# receiver. When a call's message is in this list, the protocol analysis
# records the receiver's state token as a write.
# NOTE(review): `each_key=` does not look like a real method name — confirm
# it is intentional and not a typo.
RUBY_PROTOCOL_MUTATING_MIDS = %w[
  << []= add append clear collect! compact! concat declare delete delete_if
  each_key= fill filter! keep_if mark merge! move push reject! replace
  resolve shift stamp store unshift update write
].freeze
# Walks the subtree rooted at +node+ and accumulates the state names it reads
# into +reads+ and the state names it writes into +writes+ (both are mutated
# in place; the return value is not meaningful).
#
# node        - syntax node to inspect; non-nodes are ignored.
# reads       - collection receiving read state names via <<.
# writes      - collection receiving written state names via <<.
# local_names - names of locals, forwarded to the helpers that decide whether
#               an identifier refers to object state.
# root        - true only for the outermost call, so that the function body
#               itself is not rejected as a nested boundary.
def ruby_protocol_collect_state_access(node, reads, writes, local_names:, root: false)
  return unless ts_node?(node)
  # Nested definitions are separate scopes; only the root may be one.
  return if !root && ruby_protocol_nested_boundary?(node)

  # Flat assignment statement: first named child is the target, second the value.
  if ruby_flat_assignment_statement?(node)
    lhs = node.named_children.first
    rhs = node.named_children[1]
    ruby_protocol_record_write(lhs, writes, local_names)
    ruby_protocol_collect_state_access(rhs, reads, writes, local_names: local_names)
    return
  end

  case node.kind
  when "assignment"
    # Plain assignment: the target is a write; only the value side is scanned
    # for further accesses.
    lhs = named_field(node, "left") || node.named_children.first
    rhs = named_field(node, "right") || named_field(node, "value") || node.named_children[1]
    ruby_protocol_record_write(lhs, writes, local_names)
    ruby_protocol_collect_state_access(rhs, reads, writes, local_names: local_names)
    return
  when "operator_assignment"
    # Operator assignment both reads and writes its target.
    lhs = named_field(node, "left") || node.named_children.first
    if (state = ruby_protocol_state_target(lhs, local_names))
      reads << state
      writes << state
    end
    rhs = named_field(node, "right") || named_field(node, "value") || node.named_children[1]
    ruby_protocol_collect_state_access(rhs, reads, writes, local_names: local_names)
    return
  when "instance_variable"
    reads << ruby_protocol_normalize_state(node.text)
  when "call"
    ruby_protocol_collect_call_state(node, reads, writes, local_names)
  when "identifier"
    reads << ruby_protocol_normalize_state(node.text) if ruby_protocol_bare_reader?(node, local_names)
  end

  # Non-assignment nodes fall through to here and have their children scanned.
  node.named_children.each do |child|
    ruby_protocol_collect_state_access(child, reads, writes, local_names: local_names)
  end
end
# Maps the receiver text of a call or element reference to the state name it
# stands for, or nil when the receiver is not object state.
#
#   "self"          -> "self"
#   "@items"        -> "items"  (normalized instance variable)
#   "items"         -> "items"  (bare reader), unless it is a known local
#   anything else   -> nil      (constants, expressions, empty text)
#
# The local_names guard now runs before the identifier-shape test. It used to
# sit after it, where it could never take effect, so local variables were
# reported as object state.
def ruby_protocol_receiver_state_token(receiver, local_names)
  text = receiver.to_s
  return nil if text.empty?
  return "self" if text == "self"
  return ruby_protocol_normalize_state(text) if text.start_with?("@")
  # Locals are identifier-shaped too, so they must be excluded first.
  return nil if local_names.include?(text)
  return ruby_protocol_normalize_state(text) if text.match?(/\A[a-z_]\w*[!?]?\z/)

  nil
end
# Returns the first named child of a case node as its subject, or nil when
# there is no named child or the first one is already a "when"/"else" arm
# (a subject-less case).
def ruby_protocol_case_subject(node)
  candidate = node.named_children.first
  candidate unless !candidate || %w[when else].include?(candidate.kind)
end
# Builds the call paths for a node that has no dedicated branch handling.
#
# The paths of each child selected by ruby_protocol_child_nodes are combined
# in order, starting from a single empty path. When the node itself is an
# internal call (ruby_protocol_internal_call returns a message id), a
# one-call, non-terminal path for it is placed in front of the child paths.
def ruby_protocol_generic_paths(node, local_names:)
  seed = [ruby_protocol_empty_path]
  child_paths = ruby_protocol_child_nodes(node).reduce(seed) do |accumulated, child|
    ruby_protocol_combine_path_lists(
      accumulated,
      ruby_protocol_paths_for(child, local_names: local_names)
    )
  end

  mid = ruby_protocol_internal_call(node, local_names)
  if mid
    own_call = ProtocolPath.new(calls: [ruby_protocol_raw_call(mid, node)], terminal: false)
    ruby_protocol_combine_path_lists([own_call], child_paths)
  else
    child_paths
  end
end
# Sequences two lists of paths.
#
# A terminal left path is kept unchanged. Every other left path is extended
# with each right path: the calls are concatenated and the result takes the
# right path's terminal flag. At most RUBY_PROTOCOL_PATH_LIMIT paths are
# returned.
def ruby_protocol_combine_path_lists(left_paths, right_paths)
  combined = []
  left_paths.each do |prefix|
    if prefix.terminal
      combined << prefix
      next
    end

    right_paths.each do |suffix|
      combined << ProtocolPath.new(calls: prefix.calls + suffix.calls, terminal: suffix.terminal)
    end
  end
  combined.first(RUBY_PROTOCOL_PATH_LIMIT)
end
# Canonical state name for an instance variable or accessor message: the
# text with one leading "@" and one trailing "=" removed.
def ruby_protocol_normalize_state(name)
  without_sigil = name.to_s.sub(/\A@/, "")
  without_sigil.sub(/=\z/, "")
end
%w[assignment_expression compound_assignment_expr].freeze + ASSIGNMENT_STATE_DECLARATION_NODE_KINDS = %w[assignment_expression].freeze + ASSIGNMENT_OPERATOR_TOKENS = %w[= += -= *= /= %=].freeze + PATH_ACTION_NODE_KINDS = %w[call_expression expression_statement return_expression].freeze + SIMPLE_ACTION_WRAPPER_NODE_KINDS = %w[block].freeze + COMPARISON_NODE_KINDS = %w[binary_expression].freeze + BRANCH_NODE_KINDS = %w[if_expression match_expression for_expression].freeze + LOOP_NODE_KINDS = %w[for_expression].freeze + TEXT_LOOP_NODE_KINDS = %w[expression_statement].freeze + BRANCH_LOOP_NODE_KINDS = LOOP_NODE_KINDS + CASE_NODE_KINDS = %w[match_expression].freeze + HIDDEN_MATCH_NODE_KINDS = %w[expression_statement].freeze + BRANCH_CASE_NODE_KINDS = %w[match_expression expression_statement].freeze + IF_NODE_KINDS = %w[if_expression].freeze + CASE_ARM_NODE_KINDS = %w[match_arm].freeze + WHEN_CASE_ARM_NODE_KINDS = %w[match_arm].freeze + CASE_PATTERN_NODE_KINDS = %w[match_pattern pattern].freeze + CASE_CONTAINER_STOP_NODE_KINDS = %w[function_item impl_item struct_item].freeze + CASE_SUBJECT_SKIP_NODE_KINDS = %w[match_arm else comment].freeze + DEFAULT_CASE_PATTERNS = %w[_ default].freeze + BOOLEAN_AND_OPERATORS = %w[&& and].freeze + BOOLEAN_CONTAINER_NODE_KINDS = %w[binary_expression].freeze + PARENTHESIZED_WRAPPER_NODE_KINDS = %w[parenthesized_expression tuple_expression].freeze + ARGUMENT_LIST_NODE_KINDS = %w[arguments].freeze + SELF_CALL_IDENTIFIER_NODE_KINDS = %w[identifier type_identifier field_identifier].freeze + SELF_RECEIVER_NAMES = %w[self].freeze + PUBLIC_VISIBILITY_TOKENS = %w[pub public].freeze + ACCESSOR_CALL_NODE_KINDS = [].freeze + EXPRESSION_LIST_NODE_KINDS = [].freeze + NAVIGATION_SUFFIX_NODE_KINDS = [].freeze + LITERAL_FIELD_EXPRESSION_NODE_KINDS = %w[field_expression].freeze + FIELD_LIKE_NODE_KINDS = %w[field_expression scoped_identifier].freeze + BLOCK_ARGUMENT_NODE_KINDS = [].freeze + + def visibility(_document, node) + modifier_visibility(node) || 
:private + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/syntax/swift.rb b/gems/decomplex/lib/decomplex/syntax/swift.rb new file mode 100644 index 000000000..2eb402927 --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax/swift.rb @@ -0,0 +1,74 @@ +# frozen_string_literal: true + +module Decomplex + module Syntax + SWIFT_LEXICON = LanguageLexicon.new( + nil_literal_patterns: [/\bnil\b/].freeze, + type_guard_patterns: [ + /\bnil\b/, + /(?:\?\.|\?\?)/, + /\b(?:if|guard)\s+let\b/, + /\b(?:as\?|is)(?:\s|$)/ + ].freeze, + diagnostic_patterns: [ + /\bthrow\b/, + /\b(?:fatalError|preconditionFailure|assertionFailure|assert|precondition)\s*\(/ + ].freeze, + trivial_patterns: [ + /\A(?:nil|true|false|0|1|break|continue)\s*;?\z/, + /\Areturn\s+(?:nil|true|false|0|1)\s*;?\z/ + ].freeze + ).freeze + + class SwiftSyntaxAdapter < TreeSitterLanguageAdapter + FUNCTION_NODE_KINDS = %w[function_declaration].freeze + CALL_NODE_KINDS = %w[call_expression].freeze + ADJACENT_CALL_NODE_KINDS = %w[navigation_expression directly_assignable_expression simple_identifier].freeze + CLASS_OWNER_NODE_KINDS = %w[class_declaration].freeze + PARAMETER_LIST_NODE_KINDS = %w[function_value_parameters].freeze + INLINE_PARAMETER_NODE_KINDS = %w[parameter].freeze + FUNCTION_BODY_NODE_KINDS = %w[function_body statements].freeze + NESTED_STATEMENT_WRAPPER_NODE_KINDS = %w[statements].freeze + IDENTIFIER_NODE_KINDS = %w[simple_identifier type_identifier].freeze + FIELD_IDENTIFIER_NODE_KINDS = [].freeze + PARAMETER_IDENTIFIER_NODE_KINDS = %w[simple_identifier].freeze + LOCAL_IDENTIFIER_WRAPPER_NODE_KINDS = %w[directly_assignable_expression value_argument pattern].freeze + LOCAL_DECLARATION_NODE_KINDS = %w[property_declaration variable_declaration].freeze + VARIABLE_DECLARATION_NODE_KINDS = %w[variable_declaration directly_assignable_expression].freeze + LOCAL_VARIABLE_DECLARATOR_NODE_KINDS = [].freeze + FIELD_DECLARATION_NODE_KINDS = %w[property_declaration].freeze + 
DECLARATION_SITE_PARENT_NODE_KINDS = %w[parameter variable_declaration property_declaration function_declaration class_declaration].freeze + ASSIGNMENT_NODE_KINDS = %w[assignment].freeze + ASSIGNMENT_STATE_DECLARATION_NODE_KINDS = %w[assignment].freeze + ASSIGNMENT_OPERATOR_TOKENS = %w[= += -= *= /= %=].freeze + PATH_ACTION_NODE_KINDS = %w[call_expression control_transfer_statement].freeze + SIMPLE_ACTION_WRAPPER_NODE_KINDS = %w[statements control_structure_body function_body].freeze + COMPARISON_NODE_KINDS = %w[equality_expression comparison_expression conjunction_expression additive_expression multiplicative_expression].freeze + BRANCH_NODE_KINDS = %w[if_statement for_statement switch_statement].freeze + LOOP_NODE_KINDS = %w[for_statement].freeze + BRANCH_LOOP_NODE_KINDS = LOOP_NODE_KINDS + CASE_NODE_KINDS = %w[switch_statement].freeze + BRANCH_CASE_NODE_KINDS = %w[switch_statement].freeze + IF_NODE_KINDS = %w[if_statement].freeze + HIDDEN_IF_WRAPPER_NODE_KINDS = %w[statements].freeze + HIDDEN_IF_TOKEN_KINDS = %w[if].freeze + CASE_ARM_NODE_KINDS = %w[switch_entry].freeze + SWITCH_CASE_ARM_NODE_KINDS = %w[switch_entry].freeze + CASE_PATTERN_NODE_KINDS = %w[switch_pattern pattern].freeze + CASE_CONTAINER_STOP_NODE_KINDS = %w[function_declaration class_declaration].freeze + CASE_SUBJECT_SKIP_NODE_KINDS = %w[switch_entry else comment].freeze + DEFAULT_CASE_PATTERNS = %w[_ default].freeze + BOOLEAN_AND_OPERATORS = %w[&& and].freeze + BOOLEAN_CONTAINER_NODE_KINDS = %w[conjunction_expression equality_expression comparison_expression].freeze + BOOLEAN_WRAPPER_NODE_KINDS = %w[statements pattern].freeze + ARGUMENT_LIST_NODE_KINDS = %w[call_suffix value_argument].freeze + SELF_CALL_IDENTIFIER_NODE_KINDS = %w[simple_identifier type_identifier].freeze + SELF_RECEIVER_NAMES = %w[self].freeze + PUBLIC_VISIBILITY_TOKENS = %w[public pub].freeze + ACCESSOR_CALL_NODE_KINDS = [].freeze + NAVIGATION_SUFFIX_NODE_KINDS = %w[navigation_suffix].freeze + FIELD_LIKE_NODE_KINDS = 
%w[navigation_expression directly_assignable_expression].freeze + BLOCK_ARGUMENT_NODE_KINDS = [].freeze + end + end +end diff --git a/gems/decomplex/lib/decomplex/syntax/typescript.rb b/gems/decomplex/lib/decomplex/syntax/typescript.rb new file mode 100644 index 000000000..6fce63161 --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax/typescript.rb @@ -0,0 +1,10 @@ +# frozen_string_literal: true + +module Decomplex + module Syntax + TYPESCRIPT_LEXICON = JAVASCRIPT_LEXICON + + class TypeScriptSyntaxAdapter < JavaScriptSyntaxAdapter + end + end +end diff --git a/gems/decomplex/lib/decomplex/syntax/zig.rb b/gems/decomplex/lib/decomplex/syntax/zig.rb new file mode 100644 index 000000000..fcf985953 --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax/zig.rb @@ -0,0 +1,88 @@ +# frozen_string_literal: true + +module Decomplex + module Syntax + ZIG_LEXICON = LanguageLexicon.new( + nil_literal_patterns: [/\bnull\b/].freeze, + type_guard_patterns: [ + /\bnull\b/, + /@typeInfo\b/, + /\bif\s*\([^)]*\)\s*\|/ + ].freeze, + diagnostic_patterns: [ + /@panic\s*\(/, + /\bunreachable\b/, + /\breturn\s+error[.\w]*/ + ].freeze, + trivial_patterns: [ + /\A(?:null|true|false|0|1|break|continue|unreachable)\s*;?\z/, + /\Areturn\s+(?:null|true|false|0|1)\s*;?\z/ + ].freeze + ).freeze + + class ZigSyntaxAdapter < TreeSitterLanguageAdapter + FUNCTION_NODE_KINDS = %w[function_declaration].freeze + CALL_NODE_KINDS = %w[call_expression].freeze + ADJACENT_CALL_NODE_KINDS = %w[field_expression identifier].freeze + ANONYMOUS_OWNER_NODE_KINDS = %w[struct_declaration].freeze + PARAMETER_LIST_NODE_KINDS = %w[parameters].freeze + FUNCTION_BODY_NODE_KINDS = %w[block block_expression].freeze + IDENTIFIER_NODE_KINDS = %w[identifier].freeze + FIELD_IDENTIFIER_NODE_KINDS = [].freeze + PARAMETER_IDENTIFIER_NODE_KINDS = %w[identifier].freeze + LOCAL_DECLARATION_NODE_KINDS = %w[variable_declaration].freeze + LOCAL_VARIABLE_DECLARATOR_NODE_KINDS = [].freeze + FIELD_DECLARATION_NODE_KINDS = 
%w[container_field].freeze + BOUND_CONTAINER_PARENT_NODE_KINDS = %w[variable_declaration].freeze + BOUND_CONTAINER_NAME_NODE_KINDS = %w[identifier].freeze + DECLARATION_SITE_PARENT_NODE_KINDS = %w[parameter variable_declaration function_declaration struct_declaration].freeze + ASSIGNMENT_NODE_KINDS = %w[assignment_expression].freeze + ASSIGNMENT_STATE_DECLARATION_NODE_KINDS = %w[assignment_expression].freeze + ASSIGNMENT_OPERATOR_TOKENS = %w[= += -= *= /= %=].freeze + PATH_ACTION_NODE_KINDS = %w[call_expression expression_statement return_expression].freeze + SIMPLE_ACTION_WRAPPER_NODE_KINDS = %w[block].freeze + COMPARISON_NODE_KINDS = %w[binary_expression].freeze + BRANCH_NODE_KINDS = %w[if_statement switch_expression for_statement labeled_statement].freeze + LOOP_NODE_KINDS = %w[for_statement].freeze + TEXT_LOOP_NODE_KINDS = %w[labeled_statement].freeze + BRANCH_LOOP_NODE_KINDS = %w[for_statement labeled_statement].freeze + CASE_NODE_KINDS = %w[switch_expression].freeze + BRANCH_CASE_NODE_KINDS = %w[switch_expression].freeze + IF_NODE_KINDS = %w[if_statement].freeze + CASE_ARM_NODE_KINDS = %w[switch_case].freeze + SWITCH_CASE_ARM_NODE_KINDS = %w[switch_case].freeze + CASE_CONTAINER_STOP_NODE_KINDS = %w[function_declaration struct_declaration].freeze + CASE_SUBJECT_SKIP_NODE_KINDS = %w[switch_case else comment].freeze + DEFAULT_CASE_PATTERNS = %w[_ default else].freeze + BOOLEAN_AND_OPERATORS = %w[and &&].freeze + BOOLEAN_CONTAINER_NODE_KINDS = %w[binary_expression].freeze + ARGUMENT_LIST_NODE_KINDS = %w[argument_list arguments].freeze + SELF_CALL_IDENTIFIER_NODE_KINDS = %w[identifier].freeze + SELF_RECEIVER_NAMES = %w[self].freeze + PUBLIC_VISIBILITY_TOKENS = %w[pub public].freeze + ACCESSOR_CALL_NODE_KINDS = [].freeze + LITERAL_FIELD_EXPRESSION_NODE_KINDS = %w[field_expression].freeze + FIELD_LIKE_NODE_KINDS = %w[field_expression].freeze + BLOCK_ARGUMENT_NODE_KINDS = [].freeze + + def visibility(_document, node) + modifier_visibility(node) || :private + end + + 
def state_declaration(node) + return zig_container_field_declaration(node) if node.kind == "container_field" + + super + end + + private + + def zig_container_field_declaration(node) + name = node.named_children.find { |child| child.kind == "identifier" } + return nil unless name + + { field: name.text, type: declared_type_text(node, name) } + end + end + end +end diff --git a/gems/decomplex/lib/decomplex/syntax_oracle.rb b/gems/decomplex/lib/decomplex/syntax_oracle.rb new file mode 100644 index 000000000..fa891f0c8 --- /dev/null +++ b/gems/decomplex/lib/decomplex/syntax_oracle.rb @@ -0,0 +1,220 @@ +# frozen_string_literal: true + +require "json" +require_relative "syntax" +require_relative "native/command" + +module Decomplex + module SyntaxOracle + FORMAT = "decomplex.syntax-facts.v1" + + module_function + + def project(files, engine: "ruby", language: nil) + paths = Array(files).map(&:to_s) + projection = + case engine.to_s + when "ruby" + project_files(paths, language: language) + when "rust" + rust_project_files(paths, language: language) + else + raise ArgumentError, "unsupported syntax oracle engine: #{engine}" + end + canonical_projection(projection) + end + + def canonical_json(files, engine: "ruby", language: nil) + JSON.pretty_generate(project(files, engine: engine, language: language)) << "\n" + end + + def project_files(files, language: nil) + { + "format" => FORMAT, + "documents" => Array(files).map do |file| + lang = (language || Syntax.language_for(file)).to_sym + project_document(Syntax.parse(file, language: lang)) + end + } + end + + def project_document(document) + { + "file" => logical_file(document.file), + "language" => document.language.to_s, + "functions" => rows(document.function_defs, %i[name owner line span visibility params]), + "owners" => rows(document.owner_defs, %i[name kind line span]), + "calls" => rows( + document.call_sites, + %i[receiver message function owner line span conditional arguments control safe_navigation block] + ), + 
"state_declarations" => rows(document.state_declarations, %i[field owner type line span]), + "state_param_origins" => rows(document.state_param_origins, %i[field receiver owner param function line span]), + "state_reads" => rows(document.state_reads, %i[field receiver function owner line span]), + "state_writes" => rows(document.state_writes, %i[field receiver function owner line span]), + "decisions" => rows(document.decision_sites, %i[kind members function line span predicate enclosing_span]), + "branch_decisions" => branch_decision_rows(document), + "branch_arms" => rows( + document.branch_arms, + %i[function kind line span decision_line decision_span predicate member body] + ), + "dispatch_sites" => rows(document.dispatch_sites, %i[variant_set arm_members outside function line span]), + "semantic_effects" => rows(document.semantic_effect_sites, %i[kind detail function line span]), + "predicate_bodies" => rows(document.predicate_defs, %i[name owner body line span]), + "comparisons" => comparison_rows(document), + "path_conditions" => rows(document.path_condition_sites, %i[guards action function line span]), + "protocol_method_effects" => rows(document.protocol_method_effects, %i[owner name line reads writes]), + "protocol_call_paths" => protocol_call_path_rows(document), + "clone_candidates" => clone_candidate_rows(document), + "redundant_nil_guards" => rows(document.redundant_nil_guard_findings, %i[defn line span local guard proof]), + "local_methods" => local_method_rows(document), + "local_complexity_scores" => local_complexity_rows(document) + } + end + + def rust_project_files(files, language:) + lang = language || Syntax.language_for(files.first).to_s + JSON.parse(Native::Command.run("syntax-facts", "--language", lang.to_s, *files)) + end + + def canonical_projection(projection) + { + "format" => projection.fetch("format"), + "documents" => Array(projection.fetch("documents")).map { |document| canonical_document(document) } + } + end + + def 
canonical_document(document) + sections = %w[ + functions owners calls state_declarations state_param_origins state_reads + state_writes decisions branch_decisions branch_arms dispatch_sites + semantic_effects predicate_bodies comparisons path_conditions + protocol_method_effects protocol_call_paths clone_candidates redundant_nil_guards + local_methods local_complexity_scores + ] + out = { + "file" => document.fetch("file"), + "language" => document.fetch("language") + } + sections.each do |section| + rows = Array(document.fetch(section)).map { |row| normalize_value(row) } + out[section] = rows.sort_by { |row| JSON.generate(row) } + end + out + end + + def rows(items, keys) + Array(items).map do |item| + keys.each_with_object({}) do |key, out| + out[key.to_s] = normalize_value(item.public_send(key)) + end + end.sort_by { |row| JSON.generate(row) } + end + + def branch_decision_rows(document) + rows = document.branch_decisions( + immutable_readers: document.immutable_struct_readers, + immutable_reader_types: document.immutable_struct_reader_types, + type_aliases: document.type_aliases + ) + rows(rows, %i[function line span predicate state_refs]) + end + + def local_complexity_rows(document) + document.local_complexity_scores.map do |id, score| + { + "id" => id.to_s, + "score" => normalize_value(score.fetch(:score)), + "signals" => normalize_value(score.fetch(:signals)) + } + end.sort_by { |row| row.fetch("id") } + end + + def comparison_rows(document) + rows(document.comparison_sites, %i[source operator function line span]).map do |row| + row.merge("raw" => row.fetch("source"), "canon_source" => normalize_comparison_source(row.fetch("source"))) + end + end + + def protocol_call_path_rows(document) + document.protocol_call_paths.map do |path| + { + "owner" => path.owner, + "name" => path.name, + "line" => path.line, + "calls" => Array(path.calls).map { |call| normalize_value(call.to_h.slice(:mid, :line, :span)) } + } + end.sort_by { |row| JSON.generate(row) } + end + 
+ def clone_candidate_rows(document) + document.clone_candidates.map do |candidate| + { + "line" => candidate.line, + "span" => normalize_value(candidate.span), + "method_name" => candidate.method_name, + "node_name" => candidate.node_name, + "mass" => candidate.mass, + "fingerprint" => candidate.fingerprint, + "child_fingerprints" => normalize_value(candidate.child_fingerprints), + "child_masses" => normalize_value(candidate.child_masses) + } + end.sort_by { |row| JSON.generate(row) } + end + + def local_method_rows(document) + document.local_methods.map do |method| + { + "id" => method.id, + "owner" => method.owner, + "name" => method.name, + "line" => method.line, + "span" => normalize_value(method.span), + "statements" => Array(method.statements).map do |statement| + normalize_value(statement.to_h.slice(:index, :line, :end_line, :span, :source, + :reads, :writes, :dependencies, :co_uses)) + end, + "boundaries" => Array(method.boundaries).map do |boundary| + normalize_value(boundary.to_h.slice(:before_index, :after_index, :line, :kind, :text)) + end, + "local_contract_assignments" => normalize_value(document.local_contract_assignments(method)) + } + end.sort_by { |row| JSON.generate(row) } + end + + def normalize_comparison_source(source) + text = source.to_s.strip + text = text[1..].to_s.strip if text.start_with?("!") + text = text.sub(/\Aself\./, "").sub(/\A@/, "") + text = text.sub(/\A[A-Za-z_]\w*(?:\([^)]*\))?\.(?=[A-Za-z_]\w*\s*(==|!=|\.))/, "") + text.gsub(/\s+/, " ").strip + end + + def normalize_value(value) + case value + when Symbol + value.to_s + when Set + value.to_a.map { |item| normalize_value(item) }.sort_by { |item| JSON.generate(item) } + when Array + value.map { |item| normalize_value(item) } + when Hash + value.keys.map(&:to_s).sort.each_with_object({}) do |key, out| + raw_key = value.key?(key) ? 
key : key.to_sym + out[key] = normalize_value(value.fetch(raw_key)) + end + else + value + end + end + + def logical_file(file) + path = file.to_s.tr("\\", "/") + marker = "gems/decomplex/examples/" + index = path.index(marker) + return path[index..] if index + + path + end + end +end diff --git a/gems/decomplex/lib/decomplex/temporal_ordering_pressure.rb b/gems/decomplex/lib/decomplex/temporal_ordering_pressure.rb index fc88a5903..1ea2087f2 100644 --- a/gems/decomplex/lib/decomplex/temporal_ordering_pressure.rb +++ b/gems/decomplex/lib/decomplex/temporal_ordering_pressure.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -require_relative "ast" +require_relative "syntax" module Decomplex # TemporalOrderingPressure -- classes/modules whose public method @@ -12,98 +12,54 @@ class TemporalOrderingPressure keyword_init: true) def self.scan(files) - rows = [] - files.each do |file| - root, lines = Ast.parse(file) - rows.concat(new(file, lines).scan(root)) + rows = files.flat_map do |file| + document = Syntax.parse(file, parser: "tree_sitter") + new(file, document).scan end rows.sort_by { |h| [-h[:score], -h[:state_methods], h[:file], h[:owner]] } end - def initialize(file, lines) + def initialize(file, document) @file = file - @lines = lines + @document = document end - def scan(root) - out = [] - walk_owners(root, [], out) - out - end - - def walk_owners(node, owners, out) - return unless Ast.node?(node) - - if %i[CLASS MODULE].include?(node.type) - owner = owner_name(node) - methods = owner_methods(node) - row = pressure_row(owner, methods) - out << row if row - node.children.each { |child| walk_owners(child, owners + [owner], out) } - else - node.children.each { |child| walk_owners(child, owners, out) } + def scan + temporal_owners.filter_map do |owner| + row = pressure_row(owner, owner_methods(owner)) + row if row end end - def owner_name(node) - Ast.slice(node.children[0], @lines).to_s.empty? ? 
"(anonymous)" : Ast.slice(node.children[0], @lines) - end + private - def owner_methods(owner_node) - body = owner_body(owner_node) - return [] unless Ast.node?(body) - - stmts = body.type == :BLOCK ? body.children.compact : [body] - visibility = :public - methods = [] - stmts.each do |stmt| - next unless Ast.node?(stmt) - - if visibility_marker?(stmt) - visibility = stmt.children[0].to_sym - elsif %i[DEFN DEFS].include?(stmt.type) - methods << method_state(stmt, visibility) - end - end - methods + def temporal_owners + (@document.owner_defs.map(&:name) + @document.function_defs.map(&:owner)).compact.uniq end - def owner_body(owner_node) - scope = owner_node.children[2] - return nil unless Ast.node?(scope) && scope.type == :SCOPE - - scope.children[2] - end - - def visibility_marker?(node) - node.type == :VCALL && %i[public protected private].include?(node.children[0]) + def owner_methods(owner) + @document.function_defs.select { |function| function.owner == owner }.map do |function| + MethodState.new( + name: function.name, + line: function.line, + span: function.span, + visibility: function.visibility || :public, + reads: state_reads_for(function).uniq.sort, + writes: state_writes_for(function).uniq.sort + ) + end end - def method_state(defn_node, visibility) - reads = [] - writes = [] - collect_state_access(defn_node, reads, writes) - MethodState.new( - name: defn_node.children[defn_node.type == :DEFS ? 
1 : 0].to_s, - line: defn_node.first_lineno, - span: [defn_node.first_lineno, defn_node.first_column, - defn_node.last_lineno, defn_node.last_column], - visibility: visibility, - reads: reads.uniq.sort, - writes: writes.uniq.sort - ) + def state_reads_for(function) + @document.state_reads.select do |read| + read.owner == function.owner && read.function == function.name + end.map(&:field) end - def collect_state_access(node, reads, writes) - return unless Ast.node?(node) - - case node.type - when :IASGN - writes << node.children[0].to_s - when :IVAR - reads << node.children[0].to_s - end - node.children.each { |child| collect_state_access(child, reads, writes) } + def state_writes_for(function) + @document.state_writes.select do |write| + write.owner == function.owner && write.function == function.name + end.map(&:field) end def pressure_row(owner, methods) diff --git a/gems/decomplex/lib/decomplex/weighted_inlined_cognitive_complexity.rb b/gems/decomplex/lib/decomplex/weighted_inlined_cognitive_complexity.rb index fba969214..f438bbfd3 100644 --- a/gems/decomplex/lib/decomplex/weighted_inlined_cognitive_complexity.rb +++ b/gems/decomplex/lib/decomplex/weighted_inlined_cognitive_complexity.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true require "set" -require_relative "ast" +require_relative "syntax" require_relative "structural_topology" module Decomplex @@ -9,7 +9,8 @@ module Decomplex # same-owner bare/self helper calls. This catches "small" orchestration # methods whose complexity was moved into private/single-use helpers. 
class WeightedInlinedCognitiveComplexity - MethodBody = Struct.new(:id, :owner, :name, :file, :line, :span, :node, keyword_init: true) + MethodBody = Struct.new(:id, :owner, :name, :file, :line, :span, :node, + :complexity, keyword_init: true) LocalScore = Struct.new(:id, :owner, :name, :file, :line, :span, :score, :signals, keyword_init: true) Contribution = Struct.new(:callee_id, :callee_name, :score, :weight, :depth, :chain, keyword_init: true) @@ -18,16 +19,6 @@ class WeightedInlinedCognitiveComplexity DEFAULT_MAX_DEPTH = 2 DEPTH_WEIGHTS = [1.0, 1.0, 0.6, 0.35].freeze EDGE_WEIGHTS = { always: 1.0, conditional: 0.75, iterates: 1.15 }.freeze - OWNER_TYPES = %i[CLASS MODULE].freeze - METHOD_TYPES = %i[DEFN DEFS].freeze - SKIP_NESTED_TYPES = %i[CLASS MODULE DEFN DEFS LAMBDA].freeze - BRANCH_TYPES = %i[IF UNLESS].freeze - LOOP_TYPES = %i[WHILE UNTIL FOR ITER].freeze - CASE_TYPES = %i[CASE CASE2].freeze - RESCUE_TYPES = %i[RESCUE RESBODY].freeze - EARLY_EXIT_TYPES = %i[RETURN BREAK NEXT REDO RETRY].freeze - BOOLEAN_TYPES = %i[AND OR].freeze - def self.scan(files, min_score: DEFAULT_MIN_SCORE, min_hidden: DEFAULT_MIN_HIDDEN, max_depth: DEFAULT_MAX_DEPTH) new(files, min_score: min_score, min_hidden: min_hidden, max_depth: max_depth).scan end @@ -40,13 +31,10 @@ def initialize(files, min_score:, min_hidden:, max_depth:) end def scan - parsed = parse_files topology = StructuralTopology.scan(@files) - bodies = parsed.flat_map do |file, (root, lines)| - MethodBodyCollector.new(file, lines).scan(root) - end + bodies = syntax_method_bodies scores = bodies.to_h do |body| - score = LocalScorer.new.score(body.node) + score = body.complexity [body.id, LocalScore.new( id: body.id, owner: body.owner, @@ -64,244 +52,28 @@ def scan private - def parse_files - @files.each_with_object({}) do |file, out| - out[file] = Ast.parse(file) - end - end - - class MethodBodyCollector - def initialize(file, lines) - @file = file - @lines = lines - end - - def scan(root) - out = [] - 
top_level_methods(root).each do |method_node| - out << method_body(method_node, top_level_owner) - end - walk(root, [], out) - out - end - - private - - def top_level_methods(root) - top_level_statements(root).select { |stmt| Ast.node?(stmt) && METHOD_TYPES.include?(stmt.type) } - end - - def walk(node, owners, out) - return unless Ast.node?(node) - - if OWNER_TYPES.include?(node.type) - owner = (owners + [owner_segment(node)]).join("::") - owner_methods(node).each do |method_node| - out << method_body(method_node, owner) - end - node.children.each { |child| walk(child, owners + [owner_segment(node)], out) } - else - node.children.each { |child| walk(child, owners, out) } - end - end - - def owner_methods(owner_node) - body = owner_body(owner_node) - return [] unless Ast.node?(body) - - owner_statements(body).flat_map do |stmt| - next [] unless Ast.node?(stmt) - - if METHOD_TYPES.include?(stmt.type) - [stmt] - elsif visibility_call?(stmt) - inline_methods(stmt) - else - [] - end + def syntax_method_bodies + @files.flat_map do |file| + document = Syntax.parse(file, parser: "tree_sitter") + score_by_id = document.local_complexity_scores + document.local_methods.map do |method| + method_body(method, complexity: score_by_id.fetch(method.id, { score: 0.0, signals: {} })) end end - - def method_body(node, owner) - name = method_name(node) - MethodBody.new( - id: "#{owner}##{name}", - owner: owner, - name: name, - file: @file, - line: node.first_lineno, - span: [node.first_lineno, node.first_column, node.last_lineno, node.last_column], - node: node - ) - end - - def inline_methods(stmt) - args = stmt.children[1] - return [] unless Ast.node?(args) - - args.children.compact.select { |arg| Ast.node?(arg) && METHOD_TYPES.include?(arg.type) } - end - - def owner_body(owner_node) - scope = owner_node.children[owner_node.type == :CLASS ? 
2 : 1] - return nil unless Ast.node?(scope) && scope.type == :SCOPE - - scope.children[2] - end - - def owner_statements(body) - body.type == :BLOCK ? body.children.compact : [body] - end - - def top_level_statements(root) - return [] unless Ast.node?(root) - - root.children.compact.flat_map do |child| - Ast.node?(child) && child.type == :BLOCK ? child.children.compact : [child] - end - end - - def visibility_call?(node) - node.type == :FCALL && StructuralTopology::VISIBILITY_MIDS.include?(node.children[0]) - end - - def method_name(node) - if node.type == :DEFS - receiver = node.children[0] - prefix = Ast.node?(receiver) && receiver.type == :SELF ? "self" : Ast.slice(receiver, @lines) - "#{prefix}.#{node.children[1]}" - else - node.children[0].to_s - end - end - - def owner_segment(node) - text = Ast.slice(node.children[0], @lines) - text.empty? ? "(anonymous)" : text - end - - def top_level_owner - "(top-level:#{@file})" - end end - class LocalScorer - def score(method_node) - signals = Hash.new(0) - { - score: round(score_node(method_node, nesting: 0, signals: signals)), - signals: signals.to_h - } - end - - private - - def score_node(node, nesting:, signals:) - return 0.0 unless Ast.node?(node) - return 0.0 if skip_nested?(node) - - case node.type - when *BRANCH_TYPES - score_branch(node, nesting, signals) - when *LOOP_TYPES - score_loop(node, nesting, signals) - when *CASE_TYPES - score_case(node, nesting, signals) - when *RESCUE_TYPES - score_rescue(node, nesting, signals) - when *EARLY_EXIT_TYPES - score_early_exit(node, nesting, signals) - when *BOOLEAN_TYPES - score_boolean_node(node, nesting, signals) - else - score_children(node, nesting: nesting, signals: signals) - end - end - - def skip_nested?(node) - SKIP_NESTED_TYPES.include?(node.type) && !METHOD_TYPES.include?(node.type) - end - - def score_branch(node, nesting, signals) - signals[:branches] += 1 - signals[:nested] += 1 if nesting.positive? 
- condition = node.children[0] - positive = node.children[1] - negative = node.children[2] - branch_cost(nesting) + - predicate_cost(condition, signals) + - score_node(positive, nesting: nesting + 1, signals: signals) + - score_node(negative, nesting: nesting + 1, signals: signals) - end - - def score_loop(node, nesting, signals) - signals[:loops] += 1 - signals[:nested] += 1 if nesting.positive? - branch_cost(nesting) + score_children(node, nesting: nesting + 1, signals: signals) - end - - def score_case(node, nesting, signals) - signals[:cases] += 1 - 0.5 + score_case_children(node, nesting, signals) - end - - def score_case_children(node, nesting, signals) - node.children.sum do |child| - if Ast.node?(child) && child.type == :WHEN - score_when(child, nesting, signals) - else - score_node(child, nesting: nesting, signals: signals) - end - end - end - - def score_when(node, nesting, signals) - body = node.children[1] - next_when = node.children[2] - score_node(body, nesting: nesting + 1, signals: signals) + - score_node(next_when, nesting: nesting, signals: signals) - end - - def score_rescue(node, nesting, signals) - signals[:rescues] += 1 - branch_cost(nesting) + score_children(node, nesting: nesting + 1, signals: signals) - end - - def score_early_exit(node, nesting, signals) - signals[:early_exits] += 1 - exit_cost = nesting.positive? ? 0.5 + (nesting * 0.25) : 0.0 - exit_cost + score_children(node, nesting: nesting, signals: signals) - end - - def score_boolean_node(node, nesting, signals) - signals[:boolean_ops] += 1 - 0.25 + score_children(node, nesting: nesting, signals: signals) - end - - def score_children(node, nesting:, signals:) - node.children.sum { |child| score_node(child, nesting: nesting, signals: signals) } - end - - def predicate_cost(node, signals) - bools = boolean_count(node) - signals[:boolean_ops] += bools - bools * 0.5 - end - - def boolean_count(node) - return 0 unless Ast.node?(node) - - own = BOOLEAN_TYPES.include?(node.type) ? 
1 : 0 - own + node.children.sum { |child| boolean_count(child) } - end - - def branch_cost(nesting) - 1.0 + nesting - end - - def round(value) - (value * 10).round / 10.0 - end + def method_body(summary, complexity:) + owner = summary.owner == "(top-level)" ? "(top-level:#{summary.file})" : summary.owner + MethodBody.new( + id: "#{owner}##{summary.name}", + owner: owner, + name: summary.name, + file: summary.file, + line: summary.line, + span: summary.span, + node: summary.node, + complexity: complexity + ) end class Analyzer diff --git a/gems/decomplex/ruby_core.json b/gems/decomplex/ruby_core.json new file mode 100644 index 000000000..e69de29bb diff --git a/gems/decomplex/rust/Cargo.lock b/gems/decomplex/rust/Cargo.lock new file mode 100644 index 000000000..00787e223 --- /dev/null +++ b/gems/decomplex/rust/Cargo.lock @@ -0,0 +1,554 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "anyhow" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" + +[[package]] +name = "bitflags" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4388bee8683e3d04af747c73422af53102d2bd24d9eadb6cbc100baef4b43f8" + +[[package]] +name = "cc" +version = "1.2.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dad887fd958be91b5098c0248def011f4523ab786cd411be668777e55063501f" +dependencies = [ + "find-msvc-tools", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "decomplex-rust" +version = "0.1.0" +dependencies = [ + "anyhow", + "regex", + "serde", + "serde_json", + "tempfile", + "tree-sitter", + "tree-sitter-c", + "tree-sitter-c-sharp", + "tree-sitter-cpp", + "tree-sitter-go", + "tree-sitter-java", + "tree-sitter-javascript", + "tree-sitter-kotlin-ng", + "tree-sitter-language", + "tree-sitter-lua", + "tree-sitter-php", + "tree-sitter-python", + "tree-sitter-ruby", + "tree-sitter-rust", + "tree-sitter-swift", + "tree-sitter-typescript", + "tree-sitter-zig", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "fastrand" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6" + +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + +[[package]] +name = "hashbrown" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" + +[[package]] +name = "indexmap" +version = "2.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "itoa" +version = "1.0.18" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" + +[[package]] +name = "libc" +version = "0.2.186" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" + +[[package]] +name = "linux-raw-sys" +version = "0.4.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" + +[[package]] +name = "memchr" +version = "2.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88904434abc2901f197fe8cc55f0445e7ded921dba5911dad2e2b39b48e663c4" + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1292b7759ae1cb9ec195452d1390a074f0cd8541ab7a5a8c31cd6db45d4a6ba" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6f6ff9a378485b298a5286656da665ba74413d36db0979633275d2e708145d4" + +[[package]] +name = 
"rustix" +version = "0.38.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.59.0", +] + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.150" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8014e44b4736ed0538adeecded0fce2a272f22dc9578a7eb6b2d9993c74cfb9" +dependencies = [ + "indexmap", + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "shlex" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8fadd59c855ef2080decdef8ff161eb6661b86933c9d82e5ba29dc602a55aba" + +[[package]] +name = "streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" + +[[package]] +name = "syn" +version = "2.0.118" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9ae57f904213ebb649ce6895b8a66c66f0203b9319718f69a5612a065b1422" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + 
+[[package]] +name = "tempfile" +version = "3.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" +dependencies = [ + "cfg-if", + "fastrand", + "rustix", + "windows-sys 0.52.0", +] + +[[package]] +name = "tree-sitter" +version = "0.25.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d7b8994f367f16e6fa14b5aebbcb350de5d7cbea82dc5b00ae997dd71680dd2" +dependencies = [ + "cc", + "regex", + "regex-syntax", + "serde_json", + "streaming-iterator", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-c" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a3aad8f0129083a59fe8596157552d2bb7148c492d44c21558d68ca1c722707" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-c-sharp" +version = "0.23.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1aac67f1ad71de1d6d39708d34811081c26dfa495658de6c14c34200849357c" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-cpp" +version = "0.23.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df2196ea9d47b4ab4a31b9297eaa5a5d19a0b121dceb9f118f6790ad0ab94743" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-go" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8560a4d2f835cc0d4d2c2e03cbd0dde2f6114b43bc491164238d333e28b16ea" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-java" +version = "0.23.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0aa6cbcdc8c679b214e616fd3300da67da0e492e066df01bcf5a5921a71e90d6" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-javascript" +version = "0.25.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "68204f2abc0627a90bdf06e605f5c470aa26fdcb2081ea553a04bdad756693f5" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-kotlin-ng" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e800ebbda938acfbf224f4d2c34947a31994b1295ee6e819b65226c7b51b4450" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-language" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c199356c799a8945965bb5f2c55b2ad9d9aa7c4b4f6e587fe9dea0bc715e5f9c" + +[[package]] +name = "tree-sitter-lua" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea992f4164d83f371ef1239ae178c4d4596c296c09055e9a48bb02a2760403af" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-php" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d8c17c3ab69052c5eeaa7ff5cd972dd1bc25d1b97ee779fec391ad3b5df5592" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-python" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bf85fd39652e740bf60f46f4cda9492c3a9ad75880575bf14960f775cb74a1c" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-ruby" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be0484ea4ef6bb9c575b4fdabde7e31340a8d2dbc7d52b321ac83da703249f95" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-rust" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b9b18034c684a2420722be8b2a91c9c44f2546b631c039edf575ccba8c61be1" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = 
"tree-sitter-swift" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ef216011c3e3df4fa864736f347cb8d509b1066cf0c8549fb1fd81ac9832e59" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-typescript" +version = "0.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c5f76ed8d947a75cc446d5fccd8b602ebf0cde64ccf2ffa434d873d7a575eff" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-zig" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab11fc124851b0db4dd5e55983bbd9631192e93238389dcd44521715e5d53e28" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/gems/decomplex/rust/Cargo.toml b/gems/decomplex/rust/Cargo.toml new file mode 100644 index 000000000..818495631 --- /dev/null +++ b/gems/decomplex/rust/Cargo.toml @@ -0,0 +1,36 @@ +[package] +name = "decomplex-rust" +version = "0.1.0" +edition = "2021" +description = "Native fact extraction slices for Decomplex" +license = "MIT" + +[[bin]] +name = "decomplex-rust" +path = "src/main.rs" + +[dependencies] +anyhow = "1.0" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +regex = "1.10" +tree-sitter = "=0.25.8" +tree-sitter-language = "=0.1.3" +tree-sitter-ruby = "=0.23.1" +tree-sitter-python = "=0.25.0" +tree-sitter-javascript = "=0.25.0" +tree-sitter-java = "0.23.5" +tree-sitter-typescript = "0.23.2" +tree-sitter-go = "=0.25.0" +tree-sitter-rust = "=0.24.0" +tree-sitter-zig = "=1.1.2" +tree-sitter-lua = "=0.4.1" +tree-sitter-c = "=0.24.1" +tree-sitter-cpp = "0.23.4" +tree-sitter-c-sharp = "=0.23.5" +tree-sitter-swift = "=0.7.1" +tree-sitter-kotlin-ng = "1.1.0" +tree-sitter-php = "=0.24.2" + +[dev-dependencies] +tempfile = "=3.10.1" diff --git a/gems/decomplex/rust/rust_core.json b/gems/decomplex/rust/rust_core.json new file mode 100644 index 000000000..fb50f4462 --- /dev/null +++ b/gems/decomplex/rust/rust_core.json @@ -0,0 +1 @@ 
+[{"field":"tv_sec","receiver":"ts","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv_clock_gettime","line":147,"span":[147,2,147,23],"owner":"core"},{"field":"tv_nsec","receiver":"ts","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv_clock_gettime","line":148,"span":[148,2,148,25],"owner":"core"},{"field":"flags","receiver":"handle","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv_close","line":162,"span":[162,2,162,36],"owner":"core"},{"field":"close_cb","receiver":"handle","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv_close","line":163,"span":[163,2,163,29],"owner":"core"},{"field":"next_closing","receiver":"handle","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__make_close_pending","line":271,"span":[271,2,271,54],"owner":"core"},{"field":"closing_handles","receiver":"handle->loop","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__make_close_pending","line":272,"span":[272,2,272,40],"owner":"core"},{"field":"flags","receiver":"handle","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__finish_close","line":316,"span":[316,2,316,35],"owner":"core"},{"field":"flags","receiver":"handle","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__finish_close","line":338,"span":[338,8,338,41],"owner":"core"},{"field":"closing_handles","receiver":"loop","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__run_closing_handles","line":373,"span":[373,2,373,30],"owner":"core"},{"field":"stop_flag","receiver":"loop","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv_run","line":489,"span":[489,4,489,23],"owner":"core"},{"field":"watchers","receiver":"loop","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"maybe_resize","line":900,"span":[900,2,900,27],"owner":"core"},{"field":"nwatchers","receiver":"loop","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"maybe_resize","line":901,"span":[901,2,901,29],"owner":"core"},{"field":"fd","receiver":"w","file"
:"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__io_init","line":945,"span":[945,2,945,12],"owner":"uv__io_t"},{"field":"bits","receiver":"w","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__io_init","line":946,"span":[946,2,946,13],"owner":"uv__io_t"},{"field":"events","receiver":"w","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__io_init","line":947,"span":[947,2,947,15],"owner":"uv__io_t"},{"field":"pevents","receiver":"w","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__io_init","line":948,"span":[948,2,948,16],"owner":"uv__io_t"},{"field":"pevents","receiver":"w","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__io_start","line":963,"span":[963,2,963,22],"owner":"core"},{"field":"pevents","receiver":"w","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__io_stop","line":1018,"span":[1018,2,1018,23],"owner":"core"},{"field":"events","receiver":"w","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__io_stop","line":1023,"span":[1023,4,1023,17],"owner":"core"},{"field":"current_timeout","receiver":"lfields","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__io_poll_prepare","line":1073,"span":[1073,2,1073,36],"owner":"core"},{"field":"tv_sec","receiver":"rusage->ru_utime","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getrusage","line":1103,"span":[1103,2,1103,49],"owner":"core"},{"field":"tv_usec","receiver":"rusage->ru_utime","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getrusage","line":1104,"span":[1104,2,1104,51],"owner":"core"},{"field":"tv_sec","receiver":"rusage->ru_stime","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getrusage","line":1106,"span":[1106,2,1106,49],"owner":"core"},{"field":"tv_usec","receiver":"rusage->ru_stime","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getrusage","line":1107,"span":[1107,2,1107,51],"owner":"core"},{"field":"ru_maxrss","receiver":"rusage","file":"/tmp/lineage-c-libuv/s
rc/unix/core.c","function":"uv__getrusage","line":1110,"span":[1110,2,1110,37],"owner":"core"},{"field":"ru_ixrss","receiver":"rusage","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getrusage","line":1111,"span":[1111,2,1111,35],"owner":"core"},{"field":"ru_idrss","receiver":"rusage","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getrusage","line":1112,"span":[1112,2,1112,35],"owner":"core"},{"field":"ru_isrss","receiver":"rusage","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getrusage","line":1113,"span":[1113,2,1113,35],"owner":"core"},{"field":"ru_minflt","receiver":"rusage","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getrusage","line":1114,"span":[1114,2,1114,37],"owner":"core"},{"field":"ru_majflt","receiver":"rusage","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getrusage","line":1115,"span":[1115,2,1115,37],"owner":"core"},{"field":"ru_nswap","receiver":"rusage","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getrusage","line":1116,"span":[1116,2,1116,35],"owner":"core"},{"field":"ru_inblock","receiver":"rusage","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getrusage","line":1117,"span":[1117,2,1117,39],"owner":"core"},{"field":"ru_oublock","receiver":"rusage","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getrusage","line":1118,"span":[1118,2,1118,39],"owner":"core"},{"field":"ru_msgsnd","receiver":"rusage","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getrusage","line":1119,"span":[1119,2,1119,37],"owner":"core"},{"field":"ru_msgrcv","receiver":"rusage","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getrusage","line":1120,"span":[1120,2,1120,37],"owner":"core"},{"field":"ru_nsignals","receiver":"rusage","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getrusage","line":1121,"span":[1121,2,1121,41],"owner":"core"},{"field":"ru_nvcsw","receiver":"rusage","file":"/tmp/lineage-c-libuv/src/unix/c
ore.c","function":"uv__getrusage","line":1122,"span":[1122,2,1122,35],"owner":"core"},{"field":"ru_nivcsw","receiver":"rusage","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getrusage","line":1123,"span":[1123,2,1123,37],"owner":"core"},{"field":"ru_maxrss","receiver":"rusage","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getrusage","line":1130,"span":[1130,2,1130,27],"owner":"core"},{"field":"ru_maxrss","receiver":"rusage","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getrusage","line":1132,"span":[1132,2,1132,43],"owner":"core"},{"field":"tv_sec","receiver":"rusage->ru_utime","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv_getrusage_thread","line":1165,"span":[1165,2,1165,50],"owner":"core"},{"field":"tv_usec","receiver":"rusage->ru_utime","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv_getrusage_thread","line":1166,"span":[1166,2,1166,56],"owner":"core"},{"field":"tv_sec","receiver":"rusage->ru_stime","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv_getrusage_thread","line":1167,"span":[1167,2,1167,52],"owner":"core"},{"field":"tv_usec","receiver":"rusage->ru_stime","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv_getrusage_thread","line":1168,"span":[1168,2,1168,58],"owner":"core"},{"field":"username","receiver":"pwd","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getpwuid_r","line":1395,"span":[1395,2,1395,67],"owner":"core"},{"field":"homedir","receiver":"pwd","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getpwuid_r","line":1406,"span":[1406,2,1406,42],"owner":"core"},{"field":"shell","receiver":"pwd","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getpwuid_r","line":1410,"span":[1410,2,1410,42],"owner":"core"},{"field":"uid","receiver":"pwd","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv__getpwuid_r","line":1414,"span":[1414,2,1414,22],"owner":"core"},{"field":"gid","receiver":"pwd","file":"/tmp/lineage-c-
libuv/src/unix/core.c","function":"uv__getpwuid_r","line":1415,"span":[1415,2,1415,22],"owner":"core"},{"field":"members","receiver":"grp","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv_os_get_group","line":1483,"span":[1483,2,1483,32],"owner":"core"},{"field":"groupname","receiver":"grp","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv_os_get_group","line":1494,"span":[1494,2,1494,25],"owner":"core"},{"field":"gid","receiver":"grp","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv_os_get_group","line":1499,"span":[1499,2,1499,22],"owner":"core"},{"field":"name","receiver":"envitem","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv_os_environ","line":1558,"span":[1558,4,1558,23],"owner":"core"},{"field":"value","receiver":"envitem","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv_os_environ","line":1559,"span":[1559,4,1559,28],"owner":"core"},{"field":"sched_priority","receiver":"param","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv_thread_setpriority","line":1837,"span":[1837,4,1837,31],"owner":"uv_thread_t"},{"field":"tv_sec","receiver":"tv","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv_gettimeofday","line":1936,"span":[1936,2,1936,36],"owner":"core"},{"field":"tv_usec","receiver":"tv","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv_gettimeofday","line":1937,"span":[1937,2,1937,38],"owner":"core"},{"field":"tv_sec","receiver":"timeout","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv_sleep","line":1945,"span":[1945,2,1945,30],"owner":"core"},{"field":"tv_nsec","receiver":"timeout","file":"/tmp/lineage-c-libuv/src/unix/core.c","function":"uv_sleep","line":1946,"span":[1946,2,1946,47],"owner":"core"}] diff --git a/gems/decomplex/rust/rust_writes.json b/gems/decomplex/rust/rust_writes.json new file mode 100644 index 000000000..fe51488c7 --- /dev/null +++ b/gems/decomplex/rust/rust_writes.json @@ -0,0 +1 @@ +[] diff --git 
a/gems/decomplex/rust/src/bin/dump_ast.rs b/gems/decomplex/rust/src/bin/dump_ast.rs new file mode 100644 index 000000000..a0094175c --- /dev/null +++ b/gems/decomplex/rust/src/bin/dump_ast.rs @@ -0,0 +1,101 @@ +use anyhow::{bail, Result}; +use decomplex_rust::decomplex::ast::{self, Child, Node}; +use decomplex_rust::decomplex::syntax::Language; +use serde_json::{json, Value}; +use std::env; +use std::fs; +use std::path::PathBuf; +use tree_sitter::{Language as TreeSitterLanguage, Parser}; + +fn main() -> Result<()> { + let mut args = env::args().skip(1).collect::>(); + let raw = args.first().map(|arg| arg == "--raw").unwrap_or(false); + if raw { + args.remove(0); + } + let mut args = args.into_iter(); + let language = args + .next() + .ok_or_else(|| anyhow::anyhow!("usage: dump_ast [--raw] LANGUAGE FILE"))?; + let file = args + .next() + .ok_or_else(|| anyhow::anyhow!("usage: dump_ast [--raw] LANGUAGE FILE"))?; + if args.next().is_some() { + bail!("usage: dump_ast [--raw] LANGUAGE FILE"); + } + + let language = Language::parse(&language)?; + let file = PathBuf::from(file); + if raw { + let source = fs::read_to_string(&file)?; + let mut parser = Parser::new(); + parser.set_language(&language_grammar(language))?; + let tree = parser + .parse(&source, None) + .ok_or_else(|| anyhow::anyhow!("tree-sitter produced no tree"))?; + println!( + "{}", + serde_json::to_string(&raw_node_value(tree.root_node(), &source))? 
+ ); + } else { + let (root, _lines) = ast::parse_with_language(&file, language)?; + println!("{}", serde_json::to_string(&node_value(&root))?); + } + Ok(()) +} + +fn node_value(node: &Node) -> Value { + json!({ + "type": node.r#type, + "children": node.children.iter().map(child_value).collect::>(), + "first_lineno": node.first_lineno, + "first_column": node.first_column, + "last_lineno": node.last_lineno, + "last_column": node.last_column, + "text": node.text, + }) +} + +fn child_value(child: &Child) -> Value { + match child { + Child::Node(node) => node_value(node), + Child::Symbol(value) | Child::String(value) => Value::String(value.clone()), + Child::Integer(value) => Value::Number((*value).into()), + Child::Bool(value) => Value::Bool(*value), + Child::Nil => Value::Null, + } +} + +fn raw_node_value(node: tree_sitter::Node<'_>, source: &str) -> Value { + let mut cursor = node.walk(); + json!({ + "kind": node.kind(), + "named": node.is_named(), + "start_byte": node.start_byte(), + "end_byte": node.end_byte(), + "start": {"row": node.start_position().row, "column": node.start_position().column}, + "end": {"row": node.end_position().row, "column": node.end_position().column}, + "text": node.utf8_text(source.as_bytes()).unwrap_or(""), + "children": node.children(&mut cursor).map(|child| raw_node_value(child, source)).collect::>(), + }) +} + +fn language_grammar(language: Language) -> TreeSitterLanguage { + match language { + Language::Ruby => tree_sitter_ruby::LANGUAGE.into(), + Language::Python => tree_sitter_python::LANGUAGE.into(), + Language::JavaScript => tree_sitter_javascript::LANGUAGE.into(), + Language::Java => tree_sitter_java::LANGUAGE.into(), + Language::TypeScript => tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(), + Language::Swift => tree_sitter_swift::LANGUAGE.into(), + Language::Kotlin => tree_sitter_kotlin_ng::LANGUAGE.into(), + Language::Go => tree_sitter_go::LANGUAGE.into(), + Language::Rust => tree_sitter_rust::LANGUAGE.into(), + 
Language::Zig => tree_sitter_zig::LANGUAGE.into(), + Language::Lua => tree_sitter_lua::LANGUAGE.into(), + Language::C => tree_sitter_c::LANGUAGE.into(), + Language::Cpp => tree_sitter_cpp::LANGUAGE.into(), + Language::CSharp => tree_sitter_c_sharp::LANGUAGE.into(), + Language::Php => tree_sitter_php::LANGUAGE_PHP.into(), + } +} diff --git a/gems/decomplex/rust/src/decomplex/architecture_test.rs b/gems/decomplex/rust/src/decomplex/architecture_test.rs new file mode 100644 index 000000000..6c98a1f57 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/architecture_test.rs @@ -0,0 +1,422 @@ +use std::fs; +use std::path::{Path, PathBuf}; + +fn crate_src() -> PathBuf { + Path::new(env!("CARGO_MANIFEST_DIR")).join("src/decomplex") +} + +fn detector_files() -> Vec { + rust_files(crate_src().join("detectors")) +} + +fn post_syntax_consumer_files() -> Vec { + let mut files = detector_files(); + files.extend( + [ + "convergence.rs", + "delta.rs", + "report.rs", + "report_facts.rs", + "report_value.rs", + "root_cause.rs", + "sarif.rs", + ] + .iter() + .map(|name| crate_src().join(name)), + ); + files +} + +fn rust_files(dir: PathBuf) -> Vec { + let mut files = fs::read_dir(&dir) + .unwrap_or_else(|err| panic!("read {}: {err}", dir.display())) + .map(|entry| entry.expect("rust file entry").path()) + .filter(|path| path.extension().and_then(|ext| ext.to_str()) == Some("rs")) + .collect::>(); + files.sort(); + files +} + +#[test] +fn every_supported_language_has_a_syntax_adapter_file() { + let adapters = crate_src().join("syntax/adapters"); + let expected = [ + "c.rs", + "cpp.rs", + "csharp.rs", + "go.rs", + "java.rs", + "javascript.rs", + "kotlin.rs", + "lua.rs", + "php.rs", + "python.rs", + "ruby.rs", + "rust.rs", + "swift.rs", + "typescript.rs", + "zig.rs", + ]; + + for file in expected { + assert!( + adapters.join(file).is_file(), + "missing syntax adapter file {}", + adapters.join(file).display() + ); + } +} + +#[test] +fn every_supported_language_has_an_ast_adapter_file() 
{ + let adapters = crate_src().join("ast/adapters"); + let expected = [ + "c.rs", + "cpp.rs", + "csharp.rs", + "go.rs", + "java.rs", + "javascript.rs", + "kotlin.rs", + "lua.rs", + "php.rs", + "python.rs", + "ruby.rs", + "rust.rs", + "swift.rs", + "typescript.rs", + "zig.rs", + ]; + + for file in expected { + assert!( + adapters.join(file).is_file(), + "missing AST adapter file {}", + adapters.join(file).display() + ); + } +} + +#[test] +fn tree_sitter_adapter_does_not_define_concrete_language_profiles() { + let path = crate_src().join("syntax/tree_sitter_adapter.rs"); + let source = fs::read_to_string(&path).expect("read tree_sitter_adapter.rs"); + let forbidden = [ + "default_profile!", + "struct RubyProfile", + "struct PythonProfile", + "struct JavaScriptProfile", + "struct JavaProfile", + "struct TypeScriptProfile", + "struct SwiftProfile", + "struct KotlinProfile", + "struct GoProfile", + "struct RustProfile", + "struct ZigProfile", + "struct LuaProfile", + "struct CProfile", + "struct CppProfile", + "struct CSharpProfile", + "struct PhpProfile", + ]; + + for pattern in forbidden { + assert!( + !source.contains(pattern), + "{} should live in syntax/adapters, not tree_sitter_adapter.rs", + pattern + ); + } +} + +#[test] +fn ast_normalizer_does_not_define_a_language_adapter_enum() { + let path = crate_src().join("ast.rs"); + let source = fs::read_to_string(&path).expect("read ast.rs"); + for pattern in [ + "enum TreeSitterNormalizationAdapter", + "impl TreeSitterNormalizationAdapter", + "TreeSitterNormalizationAdapter::", + ] { + assert!( + !source.contains(pattern), + "{} should live as polymorphic ast/adapters implementations", + pattern + ); + } +} + +#[test] +fn ast_adapters_do_not_delegate_through_a_language_kind_selector() { + let adapters = crate_src().join("ast/adapters"); + for entry in fs::read_dir(&adapters).expect("read ast adapters dir") { + let path = entry.expect("ast adapter entry").path(); + if path.extension().and_then(|ext| ext.to_str()) != 
Some("rs") { + continue; + } + let source = fs::read_to_string(&path).expect("read ast adapter"); + for pattern in [ + "TreeSitterNormalizationAdapter", + "fn kind(&self)", + "self.kind()", + ] { + assert!( + !source.contains(pattern), + "{} delegates through {}; put behavior directly in the adapter", + path.display(), + pattern + ); + } + } +} + +#[test] +fn detectors_do_not_import_tree_sitter_directly() { + for path in detector_files() { + let source = production_source(&fs::read_to_string(&path).expect("read detector source")); + assert!( + !source.contains("tree_sitter"), + "{} imports tree_sitter directly; detectors should consume normalized syntax/AST facts", + path.display() + ); + } +} + +#[test] +fn detectors_do_not_cross_the_syntax_boundary() { + let forbidden = [ + ("syntax adapter access", "syntax::adapters"), + ("language profile access", "language_profile("), + ("raw syntax node type", "RawNode"), + ("raw document root access", "document.root"), + ( + "normalized document root access", + "document.normalized_root", + ), + ("document language inspection", "document.language"), + ("Ruby language branch", "Language::Ruby"), + ("Python language branch", "Language::Python"), + ("JavaScript language branch", "Language::JavaScript"), + ("Java language branch", "Language::Java"), + ("TypeScript language branch", "Language::TypeScript"), + ("Swift language branch", "Language::Swift"), + ("Kotlin language branch", "Language::Kotlin"), + ("Go language branch", "Language::Go"), + ("Rust language branch", "Language::Rust"), + ("Zig language branch", "Language::Zig"), + ("Lua language branch", "Language::Lua"), + ("C language branch", "Language::C"), + ("Cpp language branch", "Language::Cpp"), + ("CSharp language branch", "Language::CSharp"), + ("Php language branch", "Language::Php"), + ]; + let mut offenders = Vec::new(); + + for path in detector_files() { + let source = production_source(&fs::read_to_string(&path).expect("read detector source")); + for (reason, 
pattern) in forbidden { + if source.contains(pattern) { + offenders.push(format!("{}: {}: {}", path.display(), reason, pattern)); + } + } + } + + assert!( + offenders.is_empty(), + "Detectors must consume syntax facts, not language/parser internals:\n{}", + offenders.join("\n") + ); +} + +#[test] +fn post_syntax_consumers_do_not_access_parser_or_adapter_internals() { + let forbidden = [ + ("syntax adapter access", "syntax::adapters"), + ("language profile access", "language_profile("), + ("raw syntax node type", "RawNode"), + ("tree-sitter access", "tree_sitter"), + ("raw document root access", "document.root"), + ( + "normalized document root access", + "document.normalized_root", + ), + ]; + let mut offenders = Vec::new(); + + for path in post_syntax_consumer_files() { + let source = production_source(&fs::read_to_string(&path).expect("read consumer source")); + for (reason, pattern) in forbidden { + if source.contains(pattern) { + offenders.push(format!("{}: {}: {}", path.display(), reason, pattern)); + } + } + } + + assert!( + offenders.is_empty(), + "Post-syntax consumers must consume generated facts, not parser/adaptor internals:\n{}", + offenders.join("\n") + ); +} + +#[test] +fn post_syntax_consumers_do_not_branch_on_concrete_languages() { + let forbidden = [ + ("Ruby language branch", "Language::Ruby"), + ("Python language branch", "Language::Python"), + ("JavaScript language branch", "Language::JavaScript"), + ("Java language branch", "Language::Java"), + ("TypeScript language branch", "Language::TypeScript"), + ("Swift language branch", "Language::Swift"), + ("Kotlin language branch", "Language::Kotlin"), + ("Go language branch", "Language::Go"), + ("Rust language branch", "Language::Rust"), + ("Zig language branch", "Language::Zig"), + ("Lua language branch", "Language::Lua"), + ("C language branch", "Language::C"), + ("Cpp language branch", "Language::Cpp"), + ("CSharp language branch", "Language::CSharp"), + ("Php language branch", "Language::Php"), + 
]; + let mut offenders = Vec::new(); + + for path in post_syntax_consumer_files() { + let source = production_source(&fs::read_to_string(&path).expect("read consumer source")); + for (reason, pattern) in forbidden { + if source.contains(pattern) { + offenders.push(format!("{}: {}: {}", path.display(), reason, pattern)); + } + } + } + + assert!( + offenders.is_empty(), + "Post-syntax consumers must not encode language-specific branches:\n{}", + offenders.join("\n") + ); +} + +#[test] +fn report_facts_uses_document_detector_apis() { + let path = crate_src().join("report_facts.rs"); + let source = fs::read_to_string(&path).expect("read report_facts.rs"); + assert!( + !source.contains("::scan_files("), + "report_facts.rs must build shared documents once and call detector scan_documents APIs" + ); +} + +#[test] +fn false_simplicity_detector_does_not_own_language_lexicons() { + let path = crate_src().join("detectors/false_simplicity.rs"); + let source = fs::read_to_string(&path).expect("read false_simplicity.rs"); + for pattern in [ + "fn lexicon_for", + "struct Lexicon", + "RUBY_CONTEXT_PAIRS", + "RUBY_CALLBACK_SET", + "RUBY_CORE_CONSTS", + "PYTHON_CONTEXT_PAIRS", + "JS_CONTEXT_PAIRS", + "COMMON_CALLBACK_SET", + ] { + assert!( + !source.contains(pattern), + "{} belongs in syntax/adapters, not the false_simplicity detector", + pattern + ); + } +} + +#[test] +fn state_branch_density_detector_does_not_own_ruby_source_mining() { + let path = crate_src().join("detectors/state_branch_density.rs"); + let source = fs::read_to_string(&path).expect("read state_branch_density.rs"); + for pattern in [ + "T::Struct", + "T\\.type_alias", + "const\\s+:", + "fn immutable_struct_readers", + "fn immutable_struct_reader_types", + "fn type_aliases", + "fn extract_method_param_types", + "fn sig_param_types", + ] { + assert!( + !source.contains(pattern), + "{} belongs in the Ruby syntax adapter, not state_branch_density", + pattern + ); + } +} + +#[test] +fn 
flay_similarity_detector_does_not_own_clone_fingerprint_grammar() { + let path = crate_src().join("detectors/flay_similarity.rs"); + let source = production_source(&fs::read_to_string(&path).expect("read flay_similarity.rs")); + for pattern in [ + "RawNode", + "CLONE_CANDIDATE_KINDS", + "IDENTIFIER_KINDS", + "LITERAL_KINDS", + "fn candidate_node", + "fn fingerprint", + "fn typed_struct_schema_text", + ] { + assert!( + !source.contains(pattern), + "{} belongs in syntax/adapters, not flay_similarity", + pattern + ); + } +} + +fn production_source(source: &str) -> String { + source + .lines() + .take_while(|line| line.trim() != "#[cfg(test)]") + .collect::>() + .join("\n") +} + +#[test] +fn ast_normalizer_does_not_branch_on_language_after_parser_setup() { + let path = crate_src().join("ast.rs"); + let source = fs::read_to_string(&path).expect("read ast.rs"); + let normalizer_source = source + .split_once("struct TreeSitterNormalizer") + .map(|(_, rest)| rest) + .unwrap_or(&source); + let language_branch_count = [ + "Language::Ruby", + "Language::Python", + "Language::JavaScript", + "Language::Java", + "Language::TypeScript", + "Language::Swift", + "Language::Kotlin", + "Language::Go", + "Language::Rust", + "Language::Zig", + "Language::Lua", + "Language::C", + "Language::Cpp", + "Language::CSharp", + "Language::Php", + "Self::Ruby", + "Self::Python", + "Self::Lua", + "Self::TypeScript", + "Self::Default", + "TreeSitterNormalizationAdapter::Python", + "TreeSitterNormalizationAdapter::Lua", + ] + .iter() + .map(|pattern| normalizer_source.matches(pattern).count()) + .sum::(); + + assert_eq!( + language_branch_count, 0, + "ast.rs normalizer branches on language; put behavior in ast/adapters instead" + ); +} diff --git a/gems/decomplex/rust/src/decomplex/ast-test.rs b/gems/decomplex/rust/src/decomplex/ast-test.rs new file mode 100644 index 000000000..48a12e4f8 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/ast-test.rs @@ -0,0 +1,20852 @@ +use super::{parse, 
parse_with_language, Child, Node}; +use crate::decomplex::syntax::Language; +use serde_json::{json, Value}; +use std::collections::BTreeSet; +use std::io::Write; +use std::path::Path; +use std::process::Command; +use tree_sitter::{Node as TreeSitterNode, Parser as TreeSitterParser}; + +fn parse_source(source: &str) -> Node { + let mut file = tempfile::Builder::new() + .suffix(".rb") + .tempfile() + .expect("create temp ruby file"); + file.write_all(source.as_bytes()) + .expect("write temp ruby file"); + parse(file.path()).expect("parse temp ruby file").0 +} + +fn parse_language_source(source: &str, language: Language, suffix: &str) -> Node { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create temp source file"); + file.write_all(source.as_bytes()) + .expect("write temp source file"); + parse_with_language(file.path(), language) + .expect("parse temp source file") + .0 +} + +fn nodes_of_type<'a>(node: &'a Node, node_type: &str, out: &mut Vec<&'a Node>) { + if node.r#type == node_type { + out.push(node); + } + for child in node.children.iter().filter_map(super::node) { + nodes_of_type(child, node_type, out); + } +} + +fn first_node<'a>(root: &'a Node, node_type: &str, text: &str) -> &'a Node { + let mut nodes = Vec::new(); + nodes_of_type(root, node_type, &mut nodes); + nodes + .into_iter() + .find(|node| node.text == text) + .unwrap_or_else(|| panic!("expected {node_type} with text {text:?} in {root:#?}")) +} + +fn child_node(node: &Node, index: usize) -> &Node { + node.children + .get(index) + .and_then(super::node) + .unwrap_or_else(|| panic!("expected child node {index} in {node:#?}")) +} + +fn child_types(node: &Node) -> Vec<&str> { + node.children + .iter() + .filter_map(super::node) + .map(|child| child.r#type.as_str()) + .collect() +} + +fn test_node(node_type: &str, children: Vec) -> Node { + Node { + r#type: node_type.to_string(), + children, + first_lineno: 1, + first_column: 0, + last_lineno: 1, + last_column: 1, + 
text: node_type.to_string(), + } +} + +fn infix_parts_text( + normalizer: &super::TreeSitterNormalizer<'_>, + node: TreeSitterNode<'_>, + source: &str, +) -> Option<(String, String, String)> { + let (left, operator, right) = normalizer.infix_statement_parts(node)?; + Some(( + super::node_text(left, source).to_string(), + operator, + super::node_text(right, source).to_string(), + )) +} + +fn node_value(node: &Node) -> Value { + json!({ + "type": node.r#type, + "children": node.children.iter().map(child_value).collect::>(), + "first_lineno": node.first_lineno, + "first_column": node.first_column, + "last_lineno": node.last_lineno, + "last_column": node.last_column, + "text": node.text, + }) +} + +fn child_value(child: &Child) -> Value { + match child { + Child::Node(node) => node_value(node), + Child::Symbol(value) | Child::String(value) => Value::String(value.clone()), + Child::Integer(value) => Value::Number((*value).into()), + Child::Bool(value) => Value::Bool(*value), + Child::Nil => Value::Null, + } +} + +fn children_value(children: &[Child]) -> Value { + Value::Array(children.iter().map(child_value).collect()) +} + +fn ruby_language_name(language: Language) -> &'static str { + match language { + Language::Ruby => "ruby", + Language::Python => "python", + Language::JavaScript => "javascript", + Language::Java => "java", + Language::TypeScript => "typescript", + Language::Swift => "swift", + Language::Kotlin => "kotlin", + Language::Go => "go", + Language::Rust => "rust", + Language::Zig => "zig", + Language::Lua => "lua", + Language::C => "c", + Language::Cpp => "cpp", + Language::CSharp => "csharp", + Language::Php => "php", + } +} + +fn ruby_normalized_value(path: &Path, language: Language) -> Value { + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + root, = Decomplex::Ast.parse(ARGV.fetch(0)) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" 
=> node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + elsif node.is_a?(Array) + node.map { |child| value(child) } + else + node + end + end + + puts JSON.generate(value(root)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "json", + "-e", + script, + ]) + .arg(path) + .output() + .expect("run ruby normalizer"); + assert!( + output.status.success(), + "ruby normalizer failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout).expect("ruby normalizer should emit JSON") +} + +fn assert_ruby_parity(source: &str, language: Language, suffix: &str) { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create parity temp source file"); + file.write_all(source.as_bytes()) + .expect("write parity temp source file"); + + let rust = node_value( + &parse_with_language(file.path(), language) + .expect("parse parity temp source file") + .0, + ); + let ruby = ruby_normalized_value(file.path(), language); + assert_eq!(rust, ruby); +} + +fn raw_tree(source: &str, language: Language) -> tree_sitter::Tree { + let mut parser = TreeSitterParser::new(); + parser + .set_language(&super::language_grammar(language)) + .expect("set raw parser language"); + parser.parse(source, None).expect("parse raw source") +} + +fn first_raw_node<'tree>( + node: TreeSitterNode<'tree>, + source: &str, + kind: &str, + text: &str, +) -> TreeSitterNode<'tree> { + if node.kind() == kind && super::node_text(node, source) == text { + return node; + } + let mut cursor = node.walk(); + 
for child in node.children(&mut cursor) { + if let Some(found) = first_raw_node_opt(child, source, kind, text) { + return found; + } + } + panic!("expected raw node kind={kind:?} text={text:?}"); +} + +fn first_raw_node_opt<'tree>( + node: TreeSitterNode<'tree>, + source: &str, + kind: &str, + text: &str, +) -> Option> { + if node.kind() == kind && super::node_text(node, source) == text { + return Some(node); + } + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + if let Some(found) = first_raw_node_opt(child, source, kind, text) { + return Some(found); + } + } + None +} + +fn nth_raw_node<'tree>( + node: TreeSitterNode<'tree>, + source: &str, + kind: &str, + text: &str, + index: usize, +) -> TreeSitterNode<'tree> { + let mut found = Vec::new(); + collect_raw_nodes(node, source, kind, text, &mut found); + *found + .get(index) + .unwrap_or_else(|| panic!("expected raw node kind={kind:?} text={text:?} index={index}")) +} + +fn collect_raw_nodes<'tree>( + node: TreeSitterNode<'tree>, + source: &str, + kind: &str, + text: &str, + found: &mut Vec>, +) { + if node.kind() == kind && super::node_text(node, source) == text { + found.push(node); + } + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + collect_raw_nodes(child, source, kind, text, found); + } +} + +fn ruby_private_predicate( + source: &str, + language: Language, + suffix: &str, + method: &str, + kind: &str, + text: &str, +) -> bool { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby predicate temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby predicate temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + method = ARGV.fetch(3) + target = nil 
+ walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + puts normalizer.send(method, target) ? "true" : "false" + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(method) + .output() + .expect("run ruby private predicate"); + assert!( + output.status.success(), + "ruby predicate failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8(output.stdout) + .expect("ruby predicate output should be utf8") + .trim() + == "true" +} + +fn ruby_private_collected_names( + source: &str, + language: Language, + suffix: &str, + method: &str, + kind: &str, + text: &str, +) -> BTreeSet { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby collected names temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby collected names temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + method = ARGV.fetch(3) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + 
walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + locals = Set.new + normalizer.send(method, target, locals) + puts JSON.generate(locals.to_a.sort) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-r", + "set", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(method) + .output() + .expect("run ruby collected names helper"); + assert!( + output.status.success(), + "ruby collected names helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice::>(&output.stdout) + .expect("ruby collected names output should be json") + .into_iter() + .collect() +} + +fn ruby_private_scope_collected_names( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + root: bool, +) -> BTreeSet { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby scope collected names temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby scope collected names temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + require "set" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + root = ARGV.fetch(3) == "true" + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: 
#{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + locals = Set.new + normalizer.send(:collect_ruby_scope_locals, target, locals, root: root) + puts JSON.generate(locals.to_a.sort) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(if root { "true" } else { "false" }) + .output() + .expect("run ruby scope collected names helper"); + assert!( + output.status.success(), + "ruby scope collected names helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice::>(&output.stdout) + .expect("ruby scope collected names output should be json") + .into_iter() + .collect() +} + +fn ruby_private_ruby_scope_locals( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, +) -> BTreeSet { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby scope locals temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby scope locals temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = 
Decomplex::Ast::TreeSitterNormalizer.new(document) + puts JSON.generate(normalizer.send(:ruby_scope_locals, target).to_a.sort) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-r", + "set", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby scope locals helper"); + assert!( + output.status.success(), + "ruby scope locals helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice::>(&output.stdout) + .expect("ruby scope locals output should be json") + .into_iter() + .collect() +} + +fn ruby_private_with_ruby_scope_trace( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + reset: bool, + initial_stack: &[Vec<&str>], +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby with_ruby_scope temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby with_ruby_scope temp source file"); + let initial_stack_json = + serde_json::to_string(initial_stack).expect("serialize initial local stack"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + reset = ARGV.fetch(3) == "true" + initial = JSON.parse(ARGV.fetch(4)).map { |names| Set.new(names) } + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: 
#{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + normalizer.instance_variable_set(:@local_stack, initial) + snapshot = lambda do + Array(normalizer.instance_variable_get(:@local_stack)).map { |locals| locals.to_a.sort } + end + before = snapshot.call + inside = nil + result = normalizer.send(:with_ruby_scope, target, reset: reset) do + inside = snapshot.call + "block-result" + end + after = snapshot.call + puts JSON.generate("before" => before, "inside" => inside, "after" => after, "result" => result) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-r", + "set", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(if reset { "true" } else { "false" }) + .arg(initial_stack_json) + .output() + .expect("run ruby with_ruby_scope helper"); + assert!( + output.status.success(), + "ruby with_ruby_scope helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout).expect("ruby with_ruby_scope output should be json") +} + +fn local_stack_from(names: &[Vec<&str>]) -> Vec> { + names + .iter() + .map(|scope| scope.iter().map(|name| name.to_string()).collect()) + .collect() +} + +fn local_stack_value(stack: &[BTreeSet]) -> Value { + json!(stack + .iter() + .map(|scope| scope.iter().cloned().collect::>()) + .collect::>()) +} + +fn ruby_private_destructured_parameter_targets_value( + source: &str, + kind: &str, + text: &str, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(".rb") + .tempfile() + .expect("create ruby destructured parameter temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby destructured parameter temp source file"); + let 
decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + targets = [] + normalizer.send(:collect_destructured_parameter_targets, target, targets) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + elsif node.is_a?(Array) + node.map { |child| value(child) } + else + node + end + end + + puts JSON.generate(targets.map { |node| value(node) }) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env( + "DECOMPLEX_FORCE_LANGUAGE", + ruby_language_name(Language::Ruby), + ) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby destructured parameter helper"); + assert!( + output.status.success(), + "ruby destructured parameter helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby destructured parameter output should be json") +} + +fn 
ruby_private_scope_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + mode: &str, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby scope temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby scope temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + mode = ARGV.fetch(3) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + + body = mode == "body" ? normalizer.send(:wrap, :BODY, children: [], source: target) : nil + args = mode == "args" ? 
normalizer.send(:wrap, :ARGS, children: [], source: target) : nil + result = normalizer.send(:scope, body, args: args, source: target) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(mode) + .output() + .expect("run ruby scope helper"); + assert!( + output.status.success(), + "ruby scope helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout).expect("ruby scope output should be json") +} + +fn ruby_private_list_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + mode: &str, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby list temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby list temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + mode = ARGV.fetch(3) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == 
target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + + item = normalizer.send(:wrap, :ITEM, children: [], source: target) + children = + case mode + when "nil" then nil + when "empty" then [] + when "one" then [item] + else abort "unknown list mode: #{mode}" + end + result = normalizer.send(:list, children, source: target) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(mode) + .output() + .expect("run ruby list helper"); + assert!( + output.status.success(), + "ruby list helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout).expect("ruby list output should be json") +} + +fn ruby_private_string( + source: &str, + language: Language, + suffix: &str, + method: &str, + kind: &str, + text: &str, +) -> String { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby string temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby string temp source file"); + let decomplex_dir = 
Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + method = ARGV.fetch(3) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + puts normalizer.send(method, target).to_s + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(method) + .output() + .expect("run ruby private string helper"); + assert!( + output.status.success(), + "ruby string helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8(output.stdout) + .expect("ruby string helper output should be utf8") + .trim_end_matches(['\r', '\n']) + .to_string() +} + +fn ruby_private_text_predicate(language: Language, method: &str, text: &str) -> bool { + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + language = ARGV.fetch(0).to_sym + text = ARGV.fetch(1) + method = ARGV.fetch(2) + document = Object.new + document.define_singleton_method(:language) { language } + normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + normalizer.instance_variable_set(:@document, document) + puts normalizer.send(method, text) ? 
"true" : "false" + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .args(["-I", "lib", "-r", "decomplex/ast", "-e", script]) + .arg(ruby_language_name(language)) + .arg(text) + .arg(method) + .output() + .expect("run ruby private text predicate"); + assert!( + output.status.success(), + "ruby text predicate failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8(output.stdout) + .expect("ruby text predicate output should be utf8") + .trim() + == "true" +} + +fn ruby_private_text_string(language: Language, method: &str, text: &str) -> String { + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + language = ARGV.fetch(0).to_sym + text = ARGV.fetch(1) + method = ARGV.fetch(2) + document = Object.new + document.define_singleton_method(:language) { language } + normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + normalizer.instance_variable_set(:@document, document) + puts normalizer.send(method, text).to_s + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .args(["-I", "lib", "-r", "decomplex/ast", "-e", script]) + .arg(ruby_language_name(language)) + .arg(text) + .arg(method) + .output() + .expect("run ruby private text string helper"); + assert!( + output.status.success(), + "ruby text string helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8(output.stdout) + .expect("ruby text string output should be utf8") + .trim_end_matches(['\r', '\n']) + .to_string() +} + +fn ruby_private_ts_node_value(value: &str) -> bool { + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Object.new + document.define_singleton_method(:language) { :ruby } + 
normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + normalizer.instance_variable_set(:@document, document) + target = + case ARGV.fetch(0) + when "nil" + nil + when "string" + "value" + when "normalized_node" + Decomplex::Ast::Node.new(type: :LIT, children: [], first_lineno: 1, first_column: 0, last_lineno: 1, last_column: 1, text: "1") + else + abort "unknown ts_node? probe" + end + puts normalizer.send(:ts_node?, target) ? "true" : "false" + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .args(["-I", "lib", "-r", "decomplex/ast", "-e", script]) + .arg(value) + .output() + .expect("run ruby private ts_node? value helper"); + assert!( + output.status.success(), + "ruby ts_node? value helper failed for {value}: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8(output.stdout) + .expect("ruby ts_node? value output should be utf8") + .trim() + == "true" +} + +fn ruby_private_regex_literal_value(value: &str) -> bool { + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Object.new + document.define_singleton_method(:language) { :ruby } + normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + normalizer.instance_variable_set(:@document, document) + target = + case ARGV.fetch(0) + when "nil" + nil + when "string" + "value" + when "normalized_node" + Decomplex::Ast::Node.new(type: :LIT, children: [], first_lineno: 1, first_column: 0, last_lineno: 1, last_column: 1, text: "1") + else + abort "unknown regex_literal? probe" + end + puts normalizer.send(:regex_literal?, target) ? "true" : "false" + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .args(["-I", "lib", "-r", "decomplex/ast", "-e", script]) + .arg(value) + .output() + .expect("run ruby private regex_literal? 
value helper"); + assert!( + output.status.success(), + "ruby regex_literal? value helper failed for {value}: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8(output.stdout) + .expect("ruby regex_literal? value output should be utf8") + .trim() + == "true" +} + +fn ruby_private_node_signature( + source: &str, + language: Language, + suffix: &str, + method: &str, + kind: &str, + text: &str, +) -> Option<(String, String)> { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby node signature temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby node signature temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + method = ARGV.fetch(3) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + result = normalizer.send(method, target) + if result + puts JSON.generate([result.kind, result.text.to_s]) + else + puts "null" + end + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(method) + .output() + .expect("run ruby private node signature helper"); + assert!( + output.status.success(), + "ruby node signature helper failed: 
stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + let value: Value = + serde_json::from_slice(&output.stdout).expect("ruby node signature output should be json"); + if value.is_null() { + return None; + } + let pair = value + .as_array() + .expect("ruby node signature should be an array"); + Some(( + pair[0] + .as_str() + .expect("node kind should be string") + .to_string(), + pair[1] + .as_str() + .expect("node text should be string") + .to_string(), + )) +} + +fn ruby_private_inline_def_name_after_receiver( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, +) -> String { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby inline def name temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby inline def name temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + receiver = normalizer.send(:inline_def_receiver, target) + puts normalizer.send(:inline_def_name_after_receiver, target, receiver).to_s + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run 
ruby inline def name helper"); + assert!( + output.status.success(), + "ruby inline def name helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8(output.stdout) + .expect("ruby inline def name output should be utf8") + .trim() + .to_string() +} + +fn ruby_private_inline_parameter_begin_marker_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby inline_parameter_begin_marker temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby inline_parameter_begin_marker temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + result = normalizer.send(:inline_parameter_begin_marker, target) + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", 
ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby private inline_parameter_begin_marker helper"); + assert!( + output.status.success(), + "ruby inline_parameter_begin_marker helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby inline_parameter_begin_marker output should be json") +} + +fn ruby_private_prepend_inline_parameter_begin_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + body: &Value, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby prepend_inline_parameter_begin temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby prepend_inline_parameter_begin temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + def node(value) + return nil if value.nil? 
+ return value unless value.is_a?(Hash) + + Decomplex::Ast::Node.new( + type: value.fetch("type").to_sym, + children: value.fetch("children").map { |child| node(child) }, + first_lineno: value.fetch("first_lineno"), + first_column: value.fetch("first_column"), + last_lineno: value.fetch("last_lineno"), + last_column: value.fetch("last_column"), + text: value.fetch("text") + ) + end + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + walk = lambda do |ts_node| + if ts_node.respond_to?(:kind) + target ||= ts_node if ts_node.kind == target_kind && ts_node.text.to_s == target_text + ts_node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + body = node(JSON.parse(ARGV.fetch(3))) + result = normalizer.send(:prepend_inline_parameter_begin, target, body) + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(body.to_string()) + .output() + .expect("run ruby private prepend_inline_parameter_begin helper"); + assert!( + output.status.success(), + "ruby prepend_inline_parameter_begin helper failed: 
stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby prepend_inline_parameter_begin output should be json") +} + +fn ruby_private_local_or_call_for_name_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + name: &str, + local: bool, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby local_or_call_for_name temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby local_or_call_for_name temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + require "set" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + name = ARGV.fetch(3) + local = ARGV.fetch(4) == "true" + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + normalizer.instance_variable_set(:@local_stack, local ? 
[Set[name]] : []) + result = normalizer.send(:local_or_call_for_name, name, target) + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(name) + .arg(if local { "true" } else { "false" }) + .output() + .expect("run ruby private local_or_call_for_name helper"); + assert!( + output.status.success(), + "ruby local_or_call_for_name helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby local_or_call_for_name output should be json") +} + +fn ruby_private_ruby_vcall_identifier_predicate( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + local_names: &[&str], +) -> bool { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby ruby_vcall_identifier temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby ruby_vcall_identifier temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + require "set" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + local_names = ARGV.fetch(3).split(",").reject(&:empty?) 
+ target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + normalizer.instance_variable_set(:@local_stack, local_names.empty? ? [] : [Set.new(local_names)]) + puts normalizer.send(:ruby_vcall_identifier?, target) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(local_names.join(",")) + .output() + .expect("run ruby private ruby_vcall_identifier? helper"); + assert!( + output.status.success(), + "ruby ruby_vcall_identifier? helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8(output.stdout) + .expect("ruby ruby_vcall_identifier? 
output should be utf8") + .trim() + == "true" +} + +fn ruby_private_vcall_identifier_predicate( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + local_names: &[&str], +) -> bool { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby vcall_identifier temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby vcall_identifier temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + require "set" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + local_names = ARGV.fetch(3).split(",").reject(&:empty?) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + normalizer.instance_variable_set(:@local_stack, local_names.empty? ? [] : [Set.new(local_names)]) + puts normalizer.send(:vcall_identifier?, target) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(local_names.join(",")) + .output() + .expect("run ruby private vcall_identifier? helper"); + assert!( + output.status.success(), + "ruby vcall_identifier? helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8(output.stdout) + .expect("ruby vcall_identifier? 
output should be utf8") + .trim() + == "true" +} + +fn ruby_private_normalize_terminal_statement_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + local_names: &[&str], +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby normalize_terminal_statement temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby normalize_terminal_statement temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + require "set" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + local_names = ARGV.fetch(3).split(",").reject(&:empty?) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + normalizer.instance_variable_set(:@local_stack, local_names.empty? ? 
[] : [Set.new(local_names)]) + result = normalizer.send(:normalize_terminal_statement, target) + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(local_names.join(",")) + .output() + .expect("run ruby private normalize_terminal_statement helper"); + assert!( + output.status.success(), + "ruby normalize_terminal_statement helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby normalize_terminal_statement output should be json") +} + +fn ruby_private_node_list_signature( + source: &str, + language: Language, + suffix: &str, + method: &str, + kind: &str, + text: &str, +) -> Vec<(String, String)> { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby node list signature temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby node list signature temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + method = ARGV.fetch(3) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + result = Array(normalizer.send(method, target)) + puts 
JSON.generate(result.map { |node| [node.kind, node.text.to_s] }) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(method) + .output() + .expect("run ruby node list signature helper"); + assert!( + output.status.success(), + "ruby node list signature helper failed for {method}: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + let value: Value = serde_json::from_slice(&output.stdout) + .expect("ruby node list signature output should be json"); + value + .as_array() + .expect("ruby node list signature should be an array") + .iter() + .map(|item| { + let item = item + .as_array() + .expect("ruby node list item should be an array"); + ( + item[0] + .as_str() + .expect("ruby node list kind should be a string") + .to_string(), + item[1] + .as_str() + .expect("ruby node list text should be a string") + .to_string(), + ) + }) + .collect() +} + +fn ruby_private_dotted_call_parts( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, +) -> Option<(String, String, String)> { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby dotted_call_parts temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby dotted_call_parts temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text 
+ node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + receiver, method = normalizer.send(:dotted_call_parts, target) + if receiver + puts JSON.generate([receiver.kind, receiver.text.to_s, method.to_s]) + else + puts "null" + end + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby private dotted_call_parts helper"); + assert!( + output.status.success(), + "ruby dotted_call_parts helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + let value: Value = serde_json::from_slice(&output.stdout) + .expect("ruby dotted_call_parts output should be json"); + if value.is_null() { + return None; + } + let parts = value + .as_array() + .expect("ruby dotted_call_parts should be an array"); + Some(( + parts[0] + .as_str() + .expect("receiver kind should be string") + .to_string(), + parts[1] + .as_str() + .expect("receiver text should be string") + .to_string(), + parts[2] + .as_str() + .expect("method should be string") + .to_string(), + )) +} + +fn ruby_private_member_parts( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, +) -> Option<(String, String, String)> { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby member_parts temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby member_parts temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = 
Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + receiver, method = normalizer.send(:member_parts, target) + if receiver + puts JSON.generate([receiver.kind, receiver.text.to_s, method.to_s]) + else + puts "null" + end + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby private member_parts helper"); + assert!( + output.status.success(), + "ruby member_parts helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + let value: Value = + serde_json::from_slice(&output.stdout).expect("ruby member_parts output should be json"); + if value.is_null() { + return None; + } + let parts = value + .as_array() + .expect("ruby member_parts should be an array"); + Some(( + parts[0] + .as_str() + .expect("receiver kind should be string") + .to_string(), + parts[1] + .as_str() + .expect("receiver text should be string") + .to_string(), + parts[2] + .as_str() + .expect("method should be string") + .to_string(), + )) +} + +fn ruby_private_named_field_signature( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + field: &str, +) -> Option<(String, String)> { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + 
.expect("create ruby named_field temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby named_field temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + field = ARGV.fetch(3) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + result = normalizer.send(:named_field, target, field) + if result + puts JSON.generate([result.kind, result.text.to_s]) + else + puts "null" + end + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(field) + .output() + .expect("run ruby private named_field helper"); + assert!( + output.status.success(), + "ruby named_field helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + let value: Value = + serde_json::from_slice(&output.stdout).expect("ruby named_field output should be json"); + if value.is_null() { + return None; + } + let pair = value + .as_array() + .expect("ruby named_field output should be an array"); + Some(( + pair[0] + .as_str() + .expect("named_field kind should be string") + .to_string(), + pair[1] + .as_str() + .expect("named_field text should be string") + .to_string(), + )) +} + +fn ruby_private_branch_child_signature( + 
source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + condition_kind: &str, + condition_text: &str, + index: usize, +) -> Option<(String, String)> { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby branch_child temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby branch_child temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + condition_kind = ARGV.fetch(3) + condition_text = ARGV.fetch(4) + index = Integer(ARGV.fetch(5)) + target = nil + condition = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + condition ||= node if node.kind == condition_kind && node.text.to_s == condition_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + abort "condition node not found" unless condition + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + result = normalizer.send(:branch_child, target, condition, index) + if result + puts JSON.generate([result.kind, result.text.to_s]) + else + puts "null" + end + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(condition_kind) + .arg(condition_text) + .arg(index.to_string()) + .output() + .expect("run ruby private branch_child helper"); + assert!( + output.status.success(), + "ruby branch_child helper failed: stdout={} stderr={}", + 
String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + let value: Value = + serde_json::from_slice(&output.stdout).expect("ruby branch_child output should be json"); + if value.is_null() { + return None; + } + let pair = value + .as_array() + .expect("ruby branch_child output should be an array"); + Some(( + pair[0] + .as_str() + .expect("branch_child kind should be string") + .to_string(), + pair[1] + .as_str() + .expect("branch_child text should be string") + .to_string(), + )) +} + +fn ruby_private_wrap_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + normalized_source: bool, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby wrap temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby wrap temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + normalized_source = ARGV.fetch(3) == "true" + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + source = if normalized_source + normalizer.send(:wrap, :INNER, children: [], source: target) + else + target + end + result = normalizer.send(:wrap, :OUTER, children: [:child], source: source) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + 
"last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(if normalized_source { "true" } else { "false" }) + .output() + .expect("run ruby private wrap helper"); + assert!( + output.status.success(), + "ruby wrap helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout).expect("ruby wrap output should be json") +} + +fn ruby_private_normalize_method_value( + source: &str, + language: Language, + suffix: &str, + method: &str, + kind: &str, + text: &str, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby normalize method temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby normalize method temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + method = ARGV.fetch(3) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + result = 
normalizer.send(method, target) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + elsif node.is_a?(Array) + node.map { |child| value(child) } + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(method) + .output() + .expect("run ruby private normalize method helper"); + assert!( + output.status.success(), + "ruby normalize method helper failed for {method}: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout).expect("ruby normalize method output should be json") +} + +fn ruby_private_normalize_return_node_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + elide_symbol: bool, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby normalize return node temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby normalize return node temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + elide_symbol = ARGV.fetch(3) == "true" + target = nil + walk = lambda do |node| + if 
node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + result = normalizer.send(:normalize_return_node, target, elide_symbol: elide_symbol) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + elsif node.is_a?(Array) + node.map { |child| value(child) } + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(if elide_symbol { "true" } else { "false" }) + .output() + .expect("run ruby private normalize_return_node helper"); + assert!( + output.status.success(), + "ruby normalize_return_node helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby normalize_return_node output should be json") +} + +fn ruby_private_normalize_body_nodes_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby normalize body nodes temp source file"); + file.write_all(source.as_bytes()) + 
.expect("write ruby normalize body nodes temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + if target_kind == "__root__" + target = document.root + else + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + end + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + result = normalizer.send(:normalize_body_nodes, target.named_children, source: target) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby private normalize_body_nodes helper"); + assert!( + output.status.success(), + "ruby normalize_body_nodes helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout).expect("ruby normalize_body_nodes output should be 
json") +} + +fn ruby_private_inline_def_from_argument_list_nil_value( + source: &str, + language: Language, + suffix: &str, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby inline def argument nil temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby inline def argument nil temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + result = normalizer.send(:inline_def_from_argument_list, nil) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .output() + .expect("run ruby private inline def argument nil helper"); + assert!( + output.status.success(), + "ruby inline def argument nil helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby inline def argument nil output should be json") +} + +fn ruby_private_assignment_target_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, +) -> Value { + let mut file = 
tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby assignment target temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby assignment target temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + source = normalizer.send(:parent_node, target) || target + right_raw = normalizer.send(:assignment_right, source) + right = right_raw ? 
normalizer.send(:normalize_node, right_raw) : nil + result = normalizer.send(:assignment_target, target, right, source: source) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby private assignment target helper"); + assert!( + output.status.success(), + "ruby assignment target helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout).expect("ruby assignment target output should be json") +} + +fn ruby_private_normalize_multiple_assignment_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby multiple assignment temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby multiple assignment temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if 
node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + left = normalizer.send(:assignment_left, target) + right_raw = normalizer.send(:assignment_right, target) + right = right_raw ? normalizer.send(:normalize_node, right_raw) : nil + result = normalizer.send(:normalize_multiple_assignment, left, right, target) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby private multiple assignment helper"); + assert!( + output.status.success(), + "ruby multiple assignment helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout).expect("ruby multiple assignment output should be json") +} + +fn ruby_private_augmented_assignment_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + operator: &str, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby augmented assignment value temp source file"); + 
file.write_all(source.as_bytes()) + .expect("write ruby augmented assignment value temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + operator = ARGV.fetch(3).to_sym + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + source = normalizer.send(:parent_node, target) || target + right_raw = normalizer.send(:assignment_right, source) + result = normalizer.send(:augmented_assignment_value, target, operator, right_raw, source) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(operator) + .output() + .expect("run ruby private augmented assignment value helper"); + assert!( + output.status.success(), + "ruby augmented assignment value helper failed: stdout={} stderr={}", + 
String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby augmented assignment value output should be json") +} + +fn ruby_private_logical_operator_assignment_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby logical operator assignment temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby logical operator assignment temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + left = normalizer.send(:assignment_left, target) + right_raw = normalizer.send(:assignment_right, target) + right = normalizer.send(:normalize_node, right_raw) + operator = normalizer.send(:operator_assignment_operator, target) + result = normalizer.send(:normalize_logical_operator_assignment, left, operator, right, source: target) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node 
+ end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby private logical operator assignment helper"); + assert!( + output.status.success(), + "ruby logical operator assignment helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby logical operator assignment output should be json") +} + +fn ruby_private_call_arguments_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + function_mode: &str, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby call arguments temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby call arguments temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + function_mode = ARGV.fetch(3) + target = nil + fallback_target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + fallback_target ||= node if node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + target ||= fallback_target + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + function = + case function_mode + when 
"auto" + normalizer.send(:named_field, target, "function") || + normalizer.send(:named_field, target, "call") || + target.named_children.first + when "none" + nil + else + abort "unknown function mode: #{function_mode.inspect}" + end + result = normalizer.send(:call_arguments, target, function) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(result.map { |node| value(node) }) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(function_mode) + .output() + .expect("run ruby private call arguments helper"); + assert!( + output.status.success(), + "ruby call arguments helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout).expect("ruby call arguments output should be json") +} + +fn ruby_private_normalize_call_without_block_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + block_mode: &str, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby normalize_call_without_block temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby normalize_call_without_block temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = 
r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + block_mode = ARGV.fetch(3) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + block = + case block_mode + when "auto" + normalizer.send(:call_block, target) + when "none" + nil + else + abort "unknown block mode: #{block_mode.inspect}" + end + result = normalizer.send(:normalize_call_without_block, target, block) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(block_mode) + .output() + .expect("run ruby private normalize_call_without_block helper"); + assert!( + output.status.success(), + "ruby normalize_call_without_block helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby normalize_call_without_block output should be json") +} + +fn 
ruby_private_normalize_patterns_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby normalize_patterns temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby normalize_patterns temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + result = normalizer.send(:normalize_patterns, target) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(result.map { |node| value(node) }) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby private normalize_patterns helper"); + assert!( + output.status.success(), + "ruby normalize_patterns 
helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout).expect("ruby normalize_patterns output should be json") +} + +fn ruby_private_command_arguments_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby command arguments temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby command arguments temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + fallback_target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + fallback_target ||= node if node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + target ||= fallback_target + abort "target node not found: #{target_kind} #{target_text.inspect}" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + result = normalizer.send(:command_arguments, target) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(result.map { |node| value(node) }) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + 
.env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby private command arguments helper"); + assert!( + output.status.success(), + "ruby command arguments helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout).expect("ruby command arguments output should be json") +} + +fn ruby_private_const_for_nil_value(source: &str, language: Language, suffix: &str) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby const_for nil temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby const_for nil temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + result = normalizer.send(:const_for, nil) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .output() + .expect("run ruby 
private const_for nil helper"); + assert!( + output.status.success(), + "ruby const_for nil helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout).expect("ruby const_for nil output should be json") +} + +fn ruby_private_source_before_child_wrap_value( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + child_kind: &str, + child_text: &str, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby source_before_child temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby source_before_child temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + child_kind = ARGV.fetch(3) + child_text = ARGV.fetch(4) + target = nil + child = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + child ||= node if node.kind == child_kind && node.text.to_s == child_text + node.named_children.each { |next_child| walk.call(next_child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + abort "child node not found" unless child + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + source = normalizer.send(:source_before_child, target, child) + result = normalizer.send(:wrap, :OUTER, children: [], source: source) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => 
node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(child_kind) + .arg(child_text) + .output() + .expect("run ruby private source_before_child helper"); + assert!( + output.status.success(), + "ruby source_before_child helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout).expect("ruby source_before_child output should be json") +} + +fn ruby_private_source_from_nodes_value( + source: &str, + language: Language, + suffix: &str, + first_kind: &str, + first_text: &str, + last_kind: &str, + last_text: &str, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby source_from_nodes temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby source_from_nodes temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + first_kind = ARGV.fetch(1) + first_text = ARGV.fetch(2) + last_kind = ARGV.fetch(3) + last_text = ARGV.fetch(4) + first_node = nil + last_node = nil + walk = lambda do |node| + if node.respond_to?(:kind) + first_node ||= node if node.kind == first_kind && node.text.to_s == first_text + last_node = node if node.kind == last_kind && node.text.to_s == last_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "first node not found: 
#{first_kind} #{first_text.inspect}" unless first_node + abort "last node not found: #{last_kind} #{last_text.inspect}" unless last_node + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + result = normalizer.send(:source_from_nodes, first_node, last_node) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(first_kind) + .arg(first_text) + .arg(last_kind) + .arg(last_text) + .output() + .expect("run ruby private source_from_nodes helper"); + assert!( + output.status.success(), + "ruby source_from_nodes helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout).expect("ruby source_from_nodes output should be json") +} + +fn ruby_private_source_from_normalized_nodes_value( + source: &str, + language: Language, + suffix: &str, + first_kind: &str, + first_text: &str, + last_kind: &str, + last_text: &str, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby source_from_normalized_nodes temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby source_from_normalized_nodes temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem 
parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + first_kind = ARGV.fetch(1) + first_text = ARGV.fetch(2) + last_kind = ARGV.fetch(3) + last_text = ARGV.fetch(4) + first_raw = nil + last_raw = nil + walk = lambda do |node| + if node.respond_to?(:kind) + first_raw ||= node if node.kind == first_kind && node.text.to_s == first_text + last_raw ||= node if node.kind == last_kind && node.text.to_s == last_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "first node not found" unless first_raw + abort "last node not found" unless last_raw + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + first_node = normalizer.send(:wrap, :FIRST, children: [], source: first_raw) + last_node = normalizer.send(:wrap, :LAST, children: [], source: last_raw) + result = normalizer.send(:source_from_normalized_nodes, first_node, last_node) + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(first_kind) + .arg(first_text) + .arg(last_kind) + .arg(last_text) + .output() + .expect("run ruby private source_from_normalized_nodes helper"); + assert!( + output.status.success(), + "ruby source_from_normalized_nodes helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + 
String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby source_from_normalized_nodes output should be json") +} + +fn ruby_private_dynamic_string_source_signature( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, +) -> Option<(String, String)> { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby dynamic_string_source temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby dynamic_string_source temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + normalized = target.named_children.map { |child| [child, normalizer.send(:normalize_node, child)] } + result = normalizer.send(:dynamic_string_source, normalized) + if result + puts JSON.generate([result.kind, result.text.to_s]) + else + puts "null" + end + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby private dynamic_string_source helper"); + assert!( + output.status.success(), + "ruby dynamic_string_source helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + 
String::from_utf8_lossy(&output.stderr) + ); + let value: Value = serde_json::from_slice(&output.stdout) + .expect("ruby dynamic_string_source output should be json"); + if value.is_null() { + return None; + } + let pair = value + .as_array() + .expect("ruby dynamic_string_source output should be an array"); + Some(( + pair[0] + .as_str() + .expect("dynamic_string_source kind should be string") + .to_string(), + pair[1] + .as_str() + .expect("dynamic_string_source text should be string") + .to_string(), + )) +} + +fn ruby_private_operator_assignment_statement_parts_signature( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, +) -> Option<(String, String, String, String, String)> { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby operator_assignment_statement_parts temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby operator_assignment_statement_parts temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + left, operator, right = normalizer.send(:operator_assignment_statement_parts, target) + if left && operator && right + puts JSON.generate([left.kind, left.text.to_s, operator.to_s, right.kind, right.text.to_s]) + else + puts "null" + end + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", 
ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby private operator_assignment_statement_parts helper"); + assert!( + output.status.success(), + "ruby operator_assignment_statement_parts helper failed for {language:?} {kind:?} {text:?}: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + let value: Value = serde_json::from_slice(&output.stdout) + .expect("ruby operator_assignment_statement_parts output should be json"); + if value.is_null() { + return None; + } + let parts = value + .as_array() + .expect("ruby operator_assignment_statement_parts output should be an array"); + Some(( + parts[0] + .as_str() + .expect("operator_assignment left kind should be string") + .to_string(), + parts[1] + .as_str() + .expect("operator_assignment left text should be string") + .to_string(), + parts[2] + .as_str() + .expect("operator_assignment operator should be string") + .to_string(), + parts[3] + .as_str() + .expect("operator_assignment right kind should be string") + .to_string(), + parts[4] + .as_str() + .expect("operator_assignment right text should be string") + .to_string(), + )) +} + +fn ruby_private_modifier_parts_signature( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, +) -> Option<((String, String), (String, String))> { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby modifier_parts temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby modifier_parts temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + 
target_text = ARGV.fetch(2) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + action, condition = normalizer.send(:modifier_parts, target) + if action && condition + puts JSON.generate([[action.kind, action.text.to_s], [condition.kind, condition.text.to_s]]) + else + puts "null" + end + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby private modifier_parts helper"); + assert!( + output.status.success(), + "ruby modifier_parts helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + let value: Value = + serde_json::from_slice(&output.stdout).expect("ruby modifier_parts output should be json"); + if value.is_null() { + return None; + } + let pairs = value + .as_array() + .expect("ruby modifier_parts output should be an array"); + let action = pairs[0] + .as_array() + .expect("modifier_parts action should be an array"); + let condition = pairs[1] + .as_array() + .expect("modifier_parts condition should be an array"); + Some(( + ( + action[0] + .as_str() + .expect("modifier_parts action kind should be string") + .to_string(), + action[1] + .as_str() + .expect("modifier_parts action text should be string") + .to_string(), + ), + ( + condition[0] + .as_str() + .expect("modifier_parts condition kind should be string") + .to_string(), + condition[1] + .as_str() + .expect("modifier_parts condition text should be 
string") + .to_string(), + ), + )) +} + +fn ruby_private_visibility_inline_def_statement_predicate( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, +) -> bool { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby visibility_inline_def_statement temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby visibility_inline_def_statement temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target = nil + walk = lambda do |node| + if node.respond_to?(:kind) + target ||= node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + abort "target node not found" unless target + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + puts normalizer.send(:visibility_inline_def_statement?, target, target.named_children.first) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .output() + .expect("run ruby private visibility_inline_def_statement helper"); + assert!( + output.status.success(), + "ruby visibility_inline_def_statement helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8(output.stdout) + .expect("ruby visibility_inline_def_statement output should be utf8") + .trim() + == "true" +} + +fn ruby_private_drop_trailing_nil_statement_value(input: &Value) -> Value { + let decomplex_dir = 
Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + def node(value) + return nil if value.nil? + return value unless value.is_a?(Hash) + + Decomplex::Ast::Node.new( + type: value.fetch("type").to_sym, + children: value.fetch("children").map { |child| node(child) }, + first_lineno: value.fetch("first_lineno"), + first_column: value.fetch("first_column"), + last_lineno: value.fetch("last_lineno"), + last_column: value.fetch("last_column"), + text: value.fetch("text") + ) + end + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + result = normalizer.send(:drop_trailing_nil_statement, node(JSON.parse(ARGV.fetch(0)))) + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "json", + "-e", + script, + ]) + .arg(input.to_string()) + .output() + .expect("run ruby private drop_trailing_nil_statement helper"); + assert!( + output.status.success(), + "ruby drop_trailing_nil_statement helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby drop_trailing_nil_statement output should be json") +} + +fn ruby_private_elide_tail_returns_value(input: &Value, ruby: bool) -> Value { + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + def node(value) + return nil if 
value.nil? + return value unless value.is_a?(Hash) + + Decomplex::Ast::Node.new( + type: value.fetch("type").to_sym, + children: value.fetch("children").map { |child| node(child) }, + first_lineno: value.fetch("first_lineno"), + first_column: value.fetch("first_column"), + last_lineno: value.fetch("last_lineno"), + last_column: value.fetch("last_column"), + text: value.fetch("text") + ) + end + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + adapter = if ARGV.fetch(1) == "ruby" + Decomplex::Ast::RubyTreeSitterNormalizationAdapter.new(nil) + else + Decomplex::Ast::TreeSitterNormalizationAdapter.new(nil) + end + normalizer.instance_variable_set(:@normalization_adapter, adapter) + result = normalizer.send(:elide_tail_returns, node(JSON.parse(ARGV.fetch(0)))) + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "json", + "-e", + script, + ]) + .arg(input.to_string()) + .arg(if ruby { "ruby" } else { "other" }) + .output() + .expect("run ruby private elide_tail_returns helper"); + assert!( + output.status.success(), + "ruby elide_tail_returns helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout).expect("ruby elide_tail_returns output should be json") +} + +fn ruby_private_elide_implicit_nil_body_value(input: &Value, ruby: bool) -> Value { + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust 
dir should have gem parent"); + let script = r#" + def node(value) + return nil if value.nil? + return value unless value.is_a?(Hash) + + Decomplex::Ast::Node.new( + type: value.fetch("type").to_sym, + children: value.fetch("children").map { |child| node(child) }, + first_lineno: value.fetch("first_lineno"), + first_column: value.fetch("first_column"), + last_lineno: value.fetch("last_lineno"), + last_column: value.fetch("last_column"), + text: value.fetch("text") + ) + end + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + adapter = if ARGV.fetch(1) == "ruby" + Decomplex::Ast::RubyTreeSitterNormalizationAdapter.new(nil) + else + Decomplex::Ast::TreeSitterNormalizationAdapter.new(nil) + end + normalizer.instance_variable_set(:@normalization_adapter, adapter) + result = normalizer.send(:elide_implicit_nil_body, node(JSON.parse(ARGV.fetch(0)))) + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "json", + "-e", + script, + ]) + .arg(input.to_string()) + .arg(if ruby { "ruby" } else { "other" }) + .output() + .expect("run ruby private elide_implicit_nil_body helper"); + assert!( + output.status.success(), + "ruby elide_implicit_nil_body helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby elide_implicit_nil_body output should be json") +} + +fn ruby_private_prepend_rescue_exception_assignment_value( + source: &str, + 
body: &Value, + assignment: &Value, +) -> Value { + let mut file = tempfile::Builder::new() + .suffix(".rb") + .tempfile() + .expect("create ruby prepend rescue temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby prepend rescue temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + def node(value) + return nil if value.nil? + return value unless value.is_a?(Hash) + + Decomplex::Ast::Node.new( + type: value.fetch("type").to_sym, + children: value.fetch("children").map { |child| node(child) }, + first_lineno: value.fetch("first_lineno"), + first_column: value.fetch("first_column"), + last_lineno: value.fetch("last_lineno"), + last_column: value.fetch("last_column"), + text: value.fetch("text") + ) + end + + def value(node) + if node.is_a?(Decomplex::Ast::Node) + { + "type" => node.type.to_s, + "children" => node.children.map { |child| value(child) }, + "first_lineno" => node.first_lineno, + "first_column" => node.first_column, + "last_lineno" => node.last_lineno, + "last_column" => node.last_column, + "text" => node.text.to_s, + } + elsif node.is_a?(Symbol) + node.to_s + else + node + end + end + + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + body = node(JSON.parse(ARGV.fetch(1))) + assignment = node(JSON.parse(ARGV.fetch(2))) + result = normalizer.send(:prepend_rescue_exception_assignment, body, assignment) + puts JSON.generate(value(result)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", "ruby") + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(body.to_string()) + .arg(assignment.to_string()) + .output() + .expect("run ruby private prepend_rescue_exception_assignment 
helper"); + assert!( + output.status.success(), + "ruby prepend_rescue_exception_assignment helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + serde_json::from_slice(&output.stdout) + .expect("ruby prepend_rescue_exception_assignment output should be json") +} + +fn ruby_private_symbol_literal_node_predicate( + node_type: Option<&str>, + child_kind: Option<&str>, +) -> bool { + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + def child(kind) + case kind + when "symbol" + :value + when "string" + "value" + when "node" + Decomplex::Ast::Node.new( + type: :NIL, + children: [], + first_lineno: 1, + first_column: 0, + last_lineno: 1, + last_column: 1, + text: "NIL" + ) + when "nil" + nil + else + nil + end + end + + node_type = ARGV.fetch(0) + child_kind = ARGV.fetch(1) + target = if node_type == "none" + nil + else + children = child_kind == "none" ? [] : [child(child_kind)] + Decomplex::Ast::Node.new( + type: node_type.to_sym, + children: children, + first_lineno: 1, + first_column: 0, + last_lineno: 1, + last_column: 1, + text: node_type + ) + end + normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + puts normalizer.send(:symbol_literal_node?, target) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .args(["-I", "lib", "-r", "decomplex/ast", "-e", script]) + .arg(node_type.unwrap_or("none")) + .arg(child_kind.unwrap_or("none")) + .output() + .expect("run ruby private symbol_literal_node? helper"); + assert!( + output.status.success(), + "ruby symbol_literal_node? helper failed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8(output.stdout) + .expect("ruby symbol_literal_node? 
output should be utf8") + .trim() + == "true" +} + +fn ruby_private_same_ts_node_predicate( + source: &str, + language: Language, + suffix: &str, + left_kind: &str, + left_text: &str, + left_index: usize, + right_kind: &str, + right_text: &str, + right_index: usize, +) -> bool { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby same_ts_node temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby same_ts_node temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + left_kind = ARGV.fetch(1) + left_text = ARGV.fetch(2) + left_index = ARGV.fetch(3).to_i + right_kind = ARGV.fetch(4) + right_text = ARGV.fetch(5) + right_index = ARGV.fetch(6).to_i + + def matches(root, kind, text) + found = [] + walk = lambda do |node| + if node.respond_to?(:kind) + found << node if node.kind == kind && node.text.to_s == text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(root) + found + end + + left = matches(document.root, left_kind, left_text).fetch(left_index) + right = matches(document.root, right_kind, right_text).fetch(right_index) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + puts normalizer.send(:same_ts_node?, left, right) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-e", + script, + ]) + .arg(file.path()) + .arg(left_kind) + .arg(left_text) + .arg(left_index.to_string()) + .arg(right_kind) + .arg(right_text) + .arg(right_index.to_string()) + .output() + .expect("run ruby private same_ts_node? helper"); + assert!( + output.status.success(), + "ruby same_ts_node? 
helper failed for {language:?}: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8(output.stdout) + .expect("ruby same_ts_node? output should be utf8") + .trim() + == "true" +} + +fn ruby_private_parent_named_child_predicate( + source: &str, + language: Language, + suffix: &str, + parent_kind: &str, + parent_text: &str, + parent_index: usize, + child_kind: &str, + child_text: &str, + child_index: usize, +) -> bool { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby parent_named_child temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby parent_named_child temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + parent_kind = ARGV.fetch(1) + parent_text = ARGV.fetch(2) + parent_index = ARGV.fetch(3).to_i + child_kind = ARGV.fetch(4) + child_text = ARGV.fetch(5) + child_index = ARGV.fetch(6).to_i + + def matches(root, kind, text) + found = [] + walk = lambda do |node| + if node.respond_to?(:kind) + found << node if node.kind == kind && node.text.to_s == text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(root) + found + end + + parent = matches(document.root, parent_kind, parent_text).fetch(parent_index) + child = matches(document.root, child_kind, child_text).fetch(child_index) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + puts normalizer.send(:parent_named_child?, parent, child) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-e", + script, + ]) + .arg(file.path()) + .arg(parent_kind) + .arg(parent_text) + 
.arg(parent_index.to_string()) + .arg(child_kind) + .arg(child_text) + .arg(child_index.to_string()) + .output() + .expect("run ruby private parent_named_child? helper"); + assert!( + output.status.success(), + "ruby parent_named_child? helper failed for {language:?}: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + String::from_utf8(output.stdout) + .expect("ruby parent_named_child? output should be utf8") + .trim() + == "true" +} + +fn ruby_private_node_key_signature( + source: &str, + language: Language, + suffix: &str, + kind: &str, + text: &str, + index: usize, +) -> (String, usize, usize) { + let mut file = tempfile::Builder::new() + .suffix(suffix) + .tempfile() + .expect("create ruby node_key temp source file"); + file.write_all(source.as_bytes()) + .expect("write ruby node_key temp source file"); + let decomplex_dir = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("decomplex rust dir should have gem parent"); + let script = r#" + document = Decomplex::Syntax.parse(ARGV.fetch(0), parser: "tree_sitter") + target_kind = ARGV.fetch(1) + target_text = ARGV.fetch(2) + target_index = ARGV.fetch(3).to_i + found = [] + walk = lambda do |node| + if node.respond_to?(:kind) + found << node if node.kind == target_kind && node.text.to_s == target_text + node.named_children.each { |child| walk.call(child) } + end + end + walk.call(document.root) + target = found.fetch(target_index) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + puts JSON.generate(normalizer.send(:node_key, target)) + "#; + let output = Command::new("ruby") + .current_dir(decomplex_dir) + .env("DECOMPLEX_FORCE_LANGUAGE", ruby_language_name(language)) + .args([ + "-I", + "lib", + "-r", + "decomplex/ast", + "-r", + "decomplex/syntax", + "-r", + "json", + "-e", + script, + ]) + .arg(file.path()) + .arg(kind) + .arg(text) + .arg(index.to_string()) + .output() + .expect("run ruby private node_key helper"); + 
assert!( + output.status.success(), + "ruby node_key helper failed for {language:?}: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + let value: Value = + serde_json::from_slice(&output.stdout).expect("ruby node_key output should be json"); + let key = value + .as_array() + .expect("ruby node_key output should be an array"); + ( + key[0] + .as_str() + .expect("node_key kind should be string") + .to_string(), + key[1] + .as_u64() + .expect("node_key start byte should be integer") as usize, + key[2] + .as_u64() + .expect("node_key end byte should be integer") as usize, + ) +} + +#[test] +fn tree_normalizer_new_initializes_empty_state() { + let normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); + + assert_eq!(normalizer.source, ""); + assert_eq!(normalizer.language, Language::Ruby); + assert!(normalizer.local_stack.is_empty()); + assert_eq!(normalizer.root_span, None); +} + +#[test] +fn normalize_root_matches_ruby_across_tree_normalizer_languages() { + for (source, language, suffix) in [ + ( + "class C\n def each(value)\n yield value\n case value\n when 1 then :one\n else :other\n end\n end\nend\n", + Language::Ruby, + ".rb", + ), + ( + "def gen(value):\n yield value\n other()\n", + Language::Python, + ".py", + ), + ( + "function f(value: number) { switch (value) { case 1: one(); break; default: other(); } return value ? 
one() : other(); }\n", + Language::TypeScript, + ".ts", + ), + ( + "function f(value)\n if value then\n one()\n else\n other()\n end\n return value\nend\n", + Language::Lua, + ".lua", + ), + ] { + assert_ruby_parity(source, language, suffix); + } +} + +#[test] +fn tree_normalizer_yield_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def each\n yield :item\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "yield :item", + ), + ( + "def each\n value\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value", + ), + ( + "def gen():\n yield item\n other()\n", + Language::Python, + ".py", + "expression_statement", + "yield item", + ), + ( + "def gen():\n yield from items\n other()\n", + Language::Python, + ".py", + "expression_statement", + "yield from items", + ), + ( + "def gen():\n yield item\n other()\n", + Language::Python, + ".py", + "block", + "yield item\n other()", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.yield_statement(node), + ruby_private_predicate(source, language, suffix, "yield_statement?", kind, text), + "yield_statement? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn yield_argument_list_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def each\n yield(:item)\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "(:item)", + ), + ( + "def each\n yield :item\nend\n", + Language::Ruby, + ".rb", + "argument_list", + ":item", + ), + ( + "def call\n foo(:item)\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "(:item)", + ), + ( + "yield_value(value)\n", + Language::Python, + ".py", + "argument_list", + "(value)", + ), + ( + "yield(value);\n", + Language::TypeScript, + ".ts", + "parenthesized_expression", + "(value)", + ), + ( + "coroutine.yield(value)\n", + Language::Lua, + ".lua", + "arguments", + "(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.yield_argument_list(node), + ruby_private_predicate(source, language, suffix, "yield_argument_list?", kind, text), + "yield_argument_list? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn yield_argument_nodes_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def each\n yield(:item)\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "(:item)", + ), + ( + "def each\n yield nil\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "nil", + ), + ( + "def each\n yield item, other\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "item, other", + ), + ( + "yield_value(value)\n", + Language::Python, + ".py", + "argument_list", + "(value)", + ), + ( + "yield(value);\n", + Language::TypeScript, + ".ts", + "parenthesized_expression", + "(value)", + ), + ( + "coroutine.yield(value)\n", + Language::Lua, + ".lua", + "arguments", + "(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = Value::Array( + normalizer + .yield_argument_nodes(node) + .iter() + .map(node_value) + .collect(), + ); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "yield_argument_nodes", + kind, + text + ), + "yield_argument_nodes mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn yield_inline_arguments_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def each\n yield\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "yield", + ), + ( + "def gen():\n yield item\n other()\n", + Language::Python, + ".py", + "expression_statement", + "yield item", + ), + ( + "function* gen() { yield item; }\n", + Language::TypeScript, + ".ts", + "expression_statement", + "yield item;", + ), + ( + "coroutine.yield(item)\n", + Language::Lua, + ".lua", + "function_call", + "coroutine.yield(item)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + 
let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = Value::Array( + normalizer + .yield_inline_arguments(node) + .iter() + .map(node_value) + .collect(), + ); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "yield_inline_arguments", + kind, + text + ), + "yield_inline_arguments mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_yield_argument_list_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def each\n yield(:item)\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "(:item)", + ), + ( + "def each\n yield :item\nend\n", + Language::Ruby, + ".rb", + "argument_list", + ":item", + ), + ( + "def each\n yield nil\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "nil", + ), + ( + "yield_value(value)\n", + Language::Python, + ".py", + "argument_list", + "(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = node_value(&normalizer.normalize_yield_argument_list(node)); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_yield_argument_list", + kind, + text + ), + "normalize_yield_argument_list mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_yield_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def each\n yield\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "yield", + ), + ( + "def each\n yield item\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "yield item", + ), + ( + "def each\n yield nil\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "yield nil", + ), + ( + "def gen():\n yield item\n other()\n", + Language::Python, + ".py", + "expression_statement", + "yield item", + ), + ( + "function* 
gen() { yield item; }\n", + Language::TypeScript, + ".ts", + "yield_expression", + "yield item", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = node_value(&normalizer.normalize_yield(node)); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_yield", + kind, + text + ), + "normalize_yield mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_yield_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def each\n yield\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "yield", + ), + ( + "def each\n yield item\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "yield item", + ), + ( + "def each\n yield nil\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "yield nil", + ), + ( + "def gen():\n yield item\n other()\n", + Language::Python, + ".py", + "expression_statement", + "yield item", + ), + ( + "def gen():\n yield from items\n other()\n", + Language::Python, + ".py", + "expression_statement", + "yield from items", + ), + ( + "function* gen() { yield item; }\n", + Language::TypeScript, + ".ts", + "expression_statement", + "yield item;", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = node_value(&normalizer.normalize_yield_statement(node)); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_yield_statement", + kind, + text + ), + "normalize_yield_statement mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_node_dispatch_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def each\n 
yield item\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "yield item", + ), + ( + "def check\n !flag\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "!flag", + ), + ( + "def gen():\n yield item\n other()\n", + Language::Python, + ".py", + "expression_statement", + "yield item", + ), + ( + "switch (value) { case 1: one(); default: other(); }\n", + Language::TypeScript, + ".ts", + "switch_statement", + "switch (value) { case 1: one(); default: other(); }", + ), + ( + "if value then one() else other() end\n", + Language::Lua, + ".lua", + "if_statement", + "if value then one() else other() end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_node(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_node", + kind, + text + ), + "normalize_node mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn python_yield_statement_in_multi_statement_block_matches_ruby_ast() { + let source = "def gen():\n yield item\n other()\n"; + assert_ruby_parity(source, Language::Python, ".py"); + + let root = parse_language_source(source, Language::Python, ".py"); + let defn = first_node(&root, "DEFN", "def gen():\n yield item\n other()"); + let scope = child_node(defn, 1); + let body = child_node(scope, 2); + + assert_eq!(body.r#type, "BLOCK"); + assert_eq!(child_types(body), vec!["YIELD", "EXPRESSION_STATEMENT"]); +} + +#[test] +fn tree_normalizer_super_statement_matches_ruby_private_predicate() { + for (source, kind, text) in [ + ( + "class Child < Parent\n def call\n super\n end\nend\n", + "body_statement", + "super", + ), + ( + "class Child < Parent\n def call\n super :item\n end\nend\n", + "body_statement", + "super :item", + ), + ( + "class Child < 
Parent\n def call\n value\n end\nend\n", + "body_statement", + "value", + ), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + + assert_eq!( + normalizer.super_statement(node), + ruby_private_predicate( + source, + Language::Ruby, + ".rb", + "super_statement?", + kind, + text + ), + "super_statement? mismatch for {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_super_statement_matches_ruby_private_method() { + for (source, kind, text) in [ + ( + "class Child < Parent\n def call\n super\n end\nend\n", + "body_statement", + "super", + ), + ( + "class Child < Parent\n def call\n super :item\n end\nend\n", + "body_statement", + "super :item", + ), + ( + "class Child < Parent\n def call\n super value\n end\nend\n", + "body_statement", + "super value", + ), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = node_value(&normalizer.normalize_super_statement(node)); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + Language::Ruby, + ".rb", + "normalize_super_statement", + kind, + text + ), + "normalize_super_statement mismatch for {kind} {text:?}" + ); + } +} + +#[test] +fn ruby_super_statement_normalization_matches_ruby_ast() { + let source = "class Child < Parent\n def bare\n super\n end\n def with_arg\n super :item\n end\nend\n"; + assert_ruby_parity(source, Language::Ruby, ".rb"); + + let root = parse_language_source(source, Language::Ruby, ".rb"); + let bare = first_node(&root, "SUPER", "super"); + let with_arg = first_node(&root, "SUPER", "super :item"); + + assert_eq!(bare.children, vec![Child::Nil]); + assert_eq!(child_types(with_arg), vec!["LIST"]); + assert_eq!(child_types(child_node(with_arg, 0)), vec!["LIT"]); +} + +#[test] 
+fn tree_normalizer_argument_list_element_reference_matches_ruby_private_predicate() { + for (source, text) in [ + ("def indexed\n return items[0]\nend\n", "items[0]"), + ("def indexed\n return obj.foo[0]\nend\n", "obj.foo[0]"), + ("def indexed\n return [0]\nend\n", "[0]"), + ( + "def indexed\n return items[0], other\nend\n", + "items[0], other", + ), + ("def indexed\n return items[]\nend\n", "items[]"), + ( + "def indexed\n return items[0] { nope }\nend\n", + "items[0] { nope }", + ), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, "argument_list", text); + let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + + assert_eq!( + normalizer.argument_list_element_reference(node), + ruby_private_predicate( + source, + Language::Ruby, + ".rb", + "argument_list_element_reference?", + "argument_list", + text + ), + "argument_list_element_reference? mismatch for {text:?}" + ); + } +} + +#[test] +fn normalize_argument_list_element_reference_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def indexed\n return items[0]\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "items[0]", + ), + ( + "def indexed\n return obj.foo[0]\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "obj.foo[0]", + ), + ( + "def indexed\n return [0]\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "[0]", + ), + ( + "def indexed\n return items[0], other\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "items[0], other", + ), + ( + "def indexed\n return items[0] { nope }\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "items[0] { nope }", + ), + ( + "def indexed():\n return foo(items[0])\n", + Language::Python, + ".py", + "argument_list", + "(items[0])", + ), + ( + "function indexed(){ return foo(items[0]); }\n", + Language::TypeScript, + ".ts", + "arguments", + "(items[0])", + ), + ( + "function indexed() return foo(items[0]) end\n", + 
Language::Lua, + ".lua", + "arguments", + "(items[0])", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_argument_list_element_reference(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_argument_list_element_reference", + kind, + text + ), + "normalize_argument_list_element_reference mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn dynamic_scope_rewrites_locals_without_crossing_scope_boundaries() { + let inner_assignment = test_node("LASGN", vec![Child::Symbol("inner".to_string())]); + let node = test_node( + "BLOCK", + vec![ + Child::Node(Box::new(test_node( + "LASGN", + vec![Child::Symbol("value".to_string())], + ))), + Child::Node(Box::new(test_node( + "LVAR", + vec![Child::Symbol("value".to_string())], + ))), + Child::Node(Box::new(test_node( + "DEFN", + vec![ + Child::Symbol("nested".to_string()), + Child::Node(Box::new(test_node( + "SCOPE", + vec![ + Child::Nil, + Child::Nil, + Child::Node(Box::new(inner_assignment)), + ], + ))), + ], + ))), + ], + ); + + let result = super::dynamic_scope(node); + + assert_eq!(child_node(&result, 0).r#type, "DASGN"); + assert_eq!(child_node(&result, 1).r#type, "DVAR"); + let nested = child_node(&result, 2); + assert_eq!(nested.r#type, "DEFN"); + let nested_scope = child_node(nested, 1); + assert_eq!(nested_scope.r#type, "SCOPE"); + assert_eq!(child_node(nested_scope, 2).r#type, "LASGN"); +} + +#[test] +fn link_when_chain_sets_next_arm_and_pads_short_when_nodes() { + let fallback = test_node("ELSE", Vec::new()); + let first = test_node( + "WHEN", + vec![ + Child::Symbol("patterns".to_string()), + Child::Nil, + Child::Nil, + ], + ); + let second = test_node( + "WHEN", + vec![ + 
Child::Symbol("patterns".to_string()), + Child::Nil, + Child::Nil, + ], + ); + let normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); + + let result = normalizer + .link_when_chain(vec![first, second], Some(fallback)) + .expect("expected linked when chain"); + + assert_eq!(result.r#type, "WHEN"); + let next = child_node(&result, 2); + assert_eq!(next.r#type, "WHEN"); + assert_eq!(child_node(next, 2).r#type, "ELSE"); + + let short = test_node("WHEN", vec![Child::Symbol("patterns".to_string())]); + let fallback = test_node("ELSE", Vec::new()); + let result = normalizer + .link_when_chain(vec![short], Some(fallback)) + .expect("expected padded when chain"); + + assert_eq!(result.children.len(), 3); + assert_eq!(result.children[1], Child::Nil); + assert_eq!(child_node(&result, 2).r#type, "ELSE"); +} + +#[test] +fn link_rescue_chain_sets_next_rescue_and_pads_short_resbody_nodes() { + let first = test_node( + "RESBODY", + vec![ + Child::Symbol("exceptions".to_string()), + Child::Nil, + Child::Nil, + ], + ); + let second = test_node( + "RESBODY", + vec![ + Child::Symbol("exceptions".to_string()), + Child::Nil, + Child::Nil, + ], + ); + let normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); + + let result = normalizer + .link_rescue_chain(vec![first, second]) + .expect("expected linked rescue chain"); + + assert_eq!(result.r#type, "RESBODY"); + let next = child_node(&result, 2); + assert_eq!(next.r#type, "RESBODY"); + assert_eq!(next.children[2], Child::Nil); + + let short = test_node("RESBODY", vec![Child::Symbol("exceptions".to_string())]); + let result = normalizer + .link_rescue_chain(vec![short]) + .expect("expected padded rescue chain"); + + assert_eq!(result.children.len(), 3); + assert_eq!(result.children[1], Child::Nil); + assert_eq!(result.children[2], Child::Nil); +} + +#[test] +fn infix_statement_parts_extracts_allowed_wrapper_parts() { + let source = "def calc\n left + right\nend\n"; + let tree = raw_tree(source, 
Language::Ruby); + let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let body = first_raw_node(tree.root_node(), source, "body_statement", "left + right"); + let binary = first_raw_node(tree.root_node(), source, "binary", "left + right"); + + assert_eq!( + infix_parts_text(&normalizer, body, source), + Some(("left".to_string(), "+".to_string(), "right".to_string())) + ); + assert_eq!(infix_parts_text(&normalizer, binary, source), None); + + let source = "def calc\n return left + right\nend\n"; + let tree = raw_tree(source, Language::Ruby); + let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let args = first_raw_node(tree.root_node(), source, "argument_list", "left + right"); + assert_eq!( + infix_parts_text(&normalizer, args, source), + Some(("left".to_string(), "+".to_string(), "right".to_string())) + ); + + let source = "def calc\n left && right\nend\n"; + let tree = raw_tree(source, Language::Ruby); + let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let boolean = first_raw_node(tree.root_node(), source, "body_statement", "left && right"); + assert_eq!(infix_parts_text(&normalizer, boolean, source), None); +} + +#[test] +fn infix_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def calc\n left + right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left + right", + ), + ( + "def calc\n return left + right\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "left + right", + ), + ( + "def calc\n left && right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left && right", + ), + ( + "const value = left + right;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left + right", + ), + ( + "value = left + right\n", + Language::Python, + ".py", + "binary_operator", + "left + right", + ), + ( + "local value = left + right\n", + Language::Lua, + ".lua", + "expression_list", + "left + right", + ), + 
] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.infix_statement(node), + ruby_private_predicate(source, language, suffix, "infix_statement?", kind, text), + "infix_statement? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_infix_statement_matches_ruby_private_method() { + for (source, kind, text) in [ + ( + "def calc\n left + right\nend\n", + "body_statement", + "left + right", + ), + ( + "def calc\n return left + right\nend\n", + "argument_list", + "left + right", + ), + ( + "def match\n value =~ /left/\nend\n", + "body_statement", + "value =~ /left/", + ), + ( + "def match\n value =~ pattern\nend\n", + "body_statement", + "value =~ pattern", + ), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = normalizer + .normalize_infix_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + Language::Ruby, + ".rb", + "normalize_infix_statement", + kind, + text + ), + "normalize_infix_statement mismatch for {kind} {text:?}" + ); + } +} + +#[test] +fn regex_literal_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "value =~ /left/\n", + Language::Ruby, + ".rb", + "regex", + "/left/", + ), + ( + "value = \"left\"\n", + Language::Ruby, + ".rb", + "string", + "\"left\"", + ), + ( + "const pattern = /left/;\n", + Language::TypeScript, + ".ts", + "regex", + "/left/", + ), + ( + "pattern = r\"left\"\n", + Language::Python, + ".py", + "string", + "r\"left\"", + ), + ( + "local pattern = \"left\"\n", + Language::Lua, + ".lua", + "string_content", + "left", + ), + ] { + let tree = raw_tree(source, 
language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.regex_literal(Some(node)), + ruby_private_predicate(source, language, suffix, "regex_literal?", kind, text), + "regex_literal? mismatch for {language:?} {kind} {text:?}" + ); + } + + let normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); + assert_eq!( + normalizer.regex_literal(None), + ruby_private_regex_literal_value("nil") + ); + assert!(!ruby_private_regex_literal_value("string")); + assert!(!ruby_private_regex_literal_value("normalized_node")); +} + +#[test] +fn argument_list_unary_not_matches_ruby_private_predicate() { + for (line, text) in [ + ("return !flag", "!flag"), + ("return !!flag", "!!flag"), + ("return flag", "flag"), + ("return !flag, other", "!flag, other"), + ("return (!flag)", "(!flag)"), + ("return not flag", "not flag"), + ] { + let source = format!("def check\n {line}\nend\n"); + let tree = raw_tree(&source, Language::Ruby); + let node = first_raw_node(tree.root_node(), &source, "argument_list", text); + let normalizer = super::TreeSitterNormalizer::new(&source, Language::Ruby); + + assert_eq!( + normalizer.argument_list_unary_not(node), + ruby_private_predicate( + &source, + Language::Ruby, + ".rb", + "argument_list_unary_not?", + "argument_list", + text + ), + "argument_list_unary_not? 
mismatch for {line:?}" + ); + } +} + +#[test] +fn normalize_argument_list_unary_not_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def check\n return !flag\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "!flag", + ), + ( + "def check\n return !!flag\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "!!flag", + ), + ( + "def check\n return flag\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "flag", + ), + ( + "def check\n return !flag, other\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "!flag, other", + ), + ( + "def check():\n return foo(not flag)\n", + Language::Python, + ".py", + "argument_list", + "(not flag)", + ), + ( + "function check(){ return foo(!flag); }\n", + Language::TypeScript, + ".ts", + "arguments", + "(!flag)", + ), + ( + "function check() return foo(not flag) end\n", + Language::Lua, + ".lua", + "arguments", + "(not flag)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_argument_list_unary_not(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_argument_list_unary_not", + kind, + text + ), + "normalize_argument_list_unary_not mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn unary_not_statement_matches_ruby_private_predicate() { + for (line, text) in [ + ("!flag", "!flag"), + ("!!flag", "!!flag"), + ("flag", "flag"), + ("!flag; other", "!flag; other"), + ("(!flag)", "(!flag)"), + ("not flag", "not flag"), + ] { + let source = format!("def check\n {line}\nend\n"); + let tree = raw_tree(&source, Language::Ruby); + let node = first_raw_node(tree.root_node(), &source, "body_statement", text); + let normalizer = super::TreeSitterNormalizer::new(&source, 
Language::Ruby); + + assert_eq!( + normalizer.unary_not_statement(node), + ruby_private_predicate( + &source, + Language::Ruby, + ".rb", + "unary_not_statement?", + "body_statement", + text + ), + "unary_not_statement? mismatch for {line:?}" + ); + } +} + +#[test] +fn unary_not_expression_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def check\n !flag\n !!flag\n -flag\n not flag\nend\n", + Language::Ruby, + ".rb", + "unary", + "!flag", + ), + ( + "def check\n !flag\n !!flag\n -flag\n not flag\nend\n", + Language::Ruby, + ".rb", + "unary", + "!!flag", + ), + ( + "def check\n !flag\n !!flag\n -flag\n not flag\nend\n", + Language::Ruby, + ".rb", + "unary", + "-flag", + ), + ( + "def check\n !flag\n !!flag\n -flag\n not flag\nend\n", + Language::Ruby, + ".rb", + "unary", + "not flag", + ), + ( + "function check(flag: boolean) { return !flag; }\n", + Language::TypeScript, + ".ts", + "unary_expression", + "!flag", + ), + ( + "if not flag:\n pass\n", + Language::Python, + ".py", + "not_operator", + "not flag", + ), + ( + "if not flag then end\n", + Language::Lua, + ".lua", + "unary_expression", + "not flag", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.unary_not_expression(node), + ruby_private_predicate( + source, + language, + suffix, + "unary_not_expression?", + kind, + text + ), + "unary_not_expression? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_unary_not_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def check\n !flag\n !!flag\n -flag\n not flag\nend\n", + Language::Ruby, + ".rb", + "unary", + "!flag", + ), + ( + "def check\n !flag\n !!flag\n -flag\n not flag\nend\n", + Language::Ruby, + ".rb", + "unary", + "!!flag", + ), + ( + "function check(flag: boolean) { return !flag; }\n", + Language::TypeScript, + ".ts", + "unary_expression", + "!flag", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_unary_not(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_unary_not", + kind, + text + ), + "normalize_unary_not mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_unary_not_statement_matches_ruby_private_method() { + for (line, text) in [("!flag", "!flag"), ("!!flag", "!!flag")] { + let source = format!("def check\n {line}\nend\n"); + let tree = raw_tree(&source, Language::Ruby); + let node = first_raw_node(tree.root_node(), &source, "body_statement", text); + let mut normalizer = super::TreeSitterNormalizer::new(&source, Language::Ruby); + let rust = normalizer + .normalize_unary_not_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + &source, + Language::Ruby, + ".rb", + "normalize_unary_not_statement", + "body_statement", + text + ), + "normalize_unary_not_statement mismatch for {text:?}" + ); + } +} + +#[test] +fn unary_minus_expression_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def check\n -flag\n !flag\n value\nend\n", + 
Language::Ruby, + ".rb", + "unary", + "-flag", + ), + ( + "def check\n -flag\n !flag\n value\nend\n", + Language::Ruby, + ".rb", + "unary", + "!flag", + ), + ( + "function check(value: number) { return -value; }\n", + Language::TypeScript, + ".ts", + "unary_expression", + "-value", + ), + ( + "x = -value\n", + Language::Python, + ".py", + "unary_operator", + "-value", + ), + ( + "local x = -value\n", + Language::Lua, + ".lua", + "expression_list", + "-value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.unary_minus_expression(node), + ruby_private_predicate( + source, + language, + suffix, + "unary_minus_expression?", + kind, + text + ), + "unary_minus_expression? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_unary_minus_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def check\n -1\n -flag\nend\n", + Language::Ruby, + ".rb", + "unary", + "-1", + ), + ( + "def check\n -1\n -flag\nend\n", + Language::Ruby, + ".rb", + "unary", + "-flag", + ), + ( + "function check(value: number) { return -value; }\n", + Language::TypeScript, + ".ts", + "unary_expression", + "-value", + ), + ( + "x = -value\n", + Language::Python, + ".py", + "unary_operator", + "-value", + ), + ( + "local x = -value\n", + Language::Lua, + ".lua", + "expression_list", + "-value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_unary_minus(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_unary_minus", + kind, + text + ), + "normalize_unary_minus mismatch 
for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn binary_operator_matches_ruby_private_helper() { + for (source, language, suffix, kind, text) in [ + ( + "def calc\n left + right\n left && right\n value\nend\n", + Language::Ruby, + ".rb", + "binary", + "left + right", + ), + ( + "def calc\n left + right\n left && right\n value\nend\n", + Language::Ruby, + ".rb", + "binary", + "left && right", + ), + ( + "def calc\n left + right\n left && right\n value\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left + right\n left && right\n value", + ), + ( + "const value = left + right && other;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left + right && other", + ), + ( + "const value = left + right && other;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left + right", + ), + ( + "value = left + right and other\n", + Language::Python, + ".py", + "boolean_operator", + "left + right and other", + ), + ( + "value = left + right and other\n", + Language::Python, + ".py", + "binary_operator", + "left + right", + ), + ( + "local value = left + right and other\n", + Language::Lua, + ".lua", + "expression_list", + "left + right and other", + ), + ( + "local value = left + right and other\n", + Language::Lua, + ".lua", + "binary_expression", + "left + right", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.binary_operator(node).unwrap_or_default(), + ruby_private_string(source, language, suffix, "binary_operator", kind, text), + "binary_operator mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn boolean_operator_matches_ruby_private_helper() { + for (source, language, suffix, kind, text) in [ + ( + "def calc\n left && right\n left || right\n left + right\nend\n", + Language::Ruby, + ".rb", + "binary", + "left && right", + ), + ( + "def 
calc\n left && right\n left || right\n left + right\nend\n", + Language::Ruby, + ".rb", + "binary", + "left || right", + ), + ( + "def calc\n left && right\n left || right\n left + right\nend\n", + Language::Ruby, + ".rb", + "binary", + "left + right", + ), + ( + "const value = left && right || other;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left && right", + ), + ( + "const value = left && right || other;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left && right || other", + ), + ( + "value = left and right or other\n", + Language::Python, + ".py", + "boolean_operator", + "left and right", + ), + ( + "value = left and right or other\n", + Language::Python, + ".py", + "boolean_operator", + "left and right or other", + ), + ( + "local value = left and right or other\n", + Language::Lua, + ".lua", + "expression_list", + "left and right or other", + ), + ( + "local value = left + right\n", + Language::Lua, + ".lua", + "expression_list", + "left + right", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.boolean_operator(node).unwrap_or_default(), + ruby_private_string(source, language, suffix, "boolean_operator", kind, text), + "boolean_operator mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn comparison_operator_matches_ruby_private_helper() { + for (source, language, suffix, kind, text) in [ + ( + "def calc\n left == right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left == right", + ), + ( + "def calc\n left + right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left + right", + ), + ( + "const value = left === right;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left === right", + ), + ( + "const value = left + right;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left + right", 
+ ), + ( + "value = left == right\n", + Language::Python, + ".py", + "comparison_operator", + "left == right", + ), + ( + "value = left + right\n", + Language::Python, + ".py", + "binary_operator", + "left + right", + ), + ( + "local value = left == right\n", + Language::Lua, + ".lua", + "expression_list", + "left == right", + ), + ( + "local value = left + right\n", + Language::Lua, + ".lua", + "expression_list", + "left + right", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.comparison_operator(node).unwrap_or_default(), + ruby_private_string(source, language, suffix, "comparison_operator", kind, text), + "comparison_operator mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn comparison_expression_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def calc\n left == right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left == right", + ), + ( + "const value = left === right;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left === right", + ), + ( + "const value = left + right;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left + right", + ), + ( + "value = left == right\n", + Language::Python, + ".py", + "comparison_operator", + "left == right", + ), + ( + "value = left + right\n", + Language::Python, + ".py", + "binary_operator", + "left + right", + ), + ( + "local value = left == right\n", + Language::Lua, + ".lua", + "expression_list", + "left == right", + ), + ( + "local value = left + right\n", + Language::Lua, + ".lua", + "expression_list", + "left + right", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + 
normalizer.comparison_expression(node), + ruby_private_predicate( + source, + language, + suffix, + "comparison_expression?", + kind, + text + ), + "comparison_expression? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn comparison_expression_normalization_matches_ruby() { + for (source, language, suffix) in [ + ("value = left == right\n", Language::Python, ".py"), + ( + "const value = left === right;\n", + Language::TypeScript, + ".ts", + ), + ("local value = left == right\n", Language::Lua, ".lua"), + ] { + assert_ruby_parity(source, language, suffix); + } +} + +#[test] +fn normalize_comparison_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def calc\n left == right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left == right", + ), + ( + "value = left == right\n", + Language::Python, + ".py", + "comparison_operator", + "left == right", + ), + ( + "const value = left === right;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left === right", + ), + ( + "local value = left == right\n", + Language::Lua, + ".lua", + "expression_list", + "left == right", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_comparison(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_comparison", + kind, + text + ), + "normalize_comparison mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn boolean_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def calc\n left && right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left && right", + ), + ( + "def calc\n left or right\nend\n", + Language::Ruby, + ".rb", + 
"body_statement", + "left or right", + ), + ( + "def calc\n left + right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left + right", + ), + ( + "foo(left && right)\n", + Language::Ruby, + ".rb", + "argument_list", + "(left && right)", + ), + ( + "value = left and right\n", + Language::Python, + ".py", + "boolean_operator", + "left and right", + ), + ( + "local value = left and right\n", + Language::Lua, + ".lua", + "expression_list", + "left and right", + ), + ( + "const value = left && right;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left && right", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.boolean_statement(node), + ruby_private_predicate(source, language, suffix, "boolean_statement?", kind, text), + "boolean_statement? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn boolean_expression_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def calc\n left && right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left && right", + ), + ( + "def calc\n left && right\n left + right\nend\n", + Language::Ruby, + ".rb", + "binary", + "left && right", + ), + ( + "def calc\n left && right\n left + right\nend\n", + Language::Ruby, + ".rb", + "binary", + "left + right", + ), + ( + "const value = left && right;\nconst other = left + right;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left && right", + ), + ( + "const value = left && right;\nconst other = left + right;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left + right", + ), + ( + "value = left and right\nother = left + right\n", + Language::Python, + ".py", + "boolean_operator", + "left and right", + ), + ( + "value = left and right\nother = left + right\n", + Language::Python, + ".py", + 
"binary_operator", + "left + right", + ), + ( + "local value = left and right\nlocal other = left + right\n", + Language::Lua, + ".lua", + "expression_list", + "left and right", + ), + ( + "local value = left and right\nlocal other = left + right\n", + Language::Lua, + ".lua", + "expression_list", + "left + right", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.boolean_expression(node), + ruby_private_predicate(source, language, suffix, "boolean_expression?", kind, text), + "boolean_expression? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_boolean_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def calc\n left && right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left && right", + ), + ( + "def calc\n left || right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left || right", + ), + ( + "def calc\n left && middle && right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left && middle && right", + ), + ( + "value = left and right\n", + Language::Python, + ".py", + "boolean_operator", + "left and right", + ), + ( + "value = left or right\n", + Language::Python, + ".py", + "boolean_operator", + "left or right", + ), + ( + "local value = left and right\n", + Language::Lua, + ".lua", + "expression_list", + "left and right", + ), + ( + "local value = left or right\n", + Language::Lua, + ".lua", + "expression_list", + "left or right", + ), + ( + "const value = left && right;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left && right", + ), + ( + "const value = left || right;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left || right", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let 
mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_boolean(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_boolean", + kind, + text + ), + "normalize_boolean mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn boolean_expression_normalization_matches_ruby() { + for (source, language, suffix) in [ + ("def calc\n left && right\nend\n", Language::Ruby, ".rb"), + ("value = left and right\n", Language::Python, ".py"), + ("local value = left and right\n", Language::Lua, ".lua"), + ( + "const value = left && right;\n", + Language::TypeScript, + ".ts", + ), + ] { + assert_ruby_parity(source, language, suffix); + } +} + +#[test] +fn operator_call_expression_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def calc\n left + right\n left && right\nend\n", + Language::Ruby, + ".rb", + "binary", + "left + right", + ), + ( + "def calc\n left + right\n left && right\nend\n", + Language::Ruby, + ".rb", + "binary", + "left && right", + ), + ( + "const value = left + right && other;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left + right", + ), + ( + "const value = left + right && other;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left + right && other", + ), + ( + "value = left + right and other\n", + Language::Python, + ".py", + "binary_operator", + "left + right", + ), + ( + "value = left + right and other\n", + Language::Python, + ".py", + "boolean_operator", + "left + right and other", + ), + ( + "local value = left + right\nlocal other = left and right\n", + Language::Lua, + ".lua", + "expression_list", + "left + right", + ), + ( + "local value = left + right\nlocal other = left and right\n", + Language::Lua, + ".lua", + "expression_list", + "left and right", + ), + ] { + let tree = raw_tree(source, 
language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.operator_call_expression(node), + ruby_private_predicate( + source, + language, + suffix, + "operator_call_expression?", + kind, + text + ), + "operator_call_expression? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_operator_call_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def calc\n left + right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left + right", + ), + ( + "def calc\n left =~ /right/\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left =~ /right/", + ), + ( + "def calc\n left =~ pattern\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left =~ pattern", + ), + ( + "value = left + right\n", + Language::Python, + ".py", + "binary_operator", + "left + right", + ), + ( + "const value = left + right;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left + right", + ), + ( + "local value = left + right\n", + Language::Lua, + ".lua", + "expression_list", + "left + right", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_operator_call(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_operator_call", + kind, + text + ), + "normalize_operator_call mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn operator_call_expression_normalization_matches_ruby() { + for (source, language, suffix) in [ + ("value = left + right\n", Language::Python, ".py"), + ("local value = left + right\n", Language::Lua, ".lua"), + ("const value = left + right;\n", 
Language::TypeScript, ".ts"), + ] { + assert_ruby_parity(source, language, suffix); + } +} + +#[test] +fn spaced_text_matches_ruby_private_helper() { + for (source, language, suffix, kind, text) in [ + ( + "def calc\n left + right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left + right", + ), + ( + "const value = left + right;\n", + Language::TypeScript, + ".ts", + "binary_expression", + "left + right", + ), + ( + "value = left + right\n", + Language::Python, + ".py", + "binary_operator", + "left + right", + ), + ( + "local value = left + right\n", + Language::Lua, + ".lua", + "expression_list", + "left + right", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.spaced_text(node), + ruby_private_string(source, language, suffix, "spaced_text", kind, text), + "spaced_text mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn class_node_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "class Thing; end\n", + Language::Ruby, + ".rb", + "class", + "class Thing; end", + ), + ( + "class Thing:\n pass\n", + Language::Python, + ".py", + "class_definition", + "class Thing:\n pass", + ), + ( + "class Thing {}\n", + Language::TypeScript, + ".ts", + "class_declaration", + "class Thing {}", + ), + ( + "local Thing = {}\n", + Language::Lua, + ".lua", + "variable_declaration", + "local Thing = {}", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.class_node(node), + ruby_private_predicate(source, language, suffix, "class_node?", kind, text), + "class_node? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn module_node_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "module Thing\n value\nend\n", + Language::Ruby, + ".rb", + "module", + "module Thing\n value\nend", + ), + ( + "class Thing; end\n", + Language::Ruby, + ".rb", + "class", + "class Thing; end", + ), + ( + "value = 1\n", + Language::Python, + ".py", + "module", + "value = 1\n", + ), + ( + "namespace Thing { const value = 1; }\n", + Language::TypeScript, + ".ts", + "program", + "namespace Thing { const value = 1; }\n", + ), + ( + "local Thing = {}\n", + Language::Lua, + ".lua", + "chunk", + "local Thing = {}\n", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.module_node(node), + ruby_private_predicate(source, language, suffix, "module_node?", kind, text), + "module_node? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_module_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "module Thing\n value\nend\n", + Language::Ruby, + ".rb", + "module", + "module Thing\n value\nend", + ), + ( + "module Empty\nend\n", + Language::Ruby, + ".rb", + "module", + "module Empty\nend", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_module(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_module", + kind, + text + ), + "normalize_module mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_singleton_class_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "class << self\n value\nend\n", + Language::Ruby, + ".rb", + "singleton_class", + "class << self\n value\nend", + ), + ( + "class << object\nend\n", + Language::Ruby, + ".rb", + "singleton_class", + "class << object\nend", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_singleton_class(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_singleton_class", + kind, + text + ), + "normalize_singleton_class mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn ruby_definition_identifier_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def helper(arg)\n arg\nend\n", + Language::Ruby, + ".rb", + "identifier", + 
"helper", + ), + ( + "def helper(arg)\n arg\nend\n", + Language::Ruby, + ".rb", + "identifier", + "arg", + ), + ( + "items.each { |item| item }\n", + Language::Ruby, + ".rb", + "identifier", + "item", + ), + ( + "def helper\n value\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value", + ), + ( + "def helper(arg):\n return arg\n", + Language::Python, + ".py", + "identifier", + "arg", + ), + ( + "function helper(arg) { return arg; }\n", + Language::TypeScript, + ".ts", + "identifier", + "arg", + ), + ( + "function helper(arg)\n return arg\nend\n", + Language::Lua, + ".lua", + "identifier", + "arg", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.ruby_definition_identifier(node), + ruby_private_predicate( + source, + language, + suffix, + "ruby_definition_identifier?", + kind, + text + ), + "ruby_definition_identifier? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn literal_fragment_assignment_context_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "value = \"left = right\"\n", + Language::Ruby, + ".rb", + "string_content", + "left = right", + ), + ("value = 1\n", Language::Ruby, ".rb", "identifier", "value"), + ( + "value = \"left = right\"\n", + Language::Python, + ".py", + "string_content", + "left = right", + ), + ( + "const value = \"left = right\";\n", + Language::TypeScript, + ".ts", + "string_fragment", + "left = right", + ), + ( + "local value = \"left = right\"\n", + Language::Lua, + ".lua", + "string_content", + "left = right", + ), + ( + "local value = other\n", + Language::Lua, + ".lua", + "variable_list", + "value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.literal_fragment_assignment_context(node), + ruby_private_predicate( + source, + language, + suffix, + "literal_fragment_assignment_context?", + kind, + text + ), + "literal_fragment_assignment_context? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn assignment_lhs_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "value = other\n", + Language::Ruby, + ".rb", + "identifier", + "value", + ), + ( + "value = other\n", + Language::Ruby, + ".rb", + "identifier", + "other", + ), + ( + "{ key: value }\n", + Language::Ruby, + ".rb", + "hash_key_symbol", + "key", + ), + ( + "{ key: value }\n", + Language::Ruby, + ".rb", + "identifier", + "value", + ), + ( + "value = other\n", + Language::Python, + ".py", + "identifier", + "value", + ), + ( + "value = other\n", + Language::Python, + ".py", + "identifier", + "other", + ), + ( + "let value = other;\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + ), + ( + "let value = other;\n", + Language::TypeScript, + ".ts", + "identifier", + "other", + ), + ( + "let value = other;\n", + Language::TypeScript, + ".ts", + "variable_declarator", + "value = other", + ), + ( + "local value = other\n", + Language::Lua, + ".lua", + "variable_list", + "value", + ), + ( + "local value = other\n", + Language::Lua, + ".lua", + "expression_list", + "other", + ), + ( + "value = other\n", + Language::Lua, + ".lua", + "variable_list", + "value", + ), + ( + "value = other\n", + Language::Lua, + ".lua", + "expression_list", + "other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.assignment_lhs(node), + ruby_private_predicate(source, language, suffix, "assignment_lhs?", kind, text), + "assignment_lhs? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn assignment_rhs_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "value = other\n", + Language::Ruby, + ".rb", + "identifier", + "value", + ), + ( + "value = other\n", + Language::Ruby, + ".rb", + "identifier", + "other", + ), + ( + "{ key: value }\n", + Language::Ruby, + ".rb", + "hash_key_symbol", + "key", + ), + ( + "{ key: value }\n", + Language::Ruby, + ".rb", + "identifier", + "value", + ), + ( + "value = other\n", + Language::Python, + ".py", + "identifier", + "value", + ), + ( + "value = other\n", + Language::Python, + ".py", + "identifier", + "other", + ), + ( + "let value = other;\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + ), + ( + "let value = other;\n", + Language::TypeScript, + ".ts", + "identifier", + "other", + ), + ( + "let value = other;\n", + Language::TypeScript, + ".ts", + "variable_declarator", + "value = other", + ), + ( + "local value = other\n", + Language::Lua, + ".lua", + "variable_list", + "value", + ), + ( + "local value = other\n", + Language::Lua, + ".lua", + "expression_list", + "other", + ), + ( + "value = other\n", + Language::Lua, + ".lua", + "variable_list", + "value", + ), + ( + "value = other\n", + Language::Lua, + ".lua", + "expression_list", + "other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.assignment_rhs(node), + ruby_private_predicate(source, language, suffix, "assignment_rhs?", kind, text), + "assignment_rhs? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn ruby_assignment_node_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "value = 1\n", + Language::Ruby, + ".rb", + "assignment", + "value = 1", + ), + ( + "value += 1\n", + Language::Ruby, + ".rb", + "operator_assignment", + "value += 1", + ), + ( + "def helper\n value\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value", + ), + ( + "[1].each { |item| local = item }\n", + Language::Ruby, + ".rb", + "block_body", + "local = item", + ), + ( + "value = 1\n", + Language::Python, + ".py", + "expression_statement", + "value = 1", + ), + ( + "value = other;\n", + Language::TypeScript, + ".ts", + "assignment_expression", + "value = other", + ), + ( + "local value = other\n", + Language::Lua, + ".lua", + "assignment_statement", + "value = other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.ruby_assignment_node(node), + ruby_private_predicate( + source, + language, + suffix, + "ruby_assignment_node?", + kind, + text + ), + "ruby_assignment_node? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn collect_assignment_target_names_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "value = other\n", + Language::Ruby, + ".rb", + "identifier", + "value", + ), + ( + "left, *rest = values\n", + Language::Ruby, + ".rb", + "left_assignment_list", + "left, *rest", + ), + ( + "value = other\n", + Language::Python, + ".py", + "identifier", + "value", + ), + ( + "const value = other;\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + ), + ( + "local value = other\n", + Language::Lua, + ".lua", + "variable_list", + "value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let mut names = BTreeSet::new(); + normalizer.collect_assignment_target_names(node, &mut names); + + assert_eq!( + names, + ruby_private_collected_names( + source, + language, + suffix, + "collect_assignment_target_names", + kind, + text + ), + "collect_assignment_target_names mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn collect_identifier_names_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "left, *rest = values\n", + Language::Ruby, + ".rb", + "left_assignment_list", + "left, *rest", + ), + ( + "receiver.call(argument)\n", + Language::Ruby, + ".rb", + "call", + "receiver.call(argument)", + ), + ( + "value = other\n", + Language::Python, + ".py", + "expression_statement", + "value = other", + ), + ( + "const value = { shorthand };\n", + Language::TypeScript, + ".ts", + "object", + "{ shorthand }", + ), + ( + "local value = other\n", + Language::Lua, + ".lua", + "variable_declaration", + "local value = other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = 
super::TreeSitterNormalizer::new(source, language); + let mut names = BTreeSet::new(); + normalizer.collect_identifier_names(node, &mut names); + + assert_eq!( + names, + ruby_private_collected_names( + source, + language, + suffix, + "collect_identifier_names", + kind, + text + ), + "collect_identifier_names mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn member_name_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ("user.name\n", Language::Ruby, ".rb", "identifier", "name"), + ("user&.name\n", Language::Ruby, ".rb", "identifier", "name"), + ( + "user.name()\n", + Language::Python, + ".py", + "identifier", + "name", + ), + ( + "user?.name;\n", + Language::TypeScript, + ".ts", + "property_identifier", + "name", + ), + ("user.name()\n", Language::Lua, ".lua", "identifier", "name"), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.member_name(node), + ruby_private_string(source, language, suffix, "member_name", kind, text), + "member_name mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn member_parts_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ("user.name\n", Language::Ruby, ".rb", "call", "user.name"), + ("user&.name\n", Language::Ruby, ".rb", "call", "user&.name"), + ( + "user.name()\n", + Language::Python, + ".py", + "attribute", + "user.name", + ), + ( + "user.name(thing)\n", + Language::Python, + ".py", + "expression_statement", + "user.name(thing)", + ), + ( + "user.name();\n", + Language::TypeScript, + ".ts", + "member_expression", + "user.name", + ), + ( + "user.name(thing);\n", + Language::TypeScript, + ".ts", + "call_expression", + "user.name(thing)", + ), + ( + "user.name()\n", + Language::Lua, + ".lua", + "dot_index_expression", + "user.name", + ), + ( + "local value = other\n", 
+ Language::Lua, + ".lua", + "expression_list", + "other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.member_parts(node).map(|(receiver, method)| { + ( + receiver.kind().to_string(), + super::node_text(receiver, source).to_string(), + method, + ) + }); + + assert_eq!( + rust, + ruby_private_member_parts(source, language, suffix, kind, text), + "member_parts mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn member_read_node_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ("user.name\n", Language::Ruby, ".rb", "call", "user.name"), + ("foo()\n", Language::Ruby, ".rb", "call", "foo()"), + ( + "user.name()\n", + Language::Python, + ".py", + "attribute", + "user.name", + ), + ( + "user.name(thing)\n", + Language::Python, + ".py", + "expression_statement", + "user.name(thing)", + ), + ( + "user.name();\n", + Language::TypeScript, + ".ts", + "member_expression", + "user.name", + ), + ( + "user.name(thing);\n", + Language::TypeScript, + ".ts", + "call_expression", + "user.name(thing)", + ), + ( + "user.name()\n", + Language::Lua, + ".lua", + "dot_index_expression", + "user.name", + ), + ( + "local value = other\n", + Language::Lua, + ".lua", + "expression_list", + "other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.member_read_node(node), + ruby_private_predicate(source, language, suffix, "member_read_node?", kind, text), + "member_read_node? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_member_read_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ("user.name\n", Language::Ruby, ".rb", "call", "user.name"), + ( + "user.name()\n", + Language::Python, + ".py", + "attribute", + "user.name", + ), + ( + "user.name;\n", + Language::TypeScript, + ".ts", + "member_expression", + "user.name", + ), + ( + "user.name()\n", + Language::Lua, + ".lua", + "dot_index_expression", + "user.name", + ), + ("value\n", Language::Ruby, ".rb", "identifier", "value"), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_member_read(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_member_read", + kind, + text + ), + "normalize_member_read mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn assignment_left_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "value = other\n", + Language::Ruby, + ".rb", + "assignment", + "value = other", + ), + ( + "left, right = values\n", + Language::Ruby, + ".rb", + "assignment", + "left, right = values", + ), + ( + "value = other\n", + Language::Python, + ".py", + "expression_statement", + "value = other", + ), + ( + "value = other;\n", + Language::TypeScript, + ".ts", + "assignment_expression", + "value = other", + ), + ( + "value = other\n", + Language::Lua, + ".lua", + "assignment_statement", + "value = other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.assignment_left(node).map(|left| { + ( + 
left.kind().to_string(), + super::node_text(left, source).to_string(), + ) + }); + + assert_eq!( + rust, + ruby_private_node_signature(source, language, suffix, "assignment_left", kind, text), + "assignment_left mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn assignment_right_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "value = other\n", + Language::Ruby, + ".rb", + "assignment", + "value = other", + ), + ( + "left, right = values\n", + Language::Ruby, + ".rb", + "assignment", + "left, right = values", + ), + ( + "value = other\n", + Language::Python, + ".py", + "expression_statement", + "value = other", + ), + ( + "value = other;\n", + Language::TypeScript, + ".ts", + "assignment_expression", + "value = other", + ), + ( + "value = other\n", + Language::Lua, + ".lua", + "assignment_statement", + "value = other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.assignment_right(node).map(|right| { + ( + right.kind().to_string(), + super::node_text(right, source).to_string(), + ) + }); + + assert_eq!( + rust, + ruby_private_node_signature(source, language, suffix, "assignment_right", kind, text), + "assignment_right mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn singleton_receiver_matches_ruby_private_method() { + for (source, kind, text) in [ + ( + "def self.foo\nend\n", + "singleton_method", + "def self.foo\nend", + ), + ( + "def User.foo\nend\n", + "singleton_method", + "def User.foo\nend", + ), + ( + "def object.foo\nend\n", + "singleton_method", + "def object.foo\nend", + ), + ( + "def self.foo(value)\n value\nend\n", + "singleton_method", + "def self.foo(value)\n value\nend", + ), + ( + "def object.foo\n value\nend\n", + "singleton_method", + "def object.foo\n value\nend", + ), + ] { + let tree = 
raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = normalizer.singleton_receiver(node).map(|receiver| { + ( + receiver.kind().to_string(), + super::node_text(receiver, source).to_string(), + ) + }); + + assert_eq!( + rust, + ruby_private_node_signature( + source, + Language::Ruby, + ".rb", + "singleton_receiver", + kind, + text + ), + "singleton_receiver mismatch for {kind} {text:?}" + ); + } +} + +#[test] +fn singleton_name_matches_ruby_private_method() { + for (source, kind, text) in [ + ( + "def self.foo\nend\n", + "singleton_method", + "def self.foo\nend", + ), + ( + "def User.foo\nend\n", + "singleton_method", + "def User.foo\nend", + ), + ( + "def object.foo\nend\n", + "singleton_method", + "def object.foo\nend", + ), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + + assert_eq!( + normalizer.singleton_name(node), + ruby_private_string(source, Language::Ruby, ".rb", "singleton_name", kind, text), + "singleton_name mismatch for {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_singleton_function_matches_ruby_private_method() { + for (source, kind, text) in [ + ( + "def self.hidden(value)\n return value\nend\n", + "singleton_method", + "def self.hidden(value)\n return value\nend", + ), + ( + "def User.hidden\nend\n", + "singleton_method", + "def User.hidden\nend", + ), + ( + "def object.hidden\n value\nend\n", + "singleton_method", + "def object.hidden\n value\nend", + ), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = normalizer + .normalize_singleton_function(node) + .map(|node| node_value(&node)) + 
.unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + Language::Ruby, + ".rb", + "normalize_singleton_function", + kind, + text + ), + "normalize_singleton_function mismatch for {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_function_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def check(value)\n return value\nend\n", + Language::Ruby, + ".rb", + "method", + "def check(value)\n return value\nend", + ), + ( + "def empty\nend\n", + Language::Ruby, + ".rb", + "method", + "def empty\nend", + ), + ( + "def object.hidden\n value\nend\n", + Language::Ruby, + ".rb", + "singleton_method", + "def object.hidden\n value\nend", + ), + ( + "def check(value):\n return value\n", + Language::Python, + ".py", + "function_definition", + "def check(value):\n return value", + ), + ( + "function check(value) { return value; }\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function check(value) { return value; }", + ), + ( + "class Box { check(value) { return value; } }\n", + Language::TypeScript, + ".ts", + "method_definition", + "check(value) { return value; }", + ), + ( + "function check(value)\n return value\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function check(value)\n return value\nend", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_function(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_function", + kind, + text + ), + "normalize_function mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn lambda_expression_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "fn = ->(x) { x + 1 
}\n", + Language::Ruby, + ".rb", + "lambda", + "->(x) { x + 1 }", + ), + ( + "fn = lambda x: x + 1\n", + Language::Python, + ".py", + "lambda", + "lambda x: x + 1", + ), + ( + "const fn = (x) => x + 1;\n", + Language::TypeScript, + ".ts", + "arrow_function", + "(x) => x + 1", + ), + ( + "const fn = function(x) { return x + 1; };\n", + Language::TypeScript, + ".ts", + "function_expression", + "function(x) { return x + 1; }", + ), + ( + "local fn = function(x) return x + 1 end\n", + Language::Lua, + ".lua", + "expression_list", + "function(x) return x + 1 end", + ), + ( + "function f(x) return x + 1 end\n", + Language::Lua, + ".lua", + "function_declaration", + "function f(x) return x + 1 end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.lambda_expression(node), + ruby_private_predicate(source, language, suffix, "lambda_expression?", kind, text), + "lambda_expression? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_lambda_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "fn = ->(x) { x + 1 }\n", + Language::Ruby, + ".rb", + "lambda", + "->(x) { x + 1 }", + ), + ( + "fn = lambda x: x + 1\n", + Language::Python, + ".py", + "lambda", + "lambda x: x + 1", + ), + ( + "const fn = (x) => x + 1;\n", + Language::TypeScript, + ".ts", + "arrow_function", + "(x) => x + 1", + ), + ( + "const fn = function(x) { return x + 1; };\n", + Language::TypeScript, + ".ts", + "function_expression", + "function(x) { return x + 1; }", + ), + ( + "local fn = function(x) return x + 1 end\n", + Language::Lua, + ".lua", + "expression_list", + "function(x) return x + 1 end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_lambda(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_lambda", + kind, + text + ), + "normalize_lambda mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn lambda_expression_normalization_matches_ruby() { + for (source, language, suffix) in [ + ("fn = ->(x) { x + 1 }\n", Language::Ruby, ".rb"), + ("fn = lambda x: x + 1\n", Language::Python, ".py"), + ("const fn = (x) => x + 1;\n", Language::TypeScript, ".ts"), + ( + "const fn = function(x) { return x + 1; };\n", + Language::TypeScript, + ".ts", + ), + ( + "local fn = function(x) return x + 1 end\n", + Language::Lua, + ".lua", + ), + ] { + let root = parse_language_source(source, language, suffix); + let mut lambdas = Vec::new(); + nodes_of_type(&root, "LAMBDA", &mut lambdas); + assert!( + !lambdas.is_empty(), + "expected LAMBDA for {language:?} in {root:#?}" + ); + assert_ruby_parity(source, language, 
suffix); + } +} + +#[test] +fn function_name_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def run\nend\n", + Language::Ruby, + ".rb", + "method", + "def run\nend", + ), + ( + "def self.run\nend\n", + Language::Ruby, + ".rb", + "singleton_method", + "def self.run\nend", + ), + ( + "def run():\n pass\n", + Language::Python, + ".py", + "function_definition", + "def run():\n pass", + ), + ( + "function run() {}\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function run() {}", + ), + ( + "class Box { run() {} }\n", + Language::TypeScript, + ".ts", + "method_definition", + "run() {}", + ), + ( + "function run()\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function run()\nend", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.function_name(node).unwrap_or_default(), + ruby_private_string(source, language, suffix, "function_name", kind, text), + "function_name mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn collect_destructured_parameter_targets_matches_ruby_private_method() { + for (source, kind, text) in [ + ( + "items.each { |(left, right)| left }\n", + "destructured_parameter", + "(left, right)", + ), + ( + "items.each do |(left, (middle, right))| left end\n", + "destructured_parameter", + "(left, (middle, right))", + ), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let mut targets = Vec::new(); + normalizer.collect_destructured_parameter_targets(node, &mut targets); + let rust = Value::Array(targets.iter().map(node_value).collect()); + + assert_eq!( + rust, + ruby_private_destructured_parameter_targets_value(source, kind, text), + 
"collect_destructured_parameter_targets mismatch for {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_block_parameters_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "items.each { |(left, right)| left }\n", + Language::Ruby, + ".rb", + "block", + "{ |(left, right)| left }", + ), + ( + "items.each { |item, (left, right)| item }\n", + Language::Ruby, + ".rb", + "block", + "{ |item, (left, right)| item }", + ), + ( + "items.each { |item| item }\n", + Language::Ruby, + ".rb", + "block", + "{ |item| item }", + ), + ( + "def f(x):\n pass\n", + Language::Python, + ".py", + "function_definition", + "def f(x):\n pass", + ), + ( + "items.forEach((item) => item);\n", + Language::TypeScript, + ".ts", + "expression_statement", + "items.forEach((item) => item);", + ), + ( + "function f(x)\n return x\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function f(x)\n return x\nend", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_block_parameters(Some(node)) + .as_ref() + .map(node_value) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_block_parameters", + kind, + text + ), + "normalize_block_parameters mismatch for {language:?} {kind} {text:?}" + ); + } + + let mut normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); + assert!(normalizer.normalize_block_parameters(None).is_none()); +} + +#[test] +fn normalize_parameters_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f(value = 1)\nend\n", + Language::Ruby, + ".rb", + "method_parameters", + "(value = 1)", + ), + ( + "def f(value)\nend\n", + Language::Ruby, + ".rb", + "method_parameters", + "(value)", + ), + ( + "def f(value=1):\n pass\n", + 
Language::Python, + ".py", + "parameters", + "(value=1)", + ), + ( + "function f(value = 1) {}\n", + Language::TypeScript, + ".ts", + "formal_parameters", + "(value = 1)", + ), + ( + "function f(value)\nend\n", + Language::Lua, + ".lua", + "parameters", + "(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_parameters(Some(node)) + .as_ref() + .map(node_value) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_parameters", + kind, + text + ), + "normalize_parameters mismatch for {language:?} {kind} {text:?}" + ); + } + + let mut normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); + assert!(normalizer.normalize_parameters(None).is_none()); +} + +#[test] +fn normalize_destructured_block_parameter_matches_ruby_private_method() { + for (source, kind, text) in [ + ( + "items.each { |(left, right)| left }\n", + "destructured_parameter", + "(left, right)", + ), + ( + "items.each do |(left, (middle, right))| left end\n", + "destructured_parameter", + "(left, (middle, right))", + ), + ("items.each { |item| item }\n", "identifier", "item"), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = normalizer + .normalize_destructured_block_parameter(node) + .as_ref() + .map(node_value) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + Language::Ruby, + ".rb", + "normalize_destructured_block_parameter", + kind, + text + ), + "normalize_destructured_block_parameter mismatch for {kind} {text:?}" + ); + } +} + +#[test] +fn scope_matches_ruby_private_method() { + for (source, language, suffix, 
kind, text, mode) in [ + ("1\n", Language::Ruby, ".rb", "integer", "1", "body"), + ( + "1\n", + Language::Python, + ".py", + "expression_statement", + "1", + "body", + ), + ( + "value;\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + "args", + ), + ( + "return value\n", + Language::Lua, + ".lua", + "expression_list", + "value", + "empty", + ), + ] { + let tree = raw_tree(source, language); + let root = tree.root_node(); + let node = first_raw_node(root, source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + normalizer.root_span = Some(super::span(root)); + let body = if mode == "body" { + Some(normalizer.wrap("BODY", Vec::new(), node)) + } else { + None + }; + let args = if mode == "args" { + Some(normalizer.wrap("ARGS", Vec::new(), node)) + } else { + None + }; + let rust = node_value(&normalizer.scope(body, args, node)); + + assert_eq!( + rust, + ruby_private_scope_value(source, language, suffix, kind, text, mode), + "scope mismatch for {language:?} {kind} {text:?} mode {mode}" + ); + } +} + +#[test] +fn list_matches_ruby_private_method() { + for (source, language, suffix, kind, text, mode) in [ + ( + "value\n", + Language::Ruby, + ".rb", + "identifier", + "value", + "one", + ), + ( + "value\n", + Language::Python, + ".py", + "expression_statement", + "value", + "empty", + ), + ( + "value;\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + "nil", + ), + ( + "return value\n", + Language::Lua, + ".lua", + "expression_list", + "value", + "one", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let item = normalizer.wrap("ITEM", Vec::new(), node); + let children = match mode { + "nil" => None, + "empty" => Some(Vec::new()), + "one" => Some(vec![item]), + _ => panic!("unknown list mode: {mode}"), + }; + let rust = normalizer + .list(children, node) + 
.as_ref() + .map(node_value) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_list_value(source, language, suffix, kind, text, mode), + "list mismatch for {language:?} {kind} {text:?} mode {mode}" + ); + } +} + +#[test] +fn unwrap_node_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def check\n (value)\n value\nend\n", + Language::Ruby, + ".rb", + "parenthesized_statements", + "(value)", + ), + ( + "value\n(value)\n", + Language::Python, + ".py", + "expression_statement", + "value", + ), + ( + "value\n(value)\n", + Language::Python, + ".py", + "expression_statement", + "(value)", + ), + ( + "const value = (other);\n", + Language::TypeScript, + ".ts", + "parenthesized_expression", + "(other)", + ), + ( + "local first = (other)\nlocal second = left + right\n", + Language::Lua, + ".lua", + "expression_list", + "(other)", + ), + ( + "local first = (other)\nlocal second = left + right\n", + Language::Lua, + ".lua", + "expression_list", + "left + right", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.unwrap_node(node), + ruby_private_predicate(source, language, suffix, "unwrap_node?", kind, text), + "unwrap_node? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn statement_node_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def check\n return value\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "return value", + ), + ( + "def check\n return value\nend\n", + Language::Ruby, + ".rb", + "identifier", + "check", + ), + ( + "value\n(value)\n", + Language::Python, + ".py", + "expression_statement", + "(value)", + ), + ( + "value\n(value)\n", + Language::Python, + ".py", + "identifier", + "value", + ), + ( + "function check() { return value + other; }\n", + Language::TypeScript, + ".ts", + "return_statement", + "return value + other;", + ), + ( + "function check() { return value + other; }\n", + Language::TypeScript, + ".ts", + "binary_expression", + "value + other", + ), + ( + "function check() { return value + other; }\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + ), + ( + "return value\n", + Language::Lua, + ".lua", + "return_statement", + "return value", + ), + ( + "return value\n", + Language::Lua, + ".lua", + "expression_list", + "value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.statement_node(node.kind()), + ruby_private_predicate(source, language, suffix, "statement_node?", kind, text), + "statement_node? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn local_identifier_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def check\nend\nclass Thing; end\n", + Language::Ruby, + ".rb", + "identifier", + "check", + ), + ( + "def check\nend\nclass Thing; end\n", + Language::Ruby, + ".rb", + "constant", + "Thing", + ), + ( + "def check(value):\n pass\n", + Language::Python, + ".py", + "identifier", + "value", + ), + ( + "def check(value):\n pass\n", + Language::Python, + ".py", + "parameters", + "(value)", + ), + ( + "const value = object.field;\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + ), + ( + "const value = object.field;\n", + Language::TypeScript, + ".ts", + "property_identifier", + "field", + ), + ( + "const value = object.field;\n", + Language::TypeScript, + ".ts", + "lexical_declaration", + "const value = object.field;", + ), + ( + "local value = other\nprint(value)\n", + Language::Lua, + ".lua", + "identifier", + "value", + ), + ( + "local value = other\n", + Language::Lua, + ".lua", + "expression_list", + "other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.identifier_kind(node.kind()), + ruby_private_predicate(source, language, suffix, "local_identifier?", kind, text), + "local_identifier? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn ruby_local_name_matches_scope_stack_lookup() { + let mut normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); + normalizer.local_stack = vec![ + BTreeSet::from(["outer".to_string(), "shared".to_string()]), + BTreeSet::from(["inner".to_string()]), + ]; + + assert!(normalizer.ruby_local_name("outer")); + assert!(normalizer.ruby_local_name("inner")); + assert!(normalizer.ruby_local_name("shared")); + assert!(!normalizer.ruby_local_name("missing")); +} + +#[test] +fn ruby_vcall_identifier_matches_ruby_private_predicate() { + let cases = vec![ + ( + "ruby_vcall", + "foo\n", + Language::Ruby, + ".rb", + "identifier", + "foo", + Vec::<&str>::new(), + ), + ( + "ruby_local", + "foo\n", + Language::Ruby, + ".rb", + "identifier", + "foo", + vec!["foo"], + ), + ( + "assignment_lhs", + "foo = 1\n", + Language::Ruby, + ".rb", + "identifier", + "foo", + Vec::<&str>::new(), + ), + ( + "method_name", + "def foo\nend\n", + Language::Ruby, + ".rb", + "identifier", + "foo", + Vec::<&str>::new(), + ), + ( + "parameter", + "def f(foo)\nend\n", + Language::Ruby, + ".rb", + "identifier", + "foo", + Vec::<&str>::new(), + ), + ( + "non_identifier", + "Thing\n", + Language::Ruby, + ".rb", + "constant", + "Thing", + Vec::<&str>::new(), + ), + ( + "non_ruby", + "foo\n", + Language::Python, + ".py", + "expression_statement", + "foo", + Vec::<&str>::new(), + ), + ]; + + for (label, source, language, suffix, kind, text, locals) in cases { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + if !locals.is_empty() { + normalizer + .local_stack + .push(locals.iter().map(|name| name.to_string()).collect()); + } + + assert_eq!( + normalizer.ruby_vcall_identifier(node, super::node_text(node, source)), + ruby_private_ruby_vcall_identifier_predicate( + source, language, suffix, kind, text, 
&locals, + ), + "ruby_vcall_identifier? mismatch for {label}" + ); + } +} + +#[test] +fn vcall_identifier_matches_ruby_private_predicate() { + let cases = vec![ + ( + "ruby_modifier_action", + "foo if cond\n", + Language::Ruby, + ".rb", + "identifier", + "foo", + Vec::<&str>::new(), + ), + ( + "ruby_local", + "foo if cond\n", + Language::Ruby, + ".rb", + "identifier", + "foo", + vec!["foo"], + ), + ( + "method_name", + "def foo\nend\n", + Language::Ruby, + ".rb", + "identifier", + "foo", + Vec::<&str>::new(), + ), + ( + "argument", + "call(foo)\n", + Language::Ruby, + ".rb", + "identifier", + "foo", + Vec::<&str>::new(), + ), + ( + "member_read", + "def f\n user.name\nend\n", + Language::Ruby, + ".rb", + "identifier", + "name", + Vec::<&str>::new(), + ), + ( + "assignment_lhs", + "foo = bar\n", + Language::Ruby, + ".rb", + "identifier", + "foo", + Vec::<&str>::new(), + ), + ( + "python_identifier", + "foo\n", + Language::Python, + ".py", + "expression_statement", + "foo", + Vec::<&str>::new(), + ), + ( + "typescript_identifier", + "foo;\n", + Language::TypeScript, + ".ts", + "identifier", + "foo", + Vec::<&str>::new(), + ), + ( + "lua_identifier", + "foo()\n", + Language::Lua, + ".lua", + "identifier", + "foo", + Vec::<&str>::new(), + ), + ]; + + for (label, source, language, suffix, kind, text, locals) in cases { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + if !locals.is_empty() { + normalizer + .local_stack + .push(locals.iter().map(|name| name.to_string()).collect()); + } + + assert_eq!( + normalizer.vcall_identifier(node, super::node_text(node, source)), + ruby_private_vcall_identifier_predicate(source, language, suffix, kind, text, &locals,), + "vcall_identifier? 
mismatch for {label}" + ); + } + + let source = "def f\n Thing\nend\n"; + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, "constant", "Thing"); + let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + assert!( + !normalizer.vcall_identifier(node, super::node_text(node, source)), + "vcall_identifier? must reject non-local identifiers in statement wrappers" + ); + + let source = "foo\n"; + let tree = raw_tree(source, Language::Python); + let node = first_raw_node(tree.root_node(), source, "identifier", "foo"); + let normalizer = super::TreeSitterNormalizer::new(source, Language::Python); + assert!( + !normalizer.vcall_identifier(node, super::node_text(node, source)), + "vcall_identifier? must reject Python bare identifiers" + ); +} + +#[test] +fn collect_ruby_parameter_locals_matches_ruby_private_method() { + for (source, kind, text) in [ + ( + "def f(a, b = 1, *rest, key:, **opts, &block)\nend\n", + "method_parameters", + "(a, b = 1, *rest, key:, **opts, &block)", + ), + ( + "[1].each { |item, (left, right)| item }\n", + "block_parameters", + "|item, (left, right)|", + ), + ("fn = ->(x, y:) { x }\n", "lambda_parameters", "(x, y:)"), + ("value = other\n", "assignment", "value = other"), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let mut locals = BTreeSet::new(); + normalizer.collect_ruby_parameter_locals(node, &mut locals); + + assert_eq!( + locals, + ruby_private_collected_names( + source, + Language::Ruby, + ".rb", + "collect_ruby_parameter_locals", + kind, + text + ), + "collect_ruby_parameter_locals mismatch for {kind} {text:?}" + ); + } +} + +#[test] +fn collect_ruby_assignment_locals_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "value = other\n", + Language::Ruby, + ".rb", + "assignment", + 
"value = other", + ), + ( + "left, *rest = values\n", + Language::Ruby, + ".rb", + "assignment", + "left, *rest = values", + ), + ( + "value += 1\n", + Language::Ruby, + ".rb", + "operator_assignment", + "value += 1", + ), + ( + "begin\n work\nrescue => error\n error\nend\n", + Language::Ruby, + ".rb", + "exception_variable", + "=> error", + ), + ( + "value = other\n", + Language::Python, + ".py", + "expression_statement", + "value = other", + ), + ( + "let value = other;\n", + Language::TypeScript, + ".ts", + "variable_declarator", + "value = other", + ), + ( + "local value = other\n", + Language::Lua, + ".lua", + "assignment_statement", + "value = other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let mut locals = BTreeSet::new(); + normalizer.collect_ruby_assignment_locals(node, &mut locals); + + assert_eq!( + locals, + ruby_private_collected_names( + source, + language, + suffix, + "collect_ruby_assignment_locals", + kind, + text + ), + "collect_ruby_assignment_locals mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn collect_ruby_scope_locals_matches_ruby_private_method() { + for (source, language, suffix, kind, text, root) in [ + ( + "def outer(a)\n local = 1\n items.each { |item| nested = item }\n def inner(inner_arg)\n inner_local = 1\n end\nend\n", + Language::Ruby, + ".rb", + "method", + "def outer(a)\n local = 1\n items.each { |item| nested = item }\n def inner(inner_arg)\n inner_local = 1\n end\nend", + true, + ), + ( + "def outer(a)\n local = 1\nend\n", + Language::Ruby, + ".rb", + "method", + "def outer(a)\n local = 1\nend", + false, + ), + ( + "[1].each { |item| local = item }\n", + Language::Ruby, + ".rb", + "block", + "{ |item| local = item }", + true, + ), + ( + "value = other\n", + Language::Python, + ".py", + "expression_statement", + "value = other", + true, + ), + ( + "let value 
= other;\n", + Language::TypeScript, + ".ts", + "variable_declarator", + "value = other", + true, + ), + ( + "local value = other\n", + Language::Lua, + ".lua", + "assignment_statement", + "value = other", + true, + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let mut locals = BTreeSet::new(); + normalizer.collect_ruby_scope_locals(node, &mut locals, root); + + assert_eq!( + locals, + ruby_private_scope_collected_names(source, language, suffix, kind, text, root), + "collect_ruby_scope_locals mismatch for {language:?} {kind} {text:?} root={root}" + ); + } +} + +#[test] +fn ruby_scope_locals_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def outer(a)\n local = 1\n items.each { |item| nested = item }\n def inner(inner_arg)\n inner_local = 1\n end\nend\n", + Language::Ruby, + ".rb", + "method", + "def outer(a)\n local = 1\n items.each { |item| nested = item }\n def inner(inner_arg)\n inner_local = 1\n end\nend", + ), + ( + "[1].each { |item| local = item }\n", + Language::Ruby, + ".rb", + "block", + "{ |item| local = item }", + ), + ( + "value = other\n", + Language::Python, + ".py", + "expression_statement", + "value = other", + ), + ( + "let value = other;\n", + Language::TypeScript, + ".ts", + "variable_declarator", + "value = other", + ), + ( + "local value = other\n", + Language::Lua, + ".lua", + "assignment_statement", + "value = other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.ruby_scope_locals(node), + ruby_private_ruby_scope_locals(source, language, suffix, kind, text), + "ruby_scope_locals mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn 
with_ruby_scope_matches_ruby_private_method() { + for (source, language, suffix, kind, text, reset, initial_stack) in [ + ( + "def f(a)\n local = 1\nend\n", + Language::Ruby, + ".rb", + "method", + "def f(a)\n local = 1\nend", + false, + vec![vec!["outer"]], + ), + ( + "def f(a)\n local = 1\nend\n", + Language::Ruby, + ".rb", + "method", + "def f(a)\n local = 1\nend", + true, + vec![vec!["outer"]], + ), + ( + "[1].each { |item| local = item }\n", + Language::Ruby, + ".rb", + "block", + "{ |item| local = item }", + false, + vec![], + ), + ( + "def f(value):\n local = value\n", + Language::Python, + ".py", + "function_definition", + "def f(value):\n local = value", + true, + vec![vec!["outer"]], + ), + ( + "function f(value) { let local = value; }\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function f(value) { let local = value; }", + true, + vec![vec!["outer"]], + ), + ( + "function f(value)\n local local_value = value\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function f(value)\n local local_value = value\nend", + true, + vec![vec!["outer"]], + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + normalizer.local_stack = local_stack_from(&initial_stack); + let before = local_stack_value(&normalizer.local_stack); + let inside = normalizer.with_ruby_scope(node, reset, |normalizer| { + local_stack_value(&normalizer.local_stack) + }); + let after = local_stack_value(&normalizer.local_stack); + let rust = json!({ + "before": before, + "inside": inside, + "after": after, + "result": "block-result", + }); + + assert_eq!( + rust, + ruby_private_with_ruby_scope_trace( + source, + language, + suffix, + kind, + text, + reset, + &initial_stack, + ), + "with_ruby_scope mismatch for {language:?} {kind} {text:?} reset={reset}" + ); + } +} + +#[test] +fn 
ruby_scope_boundary_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n value\nend\n", + Language::Ruby, + ".rb", + "method", + "def f\n value\nend", + ), + ( + "class Box\nend\n", + Language::Ruby, + ".rb", + "class", + "class Box\nend", + ), + ( + "module Admin\nend\n", + Language::Ruby, + ".rb", + "module", + "module Admin\nend", + ), + ( + "items.each { |item| item }\n", + Language::Ruby, + ".rb", + "block", + "{ |item| item }", + ), + ( + "handler = -> { value }\n", + Language::Ruby, + ".rb", + "block", + "{ value }", + ), + ( + "def f():\n return value\n break\n continue\n", + Language::Python, + ".py", + "function_definition", + "def f():\n return value\n break\n continue", + ), + ( + "def f():\n return value\n", + Language::Python, + ".py", + "block", + "return value", + ), + ( + "class Box:\n pass\n", + Language::Python, + ".py", + "class_definition", + "class Box:\n pass", + ), + ( + "function f() { return value; }\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function f() { return value; }", + ), + ( + "class Box {}\n", + Language::TypeScript, + ".ts", + "class_declaration", + "class Box {}", + ), + ( + "function f()\n return value\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function f()\n return value\nend", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.ruby_scope_boundary(node), + ruby_private_predicate(source, language, suffix, "ruby_scope_boundary?", kind, text), + "ruby_scope_boundary? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn ruby_scope_child_boundary_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n value\nend\n", + Language::Ruby, + ".rb", + "method", + "def f\n value\nend", + ), + ( + "class Box\nend\n", + Language::Ruby, + ".rb", + "class", + "class Box\nend", + ), + ( + "module Admin\nend\n", + Language::Ruby, + ".rb", + "module", + "module Admin\nend", + ), + ( + "items.each { |item| item }\n", + Language::Ruby, + ".rb", + "block", + "{ |item| item }", + ), + ( + "handler = -> { value }\n", + Language::Ruby, + ".rb", + "block", + "{ value }", + ), + ( + "def f():\n return value\n", + Language::Python, + ".py", + "function_definition", + "def f():\n return value", + ), + ( + "def f():\n return value\n", + Language::Python, + ".py", + "block", + "return value", + ), + ( + "class Box:\n pass\n", + Language::Python, + ".py", + "class_definition", + "class Box:\n pass", + ), + ( + "function f() { return value; }\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function f() { return value; }", + ), + ( + "class Box {}\n", + Language::TypeScript, + ".ts", + "class_declaration", + "class Box {}", + ), + ( + "function f()\n return value\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function f()\n return value\nend", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.ruby_scope_child_boundary(node), + ruby_private_predicate( + source, + language, + suffix, + "ruby_scope_child_boundary?", + kind, + text + ), + "ruby_scope_child_boundary? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn ruby_predicate_uses_normalization_adapter() { + for (language, expected) in [ + (Language::Ruby, true), + (Language::Python, false), + (Language::Lua, false), + (Language::TypeScript, false), + ] { + let normalizer = super::TreeSitterNormalizer::new("", language); + + assert_eq!( + normalizer.ruby(), + expected, + "ruby? mismatch for {language:?}" + ); + } +} + +#[test] +fn interpolated_string_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "name = \"hi #{user}\"\nplain = \"hi\"\n", + Language::Ruby, + ".rb", + "string", + "\"hi #{user}\"", + ), + ( + "name = \"hi #{user}\"\nplain = \"hi\"\n", + Language::Ruby, + ".rb", + "string", + "\"hi\"", + ), + ( + "name = f\"hi {user}\"\nplain = \"hi\"\n", + Language::Python, + ".py", + "string", + "f\"hi {user}\"", + ), + ( + "name = f\"hi {user}\"\nplain = \"hi\"\n", + Language::Python, + ".py", + "string", + "\"hi\"", + ), + ( + "const name = `hi ${user}`;\nconst plain = `hi`;\n", + Language::TypeScript, + ".ts", + "template_string", + "`hi ${user}`", + ), + ( + "const name = `hi ${user}`;\nconst plain = `hi`;\n", + Language::TypeScript, + ".ts", + "template_string", + "`hi`", + ), + ( + "local name = \"hi\"\n", + Language::Lua, + ".lua", + "expression_list", + "\"hi\"", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.interpolated_string(node), + ruby_private_predicate(source, language, suffix, "interpolated_string?", kind, text), + "interpolated_string? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_interpolated_string_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "name = \"hi #{user}\"\n", + Language::Ruby, + ".rb", + "string", + "\"hi #{user}\"", + ), + ( + "name = f\"hi {user}\"\n", + Language::Python, + ".py", + "string", + "f\"hi {user}\"", + ), + ( + "const name = `hi ${user}`;\n", + Language::TypeScript, + ".ts", + "template_string", + "`hi ${user}`", + ), + ( + "local name = \"hi\"\n", + Language::Lua, + ".lua", + "expression_list", + "\"hi\"", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = node_value(&normalizer.normalize_interpolated_string(node)); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_interpolated_string", + kind, + text + ), + "normalize_interpolated_string mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_subshell_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "value = `echo hi`\n", + Language::Ruby, + ".rb", + "subshell", + "`echo hi`", + ), + ( + "value = `echo #{name}`\n", + Language::Ruby, + ".rb", + "subshell", + "`echo #{name}`", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = node_value(&normalizer.normalize_subshell(node)); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_subshell", + kind, + text + ), + "normalize_subshell mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn const_node_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "class Thing; 
end\ndef check; end\n", + Language::Ruby, + ".rb", + "constant", + "Thing", + ), + ( + "class Thing; end\ndef check; end\n", + Language::Ruby, + ".rb", + "identifier", + "check", + ), + ( + "class Thing:\n pass\n", + Language::Python, + ".py", + "identifier", + "Thing", + ), + ( + "type Thing = Other;\nconst value = Thing;\n", + Language::TypeScript, + ".ts", + "type_identifier", + "Thing", + ), + ( + "type Thing = Other;\nconst value = Thing;\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + ), + ( + "local Thing = {}\n", + Language::Lua, + ".lua", + "variable_list", + "Thing", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.const_kind(node.kind()), + ruby_private_predicate(source, language, suffix, "const_node?", kind, text), + "const_node? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn self_node_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ("self\nother\n", Language::Ruby, ".rb", "self", "self"), + ( + "self\nother\n", + Language::Ruby, + ".rb", + "identifier", + "other", + ), + ( + "self.value\nother.value\n", + Language::Python, + ".py", + "identifier", + "self", + ), + ( + "self.value\nother.value\n", + Language::Python, + ".py", + "identifier", + "other", + ), + ( + "this.value;\nother;\n", + Language::TypeScript, + ".ts", + "this", + "this", + ), + ( + "this.value;\nother;\n", + Language::TypeScript, + ".ts", + "identifier", + "other", + ), + ( + "print(self.value)\nprint(other.value)\n", + Language::Lua, + ".lua", + "identifier", + "self", + ), + ( + "print(self.value)\nprint(other.value)\n", + Language::Lua, + ".lua", + "identifier", + "other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = 
super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.self_node(node), + ruby_private_predicate(source, language, suffix, "self_node?", kind, text), + "self_node? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn instance_variable_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "@value\nname\n", + Language::Ruby, + ".rb", + "instance_variable", + "@value", + ), + ( + "@value\nname\n", + Language::Ruby, + ".rb", + "identifier", + "name", + ), + ( + "@decorator\ndef call():\n pass\n", + Language::Python, + ".py", + "decorator", + "@decorator", + ), + ( + "@sealed\nclass Thing {}\n", + Language::TypeScript, + ".ts", + "decorator", + "@sealed", + ), + ( + "print(value)\n", + Language::Lua, + ".lua", + "identifier", + "value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.instance_variable(node), + ruby_private_predicate(source, language, suffix, "instance_variable?", kind, text), + "instance_variable? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn global_variable_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "$value\nname\n", + Language::Ruby, + ".rb", + "global_variable", + "$value", + ), + ( + "$value\nname\n", + Language::Ruby, + ".rb", + "identifier", + "name", + ), + ( + "value = \"$name\"\n", + Language::Python, + ".py", + "string_content", + "$name", + ), + ( + "const $value = other;\n", + Language::TypeScript, + ".ts", + "identifier", + "$value", + ), + ( + "print(\"$name\")\n", + Language::Lua, + ".lua", + "string_content", + "$name", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.global_variable(node), + ruby_private_predicate(source, language, suffix, "global_variable?", kind, text), + "global_variable? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_global_variable_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "$value\n$1\n$12\n$0\n", + Language::Ruby, + ".rb", + "global_variable", + "$value", + ), + ( + "$value\n$1\n$12\n$0\n", + Language::Ruby, + ".rb", + "global_variable", + "$1", + ), + ( + "$value\n$1\n$12\n$0\n", + Language::Ruby, + ".rb", + "global_variable", + "$12", + ), + ( + "$value\n$1\n$12\n$0\n", + Language::Ruby, + ".rb", + "global_variable", + "$0", + ), + ( + "value = \"$name\"\n", + Language::Python, + ".py", + "string_content", + "$name", + ), + ( + "const $value = 1;\n", + Language::TypeScript, + ".ts", + "identifier", + "$value", + ), + ( + "print(\"$name\")\n", + Language::Lua, + ".lua", + "string_content", + "$name", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = 
normalizer.normalize_global_variable(node); + + assert_eq!( + node_value(&rust), + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_global_variable", + kind, + text + ), + "normalize_global_variable mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn assignment_operator_matches_ruby_private_predicate() { + for (language, text) in [ + (Language::Ruby, "="), + (Language::Ruby, "**="), + (Language::Ruby, "??="), + (Language::Python, ":="), + (Language::Python, "//="), + (Language::Python, "&&="), + (Language::TypeScript, "??="), + (Language::TypeScript, ">>>="), + (Language::TypeScript, ":="), + (Language::Lua, "="), + (Language::Lua, "+="), + ] { + let normalizer = super::TreeSitterNormalizer::new("", language); + + assert_eq!( + normalizer.assignment_operator(text), + ruby_private_text_predicate(language, "assignment_operator?", text), + "assignment_operator? mismatch for {language:?} {text:?}" + ); + } +} + +#[test] +fn operator_assignment_operator_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "value **= other\nflag ||= fallback\n", + Language::Ruby, + ".rb", + "operator_assignment", + "value **= other", + ), + ( + "value **= other\nflag ||= fallback\n", + Language::Ruby, + ".rb", + "operator_assignment", + "flag ||= fallback", + ), + ( + "value //= other\n", + Language::Python, + ".py", + "expression_statement", + "value //= other", + ), + ( + "value ??= other;\ncount >>>= 1;\n", + Language::TypeScript, + ".ts", + "augmented_assignment_expression", + "value ??= other", + ), + ( + "value ??= other;\ncount >>>= 1;\n", + Language::TypeScript, + ".ts", + "augmented_assignment_expression", + "count >>>= 1", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.operator_assignment_operator(node), + 
ruby_private_string( + source, + language, + suffix, + "operator_assignment_operator", + kind, + text + ), + "operator_assignment_operator mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_logical_operator_assignment_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "value ||= fallback\n", + Language::Ruby, + ".rb", + "operator_assignment", + "value ||= fallback", + ), + ( + "value &&= fallback\n", + Language::Ruby, + ".rb", + "operator_assignment", + "value &&= fallback", + ), + ( + "value += fallback\n", + Language::Ruby, + ".rb", + "operator_assignment", + "value += fallback", + ), + ( + "@value ||= fallback\n", + Language::Ruby, + ".rb", + "operator_assignment", + "@value ||= fallback", + ), + ( + "value //= fallback\n", + Language::Python, + ".py", + "expression_statement", + "value //= fallback", + ), + ( + "value ||= fallback;\n", + Language::TypeScript, + ".ts", + "augmented_assignment_expression", + "value ||= fallback", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let left = normalizer + .assignment_left(node) + .expect("operator assignment should have left side"); + let right = normalizer + .assignment_right(node) + .and_then(|right| normalizer.normalize_node(right)); + let operator = normalizer.operator_assignment_operator(node); + let rust = normalizer + .normalize_logical_operator_assignment(left, &operator, right, node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_logical_operator_assignment_value(source, language, suffix, kind, text), + "normalize_logical_operator_assignment mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_operator_assignment_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "value += other\n", + 
Language::Ruby, + ".rb", + "operator_assignment", + "value += other", + ), + ( + "$value += 1\n", + Language::Ruby, + ".rb", + "operator_assignment", + "$value += 1", + ), + ( + "items[index] += value\n", + Language::Ruby, + ".rb", + "operator_assignment", + "items[index] += value", + ), + ( + "object.value += 1\n", + Language::Ruby, + ".rb", + "operator_assignment", + "object.value += 1", + ), + ( + "flag ||= fallback\n", + Language::Ruby, + ".rb", + "operator_assignment", + "flag ||= fallback", + ), + ( + "flag &&= fallback\n", + Language::Ruby, + ".rb", + "operator_assignment", + "flag &&= fallback", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_operator_assignment(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_operator_assignment", + kind, + text + ), + "normalize_operator_assignment mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn first_named_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "class Thing; end\nname\n", + Language::Ruby, + ".rb", + "class", + "class Thing; end", + ), + ( + "class Thing; end\nname\n", + Language::Ruby, + ".rb", + "identifier", + "name", + ), + ( + "def check(value):\n return value\n", + Language::Python, + ".py", + "function_definition", + "def check(value):\n return value", + ), + ( + "function check(value) { return value; }\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function check(value) { return value; }", + ), + ( + "print(value)\n", + Language::Lua, + ".lua", + "function_call", + "print(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = 
super::TreeSitterNormalizer::new(source, language); + let found = normalizer.first_named(node).map(|node| { + ( + node.kind().to_string(), + super::node_text(node, source).to_string(), + ) + }); + + assert_eq!( + found, + ruby_private_node_signature(source, language, suffix, "first_named", kind, text), + "first_named mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn block_child_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def check\n call\nend\n", + Language::Ruby, + ".rb", + "method", + "def check\n call\nend", + ), + ( + "items.each do\n call\nend\n", + Language::Ruby, + ".rb", + "call", + "items.each do\n call\nend", + ), + ( + "def check():\n call()\n", + Language::Python, + ".py", + "function_definition", + "def check():\n call()", + ), + ( + "function check() { call(); }\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function check() { call(); }", + ), + ( + "function check()\n call()\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function check()\n call()\nend", + ), + ("name\n", Language::Ruby, ".rb", "identifier", "name"), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let found = normalizer.block_child(node).map(|node| { + ( + node.kind().to_string(), + super::node_text(node, source).to_string(), + ) + }); + + assert_eq!( + found, + ruby_private_node_signature(source, language, suffix, "block_child", kind, text), + "block_child mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn branch_child_matches_ruby_private_method() { + for (source, language, suffix, kind, text, condition_kind, condition_text, index) in [ + ( + "if ready\n call\nelse\n stop\nend\n", + Language::Ruby, + ".rb", + "if", + "if ready\n call\nelse\n stop\nend", + "identifier", + "ready", + 0, + ), + ( + "if ready\n call\nelse\n 
stop\nend\n", + Language::Ruby, + ".rb", + "if", + "if ready\n call\nelse\n stop\nend", + "identifier", + "ready", + 1, + ), + ( + "if ready\n # note\n call\nend\n", + Language::Ruby, + ".rb", + "if", + "if ready\n # note\n call\nend", + "identifier", + "ready", + 0, + ), + ( + "if ready:\n call()\nelse:\n stop()\n", + Language::Python, + ".py", + "if_statement", + "if ready:\n call()\nelse:\n stop()", + "identifier", + "ready", + 1, + ), + ( + "if (ready) { call(); } else { stop(); }\n", + Language::TypeScript, + ".ts", + "if_statement", + "if (ready) { call(); } else { stop(); }", + "parenthesized_expression", + "(ready)", + 0, + ), + ( + "if ready then\n call()\nelse\n stop()\nend\n", + Language::Lua, + ".lua", + "if_statement", + "if ready then\n call()\nelse\n stop()\nend", + "identifier", + "ready", + 1, + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let condition = first_raw_node(tree.root_node(), source, condition_kind, condition_text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let found = normalizer.branch_child(node, condition, index).map(|node| { + ( + node.kind().to_string(), + super::node_text(node, source).to_string(), + ) + }); + + assert_eq!( + found, + ruby_private_branch_child_signature( + source, + language, + suffix, + kind, + text, + condition_kind, + condition_text, + index + ), + "branch_child mismatch for {language:?} {kind} {text:?} index {index}" + ); + } +} + +#[test] +fn explicit_alternative_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "if ready\n call\nelsif other\n stop\nend\n", + Language::Ruby, + ".rb", + "if", + "if ready\n call\nelsif other\n stop\nend", + ), + ( + "if ready\n call\nend\n", + Language::Ruby, + ".rb", + "if", + "if ready\n call\nend", + ), + ( + "if ready:\n call()\nelif other:\n stop()\n", + Language::Python, + ".py", + "if_statement", + "if ready:\n call()\nelif 
other:\n stop()", + ), + ( + "if (ready) { call(); } else { stop(); }\n", + Language::TypeScript, + ".ts", + "if_statement", + "if (ready) { call(); } else { stop(); }", + ), + ( + "if ready then\n call()\nelseif other then\n stop()\nend\n", + Language::Lua, + ".lua", + "if_statement", + "if ready then\n call()\nelseif other then\n stop()\nend", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let found = normalizer.explicit_alternative(node).map(|node| { + ( + node.kind().to_string(), + super::node_text(node, source).to_string(), + ) + }); + + assert_eq!( + found, + ruby_private_node_signature( + source, + language, + suffix, + "explicit_alternative", + kind, + text + ), + "explicit_alternative mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn wrap_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "first\nsecond\n", + Language::Ruby, + ".rb", + "identifier", + "second", + ), + ( + "first\nsecond\n", + Language::Python, + ".py", + "expression_statement", + "second", + ), + ( + "first;\nsecond;\n", + Language::TypeScript, + ".ts", + "identifier", + "second", + ), + ( + "print(first)\nprint(second)\n", + Language::Lua, + ".lua", + "identifier", + "second", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + let raw_wrapped = normalizer.wrap("OUTER", vec![Child::Symbol("child".to_string())], node); + assert_eq!( + node_value(&raw_wrapped), + ruby_private_wrap_value(source, language, suffix, kind, text, false), + "wrap raw-source mismatch for {language:?} {kind} {text:?}" + ); + + let inner = normalizer.wrap("INNER", Vec::new(), node); + let node_wrapped = normalizer.wrap_from_source_node( + "OUTER", + 
vec![Child::Symbol("child".to_string())], + &inner, + ); + assert_eq!( + node_value(&node_wrapped), + ruby_private_wrap_value(source, language, suffix, kind, text, true), + "wrap normalized-source mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn source_before_child_matches_ruby_private_method() { + for (source, language, suffix, kind, text, child_kind, child_text) in [ + ( + "if ready\n call\nend\n", + Language::Ruby, + ".rb", + "if", + "if ready\n call\nend", + "then", + "\n call", + ), + ( + "if ready:\n call()\n", + Language::Python, + ".py", + "if_statement", + "if ready:\n call()", + "block", + "call()", + ), + ( + "if (ready) { call(); }\n", + Language::TypeScript, + ".ts", + "if_statement", + "if (ready) { call(); }", + "statement_block", + "{ call(); }", + ), + ( + "if ready then\n call()\nend\n", + Language::Lua, + ".lua", + "if_statement", + "if ready then\n call()\nend", + "block", + "call()", + ), + ( + "puts value\n", + Language::Ruby, + ".rb", + "call", + "puts value", + "identifier", + "puts", + ), + ( + "call()\n", + Language::Python, + ".py", + "expression_statement", + "call()", + "identifier", + "call", + ), + ( + "call();\n", + Language::TypeScript, + ".ts", + "expression_statement", + "call();", + "identifier", + "call", + ), + ( + "call()\n", + Language::Lua, + ".lua", + "function_call", + "call()", + "identifier", + "call", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let child = first_raw_node(tree.root_node(), source, child_kind, child_text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let source_node = normalizer.source_before_child(node, child); + let wrapped = normalizer.wrap_from_source_node("OUTER", Vec::new(), &source_node); + + assert_eq!( + node_value(&wrapped), + ruby_private_source_before_child_wrap_value( + source, language, suffix, kind, text, child_kind, child_text + ), + "source_before_child mismatch 
for {language:?} {kind} {text:?} before {child_kind} {child_text:?}" + ); + } +} + +#[test] +fn source_from_nodes_matches_ruby_private_method() { + for (source, language, suffix, first_kind, first_text, last_kind, last_text) in [ + ( + "left + right\n", + Language::Ruby, + ".rb", + "identifier", + "left", + "identifier", + "right", + ), + ( + "left = one\nright = two\n", + Language::Python, + ".py", + "identifier", + "one", + "identifier", + "two", + ), + ( + "const left = one;\nconst right = two;\n", + Language::TypeScript, + ".ts", + "identifier", + "one", + "identifier", + "two", + ), + ( + "local left = one\nlocal right = two\n", + Language::Lua, + ".lua", + "expression_list", + "one", + "expression_list", + "two", + ), + ] { + let tree = raw_tree(source, language); + let first_raw = first_raw_node(tree.root_node(), source, first_kind, first_text); + let last_raw = first_raw_node(tree.root_node(), source, last_kind, last_text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let source_node = normalizer.source_from_nodes(first_raw, last_raw); + + assert_eq!( + node_value(&source_node), + ruby_private_source_from_nodes_value( + source, language, suffix, first_kind, first_text, last_kind, last_text + ), + "source_from_nodes mismatch for {language:?} {first_kind} {first_text:?} through {last_kind} {last_text:?}" + ); + } +} + +#[test] +fn source_from_normalized_nodes_matches_ruby_private_method() { + for (source, language, suffix, first_kind, first_text, last_kind, last_text) in [ + ( + "first\nsecond\n", + Language::Ruby, + ".rb", + "identifier", + "first", + "identifier", + "second", + ), + ( + "first\nsecond\n", + Language::Python, + ".py", + "expression_statement", + "first", + "expression_statement", + "second", + ), + ( + "first;\nsecond;\n", + Language::TypeScript, + ".ts", + "expression_statement", + "first;", + "expression_statement", + "second;", + ), + ( + "print(first)\nprint(second)\n", + Language::Lua, + ".lua", + 
"function_call", + "print(first)", + "function_call", + "print(second)", + ), + ( + "first + second\n", + Language::Ruby, + ".rb", + "identifier", + "first", + "identifier", + "second", + ), + ] { + let tree = raw_tree(source, language); + let first_raw = first_raw_node(tree.root_node(), source, first_kind, first_text); + let last_raw = first_raw_node(tree.root_node(), source, last_kind, last_text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let first_node = normalizer.wrap("FIRST", Vec::new(), first_raw); + let last_node = normalizer.wrap("LAST", Vec::new(), last_raw); + let source_node = normalizer.source_from_normalized_nodes(&first_node, &last_node); + + assert_eq!( + node_value(&source_node), + ruby_private_source_from_normalized_nodes_value( + source, language, suffix, first_kind, first_text, last_kind, last_text + ), + "source_from_normalized_nodes mismatch for {language:?} {first_kind} {first_text:?} through {last_kind} {last_text:?}" + ); + } +} + +#[test] +fn named_field_matches_ruby_private_method() { + for (source, language, suffix, kind, text, field) in [ + ( + "def check(value)\n value\nend\n", + Language::Ruby, + ".rb", + "method", + "def check(value)\n value\nend", + "name", + ), + ( + "def check(value)\n value\nend\n", + Language::Ruby, + ".rb", + "method", + "def check(value)\n value\nend", + "missing", + ), + ( + "if ready:\n call()\n", + Language::Python, + ".py", + "if_statement", + "if ready:\n call()", + "body", + ), + ( + "if ready:\n call()\n", + Language::Python, + ".py", + "if_statement", + "if ready:\n call()", + "condition", + ), + ( + "function check(value) { return value; }\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function check(value) { return value; }", + "body", + ), + ( + "function check(value)\n return value\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function check(value)\n return value\nend", + "body", + ), + ] { + let tree = raw_tree(source, language); 
+ let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let found = normalizer.named_field(node, field).map(|node| { + ( + node.kind().to_string(), + super::node_text(node, source).to_string(), + ) + }); + + assert_eq!( + found, + ruby_private_named_field_signature(source, language, suffix, kind, text, field), + "named_field mismatch for {language:?} {kind} {text:?} field {field}" + ); + } +} + +#[test] +fn parent_node_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def check\nend\n", + Language::Ruby, + ".rb", + "identifier", + "check", + ), + ("value\n", Language::Ruby, ".rb", "program", "value\n"), + ( + "if ready:\n call()\n", + Language::Python, + ".py", + "identifier", + "ready", + ), + ( + "call(value);\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + ), + ( + "call(value)\n", + Language::Lua, + ".lua", + "identifier", + "value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let found = normalizer.parent_node(node).map(|node| { + ( + node.kind().to_string(), + super::node_text(node, source).to_string(), + ) + }); + + assert_eq!( + found, + ruby_private_node_signature(source, language, suffix, "parent_node", kind, text), + "parent_node mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn next_sibling_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ("a + b\n", Language::Ruby, ".rb", "identifier", "a"), + ("a + b\n", Language::Python, ".py", "identifier", "a"), + ("a + b;\n", Language::TypeScript, ".ts", "identifier", "a"), + ("print(a, b)\n", Language::Lua, ".lua", "identifier", "a"), + ("a\n", Language::Ruby, ".rb", "identifier", "a"), + ] { + let tree = raw_tree(source, language); + let node = 
first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let found = normalizer.next_sibling(node).map(|node| { + ( + node.kind().to_string(), + super::node_text(node, source).to_string(), + ) + }); + + assert_eq!( + found, + ruby_private_node_signature(source, language, suffix, "next_sibling", kind, text), + "next_sibling mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn prev_sibling_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ("a + b\n", Language::Ruby, ".rb", "identifier", "b"), + ("a + b\n", Language::Python, ".py", "identifier", "b"), + ("a + b;\n", Language::TypeScript, ".ts", "identifier", "b"), + ("print(a, b)\n", Language::Lua, ".lua", "identifier", "b"), + ("a\n", Language::Ruby, ".rb", "identifier", "a"), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let found = normalizer.prev_sibling(node).map(|node| { + ( + node.kind().to_string(), + super::node_text(node, source).to_string(), + ) + }); + + assert_eq!( + found, + ruby_private_node_signature(source, language, suffix, "prev_sibling", kind, text), + "prev_sibling mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn next_named_sibling_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ("a + b\n", Language::Ruby, ".rb", "identifier", "a"), + ("a + b\n", Language::Python, ".py", "identifier", "a"), + ("a + b;\n", Language::TypeScript, ".ts", "identifier", "a"), + ("print(a, b)\n", Language::Lua, ".lua", "identifier", "a"), + ("a\n", Language::Ruby, ".rb", "identifier", "a"), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let found = 
normalizer.next_named_sibling(node).map(|node| { + ( + node.kind().to_string(), + super::node_text(node, source).to_string(), + ) + }); + + assert_eq!( + found, + ruby_private_node_signature(source, language, suffix, "next_named_sibling", kind, text), + "next_named_sibling mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn ternary_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f(cond, a, b)\n cond ? a : b\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "cond ? a : b", + ), + ( + "value = a if cond else b\n", + Language::Python, + ".py", + "conditional_expression", + "a if cond else b", + ), + ( + "const value = cond ? a : b;\n", + Language::TypeScript, + ".ts", + "ternary_expression", + "cond ? a : b", + ), + ( + "local value = cond and a or b\n", + Language::Lua, + ".lua", + "expression_list", + "cond and a or b", + ), + ( + "def f(cond)\n cond\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "cond", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.ternary_statement(node), + ruby_private_predicate(source, language, suffix, "ternary_statement?", kind, text), + "ternary_statement? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_ternary_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f(cond, a, b)\n cond ? a : b\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "cond ? a : b", + ), + ( + "value = a if cond else b\n", + Language::Python, + ".py", + "conditional_expression", + "a if cond else b", + ), + ( + "const value = cond ? a : b;\n", + Language::TypeScript, + ".ts", + "ternary_expression", + "cond ? 
a : b", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_ternary_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_ternary_statement", + kind, + text + ), + "normalize_ternary_statement mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn ternary_statement_normalization_matches_ruby() { + for (source, language, suffix, if_text) in [ + ( + "def f(cond, a, b)\n cond ? a : b\nend\n", + Language::Ruby, + ".rb", + "cond ? a : b", + ), + ( + "def f(cond, a, b):\n return a if cond else b\n", + Language::Python, + ".py", + "a if cond else b", + ), + ( + "function f(cond: boolean, a: number, b: number) { return cond ? a : b; }\n", + Language::TypeScript, + ".ts", + "cond ? 
a : b", + ), + ] { + let root = parse_language_source(source, language, suffix); + let if_node = first_node(&root, "IF", if_text); + assert_eq!(child_node(if_node, 0).text, "cond"); + assert_eq!(child_node(if_node, 1).text, "a"); + assert_eq!(child_node(if_node, 2).text, "b"); + assert_ruby_parity(source, language, suffix); + } +} + +#[test] +fn case_argument_list_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f(x)\n return case x\n when 1 then :one\n else :other\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "case x\n when 1 then :one\n else :other\n end", + ), + ( + "case x\nwhen 1 then :one\nelse :other\nend\n", + Language::Ruby, + ".rb", + "case", + "case x\nwhen 1 then :one\nelse :other\nend", + ), + ( + "match value:\n case 1:\n one()\n", + Language::Python, + ".py", + "case_clause", + "case 1:\n one()", + ), + ( + "switch (value) { case 1: one(); break; }\n", + Language::TypeScript, + ".ts", + "switch_case", + "case 1: one(); break;", + ), + ( + "if value == 1 then one() end\n", + Language::Lua, + ".lua", + "if_statement", + "if value == 1 then one() end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.case_argument_list(node), + ruby_private_predicate(source, language, suffix, "case_argument_list?", kind, text), + "case_argument_list? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn leading_function_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def outer\n def inner\n x\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "def inner\n x\n end", + ), + ( + "def outer():\n def inner():\n x\n", + Language::Python, + ".py", + "block", + "def inner():\n x", + ), + ( + "function outer()\n function inner()\n x()\n end\nend\n", + Language::Lua, + ".lua", + "block", + "function inner()\n x()\n end", + ), + ( + "function outer() { function inner() { x; } }\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function inner() { x; }", + ), + ( + "def outer\n x\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "x", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.leading_function_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "leading_function_statement?", + kind, + text + ), + "leading_function_statement? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_leading_function_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def outer\n def inner\n x\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "def inner\n x\n end", + ), + ( + "def outer():\n def inner():\n x\n", + Language::Python, + ".py", + "block", + "def inner():\n x", + ), + ( + "function outer()\n function inner()\n x()\n end\nend\n", + Language::Lua, + ".lua", + "block", + "function inner()\n x()\n end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_leading_function_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_leading_function_statement", + kind, + text + ), + "normalize_leading_function_statement mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn leading_function_statement_normalization_matches_ruby() { + for (source, language, suffix) in [ + ( + "def outer\n def inner\n x\n end\nend\n", + Language::Ruby, + ".rb", + ), + ( + "def outer():\n def inner():\n x\n", + Language::Python, + ".py", + ), + ( + "function outer()\n function inner()\n x()\n end\nend\n", + Language::Lua, + ".lua", + ), + ] { + let root = parse_language_source(source, language, suffix); + let mut defns = Vec::new(); + nodes_of_type(&root, "DEFN", &mut defns); + assert!( + defns.iter().any( + |node| matches!(node.children.first(), Some(Child::Symbol(name)) if name == "inner") + ), + "expected nested DEFN inner for {language:?} in {root:#?}" + ); + let mut iters = Vec::new(); + nodes_of_type(&root, "ITER", &mut iters); + assert!( + iters.iter().all(|node| !node.text.contains("inner")), + "nested function must 
not normalize as ITER for {language:?}: {iters:#?}" + ); + assert_ruby_parity(source, language, suffix); + } +} + +#[test] +fn leading_owner_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def outer\n class Inner\n value\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "class Inner\n value\n end", + ), + ( + "def outer\n module Inner\n value\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "module Inner\n value\n end", + ), + ( + "def outer():\n class Inner:\n pass\n", + Language::Python, + ".py", + "block", + "class Inner:\n pass", + ), + ( + "function outer() { class Inner {} }\n", + Language::TypeScript, + ".ts", + "class_declaration", + "class Inner {}", + ), + ( + "function outer()\n Inner = {}\nend\n", + Language::Lua, + ".lua", + "block", + "Inner = {}", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.leading_owner_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "leading_owner_statement?", + kind, + text + ), + "leading_owner_statement? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_leading_owner_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def outer\n class Inner\n value\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "class Inner\n value\n end", + ), + ( + "def outer\n module Inner\n value\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "module Inner\n value\n end", + ), + ( + "def outer():\n class Inner:\n pass\n", + Language::Python, + ".py", + "block", + "class Inner:\n pass", + ), + ( + "function outer() { class Inner {} }\n", + Language::TypeScript, + ".ts", + "class_declaration", + "class Inner {}", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_leading_owner_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_leading_owner_statement", + kind, + text + ), + "normalize_leading_owner_statement mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn modifier_keyword_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n value if cond\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value if cond", + ), + ( + "def f\n value unless cond\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value unless cond", + ), + ( + "def f\n value while cond\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value while cond", + ), + ( + "def f\n value until cond\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value until cond", + ), + ( + "def f\n if cond\n value\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "if cond\n value\n end", + ), + ( + "def f():\n if cond:\n 
value()\n", + Language::Python, + ".py", + "block", + "if cond:\n value()", + ), + ( + "function f() { if (cond) { value(); } }\n", + Language::TypeScript, + ".ts", + "if_statement", + "if (cond) { value(); }", + ), + ( + "function f()\n if cond then\n value()\n end\nend\n", + Language::Lua, + ".lua", + "block", + "if cond then\n value()\n end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.modifier_keyword(node).unwrap_or_default(); + + assert_eq!( + rust, + ruby_private_string(source, language, suffix, "modifier_keyword", kind, text), + "modifier_keyword mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn modifier_parts_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n value if cond\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value if cond", + ), + ( + "def f\n value unless cond\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value unless cond", + ), + ( + "def f\n if cond\n value\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "if cond\n value\n end", + ), + ( + "def f():\n if cond:\n value()\n", + Language::Python, + ".py", + "block", + "if cond:\n value()", + ), + ( + "function f() { if (cond) { value(); } }\n", + Language::TypeScript, + ".ts", + "if_statement", + "if (cond) { value(); }", + ), + ( + "function f()\n if cond then\n value()\n end\nend\n", + Language::Lua, + ".lua", + "block", + "if cond then\n value()\n end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.modifier_parts(node).map(|(action, condition)| { + ( + ( + action.kind().to_string(), + super::node_text(action, source).to_string(), + ), + ( + 
condition.kind().to_string(), + super::node_text(condition, source).to_string(), + ), + ) + }); + + assert_eq!( + rust, + ruby_private_modifier_parts_signature(source, language, suffix, kind, text), + "modifier_parts mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn modifier_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n value if cond\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value if cond", + ), + ( + "def f\n return value if cond\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "return value if cond", + ), + ( + "def f\n if cond\n value\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "if cond\n value\n end", + ), + ( + "def f():\n if cond:\n value()\n", + Language::Python, + ".py", + "block", + "if cond:\n value()", + ), + ( + "function f() { if (cond) { value(); } }\n", + Language::TypeScript, + ".ts", + "if_statement", + "if (cond) { value(); }", + ), + ( + "function f()\n if cond then\n value()\n end\nend\n", + Language::Lua, + ".lua", + "block", + "if cond then\n value()\n end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.modifier_statement(node), + ruby_private_predicate(source, language, suffix, "modifier_statement?", kind, text), + "modifier_statement? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_modifier_action_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "return value if cond\n", + Language::Ruby, + ".rb", + "return", + "return value", + ), + ("break if done\n", Language::Ruby, ".rb", "break", "break"), + ( + "value if cond\n", + Language::Ruby, + ".rb", + "identifier", + "value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_modifier_action(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_modifier_action", + kind, + text + ), + "normalize_modifier_action mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_modifier_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n value if cond\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value if cond", + ), + ( + "def f\n value unless cond\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value unless cond", + ), + ( + "def f\n value while cond\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value while cond", + ), + ( + "def f\n value until cond\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value until cond", + ), + ( + "def f\n return value if cond\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "return value if cond", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_modifier_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + 
ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_modifier_statement", + kind, + text + ), + "normalize_modifier_statement mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn modifier_return_action_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "return value if ready\n", + Language::Ruby, + ".rb", + "return", + "return value", + ), + ("break if done\n", Language::Ruby, ".rb", "break", "break"), + ("next if skip\n", Language::Ruby, ".rb", "next", "next"), + ( + "return value if ready\n", + Language::Ruby, + ".rb", + "identifier", + "ready", + ), + ( + "def f():\n return value\n break\n continue\n", + Language::Python, + ".py", + "return_statement", + "return value", + ), + ( + "def f():\n return value\n break\n continue\n", + Language::Python, + ".py", + "break_statement", + "break", + ), + ( + "def f():\n return value\n break\n continue\n", + Language::Python, + ".py", + "continue_statement", + "continue", + ), + ( + "def f():\n return value\n", + Language::Python, + ".py", + "identifier", + "value", + ), + ( + "function f() { return value; break; continue; }\n", + Language::TypeScript, + ".ts", + "return_statement", + "return value;", + ), + ( + "function f() { return value; break; continue; }\n", + Language::TypeScript, + ".ts", + "break_statement", + "break;", + ), + ( + "function f() { return value; break; continue; }\n", + Language::TypeScript, + ".ts", + "continue_statement", + "continue;", + ), + ( + "function f() { return value; }\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + ), + ( + "return value\n", + Language::Lua, + ".lua", + "return_statement", + "return value", + ), + ( + "return value\n", + Language::Lua, + ".lua", + "expression_list", + "value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + 
assert_eq!( + normalizer.modifier_return_action(node), + ruby_private_predicate( + source, + language, + suffix, + "modifier_return_action?", + kind, + text + ), + "modifier_return_action? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn call_block_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "items.each do |item|\n item\nend\n", + Language::Ruby, + ".rb", + "call", + "items.each do |item|\n item\nend", + ), + ( + "items.map { |item| item }\n", + Language::Ruby, + ".rb", + "call", + "items.map { |item| item }", + ), + ( + "def f\n items.map { |item| item }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "items.map { |item| item }", + ), + ("items.each\n", Language::Ruby, ".rb", "call", "items.each"), + ( + "def f():\n value()\n", + Language::Python, + ".py", + "function_definition", + "def f():\n value()", + ), + ( + "function f()\n value()\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function f()\n value()\nend", + ), + ( + "function f() { value(); }\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function f() { value(); }", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let found = normalizer.call_block(node).map(|node| { + ( + node.kind().to_string(), + super::node_text(node, source).to_string(), + ) + }); + + assert_eq!( + found, + ruby_private_node_signature(source, language, suffix, "call_block", kind, text), + "call_block mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn statement_block_call_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n items.map { |item| item }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "items.map { |item| item }", + ), + ( + "items.map { |item| item }\n", + Language::Ruby, + ".rb", + "call", + 
"items.map { |item| item }", + ), + ( + "def f\n foo(bar) { baz }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "foo(bar) { baz }", + ), + ( + "user.name()\n", + Language::Python, + ".py", + "attribute", + "user.name", + ), + ( + "def f():\n value()\n", + Language::Python, + ".py", + "function_definition", + "def f():\n value()", + ), + ( + "user.name();\n", + Language::TypeScript, + ".ts", + "member_expression", + "user.name", + ), + ( + "function f() { value(); }\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function f() { value(); }", + ), + ( + "user.name()\n", + Language::Lua, + ".lua", + "dot_index_expression", + "user.name", + ), + ( + "function f()\n value()\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function f()\n value()\nend", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let found = normalizer.statement_block_call(node).map(|node| { + ( + node.kind().to_string(), + super::node_text(node, source).to_string(), + ) + }); + + assert_eq!( + found, + ruby_private_node_signature( + source, + language, + suffix, + "statement_block_call", + kind, + text + ), + "statement_block_call mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn statement_call_with_block_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n items.map { |item| item }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "items.map { |item| item }", + ), + ( + "items.map { |item| item }\n", + Language::Ruby, + ".rb", + "call", + "items.map { |item| item }", + ), + ( + "def f\n foo(bar) { baz }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "foo(bar) { baz }", + ), + ( + "def f\n items.map\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "items.map", + ), + ( + "def f():\n value(lambda item: item)\n", + 
Language::Python, + ".py", + "function_definition", + "def f():\n value(lambda item: item)", + ), + ( + "items.map(item => item);\n", + Language::TypeScript, + ".ts", + "expression_statement", + "items.map(item => item);", + ), + ( + "items:map(function(item) return item end)\n", + Language::Lua, + ".lua", + "function_call", + "items:map(function(item) return item end)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.statement_call_with_block(node), + ruby_private_predicate( + source, + language, + suffix, + "statement_call_with_block?", + kind, + text + ), + "statement_call_with_block? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_statement_call_with_block_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [( + "def f\n items.map { |item| item }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "items.map { |item| item }", + )] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_statement_call_with_block(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_statement_call_with_block", + kind, + text + ), + "normalize_statement_call_with_block mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn visibility_inline_def_call_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "private def hidden; value; end\n", + Language::Ruby, + ".rb", + "call", + "private def hidden; value; end", + ), + ( + "public def visible\n value\nend\n", + Language::Ruby, + ".rb", + "call", + "public def visible\n 
value\nend", + ), + ( + "private :hidden\n", + Language::Ruby, + ".rb", + "call", + "private :hidden", + ), + ( + "private(value)\n", + Language::Python, + ".py", + "expression_statement", + "private(value)", + ), + ( + "private(value);\n", + Language::TypeScript, + ".ts", + "call_expression", + "private(value)", + ), + ( + "private(value)\n", + Language::Lua, + ".lua", + "function_call", + "private(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.visibility_inline_def_call(node), + ruby_private_predicate( + source, + language, + suffix, + "visibility_inline_def_call?", + kind, + text + ), + "visibility_inline_def_call? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn visibility_inline_def_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "class C\n private def hidden\n value\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "private def hidden\n value\n end", + ), + ( + "class C\n module_function def helper\n value\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "module_function def helper\n value\n end", + ), + ( + "class C\n private :hidden\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "private :hidden", + ), + ( + "private(value)\n", + Language::Python, + ".py", + "expression_statement", + "private(value)", + ), + ( + "private(value);\n", + Language::TypeScript, + ".ts", + "expression_statement", + "private(value);", + ), + ( + "private(value)\n", + Language::Lua, + ".lua", + "function_call", + "private(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let function = + 
normalizer.named_children(node).into_iter().next().expect( + "visibility_inline_def_statement test target should have a first named child", + ); + + assert_eq!( + normalizer.visibility_inline_def_statement(node, function), + ruby_private_visibility_inline_def_statement_predicate( + source, language, suffix, kind, text + ), + "visibility_inline_def_statement? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_visibility_inline_def_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "private def hidden\n value\nend\n", + Language::Ruby, + ".rb", + "call", + "private def hidden\n value\nend", + ), + ( + "public def visible\n value\nend\n", + Language::Ruby, + ".rb", + "call", + "public def visible\n value\nend", + ), + ( + "module_function def self.helper\n value\nend\n", + Language::Ruby, + ".rb", + "call", + "module_function def self.helper\n value\nend", + ), + ( + "private(value)\n", + Language::Python, + ".py", + "expression_statement", + "private(value)", + ), + ( + "private(value);\n", + Language::TypeScript, + ".ts", + "call_expression", + "private(value)", + ), + ( + "private(value)\n", + Language::Lua, + ".lua", + "function_call", + "private(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_visibility_inline_def(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_visibility_inline_def", + kind, + text + ), + "normalize_visibility_inline_def mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn inline_def_from_argument_list_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "class C\n private def hidden\n value\n end\nend\n", + 
Language::Ruby, + ".rb", + "argument_list", + "def hidden\n value\n end", + ), + ( + "class C\n private def self.hidden\n value\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "def self.hidden\n value\n end", + ), + ( + "class C\n private :hidden\nend\n", + Language::Ruby, + ".rb", + "argument_list", + ":hidden", + ), + ( + "private(value)\n", + Language::Python, + ".py", + "argument_list", + "(value)", + ), + ( + "private(value);\n", + Language::TypeScript, + ".ts", + "arguments", + "(value)", + ), + ( + "private(value)\n", + Language::Lua, + ".lua", + "arguments", + "(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .inline_def_from_argument_list(Some(node)) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "inline_def_from_argument_list", + kind, + text + ), + "inline_def_from_argument_list mismatch for {language:?} {kind} {text:?}" + ); + } + + for (source, language, suffix) in [ + ("private def hidden\n value\nend\n", Language::Ruby, ".rb"), + ("private(value)\n", Language::Python, ".py"), + ("private(value);\n", Language::TypeScript, ".ts"), + ("private(value)\n", Language::Lua, ".lua"), + ] { + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .inline_def_from_argument_list(None) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_inline_def_from_argument_list_nil_value(source, language, suffix), + "inline_def_from_argument_list nil mismatch for {language:?}" + ); + } +} + +#[test] +fn inline_def_from_source_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "class C\n private def hidden\n value\n end\nend\n", + Language::Ruby, + 
".rb", + "argument_list", + "def hidden\n value\n end", + ), + ( + "class C\n private def self.hidden\n value\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "def self.hidden\n value\n end", + ), + ( + "def hidden\n value\nend\n", + Language::Ruby, + ".rb", + "method", + "def hidden\n value\nend", + ), + ( + "def self.hidden\n value\nend\n", + Language::Ruby, + ".rb", + "singleton_method", + "def self.hidden\n value\nend", + ), + ( + "class C\n private :hidden\nend\n", + Language::Ruby, + ".rb", + "argument_list", + ":hidden", + ), + ( + "def hidden():\n value\n", + Language::Python, + ".py", + "function_definition", + "def hidden():\n value", + ), + ( + "function hidden() {\n value;\n}\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function hidden() {\n value;\n}", + ), + ( + "function hidden()\n value()\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function hidden()\n value()\nend", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .inline_def_from_source(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "inline_def_from_source", + kind, + text + ), + "inline_def_from_source mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn inline_def_from_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "class C\n private def hidden\n value\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "private def hidden\n value\n end", + ), + ( + "class C\n module_function def self.hidden\n value\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "module_function def self.hidden\n value\n end", + ), + ( + "private def hidden\n value\nend\n", + Language::Ruby, + 
".rb", + "call", + "private def hidden\n value\nend", + ), + ( + "class C\n private :hidden\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "private :hidden", + ), + ( + "private(value)\n", + Language::Python, + ".py", + "expression_statement", + "private(value)", + ), + ( + "private(value);\n", + Language::TypeScript, + ".ts", + "expression_statement", + "private(value);", + ), + ( + "private(value)\n", + Language::Lua, + ".lua", + "function_call", + "private(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .inline_def_from_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "inline_def_from_statement", + kind, + text + ), + "inline_def_from_statement mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn inline_def_body_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "class C\n private def hidden\n value\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "def hidden\n value\n end", + ), + ( + "class C\n private def self.hidden\n value\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "def self.hidden\n value\n end", + ), + ( + "class C\n private def empty\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "def empty\n end", + ), + ( + "def hidden():\n value\n", + Language::Python, + ".py", + "function_definition", + "def hidden():\n value", + ), + ( + "function hidden() {\n value;\n}\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function hidden() {\n value;\n}", + ), + ( + "function hidden()\n value()\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function hidden()\n value()\nend", + ), + ] { + let tree = raw_tree(source, 
language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.inline_def_body(node).map(|body| { + ( + body.kind().to_string(), + super::node_text(body, source).to_string(), + ) + }); + + assert_eq!( + rust, + ruby_private_node_signature(source, language, suffix, "inline_def_body", kind, text), + "inline_def_body mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn inline_def_receiver_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "class C\n private def hidden\n value\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "def hidden\n value\n end", + ), + ( + "class C\n private def self.hidden\n value\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "def self.hidden\n value\n end", + ), + ( + "class C\n private def Owner.hidden\n value\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "def Owner.hidden\n value\n end", + ), + ( + "class C\n private def Owner::Nested.hidden\n value\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "def Owner::Nested.hidden\n value\n end", + ), + ( + "def hidden():\n value\n", + Language::Python, + ".py", + "function_definition", + "def hidden():\n value", + ), + ( + "function hidden() {\n value;\n}\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function hidden() {\n value;\n}", + ), + ( + "function hidden()\n value()\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function hidden()\n value()\nend", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.inline_def_receiver(node).map(|receiver| { + ( + receiver.kind().to_string(), + super::node_text(receiver, source).to_string(), + ) + }); + + assert_eq!( + rust, + 
ruby_private_node_signature( + source, + language, + suffix, + "inline_def_receiver", + kind, + text + ), + "inline_def_receiver mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn inline_def_name_after_receiver_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "class C\n private def self.hidden\n value\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "def self.hidden\n value\n end", + ), + ( + "class C\n private def Owner.hidden\n value\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "def Owner.hidden\n value\n end", + ), + ( + "class C\n private def Owner::Nested.hidden\n value\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "def Owner::Nested.hidden\n value\n end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let receiver = normalizer + .inline_def_receiver(node) + .expect("inline def receiver should exist for name-after-receiver case"); + let rust = normalizer + .inline_def_name_after_receiver(node, receiver) + .unwrap_or_default(); + + assert_eq!( + rust, + ruby_private_inline_def_name_after_receiver(source, language, suffix, kind, text), + "inline_def_name_after_receiver mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn inline_parameter_begin_marker_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f(a); a; end\n", + Language::Ruby, + ".rb", + "method", + "def f(a); a; end", + ), + ( + "def f a; a; end\n", + Language::Ruby, + ".rb", + "method", + "def f a; a; end", + ), + ( + "def f(a)\n a\nend\n", + Language::Ruby, + ".rb", + "method", + "def f(a)\n a\nend", + ), + ( + "def f(a):\n return a\n", + Language::Python, + ".py", + "function_definition", + "def f(a):\n return a", + ), + ( + "function f(a) { return a; }\n", + Language::TypeScript, + ".ts", + 
"function_declaration", + "function f(a) { return a; }", + ), + ( + "function f(a)\n return a\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function f(a)\n return a\nend", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .inline_parameter_begin_marker(node) + .as_ref() + .map(node_value) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_inline_parameter_begin_marker_value(source, language, suffix, kind, text), + "inline_parameter_begin_marker mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn prepend_inline_parameter_begin_matches_ruby_private_method() { + let scalar = test_node("VCALL", Vec::new()); + let block = test_node( + "BLOCK", + vec![Child::Node(Box::new(scalar.clone())), Child::Nil], + ); + let empty_block = test_node("BLOCK", vec![Child::Nil]); + + let cases = vec![ + ( + "no_marker", + "def f(a)\n a\nend\n", + Language::Ruby, + ".rb", + "method", + "def f(a)\n a\nend", + Some(scalar.clone()), + ), + ( + "marker_nil_body", + "def f(a); a; end\n", + Language::Ruby, + ".rb", + "method", + "def f(a); a; end", + None, + ), + ( + "marker_scalar_body", + "def f(a); a; end\n", + Language::Ruby, + ".rb", + "method", + "def f(a); a; end", + Some(scalar.clone()), + ), + ( + "marker_block_body", + "def f(a); a; end\n", + Language::Ruby, + ".rb", + "method", + "def f(a); a; end", + Some(block), + ), + ( + "marker_empty_block", + "def f(a); a; end\n", + Language::Ruby, + ".rb", + "method", + "def f(a); a; end", + Some(empty_block), + ), + ( + "non_ruby", + "def f(a):\n return a\n", + Language::Python, + ".py", + "function_definition", + "def f(a):\n return a", + Some(scalar), + ), + ]; + + for (label, source, language, suffix, kind, text, body) in cases { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, 
kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .prepend_inline_parameter_begin(node, body.clone()) + .as_ref() + .map(node_value) + .unwrap_or(Value::Null); + let body_value = body.as_ref().map(node_value).unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_prepend_inline_parameter_begin_value( + source, + language, + suffix, + kind, + text, + &body_value, + ), + "prepend_inline_parameter_begin mismatch for {label}" + ); + } +} + +#[test] +fn scalar_argument_list_value_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n return yield\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "yield", + ), + ( + "def f\n return nil\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "nil", + ), + ( + "def f\n return true\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "true", + ), + ( + "def f\n return false\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "false", + ), + ( + "def f\n return :ok?\nend\n", + Language::Ruby, + ".rb", + "argument_list", + ":ok?", + ), + ( + "def f\n return 12\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "12", + ), + ( + "def f\n return -12\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "-12", + ), + ( + "def f\n return name\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "name", + ), + ( + "def f():\n return value\n", + Language::Python, + ".py", + "identifier", + "value", + ), + ( + "function f() { return value; }\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + ), + ( + "function f()\n return value\nend\n", + Language::Lua, + ".lua", + "expression_list", + "value", + ), + ( + "function f() { return yield; }\n", + Language::TypeScript, + ".ts", + "yield_expression", + "yield", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = 
super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .scalar_argument_list_value(node) + .as_ref() + .map(node_value) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "scalar_argument_list_value", + kind, + text, + ), + "scalar_argument_list_value mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn local_or_call_for_name_matches_ruby_private_method() { + for (source, language, suffix, kind, text, name, local) in [ + ( + "def f\n {name:}\nend\n", + Language::Ruby, + ".rb", + "hash_key_symbol", + "name", + "name", + false, + ), + ( + "def f\n {name:}\nend\n", + Language::Ruby, + ".rb", + "hash_key_symbol", + "name", + "name", + true, + ), + ( + "def f():\n value\n", + Language::Python, + ".py", + "identifier", + "f", + "f", + false, + ), + ( + "function f() { value; }\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + "value", + false, + ), + ( + "function f()\n value()\nend\n", + Language::Lua, + ".lua", + "identifier", + "value", + "value", + false, + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + if local { + normalizer + .local_stack + .push(BTreeSet::from([name.to_string()])); + } + let rust = node_value(&normalizer.local_or_call_for_name(name, node)); + + assert_eq!( + rust, + ruby_private_local_or_call_for_name_value( + source, language, suffix, kind, text, name, local + ), + "local_or_call_for_name mismatch for {language:?} {name:?} local={local}" + ); + } +} + +#[test] +fn literal_arguments_from_text_normalization_matches_ruby() { + let symbol_source = "puts :ok\n"; + let root = parse_language_source(symbol_source, Language::Ruby, ".rb"); + let fcall = first_node(&root, "FCALL", "puts :ok"); + assert_eq!( + fcall.children.first(), + Some(&Child::Symbol("puts".to_string())) + ); + let 
args = child_node(fcall, 1); + assert_eq!(args.r#type, "LIST"); + let lit = child_node(args, 0); + assert_eq!(lit.r#type, "LIT"); + assert_eq!(lit.children.first(), Some(&Child::Symbol("ok".to_string()))); + assert_ruby_parity(symbol_source, Language::Ruby, ".rb"); + + let heredoc_source = "def f\n puts <<~TXT\n hi\n TXT\nend\n"; + let root = parse_language_source(heredoc_source, Language::Ruby, ".rb"); + let fcall = first_node(&root, "FCALL", "puts <<~TXT"); + let args = child_node(fcall, 1); + assert_eq!(args.r#type, "LIST"); + let dstr = child_node(args, 0); + assert_eq!(dstr.r#type, "DSTR"); + assert_eq!(child_types(dstr), vec!["STR"]); + let body = child_node(dstr, 0); + assert_eq!( + body.children.first(), + Some(&Child::String("\n hi\n ".to_string())) + ); + assert_ruby_parity(heredoc_source, Language::Ruby, ".rb"); +} + +#[test] +fn literal_symbol_arguments_matches_ruby_scan_contract() { + assert_eq!( + super::literal_symbol_arguments(":one, :two?, :three!, :four=, :1, ::Name"), + vec![ + "one".to_string(), + "two?".to_string(), + "three!".to_string(), + "four=".to_string(), + "Name".to_string(), + ] + ); +} + +#[test] +fn elide_tail_returns_matches_ruby_private_method() { + let leaf = |node_type: &str| test_node(node_type, vec![Child::String("value".to_string())]); + let return_leaf = || test_node("RETURN", vec![Child::Node(Box::new(leaf("LVAR")))]); + let protected_def = test_node( + "DEFN", + vec![ + Child::Symbol("kept".to_string()), + Child::Node(Box::new(test_node( + "SCOPE", + vec![Child::Nil, Child::Nil, Child::Node(Box::new(return_leaf()))], + ))), + ], + ); + let cases = vec![ + None, + Some(return_leaf()), + Some(test_node( + "BLOCK", + vec![ + Child::Node(Box::new(leaf("LVAR"))), + Child::Node(Box::new(return_leaf())), + ], + )), + Some(test_node( + "SCOPE", + vec![Child::Nil, Child::Nil, Child::Node(Box::new(return_leaf()))], + )), + Some(test_node( + "IF", + vec![ + Child::Node(Box::new(leaf("COND"))), + Child::Node(Box::new(return_leaf())), + 
Child::Node(Box::new(return_leaf())), + ], + )), + Some(test_node( + "UNLESS", + vec![ + Child::Node(Box::new(leaf("COND"))), + Child::Node(Box::new(return_leaf())), + Child::Node(Box::new(return_leaf())), + ], + )), + Some(test_node( + "CASE", + vec![ + Child::Node(Box::new(leaf("LVAR"))), + Child::Node(Box::new(return_leaf())), + ], + )), + Some(test_node( + "CASE2", + vec![Child::Node(Box::new(return_leaf()))], + )), + Some(test_node( + "WHEN", + vec![ + Child::Node(Box::new(leaf("LIST"))), + Child::Node(Box::new(return_leaf())), + Child::Node(Box::new(return_leaf())), + ], + )), + Some(test_node( + "RESCUE", + vec![ + Child::Node(Box::new(return_leaf())), + Child::Node(Box::new(return_leaf())), + ], + )), + Some(test_node( + "RESBODY", + vec![ + Child::Node(Box::new(leaf("LIST"))), + Child::Node(Box::new(return_leaf())), + Child::Node(Box::new(return_leaf())), + ], + )), + Some(protected_def), + ]; + let normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); + + for node in cases { + let input = node.as_ref().map(node_value).unwrap_or(Value::Null); + let rust = normalizer + .elide_tail_returns(node) + .as_ref() + .map(node_value) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_elide_tail_returns_value(&input, true), + "elide_tail_returns mismatch for input {input}" + ); + } + + let non_ruby = Some(return_leaf()); + let input = non_ruby.as_ref().map(node_value).unwrap_or(Value::Null); + let normalizer = super::TreeSitterNormalizer::new("", Language::Python); + let rust = normalizer + .elide_tail_returns(non_ruby) + .as_ref() + .map(node_value) + .unwrap_or(Value::Null); + + assert_eq!(rust, input); + assert_eq!(ruby_private_elide_tail_returns_value(&input, false), input); +} + +#[test] +fn elide_implicit_nil_body_matches_ruby_private_method() { + let normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); + let leaf = || test_node("LVAR", vec![Child::String("value".to_string())]); + let nil_node = || test_node("NIL", 
Vec::new()); + let cases = vec![ + None, + Some(nil_node()), + Some(leaf()), + Some(test_node( + "BLOCK", + vec![ + Child::Node(Box::new(leaf())), + Child::Node(Box::new(nil_node())), + Child::Node(Box::new(nil_node())), + ], + )), + Some(test_node( + "BLOCK", + vec![Child::Nil, Child::Node(Box::new(nil_node()))], + )), + Some(test_node( + "BLOCK", + vec![ + Child::Node(Box::new(leaf())), + Child::Node(Box::new(leaf())), + Child::Node(Box::new(nil_node())), + ], + )), + ]; + + for node in cases { + let input = node.as_ref().map(node_value).unwrap_or(Value::Null); + let rust = normalizer + .elide_implicit_nil_body(node) + .as_ref() + .map(node_value) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_elide_implicit_nil_body_value(&input, true), + "elide_implicit_nil_body mismatch for input {input}" + ); + } + + let non_ruby = Some(nil_node()); + let input = non_ruby.as_ref().map(node_value).unwrap_or(Value::Null); + let normalizer = super::TreeSitterNormalizer::new("", Language::Python); + let rust = normalizer + .elide_implicit_nil_body(non_ruby) + .as_ref() + .map(node_value) + .unwrap_or(Value::Null); + + assert_eq!(rust, input); + assert_eq!( + ruby_private_elide_implicit_nil_body_value(&input, false), + input + ); +} + +#[test] +fn drop_trailing_nil_statement_matches_ruby_private_method() { + let normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); + let leaf = |node_type: &str| test_node(node_type, vec![Child::Symbol("value".to_string())]); + let nil_node = || test_node("NIL", Vec::new()); + let block = |children| test_node("BLOCK", children); + + for node in [ + None, + Some(nil_node()), + Some(block(vec![ + Child::Node(Box::new(leaf("LASGN"))), + Child::Node(Box::new(nil_node())), + ])), + Some(block(vec![ + Child::Node(Box::new(leaf("LASGN"))), + Child::Node(Box::new(nil_node())), + Child::Node(Box::new(nil_node())), + ])), + Some(block(vec![ + Child::Node(Box::new(leaf("LASGN"))), + Child::Nil, + 
Child::Node(Box::new(nil_node())), + ])), + Some(block(vec![Child::Nil, Child::Node(Box::new(nil_node()))])), + Some(block(vec![ + Child::Node(Box::new(leaf("LASGN"))), + Child::Nil, + Child::Node(Box::new(leaf("VCALL"))), + ])), + Some(block(vec![ + Child::Node(Box::new(leaf("LASGN"))), + Child::Nil, + Child::Node(Box::new(leaf("VCALL"))), + Child::Node(Box::new(nil_node())), + ])), + ] { + let input = node.as_ref().map(node_value).unwrap_or(Value::Null); + let rust = normalizer + .drop_trailing_nil_statement(node) + .as_ref() + .map(node_value) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_drop_trailing_nil_statement_value(&input), + "drop_trailing_nil_statement mismatch for input {input}" + ); + } +} + +#[test] +fn symbol_literal_node_matches_ruby_private_predicate() { + let normalizer = super::TreeSitterNormalizer::new("", Language::Ruby); + for (node, node_type, child_kind) in [ + (None, None, None), + ( + Some(test_node("LIT", vec![Child::Symbol("value".to_string())])), + Some("LIT"), + Some("symbol"), + ), + ( + Some(test_node("LIT", vec![Child::String("value".to_string())])), + Some("LIT"), + Some("string"), + ), + (Some(test_node("LIT", Vec::new())), Some("LIT"), None), + ( + Some(test_node("STR", vec![Child::Symbol("value".to_string())])), + Some("STR"), + Some("symbol"), + ), + ( + Some(test_node( + "LIT", + vec![Child::Node(Box::new(test_node("NIL", Vec::new())))], + )), + Some("LIT"), + Some("node"), + ), + ( + Some(test_node("LIT", vec![Child::Nil])), + Some("LIT"), + Some("nil"), + ), + ] { + assert_eq!( + normalizer.symbol_literal_node(node.as_ref()), + ruby_private_symbol_literal_node_predicate(node_type, child_kind), + "symbol_literal_node? 
mismatch for node_type={node_type:?} child_kind={child_kind:?}" + ); + } +} + +#[test] +fn same_ts_node_matches_ruby_private_predicate() { + for ( + source, + language, + suffix, + left_kind, + left_text, + left_index, + right_kind, + right_text, + right_index, + ) in [ + ( + "value\nvalue\n", + Language::Ruby, + ".rb", + "identifier", + "value", + 0, + "identifier", + "value", + 0, + ), + ( + "value\nvalue\n", + Language::Ruby, + ".rb", + "identifier", + "value", + 0, + "identifier", + "value", + 1, + ), + ( + "value\nvalue\n", + Language::Python, + ".py", + "expression_statement", + "value", + 0, + "expression_statement", + "value", + 0, + ), + ( + "value\nvalue\n", + Language::Python, + ".py", + "expression_statement", + "value", + 0, + "expression_statement", + "value", + 1, + ), + ( + "value;\nvalue;\n", + Language::TypeScript, + ".ts", + "expression_statement", + "value;", + 0, + "expression_statement", + "value;", + 1, + ), + ( + "value()\nvalue()\n", + Language::Lua, + ".lua", + "function_call", + "value()", + 0, + "function_call", + "value()", + 0, + ), + ( + "value()\nvalue()\n", + Language::Lua, + ".lua", + "function_call", + "value()", + 0, + "function_call", + "value()", + 1, + ), + ] { + let tree = raw_tree(source, language); + let left = nth_raw_node(tree.root_node(), source, left_kind, left_text, left_index); + let right = nth_raw_node( + tree.root_node(), + source, + right_kind, + right_text, + right_index, + ); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.same_ts_node(left, right), + ruby_private_same_ts_node_predicate( + source, + language, + suffix, + left_kind, + left_text, + left_index, + right_kind, + right_text, + right_index + ), + "same_ts_node? 
mismatch for {language:?} {left_kind}:{left_text:?}[{left_index}] vs {right_kind}:{right_text:?}[{right_index}]" + ); + } +} + +#[test] +fn parent_named_child_matches_ruby_private_predicate() { + for ( + source, + language, + suffix, + parent_kind, + parent_text, + parent_index, + child_kind, + child_text, + child_index, + ) in [ + ( + "def f\n {name:}\nend\n", + Language::Ruby, + ".rb", + "pair", + "name:", + 0, + "hash_key_symbol", + "name", + 0, + ), + ( + "def f\n {name:}\nend\n", + Language::Ruby, + ".rb", + "pair", + "name:", + 0, + "identifier", + "f", + 0, + ), + ( + "def f():\n value\n", + Language::Python, + ".py", + "function_definition", + "def f():\n value", + 0, + "identifier", + "f", + 0, + ), + ( + "def f():\n value\n", + Language::Python, + ".py", + "block", + "value", + 0, + "identifier", + "f", + 0, + ), + ( + "function f() { value; }\n", + Language::TypeScript, + ".ts", + "function_declaration", + "function f() { value; }", + 0, + "identifier", + "f", + 0, + ), + ( + "function f() { value; }\n", + Language::TypeScript, + ".ts", + "statement_block", + "{ value; }", + 0, + "identifier", + "f", + 0, + ), + ( + "function f()\n value()\nend\n", + Language::Lua, + ".lua", + "function_declaration", + "function f()\n value()\nend", + 0, + "identifier", + "f", + 0, + ), + ( + "function f()\n value()\nend\n", + Language::Lua, + ".lua", + "block", + "value()", + 0, + "identifier", + "f", + 0, + ), + ] { + let tree = raw_tree(source, language); + let parent = nth_raw_node( + tree.root_node(), + source, + parent_kind, + parent_text, + parent_index, + ); + let child = nth_raw_node( + tree.root_node(), + source, + child_kind, + child_text, + child_index, + ); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.parent_named_child(parent, child), + ruby_private_parent_named_child_predicate( + source, + language, + suffix, + parent_kind, + parent_text, + parent_index, + child_kind, + child_text, + child_index + ), + 
"parent_named_child? mismatch for {language:?} {parent_kind}:{parent_text:?}[{parent_index}] -> {child_kind}:{child_text:?}[{child_index}]" + ); + } +} + +#[test] +fn node_key_matches_ruby_private_method() { + for (source, language, suffix, kind, text, index) in [ + ( + "value\nvalue\n", + Language::Ruby, + ".rb", + "identifier", + "value", + 0, + ), + ( + "value\nvalue\n", + Language::Ruby, + ".rb", + "identifier", + "value", + 1, + ), + ( + "value\nvalue\n", + Language::Python, + ".py", + "expression_statement", + "value", + 1, + ), + ( + "value;\nvalue;\n", + Language::TypeScript, + ".ts", + "expression_statement", + "value;", + 0, + ), + ( + "value()\nvalue()\n", + Language::Lua, + ".lua", + "function_call", + "value()", + 1, + ), + ] { + let tree = raw_tree(source, language); + let node = nth_raw_node(tree.root_node(), source, kind, text, index); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.node_key(node), + ruby_private_node_key_signature(source, language, suffix, kind, text, index), + "node_key mismatch for {language:?} {kind}:{text:?}[{index}]" + ); + } +} + +#[test] +fn bare_identifier_text_matches_ruby_private_predicate() { + for text in [ + "value", + "_value", + "value1", + "value?", + "value!", + "value=", + " value? ", + "", + "1value", + "value-name", + "value?name", + "value??", + "value!=", + "value =", + ] { + assert_eq!( + super::bare_identifier_text(text), + ruby_private_text_predicate(Language::Ruby, "bare_identifier_text?", text), + "bare_identifier_text? 
mismatch for {text:?}" + ); + } +} + +#[test] +fn hidden_match_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "match(value)\n", + Language::Ruby, + ".rb", + "call", + "match(value)", + ), + ( + "match value:\n case 1:\n result\n", + Language::Python, + ".py", + "match_statement", + "match value:\n case 1:\n result", + ), + ( + "match(value)\n", + Language::Python, + ".py", + "expression_statement", + "match(value)", + ), + ( + "match(value);\n", + Language::TypeScript, + ".ts", + "expression_statement", + "match(value);", + ), + ( + "match(value)\n", + Language::Lua, + ".lua", + "function_call", + "match(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.hidden_match(node), + ruby_private_predicate(source, language, suffix, "hidden_match?", kind, text), + "hidden_match? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn kind_type_matches_ruby_private_method() { + for kind in [ + "", + "body_statement", + "block_body", + "block", + "statements", + "expression_statement", + "alreadyCAPS", + "argument-list??", + "foo__bar", + "123kind", + "é_node", + ] { + assert_eq!( + super::kind_type(kind), + ruby_private_text_string(Language::Ruby, "kind_type", kind), + "kind_type mismatch for {kind:?}" + ); + } +} + +#[test] +fn ts_node_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ("ready?\n", Language::Ruby, ".rb", "call", "ready?"), + ( + "value\n", + Language::Python, + ".py", + "expression_statement", + "value", + ), + ( + "let value = 1;\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + ), + ( + "value = 1\n", + Language::Lua, + ".lua", + "variable_list", + "value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + + assert_eq!( + super::ts_node(Some(node)), + ruby_private_predicate(source, language, suffix, "ts_node?", kind, text), + "ts_node? 
raw-node mismatch for {language:?} {kind}:{text:?}" + ); + } + + assert_eq!(super::ts_node(None), ruby_private_ts_node_value("nil")); + assert!(!ruby_private_ts_node_value("string")); + assert!(!ruby_private_ts_node_value("normalized_node")); +} + +#[test] +fn command_call_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n puts value\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "puts value", + ), + ( + "def f\n foo { value }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "foo { value }", + ), + ( + "def f\n foo\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "foo", + ), + ( + "def f\n user.name value\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "user.name value", + ), + ( + "print(value)\n", + Language::Python, + ".py", + "expression_statement", + "print(value)", + ), + ( + "console.log(value);\n", + Language::TypeScript, + ".ts", + "expression_statement", + "console.log(value);", + ), + ( + "print(value)\n", + Language::Lua, + ".lua", + "function_call", + "print(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.command_call_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "command_call_statement?", + kind, + text + ), + "command_call_statement? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_command_call_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n puts value\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "puts value", + ), + ( + "def f\n foo { value }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "foo { value }", + ), + ( + "print(value)\n", + Language::Python, + ".py", + "expression_statement", + "print(value)", + ), + ( + "console.log(value);\n", + Language::TypeScript, + ".ts", + "expression_statement", + "console.log(value);", + ), + ( + "print(value)\n", + Language::Lua, + ".lua", + "function_call", + "print(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_command_call_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_command_call_statement", + kind, + text + ), + "normalize_command_call_statement mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn zero_child_identifier_call_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ("foo?\n", Language::Ruby, ".rb", "call", "foo?"), + ("foo!\n", Language::Ruby, ".rb", "call", "foo!"), + ("foo()\n", Language::Ruby, ".rb", "call", "foo()"), + ( + "foo()\n", + Language::Python, + ".py", + "expression_statement", + "foo()", + ), + ( + "foo();\n", + Language::TypeScript, + ".ts", + "call_expression", + "foo()", + ), + ("foo()\n", Language::Lua, ".lua", "function_call", "foo()"), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( 
+ normalizer.zero_child_identifier_call(node), + ruby_private_predicate( + source, + language, + suffix, + "zero_child_identifier_call?", + kind, + text + ), + "zero_child_identifier_call? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn zero_child_identifier_call_normalization_matches_ruby() { + for source in ["foo?\n", "foo!\n"] { + let root = parse_language_source(source, Language::Ruby, ".rb"); + let text = source.trim(); + let vcall = first_node(&root, "VCALL", text); + assert_eq!( + vcall.children.first(), + Some(&Child::Symbol(text.to_string())) + ); + assert_ruby_parity(source, Language::Ruby, ".rb"); + } +} + +#[test] +fn normalize_zero_child_call_matches_ruby_private_method() { + for source in ["foo?\n", "foo!\n", "foo()\n"] { + let text = source.trim(); + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, "call", text); + let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = normalizer.normalize_zero_child_call(node); + + assert_eq!( + node_value(&rust), + ruby_private_normalize_method_value( + source, + Language::Ruby, + ".rb", + "normalize_zero_child_call", + "call", + text + ), + "normalize_zero_child_call mismatch for {text:?}" + ); + } +} + +#[test] +fn normalize_const_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ("Foo\n", Language::Ruby, ".rb", "constant", "Foo"), + ( + "Foo::Bar\n", + Language::Ruby, + ".rb", + "scope_resolution", + "Foo::Bar", + ), + ( + "class Foo::Bar::Baz\nend\n", + Language::Ruby, + ".rb", + "scope_resolution", + "Foo::Bar::Baz", + ), + ( + "type Alias = Foo;\n", + Language::TypeScript, + ".ts", + "type_identifier", + "Foo", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.normalize_const(node); + + assert_eq!( + 
node_value(&rust), + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_const", + kind, + text + ), + "normalize_const mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn assignment_receiver_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ("value += 1\n", Language::Ruby, ".rb", "identifier", "value"), + ( + "@value += 1\n", + Language::Ruby, + ".rb", + "instance_variable", + "@value", + ), + ( + "$value += 1\n", + Language::Ruby, + ".rb", + "global_variable", + "$value", + ), + ("VALUE += 1\n", Language::Ruby, ".rb", "constant", "VALUE"), + ( + "user.value += 1\n", + Language::Ruby, + ".rb", + "call", + "user.value", + ), + ( + "value += 1\n", + Language::Python, + ".py", + "identifier", + "value", + ), + ( + "user.value += 1\n", + Language::Python, + ".py", + "attribute", + "user.value", + ), + ( + "value += 1;\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + ), + ( + "user.value += 1;\n", + Language::TypeScript, + ".ts", + "member_expression", + "user.value", + ), + ( + "value = 1\n", + Language::Lua, + ".lua", + "variable_list", + "value", + ), + ( + "user.value = 1\n", + Language::Lua, + ".lua", + "variable_list", + "user.value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .assignment_receiver(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "assignment_receiver", + kind, + text + ), + "assignment_receiver mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn assignment_target_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "@value = 1\n", + Language::Ruby, + ".rb", + "instance_variable", + "@value", + ), + ( + "$value = 
1\n", + Language::Ruby, + ".rb", + "global_variable", + "$value", + ), + ( + "items[index] = value\n", + Language::Ruby, + ".rb", + "element_reference", + "items[index]", + ), + ( + "user.value = 1\n", + Language::Ruby, + ".rb", + "call", + "user.value", + ), + ( + "user.value = 1\n", + Language::Python, + ".py", + "attribute", + "user.value", + ), + ( + "user.value = 1;\n", + Language::TypeScript, + ".ts", + "member_expression", + "user.value", + ), + ( + "user.value = 1\n", + Language::Lua, + ".lua", + "variable_list", + "user.value", + ), + ( + "value = 1\n", + Language::Lua, + ".lua", + "variable_list", + "value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let source_node = normalizer.parent_node(node).unwrap_or(node); + let right = normalizer + .assignment_right(source_node) + .and_then(|right| normalizer.normalize_node(right)); + let rust = normalizer + .assignment_target(node, right, source_node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_assignment_target_value(source, language, suffix, kind, text), + "assignment_target mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn augmented_assignment_value_matches_ruby_private_method() { + for (source, language, suffix, kind, text, operator) in [ + ( + "value += 1\n", + Language::Ruby, + ".rb", + "identifier", + "value", + "+", + ), + ( + "@value *= 2\n", + Language::Ruby, + ".rb", + "instance_variable", + "@value", + "*", + ), + ( + "$value += 1\n", + Language::Ruby, + ".rb", + "global_variable", + "$value", + "+", + ), + ( + "VALUE -= 1\n", + Language::Ruby, + ".rb", + "constant", + "VALUE", + "-", + ), + ( + "user.value += 1\n", + Language::Ruby, + ".rb", + "call", + "user.value", + "+", + ), + ( + "value += 1\n", + Language::Python, + ".py", + "identifier", + "value", + "+", + ), + ( + 
"user.value += 1\n", + Language::Python, + ".py", + "attribute", + "user.value", + "+", + ), + ( + "value += 1;\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + "+", + ), + ( + "user.value += 1;\n", + Language::TypeScript, + ".ts", + "member_expression", + "user.value", + "+", + ), + ( + "value = 1\n", + Language::Lua, + ".lua", + "variable_list", + "value", + "+", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let source_node = normalizer.parent_node(node).unwrap_or(node); + let right_raw = normalizer.assignment_right(source_node); + let rust = normalizer.augmented_assignment_value(node, operator, right_raw, source_node); + + assert_eq!( + node_value(&rust), + ruby_private_augmented_assignment_value(source, language, suffix, kind, text, operator), + "augmented_assignment_value mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn target_name_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "value = other\n", + Language::Ruby, + ".rb", + "identifier", + "value", + ), + ( + "$value = other\n", + Language::Ruby, + ".rb", + "global_variable", + "$value", + ), + ( + "VALUE = other\n", + Language::Ruby, + ".rb", + "constant", + "VALUE", + ), + ( + "a, *rest = values\n", + Language::Ruby, + ".rb", + "rest_assignment", + "*rest", + ), + ( + "value = other\n", + Language::Python, + ".py", + "identifier", + "value", + ), + ( + "let value = other;\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + ), + ( + "value = other\n", + Language::Lua, + ".lua", + "variable_list", + "value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + Value::String(normalizer.target_name(node)), + 
ruby_private_normalize_method_value( + source, + language, + suffix, + "target_name", + kind, + text + ), + "target_name mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_multiple_assignment_matches_ruby_private_method() { + for (source, kind, text) in [ + ("a, b = values\n", "assignment", "a, b = values"), + ("$a, b = values\n", "assignment", "$a, b = values"), + ("a, *rest = values\n", "assignment", "a, *rest = values"), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let left = normalizer + .assignment_left(node) + .expect("multiple assignment should have left side"); + let right = normalizer + .assignment_right(node) + .and_then(|right| normalizer.normalize_node(right)); + let rust = normalizer.normalize_multiple_assignment(left, right, node); + + assert_eq!( + node_value(&rust), + ruby_private_normalize_multiple_assignment_value( + source, + Language::Ruby, + ".rb", + kind, + text + ), + "normalize_multiple_assignment mismatch for {text:?}" + ); + } +} + +#[test] +fn normalize_assignment_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "value = other\n", + Language::Ruby, + ".rb", + "assignment", + "value = other", + ), + ( + "@value = other\n", + Language::Ruby, + ".rb", + "assignment", + "@value = other", + ), + ( + "$value = other\n", + Language::Ruby, + ".rb", + "assignment", + "$value = other", + ), + ( + "items[index] = value\n", + Language::Ruby, + ".rb", + "assignment", + "items[index] = value", + ), + ( + "user.value = other\n", + Language::Ruby, + ".rb", + "assignment", + "user.value = other", + ), + ( + "a, b = values\n", + Language::Ruby, + ".rb", + "assignment", + "a, b = values", + ), + ( + "value = other\n", + Language::Python, + ".py", + "expression_statement", + "value = other", + ), + ( + "user.value = other\n", + 
Language::Python, + ".py", + "expression_statement", + "user.value = other", + ), + ( + "value = other;\n", + Language::TypeScript, + ".ts", + "expression_statement", + "value = other;", + ), + ( + "user.value = other;\n", + Language::TypeScript, + ".ts", + "expression_statement", + "user.value = other;", + ), + ( + "value = other\n", + Language::Lua, + ".lua", + "assignment_statement", + "value = other", + ), + ( + "user.value = other\n", + Language::Lua, + ".lua", + "assignment_statement", + "user.value = other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_assignment(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_assignment", + kind, + text + ), + "normalize_assignment mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_assignment_lhs_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "value = other\n", + Language::Ruby, + ".rb", + "identifier", + "value", + ), + ( + "@value = other\n", + Language::Ruby, + ".rb", + "instance_variable", + "@value", + ), + ( + "$value = other\n", + Language::Ruby, + ".rb", + "global_variable", + "$value", + ), + ( + "items[index] = value\n", + Language::Ruby, + ".rb", + "element_reference", + "items[index]", + ), + ( + "user.value = other\n", + Language::Ruby, + ".rb", + "call", + "user.value", + ), + ( + "value = other\n", + Language::Python, + ".py", + "identifier", + "value", + ), + ( + "user.value = other\n", + Language::Python, + ".py", + "attribute", + "user.value", + ), + ( + "value = other;\n", + Language::TypeScript, + ".ts", + "identifier", + "value", + ), + ( + "user.value = other;\n", + Language::TypeScript, + ".ts", + "member_expression", + 
"user.value", + ), + ( + "value = other\n", + Language::Lua, + ".lua", + "variable_list", + "value", + ), + ( + "user.value = other\n", + Language::Lua, + ".lua", + "variable_list", + "user.value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_assignment_lhs(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_assignment_lhs", + kind, + text + ), + "normalize_assignment_lhs mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_begin_matches_ruby_private_method() { + for (source, text) in [ + ("begin\n work\n done\nend\n", "begin\n work\n done\nend"), + ( + "begin\n work\nensure\n cleanup\nend\n", + "begin\n work\nensure\n cleanup\nend", + ), + ( + "begin\n work\nrescue Error => e\n handle\nend\n", + "begin\n work\nrescue Error => e\n handle\nend", + ), + ( + "begin\n work\nrescue Error => e\n handle\nensure\n cleanup\nend\n", + "begin\n work\nrescue Error => e\n handle\nensure\n cleanup\nend", + ), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, "begin", text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = normalizer + .normalize_begin(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + Language::Ruby, + ".rb", + "normalize_begin", + "begin", + text + ), + "normalize_begin mismatch for {text:?}" + ); + } +} + +#[test] +fn normalize_block_argument_matches_ruby_private_method() { + for (source, text) in [ + ("foo(&block)\n", "&block"), + ("foo(&:to_s)\n", "&:to_s"), + ("foo(&method(:bar))\n", "&method(:bar)"), + ] { + let tree = raw_tree(source, 
Language::Ruby); + let node = first_raw_node(tree.root_node(), source, "block_argument", text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = normalizer + .normalize_block_argument(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + Language::Ruby, + ".rb", + "normalize_block_argument", + "block_argument", + text + ), + "normalize_block_argument mismatch for {text:?}" + ); + } +} + +#[test] +fn normalize_body_nodes_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ("\n", Language::Ruby, ".rb", "__root__", ""), + ("value\n", Language::Ruby, ".rb", "__root__", ""), + ("first\nsecond\n", Language::Ruby, ".rb", "__root__", ""), + ( + "first()\nsecond()\n", + Language::Python, + ".py", + "__root__", + "", + ), + ( + "first();\nsecond();\n", + Language::TypeScript, + ".ts", + "__root__", + "", + ), + ("first()\nsecond()\n", Language::Lua, ".lua", "__root__", ""), + ] { + let tree = raw_tree(source, language); + let target = if kind == "__root__" { + tree.root_node() + } else { + first_raw_node(tree.root_node(), source, kind, text) + }; + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let nodes = normalizer.named_children(target); + let rust = normalizer + .normalize_body_nodes(nodes, target) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_body_nodes_value(source, language, suffix, kind, text), + "normalize_body_nodes mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_children_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n one\n two\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "one\n two", + ), + ( + "def f\n value = other\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value = other", + ), + ( + "def f\n x = 
<<~TXT\n hi\n TXT\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "x = <<~TXT\n hi\n TXT", + ), + ( + "def f():\n one()\n two()\n", + Language::Python, + ".py", + "block", + "one()\n two()", + ), + ( + "def f():\n value = other\n", + Language::Python, + ".py", + "block", + "value = other", + ), + ( + "function f(){ one(); two(); }\n", + Language::TypeScript, + ".ts", + "statement_block", + "{ one(); two(); }", + ), + ( + "function f(){ value = other; }\n", + Language::TypeScript, + ".ts", + "assignment_expression", + "value = other", + ), + ( + "function f()\n one()\n two()\nend\n", + Language::Lua, + ".lua", + "block", + "one()\n two()", + ), + ( + "function f()\n value = other\nend\n", + Language::Lua, + ".lua", + "block", + "value = other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = children_value(&normalizer.normalize_children(node)); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_children", + kind, + text + ), + "normalize_children mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_class_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "class Thing; end\n", + Language::Ruby, + ".rb", + "class", + "class Thing; end", + ), + ( + "class Thing:\n pass\n", + Language::Python, + ".py", + "class_definition", + "class Thing:\n pass", + ), + ( + "class Thing {}\n", + Language::TypeScript, + ".ts", + "class_declaration", + "class Thing {}", + ), + ( + "local Thing = {}\n", + Language::Lua, + ".lua", + "variable_declaration", + "local Thing = {}", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + 
.normalize_class(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_class", + kind, + text + ), + "normalize_class mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_impl_matches_ruby_private_method() { + for (source, kind, text) in [( + "impl Thing {\n fn call(&self) {\n work();\n }\n}\n", + "impl_item", + "impl Thing {\n fn call(&self) {\n work();\n }\n}", + )] { + let tree = raw_tree(source, Language::Rust); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Rust); + let rust = normalizer + .normalize_impl(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + Language::Rust, + ".rs", + "normalize_impl", + kind, + text + ), + "normalize_impl mismatch for {kind} {text:?}" + ); + } +} + +#[test] +fn rust_impl_normalization_matches_ruby() { + let source = "impl Thing {\n fn call(&self) {\n work();\n }\n}\n"; + let root = parse_language_source(source, Language::Rust, ".rs"); + let class_node = first_node(&root, "CLASS", source.trim_end()); + + assert_eq!(child_node(class_node, 0).r#type, "CONST"); + assert_ruby_parity(source, Language::Rust, ".rs"); +} + +#[test] +fn normalize_body_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n value\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "value", + ), + ( + "def f\n return value\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "return value", + ), + ( + "def f\n items[index]\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "items[index]", + ), + ( + "def f\n [first, second]\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "[first, second]", + ), + ( + "def f\n value if ready?\nend\n", + Language::Ruby, + ".rb", + 
"body_statement", + "value if ready?", + ), + ( + "def f\n left && right\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "left && right", + ), + ( + "def f():\n return value\n", + Language::Python, + ".py", + "block", + "return value", + ), + ( + "def f():\n value = other\n", + Language::Python, + ".py", + "block", + "value = other", + ), + ( + "function f() {\n return value;\n}\n", + Language::TypeScript, + ".ts", + "return_statement", + "return value;", + ), + ( + "function f() {\n value = other;\n}\n", + Language::TypeScript, + ".ts", + "expression_statement", + "value = other;", + ), + ( + "function f()\n return value\nend\n", + Language::Lua, + ".lua", + "block", + "return value", + ), + ( + "function f()\n value = other\nend\n", + Language::Lua, + ".lua", + "block", + "value = other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_body(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_body", + kind, + text + ), + "normalize_body mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_return_value_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n return nil\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "nil", + ), + ( + "def f\n return items[index]\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "items[index]", + ), + ( + "def f\n return left && right\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "left && right", + ), + ( + "def f\n return condition ? yes : no\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "condition ? 
yes : no", + ), + ( + "def f\n return foo { value }\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "foo { value }", + ), + ( + "def f\n return user.name\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "user.name", + ), + ( + "def f\n return !value\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "!value", + ), + ( + "def f\n return left + right\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "left + right", + ), + ( + "def f\n return foo(bar)\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "foo(bar)", + ), + ( + "def f():\n return value + other\n", + Language::Python, + ".py", + "binary_operator", + "value + other", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_return_value(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_return_value", + kind, + text + ), + "normalize_return_value mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_return_node_matches_ruby_private_method() { + for (source, language, suffix, kind, text, elide_symbol) in [ + ( + "return :ok if cond\n", + Language::Ruby, + ".rb", + "return", + "return :ok", + false, + ), + ( + "return :ok if cond\n", + Language::Ruby, + ".rb", + "return", + "return :ok", + true, + ), + ( + "return value if cond\n", + Language::Ruby, + ".rb", + "return", + "return value", + true, + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_return_node_with_elide_symbol(node, elide_symbol) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + 
ruby_private_normalize_return_node_value( + source, + language, + suffix, + kind, + text, + elide_symbol + ), + "normalize_return_node mismatch for {language:?} {kind} {text:?} elide_symbol={elide_symbol}" + ); + } +} + +#[test] +fn normalize_return_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "return :ok if cond\n", + Language::Ruby, + ".rb", + "return", + "return :ok", + ), + ("break if done\n", Language::Ruby, ".rb", "break", "break"), + ( + "next value if done\n", + Language::Ruby, + ".rb", + "next", + "next value", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_return(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_return", + kind, + text + ), + "normalize_return mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn call_arguments_matches_ruby_private_method() { + for (source, language, suffix, kind, text, function_mode) in [ + ( + "foo(value)\n", + Language::Ruby, + ".rb", + "call", + "foo(value)", + "auto", + ), + ( + "foo(left + right)\n", + Language::Ruby, + ".rb", + "call", + "foo(left + right)", + "auto", + ), + ( + "foo(user.name)\n", + Language::Ruby, + ".rb", + "call", + "foo(user.name)", + "auto", + ), + ( + "user.name(value)\n", + Language::Ruby, + ".rb", + "call", + "user.name(value)", + "none", + ), + ( + "foo(value)\n", + Language::Python, + ".py", + "call", + "foo(value)", + "auto", + ), + ( + "foo(value);\n", + Language::TypeScript, + ".ts", + "call_expression", + "foo(value)", + "auto", + ), + ( + "foo(value)\n", + Language::Lua, + ".lua", + "function_call", + "foo(value)", + "auto", + ), + ( + "user.name(value)\n", + Language::Lua, + ".lua", + "function_call", + 
"user.name(value)", + "none", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let function = match function_mode { + "auto" => normalizer + .named_field(node, "function") + .or_else(|| normalizer.named_field(node, "call")) + .or_else(|| normalizer.named_children(node).into_iter().next()), + "none" => None, + other => panic!("unknown function mode {other:?}"), + }; + let rust = Value::Array( + normalizer + .call_arguments(node, function) + .iter() + .map(node_value) + .collect(), + ); + + assert_eq!( + rust, + ruby_private_call_arguments_value(source, language, suffix, kind, text, function_mode), + "call_arguments mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_call_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ("ready?\n", Language::Ruby, ".rb", "call", "ready?"), + ("foo(value)\n", Language::Ruby, ".rb", "call", "foo(value)"), + ( + "user.name(value)\n", + Language::Ruby, + ".rb", + "call", + "user.name(value)", + ), + ( + "def f\n foo { bar }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "foo { bar }", + ), + ( + "foo(value)\n", + Language::Python, + ".py", + "expression_statement", + "foo(value)", + ), + ( + "foo(value);\n", + Language::TypeScript, + ".ts", + "call_expression", + "foo(value)", + ), + ( + "foo(value)\n", + Language::Lua, + ".lua", + "function_call", + "foo(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_call(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_call", + kind, + text + ), + "normalize_call mismatch 
for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_call_with_block_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "items.map { |item| item }\n", + Language::Ruby, + ".rb", + "call", + "items.map { |item| item }", + ), + ( + "items.each do |item|\n item\nend\n", + Language::Ruby, + ".rb", + "call", + "items.each do |item|\n item\nend", + ), + ( + "foo(1) { bar }\n", + Language::Ruby, + ".rb", + "call", + "foo(1) { bar }", + ), + ( + "def f\n foo { bar }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "foo { bar }", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_call_with_block(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_call_with_block", + kind, + text + ), + "normalize_call_with_block mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_call_without_block_matches_ruby_private_method() { + for (source, language, suffix, kind, text, block_mode) in [ + ( + "foo(value)\n", + Language::Ruby, + ".rb", + "call", + "foo(value)", + "none", + ), + ( + "user.name(value)\n", + Language::Ruby, + ".rb", + "call", + "user.name(value)", + "none", + ), + ( + "foo(1) { bar }\n", + Language::Ruby, + ".rb", + "call", + "foo(1) { bar }", + "auto", + ), + ( + "items.map(1) { |item| item }\n", + Language::Ruby, + ".rb", + "call", + "items.map(1) { |item| item }", + "auto", + ), + ( + "Foo { bar }\n", + Language::Ruby, + ".rb", + "call", + "Foo { bar }", + "auto", + ), + ( + "foo(value)\n", + Language::Python, + ".py", + "expression_statement", + "foo(value)", + "none", + ), + ( + "foo(value);\n", + Language::TypeScript, + ".ts", + "call_expression", + "foo(value)", + "none", + ), + ( 
+ "foo(value)\n", + Language::Lua, + ".lua", + "function_call", + "foo(value)", + "none", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let block = match block_mode { + "auto" => normalizer.call_block(node), + "none" => None, + other => panic!("unknown block mode {other:?}"), + }; + let rust = normalizer + .normalize_call_without_block(node, block) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_call_without_block_value( + source, language, suffix, kind, text, block_mode + ), + "normalize_call_without_block mismatch for {language:?} {kind} {text:?} with block mode {block_mode:?}" + ); + } +} + +#[test] +fn command_arguments_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "foo value\n", + Language::Ruby, + ".rb", + "argument_list", + "value", + ), + ( + "foo :name\n", + Language::Ruby, + ".rb", + "argument_list", + ":name", + ), + ( + "foo left + right\n", + Language::Ruby, + ".rb", + "argument_list", + "left + right", + ), + ( + "foo user.name\n", + Language::Ruby, + ".rb", + "argument_list", + "user.name", + ), + ( + "foo(value)\n", + Language::Python, + ".py", + "argument_list", + "(value)", + ), + ( + "foo(left + right)\n", + Language::Python, + ".py", + "argument_list", + "(left + right)", + ), + ( + "foo(value);\n", + Language::TypeScript, + ".ts", + "arguments", + "(value)", + ), + ( + "foo(value)\n", + Language::Lua, + ".lua", + "arguments", + "(value)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = Value::Array( + normalizer + .command_arguments(node) + .iter() + .map(node_value) + .collect(), + ); + + assert_eq!( + rust, + 
ruby_private_command_arguments_value(source, language, suffix, kind, text), + "command_arguments mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn const_for_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ("Foo\n", Language::Ruby, ".rb", "constant", "Foo"), + ("foo\n", Language::Ruby, ".rb", "identifier", "foo"), + ( + "class Foo:\n pass\n", + Language::Python, + ".py", + "identifier", + "Foo", + ), + ( + "type Alias = Foo;\n", + Language::TypeScript, + ".ts", + "type_identifier", + "Foo", + ), + ( + "local Foo = {}\n", + Language::Lua, + ".lua", + "variable_list", + "Foo", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.const_for(Some(node), node); + + assert_eq!( + node_value(&rust), + ruby_private_normalize_method_value(source, language, suffix, "const_for", kind, text), + "const_for mismatch for {language:?} {kind} {text:?}" + ); + } + + for (source, language, suffix) in [ + ("class Foo\nend\n", Language::Ruby, ".rb"), + ("class Foo:\n pass\n", Language::Python, ".py"), + ("class Foo {}\n", Language::TypeScript, ".ts"), + ("local Foo = {}\n", Language::Lua, ".lua"), + ] { + let tree = raw_tree(source, language); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.const_for(None, tree.root_node()); + + assert_eq!( + node_value(&rust), + ruby_private_const_for_nil_value(source, language, suffix), + "const_for nil mismatch for {language:?}" + ); + } +} + +#[test] +fn normalize_patterns_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "case value\nwhen 1\n one\nend\n", + Language::Ruby, + ".rb", + "when", + "when 1\n one", + ), + ( + "case\nwhen ready\n one\nend\n", + Language::Ruby, + ".rb", + "when", + "when ready\n one", + ), + ( + "case value\nwhen 
Foo::Bar\n one\nend\n", + Language::Ruby, + ".rb", + "when", + "when Foo::Bar\n one", + ), + ( + "case value\nwhen Foo\n one\nend\n", + Language::Ruby, + ".rb", + "when", + "when Foo\n one", + ), + ( + "match value:\n case 1:\n one()\n", + Language::Python, + ".py", + "case_clause", + "case 1:\n one()", + ), + ( + "switch (value) { case 1: one(); default: other(); }\n", + Language::TypeScript, + ".ts", + "switch_case", + "case 1: one();", + ), + ("return 1\n", Language::Lua, ".lua", "expression_list", "1"), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = Value::Array( + normalizer + .normalize_patterns(node) + .iter() + .map(node_value) + .collect(), + ); + + assert_eq!( + rust, + ruby_private_normalize_patterns_value(source, language, suffix, kind, text), + "normalize_patterns mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn case_value_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "case value\nwhen 1\n one\nend\n", + Language::Ruby, + ".rb", + "case", + "case value\nwhen 1\n one\nend", + ), + ( + "case\nwhen ready\n one\nend\n", + Language::Ruby, + ".rb", + "case", + "case\nwhen ready\n one\nend", + ), + ( + "match value:\n case 1:\n one()\n", + Language::Python, + ".py", + "match_statement", + "match value:\n case 1:\n one()", + ), + ( + "switch (value) { case 1: one(); }\n", + Language::TypeScript, + ".ts", + "switch_statement", + "switch (value) { case 1: one(); }", + ), + ( + "if value == 1 then one() end\n", + Language::Lua, + ".lua", + "if_statement", + "if value == 1 then one() end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.case_value(node).map(|value| { + ( + 
value.kind().to_string(), + super::node_text(value, source).to_string(), + ) + }); + + assert_eq!( + rust, + ruby_private_node_signature(source, language, suffix, "case_value", kind, text), + "case_value mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn case_arms_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "case value\nwhen 1\n one\nwhen 2\n two\nelse\n other\nend\n", + Language::Ruby, + ".rb", + "case", + "case value\nwhen 1\n one\nwhen 2\n two\nelse\n other\nend", + ), + ( + "match value:\n case 1:\n one()\n case _:\n other()\n", + Language::Python, + ".py", + "match_statement", + "match value:\n case 1:\n one()\n case _:\n other()", + ), + ( + "switch (value) { case 1: one(); default: other(); }\n", + Language::TypeScript, + ".ts", + "switch_statement", + "switch (value) { case 1: one(); default: other(); }", + ), + ( + "if value == 1 then one() end\n", + Language::Lua, + ".lua", + "if_statement", + "if value == 1 then one() end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .case_arms(node) + .into_iter() + .map(|arm| { + ( + arm.kind().to_string(), + super::node_text(arm, source).to_string(), + ) + }) + .collect::>(); + + assert_eq!( + rust, + ruby_private_node_list_signature(source, language, suffix, "case_arms", kind, text), + "case_arms mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn when_body_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "case value\nwhen 1\n one\nend\n", + Language::Ruby, + ".rb", + "when", + "when 1\n one", + ), + ( + "match value:\n case 1:\n one()\n", + Language::Python, + ".py", + "case_clause", + "case 1:\n one()", + ), + ( + "switch (value) { case 1: one(); default: other(); }\n", + Language::TypeScript, + ".ts", + "switch_case", + "case 
1: one();", + ), + ( + "switch (value) { case 1: one(); default: other(); }\n", + Language::TypeScript, + ".ts", + "switch_default", + "default: other();", + ), + ( + "if value == 1 then one() end\n", + Language::Lua, + ".lua", + "if_statement", + "if value == 1 then one() end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.when_body(node).map(|body| { + ( + body.kind().to_string(), + super::node_text(body, source).to_string(), + ) + }); + + assert_eq!( + rust, + ruby_private_node_signature(source, language, suffix, "when_body", kind, text), + "when_body mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_when_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "case value\nwhen 1\n one\nend\n", + Language::Ruby, + ".rb", + "when", + "when 1\n one", + ), + ( + "case value\nwhen Foo::Bar\n one\nend\n", + Language::Ruby, + ".rb", + "when", + "when Foo::Bar\n one", + ), + ( + "match value:\n case 1:\n one()\n", + Language::Python, + ".py", + "case_clause", + "case 1:\n one()", + ), + ( + "switch (value) { case 1: one(); break; default: other(); }\n", + Language::TypeScript, + ".ts", + "switch_case", + "case 1: one(); break;", + ), + ( + "if value == 1 then one() else other() end\n", + Language::Lua, + ".lua", + "if_statement", + "if value == 1 then one() else other() end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_when(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_when", + kind, + text + ), + "normalize_when mismatch for 
{language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn case_else_body_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "case value\nwhen 1\n one\nelse\n other\nend\n", + Language::Ruby, + ".rb", + "case", + "case value\nwhen 1\n one\nelse\n other\nend", + ), + ( + "case value\nwhen 1\n one\nend\n", + Language::Ruby, + ".rb", + "case", + "case value\nwhen 1\n one\nend", + ), + ( + "match value:\n case 1:\n one()\n case _:\n other()\n", + Language::Python, + ".py", + "match_statement", + "match value:\n case 1:\n one()\n case _:\n other()", + ), + ( + "match value:\n case 1:\n one()\n", + Language::Python, + ".py", + "match_statement", + "match value:\n case 1:\n one()", + ), + ( + "switch (value) { case 1: one(); break; default: other(); }\n", + Language::TypeScript, + ".ts", + "switch_statement", + "switch (value) { case 1: one(); break; default: other(); }", + ), + ( + "switch (value) { case 1: one(); break; }\n", + Language::TypeScript, + ".ts", + "switch_statement", + "switch (value) { case 1: one(); break; }", + ), + ( + "if value == 1 then one() else other() end\n", + Language::Lua, + ".lua", + "if_statement", + "if value == 1 then one() else other() end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .case_else_body(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "case_else_body", + kind, + text + ), + "case_else_body mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_case_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "case value\nwhen 1\n one\nwhen 2\n two\nelse\n other\nend\n", + Language::Ruby, + ".rb", + "case", + "case value\nwhen 1\n one\nwhen 2\n two\nelse\n 
other\nend", + ), + ( + "case\nwhen ready\n one\nelse\n other\nend\n", + Language::Ruby, + ".rb", + "case", + "case\nwhen ready\n one\nelse\n other\nend", + ), + ( + "match value:\n case 1:\n one()\n case _:\n other()\n", + Language::Python, + ".py", + "match_statement", + "match value:\n case 1:\n one()\n case _:\n other()", + ), + ( + "switch (value) { case 1: one(); break; default: other(); }\n", + Language::TypeScript, + ".ts", + "switch_statement", + "switch (value) { case 1: one(); break; default: other(); }", + ), + ( + "if value == 1 then one() else other() end\n", + Language::Lua, + ".lua", + "if_statement", + "if value == 1 then one() else other() end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_case(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_case", + kind, + text + ), + "normalize_case mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn dotted_call_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ("user.name\n", Language::Ruby, ".rb", "call", "user.name"), + ("user&.name\n", Language::Ruby, ".rb", "call", "user&.name"), + ("user\n", Language::Ruby, ".rb", "identifier", "user"), + ( + "user.name()\n", + Language::Python, + ".py", + "attribute", + "user.name", + ), + ( + "user\n", + Language::Python, + ".py", + "expression_statement", + "user", + ), + ( + "user.name();\n", + Language::TypeScript, + ".ts", + "member_expression", + "user.name", + ), + ("user;\n", Language::TypeScript, ".ts", "identifier", "user"), + ( + "user.name()\n", + Language::Lua, + ".lua", + "dot_index_expression", + "user.name", + ), + ("user()\n", Language::Lua, ".lua", "function_call", "user()"), + ] { + let tree 
= raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.dotted_call(node), + ruby_private_predicate(source, language, suffix, "dotted_call?", kind, text), + "dotted_call? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn dotted_expression_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n user.name\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "user.name", + ), + ("user.name\n", Language::Ruby, ".rb", "call", "user.name"), + ( + "user.name\n", + Language::Python, + ".py", + "expression_statement", + "user.name", + ), + ( + "user.name()\n", + Language::Python, + ".py", + "attribute", + "user.name", + ), + ( + "user.name;\n", + Language::TypeScript, + ".ts", + "expression_statement", + "user.name;", + ), + ( + "user.name;\n", + Language::TypeScript, + ".ts", + "member_expression", + "user.name", + ), + ( + "user.name()\n", + Language::Lua, + ".lua", + "dot_index_expression", + "user.name", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.dotted_expression(node), + ruby_private_predicate(source, language, suffix, "dotted_expression?", kind, text), + "dotted_expression? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn dotted_expression_normalization_matches_ruby() { + for (source, language, suffix) in [ + ("def f\n user.name\nend\n", Language::Ruby, ".rb"), + ("user.name\n", Language::Python, ".py"), + ] { + assert_ruby_parity(source, language, suffix); + } +} + +#[test] +fn normalize_else_or_branch_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "if ready\n call\nelse\n stop\nend\n", + Language::Ruby, + ".rb", + "else", + "else\n stop", + ), + ( + "if ready\n call\nelse\n user.name\nend\n", + Language::Ruby, + ".rb", + "else", + "else\n user.name", + ), + ( + "if ready:\n call()\nelse:\n stop()\n", + Language::Python, + ".py", + "else_clause", + "else:\n stop()", + ), + ( + "if ready:\n call()\nelse:\n if backup:\n stop()\n", + Language::Python, + ".py", + "else_clause", + "else:\n if backup:\n stop()", + ), + ( + "if (ready) { call(); } else { stop(); }\n", + Language::TypeScript, + ".ts", + "else_clause", + "else { stop(); }", + ), + ( + "if ready then\n call()\nelse\n stop()\nend\n", + Language::Lua, + ".lua", + "else_statement", + "else\n stop()", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_else_or_branch(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_else_or_branch", + kind, + text + ), + "normalize_else_or_branch mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_if_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "if ready\n call\nelse\n stop\nend\n", + Language::Ruby, + ".rb", + "if", + "if ready\n call\nelse\n stop\nend", + ), + ( + "call if ready\n", + Language::Ruby, + ".rb", + 
"if_modifier", + "call if ready", + ), + ( + "unless ready\n call\nend\n", + Language::Ruby, + ".rb", + "unless", + "unless ready\n call\nend", + ), + ( + "if ready:\n call()\nelse:\n stop()\n", + Language::Python, + ".py", + "if_statement", + "if ready:\n call()\nelse:\n stop()", + ), + ( + "if ready:\n call()\nelif other:\n stop()\n", + Language::Python, + ".py", + "if_statement", + "if ready:\n call()\nelif other:\n stop()", + ), + ( + "if (ready) { call(); } else { stop(); }\n", + Language::TypeScript, + ".ts", + "if_statement", + "if (ready) { call(); } else { stop(); }", + ), + ( + "if ready then\n call()\nelseif other then\n stop()\nend\n", + Language::Lua, + ".lua", + "if_statement", + "if ready then\n call()\nelseif other then\n stop()\nend", + ), + ( + "if ready then\n call()\nelse\n stop()\nend\n", + Language::Lua, + ".lua", + "if_statement", + "if ready then\n call()\nelse\n stop()\nend", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_if(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_if", + kind, + text + ), + "normalize_if mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_elsif_matches_ruby_private_method() { + for (source, kind, text) in [ + ( + "if ready\n call\nelsif other\n stop\nend\n", + "elsif", + "elsif other\n stop", + ), + ( + "if ready\n call\nelsif other\n stop\nelse\n done\nend\n", + "elsif", + "elsif other\n stop\nelse\n done", + ), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = node_value(&normalizer.normalize_elsif(node)); + + assert_eq!( 
+ rust, + ruby_private_normalize_method_value( + source, + Language::Ruby, + ".rb", + "normalize_elsif", + kind, + text + ), + "normalize_elsif mismatch for {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_loop_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "while ready\n work\nend\n", + Language::Ruby, + ".rb", + "while", + "while ready\n work\nend", + ), + ( + "work while ready\n", + Language::Ruby, + ".rb", + "while_modifier", + "work while ready", + ), + ( + "work until ready\n", + Language::Ruby, + ".rb", + "until_modifier", + "work until ready", + ), + ( + "for item in items\n work\nend\n", + Language::Ruby, + ".rb", + "for", + "for item in items\n work\nend", + ), + ( + "while ready:\n work()\n", + Language::Python, + ".py", + "while_statement", + "while ready:\n work()", + ), + ( + "for item in items:\n work()\n", + Language::Python, + ".py", + "for_statement", + "for item in items:\n work()", + ), + ( + "while ready do\n work()\nend\n", + Language::Lua, + ".lua", + "while_statement", + "while ready do\n work()\nend", + ), + ( + "while (ready) { work(); }\n", + Language::TypeScript, + ".ts", + "while_statement", + "while (ready) { work(); }", + ), + ( + "for (let i = 0; i < n; i++) { work(i); }\n", + Language::TypeScript, + ".ts", + "for_statement", + "for (let i = 0; i < n; i++) { work(i); }", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let node_type = super::loop_kind(node.kind()).expect("test node should be a loop kind"); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_loop(node, node_type) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_loop", + kind, + text + ), + "normalize_loop mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn 
ruby_elsif_normalization_matches_ruby() { + for source in [ + "if ready\n call\nelsif other\n stop\nend\n", + "if ready\n call\nelsif other\n stop\nelse\n done\nend\n", + ] { + let root = parse_language_source(source, Language::Ruby, ".rb"); + let if_node = first_node(&root, "IF", source.trim_end()); + + assert_eq!( + child_node(if_node, 2).r#type, + "IF", + "expected Ruby elsif alternative to normalize as nested IF: {if_node:#?}" + ); + assert_ruby_parity(source, Language::Ruby, ".rb"); + } +} + +#[test] +fn normalize_dotted_expression_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n user.name\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "user.name", + ), + ( + "def f\n user.name { value }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "user.name { value }", + ), + ( + "user.name\n", + Language::Python, + ".py", + "expression_statement", + "user.name", + ), + ( + "user.name;\n", + Language::TypeScript, + ".ts", + "expression_statement", + "user.name;", + ), + ( + "user.name()\n", + Language::Lua, + ".lua", + "dot_index_expression", + "user.name", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_dotted_expression(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_dotted_expression", + kind, + text + ), + "normalize_dotted_expression mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_dotted_call_expression_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n user.name\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "user.name", + ), + ( + "def f\n user.name(1)\nend\n", + Language::Ruby, + ".rb", + "body_statement", 
+ "user.name(1)", + ), + ( + "def f\n user&.name\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "user&.name", + ), + ( + "def f\n user.name { value }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "user.name { value }", + ), + ( + "user.name\n", + Language::Python, + ".py", + "expression_statement", + "user.name", + ), + ( + "user.name;\n", + Language::TypeScript, + ".ts", + "member_expression", + "user.name", + ), + ( + "user.name()\n", + Language::Lua, + ".lua", + "dot_index_expression", + "user.name", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_dotted_call_expression(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_dotted_call_expression", + kind, + text + ), + "normalize_dotted_call_expression mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn argument_list_call_with_block_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n return foo { bar }\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "foo { bar }", + ), + ( + "def f\n return foo do\n bar\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "foo do\n bar\n end", + ), + ( + "def f\n return foo(1) { bar }\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "foo(1) { bar }", + ), + ( + "def f\n foo { bar }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "foo { bar }", + ), + ( + "def f\n return foo.bar { baz }\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "foo.bar { baz }", + ), + ( + "def f\n return Foo { bar }\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "Foo { bar }", + ), + ( + "def f():\n return foo(lambda: bar)\n", + Language::Python, + ".py", + 
"argument_list", + "(lambda: bar)", + ), + ( + "function f(){ return foo(() => bar); }\n", + Language::TypeScript, + ".ts", + "arguments", + "(() => bar)", + ), + ( + "function f() return foo(function() return bar end) end\n", + Language::Lua, + ".lua", + "arguments", + "(function() return bar end)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.argument_list_call_with_block(node), + ruby_private_predicate( + source, + language, + suffix, + "argument_list_call_with_block?", + kind, + text + ), + "argument_list_call_with_block? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_argument_list_call_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n return foo { bar }\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "foo { bar }", + ), + ( + "def f\n return foo do\n bar\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "foo do\n bar\n end", + ), + ( + "def f\n return foo(1) { bar }\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "foo(1) { bar }", + ), + ( + "def f\n foo { bar }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "foo { bar }", + ), + ( + "def f():\n return foo(lambda: bar)\n", + Language::Python, + ".py", + "argument_list", + "(lambda: bar)", + ), + ( + "function f(){ return foo(() => bar); }\n", + Language::TypeScript, + ".ts", + "arguments", + "(() => bar)", + ), + ( + "function f() return foo(function() return bar end) end\n", + Language::Lua, + ".lua", + "arguments", + "(function() return bar end)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_argument_list_call(node) + .map(|node| 
node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_argument_list_call", + kind, + text + ), + "normalize_argument_list_call mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_argument_list_call_with_block_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n return foo { bar }\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "foo { bar }", + ), + ( + "def f\n return foo do\n bar\n end\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "foo do\n bar\n end", + ), + ( + "def f\n return foo(1) { bar }\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "foo(1) { bar }", + ), + ( + "def f\n foo { bar }\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "foo { bar }", + ), + ( + "def f():\n return foo(lambda: bar)\n", + Language::Python, + ".py", + "argument_list", + "(lambda: bar)", + ), + ( + "function f(){ return foo(() => bar); }\n", + Language::TypeScript, + ".ts", + "arguments", + "(() => bar)", + ), + ( + "function f() return foo(function() return bar end) end\n", + Language::Lua, + ".lua", + "arguments", + "(function() return bar end)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_argument_list_call_with_block(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_argument_list_call_with_block", + kind, + text + ), + "normalize_argument_list_call_with_block mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn safe_navigation_call_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ("user&.name\n", Language::Ruby, 
".rb", "call", "user&.name"), + ("user.name\n", Language::Ruby, ".rb", "call", "user.name"), + ( + "user.name()\n", + Language::Python, + ".py", + "attribute", + "user.name", + ), + ( + "user?.name;\n", + Language::TypeScript, + ".ts", + "member_expression", + "user?.name", + ), + ( + "user?.name();\n", + Language::TypeScript, + ".ts", + "call_expression", + "user?.name()", + ), + ( + "user.name;\n", + Language::TypeScript, + ".ts", + "member_expression", + "user.name", + ), + ( + "user.name()\n", + Language::Lua, + ".lua", + "dot_index_expression", + "user.name", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.safe_navigation_call(node), + ruby_private_predicate( + source, + language, + suffix, + "safe_navigation_call?", + kind, + text + ), + "safe_navigation_call? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn rescue_source_end_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "begin\n work\nrescue Error => e\n handle\nend\n", + Language::Ruby, + ".rb", + "rescue", + "rescue Error => e\n handle", + ), + ( + "try:\n work()\nexcept Error as e:\n handle()\n", + Language::Python, + ".py", + "except_clause", + "except Error as e:\n handle()", + ), + ( + "try { work(); } catch (e) { handle(); }\n", + Language::TypeScript, + ".ts", + "catch_clause", + "catch (e) { handle(); }", + ), + ("work()\n", Language::Lua, ".lua", "function_call", "work()"), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.rescue_source_end(node).map(|source_end| { + ( + source_end.kind().to_string(), + super::node_text(source_end, source).to_string(), + ) + }); + + assert_eq!( + rust, + 
ruby_private_node_signature(source, language, suffix, "rescue_source_end", kind, text), + "rescue_source_end mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn rescue_exception_variable_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "begin\n work\nrescue Error => e\n handle\nend\n", + Language::Ruby, + ".rb", + "rescue", + "rescue Error => e\n handle", + ), + ( + "begin\n work\nrescue Error\n handle\nend\n", + Language::Ruby, + ".rb", + "rescue", + "rescue Error\n handle", + ), + ( + "try:\n work()\nexcept Error as e:\n handle()\n", + Language::Python, + ".py", + "except_clause", + "except Error as e:\n handle()", + ), + ( + "try:\n work()\nexcept Error:\n handle()\n", + Language::Python, + ".py", + "except_clause", + "except Error:\n handle()", + ), + ( + "try { work(); } catch (e) { handle(); }\n", + Language::TypeScript, + ".ts", + "catch_clause", + "catch (e) { handle(); }", + ), + ("work()\n", Language::Lua, ".lua", "function_call", "work()"), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .rescue_exception_variable(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "rescue_exception_variable", + kind, + text + ), + "rescue_exception_variable mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_rescue_clause_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "begin\n work\nrescue Error => e\n handle\nend\n", + Language::Ruby, + ".rb", + "rescue", + "rescue Error => e\n handle", + ), + ( + "begin\n work\nrescue Net::Error\n handle\nend\n", + Language::Ruby, + ".rb", + "rescue", + "rescue Net::Error\n handle", + ), + ( + "try:\n work()\nexcept Error as e:\n 
handle(e)\n", + Language::Python, + ".py", + "except_clause", + "except Error as e:\n handle(e)", + ), + ( + "try { work(); } catch (e) { handle(e); }\n", + Language::TypeScript, + ".ts", + "catch_clause", + "catch (e) { handle(e); }", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_rescue_clause(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_rescue_clause", + kind, + text + ), + "normalize_rescue_clause mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_rescue_modifier_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [( + "value rescue fallback\n", + Language::Ruby, + ".rb", + "rescue_modifier", + "value rescue fallback", + )] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_rescue_modifier(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_rescue_modifier", + kind, + text + ), + "normalize_rescue_modifier mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn prepend_rescue_exception_assignment_matches_ruby_private_method() { + fn synthetic_node( + node_type: &str, + text: &str, + first_lineno: usize, + first_column: usize, + last_lineno: usize, + last_column: usize, + children: Vec, + ) -> Node { + Node { + r#type: node_type.to_string(), + children, + first_lineno, + first_column, + last_lineno, + last_column, + text: text.to_string(), + } + } + + let source = "assign\nbody\n"; + let assignment 
= synthetic_node("LASGN", "assign", 1, 0, 1, 6, Vec::new()); + let body = synthetic_node("VCALL", "body", 2, 0, 2, 4, Vec::new()); + let block = synthetic_node( + "BLOCK", + "body", + 2, + 0, + 2, + 4, + vec![Child::Node(Box::new(body.clone())), Child::Nil], + ); + + for (label, body_node, assignment_node) in [ + ("no_assignment", Some(body.clone()), None), + ("no_body", None, Some(assignment.clone())), + ("block_body", Some(block), Some(assignment.clone())), + ("scalar_body", Some(body), Some(assignment)), + ] { + let normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = normalizer + .prepend_rescue_exception_assignment(body_node.clone(), assignment_node.clone()) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + let body_value = body_node.as_ref().map(node_value).unwrap_or(Value::Null); + let assignment_value = assignment_node + .as_ref() + .map(node_value) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_prepend_rescue_exception_assignment_value( + source, + &body_value, + &assignment_value + ), + "prepend_rescue_exception_assignment mismatch for {label}" + ); + } +} + +#[test] +fn dotted_call_parts_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ("user.name\n", Language::Ruby, ".rb", "call", "user.name"), + ("user&.name\n", Language::Ruby, ".rb", "call", "user&.name"), + ( + "user.name()\n", + Language::Python, + ".py", + "attribute", + "user.name", + ), + ( + "user.name();\n", + Language::TypeScript, + ".ts", + "member_expression", + "user.name", + ), + ( + "user.name()\n", + Language::Lua, + ".lua", + "dot_index_expression", + "user.name", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .dotted_call_parts(node, None) + .map(|(receiver, method)| { + ( + receiver.kind().to_string(), + 
super::node_text(receiver, source).to_string(), + method, + ) + }); + + assert_eq!( + rust, + ruby_private_dotted_call_parts(source, language, suffix, kind, text), + "dotted_call_parts mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn dotted_call_parts_normalization_matches_ruby() { + for (source, language, suffix) in [ + ("user.name\n", Language::Ruby, ".rb"), + ("user&.name\n", Language::Ruby, ".rb"), + ("user.name()\n", Language::Python, ".py"), + ("user.name();\n", Language::TypeScript, ".ts"), + ("user.name()\n", Language::Lua, ".lua"), + ] { + let root = parse_language_source(source, language, suffix); + if language != Language::Lua { + let mut calls = Vec::new(); + nodes_of_type(&root, "CALL", &mut calls); + let mut qcalls = Vec::new(); + nodes_of_type(&root, "QCALL", &mut qcalls); + assert!( + calls + .iter() + .chain(qcalls.iter()) + .any(|node| matches!(node.children.get(1), Some(Child::Symbol(method)) if method == "name")), + "expected dotted call method name for {language:?} in {root:#?}" + ); + } + assert_ruby_parity(source, language, suffix); + } +} + +#[test] +fn leading_if_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n if x\n y\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "if x\n y\n end", + ), + ( + "def f():\n if x:\n y()\n", + Language::Python, + ".py", + "block", + "if x:\n y()", + ), + ( + "function f()\n if x then\n y()\n end\nend\n", + Language::Lua, + ".lua", + "block", + "if x then\n y()\n end", + ), + ( + "function f() { if (x) { y(); } }\n", + Language::TypeScript, + ".ts", + "if_statement", + "if (x) { y(); }", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.leading_if_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "leading_if_statement?", 
+ kind, + text + ), + "leading_if_statement? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_leading_if_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n if x\n y\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "if x\n y\n end", + ), + ( + "def f():\n if x:\n y()\n", + Language::Python, + ".py", + "block", + "if x:\n y()", + ), + ( + "function f()\n if x then\n y()\n end\nend\n", + Language::Lua, + ".lua", + "block", + "if x then\n y()\n end", + ), + ( + "function f() { if (x) { y(); } }\n", + Language::TypeScript, + ".ts", + "if_statement", + "if (x) { y(); }", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_leading_if_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_leading_if_statement", + kind, + text + ), + "normalize_leading_if_statement mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn leading_if_statement_normalization_matches_ruby() { + for (source, language, suffix) in [ + ("def f\n if x\n y\n end\nend\n", Language::Ruby, ".rb"), + ( + "def f():\n if x:\n y()\n", + Language::Python, + ".py", + ), + ( + "function f()\n if x then\n y()\n end\nend\n", + Language::Lua, + ".lua", + ), + ( + "function f() { if (x) { y(); } }\n", + Language::TypeScript, + ".ts", + ), + ] { + let root = parse_language_source(source, language, suffix); + let mut if_nodes = Vec::new(); + nodes_of_type(&root, "IF", &mut if_nodes); + assert!( + !if_nodes.is_empty(), + "expected IF node for {language:?} in {root:#?}" + ); + assert_ruby_parity(source, language, suffix); + } +} + +#[test] +fn leading_case_statement_matches_ruby_private_predicate() { + 
for (source, language, suffix, kind, text) in [ + ( + "def f(x)\n case x\n when 1 then y\n else z\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "case x\n when 1 then y\n else z\n end", + ), + ( + "def f(x):\n match x:\n case 1:\n y()\n", + Language::Python, + ".py", + "block", + "match x:\n case 1:\n y()", + ), + ( + "function f(x) { switch (x) { case 1: y(); break; default: z(); } }\n", + Language::TypeScript, + ".ts", + "switch_statement", + "switch (x) { case 1: y(); break; default: z(); }", + ), + ( + "function f(x)\n if x == 1 then y() end\nend\n", + Language::Lua, + ".lua", + "block", + "if x == 1 then y() end", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.leading_case_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "leading_case_statement?", + kind, + text + ), + "leading_case_statement? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_leading_case_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f(x)\n case x\n when 1 then y\n else z\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "case x\n when 1 then y\n else z\n end", + ), + ( + "def f(x):\n match x:\n case 1:\n y()\n", + Language::Python, + ".py", + "block", + "match x:\n case 1:\n y()", + ), + ( + "function f(x) { switch (x) { case 1: y(); break; default: z(); } }\n", + Language::TypeScript, + ".ts", + "switch_statement", + "switch (x) { case 1: y(); break; default: z(); }", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_leading_case_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_leading_case_statement", + kind, + text + ), + "normalize_leading_case_statement mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn leading_case_statement_normalization_matches_ruby() { + for (source, language, suffix) in [ + ( + "def f(x)\n case x\n when 1 then y\n else z\n end\nend\n", + Language::Ruby, + ".rb", + ), + ( + "def f(x):\n match x:\n case 1:\n y()\n", + Language::Python, + ".py", + ), + ( + "function f(x) { switch (x) { case 1: y(); break; default: z(); } }\n", + Language::TypeScript, + ".ts", + ), + ] { + let root = parse_language_source(source, language, suffix); + let mut case_nodes = Vec::new(); + nodes_of_type(&root, "CASE", &mut case_nodes); + assert!( + !case_nodes.is_empty(), + "expected CASE node for {language:?} in {root:#?}" + ); + assert_ruby_parity(source, language, suffix); + } +} + +#[test] +fn leading_loop_statement_matches_ruby_private_predicate() { + 
for (source, language, suffix, kind, text) in [ + ( + "def f(x)\n while x\n y\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "while x\n y\n end", + ), + ( + "def f(x):\n while x:\n y()\n", + Language::Python, + ".py", + "block", + "while x:\n y()", + ), + ( + "function f(x)\n while x do\n y()\n end\nend\n", + Language::Lua, + ".lua", + "block", + "while x do\n y()\n end", + ), + ( + "function f(x) { while (x) { y(); } }\n", + Language::TypeScript, + ".ts", + "while_statement", + "while (x) { y(); }", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.leading_loop_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "leading_loop_statement?", + kind, + text + ), + "leading_loop_statement? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_leading_loop_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f(x)\n while x\n y\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "while x\n y\n end", + ), + ( + "def f(x)\n until x\n y\n end\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "until x\n y\n end", + ), + ( + "def f(x):\n while x:\n y()\n", + Language::Python, + ".py", + "block", + "while x:\n y()", + ), + ( + "function f(x)\n while x do\n y()\n end\nend\n", + Language::Lua, + ".lua", + "block", + "while x do\n y()\n end", + ), + ( + "function f(x) { while (x) { y(); } }\n", + Language::TypeScript, + ".ts", + "while_statement", + "while (x) { y(); }", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_leading_loop_statement(node) + .map(|node| node_value(&node)) + 
.unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_leading_loop_statement", + kind, + text + ), + "normalize_leading_loop_statement mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn leading_loop_statement_normalization_matches_ruby() { + for (source, language, suffix) in [ + ( + "def f(x)\n while x\n y\n end\nend\n", + Language::Ruby, + ".rb", + ), + ( + "def f(x):\n while x:\n y()\n", + Language::Python, + ".py", + ), + ( + "function f(x)\n while x do\n y()\n end\nend\n", + Language::Lua, + ".lua", + ), + ( + "function f(x) { while (x) { y(); } }\n", + Language::TypeScript, + ".ts", + ), + ] { + let root = parse_language_source(source, language, suffix); + let mut while_nodes = Vec::new(); + nodes_of_type(&root, "WHILE", &mut while_nodes); + assert!( + !while_nodes.is_empty(), + "expected WHILE node for {language:?} in {root:#?}" + ); + assert_ruby_parity(source, language, suffix); + } +} + +#[test] +fn rescue_body_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n work\nrescue Error => e\n handle\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "work\nrescue Error => e\n handle", + ), + ( + "try:\n work()\nexcept Error as e:\n handle(e)\n", + Language::Python, + ".py", + "try_statement", + "try:\n work()\nexcept Error as e:\n handle(e)", + ), + ( + "try { work(); } catch (e) { handle(e); }\n", + Language::TypeScript, + ".ts", + "try_statement", + "try { work(); } catch (e) { handle(e); }", + ), + ( + "local ok, err = pcall(work)\n", + Language::Lua, + ".lua", + "variable_declaration", + "local ok, err = pcall(work)", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.rescue_body_statement(node), + ruby_private_predicate( + source, 
+ language, + suffix, + "rescue_body_statement?", + kind, + text + ), + "rescue_body_statement? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_rescue_body_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n work\nrescue Error => e\n handle\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "work\nrescue Error => e\n handle", + ), + ( + "try:\n work()\nexcept Error as e:\n handle(e)\n", + Language::Python, + ".py", + "try_statement", + "try:\n work()\nexcept Error as e:\n handle(e)", + ), + ( + "try { work(); } catch (e) { handle(e); }\n", + Language::TypeScript, + ".ts", + "try_statement", + "try { work(); } catch (e) { handle(e); }", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_rescue_body_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_rescue_body_statement", + kind, + text + ), + "normalize_rescue_body_statement mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn rescue_body_statement_normalization_matches_ruby() { + for (source, language, suffix) in [ + ( + "def f\n work\nrescue Error => e\n handle\nend\n", + Language::Ruby, + ".rb", + ), + ( + "try:\n work()\nexcept Error as e:\n handle(e)\n", + Language::Python, + ".py", + ), + ( + "try { work(); } catch (e) { handle(e); }\n", + Language::TypeScript, + ".ts", + ), + ] { + let root = parse_language_source(source, language, suffix); + let mut rescue_nodes = Vec::new(); + nodes_of_type(&root, "RESCUE", &mut rescue_nodes); + assert!( + !rescue_nodes.is_empty(), + "expected RESCUE node for {language:?} in {root:#?}" + ); + let mut resbody_nodes = Vec::new(); + nodes_of_type(&root, 
"RESBODY", &mut resbody_nodes); + assert!( + !resbody_nodes.is_empty(), + "expected RESBODY node for {language:?} in {root:#?}" + ); + assert_ruby_parity(source, language, suffix); + } +} + +#[test] +fn ensure_body_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n work\nensure\n cleanup\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "work\nensure\n cleanup", + ), + ( + "try:\n work()\nfinally:\n cleanup()\n", + Language::Python, + ".py", + "try_statement", + "try:\n work()\nfinally:\n cleanup()", + ), + ( + "try { work(); } finally { cleanup(); }\n", + Language::TypeScript, + ".ts", + "try_statement", + "try { work(); } finally { cleanup(); }", + ), + ( + "work()\ncleanup()\n", + Language::Lua, + ".lua", + "function_call", + "work()", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.ensure_body_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "ensure_body_statement?", + kind, + text + ), + "ensure_body_statement? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn ensure_body_statement_normalization_matches_ruby() { + for (source, language, suffix) in [ + ( + "def f\n work\nensure\n cleanup\nend\n", + Language::Ruby, + ".rb", + ), + ( + "try:\n work()\nfinally:\n cleanup()\n", + Language::Python, + ".py", + ), + ( + "try { work(); } finally { cleanup(); }\n", + Language::TypeScript, + ".ts", + ), + ( + "try:\n work()\nexcept Error as e:\n handle(e)\nfinally:\n cleanup()\n", + Language::Python, + ".py", + ), + ] { + let root = parse_language_source(source, language, suffix); + let mut ensure_nodes = Vec::new(); + nodes_of_type(&root, "ENSURE", &mut ensure_nodes); + assert!( + !ensure_nodes.is_empty(), + "expected ENSURE node for {language:?} in {root:#?}" + ); + assert_ruby_parity(source, language, suffix); + } +} + +#[test] +fn normalize_ensure_body_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n work\nensure\n cleanup\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "work\nensure\n cleanup", + ), + ( + "try:\n work()\nfinally:\n cleanup()\n", + Language::Python, + ".py", + "try_statement", + "try:\n work()\nfinally:\n cleanup()", + ), + ( + "try:\n work()\nexcept Error as e:\n handle(e)\nfinally:\n cleanup()\n", + Language::Python, + ".py", + "try_statement", + "try:\n work()\nexcept Error as e:\n handle(e)\nfinally:\n cleanup()", + ), + ( + "try { work(); } finally { cleanup(); }\n", + Language::TypeScript, + ".ts", + "try_statement", + "try { work(); } finally { cleanup(); }", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_ensure_body_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + 
"normalize_ensure_body_statement", + kind, + text + ), + "normalize_ensure_body_statement mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_ensure_clause_matches_ruby_private_method() { + for (source, kind, text) in [ + ( + "begin\n work\nensure\n cleanup\nend\n", + "ensure", + "ensure\n cleanup", + ), + ( + "begin\n work\nensure\n user.name\nend\n", + "ensure", + "ensure\n user.name", + ), + ( + "begin\n work\nensure\n user.name\n cleanup\nend\n", + "ensure", + "ensure\n user.name\n cleanup", + ), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = normalizer + .normalize_ensure_clause(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + Language::Ruby, + ".rb", + "normalize_ensure_clause", + kind, + text + ), + "normalize_ensure_clause mismatch for {kind} {text:?}" + ); + } +} + +#[test] +fn ruby_begin_ensure_clause_keeps_all_body_statements() { + let source = "begin\n work\nensure\n user.name\n cleanup\nend\n"; + let root = parse_language_source(source, Language::Ruby, ".rb"); + let ensure = first_node(&root, "ENSURE", "work\nensure\n user.name\n cleanup"); + let ensure_body = child_node(ensure, 1); + + assert_eq!( + child_types(ensure_body), + vec!["CALL", "VCALL"], + "Ruby ensure clause body must retain all statements: {ensure:#?}" + ); + assert_ruby_parity(source, Language::Ruby, ".rb"); +} + +#[test] +fn array_literal_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n [a, b]\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "[a, b]", + ), + ( + "def f():\n [a, b]\n", + Language::Python, + ".py", + "block", + "[a, b]", + ), + ( + "function f() { [a, b]; }\n", + Language::TypeScript, + ".ts", + "expression_statement", + 
"[a, b];", + ), + ( + "function f()\n {a, b}\nend\n", + Language::Lua, + ".lua", + "block", + "\n {a, b}", + ), + ( + "function f()\n {x = a, y = b}\nend\n", + Language::Lua, + ".lua", + "block", + "\n {x = a, y = b}", + ), + ( + "local rocks_path = table.concat({rocks_tree, \"a_rock\"})\n", + Language::Lua, + ".lua", + "arguments", + "({rocks_tree, \"a_rock\"})", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.array_literal_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "array_literal_statement?", + kind, + text + ), + "array_literal_statement? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn array_literal_statement_normalization_matches_ruby() { + for (source, language, suffix) in [ + ("def f\n [a, b]\nend\n", Language::Ruby, ".rb"), + ("def f():\n [a, b]\n", Language::Python, ".py"), + ("function f() { [a, b]; }\n", Language::TypeScript, ".ts"), + ("function f()\n {a, b}\nend\n", Language::Lua, ".lua"), + ] { + let root = parse_language_source(source, language, suffix); + let mut lists = Vec::new(); + nodes_of_type(&root, "LIST", &mut lists); + assert!( + lists + .iter() + .any(|node| node.text.contains('a') && node.text.contains('b')), + "expected LIST for {language:?} in {root:#?}" + ); + assert_ruby_parity(source, language, suffix); + } +} + +#[test] +fn normalize_array_literal_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n [a, b]\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "[a, b]", + ), + ( + "def f\n []\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "[]", + ), + ( + "def f():\n [a, b]\n", + Language::Python, + ".py", + "block", + "[a, b]", + ), + ("def f():\n []\n", Language::Python, ".py", "block", "[]"), + ( + "function f() { [a, b]; }\n", + 
Language::TypeScript, + ".ts", + "expression_statement", + "[a, b];", + ), + ( + "function f() { []; }\n", + Language::TypeScript, + ".ts", + "expression_statement", + "[];", + ), + ( + "function f()\n {a, b}\nend\n", + Language::Lua, + ".lua", + "block", + "\n {a, b}", + ), + ( + "assert.same(install, { bin = { P\"bin/binfile\" } })\n", + Language::Lua, + ".lua", + "arguments", + "(install, { bin = { P\"bin/binfile\" } })", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_array_literal_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_array_literal_statement", + kind, + text + ), + "normalize_array_literal_statement mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn element_reference_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n items[0]\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "items[0]", + ), + ( + "def f\n [0]\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "[0]", + ), + ( + "def f():\n items[0]\n", + Language::Python, + ".py", + "block", + "items[0]", + ), + ( + "return items[0]\n", + Language::Python, + ".py", + "subscript", + "items[0]", + ), + ( + "function f() { items[0]; }\n", + Language::TypeScript, + ".ts", + "expression_statement", + "items[0];", + ), + ( + "return items[0];\n", + Language::TypeScript, + ".ts", + "subscript_expression", + "items[0]", + ), + ( + "return items[1]\n", + Language::Lua, + ".lua", + "expression_list", + "items[1]", + ), + ( + "print(items[1])\n", + Language::Lua, + ".lua", + "bracket_index_expression", + "items[1]", + ), + ] { + let tree = raw_tree(source, language); + let node = 
first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.element_reference_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "element_reference_statement?", + kind, + text + ), + "element_reference_statement? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_element_reference_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n items[0]\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "items[0]", + ), + ( + "def f\n self[0]\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "self[0]", + ), + ( + "return items[0]\n", + Language::Python, + ".py", + "subscript", + "items[0]", + ), + ( + "return items[0];\n", + Language::TypeScript, + ".ts", + "subscript_expression", + "items[0]", + ), + ( + "print(items[1])\n", + Language::Lua, + ".lua", + "bracket_index_expression", + "items[1]", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_element_reference(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_element_reference", + kind, + text + ), + "normalize_element_reference mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_element_reference_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n items[0]\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "items[0]", + ), + ( + "def f\n self[0]\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "self[0]", + ), + ( + "def f():\n items[0]\n", + Language::Python, + ".py", + "block", + "items[0]", + ), + ( + "return items[0]\n", 
+ Language::Python, + ".py", + "subscript", + "items[0]", + ), + ( + "function f() { items[0]; }\n", + Language::TypeScript, + ".ts", + "expression_statement", + "items[0];", + ), + ( + "return items[0];\n", + Language::TypeScript, + ".ts", + "subscript_expression", + "items[0]", + ), + ( + "return items[1]\n", + Language::Lua, + ".lua", + "expression_list", + "items[1]", + ), + ( + "print(items[1])\n", + Language::Lua, + ".lua", + "bracket_index_expression", + "items[1]", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_element_reference_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_element_reference_statement", + kind, + text + ), + "normalize_element_reference_statement mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn element_reference_statement_normalization_matches_ruby() { + for (source, language, suffix) in [ + ("def f\n items[0]\nend\n", Language::Ruby, ".rb"), + ("def f():\n items[0]\n", Language::Python, ".py"), + ("function f() { items[0]; }\n", Language::TypeScript, ".ts"), + ("return items[1]\n", Language::Lua, ".lua"), + ] { + let root = parse_language_source(source, language, suffix); + let mut calls = Vec::new(); + nodes_of_type(&root, "CALL", &mut calls); + assert!( + calls.iter().any(|node| { + matches!(node.children.get(1), Some(Child::Symbol(message)) if message == "[]") + && node.text.contains("items") + }), + "expected element reference CALL for {language:?} in {root:#?}" + ); + assert_ruby_parity(source, language, suffix); + } +} + +#[test] +fn hash_literal_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n {a: b}\nend\n", + Language::Ruby, + ".rb", 
+ "body_statement", + "{a: b}", + ), + ( + "def f():\n {\"a\": b}\n", + Language::Python, + ".py", + "block", + "{\"a\": b}", + ), + ( + "function f() { ({a: b}); }\n", + Language::TypeScript, + ".ts", + "expression_statement", + "({a: b});", + ), + ( + "return {a: b};\n", + Language::TypeScript, + ".ts", + "object", + "{a: b}", + ), + ( + "function f()\n {a = b}\nend\n", + Language::Lua, + ".lua", + "block", + "\n {a = b}", + ), + ( + "function f()\n {a, b}\nend\n", + Language::Lua, + ".lua", + "block", + "\n {a, b}", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.hash_literal_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "hash_literal_statement?", + kind, + text + ), + "hash_literal_statement? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_hash_literal_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n {a: b}\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "{a: b}", + ), + ( + "def f():\n {\"a\": b}\n", + Language::Python, + ".py", + "block", + "{\"a\": b}", + ), + ( + "function f() { ({a: b}); }\n", + Language::TypeScript, + ".ts", + "expression_statement", + "({a: b});", + ), + ( + "return {a: b};\n", + Language::TypeScript, + ".ts", + "object", + "{a: b}", + ), + ( + "function f()\n {a = b}\nend\n", + Language::Lua, + ".lua", + "block", + "\n {a = b}", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_hash_literal_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + 
"normalize_hash_literal_statement", + kind, + text + ), + "normalize_hash_literal_statement mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_pair_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n {a: b}\nend\n", + Language::Ruby, + ".rb", + "pair", + "a: b", + ), + ( + "def f\n {name:}\nend\n", + Language::Ruby, + ".rb", + "pair", + "name:", + ), + ( + "def f\n {\"a\" => b}\nend\n", + Language::Ruby, + ".rb", + "pair", + "\"a\" => b", + ), + ( + "def f():\n {\"a\": b}\n", + Language::Python, + ".py", + "pair", + "\"a\": b", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_pair(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_pair", + kind, + text + ), + "normalize_pair mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn hash_literal_statement_normalization_matches_ruby() { + for (source, language, suffix) in [ + ("def f\n {a: b}\nend\n", Language::Ruby, ".rb"), + ("def f():\n {\"a\": b}\n", Language::Python, ".py"), + ("function f() { ({a: b}); }\n", Language::TypeScript, ".ts"), + ("function f()\n {a = b}\nend\n", Language::Lua, ".lua"), + ] { + let root = parse_language_source(source, language, suffix); + let mut hashes = Vec::new(); + nodes_of_type(&root, "HASH", &mut hashes); + assert!( + hashes + .iter() + .any(|node| node.text.contains('a') && node.text.contains('b')), + "expected hash literal HASH for {language:?} in {root:#?}" + ); + assert_ruby_parity(source, language, suffix); + } +} + +#[test] +fn empty_body_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f():\n pass\n", + Language::Python, + ".py", + 
"block", + "pass", + ), + ( + "function f() {}\n", + Language::TypeScript, + ".ts", + "statement_block", + "{}", + ), + ( + "function f() { work(); }\n", + Language::TypeScript, + ".ts", + "statement_block", + "{ work(); }", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.empty_body_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "empty_body_statement?", + kind, + text + ), + "empty_body_statement? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn empty_body_statement_normalization_matches_ruby() { + for (source, language, suffix) in [ + ("def f():\n pass\n", Language::Python, ".py"), + ("function f() {}\n", Language::TypeScript, ".ts"), + ] { + let root = parse_language_source(source, language, suffix); + let mut defns = Vec::new(); + nodes_of_type(&root, "DEFN", &mut defns); + let scope = child_node(defns[0], 1); + assert!( + matches!(scope.children.get(2), Some(Child::Nil)), + "expected empty body for {language:?} in {root:#?}" + ); + assert_ruby_parity(source, language, suffix); + } +} + +#[test] +fn heredoc_body_statement_matches_ruby_private_predicate() { + let ruby_source = "def f\n puts <<~TXT\n hi\n TXT\nend\n"; + for (source, language, suffix, kind, text) in [ + ( + ruby_source, + Language::Ruby, + ".rb", + "body_statement", + "puts <<~TXT\n hi\n TXT", + ), + (ruby_source, Language::Ruby, ".rb", "call", "puts <<~TXT"), + ( + "def f():\n value = 1\n", + Language::Python, + ".py", + "block", + "value = 1", + ), + ( + "function f() { value = 1; }\n", + Language::TypeScript, + ".ts", + "statement_block", + "{ value = 1; }", + ), + ( + "function f()\n value = 1\nend\n", + Language::Lua, + ".lua", + "block", + "value = 1", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let 
normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.heredoc_body_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "heredoc_body_statement?", + kind, + text + ), + "heredoc_body_statement? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn heredoc_call_for_body_matches_ruby_private_predicate() { + let ruby_arg_source = "def f\n puts <<~TXT\n hi\n TXT\nend\n"; + let ruby_receiver_source = "def emit\n <<~ZIG.chomp\n hi\n ZIG\nend\n"; + for (source, language, suffix, kind, text) in [ + ( + ruby_arg_source, + Language::Ruby, + ".rb", + "body_statement", + "puts <<~TXT\n hi\n TXT", + ), + ( + ruby_arg_source, + Language::Ruby, + ".rb", + "call", + "puts <<~TXT", + ), + ( + ruby_arg_source, + Language::Ruby, + ".rb", + "argument_list", + "<<~TXT", + ), + ( + ruby_arg_source, + Language::Ruby, + ".rb", + "method", + "def f\n puts <<~TXT\n hi\n TXT\nend", + ), + ( + ruby_receiver_source, + Language::Ruby, + ".rb", + "call", + "<<~ZIG.chomp", + ), + ( + ruby_receiver_source, + Language::Ruby, + ".rb", + "heredoc_beginning", + "<<~ZIG", + ), + ( + "def f():\n value = 1\n", + Language::Python, + ".py", + "block", + "value = 1", + ), + ( + "function f() { value = 1; }\n", + Language::TypeScript, + ".ts", + "statement_block", + "{ value = 1; }", + ), + ( + "function f()\n value = 1\nend\n", + Language::Lua, + ".lua", + "block", + "value = 1", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.heredoc_call_for_body(node), + ruby_private_predicate( + source, + language, + suffix, + "heredoc_call_for_body?", + kind, + text + ), + "heredoc_call_for_body? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn with_current_heredoc_body_restores_previous_body() { + let source = "def f\n puts <<~TXT\n hi\n TXT\nend\n"; + let tree = raw_tree(source, Language::Ruby); + let body = first_raw_node(tree.root_node(), source, "heredoc_body", "\n hi\n TXT"); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + normalizer.current_heredoc_body_span = Some([9, 2, 9, 7]); + + let result = normalizer.with_current_heredoc_body(Some(body), |normalizer| { + assert_eq!( + normalizer.current_heredoc_body_span, + Some(super::span(body)) + ); + "result" + }); + + assert_eq!(result, "result"); + assert_eq!(normalizer.current_heredoc_body_span, Some([9, 2, 9, 7])); +} + +#[test] +fn normalize_interpolation_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "name = \"#{user}\"\n", + Language::Ruby, + ".rb", + "interpolation", + "#{user}", + ), + ( + "name = \"#{a; b}\"\n", + Language::Ruby, + ".rb", + "interpolation", + "#{a; b}", + ), + ( + "name = f\"hi {user}\"\n", + Language::Python, + ".py", + "interpolation", + "{user}", + ), + ( + "const name = `hi ${user}`;\n", + Language::TypeScript, + ".ts", + "template_substitution", + "${user}", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_interpolation(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_interpolation", + kind, + text + ), + "normalize_interpolation mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_heredoc_children_matches_ruby_private_method() { + for (source, kind, text) in [ + ( + "def f\n puts <<~TXT\n hi\n TXT\nend\n", + "heredoc_body", + "\n hi\n TXT", + ), + ( + 
"def f\n puts <<~TXT\n hi #{name}\n TXT\nend\n", + "heredoc_body", + "\n hi #{name}\n TXT", + ), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = children_value(&normalizer.normalize_heredoc_children(node)); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + Language::Ruby, + ".rb", + "normalize_heredoc_children", + kind, + text + ), + "normalize_heredoc_children mismatch for {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_heredoc_beginning_matches_ruby_private_method() { + for (source, kind, text) in [( + "def emit\n <<~ZIG.chomp\n hi\n ZIG\nend\n", + "heredoc_beginning", + "<<~ZIG", + )] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = node_value(&normalizer.normalize_heredoc_beginning(node)); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + Language::Ruby, + ".rb", + "normalize_heredoc_beginning", + kind, + text + ), + "normalize_heredoc_beginning mismatch for {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_heredoc_beginning_uses_current_body_for_multiple_heredocs() { + let source = "def f\n puts <<~A, <<~B\n one\n A\n two\n B\nend\n"; + let tree = raw_tree(source, Language::Ruby); + let beginning = first_raw_node(tree.root_node(), source, "heredoc_beginning", "<<~B"); + let body = first_raw_node(tree.root_node(), source, "heredoc_body", "\n two\n B"); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + + let dstr = normalizer.with_current_heredoc_body(Some(body), |normalizer| { + normalizer.normalize_heredoc_beginning(beginning) + }); + + let content = child_node(&dstr, 0); + assert_eq!(content.r#type, "STR"); + assert_eq!( + content.children, + 
vec![Child::String("\n two\n ".to_string())] + ); +} + +#[test] +fn normalize_heredoc_body_statement_matches_ruby_private_method() { + for (source, kind, text) in [ + ( + "def f\n puts <<~TXT\n hi\n TXT\nend\n", + "body_statement", + "puts <<~TXT\n hi\n TXT", + ), + ( + "def emit\n <<~ZIG.chomp\n hi\n ZIG\nend\n", + "body_statement", + "<<~ZIG.chomp\n hi\n ZIG", + ), + ( + "def f\n puts <<~A, <<~B\n one\n A\n two\n B\nend\n", + "body_statement", + "puts <<~A, <<~B\n one\n A\n two\n B", + ), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = normalizer + .normalize_heredoc_body_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + Language::Ruby, + ".rb", + "normalize_heredoc_body_statement", + kind, + text + ), + "normalize_heredoc_body_statement mismatch for {kind} {text:?}" + ); + } +} + +#[test] +fn interpolated_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n \"hi #{name}\"\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "\"hi #{name}\"", + ), + ( + "def f():\n f\"hi {name}\"\n", + Language::Python, + ".py", + "block", + "f\"hi {name}\"", + ), + ( + "function f() { `hi ${name}`; }\n", + Language::TypeScript, + ".ts", + "expression_statement", + "`hi ${name}`;", + ), + ( + "function f()\n \"hi\"\nend\n", + Language::Lua, + ".lua", + "block", + "\n \"hi\"", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.interpolated_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "interpolated_statement?", + kind, + text + ), + "interpolated_statement? 
mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn interpolated_statement_normalization_matches_ruby() { + let source = "def f\n \"hi #{name}\"\nend\n"; + let root = parse_language_source(source, Language::Ruby, ".rb"); + let dstr = first_node(&root, "DSTR", "\"hi #{name}\""); + + assert_eq!(child_types(dstr), vec!["STR", "EVSTR"]); + assert_ruby_parity(source, Language::Ruby, ".rb"); +} + +#[test] +fn normalize_interpolated_statement_matches_ruby_private_method() { + for (source, kind, text) in [ + ( + "def f\n \"hi #{name}\"\nend\n", + "body_statement", + "\"hi #{name}\"", + ), + ( + "def f\n \"#{first} #{last}\"\nend\n", + "body_statement", + "\"#{first} #{last}\"", + ), + ] { + let tree = raw_tree(source, Language::Ruby); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, Language::Ruby); + let rust = node_value(&normalizer.normalize_interpolated_statement(node)); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + Language::Ruby, + ".rb", + "normalize_interpolated_statement", + kind, + text + ), + "normalize_interpolated_statement mismatch for {kind} {text:?}" + ); + } +} + +#[test] +fn concatenated_string_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n \"a\" \"b\"\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "\"a\" \"b\"", + ), + ( + "def f():\n \"a\" \"b\"\n", + Language::Python, + ".py", + "block", + "\"a\" \"b\"", + ), + ( + "function f() { \"a\"; }\n", + Language::TypeScript, + ".ts", + "expression_statement", + "\"a\";", + ), + ( + "function f()\n \"a\"\nend\n", + Language::Lua, + ".lua", + "block", + "\n \"a\"", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + 
normalizer.concatenated_string_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "concatenated_string_statement?", + kind, + text + ), + "concatenated_string_statement? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn concatenated_string_statement_normalization_matches_ruby() { + for (source, language, suffix, expected_text, expected_types) in [ + ( + "def f\n \"a\" \"b\"\nend\n", + Language::Ruby, + ".rb", + "\"a\"", + vec!["STR", "STR"], + ), + ( + "def f\n \"a\" \"b #{name}\"\nend\n", + Language::Ruby, + ".rb", + "\"b #{name}\"", + vec!["STR", "STR", "EVSTR"], + ), + ( + "def f():\n \"a\" \"b\"\n", + Language::Python, + ".py", + "\"a\"", + vec!["STR", "STR"], + ), + ( + "def f():\n \"a\" f\"b {name}\"\n", + Language::Python, + ".py", + "f\"b {name}\"", + vec!["STR", "STRING_START", "STR", "EVSTR", "STRING_END"], + ), + ] { + let root = parse_language_source(source, language, suffix); + let dstr = first_node(&root, "DSTR", expected_text); + + assert_eq!(child_types(dstr), expected_types); + assert_ruby_parity(source, language, suffix); + } +} + +#[test] +fn normalize_concatenated_string_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n \"a\" \"b\"\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "\"a\" \"b\"", + ), + ( + "def f\n \"a\" \"b #{name}\"\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "\"a\" \"b #{name}\"", + ), + ( + "def f():\n \"a\" \"b\"\n", + Language::Python, + ".py", + "block", + "\"a\" \"b\"", + ), + ( + "def f():\n \"a\" f\"b {name}\"\n", + Language::Python, + ".py", + "block", + "\"a\" f\"b {name}\"", + ), + ( + "function f() { \"a\"; }\n", + Language::TypeScript, + ".ts", + "expression_statement", + "\"a\";", + ), + ( + "function f()\n \"a\"\nend\n", + Language::Lua, + ".lua", + "block", + "\n \"a\"", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); 
+ let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.normalize_concatenated_string_statement(node); + + assert_eq!( + node_value(&rust), + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_concatenated_string_statement", + kind, + text + ), + "normalize_concatenated_string_statement mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_chained_string_matches_ruby_private_method() { + for (source, language, suffix, ruby_kind, ruby_text, rust_kind, rust_text) in [ + ( + "def f\n \"a\" \"b\"\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "\"a\" \"b\"", + "chained_string", + "\"a\" \"b\"", + ), + ( + "def f\n \"a\" \"b #{name}\"\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "\"a\" \"b #{name}\"", + "chained_string", + "\"a\" \"b #{name}\"", + ), + ( + "def f():\n \"a\" \"b\"\n", + Language::Python, + ".py", + "block", + "\"a\" \"b\"", + "concatenated_string", + "\"a\" \"b\"", + ), + ( + "def f():\n \"a\" f\"b {name}\"\n", + Language::Python, + ".py", + "block", + "\"a\" f\"b {name}\"", + "concatenated_string", + "\"a\" f\"b {name}\"", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, rust_kind, rust_text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer.normalize_chained_string(node); + + assert_eq!( + node_value(&rust), + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_chained_string", + ruby_kind, + ruby_text + ), + "normalize_chained_string mismatch for {language:?} {rust_kind} {rust_text:?}" + ); + } +} + +#[test] +fn dynamic_string_source_matches_ruby_private_method() { + for (source, language, suffix, ruby_kind, ruby_text, rust_kind, rust_text) in [ + ( + "def f\n \"a\" \"b #{name}\"\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "\"a\" \"b #{name}\"", + "chained_string", + "\"a\" \"b 
#{name}\"", + ), + ( + "def f\n \"a\" \"b\"\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "\"a\" \"b\"", + "chained_string", + "\"a\" \"b\"", + ), + ( + "def f():\n \"a\" f\"b {name}\"\n", + Language::Python, + ".py", + "block", + "\"a\" f\"b {name}\"", + "concatenated_string", + "\"a\" f\"b {name}\"", + ), + ( + "def f():\n \"a\" \"b\"\n", + Language::Python, + ".py", + "block", + "\"a\" \"b\"", + "concatenated_string", + "\"a\" \"b\"", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, rust_kind, rust_text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let mut normalized_children = Vec::new(); + for child in normalizer.named_children(node) { + let normalized = normalizer.normalize_node(child); + normalized_children.push((child, normalized)); + } + let rust = normalizer + .dynamic_string_source(&normalized_children) + .map(|node| { + ( + node.kind().to_string(), + super::node_text(node, source).to_string(), + ) + }); + let ruby = ruby_private_dynamic_string_source_signature( + source, language, suffix, ruby_kind, ruby_text, + ); + + assert_eq!( + rust, ruby, + "dynamic_string_source mismatch for {language:?} {rust_kind} {rust_text:?}" + ); + } +} + +#[test] +fn terminal_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n foo()\nend\n", + Language::Ruby, + ".rb", + "argument_list", + "()", + ), + ( + "def f\n foo\n foo()\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "foo\n foo()", + ), + ( + "def f():\n foo()\n", + Language::Python, + ".py", + "argument_list", + "()", + ), + ( + "def f():\n foo\n", + Language::Python, + ".py", + "block", + "foo", + ), + ( + "function f() { foo(); }\n", + Language::TypeScript, + ".ts", + "arguments", + "()", + ), + ( + "function f()\n foo()\nend\n", + Language::Lua, + ".lua", + "arguments", + "()", + ), + ] { + let tree = raw_tree(source, language); + let node = 
first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + normalizer.terminal_statement(node), + ruby_private_predicate(source, language, suffix, "terminal_statement?", kind, text), + "terminal_statement? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_terminal_statement_matches_ruby_private_method() { + let cases = vec![ + ( + "yield\n", + Language::Ruby, + ".rb", + "yield", + "yield", + "yield", + Vec::<&str>::new(), + ), + ( + "@name\n", + Language::Ruby, + ".rb", + "instance_variable", + "instance_variable", + "@name", + Vec::<&str>::new(), + ), + ( + "$1\n$value\n", + Language::Ruby, + ".rb", + "global_variable", + "global_variable", + "$1", + Vec::<&str>::new(), + ), + ( + "$1\n$value\n", + Language::Ruby, + ".rb", + "global_variable", + "global_variable", + "$value", + Vec::<&str>::new(), + ), + ( + "nil\ntrue\nfalse\n", + Language::Ruby, + ".rb", + "nil", + "nil", + "nil", + Vec::<&str>::new(), + ), + ( + "nil\ntrue\nfalse\n", + Language::Ruby, + ".rb", + "true", + "true", + "true", + Vec::<&str>::new(), + ), + ( + "nil\ntrue\nfalse\n", + Language::Ruby, + ".rb", + "false", + "false", + "false", + Vec::<&str>::new(), + ), + ( + ":ready\n", + Language::Ruby, + ".rb", + "simple_symbol", + "simple_symbol", + ":ready", + Vec::<&str>::new(), + ), + ( + "-123\n", + Language::Ruby, + ".rb", + "unary", + "unary", + "-123", + Vec::<&str>::new(), + ), + ( + "[]\n", + Language::Ruby, + ".rb", + "array", + "array", + "[]", + Vec::<&str>::new(), + ), + ( + "foo\n", + Language::Ruby, + ".rb", + "identifier", + "identifier", + "foo", + Vec::<&str>::new(), + ), + ( + "foo\n", + Language::Ruby, + ".rb", + "identifier", + "identifier", + "foo", + vec!["foo"], + ), + ( + "foo\n", + Language::Python, + ".py", + "expression_statement", + "identifier", + "foo", + Vec::<&str>::new(), + ), + ( + "foo;\n", + Language::TypeScript, + ".ts", + "identifier", + 
"identifier", + "foo", + Vec::<&str>::new(), + ), + ( + "foo()\n", + Language::Lua, + ".lua", + "identifier", + "identifier", + "foo", + Vec::<&str>::new(), + ), + ( + "foo()\n", + Language::Ruby, + ".rb", + "argument_list", + "argument_list", + "()", + Vec::<&str>::new(), + ), + ]; + + for (source, language, suffix, ruby_kind, rust_kind, text, locals) in cases { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, rust_kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + if !locals.is_empty() { + normalizer + .local_stack + .push(locals.iter().map(|name| name.to_string()).collect()); + } + let rust = node_value(&normalizer.normalize_terminal_statement(node)); + + assert_eq!( + rust, + ruby_private_normalize_terminal_statement_value( + source, + language, + suffix, + ruby_kind, + text, + &locals, + ), + "normalize_terminal_statement mismatch for {language:?} ruby={ruby_kind} rust={rust_kind} {text:?} locals={locals:?}" + ); + } +} + +#[test] +fn operator_assignment_statement_parts_matches_ruby_private_method() { + for (source, language, suffix, ruby_kind, ruby_text, rust_kind, rust_text) in [ + ( + "def f\n x += 1\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "x += 1", + "operator_assignment", + "x += 1", + ), + ( + "def f\n x ||= y\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "x ||= y", + "operator_assignment", + "x ||= y", + ), + ( + "def f\n x += 1\n y += 2\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "x += 1\n y += 2", + "body_statement", + "x += 1\n y += 2", + ), + ( + "def f():\n x += 1\n", + Language::Python, + ".py", + "block", + "x += 1", + "augmented_assignment", + "x += 1", + ), + ( + "function f() { obj.x ||= y; }\n", + Language::TypeScript, + ".ts", + "augmented_assignment_expression", + "obj.x ||= y", + "augmented_assignment_expression", + "obj.x ||= y", + ), + ( + "function f() { x += 1; }\n", + Language::TypeScript, + ".ts", + 
"expression_statement", + "x += 1;", + "expression_statement", + "x += 1;", + ), + ( + "function f()\n x = x + 1\nend\n", + Language::Lua, + ".lua", + "block", + "x = x + 1", + "block", + "x = x + 1", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, rust_kind, rust_text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = + normalizer + .operator_assignment_statement_parts(node) + .map(|(left, operator, right)| { + ( + left.kind().to_string(), + super::node_text(left, source).to_string(), + operator, + right.kind().to_string(), + super::node_text(right, source).to_string(), + ) + }); + let ruby = ruby_private_operator_assignment_statement_parts_signature( + source, language, suffix, ruby_kind, ruby_text, + ); + + assert_eq!( + rust, ruby, + "operator_assignment_statement_parts mismatch for {language:?} {rust_kind} {rust_text:?}" + ); + } +} + +#[test] +fn operator_assignment_statement_matches_ruby_private_predicate() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n x += 1\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "x += 1", + ), + ( + "def f\n x ||= y\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "x ||= y", + ), + ( + "def f\n x = 1\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "x = 1", + ), + ( + "def f\n x += 1\n y += 2\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "x += 1\n y += 2", + ), + ( + "def f():\n x += 1\n", + Language::Python, + ".py", + "block", + "x += 1", + ), + ( + "function f() { x += 1; }\n", + Language::TypeScript, + ".ts", + "expression_statement", + "x += 1;", + ), + ( + "function f()\n x = x + 1\nend\n", + Language::Lua, + ".lua", + "block", + "x = x + 1", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let normalizer = super::TreeSitterNormalizer::new(source, language); + + assert_eq!( + 
normalizer.operator_assignment_statement(node), + ruby_private_predicate( + source, + language, + suffix, + "operator_assignment_statement?", + kind, + text + ), + "operator_assignment_statement? mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn normalize_operator_assignment_statement_matches_ruby_private_method() { + for (source, language, suffix, kind, text) in [ + ( + "def f\n x += 1\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "x += 1", + ), + ( + "def f\n x ||= y\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "x ||= y", + ), + ( + "def f\n items[index] += value\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "items[index] += value", + ), + ( + "def f\n object.value += 1\nend\n", + Language::Ruby, + ".rb", + "body_statement", + "object.value += 1", + ), + ( + "def f():\n x += 1\n", + Language::Python, + ".py", + "block", + "x += 1", + ), + ( + "function f() { x += 1; }\n", + Language::TypeScript, + ".ts", + "augmented_assignment_expression", + "x += 1", + ), + ( + "function f() { obj.x ||= y; }\n", + Language::TypeScript, + ".ts", + "augmented_assignment_expression", + "obj.x ||= y", + ), + ] { + let tree = raw_tree(source, language); + let node = first_raw_node(tree.root_node(), source, kind, text); + let mut normalizer = super::TreeSitterNormalizer::new(source, language); + let rust = normalizer + .normalize_operator_assignment_statement(node) + .map(|node| node_value(&node)) + .unwrap_or(Value::Null); + + assert_eq!( + rust, + ruby_private_normalize_method_value( + source, + language, + suffix, + "normalize_operator_assignment_statement", + kind, + text + ), + "normalize_operator_assignment_statement mismatch for {language:?} {kind} {text:?}" + ); + } +} + +#[test] +fn python_f_string_interpolation_next_to_equals_is_evstr_not_assignment() { + let root = parse_language_source( + r#" +class Tag: + @property + def markup(self): + return f"[{self.name}={self.parameters}]" +"#, + Language::Python, + ".py", + 
); + let dstr = first_node(&root, "DSTR", r#"f"[{self.name}={self.parameters}]""#); + + let types = child_types(dstr); + assert_eq!( + types, + vec![ + "STRING_START", + "STR", + "EVSTR", + "STR", + "EVSTR", + "STR", + "STRING_END" + ], + "expected Ruby-style f-string interpolation parts in {dstr:#?}" + ); + assert!( + !types.contains(&"LASGN"), + "interpolation next to '=' must not normalize as assignment: {dstr:#?}" + ); +} + +#[test] +fn python_relative_import_prefix_only_has_no_children() { + let root = parse_language_source( + r#" +if __name__ == "__main__": + from . import box as box +"#, + Language::Python, + ".py", + ); + let relative_import = first_node(&root, "RELATIVE_IMPORT", "."); + + assert!( + relative_import.children.is_empty(), + "Ruby exposes bare relative import prefix as an empty RELATIVE_IMPORT: {relative_import:#?}" + ); +} + +#[test] +fn python_annotation_type_wrappers_match_ruby_tree_shape() { + let root = parse_language_source( + r#" +from typing import Callable + +_is_single_cell_widths: Callable[[str], bool] = value +last_measured_character: str | None = None +fileno: Callable[[], int] | None = value +"#, + Language::Python, + ".py", + ); + + let str_list_type = first_node(&root, "TYPE", "[str]"); + assert_eq!(child_types(str_list_type), vec!["LVAR"]); + assert_eq!( + child_node(str_list_type, 0).children, + vec![Child::String("str".to_string())] + ); + + let empty_list_type = first_node(&root, "TYPE", "[]"); + assert!( + empty_list_type.children.is_empty(), + "Ruby keeps Callable[[]] list type empty: {empty_list_type:#?}" + ); + + let union_type = first_node(&root, "TYPE", "str | None"); + assert_eq!(child_types(union_type), vec!["LVAR", "NIL"]); +} + +#[test] +fn python_docstring_only_class_body_stays_block_wrapped() { + let root = parse_language_source( + r#" +class ColorParseError(Exception): + """The color could not be parsed.""" +"#, + Language::Python, + ".py", + ); + let class_node = first_node( + &root, + "CLASS", + "class 
ColorParseError(Exception):\n \"\"\"The color could not be parsed.\"\"\"", + ); + let scope = child_node(class_node, 2); + let body = child_node(scope, 2); + + assert_eq!(body.r#type, "BLOCK"); + assert_eq!( + child_types(body), + vec!["STRING_START", "STR", "STRING_END"], + "Ruby exposes docstring-only class body as BLOCK of string parts: {body:#?}" + ); +} + +#[test] +fn python_ellipsis_only_function_body_is_empty_scope_with_root_source() { + assert_ruby_parity( + r#"def __rich__(): + ... +"#, + Language::Python, + ".py", + ); +} + +#[test] +fn python_explicit_return_none_is_not_elided_from_function_body() { + let source = r#" +class Thing: + def _repr_latex_(self): + return None +"#; + let root = parse_language_source(source, Language::Python, ".py"); + let defn = first_node( + &root, + "DEFN", + "def _repr_latex_(self):\n return None", + ); + let scope = child_node(defn, 1); + let body = child_node(scope, 2); + + assert_eq!(body.r#type, "RETURN"); + assert_eq!( + child_node(body, 0).r#type, + "NIL", + "Ruby only elides implicit nil bodies for Ruby, not explicit Python return None: {scope:#?}" + ); + assert_ruby_parity(source, Language::Python, ".py"); +} + +#[test] +fn python_with_attribute_item_uses_ruby_clause_children() { + let root = parse_language_source( + r#" +def page(self): + with self._console._lock: + buffer = self._console._buffer[:] +"#, + Language::Python, + ".py", + ); + let clause = first_node(&root, "WITH_CLAUSE", "self._console._lock"); + + assert_eq!( + child_types(clause), + vec!["CALL", "LVAR"], + "Ruby with_clause exposes attribute receiver and field separately: {clause:#?}" + ); + assert_eq!(child_node(clause, 0).text, "self._console"); + assert_eq!(child_node(clause, 1).text, "_lock"); +} + +#[test] +fn python_bare_identifier_expression_statement_has_no_children() { + let root = parse_language_source( + r#" +def _is_jupyter(): + try: + get_ipython # type: ignore[name-defined] + except NameError: + return False +"#, + Language::Python, + 
".py", + ); + let expression = first_node(&root, "EXPRESSION_STATEMENT", "get_ipython"); + + assert!( + expression.children.is_empty(), + "Ruby parser exposes bare identifier expression statements without named children: {expression:#?}" + ); +} + +#[test] +fn python_bare_identifier_only_block_has_no_children() { + assert_ruby_parity( + r#" +def get_exception(): + try: + pass + except: + foobarbaz +"#, + Language::Python, + ".py", + ); +} + +#[test] +fn python_bare_dotted_expression_statement_normalizes_as_call() { + let root = parse_language_source("os.get_terminal_size\n", Language::Python, ".py"); + let call = first_node(&root, "CALL", "os.get_terminal_size"); + + assert_eq!( + child_types(call), + vec!["LVAR"], + "bare Python dotted expression statements should normalize as calls: {call:#?}" + ); +} + +#[test] +fn python_bare_comparison_expression_statement_keeps_statement_wrapper() { + let root = parse_language_source( + r#" +def test_get_style(): + console.get_style("repr.brace") == Style(bold=True) +"#, + Language::Python, + ".py", + ); + let expression = first_node( + &root, + "EXPRESSION_STATEMENT", + r#"console.get_style("repr.brace") == Style(bold=True)"#, + ); + + assert_eq!( + child_types(expression), + vec!["CALL", "FCALL"], + "Ruby exposes bare comparison statements as expression_statement operand children: {expression:#?}" + ); +} + +#[test] +fn python_delete_statement_matches_ruby_block_contexts() { + assert_ruby_parity( + r#" +def save(self, clear): + if clear: + del self._record_buffer[:] + with self._record_buffer_lock: + del self._record_buffer[:] + text = "" +"#, + Language::Python, + ".py", + ); +} + +#[test] +fn python_single_subscript_expression_block_exposes_subscript_children() { + assert_ruby_parity( + r#" +def test_render(): + with pytest.raises(KeyError): + top["asdasd"] +"#, + Language::Python, + ".py", + ); +} + +#[test] +fn python_single_if_block_under_try_matches_ruby_if_shape() { + let root = parse_language_source( + r#" +def 
load(args): + try: + if args.path == "-": + json_data = sys.stdin.read() + else: + json_data = Path(args.path).read_text() + except Exception as error: + sys.exit(-1) +"#, + Language::Python, + ".py", + ); + let if_node = first_node( + &root, + "IF", + "if args.path == \"-\":\n json_data = sys.stdin.read()\n else:\n json_data = Path(args.path).read_text()", + ); + + assert_eq!( + child_types(if_node), + vec!["OPCALL", "LASGN", "ELSE_CLAUSE"], + "Ruby normalizes this Python try-body child as an IF: {if_node:#?}" + ); + assert_eq!(child_types(child_node(if_node, 2)), vec!["BLOCK"]); +} + +#[test] +fn python_single_decorated_definition_block_exposes_decorator_and_function() { + assert_ruby_parity( + r#" +def test_inspect_swig_edge_case(): + class Thing: + @property + def __class__(self): + raise AttributeError +"#, + Language::Python, + ".py", + ); +} + +#[test] +fn python_nested_class_inside_class_body_matches_ruby_iter_shape() { + let root = parse_language_source( + r#" +def test_can_handle_special_characters_in_docstrings(): + class Something: + class Thing: + pass +"#, + Language::Python, + ".py", + ); + let iter = first_node(&root, "ITER", "class Thing:\n pass"); + + assert_eq!(child_node(iter, 0).r#type, "VCALL"); + assert_eq!( + child_node(iter, 0).children, + vec![Child::Symbol("Thing".to_string()), Child::Nil] + ); + assert_eq!(child_node(iter, 1).r#type, "SCOPE"); +} + +#[test] +fn lua_local_assignment_call_rhs_matches_ruby_expression_list_shape() { + let root = parse_language_source( + r#"local test_env = require("spec.util.test_env") +"#, + Language::Lua, + ".lua", + ); + let expression_list = first_node(&root, "EXPRESSION_LIST", r#"require("spec.util.test_env")"#); + + assert_eq!( + child_types(expression_list), + vec!["LVAR", "ARGUMENTS"], + "Ruby exposes a Lua call RHS expression_list as the call function and arguments, without a FUNCTION_CALL wrapper: {expression_list:#?}" + ); +} + +#[test] +fn 
lua_local_assignment_member_rhs_matches_ruby_expression_list_shape() { + let root = parse_language_source("local run = test_env.run\n", Language::Lua, ".lua"); + let expression_list = first_node(&root, "EXPRESSION_LIST", "test_env.run"); + + assert_eq!( + child_types(expression_list), + vec!["LVAR", "LVAR"], + "Ruby exposes a Lua dotted RHS expression_list as receiver and field, without a DOT_INDEX_EXPRESSION wrapper: {expression_list:#?}" + ); +} + +#[test] +fn lua_table_string_entry_matches_ruby_field_shape() { + let root = parse_language_source( + "local extra_rocks = {\n \"/luasocket-${LUASOCKET}.src.rock\",\n}\n", + Language::Lua, + ".lua", + ); + let expression_list = first_node( + &root, + "EXPRESSION_LIST", + "{\n \"/luasocket-${LUASOCKET}.src.rock\",\n}", + ); + let field = child_node(expression_list, 0); + let string = child_node(field, 0); + + assert_eq!( + child_types(expression_list), + vec!["FIELD"], + "Ruby exposes a Lua table constructor assignment RHS as its field children: {expression_list:#?}" + ); + assert_eq!(string.r#type, "STR"); + assert_eq!( + string.children, + vec![Child::String( + "/luasocket-${LUASOCKET}.src.rock".to_string() + )], + "Ruby normalizes a Lua table string field from string_content, without quotes: {string:#?}" + ); +} + +#[test] +fn lua_table_dollar_string_entry_matches_ruby_str_not_gvar() { + let root = parse_language_source( + "local incdirs = { \"$(FOO1_INCDIR)\" }\n", + Language::Lua, + ".lua", + ); + let string = first_node(&root, "STR", "$(FOO1_INCDIR)"); + let mut gvars = Vec::new(); + nodes_of_type(&root, "GVAR", &mut gvars); + + assert_eq!( + string.children, + vec![Child::String("$(FOO1_INCDIR)".to_string())], + "Ruby normalizes Lua table strings starting with $ as STR, not GVAR: {string:#?}" + ); + assert!( + gvars.is_empty(), + "Lua string_content starting with $ must not normalize as GVAR: {gvars:#?}" + ); +} + +#[test] +fn lua_table_call_entry_matches_ruby_field_children_shape() { + assert_ruby_parity( + 
"assert.same(install, { bin = { P\"bin/binfile\" } })\n", + Language::Lua, + ".lua", + ); +} + +#[test] +fn lua_table_identifier_entry_matches_ruby_empty_field_shape() { + assert_ruby_parity( + "local rocks_path = table.concat({rocks_tree, \"a_rock\"})\n", + Language::Lua, + ".lua", + ); +} + +#[test] +fn lua_single_call_function_body_matches_ruby_block_shape() { + assert_ruby_parity( + "before_each(function()\n test_env.setup_specs(extra_rocks)\nend)\n", + Language::Lua, + ".lua", + ); +} + +#[test] +fn lua_single_assignment_function_body_matches_ruby_lasgn_shape() { + assert_ruby_parity( + "lazy_setup(function()\n git = git_repo.start()\nend)\n", + Language::Lua, + ".lua", + ); +} + +#[test] +fn lua_single_bare_assignment_function_body_matches_ruby_lasgn_shape() { + let root = parse_language_source("function()\n x = y\nend\n", Language::Lua, ".lua"); + let defn = first_node(&root, "DEFN", "function()\n x = y\nend"); + let scope = child_node(defn, 1); + let body = child_node(scope, 2); + let right = child_node(body, 1); + + assert_eq!(body.r#type, "LASGN"); + assert_eq!(body.children.first(), Some(&Child::String("x".to_string()))); + assert_eq!(right.r#type, "EXPRESSION_LIST"); + assert!( + right.children.is_empty(), + "Ruby exposes a bare identifier Lua single-assignment RHS with no children: {right:#?}" + ); +} + +#[test] +fn lua_single_dotted_assignment_function_body_normalizes_as_attribute_assignment() { + let root = parse_language_source( + "function()\n package.path = oldpath\nend\n", + Language::Lua, + ".lua", + ); + let defn = first_node(&root, "DEFN", "function()\n package.path = oldpath\nend"); + let scope = child_node(defn, 1); + let body = child_node(scope, 2); + let assignment = body; + let receiver = child_node(assignment, 0); + let args = child_node(assignment, 2); + + assert_eq!(body.r#type, "ATTRASGN"); + assert_eq!(receiver.r#type, "LVAR"); + assert_eq!( + receiver.children, + vec![Child::String("package".to_string())] + ); + assert_eq!( + 
assignment.children.get(1), + Some(&Child::Symbol("path=".to_string())) + ); + assert_eq!(args.r#type, "LIST"); +} + +#[test] +fn lua_single_local_assignment_function_body_matches_ruby_lasgn_shape() { + assert_ruby_parity( + "it(function()\n local output = run.luarocks(\"show --rock-tree luacov\")\nend)\n", + Language::Lua, + ".lua", + ); +} + +#[test] +fn lua_assigned_function_expression_matches_ruby_expression_list_shape() { + assert_ruby_parity( + "local test_with_location = function(location)\n lfs.mkdir(location)\nend\n", + Language::Lua, + ".lua", + ); +} + +#[test] +fn lua_assigned_function_if_else_matches_fixed_ruby_if_shape() { + assert_ruby_parity( + "local make_unreadable = function(path)\n if is_win then\n fs.execute(\"x\")\n else\n fs.execute(\"y\")\n end\nend\n", + Language::Lua, + ".lua", + ); +} + +#[test] +fn lua_single_return_function_body_matches_ruby_opcall_shape() { + let source = "function sum.sum(a, b)\n return a + b\nend\n"; + let root = parse_language_source(source, Language::Lua, ".lua"); + let defn = first_node( + &root, + "DEFN", + "function sum.sum(a, b)\n return a + b\nend", + ); + let scope = child_node(defn, 1); + let body = child_node(scope, 2); + let returned = child_node(body, 0); + + assert_eq!(body.r#type, "RETURN"); + assert_eq!(returned.r#type, "OPCALL"); + assert_eq!( + returned.children.get(1), + Some(&Child::Symbol("+".to_string())), + "Ruby exposes a single Lua return body as RETURN wrapping the returned operator call: {body:#?}" + ); + assert_ruby_parity(source, Language::Lua, ".lua"); +} + +#[test] +fn lua_top_level_return_identifier_matches_ruby_empty_expression_list() { + let root = parse_language_source("return sum\n", Language::Lua, ".lua"); + let return_node = first_node(&root, "RETURN", "return sum"); + let expression_list = child_node(return_node, 0); + + assert_eq!(expression_list.r#type, "EXPRESSION_LIST"); + assert!( + expression_list.children.is_empty(), + "Ruby exposes a Lua return of a bare identifier as an 
empty expression_list: {expression_list:#?}" + ); +} + +#[test] +fn lua_top_level_return_scalar_literals_match_ruby_empty_expression_list() { + for literal in ["true", "false", "nil", "0"] { + let root = parse_language_source(&format!("return {literal}\n"), Language::Lua, ".lua"); + let return_node = first_node(&root, "RETURN", &format!("return {literal}")); + let expression_list = child_node(return_node, 0); + + assert_eq!(expression_list.r#type, "EXPRESSION_LIST"); + assert!( + expression_list.children.is_empty(), + "Ruby exposes a Lua return of {literal} as an empty expression_list: {expression_list:#?}" + ); + } +} + +#[test] +fn lua_assignment_scalar_literals_match_ruby_empty_expression_list() { + for literal in ["true", "false", "nil", "0"] { + let root = parse_language_source(&format!("tmpfile = {literal}\n"), Language::Lua, ".lua"); + let assignment = first_node(&root, "LASGN", &format!("tmpfile = {literal}")); + let expression_list = child_node(assignment, 1); + + assert_eq!(expression_list.r#type, "EXPRESSION_LIST"); + assert!( + expression_list.children.is_empty(), + "Ruby exposes a Lua scalar literal assignment RHS as an empty expression_list: {expression_list:#?}" + ); + } +} + +#[test] +fn lua_no_paren_string_argument_matches_ruby_string_content_shape() { + let root = parse_language_source("V\"foo\"\n", Language::Lua, ".lua"); + let call = first_node(&root, "FUNCTION_CALL", "V\"foo\""); + let arguments = child_node(call, 1); + let string = child_node(arguments, 0); + + assert_eq!(arguments.r#type, "ARGUMENTS"); + assert_eq!(arguments.text, "\"foo\""); + assert_eq!(string.r#type, "STR"); + assert_eq!(string.text, "foo"); + assert_eq!(string.children, vec![Child::String("foo".to_string())]); +} + +#[test] +fn lua_long_string_assignment_matches_ruby_expression_list_content_shape() { + assert_ruby_parity( + "local c_module_source = [[\n #include \n]]\n", + Language::Lua, + ".lua", + ); +} + +#[test] +fn lua_elseif_branch_is_preserved_as_if_alternative() { 
+ let root = parse_language_source( + r#"if test_env.LUA_V == "5.1" then + one() +elseif test_env.LUA_V == "5.2" then + two() +end +"#, + Language::Lua, + ".lua", + ); + let if_node = first_node( + &root, + "IF", + "if test_env.LUA_V == \"5.1\" then\n one()\nelseif test_env.LUA_V == \"5.2\" then\n two()\nend", + ); + let alternative = child_node(if_node, 2); + + assert_eq!(alternative.r#type, "ELSEIF_STATEMENT"); +} + +#[test] +fn lua_binary_assignment_rhs_matches_ruby_expression_list_shape() { + let root = parse_language_source( + "local rockspec = testing_paths.fixtures_dir .. \"/build_only_deps-0.1-1.rockspec\"\n", + Language::Lua, + ".lua", + ); + let expression_list = first_node( + &root, + "EXPRESSION_LIST", + "testing_paths.fixtures_dir .. \"/build_only_deps-0.1-1.rockspec\"", + ); + + assert_eq!( + child_types(expression_list), + vec!["DOT_INDEX_EXPRESSION", "STR"], + "Ruby exposes a Lua binary RHS expression_list as the binary operands, without a BINARY_EXPRESSION wrapper: {expression_list:#?}" + ); +} + +#[test] +fn lua_local_declaration_without_rhs_matches_ruby_empty_variable_list() { + let root = parse_language_source("local tmpdir\n", Language::Lua, ".lua"); + let variable_list = first_node(&root, "VARIABLE_LIST", "tmpdir"); + + assert!( + variable_list.children.is_empty(), + "Ruby exposes a Lua local declaration without RHS as an empty VARIABLE_LIST: {variable_list:#?}" + ); +} + +#[test] +fn lua_multi_local_declaration_without_rhs_keeps_ruby_variable_list_children() { + let root = parse_language_source("local cfg, fs\n", Language::Lua, ".lua"); + let variable_list = first_node(&root, "VARIABLE_LIST", "cfg, fs"); + + assert_eq!( + child_types(variable_list), + vec!["LVAR", "LVAR"], + "Ruby keeps children for a multi-name Lua local declaration without RHS: {variable_list:#?}" + ); +} + +#[test] +fn lua_single_generic_for_variable_matches_ruby_empty_variable_list() { + let root = parse_language_source( + "for f in lfs.dir(spec_quick) do end\n", + 
Language::Lua, + ".lua", + ); + let variable_list = first_node(&root, "VARIABLE_LIST", "f"); + + assert!( + variable_list.children.is_empty(), + "Ruby exposes a single Lua generic-for variable list as empty: {variable_list:#?}" + ); +} + +#[test] +fn lua_multi_generic_for_variable_list_keeps_ruby_children() { + let root = parse_language_source("for _, t in ipairs(tests) do end\n", Language::Lua, ".lua"); + let variable_list = first_node(&root, "VARIABLE_LIST", "_, t"); + + assert_eq!( + child_types(variable_list), + vec!["LVAR", "LVAR"], + "Ruby keeps children for a multi-name Lua generic-for variable list: {variable_list:#?}" + ); +} + +#[test] +fn normalizes_safe_navigation_inside_multi_statement_else_body() { + let root = parse_source( + r#" +def x(cond, node) + if cond + node.storage = :stack + else + node.storage = :heap + current_fn_ctx&.record_heap_use! + end +end +"#, + ); + let mut qcalls = Vec::new(); + nodes_of_type(&root, "QCALL", &mut qcalls); + + assert!( + qcalls + .iter() + .any(|node| node.text == "current_fn_ctx&.record_heap_use!"), + "expected normalized QCALL for current_fn_ctx safe navigation, got {qcalls:#?} in {root:#?}" + ); +} + +#[test] +fn normalizes_visibility_wrapped_singleton_def() { + let root = parse_source( + r#" +private_class_method def self.collect_payload_binding_names(node, names) + if node.is_a?(AST::Identifier) + return + end + AST.wrapped_children(node).each { |child| collect_payload_binding_names(child, names) if child.is_a?(AST::Locatable) } +end +"#, + ); + let mut defs = Vec::new(); + nodes_of_type(&root, "DEFS", &mut defs); + + assert!( + defs.iter().any(|node| node.children.get(1) + == Some(&Child::Symbol("collect_payload_binding_names".to_string()))), + "expected normalized DEFS for visibility-wrapped singleton def, got {root:#?}" + ); + + let def = defs + .into_iter() + .find(|node| { + node.children.get(1) + == Some(&Child::Symbol("collect_payload_binding_names".to_string())) + }) + .expect("visibility-wrapped 
singleton def should normalize to DEFS"); + let mut calls = Vec::new(); + nodes_of_type(def, "CALL", &mut calls); + nodes_of_type(def, "FCALL", &mut calls); + calls.sort_by_key(|node| (node.first_lineno, node.first_column)); + let ordered = calls + .iter() + .map(|node| (node.first_lineno, node.text.as_str())) + .collect::>(); + + let first_if_call = ordered + .iter() + .position(|(_line, text)| *text == "node.is_a?(AST::Identifier)") + .expect("expected identifier guard call"); + let recursive_call = ordered + .iter() + .position(|(_line, text)| *text == "collect_payload_binding_names(child, names)") + .expect("expected recursive payload scan call"); + assert!( + first_if_call < recursive_call, + "expected method body calls in source order, got {ordered:#?} in {root:#?}" + ); +} + +#[test] +fn normalizes_heredoc_beginning_as_dynamic_string_receiver() { + let root = parse_source( + r#" +def emit + <<~ZIG.chomp + hi + ZIG +end +"#, + ); + let mut calls = Vec::new(); + nodes_of_type(&root, "CALL", &mut calls); + + let call = calls + .iter() + .find(|node| node.text == "<<~ZIG.chomp") + .expect("expected heredoc chomp call"); + assert_eq!( + call.children.get(1), + Some(&Child::Symbol("chomp".to_string())) + ); + assert_eq!( + call.children + .first() + .and_then(super::node) + .map(|node| node.r#type.as_str()), + Some("DSTR") + ); +} + +#[test] +fn flatten_and_matches_ruby_ast_helper() { + let left = Node { + r#type: "LVAR".to_string(), + children: vec![Child::String("a".to_string())], + first_lineno: 1, + first_column: 0, + last_lineno: 1, + last_column: 1, + text: "a".to_string(), + }; + let right = Node { + r#type: "LVAR".to_string(), + children: vec![Child::String("b".to_string())], + first_lineno: 1, + first_column: 5, + last_lineno: 1, + last_column: 6, + text: "b".to_string(), + }; + let and_node = Node { + r#type: "AND".to_string(), + children: vec![Child::Node(Box::new(left)), Child::Node(Box::new(right))], + first_lineno: 1, + first_column: 0, + 
last_lineno: 1, + last_column: 6, + text: "a && b".to_string(), + }; + + assert_eq!(super::flatten_and(&and_node).len(), 2); +} diff --git a/gems/decomplex/rust/src/decomplex/ast.rs b/gems/decomplex/rust/src/decomplex/ast.rs new file mode 100644 index 000000000..c9e549d54 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/ast.rs @@ -0,0 +1,6807 @@ +use crate::decomplex::syntax::Language; +use anyhow::{Context, Result}; +use serde::{Deserialize, Serialize}; +use std::collections::BTreeSet; +use std::fs; +use std::path::Path; +use tree_sitter::{Language as TreeSitterLanguage, Node as TreeSitterNode, Parser}; + +mod adapters; +use adapters::{normalization_adapter, AstNormalizationAdapter, NamedChildrenAction}; + +pub type Span = [usize; 4]; +const COMPARISON_OPERATORS: &[&str] = &["==", "!=", "===", "!==", "<", "<=", ">", ">="]; +const OPERATOR_CALL_OPERATORS: &[&str] = &[ + "+", "-", "*", "/", "%", "**", "|", "&", "^", "<<", ">>", "=~", "!~", +]; +const BINARY_WRAPPER_KINDS: &[&str] = &[ + "binary", + "binary_expression", + "binary_operator", + "boolean_operator", + "comparison_operator", +]; +const BOOLEAN_EXPRESSION_KINDS: &[&str] = &["binary", "binary_expression", "boolean_operator"]; +const COMPARISON_EXPRESSION_KINDS: &[&str] = + &["binary", "binary_expression", "comparison_operator"]; +const DOTTED_EXPRESSION_WRAPPER_KINDS: &[&str] = + &["body_statement", "block_body", "statement", "argument_list"]; +const PYTHON_DOTTED_EXPRESSION_WRAPPER_KINDS: &[&str] = &[ + "body_statement", + "block_body", + "statement", + "argument_list", + "expression_statement", +]; + +#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)] +pub struct RawNode { + pub kind: String, + pub text: String, + pub span: Span, + pub named: bool, + pub field_name: Option, + pub children: Vec, +} + +impl RawNode { + pub fn from_tree_sitter(node: TreeSitterNode<'_>, source: &str) -> Self { + let mut cursor = node.walk(); + let mut children: Vec = node + .children(&mut cursor) + 
.enumerate() + .map(|(index, child)| { + let mut raw = Self::from_tree_sitter(child, source); + raw.field_name = node.field_name_for_child(index as u32).map(str::to_string); + raw + }) + .collect(); + + if node.kind() == "argument_list" + && !node_text(node, source).trim_start().starts_with('(') + && children.len() == 1 + && children[0].kind == "scope_resolution" + { + children = children[0].children.clone(); + } + + if node.kind() == "call" { + let mut flattened = Vec::new(); + for child in children { + if child.kind == "argument_list" + && !child.text.trim_start().starts_with('(') + && child.children.len() == 1 + && child.children[0].kind != "scope_resolution" + { + flattened.extend(child.children); + } else { + flattened.push(child); + } + } + children = flattened; + } + + if node.kind() == "bare_string" { + children.clear(); + } + + if matches!(node.kind(), "return" | "next" | "break" | "yield") { + let mut flattened = Vec::new(); + for child in children { + if child.kind == "argument_list" { + flattened.extend(child.children); + } else { + flattened.push(child); + } + } + children = flattened; + } + + if node.kind() == "pattern" && children.len() == 1 && children[0].kind == "scope_resolution" + { + children = children[0].children.clone(); + } + + if node.kind() == "when" { + let mut flattened = Vec::new(); + for child in children { + if child.kind == "pattern" + && child.children.len() == 1 + && child.children[0].kind != "scope_resolution" + { + flattened.extend(child.children); + } else { + flattened.push(child); + } + } + children = flattened; + } + + if node.kind() == "body_statement" && children.len() == 1 && children[0].kind == "array" { + children = children[0].children.clone(); + } + if node.kind() == "body_statement" && children.len() == 1 && children[0].kind == "call" { + children = children[0].children.clone(); + } + if node.kind() == "body_statement" + && children.len() == 1 + && children[0].kind == "conditional" + { + children = 
children[0].children.clone(); + } + if node.kind() == "body_statement" && children.len() == 1 && children[0].kind == "module" { + children = children[0].children.clone(); + } + if node.kind() == "body_statement" && children.len() == 1 && children[0].kind == "binary" { + children = children[0].children.clone(); + } + if node.kind() == "body_statement" + && children.len() == 1 + && children[0].kind == "assignment" + && children[0] + .children + .first() + .map(|child| child.kind == "element_reference") + .unwrap_or(false) + { + children = children[0].children.clone(); + } + if node.kind() == "block_body" && children.len() == 1 && children[0].kind == "call" { + children = children[0].children.clone(); + } + if node.kind() == "block_body" && children.len() == 1 && children[0].kind == "assignment" { + children = children[0].children.clone(); + } + if node.kind() == "block_body" + && children.len() == 1 + && matches!( + children[0].kind.as_str(), + "array" | "binary" | "string" | "unary" + ) + { + children = children[0].children.clone(); + } + + Self { + kind: node.kind().to_string(), + text: node_text(node, source).to_string(), + span: span(node), + named: node.is_named(), + field_name: None, + children, + } + } + + pub fn named_children(&self) -> Vec<&RawNode> { + self.children.iter().filter(|child| child.named).collect() + } + + pub fn walk<'a>(&'a self, out: &mut Vec<&'a RawNode>) { + out.push(self); + for child in &self.children { + child.walk(out); + } + } + + pub fn line(&self) -> usize { + self.span[0] + } +} + +pub fn normalize_text(text: &str) -> String { + text.split_whitespace().collect::>().join(" ") +} + +fn ruby_exception_constant_text(text: &str) -> bool { + let mut parts = text.split("::"); + let Some(first) = parts.next() else { + return false; + }; + let mut first_chars = first.chars(); + if !matches!(first_chars.next(), Some(ch) if ch.is_ascii_uppercase()) { + return false; + } + if !first_chars.all(|ch| ch == '_' || ch.is_ascii_alphanumeric()) { + 
return false; + } + parts.all(|part| { + !part.is_empty() + && part + .chars() + .all(|ch| ch == '_' || ch.is_ascii_alphanumeric()) + }) +} + +pub fn span(node: TreeSitterNode<'_>) -> Span { + let start = node.start_position(); + let end = node.end_position(); + [start.row + 1, start.column, end.row + 1, end.column] +} + +pub fn line(node: TreeSitterNode<'_>) -> usize { + node.start_position().row + 1 +} + +pub fn node_text<'a>(node: TreeSitterNode<'_>, source: &'a str) -> &'a str { + node.utf8_text(source.as_bytes()).unwrap_or("") +} + +#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)] +pub enum Child { + Node(Box), + Symbol(String), + String(String), + Integer(i64), + Bool(bool), + Nil, +} + +#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)] +pub struct Node { + pub r#type: String, + pub children: Vec, + pub first_lineno: usize, + pub first_column: usize, + pub last_lineno: usize, + pub last_column: usize, + pub text: String, +} + +pub fn parse(file: &Path) -> Result<(Node, Vec)> { + parse_with_language(file, Language::Ruby) +} + +pub fn parse_with_language(file: &Path, language: Language) -> Result<(Node, Vec)> { + let source = + fs::read_to_string(file).with_context(|| format!("failed to read {}", file.display()))?; + let mut parser = Parser::new(); + parser + .set_language(&language_grammar(language)) + .with_context(|| "failed to initialize tree-sitter parser")?; + let tree = parser + .parse(&source, None) + .with_context(|| format!("tree-sitter produced no tree for {}", file.display()))?; + let root = normalize_tree(tree.root_node(), &source, language); + let lines = source.lines().map(ToString::to_string).collect(); + Ok((root, lines)) +} + +pub fn normalize_tree(root: TreeSitterNode<'_>, source: &str, language: Language) -> Node { + TreeSitterNormalizer::new(source, language).normalize(root) +} + +fn language_grammar(language: Language) -> TreeSitterLanguage { + match language { + Language::Ruby => 
tree_sitter_ruby::LANGUAGE.into(), + Language::Python => tree_sitter_python::LANGUAGE.into(), + Language::JavaScript => tree_sitter_javascript::LANGUAGE.into(), + Language::Java => tree_sitter_java::LANGUAGE.into(), + Language::TypeScript => tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(), + Language::Swift => tree_sitter_swift::LANGUAGE.into(), + Language::Kotlin => tree_sitter_kotlin_ng::LANGUAGE.into(), + Language::Go => tree_sitter_go::LANGUAGE.into(), + Language::Rust => tree_sitter_rust::LANGUAGE.into(), + Language::Zig => tree_sitter_zig::LANGUAGE.into(), + Language::Lua => tree_sitter_lua::LANGUAGE.into(), + Language::C => tree_sitter_c::LANGUAGE.into(), + Language::Cpp => tree_sitter_cpp::LANGUAGE.into(), + Language::CSharp => tree_sitter_c_sharp::LANGUAGE.into(), + Language::Php => tree_sitter_php::LANGUAGE_PHP.into(), + } +} + +pub fn node(child: &Child) -> Option<&Node> { + match child { + Child::Node(node) => Some(node), + _ => None, + } +} + +pub fn slice(node: &Node, _lines: &[String]) -> String { + normalize_text(&node.text) +} + +pub fn body_stmts(defn_node: &Node) -> Vec<&Node> { + let scope_index = if defn_node.r#type == "DEFS" { 2 } else { 1 }; + let Some(scope) = defn_node.children.get(scope_index).and_then(node) else { + return Vec::new(); + }; + if scope.r#type != "SCOPE" { + return Vec::new(); + } + let Some(body) = scope.children.get(2).and_then(node) else { + return Vec::new(); + }; + statement_nodes(body) +} + +fn statement_nodes(body: &Node) -> Vec<&Node> { + match body.r#type.as_str() { + "BLOCK" | "COMPOUND_STATEMENT" | "DECLARATION_LIST" | "FUNCTION_BODY" | "HASH" + | "STATEMENTS" => body.children.iter().filter_map(node).collect(), + "RESCUE" | "ENSURE" => { + let mut out = Vec::new(); + if let Some(primary) = body.children.first().and_then(node) { + out.extend(statement_nodes(primary)); + } + out.extend( + body.children + .iter() + .skip(1) + .filter_map(node) + .filter(|child| child.r#type != "SCOPE"), + ); + out + } + _ => 
vec![body], + } +} + +pub fn canon_polarity(text: &str) -> (String, bool) { + let trimmed = text.trim(); + if let Some(rest) = trimmed.strip_prefix('!') { + ( + rest.trim_start_matches('(') + .trim_end_matches(')') + .trim() + .to_string(), + true, + ) + } else { + (trimmed.to_string(), false) + } +} + +pub fn flatten_and(node: &Node) -> Vec<&Node> { + if node.r#type != "AND" { + return vec![node]; + } + node.children + .iter() + .filter_map(self::node) + .flat_map(flatten_and) + .collect() +} + +const QUESTION_COLON_TERNARY_KINDS: &[&str] = &[ + "body_statement", + "block_body", + "statement", + "argument_list", + "conditional", +]; +const TYPESCRIPT_TERNARY_KINDS: &[&str] = &[ + "body_statement", + "block_body", + "statement", + "argument_list", + "conditional", + "ternary_expression", +]; +const CASE_ARGUMENT_WHEN_KINDS: &[&str] = &[ + "when", + "switch_case", + "case_clause", + "expression_case", + "case_statement", + "switch_section", + "switch_block_statement_group", + "switch_entry", + "when_entry", + "match_arm", +]; +const CASE_ELSE_KINDS: &[&str] = &["else", "switch_default"]; +const CASE_DEFAULT_PATTERN_KINDS: &[&str] = &["case_pattern", "match_pattern", "pattern"]; +const LEADING_FUNCTION_WRAPPER_KINDS: &[&str] = &["body_statement", "statement"]; +const PYTHON_LEADING_FUNCTION_WRAPPER_KINDS: &[&str] = &["block"]; +const LUA_LEADING_FUNCTION_WRAPPER_KINDS: &[&str] = &["block"]; +const OWNER_STATEMENT_NESTED_KINDS: &[&str] = + &["class", "class_definition", "class_declaration", "module"]; +const LEADING_OWNER_WRAPPER_KINDS: &[&str] = &["body_statement", "statement"]; +const PYTHON_LEADING_OWNER_WRAPPER_KINDS: &[&str] = &["block"]; +const OWNER_NODE_KINDS: &[&str] = &["class", "class_definition", "class_declaration", "module"]; +const IF_NODE_KINDS: &[&str] = &[ + "if", + "if_statement", + "if_modifier", + "unless", + "unless_modifier", + "if_expression", + "conditional", +]; +const LEADING_IF_WRAPPER_KINDS: &[&str] = &["body_statement", "block", 
"block_body", "statement"]; +const PYTHON_LEADING_IF_WRAPPER_KINDS: &[&str] = &["block"]; +const LUA_LEADING_IF_WRAPPER_KINDS: &[&str] = &["block"]; +const LEADING_CASE_WRAPPER_KINDS: &[&str] = &["body_statement", "block", "block_body", "statement"]; +const CASE_NODE_KINDS: &[&str] = &[ + "case", + "switch_statement", + "expression_switch_statement", + "switch_expression", + "match_statement", + "match_expression", + "when_expression", +]; +const LEADING_LOOP_WRAPPER_KINDS: &[&str] = &["body_statement", "block", "block_body", "statement"]; +const LOOP_NODE_KINDS: &[&str] = &[ + "while", + "while_statement", + "while_modifier", + "until", + "until_modifier", +]; +const RESCUE_BODY_WRAPPER_KINDS: &[&str] = &["body_statement", "block_body", "statement"]; +const ENSURE_BODY_WRAPPER_KINDS: &[&str] = &["body_statement", "block_body", "statement"]; +const ARRAY_LITERAL_WRAPPER_KINDS: &[&str] = &[ + "body_statement", + "block", + "block_body", + "statement", + "argument_list", + "expression_statement", +]; +const ARRAY_LITERAL_NODE_KINDS: &[&str] = &["array", "list"]; +const ELEMENT_REFERENCE_WRAPPER_KINDS: &[&str] = &[ + "body_statement", + "block", + "block_body", + "statement", + "expression_statement", + "expression_list", +]; +const ELEMENT_REFERENCE_NODE_KINDS: &[&str] = &[ + "element_reference", + "subscript", + "subscript_expression", + "bracket_index_expression", +]; +const HASH_LITERAL_WRAPPER_KINDS: &[&str] = &[ + "body_statement", + "block", + "block_body", + "statement", + "argument_list", + "expression_statement", + "parenthesized_expression", +]; +const HASH_LITERAL_NODE_KINDS: &[&str] = &["hash", "dictionary", "object", "table_constructor"]; +const STATEMENT_BLOCK_PARENT_KINDS: &[&str] = &[ + "method_declaration", + "constructor_declaration", + "function_declaration", + "function_body", + "if_statement", + "while_statement", + "for_statement", + "enhanced_for_statement", + "try_statement", + "catch_clause", + "finally_clause", + "do_statement", + 
"lambda_expression", +]; +const EMPTY_BODY_WRAPPER_KINDS: &[&str] = &["body_statement", "block", "block_body", "statement"]; +const HEREDOC_BODY_WRAPPER_KINDS: &[&str] = &["body_statement", "block_body", "statement", "then"]; +const INTERPOLATED_STATEMENT_WRAPPER_KINDS: &[&str] = + &["body_statement", "block_body", "statement", "argument_list"]; +const CONCATENATED_STRING_WRAPPER_KINDS: &[&str] = + &["body_statement", "block_body", "statement", "argument_list"]; +const PYTHON_CONCATENATED_STRING_WRAPPER_KINDS: &[&str] = &[ + "body_statement", + "block_body", + "statement", + "argument_list", + "block", + "expression_statement", +]; +const CONCATENATED_STRING_NODE_KINDS: &[&str] = &["chained_string", "concatenated_string"]; + +pub(crate) struct TernaryParts<'tree> { + pub(crate) condition: TreeSitterNode<'tree>, + pub(crate) positive: Vec>, + pub(crate) negative: Vec>, +} + +fn direct_binary_operator<'source>( + node: TreeSitterNode<'_>, + source: &'source str, +) -> Option<&'source str> { + node.children(&mut node.walk()) + .find(|child| !child.is_named() && !matches!(node_text(*child, source), "(" | ")")) + .map(|child| node_text(child, source)) +} + +fn question_colon_ternary_parts<'tree>( + node: TreeSitterNode<'tree>, + source: &str, + kinds: &[&str], +) -> Option> { + if !kinds.contains(&node.kind()) { + return None; + } + let Some((question_byte, colon_byte)) = ternary_separator_bytes(node, source) else { + let raw_named = raw_named_children(node); + if raw_named.len() == 1 && node_text(raw_named[0], source) == node_text(node, source) { + return question_colon_ternary_parts(raw_named[0], source, kinds); + } + return None; + }; + let named = named_children(node); + let condition = *named.first()?; + let positive = named + .iter() + .copied() + .filter(|child| child.start_byte() > question_byte && child.end_byte() <= colon_byte) + .collect::>(); + let negative = named + .iter() + .copied() + .filter(|child| child.start_byte() > colon_byte) + .collect::>(); + + 
if positive.is_empty() || negative.is_empty() { + return None; + } + + Some(TernaryParts { + condition, + positive, + negative, + }) +} + +fn ternary_separator_bytes(node: TreeSitterNode<'_>, source: &str) -> Option<(usize, usize)> { + let mut question = None; + let mut colon = None; + for child in node.children(&mut node.walk()) { + if child.is_named() { + continue; + } + let text = node_text(child, source); + if text == "?" && question.is_none() { + question = Some(child.start_byte()); + } else if text == ":" && question.is_some() { + colon = Some(child.start_byte()); + break; + } + } + Some((question?, colon?)) +} + +fn named_children<'tree>(node: TreeSitterNode<'tree>) -> Vec> { + node.children(&mut node.walk()) + .filter(|child| child.is_named()) + .collect() +} + +fn raw_named_children<'tree>(node: TreeSitterNode<'tree>) -> Vec> { + node.children(&mut node.walk()) + .filter(|child| child.is_named()) + .collect() +} + +fn identifier_kind_name(kind: &str) -> bool { + matches!( + kind, + "identifier" + | "simple_identifier" + | "property_identifier" + | "field_identifier" + | "shorthand_property_identifier" + ) +} + +fn case_arm_descendant(node: TreeSitterNode<'_>) -> bool { + let mut stack = named_children(node); + while let Some(child) = stack.pop() { + if CASE_ARGUMENT_WHEN_KINDS.contains(&child.kind()) { + return true; + } + stack.extend(named_children(child)); + } + false +} + +fn descendant<'tree>(node: TreeSitterNode<'tree>, kinds: &[&str]) -> Option> { + let mut stack = named_children(node); + while let Some(child) = stack.pop() { + if kinds.contains(&child.kind()) { + return Some(child); + } + stack.extend(named_children(child)); + } + None +} + +fn concatenated_string_node<'tree>(node: TreeSitterNode<'tree>) -> Option> { + if !CONCATENATED_STRING_NODE_KINDS.contains(&node.kind()) { + return None; + } + let children = named_children(node); + if children.len() > 1 && children.iter().all(|child| child.kind() == "string") { + Some(node) + } else { + None + 
} +} + +fn concatenated_string_target<'tree>(node: TreeSitterNode<'tree>) -> Option> { + if let Some(target) = concatenated_string_node(node) { + return Some(target); + } + let children = named_children(node); + if children.len() == 1 { + return concatenated_string_target(children[0]); + } + None +} + +fn bracketed(node: TreeSitterNode<'_>, source: &str, opening: &str, closing: &str) -> bool { + let children = node.children(&mut node.walk()).collect::>(); + children + .first() + .map(|child| node_text(*child, source) == opening) + .unwrap_or(false) + && children + .last() + .map(|child| node_text(*child, source) == closing) + .unwrap_or(false) +} + +fn statement_block_wrapper(node: TreeSitterNode<'_>) -> bool { + node.kind() == "block" + && node + .parent() + .map(|parent| STATEMENT_BLOCK_PARENT_KINDS.contains(&parent.kind())) + .unwrap_or(false) +} + +fn element_reference_shape(node: TreeSitterNode<'_>, source: &str) -> bool { + let children = node.children(&mut node.walk()).collect::>(); + children + .first() + .map(|child| node_text(*child, source) != "[") + .unwrap_or(false) + && children + .iter() + .any(|child| !child.is_named() && node_text(*child, source) == "[") + && children + .iter() + .any(|child| !child.is_named() && node_text(*child, source) == "]") + && named_children(node).len() >= 2 + && named_children(node) + .iter() + .all(|child| !matches!(child.kind(), "block" | "do_block")) +} + +fn lua_positional_table_target<'tree>( + node: TreeSitterNode<'tree>, + source: &str, +) -> Option> { + if node.kind() == "block" { + let named = named_children(node); + if named.len() == 1 && named[0].kind() == "function_call" { + return lua_positional_table_target(named[0], source); + } + } + + if node.kind() == "function_call" { + let named = named_children(node); + if named.len() == 2 + && named[0].kind() == "identifier" + && node_text(named[0], source).is_empty() + { + return lua_positional_table_target(named[1], source); + } + } + + if node.kind() == "arguments" 
{ + let table = named_children(node) + .into_iter() + .find(|child| child.kind() == "table_constructor")?; + if node_text(node, source).trim() == node_text(table, source).trim() { + return lua_positional_table_target(table, source).map(|_| node); + } + return None; + } + + if node.kind() == "table_constructor" { + let fields = named_children(node); + if fields.is_empty() { + return None; + } + if fields.iter().all(|field| { + field.kind() == "field" && { + let named = named_children(*field); + named.len() <= 1 + } + }) { + return Some(node); + } + } + + None +} + +fn lua_keyed_table_target<'tree>( + node: TreeSitterNode<'tree>, + source: &str, +) -> Option> { + if node.kind() == "block" { + let named = named_children(node); + if named.len() == 1 && node_text(named[0], source).trim() == node_text(node, source).trim() + { + return lua_keyed_table_target(named[0], source); + } + if named.len() == 2 + && named[0].kind() == "identifier" + && node_text(named[0], source).is_empty() + { + return lua_keyed_table_target(named[1], source); + } + } + + if node.kind() == "function_call" { + let named = named_children(node); + if named.len() == 2 + && named[0].kind() == "identifier" + && node_text(named[0], source).is_empty() + { + return lua_keyed_table_target(named[1], source); + } + } + + if node.kind() == "arguments" { + if bracketed(node, source, "{", "}") { + let fields = named_children(node); + if fields.is_empty() { + return Some(node); + } + if fields + .iter() + .any(|field| field.kind() != "field" || named_children(*field).len() > 1) + { + return Some(node); + } + return None; + } + + let table = named_children(node) + .into_iter() + .find(|child| child.kind() == "table_constructor")?; + if node_text(node, source).trim() == node_text(table, source).trim() { + return lua_keyed_table_target(table, source).map(|_| node); + } + return None; + } + + if node.kind() == "table_constructor" { + let fields = named_children(node); + if fields.is_empty() { + return Some(node); + 
} + if fields + .iter() + .any(|field| field.kind() != "field" || named_children(*field).len() > 1) + { + return Some(node); + } + } + + None +} + +struct TreeSitterNormalizer<'source> { + source: &'source str, + #[cfg(test)] + language: Language, + normalization_adapter: &'static dyn AstNormalizationAdapter, + local_stack: Vec>, + root_span: Option, + current_heredoc_body_span: Option, +} + +impl<'source> TreeSitterNormalizer<'source> { + fn new(source: &'source str, language: Language) -> Self { + Self { + source, + #[cfg(test)] + language, + normalization_adapter: normalization_adapter(language), + local_stack: Vec::new(), + root_span: None, + current_heredoc_body_span: None, + } + } + + fn normalize(mut self, root: TreeSitterNode<'_>) -> Node { + self.root_span = Some(span(root)); + let children = if self.ruby() { + self.with_ruby_scope(root, true, |normalizer| normalizer.normalize_children(root)) + } else { + self.normalize_children(root) + }; + self.wrap("ROOT", children, root) + } + + fn normalize_node(&mut self, node: TreeSitterNode<'_>) -> Option { + if node.kind() == "comment" { + return None; + } + if self.assignment_lhs(node) { + return self.normalize_assignment_lhs(node); + } + if self.infix_statement(node) { + return self.normalize_infix_statement(node); + } + if self.ternary_statement(node) { + return self.normalize_ternary_statement(node); + } + if self.leading_function_statement(node) { + return self.normalize_leading_function_statement(node); + } + if self.leading_owner_statement(node) { + return self.normalize_leading_owner_statement(node); + } + if self.leading_if_statement(node) { + return self.normalize_leading_if_statement(node); + } + if node.kind() == "elsif" { + return Some(self.normalize_elsif(node)); + } + if self.ensure_body_statement(node) { + return self.normalize_ensure_body_statement(node); + } + if self.rescue_body_statement(node) { + return self.normalize_rescue_body_statement(node); + } + if if_kind(node.kind()) { + return 
self.normalize_if(node); + } + if self.leading_case_statement(node) { + return self.normalize_leading_case_statement(node); + } + if self.leading_loop_statement(node) { + return self.normalize_leading_loop_statement(node); + } + if let Some(loop_type) = self.loop_node_type(node.kind()) { + return self.normalize_loop(node, loop_type); + } + if self.case_kind(node.kind()) || self.hidden_match(node) { + return self.normalize_case(node); + } + if self.hash_literal_statement(node) { + return self.normalize_hash_literal_statement(node); + } + if self.array_literal_statement(node) { + return self.normalize_array_literal_statement(node); + } + if self.element_reference_statement(node) { + return self.normalize_element_reference_statement(node); + } + if self.concatenated_string_statement(node) { + return Some(self.normalize_concatenated_string_statement(node)); + } + if self.interpolated_statement(node) { + return Some(self.normalize_interpolated_statement(node)); + } + if self.wrapped_return_statement(node) { + return self.normalize_wrapped_return_statement(node); + } + if self.heredoc_body_statement(node) { + return self.normalize_heredoc_body_statement(node); + } + if self.empty_body_statement(node) { + return None; + } + if self.terminal_statement(node) { + return Some(self.normalize_terminal_statement(node)); + } + if self.modifier_statement(node) { + return self.normalize_modifier_statement(node); + } + if self.statement_call_with_block(node) { + return self.normalize_statement_call_with_block(node); + } + if self.super_statement(node) { + return Some(self.normalize_super_statement(node)); + } + if self.command_call_statement(node) { + return self.normalize_command_call_statement(node); + } + if self.yield_statement(node) { + return Some(self.normalize_yield_statement(node)); + } + if self.yield_argument_list(node) { + return Some(self.normalize_yield_argument_list(node)); + } + if self.super_statement(node) { + return Some(self.normalize_super_statement(node)); + } 
+ if self.unary_not_statement(node) { + return self.normalize_unary_not_statement(node); + } + if self.dotted_expression(node) { + return self.normalize_dotted_expression(node); + } + if self.unary_minus_expression(node) { + return self.normalize_unary_minus(node); + } + if self.unary_not_expression(node) { + return self.normalize_unary_not(node); + } + if self.boolean_expression(node) { + return self.normalize_boolean(node); + } + if self.operator_call_expression(node) { + return self.normalize_operator_call(node); + } + if self.comparison_expression(node) { + return self.normalize_comparison(node); + } + if self.self_node(node) { + return Some(self.wrap("SELF", Vec::new(), node)); + } + if self.instance_variable(node) { + return Some(self.wrap( + "IVAR", + vec![Child::String(node_text(node, self.source).to_string())], + node, + )); + } + if self.global_variable(node) { + return Some(self.normalize_global_variable(node)); + } + if self.self_identifier(node) { + return Some(self.wrap("SELF", Vec::new(), node)); + } + if let Some(name) = self + .normalization_adapter + .local_identifier_text(node, self.source) + { + return Some(self.normalize_identifier_with_name(node, name)); + } + if let Some(name) = self + .normalization_adapter + .constant_identifier_text(node, self.source) + { + return Some(self.wrap("CONST", vec![Child::Symbol(name)], node)); + } + if self.class_node(node) { + return self.normalize_class(node); + } + if self.module_node(node) { + return self.normalize_module(node); + } + if self.lambda_expression(node) { + return self.normalize_lambda(node); + } + + match node.kind() { + "program" => { + let children = self.normalize_children(node); + Some(self.wrap("ROOT", children, node)) + } + "method" + | "function_definition" + | "function_declaration" + | "method_definition" + | "method_declaration" + | "function_item" => self.normalize_function(node), + "impl_item" => self.normalize_impl(node), + "singleton_method" => 
self.normalize_singleton_function(node), + _ if self.block_kind(node.kind()) => { + let children = self.normalize_children(node); + Some(self.wrap("BLOCK", children, node)) + } + "ensure" => self.normalize_ensure_clause(node), + "begin" => self.normalize_begin(node), + "subshell" => Some(self.normalize_subshell(node)), + "block_argument" => self.normalize_block_argument(node), + "singleton_class" => self.normalize_singleton_class(node), + "yield" => Some(self.normalize_yield(node)), + "operator_assignment" => self.normalize_operator_assignment(node), + "assignment" | "assignment_expression" | "assignment_statement" => { + self.normalize_assignment(node) + } + "variable_declarator" if !self.has_assignment_operator_child(node) => { + Some(self.wrap(&kind_type(node.kind()), Vec::new(), node)) + } + "expression_list" if self.single_short_var_lhs(node) => { + Some(self.wrap(&kind_type(node.kind()), Vec::new(), node)) + } + _ if self.call_node(node) => self.normalize_call(node), + _ if self.member_read_node(node) => self.normalize_member_read(node), + _ if self.unwrap_node(node) => self + .named_children(node) + .into_iter() + .next() + .and_then(|child| self.normalize_node(child)), + "element_reference" => self.normalize_element_reference(node), + "rescue_modifier" => self.normalize_rescue_modifier(node), + "super" => Some(self.normalize_super(node)), + "return" | "return_statement" | "return_expression" | "break" | "break_statement" + | "break_expression" | "next" | "continue_statement" => self.normalize_return(node), + "nil" | "none" | "null" => Some(self.wrap("NIL", Vec::new(), node)), + "true" => Some(self.wrap("TRUE", Vec::new(), node)), + "false" => Some(self.wrap("FALSE", Vec::new(), node)), + "instance_variable" => Some(self.wrap( + "IVAR", + vec![Child::String(node_text(node, self.source).to_string())], + node, + )), + "identifier" + | "simple_identifier" + | "property_identifier" + | "field_identifier" + | "shorthand_property_identifier" => 
Some(self.normalize_identifier(node)), + "constant" | "scope_resolution" | "type_identifier" | "scoped_type_identifier" => { + Some(self.normalize_const(node)) + } + "self" | "this" => Some(self.wrap("SELF", Vec::new(), node)), + "global_variable" => Some(self.normalize_global_variable(node)), + "array" => Some(self.normalize_array_literal(node)), + _ if self.interpolation_node(node) => self.normalize_interpolation(node), + "heredoc_beginning" => Some(self.normalize_heredoc_beginning(node)), + "chained_string" | "concatenated_string" => Some(self.normalize_chained_string(node)), + "string" + | "string_content" + | "string_literal" + | "interpreted_string_literal" + | "raw_string_literal" => { + if self.interpolated_string(node) { + Some(self.normalize_interpolated_string(node)) + } else if let Some(content) = self.no_paren_string_argument_content(node) { + Some(self.wrap( + "STR", + vec![Child::String(node_text(content, self.source).to_string())], + content, + )) + } else { + Some(self.wrap( + "STR", + vec![Child::String(node_text(node, self.source).to_string())], + node, + )) + } + } + "integer" => Some(self.wrap("INTEGER", Vec::new(), node)), + "float" | "float_literal" => Some(self.wrap("FLOAT", Vec::new(), node)), + "pair" => self.normalize_pair(node), + "simple_symbol" | "symbol" => Some(self.wrap( + "LIT", + vec![Child::Symbol( + node_text(node, self.source).trim_start_matches(':').to_string(), + )], + node, + )), + _ => { + let children = self.normalize_children(node); + Some(self.wrap(&kind_type(node.kind()), children, node)) + } + } + } + + fn normalize_function(&mut self, node: TreeSitterNode<'_>) -> Option { + if node.kind() == "singleton_method" { + return self.normalize_singleton_function(node); + } + + let name = self.function_name(node)?; + let args = self.normalize_parameters(self.parameters_child(node)); + let body = self.with_ruby_scope(node, true, |normalizer| { + let body_node = normalizer + .named_field(node, "body") + .or_else(|| 
normalizer.block_child(node))?; + let body = normalizer.normalize_body(body_node); + let body = normalizer.elide_tail_returns(body); + let body = normalizer.prepend_inline_parameter_begin(node, body); + normalizer.elide_implicit_nil_body(body) + }); + let scope = self.scope(body, args, node); + Some(self.wrap( + "DEFN", + vec![Child::Symbol(name), Child::Node(Box::new(scope))], + node, + )) + } + + fn normalize_leading_function_statement(&mut self, node: TreeSitterNode<'_>) -> Option { + let target = self.leading_function_target(node)?; + if function_kind(target.kind()) { + return self.normalize_function(target); + } + let name = self + .leading_function_name(target) + .map(|name| node_text(name, self.source).to_string())?; + let body_node = self.leading_function_body(target); + let body = self.with_ruby_scope(target, true, |normalizer| { + let body = body_node.and_then(|body| normalizer.normalize_body(body)); + normalizer.elide_tail_returns(body) + }); + Some(self.wrap( + "DEFN", + vec![ + Child::Symbol(name), + Child::Node(Box::new(self.scope(body, None, target))), + ], + target, + )) + } + + fn normalize_singleton_function(&mut self, node: TreeSitterNode<'_>) -> Option { + let name = self.function_name(node)?; + let receiver = self + .singleton_receiver(node) + .and_then(|child| self.normalize_node(child)) + .unwrap_or_else(|| self.wrap("SELF", Vec::new(), node)); + let args = self.normalize_parameters(self.parameters_child(node)); + let body = self.with_ruby_scope(node, true, |normalizer| { + let body_node = normalizer + .named_field(node, "body") + .or_else(|| normalizer.block_child(node))?; + let body = normalizer.normalize_body(body_node); + let body = normalizer.elide_tail_returns(body); + let body = normalizer.prepend_inline_parameter_begin(node, body); + normalizer.elide_implicit_nil_body(body) + }); + let scope = self.scope(body, args, node); + Some(self.wrap( + "DEFS", + vec![ + Child::Node(Box::new(receiver)), + Child::Symbol(name), + 
Child::Node(Box::new(scope)), + ], + node, + )) + } + + fn normalize_class(&mut self, node: TreeSitterNode<'_>) -> Option { + let name = self.const_for( + self.named_field(node, "name") + .or_else(|| self.first_named(node)), + node, + ); + let body = self + .named_field(node, "body") + .or_else(|| self.block_child(node)) + .and_then(|body| self.normalize_body(body)); + Some(self.wrap( + "CLASS", + vec![ + Child::Node(Box::new(name)), + Child::Nil, + Child::Node(Box::new(self.scope(body, None, node))), + ], + node, + )) + } + + fn normalize_impl(&mut self, node: TreeSitterNode<'_>) -> Option { + let type_node = self.named_field(node, "type").or_else(|| { + self.named_children(node).into_iter().find(|child| { + matches!( + child.kind(), + "type_identifier" | "scoped_type_identifier" | "identifier" + ) + }) + }); + let name = self.const_for(type_node, node); + let body = self + .named_field(node, "body") + .or_else(|| self.block_child(node)) + .or(Some(node)) + .and_then(|body| self.normalize_body(body)); + Some(self.wrap( + "CLASS", + vec![ + Child::Node(Box::new(name)), + Child::Nil, + Child::Node(Box::new(self.scope(body, None, node))), + ], + node, + )) + } + + fn normalize_nested_class_as_iter(&mut self, node: TreeSitterNode<'_>) -> Option { + let name_node = self + .named_field(node, "name") + .or_else(|| self.first_named(node))?; + let name = node_text(name_node, self.source).to_string(); + let header_end = node + .children(&mut node.walk()) + .find(|child| !child.is_named() && node_text(*child, self.source) == ":") + .unwrap_or(name_node); + let call = self.wrap_from_nodes( + "VCALL", + vec![Child::Symbol(name), Child::Nil], + node, + header_end, + ); + let body = self + .named_field(node, "body") + .or_else(|| self.block_child(node)) + .and_then(|body| self.normalize_body(body)); + let scope = self.scope(body, None, node); + Some(self.wrap( + "ITER", + vec![Child::Node(Box::new(call)), Child::Node(Box::new(scope))], + node, + )) + } + + fn 
normalize_module(&mut self, node: TreeSitterNode<'_>) -> Option { + let name = self.const_for( + self.named_field(node, "name") + .or_else(|| self.first_named(node)), + node, + ); + let body = self + .named_field(node, "body") + .or_else(|| self.block_child(node)) + .and_then(|body| self.normalize_body(body)); + Some(self.wrap( + "MODULE", + vec![ + Child::Node(Box::new(name)), + Child::Node(Box::new(self.scope(body, None, node))), + ], + node, + )) + } + + fn normalize_singleton_class(&mut self, node: TreeSitterNode<'_>) -> Option { + let named = self.named_children(node); + let receiver = named + .first() + .and_then(|receiver| self.normalize_node(*receiver)); + let body = named.get(1).and_then(|body| self.normalize_body(*body)); + Some(self.wrap( + "SCLASS", + vec![ + optional_node(receiver), + Child::Node(Box::new(self.scope(body, None, node))), + ], + node, + )) + } + + fn normalize_lambda(&mut self, node: TreeSitterNode<'_>) -> Option { + let target = self.lambda_target(node).unwrap_or(node); + let body_node = self + .named_field(target, "body") + .or_else(|| self.block_child(target)) + .or_else(|| self.named_children(target).into_iter().last())?; + let body = self.with_ruby_scope(target, false, |normalizer| { + normalizer.normalize_body(body_node).map(dynamic_scope) + }); + let scope = self.scope(body, None, target); + Some(self.wrap("LAMBDA", vec![Child::Node(Box::new(scope))], target)) + } + + fn normalize_yield(&mut self, node: TreeSitterNode<'_>) -> Node { + let args_node = self + .named_children(node) + .into_iter() + .find(|child| child.kind() == "argument_list"); + let args = args_node + .map(|args| self.yield_argument_nodes(args)) + .unwrap_or_else(|| self.yield_inline_arguments(node)); + self.wrap( + "YIELD", + vec![list_or_nil(args, args_node.unwrap_or(node), self)], + node, + ) + } + + fn normalize_yield_statement(&mut self, node: TreeSitterNode<'_>) -> Node { + let args_node = self + .named_children(node) + .into_iter() + .find(|child| 
child.kind() == "argument_list"); + let args = args_node + .map(|args| self.yield_argument_nodes(args)) + .unwrap_or_else(|| self.yield_inline_arguments(node)); + self.wrap( + "YIELD", + vec![list_or_nil(args, args_node.unwrap_or(node), self)], + node, + ) + } + + fn normalize_yield_argument_list(&mut self, node: TreeSitterNode<'_>) -> Node { + let args = self.yield_argument_nodes(node); + let source = self.parent_node(node).unwrap_or(node); + self.wrap("YIELD", vec![list_or_nil(args, node, self)], source) + } + + fn normalize_super_statement(&mut self, node: TreeSitterNode<'_>) -> Node { + let raw = self.raw_named_children(node); + let children = if raw.len() == 1 && raw[0].kind() == "call" { + self.raw_named_children(raw[0]) + } else { + raw + }; + let args_node = children + .into_iter() + .find(|child| child.kind() == "argument_list"); + let args = args_node + .map(|args| self.yield_argument_nodes(args)) + .unwrap_or_default(); + self.wrap( + "SUPER", + vec![list_or_nil(args, args_node.unwrap_or(node), self)], + node, + ) + } + + fn normalize_body(&mut self, node: TreeSitterNode<'_>) -> Option { + if let Some(child) = self + .normalization_adapter + .nested_class_body_child(node, self.source) + { + return self.normalize_nested_class_as_iter(child); + } + if self.leading_function_statement(node) { + return self.normalize_leading_function_statement(node); + } + if self.leading_owner_statement(node) { + return self.normalize_leading_owner_statement(node); + } + if self.leading_if_statement(node) { + return self.normalize_leading_if_statement(node); + } + if node.kind() == "elsif" { + return Some(self.normalize_elsif(node)); + } + if self.ternary_statement(node) { + return self.normalize_ternary_statement(node); + } + if if_kind(node.kind()) { + return self.normalize_if(node); + } + if self.leading_case_statement(node) { + return self.normalize_leading_case_statement(node); + } + if self.leading_loop_statement(node) { + return 
self.normalize_leading_loop_statement(node); + } + if self.ensure_body_statement(node) { + return self.normalize_ensure_body_statement(node); + } + if self.rescue_body_statement(node) { + return self.normalize_rescue_body_statement(node); + } + if self.hash_literal_statement(node) { + return self.normalize_hash_literal_statement(node); + } + if self.array_literal_statement(node) { + return self.normalize_array_literal_statement(node); + } + if self.element_reference_statement(node) { + return self.normalize_element_reference_statement(node); + } + if self.interpolated_statement(node) { + return Some(self.normalize_interpolated_statement(node)); + } + if self.wrapped_return_statement(node) { + return self.normalize_wrapped_return_statement(node); + } + if self.heredoc_body_statement(node) { + return self.normalize_heredoc_body_statement(node); + } + if self.empty_body_statement(node) { + return None; + } + if self.modifier_statement(node) { + return self.normalize_modifier_statement(node); + } + if self.statement_call_with_block(node) { + return self.normalize_statement_call_with_block(node); + } + if self.command_call_statement(node) { + return self.normalize_command_call_statement(node); + } + if self.yield_statement(node) { + return Some(self.normalize_yield_statement(node)); + } + if self.unary_not_statement(node) { + return self.normalize_unary_not_statement(node); + } + if self.operator_assignment_statement(node) { + return self.normalize_operator_assignment_statement(node); + } + if self.dotted_expression(node) { + return self.normalize_dotted_expression(node); + } + if self.unary_minus_expression(node) { + return self.normalize_unary_minus(node); + } + if self.argument_list_unary_not(node) { + return self.normalize_argument_list_unary_not(node); + } + if self.infix_statement(node) { + return self.normalize_infix_statement(node); + } + if self.boolean_expression(node) { + return self.normalize_boolean(node); + } + if self.block_kind(node.kind()) { + let 
children = self.normalize_children(node); + if children.is_empty() { + let text = node_text(node, self.source).trim(); + if bare_identifier_text(text) { + return Some(self.wrap("VCALL", vec![Child::Symbol(text.to_string())], node)); + } + return None; + } + if children.len() == 1 { + return child_node(children.into_iter().next().unwrap()); + } + + return Some(self.wrap("BLOCK", children, node)); + } + + self.normalize_node(node) + } + + fn normalize_if(&mut self, node: TreeSitterNode<'_>) -> Option { + if matches!(node.kind(), "if_modifier" | "unless_modifier") { + let named = self.named_children(node); + let action = *named.first()?; + let condition = *named.get(1)?; + let node_type = if node.kind().starts_with("unless") { + "UNLESS" + } else { + "IF" + }; + let condition = optional_node(self.normalize_node(condition)); + let action = optional_node(self.normalize_modifier_action(action)); + return Some(self.wrap(node_type, vec![condition, action, Child::Nil], node)); + } + + let condition_raw = self + .named_field(node, "condition") + .or_else(|| self.named_field(node, "predicate")) + .or_else(|| self.first_named(node))?; + let condition = optional_node(self.normalize_node(condition_raw)); + let positive_raw = self + .named_field(node, "consequence") + .or_else(|| self.named_field(node, "body")) + .or_else(|| { + self.named_children(node) + .into_iter() + .find(|child| child.kind() == "then") + }) + .or_else(|| self.branch_child(node, condition_raw, 0)); + let negative_raw = self + .named_field(node, "alternative") + .or_else(|| self.explicit_alternative(node)) + .or_else(|| { + if self.ruby() { + None + } else { + self.branch_child(node, condition_raw, 1) + } + }); + let positive = optional_node(positive_raw.and_then(|child| self.normalize_body(child))); + let negative = + optional_node(negative_raw.and_then(|child| self.normalize_else_or_branch(child))); + let node_type = if node.kind().starts_with("unless") { + "UNLESS" + } else { + "IF" + }; + 
Some(self.wrap(node_type, vec![condition, positive, negative], node)) + } + + fn normalize_elsif(&mut self, node: TreeSitterNode<'_>) -> Node { + let condition = self + .named_children(node) + .into_iter() + .find(|child| !matches!(child.kind(), "comment" | "then" | "elsif" | "else")); + let positive = self + .named_children(node) + .into_iter() + .find(|child| child.kind() == "then"); + let negative = self + .named_children(node) + .into_iter() + .find(|child| matches!(child.kind(), "elsif" | "else")); + let condition = optional_node(condition.and_then(|child| self.normalize_node(child))); + let positive = optional_node(positive.and_then(|child| self.normalize_body(child))); + let negative = + optional_node(negative.and_then(|child| self.normalize_else_or_branch(child))); + + self.wrap("IF", vec![condition, positive, negative], node) + } + + fn normalize_loop(&mut self, node: TreeSitterNode<'_>, node_type: &str) -> Option { + if matches!(node.kind(), "while_modifier" | "until_modifier") { + let named = self.named_children(node); + let action = *named.first()?; + let condition = *named.get(1)?; + let condition = optional_node(self.normalize_node(condition)); + let action = optional_node(self.normalize_modifier_action(action)); + return Some(self.wrap(node_type, vec![condition, action, Child::Bool(true)], node)); + } + + let condition = self + .named_field(node, "condition") + .or_else(|| self.first_named(node)); + let body = self + .named_field(node, "body") + .or_else(|| self.named_field(node, "consequence")) + .or_else(|| self.block_child(node)); + let condition = + optional_node(condition.and_then(|condition| self.normalize_node(condition))); + let body = optional_node(body.and_then(|body| self.normalize_body(body))); + Some(self.wrap(node_type, vec![condition, body], node)) + } + + fn normalize_else_or_branch(&mut self, node: TreeSitterNode<'_>) -> Option { + if let Some(block) = self.normalization_adapter.else_if_block(node, self.source) { + if let 
Some(normalized) = self.normalize_else_if_block_child(block) { + return Some(self.wrap( + "ELSE_CLAUSE", + vec![Child::Node(Box::new(normalized))], + node, + )); + } + } + if node.kind() != "else" { + return self.normalize_body(node); + } + if let Some(call) = self.first_dotted_call_descendant(node) { + let trailing = self + .source + .get(call.end_byte()..node.end_byte()) + .unwrap_or("") + .trim(); + if trailing.is_empty() { + return self.normalize_node(call); + } + } + self.normalize_body_nodes(self.named_children(node), node) + } + + fn normalize_else_if_block_child(&mut self, node: TreeSitterNode<'_>) -> Option { + let statements = self + .raw_named_children(node) + .into_iter() + .filter(|child| child.kind() != "comment") + .collect::>(); + if statements.len() != 1 || statements[0].kind() != "if_statement" { + return None; + } + let if_node = statements[0]; + self.normalize_if(if_node) + } + + fn normalize_case(&mut self, node: TreeSitterNode<'_>) -> Option { + let value_raw = self.case_value(node); + let value = value_raw.and_then(|value| self.normalize_node(value)); + let whens = self + .case_arms(node) + .into_iter() + .filter_map(|arm| self.normalize_when(arm)) + .collect::>(); + let fallback = self.case_else_body(node); + let chain = self.link_when_chain(whens, fallback); + if value_raw.is_none() { + Some(self.wrap("CASE2", vec![optional_node(chain)], node)) + } else { + Some(self.wrap( + "CASE", + vec![optional_node(value), optional_node(chain)], + node, + )) + } + } + + fn normalize_when(&mut self, node: TreeSitterNode<'_>) -> Option { + let patterns = self.normalize_patterns(node); + let body = if let Some(body_nodes) = self + .normalization_adapter + .case_arm_body_nodes(node, self.source) + { + body_nodes + .first() + .copied() + .and_then(|source| self.normalize_body_nodes(body_nodes, source)) + } else { + self.when_body(node) + .and_then(|body| self.normalize_body(body)) + }; + Some(self.wrap( + "WHEN", + vec![ + list_or_nil(patterns, node, self), 
+ optional_node(body), + Child::Nil, + ], + node, + )) + } + + fn normalize_patterns(&mut self, node: TreeSitterNode<'_>) -> Vec { + let mut patterns = self + .raw_named_children(node) + .into_iter() + .filter(|child| { + matches!( + child.kind(), + "pattern" + | "case_pattern" + | "match_pattern" + | "switch_pattern" + | "when_condition" + ) + }) + .collect::>(); + if patterns.is_empty() { + if let Some(value) = self.named_field(node, "value") { + patterns.push(value); + } + } + if patterns.is_empty() { + if let Some(pattern) = self + .named_children(node) + .into_iter() + .find(|child| !self.block_kind(child.kind()) && !self.statement_node(child.kind())) + { + patterns.push(pattern); + } + } + + let mut normalized = Vec::new(); + for pattern in patterns { + let pattern_text = node_text(pattern, self.source).to_string(); + let pattern_wrapper = matches!( + pattern.kind(), + "pattern" + | "case_pattern" + | "match_pattern" + | "switch_pattern" + | "when_condition" + | "expression_list" + ); + let pattern_children = self.named_children(pattern); + if pattern_text.contains("::") { + normalized.push(self.wrap("CONST", vec![Child::Symbol(pattern_text)], pattern)); + } else if pattern_wrapper && pattern_children.is_empty() && integer_text(&pattern_text) + { + normalized.push(self.wrap("INTEGER", Vec::new(), pattern)); + } else if self.ruby() + && pattern_wrapper + && pattern_children.is_empty() + && ruby_constant_text(&pattern_text) + { + normalized.push(self.wrap("CONST", vec![Child::Symbol(pattern_text)], pattern)); + } else if self.ruby() + && pattern_wrapper + && pattern_children.is_empty() + && bare_identifier_text(&pattern_text) + { + normalized.push(self.local_or_call_for_name(&pattern_text, pattern)); + } else if pattern_wrapper { + normalized.extend( + pattern_children + .into_iter() + .filter_map(|child| self.normalize_node(child)), + ); + } else if let Some(pattern) = self.normalize_node(pattern) { + normalized.push(pattern); + } + } + normalized + } + + fn 
link_when_chain(&self, whens: Vec, fallback: Option) -> Option { + whens + .into_iter() + .rev() + .fold(fallback, |next_when, mut current| { + while current.children.len() <= 2 { + current.children.push(Child::Nil); + } + current.children[2] = optional_node(next_when); + Some(current) + }) + } + + fn case_else_body(&mut self, node: TreeSitterNode<'_>) -> Option { + let else_node = self + .normalization_adapter + .case_else_node(node, self.source)?; + if self + .normalization_adapter + .case_else_arm(else_node, self.source) + || else_node.kind() == "switch_default" + { + if let Some(body) = self.when_body(else_node) { + return self.normalize_body(body); + } + } + self.normalize_else_or_branch(else_node) + } + + fn normalize_body_nodes( + &mut self, + nodes: Vec>, + source: TreeSitterNode<'_>, + ) -> Option { + let mut children = Vec::new(); + let mut index = 0; + while index < nodes.len() { + if index + 1 < nodes.len() { + if let Some(call) = self.normalize_flat_dotted_nodes(&nodes[index..=index + 1]) { + children.push(Child::Node(Box::new(call))); + index += 2; + continue; + } + } + if let Some(child) = self.normalize_body(nodes[index]) { + children.push(Child::Node(Box::new(child))); + } + index += 1; + } + if children.is_empty() { + None + } else if children.len() == 1 { + child_node(children.into_iter().next().unwrap()) + } else { + Some(self.wrap("BLOCK", children, source)) + } + } + + fn normalize_return(&mut self, node: TreeSitterNode<'_>) -> Option { + self.normalize_return_node(node) + } + + fn normalize_super(&mut self, node: TreeSitterNode<'_>) -> Node { + let args_node = self + .named_children(node) + .into_iter() + .find(|child| child.kind() == "argument_list"); + let args = args_node + .map(|args| { + self.named_children(args) + .into_iter() + .filter_map(|child| self.normalize_node(child)) + .collect::>() + }) + .unwrap_or_default(); + self.wrap( + "SUPER", + vec![list_or_nil(args, args_node.unwrap_or(node), self)], + node, + ) + } + + fn 
normalize_return_node(&mut self, node: TreeSitterNode<'_>) -> Option { + self.normalize_return_node_with_elide_symbol(node, false) + } + + fn normalize_return_node_with_elide_symbol( + &mut self, + node: TreeSitterNode<'_>, + elide_symbol: bool, + ) -> Option { + let children = self + .named_children(node) + .into_iter() + .filter_map(|child| self.normalize_return_value(child)) + .collect::>(); + if elide_symbol + && self.ruby() + && children.len() == 1 + && self.symbol_literal_node(children.first()) + { + return children.into_iter().next(); + } + let children = children + .into_iter() + .map(|child| Child::Node(Box::new(child))) + .collect::>(); + Some(self.wrap(return_kind(node.kind()), children, node)) + } + + fn wrapped_return_statement(&self, node: TreeSitterNode<'_>) -> bool { + matches!( + node.kind(), + "body_statement" | "block_body" | "statement" | "block" + ) && !node_text(node, self.source).contains('\n') + && node + .children(&mut node.walk()) + .next() + .map(|child| { + return_statement_kind(child.kind()) + && (!child.is_named() + || node_text(node, self.source) == node_text(child, self.source)) + }) + .unwrap_or(false) + } + + fn normalize_wrapped_return_statement(&mut self, node: TreeSitterNode<'_>) -> Option { + let keyword = node.children(&mut node.walk()).next()?; + if keyword.is_named() + && return_statement_kind(keyword.kind()) + && node_text(node, self.source) == node_text(keyword, self.source) + { + return self.normalize_return_node(keyword); + } + let children = self + .named_children(node) + .into_iter() + .filter_map(|child| self.normalize_return_value(child)) + .map(|child| Child::Node(Box::new(child))) + .collect::>(); + Some(self.wrap(return_kind(keyword.kind()), children, node)) + } + + fn normalize_return_value(&mut self, node: TreeSitterNode<'_>) -> Option { + if node.kind() != "argument_list" { + return self.normalize_node(node); + } + if self.named_children(node).is_empty() { + return self.scalar_argument_list_value(node); + } + 
if self.argument_list_element_reference(node) { + return self.normalize_argument_list_element_reference(node); + } + if self.boolean_expression(node) { + return self.normalize_boolean(node); + } + if self.ternary_statement(node) { + return self.normalize_ternary_statement(node); + } + if self.case_argument_list(node) { + return self.normalize_case(node); + } + if self.argument_list_call_with_block(node) { + return self.normalize_argument_list_call_with_block(node); + } + if self.dotted_expression(node) { + return self.normalize_dotted_expression(node); + } + if self.argument_list_unary_not(node) { + return self.normalize_argument_list_unary_not(node); + } + if self.infix_statement(node) { + return self.normalize_infix_statement(node); + } + let children = self.named_children(node); + if children.len() == 1 + && self.call_node(children[0]) + && node_text(children[0], self.source) == node_text(node, self.source) + { + if let Some(call) = self.normalize_return_value_call(children[0]) { + return Some(call); + } + } + if let (Some(function), Some(nested_args)) = (children.first(), children.get(1)) { + if let Some(function_name) = self + .identifier_text(*function) + .filter(|_| nested_args.kind() == "argument_list") + { + let args = self + .named_children(*nested_args) + .into_iter() + .filter_map(|child| self.normalize_node(child)) + .collect::>(); + let args_source = self + .parenthesized_source(*nested_args) + .or_else(|| self.parenthesized_source(node)); + let args_child = if let Some(source) = args_source { + self.list_or_nil_from_source_node(args, &source) + } else { + list_or_nil(args, *nested_args, self) + }; + return Some(self.wrap( + "FCALL", + vec![Child::Symbol(function_name), args_child], + node, + )); + } + } + let values = self + .named_children(node) + .into_iter() + .filter_map(|child| self.normalize_node(child)) + .collect::>(); + if values.len() == 1 { + values.into_iter().next() + } else if values.is_empty() { + None + } else { + 
Some(self.list_node(values, node)) + } + } + + fn normalize_return_value_call(&mut self, node: TreeSitterNode<'_>) -> Option { + let function = self + .named_field(node, "function") + .or_else(|| self.named_field(node, "call")) + .or_else(|| self.named_children(node).into_iter().next())?; + let Some(function_name) = self.identifier_text(function) else { + return None; + }; + + let args_node = self + .named_field(node, "arguments") + .or_else(|| self.named_field(node, "argument")) + .or_else(|| { + self.named_children(node) + .into_iter() + .find(|child| matches!(child.kind(), "argument_list" | "arguments")) + }); + let args = args_node + .map(|args_node| { + self.named_children(args_node) + .into_iter() + .filter(|child| *child != function) + .filter_map(|child| self.normalize_node(child)) + .collect::>() + }) + .unwrap_or_default(); + let args_child = if let Some(args_node) = args_node { + if let Some(source) = self + .parenthesized_source(args_node) + .or_else(|| self.parenthesized_source(node)) + { + self.list_or_nil_from_source_node(args, &source) + } else { + list_or_nil(args, args_node, self) + } + } else { + Child::Nil + }; + + Some(self.wrap( + "FCALL", + vec![Child::Symbol(function_name), args_child], + node, + )) + } + + fn normalize_ternary_statement(&mut self, node: TreeSitterNode<'_>) -> Option { + let parts = self.ternary_parts(node)?; + let condition = optional_node(self.normalize_node(parts.condition)); + let positive = optional_node(self.normalize_ternary_branch(&parts.positive)); + let negative = optional_node(self.normalize_ternary_branch(&parts.negative)); + Some(self.wrap("IF", vec![condition, positive, negative], node)) + } + + fn normalize_boolean(&mut self, node: TreeSitterNode<'_>) -> Option { + let operator = self.boolean_operator(node)?; + let node_type = if operator == "or" { "OR" } else { "AND" }; + let mut operands = Vec::new(); + for child in self.named_children(node) { + if let Some(normalized) = self.normalize_node(child) { + if 
normalized.r#type == node_type { + operands.extend(normalized.children); + } else { + operands.push(Child::Node(Box::new(normalized))); + } + } + } + Some(self.wrap(node_type, operands, node)) + } + + fn normalize_comparison(&mut self, node: TreeSitterNode<'_>) -> Option { + let raw_named = self.raw_named_children(node); + let target = if raw_named.len() == 1 + && BINARY_WRAPPER_KINDS.contains(&raw_named[0].kind()) + && node_text(node, self.source) == node_text(raw_named[0], self.source) + { + raw_named[0] + } else { + node + }; + let operands = self.named_children(target); + let left = operands.first().and_then(|left| self.normalize_node(*left)); + let right_raw = operands.get(1).copied()?; + let right = self.normalize_node(right_raw); + Some(self.wrap( + "OPCALL", + vec![ + optional_node(left), + Child::Symbol(self.comparison_operator(node)?), + list_or_nil(right.into_iter().collect(), right_raw, self), + ], + node, + )) + } + + fn normalize_operator_call(&mut self, node: TreeSitterNode<'_>) -> Option { + let operands = self.named_children(node); + let direct_parts = match ( + operands.first().copied(), + self.binary_operator(node), + operands.get(1).copied(), + ) { + (Some(left), Some(operator), Some(right)) => Some((left, operator, right)), + _ => None, + }; + let (left_raw, operator, right_raw) = + direct_parts.or_else(|| self.infix_statement_parts(node))?; + let left = self.normalize_node(left_raw); + let right = self.normalize_node(right_raw); + if self.ruby() && operator == "=~" && self.regex_literal(Some(right_raw)) { + return Some(self.wrap( + "MATCH3", + vec![optional_node(right), optional_node(left)], + node, + )); + } else if self.ruby() && operator == "=~" { + return Some(self.wrap( + "CALL", + vec![ + optional_node(left), + Child::Symbol("=~".to_string()), + list_or_nil(right.into_iter().collect(), right_raw, self), + ], + node, + )); + } + + Some(self.wrap( + "OPCALL", + vec![ + optional_node(left), + Child::Symbol(operator), + 
list_or_nil(right.into_iter().collect(), right_raw, self), + ], + node, + )) + } + + fn normalize_infix_statement(&mut self, node: TreeSitterNode<'_>) -> Option { + let (left_raw, operator, right_raw) = self.infix_statement_parts(node)?; + let left = self.normalize_node(left_raw); + let right = self.normalize_node(right_raw); + if self.ruby() && operator == "=~" && self.regex_literal(Some(right_raw)) { + return Some(self.wrap( + "MATCH3", + vec![optional_node(right), optional_node(left)], + node, + )); + } else if self.ruby() && operator == "=~" { + return Some(self.wrap( + "CALL", + vec![ + optional_node(left), + Child::Symbol("=~".to_string()), + list_or_nil(right.into_iter().collect(), right_raw, self), + ], + node, + )); + } + Some(self.wrap( + "OPCALL", + vec![ + optional_node(left), + Child::Symbol(operator), + list_or_nil(right.into_iter().collect(), right_raw, self), + ], + node, + )) + } + + fn normalize_unary_not(&mut self, node: TreeSitterNode<'_>) -> Option { + let operand = self.named_children(node).into_iter().next()?; + let operand = optional_node(self.normalize_node(operand)); + Some(self.wrap( + "OPCALL", + vec![operand, Child::Symbol("!".to_string()), Child::Nil], + node, + )) + } + + fn normalize_unary_not_statement(&mut self, node: TreeSitterNode<'_>) -> Option { + let raw_named = self.raw_named_children(node); + let target = if raw_named.len() == 1 + && node_text(raw_named[0], self.source) == node_text(node, self.source) + && self.unary_not_expression(raw_named[0]) + { + raw_named[0] + } else { + node + }; + let operand = self.named_children(target).into_iter().next()?; + let operand = optional_node(self.normalize_node(operand)); + Some(self.wrap( + "OPCALL", + vec![operand, Child::Symbol("!".to_string()), Child::Nil], + node, + )) + } + + fn normalize_unary_minus(&mut self, node: TreeSitterNode<'_>) -> Option { + let raw_named = self.raw_named_children(node); + let target = if raw_named.len() == 1 + && node_text(raw_named[0], self.source) == 
node_text(node, self.source) + && self.unary_minus_expression(raw_named[0]) + { + raw_named[0] + } else { + node + }; + let operand = self.named_children(target).into_iter().next()?; + if operand.kind() == "integer" { + if let Ok(value) = node_text(operand, self.source).parse::() { + return Some(self.wrap("INTEGER", vec![Child::Integer(-value)], operand)); + } + } + let operand = optional_node(self.normalize_node(operand)); + Some(self.wrap( + "OPCALL", + vec![operand, Child::Symbol("-@".to_string()), Child::Nil], + node, + )) + } + + fn normalize_ternary_branch(&mut self, nodes: &[TreeSitterNode<'_>]) -> Option { + if nodes.is_empty() { + return None; + } + if nodes.len() == 1 { + return self.normalize_node(nodes[0]); + } + if let Some(call) = self.normalize_flat_dotted_nodes(nodes) { + return Some(call); + } + self.normalize_body_nodes(nodes.to_vec(), nodes[0]) + } + + fn normalize_flat_dotted_nodes(&mut self, nodes: &[TreeSitterNode<'_>]) -> Option { + let receiver = *nodes.first()?; + let method = *nodes.get(1)?; + let connector = self + .source + .get(receiver.end_byte()..method.start_byte()) + .unwrap_or("") + .trim(); + if !matches!(connector, "." | "&.") { + return None; + } + let node_type = if connector == "&." 
{ "QCALL" } else { "CALL" }; + let receiver_node = optional_node(self.normalize_node(receiver)); + Some(self.wrap_from_nodes( + node_type, + vec![ + receiver_node, + Child::Symbol(node_text(method, self.source).trim_end_matches('=').to_string()), + Child::Nil, + ], + receiver, + method, + )) + } + + fn normalize_assignment(&mut self, node: TreeSitterNode<'_>) -> Option { + let left = self.assignment_left(node)?; + let right = self + .assignment_right(node) + .and_then(|right| self.normalize_node(right)); + if left.kind() == "left_assignment_list" { + return Some(self.normalize_multiple_assignment(left, right, node)); + } + if let Some(target) = self.assignment_target(left, right.clone(), node) { + return Some(target); + } + Some(self.wrap( + "LASGN", + vec![Child::String(self.target_name(left)), optional_node(right)], + node, + )) + } + + fn normalize_operator_assignment(&mut self, node: TreeSitterNode<'_>) -> Option { + let left = self.assignment_left(node)?; + let right_raw = self.assignment_right(node); + let right = right_raw.and_then(|right| self.normalize_node(right)); + let operator = self.operator_assignment_operator(node); + + if left.kind() == "element_reference" { + let named = self.named_children(left); + let receiver = *named.first()?; + let args = named + .iter() + .skip(1) + .filter_map(|arg| self.normalize_node(*arg)) + .collect::>(); + let receiver = optional_node(self.normalize_node(receiver)); + return Some(self.wrap( + "OP_ASGN1", + vec![ + receiver, + Child::Symbol(operator), + list_or_nil(args, left, self), + optional_node(right), + ], + node, + )); + } + + if self.member_read_node(left) { + let (receiver, method) = self.member_parts(left)?; + let receiver = optional_node(self.normalize_node(receiver)); + return Some(self.wrap( + "OP_ASGN2", + vec![ + receiver, + Child::Bool(false), + Child::Symbol(method), + Child::Symbol(operator), + optional_node(right), + ], + node, + )); + } + + if let Some(logical) = + 
self.normalize_logical_operator_assignment(left, &operator, right.clone(), node) + { + return Some(logical); + } + + if self.instance_variable(left) || self.global_variable(left) { + let value = self.augmented_assignment_value(left, &operator, right_raw, node); + return self.assignment_target(left, Some(value), node); + } + + let value = self.augmented_assignment_value(left, &operator, right_raw, node); + self.assignment_target(left, Some(value.clone()), node) + .or_else(|| { + Some(self.wrap( + "LASGN", + vec![ + Child::String(self.target_name(left)), + Child::Node(Box::new(value)), + ], + node, + )) + }) + } + + fn normalize_operator_assignment_statement( + &mut self, + node: TreeSitterNode<'_>, + ) -> Option { + let (left, operator, right_raw) = self.operator_assignment_statement_parts(node)?; + let right = self.normalize_node(right_raw); + + if left.kind() == "element_reference" { + let named = self.named_children(left); + let receiver = *named.first()?; + let args = named + .iter() + .skip(1) + .filter_map(|arg| self.normalize_node(*arg)) + .collect::>(); + let receiver = optional_node(self.normalize_node(receiver)); + return Some(self.wrap( + "OP_ASGN1", + vec![ + receiver, + Child::Symbol(operator), + list_or_nil(args, left, self), + optional_node(right), + ], + node, + )); + } + + if self.member_read_node(left) { + let (receiver, method) = self.member_parts(left)?; + let receiver = optional_node(self.normalize_node(receiver)); + return Some(self.wrap( + "OP_ASGN2", + vec![ + receiver, + Child::Bool(false), + Child::Symbol(method), + Child::Symbol(operator), + optional_node(right), + ], + node, + )); + } + + if let Some(logical) = + self.normalize_logical_operator_assignment(left, &operator, right.clone(), node) + { + return Some(logical); + } + + if self.instance_variable(left) || self.global_variable(left) { + let value = self.augmented_assignment_value(left, &operator, Some(right_raw), node); + return self.assignment_target(left, Some(value), node); + } + 
+ if let Some(target) = self.assignment_target(left, right, node) { + return Some(target); + } + + let value = self.augmented_assignment_value(left, &operator, Some(right_raw), node); + Some(self.wrap( + "LASGN", + vec![ + Child::String(self.target_name(left)), + Child::Node(Box::new(value)), + ], + node, + )) + } + + fn operator_assignment_statement_parts<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option<(TreeSitterNode<'tree>, String, TreeSitterNode<'tree>)> { + let mut left = None; + let mut operator = None; + let mut right = None; + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + if child.is_named() { + if left.is_none() { + left = Some(child); + } + if operator.is_some() { + right = Some(child); + } + } else if let Some(found_operator) = + operator_assignment_statement_operator(node_text(child, self.source)) + { + operator = Some(found_operator); + } + } + + if let (Some(left), Some(operator), Some(right)) = (left, operator, right) { + return Some((left, operator, right)); + } + + let raw_named = self.raw_named_children(node); + if raw_named.len() == 1 + && node_text(raw_named[0], self.source) == node_text(node, self.source) + { + return self.operator_assignment_statement_parts(raw_named[0]); + } + + None + } + + fn operator_assignment_statement(&self, node: TreeSitterNode<'_>) -> bool { + if !matches!(node.kind(), "body_statement" | "block_body" | "statement") { + return false; + } + if self.operator_assignment_statement_parts(node).is_some() { + return true; + } + + let raw_named = self.raw_named_children(node); + raw_named.len() == 1 + && node_text(raw_named[0], self.source) == node_text(node, self.source) + && self + .operator_assignment_statement_parts(raw_named[0]) + .is_some() + } + + fn normalize_logical_operator_assignment( + &mut self, + left: TreeSitterNode<'_>, + operator: &str, + right: Option, + source: TreeSitterNode<'_>, + ) -> Option { + if !self + .normalization_adapter + 
.logical_operator_assignment(operator) + { + return None; + } + if self.identifier_text(left).is_none() { + return None; + } + let name = self.target_name(left); + let node_type = if operator == "||" { + "OP_ASGN_OR" + } else { + "OP_ASGN_AND" + }; + let receiver = self.wrap("LVAR", vec![Child::String(name.clone())], left); + let assignment = self.wrap( + "LASGN", + vec![Child::String(name), optional_node(right)], + source, + ); + Some(self.wrap( + node_type, + vec![ + Child::Node(Box::new(receiver)), + Child::Symbol(operator.to_string()), + Child::Node(Box::new(assignment)), + ], + source, + )) + } + + fn augmented_assignment_value( + &mut self, + left: TreeSitterNode<'_>, + operator: &str, + right_raw: Option>, + source: TreeSitterNode<'_>, + ) -> Node { + let receiver = optional_node(self.assignment_receiver(left)); + let right = right_raw.and_then(|right| self.normalize_node(right)); + self.wrap( + "CALL", + vec![ + receiver, + Child::Symbol(operator.to_string()), + list_or_nil(right.into_iter().collect(), right_raw.unwrap_or(left), self), + ], + source, + ) + } + + fn assignment_receiver(&mut self, left: TreeSitterNode<'_>) -> Option { + if let Some(name) = self.identifier_text(left) { + return Some(self.wrap("LVAR", vec![Child::String(name)], left)); + } + if self.instance_variable(left) { + return Some(self.wrap( + "IVAR", + vec![Child::String(node_text(left, self.source).to_string())], + left, + )); + } + if self.global_variable(left) { + return Some(self.normalize_global_variable(left)); + } + if self.const_kind(left.kind()) { + return Some(self.normalize_const(left)); + } + self.normalize_node(left) + } + + fn normalize_multiple_assignment( + &self, + left: TreeSitterNode<'_>, + right: Option, + source: TreeSitterNode<'_>, + ) -> Node { + let targets = self + .named_children(left) + .into_iter() + .map(|child| { + let node_type = if child.kind() == "global_variable" + || node_text(child, self.source).starts_with('$') + { + "GASGN" + } else { + "LASGN" + 
}; + self.wrap( + node_type, + vec![Child::String(self.target_name(child)), Child::Nil], + child, + ) + }) + .collect::>(); + self.wrap( + "MASGN", + vec![optional_node(right), list_or_nil(targets, left, self)], + source, + ) + } + + fn normalize_call(&mut self, node: TreeSitterNode<'_>) -> Option { + if self.zero_child_identifier_call(node) { + return Some(self.normalize_zero_child_call(node)); + } + if self.call_block(node).is_some() { + return self.normalize_call_with_block(node); + } + if self.visibility_inline_def_call(node) { + return self.normalize_visibility_inline_def(node); + } + self.normalize_call_without_block(node, None) + } + + fn normalize_zero_child_call(&self, node: TreeSitterNode<'_>) -> Node { + self.wrap( + "VCALL", + vec![Child::Symbol(node_text(node, self.source).to_string())], + node, + ) + } + + fn normalize_member_read(&mut self, node: TreeSitterNode<'_>) -> Option { + if let Some(field) = self + .normalization_adapter + .state_field_name(node, self.source) + { + return Some(self.wrap("IVAR", vec![Child::String(field)], node)); + } + let Some((receiver, method)) = self.member_parts(node) else { + let children = self.normalize_children(node); + return Some(self.wrap(&kind_type(node.kind()), children, node)); + }; + let receiver = optional_node(self.normalize_node(receiver)); + Some(self.wrap( + "CALL", + vec![receiver, Child::Symbol(method), Child::Nil], + node, + )) + } + + fn normalize_call_with_block(&mut self, node: TreeSitterNode<'_>) -> Option { + let block = self.call_block(node); + let call_source = self + .normalization_adapter + .statement_wrapped_call_target(node, self.source) + .unwrap_or(node); + let call = self.normalize_call_without_block(call_source, block)?; + let args = self.normalize_block_parameters(block); + let body = block.and_then(|block| { + self.with_ruby_scope(block, false, |normalizer| { + let body_node = normalizer + .named_field(block, "body") + .or_else(|| normalizer.block_child(block)) + .unwrap_or(block); + 
normalizer.normalize_body(body_node).map(dynamic_scope) + }) + }); + let scope = self.scope(body, args, node); + Some(self.wrap( + "ITER", + vec![Child::Node(Box::new(call)), Child::Node(Box::new(scope))], + node, + )) + } + + fn normalize_argument_list_call(&mut self, node: TreeSitterNode<'_>) -> Option { + if !self.ruby() || node.kind() != "argument_list" { + return None; + } + let target = { + let raw_named = self.raw_named_children(node); + if raw_named.len() == 1 + && raw_named[0].kind() == "call" + && node_text(raw_named[0], self.source) == node_text(node, self.source) + { + raw_named[0] + } else { + node + } + }; + let function = self.named_children(target).into_iter().next()?; + let args_node = self + .named_children(target) + .into_iter() + .find(|child| child.kind() == "argument_list"); + let args = args_node + .map(|args| { + self.named_children(args) + .into_iter() + .filter_map(|child| self.normalize_node(child)) + .collect::>() + }) + .unwrap_or_default(); + Some(self.wrap( + "FCALL", + vec![ + Child::Symbol(node_text(function, self.source).to_string()), + list_or_nil(args, args_node.unwrap_or(node), self), + ], + node, + )) + } + + fn normalize_argument_list_element_reference( + &mut self, + node: TreeSitterNode<'_>, + ) -> Option { + if !self.ruby() || !self.argument_list_element_reference(node) { + return None; + } + let target = { + let raw_named = self.raw_named_children(node); + if raw_named.len() == 1 + && raw_named[0].kind() == "element_reference" + && node_text(raw_named[0], self.source) == node_text(node, self.source) + { + raw_named[0] + } else { + node + } + }; + let named = self.named_children(target); + let recv = *named.first()?; + let args = named + .iter() + .skip(1) + .filter_map(|child| self.normalize_node(*child)) + .collect::>(); + let recv = self.normalize_node(recv)?; + Some(self.wrap( + "CALL", + vec![ + Child::Node(Box::new(recv)), + Child::Symbol("[]".to_string()), + list_or_nil(args, node, self), + ], + node, + )) + } + + fn 
normalize_argument_list_unary_not(&mut self, node: TreeSitterNode<'_>) -> Option { + if !self.ruby() || !self.argument_list_unary_not(node) { + return None; + } + let target = { + let raw_named = self.raw_named_children(node); + if raw_named.len() == 1 + && raw_named[0].kind() == "unary" + && node_text(raw_named[0], self.source) == node_text(node, self.source) + { + raw_named[0] + } else { + node + } + }; + let operand = self.named_children(target).into_iter().next()?; + let operand = optional_node(self.normalize_node(operand)); + Some(self.wrap( + "OPCALL", + vec![operand, Child::Symbol("!".to_string()), Child::Nil], + node, + )) + } + + fn normalize_argument_list_call_with_block( + &mut self, + node: TreeSitterNode<'_>, + ) -> Option { + if !self.ruby() || node.kind() != "argument_list" { + return None; + } + let target = { + let raw_named = self.raw_named_children(node); + if raw_named.len() == 1 + && raw_named[0].kind() == "call" + && node_text(raw_named[0], self.source) == node_text(node, self.source) + { + raw_named[0] + } else { + node + } + }; + let block = self.call_block(target)?; + let call = self.normalize_argument_list_call(node)?; + let args = self.normalize_block_parameters(Some(block)); + let body = self.with_ruby_scope(block, false, |normalizer| { + let body_node = normalizer + .named_field(block, "body") + .or_else(|| normalizer.block_child(block)) + .unwrap_or(block); + normalizer.normalize_body(body_node).map(dynamic_scope) + }); + Some(self.wrap( + "ITER", + vec![ + Child::Node(Box::new(call)), + Child::Node(Box::new(self.scope(body, args, node))), + ], + node, + )) + } + + fn normalize_statement_call_with_block(&mut self, node: TreeSitterNode<'_>) -> Option { + let block = self.call_block(node); + let call_source = self.statement_block_call(node)?; + let call = self.normalize_call_without_block(call_source, block)?; + let args = self.normalize_block_parameters(block); + let body = block.and_then(|block| { + self.with_ruby_scope(block, false, 
|normalizer| { + let body_node = normalizer + .named_field(block, "body") + .or_else(|| normalizer.block_child(block)) + .unwrap_or(block); + normalizer.normalize_body(body_node).map(dynamic_scope) + }) + }); + let scope = self.scope(body, args, node); + Some(self.wrap( + "ITER", + vec![Child::Node(Box::new(call)), Child::Node(Box::new(scope))], + node, + )) + } + + fn normalize_dotted_expression(&mut self, node: TreeSitterNode<'_>) -> Option { + let block = self.call_block(node); + let call_source = block.map(|block| self.source_before_child(node, block)); + let call = self.normalize_dotted_call_expression_with_source(node, call_source.as_ref())?; + let Some(block) = block else { + return Some(call); + }; + let args = self.normalize_block_parameters(Some(block)); + let body = self.with_ruby_scope(block, false, |normalizer| { + let body_node = normalizer + .named_field(block, "body") + .or_else(|| normalizer.block_child(block)) + .unwrap_or(block); + normalizer.normalize_body(body_node).map(dynamic_scope) + }); + let scope = self.scope(body, args, node); + Some(self.wrap( + "ITER", + vec![Child::Node(Box::new(call)), Child::Node(Box::new(scope))], + node, + )) + } + + fn normalize_call_without_block( + &mut self, + node: TreeSitterNode<'_>, + block: Option>, + ) -> Option { + let call_source = block.map(|block| self.source_before_child(node, block)); + if let Some(name) = self + .normalization_adapter + .intrinsic_call_name(node, self.source) + { + let args = self.call_arguments(node, None); + let node_type = if block.is_some() || !args.is_empty() { + "FCALL" + } else { + "VCALL" + }; + let children = vec![ + Child::Symbol(name.to_string()), + if let Some(source) = call_source.as_ref() { + self.list_or_nil_from_source_node(args, source) + } else { + list_or_nil(args, node, self) + }, + ]; + if let Some(source) = call_source.as_ref() { + return Some(self.wrap_from_source_node(node_type, children, source)); + } + return Some(self.wrap(node_type, children, node)); + } 
+ if self.dotted_call(node) { + let (receiver, method) = self.dotted_call_parts(node, block)?; + let args = self.call_arguments(node, None); + let node_type = if self.safe_navigation_call(node) { + "QCALL" + } else { + "CALL" + }; + let receiver = optional_node(self.normalize_node(receiver)); + let args = if let Some(source) = call_source.as_ref() { + self.list_or_nil_from_source_node(args, source) + } else { + list_or_nil(args, node, self) + }; + if let Some(source) = call_source.as_ref() { + return Some(self.wrap_from_source_node( + node_type, + vec![receiver, Child::Symbol(method), args], + source, + )); + } + return Some(self.wrap(node_type, vec![receiver, Child::Symbol(method), args], node)); + } + + let function = self + .named_field(node, "function") + .or_else(|| self.named_field(node, "call")) + .or_else(|| { + self.named_children(node) + .into_iter() + .find(|child| Some(*child) != block) + })?; + let args = self.call_arguments(node, Some(function)); + if let Some(function_name) = self.identifier_text(function) { + let node_type = if block.is_some() || !args.is_empty() { + "FCALL" + } else { + "VCALL" + }; + let children = vec![ + Child::Symbol(function_name), + if let Some(source) = call_source.as_ref() { + self.list_or_nil_from_source_node(args, source) + } else { + list_or_nil(args, node, self) + }, + ]; + if let Some(source) = call_source.as_ref() { + return Some(self.wrap_from_source_node(node_type, children, source)); + } + return Some(self.wrap(node_type, children, node)); + } + if self + .normalization_adapter + .bare_const_call_function(function) + { + let children = vec![ + Child::Symbol(node_text(function, self.source).to_string()), + if let Some(source) = call_source.as_ref() { + self.list_or_nil_from_source_node(args, source) + } else { + list_or_nil(args, node, self) + }, + ]; + if let Some(source) = call_source.as_ref() { + return Some(self.wrap_from_source_node("FCALL", children, source)); + } + return Some(self.wrap("FCALL", children, 
node)); + } + if self.member_read_node(function) { + let (receiver, method) = self.member_parts(function)?; + let receiver = optional_node(self.normalize_node(receiver)); + let args = if let Some(source) = call_source.as_ref() { + self.list_or_nil_from_source_node(args, source) + } else { + list_or_nil(args, node, self) + }; + let children = vec![receiver, Child::Symbol(method), args]; + if let Some(source) = call_source.as_ref() { + return Some(self.wrap_from_source_node("CALL", children, source)); + } + return Some(self.wrap("CALL", children, node)); + } + let function = optional_node(self.normalize_node(function)); + let args = if let Some(source) = call_source.as_ref() { + self.list_or_nil_from_source_node(args, source) + } else { + list_or_nil(args, node, self) + }; + let children = vec![function, Child::Symbol("call".to_string()), args]; + if let Some(source) = call_source.as_ref() { + return Some(self.wrap_from_source_node("CALL", children, source)); + } + Some(self.wrap("CALL", children, node)) + } + + fn normalize_element_reference(&mut self, node: TreeSitterNode<'_>) -> Option { + let target = self + .normalization_adapter + .element_reference_target(node, self.source) + .unwrap_or(node); + let named = self.named_children(target); + let receiver = *named.first()?; + let args = named + .iter() + .skip(1) + .filter_map(|arg| self.normalize_node(*arg)) + .collect::>(); + if self.self_node(receiver) { + return Some(self.wrap( + "FCALL", + vec![ + Child::Symbol("[]".to_string()), + list_or_nil(args, node, self), + ], + node, + )); + } + let receiver = optional_node(self.normalize_node(receiver)); + let args = list_or_nil(args, node, self); + Some(self.wrap( + "CALL", + vec![receiver, Child::Symbol("[]".to_string()), args], + node, + )) + } + + fn normalize_rescue_modifier(&mut self, node: TreeSitterNode<'_>) -> Option { + let named = self.named_children(node); + let body = named.first().and_then(|body| self.normalize_node(*body)); + let handler = named + 
.get(1) + .and_then(|handler| self.normalize_node(*handler)); + let resbody = self.wrap( + "RESBODY", + vec![Child::Nil, optional_node(handler), Child::Nil], + node, + ); + Some(self.wrap( + "RESCUE", + vec![ + optional_node(body), + Child::Node(Box::new(resbody)), + Child::Nil, + ], + node, + )) + } + + fn normalize_ensure_clause(&mut self, node: TreeSitterNode<'_>) -> Option { + self.normalize_body_nodes(self.named_children(node), node) + } + + #[cfg(test)] + fn normalize_dotted_call_expression(&mut self, node: TreeSitterNode<'_>) -> Option { + self.normalize_dotted_call_expression_with_source(node, None) + } + + fn normalize_dotted_call_expression_with_source( + &mut self, + node: TreeSitterNode<'_>, + source: Option<&Node>, + ) -> Option { + let raw_named = self.raw_named_children(node); + let target = if raw_named.len() == 1 && self.dotted_call(raw_named[0]) { + raw_named[0] + } else { + node + }; + let (receiver_raw, method) = self.dotted_call_parts(target, None)?; + let args = self.call_arguments(target, None); + let args = if let Some(source) = source { + self.list_or_nil_from_source_node(args, source) + } else { + list_or_nil(args, node, self) + }; + let receiver = optional_node(self.normalize_node(receiver_raw)); + let node_type = if self.safe_navigation_call(target) { + "QCALL" + } else { + "CALL" + }; + let children = vec![receiver, Child::Symbol(method), args]; + if let Some(source) = source { + return Some(self.wrap_from_source_node(node_type, children, source)); + } + Some(self.wrap(node_type, children, node)) + } + + fn normalize_begin(&mut self, node: TreeSitterNode<'_>) -> Option { + let named = self.named_children(node); + let rescue_nodes = named + .iter() + .copied() + .filter(|child| child.kind() == "rescue") + .collect::>(); + let ensure_node = named.iter().copied().find(|child| child.kind() == "ensure"); + if rescue_nodes.is_empty() { + let Some(ensure_node) = ensure_node else { + let children = self.normalize_children(node); + return 
Some(self.wrap("BEGIN", children, node)); + }; + let body_nodes = named + .iter() + .copied() + .take_while(|child| child.kind() != "ensure") + .collect::>(); + let body = + self.normalize_body_nodes(body_nodes.clone(), *body_nodes.first().unwrap_or(&node)); + let ensure_body = self.normalize_body(ensure_node); + let source_start = body_nodes.first().copied().unwrap_or(node); + let ensure_named = self.named_children(ensure_node); + let source_end = ensure_named.last().copied().unwrap_or(ensure_node); + let source = self.source_from_nodes(source_start, source_end); + return Some(self.wrap_from_source_node( + "ENSURE", + vec![optional_node(body), optional_node(ensure_body)], + &source, + )); + } + + let body_nodes = named + .iter() + .copied() + .take_while(|child| child.kind() != "rescue") + .collect::>(); + let body = + self.normalize_body_nodes(body_nodes.clone(), *body_nodes.first().unwrap_or(&node)); + let resbodies = rescue_nodes + .iter() + .filter_map(|child| self.normalize_rescue_clause(*child)) + .collect::>(); + let source_start = body_nodes.first().copied().unwrap_or(node); + let source_end = rescue_nodes + .last() + .and_then(|last| self.rescue_source_end(*last)) + .or_else(|| rescue_nodes.last().copied()) + .unwrap_or(node); + let source = self.source_from_nodes(source_start, source_end); + let rescued = self.wrap_from_source_node( + "RESCUE", + vec![ + optional_node(body), + optional_node(self.link_rescue_chain(resbodies)), + Child::Nil, + ], + &source, + ); + let Some(ensure_node) = ensure_node else { + return Some(rescued); + }; + let ensure_body = self.normalize_body(ensure_node); + let ensure_named = self.named_children(ensure_node); + let source_end = ensure_named.last().copied().unwrap_or(ensure_node); + let source = self.source_from_nodes(source_start, source_end); + Some(self.wrap_from_source_node( + "ENSURE", + vec![Child::Node(Box::new(rescued)), optional_node(ensure_body)], + &source, + )) + } + + fn normalize_rescue_clause(&mut self, node: 
TreeSitterNode<'_>) -> Option { + let exceptions = self + .normalization_adapter + .rescue_clause_exceptions(node, self.source); + let exception_nodes = exceptions + .iter() + .filter_map(|child| { + if child.kind() == "exceptions" + && ruby_exception_constant_text(node_text(*child, self.source)) + { + Some(self.normalize_const(*child)) + } else { + self.normalize_node(*child) + } + }) + .collect::>(); + let exception_source = self + .normalization_adapter + .rescue_clause_exceptions_source(node, self.source); + let exception_variable = self.rescue_exception_variable(node); + let handler = self.normalization_adapter.rescue_clause_handler(node); + let normalized_handler = handler.and_then(|handler| self.normalize_body(handler)); + let body = self.prepend_rescue_exception_assignment(normalized_handler, exception_variable); + Some(self.wrap( + "RESBODY", + vec![ + list_or_nil(exception_nodes, exception_source.unwrap_or(node), self), + optional_node(body), + Child::Nil, + ], + node, + )) + } + + fn rescue_source_end<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + if let Some(handler) = self.normalization_adapter.rescue_clause_handler(node) { + return self + .named_children(handler) + .last() + .copied() + .or(Some(handler)); + } + + self.named_children(node) + .into_iter() + .rev() + .find(|child| child.kind() != "comment") + .or(Some(node)) + } + + fn link_rescue_chain(&self, mut resbodies: Vec) -> Option { + let mut next = None; + while let Some(mut current) = resbodies.pop() { + while current.children.len() <= 2 { + current.children.push(Child::Nil); + } + current.children[2] = optional_node(next); + next = Some(current); + } + next + } + + fn rescue_exception_variable(&self, node: TreeSitterNode<'_>) -> Option { + let name = self + .normalization_adapter + .rescue_clause_exception_variable_name(node)?; + let source = self + .normalization_adapter + .rescue_clause_exception_variable_source(node) + .unwrap_or(name); + let errinfo = 
self.wrap("ERRINFO", Vec::new(), source); + Some(self.wrap( + "LASGN", + vec![ + Child::String(node_text(name, self.source).to_string()), + Child::Node(Box::new(errinfo)), + ], + source, + )) + } + + fn prepend_rescue_exception_assignment( + &self, + body: Option, + assignment: Option, + ) -> Option { + let Some(assignment) = assignment else { + return body; + }; + let Some(mut body) = body else { + return Some(assignment); + }; + if body.r#type == "BLOCK" { + let mut children = vec![Child::Node(Box::new(assignment))]; + children.extend( + body.children + .into_iter() + .filter(|child| !matches!(child, Child::Nil)), + ); + body.children = children; + Some(body) + } else { + let source = self.source_from_normalized_nodes(&assignment, &body); + Some(self.wrap_from_source_node( + "BLOCK", + vec![ + Child::Node(Box::new(assignment)), + Child::Node(Box::new(body)), + ], + &source, + )) + } + } + + fn normalize_modifier_statement(&mut self, node: TreeSitterNode<'_>) -> Option { + let keyword = self.modifier_keyword(node); + let (action, condition) = self.modifier_parts(node)?; + let node_type = match keyword.as_deref() { + Some("unless") => "UNLESS", + Some("while") => "WHILE", + Some("until") => "UNTIL", + _ => "IF", + }; + let condition = optional_node(self.normalize_node(condition)); + let action = optional_node(self.normalize_modifier_action(action)); + let trailing = if matches!(node_type, "WHILE" | "UNTIL") { + Child::Bool(true) + } else { + Child::Nil + }; + Some(self.wrap(node_type, vec![condition, action, trailing], node)) + } + + fn normalize_modifier_action(&mut self, node: TreeSitterNode<'_>) -> Option { + if self.modifier_return_action(node) { + self.normalize_return_node(node) + } else { + self.normalize_node(node) + } + } + + fn normalize_command_call_statement(&mut self, node: TreeSitterNode<'_>) -> Option { + let raw_named = self.raw_named_children(node); + let target = if matches!( + node.kind(), + "body_statement" | "block" | "block_body" | "statement" 
+ ) && raw_named.len() == 1 + && raw_named[0].kind() == "call" + && node_text(node, self.source) == node_text(raw_named[0], self.source) + { + raw_named[0] + } else { + node + }; + let function = self.named_children(target).into_iter().next()?; + if self.visibility_inline_def_statement(node, function) { + let method = self.inline_def_from_statement(node); + return Some(self.wrap( + "FCALL", + vec![ + Child::Symbol(node_text(function, self.source).to_string()), + list_or_nil(method.into_iter().collect(), node, self), + ], + node, + )); + } + let args_node = self + .named_children(target) + .into_iter() + .find(|child| matches!(child.kind(), "argument_list" | "arguments")); + let args = args_node + .map(|args| self.command_arguments(args)) + .unwrap_or_default(); + let block = self.call_block(target); + let call_source = block.map(|block| self.source_before_child(node, block)); + if self.ruby() && node_text(function, self.source) == "yield" { + let children = vec![list_or_nil(args, args_node.unwrap_or(node), self)]; + if let Some(source) = call_source.as_ref() { + return Some(self.wrap_from_source_node("YIELD", children, source)); + } + return Some(self.wrap("YIELD", children, node)); + } + let call_type = if args.is_empty() { "VCALL" } else { "FCALL" }; + let call_children = vec![ + Child::Symbol(node_text(function, self.source).to_string()), + list_or_nil(args, args_node.unwrap_or(node), self), + ]; + let call = if let Some(source) = call_source.as_ref() { + self.wrap_from_source_node(call_type, call_children, source) + } else { + self.wrap(call_type, call_children, node) + }; + let Some(block) = block else { + return Some(call); + }; + let block_args = self.normalize_block_parameters(Some(block)); + let body = self.with_ruby_scope(block, false, |normalizer| { + let body_node = normalizer + .named_field(block, "body") + .or_else(|| normalizer.block_child(block)) + .unwrap_or(block); + normalizer.normalize_body(body_node).map(dynamic_scope) + }); + Some(self.wrap( + 
"ITER", + vec![ + Child::Node(Box::new(call)), + Child::Node(Box::new(self.scope(body, block_args, node))), + ], + node, + )) + } + + fn normalize_visibility_inline_def(&mut self, node: TreeSitterNode<'_>) -> Option { + let message = + node_text(self.named_children(node).into_iter().next()?, self.source).to_string(); + let args = self + .named_children(node) + .into_iter() + .find(|child| child.kind() == "argument_list"); + let method = self.inline_def_from_argument_list(args); + Some(self.wrap( + "FCALL", + vec![ + Child::Symbol(message), + list_or_nil(method.into_iter().collect(), args.unwrap_or(node), self), + ], + node, + )) + } + + fn normalize_const(&mut self, node: TreeSitterNode<'_>) -> Node { + if matches!(node.kind(), "scope_resolution" | "scoped_type_identifier") { + let parts = self.named_children(node); + let base = parts + .first() + .map(|part| self.normalize_const(*part)) + .map(|part| Child::Node(Box::new(part))) + .unwrap_or(Child::Nil); + let name = self + .named_field(node, "name") + .or_else(|| parts.last().copied()) + .map(|name| node_text(name, self.source).to_string()) + .unwrap_or_default(); + return self.wrap("COLON2", vec![base, Child::Symbol(name)], node); + } + + self.wrap( + "CONST", + vec![Child::Symbol(node_text(node, self.source).to_string())], + node, + ) + } + + fn const_for(&mut self, node: Option>, source: TreeSitterNode<'_>) -> Node { + let Some(node) = node else { + return self.wrap( + "CONST", + vec![Child::Symbol("(anonymous)".to_string())], + source, + ); + }; + if self.const_kind(node.kind()) { + return self.normalize_const(node); + } + self.wrap( + "CONST", + vec![Child::Symbol(node_text(node, self.source).to_string())], + node, + ) + } + + fn normalize_global_variable(&self, node: TreeSitterNode<'_>) -> Node { + let text = node_text(node, self.source).to_string(); + if let Some(value) = text.strip_prefix('$') { + if value + .chars() + .next() + .map(|first| matches!(first, '1'..='9')) + .unwrap_or(false) + && 
value.chars().all(|ch| ch.is_ascii_digit()) + { + let number = value + .parse::() + .expect("validated global nth reference should parse"); + return self.wrap("NTH_REF", vec![Child::Integer(number)], node); + } + } + self.wrap("GVAR", vec![Child::String(text)], node) + } + + fn array_literal_statement(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .array_literal_statement(node, self.source) + } + + fn normalize_array_literal_statement(&mut self, node: TreeSitterNode<'_>) -> Option { + let target = self + .normalization_adapter + .array_literal_target(node, self.source) + .unwrap_or(node); + let values = self + .normalization_adapter + .array_literal_values(target, self.source) + .into_iter() + .filter_map(|child| self.normalize_array_literal_value(child)) + .collect::>(); + if values.is_empty() { + Some(self.wrap("ZLIST", Vec::new(), target)) + } else { + Some(self.list_node(values, target)) + } + } + + fn normalize_array_literal_value(&mut self, node: TreeSitterNode<'_>) -> Option { + if node.kind() == "field" { + let named = self.named_children(node); + if named.len() == 1 { + return self.normalize_node(named[0]); + } + if named.is_empty() { + return Some(self.normalize_terminal_statement(node)); + } + } + + self.normalize_node(node) + } + + fn element_reference_statement(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .element_reference_statement(node, self.source) + } + + fn normalize_element_reference_statement(&mut self, node: TreeSitterNode<'_>) -> Option { + let target = self + .normalization_adapter + .element_reference_target(node, self.source) + .unwrap_or(node); + let receiver = self + .normalization_adapter + .element_reference_receiver(target, self.source)?; + let args = self + .normalization_adapter + .element_reference_arguments(target, self.source) + .into_iter() + .filter_map(|arg| self.normalize_node(arg)) + .collect::>(); + if self.ruby() && self.self_node(receiver) { + return Some(self.wrap( 
+ "FCALL", + vec![ + Child::Symbol("[]".to_string()), + list_or_nil(args, target, self), + ], + target, + )); + } + + let receiver = optional_node(self.normalize_node(receiver)); + let args = list_or_nil(args, target, self); + Some(self.wrap( + "CALL", + vec![receiver, Child::Symbol("[]".to_string()), args], + target, + )) + } + + fn hash_literal_statement(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .hash_literal_statement(node, self.source) + } + + fn normalize_hash_literal_statement(&mut self, node: TreeSitterNode<'_>) -> Option { + let target = self + .normalization_adapter + .hash_literal_target(node, self.source) + .unwrap_or(node); + let children = self + .normalization_adapter + .hash_literal_values(target, self.source) + .into_iter() + .filter_map(|child| self.normalize_hash_literal_value(child)) + .map(|child| Child::Node(Box::new(child))) + .collect::>(); + Some(self.wrap("HASH", children, target)) + } + + fn normalize_hash_literal_value(&mut self, node: TreeSitterNode<'_>) -> Option { + if node.kind() == "field" { + let named = self.named_children(node); + if named.len() >= 2 { + let key = named[0]; + let value = named[1]; + let key_lit = self.wrap( + "LIT", + vec![Child::Symbol(node_text(key, self.source).to_string())], + key, + ); + let value = self.normalize_node(value); + return Some(self.wrap( + "HASH", + vec![Child::Node(Box::new(key_lit)), optional_node(value)], + node, + )); + } + } + + self.normalize_node(node) + } + + fn empty_body_statement(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .empty_body_statement(node, self.source) + } + + fn heredoc_body_statement(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter.heredoc_body_statement(node) + } + + fn heredoc_call_for_body(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .heredoc_call_for_body(node, self.source) + } + + fn terminal_statement(&self, node: TreeSitterNode<'_>) -> bool { + 
matches!( + node.kind(), + "body_statement" | "block_body" | "statement" | "argument_list" + ) && self.named_children(node).is_empty() + && !node_text(node, self.source).trim().is_empty() + } + + fn normalize_terminal_statement(&self, node: TreeSitterNode<'_>) -> Node { + let text = node_text(node, self.source).trim(); + if self.ruby() && text == "yield" { + return self.wrap("YIELD", vec![Child::Nil], node); + } + if ruby_instance_variable_text(text) { + return self.wrap("IVAR", vec![Child::String(text.to_string())], node); + } + if text.starts_with('$') { + return self.normalize_global_variable(node); + } + if text == "nil" { + return self.wrap("NIL", Vec::new(), node); + } + if text == "true" { + return self.wrap("TRUE", Vec::new(), node); + } + if text == "false" { + return self.wrap("FALSE", Vec::new(), node); + } + if let Some(symbol) = text.strip_prefix(':') { + if exact_bare_identifier_text(symbol) { + return self.wrap("LIT", vec![Child::Symbol(symbol.to_string())], node); + } + } + if exact_integer_text(text) { + if let Ok(value) = text.parse::() { + return self.wrap("INTEGER", vec![Child::Integer(value)], node); + } + } + if text == "[]" { + return self.wrap("ZLIST", Vec::new(), node); + } + if bare_identifier_text(text) { + if self.ruby() && !self.ruby_local_name(text) { + return self.wrap("VCALL", vec![Child::Symbol(text.to_string())], node); + } + return self.wrap("LVAR", vec![Child::String(text.to_string())], node); + } + + self.wrap(&kind_type(node.kind()), Vec::new(), node) + } + + fn normalize_array_literal(&mut self, node: TreeSitterNode<'_>) -> Node { + let values = self + .named_children(node) + .into_iter() + .filter_map(|child| self.normalize_array_literal_value(child)) + .collect::>(); + if values.is_empty() { + self.wrap("ZLIST", Vec::new(), node) + } else { + self.list_node(values, node) + } + } + + fn normalize_pair(&mut self, node: TreeSitterNode<'_>) -> Option { + let named = self.named_children(node); + let key = *named.first()?; + let 
value_raw = named.get(1).copied(); + + let has_hash_rocket = node + .children(&mut node.walk()) + .any(|child| !child.is_named() && node_text(child, self.source) == "=>"); + if has_hash_rocket { + let children = [ + self.normalize_node(key), + value_raw.and_then(|value| self.normalize_node(value)), + ] + .into_iter() + .flatten() + .map(|child| Child::Node(Box::new(child))) + .collect(); + return Some(self.wrap("HASH", children, node)); + } + + let key_text = node_text(key, self.source); + let key_lit = self.wrap("LIT", vec![Child::Symbol(key_text.to_string())], key); + if self.ruby() && key.kind() == "hash_key_symbol" && value_raw.is_none() { + let value = self.local_or_call_for_name(key_text, key); + return Some(self.wrap( + "HASH", + vec![Child::Node(Box::new(key_lit)), Child::Node(Box::new(value))], + node, + )); + } + + let mut children = vec![Child::Node(Box::new(key_lit))]; + if let Some(value) = value_raw.and_then(|value| self.normalize_node(value)) { + children.push(Child::Node(Box::new(value))); + } + Some(self.wrap("HASH", children, node)) + } + + fn normalize_block_argument(&mut self, node: TreeSitterNode<'_>) -> Option { + let value = self + .named_children(node) + .into_iter() + .next() + .and_then(|child| self.normalize_node(child)); + Some(self.wrap("BLOCK_PASS", vec![Child::Nil, optional_node(value)], node)) + } + + fn normalize_interpolated_string(&mut self, node: TreeSitterNode<'_>) -> Node { + let children = self.normalize_children(node); + self.wrap("DSTR", children, node) + } + + fn normalize_subshell(&mut self, node: TreeSitterNode<'_>) -> Node { + let children = self + .named_children(node) + .into_iter() + .filter_map(|child| match child.kind() { + "interpolation" => self + .normalize_interpolation(child) + .map(|node| Child::Node(Box::new(node))), + "string_content" => Some(Child::Node(Box::new(self.wrap( + "STR", + vec![Child::String(node_text(child, self.source).to_string())], + child, + )))), + _ => None, + }) + .collect::>(); + let 
node_type = if children + .iter() + .any(|child| matches!(child, Child::Node(node) if node.r#type == "EVSTR")) + { + "DXSTR" + } else { + "XSTR" + }; + self.wrap(node_type, children, node) + } + + fn normalize_chained_string(&mut self, node: TreeSitterNode<'_>) -> Node { + let mut normalized_children = Vec::new(); + for child in self.named_children(node) { + let normalized = self.normalize_node(child); + normalized_children.push((child, normalized)); + } + + let mut parts = Vec::new(); + for (_, normalized) in &normalized_children { + match normalized { + Some(normalized) if normalized.r#type == "DSTR" => { + parts.extend(normalized.children.clone()); + } + Some(normalized) => { + parts.push(Child::Node(Box::new(normalized.clone()))); + } + None => {} + } + } + + let source = self + .dynamic_string_source(&normalized_children) + .or_else(|| normalized_children.first().map(|(child, _)| *child)) + .unwrap_or(node); + self.wrap("DSTR", parts, source) + } + + fn normalize_concatenated_string_statement(&mut self, node: TreeSitterNode<'_>) -> Node { + let target = concatenated_string_target(node).unwrap_or(node); + let mut normalized_children = Vec::new(); + for child in self.named_children(target) { + let normalized = self.normalize_node(child); + normalized_children.push((child, normalized)); + } + + let mut parts = Vec::new(); + for (_, normalized) in &normalized_children { + match normalized { + Some(normalized) if normalized.r#type == "DSTR" => { + parts.extend(normalized.children.clone()); + } + Some(normalized) => { + parts.push(Child::Node(Box::new(normalized.clone()))); + } + None => {} + } + } + + let source = self + .dynamic_string_source(&normalized_children) + .or_else(|| normalized_children.first().map(|(child, _)| *child)) + .unwrap_or(node); + self.wrap("DSTR", parts, source) + } + + fn dynamic_string_source<'tree>( + &self, + normalized_children: &[(TreeSitterNode<'tree>, Option)], + ) -> Option> { + normalized_children + .iter() + .find(|(_, 
child_node)| { + let Some(child_node) = child_node else { + return false; + }; + child_node.r#type == "DSTR" + && child_node + .children + .iter() + .filter_map(self::node) + .any(|part| part.r#type == "EVSTR") + }) + .map(|(child, _)| *child) + } + + fn normalize_interpolated_statement(&mut self, node: TreeSitterNode<'_>) -> Node { + let children = self.normalize_children(node); + self.wrap("DSTR", children, node) + } + + fn normalize_interpolation(&mut self, node: TreeSitterNode<'_>) -> Option { + let exprs = self + .named_children(node) + .into_iter() + .filter_map(|child| self.normalize_node(child)) + .collect::>(); + let body = if exprs.len() == 1 { + exprs.into_iter().next() + } else if exprs.is_empty() { + None + } else { + Some(self.list_node(exprs, node)) + }; + Some( + self.wrap( + "EVSTR", + body.into_iter() + .map(|node| Child::Node(Box::new(node))) + .collect(), + node, + ), + ) + } + + fn normalize_heredoc_body_statement(&mut self, node: TreeSitterNode<'_>) -> Option { + let mut heredoc_bodies = self + .named_children(node) + .into_iter() + .filter(|child| child.kind() == "heredoc_body"); + let mut children = Vec::new(); + + for child in self.named_children(node) { + if child.kind() == "heredoc_body" { + continue; + } + + let normalized = if self.heredoc_call_for_body(child) { + let body = heredoc_bodies.next(); + self.with_current_heredoc_body(body, |normalizer| normalizer.normalize_node(child)) + } else { + self.normalize_body(child) + }; + + if let Some(normalized) = normalized { + children.push(normalized); + } + } + + if children.is_empty() { + None + } else if children.len() == 1 { + children.into_iter().next() + } else { + Some( + self.wrap( + "BLOCK", + children + .into_iter() + .map(|child| Child::Node(Box::new(child))) + .collect(), + node, + ), + ) + } + } + + fn normalize_heredoc_beginning(&mut self, node: TreeSitterNode<'_>) -> Node { + let mut heredoc_body = None; + let mut ancestor = node.parent(); + while let Some(candidate) = ancestor 
{ + let bodies = self + .named_children(candidate) + .into_iter() + .filter(|child| child.kind() == "heredoc_body") + .collect::>(); + if !bodies.is_empty() { + heredoc_body = if let Some(current) = self.current_heredoc_body_span { + bodies + .iter() + .copied() + .find(|body| span(*body) == current) + .or_else(|| bodies.first().copied()) + } else { + bodies.first().copied() + }; + break; + } + ancestor = candidate.parent(); + } + let children = heredoc_body + .map(|body| self.normalize_heredoc_children(body)) + .unwrap_or_default(); + self.wrap("DSTR", children, node) + } + + fn with_current_heredoc_body( + &mut self, + body: Option>, + block: impl FnOnce(&mut Self) -> T, + ) -> T { + let previous = self.current_heredoc_body_span; + self.current_heredoc_body_span = body.map(span); + let result = block(self); + self.current_heredoc_body_span = previous; + result + } + + fn normalize_heredoc_children(&mut self, node: TreeSitterNode<'_>) -> Vec { + self.named_children(node) + .into_iter() + .filter_map(|child| match child.kind() { + "interpolation" => self.normalize_interpolation(child), + "heredoc_content" => { + let text = node_text(child, self.source).to_string(); + if text.is_empty() { + None + } else { + Some(self.wrap("STR", vec![Child::String(text)], child)) + } + } + _ => None, + }) + .map(|child| Child::Node(Box::new(child))) + .collect() + } + + fn normalize_identifier(&mut self, node: TreeSitterNode<'_>) -> Node { + let name = self + .identifier_text(node) + .unwrap_or_else(|| node_text(node, self.source).to_string()); + self.normalize_identifier_with_name(node, name) + } + + fn normalize_identifier_with_name(&mut self, node: TreeSitterNode<'_>, name: String) -> Node { + if self.ruby_vcall_identifier(node, &name) || self.vcall_identifier(node, &name) { + self.wrap("VCALL", vec![Child::Symbol(name)], node) + } else { + self.wrap("LVAR", vec![Child::String(name)], node) + } + } + + fn normalize_parameters(&mut self, node: Option>) -> Option { + if 
!self.normalization_adapter.normalize_default_parameters() { + return None; + } + let node = node?; + let defaults = self + .named_children(node) + .into_iter() + .filter_map(|param| { + let name = self.named_field(param, "name")?; + let value = self.named_field(param, "value")?; + let value = optional_node(self.normalize_node(value)); + Some(self.wrap( + "LASGN", + vec![ + Child::Symbol(node_text(name, self.source).to_string()), + value, + ], + param, + )) + }) + .map(|node| Child::Node(Box::new(node))) + .collect::>(); + if defaults.is_empty() { + None + } else { + Some(self.wrap("ARGS", defaults, node)) + } + } + + fn normalize_block_parameters(&mut self, block: Option>) -> Option { + if !self.normalization_adapter.normalize_block_parameters() { + return None; + } + let block = block?; + let params = self + .named_children(block) + .into_iter() + .find(|child| child.kind() == "block_parameters")?; + let pre_init = self + .named_children(params) + .into_iter() + .filter(|param| param.kind() == "destructured_parameter") + .filter_map(|param| self.normalize_destructured_block_parameter(param)) + .map(|node| Child::Node(Box::new(node))) + .collect::>(); + if pre_init.is_empty() { + None + } else { + Some(self.wrap("ARGS", pre_init, params)) + } + } + + fn normalize_destructured_block_parameter( + &mut self, + param: TreeSitterNode<'_>, + ) -> Option { + let mut targets = Vec::new(); + for child in self.named_children(param) { + self.collect_destructured_parameter_targets(child, &mut targets); + } + if targets.is_empty() { + return None; + } + let dvar = self.wrap("DVAR", vec![Child::Nil], param); + Some(self.wrap( + "MASGN", + vec![ + Child::Node(Box::new(dvar)), + list_or_nil(targets, param, self), + Child::Nil, + ], + param, + )) + } + + fn collect_destructured_parameter_targets( + &mut self, + node: TreeSitterNode<'_>, + targets: &mut Vec, + ) { + if self.identifier_kind(node.kind()) { + targets.push(self.wrap( + "DASGN", + vec![ + Child::String(node_text(node, 
self.source).to_string()), + Child::Nil, + ], + node, + )); + return; + } + + for child in self.named_children(node) { + self.collect_destructured_parameter_targets(child, targets); + } + } + + fn normalize_children(&mut self, node: TreeSitterNode<'_>) -> Vec { + let mut children = Vec::new(); + for child in self.named_children(node) { + if child.kind() == "heredoc_body" { + continue; + } + if self.assignment_rhs(child) { + continue; + } + if let Some(normalized) = self.normalize_node(child) { + children.push(Child::Node(Box::new(normalized))); + } + } + children + } + + fn scope(&self, body: Option, args: Option, source: TreeSitterNode<'_>) -> Node { + let source_node = body.as_ref().or(args.as_ref()).cloned(); + let children = vec![Child::Nil, optional_node(args), optional_node(body)]; + if let Some(source_node) = source_node { + self.wrap_from_source_node("SCOPE", children, &source_node) + } else { + self.wrap("SCOPE", children, source) + } + } + + #[cfg(test)] + fn list(&self, children: Option>, source: TreeSitterNode<'_>) -> Option { + let children = children?; + if children.is_empty() { + return None; + } + + Some(self.list_node(children, source)) + } + + fn list_node(&self, children: Vec, source: TreeSitterNode<'_>) -> Node { + self.wrap( + "LIST", + children + .into_iter() + .map(|child| Child::Node(Box::new(child))) + .collect(), + source, + ) + } + + fn list_or_nil_from_source_node(&self, children: Vec, source: &Node) -> Child { + if children.is_empty() { + Child::Nil + } else { + Child::Node(Box::new( + self.wrap_from_source_node( + "LIST", + children + .into_iter() + .map(|child| Child::Node(Box::new(child))) + .collect(), + source, + ), + )) + } + } + + fn wrap(&self, node_type: &str, children: Vec, source: TreeSitterNode<'_>) -> Node { + let node_span = span(source); + Node { + r#type: node_type.to_string(), + children, + first_lineno: node_span[0], + first_column: node_span[1], + last_lineno: node_span[2], + last_column: node_span[3], + text: 
self.source_text(node_text(source, self.source)), + } + } + + fn wrap_from_nodes( + &self, + node_type: &str, + children: Vec, + first: TreeSitterNode<'_>, + last: TreeSitterNode<'_>, + ) -> Node { + let first_span = span(first); + let last_span = span(last); + let text = self + .source + .get(first.start_byte()..last.end_byte()) + .unwrap_or("") + .to_string(); + Node { + r#type: node_type.to_string(), + children, + first_lineno: first_span[0], + first_column: first_span[1], + last_lineno: last_span[2], + last_column: last_span[3], + text: self.source_text(&text), + } + } + + fn wrap_from_source_node(&self, node_type: &str, children: Vec, source: &Node) -> Node { + Node { + r#type: node_type.to_string(), + children, + first_lineno: source.first_lineno, + first_column: source.first_column, + last_lineno: source.last_lineno, + last_column: source.last_column, + text: self.source_text(&source.text), + } + } + + fn with_ruby_scope( + &mut self, + node: TreeSitterNode<'_>, + reset: bool, + f: impl FnOnce(&mut Self) -> T, + ) -> T { + if !self.ruby() { + return f(self); + } + let previous = self.local_stack.clone(); + if reset { + self.local_stack.clear(); + } + self.local_stack.push(self.ruby_scope_locals(node)); + let result = f(self); + self.local_stack = previous; + result + } + + fn ruby_scope_locals(&self, node: TreeSitterNode<'_>) -> BTreeSet { + let mut locals = BTreeSet::new(); + self.collect_ruby_scope_locals(node, &mut locals, true); + locals + } + + fn collect_ruby_scope_locals( + &self, + node: TreeSitterNode<'_>, + locals: &mut BTreeSet, + root: bool, + ) { + if !root && self.ruby_scope_boundary(node) { + return; + } + self.collect_ruby_parameter_locals(node, locals); + self.collect_ruby_assignment_locals(node, locals); + for child in self.named_children(node) { + if !self.ruby_scope_child_boundary(child) { + self.collect_ruby_scope_locals(child, locals, false); + } + } + } + + fn collect_ruby_parameter_locals( + &self, + node: TreeSitterNode<'_>, + 
locals: &mut BTreeSet, + ) { + if !matches!( + node.kind(), + "method_parameters" | "block_parameters" | "lambda_parameters" + ) { + return; + } + + for child in self.named_children(node) { + self.collect_identifier_names(child, locals); + } + } + + fn collect_ruby_assignment_locals( + &self, + node: TreeSitterNode<'_>, + locals: &mut BTreeSet, + ) { + if node.kind() == "exception_variable" { + self.collect_identifier_names(node, locals); + return; + } + + if !self.ruby_assignment_node(node) { + return; + } + + if let Some(left) = self.assignment_left(node) { + self.collect_assignment_target_names(left, locals); + } + } + + fn collect_assignment_target_names( + &self, + node: TreeSitterNode<'_>, + locals: &mut BTreeSet, + ) { + if let Some(name) = self.identifier_text(node) { + locals.insert(name); + return; + } + if matches!( + node.kind(), + "left_assignment_list" + | "expression_list" + | "splat" + | "splat_parameter" + | "rest_assignment" + ) { + for child in self.named_children(node) { + self.collect_assignment_target_names(child, locals); + } + } + } + + fn collect_identifier_names(&self, node: TreeSitterNode<'_>, locals: &mut BTreeSet) { + if let Some(name) = self.identifier_text(node) { + locals.insert(name); + } + for child in self.raw_named_children(node) { + self.collect_identifier_names(child, locals); + } + } + + fn ruby_scope_boundary(&self, node: TreeSitterNode<'_>) -> bool { + if matches!(node.kind(), "block" | "do_block") + && node + .parent() + .map(|parent| parent.kind() == "lambda") + .unwrap_or(false) + { + return false; + } + matches!( + node.kind(), + "singleton_class" | "lambda" | "block" | "do_block" + ) || function_kind(node.kind()) + || self.class_node(node) + || self.module_node(node) + } + + fn ruby_scope_child_boundary(&self, node: TreeSitterNode<'_>) -> bool { + self.ruby_scope_boundary(node) + } + + fn ruby_vcall_identifier(&self, node: TreeSitterNode<'_>, name: &str) -> bool { + self.ruby() + && self.identifier_kind(node.kind()) + 
&& !self.assignment_lhs(node) + && !self.ruby_definition_identifier(node) + && !self.ruby_local_name(name) + } + + fn ruby_local_name(&self, name: &str) -> bool { + self.local_stack + .iter() + .rev() + .any(|scope| scope.contains(name)) + } + + fn ruby(&self) -> bool { + self.normalization_adapter.ruby() + } + + fn instance_variable(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .instance_variable(node, self.source) + } + + fn global_variable(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .global_variable(node, self.source) + } + + fn assignment_operator(&self, text: &str) -> bool { + self.normalization_adapter.assignment_operator(text) + } + + fn vcall_identifier(&self, node: TreeSitterNode<'_>, name: &str) -> bool { + if !self.identifier_kind(node.kind()) { + return false; + } + if self.ruby() && self.ruby_local_name(name) { + return false; + } + let Some(parent) = node.parent() else { + return false; + }; + if matches!( + parent.kind(), + "method" | "method_parameters" | "parameter_list" | "argument_list" | "arguments" + ) { + return false; + } + if self.member_read_node(parent) { + return false; + } + if self.dotted_expression(parent) { + return false; + } + if self.assignment_lhs(node) || self.assignment_rhs(node) { + return false; + } + + if matches!(parent.kind(), "body_statement" | "block_body" | "then") + && self.parent_named_child(parent, node) + { + return true; + } + if matches!(parent.kind(), "if_modifier" | "unless_modifier") + && self + .named_children(parent) + .into_iter() + .next() + .map(|child| child == node) + .unwrap_or(false) + { + return true; + } + + false + } + + fn ruby_definition_identifier(&self, node: TreeSitterNode<'_>) -> bool { + let Some(parent) = self.parent_node(node) else { + return false; + }; + if matches!(parent.kind(), "method" | "singleton_method") { + let name = self.named_field(parent, "name").or_else(|| { + self.named_children(parent) + .into_iter() + .find(|child| 
self.identifier_kind(child.kind())) + }); + return name + .map(|name| self.same_ts_node(name, node)) + .unwrap_or(false); + } + matches!( + parent.kind(), + "method_parameters" + | "block_parameters" + | "lambda_parameters" + | "optional_parameter" + | "keyword_parameter" + | "block_parameter" + ) + } + + fn ruby_assignment_node(&self, node: TreeSitterNode<'_>) -> bool { + if matches!(node.kind(), "assignment" | "operator_assignment") { + return true; + } + if node.kind() == "pattern" + && node + .children(&mut node.walk()) + .any(|child| !child.is_named() && node_text(child, self.source) == "=") + { + return true; + } + let raw_named = self.raw_named_children(node); + if node.kind() == "block_body" + && raw_named.len() == 1 + && raw_named[0].kind() == "assignment" + { + return true; + } + + matches!(node.kind(), "body_statement" | "block_body" | "statement") + && self.has_assignment_operator_child(node) + } + + fn self_node(&self, node: TreeSitterNode<'_>) -> bool { + matches!(node.kind(), "self" | "this") + || matches!(node_text(node, self.source), "self" | "this") + } + + fn assignment_lhs(&self, node: TreeSitterNode<'_>) -> bool { + if self.single_assignment_block_child(node) { + return false; + } + if node + .prev_sibling() + .map(|sibling| node_text(sibling, self.source) == ":") + .unwrap_or(false) + { + return false; + } + if self.literal_fragment_assignment_context(node) { + return false; + } + node.next_sibling() + .map(|sibling| self.assignment_operator(node_text(sibling, self.source))) + .unwrap_or(false) + } + + fn literal_fragment_assignment_context(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .literal_fragment_assignment_context(node, self.source) + } + + fn literal_fragment_expression_list(&self, node: TreeSitterNode<'_>) -> bool { + if node.kind() != "expression_list" { + return false; + } + + let named = self.named_children(node); + named.len() == 1 && self.literal_fragment_assignment_context(named[0]) + } + + fn 
assignment_rhs(&self, node: TreeSitterNode<'_>) -> bool { + if self.single_assignment_block_child(node) { + return false; + } + if self.literal_fragment_assignment_context(node) { + return false; + } + node.prev_sibling() + .map(|sibling| self.assignment_operator(node_text(sibling, self.source))) + .unwrap_or(false) + } + + fn single_assignment_block_child(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .single_assignment_block_child(node, self.source) + } + + fn has_assignment_operator_child(&self, node: TreeSitterNode<'_>) -> bool { + node.children(&mut node.walk()).any(|child| { + !child.is_named() && self.assignment_operator(node_text(child, self.source)) + }) + } + + fn single_short_var_lhs(&self, node: TreeSitterNode<'_>) -> bool { + let Some(parent) = node.parent() else { + return false; + }; + if parent.kind() != "short_var_declaration" { + return false; + } + if self.named_children(node).len() != 1 { + return false; + } + self.named_children(parent) + .into_iter() + .next() + .map(|child| child == node) + .unwrap_or(false) + } + + fn modifier_statement(&self, node: TreeSitterNode<'_>) -> bool { + let named = self.named_children(node); + matches!(node.kind(), "body_statement" | "block_body" | "statement") + && self.modifier_keyword(node).is_some() + && named.len() >= 2 + } + + fn modifier_return_action(&self, node: TreeSitterNode<'_>) -> bool { + matches!( + node.kind(), + "return" + | "return_statement" + | "return_expression" + | "break" + | "break_statement" + | "break_expression" + | "next" + | "continue_statement" + ) + } + + fn leading_if_statement(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .leading_if_statement(node, self.source) + } + + fn leading_if_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + self.normalization_adapter + .leading_if_target(node, self.source) + } + + fn normalize_leading_if_statement(&mut self, node: TreeSitterNode<'_>) -> Option { + let target = 
self.leading_if_target(node).unwrap_or(node); + if target != node { + return self.normalize_if(target); + } + let keyword = target + .children(&mut target.walk()) + .next() + .map(|child| child.kind().to_string())?; + let condition = self + .named_children(target) + .into_iter() + .find(|child| !matches!(child.kind(), "comment" | "then" | "elsif" | "else"))?; + let consequence = self + .named_children(target) + .into_iter() + .find(|child| child.kind() == "then") + .or_else(|| self.branch_child(target, condition, 0)); + let alternative = self.explicit_alternative(target); + let node_type = if keyword == "unless" { "UNLESS" } else { "IF" }; + let condition = optional_node(self.normalize_node(condition)); + let consequence = optional_node(consequence.and_then(|child| self.normalize_body(child))); + let alternative = + optional_node(alternative.and_then(|child| self.normalize_else_or_branch(child))); + Some(self.wrap(node_type, vec![condition, consequence, alternative], target)) + } + + fn leading_case_statement(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .leading_case_statement(node, self.source) + } + + fn leading_case_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + self.normalization_adapter + .leading_case_target(node, self.source) + } + + fn normalize_leading_case_statement(&mut self, node: TreeSitterNode<'_>) -> Option { + let target = self.leading_case_target(node).unwrap_or(node); + self.normalize_case(target) + } + + fn leading_loop_statement(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .leading_loop_statement(node, self.source) + } + + fn leading_loop_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + self.normalization_adapter + .leading_loop_target(node, self.source) + } + + fn normalize_leading_loop_statement(&mut self, node: TreeSitterNode<'_>) -> Option { + let target = self.leading_loop_target(node).unwrap_or(node); + if target != node { + let 
keyword = target.children(&mut target.walk()).next()?.kind(); + let node_type = if keyword == "until" { "UNTIL" } else { "WHILE" }; + return self.normalize_loop(target, node_type); + } + let keyword = target.children(&mut target.walk()).next()?.kind(); + let node_type = if keyword == "until" { "UNTIL" } else { "WHILE" }; + let named = self.named_children(target); + let condition = optional_node( + named + .first() + .and_then(|condition| self.normalize_node(*condition)), + ); + let body = optional_node(named.get(1).and_then(|body| self.normalize_body(*body))); + Some(self.wrap(node_type, vec![condition, body], target)) + } + + fn normalize_leading_owner_statement(&mut self, node: TreeSitterNode<'_>) -> Option { + let target = self.leading_owner_target(node).unwrap_or(node); + let keyword = target.children(&mut target.walk()).next()?.kind(); + let name = self.const_for(self.named_children(target).first().copied(), target); + let body_node = self.named_field(target, "body").or_else(|| { + self.named_children(target) + .into_iter() + .rev() + .find(|child| self.block_kind(child.kind())) + }); + let body = body_node.and_then(|body| self.normalize_body(body)); + if keyword == "module" { + Some(self.wrap( + "MODULE", + vec![ + Child::Node(Box::new(name)), + Child::Node(Box::new(self.scope(body, None, target))), + ], + target, + )) + } else { + Some(self.wrap( + "CLASS", + vec![ + Child::Node(Box::new(name)), + Child::Nil, + Child::Node(Box::new(self.scope(body, None, target))), + ], + target, + )) + } + } + + fn rescue_body_statement(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .rescue_body_statement(node, self.source) + } + + fn rescue_body_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + self.normalization_adapter + .rescue_body_target(node, self.source) + } + + fn normalize_rescue_body_statement(&mut self, node: TreeSitterNode<'_>) -> Option { + let target = self.rescue_body_target(node)?; + let body_nodes = self + 
.normalization_adapter + .rescue_body_nodes(target, self.source); + let body = self.normalize_body_nodes(body_nodes.clone(), target); + let rescue_nodes = self + .normalization_adapter + .rescue_clauses(target, self.source); + let resbodies = rescue_nodes + .iter() + .filter_map(|child| self.normalize_rescue_clause(*child)) + .collect::>(); + let source_start = body_nodes.first().copied().unwrap_or(target); + let source_end = rescue_nodes + .last() + .and_then(|last| self.rescue_source_end(*last)) + .or_else(|| rescue_nodes.last().copied()) + .unwrap_or(target); + let source = self.source_from_nodes(source_start, source_end); + Some(self.wrap_from_source_node( + "RESCUE", + vec![ + optional_node(body), + optional_node(self.link_rescue_chain(resbodies)), + Child::Nil, + ], + &source, + )) + } + + fn ensure_body_statement(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .ensure_body_statement(node, self.source) + } + + fn ensure_body_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + self.normalization_adapter + .ensure_body_target(node, self.source) + } + + fn normalize_ensure_body_statement(&mut self, node: TreeSitterNode<'_>) -> Option { + let target = self.ensure_body_target(node)?; + let body = if self.rescue_body_statement(target) { + self.normalize_rescue_body_statement(target) + } else { + let body_nodes = self + .normalization_adapter + .ensure_body_nodes(target, self.source); + self.normalize_body_nodes(body_nodes, target) + }; + let ensure_node = self + .normalization_adapter + .ensure_clause(target, self.source)?; + let ensure_body_node = self + .normalization_adapter + .ensure_clause_body(ensure_node) + .unwrap_or(ensure_node); + let ensure_body = self.normalize_body(ensure_body_node); + let source = body.clone(); + let children = vec![optional_node(body), optional_node(ensure_body)]; + if let Some(source) = source.as_ref() { + Some(self.wrap_from_source_node("ENSURE", children, source)) + } else { + 
Some(self.wrap("ENSURE", children, target)) + } + } + + fn command_call_statement(&self, node: TreeSitterNode<'_>) -> bool { + if !matches!( + node.kind(), + "body_statement" | "block" | "block_body" | "statement" + ) || self.dotted_call(node) + { + return false; + } + + let raw_named = self.raw_named_children(node); + let target = if raw_named.len() == 1 + && raw_named[0].kind() == "call" + && node_text(node, self.source) == node_text(raw_named[0], self.source) + { + raw_named[0] + } else { + node + }; + let children = self.named_children(target); + children + .first() + .map(|child| self.identifier_kind(child.kind())) + .unwrap_or(false) + && (children + .iter() + .any(|child| matches!(child.kind(), "argument_list" | "arguments")) + || self.call_block(target).is_some()) + } + + fn visibility_inline_def_call(&self, node: TreeSitterNode<'_>) -> bool { + if node.kind() != "call" { + return false; + } + let Some(message) = self.named_children(node).into_iter().next() else { + return false; + }; + if !inline_def_wrapper_mid(node_text(message, self.source)) { + return false; + } + self.named_children(node) + .into_iter() + .find(|child| child.kind() == "argument_list") + .map(|args| { + node_text(args, self.source) + .trim_start() + .starts_with("def ") + }) + .unwrap_or(false) + } + + fn visibility_inline_def_statement( + &self, + node: TreeSitterNode<'_>, + function: TreeSitterNode<'_>, + ) -> bool { + let function_text_source = self + .normalization_adapter + .inline_def_function_text_source(function, self.source); + let function_text = node_text(function_text_source, self.source); + inline_def_wrapper_mid(function_text) && node_text(node, self.source).contains("def ") + } + + fn inline_def_from_argument_list(&mut self, args: Option>) -> Option { + if !self.ruby() { + return None; + } + self.inline_def_from_source(args?) 
+ } + + fn inline_def_from_statement(&mut self, node: TreeSitterNode<'_>) -> Option { + let target = self + .normalization_adapter + .statement_wrapped_call_target(node, self.source) + .unwrap_or(node); + let source = self + .named_children(target) + .into_iter() + .find(|child| child.kind() == "argument_list") + .unwrap_or(target); + self.inline_def_from_source(source) + } + + fn inline_def_from_source(&mut self, source: TreeSitterNode<'_>) -> Option { + if !self.ruby() { + return None; + } + if let Some(method) = self + .named_children(source) + .into_iter() + .find(|child| matches!(child.kind(), "method" | "singleton_method")) + { + return if method.kind() == "singleton_method" { + self.normalize_singleton_function(method) + } else { + self.normalize_function(method) + }; + } + let body = self.inline_def_body(source); + let receiver = self.inline_def_receiver(source); + let normalized_body = self.with_ruby_scope(source, true, |normalizer| { + let body = body.and_then(|body| normalizer.normalize_body(body)); + normalizer.elide_tail_returns(body) + }); + if let Some(receiver) = receiver { + let name = self.inline_def_name_after_receiver(source, receiver)?; + if name.is_empty() { + return None; + } + let receiver = self.normalize_node(receiver)?; + return Some(self.wrap( + "DEFS", + vec![ + Child::Node(Box::new(receiver)), + Child::Symbol(name), + Child::Node(Box::new(self.scope(normalized_body, None, source))), + ], + source, + )); + } + + let name = self + .named_children(source) + .into_iter() + .find(|child| self.identifier_kind(child.kind())) + .map(|child| node_text(child, self.source).to_string())?; + if name.is_empty() { + return None; + } + Some(self.wrap( + "DEFN", + vec![ + Child::Symbol(name), + Child::Node(Box::new(self.scope(normalized_body, None, source))), + ], + source, + )) + } + + fn inline_def_receiver<'tree>( + &self, + source: TreeSitterNode<'tree>, + ) -> Option> { + let text = node_text(source, self.source); + if 
!inline_def_receiver_text(text) { + return None; + } + let children = self.named_children(source); + if children.len() == 1 + && matches!(children[0].kind(), "method" | "singleton_method") + && node_text(children[0], self.source) == text + { + return self.inline_def_receiver(children[0]); + } + + children.into_iter().find(|child| { + matches!( + child.kind(), + "self" | "this" | "constant" | "scope_resolution" + ) + }) + } + + fn inline_def_name_after_receiver( + &self, + source: TreeSitterNode<'_>, + receiver: TreeSitterNode<'_>, + ) -> Option { + let children = self.named_children(source); + if let Some(index) = children + .iter() + .position(|child| self.same_ts_node(*child, receiver)) + { + return children + .into_iter() + .skip(index + 1) + .find(|child| self.identifier_kind(child.kind())) + .map(|child| node_text(child, self.source).to_string()); + } + + if children.len() == 1 + && matches!(children[0].kind(), "method" | "singleton_method") + && node_text(children[0], self.source) == node_text(source, self.source) + { + return self.inline_def_name_after_receiver(children[0], receiver); + } + + None + } + + fn inline_def_body<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { + let mut stack = self + .named_children(node) + .into_iter() + .rev() + .collect::>(); + while let Some(child) = stack.pop() { + if child.kind() == "body_statement" { + return Some(child); + } + stack.extend(self.named_children(child).into_iter().rev()); + } + None + } + + fn modifier_keyword(&self, node: TreeSitterNode<'_>) -> Option { + let mut seen_named = false; + for child in node.children(&mut node.walk()) { + seen_named = seen_named || child.is_named(); + if seen_named + && !child.is_named() + && matches!(child.kind(), "if" | "unless" | "while" | "until") + { + return Some(child.kind().to_string()); + } + } + + let raw_named = self.raw_named_children(node); + if raw_named.len() == 1 + && node_text(raw_named[0], self.source) == node_text(node, self.source) + { + return 
self.modifier_keyword(raw_named[0]); + } + + None + } + + fn modifier_parts<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option<(TreeSitterNode<'tree>, TreeSitterNode<'tree>)> { + let raw_named = self.raw_named_children(node); + if raw_named.len() == 1 + && node_text(raw_named[0], self.source) == node_text(node, self.source) + { + if let Some(parts) = self.modifier_parts(raw_named[0]) { + return Some(parts); + } + } + + let named = self.named_children(node); + Some((*named.first()?, *named.last()?)) + } + + fn ternary_statement(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .ternary_statement(node, self.source) + } + + fn ternary_parts<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { + self.normalization_adapter.ternary_parts(node, self.source) + } + + fn case_argument_list(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .case_argument_list(node, self.source) + } + + fn leading_function_statement(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .leading_function_statement(node, self.source) + } + + fn leading_owner_statement(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .leading_owner_statement(node, self.source) + } + + fn leading_owner_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + self.normalization_adapter + .leading_owner_target(node, self.source) + } + + fn leading_function_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + self.normalization_adapter + .leading_function_target(node, self.source) + } + + fn leading_function_name<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + self.named_children(node) + .into_iter() + .find(|child| self.identifier_kind(child.kind())) + } + + fn leading_function_body<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + let body_kind = self.normalization_adapter.leading_function_body_kind(); + self.named_children(node) + 
.into_iter() + .rev() + .find(|child| child.kind() == body_kind) + } + + fn zero_child_identifier_call(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .zero_child_identifier_call(node, self.source) + } + + fn boolean_expression(&self, node: TreeSitterNode<'_>) -> bool { + (self.normalization_adapter.boolean_expression_kind(node) || self.boolean_statement(node)) + && matches!(self.boolean_operator(node).as_deref(), Some("and" | "or")) + } + + fn boolean_statement(&self, node: TreeSitterNode<'_>) -> bool { + if !matches!( + node.kind(), + "body_statement" | "block_body" | "statement" | "argument_list" + ) { + return false; + } + let named = self.named_children(node); + let target = self + .normalization_adapter + .boolean_statement_target(node, self.source, &named); + if !matches!( + self.binary_operator(target).as_deref(), + Some("&&" | "||" | "and" | "or") + ) { + return false; + } + if self.named_children(target).len() < 2 { + return false; + } + target.children(&mut target.walk()).all(|child| { + child.is_named() + || matches!( + node_text(child, self.source), + "&&" | "||" | "and" | "or" | "(" | ")" + ) + }) + } + + fn operator_call_expression(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .operator_call_expression_kind(node) + && self.named_children(node).len() >= 2 + && self + .binary_operator(node) + .map(|operator| OPERATOR_CALL_OPERATORS.contains(&operator.as_str())) + .unwrap_or(false) + } + + fn comparison_expression(&self, node: TreeSitterNode<'_>) -> bool { + if self.literal_fragment_expression_list(node) { + return false; + } + + self.normalization_adapter.comparison_expression_kind(node) + && self + .comparison_operator(node) + .map(|operator| COMPARISON_OPERATORS.contains(&operator.as_str())) + .unwrap_or(false) + } + + fn infix_statement(&self, node: TreeSitterNode<'_>) -> bool { + self.infix_statement_parts(node).is_some() + } + + fn regex_literal(&self, node: Option>) -> bool { + node.map(|node| 
matches!(node.kind(), "regex" | "regex_literal")) + .unwrap_or(false) + } + + fn argument_list_unary_not(&self, node: TreeSitterNode<'_>) -> bool { + if node.kind() != "argument_list" { + return false; + } + let named = self.named_children(node); + if node + .children(&mut node.walk()) + .next() + .map(|child| node_text(child, self.source) == "!") + .unwrap_or(false) + && named.len() == 1 + { + return true; + } + + let raw_named = self.raw_named_children(node); + if raw_named.len() != 1 || raw_named[0].kind() != "unary" { + return false; + } + node_text(node, self.source) == node_text(raw_named[0], self.source) + && self.unary_not_expression(raw_named[0]) + && self.raw_named_children(raw_named[0]).len() == 1 + } + + fn unary_not_statement(&self, node: TreeSitterNode<'_>) -> bool { + if !matches!( + node.kind(), + "body_statement" | "block_body" | "statement" | "argument_list" + ) { + return false; + } + let named = self.named_children(node); + if node + .children(&mut node.walk()) + .next() + .map(|child| node_text(child, self.source) == "!") + .unwrap_or(false) + && named.len() == 1 + { + return true; + } + + let raw_named = self.raw_named_children(node); + raw_named.len() == 1 + && raw_named[0].kind() == "unary" + && node_text(node, self.source) == node_text(raw_named[0], self.source) + && self.unary_not_expression(raw_named[0]) + && self.raw_named_children(raw_named[0]).len() == 1 + } + + fn unary_not_expression(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .unary_not_expression(node, self.source) + } + + fn unary_minus_expression(&self, node: TreeSitterNode<'_>) -> bool { + if self + .normalization_adapter + .unary_minus_expression(node, self.source) + { + return true; + } + + let raw_named = self.raw_named_children(node); + raw_named.len() == 1 + && node_text(node, self.source) == node_text(raw_named[0], self.source) + && self.unary_minus_expression(raw_named[0]) + } + + fn infix_statement_parts<'tree>( + &self, + node: 
TreeSitterNode<'tree>, + ) -> Option<(TreeSitterNode<'tree>, String, TreeSitterNode<'tree>)> { + if !matches!( + node.kind(), + "body_statement" | "block_body" | "statement" | "argument_list" + ) { + return None; + } + let raw_named = self.raw_named_children(node); + let target = if raw_named.len() == 1 + && matches!( + raw_named[0].kind(), + "binary" | "binary_expression" | "comparison_operator" + ) + && node_text(node, self.source) == node_text(raw_named[0], self.source) + { + raw_named[0] + } else { + node + }; + let mut named_index = 0usize; + let mut left = None; + let mut right = None; + let mut operator = None; + for child in target.children(&mut target.walk()) { + if child.is_named() { + left.get_or_insert(child); + if operator.is_some() { + right = Some(child); + } + named_index += 1; + } else { + let text = node_text(child, self.source); + if COMPARISON_OPERATORS.contains(&text) || OPERATOR_CALL_OPERATORS.contains(&text) { + operator = Some(text.to_string()); + } + } + } + if named_index == 2 { + Some((left?, operator?, right?)) + } else { + None + } + } + + fn boolean_operator(&self, node: TreeSitterNode<'_>) -> Option { + let direct = self.binary_operator(node)?; + if matches!(direct.as_str(), "&&" | "and") { + Some("and".to_string()) + } else if matches!(direct.as_str(), "||" | "or") { + Some("or".to_string()) + } else { + None + } + } + + fn comparison_operator(&self, node: TreeSitterNode<'_>) -> Option { + if let Some(operator) = self.binary_operator(node) { + if COMPARISON_OPERATORS.contains(&operator.as_str()) { + return Some(operator); + } + } + + comparison_operator_from_text(&self.spaced_text(node)) + } + + fn binary_operator(&self, node: TreeSitterNode<'_>) -> Option { + self.normalization_adapter + .binary_operator(node, self.source) + } + + fn spaced_text(&self, node: TreeSitterNode<'_>) -> String { + format!(" {} ", node_text(node, self.source)) + } + + fn class_node(&self, node: TreeSitterNode<'_>) -> bool { + 
self.normalization_adapter.class_node(node) + } + + fn module_node(&self, node: TreeSitterNode<'_>) -> bool { + node.kind() == "module" && self.named_field(node, "name").is_some() + } + + fn interpolated_statement(&self, node: TreeSitterNode<'_>) -> bool { + let children = self.named_children(node); + self.normalization_adapter + .interpolated_statement(node, &children) + } + + fn concatenated_string_statement(&self, node: TreeSitterNode<'_>) -> bool { + let children = self.named_children(node); + self.normalization_adapter + .concatenated_string_statement(node, &children) + } + + fn interpolated_string(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .interpolated_string(node, &self.named_children(node)) + } + + fn lambda_expression(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .lambda_expression(node, self.source) + } + + fn lambda_target<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { + self.normalization_adapter.lambda_target(node, self.source) + } + + fn interpolation_node(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter.interpolation_node(node) + } + + fn statement_call_with_block(&self, node: TreeSitterNode<'_>) -> bool { + matches!(node.kind(), "body_statement" | "block_body" | "statement") + && self.call_block(node).is_some() + && self.statement_block_call(node).is_some() + } + + fn statement_block_call<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + if self.dotted_call(node) { + return Some(node); + } + + let block = self.call_block(node); + let child_source = self + .normalization_adapter + .statement_wrapped_call_target(node, self.source) + .unwrap_or(node); + let children = self.named_children(child_source); + + children.into_iter().find(|child| { + Some(*child) != block && (self.call_node(*child) || self.member_read_node(*child)) + }) + } + + fn yield_statement(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + 
.yield_statement(node, self.source) + } + + fn yield_argument_list(&self, node: TreeSitterNode<'_>) -> bool { + if node.kind() != "argument_list" { + return false; + } + let Some(parent) = self.parent_node(node) else { + return false; + }; + let mut cursor = parent.walk(); + let first_child_is_yield = parent + .children(&mut cursor) + .next() + .map(|child| node_text(child, self.source) == "yield") + .unwrap_or(false); + first_child_is_yield + } + + fn super_statement(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .super_statement(node, self.source) + } + + fn argument_list_element_reference(&self, node: TreeSitterNode<'_>) -> bool { + if node.kind() != "argument_list" { + return false; + } + let named = self.named_children(node); + if named + .iter() + .any(|child| matches!(child.kind(), "block" | "do_block")) + { + return false; + } + + let children = node.children(&mut node.walk()).collect::>(); + let direct_bracket_shape = children + .first() + .map(|child| node_text(*child, self.source) != "[") + .unwrap_or(false) + && children + .iter() + .any(|child| !child.is_named() && node_text(*child, self.source) == "[") + && children + .iter() + .any(|child| !child.is_named() && node_text(*child, self.source) == "]") + && named.len() >= 2; + if direct_bracket_shape { + return true; + } + + if named.len() != 1 || named[0].kind() != "element_reference" { + return false; + } + let reference = named[0]; + let reference_named = self.raw_named_children(reference); + if reference_named.len() < 2 + || reference_named + .iter() + .any(|child| matches!(child.kind(), "block" | "do_block")) + { + return false; + } + let reference_children = reference + .children(&mut reference.walk()) + .collect::>(); + reference_children + .first() + .map(|child| node_text(*child, self.source) != "[") + .unwrap_or(false) + && reference_children + .iter() + .any(|child| !child.is_named() && node_text(*child, self.source) == "[") + && reference_children + .iter() + 
.any(|child| !child.is_named() && node_text(*child, self.source) == "]") + } + + fn dotted_expression(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter.dotted_expression_wrapper(node) && self.dotted_call(node) + } + + fn argument_list_call_with_block(&self, node: TreeSitterNode<'_>) -> bool { + if node.kind() != "argument_list" || self.dotted_call(node) { + return false; + } + + let target = self + .normalization_adapter + .statement_wrapped_call_target(node, self.source) + .unwrap_or(node); + + self.call_block(target).is_some() + && self + .named_children(target) + .into_iter() + .next() + .map(|child| self.identifier_text(child).is_some()) + .unwrap_or(false) + } + + fn dotted_call(&self, node: TreeSitterNode<'_>) -> bool { + let raw_named = self.raw_named_children(node); + if raw_named.len() == 1 + && node_text(node, self.source) == node_text(raw_named[0], self.source) + && self.dotted_call(raw_named[0]) + { + return true; + } + + if !node + .children(&mut node.walk()) + .any(|child| self.member_access_operator(node_text(child, self.source))) + { + return false; + } + let callable = self + .named_children(node) + .into_iter() + .filter(|child| { + !matches!( + child.kind(), + "block" | "do_block" | "argument_list" | "arguments" + ) + }) + .collect::>(); + if callable + .iter() + .any(|child| matches!(child.kind(), "string_content" | "interpolation")) + { + return false; + } + callable.len() >= 2 + } + + fn safe_navigation_call(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .safe_navigation_call(node, self.source) + } + + fn dotted_call_parts<'tree>( + &self, + node: TreeSitterNode<'tree>, + block: Option>, + ) -> Option<(TreeSitterNode<'tree>, String)> { + let raw_named = self.raw_named_children(node); + if raw_named.len() == 1 + && node_text(node, self.source) == node_text(raw_named[0], self.source) + && self.dotted_call(raw_named[0]) + { + return self.dotted_call_parts(raw_named[0], block); + } + + let callable 
= self + .named_children(node) + .into_iter() + .filter(|child| Some(*child) != block) + .filter(|child| { + !matches!( + child.kind(), + "block" | "do_block" | "argument_list" | "arguments" + ) + }) + .collect::>(); + let receiver = *callable.first()?; + let method = node_text(*callable.get(1)?, self.source) + .trim_start_matches("::") + .trim_start_matches("->") + .trim_start_matches(['.', '?']) + .trim_end_matches('=') + .to_string(); + Some((receiver, method)) + } + + fn member_read_node(&self, node: TreeSitterNode<'_>) -> bool { + if self.normalization_adapter.member_read_excluded(node) { + return false; + } + matches!( + node.kind(), + "call" + | "attribute" + | "member_expression" + | "member_access_expression" + | "field" + | "field_access" + | "selector_expression" + | "field_expression" + | "navigation_expression" + | "directly_assignable_expression" + | "expression_list" + ) && self.member_parts(node).is_some() + } + + fn member_parts<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option<(TreeSitterNode<'tree>, String)> { + if node.kind() == "expression_list" + && !(self.named_field(node, "operand").is_some() + && self.named_field(node, "field").is_some()) + { + return None; + } + if self.dotted_call(node) { + return self.dotted_call_parts(node, None); + } + let named_children = self.named_children(node); + let receiver = self + .named_field(node, "receiver") + .or_else(|| self.named_field(node, "object")) + .or_else(|| self.named_field(node, "operand")) + .or_else(|| self.named_field(node, "value")) + .or_else(|| self.named_field(node, "expression")) + .or_else(|| { + named_children + .iter() + .copied() + .find(|child| child.kind() != "navigation_suffix") + })?; + let method = self + .named_field(node, "method") + .or_else(|| self.named_field(node, "field")) + .or_else(|| self.named_field(node, "property")) + .or_else(|| self.named_field(node, "suffix")) + .or_else(|| { + named_children + .iter() + .copied() + .find(|child| child.kind() == 
"navigation_suffix") + }) + .or_else(|| { + named_children.iter().copied().rev().find(|child| { + !matches!( + child.kind(), + "block" | "do_block" | "argument_list" | "arguments" + ) + }) + })?; + (receiver != method).then(|| { + ( + receiver, + self.member_name(method).trim_end_matches('=').to_string(), + ) + }) + } + + fn member_name(&self, node: TreeSitterNode<'_>) -> String { + if node.kind() == "navigation_suffix" { + let named_children = self.named_children(node); + let suffix = self + .named_field(node, "suffix") + .or_else(|| { + named_children + .iter() + .copied() + .find(|child| self.identifier_kind(child.kind())) + }) + .or_else(|| named_children.last().copied()); + return suffix + .map(|suffix| { + node_text(suffix, self.source) + .trim_start_matches("::") + .trim_start_matches("->") + .trim_start_matches(['.', '?']) + .to_string() + }) + .unwrap_or_default(); + } + + node_text(node, self.source) + .trim_start_matches("::") + .trim_start_matches("->") + .trim_start_matches(['.', '?']) + .to_string() + } + + fn call_arguments( + &mut self, + node: TreeSitterNode<'_>, + function: Option>, + ) -> Vec { + let Some(args) = self + .named_field(node, "arguments") + .or_else(|| self.named_field(node, "argument")) + .or_else(|| { + self.named_children(node) + .into_iter() + .find(|child| matches!(child.kind(), "argument_list" | "arguments")) + }) + else { + return Vec::new(); + }; + let children = self + .named_children(args) + .into_iter() + .filter(|child| Some(*child) != function) + .collect::>(); + if self.dotted_expression(args) { + return self.normalize_dotted_expression(args).into_iter().collect(); + } + let raw_args = self.raw_named_children(args); + if raw_args.len() == 1 && self.dotted_call(raw_args[0]) { + let source = self.wrap("SOURCE", Vec::new(), args); + return self + .normalize_dotted_call_expression_with_source(raw_args[0], Some(&source)) + .into_iter() + .collect(); + } + if children.len() == 1 + && children[0].kind() == "heredoc_beginning" 
+ && heredoc_marker_text(node_text(args, self.source).trim_start()) + { + return self.literal_arguments_from_text(args); + } + if children.is_empty() { + return self + .scalar_argument_list_value(args) + .into_iter() + .chain(self.literal_arguments_from_text(args)) + .collect(); + } + if self.infix_statement(args) { + return self.normalize_infix_statement(args).into_iter().collect(); + } + + children + .into_iter() + .filter_map(|child| self.normalize_node(child)) + .collect() + } + + fn literal_arguments_from_text(&mut self, args: TreeSitterNode<'_>) -> Vec { + let text = node_text(args, self.source); + if text.trim_start().starts_with("<<") && heredoc_marker_text(text.trim_start()) { + return vec![self.normalize_heredoc_beginning(args)]; + } + + literal_symbol_arguments(text) + .into_iter() + .map(|name| self.wrap("LIT", vec![Child::Symbol(name)], args)) + .collect() + } + + fn command_arguments(&mut self, args: TreeSitterNode<'_>) -> Vec { + let children = self.named_children(args); + if children.is_empty() { + return self.scalar_argument_list_value(args).into_iter().collect(); + } + if self.infix_statement(args) { + return self.normalize_infix_statement(args).into_iter().collect(); + } + if self.dotted_expression(args) { + return self.normalize_dotted_expression(args).into_iter().collect(); + } + if children.len() == 1 + && self.call_node(children[0]) + && self.call_block(children[0]).is_some() + { + return self + .normalize_call_with_block(children[0]) + .into_iter() + .collect(); + } + children + .into_iter() + .filter_map(|child| self.normalize_node(child)) + .collect() + } + + fn yield_argument_nodes(&mut self, node: TreeSitterNode<'_>) -> Vec { + let children = self.named_children(node); + if children.is_empty() { + return self.scalar_argument_list_value(node).into_iter().collect(); + } + children + .into_iter() + .filter_map(|child| self.normalize_node(child)) + .collect() + } + + fn yield_inline_arguments(&mut self, node: TreeSitterNode<'_>) -> Vec { + 
self.named_children(node) + .into_iter() + .filter(|child| child.kind() != "yield") + .filter_map(|child| self.normalize_node(child)) + .collect() + } + + /* Interprets the trimmed text of `node` as one scalar value. Checked in order: "yield" (Ruby only) gives YIELD, "nil"/"true"/"false" give NIL/TRUE/FALSE, ":name" gives a LIT symbol, text that parses as an integer gives INTEGER, a bare identifier gives VCALL (Ruby, when ruby_local_name is false) or LVAR. Anything else gives None. */ fn scalar_argument_list_value(&mut self, node: TreeSitterNode<'_>) -> Option { + let text = node_text(node, self.source).trim(); + if self.ruby() && text == "yield" { + return Some(self.wrap("YIELD", vec![Child::Nil], node)); + } + if text == "nil" { + return Some(self.wrap("NIL", Vec::new(), node)); + } + if text == "true" { + return Some(self.wrap("TRUE", Vec::new(), node)); + } + if text == "false" { + return Some(self.wrap("FALSE", Vec::new(), node)); + } + if let Some(symbol) = text.strip_prefix(':') { + if bare_identifier_text(symbol) { + return Some(self.wrap("LIT", vec![Child::Symbol(symbol.to_string())], node)); + } + } + if let Ok(value) = text.parse::() { + return Some(self.wrap("INTEGER", vec![Child::Integer(value)], node)); + } + if bare_identifier_text(text) { + if self.ruby() && !self.ruby_local_name(text) { + Some(self.wrap("VCALL", vec![Child::Symbol(text.to_string())], node)) + } else { + Some(self.wrap("LVAR", vec![Child::String(text.to_string())], node)) + } + } else { + None + } + } + + /* Wraps `name` as VCALL when the language is Ruby and ruby_local_name(name) is false, otherwise as LVAR. Same rule as the identifier branch of scalar_argument_list_value. */ fn local_or_call_for_name(&self, name: &str, source: TreeSitterNode<'_>) -> Node { + if self.ruby() && !self.ruby_local_name(name) { + self.wrap("VCALL", vec![Child::Symbol(name.to_string())], source) + } else { + self.wrap("LVAR", vec![Child::String(name.to_string())], source) + } + } + + /* True when `node` is Some LIT node whose first child is a Symbol. */ fn symbol_literal_node(&self, node: Option<&Node>) -> bool { + matches!( + node, + Some(node) + if node.r#type == "LIT" && matches!(node.children.first(), Some(Child::Symbol(_))) + ) + } + + /* Compares two tree-sitter nodes by kind, start byte and end byte. */ fn same_ts_node(&self, left: TreeSitterNode<'_>, right: TreeSitterNode<'_>) -> bool { + left.kind() == right.kind() + && left.start_byte() == right.start_byte() + && left.end_byte() == right.end_byte() + } + + /* True when some entry of named_children(parent) equals `node` according to same_ts_node. */ fn parent_named_child(&self, parent: TreeSitterNode<'_>, node: TreeSitterNode<'_>) -> bool { + self.named_children(parent) + 
.into_iter() + .any(|child| self.same_ts_node(child, node)) + } + + /* Test-only: (kind, start byte, end byte) tuple identifying a node. */ #[cfg(test)] + fn node_key(&self, node: TreeSitterNode<'_>) -> (String, usize, usize) { + (node.kind().to_string(), node.start_byte(), node.end_byte()) + } + + /* True for an `expression_statement` whose text, after leading whitespace, starts with "match " and that has a `match_block` named child. */ fn hidden_match(&self, node: TreeSitterNode<'_>) -> bool { + node.kind() == "expression_statement" + && node_text(node, self.source) + .trim_start() + .starts_with("match ") + && self + .named_children(node) + .into_iter() + .any(|child| child.kind() == "match_block") + } + + /* Left-hand side of an assignment: the `left` field, else the first named child. */ fn assignment_left<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { + self.named_field(node, "left") + .or_else(|| self.named_children(node).into_iter().next()) + } + + /* Right-hand side of an assignment: the `right` field, else the second named child. */ fn assignment_right<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + self.named_field(node, "right") + .or_else(|| self.named_children(node).into_iter().nth(1)) + } + + /* Returns the binary operator behind a compound assignment: "||=" gives "||", "&&=" gives "&&", any other operator has its trailing "=" characters removed. The operator is the first anonymous child accepted by assignment_operator. When none is found and the node has exactly one raw named child whose text equals the node text (ignoring trailing ";" and whitespace), the search recurses into that child. Returns an empty string when nothing matches. */ fn operator_assignment_operator(&self, node: TreeSitterNode<'_>) -> String { + let mut cursor = node.walk(); + let raw = node.children(&mut cursor).find_map(|child| { + let text = node_text(child, self.source); + (!child.is_named() && self.assignment_operator(text)).then_some(text) + }); + if let Some(raw) = raw { + return match raw { + "||=" => "||".to_string(), + "&&=" => "&&".to_string(), + _ => raw.trim_end_matches('=').to_string(), + }; + } + + let raw_named = self.raw_named_children(node); + if raw_named.len() == 1 + && node_text(node, self.source) + .trim_end_matches(';') + .trim_end() + == node_text(raw_named[0], self.source) + { + return self.operator_assignment_operator(raw_named[0]); + } + + String::new() + } + + /* Parameter list of a definition: the `parameters` field, else the first named child whose kind is one of the listed parameter-list kinds. */ fn parameters_child<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + self.named_field(node, "parameters").or_else(|| { + self.named_children(node).into_iter().find(|child| { + matches!( + child.kind(), + "parameters" + | "parameter_list" + | "formal_parameters" + | "function_value_parameters" + | "method_parameters" + ) + }) + }) + } + + /* Ruby only: when the parameter list (the `parameters` field or a `method_parameters` child) is directly followed by an anonymous ";" sibling, returns a zero-width BEGIN node with a single Nil child positioned at that semicolon. None in every other case. */ fn inline_parameter_begin_marker(&self, 
function_node: TreeSitterNode<'_>) -> Option { + if !self.ruby() { + return None; + } + + let params = self.named_field(function_node, "parameters").or_else(|| { + self.named_children(function_node) + .into_iter() + .find(|child| child.kind() == "method_parameters") + })?; + let semicolon = params.next_sibling()?; + if semicolon.is_named() || node_text(semicolon, self.source) != ";" { + return None; + } + + let point = semicolon.start_position(); + Some(Node { + r#type: "BEGIN".to_string(), + children: vec![Child::Nil], + first_lineno: point.row + 1, + first_column: point.column, + last_lineno: point.row + 1, + last_column: point.column, + text: String::new(), + }) + } + + /* Puts the marker from inline_parameter_begin_marker in front of `body`. Without a marker the body is returned unchanged. With a marker: a missing body gives None; a BLOCK body has its Nil children removed (None when nothing is left) and the marker becomes its first child; any other body is wrapped, together with the marker, in a new BLOCK spanning `function_node`. */ fn prepend_inline_parameter_begin( + &self, + function_node: TreeSitterNode<'_>, + body: Option, + ) -> Option { + let Some(marker) = self.inline_parameter_begin_marker(function_node) else { + return body; + }; + + let mut body = body?; + if body.r#type == "BLOCK" { + let mut children = body + .children + .into_iter() + .filter(|child| !matches!(child, Child::Nil)) + .collect::>(); + if children.is_empty() { + return None; + } + + body.children = vec![Child::Node(Box::new(marker))]; + body.children.append(&mut children); + return Some(body); + } + + Some(self.wrap( + "BLOCK", + vec![Child::Node(Box::new(marker)), Child::Node(Box::new(body))], + function_node, + )) + } + + /* Builds the assignment node for a target that is not a plain local. Checked in order: adapter state field or instance variable gives IASGN, global variable gives GASGN, `element_reference` gives ATTRASGN with "[]=", member read or adapter member target gives ATTRASGN with the writer name, `expression_list` delegates to its first named child. Returns None when `left` matches none of these. */ fn assignment_target( + &mut self, + left: TreeSitterNode<'_>, + right: Option, + source: TreeSitterNode<'_>, + ) -> Option { + if let Some(field) = self + .normalization_adapter + .state_field_name(left, self.source) + { + return Some(self.wrap( + "IASGN", + vec![Child::String(field), optional_node(right)], + source, + )); + } + if self.instance_variable(left) { + return Some(self.wrap( + "IASGN", + vec![ + Child::String(node_text(left, self.source).to_string()), + optional_node(right), + ], + source, + )); + } + if self.global_variable(left) { + return Some(self.wrap( + "GASGN", + vec![ + Child::String(node_text(left, 
self.source).to_string()), + optional_node(right), + ], + source, + )); + } + if left.kind() == "element_reference" { + let named = self.named_children(left); + let receiver = *named.first()?; + let mut args = named + .iter() + .skip(1) + .filter_map(|arg| self.normalize_node(*arg)) + .collect::>(); + if let Some(right) = right { + args.push(right); + } + let receiver = optional_node(self.normalize_node(receiver)); + let args = list_or_nil(args, left, self); + return Some(self.wrap( + "ATTRASGN", + vec![receiver, Child::Symbol("[]=".to_string()), args], + source, + )); + } + if self.member_read_node(left) + || self + .normalization_adapter + .member_assignment_target(left, self.source) + { + let (receiver, method) = self.member_parts(left)?; + /* When the target text contains "&." the writer keeps the bare method name; otherwise "=" is appended. */ let writer = if node_text(left, self.source).contains("&.") { + method + } else { + format!("{method}=") + }; + let receiver = optional_node(self.normalize_node(receiver)); + let args = list_or_nil(right.into_iter().collect(), left, self); + return Some(self.wrap( + "ATTRASGN", + vec![receiver, Child::Symbol(writer), args], + source, + )); + } + if left.kind() == "expression_list" { + return self + .named_children(left) + .into_iter() + .next() + .and_then(|child| self.assignment_target(child, right, source)); + } + None + } + + /* Normalizes `node` as the left side of an assignment. The right side is the normalized next named sibling and the source span is the parent node (or `node` itself when there is no parent). Falls back to an LASGN named by target_name when assignment_target returns None. */ fn normalize_assignment_lhs(&mut self, node: TreeSitterNode<'_>) -> Option { + let right = node + .next_named_sibling() + .and_then(|sibling| self.normalize_node(sibling)); + let source = node.parent().unwrap_or(node); + self.assignment_target(node, right.clone(), source) + .or_else(|| { + Some(self.wrap( + "LASGN", + vec![Child::String(self.target_name(node)), optional_node(right)], + source, + )) + }) + } + + /* Name of an assignment target: identifier_text when available; for splat kinds the node text without leading "*"; otherwise the raw node text. */ fn target_name(&self, node: TreeSitterNode<'_>) -> String { + let text = node_text(node, self.source); + if let Some(name) = self.identifier_text(node) { + name + } else if matches!(node.kind(), "splat" | "splat_parameter" | "rest_assignment") { + text.trim_start_matches('*').to_string() + } 
else { + text.to_string() + } + } + + /* Name of a function definition. `singleton_method` nodes use singleton_name; other nodes use the `name` field, else the first named child that has identifier text or is a `constant`. Always returns Some; the string is empty when no name node exists. */ fn function_name(&self, node: TreeSitterNode<'_>) -> Option { + if node.kind() == "singleton_method" { + return Some(self.singleton_name(node)); + } + + Some( + self.named_field(node, "name") + .or_else(|| { + self.named_children(node).into_iter().find(|child| { + self.identifier_text(*child).is_some() || child.kind() == "constant" + }) + }) + .map(|name| { + self.identifier_text(name) + .unwrap_or_else(|| node_text(name, self.source).to_string()) + }) + .unwrap_or_default(), + ) + } + + /* Receiver of a singleton method: the `receiver` field, else the first named child that is not the name node, the `parameters` field or the body node. */ fn singleton_receiver<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + if let Some(receiver) = self.named_field(node, "receiver") { + return Some(receiver); + } + + let children = self.named_children(node); + let name = self.named_field(node, "name").or_else(|| { + children + .iter() + .rev() + .copied() + .find(|child| self.identifier_text(*child).is_some()) + }); + let parameters = self.named_field(node, "parameters"); + let body = self + .named_field(node, "body") + .or_else(|| self.block_child(node)); + + children.into_iter().find(|child| { + !name + .map(|name| self.same_ts_node(*child, name)) + .unwrap_or(false) + && !parameters + .map(|parameters| self.same_ts_node(*child, parameters)) + .unwrap_or(false) + && !body + .map(|body| self.same_ts_node(*child, body)) + .unwrap_or(false) + }) + } + + /* Raw text of a singleton method's name: the `name` field, else the last named child that has identifier text; empty string when none. */ fn singleton_name(&self, node: TreeSitterNode<'_>) -> String { + self.named_field(node, "name") + .or_else(|| { + self.named_children(node) + .into_iter() + .rev() + .find(|child| self.identifier_text(*child).is_some()) + }) + .map(|name| node_text(name, self.source).to_string()) + .unwrap_or_default() + } + + /* First named child whose kind is one of the listed body/block kinds. */ fn block_child<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { + self.named_children(node).into_iter().find(|child| { + matches!( + child.kind(), + "body_statement" + | "block_body" + | "block" + | "do_block" + | "class_body" + | "function_body" + | "match_block" + | "statement_block" + | "statement_list" + | 
"statements" + | "switch_body" + | "then" + | "control_structure_body" + ) + }) + } + + /* Block attached to a call: the first `block` or `do_block` named child. When the adapter reports a statement-wrapped call target for `node`, the lookup is done on that target instead. */ fn call_block<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { + if let Some(target) = self + .normalization_adapter + .statement_wrapped_call_target(node, self.source) + { + return self.call_block(target); + } + + self.named_children(node) + .into_iter() + .find(|child| matches!(child.kind(), "block" | "do_block")) + } + + /* Field lookup delegated to the normalization adapter. */ fn named_field<'tree>( + &self, + node: TreeSitterNode<'tree>, + name: &str, + ) -> Option> { + self.normalization_adapter.named_field(node, name) + } + + /* Thin wrapper over the tree-sitter parent accessor. */ fn parent_node<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { + node.parent() + } + + /* Test-only wrapper over next_sibling. */ #[cfg(test)] + fn next_sibling<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { + node.next_sibling() + } + + /* Test-only wrapper over prev_sibling. */ #[cfg(test)] + fn prev_sibling<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { + node.prev_sibling() + } + + /* Test-only wrapper over next_named_sibling. */ #[cfg(test)] + fn next_named_sibling<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + node.next_named_sibling() + } + + /* Named children of `node` after normalization. A `dotted_name` whose text has no "." has none. The adapter may keep the default, drop all children, redirect to another node or replace the list. A `type` node with exactly one child is flattened: union/generic/attribute/string children expose their own children, a `list` child exposes its children (none when it is empty), and ellipsis/identifier/nil/none/null children expose none. An `expression_statement` wrapping a single assignment or augmented assignment exposes that assignment's children. */ fn named_children<'tree>(&self, node: TreeSitterNode<'tree>) -> Vec> { + if node.kind() == "dotted_name" && !node_text(node, self.source).contains('.') { + return Vec::new(); + } + + let children = self.raw_named_children(node); + match self + .normalization_adapter + .named_children_action(node, self.source, &children) + { + NamedChildrenAction::Default => {} + NamedChildrenAction::Drop => return Vec::new(), + NamedChildrenAction::Recurse(child) => return self.named_children(child), + NamedChildrenAction::Replace(children) => return children, + } + + if node.kind() == "type" && children.len() == 1 { + if children[0].kind() == "union_type" { + return self.named_children(children[0]); + } + if children[0].kind() == "generic_type" { + return self.named_children(children[0]); + } + if children[0].kind() == "attribute" { + return self.named_children(children[0]); + } + if children[0].kind() == "string" { + return self.named_children(children[0]); 
+ } + if children[0].kind() == "list" { + if self.raw_named_children(children[0]).is_empty() { + return Vec::new(); + } + return self.named_children(children[0]); + } + if matches!( + children[0].kind(), + "ellipsis" | "identifier" | "nil" | "none" | "null" + ) { + return Vec::new(); + } + } + if node.kind() == "expression_statement" + && children.len() == 1 + && matches!(children[0].kind(), "assignment" | "augmented_assignment") + { + return self.named_children(children[0]); + } + + children + } + + /* All named children of the tree-sitter node, without any adapter or wrapper handling. */ fn raw_named_children<'tree>(&self, node: TreeSitterNode<'tree>) -> Vec> { + node.children(&mut node.walk()) + .filter(|child| child.is_named()) + .collect() + } + + /* Delegates to the adapter's no_paren_string_argument_content. */ fn no_paren_string_argument_content<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + self.normalization_adapter + .no_paren_string_argument_content(node, self.source) + } + + /* SOURCE node for the text between the start of `node` and the start of `child`, with trailing whitespace trimmed. When that text is empty the result is a SOURCE node wrapping the whole `node`. The end position is derived from the number of lines in the text and the byte length of its last line. */ fn source_before_child(&self, node: TreeSitterNode<'_>, child: TreeSitterNode<'_>) -> Node { + let text = self + .source + .get(node.start_byte()..child.start_byte()) + .unwrap_or("") + .trim_end() + .to_string(); + if text.is_empty() { + return self.wrap("SOURCE", Vec::new(), node); + } + + let lines = text.lines().collect::>(); + let first_span = span(node); + let last_lineno = first_span[0] + lines.len() - 1; + let last_column = if lines.len() <= 1 { + first_span[1] + text.len() + } else { + lines.last().map(|line| line.len()).unwrap_or(0) + }; + Node { + r#type: "SOURCE".to_string(), + children: Vec::new(), + first_lineno: first_span[0], + first_column: first_span[1], + last_lineno, + last_column, + text: self.source_text(&text), + } + } + + /* SOURCE node spanning from `first_node` to `last_node`, built by wrap_from_nodes. */ fn source_from_nodes( + &self, + first_node: TreeSitterNode<'_>, + last_node: TreeSitterNode<'_>, + ) -> Node { + self.wrap_from_nodes("SOURCE", Vec::new(), first_node, last_node) + } + + /* SOURCE node spanning from the first anonymous "(" child of `node` to its last anonymous ")" child; None when either is missing. */ fn parenthesized_source(&self, node: TreeSitterNode<'_>) -> Option { + let mut open = None; + let mut close = None; + for child in node.children(&mut node.walk()) { + if child.is_named() { + 
continue; + } + match node_text(child, self.source) { + "(" if open.is_none() => open = Some(child), + ")" => close = Some(child), + _ => {} + } + } + Some(self.source_from_nodes(open?, close?)) + } + + /* SOURCE node covering the range from the start of `first_node` to the end of `last_node`. The text is re-sliced from self.source using 1-based line numbers and column offsets; any slice that is out of range contributes an empty string. */ fn source_from_normalized_nodes(&self, first_node: &Node, last_node: &Node) -> Node { + let lines = self.source.split_inclusive('\n').collect::>(); + let text = if first_node.first_lineno == last_node.last_lineno { + lines + .get(first_node.first_lineno.saturating_sub(1)) + .and_then(|line| line.get(first_node.first_column..last_node.last_column)) + .unwrap_or("") + .to_string() + } else { + let mut text = String::new(); + if let Some(line) = lines.get(first_node.first_lineno.saturating_sub(1)) { + text.push_str(line.get(first_node.first_column..).unwrap_or("")); + } + for index in first_node.first_lineno..last_node.last_lineno.saturating_sub(1) { + if let Some(line) = lines.get(index) { + text.push_str(line); + } + } + if let Some(line) = lines.get(last_node.last_lineno.saturating_sub(1)) { + text.push_str(line.get(..last_node.last_column).unwrap_or("")); + } + text + }; + + Node { + r#type: "SOURCE".to_string(), + children: Vec::new(), + first_lineno: first_node.first_lineno, + first_column: first_node.first_column, + last_lineno: last_node.last_lineno, + last_column: last_node.last_column, + text: self.source_text(&text), + } + } + + /* First entry of named_children(node), if any. */ fn first_named<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { + self.named_children(node).into_iter().next() + } + + /* The `offset`-th named child of `node` after skipping the condition node and children of kind "comment", "else" or "elsif". */ fn branch_child<'tree>( + &self, + node: TreeSitterNode<'tree>, + condition: TreeSitterNode<'tree>, + offset: usize, + ) -> Option> { + self.named_children(node) + .into_iter() + .filter(|child| { + *child != condition && !matches!(child.kind(), "comment" | "else" | "elsif") + }) + .nth(offset) + } + + /* Delegates to the adapter's explicit_alternative. */ fn explicit_alternative<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + self.normalization_adapter.explicit_alternative(node) + } + + /* Subject of a case/switch/match: the `value`, `subject` or `condition` field, else the first named child that is not a when-kind, not a block-kind and not "else". */ fn case_value<'tree>(&self, node: TreeSitterNode<'tree>) 
-> Option> { + self.named_field(node, "value") + .or_else(|| self.named_field(node, "subject")) + .or_else(|| self.named_field(node, "condition")) + .or_else(|| { + self.named_children(node).into_iter().find(|child| { + !self.when_kind(child.kind()) + && !self.block_kind(child.kind()) + && child.kind() != "else" + }) + }) + } + + /* Collects the arms of a case node in breadth-first order. Nodes the adapter accepts as arms are collected and not descended into; else-nodes are skipped; function definitions are not descended into; every other node has its named children queued. NOTE(review): the queue is a Vec popped from the front with remove(0), which shifts the remaining elements on every pop. */ fn case_arms<'tree>(&self, node: TreeSitterNode<'tree>) -> Vec> { + let mut arms = Vec::new(); + let mut stack = self.named_children(node); + while !stack.is_empty() { + let child = stack.remove(0); + if self.normalization_adapter.case_arm(child, self.source) { + arms.push(child); + } else if self + .normalization_adapter + .case_else_node_kind(child, self.source) + { + continue; + } else if !function_kind(child.kind()) { + stack.extend(self.named_children(child)); + } + } + arms + } + + /* Body of a case arm: the `body`, `consequence` or `value` field, else the last named child that is a block-kind or statement-like node. */ fn when_body<'tree>(&self, node: TreeSitterNode<'tree>) -> Option> { + self.named_field(node, "body") + .or_else(|| self.named_field(node, "consequence")) + .or_else(|| self.named_field(node, "value")) + .or_else(|| { + self.named_children(node).into_iter().rev().find(|child| { + self.block_kind(child.kind()) || self.statement_node(child.kind()) + }) + }) + } + + /* Method form of the free function identifier_kind_name. */ fn identifier_kind(&self, kind: &str) -> bool { + identifier_kind_name(kind) + } + + /* Identifier text of `node`: for identifier kinds the node text without leading "*"; otherwise whatever the adapter's local_identifier_text returns. */ fn identifier_text(&self, node: TreeSitterNode<'_>) -> Option { + if self.identifier_kind(node.kind()) { + return Some( + node_text(node, self.source) + .trim_start_matches('*') + .to_string(), + ); + } + self.normalization_adapter + .local_identifier_text(node, self.source) + } + + /* Delegates to the adapter's self_identifier. */ fn self_identifier(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .self_identifier(node, self.source) + } + + /* True when the node kind is a call kind or the adapter treats the node as a call. */ fn call_node(&self, node: TreeSitterNode<'_>) -> bool { + self.call_kind(node.kind()) || self.normalization_adapter.call_node(node, self.source) + } + + /* Loop node type for `kind`: the adapter's answer first, then the shared loop_kind table. */ fn loop_node_type(&self, kind: &str) -> Option<&'static str> { + self.normalization_adapter + .loop_node_type(kind) + .or_else(|| loop_kind(kind)) + 
} + + /* Delegates to the adapter's member_access_operator. */ fn member_access_operator(&self, text: &str) -> bool { + self.normalization_adapter.member_access_operator(text) + } + + /* Delegates to the adapter's source_text. */ fn source_text(&self, text: &str) -> String { + self.normalization_adapter.source_text(text) + } + + /* True for the listed constant / type-identifier node kinds. */ fn const_kind(&self, kind: &str) -> bool { + matches!( + kind, + "constant" | "scope_resolution" | "type_identifier" | "scoped_type_identifier" + ) + } + + /* True for the listed call node kinds. */ fn call_kind(&self, kind: &str) -> bool { + matches!( + kind, + "call" | "call_expression" | "method_call" | "method_call_expression" + ) + } + + /* True when the adapter reports a block kind or `kind` is one of the listed body/block kinds. */ fn block_kind(&self, kind: &str) -> bool { + self.normalization_adapter.block_node_kind(kind) + || matches!( + kind, + "block" + | "body_statement" + | "statement_block" + | "statement_list" + | "class_body" + | "switch_body" + | "match_block" + | "then" + | "block_body" + | "control_structure_body" + | "function_body" + | "statements" + ) + } + + /* True for the listed case/switch/match node kinds. */ fn case_kind(&self, kind: &str) -> bool { + matches!( + kind, + "case" + | "switch_statement" + | "expression_switch_statement" + | "switch_expression" + | "match_statement" + | "match_expression" + | "when_expression" + ) + } + + /* True for the listed case-arm node kinds. */ fn when_kind(&self, kind: &str) -> bool { + matches!( + kind, + "when" + | "switch_case" + | "case_clause" + | "expression_case" + | "case_statement" + | "switch_section" + | "switch_block_statement_group" + | "switch_entry" + | "when_entry" + | "match_arm" + ) + } + + /* True when `kind` ends with "_statement" or "_expression", or is "return", "break" or "next". */ fn statement_node(&self, kind: &str) -> bool { + kind.ends_with("_statement") + || kind.ends_with("_expression") + || matches!(kind, "return" | "break" | "next") + } + + /* Asks the adapter whether `node` should be unwrapped, passing the number of its named children. */ fn unwrap_node(&self, node: TreeSitterNode<'_>) -> bool { + self.normalization_adapter + .unwrap_node(node, self.source, self.named_children(node).len()) + } + + /* Depth-first, pre-order search through named children for the first node that is both a call node and a dotted call. */ fn first_dotted_call_descendant<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + for child in self.named_children(node) { + if self.call_node(child) && self.dotted_call(child) { + return Some(child); + } + if let Some(found) = self.first_dotted_call_descendant(child) 
{ + return Some(found); + } + } + None + } + + /* Removes explicit RETURN wrappers in tail position when the adapter enables it; otherwise `node` is returned unchanged. Definition-like nodes (DEFN, DEFS, CLASS, MODULE, SCLASS, LAMBDA, ITER) are left untouched. A RETURN node is replaced by its first child, or None when that child is not a node. Tail slots are processed recursively: the last child of BLOCK, child 2 of SCOPE, children 1 and 2 of IF/UNLESS and of WHEN/RESBODY, child 1 of CASE, child 0 of CASE2, children 0 and 1 of RESCUE. A slot whose content elides to None is left as Nil. */ fn elide_tail_returns(&self, node: Option) -> Option { + if !self.normalization_adapter.elides_tail_returns() { + return node; + } + let mut node = node?; + if matches!( + node.r#type.as_str(), + "DEFN" | "DEFS" | "CLASS" | "MODULE" | "SCLASS" | "LAMBDA" | "ITER" + ) { + return Some(node); + } + if node.r#type == "RETURN" { + return node.children.into_iter().next().and_then(child_node); + } + + match node.r#type.as_str() { + "BLOCK" => { + if let Some(last) = node.children.pop() { + match child_node(last) { + Some(last_node) => { + if let Some(elided) = self.elide_tail_returns(Some(last_node)) { + node.children.push(Child::Node(Box::new(elided))); + } else { + node.children.push(Child::Nil); + } + } + None => node.children.push(Child::Nil), + } + } + } + "SCOPE" => { + if node.children.len() > 2 { + let child = std::mem::replace(&mut node.children[2], Child::Nil); + if let Some(elided) = + child_node(child).and_then(|body| self.elide_tail_returns(Some(body))) + { + node.children[2] = Child::Node(Box::new(elided)); + } + } + } + "IF" | "UNLESS" => { + for index in [1usize, 2usize] { + if node.children.len() > index { + let child = std::mem::replace(&mut node.children[index], Child::Nil); + if let Some(elided) = + child_node(child).and_then(|body| self.elide_tail_returns(Some(body))) + { + node.children[index] = Child::Node(Box::new(elided)); + } + } + } + } + "CASE" | "CASE2" => { + let index = if node.r#type == "CASE" { 1 } else { 0 }; + if node.children.len() > index { + let child = std::mem::replace(&mut node.children[index], Child::Nil); + if let Some(elided) = + child_node(child).and_then(|body| self.elide_tail_returns(Some(body))) + { + node.children[index] = Child::Node(Box::new(elided)); + } + } + } + "WHEN" | "RESBODY" => { + for index in [1usize, 2usize] { + if node.children.len() > index { + let child = std::mem::replace(&mut node.children[index], Child::Nil); + if let Some(elided) = + 
child_node(child).and_then(|body| self.elide_tail_returns(Some(body))) + { + node.children[index] = Child::Node(Box::new(elided)); + } + } + } + } + "RESCUE" => { + for index in [0usize, 1usize] { + if node.children.len() > index { + let child = std::mem::replace(&mut node.children[index], Child::Nil); + if let Some(elided) = + child_node(child).and_then(|body| self.elide_tail_returns(Some(body))) + { + node.children[index] = Child::Node(Box::new(elided)); + } + } + } + } + _ => {} + } + + Some(node) + } + + /* When the adapter enables it, drops trailing NIL statements from the body and returns None for a body that is just a NIL node; otherwise `node` is returned unchanged. */ fn elide_implicit_nil_body(&self, node: Option) -> Option { + if !self.normalization_adapter.elides_implicit_nil_body() { + return node; + } + let node = self.drop_trailing_nil_statement(node); + match node { + Some(node) if node.r#type == "NIL" => None, + other => other, + } + } + + /* For a BLOCK node: removes Nil children, then pops trailing NIL nodes. Returns None when nothing remains, the sole remaining child when exactly one is left (None if that child is not a node), otherwise the trimmed BLOCK. Non-BLOCK nodes are returned unchanged. */ fn drop_trailing_nil_statement(&self, node: Option) -> Option { + let mut node = node?; + if node.r#type != "BLOCK" { + return Some(node); + } + node.children.retain(|child| !matches!(child, Child::Nil)); + while node + .children + .last() + .and_then(self::node) + .map(|child| child.r#type == "NIL") + .unwrap_or(false) + { + node.children.pop(); + } + if node.children.is_empty() { + None + } else if node.children.len() == 1 { + child_node(node.children.into_iter().next().unwrap()) + } else { + Some(node) + } + } +} + +/* Converts an optional node into a Child: a boxed Node, or Nil when absent. */ fn optional_node(node: Option) -> Child { + node.map(|node| Child::Node(Box::new(node))) + .unwrap_or(Child::Nil) +} + +/* Extracts the owned node from a Child::Node; None for every other variant. */ fn child_node(child: Child) -> Option { + match child { + Child::Node(node) => Some(*node), + _ => None, + } +} + +/* Child::Nil for an empty list, otherwise a boxed list node built by the normalizer's list_node. */ fn list_or_nil( + children: Vec, + source: TreeSitterNode<'_>, + normalizer: &TreeSitterNormalizer<'_>, +) -> Child { + if children.is_empty() { + Child::Nil + } else { + Child::Node(Box::new(normalizer.list_node(children, source))) + } +} + +/* True when `text` is an optional leading "-" followed by one or more ASCII digits. */ fn integer_text(text: &str) -> bool { + let digits = text.strip_prefix('-').unwrap_or(text); + !digits.is_empty() && digits.chars().all(|ch| ch.is_ascii_digit()) +} + +/* True when `text` starts with an ASCII uppercase letter and the rest consists of "_" or ASCII alphanumerics. */ fn ruby_constant_text(text: 
&str) -> bool { + let mut chars = text.chars(); + let Some(first) = chars.next() else { + return false; + }; + first.is_ascii_uppercase() && chars.all(|ch| ch == '_' || ch.is_ascii_alphanumeric()) +} + +/* Recursively renames LASGN to DASGN and LVAR to DVAR throughout the tree. Definition-like nodes (DEFN, DEFS, CLASS, MODULE, SCLASS, LAMBDA) are returned untouched and not descended into. */ fn dynamic_scope(mut node: Node) -> Node { + if matches!( + node.r#type.as_str(), + "DEFN" | "DEFS" | "CLASS" | "MODULE" | "SCLASS" | "LAMBDA" + ) { + return node; + } + if node.r#type == "LASGN" { + node.r#type = "DASGN".to_string(); + } else if node.r#type == "LVAR" { + node.r#type = "DVAR".to_string(); + } + node.children = node + .children + .into_iter() + .map(|child| match child { + Child::Node(node) => Child::Node(Box::new(dynamic_scope(*node))), + other => other, + }) + .collect(); + node +} + +/* Converts a node kind into a type name: ASCII alphanumerics are uppercased and every run of other characters becomes a single "_". */ fn kind_type(kind: &str) -> String { + let mut result = String::new(); + let mut in_separator = false; + for ch in kind.chars() { + if ch.is_ascii_alphanumeric() { + result.push(ch.to_ascii_uppercase()); + in_separator = false; + } else if !in_separator { + result.push('_'); + in_separator = true; + } + } + result +} + +/* Test-only: true when the optional node is present. */ #[cfg(test)] +fn ts_node(node: Option>) -> bool { + node.is_some() +} + +/* True for the listed conditional node kinds. */ fn if_kind(kind: &str) -> bool { + matches!( + kind, + "if" | "if_statement" + | "if_modifier" + | "unless" + | "unless_modifier" + | "if_expression" + | "conditional" + ) +} + +/* Maps loop node kinds to WHILE, UNTIL or FOR; None for any other kind. */ fn loop_kind(kind: &str) -> Option<&'static str> { + match kind { + "while" | "while_statement" | "while_modifier" => Some("WHILE"), + "until_modifier" => Some("UNTIL"), + "for" | "for_statement" | "for_in_clause" => Some("FOR"), + _ => None, + } +} + +/* True for the listed function/method definition node kinds. */ fn function_kind(kind: &str) -> bool { + matches!( + kind, + "method" + | "function_definition" + | "function_declaration" + | "method_definition" + | "method_declaration" + | "function_item" + | "singleton_method" + ) +} + +/* Maps return/break/next-style kinds to RETURN, BREAK or NEXT; any other kind is returned unchanged. */ fn return_kind(kind: &str) -> &str { + match kind { + "return" | "return_statement" | "return_expression" => "RETURN", + "break" | "break_statement" | "break_expression" => "BREAK", + "next" | "continue_statement" => "NEXT", + other => 
other, + } +} + +/* True for the kinds that return_kind maps to RETURN, BREAK or NEXT. */ fn return_statement_kind(kind: &str) -> bool { + matches!( + kind, + "return" + | "return_statement" + | "return_expression" + | "break" + | "break_statement" + | "break_expression" + | "next" + | "continue_statement" + ) +} + +/* True when `text` is exactly one of the listed visibility / module_function method names. */ fn inline_def_wrapper_mid(text: &str) -> bool { + matches!( + text, + "public" | "protected" | "private" | "private_class_method" | "module_function" + ) +} + +/* Looks at the whitespace-separated token that follows the first "def" token and returns true when it contains a "." with a non-empty part before it. False when there is no "def", no following token, or no ".". */ fn inline_def_receiver_text(text: &str) -> bool { + let mut tokens = text.split_whitespace(); + while let Some(token) = tokens.next() { + if token != "def" { + continue; + } + let Some(name) = tokens.next() else { + return false; + }; + let Some((receiver, _method)) = name.split_once('.') else { + return false; + }; + return !receiver.is_empty(); + } + false +} + +/* Scans `text` for ":name" occurrences and returns the names without the colon. A name starts with "_" or an ASCII letter, continues with "_" or ASCII alphanumerics, and may end with one "!", "?" or "=". NOTE(review): the character before the ":" is not inspected, so the second colon of "::" followed by a name also produces an entry; confirm this is intended. */ fn literal_symbol_arguments(text: &str) -> Vec { + let chars = text.char_indices().collect::>(); + let mut symbols = Vec::new(); + let mut index = 0; + while index < chars.len() { + if chars[index].1 != ':' { + index += 1; + continue; + } + let Some((_, first)) = chars.get(index + 1).copied() else { + index += 1; + continue; + }; + if !(first == '_' || first.is_ascii_alphabetic()) { + index += 1; + continue; + } + + let start = chars[index + 1].0; + let mut end = start + first.len_utf8(); + let mut cursor = index + 2; + while let Some((byte, ch)) = chars.get(cursor).copied() { + if ch == '_' || ch.is_ascii_alphanumeric() { + end = byte + ch.len_utf8(); + cursor += 1; + } else { + break; + } + } + if let Some((byte, ch)) = chars.get(cursor).copied() { + if matches!(ch, '!' | '?' 
| '=') { + end = byte + ch.len_utf8(); + cursor += 1; + } + } + symbols.push(text[start..end].to_string()); + index = cursor; + } + symbols +} + +/* Trims surrounding whitespace and then applies exact_bare_identifier_text. */ fn bare_identifier_text(text: &str) -> bool { + let text = text.trim(); + exact_bare_identifier_text(text) +} + +/* True when `text` is an identifier: first character "_" or an ASCII letter, then "_" or ASCII alphanumerics, optionally ending with a single "!", "?" or "=" as the very last character. */ fn exact_bare_identifier_text(text: &str) -> bool { + let mut chars = text.chars(); + let Some(first) = chars.next() else { + return false; + }; + if !(first == '_' || first.is_ascii_alphabetic()) { + return false; + } + let mut chars = chars.peekable(); + while let Some(ch) = chars.next() { + if ch == '_' || ch.is_ascii_alphanumeric() { + continue; + } + if matches!(ch, '!' | '?' | '=') { + return chars.peek().is_none(); + } + return false; + } + true +} + +/* True when `text` is "@" followed by an exact bare identifier. */ fn ruby_instance_variable_text(text: &str) -> bool { + text.strip_prefix('@') + .map(exact_bare_identifier_text) + .unwrap_or(false) +} + +/* True when `text` is an optional leading "-" followed by one or more ASCII digits. NOTE(review): the body is identical to integer_text. */ fn exact_integer_text(text: &str) -> bool { + let digits = text.strip_prefix('-').unwrap_or(text); + !digits.is_empty() && digits.chars().all(|ch| ch.is_ascii_digit()) +} + +/* True when any token of `text` (split on whitespace, "(" and ",") starts with "<<", optionally followed by "-" or "~", and then "_" or an ASCII letter. NOTE(review): a quoted marker has a quote character in that position and is therefore not matched; confirm whether quoted heredoc markers need support. */ fn heredoc_marker_text(text: &str) -> bool { + text.split(|ch: char| ch.is_whitespace() || matches!(ch, '(' | ',')) + .any(|token| { + let Some(marker) = token.strip_prefix("<<") else { + return false; + }; + let marker = marker + .strip_prefix('-') + .or_else(|| marker.strip_prefix('~')) + .unwrap_or(marker); + let mut chars = marker.chars(); + let Some(first) = chars.next() else { + return false; + }; + first == '_' || first.is_ascii_alphabetic() + }) +} + +/* Accepts the same shape as exact_bare_identifier_text: "_" or ASCII letter first, "_" or ASCII alphanumerics after, with an optional final "!", "?" or "=". */ fn ruby_variable_name_text(text: &str) -> bool { + let mut chars = text.chars().peekable(); + let Some(first) = chars.next() else { + return false; + }; + if !(first == '_' || first.is_ascii_alphabetic()) { + return false; + } + while let Some(ch) = chars.next() { + if matches!(ch, '!' | '?' 
| '=') { + return chars.peek().is_none(); + } + if !(ch == '_' || ch.is_ascii_alphanumeric()) { + return false; + } + } + true +} + +/* Returns the first operator from the fixed list (longest forms first) that occurs anywhere in `text`; None when none occurs. NOTE(review): the choice follows list order, not the position of the operator in the text. */ fn comparison_operator_from_text(text: &str) -> Option { + for operator in ["===", "!==", "==", "!=", "<=", ">=", "<", ">"] { + if text.contains(operator) { + return Some(operator.to_string()); + } + } + None +} + +/* Maps a compound-assignment token to its binary operator; None for any token not listed. */ fn operator_assignment_statement_operator(text: &str) -> Option { + match text { + "+=" => Some("+".to_string()), + "-=" => Some("-".to_string()), + "*=" => Some("*".to_string()), + "/=" => Some("/".to_string()), + "%=" => Some("%".to_string()), + "&=" => Some("&".to_string()), + "|=" => Some("|".to_string()), + "^=" => Some("^".to_string()), + "||=" => Some("||".to_string()), + "&&=" => Some("&&".to_string()), + _ => None, + } +} + +/* String form of a child: String and Symbol values are cloned, Integer values are formatted; None for every other variant or a missing child. */ pub fn child_to_string(child: Option<&Child>) -> Option { + match child { + Some(Child::String(value)) | Some(Child::Symbol(value)) => Some(value.clone()), + Some(Child::Integer(value)) => Some(value.to_string()), + _ => None, + } +} + +#[cfg(test)] +#[path = "ast-test.rs"] +mod tests; diff --git a/gems/decomplex/rust/src/decomplex/ast/adapters/base.rs b/gems/decomplex/rust/src/decomplex/ast/adapters/base.rs new file mode 100644 index 000000000..fc40580c0 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/ast/adapters/base.rs @@ -0,0 +1,1018 @@ +use super::super::{ + bracketed, case_arm_descendant, concatenated_string_node, concatenated_string_target, + descendant, direct_binary_operator, element_reference_shape, function_kind, + identifier_kind_name, named_children, node_text, question_colon_ternary_parts, + raw_named_children, ruby_exception_constant_text, statement_block_wrapper, TernaryParts, + ARRAY_LITERAL_NODE_KINDS, ARRAY_LITERAL_WRAPPER_KINDS, BOOLEAN_EXPRESSION_KINDS, + CASE_ARGUMENT_WHEN_KINDS, CASE_ELSE_KINDS, CASE_NODE_KINDS, COMPARISON_EXPRESSION_KINDS, + CONCATENATED_STRING_WRAPPER_KINDS, DOTTED_EXPRESSION_WRAPPER_KINDS, + ELEMENT_REFERENCE_NODE_KINDS, 
ELEMENT_REFERENCE_WRAPPER_KINDS, EMPTY_BODY_WRAPPER_KINDS, + ENSURE_BODY_WRAPPER_KINDS, HASH_LITERAL_NODE_KINDS, HASH_LITERAL_WRAPPER_KINDS, + HEREDOC_BODY_WRAPPER_KINDS, IF_NODE_KINDS, INTERPOLATED_STATEMENT_WRAPPER_KINDS, + LEADING_CASE_WRAPPER_KINDS, LEADING_FUNCTION_WRAPPER_KINDS, LEADING_IF_WRAPPER_KINDS, + LEADING_LOOP_WRAPPER_KINDS, LEADING_OWNER_WRAPPER_KINDS, LOOP_NODE_KINDS, OWNER_NODE_KINDS, + OWNER_STATEMENT_NESTED_KINDS, QUESTION_COLON_TERNARY_KINDS, RESCUE_BODY_WRAPPER_KINDS, +}; +use tree_sitter::Node as TreeSitterNode; + +/* Assignment operator tokens, one list per language family; presumably consulted by the per-language adapters (their use is not visible in this chunk). */ pub(crate) const COMMON_ASSIGNMENT_OPERATORS: &[&str] = &["=", "+=", "-=", "*=", "/=", "%="]; +pub(crate) const RUBY_ASSIGNMENT_OPERATORS: &[&str] = &[ + "=", "+=", "-=", "*=", "/=", "%=", "**=", "&&=", "||=", "&=", "|=", "^=", "<<=", ">>=", +]; +pub(crate) const PYTHON_ASSIGNMENT_OPERATORS: &[&str] = &[ + "=", "+=", "-=", "*=", "/=", "%=", "//=", "**=", "@=", "&=", "|=", "^=", "<<=", ">>=", ":=", +]; +pub(crate) const LUA_ASSIGNMENT_OPERATORS: &[&str] = &["="]; +pub(crate) const TYPESCRIPT_ASSIGNMENT_OPERATORS: &[&str] = &[ + "=", "+=", "-=", "*=", "/=", "%=", "**=", "<<=", ">>=", ">>>=", "&=", "|=", "^=", "&&=", "||=", + "??=", +]; + +/* Adapter decision for a node's named children: keep the default list, drop all of them, use another node's children, or substitute an explicit list. */ pub(crate) enum NamedChildrenAction<'tree> { + Default, + Drop, + Recurse(TreeSitterNode<'tree>), + Replace(Vec>), +} + +/* Language-specific hooks used during AST normalization. Every method shown here has a default implementation. */ pub(crate) trait AstNormalizationAdapter: Sync { + /* Default: false. */ fn ruby(&self) -> bool { + false + } + + /* Default: false. */ fn yield_statement(&self, _node: TreeSitterNode<'_>, _source: &str) -> bool { + false + } + + /* Default: false. */ fn super_statement(&self, _node: TreeSitterNode<'_>, _source: &str) -> bool { + false + } + + /* Default: false. */ fn safe_navigation_call(&self, _node: TreeSitterNode<'_>, _source: &str) -> bool { + false + } + + /* True when ternary_parts finds a ternary in `node`. */ fn ternary_statement(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + self.ternary_parts(node, source).is_some() + } + + /* Default: question_colon_ternary_parts restricted to QUESTION_COLON_TERNARY_KINDS. */ fn ternary_parts<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + question_colon_ternary_parts(node, source, QUESTION_COLON_TERNARY_KINDS) + } + 
+ fn case_argument_list(&self, _node: TreeSitterNode<'_>, _source: &str) -> bool { + false + } + + fn case_arm(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + CASE_ARGUMENT_WHEN_KINDS.contains(&node.kind()) && !self.case_else_arm(node, source) + } + + fn case_arm_body_nodes<'tree>( + &self, + _node: TreeSitterNode<'tree>, + _source: &str, + ) -> Option>> { + None + } + + fn case_else_node<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + let mut stack = named_children(node); + while !stack.is_empty() { + let child = stack.remove(0); + if self.case_else_node_kind(child, source) { + return Some(child); + } + if CASE_ARGUMENT_WHEN_KINDS.contains(&child.kind()) { + continue; + } + if !function_kind(child.kind()) { + stack.extend(named_children(child)); + } + } + None + } + + fn case_else_node_kind(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + CASE_ELSE_KINDS.contains(&node.kind()) || self.case_else_arm(node, source) + } + + fn case_else_arm(&self, _node: TreeSitterNode<'_>, _source: &str) -> bool { + false + } + + fn leading_function_statement(&self, _node: TreeSitterNode<'_>, _source: &str) -> bool { + false + } + + fn leading_function_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if !LEADING_FUNCTION_WRAPPER_KINDS.contains(&node.kind()) { + return None; + } + if node + .children(&mut node.walk()) + .next() + .map(|child| child.kind() == "def") + .unwrap_or(false) + { + return Some(node); + } + let raw_named = named_children(node); + if raw_named.len() == 1 + && matches!(raw_named[0].kind(), "method" | "singleton_method") + && node_text(raw_named[0], source) == node_text(node, source) + { + return Some(raw_named[0]); + } + None + } + + fn leading_function_body_kind(&self) -> &'static str { + "body_statement" + } + + fn leading_owner_statement(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + let Some(target) = self.leading_owner_target(node, source) else { + 
return false; + }; + target + .children(&mut target.walk()) + .next() + .map(|child| matches!(child.kind(), "class" | "module")) + .unwrap_or(false) + && named_children(target).len() >= 2 + && named_children(target) + .first() + .map(|child| !OWNER_STATEMENT_NESTED_KINDS.contains(&child.kind())) + .unwrap_or(false) + } + + fn leading_owner_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if !LEADING_OWNER_WRAPPER_KINDS.contains(&node.kind()) { + return None; + } + let raw_named = named_children(node); + if raw_named.len() == 1 + && OWNER_NODE_KINDS.contains(&raw_named[0].kind()) + && node_text(raw_named[0], source) == node_text(node, source) + { + return Some(raw_named[0]); + } + Some(node) + } + + fn leading_if_statement(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + let Some(target) = self.leading_if_target(node, source) else { + return false; + }; + target + .children(&mut target.walk()) + .next() + .map(|child| matches!(child.kind(), "if" | "unless")) + .unwrap_or(false) + && named_children(target).len() >= 2 + && named_children(target) + .first() + .map(|child| !IF_NODE_KINDS.contains(&child.kind())) + .unwrap_or(false) + } + + fn leading_if_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if !LEADING_IF_WRAPPER_KINDS.contains(&node.kind()) { + return None; + } + let raw_named = named_children(node); + if raw_named.len() == 1 + && IF_NODE_KINDS.contains(&raw_named[0].kind()) + && node_text(raw_named[0], source) == node_text(node, source) + { + return Some(raw_named[0]); + } + Some(node) + } + + fn leading_case_statement(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + let Some(target) = self.leading_case_target(node, source) else { + return false; + }; + target + .children(&mut target.walk()) + .next() + .map(|child| matches!(child.kind(), "case" | "match" | "switch")) + .unwrap_or(false) + && case_arm_descendant(target) + } + + fn 
leading_case_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if !LEADING_CASE_WRAPPER_KINDS.contains(&node.kind()) { + return None; + } + let raw_named = named_children(node); + if raw_named.len() == 1 + && CASE_NODE_KINDS.contains(&raw_named[0].kind()) + && node_text(raw_named[0], source) == node_text(node, source) + { + return Some(raw_named[0]); + } + Some(node) + } + + fn leading_loop_statement(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + let Some(target) = self.leading_loop_target(node, source) else { + return false; + }; + target + .children(&mut target.walk()) + .next() + .map(|child| !child.is_named() && matches!(child.kind(), "while" | "until")) + .unwrap_or(false) + && named_children(target).len() >= 2 + } + + fn leading_loop_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if !LEADING_LOOP_WRAPPER_KINDS.contains(&node.kind()) { + return None; + } + let raw_named = named_children(node); + if raw_named.len() == 1 + && LOOP_NODE_KINDS.contains(&raw_named[0].kind()) + && node_text(raw_named[0], source) == node_text(node, source) + { + return Some(raw_named[0]); + } + Some(node) + } + + fn rescue_body_statement(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + !self.rescue_clauses(node, source).is_empty() + } + + fn rescue_body_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + _source: &str, + ) -> Option> { + if RESCUE_BODY_WRAPPER_KINDS.contains(&node.kind()) { + Some(node) + } else { + None + } + } + + fn rescue_body_nodes<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Vec> { + let Some(target) = self.rescue_body_target(node, source) else { + return Vec::new(); + }; + let named = named_children(target); + let Some(index) = named.iter().position(|child| self.rescue_clause(*child)) else { + return Vec::new(); + }; + named[..index].to_vec() + } + + fn rescue_clauses<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: 
&str, + ) -> Vec> { + let Some(target) = self.rescue_body_target(node, source) else { + return Vec::new(); + }; + named_children(target) + .into_iter() + .filter(|child| self.rescue_clause(*child)) + .collect() + } + + fn rescue_clause_exceptions<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Vec> { + let Some(exceptions) = named_children(node) + .into_iter() + .find(|child| child.kind() == "exceptions") + else { + return Vec::new(); + }; + let text = node_text(exceptions, source).trim(); + if ruby_exception_constant_text(text) + || (named_children(exceptions).is_empty() && !text.is_empty()) + { + return vec![exceptions]; + } + named_children(exceptions) + } + + fn rescue_clause_exceptions_source<'tree>( + &self, + node: TreeSitterNode<'tree>, + _source: &str, + ) -> Option> { + named_children(node) + .into_iter() + .find(|child| child.kind() == "exceptions") + } + + fn rescue_clause_exception_variable_name<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + named_children(node) + .into_iter() + .find(|child| child.kind() == "exception_variable") + .and_then(|variable| { + named_children(variable) + .into_iter() + .find(|child| identifier_kind_name(child.kind())) + }) + } + + fn rescue_clause_exception_variable_source<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + named_children(node) + .into_iter() + .find(|child| child.kind() == "exception_variable") + } + + fn rescue_clause_handler<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + named_children(node).into_iter().rev().find(|child| { + !matches!( + child.kind(), + "exceptions" | "exception_variable" | "comment" + ) + }) + } + + fn rescue_clause(&self, node: TreeSitterNode<'_>) -> bool { + node.kind() == "rescue" + } + + fn ensure_body_statement(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + self.ensure_clause(node, source).is_some() + } + + fn ensure_body_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + _source: &str, 
+ ) -> Option> { + if ENSURE_BODY_WRAPPER_KINDS.contains(&node.kind()) { + Some(node) + } else { + None + } + } + + fn ensure_body_nodes<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Vec> { + let Some(target) = self.ensure_body_target(node, source) else { + return Vec::new(); + }; + let named = named_children(target); + let Some(index) = named + .iter() + .position(|child| self.ensure_clause_kind(*child)) + else { + return Vec::new(); + }; + named[..index].to_vec() + } + + fn ensure_clause<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + let target = self.ensure_body_target(node, source)?; + named_children(target) + .into_iter() + .find(|child| self.ensure_clause_kind(*child)) + } + + fn ensure_clause_body<'tree>( + &self, + _node: TreeSitterNode<'tree>, + ) -> Option> { + None + } + + fn ensure_clause_kind(&self, node: TreeSitterNode<'_>) -> bool { + node.kind() == "ensure" + } + + fn array_literal_statement(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + self.array_literal_target(node, source).is_some() + } + + fn array_literal_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if ARRAY_LITERAL_NODE_KINDS.contains(&node.kind()) { + return Some(node); + } + if !ARRAY_LITERAL_WRAPPER_KINDS.contains(&node.kind()) { + return None; + } + if bracketed(node, source, "[", "]") { + return Some(node); + } + + let named = named_children(node); + let child = *named.first()?; + if named.len() == 1 { + if ARRAY_LITERAL_NODE_KINDS.contains(&child.kind()) { + return Some(child); + } + + if matches!(child.kind(), "expression_statement" | "statement") + && node_text(child, source).trim() == node_text(node, source).trim() + { + return self.array_literal_target(child, source); + } + + let stripped = node_text(node, source).trim(); + if stripped == node_text(child, source) + || stripped == format!("{};", node_text(child, source)) + { + if 
ARRAY_LITERAL_NODE_KINDS.contains(&child.kind()) { + return Some(child); + } + } + } + + None + } + + fn array_literal_values<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Vec> { + let target = self.array_literal_target(node, source).unwrap_or(node); + named_children(target) + } + + fn element_reference_statement(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + self.element_reference_target(node, source).is_some() + } + + fn element_reference_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if ELEMENT_REFERENCE_NODE_KINDS.contains(&node.kind()) { + return Some(node); + } + if !ELEMENT_REFERENCE_WRAPPER_KINDS.contains(&node.kind()) { + return None; + } + + let named = named_children(node); + if named.len() == 1 + && ELEMENT_REFERENCE_WRAPPER_KINDS.contains(&named[0].kind()) + && node_text(named[0], source).trim() == node_text(node, source).trim() + { + return self.element_reference_target(named[0], source); + } + if named.len() == 1 && ELEMENT_REFERENCE_NODE_KINDS.contains(&named[0].kind()) { + let stripped = node_text(node, source).trim(); + let child_text = node_text(named[0], source); + if stripped == child_text || stripped == format!("{child_text};") { + return Some(named[0]); + } + } + + if element_reference_shape(node, source) { + Some(node) + } else { + None + } + } + + fn element_reference_receiver<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + let target = self.element_reference_target(node, source).unwrap_or(node); + named_children(target).first().copied() + } + + fn element_reference_arguments<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Vec> { + let target = self.element_reference_target(node, source).unwrap_or(node); + named_children(target).into_iter().skip(1).collect() + } + + fn hash_literal_statement(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + self.hash_literal_target(node, source).is_some() + } + + 
fn hash_literal_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if HASH_LITERAL_NODE_KINDS.contains(&node.kind()) { + return Some(node); + } + if !HASH_LITERAL_WRAPPER_KINDS.contains(&node.kind()) { + return None; + } + if statement_block_wrapper(node) { + return None; + } + if bracketed(node, source, "{", "}") { + return Some(node); + } + + let named = named_children(node); + if named.len() != 1 { + return None; + } + + let child = named[0]; + if node.kind() == "parenthesized_expression" { + return self.hash_literal_target(child, source); + } + + let stripped = node_text(node, source).trim(); + let child_text = node_text(child, source); + if stripped == child_text || stripped == format!("{child_text};") { + if HASH_LITERAL_NODE_KINDS.contains(&child.kind()) { + return Some(child); + } + if HASH_LITERAL_WRAPPER_KINDS.contains(&child.kind()) { + return self.hash_literal_target(child, source); + } + } + + None + } + + fn hash_literal_values<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Vec> { + let target = self.hash_literal_target(node, source).unwrap_or(node); + named_children(target) + } + + fn empty_body_statement(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + EMPTY_BODY_WRAPPER_KINDS.contains(&node.kind()) + && named_children(node).is_empty() + && node_text(node, source).trim().is_empty() + } + + fn heredoc_body_statement(&self, node: TreeSitterNode<'_>) -> bool { + HEREDOC_BODY_WRAPPER_KINDS.contains(&node.kind()) + && named_children(node) + .iter() + .any(|child| child.kind() == "heredoc_body") + } + + fn heredoc_call_for_body(&self, _node: TreeSitterNode<'_>, _source: &str) -> bool { + false + } + + fn interpolated_statement( + &self, + node: TreeSitterNode<'_>, + children: &[TreeSitterNode<'_>], + ) -> bool { + INTERPOLATED_STATEMENT_WRAPPER_KINDS.contains(&node.kind()) + && children.iter().any(|child| child.kind() == "interpolation") + } + + fn concatenated_string_statement( + 
&self, + node: TreeSitterNode<'_>, + children: &[TreeSitterNode<'_>], + ) -> bool { + if concatenated_string_node(node).is_some() { + return true; + } + if !self + .concatenated_string_wrapper_kinds() + .contains(&node.kind()) + { + return false; + } + if children.len() > 1 && children.iter().all(|child| child.kind() == "string") { + return true; + } + children.len() == 1 && concatenated_string_target(children[0]).is_some() + } + + fn zero_child_identifier_call(&self, _node: TreeSitterNode<'_>, _source: &str) -> bool { + false + } + + fn operator_call_expression_kind(&self, node: TreeSitterNode<'_>) -> bool { + matches!(node.kind(), "binary" | "binary_expression") + } + + fn boolean_expression_kind(&self, node: TreeSitterNode<'_>) -> bool { + BOOLEAN_EXPRESSION_KINDS.contains(&node.kind()) + } + + fn comparison_expression_kind(&self, node: TreeSitterNode<'_>) -> bool { + COMPARISON_EXPRESSION_KINDS.contains(&node.kind()) + } + + fn dotted_expression_wrapper(&self, node: TreeSitterNode<'_>) -> bool { + self.dotted_expression_wrapper_kinds() + .contains(&node.kind()) + } + + fn unary_not_expression(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + matches!(node.kind(), "unary" | "unary_expression") + && node_text(node, source).trim_start().starts_with('!') + } + + fn unary_minus_expression(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + matches!(node.kind(), "unary" | "unary_expression") + && node_text(node, source).trim_start().starts_with('-') + } + + fn binary_operator(&self, node: TreeSitterNode<'_>, source: &str) -> Option { + if let Some(operator) = direct_binary_operator(node, source) { + return Some(operator.to_string()); + } + + let raw_named = raw_named_children(node); + if raw_named.len() == 1 + && self.binary_wrapper_kinds().contains(&raw_named[0].kind()) + && node_text(node, source) == node_text(raw_named[0], source) + { + return self.binary_operator(raw_named[0], source); + } + + None + } + + fn class_node(&self, node: 
TreeSitterNode<'_>) -> bool { + matches!( + node.kind(), + "class" | "class_definition" | "class_declaration" | "class_specifier" + ) + } + + fn local_identifier_text(&self, _node: TreeSitterNode<'_>, _source: &str) -> Option { + None + } + + fn constant_identifier_text(&self, _node: TreeSitterNode<'_>, _source: &str) -> Option { + None + } + + fn self_identifier(&self, _node: TreeSitterNode<'_>, _source: &str) -> bool { + false + } + + fn call_node(&self, _node: TreeSitterNode<'_>, _source: &str) -> bool { + false + } + + fn intrinsic_call_name( + &self, + _node: TreeSitterNode<'_>, + _source: &str, + ) -> Option<&'static str> { + None + } + + fn block_node_kind(&self, _kind: &str) -> bool { + false + } + + fn loop_node_type(&self, _kind: &str) -> Option<&'static str> { + None + } + + fn member_access_operator(&self, text: &str) -> bool { + matches!(text, "." | "&.") + } + + fn source_text(&self, text: &str) -> String { + text.to_string() + } + + fn state_field_name(&self, _node: TreeSitterNode<'_>, _source: &str) -> Option { + None + } + + fn member_assignment_target(&self, _node: TreeSitterNode<'_>, _source: &str) -> bool { + false + } + + fn instance_variable(&self, node: TreeSitterNode<'_>, _source: &str) -> bool { + node.kind() == "instance_variable" + } + + fn global_variable(&self, node: TreeSitterNode<'_>, _source: &str) -> bool { + node.kind() == "global_variable" + } + + fn literal_fragment_assignment_context(&self, node: TreeSitterNode<'_>, _source: &str) -> bool { + let Some(parent) = node.parent() else { + return false; + }; + if matches!( + parent.kind(), + "string" | "delimited_symbol" | "regex" | "regex_literal" + ) { + return true; + } + + matches!( + node.kind(), + "string_content" | "escape_sequence" | "interpolation" | "string_fragment" + ) && parent + .parent() + .map(|grandparent| { + matches!( + grandparent.kind(), + "string" | "delimited_symbol" | "regex" | "regex_literal" + ) + }) + .unwrap_or(false) + } + + fn assignment_operator(&self, 
text: &str) -> bool { + self.assignment_operators().contains(&text) + } + + fn unwrap_node( + &self, + node: TreeSitterNode<'_>, + _source: &str, + named_child_count: usize, + ) -> bool { + matches!( + node.kind(), + "parenthesized_expression" + | "parenthesized_statements" + | "expression_statement" + | "statement" + | "case_pattern" + | "match_pattern" + | "pattern" + ) && named_child_count == 1 + } + + fn interpolated_string( + &self, + node: TreeSitterNode<'_>, + children: &[TreeSitterNode<'_>], + ) -> bool { + node.kind() == "string" && children.iter().any(|child| child.kind() == "interpolation") + } + + fn lambda_expression(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + self.lambda_target(node, source).is_some() + } + + fn lambda_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + _source: &str, + ) -> Option> { + if node.kind() == "lambda" { + Some(node) + } else { + None + } + } + + fn interpolation_node(&self, node: TreeSitterNode<'_>) -> bool { + node.kind() == "interpolation" + } + + fn explicit_alternative<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + named_children(node) + .into_iter() + .find(|child| matches!(child.kind(), "else" | "else_clause" | "else_statement")) + } + + fn named_field<'tree>( + &self, + node: TreeSitterNode<'tree>, + name: &str, + ) -> Option> { + node.child_by_field_name(name) + } + + fn named_children_action<'tree>( + &self, + _node: TreeSitterNode<'tree>, + _source: &str, + _children: &[TreeSitterNode<'tree>], + ) -> NamedChildrenAction<'tree> { + NamedChildrenAction::Default + } + + fn nested_class_body_child<'tree>( + &self, + _node: TreeSitterNode<'tree>, + _source: &str, + ) -> Option> { + None + } + + fn else_if_block<'tree>( + &self, + _node: TreeSitterNode<'tree>, + _source: &str, + ) -> Option> { + None + } + + fn logical_operator_assignment(&self, _operator: &str) -> bool { + false + } + + fn statement_wrapped_call_target<'tree>( + &self, + _node: TreeSitterNode<'tree>, + _source: 
&str, + ) -> Option> { + None + } + + fn inline_def_function_text_source<'tree>( + &self, + function: TreeSitterNode<'tree>, + _source: &str, + ) -> TreeSitterNode<'tree> { + function + } + + fn bare_const_call_function(&self, _function: TreeSitterNode<'_>) -> bool { + false + } + + fn normalize_default_parameters(&self) -> bool { + false + } + + fn normalize_block_parameters(&self) -> bool { + false + } + + fn boolean_statement_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + _source: &str, + _children: &[TreeSitterNode<'tree>], + ) -> TreeSitterNode<'tree> { + node + } + + fn single_assignment_block_child(&self, _node: TreeSitterNode<'_>, _source: &str) -> bool { + false + } + + fn member_read_excluded(&self, _node: TreeSitterNode<'_>) -> bool { + false + } + + fn no_paren_string_argument_content<'tree>( + &self, + _node: TreeSitterNode<'tree>, + _source: &str, + ) -> Option> { + None + } + + fn elides_tail_returns(&self) -> bool { + false + } + + fn elides_implicit_nil_body(&self) -> bool { + false + } + + fn assignment_operators(&self) -> &'static [&'static str] { + COMMON_ASSIGNMENT_OPERATORS + } + + fn binary_wrapper_kinds(&self) -> &'static [&'static str] { + super::super::BINARY_WRAPPER_KINDS + } + + fn concatenated_string_wrapper_kinds(&self) -> &'static [&'static str] { + CONCATENATED_STRING_WRAPPER_KINDS + } + + fn dotted_expression_wrapper_kinds(&self) -> &'static [&'static str] { + DOTTED_EXPRESSION_WRAPPER_KINDS + } + + fn leading_function_statement_with_keyword( + &self, + node: TreeSitterNode<'_>, + source: &str, + keyword: &str, + wrapper_kinds: &[&str], + ) -> bool { + if !wrapper_kinds.contains(&node.kind()) { + return false; + } + let Some(target) = self.leading_function_target(node, source) else { + return false; + }; + target + .children(&mut target.walk()) + .next() + .map(|child| child.kind() == keyword) + .unwrap_or(false) + && named_children(target) + .iter() + .any(|child| identifier_kind_name(child.kind())) + } + + fn 
exact_single_named_child<'tree>( + &self, + node: TreeSitterNode<'tree>, + kinds: &[&str], + source: &str, + ) -> Option> { + let children = named_children(node); + if children.len() != 1 { + return None; + } + let child = children[0]; + if !kinds.contains(&child.kind()) || node_text(node, source) != node_text(child, source) { + return None; + } + Some(child) + } + + fn default_case_pattern(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + named_children(node) + .into_iter() + .find(|child| super::super::CASE_DEFAULT_PATTERN_KINDS.contains(&child.kind())) + .map(|pattern| node_text(pattern, source).trim() == "_") + .unwrap_or(false) + } + + fn descendant<'tree>( + &self, + node: TreeSitterNode<'tree>, + kinds: &[&str], + ) -> Option> { + descendant(node, kinds) + } +} diff --git a/gems/decomplex/rust/src/decomplex/ast/adapters/c.rs b/gems/decomplex/rust/src/decomplex/ast/adapters/c.rs new file mode 100644 index 000000000..badc37c41 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/ast/adapters/c.rs @@ -0,0 +1,5 @@ +use super::base::AstNormalizationAdapter; + +pub(crate) struct CAstAdapter; + +impl AstNormalizationAdapter for CAstAdapter {} diff --git a/gems/decomplex/rust/src/decomplex/ast/adapters/cpp.rs b/gems/decomplex/rust/src/decomplex/ast/adapters/cpp.rs new file mode 100644 index 000000000..daa9143ff --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/ast/adapters/cpp.rs @@ -0,0 +1,5 @@ +use super::base::AstNormalizationAdapter; + +pub(crate) struct CppAstAdapter; + +impl AstNormalizationAdapter for CppAstAdapter {} diff --git a/gems/decomplex/rust/src/decomplex/ast/adapters/csharp.rs b/gems/decomplex/rust/src/decomplex/ast/adapters/csharp.rs new file mode 100644 index 000000000..256fa4d24 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/ast/adapters/csharp.rs @@ -0,0 +1,5 @@ +use super::base::AstNormalizationAdapter; + +pub(crate) struct CSharpAstAdapter; + +impl AstNormalizationAdapter for CSharpAstAdapter {} diff --git 
a/gems/decomplex/rust/src/decomplex/ast/adapters/go.rs b/gems/decomplex/rust/src/decomplex/ast/adapters/go.rs new file mode 100644 index 000000000..79abeea2f --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/ast/adapters/go.rs @@ -0,0 +1,5 @@ +use super::base::AstNormalizationAdapter; + +pub(crate) struct GoAstAdapter; + +impl AstNormalizationAdapter for GoAstAdapter {} diff --git a/gems/decomplex/rust/src/decomplex/ast/adapters/java.rs b/gems/decomplex/rust/src/decomplex/ast/adapters/java.rs new file mode 100644 index 000000000..4acf3e41c --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/ast/adapters/java.rs @@ -0,0 +1,5 @@ +use super::base::AstNormalizationAdapter; + +pub(crate) struct JavaAstAdapter; + +impl AstNormalizationAdapter for JavaAstAdapter {} diff --git a/gems/decomplex/rust/src/decomplex/ast/adapters/javascript.rs b/gems/decomplex/rust/src/decomplex/ast/adapters/javascript.rs new file mode 100644 index 000000000..a545fc064 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/ast/adapters/javascript.rs @@ -0,0 +1 @@ +pub(crate) use super::typescript::TypeScriptAstAdapter as JavaScriptAstAdapter; diff --git a/gems/decomplex/rust/src/decomplex/ast/adapters/kotlin.rs b/gems/decomplex/rust/src/decomplex/ast/adapters/kotlin.rs new file mode 100644 index 000000000..25fc5396e --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/ast/adapters/kotlin.rs @@ -0,0 +1,5 @@ +use super::base::AstNormalizationAdapter; + +pub(crate) struct KotlinAstAdapter; + +impl AstNormalizationAdapter for KotlinAstAdapter {} diff --git a/gems/decomplex/rust/src/decomplex/ast/adapters/lua.rs b/gems/decomplex/rust/src/decomplex/ast/adapters/lua.rs new file mode 100644 index 000000000..9e4b17cf3 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/ast/adapters/lua.rs @@ -0,0 +1,480 @@ +use super::super::{ + bracketed, direct_binary_operator, lua_keyed_table_target, lua_positional_table_target, + named_children, node_text, raw_named_children, 
LUA_LEADING_FUNCTION_WRAPPER_KINDS, + LUA_LEADING_IF_WRAPPER_KINDS, +}; +use super::base::{AstNormalizationAdapter, NamedChildrenAction, LUA_ASSIGNMENT_OPERATORS}; +use tree_sitter::Node as TreeSitterNode; + +pub(crate) struct LuaAstAdapter; + +impl AstNormalizationAdapter for LuaAstAdapter { + fn explicit_alternative<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + named_children(node) + .into_iter() + .find(|child| matches!(child.kind(), "elseif_statement" | "else" | "else_statement")) + } + + fn ternary_parts<'tree>( + &self, + _node: TreeSitterNode<'tree>, + _source: &str, + ) -> Option> { + None + } + + fn named_children_action<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + children: &[TreeSitterNode<'tree>], + ) -> NamedChildrenAction<'tree> { + if node.kind() == "variable_list" && children.len() == 1 { + if children[0].kind() == "identifier" && lua_single_assignment_block_child(node, source) + { + return NamedChildrenAction::Drop; + } + if node + .parent() + .map(|parent| parent.kind() == "for_generic_clause") + .unwrap_or(false) + { + return NamedChildrenAction::Drop; + } + if node + .parent() + .map(|parent| { + parent.kind() == "variable_declaration" && raw_named_children(parent).len() == 1 + }) + .unwrap_or(false) + { + return NamedChildrenAction::Drop; + } + if children[0].kind() == "dot_index_expression" + && node_text(node, source) == node_text(children[0], source) + { + return NamedChildrenAction::Recurse(children[0]); + } + } + + if node.kind() == "expression_list" && children.len() == 1 { + if children[0].kind() == "identifier" + && node + .parent() + .map(|parent| { + matches!(parent.kind(), "assignment_statement" | "return_statement") + }) + .unwrap_or(false) + && node_text(node, source) == node_text(children[0], source) + { + return NamedChildrenAction::Drop; + } + if matches!( + children[0].kind(), + "true" | "false" | "nil" | "number" | "integer" | "float" + ) && node + .parent() + .map(|parent| 
matches!(parent.kind(), "assignment_statement" | "return_statement")) + .unwrap_or(false) + && node_text(node, source) == node_text(children[0], source) + { + return NamedChildrenAction::Drop; + } + if matches!( + children[0].kind(), + "binary_expression" + | "function_call" + | "dot_index_expression" + | "function_definition" + | "string" + | "table_constructor" + ) && node_text(node, source) == node_text(children[0], source) + { + return NamedChildrenAction::Recurse(children[0]); + } + } + + if node.kind() == "field" && children.len() == 1 { + if children[0].kind() == "identifier" + && node_text(node, source) == node_text(children[0], source) + { + return NamedChildrenAction::Drop; + } + if matches!(children[0].kind(), "string" | "function_call") + && node_text(node, source) == node_text(children[0], source) + { + return NamedChildrenAction::Recurse(children[0]); + } + } + + if node.kind() == "block" + && children.len() == 1 + && matches!( + children[0].kind(), + "function_call" | "return_statement" | "variable_declaration" + ) + && node_text(node, source) == node_text(children[0], source) + { + return NamedChildrenAction::Recurse(children[0]); + } + + NamedChildrenAction::Default + } + + fn unary_minus_expression(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + (matches!(node.kind(), "unary" | "unary_expression") + && node_text(node, source).trim_start().starts_with('-')) + || (node.kind() == "expression_list" + && node + .children(&mut node.walk()) + .next() + .map(|child| node_text(child, source) == "-") + .unwrap_or(false) + && named_children(node).len() == 1) + } + + fn binary_operator(&self, node: TreeSitterNode<'_>, source: &str) -> Option { + if let Some(operator) = direct_binary_operator(node, source) { + return Some(operator.to_string()); + } + + let child = self.exact_single_named_child(node, self.binary_wrapper_kinds(), source)?; + self.binary_operator(child, source) + } + + fn unwrap_node( + &self, + node: TreeSitterNode<'_>, + source: 
&str, + named_child_count: usize, + ) -> bool { + if matches!( + node.kind(), + "parenthesized_expression" + | "parenthesized_statements" + | "expression_statement" + | "statement" + | "case_pattern" + | "match_pattern" + | "pattern" + ) && named_child_count == 1 + { + return true; + } + + if node.kind() != "expression_list" || named_child_count != 1 { + return false; + } + + let raw_named = raw_named_children(node); + if raw_named.len() == 1 + && raw_named[0].kind() == "parenthesized_expression" + && node_text(raw_named[0], source) == node_text(node, source) + { + return true; + } + + let raw_children = node.children(&mut node.walk()).collect::>(); + raw_children + .first() + .map(|child| node_text(*child, source) == "(") + .unwrap_or(false) + && raw_children + .last() + .map(|child| node_text(*child, source) == ")") + .unwrap_or(false) + } + + fn leading_function_statement(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + self.leading_function_statement_with_keyword( + node, + source, + "function", + LUA_LEADING_FUNCTION_WRAPPER_KINDS, + ) + } + + fn leading_function_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if !LUA_LEADING_FUNCTION_WRAPPER_KINDS.contains(&node.kind()) { + return None; + } + if node + .children(&mut node.walk()) + .next() + .map(|child| child.kind() == "function") + .unwrap_or(false) + { + return Some(node); + } + self.exact_single_named_child(node, &["function_declaration"], source) + } + + fn leading_function_body_kind(&self) -> &'static str { + "block" + } + + fn leading_if_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if LUA_LEADING_IF_WRAPPER_KINDS.contains(&node.kind()) { + if let Some(child) = self.exact_single_named_child(node, &["if_statement"], source) { + return Some(child); + } + } + if super::super::LEADING_IF_WRAPPER_KINDS.contains(&node.kind()) { + return Some(node); + } + None + } + + fn array_literal_target<'tree>( + &self, + 
node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if let Some(target) = lua_positional_table_target(node, source) { + return Some(target); + } + + if super::super::ARRAY_LITERAL_NODE_KINDS.contains(&node.kind()) { + return Some(node); + } + if !super::super::ARRAY_LITERAL_WRAPPER_KINDS.contains(&node.kind()) { + return None; + } + if bracketed(node, source, "[", "]") { + return Some(node); + } + let named = named_children(node); + let child = *named.first()?; + if named.len() == 1 { + if super::super::ARRAY_LITERAL_NODE_KINDS.contains(&child.kind()) { + return Some(child); + } + if matches!(child.kind(), "expression_statement" | "statement") + && node_text(child, source).trim() == node_text(node, source).trim() + { + return self.array_literal_target(child, source); + } + } + None + } + + fn array_literal_values<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Vec> { + let target = self.array_literal_target(node, source).unwrap_or(node); + if target.kind() == "arguments" { + if let Some(table) = named_children(target) + .into_iter() + .find(|child| child.kind() == "table_constructor") + { + if node_text(target, source).trim() == node_text(table, source).trim() { + return named_children(table); + } + } + } + if target.kind() == "table_constructor" { + return named_children(target); + } + + named_children(target) + } + + fn hash_literal_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if let Some(target) = lua_keyed_table_target(node, source) { + return Some(target); + } + + if super::super::HASH_LITERAL_NODE_KINDS.contains(&node.kind()) { + return Some(node); + } + if !super::super::HASH_LITERAL_WRAPPER_KINDS.contains(&node.kind()) { + return None; + } + None + } + + fn hash_literal_values<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Vec> { + let target = self.hash_literal_target(node, source).unwrap_or(node); + if target.kind() == "arguments" { + if let Some(table) 
= named_children(target) + .into_iter() + .find(|child| child.kind() == "table_constructor") + { + return named_children(table); + } + return named_children(target); + } + if target.kind() == "table_constructor" { + return named_children(target); + } + + named_children(target) + } + + fn member_assignment_target(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + if node.kind() != "variable_list" { + return false; + } + + let raw_named = raw_named_children(node); + let target = if raw_named.len() == 1 + && raw_named[0].kind() == "dot_index_expression" + && node_text(node, source) == node_text(raw_named[0], source) + { + raw_named[0] + } else { + node + }; + + raw_named_children(target).len() == 2 + && target + .children(&mut target.walk()) + .any(|child| !child.is_named() && node_text(child, source) == ".") + } + + fn literal_fragment_assignment_context(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + let Some(parent) = node.parent() else { + return false; + }; + if matches!( + parent.kind(), + "string" | "delimited_symbol" | "regex" | "regex_literal" + ) { + return true; + } + matches!( + node.kind(), + "string_content" | "escape_sequence" | "interpolation" | "string_fragment" + ) && (parent.kind() == "expression_list" + || parent + .parent() + .map(|grandparent| { + matches!( + grandparent.kind(), + "string" | "delimited_symbol" | "regex" | "regex_literal" + ) + }) + .unwrap_or(false)) + && !source.is_empty() + } + + fn lambda_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if node.kind() == "function_definition" { + return Some(node); + } + + if node.kind() == "expression_list" { + if node + .children(&mut node.walk()) + .next() + .map(|child| child.kind() == "function") + .unwrap_or(false) + && named_children(node) + .iter() + .any(|child| child.kind() == "block") + { + return Some(node); + } + + let named = named_children(node); + if named.len() == 1 + && named[0].kind() == "function_definition" + && 
node_text(named[0], source) == node_text(node, source) + { + return Some(named[0]); + } + } + + if node.kind() == "lambda" { + Some(node) + } else { + None + } + } + + fn operator_call_expression_kind(&self, node: TreeSitterNode<'_>) -> bool { + matches!( + node.kind(), + "binary" | "binary_expression" | "expression_list" + ) + } + + fn boolean_expression_kind(&self, node: TreeSitterNode<'_>) -> bool { + super::super::BOOLEAN_EXPRESSION_KINDS.contains(&node.kind()) + || node.kind() == "expression_list" + } + + fn comparison_expression_kind(&self, node: TreeSitterNode<'_>) -> bool { + super::super::COMPARISON_EXPRESSION_KINDS.contains(&node.kind()) + || node.kind() == "expression_list" + } + + fn single_assignment_block_child(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + lua_single_assignment_block_child(node, source) + } + + fn member_read_excluded(&self, node: TreeSitterNode<'_>) -> bool { + node.kind() == "field" + } + + fn no_paren_string_argument_content<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if node.kind() != "string" { + return None; + } + let parent = node.parent()?; + if parent.kind() != "arguments" || node_text(parent, source) != node_text(node, source) { + return None; + } + raw_named_children(node) + .into_iter() + .find(|child| child.kind() == "string_content") + } + + fn assignment_operators(&self) -> &'static [&'static str] { + LUA_ASSIGNMENT_OPERATORS + } +} + +fn lua_single_assignment_block_child(node: TreeSitterNode<'_>, source: &str) -> bool { + let Some(parent) = node.parent() else { + return false; + }; + if parent.kind() != "assignment_statement" { + return false; + } + let Some(grandparent) = parent.parent() else { + return false; + }; + grandparent.kind() == "block" + && node_text(grandparent, source) == node_text(parent, source) + && raw_named_children(grandparent).len() == 1 +} diff --git a/gems/decomplex/rust/src/decomplex/ast/adapters/mod.rs 
b/gems/decomplex/rust/src/decomplex/ast/adapters/mod.rs new file mode 100644 index 000000000..90d7a0a69 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/ast/adapters/mod.rs @@ -0,0 +1,71 @@ +pub(crate) mod base; +mod c; +mod cpp; +mod csharp; +mod go; +mod java; +mod javascript; +mod kotlin; +mod lua; +mod php; +mod python; +mod ruby; +mod rust; +mod swift; +mod typescript; +mod zig; + +pub(crate) use base::{AstNormalizationAdapter, NamedChildrenAction}; + +use super::super::syntax::Language; +use c::CAstAdapter; +use cpp::CppAstAdapter; +use csharp::CSharpAstAdapter; +use go::GoAstAdapter; +use java::JavaAstAdapter; +use javascript::JavaScriptAstAdapter; +use kotlin::KotlinAstAdapter; +use lua::LuaAstAdapter; +use php::PhpAstAdapter; +use python::PythonAstAdapter; +use ruby::RubyAstAdapter; +use rust::RustAstAdapter; +use swift::SwiftAstAdapter; +use typescript::TypeScriptAstAdapter; +use zig::ZigAstAdapter; + +static RUBY: RubyAstAdapter = RubyAstAdapter; +static PYTHON: PythonAstAdapter = PythonAstAdapter; +static JAVASCRIPT: JavaScriptAstAdapter = JavaScriptAstAdapter; +static TYPESCRIPT: TypeScriptAstAdapter = TypeScriptAstAdapter; +static LUA: LuaAstAdapter = LuaAstAdapter; +static C: CAstAdapter = CAstAdapter; +static CPP: CppAstAdapter = CppAstAdapter; +static CSHARP: CSharpAstAdapter = CSharpAstAdapter; +static GO: GoAstAdapter = GoAstAdapter; +static JAVA: JavaAstAdapter = JavaAstAdapter; +static KOTLIN: KotlinAstAdapter = KotlinAstAdapter; +static RUST: RustAstAdapter = RustAstAdapter; +static SWIFT: SwiftAstAdapter = SwiftAstAdapter; +static ZIG: ZigAstAdapter = ZigAstAdapter; +static PHP: PhpAstAdapter = PhpAstAdapter; + +pub(crate) fn normalization_adapter(language: Language) -> &'static dyn AstNormalizationAdapter { + match language { + Language::Ruby => &RUBY, + Language::Python => &PYTHON, + Language::JavaScript => &JAVASCRIPT, + Language::TypeScript => &TYPESCRIPT, + Language::Lua => &LUA, + Language::C => &C, + Language::Cpp => &CPP, + 
Language::CSharp => &CSHARP, + Language::Go => &GO, + Language::Java => &JAVA, + Language::Kotlin => &KOTLIN, + Language::Rust => &RUST, + Language::Swift => &SWIFT, + Language::Zig => &ZIG, + Language::Php => &PHP, + } +} diff --git a/gems/decomplex/rust/src/decomplex/ast/adapters/php.rs b/gems/decomplex/rust/src/decomplex/ast/adapters/php.rs new file mode 100644 index 000000000..63758c9e2 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/ast/adapters/php.rs @@ -0,0 +1,181 @@ +use super::super::node_text; +use super::base::{AstNormalizationAdapter, COMMON_ASSIGNMENT_OPERATORS}; +use tree_sitter::Node as TreeSitterNode; + +pub(crate) struct PhpAstAdapter; + +impl AstNormalizationAdapter for PhpAstAdapter { + fn local_identifier_text(&self, node: TreeSitterNode<'_>, source: &str) -> Option { + if !matches!(node.kind(), "name" | "qualified_name" | "variable_name") { + return None; + } + let text = php_identifier_text(node_text(node, source)); + if matches!(node.kind(), "name" | "qualified_name") && php_constant_identifier(&text) { + return None; + } + (!text.is_empty()).then_some(text) + } + + fn constant_identifier_text(&self, node: TreeSitterNode<'_>, source: &str) -> Option { + if !matches!(node.kind(), "name" | "qualified_name") { + return None; + } + let text = php_identifier_text(node_text(node, source)); + php_constant_identifier(&text).then_some(text) + } + + fn self_identifier(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + node.kind() == "variable_name" && php_identifier_text(node_text(node, source)) == "this" + } + + fn call_node(&self, node: TreeSitterNode<'_>, _source: &str) -> bool { + matches!( + node.kind(), + "function_call_expression" + | "member_call_expression" + | "scoped_call_expression" + | "print_intrinsic" + ) + } + + fn intrinsic_call_name(&self, node: TreeSitterNode<'_>, _source: &str) -> Option<&'static str> { + (node.kind() == "print_intrinsic").then_some("print") + } + + fn block_node_kind(&self, kind: &str) -> bool { + 
matches!(kind, "compound_statement" | "declaration_list") + } + + fn loop_node_type(&self, kind: &str) -> Option<&'static str> { + (kind == "foreach_statement").then_some("FOR") + } + + fn member_access_operator(&self, text: &str) -> bool { + matches!(text, "." | "&." | "->" | "::") + } + + fn source_text(&self, text: &str) -> String { + php_normalize_source(text) + } + + fn state_field_name(&self, node: TreeSitterNode<'_>, source: &str) -> Option { + if node.kind() != "member_access_expression" { + return None; + } + let receiver = php_member_receiver(node)?; + if !matches!( + php_identifier_text(node_text(receiver, source)).as_str(), + "this" | "self" + ) { + return None; + } + let field = php_member_name(node)?; + let field = php_identifier_text(node_text(field, source)); + (!field.is_empty()).then_some(field) + } + + fn class_node(&self, node: TreeSitterNode<'_>) -> bool { + node.kind() == "class_declaration" + } + + fn member_assignment_target(&self, node: TreeSitterNode<'_>, _source: &str) -> bool { + node.kind() == "member_access_expression" + } + + fn member_read_excluded(&self, node: TreeSitterNode<'_>) -> bool { + node.parent() + .map(|parent| { + matches!( + parent.kind(), + "member_call_expression" | "scoped_call_expression" + ) + }) + .unwrap_or(false) + } + + fn named_children_action<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + children: &[TreeSitterNode<'tree>], + ) -> super::base::NamedChildrenAction<'tree> { + if matches!(node.kind(), "compound_statement" | "declaration_list") + && children.len() == 1 + && node_text(node, source) == node_text(children[0], source) + { + return super::base::NamedChildrenAction::Recurse(children[0]); + } + + super::base::NamedChildrenAction::Default + } + + fn case_arm_body_nodes<'tree>( + &self, + node: TreeSitterNode<'tree>, + _source: &str, + ) -> Option>> { + if node.kind() != "case_statement" { + return None; + } + let mut children = php_named_children(node).into_iter(); + children.next()?; + 
let mut body = Vec::new(); + for child in children { + if child.kind() == "case_statement" { + break; + } + body.push(child); + } + Some(body) + } + + fn assignment_operators(&self) -> &'static [&'static str] { + COMMON_ASSIGNMENT_OPERATORS + } +} + +fn php_named_children<'tree>(node: TreeSitterNode<'tree>) -> Vec> { + let mut cursor = node.walk(); + node.named_children(&mut cursor).collect() +} + +fn php_member_receiver<'tree>(node: TreeSitterNode<'tree>) -> Option> { + node.child_by_field_name("object") + .or_else(|| node.child_by_field_name("receiver")) + .or_else(|| node.child_by_field_name("expression")) + .or_else(|| php_named_children(node).into_iter().next()) +} + +fn php_member_name<'tree>(node: TreeSitterNode<'tree>) -> Option> { + node.child_by_field_name("name") + .or_else(|| node.child_by_field_name("field")) + .or_else(|| php_named_children(node).into_iter().rev().next()) +} + +fn php_identifier_text(text: &str) -> String { + text.trim().trim_start_matches('$').to_string() +} + +fn php_constant_identifier(text: &str) -> bool { + text.chars() + .next() + .map(|ch| ch == '_' || ch.is_ascii_uppercase()) + .unwrap_or(false) +} + +fn php_normalize_source(source: &str) -> String { + let mut out = String::new(); + let mut chars = source.chars().peekable(); + while let Some(ch) = chars.next() { + if ch == '$' + && chars + .peek() + .map(|next| *next == '_' || next.is_ascii_alphabetic()) + .unwrap_or(false) + { + continue; + } + out.push(ch); + } + out.replace("->", ".").replace("::", ".") +} diff --git a/gems/decomplex/rust/src/decomplex/ast/adapters/python.rs b/gems/decomplex/rust/src/decomplex/ast/adapters/python.rs new file mode 100644 index 000000000..9dfb8bee6 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/ast/adapters/python.rs @@ -0,0 +1,557 @@ +use super::super::{ + bare_identifier_text, named_children, node_text, raw_named_children, TernaryParts, + PYTHON_CONCATENATED_STRING_WRAPPER_KINDS, PYTHON_DOTTED_EXPRESSION_WRAPPER_KINDS, + 
PYTHON_LEADING_FUNCTION_WRAPPER_KINDS, PYTHON_LEADING_IF_WRAPPER_KINDS, + PYTHON_LEADING_OWNER_WRAPPER_KINDS, +}; +use super::base::{AstNormalizationAdapter, NamedChildrenAction, PYTHON_ASSIGNMENT_OPERATORS}; +use tree_sitter::Node as TreeSitterNode; + +const PYTHON_BODY_FIELD_KINDS: &[&str] = &[ + "elif_clause", + "else_clause", + "for_statement", + "function_definition", + "if_statement", + "try_statement", + "while_statement", + "with_statement", +]; + +pub(crate) struct PythonAstAdapter; + +impl AstNormalizationAdapter for PythonAstAdapter { + fn yield_statement(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + if !matches!( + node.kind(), + "body_statement" | "block" | "block_body" | "expression_statement" | "statement" + ) { + return false; + } + let named = named_children(node); + named.len() == 1 + && named[0].kind() == "yield" + && node_text(named[0], source) == node_text(node, source) + } + + fn explicit_alternative<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + named_children(node) + .into_iter() + .find(|child| matches!(child.kind(), "elif_clause" | "else" | "else_clause")) + } + + fn case_else_arm(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + node.kind() == "case_clause" && self.default_case_pattern(node, source) + } + + fn named_field<'tree>( + &self, + node: TreeSitterNode<'tree>, + name: &str, + ) -> Option> { + node.child_by_field_name(name) + .or_else(|| self.python_body_field(node, name)) + } + + fn named_children_action<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + children: &[TreeSitterNode<'tree>], + ) -> NamedChildrenAction<'tree> { + if node.kind() == "with_clause" && bare_identifier_text(node_text(node, source)) { + return NamedChildrenAction::Drop; + } + + if node.kind() == "relative_import" + && children.len() == 1 + && children[0].kind() == "import_prefix" + { + return NamedChildrenAction::Drop; + } + + if node.kind() == "block" && children.len() == 1 { + let child = 
children[0]; + if matches!(child.kind(), "function_definition" | "decorated_definition") { + return NamedChildrenAction::Recurse(child); + } + if child.kind() == "pass_statement" && node_text(node, source).trim() == "pass" { + return NamedChildrenAction::Drop; + } + if matches!(child.kind(), "break_statement" | "continue_statement") + && bare_identifier_text(node_text(node, source).trim()) + { + return NamedChildrenAction::Drop; + } + if child.kind() == "return_statement" + && node_text(node, source) == node_text(child, source) + { + if raw_named_children(child).is_empty() { + return NamedChildrenAction::Drop; + } + return NamedChildrenAction::Recurse(child); + } + if matches!(child.kind(), "delete_statement" | "if_statement") { + return NamedChildrenAction::Recurse(child); + } + if matches!( + child.kind(), + "assert_statement" + | "for_statement" + | "import_from_statement" + | "import_statement" + | "raise_statement" + | "try_statement" + | "while_statement" + | "with_statement" + ) { + return NamedChildrenAction::Recurse(child); + } + if child.kind() == "expression_statement" { + let statement_children = raw_named_children(child); + if statement_children.len() == 1 + && statement_children[0].kind() == "identifier" + && node_text(node, source) == node_text(child, source) + { + return NamedChildrenAction::Drop; + } + if statement_children.len() == 1 && statement_children[0].kind() == "ellipsis" { + return NamedChildrenAction::Drop; + } + if statement_children.len() == 1 + && matches!( + statement_children[0].kind(), + "assignment" + | "augmented_assignment" + | "binary_operator" + | "call" + | "string" + | "subscript" + ) + { + return NamedChildrenAction::Recurse(statement_children[0]); + } + } + } + + if node.kind() == "expression_statement" && children.len() == 1 { + let child = children[0]; + if child.kind() == "identifier" { + return NamedChildrenAction::Drop; + } + if matches!( + child.kind(), + "yield" + | "binary_operator" + | "comparison_operator" + | 
"call" + | "attribute" + | "string" + ) { + return NamedChildrenAction::Recurse(child); + } + } + + if node.kind() == "as_pattern_target" { + return NamedChildrenAction::Drop; + } + + if matches!(node.kind(), "with_clause" | "with_item") + && children.len() == 1 + && matches!(children[0].kind(), "with_item" | "as_pattern") + { + return NamedChildrenAction::Recurse(children[0]); + } + + if node.kind() == "with_item" + && children.len() == 1 + && matches!(children[0].kind(), "call" | "attribute") + && node_text(node, source) == node_text(children[0], source) + { + return NamedChildrenAction::Recurse(children[0]); + } + + if node.kind() == "type" && children.len() == 1 && children[0].kind() == "binary_operator" { + return NamedChildrenAction::Recurse(children[0]); + } + + NamedChildrenAction::Default + } + + fn nested_class_body_child<'tree>( + &self, + node: TreeSitterNode<'tree>, + _source: &str, + ) -> Option> { + if node.kind() != "block" { + return None; + } + let raw_children = raw_named_children(node); + if raw_children.len() == 1 + && raw_children[0].kind() == "class_definition" + && node + .parent() + .map(|parent| parent.kind() == "class_definition") + .unwrap_or(false) + { + Some(raw_children[0]) + } else { + None + } + } + + fn else_if_block<'tree>( + &self, + node: TreeSitterNode<'tree>, + _source: &str, + ) -> Option> { + if node.kind() != "else_clause" { + return None; + } + raw_named_children(node) + .into_iter() + .find(|child| child.kind() == "block") + } + + fn leading_function_statement(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + self.leading_function_statement_with_keyword( + node, + source, + "def", + PYTHON_LEADING_FUNCTION_WRAPPER_KINDS, + ) + } + + fn leading_function_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if !PYTHON_LEADING_FUNCTION_WRAPPER_KINDS.contains(&node.kind()) { + return None; + } + if node + .children(&mut node.walk()) + .next() + .map(|child| child.kind() == "def") + 
.unwrap_or(false) + { + return Some(node); + } + self.exact_single_named_child(node, &["function_definition"], source) + } + + fn leading_function_body_kind(&self) -> &'static str { + "block" + } + + fn leading_owner_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if PYTHON_LEADING_OWNER_WRAPPER_KINDS.contains(&node.kind()) { + let raw_named = named_children(node); + if raw_named.len() == 1 + && matches!( + raw_named[0].kind(), + "class" | "class_definition" | "class_declaration" | "module" + ) + && node_text(raw_named[0], source) == node_text(node, source) + { + return Some(raw_named[0]); + } + return Some(node); + } + if super::super::LEADING_OWNER_WRAPPER_KINDS.contains(&node.kind()) { + return Some(node); + } + None + } + + fn leading_if_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if PYTHON_LEADING_IF_WRAPPER_KINDS.contains(&node.kind()) { + if let Some(child) = self.exact_single_named_child(node, &["if_statement"], source) { + return Some(child); + } + } + if super::super::LEADING_IF_WRAPPER_KINDS.contains(&node.kind()) { + return Some(node); + } + None + } + + fn rescue_body_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if node.kind() == "try_statement" + || self.flattened_try_block(node, &["except_clause"], source) + { + return Some(node); + } + if node.kind() == "block" { + if let Some(child) = self.exact_single_named_child(node, &["try_statement"], source) { + return Some(child); + } + } + if super::super::RESCUE_BODY_WRAPPER_KINDS.contains(&node.kind()) { + return Some(node); + } + None + } + + fn rescue_body_nodes<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Vec> { + let target = self.rescue_body_target(node, source).unwrap_or(node); + if target.kind() == "try_statement" + || self.flattened_try_block(target, &["except_clause"], source) + { + return named_children(target) + .into_iter() + 
.take_while(|child| !matches!(child.kind(), "except_clause" | "finally_clause")) + .collect(); + } + let Some(index) = named_children(target) + .iter() + .position(|child| self.rescue_clause(*child)) + else { + return Vec::new(); + }; + named_children(target)[..index].to_vec() + } + + fn rescue_clauses<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Vec> { + let Some(target) = self.rescue_body_target(node, source) else { + return Vec::new(); + }; + named_children(target) + .into_iter() + .filter(|child| child.kind() == "except_clause") + .collect() + } + + fn rescue_clause_exceptions<'tree>( + &self, + node: TreeSitterNode<'tree>, + _source: &str, + ) -> Vec> { + let Some(pattern) = named_children(node) + .into_iter() + .find(|child| !matches!(child.kind(), "block" | "comment")) + else { + return Vec::new(); + }; + if pattern.kind() != "as_pattern" { + return vec![pattern]; + } + named_children(pattern) + .into_iter() + .find(|child| child.kind() != "as_pattern_target") + .into_iter() + .collect() + } + + fn rescue_clause_exceptions_source<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + self.rescue_clause_exceptions(node, source) + .into_iter() + .next() + } + + fn rescue_clause_exception_variable_name<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + named_children(node) + .into_iter() + .find(|child| child.kind() == "as_pattern") + .and_then(|pattern| self.descendant(pattern, &["as_pattern_target"])) + } + + fn rescue_clause_exception_variable_source<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + self.rescue_clause_exception_variable_name(node) + } + + fn rescue_clause_handler<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + named_children(node) + .into_iter() + .rev() + .find(|child| child.kind() == "block") + } + + fn ensure_body_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if node.kind() == 
"try_statement" + || self.flattened_try_block(node, &["finally_clause"], source) + { + return Some(node); + } + if node.kind() == "block" { + if let Some(child) = self.exact_single_named_child(node, &["try_statement"], source) { + return Some(child); + } + } + if super::super::ENSURE_BODY_WRAPPER_KINDS.contains(&node.kind()) { + return Some(node); + } + None + } + + fn ensure_body_nodes<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Vec> { + let target = self.ensure_body_target(node, source).unwrap_or(node); + if target.kind() == "try_statement" + || self.flattened_try_block(target, &["finally_clause"], source) + { + return named_children(target) + .into_iter() + .take_while(|child| child.kind() != "finally_clause") + .collect(); + } + let named = named_children(target); + let Some(index) = named + .iter() + .position(|child| self.ensure_clause_kind(*child)) + else { + return Vec::new(); + }; + named[..index].to_vec() + } + + fn ensure_clause<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + let target = self.ensure_body_target(node, source)?; + named_children(target) + .into_iter() + .find(|child| child.kind() == "finally_clause") + } + + fn ensure_clause_body<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + named_children(node) + .into_iter() + .rev() + .find(|child| child.kind() == "block") + } + + fn ternary_parts<'tree>( + &self, + node: TreeSitterNode<'tree>, + _source: &str, + ) -> Option> { + if node.kind() != "conditional_expression" { + return None; + } + let named = named_children(node); + Some(TernaryParts { + condition: *named.get(1)?, + positive: vec![*named.first()?], + negative: vec![*named.get(2)?], + }) + } + + fn unary_minus_expression(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + matches!(node.kind(), "unary" | "unary_expression" | "unary_operator") + && node_text(node, source).trim_start().starts_with('-') + } + + fn empty_body_statement(&self, node: 
TreeSitterNode<'_>, source: &str) -> bool { + (super::super::EMPTY_BODY_WRAPPER_KINDS.contains(&node.kind()) + && named_children(node).is_empty() + && node_text(node, source).trim().is_empty()) + || node.kind() == "pass_statement" + || (node.kind() == "block" && node_text(node, source).trim() == "pass" && { + let named = named_children(node); + named.is_empty() || named.iter().all(|child| child.kind() == "pass_statement") + }) + } + + fn operator_call_expression_kind(&self, node: TreeSitterNode<'_>) -> bool { + matches!( + node.kind(), + "binary" | "binary_expression" | "binary_operator" + ) + } + + fn assignment_operators(&self) -> &'static [&'static str] { + PYTHON_ASSIGNMENT_OPERATORS + } + + fn concatenated_string_wrapper_kinds(&self) -> &'static [&'static str] { + PYTHON_CONCATENATED_STRING_WRAPPER_KINDS + } + + fn dotted_expression_wrapper_kinds(&self) -> &'static [&'static str] { + PYTHON_DOTTED_EXPRESSION_WRAPPER_KINDS + } +} + +impl PythonAstAdapter { + fn flattened_try_block( + &self, + node: TreeSitterNode<'_>, + clauses: &[&str], + source: &str, + ) -> bool { + node.kind() == "block" + && node + .children(&mut node.walk()) + .next() + .map(|child| node_text(child, source) == "try") + .unwrap_or(false) + && named_children(node) + .iter() + .any(|child| clauses.contains(&child.kind())) + } + + fn python_body_field<'tree>( + &self, + node: TreeSitterNode<'tree>, + name: &str, + ) -> Option> { + if !matches!(name, "body" | "consequence") + || !PYTHON_BODY_FIELD_KINDS.contains(&node.kind()) + { + return None; + } + raw_named_children(node) + .into_iter() + .find(|child| child.kind() == "block") + } +} diff --git a/gems/decomplex/rust/src/decomplex/ast/adapters/ruby.rs b/gems/decomplex/rust/src/decomplex/ast/adapters/ruby.rs new file mode 100644 index 000000000..362df5201 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/ast/adapters/ruby.rs @@ -0,0 +1,267 @@ +use super::super::{ + heredoc_marker_text, named_children, node_text, raw_named_children, 
ruby_variable_name_text, + CASE_ARGUMENT_WHEN_KINDS, INTERPOLATED_STATEMENT_WRAPPER_KINDS, LEADING_FUNCTION_WRAPPER_KINDS, +}; +use super::base::{AstNormalizationAdapter, NamedChildrenAction, RUBY_ASSIGNMENT_OPERATORS}; +use tree_sitter::Node as TreeSitterNode; + +pub(crate) struct RubyAstAdapter; + +impl AstNormalizationAdapter for RubyAstAdapter { + fn ruby(&self) -> bool { + true + } + + fn yield_statement(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + if !matches!( + node.kind(), + "body_statement" | "block" | "block_body" | "statement" + ) { + return false; + } + let named = named_children(node); + named.len() == 1 + && named[0].kind() == "yield" + && node_text(named[0], source) == node_text(node, source) + } + + fn super_statement(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + if !matches!( + node.kind(), + "body_statement" | "block" | "block_body" | "call" | "statement" + ) { + return false; + } + if node_text(node, source).trim() == "super" { + return true; + } + let raw = raw_named_children(node); + let named = if raw.len() == 1 && raw[0].kind() == "call" { + raw_named_children(raw[0]) + } else { + raw + }; + named + .first() + .map(|child| child.kind() == "super") + .unwrap_or(false) + && named + .iter() + .skip(1) + .all(|child| child.kind() == "argument_list") + } + + fn explicit_alternative<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + named_children(node) + .into_iter() + .find(|child| matches!(child.kind(), "elsif" | "else")) + } + + fn instance_variable(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + node.kind() == "instance_variable" + || node_text(node, source) + .strip_prefix('@') + .map(ruby_variable_name_text) + .unwrap_or(false) + } + + fn global_variable(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + node.kind() == "global_variable" + || node_text(node, source) + .strip_prefix('$') + .map(ruby_variable_name_text) + .unwrap_or(false) + } + + fn case_argument_list(&self, node: 
TreeSitterNode<'_>, source: &str) -> bool { + if node.kind() != "argument_list" { + return false; + } + let raw_named = named_children(node); + let target = if raw_named.len() == 1 + && raw_named[0].kind() == "case" + && node_text(raw_named[0], source) == node_text(node, source) + { + raw_named[0] + } else { + node + }; + let has_case_keyword = target + .children(&mut target.walk()) + .any(|child| !child.is_named() && child.kind() == "case"); + has_case_keyword + && named_children(target) + .iter() + .any(|child| CASE_ARGUMENT_WHEN_KINDS.contains(&child.kind())) + } + + fn safe_navigation_call(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + node.children(&mut node.walk()) + .any(|child| !child.is_named() && node_text(child, source) == "&.") + } + + fn leading_function_statement(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + self.leading_function_statement_with_keyword( + node, + source, + "def", + LEADING_FUNCTION_WRAPPER_KINDS, + ) + } + + fn zero_child_identifier_call(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + if node.kind() != "call" || !ruby_variable_name_text(node_text(node, source)) { + return false; + } + let named = named_children(node); + named.is_empty() + || (named.len() == 1 + && super::super::identifier_kind_name(named[0].kind()) + && node_text(named[0], source) == node_text(node, source)) + } + + fn heredoc_call_for_body(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + if node.kind() == "heredoc_beginning" { + return true; + } + if matches!(node.kind(), "call" | "argument_list") + && heredoc_marker_text(node_text(node, source)) + { + return true; + } + + named_children(node).into_iter().any(|child| { + if named_children(child) + .into_iter() + .any(|grandchild| grandchild.kind() == "heredoc_body") + { + return false; + } + + self.heredoc_call_for_body(child, source) + }) + } + + fn named_children_action<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + children: &[TreeSitterNode<'tree>], 
+ ) -> NamedChildrenAction<'tree> { + if INTERPOLATED_STATEMENT_WRAPPER_KINDS.contains(&node.kind()) + && children.len() == 1 + && children[0].kind() == "string" + && node_text(node, source) == node_text(children[0], source) + { + let string_children = raw_named_children(children[0]); + if string_children + .iter() + .any(|child| child.kind() == "interpolation") + { + return NamedChildrenAction::Replace(string_children); + } + } + + if matches!(node.kind(), "body_statement" | "block_body" | "statement") + && children.len() == 1 + && matches!( + children[0].kind(), + "if_modifier" | "unless_modifier" | "while_modifier" | "until_modifier" | "yield" + ) + && node_text(node, source) == node_text(children[0], source) + { + return NamedChildrenAction::Recurse(children[0]); + } + + NamedChildrenAction::Default + } + + fn logical_operator_assignment(&self, operator: &str) -> bool { + matches!(operator, "||" | "&&") + } + + fn statement_wrapped_call_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if !matches!( + node.kind(), + "body_statement" | "block_body" | "statement" | "argument_list" + ) { + return None; + } + let raw_named = raw_named_children(node); + if raw_named.len() == 1 + && raw_named[0].kind() == "call" + && node_text(node, source) == node_text(raw_named[0], source) + { + Some(raw_named[0]) + } else { + None + } + } + + fn inline_def_function_text_source<'tree>( + &self, + function: TreeSitterNode<'tree>, + _source: &str, + ) -> TreeSitterNode<'tree> { + if function.kind() == "call" { + return named_children(function) + .into_iter() + .next() + .unwrap_or(function); + } + function + } + + fn bare_const_call_function(&self, function: TreeSitterNode<'_>) -> bool { + matches!( + function.kind(), + "constant" | "scope_resolution" | "type_identifier" | "scoped_type_identifier" + ) + } + + fn normalize_default_parameters(&self) -> bool { + true + } + + fn normalize_block_parameters(&self) -> bool { + true + } + + fn 
boolean_statement_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + children: &[TreeSitterNode<'tree>], + ) -> TreeSitterNode<'tree> { + if children.len() == 1 + && matches!( + children[0].kind(), + "binary" | "binary_expression" | "binary_operator" | "boolean_operator" + ) + && node_text(node, source) == node_text(children[0], source) + { + children[0] + } else { + node + } + } + + fn elides_tail_returns(&self) -> bool { + true + } + + fn elides_implicit_nil_body(&self) -> bool { + true + } + + fn assignment_operators(&self) -> &'static [&'static str] { + RUBY_ASSIGNMENT_OPERATORS + } +} diff --git a/gems/decomplex/rust/src/decomplex/ast/adapters/rust.rs b/gems/decomplex/rust/src/decomplex/ast/adapters/rust.rs new file mode 100644 index 000000000..e7931107f --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/ast/adapters/rust.rs @@ -0,0 +1,5 @@ +use super::base::AstNormalizationAdapter; + +pub(crate) struct RustAstAdapter; + +impl AstNormalizationAdapter for RustAstAdapter {} diff --git a/gems/decomplex/rust/src/decomplex/ast/adapters/swift.rs b/gems/decomplex/rust/src/decomplex/ast/adapters/swift.rs new file mode 100644 index 000000000..f042d102a --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/ast/adapters/swift.rs @@ -0,0 +1,5 @@ +use super::base::AstNormalizationAdapter; + +pub(crate) struct SwiftAstAdapter; + +impl AstNormalizationAdapter for SwiftAstAdapter {} diff --git a/gems/decomplex/rust/src/decomplex/ast/adapters/typescript.rs b/gems/decomplex/rust/src/decomplex/ast/adapters/typescript.rs new file mode 100644 index 000000000..4354e2bf2 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/ast/adapters/typescript.rs @@ -0,0 +1,247 @@ +use super::super::{ + named_children, node_text, question_colon_ternary_parts, raw_named_children, TernaryParts, + TYPESCRIPT_TERNARY_KINDS, +}; +use super::base::{AstNormalizationAdapter, TYPESCRIPT_ASSIGNMENT_OPERATORS}; +use tree_sitter::Node as TreeSitterNode; + +pub(crate) struct 
TypeScriptAstAdapter; + +impl AstNormalizationAdapter for TypeScriptAstAdapter { + fn explicit_alternative<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + named_children(node) + .into_iter() + .find(|child| matches!(child.kind(), "else" | "else_clause")) + } + + fn safe_navigation_call(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + node.children(&mut node.walk()) + .any(|child| !child.is_named() && node_text(child, source) == "&.") + || node + .children(&mut node.walk()) + .any(|child| child.kind() == "optional_chain" && node_text(child, source) == "?.") + || (node.kind() == "call_expression" + && named_children(node) + .into_iter() + .any(|child| self.safe_navigation_call(child, source))) + } + + fn ternary_parts<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + question_colon_ternary_parts(node, source, TYPESCRIPT_TERNARY_KINDS) + } + + fn interpolated_string( + &self, + node: TreeSitterNode<'_>, + children: &[TreeSitterNode<'_>], + ) -> bool { + (node.kind() == "string" && children.iter().any(|child| child.kind() == "interpolation")) + || (node.kind() == "template_string" + && children + .iter() + .any(|child| child.kind() == "template_substitution")) + } + + fn lambda_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + _source: &str, + ) -> Option> { + if matches!( + node.kind(), + "arrow_function" | "function_expression" | "lambda" + ) { + Some(node) + } else { + None + } + } + + fn interpolation_node(&self, node: TreeSitterNode<'_>) -> bool { + matches!(node.kind(), "interpolation" | "template_substitution") + } + + fn rescue_body_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if node.kind() == "try_statement" { + return Some(node); + } + if node.kind() == "statement_block" { + let raw_named = raw_named_children(node); + if raw_named.len() == 1 + && raw_named[0].kind() == "try_statement" + && node_text(raw_named[0], source) == node_text(node, 
source) + { + return Some(raw_named[0]); + } + } + if super::super::RESCUE_BODY_WRAPPER_KINDS.contains(&node.kind()) { + return Some(node); + } + None + } + + fn rescue_body_nodes<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Vec> { + let target = self.rescue_body_target(node, source).unwrap_or(node); + if target.kind() == "try_statement" { + return named_children(target) + .into_iter() + .take_while(|child| !matches!(child.kind(), "catch_clause" | "finally_clause")) + .collect(); + } + let named = named_children(target); + let Some(index) = named.iter().position(|child| self.rescue_clause(*child)) else { + return Vec::new(); + }; + named[..index].to_vec() + } + + fn rescue_clauses<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Vec> { + let Some(target) = self.rescue_body_target(node, source) else { + return Vec::new(); + }; + named_children(target) + .into_iter() + .filter(|child| child.kind() == "catch_clause") + .collect() + } + + fn rescue_clause_exceptions<'tree>( + &self, + _node: TreeSitterNode<'tree>, + _source: &str, + ) -> Vec> { + Vec::new() + } + + fn rescue_clause_exceptions_source<'tree>( + &self, + _node: TreeSitterNode<'tree>, + _source: &str, + ) -> Option> { + None + } + + fn rescue_clause_exception_variable_name<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + named_children(node) + .into_iter() + .find(|child| super::super::identifier_kind_name(child.kind())) + } + + fn rescue_clause_exception_variable_source<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + self.rescue_clause_exception_variable_name(node) + } + + fn rescue_clause_handler<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + named_children(node) + .into_iter() + .rev() + .find(|child| child.kind() == "statement_block") + } + + fn ensure_body_target<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + if node.kind() == "try_statement" { + return 
Some(node); + } + if node.kind() == "statement_block" { + let raw_named = raw_named_children(node); + if raw_named.len() == 1 + && raw_named[0].kind() == "try_statement" + && node_text(raw_named[0], source) == node_text(node, source) + { + return Some(raw_named[0]); + } + } + if super::super::ENSURE_BODY_WRAPPER_KINDS.contains(&node.kind()) { + return Some(node); + } + None + } + + fn ensure_body_nodes<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Vec> { + let target = self.ensure_body_target(node, source).unwrap_or(node); + if target.kind() == "try_statement" { + return named_children(target) + .into_iter() + .take_while(|child| child.kind() != "finally_clause") + .collect(); + } + let named = named_children(target); + let Some(index) = named + .iter() + .position(|child| self.ensure_clause_kind(*child)) + else { + return Vec::new(); + }; + named[..index].to_vec() + } + + fn ensure_clause<'tree>( + &self, + node: TreeSitterNode<'tree>, + source: &str, + ) -> Option> { + let target = self.ensure_body_target(node, source)?; + named_children(target) + .into_iter() + .find(|child| child.kind() == "finally_clause") + } + + fn ensure_clause_body<'tree>( + &self, + node: TreeSitterNode<'tree>, + ) -> Option> { + named_children(node) + .into_iter() + .rev() + .find(|child| child.kind() == "statement_block") + } + + fn empty_body_statement(&self, node: TreeSitterNode<'_>, source: &str) -> bool { + (super::super::EMPTY_BODY_WRAPPER_KINDS.contains(&node.kind()) + && named_children(node).is_empty() + && node_text(node, source).trim().is_empty()) + || (node.kind() == "statement_block" + && named_children(node).is_empty() + && node_text(node, source).trim() == "{}") + } + + fn assignment_operators(&self) -> &'static [&'static str] { + TYPESCRIPT_ASSIGNMENT_OPERATORS + } +} diff --git a/gems/decomplex/rust/src/decomplex/ast/adapters/zig.rs b/gems/decomplex/rust/src/decomplex/ast/adapters/zig.rs new file mode 100644 index 000000000..5e24d2cb2 --- 
/dev/null +++ b/gems/decomplex/rust/src/decomplex/ast/adapters/zig.rs @@ -0,0 +1,5 @@ +use super::base::AstNormalizationAdapter; + +pub(crate) struct ZigAstAdapter; + +impl AstNormalizationAdapter for ZigAstAdapter {} diff --git a/gems/decomplex/rust/src/decomplex/convergence.rs b/gems/decomplex/rust/src/decomplex/convergence.rs new file mode 100644 index 000000000..93768835c --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/convergence.rs @@ -0,0 +1,208 @@ +use crate::decomplex::report::ReportSection; +use crate::decomplex::report_value as rv; +use serde::Serialize; +use serde_json::Value; +use std::collections::{BTreeMap, HashMap}; + +pub const TIER_WEIGHT: &[(i64, i64)] = &[(1, 3), (2, 2), (3, 1)]; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct Unit { + pub file: String, + pub method: String, + pub detectors: Vec, + pub n_detectors: usize, + pub score: i64, + pub findings: usize, + pub at: String, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct FileRollup { + pub file: String, + pub detectors: Vec, + pub n_detectors: usize, + pub methods: usize, + pub score: i64, +} + +#[derive(Clone, Debug)] +struct Accumulator { + dets: BTreeMap, + tiers: BTreeMap, + findings: usize, + at: Option, +} + +pub fn rollup(sections: &[ReportSection], min_detectors: usize) -> Vec { + let mut acc: HashMap<(String, String), Accumulator> = HashMap::new(); + for section in sections { + for finding in §ion.findings { + for loc in locations(finding) { + let (Some(file), Some(method), line) = parse_loc(&loc) else { + continue; + }; + if file.is_empty() || method.is_empty() { + continue; + } + let unit = acc + .entry((file.clone(), method.clone())) + .or_insert_with(|| Accumulator { + dets: BTreeMap::new(), + tiers: BTreeMap::new(), + findings: 0, + at: None, + }); + *unit.dets.entry(section.title.clone()).or_insert(0) += 1; + unit.tiers.insert(section.title.clone(), section.tier); + unit.findings += 1; + if unit.at.is_none() { + unit.at = Some(match 
line { + Some(line) => format!("{file}:{method}:{line}"), + None => format!("{file}:{method}"), + }); + } + } + } + } + + let mut units = acc + .into_iter() + .filter_map(|((file, method), data)| { + if data.dets.len() < min_detectors { + return None; + } + let detectors = data.dets.keys().cloned().collect::>(); + let score = data.tiers.values().map(|tier| tier_weight(*tier)).sum(); + Some(Unit { + file, + method, + n_detectors: detectors.len(), + detectors, + score, + findings: data.findings, + at: data.at.unwrap_or_default(), + }) + }) + .collect::>(); + units.sort_by(|left, right| { + right + .n_detectors + .cmp(&left.n_detectors) + .then_with(|| right.score.cmp(&left.score)) + .then_with(|| right.findings.cmp(&left.findings)) + .then_with(|| left.file.cmp(&right.file)) + .then_with(|| left.method.cmp(&right.method)) + }); + units +} + +pub fn by_file(units: &[Unit]) -> Vec { + let mut grouped: BTreeMap> = BTreeMap::new(); + for unit in units { + grouped.entry(unit.file.clone()).or_default().push(unit); + } + + let mut rows = grouped + .into_iter() + .filter_map(|(file, units)| { + let mut detectors = units + .iter() + .flat_map(|unit| unit.detectors.iter().cloned()) + .collect::>(); + detectors.sort(); + detectors.dedup(); + if detectors.len() < 2 { + return None; + } + let score = units.iter().map(|unit| unit.score).sum(); + Some(FileRollup { + file, + n_detectors: detectors.len(), + detectors, + methods: units.len(), + score, + }) + }) + .collect::>(); + rows.sort_by(|left, right| { + right + .n_detectors + .cmp(&left.n_detectors) + .then_with(|| right.score.cmp(&left.score)) + .then_with(|| right.methods.cmp(&left.methods)) + .then_with(|| left.file.cmp(&right.file)) + }); + rows +} + +pub fn locations(finding: &Value) -> Vec { + let mut out = Vec::new(); + for key in ["at", "ref_at"] { + if let Some(Value::String(text)) = rv::get(finding, key) { + out.push(text.clone()); + } + } + if let Some(Value::Array(sites)) = rv::get(finding, "sites") { + out.extend( 
+ sites + .iter() + .filter_map(|site| site.as_str().map(ToOwned::to_owned)), + ); + } + out +} + +pub fn parse_loc(loc: &str) -> (Option, Option, Option) { + let mut parts = loc.split(':').map(ToOwned::to_owned).collect::>(); + if parts.len() < 2 { + return (None, None, None); + } + let line = if parts + .last() + .is_some_and(|part| part.chars().all(|ch| ch.is_ascii_digit())) + { + parts.pop() + } else { + None + }; + let method = parts.pop(); + let file = Some(parts.join(":")); + (file, method, line) +} + +pub fn tier_weight(tier: i64) -> i64 { + TIER_WEIGHT + .iter() + .find_map(|(key, value)| (*key == tier).then_some(*value)) + .unwrap_or(1) +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + #[test] + fn parse_loc_splits_from_the_right() { + assert_eq!( + parse_loc("dir:a.rb:method:42"), + ( + Some("dir:a.rb".to_string()), + Some("method".to_string()), + Some("42".to_string()) + ) + ); + } + + #[test] + fn rollup_requires_distinct_detectors() { + let sections = vec![ + ReportSection::new("A", 1, "", vec![json!({"at": "a.rb:m:1"})]), + ReportSection::new("B", 2, "", vec![json!({"at": "a.rb:m:2"})]), + ]; + let rows = rollup(§ions, 2); + assert_eq!(rows.len(), 1); + assert_eq!(rows[0].score, 5); + } +} diff --git a/gems/decomplex/rust/src/decomplex/delta.rs b/gems/decomplex/rust/src/decomplex/delta.rs new file mode 100644 index 000000000..22144b872 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/delta.rs @@ -0,0 +1,94 @@ +use crate::decomplex::report::ReportSection; +use crate::decomplex::root_cause::{self, Cluster}; +use serde_json::{json, Map, Value}; +use std::collections::BTreeMap; + +const SEP: &str = "\t"; + +pub fn snapshot(sections: &[ReportSection], clusters: &[Cluster]) -> Value { + let mut findings: BTreeMap = BTreeMap::new(); + let mut details: BTreeMap> = BTreeMap::new(); + for section in sections { + for finding in §ion.findings { + let fp = fingerprint(§ion.title, finding); + *findings.entry(fp.clone()).or_insert(0) 
+= 1; + details + .entry(fp) + .or_default() + .push(json_safe_finding(§ion.title, finding)); + } + } + + let mut site: BTreeMap = BTreeMap::new(); + let mut site_details: BTreeMap> = BTreeMap::new(); + for section in sections { + for finding in §ion.findings { + let detail = json_safe_finding(§ion.title, finding); + for sfp in site_fingerprints(§ion.title, finding) { + *site.entry(sfp.clone()).or_insert(0) += 1; + site_details.entry(sfp).or_default().push(detail.clone()); + } + } + } + + let mut cluster_values = Map::new(); + for cluster in clusters { + cluster_values.insert( + format!("{}{}{}", cluster.kind, SEP, cluster.token), + json!({ + "n": cluster.n_detectors, + "s": cluster.support, + "fat": cluster.fat_union, + }), + ); + } + let total = findings.values().sum::(); + json!({ + "findings": findings, + "site_findings": site, + "details": details, + "site_details": site_details, + "clusters": cluster_values, + "total": total, + }) +} + +pub fn fingerprint(detector: &str, finding: &Value) -> String { + let mut entities = root_cause::entities(finding) + .into_iter() + .map(|entity| format!("{}:{}", entity.kind, entity.token)) + .collect::>(); + entities.sort(); + let mut units = root_cause::finding_units(finding) + .into_iter() + .map(|(file, method)| format!("{file}#{method}")) + .collect::>(); + units.sort(); + units.dedup(); + [detector.to_string(), entities.join(","), units.join(",")].join(SEP) +} + +pub fn site_fingerprints(detector: &str, finding: &Value) -> Vec { + let mut entities = root_cause::entities(finding) + .into_iter() + .map(|entity| format!("{}:{}", entity.kind, entity.token)) + .collect::>(); + entities.sort(); + let entity_text = entities.join(","); + let mut units = root_cause::finding_units(finding) + .into_iter() + .map(|(file, method)| format!("{file}#{method}")) + .collect::>(); + units.sort(); + units.dedup(); + units + .into_iter() + .map(|unit| [detector.to_string(), entity_text.clone(), unit].join(SEP)) + .collect() +} + +pub fn 
json_safe_finding(detector: &str, finding: &Value) -> Value { + let mut object = finding.as_object().cloned().unwrap_or_default(); + object.insert("detector".to_string(), Value::String(detector.to_string())); + Value::Object(object) +} diff --git a/gems/decomplex/rust/src/decomplex/detectors/co_update.rs b/gems/decomplex/rust/src/decomplex/detectors/co_update.rs new file mode 100644 index 000000000..8b0527e9d --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/co_update.rs @@ -0,0 +1,192 @@ +use crate::decomplex::ast::Span; +use crate::decomplex::syntax::{self, Document, Language, StateWrite}; +use anyhow::Result; +use serde::Serialize; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::PathBuf; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct CoUpdateReport { + pub co_written_pairs: Vec, + pub neglected_updates: Vec, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct CoWrittenPair { + pub pair: Vec, + pub support: usize, + pub sites: Vec, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct NeglectedUpdate { + pub pair: Vec, + pub support: usize, + pub has: String, + pub missing: String, + pub at: String, + pub spans: BTreeMap, + pub recv: String, +} + +#[derive(Clone, Debug)] +struct Write { + attr: String, + recv: String, + file: String, + defn: String, + line: usize, + span: Span, +} + +pub fn scan_files(files: &[PathBuf], language: Language) -> Result { + let documents = syntax::parse_files(files, language)?; + Ok(scan_documents(&documents)) +} + +pub fn scan_documents(documents: &[Document]) -> CoUpdateReport { + let mut writes = Vec::new(); + for doc in documents { + for w in &doc.state_writes { + writes.push(write_from_state_write(w)); + } + } + let report = Report::new(writes); + CoUpdateReport { + co_written_pairs: report.co_written_pairs(3), + neglected_updates: report.neglected_updates(3), + } +} + +pub fn state_writes_for_documents(documents: &[Document]) -> Vec { + documents + 
.iter() + .flat_map(|document| document.state_writes.clone()) + .collect() +} + +pub fn state_writes_for_files(files: &[PathBuf], language: Language) -> Result> { + let documents = syntax::parse_files(files, language)?; + Ok(state_writes_for_documents(&documents)) +} + +fn write_from_state_write(w: &StateWrite) -> Write { + Write { + attr: w.field.clone(), + recv: w.receiver.clone(), + file: w.file.clone(), + defn: w.function.clone(), + line: w.line, + span: w.span, + } +} + +struct Report { + #[allow(dead_code)] + writes: Vec, + by_unit: Vec<((String, String), Vec)>, +} + +impl Report { + fn new(writes: Vec) -> Self { + let mut keys = Vec::new(); + let mut map: BTreeMap<(String, String), Vec> = BTreeMap::new(); + for w in &writes { + let key = (w.file.clone(), w.defn.clone()); + if !map.contains_key(&key) { + keys.push(key.clone()); + } + map.entry(key).or_default().push(w.clone()); + } + let by_unit = keys + .into_iter() + .map(|k| { + let v = map.remove(&k).unwrap(); + (k, v) + }) + .collect(); + Self { writes, by_unit } + } + + fn co_written_pairs(&self, min_support: usize) -> Vec { + let mut keys = Vec::new(); + let mut counts: BTreeMap, Vec<(String, String)>> = BTreeMap::new(); + for (unit, ws) in &self.by_unit { + let mut attrs: Vec<_> = ws + .iter() + .map(|w| w.attr.clone()) + .collect::>() + .into_iter() + .collect(); + attrs.sort(); + + for i in 0..attrs.len() { + for j in i + 1..attrs.len() { + let pair = vec![attrs[i].clone(), attrs[j].clone()]; + if !counts.contains_key(&pair) { + keys.push(pair.clone()); + } + counts.entry(pair).or_default().push(unit.clone()); + } + } + } + + let mut out = Vec::new(); + for pair in keys { + let units = counts.remove(&pair).unwrap(); + if units.len() < min_support { + continue; + } + out.push(CoWrittenPair { + pair, + support: units.len(), + sites: units + .into_iter() + .map(|(f, d)| format!("{}:{}", f, d)) + .collect(), + }); + } + out.sort_by(|a, b| b.support.cmp(&a.support)); + out + } + + fn 
neglected_updates(&self, min_support: usize) -> Vec { + let pairs = self.co_written_pairs(min_support); + let mut out = Vec::new(); + + for ((file, defn), ws) in &self.by_unit { + let attrs: BTreeSet<_> = ws.iter().map(|w| w.attr.clone()).collect(); + for p in &pairs { + let a = &p.pair[0]; + let b = &p.pair[1]; + + let (has, miss) = if attrs.contains(a) && !attrs.contains(b) { + (Some(a), Some(b)) + } else if attrs.contains(b) && !attrs.contains(a) { + (Some(b), Some(a)) + } else { + (None, None) + }; + + if let (Some(has), Some(miss)) = (has, miss) { + if let Some(w) = ws.iter().find(|x| &x.attr == has) { + let loc = format!("{}:{}:{}", file, defn, w.line); + let mut spans = BTreeMap::new(); + spans.insert(loc.clone(), w.span); + out.push(NeglectedUpdate { + pair: p.pair.clone(), + support: p.support, + has: has.clone(), + missing: miss.clone(), + at: loc, + spans, + recv: w.recv.clone(), + }); + } + } + } + } + out.sort_by(|a, b| b.support.cmp(&a.support)); + out + } +} diff --git a/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs b/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs new file mode 100644 index 000000000..aeaff8452 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/decision_pressure.rs @@ -0,0 +1,312 @@ +use crate::decomplex::ast::Span; +use crate::decomplex::detectors::local_flow::{self, MethodSummary}; +use crate::decomplex::syntax::{self, CallSite, Document, Language}; +use anyhow::Result; +use regex::Regex; +use serde::Serialize; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::PathBuf; +use std::sync::OnceLock; + +const GUARD_MIDS: &[&str] = &[ + "is_a?", + "kind_of?", + "instance_of?", + "nil?", + "respond_to?", + "is_none", + "is_some", + "is_null", + "isNull", +]; +const TRANSIENT_NOARG_MIDS: &[&str] = &["pop", "shift"]; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct DecisionPressureRow { + pub contract: String, + pub decisions: usize, + pub essential: usize, + pub 
methods: usize, + pub sites: Vec, + pub spans: BTreeMap, +} + +#[derive(Clone, Debug)] +struct Hit { + contract: String, + file: String, + defn: String, + line: usize, + span: Span, +} + +pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { + let documents = syntax::parse_files(files, language)?; + Ok(scan_documents(&documents)) +} + +pub fn scan_documents(documents: &[Document]) -> Vec { + scan_documents_with_summaries(documents, local_flow::scan_documents(documents)) +} + +pub fn scan_documents_with_summaries( + documents: &[Document], + methods: Vec, +) -> Vec { + let mut guard = Vec::new(); + let mut dispatch = Vec::new(); + let assignment_maps = build_assignment_maps(&methods); + let methods_by_file = methods_by_file(&methods); + + for document in documents { + for call in &document.call_sites { + if call.receiver.is_empty() { + continue; + } + let empty = BTreeMap::new(); + let assignment_map = assignment_maps + .get(&(call.file.clone(), call.function.clone())) + .unwrap_or(&empty); + if eliminable_guard(call) { + if let Some(contract) = contract_of(&call.receiver, assignment_map, 0) { + guard.push(hit(contract, call)); + } + } else if essential_dispatch(call) { + if let Some(contract) = contract_of(&call.receiver, assignment_map, 0) { + dispatch.push(hit(contract, call)); + } + } + } + + if let Some(methods) = methods_by_file.get(&document.file) { + guard.extend(rescue_nil_hits(document, methods, &assignment_maps)); + } + } + + let mut seen = BTreeSet::new(); + guard.retain(|hit| { + seen.insert(( + hit.contract.clone(), + hit.file.clone(), + hit.defn.clone(), + hit.line, + )) + }); + + Report::new(guard, dispatch).ranked() +} + +fn eliminable_guard(call: &CallSite) -> bool { + GUARD_MIDS.contains(&call.message.as_str()) || call.safe_navigation +} + +fn essential_dispatch(call: &CallSite) -> bool { + call.message.ends_with('?') +} + +fn hit(contract: String, call: &CallSite) -> Hit { + Hit { + contract, + file: call.file.clone(), + defn: 
call.function.clone(), + line: call.line, + span: call.span, + } +} + +fn build_assignment_maps( + methods: &[MethodSummary], +) -> BTreeMap<(String, String), BTreeMap> { + methods + .iter() + .map(|method| { + ( + (method.file.clone(), method.name.clone()), + local_contract_assignments(method), + ) + }) + .collect() +} + +fn methods_by_file<'a>(methods: &'a [MethodSummary]) -> BTreeMap> { + let mut out: BTreeMap> = BTreeMap::new(); + for method in methods { + out.entry(method.file.clone()).or_default().push(method); + } + out +} + +fn local_contract_assignments(method: &MethodSummary) -> BTreeMap { + local_flow::local_contract_assignments(method) + .into_iter() + .filter_map(|(name, source)| contract_of(&source, &BTreeMap::new(), 0).map(|c| (name, c))) + .collect() +} + +fn rescue_nil_hits( + document: &Document, + methods: &[&MethodSummary], + assignment_maps: &BTreeMap<(String, String), BTreeMap>, +) -> Vec { + let mut out = Vec::new(); + for method in methods { + let empty = BTreeMap::new(); + let assignment_map = assignment_maps + .get(&(method.file.clone(), method.name.clone())) + .unwrap_or(&empty); + for statement in &method.statements { + if !statement.source.contains("rescue nil") { + continue; + } + let Some(call) = document.call_sites.iter().find(|candidate| { + candidate.function == method.name && inside_span(candidate.span, statement.span) + }) else { + continue; + }; + let Some(contract) = contract_of(&call_expression(call), assignment_map, 0) else { + continue; + }; + out.push(Hit { + contract, + file: method.file.clone(), + defn: method.name.clone(), + line: statement.line, + span: statement.span, + }); + } + } + out +} + +fn contract_of( + receiver: &str, + assignment_map: &BTreeMap, + depth: usize, +) -> Option { + let source = receiver.trim(); + if source.is_empty() || depth >= 8 { + return None; + } + + if let Some(mapped) = assignment_map.get(source) { + return Some(mapped.clone()); + } + if source.starts_with('@') { + return 
Some(source.to_string()); + } + + static INDEX_SOURCE: OnceLock = OnceLock::new(); + let index_source = + INDEX_SOURCE.get_or_init(|| Regex::new(r"^(?:[A-Za-z_]\w*|self)\s*\[(.+)\]$").unwrap()); + if let Some(captures) = index_source.captures(source) { + return Some(format!("[{}]", captures[1].trim())); + } + + static LOCAL_SOURCE: OnceLock = OnceLock::new(); + let local_source = LOCAL_SOURCE.get_or_init(|| Regex::new(r"^[A-Za-z_]\w*$").unwrap()); + if local_source.is_match(source) { + return Some("~local".to_string()); + } + + if source.contains('.') { + let mut member = source.split('.').last().unwrap_or("").to_string(); + if let Some(index) = member.find('(') { + if member.ends_with(')') { + member.truncate(index); + } + } + if TRANSIENT_NOARG_MIDS.contains(&member.as_str()) || member.is_empty() { + return None; + } + return Some(format!(".{member}")); + } + + None +} + +fn call_expression(call: &CallSite) -> String { + [call.receiver.as_str(), call.message.as_str()] + .into_iter() + .filter(|part| !part.is_empty()) + .collect::>() + .join(".") +} + +fn inside_span(inner: Span, outer: Span) -> bool { + let starts_after_or_at = + (inner[0] > outer[0]) || (inner[0] == outer[0] && inner[1] >= outer[1]); + let ends_before_or_at = (inner[2] < outer[2]) || (inner[2] == outer[2] && inner[3] <= outer[3]); + starts_after_or_at && ends_before_or_at +} + +struct Report { + guard: Vec, + dispatch: Vec, +} + +impl Report { + fn new(guard: Vec, dispatch: Vec) -> Self { + Self { guard, dispatch } + } + + fn ranked(&self) -> Vec { + let mut ess = BTreeMap::new(); + for h in &self.dispatch { + *ess.entry(&h.contract).or_insert(0) += 1; + } + + let mut rows_map: Vec<(String, Vec<&Hit>)> = Vec::new(); + for h in &self.guard { + if let Some((_, hits)) = rows_map + .iter_mut() + .find(|(contract, _)| contract == &h.contract) + { + hits.push(h); + } else { + rows_map.push((h.contract.clone(), vec![h])); + } + } + + let rows: Vec<_> = rows_map + .into_iter() + .map(|(contract, hs)| { 
+ let mut methods_set = BTreeSet::new(); + for h in &hs { + methods_set.insert((&h.file, &h.defn)); + } + let sites = hs.iter().map(|h| loc(h)).collect(); + let spans = hs.iter().map(|h| (loc(h), h.span)).collect(); + let essential = ess.get(&contract).cloned().unwrap_or(0); + DecisionPressureRow { + contract, + decisions: hs.len(), + essential, + methods: methods_set.len(), + sites, + spans, + } + }) + .collect(); + + let mut named: Vec<_> = rows + .iter() + .filter(|r| r.contract != "~local") + .cloned() + .collect(); + named.sort_by(|a, b| { + b.decisions + .cmp(&a.decisions) + .then_with(|| b.methods.cmp(&a.methods)) + }); + + let local: Vec<_> = rows + .into_iter() + .filter(|r| r.contract == "~local") + .collect(); + named.into_iter().chain(local).collect() + } +} + +fn loc(h: &Hit) -> String { + format!("{}:{}:{}", h.file, h.defn, h.line) +} diff --git a/gems/decomplex/rust/src/decomplex/detectors/derived_state.rs b/gems/decomplex/rust/src/decomplex/detectors/derived_state.rs new file mode 100644 index 000000000..3bbcb8551 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/derived_state.rs @@ -0,0 +1,183 @@ +use crate::decomplex::ast::Span; +use crate::decomplex::detectors::local_flow::{self, MethodSummary, Statement}; +use crate::decomplex::syntax::{self, Document, Language}; +use anyhow::Result; +use serde::Serialize; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::PathBuf; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct DerivedStateRow { + pub file: String, + pub defn: String, + pub derived: String, + pub source: String, + pub derived_at: usize, + pub source_reassigned_at: usize, + pub gap: isize, + pub at: String, + pub spans: BTreeMap, +} + +#[derive(Clone, Debug)] +struct Asgn { + name: String, + deps: Vec, + line: usize, + span: Span, + statement_index: usize, +} + +pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { + let documents = syntax::parse_files(files, language)?; + 
Ok(scan_documents(&documents)) +} + +pub fn scan_documents(documents: &[Document]) -> Vec { + scan_summaries(local_flow::scan_documents(documents)) +} + +pub fn scan_summaries(summaries: Vec) -> Vec { + let mut out = summaries + .iter() + .flat_map(|method| analyze_method(method)) + .collect::>(); + out.sort_by(|a, b| b.gap.cmp(&a.gap)); + out +} + +fn analyze_method(method: &MethodSummary) -> Vec { + analyze(&method.file, &method.name, &assignments(method)) +} + +fn assignments(method: &MethodSummary) -> Vec { + method + .statements + .iter() + .flat_map(|statement| { + let mut writes = statement.writes.iter().cloned().collect::>(); + writes.sort_by(|a, b| { + write_position(&statement.source, a) + .cmp(&write_position(&statement.source, b)) + .then_with(|| a.cmp(b)) + }); + writes + .into_iter() + .map(|name| Asgn { + deps: dependencies_for(statement, &name), + name, + line: statement.line, + span: statement.span, + statement_index: statement.index, + }) + .collect::>() + }) + .collect() +} + +fn write_position(source: &str, name: &str) -> usize { + identifier_positions(source) + .into_iter() + .find_map(|(identifier, position)| (identifier == name).then_some(position)) + .unwrap_or(usize::MAX) +} + +fn identifier_positions(source: &str) -> Vec<(String, usize)> { + let mut out = Vec::new(); + let mut current = String::new(); + let mut start = 0usize; + for (index, ch) in source.char_indices() { + if ch == '_' || ch.is_ascii_alphanumeric() { + if current.is_empty() { + start = index; + } + current.push(ch); + } else if !current.is_empty() { + if current + .chars() + .next() + .map(|first| first == '_' || first.is_ascii_alphabetic()) + .unwrap_or(false) + { + out.push((current.clone(), start)); + } + current.clear(); + } + } + if !current.is_empty() + && current + .chars() + .next() + .map(|first| first == '_' || first.is_ascii_alphabetic()) + .unwrap_or(false) + { + out.push((current, start)); + } + out +} + +fn dependencies_for(statement: &Statement, name: &str) 
-> Vec { + let mut deps: Vec<_> = statement + .dependencies + .iter() + .filter_map(|(left, right)| { + if left == name { + Some(right.clone()) + } else { + None + } + }) + .collect::>() + .into_iter() + .collect(); + deps.sort(); + deps +} + +fn analyze(file: &str, defn: &str, asgns: &[Asgn]) -> Vec { + let mut out = Vec::new(); + for (i, b) in asgns.iter().enumerate() { + if b.deps.is_empty() { + continue; + } + + for a in &b.deps { + if a == &b.name { + continue; + } + + // a reassigned strictly after b's definition? + let reasn = asgns + .iter() + .skip(i + 1) + .find(|x| &x.name == a && x.statement_index > b.statement_index); + let Some(reasn) = reasn else { continue }; + + // b recomputed at or after a's reassignment? + let recomputed = asgns + .iter() + .skip(i + 1) + .any(|x| &x.name == &b.name && x.statement_index >= reasn.statement_index); + if recomputed { + continue; + } + + let loc = format!("{}:{}:{}", file, defn, b.line); + let mut spans = BTreeMap::new(); + spans.insert(loc.clone(), b.span); + + out.push(DerivedStateRow { + file: file.to_string(), + defn: defn.to_string(), + derived: b.name.clone(), + source: a.clone(), + derived_at: b.line, + source_reassigned_at: reasn.line, + gap: (reasn.line as isize) - (b.line as isize), + at: loc, + spans, + }); + } + } + out +} diff --git a/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs b/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs new file mode 100644 index 000000000..a7e3904da --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/false_simplicity.rs @@ -0,0 +1,211 @@ +use crate::decomplex::ast::Span; +use crate::decomplex::syntax::{self, Document, Language}; +use anyhow::Result; +use serde::Serialize; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::PathBuf; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct FalseSimplicityRow { + pub kind: String, + pub detail: String, + pub support: usize, + pub scatter: usize, + pub at: String, + 
pub sites: Vec, + pub spans: BTreeMap, +} + +#[derive(Clone, Debug)] +struct Hit { + kind: String, + detail: String, + file: String, + defn: String, + line: usize, + span: Span, +} + +#[derive(Clone, Debug)] +struct ClassRec { + name: String, + file: String, + line: usize, + core: bool, + span: Span, +} + +pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { + let documents = syntax::parse_files(files, language)?; + Ok(scan_documents(&documents)) +} + +pub fn scan_documents(documents: &[Document]) -> Vec { + let mut hits = Vec::new(); + let mut classrecs = Vec::new(); + for document in documents { + hits.extend(hits_for_document(document)); + let (doc_recs, doc_hits) = class_records_for_document(document); + classrecs.extend(doc_recs); + hits.extend(doc_hits); + } + Report::new(hits, classrecs).findings() +} + +fn class_records_for_document(document: &Document) -> (Vec, Vec) { + let function_owners = document + .function_defs + .iter() + .map(|function| function.owner.clone()) + .filter(|owner| !owner.is_empty()) + .collect::>(); + let core_owner_names = syntax::core_owner_names(document); + let mut recs = Vec::new(); + let mut hits = Vec::new(); + + for owner in &document.owner_defs { + let canonical = owner.name.trim_start_matches("::").to_string(); + if canonical.is_empty() { + continue; + } + if !function_owners.contains(&owner.name) && !function_owners.contains(&canonical) { + continue; + } + let simple = canonical + .split("::") + .last() + .unwrap_or(canonical.as_str()) + .to_string(); + let core = !canonical.contains("::") && core_owner_names.contains(&simple.as_str()); + recs.push(ClassRec { + name: canonical.clone(), + file: owner.file.clone(), + line: owner.line, + core, + span: owner.span, + }); + if core { + hits.push(Hit { + kind: "monkeypatch".to_string(), + detail: simple.clone(), + file: owner.file.clone(), + defn: simple, + line: owner.line, + span: owner.span, + }); + } + } + + (recs, hits) +} + +fn hits_for_document(document: 
&Document) -> Vec { + document + .semantic_effect_sites + .iter() + .map(|site| Hit { + kind: site.kind.clone(), + detail: site.detail.clone(), + file: site.file.clone(), + defn: if site.function.is_empty() { + "(top-level)".to_string() + } else { + site.function.clone() + }, + line: site.line, + span: site.span, + }) + .collect() +} + +struct Report { + hits: Vec, +} + +impl Report { + fn new(mut hits: Vec, classrecs: Vec) -> Self { + let mut grouped: Vec<(String, Vec)> = Vec::new(); + for rec in classrecs { + if let Some((_, recs)) = grouped.iter_mut().find(|(name, _)| name == &rec.name) { + recs.push(rec); + } else { + grouped.push((rec.name.clone(), vec![rec])); + } + } + for (_name, mut recs) in grouped { + recs.sort_by(|left, right| { + left.file + .cmp(&right.file) + .then_with(|| left.line.cmp(&right.line)) + }); + if recs.first().is_some_and(|rec| rec.core) { + continue; + } + let file_count = recs + .iter() + .map(|rec| rec.file.clone()) + .collect::>() + .len(); + if file_count < 2 { + continue; + } + for rec in recs { + hits.push(Hit { + kind: "monkeypatch".to_string(), + detail: format!("reopen {}", rec.name), + file: rec.file.clone(), + defn: rec.name.clone(), + line: rec.line, + span: rec.span, + }); + } + } + Self { hits } + } + + fn findings(&self) -> Vec { + let mut groups: Vec<((String, String), Vec<&Hit>)> = Vec::new(); + for hit in &self.hits { + let key = (hit.kind.clone(), hit.detail.clone()); + if let Some((_, hits)) = groups.iter_mut().find(|(existing, _)| existing == &key) { + hits.push(hit); + } else { + groups.push((key, vec![hit])); + } + } + + let mut out = Vec::new(); + for ((kind, detail), hits) in groups { + let units = hits + .iter() + .map(|hit| (hit.file.clone(), hit.defn.clone())) + .collect::>(); + let mut sites = Vec::new(); + let mut spans = BTreeMap::new(); + for hit in &hits { + let loc = format!("{}:{}:{}", hit.file, hit.defn, hit.line); + if !sites.contains(&loc) { + sites.push(loc.clone()); + } + 
spans.entry(loc).or_insert(hit.span); + } + out.push(FalseSimplicityRow { + kind, + detail, + support: hits.len(), + scatter: units.len(), + at: sites.first().cloned().unwrap_or_default(), + sites, + spans, + }); + } + out.sort_by(|a, b| { + b.scatter + .cmp(&a.scatter) + .then_with(|| b.support.cmp(&a.support)) + .then_with(|| a.kind.cmp(&b.kind)) + .then_with(|| a.detail.cmp(&b.detail)) + }); + out + } +} diff --git a/gems/decomplex/rust/src/decomplex/detectors/fat_union.rs b/gems/decomplex/rust/src/decomplex/detectors/fat_union.rs new file mode 100644 index 000000000..c8aab01c9 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/fat_union.rs @@ -0,0 +1,145 @@ +use crate::decomplex::ast::Span; +use crate::decomplex::syntax::{self, DispatchSite, Document, Language}; +use anyhow::Result; +use serde::Serialize; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::PathBuf; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct FatUnionReport { + pub fat_unions: Vec, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct FatUnionRow { + pub name: String, + pub common: Vec, + pub variant: Vec, + pub degenerate: bool, + pub support: usize, + pub scatter: usize, + pub variant_set: Vec, + pub at: String, + pub spans: BTreeMap, +} + +pub fn scan_files(files: &[PathBuf], language: Language) -> Result { + let documents = syntax::parse_files(files, language)?; + Ok(scan_documents(&documents)) +} + +pub fn scan_documents(documents: &[Document]) -> FatUnionReport { + let sites = documents + .iter() + .flat_map(|document| document.dispatch_sites.iter()) + .collect::>(); + FatUnionReport { + fat_unions: fat_unions_from_sites(&sites, 3, 2, 0.6), + } +} + +fn fat_unions_from_sites( + sites: &[&DispatchSite], + min_variants: usize, + min_common: usize, + ratio: f64, +) -> Vec { + let mut groups: BTreeMap, Vec<&DispatchSite>> = BTreeMap::new(); + for site in sites { + groups + .entry(site.variant_set.clone()) + .or_default() + .push(*site); 
+ } + + let mut rows = Vec::new(); + for (variant_set, group) in groups { + let variant_count = variant_set.len(); + if variant_count < min_variants { + continue; + } + + let mut by_member_variant: BTreeMap> = BTreeMap::new(); + let mut outside = BTreeSet::new(); + for site in &group { + for (variant, members) in &site.arm_members { + for member in members { + by_member_variant + .entry(member.clone()) + .or_default() + .insert(variant.clone()); + } + } + for member in &site.outside { + outside.insert(member.clone()); + } + } + + let mut keys = by_member_variant.keys().cloned().collect::>(); + keys.extend(outside.iter().cloned()); + let common = keys + .iter() + .filter(|member| { + outside.contains(*member) + || by_member_variant + .get(*member) + .map(|variants| variants.len() >= variant_count) + .unwrap_or(false) + }) + .cloned() + .collect::>(); + let variant = keys + .iter() + .filter(|member| { + !outside.contains(*member) + && by_member_variant + .get(*member) + .map(|variants| variants.len() == 1) + .unwrap_or(false) + }) + .cloned() + .collect::>(); + let total = common.len() + variant.len(); + if common.len() < min_common || total == 0 || common.len() as f64 / (total as f64) < ratio { + continue; + } + + let at = group + .first() + .map(|site| format!("{}:{}:{}", site.file, site.function, site.line)) + .unwrap_or_default(); + let mut spans = BTreeMap::new(); + for site in &group { + spans.insert( + format!("{}:{}:{}", site.file, site.function, site.line), + site.span, + ); + } + let scatter = group + .iter() + .map(|site| (site.file.clone(), site.function.clone())) + .collect::>() + .len(); + rows.push(( + group.len() * common.len(), + FatUnionRow { + name: String::new(), + common, + variant: variant.clone(), + degenerate: variant.is_empty(), + support: group.len(), + scatter, + variant_set, + at, + spans, + }, + )); + } + + rows.sort_by(|a, b| { + (if a.1.degenerate { 0 } else { 1 }) + .cmp(&(if b.1.degenerate { 0 } else { 1 })) + .then_with(|| 
b.0.cmp(&a.0)) + }); + rows.into_iter().map(|(_, row)| row).collect() +} diff --git a/gems/decomplex/rust/src/decomplex/detectors/flay_similarity.rs b/gems/decomplex/rust/src/decomplex/detectors/flay_similarity.rs new file mode 100644 index 000000000..443d477c0 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/flay_similarity.rs @@ -0,0 +1,779 @@ +use crate::decomplex::ast::Span; +use crate::decomplex::syntax::{self, CloneCandidate, Document, Language, SimilarityFinding}; +use anyhow::Result; +use std::collections::{BTreeMap, HashMap, HashSet}; +use std::path::PathBuf; + +const MAX_FUZZY_CHILDREN: usize = 14; + +pub fn scan_files( + files: &[PathBuf], + language: Language, + mass: usize, + fuzzy: usize, +) -> Result> { + let documents = syntax::parse_files(files, language)?; + Ok(scan_documents(&documents, mass, fuzzy)) +} + +pub fn scan_documents(documents: &[Document], mass: usize, fuzzy: usize) -> Vec { + let mut scanner = Scanner::new(mass, fuzzy); + scanner.scan(documents) +} + +struct Scanner { + mass: usize, + fuzzy: usize, +} + +impl Scanner { + fn new(mass: usize, fuzzy: usize) -> Self { + Self { mass, fuzzy } + } + + fn scan(&mut self, documents: &[Document]) -> Vec { + let mut candidates = Vec::new(); + for document in documents { + candidates.extend(self.candidates_for_document(document)); + } + let mut findings = self.type2_findings(&candidates); + findings.extend(self.type3_findings(&candidates)); + findings.sort_by(|left, right| { + ( + clone_type_rank(&left.clone_type), + std::cmp::Reverse(left.mass), + left.node.clone(), + left.at.clone(), + ) + .cmp(&( + clone_type_rank(&right.clone_type), + std::cmp::Reverse(right.mass), + right.node.clone(), + right.at.clone(), + )) + }); + self.prune_nested_findings(findings) + } + + fn candidates_for_document(&mut self, document: &Document) -> Vec { + let mut out = Vec::new(); + let mut seen = HashSet::new(); + for candidate in syntax::clone_candidates(document) { + self.add_candidate(&mut out, 
&mut seen, candidate); + } + out + } + + fn add_candidate( + &self, + out: &mut Vec, + seen: &mut HashSet, + candidate: CloneCandidate, + ) { + if candidate.mass < self.effective_mass_floor() { + return; + } + let key = format!( + "{}\0{}\0{:?}\0{}\0{}", + candidate.file, + candidate.line, + candidate.span, + candidate.node_name, + candidate.fingerprint + ); + if seen.insert(key) { + out.push(candidate); + } + } + + fn type2_findings(&self, candidates: &[CloneCandidate]) -> Vec { + let mut groups: HashMap<&str, Vec> = HashMap::new(); + for candidate in candidates { + groups + .entry(candidate.fingerprint.as_str()) + .or_default() + .push(candidate.clone()); + } + let mut out = Vec::new(); + for cluster in groups.values() { + let cluster = uniq_sites(cluster.clone()); + if cluster.len() < 2 { + continue; + } + let raw_count = cluster + .iter() + .map(|candidate| candidate.raw.as_str()) + .collect::>() + .len(); + if raw_count < 2 { + continue; + } + let mass = cluster + .iter() + .map(|candidate| candidate.mass) + .min() + .unwrap_or(0); + out.push(self.finding_for(&cluster, "type2", mass)); + } + out + } + + fn type3_findings(&self, candidates: &[CloneCandidate]) -> Vec { + if self.fuzzy == 0 { + return Vec::new(); + } + let mut groups: HashMap> = HashMap::new(); + for candidate in candidates { + for (signature, signature_mass) in self.fuzzy_signatures(candidate) { + if signature_mass >= self.effective_mass_floor() { + groups + .entry(signature) + .or_default() + .push((candidate.clone(), signature_mass)); + } + } + } + + let mut best_by_key: BTreeMap = BTreeMap::new(); + for rows in groups.values() { + let cluster = uniq_sites( + rows.iter() + .map(|(candidate, _)| candidate.clone()) + .collect(), + ); + if cluster.len() < 2 { + continue; + } + let fingerprint_count = cluster + .iter() + .map(|candidate| candidate.fingerprint.as_str()) + .collect::>() + .len(); + if fingerprint_count < 2 { + continue; + } + let mut key = cluster + .iter() + .map(|candidate| { + 
format!( + "{}\0{}\0{}", + candidate.file, candidate.line, candidate.node_name + ) + }) + .collect::>(); + key.sort(); + let key = key.join("\0"); + let mass = rows + .iter() + .map(|(_, signature_mass)| *signature_mass) + .max() + .unwrap_or(0); + let finding = self.finding_for(&cluster, "type3", mass); + if best_by_key + .get(&key) + .map(|existing| existing.mass < finding.mass) + .unwrap_or(true) + { + best_by_key.insert(key, finding); + } + } + best_by_key.into_values().collect() + } + + fn finding_for( + &self, + cluster: &[CloneCandidate], + clone_type: &str, + mass: usize, + ) -> SimilarityFinding { + let mut sites = cluster.iter().map(site_for).collect::>(); + sites.sort(); + SimilarityFinding { + at: sites.first().cloned().unwrap_or_default(), + sites, + spans: self.spans_for(cluster), + clone_type: clone_type.to_string(), + node: most_common_node(cluster), + mass, + locations: { + let mut locations = cluster + .iter() + .map(|candidate| format!("{}:{}", candidate.file, candidate.line)) + .collect::>(); + locations.sort(); + locations + }, + } + } + + fn spans_for(&self, cluster: &[CloneCandidate]) -> BTreeMap { + let mut spans = BTreeMap::new(); + for candidate in cluster { + let value = if candidate.node_name == "defn" { + [candidate.span[0], 0, candidate.span[2], 1] + } else { + candidate.span + }; + spans.insert(site_for(candidate), value); + } + spans + } + + fn prune_nested_findings(&self, findings: Vec) -> Vec { + let defn_site_sets = findings + .iter() + .filter(|finding| finding.node == "defn") + .map(|finding| (finding.clone_type.clone(), site_identities(finding))) + .collect::>(); + let mut kept = Vec::new(); + for finding in findings { + if finding.node != "defn" + && defn_site_sets.contains(&(finding.clone_type.clone(), site_identities(&finding))) + { + continue; + } + if kept.iter().any(|larger| nested_finding(&finding, larger)) { + continue; + } + kept.push(finding); + } + kept + } + + fn fuzzy_signatures(&self, candidate: &CloneCandidate) 
-> Vec<(String, usize)> { + let children = &candidate.child_fingerprints; + if children.len() < 2 || children.len() > MAX_FUZZY_CHILDREN { + return Vec::new(); + } + let max_delete = self.fuzzy.min(children.len() - 1); + let mut signatures = Vec::new(); + for delete_count in 0..=max_delete { + for deleted in combinations(children.len(), delete_count) { + let deleted = deleted.into_iter().collect::>(); + let mut kept = Vec::new(); + let mut mass = 0; + for (index, fingerprint) in children.iter().enumerate() { + if deleted.contains(&index) { + continue; + } + kept.push(fingerprint.as_str()); + mass += candidate.child_masses[index]; + } + signatures.push((format!("{}({})", candidate.node_name, kept.join("|")), mass)); + } + } + signatures + } + + fn effective_mass_floor(&self) -> usize { + self.mass + .max(((self.mass as f64) * 23.0 / 8.0).ceil() as usize) + } +} + +fn uniq_sites(candidates: Vec) -> Vec { + let mut seen = HashSet::new(); + let mut out = Vec::new(); + for candidate in candidates { + let key = format!( + "{}\0{}\0{:?}\0{}", + candidate.file, candidate.line, candidate.span, candidate.node_name + ); + if seen.insert(key) { + out.push(candidate); + } + } + out +} + +fn most_common_node(cluster: &[CloneCandidate]) -> String { + let mut order = Vec::new(); + let mut tally: HashMap<&str, usize> = HashMap::new(); + for candidate in cluster { + if !tally.contains_key(candidate.node_name.as_str()) { + order.push(candidate.node_name.as_str()); + } + *tally.entry(candidate.node_name.as_str()).or_default() += 1; + } + let mut best = ""; + let mut best_count = 0; + for node in order { + let count = tally.get(node).copied().unwrap_or(0); + if count > best_count { + best = node; + best_count = count; + } + } + best.to_string() +} + +fn site_for(candidate: &CloneCandidate) -> String { + format!( + "{}:{}:{}", + candidate.file, candidate.method_name, candidate.line + ) +} + +fn nested_finding(inner: &SimilarityFinding, outer: &SimilarityFinding) -> bool { + if 
/// Enumerates every `count`-element subset of the indices `0..size`.
///
/// Each subset is returned as an ascending list of indices, and the subsets
/// themselves are in lexicographic order. `count == 0` yields a single empty
/// subset; `count > size` yields nothing.
fn combinations(size: usize, count: usize) -> Vec<Vec<usize>> {
    fn step(
        start: usize,
        size: usize,
        count: usize,
        current: &mut Vec<usize>,
        out: &mut Vec<Vec<usize>>,
    ) {
        if current.len() == count {
            out.push(current.clone());
            return;
        }
        // Still need `needed` indices; the next one can be at most
        // `size - needed`, otherwise too few indices remain after it.
        let needed = count - current.len();
        let Some(last_start) = size.checked_sub(needed) else {
            return;
        };
        for index in start..=last_start {
            current.push(index);
            step(index + 1, size, count, current, out);
            current.pop();
        }
    }

    let mut out = Vec::new();
    step(0, size, count, &mut Vec::new(), &mut out);
    out
}
NamedTempFile::new().expect("tempfile"); + file.write_all(source.as_bytes()).expect("write source"); + scan_files(&[file.path().to_path_buf()], Language::Ruby, mass, fuzzy).expect("scan") + } + + fn document(source: &str) -> Document { + let mut file = NamedTempFile::new().expect("tempfile"); + file.write_all(source.as_bytes()).expect("write source"); + syntax::parse_file(file.path().to_path_buf(), Language::Ruby).expect("document") + } + + #[test] + fn detects_type2_similarity_for_renamed_ruby_methods() { + let out = scan( + r#" +def a(node) + return false unless node.respond_to?(:type) + node.type == :heap || node.type == :frame +end + +def b(entry) + return false unless entry.respond_to?(:kind) + entry.kind == :heap || entry.kind == :frame +end +"#, + 8, + 1, + ); + assert!(out + .iter() + .any(|finding| finding.clone_type == "type2" && finding.node == "defn")); + } + + #[test] + fn detects_type3_similarity_for_missing_child() { + let out = scan( + r#" +def a(node) + alpha(node.left) + beta(node.right) + gamma(node.name) + delta(node.type) +end + +def b(entry) + alpha(entry.left) + beta(entry.right) + delta(entry.type) +end +"#, + 4, + 1, + ); + assert!(out.iter().any(|finding| finding.clone_type == "type3")); + } + + #[test] + fn singleton_method_mass_matches_ruby_oracle_shape() { + let doc = document( + r#" +def self.release(ctx_id, lock_index, lock_ref, unlock_method) + [ + MIR::Set.new( + MIR::FieldGet.new(MIR::Ident.new("__ctx_#{ctx_id}"), "__lock_held_#{lock_index}"), + MIR::Lit.new("false"), + false, + ), + MIR::ExprStmt.new( + MIR::MethodCall.new(MIR::Ident.new(lock_ref), unlock_method, [], false), + false, + ), + ] +end +"#, + ); + let function = doc.function_defs.first().expect("function"); + let (_fingerprint, mass) = + language_profile(Language::Ruby).clone_fingerprint(&function.body); + assert_eq!(mass, 128); + } + + #[test] + fn unless_mass_matches_ruby_oracle_shape() { + let doc = document( + r#" +def check(attrs, tok) + unless attrs + has_at = 
T.must(tok).value.start_with?('@') + candidates = has_at ? BG_SIGILS.keys : BG_SIGILS.keys.map { |k| k.sub(/^@/, '') } + emit_typo_suggestion!( + tok, T.must(tok).value, candidates, + "Unknown BG prefix #{T.must(tok).value.inspect}", + "closest BG body sigil", + category: :type, cascade: true + ) + end +end +"#, + ); + let mut nodes = Vec::new(); + doc.root.walk(&mut nodes); + let node = nodes + .into_iter() + .find(|node| node.kind == "unless" && node.named) + .expect("unless"); + let (_fingerprint, mass) = language_profile(Language::Ruby).clone_fingerprint(node); + assert_eq!(mass, 126); + } + + #[test] + fn struct_assignment_mass_matches_ruby_oracle_shape() { + let doc = document( + r#" +DeferStmt = Struct.new(:body) do + extend T::Sig + include Stmt + sig { params(body: DeferBodyInput).void } + def initialize(body) + MIR.validate_defer_body!(body, "MIR::DeferStmt") + super(body) + end + + sig { returns(T::Array[BodySlot]) } + def body_slots + body.is_a?(Array) ? [body_slot(:body, body, ->(new_body) { self.body = new_body })] : [] + end + sig { returns(T::Array[Emittable]) } + def child_exprs = body.is_a?(Array) ? 
[] : compact_child_exprs([body]) +end +"#, + ); + let mut nodes = Vec::new(); + doc.root.walk(&mut nodes); + let node = nodes + .into_iter() + .find(|node| node.kind == "assignment" && node.named) + .expect("assignment"); + let (_fingerprint, mass) = language_profile(Language::Ruby).clone_fingerprint(node); + assert_eq!(mass, 178); + } + + #[test] + fn body_slots_mass_matches_ruby_oracle_shape() { + let doc = document( + r#" +SwitchStmt = Struct.new(:subject, :arms, :default_body) do + extend T::Sig + include Stmt + sig { returns(T::Array[Emittable]) } + def child_exprs + compact_child_exprs([subject, *(arms || []).flat_map(&:patterns)]) + end + sig { returns(T::Array[BodySlot]) } + def body_slots + slots = T.let([], T::Array[BodySlot]) + arms&.each_with_index do |arm, index| + slots << body_slot(:"arms_#{index}", arm.body, ->(new_body) { arm.body = new_body }) + end + slots << body_slot(:default_body, default_body, ->(new_body) { self.default_body = new_body }) if default_body + slots + end +end +"#, + ); + let function = doc + .function_defs + .iter() + .find(|function| function.name == "body_slots") + .expect("body_slots"); + let (_fingerprint, mass) = + language_profile(Language::Ruby).clone_fingerprint(&function.body); + assert_eq!(mass, 110); + } + + #[test] + fn if_bind_do_block_mass_matches_ruby_oracle_shape() { + let doc = document( + r#" +IfBind = Struct.new(:token, :bindings, :then_branch, :else_branch) do + extend T::Sig + include Locatable + + sig { params(args: T.untyped).void } + def initialize(*args) + super + self[:bindings] = [] if self[:bindings].nil? 
+ end + + sig { params(val: T::Array[AST::Binding]).void } + def bindings=(val) + self[:bindings] = val + end +end +"#, + ); + let mut nodes = Vec::new(); + doc.root.walk(&mut nodes); + let node = nodes + .into_iter() + .find(|node| node.kind == "do_block" && node.named) + .expect("do_block"); + let (_fingerprint, mass) = language_profile(Language::Ruby).clone_fingerprint(node); + assert_eq!(mass, 110); + } + + #[test] + fn control_flow_argument_mass_matches_ruby_oracle_shape() { + let doc = document( + r#" +def self.find_package_source(pkg_name, start_dir:) + dir = File.expand_path(start_dir) + loop do + candidate = File.join(dir, "packages", pkg_name, "src", "lib.cht") + return candidate if File.exist?(candidate) + + parent = File.dirname(dir) + break if parent == dir + + dir = parent + end + nil +end +"#, + ); + let function = doc.function_defs.first().expect("function"); + let (_fingerprint, mass) = + language_profile(Language::Ruby).clone_fingerprint(&function.body); + assert_eq!(mass, 96); + } + + #[test] + fn case_scope_pattern_mass_matches_ruby_oracle_shape() { + let doc = document( + r#" +def walk_for_local_decls(node, &block) + return if node.nil? 
+ case node + when AST::BindExpr, AST::VarDecl + yield node if auto?(node.type) + walk_for_local_decls(node.value, &block) + when AST::FunctionDef + when Array + node.each { |c| walk_for_local_decls(c, &block) } + when Hash + node.each_value { |v| walk_for_local_decls(v, &block) } + else + if node.respond_to?(:each_pair) + node.each_pair { |_, v| walk_for_local_decls(v, &block) } + end + end +end +"#, + ); + let mut nodes = Vec::new(); + doc.root.walk(&mut nodes); + let node = nodes + .into_iter() + .find(|node| node.kind == "case" && node.named) + .expect("case"); + let (_fingerprint, mass) = language_profile(Language::Ruby).clone_fingerprint(node); + assert_eq!(mass, 136); + } + + #[test] + fn case_simple_pattern_mass_matches_ruby_oracle_shape() { + let doc = document( + r#" +def references_alias?(expr, alias_name) + found = false + walk = lambda do |n| + return if found + case n + when nil, Symbol, String, Integer, Float, TrueClass, FalseClass + when Array then n.each { |x| walk.call(x) } + when AST::Identifier + found = true if n.name == alias_name + else + n.each_pair { |_, v| walk.call(v) } if n.respond_to?(:each_pair) + end + end + walk.call(expr) + found +end +"#, + ); + let mut nodes = Vec::new(); + doc.root.walk(&mut nodes); + let node = nodes + .into_iter() + .find(|node| node.kind == "case" && node.named) + .expect("case"); + let (_fingerprint, mass) = language_profile(Language::Ruby).clone_fingerprint(node); + assert_eq!(mass, 96); + } + + #[test] + fn alias_cluster_mass_matches_ruby_oracle_shape() { + let doc = document( + r##" +def alias_clusters + @preds.group_by(&:body).filter_map do |body, ps| + names = ps.map(&:name).uniq + next if names.size < 2 + + { body: body, names: names, + sites: ps.map { |p| "#{p.file}:#{p.name}:#{p.line}" }, + spans: ps.to_h { |p| ["#{p.file}:#{p.name}:#{p.line}", p.span] } } + end.sort_by { |h| -h[:names].size } +end +"##, + ); + let function = doc.function_defs.first().expect("function"); + let (_fingerprint, mass) = + 
language_profile(Language::Ruby).clone_fingerprint(&function.body); + assert_eq!(mass, 175); + } + + #[test] + fn native_module_mass_matches_ruby_oracle_shape() { + let doc = document( + r##" +module Decomplex + module Native + module CoUpdate + module_function + + def scan(files) + paths = Array(files).map(&:to_s) + validate_ruby_files!(paths) + JSON.parse(Command.run("co-update", "--language", "ruby", *paths)) + end + + private_class_method def self.validate_ruby_files!(paths) + bad = paths.reject { |path| File.extname(path) == ".rb" } + return if bad.empty? + + raise ArgumentError, "--engine=rust currently supports Ruby files only: #{bad.join(', ')}" + end + end + end +end +"##, + ); + let mut nodes = Vec::new(); + doc.root.walk(&mut nodes); + let node = nodes + .into_iter() + .find(|node| node.kind == "module" && node.named) + .expect("module"); + let (_fingerprint, mass) = language_profile(Language::Ruby).clone_fingerprint(node); + assert_eq!(mass, 150); + } + + #[test] + fn hidden_method_name_mass_matches_ruby_oracle_shape() { + let doc = document( + r##" +def inline_def_name(node) + return nil unless inline_def_argument_list?(node) + + receiver_index = node.named_children.index { |child| child.kind == "self" || child.kind == "constant" } + search = receiver_index ? node.named_children[(receiver_index + 1)..] : node.named_children + name = search&.find { |child| %w[identifier field_identifier property_identifier].include?(child.kind) }&.text + receiver_index ? 
"self.#{name}" : name +end +"##, + ); + let function = doc.function_defs.first().expect("function"); + let (_fingerprint, mass) = + language_profile(Language::Ruby).clone_fingerprint(&function.body); + assert_eq!(mass, 132); + } +} diff --git a/gems/decomplex/rust/src/decomplex/detectors/function_lcom.rs b/gems/decomplex/rust/src/decomplex/detectors/function_lcom.rs new file mode 100644 index 000000000..d003e3ba3 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/function_lcom.rs @@ -0,0 +1,297 @@ +use crate::decomplex::ast::Span; +use crate::decomplex::detectors::local_flow; +use crate::decomplex::syntax::{Document, Language}; +use anyhow::Result; +use serde::Serialize; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::PathBuf; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct FunctionLcomRow { + pub file: String, + pub defn: String, + pub owner: String, + pub method: String, + pub line: usize, + pub at: String, + pub score: usize, + pub mode: String, + pub components: usize, + pub locals: usize, + pub statements: usize, + pub terminal_join: bool, + pub component_vars: Vec>, + pub component_lines: Vec>, + pub spans: BTreeMap, +} + +#[derive(Clone, Debug)] +struct Component { + vars: BTreeSet, + statements: Vec, +} + +pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { + let summaries = local_flow::scan_files(files, language)?; + Ok(scan_summaries(summaries)) +} + +pub fn scan_documents(documents: &[Document]) -> Vec { + scan_summaries(local_flow::scan_documents(documents)) +} + +pub fn scan_summaries(summaries: Vec) -> Vec { + FunctionLcom::new(summaries).findings() +} + +struct FunctionLcom { + summaries: Vec, + min_components: usize, + min_locals: usize, + min_statements: usize, + min_score: usize, +} + +impl FunctionLcom { + fn new(summaries: Vec) -> Self { + Self { + summaries, + min_components: 2, + min_locals: 5, + min_statements: 5, + min_score: 40, + } + } + + fn findings(&self) -> Vec { + let mut 
out = self + .summaries + .iter() + .filter_map(|summary| self.finding_for(summary)) + .collect::>(); + out.sort_by(|a, b| { + b.score + .cmp(&a.score) + .then_with(|| a.file.cmp(&b.file)) + .then_with(|| a.line.cmp(&b.line)) + }); + out + } + + fn finding_for(&self, summary: &local_flow::MethodSummary) -> Option { + if summary.statements.len() < self.min_statements { + return None; + } + + let full_components = + self.substantial_components(self.components(&summary.statements), &summary.statements); + let pre_terminal = self.pre_terminal_statements(summary); + let pre_components = + self.substantial_components(self.components(pre_terminal), pre_terminal); + let local_count = self.local_names(&summary.statements).len(); + if local_count < self.min_locals { + return None; + } + + let terminal_join = self.terminal_join(summary, &pre_components); + let mut report_components = full_components; + let mut mode = "disjoint".to_string(); + if report_components.len() < self.min_components + && terminal_join + && pre_components.len() >= self.min_components + { + report_components = pre_components; + mode = "late_join".to_string(); + } + if report_components.len() < self.min_components { + return None; + } + + let score = self.score_for( + &report_components, + local_count, + summary.statements.len(), + terminal_join, + ); + if score < self.min_score { + return None; + } + + let at = format!("{}:{}:{}", summary.file, summary.name, summary.line); + let mut spans = BTreeMap::new(); + spans.insert(at.clone(), summary.span); + Some(FunctionLcomRow { + file: summary.file.clone(), + defn: summary.name.clone(), + owner: summary.owner.clone(), + method: summary.name.clone(), + line: summary.line, + at, + score, + mode, + components: report_components.len(), + locals: local_count, + statements: summary.statements.len(), + terminal_join, + component_vars: report_components + .iter() + .map(|component| component.vars.iter().cloned().collect()) + .collect(), + component_lines: 
report_components + .iter() + .map(|component| { + component + .statements + .iter() + .map(|index| summary.statements[*index].line) + .collect::>() + .into_iter() + .collect() + }) + .collect(), + spans, + }) + } + + fn pre_terminal_statements<'a>( + &self, + summary: &'a local_flow::MethodSummary, + ) -> &'a [local_flow::Statement] { + if summary.statements.len() <= 1 { + &[] + } else { + &summary.statements[..summary.statements.len() - 1] + } + } + + fn terminal_join( + &self, + summary: &local_flow::MethodSummary, + pre_components: &[Component], + ) -> bool { + let Some(terminal) = summary.statements.last() else { + return false; + }; + let mut component_index = BTreeMap::new(); + for (index, component) in pre_components.iter().enumerate() { + for name in &component.vars { + component_index.insert(name.clone(), index); + } + } + self.touched_vars(terminal) + .into_iter() + .filter_map(|name| component_index.get(&name).copied()) + .collect::>() + .len() + >= self.min_components + } + + fn score_for( + &self, + components: &[Component], + local_count: usize, + statement_count: usize, + terminal_join: bool, + ) -> usize { + (components.len() * 10) + local_count + statement_count + if terminal_join { 5 } else { 0 } + } + + fn substantial_components( + &self, + raw_components: Vec>, + statements: &[local_flow::Statement], + ) -> Vec { + let mut components = raw_components + .into_iter() + .filter_map(|vars| { + let touched = statements + .iter() + .enumerate() + .filter_map(|(index, statement)| { + if !self.touched_vars(statement).is_disjoint(&vars) { + Some(index) + } else { + None + } + }) + .collect::>(); + if vars.len() < 2 || touched.len() < 2 { + return None; + } + Some(Component { + vars, + statements: touched, + }) + }) + .collect::>(); + components + .sort_by_key(|component| component.statements.first().copied().unwrap_or(usize::MAX)); + components + } + + fn components(&self, statements: &[local_flow::Statement]) -> Vec> { + let vars = 
self.local_names(statements); + let edges = self.graph_edges(statements); + let mut adjacency = vars + .iter() + .map(|name| (name.clone(), BTreeSet::new())) + .collect::>(); + for (left, right) in edges { + if left == right { + continue; + } + adjacency + .entry(left.clone()) + .or_default() + .insert(right.clone()); + adjacency.entry(right).or_default().insert(left); + } + + let mut visited = BTreeSet::new(); + let mut components = Vec::new(); + for name in vars { + if visited.contains(&name) { + continue; + } + let mut component = BTreeSet::new(); + let mut stack = vec![name]; + while let Some(current) = stack.pop() { + if visited.contains(¤t) { + continue; + } + visited.insert(current.clone()); + component.insert(current.clone()); + if let Some(neighbors) = adjacency.get(¤t) { + for neighbor in neighbors { + if !visited.contains(neighbor) { + stack.push(neighbor.clone()); + } + } + } + } + components.push(component); + } + components + } + + fn graph_edges(&self, statements: &[local_flow::Statement]) -> Vec<(String, String)> { + let mut edges = Vec::new(); + for statement in statements { + edges.extend(statement.dependencies.iter().cloned()); + edges.extend(statement.co_uses.iter().cloned()); + } + edges + } + + fn local_names(&self, statements: &[local_flow::Statement]) -> BTreeSet { + let mut names = BTreeSet::new(); + for statement in statements { + names.extend(self.touched_vars(statement)); + } + names + } + + fn touched_vars(&self, statement: &local_flow::Statement) -> BTreeSet { + statement.reads.union(&statement.writes).cloned().collect() + } +} diff --git a/gems/decomplex/rust/src/decomplex/detectors/implicit_control_flow.rs b/gems/decomplex/rust/src/decomplex/detectors/implicit_control_flow.rs new file mode 100644 index 000000000..85f4a683c --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/implicit_control_flow.rs @@ -0,0 +1,512 @@ +use crate::decomplex::ast::Span; +use crate::decomplex::syntax::{self, Document, Language}; +use 
anyhow::Result; +use serde::Serialize; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::PathBuf; + +#[derive(Clone, Debug, PartialEq, Serialize)] +pub struct ImplicitControlFlowReport { + pub ordered_protocols: Vec, + pub order_drift: Vec, +} + +#[derive(Clone, Debug, PartialEq, Serialize)] +pub struct ProtocolFinding { + pub kind: String, + pub protocol: Vec, + pub dependency: Vec, + pub states: Vec, + pub support: usize, + pub confidence: f64, + pub at: String, + pub observed: Vec, + pub missing: Vec, + pub sites: Vec, + pub spans: BTreeMap, +} + +#[derive(Clone, Debug)] +struct MethodEffect { + owner: String, + name: String, + reads: Vec, + writes: Vec, +} + +#[derive(Clone, Debug)] +struct Call { + mid: String, + line: usize, + span: Span, + reads: Vec, + writes: Vec, +} + +#[derive(Clone, Debug)] +struct MethodSequence { + file: String, + owner: String, + defn: String, + line: usize, + calls: Vec, +} + +const OPTIONAL_DIAGNOSTIC_MIDS: &[&str] = + &["error!", "fixable!", "read_interpolated_string", "warn!"]; + +pub fn scan_files(files: &[PathBuf], language: Language) -> Result { + let documents = syntax::parse_files(files, language)?; + Ok(scan_documents(&documents)) +} + +pub fn scan_documents(documents: &[Document]) -> ImplicitControlFlowReport { + let effect_index = EffectIndex::build_documents(documents); + let mut sequences = Vec::new(); + for document in documents { + sequences.extend(sequences_for_document(document, &effect_index)); + } + + let report = Report::new(sequences); + ImplicitControlFlowReport { + ordered_protocols: report.ordered_protocols(1), + order_drift: report.drift(4, 0.75), + } +} + +fn sequences_for_document(document: &Document, effect_index: &EffectIndex) -> Vec { + document + .protocol_call_paths + .iter() + .filter_map(|path| { + let calls = path + .calls + .iter() + .map(|call| { + let effect = effect_index.effect_for(&path.owner, &call.mid); + Call { + mid: call.mid.clone(), + line: call.line, + span: call.span, + 
reads: effect.map(|e| e.reads.clone()).unwrap_or_default(), + writes: effect.map(|e| e.writes.clone()).unwrap_or_default(), + } + }) + .collect::>(); + + if calls + .iter() + .filter(|call| !call.reads.is_empty() || !call.writes.is_empty()) + .count() + < 2 + { + return None; + } + + Some(MethodSequence { + file: path.file.clone(), + owner: path.owner.clone(), + defn: path.name.clone(), + line: path.line, + calls, + }) + }) + .collect() +} + +struct EffectIndex { + by_owner_name: BTreeMap<(String, String), MethodEffect>, + by_name: BTreeMap>, +} + +impl EffectIndex { + fn build_documents(documents: &[Document]) -> Self { + let mut effects = Vec::new(); + for document in documents { + for effect in &document.protocol_method_effects { + effects.push(MethodEffect { + owner: effect.owner.clone(), + name: effect.name.clone(), + reads: effect.reads.clone(), + writes: effect.writes.clone(), + }); + } + } + Self::from_effects(effects) + } + + fn from_effects(effects: Vec) -> Self { + let mut by_owner_name = BTreeMap::new(); + let mut by_name = BTreeMap::new(); + for e in effects { + by_owner_name.insert((e.owner.clone(), e.name.clone()), e.clone()); + by_name + .entry(e.name.clone()) + .or_insert_with(Vec::new) + .push(e); + } + Self { + by_owner_name, + by_name, + } + } + + fn effect_for(&self, owner: &str, name: &str) -> Option<&MethodEffect> { + if let Some(e) = self + .by_owner_name + .get(&(owner.to_string(), name.to_string())) + { + return Some(e); + } + let candidates = self.by_name.get(name)?; + let stateful: Vec<_> = candidates + .iter() + .filter(|e| !e.reads.is_empty() || !e.writes.is_empty()) + .collect(); + if stateful.len() == 1 { + Some(stateful[0]) + } else { + None + } + } +} + +struct Report { + sequences: Vec, + site_call_sets: BTreeMap<(String, String, String, usize), BTreeMap>, +} + +impl Report { + fn new(sequences: Vec) -> Self { + let mut site_call_sets = BTreeMap::new(); + for seq in &sequences { + let mut calls = BTreeMap::new(); + for c in seq + 
.calls + .iter() + .filter(|c| !c.reads.is_empty() || !c.writes.is_empty()) + { + calls.insert(c.mid.clone(), true); + } + site_call_sets.insert( + ( + seq.file.clone(), + seq.owner.clone(), + seq.defn.clone(), + seq.line, + ), + calls, + ); + } + Self { + sequences, + site_call_sets, + } + } + + fn ordered_protocols(&self, min_support: usize) -> Vec { + let mut counts: BTreeMap< + (String, String, String, String), + BTreeMap<(String, String, String, usize), ProtocolFinding>, + > = BTreeMap::new(); + for seq in &self.sequences { + let state_calls: Vec<_> = seq + .calls + .iter() + .filter(|c| !c.reads.is_empty() || !c.writes.is_empty()) + .collect(); + let collapsed = self.collapse_consecutive(&state_calls); + for i in 0..collapsed.len().saturating_sub(1) { + let left = collapsed[i]; + let right = collapsed[i + 1]; + let edge = self.dependency_edge(left, right); + let Some(edge) = edge else { continue }; + if self.diagnostic_protocol(&[left.mid.clone(), right.mid.clone()]) { + continue; + }; + + let key = ( + left.mid.clone(), + right.mid.clone(), + edge.0.join("|"), + edge.1.join("|"), + ); + let site_key = ( + seq.file.clone(), + seq.owner.clone(), + seq.defn.clone(), + seq.line, + ); + counts.entry(key).or_default().insert( + site_key, + ProtocolFinding { + kind: "protocol_pressure".to_string(), + protocol: vec![left.mid.clone(), right.mid.clone()], + dependency: edge.0, + states: edge.1, + support: 0, + confidence: 1.0, + at: format!("{}:{}:{}", seq.file, seq.defn, seq.line), + observed: vec![left.mid.clone(), right.mid.clone()], + missing: Vec::new(), + sites: Vec::new(), + spans: { + let mut s = BTreeMap::new(); + s.insert(format!("{}:{}:{}", seq.file, seq.defn, seq.line), left.span); + s + }, + }, + ); + } + } + + let mut out = Vec::new(); + for (_, sites) in counts { + if sites.len() < min_support { + continue; + } + let mut first = sites.values().next().unwrap().clone(); + first.support = sites.len(); + first.sites = sites + .keys() + .map(|k| 
format!("{}:{}:{}", k.0, k.2, k.3)) + .collect(); + out.push(first); + } + out.sort_by(|a, b| { + b.support + .cmp(&a.support) + .then_with(|| self.dependency_rank(a).cmp(&self.dependency_rank(b))) + .then_with(|| a.protocol.join("\0").cmp(&b.protocol.join("\0"))) + }); + out + } + + fn drift(&self, min_support: usize, min_confidence: f64) -> Vec { + let protocols = self.ordered_protocols(min_support); + let mut protocol_index: BTreeMap> = BTreeMap::new(); + for p in protocols { + let mut pair = p.protocol.clone(); + pair.sort(); + protocol_index.entry(pair.join("\0")).or_default().push(p); + } + + let mut out = Vec::new(); + for seq in &self.sequences { + let state_calls: Vec<_> = seq + .calls + .iter() + .filter(|c| !c.reads.is_empty() || !c.writes.is_empty()) + .collect(); + let collapsed = self.collapse_consecutive(&state_calls); + let mids: Vec<_> = collapsed.iter().map(|c| c.mid.clone()).collect(); + let positions = self.first_positions(&mids); + + for protocol_row in self.candidate_protocols( + &positions.keys().cloned().collect::>(), + &protocol_index, + ) { + let present: Vec<_> = protocol_row + .protocol + .iter() + .filter(|m| positions.contains_key(*m)) + .cloned() + .collect(); + if present.len() < 2 { + continue; + } + if self.ordered_subsequence(&mids, &protocol_row.protocol) { + continue; + } + + let confidence = + (protocol_row.support as f64) / (self.denominator_for(&present) as f64); + if confidence < min_confidence { + continue; + } + + out.push(self.finding(seq, &protocol_row, &present, &positions, confidence)); + } + } + + let mut deduped = Vec::new(); + let mut seen = BTreeSet::new(); + for row in out { + let key = ( + row.kind.clone(), + row.at.clone(), + row.protocol.clone(), + row.observed.clone(), + row.states.clone(), + ); + if seen.insert(key) { + deduped.push(row); + } + } + deduped.sort_by(|a, b| { + b.confidence + .partial_cmp(&a.confidence) + .unwrap() + .then_with(|| b.support.cmp(&a.support)) + .then_with(|| a.at.cmp(&b.at)) + }); 
+ deduped + } + + fn dependency_rank(&self, row: &ProtocolFinding) -> usize { + if row.dependency.iter().any(|d| d == "write_read") { + 0 + } else if row.dependency.iter().any(|d| d == "write_write") { + 1 + } else { + 2 + } + } + + fn collapse_consecutive<'a>(&self, calls: &'a [&'a Call]) -> Vec<&'a Call> { + let mut out = Vec::new(); + let mut last = None; + for c in calls { + if last.map(|l| l == &c.mid).unwrap_or(false) { + continue; + } + last = Some(&c.mid); + out.push(*c); + } + out + } + + fn dependency_edge(&self, left: &Call, right: &Call) -> Option<(Vec, Vec)> { + let lw: BTreeSet<_> = left.writes.iter().collect(); + let lr: BTreeSet<_> = left.reads.iter().collect(); + let rw: BTreeSet<_> = right.writes.iter().collect(); + let rr: BTreeSet<_> = right.reads.iter().collect(); + + let mut kinds = Vec::new(); + let mut states = BTreeSet::new(); + + let wr: Vec<_> = lw.intersection(&rr).collect(); + if !wr.is_empty() { + kinds.push("write_read".to_string()); + for s in wr { + states.insert((*s).clone()); + } + } + let ww: Vec<_> = lw.intersection(&rw).collect(); + if !ww.is_empty() { + kinds.push("write_write".to_string()); + for s in ww { + states.insert((*s).clone()); + } + } + let rw_int: Vec<_> = lr.intersection(&rw).collect(); + if !rw_int.is_empty() { + kinds.push("read_write".to_string()); + for s in rw_int { + states.insert((*s).clone()); + } + } + + if kinds.is_empty() { + return None; + } + kinds.sort(); + let mut states_v: Vec<_> = states.into_iter().collect(); + states_v.sort(); + Some((kinds, states_v)) + } + + fn diagnostic_protocol(&self, protocol: &[String]) -> bool { + protocol + .iter() + .any(|m| OPTIONAL_DIAGNOSTIC_MIDS.contains(&m.as_str())) + } + + fn candidate_protocols( + &self, + mids: &[String], + protocol_index: &BTreeMap>, + ) -> Vec { + let mut out = Vec::new(); + let mut seen = BTreeSet::new(); + for i in 0..mids.len() { + for j in i + 1..mids.len() { + let mut pair = vec![mids[i].clone(), mids[j].clone()]; + pair.sort(); + if 
let Some(ps) = protocol_index.get(&pair.join("\0")) { + for p in ps { + let key = (p.protocol.clone(), p.dependency.clone(), p.states.clone()); + if seen.insert(key) { + out.push(p.clone()); + } + } + } + } + } + out + } + + fn first_positions(&self, mids: &[String]) -> BTreeMap { + let mut out = BTreeMap::new(); + for (i, m) in mids.iter().enumerate() { + out.entry(m.clone()).or_insert(i); + } + out + } + + fn ordered_subsequence(&self, mids: &[String], protocol: &[String]) -> bool { + let mut idx = 0; + for m in mids { + if m == &protocol[idx] { + idx += 1; + } + if idx == protocol.len() { + return true; + } + } + false + } + + fn denominator_for(&self, present: &[String]) -> usize { + self.site_call_sets + .values() + .filter(|mids| present.iter().all(|m| mids.contains_key(m))) + .count() + .max(1) + } + + fn finding( + &self, + seq: &MethodSequence, + protocol_row: &ProtocolFinding, + present: &[String], + positions: &BTreeMap, + confidence: f64, + ) -> ProtocolFinding { + let anchor_mid = present + .iter() + .min_by_key(|m| positions.get(*m).unwrap()) + .unwrap(); + let anchor = seq.calls.iter().find(|c| &c.mid == anchor_mid).unwrap(); + let loc = format!("{}:{}:{}", seq.file, seq.defn, anchor.line); + let mut observed = present.to_vec(); + observed.sort_by_key(|m| positions.get(m).unwrap()); + + let mut spans = BTreeMap::new(); + spans.insert(loc.clone(), anchor.span); + + ProtocolFinding { + kind: "order_drift".to_string(), + protocol: protocol_row.protocol.clone(), + observed, + missing: Vec::new(), + dependency: protocol_row.dependency.clone(), + states: protocol_row.states.clone(), + support: protocol_row.support, + confidence: (confidence * 100.0).round() / 100.0, + at: loc, + sites: protocol_row.sites.clone(), + spans, + } + } +} diff --git a/gems/decomplex/rust/src/decomplex/detectors/inconsistent_rename_clone.rs b/gems/decomplex/rust/src/decomplex/detectors/inconsistent_rename_clone.rs new file mode 100644 index 000000000..5e2b604d8 --- /dev/null +++ 
b/gems/decomplex/rust/src/decomplex/detectors/inconsistent_rename_clone.rs @@ -0,0 +1,264 @@ +use crate::decomplex::ast::Span; +use crate::decomplex::detectors::local_flow; +use crate::decomplex::syntax::{self, Document, Language}; +use anyhow::Result; +use regex::Regex; +use serde::Serialize; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::PathBuf; +use std::sync::OnceLock; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct InconsistentRenameCloneRow { + pub file: String, + pub defn: String, + pub line: usize, + pub at: String, + pub ref_at: String, + pub spans: BTreeMap, + pub ref_name: String, + pub divergent: Vec, + pub clone_size: usize, +} + +#[derive(Clone, Debug, Eq, PartialEq, Hash, PartialOrd, Ord)] +enum Skeleton { + ID, + Node(String), +} + +#[derive(Clone, Debug)] +struct Block { + skeleton: Vec, + names: Vec, + file: String, + defn: String, + line: usize, + span: Span, +} + +const MIN_TOKENS: usize = 8; + +pub fn scan_files( + files: &[PathBuf], + language: Language, +) -> Result> { + let documents = syntax::parse_files(files, language)?; + Ok(scan_documents(&documents)) +} + +pub fn scan_documents(documents: &[Document]) -> Vec { + let blocks = local_flow::scan_documents(documents) + .into_iter() + .filter_map(|method| block_from_method(&method)) + .collect::>(); + Report::new(blocks).inconsistent_renames() +} + +fn block_from_method(method: &local_flow::MethodSummary) -> Option { + if method.statements.len() < 3 { + return None; + } + let mut skeleton = Vec::new(); + let mut names = Vec::new(); + for statement in &method.statements { + tokenize_source(&statement.source, &mut skeleton, &mut names); + } + if skeleton.len() < MIN_TOKENS { + return None; + } + + let first = method.statements.first()?; + let last = method.statements.last()?; + Some(Block { + skeleton, + names, + file: method.file.clone(), + defn: method.name.clone(), + line: first.line, + span: [first.span[0], first.span[1], last.span[2], last.span[3]], + }) +} 
+ +fn tokenize_source(source: &str, skeleton: &mut Vec, names: &mut Vec) { + for token in token_re().find_iter(source).map(|match_| match_.as_str()) { + if identifier_token(token) { + skeleton.push(Skeleton::ID); + names.push( + token + .trim_start_matches('@') + .trim_end_matches('=') + .to_string(), + ); + } else if literal_token(token) { + skeleton.push(Skeleton::Node("LIT".to_string())); + } else { + skeleton.push(Skeleton::Node(token.to_string())); + } + } +} + +fn token_re() -> &'static Regex { + static TOKEN_RE: OnceLock = OnceLock::new(); + TOKEN_RE.get_or_init(|| { + Regex::new(r#"[A-Za-z_]\w*[!?=]?|@\w+|\d+(?:\.\d+)?|:[A-Za-z_]\w*|"[^"]*"|'[^']*'|\S"#) + .expect("inconsistent-rename-clone token regex") + }) +} + +fn identifier_token(token: &str) -> bool { + let token = token.strip_prefix('@').unwrap_or(token); + let token = token.trim_end_matches(['!', '?', '=']); + let mut chars = token.chars(); + let Some(first) = chars.next() else { + return false; + }; + (first == '_' || first.is_ascii_alphabetic()) + && chars.all(|ch| ch == '_' || ch.is_ascii_alphanumeric()) +} + +fn literal_token(token: &str) -> bool { + token.starts_with(':') || quoted_token(token) || numeric_token(token) +} + +fn quoted_token(token: &str) -> bool { + (token.starts_with('"') && token.ends_with('"')) + || (token.starts_with('\'') && token.ends_with('\'')) +} + +fn numeric_token(token: &str) -> bool { + let mut saw_digit = false; + let mut saw_dot = false; + for ch in token.chars() { + if ch.is_ascii_digit() { + saw_digit = true; + } else if ch == '.' 
&& !saw_dot { + saw_dot = true; + } else { + return false; + } + } + saw_digit +} + +struct Report { + groups: BTreeMap, Vec>, +} + +impl Report { + fn new(blocks: Vec) -> Self { + let mut groups: BTreeMap, Vec> = BTreeMap::new(); + for b in blocks { + groups.entry(b.skeleton.clone()).or_default().push(b); + } + groups.retain(|_, v| v.len() >= 2); + Self { groups } + } + + fn inconsistent_renames(&self) -> Vec { + let mut out = Vec::new(); + for members in self.groups.values() { + out.extend(self.findings_for(members)); + } + out.sort_by(|a, b| { + b.clone_size + .cmp(&a.clone_size) + .then_with(|| a.at.cmp(&b.at)) + }); + out.dedup_by(|a, b| a.at == b.at && a.ref_at == b.ref_at && a.ref_name == b.ref_name); + out + } + + fn findings_for(&self, members: &[Block]) -> Vec { + let mut units = BTreeSet::new(); + for m in members { + units.insert((m.file.clone(), m.defn.clone())); + } + if units.len() < 2 { + return Vec::new(); + } + + let mut out = Vec::new(); + for i in 0..members.len() { + for j in i + 1..members.len() { + let ref_block = &members[i]; + let candidate = &members[j]; + if self.same_unit(ref_block, candidate) { + continue; + } + out.extend(self.inconsistent_pairs(ref_block, candidate)); + } + } + out + } + + fn inconsistent_pairs( + &self, + ref_block: &Block, + candidate: &Block, + ) -> Vec { + let mut out = Vec::new(); + for (ref_name, positions) in self.ref_classes(ref_block) { + let mut spellings = Vec::new(); + for pos in positions { + if let Some(name) = candidate.names.get(pos) { + if !spellings.contains(name) { + spellings.push(name.clone()); + } + } + } + if spellings.len() < 2 { + continue; + } + out.push(self.finding(ref_block, candidate, &ref_name, spellings)); + } + out + } + + fn ref_classes(&self, ref_block: &Block) -> Vec<(String, Vec)> { + let mut order = Vec::new(); + let mut classes: BTreeMap> = BTreeMap::new(); + for (index, name) in ref_block.names.iter().enumerate() { + if !classes.contains_key(name) { + order.push(name.clone()); + 
} + classes.entry(name.clone()).or_default().push(index); + } + order + .into_iter() + .filter_map(|name| { + let positions = classes.remove(&name)?; + (positions.len() >= 2).then_some((name, positions)) + }) + .collect() + } + + fn same_unit(&self, left: &Block, right: &Block) -> bool { + left.file == right.file && left.defn == right.defn + } + + fn finding( + &self, + ref_block: &Block, + candidate: &Block, + ref_name: &str, + divergent: Vec, + ) -> InconsistentRenameCloneRow { + let at = format!("{}:{}:{}", candidate.file, candidate.defn, candidate.line); + let ref_at = format!("{}:{}:{}", ref_block.file, ref_block.defn, ref_block.line); + let mut spans = BTreeMap::new(); + spans.insert(at.clone(), candidate.span); + spans.insert(ref_at.clone(), ref_block.span); + InconsistentRenameCloneRow { + file: candidate.file.clone(), + defn: candidate.defn.clone(), + line: candidate.line, + at, + ref_at, + spans, + ref_name: ref_name.to_string(), + divergent, + clone_size: 2, + } + } +} diff --git a/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs b/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs new file mode 100644 index 000000000..4b1bbc6fe --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/local_flow.rs @@ -0,0 +1,4 @@ +pub use crate::decomplex::syntax::local_flow::{ + local_contract_assignments, scan_documents, scan_files, Boundary, LocalFlowRow, MethodSummary, + Statement, +}; diff --git a/gems/decomplex/rust/src/decomplex/detectors/locality_drag.rs b/gems/decomplex/rust/src/decomplex/detectors/locality_drag.rs new file mode 100644 index 000000000..c4eb4865d --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/locality_drag.rs @@ -0,0 +1,509 @@ +use crate::decomplex::ast::Span; +use crate::decomplex::detectors::local_flow; +use crate::decomplex::syntax::{self, Document, Language, LocalComplexityScore}; +use anyhow::Result; +use serde::Serialize; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::PathBuf; + 
+#[derive(Clone, Debug, PartialEq, Serialize)] +pub struct LocalityDragRow { + pub at: String, + pub file: String, + pub owner: String, + pub defn: String, + pub method: String, + pub line: usize, + pub variable: String, + pub defined_at: usize, + pub used_at: usize, + pub gap_lines: usize, + pub gap_statements: usize, + pub unrelated_statements: usize, + pub setup_statements: usize, + pub related_statements: usize, + pub boundary_crossings: usize, + pub local_complexity: f64, + pub score: isize, + pub definition_deps: Vec, + pub use_reads: Vec, + pub examples: Vec, + pub boundaries: Vec, + pub reason: String, + pub spans: BTreeMap, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct Example { + pub line: usize, + pub source: String, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct BoundaryInfo { + pub line: usize, + pub marker: String, +} + +pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { + let documents = syntax::parse_files(files, language)?; + Ok(scan_documents(&documents)) +} + +pub fn scan_documents(documents: &[Document]) -> Vec { + let summaries = local_flow::scan_documents(documents); + let complexity_scores = documents + .iter() + .flat_map(|document| { + document + .local_complexity_scores + .iter() + .map(|(id, score)| ((document.file.clone(), id.clone()), score.clone())) + }) + .collect(); + scan_summaries_with_scores(summaries, complexity_scores) +} + +pub fn scan_summaries(summaries: Vec) -> Vec { + let mut detector = LocalityDrag::new(summaries, BTreeMap::new()); + detector.findings() +} + +pub fn scan_summaries_with_scores( + summaries: Vec, + complexity_scores: BTreeMap<(String, String), LocalComplexityScore>, +) -> Vec { + let mut detector = LocalityDrag::new(summaries, complexity_scores); + detector.findings() +} + +struct LocalityDrag { + summaries: Vec, + min_unrelated_statements: usize, + min_gap_lines: usize, + min_local_complexity: f64, + min_score: isize, + max_findings_per_method: 
usize, + complexity_scores: BTreeMap<(String, String), LocalComplexityScore>, +} + +impl LocalityDrag { + fn new( + summaries: Vec, + complexity_scores: BTreeMap<(String, String), LocalComplexityScore>, + ) -> Self { + Self { + summaries, + min_unrelated_statements: 4, + min_gap_lines: 8, + min_local_complexity: 12.0, + min_score: 60, + max_findings_per_method: 3, + complexity_scores, + } + } + + fn findings(&mut self) -> Vec { + let mut out: Vec<_> = self + .summaries + .iter() + .flat_map(|s| self.findings_for(s)) + .collect(); + out.sort_by(|a, b| { + b.score + .cmp(&a.score) + .then_with(|| b.unrelated_statements.cmp(&a.unrelated_statements)) + .then_with(|| b.gap_lines.cmp(&a.gap_lines)) + .then_with(|| a.file.cmp(&b.file)) + .then_with(|| a.line.cmp(&b.line)) + }); + out + } + + fn findings_for(&self, summary: &local_flow::MethodSummary) -> Vec { + if summary.statements.len() < self.min_unrelated_statements + 2 { + return Vec::new(); + } + + let local_complexity = self.local_complexity(summary); + if local_complexity < self.min_local_complexity { + return Vec::new(); + } + + let mut findings = Vec::new(); + for (index, statement) in summary.statements.iter().enumerate() { + for name in &statement.writes { + if let Some(f) = + self.finding_for_write(summary, local_complexity, statement, index, name) + { + findings.push(f); + } + } + } + + findings.sort_by(|a, b| { + b.score + .cmp(&a.score) + .then_with(|| a.defined_at.cmp(&b.defined_at)) + .then_with(|| a.variable.cmp(&b.variable)) + }); + findings + .into_iter() + .take(self.max_findings_per_method) + .collect() + } + + fn local_complexity(&self, summary: &local_flow::MethodSummary) -> f64 { + self.complexity_scores + .get(&(summary.file.clone(), summary.id.clone())) + .map(|score| score.score) + .unwrap_or(0.0) + } + + fn finding_for_write( + &self, + summary: &local_flow::MethodSummary, + local_complexity: f64, + statement: &local_flow::Statement, + index: usize, + name: &str, + ) -> Option { + if 
self.ignorable_local(name) { + return None; + } + + let use_index = self.first_read_before_rewrite(&summary.statements, index, name)?; + if self.same_prefix_staging_batch(&summary.statements, use_index, name) { + return None; + } + + let gap = &summary.statements[(index + 1)..use_index]; + if gap.is_empty() { + return None; + } + + let gap_refs: Vec<_> = gap.iter().collect(); + let (related, unrelated) = self.classify_gap_statements(name, statement, &gap_refs); + let substantive_unrelated: Vec<_> = unrelated + .into_iter() + .filter(|s| !self.trivial_initializer(s)) + .collect(); + if substantive_unrelated.len() < self.min_unrelated_statements { + return None; + } + + let use_statement = &summary.statements[use_index]; + let gap_lines = use_statement.line - statement.line; + let boundaries = self.boundary_crossings(summary, index, use_index); + if gap_lines < self.min_gap_lines && boundaries.is_empty() { + return None; + } + + let score = self.score_for( + name, + &substantive_unrelated, + &related, + gap_lines, + &boundaries, + local_complexity, + self.read_count_after_write(&summary.statements, index, name), + ); + if score < self.min_score { + return None; + } + + let at = format!("{}:{}:{}", summary.file, summary.name, statement.line); + let mut spans = BTreeMap::new(); + spans.insert(at.clone(), summary.span); + + Some(LocalityDragRow { + at, + file: summary.file.clone(), + owner: summary.owner.clone(), + defn: summary.name.clone(), + method: summary.name.clone(), + line: statement.line, + variable: name.to_string(), + defined_at: statement.line, + used_at: use_statement.line, + gap_lines, + gap_statements: gap.len(), + unrelated_statements: substantive_unrelated.len(), + setup_statements: (gap.len() - related.len()) - substantive_unrelated.len(), + related_statements: related.len(), + boundary_crossings: boundaries.len(), + local_complexity: self.round(local_complexity), + score, + definition_deps: self.definition_deps(statement, name).into_iter().collect(), 
+ use_reads: use_statement.reads.iter().cloned().collect(), + examples: substantive_unrelated + .iter() + .take(3) + .map(|s| self.example_for(s)) + .collect(), + boundaries: boundaries.iter().map(|b| self.boundary_for(b)).collect(), + reason: self.reason_for( + name, + &substantive_unrelated, + gap_lines, + &boundaries, + local_complexity, + ), + spans, + }) + } + + fn first_read_before_rewrite( + &self, + statements: &[local_flow::Statement], + index: usize, + name: &str, + ) -> Option { + for (offset, statement) in statements.iter().skip(index + 1).enumerate() { + if statement.writes.contains(name) { + return None; + } + if statement.reads.contains(name) { + return Some(index + 1 + offset); + } + } + None + } + + fn read_count_after_write( + &self, + statements: &[local_flow::Statement], + index: usize, + name: &str, + ) -> usize { + statements + .iter() + .skip(index + 1) + .filter(|s| s.reads.contains(name)) + .count() + } + + fn classify_gap_statements<'a>( + &self, + name: &str, + definition: &local_flow::Statement, + gap: &'a [&local_flow::Statement], + ) -> ( + Vec<&'a local_flow::Statement>, + Vec<&'a local_flow::Statement>, + ) { + let mut related_names = BTreeSet::new(); + related_names.insert(name.to_string()); + for d in self.definition_deps(definition, name) { + related_names.insert(d); + } + + let mut related = Vec::new(); + let mut unrelated = Vec::new(); + for s in gap { + let new_related = self.derived_from_related(s, &related_names); + let touched: BTreeSet<_> = s.reads.union(&s.writes).cloned().collect(); + let touches_related = !touched.is_disjoint(&related_names); + if touches_related || !new_related.is_empty() { + related.push(*s); + for n in new_related { + related_names.insert(n); + } + } else { + unrelated.push(*s); + } + } + (related, unrelated) + } + + fn definition_deps(&self, statement: &local_flow::Statement, name: &str) -> BTreeSet { + statement + .dependencies + .iter() + .filter(|(lhs, _)| lhs == name) + .map(|(_, rhs)| 
rhs.clone()) + .collect() + } + + fn derived_from_related( + &self, + statement: &local_flow::Statement, + related_names: &BTreeSet, + ) -> BTreeSet { + statement + .dependencies + .iter() + .filter(|(_, rhs)| related_names.contains(rhs)) + .map(|(lhs, _)| lhs.clone()) + .collect() + } + + fn boundary_crossings<'a>( + &self, + summary: &'a local_flow::MethodSummary, + definition_index: usize, + use_index: usize, + ) -> Vec<&'a local_flow::Boundary> { + summary + .boundaries + .iter() + .filter(|b| b.before_index >= definition_index && b.after_index <= use_index) + .collect() + } + + fn score_for( + &self, + variable: &str, + unrelated: &[&local_flow::Statement], + related: &[&local_flow::Statement], + gap_lines: usize, + boundaries: &[&local_flow::Boundary], + local_complexity: f64, + read_count: usize, + ) -> isize { + let mut score = (unrelated.len() as isize * 5) + + (gap_lines.min(30) as isize) + + (boundaries.len() as isize * 8) + + (local_complexity.min(25.0).round() as isize); + if read_count == 1 { + score += 5; + } + if self.benign_local(variable) { + score -= 8; + } + score -= related.len() as isize * 2; + score + } + + fn ignorable_local(&self, name: &str) -> bool { + name.starts_with('_') || self.source_location_local(name) + } + + fn same_prefix_staging_batch( + &self, + statements: &[local_flow::Statement], + use_index: usize, + name: &str, + ) -> bool { + let Some(prefix) = self.staging_prefix(name) else { + return false; + }; + let staged_names: BTreeSet<_> = statements + .iter() + .take(use_index) + .flat_map(|s| s.writes.iter()) + .filter(|n| n.starts_with(&format!("{}_", prefix))) + .cloned() + .collect(); + if staged_names.len() < 4 { + return false; + } + let use_reads = &statements[use_index].reads; + staged_names.intersection(use_reads).count() >= 4 + } + + fn trivial_initializer(&self, statement: &local_flow::Statement) -> bool { + if statement.writes.is_empty() || !statement.reads.is_empty() { + return false; + } + let source = 
statement.source.trim(); + let re = regex::Regex::new( + r"^\w+\s*=\s*(?:\{\}|\[\]|nil|false|true|0|T\.let\((?:nil|false|true|0)\b)", + ) + .unwrap(); + re.is_match(source) + } + + fn staging_prefix(&self, name: &str) -> Option { + let parts: Vec<_> = name.split('_').collect(); + if parts.len() >= 2 && parts[0].len() >= 3 { + Some(parts[0].to_string()) + } else { + None + } + } + + fn benign_local(&self, name: &str) -> bool { + self.source_location_local(name) + } + + fn source_location_local(&self, name: &str) -> bool { + let re = regex::Regex::new(r"(?i)(?:\A|_)(?:tok|token|span|source|source_code|line|column|col|pos|idx|index|loc|location)(?:\z|_)").unwrap(); + re.is_match(name) + } + + fn example_for(&self, statement: &local_flow::Statement) -> Example { + let source = statement.source.lines().next().unwrap_or("").trim(); + let source = truncate_example_source(source); + Example { + line: statement.line, + source, + } + } + + fn boundary_for(&self, boundary: &local_flow::Boundary) -> BoundaryInfo { + BoundaryInfo { + line: boundary.line, + marker: if boundary.text.is_empty() { + boundary.kind.clone() + } else { + boundary.text.clone() + }, + } + } + + fn reason_for( + &self, + variable: &str, + unrelated: &[&local_flow::Statement], + gap_lines: usize, + boundaries: &[&local_flow::Boundary], + local_complexity: f64, + ) -> String { + let mut parts = vec![ + format!( + "`{}` is initialized {} line(s) before first use", + variable, gap_lines + ), + format!("{} unrelated intervening statement(s)", unrelated.len()), + ]; + if !boundaries.is_empty() { + parts.push(format!( + "{} structural boundary crossing(s)", + boundaries.len() + )); + } + parts.push(format!( + "method local complexity {:.1}", + self.round(local_complexity) + )); + parts.join("; ") + } + + fn round(&self, value: f64) -> f64 { + (value * 10.0).round() / 10.0 + } +} + +fn truncate_example_source(source: &str) -> String { + if source.chars().count() <= 99 { + return source.to_string(); + } + + let 
prefix: String = source.chars().take(96).collect(); + format!("{prefix}...") +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn truncates_non_ascii_examples_on_character_boundaries() { + let source = "value = \"✓\"".repeat(12); + let truncated = truncate_example_source(&source); + + assert_eq!(truncated.chars().count(), 99); + assert!(truncated.ends_with("...")); + } +} diff --git a/gems/decomplex/rust/src/decomplex/detectors/miner.rs b/gems/decomplex/rust/src/decomplex/detectors/miner.rs new file mode 100644 index 000000000..9dea61398 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/miner.rs @@ -0,0 +1,152 @@ +use crate::decomplex::ast::Span; +use crate::decomplex::syntax::{self, DecisionSite, Document, Language}; +use anyhow::Result; +use serde::Serialize; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::PathBuf; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct MinerReport { + pub missing_abstractions: Vec, + pub neglected_conditions: Vec, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct MissingAbstraction { + pub kind: String, + pub members: Vec, + pub support: usize, + pub scatter: usize, + pub rank: usize, + pub sites: Vec, + pub spans: BTreeMap, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct NeglectedCondition { + pub pattern: Vec, + pub support: usize, + pub missing: String, + pub at: String, + pub spans: BTreeMap, +} + +pub fn scan_files(files: &[PathBuf], language: Language) -> Result { + let documents = syntax::parse_files(files, language)?; + Ok(scan_documents(&documents)) +} + +pub fn scan_documents(documents: &[Document]) -> MinerReport { + let mut sites = Vec::new(); + for doc in documents { + sites.extend(doc.decision_sites.clone()); + } + let m = Miner::new(sites); + MinerReport { + missing_abstractions: m.missing_abstractions(2), + neglected_conditions: m.neglected_conditions(3), + } +} + +struct Miner { + sites: Vec, + groups: Vec<((String, Vec), 
Vec)>, +} + +impl Miner { + fn new(sites: Vec) -> Self { + let mut groups: Vec<((String, Vec), Vec)> = Vec::new(); + for s in &sites { + let key = (s.kind.clone(), s.members.clone()); + if let Some((_, grouped)) = groups.iter_mut().find(|(existing, _)| existing == &key) { + grouped.push(s.clone()); + } else { + groups.push((key, vec![s.clone()])); + } + } + Self { sites, groups } + } + + fn missing_abstractions(&self, min_scatter: usize) -> Vec { + let mut out = Vec::new(); + for ((kind, members), sts) in &self.groups { + let mut methods = BTreeSet::new(); + for s in sts { + methods.insert((s.file.clone(), s.function.clone())); + } + let scatter = methods.len(); + if scatter < min_scatter { + continue; + } + + let mut sites = Vec::new(); + let mut spans = BTreeMap::new(); + for s in sts { + let l = self.loc(s); + sites.push(l.clone()); + spans.insert(l, s.span); + } + + out.push(MissingAbstraction { + kind: kind.clone(), + members: members.clone(), + support: sts.len(), + scatter, + rank: sts.len() * scatter, + sites, + spans, + }); + } + out.sort_by(|a, b| b.rank.cmp(&a.rank)); + out + } + + fn neglected_conditions(&self, min_support: usize) -> Vec { + let mut popular = Vec::new(); + for ((kind, members), sts) in &self.groups { + if sts.len() >= min_support { + popular.push((kind.clone(), members.clone(), sts.len())); + } + } + + let mut out = Vec::new(); + for s in &self.sites { + for (kind, mem, sup) in &popular { + if kind != &s.kind { + continue; + } + + let mem_set: BTreeSet<_> = mem.iter().cloned().collect(); + let s_mem_set: BTreeSet<_> = s.members.iter().cloned().collect(); + + let diff_mem_s: BTreeSet<_> = mem_set.difference(&s_mem_set).cloned().collect(); + let diff_s_mem: BTreeSet<_> = s_mem_set.difference(&mem_set).cloned().collect(); + + if diff_mem_s.len() == 1 && diff_s_mem.is_empty() { + if s.members == *mem { + continue; + } + + let l = self.loc(s); + let mut spans = BTreeMap::new(); + spans.insert(l.clone(), s.span); + + 
out.push(NeglectedCondition { + pattern: mem.clone(), + support: *sup, + missing: diff_mem_s.into_iter().next().unwrap(), + at: l, + spans, + }); + } + } + } + out.sort_by(|a, b| b.support.cmp(&a.support)); + out.dedup_by(|a, b| a.at == b.at && a.pattern == b.pattern); + out + } + + fn loc(&self, s: &DecisionSite) -> String { + format!("{}:{}:{}", s.file, s.function, s.line) + } +} diff --git a/gems/decomplex/rust/src/decomplex/detectors/mod.rs b/gems/decomplex/rust/src/decomplex/detectors/mod.rs new file mode 100644 index 000000000..c7cb3b359 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/mod.rs @@ -0,0 +1,24 @@ +pub mod co_update; +pub mod decision_pressure; +pub mod derived_state; +pub mod false_simplicity; +pub mod fat_union; +pub mod flay_similarity; +pub mod function_lcom; +pub mod implicit_control_flow; +pub mod inconsistent_rename_clone; +pub mod local_flow; +pub mod locality_drag; +pub mod miner; +pub mod operational_discontinuity; +pub mod oversized_predicate; +pub mod path_condition; +pub mod predicate_alias; +pub mod redundant_nil_guard; +pub mod semantic_alias; +pub mod sequence_mine; +pub mod state_branch_density; +pub mod state_mesh; +pub mod structural_topology; +pub mod temporal_ordering_pressure; +pub mod weighted_inlined_cognitive_complexity; diff --git a/gems/decomplex/rust/src/decomplex/detectors/operational_discontinuity.rs b/gems/decomplex/rust/src/decomplex/detectors/operational_discontinuity.rs new file mode 100644 index 000000000..002045b0d --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/operational_discontinuity.rs @@ -0,0 +1,252 @@ +use crate::decomplex::ast::Span; +use crate::decomplex::detectors::local_flow; +use crate::decomplex::syntax::{Document, Language}; +use anyhow::Result; +use serde::Serialize; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::PathBuf; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct OperationalDiscontinuityRow { + pub file: String, + pub defn: 
String, + pub owner: String, + pub method: String, + pub line: usize, + pub at: String, + pub score: isize, + pub resets: usize, + pub dead_total: usize, + pub new_total: usize, + pub reset_points: Vec, + pub confidence: String, + pub confidence_reasons: Vec, + pub spans: BTreeMap, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct ResetPoint { + pub line: usize, + pub kind: String, + pub text: String, + pub before_statement: usize, + pub after_statement: usize, + pub dead: Vec, + pub new: Vec, + pub continuing: Vec, +} + +struct RangeInfo { + first: usize, + last: usize, +} + +pub fn scan_files( + files: &[PathBuf], + language: Language, +) -> Result> { + let summaries = local_flow::scan_files(files, language)?; + Ok(scan_summaries(summaries)) +} + +pub fn scan_documents(documents: &[Document]) -> Vec { + scan_summaries(local_flow::scan_documents(documents)) +} + +pub fn scan_summaries( + summaries: Vec, +) -> Vec { + let detector = OperationalDiscontinuity::new(summaries); + detector.findings() +} + +struct OperationalDiscontinuity { + summaries: Vec, + min_dead: usize, + min_new: usize, + max_continuing: usize, + min_score: isize, +} + +impl OperationalDiscontinuity { + fn new(summaries: Vec) -> Self { + Self { + summaries, + min_dead: 2, + min_new: 2, + max_continuing: 1, + min_score: 12, + } + } + + fn findings(&self) -> Vec { + let mut out: Vec<_> = self + .summaries + .iter() + .filter_map(|s| self.finding_for(s)) + .collect(); + out.sort_by(|a, b| { + b.score + .cmp(&a.score) + .then_with(|| a.file.cmp(&b.file)) + .then_with(|| a.line.cmp(&b.line)) + }); + out + } + + fn finding_for( + &self, + summary: &local_flow::MethodSummary, + ) -> Option { + if summary.boundaries.is_empty() { + return None; + } + + let ranges = self.variable_ranges(summary); + let resets: Vec<_> = summary + .boundaries + .iter() + .filter_map(|b| self.reset_at(b, &ranges)) + .collect(); + if resets.is_empty() { + return None; + } + + let score = resets + .iter() + 
.map(|r| r.dead.len() as isize + r.new.len() as isize - r.continuing.len() as isize) + .sum::() + + (resets.len() as isize * 8); + if score < self.min_score { + return None; + } + + let confidence_reasons = self.confidence_reasons_for(&summary.name, score, &resets); + let at = format!("{}:{}:{}", summary.file, summary.name, summary.line); + let mut spans = BTreeMap::new(); + spans.insert(at.clone(), summary.span); + + Some(OperationalDiscontinuityRow { + file: summary.file.clone(), + defn: summary.name.clone(), + owner: summary.owner.clone(), + method: summary.name.clone(), + line: summary.line, + at, + score, + resets: resets.len(), + dead_total: resets.iter().map(|r| r.dead.len()).sum(), + new_total: resets.iter().map(|r| r.new.len()).sum(), + reset_points: resets, + confidence: if confidence_reasons.is_empty() { + "review".to_string() + } else { + "high".to_string() + }, + confidence_reasons, + spans, + }) + } + + fn confidence_reasons_for( + &self, + method_name: &str, + score: isize, + resets: &[ResetPoint], + ) -> Vec { + let explicit_phase = resets.iter().any(|r| self.phase_marker(r)); + let mut reasons = Vec::new(); + if resets.len() >= 2 { + reasons.push("repeated_resets".to_string()); + } + if explicit_phase { + reasons.push("explicit_phase_marker".to_string()); + } + if score >= 20 { + reasons.push("high_score".to_string()); + } + + if self.grammar_method(method_name) && !explicit_phase { + reasons.retain(|r| r != "repeated_resets" && r != "high_score"); + } + reasons + } + + fn phase_marker(&self, reset: &ResetPoint) -> bool { + let re = + regex::Regex::new(r"(?i)^(?:#|//|--)\s*(?:\d+[a-z]?\s*[.)]|(?:phase|step|stage)\b)") + .unwrap(); + re.is_match(&reset.text) + } + + fn grammar_method(&self, method_name: &str) -> bool { + let re = regex::Regex::new(r"^parse(?:_|$)").unwrap(); + re.is_match(method_name) + } + + fn reset_at( + &self, + boundary: &local_flow::Boundary, + ranges: &BTreeMap, + ) -> Option { + let before = boundary.before_index; + let 
after = boundary.after_index; + + let mut dead = Vec::new(); + let mut continuing = Vec::new(); + let mut new_vars = Vec::new(); + + for (name, range) in ranges { + if range.first <= before { + if range.last <= before { + dead.push(name.clone()); + } + if range.last >= after { + continuing.push(name.clone()); + } + } + if range.first >= after { + new_vars.push(name.clone()); + } + } + + if dead.len() < self.min_dead + || new_vars.len() < self.min_new + || continuing.len() > self.max_continuing + { + return None; + } + + dead.sort(); + new_vars.sort(); + continuing.sort(); + + Some(ResetPoint { + line: boundary.line, + kind: boundary.kind.clone(), + text: boundary.text.clone(), + before_statement: before, + after_statement: after, + dead, + new: new_vars, + continuing, + }) + } + + fn variable_ranges(&self, summary: &local_flow::MethodSummary) -> BTreeMap { + let mut ranges = BTreeMap::new(); + for statement in &summary.statements { + let touched: BTreeSet<_> = statement.reads.union(&statement.writes).cloned().collect(); + for name in touched { + ranges + .entry(name) + .and_modify(|r: &mut RangeInfo| r.last = statement.index) + .or_insert(RangeInfo { + first: statement.index, + last: statement.index, + }); + } + } + ranges + } +} diff --git a/gems/decomplex/rust/src/decomplex/detectors/oversized_predicate.rs b/gems/decomplex/rust/src/decomplex/detectors/oversized_predicate.rs new file mode 100644 index 000000000..b8a3a66d6 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/oversized_predicate.rs @@ -0,0 +1,89 @@ +use crate::decomplex::ast::Span; +use crate::decomplex::syntax::{self, Document, Language}; +use anyhow::Result; +use serde::Serialize; +use std::collections::BTreeMap; +use std::path::PathBuf; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct OversizedPredicateRow { + pub at: String, + pub count: usize, + pub predicate: String, + pub atoms: Vec, + pub spans: BTreeMap, +} + +#[derive(Clone, Debug, Serialize)] +pub struct 
ResultReport { + pub findings: Vec, +} + +const LIMIT: usize = 3; +pub fn scan_files(files: &[PathBuf], language: Language) -> Result { + let documents = syntax::parse_files(files, language)?; + Ok(scan_documents(&documents)) +} + +pub fn scan_documents(documents: &[Document]) -> ResultReport { + let mut findings = Vec::new(); + for document in documents { + let scanner = OversizedPredicate::new(LIMIT); + for site in &document.decision_sites { + if let Some(finding) = scanner.finding_for_site(site) { + findings.push(finding); + } + } + } + ResultReport { findings } +} + +struct OversizedPredicate { + limit: usize, +} + +impl OversizedPredicate { + fn new(limit: usize) -> Self { + Self { limit } + } + + fn finding_for_site( + &self, + site: &crate::decomplex::syntax::DecisionSite, + ) -> Option { + if self.predicate_helper(&site.function) { + return None; + } + let atoms_text = self.condition_atoms(&site.predicate); + if atoms_text.len() <= self.limit { + return None; + } + + let at = format!("{}:{}:{}", site.file, site.function, site.line); + let mut spans = BTreeMap::new(); + spans.insert(at.clone(), site.enclosing_span); + + Some(OversizedPredicateRow { + at, + count: atoms_text.len(), + predicate: site.predicate.clone(), + atoms: atoms_text, + spans, + }) + } + + fn condition_atoms(&self, predicate: &str) -> Vec { + predicate + .split("&&") + .flat_map(|part| part.split("||")) + .flat_map(|part| part.split(" and ")) + .flat_map(|part| part.split(" or ")) + .map(|atom| atom.replace(['(', ')'], "").trim().to_string()) + .filter(|atom| !atom.is_empty()) + .collect() + } + + fn predicate_helper(&self, name: &str) -> bool { + name.ends_with('?') + } +} diff --git a/gems/decomplex/rust/src/decomplex/detectors/path_condition.rs b/gems/decomplex/rust/src/decomplex/detectors/path_condition.rs new file mode 100644 index 000000000..7ca69d04a --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/path_condition.rs @@ -0,0 +1,3 @@ +pub use 
crate::decomplex::syntax::path_condition::{ + scan_documents, scan_files, NeglectedPathCondition, PathConditionReport, ScatteredPathCondition, +}; diff --git a/gems/decomplex/rust/src/decomplex/detectors/predicate_alias.rs b/gems/decomplex/rust/src/decomplex/detectors/predicate_alias.rs new file mode 100644 index 000000000..838a07eac --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/predicate_alias.rs @@ -0,0 +1,107 @@ +use crate::decomplex::ast::Span; +use crate::decomplex::syntax::{self, Document, Language}; +use anyhow::Result; +use serde::Serialize; +use std::collections::BTreeMap; +use std::path::PathBuf; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct PredicateAliasReport { + pub alias_clusters: Vec, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct AliasCluster { + pub body: String, + pub names: Vec, + pub sites: Vec, + pub spans: BTreeMap, +} + +#[derive(Clone, Debug, Eq, PartialEq)] +struct Pred { + name: String, + body: String, + file: String, + defn: String, + line: usize, + span: Span, +} + +pub fn scan_files(files: &[PathBuf], language: Language) -> Result { + let documents = syntax::parse_files(files, language)?; + Ok(scan_documents(&documents)) +} + +pub fn scan_documents(documents: &[Document]) -> PredicateAliasReport { + let mut preds = Vec::new(); + for document in documents { + preds.extend(document.predicate_aliases.iter().map(|predicate| Pred { + name: predicate.name.clone(), + body: predicate.body.clone(), + file: predicate.file.clone(), + defn: predicate.defn.clone(), + line: predicate.line, + span: predicate.span, + })); + } + Report::new(preds).findings() +} + +struct Report { + preds: Vec, +} + +impl Report { + fn new(preds: Vec) -> Self { + Self { preds } + } + + fn findings(&self) -> PredicateAliasReport { + PredicateAliasReport { + alias_clusters: self.alias_clusters(), + } + } + + fn alias_clusters(&self) -> Vec { + let mut keys = Vec::new(); + let mut by_body: BTreeMap> = 
BTreeMap::new(); + for p in &self.preds { + if !by_body.contains_key(&p.body) { + keys.push(p.body.clone()); + } + by_body.entry(p.body.clone()).or_default().push(p); + } + + let mut out = Vec::new(); + for body in keys { + let ps = by_body.remove(&body).unwrap(); + let mut names = Vec::new(); + for p in &ps { + if !names.contains(&p.name) { + names.push(p.name.clone()); + } + } + if names.len() < 2 { + continue; + } + + let mut sites = Vec::new(); + let mut spans = BTreeMap::new(); + for p in &ps { + let loc = format!("{}:{}:{}", p.file, p.name, p.line); + sites.push(loc.clone()); + spans.insert(loc, p.span); + } + + out.push(AliasCluster { + body, + names, + sites, + spans, + }); + } + out.sort_by(|a, b| b.names.len().cmp(&a.names.len())); + out + } +} diff --git a/gems/decomplex/rust/src/decomplex/detectors/redundant_nil_guard.rs b/gems/decomplex/rust/src/decomplex/detectors/redundant_nil_guard.rs new file mode 100644 index 000000000..e0d4a77f2 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/redundant_nil_guard.rs @@ -0,0 +1,3 @@ +pub use crate::decomplex::syntax::redundant_nil_guard::{ + scan_documents, scan_files, RedundantNilGuardRow, +}; diff --git a/gems/decomplex/rust/src/decomplex/detectors/semantic_alias.rs b/gems/decomplex/rust/src/decomplex/detectors/semantic_alias.rs new file mode 100644 index 000000000..443ff0811 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/semantic_alias.rs @@ -0,0 +1,198 @@ +use crate::decomplex::ast::{self, Span}; +use crate::decomplex::syntax::{self, Document, Language}; +use anyhow::Result; +use serde::Serialize; +use std::collections::BTreeMap; +use std::path::PathBuf; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct SemanticAliasReport { + pub alias_clusters: Vec, + pub reification_misses: Vec, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct SemanticAliasCluster { + pub canon: String, + pub names: Vec, + pub sites: Vec, + pub spans: BTreeMap, +} + 
+#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct ReificationMiss { + pub predicate: String, + pub canon: String, + pub at: String, + pub spans: BTreeMap, + pub raw: String, +} + +#[derive(Clone, Debug)] +struct Pred { + name: String, + canon: String, + file: String, + line: usize, + span: Span, +} + +#[derive(Clone, Debug)] +struct Use { + canon: String, + file: String, + defn: String, + line: usize, + raw: String, + span: Span, +} + +pub fn scan_files(files: &[PathBuf], language: Language) -> Result { + let documents = syntax::parse_files(files, language)?; + Ok(scan_documents(&documents)) +} + +pub fn scan_documents(documents: &[Document]) -> SemanticAliasReport { + let mut preds = Vec::new(); + let mut uses = Vec::new(); + for document in documents { + for predicate in &document.predicate_aliases { + if !semantic_predicate_definition(&predicate.name, &predicate.body) { + continue; + } + preds.push(Pred { + name: predicate.name.clone(), + canon: canon(&predicate.body), + file: predicate.file.clone(), + line: predicate.line, + span: predicate.span, + }); + } + uses.extend(document.comparison_uses.iter().map(|comparison| Use { + canon: canon(&comparison.raw), + file: comparison.file.clone(), + defn: comparison.function.clone(), + line: comparison.line, + raw: comparison.raw.clone(), + span: comparison.span, + })); + } + Report::new(preds, uses).findings() +} + +fn canon(text: &str) -> String { + let (mut t, _) = ast::canon_polarity(text); + t = t.strip_prefix("self.").unwrap_or(&t).to_string(); + t = t.strip_prefix('@').unwrap_or(&t).to_string(); + + // Ruby: t = t.sub(/\A[A-Za-z_]\w*(?:\([^)]*\))?\.(?=[A-Za-z_]\w*\s*(==|!=|\.))/, "") + let re = + regex::Regex::new(r"^[A-Za-z_]\w*(?:\([^)]*\))?\.(?P[A-Za-z_]\w*\s*(?:==|!=|\.))") + .unwrap(); + t = re.replace(&t, "$rest").to_string(); + + t.split_whitespace().collect::>().join(" ") +} + +fn semantic_predicate_definition(name: &str, body: &str) -> bool { + name.ends_with('?') + || body.contains("==") + 
|| body.contains("!=") + || body.contains("&&") + || body.contains("||") + || body.contains(" and ") + || body.contains(" or ") +} + +struct Report { + preds: Vec, + uses: Vec, +} + +impl Report { + fn new(preds: Vec, uses: Vec) -> Self { + Self { preds, uses } + } + + fn findings(&self) -> SemanticAliasReport { + SemanticAliasReport { + alias_clusters: self.alias_clusters(), + reification_misses: self.reification_misses(), + } + } + + fn alias_clusters(&self) -> Vec { + let mut keys = Vec::new(); + let mut by_canon: BTreeMap> = BTreeMap::new(); + for p in &self.preds { + if !by_canon.contains_key(&p.canon) { + keys.push(p.canon.clone()); + } + by_canon.entry(p.canon.clone()).or_default().push(p); + } + + let mut out = Vec::new(); + for c in keys { + let ps = by_canon.remove(&c).unwrap(); + let mut names = Vec::new(); + for p in &ps { + if !names.contains(&p.name) { + names.push(p.name.clone()); + } + } + if names.len() < 2 { + continue; + } + + let mut sites = Vec::new(); + let mut spans = BTreeMap::new(); + for p in &ps { + let loc = format!("{}:{}:{}", p.file, p.name, p.line); + sites.push(loc.clone()); + spans.insert(loc, p.span); + } + + out.push(SemanticAliasCluster { + canon: c, + names, + sites, + spans, + }); + } + out.sort_by(|a, b| b.names.len().cmp(&a.names.len())); + out + } + + fn reification_misses(&self) -> Vec { + let mut by_canon: BTreeMap> = BTreeMap::new(); + for p in &self.preds { + by_canon.entry(p.canon.clone()).or_default().push(p); + } + + let mut out = Vec::new(); + for u in &self.uses { + if let Some(ps) = by_canon.get(&u.canon) { + if ps.is_empty() { + continue; + } + if ps.iter().any(|p| p.name == u.defn) { + continue; + } + + let loc = format!("{}:{}:{}", u.file, u.defn, u.line); + let mut spans = BTreeMap::new(); + spans.insert(loc.clone(), u.span); + + out.push(ReificationMiss { + predicate: ps[0].name.clone(), + canon: u.canon.clone(), + at: loc, + spans, + raw: u.raw.clone(), + }); + } + } + out.sort_by(|a, b| 
a.predicate.cmp(&b.predicate)); + out + } +} diff --git a/gems/decomplex/rust/src/decomplex/detectors/sequence_mine.rs b/gems/decomplex/rust/src/decomplex/detectors/sequence_mine.rs new file mode 100644 index 000000000..306ca805f --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/sequence_mine.rs @@ -0,0 +1,419 @@ +use crate::decomplex::ast::Span; +use crate::decomplex::syntax::{self, CallSite, Document, Language}; +use anyhow::Result; +use serde::Serialize; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::PathBuf; + +#[derive(Clone, Debug, PartialEq, Serialize)] +pub struct BrokenProtocolReport { + pub broken: Vec, +} + +#[derive(Clone, Debug, PartialEq, Serialize)] +pub struct BrokenProtocol { + pub pair: Vec, + pub support: usize, + pub confidence: f64, + pub has: String, + pub missing: String, + pub at: String, + pub spans: BTreeMap, +} + +#[derive(Clone, Debug)] +struct Call { + mid: String, + file: String, + defn: String, + line: usize, + span: Span, +} + +pub fn scan_files(files: &[PathBuf], language: Language) -> Result { + let documents = syntax::parse_files(files, language)?; + Ok(scan_documents(&documents)) +} + +pub fn scan_documents(documents: &[Document]) -> BrokenProtocolReport { + let mut calls = Vec::new(); + for document in documents { + for call in &document.call_sites { + let mid = call.message.to_string(); + for nested_mid in nested_protocol_events(call, document) { + calls.push(Call { + mid: nested_mid, + file: call.file.clone(), + defn: call.function.clone(), + line: call.line, + span: call.span, + }); + } + + if protocol_event(call, &mid) { + calls.push(Call { + mid, + file: call.file.clone(), + defn: call.function.clone(), + line: call.line, + span: call.span, + }); + } + } + } + Report::new(calls).findings() +} + +const DECLARATIVE_MIDS: &[&str] = &[ + "abstract!", + "alias_method", + "any", + "attr_accessor", + "attr_reader", + "attr_writer", + "bind", + "cast", + "checked", + "enum", + "extend", + "final", + 
"include", + "interface!", + "let", + "must", + "must_because", + "nilable", + "override", + "overridable", + "params", + "prepend", + "private", + "private_class_method", + "public", + "require", + "require_relative", + "requires_ancestor", + "sealed!", + "sig", + "type_member", + "type_template", + "untyped", + "unsafe", + "void", +]; +const TEST_DSL_MIDS: &[&str] = &[ + "a_kind_of", + "after", + "around", + "before", + "be", + "be_a", + "be_an", + "be_empty", + "be_falsey", + "be_nil", + "be_truthy", + "change", + "contain_exactly", + "context", + "describe", + "eq", + "eql", + "equal", + "expect", + "have_attributes", + "have_key", + "have_received", + "it", + "match", + "not_to", + "raise_error", + "receive", + "subject", + "to", +]; +const ZERO_ARG_ACTION_MIDS: &[&str] = &[ + "acquire", + "begin", + "close", + "commit", + "connect", + "deinit", + "disconnect", + "drain", + "finish", + "flush", + "lock", + "open", + "release", + "rollback", + "start", + "stop", + "unlock", + "wait", +]; +const ZERO_ARG_ACTION_PREFIXES: &[&str] = &[ + "analyze", + "append", + "apply", + "build", + "call", + "check", + "classify", + "collect", + "compile", + "compute", + "consume", + "create", + "declare", + "emit", + "enforce", + "finalize", + "find", + "flush", + "handle", + "initialize", + "lower", + "mark", + "normalize", + "parse", + "perform", + "process", + "push", + "record", + "register", + "render", + "resolve", + "rewrite", + "run", + "scan", + "set", + "stamp", + "sync", + "transform", + "validate", + "verify", + "visit", + "walk", + "warn", + "write", +]; + +fn protocol_event(call: &CallSite, mid: &str) -> bool { + !ignored_mid(mid) && !passive_reader_call(call, mid) +} + +fn passive_reader_call(call: &CallSite, mid: &str) -> bool { + if zero_arg_action_name(mid) { + return false; + } + + call.arguments.is_empty() +} + +fn nested_protocol_events(call: &CallSite, document: &Document) -> Vec { + if !ignored_mid(&call.message) { + return Vec::new(); + } + + let mut 
/// Returns the trimmed `text` when it has the shape of a method-like word.
///
/// The first character must be `_` or an ASCII lowercase letter; every later
/// character must be `_`, `!`, `?` or ASCII alphanumeric. Anything else,
/// including empty or all-whitespace input, yields `None`.
fn protocol_word(text: &str) -> Option<String> {
    let candidate = text.trim();
    let mut letters = candidate.chars();

    let starts_like_identifier = matches!(letters.next(), Some('_' | 'a'..='z'));
    let continues_like_identifier =
        letters.all(|ch| matches!(ch, '_' | '!' | '?') || ch.is_ascii_alphanumeric());

    (starts_like_identifier && continues_like_identifier).then(|| candidate.to_string())
}
unit_calls)) = by_unit.iter_mut().find(|(existing, _)| existing == &key) + { + unit_calls.push(call); + } else { + by_unit.push((key, vec![call])); + } + } + + let mut support = BTreeMap::new(); + for (_, calls) in &by_unit { + for mid in unique_mids(calls) { + *support.entry(mid).or_insert(0) += 1; + } + } + + Self { by_unit, support } + } + + fn findings(&self) -> BrokenProtocolReport { + BrokenProtocolReport { + broken: self.broken_protocol(4, 0.75), + } + } + + fn broken_protocol(&self, min_support: usize, min_confidence: f64) -> Vec { + let pairs = self.co_called_pairs(min_support); + let mut out = Vec::new(); + for ((file, defn), calls) in &self.by_unit { + let mids = unique_mids(calls); + for pair in &pairs { + let (has, missing) = + if mids.contains(&pair.pair[0]) && !mids.contains(&pair.pair[1]) { + (pair.pair[0].clone(), pair.pair[1].clone()) + } else if mids.contains(&pair.pair[1]) && !mids.contains(&pair.pair[0]) { + (pair.pair[1].clone(), pair.pair[0].clone()) + } else { + continue; + }; + let denominator = *self.support.get(&has).unwrap_or(&0); + if denominator == 0 { + continue; + } + let confidence = pair.support as f64 / denominator as f64; + if confidence < min_confidence { + continue; + } + let Some(has_call) = calls.iter().find(|call| call.mid == has) else { + continue; + }; + let loc = format!("{}:{}:{}", file, defn, has_call.line); + let mut spans = BTreeMap::new(); + spans.insert(loc.clone(), has_call.span); + out.push(BrokenProtocol { + pair: pair.pair.clone(), + support: pair.support, + confidence: (confidence * 100.0).round() / 100.0, + has, + missing, + at: loc, + spans, + }); + } + } + out.sort_by(|a, b| { + b.confidence + .partial_cmp(&a.confidence) + .unwrap() + .then_with(|| b.support.cmp(&a.support)) + }); + out + } + + fn co_called_pairs(&self, min_support: usize) -> Vec { + let mut counts: Vec = Vec::new(); + for (unit, calls) in &self.by_unit { + let mids = unique_mids(calls); + for i in 0..mids.len() { + for j in i + 
1..mids.len() { + let pair = vec![mids[i].clone(), mids[j].clone()]; + if let Some(existing) = counts.iter_mut().find(|row| row.pair == pair) { + existing.support += 1; + existing.sites.push(unit.clone()); + } else { + counts.push(PairSupport { + pair, + support: 1, + sites: vec![unit.clone()], + }); + } + } + } + } + let mut out: Vec<_> = counts + .into_iter() + .filter(|row| row.support >= min_support) + .collect(); + out.sort_by(|a, b| b.support.cmp(&a.support)); + out + } +} + +fn ignored_mid(mid: &str) -> bool { + DECLARATIVE_MIDS.contains(&mid) || TEST_DSL_MIDS.contains(&mid) +} + +fn zero_arg_action_name(mid: &str) -> bool { + ZERO_ARG_ACTION_MIDS.contains(&mid) + || mid.ends_with('!') + || ZERO_ARG_ACTION_PREFIXES + .iter() + .any(|prefix| mid == *prefix || mid.starts_with(&format!("{prefix}_"))) +} + +fn unique_mids(calls: &[Call]) -> Vec { + let set: BTreeSet<_> = calls.iter().map(|call| call.mid.clone()).collect(); + set.into_iter().collect() +} diff --git a/gems/decomplex/rust/src/decomplex/detectors/state_branch_density.rs b/gems/decomplex/rust/src/decomplex/detectors/state_branch_density.rs new file mode 100644 index 000000000..cd37e51ba --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/state_branch_density.rs @@ -0,0 +1,158 @@ +use crate::decomplex::ast::Span; +use crate::decomplex::syntax::{self, Document, Language}; +use anyhow::Result; +use serde::Serialize; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::PathBuf; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct StateBranchDensityRow { + pub at: String, + pub file: String, + pub method: String, + pub decisions: usize, + pub state_refs: Vec, + pub predicate: String, + pub score: usize, + pub sites: Vec, + pub spans: BTreeMap, +} + +#[derive(Debug, Clone)] +struct Decision { + file: String, + defn: String, + line: usize, + span: Span, + predicate: String, + state_refs: Vec, +} + +pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { + let 
/// Reports whether `predicate` begins with a bare control keyword.
///
/// True when the text up to the first whitespace character (or the whole
/// text, if it has none) is exactly `if`, `unless`, `while` or `until`.
fn wrapper_predicate(predicate: &str) -> bool {
    // `split` always yields at least one piece, possibly empty.
    let leading_word = predicate.split(char::is_whitespace).next().unwrap_or("");
    matches!(leading_word, "if" | "unless" | "while" | "until")
}
BTreeMap::new(); + for d in &self.decisions { + groups + .entry((d.file.clone(), d.defn.clone())) + .or_default() + .push(d.clone()); + } + + let mut rows = Vec::new(); + for ((file, defn), ds) in groups { + let mut refs = BTreeSet::new(); + for d in &ds { + for r in &d.state_refs { + refs.insert(r.clone()); + } + } + let refs: Vec<_> = refs.into_iter().collect(); + let score = ds.len() * refs.len().max(1); + + let mut sites = Vec::new(); + let mut spans = BTreeMap::new(); + for d in &ds { + let loc = format!("{}:{}:{}", d.file, d.defn, d.line); + sites.push(loc.clone()); + spans.insert(loc, d.span); + } + + rows.push(StateBranchDensityRow { + at: format!("{}:{}:{}", file, defn, ds.first().unwrap().line), + file, + method: defn, + decisions: ds.len(), + state_refs: refs, + predicate: ds.first().unwrap().predicate.clone(), + score, + sites, + spans, + }); + } + + rows.sort_by(|a, b| { + b.score + .cmp(&a.score) + .then_with(|| b.decisions.cmp(&a.decisions)) + .then_with(|| a.file.cmp(&b.file)) + .then_with(|| a.method.cmp(&b.method)) + }); + rows + } +} diff --git a/gems/decomplex/rust/src/decomplex/detectors/state_mesh.rs b/gems/decomplex/rust/src/decomplex/detectors/state_mesh.rs new file mode 100644 index 000000000..5cf1465d5 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/state_mesh.rs @@ -0,0 +1,591 @@ +use crate::decomplex::ast::Span; +use crate::decomplex::detectors::semantic_alias; +use crate::decomplex::syntax::{self, Document, Language}; +use anyhow::Result; +use serde::Serialize; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::{Path, PathBuf}; + +#[derive(Clone, Debug, Serialize)] +pub struct StateMeshReport { + pub state_mesh: StateMeshMeta, + pub fields: BTreeMap, + pub hierarchy: Vec, +} + +#[derive(Clone, Debug, Serialize)] +pub struct StateMeshMeta { + pub total_fields: usize, + pub total_writes: usize, + pub total_reads: usize, + pub total_re_derivations: usize, + pub min_writes: usize, + pub custom_fields: Option>, +} + 
+#[derive(Clone, Debug, Serialize)] +pub struct StateFieldRow { + pub messiness: f64, + pub rank: usize, + pub metrics: FieldMetricsRow, + pub writers: Vec, + pub readers: Vec, + pub re_derivations: Vec, +} + +#[derive(Clone, Debug, Serialize)] +pub struct FieldMetricsRow { + pub writes: usize, + pub reads: usize, + pub re_derivations: usize, + pub scatter: usize, + pub write_scatter: usize, + pub read_scatter: usize, + pub receiver_types: usize, + pub fix_churn: f64, + pub pressure: usize, + pub percentiles: BTreeMap, +} + +#[derive(Clone, Debug, Serialize)] +pub struct SiteInfo { + pub file: String, + pub defn: String, + pub line: usize, + pub recv: String, + pub span: Span, +} + +#[derive(Clone, Debug, Serialize)] +pub struct ReDerivationInfo { + pub file: String, + pub defn: String, + pub line: usize, + pub raw: String, + pub predicate: String, + pub canon: String, +} + +#[derive(Clone, Debug, Serialize)] +pub struct DirObj { + pub name: String, + pub writers: usize, + pub readers: usize, + pub files: Vec, +} + +#[derive(Clone, Debug, Serialize)] +pub struct FileObj { + pub name: String, + pub writers: usize, + pub readers: usize, + pub defns: Vec, +} + +#[derive(Clone, Debug, Serialize)] +pub struct DefnObj { + pub name: String, + pub writers: usize, + pub readers: usize, + pub fields: DefnFields, +} + +#[derive(Clone, Debug, Serialize)] +pub struct DefnFields { + pub written: Vec, + pub read: Vec, +} + +#[derive(Clone, Debug)] +struct Write { + #[allow(dead_code)] + attr: String, + norm: String, + recv: String, + file: String, + defn: String, + line: usize, + span: Span, +} + +#[derive(Clone, Debug)] +struct Read { + #[allow(dead_code)] + attr: String, + norm: String, + recv: String, + file: String, + defn: String, + line: usize, + span: Span, +} + +#[derive(Clone, Debug)] +struct ReDerivation { + field: String, + file: String, + defn: String, + line: usize, + raw: String, + predicate: String, + canon: String, +} + +struct FieldMetrics { + name: String, + 
writes: usize, + reads: usize, + re_derivations: usize, + scatter: usize, + write_scatter: usize, + read_scatter: usize, + receiver_types: usize, + messiness: f64, + pressure: usize, + percentiles: BTreeMap, + rank: usize, +} + +pub fn scan_files(files: &[PathBuf], language: Language) -> Result { + let documents = syntax::parse_files(files, language)?; + Ok(scan_documents(&documents)) +} + +pub fn scan_documents(documents: &[Document]) -> StateMeshReport { + let semantic_aliases = semantic_alias::scan_documents(documents); + scan_documents_with_semantic_aliases(documents, &semantic_aliases) +} + +pub fn scan_documents_with_semantic_aliases( + documents: &[Document], + semantic_aliases: &semantic_alias::SemanticAliasReport, +) -> StateMeshReport { + scan_documents_with_semantic_aliases_and_min_writes(documents, semantic_aliases, 2) +} + +pub fn scan_documents_with_semantic_aliases_and_min_writes( + documents: &[Document], + semantic_aliases: &semantic_alias::SemanticAliasReport, + min_writes: usize, +) -> StateMeshReport { + let mut sm = StateMesh::new(min_writes); + sm.load_document_facts(documents); + sm.find_re_derivations(semantic_aliases); + sm.to_json_graph() +} + +struct StateMesh { + min_writes: usize, + custom_fields: Option>, + writes: Vec, + reads: Vec, + re_derivations: Vec, +} + +impl StateMesh { + fn new(min_writes: usize) -> Self { + Self { + min_writes, + custom_fields: None, + writes: Vec::new(), + reads: Vec::new(), + re_derivations: Vec::new(), + } + } + + fn load_document_facts(&mut self, documents: &[Document]) { + for document in documents { + for write in &document.state_writes { + let norm = self.normalize(&write.field); + self.writes.push(Write { + attr: write.field.clone(), + norm, + recv: write.receiver.clone(), + file: write.file.clone(), + defn: write.function.clone(), + line: write.line, + span: write.span, + }); + } + } + + let field_norms = self.known_field_norms(); + if field_norms.is_empty() { + return; + } + + for document in 
documents { + for read in &document.state_reads { + let norm = self.normalize(&read.field); + if !field_norms.contains(&norm) { + continue; + } + let candidate = Read { + attr: read.field.clone(), + norm, + recv: read.receiver.clone(), + file: read.file.clone(), + defn: read.function.clone(), + line: read.line, + span: read.span, + }; + if !self.write_target_read(&candidate) { + self.reads.push(candidate); + } + } + } + } + + fn write_target_read(&self, read: &Read) -> bool { + self.writes.iter().any(|write| { + write.file == read.file + && write.defn == read.defn + && write.recv == read.recv + && (write.attr == read.attr || write.norm == read.norm) + && write.line == read.line + && write.span[0] == read.span[0] + && write.span[1] == read.span[1] + }) + } + + fn find_re_derivations(&mut self, semantic_aliases: &semantic_alias::SemanticAliasReport) { + let field_norms = self.known_field_norms(); + if field_norms.is_empty() { + return; + } + + for m in &semantic_aliases.reification_misses { + let loc = m.at.clone(); + let parts: Vec<&str> = loc.split(':').collect(); + if parts.len() < 3 { + continue; + } + let line = parts.last().unwrap().parse::().unwrap_or(0); + let defn = parts[parts.len() - 2].to_string(); + let file = parts[..parts.len() - 2].join(":"); + + if let Some(matched) = field_norms + .iter() + .find(|fnorm| m.raw.contains(*fnorm) || m.canon.contains(*fnorm)) + { + self.re_derivations.push(ReDerivation { + field: matched.clone(), + file, + defn, + line, + raw: m.raw.clone(), + predicate: m.predicate.clone(), + canon: m.canon.clone(), + }); + } + } + } + + fn metrics(&self) -> Vec { + let field_norms = self.known_field_norms(); + let mut metrics_vec = Vec::new(); + + for fnorm in &field_norms { + let ws: Vec<_> = self.writes.iter().filter(|w| &w.norm == fnorm).collect(); + let rs: Vec<_> = self.reads.iter().filter(|r| &r.norm == fnorm).collect(); + let ds: Vec<_> = self + .re_derivations + .iter() + .filter(|d| &d.field == fnorm) + .collect(); + + let 
mut all_sites = BTreeSet::new(); + for w in &ws { + all_sites.insert((w.file.clone(), w.defn.clone())); + } + for r in &rs { + all_sites.insert((r.file.clone(), r.defn.clone())); + } + for d in &ds { + all_sites.insert((d.file.clone(), d.defn.clone())); + } + let scatter = all_sites.len(); + + let mut write_sites = BTreeSet::new(); + for w in &ws { + write_sites.insert((w.file.clone(), w.defn.clone())); + } + let write_scatter = write_sites.len(); + + let mut read_sites = BTreeSet::new(); + for r in &rs { + read_sites.insert((r.file.clone(), r.defn.clone())); + } + let read_scatter = read_sites.len(); + + let mut receivers = BTreeSet::new(); + for w in &ws { + receivers.insert(w.recv.clone()); + } + for r in &rs { + receivers.insert(r.recv.clone()); + } + let receiver_types = receivers.len(); + + let n_writes = ws.len(); + let n_reads = rs.len(); + let n_reder = ds.len(); + let fix_churn = 1.0; + let messiness = (n_writes + n_reads + n_reder) as f64 * scatter as f64 * fix_churn; + let pressure = read_scatter; + + metrics_vec.push(FieldMetrics { + name: fnorm.clone(), + writes: n_writes, + reads: n_reads, + re_derivations: n_reder, + scatter, + write_scatter, + read_scatter, + receiver_types, + messiness, + pressure, + percentiles: BTreeMap::new(), + rank: 0, + }); + } + + metrics_vec.sort_by(|a, b| { + b.messiness + .partial_cmp(&a.messiness) + .unwrap_or(std::cmp::Ordering::Equal) + .then_with(|| a.name.cmp(&b.name)) + }); + for (i, m) in metrics_vec.iter_mut().enumerate() { + m.rank = i + 1; + } + + let total = metrics_vec.len(); + if total > 1 { + let attrs = [ + "writes", + "reads", + "re_derivations", + "scatter", + "messiness", + "pressure", + ]; + for attr in &attrs { + let mut vals: Vec = metrics_vec + .iter() + .map(|m| match *attr { + "writes" => m.writes as f64, + "reads" => m.reads as f64, + "re_derivations" => m.re_derivations as f64, + "scatter" => m.scatter as f64, + "messiness" => m.messiness, + "pressure" => m.pressure as f64, + _ => 0.0, + }) + 
.collect(); + vals.sort_by(|a, b| a.partial_cmp(b).unwrap()); + + for m in metrics_vec.iter_mut() { + let v = match *attr { + "writes" => m.writes as f64, + "reads" => m.reads as f64, + "re_derivations" => m.re_derivations as f64, + "scatter" => m.scatter as f64, + "messiness" => m.messiness, + "pressure" => m.pressure as f64, + _ => 0.0, + }; + let pctl = vals.iter().filter(|&&x| x <= v).count() * 100 / total; + m.percentiles.insert(attr.to_string(), pctl); + } + } + } + + metrics_vec + } + + fn to_json_graph(&self) -> StateMeshReport { + let fm = self.metrics(); + let fm_index: BTreeMap = + fm.iter().map(|m| (m.name.clone(), m)).collect(); + let field_norms = self.known_field_norms(); + + let mut fields_obj = BTreeMap::new(); + for fnorm in &field_norms { + let m = fm_index.get(fnorm).unwrap(); + let ws: Vec<_> = self + .writes + .iter() + .filter(|w| &w.norm == fnorm) + .map(|w| SiteInfo { + file: w.file.clone(), + defn: w.defn.clone(), + line: w.line, + recv: w.recv.clone(), + span: w.span, + }) + .collect(); + let rs: Vec<_> = self + .reads + .iter() + .filter(|r| &r.norm == fnorm) + .map(|r| SiteInfo { + file: r.file.clone(), + defn: r.defn.clone(), + line: r.line, + recv: r.recv.clone(), + span: r.span, + }) + .collect(); + let ds: Vec<_> = self + .re_derivations + .iter() + .filter(|d| &d.field == fnorm) + .map(|d| ReDerivationInfo { + file: d.file.clone(), + defn: d.defn.clone(), + line: d.line, + raw: d.raw.clone(), + predicate: d.predicate.clone(), + canon: d.canon.clone(), + }) + .collect(); + + fields_obj.insert( + fnorm.clone(), + StateFieldRow { + messiness: m.messiness, + rank: m.rank, + metrics: FieldMetricsRow { + writes: m.writes, + reads: m.reads, + re_derivations: m.re_derivations, + scatter: m.scatter, + write_scatter: m.write_scatter, + read_scatter: m.read_scatter, + receiver_types: m.receiver_types, + fix_churn: 1.0, + pressure: m.pressure, + percentiles: m.percentiles.clone(), + }, + writers: ws, + readers: rs, + re_derivations: ds, + }, + 
); + } + + let mut all_unit_sites: BTreeMap<(String, String), (BTreeSet, BTreeSet)> = + BTreeMap::new(); + for w in &self.writes { + let entry = all_unit_sites + .entry((w.file.clone(), w.defn.clone())) + .or_default(); + entry.0.insert(w.norm.clone()); + } + for r in &self.reads { + let entry = all_unit_sites + .entry((r.file.clone(), r.defn.clone())) + .or_default(); + entry.1.insert(r.norm.clone()); + } + + let mut dirs: BTreeMap>> = + BTreeMap::new(); + for ((file, defn), (ws, rs)) in all_unit_sites { + let path = Path::new(&file); + let dir = path + .parent() + .map(|p| p.to_string_lossy().to_string()) + .unwrap_or_else(|| ".".to_string()); + let dir = if dir.is_empty() { ".".to_string() } else { dir }; + let base = path + .file_name() + .map(|s| s.to_string_lossy().to_string()) + .unwrap_or_else(|| file.clone()); + + dirs.entry(dir) + .or_default() + .entry(base) + .or_default() + .insert( + defn.clone(), + DefnObj { + name: defn, + writers: ws.len(), + readers: rs.len(), + fields: DefnFields { + written: ws.into_iter().collect(), + read: rs.into_iter().collect(), + }, + }, + ); + } + + let mut hierarchy = Vec::new(); + for (dname, files_map) in dirs { + let mut dir_writers = 0; + let mut dir_readers = 0; + let mut file_objs = Vec::new(); + for (fname, defns_map) in files_map { + let mut file_writers = 0; + let mut file_readers = 0; + let mut defn_objs: Vec = defns_map.into_iter().map(|(_, v)| v).collect(); + defn_objs.sort_by(|a, b| a.name.cmp(&b.name)); + for d in &defn_objs { + file_writers += d.writers; + file_readers += d.readers; + } + dir_writers += file_writers; + dir_readers += file_readers; + file_objs.push(FileObj { + name: fname, + writers: file_writers, + readers: file_readers, + defns: defn_objs, + }); + } + file_objs.sort_by(|a, b| a.name.cmp(&b.name)); + hierarchy.push(DirObj { + name: dname, + writers: dir_writers, + readers: dir_readers, + files: file_objs, + }); + } + hierarchy.sort_by(|a, b| a.name.cmp(&b.name)); + + StateMeshReport { + 
state_mesh: StateMeshMeta { + total_fields: field_norms.len(), + total_writes: self.writes.len(), + total_reads: self.reads.len(), + total_re_derivations: self.re_derivations.len(), + min_writes: self.min_writes, + custom_fields: self.custom_fields.clone(), + }, + fields: fields_obj, + hierarchy, + } + } + + fn normalize(&self, attr: &str) -> String { + attr.trim_start_matches('@').to_string() + } + + fn known_field_norms(&self) -> BTreeSet { + let mut discovered = BTreeMap::new(); + for w in &self.writes { + *discovered.entry(w.norm.clone()).or_insert(0) += 1; + } + let mut norms: BTreeSet = discovered + .into_iter() + .filter(|(_, count)| *count >= self.min_writes) + .map(|(name, _)| name) + .collect(); + if let Some(custom) = &self.custom_fields { + norms.extend(custom.clone()); + } + norms + } +} diff --git a/gems/decomplex/rust/src/decomplex/detectors/structural_topology.rs b/gems/decomplex/rust/src/decomplex/detectors/structural_topology.rs new file mode 100644 index 000000000..12321f5c6 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/structural_topology.rs @@ -0,0 +1,278 @@ +use crate::decomplex::ast::Span; +use crate::decomplex::syntax::{self, CallSite, Document, FunctionDef, Language}; +use anyhow::Result; +use serde::Serialize; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::{Path, PathBuf}; + +#[derive(Clone, Debug, Serialize)] +pub struct StructuralTopologyReport { + pub methods: Vec, + pub edges: Vec, +} + +#[derive(Clone, Debug, Serialize)] +pub struct Method { + pub id: String, + pub owner: String, + pub name: String, + pub file: String, + pub line: usize, + pub span: Span, + pub visibility: String, +} + +#[derive(Clone, Debug, Serialize)] +pub struct Edge { + pub caller: String, + pub callee: String, + pub caller_name: String, + pub callee_name: String, + pub file: String, + pub line: usize, + pub span: Span, + pub r#type: String, + pub kind: String, + pub confidence: String, +} + +pub fn scan_files(files: &[PathBuf], 
language: Language) -> Result { + let documents = syntax::parse_files(files, language)?; + Ok(scan_documents(&documents)) +} + +pub fn scan_documents(documents: &[Document]) -> StructuralTopologyReport { + let mut methods = Vec::new(); + for document in documents { + methods.extend(methods_for_document(document)); + } + + let method_by_id = methods + .iter() + .map(|method| (method.id.clone(), method.clone())) + .collect::>(); + + let mut edges = Vec::new(); + let mut seen = BTreeSet::new(); + for document in documents { + for edge in edges_for_document(document, &method_by_id) { + if seen.insert(( + edge.caller.clone(), + edge.callee.clone(), + edge.r#type.clone(), + )) { + edges.push(edge); + } + } + } + + StructuralTopologyReport { methods, edges } +} + +pub struct Graph { + pub methods: Vec, + pub edges: Vec, + method_by_id: BTreeMap, + edges_by_caller: BTreeMap>, + edges_by_callee: BTreeMap>, +} + +impl Graph { + pub fn new(methods: Vec, edges: Vec) -> Self { + let mut method_by_id = BTreeMap::new(); + for m in &methods { + method_by_id.insert(m.id.clone(), m.clone()); + } + + let mut edges_by_caller = BTreeMap::new(); + let mut edges_by_callee = BTreeMap::new(); + for e in &edges { + edges_by_caller + .entry(e.caller.clone()) + .or_insert_with(Vec::new) + .push(e.clone()); + edges_by_callee + .entry(e.callee.clone()) + .or_insert_with(Vec::new) + .push(e.clone()); + } + + Self { + methods, + edges, + method_by_id, + edges_by_caller, + edges_by_callee, + } + } + + pub fn method(&self, id: &str) -> Option<&Method> { + self.method_by_id.get(id) + } + + pub fn internal_calls(&self, id: &str) -> Vec { + self.edges_by_caller.get(id).cloned().unwrap_or_default() + } + + pub fn internal_callers(&self, id: &str) -> Vec { + self.edges_by_callee.get(id).cloned().unwrap_or_default() + } + + pub fn single_internal_caller(&self, id: &str) -> bool { + let callers = self.internal_callers(id); + let mut unique = BTreeMap::new(); + for c in callers { + unique.insert(c.caller, 
true); + } + unique.len() == 1 + } + + pub fn visibility(&self, id: &str) -> Option<&str> { + self.method(id).map(|m| m.visibility.as_str()) + } +} + +fn methods_for_document(document: &Document) -> Vec { + document + .function_defs + .iter() + .map(|function| method_for_function(document, function)) + .collect() +} + +fn method_for_function(document: &Document, function: &FunctionDef) -> Method { + let owner = top_level_owner_for(document, &function.owner, function.span); + Method { + id: format!("{}#{}", owner, function.name), + owner, + name: function.name.clone(), + file: function.file.clone(), + line: function.line, + span: function.span, + visibility: function + .visibility + .clone() + .unwrap_or_else(|| "public".to_string()), + } +} + +fn edges_for_document(document: &Document, method_by_id: &BTreeMap) -> Vec { + document + .call_sites + .iter() + .filter_map(|call| edge_for_call(document, method_by_id, call)) + .collect() +} + +fn edge_for_call( + document: &Document, + method_by_id: &BTreeMap, + call: &CallSite, +) -> Option { + if call.receiver != "self" { + return None; + } + + let owner = top_level_owner_for(document, &call.owner, call.span); + let caller = method_by_id.get(&format!("{}#{}", owner, call.function))?; + let callee_name = scoped_name(caller, &call.message); + let callee = method_by_id.get(&format!("{}#{}", owner, callee_name))?; + if caller.id == callee.id { + return None; + } + + Some(Edge { + caller: caller.id.clone(), + callee: callee.id.clone(), + caller_name: caller.name.clone(), + callee_name: callee.name.clone(), + file: call.file.clone(), + line: call.line, + span: call.span, + r#type: edge_type(call.control.as_deref()), + kind: call_kind(document, call.span), + confidence: "high".to_string(), + }) +} + +fn scoped_name(caller: &Method, message: &str) -> String { + if caller.name.starts_with("self.") { + format!("self.{message}") + } else { + message.to_string() + } +} + +fn edge_type(control: Option<&str>) -> String { + match 
control { + Some("conditional" | "iterates") => control.unwrap().to_string(), + _ => "always".to_string(), + } +} + +fn call_kind(document: &Document, span: Span) -> String { + if source_text(&document.lines, span) + .trim_start() + .starts_with("self.") + { + "direct_self".to_string() + } else { + "bare_internal".to_string() + } +} + +fn source_text(lines: &[String], span: Span) -> String { + let [first_line, first_column, last_line, last_column] = span; + if first_line == 0 || last_line == 0 || first_line > lines.len() || last_line > lines.len() { + return String::new(); + } + if first_line == last_line { + return lines[first_line - 1] + .chars() + .skip(first_column) + .take(last_column.saturating_sub(first_column)) + .collect(); + } + + let mut parts = Vec::new(); + parts.push(lines[first_line - 1].chars().skip(first_column).collect()); + for line in lines + .iter() + .take(last_line.saturating_sub(1)) + .skip(first_line) + { + parts.push(line.clone()); + } + parts.push(lines[last_line - 1].chars().take(last_column).collect()); + parts.join("") +} + +fn top_level_owner_for(document: &Document, owner: &str, span: Span) -> String { + if owner != file_owner(&document.file) || enclosed_by_matching_owner(document, owner, span) { + owner.to_string() + } else { + format!("(top-level:{})", document.file) + } +} + +fn file_owner(file: &str) -> String { + Path::new(file) + .file_stem() + .and_then(|stem| stem.to_str()) + .filter(|stem| !stem.is_empty()) + .unwrap_or("(file)") + .to_string() +} + +fn enclosed_by_matching_owner(document: &Document, owner: &str, span: Span) -> bool { + document + .owner_defs + .iter() + .any(|owner_def| owner_def.name == owner && span_encloses(owner_def.span, span)) +} + +fn span_encloses(outer: Span, inner: Span) -> bool { + let starts_before = outer[0] < inner[0] || (outer[0] == inner[0] && outer[1] <= inner[1]); + let ends_after = outer[2] > inner[2] || (outer[2] == inner[2] && outer[3] >= inner[3]); + starts_before && ends_after +} diff 
--git a/gems/decomplex/rust/src/decomplex/detectors/temporal_ordering_pressure.rs b/gems/decomplex/rust/src/decomplex/detectors/temporal_ordering_pressure.rs new file mode 100644 index 000000000..185102d4d --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/detectors/temporal_ordering_pressure.rs @@ -0,0 +1,191 @@ +use crate::decomplex::ast::Span; +use crate::decomplex::syntax::{self, Document, Language}; +use anyhow::Result; +use serde::Serialize; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::PathBuf; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct TemporalOrderingPressureRow { + pub at: String, + pub file: String, + pub owner: String, + pub public_methods: usize, + pub state_methods: usize, + pub writers: usize, + pub state_fields: Vec, + pub shared_fields: Vec, + pub orderings: String, + pub state_space: String, + pub score: usize, + pub sites: Vec, + pub spans: BTreeMap, +} + +#[derive(Clone, Debug)] +struct MethodState { + name: String, + line: usize, + span: Span, + visibility: String, + reads: Vec, + writes: Vec, +} + +pub fn scan_files( + files: &[PathBuf], + language: Language, +) -> Result> { + let documents = syntax::parse_files(files, language)?; + Ok(scan_documents(&documents)) +} + +pub fn scan_documents(documents: &[Document]) -> Vec { + let mut rows = Vec::new(); + for document in documents { + rows.extend(scan_document_facts(document)); + } + rows.sort_by(|a, b| { + b.score + .cmp(&a.score) + .then_with(|| b.state_methods.cmp(&a.state_methods)) + .then_with(|| a.file.cmp(&b.file)) + .then_with(|| a.owner.cmp(&b.owner)) + }); + rows +} + +fn scan_document_facts(document: &Document) -> Vec { + let owners = document + .owner_defs + .iter() + .map(|owner| owner.name.clone()) + .chain( + document + .function_defs + .iter() + .map(|function| function.owner.clone()), + ) + .filter(|owner| !owner.is_empty()) + .collect::>(); + owners + .into_iter() + .filter_map(|owner| pressure_row_for_owner(document, &owner)) + 
.collect() +} + +fn pressure_row_for_owner(document: &Document, owner: &str) -> Option { + let methods = document + .function_defs + .iter() + .filter(|function| function.owner == owner) + .map(|function| MethodState { + name: function.name.clone(), + line: function.line, + span: function.span, + visibility: function + .visibility + .clone() + .unwrap_or_else(|| "public".to_string()), + reads: sorted_unique( + document + .state_reads + .iter() + .filter(|read| read.owner == function.owner && read.function == function.name) + .map(|read| read.field.clone()), + ), + writes: sorted_unique( + document + .state_writes + .iter() + .filter(|write| { + write.owner == function.owner && write.function == function.name + }) + .map(|write| write.field.clone()), + ), + }) + .collect::>(); + pressure_row(document.file.as_str(), owner, &methods) +} + +fn pressure_row( + file: &str, + owner: &str, + methods: &[MethodState], +) -> Option { + let public_methods: Vec<_> = methods + .iter() + .filter(|m| m.visibility == "public") + .collect(); + let state_methods: Vec<_> = public_methods + .iter() + .filter(|m| !m.reads.is_empty() || !m.writes.is_empty()) + .collect(); + let writers: Vec<_> = public_methods + .iter() + .filter(|m| !m.writes.is_empty()) + .collect(); + + if state_methods.len() < 3 || writers.len() < 2 { + return None; + } + + let mut fields_set = BTreeSet::new(); + for m in &state_methods { + fields_set.extend(m.reads.iter().cloned()); + fields_set.extend(m.writes.iter().cloned()); + } + let fields = fields_set.into_iter().collect::>(); + let shared_fields = fields + .iter() + .filter(|field| { + state_methods + .iter() + .filter(|m| m.reads.contains(*field) || m.writes.contains(*field)) + .count() + >= 2 + }) + .cloned() + .collect::>(); + if shared_fields.is_empty() { + return None; + } + + let n = state_methods.len(); + let state_space = 2usize.pow(fields.len().min(12) as u32); + let score = (n * writers.len() * shared_fields.len().max(1)) + state_space; + 
/// Returns the distinct values of `values` in ascending order.
fn sorted_unique(values: impl Iterator<Item = String>) -> Vec<String> {
    // A `BTreeSet` de-duplicates and iterates in sorted order, so no
    // follow-up sort is needed.
    values.collect::<BTreeSet<_>>().into_iter().collect()
}
/// Renders `value` with exactly one digit after the decimal point.
fn format_one_decimal(value: f64) -> String {
    format!("{:.1}", value)
}
structural_topology::Graph, + scores: BTreeMap, + min_score: f64, + min_hidden: f64, + max_depth: usize, + ) -> Self { + Self { + topology, + scores, + min_score, + min_hidden, + max_depth, + } + } + + fn findings(&self) -> Vec { + let mut out: Vec<_> = self + .scores + .values() + .filter_map(|s| self.finding_for(s)) + .collect(); + out.sort_by(|a, b| { + b.hidden + .partial_cmp(&a.hidden) + .unwrap_or(std::cmp::Ordering::Equal) + .then_with(|| { + b.inlined + .partial_cmp(&a.inlined) + .unwrap_or(std::cmp::Ordering::Equal) + }) + .then_with(|| a.at.cmp(&b.at)) + }); + out + } + + fn finding_for(&self, score: &LocalScore) -> Option { + let mut visited = BTreeSet::new(); + visited.insert(score.id.clone()); + let contributions = self.inlined_contributions(&score.id, 1, &mut visited); + + let hidden = self.round(contributions.iter().map(|c| c.score).sum()); + let total = self.round(score.score + hidden); + if total < self.min_score || hidden < self.min_hidden { + return None; + } + + let direct_single_caller = self.single_caller_callees(&score.id); + let at = format!("{}:{}:{}", score.file, score.name, score.line); + let mut spans = BTreeMap::new(); + spans.insert(at.clone(), score.span); + + Some(WeightedInlinedCognitiveComplexityRow { + at, + owner: score.owner.clone(), + method: score.name.clone(), + local: score.score, + inlined: total, + hidden, + depth: contributions.iter().map(|c| c.depth).max().unwrap_or(0), + single_caller_callees: direct_single_caller.clone(), + call_chain: self.strongest_chain(score, &contributions), + reason: self.reason(hidden, &direct_single_caller), + signals: score.signals.clone(), + spans, + }) + } + + fn inlined_contributions( + &self, + method_id: &str, + depth: usize, + visited: &mut BTreeSet, + ) -> Vec { + if depth > self.max_depth { + return Vec::new(); + } + + let mut out = Vec::new(); + for edge in self.grouped_edges(method_id) { + if visited.contains(&edge.callee) { + continue; + } + let Some(callee) = 
self.scores.get(&edge.callee) else { + continue; + }; + + let weight = self.contribution_weight(&edge, depth); + let direct = Contribution { + callee_id: edge.callee.clone(), + callee_name: edge.callee_name.clone(), + score: self.round(callee.score * weight), + weight: self.round(weight), + depth, + chain: vec![edge.callee_name.clone()], + }; + + let mut next_visited = visited.clone(); + next_visited.insert(edge.callee.clone()); + let nested = self.inlined_contributions(&edge.callee, depth + 1, &mut next_visited); + let nested: Vec<_> = nested + .into_iter() + .map(|c| Contribution { + callee_id: c.callee_id, + callee_name: c.callee_name, + score: self.round(c.score * weight), + weight: self.round(c.weight * weight), + depth: c.depth, + chain: { + let mut chain = vec![edge.callee_name.clone()]; + chain.extend(c.chain); + chain + }, + }) + .collect(); + + out.push(direct); + out.extend(nested); + } + out + } + + fn grouped_edges(&self, method_id: &str) -> Vec { + let mut by_callee: BTreeMap> = BTreeMap::new(); + for edge in self.topology.internal_calls(method_id) { + by_callee.entry(edge.callee.clone()).or_default().push(edge); + } + by_callee + .into_iter() + .map(|(_, edges)| { + edges + .into_iter() + .fold(None, |best: Option, edge| { + let Some(current) = best else { + return Some(edge); + }; + if self.edge_weight(&edge.r#type) > self.edge_weight(¤t.r#type) { + Some(edge) + } else { + Some(current) + } + }) + .unwrap() + }) + .collect() + } + + fn contribution_weight(&self, edge: &structural_topology::Edge, depth: usize) -> f64 { + let caller_factor = if self.topology.single_internal_caller(&edge.callee) { + 1.0 + } else { + 0.35 + }; + let visibility_factor = if self.shared_public_step(edge) { + 0.6 + } else { + 1.0 + }; + let depth_factor = match depth { + 1 => 1.0, + 2 => 0.6, + _ => 0.35, + }; + let edge_factor = self.edge_weight(&edge.r#type); + caller_factor * visibility_factor * depth_factor * edge_factor + } + + fn edge_weight(&self, t: &str) -> f64 { + 
match t { + "always" => 1.0, + "conditional" => 0.75, + "iterates" => 1.15, + _ => 1.0, + } + } + + fn shared_public_step(&self, edge: &structural_topology::Edge) -> bool { + self.topology.visibility(&edge.callee) == Some("public") + && !self.topology.single_internal_caller(&edge.callee) + } + + fn single_caller_callees(&self, method_id: &str) -> Vec { + let mut out: Vec<_> = self + .grouped_edges(method_id) + .into_iter() + .filter(|e| self.topology.single_internal_caller(&e.callee)) + .map(|e| e.callee_name) + .collect(); + out.sort(); + out + } + + fn strongest_chain(&self, score: &LocalScore, contributions: &[Contribution]) -> Vec { + let chain = contributions + .iter() + .fold(None, |best: Option<&Contribution>, contribution| { + let Some(current) = best else { + return Some(contribution); + }; + if contribution.score > current.score { + Some(contribution) + } else { + Some(current) + } + }) + .map(|c| c.chain.clone()) + .unwrap_or_default(); + let mut out = vec![score.name.clone()]; + out.extend(chain); + out + } + + fn reason(&self, hidden: f64, single_caller_callees: &[String]) -> String { + if single_caller_callees.is_empty() { + format!( + "same-owner call chain adds {} weighted cognitive points", + format_one_decimal(hidden) + ) + } else { + format!( + "{} single-caller helper(s) add {} weighted cognitive points", + single_caller_callees.len(), + format_one_decimal(hidden) + ) + } + } + + fn round(&self, value: f64) -> f64 { + (value * 10.0).round() / 10.0 + } +} diff --git a/gems/decomplex/rust/src/decomplex/mod.rs b/gems/decomplex/rust/src/decomplex/mod.rs new file mode 100644 index 000000000..27df87ae5 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/mod.rs @@ -0,0 +1,15 @@ +#[cfg(test)] +mod architecture_test; + +pub mod ast; +pub mod convergence; +pub mod delta; +pub mod detectors; +pub mod parallel; +pub mod report; +pub mod report_facts; +pub mod report_value; +pub mod root_cause; +pub mod sarif; +pub mod syntax; +pub mod syntax_oracle; diff 
--git a/gems/decomplex/rust/src/decomplex/parallel.rs b/gems/decomplex/rust/src/decomplex/parallel.rs new file mode 100644 index 000000000..52b9939da --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/parallel.rs @@ -0,0 +1,109 @@ +use anyhow::{bail, Result}; +use std::env; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::mpsc; +use std::thread; + +static JOBS_OVERRIDE: AtomicUsize = AtomicUsize::new(0); + +pub fn set_jobs_for_process(jobs: Option) -> Result<()> { + let Some(jobs) = jobs else { + return Ok(()); + }; + if jobs == 0 { + bail!("--jobs must be greater than zero"); + } + JOBS_OVERRIDE.store(jobs, Ordering::Relaxed); + Ok(()) +} + +pub fn job_count() -> usize { + let configured = JOBS_OVERRIDE.load(Ordering::Relaxed); + if configured > 0 { + return configured; + } + + env_jobs() + .unwrap_or_else(|| { + thread::available_parallelism() + .map(usize::from) + .unwrap_or(1) + }) + .max(1) +} + +pub fn map_ordered(items: &[T], func: F) -> Result> +where + T: Sync, + U: Send, + F: Fn(&T) -> Result + Sync, +{ + let jobs = job_count(); + if jobs <= 1 || items.len() <= 1 { + return items.iter().map(func).collect(); + } + + let worker_count = jobs.min(items.len()); + let next_index = AtomicUsize::new(0); + let (tx, rx) = mpsc::channel(); + + thread::scope(|scope| { + for _ in 0..worker_count { + let tx = tx.clone(); + let func = &func; + let next_index = &next_index; + scope.spawn(move || loop { + let index = next_index.fetch_add(1, Ordering::Relaxed); + if index >= items.len() { + break; + } + if tx.send((index, func(&items[index]))).is_err() { + break; + } + }); + } + drop(tx); + }); + + let mut results = (0..items.len()).map(|_| None).collect::>(); + for (index, result) in rx { + results[index] = Some(result); + } + + results + .into_iter() + .map(|slot| slot.expect("parallel worker did not return a result")) + .collect() +} + +fn env_jobs() -> Option { + ["DECOMPLEX_RUST_JOBS", "DECOMPLEX_JOBS"] + .into_iter() + .find_map(|name| 
env::var(name).ok().and_then(|value| parse_jobs(&value))) +} + +fn parse_jobs(value: &str) -> Option { + let trimmed = value.trim(); + if trimmed.is_empty() { + return None; + } + trimmed.parse::().ok().filter(|jobs| *jobs > 0) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parallel_map_preserves_input_order() { + set_jobs_for_process(Some(4)).expect("jobs"); + let input = vec![3, 2, 1, 0]; + let output = map_ordered(&input, |item| Ok(item * 10)).expect("map"); + assert_eq!(output, vec![30, 20, 10, 0]); + } + + #[test] + fn rejects_zero_jobs_override() { + assert!(set_jobs_for_process(Some(0)).is_err()); + } +} diff --git a/gems/decomplex/rust/src/decomplex/report.rs b/gems/decomplex/rust/src/decomplex/report.rs new file mode 100644 index 000000000..a9495d192 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/report.rs @@ -0,0 +1,1245 @@ +use crate::decomplex::convergence::{self, Unit}; +use crate::decomplex::report_value as rv; +use crate::decomplex::root_cause::{self, Cluster}; +use crate::decomplex::{delta, sarif}; +use anyhow::{bail, Result}; +use serde_json::{json, Value}; + +#[derive(Clone, Debug)] +pub struct ReportSection { + pub title: String, + pub tier: i64, + pub desc: String, + pub findings: Vec, + convergence_excluded: bool, +} + +impl ReportSection { + pub fn new(title: &str, tier: i64, desc: &str, findings: Vec) -> Self { + Self { + title: title.to_string(), + tier, + desc: desc.to_string(), + findings, + convergence_excluded: false, + } + } + + fn excluded_from_convergence(mut self) -> Self { + self.convergence_excluded = true; + self + } +} + +#[derive(Clone, Debug)] +pub struct Report { + files: Vec, + sections: Vec, + convergence: Vec, + root: Vec, +} + +impl Report { + pub fn from_facts(facts: &Value) -> Result { + let files = rv::field_array_strings(facts, "files"); + let Some(detectors) = rv::get(facts, "detectors") else { + bail!("report facts missing detectors"); + }; + let sections = build_sections(detectors); + 
validate_spans(§ions)?; + let convergence_sections = sections + .iter() + .filter(|section| !section.convergence_excluded) + .cloned() + .collect::>(); + let convergence = convergence::rollup(&convergence_sections, 2); + let root = root_cause::cluster(&convergence_sections, 2); + Ok(Self { + files, + sections, + convergence, + root, + }) + } + + pub fn to_markdown(&self) -> String { + let mut out = String::from("# Decomplex Report\n\n"); + out.push_str("> Decision-level duplication and neglected-condition analysis.\n"); + out.push_str("> Every entry is a ranked **candidate** (Engler's discipline),\n"); + out.push_str("> never a verdict -- *POSSIBLE* findings, triaged by a human.\n"); + out.push_str("> Sections are ordered by SIGNAL TIER (1 = lowest false\n"); + out.push_str("> positive), not by volume. Items within a section are\n"); + out.push_str("> frequency-ranked. Triage tier 1, top-of-list, first.\n\n"); + + out.push_str("## Table of Contents\n"); + out.push_str("- [Project Prioritization](#project-prioritization)\n"); + out.push_str(&format!( + "- [Cross-Detector Convergence ({})](#cross-detector-convergence-{})\n", + self.convergence.len(), + self.convergence.len() + )); + out.push_str(&format!( + "- [Root-Cause Clusters ({})](#root-cause-clusters-{})\n", + self.root.len(), + self.root.len() + )); + for section in &self.sections { + out.push_str(&format!( + "- [{} ({})](#{}-{})\n", + section.title, + section.findings.len(), + slug(§ion.title), + section.findings.len() + )); + } + out.push_str("- [Run Summary](#run-summary)\n\n"); + + self.render_project_prioritization(&mut out); + self.render_convergence(&mut out); + self.render_root_cause(&mut out); + + for section in &self.sections { + out.push_str(&format!( + "## {} ({})\n", + section.title, + section.findings.len() + )); + out.push_str(&format!("_{}_\n\n", section.desc)); + if section.findings.is_empty() { + out.push_str("None.\n\n"); + continue; + } + self.render_section(&mut out, section); + 
out.push('\n'); + } + + out.push_str("## Run Summary\n"); + out.push_str(&format!("- Files analyzed: {}\n", self.files.len())); + out.push_str(&format!( + "- Detectors: {} (all shipped, self-tested)\n", + self.sections.len() + )); + out.push_str(&format!( + "- Convergence: {} unit(s) flagged by >=2 independent detectors\n", + self.convergence.len() + )); + out.push_str(&format!( + "- Root-cause clusters: {} (one fix collapses each)\n", + self.root.len() + )); + let total: usize = self + .sections + .iter() + .map(|section| section.findings.len()) + .sum(); + out.push_str(&format!("- Total candidates: {total}\n")); + out.push_str("- Method: stdlib AST only, intra-procedural, zero deps, no CFG / no points-to; Type-2/3 similarity uses Tree-sitter structural fingerprints (see docs/agents/design.md)\n"); + out + } + + pub fn to_sarif(&self) -> String { + serde_json::to_string_pretty(&self.to_sarif_value(true, true, None)).unwrap() + } + + pub fn convergence_value(&self) -> Value { + json!(self.convergence) + } + + pub fn root_clusters_value(&self) -> Value { + json!(self.root) + } + + pub fn to_sarif_value( + &self, + include_snapshot: bool, + include_finding_payload: bool, + max_results: Option, + ) -> Value { + let snapshot = delta::snapshot(&self.sections, &self.root); + let mut results = self.sarif_results(include_finding_payload); + if let Some(max_results) = max_results { + results = ranked_sarif_results(results) + .into_iter() + .take(max_results) + .collect(); + } + let mut properties = json!({ + "format": "decomplex.report.sarif.v1", + "files": self.files, + }); + if include_snapshot { + if let Some(object) = properties.as_object_mut() { + object.insert("decomplex.snapshot".to_string(), snapshot); + } + } + sarif::document( + "Decomplex", + self.sarif_rules(), + results, + Some("https://github.com/cuzzo/clear"), + properties, + ) + } + + fn render_project_prioritization(&self, out: &mut String) { + out.push_str("## Project Prioritization\n"); + out.push_str( + 
"_Ordered by signal tier (1 = highest signal / lowest FP), then by volume._\n\n", + ); + let mut ranked = self + .sections + .iter() + .enumerate() + .filter(|(_, section)| !section.findings.is_empty()) + .collect::>(); + ranked.sort_by(|(left_index, left), (right_index, right)| { + left.tier + .cmp(&right.tier) + .then_with(|| right.findings.len().cmp(&left.findings.len())) + .then_with(|| left_index.cmp(right_index)) + }); + for (_, section) in ranked { + out.push_str(&format!( + "- **[tier {}]** [{} ({})](#{}-{}): {}\n", + section.tier, + section.title, + section.findings.len(), + slug(§ion.title), + section.findings.len(), + section.desc + )); + } + if self + .sections + .iter() + .all(|section| section.findings.is_empty()) + { + out.push_str("\nNothing flagged.\n"); + } + out.push('\n'); + } + + fn render_convergence(&self, out: &mut String) { + out.push_str(&format!( + "## Cross-Detector Convergence ({})\n", + self.convergence.len() + )); + out.push_str("_(file, method) units flagged by >=2 INDEPENDENT detectors -- the strongest triage signal: agreement outranks any single detector's volume. Tier-weighted (1=3, 2=2, 3=1). 
**Start here.**_\n\n"); + if self.convergence.is_empty() { + out.push_str("None (no unit flagged by >=2 detectors).\n\n"); + return; + } + for hit in self.convergence.iter().take(25) { + out.push_str(&format!( + "- {} -- **{} detectors** [score {}, {} findings]: {}\n", + nav(&hit.at), + hit.n_detectors, + hit.score, + hit.findings, + hit.detectors.join(", ") + )); + } + if self.convergence.len() > 25 { + out.push_str(&format!("- ...(+{} more)\n", self.convergence.len() - 25)); + } + let by_file = convergence::by_file(&self.convergence); + if !by_file.is_empty() { + out.push_str("\n### By file\n"); + for hit in by_file.iter().take(15) { + out.push_str(&format!( + "- `{}` -- {} detectors across {} method(s): {}\n", + hit.file, + hit.n_detectors, + hit.methods, + hit.detectors.join(", ") + )); + } + } + out.push('\n'); + } + + fn render_root_cause(&self, out: &mut String) { + out.push_str(&format!("## Root-Cause Clusters ({})\n", self.root.len())); + out.push_str("_Findings across >=2 INDEPENDENT detectors that name the SAME entity -- 'N findings are really one invariant'. Convergence says where to look; this says **what one fix collapses the cluster**. 
Ranked candidate, not a verdict._\n\n"); + if self.root.is_empty() { + out.push_str("None (no entity named by >=2 detectors).\n\n"); + return; + } + for hit in self.root.iter().take(20) { + let tag = if hit.fat_union { + format!("[{} | FAT-UNION]", hit.kind) + } else { + format!("[{}]", hit.kind) + }; + out.push_str(&format!( + "- **{}** `{}` -- **{} detectors** [score {}] across {} unit(s), {} findings: {}\n - FIX: {}\n - {}\n", + tag, + hit.token, + hit.n_detectors, + hit.score, + hit.scatter, + hit.support, + hit.detectors.join(", "), + hit.fix, + hit.sites.iter().take(4).map(|site| nav(site)).collect::>().join(" ; ") + )); + } + if self.root.len() > 20 { + out.push_str(&format!("- ...(+{} more)\n", self.root.len() - 20)); + } + out.push('\n'); + } + + fn render_section(&self, out: &mut String, section: &ReportSection) { + for finding in section.findings.iter().take(25) { + out.push_str(&render_finding(§ion.title, finding)); + } + if section.findings.len() > 25 { + out.push_str(&format!("- ...(+{} more)\n", section.findings.len() - 25)); + } + } + + fn sarif_rules(&self) -> Vec { + self.sections + .iter() + .map(|section| { + sarif::rule( + &sarif_rule_id(§ion.title), + Some(§ion.title), + Some(§ion.desc), + None, + if section.tier <= 1 { "warning" } else { "note" }, + None, + json!({ "tier": section.tier }), + ) + }) + .collect() + } + + fn sarif_results(&self, include_finding_payload: bool) -> Vec { + let mut out = Vec::new(); + for section in &self.sections { + for finding in §ion.findings { + for location in sarif_locations_for_finding(finding) { + let mut properties = json!({ + "detector": section.title, + "tier": section.tier, + "method": location.method, + }); + if include_finding_payload { + if let Some(object) = properties.as_object_mut() { + object.insert( + "decomplex_finding".to_string(), + delta::json_safe_finding(§ion.title, finding), + ); + } + } + out.push(sarif::result( + &sarif_rule_id(§ion.title), + &sarif_message(§ion.title, finding, 
&location), + location.path.as_deref(), + Some(location.line), + location.start_column, + location.end_line, + location.end_column, + if section.tier <= 1 { "warning" } else { "note" }, + properties, + json!({ "decomplexFinding": delta::fingerprint(§ion.title, finding) }), + )); + } + } + } + out + } +} + +#[derive(Clone, Debug)] +struct SarifLocation { + path: Option, + method: Option, + line: i64, + start_column: Option, + end_line: Option, + end_column: Option, +} + +fn build_sections(detectors: &Value) -> Vec { + let miner = rv::get(detectors, "miner").unwrap_or(&Value::Null); + let co_update = rv::get(detectors, "co_update").unwrap_or(&Value::Null); + let semantic_alias = rv::get(detectors, "semantic_alias").unwrap_or(&Value::Null); + let path_condition = rv::get(detectors, "path_condition").unwrap_or(&Value::Null); + let sequence_mine = rv::get(detectors, "sequence_mine").unwrap_or(&Value::Null); + let fat_union = rv::get(detectors, "fat_union").unwrap_or(&Value::Null); + let operational = direct_array(detectors, "operational_discontinuity"); + let (operational_high, operational_rest): (Vec<_>, Vec<_>) = operational + .into_iter() + .partition(|finding| rv::field(finding, "confidence") == "high"); + + vec![ + section("Decision Pressure", 1, "ELIMINABLE guard-pressure per loose contract (nil/is_a?/respond_to?/safe-nav/rescue-nil) -> tighten the contract once / nil-kill: DELETE. 
essential dispatch + pure c-uses are split out, NEVER summed (Rapps-Weyuker p-use; McCabe)", direct_array(detectors, "decision_pressure")), + section("Redundant Nil Guards", 1, "nil checks / safe-nav dominated by an earlier non-nil proof -- delete repeated control flow or tighten the type", direct_array(detectors, "redundant_nil_guard")), + section("State Heatmap", 1, "state fields ranked by write/read/re-derivation scatter -- tangled mutable state should get one owner", direct_array(detectors, "state_heatmap")).excluded_from_convergence(), + section("State-Based Branch Density", 1, "branch decisions over mutable/object state -- state + control-flow pressure", direct_array(detectors, "state_branch_density")), + section("Temporal Ordering Pressure", 1, "public mutable lifecycle surfaces that create implicit state-machine ordering", direct_array(detectors, "temporal_ordering_pressure")), + section("Missing Abstractions", 1, "guard tuple recomputed across >=2 decision units", nested_array(miner, "missing_abstractions")), + section("Reification Misses", 1, "an existing predicate reinvented inline -- invariant #16", nested_array(semantic_alias, "reification_misses")), + section("Semantic Predicate Aliases", 1, "one decision, multiple names (receiver/polarity folded)", nested_array(semantic_alias, "alias_clusters")), + section("Exact Predicate Aliases", 1, "identical one-line predicate body under >=2 names", nested_array(rv::get(detectors, "predicate_alias").unwrap_or(&Value::Null), "alias_clusters")), + section("Inconsistent Rename Clones", 2, "pasted block with inconsistent identifier mapping -- *POSSIBLE* missed rename bug", direct_array(detectors, "inconsistent_rename_clone")), + section("Structural Similarity (Type-2/3)", 2, "Tree-sitter structural clone pressure: Type-2 renamed clones and Type-3 fuzzy clones -- refactor pressure, not a verdict", direct_array(detectors, "flay_similarity")), + section("Neglected Updates", 2, "co-written state, one write missing -- 
*POSSIBLE* redundant-state desync", nested_array(co_update, "neglected_updates")), + section("Derived-State Staleness", 2, "b = f(a); a later reassigned, b not recomputed -- *POSSIBLE* bug", direct_array(detectors, "derived_state")), + section("Neglected Conditions", 2, "dispatch/conjunction minus one element -- *POSSIBLE* bug", nested_array(miner, "neglected_conditions")), + section("Neglected Path Conditions", 3, "nested-if/&& guard set minus one atom -- *POSSIBLE* bug (noisy)", nested_array(path_condition, "neglected")), + section("Oversized Predicates", 3, "predicate with >3 condition atoms -- use an existing helper or extract a named predicate", direct_array(detectors, "oversized_predicate")), + section("Broken Protocols", 3, "co-called pair, one site does A without B -- *POSSIBLE* bug (noisy)", nested_array(sequence_mine, "broken_protocol")), + section("Implicit Control Flow", 2, "state-dependent internal call order exists -- hidden lifecycle/control-flow pressure", nested_array(rv::get(detectors, "implicit_control_flow").unwrap_or(&Value::Null), "ordered_protocols")), + section("Weighted Inlined Cognitive Complexity", 2, "same-owner helper chain hides cognitive load behind a low-looking orchestration method", direct_array(detectors, "weighted_inlined_complexity")), + section("Locality Drag", 2, "local initialized far before first use while unrelated work runs -- move setup closer or extract a private phase", direct_array(detectors, "locality_drag")), + section("Operational Discontinuity (High Confidence)", 2, "strong blank/comment phase boundary where local variable lifetimes reset -- likely implicit sub-function boundary", operational_high), + section("Function LCOM", 3, "independent local data-flow components inside one method -- *POSSIBLE* mixed concerns", direct_array(detectors, "function_lcom")), + section("Operational Discontinuity", 3, "blank/comment phase boundary where local variable lifetimes reset -- *POSSIBLE* implicit sub-function boundary", 
operational_rest), + section("False Simplicity", 3, "looks simple, behaves non-locally: hidden dispatch/mutation/IO/context/reflection/reopen -- *POSSIBLE* (noisy)", direct_array(detectors, "false_simplicity")), + section("Fat Unions", 3, "case dispatch over class consts whose arms read mostly variant-invariant members -- product-vs-sum decomposition candidate (extraction -> nil-kill) -- *POSSIBLE*", nested_array(fat_union, "fat_unions")), + ] +} + +fn section(title: &str, tier: i64, desc: &str, findings: Vec) -> ReportSection { + ReportSection::new(title, tier, desc, findings) +} + +fn direct_array(value: &Value, key: &str) -> Vec { + rv::array(value, key).to_vec() +} + +fn nested_array(value: &Value, key: &str) -> Vec { + rv::array(value, key).to_vec() +} + +fn validate_spans(sections: &[ReportSection]) -> Result<()> { + for section in sections + .iter() + .filter(|section| !section.convergence_excluded) + { + for finding in §ion.findings { + let Some(spans) = rv::get(finding, "spans").and_then(Value::as_object) else { + continue; + }; + for (loc, span) in spans { + if span.is_null() { + continue; + } + let values = span.as_array(); + let ok = values.is_some_and(|values| { + values.len() == 4 + && values[0].as_i64().is_some() + && values[2].as_i64().is_some() + && values[0].as_i64() <= values[2].as_i64() + }); + if !ok { + bail!( + "decomplex: {} emitted malformed span {} for {}", + section.title, + span, + loc + ); + } + } + } + } + Ok(()) +} + +pub fn slug(title: &str) -> String { + title + .to_lowercase() + .chars() + .map(|ch| { + if ch.is_ascii_alphanumeric() || ch == ' ' { + ch + } else { + '\0' + } + }) + .filter(|ch| *ch != '\0') + .collect::() + .replace(' ', "-") +} + +pub fn nav(loc: &str) -> String { + let parts = loc.split(':').collect::>(); + if parts.len() < 3 { + return loc.to_string(); + } + let line = parts[parts.len() - 1]; + let method = parts[parts.len() - 2]; + let file = parts[..parts.len() - 2].join(":"); + format!("`{file}:{line}` 
({method})") +} + +fn render_finding(title: &str, h: &Value) -> String { + match title { + "Decision Pressure" => format!( + "- `{}` -- ELIMINABLE guard-pressure **{}** across {} method(s) -> tighten contract / nil-kill: DELETE{}\n - {}\n", + rv::field(h, "contract"), + rv::field(h, "decisions"), + rv::field(h, "methods"), + if rv::positive(h, "essential") { + format!(" (+{} essential dispatch on this contract -- legitimate; leave unless Fat-Union/Missing-Abstractions says re-derived)", rv::field(h, "essential")) + } else { + String::new() + }, + rv::array(h, "sites").iter().take(4).map(|site| nav(&rv::string(Some(site)))).collect::>().join(" ; ") + ), + "Redundant Nil Guards" => format!( + "- {} -- redundant nil guard on `{}`: `{}`\n - proof: {}\n", + nav(&rv::field(h, "at")), + rv::field(h, "local"), + rv::field(h, "guard"), + rv::field(h, "proof") + ), + "Missing Abstractions" => format!( + "- **[{}]** support={} scatter={} rank={}\n - tuple: `{}`\n - {}\n", + rv::field(h, "kind"), + rv::field(h, "support"), + rv::field(h, "scatter"), + rv::field(h, "rank"), + rv::join_field(h, "members", " | "), + rv::array(h, "sites").iter().take(6).map(|site| nav(&rv::string(Some(site)))).collect::>().join(" ; ") + ), + "State Heatmap" => render_state_heatmap_item(h), + "State-Based Branch Density" => format!( + "- {} -- **{}** state-based branch decision(s), refs=`{}` score={}\n - example predicate: `{}`\n", + nav(&rv::field(h, "at")), + rv::field(h, "decisions"), + rv::array(h, "state_refs").iter().take(8).map(|v| rv::string(Some(v))).collect::>().join(" | "), + rv::field(h, "score"), + rv::field(h, "predicate") + ), + "Temporal Ordering Pressure" => format!( + "- `{}` ({}) -- implicit lifecycle score **{}** (public={}, state methods={}, writers={}, fields={}, shared={}, flows={}, states={})\n - shared fields: `{}`\n - surface: {}\n", + rv::field(h, "owner"), + nav(&rv::field(h, "at")), + rv::field(h, "score"), + rv::field(h, "public_methods"), + rv::field(h, 
"state_methods"), + rv::field(h, "writers"), + rv::array_len(h, "state_fields"), + rv::array_len(h, "shared_fields"), + rv::field(h, "orderings"), + rv::field(h, "state_space"), + rv::array(h, "shared_fields").iter().take(8).map(|v| rv::string(Some(v))).collect::>().join(" | "), + rv::array(h, "sites").iter().take(6).map(|site| nav(&rv::string(Some(site)))).collect::>().join(" ; ") + ), + "Neglected Conditions" | "Neglected Path Conditions" => { + let pattern = rv::get(h, "pattern").or_else(|| rv::get(h, "guards")); + format!( + "- *POSSIBLE* (support={}) {} -- MISSING `{}` from `{}`\n", + rv::field(h, "support"), + nav(&rv::field(h, "at")), + rv::field(h, "missing"), + rv::array_strings(pattern).join(" | ") + ) + } + "Oversized Predicates" => format!( + "- *POSSIBLE* {} -- {} condition atoms in `{}`\n - atoms: `{}`\n", + nav(&rv::field(h, "at")), + rv::field(h, "count"), + rv::field(h, "predicate"), + rv::array(h, "atoms").iter().take(8).map(|v| rv::string(Some(v))).collect::>().join(" | ") + ), + "Neglected Updates" => format!( + "- *POSSIBLE* (support={}) {} writes `.{}` but NOT `.{}` (recv `{}`)\n", + rv::field(h, "support"), + nav(&rv::field(h, "at")), + rv::field(h, "has"), + rv::field(h, "missing"), + rv::field(h, "recv") + ), + "Semantic Predicate Aliases" | "Exact Predicate Aliases" => format!( + "- `{}` == `{}`\n - {}\n", + rv::join_field(h, "names", " = "), + if rv::get(h, "canon").is_some() { rv::field(h, "canon") } else { rv::field(h, "body") }, + rv::array(h, "sites").iter().map(|site| nav(&rv::string(Some(site)))).collect::>().join(" ; ") + ), + "Reification Misses" => format!( + "- predicate `{}` reinvented inline at {} (`{}`)\n", + rv::field(h, "predicate"), + nav(&rv::field(h, "at")), + rv::field(h, "raw") + ), + "Broken Protocols" => format!( + "- *POSSIBLE* conf={} support={} {} does `{}` without `{}`\n", + rv::field(h, "confidence"), + rv::field(h, "support"), + nav(&rv::field(h, "at")), + rv::field(h, "has"), + rv::field(h, "missing") + ), + 
"Implicit Control Flow" => render_implicit_control_flow_item(h), + "Weighted Inlined Cognitive Complexity" => render_weighted_inlined_complexity_item(h), + "Locality Drag" => render_locality_drag_item(h), + "Function LCOM" => render_function_lcom_item(h), + "Operational Discontinuity" | "Operational Discontinuity (High Confidence)" => { + render_operational_discontinuity_item(h) + } + "False Simplicity" => format!( + "- *POSSIBLE* [{}] scatter={} support={} `{}` -- {}{}\n", + rv::field(h, "kind"), + rv::field(h, "scatter"), + rv::field(h, "support"), + rv::field(h, "detail"), + nav(&rv::field(h, "at")), + if rv::array_len(h, "sites") > 1 { + format!(" (+{} more)", rv::array_len(h, "sites") - 1) + } else { + String::new() + } + ), + "Fat Unions" => format!( + "- *POSSIBLE*{} union `{}` -- **{} common** vs {} variant member(s), scatter={} -- {}\n - common: `{}` -> hoist to a struct, keep a SMALL union for `{}` (-> nil-kill)\n", + if rv::field_bool(h, "degenerate") { " [DEGENERATE: no variance]" } else { "" }, + rv::join_field(h, "variant_set", " | "), + rv::array_len(h, "common"), + rv::array_len(h, "variant"), + rv::field(h, "scatter"), + nav(&rv::field(h, "at")), + rv::array(h, "common").iter().take(8).map(|v| rv::string(Some(v))).collect::>().join(", "), + rv::array(h, "variant").iter().take(6).map(|v| rv::string(Some(v))).collect::>().join(", ") + ), + "Derived-State Staleness" => format!( + "- *POSSIBLE* {}: `{}` derived from `{}` (line {}); `{}` reassigned line {}, `{}` not recomputed\n", + nav(&rv::field(h, "at")), + rv::field(h, "derived"), + rv::field(h, "source"), + rv::field(h, "derived_at"), + rv::field(h, "source"), + rv::field(h, "source_reassigned_at"), + rv::field(h, "derived") + ), + "Inconsistent Rename Clones" => format!( + "- *POSSIBLE* {} clone of {}: ref var `{}` spelled {} here\n", + nav(&rv::field(h, "at")), + nav(&rv::field(h, "ref_at")), + rv::field(h, "ref_name"), + rv::ruby_inspect_array(rv::get(h, "divergent")) + ), + "Structural 
Similarity (Type-2/3)" => format!( + "- *POSSIBLE* [{}] mass={} node=`{}` {}{}\n", + rv::field(h, "clone_type"), + rv::field(h, "mass"), + rv::field(h, "node"), + rv::array(h, "sites").iter().take(4).map(|site| nav(&rv::string(Some(site)))).collect::>().join(" ; "), + if rv::array_len(h, "sites") > 4 { + format!(" (+{} more)", rv::array_len(h, "sites") - 4) + } else { + String::new() + } + ), + _ => String::new(), + } +} + +fn render_state_heatmap_item(item: &Value) -> String { + let mut out = format!( + "- `{}` -- messiness **{}** (writes={}, reads={}, re-derived={}, scatter={}, receiver patterns={})\n", + rv::field(item, "field"), + rv::field(item, "messiness"), + rv::field(item, "writes"), + rv::field(item, "reads"), + rv::field(item, "re_derivations"), + rv::field(item, "scatter"), + rv::field(item, "receiver_types") + ); + let writers = rv::array(item, "top_writers") + .iter() + .map(|site| nav(&rv::string(Some(site)))) + .collect::>(); + let readers = rv::array(item, "top_readers") + .iter() + .map(|site| nav(&rv::string(Some(site)))) + .collect::>(); + if !writers.is_empty() { + out.push_str(&format!(" - writers: {}\n", writers.join(" ; "))); + } + if !readers.is_empty() { + out.push_str(&format!(" - readers: {}\n", readers.join(" ; "))); + } + out +} + +fn render_implicit_control_flow_item(item: &Value) -> String { + if rv::kind_is(item, "kind", "order_drift") { + return format!( + "- *POSSIBLE* [order_drift] conf={} support={} {} observed `{}` against protocol `{}` ({} state=`{}`)\n", + rv::field(item, "confidence"), + rv::field(item, "support"), + nav(&rv::field(item, "at")), + rv::join_field(item, "observed", " -> "), + rv::join_field(item, "protocol", " -> "), + rv::join_field(item, "dependency", "|"), + rv::join_field(item, "states", " | ") + ); + } + let sites = rv::array(item, "sites") + .iter() + .take(4) + .map(|site| nav(&rv::string(Some(site)))) + .collect::>() + .join(" ; "); + let more = if rv::array_len(item, "sites") > 4 { + format!(" (+{} 
more)", rv::array_len(item, "sites") - 4) + } else { + String::new() + }; + format!( + "- *POSSIBLE* [protocol_pressure] support={} `{}` ({} state=`{}`) -- {}\n - sites: {}{}\n", + rv::field(item, "support"), + rv::join_field(item, "protocol", " -> "), + rv::join_field(item, "dependency", "|"), + rv::join_field(item, "states", " | "), + nav(&rv::field(item, "at")), + sites, + more + ) +} + +fn render_weighted_inlined_complexity_item(item: &Value) -> String { + format!( + "- *POSSIBLE* {} -- inlined={} (local={}, hidden={}, depth={})\n - chain: `{}`\n - single-caller helpers: `{}`\n - reason: {}\n", + nav(&rv::field(item, "at")), + rv::field(item, "inlined"), + rv::field(item, "local"), + rv::field(item, "hidden"), + rv::field(item, "depth"), + rv::join_field(item, "call_chain", " -> "), + rv::array(item, "single_caller_callees").iter().take(8).map(|v| rv::string(Some(v))).collect::>().join(" | "), + rv::field(item, "reason") + ) +} + +fn render_locality_drag_item(item: &Value) -> String { + let mut out = format!( + "- *POSSIBLE* {} -- `{}` dormant until line {} score={} (gap={} lines, unrelated={}, boundaries={}, local={})\n - reason: {}\n", + nav(&rv::field(item, "at")), + rv::field(item, "variable"), + rv::field(item, "used_at"), + rv::field(item, "score"), + rv::field(item, "gap_lines"), + rv::field(item, "unrelated_statements"), + rv::field(item, "boundary_crossings"), + rv::field(item, "local_complexity"), + rv::field(item, "reason") + ); + if rv::positive(item, "setup_statements") { + out.push_str(&format!( + " - ignored setup initializers: {}\n", + rv::field(item, "setup_statements") + )); + } + if rv::array_len(item, "definition_deps") > 0 { + out.push_str(&format!( + " - definition deps: `{}`\n", + rv::array(item, "definition_deps") + .iter() + .take(6) + .map(|v| rv::string(Some(v))) + .collect::>() + .join(" | ") + )); + } + if rv::array_len(item, "use_reads") > 0 { + out.push_str(&format!( + " - first-use reads: `{}`\n", + rv::array(item, "use_reads") + 
.iter() + .take(8) + .map(|v| rv::string(Some(v))) + .collect::>() + .join(" | ") + )); + } + for boundary in rv::array(item, "boundaries").iter().take(2) { + out.push_str(&format!( + " - crosses line {} {}\n", + rv::field(boundary, "line"), + rv::field(boundary, "marker") + )); + } + for example in rv::array(item, "examples").iter().take(2) { + out.push_str(&format!( + " - unrelated line {}: `{}`\n", + rv::field(example, "line"), + rv::field(example, "source") + )); + } + out +} + +fn render_function_lcom_item(item: &Value) -> String { + let mode = if rv::kind_is(item, "mode", "late_join") { + "late_join" + } else { + "disjoint" + }; + let mut out = format!( + "- *POSSIBLE* [{}] {} -- score={} components={}, locals={}, statements={}\n", + mode, + nav(&rv::field(item, "at")), + rv::field(item, "score"), + rv::field(item, "components"), + rv::field(item, "locals"), + rv::field(item, "statements") + ); + for (index, vars) in rv::array(item, "component_vars").iter().take(4).enumerate() { + let lines = rv::array(item, "component_lines").get(index); + let var_text = rv::array_from(Some(vars)) + .iter() + .take(8) + .map(|value| rv::string(Some(value))) + .collect::>() + .join(" | "); + out.push_str(&format!(" - component {}: `{}`", index + 1, var_text)); + if let Some(lines) = lines { + let line_values = rv::array_from(Some(lines)); + if let (Some(first), Some(last)) = (line_values.first(), line_values.last()) { + out.push_str(&format!( + " (lines {}-{})", + rv::string(Some(first)), + rv::string(Some(last)) + )); + } + } + out.push('\n'); + } + out +} + +fn render_operational_discontinuity_item(item: &Value) -> String { + let reasons = rv::join_field(item, "confidence_reasons", ", "); + let confidence = if rv::get(item, "confidence").is_some() { + rv::field(item, "confidence") + } else { + "review".to_string() + }; + let mut out = format!( + "- *POSSIBLE* {} -- score={} reset_boundaries={}, dead={}, new={}, confidence={}", + nav(&rv::field(item, "at")), + 
rv::field(item, "score"), + rv::field(item, "resets"), + rv::field(item, "dead_total"), + rv::field(item, "new_total"), + confidence + ); + if !reasons.is_empty() { + out.push_str(&format!(" ({reasons})")); + } + out.push('\n'); + for reset in rv::array(item, "reset_points").iter().take(3) { + let marker = if rv::field(reset, "text").is_empty() { + rv::field(reset, "kind") + } else { + rv::field(reset, "text") + }; + out.push_str(&format!( + " - line {} {}: dead `{}` -> new `{}`", + rv::field(reset, "line"), + marker, + rv::array(reset, "dead") + .iter() + .take(6) + .map(|v| rv::string(Some(v))) + .collect::>() + .join(" | "), + rv::array(reset, "new") + .iter() + .take(6) + .map(|v| rv::string(Some(v))) + .collect::>() + .join(" | ") + )); + if rv::array_len(reset, "continuing") > 0 { + out.push_str(&format!( + " (continuing `{}`)", + rv::join_field(reset, "continuing", " | ") + )); + } + out.push('\n'); + } + out +} + +fn sarif_rule_id(title: &str) -> String { + format!("decomplex.{}", sarif::slug(title)) +} + +fn ranked_sarif_results(mut results: Vec) -> Vec { + results.sort_by(|left, right| { + let left_location = first_physical_location(left); + let right_location = first_physical_location(right); + tier_property(left) + .cmp(&tier_property(right)) + .then_with(|| rv::field(left, "ruleId").cmp(&rv::field(right, "ruleId"))) + .then_with(|| { + rv::get(left, "message") + .and_then(|message| rv::get(message, "text")) + .map(|value| rv::string(Some(value))) + .unwrap_or_default() + .cmp( + &rv::get(right, "message") + .and_then(|message| rv::get(message, "text")) + .map(|value| rv::string(Some(value))) + .unwrap_or_default(), + ) + }) + .then_with(|| { + left_location + .as_ref() + .and_then(|location| location.get("artifactLocation")) + .and_then(|artifact| artifact.get("uri")) + .map(|value| rv::string(Some(value))) + .unwrap_or_default() + .cmp( + &right_location + .as_ref() + .and_then(|location| location.get("artifactLocation")) + .and_then(|artifact| 
artifact.get("uri")) + .map(|value| rv::string(Some(value))) + .unwrap_or_default(), + ) + }) + .then_with(|| start_line(left_location).cmp(&start_line(right_location))) + }); + results +} + +fn tier_property(result: &Value) -> i64 { + rv::get(result, "properties") + .map(|properties| rv::field_i64(properties, "tier")) + .unwrap_or(0) +} + +fn first_physical_location(result: &Value) -> Option<&Value> { + rv::array(result, "locations") + .first() + .and_then(|location| rv::get(location, "physicalLocation")) +} + +fn start_line(location: Option<&Value>) -> i64 { + location + .and_then(|location| rv::get(location, "region")) + .map(|region| rv::field_i64(region, "startLine")) + .unwrap_or(0) +} + +fn sarif_message(title: &str, finding: &Value, location: &SarifLocation) -> String { + let detail = sarif_message_detail(title, finding); + if !detail.is_empty() { + return format!("{title}: {detail}"); + } + let subject = location + .method + .clone() + .filter(|value| !value.is_empty()) + .or_else(|| { + first_non_empty_field( + finding, + &[ + "method", "name", "field", "contract", "owner", "token", "kind", + ], + ) + }); + [Some(title.to_string()), subject] + .into_iter() + .flatten() + .collect::>() + .join(": ") +} + +fn first_non_empty_field(finding: &Value, keys: &[&str]) -> Option { + keys.iter() + .map(|key| rv::field(finding, key)) + .find(|value| !value.is_empty()) +} + +fn sarif_message_detail(title: &str, finding: &Value) -> String { + match title { + "Decision Pressure" => format!( + "`{}` creates {} eliminable guard decision(s) across {} method(s)", + rv::field(finding, "contract"), + rv::field(finding, "decisions"), + rv::field(finding, "methods") + ), + "Redundant Nil Guards" => format!( + "`{}` is nil-guarded by `{}` after proof `{}`", + rv::field(finding, "local"), + rv::field(finding, "guard"), + rv::field(finding, "proof") + ), + "State Heatmap" => format!( + "state `{}` has pressure={}, messiness={} (writes={}, reads={}, re-derived={}, scatter={}); 
writers {}; readers {}", + rv::field(finding, "field"), + rv::field(finding, "pressure"), + rv::field(finding, "messiness"), + rv::field(finding, "writes"), + rv::field(finding, "reads"), + rv::field(finding, "re_derivations"), + rv::field(finding, "scatter"), + rv::array(finding, "top_writers").iter().take(3).map(|v| rv::string(Some(v))).collect::>().join(" | "), + rv::array(finding, "top_readers").iter().take(3).map(|v| rv::string(Some(v))).collect::>().join(" | ") + ), + "Missing Abstractions" => format!( + "guard tuple `{}` repeats in {} site(s) with scatter={}", + rv::join_field(finding, "members", " | "), + rv::field(finding, "support"), + rv::field(finding, "scatter") + ), + "State-Based Branch Density" => format!( + "{} state-based branch decision(s) over `{}`; example predicate `{}`", + rv::field(finding, "decisions"), + rv::array(finding, "state_refs").iter().take(8).map(|v| rv::string(Some(v))).collect::>().join(" | "), + rv::field(finding, "predicate") + ), + "Temporal Ordering Pressure" => format!( + "`{}` exposes mutable lifecycle pressure score={} (public={}, state_methods={}, writers={})", + rv::field(finding, "owner"), + rv::field(finding, "score"), + rv::field(finding, "public_methods"), + rv::field(finding, "state_methods"), + rv::field(finding, "writers") + ), + "Neglected Conditions" | "Neglected Path Conditions" => { + let pattern = rv::get(finding, "pattern").or_else(|| rv::get(finding, "guards")); + format!( + "missing condition `{}` from `{}` (support={})", + rv::field(finding, "missing"), + rv::array_strings(pattern).join(" | "), + rv::field(finding, "support") + ) + } + "Oversized Predicates" => format!( + "{} condition atoms in predicate `{}`", + rv::field(finding, "count"), + rv::field(finding, "predicate") + ), + "Neglected Updates" => format!( + "writes `.{}` but not co-written `.{}` on receiver `{}` (support={})", + rv::field(finding, "has"), + rv::field(finding, "missing"), + rv::field(finding, "recv"), + rv::field(finding, 
"support") + ), + "Semantic Predicate Aliases" | "Exact Predicate Aliases" => format!( + "predicate aliases `{}` for `{}`", + rv::join_field(finding, "names", " = "), + if rv::get(finding, "canon").is_some() { + rv::field(finding, "canon") + } else { + rv::field(finding, "body") + } + ), + "Reification Misses" => format!( + "predicate `{}` is reinvented inline as `{}`", + rv::field(finding, "predicate"), + rv::field(finding, "raw") + ), + "Broken Protocols" => format!( + "does `{}` without co-called `{}` (support={}, confidence={})", + rv::field(finding, "has"), + rv::field(finding, "missing"), + rv::field(finding, "support"), + rv::field(finding, "confidence") + ), + "Implicit Control Flow" => sarif_implicit_control_flow_detail(finding), + "Weighted Inlined Cognitive Complexity" => format!( + "inlined={} (local={}, hidden={}, depth={}); chain `{}`", + rv::field(finding, "inlined"), + rv::field(finding, "local"), + rv::field(finding, "hidden"), + rv::field(finding, "depth"), + rv::join_field(finding, "call_chain", " -> ") + ), + "Locality Drag" => format!( + "`{}` is initialized at line {} but first used at line {} after {} unrelated statement(s)", + rv::field(finding, "variable"), + rv::field(finding, "defined_at"), + rv::field(finding, "used_at"), + rv::field(finding, "unrelated_statements") + ), + "Function LCOM" => { + let mode = if rv::kind_is(finding, "mode", "late_join") { + "late_join" + } else { + "disjoint" + }; + format!( + "{} local data-flow: score={}, components={}, locals={}, statements={}", + mode, + rv::field(finding, "score"), + rv::field(finding, "components"), + rv::field(finding, "locals"), + rv::field(finding, "statements") + ) + } + "Operational Discontinuity" | "Operational Discontinuity (High Confidence)" => format!( + "score={}, reset_boundaries={}, dead={}, new={}, confidence={}", + rv::field(finding, "score"), + rv::field(finding, "resets"), + rv::field(finding, "dead_total"), + rv::field(finding, "new_total"), + if rv::get(finding, 
"confidence").is_some() { + rv::field(finding, "confidence") + } else { + "review".to_string() + } + ), + "False Simplicity" => format!( + "[{}] `{}` support={}, scatter={}", + rv::field(finding, "kind"), + rv::field(finding, "detail"), + rv::field(finding, "support"), + rv::field(finding, "scatter") + ), + "Fat Unions" => format!( + "union `{}` has {} common and {} variant member(s), scatter={}", + rv::join_field(finding, "variant_set", " | "), + rv::array_len(finding, "common"), + rv::array_len(finding, "variant"), + rv::field(finding, "scatter") + ), + "Derived-State Staleness" => format!( + "`{}` derived from `{}` at line {}; `{}` reassigned at line {} but `{}` is not recomputed", + rv::field(finding, "derived"), + rv::field(finding, "source"), + rv::field(finding, "derived_at"), + rv::field(finding, "source"), + rv::field(finding, "source_reassigned_at"), + rv::field(finding, "derived") + ), + "Inconsistent Rename Clones" => format!( + "clone of {}: reference variable `{}` diverges as {}", + rv::field(finding, "ref_at"), + rv::field(finding, "ref_name"), + rv::ruby_inspect_array(rv::get(finding, "divergent")) + ), + "Structural Similarity (Type-2/3)" => format!( + "[{}] mass={} node=`{}` across {} site(s)", + rv::field(finding, "clone_type"), + rv::field(finding, "mass"), + rv::field(finding, "node"), + rv::array_len(finding, "sites") + ), + _ => String::new(), + } +} + +fn sarif_implicit_control_flow_detail(finding: &Value) -> String { + let protocol = rv::join_field(finding, "protocol", " -> "); + let dependency = rv::join_field(finding, "dependency", "|"); + let states = rv::join_field(finding, "states", " | "); + if rv::kind_is(finding, "kind", "order_drift") { + return format!( + "[order_drift] observed `{}` against protocol `{}` ({} state=`{}`)", + rv::join_field(finding, "observed", " -> "), + protocol, + dependency, + states + ); + } + format!( + "[protocol_pressure] protocol `{}` ({} state=`{}`), support={}", + protocol, + dependency, + states, + 
rv::field(finding, "support") + ) +} + +fn sarif_locations_for_finding(finding: &Value) -> Vec { + if let Some(spans) = rv::get(finding, "spans").and_then(Value::as_object) { + if !spans.is_empty() { + return spans + .iter() + .filter_map(|(loc, span)| { + let mut parsed = parse_sarif_loc(loc); + parsed.path.as_ref()?; + let span = rv::array_from(Some(span)); + parsed.line = span + .first() + .and_then(Value::as_i64) + .filter(|line| *line > 0) + .unwrap_or(parsed.line); + parsed.start_column = span + .get(1) + .and_then(Value::as_i64) + .map(zero_based_column_to_sarif); + parsed.end_line = span.get(2).and_then(Value::as_i64).filter(|line| *line > 0); + parsed.end_column = span + .get(3) + .and_then(Value::as_i64) + .map(zero_based_column_to_sarif); + Some(parsed) + }) + .collect(); + } + } + + let mut locs = Vec::new(); + if let Some(value) = rv::get(finding, "at") { + locs.push(rv::string(Some(value))); + } + locs.extend(rv::field_array_strings(finding, "sites")); + if let Some(value) = rv::get(finding, "ref_at") { + locs.push(rv::string(Some(value))); + } + let mut seen = std::collections::HashSet::new(); + locs.retain(|loc| !loc.is_empty() && seen.insert(loc.clone())); + locs.into_iter() + .map(|loc| parse_sarif_loc(&loc)) + .filter(|loc| loc.path.is_some()) + .collect() +} + +fn parse_sarif_loc(loc: &str) -> SarifLocation { + let mut parts = loc.split(':').map(ToOwned::to_owned).collect::>(); + let line = if parts + .last() + .is_some_and(|part| part.chars().all(|ch| ch.is_ascii_digit())) + { + parts.pop().and_then(|part| part.parse::().ok()) + } else { + None + }; + let method = if parts.len() >= 2 { parts.pop() } else { None }; + let path = parts.join(":"); + SarifLocation { + path: (!path.is_empty()).then_some(path), + method, + line: line.filter(|line| *line > 0).unwrap_or(1), + start_column: None, + end_line: None, + end_column: None, + } +} + +fn zero_based_column_to_sarif(value: i64) -> i64 { + value + 1 +} + +#[cfg(test)] +mod tests { + use super::*; + 
+ #[test] + fn nav_splits_location_from_the_right() { + assert_eq!(nav("a:b.rb:m:10"), "`a:b.rb:10` (m)"); + } + + #[test] + fn slug_matches_ruby_report_anchor_shape() { + assert_eq!( + slug("Structural Similarity (Type-2/3)"), + "structural-similarity-type23" + ); + } +} diff --git a/gems/decomplex/rust/src/decomplex/report_facts.rs b/gems/decomplex/rust/src/decomplex/report_facts.rs new file mode 100644 index 000000000..146242d01 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/report_facts.rs @@ -0,0 +1,794 @@ +use crate::decomplex::detectors::{ + co_update, decision_pressure, derived_state, false_simplicity, fat_union, flay_similarity, + function_lcom, implicit_control_flow, inconsistent_rename_clone, local_flow, locality_drag, + miner, operational_discontinuity, oversized_predicate, path_condition, predicate_alias, + redundant_nil_guard, semantic_alias, sequence_mine, state_branch_density, state_mesh, + temporal_ordering_pressure, weighted_inlined_cognitive_complexity, +}; +use crate::decomplex::parallel; +use crate::decomplex::syntax::{self, Document, Language}; +use anyhow::{bail, Context, Result}; +use serde::Serialize; +use serde_json::{json, Map, Value}; +use std::collections::{BTreeMap, BTreeSet, HashSet}; +use std::fs; +use std::path::{Path, PathBuf}; +use std::process::Command; +use std::sync::mpsc; +use std::thread; + +pub const FORMAT: &str = "decomplex.report-facts.v1"; + +const DEFAULT_MASS: usize = 32; +const DEFAULT_FUZZY: usize = 1; +const DEFAULT_EXCLUDE_DIRS: &[&str] = &[ + ".clear-cache", + ".clear-transpile-cache", + ".global-zig-cache", + ".zig-cache", + "zig-cache", + "zig-out", + "node_modules", +]; + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum VcsFilter { + Git, +} + +#[derive(Clone, Debug)] +pub struct Options { + pub language: Option, + pub excludes: Vec, + pub mass: usize, + pub fuzzy: usize, + pub vcs: Option, +} + +impl Default for Options { + fn default() -> Self { + Self { + language: None, + excludes: Vec::new(), 
+ mass: DEFAULT_MASS, + fuzzy: DEFAULT_FUZZY, + vcs: None, + } + } +} + +#[derive(Clone, Debug)] +pub struct SourceFile { + pub path: PathBuf, + pub language: Language, +} + +struct SharedFacts { + local_summaries: Vec, + local_complexity_scores: BTreeMap<(String, String), syntax::LocalComplexityScore>, + semantic_aliases: semantic_alias::SemanticAliasReport, +} + +impl SharedFacts { + fn new(documents: &[Document]) -> Self { + thread::scope(|scope| { + let local_summaries = scope.spawn(|| local_flow::scan_documents(documents)); + let local_complexity_scores = scope.spawn(|| local_complexity_scores(documents)); + let semantic_aliases = scope.spawn(|| semantic_alias::scan_documents(documents)); + Self { + local_summaries: local_summaries.join().expect("local-flow facts worker"), + local_complexity_scores: local_complexity_scores + .join() + .expect("local-complexity facts worker"), + semantic_aliases: semantic_aliases + .join() + .expect("semantic-alias facts worker"), + } + }) + } +} + +pub fn collect(targets: &[PathBuf], options: &Options) -> Result { + let files = collect_source_files(targets, options)?; + facts_for_source_files(&files, options) +} + +pub fn collect_source_files(targets: &[PathBuf], options: &Options) -> Result> { + let mut files = Vec::new(); + for target in targets { + expand_target(target, options, &mut files) + .with_context(|| format!("failed to collect {}", target.display()))?; + } + files.sort_by(|left, right| left.path.cmp(&right.path)); + files.dedup_by(|left, right| left.path == right.path); + if options.vcs == Some(VcsFilter::Git) && !files.is_empty() { + retain_git_tracked_files(&mut files)?; + } + Ok(files) +} + +pub fn facts_for_source_files(files: &[SourceFile], options: &Options) -> Result { + if files.is_empty() { + bail!("facts requires at least one supported source file"); + } + + let documents = parallel::map_ordered(files, |file| { + syntax::parse_file(file.path.clone(), file.language) + })?; + let shared = 
SharedFacts::new(&documents); + let mut groups: BTreeMap> = BTreeMap::new(); + for document in documents { + groups.entry(document.language).or_default().push(document); + } + + let detectors = collect_detector_facts(&groups, &shared, options)?; + + let mut reported_files = files + .iter() + .map(|file| file.path.to_string_lossy().to_string()) + .collect::>(); + reported_files.sort(); + + Ok(json!({ + "format": FORMAT, + "files": reported_files, + "detectors": detectors, + })) +} + +fn collect_detector_facts( + groups: &BTreeMap>, + shared: &SharedFacts, + options: &Options, +) -> Result> { + if parallel::job_count() <= 1 { + return collect_detector_facts_sequential(groups, shared, options); + } + + let (tx, rx) = mpsc::channel(); + thread::scope(|scope| { + macro_rules! spawn_detector { + ($name:expr, $body:expr) => {{ + let tx = tx.clone(); + scope.spawn(move || { + let result: Result = (|| $body)(); + let _ = tx.send(($name.to_string(), result)); + }); + }}; + } + + spawn_detector!("miner", { + merge_object_reports( + groups, + &["missing_abstractions", "neglected_conditions"], + |documents| json_value(miner::scan_documents(documents)), + ) + }); + spawn_detector!("co_update", { + merge_object_reports( + groups, + &["co_written_pairs", "neglected_updates"], + |documents| json_value(co_update::scan_documents(documents)), + ) + }); + spawn_detector!("predicate_alias", { + merge_object_reports(groups, &["alias_clusters"], |documents| { + json_value(predicate_alias::scan_documents(documents)) + }) + }); + spawn_detector!("semantic_alias", { + json_value(shared.semantic_aliases.clone()) + }); + spawn_detector!("path_condition", { + merge_object_reports(groups, &["neglected", "scattered"], |documents| { + json_value(path_condition::scan_documents(documents)) + }) + }); + spawn_detector!("sequence_mine", { + merge_object_reports(groups, &["broken"], |documents| { + json_value(sequence_mine::scan_documents(documents)) + }) + .map(rename_broken_protocol) + }); + 
spawn_detector!("implicit_control_flow", { + merge_object_reports(groups, &["ordered_protocols"], |documents| { + json_value(implicit_control_flow::scan_documents(documents)) + }) + }); + spawn_detector!("derived_state", { + merge_array_reports(groups, |documents| { + json_value(derived_state::scan_documents(documents)) + }) + }); + spawn_detector!("inconsistent_rename_clone", { + merge_array_reports(groups, |documents| { + json_value(inconsistent_rename_clone::scan_documents(documents)) + }) + }); + spawn_detector!("flay_similarity", { + merge_array_reports(groups, |documents| { + json_value(flay_similarity::scan_documents( + documents, + options.mass, + options.fuzzy, + )) + }) + }); + spawn_detector!("decision_pressure", { + merge_array_reports(groups, |documents| { + json_value(decision_pressure::scan_documents(documents)) + }) + }); + spawn_detector!("redundant_nil_guard", { + merge_array_reports(groups, |documents| { + json_value(redundant_nil_guard::scan_documents(documents)) + }) + }); + spawn_detector!("false_simplicity", { + merge_array_reports(groups, |documents| { + json_value(false_simplicity::scan_documents(documents)) + }) + }); + spawn_detector!("oversized_predicate", { + Ok(merge_object_reports(groups, &["findings"], |documents| { + json_value(oversized_predicate::scan_documents(documents)) + })? 
+ .get("findings") + .cloned() + .unwrap_or_else(|| Value::Array(Vec::new()))) + }); + spawn_detector!("fat_union", { + merge_object_reports(groups, &["fat_unions"], |documents| { + json_value(fat_union::scan_documents(documents)) + }) + }); + spawn_detector!("state_heatmap", { + state_heatmap_findings_for_groups(groups, &shared.semantic_aliases) + }); + spawn_detector!("state_branch_density", { + merge_array_reports(groups, |documents| { + json_value(state_branch_density::scan_documents(documents)) + }) + }); + spawn_detector!("temporal_ordering_pressure", { + merge_array_reports(groups, |documents| { + json_value(temporal_ordering_pressure::scan_documents(documents)) + }) + }); + spawn_detector!("weighted_inlined_complexity", { + merge_array_reports(groups, |documents| { + json_value(weighted_inlined_cognitive_complexity::scan_documents( + documents, + )) + }) + }); + spawn_detector!("locality_drag", { + json_value(locality_drag::scan_summaries_with_scores( + shared.local_summaries.clone(), + shared.local_complexity_scores.clone(), + )) + }); + spawn_detector!("function_lcom", { + json_value(function_lcom::scan_summaries( + shared.local_summaries.clone(), + )) + }); + spawn_detector!("operational_discontinuity", { + json_value(operational_discontinuity::scan_summaries( + shared.local_summaries.clone(), + )) + }); + drop(tx); + }); + + let mut detectors = Map::new(); + let mut first_error = None; + for (name, result) in rx { + match result { + Ok(value) => { + detectors.insert(name, value); + } + Err(error) => { + if first_error.is_none() { + first_error = Some(error.context(format!("failed to collect {name} facts"))); + } + } + } + } + if let Some(error) = first_error { + return Err(error); + } + Ok(detectors) +} + +fn collect_detector_facts_sequential( + groups: &BTreeMap>, + shared: &SharedFacts, + options: &Options, +) -> Result> { + let mut detectors = Map::new(); + detectors.insert( + "miner".to_string(), + merge_object_reports( + groups, + 
&["missing_abstractions", "neglected_conditions"], + |documents| json_value(miner::scan_documents(documents)), + )?, + ); + detectors.insert( + "co_update".to_string(), + merge_object_reports( + groups, + &["co_written_pairs", "neglected_updates"], + |documents| json_value(co_update::scan_documents(documents)), + )?, + ); + detectors.insert( + "predicate_alias".to_string(), + merge_object_reports(groups, &["alias_clusters"], |documents| { + json_value(predicate_alias::scan_documents(documents)) + })?, + ); + detectors.insert( + "semantic_alias".to_string(), + json_value(shared.semantic_aliases.clone())?, + ); + detectors.insert( + "path_condition".to_string(), + merge_object_reports(groups, &["neglected", "scattered"], |documents| { + json_value(path_condition::scan_documents(documents)) + })?, + ); + detectors.insert( + "sequence_mine".to_string(), + merge_object_reports(groups, &["broken"], |documents| { + json_value(sequence_mine::scan_documents(documents)) + }) + .map(rename_broken_protocol)?, + ); + detectors.insert( + "implicit_control_flow".to_string(), + merge_object_reports(groups, &["ordered_protocols"], |documents| { + json_value(implicit_control_flow::scan_documents(documents)) + })?, + ); + detectors.insert( + "derived_state".to_string(), + merge_array_reports(groups, |documents| { + json_value(derived_state::scan_documents(documents)) + })?, + ); + detectors.insert( + "inconsistent_rename_clone".to_string(), + merge_array_reports(groups, |documents| { + json_value(inconsistent_rename_clone::scan_documents(documents)) + })?, + ); + detectors.insert( + "flay_similarity".to_string(), + merge_array_reports(groups, |documents| { + json_value(flay_similarity::scan_documents( + documents, + options.mass, + options.fuzzy, + )) + })?, + ); + detectors.insert( + "decision_pressure".to_string(), + merge_array_reports(groups, |documents| { + json_value(decision_pressure::scan_documents(documents)) + })?, + ); + detectors.insert( + 
"redundant_nil_guard".to_string(), + merge_array_reports(groups, |documents| { + json_value(redundant_nil_guard::scan_documents(documents)) + })?, + ); + detectors.insert( + "false_simplicity".to_string(), + merge_array_reports(groups, |documents| { + json_value(false_simplicity::scan_documents(documents)) + })?, + ); + detectors.insert( + "oversized_predicate".to_string(), + merge_object_reports(groups, &["findings"], |documents| { + json_value(oversized_predicate::scan_documents(documents)) + })? + .get("findings") + .cloned() + .unwrap_or_else(|| Value::Array(Vec::new())), + ); + detectors.insert( + "fat_union".to_string(), + merge_object_reports(groups, &["fat_unions"], |documents| { + json_value(fat_union::scan_documents(documents)) + })?, + ); + detectors.insert( + "state_heatmap".to_string(), + state_heatmap_findings_for_groups(groups, &shared.semantic_aliases)?, + ); + detectors.insert( + "state_branch_density".to_string(), + merge_array_reports(groups, |documents| { + json_value(state_branch_density::scan_documents(documents)) + })?, + ); + detectors.insert( + "temporal_ordering_pressure".to_string(), + merge_array_reports(groups, |documents| { + json_value(temporal_ordering_pressure::scan_documents(documents)) + })?, + ); + detectors.insert( + "weighted_inlined_complexity".to_string(), + merge_array_reports(groups, |documents| { + json_value(weighted_inlined_cognitive_complexity::scan_documents( + documents, + )) + })?, + ); + detectors.insert( + "locality_drag".to_string(), + json_value(locality_drag::scan_summaries_with_scores( + shared.local_summaries.clone(), + shared.local_complexity_scores.clone(), + ))?, + ); + detectors.insert( + "function_lcom".to_string(), + json_value(function_lcom::scan_summaries( + shared.local_summaries.clone(), + ))?, + ); + detectors.insert( + "operational_discontinuity".to_string(), + json_value(operational_discontinuity::scan_summaries( + shared.local_summaries.clone(), + ))?, + ); + Ok(detectors) +} + +fn 
local_complexity_scores( + documents: &[Document], +) -> BTreeMap<(String, String), syntax::LocalComplexityScore> { + documents + .iter() + .flat_map(|document| { + document + .local_complexity_scores + .iter() + .map(|(id, score)| ((document.file.clone(), id.clone()), score.clone())) + }) + .collect() +} + +fn merge_object_reports( + groups: &BTreeMap>, + fields: &[&str], + scan: F, +) -> Result +where + F: Fn(&[Document]) -> Result, +{ + let mut merged = Map::new(); + for field in fields { + merged.insert((*field).to_string(), Value::Array(Vec::new())); + } + + for (language, documents) in groups { + let value = scan(documents)?; + let object = value + .as_object() + .with_context(|| format!("{} detector did not return an object", language.as_str()))?; + for field in fields { + let rows = object + .get(*field) + .and_then(Value::as_array) + .with_context(|| format!("detector result missing array field {field}"))?; + merged + .get_mut(*field) + .and_then(Value::as_array_mut) + .expect("merged array") + .extend(rows.iter().cloned()); + } + } + Ok(Value::Object(merged)) +} + +fn json_value(value: T) -> Result { + Ok(serde_json::to_value(value)?) +} + +fn merge_array_reports(groups: &BTreeMap>, scan: F) -> Result +where + F: Fn(&[Document]) -> Result, +{ + let mut rows = Vec::new(); + for (language, documents) in groups { + let value = scan(documents)?; + rows.extend( + value + .as_array() + .with_context(|| format!("{} detector did not return an array", language.as_str()))? 
+ .iter() + .cloned(), + ); + } + Ok(Value::Array(rows)) +} + +fn rename_broken_protocol(mut value: Value) -> Value { + if let Some(object) = value.as_object_mut() { + if let Some(rows) = object.remove("broken") { + object.insert("broken_protocol".to_string(), rows); + } + } + value +} + +fn state_heatmap_findings_for_groups( + groups: &BTreeMap>, + semantic_aliases: &semantic_alias::SemanticAliasReport, +) -> Result { + let mut rows = Vec::new(); + for documents in groups.values() { + let report = state_mesh::scan_documents_with_semantic_aliases_and_min_writes( + documents, + semantic_aliases, + 1, + ); + rows.extend(state_heatmap_findings(&report)); + } + Ok(Value::Array(rows)) +} + +fn state_heatmap_findings(report: &state_mesh::StateMeshReport) -> Vec { + let mut rows = Vec::new(); + for (field, row) in &report.fields { + let mut sites = Vec::new(); + sites.extend(row.writers.iter().map(site_location)); + sites.extend(row.readers.iter().map(site_location)); + sites.extend(row.re_derivations.iter().map(re_derivation_location)); + + let spans = row + .writers + .iter() + .chain(row.readers.iter()) + .map(|site| (site_location(site), json!(site.span))) + .collect::>(); + + rows.push(json!({ + "at": sites.first().cloned(), + "field": field, + "writes": row.metrics.writes, + "reads": row.metrics.reads, + "re_derivations": row.metrics.re_derivations, + "scatter": row.metrics.scatter, + "write_scatter": row.metrics.write_scatter, + "read_scatter": row.metrics.read_scatter, + "receiver_types": row.metrics.receiver_types, + "messiness": row.messiness, + "pressure": row.metrics.pressure, + "top_writers": row.writers.iter().take(4).map(site_location).collect::>(), + "top_readers": row.readers.iter().take(4).map(site_location).collect::>(), + "sites": sites.into_iter().take(12).collect::>(), + "spans": spans, + })); + } + rows +} + +fn site_location(site: &state_mesh::SiteInfo) -> String { + format!("{}:{}:{}", site.file, site.defn, site.line) +} + +fn 
re_derivation_location(site: &state_mesh::ReDerivationInfo) -> String { + format!("{}:{}:{}", site.file, site.defn, site.line) +} + +fn retain_git_tracked_files(files: &mut Vec) -> Result<()> { + let tracked = git_tracked_paths_for_files(files)?; + files.retain(|file| tracked.contains(&normalize_path(&file.path))); + Ok(()) +} + +fn git_tracked_paths_for_files(files: &[SourceFile]) -> Result> { + let mut tracked = HashSet::new(); + for root in git_roots_for_files(files)? { + for path in git_ls_files(&root)? { + tracked.insert(path); + } + } + Ok(tracked) +} + +fn git_roots_for_files(files: &[SourceFile]) -> Result> { + let current_root = git_root_for_dir(&std::env::current_dir()?).ok(); + if let Some(root) = current_root { + let root = normalize_path(&root); + if files + .iter() + .all(|file| normalize_path(&file.path).starts_with(&root)) + { + return Ok(BTreeSet::from([root])); + } + } + + let mut roots = BTreeSet::new(); + for file in files { + let dir = file.path.parent().unwrap_or_else(|| Path::new(".")); + let root = git_root_for_dir(dir).with_context(|| { + format!( + "--vcs=git requires {} to be inside a Git work tree", + file.path.display() + ) + })?; + roots.insert(normalize_path(&root)); + } + Ok(roots) +} + +fn git_root_for_dir(dir: &Path) -> Result { + let output = Command::new("git") + .arg("-C") + .arg(dir) + .args(["rev-parse", "--show-toplevel"]) + .output() + .with_context(|| format!("failed to run git rev-parse in {}", dir.display()))?; + if !output.status.success() { + bail!("git rev-parse failed in {}", dir.display()); + } + let stdout = String::from_utf8(output.stdout) + .with_context(|| format!("git rev-parse output was not UTF-8 in {}", dir.display()))?; + Ok(PathBuf::from(stdout.trim())) +} + +fn git_ls_files(root: &Path) -> Result> { + let output = Command::new("git") + .arg("-C") + .arg(root) + .args(["ls-files", "-z"]) + .output() + .with_context(|| format!("failed to run git ls-files in {}", root.display()))?; + if 
!output.status.success() { + bail!("git ls-files failed in {}", root.display()); + } + let stdout = String::from_utf8(output.stdout) + .with_context(|| format!("git ls-files output was not UTF-8 in {}", root.display()))?; + Ok(stdout + .split('\0') + .filter(|path| !path.is_empty()) + .map(|path| normalize_path(&root.join(path))) + .collect()) +} + +fn normalize_path(path: &Path) -> PathBuf { + fs::canonicalize(path).unwrap_or_else(|_| { + if path.is_absolute() { + path.to_path_buf() + } else { + std::env::current_dir() + .unwrap_or_else(|_| PathBuf::from(".")) + .join(path) + } + }) +} + +fn expand_target(target: &Path, options: &Options, out: &mut Vec) -> Result<()> { + if target.is_dir() { + expand_directory(target, options, out) + } else if target.is_file() { + push_source_file(target, options, out); + Ok(()) + } else { + Ok(()) + } +} + +fn expand_directory(dir: &Path, options: &Options, out: &mut Vec) -> Result<()> { + for entry in fs::read_dir(dir).with_context(|| format!("failed to read {}", dir.display()))? 
{ + let entry = entry?; + let path = entry.path(); + if excluded_path(&path, options) { + continue; + } + if path.is_dir() { + expand_directory(&path, options, out)?; + } else if path.is_file() { + push_source_file(&path, options, out); + } + } + Ok(()) +} + +fn push_source_file(path: &Path, options: &Options, out: &mut Vec) { + if excluded_path(path, options) { + return; + } + let Some(file_name) = path.file_name().and_then(|value| value.to_str()) else { + return; + }; + if file_name.starts_with('.') || file_name == "all-tests.zig" { + return; + } + + let language = options.language.or_else(|| { + path.extension() + .and_then(|value| value.to_str()) + .and_then(|extension| Language::for_extension(&extension.to_ascii_lowercase())) + }); + let Some(language) = language else { + return; + }; + out.push(SourceFile { + path: path.to_path_buf(), + language, + }); +} + +fn excluded_path(path: &Path, options: &Options) -> bool { + let text = path.to_string_lossy().replace('\\', "/"); + if DEFAULT_EXCLUDE_DIRS.iter().any(|dir| { + text == *dir || text.ends_with(&format!("/{dir}")) || text.contains(&format!("/{dir}/")) + }) { + return true; + } + + options.excludes.iter().any(|pattern| { + let pattern = pattern.replace('\\', "/"); + if let Some(prefix) = pattern.strip_suffix("/**") { + let prefix = prefix.strip_prefix("**/").unwrap_or(prefix); + text == prefix + || text.ends_with(&format!("/{prefix}")) + || text.contains(&format!("/{prefix}/")) + } else { + text == pattern || text.ends_with(&format!("/{pattern}")) || text.contains(&pattern) + } + }) +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + #[test] + fn git_vcs_filter_keeps_only_tracked_source_files() { + let dir = TempDir::new().expect("tempdir"); + run_git(dir.path(), &["init"]); + + let tracked = dir.path().join("tracked.rb"); + let untracked = dir.path().join("untracked.rb"); + fs::write(&tracked, "def tracked\nend\n").expect("write tracked"); + fs::write(&untracked, "def 
untracked\nend\n").expect("write untracked"); + run_git(dir.path(), &["add", "tracked.rb"]); + + let options = Options { + vcs: Some(VcsFilter::Git), + ..Options::default() + }; + let files = + collect_source_files(&[dir.path().to_path_buf()], &options).expect("source files"); + let names = files + .iter() + .map(|file| file.path.file_name().unwrap().to_string_lossy().to_string()) + .collect::>(); + + assert_eq!(names, vec!["tracked.rb"]); + } + + fn run_git(dir: &Path, args: &[&str]) { + let status = Command::new("git") + .arg("-C") + .arg(dir) + .args(args) + .status() + .expect("git command"); + assert!(status.success(), "git {:?} failed", args); + } +} diff --git a/gems/decomplex/rust/src/decomplex/report_value.rs b/gems/decomplex/rust/src/decomplex/report_value.rs new file mode 100644 index 000000000..048799558 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/report_value.rs @@ -0,0 +1,111 @@ +use serde_json::Value; + +pub fn get<'a>(value: &'a Value, key: &str) -> Option<&'a Value> { + value.as_object()?.get(key) +} + +pub fn string(value: Option<&Value>) -> String { + match value { + Some(Value::String(text)) => text.clone(), + Some(Value::Number(number)) => number.to_string(), + Some(Value::Bool(true)) => "true".to_string(), + Some(Value::Bool(false)) => "false".to_string(), + Some(Value::Null) | None => String::new(), + Some(other) => other.to_string(), + } +} + +pub fn field(value: &Value, key: &str) -> String { + string(get(value, key)) +} + +pub fn field_i64(value: &Value, key: &str) -> i64 { + match get(value, key) { + Some(Value::Number(number)) => number + .as_i64() + .or_else(|| number.as_u64().map(|n| n as i64)) + .unwrap_or(0), + Some(Value::String(text)) => text.parse().unwrap_or(0), + _ => 0, + } +} + +pub fn field_usize(value: &Value, key: &str) -> usize { + field_i64(value, key).max(0) as usize +} + +pub fn field_bool(value: &Value, key: &str) -> bool { + match get(value, key) { + Some(Value::Bool(value)) => *value, + 
Some(Value::String(text)) => text == "true", + _ => false, + } +} + +pub fn array<'a>(value: &'a Value, key: &str) -> &'a [Value] { + get(value, key) + .and_then(Value::as_array) + .map(Vec::as_slice) + .unwrap_or(&[]) +} + +pub fn array_from(value: Option<&Value>) -> &[Value] { + value + .and_then(Value::as_array) + .map(Vec::as_slice) + .unwrap_or(&[]) +} + +pub fn array_strings(value: Option<&Value>) -> Vec { + array_from(value) + .iter() + .map(|item| string(Some(item))) + .collect() +} + +pub fn field_array_strings(value: &Value, key: &str) -> Vec { + array_strings(get(value, key)) +} + +pub fn join(values: &[String], separator: &str) -> String { + values.join(separator) +} + +pub fn join_field(value: &Value, key: &str, separator: &str) -> String { + field_array_strings(value, key).join(separator) +} + +pub fn array_len(value: &Value, key: &str) -> usize { + array(value, key).len() +} + +pub fn positive(value: &Value, key: &str) -> bool { + field_i64(value, key) > 0 +} + +pub fn kind_is(value: &Value, key: &str, expected: &str) -> bool { + field(value, key) == expected +} + +pub fn ruby_inspect_array(value: Option<&Value>) -> String { + let parts = array_from(value) + .iter() + .map(ruby_inspect_value) + .collect::>(); + format!("[{}]", parts.join(", ")) +} + +fn ruby_inspect_value(value: &Value) -> String { + match value { + Value::String(text) => format!("{text:?}"), + Value::Number(number) => number.to_string(), + Value::Bool(true) => "true".to_string(), + Value::Bool(false) => "false".to_string(), + Value::Null => "nil".to_string(), + Value::Array(items) => { + let parts = items.iter().map(ruby_inspect_value).collect::>(); + format!("[{}]", parts.join(", ")) + } + Value::Object(_) => value.to_string(), + } +} diff --git a/gems/decomplex/rust/src/decomplex/root_cause.rs b/gems/decomplex/rust/src/decomplex/root_cause.rs new file mode 100644 index 000000000..834f03a2a --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/root_cause.rs @@ -0,0 +1,288 @@ +use 
crate::decomplex::convergence; +use crate::decomplex::report::ReportSection; +use crate::decomplex::report_value as rv; +use regex::Regex; +use serde::Serialize; +use serde_json::Value; +use std::collections::{BTreeMap, HashMap, HashSet}; +use std::sync::OnceLock; + +const TUPLE_FIELDS: &[&str] = &["members", "guards", "pattern"]; +const NAME_ARRAY_FIELDS: &[&str] = &["pair", "names"]; +const NAME_STR_FIELDS: &[&str] = &[ + "field", + "derived", + "source", + "contract", + "canon", + "predicate", + "detail", + "ref_name", + "has", + "missing", +]; +const STOPWORDS: &[&str] = &[ + "nil", "true", "false", "self", "end", "do", "if", "then", "else", "self_", "it", "new", + "to_s", "call", "each", "map", +]; +const FAT_UNION_FIX: &str = "fat union -- decompose product-vs-sum: hoist the common fields to a struct, keep a SMALL union for the variant part (extraction is value-object work -> nil-kill owns it)"; + +#[derive(Clone, Debug, Eq, PartialEq, Hash)] +pub struct Entity { + pub kind: String, + pub token: String, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct Cluster { + pub kind: String, + pub token: String, + pub detectors: Vec, + pub n_detectors: usize, + pub support: usize, + pub scatter: usize, + pub score: i64, + pub fat_union: bool, + pub fix: String, + pub sites: Vec, +} + +#[derive(Clone, Debug)] +struct Accumulator { + dets: BTreeMap, + findings: Vec, + tiers: BTreeMap, +} + +pub fn cluster(sections: &[ReportSection], min_detectors: usize) -> Vec { + let mut acc: HashMap = HashMap::new(); + for section in sections { + for finding in §ion.findings { + for entity in entities(finding) { + let row = acc.entry(entity).or_insert_with(|| Accumulator { + dets: BTreeMap::new(), + findings: Vec::new(), + tiers: BTreeMap::new(), + }); + row.dets.insert(section.title.clone(), true); + row.tiers.insert(section.title.clone(), section.tier); + row.findings.push(finding.clone()); + } + } + } + + let mut clusters = acc + .into_iter() + 
.filter_map(|(entity, row)| { + if row.dets.len() < min_detectors { + return None; + } + let detectors = row.dets.keys().cloned().collect::>(); + let mut units = row + .findings + .iter() + .flat_map(finding_units) + .collect::>(); + units.sort(); + units.dedup(); + let score = row + .tiers + .values() + .map(|tier| convergence::tier_weight(*tier)) + .sum(); + let fat_union = fat_union(&entity.kind, &entity.token, &row.findings); + let mut sites = row + .findings + .iter() + .flat_map(convergence::locations) + .collect::>(); + let mut seen_sites = HashSet::new(); + sites.retain(|site| seen_sites.insert(site.clone())); + sites.truncate(8); + Some(Cluster { + kind: entity.kind.clone(), + token: entity.token.clone(), + n_detectors: detectors.len(), + support: row.findings.len(), + scatter: units.len(), + score, + fat_union, + fix: if fat_union { + FAT_UNION_FIX.to_string() + } else { + fix_shape(&detectors, &entity.kind) + }, + detectors, + sites, + }) + }) + .collect::>(); + clusters.sort_by(|left, right| { + right + .n_detectors + .cmp(&left.n_detectors) + .then_with(|| right.score.cmp(&left.score)) + .then_with(|| right.scatter.cmp(&left.scatter)) + .then_with(|| left.kind.cmp(&right.kind)) + .then_with(|| left.token.cmp(&right.token)) + }); + clusters +} + +pub fn entities(finding: &Value) -> Vec { + let mut out = Vec::new(); + for key in TUPLE_FIELDS { + let values = rv::array(finding, key); + if values.len() < 2 { + continue; + } + let mut members = values + .iter() + .map(|value| rv::string(Some(value))) + .collect::>(); + members.sort(); + out.push(Entity { + kind: "tuple".to_string(), + token: truncate_chars(&members.join(" | "), 160), + }); + } + for key in NAME_ARRAY_FIELDS { + for value in rv::array(finding, key) { + for token in tokens(&rv::string(Some(value))) { + out.push(Entity { + kind: "name".to_string(), + token, + }); + } + } + } + for key in NAME_STR_FIELDS { + if let Some(value) = rv::get(finding, key) { + for token in 
tokens(&rv::string(Some(value))) { + out.push(Entity { + kind: "name".to_string(), + token, + }); + } + } + } + let mut seen = HashSet::new(); + out.retain(|entity| seen.insert((entity.kind.clone(), entity.token.clone()))); + out +} + +pub fn tokens(value: &str) -> Vec { + static TOKEN_RE: OnceLock = OnceLock::new(); + let re = TOKEN_RE.get_or_init(|| Regex::new(r"[A-Za-z_][A-Za-z0-9_]*[?!=]?").unwrap()); + let mut out = re + .find_iter(value) + .filter_map(|mat| { + let token = mat.as_str().trim_end_matches(['?', '!', '=']).to_string(); + if token.len() < 2 || STOPWORDS.contains(&token.as_str()) { + None + } else { + Some(token) + } + }) + .collect::>(); + out.sort(); + out.dedup(); + out +} + +pub fn finding_units(finding: &Value) -> Vec<(String, String)> { + convergence::locations(finding) + .into_iter() + .filter_map(|loc| { + let (file, method, _) = convergence::parse_loc(&loc); + match (file, method) { + (Some(file), Some(method)) => Some((file, method)), + _ => None, + } + }) + .collect() +} + +fn fat_union(kind: &str, token: &str, findings: &[Value]) -> bool { + static CONST_RE: OnceLock = OnceLock::new(); + let re = CONST_RE.get_or_init(|| Regex::new(r"\A(::)?[A-Z]\w*(::[A-Z]\w*)*\z").unwrap()); + if kind != "tuple" { + return false; + } + if !findings + .iter() + .any(|finding| rv::kind_is(finding, "kind", "case_dispatch")) + { + return false; + } + let members = token.split(" | ").collect::>(); + members.len() >= 2 && members.iter().all(|member| re.is_match(member)) +} + +fn fix_shape(detectors: &[String], kind: &str) -> String { + let detectors = detectors.iter().map(String::as_str).collect::>(); + let shapes: &[(&[&str], &str, &str)] = &[ + ( + &["Neglected Updates", "Derived-State Staleness"], + "name", + "single-source this state (one stamp, or recompute on write) -- the invariant-#16 desync shape", + ), + ( + &["Broken Protocols"], + "any", + "pair the protocol (RAII / ensure); the unpaired site is the deviant", + ), + ( + &[ + "Missing 
Abstractions", + "Reification Misses", + "Semantic Predicate Aliases", + "Exact Predicate Aliases", + ], + "any", + "reify ONE named predicate/decision and call it everywhere", + ), + ( + &["Missing Abstractions", "Neglected Conditions", "Neglected Path Conditions"], + "tuple", + "extract the decision; if it dispatches a closed set, consider product-vs-sum (fat-union -> nil-kill)", + ), + ( + &["Decision Pressure"], + "any", + "tighten the contract once; the scattered defensive guards collapse (cross-proc -> nil-kill)", + ), + ]; + for (titles, want_kind, label) in shapes { + if *want_kind != "any" && *want_kind != kind { + continue; + } + if titles.iter().any(|title| detectors.contains(title)) { + return (*label).to_string(); + } + } + "converging structural debt -- resolve once at the named entity".to_string() +} + +fn truncate_chars(value: &str, max: usize) -> String { + value.chars().take(max).collect() +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + #[test] + fn equivalent_state_tokens_collapse_to_same_name() { + assert_eq!(tokens("@storage="), vec!["storage"]); + assert_eq!(tokens(".storage"), vec!["storage"]); + } + + #[test] + fn tuple_fields_share_the_same_token() { + let left = entities(&json!({"members": ["b", "a"]})); + let right = entities(&json!({"guards": ["a", "b"]})); + assert_eq!(left, right); + } +} diff --git a/gems/decomplex/rust/src/decomplex/sarif.rs b/gems/decomplex/rust/src/decomplex/sarif.rs new file mode 100644 index 000000000..32eaaacde --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/sarif.rs @@ -0,0 +1,219 @@ +use serde_json::{json, Map, Value}; +use std::collections::BTreeSet; + +const SCHEMA: &str = "https://json.schemastore.org/sarif-2.1.0.json"; + +pub fn document( + tool_name: &str, + rules: Vec, + results: Vec, + information_uri: Option<&str>, + properties: Value, +) -> Value { + let normalized_rules = unique_rules(rules); + let mut rule_index = Map::new(); + for (index, rule) in 
normalized_rules.iter().enumerate() { + if let Some(id) = rule.get("id").and_then(Value::as_str) { + rule_index.insert(id.to_string(), json!(index)); + } + } + let normalized_results = results + .into_iter() + .map(|result| { + let mut result = compact_value(json_safe_value(result)); + if let Some(rule_id) = result.get("ruleId").and_then(Value::as_str) { + if let Some(index) = rule_index.get(rule_id) { + if let Some(object) = result.as_object_mut() { + object.insert("ruleIndex".to_string(), index.clone()); + } + } + } + result + }) + .collect::>(); + + let mut driver = Map::new(); + driver.insert("name".to_string(), Value::String(tool_name.to_string())); + if let Some(uri) = information_uri { + driver.insert("informationUri".to_string(), Value::String(uri.to_string())); + } + driver.insert("rules".to_string(), Value::Array(normalized_rules)); + let driver = compact_object(driver); + + let run = compact_value(json!({ + "tool": { "driver": driver }, + "results": normalized_results, + "properties": json_safe_value(properties), + })); + + compact_value(json!({ + "version": "2.1.0", + "$schema": SCHEMA, + "runs": [run], + })) +} + +pub fn rule( + id: &str, + name: Option<&str>, + short_description: Option<&str>, + full_description: Option<&str>, + default_level: &str, + help_uri: Option<&str>, + properties: Value, +) -> Value { + compact_value(json!({ + "id": id, + "name": name.unwrap_or(id), + "shortDescription": { "text": short_description.or(name).unwrap_or(id) }, + "fullDescription": full_description.map(|text| json!({ "text": text })), + "defaultConfiguration": { "level": default_level }, + "helpUri": help_uri, + "properties": json_safe_value(properties), + })) +} + +pub fn result( + rule_id: &str, + message: &str, + path: Option<&str>, + line: Option, + start_column: Option, + end_line: Option, + end_column: Option, + level: &str, + properties: Value, + partial_fingerprints: Value, +) -> Value { + compact_value(json!({ + "ruleId": rule_id, + "level": level, + 
"message": { "text": message }, + "locations": sarif_locations(path, line, start_column, end_line, end_column), + "partialFingerprints": json_safe_value(partial_fingerprints), + "properties": json_safe_value(properties), + })) +} + +fn sarif_locations( + path: Option<&str>, + line: Option, + start_column: Option, + end_line: Option, + end_column: Option, +) -> Value { + let Some(path) = path.filter(|path| !path.is_empty()) else { + return Value::Array(Vec::new()); + }; + Value::Array(vec![compact_value(json!({ + "physicalLocation": compact_value(json!({ + "artifactLocation": { "uri": normalize_path(path) }, + "region": compact_value(json!({ + "startLine": positive_int(line, Some(1)), + "startColumn": positive_int(start_column, None), + "endLine": positive_int(end_line, None), + "endColumn": positive_int(end_column, None), + })) + })) + }))]) +} + +pub fn normalize_path(path: &str) -> String { + path.replace('\\', "/").trim_start_matches("./").to_string() +} + +pub fn slug(value: &str) -> String { + let mut out = String::new(); + let mut last_dash = false; + for ch in value.to_lowercase().chars() { + if ch.is_ascii_alphanumeric() { + out.push(ch); + last_dash = false; + } else if !last_dash { + out.push('-'); + last_dash = true; + } + } + out.trim_matches('-').to_string() +} + +fn positive_int(value: Option, fallback: Option) -> Option { + let number = value.or(fallback)?; + (number > 0).then_some(number).or(fallback) +} + +pub fn json_safe_value(value: Value) -> Value { + match value { + Value::Array(items) => Value::Array(items.into_iter().map(json_safe_value).collect()), + Value::Object(object) => { + let mut out = Map::new(); + for (key, value) in object { + out.insert(key, json_safe_value(value)); + } + Value::Object(out) + } + other => other, + } +} + +fn compact_value(value: Value) -> Value { + match value { + Value::Object(object) => compact_object(object), + other => other, + } +} + +fn compact_object(object: Map) -> Value { + let mut out = Map::new(); + 
for (key, value) in object { + if value.is_null() { + continue; + } + let value = match value { + Value::Object(object) => compact_object(object), + Value::Array(items) => Value::Array(items.into_iter().map(compact_value).collect()), + other => other, + }; + let empty = match &value { + Value::Array(items) => items.is_empty(), + Value::Object(object) => object.is_empty(), + Value::String(text) => text.is_empty(), + _ => false, + }; + if !empty { + out.insert(key, value); + } + } + Value::Object(out) +} + +fn unique_rules(rules: Vec) -> Vec { + let mut seen = BTreeSet::new(); + let mut out = Vec::new(); + for rule in rules { + let rule = json_safe_value(rule); + let id = rule + .get("id") + .and_then(Value::as_str) + .unwrap_or_default() + .to_string(); + if id.is_empty() || !seen.insert(id) { + continue; + } + out.push(compact_value(rule)); + } + out +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn slug_matches_ruby_sarif_slug() { + assert_eq!( + slug("Structural Similarity (Type-2/3)"), + "structural-similarity-type-2-3" + ); + } +} diff --git a/gems/decomplex/rust/src/decomplex/syntax.rs b/gems/decomplex/rust/src/decomplex/syntax.rs new file mode 100644 index 000000000..d9c2594f3 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax.rs @@ -0,0 +1,477 @@ +pub(crate) mod adapters; +pub(crate) mod complexity; +pub mod local_flow; +pub mod path_condition; +pub mod redundant_nil_guard; +pub mod tree_sitter_adapter; + +use crate::decomplex::ast::{Node as NormalizedNode, RawNode, Span}; +use crate::decomplex::parallel; +use anyhow::{bail, Result}; +use serde::{Deserialize, Deserializer, Serialize}; +use std::collections::BTreeMap; +use std::path::PathBuf; + +#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)] +pub enum Language { + Ruby, + Python, + JavaScript, + Java, + TypeScript, + Swift, + Kotlin, + Go, + Rust, + Zig, + Lua, + C, + Cpp, + CSharp, + Php, +} + +impl Language { + pub fn parse(value: &str) -> Result { + match value { + 
"ruby" => Ok(Self::Ruby), + "python" => Ok(Self::Python), + "javascript" => Ok(Self::JavaScript), + "java" => Ok(Self::Java), + "typescript" => Ok(Self::TypeScript), + "swift" => Ok(Self::Swift), + "kotlin" => Ok(Self::Kotlin), + "go" => Ok(Self::Go), + "rust" => Ok(Self::Rust), + "zig" => Ok(Self::Zig), + "lua" => Ok(Self::Lua), + "c" => Ok(Self::C), + "cpp" => Ok(Self::Cpp), + "csharp" => Ok(Self::CSharp), + "php" => Ok(Self::Php), + _ => bail!("unsupported Decomplex native language: {value}"), + } + } + + pub fn as_str(self) -> &'static str { + match self { + Self::Ruby => "ruby", + Self::Python => "python", + Self::JavaScript => "javascript", + Self::Java => "java", + Self::TypeScript => "typescript", + Self::Swift => "swift", + Self::Kotlin => "kotlin", + Self::Go => "go", + Self::Rust => "rust", + Self::Zig => "zig", + Self::Lua => "lua", + Self::C => "c", + Self::Cpp => "cpp", + Self::CSharp => "csharp", + Self::Php => "php", + } + } + + pub fn for_extension(extension: &str) -> Option { + match extension { + "rb" => Some(Self::Ruby), + "py" => Some(Self::Python), + "js" | "jsx" | "mjs" | "cjs" => Some(Self::JavaScript), + "java" => Some(Self::Java), + "ts" | "tsx" => Some(Self::TypeScript), + "swift" => Some(Self::Swift), + "kt" | "kts" => Some(Self::Kotlin), + "go" => Some(Self::Go), + "rs" => Some(Self::Rust), + "zig" => Some(Self::Zig), + "lua" => Some(Self::Lua), + "c" | "h" => Some(Self::C), + "cpp" | "cc" | "cxx" | "hpp" | "hh" | "hxx" => Some(Self::Cpp), + "cs" => Some(Self::CSharp), + "php" => Some(Self::Php), + _ => None, + } + } +} + +impl<'de> Deserialize<'de> for Language { + fn deserialize(deserializer: D) -> std::result::Result + where + D: Deserializer<'de>, + { + let value = String::deserialize(deserializer)?; + Self::parse(&value).map_err(serde::de::Error::custom) + } +} + +#[derive(Clone, Debug, Deserialize)] +pub struct Document { + pub file: String, + pub language: Language, + #[serde(default)] + pub source: String, + #[serde(default)] + 
pub lines: Vec, + #[serde(default = "empty_raw_node")] + pub root: RawNode, + #[serde(default = "empty_normalized_node")] + pub normalized_root: NormalizedNode, + #[serde(default)] + pub function_defs: Vec, + #[serde(default)] + pub owner_defs: Vec, + #[serde(default)] + pub call_sites: Vec, + #[serde(default)] + pub state_declarations: Vec, + #[serde(default)] + pub state_reads: Vec, + #[serde(default)] + pub state_writes: Vec, + #[serde(default)] + pub decision_sites: Vec, + #[serde(default)] + pub branch_decisions: Vec, + #[serde(default)] + pub branch_arms: Vec, + #[serde(default)] + pub dispatch_sites: Vec, + #[serde(default)] + pub semantic_effect_sites: Vec, + #[serde(default)] + pub local_complexity_scores: BTreeMap, + #[serde(default)] + pub predicate_aliases: Vec, + #[serde(default)] + pub comparison_uses: Vec, + #[serde(default)] + pub path_condition_sites: Vec, + #[serde(default)] + pub protocol_method_effects: Vec, + #[serde(default)] + pub protocol_call_paths: Vec, +} + +fn empty_raw_node() -> RawNode { + RawNode { + kind: "program".to_string(), + text: String::new(), + span: [1, 0, 1, 0], + named: true, + field_name: None, + children: Vec::new(), + } +} + +fn empty_normalized_node() -> NormalizedNode { + NormalizedNode { + r#type: "ROOT".to_string(), + children: Vec::new(), + first_lineno: 1, + first_column: 0, + last_lineno: 1, + last_column: 0, + text: String::new(), + } +} + +#[derive(Clone, Debug, Deserialize)] +pub struct FunctionDef { + pub file: String, + pub name: String, + pub owner: String, + pub line: usize, + pub span: Span, + pub body: RawNode, + pub visibility: Option, + pub params: Vec, +} + +#[derive(Clone, Debug, Deserialize)] +pub struct OwnerDef { + pub file: String, + pub name: String, + pub kind: String, + pub line: usize, + pub span: Span, +} + +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct CallSite { + pub receiver: String, + pub message: String, + pub file: String, + pub function: String, + pub 
owner: String, + pub line: usize, + pub span: Span, + pub conditional: bool, + pub arguments: Vec, + pub control: Option, + pub safe_navigation: bool, + pub block: bool, +} + +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct StateDeclaration { + pub field: String, + pub owner: String, + pub r#type: Option, + pub file: String, + pub line: usize, + pub span: Span, +} + +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct StateWrite { + pub field: String, + pub receiver: String, + pub file: String, + pub function: String, + pub line: usize, + pub span: Span, + pub owner: String, +} + +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct StateRead { + pub field: String, + pub receiver: String, + pub file: String, + pub function: String, + pub line: usize, + pub span: Span, + pub owner: String, +} + +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct PredicateAlias { + pub name: String, + pub body: String, + pub file: String, + pub defn: String, + #[serde(default)] + pub owner: String, + pub line: usize, + pub span: Span, +} + +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct DecisionSite { + pub kind: String, + pub members: Vec, + pub file: String, + pub function: String, + pub line: usize, + pub span: Span, + pub predicate: String, + pub enclosing_span: Span, +} + +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct BranchDecision { + pub file: String, + pub function: String, + pub line: usize, + pub span: Span, + pub predicate: String, + pub state_refs: Vec, +} + +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct BranchArm { + pub file: String, + pub function: String, + pub kind: String, + pub line: usize, + pub span: Span, + pub decision_line: usize, + pub decision_span: Span, + pub predicate: String, + pub member: String, + pub body: String, +} + +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, 
Serialize)] +pub struct DispatchSite { + pub variant_set: Vec, + pub arm_members: BTreeMap>, + pub outside: Vec, + pub file: String, + pub function: String, + pub line: usize, + pub span: Span, +} + +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct SemanticEffectSite { + pub kind: String, + pub detail: String, + pub file: String, + pub function: String, + pub line: usize, + pub span: Span, +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] +pub struct LocalComplexityScore { + pub score: f64, + pub signals: BTreeMap, +} + +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct ComparisonUse { + pub canon_source: String, + pub raw: String, + pub file: String, + pub function: String, + pub line: usize, + pub span: Span, + pub enclosing_span: Span, +} + +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct PathConditionSite { + pub guards: Vec, + pub action: String, + pub file: String, + pub function: String, + pub line: usize, + pub span: Span, +} + +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct ProtocolMethodEffect { + pub file: String, + pub owner: String, + pub name: String, + pub line: usize, + pub reads: Vec, + pub writes: Vec, +} + +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct ProtocolCall { + pub mid: String, + pub file: String, + pub owner: String, + pub defn: String, + pub line: usize, + pub span: Span, +} + +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct ProtocolMethodPath { + pub file: String, + pub owner: String, + pub name: String, + pub line: usize, + pub calls: Vec, +} + +#[derive(Clone, Debug)] +pub(crate) struct CloneCandidate { + pub(crate) file: String, + pub(crate) line: usize, + pub(crate) span: Span, + pub(crate) method_name: String, + pub(crate) node_name: String, + pub(crate) mass: usize, + pub(crate) fingerprint: String, + pub(crate) raw: String, + pub(crate) 
child_fingerprints: Vec, + pub(crate) child_masses: Vec, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct SimilarityFinding { + pub at: String, + pub sites: Vec, + pub spans: BTreeMap, + pub clone_type: String, + pub node: String, + pub mass: usize, + pub locations: Vec, +} + +pub fn parse_file(file: PathBuf, language: Language) -> Result { + tree_sitter_adapter::parse_file(file, language) +} + +pub fn parse_files(files: &[PathBuf], language: Language) -> Result> { + parallel::map_ordered(files, |file| parse_file(file.clone(), language)) +} + +pub(crate) fn clone_candidates(document: &Document) -> Vec { + adapters::language_profile(document.language).clone_candidates(document) +} + +pub(crate) fn core_owner_names(document: &Document) -> &'static [&'static str] { + adapters::false_simplicity_lexicon::false_simplicity_lexicon(document.language).core_consts +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::decomplex::parallel; + use std::io::Write; + use tempfile::NamedTempFile; + + fn document(source: &str, language: Language) -> Document { + let mut file = NamedTempFile::new().expect("tempfile"); + file.write_all(source.as_bytes()).expect("write source"); + parse_file(file.path().to_path_buf(), language).expect("parse file") + } + + #[test] + fn parallel_parse_files_preserves_input_order() { + parallel::set_jobs_for_process(Some(4)).expect("jobs"); + let mut first = NamedTempFile::new().expect("first"); + let mut second = NamedTempFile::new().expect("second"); + first + .write_all(b"def first\n 1\nend\n") + .expect("write first"); + second + .write_all(b"def second\n 2\nend\n") + .expect("write second"); + + let files = vec![first.path().to_path_buf(), second.path().to_path_buf()]; + let docs = parse_files(&files, Language::Ruby).expect("parse files"); + + assert_eq!(docs.len(), 2); + assert_eq!(docs[0].file, first.path().to_string_lossy()); + assert_eq!(docs[1].file, second.path().to_string_lossy()); + 
assert_eq!(docs[0].function_defs[0].name, "first"); + assert_eq!(docs[1].function_defs[0].name, "second"); + } + + #[test] + fn parses_java_kotlin_and_swift_function_defs() { + let cases = [ + ( + Language::Java, + "class Billing { int mixed(int price, int tax) { return price + tax; } }", + ), + ( + Language::Kotlin, + "class Billing { fun mixed(price: Int, tax: Int): Int { return price + tax } }", + ), + ( + Language::Swift, + "class Billing { func mixed(price: Int, tax: Int) -> Int { return price + tax } }", + ), + ]; + + for (language, source) in cases { + let doc = document(source, language); + let function = doc + .function_defs + .iter() + .find(|function| function.name == "mixed") + .expect("mixed function"); + + assert_eq!(function.owner, "Billing"); + } + } +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs new file mode 100644 index 000000000..389e9c7db --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/base.rs @@ -0,0 +1,1821 @@ +use super::super::tree_sitter_adapter::{ + first_named_child, first_named_child_except, first_named_child_with_kind, first_named_text, + named_children, normalize_type_owner, strip_assignment_suffix, AssignmentTarget, CallTarget, + Target, +}; +use super::super::{ + CallSite, CloneCandidate, Document, FunctionDef, Language, ProtocolCall, ProtocolMethodEffect, + ProtocolMethodPath, SemanticEffectSite, StateRead, StateWrite, +}; +use crate::decomplex::ast::{node_text, normalize_text, span, RawNode}; +use std::collections::HashSet; +use std::path::Path; +use tree_sitter::{Language as TreeSitterLanguage, Node}; + +pub(crate) const EMPTY_NODE_KINDS: &[&str] = &[]; +pub(crate) const DEFAULT_COMPARISON_OPERATORS: &[&str] = &["==", "!="]; +pub(crate) const DEFAULT_EXPRESSION_BODY_OPERATOR_TOKENS: &[&str] = &["="]; +pub(crate) const DEFAULT_IGNORED_STATEMENT_NODE_KINDS: &[&str] = &["comment", "heredoc_body"]; +const 
CLONE_IDENTIFIER_KINDS: &[&str] = &[ + "identifier", + "constant", + "type_identifier", + "field_identifier", + "property_identifier", + "shorthand_property_identifier_pattern", + "simple_identifier", + "variable_name", +]; +const CLONE_LITERAL_KINDS: &[&str] = &[ + "string", + "string_content", + "string_literal", + "interpreted_string_literal", + "raw_string_literal", + "integer", + "float", + "int", + "number", + "rational", + "imaginary", + "character", + "char_literal", + "symbol", + "simple_symbol", + "true", + "false", + "nil", + "none", + "null", +]; +const CLONE_SKIP_KINDS: &[&str] = &[ + "comment", + "identifier", + "constant", + "type_identifier", + "field_identifier", + "property_identifier", + "parameters", + "formal_parameters", + "parameter_list", + "argument_list", + "arguments", + "block_parameters", + "call_suffix", + "function_value_parameters", + "method_parameters", + "value_argument", + "scope_resolution", +]; +const CLONE_CANDIDATE_KINDS: &[&str] = &[ + "array", + "assignment", + "assignment_statement", + "block", + "case", + "case_clause", + "class", + "class_definition", + "class_declaration", + "compound_statement", + "conjunction_expression", + "control_structure_body", + "do_block", + "enum_declaration", + "for", + "for_statement", + "function_body", + "hash", + "if", + "if_statement", + "match_expression", + "match_statement", + "method", + "method_definition", + "module", + "operator_assignment", + "singleton_method", + "statements", + "struct_declaration", + "switch_case", + "switch_expression", + "switch_statement", + "unless", + "until", + "while", + "while_statement", +]; +const CLONE_BODY_KINDS: &[&str] = &[ + "body", + "block", + "body_statement", + "declaration_list", + "statement_block", + "compound_statement", + "function_body", + "statements", + "suite", + "do_block", +]; +const CLONE_CALL_KINDS: &[&str] = &[ + "call", + "call_expression", + "function_call", + "method_call", + "method_invocation", + "invocation_expression", 
+]; +const NOISE_MESSAGES: &[&str] = &[ + "!", "!=", "==", "===", "<", "<=", ">", ">=", "[]", "[]=", "to_s", "inspect", "class", +]; + +pub(crate) trait LanguageProfile { + fn language(&self) -> Language; + fn grammar(&self) -> TreeSitterLanguage; + + fn function_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn class_owner_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn module_owner_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn generic_owner_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn impl_owner_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn struct_owner_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn call_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn parameter_list_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn parameter_identifier_node_kinds(&self) -> &[&str] { + self.identifier_node_kinds() + } + + fn inline_parameter_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn function_body_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn nested_statement_wrapper_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn identifier_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn normalize_local_identifier_text(&self, text: &str) -> String { + text.to_string() + } + + fn field_identifier_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn assignment_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn assignment_operator_tokens(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn local_identifier_wrapper_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn indexed_lhs_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn indexed_lhs_bracket_wrapper_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn update_statement_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn local_declaration_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn 
short_variable_declaration_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn variable_declaration_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn local_variable_declarator_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn multi_name_variable_declaration_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn field_declaration_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn declaration_site_parent_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn assignment_state_declaration_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn declaration_assignment_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn receiver_type_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn method_receiver_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn receiver_parameter_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn first_argument_receiver_type_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn first_argument_receiver_name_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn comparison_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn comparison_operators(&self) -> &[&str] { + DEFAULT_COMPARISON_OPERATORS + } + + fn branch_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn case_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn case_arm_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn case_pattern_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn case_subject_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn case_container_stop_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn case_subject_skip_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn default_case_patterns(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn boolean_and_operators(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn boolean_container_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn 
boolean_wrapper_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn parenthesized_wrapper_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn accessor_call_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn expression_list_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn argument_list_node_kinds(&self) -> &[&str] { + &[ + "argument_list", + "arguments", + "call_suffix", + "value_arguments", + ] + } + + fn block_argument_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn navigation_suffix_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn field_like_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn field_like_dot_wrapper_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn keyed_element_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn deferred_statement_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn suppress_field_receiver_lhs_reads(&self) -> bool { + false + } + + fn suppress_indexed_lhs_reads(&self) -> bool { + true + } + + fn indexed_lhs_descendants_are_writes(&self) -> bool { + true + } + + fn keyed_element_first_named_child_is_key(&self) -> bool { + true + } + + fn nested_assignment_dependencies_only(&self) -> bool { + false + } + + fn implicit_state_accesses(&self) -> bool { + false + } + + fn path_action_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn simple_action_wrapper_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn path_transparent_branch_body_node_kinds(&self) -> &[&str] { + EMPTY_NODE_KINDS + } + + fn expression_body_operator_tokens(&self) -> &[&str] { + DEFAULT_EXPRESSION_BODY_OPERATOR_TOKENS + } + + fn ignored_statement_node_kinds(&self) -> &[&str] { + DEFAULT_IGNORED_STATEMENT_NODE_KINDS + } + + fn first_argument_receiver(&self) -> bool { + false + } + + fn function_name(&self, node: Node<'_>, source: &str) -> Option { + self.default_function_name(node, source) + } + + fn function_visibility(&self, _node: Node<'_>, 
_source: &str) -> Option { + None + } + + fn function_params(&self, node: Node<'_>, source: &str) -> Vec { + let param_nodes = if let Some(params) = self.function_parameter_list(node) { + named_children(params) + } else { + named_children(node) + .into_iter() + .filter(|child| self.inline_parameter_node_kinds().contains(&child.kind())) + .collect() + }; + let mut out = Vec::new(); + for param in param_nodes { + if let Some(name) = self.parameter_name(param, source) { + if !out.contains(&name) { + out.push(name); + } + } + } + out + } + + fn after_collect_facts(&self, _functions: &mut Vec, _calls: &[CallSite]) {} + + fn structural_semantic_effect_sites( + &self, + _root: Node<'_>, + _source: &str, + _file: &Path, + _functions: &[FunctionDef], + _state_reads: &[StateRead], + _state_writes: &[StateWrite], + ) -> Vec { + Vec::new() + } + + fn protocol_method_effects(&self, document: &Document) -> Vec { + document + .function_defs + .iter() + .map(|function_def| { + let mut reads = document + .state_reads + .iter() + .filter(|read| { + read.owner == function_def.owner && read.function == function_def.name + }) + .map(|read| normalize_protocol_state(&read.field)) + .collect::>(); + reads.sort(); + reads.dedup(); + + let mut writes = document + .state_writes + .iter() + .filter(|write| { + write.owner == function_def.owner && write.function == function_def.name + }) + .map(|write| normalize_protocol_state(&write.field)) + .collect::>(); + writes.sort(); + writes.dedup(); + + ProtocolMethodEffect { + file: function_def.file.clone(), + owner: function_def.owner.clone(), + name: protocol_method_name(&function_def.name), + line: function_def.line, + reads, + writes, + } + }) + .collect() + } + + fn protocol_call_paths(&self, document: &Document) -> Vec { + document + .function_defs + .iter() + .map(|function_def| { + let calls = document + .call_sites + .iter() + .filter(|call| { + call.owner == function_def.owner + && call.function == function_def.name + && call.receiver == 
"self" + }) + .map(|call| ProtocolCall { + mid: protocol_method_name(&call.message), + file: function_def.file.clone(), + owner: function_def.owner.clone(), + defn: protocol_method_name(&function_def.name), + line: call.line, + span: call.span, + }) + .collect(); + + ProtocolMethodPath { + file: function_def.file.clone(), + owner: function_def.owner.clone(), + name: protocol_method_name(&function_def.name), + line: function_def.line, + calls, + } + }) + .collect() + } + + fn default_function_name(&self, node: Node<'_>, source: &str) -> Option { + if !self.function_node_kinds().contains(&node.kind()) { + return None; + } + + node.child_by_field_name("name") + .map(|name| node_text(name, source).to_string()) + .or_else(|| self.declarator_name(node.child_by_field_name("declarator"), source)) + .or_else(|| self.first_identifier_text(node, source)) + } + + fn owner_name_from_declaration(&self, node: Node<'_>, source: &str) -> Option { + self.default_owner_name_from_declaration(node, source) + } + + fn owner_def_name_from_declaration(&self, node: Node<'_>, source: &str) -> Option { + self.owner_name_from_declaration(node, source) + } + + fn owner_kind(&self, node: Node<'_>) -> String { + if self.class_owner_node_kinds().contains(&node.kind()) { + "class".to_string() + } else if self.module_owner_node_kinds().contains(&node.kind()) { + "module".to_string() + } else if self.impl_owner_node_kinds().contains(&node.kind()) { + "impl".to_string() + } else if self.struct_owner_node_kinds().contains(&node.kind()) { + "struct".to_string() + } else { + "owner".to_string() + } + } + + fn default_owner_name_from_declaration(&self, node: Node<'_>, source: &str) -> Option { + if self.class_owner_node_kinds().contains(&node.kind()) + || self.module_owner_node_kinds().contains(&node.kind()) + || self.generic_owner_node_kinds().contains(&node.kind()) + || self.struct_owner_node_kinds().contains(&node.kind()) + { + return node + .child_by_field_name("name") + .map(|name| node_text(name, 
source).to_string()) + .or_else(|| self.first_identifier_text(node, source)); + } + if self.impl_owner_node_kinds().contains(&node.kind()) { + return self.impl_owner_name(node, source); + } + None + } + + fn generated_prelude(&self, _node: Node<'_>, _source: &str) -> bool { + false + } + + fn control_context(&self, node: Node<'_>, source: &str) -> Option { + if generic_loop_context(node, source) { + Some("iterates".to_string()) + } else if generic_branch_context(node, source) { + Some("conditional".to_string()) + } else { + None + } + } + + fn normalize_source_text(&self, text: &str) -> String { + normalize_text(text) + } + + fn hidden_case(&self, _node: Node<'_>) -> bool { + false + } + + fn hidden_case_source_node<'tree>(&self, _node: Node<'tree>) -> Option> { + None + } + + fn case_source_node<'tree>(&self, node: Node<'tree>) -> Node<'tree> { + if self.hidden_case(node) { + self.hidden_case_source_node(node).unwrap_or(node) + } else { + node + } + } + + fn predicate_less_case(&self, node: Node<'_>) -> bool { + self.case_node_kinds().contains(&node.kind()) && self.decision_subject(node).is_none() + } + + fn case_pattern_texts(&self, patterns: &[Node<'_>], source: &str) -> Vec { + patterns + .iter() + .map(|pattern| normalize_text(node_text(*pattern, source))) + .collect() + } + + fn receiver_convention_owner_name(&self, node: Node<'_>, source: &str) -> Option { + if !self.first_argument_receiver() || !self.function_node_kinds().contains(&node.kind()) { + return None; + } + + let (type_name, _) = self.first_argument_receiver_parameter(node, source)?; + let type_name = normalize_type_owner(&type_name); + let name = self.function_name(node, source)?; + if type_name.is_empty() || name.is_empty() { + return None; + } + + let prefix = snake_case_type_name(&type_name); + if name.starts_with(&format!("{prefix}_")) { + Some(type_name) + } else { + None + } + } + + fn function_receiver_name(&self, node: Node<'_>, source: &str) -> Option { + if 
self.first_argument_receiver() && self.function_node_kinds().contains(&node.kind()) { + if let Some((_, name)) = self.first_argument_receiver_parameter(node, source) { + return Some(name); + } + } + None + } + + fn single_expression_body<'tree>(&self, node: Node<'tree>) -> Option> { + let mut cursor = node.walk(); + if node.children(&mut cursor).any(|child| { + self.expression_body_operator_tokens() + .contains(&child.kind()) + }) { + let named = named_children(node); + return named.last().copied(); + } + + let body = node.child_by_field_name("body").or_else(|| { + named_children(node) + .into_iter() + .find(|child| self.function_body_node_kinds().contains(&child.kind())) + })?; + let mut statements: Vec> = named_children(body) + .into_iter() + .filter(|child| !self.ignored_statement_node_kinds().contains(&child.kind())) + .collect(); + if statements.len() == 1 + && self + .nested_statement_wrapper_node_kinds() + .contains(&statements[0].kind()) + { + statements = named_children(statements[0]) + .into_iter() + .filter(|child| !self.ignored_statement_node_kinds().contains(&child.kind())) + .collect(); + } + statements.last().copied() + } + + fn call_target<'tree>(&self, node: Node<'tree>, source: &str) -> Option> { + if self.call_node_kinds().contains(&node.kind()) { + self.default_call_target(node, source) + } else { + None + } + } + + fn default_call_target<'tree>( + &self, + node: Node<'tree>, + source: &str, + ) -> Option> { + let callee = if self.field_like_node_kinds().contains(&node.kind()) { + node + } else { + node.child_by_field_name("function") + .or_else(|| node.child_by_field_name("callee")) + .or_else(|| first_named_child(node))? 
+ }; + if callee.kind() == "builtin_function" || node_text(callee, source).starts_with('@') { + return None; + } + + let (receiver, message) = self.target_from_callee(callee, source)?; + let mut target = CallTarget::new(receiver, message, self.call_argument_texts(node, source)); + if let Some(receiver) = self.first_argument_receiver_call_receiver(node, source, &target) { + target.receiver = receiver; + } + Some(target) + } + + fn first_argument_receiver_call_receiver( + &self, + node: Node<'_>, + source: &str, + target: &CallTarget<'_>, + ) -> Option { + if !self.first_argument_receiver() || target.receiver != "self" { + return None; + } + let first_arg = self.call_argument_nodes(node).first().copied()?; + let arg_target = self.state_read_target(first_arg, source)?; + Some(format!("{}.{}", arg_target.receiver, arg_target.field)) + } + + fn target_from_callee(&self, callee: Node<'_>, source: &str) -> Option<(String, String)> { + if self.field_like_node_kinds().contains(&callee.kind()) { + let object = callee + .child_by_field_name("object") + .or_else(|| callee.child_by_field_name("receiver")) + .or_else(|| callee.child_by_field_name("operand")) + .or_else(|| callee.child_by_field_name("value")) + .or_else(|| callee.child_by_field_name("expression")) + .or_else(|| first_named_child_except(callee, "navigation_suffix"))?; + let field = callee + .child_by_field_name("field") + .or_else(|| callee.child_by_field_name("property")) + .or_else(|| callee.child_by_field_name("name")) + .or_else(|| callee.child_by_field_name("suffix")) + .or_else(|| first_named_child_with_kind(callee, "navigation_suffix")) + .or_else(|| named_children(callee).into_iter().last())?; + let field_text = self.member_field_text(field, source)?; + return Some(( + normalize_text(node_text(object, source)) + .trim_start_matches('*') + .to_string(), + field_text, + )); + } + + if self.identifier_node_kinds().contains(&callee.kind()) { + return Some(("self".to_string(), node_text(callee, 
source).to_string())); + } + + let text = normalize_text(node_text(callee, source)); + if text.is_empty() { + return None; + } + let parts = text.split('.').collect::>(); + if parts.len() > 1 { + Some(( + parts[..parts.len() - 1].join("."), + parts[parts.len() - 1].to_string(), + )) + } else { + Some(("self".to_string(), text)) + } + } + + fn call_argument_texts(&self, node: Node<'_>, source: &str) -> Vec { + self.call_argument_nodes(node) + .into_iter() + .map(|argument| normalize_text(node_text(argument, source))) + .collect() + } + + fn call_argument_nodes<'tree>(&self, node: Node<'tree>) -> Vec> { + if let Some(args) = node.child_by_field_name("arguments").or_else(|| { + named_children(node) + .into_iter() + .find(|child| self.argument_list_node_kinds().contains(&child.kind())) + }) { + return named_children(args); + } + if !self.call_node_kinds().contains(&node.kind()) { + return Vec::new(); + } + + let callee = node + .child_by_field_name("function") + .or_else(|| node.child_by_field_name("callee")) + .or_else(|| first_named_child(node)); + named_children(node) + .into_iter() + .filter(|child| Some(*child) != callee) + .collect() + } + + fn call_has_block(&self, node: Node<'_>) -> bool { + named_children(node) + .into_iter() + .any(|child| self.block_argument_node_kinds().contains(&child.kind())) + } + + fn noise_call(&self, target: &CallTarget<'_>) -> bool { + let message = target.message.as_str(); + let receiver = target.receiver.as_str(); + message.is_empty() + || NOISE_MESSAGES.contains(&message) + || message.starts_with('@') + || matches!(receiver, "std" | "builtin" | "build_options") + || receiver.starts_with("std.") + || receiver.starts_with("builtin.") + || receiver.starts_with("build_options.") + } + + fn state_target(&self, lhs: Node<'_>, source: &str) -> Option { + self.default_state_target(lhs, source) + } + + fn state_declaration(&self, node: Node<'_>, source: &str) -> Option<(String, Option)> { + self.default_state_declaration(node, source) + } 
+ + fn state_read_target(&self, node: Node<'_>, source: &str) -> Option { + self.default_state_read_target(node, source) + } + + fn default_state_read_target(&self, node: Node<'_>, source: &str) -> Option { + if self.accessor_call_node_kinds().contains(&node.kind()) { + let receiver = node.child_by_field_name("receiver")?; + let method = node.child_by_field_name("method")?; + let field = node_text(method, source); + if node.child_by_field_name("arguments").is_some() || NOISE_MESSAGES.contains(&field) { + return None; + } + return Some(Target { + receiver: normalize_text(node_text(receiver, source)), + field: field.to_string(), + }); + } + + let target = self.default_state_target(node, source)?; + if NOISE_MESSAGES.contains(&target.field.as_str()) { + None + } else { + Some(target) + } + } + + fn default_state_target(&self, lhs: Node<'_>, source: &str) -> Option { + if self.expression_list_node_kinds().contains(&lhs.kind()) { + let children = named_children(lhs); + if children.len() == 1 { + return self.default_state_target(children[0], source); + } + if !self.member_expression_list(lhs, source) { + return None; + } + } + + if self.accessor_call_node_kinds().contains(&lhs.kind()) { + let receiver = lhs.child_by_field_name("receiver")?; + let method = lhs.child_by_field_name("method")?; + return Some(Target { + receiver: normalize_text(node_text(receiver, source)), + field: strip_assignment_suffix(node_text(method, source)), + }); + } + + if self.field_like_node_kinds().contains(&lhs.kind()) + || self.expression_list_node_kinds().contains(&lhs.kind()) + { + let object = lhs + .child_by_field_name("object") + .or_else(|| lhs.child_by_field_name("receiver")) + .or_else(|| lhs.child_by_field_name("expression")) + .or_else(|| lhs.child_by_field_name("operand")) + .or_else(|| lhs.child_by_field_name("value")) + .or_else(|| lhs.child_by_field_name("argument")) + .or_else(|| first_named_child_except(lhs, "navigation_suffix"))?; + let field = lhs + 
.child_by_field_name("field") + .or_else(|| lhs.child_by_field_name("property")) + .or_else(|| lhs.child_by_field_name("name")) + .or_else(|| lhs.child_by_field_name("suffix")) + .or_else(|| first_named_child_with_kind(lhs, "navigation_suffix")) + .or_else(|| named_children(lhs).into_iter().last())?; + let field_text = self.member_field_text(field, source)?; + return Some(Target { + receiver: normalize_text(node_text(object, source)), + field: strip_assignment_suffix(&field_text), + }); + } + + None + } + + fn member_expression_list(&self, node: Node<'_>, source: &str) -> bool { + if node.child_by_field_name("operand").is_some() + && node.child_by_field_name("field").is_some() + { + return true; + } + if !self + .field_like_dot_wrapper_node_kinds() + .contains(&node.kind()) + { + return false; + } + let text = node_text(node, source); + text.contains('.') || text.contains("->") || text.contains("::") || text.contains("?.") + } + + fn default_state_declaration( + &self, + node: Node<'_>, + source: &str, + ) -> Option<(String, Option)> { + if self + .assignment_state_declaration_node_kinds() + .contains(&node.kind()) + { + if let Some((field, r#type)) = self.assignment_state_declaration(node, source) { + return Some((field, r#type)); + } + } + if !self.field_declaration_node_kinds().contains(&node.kind()) { + return None; + } + let name = self.field_declaration_name_node(node, source)?; + let field = node_text(name, source).to_string(); + let r#type = declared_type_text(node, name, source); + Some((field, r#type)) + } + + fn field_declaration_name_node<'tree>( + &self, + node: Node<'tree>, + source: &str, + ) -> Option> { + node.child_by_field_name("name") + .or_else(|| self.declarator_name_node(node, source)) + .or_else(|| { + named_children(node) + .into_iter() + .find(|child| self.field_identifier_node_kinds().contains(&child.kind())) + }) + .or_else(|| { + named_children(node).into_iter().rev().find(|child| { + self.identifier_node_kinds().contains(&child.kind()) 
+ || self.field_identifier_node_kinds().contains(&child.kind()) + }) + }) + } + + fn declarator_name_node<'tree>(&self, node: Node<'tree>, _source: &str) -> Option> { + let mut pending = named_children(node); + let mut seen = HashSet::new(); + while let Some(current) = pending.pop() { + let key = format!("{:?}\0{}", span(current), current.kind()); + if !seen.insert(key) { + continue; + } + if self.identifier_node_kinds().contains(¤t.kind()) + || self.field_identifier_node_kinds().contains(¤t.kind()) + { + return Some(current); + } + pending.extend(named_children(current)); + } + None + } + + fn assignment_state_declaration( + &self, + node: Node<'_>, + source: &str, + ) -> Option<(String, Option)> { + let assignment = self.assignment_target(node)?; + let target = self.state_target(assignment.lhs, source)?; + if !matches!(target.receiver.as_str(), "self" | "this") { + return None; + } + let rhs = node + .child_by_field_name("right") + .or_else(|| node.child_by_field_name("value")) + .or_else(|| named_children(node).get(1).copied()); + let r#type = rhs.and_then(|node| inferred_assignment_type(node, source)); + r#type.map(|type_name| (target.field, Some(type_name))) + } + + fn assignment_target<'tree>(&self, node: Node<'tree>) -> Option> { + self.default_assignment_target(node) + } + + fn default_assignment_target<'tree>( + &self, + node: Node<'tree>, + ) -> Option> { + if !self.assignment_node_kinds().contains(&node.kind()) { + return None; + } + let lhs = node + .child_by_field_name("left") + .or_else(|| first_named_child(node))?; + Some(AssignmentTarget { lhs, source: node }) + } + + fn skip_state_write_node(&self, _node: Node<'_>) -> bool { + false + } + + fn skip_state_write_target(&self, target: &Target) -> bool { + target.field == "[]" + } + + fn state_write_source_node<'tree>( + &self, + _node: Node<'tree>, + assignment: &AssignmentTarget<'tree>, + ) -> Node<'tree> { + assignment.source + } + + fn assignment_lhs_node(&self, node: Node<'_>) -> bool { + if 
super::super::tree_sitter_adapter::previous_sibling_raw_text(node).as_deref() + == Some(":") + { + return false; + } + super::super::tree_sitter_adapter::next_sibling_raw_text(node) + .map(|token| self.assignment_operator_tokens().contains(&token.as_str())) + .unwrap_or(false) + } + + fn parenthesized_wrapper(&self, node: Node<'_>) -> bool { + self.parenthesized_wrapper_node_kinds() + .contains(&node.kind()) + && named_children(node).len() == 1 + } + + fn boolean_container(&self, node: Node<'_>) -> bool { + if self.boolean_container_node_kinds().contains(&node.kind()) { + return true; + } + if self.parenthesized_wrapper(node) { + return first_named_child(node) + .map(|child| self.boolean_container(child)) + .unwrap_or(false); + } + if !self.boolean_wrapper_node_kinds().contains(&node.kind()) { + return false; + } + if !self + .boolean_and_operators() + .contains(&super::super::tree_sitter_adapter::direct_operator(node).as_str()) + { + return false; + } + if named_children(node).len() < 2 { + return false; + } + let mut cursor = node.walk(); + let result = node.children(&mut cursor).all(|child| { + child.is_named() + || self.boolean_and_operators().contains(&child.kind()) + || matches!(child.kind(), "(" | ")") + }); + result + } + + fn decision_subject<'tree>(&self, node: Node<'tree>) -> Option> { + node.child_by_field_name("value") + .or_else(|| node.child_by_field_name("subject")) + .or_else(|| { + named_children(node) + .into_iter() + .find(|child| self.case_subject_node_kinds().contains(&child.kind())) + }) + .or_else(|| node.child_by_field_name("condition")) + .or_else(|| { + named_children(node) + .into_iter() + .find(|child| !self.case_subject_skip_node_kinds().contains(&child.kind())) + }) + } + + fn first_identifier_text(&self, node: Node<'_>, source: &str) -> Option { + let mut kinds = Vec::new(); + kinds.extend_from_slice(self.identifier_node_kinds()); + kinds.extend_from_slice(self.field_identifier_node_kinds()); + first_named_text(node, source, &kinds) 
+ } + + fn declarator_name(&self, node: Option>, source: &str) -> Option { + let mut pending = vec![node?]; + let mut seen = HashSet::new(); + while let Some(current) = pending.pop() { + let key = format!("{:?}\0{}", span(current), current.kind()); + if !seen.insert(key) { + continue; + } + if self.identifier_node_kinds().contains(¤t.kind()) + || self.field_identifier_node_kinds().contains(¤t.kind()) + { + return Some(node_text(current, source).to_string()); + } + let mut children = named_children(current); + children.reverse(); + pending.extend(children); + } + None + } + + fn function_parameter_list<'tree>(&self, node: Node<'tree>) -> Option> { + let declarator = node.child_by_field_name("declarator"); + declarator + .and_then(|declarator| declarator.child_by_field_name("parameters")) + .or_else(|| node.child_by_field_name("parameters")) + .or_else(|| { + named_children(node) + .into_iter() + .find(|child| self.parameter_list_node_kinds().contains(&child.kind())) + }) + .or_else(|| { + declarator.and_then(|declarator| { + named_children(declarator) + .into_iter() + .find(|child| self.parameter_list_node_kinds().contains(&child.kind())) + }) + }) + } + + fn parameter_name(&self, param: Node<'_>, source: &str) -> Option { + let name = if self + .parameter_identifier_node_kinds() + .contains(¶m.kind()) + { + Some(param) + } else { + param + .child_by_field_name("name") + .or_else(|| { + named_children(param) + .into_iter() + .filter(|child| { + self.parameter_identifier_node_kinds() + .contains(&child.kind()) + }) + .last() + }) + .or_else(|| self.descendant_parameter_name(param)) + }?; + let text = self.normalize_parameter_name(node_text(name, source)); + (!text.is_empty() && text != "_").then_some(text) + } + + fn descendant_parameter_name<'tree>(&self, node: Node<'tree>) -> Option> { + let mut found = None; + let mut stack = named_children(node); + while let Some(current) = stack.pop() { + if self + .parameter_identifier_node_kinds() + .contains(¤t.kind()) + { + 
found = Some(current); + } + stack.extend(named_children(current)); + } + found + } + + fn normalize_parameter_name(&self, text: &str) -> String { + text.to_string() + } + + fn impl_owner_name(&self, node: Node<'_>, source: &str) -> Option { + let r#type = node.child_by_field_name("type").or_else(|| { + named_children(node).into_iter().find(|child| { + self.receiver_type_node_kinds().contains(&child.kind()) + || self.identifier_node_kinds().contains(&child.kind()) + || self.field_identifier_node_kinds().contains(&child.kind()) + }) + })?; + Some(normalize_type_owner(node_text(r#type, source))) + } + + fn first_argument_receiver_parameter( + &self, + node: Node<'_>, + source: &str, + ) -> Option<(String, String)> { + let declarator = node.child_by_field_name("declarator"); + let params = declarator + .and_then(|declarator| declarator.child_by_field_name("parameters")) + .or_else(|| node.child_by_field_name("parameters")) + .or_else(|| { + named_children(node) + .into_iter() + .find(|child| self.parameter_list_node_kinds().contains(&child.kind())) + }) + .or_else(|| { + declarator.and_then(|declarator| { + named_children(declarator) + .into_iter() + .find(|child| self.parameter_list_node_kinds().contains(&child.kind())) + }) + })?; + + let first = named_children(params) + .into_iter() + .find(|child| self.receiver_parameter_node_kinds().contains(&child.kind()))?; + + let type_node = named_children(first).into_iter().find(|child| { + self.first_argument_receiver_type_node_kinds() + .contains(&child.kind()) + })?; + let name = named_children(first) + .into_iter() + .rev() + .find(|child| { + self.first_argument_receiver_name_node_kinds() + .contains(&child.kind()) + }) + .map(|child| node_text(child, source).to_string()) + .or_else(|| self.nested_receiver_name(first, source)) + .or_else(|| self.declarator_name(Some(first), source))?; + + Some((node_text(type_node, source).to_string(), name)) + } + + fn nested_receiver_name(&self, node: Node<'_>, source: &str) -> Option 
{ + for child in named_children(node).into_iter().rev() { + let direct = named_children(child).into_iter().rev().find(|grandchild| { + self.first_argument_receiver_name_node_kinds() + .contains(&grandchild.kind()) + }); + if let Some(direct) = direct { + return Some(node_text(direct, source).to_string()); + } + } + None + } + + fn member_field_text(&self, field: Node<'_>, source: &str) -> Option { + if self.navigation_suffix_node_kinds().contains(&field.kind()) { + let suffix = field + .child_by_field_name("suffix") + .or_else(|| { + named_children(field).into_iter().find(|child| { + self.identifier_node_kinds().contains(&child.kind()) + || self.field_identifier_node_kinds().contains(&child.kind()) + }) + }) + .or_else(|| named_children(field).into_iter().last())?; + let text = node_text(suffix, source) + .trim_start_matches(['.', '?']) + .trim_start_matches("->"); + return (!text.is_empty()).then(|| text.to_string()); + } + + Some( + node_text(field, source) + .trim_start_matches(['.', '?']) + .trim_start_matches("->") + .to_string(), + ) + } + + fn clone_candidates(&self, document: &Document) -> Vec { + let mut out = Vec::new(); + let mut seen = HashSet::new(); + + for function in &document.function_defs { + let candidate = clone_candidate_for( + self, + document, + &function.body, + Some("defn"), + Some(function.name.as_str()), + ); + clone_add_candidate(&mut out, &mut seen, candidate); + } + + let mut nodes = Vec::new(); + document.root.walk(&mut nodes); + for node in nodes { + if self.clone_candidate_node(node) { + let candidate = clone_candidate_for(self, document, node, None, None); + clone_add_candidate(&mut out, &mut seen, candidate); + } + } + + out + } + + fn clone_fingerprint(&self, node: &RawNode) -> (String, usize) { + clone_fingerprint_for_profile(self, node, &mut HashSet::new()) + } + + fn clone_candidate_node(&self, node: &RawNode) -> bool { + default_clone_candidate_node(node) + } + + fn clone_fingerprint_children<'a>(&self, node: &'a RawNode) -> 
Vec<&'a RawNode> { + node.children.iter().collect() + } + + fn clone_child_fingerprint( + &self, + _parent: &RawNode, + _child: &RawNode, + ) -> Option<(String, usize)> { + None + } +} + +fn clone_add_candidate( + out: &mut Vec, + seen: &mut HashSet, + candidate: Option, +) { + let Some(candidate) = candidate else { return }; + if clone_typed_struct_schema_text(&candidate.raw) { + return; + } + let key = format!( + "{}\0{}\0{:?}\0{}\0{}", + candidate.file, candidate.line, candidate.span, candidate.node_name, candidate.fingerprint + ); + if seen.insert(key) { + out.push(candidate); + } +} + +fn clone_candidate_for( + profile: &P, + document: &Document, + node: &RawNode, + node_name: Option<&str>, + function_name: Option<&str>, +) -> Option { + let (fingerprint, mass) = profile.clone_fingerprint(node); + if fingerprint.is_empty() { + return None; + } + + let line = node.line(); + let method = clone_method_span_for(document, line); + let children = clone_fuzzy_children_for(profile, node); + let mut child_fingerprints = Vec::new(); + let mut child_masses = Vec::new(); + for child in children { + let (child_fp, child_mass) = profile.clone_fingerprint(child); + if !child_fp.is_empty() && child_mass > 0 { + child_fingerprints.push(child_fp); + child_masses.push(child_mass); + } + } + + Some(CloneCandidate { + file: document.file.clone(), + line, + span: node.span, + method_name: function_name + .map(ToString::to_string) + .or_else(|| method.map(|function| function.name.clone())) + .unwrap_or_else(|| "(top-level)".to_string()), + node_name: node_name + .map(ToString::to_string) + .unwrap_or_else(|| clone_node_name(node).to_string()), + mass, + fingerprint, + raw: normalize_text(&node.text), + child_fingerprints, + child_masses, + }) +} + +pub(super) fn default_clone_candidate_node(node: &RawNode) -> bool { + node.named + && !CLONE_SKIP_KINDS.contains(&node.kind.as_str()) + && CLONE_CANDIDATE_KINDS.contains(&node.kind.as_str()) + && 
!clone_typed_struct_schema_text(&node.text) + && !node.named_children().is_empty() +} + +fn clone_fuzzy_children_for<'a, P: LanguageProfile + ?Sized>( + profile: &P, + node: &'a RawNode, +) -> Vec<&'a RawNode> { + let source = clone_body_node_for(profile, node).unwrap_or(node); + let mut children = profile + .clone_fingerprint_children(source) + .into_iter() + .filter(|child| child.named) + .collect::>(); + if children.is_empty() { + children = profile + .clone_fingerprint_children(node) + .into_iter() + .filter(|child| child.named) + .collect(); + } + children + .into_iter() + .filter(|child| { + !CLONE_SKIP_KINDS.contains(&child.kind.as_str()) + && !clone_typed_struct_schema_text(&child.text) + }) + .collect() +} + +fn clone_body_node_for<'a, P: LanguageProfile + ?Sized>( + profile: &P, + node: &'a RawNode, +) -> Option<&'a RawNode> { + clone_body_node(node).or_else(|| { + profile + .clone_fingerprint_children(node) + .into_iter() + .find(|child| CLONE_BODY_KINDS.contains(&child.kind.as_str())) + }) +} + +fn clone_body_node(node: &RawNode) -> Option<&RawNode> { + node.children + .iter() + .find(|child| CLONE_BODY_KINDS.contains(&child.kind.as_str())) +} + +fn declared_type_text(node: Node<'_>, name: Node<'_>, source: &str) -> Option { + if let Some(r#type) = node.child_by_field_name("type") { + let text = normalize_text(node_text(r#type, source)); + if !text.is_empty() { + return Some(text); + } + } + + let text = node_text(node, source); + let name_text = node_text(name, source); + let before_name = text.split(name_text).next().unwrap_or("").trim(); + let candidate = before_name + .split_whitespace() + .filter(|token| { + !matches!( + *token, + "public" | "private" | "protected" | "static" | "final" | "const" + ) + }) + .last() + .unwrap_or("") + .trim_matches(['*', '&']); + (!candidate.is_empty()).then(|| candidate.to_string()) +} + +fn inferred_assignment_type(node: Node<'_>, source: &str) -> Option { + let text = normalize_text(node_text(node, source)); + for 
prefix in ["new ", ""] { + let value = text.strip_prefix(prefix).unwrap_or(&text); + let candidate = value + .split(['(', '{', '<', ' ', ':']) + .next() + .unwrap_or("") + .trim(); + if candidate + .chars() + .next() + .map(|ch| ch.is_ascii_uppercase()) + .unwrap_or(false) + { + return Some(candidate.to_string()); + } + } + None +} + +fn clone_fingerprint_for_profile( + profile: &P, + node: &RawNode, + active: &mut HashSet, +) -> (String, usize) { + let key = clone_node_key(node); + if active.contains(&key) || node.kind == "comment" { + return (String::new(), 0); + } + active.insert(key.clone()); + let out = + if CLONE_CALL_KINDS.contains(&node.kind.as_str()) && clone_call_message(node).is_some() { + clone_fingerprint_call(profile, node, active) + } else if node.children.is_empty() { + let token = clone_terminal_token(node); + if token.is_empty() { + (String::new(), 0) + } else { + (token, 1) + } + } else { + let mut child_parts = Vec::new(); + let mut mass = 1; + for child in profile.clone_fingerprint_children(node) { + let (child_fp, child_mass) = profile + .clone_child_fingerprint(node, child) + .unwrap_or_else(|| clone_fingerprint_for_profile(profile, child, active)); + if child_fp.is_empty() { + continue; + } + child_parts.push(child_fp); + mass += child_mass; + } + if child_parts.is_empty() { + (clone_terminal_token(node), 1) + } else { + (format!("{}({})", node.kind, child_parts.join(" ")), mass) + } + }; + active.remove(&key); + out +} + +fn clone_fingerprint_call( + profile: &P, + node: &RawNode, + active: &mut HashSet, +) -> (String, usize) { + let message = clone_call_message(node).unwrap_or_default(); + let mut child_parts = Vec::new(); + let mut mass = 1; + for child in profile.clone_fingerprint_children(node) { + let (child_fp, child_mass) = profile + .clone_child_fingerprint(node, child) + .unwrap_or_else(|| clone_fingerprint_for_profile(profile, child, active)); + if child_fp.is_empty() { + continue; + } + child_parts.push(child_fp); + mass += 
child_mass; + } + ( + format!("{}<{}>({})", node.kind, message, child_parts.join(" ")), + mass, + ) +} + +fn clone_call_message(node: &RawNode) -> Option { + if !node.children.iter().any(|child| { + matches!( + child.kind.as_str(), + "argument_list" | "arguments" | "call_suffix" + ) + }) { + return None; + } + let argument_start = node + .children + .iter() + .find(|child| { + matches!( + child.kind.as_str(), + "argument_list" | "arguments" | "call_suffix" + ) + }) + .map(|child| (child.span[0], child.span[1])); + let named_before_args = node + .named_children() + .into_iter() + .filter(|child| { + argument_start + .map(|start| (child.span[0], child.span[1]) < start) + .unwrap_or(true) + }) + .collect::>(); + named_before_args + .last() + .and_then(|callee| clone_callee_message(callee)) +} + +fn clone_callee_message(node: &RawNode) -> Option { + if CLONE_IDENTIFIER_KINDS.contains(&node.kind.as_str()) { + return Some(node.text.clone()); + } + if matches!( + node.kind.as_str(), + "navigation_expression" | "directly_assignable_expression" + ) { + return clone_navigation_suffix_message(node); + } + + node.named_children() + .into_iter() + .rev() + .find(|child| CLONE_IDENTIFIER_KINDS.contains(&child.kind.as_str())) + .map(|child| child.text.clone()) +} + +fn clone_navigation_suffix_message(node: &RawNode) -> Option { + let suffix = node + .named_children() + .into_iter() + .rev() + .find(|child| child.kind == "navigation_suffix")?; + suffix + .named_children() + .into_iter() + .rev() + .find(|child| CLONE_IDENTIFIER_KINDS.contains(&child.kind.as_str())) + .map(|child| child.text.clone()) +} + +fn clone_terminal_token(node: &RawNode) -> String { + let kind = node.kind.as_str(); + if CLONE_IDENTIFIER_KINDS.contains(&kind) { + return "id".to_string(); + } + if CLONE_LITERAL_KINDS.contains(&kind) { + return clone_literal_token(kind).to_string(); + } + let text = normalize_text(&node.text); + if text.is_empty() { + return String::new(); + } + if clone_identifier_text(&text) 
{ + return "id".to_string(); + } + if clone_literal_text(&text) { + return "lit".to_string(); + } + format!("{kind}:{text}") +} + +fn clone_literal_token(kind: &str) -> &str { + match kind { + "true" | "false" => "bool", + "nil" | "none" | "null" => "nil", + _ => "lit", + } +} + +fn clone_identifier_text(text: &str) -> bool { + let mut chars = text.chars(); + let Some(first) = chars.next() else { + return false; + }; + (first == '_' || first.is_ascii_alphabetic()) + && chars.all(|char| { + char == '_' || char == '!' || char == '?' || char == '=' || char.is_ascii_alphanumeric() + }) +} + +fn clone_literal_text(text: &str) -> bool { + if clone_symbol_literal_text(text) + || clone_quoted_literal_text(text, '"') + || clone_quoted_literal_text(text, '\'') + { + return true; + } + text.parse::().is_ok() +} + +fn clone_symbol_literal_text(text: &str) -> bool { + let mut chars = text.chars(); + if chars.next() != Some(':') { + return false; + } + let Some(first) = chars.next() else { + return false; + }; + (first == '_' || first.is_ascii_alphabetic()) + && chars.all(|char| char == '_' || char.is_ascii_alphanumeric()) +} + +fn clone_quoted_literal_text(text: &str, quote: char) -> bool { + text.len() >= 2 && text.starts_with(quote) && text.ends_with(quote) +} + +fn clone_node_name(node: &RawNode) -> &str { + match node.kind.as_str() { + "method" + | "function_definition" + | "function_declaration" + | "method_definition" + | "function_item" => "defn", + "singleton_method" => "defs", + other => other, + } +} + +fn clone_typed_struct_schema_text(text: &str) -> bool { + text.contains("< T::Struct") + || text.contains("( + document: &'a Document, + line_no: usize, +) -> Option<&'a super::super::FunctionDef> { + document + .function_defs + .iter() + .find(|function| function.span[0] <= line_no && line_no <= function.span[2]) +} + +fn generic_loop_context(node: Node<'_>, source: &str) -> bool { + matches!( + node.kind(), + "while" + | "until" + | "for" + | "do_block" + | 
"while_statement" + | "until_statement" + | "for_statement" + | "for_in_statement" + | "enhanced_for_statement" + | "foreach_statement" + | "for_range_loop" + | "for_expression" + | "loop_expression" + ) || matches!(node.kind(), "expression_statement" | "labeled_statement") + && normalize_text(node_text(node, source)) + .trim_start() + .starts_with("for ") +} + +fn generic_branch_context(node: Node<'_>, source: &str) -> bool { + if matches!( + node.kind(), + "if" | "unless" + | "if_modifier" + | "unless_modifier" + | "case" + | "if_statement" + | "if_expression" + | "case_statement" + | "switch_statement" + | "switch_expression" + | "match_statement" + | "match_expression" + | "when_expression" + | "expression_switch_statement" + ) { + return true; + } + + let first_token_is_branch = matches!( + node.kind(), + "body_statement" | "block" | "statements" | "statement_list" + ) && { + let mut cursor = node.walk(); + let result = node + .children(&mut cursor) + .next() + .map(|child| matches!(child.kind(), "if" | "unless" | "case")) + .unwrap_or(false); + result + }; + first_token_is_branch + || node.kind() == "expression_statement" + && normalize_text(node_text(node, source)) + .trim_start() + .starts_with("if ") +} + +pub(crate) fn protocol_method_name(name: &str) -> String { + name.split(['.', ':']) + .filter(|part| !part.is_empty()) + .last() + .unwrap_or(name) + .to_string() +} + +pub(crate) fn normalize_protocol_state(name: &str) -> String { + name.trim_start_matches('@') + .trim_end_matches('=') + .to_string() +} + +fn clone_node_key(node: &RawNode) -> String { + format!( + "{}\0{}\0{}\0{}\0{}\0{}", + node.kind, + node.span[0], + node.span[1], + node.span[2], + node.span[3], + node.text.len() + ) +} + +fn snake_case_type_name(type_str: &str) -> String { + type_str + .split("::") + .last() + .unwrap_or(type_str) + .chars() + .enumerate() + .fold(String::new(), |mut acc, (index, ch)| { + if index > 0 && ch.is_ascii_uppercase() { + acc.push('_'); + } + 
acc.push(ch.to_ascii_lowercase()); + acc + }) +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/c.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/c.rs new file mode 100644 index 000000000..ddb85ffe2 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/c.rs @@ -0,0 +1,155 @@ +use super::super::tree_sitter_adapter::normalize_type_owner; +use super::super::Language; +use super::base::LanguageProfile; +use crate::decomplex::ast::node_text; +use tree_sitter::{Language as TreeSitterLanguage, Node}; + +pub(crate) struct CProfile; + +impl LanguageProfile for CProfile { + fn language(&self) -> Language { + Language::C + } + + fn grammar(&self) -> TreeSitterLanguage { + tree_sitter_c::LANGUAGE.into() + } + + fn function_visibility(&self, node: Node<'_>, source: &str) -> Option { + if node_text(node, source).trim_start().starts_with("static ") { + Some("private".to_string()) + } else { + Some("public".to_string()) + } + } + + fn first_argument_receiver(&self) -> bool { + true + } + + fn function_node_kinds(&self) -> &[&str] { + &["function_definition"] + } + + fn struct_owner_node_kinds(&self) -> &[&str] { + &["struct_specifier"] + } + + fn parameter_list_node_kinds(&self) -> &[&str] { + &["parameter_list"] + } + + fn parameter_identifier_node_kinds(&self) -> &[&str] { + &["identifier", "field_identifier"] + } + + fn function_body_node_kinds(&self) -> &[&str] { + &["compound_statement"] + } + + fn call_node_kinds(&self) -> &[&str] { + &["call_expression"] + } + + fn identifier_node_kinds(&self) -> &[&str] { + &["identifier"] + } + + fn field_identifier_node_kinds(&self) -> &[&str] { + &["field_identifier"] + } + + fn assignment_node_kinds(&self) -> &[&str] { + &["assignment_expression"] + } + + fn assignment_operator_tokens(&self) -> &[&str] { + &["=", "+=", "-=", "*=", "/=", "%="] + } + + fn path_action_node_kinds(&self) -> &[&str] { + &[ + "call_expression", + "expression_statement", + "return_statement", + ] + } + + fn 
simple_action_wrapper_node_kinds(&self) -> &[&str] { + &["compound_statement"] + } + + fn local_declaration_node_kinds(&self) -> &[&str] { + &["declaration", "init_declarator"] + } + + fn receiver_parameter_node_kinds(&self) -> &[&str] { + &["parameter_declaration"] + } + + fn first_argument_receiver_type_node_kinds(&self) -> &[&str] { + &[ + "type_identifier", + "primitive_type", + "qualified_identifier", + "scoped_type_identifier", + ] + } + + fn first_argument_receiver_name_node_kinds(&self) -> &[&str] { + &["identifier", "field_identifier"] + } + + fn comparison_node_kinds(&self) -> &[&str] { + &["binary_expression"] + } + + fn branch_node_kinds(&self) -> &[&str] { + &["if_statement", "for_statement", "switch_statement"] + } + + fn case_node_kinds(&self) -> &[&str] { + &["switch_statement"] + } + + fn case_arm_node_kinds(&self) -> &[&str] { + &["case_statement"] + } + + fn case_container_stop_node_kinds(&self) -> &[&str] { + &["function_definition", "struct_specifier"] + } + + fn case_subject_skip_node_kinds(&self) -> &[&str] { + &["case_statement", "else", "comment"] + } + + fn default_case_patterns(&self) -> &[&str] { + &["_", "default"] + } + + fn boolean_and_operators(&self) -> &[&str] { + &["&&", "and"] + } + + fn boolean_container_node_kinds(&self) -> &[&str] { + &["binary_expression"] + } + + fn parenthesized_wrapper_node_kinds(&self) -> &[&str] { + &["parenthesized_expression"] + } + + fn field_like_node_kinds(&self) -> &[&str] { + &["field_expression"] + } + + fn receiver_convention_owner_name(&self, node: Node<'_>, source: &str) -> Option { + if !self.first_argument_receiver() || node.kind() != "function_definition" { + return None; + } + + let (type_name, name) = self.first_argument_receiver_parameter(node, source)?; + (name == "self").then(|| normalize_type_owner(&type_name)) + } +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/cpp.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/cpp.rs new file mode 100644 index 
000000000..82f7df4b3 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/cpp.rs @@ -0,0 +1,194 @@ +use super::super::Language; +use super::base::LanguageProfile; +use crate::decomplex::ast::node_text; +use tree_sitter::{Language as TreeSitterLanguage, Node}; + +pub(crate) struct CppProfile; + +impl LanguageProfile for CppProfile { + fn language(&self) -> Language { + Language::Cpp + } + + fn grammar(&self) -> TreeSitterLanguage { + tree_sitter_cpp::LANGUAGE.into() + } + + fn function_visibility(&self, node: Node<'_>, source: &str) -> Option { + cpp_previous_access_specifier(node, source).or_else(|| Some("private".to_string())) + } + + fn function_node_kinds(&self) -> &[&str] { + &["function_definition"] + } + + fn class_owner_node_kinds(&self) -> &[&str] { + &["class_specifier"] + } + + fn struct_owner_node_kinds(&self) -> &[&str] { + &["struct_specifier"] + } + + fn parameter_list_node_kinds(&self) -> &[&str] { + &["parameter_list"] + } + + fn parameter_identifier_node_kinds(&self) -> &[&str] { + &["identifier", "field_identifier"] + } + + fn function_body_node_kinds(&self) -> &[&str] { + &["compound_statement"] + } + + fn call_node_kinds(&self) -> &[&str] { + &["call_expression"] + } + + fn identifier_node_kinds(&self) -> &[&str] { + &[ + "identifier", + "type_identifier", + "qualified_identifier", + "namespace_identifier", + ] + } + + fn field_identifier_node_kinds(&self) -> &[&str] { + &["field_identifier"] + } + + fn assignment_node_kinds(&self) -> &[&str] { + &["assignment_expression"] + } + + fn assignment_operator_tokens(&self) -> &[&str] { + &["=", "+=", "-=", "*=", "/=", "%="] + } + + fn path_action_node_kinds(&self) -> &[&str] { + &[ + "call_expression", + "expression_statement", + "return_statement", + ] + } + + fn simple_action_wrapper_node_kinds(&self) -> &[&str] { + &["compound_statement"] + } + + fn local_declaration_node_kinds(&self) -> &[&str] { + &["declaration", "init_declarator"] + } + + fn 
local_variable_declarator_node_kinds(&self) -> &[&str] { + &["init_declarator"] + } + + fn field_declaration_node_kinds(&self) -> &[&str] { + &["field_declaration"] + } + + fn declaration_site_parent_node_kinds(&self) -> &[&str] { + &[ + "parameter_declaration", + "init_declarator", + "function_declarator", + "class_specifier", + "struct_specifier", + ] + } + + fn assignment_state_declaration_node_kinds(&self) -> &[&str] { + &["assignment_expression"] + } + + fn implicit_state_accesses(&self) -> bool { + true + } + + fn receiver_type_node_kinds(&self) -> &[&str] { + &[ + "type_identifier", + "qualified_identifier", + "scoped_type_identifier", + ] + } + + fn receiver_parameter_node_kinds(&self) -> &[&str] { + &["parameter_declaration"] + } + + fn first_argument_receiver_type_node_kinds(&self) -> &[&str] { + &[ + "type_identifier", + "primitive_type", + "qualified_identifier", + "scoped_type_identifier", + ] + } + + fn first_argument_receiver_name_node_kinds(&self) -> &[&str] { + &["identifier", "field_identifier"] + } + + fn comparison_node_kinds(&self) -> &[&str] { + &["binary_expression"] + } + + fn branch_node_kinds(&self) -> &[&str] { + &["if_statement", "for_range_loop", "switch_statement"] + } + + fn case_node_kinds(&self) -> &[&str] { + &["switch_statement"] + } + + fn case_arm_node_kinds(&self) -> &[&str] { + &["case_statement"] + } + + fn case_container_stop_node_kinds(&self) -> &[&str] { + &["function_definition", "class_specifier", "struct_specifier"] + } + + fn case_subject_skip_node_kinds(&self) -> &[&str] { + &["case_statement", "else", "comment"] + } + + fn default_case_patterns(&self) -> &[&str] { + &["_", "default"] + } + + fn boolean_and_operators(&self) -> &[&str] { + &["&&", "and"] + } + + fn boolean_container_node_kinds(&self) -> &[&str] { + &["binary_expression"] + } + + fn parenthesized_wrapper_node_kinds(&self) -> &[&str] { + &["condition_clause", "parenthesized_expression"] + } + + fn field_like_node_kinds(&self) -> &[&str] { + 
&["field_expression"] + } +} + +fn cpp_previous_access_specifier(node: Node<'_>, source: &str) -> Option { + let mut sibling = node.prev_sibling(); + while let Some(current) = sibling { + if current.kind() == "access_specifier" { + let text = node_text(current, source); + if matches!(text, "public" | "private" | "protected") { + return Some(text.to_string()); + } + } + sibling = current.prev_sibling(); + } + None +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/csharp.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/csharp.rs new file mode 100644 index 000000000..18307f0fb --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/csharp.rs @@ -0,0 +1,162 @@ +use super::super::Language; +use super::base::LanguageProfile; +use crate::decomplex::ast::node_text; +use tree_sitter::{Language as TreeSitterLanguage, Node}; + +pub(crate) struct CSharpProfile; + +impl LanguageProfile for CSharpProfile { + fn language(&self) -> Language { + Language::CSharp + } + + fn grammar(&self) -> TreeSitterLanguage { + tree_sitter_c_sharp::LANGUAGE.into() + } + + fn function_visibility(&self, node: Node<'_>, source: &str) -> Option { + for child in super::super::tree_sitter_adapter::named_children(node) { + if child.kind() != "modifier" { + continue; + } + let text = node_text(child, source); + if matches!(text, "public" | "private" | "protected") { + return Some(text.to_string()); + } + } + Some("private".to_string()) + } + + fn function_node_kinds(&self) -> &[&str] { + &["method_declaration"] + } + + fn class_owner_node_kinds(&self) -> &[&str] { + &["class_declaration"] + } + + fn parameter_list_node_kinds(&self) -> &[&str] { + &["parameter_list"] + } + + fn parameter_identifier_node_kinds(&self) -> &[&str] { + &["identifier"] + } + + fn function_body_node_kinds(&self) -> &[&str] { + &["block", "declaration_list"] + } + + fn call_node_kinds(&self) -> &[&str] { + &["invocation_expression"] + } + + fn identifier_node_kinds(&self) -> &[&str] { + 
&["identifier"] + } + + fn assignment_node_kinds(&self) -> &[&str] { + &["assignment_expression"] + } + + fn assignment_operator_tokens(&self) -> &[&str] { + &["=", "+=", "-=", "*=", "/=", "%="] + } + + fn path_action_node_kinds(&self) -> &[&str] { + &[ + "invocation_expression", + "expression_statement", + "return_statement", + ] + } + + fn simple_action_wrapper_node_kinds(&self) -> &[&str] { + &["block"] + } + + fn local_identifier_wrapper_node_kinds(&self) -> &[&str] { + &["argument"] + } + + fn local_declaration_node_kinds(&self) -> &[&str] { + &[ + "local_declaration_statement", + "variable_declaration", + "variable_declarator", + ] + } + + fn local_variable_declarator_node_kinds(&self) -> &[&str] { + &["variable_declarator"] + } + + fn variable_declaration_node_kinds(&self) -> &[&str] { + &["variable_declaration"] + } + + fn field_declaration_node_kinds(&self) -> &[&str] { + &["field_declaration"] + } + + fn declaration_site_parent_node_kinds(&self) -> &[&str] { + &[ + "parameter", + "variable_declarator", + "method_declaration", + "class_declaration", + ] + } + + fn assignment_state_declaration_node_kinds(&self) -> &[&str] { + &["assignment_expression"] + } + + fn implicit_state_accesses(&self) -> bool { + true + } + + fn comparison_node_kinds(&self) -> &[&str] { + &["binary_expression"] + } + + fn branch_node_kinds(&self) -> &[&str] { + &["if_statement", "foreach_statement", "switch_statement"] + } + + fn case_node_kinds(&self) -> &[&str] { + &["switch_statement"] + } + + fn case_arm_node_kinds(&self) -> &[&str] { + &["switch_section"] + } + + fn case_container_stop_node_kinds(&self) -> &[&str] { + &["method_declaration", "class_declaration"] + } + + fn case_subject_skip_node_kinds(&self) -> &[&str] { + &["switch_section", "else", "comment"] + } + + fn default_case_patterns(&self) -> &[&str] { + &["_", "default"] + } + + fn boolean_and_operators(&self) -> &[&str] { + &["&&", "and"] + } + + fn boolean_container_node_kinds(&self) -> &[&str] { + 
&["binary_expression"] + } + + fn parenthesized_wrapper_node_kinds(&self) -> &[&str] { + &["parenthesized_expression"] + } + + fn field_like_node_kinds(&self) -> &[&str] { + &["member_access_expression"] + } +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/false_simplicity_lexicon.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/false_simplicity_lexicon.rs new file mode 100644 index 000000000..d15b5ee2d --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/false_simplicity_lexicon.rs @@ -0,0 +1,714 @@ +use super::super::Language; + +#[derive(Clone, Copy)] +pub(crate) struct FalseSimplicityLexicon { + pub(crate) dispatch_mids: &'static [&'static str], + pub(crate) meta_mids: &'static [&'static str], + pub(crate) method_obj_mids: &'static [&'static str], + pub(crate) io_consts: &'static [&'static str], + pub(crate) io_bare: &'static [&'static str], + pub(crate) dir_context: &'static [&'static str], + pub(crate) context_pairs: &'static [(&'static str, &'static [&'static str])], + pub(crate) context_bare: &'static [&'static str], + pub(crate) callback_set: &'static [&'static str], + pub(crate) core_consts: &'static [&'static str], +} + +const EMPTY: &[&str] = &[]; +const EMPTY_PAIRS: &[(&str, &[&str])] = &[]; +const COMMON_CALLBACK_SET: &[&str] = &[ + "transaction", + "synchronize", + "lock", + "with_lock", + "unlock", + "mutex", + "atomic", + "subscribe", + "callback", + "hook", +]; + +const RUBY_CONTEXT_PAIRS: &[(&str, &[&str])] = &[ + ("Time", &["now", "current"]), + ("Date", &["today", "current"]), + ("DateTime", &["now", "current"]), + ("Process", &["pid", "ppid", "uid", "gid", "euid"]), + ("Thread", &["current", "list", "main"]), + ("Fiber", &["current"]), + ("Random", &["rand", "bytes"]), + ("GC", &["stat", "count"]), + ("ObjectSpace", &["each_object", "count_objects"]), +]; +const PYTHON_CONTEXT_PAIRS: &[(&str, &[&str])] = &[ + ("time", &["time", "monotonic", "perf_counter"]), + ("datetime", &["now", "today", "utcnow"]), + 
("random", &["random", "randint", "randrange", "choice"]), +]; +const JS_CONTEXT_PAIRS: &[(&str, &[&str])] = &[ + ("Date", &["now"]), + ("Math", &["random"]), + ("performance", &["now"]), +]; +const GO_CONTEXT_PAIRS: &[(&str, &[&str])] = &[ + ("time", &["Now", "Since", "Until"]), + ("rand", &["Int", "Intn", "Float64", "Read"]), +]; +const RUST_CONTEXT_PAIRS: &[(&str, &[&str])] = &[("SystemTime", &["now"]), ("Instant", &["now"])]; +const ZIG_CONTEXT_PAIRS: &[(&str, &[&str])] = + &[("time", &["timestamp", "nanoTimestamp", "milliTimestamp"])]; +const LUA_CONTEXT_PAIRS: &[(&str, &[&str])] = &[ + ("os", &["time", "clock", "date", "getenv"]), + ("math", &["random"]), +]; +const CPP_CONTEXT_PAIRS: &[(&str, &[&str])] = + &[("chrono", &["now"]), ("random_device", &["operator()"])]; +const CSHARP_CONTEXT_PAIRS: &[(&str, &[&str])] = &[ + ("DateTime", &["Now", "UtcNow", "Today"]), + ("Guid", &["NewGuid"]), + ("Random", &["Next", "NextDouble"]), +]; +const JAVA_CONTEXT_PAIRS: &[(&str, &[&str])] = &[ + ( + "System", + &["currentTimeMillis", "nanoTime", "getenv", "getProperty"], + ), + ("Instant", &["now"]), + ("UUID", &["randomUUID"]), + ("Math", &["random"]), +]; +const SWIFT_CONTEXT_PAIRS: &[(&str, &[&str])] = &[("Date", &["now"]), ("UUID", &["init"])]; +const KOTLIN_CONTEXT_PAIRS: &[(&str, &[&str])] = &[ + ( + "System", + &["currentTimeMillis", "nanoTime", "getenv", "getProperty"], + ), + ("Instant", &["now"]), + ("UUID", &["randomUUID"]), + ("Random", &["nextInt", "nextLong", "nextDouble"]), +]; +const PHP_CONTEXT_PAIRS: &[(&str, &[&str])] = &[ + ("DateTime", &["createFromFormat"]), + ("DateTimeImmutable", &["createFromFormat"]), + ("random_int", &["call"]), +]; + +const RUBY_CALLBACK_SET: &[&str] = &[ + "transaction", + "synchronize", + "lock", + "with_lock", + "unlock", + "mutex", + "atomic", + "reentrant", + "subscribe", + "callback", + "hook", +]; +const GO_CALLBACK_SET: &[&str] = &[ + "transaction", + "synchronize", + "lock", + "with_lock", + "unlock", + "mutex", + 
"atomic", + "subscribe", + "callback", + "hook", + "Lock", + "Unlock", + "RLock", + "RUnlock", + "Do", + "Go", + "Add", + "Done", + "Wait", +]; +const RUST_CALLBACK_SET: &[&str] = &[ + "transaction", + "synchronize", + "lock", + "with_lock", + "unlock", + "mutex", + "atomic", + "subscribe", + "callback", + "hook", + "read", + "write", + "spawn", + "await", +]; +const ZIG_CALLBACK_SET: &[&str] = &[ + "transaction", + "synchronize", + "lock", + "with_lock", + "unlock", + "mutex", + "atomic", + "subscribe", + "callback", + "hook", + "spawn", + "wait", + "signal", +]; +const C_CALLBACK_SET: &[&str] = &[ + "transaction", + "synchronize", + "lock", + "with_lock", + "unlock", + "mutex", + "atomic", + "subscribe", + "callback", + "hook", + "pthread_mutex_lock", + "pthread_mutex_unlock", +]; +const CPP_CALLBACK_SET: &[&str] = &[ + "transaction", + "synchronize", + "lock", + "with_lock", + "unlock", + "mutex", + "atomic", + "subscribe", + "callback", + "hook", + "try_lock", + "wait", + "notify_one", + "notify_all", +]; +const CSHARP_CALLBACK_SET: &[&str] = &[ + "transaction", + "synchronize", + "lock", + "with_lock", + "unlock", + "mutex", + "atomic", + "subscribe", + "callback", + "hook", + "Lock", + "Monitor", + "Enter", + "Exit", + "Wait", + "Pulse", +]; +const JAVA_CALLBACK_SET: &[&str] = &[ + "transaction", + "synchronize", + "lock", + "with_lock", + "unlock", + "mutex", + "atomic", + "subscribe", + "callback", + "hook", + "wait", + "notify", + "notifyAll", + "submit", + "execute", +]; +const SWIFT_CALLBACK_SET: &[&str] = &[ + "transaction", + "synchronize", + "lock", + "with_lock", + "unlock", + "mutex", + "atomic", + "subscribe", + "callback", + "hook", + "async", + "sync", +]; +const KOTLIN_CALLBACK_SET: &[&str] = &[ + "transaction", + "synchronize", + "lock", + "with_lock", + "unlock", + "mutex", + "atomic", + "subscribe", + "callback", + "hook", + "synchronized", + "launch", + "async", + "await", +]; + +const RUBY_CORE_CONSTS: &[&str] = &[ + "String", + "Symbol", + 
"Integer", + "Float", + "Numeric", + "Rational", + "Complex", + "Array", + "Hash", + "Set", + "Range", + "Struct", + "Object", + "BasicObject", + "Kernel", + "Module", + "Class", + "Comparable", + "Enumerable", + "Enumerator", + "Proc", + "Method", + "UnboundMethod", + "NilClass", + "TrueClass", + "FalseClass", + "Exception", + "StandardError", + "RuntimeError", + "ArgumentError", + "TypeError", + "NameError", + "NoMethodError", + "IO", + "File", + "Dir", + "Time", + "Date", + "DateTime", + "Regexp", + "MatchData", + "Thread", + "Mutex", + "Fiber", + "Process", + "Math", + "GC", + "ObjectSpace", + "Marshal", + "Random", + "Encoding", +]; + +pub(crate) fn false_simplicity_lexicon(language: Language) -> FalseSimplicityLexicon { + match language { + Language::Ruby => FalseSimplicityLexicon { + dispatch_mids: &[ + "send", + "__send__", + "public_send", + "const_get", + "constantize", + "instance_variable_get", + ], + meta_mids: &[ + "define_method", + "define_singleton_method", + "alias_method", + "class_eval", + "module_eval", + "instance_eval", + "class_exec", + "module_exec", + "instance_exec", + "eval", + "const_set", + "instance_variable_set", + "remove_method", + "undef_method", + "prepend", + "singleton_class", + "binding", + ], + method_obj_mids: &["method", "public_method", "instance_method"], + io_consts: &[ + "File", + "IO", + "Dir", + "FileUtils", + "Open3", + "Socket", + "TCPSocket", + "UDPSocket", + "TCPServer", + "UNIXSocket", + "Tempfile", + "Pathname", + "Marshal", + ], + io_bare: &[ + "puts", + "print", + "warn", + "gets", + "readline", + "readlines", + "system", + "exec", + "spawn", + "fork", + "sleep", + "open", + "abort", + "exit", + "exit!", + ], + dir_context: &["pwd", "getwd", "home"], + context_pairs: RUBY_CONTEXT_PAIRS, + context_bare: &["rand", "srand"], + callback_set: RUBY_CALLBACK_SET, + core_consts: RUBY_CORE_CONSTS, + }, + Language::Python => FalseSimplicityLexicon { + dispatch_mids: &[ + "getattr", + "setattr", + "hasattr", + 
"__getattr__", + "__setattr__", + "import_module", + ], + meta_mids: &[ + "eval", "exec", "compile", "type", "globals", "locals", "vars", "setattr", + "delattr", + ], + method_obj_mids: &["method"], + io_consts: &[ + "Path", + "pathlib", + "os", + "sys", + "subprocess", + "socket", + "shutil", + ], + io_bare: &["print", "input", "open", "exec", "eval"], + dir_context: &["getcwd", "home"], + context_pairs: PYTHON_CONTEXT_PAIRS, + context_bare: &["random", "randint", "randrange"], + callback_set: COMMON_CALLBACK_SET, + core_consts: EMPTY, + }, + Language::JavaScript | Language::TypeScript => FalseSimplicityLexicon { + dispatch_mids: &["eval", "Function", "call", "apply", "bind"], + meta_mids: &[ + "eval", + "Function", + "defineProperty", + "defineProperties", + "setPrototypeOf", + ], + method_obj_mids: &["method"], + io_consts: &["console", "Console", "fs", "process", "Deno", "Bun"], + io_bare: &["setTimeout", "setInterval", "fetch", "require", "import"], + dir_context: EMPTY, + context_pairs: JS_CONTEXT_PAIRS, + context_bare: EMPTY, + callback_set: COMMON_CALLBACK_SET, + core_consts: EMPTY, + }, + Language::Go => FalseSimplicityLexicon { + dispatch_mids: &[ + "Call", + "CallSlice", + "Method", + "MethodByName", + "ValueOf", + "TypeOf", + ], + meta_mids: &["Call", "CallSlice", "MethodByName", "New", "MakeFunc"], + method_obj_mids: &["method"], + io_consts: &["os", "io", "ioutil", "fs", "net", "http", "exec", "syscall"], + io_bare: &["panic", "print", "println", "recover"], + dir_context: &["Getwd", "UserHomeDir"], + context_pairs: GO_CONTEXT_PAIRS, + context_bare: EMPTY, + callback_set: GO_CALLBACK_SET, + core_consts: EMPTY, + }, + Language::Rust => FalseSimplicityLexicon { + dispatch_mids: &[ + "downcast", + "downcast_ref", + "downcast_mut", + "call", + "call_mut", + "call_once", + ], + meta_mids: &["transmute", "from_raw_parts", "from_raw_parts_mut"], + method_obj_mids: &["method"], + io_consts: &["std", "tokio", "fs", "env", "process", "net", "io"], + io_bare: 
&["panic", "todo", "unimplemented", "unreachable"], + dir_context: &["current_dir", "home_dir"], + context_pairs: RUST_CONTEXT_PAIRS, + context_bare: EMPTY, + callback_set: RUST_CALLBACK_SET, + core_consts: EMPTY, + }, + Language::Zig => FalseSimplicityLexicon { + dispatch_mids: &["field", "fieldParentPtr", "ptrCast", "alignCast", "call"], + meta_mids: &[ + "typeInfo", + "TypeOf", + "ptrCast", + "intFromPtr", + "ptrFromInt", + "eval", + ], + method_obj_mids: &["method"], + io_consts: &[ + "std", "os", "fs", "process", "net", "Thread", "Mutex", "Atomic", + ], + io_bare: &["panic", "unreachable"], + dir_context: EMPTY, + context_pairs: ZIG_CONTEXT_PAIRS, + context_bare: EMPTY, + callback_set: ZIG_CALLBACK_SET, + core_consts: EMPTY, + }, + Language::Lua => FalseSimplicityLexicon { + dispatch_mids: &["load", "loadfile", "dofile", "require", "rawget", "rawset"], + meta_mids: &[ + "setmetatable", + "getmetatable", + "debug", + "eval", + "load", + "loadfile", + ], + method_obj_mids: &["method"], + io_consts: &["io", "os", "debug", "package"], + io_bare: &["print", "error", "assert", "require", "collectgarbage"], + dir_context: EMPTY, + context_pairs: LUA_CONTEXT_PAIRS, + context_bare: EMPTY, + callback_set: COMMON_CALLBACK_SET, + core_consts: EMPTY, + }, + Language::C => FalseSimplicityLexicon { + dispatch_mids: &["dlsym", "dlopen", "GetProcAddress"], + meta_mids: &["setjmp", "longjmp", "va_start", "va_arg"], + method_obj_mids: &["method"], + io_consts: &["FILE", "DIR", "pthread", "mutex", "atomic"], + io_bare: &[ + "printf", "fprintf", "fopen", "open", "read", "write", "close", "system", "exec", + "abort", "exit", "assert", + ], + dir_context: &["getcwd", "getenv"], + context_pairs: EMPTY_PAIRS, + context_bare: &["rand", "time", "clock"], + callback_set: C_CALLBACK_SET, + core_consts: EMPTY, + }, + Language::Cpp => FalseSimplicityLexicon { + dispatch_mids: &[ + "dynamic_cast", + "typeid", + "any_cast", + "get_if", + "visit", + "invoke", + ], + meta_mids: 
&["reinterpret_cast", "const_cast", "dlsym", "dlopen"], + method_obj_mids: &["method"], + io_consts: &[ + "std", + "filesystem", + "fstream", + "iostream", + "thread", + "mutex", + "atomic", + ], + io_bare: &["throw", "abort", "exit", "assert", "system"], + dir_context: &["current_path"], + context_pairs: CPP_CONTEXT_PAIRS, + context_bare: EMPTY, + callback_set: CPP_CALLBACK_SET, + core_consts: EMPTY, + }, + Language::CSharp => FalseSimplicityLexicon { + dispatch_mids: &[ + "Invoke", + "GetMethod", + "GetProperty", + "GetField", + "Activator", + "CreateInstance", + ], + meta_mids: &["Invoke", "GetType", "Reflection", "Emit", "DynamicMethod"], + method_obj_mids: &["method"], + io_consts: &[ + "Console", + "File", + "Directory", + "Path", + "Process", + "Socket", + "HttpClient", + "Environment", + ], + io_bare: &["throw"], + dir_context: &["CurrentDirectory", "GetEnvironmentVariable"], + context_pairs: CSHARP_CONTEXT_PAIRS, + context_bare: EMPTY, + callback_set: CSHARP_CALLBACK_SET, + core_consts: EMPTY, + }, + Language::Java => FalseSimplicityLexicon { + dispatch_mids: &[ + "invoke", + "getMethod", + "getDeclaredMethod", + "getField", + "getDeclaredField", + "forName", + ], + meta_mids: &["invoke", "setAccessible", "newInstance", "Proxy"], + method_obj_mids: &["method"], + io_consts: &[ + "System", + "File", + "Files", + "Paths", + "ProcessBuilder", + "Socket", + "HttpClient", + "Thread", + "Lock", + "AtomicReference", + ], + io_bare: &["throw"], + dir_context: &["getProperty", "getenv"], + context_pairs: JAVA_CONTEXT_PAIRS, + context_bare: EMPTY, + callback_set: JAVA_CALLBACK_SET, + core_consts: EMPTY, + }, + Language::Swift => FalseSimplicityLexicon { + dispatch_mids: &[ + "perform", + "value", + "setValue", + "selector", + "NSClassFromString", + ], + meta_mids: &[ + "Mirror", + "unsafeBitCast", + "withUnsafePointer", + "withUnsafeBytes", + ], + method_obj_mids: &["method"], + io_consts: &[ + "FileManager", + "Process", + "URLSession", + "DispatchQueue", + 
"Thread", + "Lock", + "NSLock", + ], + io_bare: &[ + "print", + "fatalError", + "preconditionFailure", + "assertionFailure", + ], + dir_context: &["currentDirectoryPath", "homeDirectoryForCurrentUser"], + context_pairs: SWIFT_CONTEXT_PAIRS, + context_bare: EMPTY, + callback_set: SWIFT_CALLBACK_SET, + core_consts: EMPTY, + }, + Language::Kotlin => FalseSimplicityLexicon { + dispatch_mids: &[ + "invoke", + "call", + "callBy", + "memberProperties", + "declaredMemberFunctions", + ], + meta_mids: &[ + "reflection", + "javaClass", + "Class", + "forName", + "setAccessible", + ], + method_obj_mids: &["method"], + io_consts: &[ + "System", + "File", + "Files", + "Paths", + "ProcessBuilder", + "Socket", + "HttpClient", + "Thread", + "Mutex", + "AtomicReference", + ], + io_bare: &["println", "print", "error", "check", "require", "TODO"], + dir_context: &["getProperty", "getenv"], + context_pairs: KOTLIN_CONTEXT_PAIRS, + context_bare: EMPTY, + callback_set: KOTLIN_CALLBACK_SET, + core_consts: EMPTY, + }, + Language::Php => FalseSimplicityLexicon { + dispatch_mids: &[ + "call_user_func", + "call_user_func_array", + "__call", + "__callStatic", + ], + meta_mids: &[ + "eval", + "ReflectionClass", + "ReflectionMethod", + "ReflectionFunction", + "class_alias", + ], + method_obj_mids: &["Closure", "fromCallable"], + io_consts: &["FilesystemIterator", "DirectoryIterator", "PDO", "mysqli"], + io_bare: &[ + "print", + "printf", + "fopen", + "file_get_contents", + "file_put_contents", + "exec", + "shell_exec", + "system", + "passthru", + "die", + "exit", + "trigger_error", + ], + dir_context: &["getcwd", "getenv"], + context_pairs: PHP_CONTEXT_PAIRS, + context_bare: &["time", "microtime", "random_int", "rand", "mt_rand"], + callback_set: COMMON_CALLBACK_SET, + core_consts: EMPTY, + }, + } +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/go.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/go.rs new file mode 100644 index 000000000..72b43f2ef --- /dev/null +++ 
b/gems/decomplex/rust/src/decomplex/syntax/adapters/go.rs @@ -0,0 +1,341 @@ +use super::super::tree_sitter_adapter::{named_children, normalize_type_owner, CallTarget, Target}; +use super::super::Language; +use super::base::LanguageProfile; +use crate::decomplex::ast::{node_text, normalize_text}; +use tree_sitter::{Language as TreeSitterLanguage, Node}; + +pub(crate) struct GoProfile; + +impl LanguageProfile for GoProfile { + fn language(&self) -> Language { + Language::Go + } + + fn grammar(&self) -> TreeSitterLanguage { + tree_sitter_go::LANGUAGE.into() + } + + fn first_argument_receiver(&self) -> bool { + true + } + + fn function_node_kinds(&self) -> &[&str] { + &["function_declaration", "method_declaration"] + } + + fn owner_name_from_declaration(&self, node: Node<'_>, source: &str) -> Option { + if node.kind() == "method_declaration" { + return go_method_receiver(node, source).map(|(owner, _name)| owner); + } + self.default_owner_name_from_declaration(node, source) + } + + fn function_receiver_name(&self, node: Node<'_>, source: &str) -> Option { + if node.kind() == "method_declaration" { + return go_method_receiver(node, source).map(|(_owner, name)| name); + } + None + } + + fn function_visibility(&self, node: Node<'_>, source: &str) -> Option { + let name = self.function_name(node, source)?; + if name.chars().next().map(char::is_uppercase).unwrap_or(false) { + Some("public".to_string()) + } else { + Some("private".to_string()) + } + } + + fn generic_owner_node_kinds(&self) -> &[&str] { + &["type_spec"] + } + + fn parameter_list_node_kinds(&self) -> &[&str] { + &["parameter_list"] + } + + fn parameter_identifier_node_kinds(&self) -> &[&str] { + &["identifier", "field_identifier"] + } + + fn function_body_node_kinds(&self) -> &[&str] { + &["block", "statement_list"] + } + + fn nested_statement_wrapper_node_kinds(&self) -> &[&str] { + &["statement_list"] + } + + fn local_identifier_wrapper_node_kinds(&self) -> &[&str] { + &["expression_list", "literal_element"] 
+ } + + fn indexed_lhs_node_kinds(&self) -> &[&str] { + &["index_expression", "slice_expression"] + } + + fn indexed_lhs_bracket_wrapper_node_kinds(&self) -> &[&str] { + &["expression_list"] + } + + fn update_statement_node_kinds(&self) -> &[&str] { + &["inc_statement", "dec_statement"] + } + + fn call_node_kinds(&self) -> &[&str] { + &["call_expression", "go_statement"] + } + + fn identifier_node_kinds(&self) -> &[&str] { + &["identifier"] + } + + fn field_identifier_node_kinds(&self) -> &[&str] { + &["field_identifier"] + } + + fn assignment_node_kinds(&self) -> &[&str] { + &["assignment_statement", "short_var_declaration"] + } + + fn expression_list_node_kinds(&self) -> &[&str] { + &["expression_list"] + } + + fn assignment_operator_tokens(&self) -> &[&str] { + &["=", ":=", "+=", "-=", "*=", "/=", "%="] + } + + fn path_action_node_kinds(&self) -> &[&str] { + &[ + "call_expression", + "expression_statement", + "return_statement", + ] + } + + fn simple_action_wrapper_node_kinds(&self) -> &[&str] { + &["block", "statement_list"] + } + + fn local_declaration_node_kinds(&self) -> &[&str] { + &[ + "short_var_declaration", + "range_clause", + "var_declaration", + "variable_declaration", + ] + } + + fn short_variable_declaration_node_kinds(&self) -> &[&str] { + &["short_var_declaration", "range_clause"] + } + + fn variable_declaration_node_kinds(&self) -> &[&str] { + &["expression_list", "var_spec", "variable_declaration"] + } + + fn multi_name_variable_declaration_node_kinds(&self) -> &[&str] { + &["var_spec"] + } + + fn normalize_local_identifier_text(&self, text: &str) -> String { + if text == "_" { + String::new() + } else { + text.to_string() + } + } + + fn receiver_type_node_kinds(&self) -> &[&str] { + &["pointer_type", "type_identifier"] + } + + fn method_receiver_node_kinds(&self) -> &[&str] { + &["method_declaration"] + } + + fn receiver_parameter_node_kinds(&self) -> &[&str] { + &["parameter_declaration"] + } + + fn 
first_argument_receiver_type_node_kinds(&self) -> &[&str] { + &["type_identifier", "pointer_type"] + } + + fn first_argument_receiver_name_node_kinds(&self) -> &[&str] { + &["identifier", "field_identifier"] + } + + fn comparison_node_kinds(&self) -> &[&str] { + &["binary_expression"] + } + + fn branch_node_kinds(&self) -> &[&str] { + &[ + "if_statement", + "for_statement", + "expression_switch_statement", + ] + } + + fn case_node_kinds(&self) -> &[&str] { + &["expression_switch_statement"] + } + + fn case_arm_node_kinds(&self) -> &[&str] { + &["expression_case"] + } + + fn case_container_stop_node_kinds(&self) -> &[&str] { + &["function_declaration", "method_declaration", "type_spec"] + } + + fn case_subject_skip_node_kinds(&self) -> &[&str] { + &["expression_case", "else", "comment"] + } + + fn default_case_patterns(&self) -> &[&str] { + &["_", "default"] + } + + fn boolean_and_operators(&self) -> &[&str] { + &["&&", "and"] + } + + fn boolean_container_node_kinds(&self) -> &[&str] { + &["binary_expression"] + } + + fn boolean_wrapper_node_kinds(&self) -> &[&str] { + &["expression_list"] + } + + fn parenthesized_wrapper_node_kinds(&self) -> &[&str] { + &["parenthesized_expression"] + } + + fn field_like_node_kinds(&self) -> &[&str] { + &["selector_expression"] + } + + fn field_like_dot_wrapper_node_kinds(&self) -> &[&str] { + &["expression_list"] + } + + fn keyed_element_node_kinds(&self) -> &[&str] { + &["keyed_element"] + } + + fn deferred_statement_node_kinds(&self) -> &[&str] { + &["defer_statement"] + } + + fn suppress_field_receiver_lhs_reads(&self) -> bool { + true + } + + fn call_target<'tree>(&self, node: Node<'tree>, source: &str) -> Option> { + match node.kind() { + "call_expression" => self.default_call_target(node, source), + "go_statement" => go_keyword_call_target(node, source), + _ => None, + } + } + + fn state_target(&self, lhs: Node<'_>, source: &str) -> Option { + if self.expression_list_node_kinds().contains(&lhs.kind()) { + let children = 
named_children(lhs); + if children.len() == 1 { + return self.state_target(children[0], source); + } + } + if self.indexed_lhs_node_kinds().contains(&lhs.kind()) { + let object = named_children(lhs).into_iter().next()?; + return self.default_state_target(object, source); + } + self.default_state_target(lhs, source) + } + + fn state_read_target(&self, node: Node<'_>, source: &str) -> Option { + if go_augmented_assignment_lhs(node) { + return None; + } + self.default_state_read_target(node, source) + } +} + +fn go_method_receiver(node: Node<'_>, source: &str) -> Option<(String, String)> { + let receiver_params = named_children(node) + .into_iter() + .find(|child| child.kind() == "parameter_list")?; + let receiver = named_children(receiver_params) + .into_iter() + .find(|child| child.kind() == "parameter_declaration")?; + let children = named_children(receiver); + let name = children + .iter() + .find(|child| matches!(child.kind(), "identifier" | "field_identifier")) + .map(|child| node_text(*child, source).to_string())?; + let type_node = children + .iter() + .find(|child| matches!(child.kind(), "pointer_type" | "type_identifier"))?; + Some((normalize_type_owner(node_text(*type_node, source)), name)) +} + +fn go_keyword_call_target<'tree>(node: Node<'tree>, source: &str) -> Option> { + if node.kind() != "go_statement" { + return None; + } + let arguments = go_statement_arguments(node, source)?; + let mut target = CallTarget::new("self".to_string(), "go".to_string(), arguments); + target.source_node = Some(node); + Some(target) +} + +fn go_statement_arguments(node: Node<'_>, source: &str) -> Option> { + let text = node_text(node, source).trim(); + let inner = text.strip_prefix("go(")?.strip_suffix(')')?; + Some( + inner + .split(',') + .map(normalize_text) + .filter(|argument| !argument.is_empty()) + .collect(), + ) +} + +fn go_augmented_assignment_lhs(node: Node<'_>) -> bool { + let mut current = node; + while let Some(parent) = current.parent() { + if parent.kind() 
== "assignment_statement" { + let lhs = named_children(parent).into_iter().next(); + let operator = go_assignment_operator(parent); + return lhs.map(|lhs| go_contains_node(lhs, node)).unwrap_or(false) + && !matches!(operator.as_deref(), Some("=" | ":=")); + } + current = parent; + } + false +} + +fn go_assignment_operator(node: Node<'_>) -> Option { + let mut cursor = node.walk(); + let operator = node + .children(&mut cursor) + .find(|child| !child.is_named() && child.kind().ends_with('=')) + .map(|child| child.kind().to_string()); + operator +} + +fn go_contains_node(root: Node<'_>, target: Node<'_>) -> bool { + if root.kind() == target.kind() + && root.start_byte() == target.start_byte() + && root.end_byte() == target.end_byte() + { + return true; + } + named_children(root) + .into_iter() + .any(|child| go_contains_node(child, target)) +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/java.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/java.rs new file mode 100644 index 000000000..3ce0b5244 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/java.rs @@ -0,0 +1,161 @@ +use super::super::tree_sitter_adapter::{named_children, CallTarget}; +use super::super::Language; +use super::base::LanguageProfile; +use crate::decomplex::ast::node_text; +use tree_sitter::Language as TreeSitterLanguage; +use tree_sitter::Node; + +pub(crate) struct JavaProfile; + +impl LanguageProfile for JavaProfile { + fn language(&self) -> Language { + Language::Java + } + + fn grammar(&self) -> TreeSitterLanguage { + tree_sitter_java::LANGUAGE.into() + } + + fn function_visibility(&self, node: Node<'_>, source: &str) -> Option { + for child in named_children(node) { + if child.kind() != "modifiers" { + continue; + } + let text = node_text(child, source); + if text.split_whitespace().any(|token| token == "public") { + return Some("public".to_string()); + } + if text.split_whitespace().any(|token| token == "private") { + return 
Some("private".to_string()); + } + if text.split_whitespace().any(|token| token == "protected") { + return Some("protected".to_string()); + } + } + None + } + + fn function_node_kinds(&self) -> &[&str] { + &["method_declaration"] + } + + fn class_owner_node_kinds(&self) -> &[&str] { + &["class_declaration"] + } + + fn parameter_list_node_kinds(&self) -> &[&str] { + &["formal_parameters"] + } + + fn parameter_identifier_node_kinds(&self) -> &[&str] { + &["identifier"] + } + + fn function_body_node_kinds(&self) -> &[&str] { + &["block"] + } + + fn call_node_kinds(&self) -> &[&str] { + &["method_invocation"] + } + + fn identifier_node_kinds(&self) -> &[&str] { + &["identifier", "type_identifier"] + } + + fn assignment_node_kinds(&self) -> &[&str] { + &["assignment_expression"] + } + + fn assignment_operator_tokens(&self) -> &[&str] { + &["=", "+=", "-=", "*=", "/=", "%="] + } + + fn path_action_node_kinds(&self) -> &[&str] { + &[ + "method_invocation", + "expression_statement", + "return_statement", + ] + } + + fn simple_action_wrapper_node_kinds(&self) -> &[&str] { + &["block"] + } + + fn local_declaration_node_kinds(&self) -> &[&str] { + &["local_variable_declaration", "variable_declarator"] + } + + fn variable_declaration_node_kinds(&self) -> &[&str] { + &["variable_declarator"] + } + + fn comparison_node_kinds(&self) -> &[&str] { + &["binary_expression"] + } + + fn branch_node_kinds(&self) -> &[&str] { + &[ + "if_statement", + "enhanced_for_statement", + "switch_expression", + ] + } + + fn case_node_kinds(&self) -> &[&str] { + &["switch_expression"] + } + + fn case_arm_node_kinds(&self) -> &[&str] { + &["switch_block_statement_group"] + } + + fn case_container_stop_node_kinds(&self) -> &[&str] { + &["method_declaration", "class_declaration"] + } + + fn case_subject_skip_node_kinds(&self) -> &[&str] { + &["switch_block_statement_group", "else", "comment"] + } + + fn default_case_patterns(&self) -> &[&str] { + &["_", "default"] + } + + fn 
boolean_and_operators(&self) -> &[&str] { + &["&&", "and"] + } + + fn boolean_container_node_kinds(&self) -> &[&str] { + &["binary_expression"] + } + + fn parenthesized_wrapper_node_kinds(&self) -> &[&str] { + &["parenthesized_expression"] + } + + fn field_like_node_kinds(&self) -> &[&str] { + &["field_access"] + } + + fn call_target<'tree>(&self, node: Node<'tree>, source: &str) -> Option> { + if node.kind() != "method_invocation" { + return None; + } + let children = named_children(node); + let identifiers = children + .iter() + .copied() + .filter(|child| child.kind() == "identifier") + .collect::>(); + if identifiers.len() >= 2 { + return Some(CallTarget::new( + node_text(identifiers[0], source).to_string(), + node_text(identifiers[1], source).to_string(), + self.call_argument_texts(node, source), + )); + } + self.default_call_target(node, source) + } +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/javascript.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/javascript.rs new file mode 100644 index 000000000..3cb0aa323 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/javascript.rs @@ -0,0 +1,147 @@ +use super::super::Language; +use super::base::LanguageProfile; +use crate::decomplex::ast::node_text; +use tree_sitter::{Language as TreeSitterLanguage, Node}; + +pub(crate) struct JavaScriptProfile; + +impl LanguageProfile for JavaScriptProfile { + fn language(&self) -> Language { + Language::JavaScript + } + + fn grammar(&self) -> TreeSitterLanguage { + tree_sitter_javascript::LANGUAGE.into() + } + + fn function_visibility(&self, node: Node<'_>, source: &str) -> Option { + let name = self.function_name(node, source).unwrap_or_default(); + if name.starts_with('#') { + return Some("private".to_string()); + } + for child in super::super::tree_sitter_adapter::named_children(node) { + if !matches!(child.kind(), "accessibility_modifier" | "modifier") { + continue; + } + let text = node_text(child, source); + if 
text.split_whitespace().any(|token| token == "private") { + return Some("private".to_string()); + } + if text.split_whitespace().any(|token| token == "protected") { + return Some("protected".to_string()); + } + if text.split_whitespace().any(|token| token == "public") { + return Some("public".to_string()); + } + } + Some("public".to_string()) + } + + fn function_node_kinds(&self) -> &[&str] { + &["function_declaration", "method_definition"] + } + + fn class_owner_node_kinds(&self) -> &[&str] { + &["class_declaration"] + } + + fn parameter_list_node_kinds(&self) -> &[&str] { + &["formal_parameters"] + } + + fn parameter_identifier_node_kinds(&self) -> &[&str] { + &["identifier"] + } + + fn function_body_node_kinds(&self) -> &[&str] { + &["statement_block"] + } + + fn call_node_kinds(&self) -> &[&str] { + &["call_expression"] + } + + fn identifier_node_kinds(&self) -> &[&str] { + &["identifier"] + } + + fn field_identifier_node_kinds(&self) -> &[&str] { + &["property_identifier"] + } + + fn assignment_node_kinds(&self) -> &[&str] { + &["assignment_expression", "augmented_assignment_expression"] + } + + fn assignment_operator_tokens(&self) -> &[&str] { + &["=", "+=", "-=", "*=", "/=", "%=", "&&=", "||="] + } + + fn path_action_node_kinds(&self) -> &[&str] { + &[ + "call_expression", + "expression_statement", + "return_statement", + ] + } + + fn simple_action_wrapper_node_kinds(&self) -> &[&str] { + &["statement_block"] + } + + fn local_declaration_node_kinds(&self) -> &[&str] { + &["lexical_declaration", "variable_declarator"] + } + + fn variable_declaration_node_kinds(&self) -> &[&str] { + &["variable_declarator"] + } + + fn comparison_node_kinds(&self) -> &[&str] { + &["binary_expression"] + } + + fn branch_node_kinds(&self) -> &[&str] { + &["if_statement", "for_in_statement", "switch_statement"] + } + + fn case_node_kinds(&self) -> &[&str] { + &["switch_statement"] + } + + fn case_arm_node_kinds(&self) -> &[&str] { + &["switch_case"] + } + + fn 
case_container_stop_node_kinds(&self) -> &[&str] { + &[ + "function_declaration", + "method_definition", + "class_declaration", + ] + } + + fn case_subject_skip_node_kinds(&self) -> &[&str] { + &["switch_case", "else", "comment"] + } + + fn default_case_patterns(&self) -> &[&str] { + &["_", "default"] + } + + fn boolean_and_operators(&self) -> &[&str] { + &["&&", "and"] + } + + fn boolean_container_node_kinds(&self) -> &[&str] { + &["binary_expression"] + } + + fn parenthesized_wrapper_node_kinds(&self) -> &[&str] { + &["parenthesized_expression"] + } + + fn field_like_node_kinds(&self) -> &[&str] { + &["member_expression"] + } +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/kotlin.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/kotlin.rs new file mode 100644 index 000000000..2fcb63356 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/kotlin.rs @@ -0,0 +1,235 @@ +use super::super::tree_sitter_adapter::{named_children, CallTarget}; +use super::super::Language; +use super::base::LanguageProfile; +use crate::decomplex::ast::node_text; +use tree_sitter::{Language as TreeSitterLanguage, Node}; + +pub(crate) struct KotlinProfile; + +impl LanguageProfile for KotlinProfile { + fn language(&self) -> Language { + Language::Kotlin + } + + fn grammar(&self) -> TreeSitterLanguage { + tree_sitter_kotlin_ng::LANGUAGE.into() + } + + fn function_visibility(&self, node: Node<'_>, source: &str) -> Option { + for child in named_children(node) { + if child.kind() != "modifiers" { + continue; + } + if node_text(child, source) + .split_whitespace() + .any(|token| token == "private") + { + return Some("private".to_string()); + } + } + None + } + + fn function_node_kinds(&self) -> &[&str] { + &["function_declaration"] + } + + fn class_owner_node_kinds(&self) -> &[&str] { + &["class_declaration"] + } + + fn parameter_list_node_kinds(&self) -> &[&str] { + &["function_value_parameters"] + } + + fn parameter_identifier_node_kinds(&self) -> &[&str] { 
+ &["identifier", "simple_identifier"] + } + + fn function_params(&self, node: Node<'_>, source: &str) -> Vec { + let Some(params) = named_children(node) + .into_iter() + .find(|child| self.parameter_list_node_kinds().contains(&child.kind())) + else { + return Vec::new(); + }; + + let mut out = Vec::new(); + for param in named_children(params) { + if let Some(name) = self.parameter_name(param, source) { + if !out.contains(&name) { + out.push(name); + } + } + } + out + } + + fn parameter_name(&self, param: Node<'_>, source: &str) -> Option { + let name = if self + .parameter_identifier_node_kinds() + .contains(¶m.kind()) + { + Some(param) + } else { + named_children(param).into_iter().find(|child| { + self.parameter_identifier_node_kinds() + .contains(&child.kind()) + }) + }?; + let text = node_text(name, source).to_string(); + (!text.is_empty() && text != "_").then_some(text) + } + + fn function_body_node_kinds(&self) -> &[&str] { + &["function_body", "statements"] + } + + fn nested_statement_wrapper_node_kinds(&self) -> &[&str] { + &["block", "statements"] + } + + fn call_node_kinds(&self) -> &[&str] { + &["call_expression"] + } + + fn identifier_node_kinds(&self) -> &[&str] { + &["identifier", "simple_identifier", "type_identifier"] + } + + fn assignment_node_kinds(&self) -> &[&str] { + &["assignment"] + } + + fn assignment_operator_tokens(&self) -> &[&str] { + &["=", "+=", "-=", "*=", "/=", "%="] + } + + fn path_action_node_kinds(&self) -> &[&str] { + &["call_expression", "jump_expression"] + } + + fn simple_action_wrapper_node_kinds(&self) -> &[&str] { + &["statements", "control_structure_body", "function_body"] + } + + fn local_identifier_wrapper_node_kinds(&self) -> &[&str] { + &["directly_assignable_expression", "value_argument"] + } + + fn local_declaration_node_kinds(&self) -> &[&str] { + &["property_declaration", "variable_declaration"] + } + + fn variable_declaration_node_kinds(&self) -> &[&str] { + &["variable_declaration", 
"directly_assignable_expression"] + } + + fn comparison_node_kinds(&self) -> &[&str] { + &[ + "equality_expression", + "comparison_expression", + "conjunction_expression", + "additive_expression", + "multiplicative_expression", + "binary_expression", + ] + } + + fn branch_node_kinds(&self) -> &[&str] { + &["if_expression", "for_statement", "when_expression"] + } + + fn case_node_kinds(&self) -> &[&str] { + &["when_expression"] + } + + fn case_arm_node_kinds(&self) -> &[&str] { + &["when_entry"] + } + + fn case_pattern_node_kinds(&self) -> &[&str] { + &["when_condition", "pattern", "string_literal"] + } + + fn case_subject_node_kinds(&self) -> &[&str] { + &["when_subject"] + } + + fn case_container_stop_node_kinds(&self) -> &[&str] { + &["function_declaration", "class_declaration"] + } + + fn case_subject_skip_node_kinds(&self) -> &[&str] { + &["when_entry", "else", "line_comment"] + } + + fn default_case_patterns(&self) -> &[&str] { + &["_", "default", "else"] + } + + fn boolean_and_operators(&self) -> &[&str] { + &["&&", "and"] + } + + fn boolean_container_node_kinds(&self) -> &[&str] { + &[ + "conjunction_expression", + "equality_expression", + "comparison_expression", + "binary_expression", + ] + } + + fn boolean_wrapper_node_kinds(&self) -> &[&str] { + &["statements", "pattern"] + } + + fn navigation_suffix_node_kinds(&self) -> &[&str] { + &["navigation_suffix"] + } + + fn field_like_node_kinds(&self) -> &[&str] { + &["navigation_expression", "directly_assignable_expression"] + } + + fn call_target<'tree>(&self, node: Node<'tree>, source: &str) -> Option> { + if node.kind() != "call_expression" { + return None; + } + let mut target = self.default_call_target(node, source)?; + if kotlin_single_call_control_body(node) { + target.source_node = named_children(node).into_iter().next(); + } + Some(target) + } +} + +fn kotlin_single_call_control_body(node: Node<'_>) -> bool { + let Some(parent) = node.parent() else { + return false; + }; + if parent.kind() == 
"when_entry" { + return true; + } + if parent.kind() != "block" { + return false; + } + if named_children(parent) + .into_iter() + .filter(|child| child.is_named()) + .count() + != 1 + { + return false; + } + parent + .parent() + .map(|ancestor| { + matches!( + ancestor.kind(), + "if_expression" | "for_statement" | "control_structure_body" + ) + }) + .unwrap_or(false) +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/lua.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/lua.rs new file mode 100644 index 000000000..2b991e7d3 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/lua.rs @@ -0,0 +1,247 @@ +use super::super::tree_sitter_adapter::{named_children, CallTarget, Target}; +use super::super::Language; +use super::base::LanguageProfile; +use crate::decomplex::ast::{line, node_text}; +use tree_sitter::{Language as TreeSitterLanguage, Node}; + +pub(crate) struct LuaProfile; + +impl LanguageProfile for LuaProfile { + fn language(&self) -> Language { + Language::Lua + } + + fn grammar(&self) -> TreeSitterLanguage { + tree_sitter_lua::LANGUAGE.into() + } + + fn function_node_kinds(&self) -> &[&str] { + &["function_declaration"] + } + + fn function_name(&self, node: Node<'_>, source: &str) -> Option { + lua_method_name(node, source).or_else(|| self.default_function_name(node, source)) + } + + fn owner_name_from_declaration(&self, node: Node<'_>, source: &str) -> Option { + lua_method_owner_name(node, source) + .or_else(|| self.default_owner_name_from_declaration(node, source)) + } + + fn owner_def_name_from_declaration(&self, _node: Node<'_>, _source: &str) -> Option { + None + } + + fn parameter_list_node_kinds(&self) -> &[&str] { + &["parameters"] + } + + fn parameter_identifier_node_kinds(&self) -> &[&str] { + &["identifier"] + } + + fn function_body_node_kinds(&self) -> &[&str] { + &["block"] + } + + fn nested_statement_wrapper_node_kinds(&self) -> &[&str] { + &["block"] + } + + fn call_node_kinds(&self) -> &[&str] { + 
&["function_call", "method_call"] + } + + fn identifier_node_kinds(&self) -> &[&str] { + &["identifier"] + } + + fn assignment_node_kinds(&self) -> &[&str] { + &["assignment_statement"] + } + + fn assignment_operator_tokens(&self) -> &[&str] { + &["=", "+=", "-=", "*=", "/=", "%="] + } + + fn path_action_node_kinds(&self) -> &[&str] { + &["function_call", "expression_list", "return_statement"] + } + + fn simple_action_wrapper_node_kinds(&self) -> &[&str] { + &["block"] + } + + fn local_declaration_node_kinds(&self) -> &[&str] { + &["variable_declaration"] + } + + fn variable_declaration_node_kinds(&self) -> &[&str] { + &["variable_declaration", "variable_list"] + } + + fn declaration_assignment_node_kinds(&self) -> &[&str] { + &["assignment_statement"] + } + + fn comparison_node_kinds(&self) -> &[&str] { + &["binary_expression"] + } + + fn branch_node_kinds(&self) -> &[&str] { + &["if_statement"] + } + + fn boolean_and_operators(&self) -> &[&str] { + &["and", "&&"] + } + + fn boolean_container_node_kinds(&self) -> &[&str] { + &["binary_expression"] + } + + fn boolean_wrapper_node_kinds(&self) -> &[&str] { + &["expression_list"] + } + + fn expression_list_node_kinds(&self) -> &[&str] { + &["expression_list", "variable_list"] + } + + fn field_like_node_kinds(&self) -> &[&str] { + &["dot_index_expression"] + } + + fn generated_prelude(&self, node: Node<'_>, source: &str) -> bool { + if line(node) != 1 { + return false; + } + let first_line = source.lines().next().unwrap_or(""); + first_line.contains("_tl_compat") && first_line.contains("compat53.module") + } + + fn call_target<'tree>(&self, node: Node<'tree>, source: &str) -> Option> { + if !self.call_node_kinds().contains(&node.kind()) { + return None; + } + let callee = named_children(node).into_iter().next()?; + let mut target = if callee.kind() == "method_index_expression" { + lua_method_call_target(callee, node, self.call_argument_texts(node, source), source)? + } else { + self.default_call_target(node, source)? 
+ }; + if lua_callee_source_span(node) { + target.source_node = Some(callee); + } + Some(target) + } + + fn state_read_target(&self, node: Node<'_>, source: &str) -> Option { + let target = self.default_state_read_target(node, source)?; + if target.receiver == "_" && target.field == "_" { + None + } else { + Some(target) + } + } + + fn assignment_lhs_node(&self, node: Node<'_>) -> bool { + let candidate = if node + .parent() + .map(|parent| parent.kind() == "variable_list") + .unwrap_or(false) + { + node.parent().unwrap() + } else { + node + }; + let Some(parent) = candidate.parent() else { + return false; + }; + if parent.kind() != "assignment_statement" { + return false; + } + named_children(parent) + .into_iter() + .next() + .map(|lhs| same_node(lhs, candidate)) + .unwrap_or(false) + } +} + +fn lua_method_call_target<'tree>( + callee: Node<'tree>, + node: Node<'tree>, + arguments: Vec, + source: &str, +) -> Option> { + let children = named_children(callee); + let receiver = children.first().copied()?; + let message = children.last().copied()?; + let mut target = CallTarget::new( + node_text(receiver, source).to_string(), + node_text(message, source).to_string(), + arguments, + ); + target.source_node = Some(node); + Some(target) +} + +fn lua_callee_source_span(node: Node<'_>) -> bool { + if node + .parent() + .map(|parent| parent.kind() == "expression_list") + .unwrap_or(false) + { + return true; + } + let Some(parent) = node.parent() else { + return false; + }; + if parent.kind() != "block" { + return false; + } + if named_children(parent).len() != 1 { + return false; + } + parent + .parent() + .map(|ancestor| { + matches!( + ancestor.kind(), + "if_statement" | "elseif_statement" | "else_statement" | "for_statement" + ) + }) + .unwrap_or(false) +} + +fn same_node(left: Node<'_>, right: Node<'_>) -> bool { + left.kind() == right.kind() + && left.start_byte() == right.start_byte() + && left.end_byte() == right.end_byte() +} + +fn lua_method_name(node: Node<'_>, 
source: &str) -> Option { + let method = lua_method_index_expression(node)?; + named_children(method) + .into_iter() + .last() + .map(|child| node_text(child, source).to_string()) +} + +fn lua_method_owner_name(node: Node<'_>, source: &str) -> Option { + let method = lua_method_index_expression(node)?; + named_children(method) + .into_iter() + .next() + .map(|child| node_text(child, source).to_string()) +} + +fn lua_method_index_expression<'tree>(node: Node<'tree>) -> Option> { + if node.kind() != "function_declaration" { + return None; + } + named_children(node) + .into_iter() + .find(|child| child.kind() == "method_index_expression") +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/mod.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/mod.rs new file mode 100644 index 000000000..ec6e69829 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/mod.rs @@ -0,0 +1,72 @@ +pub(crate) mod base; +mod c; +mod cpp; +mod csharp; +pub(crate) mod false_simplicity_lexicon; +mod go; +mod java; +mod javascript; +mod kotlin; +mod lua; +mod php; +mod python; +mod ruby; +mod rust; +mod swift; +mod typescript; +mod zig; + +pub(crate) use base::LanguageProfile; + +use super::Language; +use c::CProfile; +use cpp::CppProfile; +use csharp::CSharpProfile; +use go::GoProfile; +use java::JavaProfile; +use javascript::JavaScriptProfile; +use kotlin::KotlinProfile; +use lua::LuaProfile; +use php::PhpProfile; +use python::PythonProfile; +use ruby::RubyProfile; +use rust::RustProfile; +use swift::SwiftProfile; +use typescript::TypeScriptProfile; +use zig::ZigProfile; + +static RUBY_PROFILE: RubyProfile = RubyProfile; +static PYTHON_PROFILE: PythonProfile = PythonProfile; +static JAVASCRIPT_PROFILE: JavaScriptProfile = JavaScriptProfile; +static JAVA_PROFILE: JavaProfile = JavaProfile; +static TYPESCRIPT_PROFILE: TypeScriptProfile = TypeScriptProfile; +static SWIFT_PROFILE: SwiftProfile = SwiftProfile; +static KOTLIN_PROFILE: KotlinProfile = KotlinProfile; 
+static GO_PROFILE: GoProfile = GoProfile; +static RUST_PROFILE: RustProfile = RustProfile; +static ZIG_PROFILE: ZigProfile = ZigProfile; +static LUA_PROFILE: LuaProfile = LuaProfile; +static C_PROFILE: CProfile = CProfile; +static CPP_PROFILE: CppProfile = CppProfile; +static CSHARP_PROFILE: CSharpProfile = CSharpProfile; +static PHP_PROFILE: PhpProfile = PhpProfile; + +pub(crate) fn language_profile(language: Language) -> &'static dyn LanguageProfile { + match language { + Language::Ruby => &RUBY_PROFILE, + Language::Python => &PYTHON_PROFILE, + Language::JavaScript => &JAVASCRIPT_PROFILE, + Language::Java => &JAVA_PROFILE, + Language::TypeScript => &TYPESCRIPT_PROFILE, + Language::Swift => &SWIFT_PROFILE, + Language::Kotlin => &KOTLIN_PROFILE, + Language::Go => &GO_PROFILE, + Language::Rust => &RUST_PROFILE, + Language::Zig => &ZIG_PROFILE, + Language::Lua => &LUA_PROFILE, + Language::C => &C_PROFILE, + Language::Cpp => &CPP_PROFILE, + Language::CSharp => &CSHARP_PROFILE, + Language::Php => &PHP_PROFILE, + } +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/php.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/php.rs new file mode 100644 index 000000000..3a549b7c1 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/php.rs @@ -0,0 +1,291 @@ +use super::super::tree_sitter_adapter::{named_children, AssignmentTarget, CallTarget, Target}; +use super::super::Language; +use super::base::LanguageProfile; +use crate::decomplex::ast::{node_text, normalize_text}; +use tree_sitter::{Language as TreeSitterLanguage, Node}; + +pub(crate) struct PhpProfile; + +impl LanguageProfile for PhpProfile { + fn language(&self) -> Language { + Language::Php + } + + fn grammar(&self) -> TreeSitterLanguage { + tree_sitter_php::LANGUAGE_PHP.into() + } + + fn function_node_kinds(&self) -> &[&str] { + &["function_definition", "method_declaration"] + } + + fn class_owner_node_kinds(&self) -> &[&str] { + &["class_declaration"] + } + + fn 
parameter_list_node_kinds(&self) -> &[&str] { + &["formal_parameters"] + } + + fn parameter_identifier_node_kinds(&self) -> &[&str] { + &["name", "variable_name"] + } + + fn function_body_node_kinds(&self) -> &[&str] { + &["compound_statement", "declaration_list"] + } + + fn call_node_kinds(&self) -> &[&str] { + &[ + "function_call_expression", + "member_call_expression", + "scoped_call_expression", + "print_intrinsic", + ] + } + + fn identifier_node_kinds(&self) -> &[&str] { + &["name", "variable_name"] + } + + fn assignment_node_kinds(&self) -> &[&str] { + &["assignment_expression", "augmented_assignment_expression"] + } + + fn assignment_operator_tokens(&self) -> &[&str] { + &["=", "+=", "-=", "*=", "/=", "%="] + } + + fn path_action_node_kinds(&self) -> &[&str] { + &[ + "function_call_expression", + "member_call_expression", + "scoped_call_expression", + "expression_statement", + "return_statement", + "print_intrinsic", + ] + } + + fn simple_action_wrapper_node_kinds(&self) -> &[&str] { + &["compound_statement", "declaration_list"] + } + + fn comparison_node_kinds(&self) -> &[&str] { + &["binary_expression"] + } + + fn comparison_operators(&self) -> &[&str] { + &["==", "!=", "===", "!==", "<", "<=", ">", ">="] + } + + fn case_node_kinds(&self) -> &[&str] { + &["switch_statement"] + } + + fn case_arm_node_kinds(&self) -> &[&str] { + &["case_statement"] + } + + fn case_container_stop_node_kinds(&self) -> &[&str] { + &[ + "function_definition", + "method_declaration", + "class_declaration", + ] + } + + fn case_subject_skip_node_kinds(&self) -> &[&str] { + &["case_statement", "else", "comment"] + } + + fn default_case_patterns(&self) -> &[&str] { + &["_", "default"] + } + + fn boolean_and_operators(&self) -> &[&str] { + &["&&", "and"] + } + + fn boolean_container_node_kinds(&self) -> &[&str] { + &["binary_expression"] + } + + fn parenthesized_wrapper_node_kinds(&self) -> &[&str] { + &["parenthesized_expression"] + } + + fn branch_node_kinds(&self) -> &[&str] { + 
&["if_statement", "foreach_statement", "switch_statement"] + } + + fn field_like_node_kinds(&self) -> &[&str] { + &[ + "member_access_expression", + "nullsafe_member_access_expression", + "member_call_expression", + "class_constant_access_expression", + ] + } +/* Applies the PHP-specific rewrite (`php_normalize_source`) first, then the shared `normalize_text`. */ + fn normalize_source_text(&self, text: &str) -> String { + normalize_text(&php_normalize_source(text)) + } + + fn normalize_parameter_name(&self, text: &str) -> String { + php_identifier_text_value(text) + } + + fn normalize_local_identifier_text(&self, text: &str) -> String { + php_identifier_text_value(text) + } +/* For PHP function and method nodes: reads the `name` field, falling back to the first name-like named child, and returns its text via `php_name_text`. Other node kinds use the default lookup. */ + fn function_name(&self, node: Node<'_>, source: &str) -> Option { + if self.function_node_kinds().contains(&node.kind()) { + return node + .child_by_field_name("name") + .or_else(|| php_first_name_node(node)) + .and_then(|name| php_name_text(name, source)); + } + self.default_function_name(node, source) + } +/* Same name lookup as `function_name`, applied to `class_declaration` nodes only. */ + fn owner_name_from_declaration(&self, node: Node<'_>, source: &str) -> Option { + if node.kind() == "class_declaration" { + return node + .child_by_field_name("name") + .or_else(|| php_first_name_node(node)) + .and_then(|name| php_name_text(name, source)); + } + self.default_owner_name_from_declaration(node, source) + } +/* Returns the text of the first `visibility_modifier` child when it is `public`, `private` or `protected`. A missing or different modifier yields `public`, so the result is never None. */ + fn function_visibility(&self, node: Node<'_>, source: &str) -> Option { + named_children(node) + .into_iter() + .find(|child| child.kind() == "visibility_modifier") + .map(|modifier| node_text(modifier, source).to_string()) + .filter(|modifier| matches!(modifier.as_str(), "public" | "private" | "protected")) + .or_else(|| Some("public".to_string())) + } + + fn assignment_target<'tree>(&self, node: Node<'tree>) -> Option> { + self.default_assignment_target(node) + } +/* Nodes outside `call_node_kinds` yield None. A `print_intrinsic` becomes a `print` call on receiver `self`; every other call kind uses the default target with its receiver and message normalized. */ + fn call_target<'tree>(&self, node: Node<'tree>, source: &str) -> Option> { + if !self.call_node_kinds().contains(&node.kind()) { + return None; + } + if node.kind() == "print_intrinsic" { + return Some(CallTarget::new( + "self".to_string(), + "print".to_string(), + php_print_argument_texts(node,
source), + )); + } + let mut target = self.default_call_target(node, source)?; + target.receiver = php_normalize_receiver(&target.receiver); + target.message = php_identifier_text_value(&target.message); + Some(target) + } +/* Argument texts pass through the PHP rewrite and then the shared `normalize_text`. */ + fn call_argument_texts(&self, node: Node<'_>, source: &str) -> Vec { + self.call_argument_nodes(node) + .into_iter() + .map(|argument| normalize_text(&php_normalize_source(node_text(argument, source)))) + .collect() + } +/* Default write target with the receiver normalized (`this` becomes `self`) and leading `$` removed from the field. */ + fn state_target(&self, lhs: Node<'_>, source: &str) -> Option { + let target = self.default_state_target(lhs, source)?; + Some(Target { + receiver: php_normalize_receiver(&target.receiver), + field: php_identifier_text_value(&target.field), + }) + } +/* Read-side counterpart of `state_target`, with the same normalization. */ + fn state_read_target(&self, node: Node<'_>, source: &str) -> Option { + let target = self.default_state_read_target(node, source)?; + Some(Target { + receiver: php_normalize_receiver(&target.receiver), + field: php_identifier_text_value(&target.field), + }) + } + + fn member_field_text(&self, field: Node<'_>, source: &str) -> Option { + php_name_text(field, source) + } + + fn case_pattern_texts(&self, patterns: &[Node<'_>], source: &str) -> Vec { + patterns + .iter() + .map(|pattern| normalize_text(&php_normalize_source(node_text(*pattern, source)))) + .collect() + } +} +/* First named child whose kind `php_name_node` accepts, if any. */ +fn php_first_name_node<'tree>(node: Node<'tree>) -> Option> { + named_children(node) + .into_iter() + .find(|child| php_name_node(*child)) +} +/* True for the node kinds `name`, `qualified_name` and `variable_name`. */ +fn php_name_node(node: Node<'_>) -> bool { + matches!(node.kind(), "name" | "qualified_name" | "variable_name") +} +/* Node text as an identifier (trimmed, leading `$` removed); None when nothing is left. */ +fn php_name_text(node: Node<'_>, source: &str) -> Option { + let text = php_identifier_text_value(node_text(node, source)); + (!text.is_empty()).then_some(text) +} +/* Trims surrounding whitespace, then removes every leading `$` (not just the first one). */ +fn php_identifier_text_value(text: &str) -> String { + text.trim().trim_start_matches('$').to_string() +} +/* Normalizes a receiver text and maps the result `this` to `self`; any other value is returned as normalized. */ +fn php_normalize_receiver(receiver: &str) -> String { + let value = php_normalize_source(&php_identifier_text_value(receiver)); + if value == "this" { + "self".to_string() + }
else { + value + } +} +/* Argument texts of a `print` node. A `parenthesized_expression` child is replaced by its named children (or kept as-is when it has none); each text is then PHP-normalized. */ +fn php_print_argument_texts(node: Node<'_>, source: &str) -> Vec { + named_children(node) + .into_iter() + .flat_map(|child| { + if child.kind() == "parenthesized_expression" { + let children = named_children(child); + if children.is_empty() { + vec![child] + } else { + children + } + } else { + vec![child] + } + }) + .map(|argument| normalize_text(&php_normalize_source(node_text(argument, source)))) + .collect() +} +/* Drops every `$` that is immediately followed by `_` or an ASCII letter, then rewrites the arrow and double-colon member operators to `.`. NOTE(review): this is a plain character scan, so text inside string literals is rewritten as well - confirm that is acceptable for callers. */ +fn php_normalize_source(source: &str) -> String { + let mut out = String::new(); + let mut chars = source.chars().peekable(); + while let Some(ch) = chars.next() { + if ch == '$' { + if chars + .peek() + .map(|next| *next == '_' || next.is_ascii_alphabetic()) + .unwrap_or(false) + { + continue; + } + } + out.push(ch); + } + out.replace("->", ".").replace("::", ".") +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/python.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/python.rs new file mode 100644 index 000000000..fbae478f0 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/python.rs @@ -0,0 +1,379 @@ +use super::super::tree_sitter_adapter::Target; +use super::super::Language; +use super::base::{default_clone_candidate_node, LanguageProfile}; +use crate::decomplex::ast::RawNode; +use tree_sitter::{Language as TreeSitterLanguage, Node}; +/* Field-less unit struct that carries the Python implementation of `LanguageProfile`. */ +pub(crate) struct PythonProfile; +/* Python profile: tables of tree-sitter node kinds plus Python-specific read suppression and clone-fingerprint shaping. */ +impl LanguageProfile for PythonProfile { + fn language(&self) -> Language { + Language::Python + } + + fn grammar(&self) -> TreeSitterLanguage { + tree_sitter_python::LANGUAGE.into() + } + + fn function_node_kinds(&self) -> &[&str] { + &["function_definition"] + } +/* Naming-convention visibility: exactly one leading underscore means `private`; names starting with two underscores, and all other names, are `public`. None only when the function has no name. */ + fn function_visibility(&self, node: tree_sitter::Node<'_>, source: &str) -> Option { + let name = self.function_name(node, source)?; + if name.starts_with('_') && !name.starts_with("__") { + Some("private".to_string()) + } else { + Some("public".to_string()) + } + } + + fn class_owner_node_kinds(&self) -> &[&str] { + &["class_definition"] + } + + fn
parameter_list_node_kinds(&self) -> &[&str] { + &["parameters"] + } + + fn parameter_identifier_node_kinds(&self) -> &[&str] { + &["identifier"] + } + + fn function_body_node_kinds(&self) -> &[&str] { + &["block"] + } + + fn nested_statement_wrapper_node_kinds(&self) -> &[&str] { + &["block"] + } + + fn call_node_kinds(&self) -> &[&str] { + &["call"] + } + + fn identifier_node_kinds(&self) -> &[&str] { + &["identifier"] + } + + fn assignment_node_kinds(&self) -> &[&str] { + &["assignment", "augmented_assignment"] + } +/* Same token list as `python_assignment_token` in this file. NOTE(review): other Python augmented operators (floor-division, power, bitwise) are not listed - confirm that is intended. */ + fn assignment_operator_tokens(&self) -> &[&str] { + &["=", "+=", "-=", "*=", "/=", "%="] + } + + fn comparison_node_kinds(&self) -> &[&str] { + &["comparison_operator", "binary_operator", "boolean_operator"] + } +/* NOTE(review): `while_statement` is not listed here although `python_single_statement_block_child` knows about it - confirm the omission is deliberate. */ + fn branch_node_kinds(&self) -> &[&str] { + &["if_statement", "for_statement", "match_statement"] + } + + fn case_node_kinds(&self) -> &[&str] { + &["match_statement"] + } + + fn case_arm_node_kinds(&self) -> &[&str] { + &["case_clause"] + } + + fn case_pattern_node_kinds(&self) -> &[&str] { + &["case_pattern", "pattern"] + } + + fn case_container_stop_node_kinds(&self) -> &[&str] { + &["function_definition", "class_definition"] + } + + fn case_subject_skip_node_kinds(&self) -> &[&str] { + &["case_clause", "else", "comment"] + } + + fn default_case_patterns(&self) -> &[&str] { + &["_", "default"] + } +/* NOTE(review): the symbolic AND spelling is not Python syntax; presumably kept for parity with the other profiles - confirm. */ + fn boolean_and_operators(&self) -> &[&str] { + &["and", "&&"] + } + + fn boolean_container_node_kinds(&self) -> &[&str] { + &["binary_operator", "boolean_operator", "comparison_operator"] + } + + fn boolean_wrapper_node_kinds(&self) -> &[&str] { + &["block"] + } + + fn parenthesized_wrapper_node_kinds(&self) -> &[&str] { + &["parenthesized_expression"] + } + + fn field_like_node_kinds(&self) -> &[&str] { + &["attribute"] + } + + fn path_action_node_kinds(&self) -> &[&str] { + &["call", "expression_statement", "return_statement"] + } + + fn simple_action_wrapper_node_kinds(&self) -> &[&str] { + &["block"] + } + + fn
path_transparent_branch_body_node_kinds(&self) -> &[&str] { + &["if_statement"] + } +/* Default read target, suppressed in two cases: the node sits inside a type annotation, or it is a `with` context expression whose field name ends in `_lock`. */ + fn state_read_target(&self, node: Node<'_>, source: &str) -> Option { + if python_type_annotation_expression(node) { + return None; + } + let target = self.default_state_read_target(node, source)?; + if python_with_context_expression(node) && python_lock_context_field(&target.field) { + return None; + } + Some(target) + } +/* Assignment and augmented-assignment nodes are never clone candidates; everything else defers to the default rule. */ + fn clone_candidate_node(&self, node: &RawNode) -> bool { + if python_assignment_wrapper_node(node) { + return false; + } + default_clone_candidate_node(node) + } +/* Selects the children that feed a clone fingerprint, unwrapping or dropping Python wrapper nodes. The checks run top to bottom and the first match returns, so their order matters; unmatched nodes contribute all of their children. */ + fn clone_fingerprint_children<'a>(&self, node: &'a RawNode) -> Vec<&'a RawNode> { + if node.kind == "type" + && node.children.len() == 1 + && matches!(node.children[0].kind.as_str(), "generic_type" | "string") + { + return node.children[0].children.iter().collect(); + } + if python_terminal_wrapper_node(node) { + return Vec::new(); + } + if node.kind == "expression_statement" && node.children.len() == 1 { + let child = &node.children[0]; + if python_expression_wrapper_child(child) { + return child.children.iter().collect(); + } + } + if python_call_expression_statement(node) { + return node.children.iter().collect(); + } + if node.kind == "block" && node.children.len() == 1 { + let child = &node.children[0]; + if python_single_statement_block_child(child) { + return child.children.iter().collect(); + } + if child.kind == "expression_statement" { + return python_expression_statement_clone_children(child); + } + } + if node.kind == "if_statement" { + return node + .children + .iter() + .flat_map(python_if_statement_clone_children) + .collect(); + } + if matches!(node.kind.as_str(), "else_clause" | "except_clause") { + return node + .children + .iter() + .flat_map(python_clause_clone_children) + .collect(); + } + if node.kind == "with_clause" { + if python_simple_with_clause(node) { + return Vec::new(); + } + return node + .children + .iter() + .flat_map(python_with_clause_clone_children) +
.collect(); + } + node.children.iter().collect() + } +/* A `string_content` child made up solely of one escape sequence is fingerprinted as the fixed pair (`lit`, 1); all other children return None. */ + fn clone_child_fingerprint( + &self, + _parent: &RawNode, + child: &RawNode, + ) -> Option<(String, usize)> { + if python_escape_only_string_content(child) { + return Some(("lit".to_string(), 1)); + } + None + } +} +/* Walks the ancestors, starting at the parent: true on reaching `with_clause` or `with_item` before a block, function, class or module node; false otherwise. */ +fn python_with_context_expression(node: Node<'_>) -> bool { + let mut current = node.parent(); + while let Some(parent) = current { + match parent.kind() { + "with_clause" | "with_item" => return true, + "block" | "function_definition" | "class_definition" | "module" => return false, + _ => current = parent.parent(), + } + } + false +} +/* Walks upward starting at the node itself: true on reaching a `type` or `type_parameter` node before a block, function, class or module node. */ +fn python_type_annotation_expression(node: Node<'_>) -> bool { + let mut current = Some(node); + while let Some(item) = current { + match item.kind() { + "type" | "type_parameter" => return true, + "block" | "function_definition" | "class_definition" | "module" => return false, + _ => current = item.parent(), + } + } + false +} +/* True for any field name ending in `_lock`; the equality test is already covered by the suffix test. */ +fn python_lock_context_field(field: &str) -> bool { + field == "_lock" || field.ends_with("_lock") +} +/* True for `assignment` and `augmented_assignment` nodes. */ +fn python_assignment_wrapper_node(node: &RawNode) -> bool { + matches!(node.kind.as_str(), "assignment" | "augmented_assignment") +} +/* True for assignments and for `call`, `string` and `yield` nodes. */ +fn python_expression_wrapper_child(node: &RawNode) -> bool { + python_assignment_wrapper_node(node) + || matches!(node.kind.as_str(), "call" | "string" | "yield") +} +/* For an `expression_statement` with a single wrapper child, returns the children of that child; otherwise returns the direct children of the node. */ +fn python_expression_statement_clone_children(node: &RawNode) -> Vec<&RawNode> { + if node.kind == "expression_statement" && node.children.len() == 1 { + let child = &node.children[0]; + if python_expression_wrapper_child(child) { + return child.children.iter().collect(); + } + } + node.children.iter().collect() +} +/* True for an `expression_statement` that either has a single `call` child, or has an argument-list child and no assignment node or assignment token among its children. */ +fn python_call_expression_statement(node: &RawNode) -> bool { + if node.kind != "expression_statement" { + return false; + } + if node.children.len() == 1 && node.children[0].kind == "call" { + return true; + } + node.children + .iter() + .any(|child| matches!(child.kind.as_str(), "argument_list" | "arguments")) + &&
node + .children + .iter() + .all(|child| !python_assignment_wrapper_node(child) && !python_assignment_token(child)) +} +/* True when the node text is one of the six assignment operator tokens listed in the profile. */ +fn python_assignment_token(node: &RawNode) -> bool { + matches!(node.text.as_str(), "=" | "+=" | "-=" | "*=" | "/=" | "%=") +} +/* Recognizes wrapper nodes that add nothing beyond their single child: break and continue statements and keyword separators whose only child repeats their text, `as_pattern_target` and `dotted_name` around one identifier, and simple `type` wrappers. */ +fn python_terminal_wrapper_node(node: &RawNode) -> bool { + if matches!(node.kind.as_str(), "break_statement" | "continue_statement") { + return node.children.len() == 1 && node.children[0].text == node.text; + } + if node.kind == "as_pattern_target" { + return node.children.len() == 1 && node.children[0].kind == "identifier"; + } + if node.kind == "dotted_name" { + return node.children.len() == 1 && node.children[0].kind == "identifier"; + } + if node.kind == "keyword_separator" { + return node.children.len() == 1 && node.children[0].text == node.text; + } + python_simple_type_wrapper_node(node) +} +/* True for a `type` node with exactly one childless child that is an identifier or a literal kind and carries the same text as the wrapper. */ +fn python_simple_type_wrapper_node(node: &RawNode) -> bool { + if node.kind != "type" || node.children.len() != 1 { + return false; + } + let child = &node.children[0]; + child.children.is_empty() + && matches!( + child.kind.as_str(), + "identifier" | "none" | "true" | "false" | "integer" | "float" | "string" + ) + && child.text == node.text +} +/* True for a `string_content` node whose single child is an `escape_sequence` with identical text. */ +fn python_escape_only_string_content(node: &RawNode) -> bool { + node.kind == "string_content" + && node.children.len() == 1 + && node.children[0].kind == "escape_sequence" + && node.children[0].text == node.text +} +/* Statement kinds that `clone_fingerprint_children` unwraps when they are the only child of a block. */ +fn python_single_statement_block_child(node: &RawNode) -> bool { + matches!( + node.kind.as_str(), + "assert_statement" + | "break_statement" + | "continue_statement" + | "for_statement" + | "function_definition" + | "if_statement" + | "raise_statement" + | "try_statement" + | "with_statement" + | "while_statement" + ) +} +/* A block holding only a break or continue statement is replaced by the children of that statement; any other node is passed through unchanged. */ +fn python_if_statement_clone_children(node: &RawNode) -> Vec<&RawNode> { + if node.kind == "block" + && node.children.len() == 1 + && matches!( + node.children[0].kind.as_str(), + "break_statement" | "continue_statement" + ) + { + return
node.children[0].children.iter().collect(); + } + vec![node] +} +/* NOTE(review): the body is identical to `python_if_statement_clone_children`; the two could share one helper. A block holding only a break or continue statement is replaced by the children of that statement. */ +fn python_clause_clone_children(node: &RawNode) -> Vec<&RawNode> { + if node.kind == "block" + && node.children.len() == 1 + && matches!( + node.children[0].kind.as_str(), + "break_statement" | "continue_statement" + ) + { + return node.children[0].children.iter().collect(); + } + vec![node] +} +/* True when the clause has exactly one `with_item` that carries the same text as the clause and wraps a single identifier. */ +fn python_simple_with_clause(node: &RawNode) -> bool { + if node.children.len() != 1 || node.children[0].kind != "with_item" { + return false; + } + let with_item = &node.children[0]; + with_item.text == node.text + && with_item.children.len() == 1 + && with_item.children[0].kind == "identifier" +} +/* Flattens a `with_item`: when its only child is an `as_pattern`, returns the children of that pattern, otherwise its own children. Nodes of any other kind pass through unchanged. */ +fn python_with_clause_clone_children(node: &RawNode) -> Vec<&RawNode> { + if node.kind == "with_item" { + if node.children.len() == 1 && node.children[0].kind == "as_pattern" { + return node.children[0].children.iter().collect(); + } + return node.children.iter().collect(); + } + vec![node] +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/ruby.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/ruby.rs new file mode 100644 index 000000000..27d274faa --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/ruby.rs @@ -0,0 +1,2273 @@ +use super::super::tree_sitter_adapter::{ + direct_operator, first_child_kind, first_named_text, named_children, next_sibling_raw_text, + previous_sibling_raw_text, AssignmentTarget, CallTarget, Target, +}; +use super::super::{ + CallSite, Document, FunctionDef, Language, ProtocolCall, ProtocolMethodEffect, + ProtocolMethodPath, SemanticEffectSite, StateRead, StateWrite, +}; +use super::base::{ + default_clone_candidate_node, normalize_protocol_state, protocol_method_name, LanguageProfile, +}; +use crate::decomplex::ast::{node_text, normalize_text, span, RawNode}; +use regex::Regex; +use std::collections::BTreeSet; +use std::path::Path; +use tree_sitter::{Language as TreeSitterLanguage, Node}; +/* Field-less unit struct that carries the Ruby implementation of `LanguageProfile`. */ +pub(crate) struct RubyProfile; + +const
RUBY_PROTOCOL_IGNORED_MIDS: &[&str] = &[ + "abstract!", + "alias_method", + "any", + "attr_accessor", + "attr_reader", + "attr_writer", + "bind", + "cast", + "checked", + "enum", + "extend", + "final", + "include", + "interface!", + "let", + "must", + "must_because", + "nilable", + "override", + "overridable", + "params", + "prepend", + "private", + "private_class_method", + "protected", + "public", + "require", + "require_relative", + "requires_ancestor", + "sealed!", + "sig", + "type_member", + "type_template", + "untyped", + "unsafe", + "void", + "a_kind_of", + "after", + "around", + "before", + "be", + "be_a", + "be_an", + "be_empty", + "be_falsey", + "be_nil", + "be_truthy", + "change", + "contain_exactly", + "context", + "describe", + "eq", + "eql", + "equal", + "expect", + "have_attributes", + "have_key", + "have_received", + "it", + "match", + "not_to", + "raise_error", + "receive", + "subject", + "to", +];/* Presumably the method names treated as state-mutating by the protocol analysis; the consumer is outside this chunk, so confirm there. NOTE(review): the `each_key=` entry looks unusual - confirm it is intended. */ +const RUBY_PROTOCOL_MUTATING_MIDS: &[&str] = &[ + "<<", + "[]=", + "add", + "append", + "clear", + "collect!", + "compact!", + "concat", + "declare", + "delete", + "delete_if", + "each_key=", + "fill", + "filter!", + "keep_if", + "mark", + "merge!", + "move", + "push", + "reject!", + "replace", + "resolve", + "shift", + "stamp", + "store", + "unshift", + "update", + "write", +];/* Presumably operator method names that must not be mistaken for mutations - confirm against the consumer. */ +const RUBY_PROTOCOL_NON_MUTATING_OPERATOR_MIDS: &[&str] = &["!", "!=", "!~"]; +/* Ruby profile: tables of tree-sitter node kinds plus Ruby-specific call, visibility, state and clone handling. */ +impl LanguageProfile for RubyProfile { + fn language(&self) -> Language { + Language::Ruby + } + + fn grammar(&self) -> TreeSitterLanguage { + tree_sitter_ruby::LANGUAGE.into() + } + + fn function_node_kinds(&self) -> &[&str] { + &["method"] + } + + fn class_owner_node_kinds(&self) -> &[&str] { + &["class"] + } + + fn module_owner_node_kinds(&self) -> &[&str] { + &["module"] + } + + fn call_node_kinds(&self) -> &[&str] { + &["call"] + } + + fn parameter_list_node_kinds(&self) -> &[&str] { + &["method_parameters"] + } + + fn parameter_identifier_node_kinds(&self) -> &[&str] { + &["identifier"] + } +
+ fn function_body_node_kinds(&self) -> &[&str] { + &["body_statement", "do_block"] + } + + fn nested_statement_wrapper_node_kinds(&self) -> &[&str] { + &["body_statement"] + } +/* Constants count as identifiers alongside plain identifiers. */ + fn identifier_node_kinds(&self) -> &[&str] { + &["identifier", "constant"] + } + + fn assignment_node_kinds(&self) -> &[&str] { + &["assignment", "operator_assignment"] + } + + fn indexed_lhs_node_kinds(&self) -> &[&str] { + &["element_assignment", "element_reference"] + } + + fn expression_list_node_kinds(&self) -> &[&str] { + &["left_assignment_list"] + } +/* Includes the conditional assignment operators in addition to the arithmetic compound ones. */ + fn assignment_operator_tokens(&self) -> &[&str] { + &["=", "+=", "-=", "*=", "/=", "%=", "&&=", "||="] + } + + fn path_action_node_kinds(&self) -> &[&str] { + &["call", "return"] + } + + fn simple_action_wrapper_node_kinds(&self) -> &[&str] { + &["body_statement"] + } + + fn comparison_node_kinds(&self) -> &[&str] { + &["binary"] + } +/* Covers the conditional forms including the modifier variants, `case`, and the three loop kinds. */ + fn branch_node_kinds(&self) -> &[&str] { + &[ + "if", + "unless", + "if_modifier", + "unless_modifier", + "case", + "while", + "until", + "for", + ] + } + + fn case_node_kinds(&self) -> &[&str] { + &["case"] + } + + fn case_arm_node_kinds(&self) -> &[&str] { + &["when"] + } + + fn case_pattern_node_kinds(&self) -> &[&str] { + &["pattern"] + } + + fn case_container_stop_node_kinds(&self) -> &[&str] { + &["method", "class", "module"] + } + + fn case_subject_skip_node_kinds(&self) -> &[&str] { + &["when", "else", "then", "comment"] + } + + fn default_case_patterns(&self) -> &[&str] { + &["_", "default", "else"] + } + + fn boolean_and_operators(&self) -> &[&str] { + &["&&", "and"] + } + + fn boolean_container_node_kinds(&self) -> &[&str] { + &["binary"] + } + + fn boolean_wrapper_node_kinds(&self) -> &[&str] { + &["body_statement", "pattern", "argument_list"] + } + + fn accessor_call_node_kinds(&self) -> &[&str] { + &["call"] + } + + fn argument_list_node_kinds(&self) -> &[&str] { + &["argument_list"] + } + + fn block_argument_node_kinds(&self) -> &[&str] { + &["do_block", "block"] + } + + fn
call_target<'tree>(&self, node: Node<'tree>, source: &str) -> Option> { + if node.kind() == "call" && ruby_single_command_argument_call(node, source) { + return None; + } + let mut target = match node.kind() { + "call" => { + ruby_proc_call_target(node, source).or_else(|| ruby_call_target(node, source)) + } + "body_statement" | "block_body" => ruby_bare_body_call_target(node, source), + "identifier" => ruby_visibility_identifier_call_target(node, source) + .or_else(|| ruby_bare_call_target(node, source)), + _ => None, + }?; +/* Calls without arguments and without a block may get a narrower span. */ if target.arguments.is_empty() && !ruby_call_has_block(node) { + if let Some(span) = + ruby_narrow_no_arg_call_span(node, source, &target.receiver, &target.message) + { + target.span = Some(span); + } + } + let effective_span = target + .span + .unwrap_or_else(|| target.source_node.map(span).unwrap_or_else(|| span(node))); +/* Drops predicate calls on `self` whose span elements 0 and 2 differ; presumably those are the start and end rows, so this rejects multi-line matches - confirm against `span`. */ if target.receiver == "self" + && target.message.ends_with('?') + && effective_span[0] != effective_span[2] + { + return None; + } + if ruby_chained_element_predicate_target(&target) { + return None; + } + ruby_valid_call_target(&target).then_some(target) + } +/* Singleton methods are named `self.` followed by the method name (taken from the `name` field, else the last identifier-like named child). `body_statement` and `argument_list` nodes whose first child is `def` use the hidden-method lookup; everything else uses the default. */ + fn function_name(&self, node: Node<'_>, source: &str) -> Option { + match node.kind() { + "singleton_method" => { + let name = node + .child_by_field_name("name") + .map(|name| node_text(name, source).to_string()) + .or_else(|| { + named_children(node) + .into_iter() + .rev() + .find(|child| { + matches!( + child.kind(), + "identifier" | "field_identifier" | "property_identifier" + ) + }) + .map(|child| node_text(child, source).to_string()) + })?; + Some(format!("self.{name}")) + } + "body_statement" if first_child_kind(node) == Some("def") => { + hidden_ruby_method_name(node, source) + } + "argument_list" if first_child_kind(node) == Some("def") => { + inline_def_name(node, source) + } + _ => self.default_function_name(node, source), + } + } +/* Only decides visibility for singleton methods (always `public`) and for an inline `def` passed to a visibility call on `self`; all other nodes return None here. */ + fn function_visibility(&self, node: Node<'_>, source: &str) -> Option { + if node.kind() == "singleton_method" { +
return Some("public".to_string()); + } + if node.kind() == "argument_list" && first_child_kind(node) == Some("def") { + let target = node + .parent() + .and_then(|parent| (parent.kind() == "call").then_some(parent)) + .and_then(|parent| ruby_call_target(parent, source))?; + if target.receiver == "self" + && matches!(target.message.as_str(), "private" | "protected" | "public") + { + return Some(target.message); + } + } + None + } +/* Post-collection hook: fills in function visibility from the collected visibility calls via `apply_ruby_visibility`. */ + fn after_collect_facts(&self, functions: &mut Vec, calls: &[CallSite]) { + apply_ruby_visibility(functions, calls); + } +/* Thin forwarder to `ruby_structural_semantic_effect_sites` with the arguments unchanged. */ + fn structural_semantic_effect_sites( + &self, + root: Node<'_>, + source: &str, + file: &Path, + functions: &[FunctionDef], + state_reads: &[StateRead], + state_writes: &[StateWrite], + ) -> Vec { + ruby_structural_semantic_effect_sites( + root, + source, + file, + functions, + state_reads, + state_writes, + ) + } +/* Produces one effect record per function definition in the document, carrying the reads and writes returned by `ruby_protocol_method_access`. */ + fn protocol_method_effects(&self, document: &Document) -> Vec { + document + .function_defs + .iter() + .map(|function_def| { + let (reads, writes) = ruby_protocol_method_access(function_def); + ProtocolMethodEffect { + file: function_def.file.clone(), + owner: function_def.owner.clone(), + name: protocol_method_name(&function_def.name), + line: function_def.line, + reads, + writes, + } + }) + .collect() + } +/* Produces one record per path returned by `ruby_protocol_paths_for_statements` for each function body, so a function may contribute zero or several records. */ + fn protocol_call_paths(&self, document: &Document) -> Vec { + document + .function_defs + .iter() + .flat_map(|function_def| { + let statements = ruby_raw_function_body_statements(&function_def.body); + let local_names = ruby_protocol_local_names(function_def, &statements); + ruby_protocol_paths_for_statements(&statements, &local_names) + .into_iter() + .map(|path| ProtocolMethodPath { + file: function_def.file.clone(), + owner: function_def.owner.clone(), + name: protocol_method_name(&function_def.name), + line: function_def.line, + calls: path.calls, + }) + .collect::>() + }) + .collect() + } +/* A `body_statement` whose first child is `class` or `module` is treated as a declaration and named after its first constant or identifier-like child; other nodes use the default. */ + fn owner_name_from_declaration(&self, node: Node<'_>, source: &str) -> Option { + if node.kind()
== "body_statement" + && matches!(first_child_kind(node), Some("class" | "module")) + { + return first_named_text(node, source, &["constant", "identifier", "type_identifier"]); + } + self.default_owner_name_from_declaration(node, source) + } +/* True when a `body_statement`, `block_body` or `argument_list` node has `case` as its first child. */ + fn hidden_case(&self, node: Node<'_>) -> bool { + matches!( + node.kind(), + "body_statement" | "block_body" | "argument_list" + ) && first_child_kind(node) == Some("case") + } +/* Returns the first direct child of kind `case`, if any. */ + fn hidden_case_source_node<'tree>(&self, node: Node<'tree>) -> Option> { + let mut cursor = node.walk(); + let result = node + .children(&mut cursor) + .find(|child| child.kind() == "case"); + result + } +/* True for a `case` node, or a hidden case, for which `decision_subject` finds no subject. */ + fn predicate_less_case(&self, node: Node<'_>) -> bool { + (node.kind() == "case" || self.hidden_case(node)) && self.decision_subject(node).is_none() + } + + fn case_pattern_texts(&self, patterns: &[Node<'_>], source: &str) -> Vec { + ruby_case_pattern_texts(patterns, source) + } +/* Tries the Ruby state-variable lookup first and falls back to the default target. */ + fn state_target(&self, lhs: Node<'_>, source: &str) -> Option { + ruby_state_variable_target(lhs, source).or_else(|| self.default_state_target(lhs, source)) + } +/* Like `state_target` for reads, with three suppressions: nodes matched by `ruby_direct_flat_map_block_statement`, nodes matched by `ruby_sorbet_signature_payload_node`, and targets matched by `ruby_chained_element_predicate_read_target`. */ + fn state_read_target(&self, node: Node<'_>, source: &str) -> Option { + if ruby_direct_flat_map_block_statement(node, source) { + return None; + } + if ruby_sorbet_signature_payload_node(node, source) { + return None; + } + let target = ruby_state_variable_target(node, source) + .or_else(|| self.default_state_read_target(node, source))?; + if ruby_chained_element_predicate_read_target(&target) { + return None; + } + Some(target) + } +/* Default assignment target; otherwise an instance or global variable in assignment-LHS position becomes a target whose source is its parent node (or the node itself when it has no parent). */ + fn assignment_target<'tree>(&self, node: Node<'tree>) -> Option> { + self.default_assignment_target(node) + .or_else(|| match node.kind() { + "instance_variable" | "global_variable" if self.assignment_lhs_node(node) => { + Some(AssignmentTarget { + lhs: node, + source: node.parent().unwrap_or(node), + }) + } + _ => None, + }) + } +/* Skips every `operator_assignment`, and skips an assignment-LHS node that is not an instance variable when the raw text of its next sibling is not the plain assignment token. */ + fn skip_state_write_node(&self, node: Node<'_>) -> bool { + node.kind() == "operator_assignment" + || (self.assignment_lhs_node(node) + &&
next_sibling_raw_text(node).as_deref() != Some("=") + && node.kind() != "instance_variable") + } +/* Write targets whose field is the index operator, or whose field starts with `$`, are skipped. */ + fn skip_state_write_target(&self, target: &Target) -> bool { + target.field == "[]" || target.field.starts_with('$') + } + + fn suppress_indexed_lhs_reads(&self) -> bool { + false + } + + fn indexed_lhs_descendants_are_writes(&self) -> bool { + false + } + + fn keyed_element_first_named_child_is_key(&self) -> bool { + false + } + + fn nested_assignment_dependencies_only(&self) -> bool { + true + } +/* Assignments to instance or global variables are never clone candidates; everything else defers to the default rule. */ + fn clone_candidate_node(&self, node: &RawNode) -> bool { + if ruby_state_assignment_node(node) { + return false; + } + default_clone_candidate_node(node) + } +/* A `body_statement` whose only named child is a state assignment contributes the children of that assignment; every other node contributes its own children. */ + fn clone_fingerprint_children<'a>(&self, node: &'a RawNode) -> Vec<&'a RawNode> { + if node.kind == "body_statement" { + let named = raw_named_children(node); + if named.len() == 1 && ruby_state_assignment_node(named[0]) { + return named[0].children.iter().collect(); + } + } + node.children.iter().collect() + } +} +/* True for an `assignment` or `operator_assignment` whose first named child is an instance or global variable. */ +fn ruby_state_assignment_node(node: &RawNode) -> bool { + if !matches!(node.kind.as_str(), "assignment" | "operator_assignment") { + return false; + } + raw_named_children(node) + .first() + .map(|lhs| matches!(lhs.kind.as_str(), "instance_variable" | "global_variable")) + .unwrap_or(false) +} +/* Finds the method name among the named children. When a `self` or constant child is present, the search starts after it and the result is prefixed with `self.`; otherwise the first identifier-like child is returned as-is. */ +fn hidden_ruby_method_name(node: Node<'_>, source: &str) -> Option { + let children = named_children(node); + let receiver_index = children + .iter() + .position(|child| matches!(child.kind(), "self" | "constant")); + let search: Vec> = if let Some(index) = receiver_index { + children.into_iter().skip(index + 1).collect() + } else { + children + }; + let name = search + .into_iter() + .find(|child| { + matches!( + child.kind(), + "identifier" | "field_identifier" | "property_identifier" + ) + }) + .map(|child| node_text(child, source).to_string())?; + if receiver_index.is_some() { + Some(format!("self.{name}")) + } else { + Some(name) + } +} +/* Alias for `hidden_ruby_method_name`, used for a `def` found inside an argument list. */ +fn inline_def_name(node: Node<'_>, source: &str) -> Option {
+ hidden_ruby_method_name(node, source) +} +/* Builds a call target from a `call` node. The message comes from the `method` field, else the first identifier or constant child, else the whole node text when it is a simple receiver-less call. The receiver defaults to `self`. Receiver-plus-method calls with no arguments and no block get a span running from the receiver start to the method end. */ +fn ruby_call_target<'tree>(node: Node<'tree>, source: &str) -> Option> { + let receiver = node.child_by_field_name("receiver"); + let method = node.child_by_field_name("method"); + let message = method + .map(|method| node_text(method, source).to_string()) + .or_else(|| first_named_text(node, source, &["identifier", "constant"])) + .or_else(|| { + let text = normalize_text(node_text(node, source)); + (receiver.is_none() && ruby_simple_call_text(&text)).then_some(text) + })?; + let arguments = if ruby_require_message(&message) { + ruby_require_argument_texts(node, source) + } else { + ruby_argument_texts(node, source) + }; + + let mut target = CallTarget::new( + receiver + .map(|receiver| normalize_text(node_text(receiver, source))) + .unwrap_or_else(|| "self".to_string()), + message, + arguments, + ); + if target.arguments.is_empty() && !ruby_call_has_block(node) { + if let (Some(receiver), Some(method)) = (receiver, method) { + let receiver_span = span(receiver); + let method_span = span(method); + target.span = Some([ + receiver_span[0], + receiver_span[1], + method_span[2], + method_span[3], + ]); + } + } + target.safe_navigation = ruby_safe_navigation_call(node, source); + Some(target) +} +/* Replays, per owner, the function definitions and visibility calls in line order. A visibility call without arguments switches the running default (initially `public`); one with arguments retags the named functions of that owner. Functions that still have no visibility take the running default, except names containing `.`, which become `public`. */ +fn apply_ruby_visibility(functions: &mut [FunctionDef], calls: &[CallSite]) { + let mut owners = functions + .iter() + .map(|function| function.owner.clone()) + .collect::>(); + owners.sort(); + owners.dedup(); + + for owner in owners { + let function_indices = functions + .iter() + .enumerate() + .filter_map(|(index, function)| (function.owner == owner).then_some(index)) + .collect::>(); + let call_indices = calls + .iter() + .enumerate() + .filter_map(|(index, call)| { + (call.owner == owner && ruby_visibility_call(call)).then_some(index) + }) + .collect::>(); + + let mut visibility = "public".to_string(); + let mut events = Vec::new(); + events.extend( + function_indices + .iter() + .map(|index| (functions[*index].line, 1_u8,
*index)), + ); + events.extend( + call_indices + .iter() + .map(|index| (calls[*index].line, 0_u8, *index)), + ); +/* Tuples sort by line first, then by kind, so a visibility call (kind 0) is handled before a function (kind 1) on the same line. */ events.sort(); + + for (_, kind, index) in events { + if kind == 1 { + if functions[index].visibility.is_none() { + functions[index].visibility = Some(if functions[index].name.contains('.') { + "public".to_string() + } else { + visibility.clone() + }); + } + } else { + let call = &calls[index]; + if call.arguments.is_empty() { + visibility = call.message.clone(); + } else { + for argument in &call.arguments { + let name = ruby_visibility_arg_name(argument); + for function_index in function_indices.iter().rev() { + if functions[*function_index].name == name { + functions[*function_index].visibility = Some(call.message.clone()); + break; + } + } + } + } + } + } + } +} +/* True for a call recorded at `(top-level)` on receiver `self` whose message is one of the three visibility keywords. */ +fn ruby_visibility_call(call: &CallSite) -> bool { + call.function == "(top-level)" + && call.receiver == "self" + && matches!(call.message.as_str(), "public" | "protected" | "private") +} +/* Treats a bare visibility identifier as an argument-less call on `self`, unless its parent is a call, argument list, parameter list or assignment. Identifiers without a parent yield None. */ +fn ruby_visibility_identifier_call_target<'tree>( + node: Node<'tree>, + source: &str, +) -> Option> { + let message = node_text(node, source); + if !matches!(message, "private" | "protected" | "public") { + return None; + } + let parent = node.parent()?; + if matches!( + parent.kind(), + "call" | "argument_list" | "method_parameters" | "block_parameters" | "assignment" + ) { + return None; + } + let mut target = CallTarget::new("self".to_string(), message.to_string(), Vec::new()); + target.source_node = Some(node); + Some(target) +} +/* Reduces a visibility-call argument to a bare method name: trims whitespace, then strips every leading colon and all leading and trailing quote characters of either kind. */ +fn ruby_visibility_arg_name(argument: &str) -> String { + argument + .trim() + .trim_start_matches(':') + .trim_start_matches('"') + .trim_end_matches('"') + .trim_start_matches('\'') + .trim_end_matches('\'') + .to_string() +} +/* Builds a call on `self` from a bare identifier accepted by `ruby_bare_call_identifier`. The parent node is used as the source when the parent is a `call` or the next sibling is an argument list; otherwise the identifier itself is the source. */ +fn ruby_bare_call_target<'tree>(node: Node<'tree>, source: &str) -> Option> { + if !ruby_bare_call_identifier(node, source) { + return None; + } + let parent = node.parent(); + let source_node = if parent + .map(|parent| parent.kind() ==
"call") + .unwrap_or(false) + || node + .next_sibling() + .map(|sibling| sibling.kind() == "argument_list") + .unwrap_or(false) + { + parent.unwrap_or(node) + } else { + node + }; + let mut target = CallTarget::new( + "self".to_string(), + node_text(node, source).to_string(), + ruby_argument_texts(source_node, source), + ); + target.source_node = Some(source_node); + target.safe_navigation = ruby_safe_navigation_call(source_node, source); + Some(target) +} +/* Interprets a whole body node as a call. Definitions (def, class, module) are rejected; an explicit receiver-plus-message pair is preferred; otherwise the trimmed text must be a simple call text other than true, false, nil or self, and becomes an argument-less call on `self`. */ +fn ruby_bare_body_call_target<'tree>(node: Node<'tree>, source: &str) -> Option> { + let stripped = node_text(node, source).trim_start(); + if matches!(first_child_kind(node), Some("def" | "class" | "module")) + || stripped.starts_with("def ") + || stripped.starts_with("class ") + || stripped.starts_with("module ") + { + return None; + } + if let Some(explicit) = ruby_explicit_receiver_body_call_target(node, source) { + return Some(explicit); + } + + let message = node_text(node, source).trim().to_string(); + if !ruby_simple_call_text(&message) + || matches!(message.as_str(), "true" | "false" | "nil" | "self") + { + return None; + } + Some(CallTarget::new("self".to_string(), message, Vec::new())) +} +/* Uses the first two named children as receiver and message when their kinds allow it (receiver: self, constant or identifier; message: identifier or constant). The span runs from the receiver start to the message end; arguments are always empty. */ +fn ruby_explicit_receiver_body_call_target<'tree>( + node: Node<'tree>, + source: &str, +) -> Option> { + let children = named_children(node); + let receiver = *children.first()?; + let message = *children.get(1)?; + if !matches!(receiver.kind(), "self" | "constant" | "identifier") { + return None; + } + if !matches!(message.kind(), "identifier" | "constant") { + return None; + } + let mut target = CallTarget::new( + normalize_text(node_text(receiver, source)), + node_text(message, source).to_string(), + Vec::new(), + ); + let receiver_span = span(receiver); + let message_span = span(message); + target.span = Some([ + receiver_span[0], + receiver_span[1], + message_span[2], + message_span[3], + ]); + Some(target) +} +/* Recognizes a `call` node that has an unnamed `.` child but no `method` field, and reports it with the message `call`. Requires an argument list; the block flag is set when that list contains a block child. */ +fn ruby_proc_call_target<'tree>(node: Node<'tree>, source: &str) -> Option> { + if node.kind()
!= "call" { + return None; + } + let mut cursor = node.walk(); + if !node + .children(&mut cursor) + .any(|child| !child.is_named() && node_text(child, source) == ".") + { + return None; + } + if node.child_by_field_name("method").is_some() { + return None; + } + + let receiver = node + .child_by_field_name("receiver") + .or_else(|| named_children(node).into_iter().next())?; + let args = node.child_by_field_name("arguments").or_else(|| { + named_children(node) + .into_iter() + .find(|child| child.kind() == "argument_list") + })?; + let mut target = CallTarget::new( + normalize_text(node_text(receiver, source)), + "call".to_string(), + ruby_argument_texts(node, source), + ); + target.source_node = Some(node); + target.safe_navigation = ruby_safe_navigation_call(node, source); + target.block = named_children(args) + .into_iter() + .any(|child| matches!(child.kind(), "do_block" | "block")); + Some(target) +} + +fn ruby_argument_texts(node: Node<'_>, source: &str) -> Vec { + let args = node.child_by_field_name("arguments").or_else(|| { + named_children(node) + .into_iter() + .find(|child| child.kind() == "argument_list") + }); + let Some(args) = args else { + return Vec::new(); + }; + if let Some(arguments) = ruby_inline_def_argument_texts(args, source) { + return arguments; + } + if let Some(arguments) = ruby_single_command_call_argument_texts(args, source) { + return arguments; + } + let values = named_children(args) + .into_iter() + .map(|child| ruby_argument_text(child, args, source)) + .collect::>(); + if !values.is_empty() { + return values; + } + + let mut text = node_text(args, source).trim().to_string(); + if text.starts_with('(') && text.ends_with(')') && text.len() >= 2 { + text = text[1..text.len() - 1].to_string(); + } + text.split(',') + .map(normalize_text) + .filter(|arg| !arg.is_empty()) + .collect() +} + +fn ruby_argument_text(node: Node<'_>, _args: Node<'_>, source: &str) -> String { + normalize_text(node_text(node, source)) +} + +fn 
ruby_single_command_call_argument_texts(args: Node<'_>, source: &str) -> Option> { + if node_text(args, source).trim_start().starts_with('(') { + return None; + } + let children = named_children(args); + if children.len() != 1 || children[0].kind() != "call" { + return None; + } + let values = named_children(children[0]) + .into_iter() + .map(|part| normalize_text(node_text(part, source))) + .filter(|part| !part.is_empty()) + .collect::>(); + (!values.is_empty()).then_some(values) +} + +fn ruby_require_argument_texts(node: Node<'_>, source: &str) -> Vec { + let args = node.child_by_field_name("arguments").or_else(|| { + named_children(node) + .into_iter() + .find(|child| child.kind() == "argument_list") + }); + let Some(args) = args else { + return Vec::new(); + }; + let children = named_children(args); + if children.len() == 1 { + let child = children[0]; + if child.kind() == "string" && !node_text(args, source).trim_start().starts_with('(') { + return vec![ruby_unquoted_string_text(child, source)]; + } + if child.kind() == "call" && !node_text(args, source).trim_start().starts_with('(') { + return named_children(child) + .into_iter() + .map(|part| normalize_text(node_text(part, source))) + .filter(|part| !part.is_empty()) + .collect(); + } + } + children + .into_iter() + .map(|child| normalize_text(node_text(child, source))) + .filter(|part| !part.is_empty()) + .collect() +} + +fn ruby_unquoted_string_text(node: Node<'_>, source: &str) -> String { + if let Some(content) = named_children(node) + .into_iter() + .find(|child| child.kind() == "string_content") + { + return normalize_text(node_text(content, source)); + } + let text = normalize_text(node_text(node, source)); + if text.len() >= 2 + && ((text.starts_with('"') && text.ends_with('"')) + || (text.starts_with('\'') && text.ends_with('\''))) + { + return text[1..text.len() - 1].to_string(); + } + text +} + +fn ruby_inline_def_argument_texts(args: Node<'_>, source: &str) -> Option> { + let children = 
named_children(args); + if children.len() != 1 || first_child_kind(children[0]) != Some("def") { + return None; + } + let method = children[0]; + let name = method + .child_by_field_name("name") + .or_else(|| { + named_children(method) + .into_iter() + .find(|child| matches!(child.kind(), "identifier" | "field_identifier")) + }) + .map(|node| normalize_text(node_text(node, source)))?; + let params = named_children(method) + .into_iter() + .find(|child| child.kind() == "method_parameters") + .map(|node| normalize_text(node_text(node, source))); + let body = named_children(method) + .into_iter() + .find(|child| child.kind() == "body_statement") + .map(|node| normalize_text(node_text(node, source))); + let mut out = vec![name]; + if let Some(params) = params.filter(|value| !value.is_empty()) { + out.push(params); + } + if let Some(body) = body.filter(|value| !value.is_empty()) { + out.push(body); + } + Some(out) +} + +fn ruby_structural_semantic_effect_sites( + root: Node<'_>, + source: &str, + file: &Path, + functions: &[FunctionDef], + state_reads: &[StateRead], + state_writes: &[StateWrite], +) -> Vec { + let file_name = file.to_string_lossy().to_string(); + let mut out = Vec::new(); + out.extend(ruby_global_context_effects(source, state_reads)); + out.extend(ruby_state_mutation_effects(state_writes)); + out.extend(ruby_method_hook_effects(functions)); + ruby_collect_structural_effect_nodes(root, source, &file_name, functions, &mut out); + out +} + +fn ruby_global_context_effects(source: &str, state_reads: &[StateRead]) -> Vec { + state_reads + .iter() + .filter(|read| read.field.starts_with('$')) + .filter(|read| !ruby_global_assignment_read(source, read)) + .map(|read| SemanticEffectSite { + kind: "context_dependency".to_string(), + detail: read.field.clone(), + file: read.file.clone(), + function: read.function.clone(), + line: read.line, + span: read.span, + }) + .collect() +} + +fn ruby_global_assignment_read(source: &str, read: &StateRead) -> bool { + let 
line_text = source + .lines() + .nth(read.line.saturating_sub(1)) + .unwrap_or(""); + line_text + .chars() + .skip(read.span[3]) + .collect::() + .trim_start() + .starts_with('=') +} + +fn ruby_state_mutation_effects(state_writes: &[StateWrite]) -> Vec { + state_writes + .iter() + .filter(|write| write.receiver != "self") + .filter(|write| !write.field.starts_with('@') && !write.field.starts_with('$')) + .map(|write| SemanticEffectSite { + kind: "hidden_mutation".to_string(), + detail: format!("{}=", write.field), + file: write.file.clone(), + function: write.function.clone(), + line: write.line, + span: write.span, + }) + .collect() +} + +fn ruby_method_hook_effects(functions: &[FunctionDef]) -> Vec { + functions + .iter() + .filter_map(|function| { + let name = function + .name + .split('.') + .last() + .unwrap_or(function.name.as_str()); + matches!(name, "method_missing" | "respond_to_missing?").then(|| SemanticEffectSite { + kind: "metaprogramming".to_string(), + detail: format!("def {name}"), + file: function.file.clone(), + function: function.name.clone(), + line: function.line, + span: function.span, + }) + }) + .collect() +} + +fn ruby_collect_structural_effect_nodes( + node: Node<'_>, + source: &str, + file: &str, + functions: &[FunctionDef], + out: &mut Vec, +) { + out.extend(ruby_structural_effect_for_node( + node, source, file, functions, + )); + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + ruby_collect_structural_effect_nodes(child, source, file, functions, out); + } +} + +fn ruby_structural_effect_for_node( + node: Node<'_>, + source: &str, + file: &str, + functions: &[FunctionDef], +) -> Vec { + match node.kind() { + "yield" => vec![ruby_semantic_effect_site( + node, + source, + file, + functions, + "dynamic_dispatch", + "yield", + )], + "subshell" => vec![ruby_semantic_effect_site( + node, + source, + file, + functions, + "hidden_io", + "backtick", + )], + "singleton_class" => ruby_singleton_class_effect(node, source, 
file, functions), + "element_reference" => ruby_element_reference_effect(node, source, file, functions), + "assignment" => ruby_assignment_effects(node, source, file, functions), + "operator_assignment" => ruby_operator_assignment_effect(node, source, file, functions), + "binary" => ruby_binary_effect(node, source, file, functions), + _ => Vec::new(), + } +} + +fn ruby_singleton_class_effect( + node: Node<'_>, + source: &str, + file: &str, + functions: &[FunctionDef], +) -> Vec { + let Some(receiver) = named_children(node).into_iter().next() else { + return Vec::new(); + }; + if node_text(receiver, source) == "self" { + return Vec::new(); + } + vec![ruby_semantic_effect_site( + node, + source, + file, + functions, + "metaprogramming", + &format!("class << {}", normalize_text(node_text(receiver, source))), + )] +} + +fn ruby_element_reference_effect( + node: Node<'_>, + source: &str, + file: &str, + functions: &[FunctionDef], +) -> Vec { + let Some(receiver) = named_children(node).into_iter().next() else { + return Vec::new(); + }; + if node_text(receiver, source) != "ENV" { + return Vec::new(); + } + vec![ruby_semantic_effect_site( + node, + source, + file, + functions, + "context_dependency", + "ENV", + )] +} + +fn ruby_assignment_effects( + node: Node<'_>, + source: &str, + file: &str, + functions: &[FunctionDef], +) -> Vec { + let lhs = node + .child_by_field_name("left") + .or_else(|| named_children(node).into_iter().next()); + let Some(lhs) = lhs else { + return Vec::new(); + }; + let mut out = Vec::new(); + if lhs.kind() == "global_variable" { + out.push(ruby_semantic_effect_site( + node, + source, + file, + functions, + "context_dependency", + node_text(lhs, source), + )); + } + if lhs.kind() == "element_reference" { + let receiver = named_children(lhs).into_iter().next(); + if receiver + .map(|receiver| node_text(receiver, source) != "ENV") + .unwrap_or(true) + { + out.push(ruby_semantic_effect_site( + node, + source, + file, + functions, + 
"hidden_mutation", + "[]=", + )); + } + } + out +} + +fn ruby_operator_assignment_effect( + node: Node<'_>, + source: &str, + file: &str, + functions: &[FunctionDef], +) -> Vec { + let lhs = node + .child_by_field_name("left") + .or_else(|| named_children(node).into_iter().next()); + if ruby_local_operator_assignment_lhs(lhs) { + return Vec::new(); + } + vec![ruby_semantic_effect_site( + node, + source, + file, + functions, + "hidden_mutation", + "op-assign", + )] +} + +fn ruby_local_operator_assignment_lhs(lhs: Option>) -> bool { + let Some(lhs) = lhs else { + return true; + }; + matches!( + lhs.kind(), + "identifier" | "instance_variable" | "global_variable" + ) +} + +fn ruby_binary_effect( + node: Node<'_>, + source: &str, + file: &str, + functions: &[FunctionDef], +) -> Vec { + if direct_operator(node) != "<<" { + return Vec::new(); + } + vec![ruby_semantic_effect_site( + node, + source, + file, + functions, + "hidden_mutation", + "<<", + )] +} + +fn ruby_semantic_effect_site( + node: Node<'_>, + _source: &str, + file: &str, + functions: &[FunctionDef], + kind: &str, + detail: &str, +) -> SemanticEffectSite { + let site_span = span(node); + SemanticEffectSite { + kind: kind.to_string(), + detail: detail.to_string(), + file: file.to_string(), + function: ruby_effect_function(functions, site_span), + line: site_span[0], + span: site_span, + } +} + +fn ruby_effect_function(functions: &[FunctionDef], site_span: [usize; 4]) -> String { + functions + .iter() + .filter(|function| span_contains(function.span, site_span)) + .min_by_key(|function| span_width(function.span)) + .map(|function| function.name.clone()) + .unwrap_or_else(|| "(top-level)".to_string()) +} + +fn span_contains(outer: [usize; 4], inner: [usize; 4]) -> bool { + (outer[0] < inner[0] || (outer[0] == inner[0] && outer[1] <= inner[1])) + && (outer[2] > inner[2] || (outer[2] == inner[2] && outer[3] >= inner[3])) +} + +fn span_width(span: [usize; 4]) -> usize { + span[2].saturating_sub(span[0]) * 10_000 + 
span[3].saturating_sub(span[1]) +} + +fn ruby_safe_navigation_call(node: Node<'_>, source: &str) -> bool { + let mut cursor = node.walk(); + let found = node + .children(&mut cursor) + .any(|child| !child.is_named() && node_text(child, source) == "&."); + found +} + +fn ruby_simple_call_text(text: &str) -> bool { + Regex::new(r"^[a-z_]\w*[!?=]?$") + .unwrap() + .is_match(text.trim()) +} + +fn ruby_bare_call_identifier(node: Node<'_>, source: &str) -> bool { + if ruby_embedded_text_node(node) { + return false; + } + let Some(parent) = node.parent() else { + return false; + }; + if ruby_declaration_name(node, parent, source) { + return false; + } + if matches!( + parent.kind(), + "method_parameters" | "block_parameters" | "argument_list" | "assignment" + ) { + return false; + } + if parent.kind() == "call" { + if ruby_command_argument_call(parent, source) { + return false; + } + if parent.child_by_field_name("receiver").is_some() { + return false; + } + let first = named_children(parent).into_iter().next(); + return first == Some(node) + && node + .next_sibling() + .map(|sibling| sibling.kind() == "argument_list") + .unwrap_or(false); + } + if next_sibling_raw_text(node).as_deref() == Some("=") + || previous_sibling_raw_text(node).as_deref() == Some("=") + || next_sibling_raw_text(node).as_deref() == Some(".") + || previous_sibling_raw_text(node).as_deref() == Some(".") + { + return false; + } + + matches!( + parent.kind(), + "body_statement" + | "then" + | "else" + | "elsif" + | "ensure" + | "rescue" + | "if_modifier" + | "unless_modifier" + ) || node + .next_sibling() + .map(|sibling| sibling.kind() == "argument_list") + .unwrap_or(false) +} + +#[derive(Clone)] +struct RubyProtocolPath { + calls: Vec, + terminal: bool, +} + +fn ruby_protocol_method_access(function_def: &FunctionDef) -> (Vec, Vec) { + let statements = ruby_raw_function_body_statements(&function_def.body); + let local_names = ruby_protocol_local_names(function_def, &statements); + let mut reads = 
BTreeSet::new(); + let mut writes = BTreeSet::new(); + ruby_protocol_collect_state_access( + &function_def.body, + None, + &local_names, + &mut reads, + &mut writes, + true, + ); + (reads.into_iter().collect(), writes.into_iter().collect()) +} + +fn ruby_protocol_local_names( + function_def: &FunctionDef, + statements: &[&RawNode], +) -> BTreeSet { + let mut local_names = BTreeSet::new(); + local_names.extend(function_def.params.iter().cloned()); + for statement in statements { + ruby_protocol_collect_local_names(statement, &mut local_names, true); + } + local_names +} + +fn ruby_protocol_collect_state_access( + node: &RawNode, + parent: Option<&RawNode>, + local_names: &BTreeSet, + reads: &mut BTreeSet, + writes: &mut BTreeSet, + root: bool, +) { + if !root && ruby_protocol_nested_boundary(node) { + return; + } + + if ruby_raw_flat_assignment_statement(node) { + let children = raw_named_children(node); + if let Some(lhs) = children.first() { + ruby_protocol_record_write(lhs, writes, local_names); + } + if let Some(rhs) = children.get(1) { + ruby_protocol_collect_state_access(rhs, Some(node), local_names, reads, writes, false); + } + return; + } + + match node.kind.as_str() { + "assignment" => { + let children = raw_named_children(node); + if let Some(lhs) = children.first() { + ruby_protocol_record_write(lhs, writes, local_names); + } + if let Some(rhs) = children.get(1) { + ruby_protocol_collect_state_access( + rhs, + Some(node), + local_names, + reads, + writes, + false, + ); + } + return; + } + "operator_assignment" => { + let children = raw_named_children(node); + if let Some(lhs) = children.first() { + if let Some(state) = ruby_protocol_state_target(lhs, local_names) { + reads.insert(state.clone()); + writes.insert(state); + } + } + if let Some(rhs) = children.get(1) { + ruby_protocol_collect_state_access( + rhs, + Some(node), + local_names, + reads, + writes, + false, + ); + } + return; + } + "instance_variable" => { + 
reads.insert(normalize_protocol_state(&node.text)); + } + "call" => ruby_protocol_collect_call_state(node, local_names, reads, writes), + "identifier" => { + if ruby_protocol_bare_reader(node, parent, local_names) { + reads.insert(normalize_protocol_state(&node.text)); + } + } + _ => {} + } + + for child in &node.children { + ruby_protocol_collect_state_access(child, Some(node), local_names, reads, writes, false); + } +} + +fn ruby_protocol_collect_call_state( + node: &RawNode, + local_names: &BTreeSet, + reads: &mut BTreeSet, + writes: &mut BTreeSet, +) { + let Some(target) = ruby_raw_call_target(node) else { + return; + }; + if target.receiver == "self" + && target.arguments.is_empty() + && !ruby_protocol_mutating_mid(&target.message) + && !RUBY_PROTOCOL_IGNORED_MIDS.contains(&target.message.as_str()) + { + reads.insert(normalize_protocol_state(&target.message)); + } + if ruby_protocol_mutating_mid(&target.message) { + if let Some(token) = ruby_protocol_receiver_state_token(&target.receiver, local_names) { + writes.insert(token); + } + } +} + +fn ruby_protocol_record_write( + lhs: &RawNode, + writes: &mut BTreeSet, + local_names: &BTreeSet, +) { + if let Some(state) = ruby_protocol_state_target(lhs, local_names) { + writes.insert(state); + } +} + +fn ruby_protocol_state_target(node: &RawNode, local_names: &BTreeSet) -> Option { + match node.kind.as_str() { + "instance_variable" => Some(normalize_protocol_state(&node.text)), + "element_reference" => raw_named_children(node) + .first() + .and_then(|receiver| ruby_protocol_receiver_state_token(&receiver.text, local_names)), + "call" => { + let target = ruby_raw_call_target(node)?; + let receiver = ruby_protocol_receiver_state_token(&target.receiver, local_names)?; + let field = normalize_protocol_state(&target.message); + if receiver == "self" { + Some(field) + } else { + Some(format!("{receiver}.{field}")) + } + } + _ => None, + } +} + +fn ruby_protocol_receiver_state_token( + receiver: &str, + local_names: 
&BTreeSet, +) -> Option { + let text = receiver.trim(); + if text.is_empty() { + return None; + } + if text == "self" { + return Some("self".to_string()); + } + if text.starts_with('@') { + return Some(normalize_protocol_state(text)); + } + if ruby_simple_call_text(text) { + if local_names.contains(text) { + None + } else { + Some(normalize_protocol_state(text)) + } + } else { + None + } +} + +fn ruby_protocol_paths_for_statements( + statements: &[&RawNode], + local_names: &BTreeSet, +) -> Vec { + let mut paths = vec![ruby_protocol_empty_path()]; + for statement in statements { + let statement_paths = ruby_protocol_paths_for(statement, local_names); + paths = ruby_protocol_combine_path_lists(&paths, &statement_paths); + } + paths +} + +fn ruby_protocol_paths_for( + node: &RawNode, + local_names: &BTreeSet, +) -> Vec { + if ruby_protocol_nested_boundary(node) { + return vec![ruby_protocol_empty_path()]; + } + if ruby_raw_if_node(node) { + return ruby_protocol_branch_paths(node, local_names); + } + if ruby_raw_case_node(node) { + return ruby_protocol_case_paths(node, local_names); + } + + let children = ruby_protocol_child_nodes(node); + let child_paths = children + .iter() + .fold(vec![ruby_protocol_empty_path()], |paths, child| { + ruby_protocol_combine_path_lists(&paths, &ruby_protocol_paths_for(child, local_names)) + }); + let Some(mid) = ruby_protocol_internal_call(node, local_names) else { + return ruby_protocol_terminalize(node, child_paths); + }; + let call_path = RubyProtocolPath { + calls: vec![ruby_protocol_raw_call(mid, node)], + terminal: false, + }; + ruby_protocol_terminalize( + node, + ruby_protocol_combine_path_lists(&[call_path], &child_paths), + ) +} + +fn ruby_protocol_terminalize( + node: &RawNode, + paths: Vec, +) -> Vec { + if matches!( + node.kind.as_str(), + "return" | "break" | "next" | "redo" | "retry" + ) { + paths + .into_iter() + .map(|path| RubyProtocolPath { + calls: path.calls, + terminal: true, + }) + .collect() + } else { + paths + 
} +} + +fn ruby_protocol_branch_paths( + node: &RawNode, + local_names: &BTreeSet, +) -> Vec { + let condition_paths = ruby_raw_path_condition(node) + .map(|condition| ruby_protocol_paths_for(condition, local_names)) + .unwrap_or_else(|| vec![ruby_protocol_empty_path()]); + let then_paths = ruby_protocol_body_paths(ruby_raw_then_body(node), local_names); + let else_paths = ruby_raw_else_body(node) + .map(|body| ruby_protocol_body_paths(Some(body), local_names)) + .unwrap_or_else(|| vec![ruby_protocol_empty_path()]); + let alternatives = then_paths.into_iter().chain(else_paths).collect::>(); + ruby_protocol_combine_path_lists(&condition_paths, &alternatives) +} + +fn ruby_protocol_case_paths( + node: &RawNode, + local_names: &BTreeSet, +) -> Vec { + let subject_paths = raw_named_children(node) + .first() + .filter(|first| !matches!(first.kind.as_str(), "when" | "else")) + .map(|subject| ruby_protocol_paths_for(subject, local_names)) + .unwrap_or_else(|| vec![ruby_protocol_empty_path()]); + let branch_paths = raw_named_children(node) + .into_iter() + .filter(|child| matches!(child.kind.as_str(), "when" | "else")) + .flat_map(|child| ruby_protocol_body_paths(Some(child), local_names)) + .collect::>(); + let alternatives = if branch_paths.is_empty() { + vec![ruby_protocol_empty_path()] + } else { + branch_paths + }; + ruby_protocol_combine_path_lists(&subject_paths, &alternatives) +} + +fn ruby_protocol_body_paths( + node: Option<&RawNode>, + local_names: &BTreeSet, +) -> Vec { + let Some(node) = node else { + return vec![ruby_protocol_empty_path()]; + }; + if matches!( + node.kind.as_str(), + "then" | "else" | "body_statement" | "block" | "block_body" + ) { + return ruby_protocol_paths_for_statements( + &raw_named_children(node) + .into_iter() + .filter(|child| child.kind != "comment") + .collect::>(), + local_names, + ); + } + ruby_protocol_paths_for(node, local_names) +} + +fn ruby_protocol_child_nodes(node: &RawNode) -> Vec<&RawNode> { + if 
ruby_protocol_nested_boundary(node) { + return Vec::new(); + } + match node.kind.as_str() { + "call" => raw_named_children(node) + .into_iter() + .filter(|child| matches!(child.kind.as_str(), "argument_list" | "block" | "do_block")) + .collect(), + "assignment" | "operator_assignment" => { + raw_named_children(node).into_iter().skip(1).collect() + } + _ => raw_named_children(node) + .into_iter() + .filter(|child| child.kind != "comment") + .collect(), + } +} + +fn ruby_protocol_internal_call(node: &RawNode, local_names: &BTreeSet) -> Option { + let target = if node.kind == "call" { + ruby_raw_call_target(node) + } else if node.kind == "identifier" && ruby_protocol_bare_internal_identifier(node, local_names) + { + Some(RubyRawCallTarget { + receiver: "self".to_string(), + message: node.text.clone(), + arguments: Vec::new(), + }) + } else { + None + }?; + if target.receiver != "self" { + return None; + } + if local_names.contains(&target.message) + || RUBY_PROTOCOL_IGNORED_MIDS.contains(&target.message.as_str()) + { + return None; + } + Some(target.message) +} + +fn ruby_protocol_raw_call(mid: String, node: &RawNode) -> ProtocolCall { + ProtocolCall { + mid, + file: String::new(), + owner: String::new(), + defn: String::new(), + line: node.span[0], + span: node.span, + } +} + +fn ruby_protocol_combine_path_lists( + left_paths: &[RubyProtocolPath], + right_paths: &[RubyProtocolPath], +) -> Vec { + let mut out = Vec::new(); + for left in left_paths { + if left.terminal { + out.push(left.clone()); + continue; + } + for right in right_paths { + let mut calls = left.calls.clone(); + calls.extend(right.calls.clone()); + out.push(RubyProtocolPath { + calls, + terminal: right.terminal, + }); + } + } + out.into_iter().take(64).collect() +} + +fn ruby_protocol_empty_path() -> RubyProtocolPath { + RubyProtocolPath { + calls: Vec::new(), + terminal: false, + } +} + +fn ruby_protocol_mutating_mid(mid: &str) -> bool { + !RUBY_PROTOCOL_NON_MUTATING_OPERATOR_MIDS.contains(&mid) + && 
(RUBY_PROTOCOL_MUTATING_MIDS.contains(&mid) || mid.ends_with('!')) +} + +fn ruby_protocol_bare_internal_identifier(node: &RawNode, local_names: &BTreeSet) -> bool { + ruby_simple_call_text(&node.text) + && !local_names.contains(&node.text) + && !RUBY_PROTOCOL_IGNORED_MIDS.contains(&node.text.as_str()) +} + +struct RubyRawCallTarget { + receiver: String, + message: String, + arguments: Vec, +} + +fn ruby_raw_call_target(node: &RawNode) -> Option { + if node.kind != "call" { + return None; + } + let receiver = raw_child_by_field(node, "receiver").map(|child| normalize_text(&child.text)); + let method = raw_child_by_field(node, "method") + .map(|child| child.text.clone()) + .or_else(|| { + raw_named_children(node) + .first() + .filter(|child| matches!(child.kind.as_str(), "identifier" | "constant")) + .map(|child| child.text.clone()) + })?; + Some(RubyRawCallTarget { + receiver: receiver.unwrap_or_else(|| "self".to_string()), + message: method, + arguments: ruby_raw_argument_texts(node), + }) +} + +fn ruby_raw_argument_texts(node: &RawNode) -> Vec { + let Some(args) = raw_child_by_field(node, "arguments").or_else(|| { + raw_named_children(node) + .into_iter() + .find(|child| child.kind == "argument_list") + }) else { + return Vec::new(); + }; + let values = raw_named_children(args) + .into_iter() + .map(|child| normalize_text(&child.text)) + .filter(|text| !text.is_empty()) + .collect::>(); + if !values.is_empty() { + return values; + } + let text = args + .text + .trim() + .trim_start_matches('(') + .trim_end_matches(')') + .to_string(); + text.split(',') + .map(normalize_text) + .filter(|item| !item.is_empty()) + .collect() +} + +fn ruby_raw_function_body_statements(node: &RawNode) -> Vec<&RawNode> { + let Some(body) = ruby_raw_method_body_wrapper(node) else { + return Vec::new(); + }; + let named = raw_named_children(body) + .into_iter() + .filter(|child| child.kind != "comment") + .collect::>(); + if named.is_empty() && body.text.trim().is_empty() { + return 
Vec::new(); + } + if ruby_raw_if_node(body) + || ruby_raw_case_node(body) + || ruby_raw_flat_assignment_statement(body) + { + return vec![body]; + } + if named.is_empty() || ruby_raw_heredoc_body(&named) { + return vec![body]; + } + named +} + +fn ruby_raw_method_body_wrapper(node: &RawNode) -> Option<&RawNode> { + match node.kind.as_str() { + "method" | "singleton_method" | "argument_list" => raw_named_children(node) + .into_iter() + .rev() + .find(|child| child.kind == "body_statement"), + "body_statement" => { + if ruby_raw_hidden_method_definition(node) { + raw_named_children(node) + .into_iter() + .rev() + .find(|child| child.kind == "body_statement") + } else { + Some(node) + } + } + _ => None, + } +} + +fn ruby_raw_heredoc_body(named: &[&RawNode]) -> bool { + named.first().map(|child| child.kind.as_str()) == Some("call") + && named + .iter() + .skip(1) + .all(|child| child.kind == "heredoc_body") +} + +fn ruby_raw_flat_assignment_statement(node: &RawNode) -> bool { + node.kind == "body_statement" + && node + .children + .iter() + .filter(|child| !child.named && child.text == "=") + .count() + == 1 + && raw_named_children(node).len() >= 2 +} + +fn ruby_raw_if_node(node: &RawNode) -> bool { + matches!( + node.kind.as_str(), + "if" | "unless" | "if_modifier" | "unless_modifier" + ) || (matches!( + node.kind.as_str(), + "expression_statement" | "block" | "body_statement" + ) && matches!(raw_first_child_kind(node).as_deref(), Some("if" | "unless"))) +} + +fn ruby_raw_case_node(node: &RawNode) -> bool { + node.kind == "case" + || (matches!( + node.kind.as_str(), + "body_statement" | "block_body" | "argument_list" + ) && raw_first_child_kind(node).as_deref() == Some("case")) +} + +fn ruby_raw_path_condition(node: &RawNode) -> Option<&RawNode> { + if matches!(node.kind.as_str(), "if_modifier" | "unless_modifier") + || ruby_raw_hidden_modifier_if(node) + { + raw_named_children(node).into_iter().last() + } else { + raw_named_children(node).into_iter().next() + } +} + 
+fn ruby_raw_then_body(node: &RawNode) -> Option<&RawNode> { + if matches!(node.kind.as_str(), "if_modifier" | "unless_modifier") + || ruby_raw_hidden_modifier_if(node) + { + raw_named_children(node).into_iter().next() + } else { + raw_named_children(node) + .into_iter() + .find(|child| child.kind == "then") + .or_else(|| raw_named_children(node).into_iter().nth(1)) + } +} + +fn ruby_raw_else_body(node: &RawNode) -> Option<&RawNode> { + if matches!(node.kind.as_str(), "if_modifier" | "unless_modifier") + || ruby_raw_hidden_modifier_if(node) + { + return None; + } + raw_named_children(node) + .into_iter() + .find(|child| matches!(child.kind.as_str(), "else" | "elsif")) + .or_else(|| raw_named_children(node).into_iter().nth(2)) +} + +fn ruby_raw_hidden_modifier_if(node: &RawNode) -> bool { + if node.kind != "body_statement" { + return false; + } + let mut seen_named = false; + node.children.iter().any(|child| { + seen_named |= child.named; + seen_named && !child.named && matches!(child.kind.as_str(), "if" | "unless") + }) +} + +fn ruby_raw_hidden_method_definition(node: &RawNode) -> bool { + node.kind == "body_statement" && matches!(raw_first_child_kind(node).as_deref(), Some("def")) +} + +fn ruby_protocol_collect_local_names( + node: &RawNode, + local_names: &mut BTreeSet, + root: bool, +) { + if !root && ruby_protocol_nested_boundary(node) { + return; + } + if matches!(node.kind.as_str(), "assignment" | "operator_assignment") + || ruby_raw_flat_assignment_statement(node) + { + if let Some(lhs) = raw_named_children(node).first() { + if lhs.kind == "identifier" && ruby_simple_call_text(&lhs.text) { + local_names.insert(lhs.text.clone()); + } + } + } + if matches!(node.kind.as_str(), "block_parameters" | "method_parameters") { + for child in raw_named_children(node) { + if child.kind == "identifier" && ruby_simple_call_text(&child.text) { + local_names.insert(child.text.clone()); + } + } + } + for child in &node.children { + ruby_protocol_collect_local_names(child, 
local_names, false); + } +} + +fn ruby_protocol_bare_reader( + node: &RawNode, + parent: Option<&RawNode>, + local_names: &BTreeSet, +) -> bool { + let name = node.text.as_str(); + if !ruby_simple_call_text(name) + || local_names.contains(name) + || RUBY_PROTOCOL_IGNORED_MIDS.contains(&name) + { + return false; + } + let Some(parent) = parent else { + return false; + }; + if ruby_protocol_declaration_name(node, parent) { + return false; + } + if matches!( + parent.kind.as_str(), + "call" + | "method_parameters" + | "block_parameters" + | "argument_list" + | "assignment" + | "operator_assignment" + | "pair" + | "hash_key_symbol" + ) { + return false; + } + if matches!( + raw_next_sibling_text(node, parent).as_deref(), + Some("=" | "." | ":") + ) || matches!( + raw_previous_sibling_text(node, parent).as_deref(), + Some("=" | "." | ":") + ) { + return false; + } + true +} + +fn ruby_protocol_declaration_name(node: &RawNode, parent: &RawNode) -> bool { + if matches!( + parent.kind.as_str(), + "method" | "singleton_method" | "class" | "module" + ) { + return true; + } + if parent.kind == "body_statement" { + let stripped = parent.text.trim_start(); + if stripped.starts_with("def ") + || stripped.starts_with("class ") + || stripped.starts_with("module ") + { + return true; + } + } + node.kind == "identifier" && parent.kind == "method_parameters" +} + +fn ruby_protocol_nested_boundary(node: &RawNode) -> bool { + matches!( + node.kind.as_str(), + "class" | "module" | "method" | "singleton_method" | "lambda" + ) || (node.kind == "body_statement" + && matches!( + raw_first_child_kind(node).as_deref(), + Some("def" | "class" | "module") + )) +} + +fn raw_named_children(node: &RawNode) -> Vec<&RawNode> { + node.children.iter().filter(|child| child.named).collect() +} + +fn raw_child_by_field<'a>(node: &'a RawNode, field: &str) -> Option<&'a RawNode> { + node.children + .iter() + .find(|child| child.field_name.as_deref() == Some(field)) +} + +fn raw_first_child_kind(node: 
&RawNode) -> Option { + node.children.first().map(|child| child.kind.clone()) +} + +fn raw_next_sibling_text(node: &RawNode, parent: &RawNode) -> Option { + let index = raw_child_index(node, parent)?; + parent + .children + .get(index + 1) + .map(|sibling| sibling.text.clone()) +} + +fn raw_previous_sibling_text(node: &RawNode, parent: &RawNode) -> Option { + let index = raw_child_index(node, parent)?; + index + .checked_sub(1) + .and_then(|previous| parent.children.get(previous)) + .map(|sibling| sibling.text.clone()) +} + +fn raw_child_index(node: &RawNode, parent: &RawNode) -> Option { + parent.children.iter().position(|child| { + child.kind == node.kind + && child.text == node.text + && child.span == node.span + && child.named == node.named + }) +} + +fn ruby_declaration_name(node: Node<'_>, parent: Node<'_>, source: &str) -> bool { + if matches!( + parent.kind(), + "method" | "singleton_method" | "class" | "module" + ) { + return true; + } + if parent.kind() == "body_statement" { + let stripped = node_text(parent, source).trim_start(); + if matches!(first_child_kind(parent), Some("def" | "class" | "module")) + || stripped.starts_with("def ") + || stripped.starts_with("class ") + || stripped.starts_with("module ") + { + return true; + } + } + matches!(node.kind(), "identifier" | "constant") && parent.kind() == "method_parameters" +} + +fn ruby_command_argument_call(node: Node<'_>, source: &str) -> bool { + let Some(parent) = node.parent() else { + return false; + }; + if parent.kind() != "argument_list" { + return false; + } + !node_text(parent, source).trim_start().starts_with('(') +} + +fn ruby_single_command_argument_call(node: Node<'_>, source: &str) -> bool { + let Some(parent) = node.parent() else { + return false; + }; + if parent.kind() != "argument_list" || node_text(parent, source).trim_start().starts_with('(') { + return false; + } + let children = named_children(parent); + children.len() == 1 && children[0] == node +} + +fn 
ruby_require_message(message: &str) -> bool { + matches!(message, "require" | "require_relative") +} + +fn ruby_embedded_text_node(node: Node<'_>) -> bool { + let mut current = Some(node); + while let Some(node) = current { + if matches!( + node.kind(), + "string" + | "string_content" + | "heredoc_body" + | "simple_symbol" + | "symbol" + | "delimited_symbol" + ) { + return true; + } + current = node.parent(); + } + false +} + +fn ruby_narrow_no_arg_call_span( + node: Node<'_>, + source: &str, + receiver: &str, + message: &str, +) -> Option<[usize; 4]> { + if message.is_empty() || message == "[]" || message == "[]=" { + return None; + } + let needle = if receiver == "self" { + message.to_string() + } else { + format!("{receiver}.{message}") + }; + let node_span = span(node); + if let Some(line_text) = source.lines().nth(node_span[0].saturating_sub(1)) { + if let Some(start) = line_text.find(&needle) { + let end = start + needle.chars().count(); + return Some([node_span[0], start, node_span[0], end]); + } + } + let text = node_text(node, source); + let offset = text.find(&needle)?; + if text[..offset].contains('\n') || needle.contains('\n') { + return None; + } + let mut start = node_span[1] + text[..offset].chars().count(); + let end = start + needle.chars().count(); + if start == node_span[1] + && (previous_sibling_raw_text(node).as_deref() == Some("!") + || node + .start_byte() + .checked_sub(1) + .and_then(|index| source.as_bytes().get(index)) + .copied() + == Some(b'!')) + { + start += 1; + } + Some([node_span[0], start, node_span[0], end]) +} + +fn ruby_valid_call_target(target: &CallTarget<'_>) -> bool { + if invalid_call_text(&target.message) { + return false; + } + if matches!(target.message.as_str(), "[]" | "[]=") { + return true; + } + Regex::new(r"^[A-Za-z_]\w*[!?=]?$") + .unwrap() + .is_match(target.message.as_str()) +} + +fn invalid_call_text(text: &str) -> bool { + text.chars() + .any(|ch| matches!(ch, '"' | '\'' | '\n' | '\r')) +} + +fn 
ruby_state_variable_target(node: Node<'_>, source: &str) -> Option { + if ruby_embedded_text_node(node) { + return None; + } + matches!(node.kind(), "instance_variable" | "global_variable").then(|| Target { + receiver: "self".to_string(), + field: node_text(node, source).to_string(), + }) +} + +fn ruby_chained_element_predicate_target(target: &CallTarget<'_>) -> bool { + ruby_chained_element_predicate(&target.receiver, &target.message) +} + +fn ruby_chained_element_predicate_read_target(target: &Target) -> bool { + ruby_chained_element_predicate(&target.receiver, &target.field) +} + +fn ruby_chained_element_predicate(receiver: &str, message: &str) -> bool { + message.ends_with('?') + && receiver.contains('.') + && (receiver.contains("[:") || receiver.contains("[\"") || receiver.contains("['")) +} + +fn ruby_sorbet_signature_payload_node(node: Node<'_>, source: &str) -> bool { + let mut current = Some(node); + while let Some(candidate) = current { + if candidate.kind() == "block" { + let Some(parent) = candidate.parent() else { + return false; + }; + if parent.kind() == "call" { + let message = parent + .child_by_field_name("method") + .or_else(|| named_children(parent).into_iter().next()) + .map(|method| node_text(method, source).to_string()); + return message.as_deref() == Some("sig"); + } + return false; + } + if matches!( + candidate.kind(), + "method" | "singleton_method" | "class" | "module" + ) { + return false; + } + current = candidate.parent(); + } + false +} + +fn ruby_call_has_block(node: Node<'_>) -> bool { + named_children(node) + .into_iter() + .any(|child| matches!(child.kind(), "do_block" | "block")) +} + +fn ruby_direct_flat_map_block_statement(node: Node<'_>, source: &str) -> bool { + if node.kind() != "call" { + return false; + } + let Some(method) = node.child_by_field_name("method") else { + return false; + }; + if node_text(method, source) != "flat_map" { + return false; + } + let Some(parent) = node.parent() else { + return false; + }; + 
parent.kind() == "body_statement" + && named_children(parent).first().copied() == Some(node) + && named_children(node) + .iter() + .any(|child| child.kind() == "do_block" || child.kind() == "block") +} + +fn ruby_case_pattern_texts(patterns: &[Node<'_>], source: &str) -> Vec { + if patterns.is_empty() { + return Vec::new(); + } + let texts = patterns + .iter() + .map(|pattern| normalize_text(node_text(*pattern, source))) + .collect::>(); + if !texts.iter().any(|text| text.starts_with('*')) { + return texts; + } + + let mut out = Vec::new(); + let mut pending_plain = Vec::new(); + for (index, text) in texts.iter().enumerate() { + if text.starts_with('*') { + if !pending_plain.is_empty() { + out.push(pending_plain.join(", ")); + pending_plain.clear(); + } + if texts.len() == 1 || index > 0 { + out.push(text.trim_start_matches('*').to_string()); + } else { + out.push(text.clone()); + } + } else { + pending_plain.push(text.clone()); + } + } + if !pending_plain.is_empty() { + out.push(pending_plain.join(", ")); + } + out +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/rust.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/rust.rs new file mode 100644 index 000000000..5870978a6 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/rust.rs @@ -0,0 +1,140 @@ +use super::super::Language; +use super::base::LanguageProfile; +use crate::decomplex::ast::node_text; +use tree_sitter::{Language as TreeSitterLanguage, Node}; + +pub(crate) struct RustProfile; + +impl LanguageProfile for RustProfile { + fn language(&self) -> Language { + Language::Rust + } + + fn grammar(&self) -> TreeSitterLanguage { + tree_sitter_rust::LANGUAGE.into() + } + + fn function_node_kinds(&self) -> &[&str] { + &["function_item"] + } + + fn function_visibility(&self, node: Node<'_>, source: &str) -> Option { + if node_text(node, source).trim_start().starts_with("pub ") { + Some("public".to_string()) + } else { + Some("private".to_string()) + } + } + + fn 
impl_owner_node_kinds(&self) -> &[&str] { + &["impl_item"] + } + + fn struct_owner_node_kinds(&self) -> &[&str] { + &["struct_item"] + } + + fn parameter_list_node_kinds(&self) -> &[&str] { + &["parameters"] + } + + fn parameter_identifier_node_kinds(&self) -> &[&str] { + &["identifier", "self_parameter"] + } + + fn function_body_node_kinds(&self) -> &[&str] { + &["block", "declaration_list"] + } + + fn call_node_kinds(&self) -> &[&str] { + &["call_expression"] + } + + fn identifier_node_kinds(&self) -> &[&str] { + &["identifier", "type_identifier"] + } + + fn field_identifier_node_kinds(&self) -> &[&str] { + &["field_identifier"] + } + + fn assignment_node_kinds(&self) -> &[&str] { + &["assignment_expression", "compound_assignment_expr"] + } + + fn assignment_operator_tokens(&self) -> &[&str] { + &["=", "+=", "-=", "*=", "/=", "%="] + } + + fn path_action_node_kinds(&self) -> &[&str] { + &[ + "call_expression", + "expression_statement", + "return_expression", + ] + } + + fn simple_action_wrapper_node_kinds(&self) -> &[&str] { + &["block"] + } + + fn local_identifier_wrapper_node_kinds(&self) -> &[&str] { + &["pattern"] + } + + fn local_declaration_node_kinds(&self) -> &[&str] { + &["let_declaration"] + } + + fn receiver_type_node_kinds(&self) -> &[&str] { + &["type_identifier", "generic_type", "scoped_type_identifier"] + } + + fn comparison_node_kinds(&self) -> &[&str] { + &["binary_expression"] + } + + fn branch_node_kinds(&self) -> &[&str] { + &["if_expression", "match_expression", "for_expression"] + } + + fn case_node_kinds(&self) -> &[&str] { + &["match_expression"] + } + + fn case_arm_node_kinds(&self) -> &[&str] { + &["match_arm"] + } + + fn case_pattern_node_kinds(&self) -> &[&str] { + &["match_pattern", "pattern"] + } + + fn case_container_stop_node_kinds(&self) -> &[&str] { + &["function_item", "impl_item", "struct_item"] + } + + fn case_subject_skip_node_kinds(&self) -> &[&str] { + &["match_arm", "else", "comment"] + } + + fn 
default_case_patterns(&self) -> &[&str] { + &["_", "default"] + } + + fn boolean_and_operators(&self) -> &[&str] { + &["&&", "and"] + } + + fn boolean_container_node_kinds(&self) -> &[&str] { + &["binary_expression"] + } + + fn parenthesized_wrapper_node_kinds(&self) -> &[&str] { + &["parenthesized_expression", "tuple_expression"] + } + + fn field_like_node_kinds(&self) -> &[&str] { + &["field_expression", "scoped_identifier"] + } +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/swift.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/swift.rs new file mode 100644 index 000000000..78ac1a451 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/swift.rs @@ -0,0 +1,270 @@ +use super::super::tree_sitter_adapter::{named_children, CallTarget}; +use super::super::Language; +use super::base::LanguageProfile; +use crate::decomplex::ast::{node_text, normalize_text}; +use tree_sitter::{Language as TreeSitterLanguage, Node}; + +pub(crate) struct SwiftProfile; + +impl LanguageProfile for SwiftProfile { + fn language(&self) -> Language { + Language::Swift + } + + fn grammar(&self) -> TreeSitterLanguage { + tree_sitter_swift::LANGUAGE.into() + } + + fn function_visibility(&self, node: Node<'_>, source: &str) -> Option { + for child in named_children(node) { + if child.kind() != "modifiers" { + continue; + } + if node_text(child, source) + .split_whitespace() + .any(|token| token == "private") + { + return Some("private".to_string()); + } + } + None + } + + fn function_node_kinds(&self) -> &[&str] { + &["function_declaration"] + } + + fn class_owner_node_kinds(&self) -> &[&str] { + &["class_declaration"] + } + + fn parameter_list_node_kinds(&self) -> &[&str] { + &["function_value_parameters"] + } + + fn parameter_identifier_node_kinds(&self) -> &[&str] { + &["simple_identifier"] + } + + fn inline_parameter_node_kinds(&self) -> &[&str] { + &["parameter"] + } + + fn function_body_node_kinds(&self) -> &[&str] { + &["function_body", "statements"] 
+ } + + fn nested_statement_wrapper_node_kinds(&self) -> &[&str] { + &["statements"] + } + + fn call_node_kinds(&self) -> &[&str] { + &["call_expression"] + } + + fn identifier_node_kinds(&self) -> &[&str] { + &["simple_identifier", "type_identifier"] + } + + fn assignment_node_kinds(&self) -> &[&str] { + &["assignment"] + } + + fn expression_list_node_kinds(&self) -> &[&str] { + &["directly_assignable_expression"] + } + + fn assignment_operator_tokens(&self) -> &[&str] { + &["=", "+=", "-=", "*=", "/=", "%="] + } + + fn path_action_node_kinds(&self) -> &[&str] { + &["call_expression", "control_transfer_statement"] + } + + fn simple_action_wrapper_node_kinds(&self) -> &[&str] { + &["statements", "control_structure_body", "function_body"] + } + + fn local_identifier_wrapper_node_kinds(&self) -> &[&str] { + &[ + "directly_assignable_expression", + "value_argument", + "pattern", + ] + } + + fn local_declaration_node_kinds(&self) -> &[&str] { + &["property_declaration", "variable_declaration"] + } + + fn variable_declaration_node_kinds(&self) -> &[&str] { + &["variable_declaration", "directly_assignable_expression"] + } + + fn comparison_node_kinds(&self) -> &[&str] { + &[ + "equality_expression", + "comparison_expression", + "conjunction_expression", + "additive_expression", + "multiplicative_expression", + ] + } + + fn branch_node_kinds(&self) -> &[&str] { + &["if_statement", "for_statement", "switch_statement"] + } + + fn case_node_kinds(&self) -> &[&str] { + &["switch_statement"] + } + + fn case_arm_node_kinds(&self) -> &[&str] { + &["switch_entry"] + } + + fn case_pattern_node_kinds(&self) -> &[&str] { + &["switch_pattern", "pattern"] + } + + fn case_container_stop_node_kinds(&self) -> &[&str] { + &["function_declaration", "class_declaration"] + } + + fn case_subject_skip_node_kinds(&self) -> &[&str] { + &["switch_entry", "else", "comment"] + } + + fn default_case_patterns(&self) -> &[&str] { + &["_", "default"] + } + + fn boolean_and_operators(&self) -> &[&str] { 
+ &["&&", "and"] + } + + fn boolean_container_node_kinds(&self) -> &[&str] { + &[ + "conjunction_expression", + "equality_expression", + "comparison_expression", + ] + } + + fn boolean_wrapper_node_kinds(&self) -> &[&str] { + &["statements", "pattern"] + } + + fn navigation_suffix_node_kinds(&self) -> &[&str] { + &["navigation_suffix"] + } + + fn field_like_node_kinds(&self) -> &[&str] { + &["navigation_expression"] + } + + fn assignment_lhs_node(&self, node: Node<'_>) -> bool { + let candidate = if node + .parent() + .map(|parent| parent.kind() == "directly_assignable_expression") + .unwrap_or(false) + { + node.parent().unwrap() + } else { + node + }; + let Some(parent) = candidate.parent() else { + return false; + }; + if parent.kind() != "assignment" { + return false; + } + named_children(parent) + .into_iter() + .next() + .map(|lhs| same_node(lhs, candidate)) + .unwrap_or(false) + } + + fn state_read_target( + &self, + node: Node<'_>, + source: &str, + ) -> Option { + if self.assignment_lhs_node(node) { + return None; + } + self.default_state_read_target(node, source) + } + + fn call_argument_nodes<'tree>(&self, node: Node<'tree>) -> Vec> { + let Some(args) = named_children(node) + .into_iter() + .find(|child| matches!(child.kind(), "call_suffix" | "value_arguments")) + else { + return Vec::new(); + }; + let value_arguments = if args.kind() == "call_suffix" { + named_children(args) + .into_iter() + .find(|child| child.kind() == "value_arguments") + } else { + Some(args) + }; + value_arguments + .map(|arguments| { + named_children(arguments) + .into_iter() + .filter(|child| child.kind() == "value_argument") + .collect() + }) + .unwrap_or_default() + } + + fn call_target<'tree>(&self, node: Node<'tree>, source: &str) -> Option> { + if node.kind() != "call_expression" { + return None; + } + let mut target = self.default_call_target(node, source)?; + if swift_single_line_switch_call(node) { + target.source_node = named_children(node).into_iter().next(); + } + 
Some(target) + } + + fn call_argument_texts(&self, node: Node<'_>, source: &str) -> Vec { + self.call_argument_nodes(node) + .into_iter() + .filter_map(|argument| { + let text = normalize_text(node_text(argument, source)); + let value = text + .strip_prefix('(') + .and_then(|inner| inner.strip_suffix(')')) + .unwrap_or(&text) + .trim() + .to_string(); + (!value.is_empty()).then_some(value) + }) + .collect() + } +} + +fn same_node(left: Node<'_>, right: Node<'_>) -> bool { + left.kind() == right.kind() + && left.start_byte() == right.start_byte() + && left.end_byte() == right.end_byte() +} + +fn swift_single_line_switch_call(node: Node<'_>) -> bool { + let Some(parent) = node.parent() else { + return false; + }; + if parent.kind() != "statements" || parent.start_position().row != parent.end_position().row { + return false; + } + parent + .parent() + .map(|ancestor| ancestor.kind() == "switch_entry") + .unwrap_or(false) +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/typescript.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/typescript.rs new file mode 100644 index 000000000..6c3d50df4 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/typescript.rs @@ -0,0 +1,147 @@ +use super::super::Language; +use super::base::LanguageProfile; +use crate::decomplex::ast::node_text; +use tree_sitter::{Language as TreeSitterLanguage, Node}; + +pub(crate) struct TypeScriptProfile; + +impl LanguageProfile for TypeScriptProfile { + fn language(&self) -> Language { + Language::TypeScript + } + + fn grammar(&self) -> TreeSitterLanguage { + tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into() + } + + fn function_visibility(&self, node: Node<'_>, source: &str) -> Option { + let name = self.function_name(node, source).unwrap_or_default(); + if name.starts_with('#') { + return Some("private".to_string()); + } + for child in super::super::tree_sitter_adapter::named_children(node) { + if !matches!(child.kind(), "accessibility_modifier" | "modifier") { 
+ continue; + } + let text = node_text(child, source); + if text.split_whitespace().any(|token| token == "private") { + return Some("private".to_string()); + } + if text.split_whitespace().any(|token| token == "protected") { + return Some("protected".to_string()); + } + if text.split_whitespace().any(|token| token == "public") { + return Some("public".to_string()); + } + } + Some("public".to_string()) + } + + fn function_node_kinds(&self) -> &[&str] { + &["function_declaration", "method_definition"] + } + + fn class_owner_node_kinds(&self) -> &[&str] { + &["class_declaration"] + } + + fn parameter_list_node_kinds(&self) -> &[&str] { + &["formal_parameters"] + } + + fn parameter_identifier_node_kinds(&self) -> &[&str] { + &["identifier"] + } + + fn function_body_node_kinds(&self) -> &[&str] { + &["statement_block"] + } + + fn call_node_kinds(&self) -> &[&str] { + &["call_expression"] + } + + fn identifier_node_kinds(&self) -> &[&str] { + &["identifier"] + } + + fn field_identifier_node_kinds(&self) -> &[&str] { + &["property_identifier"] + } + + fn assignment_node_kinds(&self) -> &[&str] { + &["assignment_expression", "augmented_assignment_expression"] + } + + fn assignment_operator_tokens(&self) -> &[&str] { + &["=", "+=", "-=", "*=", "/=", "%=", "&&=", "||="] + } + + fn path_action_node_kinds(&self) -> &[&str] { + &[ + "call_expression", + "expression_statement", + "return_statement", + ] + } + + fn simple_action_wrapper_node_kinds(&self) -> &[&str] { + &["statement_block"] + } + + fn local_declaration_node_kinds(&self) -> &[&str] { + &["lexical_declaration", "variable_declarator"] + } + + fn variable_declaration_node_kinds(&self) -> &[&str] { + &["variable_declarator"] + } + + fn comparison_node_kinds(&self) -> &[&str] { + &["binary_expression"] + } + + fn branch_node_kinds(&self) -> &[&str] { + &["if_statement", "for_in_statement", "switch_statement"] + } + + fn case_node_kinds(&self) -> &[&str] { + &["switch_statement"] + } + + fn case_arm_node_kinds(&self) -> 
&[&str] { + &["switch_case"] + } + + fn case_container_stop_node_kinds(&self) -> &[&str] { + &[ + "function_declaration", + "method_definition", + "class_declaration", + ] + } + + fn case_subject_skip_node_kinds(&self) -> &[&str] { + &["switch_case", "else", "comment"] + } + + fn default_case_patterns(&self) -> &[&str] { + &["_", "default"] + } + + fn boolean_and_operators(&self) -> &[&str] { + &["&&", "and"] + } + + fn boolean_container_node_kinds(&self) -> &[&str] { + &["binary_expression"] + } + + fn parenthesized_wrapper_node_kinds(&self) -> &[&str] { + &["parenthesized_expression"] + } + + fn field_like_node_kinds(&self) -> &[&str] { + &["member_expression"] + } +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/adapters/zig.rs b/gems/decomplex/rust/src/decomplex/syntax/adapters/zig.rs new file mode 100644 index 000000000..cfd1ebce5 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/adapters/zig.rs @@ -0,0 +1,167 @@ +use super::super::tree_sitter_adapter::{named_children, AssignmentTarget, Target}; +use super::super::Language; +use super::base::LanguageProfile; +use crate::decomplex::ast::node_text; +use tree_sitter::{Language as TreeSitterLanguage, Node}; + +pub(crate) struct ZigProfile; + +impl LanguageProfile for ZigProfile { + fn language(&self) -> Language { + Language::Zig + } + + fn grammar(&self) -> TreeSitterLanguage { + tree_sitter_zig::LANGUAGE.into() + } + + fn function_node_kinds(&self) -> &[&str] { + &["function_declaration"] + } + + fn function_visibility(&self, node: Node<'_>, source: &str) -> Option { + if node_text(node, source).trim_start().starts_with("pub ") { + Some("public".to_string()) + } else { + Some("private".to_string()) + } + } + + fn owner_name_from_declaration(&self, node: Node<'_>, source: &str) -> Option { + if node.kind() == "struct_declaration" { + return node + .parent() + .filter(|parent| parent.kind() == "variable_declaration") + .and_then(|parent| { + named_children(parent) + .into_iter() + .find(|child| 
child.kind() == "identifier") + }) + .map(|name| node_text(name, source).to_string()); + } + self.default_owner_name_from_declaration(node, source) + } + + fn struct_owner_node_kinds(&self) -> &[&str] { + &["struct_declaration"] + } + + fn parameter_list_node_kinds(&self) -> &[&str] { + &["parameters"] + } + + fn parameter_identifier_node_kinds(&self) -> &[&str] { + &["identifier"] + } + + fn function_body_node_kinds(&self) -> &[&str] { + &["block", "block_expression"] + } + + fn call_node_kinds(&self) -> &[&str] { + &["call_expression"] + } + + fn identifier_node_kinds(&self) -> &[&str] { + &["identifier"] + } + + fn assignment_node_kinds(&self) -> &[&str] { + &["assignment_expression", "variable_declaration"] + } + + fn assignment_operator_tokens(&self) -> &[&str] { + &["=", "+=", "-=", "*=", "/=", "%="] + } + + fn path_action_node_kinds(&self) -> &[&str] { + &[ + "call_expression", + "expression_statement", + "return_expression", + ] + } + + fn simple_action_wrapper_node_kinds(&self) -> &[&str] { + &["block"] + } + + fn local_declaration_node_kinds(&self) -> &[&str] { + &["variable_declaration"] + } + + fn comparison_node_kinds(&self) -> &[&str] { + &["binary_expression"] + } + + fn branch_node_kinds(&self) -> &[&str] { + &["if_statement", "switch_expression"] + } + + fn case_node_kinds(&self) -> &[&str] { + &["switch_expression"] + } + + fn case_arm_node_kinds(&self) -> &[&str] { + &["switch_case"] + } + + fn case_container_stop_node_kinds(&self) -> &[&str] { + &["function_declaration", "struct_declaration"] + } + + fn case_subject_skip_node_kinds(&self) -> &[&str] { + &["switch_case", "else", "comment"] + } + + fn default_case_patterns(&self) -> &[&str] { + &["_", "default", "else"] + } + + fn boolean_and_operators(&self) -> &[&str] { + &["and", "&&"] + } + + fn boolean_container_node_kinds(&self) -> &[&str] { + &["binary_expression"] + } + + fn field_like_node_kinds(&self) -> &[&str] { + &["field_expression"] + } + + fn state_target(&self, lhs: Node<'_>, 
source: &str) -> Option { + zig_literal_field_target(lhs, source).or_else(|| self.default_state_target(lhs, source)) + } + + fn state_read_target(&self, node: Node<'_>, source: &str) -> Option { + zig_literal_field_target(node, source) + .or_else(|| self.default_state_read_target(node, source)) + } + + fn state_write_source_node<'tree>( + &self, + node: Node<'tree>, + assignment: &AssignmentTarget<'tree>, + ) -> Node<'tree> { + let mut cursor = node.walk(); + if node.children(&mut cursor).any(|child| child.kind() == "+=") { + assignment.lhs + } else { + assignment.source + } + } +} + +fn zig_literal_field_target(node: Node<'_>, source: &str) -> Option { + if node.kind() != "field_expression" || !node_text(node, source).trim_start().starts_with('.') { + return None; + } + let field = named_children(node) + .into_iter() + .find(|child| child.kind() == "identifier")?; + Some(Target { + receiver: ".literal".to_string(), + field: node_text(field, source).to_string(), + }) +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/complexity.rs b/gems/decomplex/rust/src/decomplex/syntax/complexity.rs new file mode 100644 index 000000000..c83f4aad1 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/complexity.rs @@ -0,0 +1,347 @@ +use super::{FunctionDef, LocalComplexityScore}; +use crate::decomplex::ast::RawNode; +use std::collections::BTreeMap; +use std::path::Path; + +pub(crate) fn local_complexity_scores( + file: &str, + functions: &[FunctionDef], +) -> BTreeMap { + functions + .iter() + .map(|function| { + let owner = local_method_owner(file, &function.owner); + let id = format!("{}#{}", owner, function.name); + (id, LocalComplexityScorer::new().score(&function.body)) + }) + .collect() +} + +struct LocalComplexityScorer; + +impl LocalComplexityScorer { + fn new() -> Self { + Self + } + + fn score(&self, method_node: &RawNode) -> LocalComplexityScore { + let mut signals = BTreeMap::new(); + LocalComplexityScore { + score: 
self.round(self.score_node(method_node, 0, &mut signals)), + signals, + } + } + + fn score_node( + &self, + node: &RawNode, + nesting: usize, + signals: &mut BTreeMap, + ) -> f64 { + if skip_nested(node) { + return 0.0; + } + + if branch(node) { + *signals.entry("branches".to_string()).or_insert(0) += 1; + if nesting > 0 { + *signals.entry("nested".to_string()).or_insert(0) += 1; + } + return self.branch_cost(nesting) + + self.predicate_cost(condition_node(node), signals) + + self.score_children(node, nesting + 1, signals); + } + + if loop_node(node) { + *signals.entry("loops".to_string()).or_insert(0) += 1; + if nesting > 0 { + *signals.entry("nested".to_string()).or_insert(0) += 1; + } + return self.branch_cost(nesting) + self.score_children(node, nesting + 1, signals); + } + + if case_node(node) { + *signals.entry("cases".to_string()).or_insert(0) += 1; + return 0.5 + self.score_children(node, nesting + 1, signals); + } + + if rescue_node(node) { + *signals.entry("rescues".to_string()).or_insert(0) += 1; + return self.branch_cost(nesting) + self.score_children(node, nesting + 1, signals); + } + + if early_exit(node) { + *signals.entry("early_exits".to_string()).or_insert(0) += 1; + let exit_cost = if nesting > 0 { + 0.5 + (nesting as f64 * 0.25) + } else { + 0.0 + }; + return exit_cost + self.score_children(node, nesting, signals); + } + + if boolean_node(node) { + *signals.entry("boolean_ops".to_string()).or_insert(0) += 1; + return 0.25 + self.score_children(node, nesting, signals); + } + + self.score_children(node, nesting, signals) + } + + fn score_children( + &self, + node: &RawNode, + nesting: usize, + signals: &mut BTreeMap, + ) -> f64 { + compensated_sum(node.children.iter().map(|child| { + if return_fallback_boolean_wrapper(node, child) { + 0.0 + } else if duplicate_ruby_early_exit_token(node, child) { + 0.0 + } else if transparent_single_line_suite_statement(node, child) { + self.score_children(child, nesting, signals) + } else { + 
self.score_node(child, nesting, signals) + } + })) + } + + fn predicate_cost(&self, node: Option<&RawNode>, signals: &mut BTreeMap) -> f64 { + let Some(node) = node else { return 0.0 }; + let bools = boolean_count(node); + *signals.entry("boolean_ops".to_string()).or_insert(0) += bools; + (bools as f64) * 0.5 + } + + fn branch_cost(&self, nesting: usize) -> f64 { + 1.1 + (nesting as f64) + } + + fn round(&self, value: f64) -> f64 { + (value * 10.0).round() / 10.0 + } +} + +fn local_method_owner(file: &str, owner: &str) -> String { + let file_owner = file_owner(file); + if owner == file_owner { + return "(top-level)".to_string(); + } + owner + .strip_prefix(&format!("{file_owner}::")) + .unwrap_or(owner) + .to_string() +} + +fn file_owner(file: &str) -> String { + Path::new(file) + .file_stem() + .and_then(|stem| stem.to_str()) + .unwrap_or("Object") + .to_string() +} + +fn skip_nested(node: &RawNode) -> bool { + matches!(node.kind.as_str(), "class" | "module" | "lambda") +} + +fn branch(node: &RawNode) -> bool { + (matches!( + node.kind.as_str(), + "if" | "unless" | "if_statement" | "if_expression" | "if_modifier" | "unless_modifier" + ) && !node.named_children().is_empty()) + || hidden_if(node) + || modifier_if(node) +} + +fn hidden_if(node: &RawNode) -> bool { + if node.kind == "expression_statement" && node.text.trim_start().starts_with("if ") { + if node.named_children().iter().any(|child| { + matches!( + child.kind.as_str(), + "if" | "unless" | "if_statement" | "if_expression" + ) + }) { + return false; + } + return true; + } + matches!( + node.kind.as_str(), + "body_statement" | "block" | "statements" | "statement_list" + ) && node + .children + .first() + .map(|child| !child.named && matches!(child.kind.as_str(), "if" | "unless")) + .unwrap_or(false) +} + +fn modifier_if(node: &RawNode) -> bool { + if matches!(node.kind.as_str(), "if_modifier" | "unless_modifier") { + return true; + } + if node.kind != "body_statement" { + return false; + } + let mut 
seen_named = false; + node.children.iter().any(|child| { + seen_named |= child.named; + seen_named && !child.named && matches!(child.kind.as_str(), "if" | "unless") + }) +} + +fn loop_node(node: &RawNode) -> bool { + matches!( + node.kind.as_str(), + "while" + | "until" + | "while_statement" + | "for" + | "for_statement" + | "for_in_statement" + | "do_block" + ) || hidden_loop(node) + || (node.kind == "expression_statement" + && starts_with_any(node.text.trim_start(), &["for", "while", "loop"])) + || (node.kind == "labeled_statement" + && node.text.trim_start().starts_with("for ") + && !has_named_control_child( + node, + &["for_statement", "for_expression", "while_statement"], + )) +} + +fn hidden_loop(node: &RawNode) -> bool { + matches!( + node.kind.as_str(), + "body_statement" | "block" | "statements" | "statement_list" + ) && node + .children + .first() + .map(|child| !child.named && matches!(child.kind.as_str(), "for" | "while" | "loop")) + .unwrap_or(false) +} + +fn starts_with_any(text: &str, words: &[&str]) -> bool { + words + .iter() + .any(|word| text == *word || text.starts_with(&format!("{word} "))) +} + +fn case_node(node: &RawNode) -> bool { + matches!( + node.kind.as_str(), + "case" | "switch_statement" | "switch_expression" | "match_statement" | "match_expression" + ) || (node.kind == "expression_statement" + && node.text.trim_start().starts_with("match ") + && !has_named_control_child( + node, + &[ + "case", + "switch_statement", + "switch_expression", + "match_statement", + "match_expression", + ], + )) +} + +fn has_named_control_child(node: &RawNode, kinds: &[&str]) -> bool { + node.named_children() + .iter() + .any(|child| kinds.contains(&child.kind.as_str())) +} + +fn rescue_node(node: &RawNode) -> bool { + matches!( + node.kind.as_str(), + "rescue" | "rescue_modifier" | "rescue_clause" | "rescue_body" + ) +} + +fn early_exit(node: &RawNode) -> bool { + matches!( + node.kind.as_str(), + "return" + | "break" + | "next" + | "redo" + | "retry" + | 
"return_statement" + | "break_statement" + | "continue_statement" + ) +} + +fn duplicate_ruby_early_exit_token(parent: &RawNode, child: &RawNode) -> bool { + matches!( + parent.kind.as_str(), + "return" | "break" | "next" | "redo" | "retry" + ) && !child.named + && child.text == parent.kind + && parent.text.trim() == parent.kind +} + +fn transparent_single_line_suite_statement(parent: &RawNode, child: &RawNode) -> bool { + parent.kind == "block" + && parent.children.len() == 1 + && parent.text == child.text + && matches!( + child.kind.as_str(), + "return_statement" | "break_statement" | "continue_statement" + ) +} + +fn return_fallback_boolean_wrapper(parent: &RawNode, child: &RawNode) -> bool { + parent.kind == "return_statement" + && child.kind == "expression_list" + && child + .named_children() + .iter() + .any(|grandchild| boolean_node(grandchild)) +} + +fn boolean_node(node: &RawNode) -> bool { + matches!( + node.kind.as_str(), + "binary" + | "binary_expression" + | "boolean_operator" + | "conjunction_expression" + | "disjunction_expression" + ) && node + .children + .iter() + .any(|child| !child.named && matches!(child.text.as_str(), "&&" | "||" | "and" | "or")) +} + +fn condition_node(node: &RawNode) -> Option<&RawNode> { + if modifier_if(node) { + return node.named_children().last().copied(); + } + if node.kind == "body_statement" { + return node.named_children().first().copied(); + } + node.named_children().first().copied() +} + +fn boolean_count(node: &RawNode) -> usize { + let own = usize::from(boolean_node(node)); + own + node.children.iter().map(boolean_count).sum::() +} + +fn compensated_sum(values: impl IntoIterator) -> f64 { + let mut sum = 0.0f64; + let mut compensation = 0.0f64; + for value in values { + let next = sum + value; + if sum.abs() >= value.abs() { + compensation += (sum - next) + value; + } else { + compensation += (value - next) + sum; + } + sum = next; + } + sum + compensation +} diff --git 
a/gems/decomplex/rust/src/decomplex/syntax/local_flow.rs b/gems/decomplex/rust/src/decomplex/syntax/local_flow.rs new file mode 100644 index 000000000..1466e7aef --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/local_flow.rs @@ -0,0 +1,2239 @@ +use crate::decomplex::ast::{self, Child, Node, RawNode, Span}; +use crate::decomplex::syntax::adapters::{language_profile, LanguageProfile}; +use crate::decomplex::syntax::{Document, FunctionDef, Language}; +use anyhow::Result; +use regex::Regex; +use serde::{Deserialize, Serialize}; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::{Path, PathBuf}; +use std::sync::OnceLock; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct LocalFlowRow { + pub summaries: Vec, +} + +#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)] +pub struct MethodSummary { + pub id: String, + pub owner: String, + pub name: String, + pub file: String, + pub line: usize, + pub span: Span, + #[serde(default = "empty_node", skip_serializing)] + pub node: Node, + #[serde(default, skip_serializing)] + pub raw_node: Option, + pub statements: Vec, + pub boundaries: Vec, +} + +#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)] +pub struct Statement { + pub index: usize, + pub line: usize, + pub end_line: usize, + pub span: Span, + pub source: String, + pub reads: BTreeSet, + pub writes: BTreeSet, + pub dependencies: Vec<(String, String)>, + pub co_uses: Vec<(String, String)>, +} + +#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)] +pub struct Boundary { + pub before_index: usize, + pub after_index: usize, + pub line: usize, + pub kind: String, + pub text: String, +} + +const OWNER_TYPES: &[&str] = &["CLASS", "MODULE"]; +const METHOD_TYPES: &[&str] = &["DEFN", "DEFS"]; +const SKIP_NESTED_TYPES: &[&str] = &["CLASS", "MODULE", "DEFN", "DEFS", "LAMBDA"]; +const LOCAL_READ_TYPES: &[&str] = &["LVAR", "DVAR"]; +const LOCAL_WRITE_TYPES: &[&str] = &["LASGN", "DASGN"]; +const 
STATEMENT_CONTAINER_TYPES: &[&str] = &[ + "BLOCK", + "COMPOUND_STATEMENT", + "DECLARATION_LIST", + "FUNCTION_BODY", + "HASH", + "STATEMENTS", +]; + +fn empty_node() -> Node { + Node { + r#type: "ROOT".to_string(), + children: Vec::new(), + first_lineno: 1, + first_column: 0, + last_lineno: 1, + last_column: 0, + text: String::new(), + } +} + +pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { + let documents = super::parse_files(files, language)?; + Ok(scan_documents(&documents)) +} + +pub fn scan_documents(documents: &[Document]) -> Vec { + let mut out = Vec::new(); + for document in documents { + let normalized = normalized_local_methods(document); + let raw = raw_local_methods(document); + let raw_keys: BTreeSet<_> = raw.iter().map(method_summary_key).collect(); + out.extend(raw); + out.extend( + normalized + .into_iter() + .filter(|summary| !raw_keys.contains(&method_summary_key(summary))), + ); + } + out +} + +pub fn local_contract_assignments(method: &MethodSummary) -> BTreeMap { + let mut map = BTreeMap::new(); + for statement in &method.statements { + if statement.writes.len() != 1 { + continue; + } + let Some(name) = statement.writes.iter().next() else { + continue; + }; + if map.contains_key(name) { + continue; + } + if let Some(source) = local_contract_source(name, &statement.source) { + map.insert(name.clone(), source); + } + } + map +} + +fn local_contract_source(name: &str, source: &str) -> Option { + let pattern = format!( + r"(?s)\b{}\b\s*(?::=|=)\s*(.+?)\s*;?\s*$", + regex::escape(name) + ); + let assignment = Regex::new(&pattern).ok()?; + let rhs = assignment.captures(source)?.get(1)?.as_str().trim(); + static CONDITIONAL_SOURCE: OnceLock = OnceLock::new(); + let conditional = + CONDITIONAL_SOURCE.get_or_init(|| Regex::new(r"\s(?:if|unless|rescue)\s|\?|:").unwrap()); + if conditional.is_match(rhs) { + None + } else { + Some(rhs.to_string()) + } +} + +fn normalized_local_methods(document: &Document) -> Vec { + let mut detector = 
LocalFlow::new( + document.file.clone(), + document.lines.clone(), + method_metadata(document), + ); + detector.scan(&document.normalized_root) +} + +fn method_summary_key(summary: &MethodSummary) -> (String, String, usize) { + (summary.file.clone(), summary.id.clone(), summary.line) +} + +#[derive(Clone, Debug, Eq, PartialEq)] +struct MethodMetadata { + owner: String, + name: String, + params: BTreeSet, +} + +fn raw_local_methods(document: &Document) -> Vec { + let profile = language_profile(document.language); + document + .function_defs + .iter() + .map(|function| raw_method_summary(document, profile, function)) + .collect() +} + +fn raw_method_summary( + document: &Document, + profile: &dyn LanguageProfile, + function: &FunctionDef, +) -> MethodSummary { + let statement_nodes = raw_function_body_statements(&function.body, profile); + let local_names = raw_local_names(function, &statement_nodes, profile); + let statements: Vec<_> = statement_nodes + .iter() + .enumerate() + .map(|(index, statement)| raw_statement_summary(statement, index, &local_names, profile)) + .collect(); + let owner = local_flow_owner(&document.file, &function.owner); + + MethodSummary { + id: format!("{}#{}", owner, function.name), + owner, + name: function.name.clone(), + file: function.file.clone(), + line: function.line, + span: function.span, + node: normalized_node_for_span(&document.normalized_root, function.span) + .cloned() + .unwrap_or_else(|| fallback_node_from_raw(&function.body)), + raw_node: Some(function.body.clone()), + boundaries: raw_structural_boundaries(document, &statements), + statements, + } +} + +fn raw_function_body_statements<'a>( + node: &'a RawNode, + profile: &dyn LanguageProfile, +) -> Vec<&'a RawNode> { + let body = raw_function_body_node(node, profile); + let Some(body) = body else { + return Vec::new(); + }; + + let mut named = raw_named_children(body) + .into_iter() + .filter(|child| !raw_comment_node(child)) + .collect::>(); + if named.len() == 1 + && 
profile + .nested_statement_wrapper_node_kinds() + .contains(&named[0].kind.as_str()) + { + if raw_branch_node(named[0], profile) { + return vec![named[0]]; + } + named = raw_named_children(named[0]) + .into_iter() + .filter(|child| !raw_comment_node(child)) + .collect(); + } + if named.is_empty() && body.text.trim().is_empty() { + return Vec::new(); + } + if raw_branch_node(body, profile) || raw_assignment_statement(body, profile) || named.is_empty() + { + return vec![body]; + } + named +} + +fn raw_function_body_node<'a>( + node: &'a RawNode, + profile: &dyn LanguageProfile, +) -> Option<&'a RawNode> { + raw_named_children(node).into_iter().rev().find(|child| { + profile + .function_body_node_kinds() + .contains(&child.kind.as_str()) + }) +} + +fn raw_local_names( + function: &FunctionDef, + statements: &[&RawNode], + profile: &dyn LanguageProfile, +) -> BTreeSet { + let mut names: BTreeSet = function.params.iter().cloned().collect(); + if let Some(receiver) = raw_function_receiver_name(&function.body, profile) { + names.insert(receiver); + } + for statement in statements { + names.extend(raw_local_writes(statement, profile)); + } + names +} + +fn raw_function_receiver_name(node: &RawNode, profile: &dyn LanguageProfile) -> Option { + if !profile + .method_receiver_node_kinds() + .contains(&node.kind.as_str()) + { + return None; + } + let receiver_params = raw_named_children(node).into_iter().find(|child| { + profile + .parameter_list_node_kinds() + .contains(&child.kind.as_str()) + })?; + let receiver = raw_named_children(receiver_params) + .into_iter() + .find(|child| { + profile + .receiver_parameter_node_kinds() + .contains(&child.kind.as_str()) + })?; + let name = raw_named_children(receiver).into_iter().find(|child| { + profile + .first_argument_receiver_name_node_kinds() + .contains(&child.kind.as_str()) + })?; + raw_local_identifier_text(name, profile) +} + +fn raw_statement_summary( + node: &RawNode, + index: usize, + local_names: &BTreeSet, + profile: 
&dyn LanguageProfile, +) -> Statement { + let writes = raw_local_writes(node, profile); + let reads = raw_local_reads(node, local_names, profile); + Statement { + index, + line: node.span[0], + end_line: node.span[2], + span: node.span, + source: profile.normalize_source_text(&node.text), + dependencies: raw_assignment_dependencies(node, local_names, profile), + co_uses: co_use_pairs(&reads), + reads, + writes, + } +} + +fn raw_local_reads( + node: &RawNode, + local_names: &BTreeSet, + profile: &dyn LanguageProfile, +) -> BTreeSet { + raw_local_read_list(node, local_names, profile) + .into_iter() + .collect() +} + +fn raw_local_read_list( + node: &RawNode, + local_names: &BTreeSet, + profile: &dyn LanguageProfile, +) -> Vec { + if raw_nested_local_scope(node, profile) { + return Vec::new(); + } + + let mut reads = Vec::new(); + raw_walk_local(node, None, node, profile, &mut |child, parent| { + let Some(name) = raw_local_identifier_text(child, profile) else { + return; + }; + if local_names.contains(&name) + && !raw_local_write_node(child, parent, profile) + && !raw_assignment_lhs_read_in_tree(node, child, profile) + && !raw_ruby_unary_assertion_argument(node, child, parent, profile) + && !raw_python_import_name(parent, profile) + && !raw_python_with_alias_read(child, parent, profile) + && !raw_declaration_name_in_tree(node, child, profile) + && !raw_declaration_name(child, parent, profile) + && !raw_member_name(child, parent, profile) + && !raw_call_method_name(child, parent, profile) + && !raw_keyed_element_key(child, parent, profile) + && !reads.contains(&name) + { + reads.push(name); + } + }); + reads +} + +fn raw_local_writes(node: &RawNode, profile: &dyn LanguageProfile) -> BTreeSet { + if raw_nested_local_scope(node, profile) { + return BTreeSet::new(); + } + + let source = profile.normalize_source_text(&node.text); + let textual_writes_allowed = raw_assignment_statement(node, profile) + || profile + .local_declaration_node_kinds() + 
.contains(&node.kind.as_str()); + let mut writes = if !textual_writes_allowed { + Vec::new() + } else if profile.language() == Language::Python { + python_textual_local_writes(&source) + } else { + textual_local_writes(&source) + }; + if profile.language() == Language::Python { + writes.extend(raw_python_with_alias_names(node, profile)); + } + raw_walk_local(node, None, node, profile, &mut |child, parent| { + if raw_local_write_node(child, parent, profile) + || raw_declaration_name_in_tree(node, child, profile) + || raw_assignment_lhs_write_in_tree(node, child, profile) + { + if let Some(name) = raw_local_identifier_text(child, profile) { + writes.push(name); + } + } + }); + writes + .into_iter() + .filter_map(|name| { + let normalized = profile.normalize_local_identifier_text(&name); + (!normalized.is_empty()).then_some(normalized) + }) + .collect() +} + +fn raw_assignment_dependencies( + node: &RawNode, + local_names: &BTreeSet, + profile: &dyn LanguageProfile, +) -> Vec<(String, String)> { + if profile.nested_assignment_dependencies_only() { + return raw_nested_assignment_dependencies(node, local_names, profile); + } + + let lhs_names = raw_local_writes(node, profile); + if lhs_names.is_empty() { + return Vec::new(); + } + + let reads = raw_local_reads(node, local_names, profile); + let mut deps = Vec::new(); + for lhs in &lhs_names { + for read in &reads { + if lhs != read && !lhs_names.contains(read) { + deps.push((lhs.clone(), read.clone())); + } + } + } + deps.sort(); + deps.dedup(); + deps +} + +fn raw_nested_assignment_dependencies( + node: &RawNode, + local_names: &BTreeSet, + profile: &dyn LanguageProfile, +) -> Vec<(String, String)> { + let mut deps = Vec::new(); + raw_walk_local(node, None, node, profile, &mut |child, _parent| { + if !profile + .assignment_node_kinds() + .contains(&child.kind.as_str()) + { + return; + } + let children = raw_named_children(child); + let Some(lhs) = children.first().copied() else { + return; + }; + let Some(rhs) = 
children.get(1).copied() else { + return; + }; + let Some(lhs_name) = raw_local_identifier_text(lhs, profile) else { + return; + }; + for read in raw_local_reads(rhs, local_names, profile) { + if lhs_name != read { + deps.push((lhs_name.clone(), read)); + } + } + }); + deps.sort(); + deps.dedup(); + deps +} + +fn co_use_pairs(reads: &BTreeSet) -> Vec<(String, String)> { + let reads = reads.iter().cloned().collect::>(); + let mut out = Vec::new(); + for i in 0..reads.len() { + for j in i + 1..reads.len() { + out.push((reads[i].clone(), reads[j].clone())); + } + } + out +} + +fn raw_structural_boundaries(document: &Document, statements: &[Statement]) -> Vec { + let mut out = Vec::new(); + for i in 0..statements.len().saturating_sub(1) { + let left = &statements[i]; + let right = &statements[i + 1]; + if let Some(boundary) = raw_source_boundary(document, left.end_line + 1, right.line - 1) { + out.push(Boundary { + before_index: left.index, + after_index: right.index, + line: boundary.line, + kind: boundary.kind, + text: boundary.text, + }); + } + } + out +} + +fn raw_source_boundary( + document: &Document, + first_line: usize, + last_line: usize, +) -> Option { + if first_line > last_line { + return None; + } + + let mut blank = None; + for line_number in first_line..=last_line { + let stripped = document + .lines + .get(line_number - 1) + .map(|line| line.trim()) + .unwrap_or(""); + if stripped.starts_with('#') || stripped.starts_with("//") || stripped.starts_with("--") { + return Some(RawBoundary { + line: line_number, + kind: "comment".to_string(), + text: stripped.to_string(), + }); + } + if stripped.is_empty() && blank.is_none() { + blank = Some(RawBoundary { + line: line_number, + kind: "blank".to_string(), + text: stripped.to_string(), + }); + } + } + blank +} + +fn raw_walk_local<'a>( + node: &'a RawNode, + parent: Option<&'a RawNode>, + root: &'a RawNode, + profile: &dyn LanguageProfile, + block: &mut dyn FnMut(&'a RawNode, Option<&'a RawNode>), +) { + if 
!std::ptr::eq(node, root) && raw_nested_local_scope(node, profile) { + return; + } + block(node, parent); + for child in &node.children { + raw_walk_local(child, Some(node), root, profile, block); + } +} + +fn raw_nested_local_scope(node: &RawNode, profile: &dyn LanguageProfile) -> bool { + profile.function_node_kinds().contains(&node.kind.as_str()) || raw_owner_node(node, profile) +} + +fn raw_owner_node(node: &RawNode, profile: &dyn LanguageProfile) -> bool { + profile + .class_owner_node_kinds() + .contains(&node.kind.as_str()) + || profile + .module_owner_node_kinds() + .contains(&node.kind.as_str()) + || profile + .generic_owner_node_kinds() + .contains(&node.kind.as_str()) + || profile + .impl_owner_node_kinds() + .contains(&node.kind.as_str()) + || profile + .struct_owner_node_kinds() + .contains(&node.kind.as_str()) +} + +fn raw_local_identifier_text(node: &RawNode, profile: &dyn LanguageProfile) -> Option { + if profile.language() == Language::Ruby && node.kind != "identifier" { + return None; + } + if profile + .identifier_node_kinds() + .contains(&node.kind.as_str()) + { + let text = profile.normalize_local_identifier_text(&node.text); + return (!text.is_empty()).then_some(text); + } + if profile + .local_identifier_wrapper_node_kinds() + .contains(&node.kind.as_str()) + && node.named + && raw_named_children(node).is_empty() + && simple_identifier(&node.text) + { + let text = profile.normalize_local_identifier_text(&node.text); + return (!text.is_empty()).then_some(text); + } + None +} + +fn raw_ruby_unary_assertion_argument( + root: &RawNode, + node: &RawNode, + parent: Option<&RawNode>, + profile: &dyn LanguageProfile, +) -> bool { + if profile.language() != Language::Ruby { + return false; + } + let _ = parent; + let source = root.text.as_str(); + ["assert_empty", "refute_empty", "assert_nil", "refute_nil"] + .iter() + .any(|name| source.contains(&format!("{name} {}", node.text))) +} + +fn raw_local_write_node( + node: &RawNode, + parent: 
Option<&RawNode>, + profile: &dyn LanguageProfile, +) -> bool { + if raw_local_identifier_text(node, profile).is_none() || raw_member_name(node, parent, profile) + { + return false; + } + if raw_call_name(node, parent, profile) { + return false; + } + if raw_declaration_name(node, parent, profile) { + return true; + } + let Some(parent) = parent else { + return false; + }; + if profile + .update_statement_node_kinds() + .contains(&parent.kind.as_str()) + && raw_named_children(parent) + .first() + .map(|target| std::ptr::eq(*target, node)) + .unwrap_or(false) + { + return true; + } + if profile + .assignment_node_kinds() + .contains(&parent.kind.as_str()) + { + if let Some(lhs) = raw_named_children(parent).first() { + if raw_indexed_lhs_node(lhs, profile) + && !profile.indexed_lhs_descendants_are_writes() + && raw_contains_node(lhs, node) + { + return false; + } + if raw_contains_node(lhs, node) { + return true; + } + } + } + if profile.language() == Language::Python { + if parent.kind == "keyword_argument" { + return false; + } + if raw_python_loop_target(node, parent) + || raw_python_named_expression_lhs(node, parent) + || raw_python_typed_assignment_lhs(node, parent) + || raw_python_annotation_lhs(node, parent) + { + return true; + } + } + raw_assignment_lhs(node, parent, profile) +} + +fn raw_python_loop_target(node: &RawNode, parent: &RawNode) -> bool { + if raw_previous_sibling(node, parent) + .map(|sibling| sibling.text.as_str() == "for") + .unwrap_or(false) + && raw_next_sibling(node, parent) + .map(|sibling| sibling.text.as_str() != ":") + .unwrap_or(false) + { + return true; + } + + let mut seen_for = false; + let mut current = raw_previous_sibling(node, parent); + while let Some(sibling) = current { + match sibling.text.as_str() { + "in" | ":" => return false, + "for" => { + seen_for = true; + break; + } + _ => current = raw_previous_sibling(sibling, parent), + } + } + if !seen_for { + return false; + } + + current = raw_next_sibling(node, parent); + 
while let Some(sibling) = current { + match sibling.text.as_str() { + "in" => return true, + ":" => return false, + _ => current = raw_next_sibling(sibling, parent), + } + } + false +} + +fn raw_python_typed_assignment_lhs(node: &RawNode, parent: &RawNode) -> bool { + let Some(colon) = raw_next_sibling(node, parent) else { + return false; + }; + if colon.text != ":" { + return false; + } + let Some(type_node) = raw_next_sibling(colon, parent) else { + return false; + }; + if type_node.kind != "type" { + return false; + } + raw_next_sibling(type_node, parent) + .map(|sibling| sibling.text.as_str() == "=") + .unwrap_or(false) +} + +fn raw_python_named_expression_lhs(node: &RawNode, parent: &RawNode) -> bool { + parent.kind == "named_expression" + && raw_named_children(parent) + .first() + .map(|lhs| std::ptr::eq(*lhs, node)) + .unwrap_or(false) + && raw_next_sibling(node, parent) + .map(|sibling| sibling.text.as_str() == ":=") + .unwrap_or(false) +} + +fn raw_python_annotation_lhs(node: &RawNode, parent: &RawNode) -> bool { + let Some(colon) = raw_next_sibling(node, parent) else { + return false; + }; + if colon.text != ":" { + return false; + } + let Some(type_node) = raw_next_sibling(colon, parent) else { + return false; + }; + if type_node.kind != "type" { + return false; + } + !raw_next_sibling(type_node, parent) + .map(|sibling| sibling.text.as_str() == "=") + .unwrap_or(false) +} + +fn raw_python_with_alias_names(node: &RawNode, profile: &dyn LanguageProfile) -> Vec { + let mut names = Vec::new(); + raw_walk_local(node, None, node, profile, &mut |child, _parent| { + if child.kind == "as_pattern_target" && simple_identifier(&child.text) { + names.push(child.text.clone()); + } + }); + names +} + +fn raw_python_import_name(parent: Option<&RawNode>, profile: &dyn LanguageProfile) -> bool { + profile.language() == Language::Python + && parent + .map(|parent| parent.kind.as_str() == "dotted_name") + .unwrap_or(false) +} + +fn raw_python_with_alias_read( + node: 
&RawNode, + parent: Option<&RawNode>, + profile: &dyn LanguageProfile, +) -> bool { + profile.language() == Language::Python + && (node.kind == "as_pattern_target" + || parent + .map(|parent| parent.kind.as_str() == "as_pattern_target") + .unwrap_or(false)) +} + +fn python_textual_local_writes(source: &str) -> Vec { + match split_assignment(source) { + Some((_lhs, ":=")) => Vec::new(), + _ => textual_local_writes(source), + } +} + +fn raw_declaration_name( + node: &RawNode, + parent: Option<&RawNode>, + profile: &dyn LanguageProfile, +) -> bool { + parent + .map(|parent| { + raw_local_declaration_name_nodes(parent, profile) + .into_iter() + .any(|name| std::ptr::eq(name, node) || raw_contains_node(name, node)) + }) + .unwrap_or(false) +} + +fn raw_declaration_name_in_tree( + root: &RawNode, + target: &RawNode, + profile: &dyn LanguageProfile, +) -> bool { + raw_local_declaration_name_nodes(root, profile) + .into_iter() + .any(|name| std::ptr::eq(name, target) || raw_contains_node(name, target)) + || root + .children + .iter() + .any(|child| raw_declaration_name_in_tree(child, target, profile)) +} + +fn raw_local_declaration_name_nodes<'a>( + node: &'a RawNode, + profile: &dyn LanguageProfile, +) -> Vec<&'a RawNode> { + if !profile + .local_declaration_node_kinds() + .contains(&node.kind.as_str()) + { + return Vec::new(); + } + + if profile + .short_variable_declaration_node_kinds() + .contains(&node.kind.as_str()) + { + if let Some(left) = raw_named_children(node).into_iter().find(|child| { + profile + .variable_declaration_node_kinds() + .contains(&child.kind.as_str()) + }) { + let identifiers = raw_named_children(left) + .into_iter() + .filter(|child| raw_local_identifier_text(child, profile).is_some()) + .collect::>(); + if !identifiers.is_empty() { + return identifiers; + } + if simple_identifier(&left.text) { + return vec![left]; + } + } + return Vec::new(); + } + + let variables = raw_variable_declaration_nodes(node, profile); + if !variables.is_empty() { + 
let names = variables + .into_iter() + .flat_map(|variable| raw_variable_declaration_name_nodes(variable, profile)) + .collect::>(); + if !names.is_empty() { + return names; + } + } + + if let Some(declaration_assignment) = raw_named_children(node).into_iter().find(|child| { + profile + .declaration_assignment_node_kinds() + .contains(&child.kind.as_str()) + }) { + if let Some(lhs) = raw_named_children(declaration_assignment).first().copied() { + return raw_first_identifier(lhs, profile) + .or(Some(lhs)) + .into_iter() + .collect(); + } + } + + raw_named_children(node) + .into_iter() + .find(|child| { + profile + .local_identifier_wrapper_node_kinds() + .contains(&child.kind.as_str()) + }) + .or_else(|| raw_first_identifier(node, profile)) + .into_iter() + .collect() +} + +fn raw_variable_declaration_nodes<'a>( + node: &'a RawNode, + profile: &dyn LanguageProfile, +) -> Vec<&'a RawNode> { + let mut out = Vec::new(); + raw_collect_variable_declaration_nodes(node, profile, &mut out); + out +} + +fn raw_collect_variable_declaration_nodes<'a>( + node: &'a RawNode, + profile: &dyn LanguageProfile, + out: &mut Vec<&'a RawNode>, +) { + if profile + .variable_declaration_node_kinds() + .contains(&node.kind.as_str()) + { + out.push(node); + return; + } + for child in raw_named_children(node) { + raw_collect_variable_declaration_nodes(child, profile, out); + } +} + +fn raw_variable_declaration_name_nodes<'a>( + variable: &'a RawNode, + profile: &dyn LanguageProfile, +) -> Vec<&'a RawNode> { + if simple_identifier(&variable.text) { + return vec![variable]; + } + + if profile + .multi_name_variable_declaration_node_kinds() + .contains(&variable.kind.as_str()) + { + let names = raw_named_children(variable) + .into_iter() + .take_while(|child| raw_local_identifier_text(child, profile).is_some()) + .collect::>(); + if !names.is_empty() { + return names; + } + } + + raw_first_identifier(variable, profile) + .into_iter() + .collect() +} + +fn raw_first_identifier<'a>( + node: &'a 
RawNode, + profile: &dyn LanguageProfile, +) -> Option<&'a RawNode> { + if raw_local_identifier_text(node, profile).is_some() { + return Some(node); + } + node.children + .iter() + .find_map(|child| raw_first_identifier(child, profile)) +} + +fn raw_assignment_lhs(node: &RawNode, parent: &RawNode, profile: &dyn LanguageProfile) -> bool { + if raw_previous_sibling(node, parent) + .map(|sibling| sibling.text.as_str() == ":") + .unwrap_or(false) + { + return false; + } + raw_next_sibling(node, parent) + .map(|sibling| { + !sibling.named + && profile + .assignment_operator_tokens() + .contains(&sibling.text.as_str()) + }) + .unwrap_or(false) +} + +fn raw_assignment_lhs_read_in_tree( + root: &RawNode, + target: &RawNode, + profile: &dyn LanguageProfile, +) -> bool { + if profile + .deferred_statement_node_kinds() + .contains(&root.kind.as_str()) + { + return false; + } + if profile + .assignment_node_kinds() + .contains(&root.kind.as_str()) + || (profile.language() == Language::Ruby && raw_assignment_statement(root, profile)) + { + if let Some(lhs) = raw_named_children(root).first() { + if raw_assignment_lhs_read_target(lhs, target, profile) { + return true; + } + } + } + root.children + .iter() + .any(|child| raw_assignment_lhs_read_in_tree(child, target, profile)) +} + +fn raw_assignment_lhs_write_in_tree( + root: &RawNode, + target: &RawNode, + profile: &dyn LanguageProfile, +) -> bool { + if profile + .deferred_statement_node_kinds() + .contains(&root.kind.as_str()) + { + return false; + } + if profile + .assignment_node_kinds() + .contains(&root.kind.as_str()) + || (profile.language() == Language::Ruby && raw_assignment_statement(root, profile)) + { + if let Some(lhs) = raw_named_children(root).first() { + if raw_assignment_lhs_write_target(lhs, target, profile) { + return true; + } + } + } + root.children + .iter() + .any(|child| raw_assignment_lhs_write_in_tree(child, target, profile)) +} + +fn raw_assignment_lhs_read_target( + lhs: &RawNode, + target: &RawNode, 
+ profile: &dyn LanguageProfile, +) -> bool { + if raw_indexed_lhs_node(lhs, profile) { + return profile.suppress_indexed_lhs_reads() && raw_contains_node(lhs, target); + } + if raw_field_like_node(lhs, profile) { + return profile.suppress_field_receiver_lhs_reads() + && raw_member_receiver_target(lhs, target, profile); + } + if let Some(lhs_name) = raw_local_identifier_text(lhs, profile) { + return std::ptr::eq(lhs, target) + || (raw_contains_node(lhs, target) + && raw_local_identifier_text(target, profile) + .map(|target_name| target_name == lhs_name) + .unwrap_or(false)); + } + if profile + .expression_list_node_kinds() + .contains(&lhs.kind.as_str()) + { + if raw_named_children(lhs).is_empty() && raw_local_identifier_text(lhs, profile).is_some() { + return std::ptr::eq(lhs, target); + } + return raw_named_children(lhs) + .into_iter() + .any(|child| raw_assignment_lhs_read_target(child, target, profile)); + } + raw_contains_node(lhs, target) +} + +fn raw_assignment_lhs_write_target( + lhs: &RawNode, + target: &RawNode, + profile: &dyn LanguageProfile, +) -> bool { + if profile.language() == Language::Ruby && lhs.kind == "element_reference" { + return false; + } + if raw_indexed_lhs_node(lhs, profile) { + return raw_named_children(lhs) + .first() + .map(|object| raw_assignment_lhs_write_target(object, target, profile)) + .unwrap_or(false); + } + if raw_field_like_node(lhs, profile) { + return raw_member_receiver_target(lhs, target, profile); + } + if let Some(lhs_name) = raw_local_identifier_text(lhs, profile) { + return std::ptr::eq(lhs, target) + || (raw_contains_node(lhs, target) + && raw_local_identifier_text(target, profile) + .map(|target_name| target_name == lhs_name) + .unwrap_or(false)); + } + if profile + .expression_list_node_kinds() + .contains(&lhs.kind.as_str()) + { + if raw_named_children(lhs).is_empty() && raw_local_identifier_text(lhs, profile).is_some() { + return std::ptr::eq(lhs, target); + } + return raw_named_children(lhs) + .into_iter() + 
.any(|child| raw_assignment_lhs_write_target(child, target, profile)); + } + raw_contains_node(lhs, target) +} + +fn raw_indexed_lhs_node(node: &RawNode, profile: &dyn LanguageProfile) -> bool { + profile + .indexed_lhs_node_kinds() + .contains(&node.kind.as_str()) + || (profile + .indexed_lhs_bracket_wrapper_node_kinds() + .contains(&node.kind.as_str()) + && node + .children + .iter() + .any(|child| !child.named && child.text == "[")) +} + +fn raw_field_like_node(node: &RawNode, profile: &dyn LanguageProfile) -> bool { + profile + .field_like_node_kinds() + .contains(&node.kind.as_str()) + || (profile + .field_like_dot_wrapper_node_kinds() + .contains(&node.kind.as_str()) + && node + .children + .iter() + .any(|child| !child.named && child.text == ".")) +} + +fn raw_member_receiver_target( + node: &RawNode, + target: &RawNode, + profile: &dyn LanguageProfile, +) -> bool { + let Some(receiver) = raw_named_children(node).first().copied() else { + return false; + }; + if raw_local_identifier_text(receiver, profile).is_some() { + return std::ptr::eq(receiver, target); + } + if raw_indexed_lhs_node(receiver, profile) { + return raw_named_children(receiver) + .first() + .map(|object| raw_member_receiver_target(object, target, profile)) + .unwrap_or(false); + } + if raw_field_like_node(receiver, profile) { + return raw_member_receiver_target(receiver, target, profile); + } + if raw_named_children(receiver) + .into_iter() + .any(|child| raw_member_receiver_target(child, target, profile)) + { + return true; + } + false +} + +fn raw_member_name( + node: &RawNode, + parent: Option<&RawNode>, + profile: &dyn LanguageProfile, +) -> bool { + let Some(parent) = parent else { + return false; + }; + if !raw_field_like_node(parent, profile) { + return false; + } + raw_named_children(parent) + .last() + .map(|field| std::ptr::eq(*field, node)) + .unwrap_or(false) +} + +fn raw_call_name(node: &RawNode, parent: Option<&RawNode>, profile: &dyn LanguageProfile) -> bool { + let 
Some(parent) = parent else { + return false; + }; + if raw_field_like_node(parent, profile) { + return false; + } + profile.call_node_kinds().contains(&parent.kind.as_str()) + && raw_named_children(parent) + .first() + .map(|callee| std::ptr::eq(*callee, node)) + .unwrap_or(false) +} + +fn raw_call_method_name( + node: &RawNode, + parent: Option<&RawNode>, + profile: &dyn LanguageProfile, +) -> bool { + let Some(parent) = parent else { + return false; + }; + if !profile.call_node_kinds().contains(&parent.kind.as_str()) { + return false; + } + parent + .children + .iter() + .find(|child| child.field_name.as_deref() == Some("method")) + .map(|method| std::ptr::eq(method, node)) + .unwrap_or(false) +} + +fn raw_keyed_element_key( + node: &RawNode, + parent: Option<&RawNode>, + profile: &dyn LanguageProfile, +) -> bool { + let Some(parent) = parent else { + return false; + }; + if !profile + .keyed_element_node_kinds() + .contains(&parent.kind.as_str()) + || !profile.keyed_element_first_named_child_is_key() + { + return false; + } + raw_named_children(parent) + .first() + .map(|key| std::ptr::eq(*key, node)) + .unwrap_or(false) + || raw_next_sibling(node, parent) + .map(|sibling| !sibling.named && sibling.text == ":") + .unwrap_or(false) +} + +fn raw_assignment_statement(node: &RawNode, profile: &dyn LanguageProfile) -> bool { + profile + .assignment_node_kinds() + .contains(&node.kind.as_str()) + || node.children.iter().any(|child| { + !child.named + && profile + .assignment_operator_tokens() + .contains(&child.text.as_str()) + }) +} + +fn raw_branch_node(node: &RawNode, profile: &dyn LanguageProfile) -> bool { + profile.branch_node_kinds().contains(&node.kind.as_str()) +} + +fn raw_comment_node(node: &RawNode) -> bool { + node.kind.to_ascii_lowercase().contains("comment") +} + +fn raw_named_children(node: &RawNode) -> Vec<&RawNode> { + node.children.iter().filter(|child| child.named).collect() +} + +fn raw_next_sibling<'a>(node: &RawNode, parent: &'a RawNode) -> 
Option<&'a RawNode> { + let index = parent + .children + .iter() + .position(|child| std::ptr::eq(child, node))?; + parent.children.get(index + 1) +} + +fn raw_previous_sibling<'a>(node: &RawNode, parent: &'a RawNode) -> Option<&'a RawNode> { + let index = parent + .children + .iter() + .position(|child| std::ptr::eq(child, node))?; + index + .checked_sub(1) + .and_then(|previous| parent.children.get(previous)) +} + +fn raw_contains_node(root: &RawNode, target: &RawNode) -> bool { + std::ptr::eq(root, target) + || root + .children + .iter() + .any(|child| raw_contains_node(child, target)) +} + +fn normalized_node_for_span(root: &Node, span: Span) -> Option<&Node> { + if [ + root.first_lineno, + root.first_column, + root.last_lineno, + root.last_column, + ] == span + { + return Some(root); + } + root.children + .iter() + .filter_map(ast::node) + .find_map(|child| normalized_node_for_span(child, span)) +} + +fn fallback_node_from_raw(raw: &RawNode) -> Node { + Node { + r#type: "DEFN".to_string(), + children: raw + .children + .iter() + .filter(|child| child.named) + .map(|child| Child::Node(Box::new(fallback_node_from_raw(child)))) + .collect(), + first_lineno: raw.span[0], + first_column: raw.span[1], + last_lineno: raw.span[2], + last_column: raw.span[3], + text: raw.text.clone(), + } +} + +struct LocalFlow { + file: String, + lines: Vec, + methods_by_span: BTreeMap, +} + +impl LocalFlow { + fn new( + file: String, + lines: Vec, + methods_by_span: BTreeMap, + ) -> Self { + Self { + file, + lines, + methods_by_span, + } + } + + fn scan(&mut self, root: &Node) -> Vec { + let mut out = Vec::new(); + self.collect_methods(root, &Vec::new(), &mut out); + out + } + + fn collect_methods(&self, node: &Node, owners: &[String], out: &mut Vec) { + if OWNER_TYPES.contains(&node.r#type.as_str()) { + let owner = self.full_owner_name(owners, node); + for method in self.owner_methods(node) { + out.push(self.method_summary(method, Some(&owner))); + } + let mut next_owners = 
owners.to_vec(); + next_owners.push(self.owner_segment(node)); + self.collect_nested_owners(node, &next_owners, out); + } else if METHOD_TYPES.contains(&node.r#type.as_str()) && owners.is_empty() { + out.push(self.method_summary(node, None)); + } else { + for child in node.children.iter().filter_map(ast::node) { + self.collect_methods(child, owners, out); + } + } + } + + fn collect_nested_owners(&self, node: &Node, owners: &[String], out: &mut Vec) { + if METHOD_TYPES.contains(&node.r#type.as_str()) { + return; + } + + for child in node.children.iter().filter_map(ast::node) { + if OWNER_TYPES.contains(&child.r#type.as_str()) { + self.collect_methods(child, owners, out); + } else { + self.collect_nested_owners(child, owners, out); + } + } + } + + fn method_summary(&self, node: &Node, owner_hint: Option<&str>) -> MethodSummary { + let node_span = [ + node.first_lineno, + node.first_column, + node.last_lineno, + node.last_column, + ]; + let metadata = self.methods_by_span.get(&node_span); + let owner = metadata + .map(|item| item.owner.as_str()) + .or(owner_hint) + .unwrap_or("(top-level)"); + let name = metadata + .map(|item| item.name.clone()) + .unwrap_or_else(|| self.method_name(node)); + let statement_nodes = ast::body_stmts(node) + .into_iter() + .filter(|statement| !comment_statement(statement)) + .collect::>(); + let local_names = self.local_names(&statement_nodes, metadata); + let statements: Vec<_> = statement_nodes + .iter() + .enumerate() + .map(|(index, stmt)| self.statement_summary(stmt, index, &local_names)) + .collect(); + MethodSummary { + id: format!("{}#{}", owner, name), + owner: owner.to_string(), + name, + file: self.file.clone(), + line: node.first_lineno, + span: [ + node.first_lineno, + node.first_column, + node.last_lineno, + node.last_column, + ], + node: node.clone(), + raw_node: None, + boundaries: self.structural_boundaries(&statements), + statements, + } + } + + fn statement_summary( + &self, + node: &Node, + index: usize, + local_names: 
&BTreeSet, + ) -> Statement { + let source = ast::slice(node, &self.lines); + let writes = self.local_writes(node); + let reads = self.local_reads(node, local_names, &writes); + Statement { + index, + line: node.first_lineno, + end_line: node.last_lineno, + span: [ + node.first_lineno, + node.first_column, + node.last_lineno, + node.last_column, + ], + source, + dependencies: self.assignment_dependencies(node, local_names), + co_uses: self.co_use_edges(node, local_names), + reads, + writes, + } + } + + fn local_names( + &self, + statements: &[&Node], + metadata: Option<&MethodMetadata>, + ) -> BTreeSet { + let mut names = metadata.map(|item| item.params.clone()).unwrap_or_default(); + for statement in statements { + names.extend(self.local_writes(statement)); + } + names + } + + fn structural_boundaries(&self, statements: &[Statement]) -> Vec { + let mut out = Vec::new(); + for i in 0..statements.len().saturating_sub(1) { + let left = &statements[i]; + let right = &statements[i + 1]; + if let Some(boundary) = self.source_boundary(left.end_line + 1, right.line - 1) { + out.push(Boundary { + before_index: left.index, + after_index: right.index, + line: boundary.line, + kind: boundary.kind, + text: boundary.text, + }); + } + } + out + } + + fn source_boundary(&self, first_line: usize, last_line: usize) -> Option { + if first_line > last_line { + return None; + } + + let mut blank = None; + for line_number in first_line..=last_line { + let text = self + .lines + .get(line_number - 1) + .map(|s| s.as_str()) + .unwrap_or(""); + let stripped = text.trim(); + if stripped.starts_with('#') || stripped.starts_with("//") || stripped.starts_with("--") + { + return Some(RawBoundary { + line: line_number, + kind: "comment".to_string(), + text: stripped.to_string(), + }); + } + if stripped.is_empty() && blank.is_none() { + blank = Some(RawBoundary { + line: line_number, + kind: "blank".to_string(), + text: stripped.to_string(), + }); + } + } + blank + } + + fn 
owner_methods<'a>(&self, owner_node: &'a Node) -> Vec<&'a Node> { + let Some(body) = self.owner_body(owner_node) else { + return Vec::new(); + }; + + let stmts = if statement_container(body) { + body.children + .iter() + .filter_map(ast::node) + .collect::>() + } else { + vec![body] + }; + + stmts + .into_iter() + .flat_map(|stmt| { + if METHOD_TYPES.contains(&stmt.r#type.as_str()) { + vec![stmt] + } else if self.visibility_call(stmt) { + self.inline_methods(stmt) + } else { + vec![] + } + }) + .collect() + } + + fn inline_methods<'a>(&self, stmt: &'a Node) -> Vec<&'a Node> { + let Some(args) = stmt.children.get(1).and_then(ast::node) else { + return Vec::new(); + }; + args.children + .iter() + .filter_map(ast::node) + .filter(|arg| METHOD_TYPES.contains(&arg.r#type.as_str())) + .collect() + } + + fn owner_body<'a>(&self, owner_node: &'a Node) -> Option<&'a Node> { + let scope_index = if owner_node.r#type == "CLASS" { 2 } else { 1 }; + let scope = owner_node.children.get(scope_index).and_then(ast::node)?; + if scope.r#type != "SCOPE" { + return None; + } + scope.children.get(2).and_then(ast::node) + } + + fn visibility_call(&self, node: &Node) -> bool { + if node.r#type == "FCALL" { + if let Some(Child::Symbol(name)) = node.children.first() { + return matches!(name.as_str(), "public" | "protected" | "private"); + } + } + false + } + + fn method_name(&self, node: &Node) -> String { + if node.r#type == "DEFS" { + let receiver = node.children.get(0).and_then(ast::node); + let prefix = if let Some(r) = receiver { + if r.r#type == "SELF" { + "self".to_string() + } else { + ast::slice(r, &self.lines) + } + } else { + "?".to_string() + }; + format!( + "{}.{}", + prefix, + node.children + .get(1) + .and_then(|c| match c { + Child::Symbol(s) => Some(s), + _ => None, + }) + .unwrap_or(&"?".to_string()) + ) + } else { + node.children + .first() + .and_then(|c| match c { + Child::Symbol(s) => Some(s.clone()), + _ => None, + }) + .unwrap_or_else(|| "?".to_string()) + } + } + + 
fn full_owner_name(&self, owners: &[String], node: &Node) -> String { + let mut next = owners.to_vec(); + next.push(self.owner_segment(node)); + next.join("::") + } + + fn owner_segment(&self, node: &Node) -> String { + let text = ast::slice( + node.children.first().and_then(ast::node).unwrap_or(node), + &self.lines, + ); + if text.is_empty() { + "(anonymous)".to_string() + } else { + text + } + } + + fn local_reads( + &self, + node: &Node, + local_names: &BTreeSet, + writes: &BTreeSet, + ) -> BTreeSet { + let mut reads = Vec::new(); + self.walk_local(node, &mut |child| { + if LOCAL_READ_TYPES.contains(&child.r#type.as_str()) { + if let Some(name) = local_read_name(child) { + if local_names.contains(&name) { + reads.push(name); + } + } + } + }); + reads.extend(textual_local_reads( + &ast::slice(node, &self.lines), + local_names, + writes, + )); + reads.into_iter().collect() + } + + fn local_writes(&self, node: &Node) -> BTreeSet { + let mut writes = Vec::new(); + self.walk_local(node, &mut |child| { + if LOCAL_WRITE_TYPES.contains(&child.r#type.as_str()) { + if let Some(Child::String(name)) = child.children.first() { + writes.push(name.clone()); + } + } + }); + writes.extend(textual_local_writes(&ast::slice(node, &self.lines))); + writes.into_iter().collect() + } + + fn assignment_dependencies( + &self, + node: &Node, + local_names: &BTreeSet, + ) -> Vec<(String, String)> { + let mut deps = Vec::new(); + self.walk_local(node, &mut |child| { + if LOCAL_WRITE_TYPES.contains(&child.r#type.as_str()) { + if let Some(Child::String(lhs)) = child.children.first() { + if let Some(rhs) = child.children.get(1).and_then(ast::node) { + let rhs_writes = self.local_writes(rhs); + for read in self.local_reads(rhs, local_names, &rhs_writes) { + if lhs != &read { + deps.push((lhs.clone(), read)); + } + } + } + } + } + }); + let lhs_names = self.local_writes(node); + if !lhs_names.is_empty() { + let reads = self.local_reads(node, local_names, &lhs_names); + for lhs in lhs_names { + 
for read in &reads { + if &lhs != read { + deps.push((lhs.clone(), read.clone())); + } + } + } + } + deps.sort(); + deps.dedup(); + deps + } + + fn co_use_edges(&self, node: &Node, local_names: &BTreeSet) -> Vec<(String, String)> { + let writes = self.local_writes(node); + let reads: Vec<_> = self + .local_reads(node, local_names, &writes) + .into_iter() + .collect(); + let mut out = Vec::new(); + for i in 0..reads.len() { + for j in i + 1..reads.len() { + out.push((reads[i].clone(), reads[j].clone())); + } + } + out + } + + fn walk_local(&self, node: &Node, blk: &mut dyn FnMut(&Node)) { + if SKIP_NESTED_TYPES.contains(&node.r#type.as_str()) { + return; + } + blk(node); + for child in node.children.iter().filter_map(ast::node) { + self.walk_local(child, blk); + } + } +} + +fn local_read_name(node: &Node) -> Option { + match node.children.first() { + Some(Child::String(name)) | Some(Child::Symbol(name)) => Some(name.clone()), + Some(Child::Nil) => Some(String::new()), + _ => None, + } +} + +fn textual_local_writes(source: &str) -> Vec { + let Some((lhs, operator)) = split_assignment(source) else { + return Vec::new(); + }; + if lhs.contains('.') + || lhs.contains("->") + || lhs.contains('[') + || lhs.contains('(') + || lhs.contains(')') + { + return Vec::new(); + } + + let identifiers = identifiers_with_positions(lhs) + .into_iter() + .map(|identifier| identifier.name) + .filter(|name| !local_keyword(name)) + .collect::>(); + if identifiers.is_empty() { + return Vec::new(); + } + + if operator == ":=" || declaration_like_lhs(lhs) || identifiers.len() == 1 { + return identifiers + .into_iter() + .filter(|name| simple_identifier(name)) + .collect(); + } + + Vec::new() +} + +fn textual_local_reads( + source: &str, + local_names: &BTreeSet, + writes: &BTreeSet, +) -> Vec { + if plain_string_literal_source(source) { + return Vec::new(); + } + + identifiers_with_positions(source) + .into_iter() + .filter(|identifier| local_names.contains(&identifier.name)) + 
.filter(|identifier| !writes.contains(&identifier.name)) + .filter(|identifier| !member_name(source, identifier.start)) + .filter(|identifier| !call_name(source, identifier.end)) + .map(|identifier| identifier.name) + .collect() +} + +fn plain_string_literal_source(source: &str) -> bool { + let source = source.trim(); + if source.starts_with('f') || source.starts_with('F') { + return false; + } + (source.starts_with("\"\"\"") && source.ends_with("\"\"\"")) + || (source.starts_with("'''") && source.ends_with("'''")) + || (source.starts_with('"') && source.ends_with('"')) + || (source.starts_with('\'') && source.ends_with('\'')) +} + +#[derive(Clone, Debug, Eq, PartialEq)] +struct IdentifierSpan { + name: String, + start: usize, + end: usize, +} + +fn identifiers_with_positions(source: &str) -> Vec { + let bytes = source.as_bytes(); + let mut out = Vec::new(); + let mut index = 0; + while index < bytes.len() { + let start = if bytes[index] == b'$' { + let next = index + 1; + if next < bytes.len() && identifier_start(bytes[next]) { + next + } else { + index += 1; + continue; + } + } else if identifier_start(bytes[index]) { + index + } else { + index += 1; + continue; + }; + let mut end = start + 1; + while end < bytes.len() && identifier_part(bytes[end]) { + end += 1; + } + out.push(IdentifierSpan { + name: source[start..end].to_string(), + start, + end, + }); + index = end; + } + out +} + +fn identifier_start(byte: u8) -> bool { + byte == b'_' || byte.is_ascii_alphabetic() +} + +fn identifier_part(byte: u8) -> bool { + byte == b'_' || byte.is_ascii_alphanumeric() +} + +fn split_assignment(source: &str) -> Option<(&str, &str)> { + let bytes = source.as_bytes(); + let mut index = 0; + while index < bytes.len() { + if index + 1 < bytes.len() && bytes[index] == b':' && bytes[index + 1] == b'=' { + return Some((source[..index].trim(), ":=")); + } + if bytes[index] == b'=' { + let previous = index.checked_sub(1).and_then(|i| bytes.get(i)).copied(); + let next = 
bytes.get(index + 1).copied(); + if !matches!( + previous, + Some( + b'=' | b'!' + | b'<' + | b'>' + | b':' + | b'+' + | b'-' + | b'*' + | b'/' + | b'%' + | b'&' + | b'|' + ) + ) && !matches!(next, Some(b'=' | b'>')) + { + return Some((source[..index].trim(), "=")); + } + } + index += 1; + } + None +} + +fn declaration_like_lhs(lhs: &str) -> bool { + identifiers_with_positions(lhs) + .first() + .map(|identifier| { + matches!( + identifier.name.as_str(), + "let" + | "const" + | "var" + | "val" + | "auto" + | "int" + | "long" + | "float" + | "double" + | "bool" + | "boolean" + | "char" + | "String" + | "string" + ) + }) + .unwrap_or(false) +} + +fn local_keyword(name: &str) -> bool { + matches!( + name, + "as" | "break" + | "auto" + | "boolean" + | "bool" + | "case" + | "char" + | "class" + | "const" + | "continue" + | "default" + | "double" + | "else" + | "false" + | "float" + | "for" + | "func" + | "fun" + | "function" + | "if" + | "in" + | "int" + | "long" + | "let" + | "mut" + | "nil" + | "None" + | "null" + | "private" + | "protected" + | "public" + | "return" + | "self" + | "short" + | "static" + | "String" + | "string" + | "this" + | "true" + | "val" + | "var" + | "void" + | "while" + ) +} + +fn simple_identifier(name: &str) -> bool { + let mut chars = name.chars(); + matches!(chars.next(), Some(first) if first == '_' || first.is_ascii_alphabetic()) + && chars.all(|ch| ch == '_' || ch.is_ascii_alphanumeric()) +} + +fn member_name(source: &str, start: usize) -> bool { + let prefix = source[..start].trim_end(); + prefix.ends_with('.') || prefix.ends_with("->") || prefix.ends_with("::") +} + +fn call_name(source: &str, end: usize) -> bool { + let suffix = source[end..].trim_start(); + suffix.starts_with('(') +} + +fn method_metadata(document: &Document) -> BTreeMap { + document + .function_defs + .iter() + .map(|function| (function.span, metadata_for_function(document, function))) + .collect() +} + +fn metadata_for_function(document: &Document, function: 
&FunctionDef) -> MethodMetadata { + let owner = local_flow_owner(&document.file, &function.owner); + MethodMetadata { + owner, + name: function.name.clone(), + params: function.params.iter().cloned().collect(), + } +} + +fn local_flow_owner(file: &str, owner: &str) -> String { + let file_owner = file_owner(file); + if owner == file_owner { + return "(top-level)".to_string(); + } + owner + .strip_prefix(&format!("{file_owner}::")) + .unwrap_or(owner) + .to_string() +} + +fn file_owner(file: &str) -> String { + Path::new(file) + .file_stem() + .and_then(|stem| stem.to_str()) + .filter(|stem| !stem.is_empty()) + .unwrap_or("(file)") + .to_string() +} + +fn statement_container(node: &Node) -> bool { + STATEMENT_CONTAINER_TYPES.contains(&node.r#type.as_str()) +} + +fn comment_statement(node: &Node) -> bool { + node.r#type.to_ascii_lowercase().contains("comment") + || node.text.trim_start().starts_with("//") + || node.text.trim_start().starts_with('#') + || node.text.trim_start().starts_with("--") +} + +struct RawBoundary { + line: usize, + kind: String, + text: String, +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Write; + use tempfile::NamedTempFile; + + fn summaries(source: &str, language: Language) -> Vec { + let mut file = NamedTempFile::new().expect("tempfile"); + file.write_all(source.as_bytes()).expect("write"); + scan_files(&[file.path().to_path_buf()], language).expect("scan") + } + + #[test] + fn extracts_python_function_local_flow() { + let summaries = summaries( + "def mixed(price, tax):\n subtotal = price + tax\n total = subtotal\n return total\n", + Language::Python, + ); + let summary = summaries + .iter() + .find(|summary| summary.name == "mixed") + .expect("mixed summary"); + + assert_eq!(summary.owner, "(top-level)"); + assert_eq!(summary.statements.len(), 3); + assert_eq!( + summary.statements[0].reads, + ["price".to_string(), "tax".to_string()] + .into_iter() + .collect() + ); + assert_eq!( + summary.statements[1].dependencies, + 
vec![("total".to_string(), "subtotal".to_string())] + ); + assert_eq!( + summary.statements[2].reads, + ["total".to_string()].into_iter().collect() + ); + } + + #[test] + fn handles_non_ascii_source_without_byte_boundary_panics() { + let summaries = summaries( + "def mixed(price):\n marker = \"✓\"\n total = price\n return total\n", + Language::Python, + ); + let summary = summaries + .iter() + .find(|summary| summary.name == "mixed") + .expect("mixed summary"); + + assert_eq!(summary.statements.len(), 3); + assert_eq!( + summary.statements[1].dependencies, + vec![("total".to_string(), "price".to_string())] + ); + } + + #[test] + fn preserves_self_parameter_reads_for_python_attribute_access() { + let summaries = summaries( + "class TextSuite:\n def setup(self):\n self.console = Console(file=StringIO(), color_system=\"truecolor\")\n self.text = Text.from_markup(markup)\n", + Language::Python, + ); + let summary = summaries + .iter() + .find(|summary| summary.id == "TextSuite#setup") + .expect("setup summary"); + + assert_eq!( + summary.statements[0].reads, + ["self".to_string()].into_iter().collect() + ); + assert!(!summary.statements[0].writes.contains("file")); + assert_eq!( + summary.statements[1].reads, + ["self".to_string()].into_iter().collect() + ); + } + + #[test] + fn excludes_keyword_argument_writes_from_outer_assignment_dependencies() { + let summaries = summaries( + "def render():\n pretty = Pretty(snippets.PYTHON_DICT, indent_guides=True)\n return pretty\n", + Language::Python, + ); + let summary = summaries + .iter() + .find(|summary| summary.name == "render") + .expect("render summary"); + + assert_eq!( + summary.statements[0].writes, + ["pretty".to_string()].into_iter().collect() + ); + assert!(summary.statements[0].dependencies.is_empty()); + } + + #[test] + fn mines_python_loop_and_with_locals_without_keyword_writes() { + let summaries = summaries( + "def download(urls, dest_dir):\n with ThreadPoolExecutor(max_workers=4) as pool:\n for url in 
urls:\n filename = url.split(\"/\")[-1]\n dest_path = os.path.join(dest_dir, filename)\n task_id = progress.add_task(\"download\", filename=filename, start=False)\n pool.submit(copy_url, task_id, url, dest_path)\n", + Language::Python, + ); + let summary = summaries + .iter() + .find(|summary| summary.name == "download") + .expect("download summary"); + let statement = &summary.statements[0]; + + assert!(statement.reads.contains("urls")); + assert!(statement.reads.contains("url")); + assert!(statement.reads.contains("pool")); + assert!(statement.writes.contains("url")); + assert!(statement.writes.contains("pool")); + assert!(!statement.writes.contains("urls")); + assert!(!statement.writes.contains("max_workers")); + assert!(!statement.writes.contains("start")); + } + + #[test] + fn does_not_read_python_with_alias_at_declaration_site() { + let summaries = summaries( + "def capture(console):\n with console.capture() as output:\n console.line()\n return output\n", + Language::Python, + ); + let summary = summaries + .iter() + .find(|summary| summary.name == "capture") + .expect("capture summary"); + + assert!(summary.statements[0].writes.contains("output")); + assert!(!summary.statements[0].reads.contains("output")); + assert!(summary.statements[1].reads.contains("output")); + } + + #[test] + fn mines_python_named_expression_writes() { + let summaries = summaries( + "def scan(text, index):\n if (character := text[index]):\n return character\n", + Language::Python, + ); + let summary = summaries + .iter() + .find(|summary| summary.name == "scan") + .expect("scan summary"); + let statement = &summary.statements[0]; + + assert!(statement.writes.contains("character")); + assert!(statement.reads.contains("text")); + assert!(statement.reads.contains("index")); + assert!(statement + .dependencies + .contains(&("character".to_string(), "text".to_string()))); + assert!(statement + .dependencies + .contains(&("character".to_string(), "index".to_string()))); + } + + #[test] + fn 
ignores_python_import_path_segments_that_match_locals() { + let summaries = summaries( + "def status(status):\n from .status import Status\n return status\n", + Language::Python, + ); + let summary = summaries + .iter() + .find(|summary| summary.name == "status") + .expect("status summary"); + + assert!(summary.statements[0].reads.is_empty()); + assert_eq!( + summary.statements[1].reads, + ["status".to_string()].into_iter().collect() + ); + } + + #[test] + fn reads_python_callable_locals_without_marking_call_callee_as_write() { + let summaries = summaries( + "def invoke(callback, value):\n runner = callback\n return runner(value)\n", + Language::Python, + ); + let summary = summaries + .iter() + .find(|summary| summary.name == "invoke") + .expect("invoke summary"); + + assert_eq!( + summary.statements[1].reads, + ["runner".to_string(), "value".to_string()] + .into_iter() + .collect() + ); + assert!(summary.statements[1].writes.is_empty()); + } + + #[test] + fn does_not_read_locals_from_plain_docstring_text() { + let summaries = summaries( + "def get_content(user):\n \"\"\"Extract text from user dict.\"\"\"\n return user\n", + Language::Python, + ); + let summary = summaries + .iter() + .find(|summary| summary.name == "get_content") + .expect("get_content summary"); + + assert!(summary.statements[0].reads.is_empty()); + assert_eq!( + summary.statements[1].reads, + ["user".to_string()].into_iter().collect() + ); + } + + #[test] + fn extracts_java_kotlin_and_swift_local_flow() { + let cases = [ + ( + Language::Java, + "class Billing {\n int mixed(int price, int tax) {\n int subtotal = price + tax;\n int total = subtotal;\n return total;\n }\n}\n", + ), + ( + Language::Kotlin, + "class Billing {\n fun mixed(price: Int, tax: Int): Int {\n val subtotal = price + tax\n val total = subtotal\n return total\n }\n}\n", + ), + ( + Language::Swift, + "class Billing {\n func mixed(price: Int, tax: Int) -> Int {\n let subtotal = price + tax\n let total = subtotal\n return total\n 
}\n}\n", + ), + ]; + + for (language, source) in cases { + let summaries = summaries(source, language); + let summary = summaries + .iter() + .find(|summary| summary.name == "mixed") + .expect("mixed summary"); + + assert_eq!(summary.owner, "Billing"); + assert_eq!(summary.statements.len(), 3); + assert_eq!( + summary.statements[0].reads, + ["price".to_string(), "tax".to_string()] + .into_iter() + .collect() + ); + assert_eq!( + summary.statements[1].dependencies, + vec![("total".to_string(), "subtotal".to_string())] + ); + assert_eq!( + summary.statements[2].reads, + ["total".to_string()].into_iter().collect() + ); + } + } +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/path_condition.rs b/gems/decomplex/rust/src/decomplex/syntax/path_condition.rs new file mode 100644 index 000000000..14f694a31 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/path_condition.rs @@ -0,0 +1,844 @@ +use crate::decomplex::ast::{self, normalize_text, Child, Node, RawNode, Span}; +use crate::decomplex::syntax::adapters::{language_profile, LanguageProfile}; +use crate::decomplex::syntax::{Document, Language}; +use anyhow::Result; +use serde::Serialize; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::PathBuf; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct PathConditionReport { + pub neglected: Vec, + pub scattered: Vec, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct NeglectedPathCondition { + pub pattern: Vec, + pub support: usize, + pub missing: String, + pub at: String, + pub spans: BTreeMap, + pub action: String, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct ScatteredPathCondition { + pub guards: Vec, + pub support: usize, + pub scatter: usize, + pub rank: usize, + pub sites: Vec, + pub spans: BTreeMap, +} + +#[derive(Clone, Debug)] +struct Site { + guards: Vec, + action: String, + file: String, + defn: String, + line: usize, + span: Span, +} + +pub fn scan_files(files: &[PathBuf], language: 
Language) -> Result { + let documents = super::parse_files(files, language)?; + Ok(scan_documents(&documents)) +} + +pub fn scan_documents(documents: &[Document]) -> PathConditionReport { + let sites = documents + .iter() + .flat_map(|document| { + fact_sites_for_document(document) + .into_iter() + .map(|site| Site { + guards: site.guards, + action: site.action, + file: site.file, + defn: site.function, + line: site.line, + span: site.span, + }) + }) + .collect::>(); + Report::new(sites).findings() +} + +pub(crate) fn fact_sites_for_document( + document: &Document, +) -> Vec { + let mut sites = sites_for_document(document); + if sites.is_empty() { + sites = normalized_sites_from_document(document); + } + dedupe_sites(sites) + .into_iter() + .map(|site| crate::decomplex::syntax::PathConditionSite { + guards: site.guards, + action: site.action, + file: site.file, + function: site.defn, + line: site.line, + span: site.span, + }) + .collect() +} + +fn sites_for_document(document: &Document) -> Vec { + let mut sites = sites_from_document_facts(document); + sites.extend(sites_from_raw_facts(document)); + sites +} + +fn normalized_sites_from_document(document: &Document) -> Vec { + let mut pc = PathCondition::new(document.file.clone(), document.lines.clone()); + pc.walk(&document.normalized_root, &Vec::new(), &Vec::new()); + pc.sites +} + +fn dedupe_sites(sites: Vec) -> Vec { + let mut seen = BTreeSet::new(); + sites + .into_iter() + .filter(|site| { + seen.insert(( + site.guards.clone(), + site.action.clone(), + site.file.clone(), + site.defn.clone(), + site.line, + )) + }) + .collect() +} + +fn sites_from_document_facts(document: &Document) -> Vec { + document + .path_condition_sites + .iter() + .map(|site| Site { + guards: site.guards.clone(), + action: site.action.clone(), + file: site.file.clone(), + defn: site.function.clone(), + line: site.line, + span: site.span, + }) + .collect() +} + +fn sites_from_raw_facts(document: &Document) -> Vec { + let profile = 
language_profile(document.language); + let mut sites = Vec::new(); + for function in &document.function_defs { + for statement in raw_function_body_statements(profile, &function.body) { + raw_path_walk( + document, + profile, + statement, + &function.name, + &[], + &mut sites, + ); + } + } + sites +} + +fn raw_function_body_node<'a>( + profile: &dyn LanguageProfile, + node: &'a RawNode, +) -> Option<&'a RawNode> { + if let Some(body) = raw_child_by_field(node, "body") { + return Some(body); + } + raw_named_children(node).into_iter().rev().find(|child| { + profile + .function_body_node_kinds() + .contains(&child.kind.as_str()) + }) +} + +fn raw_function_body_statements<'a>( + profile: &dyn LanguageProfile, + node: &'a RawNode, +) -> Vec<&'a RawNode> { + let Some(body) = raw_function_body_node(profile, node) else { + return Vec::new(); + }; + + let mut named = raw_named_children(body) + .into_iter() + .filter(|child| !raw_comment_node(child)) + .collect::>(); + if named.len() == 1 + && profile + .nested_statement_wrapper_node_kinds() + .contains(&named[0].kind.as_str()) + { + if raw_branch_node(profile, named[0]) { + return vec![named[0]]; + } + named = raw_named_children(named[0]) + .into_iter() + .filter(|child| !raw_comment_node(child)) + .collect(); + } + if named.is_empty() && body.text.trim().is_empty() { + return Vec::new(); + } + if raw_branch_node(profile, body) || raw_assignment_statement(profile, body) || named.is_empty() + { + return vec![body]; + } + named +} + +fn raw_path_walk( + document: &Document, + profile: &dyn LanguageProfile, + node: &RawNode, + function: &str, + guards: &[String], + out: &mut Vec, +) { + if raw_nested_local_scope(profile, node) { + return; + } + + if raw_branch_node(profile, node) { + let condition = raw_branch_condition(node); + let atoms = raw_path_condition_atoms(profile, condition); + let then_atoms = if raw_unless_node(node) { + raw_negate_guards(&atoms) + } else { + atoms.clone() + }; + let else_atoms = if 
raw_unless_node(node) { + atoms + } else { + raw_negate_guards(&atoms) + }; + for (child, branch_guards) in raw_branch_body_nodes(profile, node, &then_atoms, &else_atoms) + { + let mut next_guards = guards.to_vec(); + next_guards.extend(branch_guards); + raw_path_walk(document, profile, child, function, &next_guards, out); + } + return; + } + + if guards.len() >= 2 && raw_path_action_node(profile, node) { + let mut unique = guards.to_vec(); + unique.sort(); + unique.dedup(); + out.push(Site { + guards: unique, + action: profile.normalize_source_text(&node.text), + file: document.file.clone(), + defn: function.to_string(), + line: node.span[0], + span: node.span, + }); + return; + } + + for child in raw_named_children(node) { + raw_path_walk(document, profile, child, function, guards, out); + } +} + +fn raw_path_condition_atoms( + profile: &dyn LanguageProfile, + condition: Option<&RawNode>, +) -> Vec { + let Some(condition) = condition else { + return Vec::new(); + }; + if raw_boolean_container(profile, condition) && raw_boolean_and(profile, condition) { + let mut atoms = raw_flatten_boolean_and(profile, condition) + .into_iter() + .map(|child| raw_decision_member_text(profile, &child.text)) + .collect::>(); + atoms.sort(); + atoms.dedup(); + atoms + } else { + vec![raw_decision_member_text(profile, &condition.text)] + } +} + +fn raw_branch_condition(node: &RawNode) -> Option<&RawNode> { + if raw_modifier_branch(node) { + return raw_named_children(node).into_iter().last(); + } + raw_child_by_field(node, "condition") + .or_else(|| raw_child_by_field(node, "value")) + .or_else(|| raw_child_by_field(node, "subject")) + .or_else(|| raw_named_children(node).into_iter().next()) +} + +fn raw_branch_body_nodes<'a>( + profile: &dyn LanguageProfile, + node: &'a RawNode, + then_guards: &[String], + else_guards: &[String], +) -> Vec<(&'a RawNode, Vec)> { + let mut bodies = Vec::new(); + if let Some(body) = + raw_child_by_field(node, "consequence").or_else(|| 
raw_child_by_field(node, "body")) + { + bodies.push((body, then_guards.to_vec())); + } + if let Some(body) = raw_child_by_field(node, "alternative") { + bodies.push((body, else_guards.to_vec())); + } + if bodies.is_empty() { + let named = raw_named_children(node); + bodies = if raw_modifier_branch(node) { + named + .into_iter() + .next() + .map(|body| vec![(body, then_guards.to_vec())]) + .unwrap_or_default() + } else { + named + .into_iter() + .skip(1) + .enumerate() + .map(|(index, body)| { + let guards = if index == 0 { + then_guards.to_vec() + } else { + else_guards.to_vec() + }; + (body, guards) + }) + .collect() + }; + } + bodies + .into_iter() + .flat_map(|(body, branch_guards)| { + raw_flatten_branch_body(profile, body) + .into_iter() + .map(move |child| (child, branch_guards.clone())) + }) + .collect() +} + +fn raw_modifier_branch(node: &RawNode) -> bool { + matches!(node.kind.as_str(), "if_modifier" | "unless_modifier") +} + +fn raw_flatten_branch_body<'a>( + profile: &dyn LanguageProfile, + body: &'a RawNode, +) -> Vec<&'a RawNode> { + if raw_simple_action_wrapper(profile, body) { + return vec![body]; + } + let body_children = raw_named_children(body); + let children = if profile + .path_transparent_branch_body_node_kinds() + .contains(&body.kind.as_str()) + { + body_children.into_iter().skip(1).collect::>() + } else { + body_children + }; + let children = children + .into_iter() + .flat_map(|child| { + if profile + .path_transparent_branch_body_node_kinds() + .contains(&child.kind.as_str()) + { + raw_named_children(child) + .into_iter() + .skip(1) + .collect::>() + } else { + vec![child] + } + }) + .filter(|child| !raw_comment_node(child)) + .collect::>(); + if children.is_empty() { + vec![body] + } else { + children + } +} + +fn raw_unless_node(node: &RawNode) -> bool { + node.kind.contains("unless") + || node + .children + .first() + .map(|child| child.kind == "unless" || child.text == "unless") + .unwrap_or(false) +} + +fn raw_negate_guards(guards: 
&[String]) -> Vec { + guards + .iter() + .map(|guard| { + guard + .strip_prefix('!') + .map(str::to_string) + .unwrap_or_else(|| format!("!{guard}")) + }) + .collect() +} + +fn raw_path_action_node(profile: &dyn LanguageProfile, node: &RawNode) -> bool { + if raw_branch_node(profile, node) { + return false; + } + raw_simple_action_wrapper(profile, node) + || raw_assignment_statement(profile, node) + || profile + .path_action_node_kinds() + .contains(&node.kind.as_str()) +} + +fn raw_simple_action_wrapper(profile: &dyn LanguageProfile, node: &RawNode) -> bool { + if !profile + .simple_action_wrapper_node_kinds() + .contains(&node.kind.as_str()) + { + return false; + } + let text = normalize_text(&node.text); + if text.contains('{') || text.contains('}') { + return false; + } + let text = text.strip_suffix(';').unwrap_or(&text).trim(); + let Some(open) = text.find('(') else { + return false; + }; + text.ends_with(')') + && text[..open] + .chars() + .all(|ch| ch == '_' || ch == '.' || ch.is_ascii_alphanumeric()) +} + +fn raw_assignment_statement(profile: &dyn LanguageProfile, node: &RawNode) -> bool { + profile + .assignment_node_kinds() + .contains(&node.kind.as_str()) + || node.children.iter().any(|child| { + !child.named + && profile + .assignment_operator_tokens() + .contains(&child.text.as_str()) + }) +} + +fn raw_branch_node(profile: &dyn LanguageProfile, node: &RawNode) -> bool { + profile.branch_node_kinds().contains(&node.kind.as_str()) +} + +fn raw_nested_local_scope(profile: &dyn LanguageProfile, node: &RawNode) -> bool { + profile.function_node_kinds().contains(&node.kind.as_str()) + || profile + .class_owner_node_kinds() + .contains(&node.kind.as_str()) + || profile + .module_owner_node_kinds() + .contains(&node.kind.as_str()) + || profile + .generic_owner_node_kinds() + .contains(&node.kind.as_str()) + || profile + .struct_owner_node_kinds() + .contains(&node.kind.as_str()) +} + +fn raw_boolean_container(profile: &dyn LanguageProfile, node: &RawNode) -> 
bool { + if profile + .boolean_container_node_kinds() + .contains(&node.kind.as_str()) + { + return true; + } + if raw_parenthesized_wrapper(profile, node) { + return raw_named_children(node) + .into_iter() + .next() + .map(|child| raw_boolean_container(profile, child)) + .unwrap_or(false); + } + false +} + +fn raw_boolean_and(profile: &dyn LanguageProfile, node: &RawNode) -> bool { + if raw_parenthesized_wrapper(profile, node) { + return raw_named_children(node) + .into_iter() + .next() + .map(|child| raw_boolean_and(profile, child)) + .unwrap_or(false); + } + raw_direct_operator(node) + .map(|operator| profile.boolean_and_operators().contains(&operator.as_str())) + .unwrap_or(false) +} + +fn raw_flatten_boolean_and<'a>( + profile: &dyn LanguageProfile, + node: &'a RawNode, +) -> Vec<&'a RawNode> { + if !(raw_boolean_container(profile, node) && raw_boolean_and(profile, node)) { + return vec![node]; + } + if raw_parenthesized_wrapper(profile, node) { + return raw_named_children(node) + .into_iter() + .next() + .map(|child| raw_flatten_boolean_and(profile, child)) + .unwrap_or_else(|| vec![node]); + } + raw_named_children(node) + .into_iter() + .flat_map(|child| raw_flatten_boolean_and(profile, child)) + .collect() +} + +fn raw_parenthesized_wrapper(profile: &dyn LanguageProfile, node: &RawNode) -> bool { + profile + .parenthesized_wrapper_node_kinds() + .contains(&node.kind.as_str()) + && raw_named_children(node).len() == 1 +} + +fn raw_decision_member_text(profile: &dyn LanguageProfile, text: &str) -> String { + profile.normalize_source_text(&strip_enclosing_parentheses(text)) +} + +fn strip_enclosing_parentheses(text: &str) -> String { + let mut value = text.trim().to_string(); + loop { + if !(value.starts_with('(') && value.ends_with(')')) { + break value; + } + if !enclosing_parentheses_wrap_all(&value) { + break value; + } + value = value[1..value.len() - 1].trim().to_string(); + } +} + +fn enclosing_parentheses_wrap_all(text: &str) -> bool { + let mut depth = 
0isize; + for (index, ch) in text.chars().enumerate() { + if ch == '(' { + depth += 1; + } else if ch == ')' { + depth -= 1; + } + if depth == 0 && index < text.len() - 1 { + return false; + } + if depth < 0 { + return false; + } + } + depth == 0 +} + +fn raw_direct_operator(node: &RawNode) -> Option { + node.children + .iter() + .find(|child| { + let text = child.text.trim(); + !child.named && !matches!(text, "(" | ")") + }) + .map(|child| normalize_text(&child.text)) +} + +fn raw_named_children(node: &RawNode) -> Vec<&RawNode> { + node.children.iter().filter(|child| child.named).collect() +} + +fn raw_child_by_field<'a>(node: &'a RawNode, field: &str) -> Option<&'a RawNode> { + node.children + .iter() + .find(|child| child.field_name.as_deref() == Some(field)) +} + +fn raw_comment_node(node: &RawNode) -> bool { + node.kind.contains("comment") +} + +struct PathCondition { + file: String, + lines: Vec, + sites: Vec, +} + +impl PathCondition { + fn new(file: String, lines: Vec) -> Self { + Self { + file, + lines, + sites: Vec::new(), + } + } + + fn walk(&mut self, node: &Node, defstack: &[String], guards: &[Vec]) { + let mut next_defstack = defstack.to_vec(); + if matches!(node.r#type.as_str(), "DEFN" | "DEFS") { + let name_index = if node.r#type == "DEFS" { 1 } else { 0 }; + if let Some(Child::Symbol(name)) = node.children.get(name_index) { + next_defstack.push(name.clone()); + } + } + + match node.r#type.as_str() { + "IF" | "UNLESS" => { + let cond = node.children.get(0).and_then(ast::node); + let a = node.children.get(1).and_then(ast::node); + let b = node.children.get(2).and_then(ast::node); + + let atoms = self.cond_atoms(cond); + let then_g = if node.r#type == "IF" { + atoms.clone() + } else { + self.negate(&atoms) + }; + let else_g = if node.r#type == "IF" { + self.negate(&atoms) + } else { + atoms.clone() + }; + + if let Some(a_node) = a { + let mut next_guards = guards.to_vec(); + next_guards.extend(then_g); + self.walk(a_node, &next_defstack, 
&next_guards); + } + if let Some(b_node) = b { + let mut next_guards = guards.to_vec(); + next_guards.extend(else_g); + self.walk(b_node, &next_defstack, &next_guards); + } + + return; + } + "CALL" | "FCALL" | "VCALL" | "ATTRASGN" | "LASGN" | "IASGN" | "OPCALL" => { + if guards.len() >= 2 { + self.record(node, &next_defstack, guards); + } + } + _ => {} + } + + for child in node.children.iter().filter_map(ast::node) { + self.walk(child, &next_defstack, guards); + } + } + + fn cond_atoms(&self, cond: Option<&Node>) -> Vec> { + let Some(cond) = cond else { return Vec::new() }; + ast::flatten_and(cond) + .into_iter() + .map(|a| { + let t = ast::slice(a, &self.lines); + let (text, neg) = ast::canon_polarity(&t); + vec![ + text, + if neg { + "true".to_string() + } else { + "false".to_string() + }, + ] + }) + .collect() + } + + fn negate(&self, atoms: &[Vec]) -> Vec> { + atoms + .iter() + .map(|a| { + let t = &a[0]; + let n = a[1] == "true"; + vec![ + t.clone(), + if !n { + "true".to_string() + } else { + "false".to_string() + }, + ] + }) + .collect() + } + + fn record(&mut self, node: &Node, defstack: &[String], guards: &[Vec]) { + let mut members_set = BTreeSet::new(); + for g in guards { + let prefix = if g[1] == "true" { "!" 
} else { "" }; + members_set.insert(format!("{}{}", prefix, g[0])); + } + let members: Vec<_> = members_set.into_iter().collect(); + + if members.len() < 2 { + return; + } + + let slice = ast::slice(node, &self.lines); + let action = if slice.len() > 80 { + slice[..80].to_string() + } else { + slice + }; + + self.sites.push(Site { + guards: members, + action, + file: self.file.clone(), + defn: defstack + .last() + .cloned() + .unwrap_or_else(|| "(top-level)".to_string()), + line: node.first_lineno, + span: [ + node.first_lineno, + node.first_column, + node.last_lineno, + node.last_column, + ], + }); + } +} + +struct Report { + sites: Vec, + groups: Vec<(Vec, Vec)>, +} + +impl Report { + fn new(sites: Vec) -> Self { + let mut keys = Vec::new(); + let mut groups: BTreeMap, Vec> = BTreeMap::new(); + for s in &sites { + if !groups.contains_key(&s.guards) { + keys.push(s.guards.clone()); + } + groups.entry(s.guards.clone()).or_default().push(s.clone()); + } + + let ordered_groups = keys + .into_iter() + .map(|k| { + let v = groups.remove(&k).unwrap(); + (k, v) + }) + .collect(); + + Self { + sites, + groups: ordered_groups, + } + } + + fn findings(&self) -> PathConditionReport { + PathConditionReport { + neglected: self.neglected(3), + scattered: self.scattered(2), + } + } + + fn scattered(&self, min_scatter: usize) -> Vec { + let mut out = Vec::new(); + for (guards, sites) in &self.groups { + let scatter = sites + .iter() + .map(|site| (site.file.clone(), site.defn.clone())) + .collect::>() + .len(); + if scatter < min_scatter { + continue; + } + + let locations = sites + .iter() + .map(|site| format!("{}:{}:{}", site.file, site.defn, site.line)) + .collect::>(); + let spans = sites + .iter() + .map(|site| { + ( + format!("{}:{}:{}", site.file, site.defn, site.line), + site.span, + ) + }) + .collect::>(); + out.push(ScatteredPathCondition { + guards: guards.clone(), + support: sites.len(), + scatter, + rank: sites.len() * scatter, + sites: locations, + spans, + }); + } 
+ out.sort_by(|a, b| b.rank.cmp(&a.rank).then_with(|| a.guards.cmp(&b.guards))); + out + } + + fn neglected(&self, min_support: usize) -> Vec { + let popular: Vec<_> = self + .groups + .iter() + .filter(|(_, s)| s.len() >= min_support) + .map(|(g, s)| (g.clone(), s.len())) + .collect(); + + let mut out = Vec::new(); + let mut seen = BTreeSet::new(); + + for s in &self.sites { + for (gs, sup) in &popular { + let gs_set: BTreeSet<_> = gs.iter().cloned().collect(); + let s_guards_set: BTreeSet<_> = s.guards.iter().cloned().collect(); + + let diff_gs_s: BTreeSet<_> = gs_set.difference(&s_guards_set).cloned().collect(); + let diff_s_gs: BTreeSet<_> = s_guards_set.difference(&gs_set).cloned().collect(); + + if diff_gs_s.len() == 1 && diff_s_gs.is_empty() { + if s.guards == *gs { + continue; + } + + let at = format!("{}:{}:{}", s.file, s.defn, s.line); + let missing = diff_gs_s.into_iter().next().unwrap(); + + // dedupe manually + let key = (gs.clone(), sup.clone(), missing.clone(), at.clone()); + if seen.insert(key) { + let mut spans = BTreeMap::new(); + spans.insert(at.clone(), s.span); + + out.push(NeglectedPathCondition { + pattern: gs.clone(), + support: *sup, + missing, + at, + spans, + action: s.action.clone(), + }); + } + } + } + } + + out.sort_by(|a, b| b.support.cmp(&a.support).then_with(|| a.at.cmp(&b.at))); + out + } +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/redundant_nil_guard.rs b/gems/decomplex/rust/src/decomplex/syntax/redundant_nil_guard.rs new file mode 100644 index 000000000..7296c88c2 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/redundant_nil_guard.rs @@ -0,0 +1,644 @@ +use crate::decomplex::ast::{self, Child, Node, Span}; +use crate::decomplex::syntax::{Document, Language}; +use anyhow::Result; +use serde::Serialize; +use std::collections::{BTreeMap, BTreeSet}; +use std::path::PathBuf; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +pub struct RedundantNilGuardRow { + pub at: String, + pub file: String, + pub defn: 
String, + pub line: usize, + pub span: Span, + pub local: String, + pub guard: String, + pub proof: String, + pub spans: BTreeMap, +} + +#[derive(Clone, Debug)] +struct Flow { + known: BTreeSet, + terminated: bool, +} + +#[derive(Clone, Debug)] +struct NilFact { + local: String, + non_nil_when_true: bool, +} + +struct CallParts<'a> { + receiver: Option<&'a Node>, + message: String, + no_args: bool, +} + +struct Finding { + file: String, + defn: String, + line: usize, + span: Span, + local: String, + guard: String, + proof: String, +} + +impl Finding { + fn to_h(&self) -> RedundantNilGuardRow { + let loc = format!("{}:{}:{}", self.file, self.defn, self.line); + let mut spans = BTreeMap::new(); + spans.insert(loc.clone(), self.span); + RedundantNilGuardRow { + at: loc, + file: self.file.clone(), + defn: self.defn.clone(), + line: self.line, + span: self.span, + local: self.local.clone(), + guard: self.guard.clone(), + proof: self.proof.clone(), + spans, + } + } +} + +const TERMINATING_CALLS: &[&str] = &["raise", "fail", "abort", "exit", "exit!"]; +const NIL_PREDICATE_MIDS: &[&str] = &["nil?", "isNull", "is_null", "nil", "is_none"]; +const NON_NIL_PREDICATE_MIDS: &[&str] = &["isSome", "is_some", "present", "present?"]; + +pub fn scan_files(files: &[PathBuf], language: Language) -> Result> { + let documents = super::parse_files(files, language)?; + Ok(scan_documents(&documents)) +} + +pub fn scan_documents(documents: &[Document]) -> Vec { + let mut findings = Vec::new(); + for document in documents { + let mut scanner = RedundantNilGuard::new(document.file.clone(), document.lines.clone()); + scanner.walk(&document.normalized_root, &Vec::new()); + findings.extend(scanner.findings); + } + let mut out: Vec<_> = findings.into_iter().map(|f| f.to_h()).collect(); + out.sort_by(|a, b| { + a.file + .cmp(&b.file) + .then_with(|| a.line.cmp(&b.line)) + .then_with(|| a.local.cmp(&b.local)) + .then_with(|| a.guard.cmp(&b.guard)) + }); + out +} + +struct RedundantNilGuard { + file: 
String, + lines: Vec, + findings: Vec, +} + +impl RedundantNilGuard { + fn new(file: String, lines: Vec) -> Self { + Self { + file, + lines, + findings: Vec::new(), + } + } + + fn walk(&mut self, node: &Node, defstack: &[String]) { + if matches!(node.r#type.as_str(), "DEFN" | "DEFS") { + let name_index = if node.r#type == "DEFS" { 1 } else { 0 }; + if let Some(Child::Symbol(name)) = node.children.get(name_index) { + let mut next_defstack = defstack.to_vec(); + next_defstack.push(name.clone()); + self.process_block(&ast::body_stmts(node), &next_defstack, &BTreeSet::new()); + } + return; + } + + for child in node.children.iter().filter_map(ast::node) { + self.walk(child, defstack); + } + } + + fn process_block( + &mut self, + stmts: &[&Node], + defstack: &[String], + known: &BTreeSet, + ) -> Flow { + let mut current = known.clone(); + for stmt in stmts { + let flow = self.process_stmt(stmt, defstack, ¤t); + current = flow.known; + if flow.terminated { + return Flow { + known: current, + terminated: true, + }; + } + } + Flow { + known: current, + terminated: false, + } + } + + fn process_stmt(&mut self, node: &Node, defstack: &[String], known: &BTreeSet) -> Flow { + match node.r#type.as_str() { + "IF" | "UNLESS" => self.process_branch(node, defstack, known), + "LASGN" => { + if let Some(rhs) = node.children.get(1).and_then(ast::node) { + self.inspect_node(rhs, defstack, known); + } + let mut next_known = known.clone(); + if let Some(Child::String(name)) = node.children.first() { + next_known.remove(name); + } + Flow { + known: next_known, + terminated: false, + } + } + _ => { + self.inspect_node(node, defstack, known); + Flow { + known: known.clone(), + terminated: self.terminating(node), + } + } + } + } + + fn process_branch( + &mut self, + node: &Node, + defstack: &[String], + known: &BTreeSet, + ) -> Flow { + let cond = node.children.get(0).and_then(ast::node); + let then_body = node.children.get(1).and_then(ast::node); + let else_body = 
node.children.get(2).and_then(ast::node); + + if let Some(cond) = cond { + self.inspect_node(cond, defstack, known); + } + + let then_known = self.known_for_branch(node.r#type.as_str(), true, cond, known); + let else_known = self.known_for_branch(node.r#type.as_str(), false, cond, known); + + let then_flow = self.process_block(&self.stmts_for(then_body), defstack, &then_known); + let else_flow = self.process_block(&self.stmts_for(else_body), defstack, &else_known); + + if then_flow.terminated && else_flow.terminated { + Flow { + known: BTreeSet::new(), + terminated: true, + } + } else if then_flow.terminated { + Flow { + known: else_flow.known, + terminated: false, + } + } else if else_flow.terminated { + Flow { + known: then_flow.known, + terminated: false, + } + } else { + let intersection: BTreeSet<_> = then_flow + .known + .intersection(&else_flow.known) + .cloned() + .collect(); + Flow { + known: intersection, + terminated: false, + } + } + } + + fn known_for_branch( + &self, + node_type: &str, + body_branch: bool, + cond: Option<&Node>, + known: &BTreeSet, + ) -> BTreeSet { + let mut next_known = known.clone(); + let cond_true_branch = if node_type == "IF" { + body_branch + } else { + !body_branch + }; + if let Some(cond) = cond { + for fact in self.branch_nil_facts(cond, cond_true_branch) { + next_known.insert(fact.local); + } + } + next_known + } + + fn inspect_node(&mut self, node: &Node, defstack: &[String], known: &BTreeSet) { + let recorded = self.record_redundant(node, defstack, known); + if matches!(node.r#type.as_str(), "DEFN" | "DEFS") { + return; + } + if recorded && (node.r#type == "OPCALL" || self.call_parts(node).is_some()) { + return; + } + for child in node.children.iter().filter_map(ast::node) { + self.inspect_node(child, defstack, known); + } + } + + fn record_redundant( + &mut self, + node: &Node, + defstack: &[String], + known: &BTreeSet, + ) -> bool { + let local = self.redundant_nil_subject(node, known); + let Some(local) = local else { 
return false }; + + let defn = defstack.last().map(|s| s.as_str()).unwrap_or("(top-level)"); + self.findings.push(Finding { + file: self.file.clone(), + defn: defn.to_string(), + line: node.first_lineno, + span: self.span(node), + local: local.clone(), + guard: ast::slice(node, &self.lines), + proof: format!("{} is already proven non-nil on this path", local), + }); + true + } + + fn redundant_nil_subject(&self, node: &Node, known: &BTreeSet) -> Option { + if node.r#type == "QCALL" { + return self.qcall_subject(node, known); + } + + let fact = self.nil_fact(node)?; + if known.contains(&fact.local) { + return Some(fact.local); + } + None + } + + fn nil_fact(&self, node: &Node) -> Option { + if self.parenthesized_wrapper(node) { + return self.nil_fact(self.first_node_child(node)?); + } + + if let Some(call) = self.call_parts(node) { + if call.no_args && NIL_PREDICATE_MIDS.contains(&call.message.as_str()) { + let subject = self.subject_key(call.receiver?)?; + return Some(NilFact { + local: subject, + non_nil_when_true: false, + }); + } + if call.no_args && NON_NIL_PREDICATE_MIDS.contains(&call.message.as_str()) { + let subject = self.subject_key(call.receiver?)?; + return Some(NilFact { + local: subject, + non_nil_when_true: true, + }); + } + } + + match node.r#type.as_str() { + "OPCALL" => { + let recv = node.children.get(0).and_then(ast::node)?; + let mid = match node.children.get(1)? { + Child::Symbol(s) => s, + _ => return None, + }; + let args = node.children.get(2); + if mid == "!" 
{ + return self.negated_nil_fact(recv); + } + if mid == "==" || mid == "!=" { + return self.comparison_nil_fact(recv, mid, args); + } + None + } + _ => None, + } + } + + fn branch_nil_facts(&self, node: &Node, cond_truth: bool) -> Vec { + if self.parenthesized_wrapper(node) { + if let Some(child) = self.first_node_child(node) { + return self.branch_nil_facts(child, cond_truth); + } + } + + if node.r#type == "AND" { + if !cond_truth { + return Vec::new(); + } + let mut facts = Vec::new(); + for child in ast::flatten_and(node) { + facts.extend(self.branch_nil_facts(child, true)); + } + return facts; + } + + if node.r#type == "OPCALL" { + if let Some(Child::Symbol(mid)) = node.children.get(1) { + if mid == "!" { + if let Some(child) = node.children.get(0).and_then(ast::node) { + return self.branch_nil_facts(child, !cond_truth); + } + } + } + } + + if let Some(safe_receiver) = self.safe_nav_receiver_fact(node) { + if cond_truth { + return vec![safe_receiver]; + } + } + + if let Some(fact) = self.nil_fact(node) { + if cond_truth == fact.non_nil_when_true { + return vec![fact]; + } + } + + if let Some(truthy) = self.truthy_subject_fact(node) { + if cond_truth { + return vec![truthy]; + } + } + + Vec::new() + } + + fn safe_nav_receiver_fact(&self, node: &Node) -> Option { + if node.r#type == "QCALL" { + let recv = node.children.get(0).and_then(ast::node)?; + let subject = self.subject_key(recv)?; + return Some(NilFact { + local: subject, + non_nil_when_true: true, + }); + } + None + } + + fn truthy_subject_fact(&self, node: &Node) -> Option { + let subject = self.subject_key(node)?; + Some(NilFact { + local: subject, + non_nil_when_true: true, + }) + } + + fn negated_nil_fact(&self, node: &Node) -> Option { + let mut fact = self.nil_fact(node)?; + fact.non_nil_when_true = !fact.non_nil_when_true; + Some(fact) + } + + fn comparison_nil_fact(&self, recv: &Node, mid: &str, args: Option<&Child>) -> Option { + let subject = self.subject_key(recv)?; + if !self.nil_arg(args) { + 
return None; + } + Some(NilFact { + local: subject, + non_nil_when_true: mid == "!=", + }) + } + + fn qcall_subject(&self, node: &Node, known: &BTreeSet) -> Option { + let recv = node.children.get(0).and_then(ast::node)?; + let subject = self.subject_key(recv)?; + if known.contains(&subject) { + return Some(subject); + } + None + } + + fn subject_key(&self, node: &Node) -> Option { + match node.r#type.as_str() { + "LVAR" | "DVAR" | "VCALL" => match node.children.first()? { + Child::String(s) | Child::Symbol(s) => Some(s.clone()), + _ => None, + }, + _ if self.call_parts(node).is_some() => { + let call = self.call_parts(node)?; + if !call.no_args || !self.stable_reader_name(&call.message) { + return None; + } + let recv = call.receiver?; + if recv.r#type == "SELF" { + return Some(format!("self.{}", call.message)); + } + let recv_key = self.subject_key(recv)?; + Some(format!("{}.{}", recv_key, call.message)) + } + _ => None, + } + } + + fn call_parts<'a>(&self, node: &'a Node) -> Option> { + match node.r#type.as_str() { + "CALL" => { + let receiver = node.children.get(0).and_then(ast::node); + let message = self.child_name(node.children.get(1)?)?; + Some(CallParts { + receiver, + message, + no_args: self.no_call_arguments(node.children.get(2)), + }) + } + "METHOD_INVOCATION" => { + let nodes = node + .children + .iter() + .filter_map(ast::node) + .collect::>(); + let receiver = nodes.first().copied(); + let message = nodes.get(1).and_then(|child| self.node_name(child))?; + Some(CallParts { + receiver, + message, + no_args: self.no_call_arguments(node.children.get(2)), + }) + } + "FUNCTION_CALL" | "METHOD_CALL" => { + let callee = node.children.iter().filter_map(ast::node).next()?; + let args = node + .children + .iter() + .skip(1) + .find(|child| matches!(child, Child::Node(n) if matches!(n.r#type.as_str(), "ARGUMENTS" | "ARGUMENT_LIST" | "LIST"))); + self.field_call_parts(callee, args) + } + "BLOCK" => { + let callee = 
node.children.iter().filter_map(ast::node).next()?; + let args = node + .children + .iter() + .skip(1) + .find(|child| matches!(child, Child::Node(n) if matches!(n.r#type.as_str(), "ARGUMENTS" | "ARGUMENT_LIST" | "LIST"))); + self.field_call_parts(callee, args) + } + "INVOCATION_EXPRESSION" => { + let callee = node.children.iter().filter_map(ast::node).next()?; + let mut parts = self.call_parts(callee)?; + let args = node + .children + .iter() + .skip(1) + .find(|child| matches!(child, Child::Node(n) if matches!(n.r#type.as_str(), "ARGUMENTS" | "ARGUMENT_LIST" | "LIST"))); + parts.no_args = self.no_call_arguments(args); + Some(parts) + } + _ => None, + } + } + + fn field_call_parts<'a>( + &self, + node: &'a Node, + args: Option<&'a Child>, + ) -> Option> { + if !matches!( + node.r#type.as_str(), + "DOT_INDEX_EXPRESSION" + | "FIELD_EXPRESSION" + | "FIELD_ACCESS" + | "MEMBER_EXPRESSION" + | "CALL" + ) { + return self.call_parts(node); + } + let nodes = node + .children + .iter() + .filter_map(ast::node) + .collect::>(); + let receiver = nodes.first().copied(); + let message = nodes.last().and_then(|child| self.node_name(child))?; + Some(CallParts { + receiver, + message, + no_args: self.no_call_arguments(args), + }) + } + + fn child_name(&self, child: &Child) -> Option { + match child { + Child::String(s) | Child::Symbol(s) => Some(s.clone()), + Child::Node(node) => self.node_name(node), + _ => None, + } + } + + fn node_name(&self, node: &Node) -> Option { + match node.children.first() { + Some(Child::String(s)) | Some(Child::Symbol(s)) => Some(s.clone()), + _ => { + let text = ast::slice(node, &self.lines).trim().to_string(); + (!text.is_empty()).then_some(text) + } + } + } + + fn no_call_arguments(&self, args: Option<&Child>) -> bool { + match args { + None | Some(Child::Nil) => true, + Some(Child::Node(node)) => { + !node.children.iter().any(|child| ast::node(child).is_some()) + } + Some(_) => false, + } + } + + fn parenthesized_wrapper(&self, node: &Node) -> bool 
{ + matches!( + node.r#type.as_str(), + "CONDITION_CLAUSE" | "PARENTHESIZED_EXPRESSION" | "PARENTHESIZED_STATEMENTS" + ) && self.first_node_child(node).is_some() + } + + fn first_node_child<'a>(&self, node: &'a Node) -> Option<&'a Node> { + node.children.iter().find_map(ast::node) + } + + fn stable_reader_name(&self, mid: &str) -> bool { + !(mid.ends_with('=') || mid.ends_with('!') || mid == "[]") + } + + fn nil_arg(&self, args: Option<&Child>) -> bool { + let Some(Child::Node(node)) = args else { + return false; + }; + if node.r#type != "LIST" { + return false; + } + node.children.iter().any(|c| match c { + Child::Node(n) => n.r#type == "NIL", + Child::Nil => true, + _ => false, + }) + } + + fn stmts_for<'a>(&self, node: Option<&'a Node>) -> Vec<&'a Node> { + let Some(node) = node else { return Vec::new() }; + if self.call_parts(node).is_some() { + return vec![node]; + } + if node.r#type == "BLOCK" { + node.children.iter().filter_map(ast::node).collect() + } else { + vec![node] + } + } + + fn terminating(&self, node: &Node) -> bool { + if matches!(node.r#type.as_str(), "RETURN" | "NEXT" | "BREAK") { + return true; + } + if !matches!(node.r#type.as_str(), "FCALL" | "VCALL" | "CALL") + && self.call_parts(node).is_none() + { + return false; + } + + let mid = if let Some(call) = self.call_parts(node) { + Some(call.message) + } else if node.r#type == "CALL" { + node.children.get(1).and_then(|c| match c { + Child::String(s) | Child::Symbol(s) => Some(s.clone()), + _ => None, + }) + } else { + node.children.get(0).and_then(|c| match c { + Child::String(s) | Child::Symbol(s) => Some(s.clone()), + _ => None, + }) + }; + + if let Some(mid) = mid { + return TERMINATING_CALLS.contains(&mid.as_str()); + } + false + } + + fn span(&self, node: &Node) -> Span { + [ + node.first_lineno, + node.first_column, + node.last_lineno, + node.last_column, + ] + } +} diff --git a/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs 
b/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs new file mode 100644 index 000000000..a90716f54 --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax/tree_sitter_adapter.rs @@ -0,0 +1,2669 @@ +use super::{ + adapters::{ + false_simplicity_lexicon::{false_simplicity_lexicon, FalseSimplicityLexicon}, + language_profile, LanguageProfile, + }, + BranchArm, BranchDecision, CallSite, ComparisonUse, DecisionSite, DispatchSite, Document, + FunctionDef, Language, OwnerDef, PredicateAlias, SemanticEffectSite, StateDeclaration, + StateRead, StateWrite, +}; +use crate::decomplex::ast::{line, node_text, normalize_text, normalize_tree, span, RawNode}; +use crate::decomplex::syntax::complexity::local_complexity_scores; +use anyhow::{Context, Result}; +use std::collections::{BTreeMap, BTreeSet, HashSet}; +use std::fs; +use std::path::{Path, PathBuf}; +use tree_sitter::{Node, Parser}; + +pub fn parse_file(file: PathBuf, language: Language) -> Result { + let parsed = ParsedDocument::parse(file, language)?; + let mut function_defs = Vec::new(); + let mut owner_defs = Vec::new(); + let mut call_sites = Vec::new(); + let mut state_declarations = Vec::new(); + let mut state_reads = Vec::new(); + let mut state_writes = Vec::new(); + let mut decision_sites = Vec::new(); + let mut branch_decisions = Vec::new(); + let mut branch_arms = Vec::new(); + let mut dispatch_sites = Vec::new(); + let mut predicate_aliases = Vec::new(); + let mut comparison_uses = Vec::new(); + let mut seen_writes = HashSet::new(); + let mut seen_reads = HashSet::new(); + let mut seen_calls = HashSet::new(); + let mut seen_decisions = HashSet::new(); + let mut context = ContextState::new(file_owner(&parsed.file)); + if language == Language::Ruby { + context.immutable_readers = ruby_immutable_struct_readers(&parsed.source); + } + + collect_facts( + parsed.tree.root_node(), + &parsed.source, + &parsed.file, + language, + &context, + &mut function_defs, + &mut owner_defs, + &mut call_sites, + 
&mut state_declarations, + &mut state_reads, + &mut state_writes, + &mut decision_sites, + &mut branch_decisions, + &mut branch_arms, + &mut predicate_aliases, + &mut comparison_uses, + &mut seen_writes, + &mut seen_reads, + &mut seen_calls, + &mut seen_decisions, + ); + collect_implicit_state_accesses( + parsed.tree.root_node(), + &parsed.source, + &parsed.file, + language, + &context, + &function_defs, + &state_declarations, + &mut state_reads, + &mut state_writes, + &mut seen_reads, + &mut seen_writes, + ); + language_profile(language).after_collect_facts(&mut function_defs, &call_sites); + collect_dispatch_sites( + parsed.tree.root_node(), + &parsed.source, + &parsed.file, + language, + &context, + &call_sites, + &mut dispatch_sites, + ); + collect_equality_dispatch_sites(&comparison_uses, &call_sites, &mut dispatch_sites); + let profile = language_profile(language); + let mut semantic_effect_sites = semantic_effect_sites_from_calls(language, &call_sites); + semantic_effect_sites.extend(profile.structural_semantic_effect_sites( + parsed.tree.root_node(), + &parsed.source, + &parsed.file, + &function_defs, + &state_reads, + &state_writes, + )); + dedup_semantic_effect_sites(&mut semantic_effect_sites); + let local_complexity_scores = + local_complexity_scores(&parsed.file.to_string_lossy(), &function_defs); + + let mut document = Document { + file: parsed.file.to_string_lossy().to_string(), + language, + source: parsed.source.clone(), + lines: parsed.source.lines().map(ToString::to_string).collect(), + root: RawNode::from_tree_sitter(parsed.tree.root_node(), &parsed.source), + normalized_root: normalize_tree(parsed.tree.root_node(), &parsed.source, language), + function_defs, + owner_defs, + call_sites, + state_declarations, + state_reads, + state_writes, + decision_sites, + branch_decisions, + branch_arms, + dispatch_sites, + semantic_effect_sites, + local_complexity_scores, + predicate_aliases, + comparison_uses, + path_condition_sites: Vec::new(), + 
protocol_method_effects: Vec::new(), + protocol_call_paths: Vec::new(), + }; + document.protocol_method_effects = profile.protocol_method_effects(&document); + document.protocol_call_paths = profile.protocol_call_paths(&document); + Ok(document) +} + +struct ParsedDocument { + file: PathBuf, + source: String, + tree: tree_sitter::Tree, +} + +impl ParsedDocument { + fn parse(file: PathBuf, language: Language) -> Result { + let source = fs::read_to_string(&file) + .with_context(|| format!("failed to read {}", file.display()))?; + let mut parser = Parser::new(); + parser + .set_language(&language_profile(language).grammar()) + .with_context(|| "failed to initialize tree-sitter parser")?; + let tree = parser + .parse(&source, None) + .with_context(|| format!("tree-sitter produced no tree for {}", file.display()))?; + Ok(Self { file, source, tree }) + } +} + +#[derive(Clone, Debug, Eq, PartialEq)] +struct ContextState { + file_owner: String, + owner: Option, + function: Option, + function_line: Option, + pub receiver: Option, + locals: BTreeSet, + param_types: BTreeMap, + immutable_readers: BTreeMap>, + controls: Vec, +} + +impl ContextState { + fn new(file_owner: String) -> Self { + Self { + file_owner, + owner: None, + function: None, + function_line: None, + receiver: None, + locals: BTreeSet::new(), + param_types: BTreeMap::new(), + immutable_readers: BTreeMap::new(), + controls: Vec::new(), + } + } + + fn current_owner(&self) -> String { + self.owner + .clone() + .unwrap_or_else(|| self.file_owner.clone()) + } + + fn current_function(&self) -> String { + self.function + .clone() + .unwrap_or_else(|| "(top-level)".to_string()) + } + + fn current_control(&self) -> String { + self.controls + .last() + .cloned() + .unwrap_or_else(|| "always".to_string()) + } + + fn conditional_context(&self) -> bool { + self.controls + .iter() + .any(|control| matches!(control.as_str(), "conditional" | "iterates")) + } +} + +fn collect_facts( + node: Node<'_>, + source: &str, + file: 
&Path, + language: Language, + context: &ContextState, + function_defs: &mut Vec, + owner_defs: &mut Vec, + call_sites: &mut Vec, + state_declarations: &mut Vec, + state_reads: &mut Vec, + state_writes: &mut Vec, + decision_sites: &mut Vec, + branch_decisions: &mut Vec, + branch_arms: &mut Vec, + predicate_aliases: &mut Vec, + comparison_uses: &mut Vec, + seen_writes: &mut HashSet, + seen_reads: &mut HashSet, + seen_calls: &mut HashSet, + seen_decisions: &mut HashSet, +) { + let next_context = push_control_context( + node, + push_function_context( + node, + push_owner_context(node, source, context, language), + source, + language, + ), + source, + language, + ); + record_function_def(node, source, file, language, &next_context, function_defs); + record_owner_def(node, source, file, language, &next_context, owner_defs); + record_call_site( + node, + source, + file, + language, + &next_context, + call_sites, + seen_calls, + ); + record_state_declaration( + node, + source, + file, + language, + &next_context, + state_declarations, + ); + record_state_read( + node, + source, + file, + language, + &next_context, + state_reads, + seen_reads, + ); + record_state_write( + node, + source, + file, + language, + &next_context, + state_writes, + seen_writes, + ); + record_decision_site( + node, + source, + file, + language, + &next_context, + decision_sites, + seen_decisions, + ); + record_branch_decision( + node, + source, + file, + language, + &next_context, + branch_decisions, + ); + record_branch_arm(node, source, file, language, &next_context, branch_arms); + record_predicate_alias( + node, + source, + file, + language, + &next_context, + predicate_aliases, + ); + record_comparison_use(node, source, file, language, &next_context, comparison_uses); + + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + collect_facts( + child, + source, + file, + language, + &next_context, + function_defs, + owner_defs, + call_sites, + state_declarations, + 
state_reads, + state_writes, + decision_sites, + branch_decisions, + branch_arms, + predicate_aliases, + comparison_uses, + seen_writes, + seen_reads, + seen_calls, + seen_decisions, + ); + } +} + +const GENERIC_SYSTEM_IO_BARE: &[&str] = + &["print", "println", "eprintln", "printf", "puts", "panic"]; + +fn semantic_effect_sites_from_calls( + language: Language, + call_sites: &[CallSite], +) -> Vec { + let lexicon = false_simplicity_lexicon(language); + call_sites + .iter() + .filter_map(|call| semantic_effect_site_for_call(call, &lexicon)) + .collect() +} + +fn semantic_effect_site_for_call( + call: &CallSite, + lexicon: &FalseSimplicityLexicon, +) -> Option { + let message = call.message.as_str(); + let (kind, detail) = if effect_callback_call(call, message, lexicon) { + ("callback_inversion", message.to_string()) + } else if lexicon.meta_mids.contains(&message) { + ("metaprogramming", message.to_string()) + } else if lexicon.dispatch_mids.contains(&message) { + ("dynamic_dispatch", message.to_string()) + } else if message == "call" && !call.receiver.is_empty() { + if method_object_receiver(&call.receiver, lexicon) { + ("dynamic_dispatch", "method(...).call".to_string()) + } else if variable_receiver(&call.receiver) { + ("dynamic_dispatch", format!("{}.call", call.receiver)) + } else { + return None; + } + } else if let Some((kind, detail)) = const_effect_kind_detail(call, message, lexicon) { + (kind, detail) + } else if call.receiver == "self" + && (lexicon.io_bare.contains(&message) || GENERIC_SYSTEM_IO_BARE.contains(&message)) + { + ("hidden_io", message.to_string()) + } else if call.receiver == "self" && lexicon.context_bare.contains(&message) { + ("context_dependency", message.to_string()) + } else if message.len() > 1 && message.ends_with('!') && !matches!(message, "!=" | "!~") { + ("hidden_mutation", message.to_string()) + } else { + return None; + }; + + Some(SemanticEffectSite { + kind: kind.to_string(), + detail, + file: call.file.clone(), + function: 
call.function.clone(), + line: call.line, + span: call.span, + }) +} + +fn const_effect_kind_detail( + call: &CallSite, + message: &str, + lexicon: &FalseSimplicityLexicon, +) -> Option<(&'static str, String)> { + let receiver = call.receiver.as_str(); + if receiver.is_empty() || receiver == "self" { + return None; + } + let base = receiver + .trim_start_matches("::") + .split("::") + .next() + .unwrap_or(""); + if base == "Dir" && lexicon.dir_context.contains(&message) { + return Some(("context_dependency", format!("Dir.{message}"))); + } + if lexicon.io_consts.contains(&base) || receiver.starts_with("Net::") { + return Some(( + "hidden_io", + format!("{}.{}", receiver.trim_start_matches("::"), message), + )); + } + if receiver == "ENV" { + return Some(("context_dependency", "ENV".to_string())); + } + if lexicon + .context_pairs + .iter() + .any(|(name, mids)| *name == base && mids.contains(&message)) + { + return Some(("context_dependency", format!("{base}.{message}"))); + } + None +} + +fn effect_callback_call(call: &CallSite, message: &str, lexicon: &FalseSimplicityLexicon) -> bool { + (call.block || call.arguments.iter().any(|arg| arg.starts_with('&'))) + && effect_callback_name(message, lexicon) + && !lexicon.meta_mids.contains(&message) +} + +fn effect_callback_name(message: &str, lexicon: &FalseSimplicityLexicon) -> bool { + lexicon.callback_set.contains(&message) + || message.starts_with("with_") + || message.starts_with("around_") + || message.starts_with("on_") + || message.starts_with("before_") + || message.starts_with("after_") + || message.ends_with("_hook") +} + +fn method_object_receiver(receiver: &str, lexicon: &FalseSimplicityLexicon) -> bool { + lexicon + .method_obj_mids + .iter() + .any(|name| receiver.contains(name)) +} + +fn variable_receiver(receiver: &str) -> bool { + let mut chars = receiver.chars(); + matches!(chars.next(), Some(first) if first == '@' || first == '$' || first == '_' || first.is_ascii_lowercase()) + && chars.all(|ch| ch 
== '_' || ch == '!' || ch == '?' || ch.is_ascii_alphanumeric()) +} + +fn collect_dispatch_sites( + node: Node<'_>, + source: &str, + file: &Path, + language: Language, + context: &ContextState, + call_sites: &[CallSite], + out: &mut Vec, +) { + let next_context = push_control_context( + node, + push_function_context( + node, + push_owner_context(node, source, context, language), + source, + language, + ), + source, + language, + ); + record_dispatch_site(node, source, file, language, &next_context, call_sites, out); + + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + collect_dispatch_sites( + child, + source, + file, + language, + &next_context, + call_sites, + out, + ); + } +} + +fn record_dispatch_site( + node: Node<'_>, + source: &str, + file: &Path, + language: Language, + context: &ContextState, + call_sites: &[CallSite], + out: &mut Vec, +) { + let profile = language_profile(language); + if !(case_node(profile, node) || profile.hidden_case(node)) { + return; + } + + let decision_node = profile.case_source_node(node); + if profile.predicate_less_case(decision_node) { + return; + } + let predicate = strip_enclosing_parentheses( + &profile.normalize_source_text(&decision_predicate(profile, decision_node, source)), + ); + if predicate.is_empty() { + return; + } + + let mut arm_members: BTreeMap> = BTreeMap::new(); + for arm in case_arms(profile, decision_node) { + let members = dispatch_members_inside( + call_sites, + &predicate, + &context.current_function(), + span(arm), + ); + for pattern in case_arm_patterns(arm, source, profile) { + for variant in dispatch_constant_patterns(&pattern) { + arm_members + .entry(variant) + .or_default() + .extend(members.clone()); + } + } + } + if arm_members.len() < 2 { + return; + } + for members in arm_members.values_mut() { + members.sort(); + members.dedup(); + } + + let mut variant_set = arm_members.keys().cloned().collect::>(); + variant_set.sort(); + let outside = dispatch_members_outside( + 
call_sites, + &predicate, + &context.current_function(), + span(decision_node), + ); + let site = DispatchSite { + variant_set, + arm_members, + outside, + file: file.to_string_lossy().to_string(), + function: context.current_function(), + line: line(decision_node), + span: span(decision_node), + }; + if out.iter().any(|existing| existing == &site) { + return; + } + out.push(site); +} + +fn collect_equality_dispatch_sites( + comparisons: &[ComparisonUse], + call_sites: &[CallSite], + out: &mut Vec, +) { + let mut groups: BTreeMap<(String, String, String), Vec<(&ComparisonUse, String)>> = + BTreeMap::new(); + for comparison in comparisons { + let Some((predicate, variant)) = dispatch_equality(&comparison.canon_source) else { + continue; + }; + groups + .entry(( + comparison.file.clone(), + comparison.function.clone(), + predicate, + )) + .or_default() + .push((comparison, variant)); + } + + for ((file, function, predicate), entries) in groups { + let variant_set = entries + .iter() + .map(|(_, variant)| variant.clone()) + .collect::>(); + if variant_set.len() < 2 { + continue; + } + + let mut arm_members: BTreeMap> = BTreeMap::new(); + let mut branch_spans = Vec::new(); + for (comparison, variant) in entries { + branch_spans.push(comparison.enclosing_span); + let members = dispatch_members_inside( + call_sites, + &predicate, + &function, + comparison.enclosing_span, + ); + arm_members.entry(variant).or_default().extend(members); + } + if arm_members.len() < 2 { + continue; + } + for members in arm_members.values_mut() { + members.sort(); + members.dedup(); + } + + let outside = + dispatch_members_outside_any(call_sites, &predicate, &function, &branch_spans); + let mut variant_set = arm_members.keys().cloned().collect::>(); + variant_set.sort(); + let span = branch_spans + .into_iter() + .reduce(union_span) + .unwrap_or([0, 0, 0, 0]); + let site = DispatchSite { + variant_set, + arm_members, + outside, + file, + function, + line: span[0], + span, + }; + if 
out.iter().any(|existing| existing == &site) { + continue; + } + out.push(site); + } +} + +fn dispatch_equality(source: &str) -> Option<(String, String)> { + for operator in ["===", "=="] { + let Some((left, right)) = source.split_once(operator) else { + continue; + }; + let left = strip_enclosing_parentheses(&normalize_text(left)); + let right = strip_enclosing_parentheses(&normalize_text(right)); + let left_variant = dispatch_constant_pattern(&left); + let right_variant = dispatch_constant_pattern(&right); + return match (left_variant, right_variant) { + (true, false) => Some((right, left)), + (false, true) => Some((left, right)), + _ => None, + }; + } + None +} + +fn record_function_def( + node: Node<'_>, + source: &str, + file: &Path, + language: Language, + context: &ContextState, + out: &mut Vec, +) { + let Some(name) = language_profile(language).function_name(node, source) else { + return; + }; + let function = FunctionDef { + file: file.to_string_lossy().to_string(), + name, + owner: context.current_owner(), + line: line(node), + span: span(node), + body: RawNode::from_tree_sitter(node, source), + visibility: language_profile(language).function_visibility(node, source), + params: language_profile(language).function_params(node, source), + }; + let key = ( + function.file.clone(), + function.owner.clone(), + function.name.clone(), + function.line, + ); + if out.iter().any(|existing| { + ( + existing.file.clone(), + existing.owner.clone(), + existing.name.clone(), + existing.line, + ) == key + }) { + return; + } + out.push(function); +} + +fn record_owner_def( + node: Node<'_>, + source: &str, + file: &Path, + language: Language, + context: &ContextState, + out: &mut Vec, +) { + let profile = language_profile(language); + if profile + .owner_def_name_from_declaration(node, source) + .is_none() + { + return; + } + let owner = OwnerDef { + file: file.to_string_lossy().to_string(), + name: context.current_owner(), + kind: profile.owner_kind(node), + line: 
line(node), + span: span(node), + }; + let key = (owner.file.clone(), owner.name.clone(), owner.kind.clone()); + if out.iter().any(|existing| { + ( + existing.file.clone(), + existing.name.clone(), + existing.kind.clone(), + ) == key + }) { + return; + } + out.push(owner); +} + +fn record_predicate_alias( + node: Node<'_>, + source: &str, + file: &Path, + language: Language, + context: &ContextState, + out: &mut Vec, +) { + let profile = language_profile(language); + let Some(name) = profile.function_name(node, source) else { + return; + }; + let Some(body) = profile.single_expression_body(node) else { + return; + }; + let Some(text) = predicate_body_text(profile, node_text(body, source)) else { + return; + }; + let file_name = file.to_string_lossy().to_string(); + out.push(PredicateAlias { + name: name.clone(), + body: text, + file: file_name, + defn: name, + owner: context.current_owner(), + line: line(node), + span: span(node), + }); +} + +fn predicate_body_text(profile: &dyn LanguageProfile, source: &str) -> Option { + let mut text = profile.normalize_source_text(source); + if text.starts_with('{') && text.ends_with('}') { + text = text[1..text.len() - 1].trim().to_string(); + } + let text = text + .strip_prefix("return ") + .unwrap_or(&text) + .trim_end_matches(';') + .trim() + .to_string(); + if text.contains(';') { + return None; + } + if text.is_empty() || text == "nil" || text.len() > 200 { + return None; + } + if assignment_like_predicate_body(&text) { + return None; + } + if predicate_like_body(&text) { + Some(text) + } else { + None + } +} + +fn assignment_like_predicate_body(text: &str) -> bool { + text.contains("||=") + || text.contains("&&=") + || text.contains("+=") + || text.contains("-=") + || text.contains("*=") + || text.contains("/=") + || text.contains("%=") + || text + .chars() + .collect::>() + .windows(3) + .any(|window| matches!(window, [left, '=', right] if !matches!(left, '=' | '!' 
| '<' | '>') && *right != '=')) +} + +fn predicate_like_body(text: &str) -> bool { + let lower = text.to_ascii_lowercase(); + matches!(lower.as_str(), "true" | "false") + || lower.contains("true") + || lower.contains("false") + || lower.contains("null") + || lower.contains("nil") + || text.contains("==") + || text.contains("!=") + || text.contains("&&") + || text.contains("||") + || lower.contains(" and ") + || lower.contains(" or ") +} + +fn record_comparison_use( + node: Node<'_>, + source: &str, + file: &Path, + language: Language, + context: &ContextState, + out: &mut Vec, +) { + let profile = language_profile(language); + if !comparison_node(profile, node, source) { + return; + } + let raw = profile.normalize_source_text(node_text(node, source)); + out.push(ComparisonUse { + canon_source: normalize_comparison_source(&raw), + raw, + file: file.to_string_lossy().to_string(), + function: context.current_function(), + line: line(node), + span: span(node), + enclosing_span: decision_enclosing_span(profile, node), + }); +} + +fn normalize_comparison_source(source: &str) -> String { + let mut text = source.trim().to_string(); + if let Some(stripped) = text.strip_prefix('!') { + text = stripped.trim().to_string(); + } + if let Some(stripped) = text.strip_prefix("self.") { + text = stripped.to_string(); + } + if let Some(stripped) = text.strip_prefix('@') { + text = stripped.to_string(); + } + if let Some(dot_index) = text.find('.') { + let receiver = &text[..dot_index]; + let rest = &text[dot_index + 1..]; + if simple_identifier(receiver) + && (rest.contains(" == ") || rest.contains(" != ") || rest.contains('.')) + { + text = rest.to_string(); + } + } + normalize_text(&text) +} + +fn simple_identifier(text: &str) -> bool { + let mut chars = text.chars(); + let Some(first) = chars.next() else { + return false; + }; + (first == '_' || first.is_ascii_alphabetic()) + && chars.all(|ch| ch == '_' || ch.is_ascii_alphanumeric()) +} + +fn comparison_node(profile: &dyn 
LanguageProfile, node: Node<'_>, source: &str) -> bool { + if profile.comparison_node_kinds().contains(&node.kind()) { + let operator = direct_operator_from_source(node, source); + return profile.comparison_operators().contains(&operator.as_str()); + } + if !profile.call_node_kinds().contains(&node.kind()) { + return false; + } + node.child_by_field_name("method") + .map(|method| node_text(method, source) == "nil?") + .unwrap_or(false) +} + +fn record_decision_site( + node: Node<'_>, + source: &str, + file: &Path, + language: Language, + context: &ContextState, + out: &mut Vec, + seen: &mut HashSet, +) { + let profile = language_profile(language); + if profile.generated_prelude(node, source) { + return; + } + + if profile.boolean_container(node) && boolean_and(profile, node, source) { + record_conjunction_decision(profile, node, source, file, context, out, seen); + return; + } + + if case_node(profile, node) || profile.hidden_case(node) { + let decision_node = profile.case_source_node(node); + if profile.predicate_less_case(decision_node) { + return; + } + let patterns = case_patterns(decision_node, source, profile); + if patterns.len() < 2 { + return; + } + push_decision_site( + out, + seen, + DecisionSite { + kind: "case_dispatch".to_string(), + members: patterns, + file: file.to_string_lossy().to_string(), + function: context.current_function(), + line: line(decision_node), + span: span(decision_node), + predicate: profile.normalize_source_text(&decision_predicate( + profile, + decision_node, + source, + )), + enclosing_span: span(decision_node), + }, + ); + } +} + +fn record_branch_decision( + node: Node<'_>, + source: &str, + file: &Path, + language: Language, + context: &ContextState, + out: &mut Vec, +) { + let profile = language_profile(language); + if !branch_decision_node(profile, node, source) { + return; + } + if branch_decision_wrapper_for_real_branch(profile, node, source) { + return; + } + let Some(condition) = branch_condition_node(profile, node) 
else { + return; + }; + let mut refs = BTreeSet::new(); + collect_branch_state_refs(profile, condition, source, context, &mut refs); + if refs.is_empty() { + return; + } + out.push(BranchDecision { + file: file.to_string_lossy().to_string(), + function: context.current_function(), + line: line(node), + span: span(node), + predicate: profile.normalize_source_text(node_text(condition, source)), + state_refs: refs.into_iter().collect(), + }); +} + +fn record_branch_arm( + node: Node<'_>, + source: &str, + file: &Path, + language: Language, + context: &ContextState, + out: &mut Vec, +) { + let profile = language_profile(language); + if profile.generated_prelude(node, source) + || branch_decision_wrapper_for_real_branch(profile, node, source) + { + return; + } + if if_arm_node(profile, node, source) { + record_if_arms(profile, node, source, file, context, out); + return; + } + if case_node(profile, node) || profile.hidden_case(node) { + let decision_node = profile.case_source_node(node); + record_case_arms(profile, decision_node, source, file, context, out); + } +} + +fn if_arm_node(profile: &dyn LanguageProfile, node: Node<'_>, source: &str) -> bool { + if case_node(profile, node) || profile.hidden_case(node) { + return false; + } + profile.branch_node_kinds().contains(&node.kind()) + || profile.control_context(node, source).as_deref() == Some("conditional") +} + +fn record_if_arms( + profile: &dyn LanguageProfile, + node: Node<'_>, + source: &str, + file: &Path, + context: &ContextState, + out: &mut Vec, +) { + let predicate = profile.normalize_source_text(&decision_predicate(profile, node, source)); + let decision_span = span(node); + let decision_line = line(node); + let named = named_children(node); + let consequence = node + .child_by_field_name("consequence") + .or_else(|| node.child_by_field_name("body")) + .or_else(|| named.get(1).copied()); + let alternative = + node.child_by_field_name("alternative") + .or_else(|| { + named.iter().copied().find(|child| { + 
child.kind().contains("else") || child.kind().contains("alternative") + }) + }) + .or_else(|| { + named + .get(2) + .copied() + .filter(|candidate| consequence != Some(*candidate)) + }); + + for (arm, member) in [(consequence, "then"), (alternative, "else")] { + let Some(arm) = arm else { + continue; + }; + out.push(BranchArm { + file: file.to_string_lossy().to_string(), + function: context.current_function(), + kind: "if".to_string(), + line: line(arm), + span: span(arm), + decision_line, + decision_span, + predicate: predicate.clone(), + member: member.to_string(), + body: profile.normalize_source_text(node_text(arm, source)), + }); + } +} + +fn record_case_arms( + profile: &dyn LanguageProfile, + node: Node<'_>, + source: &str, + file: &Path, + context: &ContextState, + out: &mut Vec, +) { + let predicate = profile.normalize_source_text(&decision_predicate(profile, node, source)); + let decision_span = span(node); + let decision_line = line(node); + for arm in case_arms(profile, node) { + let pattern = case_arm_patterns(arm, source, profile) + .into_iter() + .find(|pattern| !default_case_pattern(profile, pattern)) + .unwrap_or_default(); + if pattern.is_empty() { + continue; + } + out.push(BranchArm { + file: file.to_string_lossy().to_string(), + function: context.current_function(), + kind: "case".to_string(), + line: line(arm), + span: span(arm), + decision_line, + decision_span, + predicate: predicate.clone(), + member: pattern.clone(), + body: case_arm_body(profile, arm, source, &pattern), + }); + } +} + +fn case_arm_body( + profile: &dyn LanguageProfile, + arm: Node<'_>, + source: &str, + pattern: &str, +) -> String { + let body = named_children(arm) + .into_iter() + .filter(|child| { + !profile.case_pattern_node_kinds().contains(&child.kind()) + && !matches!(child.kind(), "then" | "else") + }) + .last() + .map(|child| node_text(child, source)) + .unwrap_or_else(|| node_text(arm, source)); + let mut text = profile.normalize_source_text(body); + for prefix 
in [format!("when {pattern} then "), format!("when {pattern} ")] { + if let Some(stripped) = text.strip_prefix(&prefix) { + text = stripped.to_string(); + break; + } + } + text +} + +fn branch_decision_node(profile: &dyn LanguageProfile, node: Node<'_>, source: &str) -> bool { + profile.branch_node_kinds().contains(&node.kind()) + || profile.hidden_case(node) + || profile.control_context(node, source).as_deref() == Some("conditional") +} + +fn branch_decision_wrapper_for_real_branch( + profile: &dyn LanguageProfile, + node: Node<'_>, + source: &str, +) -> bool { + if profile.branch_node_kinds().contains(&node.kind()) || profile.hidden_case(node) { + return false; + } + if profile.control_context(node, source).as_deref() != Some("conditional") { + return false; + } + first_named_child(node) + .map(|child| branch_decision_node(profile, child, source)) + .unwrap_or(false) +} + +fn branch_condition_node<'tree>( + _profile: &dyn LanguageProfile, + node: Node<'tree>, +) -> Option> { + node.child_by_field_name("condition") + .or_else(|| node.child_by_field_name("value")) + .or_else(|| node.child_by_field_name("subject")) + .or_else(|| first_named_child(node)) +} + +fn collect_branch_state_refs( + profile: &dyn LanguageProfile, + node: Node<'_>, + source: &str, + context: &ContextState, + out: &mut BTreeSet, +) { + if let Some(target) = profile.state_read_target(node, source) { + let field = if profile.language() == Language::Ruby { + target.field.clone() + } else { + normalized_state_ref_field(&target.field) + }; + let receiver = target.receiver.trim_start_matches('$'); + if namespace_receiver(receiver) || constant_like_state_ref(receiver, &field) { + // Constants and type namespaces are not mutable object state. + } else if branch_local_ref(node, source, receiver, &field, context) { + // Function-local bindings are not object state, even when a + // language permits bare predicate-style method calls. 
+ } else if profile.language() == Language::Ruby + && ruby_immutable_param_state_read(receiver, &field, context) + { + // Sorbet T::Struct readers on typed params are immutable data reads, + // not mutable object state. + } else if receiver.is_empty() || receiver == "self" { + out.insert(field); + } else { + out.insert(format!("{receiver}.{field}")); + } + } + + let mut cursor = node.walk(); + for child in node.named_children(&mut cursor) { + collect_branch_state_refs(profile, child, source, context, out); + } +} + +fn dedup_semantic_effect_sites(sites: &mut Vec) { + let mut seen = HashSet::new(); + sites.retain(|site| { + seen.insert(( + site.kind.clone(), + site.detail.clone(), + site.file.clone(), + site.function.clone(), + site.line, + site.span, + )) + }); +} + +fn branch_local_ref( + node: Node<'_>, + source: &str, + receiver: &str, + field: &str, + context: &ContextState, +) -> bool { + (receiver.is_empty() || matches!(receiver, "self" | "this")) + && context.locals.contains(field) + && normalize_text(node_text(node, source)) == field +} + +fn ruby_immutable_param_state_read(receiver: &str, field: &str, context: &ContextState) -> bool { + if receiver.is_empty() || matches!(receiver, "self" | "this") { + return false; + } + let Some(param) = receiver.split('.').next() else { + return false; + }; + let Some(type_name) = context.param_types.get(param) else { + return false; + }; + let field = field.trim_end_matches('?'); + ruby_immutable_reader(type_name, field, &context.immutable_readers) +} + +fn ruby_immutable_reader( + type_name: &str, + field: &str, + readers: &BTreeMap>, +) -> bool { + let short = type_name.split("::").last().unwrap_or(type_name); + readers + .get(type_name) + .or_else(|| readers.get(short)) + .map(|fields| fields.contains(field)) + .unwrap_or(false) +} + +fn ruby_immutable_struct_readers(source: &str) -> BTreeMap> { + let mut readers: BTreeMap> = BTreeMap::new(); + let mut class_stack = Vec::new(); + for line in source.lines() { + let 
stripped = line.trim(); + if let Some(name) = stripped + .strip_prefix("class ") + .and_then(|rest| rest.split_once("< T::Struct").map(|(name, _)| name.trim())) + .filter(|name| ruby_constant_path(name)) + { + class_stack.push(name.to_string()); + continue; + } + if let Some(owner) = class_stack.last() { + if let Some(field) = stripped + .strip_prefix("const :") + .and_then(|rest| { + rest.split(|ch: char| !ch.is_ascii_alphanumeric() && ch != '_') + .next() + }) + .filter(|field| !field.is_empty()) + { + readers + .entry(owner.clone()) + .or_default() + .insert(field.to_string()); + continue; + } + } + if !class_stack.is_empty() && stripped.trim_end_matches(';') == "end" { + class_stack.pop(); + } + } + readers +} + +fn ruby_sig_param_types(source: &str, function_line: usize) -> BTreeMap { + let lines = source.lines().collect::>(); + let mut sig_lines = Vec::new(); + let mut cursor = function_line.saturating_sub(2); + while let Some(line) = lines.get(cursor) { + let stripped = line.trim(); + if stripped.is_empty() { + if sig_lines.is_empty() { + break; + } + } else if sig_lines.is_empty() && !stripped.starts_with("sig") { + break; + } + sig_lines.push(*line); + if stripped.starts_with("sig") { + break; + } + if cursor == 0 || sig_lines.len() >= 8 { + break; + } + cursor -= 1; + } + sig_lines.reverse(); + let sig = sig_lines.join("\n"); + let Some(params_start) = sig.find("params(").map(|index| index + "params(".len()) else { + return BTreeMap::new(); + }; + let rest = &sig[params_start..]; + let Some(params_end) = rest.find(')') else { + return BTreeMap::new(); + }; + rest[..params_end] + .split(',') + .filter_map(|part| { + let (name, type_name) = part.split_once(':')?; + let name = name.trim(); + let type_name = type_name.trim(); + (ruby_identifier(name) && ruby_constant_path(type_name)) + .then(|| (name.to_string(), type_name.to_string())) + }) + .collect() +} + +fn ruby_identifier(value: &str) -> bool { + let mut chars = value.chars(); + matches!(chars.next(), 
Some(ch) if ch == '_' || ch.is_ascii_alphabetic()) + && chars.all(|ch| ch == '_' || ch.is_ascii_alphanumeric()) +} + +fn ruby_constant_path(value: &str) -> bool { + value.split("::").all(|part| { + let mut chars = part.chars(); + matches!(chars.next(), Some(ch) if ch.is_ascii_uppercase()) + && chars.all(|ch| ch == '_' || ch.is_ascii_alphanumeric()) + }) +} + +fn declared_state_index(declarations: &[StateDeclaration]) -> BTreeMap> { + let mut index: BTreeMap> = BTreeMap::new(); + for declaration in declarations { + index + .entry(declaration.owner.clone()) + .or_default() + .insert(declaration.field.clone()); + } + index +} + +fn function_param_index( + function_defs: &[FunctionDef], +) -> BTreeMap<(String, String), BTreeSet> { + let mut index: BTreeMap<(String, String), BTreeSet> = BTreeMap::new(); + for function in function_defs { + index + .entry((function.owner.clone(), function.name.clone())) + .or_default() + .extend(function.params.iter().cloned()); + } + index +} + +fn local_declaration_index( + root: Node<'_>, + source: &str, + language: Language, + context: &ContextState, +) -> BTreeMap<(String, String), BTreeSet> { + let mut index = BTreeMap::new(); + local_declaration_index_for_node(root, source, language, context, &mut index); + index +} + +fn local_declaration_index_for_node( + node: Node<'_>, + source: &str, + language: Language, + context: &ContextState, + out: &mut BTreeMap<(String, String), BTreeSet>, +) { + let next_context = push_control_context( + node, + push_function_context( + node, + push_owner_context(node, source, context, language), + source, + language, + ), + source, + language, + ); + let profile = language_profile(language); + if local_variable_declarator(profile, node) { + let owner = next_context.current_owner(); + let function = next_context.current_function(); + if function != "(top-level)" { + if let Some(name) = local_name_node(profile, node, source) { + out.entry((owner, function)) + .or_default() + .insert(node_text(name, 
source).to_string()); + } + } + } + + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + local_declaration_index_for_node(child, source, language, &next_context, out); + } +} + +fn local_variable_declarator(profile: &dyn LanguageProfile, node: Node<'_>) -> bool { + profile + .local_variable_declarator_node_kinds() + .contains(&node.kind()) + && !inside_kind(node, profile.field_declaration_node_kinds()) +} + +fn local_name_node<'tree>( + profile: &dyn LanguageProfile, + node: Node<'tree>, + source: &str, +) -> Option> { + node.child_by_field_name("name") + .or_else(|| profile.declarator_name_node(node, source)) + .or_else(|| { + named_children(node).into_iter().find(|child| { + profile.identifier_node_kinds().contains(&child.kind()) + || profile + .field_identifier_node_kinds() + .contains(&child.kind()) + }) + }) +} + +fn implicit_state_identifier(profile: &dyn LanguageProfile, node: Node<'_>) -> bool { + profile.identifier_node_kinds().contains(&node.kind()) + || profile.field_identifier_node_kinds().contains(&node.kind()) +} + +fn identifier_declaration_site(profile: &dyn LanguageProfile, node: Node<'_>) -> bool { + if node + .parent() + .map(|parent| { + profile + .declaration_site_parent_node_kinds() + .contains(&parent.kind()) + }) + .unwrap_or(false) + { + return true; + } + inside_kind(node, profile.field_declaration_node_kinds()) +} + +fn member_message_identifier(profile: &dyn LanguageProfile, node: Node<'_>) -> bool { + let Some(parent) = node.parent() else { + return false; + }; + if !profile.field_like_node_kinds().contains(&parent.kind()) { + return false; + } + let field = parent + .child_by_field_name("field") + .or_else(|| parent.child_by_field_name("property")) + .or_else(|| parent.child_by_field_name("name")) + .or_else(|| named_children(parent).into_iter().last()); + field.map(|field| same_node(field, node)).unwrap_or(false) +} + +fn implicit_assignment_lhs(profile: &dyn LanguageProfile, node: Node<'_>) -> bool { + if let 
Some(parent) = node.parent() { + if profile.assignment_node_kinds().contains(&parent.kind()) { + let lhs = parent + .child_by_field_name("left") + .or_else(|| first_named_child(parent)); + return lhs.map(|lhs| same_node(lhs, node)).unwrap_or(false); + } + } + profile.assignment_lhs_node(node) +} + +fn normalized_state_ref_field(field: &str) -> String { + field + .trim_start_matches('@') + .trim_start_matches('$') + .to_string() +} + +fn constant_like_state_ref(receiver: &str, field: &str) -> bool { + constant_namespace_receiver(receiver) || (receiver.is_empty() && starts_uppercase(field)) +} + +fn starts_uppercase(value: &str) -> bool { + matches!(value.chars().next(), Some(ch) if ch.is_ascii_uppercase()) +} + +fn constant_namespace_receiver(value: &str) -> bool { + let text = value.trim().trim_start_matches("::"); + if text.is_empty() || !starts_uppercase(text) { + return false; + } + text.split("::").all(|part| { + !part.is_empty() + && part + .chars() + .all(|ch| ch == '_' || ch == '.' 
|| ch.is_ascii_alphanumeric()) + && part + .split('.') + .all(|segment| !segment.is_empty() && starts_uppercase(segment)) + }) +} + +fn record_conjunction_decision( + profile: &dyn LanguageProfile, + mut node: Node<'_>, + source: &str, + file: &Path, + context: &ContextState, + out: &mut Vec, + seen: &mut HashSet, +) { + let from_wrapper = profile.parenthesized_wrapper(node); + if from_wrapper + && node + .parent() + .map(|parent| profile.boolean_container(parent) && boolean_and(profile, parent, source)) + .unwrap_or(false) + { + return; + } + + if from_wrapper { + if let Some(child) = first_named_child(node) { + node = child; + } + } + + if !from_wrapper + && node + .parent() + .map(|parent| { + profile.boolean_container(parent) + && boolean_and(profile, parent, source) + && span(parent) != span(node) + }) + .unwrap_or(false) + { + return; + } + + let mut members = flatten_boolean_and(profile, node, source) + .into_iter() + .map(|child| profile.normalize_source_text(&decision_member_text(child, source))) + .collect::>(); + members.sort(); + members.dedup(); + if members.len() < 2 { + return; + } + + push_decision_site( + out, + seen, + DecisionSite { + kind: "conjunction".to_string(), + members, + file: file.to_string_lossy().to_string(), + function: context.current_function(), + line: conjunction_span(node)[0], + span: conjunction_span(node), + predicate: profile.normalize_source_text(node_text(node, source)), + enclosing_span: decision_enclosing_span(profile, node), + }, + ); +} + +fn push_decision_site(out: &mut Vec, seen: &mut HashSet, site: DecisionSite) { + let key = format!( + "{}\0{}\0{}\0{}\0{:?}\0{}", + site.file, + site.function, + site.kind, + site.line, + site.span, + site.members.join("\0") + ); + if seen.insert(key) { + out.push(site); + } +} + +fn push_owner_context( + node: Node<'_>, + source: &str, + context: &ContextState, + language: Language, +) -> ContextState { + let profile = language_profile(language); + let Some(owner) = profile + 
.owner_name_from_declaration(node, source) + .or_else(|| profile.receiver_convention_owner_name(node, source)) + else { + return context.clone(); + }; + let parent_owner = context.owner.clone(); + let full_owner = if let Some(parent) = parent_owner { + if parent != owner && !owner.contains("::") { + format!("{parent}::{owner}") + } else { + owner + } + } else { + owner + }; + let mut next = context.clone(); + next.owner = Some(full_owner); + next +} + +fn push_function_context( + node: Node<'_>, + mut context: ContextState, + source: &str, + language: Language, +) -> ContextState { + let profile = language_profile(language); + let Some(function) = profile.function_name(node, source) else { + return context; + }; + let owner = context.current_owner(); + context.function = Some(function); + context.function_line = Some(line(node)); + context.owner = Some(owner); + context.receiver = profile.function_receiver_name(node, source); + context.locals = profile.function_params(node, source).into_iter().collect(); + context.param_types = if language == Language::Ruby { + ruby_sig_param_types(source, line(node)) + } else { + BTreeMap::new() + }; + if let Some(receiver) = &context.receiver { + context.locals.insert(receiver.clone()); + } + context +} + +fn push_control_context( + node: Node<'_>, + mut context: ContextState, + source: &str, + language: Language, +) -> ContextState { + if let Some(control) = language_profile(language).control_context(node, source) { + context.controls.push(control); + } + context +} + +fn record_call_site( + node: Node<'_>, + source: &str, + file: &Path, + language: Language, + context: &ContextState, + out: &mut Vec, + seen: &mut HashSet, +) { + let profile = language_profile(language); + let Some(mut target) = profile.call_target(node, source) else { + return; + }; + normalize_call_receiver(&mut target, context); + if profile.noise_call(&target) { + return; + } + + let source_node = target.source_node.unwrap_or(node); + if target.receiver == 
"self" + && target.message == context.current_function() + && context.function_line == Some(line(source_node)) + { + return; + } + let file_name = file.to_string_lossy().to_string(); + let owner = context.current_owner(); + let function = context.current_function(); + let mut call_span = target.span.unwrap_or_else(|| span(source_node)); + if target.message.ends_with('?') && call_span[0] == call_span[2] { + if let Some(line_text) = source.lines().nth(call_span[0].saturating_sub(1)) { + if line_text.as_bytes().get(call_span[1]).copied() == Some(b'!') { + call_span[1] += 1; + } + } + } + let key = format!( + "{}\0{}\0{}\0{:?}\0{}\0{}", + file_name, owner, function, call_span, target.receiver, target.message + ); + if !seen.insert(key) { + return; + } + + out.push(CallSite { + receiver: target.receiver, + message: target.message, + file: file_name, + function, + owner, + line: line(source_node), + span: call_span, + conditional: context.conditional_context(), + arguments: target.arguments, + control: Some(context.current_control()), + safe_navigation: target.safe_navigation, + block: target.block || profile.call_has_block(source_node), + }); +} + +fn record_state_declaration( + node: Node<'_>, + source: &str, + file: &Path, + language: Language, + context: &ContextState, + out: &mut Vec, +) { + let profile = language_profile(language); + let Some((field, r#type)) = profile.state_declaration(node, source) else { + return; + }; + let declaration = StateDeclaration { + field, + owner: context.current_owner(), + r#type, + file: file.to_string_lossy().to_string(), + line: line(node), + span: span(node), + }; + let key = ( + declaration.file.clone(), + declaration.owner.clone(), + declaration.field.clone(), + ); + if out.iter().any(|existing| { + ( + existing.file.clone(), + existing.owner.clone(), + existing.field.clone(), + ) == key + }) { + return; + } + out.push(declaration); +} + +fn record_state_read( + node: Node<'_>, + source: &str, + file: &Path, + language: 
Language, + context: &ContextState, + out: &mut Vec, + seen: &mut HashSet, +) { + let profile = language_profile(language); + if profile.assignment_lhs_node(node) { + return; + } + + let Some(target) = profile.state_read_target(node, source) else { + return; + }; + let target = normalize_target_receiver(target, context); + if namespace_receiver(&target.receiver) + || constant_like_state_ref(&target.receiver, &target.field) + { + return; + } + + let file_name = file.to_string_lossy().to_string(); + let owner = context.current_owner(); + let function = context.current_function(); + let line = line(node); + let key = format!( + "{}\0{}\0{}\0{:?}\0{}\0{}", + file_name, + owner, + function, + span(node), + target.receiver, + target.field + ); + if !seen.insert(key) { + return; + } + + out.push(StateRead { + field: target.field, + receiver: target.receiver, + file: file_name, + function, + line, + span: span(node), + owner, + }); +} + +/* Collects implicit state reads and writes under `root`. Returns early when the profile reports no implicit state accesses or when no state is declared; otherwise builds the declared, local and parameter indexes and delegates to collect_implicit_state_accesses_for_node. */ +fn collect_implicit_state_accesses( + root: Node<'_>, + source: &str, + file: &Path, + language: Language, + context: &ContextState, + function_defs: &[FunctionDef], + state_declarations: &[StateDeclaration], + state_reads: &mut Vec, + state_writes: &mut Vec, + seen_reads: &mut HashSet, + seen_writes: &mut HashSet, +) { + let profile = language_profile(language); + if !profile.implicit_state_accesses() { + return; + } + let declared = declared_state_index(state_declarations); + if declared.is_empty() { + return; + } + let locals = local_declaration_index(root, source, language, context); + let params = function_param_index(function_defs); + collect_implicit_state_accesses_for_node( + root, + source, + file, + language, + context, + &declared, + &locals, + &params, + state_reads, + state_writes, + seen_reads, + seen_writes, + ); +} + +fn collect_implicit_state_accesses_for_node( + node: Node<'_>, + source: &str, + file: &Path, + language: Language, + context: &ContextState, + declared: &BTreeMap>, + locals: &BTreeMap<(String, String), 
BTreeSet>, + params: &BTreeMap<(String, String), BTreeSet>, + state_reads: &mut Vec, + state_writes: &mut Vec, + seen_reads: &mut HashSet, + seen_writes: &mut HashSet, +) { + let next_context = push_control_context( + node, + push_function_context( + node, + push_owner_context(node, source, context, language), + source, + language, + ), + source, + language, + ); + record_implicit_state_access( + node, + source, + file, + language, + &next_context, + declared, + locals, + params, + state_reads, + state_writes, + seen_reads, + seen_writes, + ); + + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + collect_implicit_state_accesses_for_node( + child, + source, + file, + language, + &next_context, + declared, + locals, + params, + state_reads, + state_writes, + seen_reads, + seen_writes, + ); + } +} + +fn record_implicit_state_access( + node: Node<'_>, + source: &str, + file: &Path, + language: Language, + context: &ContextState, + declared: &BTreeMap>, + locals: &BTreeMap<(String, String), BTreeSet>, + params: &BTreeMap<(String, String), BTreeSet>, + state_reads: &mut Vec, + state_writes: &mut Vec, + seen_reads: &mut HashSet, + seen_writes: &mut HashSet, +) { + let profile = language_profile(language); + if !implicit_state_identifier(profile, node) { + return; + } + let owner = context.current_owner(); + let function = context.current_function(); + if function == "(top-level)" { + return; + } + let field = node_text(node, source).to_string(); + if !declared + .get(&owner) + .map(|fields| fields.contains(&field)) + .unwrap_or(false) + { + return; + } + let scope = (owner.clone(), function.clone()); + if params + .get(&scope) + .map(|fields| fields.contains(&field)) + .unwrap_or(false) + || locals + .get(&scope) + .map(|fields| fields.contains(&field)) + .unwrap_or(false) + || identifier_declaration_site(profile, node) + || member_message_identifier(profile, node) + { + return; + } + + let file_name = file.to_string_lossy().to_string(); + if 
implicit_assignment_lhs(profile, node) { + let key = format!( + "{}\0{}\0{}\0{}\0self\0{}", + file_name, + owner, + function, + line(node), + field + ); + if seen_writes.insert(key) { + state_writes.push(StateWrite { + field, + receiver: "self".to_string(), + file: file_name, + function, + line: line(node), + span: span(node), + owner, + }); + } + } else { + let key = format!( + "{}\0{}\0{}\0{:?}\0self\0{}", + file_name, + owner, + function, + span(node), + field + ); + if seen_reads.insert(key) { + state_reads.push(StateRead { + field, + receiver: "self".to_string(), + file: file_name, + function, + line: line(node), + span: span(node), + owner, + }); + } + } +} + +fn record_state_write( + node: Node<'_>, + source: &str, + file: &Path, + language: Language, + context: &ContextState, + out: &mut Vec, + seen: &mut HashSet, +) { + let profile = language_profile(language); + if profile.skip_state_write_node(node) { + return; + } + + let Some(assignment) = profile.assignment_target(node) else { + return; + }; + let Some(target) = profile.state_target(assignment.lhs, source) else { + return; + }; + let target = normalize_target_receiver(target, context); + if profile.skip_state_write_target(&target) { + return; + } + + let file_name = file.to_string_lossy().to_string(); + let owner = context.current_owner(); + let function = context.current_function(); + let source_node = profile.state_write_source_node(node, &assignment); + let line = line(source_node); + let key = format!( + "{}\0{}\0{}\0{}\0{}\0{}", + file_name, owner, function, line, target.receiver, target.field + ); + if !seen.insert(key) { + return; + } + + out.push(StateWrite { + field: target.field, + receiver: target.receiver, + file: file_name, + function, + line, + span: span(source_node), + owner, + }); +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub(crate) struct AssignmentTarget<'tree> { + pub(crate) lhs: Node<'tree>, + pub(crate) source: Node<'tree>, +} + +#[derive(Clone, Debug, Eq, PartialEq)] 
+pub(crate) struct Target { + pub(crate) receiver: String, + pub(crate) field: String, +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub(crate) struct CallTarget<'tree> { + pub(crate) receiver: String, + pub(crate) message: String, + pub(crate) arguments: Vec, + pub(crate) source_node: Option>, + pub(crate) span: Option<[usize; 4]>, + pub(crate) safe_navigation: bool, + pub(crate) block: bool, +} + +impl<'tree> CallTarget<'tree> { + pub(crate) fn new(receiver: String, message: String, arguments: Vec) -> Self { + Self { + receiver, + message, + arguments, + source_node: None, + span: None, + safe_navigation: false, + block: false, + } + } +} + +pub(crate) fn normalize_type_owner(text: &str) -> String { + let value = text.trim(); + let value = value.trim_start_matches(['&', '*']); + let value = value + .replace("const", "") + .replace("mut", "") + .replace("var", ""); + let value = value.trim(); + let value = value.split(['(', '{', '<', ' ']).next().unwrap_or(""); + value.split('.').last().unwrap_or("").to_string() +} + +fn file_owner(file: &Path) -> String { + file.file_stem() + .and_then(|stem| stem.to_str()) + .filter(|stem| !stem.is_empty()) + .unwrap_or("(file)") + .to_string() +} + +fn namespace_receiver(text: &str) -> bool { + let receiver = text.trim(); + if receiver.starts_with('@') { + return true; + } + if matches!(receiver, "std" | "builtin" | "build_options") + || receiver.starts_with("std.") + || receiver.starts_with("builtin.") + || receiver.starts_with("build_options.") + { + return true; + } + + if !starts_uppercase(receiver) { + return false; + } + !receiver.contains('(') || receiver.contains('.') || receiver.contains("::") +} + +pub(crate) fn first_named_text(node: Node<'_>, source: &str, kinds: &[&str]) -> Option { + named_children(node) + .into_iter() + .find(|child| kinds.iter().any(|kind| *kind == child.kind())) + .map(|child| node_text(child, source).to_string()) +} + +pub(crate) fn first_named_child(node: Node<'_>) -> Option> { + let mut 
cursor = node.walk(); + let child = node.named_children(&mut cursor).next(); + child +} + +pub(crate) fn first_named_child_except<'tree>( + node: Node<'tree>, + excluded_kind: &str, +) -> Option> { + named_children(node) + .into_iter() + .find(|child| child.kind() != excluded_kind) +} + +pub(crate) fn first_named_child_with_kind<'tree>( + node: Node<'tree>, + kind: &str, +) -> Option> { + named_children(node) + .into_iter() + .find(|child| child.kind() == kind) +} + +pub(crate) fn named_children(node: Node<'_>) -> Vec> { + let mut cursor = node.walk(); + node.named_children(&mut cursor).collect() +} + +/* Returns true when any ancestor of `node` has a kind listed in `kinds`. `seen` (keyed by span and kind) ends the walk if an ancestor repeats. */ +fn inside_kind(node: Node<'_>, kinds: &[&str]) -> bool { + let mut parent = node.parent(); + let mut seen = HashSet::new(); + while let Some(current) = parent { + let key = format!("{:?}\0{}", span(current), current.kind()); + if !seen.insert(key) { + break; + } + if kinds.contains(&current.kind()) { + return true; + } + parent = current.parent(); + } + false +} + +fn same_node(left: Node<'_>, right: Node<'_>) -> bool { + left.kind() == right.kind() && span(left) == span(right) +} + +pub(crate) fn first_child_kind(node: Node<'_>) -> Option<&str> { + let mut cursor = node.walk(); + let kind = node.children(&mut cursor).next().map(|child| child.kind()); + kind +} + +pub(crate) fn previous_sibling_raw_text(node: Node<'_>) -> Option { + node.prev_sibling() + .map(|sibling| sibling.kind().to_string()) +} + +pub(crate) fn next_sibling_raw_text(node: Node<'_>) -> Option { + node.next_sibling() + .map(|sibling| sibling.kind().to_string()) +} + +pub(crate) fn strip_assignment_suffix(text: &str) -> String { + text.strip_suffix('=').unwrap_or(text).to_string() +} + +fn case_node(profile: &dyn LanguageProfile, node: Node<'_>) -> bool { + profile.case_node_kinds().contains(&node.kind()) +} + +fn case_patterns(node: Node<'_>, source: &str, profile: &dyn LanguageProfile) -> Vec { + let mut out = case_arms(profile, node) + .into_iter() + .flat_map(|arm| case_arm_patterns(arm, source, 
profile)) + .filter(|pattern| !default_case_pattern(profile, pattern)) + .collect::>(); + out.sort(); + out.dedup(); + out +} + +fn case_arms<'tree>(profile: &dyn LanguageProfile, node: Node<'tree>) -> Vec> { + let mut arms = Vec::new(); + let mut stack = named_children(node); + while let Some(child) = stack.pop() { + if profile.case_arm_node_kinds().contains(&child.kind()) { + arms.push(child); + } else if !profile + .case_container_stop_node_kinds() + .contains(&child.kind()) + { + stack.extend(named_children(child)); + } + } + arms.reverse(); + arms +} + +fn case_arm_patterns(child: Node<'_>, source: &str, profile: &dyn LanguageProfile) -> Vec { + if !profile.case_arm_node_kinds().contains(&child.kind()) { + return Vec::new(); + } + if node_text(child, source).trim_start().starts_with("else") { + return Vec::new(); + } + + let patterns = named_children(child) + .into_iter() + .filter(|node| profile.case_pattern_node_kinds().contains(&node.kind())) + .collect::>(); + if !patterns.is_empty() { + return profile.case_pattern_texts(&patterns, source); + } + + let value = child + .child_by_field_name("value") + .or_else(|| child.child_by_field_name("pattern")) + .or_else(|| { + named_children(child).into_iter().find(|candidate| { + profile + .case_pattern_node_kinds() + .contains(&candidate.kind()) + }) + }) + .or_else(|| first_named_child(child)); + value + .filter(|node| !node.kind().contains("statement") && !node.kind().contains("block")) + .map(|node| vec![profile.normalize_source_text(node_text(node, source))]) + .unwrap_or_default() +} + +fn default_case_pattern(profile: &dyn LanguageProfile, text: &str) -> bool { + text.is_empty() || profile.default_case_patterns().contains(&text) +} + +fn dispatch_members_inside( + call_sites: &[CallSite], + predicate: &str, + function: &str, + outer: [usize; 4], +) -> Vec { + let mut members = dispatch_member_calls(call_sites, predicate, function) + .into_iter() + .filter(|call| dispatch_inside_span(call.span, outer)) + 
.map(dispatch_member_name) + .collect::>(); + members.sort(); + members.dedup(); + members +} + +fn dispatch_members_outside( + call_sites: &[CallSite], + predicate: &str, + function: &str, + decision_span: [usize; 4], +) -> Vec { + let mut members = dispatch_member_calls(call_sites, predicate, function) + .into_iter() + .filter(|call| !dispatch_inside_span(call.span, decision_span)) + .map(dispatch_member_name) + .collect::>(); + members.sort(); + members.dedup(); + members +} + +fn dispatch_members_outside_any( + call_sites: &[CallSite], + predicate: &str, + function: &str, + decision_spans: &[[usize; 4]], +) -> Vec { + let mut members = dispatch_member_calls(call_sites, predicate, function) + .into_iter() + .filter(|call| { + !decision_spans + .iter() + .any(|span| dispatch_inside_span(call.span, *span)) + }) + .map(dispatch_member_name) + .collect::>(); + members.sort(); + members.dedup(); + members +} + +fn dispatch_member_calls<'a>( + call_sites: &'a [CallSite], + predicate: &str, + function: &str, +) -> Vec<&'a CallSite> { + call_sites + .iter() + .filter(|call| { + call.function == function && call.receiver == predicate && !call.message.is_empty() + }) + .collect() +} + +fn dispatch_member_name(call: &CallSite) -> String { + strip_assignment_suffix(&call.message) +} + +fn dispatch_constant_patterns(member: &str) -> Vec { + member + .split(',') + .map(|pattern| { + pattern + .trim() + .strip_prefix("case ") + .unwrap_or(pattern.trim()) + }) + .filter(|pattern| dispatch_constant_pattern(pattern)) + .map(ToString::to_string) + .collect() +} + +fn dispatch_constant_pattern(pattern: &str) -> bool { + if pattern.is_empty() { + return false; + } + pattern.replace("::", ".").split(['.', '_']).all(|part| { + let mut chars = part.chars(); + matches!(chars.next(), Some(first) if first.is_ascii_uppercase()) + && chars.all(|ch| ch == '_' || ch.is_ascii_alphanumeric()) + }) +} + +fn dispatch_inside_span(inner: [usize; 4], outer: [usize; 4]) -> bool { + let 
starts_after_or_at = inner[0] > outer[0] || (inner[0] == outer[0] && inner[1] >= outer[1]); + let ends_before_or_at = inner[2] < outer[2] || (inner[2] == outer[2] && inner[3] <= outer[3]); + starts_after_or_at && ends_before_or_at +} + +fn union_span(left: [usize; 4], right: [usize; 4]) -> [usize; 4] { + let starts_before_or_at = left[0] < right[0] || (left[0] == right[0] && left[1] <= right[1]); + let ends_after_or_at = left[2] > right[2] || (left[2] == right[2] && left[3] >= right[3]); + [ + if starts_before_or_at { + left[0] + } else { + right[0] + }, + if starts_before_or_at { + left[1] + } else { + right[1] + }, + if ends_after_or_at { left[2] } else { right[2] }, + if ends_after_or_at { left[3] } else { right[3] }, + ] +} + +fn decision_predicate(profile: &dyn LanguageProfile, node: Node<'_>, source: &str) -> String { + let target = profile.decision_subject(node); + strip_enclosing_parentheses(&normalize_text( + target + .map(|child| node_text(child, source)) + .unwrap_or_else(|| node_text(node, source)), + )) +} + +fn boolean_and(profile: &dyn LanguageProfile, node: Node<'_>, source: &str) -> bool { + if profile.parenthesized_wrapper(node) { + return first_named_child(node) + .map(|child| boolean_and(profile, child, source)) + .unwrap_or(false); + } + profile + .boolean_and_operators() + .contains(&direct_operator_from_source(node, source).as_str()) +} + +fn flatten_boolean_and<'tree>( + profile: &dyn LanguageProfile, + node: Node<'tree>, + source: &str, +) -> Vec> { + if !(profile.boolean_container(node) && boolean_and(profile, node, source)) { + return vec![node]; + } + if profile.parenthesized_wrapper(node) { + return first_named_child(node) + .map(|child| flatten_boolean_and(profile, child, source)) + .unwrap_or_else(|| vec![node]); + } + named_children(node) + .into_iter() + .flat_map(|child| flatten_boolean_and(profile, child, source)) + .collect() +} + +fn conjunction_span(node: Node<'_>) -> [usize; 4] { + let mut base = span(node); + if node.kind() 
== "pattern" && node.start_position().column > 0 { + base[1] += 1; + } + base +} + +fn decision_enclosing_span(profile: &dyn LanguageProfile, node: Node<'_>) -> [usize; 4] { + let mut parent = node.parent(); + let mut seen = HashSet::new(); + while let Some(current) = parent { + let key = format!("{:?}\0{}", span(current), current.kind()); + if !seen.insert(key) { + break; + } + if branch_like_node(profile, current) { + return span(current); + } + parent = current.parent(); + } + span(node) +} + +fn branch_like_node(profile: &dyn LanguageProfile, node: Node<'_>) -> bool { + profile.branch_node_kinds().contains(&node.kind()) + || profile.case_node_kinds().contains(&node.kind()) + || matches!( + node.kind(), + "if" | "unless" + | "if_statement" + | "if_expression" + | "while" + | "while_statement" + | "for_statement" + | "foreach_statement" + | "for_expression" + ) +} + +fn decision_member_text(node: Node<'_>, source: &str) -> String { + normalize_text(&strip_enclosing_parentheses(node_text(node, source))) +} + +fn strip_enclosing_parentheses(text: &str) -> String { + let mut value = text.trim().to_string(); + loop { + if !(value.starts_with('(') && value.ends_with(')')) { + break value; + } + if !enclosing_parentheses_wrap_all(&value) { + break value; + } + value = value[1..value.len() - 1].trim().to_string(); + } +} + +/* True when the depth of '(' / ')' nesting never returns to zero before the final character, never goes negative, and is zero at the end of `text`. */ +fn enclosing_parentheses_wrap_all(text: &str) -> bool { + let mut depth = 0isize; + for (index, ch) in text.char_indices() { /* byte offsets, so the end-of-text comparison below stays correct for multi-byte characters */ + if ch == '(' { + depth += 1; + } else if ch == ')' { + depth -= 1; + } + if depth == 0 && index + ch.len_utf8() < text.len() { + return false; + } + if depth < 0 { + return false; + } + } + depth == 0 +} + +pub(crate) fn direct_operator(node: Node<'_>) -> String { + let mut cursor = node.walk(); + let result = node + .children(&mut cursor) + .find(|child| !child.is_named() && !matches!(child.kind(), "(" | ")")) + .map(|child| child.kind().to_string()) + .unwrap_or_default(); + result +} + +fn direct_operator_from_source(node: 
Node<'_>, source: &str) -> String { + let mut cursor = node.walk(); + let result = node + .children(&mut cursor) + .find(|child| !child.is_named() && !matches!(node_text(*child, source), "(" | ")")) + .map(|child| node_text(child, source).to_string()) + .unwrap_or_default(); + result +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Write; + use tempfile::NamedTempFile; + + fn document(source: &str) -> Document { + let mut file = NamedTempFile::new().expect("tempfile"); + file.write_all(source.as_bytes()).expect("write source"); + parse_file(file.path().to_path_buf(), Language::Ruby).expect("document") + } + + #[test] + fn extracts_ruby_attribute_and_instance_writes() { + let doc = document( + r#" +class Box + def a(n) + n.storage = :heap + n.provenance = :heap + @field = 1 + @counter += 1 + n.count += 1 + e[:kind] = 1 + end + def self.b(x); x.value = 1; end +end +"#, + ); + + let summary: Vec<(&str, &str, &str, &str)> = doc + .state_writes + .iter() + .map(|write| { + ( + write.owner.as_str(), + write.function.as_str(), + write.receiver.as_str(), + write.field.as_str(), + ) + }) + .collect(); + + assert_eq!( + summary, + vec![ + ("Box", "a", "n", "storage"), + ("Box", "a", "n", "provenance"), + ("Box", "a", "self", "@field"), + ("Box", "a", "self", "@counter"), + ("Box", "self.b", "x", "value"), + ] + ); + } + + #[test] + fn extracts_nested_owner_names() { + let doc = document( + r#" +module Outer + class Inner + def set(node) + node.state = :ready + end + end +end +"#, + ); + + assert_eq!(doc.state_writes.len(), 1); + assert_eq!(doc.state_writes[0].owner, "Outer::Inner"); + assert_eq!(doc.state_writes[0].function, "set"); + assert_eq!(doc.state_writes[0].field, "state"); + } + + #[test] + fn language_profiles_own_parser_and_receiver_metadata() { + assert_eq!(language_profile(Language::Ruby).language(), Language::Ruby); + assert_eq!(language_profile(Language::C).language(), Language::C); + 
assert!(language_profile(Language::C).first_argument_receiver()); + assert!(!language_profile(Language::Lua).first_argument_receiver()); + + let mut parser = Parser::new(); + parser + .set_language(&language_profile(Language::Lua).grammar()) + .expect("lua grammar"); + } + + #[test] + fn lua_profile_owns_generated_prelude_filter() { + let source = "local _tl_compat; local ok, compat53 = pcall(require, \"compat53.module\")\nfunction real() end\n"; + let mut parser = Parser::new(); + parser + .set_language(&language_profile(Language::Lua).grammar()) + .expect("lua grammar"); + let tree = parser.parse(source, None).expect("parse lua"); + let node = tree.root_node().named_child(0).expect("first lua node"); + + assert!(language_profile(Language::Lua).generated_prelude(node, source)); + assert!(!language_profile(Language::Ruby).generated_prelude(node, source)); + } +} + +#[cfg(test)] +mod c_tests { + use super::*; + use std::io::Write; + use tempfile::NamedTempFile; + + #[test] + fn test_c_assignment() { + let mut file = NamedTempFile::new().unwrap(); + file.write_all(b"typedef struct Node { int storage; } Node; void node_set(Node* self) { self->storage = 1; }") + .unwrap(); + let doc = parse_file(file.path().to_path_buf(), Language::C).unwrap(); + assert_eq!(doc.function_defs[0].owner, "Node"); + assert_eq!(doc.state_writes[0].receiver, "self"); + assert_eq!(doc.state_writes[0].field, "storage"); + } +} + +fn normalize_target_receiver(mut target: Target, context: &ContextState) -> Target { + target.receiver = canonical_self_receiver(&target.receiver); + if let Some(current_receiver) = &context.receiver { + if &target.receiver == current_receiver { + target.receiver = "self".to_string(); + } else if target + .receiver + .starts_with(&format!("{}.", current_receiver)) + { + target.receiver = format!( + "self.{}", + target + .receiver + .strip_prefix(&format!("{}.", current_receiver)) + .unwrap() + ); + } + } + target +} + +fn normalize_call_receiver(target: &mut 
CallTarget<'_>, context: &ContextState) { + target.receiver = canonical_self_receiver(&target.receiver); + if let Some(current_receiver) = &context.receiver { + if &target.receiver == current_receiver { + target.receiver = "self".to_string(); + } else if target + .receiver + .starts_with(&format!("{}.", current_receiver)) + { + target.receiver = format!( + "self.{}", + target + .receiver + .strip_prefix(&format!("{}.", current_receiver)) + .unwrap() + ); + } + } +} + +fn canonical_self_receiver(receiver: &str) -> String { + match receiver { + "self" | "this" | "$this" => "self".to_string(), + _ => receiver.to_string(), + } +} diff --git a/gems/decomplex/rust/src/decomplex/syntax_oracle.rs b/gems/decomplex/rust/src/decomplex/syntax_oracle.rs new file mode 100644 index 000000000..be07a0cbb --- /dev/null +++ b/gems/decomplex/rust/src/decomplex/syntax_oracle.rs @@ -0,0 +1,226 @@ +use crate::decomplex::syntax::{self, Document, Language}; +use anyhow::Result; +use serde_json::{json, Value}; +use std::path::PathBuf; + +pub const FORMAT: &str = "decomplex.syntax-facts.v1"; + +pub fn project_files(files: &[PathBuf], language: Language) -> Result { + let documents = syntax::parse_files(files, language)?; + Ok(json!({ + "format": FORMAT, + "documents": documents.iter().map(project_document).collect::>(), + })) +} + +pub fn project_document(document: &Document) -> Value { + json!({ + "file": logical_file(&document.file), + "language": document.language.as_str(), + "functions": sorted(document.function_defs.iter().map(|function| json!({ + "name": function.name, + "owner": function.owner, + "line": function.line, + "span": function.span, + "visibility": function.visibility, + "params": function.params, + })).collect()), + "owners": sorted(document.owner_defs.iter().map(|owner| json!({ + "name": owner.name, + "kind": owner.kind, + "line": owner.line, + "span": owner.span, + })).collect()), + "calls": sorted(document.call_sites.iter().map(|call| json!({ + "receiver": 
call.receiver, + "message": call.message, + "function": call.function, + "owner": call.owner, + "line": call.line, + "span": call.span, + "conditional": call.conditional, + "arguments": call.arguments, + "control": call.control, + "safe_navigation": call.safe_navigation, + "block": call.block, + })).collect()), + "state_declarations": sorted(document.state_declarations.iter().map(|declaration| json!({ + "field": declaration.field, + "owner": declaration.owner, + "type": declaration.r#type, + "line": declaration.line, + "span": declaration.span, + })).collect()), + "state_param_origins": Vec::::new(), + "state_reads": sorted(document.state_reads.iter().map(|read| json!({ + "field": read.field, + "receiver": read.receiver, + "function": read.function, + "owner": read.owner, + "line": read.line, + "span": read.span, + })).collect()), + "state_writes": sorted(document.state_writes.iter().map(|write| json!({ + "field": write.field, + "receiver": write.receiver, + "function": write.function, + "owner": write.owner, + "line": write.line, + "span": write.span, + })).collect()), + "decisions": sorted(document.decision_sites.iter().map(|decision| json!({ + "kind": decision.kind, + "members": decision.members, + "function": decision.function, + "line": decision.line, + "span": decision.span, + "predicate": decision.predicate, + "enclosing_span": decision.enclosing_span, + })).collect()), + "branch_decisions": sorted(document.branch_decisions.iter().map(|decision| json!({ + "function": decision.function, + "line": decision.line, + "span": decision.span, + "predicate": decision.predicate, + "state_refs": decision.state_refs, + })).collect()), + "branch_arms": sorted(document.branch_arms.iter().map(|arm| json!({ + "function": arm.function, + "kind": arm.kind, + "line": arm.line, + "span": arm.span, + "decision_line": arm.decision_line, + "decision_span": arm.decision_span, + "predicate": arm.predicate, + "member": arm.member, + "body": arm.body, + })).collect()), + 
"dispatch_sites": sorted(document.dispatch_sites.iter().map(|site| json!({ + "variant_set": site.variant_set, + "arm_members": site.arm_members, + "outside": site.outside, + "function": site.function, + "line": site.line, + "span": site.span, + })).collect()), + "semantic_effects": sorted(document.semantic_effect_sites.iter().map(|site| json!({ + "kind": site.kind, + "detail": site.detail, + "function": site.function, + "line": site.line, + "span": site.span, + })).collect()), + "predicate_bodies": sorted(document.predicate_aliases.iter().map(|predicate| json!({ + "name": predicate.name, + "owner": predicate.owner, + "body": predicate.body, + "line": predicate.line, + "span": predicate.span, + })).collect()), + "comparisons": sorted(document.comparison_uses.iter().map(|comparison| json!({ + "source": comparison.raw, + "raw": comparison.raw, + "canon_source": comparison.canon_source, + "operator": comparison_operator(&comparison.raw), + "function": comparison.function, + "line": comparison.line, + "span": comparison.span, + })).collect()), + "path_conditions": sorted(syntax::path_condition::fact_sites_for_document(document).iter().map(|site| json!({ + "guards": site.guards, + "action": site.action, + "function": site.function, + "line": site.line, + "span": site.span, + })).collect()), + "protocol_method_effects": sorted(document.protocol_method_effects.iter().map(|effect| json!({ + "owner": effect.owner, + "name": effect.name, + "line": effect.line, + "reads": effect.reads, + "writes": effect.writes, + })).collect()), + "protocol_call_paths": sorted(document.protocol_call_paths.iter().map(|path| json!({ + "owner": path.owner, + "name": path.name, + "line": path.line, + "calls": path.calls.iter().map(|call| json!({ + "mid": call.mid, + "line": call.line, + "span": call.span, + })).collect::>(), + })).collect()), + "clone_candidates": sorted(syntax::clone_candidates(document).iter().map(|candidate| json!({ + "line": candidate.line, + "span": candidate.span, + 
"method_name": candidate.method_name, + "node_name": candidate.node_name, + "mass": candidate.mass, + "fingerprint": candidate.fingerprint, + "child_fingerprints": candidate.child_fingerprints, + "child_masses": candidate.child_masses, + })).collect()), + "redundant_nil_guards": sorted(syntax::redundant_nil_guard::scan_documents(std::slice::from_ref(document)).iter().map(|finding| json!({ + "defn": finding.defn, + "line": finding.line, + "span": finding.span, + "local": finding.local, + "guard": finding.guard, + "proof": finding.proof, + })).collect()), + "local_methods": sorted(syntax::local_flow::scan_documents(std::slice::from_ref(document)).iter().map(|method| json!({ + "id": method.id, + "owner": method.owner, + "name": method.name, + "line": method.line, + "span": method.span, + "statements": method.statements.iter().map(|statement| json!({ + "index": statement.index, + "line": statement.line, + "end_line": statement.end_line, + "span": statement.span, + "source": statement.source, + "reads": statement.reads, + "writes": statement.writes, + "dependencies": statement.dependencies, + "co_uses": statement.co_uses, + })).collect::>(), + "boundaries": method.boundaries.iter().map(|boundary| json!({ + "before_index": boundary.before_index, + "after_index": boundary.after_index, + "line": boundary.line, + "kind": boundary.kind, + "text": boundary.text, + })).collect::>(), + "local_contract_assignments": syntax::local_flow::local_contract_assignments(method), + })).collect()), + "local_complexity_scores": sorted(document.local_complexity_scores.iter().map(|(id, score)| json!({ + "id": id, + "score": score.score, + "signals": score.signals, + })).collect()), + }) +} + +fn sorted(mut rows: Vec) -> Vec { + rows.sort_by_key(|row| row.to_string()); + rows +} + +fn logical_file(file: &str) -> String { + let path = file.replace('\\', "/"); + let marker = "gems/decomplex/examples/"; + if let Some(index) = path.find(marker) { + return path[index..].to_string(); + } + path +} 
+ +fn comparison_operator(source: &str) -> &str { + for operator in ["!==", "===", "!=", "==", ">=", "<=", ">", "<"] { + if source.contains(operator) { + return operator; + } + } + "" +} diff --git a/gems/decomplex/rust/src/lib.rs b/gems/decomplex/rust/src/lib.rs new file mode 100644 index 000000000..4f8adb8cf --- /dev/null +++ b/gems/decomplex/rust/src/lib.rs @@ -0,0 +1 @@ +pub mod decomplex; diff --git a/gems/decomplex/rust/src/main.rs b/gems/decomplex/rust/src/main.rs new file mode 100644 index 000000000..0e201e141 --- /dev/null +++ b/gems/decomplex/rust/src/main.rs @@ -0,0 +1,1318 @@ +use anyhow::{bail, Context, Result}; +use decomplex_rust::decomplex::detectors::{ + co_update, decision_pressure, derived_state, false_simplicity, fat_union, flay_similarity, + function_lcom, implicit_control_flow, inconsistent_rename_clone, local_flow, locality_drag, + miner, operational_discontinuity, oversized_predicate, path_condition, predicate_alias, + redundant_nil_guard, semantic_alias, sequence_mine, state_branch_density, state_mesh, + structural_topology, temporal_ordering_pressure, weighted_inlined_cognitive_complexity, +}; +use decomplex_rust::decomplex::parallel; +use decomplex_rust::decomplex::report::Report; +use decomplex_rust::decomplex::report_facts::{self, Options as ReportFactsOptions, VcsFilter}; +use decomplex_rust::decomplex::syntax::{Document, Language, LocalComplexityScore}; +use decomplex_rust::decomplex::syntax_oracle; +use serde::Deserialize; +use serde_json::{json, Value}; +use std::io::Read; +use std::path::PathBuf; + +fn main() -> Result<()> { + let worker = std::thread::Builder::new() + .name("decomplex-rust".to_string()) + .stack_size(64 * 1024 * 1024) + .spawn(run) + .with_context(|| "failed to start decomplex worker thread")?; + + match worker.join() { + Ok(result) => result, + Err(payload) => std::panic::resume_unwind(payload), + } +} + +fn run() -> Result<()> { + let command = parse_args(std::env::args().skip(1).collect())?; + 
parallel::set_jobs_for_process(command.jobs())?; + match command { + Command::StateWrites { + language, files, .. + } => { + let language = Language::parse(&language)?; + let facts = co_update::state_writes_for_files(&files, language) + .with_context(|| "failed to extract state-write facts")?; + println!("{}", serde_json::to_string(&facts)?); + } + Command::CoUpdate { + language, files, .. + } => { + let language = Language::parse(&language)?; + let report = co_update::scan_files(&files, language) + .with_context(|| "failed to scan co-update facts")?; + println!("{}", serde_json::to_string(&report)?); + } + Command::PredicateAliases { + language, files, .. + } => { + let language = Language::parse(&language)?; + let report = predicate_alias::scan_files(&files, language) + .with_context(|| "failed to scan predicate-alias facts")?; + println!("{}", serde_json::to_string(&report)?); + } + Command::Miner { + language, files, .. + } => { + let language = Language::parse(&language)?; + let report = miner::scan_files(&files, language) + .with_context(|| "failed to scan decision-site miner facts")?; + println!("{}", serde_json::to_string(&report)?); + } + Command::SemanticAliases { + language, files, .. + } => { + let language = Language::parse(&language)?; + let report = semantic_alias::scan_files(&files, language) + .with_context(|| "failed to scan semantic-alias facts")?; + println!("{}", serde_json::to_string(&report)?); + } + Command::DecisionPressure { + language, files, .. + } => { + let language = Language::parse(&language)?; + let report = decision_pressure::scan_files(&files, language) + .with_context(|| "failed to scan decision-pressure facts")?; + println!("{}", serde_json::to_string(&report)?); + } + Command::StateBranchDensity { + language, files, .. 
+ } => { + let language = Language::parse(&language)?; + let report = state_branch_density::scan_files(&files, language) + .with_context(|| "failed to scan state-branch-density facts")?; + println!("{}", serde_json::to_string(&report)?); + } + Command::TemporalOrderingPressure { + language, files, .. + } => { + let language = Language::parse(&language)?; + let report = temporal_ordering_pressure::scan_files(&files, language) + .with_context(|| "failed to scan temporal-ordering-pressure facts")?; + println!("{}", serde_json::to_string(&report)?); + } + Command::RedundantNilGuard { + language, files, .. + } => { + let language = Language::parse(&language)?; + let report = redundant_nil_guard::scan_files(&files, language) + .with_context(|| "failed to scan redundant-nil-guard facts")?; + println!("{}", serde_json::to_string(&report)?); + } + Command::StateMesh { + language, files, .. + } => { + let language = Language::parse(&language)?; + let report = state_mesh::scan_files(&files, language) + .with_context(|| "failed to scan state-mesh facts")?; + println!("{}", serde_json::to_string(&report)?); + } + Command::InconsistentRenameClone { + language, files, .. + } => { + let language = Language::parse(&language)?; + let report = inconsistent_rename_clone::scan_files(&files, language) + .with_context(|| "failed to scan inconsistent-rename-clone facts")?; + println!("{}", serde_json::to_string(&report)?); + } + Command::DerivedState { + language, files, .. + } => { + let language = Language::parse(&language)?; + let report = derived_state::scan_files(&files, language) + .with_context(|| "failed to scan derived-state facts")?; + println!("{}", serde_json::to_string(&report)?); + } + Command::ImplicitControlFlow { + language, files, .. 
+ } => { + let language = Language::parse(&language)?; + let report = implicit_control_flow::scan_files(&files, language) + .with_context(|| "failed to scan implicit-control-flow facts")?; + println!("{}", serde_json::to_string(&report)?); + } + Command::WeightedInlinedComplexity { + language, files, .. + } => { + let language = Language::parse(&language)?; + let report = weighted_inlined_cognitive_complexity::scan_files(&files, language) + .with_context(|| "failed to scan weighted-inlined-complexity facts")?; + println!("{}", serde_json::to_string(&report)?); + } + Command::LocalityDrag { + language, files, .. + } => { + let language = Language::parse(&language)?; + let report = locality_drag::scan_files(&files, language) + .with_context(|| "failed to scan locality-drag facts")?; + println!("{}", serde_json::to_string(&report)?); + } + Command::OperationalDiscontinuity { + language, files, .. + } => { + let language = Language::parse(&language)?; + let report = operational_discontinuity::scan_files(&files, language) + .with_context(|| "failed to scan operational-discontinuity facts")?; + println!("{}", serde_json::to_string(&report)?); + } + Command::StructuralTopology { + language, files, .. + } => { + let language = Language::parse(&language)?; + let report = structural_topology::scan_files(&files, language) + .with_context(|| "failed to scan structural-topology facts")?; + println!("{}", serde_json::to_string(&report)?); + } + Command::LocalFlow { + language, files, .. + } => { + let language = Language::parse(&language)?; + let report = local_flow::scan_files(&files, language) + .with_context(|| "failed to scan local-flow facts")?; + println!("{}", serde_json::to_string(&report)?); + } + Command::FlaySimilarity { + language, + mass, + fuzzy, + files, + .. 
+ } => { + let language = Language::parse(&language)?; + let findings = flay_similarity::scan_files(&files, language, mass, fuzzy) + .with_context(|| "failed to scan structural similarity")?; + println!("{}", serde_json::to_string(&findings)?); + } + Command::OversizedPredicate { + language, files, .. + } => { + let language = Language::parse(&language)?; + let findings = oversized_predicate::scan_files(&files, language) + .with_context(|| "failed to scan oversized-predicate facts")?; + println!("{}", serde_json::to_string(&findings)?); + } + Command::PathCondition { + language, files, .. + } => { + let language = Language::parse(&language)?; + let findings = path_condition::scan_files(&files, language) + .with_context(|| "failed to scan path-condition facts")?; + println!("{}", serde_json::to_string(&findings)?); + } + Command::SequenceMine { + language, files, .. + } => { + let language = Language::parse(&language)?; + let findings = sequence_mine::scan_files(&files, language) + .with_context(|| "failed to scan sequence-mine facts")?; + println!("{}", serde_json::to_string(&findings)?); + } + Command::FunctionLcom { + language, files, .. + } => { + let language = Language::parse(&language)?; + let findings = function_lcom::scan_files(&files, language) + .with_context(|| "failed to scan function-lcom facts")?; + println!("{}", serde_json::to_string(&findings)?); + } + Command::FalseSimplicity { + language, files, .. + } => { + let language = Language::parse(&language)?; + let findings = false_simplicity::scan_files(&files, language) + .with_context(|| "failed to scan false-simplicity facts")?; + println!("{}", serde_json::to_string(&findings)?); + } + Command::FatUnion { + language, files, .. 
+ } => { + let language = Language::parse(&language)?; + let findings = fat_union::scan_files(&files, language) + .with_context(|| "failed to scan fat-union facts")?; + println!("{}", serde_json::to_string(&findings)?); + } + Command::Facts { + options, + targets, + output, + .. + } => { + let facts = report_facts::collect(&targets, &options) + .with_context(|| "failed to collect report facts")?; + write_json(&facts, output.as_ref())?; + } + Command::Report { + options, + targets, + format, + output, + .. + } => { + let facts = report_facts::collect(&targets, &options) + .with_context(|| "failed to collect report facts")?; + render_report(&facts, &format, output.as_ref())?; + } + Command::RenderReport { + input, + from_stdin, + format, + output, + } => { + let facts = read_facts(input.as_ref(), from_stdin)?; + render_report(&facts, &format, output.as_ref())?; + } + Command::SyntaxFacts { + language, files, .. + } => { + let language = Language::parse(&language)?; + let facts = syntax_oracle::project_files(&files, language) + .with_context(|| "failed to collect syntax facts")?; + println!("{}", serde_json::to_string(&facts)?); + } + Command::DetectorFacts { input } => { + let fixture = read_facts(Some(&input), false)?; + let detector = fixture + .get("detector") + .and_then(Value::as_str) + .with_context(|| format!("{} missing detector", input.display()))?; + let input = detector_fact_input(&fixture).with_context(|| { + format!("failed to read detector facts from {}", input.display()) + })?; + let output = run_detector_on_fact_input(detector, &input, &fixture)?; + println!("{}", serde_json::to_string(&output)?); + } + } + Ok(()) +} + +enum Command { + StateWrites { + language: String, + files: Vec, + jobs: Option, + }, + CoUpdate { + language: String, + files: Vec, + jobs: Option, + }, + PredicateAliases { + language: String, + files: Vec, + jobs: Option, + }, + Miner { + language: String, + files: Vec, + jobs: Option, + }, + SemanticAliases { + language: String, + 
files: Vec, + jobs: Option, + }, + DecisionPressure { + language: String, + files: Vec, + jobs: Option, + }, + StateBranchDensity { + language: String, + files: Vec, + jobs: Option, + }, + TemporalOrderingPressure { + language: String, + files: Vec, + jobs: Option, + }, + RedundantNilGuard { + language: String, + files: Vec, + jobs: Option, + }, + StateMesh { + language: String, + files: Vec, + jobs: Option, + }, + InconsistentRenameClone { + language: String, + files: Vec, + jobs: Option, + }, + DerivedState { + language: String, + files: Vec, + jobs: Option, + }, + ImplicitControlFlow { + language: String, + files: Vec, + jobs: Option, + }, + WeightedInlinedComplexity { + language: String, + files: Vec, + jobs: Option, + }, + LocalityDrag { + language: String, + files: Vec, + jobs: Option, + }, + OperationalDiscontinuity { + language: String, + files: Vec, + jobs: Option, + }, + StructuralTopology { + language: String, + files: Vec, + jobs: Option, + }, + LocalFlow { + language: String, + files: Vec, + jobs: Option, + }, + FlaySimilarity { + language: String, + mass: usize, + fuzzy: usize, + files: Vec, + jobs: Option, + }, + OversizedPredicate { + language: String, + files: Vec, + jobs: Option, + }, + PathCondition { + language: String, + files: Vec, + jobs: Option, + }, + SequenceMine { + language: String, + files: Vec, + jobs: Option, + }, + FunctionLcom { + language: String, + files: Vec, + jobs: Option, + }, + FalseSimplicity { + language: String, + files: Vec, + jobs: Option, + }, + FatUnion { + language: String, + files: Vec, + jobs: Option, + }, + Facts { + options: ReportFactsOptions, + targets: Vec, + output: Option, + jobs: Option, + }, + Report { + options: ReportFactsOptions, + targets: Vec, + format: String, + output: Option, + jobs: Option, + }, + RenderReport { + input: Option, + from_stdin: bool, + format: String, + output: Option, + }, + SyntaxFacts { + language: String, + files: Vec, + jobs: Option, + }, + DetectorFacts { + input: PathBuf, + }, 
+} + +impl Command { + fn jobs(&self) -> Option { + match self { + Self::StateWrites { jobs, .. } + | Self::CoUpdate { jobs, .. } + | Self::PredicateAliases { jobs, .. } + | Self::Miner { jobs, .. } + | Self::SemanticAliases { jobs, .. } + | Self::DecisionPressure { jobs, .. } + | Self::StateBranchDensity { jobs, .. } + | Self::TemporalOrderingPressure { jobs, .. } + | Self::RedundantNilGuard { jobs, .. } + | Self::StateMesh { jobs, .. } + | Self::InconsistentRenameClone { jobs, .. } + | Self::DerivedState { jobs, .. } + | Self::ImplicitControlFlow { jobs, .. } + | Self::WeightedInlinedComplexity { jobs, .. } + | Self::LocalityDrag { jobs, .. } + | Self::OperationalDiscontinuity { jobs, .. } + | Self::StructuralTopology { jobs, .. } + | Self::LocalFlow { jobs, .. } + | Self::FlaySimilarity { jobs, .. } + | Self::OversizedPredicate { jobs, .. } + | Self::PathCondition { jobs, .. } + | Self::SequenceMine { jobs, .. } + | Self::FunctionLcom { jobs, .. } + | Self::FalseSimplicity { jobs, .. } + | Self::FatUnion { jobs, .. } + | Self::Facts { jobs, .. } + | Self::Report { jobs, .. } + | Self::SyntaxFacts { jobs, .. } => *jobs, + Self::RenderReport { .. } | Self::DetectorFacts { .. 
} => None, + } + } +} + +fn parse_args(args: Vec) -> Result { + let mut cursor = args.into_iter(); + let Some(command) = cursor.next() else { + bail!("usage: decomplex-rust COMMAND [--language ruby] [--jobs N] FILE..."); + }; + match command.as_str() { + "facts" => { + let args = parse_report_facts_args(cursor.collect(), false)?; + if args.targets.is_empty() { + bail!("facts requires at least one file or directory"); + } + Ok(Command::Facts { + options: args.options, + targets: args.targets, + output: args.output, + jobs: args.jobs, + }) + } + "report" => { + let args = parse_report_facts_args(cursor.collect(), true)?; + if args.targets.is_empty() { + bail!("report requires at least one file or directory"); + } + Ok(Command::Report { + options: args.options, + targets: args.targets, + format: args.format, + output: args.output, + jobs: args.jobs, + }) + } + "render-report" => { + let args = parse_render_report_args(cursor.collect())?; + Ok(Command::RenderReport { + input: args.input, + from_stdin: args.from_stdin, + format: args.format, + output: args.output, + }) + } + "detector-facts" => { + let input = parse_input_only_args(cursor.collect(), "detector-facts")?; + Ok(Command::DetectorFacts { input }) + } + "syntax-facts" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("syntax-facts requires at least one file"); + } + Ok(Command::SyntaxFacts { + language, + files, + jobs, + }) + } + "state-writes" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("state-writes requires at least one file"); + } + Ok(Command::StateWrites { + language, + files, + jobs, + }) + } + "co-update" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("co-update requires at least one file"); + } + Ok(Command::CoUpdate { + language, + files, + jobs, + }) + } + "predicate-aliases" => { + let 
(language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("predicate-aliases requires at least one file"); + } + Ok(Command::PredicateAliases { + language, + files, + jobs, + }) + } + "miner" | "decision-miner" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("miner requires at least one file"); + } + Ok(Command::Miner { + language, + files, + jobs, + }) + } + "semantic-aliases" | "semantic-alias" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("semantic-aliases requires at least one file"); + } + Ok(Command::SemanticAliases { + language, + files, + jobs, + }) + } + "decision-pressure" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("decision-pressure requires at least one file"); + } + Ok(Command::DecisionPressure { + language, + files, + jobs, + }) + } + "state-branch-density" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("state-branch-density requires at least one file"); + } + Ok(Command::StateBranchDensity { + language, + files, + jobs, + }) + } + "temporal-ordering-pressure" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("temporal-ordering-pressure requires at least one file"); + } + Ok(Command::TemporalOrderingPressure { + language, + files, + jobs, + }) + } + "redundant-nil-guard" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("redundant-nil-guard requires at least one file"); + } + Ok(Command::RedundantNilGuard { + language, + files, + jobs, + }) + } + "state-mesh" | "state-heatmap" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if 
files.is_empty() { + bail!("state-mesh requires at least one file"); + } + Ok(Command::StateMesh { + language, + files, + jobs, + }) + } + "inconsistent-rename-clone" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("inconsistent-rename-clone requires at least one file"); + } + Ok(Command::InconsistentRenameClone { + language, + files, + jobs, + }) + } + "derived-state" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("derived-state requires at least one file"); + } + Ok(Command::DerivedState { + language, + files, + jobs, + }) + } + "implicit-control-flow" | "ordered-protocol-mine" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("implicit-control-flow requires at least one file"); + } + Ok(Command::ImplicitControlFlow { + language, + files, + jobs, + }) + } + "weighted-inlined-complexity" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("weighted-inlined-complexity requires at least one file"); + } + Ok(Command::WeightedInlinedComplexity { + language, + files, + jobs, + }) + } + "locality-drag" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("locality-drag requires at least one file"); + } + Ok(Command::LocalityDrag { + language, + files, + jobs, + }) + } + "operational-discontinuity" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("operational-discontinuity requires at least one file"); + } + Ok(Command::OperationalDiscontinuity { + language, + files, + jobs, + }) + } + "structural-topology" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("structural-topology requires at least 
one file"); + } + Ok(Command::StructuralTopology { + language, + files, + jobs, + }) + } + "local-flow" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("local-flow requires at least one file"); + } + Ok(Command::LocalFlow { + language, + files, + jobs, + }) + } + "oversized-predicate" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("oversized-predicate requires at least one file"); + } + Ok(Command::OversizedPredicate { + language, + files, + jobs, + }) + } + "path-condition" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("path-condition requires at least one file"); + } + Ok(Command::PathCondition { + language, + files, + jobs, + }) + } + "sequence-mine" | "broken-protocol" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("sequence-mine requires at least one file"); + } + Ok(Command::SequenceMine { + language, + files, + jobs, + }) + } + "function-lcom" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("function-lcom requires at least one file"); + } + Ok(Command::FunctionLcom { + language, + files, + jobs, + }) + } + "false-simplicity" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("false-simplicity requires at least one file"); + } + Ok(Command::FalseSimplicity { + language, + files, + jobs, + }) + } + "fat-union" => { + let (language, files, jobs) = parse_language_files_and_jobs(cursor.collect())?; + if files.is_empty() { + bail!("fat-union requires at least one file"); + } + Ok(Command::FatUnion { + language, + files, + jobs, + }) + } + "flay-similarity" => { + let mut language = String::from("ruby"); + let mut mass = 32usize; + let mut fuzzy 
= 1usize; + let mut jobs = None; + let mut files = Vec::new(); + let mut rest = cursor.collect::>().into_iter(); + while let Some(arg) = rest.next() { + if arg == "--language" { + language = rest.next().with_context(|| "--language requires a value")?; + } else if let Some(value) = arg.strip_prefix("--language=") { + language = value.to_string(); + } else if arg == "--mass" { + mass = rest + .next() + .with_context(|| "--mass requires a value")? + .parse() + .with_context(|| "--mass must be an integer")?; + } else if let Some(value) = arg.strip_prefix("--mass=") { + mass = value.parse().with_context(|| "--mass must be an integer")?; + } else if arg == "--fuzzy" { + fuzzy = rest + .next() + .with_context(|| "--fuzzy requires a value")? + .parse() + .with_context(|| "--fuzzy must be an integer")?; + } else if let Some(value) = arg.strip_prefix("--fuzzy=") { + fuzzy = value + .parse() + .with_context(|| "--fuzzy must be an integer")?; + } else if arg == "--jobs" { + jobs = Some(parse_jobs( + rest.next().with_context(|| "--jobs requires a value")?, + )?); + } else if let Some(value) = arg.strip_prefix("--jobs=") { + jobs = Some(parse_jobs(value.to_string())?); + } else { + files.push(PathBuf::from(arg)); + } + } + if files.is_empty() { + bail!("flay-similarity requires at least one file"); + } + Ok(Command::FlaySimilarity { + language, + mass, + fuzzy, + files, + jobs, + }) + } + _ => bail!("unknown decomplex-rust command: {command}"), + } +} + +fn parse_input_only_args(args: Vec, command: &str) -> Result { + let mut input = None; + let mut cursor = args.into_iter(); + while let Some(arg) = cursor.next() { + if arg == "--input" { + input = Some(PathBuf::from( + cursor.next().with_context(|| "--input requires a value")?, + )); + } else if let Some(value) = arg.strip_prefix("--input=") { + input = Some(PathBuf::from(value)); + } else { + bail!("unknown {command} argument: {arg}"); + } + } + input.with_context(|| format!("{command} requires --input=FILE")) +} + 
+#[derive(Deserialize)] +struct DetectorFactDocuments { + documents: Vec, +} + +struct DetectorFactInput { + documents: Vec, + local_methods: Vec, +} + +fn detector_fact_input(fixture: &Value) -> Result { + let input = fixture + .get("input") + .cloned() + .with_context(|| "detector fact fixture missing input")?; + let documents: DetectorFactDocuments = serde_json::from_value(input.clone())?; + let mut local_methods = Vec::new(); + + if let Some(methods) = input.get("local_methods") { + local_methods.extend(serde_json::from_value::>( + methods.clone(), + )?); + } + for document in input + .get("documents") + .and_then(Value::as_array) + .into_iter() + .flatten() + { + if let Some(methods) = document.get("local_methods") { + local_methods.extend(serde_json::from_value::>( + methods.clone(), + )?); + } + } + + Ok(DetectorFactInput { + documents: documents.documents, + local_methods, + }) +} + +fn run_detector_on_fact_input( + detector: &str, + input: &DetectorFactInput, + fixture: &Value, +) -> Result { + let documents = input.documents.as_slice(); + match detector { + "co-update" => Ok(json!(co_update::scan_documents(documents))), + "decision-pressure" => { + if input.local_methods.is_empty() { + Ok(json!(decision_pressure::scan_documents(documents))) + } else { + Ok(json!(decision_pressure::scan_documents_with_summaries( + documents, + input.local_methods.clone() + ))) + } + } + "predicate-alias" | "predicate-aliases" => { + Ok(json!(predicate_alias::scan_documents(documents))) + } + "miner" | "decision-miner" => Ok(json!(miner::scan_documents(documents))), + "semantic-alias" | "semantic-aliases" => { + Ok(json!(semantic_alias::scan_documents(documents))) + } + "flay-similarity" | "structural-similarity" => { + let options = fixture.get("options").unwrap_or(&Value::Null); + let mass = value_usize(options, "mass", 32)?; + let fuzzy = value_usize(options, "fuzzy", 1)?; + Ok(json!({ + "findings": flay_similarity::scan_documents(documents, mass, fuzzy), + })) + } + 
"temporal-ordering-pressure" => { + Ok(json!(temporal_ordering_pressure::scan_documents(documents))) + } + "state-branch-density" => Ok(json!(state_branch_density::scan_documents(documents))), + "redundant-nil-guard" => Ok(json!(redundant_nil_guard::scan_documents(documents))), + "state-mesh" | "state-heatmap" => Ok(json!(state_mesh::scan_documents(documents))), + "inconsistent-rename-clone" => { + Ok(json!(inconsistent_rename_clone::scan_documents(documents))) + } + "derived-state" => { + if input.local_methods.is_empty() { + Ok(json!(derived_state::scan_documents(documents))) + } else { + Ok(json!(derived_state::scan_summaries( + input.local_methods.clone() + ))) + } + } + "implicit-control-flow" | "ordered-protocol-mine" => { + Ok(json!(implicit_control_flow::scan_documents(documents))) + } + "weighted-inlined-complexity" => { + if input.local_methods.is_empty() { + Ok(json!( + weighted_inlined_cognitive_complexity::scan_documents(documents) + )) + } else { + Ok(json!( + weighted_inlined_cognitive_complexity::scan_documents_with_summaries( + documents, + input.local_methods.clone() + ) + )) + } + } + "locality-drag" => { + if input.local_methods.is_empty() { + Ok(json!(locality_drag::scan_documents(documents))) + } else { + Ok(json!(locality_drag::scan_summaries_with_scores( + input.local_methods.clone(), + complexity_scores(documents) + ))) + } + } + "operational-discontinuity" => { + if input.local_methods.is_empty() { + Ok(json!(operational_discontinuity::scan_documents(documents))) + } else { + Ok(json!(operational_discontinuity::scan_summaries( + input.local_methods.clone() + ))) + } + } + "oversized-predicate" => Ok(json!(oversized_predicate::scan_documents(documents))), + "path-condition" => Ok(json!({ + "neglected": path_condition::scan_documents(documents).neglected, + })), + "sequence-mine" | "broken-protocol" => Ok(json!(sequence_mine::scan_documents(documents))), + "function-lcom" => { + if input.local_methods.is_empty() { + 
Ok(json!(function_lcom::scan_documents(documents))) + } else { + Ok(json!(function_lcom::scan_summaries( + input.local_methods.clone() + ))) + } + } + "false-simplicity" => Ok(json!(false_simplicity::scan_documents(documents))), + "fat-union" => Ok(json!(fat_union::scan_documents(documents))), + "local-flow" => Ok(json!(local_flow::scan_documents(documents))), + "structural-topology" => Ok(json!(structural_topology::scan_documents(documents))), + _ => bail!("unsupported detector fact fixture: {detector}"), + } +} + +fn complexity_scores( + documents: &[Document], +) -> std::collections::BTreeMap<(String, String), LocalComplexityScore> { + documents + .iter() + .flat_map(|document| { + document + .local_complexity_scores + .iter() + .map(|(id, score)| ((document.file.clone(), id.clone()), score.clone())) + }) + .collect() +} + +fn value_usize(options: &Value, key: &str, default: usize) -> Result { + match options.get(key) { + Some(value) => value + .as_u64() + .map(|value| value as usize) + .with_context(|| format!("option {key} must be an integer")), + None => Ok(default), + } +} + +struct ReportFactsArgs { + options: ReportFactsOptions, + targets: Vec, + output: Option, + jobs: Option, + format: String, +} + +struct RenderReportArgs { + input: Option, + from_stdin: bool, + output: Option, + format: String, +} + +fn parse_render_report_args(args: Vec) -> Result { + let mut input = None; + let mut from_stdin = false; + let mut output = None; + let mut format = "markdown".to_string(); + let mut cursor = args.into_iter(); + while let Some(arg) = cursor.next() { + if arg == "--from-stdin" { + from_stdin = true; + } else if arg == "--input" { + input = Some(PathBuf::from( + cursor.next().with_context(|| "--input requires a value")?, + )); + } else if let Some(value) = arg.strip_prefix("--input=") { + input = Some(PathBuf::from(value)); + } else if arg == "--output" { + output = Some(PathBuf::from( + cursor.next().with_context(|| "--output requires a value")?, + )); + } 
else if let Some(value) = arg.strip_prefix("--output=") { + output = Some(PathBuf::from(value)); + } else if arg == "--format" { + format = cursor.next().with_context(|| "--format requires a value")?; + } else if let Some(value) = arg.strip_prefix("--format=") { + format = value.to_string(); + } else { + bail!("unknown render-report argument: {arg}"); + } + } + if input.is_none() && !from_stdin { + bail!("render-report requires facts JSON on stdin or --input=FILE"); + } + Ok(RenderReportArgs { + input, + from_stdin, + output, + format, + }) +} + +fn parse_report_facts_args(args: Vec, allow_format: bool) -> Result { + let mut options = ReportFactsOptions::default(); + let mut targets = Vec::new(); + let mut output = None; + let mut jobs = None; + let mut format = "markdown".to_string(); + let mut cursor = args.into_iter(); + while let Some(arg) = cursor.next() { + if arg == "--language" { + let value = cursor + .next() + .with_context(|| "--language requires a value")?; + options.language = Some(Language::parse(&value)?); + } else if let Some(value) = arg.strip_prefix("--language=") { + options.language = Some(Language::parse(value)?); + } else if arg == "--jobs" { + jobs = Some(parse_jobs( + cursor.next().with_context(|| "--jobs requires a value")?, + )?); + } else if let Some(value) = arg.strip_prefix("--jobs=") { + jobs = Some(parse_jobs(value.to_string())?); + } else if arg == "--exclude" { + options.excludes.push( + cursor + .next() + .with_context(|| "--exclude requires a value")?, + ); + } else if let Some(value) = arg.strip_prefix("--exclude=") { + options.excludes.push(value.to_string()); + } else if arg == "--output" { + output = Some(PathBuf::from( + cursor.next().with_context(|| "--output requires a value")?, + )); + } else if let Some(value) = arg.strip_prefix("--output=") { + output = Some(PathBuf::from(value)); + } else if arg == "--format" { + if !allow_format { + bail!("facts does not support --format"); + } + format = cursor.next().with_context(|| 
"--format requires a value")?; + } else if let Some(value) = arg.strip_prefix("--format=") { + if !allow_format { + bail!("facts does not support --format"); + } + format = value.to_string(); + } else if arg == "--mass" { + options.mass = cursor + .next() + .with_context(|| "--mass requires a value")? + .parse() + .with_context(|| "--mass must be an integer")?; + } else if let Some(value) = arg.strip_prefix("--mass=") { + options.mass = value.parse().with_context(|| "--mass must be an integer")?; + } else if arg == "--fuzzy" { + options.fuzzy = cursor + .next() + .with_context(|| "--fuzzy requires a value")? + .parse() + .with_context(|| "--fuzzy must be an integer")?; + } else if let Some(value) = arg.strip_prefix("--fuzzy=") { + options.fuzzy = value + .parse() + .with_context(|| "--fuzzy must be an integer")?; + } else if arg == "--vcs" { + options.vcs = Some(parse_vcs_filter( + cursor.next().with_context(|| "--vcs requires a value")?, + )?); + } else if let Some(value) = arg.strip_prefix("--vcs=") { + options.vcs = Some(parse_vcs_filter(value.to_string())?); + } else { + targets.push(PathBuf::from(arg)); + } + } + Ok(ReportFactsArgs { + options, + targets, + output, + jobs, + format, + }) +} + +fn write_json(value: &serde_json::Value, output: Option<&PathBuf>) -> Result<()> { + let text = serde_json::to_string_pretty(value)?; + if let Some(path) = output { + std::fs::write(path, text)?; + } else { + println!("{text}"); + } + Ok(()) +} + +fn read_facts(input: Option<&PathBuf>, from_stdin: bool) -> Result { + let payload = if let Some(path) = input { + std::fs::read_to_string(path) + .with_context(|| format!("failed to read {}", path.display()))? 
+ } else if from_stdin { + let mut payload = String::new(); + std::io::stdin() + .read_to_string(&mut payload) + .with_context(|| "failed to read facts JSON from stdin")?; + payload + } else { + bail!("render-report requires facts JSON on stdin or --input=FILE"); + }; + if payload.trim().is_empty() { + bail!("render-report requires facts JSON on stdin or --input=FILE"); + } + serde_json::from_str(&payload).with_context(|| "failed to parse report facts JSON") +} + +fn render_report(facts: &serde_json::Value, format: &str, output: Option<&PathBuf>) -> Result<()> { + let report = Report::from_facts(facts)?; + let text = match format { + "markdown" | "md" => report.to_markdown(), + "sarif" | "json" => report.to_sarif(), + _ => bail!("unsupported report format: {format}"), + }; + if let Some(path) = output { + std::fs::write(path, text)?; + } else { + println!("{text}"); + } + Ok(()) +} + +fn parse_language_files_and_jobs( + args: Vec, +) -> Result<(String, Vec, Option)> { + let mut language = String::from("ruby"); + let mut jobs = None; + let mut files = Vec::new(); + let mut cursor = args.into_iter(); + while let Some(arg) = cursor.next() { + if arg == "--language" { + language = cursor + .next() + .with_context(|| "--language requires a value")?; + } else if let Some(value) = arg.strip_prefix("--language=") { + language = value.to_string(); + } else if arg == "--jobs" { + jobs = Some(parse_jobs( + cursor.next().with_context(|| "--jobs requires a value")?, + )?); + } else if let Some(value) = arg.strip_prefix("--jobs=") { + jobs = Some(parse_jobs(value.to_string())?); + } else { + files.push(PathBuf::from(arg)); + } + } + Ok((language, files, jobs)) +} + +fn parse_vcs_filter(value: String) -> Result { + match value.as_str() { + "git" => Ok(VcsFilter::Git), + _ => bail!("unsupported --vcs value: {value}"), + } +} + +fn parse_jobs(value: String) -> Result { + let jobs = value + .parse::() + .with_context(|| "--jobs must be an integer")?; + if jobs == 0 { + bail!("--jobs 
must be greater than zero"); + } + Ok(jobs) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parses_common_jobs_option() { + let command = parse_args(vec![ + "co-update".to_string(), + "--jobs=4".to_string(), + "a.rb".to_string(), + ]) + .expect("command"); + + assert_eq!(command.jobs(), Some(4)); + } + + #[test] + fn rejects_zero_jobs_option() { + assert!(parse_args(vec![ + "co-update".to_string(), + "--jobs=0".to_string(), + "a.rb".to_string(), + ]) + .is_err()); + } + + #[test] + fn parses_git_vcs_filter_for_facts() { + let command = parse_args(vec![ + "facts".to_string(), + "--vcs=git".to_string(), + "src".to_string(), + ]) + .expect("command"); + + match command { + Command::Facts { options, .. } => assert_eq!(options.vcs, Some(VcsFilter::Git)), + _ => panic!("expected facts command"), + } + } +} diff --git a/gems/decomplex/rust/tests/examples_oracle.rs b/gems/decomplex/rust/tests/examples_oracle.rs new file mode 100644 index 000000000..c9ee159d9 --- /dev/null +++ b/gems/decomplex/rust/tests/examples_oracle.rs @@ -0,0 +1,1176 @@ +use anyhow::{bail, Context, Result}; +use decomplex_rust::decomplex::detectors::{ + co_update, decision_pressure, derived_state, false_simplicity, fat_union, flay_similarity, + function_lcom, implicit_control_flow, inconsistent_rename_clone, local_flow, locality_drag, + miner, operational_discontinuity, oversized_predicate, path_condition, predicate_alias, + redundant_nil_guard, semantic_alias, sequence_mine, state_branch_density, state_mesh, + structural_topology, temporal_ordering_pressure, weighted_inlined_cognitive_complexity, +}; +use decomplex_rust::decomplex::report::Report; +use decomplex_rust::decomplex::syntax::{Document, Language, LocalComplexityScore}; +use decomplex_rust::decomplex::syntax_oracle; +use serde::{Deserialize, Serialize}; +use serde_json::{json, Map, Value}; +use std::collections::BTreeSet; +use std::fs; +use std::path::{Path, PathBuf}; + +#[test] +fn shared_examples_match_oracles() -> 
Result<()> { + let examples_root = examples_root(); + let oracle_dir = examples_root.join("oracles"); + let mut failures = Vec::new(); + + for fixture in fixture_paths(&examples_root)? { + let detector = file_stem(&fixture)?; + let oracle_path = oracle_dir.join(format!("{detector}.json")); + if !oracle_path.is_file() { + failures.push(format!( + "{}: missing oracle {}", + fixture.display(), + oracle_path.display() + )); + continue; + } + + let oracle: Value = serde_json::from_str(&fs::read_to_string(&oracle_path)?)?; + let expected = oracle + .get("expected") + .cloned() + .with_context(|| format!("{} missing expected", oracle_path.display()))?; + let detector_name = oracle + .get("detector") + .and_then(Value::as_str) + .with_context(|| format!("{} missing detector", oracle_path.display()))?; + let options = oracle.get("options").cloned().unwrap_or_else(|| json!({})); + let language = language_for_fixture(&fixture)?; + let actual = run_detector(detector_name, &[fixture.clone()], language, &options) + .with_context(|| format!("{} {}", detector_name, fixture.display()))?; + let projected = project_detector_output(&detector, actual); + + if projected != expected { + failures.push(format!( + "{} {}\nexpected: {}\nactual: {}", + detector_name, + fixture.display(), + expected, + projected + )); + } + } + + if failures.is_empty() { + Ok(()) + } else { + bail!("shared example oracle failures:\n{}", failures.join("\n\n")) + } +} + +#[test] +fn shared_detector_fact_examples_match_exact_oracles() -> Result<()> { + let examples_root = examples_root(); + let mut failures = Vec::new(); + + for fixture in detector_fact_fixture_paths(&examples_root)? 
{ + let fixture_value: Value = serde_json::from_str(&fs::read_to_string(&fixture)?)?; + let detector = fixture_value + .get("detector") + .and_then(Value::as_str) + .with_context(|| format!("{} missing detector", fixture.display()))?; + let expected = fixture_value + .get("expected") + .cloned() + .with_context(|| format!("{} missing expected", fixture.display()))?; + let input = detector_fact_input(&fixture_value) + .with_context(|| format!("{} input", fixture.display()))?; + let actual = run_detector_on_fact_input(detector, &input, &fixture_value) + .with_context(|| format!("{} {}", detector, fixture.display()))?; + + if actual != expected { + failures.push(format!( + "{} {}\nexpected: {}\nactual: {}", + detector, + fixture.display(), + expected, + actual + )); + } + } + + if failures.is_empty() { + Ok(()) + } else { + bail!( + "shared detector fact oracle failures:\n{}", + failures.join("\n\n") + ) + } +} + +#[test] +fn shared_local_flow_consumer_fact_examples_match_exact_oracles() -> Result<()> { + let examples_root = examples_root(); + let mut failures = Vec::new(); + + for fixture in local_flow_fact_fixture_paths(&examples_root)? 
{ + let fixture_value: Value = serde_json::from_str(&fs::read_to_string(&fixture)?)?; + let expected_by_detector = fixture_value + .get("expected") + .and_then(Value::as_object) + .with_context(|| format!("{} missing expected", fixture.display()))?; + let input = detector_fact_input(&fixture_value) + .with_context(|| format!("{} input", fixture.display()))?; + + for (detector, expected) in expected_by_detector { + let actual = run_detector_on_fact_input(detector, &input, &fixture_value) + .with_context(|| format!("{} {}", detector, fixture.display()))?; + if actual != *expected { + failures.push(format!( + "{} {}\nexpected: {}\nactual: {}", + detector, + fixture.display(), + expected, + actual + )); + } + } + } + + if failures.is_empty() { + Ok(()) + } else { + bail!( + "shared local-flow consumer fact oracle failures:\n{}", + failures.join("\n\n") + ) + } +} + +#[test] +fn shared_report_fact_examples_match_postprocess_oracles() -> Result<()> { + let examples_root = examples_root(); + let mut failures = Vec::new(); + + for fixture in report_fact_fixture_paths(&examples_root)? { + let fixture_value: Value = serde_json::from_str(&fs::read_to_string(&fixture)?)?; + let facts = fixture_value + .get("input") + .with_context(|| format!("{} missing input", fixture.display()))?; + let expected = fixture_value + .get("expected") + .cloned() + .with_context(|| format!("{} missing expected", fixture.display()))?; + let expected_markdown = fs::read_to_string(fixture.with_extension("md")) + .with_context(|| format!("{} missing markdown oracle", fixture.display()))? 
+ .trim_end() + .to_string(); + let report = Report::from_facts(facts) + .with_context(|| format!("failed to build report from {}", fixture.display()))?; + let actual = project_report(&report); + + if actual != expected { + failures.push(format!( + "{}\nexpected: {}\nactual: {}", + fixture.display(), + expected, + actual + )); + } + let markdown = report.to_markdown().trim_end().to_string(); + if markdown != expected_markdown { + failures.push(format!( + "{} markdown\nexpected: {}\nactual: {}", + fixture.display(), + expected_markdown, + markdown + )); + } + } + + if failures.is_empty() { + Ok(()) + } else { + bail!( + "shared report fact oracle failures:\n{}", + failures.join("\n\n") + ) + } +} + +#[test] +fn ruby_source_fact_examples_match_oracles() -> Result<()> { + let examples_root = examples_root(); + let mut failures = Vec::new(); + + for fixture in source_fact_fixture_paths(&examples_root)? { + let name = file_stem(&fixture)?; + let oracle_path = examples_root + .join("source-facts") + .join("oracles") + .join(format!("ruby-{name}.json")); + let expected: Value = serde_json::from_str(&fs::read_to_string(&oracle_path)?) 
+ .with_context(|| format!("read {}", oracle_path.display()))?; + let mut actual = Map::new(); + if let Some(syntax_expected) = expected.get("syntax") { + actual.insert( + "syntax".to_string(), + project_source_syntax(&fixture, syntax_expected)?, + ); + } + if expected.get("local_flow").is_some() { + actual.insert( + "local_flow".to_string(), + project_local_flow(&value(local_flow::scan_files( + &[fixture.clone()], + Language::Ruby, + )?)?), + ); + } + + let actual = Value::Object(actual); + if actual != expected { + failures.push(format!( + "{}\nexpected: {}\nactual: {}", + fixture.display(), + expected, + actual + )); + } + } + + if failures.is_empty() { + Ok(()) + } else { + bail!( + "ruby source-facts oracle failures:\n{}", + failures.join("\n\n") + ) + } +} + +fn examples_root() -> PathBuf { + Path::new(env!("CARGO_MANIFEST_DIR")).join("../examples") +} + +fn fixture_paths(examples_root: &Path) -> Result> { + let mut paths = Vec::new(); + for language_dir in fs::read_dir(examples_root)? { + let language_dir = language_dir?.path(); + if !language_dir.is_dir() + || language_dir.file_name().and_then(|name| name.to_str()) == Some("oracles") + { + continue; + } + for entry in fs::read_dir(&language_dir)? { + let path = entry?.path(); + if path.is_file() && language_for_fixture(&path).is_ok() { + paths.push(path); + } + } + } + paths.sort(); + Ok(paths) +} + +fn report_fact_fixture_paths(examples_root: &Path) -> Result> { + let report_root = examples_root.join("facts").join("report"); + let mut paths = Vec::new(); + for entry in fs::read_dir(&report_root)? { + let path = entry?.path(); + if path.extension().and_then(|extension| extension.to_str()) == Some("json") { + paths.push(path); + } + } + paths.sort(); + Ok(paths) +} + +fn detector_fact_fixture_paths(examples_root: &Path) -> Result> { + let detector_root = examples_root.join("facts").join("detectors"); + let mut paths = Vec::new(); + for entry in fs::read_dir(&detector_root)? 
{ + let path = entry?.path(); + if path.extension().and_then(|extension| extension.to_str()) == Some("json") { + paths.push(path); + } + } + paths.sort(); + Ok(paths) +} + +fn local_flow_fact_fixture_paths(examples_root: &Path) -> Result> { + let root = examples_root.join("facts").join("local-flow"); + let mut paths = Vec::new(); + for entry in fs::read_dir(&root)? { + let path = entry?.path(); + if path.extension().and_then(|extension| extension.to_str()) == Some("json") { + paths.push(path); + } + } + paths.sort(); + Ok(paths) +} + +fn source_fact_fixture_paths(examples_root: &Path) -> Result> { + let root = examples_root.join("source-facts").join("ruby"); + let mut paths = Vec::new(); + for entry in fs::read_dir(&root)? { + let path = entry?.path(); + if path.extension().and_then(|extension| extension.to_str()) == Some("rb") { + paths.push(path); + } + } + paths.sort(); + Ok(paths) +} + +#[derive(Deserialize)] +struct DetectorFactInput { + documents: Vec, + local_methods: Vec, +} + +fn detector_fact_input(fixture: &Value) -> Result { + let input = fixture + .get("input") + .cloned() + .with_context(|| "detector fact fixture missing input")?; + let documents = serde_json::from_value::(input.clone())?.documents; + let mut local_methods = Vec::new(); + + if let Some(methods) = input.get("local_methods") { + local_methods.extend(serde_json::from_value::>( + methods.clone(), + )?); + } + for document in input + .get("documents") + .and_then(Value::as_array) + .into_iter() + .flatten() + { + if let Some(methods) = document.get("local_methods") { + local_methods.extend(serde_json::from_value::>( + methods.clone(), + )?); + } + } + + Ok(DetectorFactInput { + documents, + local_methods, + }) +} + +#[derive(Deserialize)] +struct DetectorFactDocuments { + documents: Vec, +} + +fn file_stem(path: &Path) -> Result { + path.file_stem() + .and_then(|stem| stem.to_str()) + .map(str::to_string) + .with_context(|| format!("missing file stem for {}", path.display())) +} + +fn 
language_for_fixture(path: &Path) -> Result { + let extension = path + .extension() + .and_then(|extension| extension.to_str()) + .with_context(|| format!("missing extension for {}", path.display()))?; + Language::for_extension(extension) + .with_context(|| format!("unsupported fixture extension: {}", path.display())) +} + +fn run_detector( + detector: &str, + files: &[PathBuf], + language: Language, + options: &Value, +) -> Result { + match detector { + "co-update" => value(co_update::scan_files(files, language)?), + "decision-pressure" => value(decision_pressure::scan_files(files, language)?), + "predicate-alias" | "predicate-aliases" => { + value(predicate_alias::scan_files(files, language)?) + } + "miner" | "decision-miner" => value(miner::scan_files(files, language)?), + "semantic-alias" | "semantic-aliases" => { + value(semantic_alias::scan_files(files, language)?) + } + "flay-similarity" | "structural-similarity" => { + let mass = option_usize(options, "mass", 32)?; + let fuzzy = option_usize(options, "fuzzy", 1)?; + Ok(json!({ "findings": flay_similarity::scan_files(files, language, mass, fuzzy)? })) + } + "temporal-ordering-pressure" => { + value(temporal_ordering_pressure::scan_files(files, language)?) + } + "state-branch-density" => value(state_branch_density::scan_files(files, language)?), + "redundant-nil-guard" => value(redundant_nil_guard::scan_files(files, language)?), + "state-mesh" | "state-heatmap" => value(state_mesh::scan_files(files, language)?), + "inconsistent-rename-clone" => { + value(inconsistent_rename_clone::scan_files(files, language)?) + } + "derived-state" => value(derived_state::scan_files(files, language)?), + "implicit-control-flow" | "ordered-protocol-mine" => { + value(implicit_control_flow::scan_files(files, language)?) 
+ } + "weighted-inlined-complexity" => value(weighted_inlined_cognitive_complexity::scan_files( + files, language, + )?), + "locality-drag" => value(locality_drag::scan_files(files, language)?), + "operational-discontinuity" => { + value(operational_discontinuity::scan_files(files, language)?) + } + "oversized-predicate" => value(oversized_predicate::scan_files(files, language)?), + "path-condition" => value(path_condition::scan_files(files, language)?), + "sequence-mine" | "broken-protocol" => value(sequence_mine::scan_files(files, language)?), + "function-lcom" => value(function_lcom::scan_files(files, language)?), + "false-simplicity" => value(false_simplicity::scan_files(files, language)?), + "fat-union" => value(fat_union::scan_files(files, language)?), + "local-flow" => value(local_flow::scan_files(files, language)?), + "structural-topology" => value(structural_topology::scan_files(files, language)?), + _ => bail!("unsupported detector: {detector}"), + } +} + +fn run_detector_on_fact_input( + detector: &str, + input: &DetectorFactInput, + fixture: &Value, +) -> Result { + let documents = input.documents.as_slice(); + match detector { + "co-update" => value(co_update::scan_documents(documents)), + "decision-pressure" => { + if input.local_methods.is_empty() { + value(decision_pressure::scan_documents(documents)) + } else { + value(decision_pressure::scan_documents_with_summaries( + documents, + input.local_methods.clone(), + )) + } + } + "predicate-alias" | "predicate-aliases" => { + value(predicate_alias::scan_documents(documents)) + } + "miner" | "decision-miner" => value(miner::scan_documents(documents)), + "semantic-alias" | "semantic-aliases" => value(semantic_alias::scan_documents(documents)), + "flay-similarity" | "structural-similarity" => { + let options = fixture.get("options").cloned().unwrap_or_else(|| json!({})); + let mass = option_usize(&options, "mass", 32)?; + let fuzzy = option_usize(&options, "fuzzy", 1)?; + value(json!({ "findings": 
flay_similarity::scan_documents(documents, mass, fuzzy) })) + } + "temporal-ordering-pressure" => { + value(temporal_ordering_pressure::scan_documents(documents)) + } + "state-branch-density" => value(state_branch_density::scan_documents(documents)), + "redundant-nil-guard" => value(redundant_nil_guard::scan_documents(documents)), + "state-mesh" | "state-heatmap" => value(state_mesh::scan_documents(documents)), + "inconsistent-rename-clone" => value(inconsistent_rename_clone::scan_documents(documents)), + "derived-state" => { + if input.local_methods.is_empty() { + value(derived_state::scan_documents(documents)) + } else { + value(derived_state::scan_summaries(input.local_methods.clone())) + } + } + "implicit-control-flow" | "ordered-protocol-mine" => { + value(implicit_control_flow::scan_documents(documents)) + } + "weighted-inlined-complexity" => { + if input.local_methods.is_empty() { + value(weighted_inlined_cognitive_complexity::scan_documents( + documents, + )) + } else { + value( + weighted_inlined_cognitive_complexity::scan_documents_with_summaries( + documents, + input.local_methods.clone(), + ), + ) + } + } + "locality-drag" => { + if input.local_methods.is_empty() { + value(locality_drag::scan_documents(documents)) + } else { + value(locality_drag::scan_summaries_with_scores( + input.local_methods.clone(), + complexity_scores(documents), + )) + } + } + "operational-discontinuity" => { + if input.local_methods.is_empty() { + value(operational_discontinuity::scan_documents(documents)) + } else { + value(operational_discontinuity::scan_summaries( + input.local_methods.clone(), + )) + } + } + "oversized-predicate" => value(oversized_predicate::scan_documents(documents)), + "path-condition" => { + let report = path_condition::scan_documents(documents); + value(json!({ "neglected": report.neglected })) + } + "sequence-mine" | "broken-protocol" => value(sequence_mine::scan_documents(documents)), + "function-lcom" => { + if input.local_methods.is_empty() { + 
value(function_lcom::scan_documents(documents)) + } else { + value(function_lcom::scan_summaries(input.local_methods.clone())) + } + } + "false-simplicity" => value(false_simplicity::scan_documents(documents)), + "fat-union" => value(fat_union::scan_documents(documents)), + "local-flow" => value(local_flow::scan_documents(documents)), + "structural-topology" => value(structural_topology::scan_documents(documents)), + _ => bail!("unsupported detector: {detector}"), + } +} + +fn complexity_scores( + documents: &[Document], +) -> std::collections::BTreeMap<(String, String), LocalComplexityScore> { + documents + .iter() + .flat_map(|document| { + document + .local_complexity_scores + .iter() + .map(|(id, score)| ((document.file.clone(), id.clone()), score.clone())) + }) + .collect() +} + +fn project_report(report: &Report) -> Value { + json!({ + "convergence": report.convergence_value(), + "root_clusters": report.root_clusters_value(), + "sarif": compact_sarif(&report.to_sarif_value(false, false, Some(8))), + }) +} + +fn compact_sarif(sarif: &Value) -> Value { + let run = field(sarif, "runs") + .as_array() + .and_then(|runs| runs.first()) + .unwrap_or(&Value::Null); + let results = array(field(run, "results")); + json!({ + "rule_count": array(field(field(field(run, "tool"), "driver"), "rules")).len(), + "result_count": results.len(), + "rule_ids": results.iter().map(|result| field(result, "ruleId").clone()).collect::>(), + "messages": results.iter().map(|result| field(field(result, "message"), "text").clone()).collect::>(), + "locations": results.iter().map(|result| { + let location = field( + array(field(result, "locations")).first().unwrap_or(&Value::Null), + "physicalLocation", + ); + json!({ + "uri": field(field(location, "artifactLocation"), "uri").clone(), + "startLine": field(field(location, "region"), "startLine").clone(), + }) + }).collect::>(), + }) +} + +fn value(value: T) -> Result { + Ok(serde_json::to_value(value)?) 
+} + +fn option_usize(options: &Value, key: &str, default: usize) -> Result { + match options.get(key) { + Some(value) => value + .as_u64() + .map(|value| value as usize) + .with_context(|| format!("option {key} must be an integer")), + None => Ok(default), + } +} + +fn project_detector_output(detector: &str, output: Value) -> Value { + match detector { + "co-update" => json!({ + "co_written_pairs": rows(field(&output, "co_written_pairs"), &["pair", "support"]), + "neglected_updates": rows(field(&output, "neglected_updates"), &["pair", "support", "has", "missing"]), + }), + "decision-pressure" => rows(&output, &["contract", "decisions", "essential", "methods"]), + "predicate-alias" => json!({ + "alias_clusters": array(field(&output, "alias_clusters")).iter().map(|row| { + json!({ "name_count": array(field(row, "names")).len() }) + }).collect::>() + }), + "miner" => json!({ + "missing_abstractions": array(field(&output, "missing_abstractions")).iter().map(|row| { + pick(row, &["kind", "members", "support", "scatter"]) + }).collect::>(), + "neglected_conditions": rows(field(&output, "neglected_conditions"), &["pattern", "support", "missing"]), + }), + "semantic-alias" => json!({ + "alias_clusters": array(field(&output, "alias_clusters")).iter().map(|row| { + json!({ + "canon": canonical_predicate(field(row, "canon")), + "name_count": array(field(row, "names")).len(), + }) + }).collect::>(), + "reification_miss_count": array(field(&output, "reification_misses")).len(), + }), + "flay-similarity" => Value::Array( + array(field(&output, "findings")) + .iter() + .map(|row| { + let mut projected = object(pick(row, &["clone_type", "node"])); + projected.insert( + "site_count".to_string(), + json!(array(field(row, "sites")).len()), + ); + Value::Object(projected) + }) + .collect(), + ), + "temporal-ordering-pressure" => Value::Array( + array(&output) + .iter() + .map(|row| { + let mut projected = object(pick( + row, + &[ + "owner", + "public_methods", + "state_methods", + 
"writers", + "orderings", + ], + )); + projected.insert( + "state_fields".to_string(), + json!(canonical_state_refs(field(row, "state_fields"))), + ); + projected.insert( + "shared_fields".to_string(), + json!(canonical_state_refs(field(row, "shared_fields"))), + ); + Value::Object(projected) + }) + .collect(), + ), + "state-branch-density" => Value::Array( + array(&output) + .iter() + .map(|row| { + let mut projected = object(pick(row, &["decisions"])); + projected.insert( + "method".to_string(), + json!(canonical_method_name(field(row, "method"))), + ); + projected.insert( + "state_refs".to_string(), + json!(canonical_state_refs(field(row, "state_refs"))), + ); + Value::Object(projected) + }) + .collect(), + ), + "redundant-nil-guard" => rows(&output, &["local"]), + "state-mesh" => project_state_mesh(&output), + "inconsistent-rename-clone" => Value::Array( + array(&output) + .iter() + .map(|row| { + let mut projected = object(pick(row, &["ref_name"])); + projected.insert( + "divergent_count".to_string(), + json!(array(field(row, "divergent")).len()), + ); + Value::Object(projected) + }) + .collect(), + ), + "derived-state" => rows(&output, &["derived", "source"]), + "implicit-control-flow" => json!({ + "ordered_protocols": project_protocols(field(&output, "ordered_protocols")), + "order_drift": project_protocols(field(&output, "order_drift")), + }), + "weighted-inlined-complexity" => Value::Array( + array(&output) + .iter() + .map(|row| { + let mut projected = object(pick(row, &["method", "depth"])); + projected.insert( + "callee_count".to_string(), + json!(array(field(row, "single_caller_callees")).len()), + ); + Value::Object(projected) + }) + .collect(), + ), + "locality-drag" => rows(&output, &["variable"]), + "operational-discontinuity" => rows(&output, &["resets", "confidence"]), + "oversized-predicate" => Value::Array( + array(field(&output, "findings")) + .iter() + .map(|row| { + let mut projected = object(pick(row, &["count"])); + projected.insert( + 
"atom_count".to_string(), + json!(array(field(row, "atoms")).len()), + ); + Value::Object(projected) + }) + .collect(), + ), + "path-condition" => Value::Array( + array(field(&output, "neglected")) + .iter() + .map(|row| { + json!({ + "pattern": canonical_predicate_atoms(field(row, "pattern")), + "support": field(row, "support").clone(), + "missing": canonical_predicate(field(row, "missing")), + "action": canonical_action(field(row, "action")), + }) + }) + .collect(), + ), + "sequence-mine" => rows( + field(&output, "broken"), + &["pair", "support", "has", "missing"], + ), + "function-lcom" => rows( + &output, + &[ + "mode", + "components", + "locals", + "statements", + "terminal_join", + ], + ), + "false-simplicity" => rows(&output, &["kind"]), + "fat-union" => Value::Array( + array(field(&output, "fat_unions")) + .iter() + .map(|row| { + let mut projected = object(pick( + row, + &["common", "variant", "degenerate", "support", "scatter"], + )); + projected.insert( + "variant_set".to_string(), + json!(canonical_variants(field(row, "variant_set"))), + ); + Value::Object(projected) + }) + .collect(), + ), + "local-flow" => project_local_flow(&output), + "structural-topology" => json!({ + "method_count": array(field(&output, "methods")).len(), + "edges": rows(field(&output, "edges"), &["caller_name", "callee_name", "type"]), + }), + _ => scrub_locations(&output), + } +} + +fn project_source_syntax(fixture: &Path, expected: &Value) -> Result { + let projection = syntax_oracle::project_files(&[fixture.to_path_buf()], Language::Ruby)?; + let document = array(field(&projection, "documents")) + .first() + .cloned() + .unwrap_or(Value::Null); + let mut out = Map::new(); + if let Some(object) = expected.as_object() { + for key in object.keys() { + let keys = match key.as_str() { + "functions" => &["name", "owner", "line", "visibility", "params"][..], + "owners" => &["name", "kind", "line"][..], + "calls" => &[ + "receiver", + "message", + "function", + "line", + 
"conditional", + "control", + "safe_navigation", + "block", + "arguments", + ][..], + "state_declarations" => &["field", "owner", "type", "line"][..], + "state_param_origins" => { + &["field", "receiver", "owner", "param", "function", "line"][..] + } + "state_reads" => &["receiver", "field", "function", "line"][..], + "state_writes" => &["receiver", "field", "function", "line"][..], + "decisions" => &["kind", "members", "function", "line", "predicate"][..], + "branch_decisions" => &["function", "line", "predicate", "state_refs"][..], + "branch_arms" => &[ + "function", + "kind", + "line", + "decision_line", + "predicate", + "member", + "body", + ][..], + "dispatch_sites" => { + &["variant_set", "arm_members", "outside", "function", "line"][..] + } + "semantic_effects" => &["kind", "detail", "function", "line"][..], + "predicate_bodies" => &["name", "owner", "body", "line"][..], + "comparisons" => &[ + "source", + "raw", + "canon_source", + "operator", + "function", + "line", + ][..], + "path_conditions" => &["guards", "action", "function", "line"][..], + "protocol_method_effects" => &["owner", "name", "line", "reads", "writes"][..], + "protocol_call_paths" => &["owner", "name", "line", "calls"][..], + "clone_candidates" => &[ + "method_name", + "node_name", + "line", + "mass", + "fingerprint", + "child_fingerprints", + "child_masses", + ][..], + "redundant_nil_guards" => &["defn", "line", "local", "guard", "proof"][..], + "local_methods" => &[ + "id", + "owner", + "name", + "line", + "statements", + "boundaries", + "local_contract_assignments", + ][..], + "local_complexity_scores" => &["id", "score", "signals"][..], + _ => bail!("unsupported source syntax section: {key}"), + }; + out.insert(key.clone(), rows(field(&document, key), keys)); + } + } + Ok(Value::Object(out)) +} + +fn project_local_flow(output: &Value) -> Value { + Value::Array( + array(output) + .iter() + .map(|method| { + json!({ + "method": field(method, "name").clone(), + "statements": 
array(field(method, "statements")).iter().map(|statement| { + json!({ + "reads": sorted_array(field(statement, "reads")), + "writes": sorted_array(field(statement, "writes")), + "dependencies": field(statement, "dependencies").clone(), + "co_uses": canonical_co_uses(field(statement, "co_uses")), + }) + }).collect::>(), + "boundaries": array(field(method, "boundaries")).iter().map(|boundary| { + pick(boundary, &["before_index", "after_index", "kind"]) + }).collect::>(), + }) + }) + .collect(), + ) +} + +fn project_state_mesh(output: &Value) -> Value { + let state_mesh = field(output, "state_mesh"); + let fields = field(output, "fields"); + let field_names = fields + .as_object() + .map(|object| { + canonical_state_refs(&Value::Array( + object.keys().cloned().map(Value::String).collect(), + )) + }) + .unwrap_or_default(); + json!({ + "state_mesh": pick( + state_mesh, + &["total_fields", "total_writes", "total_reads", "total_re_derivations"], + ), + "field_names": field_names, + }) +} + +fn project_protocols(rows_value: &Value) -> Value { + Value::Array( + array(rows_value) + .iter() + .map(|row| { + let mut projected = object(pick( + row, + &["protocol", "dependency", "support", "observed", "missing"], + )); + projected.insert( + "states".to_string(), + json!(canonical_state_refs(field(row, "states"))), + ); + Value::Object(projected) + }) + .collect(), + ) +} + +fn canonical_co_uses(value: &Value) -> Value { + let mut pairs = array(value) + .iter() + .map(|pair| { + let mut items = array(pair) + .iter() + .map(|item| item.as_str().unwrap_or_default().to_string()) + .collect::>(); + items.sort(); + json!(items) + }) + .collect::>(); + pairs.sort_by_key(|item| item.to_string()); + Value::Array(pairs) +} + +fn rows(value: &Value, keys: &[&str]) -> Value { + Value::Array(array(value).iter().map(|row| pick(row, keys)).collect()) +} + +fn pick(row: &Value, keys: &[&str]) -> Value { + let mut out = Map::new(); + if let Some(object) = row.as_object() { + for key in keys { + 
if let Some(value) = object.get(*key) { + out.insert((*key).to_string(), canonical_value(value)); + } + } + } + Value::Object(out) +} + +fn canonical_value(value: &Value) -> Value { + match value { + Value::Object(object) => { + let mut out = Map::new(); + let mut keys = object.keys().collect::>(); + keys.sort(); + for key in keys { + out.insert(key.clone(), canonical_value(&object[key])); + } + Value::Object(out) + } + Value::Array(values) => Value::Array(values.iter().map(canonical_value).collect()), + _ => value.clone(), + } +} + +fn scrub_locations(value: &Value) -> Value { + match value { + Value::Object(object) => { + let mut out = Map::new(); + let mut keys = object.keys().collect::>(); + keys.sort(); + for key in keys { + if LOCATION_KEYS.contains(&key.as_str()) { + continue; + } + out.insert(key.clone(), scrub_locations(&object[key])); + } + Value::Object(out) + } + Value::Array(values) => Value::Array(values.iter().map(scrub_locations).collect()), + _ => value.clone(), + } +} + +const LOCATION_KEYS: &[&str] = &[ + "at", + "boundaries", + "boundary_crossings", + "component_lines", + "defn", + "examples", + "file", + "gap_lines", + "line", + "locations", + "predicate", + "raw", + "reason", + "sites", + "span", + "spans", + "source", +]; + +fn canonical_variants(value: &Value) -> Vec { + let mut values = array(value) + .iter() + .map(|item| item.as_str().unwrap_or("").replace(':', ".")) + .map(|text| collapse_dots(&text.replace('_', "."))) + .collect::>(); + values.sort(); + values +} + +fn canonical_state_refs(value: &Value) -> Vec { + let mut values = BTreeSet::new(); + for item in array(value) { + let mut text = value_text(item); + if let Some(stripped) = text.strip_prefix('@') { + text = stripped.to_string(); + } + if let Some(stripped) = text.strip_prefix("self.") { + text = stripped.to_string(); + } else if let Some(stripped) = text.strip_prefix("this.") { + text = stripped.to_string(); + } + values.insert(text); + } + values.into_iter().collect() +} + 
+fn canonical_method_name(value: &Value) -> String { + value_text(value) + .rsplit(['.', ':', '#']) + .next() + .unwrap_or("") + .to_string() +} + +fn canonical_predicate_atoms(value: &Value) -> Vec { + let mut atoms = array(value) + .iter() + .map(canonical_predicate) + .collect::>(); + atoms.sort(); + atoms +} + +fn canonical_predicate(value: &Value) -> String { + let mut text = value_text(value) + .trim() + .trim_end_matches(';') + .trim() + .to_string(); + text = replace_symbol_literals(&text); + text = strip_noarg_suffix(&text); + text +} + +fn canonical_action(value: &Value) -> String { + canonical_predicate(value) +} + +fn replace_symbol_literals(text: &str) -> String { + let mut out = String::new(); + let chars = text.chars().collect::>(); + let mut i = 0; + while i < chars.len() { + if chars[i] == ':' && i + 1 < chars.len() && ident_start(chars[i + 1]) { + i += 1; + let start = i; + while i < chars.len() && ident_continue(chars[i]) { + i += 1; + } + out.push_str(&chars[start..i].iter().collect::().to_uppercase()); + } else { + out.push(chars[i]); + i += 1; + } + } + out +} + +fn strip_noarg_suffix(text: &str) -> String { + let mut out = String::new(); + let chars = text.chars().collect::>(); + let mut i = 0; + while i < chars.len() { + if ident_start(chars[i]) { + let start = i; + i += 1; + while i < chars.len() && (ident_continue(chars[i]) || chars[i] == '.') { + i += 1; + } + if i < chars.len() && chars[i] == '?' 
{ + out.push_str(&chars[start..i].iter().collect::()); + i += 1; + } else if i + 1 < chars.len() && chars[i] == '(' && chars[i + 1] == ')' { + out.push_str(&chars[start..i].iter().collect::()); + i += 2; + } else { + out.push_str(&chars[start..i].iter().collect::()); + } + } else { + out.push(chars[i]); + i += 1; + } + } + out +} + +fn ident_start(ch: char) -> bool { + ch == '_' || ch.is_ascii_alphabetic() +} + +fn ident_continue(ch: char) -> bool { + ch == '_' || ch.is_ascii_alphanumeric() +} + +fn collapse_dots(text: &str) -> String { + let mut out = String::new(); + let mut previous_dot = false; + for ch in text.chars() { + if ch == '.' { + if !previous_dot { + out.push(ch); + } + previous_dot = true; + } else { + out.push(ch); + previous_dot = false; + } + } + out +} + +fn sorted_array(value: &Value) -> Value { + let mut values = array(value).iter().map(canonical_value).collect::>(); + values.sort_by_key(|value| value.to_string()); + Value::Array(values) +} + +fn object(value: Value) -> Map { + value.as_object().cloned().unwrap_or_default() +} + +fn field<'a>(value: &'a Value, key: &str) -> &'a Value { + value + .as_object() + .and_then(|object| object.get(key)) + .unwrap_or(&Value::Null) +} + +fn array(value: &Value) -> &[Value] { + value.as_array().map(Vec::as_slice).unwrap_or(&[]) +} + +fn value_text(value: &Value) -> String { + match value { + Value::String(text) => text.clone(), + Value::Null => String::new(), + _ => value.to_string(), + } +} diff --git a/gems/decomplex/test/architecture_invariants_test.rb b/gems/decomplex/test/architecture_invariants_test.rb new file mode 100644 index 000000000..7210bbf6e --- /dev/null +++ b/gems/decomplex/test/architecture_invariants_test.rb @@ -0,0 +1,175 @@ +# frozen_string_literal: true + +require "minitest/autorun" + +class DecomplexArchitectureInvariantsTest < Minitest::Test + ROOT = File.expand_path("..", __dir__) + LIB = File.join(ROOT, "lib", "decomplex") + DETECTOR_BASENAMES = %w[ + co_update decision_pressure 
derived_state false_simplicity fat_union + flay_similarity function_lcom inconsistent_rename_clone local_flow + locality_drag miner mutability_pressure operational_discontinuity + ordered_protocol_mine oversized_predicate path_condition predicate_alias + redundant_nil_guard semantic_alias sequence_mine site_extractor + state_branch_density state_mesh structural_topology superfluous_state + temporal_ordering_pressure weighted_inlined_cognitive_complexity + ].freeze + DETECTOR_FILES = DETECTOR_BASENAMES.map { |name| File.join(LIB, "#{name}.rb") }.freeze + POST_SYNTAX_CONSUMER_BASENAMES = ( + DETECTOR_BASENAMES + %w[ + convergence delta report report_facts root_cause sarif + ] + ).uniq.freeze + POST_SYNTAX_CONSUMER_FILES = + POST_SYNTAX_CONSUMER_BASENAMES.map { |name| File.join(LIB, "#{name}.rb") }.freeze + + RAW_TREE_SITTER_PATTERNS = { + "raw child traversal" => /(? /\bchild_by_field_name\b/, + "raw byte offsets" => /\b(?:start_byte|end_byte)\b/, + "raw point offsets" => /\b(?:start_point|end_point)\b/, + "Tree-sitter classes" => /\bTreeSitter(?:Adapter|LanguageAdapter|Normalizer|NodeFacade|FacadeContext)?\b/, + "raw node predicate helpers" => /\b(?:ts_node\?|tree_sitter_node\?)\b/, + "raw node duck typing" => /respond_to\?\s*\(\s*:children\s*\)/ + }.freeze + ADAPTER_BOUNDARY_PATTERNS = RAW_TREE_SITTER_PATTERNS.merge( + "syntax adapter profile access" => /\bSyntax\.language_profile\b|\blanguage_profile\s*\(/, + "raw document root access" => /\bdocument\.root\b/, + "normalized document root access" => /\bdocument\.normalized_root\b/ + ).freeze + CONCRETE_LANGUAGE_BRANCH_PATTERNS = { + "concrete language branch" => + /\b(?:case|when|if|elsif)\b.*(?::ruby|:python|:javascript|:typescript|:go|:rust|:zig|:lua|:c|:cpp|:csharp|:java|:swift|:kotlin|:php)\b|\blanguage\s*==\s*(?::ruby|:python|:javascript|:typescript|:go|:rust|:zig|:lua|:c|:cpp|:csharp|:java|:swift|:kotlin|:php)\b/ + }.freeze + + SYNTAX_RB_EXTENSION_HOST_PATTERNS = { + "clone similarity belongs in 
syntax/clone_similarity.rb" => /\b(?:CloneCandidate|clone_candidates|CLONE_)/, + "dispatch facts belong in syntax/dispatch.rb" => /\b(?:DispatchSite|dispatch_sites|DISPATCH_)/, + "nil guard facts belong in syntax/nil_guards.rb" => /\b(?:NilGuard|redundant_nil_guard_findings)/, + "local complexity facts belong in syntax/complexity.rb" => /\b(?:LocalComplexity|local_complexity_scores)/ + }.freeze + SYNTAX_RB_ADAPTER_IMPLEMENTATION_PATTERNS = { + "concrete language adapters belong under lib/decomplex/syntax/" => + /^\s*class\s+(?!TreeSitterLanguageAdapter\b)\w+SyntaxAdapter\b/, + "language profiles must instantiate concrete adapters, not the base adapter" => + /:\s*TreeSitterLanguageAdapter\.new\(/ + }.freeze + ADAPTER_LOADER_LANGUAGE_IMPLEMENTATION_PATTERNS = { + "language lexicons belong in the language adapter file" => + /^\s*[A-Z_]+_LEXICON\s*=/, + "concrete language adapters belong in the language adapter file" => + /^\s*class\s+(?!TreeSitterLanguageAdapter\b)\w+SyntaxAdapter\b/ + }.freeze + LANGUAGE_ADAPTER_FILES = { + "ruby.rb" => "RubySyntaxAdapter", + "python.rb" => "PythonSyntaxAdapter", + "javascript.rb" => "JavaScriptSyntaxAdapter", + "typescript.rb" => "TypeScriptSyntaxAdapter", + "go.rb" => "GoSyntaxAdapter", + "rust.rb" => "RustSyntaxAdapter", + "zig.rb" => "ZigSyntaxAdapter", + "lua.rb" => "LuaSyntaxAdapter", + "c.rb" => "CSyntaxAdapter", + "cpp.rb" => "CppSyntaxAdapter", + "csharp.rb" => "CSharpSyntaxAdapter", + "java.rb" => "JavaSyntaxAdapter", + "swift.rb" => "SwiftSyntaxAdapter", + "kotlin.rb" => "KotlinSyntaxAdapter", + "php.rb" => "PhpSyntaxAdapter" + }.freeze + + def test_detectors_do_not_talk_to_tree_sitter_nodes_directly + offenders = scan_files(DETECTOR_FILES, RAW_TREE_SITTER_PATTERNS) + + assert_empty offenders, format_offenders( + "Detectors must consume Syntax facts instead of raw Tree-sitter nodes", + offenders + ) + end + + def test_post_syntax_consumers_do_not_cross_adapter_boundary + offenders = scan_files(POST_SYNTAX_CONSUMER_FILES, 
ADAPTER_BOUNDARY_PATTERNS) + + assert_empty offenders, format_offenders( + "Code after Syntax must consume facts instead of parser or adapter internals", + offenders + ) + end + + def test_post_syntax_consumers_do_not_branch_on_concrete_languages + offenders = scan_files(POST_SYNTAX_CONSUMER_FILES, CONCRETE_LANGUAGE_BRANCH_PATTERNS) + + assert_empty offenders, format_offenders( + "Code after Syntax must not contain language-specific branches", + offenders + ) + end + + def test_detector_specific_syntax_extensions_do_not_live_in_syntax_rb + syntax_rb = File.join(LIB, "syntax.rb") + offenders = scan_files([syntax_rb], SYNTAX_RB_EXTENSION_HOST_PATTERNS) + + assert_empty offenders, format_offenders( + "Detector-facing parser extensions must live under lib/decomplex/syntax/", + offenders + ) + end + + def test_language_adapter_implementations_do_not_live_in_syntax_rb + syntax_rb = File.join(LIB, "syntax.rb") + offenders = scan_files([syntax_rb], SYNTAX_RB_ADAPTER_IMPLEMENTATION_PATTERNS) + + assert_empty offenders, format_offenders( + "Core syntax.rb must not absorb concrete language adapter implementation", + offenders + ) + end + + def test_each_language_adapter_has_its_own_file + offenders = LANGUAGE_ADAPTER_FILES.filter_map do |file_name, class_name| + path = File.join(LIB, "syntax", file_name) + next "#{file_name}: missing file" unless File.file?(path) + + source = File.read(path) + next if source.match?(/^\s*class\s+#{Regexp.escape(class_name)}\b/) + + "#{file_name}: missing #{class_name}" + end + + assert_empty offenders, format_offenders( + "Every supported language must have an explicit adapter file", + offenders + ) + end + + def test_adapter_loader_does_not_absorb_language_implementations + adapters_rb = File.join(LIB, "syntax", "adapters.rb") + offenders = scan_files([adapters_rb], ADAPTER_LOADER_LANGUAGE_IMPLEMENTATION_PATTERNS) + + assert_empty offenders, format_offenders( + "Adapter loader must only load adapters and shared base helpers", + offenders + ) 
+ end + + private + + def scan_files(files, patterns) + files.sort.flat_map do |path| + rel = path.delete_prefix("#{ROOT}/") + File.readlines(path, chomp: true).each_with_index.flat_map do |line, index| + next if line.strip.start_with?("#") + + patterns.filter_map do |name, pattern| + next unless line.match?(pattern) + + "#{rel}:#{index + 1}: #{name}: #{line.strip}" + end + end.compact + end + end + + def format_offenders(message, offenders) + ([message] + offenders.map { |offender| " #{offender}" }).join("\n") + end +end diff --git a/gems/decomplex/test/ast_test.rb b/gems/decomplex/test/ast_test.rb new file mode 100644 index 000000000..6a479bd58 --- /dev/null +++ b/gems/decomplex/test/ast_test.rb @@ -0,0 +1,2384 @@ +# frozen_string_literal: true + +require "minitest/autorun" +require "tempfile" +require_relative "../lib/decomplex/ast" +require_relative "../lib/decomplex/syntax" + +class AstTest < Minitest::Test + def test_python_f_string_interpolation_after_literal_equals_is_not_dropped + with_python_file(<<~PY) do |file| + class Tag: + @property + def markup(self): + return f"[{self.name}={self.parameters}]" + PY + root, = parse_python(file) + dstr = nodes_of_type(root, "DSTR").find { |node| node.text == 'f"[{self.name}={self.parameters}]"' } + + refute_nil dstr + assert_equal %w[STRING_START STR EVSTR STR EVSTR STR STRING_END], dstr.children.map(&:type).map(&:to_s) + end + end + + def test_lua_elseif_branch_is_preserved_as_if_alternative + with_language_file(<<~LUA, ".lua", :lua) do |file| + if test_env.LUA_V == "5.1" then + one() + elseif test_env.LUA_V == "5.2" then + two() + end + LUA + root, = parse_language(file, :lua) + if_node = nodes_of_type(root, "IF").find { |node| node.text.include?("test_env.LUA_V") } + + refute_nil if_node + assert_equal "ELSEIF_STATEMENT", if_node.children[2].type.to_s + end + end + + def test_lua_assigned_function_if_else_normalizes_as_if_not_iter + with_language_file(<<~LUA, ".lua", :lua) do |file| + local make_unreadable = 
function(path) + if is_win then + fs.execute("x") + else + fs.execute("y") + end + end + LUA + root, = parse_language(file, :lua) + lambda_node = nodes_of_type(root, "LAMBDA").find { |node| node.text.start_with?("function(path)") } + + refute_nil lambda_node + if_node = nodes_of_type(lambda_node, "IF").first + refute_nil if_node + assert_empty nodes_of_type(root, "ITER") + assert_equal "ELSE_STATEMENT", if_node.children[2].type.to_s + end + end + + def test_python_yield_statement_predicate_recognizes_expression_statement_wrapper + with_python_file(<<~PY) do |file| + def gen(): + yield item + other() + PY + document = parse_syntax(file, :python) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + yield_statement = ts_nodes(document.root).find do |node| + node.kind == "expression_statement" && node.text == "yield item" + end + block = ts_nodes(document.root).find do |node| + node.kind == "block" && node.text == "yield item\n other()" + end + + refute_nil yield_statement + refute_nil block + assert normalizer.send(:yield_statement?, yield_statement) + refute normalizer.send(:yield_statement?, block) + end + end + + def test_python_yield_in_multi_statement_body_stays_statement_not_whole_block + with_python_file(<<~PY) do |file| + def gen(): + yield item + other() + PY + root, = parse_python(file) + defn = nodes_of_type(root, "DEFN").find { |node| node.text == "def gen():\n yield item\n other()" } + scope = defn.children[1] + body = scope.children[2] + + refute_nil defn + assert_equal "BLOCK", body.type.to_s + assert_equal %w[YIELD EXPRESSION_STATEMENT], body.children.map(&:type).map(&:to_s) + end + end + + def test_wrapped_return_statement_normalizes_return_value_before_tail_elision + with_language_file(<<~RUBY, ".rb", :ruby) do |file| + def check + return value + end + RUBY + root, = parse_language(file, :ruby) + defn = nodes_of_type(root, "DEFN").find { |node| node.children.first == :check } + + refute_nil defn + body = defn.children[1].children[2] + 
assert_equal "VCALL", body.type.to_s + assert_equal :value, body.children.first + end + + with_language_file(<<~PY, ".py", :python) do |file| + def check(): + return value + PY + root, = parse_language(file, :python) + defn = nodes_of_type(root, "DEFN").find { |node| node.children.first == :check } + + refute_nil defn + body = defn.children[1].children[2] + assert_equal "RETURN", body.type.to_s + assert_equal "LVAR", body.children.first.type.to_s + end + + with_language_file(<<~LUA, ".lua", :lua) do |file| + function check() + return value + end + LUA + root, = parse_language(file, :lua) + defn = nodes_of_type(root, "DEFN").find { |node| node.children.first == :check } + + refute_nil defn + body = defn.children[1].children[2] + assert_equal "RETURN", body.type.to_s + assert_equal "EXPRESSION_LIST", body.children.first.type.to_s + end + end + + def test_ruby_singleton_method_receiver_ignores_method_body + with_language_file(<<~RUBY, ".rb", :ruby) do |file| + def object.hidden + value + end + RUBY + root, = parse_language(file, :ruby) + defs = nodes_of_type(root, "DEFS").find { |node| node.children[1] == :hidden } + + refute_nil defs + receiver = defs.children[0] + assert_equal "VCALL", receiver.type.to_s + assert_equal :object, receiver.children[0] + assert_equal "object", receiver.text + end + end + + def test_ruby_super_statement_predicate_recognizes_bare_and_argument_forms + with_language_file(<<~RUBY, ".rb", :ruby) do |file| + class Child < Parent + def bare + super + end + + def with_arg + super :item + end + + def other + value + end + end + RUBY + document = parse_syntax(file, :ruby) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + bare = ts_nodes(document.root).find { |node| node.kind == "body_statement" && node.text == "super" } + with_arg = ts_nodes(document.root).find { |node| node.kind == "body_statement" && node.text == "super :item" } + other = ts_nodes(document.root).find { |node| node.kind == "body_statement" && node.text == 
"value" } + + refute_nil bare + refute_nil with_arg + refute_nil other + assert normalizer.send(:super_statement?, bare) + assert normalizer.send(:super_statement?, with_arg) + refute normalizer.send(:super_statement?, other) + end + end + + def test_ruby_super_statement_normalizes_bare_and_arguments + with_language_file(<<~RUBY, ".rb", :ruby) do |file| + class Child < Parent + def bare + super + end + + def with_arg + super :item + end + end + RUBY + root, = parse_language(file, :ruby) + bare = nodes_of_type(root, "SUPER").find { |node| node.text == "super" } + with_arg = nodes_of_type(root, "SUPER").find { |node| node.text == "super :item" } + + refute_nil bare + refute_nil with_arg + assert_nil bare.children.first + assert_equal "LIST", with_arg.children.first.type.to_s + assert_equal "LIT", with_arg.children.first.children.first.type.to_s + end + end + + def test_ruby_argument_list_element_reference_predicate + with_language_file(<<~RUBY, ".rb", :ruby) do |file| + def indexed + return items[0] + return obj.foo[0] + return [0] + return items[0], other + return items[] + return items[0] { nope } + end + RUBY + document = parse_syntax(file, :ruby) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + argument_lists = ts_nodes(document.root).select { |node| node.kind == "argument_list" } + + assert normalizer.send(:argument_list_element_reference?, argument_lists.find { |node| node.text == "items[0]" }) + assert normalizer.send(:argument_list_element_reference?, argument_lists.find { |node| node.text == "obj.foo[0]" }) + refute normalizer.send(:argument_list_element_reference?, argument_lists.find { |node| node.text == "[0]" }) + refute normalizer.send(:argument_list_element_reference?, argument_lists.find { |node| node.text == "items[0], other" }) + refute normalizer.send(:argument_list_element_reference?, argument_lists.find { |node| node.text == "items[]" }) + refute normalizer.send(:argument_list_element_reference?, argument_lists.find { |node| 
node.text == "items[0] { nope }" }) + end + end + + def test_dynamic_scope_rewrites_locals_without_crossing_scope_boundaries + inner_assignment = ast_node(:LASGN, children: [:inner]) + node = ast_node(:BLOCK, children: [ + ast_node(:LASGN, children: [:value]), + ast_node(:LVAR, children: [:value]), + ast_node(:DEFN, children: [:nested, ast_node(:SCOPE, children: [nil, nil, inner_assignment])]) + ]) + + result = Decomplex::Ast::TreeSitterNormalizer.allocate.send(:dynamic_scope, node) + + assert_equal :DASGN, result.children[0].type + assert_equal :DVAR, result.children[1].type + assert_equal :DEFN, result.children[2].type + assert_equal :LASGN, inner_assignment.type + end + + def test_link_when_chain_sets_next_arm_and_pads_short_when_nodes + fallback = ast_node(:ELSE) + first = ast_node(:WHEN, children: [:patterns, :body, nil]) + second = ast_node(:WHEN, children: [:patterns, :body, nil]) + normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + + result = normalizer.send(:link_when_chain, [first, second], fallback) + + assert_same first, result + assert_same second, first.children[2] + assert_same fallback, second.children[2] + + short = ast_node(:WHEN, children: [:patterns]) + result = normalizer.send(:link_when_chain, [short], fallback) + + assert_same short, result + assert_nil short.children[1] + assert_same fallback, short.children[2] + end + + def test_link_rescue_chain_sets_next_rescue_and_pads_short_resbody_nodes + first = ast_node(:RESBODY, children: [:exceptions, :body, nil]) + second = ast_node(:RESBODY, children: [:exceptions, :body, nil]) + normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + + result = normalizer.send(:link_rescue_chain, [first, second]) + + assert_same first, result + assert_same second, first.children[2] + assert_nil second.children[2] + + short = ast_node(:RESBODY, children: [:exceptions]) + result = normalizer.send(:link_rescue_chain, [short]) + + assert_same short, result + assert_nil short.children[1] + assert_nil 
short.children[2] + end + + def test_infix_statement_parts_extracts_allowed_wrapper_parts + body = ruby_syntax_node("def calc\n left + right\nend\n", "body_statement", "left + right") + return_args = ruby_syntax_node("def calc\n return left + right\nend\n", "argument_list", "left + right") + boolean = ruby_syntax_node("def calc\n left && right\nend\n", "body_statement", "left && right") + unsupported = ruby_syntax_node("def calc\n left + right\nend\n", "identifier", "left") + normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + + assert_equal ["left", "+", "right"], infix_parts_text(normalizer, body) + assert_equal ["left", "+", "right"], infix_parts_text(normalizer, return_args) + assert_equal [nil, nil, nil], infix_parts_text(normalizer, boolean) + assert_equal [nil, nil, nil], infix_parts_text(normalizer, unsupported) + end + + def test_argument_list_unary_not_predicate + normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + + assert normalizer.send(:argument_list_unary_not?, ruby_syntax_node("def check\n return !flag\nend\n", "argument_list", "!flag")) + assert normalizer.send(:argument_list_unary_not?, ruby_syntax_node("def check\n return !!flag\nend\n", "argument_list", "!!flag")) + refute normalizer.send(:argument_list_unary_not?, ruby_syntax_node("def check\n return flag\nend\n", "argument_list", "flag")) + refute normalizer.send(:argument_list_unary_not?, ruby_syntax_node("def check\n return !flag, other\nend\n", "argument_list", "!flag, other")) + refute normalizer.send(:argument_list_unary_not?, ruby_syntax_node("def check\n return (!flag)\nend\n", "argument_list", "(!flag)")) + refute normalizer.send(:argument_list_unary_not?, ruby_syntax_node("def check\n return not flag\nend\n", "argument_list", "not flag")) + end + + def test_unary_not_statement_predicate + normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + + assert normalizer.send(:unary_not_statement?, ruby_syntax_node("def check\n !flag\nend\n", "body_statement", "!flag")) 
+ assert normalizer.send(:unary_not_statement?, ruby_syntax_node("def check\n !!flag\nend\n", "body_statement", "!!flag")) + refute normalizer.send(:unary_not_statement?, ruby_syntax_node("def check\n flag\nend\n", "body_statement", "flag")) + refute normalizer.send(:unary_not_statement?, ruby_syntax_node("def check\n !flag; other\nend\n", "body_statement", "!flag; other")) + refute normalizer.send(:unary_not_statement?, ruby_syntax_node("def check\n (!flag)\nend\n", "body_statement", "(!flag)")) + refute normalizer.send(:unary_not_statement?, ruby_syntax_node("def check\n not flag\nend\n", "body_statement", "not flag")) + end + + def test_unary_not_expression_predicate + normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + normalizer.instance_variable_set(:@document, fake_document(:ruby)) + ruby_source = "def check\n !flag\n !!flag\n -flag\n not flag\nend\n" + + assert normalizer.send(:unary_not_expression?, ruby_syntax_node(ruby_source, "unary", "!flag")) + assert normalizer.send(:unary_not_expression?, ruby_syntax_node(ruby_source, "unary", "!!flag")) + refute normalizer.send(:unary_not_expression?, ruby_syntax_node(ruby_source, "unary", "-flag")) + refute normalizer.send(:unary_not_expression?, ruby_syntax_node(ruby_source, "unary", "not flag")) + + with_language_file("function check(flag: boolean) { return !flag; }\n", ".ts", :typescript) do |file| + document = parse_syntax(file, :typescript) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == "unary_expression" && candidate.text == "!flag" } + refute_nil node + assert normalizer.send(:unary_not_expression?, node) + end + + with_language_file("if not flag:\n pass\n", ".py", :python) do |file| + document = parse_syntax(file, :python) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == "not_operator" && candidate.text == "not flag" } + 
refute_nil node + refute normalizer.send(:unary_not_expression?, node) + end + + with_language_file("if not flag then end\n", ".lua", :lua) do |file| + document = parse_syntax(file, :lua) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == "unary_expression" && candidate.text == "not flag" } + refute_nil node + refute normalizer.send(:unary_not_expression?, node) + end + end + + def test_unary_minus_expression_predicate + normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + normalizer.instance_variable_set(:@document, fake_document(:ruby)) + ruby_source = "def check\n -flag\n !flag\n value\nend\n" + + assert normalizer.send(:unary_minus_expression?, ruby_syntax_node(ruby_source, "unary", "-flag")) + refute normalizer.send(:unary_minus_expression?, ruby_syntax_node(ruby_source, "unary", "!flag")) + + with_language_file("function check(value: number) { return -value; }\n", ".ts", :typescript) do |file| + document = parse_syntax(file, :typescript) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == "unary_expression" && candidate.text == "-value" } + refute_nil node + assert normalizer.send(:unary_minus_expression?, node) + end + + with_language_file("x = -value\n", ".py", :python) do |file| + document = parse_syntax(file, :python) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == "unary_operator" && candidate.text == "-value" } + refute_nil node + assert normalizer.send(:unary_minus_expression?, node) + end + + with_language_file("local x = -value\n", ".lua", :lua) do |file| + document = parse_syntax(file, :lua) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == "expression_list" && candidate.text == "-value" } + refute_nil 
node + assert normalizer.send(:unary_minus_expression?, node) + end + end + + def test_tree_sitter_normalizer_selects_language_specific_normalization_adapters + { + ruby: Decomplex::Ast::RubyTreeSitterNormalizationAdapter, + python: Decomplex::Ast::PythonTreeSitterNormalizationAdapter, + lua: Decomplex::Ast::LuaTreeSitterNormalizationAdapter, + typescript: Decomplex::Ast::TypeScriptTreeSitterNormalizationAdapter, + javascript: Decomplex::Ast::TypeScriptTreeSitterNormalizationAdapter, + rust: Decomplex::Ast::RustTreeSitterNormalizationAdapter + }.each do |language, adapter_class| + assert_instance_of adapter_class, Decomplex::Ast::TreeSitterNormalizationAdapter.for(fake_document(language)) + end + end + + def test_tree_sitter_normalizer_rejects_unsupported_normalization_languages + error = assert_raises(Decomplex::Ast::UnsupportedLanguageError) do + Decomplex::Ast::TreeSitterNormalizationAdapter.for(fake_document(:go)) + end + + assert_includes error.message, ":go" + end + + def test_parse_semantic_returns_language_neutral_ruby_facts + with_language_file(<<~RB, ".rb", :ruby) do |file| + class User + def active? + admin? + end + end + RB + root, = Decomplex::Ast.parse_semantic(file, language: :ruby) + + assert Decomplex::Ast.semantic_node?(root) + assert_equal :root, root.type + assert_equal :ruby, root.language + assert root.children.any? { |node| node.type == :owner && node[:name] == "User" } + assert root.children.any? { |node| node.type == :function && node[:name] == "active?" } + assert root.children.any? { |node| node.type == :call && node[:message] == "admin?" } + refute root.children.any? 
{ |node| %i[DEFN VCALL FCALL CALL].include?(node.type) } + end + end + + def test_parse_semantic_returns_language_neutral_python_facts + with_python_file(<<~PY) do |file| + def check(user): + return user.active() + PY + root, = Decomplex::Ast.parse_semantic(file, language: :python) + + assert Decomplex::Ast.semantic_node?(root) + assert_equal :root, root.type + assert_equal :python, root.language + assert root.children.any? { |node| node.type == :function && node[:name] == "check" } + assert root.children.any? { |node| node.type == :call && node[:receiver] == "user" && node[:message] == "active" } + refute root.children.any? { |node| %i[DEFN VCALL FCALL CALL].include?(node.type) } + end + end + + def test_safe_navigation_call_recognizes_typescript_optional_chain + with_language_file("user?.name;\nuser?.name();\n", ".ts", :typescript) do |file| + document = parse_syntax(file, :typescript) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + member = ts_nodes(document.root).find { |candidate| candidate.kind == "member_expression" && candidate.text == "user?.name" } + call = ts_nodes(document.root).find { |candidate| candidate.kind == "call_expression" && candidate.text == "user?.name()" } + + refute_nil member + refute_nil call + assert normalizer.send(:safe_navigation_call?, member) + assert normalizer.send(:safe_navigation_call?, call) + end + end + + def test_binary_operator + ruby_source = "def calc\n left + right\n left && right\n value\nend\n" + + with_language_file(ruby_source, ".rb", :ruby) do |file| + document = parse_syntax(file, :ruby) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + + assert_equal "+", normalizer.send(:binary_operator, ts_nodes(document.root).find { |node| node.kind == "binary" && node.text == "left + right" }) + assert_equal "&&", normalizer.send(:binary_operator, ts_nodes(document.root).find { |node| node.kind == "binary" && node.text == "left && right" }) + assert_equal "", 
normalizer.send(:binary_operator, ts_nodes(document.root).find { |node| node.kind == "body_statement" && node.text == "left + right\n left && right\n value" }) + end + + with_language_file("const value = left + right && other;\n", ".ts", :typescript) do |file| + document = parse_syntax(file, :typescript) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + outer = ts_nodes(document.root).find { |candidate| candidate.kind == "binary_expression" && candidate.text == "left + right && other" } + inner = ts_nodes(document.root).find { |candidate| candidate.kind == "binary_expression" && candidate.text == "left + right" } + + refute_nil outer + refute_nil inner + assert_equal "&&", normalizer.send(:binary_operator, outer) + assert_equal "+", normalizer.send(:binary_operator, inner) + end + + with_language_file("value = left + right and other\n", ".py", :python) do |file| + document = parse_syntax(file, :python) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + outer = ts_nodes(document.root).find { |candidate| candidate.kind == "boolean_operator" && candidate.text == "left + right and other" } + inner = ts_nodes(document.root).find { |candidate| candidate.kind == "binary_operator" && candidate.text == "left + right" } + + refute_nil outer + refute_nil inner + assert_equal "and", normalizer.send(:binary_operator, outer) + assert_equal "+", normalizer.send(:binary_operator, inner) + end + + with_language_file("local value = left + right and other\n", ".lua", :lua) do |file| + document = parse_syntax(file, :lua) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + outer = ts_nodes(document.root).find { |candidate| candidate.kind == "expression_list" && candidate.text == "left + right and other" } + inner = ts_nodes(document.root).find { |candidate| candidate.kind == "binary_expression" && candidate.text == "left + right" } + + refute_nil outer + refute_nil inner + assert_equal "and", normalizer.send(:binary_operator, outer) + 
assert_equal "+", normalizer.send(:binary_operator, inner) + end + end + + def test_operator_call_expression_predicate + { + ruby: ["def calc\n left + right\n left && right\nend\n", ".rb", "binary", "left + right", "binary", "left && right"], + typescript: ["const value = left + right && other;\n", ".ts", "binary_expression", "left + right", "binary_expression", "left + right && other"], + python: ["value = left + right and other\n", ".py", "binary_operator", "left + right", "boolean_operator", "left + right and other"], + lua: ["local value = left + right\nlocal other = left and right\n", ".lua", "expression_list", "left + right", "expression_list", "left and right"] + }.each do |language, (source, suffix, positive_kind, positive_text, negative_kind, negative_text)| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + positive = ts_nodes(document.root).find { |candidate| candidate.kind == positive_kind && candidate.text == positive_text } + negative = ts_nodes(document.root).find { |candidate| candidate.kind == negative_kind && candidate.text == negative_text } + + refute_nil positive + refute_nil negative + assert normalizer.send(:operator_call_expression?, positive) + refute normalizer.send(:operator_call_expression?, negative) + end + end + end + + def test_operator_call_normalizes_python_and_lua_arithmetic + { + python: ["value = left + right\n", ".py"], + lua: ["local value = left + right\n", ".lua"] + }.each do |language, (source, suffix)| + with_language_file(source, suffix, language) do |file| + root, = parse_language(file, language) + opcall = nodes_of_type(root, "OPCALL").find { |node| node.text == "left + right" } + + refute_nil opcall + assert_equal "+", opcall.children[1].to_s + end + end + end + + def test_lua_boolean_expression_normalizes_as_and + with_language_file("local value = left and right\n", ".lua", :lua) do |file| + root, = 
parse_language(file, :lua) + and_node = nodes_of_type(root, "AND").find { |node| node.text == "left and right" } + + refute_nil and_node + assert_equal %w[LVAR LVAR], and_node.children.map(&:type).map(&:to_s) + end + end + + def test_lua_comparison_expression_normalizes_as_opcall + with_language_file("local value = left == right\n", ".lua", :lua) do |file| + root, = parse_language(file, :lua) + opcall = nodes_of_type(root, "OPCALL").find { |node| node.text == "left == right" } + + refute_nil opcall + assert_equal "==", opcall.children[1].to_s + assert_equal %w[LVAR LVAR], [opcall.children[0].type, opcall.children[2].children.first.type].map(&:to_s) + end + end + + def test_lua_long_string_assignment_normalizes_as_literal_expression_list + with_language_file("local c_module_source = [[\n #include \n]]\n", ".lua", :lua) do |file| + root, = parse_language(file, :lua) + assignment = nodes_of_type(root, "LASGN").find { |node| node.children.first == "c_module_source" } + + refute_nil assignment + expression_list = assignment.children[1] + assert_equal "EXPRESSION_LIST", expression_list.type.to_s + assert_equal "[[\n #include \n]]", expression_list.text + assert_equal ["STR"], expression_list.children.map(&:type).map(&:to_s) + assert_equal "\n #include \n", expression_list.children.first.children.first + assert_empty nodes_of_type(root, "OPCALL").select { |node| node.text.include?("") } + end + end + + def test_comparison_operator + { + ruby: ["def calc\n left == right\nend\n", ".rb", "body_statement", "left == right", "identifier", "left"], + typescript: ["const value = left === right;\n", ".ts", "binary_expression", "left === right", "identifier", "left"], + python: ["value = left == right\n", ".py", "comparison_operator", "left == right", "identifier", "left"], + lua: ["local value = left == right\nlocal other = left + right\n", ".lua", "expression_list", "left == right", "expression_list", "left + right"] + }.each do |language, (source, suffix, positive_kind, 
positive_text, negative_kind, negative_text)| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + positive = ts_nodes(document.root).find { |candidate| candidate.kind == positive_kind && candidate.text == positive_text } + negative = ts_nodes(document.root).find { |candidate| candidate.kind == negative_kind && candidate.text == negative_text } + + refute_nil positive + refute_nil negative + refute_empty normalizer.send(:comparison_operator, positive).to_s + assert_empty normalizer.send(:comparison_operator, negative).to_s + end + end + end + + def test_spaced_text + { + ruby: ["def calc\n left + right\nend\n", ".rb", "body_statement", "left + right"], + typescript: ["const value = left + right;\n", ".ts", "binary_expression", "left + right"], + python: ["value = left + right\n", ".py", "binary_operator", "left + right"], + lua: ["local value = left + right\n", ".lua", "expression_list", "left + right"] + }.each do |language, (source, suffix, kind, text)| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal " #{text} ", normalizer.send(:spaced_text, node) + end + end + end + + def test_class_node_predicate + { + ruby: ["class Thing; end\n", ".rb", "class", "class Thing; end", true], + python: ["class Thing:\n pass\n", ".py", "class_definition", "class Thing:\n pass", true], + typescript: ["class Thing {}\n", ".ts", "class_declaration", "class Thing {}", true], + lua: ["local Thing = {}\n", ".lua", "variable_declaration", "local Thing = {}", false] + }.each do |language, (source, suffix, kind, text, expected)| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + 
normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:class_node?, node) + end + end + end + + def test_empty_class_scope_uses_class_source + with_language_file("class Thing; end\n", ".rb", :ruby) do |file| + root, = parse_language(file, :ruby) + class_node = nodes_of_type(root, "CLASS").find { |node| node.text == "class Thing; end" } + + refute_nil class_node + scope = class_node.children[2] + assert_equal "SCOPE", scope.type.to_s + assert_equal "class Thing; end", scope.text + assert_equal [1, 0, 1, 16], [scope.first_lineno, scope.first_column, scope.last_lineno, scope.last_column] + end + end + + def test_unwrap_node_predicate + cases = [ + [:ruby, "def check\n (value)\n value\nend\n", ".rb", "parenthesized_statements", "(value)", true], + [:python, "value\n(value)\n", ".py", "expression_statement", "value", false], + [:python, "value\n(value)\n", ".py", "expression_statement", "(value)", true], + [:typescript, "const value = (other);\n", ".ts", "parenthesized_expression", "(other)", true], + [:lua, "local first = (other)\nlocal second = left + right\n", ".lua", "expression_list", "(other)", true], + [:lua, "local first = (other)\nlocal second = left + right\n", ".lua", "expression_list", "left + right", false] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:unwrap_node?, node) + end + end + end + + def test_statement_node_predicate + cases = [ + [:ruby, "def check\n return value\nend\n", ".rb", "body_statement", "return value", true], + 
[:ruby, "def check\n return value\nend\n", ".rb", "identifier", "check", false], + [:python, "value\n(value)\n", ".py", "expression_statement", "(value)", true], + [:python, "value\n(value)\n", ".py", "identifier", "value", false], + [:typescript, "function check() { return value + other; }\n", ".ts", "return_statement", "return value + other;", true], + [:typescript, "function check() { return value + other; }\n", ".ts", "binary_expression", "value + other", true], + [:typescript, "function check() { return value + other; }\n", ".ts", "identifier", "value", false], + [:lua, "return value\n", ".lua", "return_statement", "return value", true], + [:lua, "return value\n", ".lua", "expression_list", "value", false] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:statement_node?, node) + end + end + end + + def test_local_identifier_predicate + cases = [ + [:ruby, "def check\nend\nclass Thing; end\n", ".rb", "identifier", "check", true], + [:ruby, "def check\nend\nclass Thing; end\n", ".rb", "constant", "Thing", false], + [:python, "def check(value):\n pass\n", ".py", "identifier", "value", true], + [:python, "def check(value):\n pass\n", ".py", "parameters", "(value)", false], + [:typescript, "const value = object.field;\n", ".ts", "identifier", "value", true], + [:typescript, "const value = object.field;\n", ".ts", "property_identifier", "field", true], + [:typescript, "const value = object.field;\n", ".ts", "lexical_declaration", "const value = object.field;", false], + [:lua, "local value = other\nprint(value)\n", ".lua", "identifier", "value", true], + [:lua, "local value = other\n", ".lua", "expression_list", "other", 
false] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:local_identifier?, node) + end + end + end + + def test_ruby_local_name_predicate + normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + normalizer.instance_variable_set(:@local_stack, [ + Set.new(%w[outer shared]), + Set.new(%w[inner]) + ]) + + assert normalizer.send(:ruby_local_name?, "outer") + assert normalizer.send(:ruby_local_name?, "inner") + assert normalizer.send(:ruby_local_name?, "shared") + refute normalizer.send(:ruby_local_name?, "missing") + end + + def test_ruby_predicate + { + ruby: true, + python: false, + lua: false, + typescript: false + }.each do |language, expected| + normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + normalizer.instance_variable_set(:@document, fake_document(language)) + + assert_equal expected, normalizer.send(:ruby?) 
+ end + end + + def test_interpolated_string_predicate + cases = [ + [:ruby, "name = \"hi \#{user}\"\nplain = \"hi\"\n", ".rb", "string", "\"hi \#{user}\"", true], + [:ruby, "name = \"hi \#{user}\"\nplain = \"hi\"\n", ".rb", "string", "\"hi\"", false], + [:python, "name = f\"hi {user}\"\nplain = \"hi\"\n", ".py", "string", "f\"hi {user}\"", true], + [:python, "name = f\"hi {user}\"\nplain = \"hi\"\n", ".py", "string", "\"hi\"", false], + [:typescript, "const name = `hi ${user}`;\nconst plain = `hi`;\n", ".ts", "template_string", "`hi ${user}`", true], + [:typescript, "const name = `hi ${user}`;\nconst plain = `hi`;\n", ".ts", "template_string", "`hi`", false], + [:lua, "local name = \"hi\"\n", ".lua", "expression_list", "\"hi\"", false] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:interpolated_string?, node) + end + end + end + + def test_const_node_predicate + cases = [ + [:ruby, "class Thing; end\ndef check; end\n", ".rb", "constant", "Thing", true], + [:ruby, "class Thing; end\ndef check; end\n", ".rb", "identifier", "check", false], + [:python, "class Thing:\n pass\n", ".py", "identifier", "Thing", false], + [:typescript, "type Thing = Other;\nconst value = Thing;\n", ".ts", "type_identifier", "Thing", true], + [:typescript, "type Thing = Other;\nconst value = Thing;\n", ".ts", "identifier", "value", false], + [:lua, "local Thing = {}\n", ".lua", "variable_list", "Thing", false] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + 
node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:const_node?, node) + end + end + end + + def test_self_node_predicate + cases = [ + [:ruby, "self\nother\n", ".rb", "self", "self", true], + [:ruby, "self\nother\n", ".rb", "identifier", "other", false], + [:python, "self.value\nother.value\n", ".py", "identifier", "self", true], + [:python, "self.value\nother.value\n", ".py", "identifier", "other", false], + [:typescript, "this.value;\nother;\n", ".ts", "this", "this", true], + [:typescript, "this.value;\nother;\n", ".ts", "identifier", "other", false], + [:lua, "print(self.value)\nprint(other.value)\n", ".lua", "identifier", "self", true], + [:lua, "print(self.value)\nprint(other.value)\n", ".lua", "identifier", "other", false] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:self_node?, node) + end + end + end + + def test_instance_variable_predicate + cases = [ + [:ruby, "@value\nname\n", ".rb", "instance_variable", "@value", true], + [:ruby, "@value\nname\n", ".rb", "identifier", "name", false], + [:python, "@decorator\ndef call():\n pass\n", ".py", "decorator", "@decorator", false], + [:typescript, "@sealed\nclass Thing {}\n", ".ts", "decorator", "@sealed", false], + [:lua, "print(value)\n", ".lua", "identifier", "value", false] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { 
|candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:instance_variable?, node) + end + end + end + + def test_global_variable_predicate + cases = [ + [:ruby, "$value\nname\n", ".rb", "global_variable", "$value", true], + [:ruby, "$value\nname\n", ".rb", "identifier", "name", false], + [:python, "value = \"$name\"\n", ".py", "string_content", "$name", false], + [:typescript, "const $value = other;\n", ".ts", "identifier", "$value", false], + [:lua, "print(\"$name\")\n", ".lua", "string_content", "$name", false] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:global_variable?, node) + end + end + end + + def test_literal_fragment_assignment_context_predicate + cases = [ + [:ruby, "value = \"left = right\"\n", ".rb", "string_content", "left = right", true], + [:ruby, "value = 1\n", ".rb", "identifier", "value", false], + [:python, "value = \"left = right\"\n", ".py", "string_content", "left = right", true], + [:typescript, "const value = \"left = right\";\n", ".ts", "string_fragment", "left = right", true], + [:lua, "local value = \"left = right\"\n", ".lua", "string_content", "left = right", true], + [:lua, "local value = other\n", ".lua", "variable_list", "value", false] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, 
normalizer.send(:literal_fragment_assignment_context?, node) + end + end + end + + def test_collect_identifier_names + cases = [ + [:ruby, "left, *rest = values\n", ".rb", "left_assignment_list", "left, *rest", %w[left rest]], + [:typescript, "const value = { shorthand };\n", ".ts", "object", "{ shorthand }", %w[shorthand]], + [:lua, "local value = other\n", ".lua", "variable_declaration", "local value = other", %w[other value]] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + locals = Set.new + + refute_nil node + normalizer.send(:collect_identifier_names, node, locals) + assert_equal expected, locals.to_a.sort + end + end + end + + def test_assignment_operator_predicate + cases = [ + [:ruby, "=", true], + [:ruby, "**=", true], + [:ruby, "??=", false], + [:python, ":=", true], + [:python, "//=", true], + [:python, "&&=", false], + [:typescript, "??=", true], + [:typescript, ">>>=", true], + [:typescript, ":=", false], + [:lua, "=", true], + [:lua, "+=", false] + ] + + cases.each do |language, text, expected| + normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + normalizer.instance_variable_set(:@document, fake_document(language)) + + assert_equal expected, normalizer.send(:assignment_operator?, text) + end + end + + def test_operator_assignment_operator + cases = [ + [:ruby, "value **= other\nflag ||= fallback\n", ".rb", "operator_assignment", "value **= other", :"**"], + [:ruby, "value **= other\nflag ||= fallback\n", ".rb", "operator_assignment", "flag ||= fallback", :"||"], + [:python, "value //= other\n", ".py", "expression_statement", "value //= other", :"//"], + [:typescript, "value ??= other;\ncount >>>= 1;\n", ".ts", "augmented_assignment_expression", "value 
??= other", :"??"], + [:typescript, "value ??= other;\ncount >>>= 1;\n", ".ts", "augmented_assignment_expression", "count >>>= 1", :">>>"] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:operator_assignment_operator, node) + end + end + end + + def test_ruby_global_augmented_assignment_uses_global_read_receiver + with_language_file("$value += 1\n", ".rb", :ruby) do |file| + root, = parse_language(file, :ruby) + assignment = nodes_of_type(root, "GASGN").find { |node| node.text == "$value += 1" } + + refute_nil assignment + call = assignment.children[1] + assert_equal "CALL", call.type.to_s + receiver = call.children[0] + assert_equal "GVAR", receiver.type.to_s + assert_equal ["$value"], receiver.children + end + end + + def test_lua_member_assignment_normalizes_as_attribute_assignment + with_language_file("user.name = value\n", ".lua", :lua) do |file| + root, = parse_language(file, :lua) + assignment = nodes_of_type(root, "ATTRASGN").find { |node| node.text == "user.name = value" } + + refute_nil assignment + receiver = assignment.children[0] + assert_equal "LVAR", receiver.type.to_s + assert_equal ["user"], receiver.children + assert_equal :name=, assignment.children[1] + assert_equal "LIST", assignment.children[2].type.to_s + end + end + + def test_first_named + cases = [ + [:ruby, "class Thing; end\nname\n", ".rb", "class", "class Thing; end", ["constant", "Thing"]], + [:ruby, "class Thing; end\nname\n", ".rb", "identifier", "name", nil], + [:python, "def check(value):\n return value\n", ".py", "function_definition", "def check(value):\n return value", ["identifier", "check"]], + [:typescript, "function check(value) 
{ return value; }\n", ".ts", "function_declaration", "function check(value) { return value; }", ["identifier", "check"]], + [:lua, "print(value)\n", ".lua", "function_call", "print(value)", ["identifier", "print"]] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + found = normalizer.send(:first_named, node) + if expected + assert_equal expected, [found&.kind, found&.text] + else + assert_nil found + end + end + end + end + + def test_block_child + cases = [ + [:ruby, "def check\n call\nend\n", ".rb", "method", "def check\n call\nend", ["body_statement", "call"]], + [:ruby, "items.each do\n call\nend\n", ".rb", "call", "items.each do\n call\nend", ["do_block", "do\n call\nend"]], + [:python, "def check():\n call()\n", ".py", "function_definition", "def check():\n call()", ["block", "call()"]], + [:typescript, "function check() { call(); }\n", ".ts", "function_declaration", "function check() { call(); }", ["statement_block", "{ call(); }"]], + [:lua, "function check()\n call()\nend\n", ".lua", "function_declaration", "function check()\n call()\nend", ["block", "call()"]], + [:ruby, "name\n", ".rb", "identifier", "name", nil] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + found = normalizer.send(:block_child, node) + if expected + assert_equal expected, [found&.kind, found&.text] + else + assert_nil found + end + end + end + end + + def test_branch_child 
+ cases = [ + [:ruby, "if ready\n call\nelse\n stop\nend\n", ".rb", "if", "if ready\n call\nelse\n stop\nend", "identifier", "ready", 0, ["then", "\n call"]], + [:ruby, "if ready\n call\nelse\n stop\nend\n", ".rb", "if", "if ready\n call\nelse\n stop\nend", "identifier", "ready", 1, nil], + [:ruby, "if ready\n # note\n call\nend\n", ".rb", "if", "if ready\n # note\n call\nend", "identifier", "ready", 0, ["then", "\n call"]], + [:python, "if ready:\n call()\nelse:\n stop()\n", ".py", "if_statement", "if ready:\n call()\nelse:\n stop()", "identifier", "ready", 1, ["else_clause", "else:\n stop()"]], + [:typescript, "if (ready) { call(); } else { stop(); }\n", ".ts", "if_statement", "if (ready) { call(); } else { stop(); }", "parenthesized_expression", "(ready)", 0, ["statement_block", "{ call(); }"]], + [:lua, "if ready then\n call()\nelse\n stop()\nend\n", ".lua", "if_statement", "if ready then\n call()\nelse\n stop()\nend", "identifier", "ready", 1, ["else_statement", "else\n stop()"]] + ] + + cases.each do |language, source, suffix, kind, text, cond_kind, cond_text, index, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + condition = ts_nodes(document.root).find { |candidate| candidate.kind == cond_kind && candidate.text == cond_text } + + refute_nil node + refute_nil condition + found = normalizer.send(:branch_child, node, condition, index) + if expected + assert_equal expected, [found&.kind, found&.text] + else + assert_nil found + end + end + end + end + + def test_explicit_alternative + cases = [ + [:ruby, "if ready\n call\nelsif other\n stop\nend\n", ".rb", "if", "if ready\n call\nelsif other\n stop\nend", ["elsif", "elsif other\n stop"]], + [:ruby, "if ready\n call\nend\n", ".rb", "if", "if ready\n call\nend", nil], + [:python, "if 
ready:\n call()\nelif other:\n stop()\n", ".py", "if_statement", "if ready:\n call()\nelif other:\n stop()", ["elif_clause", "elif other:\n stop()"]], + [:typescript, "if (ready) { call(); } else { stop(); }\n", ".ts", "if_statement", "if (ready) { call(); } else { stop(); }", ["else_clause", "else { stop(); }"]], + [:lua, "if ready then\n call()\nelseif other then\n stop()\nend\n", ".lua", "if_statement", "if ready then\n call()\nelseif other then\n stop()\nend", ["elseif_statement", "elseif other then\n stop()"]] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + found = normalizer.send(:explicit_alternative, node) + if expected + assert_equal expected, [found&.kind, found&.text] + else + assert_nil found + end + end + end + end + + def test_wrap + cases = [ + [:ruby, "first\nsecond\n", ".rb", "identifier", "second"], + [:python, "first\nsecond\n", ".py", "expression_statement", "second"], + [:typescript, "first;\nsecond;\n", ".ts", "identifier", "second"], + [:lua, "print(first)\nprint(second)\n", ".lua", "identifier", "second"] + ] + + cases.each do |language, source, suffix, kind, text| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + wrapped = normalizer.send(:wrap, :WRAPPED, children: [:child], source: node) + assert_equal :WRAPPED, wrapped.type + assert_equal [:child], wrapped.children + assert_equal node.start_point.row + 1, wrapped.first_lineno + assert_equal node.start_point.column, wrapped.first_column + 
assert_equal node.end_point.row + 1, wrapped.last_lineno + assert_equal node.end_point.column, wrapped.last_column + assert_equal node.text, wrapped.text + + inner = normalizer.send(:wrap, :INNER, children: [], source: node) + outer = normalizer.send(:wrap, :OUTER, children: [:child], source: inner) + assert_equal :OUTER, outer.type + assert_equal [:child], outer.children + assert_equal inner.first_lineno, outer.first_lineno + assert_equal inner.first_column, outer.first_column + assert_equal inner.last_lineno, outer.last_lineno + assert_equal inner.last_column, outer.last_column + assert_equal inner.text, outer.text + end + end + end + + def test_source_before_child + cases = [ + [:ruby, "if ready\n call\nend\n", ".rb", "if", "if ready\n call\nend", "then", "\n call", "if ready"], + [:python, "if ready:\n call()\n", ".py", "if_statement", "if ready:\n call()", "block", "call()", "if ready:"], + [:typescript, "if (ready) { call(); }\n", ".ts", "if_statement", "if (ready) { call(); }", "statement_block", "{ call(); }", "if (ready)"], + [:lua, "if ready then\n call()\nend\n", ".lua", "if_statement", "if ready then\n call()\nend", "block", "call()", "if ready then"], + [:ruby, "puts value\n", ".rb", "call", "puts value", "identifier", "puts", "puts value"], + [:python, "call()\n", ".py", "expression_statement", "call()", "identifier", "call", "call()"], + [:typescript, "call();\n", ".ts", "expression_statement", "call();", "identifier", "call", "call();"], + [:lua, "call()\n", ".lua", "function_call", "call()", "identifier", "call", "call()"] + ] + + cases.each do |language, source, suffix, kind, text, child_kind, child_text, expected_text| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + child = ts_nodes(document.root).find { |candidate| 
candidate.kind == child_kind && candidate.text == child_text } + + refute_nil node + refute_nil child + source_node = normalizer.send(:source_before_child, node, child) + wrapped = normalizer.send(:wrap, :WRAPPED, children: [], source: source_node) + + assert_equal expected_text, wrapped.text + assert_equal node.start_point.row + 1, wrapped.first_lineno + assert_equal node.start_point.column, wrapped.first_column + end + end + end + + def test_source_from_normalized_nodes + cases = [ + [:ruby, "first\nsecond\n", ".rb", "identifier", "first", "identifier", "second", "first\nsecond"], + [:python, "first\nsecond\n", ".py", "expression_statement", "first", "expression_statement", "second", "first\nsecond"], + [:typescript, "first;\nsecond;\n", ".ts", "expression_statement", "first;", "expression_statement", "second;", "first;\nsecond;"], + [:lua, "print(first)\nprint(second)\n", ".lua", "function_call", "print(first)", "function_call", "print(second)", "print(first)\nprint(second)"], + [:ruby, "first + second\n", ".rb", "identifier", "first", "identifier", "second", "first + second"] + ] + + cases.each do |language, source, suffix, first_kind, first_text, last_kind, last_text, expected_text| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + first_raw = ts_nodes(document.root).find { |candidate| candidate.kind == first_kind && candidate.text == first_text } + last_raw = ts_nodes(document.root).find { |candidate| candidate.kind == last_kind && candidate.text == last_text } + + refute_nil first_raw + refute_nil last_raw + first_node = normalizer.send(:wrap, :FIRST, children: [], source: first_raw) + last_node = normalizer.send(:wrap, :LAST, children: [], source: last_raw) + source_node = normalizer.send(:source_from_normalized_nodes, first_node, last_node) + + assert_equal :SOURCE, source_node.type + assert_equal [], source_node.children + assert_equal 
first_node.first_lineno, source_node.first_lineno + assert_equal first_node.first_column, source_node.first_column + assert_equal last_node.last_lineno, source_node.last_lineno + assert_equal last_node.last_column, source_node.last_column + assert_equal expected_text, source_node.text + end + end + end + + def test_named_field + cases = [ + [:ruby, "def check(value)\n value\nend\n", ".rb", "method", "def check(value)\n value\nend", "name", ["identifier", "check"]], + [:ruby, "def check(value)\n value\nend\n", ".rb", "method", "def check(value)\n value\nend", "missing", nil], + [:python, "if ready:\n call()\n", ".py", "if_statement", "if ready:\n call()", "body", ["block", "call()"]], + [:python, "if ready:\n call()\n", ".py", "if_statement", "if ready:\n call()", "condition", ["identifier", "ready"]], + [:typescript, "function check(value) { return value; }\n", ".ts", "function_declaration", "function check(value) { return value; }", "body", ["statement_block", "{ return value; }"]], + [:lua, "function check(value)\n return value\nend\n", ".lua", "function_declaration", "function check(value)\n return value\nend", "body", ["block", "return value"]] + ] + + cases.each do |language, source, suffix, kind, text, field, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + found = normalizer.send(:named_field, node, field) + if expected + assert_equal expected, [found&.kind, found&.text] + else + assert_nil found + end + end + end + end + + def test_parent_node + cases = [ + [:ruby, "def check\nend\n", ".rb", "identifier", "check", ["method", "def check\nend"]], + [:ruby, "value\n", ".rb", "program", "value\n", nil], + [:python, "if ready:\n call()\n", ".py", "identifier", "ready", ["if_statement", "if ready:\n call()"]], + 
[:typescript, "call(value);\n", ".ts", "identifier", "value", ["arguments", "(value)"]], + [:lua, "call(value)\n", ".lua", "identifier", "value", ["arguments", "(value)"]] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + found = normalizer.send(:parent_node, node) + if expected + assert_equal expected, [found&.kind, found&.text] + else + assert_nil found + end + end + end + end + + def test_next_sibling + cases = [ + [:ruby, "a + b\n", ".rb", "identifier", "a", ["+", "+"]], + [:python, "a + b\n", ".py", "identifier", "a", ["+", "+"]], + [:typescript, "a + b;\n", ".ts", "identifier", "a", ["+", "+"]], + [:lua, "print(a, b)\n", ".lua", "identifier", "a", [",", ","]], + [:ruby, "a\n", ".rb", "identifier", "a", nil] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + found = normalizer.send(:next_sibling, node) + if expected + assert_equal expected, [found&.kind, found&.text] + else + assert_nil found + end + end + end + end + + def test_prev_sibling + cases = [ + [:ruby, "a + b\n", ".rb", "identifier", "b", ["+", "+"]], + [:python, "a + b\n", ".py", "identifier", "b", ["+", "+"]], + [:typescript, "a + b;\n", ".ts", "identifier", "b", ["+", "+"]], + [:lua, "print(a, b)\n", ".lua", "identifier", "b", [",", ","]], + [:ruby, "a\n", ".rb", "identifier", "a", nil] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, 
suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + found = normalizer.send(:prev_sibling, node) + if expected + assert_equal expected, [found&.kind, found&.text] + else + assert_nil found + end + end + end + end + + def test_next_named_sibling + cases = [ + [:ruby, "a + b\n", ".rb", "identifier", "a", ["identifier", "b"]], + [:python, "a + b\n", ".py", "identifier", "a", ["identifier", "b"]], + [:typescript, "a + b;\n", ".ts", "identifier", "a", ["identifier", "b"]], + [:lua, "print(a, b)\n", ".lua", "identifier", "a", ["identifier", "b"]], + [:ruby, "a\n", ".rb", "identifier", "a", nil] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + found = normalizer.send(:next_named_sibling, node) + if expected + assert_equal expected, [found&.kind, found&.text] + else + assert_nil found + end + end + end + end + + def test_ternary_statement_predicate + cases = [ + [:ruby, "def f(cond, a, b)\n cond ? a : b\nend\n", ".rb", "body_statement", "cond ? a : b", true], + [:python, "value = a if cond else b\n", ".py", "conditional_expression", "a if cond else b", true], + [:typescript, "const value = cond ? a : b;\n", ".ts", "ternary_expression", "cond ? 
a : b", true], + [:lua, "local value = cond and a or b\n", ".lua", "expression_list", "cond and a or b", false], + [:ruby, "def f(cond)\n cond\nend\n", ".rb", "body_statement", "cond", false] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:ternary_statement?, node) + end + end + end + + def test_ternary_statement_normalizes_to_if_across_languages + { + ruby: ["def f(cond, a, b)\n cond ? a : b\nend\n", ".rb"], + python: ["def f(cond, a, b):\n return a if cond else b\n", ".py"], + typescript: ["function f(cond: boolean, a: number, b: number) { return cond ? a : b; }\n", ".ts"] + }.each do |language, (source, suffix)| + with_language_file(source, suffix, language) do |file| + root, = parse_language(file, language) + if_node = nodes_of_type(root, "IF").find { |node| node.text.include?("cond") } + + refute_nil if_node + assert_equal %w[cond a b], if_node.children.map(&:text) + end + end + end + + def test_case_argument_list_predicate + cases = [ + [ + :ruby, + "def f(x)\n return case x\n when 1 then :one\n else :other\n end\nend\n", + ".rb", + "argument_list", + "case x\n when 1 then :one\n else :other\n end", + true + ], + [:ruby, "case x\nwhen 1 then :one\nelse :other\nend\n", ".rb", "case", "case x\nwhen 1 then :one\nelse :other\nend", false], + [:python, "match value:\n case 1:\n one()\n", ".py", "case_clause", "case 1:\n one()", false], + [:typescript, "switch (value) { case 1: one(); break; }\n", ".ts", "switch_case", "case 1: one(); break;", false], + [:lua, "if value == 1 then one() end\n", ".lua", "if_statement", "if value == 1 then one() end", false] + ] + + cases.each do |language, source, suffix, kind, text, 
expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:case_argument_list?, node) + end + end + end + + def test_leading_function_statement_predicate + cases = [ + [:ruby, "def outer\n def inner\n x\n end\nend\n", ".rb", "body_statement", "def inner\n x\n end", true], + [:python, "def outer():\n def inner():\n x\n", ".py", "block", "def inner():\n x", true], + [:lua, "function outer()\n function inner()\n x()\n end\nend\n", ".lua", "block", "function inner()\n x()\n end", true], + [:typescript, "function outer() { function inner() { x; } }\n", ".ts", "function_declaration", "function inner() { x; }", false], + [:ruby, "def outer\n x\nend\n", ".rb", "body_statement", "x", false] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:leading_function_statement?, node) + end + end + end + + def test_leading_function_statement_normalizes_nested_functions + { + ruby: ["def outer\n def inner\n x\n end\nend\n", ".rb"], + python: ["def outer():\n def inner():\n x\n", ".py"], + lua: ["function outer()\n function inner()\n x()\n end\nend\n", ".lua"] + }.each do |language, (source, suffix)| + with_language_file(source, suffix, language) do |file| + root, = parse_language(file, language) + inner = nodes_of_type(root, "DEFN").find { |node| node.children.first == :inner } + + refute_nil inner + assert_empty nodes_of_type(root, "ITER").select { |node| 
node.text.include?("inner") } + end + end + end + + def test_lambda_expression_predicate + cases = [ + [:ruby, "fn = ->(x) { x + 1 }\n", ".rb", "lambda", "->(x) { x + 1 }", true], + [:python, "fn = lambda x: x + 1\n", ".py", "lambda", "lambda x: x + 1", true], + [:typescript, "const fn = (x) => x + 1;\n", ".ts", "arrow_function", "(x) => x + 1", true], + [:typescript, "const fn = function(x) { return x + 1; };\n", ".ts", "function_expression", "function(x) { return x + 1; }", true], + [:lua, "local fn = function(x) return x + 1 end\n", ".lua", "expression_list", "function(x) return x + 1 end", true], + [:lua, "function f(x) return x + 1 end\n", ".lua", "function_declaration", "function f(x) return x + 1 end", false] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:lambda_expression?, node) + end + end + end + + def test_lambda_expressions_normalize_across_languages + { + ruby: ["fn = ->(x) { x + 1 }\n", ".rb"], + python: ["fn = lambda x: x + 1\n", ".py"], + typescript: ["const fn = (x) => x + 1;\n", ".ts"], + lua: ["local fn = function(x) return x + 1 end\n", ".lua"] + }.each do |language, (source, suffix)| + with_language_file(source, suffix, language) do |file| + root, = parse_language(file, language) + + refute_empty nodes_of_type(root, "LAMBDA"), "expected LAMBDA for #{language}" + end + end + end + + def test_leading_owner_statement_predicate + cases = [ + [:ruby, "def outer\n class Inner\n value\n end\nend\n", ".rb", "body_statement", "class Inner\n value\n end", true], + [:ruby, "def outer\n module Inner\n value\n end\nend\n", ".rb", "body_statement", "module Inner\n value\n end", true], + [:python, "def 
outer():\n class Inner:\n pass\n", ".py", "block", "class Inner:\n pass", true], + [:typescript, "function outer() { class Inner {} }\n", ".ts", "class_declaration", "class Inner {}", false], + [:lua, "function outer()\n Inner = {}\nend\n", ".lua", "block", "Inner = {}", false] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:leading_owner_statement?, node) + end + end + end + + def test_leading_owner_statement_normalizes_nested_classes + { + ruby: ["def outer\n class Inner\n value\n end\nend\n", ".rb"], + python: ["def outer():\n class Inner:\n pass\n", ".py"] + }.each do |language, (source, suffix)| + with_language_file(source, suffix, language) do |file| + root, = parse_language(file, language) + inner = nodes_of_type(root, "CLASS").find { |node| node.text.include?("Inner") } + + refute_nil inner + assert_empty nodes_of_type(root, "ITER").select { |node| node.text.include?("Inner") } + end + end + end + + def test_zero_child_identifier_call_predicate + cases = [ + [:ruby, "foo?\n", ".rb", "call", "foo?", true], + [:ruby, "foo!\n", ".rb", "call", "foo!", true], + [:ruby, "foo()\n", ".rb", "call", "foo()", false], + [:python, "foo()\n", ".py", "expression_statement", "foo()", false], + [:typescript, "foo();\n", ".ts", "call_expression", "foo()", false], + [:lua, "foo()\n", ".lua", "function_call", "foo()", false] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind 
&& candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:zero_child_identifier_call?, node) + end + end + end + + def test_zero_child_identifier_call_normalizes_to_vcall + %w[foo? foo!].each do |call| + with_language_file("#{call}\n", ".rb", :ruby) do |file| + root, = parse_language(file, :ruby) + vcall = nodes_of_type(root, "VCALL").find { |node| node.text == call } + + refute_nil vcall + assert_equal call.to_sym, vcall.children.first + end + end + end + + def test_dotted_call_parts + cases = [ + [:ruby, "user.name\n", ".rb", "call", "user.name", "identifier", "user", "name"], + [:ruby, "user&.name\n", ".rb", "call", "user&.name", "identifier", "user", "name"], + [:python, "user.name()\n", ".py", "attribute", "user.name", "identifier", "user", "name"], + [:typescript, "user.name();\n", ".ts", "member_expression", "user.name", "identifier", "user", "name"], + [:typescript, "user.name;\n", ".ts", "expression_statement", "user.name;", "identifier", "user", "name"], + [:lua, "user.name()\n", ".lua", "dot_index_expression", "user.name", "identifier", "user", "name"] + ] + + cases.each do |language, source, suffix, kind, text, receiver_kind, receiver_text, method_name| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + receiver, method = normalizer.send(:dotted_call_parts, node) + assert_equal receiver_kind, receiver.kind + assert_equal receiver_text, receiver.text.to_s + assert_equal method_name, method + end + end + end + + def test_python_bare_dotted_expression_normalizes_as_call + with_language_file("user.name\n", ".py", :python) do |file| + root, = parse_language(file, :python) + call = nodes_of_type(root, "CALL").find { |node| node.text == "user.name" } + + refute_nil call + assert_equal "LVAR", 
call.children.first.type.to_s + assert_equal :name, call.children[1] + end + end + + def test_typescript_bare_dotted_expression_normalizes_as_call + with_language_file("user.name;\n", ".ts", :typescript) do |file| + document = parse_syntax(file, :typescript) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find do |candidate| + candidate.kind == "expression_statement" && candidate.text == "user.name;" + end + call = normalizer.send(:normalize_dotted_expression, node) + + refute_nil call + assert_equal "CALL", call.type.to_s + assert_equal "LVAR", call.children.first.type.to_s + assert_equal :name, call.children[1] + end + end + + def test_leading_if_statement_predicate + cases = [ + [:ruby, "def f\n if x\n y\n end\nend\n", ".rb", "body_statement", "if x\n y\n end", true], + [:python, "def f():\n if x:\n y()\n", ".py", "block", "if x:\n y()", true], + [:lua, "function f()\n if x then\n y()\n end\nend\n", ".lua", "block", "if x then\n y()\n end", true], + [:typescript, "function f() { if (x) { y(); } }\n", ".ts", "if_statement", "if (x) { y(); }", false] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:leading_if_statement?, node) + end + end + end + + def test_leading_if_statement_normalizes_across_languages + { + ruby: ["def f\n if x\n y\n end\nend\n", ".rb"], + python: ["def f():\n if x:\n y()\n", ".py"], + lua: ["function f()\n if x then\n y()\n end\nend\n", ".lua"], + typescript: ["function f() { if (x) { y(); } }\n", ".ts"] + }.each do |language, (source, suffix)| + with_language_file(source, suffix, language) do |file| + root, = parse_language(file, language) + + 
refute_empty nodes_of_type(root, "IF") + end + end + end + + def test_leading_case_statement_predicate + cases = [ + [ + :ruby, + "def f(x)\n case x\n when 1 then y\n else z\n end\nend\n", + ".rb", + "body_statement", + "case x\n when 1 then y\n else z\n end", + true + ], + [ + :python, + "def f(x):\n match x:\n case 1:\n y()\n", + ".py", + "block", + "match x:\n case 1:\n y()", + true + ], + [ + :typescript, + "function f(x) { switch (x) { case 1: y(); break; default: z(); } }\n", + ".ts", + "switch_statement", + "switch (x) { case 1: y(); break; default: z(); }", + false + ], + [ + :lua, + "function f(x)\n if x == 1 then y() end\nend\n", + ".lua", + "block", + "if x == 1 then y() end", + false + ] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:leading_case_statement?, node) + end + end + end + + def test_leading_case_statement_normalizes_across_languages + { + ruby: ["def f(x)\n case x\n when 1 then y\n else z\n end\nend\n", ".rb"], + python: ["def f(x):\n match x:\n case 1:\n y()\n", ".py"], + typescript: ["function f(x) { switch (x) { case 1: y(); break; default: z(); } }\n", ".ts"] + }.each do |language, (source, suffix)| + with_language_file(source, suffix, language) do |file| + root, = parse_language(file, language) + + refute_empty nodes_of_type(root, "CASE") + end + end + end + + def test_case_default_branches_normalize_as_when_fallbacks + { + python: ["match x:\n case 1:\n one()\n case _:\n other()\n", ".py", "other()"], + typescript: ["switch (x) { case 1: one(); break; default: other(); }\n", ".ts", "other()"] + }.each do |language, (source, suffix, fallback_text)| + with_language_file(source, suffix, 
language) do |file| + root, = parse_language(file, language) + case_node = nodes_of_type(root, "CASE").first + + refute_nil case_node + whens = nodes_of_type(case_node, "WHEN") + assert_equal 1, whens.size + fallback = whens.first.children[2] + assert Decomplex::Ast.node?(fallback) + assert_equal "VCALL", fallback.type.to_s + assert_equal fallback_text, fallback.text + end + end + end + + def test_ruby_case_patterns_preserve_childless_tree_sitter_pattern_text + with_language_file("case value\nwhen Foo\n one\nend\ncase\nwhen ready\n two\nend\n", ".rb", :ruby) do |file| + root, = parse_language(file, :ruby) + whens = nodes_of_type(root, "WHEN") + + const_pattern = whens.find { |node| node.text == "when Foo\n one" }.children.first.children.first + assert_equal "CONST", const_pattern.type.to_s + assert_equal :Foo, const_pattern.children.first + + call_pattern = whens.find { |node| node.text == "when ready\n two" }.children.first.children.first + assert_equal "VCALL", call_pattern.type.to_s + assert_equal :ready, call_pattern.children.first + end + end + + def test_leading_loop_statement_predicate + cases = [ + [:ruby, "def f(x)\n while x\n y\n end\nend\n", ".rb", "body_statement", "while x\n y\n end", true], + [:python, "def f(x):\n while x:\n y()\n", ".py", "block", "while x:\n y()", true], + [:lua, "function f(x)\n while x do\n y()\n end\nend\n", ".lua", "block", "while x do\n y()\n end", true], + [:typescript, "function f(x) { while (x) { y(); } }\n", ".ts", "while_statement", "while (x) { y(); }", false] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:leading_loop_statement?, node) + end + end + end + + def 
test_leading_loop_statement_normalizes_across_languages + { + ruby: ["def f(x)\n while x\n y\n end\nend\n", ".rb"], + python: ["def f(x):\n while x:\n y()\n", ".py"], + lua: ["function f(x)\n while x do\n y()\n end\nend\n", ".lua"], + typescript: ["function f(x) { while (x) { y(); } }\n", ".ts"] + }.each do |language, (source, suffix)| + with_language_file(source, suffix, language) do |file| + root, = parse_language(file, language) + + refute_empty nodes_of_type(root, "WHILE") + end + end + end + + def test_rescue_body_statement_predicate + cases = [ + [ + :ruby, + "def f\n work\nrescue Error => e\n handle\nend\n", + ".rb", + "body_statement", + "work\nrescue Error => e\n handle", + true + ], + [ + :python, + "try:\n work()\nexcept Error as e:\n handle(e)\n", + ".py", + "try_statement", + "try:\n work()\nexcept Error as e:\n handle(e)", + true + ], + [ + :python, + "def f():\n try:\n work()\n except Error as e:\n handle(e)\n", + ".py", + "block", + "try:\n work()\n except Error as e:\n handle(e)", + true + ], + [ + :typescript, + "try { work(); } catch (e) { handle(e); }\n", + ".ts", + "try_statement", + "try { work(); } catch (e) { handle(e); }", + true + ], + [ + :lua, + "local ok, err = pcall(work)\n", + ".lua", + "variable_declaration", + "local ok, err = pcall(work)", + false + ] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:rescue_body_statement?, node) + end + end + end + + def test_python_flattened_bare_except_normalizes_as_rescue + with_python_file(<<~PY) do |file| + def get_exception(): + try: + pass + except: + foobarbaz + PY + root, = parse_python(file) + rescue_node = nodes_of_type(root, "RESCUE").first 
+ resbody = nodes_of_type(root, "RESBODY").first + + refute_nil rescue_node + refute_nil resbody + assert_nil rescue_node.children.first + assert_nil resbody.children.first + assert_equal "VCALL", resbody.children[1].type.to_s + assert_equal :foobarbaz, resbody.children[1].children.first + end + end + + def test_python_flattened_try_except_preserves_try_body + with_python_file(<<~PY) do |file| + def f(): + try: + work() + except Error as e: + handle(e) + PY + root, = parse_python(file) + rescue_node = nodes_of_type(root, "RESCUE").first + resbody = nodes_of_type(root, "RESBODY").first + + refute_nil rescue_node + assert_equal "VCALL", rescue_node.children.first.type.to_s + assert_equal "work()", rescue_node.children.first.text + refute_nil resbody.children.first + end + end + + def test_rescue_body_statement_normalizes_across_languages + { + ruby: ["def f\n work\nrescue Error => e\n handle\nend\n", ".rb"], + python: ["try:\n work()\nexcept Error as e:\n handle(e)\n", ".py"], + typescript: ["try { work(); } catch (e) { handle(e); }\n", ".ts"] + }.each do |language, (source, suffix)| + with_language_file(source, suffix, language) do |file| + root, = parse_language(file, language) + + refute_empty nodes_of_type(root, "RESCUE") + resbodies = nodes_of_type(root, "RESBODY") + refute_empty resbodies + refute_nil resbodies.first.children.first if %i[ruby python].include?(language) + end + end + end + + def test_rescue_clause_preserves_qualified_exception_constant + with_language_file("begin\n work\nrescue Net::Error\n handle\nend\n", ".rb", :ruby) do |file| + root, = parse_language(file, :ruby) + resbody = nodes_of_type(root, "RESBODY").first + + refute_nil resbody + exceptions = resbody.children.first + assert_equal "LIST", exceptions.type.to_s + assert_equal ["Net::Error"], exceptions.children.map { |child| child.children.first.to_s } + end + end + + def test_ensure_body_statement_predicate + cases = [ + [ + :ruby, + "def f\n work\nensure\n cleanup\nend\n", + ".rb", + 
"body_statement", + "work\nensure\n cleanup", + true + ], + [ + :python, + "try:\n work()\nfinally:\n cleanup()\n", + ".py", + "try_statement", + "try:\n work()\nfinally:\n cleanup()", + true + ], + [ + :typescript, + "try { work(); } finally { cleanup(); }\n", + ".ts", + "try_statement", + "try { work(); } finally { cleanup(); }", + true + ], + [ + :lua, + "work()\ncleanup()\n", + ".lua", + "function_call", + "work()", + false + ] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:ensure_body_statement?, node) + end + end + end + + def test_ensure_body_statement_normalizes_across_languages + { + ruby: ["def f\n work\nensure\n cleanup\nend\n", ".rb"], + python: ["try:\n work()\nfinally:\n cleanup()\n", ".py"], + typescript: ["try { work(); } finally { cleanup(); }\n", ".ts"], + python_rescue: ["try:\n work()\nexcept Error as e:\n handle(e)\nfinally:\n cleanup()\n", ".py"] + }.each do |language, (source, suffix)| + parse_language_name = language == :python_rescue ? 
:python : language + with_language_file(source, suffix, parse_language_name) do |file| + root, = parse_language(file, parse_language_name) + + refute_empty nodes_of_type(root, "ENSURE") + refute_empty nodes_of_type(root, "RESCUE") if language == :python_rescue + end + end + end + + def test_array_literal_statement_predicate + cases = [ + [:ruby, "def f\n [a, b]\nend\n", ".rb", "body_statement", "[a, b]", true], + [:python, "def f():\n [a, b]\n", ".py", "block", "[a, b]", true], + [:typescript, "function f() { [a, b]; }\n", ".ts", "expression_statement", "[a, b];", true], + [:lua, "function f()\n {a, b}\nend\n", ".lua", "block", "\n {a, b}", true], + [:lua, "function f()\n {x = a, y = b}\nend\n", ".lua", "block", "\n {x = a, y = b}", false], + [ + :lua, + "local rocks_path = table.concat({rocks_tree, \"a_rock\"})\n", + ".lua", + "arguments", + "({rocks_tree, \"a_rock\"})", + false + ] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:array_literal_statement?, node) + end + end + end + + def test_array_literal_statement_normalizes_across_languages + { + ruby: ["def f\n [a, b]\nend\n", ".rb"], + python: ["def f():\n [a, b]\n", ".py"], + typescript: ["function f() { [a, b]; }\n", ".ts"], + lua: ["function f()\n {a, b}\nend\n", ".lua"] + }.each do |language, (source, suffix)| + with_language_file(source, suffix, language) do |file| + root, = parse_language(file, language) + lists = nodes_of_type(root, "LIST") + + refute_empty lists + assert lists.any? 
{ |node| node.text.include?("a") && node.text.include?("b") } + end + end + end + + def test_element_reference_statement_predicate + cases = [ + [:ruby, "def f\n items[0]\nend\n", ".rb", "body_statement", "items[0]", true], + [:ruby, "def f\n [0]\nend\n", ".rb", "body_statement", "[0]", false], + [:python, "def f():\n items[0]\n", ".py", "block", "items[0]", true], + [:python, "return items[0]\n", ".py", "subscript", "items[0]", true], + [:typescript, "function f() { items[0]; }\n", ".ts", "expression_statement", "items[0];", true], + [:typescript, "return items[0];\n", ".ts", "subscript_expression", "items[0]", true], + [:lua, "return items[1]\n", ".lua", "expression_list", "items[1]", true], + [:lua, "print(items[1])\n", ".lua", "bracket_index_expression", "items[1]", true] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:element_reference_statement?, node) + end + end + end + + def test_element_reference_statement_normalizes_across_languages + { + ruby: ["def f\n items[0]\nend\n", ".rb"], + python: ["def f():\n items[0]\n", ".py"], + typescript: ["function f() { items[0]; }\n", ".ts"], + lua: ["return items[1]\n", ".lua"] + }.each do |language, (source, suffix)| + with_language_file(source, suffix, language) do |file| + root, = parse_language(file, language) + calls = nodes_of_type(root, "CALL") + + assert calls.any? 
{ |node| node.children[1] == :[] && node.text.include?("items") }, + "expected element reference CALL for #{language}" + end + end + end + + def test_hash_literal_statement_predicate + cases = [ + [:ruby, "def f\n {a: b}\nend\n", ".rb", "body_statement", "{a: b}", true], + [:python, "def f():\n {\"a\": b}\n", ".py", "block", "{\"a\": b}", true], + [:typescript, "function f() { ({a: b}); }\n", ".ts", "expression_statement", "({a: b});", true], + [:typescript, "return {a: b};\n", ".ts", "object", "{a: b}", true], + [:lua, "function f()\n {a = b}\nend\n", ".lua", "block", "\n {a = b}", true], + [:lua, "function f()\n {a, b}\nend\n", ".lua", "block", "\n {a, b}", false], + [ + :lua, + "assert.same(install, { bin = { P\"bin/binfile\" } })\n", + ".lua", + "arguments", + "(install, { bin = { P\"bin/binfile\" } })", + false + ] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:hash_literal_statement?, node) + end + end + end + + def test_hash_literal_statement_normalizes_across_languages + { + ruby: ["def f\n {a: b}\nend\n", ".rb"], + python: ["def f():\n {\"a\": b}\n", ".py"], + typescript: ["function f() { ({a: b}); }\n", ".ts"], + lua: ["function f()\n {a = b}\nend\n", ".lua"] + }.each do |language, (source, suffix)| + with_language_file(source, suffix, language) do |file| + root, = parse_language(file, language) + hashes = nodes_of_type(root, "HASH") + + assert hashes.any? 
{ |node| node.text.include?("a") && node.text.include?("b") }, + "expected hash literal HASH for #{language}" + assert_empty nodes_of_type(root, "OBJECT") if language == :typescript + assert_empty nodes_of_type(root, "FCALL").select { |node| node.children.first == :"" } if language == :lua + end + end + end + + def test_lua_call_arguments_with_keyed_table_preserve_argument_list + with_language_file("assert.same(install, { bin = { P\"bin/binfile\" } })\n", ".lua", :lua) do |file| + root, = parse_language(file, :lua) + call = nodes_of_type(root, "FUNCTION_CALL").find { |node| node.text.start_with?("assert.same") } + + refute_nil call + arguments = call.children[1] + assert_equal "ARGUMENTS", arguments.type.to_s + assert_equal %w[LVAR HASH], arguments.children.map(&:type).map(&:to_s) + assert_equal "install", arguments.children.first.children.first + end + end + + def test_lua_call_arguments_with_positional_table_preserve_table_fields + with_language_file("local rocks_path = table.concat({rocks_tree, \"a_rock\"})\n", ".lua", :lua) do |file| + root, = parse_language(file, :lua) + arguments = nodes_of_type(root, "ARGUMENTS").find { |node| node.text == "({rocks_tree, \"a_rock\"})" } + + refute_nil arguments + table = arguments.children.first + assert_equal "ARGUMENTS", arguments.type.to_s + assert_equal "HASH", table.type.to_s + assert_equal %w[FIELD FIELD], table.children.map(&:type).map(&:to_s) + assert_empty table.children.first.children + assert_equal "STR", table.children[1].children.first.type.to_s + end + end + + def test_empty_body_statement_predicate + cases = [ + [:python, "def f():\n pass\n", ".py", "block", "pass", true], + [:typescript, "function f() {}\n", ".ts", "statement_block", "{}", true], + [:typescript, "function f() { work(); }\n", ".ts", "statement_block", "{ work(); }", false] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + 
normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:empty_body_statement?, node) + end + end + end + + def test_empty_body_statement_normalizes_across_languages + { + python: ["def f():\n pass\n", ".py"], + typescript: ["function f() {}\n", ".ts"] + }.each do |language, (source, suffix)| + with_language_file(source, suffix, language) do |file| + root, = parse_language(file, language) + defn = nodes_of_type(root, "DEFN").first + scope = defn.children[1] + + assert_nil scope.children[2] + assert_empty nodes_of_type(root, "VCALL").select { |node| node.text == "pass" } if language == :python + end + end + end + + def test_heredoc_body_statement_predicate + ruby_source = "def f\n puts <<~TXT\n hi\n TXT\nend\n" + cases = [ + [:ruby, ruby_source, ".rb", "body_statement", "puts <<~TXT\n hi\n TXT", true], + [:ruby, ruby_source, ".rb", "call", "puts <<~TXT", false], + [:python, "def f():\n value = 1\n", ".py", "block", "value = 1", false], + [:typescript, "function f() { value = 1; }\n", ".ts", "statement_block", "{ value = 1; }", false], + [:lua, "function f()\n value = 1\nend\n", ".lua", "block", "value = 1", false] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:heredoc_body_statement?, node) + end + end + end + + def test_heredoc_call_for_body_predicate + ruby_source = "def f\n puts <<~TXT\n hi\n TXT\nend\n" + cases = [ + [:ruby, ruby_source, ".rb", "body_statement", "puts <<~TXT\n hi\n TXT", true], + [:ruby, ruby_source, ".rb", "call", "puts <<~TXT", 
true], + [:ruby, ruby_source, ".rb", "argument_list", "<<~TXT", true], + [:ruby, ruby_source, ".rb", "method", ruby_source.chomp, false], + [:python, "def f():\n value = 1\n", ".py", "block", "value = 1", false] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:heredoc_call_for_body?, node) + end + end + end + + def test_ruby_heredoc_argument_normalizes_as_dynamic_string + with_language_file("def f\n puts <<~TXT\n hi\n TXT\nend\n", ".rb", :ruby) do |file| + root, = parse_language(file, :ruby) + call = nodes_of_type(root, "FCALL").find { |node| node.text == "puts <<~TXT" } + + refute_nil call + assert_equal :puts, call.children[0] + + args = call.children[1] + assert_equal "LIST", args.type.to_s + dstr = args.children.first + assert_equal "DSTR", dstr.type.to_s + assert_equal ["STR"], dstr.children.map { |child| child.type.to_s } + assert_equal "\n hi\n ", dstr.children.first.children.first + end + end + + def test_normalize_children_skips_heredoc_body + with_language_file("def f\n x = <<~TXT\n hi\n TXT\nend\n", ".rb", :ruby) do |file| + document = parse_syntax(file, :ruby) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + body = ts_nodes(document.root).find do |node| + node.kind == "body_statement" && node.text.include?("<<~TXT") + end + + refute_nil body + children = normalizer.send(:normalize_children, body) + assert_equal ["LASGN"], children.map { |child| child.type.to_s } + assert_equal ["STR"], children.first.children[1].children.map { |child| child.type.to_s } + end + end + + def test_with_current_heredoc_body_restores_previous_body + normalizer = Decomplex::Ast::TreeSitterNormalizer.allocate + 
normalizer.instance_variable_set(:@current_heredoc_body, :outer) + + result = normalizer.send(:with_current_heredoc_body, :inner) do + assert_equal :inner, normalizer.instance_variable_get(:@current_heredoc_body) + :result + end + + assert_equal :result, result + assert_equal :outer, normalizer.instance_variable_get(:@current_heredoc_body) + end + + def test_interpolated_statement_predicate + cases = [ + [:ruby, "def f\n \"hi \#{name}\"\nend\n", ".rb", "body_statement", "\"hi \#{name}\"", true], + [:python, "def f():\n f\"hi {name}\"\n", ".py", "block", "f\"hi {name}\"", false], + [:typescript, "function f() { `hi ${name}`; }\n", ".ts", "expression_statement", "`hi ${name}`;", false], + [:lua, "function f()\n \"hi\"\nend\n", ".lua", "block", "\n \"hi\"", false] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal expected, normalizer.send(:interpolated_statement?, node) + end + end + end + + def test_concatenated_string_statement_predicate + cases = [ + [:ruby, "def f\n \"a\" \"b\"\nend\n", ".rb", "body_statement", "\"a\" \"b\"", true], + [:python, "def f():\n \"a\" \"b\"\n", ".py", "block", "\"a\" \"b\"", true], + [:typescript, "function f() { \"a\"; }\n", ".ts", "expression_statement", "\"a\";", false], + [:lua, "function f()\n \"a\"\nend\n", ".lua", "block", "\n \"a\"", false] + ] + + cases.each do |language, source, suffix, kind, text, expected| + with_language_file(source, suffix, language) do |file| + document = parse_syntax(file, language) + normalizer = Decomplex::Ast::TreeSitterNormalizer.new(document) + node = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + + refute_nil node + assert_equal 
expected, normalizer.send(:concatenated_string_statement?, node) + end + end + end + + def test_concatenated_string_statement_normalizes_python_adjacent_strings + with_python_file(<<~PY) do |file| + def f(): + "a" "b" + PY + root, = parse_python(file) + dstr = nodes_of_type(root, "DSTR").find { |node| node.text == "\"a\"" } + + refute_nil dstr + assert_equal %w[STR STR], dstr.children.map(&:type).map(&:to_s) + end + end + + private + + def ast_node(type, children: []) + Decomplex::Ast::Node.new( + type: type, + children: children, + first_lineno: 1, + first_column: 0, + last_lineno: 1, + last_column: 1, + text: type.to_s + ) + end + + def fake_document(language) + Object.new.tap { |document| document.define_singleton_method(:language) { language } } + end + + def ruby_syntax_node(source, kind, text) + found = nil + with_language_file(source, ".rb", :ruby) do |file| + document = parse_syntax(file, :ruby) + found = ts_nodes(document.root).find { |candidate| candidate.kind == kind && candidate.text == text } + end + refute_nil found + found + end + + def infix_parts_text(normalizer, node) + normalizer.send(:infix_statement_parts, node).map do |part| + part.respond_to?(:text) ? 
part.text : part + end + end + + def parse_python(file) + parse_language(file, :python) + end + + def parse_language(file, language) + with_env("DECOMPLEX_FORCE_LANGUAGE", language.to_s) do + Decomplex::Ast.normalized_cache.clear + Decomplex::Ast.parse(file) + end + rescue LoadError => e + skip e.message + end + + def parse_syntax(file, language) + with_env("DECOMPLEX_FORCE_LANGUAGE", language.to_s) do + Decomplex::Syntax.parse(file, parser: "tree_sitter") + end + rescue LoadError => e + skip e.message + end + + def nodes_of_type(node, type) + out = [] + walk_nodes(node) { |child| out << child if child.type.to_s == type } + out + end + + def walk_nodes(node, &block) + return unless Decomplex::Ast.node?(node) + + yield node + node.children.each { |child| walk_nodes(child, &block) } + end + + def ts_nodes(node) + out = [] + walk_ts_nodes(node) { |child| out << child } + out + end + + def walk_ts_nodes(node, &block) + return unless node.respond_to?(:kind) + + yield node + node.named_children.each { |child| walk_ts_nodes(child, &block) } + end + + def with_python_file(source) + with_language_file(source, ".py", :python) { |file| yield file } + end + + def with_language_file(source, suffix, _language) + file = Tempfile.new(["decomplex_ast", suffix]) + file.write(source) + file.close + yield file.path + ensure + file&.unlink + end + + def with_env(key, value) + old = ENV[key] + value.nil? ? ENV.delete(key) : ENV[key] = value + yield + ensure + old.nil? ? 
ENV.delete(key) : ENV[key] = old + end +end diff --git a/gems/decomplex/test/decision_pressure_test.rb b/gems/decomplex/test/decision_pressure_test.rb index db4906cb9..492c01f2b 100644 --- a/gems/decomplex/test/decision_pressure_test.rb +++ b/gems/decomplex/test/decision_pressure_test.rb @@ -155,4 +155,17 @@ def a(n) ensure f&.unlink end + + def test_scan_does_not_use_legacy_ast_parse + Decomplex::Ast.stub(:parse, ->(*) { raise "legacy Ast.parse should not be used" }) do + r = rank(<<~RB) + def a(node) + ti = node.full_type + return 1 if ti.is_a?(Type) + end + RB + + assert_equal ".full_type", r.first[:contract] + end + end end diff --git a/gems/decomplex/test/derived_state_test.rb b/gems/decomplex/test/derived_state_test.rb index 8002dad2d..a201944d6 100644 --- a/gems/decomplex/test/derived_state_test.rb +++ b/gems/decomplex/test/derived_state_test.rb @@ -104,4 +104,18 @@ def f(a) assert_equal "b", out.first[:derived] assert_equal "a", out.first[:source] end + + def test_scan_does_not_use_legacy_ast_parse + Decomplex::Ast.stub(:parse, ->(*) { raise "legacy Ast.parse should not be used" }) do + out = scan(<<~RB) + def f(a) + b = a + 1 + a = recompute(a) + use(b) + end + RB + + assert_equal 1, out.size + end + end end diff --git a/gems/decomplex/test/detector_runner_test.rb b/gems/decomplex/test/detector_runner_test.rb new file mode 100644 index 000000000..d34c3581b --- /dev/null +++ b/gems/decomplex/test/detector_runner_test.rb @@ -0,0 +1,709 @@ +# frozen_string_literal: true + +require "minitest/autorun" +require "open3" +require "tempfile" +require_relative "../lib/decomplex" + +class DetectorRunnerTest < Minitest::Test + FIXTURE = "gems/decomplex/test/fixtures/co_update_sample.rb" + + def test_co_update_ruby_engine_canonical_json_is_frozen + expected = <<~JSON + 
{"co_written_pairs":[{"pair":["provenance","storage"],"sites":["gems/decomplex/test/fixtures/co_update_sample.rb:stable_one","gems/decomplex/test/fixtures/co_update_sample.rb:stable_two","gems/decomplex/test/fixtures/co_update_sample.rb:stable_three"],"support":3}],"neglected_updates":[{"at":"gems/decomplex/test/fixtures/co_update_sample.rb:misses_provenance:17","has":"storage","missing":"provenance","pair":["provenance","storage"],"recv":"node","spans":{"gems/decomplex/test/fixtures/co_update_sample.rb:misses_provenance:17":[17,2,17,22]},"support":3}]} + JSON + + assert_equal expected, Decomplex::DetectorRunner.canonical_json("co-update", [FIXTURE], engine: "ruby") + end + + def test_co_update_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("co-update", [FIXTURE]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + + def test_native_command_language_for_recognizes_jvm_and_swift_extensions + assert_equal "java", Decomplex::Native::Command.language_for("Example.java") + assert_equal "kotlin", Decomplex::Native::Command.language_for("Example.kt") + assert_equal "kotlin", Decomplex::Native::Command.language_for("Example.kts") + assert_equal "swift", Decomplex::Native::Command.language_for("Example.swift") + end + + def test_miner_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? 
+ + Tempfile.create(["decomplex-miner", ".rb"]) do |file| + file.write(<<~RUBY) + def one(a, b, c) + a && b && c + end + + def two(a, b, c) + a && b && c + end + + def three(a, b, c) + a && b && c + end + + def broken(a, b) + a && b + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("miner", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + + def test_flay_similarity_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? + + Tempfile.create(["decomplex-flay", ".rb"]) do |file| + file.write(<<~RUBY) + def one(a, b) + total = a + b + puts total + total * 2 + end + + def two(x, y) + total = x + y + puts total + total * 2 + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("flay-similarity", [file.path], mass: 4, fuzzy: 1) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + + def test_semantic_alias_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? + + Tempfile.create(["decomplex-semantic-alias", ".rb"]) do |file| + file.write(<<~RUBY) + def frame?; @provenance == :frame; end + def is_frame?; provenance == :frame; end + def heap?; @provenance == :heap; end + def somewhere(node) + return 1 if node.provenance == :frame + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("semantic-alias", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + + def test_predicate_alias_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? 
+ + Tempfile.create(["decomplex-predicate-alias", ".rb"]) do |file| + file.write(<<~RUBY) + def first?; true; end + def second?; true; end + + def nil_body; nil; end + def other_nil_body; nil; end + + def setup + super + self[:type_params] ||= [] + end + + def type_params + self[:type_params] ||= [] + end + + def emit_one + <<~ZIG.chomp + hi + ZIG + end + + def emit_two + <<~ZIG.chomp + bye + ZIG + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("predicate-alias", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + + def test_temporal_ordering_pressure_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? + + Tempfile.create(["decomplex-temporal-ordering", ".rb"]) do |file| + file.write(<<~RUBY) + class Order + def one; @a = 1; end + def two; @a = 2; @b = 3; end + def three; @b = 4; end + def reader; @a; end + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("temporal-ordering-pressure", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + + def test_state_branch_density_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? 
+ + Tempfile.create(["decomplex-state-branch", ".rb"]) do |file| + file.write(<<~RUBY) + class User < T::Struct + const :name, String + const :admin, T::Boolean + end + + class Checker + sig { params(user: User).void } + def check(user) + if user.admin + @checked = true + end + if @checked && user.name == "admin" + puts "Hello" + end + end + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("state-branch-density", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + + def test_redundant_nil_guard_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? + + Tempfile.create(["decomplex-redundant-nil", ".rb"]) do |file| + file.write(<<~RUBY) + def check(x) + if x + puts x.nil? + x&.foo + end + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("redundant-nil-guard", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + + def test_state_mesh_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? + + Tempfile.create(["decomplex-state-mesh", ".rb"]) do |file| + file.write(<<~RUBY) + class Mesh + def initialize + @a = 1 + @b = 2 + end + + def writer + @a = 3 + end + + def reader + @a + @b + end + + def a_alias + @a + end + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("state-mesh", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + + def test_inconsistent_rename_clone_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? 
+ + Tempfile.create(["decomplex-rename", ".rb"]) do |file| + file.write(<<~RUBY) + def one(a, b) + res = a + b + puts res + res * 2 + end + + def two(x, b) + res = x + b + puts res + res * 2 + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("inconsistent-rename-clone", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + + def test_derived_state_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? + + Tempfile.create(["decomplex-derived", ".rb"]) do |file| + file.write(<<~RUBY) + def check(a) + b = a + 1 + a = 2 + puts b + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("derived-state", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + + def test_implicit_control_flow_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? + + Tempfile.create(["decomplex-implicit", ".rb"]) do |file| + file.write(<<~RUBY) + class Flow + def prepare; @a = 1; end + def validate; @b = @a; end + def run + prepare + validate + end + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("implicit-control-flow", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + + def test_weighted_inlined_complexity_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? + + Tempfile.create(["decomplex-weighted", ".rb"]) do |file| + file.write(<<~RUBY) + class Complex + def entry + helper_one + helper_two if condition? 
+ end + + private + def helper_one + if a; b; else; c; end + end + + def helper_two + while x; y; end + end + + def condition?; true; end + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("weighted-inlined-complexity", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + + def test_locality_drag_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? + + Tempfile.create(["decomplex-locality", ".rb"]) do |file| + file.write(<<~RUBY) + def heavy(x) + y = x + 1 + # Unrelated work + a = 1; b = 2; c = 3; d = 4; e = 5 + puts a, b, c, d, e + # Finally use y + puts y + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("locality-drag", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + + def test_operational_discontinuity_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? + + Tempfile.create(["decomplex-discontinuity", ".rb"]) do |file| + file.write(<<~RUBY) + def phase_shift + a = 1 + b = 2 + + # Phase 2 + x = 3 + y = 4 + puts x, y + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("operational-discontinuity", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + + def test_oversized_predicate_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? 
+ + Tempfile.create(["decomplex-oversized", ".rb"]) do |file| + file.write(<<~RUBY) + def complex_check + if a && b && c && d + puts "Too big" + end + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("oversized-predicate", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + + def test_path_condition_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? + + Tempfile.create(["decomplex-path", ".rb"]) do |file| + file.write(<<~RUBY) + def one + if a && b + puts "Here" + end + end + + def two + if a + if b + puts "Also here" + end + end + end + + def three + if a + puts "Neglected" + end + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("path-condition", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + + def test_sequence_mine_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? + + Tempfile.create(["decomplex-sequence", ".rb"]) do |file| + file.write(<<~RUBY) + def one + prepare + validate + execute + end + + def two + prepare + validate + execute + end + + def three + prepare + validate + execute + end + + def broken + prepare + execute + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("sequence-mine", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + + def test_function_lcom_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? 
+ + Tempfile.create(["decomplex-lcom", ".rb"]) do |file| + file.write(<<~RUBY) + def disjoint_concerns + a = 1 + b = a + 1 + puts b + + x = 2 + y = x + 2 + puts y + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("function-lcom", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + + def test_false_simplicity_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? + + Tempfile.create(["decomplex-false", ".rb"]) do |file| + file.write(<<~RUBY) + class Meta + def hack + send(:foo) + puts "Hidden IO" + $GLOBAL_STATE = 1 + end + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("false-simplicity", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + + def test_fat_union_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? + + Tempfile.create(["decomplex-fat", ".rb"]) do |file| + file.write(<<~RUBY) + def handle(node) + case node + when CallNode + node.name + node.args + when LocalVarNode + node.name + node.type + end + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("fat-union", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + + def test_decision_pressure_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? + + Tempfile.create(["decomplex-decision-pressure", ".rb"]) do |file| + file.write(<<~RUBY) + def scan(node) + value = node.respond_to?(:symbol) ? node.symbol&.reg : nil + value.nil? 
+ ensure + node&.cleanup + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("decision-pressure", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + + def test_local_flow_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? + + Tempfile.create(["decomplex-local-flow", ".rb"]) do |file| + file.write(<<~RUBY) + class Billing + def mixed(price, tax) + subtotal = price + tax + total = subtotal.round + + timestamp = Time.now + buffer = [] + buffer << timestamp + [total, buffer] + end + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("local-flow", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + + def test_structural_topology_rust_engine_matches_ruby_engine_byte_for_byte + skip "cargo is not available" unless cargo_available? + + Tempfile.create(["decomplex-structural-topology", ".rb"]) do |file| + file.write(<<~RUBY) + class Worker + def run(items) + prepare + if ready? + validate + end + items.each do |item| + helper(item) + end + end + + private + def prepare; end + def ready?; true; end + def validate; end + def helper(item); item; end + + public :validate + end + RUBY + file.flush + + ok, ruby_json, rust_json = Decomplex::DetectorRunner.compare("structural-topology", [file.path]) + + assert ok, diff_message(ruby_json, rust_json) + assert_equal ruby_json, rust_json + end + end + + def test_detector_cli_compare_engines_outputs_canonical_json + skip "cargo is not available" unless cargo_available? 
+ + stdout, stderr, status = Open3.capture3( + "ruby", + "gems/decomplex/exe/decomplex", + "detector", + "co-update", + "--compare-engines", + FIXTURE + ) + + assert status.success?, stderr + assert_equal Decomplex::DetectorRunner.canonical_json("co-update", [FIXTURE], engine: "ruby"), stdout + end + + def test_detector_cli_compare_engines_accepts_jobs + skip "cargo is not available" unless cargo_available? + + stdout, stderr, status = Open3.capture3( + "ruby", + "gems/decomplex/exe/decomplex", + "detector", + "co-update", + "--compare-engines", + "--jobs=2", + FIXTURE + ) + + assert status.success?, stderr + assert_equal Decomplex::DetectorRunner.canonical_json("co-update", [FIXTURE], engine: "ruby"), stdout + end + + def test_detector_cli_benchmark_keeps_json_stdout_canonical + stdout, stderr, status = Open3.capture3( + "ruby", + "gems/decomplex/exe/decomplex", + "detector", + "co-update", + "--engine=ruby", + "--json", + "--benchmark", + FIXTURE + ) + + assert status.success?, stderr + assert_equal Decomplex::DetectorRunner.canonical_json("co-update", [FIXTURE], engine: "ruby"), stdout + assert_match(/decomplex detector=co-update engine=ruby files=1 elapsed=\d+\.\d+s/, stderr) + end + + private + + def cargo_available? 
+ system("cargo", "--version", out: File::NULL, err: File::NULL) + end + + def diff_message(left, right) + "ruby and rust detector output differed\n--- ruby\n#{left}\n--- rust\n#{right}" + end +end diff --git a/gems/decomplex/test/examples_oracle_test.rb b/gems/decomplex/test/examples_oracle_test.rb new file mode 100644 index 000000000..f556cefcf --- /dev/null +++ b/gems/decomplex/test/examples_oracle_test.rb @@ -0,0 +1,350 @@ +# frozen_string_literal: true + +require "json" +require "minitest/autorun" +require_relative "../lib/decomplex/detector_runner" + +class ExamplesOracleTest < Minitest::Test + EXAMPLES_ROOT = File.expand_path("../examples", __dir__) + ORACLE_DIR = File.join(EXAMPLES_ROOT, "oracles") + ENGINES = Decomplex::DetectorRunner::ENGINES.freeze + SOURCE_EXTENSIONS = Decomplex::Syntax.supported_exts.freeze + LOCATION_KEYS = %w[ + at boundaries boundary_crossings component_lines defn examples file + gap_lines line locations predicate raw reason sites span spans source + ].freeze + + ORACLE_PATHS = Dir[File.join(ORACLE_DIR, "*.json")].sort.freeze + FIXTURE_PATHS = Dir[File.join(EXAMPLES_ROOT, "*", "*")] + .select { |path| SOURCE_EXTENSIONS.include?(File.extname(path)) } + .sort + .freeze + DETECTOR_FACT_PATHS = Dir[File.join(EXAMPLES_ROOT, "facts", "detectors", "*.json")].sort.freeze + + def test_shared_oracle_files_exist + refute_empty ORACLE_PATHS + end + + def test_detector_fact_oracles_exist + refute_empty DETECTOR_FACT_PATHS + end + + def test_shared_oracles_are_engine_agnostic + pinned = ORACLE_PATHS.select { |path| JSON.parse(File.read(path)).key?("engine") } + + assert_empty pinned, "shared example oracles must not pin detector engines:\n#{pinned.join("\n")}" + end + + def test_each_detector_has_one_fixture_per_language + languages = FIXTURE_PATHS.map { |path| File.basename(File.dirname(path)) }.uniq.sort + detectors = ORACLE_PATHS.map { |path| File.basename(path, ".json") }.sort + + detectors.each do |detector| + actual = FIXTURE_PATHS + 
.select { |path| File.basename(path, File.extname(path)) == detector } + .map { |path| File.basename(File.dirname(path)) } + .sort + assert_equal languages, actual, "#{detector} fixture languages" + end + end + + FIXTURE_PATHS.product(ENGINES).each_with_index do |(fixture_path, engine), index| + language = File.basename(File.dirname(fixture_path)) + detector = File.basename(fixture_path, File.extname(fixture_path)) + method_name = "test_#{index}_#{engine}_#{language}_#{detector.tr("-", "_")}_matches_shared_oracle" + + define_method(method_name) do + assert_fixture_matches_shared_oracle(fixture_path, engine) + end + end + + DETECTOR_FACT_PATHS.product(ENGINES).each_with_index do |(fixture_path, engine), index| + detector = File.basename(fixture_path, ".json") + method_name = "test_detector_fact_#{index}_#{engine}_#{detector.tr("-", "_")}_matches_exact_oracle" + + define_method(method_name) do + assert_detector_fact_fixture_matches_exact_oracle(fixture_path, engine) + end + end + + private + + def assert_fixture_matches_shared_oracle(fixture_path, engine) + detector = File.basename(fixture_path, File.extname(fixture_path)) + oracle_path = File.join(ORACLE_DIR, "#{detector}.json") + + assert File.file?(oracle_path), "missing shared oracle #{oracle_path}" + + oracle = JSON.parse(File.read(oracle_path)) + expected = oracle.fetch("expected") + assert meaningful?(expected), "#{oracle_path} expected projection is empty" + + options = symbolize_options(oracle.fetch("options", {})) + actual = JSON.parse( + Decomplex::DetectorRunner.canonical_json( + oracle.fetch("detector"), + [fixture_path], + engine: engine, + **options + ) + ) + + assert_equal expected, project_detector_output(detector, actual), "#{engine} #{fixture_path}" + end + + def assert_detector_fact_fixture_matches_exact_oracle(fixture_path, engine) + fixture = JSON.parse(File.read(fixture_path)) + expected = fixture.fetch("expected") + assert meaningful?(expected), "#{fixture_path} expected output is empty" + + 
actual = JSON.parse( + Decomplex::DetectorRunner.canonical_json_from_fact_fixture(fixture_path, engine: engine) + ) + + assert_equal expected, actual, "#{engine} #{fixture_path}" + end + + def symbolize_options(options) + options.each_with_object({}) { |(key, value), out| out[key.to_sym] = value } + end + + def project_detector_output(detector, output) + case detector + when "co-update" + { + "co_written_pairs" => rows(output["co_written_pairs"], %w[pair support]), + "neglected_updates" => rows(output["neglected_updates"], %w[pair support has missing]) + } + when "decision-pressure" + rows(output, %w[contract decisions essential methods]) + when "predicate-alias" + { + "alias_clusters" => Array(output["alias_clusters"]).map do |row| + { "name_count" => Array(row["names"]).size } + end + } + when "miner" + { + "missing_abstractions" => Array(output["missing_abstractions"]).map do |row| + pick(row, %w[kind members support scatter]) + end, + "neglected_conditions" => rows(output["neglected_conditions"], %w[pattern support missing]) + } + when "semantic-alias" + { + "alias_clusters" => Array(output["alias_clusters"]).map do |row| + { "canon" => canonical_predicate(row["canon"]), "name_count" => Array(row["names"]).size } + end, + "reification_miss_count" => Array(output["reification_misses"]).size + } + when "flay-similarity" + Array(output["findings"]).map do |row| + pick(row, %w[clone_type node]).merge("site_count" => Array(row["sites"]).size) + end + when "temporal-ordering-pressure" + Array(output).map do |row| + pick(row, %w[owner public_methods state_methods writers orderings]).merge( + "state_fields" => canonical_state_refs(row["state_fields"]), + "shared_fields" => canonical_state_refs(row["shared_fields"]) + ) + end + when "state-branch-density" + Array(output).map do |row| + pick(row, %w[decisions]).merge( + "method" => canonical_method_name(row["method"]), + "state_refs" => canonical_state_refs(row["state_refs"]) + ) + end + when "redundant-nil-guard" + 
rows(output, %w[local]) + when "state-mesh" + project_state_mesh(output) + when "inconsistent-rename-clone" + Array(output).map do |row| + pick(row, %w[ref_name]).merge("divergent_count" => Array(row["divergent"]).size) + end + when "derived-state" + rows(output, %w[derived source]) + when "implicit-control-flow" + { + "ordered_protocols" => project_protocols(output["ordered_protocols"]), + "order_drift" => project_protocols(output["order_drift"]) + } + when "weighted-inlined-complexity" + Array(output).map do |row| + pick(row, %w[method depth]).merge("callee_count" => Array(row["single_caller_callees"]).size) + end + when "locality-drag" + rows(output, %w[variable]) + when "operational-discontinuity" + rows(output, %w[resets confidence]) + when "oversized-predicate" + Array(output["findings"]).map do |row| + pick(row, %w[count]).merge("atom_count" => Array(row["atoms"]).size) + end + when "path-condition" + Array(output["neglected"]).map do |row| + { + "pattern" => canonical_predicate_atoms(row["pattern"]), + "support" => row["support"], + "missing" => canonical_predicate(row["missing"]), + "action" => canonical_action(row["action"]) + } + end + when "sequence-mine" + rows(output["broken"], %w[pair support has missing]) + when "function-lcom" + rows(output, %w[mode components locals statements terminal_join]) + when "false-simplicity" + rows(output, %w[kind]) + when "fat-union" + Array(output["fat_unions"]).map do |row| + pick(row, %w[common variant degenerate support scatter]).merge( + "variant_set" => canonical_variants(row["variant_set"]) + ) + end + when "local-flow" + Array(output).map do |method| + { + "method" => method["name"], + "statements" => Array(method["statements"]).map do |statement| + row = pick(statement, %w[reads writes dependencies co_uses]) + row["co_uses"] = canonical_co_uses(row.fetch("co_uses", [])) + row + end, + "boundaries" => rows(method["boundaries"], %w[before_index after_index kind]) + } + end + when "structural-topology" + { + 
"method_count" => Array(output["methods"]).size, + "edges" => rows(output["edges"], %w[caller_name callee_name type]) + } + else + scrub_locations(output) + end + end + + def project_state_mesh(output) + state_mesh = output.fetch("state_mesh", {}) + fields = output.fetch("fields", {}) + { + "state_mesh" => pick(state_mesh, %w[total_fields total_writes total_reads total_re_derivations]), + "field_names" => canonical_state_refs(fields.keys) + } + end + + def project_protocols(rows) + Array(rows).map do |row| + pick(row, %w[protocol dependency support observed missing]).merge( + "states" => canonical_state_refs(row["states"]) + ) + end + end + + def canonical_variants(value) + Array(value).map do |item| + item.to_s + .sub(/\A([A-Z][A-Za-z0-9]*)_([A-Z][A-Za-z0-9]*)\z/, '\1.\2') + .tr(":", ".") + .gsub(/\.+/, ".") + end.sort + end + + def canonical_co_uses(value) + Array(value).map { |pair| Array(pair).map(&:to_s).sort }.sort_by { |pair| JSON.generate(pair) } + end + + def canonical_state_refs(value) + Array(value).map do |item| + text = item.to_s + text = text.sub(/\A@/, "") + text = text.sub(/\A(?:self|this)\./, "") + text + end.uniq.sort + end + + def canonical_method_name(value) + value.to_s.split(/[.:#]/).last.to_s + end + + def canonical_predicate_atoms(value) + Array(value).map { |item| canonical_predicate(item) }.sort + end + + def canonical_predicate(value) + text = value.to_s.strip + text = text.delete_suffix(";").strip + text = text.gsub(/:([A-Za-z_]\w*)/) { Regexp.last_match(1).upcase } + text = text.gsub(/\b([A-Za-z_]\w*(?:\.[A-Za-z_]\w*)*)\.(\w+)\?/, '\1.\2') + text = text.gsub(/\b([A-Za-z_]\w*(?:\.[A-Za-z_]\w*)*)\.(\w+)\(\)/, '\1.\2') + text + end + + def canonical_action(value) + canonical_predicate(value).sub(/\A([A-Za-z_]\w*)\((.*)\)\z/, '\1(\2)') + end + + def present_rows(value) + Array(value).empty? ? 
[] : [{ "present" => true }] + end + + def rows(value, keys) + Array(value).map { |row| pick(row, keys) } + end + + def pick(row, keys) + keys.each_with_object({}) do |key, out| + out[key] = canonical_value(row[key]) if row.key?(key) + end + end + + def canonical_value(value) + case value + when Hash + value.keys.map(&:to_s).sort.each_with_object({}) do |key, out| + original = value.key?(key) ? key : value.keys.find { |candidate| candidate.to_s == key } + out[key] = canonical_value(value.fetch(original)) + end + when Array + value.map { |item| canonical_value(item) } + when Symbol + value.to_s + else + value + end + end + + def scrub_locations(value) + case value + when Hash + value.keys.map(&:to_s).sort.each_with_object({}) do |key, out| + next if LOCATION_KEYS.include?(key) + + original = value.key?(key) ? key : value.keys.find { |candidate| candidate.to_s == key } + out[key] = scrub_locations(value.fetch(original)) + end + when Array + value.map { |item| scrub_locations(item) } + when Symbol + value.to_s + else + value + end + end + + def meaningful?(value) + case value + when Hash + value.any? { |_key, item| meaningful?(item) } + when Array + !value.empty? && value.any? { |item| meaningful?(item) } + when NilClass + false + when String + !value.empty? 
+ else + true + end + end +end diff --git a/gems/decomplex/test/false_simplicity_test.rb b/gems/decomplex/test/false_simplicity_test.rb index 9ccb053a3..cd50333ea 100644 --- a/gems/decomplex/test/false_simplicity_test.rb +++ b/gems/decomplex/test/false_simplicity_test.rb @@ -30,24 +30,6 @@ def scan2(ruby1, ruby2) Decomplex::FalseSimplicity.scan(paths) end - def ast(type, children = [], line: 1) - Decomplex::Ast::Node.new( - type: type, - children: children, - first_lineno: line, - first_column: 0, - last_lineno: line, - last_column: 1, - text: "" - ) - end - - def scan_ast(root, language:) - detector = Decomplex::FalseSimplicity.new("inline", [], language: language) - detector.walk(root, [], []) - Decomplex::FalseSimplicity::Report.new(detector.hits, detector.classrecs) - end - def has(r, kind, detail = nil) r.hits.any? { |h| h.kind == kind && (detail.nil? || h.detail == detail) } end @@ -58,29 +40,6 @@ def details(r, kind) # ---- 1. hidden dynamic dispatch ------------------------------------- - def test_non_ruby_languages_do_not_inherit_ruby_lexicon - root = ast(:ROOT, [ - ast(:CALL, [ast(:LVAR, ["obj"]), :send, nil]), - ast(:CALL, [ast(:CONST, [:File]), :read, nil]), - ast(:FCALL, [:getattr, nil]), - ast(:FCALL, [:eval, nil]) - ]) - - ruby = scan_ast(root, language: :ruby) - assert has(ruby, :dynamic_dispatch, "send") - assert has(ruby, :hidden_io, "File.read") - - python = scan_ast(root, language: :python) - refute has(python, :dynamic_dispatch, "send") - refute has(python, :hidden_io, "File.read") - assert has(python, :dynamic_dispatch, "getattr") - - zig = scan_ast(root, language: :zig) - refute has(zig, :dynamic_dispatch, "send") - refute has(zig, :hidden_io, "File.read") - assert has(zig, :metaprogramming, "eval") - end - def test_dynamic_dispatch_positive r = scan(<<~RB) def a(o); o.send(:m, 1); end @@ -107,6 +66,18 @@ def k; yield 9; end :>=, 2 end + def test_scan_does_not_use_legacy_ast_parse + f = Tempfile.new(["fs", ".rb"]) + f.write("def a(o); 
o.send(:m); end\n") + f.close + @tmp = [f] + + Decomplex::Ast.stub(:parse, ->(*) { raise "legacy Ast.parse called" }) do + r = Decomplex::FalseSimplicity.scan([f.path]) + assert has(r, :dynamic_dispatch, "send") + end + end + def test_dynamic_dispatch_no_false_positive r = scan(<<~RB) def a(o); o.run(1); end diff --git a/gems/decomplex/test/fat_union_test.rb b/gems/decomplex/test/fat_union_test.rb index fde97706e..43344638d 100644 --- a/gems/decomplex/test/fat_union_test.rb +++ b/gems/decomplex/test/fat_union_test.rb @@ -197,4 +197,20 @@ def lower_b(n) ensure f end + + def test_scan_does_not_use_legacy_ast_parse + Decomplex::Ast.stub(:parse, ->(*) { raise "legacy Ast.parse should not be used" }) do + fu = scan(<<~RB) + def lower(n) + case n + when AST::Call then n.line; n.ty + when AST::Func then n.line; n.ty + when AST::Lit then n.line; n.ty + end + end + RB + + assert_equal 1, fu.size + end + end end diff --git a/gems/decomplex/test/fixtures/co_update_sample.rb b/gems/decomplex/test/fixtures/co_update_sample.rb new file mode 100644 index 000000000..fc183143b --- /dev/null +++ b/gems/decomplex/test/fixtures/co_update_sample.rb @@ -0,0 +1,23 @@ +def stable_one(node) + node.storage = :heap + node.provenance = :heap +end + +def stable_two(node) + node.storage = :heap + node.provenance = :heap +end + +def stable_three(node) + node.storage = :heap + node.provenance = :heap +end + +def misses_provenance(node) + node.storage = :heap +end + +def ignored_index_write(entry) + entry[:storage] = :heap + entry[:provenance] = :heap +end diff --git a/gems/decomplex/test/flay_similarity_test.rb b/gems/decomplex/test/flay_similarity_test.rb index 374e903cd..a0793690e 100644 --- a/gems/decomplex/test/flay_similarity_test.rb +++ b/gems/decomplex/test/flay_similarity_test.rb @@ -7,7 +7,10 @@ class FlaySimilarityTest < Minitest::Test def grammar_available?(language) env = "DECOMPLEX_TS_#{language.to_s.upcase}_PATH" - ENV[env] && File.file?(ENV[env]) + return true if ENV[env] && 
File.file?(ENV[env]) + + adapter = Decomplex::Syntax::TreeSitterAdapter.new + adapter.send(:grammar_candidates, language).any? { |path| File.file?(path) } end def scan(source, ext: ".rb", mass: 8, fuzzy: 1) diff --git a/gems/decomplex/test/inconsistent_rename_clone_test.rb b/gems/decomplex/test/inconsistent_rename_clone_test.rb index 49cec2e1c..35a8a50b7 100644 --- a/gems/decomplex/test/inconsistent_rename_clone_test.rb +++ b/gems/decomplex/test/inconsistent_rename_clone_test.rb @@ -81,4 +81,25 @@ def replace(parent, old_child, new_child) RB assert_empty out end + + def test_scan_does_not_use_legacy_ast_parse + Decomplex::Ast.stub(:parse, ->(*) { raise "legacy Ast.parse should not be used" }) do + out = scan(<<~RB) + def original + src = fetch(1) + check(src) + store(src) + finalize(src) + end + def pasted + dst = fetch(2) + check(dst) + store(src) + finalize(dst) + end + RB + + refute_empty out + end + end end diff --git a/gems/decomplex/test/local_flow_fact_oracle_test.rb b/gems/decomplex/test/local_flow_fact_oracle_test.rb new file mode 100644 index 000000000..93ade23ed --- /dev/null +++ b/gems/decomplex/test/local_flow_fact_oracle_test.rb @@ -0,0 +1,41 @@ +# frozen_string_literal: true + +require "json" +require "minitest/autorun" +require "tempfile" +require_relative "../lib/decomplex/detector_runner" + +class LocalFlowFactOracleTest < Minitest::Test + EXAMPLES_ROOT = File.expand_path("../examples/facts/local-flow", __dir__) + ENGINES = Decomplex::DetectorRunner::ENGINES.freeze + + FIXTURE_PATHS = Dir[File.join(EXAMPLES_ROOT, "*.json")].sort.freeze + + def test_local_flow_fact_fixtures_exist + refute_empty FIXTURE_PATHS + end + + FIXTURE_PATHS.product(ENGINES).each_with_index do |(fixture_path, engine), index| + name = File.basename(fixture_path, ".json") + method_name = "test_#{index}_#{engine}_#{name.tr("-", "_")}_local_flow_consumers_match_oracle" + + define_method(method_name) do + assert_local_flow_fact_fixture(fixture_path, engine) + end + end + + 
private + + def assert_local_flow_fact_fixture(fixture_path, engine) + fixture = JSON.parse(File.read(fixture_path)) + input = fixture.fetch("input") + fixture.fetch("expected").each do |detector, expected| + Tempfile.create(["decomplex-local-flow-fact", ".json"]) do |file| + file.write(JSON.pretty_generate({ "detector" => detector, "input" => input, "expected" => expected })) + file.flush + actual = JSON.parse(Decomplex::DetectorRunner.canonical_json_from_fact_fixture(file.path, engine: engine)) + assert_equal expected, actual, "#{engine} #{fixture_path} #{detector}" + end + end + end +end diff --git a/gems/decomplex/test/local_flow_test.rb b/gems/decomplex/test/local_flow_test.rb index 3d2f2b9b0..deab5135e 100644 --- a/gems/decomplex/test/local_flow_test.rb +++ b/gems/decomplex/test/local_flow_test.rb @@ -2,6 +2,7 @@ require "minitest/autorun" require "tempfile" +require_relative "../lib/decomplex/ast" require_relative "../lib/decomplex/local_flow" class LocalFlowTest < Minitest::Test @@ -33,7 +34,7 @@ def mixed(price, tax) terminal = summary.statements.last assert_equal Set["total", "buffer"], terminal.reads - assert_includes terminal.co_uses, ["total", "buffer"] + assert_includes terminal.co_uses.map(&:sort), ["buffer", "total"] end def test_collects_top_level_and_inline_private_methods @@ -58,6 +59,18 @@ class Worker assert_equal Set["input"], helper.statements.first.reads end + def test_scan_does_not_use_legacy_ast_parse + Decomplex::Ast.stub(:parse, ->(*) { raise "legacy Ast.parse should not be used" }) do + summaries = scan(<<~RB) + def top_level(value) + result = value + end + RB + + assert_equal ["top_level"], summaries.map(&:name) + end + end + private def scan(code) diff --git a/gems/decomplex/test/ordered_protocol_mine_test.rb b/gems/decomplex/test/ordered_protocol_mine_test.rb index 3ff52c68b..08fd01399 100644 --- a/gems/decomplex/test/ordered_protocol_mine_test.rb +++ b/gems/decomplex/test/ordered_protocol_mine_test.rb @@ -46,6 +46,24 @@ def drift; 
validate(node); prepare(node); commit(node); end assert_equal %w[phase], hit[:states] end + def test_scan_does_not_use_legacy_ast_parse + file = Tempfile.new(["ordered_protocol", ".rb"]) + file.write(<<~RB) + class CompilerPhase + def prepare; @phase = :prepared; end + def validate; @valid = @phase; end + def run; prepare; validate; end + end + RB + file.close + @files << file + + Decomplex::Ast.stub(:parse, ->(*) { raise "legacy Ast.parse called" }) do + report = Decomplex::OrderedProtocolMine.scan([file.path]) + refute_empty report.ordered_protocols + end + end + def test_reports_single_state_dependent_protocol_pressure report = scan(<<~RB) class BillingService diff --git a/gems/decomplex/test/oversized_predicate_test.rb b/gems/decomplex/test/oversized_predicate_test.rb index fe808b0ca..6bc7133c9 100644 --- a/gems/decomplex/test/oversized_predicate_test.rb +++ b/gems/decomplex/test/oversized_predicate_test.rb @@ -2,6 +2,7 @@ require "minitest/autorun" require "tmpdir" +require_relative "../lib/decomplex/ast" require_relative "../lib/decomplex/oversized_predicate" class OversizedPredicateTest < Minitest::Test @@ -32,6 +33,24 @@ def eligible(t, info) end end + def test_scan_uses_syntax_not_ast_facades + Decomplex::Ast.stub(:parse, ->(*) { raise "legacy Ast.parse should not be used" }) do + Decomplex::Ast.stub(:parse_semantic, ->(*) { raise "Ast.parse_semantic should not be used" }) do + with_file(<<~RUBY) do |file| + def eligible(t, info) + if t.map? && !t.numeric_map? && !info.close_zig && !t.sharded? 
+ true + end + end + RUBY + findings = Decomplex::OversizedPredicate.scan([file]).findings + + assert_equal 1, findings.size + end + end + end + end + def test_nested_or_conditions_count_as_atoms with_file(<<~RUBY) do |file| def ready(a, b, c, d) diff --git a/gems/decomplex/test/path_condition_test.rb b/gems/decomplex/test/path_condition_test.rb index cf7ab897f..be4c6b36f 100644 --- a/gems/decomplex/test/path_condition_test.rb +++ b/gems/decomplex/test/path_condition_test.rb @@ -75,4 +75,15 @@ def only(x, y); go(x) if x.a? && y.b?; end assert_empty r.scattered(min_scatter: 2) assert_empty r.neglected(min_support: 3) end + + def test_scan_does_not_use_legacy_ast_parse + Decomplex::Ast.stub(:parse, ->(*) { raise "legacy Ast.parse should not be used" }) do + r = rep(<<~RB) + def one(x, y); go(x) if x.a? && y.b?; end + def two(x, y); go(x) if x.a? && y.b?; end + RB + + assert_equal 1, r.scattered(min_scatter: 2).size + end + end end diff --git a/gems/decomplex/test/predicate_alias_test.rb b/gems/decomplex/test/predicate_alias_test.rb index aef8d6fb5..2ddadfdae 100644 --- a/gems/decomplex/test/predicate_alias_test.rb +++ b/gems/decomplex/test/predicate_alias_test.rb @@ -57,4 +57,15 @@ def somewhere assert_equal 1, rm.size assert_equal "framey?", rm.first[:predicate] end + + def test_scan_does_not_use_legacy_ast_parse + Decomplex::Ast.stub(:parse, ->(*) { raise "legacy Ast.parse should not be used" }) do + pa = Decomplex::PredicateAlias.scan(files(<<~RB)) + def first?; true; end + def second?; true; end + RB + + assert_equal [%w[first? 
second?]], pa.alias_clusters.map { |cluster| cluster[:names].sort } + end + end end diff --git a/gems/decomplex/test/redundant_nil_guard_test.rb b/gems/decomplex/test/redundant_nil_guard_test.rb index 7846b6083..6d5203770 100644 --- a/gems/decomplex/test/redundant_nil_guard_test.rb +++ b/gems/decomplex/test/redundant_nil_guard_test.rb @@ -233,4 +233,15 @@ def use(x) ensure f&.unlink end + + def test_scan_does_not_use_legacy_ast_parse + Decomplex::Ast.stub(:parse, ->(*) { raise "legacy Ast.parse should not be used" }) do + assert_equal ["x&.call"], guards(<<~RB) + def use(x) + return if x.nil? + x&.call + end + RB + end + end end diff --git a/gems/decomplex/test/report_facts_oracle_test.rb b/gems/decomplex/test/report_facts_oracle_test.rb new file mode 100644 index 000000000..dfff2d61b --- /dev/null +++ b/gems/decomplex/test/report_facts_oracle_test.rb @@ -0,0 +1,115 @@ +# frozen_string_literal: true + +require "json" +require "minitest/autorun" +require "tempfile" +require_relative "../lib/decomplex/report" +require_relative "../lib/decomplex/native/command" + +class ReportFactsOracleTest < Minitest::Test + EXAMPLES_ROOT = File.expand_path("../examples", __dir__) + REPORT_FACT_PATHS = Dir[File.join(EXAMPLES_ROOT, "facts", "report", "*.json")].sort.freeze + + def test_report_fact_oracles_exist + refute_empty REPORT_FACT_PATHS + end + + REPORT_FACT_PATHS.each_with_index do |fixture_path, index| + name = File.basename(fixture_path, ".json").tr("-", "_") + + define_method("test_report_fact_#{index}_#{name}_matches_ruby_and_rust") do + assert_report_fact_oracle(fixture_path) + end + end + + private + + def assert_report_fact_oracle(fixture_path) + fixture = JSON.parse(File.read(fixture_path)) + facts = fixture.fetch("input") + expected = fixture.fetch("expected") + expected_markdown = expected_markdown_for(fixture_path) + + ruby_report = Decomplex::Report.from_facts(JSON.generate(facts)) + assert_equal expected, project_report(ruby_report), "ruby #{fixture_path}" + 
assert_equal expected_markdown, ruby_report.to_markdown.rstrip, "markdown ruby #{fixture_path}" + + skip "cargo is not available" unless rust_available? + + Tempfile.create(["decomplex-report-facts-oracle", ".json"]) do |file| + file.write(JSON.pretty_generate(facts)) + file.flush + + rust_markdown = Decomplex::Native::Command.run( + "render-report", "--input", file.path, "--format", "markdown" + ) + rust_sarif = JSON.parse(Decomplex::Native::Command.run( + "render-report", "--input", file.path, "--format", "sarif" + )) + + assert_equal expected_markdown, rust_markdown.rstrip, "markdown rust #{fixture_path}" + assert_equal JSON.parse(ruby_report.to_sarif), rust_sarif, "sarif #{fixture_path}" + end + end + + def project_report(report) + { + "convergence" => json_safe(report.instance_variable_get(:@convergence)), + "root_clusters" => json_safe(report.root_clusters), + "sarif" => compact_sarif(report) + } + end + + def expected_markdown_for(fixture_path) + markdown_path = fixture_path.sub(/\.json\z/, ".md") + assert File.file?(markdown_path), "missing markdown oracle #{markdown_path}" + + File.read(markdown_path).rstrip + end + + def compact_sarif(report) + compact_sarif_hash(JSON.parse(report.to_sarif( + include_snapshot: false, + include_finding_payload: false, + max_results: 8 + ))) + end + + def compact_sarif_hash(sarif) + run = sarif.fetch("runs").first + results = run.fetch("results") + { + "rule_count" => run.dig("tool", "driver", "rules").size, + "result_count" => results.size, + "rule_ids" => results.map { |result| result.fetch("ruleId") }, + "messages" => results.map { |result| result.dig("message", "text") }, + "locations" => results.map do |result| + location = result.dig("locations", 0, "physicalLocation") + { + "uri" => location.dig("artifactLocation", "uri"), + "startLine" => location.dig("region", "startLine") + } + end + } + end + + def json_safe(value) + case value + when Hash + value.to_h { |key, child| [key.to_s, json_safe(child)] } + when Array + 
value.map { |child| json_safe(child) } + when Symbol + value.to_s + else + value + end + end + + def rust_available? + env = ENV["DECOMPLEX_RUST_BIN"] + return true if env && !env.empty? && File.executable?(env) + + system("cargo", "--version", out: File::NULL, err: File::NULL) + end +end diff --git a/gems/decomplex/test/report_test.rb b/gems/decomplex/test/report_test.rb index 3b0b3b298..68736f921 100644 --- a/gems/decomplex/test/report_test.rb +++ b/gems/decomplex/test/report_test.rb @@ -49,6 +49,35 @@ def test_json_report_is_sarif_alias assert_equal JSON.parse(r.to_sarif), JSON.parse(r.to_json) end + def test_report_facts_round_trip_to_same_markdown + f = Tempfile.new(["rep_facts", ".rb"]) + f.write("def a(n)\n case n\n when A then 1\n when B then 2\n end\nend\n" \ + "def b(n)\n case n\n when A then 3\n when B then 4\n end\nend\n") + f.close + + facts = Decomplex::ReportFacts.from_files([f.path], engine: "ruby") + from_source = Decomplex::Report.new([f.path]).to_markdown + from_facts = Decomplex::Report.from_facts(JSON.generate(facts)).to_markdown + + assert_equal Decomplex::ReportFacts::FORMAT, facts.fetch("format") + assert_equal from_source, from_facts + ensure + f&.unlink + end + + def test_report_from_facts_does_not_reparse_source + f = Tempfile.new(["rep_facts_deleted", ".rb"]) + f.write("def a(n)\n if n && ready?\n run\n end\nend\n") + f.close + + facts = Decomplex::ReportFacts.from_files([f.path], engine: "ruby") + f.unlink + + md = Decomplex::Report.from_facts(JSON.generate(facts)).to_markdown + assert_includes md, "# Decomplex Report" + assert_includes md, "Files analyzed: 1" + end + def test_compact_sarif_omits_heavy_payloads_for_ci_uploads sarif = JSON.parse(report.to_sarif(include_snapshot: false, include_finding_payload: false, max_results: 2)) run = sarif.fetch("runs").first @@ -73,6 +102,63 @@ def test_sarif_result_locations_use_report_finding_locations assert result.fetch("partialFingerprints").fetch("decomplexFinding") end + def 
test_sarif_message_includes_detector_specific_derived_state_context + r = Decomplex::Report.allocate + message = r.send(:sarif_message, "Derived-State Staleness", { + derived: "style", + source: "options", + derived_at: 12, + source_reassigned_at: 30 + }, {}) + + assert_includes message, "`style` derived from `options` at line 12" + assert_includes message, "`options` reassigned at line 30" + assert_includes message, "`style` is not recomputed" + end + + def test_sarif_message_includes_detector_specific_protocol_context + r = Decomplex::Report.allocate + message = r.send(:sarif_message, "Broken Protocols", { + has: "lock", + missing: "unlock", + support: 8, + confidence: 0.89 + }, {}) + + assert_includes message, "does `lock` without co-called `unlock`" + assert_includes message, "support=8" + assert_includes message, "confidence=0.89" + end + + def test_sarif_includes_actionable_state_heatmap_context + f = Tempfile.new(["rep_state_sarif", ".rb"]) + f.write(<<~RB) + class BillingService + def set_user(user); @user = user; end + def set_cart(cart); @cart = cart; end + def process + charge(@user) if @cart + audit(@user) + end + end + RB + f.close + + sarif = JSON.parse(Decomplex::Report.new([f.path]).to_sarif) + result = sarif.fetch("runs").first.fetch("results").find do |entry| + entry.fetch("ruleId") == "decomplex.state-heatmap" + end + + refute_nil result + message = result.fetch("message").fetch("text") + assert_includes message, "state `" + assert_includes message, "writes=" + assert_includes message, "reads=" + assert_includes message, "writers" + ensure + f&.unlink + end + def test_markdown_orders_sections_by_signal_tier_not_volume md = report.to_markdown prio = md[/## Project Prioritization.*?\n\n(.*?)\n\n/m, 1].to_s diff --git a/gems/decomplex/test/semantic_alias_test.rb b/gems/decomplex/test/semantic_alias_test.rb index ffe30beb5..bffca7ca8 100644 --- a/gems/decomplex/test/semantic_alias_test.rb +++ b/gems/decomplex/test/semantic_alias_test.rb @@ -19,7 
+19,7 @@ def test_canon_strips_receiver_polarity_and_self_ivar assert_equal "provenance == :frame", c assert_equal "provenance == :frame", Decomplex::SemanticAlias.canon("@provenance == :frame") assert_equal "provenance == :frame", Decomplex::SemanticAlias.canon("self.provenance == :frame") - t, neg = Decomplex::Ast.canon_polarity("!x.heap?") + t, neg = Decomplex::SemanticAlias.canon_polarity("!x.heap?") assert_equal "x.heap?", t assert neg end @@ -64,4 +64,15 @@ def use(n); n.provenance == :heap ? 1 : 2; end assert_equal 1, rm.size assert_equal "heap?", rm.first[:predicate] end + + def test_scan_does_not_use_legacy_ast_parse + Decomplex::Ast.stub(:parse, ->(*) { raise "legacy Ast.parse should not be used" }) do + r = scan(<<~RB) + def frame?; @provenance == :frame; end + def is_frame?; provenance == :frame; end + RB + + assert_equal [%w[frame? is_frame?]], r.alias_clusters.map { |cluster| cluster[:names].sort } + end + end end diff --git a/gems/decomplex/test/sequence_mine_test.rb b/gems/decomplex/test/sequence_mine_test.rb index 67b07743d..2224b72f7 100644 --- a/gems/decomplex/test/sequence_mine_test.rb +++ b/gems/decomplex/test/sequence_mine_test.rb @@ -25,6 +25,21 @@ def d; alloc_mark(w); body4; cleanup(w); end assert(pairs.any? { |h| h[:pair] == %w[alloc_mark cleanup] && h[:support] == 4 }) end + def test_scan_uses_syntax_not_ast_facades + Decomplex::Ast.stub(:parse, ->(*) { raise "legacy Ast.parse should not be used" }) do + Decomplex::Ast.stub(:parse_semantic, ->(*) { raise "Ast.parse_semantic should not be used" }) do + r = scan(<<~RB) + def a; alloc_mark(x); cleanup(x); end + def b; alloc_mark(y); cleanup(y); end + def c; alloc_mark(z); cleanup(z); end + def d; alloc_mark(w); cleanup(w); end + RB + + assert(r.co_called_pairs(min_support: 4).any? 
{ |h| h[:pair] == %w[alloc_mark cleanup] }) + end + end + end + def test_method_calling_one_without_the_other_is_broken_protocol r = scan(<<~RB) def a; alloc_mark(x); cleanup(x); end diff --git a/gems/decomplex/test/source_facts_oracle_test.rb b/gems/decomplex/test/source_facts_oracle_test.rb new file mode 100644 index 000000000..12d902c00 --- /dev/null +++ b/gems/decomplex/test/source_facts_oracle_test.rb @@ -0,0 +1,123 @@ +# frozen_string_literal: true + +require "json" +require "minitest/autorun" +require_relative "../lib/decomplex/detector_runner" +require_relative "../lib/decomplex/syntax_oracle" + +class SourceFactsOracleTest < Minitest::Test + EXAMPLES_ROOT = File.expand_path("../examples/source-facts", __dir__) + ORACLE_ROOT = File.join(EXAMPLES_ROOT, "oracles") + ENGINES = %w[ruby rust].freeze + + FIXTURES = Dir[File.join(EXAMPLES_ROOT, "ruby", "*.rb")].sort.freeze + + def test_ruby_source_fact_fixtures_exist + refute_empty FIXTURES + end + + FIXTURES.product(ENGINES).each_with_index do |(fixture_path, engine), index| + name = File.basename(fixture_path, ".rb") + method_name = "test_#{index}_#{engine}_ruby_#{name}_source_facts_match_oracle" + + define_method(method_name) do + assert_source_facts_match_oracle(fixture_path, engine) + end + end + + private + + def assert_source_facts_match_oracle(fixture_path, engine) + name = File.basename(fixture_path, ".rb") + oracle_path = File.join(ORACLE_ROOT, "ruby-#{name}.json") + assert File.file?(oracle_path), "missing source-facts oracle #{oracle_path}" + + expected = JSON.parse(File.read(oracle_path)) + actual = {} + actual["syntax"] = project_syntax(fixture_path, engine, expected.fetch("syntax", {})) if expected.key?("syntax") + actual["local_flow"] = project_local_flow(fixture_path, engine) if expected.key?("local_flow") + + assert_equal expected, actual, "#{engine} #{fixture_path}" + end + + def project_syntax(fixture_path, engine, expected) + document = Decomplex::SyntaxOracle.project([fixture_path], engine: 
engine, language: :ruby) + .fetch("documents") + .first + expected.keys.each_with_object({}) do |section, out| + out[section] = syntax_rows(document.fetch(section), syntax_keys(section)) + end + end + + def syntax_keys(section) + { + "functions" => %w[name owner line visibility params], + "owners" => %w[name kind line], + "calls" => %w[receiver message function line conditional control safe_navigation block arguments], + "state_declarations" => %w[field owner type line], + "state_param_origins" => %w[field receiver owner param function line], + "state_reads" => %w[receiver field function line], + "state_writes" => %w[receiver field function line], + "decisions" => %w[kind members function line predicate], + "branch_decisions" => %w[function line predicate state_refs], + "branch_arms" => %w[function kind line decision_line predicate member body], + "dispatch_sites" => %w[variant_set arm_members outside function line], + "semantic_effects" => %w[kind detail function line], + "predicate_bodies" => %w[name owner body line], + "comparisons" => %w[source raw canon_source operator function line], + "path_conditions" => %w[guards action function line], + "protocol_method_effects" => %w[owner name line reads writes], + "protocol_call_paths" => %w[owner name line calls], + "clone_candidates" => %w[method_name node_name line mass fingerprint child_fingerprints child_masses], + "redundant_nil_guards" => %w[defn line local guard proof], + "local_methods" => %w[id owner name line statements boundaries local_contract_assignments], + "local_complexity_scores" => %w[id score signals] + }.fetch(section) + end + + def syntax_rows(rows, keys) + Array(rows).map do |row| + projected = pick(row, keys) + canonicalize_local_method_statements(projected) if projected.key?("statements") + projected + end + end + + def canonicalize_local_method_statements(row) + row["statements"] = Array(row["statements"]).map do |statement| + next statement unless statement.is_a?(Hash) + + 
statement.merge("co_uses" => canonical_co_uses(statement.fetch("co_uses", []))) + end + end + + def project_local_flow(fixture_path, engine) + output = JSON.parse( + Decomplex::DetectorRunner.canonical_json("local-flow", [fixture_path], engine: engine) + ) + Array(output).map do |method| + { + "method" => method["name"], + "statements" => Array(method["statements"]).map do |statement| + row = pick(statement, %w[reads writes dependencies co_uses]) + row["co_uses"] = canonical_co_uses(row.fetch("co_uses", [])) + row + end, + "boundaries" => Array(method["boundaries"]).map do |boundary| + pick(boundary, %w[before_index after_index kind]) + end + } + end + end + + def canonical_co_uses(co_uses) + Array(co_uses).map { |pair| Array(pair).map(&:to_s).sort } + .sort_by { |pair| JSON.generate(pair) } + end + + def pick(row, keys) + keys.each_with_object({}) do |key, out| + out[key] = row[key] if row.key?(key) + end + end +end diff --git a/gems/decomplex/test/state_branch_density_test.rb b/gems/decomplex/test/state_branch_density_test.rb index 561fdf1bf..8d818198e 100644 --- a/gems/decomplex/test/state_branch_density_test.rb +++ b/gems/decomplex/test/state_branch_density_test.rb @@ -68,6 +68,23 @@ def pure(a, b) assert_empty rows end + def test_scan_uses_syntax_facts_not_legacy_ast_parse + f = Tempfile.new(["state_branch", ".rb"]) + f.write(<<~RB) + def risky(user) + pay if user.name + end + RB + f.close + @files << f + + Decomplex::Ast.stub(:parse, ->(*) { raise "legacy Ast.parse should not be used" }) do + rows = Decomplex::StateBranchDensity.scan([f.path]).findings + + assert_equal ["user.name"], rows.first[:state_refs] + end + end + def test_groups_multiple_state_branches_per_method_and_keeps_spans rows = scan(<<~RB) def lifecycle(order) diff --git a/gems/decomplex/test/state_mesh_test.rb b/gems/decomplex/test/state_mesh_test.rb index 9c29ee0b8..2d4a2ae4b 100644 --- a/gems/decomplex/test/state_mesh_test.rb +++ b/gems/decomplex/test/state_mesh_test.rb @@ -49,6 +49,28 @@ 
def c(x); x.storage = :heap; end sm.writes.each { |w| assert_equal "storage", w.norm } end + def test_scan_uses_syntax_facts_for_writes_and_reads + f = Tempfile.new(["sm", ".rb"]) + f.write(<<~RB) + def a(x); x.storage = :heap; end + def b(x); x.storage = :frame; end + def c(x); use(x.storage); end + RB + f.close + @tempfiles << f + no_misses = Struct.new(:reification_misses).new([]) + + Decomplex::Ast.stub(:parse, ->(*) { raise "legacy Ast.parse should not be used" }) do + Decomplex::SemanticAlias.stub(:scan, ->(*) { no_misses }) do + sm = Decomplex::StateMesh.scan([f.path]) + sm.run + + assert_equal 2, sm.writes.size + assert_equal 1, sm.reads.size + end + end + end + def test_discover_ivar_writes sm = scan(<<~RB) def a; @storage = :heap; end @@ -352,4 +374,4 @@ def test_normalize_strips_at assert_equal "storage", sm.normalize("storage") assert_equal "provenance", sm.normalize("@provenance") end -end \ No newline at end of file +end diff --git a/gems/decomplex/test/structural_topology_test.rb b/gems/decomplex/test/structural_topology_test.rb index 26f857d6c..1cf2dc536 100644 --- a/gems/decomplex/test/structural_topology_test.rb +++ b/gems/decomplex/test/structural_topology_test.rb @@ -2,6 +2,7 @@ require "minitest/autorun" require "tempfile" +require_relative "../lib/decomplex/ast" require_relative "../lib/decomplex/structural_topology" class StructuralTopologyTest < Minitest::Test @@ -91,6 +92,25 @@ def prepare; end end end + def test_scan_uses_syntax_facts_not_legacy_ast_parse + with_ruby_file(<<~RB) do |path| + class Runner + def run + prepare + end + + def prepare; end + end + RB + Decomplex::Ast.stub(:parse, ->(*) { raise "legacy Ast.parse should not be used" }) do + graph = Decomplex::StructuralTopology.scan([path]) + + assert graph.method_for("Runner", "run") + assert_includes graph.edges_for_owner("Runner").map(&:callee_name), "prepare" + end + end + end + def test_tracks_top_level_same_file_helper_edges with_ruby_file(<<~RB) do |path| def run diff --git 
a/gems/decomplex/test/syntax_oracle_test.rb b/gems/decomplex/test/syntax_oracle_test.rb new file mode 100644 index 000000000..0b885cb25 --- /dev/null +++ b/gems/decomplex/test/syntax_oracle_test.rb @@ -0,0 +1,64 @@ +# frozen_string_literal: true + +require "json" +require "minitest/autorun" +require_relative "../lib/decomplex/syntax_oracle" + +class SyntaxOracleTest < Minitest::Test + EXAMPLES_ROOT = File.expand_path("../examples/syntax-facts", __dir__) + ORACLE_ROOT = File.join(EXAMPLES_ROOT, "oracles") + ENGINES = %w[ruby rust].freeze + + FIXTURES = Dir[File.join(EXAMPLES_ROOT, "*", "*")] + .select { |path| File.file?(path) && Decomplex::Syntax.supported_source?(path) } + .sort + .freeze + + def test_syntax_fact_fixtures_exist + refute_empty FIXTURES + end + + FIXTURES.product(ENGINES).each_with_index do |(fixture_path, engine), index| + language = File.basename(File.dirname(fixture_path)) + name = File.basename(fixture_path, File.extname(fixture_path)) + method_name = "test_#{index}_#{engine}_#{language}_#{name}_syntax_facts_match_oracle" + + define_method(method_name) do + assert_syntax_facts_match_oracle(fixture_path, engine) + end + end + + private + + def assert_syntax_facts_match_oracle(fixture_path, engine) + language = File.basename(File.dirname(fixture_path)) + name = File.basename(fixture_path, File.extname(fixture_path)) + oracle_path = File.join(ORACLE_ROOT, "#{language}-#{name}.json") + + assert File.file?(oracle_path), "missing syntax oracle #{oracle_path}" + + expected = JSON.parse(File.read(oracle_path)) + actual = Decomplex::SyntaxOracle.project([fixture_path], engine: engine, language: language) + actual = project_expected_shape(actual, expected) + + assert_equal expected, actual, "#{engine} #{fixture_path}" + end + + def project_expected_shape(actual, expected) + case expected + when Hash + expected.keys.each_with_object({}) do |key, out| + out[key] = project_expected_shape(actual.fetch(key), expected.fetch(key)) + end + when Array + return 
actual unless expected.any? { |item| item.is_a?(Hash) } + + keys = expected.flat_map { |item| item.is_a?(Hash) ? item.keys : [] }.uniq + actual.map do |item| + item.is_a?(Hash) ? project_expected_shape(item.slice(*keys), expected.find { |row| row.is_a?(Hash) }) : item + end.sort_by { |item| JSON.generate(item) } + else + actual + end + end +end diff --git a/gems/decomplex/test/syntax_test.rb b/gems/decomplex/test/syntax_test.rb index 5531ea77b..b2790827f 100644 --- a/gems/decomplex/test/syntax_test.rb +++ b/gems/decomplex/test/syntax_test.rb @@ -6,6 +6,19 @@ require_relative "../lib/decomplex/report" class SyntaxTest < Minitest::Test + def self.populate_tree_sitter_env_defaults + adapter = Decomplex::Syntax::TreeSitterAdapter.new + Decomplex::Syntax::LANGUAGE_PROFILES.each_key do |language| + env = "DECOMPLEX_TS_#{language.to_s.upcase}_PATH" + next if ENV[env] && File.file?(ENV[env]) + + candidate = adapter.send(:grammar_candidates, language).find { |path| File.file?(path) } + ENV[env] = candidate if candidate + end + end + + populate_tree_sitter_env_defaults + def with_file(source, ext = ".rb") file = Tempfile.new(["syntax", ext]) file.write(source) @@ -90,6 +103,157 @@ def test_tree_sitter_grammar_candidates_keep_only_current_platform_prebuilds assert_equal [current], adapter.send(:platform_prebuilds, [other, current]) end + def test_language_profiles_have_language_specific_lexicons + examples = { + lua: ["script.lua", "value == nil", "error('bad')"], + c: ["src/main.c", "ptr == NULL", "abort()"], + cpp: ["src/main.cpp", "value == nullptr", "throw Error{}"], + csharp: ["src/Program.cs", "value is string", "throw new Exception()"], + java: ["src/Main.java", "value instanceof String", "throw new RuntimeException()"], + swift: ["Sources/App.swift", "if let value = maybe", "fatalError()"], + kotlin: ["src/Main.kt", "value as? 
String", "require(value != null)"] + } + + examples.each do |language, (path, type_guard, diagnostic)| + lexicon = Decomplex::Syntax.language_lexicon(language) + + assert_equal language, Decomplex::Syntax.language_for(path) + assert_instance_of Decomplex::Syntax::LanguageLexicon, lexicon, language + assert lexicon.type_guard?(type_guard), language + assert lexicon.diagnostic?(diagnostic), language + end + end + + def test_tree_sitter_language_profile_owns_parser_metadata + c = Decomplex::Syntax.language_profile(:c) + assert_equal %w[.c .h], c.extensions + assert_equal "tree-sitter-c", c.package + assert_equal %w[c], c.grammar_names + assert c.first_argument_receiver? + + csharp = Decomplex::Syntax.language_profile(:csharp) + assert_equal "tree-sitter-c-sharp", csharp.package + assert_equal %w[c-sharp csharp], csharp.grammar_names + assert_equal "c_sharp", csharp.tree_sitter_language_name + refute csharp.first_argument_receiver? + end + + def test_language_profile_fails_loudly_without_supported_language + refute Decomplex::Syntax.const_defined?(:GENERIC_LANGUAGE_PROFILE, false) + + missing = assert_raises(ArgumentError) do + Decomplex::Syntax.language_profile(nil) + end + assert_match(/missing Syntax language profile/, missing.message) + + unsupported = assert_raises(ArgumentError) do + Decomplex::Syntax.language_profile(:wat) + end + assert_match(/unsupported Syntax language profile/, unsupported.message) + end + + def test_tree_sitter_adapter_requires_language_profile_context + adapter = Decomplex::Syntax::TreeSitterAdapter.new + + error = assert_raises(ArgumentError) do + adapter.send(:syntax_profile, nil) + end + assert_match(/missing Syntax language profile context/, error.message) + end + + def test_tree_sitter_language_adapter_normalizes_non_breaking_space + profile = Decomplex::Syntax.language_profile(:python) + + assert_equal "alpha beta", profile.send(:normalize_text, "alpha\u00A0beta") + end + + def 
test_tree_sitter_adapter_delegates_language_normalization_to_profiles + adapter_class = Decomplex::Syntax::TreeSitterAdapter + profile_class = Decomplex::Syntax::TreeSitterLanguageAdapter + ruby_profile_class = Decomplex::Syntax::RubySyntaxAdapter + + refute adapter_class.const_defined?(:BRANCH_KINDS, false) + refute adapter_class.const_defined?(:NOISE_MESSAGES, false) + refute adapter_class.private_method_defined?(:record_state_write) + assert profile_class.private_method_defined?(:record_state_write) + assert ruby_profile_class.private_method_defined?(:skip_state_write_node?) + assert ruby_profile_class.private_method_defined?(:skip_state_write_target?) + assert ruby_profile_class.private_method_defined?(:hidden_case?) + assert ruby_profile_class.private_method_defined?(:case_pattern_texts) + assert ruby_profile_class.private_method_defined?(:direct_state_ref) + end + + def test_tree_sitter_document_walks_seed_language_context + adapter = Decomplex::Syntax::TreeSitterAdapter.new + document = Struct.new(:root, :file, :language, :lines) + .new(Object.new, "/tmp/demo.py", :python, []) + captured = [] + + adapter.define_singleton_method(:walk) do |doc, profile, &_block| + captured << profile.initial_stack(doc) + end + + adapter.decision_sites(document) + adapter.branch_decisions(document, immutable_readers: {}, immutable_reader_types: {}, type_aliases: {}) + adapter.branch_arms(document) + adapter.structural_facts(document) + + expected = [{ file_owner: "demo", language: :python }] + assert_equal [expected, expected, expected, expected], captured + end + + def test_force_language_override_handles_ambiguous_headers + assert_equal :c, Decomplex::Syntax.language_for("include/demo.h") + + with_env("DECOMPLEX_FORCE_LANGUAGE", "cpp") do + assert_equal :cpp, Decomplex::Syntax.language_for("include/demo.h") + assert_equal :cpp, Decomplex::Syntax.language_for("src/demo.c") + end + end + + def test_tree_sitter_lua_adapter_ignores_generated_teal_compat_prelude + grammar = 
ENV["DECOMPLEX_TS_LUA_PATH"] + skip "set DECOMPLEX_TS_LUA_PATH to run Lua structural facts test" unless grammar && File.file?(grammar) + + with_file(<<~LUA, ".lua") do |path| + local _tl_compat; if (tonumber((_VERSION or ""):match("[%d.]*$")) or 0) < 5.3 then local pcall, require = pcall, require; local ok, compat53 = pcall(require, "compat53.module"); if ok then compat53.module(_ENV) end end + function real(a, b) + if a and b then + return true + end + end + LUA + doc = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: :lua) + + assert_empty doc.decision_sites.select { |decision| decision.line == 1 } + assert_empty doc.branch_arms.select { |arm| arm.line == 1 } + assert_includes doc.decision_sites.map { |decision| [decision.line, decision.kind, decision.members] }, + [3, :conjunction, %w[a b]] + end + end + + def test_tree_sitter_go_adapter_extracts_name_type_struct_fields + grammar = ENV["DECOMPLEX_TS_GO_PATH"] + skip "set DECOMPLEX_TS_GO_PATH to run Go structural facts test" unless grammar && File.file?(grammar) + + with_file(<<~GO, ".go") do |path| + package util + + type Slab struct { + I16 []int16 + Count int + } + GO + doc = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: :go) + + assert_includes doc.owner_defs.map { |owner| [owner.name, owner.kind] }, ["Slab", :owner] + assert_includes doc.state_declarations.map { |state| [state.owner, state.field, state.type] }, + ["Slab", "I16", "[]int16"] + assert_includes doc.state_declarations.map { |state| [state.owner, state.field, state.type] }, + ["Slab", "Count", "int"] + end + end + def test_tree_sitter_ruby_adapter_extracts_portable_facts_when_grammar_is_available grammar = ENV["DECOMPLEX_TS_RUBY_PATH"] skip "set DECOMPLEX_TS_RUBY_PATH to run Tree-sitter adapter smoke test" unless grammar && File.file?(grammar) @@ -114,6 +278,40 @@ def classify(node) end end + def test_tree_sitter_ruby_adapter_applies_method_visibility + grammar = ENV["DECOMPLEX_TS_RUBY_PATH"] + skip "set 
DECOMPLEX_TS_RUBY_PATH to run Tree-sitter adapter smoke test" unless grammar && File.file?(grammar) + + with_file(<<~RB) do |path| + class Worker + def run; end + + private + def prepare; end + def validate; end + + public :validate + protected + def guarded; end + + private def inline_helper; end + def self.build; end + def Worker.explicit; end + end + RB + doc = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: :ruby) + functions = doc.function_defs.to_h { |fn| [fn.name, fn] } + + assert_equal :public, functions.fetch("run").visibility + assert_equal :private, functions.fetch("prepare").visibility + assert_equal :public, functions.fetch("validate").visibility + assert_equal :protected, functions.fetch("guarded").visibility + assert_equal :private, functions.fetch("inline_helper").visibility + assert_equal :public, functions.fetch("self.build").visibility + assert_equal :public, functions.fetch("Worker.explicit").visibility + end + end + def test_tree_sitter_language_profiles_extract_portable_facts_when_grammars_are_available profiles = { python: [ @@ -248,16 +446,410 @@ class Worker: def __init__(self, items): self.items = items - def call(self): - self.items.append("x") + def call(self): + self.items.append("x") + + def run(items): + prepare(items) PY doc = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: :python) assert_includes doc.state_writes.map { |write| [write.receiver, write.field] }, ["self", "items"] assert_includes doc.state_param_origins.map { |origin| [origin.owner, origin.function, origin.receiver, origin.field, origin.param] }, ["Worker", "__init__", "self", "items", "items"] - assert_includes doc.call_sites.map { |call| [call.owner, call.function, call.receiver, call.message] }, - ["Worker", "call", "self.items", "append"] + assert_includes doc.call_sites.map { |call| [call.function, call.receiver, call.message] }, + ["call", "self.items", "append"] + assert_includes doc.call_sites.map { |call| [call.function, 
call.receiver, call.message, call.arguments] }, + ["run", "self", "prepare", ["items"]] + end + end + + def test_tree_sitter_python_adapter_extracts_typed_attribute_assignments + grammar = ENV["DECOMPLEX_TS_PYTHON_PATH"] + skip "set DECOMPLEX_TS_PYTHON_PATH to run Python structural facts test" unless grammar && File.file?(grammar) + + with_file(<<~PY, ".py") do |path| + class Worker: + def __init__(self, items): + self.items = items + self.cache: dict[str, int] = items + PY + doc = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: :python) + + assert_includes doc.state_writes.map { |write| [write.receiver, write.field, write.span] }, + ["self", "items", [3, 8, 3, 26]] + assert_includes doc.state_writes.map { |write| [write.receiver, write.field, write.span] }, + ["self", "cache", [4, 8, 4, 42]] + assert_includes doc.state_param_origins.map { |origin| [origin.receiver, origin.field, origin.param, origin.span] }, + ["self", "items", "items", [3, 8, 3, 26]] + assert_includes doc.state_param_origins.map { |origin| [origin.receiver, origin.field, origin.param, origin.span] }, + ["self", "cache", "items", [4, 8, 4, 42]] + assert_empty doc.state_reads + end + end + + def test_tree_sitter_python_adapter_extracts_typed_splat_parameters + grammar = ENV["DECOMPLEX_TS_PYTHON_PATH"] + skip "set DECOMPLEX_TS_PYTHON_PATH to run Python structural facts test" unless grammar && File.file?(grammar) + + with_file(<<~PY, ".py") do |path| + def reconfigure(*args: Any, **kwargs: Any) -> None: + new_console = Console(*args, **kwargs) + PY + doc = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: :python) + + assert_equal %w[args kwargs], doc.function_defs.first.params + statement = doc.local_methods.first.statements.first + assert_equal %w[args kwargs], statement.reads.to_a.sort + assert_equal [["new_console", "args"], ["new_console", "kwargs"]], statement.dependencies + end + end + + def test_tree_sitter_python_adapter_treats_annotation_only_locals_as_writes + 
grammar = ENV["DECOMPLEX_TS_PYTHON_PATH"] + skip "set DECOMPLEX_TS_PYTHON_PATH to run Python structural facts test" unless grammar && File.file?(grammar) + + with_file(<<~PY, ".py") do |path| + def parse_version(): + version_integers: tuple[int, ...] + PY + statement = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: :python) + .local_methods + .first + .statements + .first + + assert_empty statement.reads + assert_equal ["version_integers"], statement.writes.to_a + end + end + + def test_tree_sitter_python_adapter_treats_typed_local_assignment_as_write + grammar = ENV["DECOMPLEX_TS_PYTHON_PATH"] + skip "set DECOMPLEX_TS_PYTHON_PATH to run Python structural facts test" unless grammar && File.file?(grammar) + + with_file(<<~PY, ".py") do |path| + def process(value): + try: + return_value: PromptType = convert(value) + except ValueError: + raise + return return_value + PY + statements = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: :python) + .local_methods + .first + .statements + + assert_includes statements[0].writes, "return_value" + assert_includes statements[0].reads, "value" + refute_includes statements[0].reads, "return_value" + assert_equal ["return_value"], statements[1].reads.to_a + end + end + + def test_tree_sitter_python_adapter_mines_loop_and_with_locals_without_keyword_writes + grammar = ENV["DECOMPLEX_TS_PYTHON_PATH"] + skip "set DECOMPLEX_TS_PYTHON_PATH to run Python structural facts test" unless grammar && File.file?(grammar) + + with_file(<<~PY, ".py") do |path| + def download(urls, dest_dir): + with ThreadPoolExecutor(max_workers=4) as pool: + for url in urls: + filename = url.split("/")[-1] + dest_path = os.path.join(dest_dir, filename) + task_id = progress.add_task("download", filename=filename, start=False) + pool.submit(copy_url, task_id, url, dest_path) + PY + statement = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: :python) + .local_methods + .first + .statements + .first + + assert_includes 
statement.reads, "urls" + assert_includes statement.reads, "url" + assert_includes statement.reads, "pool" + assert_includes statement.writes, "url" + assert_includes statement.writes, "pool" + refute_includes statement.writes, "urls" + refute_includes statement.writes, "max_workers" + refute_includes statement.writes, "start" + end + end + + def test_tree_sitter_python_adapter_counts_callable_locals_as_reads + grammar = ENV["DECOMPLEX_TS_PYTHON_PATH"] + skip "set DECOMPLEX_TS_PYTHON_PATH to run Python structural facts test" unless grammar && File.file?(grammar) + + with_file(<<~PY, ".py") do |path| + def invoke(callback, value): + runner = callback + return runner(value) + PY + statements = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: :python) + .local_methods + .first + .statements + + assert_equal ["callback"], statements[0].reads.to_a + assert_equal %w[runner value], statements[1].reads.to_a.sort + end + end + + def test_tree_sitter_python_adapter_mines_named_expression_writes + grammar = ENV["DECOMPLEX_TS_PYTHON_PATH"] + skip "set DECOMPLEX_TS_PYTHON_PATH to run Python structural facts test" unless grammar && File.file?(grammar) + + with_file(<<~PY, ".py") do |path| + def scan(text, index): + if (character := text[index]): + return character + PY + statement = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: :python) + .local_methods + .first + .statements + .first + + assert_includes statement.writes, "character" + assert_includes statement.reads, "text" + assert_includes statement.reads, "index" + assert_includes statement.dependencies, ["character", "text"] + assert_includes statement.dependencies, ["character", "index"] + end + end + + def test_tree_sitter_python_adapter_groups_try_except_as_one_statement + grammar = ENV["DECOMPLEX_TS_PYTHON_PATH"] + skip "set DECOMPLEX_TS_PYTHON_PATH to run Python structural facts test" unless grammar && File.file?(grammar) + + with_file(<<~PY, ".py") do |path| + def foo(): + try: + raise 
RuntimeError("Hello") + except Exception as e: + raise e from e + PY + statements = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: :python) + .local_methods + .first + .statements + + assert_equal 1, statements.length + assert_includes statements.first.writes, "e" + assert_includes statements.first.reads, "e" + end + end + + def test_tree_sitter_python_adapter_groups_if_elif_chain_as_one_statement + grammar = ENV["DECOMPLEX_TS_PYTHON_PATH"] + skip "set DECOMPLEX_TS_PYTHON_PATH to run Python structural facts test" unless grammar && File.file?(grammar) + + with_file(<<~PY, ".py") do |path| + def align(value): + if value == "left": + return 1 + elif value == "right": + return 2 + PY + statements = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: :python) + .local_methods + .first + .statements + + assert_equal 1, statements.length + assert_equal ["value"], statements.first.reads.to_a + end + end + + def test_tree_sitter_python_adapter_ignores_import_paths_that_match_locals + grammar = ENV["DECOMPLEX_TS_PYTHON_PATH"] + skip "set DECOMPLEX_TS_PYTHON_PATH to run Python structural facts test" unless grammar && File.file?(grammar) + + with_file(<<~PY, ".py") do |path| + def inspect(): + from rich._inspect import Inspect + _inspect = Inspect() + return _inspect + PY + statements = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: :python) + .local_methods + .first + .statements + + assert_empty statements[0].reads + assert_equal ["_inspect"], statements[1].writes.to_a + assert_equal ["_inspect"], statements[2].reads.to_a + end + end + + def test_tree_sitter_python_adapter_reads_bare_with_context_local + grammar = ENV["DECOMPLEX_TS_PYTHON_PATH"] + skip "set DECOMPLEX_TS_PYTHON_PATH to run Python structural facts test" unless grammar && File.file?(grammar) + + with_file(<<~PY, ".py") do |path| + def use_status(status): + with status: + sleep(0.2) + PY + statement = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: 
:python) + .local_methods + .first + .statements + .first + + assert_equal ["status"], statement.reads.to_a + end + end + + def test_tree_sitter_c_adapter_extracts_functions_branches_and_pointer_state + grammar = ENV["DECOMPLEX_TS_C_PATH"] + skip "set DECOMPLEX_TS_C_PATH to run C structural facts test" unless grammar && File.file?(grammar) + + with_file(<<~C, ".c") do |path| + typedef struct Node { int storage; int ready; int enabled; int kind; } Node; + static int classify(Node* node) { + node->storage = 1; + if (node->ready && node->enabled) return 1; + switch (node->kind) { case 1: return 1; case 2: return 2; default: return 0; } + } + C + doc = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: :c) + + assert_includes doc.function_defs.map(&:name), "classify" + assert_includes doc.state_writes.map { |write| [write.receiver, write.field, write.function] }, + ["self", "storage", "classify"] + assert_includes doc.decision_sites.map(&:kind), :conjunction + assert_includes doc.decision_sites.map(&:kind), :case_dispatch + end + end + + def test_tree_sitter_cpp_adapter_extracts_class_methods_and_pointer_state + grammar = ENV["DECOMPLEX_TS_CPP_PATH"] + skip "set DECOMPLEX_TS_CPP_PATH to run C++ structural facts test" unless grammar && File.file?(grammar) + + with_file(<<~CPP, ".cpp") do |path| + class Parser { + public: + int parse(Node* node) { + node->storage = 1; + if (node == nullptr || node->ready) return 1; + switch (node->kind) { case 1: return 1; case 2: return 2; default: return 0; } + } + }; + CPP + doc = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: :cpp) + + assert_includes doc.owner_defs.map { |owner| [owner.name, owner.kind] }, ["Parser", :class] + assert_includes doc.function_defs.map { |fn| [fn.owner, fn.name] }, ["Parser", "parse"] + assert_includes doc.state_writes.map { |write| [write.receiver, write.field, write.function] }, + ["node", "storage", "parse"] + assert_includes doc.decision_sites.map(&:kind), :case_dispatch + 
end + end + + def test_tree_sitter_csharp_adapter_extracts_class_methods_and_member_state + grammar = ENV["DECOMPLEX_TS_CSHARP_PATH"] + skip "set DECOMPLEX_TS_CSHARP_PATH to run C# structural facts test" unless grammar && File.file?(grammar) + + with_file(<<~CS, ".cs") do |path| + public sealed class Parser { + private int _storage; + public int Parse(Node node) { + this._storage = 1; + if (node == null || node.Ready) return 1; + switch (node.Kind) { case 1: return 1; case 2: return 2; default: return 0; } + } + } + CS + doc = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: :csharp) + + assert_includes doc.owner_defs.map { |owner| [owner.name, owner.kind] }, ["Parser", :class] + assert_includes doc.function_defs.map { |fn| [fn.owner, fn.name] }, ["Parser", "Parse"] + assert_includes doc.state_writes.map { |write| [write.receiver, write.field, write.function] }, + ["self", "_storage", "Parse"] + assert_includes doc.decision_sites.map(&:kind), :case_dispatch + end + end + + def test_tree_sitter_java_adapter_extracts_class_methods_and_member_state + grammar = ENV["DECOMPLEX_TS_JAVA_PATH"] + skip "set DECOMPLEX_TS_JAVA_PATH to run Java structural facts test" unless grammar && File.file?(grammar) + + with_file(<<~JAVA, ".java") do |path| + public final class Parser { + private int storage; + public int parse(Node node) { + this.storage = 1; + if (node == null || node.ready()) return 1; + switch (node.kind()) { case 1: return 1; case 2: return 2; default: return 0; } + } + } + JAVA + doc = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: :java) + + assert_includes doc.owner_defs.map { |owner| [owner.name, owner.kind] }, ["Parser", :class] + assert_includes doc.function_defs.map { |fn| [fn.owner, fn.name] }, ["Parser", "parse"] + assert_includes doc.state_writes.map { |write| [write.receiver, write.field, write.function] }, + ["self", "storage", "parse"] + assert_includes doc.decision_sites.map(&:kind), :case_dispatch + end + end + + def 
test_tree_sitter_swift_adapter_extracts_class_methods_and_member_state + grammar = ENV["DECOMPLEX_TS_SWIFT_PATH"] + skip "set DECOMPLEX_TS_SWIFT_PATH to run Swift structural facts test" unless grammar && File.file?(grammar) + + with_file(<<~SWIFT, ".swift") do |path| + final class Parser { + private var storage: Int = 0 + func parse(_ node: Node) -> Int { + self.storage = 1 + if node == nil || node.ready { return 1 } + switch node.kind { + case .one: return 1 + case .two: return 2 + default: return 0 + } + } + } + SWIFT + doc = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: :swift) + + assert_includes doc.owner_defs.map { |owner| [owner.name, owner.kind] }, ["Parser", :class] + assert_includes doc.function_defs.map { |fn| [fn.owner, fn.name] }, ["Parser", "parse"] + assert_includes doc.state_writes.map { |write| [write.receiver, write.field, write.function] }, + ["self", "storage", "parse"] + assert_includes doc.decision_sites.map(&:kind), :case_dispatch + end + end + + def test_tree_sitter_kotlin_adapter_extracts_class_methods_and_member_state + grammar = ENV["DECOMPLEX_TS_KOTLIN_PATH"] + skip "set DECOMPLEX_TS_KOTLIN_PATH to run Kotlin structural facts test" unless grammar && File.file?(grammar) + + with_file(<<~KOTLIN, ".kt") do |path| + class Parser { + var storage: Int = 0 + fun parse(node: Node): Int { + this.storage = 1 + if (node == null || node.ready) return 1 + return when (node.kind) { + Kind.ONE -> 1 + Kind.TWO -> 2 + else -> 0 + } + } + } + KOTLIN + doc = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: :kotlin) + + assert_includes doc.owner_defs.map { |owner| [owner.name, owner.kind] }, ["Parser", :class] + assert_includes doc.function_defs.map { |fn| [fn.owner, fn.name] }, ["Parser", "parse"] + assert_includes doc.state_writes.map { |write| [write.receiver, write.field, write.function] }, + ["self", "storage", "parse"] + assert_includes doc.decision_sites.map(&:kind), :case_dispatch end end diff --git 
a/gems/decomplex/test/temporal_ordering_pressure_test.rb b/gems/decomplex/test/temporal_ordering_pressure_test.rb index 12ab8bb6d..776a22a5f 100644 --- a/gems/decomplex/test/temporal_ordering_pressure_test.rb +++ b/gems/decomplex/test/temporal_ordering_pressure_test.rb @@ -56,6 +56,23 @@ def helper_two; @state = :two; end assert_empty rows end + def test_scan_uses_syntax_facts_not_legacy_ast_parse + f = Tempfile.new(["temporal", ".rb"]) + f.write(<<~RB) + class BillingService + def set_user(user); @user = user; end + def set_cart(cart); @cart = cart; end + def validate_user; @validated = @user && @cart; end + end + RB + f.close + @files << f + + Decomplex::Ast.stub(:parse, ->(*) { raise "legacy Ast.parse should not be used" }) do + refute_empty Decomplex::TemporalOrderingPressure.scan([f.path]) + end + end + def test_requires_shared_state_not_just_many_independent_writers rows = scan(<<~RB) class IndependentSetters diff --git a/gems/decomplex/test/weighted_inlined_cognitive_complexity_test.rb b/gems/decomplex/test/weighted_inlined_cognitive_complexity_test.rb index 7276fc7b2..e80b2190a 100644 --- a/gems/decomplex/test/weighted_inlined_cognitive_complexity_test.rb +++ b/gems/decomplex/test/weighted_inlined_cognitive_complexity_test.rb @@ -2,6 +2,7 @@ require "minitest/autorun" require "tempfile" +require_relative "../lib/decomplex/ast" require_relative "../lib/decomplex/weighted_inlined_cognitive_complexity" class WeightedInlinedCognitiveComplexityTest < Minitest::Test @@ -244,6 +245,28 @@ def right(item) assert_equal %w[left right], left[:call_chain] end + def test_scan_does_not_use_legacy_ast_parse + Decomplex::Ast.stub(:parse, ->(*) { raise "legacy Ast.parse should not be used" }) do + out = scan(<<~RB, min_score: 2, min_hidden: 1, max_depth: 2) + class Pipeline + def run(input) + prepare(input) + end + + def prepare(input) + if input.ready? + if input.valid? && !input.locked? 
+ true + end + end + end + end + RB + + refute_empty out + end + end + def test_handles_modules_inline_visibility_loops_rescue_and_shared_reason out = scan(<<~RB, min_score: 0, min_hidden: 0, max_depth: 1) class EmptyOwner; end diff --git a/gems/espalier/exe/espalier b/gems/espalier/exe/espalier index 2531b0b29..739d0ebda 100755 --- a/gems/espalier/exe/espalier +++ b/gems/espalier/exe/espalier @@ -27,7 +27,7 @@ end OptionParser.new do |opts| opts.banner = "Usage: espalier [options] " - opts.on("-f", "--format FORMAT", [:markdown, :yaml, :report, :sarif, :json], "Output format (markdown, yaml, report, sarif, json). Default: markdown") do |f| + opts.on("-f", "--format FORMAT", [:markdown, :yaml, :report, :dot, :sarif, :json], "Output format (markdown, yaml, report, dot, sarif, json). Default: markdown") do |f| options[:format] = f end @@ -70,6 +70,8 @@ if options[:manifest] ).to_markdown elsif options[:format] == :yaml Espalier::Formatter.to_yaml(manifest) + elsif options[:format] == :dot + Espalier::Formatter.to_dot(manifest) elsif %i[sarif json].include?(options[:format]) Espalier::Formatter.to_sarif(manifest) else @@ -147,6 +149,8 @@ manifest = aggregator.aggregate(modules) # Format & Outflow output_contents = if options[:format] == :yaml Espalier::Formatter.to_yaml(manifest) + elsif options[:format] == :dot + Espalier::Formatter.to_dot(manifest) elsif %i[sarif json].include?(options[:format]) Espalier::Formatter.to_sarif(manifest) elsif options[:format] == :report diff --git a/gems/espalier/lib/espalier.rb b/gems/espalier/lib/espalier.rb index 54fcdc5ce..709f5d7c1 100644 --- a/gems/espalier/lib/espalier.rb +++ b/gems/espalier/lib/espalier.rb @@ -5,6 +5,8 @@ require_relative "espalier/privacy_analyzer" require_relative "espalier/architecture_analyzer" require_relative "espalier/aggregator" +require_relative "espalier/dependency_graph" +require_relative "espalier/graphviz_formatter" require_relative "espalier/formatter" require_relative "espalier/reporter" diff 
--git a/gems/espalier/lib/espalier/dependency_graph.rb b/gems/espalier/lib/espalier/dependency_graph.rb new file mode 100644 index 000000000..bc4a29d8d --- /dev/null +++ b/gems/espalier/lib/espalier/dependency_graph.rb @@ -0,0 +1,486 @@ +# frozen_string_literal: true + +require "set" + +module Espalier + # Builds a manifest-derived dependency graph without owning rendering. + class DependencyGraph + Node = Struct.new(:id, :kind, :label, :owner, :file, :line, :metadata, keyword_init: true) + Edge = Struct.new(:source, :target, :kind, :label, :conditional, :weight, :metadata, keyword_init: true) + + CORE_TYPES = %w[ + Array BasicObject Boolean Class FalseClass Float Hash Integer NilClass + Object Proc Set String Symbol T TrueClass + ].freeze + + attr_reader :nodes_by_id, :edges_by_key + + def self.from_manifest(manifest, include_external: false) + Builder.new(manifest, include_external: include_external).build + end + + def self.owner_node_id(owner) + "owner:#{owner}" + end + + def self.function_node_id(owner, function_name) + "fn:#{owner}##{function_name}" + end + + def self.external_node_id(name) + "external:#{name}" + end + + def initialize + @nodes_by_id = {} + @edges_by_key = {} + end + + def add_node(node) + @nodes_by_id[node.id] ||= node + end + + def add_edge(edge) + edge.weight ||= 1 + edge.conditional = !!edge.conditional + key = [edge.source, edge.target, edge.kind, edge.label, edge.conditional] + existing = @edges_by_key[key] + if existing + existing.weight += edge.weight + else + @edges_by_key[key] = edge + end + end + + def nodes + @nodes_by_id.values.sort_by { |node| [node.kind.to_s, node.owner.to_s, node.id] } + end + + def edges + @edges_by_key.values.sort_by do |edge| + [edge.source, edge.target, edge.kind.to_s, edge.label.to_s, edge.conditional ? 
1 : 0] + end + end + + def owner_nodes + nodes.select { |node| node.kind == :owner } + end + + def function_nodes + nodes.select { |node| node.kind == :function } + end + + def nodes_for_owner(owner) + nodes.select { |node| node.owner == owner && node.kind != :external } + end + + def cyclic_node_ids + @cyclic_node_ids ||= begin + cyclic = Set.new + strongly_connected_components.each do |component| + next if component.size <= 1 + + component.each { |node_id| cyclic << node_id } + end + edges.each { |edge| cyclic << edge.source if edge.source == edge.target } + cyclic + end + end + + def cycle_component_by_node + @cycle_component_by_node ||= begin + out = {} + strongly_connected_components.each_with_index do |component, index| + next if component.size <= 1 + + component.each { |node_id| out[node_id] = index } + end + out + end + end + + private + + def strongly_connected_components + @strongly_connected_components ||= begin + index = 0 + stack = [] + indices = {} + lowlinks = {} + on_stack = Set.new + components = [] + adjacency = edges.each_with_object(Hash.new { |h, k| h[k] = [] }) do |edge, out| + out[edge.source] << edge.target + end + + visit = lambda do |node_id| + indices[node_id] = index + lowlinks[node_id] = index + index += 1 + stack << node_id + on_stack << node_id + + adjacency[node_id].each do |target| + if !indices.key?(target) + visit.call(target) + lowlinks[node_id] = [lowlinks[node_id], lowlinks[target]].min + elsif on_stack.include?(target) + lowlinks[node_id] = [lowlinks[node_id], indices[target]].min + end + end + + return unless lowlinks[node_id] == indices[node_id] + + component = [] + loop do + member = stack.pop + on_stack.delete(member) + component << member + break if member == node_id + end + components << component.sort + end + + @nodes_by_id.each_key { |node_id| visit.call(node_id) unless indices.key?(node_id) } + components + end + end + + class Builder + def initialize(manifest, include_external:) + @manifest = Array(manifest) + 
@include_external = include_external + @graph = DependencyGraph.new + @owners = Set.new + @owner_by_simple = {} + @functions_by_owner = Hash.new { |h, k| h[k] = Set.new } + @state_types_by_owner = Hash.new { |h, k| h[k] = {} } + end + + def build + index_manifest + add_nodes + add_state_type_edges + add_internal_call_edges + add_delegation_edges + @graph + end + + private + + def index_manifest + @manifest.each do |mod| + owner = value(mod, :module).to_s + next if owner.empty? + + @owners << owner + functions(mod).each { |fn| @functions_by_owner[owner] << value(fn, :name).to_s } + @state_types_by_owner[owner] = state_type_index(mod) + end + + grouped = @owners.group_by { |owner| owner.split("::").last } + @owner_by_simple = grouped.each_with_object({}) do |(simple, owners), out| + out[simple] = owners.first if owners.size == 1 + end + end + + def add_nodes + @manifest.each do |mod| + owner = value(mod, :module).to_s + next if owner.empty? + + @graph.add_node(owner_node(mod, owner)) + functions(mod).each do |fn| + name = value(fn, :name).to_s + next if name.empty? 
+ + @graph.add_node(function_node(mod, fn, owner, name)) + end + end + end + + def owner_node(mod, owner) + Node.new( + id: DependencyGraph.owner_node_id(owner), + kind: :owner, + label: owner, + owner: owner, + file: value(mod, :file), + line: value(mod, :line), + metadata: { + type: value(mod, :type), + language: value(mod, :language), + function_count: functions(mod).size, + state_count: states(mod).size + } + ) + end + + def function_node(mod, fn, owner, name) + effects = value(fn, :EFFECTS) || {} + Node.new( + id: DependencyGraph.function_node_id(owner, name), + kind: :function, + label: name, + owner: owner, + file: value(mod, :file), + line: value(fn, :line), + metadata: { + visibility: value(fn, :visibility) || :public, + signature: value(fn, :signature), + reads: Array(value(effects, :reads)), + writes: Array(value(effects, :writes)) + } + ) + end + + def add_state_type_edges + @manifest.each do |mod| + source_owner = value(mod, :module).to_s + states(mod).each do |state| + target_owner = owner_for_type(value(state, :type)) + next unless target_owner + next if target_owner == source_owner + + @graph.add_edge( + Edge.new( + source: DependencyGraph.owner_node_id(source_owner), + target: DependencyGraph.owner_node_id(target_owner), + kind: :state_type, + label: "state #{value(state, :name)}", + conditional: false, + weight: 1, + metadata: { state: value(state, :name) } + ) + ) + end + end + end + + def add_internal_call_edges + @manifest.each do |mod| + owner = value(mod, :module).to_s + graph = value(mod, :call_graph) || {} + Array(value(graph, :internal_edges)).each do |edge| + caller = value(edge, :caller).to_s + callee = value(edge, :callee).to_s + next unless function?(owner, caller) && function?(owner, callee) + + conditional = value(edge, :type).to_s == "conditional" + add_call_edge( + source_owner: owner, + source_function: caller, + target_id: DependencyGraph.function_node_id(owner, callee), + kind: :internal_call, + label: conditional ? 
"conditional internal" : "internal", + conditional: conditional + ) + end + end + end + + def add_delegation_edges + @manifest.each do |mod| + owner = value(mod, :module).to_s + functions(mod).each do |fn| + source_function = value(fn, :name).to_s + delegation_calls(fn).each do |call| + target = target_for_call(owner, call[:name]) + next unless target + + add_call_edge( + source_owner: owner, + source_function: source_function, + target_id: target[:id], + kind: target[:kind], + label: target[:kind] == :internal_call ? internal_label(call[:conditional]) : call_label(call[:name], call[:conditional], target[:method]), + conditional: call[:conditional], + metadata: { call: call[:name] } + ) + end + end + end + end + + def add_call_edge(source_owner:, source_function:, target_id:, kind:, label:, conditional:, metadata: {}) + source_id = DependencyGraph.function_node_id(source_owner, source_function) + return unless @graph.nodes_by_id.key?(source_id) + return unless @graph.nodes_by_id.key?(target_id) + + @graph.add_edge( + Edge.new( + source: source_id, + target: target_id, + kind: kind, + label: label, + conditional: conditional, + weight: 1, + metadata: metadata + ) + ) + end + + def delegation_calls(fn) + delegations = value(fn, :DELEGATIONS) || {} + always = Array(value(delegations, :always_calls)).map do |name| + { name: name.to_s, conditional: false } + end + conditional = Array(value(delegations, :conditionally_calls)).map do |name| + { name: name.to_s, conditional: true } + end + always + conditional + end + + def target_for_call(source_owner, call_name) + if function?(source_owner, call_name) + return { + id: DependencyGraph.function_node_id(source_owner, call_name), + kind: :internal_call, + method: call_name + } + end + + receiver = receiver_for(call_name) + return nil unless receiver + + method = method_for(call_name) + target_owner = owner_for_receiver(source_owner, receiver) + if target_owner + if method && function?(target_owner, method) + return { + id: 
DependencyGraph.function_node_id(target_owner, method), + kind: :delegation, + method: method + } + end + + return { + id: DependencyGraph.owner_node_id(target_owner), + kind: :owner_call, + method: method + } + end + + external_target(receiver, method) + end + + def external_target(receiver, method) + return nil unless @include_external + return nil unless receiver.match?(/\A[A-Z]/) + + id = DependencyGraph.external_node_id(receiver) + @graph.add_node( + Node.new( + id: id, + kind: :external, + label: receiver, + owner: nil, + file: nil, + line: nil, + metadata: { method: method } + ) + ) + { id: id, kind: :external_call, method: method } + end + + def owner_for_receiver(source_owner, receiver) + return nil if receiver == "self" || receiver == "this" + return source_owner if receiver == source_owner + + state_type = state_type_for(source_owner, receiver) + return owner_for_type(state_type) if state_type + + return nil unless receiver.match?(/\A[A-Z]/) + + owner_for_type(receiver) + end + + def state_type_for(owner, receiver) + state_types = @state_types_by_owner[owner] + return state_types[receiver] if state_types.key?(receiver) + + if receiver.start_with?("@") + state_name = receiver.split(".").first + return state_types[state_name] + end + + if receiver.start_with?("self.", "this.") + field = receiver.split(".")[1] + return state_types[field] || state_types["@#{field}"] + end + + nil + end + + def call_label(call_name, conditional, method) + label = method ? "calls #{method}" : "calls" + conditional ? "conditional #{label}" : label + end + + def internal_label(conditional) + conditional ? 
"conditional internal" : "internal" + end + + def receiver_for(call_name) + return nil unless call_name.include?(".") + + parts = call_name.split(".") + parts[0...-1].join(".") + end + + def method_for(call_name) + return nil unless call_name.include?(".") + + call_name.split(".").last + end + + def function?(owner, function_name) + @functions_by_owner[owner].include?(function_name.to_s) + end + + def owner_for_type(type_text) + return nil if type_text.nil? + + text = type_text.to_s + return text if @owners.include?(text) + return @owner_by_simple[text] if @owner_by_simple.key?(text) + + owner_type_tokens(text).each do |token| + next if CORE_TYPES.include?(token) + return token if @owners.include?(token) + return @owner_by_simple[token] if @owner_by_simple.key?(token) + + simple = token.split("::").last + return @owner_by_simple[simple] if @owner_by_simple.key?(simple) + end + nil + end + + def owner_type_tokens(text) + text.scan(/[A-Z][A-Za-z0-9]*(?:::[A-Z][A-Za-z0-9]*)*/) + end + + def state_type_index(mod) + states(mod).each_with_object({}) do |state, out| + state_name = value(state, :name).to_s + type = value(state, :type) + out[state_name] = type.to_s if type && !type.to_s.empty? 
+ end + end + + def functions(mod) + Array(value(mod, :functions)) + end + + def states(mod) + Array(value(mod, :state)) + end + + def value(hash, key) + return nil unless hash.respond_to?(:[]) + + hash[key] || hash[key.to_s] + end + end + end +end diff --git a/gems/espalier/lib/espalier/formatter.rb b/gems/espalier/lib/espalier/formatter.rb index 730db63e4..c05e21f6c 100644 --- a/gems/espalier/lib/espalier/formatter.rb +++ b/gems/espalier/lib/espalier/formatter.rb @@ -2,6 +2,8 @@ require "yaml" require "json" +require_relative "dependency_graph" +require_relative "graphviz_formatter" sibling_sarif = File.expand_path("../../../decomplex/lib/decomplex/sarif", __dir__) if File.file?("#{sibling_sarif}.rb") require sibling_sarif @@ -83,6 +85,10 @@ def to_yaml(manifest) YAML.dump(manifest) end + def to_dot(manifest) + GraphvizFormatter.new(DependencyGraph.from_manifest(manifest)).to_dot + end + def to_sarif(manifest) JSON.pretty_generate(to_sarif_hash(manifest)) end diff --git a/gems/espalier/lib/espalier/graphviz_formatter.rb b/gems/espalier/lib/espalier/graphviz_formatter.rb new file mode 100644 index 000000000..17e98b8de --- /dev/null +++ b/gems/espalier/lib/espalier/graphviz_formatter.rb @@ -0,0 +1,223 @@ +# frozen_string_literal: true + +module Espalier + # Renders an Espalier::DependencyGraph as Graphviz DOT. 
+ class GraphvizFormatter + GRAPH_ATTRIBUTES = { + rankdir: "LR", + compound: true, + concentrate: true, + fontsize: 12, + fontname: "Arial", + label: "Espalier Dependency Graph", + labelloc: "t", + nodesep: 0.35, + ranksep: 0.75 + }.freeze + + NODE_ATTRIBUTES = { + shape: "box", + style: "rounded,filled", + fillcolor: "#ffffff", + color: "#6b7280", + fontname: "Arial", + fontsize: 10 + }.freeze + + EDGE_ATTRIBUTES = { + color: "#4b5563", + fontname: "Arial", + fontsize: 9, + arrowsize: 0.7 + }.freeze + + def initialize(graph) + @graph = graph + end + + def to_dot + lines = [] + lines << "digraph espalier_dependencies {" + lines << " graph#{attributes(GRAPH_ATTRIBUTES)};" + lines << " node#{attributes(NODE_ATTRIBUTES)};" + lines << " edge#{attributes(EDGE_ATTRIBUTES)};" + lines << "" + owner_clusters.each do |owner, nodes| + lines.concat(cluster_lines(owner, nodes)) + end + external_nodes.each do |node| + lines << " #{quote(node.id)}#{attributes(node_attributes(node))};" + end + lines << "" + @graph.edges.each do |edge| + lines << " #{quote(edge.source)} -> #{quote(edge.target)}#{attributes(edge_attributes(edge))};" + end + lines << "}" + lines.join("\n") + end + + private + + def owner_clusters + @graph.owner_nodes.map(&:owner).sort.to_h do |owner| + [owner, @graph.nodes_for_owner(owner)] + end + end + + def external_nodes + @graph.nodes.select { |node| node.kind == :external } + end + + def cluster_lines(owner, nodes) + lines = [] + lines << " subgraph #{quote(cluster_id(owner))} {" + lines << " label=#{quote(owner)};" + lines << " color=#{quote("#d1d5db")};" + lines << " style=#{quote("rounded")};" + nodes.sort_by { |node| [node.kind == :owner ? 
0 : 1, node.label.to_s] }.each do |node| + lines << " #{quote(node.id)}#{attributes(node_attributes(node))};" + end + lines << " }" + lines << "" + lines + end + + def cluster_id(owner) + "cluster_#{owner.to_s.gsub(/[^A-Za-z0-9_]/, "_")}" + end + + def node_attributes(node) + attrs = case node.kind + when :owner + owner_node_attributes(node) + when :function + function_node_attributes(node) + else + external_node_attributes(node) + end + if @graph.cyclic_node_ids.include?(node.id) + attrs = attrs.merge(color: "#b91c1c", penwidth: 2.0, fillcolor: cycle_fill(node)) + end + attrs + end + + def owner_node_attributes(node) + metadata = node.metadata || {} + details = [] + details << metadata[:type].to_s if metadata[:type] + details << "#{metadata[:function_count]} fn" + details << "#{metadata[:state_count]} state" + { + shape: "component", + fillcolor: "#e0f2fe", + color: "#0369a1", + label: ([node.label] + details).join("\n"), + tooltip: tooltip_for(node) + }.merge(url_attribute(node)) + end + + def function_node_attributes(node) + metadata = node.metadata || {} + reads = Array(metadata[:reads]).size + writes = Array(metadata[:writes]).size + details = ["#{metadata[:visibility] || :public} R#{reads} W#{writes}"] + details << "L#{node.line}" if node.line + { + shape: writes.positive? ? "box3d" : "box", + fillcolor: writes.positive? ? "#fff7ed" : "#ffffff", + color: writes.positive? ? "#c2410c" : "#6b7280", + label: ([node.label] + details).join("\n"), + tooltip: tooltip_for(node) + }.merge(url_attribute(node)) + end + + def external_node_attributes(node) + { + shape: "box", + style: "rounded,dashed,filled", + fillcolor: "#f3f4f6", + color: "#9ca3af", + label: node.label, + tooltip: tooltip_for(node) + } + end + + def edge_attributes(edge) + attrs = { + label: edge.weight && edge.weight > 1 ? 
"#{edge.label} x#{edge.weight}" : edge.label + }.merge(edge_style(edge)) + + source_component = @graph.cycle_component_by_node[edge.source] + if source_component && source_component == @graph.cycle_component_by_node[edge.target] + attrs = attrs.merge(color: "#b91c1c", penwidth: 2.0) + end + attrs + end + + def edge_style(edge) + case edge.kind + when :state_type + { color: "#7c3aed", style: "dotted", arrowhead: "vee" } + when :internal_call + { color: "#374151", style: edge.conditional ? "dashed" : "solid" } + when :delegation + { color: "#2563eb", style: edge.conditional ? "dashed" : "solid" } + when :owner_call + { color: "#0891b2", style: edge.conditional ? "dashed" : "solid" } + when :external_call + { color: "#9ca3af", style: edge.conditional ? "dashed" : "dotted" } + else + { color: "#4b5563", style: edge.conditional ? "dashed" : "solid" } + end + end + + def cycle_fill(node) + node.kind == :owner ? "#fee2e2" : "#fff1f2" + end + + def tooltip_for(node) + parts = [node.label] + parts << node.file if node.file + parts << "line #{node.line}" if node.line + if (signature = node.metadata && node.metadata[:signature]) + parts << signature + end + parts.join(" | ") + end + + def url_attribute(node) + return {} unless node.file + + url = node.file.to_s + url += "#L#{node.line}" if node.line + { URL: url } + end + + def attributes(hash) + return "" if hash.empty? 
+ + " [" + hash.sort_by { |key, _value| key.to_s }.map { |key, value| "#{key}=#{dot_value(value)}" }.join(", ") + "]" + end + + def dot_value(value) + case value + when true + "true" + when false + "false" + when Numeric + value.to_s + else + quote(value) + end + end + + def quote(value) + text = value.to_s + text = text.gsub("\\", "\\\\\\\\") + .gsub("\"", "\\\"") + .gsub("\n", "\\n") + "\"#{text}\"" + end + end +end diff --git a/gems/espalier/test/ast_extractor_test.rb b/gems/espalier/test/ast_extractor_test.rb index d86087924..4fcc2e45e 100644 --- a/gems/espalier/test/ast_extractor_test.rb +++ b/gems/espalier/test/ast_extractor_test.rb @@ -12,7 +12,10 @@ class AstExtractorTest < Minitest::Test typescript: "DECOMPLEX_TS_TYPESCRIPT_PATH", go: "DECOMPLEX_TS_GO_PATH", rust: "DECOMPLEX_TS_RUST_PATH", - zig: "DECOMPLEX_TS_ZIG_PATH" + zig: "DECOMPLEX_TS_ZIG_PATH", + c: "DECOMPLEX_TS_C_PATH", + cpp: "DECOMPLEX_TS_CPP_PATH", + csharp: "DECOMPLEX_TS_CSHARP_PATH" }.freeze def parse_ruby(code) @@ -298,4 +301,164 @@ class Unit { refute_nil run[:span], language end end + + def test_extracts_architecture_parity_facts_across_supported_tree_sitter_languages + profiles = { + python: [ + ".py", + <<~PY, + class Worker: + def work(self): + pass + class Unit: + def __init__(self, value): + self.value = value + self.other = Worker() + def run(self): + self.value = self.value + 1 + self.other.work() + self._bump() + def _bump(self): + pass + PY + "Unit", + "run", + "_bump", + "other", + nil, + "self.other" + ], + typescript: [ + ".ts", + <<~TS, + class Worker { work(): void {} } + class Unit { + value: number; + private other: Worker; + constructor(value: number) { this.value = value; this.other = new Worker(); } + public run(): void { this.value = this.value + 1; this.other.work(); this.bump(); } + private bump(): void {} + } + TS + "Unit", + "run", + "bump", + "other", + "Worker", + "this.other" + ], + go: [ + ".go", + <<~GO, + package p + type Worker struct{} + func (w *Worker) 
Work() {} + type Unit struct { value int; other *Worker } + func (u *Unit) Run() { u.value = u.value + 1; u.other.Work(); u.bump() } + func (u *Unit) bump() {} + GO + "Unit", + "Run", + "bump", + "other", + "*Worker", + "self.other" + ], + rust: [ + ".rs", + <<~RS, + struct Worker {} + impl Worker { fn work(&self) {} } + struct Unit { value: i32, other: Worker } + impl Unit { + pub fn run(&mut self) { self.value = self.value + 1; self.other.work(); self.bump(); } + fn bump(&self) {} + } + RS + "Unit", + "run", + "bump", + "other", + "Worker", + "self.other" + ], + c: [ + ".c", + <<~C, + typedef struct Worker { int ready; } Worker; + typedef struct Unit { int value; Worker *other; } Unit; + void worker_work(Worker *worker) {} + static void unit_bump(Unit *unit) {} + void unit_run(Unit *unit) { unit->value = unit->value + 1; worker_work(unit->other); unit_bump(unit); } + C + "Unit", + "unit_run", + "unit_bump", + "other", + "Worker", + "self.other" + ], + cpp: [ + ".cpp", + <<~CPP, + class Worker { public: void work() {} }; + class Unit { + int value; + Worker other; + public: + void run(){ value = value + 1; other.work(); bump(); } + private: + void bump(){} + }; + CPP + "Unit", + "run", + "bump", + "other", + "Worker", + "other" + ], + csharp: [ + ".cs", + <<~CS, + class Worker { public void Work() {} } + class Unit { + private int value; + private Worker other = new Worker(); + public void Run(){ value = value + 1; other.Work(); Bump(); } + private void Bump(){} + } + CS + "Unit", + "Run", + "Bump", + "other", + "Worker", + "other" + ] + } + + available = profiles.select do |language, _profile| + grammar = ENV[GRAMMAR_ENVS.fetch(language)] + grammar && File.file?(grammar) + end + skip "set Tree-sitter grammar paths to run architecture parity extractor test" if available.empty? 
+ + available.each do |language, (ext, source, owner_name, run_name, helper_name, state_name, state_type, receiver)| + mods = parse_source(source, ext) + mod = mods.find { |candidate| candidate[:name] == owner_name } + refute_nil mod, language + assert_includes mod[:states], state_name, language + assert_equal state_type, mod[:ivar_types][state_name] if state_type + + vis = mod[:methods].to_h { |method| [method[:name], method[:visibility]] } + assert_equal :public, vis[run_name], language + assert_equal :private, vis[helper_name], language + + run = mod[:methods].find { |method| method[:name] == run_name } + assert_includes run[:effects][:writes], "value", language + assert_includes run[:effects][:reads], state_name, language + assert_includes run[:delegations].map { |call| call[:receiver] }, receiver, language + end + end end diff --git a/gems/espalier/test/dependency_graph_test.rb b/gems/espalier/test/dependency_graph_test.rb new file mode 100644 index 000000000..79f288f86 --- /dev/null +++ b/gems/espalier/test/dependency_graph_test.rb @@ -0,0 +1,193 @@ +# frozen_string_literal: true + +require "minitest/autorun" +require_relative "../lib/espalier" + +class DependencyGraphTest < Minitest::Test + def test_dot_output_renders_owner_function_and_dependency_edges + dot = Espalier::Formatter.to_dot(service_manifest) + + assert_includes dot, "digraph espalier_dependencies" + assert_includes dot, "\"cluster_Service\"" + assert_includes dot, "\"owner:Service\"" + assert_includes dot, "\"fn:Service#run\"" + assert_includes dot, "\"fn:Repository#fetch\"" + assert_includes dot, "\"fn:Service#run\" -> \"fn:Service#prepare\"" + assert_includes dot, "label=\"internal x2\"" + assert_includes dot, "\"fn:Service#run\" -> \"fn:Repository#fetch\"" + assert_includes dot, "label=\"calls fetch\"" + assert_includes dot, "\"fn:Service#run\" -> \"fn:Repository#retry\"" + assert_includes dot, "label=\"conditional calls retry\"" + assert_includes dot, "style=\"dashed\"" + assert_includes 
dot, "\"owner:Service\" -> \"owner:Repository\"" + assert_includes dot, "label=\"state @repo\"" + assert_includes dot, "style=\"dotted\"" + refute_includes dot, "external:String" + end + + def test_graph_aggregates_duplicate_internal_edges + graph = Espalier::DependencyGraph.from_manifest(service_manifest) + edges = graph.edges.select do |edge| + edge.source == "fn:Service#run" && + edge.target == "fn:Service#prepare" && + edge.kind == :internal_call + end + + assert_equal 1, edges.size + assert_equal 2, edges.first.weight + end + + def test_dot_output_escapes_labels_and_tooltips + manifest = [ + { + module: "Quoted\"Owner", + file: "src/quoted.rb", + type: :class, + functions: [ + { + name: "say_\"hello\"", + signature: "def say_\"hello\"", + line: 3, + EFFECTS: { reads: [], writes: [] }, + DELEGATIONS: {} + } + ] + } + ] + + dot = Espalier::Formatter.to_dot(manifest) + + assert_includes dot, "\"owner:Quoted\\\"Owner\"" + assert_includes dot, "label=\"Quoted\\\"Owner" + assert_includes dot, "say_\\\"hello\\\"" + end + + def test_string_key_manifest_from_yaml_is_supported + manifest = [ + { + "module" => "Client", + "file" => "src/client.rb", + "type" => "class", + "state" => [{ "name" => "@server", "type" => "Server" }], + "functions" => [ + { + "name" => "call", + "line" => 5, + "EFFECTS" => { "reads" => ["@server"], "writes" => [] }, + "DELEGATIONS" => { "always_calls" => ["@server.handle"] } + } + ] + }, + { + "module" => "Server", + "file" => "src/server.rb", + "type" => "class", + "functions" => [ + { + "name" => "handle", + "EFFECTS" => { "reads" => [], "writes" => [] }, + "DELEGATIONS" => {} + } + ] + } + ] + + dot = Espalier::Formatter.to_dot(manifest) + + assert_includes dot, "\"fn:Client#call\" -> \"fn:Server#handle\"" + assert_includes dot, "URL=\"src/client.rb#L5\"" + end + + def test_cycles_are_highlighted + dot = Espalier::Formatter.to_dot( + [ + owner("A", calls: ["B.call"]), + owner("B", calls: ["A.call"]) + ] + ) + + assert_includes dot, 
"\"fn:A#call\" -> \"fn:B#call\"" + assert_includes dot, "\"fn:B#call\" -> \"fn:A#call\"" + assert_includes dot, "penwidth=2.0" + assert_includes dot, "color=\"#b91c1c\"" + end + + private + + def service_manifest + [ + { + module: "Service", + file: "src/service.rb", + type: :class, + line: 1, + state: [{ name: "@repo", type: "Repository", properties: [] }], + functions: [ + { + name: "run", + signature: "def run", + visibility: :public, + line: 4, + EFFECTS: { reads: ["@repo"], writes: [] }, + DELEGATIONS: { + always_calls: ["prepare", "@repo.fetch", "String.upcase"], + conditionally_calls: ["Repository.retry"] + }, + CALL_GRAPH: { internal_calls: ["prepare"] } + }, + { + name: "prepare", + signature: "def prepare", + visibility: :private, + line: 10, + EFFECTS: { reads: [], writes: ["@repo"] }, + DELEGATIONS: {}, + CALL_GRAPH: { internal_callers: ["run"] } + } + ], + call_graph: { + internal_edges: [{ caller: "run", callee: "prepare", type: :always }] + } + }, + { + module: "Repository", + file: "src/repository.rb", + type: :class, + state: [], + functions: [ + { + name: "fetch", + signature: "def fetch", + visibility: :public, + line: 3, + EFFECTS: { reads: [], writes: [] }, + DELEGATIONS: {} + }, + { + name: "retry", + signature: "def retry", + visibility: :public, + line: 8, + EFFECTS: { reads: [], writes: [] }, + DELEGATIONS: {} + } + ] + } + ] + end + + def owner(name, calls:) + { + module: name, + file: "src/#{name.downcase}.rb", + type: :class, + functions: [ + { + name: "call", + EFFECTS: { reads: [], writes: [] }, + DELEGATIONS: { always_calls: calls } + } + ] + } + end +end diff --git a/gems/lineage/CONTRIBUTING.md b/gems/lineage/CONTRIBUTING.md index 48f59e9fa..aabe430fa 100644 --- a/gems/lineage/CONTRIBUTING.md +++ b/gems/lineage/CONTRIBUTING.md @@ -76,9 +76,11 @@ provider adapters direct database authority. Logical-unit identity is the core contract. Changes to extraction should be conservative and tested against renames, moves, and refactors. 
-The current extractor is heuristic. Planned Tree-sitter-backed profiles -should replace extraction internals without changing the storage and -history contracts. +Supported production languages should use Tree-sitter-backed extraction, +not line regexes. Regex-style heuristics are acceptable only as a +bootstrap fallback for secondary languages that are explicitly marked +experimental. If a Tree-sitter-backed file cannot be parsed, prefer +returning no units over inventing low-confidence boundaries. ## UI And LSP diff --git a/gems/lineage/Cargo.lock b/gems/lineage/Cargo.lock index 897e16a10..d8c790dfd 100644 --- a/gems/lineage/Cargo.lock +++ b/gems/lineage/Cargo.lock @@ -761,8 +761,16 @@ dependencies = [ "tower-http", "tower-lsp", "tree-sitter", + "tree-sitter-c", + "tree-sitter-c-sharp", + "tree-sitter-cpp", + "tree-sitter-go", + "tree-sitter-javascript", "tree-sitter-language", + "tree-sitter-python", + "tree-sitter-ruby", "tree-sitter-rust", + "tree-sitter-typescript", "tree-sitter-zig", ] @@ -1217,6 +1225,12 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" + [[package]] name = "strsim" version = "0.10.0" @@ -1417,13 +1431,64 @@ dependencies = [ [[package]] name = "tree-sitter" -version = "0.23.2" +version = "0.24.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0203df02a3b6dd63575cc1d6e609edc2181c9a11867a271b25cfd2abff3ec5ca" +checksum = "a5387dffa7ffc7d2dae12b50c6f7aab8ff79d6210147c6613561fc3d474c6f75" dependencies = [ "cc", "regex", "regex-syntax", + "streaming-iterator", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-c" +version = "0.23.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "afd2b1bf1585dc2ef6d69e87d01db8adb059006649dd5f96f31aa789ee6e9c71" +dependencies = [ + "cc", + 
"tree-sitter-language", +] + +[[package]] +name = "tree-sitter-c-sharp" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67f06accca7b45351758663b8215089e643d53bd9a660ce0349314263737fcb0" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-cpp" +version = "0.23.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df2196ea9d47b4ab4a31b9297eaa5a5d19a0b121dceb9f118f6790ad0ab94743" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-go" +version = "0.23.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b13d476345220dbe600147dd444165c5791bf85ef53e28acbedd46112ee18431" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-javascript" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf40bf599e0416c16c125c3cec10ee5ddc7d1bb8b0c60fa5c4de249ad34dc1b1" +dependencies = [ + "cc", "tree-sitter-language", ] @@ -1433,6 +1498,26 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c199356c799a8945965bb5f2c55b2ad9d9aa7c4b4f6e587fe9dea0bc715e5f9c" +[[package]] +name = "tree-sitter-python" +version = "0.23.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d065aaa27f3aaceaf60c1f0e0ac09e1cb9eb8ed28e7bcdaa52129cffc7f4b04" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-ruby" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be0484ea4ef6bb9c575b4fdabde7e31340a8d2dbc7d52b321ac83da703249f95" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-rust" version = "0.23.0" @@ -1443,11 +1528,21 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-typescript" +version = "0.23.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c5f76ed8d947a75cc446d5fccd8b602ebf0cde64ccf2ffa434d873d7a575eff" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-zig" -version = "1.0.2" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2994e37b8ef1f715b931a5ff084a1b1713b1bc56e7aaebd148cc3efe0bf29ad9" +checksum = "ab11fc124851b0db4dd5e55983bbd9631192e93238389dcd44521715e5d53e28" dependencies = [ "cc", "tree-sitter-language", diff --git a/gems/lineage/Cargo.toml b/gems/lineage/Cargo.toml index 7f69bc22c..3b0fda3dd 100644 --- a/gems/lineage/Cargo.toml +++ b/gems/lineage/Cargo.toml @@ -31,10 +31,18 @@ sha2 = "0.10" tokio = { version = "1", features = ["io-std", "macros", "net", "rt", "rt-multi-thread"] } tower-http = { version = "0.5", features = ["set-header", "trace"] } tower-lsp = "0.20" -tree-sitter = "=0.23.2" +tree-sitter = "=0.24.7" +tree-sitter-c = "=0.23.4" +tree-sitter-c-sharp = "=0.23.1" +tree-sitter-cpp = "=0.23.4" +tree-sitter-go = "=0.23.4" +tree-sitter-javascript = "=0.23.1" tree-sitter-language = "=0.1.3" +tree-sitter-python = "=0.23.6" +tree-sitter-ruby = "=0.23.1" tree-sitter-rust = "=0.23.0" -tree-sitter-zig = "=1.0.2" +tree-sitter-typescript = "=0.23.2" +tree-sitter-zig = "=1.1.2" [dev-dependencies] tempfile = "=3.10.1" diff --git a/gems/lineage/README.md b/gems/lineage/README.md index 8caa87f39..7e38e05ed 100644 --- a/gems/lineage/README.md +++ b/gems/lineage/README.md @@ -120,7 +120,7 @@ from the same source and commit. 
| Coverage | `ingest-coverage` | Codecov JSON, SimpleCov JSON, Cobertura XML, kcov Cobertura XML | | Test exposure | `ingest-test-exposure` | Lineage `test-exposure` JSON | | Mutation testing | `ingest-mutants` | Ruby `mutant-facts/v1` | -| Systems hazards | `ingest-hazards` | Zig hazard provider | +| Systems hazards | `ingest-hazards` | Zig, Go, Rust, C, C++, C# hazard providers | | Stack traces | `ingest` | Sentry-style event JSON | | Static analysis and risk findings | `ingest-sarif` | SARIF 2.1.0 files from Decomplex, SlopCop, Boobytrap, Nil-Kill, Espalier, and third-party tools | @@ -283,6 +283,11 @@ Recommended CLEAR lanes: - Zig kcov unit coverage: `--format cobertura --test-type unit` - Zig systems evidence: `--test-type loom`, `--test-type vopr`, or `--test-type tsan` +- Rust systems evidence: `--test-type loom` for concurrency/atomic + checks and `--test-type miri` for unsafe-code checks +- C/C++ systems evidence: `--test-type tsan`, `asan`, `lsan`, or + `ubsan` +- C# systems evidence: `--test-type concurrency` or `unsafe` ### Test Exposure @@ -359,8 +364,12 @@ cargo run --manifest-path gems/lineage/Cargo.toml -- ingest-hazards \ --commit "$(git rev-parse HEAD)" ``` -The current first-party provider scans Zig runtime/lib hazard sites used -by CLEAR's Loom and VOPR coverage work. +First-party providers currently support `zig`, `go`, `rust`, `c`, +`cpp`, and `csharp`. Zig scans the CLEAR runtime/lib Loom and VOPR +hazard sites. Go scans concurrency hazards. Rust scans Loom-relevant concurrency/atomic sites and +unsafe blocks/operations. C and C++ scan sanitizer-relevant concurrency, +raw-memory, lifetime, and UB hazards. C# scans concurrency and unsafe +native-memory hazards. ### Stack Traces @@ -381,19 +390,23 @@ file. ## Supported Languages Roadmap -Lineage currently uses a heuristic logical-unit extractor. Ruby and Zig -are the most exercised paths because CLEAR uses them for compiler and -runtime review. 
Other language extraction is experimental until the -planned Tree-sitter-backed profiles replace the bootstrap extractor. - -- [x] Ruby: used for CLEAR compiler review. -- [x] Zig: used for CLEAR runtime review and systems hazards. -- [ ] Python: experimentally supported. -- [ ] JavaScript: experimentally supported. -- [ ] Lua: experimentally supported. -- [ ] C: experimentally supported. -- [ ] Go: experimentally supported. -- [ ] Assembly: experimentally supported. +Lineage uses Tree-sitter-backed logical-unit extraction for the core +languages it aims to track as a ground-truth risk ledger. For those +languages, parse failures produce no units instead of falling back to +regex boundaries. Heuristic extraction remains only for secondary +experimental languages. + +- [x] Ruby: Tree-sitter-backed; used for CLEAR compiler review. +- [x] Zig: Tree-sitter-backed; used for CLEAR runtime review and + systems hazards. +- [x] Rust: Tree-sitter-backed. +- [x] Python: Tree-sitter-backed. +- [x] JavaScript / TypeScript: Tree-sitter-backed. +- [x] Go: Tree-sitter-backed, including concurrency hazards. +- [x] C / C++: Tree-sitter-backed, including sanitizer hazards. +- [x] C#: Tree-sitter-backed, including concurrency/unsafe hazards. +- [ ] Lua: experimental heuristic extraction. +- [ ] Assembly: experimental label extraction. ## Boundaries diff --git a/gems/lineage/docs/agents/cross-lang-support.md b/gems/lineage/docs/agents/cross-lang-support.md new file mode 100644 index 000000000..1e43ff3ac --- /dev/null +++ b/gems/lineage/docs/agents/cross-lang-support.md @@ -0,0 +1,77 @@ +# Cross-Language Support Validation + +This document tracks the first practical validation pass for building Lineage databases from non-CLEAR repositories and ingesting analyzer, lint, coverage, hazard, and runtime evidence. + +`gems/lineage/docs/agents/plugins.md` describes the plugin architecture and broad language targets. 
It does not prescribe exact repositories, so this pass used representative active OSS projects with enough real code to exercise the adapters. + +## Goal + +Create one `lineage.db` per target repository, ingest the best available evidence, start a Lineage UI server for each on `0.0.0.0`, and spot check that the UI can review the project with cross-language data. + +## Validation Matrix + +| Language | Repository | Local Clone | Database | UI Port | Status | +| --- | --- | --- | --- | --- | --- | +| Python | `https://github.com/Textualize/rich` | `/tmp/lineage-rich` | `/tmp/lineage-rich/lineage.db` | `8081` | Complete | +| TypeScript | `https://github.com/colinhacks/zod` | `/tmp/lineage-zod` | `/tmp/lineage-zod/lineage.db` | `8082` | Complete | +| Go | `https://github.com/junegunn/fzf` | `/tmp/lineage-fzf` | `/tmp/lineage-fzf/lineage.db` | `8083` | Complete | +| Lua | `https://github.com/luarocks/luarocks` | `/tmp/lineage-lua-luarocks` | `/tmp/lineage-lua-luarocks/lineage.db` | `8084` | Complete, no coverage | +| C | `https://github.com/libuv/libuv` | `/tmp/lineage-c-libuv` | `/tmp/lineage-c-libuv/lineage.db` | `8085` | Complete, no coverage | +| C++ | `https://github.com/fmtlib/fmt` | `/tmp/lineage-cpp-fmt` | `/tmp/lineage-cpp-fmt/lineage.db` | `8086` | Complete, no coverage | +| C# | `https://github.com/serilog/serilog` | `/tmp/lineage-csharp-serilog` | `/tmp/lineage-csharp-serilog/lineage.db` | `8087` | Complete, no coverage | +| Java | `https://github.com/google/gson` | `/tmp/lineage-java-gson` | `/tmp/lineage-java-gson/lineage.db` | `8088` | Complete, no coverage | +| Swift | `https://github.com/apple/swift-argument-parser` | `/tmp/lineage-swift-argument-parser` | `/tmp/lineage-swift-argument-parser/lineage.db` | `8089` | Complete, no coverage | +| Kotlin | `https://github.com/square/okio` | `/tmp/lineage-kotlin-okio` | `/tmp/lineage-kotlin-okio/lineage.db` | `8090` | Complete, no coverage | + +All UI servers were restarted with detached sessions and smoke 
checked through `curl` on ports `8081` through `8090`. + +## Evidence Targets + +Each repository received as much of this evidence as the current tools could produce without repository-specific hacks: + +- `lineage build`: Git history, logical units, churn, and ownership. +- Decomplex SARIF: structural complexity findings. +- SlopCop SARIF: coverage gaps and constraint findings. +- Boobytrap SARIF: bug-risk findings derived from churn, complexity, and coverage. +- Nil-kill SARIF: optionality, union, hidden enum, and primitive pressure findings where the language adapter supports them. +- Espalier SARIF: architectural pressure findings where the language adapter supports them. +- Lint SARIF: native lint output converted or emitted as SARIF where the repository already had a reasonable local toolchain. +- Coverage: native coverage output ingested through Lineage-supported formats when the toolchain was available. +- Runtime traces: Sentry-style stack trace ingestion for Python smoke coverage. +- Hazards: Go concurrency hazards for `fzf`. + +## Current Counts + +| Language | Logical Units | SARIF Artifacts | SARIF Findings | Quality Events | Coverage Line Events | Hazards | Runtime Events | +| --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | +| Python / Rich | 2,152 | 6 | 6,270 | 1,022 | 7,792 | 0 | 1 | +| TypeScript / Zod | 2,437 | 6 | 8,112 | 1,365 | 8,908 | 0 | 0 | +| Go / fzf | 1,421 | 7 | 13,316 | 608 | 16,422 | 312 | 0 | +| Lua / LuaRocks | 1,043 | 6 | 5,056 | 0 | 0 | 0 | 0 | +| C / libuv | 3,920 | 6 | 21,895 | 0 | 0 | 0 | 0 | +| C++ / fmt | 6,014 | 6 | 2,982 | 0 | 0 | 0 | 0 | +| C# / Serilog | 615 | 6 | 1,281 | 0 | 0 | 0 | 0 | +| Java / Gson | 4,921 | 6 | 2,624 | 0 | 0 | 0 | 0 | +| Swift / Argument Parser | 1,938 | 6 | 835 | 0 | 0 | 0 | 0 | +| Kotlin / Okio | 3,357 | 6 | 1,900 | 0 | 0 | 0 | 0 | + +## Adapter Work Completed + +- Replaced generic language placeholders with explicit Decomplex lexicons for Lua, C, C++, C#, Java, Swift, and Kotlin. 
+- Added real Tree-sitter syntax support and tests for C, C++, C#, Java, Swift, and Kotlin structural facts. +- Added Swift member access and `switch_entry` support. +- Added Kotlin `when_expression` and `when_entry` support. +- Added grammar candidate support for packages that ship `tree_sitter_*_binding.node`, needed by `tree-sitter-kotlin`. +- Added Go concurrency hazard detection through SlopCop/Lineage. +- Fixed Lineage source extraction and coverage ingestion issues found during TypeScript/Go validation. +- Fixed Nil-kill static-only normalization so non-Ruby languages do not accidentally depend on stale runtime traces. +- Replaced Lineage regex-first logical-unit extraction for Ruby, Python, JavaScript/TypeScript, Go, Rust, Zig, C/C++, and C# with Tree-sitter-backed extraction. The regex heuristic path is now only for secondary experimental languages. + +## Environment Gaps + +- Lua coverage/lint was limited by missing local LuaRocks/Busted tooling. +- C and C++ coverage was not generated in this pass; static analyzer, syntax lint, and SARIF ingestion were validated. +- C#, Java, Swift, and Kotlin native build/lint/coverage were limited by missing `dotnet`, Java, Swift, and Kotlin toolchains in this environment. +- TypeScript and Go runtime tracing are still out of scope for this pass. + +These are environment/toolchain gaps, not Lineage ingestion blockers. The DBs and UIs exist for all requested languages. diff --git a/gems/lineage/docs/agents/lang-support-quality.md b/gems/lineage/docs/agents/lang-support-quality.md new file mode 100644 index 000000000..69483ffc6 --- /dev/null +++ b/gems/lineage/docs/agents/lang-support-quality.md @@ -0,0 +1,208 @@ +# Multi-Language Support Quality Pass + +This pass spot checked the validation DBs created for Python, TypeScript, Go, Lua, C, C++, C#, Java, Swift, and Kotlin. 
The goal was not to prove feature parity with Ruby, but to verify that Lineage can ingest and display useful SARIF/coverage/risk evidence for each language, and to fix clear cross-language false positives found during review. + +## Quality Checklist + +- Lineage DB exists and UI serves the repository. +- SARIF artifacts ingest into `sarif_findings` with stable paths and line anchors. +- Decomplex findings include enough detector-specific context to be actionable. +- Nil-kill static pressure findings do not flag obviously typed or non-null constructs as loose contracts. +- SlopCop and Boobytrap produce useful output when coverage/churn exists, and degrade clearly when coverage is absent. +- Espalier emits architecture facts where class/function ownership extraction is mature. +- Native lint SARIF is ingested when the local toolchain can produce it. +- Runtime or hazard evidence is present for languages where support currently exists. + +## Fixes From This Pass + +- Decomplex SARIF messages now include detector-specific payloads for the major findings. For example, Rich `console.py` now shows `Derived-State Staleness: max_height derived from size at line 995; size reassigned at line 996 but max_height is not recomputed` instead of only naming the method. +- Decomplex suppresses generated Lua/Teal `_tl_compat` compatibility prelude branches. LuaRocks no longer reports line-1 generated prelude missing-abstraction findings. +- Decomplex extracts Go `name type` struct field declarations, so fields like `I16 []int16` keep their type. +- Nil-kill no longer treats Python `-> None` as nullable pressure by itself. `str | None`, `None | str`, `Optional[...]`, `null`, and `undefined` still count. +- Nil-kill Go static evidence now preserves typed struct fields through to SARIF; fzf no longer reports `Slab#I16` as an untyped field. + +Regression tests added: + +- `gems/decomplex/test/syntax_test.rb`: Lua generated prelude suppression and Go name-type struct fields. 
+- `gems/decomplex/test/report_test.rb`: actionable SARIF messages for derived-state staleness and broken protocols. +- `gems/nil-kill/spec/multi_language_runtime_spec.rb`: Python `-> None` nullable handling and Go typed struct field evidence. + +## Current Validation DBs + +All UI servers responded with HTTP 200 on ports `8081` through `8090` after SARIF reingest and UI summary refresh. + +| Language | Repo | Port | Logical Units | SARIF Artifacts | SARIF Findings | Coverage Lines | Quality Events | +| --- | --- | ---: | ---: | ---: | ---: | ---: | ---: | +| Python | Rich | 8081 | 2,152 | 6 | 6,905 | 7,792 | 1,022 | +| TypeScript | Zod | 8082 | 2,437 | 6 | 8,246 | 8,908 | 1,365 | +| Go | fzf | 8083 | 1,421 | 7 | 13,219 | 16,422 | 608 | +| Lua | LuaRocks | 8084 | 1,043 | 6 | 5,731 | 0 | 0 | +| C | libuv | 8085 | 3,920 | 6 | 30,310 | 0 | 0 | +| C++ | fmt | 8086 | 6,014 | 6 | 5,120 | 0 | 0 | +| C# | Serilog | 8087 | 615 | 6 | 1,524 | 0 | 0 | +| Java | Gson | 8088 | 4,921 | 6 | 3,542 | 0 | 0 | +| Swift | Argument Parser | 8089 | 1,938 | 6 | 1,129 | 0 | 0 | +| Kotlin | Okio | 8090 | 3,357 | 6 | 2,243 | 0 | 0 | + +Swift and Kotlin SARIF reingest skipped two non-SARIF JSON evidence files in each `tmp/lineage-sarif` directory. That is expected because the ingest command accepts directories and ignores JSON files that are not SARIF documents. + +## Language Findings + +### Python / Rich + +Status: good. + +The strongest path is covered: Lineage DB, Decomplex, Nil-kill, Espalier, SlopCop, Boobytrap, native lint, coverage, quality events, and one runtime stack-trace smoke event all ingest. Rich is the best multi-language validation target after CLEAR Ruby because it has meaningful Python type annotations and coverage. + +Spot checks: + +- Decomplex state-branch and derived-state findings now include state refs, predicates, and stale variable/source details. 
+- Nil-kill nullable signatures now avoid false positives for plain `-> None`, while still flagging real nullable params/returns. +- SlopCop and Boobytrap findings are anchored to real coverage/churn data. +- Native lint SARIF from Black is visible and path-anchored. + +Remaining caveat: test/example files are included in the validation DB. That is useful for ingestion coverage, but production review should use source-role filtering in Lineage. + +### TypeScript / Zod + +Status: good, with test-file noise. + +TypeScript SARIF ingestion, coverage, Decomplex, Nil-kill, Espalier, SlopCop, and Boobytrap all produce anchored findings. Decomplex points at real large schema/parser functions and TypeScript annotations feed Nil-kill static pressure. + +Spot checks: + +- Decomplex state-branch density on Zod parser paths includes concrete `_def`/schema refs and predicates. +- Nil-kill flags `unknown`/`any`-style slots without requiring runtime tracing. +- SlopCop and Boobytrap coverage/churn rows ingest correctly. + +Remaining caveat: broken-protocol and Boobytrap rows in test suites are noisy. This is mostly a source-role/ranking issue, not a TypeScript parser failure. + +### Go / fzf + +Status: good. + +Go has the best non-Ruby systems-language story in this pass. Lineage ingests coverage, SlopCop coverage gaps, Boobytrap risk, Decomplex, Nil-kill static facts, and Go concurrency hazard SARIF. + +Spot checks: + +- SlopCop Go constraint SARIF flags channel and lock/sync hazards lacking race coverage. +- Decomplex identifies large terminal/control-flow functions with convergence across several detectors. +- Nil-kill now preserves typed struct fields such as `Slab.I16 []int16`, removing a concrete false positive. + +Remaining caveat: Go hazard support is currently concurrency-focused. Other safety categories need explicit language rules if we want broader Go systems checks. + +### Lua / LuaRocks + +Status: usable static ingestion, experimental analysis quality. 
+ +Lineage DB and SARIF ingestion work. Decomplex produces useful Lua findings after generated Teal prelude suppression. Nil-kill and Espalier are sparse, which matches the current maturity of Lua ownership/type extraction. + +Spot checks: + +- Generated `_tl_compat` prelude line-1 missing-abstraction findings are gone. +- Real Lua findings remain, e.g. repeated guard tuples and state-branch predicates. +- SlopCop/Boobytrap rows exist but are static/no-coverage quality because no Lua coverage was available. + +Remaining caveat: Lua needs better function ownership and module/type conventions before Espalier and Nil-kill can be more than light static signals. + +### C / libuv + +Status: strong SARIF ingestion, experimental analysis quality. + +Lineage handles the large libuv DB and ingests Decomplex, SlopCop, Boobytrap, Nil-kill, Espalier, and syntax-lint SARIF. Decomplex results are plentiful and anchored. + +Spot checks: + +- Decomplex state-branch density points at real C state/predicate-heavy files like `src/win/pipe.c`. +- SlopCop/Boobytrap can rank paths, but no coverage was generated in this environment. +- Native syntax lint catches environment/header availability issues. Those are useful as toolchain diagnostics, not code-quality verdicts. + +Remaining caveat: C has no coverage here, and C header/platform conditionals create noisy lint results unless the native build environment is configured. + +### C++ / fmt + +Status: strong SARIF ingestion, experimental analysis quality. + +Lineage ingests fmt SARIF and the UI handles template-heavy headers. Decomplex and Nil-kill produce anchored findings; Espalier has limited but nonzero ownership extraction. + +Spot checks: + +- Decomplex findings are anchored in headers and bundled tests. +- Nil-kill nullable findings around pointer/time APIs are plausible. +- Native C++ syntax lint found module/toolchain issues. 
+ +Remaining caveat: bundled third-party/test code is included, so production review needs source-role filtering. C++ templates and macros need more language-specific tuning before high confidence architecture claims. + +### C# / Serilog + +Status: usable static ingestion, moderate Decomplex signal. + +SARIF ingestion works and Decomplex points at real branch-heavy formatting/parsing code. Nil-kill nullable signature findings map well to C# nullable-style APIs. + +Spot checks: + +- Decomplex state-branch findings include concrete property names and predicates. +- Nil-kill nullable signature findings are plausible in Serilog configuration APIs. +- SlopCop/Boobytrap are static/no-coverage quality because coverage was unavailable. + +Remaining caveat: Espalier emitted no findings in this validation pass, so C# architecture extraction needs more work before it can be relied on. + +### Java / Gson + +Status: usable static ingestion, moderate Decomplex/Espalier signal. + +Java SARIF ingestion works. Decomplex, Nil-kill, Espalier, SlopCop, and Boobytrap all produce anchored findings, with Decomplex pointing at real parser/adapter complexity. + +Spot checks: + +- Decomplex state-branch density in `TypeAdapters` and `JsonReader` has meaningful refs/predicates. +- Espalier emits read-only function facts for immutable-style value methods. +- Nil-kill untyped fields/methods are plausible where generic/reflection-heavy Java code defeats simple extraction. + +Remaining caveat: no Java coverage or native lint was available in this environment, so risk ranking lacks coverage-backed confidence. + +### Swift / Argument Parser + +Status: usable static ingestion, experimental analysis quality. + +Lineage DB and SARIF ingestion work. Decomplex and Espalier produce anchored Swift findings; Nil-kill static evidence ingests. SlopCop is empty because no coverage was generated. 
+ +Spot checks: + +- Decomplex state-branch density in completion generation includes Swift option/subcommand predicates. +- Espalier has limited read-only function extraction. +- Nil-kill static untyped signatures are present where generic Swift inference is not yet mature. + +Remaining caveat: Swift needs coverage ingestion and better function/owner extraction before architecture metrics should be treated as strong signal. + +### Kotlin / Okio + +Status: usable static ingestion, moderate Decomplex signal. + +Kotlin DB and SARIF ingestion work. Decomplex has useful findings in buffer/filesystem code, and Espalier emits a small set of function facts. Nil-kill static findings are anchored. + +Spot checks: + +- Decomplex state-branch density in `Buffer.kt` includes concrete buffer/segment refs and predicates. +- Espalier identifies some read-only/impure functions. +- Nil-kill untyped signatures point at equality/select APIs where extraction needs stronger Kotlin typing rules. + +Remaining caveat: no coverage was generated, SlopCop is empty, and Kotlin parser extraction needs more language-specific tuning before architecture metrics are high confidence. + +## Cross-Cutting Assessment + +The common ingestion path is solid: all ten DBs load, SARIF artifacts persist, UI summaries refresh, and servers respond. Decomplex is the most broadly useful analyzer across all languages because Tree-sitter extraction gives it enough syntax to anchor complexity findings. + +The biggest quality divider is coverage. Python, TypeScript, and Go have coverage-backed SlopCop/Boobytrap signal; the other seven languages currently have static-only or churn-only risk, which should be presented as lower confidence. + +Nil-kill is useful for Python, TypeScript, Go, C#, Java, Swift, and Kotlin static pressure, but language-specific type extraction determines signal quality. 
The Go struct-field and Python `-> None` fixes show the right pattern: false positives should be fixed in the shared syntax/provider layer with regression tests, not tuned per repository. + +Espalier is useful where class/function ownership extraction is mature. It is sparse for Lua, C, C#, and Swift/Kotlin compared with Ruby/TypeScript/Go/Java. Treat missing Espalier signal in those languages as adapter immaturity, not proof of good architecture. + +## Recommended Next Work + +- Add source-role filtering in Lineage views and ranking so `src`/production findings can be reviewed separately from tests, examples, vendored code, and generated code. +- Add explicit generated/vendor detection to the shared source filter for common language artifacts. +- Improve C/C++ native build-aware lint/coverage collection; static parser output alone is not enough for high-confidence systems-language review. +- Add coverage ingestion recipes for Lua, C#, Java, Swift, and Kotlin validation repos. +- Continue adding language-specific ownership/type extraction only when a spot check finds a concrete false positive or missing high-value signal. 
diff --git a/gems/lineage/src/extract.rs b/gems/lineage/src/extract.rs index e6f15ae40..353089eda 100644 --- a/gems/lineage/src/extract.rs +++ b/gems/lineage/src/extract.rs @@ -3,8 +3,10 @@ use std::collections::BTreeSet; use std::collections::HashMap; use tree_sitter::{Language, Node, Parser}; -pub const DEFAULT_CODE_EXTENSIONS: &[&str] = - &["rb", "zig", "py", "js", "lua", "c", "go", "rs", "S"]; +pub const DEFAULT_CODE_EXTENSIONS: &[&str] = &[ + "rb", "zig", "py", "js", "jsx", "mjs", "cjs", "ts", "tsx", "lua", "c", "h", "cc", "cpp", + "cxx", "hh", "hpp", "hxx", "cs", "java", "swift", "kt", "kts", "go", "rs", "S", +]; const DEFAULT_IGNORED_COMPONENTS: &[&str] = &[ ".git", ".zig-cache", @@ -173,21 +175,12 @@ impl BoundaryExtractor for HeuristicExtractor { let ext = extension(&file.path).map(|value| normalize_extension(&value)); let lines: Vec<&str> = file.contents.lines().collect(); - let mut candidates = ext - .as_deref() - .and_then(|extension| tree_sitter_candidates(file, extension, &lines)); - - if candidates.as_ref().map(Vec::is_empty).unwrap_or(true) { - let mut detected = Vec::new(); - for (index, line) in lines.iter().enumerate() { - if let Some(candidate) = detect_candidate(line, (index + 1) as u32, ext.as_deref()) { - detected.push(candidate); - } + let candidates = match ext.as_deref() { + Some(extension) if TreeSitterAdapter::for_extension(extension).is_some() => { + tree_sitter_candidates(file, extension, &lines).unwrap_or_default() } - candidates = Some(detected); - } - - let candidates = candidates.unwrap_or_default(); + _ => heuristic_candidates(&lines, ext.as_deref()), + }; candidates .iter() .enumerate() @@ -231,9 +224,15 @@ fn detect_candidate(line: &str, line_number: u32, extension: Option<&str>) -> Op match extension { Some("rb") | Some("py") => detect_ruby_python(trimmed, line_number), - Some("js") => detect_javascript(trimmed, line_number), + Some("js") | Some("jsx") | Some("mjs") | Some("cjs") | Some("ts") | Some("tsx") => { + 
detect_javascript_typescript(trimmed, line_number) + } Some("lua") => detect_lua(trimmed, line_number), - Some("c") => detect_c(trimmed, line_number), + Some("c") | Some("h") | Some("cc") | Some("cpp") | Some("cxx") | Some("hh") + | Some("hpp") | Some("hxx") => detect_c_family(trimmed, line_number), + Some("cs") | Some("java") => detect_csharp_java(trimmed, line_number), + Some("swift") => detect_swift(trimmed, line_number), + Some("kt") | Some("kts") => detect_kotlin(trimmed, line_number), Some("go") => detect_go(trimmed, line_number), Some("zig") => detect_rust_or_zig(trimmed, line_number), Some("S") => detect_assembly(trimmed, line_number), @@ -241,16 +240,44 @@ fn detect_candidate(line: &str, line_number: u32, extension: Option<&str>) -> Op } } +fn heuristic_candidates(lines: &[&str], extension: Option<&str>) -> Vec { + let mut detected = Vec::new(); + for (index, line) in lines.iter().enumerate() { + if let Some(candidate) = detect_candidate(line, (index + 1) as u32, extension) { + detected.push(candidate); + } + } + detected +} + #[derive(Debug, Clone, Copy)] enum TreeSitterAdapter { + C, + Cpp, + CSharp, + Go, + JavaScript, + Python, + Ruby, Rust, + Tsx, + TypeScript, Zig, } impl TreeSitterAdapter { fn for_extension(extension: &str) -> Option { match extension { + "c" | "h" => Some(Self::C), + "cc" | "cpp" | "cxx" | "hh" | "hpp" | "hxx" => Some(Self::Cpp), + "cs" => Some(Self::CSharp), + "go" => Some(Self::Go), + "js" | "jsx" | "mjs" | "cjs" => Some(Self::JavaScript), + "py" | "pyi" => Some(Self::Python), + "rb" => Some(Self::Ruby), "rs" => Some(Self::Rust), + "tsx" => Some(Self::Tsx), + "ts" => Some(Self::TypeScript), "zig" => Some(Self::Zig), _ => None, } @@ -258,7 +285,16 @@ impl TreeSitterAdapter { fn language(self) -> Language { match self { + Self::C => tree_sitter_c::LANGUAGE.into(), + Self::Cpp => tree_sitter_cpp::LANGUAGE.into(), + Self::CSharp => tree_sitter_c_sharp::LANGUAGE.into(), + Self::Go => tree_sitter_go::LANGUAGE.into(), + Self::JavaScript 
=> tree_sitter_javascript::LANGUAGE.into(), + Self::Python => tree_sitter_python::LANGUAGE.into(), + Self::Ruby => tree_sitter_ruby::LANGUAGE.into(), Self::Rust => tree_sitter_rust::LANGUAGE.into(), + Self::Tsx => tree_sitter_typescript::LANGUAGE_TSX.into(), + Self::TypeScript => tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(), Self::Zig => tree_sitter_zig::LANGUAGE.into(), } } @@ -270,7 +306,15 @@ impl TreeSitterAdapter { lines: &[&str], ) -> Option { match self { + Self::C => c_candidate_for_node(node, source, lines), + Self::Cpp => cpp_candidate_for_node(node, source, lines), + Self::CSharp => csharp_candidate_for_node(node, source, lines), + Self::Go => go_candidate_for_node(node, source, lines), + Self::JavaScript => javascript_candidate_for_node(node, source, lines), + Self::Python => python_candidate_for_node(node, source, lines), + Self::Ruby => ruby_candidate_for_node(node, source, lines), Self::Rust => rust_candidate_for_node(node, source, lines), + Self::Tsx | Self::TypeScript => typescript_candidate_for_node(node, source, lines), Self::Zig => zig_candidate_for_node(node, source, lines), } } @@ -283,14 +327,44 @@ fn tree_sitter_candidates( ) -> Option> { let adapter = TreeSitterAdapter::for_extension(extension)?; let mut parser = Parser::new(); - parser.set_language(&adapter.language()).ok()?; - let tree = parser.parse(&file.contents, None)?; + if let Err(error) = parser.set_language(&adapter.language()) { + if std::env::var("LINEAGE_DEBUG_EXTRACT").is_ok() { + eprintln!( + "tree-sitter language setup failed in {} ({extension}): {error:?}", + file.path + ); + } + return None; + } + let tree = match parser.parse(&file.contents, None) { + Some(tree) => tree, + None => { + if std::env::var("LINEAGE_DEBUG_EXTRACT").is_ok() { + eprintln!("tree-sitter produced no tree in {} ({extension})", file.path); + } + return None; + } + }; if tree.root_node().has_error() { + if std::env::var("LINEAGE_DEBUG_EXTRACT").is_ok() { + eprintln!( + "tree-sitter parse error in 
{} ({extension}): {}", + file.path, + tree.root_node().to_sexp() + ); + } return None; } let mut candidates = Vec::new(); collect_tree_sitter_candidates(tree.root_node(), adapter, &file.contents, lines, &mut candidates); + if candidates.is_empty() && std::env::var("LINEAGE_DEBUG_EXTRACT").is_ok() { + eprintln!( + "tree-sitter found no units in {} ({extension}): {}", + file.path, + tree.root_node().to_sexp() + ); + } Some(candidates) } @@ -312,6 +386,202 @@ fn collect_tree_sitter_candidates( } } +fn ruby_candidate_for_node(node: Node<'_>, source: &str, lines: &[&str]) -> Option { + match node.kind() { + "class" => tree_sitter_named_candidate(node, UnitKind::Class, source, lines), + "module" => tree_sitter_named_candidate(node, UnitKind::Module, source, lines), + "method" => { + let name = field_text(node, "name", source)?; + Some(tree_sitter_candidate( + node, + qualified_name(node, name, source, &["class", "module", "method", "singleton_method"]), + UnitKind::Function, + source, + lines, + )) + } + "singleton_method" => { + let object = field_text(node, "object", source)?; + let name = field_text(node, "name", source)?; + Some(tree_sitter_candidate( + node, + qualified_name( + node, + &format!("{}.{}", clean_owner_name(object), name), + source, + &["class", "module", "method", "singleton_method"], + ), + UnitKind::Function, + source, + lines, + )) + } + _ => None, + } +} + +fn python_candidate_for_node(node: Node<'_>, source: &str, lines: &[&str]) -> Option { + match node.kind() { + "class_definition" => tree_sitter_named_candidate(node, UnitKind::Class, source, lines), + "function_definition" => { + let name = field_text(node, "name", source)?; + Some(tree_sitter_candidate( + node, + qualified_name(node, name, source, &["class_definition", "function_definition"]), + UnitKind::Function, + source, + lines, + )) + } + "type_alias_statement" => { + let name = field_text(node, "name", source) + .or_else(|| first_identifier_child(node, source))?; + 
Some(tree_sitter_candidate( + node, + name.to_string(), + UnitKind::Class, + source, + lines, + )) + } + _ => None, + } +} + +fn javascript_candidate_for_node(node: Node<'_>, source: &str, lines: &[&str]) -> Option { + match node.kind() { + "class_declaration" => tree_sitter_named_candidate(node, UnitKind::Class, source, lines), + "function_declaration" | "generator_function_declaration" => { + let name = field_text(node, "name", source)?; + Some(tree_sitter_candidate( + node, + qualified_name(node, name, source, &["class_declaration", "function_declaration"]), + UnitKind::Function, + source, + lines, + )) + } + "method_definition" => javascript_method_candidate(node, source, lines), + "variable_declarator" => javascript_variable_callable_candidate(node, source, lines), + _ => None, + } +} + +fn typescript_candidate_for_node(node: Node<'_>, source: &str, lines: &[&str]) -> Option { + match node.kind() { + "abstract_class_declaration" | "class_declaration" | "enum_declaration" + | "interface_declaration" | "internal_module" | "type_alias_declaration" => { + tree_sitter_named_candidate(node, UnitKind::Class, source, lines) + } + "function_declaration" | "generator_function_declaration" => { + let name = field_text(node, "name", source)?; + Some(tree_sitter_candidate( + node, + qualified_name(node, name, source, &["class_declaration", "abstract_class_declaration", "function_declaration"]), + UnitKind::Function, + source, + lines, + )) + } + "method_definition" => javascript_method_candidate(node, source, lines), + "variable_declarator" => javascript_variable_callable_candidate(node, source, lines), + "public_field_definition" => javascript_field_callable_candidate(node, source, lines), + _ => None, + } +} + +fn go_candidate_for_node(node: Node<'_>, source: &str, lines: &[&str]) -> Option { + match node.kind() { + "function_declaration" => tree_sitter_named_candidate(node, UnitKind::Function, source, lines), + "method_declaration" => { + let name = field_text(node, 
"name", source)?; + Some(tree_sitter_candidate( + node, + go_qualified_method_name(node, name, source), + UnitKind::Function, + source, + lines, + )) + } + "type_spec" | "type_alias" => { + let name = field_text(node, "name", source)?; + Some(tree_sitter_candidate( + node, + name.to_string(), + UnitKind::Class, + source, + lines, + )) + } + _ => None, + } +} + +fn c_candidate_for_node(node: Node<'_>, source: &str, lines: &[&str]) -> Option { + match node.kind() { + "function_definition" => { + let name = c_like_function_name(node, source)?; + Some(tree_sitter_candidate(node, name, UnitKind::Function, source, lines)) + } + "struct_specifier" | "union_specifier" | "enum_specifier" => c_like_type_candidate(node, source, lines), + "type_definition" => c_like_typedef_candidate(node, source, lines), + _ => None, + } +} + +fn cpp_candidate_for_node(node: Node<'_>, source: &str, lines: &[&str]) -> Option { + match node.kind() { + "function_definition" => { + let name = c_like_function_name(node, source)?; + Some(tree_sitter_candidate( + node, + qualified_name(node, &name, source, &["class_specifier", "namespace_definition"]), + UnitKind::Function, + source, + lines, + )) + } + "class_specifier" | "struct_specifier" | "union_specifier" | "enum_specifier" => { + c_like_type_candidate(node, source, lines) + } + "namespace_definition" => tree_sitter_named_candidate(node, UnitKind::Module, source, lines), + "type_definition" => c_like_typedef_candidate(node, source, lines), + _ => None, + } +} + +fn csharp_candidate_for_node(node: Node<'_>, source: &str, lines: &[&str]) -> Option { + match node.kind() { + "class_declaration" | "struct_declaration" | "interface_declaration" | "enum_declaration" + | "record_declaration" => tree_sitter_named_candidate(node, UnitKind::Class, source, lines), + "namespace_declaration" => tree_sitter_named_candidate(node, UnitKind::Module, source, lines), + "method_declaration" | "constructor_declaration" => { + let name = field_text(node, "name", 
source) + .map(str::to_string) + .or_else(|| nearest_owner_name(node, source, &["class_declaration", "struct_declaration", "record_declaration"]))?; + Some(tree_sitter_candidate( + node, + qualified_name( + node, + &name, + source, + &[ + "class_declaration", + "struct_declaration", + "interface_declaration", + "record_declaration", + "namespace_declaration", + ], + ), + UnitKind::Function, + source, + lines, + )) + } + _ => None, + } +} + fn rust_candidate_for_node(node: Node<'_>, source: &str, lines: &[&str]) -> Option { let kind = node.kind(); match kind { @@ -366,6 +636,68 @@ fn tree_sitter_named_candidate( Some(tree_sitter_candidate(node, name.to_string(), kind, source, lines)) } +fn javascript_method_candidate(node: Node<'_>, source: &str, lines: &[&str]) -> Option { + let name = field_text(node, "name", source)?; + Some(tree_sitter_candidate( + node, + qualified_name( + node, + name, + source, + &["class_declaration", "abstract_class_declaration", "function_declaration"], + ), + UnitKind::Function, + source, + lines, + )) +} + +fn javascript_variable_callable_candidate( + node: Node<'_>, + source: &str, + lines: &[&str], +) -> Option { + let value = node.child_by_field_name("value")?; + if !matches!( + value.kind(), + "arrow_function" | "function" | "function_expression" | "generator_function" | "class" + ) { + return None; + } + let name = field_text(node, "name", source)?; + let kind = if value.kind() == "class" { + UnitKind::Class + } else { + UnitKind::Function + }; + Some(tree_sitter_candidate( + node, + qualified_name(node, name, source, &["class_declaration", "abstract_class_declaration"]), + kind, + source, + lines, + )) +} + +fn javascript_field_callable_candidate( + node: Node<'_>, + source: &str, + lines: &[&str], +) -> Option { + let value = node.child_by_field_name("value")?; + if !matches!(value.kind(), "arrow_function" | "function" | "function_expression") { + return None; + } + let name = field_text(node, "name", source)?; + 
Some(tree_sitter_candidate( + node, + qualified_name(node, name, source, &["class_declaration", "abstract_class_declaration"]), + UnitKind::Function, + source, + lines, + )) +} + fn tree_sitter_candidate( node: Node<'_>, name: String, @@ -419,6 +751,155 @@ fn first_identifier_child<'a>(node: Node<'_>, source: &'a str) -> Option<&'a str None } +fn qualified_name(node: Node<'_>, base: &str, source: &str, owner_kinds: &[&str]) -> String { + let mut owners = Vec::new(); + let mut current = node; + while let Some(parent) = current.parent() { + if owner_kinds.contains(&parent.kind()) { + if let Some(owner) = owner_name(parent, source) { + owners.push(owner); + } + } + current = parent; + } + owners.reverse(); + owners.push(clean_owner_name(base)); + owners.join(".") +} + +fn nearest_owner_name(mut node: Node<'_>, source: &str, owner_kinds: &[&str]) -> Option { + while let Some(parent) = node.parent() { + if owner_kinds.contains(&parent.kind()) { + return owner_name(parent, source); + } + node = parent; + } + None +} + +fn owner_name(node: Node<'_>, source: &str) -> Option { + match node.kind() { + "class" | "module" | "class_definition" | "class_declaration" | "abstract_class_declaration" + | "interface_declaration" | "record_declaration" | "struct_declaration" | "enum_declaration" + | "namespace_definition" | "namespace_declaration" | "internal_module" => { + field_text(node, "name", source).map(clean_owner_name) + } + "function_definition" | "function_declaration" | "method" | "method_definition" + | "method_declaration" | "singleton_method" => { + field_text(node, "name", source).map(clean_owner_name) + } + "function_item" => field_text(node, "name", source).map(clean_owner_name), + "type_spec" | "type_alias" => field_text(node, "name", source).map(clean_owner_name), + "class_specifier" | "struct_specifier" | "union_specifier" | "enum_specifier" => { + c_like_type_name(node, source) + } + _ => None, + } +} + +fn go_qualified_method_name(node: Node<'_>, name: &str, 
source: &str) -> String { + let Some(receiver) = node.child_by_field_name("receiver") else { + return name.to_string(); + }; + let receiver_text = receiver.utf8_text(source.as_bytes()).unwrap_or_default(); + let receiver_type = receiver_text + .split_whitespace() + .last() + .unwrap_or(receiver_text) + .trim_matches(|ch: char| matches!(ch, '*' | '(' | ')' | '[' | ']')); + if receiver_type.is_empty() { + name.to_string() + } else { + format!("{receiver_type}.{name}") + } +} + +fn c_like_function_name(node: Node<'_>, source: &str) -> Option { + let declarator = node.child_by_field_name("declarator")?; + declarator_name(declarator, source).map(|name| clean_owner_name(&name)) +} + +fn c_like_type_candidate(node: Node<'_>, source: &str, lines: &[&str]) -> Option { + let name = c_like_type_name(node, source)?; + Some(tree_sitter_candidate(node, name, UnitKind::Class, source, lines)) +} + +fn c_like_typedef_candidate(node: Node<'_>, source: &str, lines: &[&str]) -> Option { + let name = node + .child_by_field_name("declarator") + .and_then(|declarator| declarator_name(declarator, source)) + .or_else(|| last_descendant_text(node, source, &["type_identifier", "identifier"]))?; + Some(tree_sitter_candidate( + node, + clean_owner_name(&name), + UnitKind::Class, + source, + lines, + )) +} + +fn c_like_type_name(node: Node<'_>, source: &str) -> Option { + field_text(node, "name", source) + .map(clean_owner_name) + .or_else(|| first_descendant_text(node, source, &["type_identifier", "identifier"]).map(|text| clean_owner_name(&text))) +} + +fn declarator_name(node: Node<'_>, source: &str) -> Option { + if let Some(name) = field_text(node, "name", source) { + return Some(name.to_string()); + } + if matches!( + node.kind(), + "identifier" | "field_identifier" | "type_identifier" | "qualified_identifier" | "scoped_identifier" + ) { + return node.utf8_text(source.as_bytes()).ok().map(str::to_string); + } + if let Some(child) = node.child_by_field_name("declarator") { + return 
declarator_name(child, source); + } + first_descendant_text( + node, + source, + &[ + "field_identifier", + "identifier", + "qualified_identifier", + "scoped_identifier", + "type_identifier", + ], + ) +} + +fn first_descendant_text(node: Node<'_>, source: &str, kinds: &[&str]) -> Option { + if kinds.contains(&node.kind()) { + return node.utf8_text(source.as_bytes()).ok().map(str::to_string); + } + for index in 0..node.named_child_count() { + if let Some(child) = node.named_child(index) { + if let Some(text) = first_descendant_text(child, source, kinds) { + return Some(text); + } + } + } + None +} + +fn last_descendant_text(node: Node<'_>, source: &str, kinds: &[&str]) -> Option { + let mut found = if kinds.contains(&node.kind()) { + node.utf8_text(source.as_bytes()).ok().map(str::to_string) + } else { + None + }; + for index in 0..node.named_child_count() { + if let Some(child) = node.named_child(index) { + if let Some(text) = last_descendant_text(child, source, kinds) { + found = Some(text); + } + } + } + found +} + fn rust_method_owner(node: Node<'_>, source: &str) -> Option { let impl_node = ancestor_kind(node, "impl_item")?; field_text(impl_node, "type", source).map(clean_owner_name) @@ -528,14 +1009,8 @@ fn ruby_python_def_rest(line: &str) -> Option<&str> { } } -fn detect_javascript(line: &str, line_number: u32) -> Option { - let line = line - .strip_prefix("export default ") - .unwrap_or(line) - .strip_prefix("export ") - .unwrap_or(line) - .strip_prefix("async ") - .unwrap_or(line); +fn detect_javascript_typescript(line: &str, line_number: u32) -> Option { + let line = strip_javascript_modifiers(line); if let Some(rest) = line.strip_prefix("function ") { return named_candidate(rest, UnitKind::Function, line, line_number); @@ -543,9 +1018,79 @@ fn detect_javascript(line: &str, line_number: u32) -> Option { if let Some(rest) = line.strip_prefix("class ") { return named_candidate(rest, UnitKind::Class, line, line_number); } + if let Some(rest) = 
line.strip_prefix("interface ") { + return named_candidate(rest, UnitKind::Class, line, line_number); + } + if let Some(rest) = line.strip_prefix("type ") { + return named_candidate(rest, UnitKind::Class, line, line_number); + } + if let Some(name) = javascript_const_callable_name(line) { + return Some(Candidate { + name: name.to_string(), + kind: UnitKind::Function, + signature: line.trim().to_string(), + line: line_number, + end_line: None, + }); + } None } +fn strip_javascript_modifiers(mut line: &str) -> &str { + loop { + let next = line + .strip_prefix("export default ") + .or_else(|| line.strip_prefix("export ")) + .or_else(|| line.strip_prefix("declare ")) + .or_else(|| line.strip_prefix("abstract ")) + .or_else(|| line.strip_prefix("async ")) + .unwrap_or(line); + if next == line { + return line; + } + line = next; + } +} + +fn javascript_const_callable_name(line: &str) -> Option<&str> { + let rest = line + .strip_prefix("const ") + .or_else(|| line.strip_prefix("let ")) + .or_else(|| line.strip_prefix("var "))?; + let name = javascript_identifier(rest)?; + let after_name = rest[name.len()..].trim_start(); + if after_name.starts_with('=') && after_name.contains("=>") { + return Some(name); + } + if after_name.starts_with(':') { + let type_annotation = after_name.split('=').next().unwrap_or(after_name); + if type_annotation.contains("=>") || type_annotation.contains('(') { + return Some(name); + } + } + None +} + +fn javascript_identifier(input: &str) -> Option<&str> { + let input = input.trim_start(); + let end = input + .char_indices() + .find_map(|(index, ch)| { + if ch.is_alphanumeric() || ch == '_' || ch == '$' { + None + } else { + Some(index) + } + }) + .unwrap_or(input.len()); + let ident = &input[..end]; + if ident.is_empty() { + None + } else { + Some(ident) + } +} + fn detect_lua(line: &str, line_number: u32) -> Option { let rest = line .strip_prefix("local function ") @@ -553,14 +1098,20 @@ fn detect_lua(line: &str, line_number: u32) -> Option { 
named_candidate(rest, UnitKind::Function, line, line_number) } -fn detect_c(line: &str, line_number: u32) -> Option { +fn detect_c_family(line: &str, line_number: u32) -> Option { + if let Some(rest) = c_family_type_rest(line) { + return named_candidate(rest, UnitKind::Class, line, line_number); + } if line.ends_with(';') || !line.contains('(') || !line.contains(')') || !line.contains('{') { return None; } let before_paren = line.split_once('(')?.0.trim_end(); let name = before_paren.split_whitespace().last()?; - if matches!(name, "if" | "for" | "while" | "switch" | "return" | "sizeof") { + if matches!( + name, + "if" | "for" | "while" | "switch" | "return" | "sizeof" | "catch" + ) { return None; } @@ -573,6 +1124,128 @@ fn detect_c(line: &str, line_number: u32) -> Option { }) } +fn c_family_type_rest(line: &str) -> Option<&str> { + let line = strip_c_family_modifiers(line); + line.strip_prefix("class ") + .or_else(|| line.strip_prefix("struct ")) + .or_else(|| line.strip_prefix("enum ")) + .or_else(|| line.strip_prefix("namespace ")) +} + +fn strip_c_family_modifiers(mut line: &str) -> &str { + loop { + let next = line + .strip_prefix("template ") + .or_else(|| line.strip_prefix("export ")) + .or_else(|| line.strip_prefix("public ")) + .or_else(|| line.strip_prefix("private ")) + .or_else(|| line.strip_prefix("protected ")) + .or_else(|| line.strip_prefix("internal ")) + .or_else(|| line.strip_prefix("static ")) + .or_else(|| line.strip_prefix("inline ")) + .or_else(|| line.strip_prefix("constexpr ")) + .or_else(|| line.strip_prefix("sealed ")) + .or_else(|| line.strip_prefix("abstract ")) + .or_else(|| line.strip_prefix("partial ")) + .or_else(|| line.strip_prefix("readonly ")) + .unwrap_or(line); + if next == line { + return line; + } + line = next; + } +} + +fn detect_csharp_java(line: &str, line_number: u32) -> Option { + let line = strip_c_family_modifiers(line); + if let Some(rest) = line + .strip_prefix("class ") + .or_else(|| line.strip_prefix("interface 
")) + .or_else(|| line.strip_prefix("struct ")) + .or_else(|| line.strip_prefix("enum ")) + .or_else(|| line.strip_prefix("record ")) + { + return named_candidate(rest, UnitKind::Class, line, line_number); + } + detect_c_family(line, line_number) +} + +fn detect_swift(line: &str, line_number: u32) -> Option { + let line = strip_swift_modifiers(line); + if let Some(rest) = line + .strip_prefix("class ") + .or_else(|| line.strip_prefix("struct ")) + .or_else(|| line.strip_prefix("enum ")) + .or_else(|| line.strip_prefix("protocol ")) + .or_else(|| line.strip_prefix("actor ")) + { + return named_candidate(rest, UnitKind::Class, line, line_number); + } + if let Some(rest) = line.strip_prefix("func ") { + return named_candidate(rest, UnitKind::Function, line, line_number); + } + None +} + +fn strip_swift_modifiers(mut line: &str) -> &str { + loop { + let next = line + .strip_prefix("public ") + .or_else(|| line.strip_prefix("private ")) + .or_else(|| line.strip_prefix("fileprivate ")) + .or_else(|| line.strip_prefix("internal ")) + .or_else(|| line.strip_prefix("open ")) + .or_else(|| line.strip_prefix("final ")) + .or_else(|| line.strip_prefix("static ")) + .or_else(|| line.strip_prefix("mutating ")) + .or_else(|| line.strip_prefix("async ")) + .unwrap_or(line); + if next == line { + return line; + } + line = next; + } +} + +fn detect_kotlin(line: &str, line_number: u32) -> Option { + let line = strip_kotlin_modifiers(line); + if let Some(rest) = line + .strip_prefix("class ") + .or_else(|| line.strip_prefix("interface ")) + .or_else(|| line.strip_prefix("object ")) + .or_else(|| line.strip_prefix("enum class ")) + .or_else(|| line.strip_prefix("data class ")) + .or_else(|| line.strip_prefix("sealed class ")) + { + return named_candidate(rest, UnitKind::Class, line, line_number); + } + if let Some(rest) = line.strip_prefix("fun ") { + return named_candidate(rest, UnitKind::Function, line, line_number); + } + None +} + +fn strip_kotlin_modifiers(mut line: &str) -> &str 
{ + loop { + let next = line + .strip_prefix("public ") + .or_else(|| line.strip_prefix("private ")) + .or_else(|| line.strip_prefix("protected ")) + .or_else(|| line.strip_prefix("internal ")) + .or_else(|| line.strip_prefix("open ")) + .or_else(|| line.strip_prefix("final ")) + .or_else(|| line.strip_prefix("abstract ")) + .or_else(|| line.strip_prefix("suspend ")) + .or_else(|| line.strip_prefix("inline ")) + .or_else(|| line.strip_prefix("override ")) + .unwrap_or(line); + if next == line { + return line; + } + line = next; + } +} + fn detect_go(line: &str, line_number: u32) -> Option { let rest = line.strip_prefix("func ")?; if rest.starts_with('(') { @@ -677,7 +1350,7 @@ mod tests { assert_eq!(units.len(), 2); assert_eq!(units[0].name, "Worker"); assert_eq!(units[0].kind, UnitKind::Class); - assert_eq!(units[1].name, "run"); + assert_eq!(units[1].name, "Worker.run"); assert_eq!(units[1].kind, UnitKind::Function); } @@ -693,10 +1366,49 @@ mod tests { }; let extractor = HeuristicExtractor::default(); - assert_eq!(extractor.extract_units(&go)[0].name, "Run"); + assert_eq!(extractor.extract_units(&go)[0].name, "Worker.Run"); assert_eq!(extractor.extract_units(&zig)[0].name, "run"); } + #[test] + fn extracts_typescript_symbols_with_tree_sitter() { + let file = BlobFile { + path: "packages/zod/src/demo.ts".into(), + contents: r#" +export interface ParseContext { + async?: boolean; +} + +export type Result = { value: T }; + +export abstract class Parser { + abstract run(value: unknown): Result; +} + +export function parse(value: unknown): Result { + return { value }; +} + +export const safeParse: (value: unknown) => Result = (value) => { + return { value }; +}; +"# + .into(), + }; + + let units = HeuristicExtractor::default().extract_units(&file); + let names: Vec<_> = units + .iter() + .map(|unit| (unit.kind, unit.name.as_str())) + .collect(); + + assert!(names.contains(&(UnitKind::Class, "ParseContext"))); + assert!(names.contains(&(UnitKind::Class, "Result"))); + 
assert!(names.contains(&(UnitKind::Class, "Parser"))); + assert!(names.contains(&(UnitKind::Function, "parse"))); + assert!(names.contains(&(UnitKind::Function, "safeParse"))); + } + #[test] fn extracts_rust_symbols_with_tree_sitter() { let file = BlobFile { @@ -799,8 +1511,81 @@ pub fn StringMap(comptime Value: type) type { let units = HeuristicExtractor::default().extract_units(&file); let names: Vec<_> = units.iter().map(|unit| unit.name.as_str()).collect(); - assert!(names.contains(&"self.build!")); - assert!(names.contains(&"value=")); + assert!(names.contains(&"Worker.self.build!")); + assert!(names.contains(&"Worker.value=")); + } + + #[test] + fn tree_sitter_extraction_handles_nested_and_multiline_boundaries() { + let python = BlobFile { + path: "src/service.py".into(), + contents: r#" +class Worker: + def run( + self, + value: int, + ) -> int: + def normalize(next_value: int) -> int: + return next_value + 1 + return normalize(value) +"# + .into(), + }; + let typescript = BlobFile { + path: "src/service.ts".into(), + contents: r#" +export class Worker { + async run( + value: number, + ): Promise { + return value + 1; + } +} +"# + .into(), + }; + + let extractor = HeuristicExtractor::default(); + let python_names: Vec<_> = extractor + .extract_units(&python) + .into_iter() + .map(|unit| (unit.name, unit.start_line, unit.end_line)) + .collect(); + assert!(python_names.contains(&("Worker".to_string(), 2, 9))); + assert!(python_names.contains(&("Worker.run".to_string(), 3, 9))); + assert!(python_names.contains(&("Worker.run.normalize".to_string(), 7, 8))); + + let typescript_names: Vec<_> = extractor + .extract_units(&typescript) + .into_iter() + .map(|unit| (unit.name, unit.start_line, unit.end_line)) + .collect(); + assert!(typescript_names.contains(&("Worker".to_string(), 2, 8))); + assert!(typescript_names.contains(&("Worker.run".to_string(), 3, 7))); + } + + #[test] + fn tree_sitter_extraction_ignores_strings_comments_and_parse_errors() { + let ruby = 
BlobFile { + path: "src/demo.rb".into(), + contents: "class Real\n TEXT = \"def fake\\nend\"\n # def also_fake\n def run\n end\nend\n".into(), + }; + let invalid_go = BlobFile { + path: "broken.go".into(), + contents: "func RegexWouldHaveMatched() {\n".into(), + }; + + let extractor = HeuristicExtractor::default(); + let ruby_names: Vec<_> = extractor + .extract_units(&ruby) + .into_iter() + .map(|unit| unit.name) + .collect(); + assert!(ruby_names.contains(&"Real".to_string())); + assert!(ruby_names.contains(&"Real.run".to_string())); + assert!(!ruby_names.contains(&"fake".to_string())); + assert!(!ruby_names.contains(&"also_fake".to_string())); + assert!(extractor.extract_units(&invalid_go).is_empty()); } #[test] @@ -811,6 +1596,13 @@ pub fn StringMap(comptime Value: type) type { assert!(filter.supports_path("zig/main.zig")); assert!(filter.supports_path("src/vm.S")); assert!(filter.supports_path("src/main.c")); + assert!(filter.supports_path("src/main.h")); + assert!(filter.supports_path("src/main.cpp")); + assert!(filter.supports_path("src/main.hpp")); + assert!(filter.supports_path("src/Program.cs")); + assert!(filter.supports_path("src/Main.java")); + assert!(filter.supports_path("Sources/App.swift")); + assert!(filter.supports_path("src/main.kt")); assert!(filter.supports_path("gems/lineage/src/ui.rs")); assert!(filter.supports_path("script/tool.lua")); assert!(!filter.supports_path("benchmarks/x/bench.profile/transpiled.zig")); @@ -862,6 +1654,46 @@ pub fn StringMap(comptime Value: type) type { assert_eq!(extractor.extract_units(&asm)[0].name, "boot"); } + #[test] + fn extracts_c_family_and_managed_language_units() { + let extractor = HeuristicExtractor::default(); + let cpp = BlobFile { + path: "include/demo.hpp".into(), + contents: "class Parser {\n};\nstatic int parse_value(int x) { return x; }\n".into(), + }; + let csharp = BlobFile { + path: "src/Program.cs".into(), + contents: "public sealed class Program {}\nprivate static int Run(int x) { return x; 
}\n".into(), + }; + let java = BlobFile { + path: "src/Main.java".into(), + contents: "public interface Handler {}\npublic int handle(int x) { return x; }\n".into(), + }; + let swift = BlobFile { + path: "Sources/App.swift".into(), + contents: "public struct App {}\npublic func run(_ x: Int) -> Int { x }\n".into(), + }; + let kotlin = BlobFile { + path: "src/main.kt".into(), + contents: "data class Box(val value: Int)\nsuspend fun run(value: Int): Int = value\n".into(), + }; + + let cpp_names: Vec<_> = extractor + .extract_units(&cpp) + .into_iter() + .map(|unit| (unit.kind, unit.name)) + .collect(); + assert!(cpp_names.contains(&(UnitKind::Class, "Parser".to_string()))); + assert!(cpp_names.contains(&(UnitKind::Function, "parse_value".to_string()))); + + assert_eq!(extractor.extract_units(&csharp)[0].name, "Program"); + assert_eq!(extractor.extract_units(&java)[0].name, "Handler"); + assert_eq!(extractor.extract_units(&swift)[0].name, "App"); + assert_eq!(extractor.extract_units(&swift)[1].name, "run"); + assert_eq!(extractor.extract_units(&kotlin)[0].name, "Box"); + assert_eq!(extractor.extract_units(&kotlin)[1].name, "run"); + } + #[test] fn gives_same_named_units_distinct_ordinals_and_ids() { let file = BlobFile { diff --git a/gems/lineage/src/hazard.rs b/gems/lineage/src/hazard.rs index 56d4f5f7b..cf6423bbd 100644 --- a/gems/lineage/src/hazard.rs +++ b/gems/lineage/src/hazard.rs @@ -32,6 +32,11 @@ pub fn ingest_hazards( ) -> Result { match provider { "zig" => ingest_zig_hazards(storage, repo.as_ref(), commit, timestamp), + "go" => ingest_go_hazards(storage, repo.as_ref(), commit, timestamp), + "rust" => ingest_rust_hazards(storage, repo.as_ref(), commit, timestamp), + "c" => ingest_c_hazards(storage, repo.as_ref(), commit, timestamp), + "cpp" => ingest_cpp_hazards(storage, repo.as_ref(), commit, timestamp), + "csharp" => ingest_csharp_hazards(storage, repo.as_ref(), commit, timestamp), other => anyhow::bail!("unsupported hazard provider {other:?}"), } } @@ 
-41,6 +46,71 @@ fn ingest_zig_hazards( repo: &Path, commit: &str, timestamp: Option, +) -> Result { + ingest_language_hazards(storage, repo, commit, timestamp, "zig", zig_source_files, scan_zig_sites) +} + +fn ingest_go_hazards( + storage: &Storage, + repo: &Path, + commit: &str, + timestamp: Option, +) -> Result { + ingest_language_hazards(storage, repo, commit, timestamp, "go", go_source_files, scan_go_sites) +} + +fn ingest_rust_hazards( + storage: &Storage, + repo: &Path, + commit: &str, + timestamp: Option, +) -> Result { + ingest_language_hazards(storage, repo, commit, timestamp, "rust", rust_source_files, scan_rust_sites) +} + +fn ingest_c_hazards( + storage: &Storage, + repo: &Path, + commit: &str, + timestamp: Option, +) -> Result { + ingest_language_hazards(storage, repo, commit, timestamp, "c", c_source_files, scan_c_sites) +} + +fn ingest_cpp_hazards( + storage: &Storage, + repo: &Path, + commit: &str, + timestamp: Option, +) -> Result { + ingest_language_hazards(storage, repo, commit, timestamp, "cpp", cpp_source_files, scan_cpp_sites) +} + +fn ingest_csharp_hazards( + storage: &Storage, + repo: &Path, + commit: &str, + timestamp: Option, +) -> Result { + ingest_language_hazards( + storage, + repo, + commit, + timestamp, + "csharp", + csharp_source_files, + scan_csharp_sites, + ) +} + +fn ingest_language_hazards( + storage: &Storage, + repo: &Path, + commit: &str, + timestamp: Option, + language: &str, + source_files: fn(&Path) -> Result>, + scan_sites: fn(&str, &str) -> Vec, ) -> Result { let repo = repo .canonicalize() @@ -48,7 +118,7 @@ fn ingest_zig_hazards( let timestamp = timestamp .or_else(|| storage.commit_timestamp(commit).ok().flatten()) .unwrap_or_else(now_timestamp); - let files = zig_source_files(&repo)?; + let files = source_files(&repo)?; let extractor = HeuristicExtractor::default(); let mut stats = HazardIngestStats { scanned_files: files.len(), @@ -57,7 +127,7 @@ fn ingest_zig_hazards( }; storage.begin_transaction()?; - 
storage.deactivate_active_hazards("zig")?; + storage.deactivate_active_hazards(language)?; for path in files { let abs = repo.join(&path); let contents = fs::read_to_string(&abs) @@ -67,7 +137,7 @@ fn ingest_zig_hazards( contents: contents.clone(), }; let units = extractor.extract_units(&blob); - for site in scan_zig_sites(&path, &contents) { + for site in scan_sites(&path, &contents) { stats.hazards += 1; let unit = unit_for_site(&blob, &units, site.line); let resolved_id = storage @@ -78,7 +148,7 @@ fn ingest_zig_hazards( } storage.insert_hazard_event(&HazardEvent { unit_id: resolved_id, - language: "zig".into(), + language: language.into(), hazard_type: site.hazard_type.clone(), required_evidence: site.required_evidence.clone(), path: site.path.clone(), @@ -88,7 +158,7 @@ fn ingest_zig_hazards( detected_at_hash: commit.to_string(), is_active: true, payload_json: json!({ - "provider": "zig", + "provider": language, "source": site.source, "timestamp": timestamp }) @@ -111,6 +181,83 @@ fn zig_source_files(repo: &Path) -> Result> { Ok(files) } +fn go_source_files(repo: &Path) -> Result> { + let mut files = Vec::new(); + collect_go_files(repo, Path::new(""), &mut files)?; + files.sort(); + files.dedup(); + Ok(files) +} + +fn rust_source_files(repo: &Path) -> Result> { + collect_language_files(repo, rust_source_path) +} + +fn c_source_files(repo: &Path) -> Result> { + collect_language_files(repo, c_source_path) +} + +fn cpp_source_files(repo: &Path) -> Result> { + collect_language_files(repo, cpp_source_path) +} + +fn csharp_source_files(repo: &Path) -> Result> { + collect_language_files(repo, csharp_source_path) +} + +fn collect_language_files(repo: &Path, source_path: fn(&str) -> bool) -> Result> { + let mut files = Vec::new(); + collect_matching_files(repo, Path::new(""), &mut files, source_path)?; + files.sort(); + files.dedup(); + Ok(files) +} + +fn collect_matching_files( + repo: &Path, + rel_dir: &Path, + out: &mut Vec, + source_path: fn(&str) -> bool, +) -> 
Result<()> { + let abs = repo.join(rel_dir); + if !abs.is_dir() { + return Ok(()); + } + for entry in fs::read_dir(&abs)? { + let entry = entry?; + let path = entry.path(); + let rel = rel_path(repo, &path)?; + if path.is_dir() { + if !excluded_common_dir(&rel) { + collect_matching_files(repo, Path::new(&rel), out, source_path)?; + } + } else if source_path(&rel) { + out.push(rel); + } + } + Ok(()) +} + +fn collect_go_files(repo: &Path, rel_dir: &Path, out: &mut Vec) -> Result<()> { + let abs = repo.join(rel_dir); + if !abs.is_dir() { + return Ok(()); + } + for entry in fs::read_dir(&abs)? { + let entry = entry?; + let path = entry.path(); + let rel = rel_path(repo, &path)?; + if path.is_dir() { + if !excluded_go_dir(&rel) { + collect_go_files(repo, Path::new(&rel), out)?; + } + } else if rel.ends_with(".go") && !excluded_go_file(&rel) { + out.push(rel); + } + } + Ok(()) +} + fn collect_zig_files(repo: &Path, rel_dir: &Path, out: &mut Vec) -> Result<()> { let abs = repo.join(rel_dir); if !abs.is_dir() { @@ -129,6 +276,61 @@ fn collect_zig_files(repo: &Path, rel_dir: &Path, out: &mut Vec) -> Resu Ok(()) } +fn excluded_go_dir(path: &str) -> bool { + let name = path.rsplit('/').next().unwrap_or(path); + matches!(name, ".git" | "vendor" | "testdata" | "node_modules" | "tmp" | "dist") + || name.starts_with('.') +} + +fn excluded_common_dir(path: &str) -> bool { + let name = path.rsplit('/').next().unwrap_or(path); + matches!( + name, + ".git" + | "vendor" + | "third_party" + | "node_modules" + | "tmp" + | "dist" + | "build" + | "target" + | "bin" + | "obj" + | "packages" + | "cmake-build-debug" + | "cmake-build-release" + | "tests" + | "test" + | "benches" + | "examples" + ) || name.starts_with('.') +} + +fn excluded_go_file(path: &str) -> bool { + let Some(name) = path.rsplit('/').next() else { + return true; + }; + name.ends_with("_test.go") +} + +fn rust_source_path(path: &str) -> bool { + path.ends_with(".rs") +} + +fn c_source_path(path: &str) -> bool { + 
path.ends_with(".c") || path.ends_with(".h") +} + +fn cpp_source_path(path: &str) -> bool { + [".cc", ".cpp", ".cxx", ".hh", ".hpp", ".hxx"] + .iter() + .any(|suffix| path.ends_with(suffix)) +} + +fn csharp_source_path(path: &str) -> bool { + path.ends_with(".cs") +} + fn excluded_zig_file(path: &str) -> bool { let Some(name) = path.rsplit('/').next() else { return true; @@ -201,6 +403,151 @@ fn scan_zig_sites(path: &str, contents: &str) -> Vec { sites } +fn scan_go_sites(path: &str, contents: &str) -> Vec { + let mut sites = Vec::new(); + let mut in_block_comment = false; + for (index, line) in contents.lines().enumerate() { + let line_no = (index + 1) as u32; + let code = strip_go_comment(line, &mut in_block_comment); + if code.trim().is_empty() { + continue; + } + if is_go_goroutine_site(&code) { + sites.push(site(path, line_no, line, "go_race_goroutine", "race")); + } + if is_go_atomic_site(&code) { + sites.push(site(path, line_no, line, "go_race_atomic", "race")); + } + if is_go_lock_site(&code) { + sites.push(site(path, line_no, line, "go_race_lock", "race")); + } + if is_go_waitgroup_site(&code) { + sites.push(site(path, line_no, line, "go_concurrency_waitgroup", "concurrency")); + } + if is_go_channel_site(&code) { + sites.push(site(path, line_no, line, "go_concurrency_channel", "concurrency")); + } + } + sites +} + +fn scan_rust_sites(path: &str, contents: &str) -> Vec { + let mut sites = Vec::new(); + let mut in_block_comment = false; + let mut unsafe_depth = 0_i32; + for (index, line) in contents.lines().enumerate() { + let line_no = (index + 1) as u32; + let code = strip_quoted_literals(&strip_go_comment(line, &mut in_block_comment)); + if code.trim().is_empty() { + continue; + } + if is_rust_atomic_site(&code) { + sites.push(site(path, line_no, line, "rust_loom_atomic", "loom")); + } + if is_rust_concurrency_site(&code) { + sites.push(site(path, line_no, line, "rust_loom_concurrency", "loom")); + } + if code.contains("unsafe fn ") || 
code.contains("unsafe fn(") { + sites.push(site(path, line_no, line, "rust_unsafe_fn", "miri")); + } + if code.contains("unsafe impl ") { + sites.push(site(path, line_no, line, "rust_unsafe_impl", "miri")); + } + let starts_unsafe = code.contains("unsafe {"); + if starts_unsafe { + sites.push(site(path, line_no, line, "rust_unsafe_block", "miri")); + } + if (unsafe_depth > 0 || starts_unsafe) && is_rust_unsafe_operation(&code) { + sites.push(site(path, line_no, line, "rust_unsafe_operation", "miri")); + } + unsafe_depth = update_unsafe_depth(&code, unsafe_depth); + } + sites +} + +fn scan_c_sites(path: &str, contents: &str) -> Vec { + let mut sites = Vec::new(); + let mut in_block_comment = false; + for (index, line) in contents.lines().enumerate() { + let line_no = (index + 1) as u32; + let code = strip_quoted_literals(&strip_go_comment(line, &mut in_block_comment)); + if code.trim().is_empty() { + continue; + } + if is_c_tsan_site(&code) { + sites.push(site(path, line_no, line, "c_tsan_concurrency", "tsan")); + } + if is_c_asan_api_site(&code) { + sites.push(site(path, line_no, line, "c_asan_raw_memory_api", "asan")); + } + if is_c_pointer_hazard(&code) { + sites.push(site(path, line_no, line, "c_asan_pointer", "asan")); + } + if is_c_lsan_site(&code) { + sites.push(site(path, line_no, line, "c_lsan_lifetime", "lsan")); + } + if is_arithmetic_ub_site(&code) { + sites.push(site(path, line_no, line, "c_ubsan_arithmetic", "ubsan")); + } + if is_c_cast_ub_site(&code) { + sites.push(site(path, line_no, line, "c_ubsan_cast", "ubsan")); + } + } + sites +} + +fn scan_cpp_sites(path: &str, contents: &str) -> Vec { + let mut sites = Vec::new(); + let mut in_block_comment = false; + for (index, line) in contents.lines().enumerate() { + let line_no = (index + 1) as u32; + let code = strip_quoted_literals(&strip_go_comment(line, &mut in_block_comment)); + if code.trim().is_empty() { + continue; + } + if is_cpp_tsan_site(&code) { + sites.push(site(path, line_no, line, 
"cpp_tsan_concurrency", "tsan")); + } + if is_cpp_asan_api_site(&code) { + sites.push(site(path, line_no, line, "cpp_asan_raw_memory_api", "asan")); + } + if is_cpp_pointer_or_cast_hazard(&code) { + sites.push(site(path, line_no, line, "cpp_asan_pointer_or_cast", "asan")); + } + if is_cpp_lsan_site(&code) { + sites.push(site(path, line_no, line, "cpp_lsan_lifetime", "lsan")); + } + if is_arithmetic_ub_site(&code) { + sites.push(site(path, line_no, line, "cpp_ubsan_arithmetic", "ubsan")); + } + if contains_any(&code, &["reinterpret_cast<", "const_cast<", "static_cast<"]) { + sites.push(site(path, line_no, line, "cpp_ubsan_cast", "ubsan")); + } + } + sites +} + +fn scan_csharp_sites(path: &str, contents: &str) -> Vec { + let mut sites = Vec::new(); + let mut in_block_comment = false; + let mut unsafe_depth = 0_i32; + for (index, line) in contents.lines().enumerate() { + let line_no = (index + 1) as u32; + let code = strip_quoted_literals(&strip_go_comment(line, &mut in_block_comment)); + if code.trim().is_empty() { + continue; + } + if is_csharp_concurrency_site(&code) { + sites.push(site(path, line_no, line, "csharp_concurrency", "concurrency")); + } + if is_csharp_unsafe_site(&code, unsafe_depth) { + sites.push(site(path, line_no, line, "csharp_unsafe_memory", "unsafe")); + } + unsafe_depth = update_csharp_unsafe_depth(&code, unsafe_depth); + } + sites +} + fn site( path: &str, line: u32, @@ -217,6 +564,377 @@ fn site( } } +fn is_go_goroutine_site(code: &str) -> bool { + code.trim_start().starts_with("go ") || code.contains("; go ") +} + +fn is_go_atomic_site(code: &str) -> bool { + code.contains("atomic.") +} + +fn is_go_lock_site(code: &str) -> bool { + [ + "sync.Mutex", + "sync.RWMutex", + "sync.Map", + "sync.Once", + "sync.Cond", + ".Lock(", + ".Unlock(", + ".RLock(", + ".RUnlock(", + ] + .iter() + .any(|needle| code.contains(needle)) +} + +fn is_go_waitgroup_site(code: &str) -> bool { + ["sync.WaitGroup", ".Add(", ".Done(", ".Wait("] + .iter() + .any(|needle| 
code.contains(needle)) +} + +fn is_go_channel_site(code: &str) -> bool { + code.contains("make(chan") + || code.contains("select {") + || code.contains("<-") +} + +fn contains_any(code: &str, needles: &[&str]) -> bool { + needles.iter().any(|needle| code.contains(needle)) +} + +fn is_rust_atomic_site(code: &str) -> bool { + contains_any( + code, + &[ + "std::sync::atomic", + "core::sync::atomic", + "Ordering::", + ".load(", + ".store(", + ".swap(", + ".compare_exchange(", + ".compare_exchange_weak(", + ".fetch_add(", + ".fetch_sub(", + ".fetch_or(", + ".fetch_and(", + ".fetch_xor(", + ".fetch_update(", + "fence(", + "AtomicBool", + "AtomicI", + "AtomicU", + "AtomicPtr", + ], + ) +} + +fn is_rust_concurrency_site(code: &str) -> bool { + contains_any( + code, + &[ + "thread::spawn", + "std::thread::spawn", + "std::sync::Mutex", + "std::sync::RwLock", + "std::sync::Condvar", + "std::sync::Arc", + "Arc<", + "Mutex<", + "RwLock<", + "Condvar", + "mpsc::", + "crossbeam::channel", + ".lock(", + ".try_lock(", + ], + ) +} + +fn is_rust_unsafe_operation(code: &str) -> bool { + contains_any( + code, + &[ + "std::ptr::", + "core::ptr::", + "ptr::read", + "ptr::write", + "ptr::copy", + "copy_nonoverlapping", + "from_raw", + "into_raw", + "get_unchecked", + "get_unchecked_mut", + "unwrap_unchecked", + "transmute", + "assume_init", + "MaybeUninit", + "addr_of!", + "asm!", + ".add(", + ".offset(", + ".read(", + ".write(", + ".copy_to(", + ".copy_from(", + ], + ) || pointer_deref_site(code) +} + +fn update_unsafe_depth(code: &str, unsafe_depth: i32) -> i32 { + let relevant = if unsafe_depth > 0 { + code + } else if let Some(index) = code.find("unsafe {") { + &code[index..] 
+ } else { + "" + }; + if relevant.is_empty() { + return unsafe_depth; + } + (unsafe_depth + brace_delta(relevant)).max(0) +} + +fn is_c_tsan_site(code: &str) -> bool { + contains_any( + code, + &[ + "_Atomic", + "atomic_", + "__atomic_", + "__sync_", + "pthread_create", + "pthread_mutex_", + "pthread_rwlock_", + "pthread_cond_", + "pthread_spin_", + "pthread_barrier_", + "mtx_", + "cnd_", + "thrd_create", + ], + ) +} + +fn is_c_asan_api_site(code: &str) -> bool { + contains_any( + code, + &[ + "memcpy(", + "memmove(", + "memset(", + "strcpy(", + "strncpy(", + "strcat(", + "strncat(", + "sprintf(", + "snprintf(", + "vsprintf(", + "vsnprintf(", + "gets(", + "scanf(", + "sscanf(", + "fscanf(", + "alloca(", + ], + ) +} + +fn is_c_lsan_site(code: &str) -> bool { + contains_any( + code, + &[ + "malloc(", + "calloc(", + "realloc(", + "aligned_alloc(", + "posix_memalign(", + "strdup(", + "strndup(", + "free(", + ], + ) +} + +fn is_c_pointer_hazard(code: &str) -> bool { + code.contains("->") || pointer_deref_site(code) +} + +fn is_c_cast_ub_site(code: &str) -> bool { + contains_any( + code, + &[ + "(intptr_t)", + "(uintptr_t)", + "(size_t)", + "(ssize_t)", + "(int)", + "(long)", + "(short)", + "(char)", + "(void *)", + "(char *)", + "(int *)", + "(long *)", + ], + ) +} + +fn is_cpp_tsan_site(code: &str) -> bool { + contains_any( + code, + &[ + "std::thread", + "std::jthread", + "std::async", + "std::atomic", + "std::mutex", + "std::shared_mutex", + "std::recursive_mutex", + "std::condition_variable", + "std::lock_guard", + "std::unique_lock", + "std::scoped_lock", + "std::call_once", + ".lock(", + ".try_lock(", + ".unlock(", + ], + ) +} + +fn is_cpp_asan_api_site(code: &str) -> bool { + contains_any( + code, + &[ + "std::memcpy(", + "std::memmove(", + "std::memset(", + "memcpy(", + "memmove(", + "memset(", + "strcpy(", + "strncpy(", + "strcat(", + "strncat(", + "sprintf(", + "snprintf(", + "std::span<", + "std::string_view", + ], + ) +} + +fn is_cpp_lsan_site(code: &str) 
-> bool { + contains_any( + code, + &[ + "malloc(", + "calloc(", + "realloc(", + "free(", + "std::malloc(", + "std::calloc(", + "std::realloc(", + "std::free(", + "new ", + "new[]", + "delete ", + "delete[]", + ], + ) +} + +fn is_cpp_pointer_or_cast_hazard(code: &str) -> bool { + code.contains("->") + || pointer_deref_site(code) + || contains_any(code, &["reinterpret_cast<", "const_cast<"]) +} + +fn is_arithmetic_ub_site(code: &str) -> bool { + contains_any(code, &[" / ", " % ", "<<", ">>"]) +} + +fn is_csharp_concurrency_site(code: &str) -> bool { + contains_any( + code, + &[ + "Task.Run", + "Task.Factory.StartNew", + "new Thread", + "ThreadPool.", + "Parallel.", + "lock (", + "lock(", + "Monitor.", + "Interlocked.", + "Volatile.", + "ConcurrentDictionary", + "ConcurrentQueue", + "ConcurrentBag", + "BlockingCollection", + "SemaphoreSlim", + "Mutex", + "ReaderWriterLockSlim", + "SpinLock", + ], + ) +} + +fn is_csharp_unsafe_site(code: &str, unsafe_depth: i32) -> bool { + (unsafe_depth > 0 && (code.contains("->") || pointer_deref_site(code))) + || contains_any( + code, + &[ + "unsafe", + "fixed (", + "fixed(", + "stackalloc", + "Marshal.", + "IntPtr", + "UIntPtr", + "GCHandle", + "Unsafe.", + "MemoryMarshal.", + "byte*", + "char*", + "int*", + "long*", + "void*", + ], + ) +} + +fn update_csharp_unsafe_depth(code: &str, unsafe_depth: i32) -> i32 { + let relevant = if unsafe_depth > 0 { + code + } else if let Some(index) = code.find("unsafe {") { + &code[index..] 
+ } else { + "" + }; + if relevant.is_empty() { + return unsafe_depth; + } + (unsafe_depth + brace_delta(relevant)).max(0) +} + +fn pointer_deref_site(code: &str) -> bool { + let trimmed = code.trim_start(); + trimmed.starts_with('*') + || contains_any(code, &["= *", "=*", "return *", "(*", ", *", "[*"]) +} + +fn brace_delta(code: &str) -> i32 { + code.chars().fold(0_i32, |total, ch| match ch { + '{' => total + 1, + '}' => total - 1, + _ => total, + }) +} + fn is_atomic_site(code: &str) -> bool { code.contains("@atomic") || code.contains("@cmpxchg") @@ -342,6 +1060,70 @@ fn strip_zig_comment(line: &str) -> &str { line.split_once("//").map(|(code, _)| code).unwrap_or(line) } +fn strip_go_comment(line: &str, in_block_comment: &mut bool) -> String { + let mut out = String::new(); + let mut rest = line; + loop { + if *in_block_comment { + let Some((_, after)) = rest.split_once("*/") else { + return out; + }; + *in_block_comment = false; + rest = after; + continue; + } + let block = rest.find("/*"); + let line_comment = rest.find("//"); + match (block, line_comment) { + (Some(block), Some(comment)) if comment < block => { + out.push_str(&rest[..comment]); + return out; + } + (Some(block), _) => { + out.push_str(&rest[..block]); + rest = &rest[block + 2..]; + *in_block_comment = true; + } + (_, Some(comment)) => { + out.push_str(&rest[..comment]); + return out; + } + (None, None) => { + out.push_str(rest); + return out; + } + } + } +} + +fn strip_quoted_literals(line: &str) -> String { + let mut out = String::with_capacity(line.len()); + let mut chars = line.chars().peekable(); + while let Some(ch) = chars.next() { + if ch == '"' || ch == '\'' { + let quote = ch; + out.push_str("\"\""); + let mut escaped = false; + for inner in chars.by_ref() { + if escaped { + escaped = false; + continue; + } + if inner == '\\' { + escaped = true; + continue; + } + if inner == quote { + break; + } + } + } else { + out.push(ch); + } + } + out +} + fn unit_for_site(blob: &BlobFile, units: 
&[LogicalUnit], line: u32) -> LogicalUnit { units .iter() @@ -398,4 +1180,99 @@ mod tests { assert_eq!(stats.hazards, 2); assert_eq!(storage.count_rows("unit_hazards").unwrap(), 2); } + + #[test] + fn ingests_go_concurrency_hazards_for_current_snapshot() { + let dir = tempdir().unwrap(); + fs::write( + dir.path().join("worker.go"), + "package demo\n\nimport \"sync/atomic\"\n\nfunc run(ch chan int) {\n go func() { ch <- 1 }()\n value := atomic.LoadInt64(&counter)\n _ = value\n}\n", + ) + .unwrap(); + fs::write( + dir.path().join("worker_test.go"), + "package demo\n\nfunc TestRun() { go run(nil) }\n", + ) + .unwrap(); + let storage = Storage::open_memory().unwrap(); + + let stats = ingest_hazards(&storage, dir.path(), "go", "abc", Some(10)).unwrap(); + + assert_eq!(stats.scanned_files, 1); + assert_eq!(stats.hazards, 3); + assert_eq!(storage.count_rows("unit_hazards").unwrap(), 3); + } + + #[test] + fn ingests_rust_loom_and_unsafe_hazards_for_current_snapshot() { + let dir = tempdir().unwrap(); + fs::create_dir_all(dir.path().join("src")).unwrap(); + fs::write( + dir.path().join("src/lib.rs"), + "use std::sync::atomic::{AtomicUsize, Ordering};\n\npub fn run(ptr: *const u8) -> usize {\n let value = AtomicUsize::new(0);\n value.fetch_add(1, Ordering::SeqCst);\n unsafe {\n ptr.add(1).read()\n }\n}\n", + ) + .unwrap(); + let storage = Storage::open_memory().unwrap(); + + let stats = ingest_hazards(&storage, dir.path(), "rust", "abc", Some(10)).unwrap(); + + assert_eq!(stats.scanned_files, 1); + assert_eq!(stats.hazards, 5); + assert_eq!(storage.count_rows("unit_hazards").unwrap(), 5); + } + + #[test] + fn system_hazard_scans_cover_c_cpp_and_csharp_categories() { + let c_types = hazard_types(scan_c_sites( + "runtime.c", + "void run(char *dst, char *src, int n) {\n pthread_mutex_lock(&lock);\n char *buf = malloc(32);\n memcpy(dst, src, n);\n int shifted = n << src[0];\n free(buf);\n}\n", + )); + assert!(c_types.contains(&"c_tsan_concurrency".to_string())); + 
assert!(c_types.contains(&"c_asan_raw_memory_api".to_string())); + assert!(c_types.contains(&"c_lsan_lifetime".to_string())); + assert!(c_types.contains(&"c_ubsan_arithmetic".to_string())); + + let cpp_types = hazard_types(scan_cpp_sites( + "runtime.cpp", + "void run(char *dst, char *src, int n) {\n std::atomic ready;\n auto *buf = new char[32];\n std::memcpy(dst, src, n);\n auto raw = reinterpret_cast(dst);\n auto shifted = n << raw[0];\n delete[] buf;\n}\n", + )); + assert!(cpp_types.contains(&"cpp_tsan_concurrency".to_string())); + assert!(cpp_types.contains(&"cpp_asan_raw_memory_api".to_string())); + assert!(cpp_types.contains(&"cpp_asan_pointer_or_cast".to_string())); + assert!(cpp_types.contains(&"cpp_lsan_lifetime".to_string())); + assert!(cpp_types.contains(&"cpp_ubsan_cast".to_string())); + assert!(cpp_types.contains(&"cpp_ubsan_arithmetic".to_string())); + + let csharp_types = hazard_types(scan_csharp_sites( + "Worker.cs", + "public unsafe class Worker {\n public void Run(byte* ptr) {\n Task.Run(() => {});\n fixed (byte* p = buffer) {\n *p = 1;\n }\n }\n}\n", + )); + assert!(csharp_types.contains(&"csharp_concurrency".to_string())); + assert!(csharp_types.contains(&"csharp_unsafe_memory".to_string())); + } + + #[test] + fn go_hazard_scan_ignores_comments() { + let sites = scan_go_sites( + "demo.go", + "package demo\n\nfunc run() {\n // go func() {}()\n /* atomic.AddInt64(&x, 1) */\n ch <- 1\n}\n", + ); + + assert_eq!(sites.len(), 1); + assert_eq!(sites[0].hazard_type, "go_concurrency_channel"); + } + + #[test] + fn systems_hazard_scans_ignore_comments_and_strings() { + let sites = scan_c_sites( + "runtime.c", + "void run(void) {\n // pthread_mutex_lock(&lock);\n const char *s = \"memcpy(dst, src, n)\";\n}\n", + ); + + assert!(sites.is_empty()); + } + + fn hazard_types(sites: Vec) -> Vec { + sites.into_iter().map(|site| site.hazard_type).collect() + } } diff --git a/gems/lineage/src/lsp.rs b/gems/lineage/src/lsp.rs index cf7a904f6..4dc7e3f12 100644 --- 
a/gems/lineage/src/lsp.rs +++ b/gems/lineage/src/lsp.rs @@ -555,6 +555,8 @@ mod tests { distinct_tests: 2, mutant_verified_tests: 1, mutant_killed_tests: 1, + stochastic_mutant_verified_tests: 1, + invariant_mutant_verified_tests: 0, line_hits: Some(4), line_coverage: None, mutant_coverage: None, diff --git a/gems/lineage/src/mutant.rs b/gems/lineage/src/mutant.rs index 56daba1d0..a04342f3e 100644 --- a/gems/lineage/src/mutant.rs +++ b/gems/lineage/src/mutant.rs @@ -248,7 +248,7 @@ fn matching_units<'a>(units: &'a [LogicalUnit], fact: &MutantFact) -> Vec<&'a Lo let aliases = method_aliases(&fact.method); units .iter() - .filter(|unit| aliases.iter().any(|alias| alias == &unit.name)) + .filter(|unit| unit_matches_aliases(unit, &aliases)) .collect() } @@ -281,7 +281,7 @@ fn fallback_matching_unit_entries( let aliases = method_aliases(&fact.method); units .iter() - .filter(|unit| aliases.iter().any(|alias| alias == &unit.name)) + .filter(|unit| unit_matches_aliases(unit, &aliases)) .collect::>() }; for unit in path_matches { @@ -316,7 +316,7 @@ fn fallback_owner_mentioned_function_entries( continue; } for unit in units { - if unit.kind.as_str() != "function" || !aliases.iter().any(|alias| alias == &unit.name) { + if unit.kind.as_str() != "function" || !unit_matches_aliases(unit, &aliases) { continue; } if !owner_needles.iter().any(|needle| { @@ -346,8 +346,7 @@ fn fallback_unique_source_function_entry( continue; } for unit in units { - if unit.kind.as_str() == "function" && aliases.iter().any(|alias| alias == &unit.name) - { + if unit.kind.as_str() == "function" && unit_matches_aliases(unit, &aliases) { candidates.push(UnitMatch { path: path.clone(), unit: unit.clone(), @@ -412,6 +411,13 @@ fn owner_text_needles(owner: &str) -> Vec { needles } +fn unit_matches_aliases(unit: &LogicalUnit, aliases: &[String]) -> bool { + aliases.iter().any(|alias| { + unit.name == *alias + || (!alias.contains('.') && !alias.contains('#') && unit.name.ends_with(&format!(".{alias}"))) + 
}) +} + fn method_aliases(method: &str) -> Vec { let raw = method.trim().trim_end_matches('*'); let mut aliases = vec![raw.to_string()]; @@ -706,7 +712,7 @@ mod tests { assert_eq!(stats.facts, 1); assert_eq!(stats.units, 1); assert_eq!(stats.quality_events, 1); - assert_eq!(stats.exposure_events, 4); + assert_eq!(stats.exposure_events, 3); let killed: i64 = storage .connection() .query_row( @@ -715,7 +721,7 @@ mod tests { |row| row.get(0), ) .unwrap(); - assert_eq!(killed, 4); + assert_eq!(killed, 3); } #[test] diff --git a/gems/lineage/src/quality.rs b/gems/lineage/src/quality.rs index 4697eb6b0..41afe0f39 100644 --- a/gems/lineage/src/quality.rs +++ b/gems/lineage/src/quality.rs @@ -238,8 +238,8 @@ fn record_from_codecov_node(node: &Value) -> Option { let line_coverage = node .get("totals") .and_then(|totals| totals.get("coverage")) - .and_then(Value::as_f64) - .or_else(|| node.get("coverage").and_then(Value::as_f64)); + .and_then(finite_json_f64) + .or_else(|| node.get("coverage").and_then(finite_json_f64)); Some(CoverageRecord { path, @@ -359,7 +359,12 @@ fn record_from_generic_node(node: &Value) -> Option { } fn metric_value(node: &Value, keys: &[&str]) -> Option { - keys.iter().find_map(|key| node.get(*key).and_then(Value::as_f64)) + keys.iter() + .find_map(|key| node.get(*key).and_then(finite_json_f64)) +} + +fn finite_json_f64(value: &Value) -> Option { + value.as_f64().filter(|number| number.is_finite()) } fn line_hits_from_generic_node(node: &Value) -> Vec { @@ -436,6 +441,7 @@ fn parse_cobertura_records(input: &str) -> Result> { class .attribute("line-rate") .and_then(|value| value.parse::().ok()) + .filter(|value| value.is_finite()) .map(|value| value * 100.0) } else { let covered = line_hits.iter().filter(|hit| hit.hits > 0).count(); @@ -538,6 +544,9 @@ fn record_metric( let Some(new_value) = value else { return Ok(0); }; + if !new_value.is_finite() { + return Ok(0); + } let recorded = storage.record_quality_metric(&QualityEvent { unit_id: 
unit_id.to_string(), commit_hash: commit_hash.to_string(), @@ -643,6 +652,27 @@ mod tests { assert_eq!(records[0].line_hits[1], CoverageLineHit { line: 2, hits: 0 }); } + #[test] + fn ignores_non_finite_cobertura_line_rate() { + let payload = r#" + + + + + + + + + "#; + + let records = parse_coverage_input(payload, "cobertura").unwrap(); + + assert_eq!(records.len(), 1); + assert_eq!(records[0].path, "src/generated.go"); + assert_eq!(records[0].line_coverage, None); + assert!(records[0].line_hits.is_empty()); + } + #[test] fn parses_simplecov_resultset_line_hits() { let value = json!({ diff --git a/gems/lineage/src/storage.rs b/gems/lineage/src/storage.rs index 725f7e6c4..6c6b9a169 100644 --- a/gems/lineage/src/storage.rs +++ b/gems/lineage/src/storage.rs @@ -1433,6 +1433,7 @@ impl Storage { line_exposure AS ( SELECT e.path, e.line, + l.hits, COUNT(DISTINCT CASE WHEN e.is_verified = 1 THEN e.test_type END) AS verified_test_types, MAX(CASE WHEN e.is_verified = 1 AND e.is_mutation_verified = 1 THEN 1 ELSE 0 END) AS mutant_verified, MAX(CASE WHEN e.is_verified = 1 AND e.is_mutation_killed = 1 THEN 1 ELSE 0 END) AS mutant_killed, @@ -1475,7 +1476,7 @@ impl Storage { SUM(stochastic_mutant_killed) AS stochastic_mutant_killed_covered_lines, SUM(invariant_mutant_verified) AS invariant_mutant_verified_covered_lines, SUM(invariant_mutant_killed) AS invariant_mutant_killed_covered_lines, - SUM(CASE WHEN verified_test_types >= 2 THEN 1 ELSE 0 END) AS multi_type_covered_lines + SUM(CASE WHEN verified_test_types >= 2 OR hits > 1 THEN 1 ELSE 0 END) AS multi_type_covered_lines FROM line_exposure GROUP BY path ), diff --git a/gems/lineage/src/ui.rs b/gems/lineage/src/ui.rs index 22b131858..4eb9ce37d 100644 --- a/gems/lineage/src/ui.rs +++ b/gems/lineage/src/ui.rs @@ -95,9 +95,35 @@ struct UiCoverageContext { covered_lines: i64, partial_lines: i64, missed_lines: i64, + multi_type_lines: i64, + mutant_backed_lines: i64, + stochastic_mutant_backed_lines: i64, + 
invariant_mutant_backed_lines: i64, coverage_percent: f64, } +#[derive(Debug, Clone, Copy, PartialEq)] +struct LineQualityBar { + tracked_lines: i64, + covered_lines: i64, + partial_lines: i64, + multi_type_lines: i64, + mutant_backed_lines: i64, + coverage_percent: f64, +} + +#[derive(Debug, Clone, Copy, PartialEq)] +struct LineQualitySegments { + multi: f64, + covered: f64, + partial: f64, + missed: f64, + mutant_multi: f64, + mutant_covered: f64, + mutant_partial: f64, + mutant_gap: f64, +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum CoverageSort { Path, @@ -319,6 +345,8 @@ pub struct UiLineAnnotation { pub distinct_tests: i64, pub mutant_verified_tests: i64, pub mutant_killed_tests: i64, + pub stochastic_mutant_verified_tests: i64, + pub invariant_mutant_verified_tests: i64, pub line_hits: Option, pub line_coverage: Option, pub mutant_coverage: Option, @@ -467,6 +495,168 @@ struct IndexPageTemplate<'a> { body: &'a str, } +#[derive(Template)] +#[template(path = "app.html")] +struct AppTemplate<'a> { + source_sidebar: bool, + sidebar: &'a str, + main: &'a str, +} + +#[derive(Template)] +#[template(path = "dashboard_sidebar.html")] +struct DashboardSidebarTemplate<'a> { + summary: &'a str, + nav: &'a str, + current_directory: &'a str, + show_directory_input: bool, + filter: &'a str, + search_options: &'a str, + files: &'a str, +} + +#[derive(Template)] +#[template(path = "source_sidebar.html")] +struct SourceSidebarTemplate<'a> { + path: &'a str, + nav: &'a str, + outline: &'a str, + show_empty_outline: bool, +} + +#[derive(Template)] +#[template(path = "source_unavailable.html")] +struct SourceUnavailableTemplate<'a> { + error: &'a str, +} + +#[derive(Template)] +#[template(path = "dashboard.html")] +struct DashboardTemplate<'a> { + branch_context: &'a str, + warnings: &'a str, + active_hazards: &'a str, + highest_hazard_files: &'a str, + highest_risk_units: &'a str, + highest_architecture_risks: &'a str, + code_tree_heading: &'a str, + code_tree: &'a 
str, +} + +#[derive(Template)] +#[template(path = "dashboard_disclosure.html")] +struct DashboardDisclosureTemplate<'a> { + title: &'a str, + open: bool, + body: &'a str, +} + +#[derive(Template)] +#[template(path = "dashboard_ratio_bar.html")] +struct DashboardRatioBarTemplate<'a> { + label: &'a str, + detail: &'a str, + bar: &'a str, + total: i64, + total_label: &'a str, + covered: i64, + covered_label: &'a str, +} + +#[derive(Template)] +#[template(path = "dashboard_hazard_files.html")] +struct DashboardHazardFilesTemplate<'a> { + files: &'a [DashboardHazardFileItem], +} + +#[derive(Debug, Clone, PartialEq, Eq)] +struct DashboardHazardFileItem { + href: String, + path: String, + detail: String, + hazards: i64, +} + +#[derive(Template)] +#[template(path = "hotspot_list.html")] +struct HotspotListTemplate<'a> { + wrapper_class: &'a str, + empty_message: &'a str, + items: &'a [HotspotItem], +} + +#[derive(Debug, Clone, PartialEq)] +struct HotspotItem { + href: String, + kind: String, + name: String, + path: String, + detail: String, + score: String, +} + +#[derive(Template)] +#[template(path = "coverage_table.html")] +struct CoverageTableTemplate<'a> { + name_header: &'a str, + total_header: &'a str, + covered_header: &'a str, + partial_header: &'a str, + missed_header: &'a str, + percent_header: &'a str, + rows: &'a str, + empty: bool, + subtotal: &'a str, +} + +#[derive(Template)] +#[template(path = "branch_context.html")] +struct BranchContextTemplate<'a> { + branch: &'a str, + commit: &'a str, + coverage_percent: &'a str, + covered_lines: i64, + tracked_lines: i64, + partial_lines: i64, + missed_lines: i64, + mutant_backed_lines: i64, + stochastic_mutant_backed_lines: i64, + invariant_mutant_backed_lines: i64, + line_quality_bar: &'a str, + breadcrumbs: &'a str, +} + +#[derive(Template)] +#[template(path = "source_view.html")] +struct SourceViewTemplate<'a> { + path: &'a str, + summary: &'a str, + layers_menu: &'a str, + branch_context: &'a str, + warnings: &'a 
str, + code_lines: &'a str, + history: &'a str, +} + +#[derive(Template)] +#[template(path = "layers_menu.html")] +struct LayersMenuTemplate; + +#[derive(Template)] +#[template(path = "warning_banner.html")] +struct WarningBannerTemplate<'a> { + warnings: &'a [WarningBannerItem], +} + +#[derive(Debug, Clone, PartialEq, Eq)] +struct WarningBannerItem { + input_id: String, + key: String, + level: String, + label: String, + detail: String, +} + #[derive(Clone)] struct UiServerState { db: Arc, @@ -502,6 +692,8 @@ struct AnnotationBuilder { distinct_tests: i64, mutant_verified_tests: i64, mutant_killed_tests: i64, + stochastic_mutant_verified_tests: i64, + invariant_mutant_verified_tests: i64, line_hits: Option, line_coverage: Option, mutant_coverage: Option, @@ -1788,6 +1980,7 @@ fn dashboard_line_counts( ) SELECT path, line, + latest_lines.hits, COUNT(DISTINCT CASE WHEN is_verified = 1 THEN test_type END) AS verified_test_types, MAX(CASE WHEN is_verified = 1 AND is_mutation_verified = 1 THEN 1 ELSE 0 END) AS mutant_verified, MAX(CASE WHEN is_verified = 1 AND is_mutation_killed = 1 THEN 1 ELSE 0 END) AS mutant_killed, @@ -1832,12 +2025,14 @@ fn dashboard_line_counts( row.get::<_, i64>(6)?, row.get::<_, i64>(7)?, row.get::<_, i64>(8)?, + row.get::<_, i64>(9)?, )) })?; for row in rows { let ( path, _line, + hits, verified_test_types, has_mutant_verified, has_mutant_killed, @@ -1870,7 +2065,7 @@ fn dashboard_line_counts( if has_invariant_mutant_killed > 0 { counts.invariant_mutant_killed += 1; } - if verified_test_types >= 2 { + if verified_test_types >= 2 || hits > 1 { counts.multi_type += 1; } } @@ -2921,6 +3116,8 @@ pub fn line_annotations( distinct_tests: builder.distinct_tests, mutant_verified_tests: builder.mutant_verified_tests, mutant_killed_tests: builder.mutant_killed_tests, + stochastic_mutant_verified_tests: builder.stochastic_mutant_verified_tests, + invariant_mutant_verified_tests: builder.invariant_mutant_verified_tests, line_hits: builder.line_hits, 
line_coverage: builder.line_coverage, mutant_coverage: builder.mutant_coverage, @@ -3005,6 +3202,8 @@ fn empty_annotation(line: u32) -> UiLineAnnotation { distinct_tests: 0, mutant_verified_tests: 0, mutant_killed_tests: 0, + stochastic_mutant_verified_tests: 0, + invariant_mutant_verified_tests: 0, line_hits: None, line_coverage: None, mutant_coverage: None, @@ -3196,7 +3395,7 @@ fn apply_test_exposure( r#" WITH ranked_exposure AS ( SELECT path, line, branch_id, test_id, test_type, is_verified, - is_mutation_verified, is_mutation_killed, + is_mutation_verified, is_mutation_killed, mutation_kind, ROW_NUMBER() OVER ( PARTITION BY path, line, COALESCE(branch_id, ''), test_id, test_type ORDER BY timestamp DESC, id DESC @@ -3211,7 +3410,17 @@ fn apply_test_exposure( ) SELECT line, test_type, COUNT(DISTINCT test_id), COUNT(DISTINCT CASE WHEN is_mutation_verified = 1 THEN test_id END), - COUNT(DISTINCT CASE WHEN is_mutation_killed = 1 THEN test_id END) + COUNT(DISTINCT CASE WHEN is_mutation_killed = 1 THEN test_id END), + COUNT(DISTINCT CASE + WHEN is_mutation_verified = 1 + AND lower(COALESCE(mutation_kind, '')) = 'stochastic' + THEN test_id + END), + COUNT(DISTINCT CASE + WHEN is_mutation_verified = 1 + AND lower(COALESCE(mutation_kind, '')) IN ('invariant', 'contract') + THEN test_id + END) FROM latest_exposure WHERE is_verified = 1 GROUP BY line, test_type @@ -3224,10 +3433,20 @@ fn apply_test_exposure( row.get::<_, i64>(2)?, row.get::<_, i64>(3)?, row.get::<_, i64>(4)?, + row.get::<_, i64>(5)?, + row.get::<_, i64>(6)?, )) })?; for row in rows { - let (line, test_type, tests, mutation_verified, mutation_killed) = row?; + let ( + line, + test_type, + tests, + mutation_verified, + mutation_killed, + stochastic_mutation_verified, + invariant_mutation_verified, + ) = row?; let entry = lines.entry(line).or_default(); if paint_line_coverage { entry.covered = true; @@ -3237,6 +3456,8 @@ fn apply_test_exposure( entry.distinct_tests += tests; entry.mutant_verified_tests += 
mutation_verified; entry.mutant_killed_tests += mutation_killed; + entry.stochastic_mutant_verified_tests += stochastic_mutation_verified; + entry.invariant_mutant_verified_tests += invariant_mutation_verified; entry.mutant_tested |= mutation_verified > 0 || mutation_killed > 0; } Ok(()) @@ -4139,7 +4360,6 @@ fn render_index_page( )?; let child_directories = directory_index(&files, ¤t_directory); let child_files = files_in_directory(&files, ¤t_directory); - let table_files = sorted_table_files(&files, filter, ¤t_directory, sort); let filtered = filtered_files_in_directory(&files, filter, ¤t_directory); let branch_context = branch_context(repo); let payload = selected_path @@ -4147,105 +4367,138 @@ fn render_index_page( .map(|path| source_payload_with_overlays(storage, repo, path, commit, overlays)) .transpose(); + let source_sidebar = matches!(&payload, Ok(Some(_))); + let sidebar = match &payload { + Ok(Some(payload)) => render_source_sidebar(payload, ¤t_directory, filter), + _ => render_dashboard_sidebar(DashboardSidebarArgs { + dashboard: &dashboard, + current_directory: ¤t_directory, + filter, + files: &files, + child_directories: &child_directories, + child_files: &child_files, + filtered_files: &filtered, + selected_path: selected_path.as_deref(), + }), + }; + let main = match &payload { + Ok(Some(payload)) => render_source_view(payload, filter, &branch_context), + Ok(None) => render_dashboard( + &dashboard, + ¤t_directory, + &child_directories, + &child_files, + filter, + sort, + &branch_context, + ), + Err(error) => render_source_unavailable(&error.to_string()), + }; + let app = AppTemplate { + source_sidebar, + sidebar: &sidebar, + main: &main, + } + .render() + .context("render lineage app template")?; + render_page("Lineage", &app) +} + +fn render_page(title: &str, body: &str) -> Result { + IndexPageTemplate { title, body } + .render() + .context("render lineage index template") +} + +struct DashboardSidebarArgs<'a> { + dashboard: &'a UiDashboard, + 
current_directory: &'a str, + filter: &'a str, + files: &'a [UiFile], + child_directories: &'a [UiDirectory], + child_files: &'a [&'a UiFile], + filtered_files: &'a [&'a UiFile], + selected_path: Option<&'a str>, +} + +fn render_dashboard_sidebar(args: DashboardSidebarArgs<'_>) -> String { + let summary = format!( + "{} files{} | {:.1}% covered", + args.dashboard.files, + directory_label_suffix(args.current_directory), + args.dashboard.coverage_percent + ); + let nav = render_sidebar_navigation(args.current_directory, args.filter); + let search_options = + render_search_options(args.files, args.child_directories, args.current_directory); + let file_links = render_sidebar_file_links(&args); + render_template_string( + DashboardSidebarTemplate { + summary: &summary, + nav: &nav, + current_directory: args.current_directory, + show_directory_input: !args.current_directory.is_empty(), + filter: args.filter, + search_options: &search_options, + files: &file_links, + }, + "dashboard sidebar template", + ) +} + +fn render_sidebar_file_links(args: &DashboardSidebarArgs<'_>) -> String { let mut out = String::new(); - out.push_str("
'); - match &payload { - Ok(Some(payload)) => { - out.push_str("

Lineage

"); - out.push_str(&html_escape(&payload.path)); - out.push_str("
"); - out.push_str(&render_sidebar_navigation(¤t_directory, filter)); - out.push_str("
"); - let outline = render_source_outline(payload); - if outline.is_empty() { - out.push_str(""); - } else { - out.push_str(&outline); - } - } - _ => { - out.push_str("

Lineage

"); - out.push_str(&format!( - "{} files{} | {:.1}% covered", - dashboard.files, - directory_label_suffix(¤t_directory), - dashboard.coverage_percent + if args.filter.trim().is_empty() { + if !args.current_directory.is_empty() { + out.push_str(&render_parent_directory_link( + args.current_directory, + args.filter, )); - out.push_str("
"); - out.push_str(&render_sidebar_navigation(¤t_directory, filter)); - out.push_str("
"); - out.push_str("
"); - if !current_directory.is_empty() { - out.push_str(""); - } - out.push_str(""); - out.push_str(&render_search_options(&files, &child_directories, ¤t_directory)); - out.push_str("
"); - out.push_str(""); } - } - out.push_str("
"); - match payload { - Ok(Some(payload)) => out.push_str(&render_source_view(&payload, filter, &branch_context)), - Ok(None) => { - out.push_str(&render_dashboard( - &dashboard, - ¤t_directory, - &child_directories, - &table_files, - filter, - sort, - &branch_context, - )); + for directory in args.child_directories { + out.push_str(&render_directory_link(directory, false, args.filter)); + } + for file in args.child_files { + let active = args.selected_path == Some(file.path.as_str()); + out.push_str(&render_file_link(file, active, args.filter)); + } + if args.child_directories.is_empty() && args.child_files.is_empty() { + out.push_str("
No tracked files in this directory.
"); + } + } else { + for file in args.filtered_files { + let active = args.selected_path == Some(file.path.as_str()); + out.push_str(&render_file_link(file, active, args.filter)); } - Err(error) => { - out.push_str("
Source unavailable
"); - out.push_str("
"); - out.push_str(&html_escape(&error.to_string())); - out.push_str("
"); - out.push_str("
The selected path is not available in the current checkout. Regenerate coverage for HEAD or open a historical commit view.
"); + if args.filtered_files.is_empty() { + out.push_str("
No matching files in this directory.
"); } } - out.push_str("
"); - render_page("Lineage", &out) + out } -fn render_page(title: &str, body: &str) -> Result { - IndexPageTemplate { title, body } - .render() - .context("render lineage index template") +fn render_source_sidebar(payload: &UiSourcePayload, current_directory: &str, filter: &str) -> String { + let nav = render_sidebar_navigation(current_directory, filter); + let outline = render_source_outline(payload); + render_template_string( + SourceSidebarTemplate { + path: &payload.path, + nav: &nav, + outline: &outline, + show_empty_outline: outline.is_empty(), + }, + "source sidebar template", + ) +} + +fn render_source_unavailable(error: &str) -> String { + render_template_string(SourceUnavailableTemplate { error }, "source unavailable template") +} + +fn render_template_string(template: T, name: &str) -> String { + template.render().unwrap_or_else(|error| { + panic!("failed to render {name}: {error}"); + }) } fn filtered_files<'a>(files: &'a [UiFile], filter: &str) -> Vec<&'a UiFile> { @@ -4267,38 +4520,6 @@ fn filtered_files_in_directory<'a>( .collect() } -fn sorted_table_files<'a>( - files: &'a [UiFile], - filter: &str, - directory: &str, - sort: CoverageSort, -) -> Vec<&'a UiFile> { - let mut files = filtered_files_in_directory(files, filter, directory); - files.sort_by(|left, right| match sort { - CoverageSort::Path => left.path.cmp(&right.path), - CoverageSort::Total => right - .tracked_lines - .cmp(&left.tracked_lines) - .then_with(|| left.path.cmp(&right.path)), - CoverageSort::Covered => right - .covered_lines - .cmp(&left.covered_lines) - .then_with(|| left.path.cmp(&right.path)), - CoverageSort::Partial => partial_line_count(right.covered_lines, right.dark_arm_findings) - .cmp(&partial_line_count(left.covered_lines, left.dark_arm_findings)) - .then_with(|| left.path.cmp(&right.path)), - CoverageSort::Missed => missed_line_count(right.tracked_lines, right.covered_lines) - .cmp(&missed_line_count(left.tracked_lines, left.covered_lines)) - .then_with(|| 
left.path.cmp(&right.path)), - CoverageSort::Percent => right - .line_coverage - .partial_cmp(&left.line_coverage) - .unwrap_or(std::cmp::Ordering::Equal) - .then_with(|| left.path.cmp(&right.path)), - }); - files -} - fn files_in_directory<'a>(files: &'a [UiFile], directory: &str) -> Vec<&'a UiFile> { let directory = normalize_directory(directory); files @@ -4402,44 +4623,277 @@ fn render_source_outline(payload: &UiSourcePayload) -> String { return String::new(); } + let containers = outline_containers(&payload.symbols); + let functions = outline_functions(&payload.symbols, &containers); let mut out = String::new(); out.push_str(""); - out + (None, outline_short_name(&symbol.name)) +} + +fn outline_short_name(name: &str) -> String { + normalize_outline_owner(name) + .rsplit('.') + .next() + .unwrap_or(name) + .trim_start_matches("self.") + .to_string() +} + +fn normalize_outline_owner(name: &str) -> String { + name.replace("::", ".") +} + +fn resolve_outline_owner(owner: &str, containers: &[OutlineContainer<'_>]) -> Option { + let normalized = normalize_outline_owner(owner); + containers + .iter() + .find(|container| container.full_name == normalized) + .or_else(|| { + containers + .iter() + .find(|container| container.full_name.ends_with(&format!(".{normalized}"))) + }) + .or_else(|| containers.iter().find(|container| container.display_name == normalized)) + .map(|container| container.full_name.clone()) +} + +fn containing_outline_owner( + symbol: &UiSourceSymbol, + containers: &[OutlineContainer<'_>], +) -> Option { + containers + .iter() + .filter(|container| outline_contains(container.symbol, symbol)) + .max_by_key(|container| container.depth) + .map(|container| container.full_name.clone()) +} + +fn root_outline_entries<'a>( + containers: &'a [OutlineContainer<'a>], + functions: &'a [OutlineFunction<'a>], +) -> Vec> { + sorted_outline_entries( + containers + .iter() + .filter(|container| container.parent.is_none()) + .map(OutlineEntry::Container) + .chain( + 
functions + .iter() + .filter(|function| function.owner.is_none()) + .map(OutlineEntry::Function), + ), + ) +} + +fn child_outline_entries<'a>( + owner: &str, + containers: &'a [OutlineContainer<'a>], + functions: &'a [OutlineFunction<'a>], +) -> Vec> { + sorted_outline_entries( + containers + .iter() + .filter(|container| container.parent.as_deref() == Some(owner)) + .map(OutlineEntry::Container) + .chain( + functions + .iter() + .filter(|function| function.owner.as_deref() == Some(owner)) + .map(OutlineEntry::Function), + ), + ) +} + +fn sorted_outline_entries<'a>( + entries: impl Iterator>, +) -> Vec> { + let mut entries = entries.collect::>(); + entries.sort_by(|left, right| { + left.start_line() + .cmp(&right.start_line()) + .then_with(|| left.kind_rank().cmp(&right.kind_rank())) + }); + entries +} + +fn render_outline_entry( + out: &mut String, + entry: OutlineEntry<'_>, + containers: &[OutlineContainer<'_>], + functions: &[OutlineFunction<'_>], +) { + match entry { + OutlineEntry::Container(container) => { + render_outline_symbol_link(out, container.symbol, &container.display_name, container.depth); + for child in child_outline_entries(&container.full_name, containers, functions) { + render_outline_entry(out, child, containers, functions); + } + } + OutlineEntry::Function(function) => { + render_outline_symbol_link( + out, + function.symbol, + &function.display_name, + function.depth, + ); + } + } +} + +fn render_outline_symbol_link( + out: &mut String, + symbol: &UiSourceSymbol, + display_name: &str, + depth: usize, +) { + out.push_str(""); + if symbol.impure { + out.push_str(""); + } + out.push_str(""); + out.push_str(&html_escape(&outline_kind_label(symbol))); + out.push_str(""); + out.push_str(&html_escape(display_name)); + out.push_str(""); } fn outline_kind_label(symbol: &UiSourceSymbol) -> String { @@ -4556,224 +5010,228 @@ fn render_directory_link(directory: &UiDirectory, active: bool, filter: &str) -> out } -fn render_coverage_bar( - tracked_lines: 
i64, - covered_lines: i64, - line_coverage: f64, - mutant_killed_covered_lines: i64, - dark_arm_findings: i64, -) -> String { - let (strong, weak) = coverage_bar_widths( - tracked_lines, - covered_lines, - line_coverage, - mutant_killed_covered_lines, - dark_arm_findings, - ); +fn render_line_quality_bar(bar: LineQualityBar) -> String { + let segments = line_quality_segments(bar); let title = format!( - "{:.1}% covered; {:.1}% mutant-killed/no-partial confidence; {:.1}% weak covered tail", - (strong + weak).min(100.0), - strong, - weak + "{:.1}% covered; {} total, {} covered, {} multi-covered, {} partial, {} missed, {} mutant-backed", + bar.coverage_percent.clamp(0.0, 100.0), + bar.tracked_lines.max(0), + bar.covered_lines.clamp(0, bar.tracked_lines.max(0)), + bar.multi_type_lines.max(0), + bar.partial_lines.max(0), + missed_line_count(bar.tracked_lines, bar.covered_lines), + bar.mutant_backed_lines.max(0) ); format!( - "", + concat!( + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + ), html_escape(&title), - strong, - weak + segments.multi, + segments.covered, + segments.partial, + segments.missed, + segments.mutant_multi, + segments.mutant_covered, + segments.mutant_partial, + segments.mutant_gap ) } -fn coverage_bar_widths( - tracked_lines: i64, - covered_lines: i64, - line_coverage: f64, - mutant_killed_covered_lines: i64, - dark_arm_findings: i64, -) -> (f64, f64) { - if tracked_lines <= 0 { - let covered = line_coverage.clamp(0.0, 100.0); - return (0.0, covered); - } - let covered_lines = covered_lines.clamp(0, tracked_lines); - let dark_arm_lines = dark_arm_findings.clamp(0, covered_lines); - let missing_mutant_lines = covered_lines - .saturating_sub(mutant_killed_covered_lines.clamp(0, covered_lines)); - let weak_lines = missing_mutant_lines.max(dark_arm_lines).min(covered_lines); - let strong_lines = covered_lines.saturating_sub(weak_lines); - ( - percent(strong_lines, tracked_lines), - percent(weak_lines, tracked_lines), - ) +fn 
line_quality_segments(bar: LineQualityBar) -> LineQualitySegments { + let tracked_lines = bar.tracked_lines.max(0); + if tracked_lines == 0 { + let covered: f64 = bar.coverage_percent.clamp(0.0, 100.0); + return LineQualitySegments { + multi: 0.0, + covered, + partial: 0.0, + missed: (100.0 - covered).max(0.0), + mutant_multi: 0.0, + mutant_covered: 0.0, + mutant_partial: 0.0, + mutant_gap: 100.0, + }; + } + let covered_lines = bar.covered_lines.clamp(0, tracked_lines); + let partial_lines = bar.partial_lines.clamp(0, covered_lines); + let full_covered_lines = covered_lines.saturating_sub(partial_lines); + let multi_type_lines = bar.multi_type_lines.clamp(0, full_covered_lines); + let covered_single_lines = full_covered_lines.saturating_sub(multi_type_lines); + let missed_lines = tracked_lines.saturating_sub(covered_lines); + let mutant_backed_lines = bar.mutant_backed_lines.clamp(0, covered_lines); + let mutant_multi_lines = mutant_backed_lines.min(multi_type_lines); + let remaining_mutant = mutant_backed_lines.saturating_sub(mutant_multi_lines); + let mutant_covered_lines = remaining_mutant.min(covered_single_lines); + let remaining_mutant = remaining_mutant.saturating_sub(mutant_covered_lines); + let mutant_partial_lines = remaining_mutant.min(partial_lines); + let mutant_painted_lines = mutant_multi_lines + mutant_covered_lines + mutant_partial_lines; + LineQualitySegments { + multi: percent(multi_type_lines, tracked_lines), + covered: percent(covered_single_lines, tracked_lines), + partial: percent(partial_lines, tracked_lines), + missed: percent(missed_lines, tracked_lines), + mutant_multi: percent(mutant_multi_lines, tracked_lines), + mutant_covered: percent(mutant_covered_lines, tracked_lines), + mutant_partial: percent(mutant_partial_lines, tracked_lines), + mutant_gap: percent(tracked_lines.saturating_sub(mutant_painted_lines), tracked_lines), + } } fn render_dashboard( dashboard: &UiDashboard, directory: &str, - _directories: &[UiDirectory], + 
directories: &[UiDirectory], files: &[&UiFile], filter: &str, sort: CoverageSort, branch_context: &UiBranchContext, ) -> String { let directory = normalize_directory(directory); - let mut out = String::new(); - out.push_str("
"); - if directory.is_empty() { - out.push_str("Coverage Dashboard"); - } else { - out.push_str("Directory: "); - out.push_str(&html_escape(&directory)); - out.push('/'); - } - out.push_str("
Current Lineage database snapshot"); - if !directory.is_empty() { - out.push_str(" scoped to "); - out.push_str(&html_escape(&directory)); - out.push('/'); - } - out.push_str("
"); - out.push_str("
root"); - if !directory.is_empty() { - out.push_str("up"); - } - out.push_str("
"); - out.push_str("
"); let coverage_context = dashboard_coverage_context(dashboard, directory.as_str(), files); - out.push_str(&render_branch_context(branch_context, &coverage_context, filter)); - out.push_str("
"); - out.push_str(&render_metric( - "Line coverage", - &format!("{:.1}%", dashboard.coverage_percent), - &format!( - "{} / {} tracked lines covered", - dashboard.covered_lines, dashboard.tracked_lines - ), - )); - out.push_str(&render_metric( - "Hazard evidence", - &format!("{:.1}%", dashboard.hazard_evidence_percent), - &format!( - "{} / {} active hazards have required systems evidence", - dashboard.evidence_covered_hazards, dashboard.active_hazards - ), - )); - out.push_str(&render_metric( - "Hazard verification", - &format!("{:.1}%", dashboard.hazard_coverage_percent), - &format!( - "{} / {} active hazards have evidence plus invariant mutants", - dashboard.covered_hazards, dashboard.active_hazards - ), - )); - out.push_str(&render_metric( - "Mutant-backed lines", - &format!("{:.1}%", dashboard.mutant_verified_covered_percent), - &format!( - "{} / {} covered lines have mutant-verified evidence", - dashboard.mutant_verified_covered_lines, dashboard.covered_lines - ), - )); - out.push_str(&render_metric( - "Stochastic mutants", - &format!("{:.1}%", dashboard.stochastic_mutant_verified_covered_percent), - &format!( - "{} / {} covered lines are stochastic-mutant verified", - dashboard.stochastic_mutant_verified_covered_lines, dashboard.covered_lines - ), - )); - out.push_str(&render_metric( - "Invariant mutants", - &format!("{:.1}%", dashboard.invariant_mutant_verified_covered_percent), - &format!( - "{} / {} covered lines are invariant-mutant verified", - dashboard.invariant_mutant_verified_covered_lines, dashboard.covered_lines - ), - )); - out.push_str(&render_metric( - "Multi-type lines", - &format!("{:.1}%", dashboard.multi_type_covered_percent), - &format!( - "{} / {} covered lines have multiple verified test types", - dashboard.multi_type_covered_lines, dashboard.covered_lines - ), - )); - out.push_str(&render_metric( - "SARIF findings", - &dashboard.sarif_findings.to_string(), - "persisted first-party and ecosystem analysis findings", - )); - 
out.push_str(&render_metric( - "Files", - &dashboard.files.to_string(), - &format!("{} files currently report coverage", dashboard.files_with_coverage), - )); - out.push_str("
"); - out.push_str(&render_warning_banner(&dashboard.warnings)); - - out.push_str("

Highest Risk Units

"); - out.push_str(&render_unit_hotspots(&dashboard.top_units, filter)); - out.push_str("
"); - - out.push_str("

Highest Architectural Risks

"); - out.push_str(&render_architecture_risks( - &dashboard.top_architecture_risks, - filter, - )); - out.push_str("
"); - - out.push_str("

Code tree

"); - out.push_str(&render_code_tree_table( + let branch_context = render_branch_context(branch_context, &coverage_context, filter); + let warnings = render_warning_banner(&dashboard.warnings); + let active_hazards = render_active_hazards_section(dashboard); + let highest_hazard_files = render_highest_hazard_files_section(dashboard, filter); + let highest_risk_units = render_dashboard_disclosure( + "Highest Risk Units", + false, + &render_unit_hotspots(&dashboard.top_units, filter), + ); + let highest_architecture_risks = render_dashboard_disclosure( + "Highest Architectural Risks", + false, + &render_architecture_risks(&dashboard.top_architecture_risks, filter), + ); + let code_tree_heading = format!( + "Directory entries ({} dirs - {} files - {} SARIF findings)", + directories.len(), + files.len(), + dashboard.sarif_findings + ); + let code_tree = render_code_tree_table( dashboard, &directory, + directories, files, filter, sort, - )); - out.push_str("
"); + ); + render_template_string( + DashboardTemplate { + branch_context: &branch_context, + warnings: &warnings, + active_hazards: &active_hazards, + highest_hazard_files: &highest_hazard_files, + highest_risk_units: &highest_risk_units, + highest_architecture_risks: &highest_architecture_risks, + code_tree_heading: &code_tree_heading, + code_tree: &code_tree, + }, + "dashboard template", + ) +} - out.push_str("

Active Hazards

"); +fn render_dashboard_disclosure(title: &str, open: bool, body: &str) -> String { + render_template_string( + DashboardDisclosureTemplate { title, open, body }, + "dashboard disclosure template", + ) +} + +fn render_active_hazards_section(dashboard: &UiDashboard) -> String { + let mut body = String::new(); if dashboard.active_hazards == 0 { - out.push_str("

No active systems hazards are recorded.

"); + body.push_str("

No active systems hazards are recorded.

"); } else { - out.push_str("
"); - out.push_str("

"); - out.push_str(&format!( - "{} hazards have required systems evidence; {} also have invariant-mutant proof.", - dashboard.evidence_covered_hazards, + body.push_str(&render_dashboard_ratio_bar_row( + "Hazard verification", + dashboard.active_hazards, dashboard.covered_hazards, + &format!( + "{} total hazards / {} covered / {} with required systems evidence", + dashboard.active_hazards, + dashboard.covered_hazards, + dashboard.evidence_covered_hazards + ), + "active hazards", + "covered hazards", + "hazard-bar", )); - out.push_str("

"); } - out.push_str("
"); + render_dashboard_disclosure("Active Hazards", dashboard.active_hazards > 0, &body) +} - out.push_str("

Highest Hazard Files

"); - if dashboard.top_hazard_files.is_empty() { - out.push_str("

No hazard-heavy files to show.

"); - } else { - out.push_str(""); - } - out.push_str("
"); - out.push_str("
"); - out +fn render_highest_hazard_files_section(dashboard: &UiDashboard, filter: &str) -> String { + let files = dashboard + .top_hazard_files + .iter() + .map(|file| DashboardHazardFileItem { + href: page_href(&file.path, None, filter), + path: file.path.clone(), + detail: file_detail_text(file), + hazards: file.hazards, + }) + .collect::>(); + let body = render_template_string( + DashboardHazardFilesTemplate { files: &files }, + "dashboard hazard files template", + ); + render_dashboard_disclosure( + "Highest Hazard Files", + dashboard.active_hazards > 0 && !dashboard.top_hazard_files.is_empty(), + &body, + ) +} + +fn render_dashboard_ratio_bar_row( + label: &str, + total: i64, + covered: i64, + detail: &str, + total_label: &str, + covered_label: &str, + bar_class: &str, +) -> String { + let bar = render_ratio_bar(total, covered, bar_class); + render_template_string( + DashboardRatioBarTemplate { + label, + detail, + bar: &bar, + total: total.max(0), + total_label, + covered: covered.max(0), + covered_label, + }, + "dashboard ratio bar template", + ) +} + +fn render_ratio_bar(total: i64, covered: i64, bar_class: &str) -> String { + let total = total.max(0); + let covered = covered.clamp(0, total); + let covered_percent = percent(covered, total); + let missed_percent = 100.0 - covered_percent; + format!( + "", + html_escape(bar_class), + covered, + total, + covered_percent, + missed_percent.max(0.0) + ) } fn dashboard_coverage_context( @@ -4792,44 +5250,113 @@ fn dashboard_coverage_context( covered_lines: dashboard.covered_lines, partial_lines, missed_lines: missed_line_count(dashboard.tracked_lines, dashboard.covered_lines), + multi_type_lines: dashboard.multi_type_covered_lines, + mutant_backed_lines: dashboard.mutant_verified_covered_lines, + stochastic_mutant_backed_lines: dashboard.stochastic_mutant_verified_covered_lines, + invariant_mutant_backed_lines: dashboard.invariant_mutant_verified_covered_lines, coverage_percent: dashboard.coverage_percent, } } fn 
source_coverage_context(payload: &UiSourcePayload) -> UiCoverageContext { + let has_exact_line_hits = payload + .annotations + .iter() + .any(|annotation| annotation.line_hits.is_some()); let tracked_lines = payload .annotations .iter() .filter(|annotation| { - annotation.line_hits.is_some() - || annotation.line_coverage.is_some() - || annotation.covered - || !annotation.test_types.is_empty() - || !annotation.findings.is_empty() - || !annotation.hazards.is_empty() + if has_exact_line_hits { + annotation.line_hits.is_some() + } else { + annotation.line_coverage.is_some() + || annotation.covered + || !annotation.test_types.is_empty() + || !annotation.findings.is_empty() + || !annotation.hazards.is_empty() + } }) .count() as i64; let covered_lines = payload .annotations .iter() - .filter(|annotation| annotation.line_hits.unwrap_or(if annotation.covered { 1 } else { 0 }) > 0) + .filter(|annotation| { + if has_exact_line_hits { + annotation.line_hits.unwrap_or(0) > 0 + } else { + annotation.line_hits.unwrap_or(if annotation.covered { 1 } else { 0 }) > 0 + } + }) .count() as i64; let partial_lines = payload .annotations .iter() - .filter(|annotation| annotation_has_dark_arms(annotation)) + .filter(|annotation| { + (!has_exact_line_hits || annotation.line_hits.is_some()) + && annotation_has_dark_arms(annotation) + }) .count() as i64; let partial_lines = partial_lines.clamp(0, covered_lines); + let multi_type_lines = payload + .annotations + .iter() + .filter(|annotation| { + annotation_counts_for_coverage_context(annotation, has_exact_line_hits) + && (annotation.line_hits.unwrap_or(0) > 1 + || annotation.test_types.len() >= 2 + || annotation.distinct_tests >= 2) + }) + .count() as i64; + let mutant_backed_lines = payload + .annotations + .iter() + .filter(|annotation| { + annotation_counts_for_coverage_context(annotation, has_exact_line_hits) + && annotation.mutant_verified_tests > 0 + }) + .count() as i64; + let stochastic_mutant_backed_lines = payload + .annotations + 
.iter() + .filter(|annotation| { + annotation_counts_for_coverage_context(annotation, has_exact_line_hits) + && annotation.stochastic_mutant_verified_tests > 0 + }) + .count() as i64; + let invariant_mutant_backed_lines = payload + .annotations + .iter() + .filter(|annotation| { + annotation_counts_for_coverage_context(annotation, has_exact_line_hits) + && annotation.invariant_mutant_verified_tests > 0 + }) + .count() as i64; UiCoverageContext { path: payload.path.clone(), tracked_lines, covered_lines, partial_lines, missed_lines: missed_line_count(tracked_lines, covered_lines), + multi_type_lines: multi_type_lines.clamp(0, covered_lines), + mutant_backed_lines: mutant_backed_lines.clamp(0, covered_lines), + stochastic_mutant_backed_lines: stochastic_mutant_backed_lines.clamp(0, covered_lines), + invariant_mutant_backed_lines: invariant_mutant_backed_lines.clamp(0, covered_lines), coverage_percent: percent(covered_lines, tracked_lines), } } +fn annotation_counts_for_coverage_context( + annotation: &UiLineAnnotation, + has_exact_line_hits: bool, +) -> bool { + if has_exact_line_hits { + annotation.line_hits.unwrap_or(0) > 0 + } else { + annotation.line_hits.unwrap_or(if annotation.covered { 1 } else { 0 }) > 0 + } +} + fn partial_line_count(covered_lines: i64, partial_findings: i64) -> i64 { partial_findings.clamp(0, covered_lines.max(0)) } @@ -4843,41 +5370,33 @@ fn render_branch_context( coverage: &UiCoverageContext, filter: &str, ) -> String { - let mut out = String::new(); - out.push_str("
"); - out.push_str("
Branch Context
"); - out.push_str(&html_escape(&context.branch)); - out.push_str("Source: latest commit "); - out.push_str(&html_escape(&context.commit)); - out.push_str("
Coverage on branch"); - out.push_str(&format!("{:.2}%", coverage.coverage_percent)); - out.push_str(""); - out.push_str(&format!( - "{} of {} lines covered; {} partial, {} missed", - coverage.covered_lines, - coverage.tracked_lines, - coverage.partial_lines, - coverage.missed_lines - )); - out.push_str(""); - out.push_str(&render_coverage_bar( - coverage.tracked_lines, - coverage.covered_lines, - coverage.coverage_percent, - coverage.covered_lines.saturating_sub(coverage.partial_lines), - coverage.partial_lines, - )); - out.push_str("
"); - out.push_str("
"); - out.push_str(&render_path_breadcrumb(&coverage.path, filter)); - out.push_str("
"); - out.push_str("uncovered"); - out.push_str("partial"); - out.push_str("!hazard"); - out.push_str("covered"); - out.push_str("
"); - out.push_str("
"); - out + let line_quality_bar = render_line_quality_bar(LineQualityBar { + tracked_lines: coverage.tracked_lines, + covered_lines: coverage.covered_lines, + partial_lines: coverage.partial_lines, + multi_type_lines: coverage.multi_type_lines, + mutant_backed_lines: coverage.mutant_backed_lines, + coverage_percent: coverage.coverage_percent, + }); + let breadcrumbs = render_path_breadcrumb(&coverage.path, filter); + let coverage_percent = format!("{:.2}", coverage.coverage_percent); + render_template_string( + BranchContextTemplate { + branch: &context.branch, + commit: &context.commit, + coverage_percent: &coverage_percent, + covered_lines: coverage.covered_lines, + tracked_lines: coverage.tracked_lines, + partial_lines: coverage.partial_lines, + missed_lines: coverage.missed_lines, + mutant_backed_lines: coverage.mutant_backed_lines.max(0), + stochastic_mutant_backed_lines: coverage.stochastic_mutant_backed_lines.max(0), + invariant_mutant_backed_lines: coverage.invariant_mutant_backed_lines.max(0), + line_quality_bar: &line_quality_bar, + breadcrumbs: &breadcrumbs, + }, + "branch context template", + ) } fn render_path_breadcrumb(path: &str, filter: &str) -> String { @@ -4916,52 +5435,161 @@ fn render_path_breadcrumb(path: &str, filter: &str) -> String { fn render_code_tree_table( dashboard: &UiDashboard, directory: &str, + directories: &[UiDirectory], files: &[&UiFile], filter: &str, sort: CoverageSort, ) -> String { - let mut out = String::new(); - out.push_str(""); - out.push_str(""); - out.push_str(""); - out.push_str(""); - out.push_str(""); - for file in files { - out.push_str(&render_file_coverage_row(file, directory, filter)); - } - if files.is_empty() { - out.push_str(""); - } - out.push_str(""); + let name_header = render_sort_link("Name", CoverageSort::Path, sort, directory, filter); + let total_header = render_sort_link("Total", CoverageSort::Total, sort, directory, filter); + let covered_header = render_sort_link("Covered", CoverageSort::Covered, 
sort, directory, filter); + let partial_header = render_sort_link("Partial", CoverageSort::Partial, sort, directory, filter); + let missed_header = render_sort_link("Missed", CoverageSort::Missed, sort, directory, filter); + let percent_header = render_sort_link("%", CoverageSort::Percent, sort, directory, filter); + let mut rows = String::new(); + for entry in sorted_code_tree_entries(directories, files, sort) { + rows.push_str(&render_code_tree_row(&entry, directory, filter)); + } + let empty = directories.is_empty() && files.is_empty(); let partial = files .iter() .map(|file| partial_line_count(file.covered_lines, file.dark_arm_findings)) .sum::(); let partial = partial.clamp(0, dashboard.covered_lines); - out.push_str(&render_coverage_table_row( + let subtotal = render_coverage_table_row( None, + "", "Subtotal", "", dashboard.tracked_lines, dashboard.covered_lines, partial, - dashboard.mutant_killed_covered_lines, + dashboard.multi_type_covered_lines, + dashboard.mutant_verified_covered_lines, dashboard.coverage_percent, - )); - out.push_str("
"); - out.push_str(&render_sort_link("File list", CoverageSort::Path, sort, directory, filter)); - out.push_str(""); - out.push_str(&render_sort_link("Total", CoverageSort::Total, sort, directory, filter)); - out.push_str(""); - out.push_str(&render_sort_link("Covered", CoverageSort::Covered, sort, directory, filter)); - out.push_str(""); - out.push_str(&render_sort_link("Partial", CoverageSort::Partial, sort, directory, filter)); - out.push_str(""); - out.push_str(&render_sort_link("Missed", CoverageSort::Missed, sort, directory, filter)); - out.push_str("Coverage"); - out.push_str(&render_sort_link("%", CoverageSort::Percent, sort, directory, filter)); - out.push_str("
No tracked files in this directory.
"); - out + ); + render_template_string( + CoverageTableTemplate { + name_header: &name_header, + total_header: &total_header, + covered_header: &covered_header, + partial_header: &partial_header, + missed_header: &missed_header, + percent_header: &percent_header, + rows: &rows, + empty, + subtotal: &subtotal, + }, + "coverage table template", + ) +} + +#[derive(Debug, Clone, PartialEq)] +enum CodeTreeEntry<'a> { + Directory(&'a UiDirectory), + File(&'a UiFile), +} + +impl CodeTreeEntry<'_> { + fn name(&self) -> &str { + match self { + CodeTreeEntry::Directory(directory) => &directory.path, + CodeTreeEntry::File(file) => &file.path, + } + } + + fn tracked_lines(&self) -> i64 { + match self { + CodeTreeEntry::Directory(directory) => directory.tracked_lines, + CodeTreeEntry::File(file) => file.tracked_lines, + } + } + + fn covered_lines(&self) -> i64 { + match self { + CodeTreeEntry::Directory(directory) => directory.covered_lines, + CodeTreeEntry::File(file) => file.covered_lines, + } + } + + fn partial_findings(&self) -> i64 { + match self { + CodeTreeEntry::Directory(directory) => directory.dark_arm_findings, + CodeTreeEntry::File(file) => file.dark_arm_findings, + } + } + + fn missed_lines(&self) -> i64 { + missed_line_count(self.tracked_lines(), self.covered_lines()) + } + + fn line_coverage(&self) -> f64 { + match self { + CodeTreeEntry::Directory(directory) => directory.line_coverage, + CodeTreeEntry::File(file) => file.line_coverage, + } + } + + fn path_for_tiebreak(&self) -> &str { + self.name() + } +} + +fn sorted_code_tree_entries<'a>( + directories: &'a [UiDirectory], + files: &'a [&'a UiFile], + sort: CoverageSort, +) -> Vec> { + let mut entries = directories + .iter() + .map(CodeTreeEntry::Directory) + .chain(files.iter().copied().map(CodeTreeEntry::File)) + .collect::>(); + entries.sort_by(|left, right| match sort { + CoverageSort::Path => code_tree_entry_kind_rank(left) + .cmp(&code_tree_entry_kind_rank(right)) + .then_with(|| 
left.path_for_tiebreak().cmp(right.path_for_tiebreak())), + CoverageSort::Total => right + .tracked_lines() + .cmp(&left.tracked_lines()) + .then_with(|| code_tree_entry_kind_rank(left).cmp(&code_tree_entry_kind_rank(right))) + .then_with(|| left.path_for_tiebreak().cmp(right.path_for_tiebreak())), + CoverageSort::Covered => right + .covered_lines() + .cmp(&left.covered_lines()) + .then_with(|| code_tree_entry_kind_rank(left).cmp(&code_tree_entry_kind_rank(right))) + .then_with(|| left.path_for_tiebreak().cmp(right.path_for_tiebreak())), + CoverageSort::Partial => partial_line_count(right.covered_lines(), right.partial_findings()) + .cmp(&partial_line_count(left.covered_lines(), left.partial_findings())) + .then_with(|| code_tree_entry_kind_rank(left).cmp(&code_tree_entry_kind_rank(right))) + .then_with(|| left.path_for_tiebreak().cmp(right.path_for_tiebreak())), + CoverageSort::Missed => right + .missed_lines() + .cmp(&left.missed_lines()) + .then_with(|| code_tree_entry_kind_rank(left).cmp(&code_tree_entry_kind_rank(right))) + .then_with(|| left.path_for_tiebreak().cmp(right.path_for_tiebreak())), + CoverageSort::Percent => right + .line_coverage() + .partial_cmp(&left.line_coverage()) + .unwrap_or(std::cmp::Ordering::Equal) + .then_with(|| code_tree_entry_kind_rank(left).cmp(&code_tree_entry_kind_rank(right))) + .then_with(|| left.path_for_tiebreak().cmp(right.path_for_tiebreak())), + }); + entries +} + +fn code_tree_entry_kind_rank(entry: &CodeTreeEntry<'_>) -> u8 { + match entry { + CodeTreeEntry::Directory(_) => 0, + CodeTreeEntry::File(_) => 1, + } +} + +fn render_code_tree_row(entry: &CodeTreeEntry<'_>, directory: &str, filter: &str) -> String { + match entry { + CodeTreeEntry::Directory(child) => render_directory_coverage_row(child, directory, filter), + CodeTreeEntry::File(file) => render_file_coverage_row(file, directory, filter), + } } fn render_sort_link( @@ -5002,36 +5630,55 @@ fn render_file_coverage_row(file: &UiFile, directory: &str, filter: &str) 
-> Str ); render_coverage_table_row( Some(&page_href(&file.path, None, filter)), + "fa-regular fa-file-lines", &display_path, &detail, file.tracked_lines, file.covered_lines, file.dark_arm_findings, - file.mutant_killed_covered_lines, + file.multi_type_covered_lines, + file.mutant_verified_covered_lines, file.line_coverage, ) } -fn render_unit_hotspots(units: &[UiUnitHotspot], filter: &str) -> String { - if units.is_empty() { - return "

No function or class hotspots to show.

".to_string(); +fn render_directory_coverage_row(directory: &UiDirectory, parent: &str, filter: &str) -> String { + let mut display_path = file_display_path(&directory.path, parent); + if !display_path.ends_with('/') { + display_path.push('/'); } + let detail = format!( + "{} files, {} units, {} hazards, {} SARIF, {} tests, {} mutant killed", + directory.files, + directory.units, + directory.hazards, + directory.sarif_findings, + directory.distinct_tests, + directory.mutant_killed_tests + ); + render_coverage_table_row( + Some(&directory_href(&directory.path, filter)), + "fa-regular fa-folder", + &display_path, + &detail, + directory.tracked_lines, + directory.covered_lines, + directory.dark_arm_findings, + 0, + 0, + directory.line_coverage, + ) +} - let mut out = String::new(); - out.push_str(""); - out + ), + score: format!("{:.1}", unit.score), + }) + .collect::>(); + render_template_string( + HotspotListTemplate { + wrapper_class: "unit-hotspots", + empty_message: "No function or class hotspots to show.", + items: &items, + }, + "unit hotspot template", + ) } fn render_architecture_risks(risks: &[UiArchitectureRisk], filter: &str) -> String { - if risks.is_empty() { - return "

No Espalier architectural risks to show.

".to_string(); - } - - let mut out = String::new(); - out.push_str(""); - out + ), + score: format!("{:.1}", risk.score), + }) + .collect::>(); + render_template_string( + HotspotListTemplate { + wrapper_class: "unit-hotspots architecture-hotspots", + empty_message: "No Espalier architectural risks to show.", + items: &items, + }, + "architecture hotspot template", + ) } fn unit_kind_label(kind: &str, name: &str) -> String { @@ -5095,12 +5741,14 @@ fn unit_kind_label(kind: &str, name: &str) -> String { fn render_coverage_table_row( href: Option<&str>, + icon_class: &str, name: &str, detail: &str, tracked_lines: i64, covered_lines: i64, partial_findings: i64, - mutant_killed_covered_lines: i64, + multi_type_lines: i64, + mutant_backed_lines: i64, line_coverage: f64, ) -> String { let partial = partial_line_count(covered_lines, partial_findings); @@ -5117,9 +5765,11 @@ fn render_coverage_table_row( if let Some(href) = href { out.push_str(""); + out.push_str("\" class=\"coverage-name-link\">"); out.push_str(&html_escape(name)); - out.push_str(""); + out.push_str("
"); } else { out.push_str(""); out.push_str(&html_escape(name)); @@ -5139,13 +5789,14 @@ fn render_coverage_table_row( out.push_str(""); out.push_str(&missed.to_string()); out.push_str(""); - out.push_str(&render_coverage_bar( + out.push_str(&render_line_quality_bar(LineQualityBar { tracked_lines, covered_lines, - percent_value, - mutant_killed_covered_lines, - partial, - )); + partial_lines: partial, + multi_type_lines, + mutant_backed_lines, + coverage_percent: percent_value, + })); out.push_str(""); out.push_str(&format!("{percent_value:.2}%")); out.push_str(""); @@ -5163,8 +5814,8 @@ fn file_display_path(path: &str, directory: &str) -> String { } } -fn file_detail(file: &UiFile) -> String { - html_escape(&format!( +fn file_detail_text(file: &UiFile) -> String { + format!( "{} units | {} / {} lines | {} hazards | {} SARIF | {} tests | {} mutant-killed tests", file.units, file.covered_lines, @@ -5173,51 +5824,29 @@ fn file_detail(file: &UiFile) -> String { file.sarif_findings, file.distinct_tests, file.mutant_killed_tests - )) -} - -fn render_metric(label: &str, value: &str, detail: &str) -> String { - let mut out = String::new(); - out.push_str("
"); - out.push_str(&html_escape(label)); - out.push_str("
"); - out.push_str(&html_escape(value)); - out.push_str("

"); - out.push_str(&html_escape(detail)); - out.push_str("

"); - out + ) } fn render_warning_banner(warnings: &[UiWarning]) -> String { - if warnings.is_empty() { - return String::new(); - } - - let mut out = String::new(); - out.push_str("
"); - for (index, warning) in warnings.iter().enumerate() { + let items = warnings + .iter() + .enumerate() + .map(|(index, warning)| { let key = warning_dismiss_key(warning); let input_id = format!("warning-dismiss-{index}-{}", stable_slug(&key)); - out.push_str("
"); - out.push_str(&html_escape(&warning.label)); - out.push_str("

"); - out.push_str(&html_escape(&warning.detail)); - out.push_str("

"); - } - out.push_str("
"); - out + WarningBannerItem { + input_id, + key, + level: warning.level.clone(), + label: warning.label.clone(), + detail: warning.detail.clone(), + } + }) + .collect::>(); + render_template_string( + WarningBannerTemplate { warnings: &items }, + "warning banner template", + ) } fn warning_dismiss_key(warning: &UiWarning) -> String { @@ -5280,43 +5909,21 @@ fn render_source_view( .map(|annotation| annotation.findings.len()) .sum(); - let mut out = String::new(); - out.push_str("
"); - out.push_str( - "", - ); - out.push_str(""); - out.push_str(""); - out.push_str(""); - out.push_str( - "", - ); - out.push_str( - "", - ); - out.push_str("
"); - out.push_str(&html_escape(&payload.path)); - out.push_str("
"); - out.push_str(&format!( + let summary = format!( "{} covered lines | {} mutant lines | {} hazards | {} partial | {} SARIF", covered, mutant, hazards, dark_arms, findings - )); - out.push_str("
"); - out.push_str( - "
", ); - out.push_str(&render_layers_menu()); - out.push_str("
"); - out.push_str(&render_branch_context( + let layers_menu = render_layers_menu(); + let branch_context = render_branch_context( branch_context, &source_coverage_context(payload), filter, - )); - out.push_str(&render_warning_banner(&payload.warnings)); - out.push_str("
"); + ); + let warnings = render_warning_banner(&payload.warnings); + let mut code_lines = String::new(); for (index, line) in payload.lines.iter().enumerate() { let line_no = (index + 1) as u32; - out.push_str(&render_code_line( + code_lines.push_str(&render_code_line( &payload.path, line_no, line, @@ -5325,21 +5932,23 @@ fn render_source_view( comment_fold_lines.get(&line_no), )); } - out.push_str("
"); - out.push_str(&render_history(payload, filter)); - out.push_str("
"); - out + let history = render_history(payload, filter); + render_template_string( + SourceViewTemplate { + path: &payload.path, + summary: &summary, + layers_menu: &layers_menu, + branch_context: &branch_context, + warnings: &warnings, + code_lines: &code_lines, + history: &history, + }, + "source view template", + ) } fn render_layers_menu() -> String { - let mut out = String::new(); - out.push_str("
Layers
"); - out.push_str(""); - out.push_str(""); - out.push_str(""); - out.push_str(""); - out.push_str("
"); - out + render_template_string(LayersMenuTemplate, "layers menu template") } fn render_history(payload: &UiSourcePayload, filter: &str) -> String { @@ -7128,6 +7737,44 @@ mod tests { assert_eq!(line.dark_arm_spans[0].span, Some([2, 2, 2, 6])); } + #[test] + fn source_outline_groups_qualified_methods_under_containers() { + let payload = UiSourcePayload { + path: "gems/slopcop/lib/slopcop/dark_arm_overlay.rb".into(), + commit: None, + lines: Vec::new(), + versions: Vec::new(), + symbols: vec![ + empty_source_symbol("module", "SlopCop", 1, 20), + empty_source_symbol("class", "DarkArmOverlay", 3, 19), + empty_source_symbol("function", "SlopCop.DarkArmOverlay.build", 4, 5), + empty_source_symbol("function", "SlopCop.DarkArmOverlay.to_json", 7, 8), + empty_source_symbol("function", "SlopCop.DarkArmOverlay.to_sarif", 11, 12), + ], + blame: Vec::new(), + annotations: Vec::new(), + warnings: Vec::new(), + }; + + let outline = render_source_outline(&payload); + + assert!(outline.contains("outline-depth-0")); + assert!(outline.contains("outline-depth-1")); + assert!(outline.contains("outline-depth-2")); + assert!(outline.contains("SlopCop")); + assert!(outline.contains("DarkArmOverlay")); + assert!(outline.contains("build")); + assert!(outline.contains("to_json")); + assert!(outline.contains("to_sarif")); + assert!(!outline.contains("SlopCop.DarkArmOverlay.build")); + assert!( + outline.find(">build
").unwrap() < outline.find(">to_json
").unwrap() + ); + assert!( + outline.find(">to_json").unwrap() < outline.find(">to_sarif").unwrap() + ); + } + #[test] fn source_payload_includes_persisted_sarif_findings() { let dir = tempdir().unwrap(); @@ -7463,7 +8110,7 @@ mod tests { .unwrap(); storage .insert_event(&Event { - unit_id: unit.id, + unit_id: unit.id.clone(), commit_hash: "abc".into(), event_type: EventType::Change, path: "src/demo.rb".into(), @@ -7480,17 +8127,62 @@ mod tests { .record_coverage_line("abc", 10, "src/demo.rb", 1, 0) .unwrap(); storage - .record_coverage_line("abc", 10, "src/demo.rb", 2, 1) + .record_coverage_line("abc", 10, "src/demo.rb", 2, 2) + .unwrap(); + storage + .insert_test_exposure_event(&TestExposureEvent { + unit_id: unit.id.clone(), + commit_hash: "abc".into(), + timestamp: 10, + path: "src/demo.rb".into(), + function: Some("run".into()), + line: Some(3), + branch_id: None, + test_id: "spec/demo_spec.rb:1".into(), + test_type: "unit".into(), + mutation_status: None, + mutation_kind: None, + is_mutation_verified: false, + is_mutation_killed: false, + is_verified: true, + payload_json: "{}".into(), + }) + .unwrap(); + storage + .insert_test_exposure_event(&TestExposureEvent { + unit_id: unit.id.clone(), + commit_hash: "abc".into(), + timestamp: 10, + path: "src/demo.rb".into(), + function: Some("run".into()), + line: Some(2), + branch_id: None, + test_id: "spec/demo_spec.rb:2".into(), + test_type: "unit".into(), + mutation_status: Some("killed".into()), + mutation_kind: Some("invariant".into()), + is_mutation_verified: true, + is_mutation_killed: true, + is_verified: true, + payload_json: "{}".into(), + }) .unwrap(); - let payload = source_payload(&storage, dir.path(), "src/demo.rb", None).unwrap(); let line_one = payload.annotations.iter().find(|line| line.line == 1).unwrap(); let line_two = payload.annotations.iter().find(|line| line.line == 2).unwrap(); + let line_three = payload.annotations.iter().find(|line| line.line == 3).unwrap(); + let coverage = 
source_coverage_context(&payload); assert!(!line_one.covered); assert_eq!(line_one.line_hits, Some(0)); assert!(line_two.covered); - assert_eq!(line_two.line_hits, Some(1)); + assert_eq!(line_two.line_hits, Some(2)); + assert!(line_three.test_types.contains(&"unit".to_string())); + assert_eq!(coverage.tracked_lines, 2); + assert_eq!(coverage.covered_lines, 1); + assert_eq!(coverage.missed_lines, 1); + assert_eq!(coverage.multi_type_lines, 1); + assert_eq!(coverage.mutant_backed_lines, 1); } #[test] @@ -7664,6 +8356,8 @@ mod tests { distinct_tests: 9, mutant_verified_tests: 0, mutant_killed_tests: 0, + stochastic_mutant_verified_tests: 0, + invariant_mutant_verified_tests: 0, line_hits: Some(3), line_coverage: Some(100.0), mutant_coverage: None, @@ -8031,7 +8725,7 @@ mod tests { assert!(html.contains("func")); assert!(html.contains("class=\"outline-hotspot\"")); assert!(html.contains("run")); - assert!(html.contains("class=\"coverage-bar\"")); + assert!(html.contains("class=\"coverage-bar line-quality-bar\"")); } #[test] @@ -8043,16 +8737,166 @@ mod tests { } #[test] - fn coverage_bar_splits_strong_and_weak_covered_lines() { - let (strong, weak) = coverage_bar_widths(10, 8, 80.0, 5, 1); + fn line_quality_segments_split_coverage_and_mutant_backing() { + let segments = line_quality_segments(LineQualityBar { + tracked_lines: 10, + covered_lines: 8, + partial_lines: 2, + multi_type_lines: 3, + mutant_backed_lines: 4, + coverage_percent: 80.0, + }); - assert_eq!(strong, 50.0); - assert_eq!(weak, 30.0); + assert_eq!(segments.multi, 30.0); + assert_eq!(segments.covered, 30.0); + assert_eq!(segments.partial, 20.0); + assert_eq!(segments.missed, 20.0); + assert_eq!(segments.mutant_multi, 30.0); + assert_eq!(segments.mutant_covered, 10.0); + assert_eq!(segments.mutant_partial, 0.0); + assert_eq!(segments.mutant_gap, 60.0); + + let html = render_line_quality_bar(LineQualityBar { + tracked_lines: 10, + covered_lines: 8, + partial_lines: 2, + multi_type_lines: 3, + 
mutant_backed_lines: 4, + coverage_percent: 80.0, + }); - let (strong, weak) = coverage_bar_widths(10, 8, 80.0, 8, 2); + assert!(html.contains("line-quality-bar")); + assert!(html.contains("coverage-track")); + assert!(html.contains("mutant-track")); + assert!(html.contains("coverage-partial")); + } - assert_eq!(strong, 60.0); - assert_eq!(weak, 20.0); + #[test] + fn dashboard_renders_collapsible_risks_hazards_first_and_stacked_bars() { + let dashboard = UiDashboard { + files: 2, + tracked_lines: 10, + covered_lines: 8, + coverage_percent: 80.0, + active_hazards: 2, + sarif_findings: 7, + evidence_covered_hazards: 2, + hazard_evidence_percent: 100.0, + covered_hazards: 1, + hazard_coverage_percent: 50.0, + mutant_verified_covered_lines: 4, + mutant_verified_covered_percent: 50.0, + mutant_killed_covered_lines: 4, + mutant_killed_covered_percent: 50.0, + stochastic_mutant_verified_covered_lines: 1, + stochastic_mutant_verified_covered_percent: 12.5, + stochastic_mutant_killed_covered_lines: 1, + stochastic_mutant_killed_covered_percent: 12.5, + invariant_mutant_verified_covered_lines: 2, + invariant_mutant_verified_covered_percent: 25.0, + invariant_mutant_killed_covered_lines: 2, + invariant_mutant_killed_covered_percent: 25.0, + multi_type_covered_lines: 3, + multi_type_covered_percent: 37.5, + files_with_coverage: 2, + top_hazard_files: vec![UiFile { + hazards: 2, + ..ui_file_for_sort("zig/runtime/a.zig", 10, 8, 1) + }], + top_units: Vec::new(), + top_architecture_risks: Vec::new(), + warnings: Vec::new(), + }; + let files = dashboard.top_hazard_files.iter().collect::>(); + let branch_context = UiBranchContext { + branch: "feature".to_string(), + commit: "abcdef123456".to_string(), + }; + let html = render_dashboard( + &dashboard, + "", + &[], + &files, + "", + CoverageSort::Path, + &branch_context, + ); + + assert!(html.contains("
")); + assert!(html.contains("

Active Hazards

")); + assert!(html.contains("

Highest Risk Units

")); + assert!(html.contains("

Highest Architectural Risks

")); + assert!(html.contains("class=\"coverage-bar line-quality-bar\"")); + assert!(html.contains("8 of 10 lines covered; 1 partial, 2 missed")); + assert!(!html.contains(">8 covered lines")); + assert!(html.contains("4 mutant-backed / 1 stochastic / 2 invariant")); + assert!(html.contains("class=\"ratio-bar hazard-bar\"")); + assert!(html.contains("Directory entries (0 dirs - 1 files - 7 SARIF findings)")); + assert!(html.contains("class=\"coverage-name-link\">Lines")); + assert!(!html.contains("Mutants")); + assert_eq!(html.matches("class=\"ratio-bar hazard-bar\"").count(), 1); + assert_eq!(html.matches("class=\"ratio-bar mutant-bar\"").count(), 0); + assert!( + html.find("4 mutant-backed / 1 stochastic / 2 invariant").unwrap() + < html.find("Active Hazards").unwrap(), + "mutant detail should live in the top branch-context bar, not between dashboard sections" + ); + assert!( + html.find("Active Hazards").unwrap() < html.find("Directory entries").unwrap(), + "hazards should render above code tree" + ); + assert!( + html.find("Highest Hazard Files").unwrap() < html.find("Highest Risk Units").unwrap(), + "hazard files should render above risk sections" + ); + + let no_hazard = UiDashboard { + active_hazards: 0, + covered_hazards: 0, + evidence_covered_hazards: 0, + top_hazard_files: Vec::new(), + ..dashboard + }; + let hazards = render_active_hazards_section(&no_hazard); + assert!(hazards.contains("
")); + assert!(!hazards.contains(" open")); + assert!(hazards.contains("No active systems hazards are recorded.")); + } + + #[test] + fn branch_context_legend_lists_coverage_states_without_hazard_marker() { + let context = UiBranchContext { + branch: "feature".to_string(), + commit: "abcdef123456".to_string(), + }; + let coverage = UiCoverageContext { + path: "src/demo.rb".to_string(), + tracked_lines: 4, + covered_lines: 3, + partial_lines: 1, + missed_lines: 1, + multi_type_lines: 1, + mutant_backed_lines: 1, + stochastic_mutant_backed_lines: 1, + invariant_mutant_backed_lines: 0, + coverage_percent: 75.0, + }; + let html = render_branch_context(&context, &coverage, ""); + + assert!(html.contains("coverage-multi\" style=\"width:25.000%")); + assert!(html.contains("Multi-covered")); + assert!(html.contains(">covered")); + assert!(html.contains(">partial")); + assert!(html.contains(">missed")); + assert!(!html.contains("legend-alert")); + assert!(!html.contains(">hazard")); + assert!(html.find("Multi-covered").unwrap() < html.find(">covered").unwrap()); + assert!(html.find(">covered").unwrap() < html.find(">partial").unwrap()); + assert!(html.find(">partial").unwrap() < html.find(">missed").unwrap()); } #[test] @@ -8365,44 +9209,32 @@ mod tests { } #[test] - fn sorted_table_files_includes_descendant_files_and_sorts_by_metrics() { + fn sorted_code_tree_entries_list_immediate_directories_before_files() { let files = vec![ ui_file_for_sort("src/a.rb", 10, 9, 1), ui_file_for_sort("src/internal/b.rb", 20, 10, 2), ui_file_for_sort("src/internal/deeper/c.rb", 4, 4, 0), ui_file_for_sort("zig/runtime/a.zig", 8, 1, 0), ]; + let directories = directory_index(&files, "src"); + let files = files_in_directory(&files, "src"); - let by_path = sorted_table_files(&files, "", "src", CoverageSort::Path) - .into_iter() - .map(|file| file.path.as_str()) - .collect::>(); - let by_missed = sorted_table_files(&files, "", "src", CoverageSort::Missed) + let by_path = 
sorted_code_tree_entries(&directories, &files, CoverageSort::Path) .into_iter() - .map(|file| file.path.as_str()) + .map(|entry| entry.name().to_string()) .collect::>(); - let by_percent = sorted_table_files(&files, "", "src", CoverageSort::Percent) + let by_missed = sorted_code_tree_entries(&directories, &files, CoverageSort::Missed) .into_iter() - .map(|file| file.path.as_str()) + .map(|entry| entry.name().to_string()) .collect::>(); - let filtered = sorted_table_files(&files, "deeper", "src", CoverageSort::Path) + let by_percent = sorted_code_tree_entries(&directories, &files, CoverageSort::Percent) .into_iter() - .map(|file| file.path.as_str()) + .map(|entry| entry.name().to_string()) .collect::>(); - assert_eq!( - by_path, - vec!["src/a.rb", "src/internal/b.rb", "src/internal/deeper/c.rb"] - ); - assert_eq!( - by_missed, - vec!["src/internal/b.rb", "src/a.rb", "src/internal/deeper/c.rb"] - ); - assert_eq!( - by_percent, - vec!["src/internal/deeper/c.rb", "src/a.rb", "src/internal/b.rb"] - ); - assert_eq!(filtered, vec!["src/internal/deeper/c.rb"]); + assert_eq!(by_path, vec!["src/internal", "src/a.rb"]); + assert_eq!(by_missed, vec!["src/internal", "src/a.rb"]); + assert_eq!(by_percent, vec!["src/a.rb", "src/internal"]); } #[test] @@ -8446,7 +9278,7 @@ mod tests { ); storage.upsert_logical_unit(&unit, 10).unwrap(); storage - .record_coverage_line("abc", 10, "src/a.rb", 1, 1) + .record_coverage_line("abc", 10, "src/a.rb", 1, 2) .unwrap(); storage .record_coverage_line("abc", 10, "src/a.rb", 2, 0) @@ -8491,11 +9323,13 @@ mod tests { assert_eq!(files[0].tracked_lines, 2); assert_eq!(files[0].covered_lines, 1); assert_eq!(files[0].mutant_killed_covered_lines, 1); + assert_eq!(files[0].multi_type_covered_lines, 1); let dashboard = dashboard_summary(&storage).unwrap(); assert_eq!(dashboard.files, 1); assert_eq!(dashboard.tracked_lines, 2); assert_eq!(dashboard.covered_lines, 1); + assert_eq!(dashboard.multi_type_covered_lines, 1); 
assert_eq!(dashboard.mutant_killed_covered_percent, 100.0); } @@ -8565,6 +9399,8 @@ flags: distinct_tests: 0, mutant_verified_tests: 0, mutant_killed_tests: 0, + stochastic_mutant_verified_tests: 0, + invariant_mutant_verified_tests: 0, line_hits: Some(1), line_coverage: None, mutant_coverage: None, @@ -8631,6 +9467,8 @@ flags: distinct_tests: 0, mutant_verified_tests: 0, mutant_killed_tests: 0, + stochastic_mutant_verified_tests: 0, + invariant_mutant_verified_tests: 0, line_hits: Some(1), line_coverage: None, mutant_coverage: None, diff --git a/gems/lineage/ui/assets/app.css b/gems/lineage/ui/assets/app.css index 3c593af98..e55cb610a 100644 --- a/gems/lineage/ui/assets/app.css +++ b/gems/lineage/ui/assets/app.css @@ -10,8 +10,14 @@ --hazard: #b42318; --dark-arm: #374151; --dark-arm-bg: rgba(31, 41, 55, 0.22); + --link: #1d4ed8; } * { box-sizing: border-box; } + a { + color: var(--link); + text-decoration: none; + } + a:hover { text-decoration: underline; } body { margin: 0; background: var(--bg); @@ -41,7 +47,7 @@ h2 { margin: 0 0 10px; font-size: 13px; letter-spacing: 0; } .subtle { color: var(--muted); font-size: 12px; } .nav-links { display: flex; flex-wrap: wrap; gap: 8px; margin-top: 6px; } - .home-link { color: #1d4ed8; font-size: 12px; text-decoration: none; } + .home-link { color: var(--link); font-size: 12px; text-decoration: none; } .toolbar { display: flex; gap: 8px; padding: 10px 14px; border-bottom: 1px solid var(--line); } input { width: 100%; @@ -69,11 +75,12 @@ gap: 8px; border-radius: 6px; padding: 7px 8px; - color: var(--text); + color: var(--link); text-decoration: none; } .file:hover, .file.active { background: #eef2f7; } - .dir-up { color: var(--muted); } + .file:hover .file-path { text-decoration: underline; } + .dir-up { color: var(--link); } .file-path { overflow: hidden; text-overflow: ellipsis; @@ -93,19 +100,28 @@ } .coverage-pill { color: #166534; background: rgba(34, 197, 94, 0.08); } .coverage-bar { - display: flex; - height: 8px; + 
display: grid; + grid-template-rows: 1fr 1fr; + height: 12px; min-width: 130px; - border-radius: 999px; - background: rgba(148, 163, 184, 0.18); + border: 1px solid rgba(100, 116, 139, 0.22); + border-radius: 0; + background: #fff; overflow: hidden; } .coverage-bar span { display: block; height: 100%; } - .coverage-strong { background: rgba(22, 101, 52, 0.54); } - .coverage-weak { background: rgba(34, 197, 94, 0.18); } + .coverage-track, + .mutant-track { + display: flex !important; + min-width: 0; + } + .coverage-multi { background: rgba(20, 83, 45, 0.72); } + .coverage-covered { background: rgba(34, 197, 94, 0.24); } + .coverage-partial { background: rgba(31, 41, 55, 0.22); } + .coverage-missed { background: transparent; } .outline { border-top: 1px solid var(--line); padding: 8px; @@ -129,12 +145,16 @@ grid-template-columns: 30px 36px minmax(0, 1fr); gap: 6px; padding: 4px 2px; - color: var(--text); + color: var(--link); text-decoration: none; font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; font-size: 11px; } - .outline a:hover { background: #eef2f7; } + .outline a:hover { + background: #eef2f7; + text-decoration: none; + } + .outline a:hover .outline-name { text-decoration: underline; } .outline-kind { color: var(--muted); } .outline-rail { display: grid; @@ -162,9 +182,17 @@ .hotspot-light-red .outline-hotspot { background: rgba(248, 113, 113, 0.66); } .hotspot-red .outline-hotspot { background: rgba(185, 28, 28, 0.78); } .hotspot-deep-red .outline-hotspot { background: rgba(127, 29, 29, 0.92); } - .pure-symbol .outline-name { color: #166534; } - .impure-symbol .outline-name { color: #7f1d1d; } - .outline-name { min-width: 0; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; } + .outline-depth-1 { padding-left: 14px; } + .outline-depth-2 { padding-left: 28px; } + .outline-depth-3 { padding-left: 42px; } + .outline-depth-4 { padding-left: 56px; } + .outline-name { + color: var(--link); + min-width: 0; + overflow: hidden; + 
text-overflow: ellipsis; + white-space: nowrap; + } main { min-width: 0; min-height: 0; overflow: hidden; display: flex; flex-direction: column; } .topbar { display: grid; @@ -347,6 +375,21 @@ min-width: 220px; margin-top: 4px; } + .branch-summary-foot { + display: flex; + justify-content: space-between; + gap: 12px; + color: var(--muted); + font-size: 12px; + line-height: 1.2; + min-width: 220px; + } + .branch-summary-foot span { + min-width: 0; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + } .coverage-on-branch strong { color: #166534; font-size: 26px; @@ -390,40 +433,14 @@ .legend-swatch { inline-size: 14px; block-size: 8px; - border-radius: 999px; + border-radius: 0; display: inline-block; border: 1px solid rgba(100, 116, 139, 0.18); } - .legend-uncovered { background: rgba(148, 163, 184, 0.18); } + .legend-multi { background: rgba(20, 83, 45, 0.72); } + .legend-missed { background: #fff; } .legend-partial { background: rgba(31, 41, 55, 0.22); } - .legend-covered { background: rgba(34, 197, 94, 0.14); } - .legend-alert { - display: inline-flex; - align-items: center; - justify-content: center; - inline-size: 14px; - block-size: 14px; - border-radius: 999px; - background: #7f1d1d; - color: #fff; - font-size: 10px; - font-style: normal; - font-weight: 700; - } - .metric-grid { - display: grid; - grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); - gap: 10px; - } - .metric { - border: 1px solid var(--line); - border-radius: 6px; - background: #fff; - padding: 12px; - } - .metric div { color: var(--muted); font-size: 12px; } - .metric strong { display: block; margin-top: 4px; font-size: 24px; letter-spacing: 0; } - .metric p { margin: 4px 0 0; color: var(--muted); font-size: 12px; } + .legend-covered { background: rgba(34, 197, 94, 0.24); } .warning-banner { display: grid; gap: 8px; @@ -470,17 +487,80 @@ border-top: 1px solid var(--line); padding-top: 14px; } - .hazard-bar { + .dashboard-disclosure { + padding-top: 0; + } + 
.dashboard-disclosure summary { + display: flex; + align-items: center; + gap: 6px; + min-height: 28px; + padding-top: 14px; + cursor: pointer; + list-style: none; + } + .dashboard-disclosure summary::-webkit-details-marker { display: none; } + .dashboard-disclosure h2 { margin: 0; } + .dashboard-disclosure-arrow::before { + content: ">"; + display: inline-block; + inline-size: 12px; + text-align: center; + transform: rotate(0deg); + transition: transform 120ms ease; + } + .dashboard-disclosure[open] .dashboard-disclosure-arrow::before { + transform: rotate(90deg); + } + .dashboard-section-body { + margin-top: 10px; + } + .dashboard-bar-row { + display: grid; + gap: 4px; + } + .dashboard-bar-head, + .dashboard-bar-foot { + display: flex; + justify-content: space-between; + gap: 12px; + color: var(--muted); + font-size: 12px; + } + .dashboard-bar-head strong { + color: var(--text); + } + .dashboard-bar-head span, + .dashboard-bar-foot span { + min-width: 0; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + } + .ratio-bar { + display: flex; height: 8px; - max-width: 520px; - border-radius: 999px; - background: rgba(180, 35, 24, 0.14); + min-width: 100%; + border: 1px solid rgba(100, 116, 139, 0.22); + border-radius: 0; + background: #fff; overflow: hidden; } - .hazard-bar span { + .ratio-bar span { display: block; height: 100%; - background: #166534; + } + .ratio-covered { + background: rgba(34, 197, 94, 0.36); + } + .ratio-missed { + background: transparent; + } + .hazard-bar .ratio-covered { + background: rgba(22, 101, 52, 0.54); + } + .hazard-bar { + max-width: 520px; } .dashboard-files { display: grid; @@ -578,14 +658,20 @@ gap: 5px; align-items: center; justify-content: flex-end; - color: inherit; + color: var(--link); text-decoration: none; } .name-col .sort-link { justify-content: flex-start; } - .sort-link:hover { color: #1d4ed8; } - .active-sort { color: var(--text); } + .sort-link:hover { + color: var(--link); + text-decoration: 
underline; + } + .active-sort { + color: var(--link); + font-weight: 700; + } .sort-marker { border: 1px solid var(--line); border-radius: 999px; @@ -611,13 +697,30 @@ display: grid; gap: 2px; } - .coverage-name a, + .coverage-name a { + color: var(--link); + text-decoration: none; + font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; + } .coverage-name span { color: var(--text); text-decoration: none; font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; } - .coverage-name a:hover { color: #1d4ed8; } + .coverage-name a:hover { text-decoration: underline; } + .coverage-name-link { + display: inline-flex; + align-items: center; + gap: 6px; + min-width: 0; + } + .coverage-name-link span { + color: inherit; + min-width: 0; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + } .coverage-name small { color: var(--muted); font-weight: 400; @@ -869,10 +972,11 @@ } .finding-panel a, .finding-panel strong { - color: #334155; font-weight: 700; text-decoration: none; } + .finding-panel a { color: var(--link); } + .finding-panel strong { color: #334155; } .finding-panel a:hover { text-decoration: underline; } .finding-tier { color: var(--muted); @@ -942,12 +1046,15 @@ min-height: 30px; padding: 6px 16px; border-bottom: 1px solid var(--line); - color: var(--text); + color: var(--link); text-decoration: none; font-size: 12px; } .history-row:last-child { border-bottom: 0; } - .history-row:hover { background: #f8fafc; } + .history-row:hover { + background: #f8fafc; + text-decoration: underline; + } .history-row code, .history-row span:nth-child(4), .history-row span:nth-child(5) { diff --git a/gems/lineage/ui/templates/app.html b/gems/lineage/ui/templates/app.html new file mode 100644 index 000000000..a9b4d3c1f --- /dev/null +++ b/gems/lineage/ui/templates/app.html @@ -0,0 +1,8 @@ +
+ + {{ sidebar|safe }} + +
+ {{ main|safe }} +
+
diff --git a/gems/lineage/ui/templates/branch_context.html b/gems/lineage/ui/templates/branch_context.html new file mode 100644 index 000000000..bc59acf81 --- /dev/null +++ b/gems/lineage/ui/templates/branch_context.html @@ -0,0 +1,31 @@ +
+
+
+
Branch Context
+ {{ branch }} + Source: latest commit {{ commit }} +
+
+ Coverage on branch + {{ coverage_percent }}% + {{ covered_lines }} of {{ tracked_lines }} lines covered; {{ partial_lines }} partial, {{ missed_lines }} missed + + {{ line_quality_bar|safe }} + +
+ {{ mutant_backed_lines }} mutant-backed / {{ stochastic_mutant_backed_lines }} stochastic / {{ invariant_mutant_backed_lines }} invariant +
+
+
+
+
+ {{ breadcrumbs|safe }} +
+
+ Multi-covered + covered + partial + missed +
+
+
diff --git a/gems/lineage/ui/templates/coverage_table.html b/gems/lineage/ui/templates/coverage_table.html new file mode 100644 index 000000000..6cd81db9a --- /dev/null +++ b/gems/lineage/ui/templates/coverage_table.html @@ -0,0 +1,23 @@ + + + + + + + + + + + + + + {% if empty %} + + {% else %} + {{ rows|safe }} + {% endif %} + + + {{ subtotal|safe }} + +
{{ name_header|safe }}{{ total_header|safe }}{{ covered_header|safe }}{{ partial_header|safe }}{{ missed_header|safe }}Coverage{{ percent_header|safe }}
No tracked files in this directory.
diff --git a/gems/lineage/ui/templates/dashboard.html b/gems/lineage/ui/templates/dashboard.html new file mode 100644 index 000000000..ff9195a32 --- /dev/null +++ b/gems/lineage/ui/templates/dashboard.html @@ -0,0 +1,14 @@ +
+
+ {{ branch_context|safe }} + {{ warnings|safe }} + {{ active_hazards|safe }} + {{ highest_hazard_files|safe }} + {{ highest_risk_units|safe }} + {{ highest_architecture_risks|safe }} +
+

{{ code_tree_heading }}

+ {{ code_tree|safe }} +
+
+
diff --git a/gems/lineage/ui/templates/dashboard_disclosure.html b/gems/lineage/ui/templates/dashboard_disclosure.html new file mode 100644 index 000000000..3ac90f859 --- /dev/null +++ b/gems/lineage/ui/templates/dashboard_disclosure.html @@ -0,0 +1,9 @@ +
+ + +

{{ title }}

+
+
+ {{ body|safe }} +
+
diff --git a/gems/lineage/ui/templates/dashboard_hazard_files.html b/gems/lineage/ui/templates/dashboard_hazard_files.html new file mode 100644 index 000000000..70321575a --- /dev/null +++ b/gems/lineage/ui/templates/dashboard_hazard_files.html @@ -0,0 +1,15 @@ +{% if files.len() == 0 %} +

No hazard-heavy files to show.

+{% else %} +
+ {% for file in files %} + + + {{ file.path }} + {{ file.detail }} + + {{ file.hazards }} + + {% endfor %} +
+{% endif %} diff --git a/gems/lineage/ui/templates/dashboard_ratio_bar.html b/gems/lineage/ui/templates/dashboard_ratio_bar.html new file mode 100644 index 000000000..a248b2d0d --- /dev/null +++ b/gems/lineage/ui/templates/dashboard_ratio_bar.html @@ -0,0 +1,11 @@ +
+
+ {{ label }} + {{ detail }} +
+ {{ bar|safe }} +
+ {{ total }} {{ total_label }} + {{ covered }} {{ covered_label }} +
+
diff --git a/gems/lineage/ui/templates/dashboard_sidebar.html b/gems/lineage/ui/templates/dashboard_sidebar.html new file mode 100644 index 000000000..a7602addc --- /dev/null +++ b/gems/lineage/ui/templates/dashboard_sidebar.html @@ -0,0 +1,16 @@ +
+

Lineage

+
{{ summary }}
+ {{ nav|safe }} +
+
+ {% if show_directory_input %} + + {% endif %} + + + {{ search_options|safe }} +
+ diff --git a/gems/lineage/ui/templates/hotspot_list.html b/gems/lineage/ui/templates/hotspot_list.html new file mode 100644 index 000000000..9d95634b4 --- /dev/null +++ b/gems/lineage/ui/templates/hotspot_list.html @@ -0,0 +1,17 @@ +{% if items.len() == 0 %} +

{{ empty_message }}

+{% else %} + +{% endif %} diff --git a/gems/lineage/ui/templates/layers_menu.html b/gems/lineage/ui/templates/layers_menu.html new file mode 100644 index 000000000..7c568919f --- /dev/null +++ b/gems/lineage/ui/templates/layers_menu.html @@ -0,0 +1,31 @@ +
+ + + Layers + +
+ + + + +
+
diff --git a/gems/lineage/ui/templates/source_sidebar.html b/gems/lineage/ui/templates/source_sidebar.html new file mode 100644 index 000000000..dca03e135 --- /dev/null +++ b/gems/lineage/ui/templates/source_sidebar.html @@ -0,0 +1,13 @@ +
+

Lineage

+
{{ path }}
+ {{ nav|safe }} +
+{% if show_empty_outline %} + +{% else %} + {{ outline|safe }} +{% endif %} diff --git a/gems/lineage/ui/templates/source_unavailable.html b/gems/lineage/ui/templates/source_unavailable.html new file mode 100644 index 000000000..e7f498908 --- /dev/null +++ b/gems/lineage/ui/templates/source_unavailable.html @@ -0,0 +1,9 @@ +
+
+
Source unavailable
+
{{ error }}
+
+
+
+
The selected path is not available in the current checkout. Regenerate coverage for HEAD or open a historical commit view.
+
diff --git a/gems/lineage/ui/templates/source_view.html b/gems/lineage/ui/templates/source_view.html new file mode 100644 index 000000000..8b322a635 --- /dev/null +++ b/gems/lineage/ui/templates/source_view.html @@ -0,0 +1,29 @@ +
+ + + + + + +
+
+
{{ path }}
+
{{ summary }}
+
+
+
+ + +
+ {{ layers_menu|safe }} +
+
+
+ {{ branch_context|safe }} + {{ warnings|safe }} +
+ {{ code_lines|safe }} +
+
+ {{ history|safe }} +
diff --git a/gems/lineage/ui/templates/warning_banner.html b/gems/lineage/ui/templates/warning_banner.html new file mode 100644 index 000000000..edaa8ec69 --- /dev/null +++ b/gems/lineage/ui/templates/warning_banner.html @@ -0,0 +1,12 @@ +{% if warnings.len() > 0 %} +
+ {% for warning in warnings %} + +
+ + {{ warning.label }} +

{{ warning.detail }}

+
+ {% endfor %} +
+{% endif %} diff --git a/gems/nil-kill/lib/nil_kill/commands/normalize_command.rb b/gems/nil-kill/lib/nil_kill/commands/normalize_command.rb index 5d48ae0c3..eee2556c7 100644 --- a/gems/nil-kill/lib/nil_kill/commands/normalize_command.rb +++ b/gems/nil-kill/lib/nil_kill/commands/normalize_command.rb @@ -12,10 +12,10 @@ def run static_path = option("--static") || abort("normalize requires --static PATH") output = option("--output") || File.join(TMP_DIR, "evidence.json") analyze = !@argv.delete("--no-analyze") - traces = options("--traces") - traces = [RUNTIME_DIR] if traces.empty? + explicit_traces = options("--traces") static = JSON.parse(File.read(static_path)) root = File.expand_path(option("--root") || static["root"] || ROOT) + traces = explicit_traces.empty? ? default_trace_paths(static) : explicit_traces bundle = Runtime::Normalizer.new(root: root).normalize(static: static, trace_paths: traces, analyze: analyze) FileUtils.mkdir_p(File.dirname(output)) File.write(output, JSON.pretty_generate(bundle)) @@ -41,6 +41,24 @@ def options(name) def option(name) options(name).last end + + def default_trace_paths(static) + languages = static_languages(static) + return [RUNTIME_DIR] if languages.empty? 
|| languages == ["ruby"] + + [] + end + + def static_languages(static) + canonical = Schema::EvidenceBundle.canonical_static(static) + languages = [] + languages.concat(Array(static["languages"])) if static.is_a?(Hash) + languages.concat(Hash(canonical["language_capabilities"]).keys) + Array(canonical["files"]).each { |file| languages << file["language"] } + Array(canonical["methods"]).each { |method| languages << (method["language"] || method["lang"]) } + Array(canonical["fields"]).each { |field| languages << field["language"] } + languages.map(&:to_s).reject(&:empty?).uniq.sort + end end end end diff --git a/gems/nil-kill/lib/nil_kill/report.rb b/gems/nil-kill/lib/nil_kill/report.rb index 9dffa10a8..e0c3cd336 100644 --- a/gems/nil-kill/lib/nil_kill/report.rb +++ b/gems/nil-kill/lib/nil_kill/report.rb @@ -241,13 +241,21 @@ def sarif_rules(evidence) short_description: "Nil-Kill static analysis signal" ) end - action_rules + diagnostic_rules + static_rules + pressure_rules = sarif_pressure_findings(evidence).map { |finding| finding.fetch("kind") }.uniq.map do |kind| + Decomplex::Sarif.rule( + id: "nil-kill.pressure.#{Decomplex::Sarif.slug(kind)}", + name: "Pressure: #{kind.tr("_", " ")}", + short_description: "Nil-Kill pressure signal" + ) + end + action_rules + diagnostic_rules + static_rules + pressure_rules end def sarif_results(evidence) sarif_actions(evidence).map { |action| sarif_action_result(action, evidence) } + sarif_diagnostics(evidence).map { |diagnostic| sarif_diagnostic_result(diagnostic) } + - sarif_static_findings(evidence).map { |finding| sarif_static_result(finding) } + sarif_static_findings(evidence).map { |finding| sarif_static_result(finding) } + + sarif_pressure_findings(evidence).map { |finding| sarif_pressure_result(finding) } end def sarif_actions(evidence) @@ -283,7 +291,8 @@ def static_method_findings(method) findings << { "kind" => "untyped_signature", "level" => "warning", - "message" => "static signature includes an untyped or unknown 
# Reports whether a static signature string spells out a nullable type.
#
# Recognised spellings: Sorbet `T.nilable`, `Optional[...]`, the standalone
# words `null` / `undefined`, a `None`/`null`/`undefined` member of a `|`
# union, and `None` as a top-level member of `Union[...]`.
# A bare `-> None` return annotation is deliberately NOT nullable.
#
# @param signature [#to_s] signature text (nil is treated as empty)
# @return [Boolean]
def static_nullable_signature?(signature)
  text = signature.to_s
  text.match?(/\bT\.nilable\b/) ||
    text.match?(/\bOptional\s*\[/) ||
    text.match?(/\bnull\b/) ||
    text.match?(/\bundefined\b/) ||
    text.match?(/\bNone\s*\|/) ||
    text.match?(/\|\s*(?:None|null|undefined)\b/) ||
    static_union_with_none?(text)
end

# True when some `Union[...]` in the text lists `None` as a direct member.
# Nested subscripts are removed first, so `Union[Dict[str, None], int]`
# does not count.
def static_union_with_none?(text)
  text.scan(/\bUnion\s*(?<args>\[(?:[^\[\]]|\g<args>)*\])/).any? do |(args)|
    top_level = args[1...-1].gsub(/(?<nested>\[(?:[^\[\]]|\g<nested>)*\])/, "")
    top_level.split(",").any? { |member| member.strip == "None" }
  end
end
# Turns each fallibility-pressure row (after `fallibility_display_rows`
# filtering) into a SARIF-ready finding hash.
#
# The level is "warning" when the row has handler pressure or any observed
# runtime raise, otherwise "note".
def fallibility_pressure_findings(evidence)
  rows = fallibility_display_rows(Array(evidence.dig("facts", "fallibility_pressure")))
  rows.map do |row|
    stats = row["runtime"] || {}
    handler_count = row["handler_pressure"].to_i
    raise_count = stats["raised_calls"].to_i
    raised_names = Array(stats["raised_classes"]).first(4).join(", ")
    raised_suffix = raised_names.empty? ? "" : "; raised #{raised_names}"
    summary = [
      "fallibility pressure: #{row["label"]} score #{row["score"].to_i}",
      "direct sources #{Array(row["direct_sources"]).size}",
      "runtime raises #{raise_count}/#{stats["calls"].to_i} (#{stats["raised_rate"].to_f}%#{raised_suffix})",
      "handlers #{handler_count}",
      "unhandled callers #{Array(row["fallible_callers"]).size}",
    ].join("; ")

    {
      "kind" => "fallibility",
      "level" => handler_count.positive? || raise_count.positive? ? "warning" : "note",
      "message" => summary,
      "path" => row["path"],
      "line" => row["line"],
      "pressure" => row,
    }
  end
end

# Turns each hash-record pressure row into a SARIF-ready finding hash.
# Path and line come from parsing the row's first example.
#
# The level is "warning" once total pressure reaches 3, otherwise "note".
def primitive_record_pressure_findings(evidence)
  hash_record_struct_pressure(evidence).map do |row|
    site = parse_location(Array(row["examples"]).first)
    key_list = Array(row["keys"]).first(10).join(", ")
    total = row["total_pressure"].to_i
    slot_counts = %w[return param ivar collection].map do |slot|
      "#{slot} #{row["#{slot}_slots"].to_i}"
    end

    {
      "kind" => "primitive_record",
      "level" => total >= 3 ? "warning" : "note",
      "message" => "primitive record pressure: #{row["label"]} behaves like an ad-hoc struct; " \
                   "total pressure #{total} (#{slot_counts.join(", ")}); keys #{key_list}",
      "path" => site[:path],
      "line" => site[:line],
      "pressure" => row,
    }
  end
end

# Builds the display label for a pressure row: "Owner.method" for class
# methods, "Owner#method" otherwise. Falls back to whichever of owner or
# method is present when the other is blank.
def pressure_member_label(row)
  receiver = row["owner"].to_s
  member = row["method"].to_s

  if member.empty?
    receiver
  elsif receiver.empty?
    member
  else
    joiner = row["method_kind"] == "class" ? "." : "#"
    [receiver, member].join(joiner)
  end
end
# Counts one more occurrence of `code` in `counts` and yields at most
# MAX_DIAGNOSTICS_PER_FILE_CODE real diagnostics for it. The first occurrence
# past the cap yields a single "<code>_suppressed" marker; later ones yield
# nothing.
#
# @param counts [Hash] per-code counters, mutated in place
# @yieldparam diagnostic [Hash] the diagnostic built by `diagnostic`
def yield_limited_diagnostic(counts, file, line_no, code, message)
  seen = (counts[code] += 1)
  overflow = seen - MAX_DIAGNOSTICS_PER_FILE_CODE
  return yield(diagnostic(file, line_no, code, message)) unless overflow.positive?
  return unless overflow == 1

  yield diagnostic(file, line_no, "#{code}_suppressed",
                   "suppressed additional #{code} diagnostics for this trace file")
end
end end + it "does not report Python None-only returns as nullable signatures" do + report = NilKill::Report.allocate + + void_method = { + "language" => "python", + "path" => "src/worker.py", + "owner" => "Worker", + "name" => "call", + "kind" => "method", + "line" => 10, + "signature" => "def call(self, value: str) -> None:", + } + maybe_method = void_method.merge( + "name" => "fetch", + "line" => 20, + "signature" => "def fetch(self, value: str | None) -> str | None:", + ) + + void_findings = report.send(:static_method_findings, void_method) + maybe_findings = report.send(:static_method_findings, maybe_method) + + expect(void_findings.map { |finding| finding["kind"] }).not_to include("nullable_signature") + expect(maybe_findings.map { |finding| finding["kind"] }).to include("nullable_signature") + end + it "uses TypeScript provider annotations when building Tree-sitter static evidence" do grammar = ENV["DECOMPLEX_TS_TYPESCRIPT_PATH"] skip "set DECOMPLEX_TS_TYPESCRIPT_PATH to run TypeScript Tree-sitter static evidence test" unless grammar && File.file?(grammar) @@ -185,6 +210,38 @@ class Worker { end end + it "keeps Go name-type struct fields typed in static evidence" do + grammar = ENV["DECOMPLEX_TS_GO_PATH"] + skip "set DECOMPLEX_TS_GO_PATH to run Go Tree-sitter static evidence test" unless grammar && File.file?(grammar) + + Dir.mktmpdir("nil-kill-go-static", NilKill::ROOT) do |dir| + src = File.join(dir, "src") + FileUtils.mkdir_p(src) + File.write(File.join(src, "slab.go"), <<~GO) + package util + + type Slab struct { + I16 []int16 + Count int + } + GO + + evidence = NilKill::StaticEvidence.build([src], root: dir) + fields = evidence.fetch("fields") + report = NilKill::Report.allocate + + expect(evidence.dig("facts", "state_types", "Slab\u0000I16")).to eq("[]int16") + expect(evidence.dig("facts", "state_types", "Slab\u0000Count")).to eq("int") + expect(fields).to include(a_hash_including( + "language" => "go", + "name" => "I16", + "declared_type" => "[]int16" 
+ )) + expect(report.send(:static_field_finding, fields.find { |field| field["name"] == "I16" })).to be_nil + expect(report.send(:static_field_finding, fields.find { |field| field["name"] == "Count" })).to be_nil + end + end + it "exposes provider capabilities from trace-spec" do spec = NilKill::Commands::TraceSpecCommand.new([]).spec languages = spec.fetch("language_capabilities").to_h { |cap| [cap.fetch("language"), cap] } @@ -327,6 +384,76 @@ def call(self, value): end end + it "does not default non-Ruby static normalization to stale legacy runtime files" do + Dir.mktmpdir("nil-kill-python-no-default-traces", NilKill::ROOT) do |dir| + static_path = File.join(dir, "static.json") + output_path = File.join(dir, "evidence.json") + FileUtils.mkdir_p(NilKill::RUNTIME_DIR) + File.write(File.join(NilKill::RUNTIME_DIR, "collections-stale.jsonl"), + 30.times.map { JSON.generate("legacy" => true) }.join("\n") + "\n") + File.write(static_path, JSON.pretty_generate( + "files" => [{"path" => "pkg/user.py", "language" => "python", "digest" => "sha256:test"}], + "methods" => [], + "fields" => [], + "language_capabilities" => {"python" => NilKill::Languages.capability_for("python")} + )) + + NilKill::Commands::NormalizeCommand.new(["--static", static_path, "--output", output_path, "--no-analyze"]).run + evidence = JSON.parse(File.read(output_path)) + + expect(evidence["languages"]).to eq(["python"]) + expect(evidence["diagnostics"]).to eq([]) + expect(evidence.dig("metadata", "trace_files")).to eq([]) + end + end + + it "keeps Ruby-only normalization defaulting to the legacy runtime directory" do + Dir.mktmpdir("nil-kill-ruby-default-traces", NilKill::ROOT) do |dir| + static_path = File.join(dir, "static.json") + output_path = File.join(dir, "evidence.json") + FileUtils.mkdir_p(NilKill::RUNTIME_DIR) + File.write(File.join(NilKill::RUNTIME_DIR, "events.jsonl"), JSON.generate( + "schema_version" => 1, + "event" => "process_start", + "language" => "ruby", + "run_id" => "run-1", + 
"pid" => 1, + "thread_id" => "main", + "timestamp_ns" => 1, + "path" => "src/demo.rb", + "line" => 1, + "payload" => {} + ) + "\n") + File.write(static_path, JSON.pretty_generate( + "files" => [{"path" => "src/demo.rb", "language" => "ruby", "digest" => "sha256:test"}], + "methods" => [], + "fields" => [] + )) + + NilKill::Commands::NormalizeCommand.new(["--static", static_path, "--output", output_path, "--no-analyze"]).run + evidence = JSON.parse(File.read(output_path)) + + expect(evidence["languages"]).to eq(["ruby"]) + expect(evidence.dig("runtime", "runs")).to include(a_hash_including("run_id" => "run-1")) + expect(evidence.dig("metadata", "trace_files")).not_to be_empty + end + end + + it "caps incompatible JSONL diagnostics per trace file" do + Dir.mktmpdir("nil-kill-bad-traces", NilKill::ROOT) do |dir| + trace_path = File.join(dir, "collections-stale.jsonl") + File.write(trace_path, 40.times.map { JSON.generate("legacy" => true) }.join("\n") + "\n") + + diagnostics = [] + NilKill::Runtime::TraceLoader.new([trace_path]).each_event do |_event, diagnostic| + diagnostics << diagnostic if diagnostic + end + + expect(diagnostics.count { |diagnostic| diagnostic["code"] == "not_raw_trace_event" }).to eq(20) + expect(diagnostics).to include(a_hash_including("code" => "not_raw_trace_event_suppressed")) + end + end + it "keeps legacy Ruby runtime loading behind the normalizer boundary" do Dir.mktmpdir("nil-kill-legacy-runtime", NilKill::ROOT) do |dir| source = File.join(dir, "sample.rb") diff --git a/gems/nil-kill/spec/nil_kill_spec.rb b/gems/nil-kill/spec/nil_kill_spec.rb index 8e101187f..ad5a852fc 100644 --- a/gems/nil-kill/spec/nil_kill_spec.rb +++ b/gems/nil-kill/spec/nil_kill_spec.rb @@ -286,12 +286,87 @@ def pick "nil-kill.static.nullable-signature", "nil-kill.static.untyped-field", ) + expect(results).to include(a_hash_including( + "ruleId" => "nil-kill.static.untyped-signature", + "message" => a_hash_including("text" => include("replace 
Any/T.untyped/unknown")), + )) + expect(results).to include(a_hash_including( + "ruleId" => "nil-kill.static.nullable-signature", + "message" => a_hash_including("text" => include("nilability pressure")), + )) expect(results).not_to include(a_hash_including( "ruleId" => "nil-kill.static.untyped-field", "message" => a_hash_including("text" => include("CurrentUnitSpan#id")), )) end + it "renders pressure facts as actionable SARIF findings" do + evidence = { + "facts" => { + "hidden_enum_pressure" => [{ + "path" => "src/workflow.rb", + "line" => 10, + "owner" => "Workflow", + "method" => "label", + "method_kind" => "instance", + "kind" => "param", + "slot" => "status", + "confidence" => "high", + "score" => 12, + "values" => %w[:active :pending], + "decision_pressure" => 2, + "runtime" => {"calls" => 5, "classes" => ["Symbol"]}, + "blockers" => [], + "suggestion" => "review for a named Status enum or literal-union contract", + "decisions" => [], + }], + "fallibility_pressure" => [{ + "label" => "Parser#parse", + "path" => "src/parser.rb", + "line" => 12, + "score" => 9, + "direct_sources" => [{"path" => "src/parser.rb", "line" => 15, "kind" => "raise", "code" => "raise ParserError"}], + "runtime" => {"calls" => 20, "ok_calls" => 18, "raised_calls" => 2, "raised_rate" => 10.0, "raised_classes" => ["ParserError"]}, + "fallible_callers" => ["Compiler#run"], + "handler_pressure" => 1, + "exclusive_handlers" => 1, + "shared_handlers" => 0, + "handlers" => [], + }], + "collection_index_lookups" => [{ + "path" => "src/options.rb", + "line" => 8, + "code" => "opts[:mode]", + "receiver" => "opts", + "receiver_type" => "Hash", + "index" => ":mode", + "lookup_type" => "T.untyped", + "status" => "untyped receiver", + }], + "param_origins" => [], + "return_origins" => [], + }, + "actions" => [], + "diagnostics" => [], + } + + sarif = JSON.parse(described_class.new(["--format=sarif"], evidence: evidence).to_sarif(evidence)) + results = sarif.fetch("runs").first.fetch("results") + + 
expect(results).to include(a_hash_including( + "ruleId" => "nil-kill.pressure.hidden-enum", + "message" => a_hash_including("text" => include("hidden enum pressure: Workflow#label param `status`")), + )) + expect(results).to include(a_hash_including( + "ruleId" => "nil-kill.pressure.fallibility", + "message" => a_hash_including("text" => include("fallibility pressure: Parser#parse")), + )) + expect(results).to include(a_hash_including( + "ruleId" => "nil-kill.pressure.primitive-record", + "message" => a_hash_including("text" => include("primitive record pressure")), + )) + end + it "--hygiene emits only the slot summary and action counts, skipping heavy sections" do Dir.mktmpdir("nil-kill-hygiene-report", NilKill::ROOT) do |dir| report = described_class.new(["--hygiene"]) diff --git a/gems/slopcop/README.md b/gems/slopcop/README.md index 663799e77..622d5a554 100644 --- a/gems/slopcop/README.md +++ b/gems/slopcop/README.md @@ -130,8 +130,9 @@ This is the format Lineage uses for gutter and source overlays. ## Concurrency Hazard Detection / Constraint Reports -`constraints` checks changed files against named coverage constraints, -currently used by CLEAR for Loom and VOPR hazard coverage: +`constraints` checks changed files against named coverage constraints. +It currently supports first-party hazard providers for Zig, Go, Rust, +C, C++, and C#: ```bash bundle exec gems/slopcop/exe/slopcop constraints \ @@ -139,11 +140,14 @@ bundle exec gems/slopcop/exe/slopcop constraints \ --base=origin/master \ --coverage=loom:zig/zig-out/coverage-loom/merged/kcov-merged/cobertura.xml \ --coverage=vopr:zig/zig-out/coverage-vopr/merged/kcov-merged/cobertura.xml \ + --language=zig \ --markdown=/tmp/slopcop-constraints.md \ --json=/tmp/slopcop-constraints.json \ --sarif=/tmp/slopcop-constraints.sarif ``` +Common evidence tags are `loom`, `vopr`, `race`, `concurrency`, +`tsan`, `asan`, `lsan`, `ubsan`, `miri`, and `unsafe`. Findings are advisory unless `--strict` is supplied. 
## CI Integration @@ -187,16 +191,19 @@ SlopCop relies on [Boobytrap](../boobytrap/README.md) for branch-arm normalization and [Decomplex](../decomplex/README.md) language lexicons for classifying type/null guards and diagnostic paths. Ruby support has been battle tested to develop the CLEAR compiler. Zig -support is currently being used for CLEAR runtime hazard coverage. Other -languages are currently experimental. +support is currently being used for CLEAR runtime hazard coverage. Go, +Rust, C, C++, and C# hazard providers are experimental. - [x] Ruby: fully supported. - [ ] Python: experimentally supported. - [ ] JavaScript: experimentally supported. - [ ] TypeScript: experimentally supported. -- [ ] Go: experimentally supported. -- [ ] Rust: experimentally supported. - [ ] Zig: experimentally supported. +- [ ] Go: experimentally supported, including concurrency hazards. +- [ ] Rust: experimentally supported, including Loom and unsafe hazards. +- [ ] C: experimentally supported, including sanitizer hazards. +- [ ] C++: experimentally supported, including sanitizer hazards. +- [ ] C#: experimentally supported, including concurrency/unsafe hazards. ## Boundaries diff --git a/gems/slopcop/exe/slopcop b/gems/slopcop/exe/slopcop index a96eb5656..73f115f98 100755 --- a/gems/slopcop/exe/slopcop +++ b/gems/slopcop/exe/slopcop @@ -21,6 +21,7 @@ def usage slopcop constraints [--repo=.] --base=origin/master [--head=HEAD] \\ [--coverage=loom:zig/zig-out/coverage-loom/merged/kcov-merged/cobertura.xml] \\ [--coverage=vopr:zig/zig-out/coverage-vopr/merged/kcov-merged/cobertura.xml] \\ + [--language=zig|go|rust|c|cpp|csharp] \\ [--sarif=slopcop-constraints.sarif] [--json=constraints.sarif] \\ [--markdown=constraints.md] [--strict] @@ -29,7 +30,8 @@ def usage kcov Cobertura XML, kcov codecov JSON, coverage.py JSON, or Nil-Kill branch coverage JSON constraints --coverage - typed coverage input. Currently supported: loom:PATH and vopr:PATH. + typed coverage input. 
Common types: loom:PATH, vopr:PATH, race:PATH, concurrency:PATH, + tsan:PATH, asan:PATH, lsan:PATH, ubsan:PATH, miri:PATH, unsafe:PATH. Findings are advisory warnings unless --strict is supplied. dark-arms Lineage-ready SARIF JSON overlay containing all classified dark arms. @@ -37,7 +39,7 @@ def usage exit 1 end -TREE_SITTER_EXTS = %w[rb py pyi js jsx mjs cjs ts tsx go rs zig].freeze +TREE_SITTER_EXTS = %w[rb py pyi js jsx mjs cjs ts tsx go rs zig c h cc cpp cxx hh hpp hxx cs].freeze def default_file_globs roots = %w[app lib src] @@ -168,6 +170,7 @@ def run_constraints(args) base: nil, head: "HEAD", coverage: [], + languages: ["zig"], sarif: nil, json: nil, markdown: nil, @@ -182,6 +185,8 @@ def run_constraints(args) when /\A--head=(.+)/ then opts[:head] = Regexp.last_match(1) when /\A--coverage=(.+)/ then opts[:coverage] << Regexp.last_match(1) when "--coverage" then opts[:coverage] << args.shift.to_s + when /\A--language=(.+)/ then opts[:languages] = Regexp.last_match(1).split(",") + when "--language" then opts[:languages] = args.shift.to_s.split(",") when /\A--sarif=(.+)/ then opts[:sarif] = Regexp.last_match(1) when /\A--json=(.+)/ then opts[:json] = Regexp.last_match(1) when /\A--markdown=(.+)/ then opts[:markdown] = Regexp.last_match(1) @@ -195,7 +200,8 @@ def run_constraints(args) repo: opts[:repo], base: opts[:base], head: opts[:head], - coverage_specs: opts[:coverage] + coverage_specs: opts[:coverage], + languages: opts[:languages] ) write_output(opts[:sarif], audit.to_sarif) if opts[:sarif] diff --git a/gems/slopcop/lib/slopcop/constraints.rb b/gems/slopcop/lib/slopcop/constraints.rb index ec6fcdf5d..e7c89d148 100644 --- a/gems/slopcop/lib/slopcop/constraints.rb +++ b/gems/slopcop/lib/slopcop/constraints.rb @@ -1,9 +1,14 @@ # frozen_string_literal: true require_relative "constraints/audit" +require_relative "constraints/c_provider" +require_relative "constraints/cpp_provider" +require_relative "constraints/csharp_provider" require_relative 
"constraints/diff" require_relative "constraints/evidence" require_relative "constraints/finding" +require_relative "constraints/go_provider" +require_relative "constraints/rust_provider" require_relative "constraints/sarif" require_relative "constraints/zig_provider" @@ -13,6 +18,11 @@ module Constraints def providers { + "c" => CProvider, + "cpp" => CppProvider, + "csharp" => CsharpProvider, + "go" => GoProvider, + "rust" => RustProvider, "zig" => ZigProvider } end diff --git a/gems/slopcop/lib/slopcop/constraints/c_provider.rb b/gems/slopcop/lib/slopcop/constraints/c_provider.rb new file mode 100644 index 000000000..eb632e907 --- /dev/null +++ b/gems/slopcop/lib/slopcop/constraints/c_provider.rb @@ -0,0 +1,155 @@ +# frozen_string_literal: true + +require_relative "language_provider" + +module SlopCop + module Constraints + module CProvider + module_function + + EXCLUDED_DIRS = %w[.git vendor third_party node_modules build cmake-build-debug cmake-build-release tmp dist tests test].freeze + TSAN_NEEDLES = [ + "_Atomic", + "atomic_", + "__atomic_", + "__sync_", + "pthread_create", + "pthread_mutex_", + "pthread_rwlock_", + "pthread_cond_", + "pthread_spin_", + "pthread_barrier_", + "mtx_", + "cnd_", + "thrd_create" + ].freeze + ASAN_NEEDLES = [ + "memcpy(", + "memmove(", + "memset(", + "strcpy(", + "strncpy(", + "strcat(", + "strncat(", + "sprintf(", + "snprintf(", + "vsprintf(", + "vsnprintf(", + "gets(", + "scanf(", + "sscanf(", + "fscanf(", + "alloca(" + ].freeze + LSAN_NEEDLES = [ + "malloc(", + "calloc(", + "realloc(", + "aligned_alloc(", + "posix_memalign(", + "strdup(", + "strndup(", + "free(" + ].freeze + + def rules + evidence_rule("tsan", "C TSan coverage missing", "C shared-concurrency site lacks TSan coverage evidence") + + evidence_rule("asan", "C ASan coverage missing", "C raw-memory site lacks ASan coverage evidence") + + evidence_rule("lsan", "C LSan coverage missing", "C allocation/lifetime site lacks LSan coverage evidence") + + 
# Builds the one-element SARIF rule list for a single C evidence tag.
#
# @param evidence [String] evidence tag such as "tsan" or "asan"
# @param name [String] human-readable rule name
# @param short [String] short description text
# @return [Array<Hash>] array holding exactly one rule descriptor
def evidence_rule(evidence, name, short)
  label = evidence.upcase
  rule = {
    "id" => rule_id_for(evidence),
    "name" => name,
    "shortDescription" => { "text" => short },
    "fullDescription" => {
      "text" => "A changed C #{label} hazard was not reached by #{label} coverage evidence."
    },
    "defaultConfiguration" => { "level" => "warning" }
  }
  [rule]
end

# Rule ID reported for a hazard that requires the given evidence tag.
def rule_id_for(required_evidence)
  "slopcop-c-#{required_evidence}-uncovered"
end
# Records a TSan hazard when the line mentions any atomic/thread/lock needle.
def add_tsan_site(sites, path, line, source, code)
  return unless LanguageProvider.any_include?(code, TSAN_NEEDLES)

  sites << LanguageProvider.hazard(path, line, source, "c_tsan_concurrency", "tsan", "C atomic/thread/lock site")
end

# Records ASan hazards: one for raw-memory/buffer API needles and one for
# pointer dereference patterns. A single line may produce both.
def add_asan_site(sites, path, line, source, code)
  if LanguageProvider.any_include?(code, ASAN_NEEDLES)
    sites << LanguageProvider.hazard(path, line, source, "c_asan_raw_memory_api", "asan", "C raw-memory or unchecked buffer API")
  end
  if pointer_hazard?(code)
    sites << LanguageProvider.hazard(path, line, source, "c_asan_pointer", "asan", "C pointer dereference/arithmetic site")
  end
end

# Records an LSan hazard when the line mentions an allocation/free needle.
def add_lsan_site(sites, path, line, source, code)
  return unless LanguageProvider.any_include?(code, LSAN_NEEDLES)

  sites << LanguageProvider.hazard(path, line, source, "c_lsan_lifetime", "lsan", "C allocation/free lifetime site")
end

# Records UBSan hazards for arithmetic and cast sites. A single line may
# produce both.
def add_ubsan_site(sites, path, line, source, code)
  if arithmetic_ub_site?(code)
    sites << LanguageProvider.hazard(path, line, source, "c_ubsan_arithmetic", "ubsan", "C divide/modulo/shift arithmetic site")
  end
  if cast_ub_site?(code)
    sites << LanguageProvider.hazard(path, line, source, "c_ubsan_cast", "ubsan", "C pointer/integer cast site")
  end
end

# Line-level heuristic for pointer use: `->`, a statement starting with
# `*ident`, or `*ident` directly after `=`, `return`, `(`, `,` or `[`.
def pointer_hazard?(code)
  code.include?("->") ||
    code.match?(/\A\s*\*\s*[A-Za-z_][A-Za-z0-9_]*/) ||
    code.match?(/(?:=\s*|return\s+|\(|,|\[)\*\s*[A-Za-z_][A-Za-z0-9_]*/)
end

# Line-level heuristic for divide, modulo and shift expressions.
#
# `#include` directives are ignored so header paths such as `<sys/types.h>`
# are not read as divisions. Double-quoted string contents are blanked so
# printf-style `%d` specifiers are not read as modulo.
# NOTE(review): if LanguageProvider.c_style_code already removes string
# literals, the blanking is a harmless no-op - confirm.
def arithmetic_ub_site?(code)
  return false if code.match?(/\A\s*#\s*include\b/)

  expression = code.gsub(/"(?:\\.|[^"\\])*"/, '""')
  expression.match?(%r{[A-Za-z0-9_\])]\s*(?:/|%)\s*[A-Za-z_(]}) ||
    expression.match?(/[A-Za-z0-9_\])]\s*(?:<<|>>)\s*[A-Za-z_(]/)
end

# Line-level heuristic for pointer/integer casts.
#
# The first pattern is the original one. It needs at least one identifier
# character before the listed type token, so it catches `(char *)p` and
# `(unsigned int)x` but cannot match single-token casts such as `(int)x`.
# The second pattern adds those. It skips a parenthesis glued to an
# identifier, so `sizeof(int) * n` and `f(int)` are not reported by it.
def cast_ub_site?(code)
  code.match?(/\([A-Za-z_][A-Za-z0-9_\s]*(?:\*|intptr_t|uintptr_t|size_t|ssize_t|int|long|short|char)[A-Za-z0-9_\s\*]*\)\s*[A-Za-z_(&*]/) ||
    code.match?(/(?<![A-Za-z0-9_])\(\s*(?:intptr_t|uintptr_t|size_t|ssize_t|int|long|short|char)\s*\)\s*[A-Za-z_(&*]/)
end
end + end +end diff --git a/gems/slopcop/lib/slopcop/constraints/cpp_provider.rb b/gems/slopcop/lib/slopcop/constraints/cpp_provider.rb new file mode 100644 index 000000000..fc150edce --- /dev/null +++ b/gems/slopcop/lib/slopcop/constraints/cpp_provider.rb @@ -0,0 +1,153 @@ +# frozen_string_literal: true + +require_relative "language_provider" + +module SlopCop + module Constraints + module CppProvider + module_function + + EXCLUDED_DIRS = %w[.git vendor third_party node_modules build cmake-build-debug cmake-build-release tmp dist tests test].freeze + EXTENSIONS = %w[.cc .cpp .cxx .hh .hpp .hxx].freeze + TSAN_NEEDLES = [ + "std::thread", + "std::jthread", + "std::async", + "std::atomic", + "std::mutex", + "std::shared_mutex", + "std::recursive_mutex", + "std::condition_variable", + "std::lock_guard", + "std::unique_lock", + "std::scoped_lock", + "std::call_once", + ".lock(", + ".try_lock(", + ".unlock(" + ].freeze + ASAN_NEEDLES = [ + "std::memcpy(", + "std::memmove(", + "std::memset(", + "memcpy(", + "memmove(", + "memset(", + "strcpy(", + "strncpy(", + "strcat(", + "strncat(", + "sprintf(", + "snprintf(", + "std::span<", + "std::string_view" + ].freeze + LSAN_NEEDLES = [ + "malloc(", + "calloc(", + "realloc(", + "free(", + "std::malloc(", + "std::calloc(", + "std::realloc(", + "std::free(" + ].freeze + + def rules + evidence_rule("tsan", "C++ TSan coverage missing", "C++ shared-concurrency site lacks TSan coverage evidence") + + evidence_rule("asan", "C++ ASan coverage missing", "C++ raw-memory site lacks ASan coverage evidence") + + evidence_rule("lsan", "C++ LSan coverage missing", "C++ allocation/lifetime site lacks LSan coverage evidence") + + evidence_rule("ubsan", "C++ UBSan coverage missing", "C++ undefined-behavior site lacks UBSan coverage evidence") + end + + def evidence_rule(evidence, name, short) + [ + { + "id" => "slopcop-cpp-#{evidence}-uncovered", + "name" => name, + "shortDescription" => { "text" => short }, + "fullDescription" => { + "text" => 
"A changed C++ #{evidence.upcase} hazard was not reached by #{evidence.upcase} coverage evidence." + }, + "defaultConfiguration" => { "level" => "warning" } + } + ] + end + + def findings(repo:, additions:, evidence:) + LanguageProvider.findings(self, repo: repo, additions: additions, evidence: evidence) + end + + def scan_hazards(repo:, paths: nil) + LanguageProvider.scan_hazards(self, repo: repo, paths: paths) + end + + def source_path?(path) + EXTENSIONS.any? { |extension| path.end_with?(extension) } && + !LanguageProvider.excluded_path?(path, dirs: EXCLUDED_DIRS) + end + + def rule_id_for(required_evidence) + "slopcop-cpp-#{required_evidence}-uncovered" + end + + def scan_file(path, contents) + sites = [] + comment = { active: false } + contents.lines.each_with_index do |source, index| + line = index + 1 + code = LanguageProvider.c_style_code(source, comment) + next if code.strip.empty? + + add_tsan_site(sites, path, line, source, code) + add_asan_site(sites, path, line, source, code) + add_lsan_site(sites, path, line, source, code) + add_ubsan_site(sites, path, line, source, code) + end + sites + end + + def add_tsan_site(sites, path, line, source, code) + return unless LanguageProvider.any_include?(code, TSAN_NEEDLES) + + sites << LanguageProvider.hazard(path, line, source, "cpp_tsan_concurrency", "tsan", "C++ atomic/thread/lock site") + end + + def add_asan_site(sites, path, line, source, code) + if LanguageProvider.any_include?(code, ASAN_NEEDLES) + sites << LanguageProvider.hazard(path, line, source, "cpp_asan_raw_memory_api", "asan", "C++ raw-memory or unchecked buffer API") + end + if pointer_or_cast_hazard?(code) + sites << LanguageProvider.hazard(path, line, source, "cpp_asan_pointer_or_cast", "asan", "C++ pointer/cast hazard") + end + end + + def add_lsan_site(sites, path, line, source, code) + if LanguageProvider.any_include?(code, LSAN_NEEDLES) || code.match?(/\b(?:new|delete)(?:\[\])?\b/) + sites << LanguageProvider.hazard(path, line, source, 
"cpp_lsan_lifetime", "lsan", "C++ allocation/free lifetime site") + end + end + + def add_ubsan_site(sites, path, line, source, code) + if arithmetic_ub_site?(code) + sites << LanguageProvider.hazard(path, line, source, "cpp_ubsan_arithmetic", "ubsan", "C++ divide/modulo/shift arithmetic site") + end + if code.match?(/\b(?:reinterpret_cast|const_cast|static_cast)\s*") || + code.match?(/\b(?:reinterpret_cast|const_cast)\s*>)\s*[A-Za-z_(]/) + end + end + end +end diff --git a/gems/slopcop/lib/slopcop/constraints/csharp_provider.rb b/gems/slopcop/lib/slopcop/constraints/csharp_provider.rb new file mode 100644 index 000000000..4c3380c24 --- /dev/null +++ b/gems/slopcop/lib/slopcop/constraints/csharp_provider.rb @@ -0,0 +1,132 @@ +# frozen_string_literal: true + +require_relative "language_provider" + +module SlopCop + module Constraints + module CsharpProvider + module_function + + EXCLUDED_DIRS = %w[.git bin obj packages node_modules tmp dist tests test].freeze + CONCURRENCY_NEEDLES = [ + "Task.Run", + "Task.Factory.StartNew", + "new Thread", + "ThreadPool.", + "Parallel.", + "lock (", + "lock(", + "Monitor.", + "Interlocked.", + "Volatile.", + "ConcurrentDictionary", + "ConcurrentQueue", + "ConcurrentBag", + "BlockingCollection", + "SemaphoreSlim", + "Mutex", + "ReaderWriterLockSlim", + "SpinLock" + ].freeze + UNSAFE_NEEDLES = [ + "unsafe", + "fixed (", + "fixed(", + "stackalloc", + "Marshal.", + "IntPtr", + "UIntPtr", + "GCHandle", + "Unsafe.", + "MemoryMarshal." + ].freeze + + def rules + [ + { + "id" => "slopcop-csharp-concurrency-uncovered", + "name" => "C# concurrency coverage missing", + "shortDescription" => { "text" => "C# concurrency site lacks concurrency coverage evidence" }, + "fullDescription" => { + "text" => "A changed C# task, thread, lock, or concurrent collection site was not reached by concurrency coverage evidence." 
+ }, + "defaultConfiguration" => { "level" => "warning" } + }, + { + "id" => "slopcop-csharp-unsafe-uncovered", + "name" => "C# unsafe coverage missing", + "shortDescription" => { "text" => "C# unsafe/native-memory site lacks unsafe coverage evidence" }, + "fullDescription" => { + "text" => "A changed C# unsafe, native-memory, pointer, or Marshal site was not reached by unsafe coverage evidence." + }, + "defaultConfiguration" => { "level" => "warning" } + } + ] + end + + def findings(repo:, additions:, evidence:) + LanguageProvider.findings(self, repo: repo, additions: additions, evidence: evidence) + end + + def scan_hazards(repo:, paths: nil) + LanguageProvider.scan_hazards(self, repo: repo, paths: paths) + end + + def source_path?(path) + path.end_with?(".cs") && !LanguageProvider.excluded_path?(path, dirs: EXCLUDED_DIRS) + end + + def rule_id_for(required_evidence) + required_evidence == "concurrency" ? "slopcop-csharp-concurrency-uncovered" : "slopcop-csharp-unsafe-uncovered" + end + + def scan_file(path, contents) + sites = [] + comment = { active: false } + unsafe_depth = 0 + contents.lines.each_with_index do |source, index| + line = index + 1 + code = LanguageProvider.c_style_code(source, comment) + next if code.strip.empty? + + if concurrency_site?(code) + sites << LanguageProvider.hazard(path, line, source, "csharp_concurrency", "concurrency", "C# task/thread/lock site") + end + if unsafe_site?(code, unsafe_depth) + sites << LanguageProvider.hazard(path, line, source, "csharp_unsafe_memory", "unsafe", "C# unsafe/native-memory site") + end + unsafe_depth = update_unsafe_depth(code, unsafe_depth) + end + sites + end + + def concurrency_site?(code) + LanguageProvider.any_include?(code, CONCURRENCY_NEEDLES) + end + + def unsafe_site?(code, unsafe_depth) + unsafe_depth.positive? 
&& pointer_operation?(code) || + LanguageProvider.any_include?(code, UNSAFE_NEEDLES) || + code.match?(/\b(?:byte|char|int|long|void)\s*\*/) + end + + def pointer_operation?(code) + code.include?("->") || code.match?(/\*\s*[A-Za-z_][A-Za-z0-9_]*/) + end + + def update_unsafe_depth(code, unsafe_depth) + code = code.chomp + relevant = if unsafe_depth.positive? + code + elsif (match = code.match(/\bunsafe\s*\{.*\z/)) + match[0] + else + "" + end + return unsafe_depth if relevant.empty? + + [unsafe_depth + relevant.count("{") - relevant.count("}"), 0].max + end + end + end +end diff --git a/gems/slopcop/lib/slopcop/constraints/go_provider.rb b/gems/slopcop/lib/slopcop/constraints/go_provider.rb new file mode 100644 index 000000000..78f6aa1dd --- /dev/null +++ b/gems/slopcop/lib/slopcop/constraints/go_provider.rb @@ -0,0 +1,158 @@ +# frozen_string_literal: true + +require_relative "finding" + +module SlopCop + module Constraints + module GoProvider + module_function + + def rules + [ + { + "id" => "slopcop-go-race-uncovered", + "name" => "Go race coverage missing", + "shortDescription" => { "text" => "Go shared-concurrency site lacks race coverage evidence" }, + "fullDescription" => { + "text" => "A changed Go goroutine, atomic, lock, or sync primitive was not reached by race coverage." + }, + "defaultConfiguration" => { "level" => "warning" } + }, + { + "id" => "slopcop-go-concurrency-uncovered", + "name" => "Go concurrency coverage missing", + "shortDescription" => { "text" => "Go channel/wait site lacks concurrency coverage evidence" }, + "fullDescription" => { + "text" => "A changed Go channel or wait-group site was not reached by concurrency coverage." 
+ }, + "defaultConfiguration" => { "level" => "warning" } + } + ] + end + + def findings(repo:, additions:, evidence:) + repo = File.expand_path(repo) + additions.each_with_object([]) do |(path, lines), out| + next unless source_path?(path) + + lines.each do |line| + source = source_line(repo, path, line) + next if source.empty? + + scan_line(path, line, source).each do |hazard| + next if covered?(evidence, hazard) + + out << Finding.new( + path: path, + line: line, + rule_id: rule_id_for(hazard[:required_evidence]), + message: "changed #{hazard[:label]} has no #{hazard[:required_evidence]} coverage evidence", + source: source.strip, + hazard_type: hazard[:hazard_type], + required_evidence: hazard[:required_evidence], + severity: "warning" + ) + end + end + end + end + + def source_path?(path) + path.end_with?(".go") && !path.end_with?("_test.go") && !path.split("/").include?("vendor") + end + + def scan_hazards(repo:, paths: nil) + repo = File.expand_path(repo) + files = if paths && !Array(paths).empty? + Array(paths).select { |path| source_path?(path) } + else + Dir.chdir(repo) { Dir["**/*.go"] }.select { |path| source_path?(path) } + end + files.flat_map do |path| + File.readlines(File.join(repo, path)).each_with_index.flat_map do |source, index| + scan_line(path, index + 1, source).map do |hazard| + hazard.merge(path: path, line: index + 1, source: source.strip) + end + end + end.sort_by { |site| [site[:path], site[:line], site[:hazard_type]] } + end + + def scan_line(path, line, source) + code = strip_comment(source) + return [] if code.strip.empty? 
+ + hazards = [] + hazards << hazard(path, line, "go_race_goroutine", "race", "goroutine launch") if goroutine_site?(code) + hazards << hazard(path, line, "go_race_atomic", "race", "atomic operation") if atomic_site?(code) + hazards << hazard(path, line, "go_race_lock", "race", "lock/sync primitive") if lock_site?(code) + hazards << hazard(path, line, "go_concurrency_waitgroup", "concurrency", "wait group operation") if waitgroup_site?(code) + hazards << hazard(path, line, "go_concurrency_channel", "concurrency", "channel operation") if channel_site?(code) + hazards + end + + def covered?(evidence, hazard) + evidence_type = hazard[:required_evidence] + return false unless evidence.known_type?(evidence_type) + + evidence.line_covered?(evidence_type, hazard[:path], hazard[:line]) + end + + def rule_id_for(required_evidence) + required_evidence == "race" ? "slopcop-go-race-uncovered" : "slopcop-go-concurrency-uncovered" + end + + def hazard(path, line, hazard_type, required_evidence, label) + { + path: path, + line: line, + hazard_type: hazard_type, + required_evidence: required_evidence, + label: label + } + end + + def goroutine_site?(code) + code.lstrip.start_with?("go ") || code.include?("; go ") + end + + def atomic_site?(code) + code.include?("atomic.") + end + + def lock_site?(code) + [ + "sync.Mutex", + "sync.RWMutex", + "sync.Map", + "sync.Once", + "sync.Cond", + ".Lock(", + ".Unlock(", + ".RLock(", + ".RUnlock(" + ].any? { |needle| code.include?(needle) } + end + + def waitgroup_site?(code) + ["sync.WaitGroup", ".Add(", ".Done(", ".Wait("].any? 
{ |needle| code.include?(needle) } + end + + def channel_site?(code) + code.include?("make(chan") || + code.include?("select {") || + code.include?("<-") + end + + def strip_comment(source) + source.split("//", 2).first.to_s + end + + def source_line(repo, path, line) + file = File.join(repo, path) + return "" unless File.file?(file) + + File.readlines(file)[line.to_i - 1].to_s.rstrip + end + end + end +end diff --git a/gems/slopcop/lib/slopcop/constraints/language_provider.rb b/gems/slopcop/lib/slopcop/constraints/language_provider.rb new file mode 100644 index 000000000..970de415d --- /dev/null +++ b/gems/slopcop/lib/slopcop/constraints/language_provider.rb @@ -0,0 +1,125 @@ +# frozen_string_literal: true + +require "set" + +require_relative "finding" + +module SlopCop + module Constraints + module LanguageProvider + module_function + + def findings(provider, repo:, additions:, evidence:) + repo = File.expand_path(repo) + additions.each_with_object([]) do |(path, lines), out| + next unless provider.source_path?(path) + + hazards = provider.scan_file(path, source_contents(repo, path)) + changed = lines.to_set + hazards.each do |hazard| + next unless changed.include?(hazard[:line]) + next if covered?(evidence, hazard) + + out << Finding.new( + path: path, + line: hazard[:line], + rule_id: provider.rule_id_for(hazard[:required_evidence]), + message: "changed #{hazard[:label]} has no #{hazard[:required_evidence]} coverage evidence", + source: hazard[:source], + hazard_type: hazard[:hazard_type], + required_evidence: hazard[:required_evidence], + severity: "warning" + ) + end + end + end + + def scan_hazards(provider, repo:, paths: nil) + repo = File.expand_path(repo) + files = if paths && !Array(paths).empty? 
+ Array(paths).select { |path| provider.source_path?(path) } + else + Dir.chdir(repo) { Dir["**/*"] }.select { |path| File.file?(File.join(repo, path)) && provider.source_path?(path) } + end + files.flat_map do |path| + provider.scan_file(path, source_contents(repo, path)) + end.sort_by { |site| [site[:path], site[:line], site[:hazard_type]] } + end + + def covered?(evidence, hazard) + evidence_type = hazard[:required_evidence] + return false unless evidence.known_type?(evidence_type) + + evidence.line_covered?(evidence_type, hazard[:path], hazard[:line]) + end + + def source_contents(repo, path) + file = File.join(repo, path) + File.file?(file) ? File.read(file) : "" + end + + def hazard(path, line, source, hazard_type, required_evidence, label) + { + path: path, + line: line, + source: source.strip, + hazard_type: hazard_type, + required_evidence: required_evidence, + label: label + } + end + + def c_style_code(line, in_block_comment) + out = +"" + rest = line.to_s + loop do + if in_block_comment[:active] + after = rest.split("*/", 2)[1] + return strip_strings(out) unless after + + in_block_comment[:active] = false + rest = after + next + end + + block = rest.index("/*") + comment = rest.index("//") + case + when block && comment && comment < block + out << rest[0...comment] + return strip_strings(out) + when block + out << rest[0...block] + rest = rest[(block + 2)..].to_s + in_block_comment[:active] = true + when comment + out << rest[0...comment] + return strip_strings(out) + else + out << rest + return strip_strings(out) + end + end + end + + def strip_strings(code) + code.to_s.gsub(/"(?:\\.|[^"\\])*"|'(?:\\.|[^'\\])*'/, '""') + end + + def excluded_path?(path, dirs:, file_suffixes: []) + parts = path.split("/") + return true if parts.any? { |part| dirs.include?(part) || part.start_with?(".") } + + file_suffixes.any? { |suffix| path.end_with?(suffix) } + end + + def token?(code, token) + code.match?(/(? 
"slopcop-rust-loom-uncovered", + "name" => "Rust Loom coverage missing", + "shortDescription" => { "text" => "Rust concurrency site lacks Loom coverage evidence" }, + "fullDescription" => { + "text" => "A changed Rust atomic, lock, thread, or shared-concurrency site was not reached by Loom coverage evidence." + }, + "defaultConfiguration" => { "level" => "warning" } + }, + { + "id" => "slopcop-rust-miri-uncovered", + "name" => "Rust unsafe coverage missing", + "shortDescription" => { "text" => "Rust unsafe site lacks Miri/unsafe coverage evidence" }, + "fullDescription" => { + "text" => "A changed Rust unsafe block, unsafe declaration, or unsafe operation was not reached by Miri-style evidence." + }, + "defaultConfiguration" => { "level" => "warning" } + } + ] + end + + def findings(repo:, additions:, evidence:) + LanguageProvider.findings(self, repo: repo, additions: additions, evidence: evidence) + end + + def scan_hazards(repo:, paths: nil) + LanguageProvider.scan_hazards(self, repo: repo, paths: paths) + end + + def source_path?(path) + path.end_with?(".rs") && !LanguageProvider.excluded_path?(path, dirs: EXCLUDED_DIRS) + end + + def rule_id_for(required_evidence) + required_evidence == "loom" ? "slopcop-rust-loom-uncovered" : "slopcop-rust-miri-uncovered" + end + + def scan_file(path, contents) + sites = [] + comment = { active: false } + unsafe_depth = 0 + contents.lines.each_with_index do |source, index| + line = index + 1 + code = LanguageProvider.c_style_code(source, comment) + next if code.strip.empty? 
+ + add_loom_sites(sites, path, line, source, code) + add_unsafe_sites(sites, path, line, source, code, unsafe_depth) + unsafe_depth = update_unsafe_depth(code, unsafe_depth) + end + sites + end + + def add_loom_sites(sites, path, line, source, code) + if atomic_site?(code) + sites << LanguageProvider.hazard(path, line, source, "rust_loom_atomic", "loom", "atomic or memory-ordering site") + end + if concurrency_site?(code) + sites << LanguageProvider.hazard(path, line, source, "rust_loom_concurrency", "loom", "thread/lock/shared-concurrency site") + end + end + + def add_unsafe_sites(sites, path, line, source, code, unsafe_depth) + if code.match?(/\bunsafe\s+fn\b/) + sites << LanguageProvider.hazard(path, line, source, "rust_unsafe_fn", "miri", "unsafe function") + end + if code.match?(/\bunsafe\s+impl\b/) + sites << LanguageProvider.hazard(path, line, source, "rust_unsafe_impl", "miri", "unsafe impl") + end + if unsafe_block_start?(code) + sites << LanguageProvider.hazard(path, line, source, "rust_unsafe_block", "miri", "unsafe block") + end + if unsafe_operation?(code) && (unsafe_depth.positive? || unsafe_block_start?(code)) + sites << LanguageProvider.hazard(path, line, source, "rust_unsafe_operation", "miri", "unsafe operation inside unsafe context") + end + end + + def atomic_site?(code) + code.match?(/\bAtomic(?:Bool|I(?:8|16|32|64|size)|U(?:8|16|32|64|size)|Ptr)\b/) || + LanguageProvider.any_include?(code, ATOMIC_NEEDLES) + end + + def concurrency_site?(code) + LanguageProvider.any_include?(code, CONCURRENCY_NEEDLES) + end + + def unsafe_block_start?(code) + code.match?(/\bunsafe\s*\{/) + end + + def unsafe_operation?(code) + LanguageProvider.any_include?(code, UNSAFE_API_NEEDLES) || + code.match?(/(?:\w|\))\s*\.\s*(?:add|offset|read|write|copy_to|copy_from)\s*\(/) || + code.match?(/\*\s*[A-Za-z_][A-Za-z0-9_]*/) + end + + def update_unsafe_depth(code, unsafe_depth) + code = code.chomp + relevant = if unsafe_depth.positive? 
+ code + elsif (match = code.match(/\bunsafe\s*\{.*\z/)) + match[0] + else + "" + end + return unsafe_depth if relevant.empty? + + [unsafe_depth + relevant.count("{") - relevant.count("}"), 0].max + end + end + end +end diff --git a/gems/slopcop/test/constraints_go_provider_test.rb b/gems/slopcop/test/constraints_go_provider_test.rb new file mode 100644 index 000000000..e659be35e --- /dev/null +++ b/gems/slopcop/test/constraints_go_provider_test.rb @@ -0,0 +1,93 @@ +# frozen_string_literal: true + +require "json" +require "minitest/autorun" +require "tmpdir" +require "fileutils" + +require_relative "../lib/slopcop" + +class ConstraintsGoProviderTest < Minitest::Test + def test_go_provider_is_registered + assert_same SlopCop::Constraints::GoProvider, SlopCop::Constraints.providers.fetch("go") + end + + def test_scans_go_concurrency_hazards + Dir.mktmpdir do |dir| + File.write(File.join(dir, "worker.go"), <<~GO) + package demo + + import "sync/atomic" + + func run(ch chan int) { + go func() { ch <- 1 }() + _ = atomic.LoadInt64(&counter) + // go ignored() + } + GO + + hazards = SlopCop::Constraints::GoProvider.scan_hazards(repo: dir) + + hazard_types = hazards.map { |hazard| hazard[:hazard_type] } + assert_includes hazard_types, "go_race_goroutine" + assert_includes hazard_types, "go_concurrency_channel" + assert_includes hazard_types, "go_race_atomic" + refute hazards.any? 
{ |hazard| hazard[:source].include?("ignored") } + end + end + + def test_findings_are_suppressed_by_matching_coverage_evidence + Dir.mktmpdir do |dir| + File.write(File.join(dir, "worker.go"), <<~GO) + package demo + + func run(ch chan int) { + go func() { ch <- 1 }() + } + GO + coverage = File.join(dir, "coverage.json") + File.write( + coverage, + JSON.dump( + coverage: { + "worker.go" => { + "4" => 1 + } + } + ) + ) + evidence = SlopCop::Constraints::Evidence.from_specs(["race:#{coverage}", "concurrency:#{coverage}"], repo: dir) + + findings = SlopCop::Constraints::GoProvider.findings( + repo: dir, + additions: { "worker.go" => [4] }, + evidence: evidence + ) + + assert_empty findings + end + end + + def test_uncovered_changed_go_hazard_gets_finding + Dir.mktmpdir do |dir| + File.write(File.join(dir, "worker.go"), <<~GO) + package demo + + func run(ch chan int) { + ch <- 1 + } + GO + evidence = SlopCop::Constraints::Evidence.from_specs([], repo: dir) + + findings = SlopCop::Constraints::GoProvider.findings( + repo: dir, + additions: { "worker.go" => [4] }, + evidence: evidence + ) + + assert_equal 1, findings.length + assert_equal "slopcop-go-concurrency-uncovered", findings.first.rule_id + assert_equal "go_concurrency_channel", findings.first.hazard_type + end + end +end diff --git a/gems/slopcop/test/constraints_systems_provider_test.rb b/gems/slopcop/test/constraints_systems_provider_test.rb new file mode 100644 index 000000000..b5e792ac7 --- /dev/null +++ b/gems/slopcop/test/constraints_systems_provider_test.rb @@ -0,0 +1,140 @@ +# frozen_string_literal: true + +require "json" +require "fileutils" +require "minitest/autorun" +require "tmpdir" + +require_relative "../lib/slopcop" + +class ConstraintsSystemsProviderTest < Minitest::Test + def test_new_systems_providers_are_registered + assert_same SlopCop::Constraints::RustProvider, SlopCop::Constraints.providers.fetch("rust") + assert_same SlopCop::Constraints::CProvider, 
SlopCop::Constraints.providers.fetch("c") + assert_same SlopCop::Constraints::CppProvider, SlopCop::Constraints.providers.fetch("cpp") + assert_same SlopCop::Constraints::CsharpProvider, SlopCop::Constraints.providers.fetch("csharp") + end + + def test_rust_provider_finds_loom_and_unsafe_hazards + with_file("src/lib.rs", <<~RS) do |dir, path| + use std::sync::atomic::{AtomicUsize, Ordering}; + + pub fn run(ptr: *const u8) -> usize { + let value = AtomicUsize::new(0); + value.fetch_add(1, Ordering::SeqCst); + unsafe { + ptr.add(1).read() + } + } + RS + hazards = SlopCop::Constraints::RustProvider.scan_hazards(repo: dir, paths: [path]) + types = hazards.map { |hazard| hazard[:hazard_type] } + + assert_includes types, "rust_loom_atomic" + assert_includes types, "rust_unsafe_block" + assert_includes types, "rust_unsafe_operation" + end + end + + def test_rust_provider_suppresses_matching_loom_coverage + with_file("src/lib.rs", "pub fn run(v: &AtomicUsize) { v.fetch_add(1, Ordering::SeqCst); }\n") do |dir, path| + evidence = SlopCop::Constraints::Evidence.from_specs(["loom:#{coverage_json(dir, path, 1 => 1)}"], repo: dir) + findings = SlopCop::Constraints::RustProvider.findings(repo: dir, additions: { path => [1] }, evidence: evidence) + + assert_empty findings + end + end + + def test_c_provider_finds_sanitizer_hazard_families + with_file("src/runtime.c", <<~C) do |dir, path| + #include + void run(char *dst, char *src, int n) { + pthread_mutex_lock(&lock); + char *buf = malloc(32); + memcpy(dst, src, n); + int shifted = n << src[0]; + free(buf); + } + C + hazards = SlopCop::Constraints::CProvider.scan_hazards(repo: dir, paths: [path]) + types = hazards.map { |hazard| hazard[:hazard_type] } + + assert_includes types, "c_tsan_concurrency" + assert_includes types, "c_asan_raw_memory_api" + assert_includes types, "c_lsan_lifetime" + assert_includes types, "c_ubsan_arithmetic" + end + end + + def test_cpp_provider_finds_sanitizer_hazard_families + 
with_file("src/runtime.cpp", <<~CPP) do |_dir, path| + #include + void run(char *dst, char *src, int n) { + std::atomic ready; + auto *buf = new char[32]; + std::memcpy(dst, src, n); + auto raw = reinterpret_cast(dst); + auto shifted = n << raw[0]; + delete[] buf; + } + CPP + hazards = SlopCop::Constraints::CppProvider.scan_file(path, File.read(File.join(_dir, path))) + types = hazards.map { |hazard| hazard[:hazard_type] } + + assert_includes types, "cpp_tsan_concurrency" + assert_includes types, "cpp_asan_raw_memory_api" + assert_includes types, "cpp_asan_pointer_or_cast" + assert_includes types, "cpp_lsan_lifetime" + assert_includes types, "cpp_ubsan_cast" + assert_includes types, "cpp_ubsan_arithmetic" + end + end + + def test_csharp_provider_finds_concurrency_and_unsafe_hazards + with_file("src/Worker.cs", <<~CS) do |dir, path| + using System.Threading.Tasks; + public unsafe class Worker { + public void Run(byte* ptr) { + Task.Run(() => {}); + fixed (byte* p = buffer) { + *p = 1; + } + } + } + CS + hazards = SlopCop::Constraints::CsharpProvider.scan_hazards(repo: dir, paths: [path]) + types = hazards.map { |hazard| hazard[:hazard_type] } + + assert_includes types, "csharp_concurrency" + assert_includes types, "csharp_unsafe_memory" + end + end + + def test_comment_and_string_hazards_are_ignored + with_file("src/runtime.c", <<~C) do |dir, path| + void run(void) { + // pthread_mutex_lock(&lock); + const char *s = "memcpy(dst, src, n)"; + } + C + assert_empty SlopCop::Constraints::CProvider.scan_hazards(repo: dir, paths: [path]) + end + end + + private + + def with_file(path, contents) + Dir.mktmpdir do |dir| + abs = File.join(dir, path) + FileUtils.mkdir_p(File.dirname(abs)) + File.write(abs, contents) + yield dir, path + end + end + + def coverage_json(dir, path, hits) + coverage = File.join(dir, "coverage.json") + File.write(coverage, JSON.dump(coverage: { path => hits.transform_keys(&:to_s) })) + coverage + end +end diff --git a/package-lock.json 
b/package-lock.json new file mode 100644 index 000000000..bc510afc1 --- /dev/null +++ b/package-lock.json @@ -0,0 +1,413 @@ +{ + "name": "cheat", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "dependencies": { + "@tree-sitter-grammars/tree-sitter-lua": "^0.4.1", + "@tree-sitter-grammars/tree-sitter-zig": "^1.1.2", + "tree-sitter-c": "^0.24.1", + "tree-sitter-c-sharp": "^0.23.5", + "tree-sitter-cpp": "^0.23.4", + "tree-sitter-go": "^0.25.0", + "tree-sitter-java": "^0.23.5", + "tree-sitter-javascript": "^0.25.0", + "tree-sitter-kotlin": "^0.3.8", + "tree-sitter-php": "^0.24.2", + "tree-sitter-python": "^0.25.0", + "tree-sitter-ruby": "^0.23.1", + "tree-sitter-rust": "^0.24.0", + "tree-sitter-swift": "^0.7.1", + "tree-sitter-typescript": "^0.23.2" + } + }, + "node_modules/@tree-sitter-grammars/tree-sitter-lua": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/@tree-sitter-grammars/tree-sitter-lua/-/tree-sitter-lua-0.4.1.tgz", + "integrity": "sha512-EwagFaU6ZveVk18/Y8qUhZkkiBKnQ7dSCHbm//TUroLVKy3i1rOYGy/cNHtSkAb1eDvS1HhCLybH2S541Cya/g==", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.5.0", + "node-gyp-build": "^4.8.4" + }, + "peerDependencies": { + "tree-sitter": "^0.22.4" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + } + }, + "node_modules/@tree-sitter-grammars/tree-sitter-zig": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@tree-sitter-grammars/tree-sitter-zig/-/tree-sitter-zig-1.1.2.tgz", + "integrity": "sha512-J0L31HZ2isy3F5zb2g5QWQOv2r/pbruQNL9ADhuQv2pn5BQOzxt80WcEJaYXBeuJ8GHxVT42slpCna8k1c8LOw==", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.3.0", + "node-gyp-build": "^4.8.4" + }, + "peerDependencies": { + "tree-sitter": "^0.22.1" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + } + }, + "node_modules/isexe": { + "version": "2.0.0", + "resolved": 
"https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", + "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", + "license": "ISC" + }, + "node_modules/node-addon-api": { + "version": "8.8.0", + "resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-8.8.0.tgz", + "integrity": "sha512-c5Ko1fZJIJmzhFIkhRN76WTq+fC6tWnGy9CXA0fA+XygsWZmEwG8vmbkNqxMyoaa0Tin4djul49NzdVcJJcjeA==", + "license": "MIT", + "engines": { + "node": "^18 || ^20 || >= 21" + } + }, + "node_modules/node-gyp-build": { + "version": "4.8.4", + "resolved": "https://registry.npmjs.org/node-gyp-build/-/node-gyp-build-4.8.4.tgz", + "integrity": "sha512-LA4ZjwlnUblHVgq0oBF3Jl/6h/Nvs5fzBLwdEF4nuxnFdsfajde4WfxtJr3CaiH+F6ewcIB/q4jQ4UzPyid+CQ==", + "license": "MIT", + "bin": { + "node-gyp-build": "bin.js", + "node-gyp-build-optional": "optional.js", + "node-gyp-build-test": "build-test.js" + } + }, + "node_modules/tree-sitter-c": { + "version": "0.24.1", + "resolved": "https://registry.npmjs.org/tree-sitter-c/-/tree-sitter-c-0.24.1.tgz", + "integrity": "sha512-lkYwWN3SRecpvaeqmFKkuPNR3ZbtnvHU+4XAEEkJdrp3JfSp2pBrhXOtvfsENUneye76g889Y0ddF2DM0gEDpA==", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.3.1", + "node-gyp-build": "^4.8.4" + }, + "peerDependencies": { + "tree-sitter": "^0.22.4" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + } + }, + "node_modules/tree-sitter-c-sharp": { + "version": "0.23.5", + "resolved": "https://registry.npmjs.org/tree-sitter-c-sharp/-/tree-sitter-c-sharp-0.23.5.tgz", + "integrity": "sha512-xJGOeXPMmld0nES5+080N/06yY6LQi+KWGWV4LfZaZe6srJPtUtfhIbRSN7EZN6IaauzW28v6W4QHFwmeUW6HQ==", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.2.2", + "node-gyp-build": "^4.8.4" + }, + "peerDependencies": { + "tree-sitter": "^0.25.0" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + } + 
}, + "node_modules/tree-sitter-cli": { + "version": "0.23.2", + "resolved": "https://registry.npmjs.org/tree-sitter-cli/-/tree-sitter-cli-0.23.2.tgz", + "integrity": "sha512-kPPXprOqREX+C/FgUp2Qpt9jd0vSwn+hOgjzVv/7hapdoWpa+VeWId53rf4oNNd29ikheF12BYtGD/W90feMbA==", + "hasInstallScript": true, + "license": "MIT", + "bin": { + "tree-sitter": "cli.js" + }, + "engines": { + "node": ">=12.0.0" + } + }, + "node_modules/tree-sitter-cpp": { + "version": "0.23.4", + "resolved": "https://registry.npmjs.org/tree-sitter-cpp/-/tree-sitter-cpp-0.23.4.tgz", + "integrity": "sha512-qR5qUDyhZ5jJ6V8/umiBxokRbe89bCGmcq/dk94wI4kN86qfdV8k0GHIUEKaqWgcu42wKal5E97LKpLeVW8sKw==", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.2.1", + "node-gyp-build": "^4.8.2", + "tree-sitter-c": "^0.23.1" + }, + "peerDependencies": { + "tree-sitter": "^0.21.1" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + } + }, + "node_modules/tree-sitter-cpp/node_modules/tree-sitter-c": { + "version": "0.23.6", + "resolved": "https://registry.npmjs.org/tree-sitter-c/-/tree-sitter-c-0.23.6.tgz", + "integrity": "sha512-0dxXKznVyUA0s6PjNolJNs2yF87O5aL538A/eR6njA5oqX3C3vH4vnx3QdOKwuUdpKEcFdHuiDpRKLLCA/tjvQ==", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.3.0", + "node-gyp-build": "^4.8.4" + }, + "peerDependencies": { + "tree-sitter": "^0.22.1" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + } + }, + "node_modules/tree-sitter-go": { + "version": "0.25.0", + "resolved": "https://registry.npmjs.org/tree-sitter-go/-/tree-sitter-go-0.25.0.tgz", + "integrity": "sha512-APBc/Dq3xz/e35Xpkhb1blu5UgW+2E3RyGWawZSCNcbGwa7jhSQPS8KsUupuzBla8PCo8+lz9W/JDJjmfRa2tw==", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.3.1", + "node-gyp-build": "^4.8.4" + }, + "peerDependencies": { + "tree-sitter": "^0.25.0" + }, + "peerDependenciesMeta": { + 
"tree-sitter": { + "optional": true + } + } + }, + "node_modules/tree-sitter-java": { + "version": "0.23.5", + "resolved": "https://registry.npmjs.org/tree-sitter-java/-/tree-sitter-java-0.23.5.tgz", + "integrity": "sha512-Yju7oQ0Xx7GcUT01mUglPP+bYfvqjNCGdxqigTnew9nLGoII42PNVP3bHrYeMxswiCRM0yubWmN5qk+zsg0zMA==", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.2.2", + "node-gyp-build": "^4.8.2" + }, + "peerDependencies": { + "tree-sitter": "^0.21.1" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + } + }, + "node_modules/tree-sitter-javascript": { + "version": "0.25.0", + "resolved": "https://registry.npmjs.org/tree-sitter-javascript/-/tree-sitter-javascript-0.25.0.tgz", + "integrity": "sha512-1fCbmzAskZkxcZzN41sFZ2br2iqTYP3tKls1b/HKGNPQUVOpsUxpmGxdN/wMqAk3jYZnYBR1dd/y/0avMeU7dw==", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.3.1", + "node-gyp-build": "^4.8.4" + }, + "peerDependencies": { + "tree-sitter": "^0.25.0" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + } + }, + "node_modules/tree-sitter-kotlin": { + "version": "0.3.8", + "resolved": "https://registry.npmjs.org/tree-sitter-kotlin/-/tree-sitter-kotlin-0.3.8.tgz", + "integrity": "sha512-A4obq6bjzmYrA+F0JLLoheFPcofFkctNaZSpnDd+GPn1SfVZLY4/GG4C0cYVBTOShuPBGGAOPLM1JWLZQV4m1g==", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^7.1.0", + "node-gyp-build": "^4.8.0" + }, + "peerDependencies": { + "tree-sitter": "^0.21.0" + }, + "peerDependenciesMeta": { + "tree_sitter": { + "optional": true + } + } + }, + "node_modules/tree-sitter-kotlin/node_modules/node-addon-api": { + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-7.1.1.tgz", + "integrity": "sha512-5m3bsyrjFWE1xf7nz7YXdN4udnVtXK6/Yfgn5qnahL6bCkf2yKt4k3nuTKAtT4r3IG8JNR2ncsIMdZuAzJjHQQ==", + "license": "MIT" + }, + 
"node_modules/tree-sitter-php": { + "version": "0.24.2", + "resolved": "https://registry.npmjs.org/tree-sitter-php/-/tree-sitter-php-0.24.2.tgz", + "integrity": "sha512-zwgAePc/HozNaWOOfwRAA+3p8yhuehRw8Fb7vn5qd2XjiIc93uJPryDTMYTSjBRjVIUg/KY6pM3rRzs8dSwKfw==", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.2.2", + "node-gyp-build": "^4.8.2" + }, + "peerDependencies": { + "tree-sitter": "^0.22.4" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + } + }, + "node_modules/tree-sitter-python": { + "version": "0.25.0", + "resolved": "https://registry.npmjs.org/tree-sitter-python/-/tree-sitter-python-0.25.0.tgz", + "integrity": "sha512-eCmJx6zQa35GxaCtQD+wXHOhYqBxEL+bp71W/s3fcDMu06MrtzkVXR437dRrCrbrDbyLuUDJpAgycs7ncngLXw==", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.5.0", + "node-gyp-build": "^4.8.4" + }, + "peerDependencies": { + "tree-sitter": "^0.25.0" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + } + }, + "node_modules/tree-sitter-ruby": { + "version": "0.23.1", + "resolved": "https://registry.npmjs.org/tree-sitter-ruby/-/tree-sitter-ruby-0.23.1.tgz", + "integrity": "sha512-d9/RXgWjR6HanN7wTYhS5bpBQLz1VkH048Vm3CodPGyJVnamXMGb8oEhDypVCBq4QnHui9sTXuJBBP3WtCw5RA==", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.2.2", + "node-gyp-build": "^4.8.2" + }, + "peerDependencies": { + "tree-sitter": "^0.21.1" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + } + }, + "node_modules/tree-sitter-rust": { + "version": "0.24.0", + "resolved": "https://registry.npmjs.org/tree-sitter-rust/-/tree-sitter-rust-0.24.0.tgz", + "integrity": "sha512-NWemUDf629Tfc90Y0Z55zuwPCAHkLxWnMf2RznYu4iBkkrQl2o/CHGB7Cr52TyN5F1DAx8FmUnDtCy9iUkXZEQ==", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.2.2", + "node-gyp-build": "^4.8.4" + }, + 
"peerDependencies": { + "tree-sitter": "^0.22.1" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + } + }, + "node_modules/tree-sitter-swift": { + "version": "0.7.1", + "resolved": "https://registry.npmjs.org/tree-sitter-swift/-/tree-sitter-swift-0.7.1.tgz", + "integrity": "sha512-pneKVTuGamaBsqqqfB9BvNQjktzh/0IVPR54jLB5Fq/JTDQwYHd0Wo6pVyZ5jAYpbztzq+rJ/rpL9ruxTmSoKw==", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.0.0", + "node-gyp-build": "^4.8.0", + "tree-sitter-cli": "^0.23", + "which": "2.0.2" + }, + "peerDependencies": { + "tree-sitter": "^0.22.1" + }, + "peerDependenciesMeta": { + "tree_sitter": { + "optional": true + } + } + }, + "node_modules/tree-sitter-typescript": { + "version": "0.23.2", + "resolved": "https://registry.npmjs.org/tree-sitter-typescript/-/tree-sitter-typescript-0.23.2.tgz", + "integrity": "sha512-e04JUUKxTT53/x3Uq1zIL45DoYKVfHH4CZqwgZhPg5qYROl5nQjV+85ruFzFGZxu+QeFVbRTPDRnqL9UbU4VeA==", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.2.2", + "node-gyp-build": "^4.8.2", + "tree-sitter-javascript": "^0.23.1" + }, + "peerDependencies": { + "tree-sitter": "^0.21.0" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + } + }, + "node_modules/tree-sitter-typescript/node_modules/tree-sitter-javascript": { + "version": "0.23.1", + "resolved": "https://registry.npmjs.org/tree-sitter-javascript/-/tree-sitter-javascript-0.23.1.tgz", + "integrity": "sha512-/bnhbrTD9frUYHQTiYnPcxyHORIw157ERBa6dqzaKxvR/x3PC4Yzd+D1pZIMS6zNg2v3a8BZ0oK7jHqsQo9fWA==", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "node-addon-api": "^8.2.2", + "node-gyp-build": "^4.8.2" + }, + "peerDependencies": { + "tree-sitter": "^0.21.1" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + } + }, + "node_modules/which": { + "version": "2.0.2", + "resolved": 
"https://registry.npmjs.org/which/-/which-2.0.2.tgz", + "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", + "license": "ISC", + "dependencies": { + "isexe": "^2.0.0" + }, + "bin": { + "node-which": "bin/node-which" + }, + "engines": { + "node": ">= 8" + } + } + } +} diff --git a/package.json b/package.json new file mode 100644 index 000000000..0d38f430d --- /dev/null +++ b/package.json @@ -0,0 +1,20 @@ +{ + "private": true, + "dependencies": { + "@tree-sitter-grammars/tree-sitter-lua": "^0.4.1", + "@tree-sitter-grammars/tree-sitter-zig": "^1.1.2", + "tree-sitter-c": "^0.24.1", + "tree-sitter-c-sharp": "^0.23.5", + "tree-sitter-cpp": "^0.23.4", + "tree-sitter-go": "^0.25.0", + "tree-sitter-java": "^0.23.5", + "tree-sitter-javascript": "^0.25.0", + "tree-sitter-kotlin": "^0.3.8", + "tree-sitter-php": "^0.24.2", + "tree-sitter-python": "^0.25.0", + "tree-sitter-ruby": "^0.23.1", + "tree-sitter-rust": "^0.24.0", + "tree-sitter-swift": "^0.7.1", + "tree-sitter-typescript": "^0.23.2" + } +} diff --git a/spec/decomplex_architecture_invariants_spec.rb b/spec/decomplex_architecture_invariants_spec.rb new file mode 100644 index 000000000..c3c15272d --- /dev/null +++ b/spec/decomplex_architecture_invariants_spec.rb @@ -0,0 +1,128 @@ +require "rspec" + +RSpec.describe "architecture invariants: decomplex syntax boundaries" do + ROOT = File.expand_path("..", __dir__) + DECOMPLEX_LIB = File.join(ROOT, "gems", "decomplex", "lib", "decomplex") + DETECTOR_BASENAMES = %w[ + co_update decision_pressure derived_state false_simplicity fat_union + flay_similarity function_lcom inconsistent_rename_clone local_flow + locality_drag miner mutability_pressure operational_discontinuity + ordered_protocol_mine oversized_predicate path_condition predicate_alias + redundant_nil_guard semantic_alias sequence_mine site_extractor + state_branch_density state_mesh structural_topology superfluous_state + temporal_ordering_pressure 
weighted_inlined_cognitive_complexity + ].freeze + DETECTOR_FILES = DETECTOR_BASENAMES.map { |name| File.join(DECOMPLEX_LIB, "#{name}.rb") }.freeze + + RAW_TREE_SITTER_PATTERNS = { + "raw child traversal" => /(? /\bchild_by_field_name\b/, + "raw byte offsets" => /\b(?:start_byte|end_byte)\b/, + "raw point offsets" => /\b(?:start_point|end_point)\b/, + "Tree-sitter classes" => /\bTreeSitter(?:Adapter|LanguageAdapter|Normalizer|NodeFacade|FacadeContext)?\b/, + "raw node predicate helpers" => /\b(?:ts_node\?|tree_sitter_node\?)\b/, + "raw node duck typing" => /respond_to\?\s*\(\s*:children\s*\)/ + }.freeze + + SYNTAX_RB_EXTENSION_HOST_PATTERNS = { + "clone similarity belongs in syntax/clone_similarity.rb" => /\b(?:CloneCandidate|clone_candidates|CLONE_)/, + "dispatch facts belong in syntax/dispatch.rb" => /\b(?:DispatchSite|dispatch_sites|DISPATCH_)/, + "nil guard facts belong in syntax/nil_guards.rb" => /\b(?:NilGuard|redundant_nil_guard_findings)/, + "local complexity facts belong in syntax/complexity.rb" => /\b(?:LocalComplexity|local_complexity_scores)/ + }.freeze + + SYNTAX_RB_ADAPTER_IMPLEMENTATION_PATTERNS = { + "concrete language adapters belong under lib/decomplex/syntax/" => + /^\s*class\s+(?!TreeSitterLanguageAdapter\b)\w+SyntaxAdapter\b/, + "language profiles must instantiate concrete adapters, not the base adapter" => + /:\s*TreeSitterLanguageAdapter\.new\(/ + }.freeze + + ADAPTER_LOADER_LANGUAGE_IMPLEMENTATION_PATTERNS = { + "language lexicons belong in the language adapter file" => + /^\s*[A-Z_]+_LEXICON\s*=/, + "concrete language adapters belong in the language adapter file" => + /^\s*class\s+(?!TreeSitterLanguageAdapter\b)\w+SyntaxAdapter\b/ + }.freeze + + LANGUAGE_ADAPTER_FILES = { + "ruby.rb" => "RubySyntaxAdapter", + "python.rb" => "PythonSyntaxAdapter", + "javascript.rb" => "JavaScriptSyntaxAdapter", + "typescript.rb" => "TypeScriptSyntaxAdapter", + "go.rb" => "GoSyntaxAdapter", + "rust.rb" => "RustSyntaxAdapter", + "zig.rb" => "ZigSyntaxAdapter", 
+ "lua.rb" => "LuaSyntaxAdapter", + "c.rb" => "CSyntaxAdapter", + "cpp.rb" => "CppSyntaxAdapter", + "csharp.rb" => "CSharpSyntaxAdapter", + "java.rb" => "JavaSyntaxAdapter", + "swift.rb" => "SwiftSyntaxAdapter", + "kotlin.rb" => "KotlinSyntaxAdapter", + "php.rb" => "PhpSyntaxAdapter" + }.freeze + + def scan_files(files, patterns) + files.sort.flat_map do |path| + rel = path.delete_prefix("#{ROOT}/") + File.readlines(path, chomp: true).each_with_index.flat_map do |line, index| + next if line.strip.start_with?("#") + + patterns.filter_map do |name, pattern| + "#{rel}:#{index + 1}: #{name}: #{line.strip}" if line.match?(pattern) + end + end.compact + end + end + + def format_offenders(message, offenders) + ([message] + offenders.map { |offender| " #{offender}" }).join("\n") + end + + it "keeps detectors behind Syntax facts instead of raw Tree-sitter nodes" do + offenders = scan_files(DETECTOR_FILES, RAW_TREE_SITTER_PATTERNS) + + expect(offenders).to be_empty, + format_offenders("Detectors must consume Syntax facts instead of raw Tree-sitter nodes", offenders) + end + + it "keeps detector-facing syntax extensions out of syntax.rb" do + syntax_rb = File.join(DECOMPLEX_LIB, "syntax.rb") + offenders = scan_files([syntax_rb], SYNTAX_RB_EXTENSION_HOST_PATTERNS) + + expect(offenders).to be_empty, + format_offenders("Detector-facing parser extensions must live under lib/decomplex/syntax/", offenders) + end + + it "keeps concrete language adapter implementation out of syntax.rb" do + syntax_rb = File.join(DECOMPLEX_LIB, "syntax.rb") + offenders = scan_files([syntax_rb], SYNTAX_RB_ADAPTER_IMPLEMENTATION_PATTERNS) + + expect(offenders).to be_empty, + format_offenders("Core syntax.rb must not absorb concrete language adapter implementation", offenders) + end + + it "keeps one adapter file per supported language" do + offenders = LANGUAGE_ADAPTER_FILES.filter_map do |file_name, class_name| + path = File.join(DECOMPLEX_LIB, "syntax", file_name) + next "#{file_name}: missing file" 
unless File.file?(path) + + source = File.read(path) + next if source.match?(/^\s*class\s+#{Regexp.escape(class_name)}\b/) + + "#{file_name}: missing #{class_name}" + end + + expect(offenders).to be_empty, + format_offenders("Every supported language must have an explicit adapter file", offenders) + end + + it "keeps the adapter loader from absorbing language implementations" do + adapters_rb = File.join(DECOMPLEX_LIB, "syntax", "adapters.rb") + offenders = scan_files([adapters_rb], ADAPTER_LOADER_LANGUAGE_IMPLEMENTATION_PATTERNS) + + expect(offenders).to be_empty, + format_offenders("Adapter loader must only load adapters and shared base helpers", offenders) + end +end