From 4442d8c78072b992654c0b3b3749fdb049e6c010 Mon Sep 17 00:00:00 2001 From: Cristiano Calcagno Date: Mon, 15 Dec 2025 07:14:25 +0100 Subject: [PATCH 1/9] Experiment: Reactive analysis with skip-lite CMT cache Vendor skip-lite library and integrate reactive analysis capabilities: - Vendor skip-lite marshal_cache and reactive_file_collection modules - Modify C++ code to handle ReScript CMT file format (CMI+CMT headers) - Add CmtCache module for mmap-based CMT file reading - Add ReactiveAnalysis module for incremental file processing - Add CLI flags: -cmt-cache, -reactive, -runs - Add README.md with usage and benchmark instructions Benchmark results (~5000 files): - Standard: CMT processing 0.78s, Total 1.01s - Reactive (warm): CMT processing 0.01s, Total 0.20s - Speedup: 74x for CMT processing, 5x total The reactive mode caches processed file_data and uses read_cmt_if_changed to skip unchanged files entirely on subsequent runs. --- analysis/reanalyze/README.md | 169 ++++ analysis/reanalyze/src/Cli.ml | 9 + analysis/reanalyze/src/CmtCache.ml | 42 + analysis/reanalyze/src/CmtCache.mli | 28 + analysis/reanalyze/src/ReactiveAnalysis.ml | 149 ++++ analysis/reanalyze/src/Reanalyze.ml | 69 +- analysis/reanalyze/src/dune | 2 +- analysis/vendor/dune | 2 +- analysis/vendor/skip-lite/dune | 8 + analysis/vendor/skip-lite/marshal_cache/dune | 7 + .../skip-lite/marshal_cache/marshal_cache.ml | 71 ++ .../skip-lite/marshal_cache/marshal_cache.mli | 120 +++ .../marshal_cache/marshal_cache_stubs.cpp | 804 ++++++++++++++++++ .../skip-lite/reactive_file_collection/dune | 3 + .../reactive_file_collection.ml | 95 +++ .../reactive_file_collection.mli | 115 +++ analysis/vendor/skip-lite/test_cmt.ml | 119 +++ docs/reactive_reanalyze_design.md | 469 ++++++++++ .../deadcode-benchmark/Makefile | 29 +- 19 files changed, 2286 insertions(+), 24 deletions(-) create mode 100644 analysis/reanalyze/README.md create mode 100644 analysis/reanalyze/src/CmtCache.ml create mode 100644 
analysis/reanalyze/src/CmtCache.mli create mode 100644 analysis/reanalyze/src/ReactiveAnalysis.ml create mode 100644 analysis/vendor/skip-lite/dune create mode 100644 analysis/vendor/skip-lite/marshal_cache/dune create mode 100644 analysis/vendor/skip-lite/marshal_cache/marshal_cache.ml create mode 100644 analysis/vendor/skip-lite/marshal_cache/marshal_cache.mli create mode 100644 analysis/vendor/skip-lite/marshal_cache/marshal_cache_stubs.cpp create mode 100644 analysis/vendor/skip-lite/reactive_file_collection/dune create mode 100644 analysis/vendor/skip-lite/reactive_file_collection/reactive_file_collection.ml create mode 100644 analysis/vendor/skip-lite/reactive_file_collection/reactive_file_collection.mli create mode 100644 analysis/vendor/skip-lite/test_cmt.ml create mode 100644 docs/reactive_reanalyze_design.md diff --git a/analysis/reanalyze/README.md b/analysis/reanalyze/README.md new file mode 100644 index 0000000000..7255664a54 --- /dev/null +++ b/analysis/reanalyze/README.md @@ -0,0 +1,169 @@ +# Reanalyze + +Dead code analysis and other experimental analyses for ReScript. 
+ +## Analyses + +- **Dead Code Elimination (DCE)** - Detect unused values, types, and modules +- **Exception Analysis** - Track potential exceptions through call chains +- **Termination Analysis** - Experimental analysis for detecting non-terminating functions + +## Usage + +```bash +# Run DCE analysis on current project (reads rescript.json) +rescript-editor-analysis reanalyze -config + +# Run DCE analysis on specific CMT directory +rescript-editor-analysis reanalyze -dce-cmt path/to/lib/bs + +# Run all analyses +rescript-editor-analysis reanalyze -all +``` + +## Performance Options + +### Parallel Processing + +Use multiple CPU cores for faster analysis: + +```bash +# Use 4 parallel domains +reanalyze -config -parallel 4 + +# Auto-detect number of cores +reanalyze -config -parallel -1 +``` + +### CMT Cache (Experimental) + +Use memory-mapped cache for CMT file reading: + +```bash +reanalyze -config -cmt-cache +``` + +### Reactive Mode (Experimental) + +Cache processed file data and skip unchanged files on subsequent runs: + +```bash +reanalyze -config -reactive +``` + +This provides significant speedup for repeated analysis (e.g., in a watch mode or service): + +| Mode | CMT Processing | Total | Speedup | +|------|----------------|-------|---------| +| Standard | 0.78s | 1.01s | 1x | +| Reactive (warm) | 0.01s | 0.20s | 5x | + +### Benchmarking + +Run analysis multiple times to measure cache effectiveness: + +```bash +reanalyze -config -reactive -timing -runs 3 +``` + +## CLI Flags + +| Flag | Description | +|------|-------------| +| `-config` | Read analysis mode from rescript.json | +| `-dce` | Run dead code analysis | +| `-exception` | Run exception analysis | +| `-termination` | Run termination analysis | +| `-all` | Run all analyses | +| `-parallel n` | Use n parallel domains (0=sequential, -1=auto) | +| `-cmt-cache` | Use mmap cache for CMT files | +| `-reactive` | Cache processed file_data, skip unchanged files | +| `-runs n` | Run analysis n times (for 
benchmarking) | +| `-timing` | Report timing of analysis phases | +| `-debug` | Print debug information | +| `-json` | Output in JSON format | +| `-ci` | Internal flag for CI mode | + +## Architecture + +See [ARCHITECTURE.md](ARCHITECTURE.md) for details on the analysis pipeline. + +The DCE analysis is structured as a pure pipeline: + +1. **MAP** - Process each `.cmt` file independently → per-file data +2. **MERGE** - Combine all per-file data → project-wide view +3. **SOLVE** - Compute dead/live status → issues +4. **REPORT** - Output issues + +This design enables order-independence, parallelization, and incremental updates. + +## Reactive Analysis + +The reactive mode (`-reactive`) uses skip-lite's Marshal_cache to efficiently detect file changes: + +1. **First run**: All files are processed and results cached +2. **Subsequent runs**: Only changed files are re-processed +3. **Unchanged files**: Return cached `file_data` immediately (no I/O or unmarshalling) + +This is the foundation for a persistent analysis service that can respond to file changes in milliseconds. + +## Development + +### Testing + +```bash +# Run reanalyze tests +make test-reanalyze + +# Run with shuffled file order (order-independence test) +make test-reanalyze-order-independence + +# Run parallel mode test +make test-reanalyze-parallel +``` + +### Benchmarking + +The benchmark project generates ~5000 files to measure analysis performance: + +```bash +cd tests/analysis_tests/tests-reanalyze/deadcode-benchmark + +# Generate files, build, and run sequential vs parallel benchmark +make benchmark + +# Compare CMT cache effectiveness (cold vs warm) +make time-cache + +# Benchmark reactive mode (shows speedup on repeated runs) +make time-reactive +``` + +#### Reactive Benchmark + +The `make time-reactive` target runs: + +1. **Standard mode** (baseline) - Full analysis every time +2. 
**Reactive mode** with 3 runs - First run is cold (processes all files), subsequent runs are warm (skip unchanged files) + +Example output: + +``` +=== Reactive mode benchmark === + +Standard (baseline): + CMT processing: 0.78s + Total: 1.01s + +Reactive mode (3 runs): + === Run 1/3 === + CMT processing: 0.78s + Total: 1.02s + === Run 2/3 === + CMT processing: 0.01s <-- 74x faster + Total: 0.20s <-- 5x faster + === Run 3/3 === + CMT processing: 0.01s + Total: 0.20s +``` + diff --git a/analysis/reanalyze/src/Cli.ml b/analysis/reanalyze/src/Cli.ml index 240d369b18..edff3e3e2b 100644 --- a/analysis/reanalyze/src/Cli.ml +++ b/analysis/reanalyze/src/Cli.ml @@ -27,3 +27,12 @@ let parallel = ref 0 (* timing: report internal timing of analysis phases *) let timing = ref false + +(* use mmap cache for CMT files *) +let cmtCache = ref false + +(* use reactive/incremental analysis (caches processed file_data) *) +let reactive = ref false + +(* number of analysis runs (for benchmarking reactive mode) *) +let runs = ref 1 diff --git a/analysis/reanalyze/src/CmtCache.ml b/analysis/reanalyze/src/CmtCache.ml new file mode 100644 index 0000000000..53425cb369 --- /dev/null +++ b/analysis/reanalyze/src/CmtCache.ml @@ -0,0 +1,42 @@ +(** CMT file cache using Marshal_cache for efficient mmap-based reading. + + This module provides cached reading of CMT files with automatic + invalidation when files change on disk. It's used to speed up + repeated analysis runs by avoiding re-reading unchanged files. *) + +[@@@alert "-unsafe"] + +(** Read a CMT file, using the mmap cache for efficiency. + The file is memory-mapped and the cache automatically detects + when the file changes on disk. *) +let read_cmt path : Cmt_format.cmt_infos = + Marshal_cache.with_unmarshalled_file path Fun.id + +(** Read a CMT file only if it changed since the last access. + Returns [Some cmt_infos] if the file changed (or first access), + [None] if the file is unchanged. 
+ + This is the key function for incremental analysis - unchanged + files return [None] immediately without any unmarshalling. *) +let read_cmt_if_changed path : Cmt_format.cmt_infos option = + Marshal_cache.with_unmarshalled_if_changed path Fun.id + +(** Clear the CMT cache, unmapping all memory. + Useful for testing or to free memory. *) +let clear () = Marshal_cache.clear () + +(** Invalidate a specific path in the cache. + The next read will re-load the file from disk. *) +let invalidate path = Marshal_cache.invalidate path + +(** Cache statistics *) +type stats = { + entry_count: int; + mapped_bytes: int; +} + +(** Get cache statistics *) +let stats () : stats = + let s = Marshal_cache.stats () in + { entry_count = s.entry_count; mapped_bytes = s.mapped_bytes } + diff --git a/analysis/reanalyze/src/CmtCache.mli b/analysis/reanalyze/src/CmtCache.mli new file mode 100644 index 0000000000..74d6a73c85 --- /dev/null +++ b/analysis/reanalyze/src/CmtCache.mli @@ -0,0 +1,28 @@ +(** CMT file cache using Marshal_cache for efficient mmap-based reading. + + This module provides cached reading of CMT files with automatic + invalidation when files change on disk. *) + +val read_cmt : string -> Cmt_format.cmt_infos +(** Read a CMT file, using the mmap cache for efficiency. *) + +val read_cmt_if_changed : string -> Cmt_format.cmt_infos option +(** Read a CMT file only if it changed since the last access. + Returns [Some cmt_infos] if the file changed (or first access), + [None] if the file is unchanged. *) + +val clear : unit -> unit +(** Clear the CMT cache, unmapping all memory. *) + +val invalidate : string -> unit +(** Invalidate a specific path in the cache. 
*) + +type stats = { + entry_count: int; + mapped_bytes: int; +} +(** Cache statistics *) + +val stats : unit -> stats +(** Get cache statistics *) + diff --git a/analysis/reanalyze/src/ReactiveAnalysis.ml b/analysis/reanalyze/src/ReactiveAnalysis.ml new file mode 100644 index 0000000000..a6b6a6cf46 --- /dev/null +++ b/analysis/reanalyze/src/ReactiveAnalysis.ml @@ -0,0 +1,149 @@ +(** Reactive analysis service using cached file processing. + + This module provides incremental analysis that only re-processes + files that have changed, caching the processed file_data for + unchanged files. *) + +[@@@alert "-unsafe"] + +(** Result of processing a single CMT file *) +type cmt_file_result = { + dce_data: DceFileProcessing.file_data option; + exception_data: Exception.file_result option; +} + +(** Result of processing all CMT files *) +type all_files_result = { + dce_data_list: DceFileProcessing.file_data list; + exception_results: Exception.file_result list; +} + +(** Cached file_data for a single CMT file. + We cache the processed result, not just the raw CMT data. 
*) +type cached_file = { + path: string; + file_data: DceFileProcessing.file_data option; + exception_data: Exception.file_result option; +} + +(** The file cache - maps CMT paths to processed results *) +let file_cache : (string, cached_file) Hashtbl.t = Hashtbl.create 1024 + +(** Process cmt_infos into a file result *) +let process_cmt_infos ~config ~cmtFilePath cmt_infos : cmt_file_result option = + let excludePath sourceFile = + config.DceConfig.cli.exclude_paths + |> List.exists (fun prefix_ -> + let prefix = + match Filename.is_relative sourceFile with + | true -> prefix_ + | false -> Filename.concat (Sys.getcwd ()) prefix_ + in + String.length prefix <= String.length sourceFile + && + try String.sub sourceFile 0 (String.length prefix) = prefix + with Invalid_argument _ -> false) + in + match cmt_infos.Cmt_format.cmt_annots |> FindSourceFile.cmt with + | Some sourceFile when not (excludePath sourceFile) -> + let is_interface = + match cmt_infos.cmt_annots with + | Interface _ -> true + | _ -> Filename.check_suffix sourceFile "i" + in + let module_name = sourceFile |> Paths.getModuleName in + let dce_file_context : DceFileProcessing.file_context = + {source_path = sourceFile; module_name; is_interface} + in + let file_context = + DeadCommon.FileContext. + {source_path = sourceFile; module_name; is_interface} + in + let dce_data = + if config.DceConfig.run.dce then + Some + (cmt_infos + |> DceFileProcessing.process_cmt_file ~config ~file:dce_file_context + ~cmtFilePath) + else None + in + let exception_data = + if config.DceConfig.run.exception_ then + cmt_infos |> Exception.processCmt ~file:file_context + else None + in + if config.DceConfig.run.termination then + cmt_infos |> Arnold.processCmt ~config ~file:file_context; + Some {dce_data; exception_data} + | _ -> None + +(** Process a CMT file, using cached result if file unchanged. + Returns the cached result if the file hasn't changed since last access. 
*) +let process_cmt_cached ~config cmtFilePath : cmt_file_result option = + match CmtCache.read_cmt_if_changed cmtFilePath with + | None -> + (* File unchanged - return cached result *) + (match Hashtbl.find_opt file_cache cmtFilePath with + | Some cached -> + Some { dce_data = cached.file_data; exception_data = cached.exception_data } + | None -> + (* First time seeing this file - shouldn't happen, but handle gracefully *) + None) + | Some cmt_infos -> + (* File changed or new - process it *) + let result = process_cmt_infos ~config ~cmtFilePath cmt_infos in + (* Cache the result *) + (match result with + | Some r -> + Hashtbl.replace file_cache cmtFilePath { + path = cmtFilePath; + file_data = r.dce_data; + exception_data = r.exception_data; + } + | None -> ()); + result + +(** Process all files incrementally. + First run processes all files. Subsequent runs only process changed files. *) +let process_files_incremental ~config cmtFilePaths : all_files_result = + Timing.time_phase `FileLoading (fun () -> + let dce_data_list = ref [] in + let exception_results = ref [] in + let processed = ref 0 in + let from_cache = ref 0 in + + cmtFilePaths |> List.iter (fun cmtFilePath -> + (* Check if file was in cache *before* processing *) + let was_cached = Hashtbl.mem file_cache cmtFilePath in + match process_cmt_cached ~config cmtFilePath with + | Some {dce_data; exception_data} -> + (match dce_data with + | Some data -> dce_data_list := data :: !dce_data_list + | None -> ()); + (match exception_data with + | Some data -> exception_results := data :: !exception_results + | None -> ()); + (* Track whether it was from cache *) + if was_cached then + incr from_cache + else + incr processed + | None -> () + ); + + if !Cli.timing then + Printf.eprintf "Reactive: %d files processed, %d from cache\n%!" 
!processed !from_cache; + + {dce_data_list = List.rev !dce_data_list; exception_results = List.rev !exception_results}) + +(** Clear all cached file data *) +let clear () = + Hashtbl.clear file_cache; + CmtCache.clear () + +(** Get cache statistics *) +let stats () = + let file_count = Hashtbl.length file_cache in + let cmt_stats = CmtCache.stats () in + (file_count, cmt_stats) + diff --git a/analysis/reanalyze/src/Reanalyze.ml b/analysis/reanalyze/src/Reanalyze.ml index 006454247d..58a4883e18 100644 --- a/analysis/reanalyze/src/Reanalyze.ml +++ b/analysis/reanalyze/src/Reanalyze.ml @@ -9,7 +9,10 @@ type cmt_file_result = { (** Process a cmt file and return its results. Conceptually: map over files, then merge results. *) let loadCmtFile ~config cmtFilePath : cmt_file_result option = - let cmt_infos = Cmt_format.read_cmt cmtFilePath in + let cmt_infos = + if !Cli.cmtCache then CmtCache.read_cmt cmtFilePath + else Cmt_format.read_cmt cmtFilePath + in let excludePath sourceFile = config.DceConfig.cli.exclude_paths |> List.exists (fun prefix_ -> @@ -206,20 +209,26 @@ let processFilesParallel ~config ~numDomains (cmtFilePaths : string list) : Conceptually: map process_cmt_file over all files. *) let processCmtFiles ~config ~cmtRoot : all_files_result = let cmtFilePaths = collectCmtFilePaths ~cmtRoot in - let numDomains = - match !Cli.parallel with - | n when n > 0 -> n - | n when n < 0 -> - (* Auto-detect: use recommended domain count (number of cores) *) - Domain.recommended_domain_count () - | _ -> 0 - in - if numDomains > 0 then ( - if !Cli.timing then - Printf.eprintf "Using %d parallel domains for %d files\n%!" 
numDomains - (List.length cmtFilePaths); - processFilesParallel ~config ~numDomains cmtFilePaths) - else processFilesSequential ~config cmtFilePaths + (* Reactive mode: use incremental processing that skips unchanged files *) + if !Cli.reactive then + let result = ReactiveAnalysis.process_files_incremental ~config cmtFilePaths in + {dce_data_list = result.dce_data_list; exception_results = result.exception_results} + else begin + let numDomains = + match !Cli.parallel with + | n when n > 0 -> n + | n when n < 0 -> + (* Auto-detect: use recommended domain count (number of cores) *) + Domain.recommended_domain_count () + | _ -> 0 + in + if numDomains > 0 then ( + if !Cli.timing then + Printf.eprintf "Using %d parallel domains for %d files\n%!" numDomains + (List.length cmtFilePaths); + processFilesParallel ~config ~numDomains cmtFilePaths) + else processFilesSequential ~config cmtFilePaths + end (* Shuffle a list using Fisher-Yates algorithm *) let shuffle_list lst = @@ -345,14 +354,22 @@ let runAnalysis ~dce_config ~cmtRoot = let runAnalysisAndReport ~cmtRoot = Log_.Color.setup (); Timing.enabled := !Cli.timing; - Timing.reset (); if !Cli.json then EmitJson.start (); let dce_config = DceConfig.current () in - runAnalysis ~dce_config ~cmtRoot; - Log_.Stats.report ~config:dce_config; - Log_.Stats.clear (); - if !Cli.json then EmitJson.finish (); - Timing.report () + let numRuns = max 1 !Cli.runs in + for run = 1 to numRuns do + Timing.reset (); + if numRuns > 1 && !Cli.timing then + Printf.eprintf "\n=== Run %d/%d ===\n%!" 
run numRuns; + runAnalysis ~dce_config ~cmtRoot; + if run = numRuns then begin + (* Only report on last run *) + Log_.Stats.report ~config:dce_config; + Log_.Stats.clear () + end; + Timing.report () + done; + if !Cli.json then EmitJson.finish () let cli () = let analysisKindSet = ref false in @@ -463,6 +480,16 @@ let cli () = "n Process files in parallel using n domains (0 = sequential, default; \ -1 = auto-detect cores)" ); ("-timing", Set Cli.timing, "Report internal timing of analysis phases"); + ( "-cmt-cache", + Set Cli.cmtCache, + "Use mmap cache for CMT files (faster for repeated analysis)" ); + ( "-reactive", + Set Cli.reactive, + "Use reactive analysis (caches processed file_data, skips unchanged \ + files)" ); + ( "-runs", + Int (fun n -> Cli.runs := n), + "n Run analysis n times (for benchmarking cache effectiveness)" ); ("-version", Unit versionAndExit, "Show version information and exit"); ("--version", Unit versionAndExit, "Show version information and exit"); ] diff --git a/analysis/reanalyze/src/dune b/analysis/reanalyze/src/dune index e8b736446f..a0045f8230 100644 --- a/analysis/reanalyze/src/dune +++ b/analysis/reanalyze/src/dune @@ -2,4 +2,4 @@ (name reanalyze) (flags (-w "+6+26+27+32+33+39")) - (libraries jsonlib ext ml str unix)) + (libraries jsonlib ext ml str unix marshal_cache)) diff --git a/analysis/vendor/dune b/analysis/vendor/dune index 07b8286153..7ccd94c6b7 100644 --- a/analysis/vendor/dune +++ b/analysis/vendor/dune @@ -1 +1 @@ -(dirs ext ml res_syntax json flow_parser) +(dirs ext ml res_syntax json flow_parser skip-lite) diff --git a/analysis/vendor/skip-lite/dune b/analysis/vendor/skip-lite/dune new file mode 100644 index 0000000000..4830047662 --- /dev/null +++ b/analysis/vendor/skip-lite/dune @@ -0,0 +1,8 @@ +; skip-lite vendor directory +(dirs marshal_cache reactive_file_collection) + +; Test executable for CMT file support +(executable + (name test_cmt) + (modules test_cmt) + (libraries marshal_cache ml)) diff --git 
a/analysis/vendor/skip-lite/marshal_cache/dune b/analysis/vendor/skip-lite/marshal_cache/dune new file mode 100644 index 0000000000..714dbcfc98 --- /dev/null +++ b/analysis/vendor/skip-lite/marshal_cache/dune @@ -0,0 +1,7 @@ +(library + (name marshal_cache) + (foreign_stubs + (language cxx) + (names marshal_cache_stubs) + (flags (:standard -std=c++17))) + (c_library_flags (-lstdc++))) diff --git a/analysis/vendor/skip-lite/marshal_cache/marshal_cache.ml b/analysis/vendor/skip-lite/marshal_cache/marshal_cache.ml new file mode 100644 index 0000000000..66da5e9c9f --- /dev/null +++ b/analysis/vendor/skip-lite/marshal_cache/marshal_cache.ml @@ -0,0 +1,71 @@ +(* Marshal Cache - OCaml implementation *) + +exception Cache_error of string * string + +type stats = { + entry_count : int; + mapped_bytes : int; +} + +(* Register the exception with the C runtime for proper propagation *) +let () = Callback.register_exception + "Marshal_cache.Cache_error" + (Cache_error ("", "")) + +(* External C stubs *) +external with_unmarshalled_file_stub : string -> ('a -> 'r) -> 'r + = "mfc_with_unmarshalled_file" + +external with_unmarshalled_if_changed_stub : string -> ('a -> 'r) -> 'r option + = "mfc_with_unmarshalled_if_changed" + +external clear_stub : unit -> unit = "mfc_clear" +external invalidate_stub : string -> unit = "mfc_invalidate" +external set_max_entries_stub : int -> unit = "mfc_set_max_entries" +external set_max_bytes_stub : int -> unit = "mfc_set_max_bytes" +external stats_stub : unit -> int * int = "mfc_stats" + +(* Public API *) + +let convert_failure path msg = + (* C code raises Failure with "path: message" format *) + (* Only convert if message starts with the path (i.e., from our C code) *) + let prefix = path ^ ": " in + let prefix_len = String.length prefix in + if String.length msg >= prefix_len && String.sub msg 0 prefix_len = prefix then + let error_msg = String.sub msg prefix_len (String.length msg - prefix_len) in + raise (Cache_error (path, error_msg)) + 
else + (* Re-raise user callback exceptions as-is *) + raise (Failure msg) + +let with_unmarshalled_file path f = + try + with_unmarshalled_file_stub path f + with + | Failure msg -> convert_failure path msg + [@@alert "-unsafe"] + +let with_unmarshalled_if_changed path f = + try + with_unmarshalled_if_changed_stub path f + with + | Failure msg -> convert_failure path msg + [@@alert "-unsafe"] + +let clear () = clear_stub () + +let invalidate path = invalidate_stub path + +let set_max_entries n = + if n < 0 then invalid_arg "Marshal_cache.set_max_entries: negative value"; + set_max_entries_stub n + +let set_max_bytes n = + if n < 0 then invalid_arg "Marshal_cache.set_max_bytes: negative value"; + set_max_bytes_stub n + +let stats () = + let (entry_count, mapped_bytes) = stats_stub () in + { entry_count; mapped_bytes } + diff --git a/analysis/vendor/skip-lite/marshal_cache/marshal_cache.mli b/analysis/vendor/skip-lite/marshal_cache/marshal_cache.mli new file mode 100644 index 0000000000..091c3f69c6 --- /dev/null +++ b/analysis/vendor/skip-lite/marshal_cache/marshal_cache.mli @@ -0,0 +1,120 @@ +(** Marshal Cache + + A high-performance cache for marshalled files that keeps file contents + memory-mapped (off-heap) and provides efficient repeated access with + automatic invalidation when files change on disk. + + {2 Memory Model} + + There is no fixed-size memory pool. Each cached file gets its own [mmap] + of exactly its file size: + + - {b mmap'd bytes}: Live in virtual address space (off-heap), managed by + OS + cache LRU eviction + - {b Unmarshalled value}: Lives in OCaml heap, managed by GC, exists only + during callback + + Physical RAM pages are allocated on demand (first access). Under memory + pressure, the OS can evict pages back to disk since they're file-backed. 
+ + {2 Usage Example} + + {[ + Marshal_cache.with_unmarshalled_file "/path/to/data.marshal" + (fun (data : my_data_type) -> + (* Process data here - mmap stays valid for duration of callback *) + process data + ) + ]} + + {2 Platform Support} + + - macOS 10.13+: Fully supported + - Linux (glibc): Fully supported + - FreeBSD/OpenBSD: Should work (uses same mtime API as macOS) + - Windows: Not supported (no mmap) *) + +(** Exception raised for cache-related errors. + Contains the file path and an error message. *) +exception Cache_error of string * string + +(** Cache statistics. *) +type stats = { + entry_count : int; (** Number of files currently cached *) + mapped_bytes : int; (** Total bytes of memory-mapped data *) +} + +(** [with_unmarshalled_file path f] calls [f] with the unmarshalled value + from [path]. Guarantees the underlying mmap stays valid for the duration + of [f]. + + The cache automatically detects file changes via: + - Modification time (nanosecond precision where available) + - File size + - Inode number (detects atomic file replacement) + + {b Type safety warning}: This function is inherently unsafe. The caller + must ensure the type ['a] matches the actual marshalled data. Using the + wrong type results in undefined behavior (crashes, memory corruption). + This is equivalent to [Marshal.from_*] in terms of type safety. + + @raise Cache_error if the file cannot be read, mapped, or unmarshalled. + @raise exn if [f] raises; the cache state remains consistent. + + {b Thread safety}: Safe to call from multiple threads/domains. The cache + uses internal locking. The lock is released during the callback [f]. *) +val with_unmarshalled_file : string -> ('a -> 'r) -> 'r + [@@alert unsafe "Caller must ensure the file contains data of the expected type"] + +(** [with_unmarshalled_if_changed path f] is like {!with_unmarshalled_file} but + only unmarshals if the file changed since the last access. 
+ + Returns [Some (f data)] if the file changed (or is accessed for the first time). + Returns [None] if the file has not changed since last access (no unmarshal occurs). + + This is the key primitive for building reactive/incremental systems: + {[ + let my_cache = Hashtbl.create 100 + + let get_result path = + match Marshal_cache.with_unmarshalled_if_changed path process with + | Some result -> + Hashtbl.replace my_cache path result; + result + | None -> + Hashtbl.find my_cache path (* use cached result *) + ]} + + @raise Cache_error if the file cannot be read, mapped, or unmarshalled. + @raise exn if [f] raises; the cache state remains consistent. *) +val with_unmarshalled_if_changed : string -> ('a -> 'r) -> 'r option + [@@alert unsafe "Caller must ensure the file contains data of the expected type"] + +(** Remove all entries from the cache, unmapping all memory. + Entries currently in use (during a callback) are preserved and will be + cleaned up when their callbacks complete. *) +val clear : unit -> unit + +(** [invalidate path] removes a specific path from the cache. + No-op if the path is not cached or is currently in use. *) +val invalidate : string -> unit + +(** [set_max_entries n] sets the maximum number of cached entries. + When exceeded, least-recently-used entries are evicted. + Default: 10000. Set to 0 for unlimited (not recommended for long-running + processes). + + @raise Invalid_argument if [n < 0] *) +val set_max_entries : int -> unit + +(** [set_max_bytes n] sets the maximum total bytes of mapped memory. + When exceeded, least-recently-used entries are evicted. + Default: 1GB (1073741824). Set to 0 for unlimited. + + @raise Invalid_argument if [n < 0] *) +val set_max_bytes : int -> unit + +(** [stats ()] returns cache statistics. + Useful for monitoring cache usage. 
*) +val stats : unit -> stats + diff --git a/analysis/vendor/skip-lite/marshal_cache/marshal_cache_stubs.cpp b/analysis/vendor/skip-lite/marshal_cache/marshal_cache_stubs.cpp new file mode 100644 index 0000000000..a18ba1b5a1 --- /dev/null +++ b/analysis/vendor/skip-lite/marshal_cache/marshal_cache_stubs.cpp @@ -0,0 +1,804 @@ +// marshal_cache_stubs.cpp +// skip-lite: Marshal cache with mmap and LRU eviction +// OCaml 5+ compatible +// +// ============================================================================= +// WARNING: OCaml C FFI and GC Pitfalls +// ============================================================================= +// +// This file interfaces with the OCaml runtime. The OCaml garbage collector +// can move values in memory at any allocation point. Failure to handle this +// correctly causes memory corruption and segfaults. +// +// KEY RULES: +// +// 1. NEVER use String_val(v) across an allocation +// ------------------------------------------------ +// BAD: +// const char* s = String_val(str_val); +// some_ocaml_alloc(); // GC may run, str_val moves, s is now dangling +// use(s); // SEGFAULT +// +// GOOD: +// std::string s(String_val(str_val)); // Copy to C++ string first +// some_ocaml_alloc(); +// use(s.c_str()); // Safe, C++ owns the memory +// +// 2. NEVER nest allocations in Store_field +// ------------------------------------------------ +// BAD: +// value tuple = caml_alloc_tuple(2); +// Store_field(tuple, 0, caml_copy_string(s)); // DANGEROUS! +// // caml_copy_string allocates, may trigger GC, tuple address is +// // computed BEFORE the call, so we write to stale memory +// +// GOOD: +// value tuple = caml_alloc_tuple(2); +// value str = caml_copy_string(s); // Allocate first +// Store_field(tuple, 0, str); // Then store +// +// 3. CAMLlocal doesn't help with evaluation order +// ------------------------------------------------ +// CAMLlocal registers a variable so GC updates it when values move. 
+// But it doesn't fix the evaluation order problem in Store_field. +// The address computation happens before the nested function call. +// +// 4. Raising exceptions from C is tricky +// ------------------------------------------------ +// caml_raise* functions do a longjmp, so: +// - CAMLparam/CAMLlocal frames are not properly unwound +// - C++ destructors may not run (avoid RAII in throwing paths) +// - Prefer raising simple exceptions (Failure) and converting in OCaml +// +// 5. Callbacks can trigger arbitrary GC +// ------------------------------------------------ +// When calling caml_callback*, the OCaml code can allocate freely. +// All value variables from before the callback may be stale after. +// Either re-read them or use CAMLlocal to keep them updated. +// +// CURRENT APPROACH: +// - Errors are raised as Failure("path: message") from C +// - The OCaml wrapper catches Failure and converts to Cache_error +// - This avoids complex allocation sequences in exception-raising paths +// +// ============================================================================= + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +// OCaml headers +extern "C" { +#include +#include +#include +#include +#include +#include +} + +// Platform-specific mtime access (nanosecond precision) +#if defined(__APPLE__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) + #define MTIME_SEC(st) ((st).st_mtimespec.tv_sec) + #define MTIME_NSEC(st) ((st).st_mtimespec.tv_nsec) +#else // Linux and others + #define MTIME_SEC(st) ((st).st_mtim.tv_sec) + #define MTIME_NSEC(st) ((st).st_mtim.tv_nsec) +#endif + +namespace { + +// File identity for cache invalidation (mtime + size + inode) +struct FileId { + time_t mtime_sec; + long mtime_nsec; + off_t size; + ino_t ino; + + bool operator==(const FileId& other) const { + return mtime_sec == other.mtime_sec && + mtime_nsec == other.mtime_nsec && 
+ size == other.size && + ino == other.ino; + } + + bool operator!=(const FileId& other) const { + return !(*this == other); + } +}; + +// A memory mapping +struct Mapping { + void* ptr = nullptr; + size_t len = 0; + FileId file_id = {}; + + bool is_valid() const { + return ptr != nullptr && ptr != MAP_FAILED && ptr != reinterpret_cast(1); + } +}; + +// Cache entry for a single file +struct Entry { + std::string path; + Mapping current; + size_t in_use = 0; // Number of active callbacks + std::vector old_mappings; // Deferred unmaps + std::list::iterator lru_iter; +}; + +// The global cache singleton +class MarshalCache { +public: + static MarshalCache& instance() { + static MarshalCache inst; + return inst; + } + + // Acquire a mapping, incrementing in_use. Returns pointer, length, and whether file changed. + // Throws std::runtime_error on failure. + void acquire_mapping(const std::string& path, void** out_ptr, size_t* out_len, bool* out_changed); + + // Release a mapping, decrementing in_use and cleaning up old mappings. 
+ void release_mapping(const std::string& path); + + // Clear all entries (only those not in use) + void clear(); + + // Invalidate a specific path + void invalidate(const std::string& path); + + // Set limits + void set_max_entries(size_t n) { + std::lock_guard lock(mutex_); + max_entries_ = n; + evict_if_needed(); + } + + void set_max_bytes(size_t n) { + std::lock_guard lock(mutex_); + max_bytes_ = n; + evict_if_needed(); + } + + // Stats: (entry_count, total_mapped_bytes) + std::pair stats() { + std::lock_guard lock(mutex_); + return {cache_.size(), current_bytes_}; + } + +private: + MarshalCache() = default; + ~MarshalCache() { clear_internal(); } + + // Prevent copying + MarshalCache(const MarshalCache&) = delete; + MarshalCache& operator=(const MarshalCache&) = delete; + + // Must be called with mutex_ held + void evict_if_needed(); + void unmap_mapping(const Mapping& m); + void touch_lru(Entry& entry); + void clear_internal(); + + // Get file identity, throws on error + FileId get_file_id(const char* path); + + // Create a new mapping for a file, throws on error + Mapping create_mapping(const char* path, const FileId& file_id); + + std::unordered_map cache_; + std::list lru_order_; // front = most recent + std::mutex mutex_; + + size_t max_entries_ = 10000; + size_t max_bytes_ = 1ULL << 30; // 1GB default + size_t current_bytes_ = 0; +}; + +FileId MarshalCache::get_file_id(const char* path) { + struct stat st; + if (stat(path, &st) != 0) { + throw std::runtime_error(std::string("stat failed: ") + path + ": " + strerror(errno)); + } + return FileId{ + MTIME_SEC(st), + MTIME_NSEC(st), + st.st_size, + st.st_ino + }; +} + +Mapping MarshalCache::create_mapping(const char* path, const FileId& file_id) { + int fd = open(path, O_RDONLY); + if (fd < 0) { + throw std::runtime_error(std::string("open failed: ") + path + ": " + strerror(errno)); + } + + size_t len = static_cast(file_id.size); + void* ptr = nullptr; + + if (len > 0) { + ptr = mmap(nullptr, len, 
PROT_READ, MAP_PRIVATE, fd, 0); + } else { + // Empty file: use a sentinel non-null pointer + ptr = reinterpret_cast(1); + } + + // Close fd immediately - mapping remains valid on POSIX + close(fd); + + if (len > 0 && (ptr == MAP_FAILED || ptr == nullptr)) { + throw std::runtime_error(std::string("mmap failed: ") + path + ": " + strerror(errno)); + } + + Mapping m; + m.ptr = ptr; + m.len = len; + m.file_id = file_id; + return m; +} + +void MarshalCache::unmap_mapping(const Mapping& m) { + if (m.is_valid() && m.len > 0) { + munmap(m.ptr, m.len); + } +} + +void MarshalCache::touch_lru(Entry& entry) { + // Move to front of LRU list + lru_order_.erase(entry.lru_iter); + lru_order_.push_front(entry.path); + entry.lru_iter = lru_order_.begin(); +} + +void MarshalCache::evict_if_needed() { + // Must be called with mutex_ held + // Use >= because this is called BEFORE adding a new entry + while ((max_entries_ > 0 && cache_.size() >= max_entries_) || + (max_bytes_ > 0 && current_bytes_ >= max_bytes_)) { + if (lru_order_.empty()) break; + + // Find least-recently-used entry that is not in use + bool evicted = false; + for (auto it = lru_order_.rbegin(); it != lru_order_.rend(); ++it) { + auto cache_it = cache_.find(*it); + if (cache_it != cache_.end() && cache_it->second.in_use == 0) { + Entry& entry = cache_it->second; + + // Unmap current and all old mappings + unmap_mapping(entry.current); + for (const auto& m : entry.old_mappings) { + unmap_mapping(m); + } + current_bytes_ -= entry.current.len; + + lru_order_.erase(entry.lru_iter); + cache_.erase(cache_it); + evicted = true; + break; + } + } + if (!evicted) break; // All entries are in use + } +} + +void MarshalCache::acquire_mapping(const std::string& path, + void** out_ptr, size_t* out_len, bool* out_changed) { + std::unique_lock lock(mutex_); + + // Get current file identity + FileId current_id = get_file_id(path.c_str()); + + // Lookup or create entry + auto it = cache_.find(path); + bool needs_remap = false; + + if 
(it == cache_.end()) { + needs_remap = true; + } else if (it->second.current.file_id != current_id) { + needs_remap = true; + } + + if (needs_remap) { + // Only evict if we're adding a NEW entry (not updating existing) + // This prevents evicting the entry we're about to update + if (it == cache_.end()) { + evict_if_needed(); + } + + // Create new mapping (may throw) + Mapping new_mapping = create_mapping(path.c_str(), current_id); + + if (it == cache_.end()) { + // Insert new entry + Entry entry; + entry.path = path; + entry.current = new_mapping; + entry.in_use = 0; + lru_order_.push_front(path); + entry.lru_iter = lru_order_.begin(); + + cache_[path] = std::move(entry); + it = cache_.find(path); + } else { + // Update existing entry + Entry& entry = it->second; + Mapping old = entry.current; + entry.current = new_mapping; + + // Handle old mapping + if (old.is_valid()) { + if (entry.in_use == 0) { + unmap_mapping(old); + } else { + // Defer unmap until callbacks complete + entry.old_mappings.push_back(old); + } + current_bytes_ -= old.len; + } + } + + current_bytes_ += new_mapping.len; + } + + Entry& entry = it->second; + entry.in_use++; + touch_lru(entry); + + *out_ptr = entry.current.ptr; + *out_len = entry.current.len; + *out_changed = needs_remap; + + // Mutex released here (RAII) +} + +void MarshalCache::release_mapping(const std::string& path) { + std::lock_guard lock(mutex_); + + auto it = cache_.find(path); + if (it == cache_.end()) return; // Entry was evicted + + Entry& entry = it->second; + if (entry.in_use > 0) { + entry.in_use--; + } + + if (entry.in_use == 0 && !entry.old_mappings.empty()) { + // Clean up deferred unmaps + for (const auto& m : entry.old_mappings) { + unmap_mapping(m); + } + entry.old_mappings.clear(); + } +} + +void MarshalCache::clear_internal() { + for (auto& [path, entry] : cache_) { + if (entry.in_use == 0) { + unmap_mapping(entry.current); + } + for (const auto& m : entry.old_mappings) { + unmap_mapping(m); + } + } + 
cache_.clear(); + lru_order_.clear(); + current_bytes_ = 0; +} + +void MarshalCache::clear() { + std::lock_guard lock(mutex_); + + // Only clear entries not in use + for (auto it = cache_.begin(); it != cache_.end(); ) { + Entry& entry = it->second; + + // Always clean up old_mappings + for (const auto& m : entry.old_mappings) { + unmap_mapping(m); + } + entry.old_mappings.clear(); + + if (entry.in_use == 0) { + unmap_mapping(entry.current); + current_bytes_ -= entry.current.len; + lru_order_.erase(entry.lru_iter); + it = cache_.erase(it); + } else { + ++it; + } + } +} + +void MarshalCache::invalidate(const std::string& path) { + std::lock_guard lock(mutex_); + + auto it = cache_.find(path); + if (it == cache_.end()) return; + + Entry& entry = it->second; + + // Clean up old_mappings + for (const auto& m : entry.old_mappings) { + unmap_mapping(m); + } + entry.old_mappings.clear(); + + if (entry.in_use == 0) { + unmap_mapping(entry.current); + current_bytes_ -= entry.current.len; + lru_order_.erase(entry.lru_iter); + cache_.erase(it); + } + // If in_use > 0, the entry stays but will be refreshed on next access +} + +} // anonymous namespace + + +// ============================================================================= +// OCaml FFI stubs +// ============================================================================= + +extern "C" { + +// Helper to raise an error as Failure (converted to Cache_error in OCaml) +[[noreturn]] +static void raise_cache_error(const char* path, const char* message) { + std::string full_msg = std::string(path) + ": " + message; + caml_failwith(full_msg.c_str()); +} + +// ============================================================================= +// CMT/CMI file format support +// ============================================================================= +// +// ReScript/OCaml compiler generates several file types with headers before Marshal data: +// +// Pure .cmt files (typed tree only): +// - "Caml1999T0xx" (12 bytes) - CMT 
// magic
// - Marshal data (cmt_infos record)
//
// Combined .cmt/.cmti files (interface + typed tree):
// - "Caml1999I0xx" (12 bytes) - CMI magic
// - Marshal data #1 (cmi_name, cmi_sign)
// - Marshal data #2 (crcs)
// - Marshal data #3 (flags)
// - "Caml1999T0xx" (12 bytes) - CMT magic
// - Marshal data (cmt_infos record)
//
// Pure .cmi files (compiled interface only):
// - "Caml1999I0xx" (12 bytes) - CMI magic
// - Marshal data #1 (cmi_name, cmi_sign)
// - Marshal data #2 (crcs)
// - Marshal data #3 (flags)
//
// This code handles all formats and finds the CMT Marshal data.
// =============================================================================

static constexpr size_t OCAML_MAGIC_LENGTH = 12;
static constexpr const char* CMT_MAGIC_PREFIX = "Caml1999T";
static constexpr const char* CMI_MAGIC_PREFIX = "Caml1999I";
static constexpr size_t MAGIC_PREFIX_LENGTH = 9; // Length of "Caml1999T" or "Caml1999I"

// Read a big-endian 32-bit word (all Marshal header fields are big-endian).
static uint32_t read_be32(const unsigned char* p) {
  return (static_cast<uint32_t>(p[0]) << 24) |
         (static_cast<uint32_t>(p[1]) << 16) |
         (static_cast<uint32_t>(p[2]) << 8) |
         static_cast<uint32_t>(p[3]);
}

// Check if data at offset starts with a specific byte prefix.
static bool has_prefix_at(const unsigned char* data, size_t len, size_t offset,
                          const char* prefix, size_t prefix_len) {
  if (len < offset + prefix_len) return false;
  return memcmp(data + offset, prefix, prefix_len) == 0;
}

// Check for a Marshal magic number at the given offset.
// 0x8495A6BE = small (32-bit) header, 0x8495A6BF = big (64-bit) header.
static bool has_marshal_magic_at(const unsigned char* data, size_t len, size_t offset) {
  if (len < offset + 4) return false;
  uint32_t magic = read_be32(data + offset);
  return magic == 0x8495A6BEu || magic == 0x8495A6BFu;
}

// Get the total size (header + payload) of the Marshal value at [offset].
//
// Small header (20 bytes): magic(4) data_len(4) num_objects(4) size_32(4) size_64(4)
// Big header  (32 bytes): magic(4) reserved(4, always zero) data_len(8)
//                          num_objects(8) whsize(8)
//
// Throws std::runtime_error on truncated input or oversized payloads.
static size_t get_marshal_total_size(const unsigned char* data, size_t len, size_t offset) {
  if (len < offset + 20) {
    throw std::runtime_error("not enough data for Marshal header");
  }

  uint32_t magic = read_be32(data + offset);
  bool is_large = (magic == 0x8495A6BFu);
  size_t header_size = is_large ? 32 : 20;

  if (len < offset + header_size) {
    throw std::runtime_error("not enough data for Marshal header");
  }

  uint32_t data_len;
  if (is_large) {
    // BUG FIX: in the big format, bytes 4-7 hold a reserved word (always 0);
    // the 64-bit big-endian data_len lives at bytes 8-15, NOT at bytes 4-11.
    // The old code read the reserved word as the "high" half (which passed the
    // zero check by accident) and the HIGH half of data_len as the length,
    // yielding 0 for any section under 4GB.
    uint32_t high = read_be32(data + offset + 8);
    uint32_t low = read_be32(data + offset + 12);
    if (high != 0) {
      throw std::runtime_error("Marshal data too large (>4GB)");
    }
    data_len = low;
  } else {
    data_len = read_be32(data + offset + 4);
  }

  return header_size + static_cast<size_t>(data_len);
}

// Find the offset where CMT Marshal data starts; throws on error.
// Handles: bare Marshal files, pure CMT files ("Caml1999T..."), and combined
// CMI+CMT files ("Caml1999I..." followed by three CMI Marshal values and an
// optional CMT section).
static size_t find_cmt_marshal_offset(const unsigned char* data, size_t len) {
  if (len < 4) {
    throw std::runtime_error("file too small");
  }

  // Bare Marshal file (no compiler header at all).
  if (has_marshal_magic_at(data, len, 0)) {
    return 0;
  }

  // Pure CMT file: 12-byte magic, then the cmt_infos Marshal value.
  if (has_prefix_at(data, len, 0, CMT_MAGIC_PREFIX, MAGIC_PREFIX_LENGTH)) {
    if (len < OCAML_MAGIC_LENGTH + 4) {
      throw std::runtime_error("CMT file too small");
    }
    if (!has_marshal_magic_at(data, len, OCAML_MAGIC_LENGTH)) {
      throw std::runtime_error("CMT file: no Marshal magic after header");
    }
    return OCAML_MAGIC_LENGTH;
  }

  // CMI header: may be a pure .cmi or a combined CMI+CMT file.
  // Skip the three CMI Marshal values ((cmi_name, cmi_sign), crcs, flags)
  // to see whether a CMT section follows.
  if (has_prefix_at(data, len, 0, CMI_MAGIC_PREFIX, MAGIC_PREFIX_LENGTH)) {
    if (len < OCAML_MAGIC_LENGTH + 4) {
      throw std::runtime_error("CMI file too small");
    }

    size_t offset = OCAML_MAGIC_LENGTH;
    for (int i = 0; i < 3; i++) {
      if (!has_marshal_magic_at(data, len, offset)) {
        throw std::runtime_error("CMI file: expected Marshal value in CMI section");
      }
      size_t marshal_size = get_marshal_total_size(data, len, offset);
      // Compare against the remaining bytes rather than `offset + size > len`:
      // a corrupt (huge) data_len could wrap `offset + size` around on 32-bit
      // size_t and slip past the old post-addition check.
      if (marshal_size > len - offset) {
        throw std::runtime_error("CMI file: Marshal value extends past end of file");
      }
      offset += marshal_size;
    }

    // Now check if there's a CMT section after the CMI data.
    if (has_prefix_at(data, len, offset, CMT_MAGIC_PREFIX, MAGIC_PREFIX_LENGTH)) {
      offset += OCAML_MAGIC_LENGTH;
      if (!has_marshal_magic_at(data, len, offset)) {
        throw std::runtime_error("CMT section: no Marshal magic after header");
      }
      return offset;
    }

    // No CMT section - this is a pure CMI file.
    throw std::runtime_error("CMI file without CMT section - use read_cmi instead");
  }

  // Unknown format.
  throw std::runtime_error("unrecognized file format (not Marshal, CMT, or CMI)");
}

// Unmarshal from mmap'd memory (zero-copy using OCaml 5+ API)
// Handles both pure Marshal files and
CMT/CMI files with headers +static value unmarshal_from_ptr(void* ptr, size_t len) { + CAMLparam0(); + CAMLlocal1(result); + + if (len == 0) { + caml_failwith("marshal_cache: empty file"); + } + + const unsigned char* data = static_cast(ptr); + + // Find where CMT Marshal data starts (handles CMT/CMI headers) + size_t offset; + try { + offset = find_cmt_marshal_offset(data, len); + } catch (const std::exception& e) { + std::string msg = std::string("marshal_cache: ") + e.what(); + caml_failwith(msg.c_str()); + } + + // Validate remaining length + size_t marshal_len = len - offset; + if (marshal_len < 20) { + caml_failwith("marshal_cache: Marshal data too small"); + } + + // OCaml 5+ API: unmarshal directly from memory block (zero-copy!) + const char* marshal_ptr = reinterpret_cast(data + offset); + result = caml_input_value_from_block(marshal_ptr, static_cast(marshal_len)); + + CAMLreturn(result); +} + +// Main entry point: with_unmarshalled_file +CAMLprim value mfc_with_unmarshalled_file(value path_val, value closure_val) { + CAMLparam2(path_val, closure_val); + CAMLlocal2(unmarshalled, result); + + const char* path = String_val(path_val); + std::string path_str(path); + + void* ptr = nullptr; + size_t len = 0; + bool changed = false; + + // Acquire mapping (may throw) + try { + MarshalCache::instance().acquire_mapping(path_str, &ptr, &len, &changed); + } catch (const std::exception& e) { + // Use path_str.c_str() instead of path, because raise_cache_error + // allocates and can trigger GC which would invalidate the pointer + // from String_val(path_val) + raise_cache_error(path_str.c_str(), e.what()); + CAMLreturn(Val_unit); // Not reached + } + + // Unmarshal (may allocate, may trigger GC, may raise) + unmarshalled = unmarshal_from_ptr(ptr, len); + + // Call the OCaml callback + result = caml_callback_exn(closure_val, unmarshalled); + + // Release mapping before potentially re-raising + MarshalCache::instance().release_mapping(path_str); + + // Check if callback 
raised an exception + if (Is_exception_result(result)) { + value exn = Extract_exception(result); + caml_raise(exn); + } + + CAMLreturn(result); +} + +// Reactive entry point: only unmarshal if file changed +// Returns Some(f(data)) if changed, None if unchanged +CAMLprim value mfc_with_unmarshalled_if_changed(value path_val, value closure_val) { + CAMLparam2(path_val, closure_val); + CAMLlocal3(unmarshalled, result, some_result); + + const char* path = String_val(path_val); + std::string path_str(path); + + void* ptr = nullptr; + size_t len = 0; + bool changed = false; + + // Acquire mapping (may throw) + try { + MarshalCache::instance().acquire_mapping(path_str, &ptr, &len, &changed); + } catch (const std::exception& e) { + raise_cache_error(path_str.c_str(), e.what()); + CAMLreturn(Val_unit); // Not reached + } + + if (!changed) { + // File unchanged - release and return None + MarshalCache::instance().release_mapping(path_str); + CAMLreturn(Val_none); + } + + // File changed - unmarshal and call callback + unmarshalled = unmarshal_from_ptr(ptr, len); + + // Call the OCaml callback + result = caml_callback_exn(closure_val, unmarshalled); + + // Release mapping before potentially re-raising + MarshalCache::instance().release_mapping(path_str); + + // Check if callback raised an exception + if (Is_exception_result(result)) { + value exn = Extract_exception(result); + caml_raise(exn); + } + + // Wrap in Some + some_result = caml_alloc(1, 0); + Store_field(some_result, 0, result); + + CAMLreturn(some_result); +} + +// Clear all cache entries +CAMLprim value mfc_clear(value unit) { + CAMLparam1(unit); + MarshalCache::instance().clear(); + CAMLreturn(Val_unit); +} + +// Invalidate a specific path +CAMLprim value mfc_invalidate(value path_val) { + CAMLparam1(path_val); + const char* path = String_val(path_val); + std::string path_str(path); // Copy immediately for consistency + MarshalCache::instance().invalidate(path_str); + CAMLreturn(Val_unit); +} + +// Set max 
entries +CAMLprim value mfc_set_max_entries(value n_val) { + CAMLparam1(n_val); + size_t n = Long_val(n_val); + MarshalCache::instance().set_max_entries(n); + CAMLreturn(Val_unit); +} + +// Set max bytes +CAMLprim value mfc_set_max_bytes(value n_val) { + CAMLparam1(n_val); + size_t n = Long_val(n_val); + MarshalCache::instance().set_max_bytes(n); + CAMLreturn(Val_unit); +} + +// Get stats: returns (entry_count, total_mapped_bytes) +CAMLprim value mfc_stats(value unit) { + CAMLparam1(unit); + CAMLlocal1(result); + + auto [entries, bytes] = MarshalCache::instance().stats(); + + result = caml_alloc_tuple(2); + Store_field(result, 0, Val_long(entries)); + Store_field(result, 1, Val_long(bytes)); + + CAMLreturn(result); +} + +} // extern "C" + diff --git a/analysis/vendor/skip-lite/reactive_file_collection/dune b/analysis/vendor/skip-lite/reactive_file_collection/dune new file mode 100644 index 0000000000..e83405cb88 --- /dev/null +++ b/analysis/vendor/skip-lite/reactive_file_collection/dune @@ -0,0 +1,3 @@ +(library + (name reactive_file_collection) + (libraries marshal_cache)) diff --git a/analysis/vendor/skip-lite/reactive_file_collection/reactive_file_collection.ml b/analysis/vendor/skip-lite/reactive_file_collection/reactive_file_collection.ml new file mode 100644 index 0000000000..a7e1babf5f --- /dev/null +++ b/analysis/vendor/skip-lite/reactive_file_collection/reactive_file_collection.ml @@ -0,0 +1,95 @@ +(* Reactive File Collection - Implementation *) + +type event = + | Added of string + | Removed of string + | Modified of string + +type 'v t = { + data : (string, 'v) Hashtbl.t; + process : 'a. 'a -> 'v; +} + +(* We need to use Obj.magic to make the polymorphic process function work + with Marshal_cache which returns 'a. This is safe because the user + guarantees the file contains data of the expected type. 
*) +type 'v process_fn = Obj.t -> 'v + +type 'v t_internal = { + data_internal : (string, 'v) Hashtbl.t; + process_internal : 'v process_fn; +} + +let create (type a v) ~(process : a -> v) : v t = + let process_internal : v process_fn = fun obj -> process (Obj.obj obj) in + let t = { + data_internal = Hashtbl.create 256; + process_internal; + } in + (* Safe cast - same representation *) + Obj.magic t + +let to_internal (t : 'v t) : 'v t_internal = Obj.magic t + +let add t path = + let t = to_internal t in + let value = Marshal_cache.with_unmarshalled_file path (fun data -> + t.process_internal (Obj.repr data) + ) in + Hashtbl.replace t.data_internal path value + [@@alert "-unsafe"] + +let remove t path = + let t = to_internal t in + Hashtbl.remove t.data_internal path + +let update t path = + (* Just reload - Marshal_cache handles the file reading efficiently *) + add t path + +let apply t events = + List.iter (function + | Added path -> add t path + | Removed path -> remove t path + | Modified path -> update t path + ) events + +let get t path = + let t = to_internal t in + Hashtbl.find_opt t.data_internal path + +let find t path = + let t = to_internal t in + Hashtbl.find t.data_internal path + +let mem t path = + let t = to_internal t in + Hashtbl.mem t.data_internal path + +let length t = + let t = to_internal t in + Hashtbl.length t.data_internal + +let is_empty t = + length t = 0 + +let iter f t = + let t = to_internal t in + Hashtbl.iter f t.data_internal + +let fold f t init = + let t = to_internal t in + Hashtbl.fold f t.data_internal init + +let to_list t = + fold (fun k v acc -> (k, v) :: acc) t [] + +let paths t = + fold (fun k _ acc -> k :: acc) t [] + +let values t = + fold (fun _ v acc -> v :: acc) t [] + + + + diff --git a/analysis/vendor/skip-lite/reactive_file_collection/reactive_file_collection.mli b/analysis/vendor/skip-lite/reactive_file_collection/reactive_file_collection.mli new file mode 100644 index 0000000000..56ae3e4c2e --- /dev/null +++ 
b/analysis/vendor/skip-lite/reactive_file_collection/reactive_file_collection.mli @@ -0,0 +1,115 @@ +(** Reactive File Collection + + A collection that maps file paths to processed values, with efficient + delta-based updates. Designed for use with file watchers. + + {2 Usage Example} + + {[ + (* Create collection with processing function *) + let coll = Reactive_file_collection.create + ~process:(fun (data : Cmt_format.cmt_infos) -> + extract_types data + ) + + (* Initial load *) + List.iter (Reactive_file_collection.add coll) (glob "*.cmt") + + (* On file watcher events *) + match event with + | Created path -> Reactive_file_collection.add coll path + | Deleted path -> Reactive_file_collection.remove coll path + | Modified path -> Reactive_file_collection.update coll path + + (* Access the collection *) + Reactive_file_collection.iter (fun path value -> ...) coll + ]} + + {2 Thread Safety} + + Not thread-safe. Use external synchronization if accessed from + multiple threads/domains. *) + +(** The type of a reactive file collection with values of type ['v]. *) +type 'v t + +(** Events for batch updates. *) +type event = + | Added of string (** File was created *) + | Removed of string (** File was deleted *) + | Modified of string (** File was modified *) + +(** {1 Creation} *) + +val create : process:('a -> 'v) -> 'v t +(** [create ~process] creates an empty collection. + + [process] is called to transform unmarshalled file contents into values. + + {b Type safety warning}: The caller must ensure files contain data of + type ['a]. This has the same safety properties as [Marshal.from_*]. + + @alert unsafe Caller must ensure files contain data of the expected type *) + +(** {1 Delta Operations} *) + +val add : 'v t -> string -> unit +(** [add t path] adds a file to the collection. + Loads the file, unmarshals, and processes immediately. 
+ + @raise Marshal_cache.Cache_error if file cannot be read or unmarshalled *) + +val remove : 'v t -> string -> unit +(** [remove t path] removes a file from the collection. + No-op if path is not in collection. *) + +val update : 'v t -> string -> unit +(** [update t path] reloads a modified file. + Equivalent to remove + add, but more efficient. + + @raise Marshal_cache.Cache_error if file cannot be read or unmarshalled *) + +val apply : 'v t -> event list -> unit +(** [apply t events] applies multiple events. + More efficient than individual operations for batches. + + @raise Marshal_cache.Cache_error if any added/modified file fails *) + +(** {1 Access} *) + +val get : 'v t -> string -> 'v option +(** [get t path] returns the value for [path], or [None] if not present. *) + +val find : 'v t -> string -> 'v +(** [find t path] returns the value for [path]. + @raise Not_found if path is not in collection *) + +val mem : 'v t -> string -> bool +(** [mem t path] returns [true] if [path] is in the collection. *) + +val length : 'v t -> int +(** [length t] returns the number of files in the collection. *) + +val is_empty : 'v t -> bool +(** [is_empty t] returns [true] if the collection is empty. *) + +(** {1 Iteration} *) + +val iter : (string -> 'v -> unit) -> 'v t -> unit +(** [iter f t] applies [f] to each (path, value) pair. *) + +val fold : (string -> 'v -> 'acc -> 'acc) -> 'v t -> 'acc -> 'acc +(** [fold f t init] folds [f] over all (path, value) pairs. *) + +val to_list : 'v t -> (string * 'v) list +(** [to_list t] returns all (path, value) pairs as a list. *) + +val paths : 'v t -> string list +(** [paths t] returns all paths in the collection. *) + +val values : 'v t -> 'v list +(** [values t] returns all values in the collection. 
*) + + + + diff --git a/analysis/vendor/skip-lite/test_cmt.ml b/analysis/vendor/skip-lite/test_cmt.ml new file mode 100644 index 0000000000..c2a4c21f7e --- /dev/null +++ b/analysis/vendor/skip-lite/test_cmt.ml @@ -0,0 +1,119 @@ +(* Test that Marshal_cache can read CMT files *) + +[@@@alert "-unsafe"] + +let print_cmt_info (cmt : Cmt_format.cmt_infos) = + Printf.printf " Module name: %s\n%!" cmt.cmt_modname; + Printf.printf " Build dir: %s\n%!" cmt.cmt_builddir; + (match cmt.cmt_sourcefile with + | Some sf -> Printf.printf " Source file: %s\n%!" sf + | None -> Printf.printf " Source file: none\n%!") + +let test_cmt_file_standard path = + Printf.printf "Testing with Cmt_format.read_cmt: %s\n%!" path; + try + let cmt = Cmt_format.read_cmt path in + print_cmt_info cmt; + Printf.printf " SUCCESS with standard read_cmt\n%!"; + true + with e -> + Printf.printf " FAILED: %s\n%!" (Printexc.to_string e); + false + +let test_cmt_file_cache path = + Printf.printf "Testing with Marshal_cache: %s\n%!" path; + try + Marshal_cache.with_unmarshalled_file path (fun (cmt : Cmt_format.cmt_infos) -> + print_cmt_info cmt; + Printf.printf " SUCCESS with Marshal_cache!\n%!"; + true + ) + with + | Marshal_cache.Cache_error (p, msg) -> + Printf.printf " Cache_error: %s: %s\n%!" p msg; + false + | e -> + Printf.printf " FAILED: %s\n%!" (Printexc.to_string e); + false + +let test_cmt_file path = + if not (Sys.file_exists path) then begin + Printf.printf "File not found: %s\n%!" path; + false + end else begin + Printf.printf "\n=== Testing: %s ===\n%!" 
path; + let std_ok = test_cmt_file_standard path in + Printf.printf "\n%!"; + let cache_ok = test_cmt_file_cache path in + std_ok && cache_ok + end + + +let () = + Printf.printf "=== Marshal_cache CMT Test ===\n\n%!"; + + (* Get CMT files from command line args or find in lib/bs *) + let cmt_files = + if Array.length Sys.argv > 1 then + Array.to_list (Array.sub Sys.argv 1 (Array.length Sys.argv - 1)) + else begin + (* Find CMT files in lib/bs *) + let find_cmt_in_dir dir = + if Sys.file_exists dir && Sys.is_directory dir then begin + let rec find acc dir = + Array.fold_left (fun acc name -> + let path = Filename.concat dir name in + if Sys.is_directory path then + find acc path + else if Filename.check_suffix path ".cmt" then + path :: acc + else + acc + ) acc (Sys.readdir dir) + in + find [] dir + end else [] + in + let lib_bs = "lib/bs" in + let files = find_cmt_in_dir lib_bs in + Printf.printf "Found %d CMT files in %s\n\n%!" (List.length files) lib_bs; + files + end + in + + (* Test first 3 CMT files *) + let test_files = + cmt_files + |> List.sort String.compare + |> (fun l -> try List.filteri (fun i _ -> i < 3) l with _ -> l) + in + + List.iter (fun path -> + let _ = test_cmt_file path in + Printf.printf "\n%!" + ) test_files; + + (* Test if_changed API *) + Printf.printf "=== Testing with_unmarshalled_if_changed ===\n\n%!"; + Marshal_cache.clear (); (* Clear cache to start fresh *) + (match test_files with + | path :: _ -> + Printf.printf "First call (should process):\n%!"; + (match Marshal_cache.with_unmarshalled_if_changed path (fun (cmt : Cmt_format.cmt_infos) -> + Printf.printf " Processed: %s\n%!" cmt.cmt_modname; + cmt.cmt_modname + ) with + | Some name -> Printf.printf " Result: Some(%s) - SUCCESS (file was processed)\n%!" 
name + | None -> Printf.printf " Result: None (unexpected - should have processed!)\n%!"); + + Printf.printf "Second call (should return None - file unchanged):\n%!"; + (match Marshal_cache.with_unmarshalled_if_changed path (fun (cmt : Cmt_format.cmt_infos) -> + Printf.printf " Processed: %s\n%!" cmt.cmt_modname; + cmt.cmt_modname + ) with + | Some name -> Printf.printf " Result: Some(%s) (unexpected - file should be cached!)\n%!" name + | None -> Printf.printf " Result: None - SUCCESS (file was cached!)\n%!") + | [] -> Printf.printf "No CMT files to test\n%!"); + + Printf.printf "\n=== Test Complete ===\n%!" + diff --git a/docs/reactive_reanalyze_design.md b/docs/reactive_reanalyze_design.md new file mode 100644 index 0000000000..c19b0ac1c0 --- /dev/null +++ b/docs/reactive_reanalyze_design.md @@ -0,0 +1,469 @@ +# Reactive Reanalyze: Using skip-lite for Incremental Analysis + +## Executive Summary + +This document investigates how skip-lite's reactive collections can be used to create an analysis service that stays on and reacts to file changes, dramatically speeding up CMT processing for repeated analysis runs. + +**Key Insight**: The benchmark results from skip-lite show a **950x speedup** when processing only changed files vs. re-reading all files. Applied to reanalyze with ~4900 files (50 copies benchmark), this could reduce CMT processing from ~780ms to ~1-2ms for typical incremental changes. + +## Current Architecture + +### Reanalyze Processing Flow + +``` + ┌─────────────────┐ + │ Collect CMT │ + │ File Paths │ + └────────┬────────┘ + │ + ┌────────▼────────┐ + │ Load CMT Files │ ← 77% of time (~780ms) + │ (Cmt_format. 
│ + │ read_cmt) │ + └────────┬────────┘ + │ + ┌────────▼────────┐ + │ Process Each │ + │ File → file_data│ + └────────┬────────┘ + │ + ┌─────────────────┴─────────────────┐ + │ │ + ┌────────▼────────┐ ┌────────▼────────┐ + │ Merge Builders │ │ Exception │ + │ (annotations, │ │ Results │ + │ decls, refs, │ └─────────────────┘ + │ cross_file, │ + │ file_deps) │ ← 8% of time (~80ms) + └────────┬────────┘ + │ + ┌────────▼────────┐ + │ Solve (DCE, │ ← 15% of time (~150ms) + │ optional args) │ + └────────┬────────┘ + │ + ┌────────▼────────┐ + │ Report Issues │ ← <1% of time + └─────────────────┘ +``` + +### Current Bottleneck + +From the benchmark (50 copies, ~4900 files, 12 cores): + +| Phase | Sequential | Parallel | % of Total | +|-------|-----------|----------|------------| +| File loading | 779ms | 422ms | 77% / 64% | +| Merging | 81ms | 94ms | 8% / 14% | +| Solving | 146ms | 148ms | 15% / 22% | +| Total | 1007ms | 664ms | 100% | + +**CMT file loading is the dominant cost** because each file requires: +1. System call to open file +2. Reading marshalled data from disk +3. Unmarshalling into OCaml heap +4. AST traversal to extract analysis data + +## Proposed Architecture: Reactive Analysis Service + +### Design Goals + +1. **Persistent service** - Stay running and maintain state between analysis runs +2. **File watching** - React to file changes (create/modify/delete) +3. **Incremental updates** - Only process changed files +4. **Cached results** - Keep processed `file_data` in memory +5. **Fast iteration** - Sub-10ms response for typical edits + +### Integration with skip-lite + +skip-lite provides two key primitives: + +#### 1. 
`Marshal_cache` - Efficient CMT Loading + +```ocaml +(* Instead of Cmt_format.read_cmt which does file I/O every time *) +let load_cmt path = + Marshal_cache.with_unmarshalled_file path (fun cmt_infos -> + DceFileProcessing.process_cmt_file ~config ~file ~cmtFilePath cmt_infos + ) +``` + +**Benefits**: +- Memory-mapped, off-heap storage (not GC-scanned) +- LRU eviction for memory management +- Automatic invalidation on file change + +#### 2. `Reactive_file_collection` - Delta-Based Processing + +```ocaml +(* Create collection that maps CMT paths to processed file_data *) +let cmt_collection = Reactive_file_collection.create + ~process:(fun (cmt_infos : Cmt_format.cmt_infos) -> + (* This is called only when file changes *) + process_cmt_for_dce ~config cmt_infos + ) + +(* Initial load - process all files once *) +List.iter (Reactive_file_collection.add cmt_collection) all_cmt_paths + +(* On file watcher event - only process changed files *) +Reactive_file_collection.apply cmt_collection [ + Modified "lib/bs/src/MyModule.cmt"; + Modified "lib/bs/src/MyModule.cmti"; +] + +(* Get all processed data for analysis *) +let file_data_list = Reactive_file_collection.values cmt_collection +``` + +### Service Architecture + +``` +┌────────────────────────────────────────────────────────────────┐ +│ Reanalyze Service │ +├────────────────────────────────────────────────────────────────┤ +│ │ +│ ┌──────────────┐ ┌─────────────────────────────────┐ │ +│ │ File Watcher │─────▶│ Reactive_file_collection │ │ +│ │ (fswatch/ │ │ ┌───────────────────────────┐ │ │ +│ │ inotify) │ │ │ path → file_data cache │ │ │ +│ └──────────────┘ │ │ (backed by Marshal_cache) │ │ │ +│ │ └───────────────────────────┘ │ │ +│ └──────────┬──────────────────────┘ │ +│ │ │ +│ │ file_data_list │ +│ ▼ │ +│ ┌─────────────────────────────────┐ │ +│ │ Incremental Merge & Solve │ │ +│ │ (may be reactive in future) │ │ +│ └──────────┬──────────────────────┘ │ +│ │ │ +│ ▼ │ +│ ┌─────────────────────────────────┐ │ 
+│ │ Issues / Reports │ │ +│ └─────────────────────────────────┘ │ +│ │ +└────────────────────────────────────────────────────────────────┘ +``` + +### API Design + +```ocaml +module ReactiveReanalyze : sig + type t + (** A reactive analysis service *) + + val create : config:DceConfig.t -> project_root:string -> t + (** Create a new reactive analysis service *) + + val start : t -> unit + (** Start file watching and initial analysis *) + + val stop : t -> unit + (** Stop file watching *) + + val analyze : t -> AnalysisResult.t + (** Run analysis on current state. Fast if no files changed. *) + + val on_file_change : t -> string -> unit + (** Notify of a file change (for external file watchers) *) + + val apply_events : t -> Reactive_file_collection.event list -> unit + (** Apply batch of file events *) +end +``` + +## Performance Analysis + +### Expected Speedup + +| Scenario | Current | With skip-lite | Speedup | +|----------|---------|----------------|---------| +| Cold start (all files) | 780ms | 780ms | 1x | +| Warm cache, no changes | 780ms | ~20ms | **39x** | +| Single file changed | 780ms | ~2ms | **390x** | +| 10 files changed | 780ms | ~15ms | **52x** | + +### How skip-lite Achieves This + +1. **Marshal_cache.with_unmarshalled_if_changed**: + - Stats all files to check modification time (~20ms for 5000 files) + - Only unmarshals files that changed + - Returns `None` for unchanged files, `Some result` for changed + +2. 
**Reactive_file_collection**: + - Maintains hash table of processed values + - On `apply`, only processes files in the event list + - Iteration is O(n) but values are already computed + +### Memory Considerations + +| Data | Storage | GC Impact | +|------|---------|-----------| +| CMT file bytes | mmap (off-heap) | None | +| Unmarshalled cmt_infos | OCaml heap (temporary) | During callback only | +| Processed file_data | OCaml heap (cached) | Scanned by GC | + +For 5000 files with average 20KB each: +- mmap cache: ~100MB (off-heap, OS-managed) +- file_data cache: ~50MB (on-heap, estimate) + +## Implementation Plan + +### Phase 1: Integration Setup + +1. **Add skip-lite dependency** to dune/opam +2. **Create wrapper module** `CmtCache` that provides: + ```ocaml + val read_cmt : string -> Cmt_format.cmt_infos + (** Drop-in replacement for Cmt_format.read_cmt using Marshal_cache *) + ``` + +### Phase 2: Reactive Collection + +1. **Define file_data type** as the cached result type +2. **Create reactive collection** for CMT → file_data mapping +3. **Implement delta processing** that only reprocesses changed files + +### Phase 3: Analysis Service + +1. **File watching integration** (can use fswatch, inotify, or external watcher) +2. **Service loop** that waits for events and re-runs analysis +3. 
**LSP integration** (optional) for editor support + +### Phase 4: Incremental Merge & Solve (Future) + +The current merge and solve phases are relatively fast (22% of time), but could be made incremental in the future: + +- Track which declarations changed +- Incrementally update reference graph +- Re-solve only affected transitive closure + +## Prototype Implementation + +Here's a minimal prototype showing how to integrate `Reactive_file_collection`: + +```ocaml +(* reactive_analysis.ml *) + +module CmtCollection = struct + type file_data = DceFileProcessing.file_data + + let collection : file_data Reactive_file_collection.t option ref = ref None + + let init ~config ~cmt_paths = + let coll = Reactive_file_collection.create + ~process:(fun (cmt_infos : Cmt_format.cmt_infos) -> + (* Extract file context from cmt_infos *) + let source_path = + match cmt_infos.cmt_annots |> FindSourceFile.cmt with + | Some path -> path + | None -> failwith "No source file" + in + let module_name = Paths.getModuleName source_path in + let is_interface = match cmt_infos.cmt_annots with + | Cmt_format.Interface _ -> true + | _ -> false + in + let file : DceFileProcessing.file_context = { + source_path; module_name; is_interface + } in + let cmtFilePath = "" (* not used in process_cmt_file body *) in + DceFileProcessing.process_cmt_file ~config ~file ~cmtFilePath cmt_infos + ) + in + (* Initial load *) + List.iter (Reactive_file_collection.add coll) cmt_paths; + collection := Some coll; + coll + + let apply_events events = + match !collection with + | Some coll -> Reactive_file_collection.apply coll events + | None -> failwith "Collection not initialized" + + let get_all_file_data () = + match !collection with + | Some coll -> Reactive_file_collection.values coll + | None -> [] +end + +(* Modified Reanalyze.runAnalysis *) +let runAnalysisIncremental ~config ~events = + (* Apply only the changed files *) + CmtCollection.apply_events events; + + (* Get all file_data (instant - values 
already computed) *) + let file_data_list = CmtCollection.get_all_file_data () in + + (* Rest of analysis is same as before *) + let annotations, decls, cross_file, refs, file_deps = + merge_all_builders file_data_list + in + solve ~annotations ~decls ~refs ~file_deps ~config +``` + +## Testing Strategy + +1. **Correctness**: Verify reactive analysis produces same results as batch +2. **Performance**: Benchmark incremental updates vs full analysis +3. **Edge cases**: + - File deletion during analysis + - Rapid successive changes + - Build errors (incomplete CMT files) + +## Open Questions + +1. **Build system integration**: How to get file events from rewatch/ninja? +2. **CMT staleness**: What if build system is still writing CMT files? +3. **Multi-project**: How to handle monorepos with multiple rescript.json? +4. **Memory limits**: When to evict file_data from cache? + +## Integration Points + +### 1. Shared.tryReadCmt → Marshal_cache + +Current code in `analysis/src/Shared.ml`: +```ocaml +let tryReadCmt cmt = + if not (Files.exists cmt) then ( + Log.log ("Cmt file does not exist " ^ cmt); + None) + else + match Cmt_format.read_cmt cmt with + | exception ... -> None + | x -> Some x +``` + +With Marshal_cache: +```ocaml +let tryReadCmt cmt = + if not (Files.exists cmt) then ( + Log.log ("Cmt file does not exist " ^ cmt); + None) + else + try + Some (Marshal_cache.with_unmarshalled_file cmt Fun.id) + with Marshal_cache.Cache_error (_, msg) -> + Log.log ("Invalid cmt format " ^ cmt ^ ": " ^ msg); + None +``` + +### 2. Reanalyze.loadCmtFile → Reactive_file_collection + +Current code in `analysis/reanalyze/src/Reanalyze.ml`: +```ocaml +let loadCmtFile ~config cmtFilePath : cmt_file_result option = + let cmt_infos = Cmt_format.read_cmt cmtFilePath in + ... 
+``` + +With reactive collection: +```ocaml +(* Global reactive collection *) +let cmt_collection : cmt_file_result Reactive_file_collection.t option ref = ref None + +let init_collection ~config = + cmt_collection := Some (Reactive_file_collection.create + ~process:(fun (cmt_infos : Cmt_format.cmt_infos) -> + process_cmt_infos ~config cmt_infos + )) + +let loadCmtFile_reactive ~config cmtFilePath = + match !cmt_collection with + | Some coll -> Reactive_file_collection.get coll cmtFilePath + | None -> loadCmtFile ~config cmtFilePath (* fallback *) +``` + +### 3. File Watcher Integration + +The analysis server already has `DceCommand.ml`. We can extend it to a service: + +```ocaml +(* DceService.ml *) + +type t = { + config: Reanalyze.DceConfig.t; + collection: cmt_file_result Reactive_file_collection.t; + mutable last_result: Reanalyze.AnalysisResult.t option; +} + +let create ~project_root = + let config = Reanalyze.DceConfig.current () in + let cmt_paths = Reanalyze.collectCmtFilePaths ~cmtRoot:None in + let collection = Reactive_file_collection.create + ~process:(process_cmt_for_config ~config) + in + List.iter (Reactive_file_collection.add collection) cmt_paths; + { config; collection; last_result = None } + +let on_file_change t events = + Reactive_file_collection.apply t.collection events; + (* Invalidate cached result *) + t.last_result <- None + +let analyze t = + match t.last_result with + | Some result -> result (* Cached, no files changed *) + | None -> + let file_data_list = Reactive_file_collection.values t.collection in + let result = run_analysis_on_file_data ~config:t.config file_data_list in + t.last_result <- Some result; + result +``` + +### 4. Build System Integration (rewatch) + +Rewatch already watches for file changes. 
We can extend it to notify the analysis service: + +In `rewatch/src/watcher.rs`: +```rust +// After successful compilation of a module +if let Some(analysis_socket) = &state.analysis_socket { + analysis_socket.send(AnalysisEvent::Modified(cmt_path)); +} +``` + +Or via a Unix domain socket/named pipe that the analysis service listens on. + +## Dependency Setup + +Add to `analysis/dune`: +```dune +(library + (name analysis) + (libraries + ... + skip-lite.marshal_cache + skip-lite.reactive_file_collection)) +``` + +Add to `analysis.opam`: +```opam +depends: [ + ... + "skip-lite" {>= "0.1"} +] +``` + +## Conclusion + +Integrating skip-lite's reactive collections with reanalyze offers a path to **39-390x speedup** for incremental analysis. The key insight is that CMT file loading (77% of current time) can be eliminated for unchanged files, and the processed file_data can be cached. + +The implementation requires: +1. Adding skip-lite as a dependency +2. Wrapping CMT loading with Marshal_cache (immediate benefit: mmap caching) +3. Creating reactive collection for file_data (benefit: only process changed files) +4. Creating a service mode that watches for file changes (benefit: persistent state) + +The merge and solve phases (23% of time) remain unchanged initially, but could be made incremental in the future for even greater speedups. + +## Next Steps + +1. **Phase 0**: Add skip-lite as optional dependency (behind a feature flag) +2. **Phase 1**: Replace `Cmt_format.read_cmt` with `Marshal_cache` wrapper +3. **Phase 2**: Benchmark improvement from mmap caching alone +4. **Phase 3**: Implement `Reactive_file_collection` for file_data +5. **Phase 4**: Create analysis service with file watching +6. 
**Phase 5**: Integrate with rewatch for automatic updates + diff --git a/tests/analysis_tests/tests-reanalyze/deadcode-benchmark/Makefile b/tests/analysis_tests/tests-reanalyze/deadcode-benchmark/Makefile index 27b4767f0a..a7f30e2282 100644 --- a/tests/analysis_tests/tests-reanalyze/deadcode-benchmark/Makefile +++ b/tests/analysis_tests/tests-reanalyze/deadcode-benchmark/Makefile @@ -35,6 +35,33 @@ time: @echo "Parallel (auto-detect cores):" @dune exec rescript-editor-analysis -- reanalyze -config -ci -timing -parallel -1 2>&1 | grep -E "Analysis reported|=== Timing|CMT processing|File loading|Result collection|Analysis:|Merging|Solving|Reporting:|Total:" +# Benchmark with CMT cache +time-cache: generate build + @echo "=== Without cache ===" + @echo "Sequential:" + @dune exec rescript-editor-analysis -- reanalyze -config -ci -timing 2>&1 | grep -E "=== Timing|CMT processing|File loading|Total:" + @echo "" + @echo "=== With CMT cache (first run - cold) ===" + @echo "Sequential:" + @dune exec rescript-editor-analysis -- reanalyze -config -ci -timing -cmt-cache 2>&1 | grep -E "=== Timing|CMT processing|File loading|Total:" + @echo "" + @echo "=== With CMT cache (second run - warm) ===" + @echo "Sequential:" + @dune exec rescript-editor-analysis -- reanalyze -config -ci -timing -cmt-cache 2>&1 | grep -E "=== Timing|CMT processing|File loading|Total:" + @echo "" + @echo "=== With CMT cache + parallel (warm) ===" + @dune exec rescript-editor-analysis -- reanalyze -config -ci -timing -cmt-cache -parallel -1 2>&1 | grep -E "=== Timing|CMT processing|File loading|Total:" + +# Benchmark reactive mode (simulates repeated analysis) +time-reactive: generate build + @echo "=== Reactive mode benchmark ===" + @echo "" + @echo "Standard (baseline):" + @dune exec rescript-editor-analysis -- reanalyze -config -ci -timing 2>&1 | grep -E "=== Timing|CMT processing|File loading|Total:" + @echo "" + @echo "Reactive mode (3 runs - first is cold, subsequent are warm):" + @dune exec 
rescript-editor-analysis -- reanalyze -config -ci -timing -reactive -runs 3 2>&1 | grep -E "=== Run|=== Timing|CMT processing|File loading|Total:" + .DEFAULT_GOAL := benchmark -.PHONY: generate build clean benchmark time +.PHONY: generate build clean benchmark time time-cache time-reactive From 7f482bf61efc67450d0b95345c354bef7687fe18 Mon Sep 17 00:00:00 2001 From: Cristiano Calcagno Date: Mon, 15 Dec 2025 13:37:37 +0100 Subject: [PATCH 2/9] Enable create-sourcedirs by default in rewatch - Change --create-sourcedirs to default to true (always create .sourcedirs.json) - Hide the flag from help since it's now always enabled - Add deprecation warning when flag is explicitly used - Fix package name mismatches in test projects: - deadcode rescript.json: sample-typescript-app -> @tests/reanalyze-deadcode - rescript-react package.json: @tests/rescript-react -> @rescript/react --- analysis/reanalyze/src/CmtCache.ml | 8 +- analysis/reanalyze/src/CmtCache.mli | 6 +- analysis/reanalyze/src/ReactiveAnalysis.ml | 95 ++++++++++--------- analysis/reanalyze/src/Reanalyze.ml | 17 ++-- analysis/vendor/skip-lite/dune | 2 + analysis/vendor/skip-lite/marshal_cache/dune | 3 +- rewatch/src/cli.rs | 15 ++- rewatch/src/main.rs | 16 +++- .../tests-reanalyze/deadcode/package.json | 4 +- .../tests-reanalyze/deadcode/rescript.json | 2 +- .../dependencies/rescript-react/package.json | 2 +- 11 files changed, 90 insertions(+), 80 deletions(-) diff --git a/analysis/reanalyze/src/CmtCache.ml b/analysis/reanalyze/src/CmtCache.ml index 53425cb369..54cfecc71a 100644 --- a/analysis/reanalyze/src/CmtCache.ml +++ b/analysis/reanalyze/src/CmtCache.ml @@ -29,14 +29,10 @@ let clear () = Marshal_cache.clear () The next read will re-load the file from disk. 
*) let invalidate path = Marshal_cache.invalidate path +type stats = {entry_count: int; mapped_bytes: int} (** Cache statistics *) -type stats = { - entry_count: int; - mapped_bytes: int; -} (** Get cache statistics *) let stats () : stats = let s = Marshal_cache.stats () in - { entry_count = s.entry_count; mapped_bytes = s.mapped_bytes } - + {entry_count = s.entry_count; mapped_bytes = s.mapped_bytes} diff --git a/analysis/reanalyze/src/CmtCache.mli b/analysis/reanalyze/src/CmtCache.mli index 74d6a73c85..ef15970617 100644 --- a/analysis/reanalyze/src/CmtCache.mli +++ b/analysis/reanalyze/src/CmtCache.mli @@ -17,12 +17,8 @@ val clear : unit -> unit val invalidate : string -> unit (** Invalidate a specific path in the cache. *) -type stats = { - entry_count: int; - mapped_bytes: int; -} +type stats = {entry_count: int; mapped_bytes: int} (** Cache statistics *) val stats : unit -> stats (** Get cache statistics *) - diff --git a/analysis/reanalyze/src/ReactiveAnalysis.ml b/analysis/reanalyze/src/ReactiveAnalysis.ml index a6b6a6cf46..5aac5c90c7 100644 --- a/analysis/reanalyze/src/ReactiveAnalysis.ml +++ b/analysis/reanalyze/src/ReactiveAnalysis.ml @@ -6,25 +6,25 @@ [@@@alert "-unsafe"] -(** Result of processing a single CMT file *) type cmt_file_result = { dce_data: DceFileProcessing.file_data option; exception_data: Exception.file_result option; } +(** Result of processing a single CMT file *) -(** Result of processing all CMT files *) type all_files_result = { dce_data_list: DceFileProcessing.file_data list; exception_results: Exception.file_result list; } +(** Result of processing all CMT files *) -(** Cached file_data for a single CMT file. - We cache the processed result, not just the raw CMT data. *) type cached_file = { path: string; file_data: DceFileProcessing.file_data option; exception_data: Exception.file_result option; } +(** Cached file_data for a single CMT file. + We cache the processed result, not just the raw CMT data. 
*) (** The file cache - maps CMT paths to processed results *) let file_cache : (string, cached_file) Hashtbl.t = Hashtbl.create 1024 @@ -81,60 +81,62 @@ let process_cmt_infos ~config ~cmtFilePath cmt_infos : cmt_file_result option = Returns the cached result if the file hasn't changed since last access. *) let process_cmt_cached ~config cmtFilePath : cmt_file_result option = match CmtCache.read_cmt_if_changed cmtFilePath with - | None -> + | None -> ( (* File unchanged - return cached result *) - (match Hashtbl.find_opt file_cache cmtFilePath with - | Some cached -> - Some { dce_data = cached.file_data; exception_data = cached.exception_data } - | None -> - (* First time seeing this file - shouldn't happen, but handle gracefully *) - None) + match Hashtbl.find_opt file_cache cmtFilePath with + | Some cached -> + Some {dce_data = cached.file_data; exception_data = cached.exception_data} + | None -> + (* First time seeing this file - shouldn't happen, but handle gracefully *) + None) | Some cmt_infos -> (* File changed or new - process it *) let result = process_cmt_infos ~config ~cmtFilePath cmt_infos in (* Cache the result *) (match result with - | Some r -> - Hashtbl.replace file_cache cmtFilePath { - path = cmtFilePath; - file_data = r.dce_data; - exception_data = r.exception_data; - } - | None -> ()); + | Some r -> + Hashtbl.replace file_cache cmtFilePath + { + path = cmtFilePath; + file_data = r.dce_data; + exception_data = r.exception_data; + } + | None -> ()); result (** Process all files incrementally. First run processes all files. Subsequent runs only process changed files. 
*) let process_files_incremental ~config cmtFilePaths : all_files_result = Timing.time_phase `FileLoading (fun () -> - let dce_data_list = ref [] in - let exception_results = ref [] in - let processed = ref 0 in - let from_cache = ref 0 in - - cmtFilePaths |> List.iter (fun cmtFilePath -> - (* Check if file was in cache *before* processing *) - let was_cached = Hashtbl.mem file_cache cmtFilePath in - match process_cmt_cached ~config cmtFilePath with - | Some {dce_data; exception_data} -> - (match dce_data with - | Some data -> dce_data_list := data :: !dce_data_list - | None -> ()); - (match exception_data with - | Some data -> exception_results := data :: !exception_results - | None -> ()); - (* Track whether it was from cache *) - if was_cached then - incr from_cache - else - incr processed - | None -> () - ); - - if !Cli.timing then - Printf.eprintf "Reactive: %d files processed, %d from cache\n%!" !processed !from_cache; - - {dce_data_list = List.rev !dce_data_list; exception_results = List.rev !exception_results}) + let dce_data_list = ref [] in + let exception_results = ref [] in + let processed = ref 0 in + let from_cache = ref 0 in + + cmtFilePaths + |> List.iter (fun cmtFilePath -> + (* Check if file was in cache *before* processing *) + let was_cached = Hashtbl.mem file_cache cmtFilePath in + match process_cmt_cached ~config cmtFilePath with + | Some {dce_data; exception_data} -> + (match dce_data with + | Some data -> dce_data_list := data :: !dce_data_list + | None -> ()); + (match exception_data with + | Some data -> exception_results := data :: !exception_results + | None -> ()); + (* Track whether it was from cache *) + if was_cached then incr from_cache else incr processed + | None -> ()); + + if !Cli.timing then + Printf.eprintf "Reactive: %d files processed, %d from cache\n%!" 
+ !processed !from_cache; + + { + dce_data_list = List.rev !dce_data_list; + exception_results = List.rev !exception_results; + }) (** Clear all cached file data *) let clear () = @@ -146,4 +148,3 @@ let stats () = let file_count = Hashtbl.length file_cache in let cmt_stats = CmtCache.stats () in (file_count, cmt_stats) - diff --git a/analysis/reanalyze/src/Reanalyze.ml b/analysis/reanalyze/src/Reanalyze.ml index 58a4883e18..c963d662ee 100644 --- a/analysis/reanalyze/src/Reanalyze.ml +++ b/analysis/reanalyze/src/Reanalyze.ml @@ -211,9 +211,14 @@ let processCmtFiles ~config ~cmtRoot : all_files_result = let cmtFilePaths = collectCmtFilePaths ~cmtRoot in (* Reactive mode: use incremental processing that skips unchanged files *) if !Cli.reactive then - let result = ReactiveAnalysis.process_files_incremental ~config cmtFilePaths in - {dce_data_list = result.dce_data_list; exception_results = result.exception_results} - else begin + let result = + ReactiveAnalysis.process_files_incremental ~config cmtFilePaths + in + { + dce_data_list = result.dce_data_list; + exception_results = result.exception_results; + } + else let numDomains = match !Cli.parallel with | n when n > 0 -> n @@ -228,7 +233,6 @@ let processCmtFiles ~config ~cmtRoot : all_files_result = (List.length cmtFilePaths); processFilesParallel ~config ~numDomains cmtFilePaths) else processFilesSequential ~config cmtFilePaths - end (* Shuffle a list using Fisher-Yates algorithm *) let shuffle_list lst = @@ -362,11 +366,10 @@ let runAnalysisAndReport ~cmtRoot = if numRuns > 1 && !Cli.timing then Printf.eprintf "\n=== Run %d/%d ===\n%!" 
run numRuns; runAnalysis ~dce_config ~cmtRoot; - if run = numRuns then begin + if run = numRuns then ( (* Only report on last run *) Log_.Stats.report ~config:dce_config; - Log_.Stats.clear () - end; + Log_.Stats.clear ()); Timing.report () done; if !Cli.json then EmitJson.finish () diff --git a/analysis/vendor/skip-lite/dune b/analysis/vendor/skip-lite/dune index 4830047662..9611c60add 100644 --- a/analysis/vendor/skip-lite/dune +++ b/analysis/vendor/skip-lite/dune @@ -1,7 +1,9 @@ ; skip-lite vendor directory + (dirs marshal_cache reactive_file_collection) ; Test executable for CMT file support + (executable (name test_cmt) (modules test_cmt) diff --git a/analysis/vendor/skip-lite/marshal_cache/dune b/analysis/vendor/skip-lite/marshal_cache/dune index 714dbcfc98..0a9e05f37a 100644 --- a/analysis/vendor/skip-lite/marshal_cache/dune +++ b/analysis/vendor/skip-lite/marshal_cache/dune @@ -3,5 +3,6 @@ (foreign_stubs (language cxx) (names marshal_cache_stubs) - (flags (:standard -std=c++17))) + (flags + (:standard -std=c++17))) (c_library_flags (-lstdc++))) diff --git a/rewatch/src/cli.rs b/rewatch/src/cli.rs index 3b4604ce54..e0a0132773 100644 --- a/rewatch/src/cli.rs +++ b/rewatch/src/cli.rs @@ -197,9 +197,9 @@ pub struct AfterBuildArg { #[derive(Args, Debug, Clone, Copy)] pub struct CreateSourceDirsArg { - /// Create a source_dirs.json file at the root of the monorepo, needed for Reanalyze. - #[arg(short, long, default_value_t = false, num_args = 0..=1)] - pub create_sourcedirs: bool, + /// Deprecated: source_dirs.json is now always created. 
+    #[arg(short, long, num_args = 0..=1, default_missing_value = "true", hide = true)]
+    pub create_sourcedirs: Option<bool>,
 }
 
 #[derive(Args, Debug, Clone, Copy)]
@@ -488,11 +488,10 @@ impl Deref for AfterBuildArg {
     }
 }
 
-impl Deref for CreateSourceDirsArg {
-    type Target = bool;
-
-    fn deref(&self) -> &Self::Target {
-        &self.create_sourcedirs
+impl CreateSourceDirsArg {
+    /// Returns true if the flag was explicitly passed on the command line.
+    pub fn was_explicitly_set(&self) -> bool {
+        self.create_sourcedirs.is_some()
     }
 }
 
diff --git a/rewatch/src/main.rs b/rewatch/src/main.rs
index 46bf248fbd..4c9ece6018 100644
--- a/rewatch/src/main.rs
+++ b/rewatch/src/main.rs
@@ -46,12 +46,18 @@ fn main() -> Result<()> {
         );
     }
 
+    if build_args.create_sourcedirs.was_explicitly_set() {
+        log::warn!(
+            "`--create-sourcedirs` is deprecated: source_dirs.json is now always created. Please remove this flag from your command."
+        );
+    }
+
     match build::build(
         &build_args.filter,
         Path::new(&build_args.folder as &str),
         show_progress,
         build_args.no_timing,
-        *build_args.create_sourcedirs,
+        true, // create_sourcedirs is now always enabled
         plain_output,
         (*build_args.warn_error).clone(),
     ) {
@@ -76,12 +82,18 @@ fn main() -> Result<()> {
         );
     }
 
+    if watch_args.create_sourcedirs.was_explicitly_set() {
+        log::warn!(
+            "`--create-sourcedirs` is deprecated: source_dirs.json is now always created. Please remove this flag from your command."
+ ); + } + match watcher::start( &watch_args.filter, show_progress, &watch_args.folder, (*watch_args.after_build).clone(), - *watch_args.create_sourcedirs, + true, // create_sourcedirs is now always enabled plain_output, (*watch_args.warn_error).clone(), ) { diff --git a/tests/analysis_tests/tests-reanalyze/deadcode/package.json b/tests/analysis_tests/tests-reanalyze/deadcode/package.json index 2c294ed392..fdcd84d9ee 100644 --- a/tests/analysis_tests/tests-reanalyze/deadcode/package.json +++ b/tests/analysis_tests/tests-reanalyze/deadcode/package.json @@ -2,8 +2,8 @@ "name": "@tests/reanalyze-deadcode", "private": true, "scripts": { - "build": "rescript-legacy build", - "clean": "rescript-legacy clean" + "build": "rescript build", + "clean": "rescript clean" }, "dependencies": { "@rescript/react": "link:../../../dependencies/rescript-react", diff --git a/tests/analysis_tests/tests-reanalyze/deadcode/rescript.json b/tests/analysis_tests/tests-reanalyze/deadcode/rescript.json index 467c603309..942f2d957f 100644 --- a/tests/analysis_tests/tests-reanalyze/deadcode/rescript.json +++ b/tests/analysis_tests/tests-reanalyze/deadcode/rescript.json @@ -5,7 +5,7 @@ "unsuppress": [], "transitive": true }, - "name": "sample-typescript-app", + "name": "@tests/reanalyze-deadcode", "jsx": { "version": 4 }, "dependencies": ["@rescript/react"], "sources": [ diff --git a/tests/dependencies/rescript-react/package.json b/tests/dependencies/rescript-react/package.json index cd0136bdd3..0d09e376d6 100644 --- a/tests/dependencies/rescript-react/package.json +++ b/tests/dependencies/rescript-react/package.json @@ -1,4 +1,4 @@ { - "name": "@tests/rescript-react", + "name": "@rescript/react", "private": true } From 9265b6499b13d9fe001a8453315853031777ca4c Mon Sep 17 00:00:00 2001 From: Cristiano Calcagno Date: Mon, 15 Dec 2025 16:00:18 +0100 Subject: [PATCH 3/9] Refine reactive_file_collection representation Simplify the reactive_file_collection implementation by making 'v t be the concrete 
record type used at runtime, removing the unused phantom fields and internal wrapper type. This eliminates warning 69 about unused record fields and relies directly on a single record with its process function stored as an Obj.t-based callback. --- .../reactive_file_collection.ml | 56 +++++++------------ yarn.lock | 12 ++-- 2 files changed, 25 insertions(+), 43 deletions(-) diff --git a/analysis/vendor/skip-lite/reactive_file_collection/reactive_file_collection.ml b/analysis/vendor/skip-lite/reactive_file_collection/reactive_file_collection.ml index a7e1babf5f..9b137d7469 100644 --- a/analysis/vendor/skip-lite/reactive_file_collection/reactive_file_collection.ml +++ b/analysis/vendor/skip-lite/reactive_file_collection/reactive_file_collection.ml @@ -5,43 +5,32 @@ type event = | Removed of string | Modified of string -type 'v t = { - data : (string, 'v) Hashtbl.t; - process : 'a. 'a -> 'v; -} - -(* We need to use Obj.magic to make the polymorphic process function work - with Marshal_cache which returns 'a. This is safe because the user +(* We need to use Obj.t to make the polymorphic process function work + with Marshal_cache which returns ['a]. This is safe because the user guarantees the file contains data of the expected type. 
*) type 'v process_fn = Obj.t -> 'v -type 'v t_internal = { - data_internal : (string, 'v) Hashtbl.t; - process_internal : 'v process_fn; +type 'v t = { + data : (string, 'v) Hashtbl.t; + process : 'v process_fn; } let create (type a v) ~(process : a -> v) : v t = - let process_internal : v process_fn = fun obj -> process (Obj.obj obj) in - let t = { - data_internal = Hashtbl.create 256; - process_internal; - } in - (* Safe cast - same representation *) - Obj.magic t - -let to_internal (t : 'v t) : 'v t_internal = Obj.magic t + let process_fn : v process_fn = fun obj -> process (Obj.obj obj) in + { + data = Hashtbl.create 256; + process = process_fn; + } let add t path = - let t = to_internal t in let value = Marshal_cache.with_unmarshalled_file path (fun data -> - t.process_internal (Obj.repr data) + t.process (Obj.repr data) ) in - Hashtbl.replace t.data_internal path value + Hashtbl.replace t.data path value [@@alert "-unsafe"] let remove t path = - let t = to_internal t in - Hashtbl.remove t.data_internal path + Hashtbl.remove t.data path let update t path = (* Just reload - Marshal_cache handles the file reading efficiently *) @@ -53,33 +42,26 @@ let apply t events = | Removed path -> remove t path | Modified path -> update t path ) events - let get t path = - let t = to_internal t in - Hashtbl.find_opt t.data_internal path + Hashtbl.find_opt t.data path let find t path = - let t = to_internal t in - Hashtbl.find t.data_internal path + Hashtbl.find t.data path let mem t path = - let t = to_internal t in - Hashtbl.mem t.data_internal path + Hashtbl.mem t.data path let length t = - let t = to_internal t in - Hashtbl.length t.data_internal + Hashtbl.length t.data let is_empty t = length t = 0 let iter f t = - let t = to_internal t in - Hashtbl.iter f t.data_internal + Hashtbl.iter f t.data let fold f t init = - let t = to_internal t in - Hashtbl.fold f t.data_internal init + Hashtbl.fold f t.data init let to_list t = fold (fun k v acc -> (k, v) :: acc) t [] diff 
--git a/yarn.lock b/yarn.lock index 573bc9b2a6..3db8c9bc0e 100644 --- a/yarn.lock +++ b/yarn.lock @@ -452,6 +452,12 @@ __metadata: languageName: node linkType: hard +"@rescript/react@workspace:tests/dependencies/rescript-react": + version: 0.0.0-use.local + resolution: "@rescript/react@workspace:tests/dependencies/rescript-react" + languageName: unknown + linkType: soft + "@rescript/runtime@workspace:packages/@rescript/runtime": version: 0.0.0-use.local resolution: "@rescript/runtime@workspace:packages/@rescript/runtime" @@ -724,12 +730,6 @@ __metadata: languageName: unknown linkType: soft -"@tests/rescript-react@workspace:tests/dependencies/rescript-react": - version: 0.0.0-use.local - resolution: "@tests/rescript-react@workspace:tests/dependencies/rescript-react" - languageName: unknown - linkType: soft - "@tests/tools@workspace:tests/tools_tests": version: 0.0.0-use.local resolution: "@tests/tools@workspace:tests/tools_tests" From 210862efbd688016dd2231b8a7de14cc1ccdc0ad Mon Sep 17 00:00:00 2001 From: Cristiano Calcagno Date: Mon, 15 Dec 2025 17:28:27 +0100 Subject: [PATCH 4/9] Replace C++ marshal_cache with pure OCaml implementation - CmtCache: rewritten using Unix.stat for file change detection (mtime, size, inode) instead of C++ mmap cache - ReactiveFileCollection: new pure OCaml module for reactive file collections with delta-based updates - ReactiveAnalysis: refactored to use ReactiveFileCollection, collection passed as parameter (no global mutable state) - Timing: only show parallel merge timing when applicable - Deleted skip-lite vendor directory (C++ code no longer needed) This eliminates the Linux/musl C++ compilation issue while maintaining the same incremental analysis performance: - Cold run: ~1.0s - Warm run: ~0.01s (90x faster, skips unchanged files) --- analysis/reanalyze/src/CmtCache.ml | 68 +- analysis/reanalyze/src/CmtCache.mli | 13 +- analysis/reanalyze/src/ReactiveAnalysis.ml | 117 ++- .../reanalyze/src/ReactiveFileCollection.ml | 52 ++ 
.../src/ReactiveFileCollection.mli} | 47 +- analysis/reanalyze/src/Reanalyze.ml | 20 +- analysis/reanalyze/src/Timing.ml | 16 +- analysis/reanalyze/src/dune | 2 +- analysis/src/DceCommand.ml | 2 +- analysis/vendor/dune | 2 +- analysis/vendor/skip-lite/dune | 10 - analysis/vendor/skip-lite/marshal_cache/dune | 8 - .../skip-lite/marshal_cache/marshal_cache.ml | 71 -- .../skip-lite/marshal_cache/marshal_cache.mli | 120 --- .../marshal_cache/marshal_cache_stubs.cpp | 804 ------------------ .../skip-lite/reactive_file_collection/dune | 3 - .../reactive_file_collection.ml | 77 -- analysis/vendor/skip-lite/test_cmt.ml | 119 --- .../deadcode-benchmark/Makefile | 2 +- .../deadcode-benchmark/package.json | 4 +- .../dependencies/rescript-react/package.json | 27 +- 21 files changed, 225 insertions(+), 1359 deletions(-) create mode 100644 analysis/reanalyze/src/ReactiveFileCollection.ml rename analysis/{vendor/skip-lite/reactive_file_collection/reactive_file_collection.mli => reanalyze/src/ReactiveFileCollection.mli} (65%) delete mode 100644 analysis/vendor/skip-lite/dune delete mode 100644 analysis/vendor/skip-lite/marshal_cache/dune delete mode 100644 analysis/vendor/skip-lite/marshal_cache/marshal_cache.ml delete mode 100644 analysis/vendor/skip-lite/marshal_cache/marshal_cache.mli delete mode 100644 analysis/vendor/skip-lite/marshal_cache/marshal_cache_stubs.cpp delete mode 100644 analysis/vendor/skip-lite/reactive_file_collection/dune delete mode 100644 analysis/vendor/skip-lite/reactive_file_collection/reactive_file_collection.ml delete mode 100644 analysis/vendor/skip-lite/test_cmt.ml diff --git a/analysis/reanalyze/src/CmtCache.ml b/analysis/reanalyze/src/CmtCache.ml index 54cfecc71a..aa838ed38d 100644 --- a/analysis/reanalyze/src/CmtCache.ml +++ b/analysis/reanalyze/src/CmtCache.ml @@ -1,38 +1,70 @@ -(** CMT file cache using Marshal_cache for efficient mmap-based reading. +(** CMT file cache with automatic invalidation based on file metadata. 
This module provides cached reading of CMT files with automatic - invalidation when files change on disk. It's used to speed up - repeated analysis runs by avoiding re-reading unchanged files. *) + invalidation when files change on disk. Uses Unix.stat to detect + changes via mtime, size, and inode. *) -[@@@alert "-unsafe"] +type file_id = { + mtime: float; (** Modification time *) + size: int; (** File size in bytes *) + ino: int; (** Inode number *) +} +(** File identity for cache invalidation *) -(** Read a CMT file, using the mmap cache for efficiency. - The file is memory-mapped and the cache automatically detects - when the file changes on disk. *) +(** Get file identity from path *) +let get_file_id path : file_id = + let st = Unix.stat path in + {mtime = st.Unix.st_mtime; size = st.Unix.st_size; ino = st.Unix.st_ino} + +(** Check if file has changed *) +let file_changed ~old_id ~new_id = + old_id.mtime <> new_id.mtime + || old_id.size <> new_id.size || old_id.ino <> new_id.ino + +type cache_entry = {file_id: file_id; cmt_infos: Cmt_format.cmt_infos} +(** Cache entry: file identity + cached CMT data *) + +(** The cache: path -> cache_entry *) +let cache : (string, cache_entry) Hashtbl.t = Hashtbl.create 256 + +(** Read a CMT file, using the cache for efficiency. + Re-reads from disk if file has changed. *) let read_cmt path : Cmt_format.cmt_infos = - Marshal_cache.with_unmarshalled_file path Fun.id + let new_id = get_file_id path in + match Hashtbl.find_opt cache path with + | Some entry when not (file_changed ~old_id:entry.file_id ~new_id) -> + entry.cmt_infos + | _ -> + let cmt_infos = Cmt_format.read_cmt path in + Hashtbl.replace cache path {file_id = new_id; cmt_infos}; + cmt_infos (** Read a CMT file only if it changed since the last access. Returns [Some cmt_infos] if the file changed (or first access), [None] if the file is unchanged. 
This is the key function for incremental analysis - unchanged - files return [None] immediately without any unmarshalling. *) + files return [None] immediately without any file reading. *) let read_cmt_if_changed path : Cmt_format.cmt_infos option = - Marshal_cache.with_unmarshalled_if_changed path Fun.id + let new_id = get_file_id path in + match Hashtbl.find_opt cache path with + | Some entry when not (file_changed ~old_id:entry.file_id ~new_id) -> + None (* File unchanged *) + | _ -> + let cmt_infos = Cmt_format.read_cmt path in + Hashtbl.replace cache path {file_id = new_id; cmt_infos}; + Some cmt_infos -(** Clear the CMT cache, unmapping all memory. - Useful for testing or to free memory. *) -let clear () = Marshal_cache.clear () +(** Clear the CMT cache, freeing all cached data. *) +let clear () = Hashtbl.clear cache (** Invalidate a specific path in the cache. The next read will re-load the file from disk. *) -let invalidate path = Marshal_cache.invalidate path +let invalidate path = Hashtbl.remove cache path type stats = {entry_count: int; mapped_bytes: int} (** Cache statistics *) -(** Get cache statistics *) -let stats () : stats = - let s = Marshal_cache.stats () in - {entry_count = s.entry_count; mapped_bytes = s.mapped_bytes} +(** Get cache statistics. + Note: mapped_bytes is approximate (we don't track actual memory usage). *) +let stats () : stats = {entry_count = Hashtbl.length cache; mapped_bytes = 0} diff --git a/analysis/reanalyze/src/CmtCache.mli b/analysis/reanalyze/src/CmtCache.mli index ef15970617..ef7d1b2221 100644 --- a/analysis/reanalyze/src/CmtCache.mli +++ b/analysis/reanalyze/src/CmtCache.mli @@ -1,10 +1,12 @@ -(** CMT file cache using Marshal_cache for efficient mmap-based reading. +(** CMT file cache with automatic invalidation based on file metadata. This module provides cached reading of CMT files with automatic - invalidation when files change on disk. *) + invalidation when files change on disk. 
Uses Unix.stat to detect + changes via mtime, size, and inode. *) val read_cmt : string -> Cmt_format.cmt_infos -(** Read a CMT file, using the mmap cache for efficiency. *) +(** Read a CMT file, using the cache for efficiency. + Re-reads from disk if file has changed. *) val read_cmt_if_changed : string -> Cmt_format.cmt_infos option (** Read a CMT file only if it changed since the last access. @@ -12,7 +14,7 @@ val read_cmt_if_changed : string -> Cmt_format.cmt_infos option [None] if the file is unchanged. *) val clear : unit -> unit -(** Clear the CMT cache, unmapping all memory. *) +(** Clear the CMT cache, freeing all cached data. *) val invalidate : string -> unit (** Invalidate a specific path in the cache. *) @@ -21,4 +23,5 @@ type stats = {entry_count: int; mapped_bytes: int} (** Cache statistics *) val stats : unit -> stats -(** Get cache statistics *) +(** Get cache statistics. + Note: mapped_bytes is always 0 (we don't track actual memory usage). *) diff --git a/analysis/reanalyze/src/ReactiveAnalysis.ml b/analysis/reanalyze/src/ReactiveAnalysis.ml index 5aac5c90c7..da2aec5623 100644 --- a/analysis/reanalyze/src/ReactiveAnalysis.ml +++ b/analysis/reanalyze/src/ReactiveAnalysis.ml @@ -1,10 +1,8 @@ -(** Reactive analysis service using cached file processing. +(** Reactive analysis service using ReactiveFileCollection. This module provides incremental analysis that only re-processes - files that have changed, caching the processed file_data for - unchanged files. *) - -[@@@alert "-unsafe"] + files that have changed, using ReactiveFileCollection for efficient + delta-based updates. *) type cmt_file_result = { dce_data: DceFileProcessing.file_data option; @@ -18,19 +16,11 @@ type all_files_result = { } (** Result of processing all CMT files *) -type cached_file = { - path: string; - file_data: DceFileProcessing.file_data option; - exception_data: Exception.file_result option; -} -(** Cached file_data for a single CMT file. 
- We cache the processed result, not just the raw CMT data. *) - -(** The file cache - maps CMT paths to processed results *) -let file_cache : (string, cached_file) Hashtbl.t = Hashtbl.create 1024 +type t = cmt_file_result option ReactiveFileCollection.t +(** The reactive collection type *) (** Process cmt_infos into a file result *) -let process_cmt_infos ~config ~cmtFilePath cmt_infos : cmt_file_result option = +let process_cmt_infos ~config cmt_infos : cmt_file_result option = let excludePath sourceFile = config.DceConfig.cli.exclude_paths |> List.exists (fun prefix_ -> @@ -64,7 +54,7 @@ let process_cmt_infos ~config ~cmtFilePath cmt_infos : cmt_file_result option = Some (cmt_infos |> DceFileProcessing.process_cmt_file ~config ~file:dce_file_context - ~cmtFilePath) + ~cmtFilePath:"") else None in let exception_data = @@ -77,74 +67,63 @@ let process_cmt_infos ~config ~cmtFilePath cmt_infos : cmt_file_result option = Some {dce_data; exception_data} | _ -> None -(** Process a CMT file, using cached result if file unchanged. - Returns the cached result if the file hasn't changed since last access. 
*) -let process_cmt_cached ~config cmtFilePath : cmt_file_result option = - match CmtCache.read_cmt_if_changed cmtFilePath with - | None -> ( - (* File unchanged - return cached result *) - match Hashtbl.find_opt file_cache cmtFilePath with - | Some cached -> - Some {dce_data = cached.file_data; exception_data = cached.exception_data} - | None -> - (* First time seeing this file - shouldn't happen, but handle gracefully *) - None) - | Some cmt_infos -> - (* File changed or new - process it *) - let result = process_cmt_infos ~config ~cmtFilePath cmt_infos in - (* Cache the result *) - (match result with - | Some r -> - Hashtbl.replace file_cache cmtFilePath - { - path = cmtFilePath; - file_data = r.dce_data; - exception_data = r.exception_data; - } - | None -> ()); - result +(** Create a new reactive collection *) +let create ~config : t = + ReactiveFileCollection.create ~process:(process_cmt_infos ~config) -(** Process all files incrementally. - First run processes all files. Subsequent runs only process changed files. *) -let process_files_incremental ~config cmtFilePaths : all_files_result = +(** Process all files incrementally using ReactiveFileCollection. + First run processes all files. Subsequent runs only process changed files + (detected via CmtCache's file change tracking). 
*) +let process_files ~(collection : t) ~config cmtFilePaths : all_files_result = Timing.time_phase `FileLoading (fun () -> - let dce_data_list = ref [] in - let exception_results = ref [] in let processed = ref 0 in let from_cache = ref 0 in + (* Add/update all files in the collection *) cmtFilePaths |> List.iter (fun cmtFilePath -> - (* Check if file was in cache *before* processing *) - let was_cached = Hashtbl.mem file_cache cmtFilePath in - match process_cmt_cached ~config cmtFilePath with - | Some {dce_data; exception_data} -> - (match dce_data with - | Some data -> dce_data_list := data :: !dce_data_list - | None -> ()); - (match exception_data with - | Some data -> exception_results := data :: !exception_results - | None -> ()); - (* Track whether it was from cache *) - if was_cached then incr from_cache else incr processed - | None -> ()); + let was_in_collection = + ReactiveFileCollection.mem collection cmtFilePath + in + (* Check if file changed using CmtCache *) + match CmtCache.read_cmt_if_changed cmtFilePath with + | None -> + (* File unchanged - already in collection *) + if was_in_collection then incr from_cache + | Some cmt_infos -> + (* File changed or new - process and update *) + let result = process_cmt_infos ~config cmt_infos in + ReactiveFileCollection.set collection cmtFilePath result; + incr processed); if !Cli.timing then Printf.eprintf "Reactive: %d files processed, %d from cache\n%!" 
!processed !from_cache; + (* Collect results from the collection *) + let dce_data_list = ref [] in + let exception_results = ref [] in + + ReactiveFileCollection.iter + (fun _path result_opt -> + match result_opt with + | Some {dce_data; exception_data} -> ( + (match dce_data with + | Some data -> dce_data_list := data :: !dce_data_list + | None -> ()); + match exception_data with + | Some data -> exception_results := data :: !exception_results + | None -> ()) + | None -> ()) + collection; + { dce_data_list = List.rev !dce_data_list; exception_results = List.rev !exception_results; }) -(** Clear all cached file data *) -let clear () = - Hashtbl.clear file_cache; - CmtCache.clear () - -(** Get cache statistics *) -let stats () = - let file_count = Hashtbl.length file_cache in +(** Get collection statistics *) +let stats (collection : t) = + let file_count = ReactiveFileCollection.length collection in let cmt_stats = CmtCache.stats () in (file_count, cmt_stats) diff --git a/analysis/reanalyze/src/ReactiveFileCollection.ml b/analysis/reanalyze/src/ReactiveFileCollection.ml new file mode 100644 index 0000000000..61c6b54520 --- /dev/null +++ b/analysis/reanalyze/src/ReactiveFileCollection.ml @@ -0,0 +1,52 @@ +(** Reactive File Collection - Implementation + + Uses CmtCache for efficient file change detection via Unix.stat. 
*) + +type event = Added of string | Removed of string | Modified of string + +type 'v t = {data: (string, 'v) Hashtbl.t; process: Cmt_format.cmt_infos -> 'v} + +let create ~process = {data = Hashtbl.create 256; process} + +let add t path = + let cmt_infos = CmtCache.read_cmt path in + let value = t.process cmt_infos in + Hashtbl.replace t.data path value + +let remove t path = + Hashtbl.remove t.data path; + CmtCache.invalidate path + +let update t path = + (* Re-read the file and update the cache *) + add t path + +let set t path value = Hashtbl.replace t.data path value + +let apply t events = + List.iter + (function + | Added path -> add t path + | Removed path -> remove t path + | Modified path -> update t path) + events + +let get t path = Hashtbl.find_opt t.data path + +let find t path = Hashtbl.find t.data path + +let mem t path = Hashtbl.mem t.data path + +let length t = Hashtbl.length t.data + +let is_empty t = length t = 0 + +let iter f t = Hashtbl.iter f t.data + +let fold f t init = Hashtbl.fold f t.data init + +let to_list t = fold (fun k v acc -> (k, v) :: acc) t [] + +let paths t = fold (fun k _ acc -> k :: acc) t [] + +let values t = fold (fun _ v acc -> v :: acc) t [] diff --git a/analysis/vendor/skip-lite/reactive_file_collection/reactive_file_collection.mli b/analysis/reanalyze/src/ReactiveFileCollection.mli similarity index 65% rename from analysis/vendor/skip-lite/reactive_file_collection/reactive_file_collection.mli rename to analysis/reanalyze/src/ReactiveFileCollection.mli index 56ae3e4c2e..f5f01c4283 100644 --- a/analysis/vendor/skip-lite/reactive_file_collection/reactive_file_collection.mli +++ b/analysis/reanalyze/src/ReactiveFileCollection.mli @@ -7,22 +7,22 @@ {[ (* Create collection with processing function *) - let coll = Reactive_file_collection.create + let coll = ReactiveFileCollection.create ~process:(fun (data : Cmt_format.cmt_infos) -> extract_types data ) (* Initial load *) - List.iter (Reactive_file_collection.add coll) (glob 
"*.cmt") + List.iter (ReactiveFileCollection.add coll) (glob "*.cmt") (* On file watcher events *) match event with - | Created path -> Reactive_file_collection.add coll path - | Deleted path -> Reactive_file_collection.remove coll path - | Modified path -> Reactive_file_collection.update coll path + | Created path -> ReactiveFileCollection.add coll path + | Deleted path -> ReactiveFileCollection.remove coll path + | Modified path -> ReactiveFileCollection.update coll path (* Access the collection *) - Reactive_file_collection.iter (fun path value -> ...) coll + ReactiveFileCollection.iter (fun path value -> ...) coll ]} {2 Thread Safety} @@ -30,34 +30,27 @@ Not thread-safe. Use external synchronization if accessed from multiple threads/domains. *) -(** The type of a reactive file collection with values of type ['v]. *) type 'v t +(** The type of a reactive file collection with values of type ['v]. *) (** Events for batch updates. *) type event = - | Added of string (** File was created *) - | Removed of string (** File was deleted *) + | Added of string (** File was created *) + | Removed of string (** File was deleted *) | Modified of string (** File was modified *) (** {1 Creation} *) -val create : process:('a -> 'v) -> 'v t +val create : process:(Cmt_format.cmt_infos -> 'v) -> 'v t (** [create ~process] creates an empty collection. - [process] is called to transform unmarshalled file contents into values. - - {b Type safety warning}: The caller must ensure files contain data of - type ['a]. This has the same safety properties as [Marshal.from_*]. - - @alert unsafe Caller must ensure files contain data of the expected type *) + [process] is called to transform CMT file contents into values. *) (** {1 Delta Operations} *) val add : 'v t -> string -> unit (** [add t path] adds a file to the collection. - Loads the file, unmarshals, and processes immediately. 
- - @raise Marshal_cache.Cache_error if file cannot be read or unmarshalled *) + Loads the file and processes immediately. *) val remove : 'v t -> string -> unit (** [remove t path] removes a file from the collection. @@ -65,15 +58,15 @@ val remove : 'v t -> string -> unit val update : 'v t -> string -> unit (** [update t path] reloads a modified file. - Equivalent to remove + add, but more efficient. - - @raise Marshal_cache.Cache_error if file cannot be read or unmarshalled *) + Equivalent to remove + add, but more efficient. *) + +val set : 'v t -> string -> 'v -> unit +(** [set t path value] sets the value for [path] directly. + Used when you have already processed the file externally. *) val apply : 'v t -> event list -> unit (** [apply t events] applies multiple events. - More efficient than individual operations for batches. - - @raise Marshal_cache.Cache_error if any added/modified file fails *) + More efficient than individual operations for batches. *) (** {1 Access} *) @@ -109,7 +102,3 @@ val paths : 'v t -> string list val values : 'v t -> 'v list (** [values t] returns all values in the collection. *) - - - - diff --git a/analysis/reanalyze/src/Reanalyze.ml b/analysis/reanalyze/src/Reanalyze.ml index c963d662ee..e1f9f2871a 100644 --- a/analysis/reanalyze/src/Reanalyze.ml +++ b/analysis/reanalyze/src/Reanalyze.ml @@ -207,18 +207,19 @@ let processFilesParallel ~config ~numDomains (cmtFilePaths : string list) : (** Process all cmt files and return results for DCE and Exception analysis. Conceptually: map process_cmt_file over all files. 
*) -let processCmtFiles ~config ~cmtRoot : all_files_result = +let processCmtFiles ~config ~cmtRoot ~reactive_collection : all_files_result = let cmtFilePaths = collectCmtFilePaths ~cmtRoot in (* Reactive mode: use incremental processing that skips unchanged files *) - if !Cli.reactive then + match reactive_collection with + | Some collection -> let result = - ReactiveAnalysis.process_files_incremental ~config cmtFilePaths + ReactiveAnalysis.process_files ~collection ~config cmtFilePaths in { dce_data_list = result.dce_data_list; exception_results = result.exception_results; } - else + | None -> let numDomains = match !Cli.parallel with | n when n > 0 -> n @@ -246,10 +247,10 @@ let shuffle_list lst = done; Array.to_list arr -let runAnalysis ~dce_config ~cmtRoot = +let runAnalysis ~dce_config ~cmtRoot ~reactive_collection = (* Map: process each file -> list of file_data *) let {dce_data_list; exception_results} = - processCmtFiles ~config:dce_config ~cmtRoot + processCmtFiles ~config:dce_config ~cmtRoot ~reactive_collection in (* Optionally shuffle for order-independence testing *) let dce_data_list = @@ -361,11 +362,16 @@ let runAnalysisAndReport ~cmtRoot = if !Cli.json then EmitJson.start (); let dce_config = DceConfig.current () in let numRuns = max 1 !Cli.runs in + (* Create reactive collection once, reuse across runs *) + let reactive_collection = + if !Cli.reactive then Some (ReactiveAnalysis.create ~config:dce_config) + else None + in for run = 1 to numRuns do Timing.reset (); if numRuns > 1 && !Cli.timing then Printf.eprintf "\n=== Run %d/%d ===\n%!" 
run numRuns; - runAnalysis ~dce_config ~cmtRoot; + runAnalysis ~dce_config ~cmtRoot ~reactive_collection; if run = numRuns then ( (* Only report on last run *) Log_.Stats.report ~config:dce_config; diff --git a/analysis/reanalyze/src/Timing.ml b/analysis/reanalyze/src/Timing.ml index b9f739df6a..ef875668db 100644 --- a/analysis/reanalyze/src/Timing.ml +++ b/analysis/reanalyze/src/Timing.ml @@ -54,24 +54,16 @@ let time_phase phase_name f = let report () = if !enabled then ( - (* NOTE about semantics: - - [file_loading] is treated as the WALL-CLOCK time for the overall - "CMT processing" phase (including per-file processing and any - synchronization). - - [result_collection] is an AGGREGATE metric across domains: time spent - in (and waiting on) the mutex-protected result merge/collection - section, summed across all worker domains. This may exceed wall-clock - time in parallel runs. - We do NOT add them together, otherwise we'd double-count. *) let cmt_total = times.file_loading in let analysis_total = times.merging +. times.solving in let total = cmt_total +. analysis_total +. times.reporting in Printf.eprintf "\n=== Timing ===\n"; Printf.eprintf " CMT processing: %.3fs (%.1f%%)\n" cmt_total (100.0 *. cmt_total /. total); - Printf.eprintf " - Wall clock: %.3fs\n" times.file_loading; - Printf.eprintf " - Result collection: %.3fms (aggregate)\n" - (1000.0 *. times.result_collection); + (* Only show parallel-specific timing when used *) + if times.result_collection > 0.0 then + Printf.eprintf " - Parallel merge: %.3fms (aggregate across domains)\n" + (1000.0 *. times.result_collection); Printf.eprintf " Analysis: %.3fs (%.1f%%)\n" analysis_total (100.0 *. analysis_total /. total); Printf.eprintf " - Merging: %.3fms\n" (1000.0 *. 
times.merging); diff --git a/analysis/reanalyze/src/dune b/analysis/reanalyze/src/dune index a0045f8230..e8b736446f 100644 --- a/analysis/reanalyze/src/dune +++ b/analysis/reanalyze/src/dune @@ -2,4 +2,4 @@ (name reanalyze) (flags (-w "+6+26+27+32+33+39")) - (libraries jsonlib ext ml str unix marshal_cache)) + (libraries jsonlib ext ml str unix)) diff --git a/analysis/src/DceCommand.ml b/analysis/src/DceCommand.ml index 1578a66bb4..6ff03172ae 100644 --- a/analysis/src/DceCommand.ml +++ b/analysis/src/DceCommand.ml @@ -1,6 +1,6 @@ let command () = Reanalyze.RunConfig.dce (); let dce_config = Reanalyze.DceConfig.current () in - Reanalyze.runAnalysis ~dce_config ~cmtRoot:None; + Reanalyze.runAnalysis ~dce_config ~cmtRoot:None ~reactive_collection:None; let issues = !Reanalyze.Log_.Stats.issues in Printf.printf "issues:%d\n" (List.length issues) diff --git a/analysis/vendor/dune b/analysis/vendor/dune index 7ccd94c6b7..07b8286153 100644 --- a/analysis/vendor/dune +++ b/analysis/vendor/dune @@ -1 +1 @@ -(dirs ext ml res_syntax json flow_parser skip-lite) +(dirs ext ml res_syntax json flow_parser) diff --git a/analysis/vendor/skip-lite/dune b/analysis/vendor/skip-lite/dune deleted file mode 100644 index 9611c60add..0000000000 --- a/analysis/vendor/skip-lite/dune +++ /dev/null @@ -1,10 +0,0 @@ -; skip-lite vendor directory - -(dirs marshal_cache reactive_file_collection) - -; Test executable for CMT file support - -(executable - (name test_cmt) - (modules test_cmt) - (libraries marshal_cache ml)) diff --git a/analysis/vendor/skip-lite/marshal_cache/dune b/analysis/vendor/skip-lite/marshal_cache/dune deleted file mode 100644 index 0a9e05f37a..0000000000 --- a/analysis/vendor/skip-lite/marshal_cache/dune +++ /dev/null @@ -1,8 +0,0 @@ -(library - (name marshal_cache) - (foreign_stubs - (language cxx) - (names marshal_cache_stubs) - (flags - (:standard -std=c++17))) - (c_library_flags (-lstdc++))) diff --git a/analysis/vendor/skip-lite/marshal_cache/marshal_cache.ml 
b/analysis/vendor/skip-lite/marshal_cache/marshal_cache.ml deleted file mode 100644 index 66da5e9c9f..0000000000 --- a/analysis/vendor/skip-lite/marshal_cache/marshal_cache.ml +++ /dev/null @@ -1,71 +0,0 @@ -(* Marshal Cache - OCaml implementation *) - -exception Cache_error of string * string - -type stats = { - entry_count : int; - mapped_bytes : int; -} - -(* Register the exception with the C runtime for proper propagation *) -let () = Callback.register_exception - "Marshal_cache.Cache_error" - (Cache_error ("", "")) - -(* External C stubs *) -external with_unmarshalled_file_stub : string -> ('a -> 'r) -> 'r - = "mfc_with_unmarshalled_file" - -external with_unmarshalled_if_changed_stub : string -> ('a -> 'r) -> 'r option - = "mfc_with_unmarshalled_if_changed" - -external clear_stub : unit -> unit = "mfc_clear" -external invalidate_stub : string -> unit = "mfc_invalidate" -external set_max_entries_stub : int -> unit = "mfc_set_max_entries" -external set_max_bytes_stub : int -> unit = "mfc_set_max_bytes" -external stats_stub : unit -> int * int = "mfc_stats" - -(* Public API *) - -let convert_failure path msg = - (* C code raises Failure with "path: message" format *) - (* Only convert if message starts with the path (i.e., from our C code) *) - let prefix = path ^ ": " in - let prefix_len = String.length prefix in - if String.length msg >= prefix_len && String.sub msg 0 prefix_len = prefix then - let error_msg = String.sub msg prefix_len (String.length msg - prefix_len) in - raise (Cache_error (path, error_msg)) - else - (* Re-raise user callback exceptions as-is *) - raise (Failure msg) - -let with_unmarshalled_file path f = - try - with_unmarshalled_file_stub path f - with - | Failure msg -> convert_failure path msg - [@@alert "-unsafe"] - -let with_unmarshalled_if_changed path f = - try - with_unmarshalled_if_changed_stub path f - with - | Failure msg -> convert_failure path msg - [@@alert "-unsafe"] - -let clear () = clear_stub () - -let invalidate path = 
invalidate_stub path - -let set_max_entries n = - if n < 0 then invalid_arg "Marshal_cache.set_max_entries: negative value"; - set_max_entries_stub n - -let set_max_bytes n = - if n < 0 then invalid_arg "Marshal_cache.set_max_bytes: negative value"; - set_max_bytes_stub n - -let stats () = - let (entry_count, mapped_bytes) = stats_stub () in - { entry_count; mapped_bytes } - diff --git a/analysis/vendor/skip-lite/marshal_cache/marshal_cache.mli b/analysis/vendor/skip-lite/marshal_cache/marshal_cache.mli deleted file mode 100644 index 091c3f69c6..0000000000 --- a/analysis/vendor/skip-lite/marshal_cache/marshal_cache.mli +++ /dev/null @@ -1,120 +0,0 @@ -(** Marshal Cache - - A high-performance cache for marshalled files that keeps file contents - memory-mapped (off-heap) and provides efficient repeated access with - automatic invalidation when files change on disk. - - {2 Memory Model} - - There is no fixed-size memory pool. Each cached file gets its own [mmap] - of exactly its file size: - - - {b mmap'd bytes}: Live in virtual address space (off-heap), managed by - OS + cache LRU eviction - - {b Unmarshalled value}: Lives in OCaml heap, managed by GC, exists only - during callback - - Physical RAM pages are allocated on demand (first access). Under memory - pressure, the OS can evict pages back to disk since they're file-backed. - - {2 Usage Example} - - {[ - Marshal_cache.with_unmarshalled_file "/path/to/data.marshal" - (fun (data : my_data_type) -> - (* Process data here - mmap stays valid for duration of callback *) - process data - ) - ]} - - {2 Platform Support} - - - macOS 10.13+: Fully supported - - Linux (glibc): Fully supported - - FreeBSD/OpenBSD: Should work (uses same mtime API as macOS) - - Windows: Not supported (no mmap) *) - -(** Exception raised for cache-related errors. - Contains the file path and an error message. *) -exception Cache_error of string * string - -(** Cache statistics. 
*) -type stats = { - entry_count : int; (** Number of files currently cached *) - mapped_bytes : int; (** Total bytes of memory-mapped data *) -} - -(** [with_unmarshalled_file path f] calls [f] with the unmarshalled value - from [path]. Guarantees the underlying mmap stays valid for the duration - of [f]. - - The cache automatically detects file changes via: - - Modification time (nanosecond precision where available) - - File size - - Inode number (detects atomic file replacement) - - {b Type safety warning}: This function is inherently unsafe. The caller - must ensure the type ['a] matches the actual marshalled data. Using the - wrong type results in undefined behavior (crashes, memory corruption). - This is equivalent to [Marshal.from_*] in terms of type safety. - - @raise Cache_error if the file cannot be read, mapped, or unmarshalled. - @raise exn if [f] raises; the cache state remains consistent. - - {b Thread safety}: Safe to call from multiple threads/domains. The cache - uses internal locking. The lock is released during the callback [f]. *) -val with_unmarshalled_file : string -> ('a -> 'r) -> 'r - [@@alert unsafe "Caller must ensure the file contains data of the expected type"] - -(** [with_unmarshalled_if_changed path f] is like {!with_unmarshalled_file} but - only unmarshals if the file changed since the last access. - - Returns [Some (f data)] if the file changed (or is accessed for the first time). - Returns [None] if the file has not changed since last access (no unmarshal occurs). - - This is the key primitive for building reactive/incremental systems: - {[ - let my_cache = Hashtbl.create 100 - - let get_result path = - match Marshal_cache.with_unmarshalled_if_changed path process with - | Some result -> - Hashtbl.replace my_cache path result; - result - | None -> - Hashtbl.find my_cache path (* use cached result *) - ]} - - @raise Cache_error if the file cannot be read, mapped, or unmarshalled. 
- @raise exn if [f] raises; the cache state remains consistent. *) -val with_unmarshalled_if_changed : string -> ('a -> 'r) -> 'r option - [@@alert unsafe "Caller must ensure the file contains data of the expected type"] - -(** Remove all entries from the cache, unmapping all memory. - Entries currently in use (during a callback) are preserved and will be - cleaned up when their callbacks complete. *) -val clear : unit -> unit - -(** [invalidate path] removes a specific path from the cache. - No-op if the path is not cached or is currently in use. *) -val invalidate : string -> unit - -(** [set_max_entries n] sets the maximum number of cached entries. - When exceeded, least-recently-used entries are evicted. - Default: 10000. Set to 0 for unlimited (not recommended for long-running - processes). - - @raise Invalid_argument if [n < 0] *) -val set_max_entries : int -> unit - -(** [set_max_bytes n] sets the maximum total bytes of mapped memory. - When exceeded, least-recently-used entries are evicted. - Default: 1GB (1073741824). Set to 0 for unlimited. - - @raise Invalid_argument if [n < 0] *) -val set_max_bytes : int -> unit - -(** [stats ()] returns cache statistics. - Useful for monitoring cache usage. *) -val stats : unit -> stats - diff --git a/analysis/vendor/skip-lite/marshal_cache/marshal_cache_stubs.cpp b/analysis/vendor/skip-lite/marshal_cache/marshal_cache_stubs.cpp deleted file mode 100644 index a18ba1b5a1..0000000000 --- a/analysis/vendor/skip-lite/marshal_cache/marshal_cache_stubs.cpp +++ /dev/null @@ -1,804 +0,0 @@ -// marshal_cache_stubs.cpp -// skip-lite: Marshal cache with mmap and LRU eviction -// OCaml 5+ compatible -// -// ============================================================================= -// WARNING: OCaml C FFI and GC Pitfalls -// ============================================================================= -// -// This file interfaces with the OCaml runtime. 
The OCaml garbage collector -// can move values in memory at any allocation point. Failure to handle this -// correctly causes memory corruption and segfaults. -// -// KEY RULES: -// -// 1. NEVER use String_val(v) across an allocation -// ------------------------------------------------ -// BAD: -// const char* s = String_val(str_val); -// some_ocaml_alloc(); // GC may run, str_val moves, s is now dangling -// use(s); // SEGFAULT -// -// GOOD: -// std::string s(String_val(str_val)); // Copy to C++ string first -// some_ocaml_alloc(); -// use(s.c_str()); // Safe, C++ owns the memory -// -// 2. NEVER nest allocations in Store_field -// ------------------------------------------------ -// BAD: -// value tuple = caml_alloc_tuple(2); -// Store_field(tuple, 0, caml_copy_string(s)); // DANGEROUS! -// // caml_copy_string allocates, may trigger GC, tuple address is -// // computed BEFORE the call, so we write to stale memory -// -// GOOD: -// value tuple = caml_alloc_tuple(2); -// value str = caml_copy_string(s); // Allocate first -// Store_field(tuple, 0, str); // Then store -// -// 3. CAMLlocal doesn't help with evaluation order -// ------------------------------------------------ -// CAMLlocal registers a variable so GC updates it when values move. -// But it doesn't fix the evaluation order problem in Store_field. -// The address computation happens before the nested function call. -// -// 4. Raising exceptions from C is tricky -// ------------------------------------------------ -// caml_raise* functions do a longjmp, so: -// - CAMLparam/CAMLlocal frames are not properly unwound -// - C++ destructors may not run (avoid RAII in throwing paths) -// - Prefer raising simple exceptions (Failure) and converting in OCaml -// -// 5. Callbacks can trigger arbitrary GC -// ------------------------------------------------ -// When calling caml_callback*, the OCaml code can allocate freely. -// All value variables from before the callback may be stale after. 
-// Either re-read them or use CAMLlocal to keep them updated. -// -// CURRENT APPROACH: -// - Errors are raised as Failure("path: message") from C -// - The OCaml wrapper catches Failure and converts to Cache_error -// - This avoids complex allocation sequences in exception-raising paths -// -// ============================================================================= - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -// OCaml headers -extern "C" { -#include -#include -#include -#include -#include -#include -} - -// Platform-specific mtime access (nanosecond precision) -#if defined(__APPLE__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) - #define MTIME_SEC(st) ((st).st_mtimespec.tv_sec) - #define MTIME_NSEC(st) ((st).st_mtimespec.tv_nsec) -#else // Linux and others - #define MTIME_SEC(st) ((st).st_mtim.tv_sec) - #define MTIME_NSEC(st) ((st).st_mtim.tv_nsec) -#endif - -namespace { - -// File identity for cache invalidation (mtime + size + inode) -struct FileId { - time_t mtime_sec; - long mtime_nsec; - off_t size; - ino_t ino; - - bool operator==(const FileId& other) const { - return mtime_sec == other.mtime_sec && - mtime_nsec == other.mtime_nsec && - size == other.size && - ino == other.ino; - } - - bool operator!=(const FileId& other) const { - return !(*this == other); - } -}; - -// A memory mapping -struct Mapping { - void* ptr = nullptr; - size_t len = 0; - FileId file_id = {}; - - bool is_valid() const { - return ptr != nullptr && ptr != MAP_FAILED && ptr != reinterpret_cast(1); - } -}; - -// Cache entry for a single file -struct Entry { - std::string path; - Mapping current; - size_t in_use = 0; // Number of active callbacks - std::vector old_mappings; // Deferred unmaps - std::list::iterator lru_iter; -}; - -// The global cache singleton -class MarshalCache { -public: - static MarshalCache& instance() { - static MarshalCache inst; - return 
inst; - } - - // Acquire a mapping, incrementing in_use. Returns pointer, length, and whether file changed. - // Throws std::runtime_error on failure. - void acquire_mapping(const std::string& path, void** out_ptr, size_t* out_len, bool* out_changed); - - // Release a mapping, decrementing in_use and cleaning up old mappings. - void release_mapping(const std::string& path); - - // Clear all entries (only those not in use) - void clear(); - - // Invalidate a specific path - void invalidate(const std::string& path); - - // Set limits - void set_max_entries(size_t n) { - std::lock_guard lock(mutex_); - max_entries_ = n; - evict_if_needed(); - } - - void set_max_bytes(size_t n) { - std::lock_guard lock(mutex_); - max_bytes_ = n; - evict_if_needed(); - } - - // Stats: (entry_count, total_mapped_bytes) - std::pair stats() { - std::lock_guard lock(mutex_); - return {cache_.size(), current_bytes_}; - } - -private: - MarshalCache() = default; - ~MarshalCache() { clear_internal(); } - - // Prevent copying - MarshalCache(const MarshalCache&) = delete; - MarshalCache& operator=(const MarshalCache&) = delete; - - // Must be called with mutex_ held - void evict_if_needed(); - void unmap_mapping(const Mapping& m); - void touch_lru(Entry& entry); - void clear_internal(); - - // Get file identity, throws on error - FileId get_file_id(const char* path); - - // Create a new mapping for a file, throws on error - Mapping create_mapping(const char* path, const FileId& file_id); - - std::unordered_map cache_; - std::list lru_order_; // front = most recent - std::mutex mutex_; - - size_t max_entries_ = 10000; - size_t max_bytes_ = 1ULL << 30; // 1GB default - size_t current_bytes_ = 0; -}; - -FileId MarshalCache::get_file_id(const char* path) { - struct stat st; - if (stat(path, &st) != 0) { - throw std::runtime_error(std::string("stat failed: ") + path + ": " + strerror(errno)); - } - return FileId{ - MTIME_SEC(st), - MTIME_NSEC(st), - st.st_size, - st.st_ino - }; -} - -Mapping 
MarshalCache::create_mapping(const char* path, const FileId& file_id) { - int fd = open(path, O_RDONLY); - if (fd < 0) { - throw std::runtime_error(std::string("open failed: ") + path + ": " + strerror(errno)); - } - - size_t len = static_cast(file_id.size); - void* ptr = nullptr; - - if (len > 0) { - ptr = mmap(nullptr, len, PROT_READ, MAP_PRIVATE, fd, 0); - } else { - // Empty file: use a sentinel non-null pointer - ptr = reinterpret_cast(1); - } - - // Close fd immediately - mapping remains valid on POSIX - close(fd); - - if (len > 0 && (ptr == MAP_FAILED || ptr == nullptr)) { - throw std::runtime_error(std::string("mmap failed: ") + path + ": " + strerror(errno)); - } - - Mapping m; - m.ptr = ptr; - m.len = len; - m.file_id = file_id; - return m; -} - -void MarshalCache::unmap_mapping(const Mapping& m) { - if (m.is_valid() && m.len > 0) { - munmap(m.ptr, m.len); - } -} - -void MarshalCache::touch_lru(Entry& entry) { - // Move to front of LRU list - lru_order_.erase(entry.lru_iter); - lru_order_.push_front(entry.path); - entry.lru_iter = lru_order_.begin(); -} - -void MarshalCache::evict_if_needed() { - // Must be called with mutex_ held - // Use >= because this is called BEFORE adding a new entry - while ((max_entries_ > 0 && cache_.size() >= max_entries_) || - (max_bytes_ > 0 && current_bytes_ >= max_bytes_)) { - if (lru_order_.empty()) break; - - // Find least-recently-used entry that is not in use - bool evicted = false; - for (auto it = lru_order_.rbegin(); it != lru_order_.rend(); ++it) { - auto cache_it = cache_.find(*it); - if (cache_it != cache_.end() && cache_it->second.in_use == 0) { - Entry& entry = cache_it->second; - - // Unmap current and all old mappings - unmap_mapping(entry.current); - for (const auto& m : entry.old_mappings) { - unmap_mapping(m); - } - current_bytes_ -= entry.current.len; - - lru_order_.erase(entry.lru_iter); - cache_.erase(cache_it); - evicted = true; - break; - } - } - if (!evicted) break; // All entries are in use - } -} - 
-void MarshalCache::acquire_mapping(const std::string& path, - void** out_ptr, size_t* out_len, bool* out_changed) { - std::unique_lock lock(mutex_); - - // Get current file identity - FileId current_id = get_file_id(path.c_str()); - - // Lookup or create entry - auto it = cache_.find(path); - bool needs_remap = false; - - if (it == cache_.end()) { - needs_remap = true; - } else if (it->second.current.file_id != current_id) { - needs_remap = true; - } - - if (needs_remap) { - // Only evict if we're adding a NEW entry (not updating existing) - // This prevents evicting the entry we're about to update - if (it == cache_.end()) { - evict_if_needed(); - } - - // Create new mapping (may throw) - Mapping new_mapping = create_mapping(path.c_str(), current_id); - - if (it == cache_.end()) { - // Insert new entry - Entry entry; - entry.path = path; - entry.current = new_mapping; - entry.in_use = 0; - lru_order_.push_front(path); - entry.lru_iter = lru_order_.begin(); - - cache_[path] = std::move(entry); - it = cache_.find(path); - } else { - // Update existing entry - Entry& entry = it->second; - Mapping old = entry.current; - entry.current = new_mapping; - - // Handle old mapping - if (old.is_valid()) { - if (entry.in_use == 0) { - unmap_mapping(old); - } else { - // Defer unmap until callbacks complete - entry.old_mappings.push_back(old); - } - current_bytes_ -= old.len; - } - } - - current_bytes_ += new_mapping.len; - } - - Entry& entry = it->second; - entry.in_use++; - touch_lru(entry); - - *out_ptr = entry.current.ptr; - *out_len = entry.current.len; - *out_changed = needs_remap; - - // Mutex released here (RAII) -} - -void MarshalCache::release_mapping(const std::string& path) { - std::lock_guard lock(mutex_); - - auto it = cache_.find(path); - if (it == cache_.end()) return; // Entry was evicted - - Entry& entry = it->second; - if (entry.in_use > 0) { - entry.in_use--; - } - - if (entry.in_use == 0 && !entry.old_mappings.empty()) { - // Clean up deferred unmaps - for 
(const auto& m : entry.old_mappings) { - unmap_mapping(m); - } - entry.old_mappings.clear(); - } -} - -void MarshalCache::clear_internal() { - for (auto& [path, entry] : cache_) { - if (entry.in_use == 0) { - unmap_mapping(entry.current); - } - for (const auto& m : entry.old_mappings) { - unmap_mapping(m); - } - } - cache_.clear(); - lru_order_.clear(); - current_bytes_ = 0; -} - -void MarshalCache::clear() { - std::lock_guard lock(mutex_); - - // Only clear entries not in use - for (auto it = cache_.begin(); it != cache_.end(); ) { - Entry& entry = it->second; - - // Always clean up old_mappings - for (const auto& m : entry.old_mappings) { - unmap_mapping(m); - } - entry.old_mappings.clear(); - - if (entry.in_use == 0) { - unmap_mapping(entry.current); - current_bytes_ -= entry.current.len; - lru_order_.erase(entry.lru_iter); - it = cache_.erase(it); - } else { - ++it; - } - } -} - -void MarshalCache::invalidate(const std::string& path) { - std::lock_guard lock(mutex_); - - auto it = cache_.find(path); - if (it == cache_.end()) return; - - Entry& entry = it->second; - - // Clean up old_mappings - for (const auto& m : entry.old_mappings) { - unmap_mapping(m); - } - entry.old_mappings.clear(); - - if (entry.in_use == 0) { - unmap_mapping(entry.current); - current_bytes_ -= entry.current.len; - lru_order_.erase(entry.lru_iter); - cache_.erase(it); - } - // If in_use > 0, the entry stays but will be refreshed on next access -} - -} // anonymous namespace - - -// ============================================================================= -// OCaml FFI stubs -// ============================================================================= - -extern "C" { - -// Helper to raise an error as Failure (converted to Cache_error in OCaml) -[[noreturn]] -static void raise_cache_error(const char* path, const char* message) { - std::string full_msg = std::string(path) + ": " + message; - caml_failwith(full_msg.c_str()); -} - -// 
============================================================================= -// CMT/CMI file format support -// ============================================================================= -// -// ReScript/OCaml compiler generates several file types with headers before Marshal data: -// -// Pure .cmt files (typed tree only): -// - "Caml1999T0xx" (12 bytes) - CMT magic -// - Marshal data (cmt_infos record) -// -// Combined .cmt/.cmti files (interface + typed tree): -// - "Caml1999I0xx" (12 bytes) - CMI magic -// - Marshal data #1 (cmi_name, cmi_sign) -// - Marshal data #2 (crcs) -// - Marshal data #3 (flags) -// - "Caml1999T0xx" (12 bytes) - CMT magic -// - Marshal data (cmt_infos record) -// -// Pure .cmi files (compiled interface only): -// - "Caml1999I0xx" (12 bytes) - CMI magic -// - Marshal data #1 (cmi_name, cmi_sign) -// - Marshal data #2 (crcs) -// - Marshal data #3 (flags) -// -// This code handles all formats and finds the CMT Marshal data. -// ============================================================================= - -static constexpr size_t OCAML_MAGIC_LENGTH = 12; -static constexpr const char* CMT_MAGIC_PREFIX = "Caml1999T"; -static constexpr const char* CMI_MAGIC_PREFIX = "Caml1999I"; -static constexpr size_t MAGIC_PREFIX_LENGTH = 9; // Length of "Caml1999T" or "Caml1999I" - -// Check if data at offset starts with a specific prefix -static bool has_prefix_at(const unsigned char* data, size_t len, size_t offset, - const char* prefix, size_t prefix_len) { - if (len < offset + prefix_len) return false; - return memcmp(data + offset, prefix, prefix_len) == 0; -} - -// Check for Marshal magic at given offset -// Marshal magic: 0x8495A6BE (small/32-bit) or 0x8495A6BF (large/64-bit) -static bool has_marshal_magic_at(const unsigned char* data, size_t len, size_t offset) { - if (len < offset + 4) return false; - uint32_t magic = (static_cast(data[offset]) << 24) | - (static_cast(data[offset + 1]) << 16) | - (static_cast(data[offset + 2]) << 8) | - 
static_cast(data[offset + 3]); - return magic == 0x8495A6BEu || magic == 0x8495A6BFu; -} - -// Get the size of a Marshal value from its header -// Marshal header format (20 bytes for small, 32 bytes for large): -// 4 bytes: magic -// 4 bytes: data_len (or 8 bytes for large) -// 4 bytes: num_objects (or 8 bytes for large) -// 4 bytes: size_32 (or 8 bytes for large) -// 4 bytes: size_64 (or 8 bytes for large) -// Total Marshal value size = header_size + data_len -static size_t get_marshal_total_size(const unsigned char* data, size_t len, size_t offset) { - if (len < offset + 20) { - throw std::runtime_error("not enough data for Marshal header"); - } - - uint32_t magic = (static_cast(data[offset]) << 24) | - (static_cast(data[offset + 1]) << 16) | - (static_cast(data[offset + 2]) << 8) | - static_cast(data[offset + 3]); - - bool is_large = (magic == 0x8495A6BFu); - size_t header_size = is_large ? 32 : 20; - - if (len < offset + header_size) { - throw std::runtime_error("not enough data for Marshal header"); - } - - // data_len is at offset 4 (32-bit) or offset 4 (64-bit, we read low 32 bits which is enough) - uint32_t data_len; - if (is_large) { - // For large format, data_len is 8 bytes. Read as 64-bit but we only care about reasonable sizes. 
- // High 32 bits at offset+4, low 32 bits at offset+8 - uint32_t high = (static_cast(data[offset + 4]) << 24) | - (static_cast(data[offset + 5]) << 16) | - (static_cast(data[offset + 6]) << 8) | - static_cast(data[offset + 7]); - uint32_t low = (static_cast(data[offset + 8]) << 24) | - (static_cast(data[offset + 9]) << 16) | - (static_cast(data[offset + 10]) << 8) | - static_cast(data[offset + 11]); - if (high != 0) { - throw std::runtime_error("Marshal data too large (>4GB)"); - } - data_len = low; - } else { - data_len = (static_cast(data[offset + 4]) << 24) | - (static_cast(data[offset + 5]) << 16) | - (static_cast(data[offset + 6]) << 8) | - static_cast(data[offset + 7]); - } - - return header_size + data_len; -} - -// Find the offset where CMT Marshal data starts -// Returns the offset, or throws on error -static size_t find_cmt_marshal_offset(const unsigned char* data, size_t len) { - if (len < 4) { - throw std::runtime_error("file too small"); - } - - // Check for pure Marshal file (starts with Marshal magic) - if (has_marshal_magic_at(data, len, 0)) { - return 0; - } - - // Check for pure CMT file (starts with "Caml1999T") - if (has_prefix_at(data, len, 0, CMT_MAGIC_PREFIX, MAGIC_PREFIX_LENGTH)) { - if (len < OCAML_MAGIC_LENGTH + 4) { - throw std::runtime_error("CMT file too small"); - } - if (!has_marshal_magic_at(data, len, OCAML_MAGIC_LENGTH)) { - throw std::runtime_error("CMT file: no Marshal magic after header"); - } - return OCAML_MAGIC_LENGTH; - } - - // Check for CMI file (starts with "Caml1999I") - // This may be a combined CMI+CMT file, need to skip CMI data to find CMT - if (has_prefix_at(data, len, 0, CMI_MAGIC_PREFIX, MAGIC_PREFIX_LENGTH)) { - if (len < OCAML_MAGIC_LENGTH + 4) { - throw std::runtime_error("CMI file too small"); - } - - // Skip the CMI header - size_t offset = OCAML_MAGIC_LENGTH; - - // CMI section has 3 Marshal values: - // 1. (cmi_name, cmi_sign) - // 2. crcs - // 3. 
flags - for (int i = 0; i < 3; i++) { - if (!has_marshal_magic_at(data, len, offset)) { - throw std::runtime_error("CMI file: expected Marshal value in CMI section"); - } - size_t marshal_size = get_marshal_total_size(data, len, offset); - offset += marshal_size; - if (offset > len) { - throw std::runtime_error("CMI file: Marshal value extends past end of file"); - } - } - - // Now check if there's a CMT section after the CMI data - if (has_prefix_at(data, len, offset, CMT_MAGIC_PREFIX, MAGIC_PREFIX_LENGTH)) { - // Found CMT magic after CMI data - offset += OCAML_MAGIC_LENGTH; - if (!has_marshal_magic_at(data, len, offset)) { - throw std::runtime_error("CMT section: no Marshal magic after header"); - } - return offset; - } - - // No CMT section - this is a pure CMI file - // Return the first CMI Marshal value (not ideal but allows reading CMI files) - throw std::runtime_error("CMI file without CMT section - use read_cmi instead"); - } - - // Unknown format - throw std::runtime_error("unrecognized file format (not Marshal, CMT, or CMI)"); -} - -// Unmarshal from mmap'd memory (zero-copy using OCaml 5+ API) -// Handles both pure Marshal files and CMT/CMI files with headers -static value unmarshal_from_ptr(void* ptr, size_t len) { - CAMLparam0(); - CAMLlocal1(result); - - if (len == 0) { - caml_failwith("marshal_cache: empty file"); - } - - const unsigned char* data = static_cast(ptr); - - // Find where CMT Marshal data starts (handles CMT/CMI headers) - size_t offset; - try { - offset = find_cmt_marshal_offset(data, len); - } catch (const std::exception& e) { - std::string msg = std::string("marshal_cache: ") + e.what(); - caml_failwith(msg.c_str()); - } - - // Validate remaining length - size_t marshal_len = len - offset; - if (marshal_len < 20) { - caml_failwith("marshal_cache: Marshal data too small"); - } - - // OCaml 5+ API: unmarshal directly from memory block (zero-copy!) 
- const char* marshal_ptr = reinterpret_cast(data + offset); - result = caml_input_value_from_block(marshal_ptr, static_cast(marshal_len)); - - CAMLreturn(result); -} - -// Main entry point: with_unmarshalled_file -CAMLprim value mfc_with_unmarshalled_file(value path_val, value closure_val) { - CAMLparam2(path_val, closure_val); - CAMLlocal2(unmarshalled, result); - - const char* path = String_val(path_val); - std::string path_str(path); - - void* ptr = nullptr; - size_t len = 0; - bool changed = false; - - // Acquire mapping (may throw) - try { - MarshalCache::instance().acquire_mapping(path_str, &ptr, &len, &changed); - } catch (const std::exception& e) { - // Use path_str.c_str() instead of path, because raise_cache_error - // allocates and can trigger GC which would invalidate the pointer - // from String_val(path_val) - raise_cache_error(path_str.c_str(), e.what()); - CAMLreturn(Val_unit); // Not reached - } - - // Unmarshal (may allocate, may trigger GC, may raise) - unmarshalled = unmarshal_from_ptr(ptr, len); - - // Call the OCaml callback - result = caml_callback_exn(closure_val, unmarshalled); - - // Release mapping before potentially re-raising - MarshalCache::instance().release_mapping(path_str); - - // Check if callback raised an exception - if (Is_exception_result(result)) { - value exn = Extract_exception(result); - caml_raise(exn); - } - - CAMLreturn(result); -} - -// Reactive entry point: only unmarshal if file changed -// Returns Some(f(data)) if changed, None if unchanged -CAMLprim value mfc_with_unmarshalled_if_changed(value path_val, value closure_val) { - CAMLparam2(path_val, closure_val); - CAMLlocal3(unmarshalled, result, some_result); - - const char* path = String_val(path_val); - std::string path_str(path); - - void* ptr = nullptr; - size_t len = 0; - bool changed = false; - - // Acquire mapping (may throw) - try { - MarshalCache::instance().acquire_mapping(path_str, &ptr, &len, &changed); - } catch (const std::exception& e) { - 
raise_cache_error(path_str.c_str(), e.what()); - CAMLreturn(Val_unit); // Not reached - } - - if (!changed) { - // File unchanged - release and return None - MarshalCache::instance().release_mapping(path_str); - CAMLreturn(Val_none); - } - - // File changed - unmarshal and call callback - unmarshalled = unmarshal_from_ptr(ptr, len); - - // Call the OCaml callback - result = caml_callback_exn(closure_val, unmarshalled); - - // Release mapping before potentially re-raising - MarshalCache::instance().release_mapping(path_str); - - // Check if callback raised an exception - if (Is_exception_result(result)) { - value exn = Extract_exception(result); - caml_raise(exn); - } - - // Wrap in Some - some_result = caml_alloc(1, 0); - Store_field(some_result, 0, result); - - CAMLreturn(some_result); -} - -// Clear all cache entries -CAMLprim value mfc_clear(value unit) { - CAMLparam1(unit); - MarshalCache::instance().clear(); - CAMLreturn(Val_unit); -} - -// Invalidate a specific path -CAMLprim value mfc_invalidate(value path_val) { - CAMLparam1(path_val); - const char* path = String_val(path_val); - std::string path_str(path); // Copy immediately for consistency - MarshalCache::instance().invalidate(path_str); - CAMLreturn(Val_unit); -} - -// Set max entries -CAMLprim value mfc_set_max_entries(value n_val) { - CAMLparam1(n_val); - size_t n = Long_val(n_val); - MarshalCache::instance().set_max_entries(n); - CAMLreturn(Val_unit); -} - -// Set max bytes -CAMLprim value mfc_set_max_bytes(value n_val) { - CAMLparam1(n_val); - size_t n = Long_val(n_val); - MarshalCache::instance().set_max_bytes(n); - CAMLreturn(Val_unit); -} - -// Get stats: returns (entry_count, total_mapped_bytes) -CAMLprim value mfc_stats(value unit) { - CAMLparam1(unit); - CAMLlocal1(result); - - auto [entries, bytes] = MarshalCache::instance().stats(); - - result = caml_alloc_tuple(2); - Store_field(result, 0, Val_long(entries)); - Store_field(result, 1, Val_long(bytes)); - - CAMLreturn(result); -} - -} // 
extern "C" - diff --git a/analysis/vendor/skip-lite/reactive_file_collection/dune b/analysis/vendor/skip-lite/reactive_file_collection/dune deleted file mode 100644 index e83405cb88..0000000000 --- a/analysis/vendor/skip-lite/reactive_file_collection/dune +++ /dev/null @@ -1,3 +0,0 @@ -(library - (name reactive_file_collection) - (libraries marshal_cache)) diff --git a/analysis/vendor/skip-lite/reactive_file_collection/reactive_file_collection.ml b/analysis/vendor/skip-lite/reactive_file_collection/reactive_file_collection.ml deleted file mode 100644 index 9b137d7469..0000000000 --- a/analysis/vendor/skip-lite/reactive_file_collection/reactive_file_collection.ml +++ /dev/null @@ -1,77 +0,0 @@ -(* Reactive File Collection - Implementation *) - -type event = - | Added of string - | Removed of string - | Modified of string - -(* We need to use Obj.t to make the polymorphic process function work - with Marshal_cache which returns ['a]. This is safe because the user - guarantees the file contains data of the expected type. 
*) -type 'v process_fn = Obj.t -> 'v - -type 'v t = { - data : (string, 'v) Hashtbl.t; - process : 'v process_fn; -} - -let create (type a v) ~(process : a -> v) : v t = - let process_fn : v process_fn = fun obj -> process (Obj.obj obj) in - { - data = Hashtbl.create 256; - process = process_fn; - } - -let add t path = - let value = Marshal_cache.with_unmarshalled_file path (fun data -> - t.process (Obj.repr data) - ) in - Hashtbl.replace t.data path value - [@@alert "-unsafe"] - -let remove t path = - Hashtbl.remove t.data path - -let update t path = - (* Just reload - Marshal_cache handles the file reading efficiently *) - add t path - -let apply t events = - List.iter (function - | Added path -> add t path - | Removed path -> remove t path - | Modified path -> update t path - ) events -let get t path = - Hashtbl.find_opt t.data path - -let find t path = - Hashtbl.find t.data path - -let mem t path = - Hashtbl.mem t.data path - -let length t = - Hashtbl.length t.data - -let is_empty t = - length t = 0 - -let iter f t = - Hashtbl.iter f t.data - -let fold f t init = - Hashtbl.fold f t.data init - -let to_list t = - fold (fun k v acc -> (k, v) :: acc) t [] - -let paths t = - fold (fun k _ acc -> k :: acc) t [] - -let values t = - fold (fun _ v acc -> v :: acc) t [] - - - - diff --git a/analysis/vendor/skip-lite/test_cmt.ml b/analysis/vendor/skip-lite/test_cmt.ml deleted file mode 100644 index c2a4c21f7e..0000000000 --- a/analysis/vendor/skip-lite/test_cmt.ml +++ /dev/null @@ -1,119 +0,0 @@ -(* Test that Marshal_cache can read CMT files *) - -[@@@alert "-unsafe"] - -let print_cmt_info (cmt : Cmt_format.cmt_infos) = - Printf.printf " Module name: %s\n%!" cmt.cmt_modname; - Printf.printf " Build dir: %s\n%!" cmt.cmt_builddir; - (match cmt.cmt_sourcefile with - | Some sf -> Printf.printf " Source file: %s\n%!" sf - | None -> Printf.printf " Source file: none\n%!") - -let test_cmt_file_standard path = - Printf.printf "Testing with Cmt_format.read_cmt: %s\n%!" 
path; - try - let cmt = Cmt_format.read_cmt path in - print_cmt_info cmt; - Printf.printf " SUCCESS with standard read_cmt\n%!"; - true - with e -> - Printf.printf " FAILED: %s\n%!" (Printexc.to_string e); - false - -let test_cmt_file_cache path = - Printf.printf "Testing with Marshal_cache: %s\n%!" path; - try - Marshal_cache.with_unmarshalled_file path (fun (cmt : Cmt_format.cmt_infos) -> - print_cmt_info cmt; - Printf.printf " SUCCESS with Marshal_cache!\n%!"; - true - ) - with - | Marshal_cache.Cache_error (p, msg) -> - Printf.printf " Cache_error: %s: %s\n%!" p msg; - false - | e -> - Printf.printf " FAILED: %s\n%!" (Printexc.to_string e); - false - -let test_cmt_file path = - if not (Sys.file_exists path) then begin - Printf.printf "File not found: %s\n%!" path; - false - end else begin - Printf.printf "\n=== Testing: %s ===\n%!" path; - let std_ok = test_cmt_file_standard path in - Printf.printf "\n%!"; - let cache_ok = test_cmt_file_cache path in - std_ok && cache_ok - end - - -let () = - Printf.printf "=== Marshal_cache CMT Test ===\n\n%!"; - - (* Get CMT files from command line args or find in lib/bs *) - let cmt_files = - if Array.length Sys.argv > 1 then - Array.to_list (Array.sub Sys.argv 1 (Array.length Sys.argv - 1)) - else begin - (* Find CMT files in lib/bs *) - let find_cmt_in_dir dir = - if Sys.file_exists dir && Sys.is_directory dir then begin - let rec find acc dir = - Array.fold_left (fun acc name -> - let path = Filename.concat dir name in - if Sys.is_directory path then - find acc path - else if Filename.check_suffix path ".cmt" then - path :: acc - else - acc - ) acc (Sys.readdir dir) - in - find [] dir - end else [] - in - let lib_bs = "lib/bs" in - let files = find_cmt_in_dir lib_bs in - Printf.printf "Found %d CMT files in %s\n\n%!" 
(List.length files) lib_bs; - files - end - in - - (* Test first 3 CMT files *) - let test_files = - cmt_files - |> List.sort String.compare - |> (fun l -> try List.filteri (fun i _ -> i < 3) l with _ -> l) - in - - List.iter (fun path -> - let _ = test_cmt_file path in - Printf.printf "\n%!" - ) test_files; - - (* Test if_changed API *) - Printf.printf "=== Testing with_unmarshalled_if_changed ===\n\n%!"; - Marshal_cache.clear (); (* Clear cache to start fresh *) - (match test_files with - | path :: _ -> - Printf.printf "First call (should process):\n%!"; - (match Marshal_cache.with_unmarshalled_if_changed path (fun (cmt : Cmt_format.cmt_infos) -> - Printf.printf " Processed: %s\n%!" cmt.cmt_modname; - cmt.cmt_modname - ) with - | Some name -> Printf.printf " Result: Some(%s) - SUCCESS (file was processed)\n%!" name - | None -> Printf.printf " Result: None (unexpected - should have processed!)\n%!"); - - Printf.printf "Second call (should return None - file unchanged):\n%!"; - (match Marshal_cache.with_unmarshalled_if_changed path (fun (cmt : Cmt_format.cmt_infos) -> - Printf.printf " Processed: %s\n%!" cmt.cmt_modname; - cmt.cmt_modname - ) with - | Some name -> Printf.printf " Result: Some(%s) (unexpected - file should be cached!)\n%!" name - | None -> Printf.printf " Result: None - SUCCESS (file was cached!)\n%!") - | [] -> Printf.printf "No CMT files to test\n%!"); - - Printf.printf "\n=== Test Complete ===\n%!" 
- diff --git a/tests/analysis_tests/tests-reanalyze/deadcode-benchmark/Makefile b/tests/analysis_tests/tests-reanalyze/deadcode-benchmark/Makefile index a7f30e2282..33bc025c4e 100644 --- a/tests/analysis_tests/tests-reanalyze/deadcode-benchmark/Makefile +++ b/tests/analysis_tests/tests-reanalyze/deadcode-benchmark/Makefile @@ -60,7 +60,7 @@ time-reactive: generate build @dune exec rescript-editor-analysis -- reanalyze -config -ci -timing 2>&1 | grep -E "=== Timing|CMT processing|File loading|Total:" @echo "" @echo "Reactive mode (3 runs - first is cold, subsequent are warm):" - @dune exec rescript-editor-analysis -- reanalyze -config -ci -timing -reactive -runs 3 2>&1 | grep -E "=== Run|=== Timing|CMT processing|File loading|Total:" + @dune exec rescript-editor-analysis -- reanalyze -config -ci -timing -reactive -runs 3 2>/dev/null .DEFAULT_GOAL := benchmark diff --git a/tests/analysis_tests/tests-reanalyze/deadcode-benchmark/package.json b/tests/analysis_tests/tests-reanalyze/deadcode-benchmark/package.json index fc8d9b2b70..f89de2fb09 100644 --- a/tests/analysis_tests/tests-reanalyze/deadcode-benchmark/package.json +++ b/tests/analysis_tests/tests-reanalyze/deadcode-benchmark/package.json @@ -2,8 +2,8 @@ "name": "@tests/reanalyze-benchmark", "private": true, "scripts": { - "build": "rescript-legacy build", - "clean": "rescript-legacy clean" + "build": "rescript build", + "clean": "rescript clean" }, "dependencies": { "@rescript/react": "link:../../../dependencies/rescript-react", diff --git a/tests/dependencies/rescript-react/package.json b/tests/dependencies/rescript-react/package.json index 0d09e376d6..eaf7dd05a3 100644 --- a/tests/dependencies/rescript-react/package.json +++ b/tests/dependencies/rescript-react/package.json @@ -1,4 +1,29 @@ { "name": "@rescript/react", - "private": true + "private": true, + "version": "12.0.2", + "homepage": "https://rescript-lang.org", + "bugs": "https://github.com/rescript-lang/rescript/issues", + "repository": { + "type": 
"git", + "url": "git+https://github.com/rescript-lang/rescript.git" + }, + "author": { + "name": "Hongbo Zhang", + "email": "bobzhang1988@gmail.com" + }, + "maintainers": [ + "Christoph Knittel (https://github.com/cknitt)", + "Cristiano Calcagno (https://github.com/cristianoc)", + "Dmitry Zakharov (https://github.com/DZakh)", + "Florian Hammerschmidt (https://github.com/fhammerschmidt)", + "Gabriel Nordeborn (https://github.com/zth)", + "Hyeseong Kim (https://github.com/cometkim)", + "Jaap Frolich (https://github.com/jfrolich)", + "Matthias Le Brun (https://github.com/bloodyowl)", + "Patrick Ecker (https://github.com/ryyppy)", + "Paul Tsnobiladzé (https://github.com/tsnobip)", + "Woonki Moon (https://github.com/mununki)" + ], + "preferUnplugged": true } From dfdf0109f09077999a1dddde8be67d725ec24de7 Mon Sep 17 00:00:00 2001 From: Cristiano Calcagno Date: Tue, 16 Dec 2025 06:02:51 +0100 Subject: [PATCH 5/9] Move reactive combinators to dedicated library with composition support - Create new analysis/reactive library with: - Reactive.ml: Core combinators (delta type, flatMap with merge) - ReactiveFileCollection.ml: Generic file collection with change detection - Comprehensive tests including multi-stage composition - Remove CmtCache module (logic absorbed into ReactiveFileCollection) - ReactiveAnalysis now uses generic ReactiveFileCollection with: - read_file: Cmt_format.read_cmt - process: CMT analysis function - Test composition: files -> word_counts (with merge) -> frequent_words Demonstrates delta propagation across multiple flatMap stages --- analysis/dune | 2 +- analysis/reactive/dune | 1 + analysis/reactive/src/Reactive.ml | 138 +++++++ analysis/reactive/src/Reactive.mli | 75 ++++ .../reactive/src/ReactiveFileCollection.ml | 87 +++++ .../reactive/src/ReactiveFileCollection.mli | 59 +++ analysis/reactive/src/dune | 4 + analysis/reactive/test/ReactiveTest.ml | 338 ++++++++++++++++++ analysis/reactive/test/dune | 3 + analysis/reanalyze/src/Cli.ml | 3 - 
analysis/reanalyze/src/CmtCache.ml | 70 ---- analysis/reanalyze/src/CmtCache.mli | 27 -- analysis/reanalyze/src/ReactiveAnalysis.ml | 34 +- .../reanalyze/src/ReactiveFileCollection.ml | 52 --- .../reanalyze/src/ReactiveFileCollection.mli | 104 ------ analysis/reanalyze/src/Reanalyze.ml | 8 +- analysis/reanalyze/src/Timing.ml | 3 +- analysis/reanalyze/src/dune | 2 +- 18 files changed, 723 insertions(+), 287 deletions(-) create mode 100644 analysis/reactive/dune create mode 100644 analysis/reactive/src/Reactive.ml create mode 100644 analysis/reactive/src/Reactive.mli create mode 100644 analysis/reactive/src/ReactiveFileCollection.ml create mode 100644 analysis/reactive/src/ReactiveFileCollection.mli create mode 100644 analysis/reactive/src/dune create mode 100644 analysis/reactive/test/ReactiveTest.ml create mode 100644 analysis/reactive/test/dune delete mode 100644 analysis/reanalyze/src/CmtCache.ml delete mode 100644 analysis/reanalyze/src/CmtCache.mli delete mode 100644 analysis/reanalyze/src/ReactiveFileCollection.ml delete mode 100644 analysis/reanalyze/src/ReactiveFileCollection.mli diff --git a/analysis/dune b/analysis/dune index 6b297d2e58..9b02abb4b5 100644 --- a/analysis/dune +++ b/analysis/dune @@ -1,4 +1,4 @@ -(dirs bin src reanalyze vendor) +(dirs bin src reactive reanalyze vendor) (env (dev diff --git a/analysis/reactive/dune b/analysis/reactive/dune new file mode 100644 index 0000000000..2aac24f843 --- /dev/null +++ b/analysis/reactive/dune @@ -0,0 +1 @@ +(dirs src test) diff --git a/analysis/reactive/src/Reactive.ml b/analysis/reactive/src/Reactive.ml new file mode 100644 index 0000000000..cb8b29ebd2 --- /dev/null +++ b/analysis/reactive/src/Reactive.ml @@ -0,0 +1,138 @@ +(** Reactive collections for incremental computation. + + Provides composable reactive collections with delta-based updates. 
*) + +(** {1 Deltas} *) + +type ('k, 'v) delta = Set of 'k * 'v | Remove of 'k + +let apply_delta tbl = function + | Set (k, v) -> Hashtbl.replace tbl k v + | Remove k -> Hashtbl.remove tbl k + +let apply_deltas tbl deltas = List.iter (apply_delta tbl) deltas + +(** {1 Reactive Collection} *) + +type ('k, 'v) t = { + subscribe: (('k, 'v) delta -> unit) -> unit; + iter: ('k -> 'v -> unit) -> unit; + get: 'k -> 'v option; + length: unit -> int; +} +(** A reactive collection that can emit deltas and be read. + All collections share this interface, enabling composition. *) + +(** {1 Collection operations} *) + +let iter f t = t.iter f +let get t k = t.get k +let length t = t.length () + +(** {1 FlatMap} *) + +(** Transform a collection into another collection. + Each source entry maps to multiple target entries via [f]. + Optional [merge] combines values when multiple sources produce the same key. *) +let flatMap (source : ('k1, 'v1) t) ~f ?merge () : ('k2, 'v2) t = + let merge = + match merge with + | Some m -> m + | None -> fun _ v -> v + in + (* Internal state *) + let provenance : ('k1, 'k2 list) Hashtbl.t = Hashtbl.create 64 in + let contributions : ('k2, ('k1, 'v2) Hashtbl.t) Hashtbl.t = + Hashtbl.create 256 + in + let target : ('k2, 'v2) Hashtbl.t = Hashtbl.create 256 in + let subscribers : (('k2, 'v2) delta -> unit) list ref = ref [] in + + let emit delta = List.iter (fun h -> h delta) !subscribers in + + let recompute_target k2 = + match Hashtbl.find_opt contributions k2 with + | None -> + Hashtbl.remove target k2; + Some (Remove k2) + | Some contribs when Hashtbl.length contribs = 0 -> + Hashtbl.remove contributions k2; + Hashtbl.remove target k2; + Some (Remove k2) + | Some contribs -> + let values = Hashtbl.fold (fun _ v acc -> v :: acc) contribs [] in + let merged = + match values with + | [] -> assert false + | [v] -> v + | v :: rest -> List.fold_left merge v rest + in + Hashtbl.replace target k2 merged; + Some (Set (k2, merged)) + in + + let 
remove_source k1 = + match Hashtbl.find_opt provenance k1 with + | None -> [] + | Some target_keys -> + Hashtbl.remove provenance k1; + target_keys + |> List.iter (fun k2 -> + match Hashtbl.find_opt contributions k2 with + | None -> () + | Some contribs -> Hashtbl.remove contribs k1); + target_keys + in + + let add_source k1 entries = + let target_keys = List.map fst entries in + Hashtbl.replace provenance k1 target_keys; + entries + |> List.iter (fun (k2, v2) -> + let contribs = + match Hashtbl.find_opt contributions k2 with + | Some c -> c + | None -> + let c = Hashtbl.create 4 in + Hashtbl.replace contributions k2 c; + c + in + Hashtbl.replace contribs k1 v2); + target_keys + in + + let handle_delta delta = + let downstream = + match delta with + | Remove k1 -> + let affected = remove_source k1 in + affected |> List.filter_map recompute_target + | Set (k1, v1) -> + let old_affected = remove_source k1 in + let new_entries = f k1 v1 in + let new_affected = add_source k1 new_entries in + let all_affected = old_affected @ new_affected in + let seen = Hashtbl.create (List.length all_affected) in + all_affected + |> List.filter_map (fun k2 -> + if Hashtbl.mem seen k2 then None + else ( + Hashtbl.replace seen k2 (); + recompute_target k2)) + in + List.iter emit downstream + in + + (* Subscribe to future deltas *) + source.subscribe handle_delta; + + (* Populate from existing entries *) + source.iter (fun k v -> handle_delta (Set (k, v))); + + (* Return collection interface *) + { + subscribe = (fun handler -> subscribers := handler :: !subscribers); + iter = (fun f -> Hashtbl.iter f target); + get = (fun k -> Hashtbl.find_opt target k); + length = (fun () -> Hashtbl.length target); + } diff --git a/analysis/reactive/src/Reactive.mli b/analysis/reactive/src/Reactive.mli new file mode 100644 index 0000000000..8b1b3e5a31 --- /dev/null +++ b/analysis/reactive/src/Reactive.mli @@ -0,0 +1,75 @@ +(** Reactive collections for incremental computation. 
+ + Provides composable reactive collections with delta-based updates. + + {2 Example: Composing collections} + + {[ + (* Create a file collection *) + let files = ReactiveFileCollection.create ~read_file ~process in + + (* Derive a declarations collection *) + let decls = Reactive.flatMap files + ~f:(fun _path data -> data.decls) + () + + (* Derive a references collection with merging *) + let refs = Reactive.flatMap decls + ~f:(fun _pos decl -> decl.refs) + ~merge:PosSet.union + () + + (* Process files - all downstream collections update automatically *) + files |> Reactive.iter (fun path _ -> + ReactiveFileCollection.process_if_changed files_internal path) + + (* Read from any collection *) + Reactive.iter (fun k v -> ...) refs + ]} *) + +(** {1 Deltas} *) + +type ('k, 'v) delta = Set of 'k * 'v | Remove of 'k + +val apply_delta : ('k, 'v) Hashtbl.t -> ('k, 'v) delta -> unit +val apply_deltas : ('k, 'v) Hashtbl.t -> ('k, 'v) delta list -> unit + +(** {1 Reactive Collection} *) + +type ('k, 'v) t = { + subscribe: (('k, 'v) delta -> unit) -> unit; + iter: ('k -> 'v -> unit) -> unit; + get: 'k -> 'v option; + length: unit -> int; +} +(** A reactive collection that can emit deltas and be read. + All collections share this interface, enabling composition. *) + +(** {1 Collection operations} *) + +val iter : ('k -> 'v -> unit) -> ('k, 'v) t -> unit +(** Iterate over entries. *) + +val get : ('k, 'v) t -> 'k -> 'v option +(** Get a value by key. *) + +val length : ('k, 'v) t -> int +(** Number of entries. *) + +(** {1 Composition} *) + +val flatMap : + ('k1, 'v1) t -> + f:('k1 -> 'v1 -> ('k2 * 'v2) list) -> + ?merge:('v2 -> 'v2 -> 'v2) -> + unit -> + ('k2, 'v2) t +(** [flatMap source ~f ()] creates a derived collection. + + Each entry [(k1, v1)] in [source] produces entries [(k2, v2), ...] via [f k1 v1]. + When [source] changes, the derived collection updates automatically. + + Optional [merge] combines values when multiple sources produce the same key. 
+ Defaults to last-write-wins. + + Derived collections can be further composed with [flatMap]. *) diff --git a/analysis/reactive/src/ReactiveFileCollection.ml b/analysis/reactive/src/ReactiveFileCollection.ml new file mode 100644 index 0000000000..88f9a77265 --- /dev/null +++ b/analysis/reactive/src/ReactiveFileCollection.ml @@ -0,0 +1,87 @@ +(** Reactive File Collection + + Creates a reactive collection from files with automatic change detection. *) + +type file_id = {mtime: float; size: int; ino: int} +(** File identity for change detection *) + +let get_file_id path : file_id = + let st = Unix.stat path in + {mtime = st.Unix.st_mtime; size = st.Unix.st_size; ino = st.Unix.st_ino} + +let file_changed ~old_id ~new_id = + old_id.mtime <> new_id.mtime + || old_id.size <> new_id.size || old_id.ino <> new_id.ino + +type ('raw, 'v) internal = { + cache: (string, file_id * 'v) Hashtbl.t; + read_file: string -> 'raw; + process: 'raw -> 'v; + mutable subscribers: ((string, 'v) Reactive.delta -> unit) list; +} +(** Internal state for file collection *) + +type ('raw, 'v) t = { + internal: ('raw, 'v) internal; + collection: (string, 'v) Reactive.t; +} +(** A file collection is just a Reactive.t with some extra operations *) + +let emit t delta = List.iter (fun h -> h delta) t.internal.subscribers + +(** Create a new reactive file collection *) +let create ~read_file ~process : ('raw, 'v) t = + let internal = + {cache = Hashtbl.create 256; read_file; process; subscribers = []} + in + let collection = + { + Reactive.subscribe = + (fun handler -> internal.subscribers <- handler :: internal.subscribers); + iter = + (fun f -> Hashtbl.iter (fun path (_, v) -> f path v) internal.cache); + get = + (fun path -> + match Hashtbl.find_opt internal.cache path with + | Some (_, v) -> Some v + | None -> None); + length = (fun () -> Hashtbl.length internal.cache); + } + in + {internal; collection} + +(** Get the collection interface for composition *) +let to_collection t : (string, 'v) 
Reactive.t = t.collection + +(** Process a file if changed. Emits delta to subscribers. *) +let process_if_changed t path = + let new_id = get_file_id path in + match Hashtbl.find_opt t.internal.cache path with + | Some (old_id, _) when not (file_changed ~old_id ~new_id) -> + false (* unchanged *) + | _ -> + let raw = t.internal.read_file path in + let value = t.internal.process raw in + Hashtbl.replace t.internal.cache path (new_id, value); + emit t (Reactive.Set (path, value)); + true (* changed *) + +(** Process multiple files *) +let process_files t paths = + List.iter (fun path -> ignore (process_if_changed t path)) paths + +(** Remove a file *) +let remove t path = + Hashtbl.remove t.internal.cache path; + emit t (Reactive.Remove path) + +(** Clear all cached data *) +let clear t = Hashtbl.clear t.internal.cache + +(** Invalidate a path *) +let invalidate t path = Hashtbl.remove t.internal.cache path + +let get t path = t.collection.get path +let mem t path = Hashtbl.mem t.internal.cache path +let length t = t.collection.length () +let iter f t = t.collection.iter f diff --git a/analysis/reactive/src/ReactiveFileCollection.mli b/analysis/reactive/src/ReactiveFileCollection.mli new file mode 100644 index 0000000000..3730c11d70 --- /dev/null +++ b/analysis/reactive/src/ReactiveFileCollection.mli @@ -0,0 +1,59 @@ +(** Reactive File Collection + + Creates a reactive collection from files with automatic change detection. + + {2 Example} + + {[ + (* Create file collection *) + let files = ReactiveFileCollection.create + ~read_file:Cmt_format.read_cmt + ~process:(fun cmt -> extract_data cmt) + + (* Compose with flatMap *) + let decls = Reactive.flatMap (ReactiveFileCollection.to_collection files) + ~f:(fun _path data -> data.decls) + () + + (* Process files - decls updates automatically *) + ReactiveFileCollection.process_files files [file_a; file_b]; + + (* Read results *) + Reactive.iter (fun pos decl -> ...) 
decls + ]} *) + +type ('raw, 'v) t +(** A file collection. ['raw] is the raw file type, ['v] is the processed value. *) + +(** {1 Creation} *) + +val create : read_file:(string -> 'raw) -> process:('raw -> 'v) -> ('raw, 'v) t +(** Create a new file collection. *) + +(** {1 Composition} *) + +val to_collection : ('raw, 'v) t -> (string, 'v) Reactive.t +(** Get the reactive collection interface for use with [Reactive.flatMap]. *) + +(** {1 Processing} *) + +val process_files : ('raw, 'v) t -> string list -> unit +(** Process files, emitting deltas for changed files. *) + +val process_if_changed : ('raw, 'v) t -> string -> bool +(** Process a file if changed. Returns true if file was processed. *) + +val remove : ('raw, 'v) t -> string -> unit +(** Remove a file from the collection. *) + +(** {1 Cache Management} *) + +val invalidate : ('raw, 'v) t -> string -> unit +val clear : ('raw, 'v) t -> unit + +(** {1 Access} *) + +val get : ('raw, 'v) t -> string -> 'v option +val mem : ('raw, 'v) t -> string -> bool +val length : ('raw, 'v) t -> int +val iter : (string -> 'v -> unit) -> ('raw, 'v) t -> unit diff --git a/analysis/reactive/src/dune b/analysis/reactive/src/dune new file mode 100644 index 0000000000..4fb933961f --- /dev/null +++ b/analysis/reactive/src/dune @@ -0,0 +1,4 @@ +(library + (name reactive) + (wrapped false) + (libraries unix)) diff --git a/analysis/reactive/test/ReactiveTest.ml b/analysis/reactive/test/ReactiveTest.ml new file mode 100644 index 0000000000..740d11f941 --- /dev/null +++ b/analysis/reactive/test/ReactiveTest.ml @@ -0,0 +1,338 @@ +(** Tests for Reactive collections *) + +open Reactive + +(** {1 Helper functions} *) + +let read_lines path = + let ic = open_in path in + let lines = ref [] in + (try + while true do + lines := input_line ic :: !lines + done + with End_of_file -> ()); + close_in ic; + List.rev !lines + +let write_lines path lines = + let oc = open_out path in + List.iter (fun line -> output_string oc (line ^ "\n")) lines; + 
close_out oc + +(** {1 Tests} *) + +let test_flatmap_basic () = + Printf.printf "=== Test: flatMap basic ===\n"; + + (* Create a simple source collection *) + let data : (int, string) Hashtbl.t = Hashtbl.create 16 in + let subscribers : ((int, string) delta -> unit) list ref = ref [] in + + let source : (int, string) t = + { + subscribe = (fun h -> subscribers := h :: !subscribers); + iter = (fun f -> Hashtbl.iter f data); + get = (fun k -> Hashtbl.find_opt data k); + length = (fun () -> Hashtbl.length data); + } + in + + let emit delta = + apply_delta data delta; + List.iter (fun h -> h delta) !subscribers + in + + (* Create derived collection via flatMap *) + let derived = + flatMap source + ~f:(fun key value -> + [(key * 10, value); ((key * 10) + 1, value); ((key * 10) + 2, value)]) + () + in + + (* Add entry -> derived should have 3 entries *) + emit (Set (1, "a")); + Printf.printf "After Set(1, 'a'): derived has %d entries\n" (length derived); + assert (length derived = 3); + assert (get derived 10 = Some "a"); + assert (get derived 11 = Some "a"); + assert (get derived 12 = Some "a"); + + (* Add another entry *) + emit (Set (2, "b")); + Printf.printf "After Set(2, 'b'): derived has %d entries\n" (length derived); + assert (length derived = 6); + + (* Update entry *) + emit (Set (1, "A")); + Printf.printf "After Set(1, 'A'): derived has %d entries\n" (length derived); + assert (get derived 10 = Some "A"); + assert (length derived = 6); + + (* Remove entry *) + emit (Remove 1); + Printf.printf "After Remove(1): derived has %d entries\n" (length derived); + assert (length derived = 3); + assert (get derived 10 = None); + assert (get derived 20 = Some "b"); + + Printf.printf "PASSED\n\n" + +module IntSet = Set.Make (Int) + +let test_flatmap_with_merge () = + Printf.printf "=== Test: flatMap with merge ===\n"; + + let data : (int, IntSet.t) Hashtbl.t = Hashtbl.create 16 in + let subscribers : ((int, IntSet.t) delta -> unit) list ref = ref [] in + + let source : 
(int, IntSet.t) t = + { + subscribe = (fun h -> subscribers := h :: !subscribers); + iter = (fun f -> Hashtbl.iter f data); + get = (fun k -> Hashtbl.find_opt data k); + length = (fun () -> Hashtbl.length data); + } + in + + let emit delta = + apply_delta data delta; + List.iter (fun h -> h delta) !subscribers + in + + (* Create derived with merge *) + let derived = + flatMap source + ~f:(fun _key values -> [(0, values)]) (* all contribute to key 0 *) + ~merge:IntSet.union () + in + + (* Source 1 contributes {1, 2} *) + emit (Set (1, IntSet.of_list [1; 2])); + let v = get derived 0 |> Option.get in + Printf.printf "After source 1: {%s}\n" + (IntSet.elements v |> List.map string_of_int |> String.concat ", "); + assert (IntSet.equal v (IntSet.of_list [1; 2])); + + (* Source 2 contributes {3, 4} -> should merge *) + emit (Set (2, IntSet.of_list [3; 4])); + let v = get derived 0 |> Option.get in + Printf.printf "After source 2: {%s}\n" + (IntSet.elements v |> List.map string_of_int |> String.concat ", "); + assert (IntSet.equal v (IntSet.of_list [1; 2; 3; 4])); + + (* Remove source 1 *) + emit (Remove 1); + let v = get derived 0 |> Option.get in + Printf.printf "After remove 1: {%s}\n" + (IntSet.elements v |> List.map string_of_int |> String.concat ", "); + assert (IntSet.equal v (IntSet.of_list [3; 4])); + + Printf.printf "PASSED\n\n" + +let test_composition () = + Printf.printf "=== Test: composition (flatMap chain) ===\n"; + + (* Source: file -> list of items *) + let data : (string, string list) Hashtbl.t = Hashtbl.create 16 in + let subscribers : ((string, string list) delta -> unit) list ref = ref [] in + + let source : (string, string list) t = + { + subscribe = (fun h -> subscribers := h :: !subscribers); + iter = (fun f -> Hashtbl.iter f data); + get = (fun k -> Hashtbl.find_opt data k); + length = (fun () -> Hashtbl.length data); + } + in + + let emit delta = + apply_delta data delta; + List.iter (fun h -> h delta) !subscribers + in + + (* First flatMap: file 
-> items *) + let items = + flatMap source + ~f:(fun path items -> + List.mapi (fun i item -> (Printf.sprintf "%s:%d" path i, item)) items) + () + in + + (* Second flatMap: item -> chars *) + let chars = + flatMap items + ~f:(fun key value -> + String.to_seq value + |> Seq.mapi (fun i c -> (Printf.sprintf "%s:%d" key i, c)) + |> List.of_seq) + () + in + + (* Add file with 2 items *) + emit (Set ("file1", ["ab"; "cd"])); + Printf.printf "After file1: items=%d, chars=%d\n" (length items) + (length chars); + assert (length items = 2); + assert (length chars = 4); + + (* Add another file *) + emit (Set ("file2", ["xyz"])); + Printf.printf "After file2: items=%d, chars=%d\n" (length items) + (length chars); + assert (length items = 3); + assert (length chars = 7); + + (* Update file1 *) + emit (Set ("file1", ["a"])); + Printf.printf "After update file1: items=%d, chars=%d\n" (length items) + (length chars); + assert (length items = 2); + (* 1 from file1 + 1 from file2 *) + assert (length chars = 4); + + (* 1 from file1 + 3 from file2 *) + Printf.printf "PASSED\n\n" + +let test_flatmap_on_existing_data () = + Printf.printf "=== Test: flatMap on collection with existing data ===\n"; + + (* Create source with data already in it *) + let data : (int, string) Hashtbl.t = Hashtbl.create 16 in + Hashtbl.add data 1 "a"; + Hashtbl.add data 2 "b"; + + let subscribers : ((int, string) delta -> unit) list ref = ref [] in + + let source : (int, string) t = + { + subscribe = (fun h -> subscribers := h :: !subscribers); + iter = (fun f -> Hashtbl.iter f data); + get = (fun k -> Hashtbl.find_opt data k); + length = (fun () -> Hashtbl.length data); + } + in + + Printf.printf "Source has %d entries before flatMap\n" (length source); + + (* Create flatMap AFTER source has data *) + let derived = flatMap source ~f:(fun k v -> [(k * 10, v)]) () in + + (* Check derived has existing data *) + Printf.printf "Derived has %d entries (expected 2)\n" (length derived); + assert (length derived = 
2); + assert (get derived 10 = Some "a"); + assert (get derived 20 = Some "b"); + + Printf.printf "PASSED\n\n" + +module StringMap = Map.Make (String) + +let test_file_collection () = + Printf.printf "=== Test: ReactiveFileCollection + composition ===\n"; + + (* Create temp files with words *) + let temp_dir = Filename.get_temp_dir_name () in + let file_a = Filename.concat temp_dir "reactive_test_a.txt" in + let file_b = Filename.concat temp_dir "reactive_test_b.txt" in + + (* file_a: hello(2), world(1) *) + write_lines file_a ["hello world"; "hello"]; + (* file_b: hello(1), foo(1) *) + write_lines file_b ["hello foo"]; + + (* Create file collection: file -> word count map *) + let files = + ReactiveFileCollection.create ~read_file:read_lines ~process:(fun lines -> + (* Count words within this file *) + let counts = ref StringMap.empty in + lines + |> List.iter (fun line -> + String.split_on_char ' ' line + |> List.iter (fun word -> + let c = + StringMap.find_opt word !counts + |> Option.value ~default:0 + in + counts := StringMap.add word (c + 1) !counts)); + !counts) + in + + (* First flatMap: aggregate word counts across files with merge *) + let word_counts = + Reactive.flatMap + (ReactiveFileCollection.to_collection files) + ~f:(fun _path counts -> StringMap.bindings counts) + (* Each file contributes its word counts *) + ~merge:( + ) (* Sum counts from multiple files *) + () + in + + (* Second flatMap: filter to words with count >= 2 *) + let frequent_words = + Reactive.flatMap word_counts + ~f:(fun word count -> if count >= 2 then [(word, count)] else []) + () + in + + (* Process files *) + ReactiveFileCollection.process_files files [file_a; file_b]; + + Printf.printf "Word counts:\n"; + word_counts + |> Reactive.iter (fun word count -> Printf.printf " %s: %d\n" word count); + + Printf.printf "Frequent words (count >= 2):\n"; + frequent_words + |> Reactive.iter (fun word count -> Printf.printf " %s: %d\n" word count); + + (* Verify: hello=3 (2 from a + 1 
from b), world=1, foo=1 *) + assert (Reactive.get word_counts "hello" = Some 3); + assert (Reactive.get word_counts "world" = Some 1); + assert (Reactive.get word_counts "foo" = Some 1); + assert (Reactive.length word_counts = 3); + + (* Verify frequent: only "hello" with count 3 *) + assert (Reactive.length frequent_words = 1); + assert (Reactive.get frequent_words "hello" = Some 3); + + (* Modify file_a: now hello(1), world(2) *) + Printf.printf "\nModifying file_a...\n"; + write_lines file_a ["world world"; "hello"]; + ReactiveFileCollection.process_files files [file_a]; + + Printf.printf "Word counts after modification:\n"; + Reactive.iter + (fun word count -> Printf.printf " %s: %d\n" word count) + word_counts; + + Printf.printf "Frequent words after modification:\n"; + Reactive.iter + (fun word count -> Printf.printf " %s: %d\n" word count) + frequent_words; + + (* Verify: hello=2 (1 from a + 1 from b), world=2, foo=1 *) + assert (Reactive.get word_counts "hello" = Some 2); + assert (Reactive.get word_counts "world" = Some 2); + assert (Reactive.get word_counts "foo" = Some 1); + + (* Verify frequent: hello=2, world=2 *) + assert (Reactive.length frequent_words = 2); + assert (Reactive.get frequent_words "hello" = Some 2); + assert (Reactive.get frequent_words "world" = Some 2); + + (* Cleanup *) + Sys.remove file_a; + Sys.remove file_b; + + Printf.printf "PASSED\n\n" + +let () = + Printf.printf "\n====== Reactive Collection Tests ======\n\n"; + test_flatmap_basic (); + test_flatmap_with_merge (); + test_composition (); + test_flatmap_on_existing_data (); + test_file_collection (); + Printf.printf "All tests passed!\n" diff --git a/analysis/reactive/test/dune b/analysis/reactive/test/dune new file mode 100644 index 0000000000..22584c8578 --- /dev/null +++ b/analysis/reactive/test/dune @@ -0,0 +1,3 @@ +(executable + (name ReactiveTest) + (libraries reactive)) diff --git a/analysis/reanalyze/src/Cli.ml b/analysis/reanalyze/src/Cli.ml index 
edff3e3e2b..d8ce55db9d 100644 --- a/analysis/reanalyze/src/Cli.ml +++ b/analysis/reanalyze/src/Cli.ml @@ -28,9 +28,6 @@ let parallel = ref 0 (* timing: report internal timing of analysis phases *) let timing = ref false -(* use mmap cache for CMT files *) -let cmtCache = ref false - (* use reactive/incremental analysis (caches processed file_data) *) let reactive = ref false diff --git a/analysis/reanalyze/src/CmtCache.ml b/analysis/reanalyze/src/CmtCache.ml deleted file mode 100644 index aa838ed38d..0000000000 --- a/analysis/reanalyze/src/CmtCache.ml +++ /dev/null @@ -1,70 +0,0 @@ -(** CMT file cache with automatic invalidation based on file metadata. - - This module provides cached reading of CMT files with automatic - invalidation when files change on disk. Uses Unix.stat to detect - changes via mtime, size, and inode. *) - -type file_id = { - mtime: float; (** Modification time *) - size: int; (** File size in bytes *) - ino: int; (** Inode number *) -} -(** File identity for cache invalidation *) - -(** Get file identity from path *) -let get_file_id path : file_id = - let st = Unix.stat path in - {mtime = st.Unix.st_mtime; size = st.Unix.st_size; ino = st.Unix.st_ino} - -(** Check if file has changed *) -let file_changed ~old_id ~new_id = - old_id.mtime <> new_id.mtime - || old_id.size <> new_id.size || old_id.ino <> new_id.ino - -type cache_entry = {file_id: file_id; cmt_infos: Cmt_format.cmt_infos} -(** Cache entry: file identity + cached CMT data *) - -(** The cache: path -> cache_entry *) -let cache : (string, cache_entry) Hashtbl.t = Hashtbl.create 256 - -(** Read a CMT file, using the cache for efficiency. - Re-reads from disk if file has changed. 
*) -let read_cmt path : Cmt_format.cmt_infos = - let new_id = get_file_id path in - match Hashtbl.find_opt cache path with - | Some entry when not (file_changed ~old_id:entry.file_id ~new_id) -> - entry.cmt_infos - | _ -> - let cmt_infos = Cmt_format.read_cmt path in - Hashtbl.replace cache path {file_id = new_id; cmt_infos}; - cmt_infos - -(** Read a CMT file only if it changed since the last access. - Returns [Some cmt_infos] if the file changed (or first access), - [None] if the file is unchanged. - - This is the key function for incremental analysis - unchanged - files return [None] immediately without any file reading. *) -let read_cmt_if_changed path : Cmt_format.cmt_infos option = - let new_id = get_file_id path in - match Hashtbl.find_opt cache path with - | Some entry when not (file_changed ~old_id:entry.file_id ~new_id) -> - None (* File unchanged *) - | _ -> - let cmt_infos = Cmt_format.read_cmt path in - Hashtbl.replace cache path {file_id = new_id; cmt_infos}; - Some cmt_infos - -(** Clear the CMT cache, freeing all cached data. *) -let clear () = Hashtbl.clear cache - -(** Invalidate a specific path in the cache. - The next read will re-load the file from disk. *) -let invalidate path = Hashtbl.remove cache path - -type stats = {entry_count: int; mapped_bytes: int} -(** Cache statistics *) - -(** Get cache statistics. - Note: mapped_bytes is approximate (we don't track actual memory usage). *) -let stats () : stats = {entry_count = Hashtbl.length cache; mapped_bytes = 0} diff --git a/analysis/reanalyze/src/CmtCache.mli b/analysis/reanalyze/src/CmtCache.mli deleted file mode 100644 index ef7d1b2221..0000000000 --- a/analysis/reanalyze/src/CmtCache.mli +++ /dev/null @@ -1,27 +0,0 @@ -(** CMT file cache with automatic invalidation based on file metadata. - - This module provides cached reading of CMT files with automatic - invalidation when files change on disk. Uses Unix.stat to detect - changes via mtime, size, and inode. 
*) - -val read_cmt : string -> Cmt_format.cmt_infos -(** Read a CMT file, using the cache for efficiency. - Re-reads from disk if file has changed. *) - -val read_cmt_if_changed : string -> Cmt_format.cmt_infos option -(** Read a CMT file only if it changed since the last access. - Returns [Some cmt_infos] if the file changed (or first access), - [None] if the file is unchanged. *) - -val clear : unit -> unit -(** Clear the CMT cache, freeing all cached data. *) - -val invalidate : string -> unit -(** Invalidate a specific path in the cache. *) - -type stats = {entry_count: int; mapped_bytes: int} -(** Cache statistics *) - -val stats : unit -> stats -(** Get cache statistics. - Note: mapped_bytes is always 0 (we don't track actual memory usage). *) diff --git a/analysis/reanalyze/src/ReactiveAnalysis.ml b/analysis/reanalyze/src/ReactiveAnalysis.ml index da2aec5623..db54833dd4 100644 --- a/analysis/reanalyze/src/ReactiveAnalysis.ml +++ b/analysis/reanalyze/src/ReactiveAnalysis.ml @@ -1,5 +1,5 @@ (** Reactive analysis service using ReactiveFileCollection. - + This module provides incremental analysis that only re-processes files that have changed, using ReactiveFileCollection for efficient delta-based updates. *) @@ -16,7 +16,7 @@ type all_files_result = { } (** Result of processing all CMT files *) -type t = cmt_file_result option ReactiveFileCollection.t +type t = (Cmt_format.cmt_infos, cmt_file_result option) ReactiveFileCollection.t (** The reactive collection type *) (** Process cmt_infos into a file result *) @@ -69,12 +69,12 @@ let process_cmt_infos ~config cmt_infos : cmt_file_result option = (** Create a new reactive collection *) let create ~config : t = - ReactiveFileCollection.create ~process:(process_cmt_infos ~config) + ReactiveFileCollection.create ~read_file:Cmt_format.read_cmt + ~process:(process_cmt_infos ~config) (** Process all files incrementally using ReactiveFileCollection. - First run processes all files. 
Subsequent runs only process changed files - (detected via CmtCache's file change tracking). *) -let process_files ~(collection : t) ~config cmtFilePaths : all_files_result = + First run processes all files. Subsequent runs only process changed files. *) +let process_files ~(collection : t) ~config:_ cmtFilePaths : all_files_result = Timing.time_phase `FileLoading (fun () -> let processed = ref 0 in let from_cache = ref 0 in @@ -85,16 +85,11 @@ let process_files ~(collection : t) ~config cmtFilePaths : all_files_result = let was_in_collection = ReactiveFileCollection.mem collection cmtFilePath in - (* Check if file changed using CmtCache *) - match CmtCache.read_cmt_if_changed cmtFilePath with - | None -> - (* File unchanged - already in collection *) - if was_in_collection then incr from_cache - | Some cmt_infos -> - (* File changed or new - process and update *) - let result = process_cmt_infos ~config cmt_infos in - ReactiveFileCollection.set collection cmtFilePath result; - incr processed); + let changed = + ReactiveFileCollection.process_if_changed collection cmtFilePath + in + if changed then incr processed + else if was_in_collection then incr from_cache); if !Cli.timing then Printf.eprintf "Reactive: %d files processed, %d from cache\n%!" 
@@ -122,8 +117,5 @@ let process_files ~(collection : t) ~config cmtFilePaths : all_files_result = exception_results = List.rev !exception_results; }) -(** Get collection statistics *) -let stats (collection : t) = - let file_count = ReactiveFileCollection.length collection in - let cmt_stats = CmtCache.stats () in - (file_count, cmt_stats) +(** Get collection length *) +let length (collection : t) = ReactiveFileCollection.length collection diff --git a/analysis/reanalyze/src/ReactiveFileCollection.ml b/analysis/reanalyze/src/ReactiveFileCollection.ml deleted file mode 100644 index 61c6b54520..0000000000 --- a/analysis/reanalyze/src/ReactiveFileCollection.ml +++ /dev/null @@ -1,52 +0,0 @@ -(** Reactive File Collection - Implementation - - Uses CmtCache for efficient file change detection via Unix.stat. *) - -type event = Added of string | Removed of string | Modified of string - -type 'v t = {data: (string, 'v) Hashtbl.t; process: Cmt_format.cmt_infos -> 'v} - -let create ~process = {data = Hashtbl.create 256; process} - -let add t path = - let cmt_infos = CmtCache.read_cmt path in - let value = t.process cmt_infos in - Hashtbl.replace t.data path value - -let remove t path = - Hashtbl.remove t.data path; - CmtCache.invalidate path - -let update t path = - (* Re-read the file and update the cache *) - add t path - -let set t path value = Hashtbl.replace t.data path value - -let apply t events = - List.iter - (function - | Added path -> add t path - | Removed path -> remove t path - | Modified path -> update t path) - events - -let get t path = Hashtbl.find_opt t.data path - -let find t path = Hashtbl.find t.data path - -let mem t path = Hashtbl.mem t.data path - -let length t = Hashtbl.length t.data - -let is_empty t = length t = 0 - -let iter f t = Hashtbl.iter f t.data - -let fold f t init = Hashtbl.fold f t.data init - -let to_list t = fold (fun k v acc -> (k, v) :: acc) t [] - -let paths t = fold (fun k _ acc -> k :: acc) t [] - -let values t = fold (fun _ v acc 
-> v :: acc) t [] diff --git a/analysis/reanalyze/src/ReactiveFileCollection.mli b/analysis/reanalyze/src/ReactiveFileCollection.mli deleted file mode 100644 index f5f01c4283..0000000000 --- a/analysis/reanalyze/src/ReactiveFileCollection.mli +++ /dev/null @@ -1,104 +0,0 @@ -(** Reactive File Collection - - A collection that maps file paths to processed values, with efficient - delta-based updates. Designed for use with file watchers. - - {2 Usage Example} - - {[ - (* Create collection with processing function *) - let coll = ReactiveFileCollection.create - ~process:(fun (data : Cmt_format.cmt_infos) -> - extract_types data - ) - - (* Initial load *) - List.iter (ReactiveFileCollection.add coll) (glob "*.cmt") - - (* On file watcher events *) - match event with - | Created path -> ReactiveFileCollection.add coll path - | Deleted path -> ReactiveFileCollection.remove coll path - | Modified path -> ReactiveFileCollection.update coll path - - (* Access the collection *) - ReactiveFileCollection.iter (fun path value -> ...) coll - ]} - - {2 Thread Safety} - - Not thread-safe. Use external synchronization if accessed from - multiple threads/domains. *) - -type 'v t -(** The type of a reactive file collection with values of type ['v]. *) - -(** Events for batch updates. *) -type event = - | Added of string (** File was created *) - | Removed of string (** File was deleted *) - | Modified of string (** File was modified *) - -(** {1 Creation} *) - -val create : process:(Cmt_format.cmt_infos -> 'v) -> 'v t -(** [create ~process] creates an empty collection. - - [process] is called to transform CMT file contents into values. *) - -(** {1 Delta Operations} *) - -val add : 'v t -> string -> unit -(** [add t path] adds a file to the collection. - Loads the file and processes immediately. *) - -val remove : 'v t -> string -> unit -(** [remove t path] removes a file from the collection. - No-op if path is not in collection. 
*) - -val update : 'v t -> string -> unit -(** [update t path] reloads a modified file. - Equivalent to remove + add, but more efficient. *) - -val set : 'v t -> string -> 'v -> unit -(** [set t path value] sets the value for [path] directly. - Used when you have already processed the file externally. *) - -val apply : 'v t -> event list -> unit -(** [apply t events] applies multiple events. - More efficient than individual operations for batches. *) - -(** {1 Access} *) - -val get : 'v t -> string -> 'v option -(** [get t path] returns the value for [path], or [None] if not present. *) - -val find : 'v t -> string -> 'v -(** [find t path] returns the value for [path]. - @raise Not_found if path is not in collection *) - -val mem : 'v t -> string -> bool -(** [mem t path] returns [true] if [path] is in the collection. *) - -val length : 'v t -> int -(** [length t] returns the number of files in the collection. *) - -val is_empty : 'v t -> bool -(** [is_empty t] returns [true] if the collection is empty. *) - -(** {1 Iteration} *) - -val iter : (string -> 'v -> unit) -> 'v t -> unit -(** [iter f t] applies [f] to each (path, value) pair. *) - -val fold : (string -> 'v -> 'acc -> 'acc) -> 'v t -> 'acc -> 'acc -(** [fold f t init] folds [f] over all (path, value) pairs. *) - -val to_list : 'v t -> (string * 'v) list -(** [to_list t] returns all (path, value) pairs as a list. *) - -val paths : 'v t -> string list -(** [paths t] returns all paths in the collection. *) - -val values : 'v t -> 'v list -(** [values t] returns all values in the collection. *) diff --git a/analysis/reanalyze/src/Reanalyze.ml b/analysis/reanalyze/src/Reanalyze.ml index e1f9f2871a..98f3b51e7b 100644 --- a/analysis/reanalyze/src/Reanalyze.ml +++ b/analysis/reanalyze/src/Reanalyze.ml @@ -9,10 +9,7 @@ type cmt_file_result = { (** Process a cmt file and return its results. Conceptually: map over files, then merge results. 
*) let loadCmtFile ~config cmtFilePath : cmt_file_result option = - let cmt_infos = - if !Cli.cmtCache then CmtCache.read_cmt cmtFilePath - else Cmt_format.read_cmt cmtFilePath - in + let cmt_infos = Cmt_format.read_cmt cmtFilePath in let excludePath sourceFile = config.DceConfig.cli.exclude_paths |> List.exists (fun prefix_ -> @@ -489,9 +486,6 @@ let cli () = "n Process files in parallel using n domains (0 = sequential, default; \ -1 = auto-detect cores)" ); ("-timing", Set Cli.timing, "Report internal timing of analysis phases"); - ( "-cmt-cache", - Set Cli.cmtCache, - "Use mmap cache for CMT files (faster for repeated analysis)" ); ( "-reactive", Set Cli.reactive, "Use reactive analysis (caches processed file_data, skips unchanged \ diff --git a/analysis/reanalyze/src/Timing.ml b/analysis/reanalyze/src/Timing.ml index ef875668db..2341bd9109 100644 --- a/analysis/reanalyze/src/Timing.ml +++ b/analysis/reanalyze/src/Timing.ml @@ -62,7 +62,8 @@ let report () = (100.0 *. cmt_total /. total); (* Only show parallel-specific timing when used *) if times.result_collection > 0.0 then - Printf.eprintf " - Parallel merge: %.3fms (aggregate across domains)\n" + Printf.eprintf + " - Parallel merge: %.3fms (aggregate across domains)\n" (1000.0 *. times.result_collection); Printf.eprintf " Analysis: %.3fs (%.1f%%)\n" analysis_total (100.0 *. analysis_total /. 
total); diff --git a/analysis/reanalyze/src/dune b/analysis/reanalyze/src/dune index e8b736446f..8431b0d52d 100644 --- a/analysis/reanalyze/src/dune +++ b/analysis/reanalyze/src/dune @@ -2,4 +2,4 @@ (name reanalyze) (flags (-w "+6+26+27+32+33+39")) - (libraries jsonlib ext ml str unix)) + (libraries reactive jsonlib ext ml str unix)) From b2ff1c10226e0527a2c839bb235549f19fe1aa0f Mon Sep 17 00:00:00 2001 From: Cristiano Calcagno Date: Tue, 16 Dec 2025 06:48:33 +0100 Subject: [PATCH 6/9] Integrate ReactiveMerge into runAnalysis - Add ReactiveMerge module for reactive merge of per-file DCE data - Add extraction functions (builder_to_list, create_*) to data modules - Expose types needed for reactive merge (CrossFileItems.t fields, etc.) - ReactiveAnalysis: add iter_file_data, collect_exception_results - runAnalysis: use ReactiveMerge for decls/annotations/cross_file when reactive mode enabled Note: refs and file_deps still use O(n) iteration because they need post-processing (type-label deps, exception refs). Next step is to make these reactive via indexed lookups. 
--- analysis/reanalyze/src/CrossFileItems.ml | 9 ++ analysis/reanalyze/src/CrossFileItems.mli | 24 ++- analysis/reanalyze/src/Declarations.ml | 7 + analysis/reanalyze/src/Declarations.mli | 8 + analysis/reanalyze/src/FileAnnotations.ml | 8 + analysis/reanalyze/src/FileAnnotations.mli | 11 ++ analysis/reanalyze/src/FileDeps.ml | 10 ++ analysis/reanalyze/src/FileDeps.mli | 16 ++ analysis/reanalyze/src/ReactiveAnalysis.ml | 33 ++++ analysis/reanalyze/src/ReactiveMerge.ml | 169 +++++++++++++++++++++ analysis/reanalyze/src/ReactiveMerge.mli | 60 ++++++++ analysis/reanalyze/src/Reanalyze.ml | 78 +++++++--- analysis/reanalyze/src/References.ml | 12 ++ analysis/reanalyze/src/References.mli | 11 ++ 14 files changed, 431 insertions(+), 25 deletions(-) create mode 100644 analysis/reanalyze/src/ReactiveMerge.ml create mode 100644 analysis/reanalyze/src/ReactiveMerge.mli diff --git a/analysis/reanalyze/src/CrossFileItems.ml b/analysis/reanalyze/src/CrossFileItems.ml index cf038fdb8f..8b72d84120 100644 --- a/analysis/reanalyze/src/CrossFileItems.ml +++ b/analysis/reanalyze/src/CrossFileItems.ml @@ -58,6 +58,15 @@ let merge_all (builders : builder list) : t = let function_refs = builders |> List.concat_map (fun b -> b.function_refs) in {exception_refs; optional_arg_calls; function_refs} +(** {2 Builder extraction for reactive merge} *) + +let builder_to_t (builder : builder) : t = + { + exception_refs = builder.exception_refs; + optional_arg_calls = builder.optional_arg_calls; + function_refs = builder.function_refs; + } + (** {2 Processing API} *) let process_exception_refs (t : t) ~refs ~file_deps ~find_exception ~config = diff --git a/analysis/reanalyze/src/CrossFileItems.mli b/analysis/reanalyze/src/CrossFileItems.mli index 199089baaf..f7517d9974 100644 --- a/analysis/reanalyze/src/CrossFileItems.mli +++ b/analysis/reanalyze/src/CrossFileItems.mli @@ -5,9 +5,26 @@ - [builder] - mutable, for AST processing - [t] - immutable, for processing after merge *) +(** {2 Item types} *) 
+ +type exception_ref = {exception_path: DcePath.t; loc_from: Location.t} + +type optional_arg_call = { + pos_from: Lexing.position; + pos_to: Lexing.position; + arg_names: string list; + arg_names_maybe: string list; +} + +type function_ref = {pos_from: Lexing.position; pos_to: Lexing.position} + (** {2 Types} *) -type t +type t = { + exception_refs: exception_ref list; + optional_arg_calls: optional_arg_call list; + function_refs: function_ref list; +} (** Immutable cross-file items - for processing after merge *) type builder @@ -39,6 +56,11 @@ val add_function_reference : val merge_all : builder list -> t (** Merge all builders into one immutable result. Order doesn't matter. *) +(** {2 Builder extraction for reactive merge} *) + +val builder_to_t : builder -> t +(** Convert builder to t for reactive merge *) + (** {2 Processing API - for after merge} *) val process_exception_refs : diff --git a/analysis/reanalyze/src/Declarations.ml b/analysis/reanalyze/src/Declarations.ml index cf49afdd5a..0bcaa36b16 100644 --- a/analysis/reanalyze/src/Declarations.ml +++ b/analysis/reanalyze/src/Declarations.ml @@ -28,6 +28,13 @@ let merge_all (builders : builder list) : t = PosHash.iter (fun pos decl -> PosHash.replace result pos decl) builder); result +(* ===== Builder extraction for reactive merge ===== *) + +let builder_to_list (builder : builder) : (Lexing.position * Decl.t) list = + PosHash.fold (fun pos decl acc -> (pos, decl) :: acc) builder [] + +let create_from_hashtbl (h : Decl.t PosHash.t) : t = h + (* ===== Read-only API ===== *) let find_opt (t : t) pos = PosHash.find_opt t pos diff --git a/analysis/reanalyze/src/Declarations.mli b/analysis/reanalyze/src/Declarations.mli index 31bbb7934a..1d5180dc53 100644 --- a/analysis/reanalyze/src/Declarations.mli +++ b/analysis/reanalyze/src/Declarations.mli @@ -25,6 +25,14 @@ val replace_builder : builder -> Lexing.position -> Decl.t -> unit val merge_all : builder list -> t (** Merge all builders into one immutable 
result. Order doesn't matter. *) +(** {2 Builder extraction for reactive merge} *) + +val builder_to_list : builder -> (Lexing.position * Decl.t) list +(** Extract all declarations as a list for reactive merge *) + +val create_from_hashtbl : Decl.t PosHash.t -> t +(** Create from hashtable for reactive merge *) + (** {2 Read-only API for t - for solver} *) val find_opt : t -> Lexing.position -> Decl.t option diff --git a/analysis/reanalyze/src/FileAnnotations.ml b/analysis/reanalyze/src/FileAnnotations.ml index c8344a201f..046805b564 100644 --- a/analysis/reanalyze/src/FileAnnotations.ml +++ b/analysis/reanalyze/src/FileAnnotations.ml @@ -32,6 +32,14 @@ let merge_all (builders : builder list) : t = builder); result +(* ===== Builder extraction for reactive merge ===== *) + +let builder_to_list (builder : builder) : (Lexing.position * annotated_as) list + = + PosHash.fold (fun pos value acc -> (pos, value) :: acc) builder [] + +let create_from_hashtbl (h : annotated_as PosHash.t) : t = h + (* ===== Read-only API ===== *) let is_annotated_dead (state : t) pos = PosHash.find_opt state pos = Some Dead diff --git a/analysis/reanalyze/src/FileAnnotations.mli b/analysis/reanalyze/src/FileAnnotations.mli index dd3df7d861..756264813e 100644 --- a/analysis/reanalyze/src/FileAnnotations.mli +++ b/analysis/reanalyze/src/FileAnnotations.mli @@ -9,6 +9,9 @@ (** {2 Types} *) +type annotated_as = GenType | Dead | Live +(** Annotation type *) + type t (** Immutable annotations - for solver (read-only) *) @@ -25,6 +28,14 @@ val annotate_live : builder -> Lexing.position -> unit val merge_all : builder list -> t (** Merge all builders into one immutable result. Order doesn't matter. 
*) +(** {2 Builder extraction for reactive merge} *) + +val builder_to_list : builder -> (Lexing.position * annotated_as) list +(** Extract all annotations as a list for reactive merge *) + +val create_from_hashtbl : annotated_as PosHash.t -> t +(** Create from hashtable for reactive merge *) + (** {2 Read-only API for t - for solver} *) val is_annotated_dead : t -> Lexing.position -> bool diff --git a/analysis/reanalyze/src/FileDeps.ml b/analysis/reanalyze/src/FileDeps.ml index ed34e7c4c6..7c0440b687 100644 --- a/analysis/reanalyze/src/FileDeps.ml +++ b/analysis/reanalyze/src/FileDeps.ml @@ -64,6 +64,16 @@ let merge_all (builders : builder list) : t = |> List.iter (fun b -> merge_into_builder ~from:b ~into:merged_builder); freeze_builder merged_builder +(** {2 Builder extraction for reactive merge} *) + +let builder_files (builder : builder) : FileSet.t = builder.files + +let builder_deps_to_list (builder : builder) : (string * FileSet.t) list = + FileHash.fold (fun from_file to_files acc -> (from_file, to_files) :: acc) + builder.deps [] + +let create ~files ~deps : t = {files; deps} + (** {2 Read-only API} *) let get_files (t : t) = t.files diff --git a/analysis/reanalyze/src/FileDeps.mli b/analysis/reanalyze/src/FileDeps.mli index 2975e5ceca..2de875017e 100644 --- a/analysis/reanalyze/src/FileDeps.mli +++ b/analysis/reanalyze/src/FileDeps.mli @@ -35,6 +35,22 @@ val freeze_builder : builder -> t val merge_all : builder list -> t (** Merge all builders into one immutable result. Order doesn't matter. 
*) +(** {2 Builder extraction for reactive merge} *) + +val builder_files : builder -> FileSet.t +(** Get files set from builder *) + +val builder_deps_to_list : builder -> (string * FileSet.t) list +(** Extract all deps as a list for reactive merge *) + +(** {2 Internal types (for ReactiveMerge)} *) + +module FileHash : Hashtbl.S with type key = string +(** File-keyed hashtable *) + +val create : files:FileSet.t -> deps:FileSet.t FileHash.t -> t +(** Create a FileDeps.t from files set and deps hashtable *) + (** {2 Read-only API for t - for analysis} *) val get_files : t -> FileSet.t diff --git a/analysis/reanalyze/src/ReactiveAnalysis.ml b/analysis/reanalyze/src/ReactiveAnalysis.ml index db54833dd4..48fe11b197 100644 --- a/analysis/reanalyze/src/ReactiveAnalysis.ml +++ b/analysis/reanalyze/src/ReactiveAnalysis.ml @@ -119,3 +119,36 @@ let process_files ~(collection : t) ~config:_ cmtFilePaths : all_files_result = (** Get collection length *) let length (collection : t) = ReactiveFileCollection.length collection + +(** Get the underlying reactive collection for composition. + Returns (path, file_data option) suitable for ReactiveMerge. 
*) +let to_file_data_collection (collection : t) : + (string, DceFileProcessing.file_data option) Reactive.t = + Reactive.flatMap + (ReactiveFileCollection.to_collection collection) + ~f:(fun path result_opt -> + match result_opt with + | Some {dce_data = Some data; _} -> [(path, Some data)] + | _ -> [(path, None)]) + () + +(** Iterate over all file_data in the collection *) +let iter_file_data (collection : t) (f : DceFileProcessing.file_data -> unit) : + unit = + ReactiveFileCollection.iter + (fun _path result_opt -> + match result_opt with + | Some {dce_data = Some data; _} -> f data + | _ -> ()) + collection + +(** Collect all exception results from the collection *) +let collect_exception_results (collection : t) : Exception.file_result list = + let results = ref [] in + ReactiveFileCollection.iter + (fun _path result_opt -> + match result_opt with + | Some {exception_data = Some data; _} -> results := data :: !results + | _ -> ()) + collection; + !results diff --git a/analysis/reanalyze/src/ReactiveMerge.ml b/analysis/reanalyze/src/ReactiveMerge.ml new file mode 100644 index 0000000000..5bd111d09a --- /dev/null +++ b/analysis/reanalyze/src/ReactiveMerge.ml @@ -0,0 +1,169 @@ +(** Reactive merge of per-file DCE data into global collections. + + Given a reactive collection of (path, file_data), this creates derived + reactive collections that automatically update when source files change. 
*) + +(** {1 Types} *) + +type t = { + decls: (Lexing.position, Decl.t) Reactive.t; + annotations: (Lexing.position, FileAnnotations.annotated_as) Reactive.t; + value_refs: (Lexing.position, PosSet.t) Reactive.t; + type_refs: (Lexing.position, PosSet.t) Reactive.t; + cross_file_items: (string, CrossFileItems.t) Reactive.t; + file_deps_map: (string, FileSet.t) Reactive.t; + files: (string, unit) Reactive.t; +} +(** All derived reactive collections from per-file data *) + +(** {1 Creation} *) + +let create (source : (string, DceFileProcessing.file_data option) Reactive.t) : + t = + (* Declarations: (pos, Decl.t) with last-write-wins *) + let decls = + Reactive.flatMap source + ~f:(fun _path file_data_opt -> + match file_data_opt with + | None -> [] + | Some file_data -> + Declarations.builder_to_list file_data.DceFileProcessing.decls) + () + in + + (* Annotations: (pos, annotated_as) with last-write-wins *) + let annotations = + Reactive.flatMap source + ~f:(fun _path file_data_opt -> + match file_data_opt with + | None -> [] + | Some file_data -> + FileAnnotations.builder_to_list file_data.DceFileProcessing.annotations) + () + in + + (* Value refs: (posTo, PosSet) with PosSet.union merge *) + let value_refs = + Reactive.flatMap source + ~f:(fun _path file_data_opt -> + match file_data_opt with + | None -> [] + | Some file_data -> + References.builder_value_refs_to_list file_data.DceFileProcessing.refs) + ~merge:PosSet.union () + in + + (* Type refs: (posTo, PosSet) with PosSet.union merge *) + let type_refs = + Reactive.flatMap source + ~f:(fun _path file_data_opt -> + match file_data_opt with + | None -> [] + | Some file_data -> + References.builder_type_refs_to_list file_data.DceFileProcessing.refs) + ~merge:PosSet.union () + in + + (* Cross-file items: (path, CrossFileItems.t) with merge by concatenation *) + let cross_file_items = + Reactive.flatMap source + ~f:(fun path file_data_opt -> + match file_data_opt with + | None -> [] + | Some file_data -> + let items 
= + CrossFileItems.builder_to_t file_data.DceFileProcessing.cross_file + in + [(path, items)]) + ~merge:(fun a b -> + CrossFileItems. + { + exception_refs = a.exception_refs @ b.exception_refs; + optional_arg_calls = a.optional_arg_calls @ b.optional_arg_calls; + function_refs = a.function_refs @ b.function_refs; + }) + () + in + + (* File deps map: (from_file, FileSet of to_files) with FileSet.union merge *) + let file_deps_map = + Reactive.flatMap source + ~f:(fun _path file_data_opt -> + match file_data_opt with + | None -> [] + | Some file_data -> + FileDeps.builder_deps_to_list file_data.DceFileProcessing.file_deps) + ~merge:FileSet.union () + in + + (* Files set: (path, ()) - just track which files exist *) + let files = + Reactive.flatMap source + ~f:(fun path file_data_opt -> + match file_data_opt with + | None -> [] + | Some file_data -> + (* Include the file and all files it references *) + let file_set = FileDeps.builder_files file_data.DceFileProcessing.file_deps in + let entries = FileSet.fold (fun f acc -> (f, ()) :: acc) file_set [] in + (path, ()) :: entries) + () + in + + {decls; annotations; value_refs; type_refs; cross_file_items; file_deps_map; files} + +(** {1 Conversion to solver-ready format} *) + +(** Convert reactive decls to Declarations.t for solver *) +let freeze_decls (t : t) : Declarations.t = + let result = PosHash.create 256 in + Reactive.iter (fun pos decl -> PosHash.replace result pos decl) t.decls; + Declarations.create_from_hashtbl result + +(** Convert reactive annotations to FileAnnotations.t for solver *) +let freeze_annotations (t : t) : FileAnnotations.t = + let result = PosHash.create 256 in + Reactive.iter (fun pos ann -> PosHash.replace result pos ann) t.annotations; + FileAnnotations.create_from_hashtbl result + +(** Convert reactive refs to References.t for solver *) +let freeze_refs (t : t) : References.t = + let value_refs = PosHash.create 256 in + let type_refs = PosHash.create 256 in + Reactive.iter + (fun pos refs 
-> PosHash.replace value_refs pos refs) + t.value_refs; + Reactive.iter (fun pos refs -> PosHash.replace type_refs pos refs) t.type_refs; + References.create ~value_refs ~type_refs + +(** Collect all cross-file items *) +let collect_cross_file_items (t : t) : CrossFileItems.t = + let exception_refs = ref [] in + let optional_arg_calls = ref [] in + let function_refs = ref [] in + Reactive.iter + (fun _path items -> + exception_refs := items.CrossFileItems.exception_refs @ !exception_refs; + optional_arg_calls := + items.CrossFileItems.optional_arg_calls @ !optional_arg_calls; + function_refs := items.CrossFileItems.function_refs @ !function_refs) + t.cross_file_items; + { + CrossFileItems.exception_refs = !exception_refs; + optional_arg_calls = !optional_arg_calls; + function_refs = !function_refs; + } + +(** Convert reactive file deps to FileDeps.t for solver *) +let freeze_file_deps (t : t) : FileDeps.t = + let files = + let result = ref FileSet.empty in + Reactive.iter (fun path () -> result := FileSet.add path !result) t.files; + !result + in + let deps = FileDeps.FileHash.create 256 in + Reactive.iter + (fun from_file to_files -> FileDeps.FileHash.replace deps from_file to_files) + t.file_deps_map; + FileDeps.create ~files ~deps + diff --git a/analysis/reanalyze/src/ReactiveMerge.mli b/analysis/reanalyze/src/ReactiveMerge.mli new file mode 100644 index 0000000000..03dd06bb44 --- /dev/null +++ b/analysis/reanalyze/src/ReactiveMerge.mli @@ -0,0 +1,60 @@ +(** Reactive merge of per-file DCE data into global collections. + + Given a reactive collection of (path, file_data), this creates derived + reactive collections that automatically update when source files change. 
+ + {2 Example} + + {[ + (* Create reactive file collection *) + let files = ReactiveAnalysis.create ~config in + + (* Process files *) + ReactiveAnalysis.process_files ~collection:files ~config paths; + + (* Create reactive merge from processed file data *) + let merged = ReactiveMerge.create (ReactiveAnalysis.to_file_data_collection files) in + + (* Access derived collections *) + Reactive.iter (fun pos decl -> ...) merged.decls; + + (* Or freeze for solver *) + let decls = ReactiveMerge.freeze_decls merged in + ]} *) + +(** {1 Types} *) + +type t = { + decls: (Lexing.position, Decl.t) Reactive.t; + annotations: (Lexing.position, FileAnnotations.annotated_as) Reactive.t; + value_refs: (Lexing.position, PosSet.t) Reactive.t; + type_refs: (Lexing.position, PosSet.t) Reactive.t; + cross_file_items: (string, CrossFileItems.t) Reactive.t; + file_deps_map: (string, FileSet.t) Reactive.t; + files: (string, unit) Reactive.t; +} +(** All derived reactive collections from per-file data *) + +(** {1 Creation} *) + +val create : (string, DceFileProcessing.file_data option) Reactive.t -> t +(** Create reactive merge from a file data collection. + All derived collections update automatically when source changes. 
*) + +(** {1 Conversion to solver-ready format} *) + +val freeze_decls : t -> Declarations.t +(** Convert reactive decls to Declarations.t for solver *) + +val freeze_annotations : t -> FileAnnotations.t +(** Convert reactive annotations to FileAnnotations.t for solver *) + +val freeze_refs : t -> References.t +(** Convert reactive refs to References.t for solver *) + +val collect_cross_file_items : t -> CrossFileItems.t +(** Collect all cross-file items *) + +val freeze_file_deps : t -> FileDeps.t +(** Convert reactive file deps to FileDeps.t for solver *) + diff --git a/analysis/reanalyze/src/Reanalyze.ml b/analysis/reanalyze/src/Reanalyze.ml index 98f3b51e7b..0b0359c050 100644 --- a/analysis/reanalyze/src/Reanalyze.ml +++ b/analysis/reanalyze/src/Reanalyze.ml @@ -244,11 +244,17 @@ let shuffle_list lst = done; Array.to_list arr -let runAnalysis ~dce_config ~cmtRoot ~reactive_collection = +let runAnalysis ~dce_config ~cmtRoot ~reactive_collection ~reactive_merge = (* Map: process each file -> list of file_data *) let {dce_data_list; exception_results} = processCmtFiles ~config:dce_config ~cmtRoot ~reactive_collection in + (* Get exception results from reactive collection if available *) + let exception_results = + match reactive_collection with + | Some collection -> ReactiveAnalysis.collect_exception_results collection + | None -> exception_results + in (* Optionally shuffle for order-independence testing *) let dce_data_list = if !Cli.testShuffle then ( @@ -264,31 +270,44 @@ let runAnalysis ~dce_config ~cmtRoot ~reactive_collection = (* Merging phase: combine all builders -> immutable data *) let annotations, decls, cross_file, refs, file_deps = Timing.time_phase `Merging (fun () -> - let annotations = - FileAnnotations.merge_all - (dce_data_list - |> List.map (fun fd -> fd.DceFileProcessing.annotations)) - in - let decls = - Declarations.merge_all - (dce_data_list - |> List.map (fun fd -> fd.DceFileProcessing.decls)) - in - let cross_file = - 
CrossFileItems.merge_all - (dce_data_list - |> List.map (fun fd -> fd.DceFileProcessing.cross_file)) + (* Use reactive merge if available, otherwise list-based merge *) + let annotations, decls, cross_file = + match reactive_merge with + | Some merged -> + ( ReactiveMerge.freeze_annotations merged, + ReactiveMerge.freeze_decls merged, + ReactiveMerge.collect_cross_file_items merged ) + | None -> + ( FileAnnotations.merge_all + (dce_data_list + |> List.map (fun fd -> fd.DceFileProcessing.annotations)), + Declarations.merge_all + (dce_data_list + |> List.map (fun fd -> fd.DceFileProcessing.decls)), + CrossFileItems.merge_all + (dce_data_list + |> List.map (fun fd -> fd.DceFileProcessing.cross_file)) ) in - (* Merge refs and file_deps into builders for cross-file items processing *) + (* Merge refs and file_deps into builders for cross-file items processing. + This still needs the file_data iteration for post-processing. *) let refs_builder = References.create_builder () in let file_deps_builder = FileDeps.create_builder () in - dce_data_list - |> List.iter (fun fd -> - References.merge_into_builder ~from:fd.DceFileProcessing.refs - ~into:refs_builder; - FileDeps.merge_into_builder - ~from:fd.DceFileProcessing.file_deps - ~into:file_deps_builder); + (match reactive_collection with + | Some collection -> + ReactiveAnalysis.iter_file_data collection (fun fd -> + References.merge_into_builder ~from:fd.DceFileProcessing.refs + ~into:refs_builder; + FileDeps.merge_into_builder + ~from:fd.DceFileProcessing.file_deps + ~into:file_deps_builder) + | None -> + dce_data_list + |> List.iter (fun fd -> + References.merge_into_builder + ~from:fd.DceFileProcessing.refs ~into:refs_builder; + FileDeps.merge_into_builder + ~from:fd.DceFileProcessing.file_deps + ~into:file_deps_builder)); (* Compute type-label dependencies after merge *) DeadType.process_type_label_dependencies ~config:dce_config ~decls ~refs:refs_builder; @@ -364,11 +383,22 @@ let runAnalysisAndReport ~cmtRoot = if 
!Cli.reactive then Some (ReactiveAnalysis.create ~config:dce_config) else None in + (* Create reactive merge once if reactive mode is enabled. + This automatically updates when reactive_collection changes. *) + let reactive_merge = + match reactive_collection with + | Some collection -> + let file_data_collection = + ReactiveAnalysis.to_file_data_collection collection + in + Some (ReactiveMerge.create file_data_collection) + | None -> None + in for run = 1 to numRuns do Timing.reset (); if numRuns > 1 && !Cli.timing then Printf.eprintf "\n=== Run %d/%d ===\n%!" run numRuns; - runAnalysis ~dce_config ~cmtRoot ~reactive_collection; + runAnalysis ~dce_config ~cmtRoot ~reactive_collection ~reactive_merge; if run = numRuns then ( (* Only report on last run *) Log_.Stats.report ~config:dce_config; diff --git a/analysis/reanalyze/src/References.ml b/analysis/reanalyze/src/References.ml index 632dbd7861..60fd7bfafd 100644 --- a/analysis/reanalyze/src/References.ml +++ b/analysis/reanalyze/src/References.ml @@ -50,6 +50,18 @@ let freeze_builder (builder : builder) : t = (* Zero-copy freeze - builder should not be used after this *) {value_refs = builder.value_refs; type_refs = builder.type_refs} +(* ===== Builder extraction for reactive merge ===== *) + +let builder_value_refs_to_list (builder : builder) : + (Lexing.position * PosSet.t) list = + PosHash.fold (fun pos refs acc -> (pos, refs) :: acc) builder.value_refs [] + +let builder_type_refs_to_list (builder : builder) : + (Lexing.position * PosSet.t) list = + PosHash.fold (fun pos refs acc -> (pos, refs) :: acc) builder.type_refs [] + +let create ~value_refs ~type_refs : t = {value_refs; type_refs} + (* ===== Read-only API ===== *) let find_value_refs (t : t) pos = findSet t.value_refs pos diff --git a/analysis/reanalyze/src/References.mli b/analysis/reanalyze/src/References.mli index 05228b7b8e..5776ca615c 100644 --- a/analysis/reanalyze/src/References.mli +++ b/analysis/reanalyze/src/References.mli @@ -32,6 +32,17 @@ 
val merge_all : builder list -> t val freeze_builder : builder -> t (** Convert builder to immutable t. Builder should not be used after this. *) +(** {2 Builder extraction for reactive merge} *) + +val builder_value_refs_to_list : builder -> (Lexing.position * PosSet.t) list +(** Extract all value refs as a list for reactive merge *) + +val builder_type_refs_to_list : builder -> (Lexing.position * PosSet.t) list +(** Extract all type refs as a list for reactive merge *) + +val create : value_refs:PosSet.t PosHash.t -> type_refs:PosSet.t PosHash.t -> t +(** Create a References.t from hashtables *) + (** {2 Read-only API for t - for solver} *) val find_value_refs : t -> Lexing.position -> PosSet.t From f7066d213d0767932a471c779192f7ca5a27d779 Mon Sep 17 00:00:00 2001 From: Cristiano Calcagno Date: Tue, 16 Dec 2025 07:18:47 +0100 Subject: [PATCH 7/9] Add reactive combinators (lookup, join) and reactive type/exception deps - Add Reactive.lookup: single-key subscription from a collection - Add Reactive.join: reactive hash join between two collections - Add ReactiveTypeDeps: type-label dependencies via reactive join - Add ReactiveExceptionRefs: exception ref resolution via reactive join - Update ARCHITECTURE.md with generated SVG diagrams - Add diagram sources (.mmd) for batch pipeline, reactive pipeline, delta propagation The reactive modules express cross-file dependency resolution declaratively: - ReactiveTypeDeps uses flatMap to index decls by path, then join to connect impl<->intf - ReactiveExceptionRefs uses join to resolve exception paths to declaration locations --- analysis/reactive/src/Reactive.ml | 232 ++++++++++++++++ analysis/reactive/src/Reactive.mli | 44 +++ analysis/reactive/test/ReactiveTest.ml | 217 +++++++++++++++ analysis/reanalyze/ARCHITECTURE.md | 132 ++++----- .../reanalyze/diagrams/batch-pipeline.mmd | 53 ++++ .../reanalyze/diagrams/batch-pipeline.svg | 1 + .../reanalyze/diagrams/delta-propagation.mmd | 26 ++ 
.../reanalyze/diagrams/delta-propagation.svg | 1 + .../reanalyze/diagrams/reactive-pipeline.mmd | 62 +++++ .../reanalyze/diagrams/reactive-pipeline.svg | 1 + .../reanalyze/src/ReactiveExceptionRefs.ml | 82 ++++++ .../reanalyze/src/ReactiveExceptionRefs.mli | 54 ++++ analysis/reanalyze/src/ReactiveTypeDeps.ml | 255 ++++++++++++++++++ analysis/reanalyze/src/ReactiveTypeDeps.mli | 55 ++++ analysis/src/DceCommand.ml | 3 +- 15 files changed, 1141 insertions(+), 77 deletions(-) create mode 100644 analysis/reanalyze/diagrams/batch-pipeline.mmd create mode 100644 analysis/reanalyze/diagrams/batch-pipeline.svg create mode 100644 analysis/reanalyze/diagrams/delta-propagation.mmd create mode 100644 analysis/reanalyze/diagrams/delta-propagation.svg create mode 100644 analysis/reanalyze/diagrams/reactive-pipeline.mmd create mode 100644 analysis/reanalyze/diagrams/reactive-pipeline.svg create mode 100644 analysis/reanalyze/src/ReactiveExceptionRefs.ml create mode 100644 analysis/reanalyze/src/ReactiveExceptionRefs.mli create mode 100644 analysis/reanalyze/src/ReactiveTypeDeps.ml create mode 100644 analysis/reanalyze/src/ReactiveTypeDeps.mli diff --git a/analysis/reactive/src/Reactive.ml b/analysis/reactive/src/Reactive.ml index cb8b29ebd2..05f2b6f29f 100644 --- a/analysis/reactive/src/Reactive.ml +++ b/analysis/reactive/src/Reactive.ml @@ -136,3 +136,235 @@ let flatMap (source : ('k1, 'v1) t) ~f ?merge () : ('k2, 'v2) t = get = (fun k -> Hashtbl.find_opt target k); length = (fun () -> Hashtbl.length target); } + +(** {1 Lookup} *) + +(** Lookup a single key reactively. + Returns a collection with that single entry that updates when the + source's value at that key changes. + + This is useful for creating reactive subscriptions to specific keys. 
*) +let lookup (source : ('k, 'v) t) ~key : ('k, 'v) t = + let current : ('k, 'v option) Hashtbl.t = Hashtbl.create 1 in + let subscribers : (('k, 'v) delta -> unit) list ref = ref [] in + + let emit delta = List.iter (fun h -> h delta) !subscribers in + + let handle_delta delta = + match delta with + | Set (k, v) when k = key -> + Hashtbl.replace current key (Some v); + emit (Set (key, v)) + | Remove k when k = key -> + Hashtbl.remove current key; + emit (Remove key) + | _ -> () (* Ignore deltas for other keys *) + in + + (* Subscribe to source *) + source.subscribe handle_delta; + + (* Initialize with current value *) + (match source.get key with + | Some v -> Hashtbl.replace current key (Some v) + | None -> ()); + + { + subscribe = (fun handler -> subscribers := handler :: !subscribers); + iter = + (fun f -> + match Hashtbl.find_opt current key with + | Some (Some v) -> f key v + | _ -> ()); + get = + (fun k -> + if k = key then + match Hashtbl.find_opt current key with + | Some v -> v + | None -> None + else None); + length = + (fun () -> + match Hashtbl.find_opt current key with + | Some (Some _) -> 1 + | _ -> 0); + } + +(** {1 Join} *) + +(** Join two collections: for each entry in [left], look up a key in [right]. + + [key_of] extracts the lookup key from each left entry. + [f] combines left entry with looked-up right value (if present). + + When either collection changes, affected entries are recomputed. + This is more efficient than nested flatMap for join patterns. 
*) +let join (left : ('k1, 'v1) t) (right : ('k2, 'v2) t) + ~(key_of : 'k1 -> 'v1 -> 'k2) ~(f : 'k1 -> 'v1 -> 'v2 option -> ('k3 * 'v3) list) + ?merge () : ('k3, 'v3) t = + let merge_fn = + match merge with + | Some m -> m + | None -> fun _ v -> v + in + (* Track: for each left key, which right key was looked up *) + let left_to_right_key : ('k1, 'k2) Hashtbl.t = Hashtbl.create 64 in + (* Track: for each right key, which left keys depend on it *) + let right_key_to_left_keys : ('k2, 'k1 list) Hashtbl.t = Hashtbl.create 64 in + (* Current left entries *) + let left_entries : ('k1, 'v1) Hashtbl.t = Hashtbl.create 64 in + (* Provenance and contributions for output *) + let provenance : ('k1, 'k3 list) Hashtbl.t = Hashtbl.create 64 in + let contributions : ('k3, ('k1, 'v3) Hashtbl.t) Hashtbl.t = + Hashtbl.create 256 + in + let target : ('k3, 'v3) Hashtbl.t = Hashtbl.create 256 in + let subscribers : (('k3, 'v3) delta -> unit) list ref = ref [] in + + let emit delta = List.iter (fun h -> h delta) !subscribers in + + let recompute_target k3 = + match Hashtbl.find_opt contributions k3 with + | None -> + Hashtbl.remove target k3; + Some (Remove k3) + | Some contribs when Hashtbl.length contribs = 0 -> + Hashtbl.remove contributions k3; + Hashtbl.remove target k3; + Some (Remove k3) + | Some contribs -> + let values = Hashtbl.fold (fun _ v acc -> v :: acc) contribs [] in + let merged = + match values with + | [] -> assert false + | [v] -> v + | v :: rest -> List.fold_left merge_fn v rest + in + Hashtbl.replace target k3 merged; + Some (Set (k3, merged)) + in + + let remove_left_contributions k1 = + match Hashtbl.find_opt provenance k1 with + | None -> [] + | Some target_keys -> + Hashtbl.remove provenance k1; + target_keys + |> List.iter (fun k3 -> + match Hashtbl.find_opt contributions k3 with + | None -> () + | Some contribs -> Hashtbl.remove contribs k1); + target_keys + in + + let add_left_contributions k1 entries = + let target_keys = List.map fst entries in + 
Hashtbl.replace provenance k1 target_keys; + entries + |> List.iter (fun (k3, v3) -> + let contribs = + match Hashtbl.find_opt contributions k3 with + | Some c -> c + | None -> + let c = Hashtbl.create 4 in + Hashtbl.replace contributions k3 c; + c + in + Hashtbl.replace contribs k1 v3); + target_keys + in + + let process_left_entry k1 v1 = + let old_affected = remove_left_contributions k1 in + (* Update right key tracking *) + (match Hashtbl.find_opt left_to_right_key k1 with + | Some old_k2 -> + Hashtbl.remove left_to_right_key k1; + (match Hashtbl.find_opt right_key_to_left_keys old_k2 with + | Some keys -> + Hashtbl.replace right_key_to_left_keys old_k2 + (List.filter (fun k -> k <> k1) keys) + | None -> ()) + | None -> ()); + let k2 = key_of k1 v1 in + Hashtbl.replace left_to_right_key k1 k2; + let keys = + match Hashtbl.find_opt right_key_to_left_keys k2 with + | Some ks -> ks + | None -> [] + in + Hashtbl.replace right_key_to_left_keys k2 (k1 :: keys); + (* Compute output *) + let right_val = right.get k2 in + let new_entries = f k1 v1 right_val in + let new_affected = add_left_contributions k1 new_entries in + let all_affected = old_affected @ new_affected in + let seen = Hashtbl.create (List.length all_affected) in + all_affected + |> List.filter_map (fun k3 -> + if Hashtbl.mem seen k3 then None + else ( + Hashtbl.replace seen k3 (); + recompute_target k3)) + in + + let remove_left_entry k1 = + Hashtbl.remove left_entries k1; + let affected = remove_left_contributions k1 in + (* Clean up tracking *) + (match Hashtbl.find_opt left_to_right_key k1 with + | Some k2 -> + Hashtbl.remove left_to_right_key k1; + (match Hashtbl.find_opt right_key_to_left_keys k2 with + | Some keys -> + Hashtbl.replace right_key_to_left_keys k2 + (List.filter (fun k -> k <> k1) keys) + | None -> ()) + | None -> ()); + affected |> List.filter_map recompute_target + in + + let handle_left_delta delta = + let downstream = + match delta with + | Set (k1, v1) -> + Hashtbl.replace 
left_entries k1 v1; + process_left_entry k1 v1 + | Remove k1 -> remove_left_entry k1 + in + List.iter emit downstream + in + + let handle_right_delta delta = + (* When right changes, reprocess all left entries that depend on it *) + let downstream = + match delta with + | Set (k2, _) | Remove k2 -> + (match Hashtbl.find_opt right_key_to_left_keys k2 with + | None -> [] + | Some left_keys -> + left_keys + |> List.concat_map (fun k1 -> + match Hashtbl.find_opt left_entries k1 with + | Some v1 -> process_left_entry k1 v1 + | None -> [])) + in + List.iter emit downstream + in + + (* Subscribe to both sources *) + left.subscribe handle_left_delta; + right.subscribe handle_right_delta; + + (* Initialize from existing entries *) + left.iter (fun k1 v1 -> + Hashtbl.replace left_entries k1 v1; + let deltas = process_left_entry k1 v1 in + List.iter emit deltas); + + { + subscribe = (fun handler -> subscribers := handler :: !subscribers); + iter = (fun f -> Hashtbl.iter f target); + get = (fun k -> Hashtbl.find_opt target k); + length = (fun () -> Hashtbl.length target); + } diff --git a/analysis/reactive/src/Reactive.mli b/analysis/reactive/src/Reactive.mli index 8b1b3e5a31..5894b23bf4 100644 --- a/analysis/reactive/src/Reactive.mli +++ b/analysis/reactive/src/Reactive.mli @@ -73,3 +73,47 @@ val flatMap : Defaults to last-write-wins. Derived collections can be further composed with [flatMap]. *) + +(** {1 Lookup} *) + +val lookup : ('k, 'v) t -> key:'k -> ('k, 'v) t +(** [lookup source ~key] creates a reactive subscription to a single key. + + Returns a collection containing at most one entry (the value at [key]). + When [source]'s value at [key] changes, the lookup collection updates. + + Useful for reactive point queries. 
*) + +(** {1 Join} *) + +val join : + ('k1, 'v1) t -> + ('k2, 'v2) t -> + key_of:('k1 -> 'v1 -> 'k2) -> + f:('k1 -> 'v1 -> 'v2 option -> ('k3 * 'v3) list) -> + ?merge:('v3 -> 'v3 -> 'v3) -> + unit -> + ('k3, 'v3) t +(** [join left right ~key_of ~f ()] joins two collections. + + For each entry [(k1, v1)] in [left]: + - Computes lookup key [k2 = key_of k1 v1] + - Looks up [k2] in [right] to get [v2_opt] + - Produces entries via [f k1 v1 v2_opt] + + When either [left] or [right] changes, affected entries are recomputed. + This is the reactive equivalent of a hash join. + + {2 Example: Exception refs lookup} + + {[ + (* exception_refs: (path, loc_from) *) + (* decl_by_path: (path, decl list) *) + let resolved = Reactive.join exception_refs decl_by_path + ~key_of:(fun path _loc -> path) + ~f:(fun path loc decls_opt -> + match decls_opt with + | Some decls -> decls |> List.map (fun d -> (d.pos, loc)) + | None -> []) + () + ]} *) diff --git a/analysis/reactive/test/ReactiveTest.ml b/analysis/reactive/test/ReactiveTest.ml index 740d11f941..35ed4cc319 100644 --- a/analysis/reactive/test/ReactiveTest.ml +++ b/analysis/reactive/test/ReactiveTest.ml @@ -328,6 +328,220 @@ let test_file_collection () = Printf.printf "PASSED\n\n" +let test_lookup () = + Printf.printf "=== Test: lookup (reactive single-key subscription) ===\n"; + + let data : (string, int) Hashtbl.t = Hashtbl.create 16 in + let subscribers : ((string, int) delta -> unit) list ref = ref [] in + + let source : (string, int) t = + { + subscribe = (fun h -> subscribers := h :: !subscribers); + iter = (fun f -> Hashtbl.iter f data); + get = (fun k -> Hashtbl.find_opt data k); + length = (fun () -> Hashtbl.length data); + } + in + + let emit delta = + apply_delta data delta; + List.iter (fun h -> h delta) !subscribers + in + + (* Create lookup for key "foo" *) + let foo_lookup = lookup source ~key:"foo" in + + (* Initially empty *) + assert (length foo_lookup = 0); + assert (get foo_lookup "foo" = None); + + (* Set 
foo=42 *) + emit (Set ("foo", 42)); + Printf.printf "After Set(foo, 42): lookup has %d entries\n" (length foo_lookup); + assert (length foo_lookup = 1); + assert (get foo_lookup "foo" = Some 42); + + (* Set bar=100 (different key, lookup shouldn't change) *) + emit (Set ("bar", 100)); + Printf.printf "After Set(bar, 100): lookup still has %d entries\n" + (length foo_lookup); + assert (length foo_lookup = 1); + assert (get foo_lookup "foo" = Some 42); + + (* Update foo=99 *) + emit (Set ("foo", 99)); + Printf.printf "After Set(foo, 99): lookup value updated\n"; + assert (get foo_lookup "foo" = Some 99); + + (* Track subscription updates *) + let updates = ref [] in + foo_lookup.subscribe (fun delta -> updates := delta :: !updates); + + emit (Set ("foo", 1)); + emit (Set ("bar", 2)); + emit (Remove "foo"); + + Printf.printf "Subscription received %d updates (expected 2: Set+Remove for foo)\n" + (List.length !updates); + assert (List.length !updates = 2); + + Printf.printf "PASSED\n\n" + +let test_join () = + Printf.printf "=== Test: join (reactive lookup/join) ===\n"; + + (* Left collection: exception refs (path -> loc_from) *) + let left_data : (string, int) Hashtbl.t = Hashtbl.create 16 in + let left_subs : ((string, int) delta -> unit) list ref = ref [] in + let left : (string, int) t = + { + subscribe = (fun h -> left_subs := h :: !left_subs); + iter = (fun f -> Hashtbl.iter f left_data); + get = (fun k -> Hashtbl.find_opt left_data k); + length = (fun () -> Hashtbl.length left_data); + } + in + let emit_left delta = + apply_delta left_data delta; + List.iter (fun h -> h delta) !left_subs + in + + (* Right collection: decl index (path -> decl_pos) *) + let right_data : (string, int) Hashtbl.t = Hashtbl.create 16 in + let right_subs : ((string, int) delta -> unit) list ref = ref [] in + let right : (string, int) t = + { + subscribe = (fun h -> right_subs := h :: !right_subs); + iter = (fun f -> Hashtbl.iter f right_data); + get = (fun k -> Hashtbl.find_opt 
right_data k); + length = (fun () -> Hashtbl.length right_data); + } + in + let emit_right delta = + apply_delta right_data delta; + List.iter (fun h -> h delta) !right_subs + in + + (* Join: for each (path, loc_from) in left, look up path in right *) + let joined = + join left right + ~key_of:(fun path _loc_from -> path) + ~f:(fun _path loc_from decl_pos_opt -> + match decl_pos_opt with + | Some decl_pos -> + (* Produce (decl_pos, loc_from) pairs *) + [(decl_pos, loc_from)] + | None -> []) + () + in + + (* Initially empty *) + assert (length joined = 0); + + (* Add declaration at path "A" with pos 100 *) + emit_right (Set ("A", 100)); + Printf.printf "After right Set(A, 100): joined=%d\n" (length joined); + assert (length joined = 0); (* No left entries yet *) + + (* Add exception ref at path "A" from loc 1 *) + emit_left (Set ("A", 1)); + Printf.printf "After left Set(A, 1): joined=%d\n" (length joined); + assert (length joined = 1); + assert (get joined 100 = Some 1); (* decl_pos 100 -> loc_from 1 *) + + (* Add another exception ref at path "B" (no matching decl) *) + emit_left (Set ("B", 2)); + Printf.printf "After left Set(B, 2): joined=%d (B has no decl)\n" + (length joined); + assert (length joined = 1); + + (* Add declaration for path "B" *) + emit_right (Set ("B", 200)); + Printf.printf "After right Set(B, 200): joined=%d\n" (length joined); + assert (length joined = 2); + assert (get joined 200 = Some 2); + + (* Update right: change B's decl_pos *) + emit_right (Set ("B", 201)); + Printf.printf "After right Set(B, 201): joined=%d\n" (length joined); + assert (length joined = 2); + assert (get joined 200 = None); (* Old key gone *) + assert (get joined 201 = Some 2); (* New key has the value *) + + (* Remove left entry A *) + emit_left (Remove "A"); + Printf.printf "After left Remove(A): joined=%d\n" (length joined); + assert (length joined = 1); + assert (get joined 100 = None); + + Printf.printf "PASSED\n\n" + +let test_join_with_merge () = + 
Printf.printf "=== Test: join with merge ===\n"; + + (* Multiple left entries can map to same right key *) + let left_data : (int, string) Hashtbl.t = Hashtbl.create 16 in + let left_subs : ((int, string) delta -> unit) list ref = ref [] in + let left : (int, string) t = + { + subscribe = (fun h -> left_subs := h :: !left_subs); + iter = (fun f -> Hashtbl.iter f left_data); + get = (fun k -> Hashtbl.find_opt left_data k); + length = (fun () -> Hashtbl.length left_data); + } + in + let emit_left delta = + apply_delta left_data delta; + List.iter (fun h -> h delta) !left_subs + in + + let right_data : (string, int) Hashtbl.t = Hashtbl.create 16 in + let right_subs : ((string, int) delta -> unit) list ref = ref [] in + let right : (string, int) t = + { + subscribe = (fun h -> right_subs := h :: !right_subs); + iter = (fun f -> Hashtbl.iter f right_data); + get = (fun k -> Hashtbl.find_opt right_data k); + length = (fun () -> Hashtbl.length right_data); + } + in + let emit_right delta = + apply_delta right_data delta; + List.iter (fun h -> h delta) !right_subs + in + + (* Join with merge: all entries produce to key 0 *) + let joined = + join left right + ~key_of:(fun _id path -> path) (* Look up by path *) + ~f:(fun _id _path value_opt -> + match value_opt with + | Some v -> [(0, v)] (* All contribute to key 0 *) + | None -> []) + ~merge:( + ) (* Sum values *) + () + in + + emit_right (Set ("X", 10)); + emit_left (Set (1, "X")); + emit_left (Set (2, "X")); + + Printf.printf "Two entries looking up X (value 10): sum=%d\n" + (get joined 0 |> Option.value ~default:0); + assert (get joined 0 = Some 20); (* 10 + 10 *) + + emit_right (Set ("X", 5)); + Printf.printf "After right changes to 5: sum=%d\n" + (get joined 0 |> Option.value ~default:0); + assert (get joined 0 = Some 10); (* 5 + 5 *) + + emit_left (Remove 1); + Printf.printf "After removing one left entry: sum=%d\n" + (get joined 0 |> Option.value ~default:0); + assert (get joined 0 = Some 5); (* Only one left *) + + 
Printf.printf "PASSED\n\n" + let () = Printf.printf "\n====== Reactive Collection Tests ======\n\n"; test_flatmap_basic (); @@ -335,4 +549,7 @@ let () = test_composition (); test_flatmap_on_existing_data (); test_file_collection (); + test_lookup (); + test_join (); + test_join_with_merge (); Printf.printf "All tests passed!\n" diff --git a/analysis/reanalyze/ARCHITECTURE.md b/analysis/reanalyze/ARCHITECTURE.md index 1d341ae52e..9644b4f1f9 100644 --- a/analysis/reanalyze/ARCHITECTURE.md +++ b/analysis/reanalyze/ARCHITECTURE.md @@ -21,82 +21,9 @@ This design enables: ## Pipeline Diagram -``` -┌─────────────────────────────────────────────────────────────────────────────┐ -│ DCE ANALYSIS PIPELINE │ -└─────────────────────────────────────────────────────────────────────────────┘ - - ┌─────────────┐ - │ DceConfig.t │ (explicit configuration) - └──────┬──────┘ - │ - ╔════════════════════════════════╪════════════════════════════════════════╗ - ║ PHASE 1: MAP (per-file) │ ║ - ╠════════════════════════════════╪════════════════════════════════════════╣ - ║ ▼ ║ - ║ ┌──────────┐ process_cmt_file ┌───────────────────────────────┐ ║ - ║ │ file1.cmt├──────────────────────►│ file_data { │ ║ - ║ └──────────┘ │ annotations: builder │ ║ - ║ ┌──────────┐ process_cmt_file │ decls: builder │ ║ - ║ │ file2.cmt├──────────────────────►│ refs: builder │ ║ - ║ └──────────┘ │ file_deps: builder │ ║ - ║ ┌──────────┐ process_cmt_file │ cross_file: builder │ ║ - ║ │ file3.cmt├──────────────────────►│ } │ ║ - ║ └──────────┘ └───────────────────────────────┘ ║ - ║ │ ║ - ║ Local mutable state OK │ file_data list ║ - ╚══════════════════════════════════════════════════╪══════════════════════╝ - │ - ╔══════════════════════════════════════════════════╪══════════════════════╗ - ║ PHASE 2: MERGE (combine builders) │ ║ - ╠══════════════════════════════════════════════════╪══════════════════════╣ - ║ ▼ ║ - ║ ┌─────────────────────────────────────────────────────────────────┐ ║ - ║ │ 
FileAnnotations.merge_all → annotations: FileAnnotations.t │ ║ - ║ │ Declarations.merge_all → decls: Declarations.t │ ║ - ║ │ References.merge_all → refs: References.t │ ║ - ║ │ FileDeps.merge_all → file_deps: FileDeps.t │ ║ - ║ │ CrossFileItems.merge_all → cross_file: CrossFileItems.t │ ║ - ║ │ │ ║ - ║ │ CrossFileItems.compute_optional_args_state │ ║ - ║ │ → optional_args_state: State.t │ ║ - ║ └─────────────────────────────────────────────────────────────────┘ ║ - ║ │ ║ - ║ Pure functions, immutable output │ merged data ║ - ╚══════════════════════════════════════════════════╪══════════════════════╝ - │ - ╔══════════════════════════════════════════════════╪══════════════════════╗ - ║ PHASE 3: SOLVE (pure deadness computation) │ ║ - ╠══════════════════════════════════════════════════╪══════════════════════╣ - ║ ▼ ║ - ║ ┌─────────────────────────────────────────────────────────────────┐ ║ - ║ │ Pass 1: DeadCommon.solveDead (core deadness) │ ║ - ║ │ ~annotations ~decls ~refs ~file_deps ~config │ ║ - ║ │ → AnalysisResult.t (dead/live status resolved) │ ║ - ║ │ │ ║ - ║ │ Pass 2: Optional args analysis (liveness-aware) │ ║ - ║ │ CrossFileItems.compute_optional_args_state ~is_live │ ║ - ║ │ DeadOptionalArgs.check (only for live decls) │ ║ - ║ │ → AnalysisResult.t { issues: Issue.t list } │ ║ - ║ └─────────────────────────────────────────────────────────────────┘ ║ - ║ │ ║ - ║ Pure functions: immutable in → immutable out │ issues ║ - ╚══════════════════════════════════════════════════╪══════════════════════╝ - │ - ╔══════════════════════════════════════════════════╪══════════════════════╗ - ║ PHASE 4: REPORT (side effects at the edge) │ ║ - ╠══════════════════════════════════════════════════╪══════════════════════╣ - ║ ▼ ║ - ║ ┌─────────────────────────────────────────────────────────────────┐ ║ - ║ │ AnalysisResult.get_issues │ ║ - ║ │ |> List.iter (fun issue -> Log_.warning ~loc issue.description) │ ║ - ║ │ │ ║ - ║ │ (Optional: EmitJson for JSON output) │ ║ - ║ 
└─────────────────────────────────────────────────────────────────┘ ║ - ║ ║ - ║ Side effects only here: logging, JSON output ║ - ╚════════════════════════════════════════════════════════════════════════╝ -``` +> **Source**: [`diagrams/batch-pipeline.mmd`](diagrams/batch-pipeline.mmd) + +![Batch Pipeline](diagrams/batch-pipeline.svg) --- @@ -208,6 +135,59 @@ The key insight: **immutable data structures enable safe incremental updates** - --- +## Reactive Pipelines + +The reactive layer (`analysis/reactive/`) provides delta-based incremental updates. Instead of re-running entire phases, changes propagate automatically through derived collections. + +### Core Reactive Primitives + +| Primitive | Description | +|-----------|-------------| +| `Reactive.t ('k, 'v)` | Universal reactive collection interface | +| `subscribe` | Register for delta notifications | +| `iter` | Iterate current entries | +| `get` | Lookup by key | +| `delta` | Change notification: `Set (key, value)` or `Remove key` | +| `flatMap` | Transform collection, optionally merge same-key values | +| `join` | Hash join two collections with automatic updates | +| `lookup` | Single-key subscription | +| `ReactiveFileCollection` | File-backed collection with change detection | + +### Reactive Analysis Pipeline + +> **Source**: [`diagrams/reactive-pipeline.mmd`](diagrams/reactive-pipeline.mmd) + +![Reactive Pipeline](diagrams/reactive-pipeline.svg) + +### Delta Propagation + +> **Source**: [`diagrams/delta-propagation.mmd`](diagrams/delta-propagation.mmd) + +![Delta Propagation](diagrams/delta-propagation.svg) + +### Key Benefits + +| Aspect | Batch Pipeline | Reactive Pipeline | +|--------|----------------|-------------------| +| File change | Re-process all files | Re-process changed file only | +| Merge | Re-merge all data | Update affected entries only | +| Type deps | Rebuild entire index | Update affected paths only | +| Exception refs | Re-resolve all | Re-resolve affected only | +| Memory | O(N) per 
phase | O(N) total, shared | + +### Reactive Modules + +| Module | Responsibility | +|--------|---------------| +| `Reactive` | Core primitives: `flatMap`, `join`, `lookup`, delta types | +| `ReactiveFileCollection` | File-backed collection with change detection | +| `ReactiveAnalysis` | CMT processing with file caching | +| `ReactiveMerge` | Derives decls, annotations, refs from file_data | +| `ReactiveTypeDeps` | Type-label dependency resolution via join | +| `ReactiveExceptionRefs` | Exception ref resolution via join | + +--- + ## Testing **Order-independence test**: Run with `-test-shuffle` flag to randomize file processing order. The test (`make test-reanalyze-order-independence`) verifies that shuffled runs produce identical output. diff --git a/analysis/reanalyze/diagrams/batch-pipeline.mmd b/analysis/reanalyze/diagrams/batch-pipeline.mmd new file mode 100644 index 0000000000..cc2c1bde94 --- /dev/null +++ b/analysis/reanalyze/diagrams/batch-pipeline.mmd @@ -0,0 +1,53 @@ +%%{init: {'theme': 'base', 'themeVariables': { 'primaryColor': '#e8f4fd', 'primaryTextColor': '#1a1a1a', 'primaryBorderColor': '#4a90d9', 'lineColor': '#4a90d9'}}}%% +flowchart TB + subgraph Phase1["PHASE 1: MAP (per-file)"] + CMT1["file1.cmt"] + CMT2["file2.cmt"] + CMT3["file3.cmt"] + PROC["process_cmt_file"] + FD1["file_data₁"] + FD2["file_data₂"] + FD3["file_data₃"] + + CMT1 --> PROC + CMT2 --> PROC + CMT3 --> PROC + PROC --> FD1 + PROC --> FD2 + PROC --> FD3 + end + + subgraph Phase2["PHASE 2: MERGE"] + MERGE["merge_all"] + MERGED["merged {
annotations,
decls,
refs,
file_deps
}"] + + FD1 --> MERGE + FD2 --> MERGE + FD3 --> MERGE + MERGE --> MERGED + end + + subgraph Phase3["PHASE 3: SOLVE"] + SOLVE["solveDead"] + RESULT["AnalysisResult {
issues: Issue.t list
}"] + + MERGED --> SOLVE + SOLVE --> RESULT + end + + subgraph Phase4["PHASE 4: REPORT"] + REPORT["Log_.warning"] + + RESULT --> REPORT + end + + classDef phase1 fill:#e8f4fd,stroke:#4a90d9 + classDef phase2 fill:#f0f7e6,stroke:#6b8e23 + classDef phase3 fill:#fff5e6,stroke:#d4a574 + classDef phase4 fill:#ffe6e6,stroke:#cc6666 + + class CMT1,CMT2,CMT3,PROC,FD1,FD2,FD3 phase1 + class MERGE,MERGED phase2 + class SOLVE,RESULT phase3 + class REPORT phase4 + diff --git a/analysis/reanalyze/diagrams/batch-pipeline.svg b/analysis/reanalyze/diagrams/batch-pipeline.svg new file mode 100644 index 0000000000..5877ce5c5a --- /dev/null +++ b/analysis/reanalyze/diagrams/batch-pipeline.svg @@ -0,0 +1 @@ +

PHASE 4: REPORT

PHASE 3: SOLVE

PHASE 2: MERGE

PHASE 1: MAP (per-file)

file1.cmt

file2.cmt

file3.cmt

process_cmt_file

file_data₁

file_data₂

file_data₃

merge_all

merged {
annotations,
decls,
refs,
file_deps
}

solveDead

AnalysisResult {
issues: Issue.t list
}

Log_.warning

\ No newline at end of file diff --git a/analysis/reanalyze/diagrams/delta-propagation.mmd b/analysis/reanalyze/diagrams/delta-propagation.mmd new file mode 100644 index 0000000000..94f6d39c17 --- /dev/null +++ b/analysis/reanalyze/diagrams/delta-propagation.mmd @@ -0,0 +1,26 @@ +%%{init: {'theme': 'base', 'themeVariables': { 'primaryColor': '#e8f4fd', 'primaryTextColor': '#1a1a1a', 'primaryBorderColor': '#4a90d9', 'lineColor': '#4a90d9'}}}%% +sequenceDiagram + participant FS as File System + participant RFC as ReactiveFileCollection + participant FD as file_data + participant DECLS as decls + participant DBP as decl_by_path + participant REFS as refs + participant SOLVER as Solver + + Note over FS,SOLVER: File.cmt changes on disk + + FS->>RFC: mtime/size changed + RFC->>RFC: read_cmt + process + RFC->>FD: Set("File.res", new_file_data) + + FD->>DECLS: Remove(old_pos₁), Remove(old_pos₂), ... + FD->>DECLS: Set(new_pos₁, decl₁), Set(new_pos₂, decl₂), ... + + DECLS->>DBP: Update affected paths only + DBP->>DBP: Recalculate merged lists + + DBP->>REFS: Set(pos, updated_refs) + + Note over SOLVER: Solver sees updated refs immediately + diff --git a/analysis/reanalyze/diagrams/delta-propagation.svg b/analysis/reanalyze/diagrams/delta-propagation.svg new file mode 100644 index 0000000000..06bd47c050 --- /dev/null +++ b/analysis/reanalyze/diagrams/delta-propagation.svg @@ -0,0 +1 @@ +Solverrefsdecl_by_pathdeclsfile_dataReactiveFileCollectionFile SystemSolverrefsdecl_by_pathdeclsfile_dataReactiveFileCollectionFile SystemFile.cmt changes on diskSolver sees updated refs immediatelymtime/size changedread_cmt + processSet("File.res", new_file_data)Remove(old_pos₁), Remove(old_pos₂), ...Set(new_pos₁, decl₁), Set(new_pos₂, decl₂), ...Update affected paths onlyRecalculate merged listsSet(pos, updated_refs) \ No newline at end of file diff --git a/analysis/reanalyze/diagrams/reactive-pipeline.mmd b/analysis/reanalyze/diagrams/reactive-pipeline.mmd new file mode 100644 index 
0000000000..c5d228cbd0 --- /dev/null +++ b/analysis/reanalyze/diagrams/reactive-pipeline.mmd @@ -0,0 +1,62 @@ +%%{init: {'theme': 'base', 'themeVariables': { 'primaryColor': '#e8f4fd', 'primaryTextColor': '#1a1a1a', 'primaryBorderColor': '#4a90d9', 'lineColor': '#4a90d9', 'secondaryColor': '#f0f7e6', 'tertiaryColor': '#fff5e6'}}}%% +flowchart TB + subgraph FileLayer["File Layer"] + RFC[("ReactiveFileCollection
(file change detection)")] + end + + subgraph FileData["Per-File Data"] + FD["file_data
(path → file_data option)"] + end + + subgraph Extracted["Extracted Collections"] + DECLS["decls
(pos → Decl.t)"] + ANNOT["annotations
(pos → annotation)"] + EXCREF["exception_refs
(path → loc_from)"] + end + + subgraph TypeDeps["ReactiveTypeDeps"] + DBP["decl_by_path
(path → decl list)"] + SPR["same_path_refs
(pos → PosSet)"] + CFR["cross_file_refs
(pos → PosSet)"] + ATR["all_type_refs
(pos → PosSet)"] + end + + subgraph ExcDeps["ReactiveExceptionRefs"] + EXCDECL["exception_decls
(path → loc)"] + RESOLVED["resolved_refs
(pos → PosSet)"] + end + + subgraph Output["Combined Output"] + REFS["All refs
→ Ready for solver"] + end + + RFC -->|"process_files
(detect changes)"| FD + FD -->|"flatMap
(extract)"| DECLS + FD -->|"flatMap
(extract)"| ANNOT + FD -->|"flatMap
(extract)"| EXCREF + + DECLS -->|"flatMap"| DBP + DBP -->|"flatMap"| SPR + DBP -->|"join"| CFR + SPR --> ATR + CFR --> ATR + + DECLS -->|"flatMap"| EXCDECL + EXCDECL -->|"join"| RESOLVED + EXCREF -->|"join"| RESOLVED + + ATR --> REFS + RESOLVED --> REFS + + classDef fileLayer fill:#e8f4fd,stroke:#4a90d9,stroke-width:2px + classDef extracted fill:#f0f7e6,stroke:#6b8e23,stroke-width:2px + classDef typeDeps fill:#fff5e6,stroke:#d4a574,stroke-width:2px + classDef excDeps fill:#f5e6ff,stroke:#9966cc,stroke-width:2px + classDef output fill:#e6ffe6,stroke:#2e8b2e,stroke-width:2px + + class RFC,FD fileLayer + class DECLS,ANNOT,EXCREF extracted + class DBP,SPR,CFR,ATR typeDeps + class EXCDECL,RESOLVED excDeps + class REFS output + diff --git a/analysis/reanalyze/diagrams/reactive-pipeline.svg b/analysis/reanalyze/diagrams/reactive-pipeline.svg new file mode 100644 index 0000000000..dfeacb4ba0 --- /dev/null +++ b/analysis/reanalyze/diagrams/reactive-pipeline.svg @@ -0,0 +1 @@ +

Combined Output

ReactiveExceptionRefs

ReactiveTypeDeps

Extracted Collections

Per-File Data

File Layer

process_files
(detect changes)

flatMap
(extract)

flatMap
(extract)

flatMap
(extract)

flatMap

flatMap

join

flatMap

join

join

ReactiveFileCollection
(file change detection)

file_data
(path → file_data option)

decls
(pos → Decl.t)

annotations
(pos → annotation)

exception_refs
(path → loc_from)

decl_by_path
(path → decl list)

same_path_refs
(pos → PosSet)

cross_file_refs
(pos → PosSet)

all_type_refs
(pos → PosSet)

exception_decls
(path → loc)

resolved_refs
(pos → PosSet)

All refs
→ Ready for solver

\ No newline at end of file diff --git a/analysis/reanalyze/src/ReactiveExceptionRefs.ml b/analysis/reanalyze/src/ReactiveExceptionRefs.ml new file mode 100644 index 0000000000..d2bf89da2c --- /dev/null +++ b/analysis/reanalyze/src/ReactiveExceptionRefs.ml @@ -0,0 +1,82 @@ +(** Reactive exception reference resolution. + + Expresses exception ref resolution as a reactive join: + - exception_refs: (path, loc_from) from CrossFileItems + - exception_decls: (path, loc_to) indexed from Declarations + - result: value refs (pos_to, pos_from) + + When declarations or exception_refs change, only affected refs update. *) + +(** {1 Types} *) + +type t = { + exception_decls: (DcePath.t, Location.t) Reactive.t; + resolved_refs: (Lexing.position, PosSet.t) Reactive.t; +} +(** Reactive exception ref collections *) + +(** {1 Creation} *) + +(** Create reactive exception refs from decls and cross-file exception refs. + + [decls] is the reactive declarations collection. + [exception_refs] is the reactive collection of (path, loc_from) from CrossFileItems. 
*) +let create ~(decls : (Lexing.position, Decl.t) Reactive.t) + ~(exception_refs : (DcePath.t, Location.t) Reactive.t) : t = + (* Step 1: Index exception declarations by path *) + let exception_decls = + Reactive.flatMap decls + ~f:(fun _pos (decl : Decl.t) -> + match decl.Decl.declKind with + | Exception -> + let loc : Location.t = + { + Location.loc_start = decl.pos; + loc_end = decl.posEnd; + loc_ghost = false; + } + in + [(decl.path, loc)] + | _ -> []) + () (* Last-write-wins is fine since paths should be unique *) + in + + (* Step 2: Join exception_refs with exception_decls *) + let resolved_refs = + Reactive.join exception_refs exception_decls + ~key_of:(fun path _loc_from -> path) + ~f:(fun _path loc_from loc_to_opt -> + match loc_to_opt with + | Some loc_to -> + (* Add value reference: pos_to -> pos_from *) + [(loc_to.Location.loc_start, PosSet.singleton loc_from.Location.loc_start)] + | None -> []) + ~merge:PosSet.union () + in + + {exception_decls; resolved_refs} + +(** {1 Freezing} *) + +(** Add all resolved exception refs to a References.builder *) +let add_to_refs_builder (t : t) ~(refs : References.builder) : unit = + Reactive.iter + (fun posTo posFromSet -> + PosSet.iter + (fun posFrom -> References.add_value_ref refs ~posTo ~posFrom) + posFromSet) + t.resolved_refs + +(** Add file dependencies for resolved refs *) +let add_to_file_deps_builder (t : t) ~(file_deps : FileDeps.builder) : unit = + Reactive.iter + (fun posTo posFromSet -> + PosSet.iter + (fun posFrom -> + let from_file = posFrom.Lexing.pos_fname in + let to_file = posTo.Lexing.pos_fname in + if from_file <> to_file then + FileDeps.add_dep file_deps ~from_file ~to_file) + posFromSet) + t.resolved_refs + diff --git a/analysis/reanalyze/src/ReactiveExceptionRefs.mli b/analysis/reanalyze/src/ReactiveExceptionRefs.mli new file mode 100644 index 0000000000..2e7f583497 --- /dev/null +++ b/analysis/reanalyze/src/ReactiveExceptionRefs.mli @@ -0,0 +1,54 @@ +(** Reactive exception reference 
resolution. + + Expresses exception ref resolution as a reactive join. + When declarations or exception_refs change, only affected refs update. + + {2 Pipeline} + + {[ + decls exception_refs + | | + | flatMap | + ↓ | + exception_decls | + (path → loc) | + ↘ ↙ + join + ↓ + resolved_refs + (pos → PosSet) + ]} + + {2 Example} + + {[ + let exc_refs = ReactiveExceptionRefs.create + ~decls:merged.decls + ~exception_refs:(flatMap cross_file ~f:extract_exception_refs ()) + in + ReactiveExceptionRefs.add_to_refs_builder exc_refs ~refs:my_refs_builder + ]} *) + +(** {1 Types} *) + +type t +(** Reactive exception ref collections *) + +(** {1 Creation} *) + +val create : + decls:(Lexing.position, Decl.t) Reactive.t -> + exception_refs:(DcePath.t, Location.t) Reactive.t -> + t +(** Create reactive exception refs from decls and cross-file exception refs. + + When the source collections change, resolved refs automatically update. *) + +(** {1 Freezing} *) + +val add_to_refs_builder : t -> refs:References.builder -> unit +(** Add all resolved exception refs to a References.builder. *) + +val add_to_file_deps_builder : t -> file_deps:FileDeps.builder -> unit +(** Add file dependencies for resolved refs. *) + diff --git a/analysis/reanalyze/src/ReactiveTypeDeps.ml b/analysis/reanalyze/src/ReactiveTypeDeps.ml new file mode 100644 index 0000000000..bb9aa03931 --- /dev/null +++ b/analysis/reanalyze/src/ReactiveTypeDeps.ml @@ -0,0 +1,255 @@ +(** Reactive type-label dependencies. + + Expresses the type-label dependency computation as a reactive pipeline: + 1. decls -> decl_by_path (index by path) + 2. decl_by_path -> same_path_refs (connect duplicates at same path) + 3. decl_by_path + impl_decls -> cross_file_refs (connect impl<->intf) + + When declarations change, only affected refs are recomputed. 
*) + +(** {1 Helper types} *) + +type decl_info = { + pos: Lexing.position; + pos_end: Lexing.position; + path: DcePath.t; + is_interface: bool; +} +(** Simplified decl info for type-label processing *) + +let decl_to_info (decl : Decl.t) : decl_info option = + match decl.declKind with + | RecordLabel | VariantCase -> + let is_interface = + match List.rev decl.path with + | [] -> true + | moduleNameTag :: _ -> ( + try (moduleNameTag |> Name.toString).[0] <> '+' with _ -> true) + in + Some {pos = decl.pos; pos_end = decl.posEnd; path = decl.path; is_interface} + | _ -> None + +(** {1 Reactive Collections} *) + +type t = { + decl_by_path: (DcePath.t, decl_info list) Reactive.t; + same_path_refs: (Lexing.position, PosSet.t) Reactive.t; + cross_file_refs: (Lexing.position, PosSet.t) Reactive.t; + all_type_refs: (Lexing.position, PosSet.t) Reactive.t; +} +(** All reactive collections for type-label dependencies *) + +(** Create reactive type-label dependency collections from a decls collection *) +let create ~(decls : (Lexing.position, Decl.t) Reactive.t) + ~(report_types_dead_only_in_interface : bool) : t = + (* Step 1: Index decls by path *) + let decl_by_path = + Reactive.flatMap decls + ~f:(fun _pos decl -> + match decl_to_info decl with + | Some info -> [(info.path, [info])] + | None -> []) + ~merge:List.append () + in + + (* Step 2: Same-path refs - connect all decls at the same path *) + let same_path_refs = + Reactive.flatMap decl_by_path + ~f:(fun _path decls -> + match decls with + | [] | [_] -> [] + | first :: rest -> + (* Connect each decl to the first one (and vice-versa if needed) *) + rest + |> List.concat_map (fun other -> + let refs = + [(first.pos, PosSet.singleton other.pos); + (other.pos, PosSet.singleton first.pos)] + in + if report_types_dead_only_in_interface then + (* Only first -> other *) + [(other.pos, PosSet.singleton first.pos)] + else refs)) + ~merge:PosSet.union () + in + + (* Step 3: Cross-file refs - connect impl decls to intf decls *) + 
(* First, extract impl decls that need to look up intf *) + let impl_decls = + Reactive.flatMap decls + ~f:(fun _pos decl -> + match decl_to_info decl with + | Some info when not info.is_interface -> ( + match info.path with + | [] -> [] + | typeLabelName :: pathToType -> + (* Try two intf paths *) + let path_1 = pathToType |> DcePath.moduleToInterface in + let path_2 = path_1 |> DcePath.typeToInterface in + let intf_path1 = typeLabelName :: path_1 in + let intf_path2 = typeLabelName :: path_2 in + [(info.pos, (info, intf_path1, intf_path2))]) + | _ -> []) + () + in + + (* Join impl decls with decl_by_path to find intf *) + let impl_to_intf_refs = + Reactive.join impl_decls decl_by_path + ~key_of:(fun _pos (_, intf_path1, _) -> intf_path1) + ~f:(fun _pos (info, _intf_path1, intf_path2) intf_decls_opt -> + match intf_decls_opt with + | Some (intf_info :: _) -> + (* Found at path1, connect impl <-> intf *) + if report_types_dead_only_in_interface then + [(intf_info.pos, PosSet.singleton info.pos)] + else + [(info.pos, PosSet.singleton intf_info.pos); + (intf_info.pos, PosSet.singleton info.pos)] + | _ -> + (* Try path2 - need second join, but for now return placeholder *) + (* We'll handle path2 with a separate join below *) + [(info.pos, (intf_path2, info))] |> List.filter_map (fun _ -> None)) + ~merge:PosSet.union () + in + + (* Second join for path2 fallback *) + let impl_needing_path2 = + Reactive.join impl_decls decl_by_path + ~key_of:(fun _pos (_, intf_path1, _) -> intf_path1) + ~f:(fun pos (info, _intf_path1, intf_path2) intf_decls_opt -> + match intf_decls_opt with + | Some (_ :: _) -> [] (* Found at path1, skip *) + | _ -> [(pos, (info, intf_path2))]) + () + in + + let impl_to_intf_refs_path2 = + Reactive.join impl_needing_path2 decl_by_path + ~key_of:(fun _pos (_, intf_path2) -> intf_path2) + ~f:(fun _pos (info, _) intf_decls_opt -> + match intf_decls_opt with + | Some (intf_info :: _) -> + if report_types_dead_only_in_interface then + [(intf_info.pos, 
PosSet.singleton info.pos)] + else + [(info.pos, PosSet.singleton intf_info.pos); + (intf_info.pos, PosSet.singleton info.pos)] + | _ -> []) + ~merge:PosSet.union () + in + + (* Also handle intf -> impl direction *) + let intf_decls = + Reactive.flatMap decls + ~f:(fun _pos decl -> + match decl_to_info decl with + | Some info when info.is_interface -> ( + match info.path with + | [] -> [] + | typeLabelName :: pathToType -> + let impl_path = typeLabelName :: DcePath.moduleToImplementation pathToType in + [(info.pos, (info, impl_path))]) + | _ -> []) + () + in + + let intf_to_impl_refs = + Reactive.join intf_decls decl_by_path + ~key_of:(fun _pos (_, impl_path) -> impl_path) + ~f:(fun _pos (info, _) impl_decls_opt -> + match impl_decls_opt with + | Some (impl_info :: _) -> + if report_types_dead_only_in_interface then + [(info.pos, PosSet.singleton impl_info.pos)] + else + [(impl_info.pos, PosSet.singleton info.pos); + (info.pos, PosSet.singleton impl_info.pos)] + | _ -> []) + ~merge:PosSet.union () + in + + (* Combine all cross-file refs *) + let cross_file_refs = + Reactive.flatMap impl_to_intf_refs + ~f:(fun pos refs -> [(pos, refs)]) + ~merge:PosSet.union () + in + (* Merge in path2 refs *) + let cross_file_refs = + Reactive.flatMap impl_to_intf_refs_path2 + ~f:(fun pos refs -> [(pos, refs)]) + ~merge:PosSet.union () + |> fun refs2 -> + Reactive.flatMap cross_file_refs + ~f:(fun pos refs -> + let additional = + match Reactive.get refs2 pos with + | Some r -> r + | None -> PosSet.empty + in + [(pos, PosSet.union refs additional)]) + ~merge:PosSet.union () + in + (* Merge in intf->impl refs *) + let cross_file_refs = + Reactive.flatMap intf_to_impl_refs + ~f:(fun pos refs -> [(pos, refs)]) + ~merge:PosSet.union () + |> fun refs3 -> + Reactive.flatMap cross_file_refs + ~f:(fun pos refs -> + let additional = + match Reactive.get refs3 pos with + | Some r -> r + | None -> PosSet.empty + in + [(pos, PosSet.union refs additional)]) + ~merge:PosSet.union () + in + + (* 
Step 4: Combine same-path and cross-file refs *) + let all_type_refs = + Reactive.flatMap same_path_refs + ~f:(fun pos refs -> + let cross = + match Reactive.get cross_file_refs pos with + | Some r -> r + | None -> PosSet.empty + in + [(pos, PosSet.union refs cross)]) + ~merge:PosSet.union () + in + (* Also include cross-file refs that don't have same-path refs *) + let all_type_refs = + Reactive.flatMap cross_file_refs + ~f:(fun pos refs -> + match Reactive.get same_path_refs pos with + | Some _ -> [] (* Already included above *) + | None -> [(pos, refs)]) + ~merge:PosSet.union () + |> fun extra_refs -> + Reactive.flatMap all_type_refs + ~f:(fun pos refs -> + let extra = + match Reactive.get extra_refs pos with + | Some r -> r + | None -> PosSet.empty + in + [(pos, PosSet.union refs extra)]) + ~merge:PosSet.union () + in + + {decl_by_path; same_path_refs; cross_file_refs; all_type_refs} + +(** {1 Freezing for solver} *) + +(** Add all type refs to a References.builder *) +let add_to_refs_builder (t : t) ~(refs : References.builder) : unit = + Reactive.iter + (fun posTo posFromSet -> + PosSet.iter + (fun posFrom -> References.add_type_ref refs ~posTo ~posFrom) + posFromSet) + t.all_type_refs + diff --git a/analysis/reanalyze/src/ReactiveTypeDeps.mli b/analysis/reanalyze/src/ReactiveTypeDeps.mli new file mode 100644 index 0000000000..7c9e19c77d --- /dev/null +++ b/analysis/reanalyze/src/ReactiveTypeDeps.mli @@ -0,0 +1,55 @@ +(** Reactive type-label dependencies. + + Expresses the type-label dependency computation as a reactive pipeline. + When declarations change, only affected refs are recomputed. + + {2 Pipeline} + + {[ + decls + |> (flatMap) decl_by_path (* index by path *) + |> (flatMap) same_path_refs (* connect same-path duplicates *) + | + +-> (join) cross_file_refs (* connect impl <-> intf *) + | + +-> all_type_refs (* combined refs *) + ]} + + {2 Example} + + {[ + let reactive_decls = ReactiveMerge.create ... 
in + let type_deps = ReactiveTypeDeps.create + ~decls:reactive_decls.decls + ~report_types_dead_only_in_interface:true + in + (* Type refs update automatically when decls change *) + ReactiveTypeDeps.add_to_refs_builder type_deps ~refs:my_refs_builder + ]} *) + +(** {1 Types} *) + +type t +(** Reactive type-label dependency collections *) + +(** {1 Creation} *) + +val create : + decls:(Lexing.position, Decl.t) Reactive.t -> + report_types_dead_only_in_interface:bool -> + t +(** Create reactive type-label dependencies from a decls collection. + + When the [decls] collection changes, type refs automatically update. + + [report_types_dead_only_in_interface] controls whether refs are bidirectional + (false) or only intf->impl (true). *) + +(** {1 Freezing} *) + +val add_to_refs_builder : t -> refs:References.builder -> unit +(** Add all computed type refs to a References.builder. + + Call this after processing files to get the current type refs. + The builder will contain all type-label dependency refs. 
*) + diff --git a/analysis/src/DceCommand.ml b/analysis/src/DceCommand.ml index 6ff03172ae..66ddb6f06f 100644 --- a/analysis/src/DceCommand.ml +++ b/analysis/src/DceCommand.ml @@ -1,6 +1,7 @@ let command () = Reanalyze.RunConfig.dce (); let dce_config = Reanalyze.DceConfig.current () in - Reanalyze.runAnalysis ~dce_config ~cmtRoot:None ~reactive_collection:None; + Reanalyze.runAnalysis ~dce_config ~cmtRoot:None ~reactive_collection:None + ~reactive_merge:None; let issues = !Reanalyze.Log_.Stats.issues in Printf.printf "issues:%d\n" (List.length issues) From d686fd64a55667ae25f9fc3b43e9f2c8e4738b0d Mon Sep 17 00:00:00 2001 From: Cristiano Calcagno Date: Tue, 16 Dec 2025 08:17:37 +0100 Subject: [PATCH 8/9] Fix reactive mode @genType handling and update architecture docs Bug fix: - ReactiveFileCollection.process now receives (path, raw) instead of just (raw) - ReactiveAnalysis passes cmtFilePath to DceFileProcessing.process_cmt_file - This fixes @genType annotations being incorrectly collected when .cmti exists Previously, reactive mode passed cmtFilePath:"" which made the .cmti existence check always return false, causing @genType annotations to be collected even for files with interface files (where they should be ignored). 
Architecture updates: - Updated reactive-pipeline.mmd with accurate node names - Added legend table explaining all diagram symbols - Diagram now shows: VR/TR (per-file refs), all ReactiveTypeDeps fields, ReactiveExceptionRefs flow, and combined output Helper functions added for debugging (kept as useful): - Declarations.length, FileAnnotations.length/iter - References.value_refs_length/type_refs_length - FileDeps.files_count/deps_count --- analysis/reactive/src/Reactive.ml | 17 +- .../reactive/src/ReactiveFileCollection.ml | 4 +- .../reactive/src/ReactiveFileCollection.mli | 8 +- analysis/reactive/test/ReactiveTest.ml | 33 +++- analysis/reanalyze/ARCHITECTURE.md | 21 +++ .../reanalyze/diagrams/reactive-pipeline.mmd | 64 ++++--- .../reanalyze/diagrams/reactive-pipeline.svg | 2 +- analysis/reanalyze/src/Declarations.ml | 2 + analysis/reanalyze/src/Declarations.mli | 2 + analysis/reanalyze/src/FileAnnotations.ml | 4 + analysis/reanalyze/src/FileAnnotations.mli | 5 +- analysis/reanalyze/src/FileDeps.ml | 7 +- analysis/reanalyze/src/FileDeps.mli | 6 + analysis/reanalyze/src/ReactiveAnalysis.ml | 7 +- .../reanalyze/src/ReactiveExceptionRefs.ml | 6 +- .../reanalyze/src/ReactiveExceptionRefs.mli | 6 +- analysis/reanalyze/src/ReactiveMerge.ml | 113 +++++++++-- analysis/reanalyze/src/ReactiveMerge.mli | 4 +- analysis/reanalyze/src/ReactiveTypeDeps.ml | 176 +++++++----------- analysis/reanalyze/src/ReactiveTypeDeps.mli | 19 +- analysis/reanalyze/src/Reanalyze.ml | 75 +++++--- analysis/reanalyze/src/References.ml | 4 + analysis/reanalyze/src/References.mli | 4 + 23 files changed, 374 insertions(+), 215 deletions(-) diff --git a/analysis/reactive/src/Reactive.ml b/analysis/reactive/src/Reactive.ml index 05f2b6f29f..11e4836161 100644 --- a/analysis/reactive/src/Reactive.ml +++ b/analysis/reactive/src/Reactive.ml @@ -200,8 +200,9 @@ let lookup (source : ('k, 'v) t) ~key : ('k, 'v) t = When either collection changes, affected entries are recomputed. 
This is more efficient than nested flatMap for join patterns. *) let join (left : ('k1, 'v1) t) (right : ('k2, 'v2) t) - ~(key_of : 'k1 -> 'v1 -> 'k2) ~(f : 'k1 -> 'v1 -> 'v2 option -> ('k3 * 'v3) list) - ?merge () : ('k3, 'v3) t = + ~(key_of : 'k1 -> 'v1 -> 'k2) + ~(f : 'k1 -> 'v1 -> 'v2 option -> ('k3 * 'v3) list) ?merge () : ('k3, 'v3) t + = let merge_fn = match merge with | Some m -> m @@ -278,9 +279,9 @@ let join (left : ('k1, 'v1) t) (right : ('k2, 'v2) t) let old_affected = remove_left_contributions k1 in (* Update right key tracking *) (match Hashtbl.find_opt left_to_right_key k1 with - | Some old_k2 -> + | Some old_k2 -> ( Hashtbl.remove left_to_right_key k1; - (match Hashtbl.find_opt right_key_to_left_keys old_k2 with + match Hashtbl.find_opt right_key_to_left_keys old_k2 with | Some keys -> Hashtbl.replace right_key_to_left_keys old_k2 (List.filter (fun k -> k <> k1) keys) @@ -313,9 +314,9 @@ let join (left : ('k1, 'v1) t) (right : ('k2, 'v2) t) let affected = remove_left_contributions k1 in (* Clean up tracking *) (match Hashtbl.find_opt left_to_right_key k1 with - | Some k2 -> + | Some k2 -> ( Hashtbl.remove left_to_right_key k1; - (match Hashtbl.find_opt right_key_to_left_keys k2 with + match Hashtbl.find_opt right_key_to_left_keys k2 with | Some keys -> Hashtbl.replace right_key_to_left_keys k2 (List.filter (fun k -> k <> k1) keys) @@ -339,8 +340,8 @@ let join (left : ('k1, 'v1) t) (right : ('k2, 'v2) t) (* When right changes, reprocess all left entries that depend on it *) let downstream = match delta with - | Set (k2, _) | Remove k2 -> - (match Hashtbl.find_opt right_key_to_left_keys k2 with + | Set (k2, _) | Remove k2 -> ( + match Hashtbl.find_opt right_key_to_left_keys k2 with | None -> [] | Some left_keys -> left_keys diff --git a/analysis/reactive/src/ReactiveFileCollection.ml b/analysis/reactive/src/ReactiveFileCollection.ml index 88f9a77265..f634468197 100644 --- a/analysis/reactive/src/ReactiveFileCollection.ml +++ 
b/analysis/reactive/src/ReactiveFileCollection.ml @@ -16,7 +16,7 @@ let file_changed ~old_id ~new_id = type ('raw, 'v) internal = { cache: (string, file_id * 'v) Hashtbl.t; read_file: string -> 'raw; - process: 'raw -> 'v; + process: string -> 'raw -> 'v; (* path -> raw -> value *) mutable subscribers: ((string, 'v) Reactive.delta -> unit) list; } (** Internal state for file collection *) @@ -61,7 +61,7 @@ let process_if_changed t path = false (* unchanged *) | _ -> let raw = t.internal.read_file path in - let value = t.internal.process raw in + let value = t.internal.process path raw in Hashtbl.replace t.internal.cache path (new_id, value); emit t (Reactive.Set (path, value)); true (* changed *) diff --git a/analysis/reactive/src/ReactiveFileCollection.mli b/analysis/reactive/src/ReactiveFileCollection.mli index 3730c11d70..95a0ca9ef8 100644 --- a/analysis/reactive/src/ReactiveFileCollection.mli +++ b/analysis/reactive/src/ReactiveFileCollection.mli @@ -8,7 +8,7 @@ (* Create file collection *) let files = ReactiveFileCollection.create ~read_file:Cmt_format.read_cmt - ~process:(fun cmt -> extract_data cmt) + ~process:(fun path cmt -> extract_data path cmt) (* Compose with flatMap *) let decls = Reactive.flatMap (ReactiveFileCollection.to_collection files) @@ -27,8 +27,10 @@ type ('raw, 'v) t (** {1 Creation} *) -val create : read_file:(string -> 'raw) -> process:('raw -> 'v) -> ('raw, 'v) t -(** Create a new file collection. *) +val create : + read_file:(string -> 'raw) -> process:(string -> 'raw -> 'v) -> ('raw, 'v) t +(** Create a new file collection. + [process path raw] receives the file path and raw content to produce the value. 
*) (** {1 Composition} *) diff --git a/analysis/reactive/test/ReactiveTest.ml b/analysis/reactive/test/ReactiveTest.ml index 35ed4cc319..bdd6fc488f 100644 --- a/analysis/reactive/test/ReactiveTest.ml +++ b/analysis/reactive/test/ReactiveTest.ml @@ -244,7 +244,8 @@ let test_file_collection () = (* Create file collection: file -> word count map *) let files = - ReactiveFileCollection.create ~read_file:read_lines ~process:(fun lines -> + ReactiveFileCollection.create ~read_file:read_lines + ~process:(fun _path lines -> (* Count words within this file *) let counts = ref StringMap.empty in lines @@ -357,7 +358,8 @@ let test_lookup () = (* Set foo=42 *) emit (Set ("foo", 42)); - Printf.printf "After Set(foo, 42): lookup has %d entries\n" (length foo_lookup); + Printf.printf "After Set(foo, 42): lookup has %d entries\n" + (length foo_lookup); assert (length foo_lookup = 1); assert (get foo_lookup "foo" = Some 42); @@ -381,7 +383,8 @@ let test_lookup () = emit (Set ("bar", 2)); emit (Remove "foo"); - Printf.printf "Subscription received %d updates (expected 2: Set+Remove for foo)\n" + Printf.printf + "Subscription received %d updates (expected 2: Set+Remove for foo)\n" (List.length !updates); assert (List.length !updates = 2); @@ -441,13 +444,17 @@ let test_join () = (* Add declaration at path "A" with pos 100 *) emit_right (Set ("A", 100)); Printf.printf "After right Set(A, 100): joined=%d\n" (length joined); - assert (length joined = 0); (* No left entries yet *) + assert (length joined = 0); + + (* No left entries yet *) (* Add exception ref at path "A" from loc 1 *) emit_left (Set ("A", 1)); Printf.printf "After left Set(A, 1): joined=%d\n" (length joined); assert (length joined = 1); - assert (get joined 100 = Some 1); (* decl_pos 100 -> loc_from 1 *) + assert (get joined 100 = Some 1); + + (* decl_pos 100 -> loc_from 1 *) (* Add another exception ref at path "B" (no matching decl) *) emit_left (Set ("B", 2)); @@ -465,8 +472,11 @@ let test_join () = emit_right (Set 
("B", 201)); Printf.printf "After right Set(B, 201): joined=%d\n" (length joined); assert (length joined = 2); - assert (get joined 200 = None); (* Old key gone *) - assert (get joined 201 = Some 2); (* New key has the value *) + assert (get joined 200 = None); + (* Old key gone *) + assert (get joined 201 = Some 2); + + (* New key has the value *) (* Remove left entry A *) emit_left (Remove "A"); @@ -528,18 +538,21 @@ let test_join_with_merge () = Printf.printf "Two entries looking up X (value 10): sum=%d\n" (get joined 0 |> Option.value ~default:0); - assert (get joined 0 = Some 20); (* 10 + 10 *) + assert (get joined 0 = Some 20); + (* 10 + 10 *) emit_right (Set ("X", 5)); Printf.printf "After right changes to 5: sum=%d\n" (get joined 0 |> Option.value ~default:0); - assert (get joined 0 = Some 10); (* 5 + 5 *) + assert (get joined 0 = Some 10); + (* 5 + 5 *) emit_left (Remove 1); Printf.printf "After removing one left entry: sum=%d\n" (get joined 0 |> Option.value ~default:0); - assert (get joined 0 = Some 5); (* Only one left *) + assert (get joined 0 = Some 5); + (* Only one left *) Printf.printf "PASSED\n\n" let () = diff --git a/analysis/reanalyze/ARCHITECTURE.md b/analysis/reanalyze/ARCHITECTURE.md index 9644b4f1f9..1f7d7a0d72 100644 --- a/analysis/reanalyze/ARCHITECTURE.md +++ b/analysis/reanalyze/ARCHITECTURE.md @@ -159,6 +159,27 @@ The reactive layer (`analysis/reactive/`) provides delta-based incremental updat ![Reactive Pipeline](diagrams/reactive-pipeline.svg) +**Legend:** + +| Symbol | Collection | Type | +|--------|-----------|------| +| **RFC** | `ReactiveFileCollection` | File change detection | +| **FD** | `file_data` | `path → file_data option` | +| **D** | `decls` | `pos → Decl.t` | +| **A** | `annotations` | `pos → annotation` | +| **VR** | `value_refs` | `pos → PosSet` (per-file) | +| **TR** | `type_refs` | `pos → PosSet` (per-file) | +| **CFI** | `cross_file_items` | `path → CrossFileItems.t` | +| **DBP** | `decl_by_path` | `path → 
decl_info list` | +| **SPR** | `same_path_refs` | Same-path duplicates | +| **I2I** | `impl_to_intf_refs` | Impl → Interface links | +| **I2I₂** | `impl_to_intf_refs_path2` | Impl → Interface (path2) | +| **I→I** | `intf_to_impl_refs` | Interface → Impl links | +| **ER** | `exception_refs` | Exception references | +| **ED** | `exception_decls` | Exception declarations | +| **RR** | `resolved_refs` | Resolved exception refs | +| **REFS** | Output | Combined `References.t` | + ### Delta Propagation > **Source**: [`diagrams/delta-propagation.mmd`](diagrams/delta-propagation.mmd) diff --git a/analysis/reanalyze/diagrams/reactive-pipeline.mmd b/analysis/reanalyze/diagrams/reactive-pipeline.mmd index c5d228cbd0..67cd539389 100644 --- a/analysis/reanalyze/diagrams/reactive-pipeline.mmd +++ b/analysis/reanalyze/diagrams/reactive-pipeline.mmd @@ -1,51 +1,63 @@ %%{init: {'theme': 'base', 'themeVariables': { 'primaryColor': '#e8f4fd', 'primaryTextColor': '#1a1a1a', 'primaryBorderColor': '#4a90d9', 'lineColor': '#4a90d9', 'secondaryColor': '#f0f7e6', 'tertiaryColor': '#fff5e6'}}}%% flowchart TB subgraph FileLayer["File Layer"] - RFC[("ReactiveFileCollection
(file change detection)")] + RFC[("RFC")] end subgraph FileData["Per-File Data"] - FD["file_data
(path → file_data option)"] + FD["FD"] end - subgraph Extracted["Extracted Collections"] - DECLS["decls
(pos → Decl.t)"] - ANNOT["annotations
(pos → annotation)"] - EXCREF["exception_refs
(path → loc_from)"] + subgraph Extracted["Extracted"] + DECLS["D"] + ANNOT["A"] + VREFS["VR"] + TREFS["TR"] + CFI["CFI"] end subgraph TypeDeps["ReactiveTypeDeps"] - DBP["decl_by_path
(path → decl list)"] - SPR["same_path_refs
(pos → PosSet)"] - CFR["cross_file_refs
(pos → PosSet)"] - ATR["all_type_refs
(pos → PosSet)"] + DBP["DBP"] + SPR["SPR"] + I2I["I2I"] + I2I2["I2I₂"] + INT2IMP["I→I"] end subgraph ExcDeps["ReactiveExceptionRefs"] - EXCDECL["exception_decls
(path → loc)"] - RESOLVED["resolved_refs
(pos → PosSet)"] + EXCREF["ER"] + EXCDECL["ED"] + RESOLVED["RR"] end - subgraph Output["Combined Output"] - REFS["All refs
→ Ready for solver"] + subgraph Output["Output"] + REFS["REFS"] end - RFC -->|"process_files
(detect changes)"| FD - FD -->|"flatMap
(extract)"| DECLS - FD -->|"flatMap
(extract)"| ANNOT - FD -->|"flatMap
(extract)"| EXCREF + RFC -->|"process"| FD + FD -->|"flatMap"| DECLS + FD -->|"flatMap"| ANNOT + FD -->|"flatMap"| VREFS + FD -->|"flatMap"| TREFS + FD -->|"flatMap"| CFI DECLS -->|"flatMap"| DBP DBP -->|"flatMap"| SPR - DBP -->|"join"| CFR - SPR --> ATR - CFR --> ATR + DBP -->|"join"| I2I + DBP -->|"join"| I2I2 + DBP -->|"join"| INT2IMP + CFI -->|"flatMap"| EXCREF DECLS -->|"flatMap"| EXCDECL - EXCDECL -->|"join"| RESOLVED EXCREF -->|"join"| RESOLVED + EXCDECL -->|"join"| RESOLVED - ATR --> REFS + VREFS --> REFS + TREFS --> REFS + SPR --> REFS + I2I --> REFS + I2I2 --> REFS + INT2IMP --> REFS RESOLVED --> REFS classDef fileLayer fill:#e8f4fd,stroke:#4a90d9,stroke-width:2px @@ -55,8 +67,8 @@ flowchart TB classDef output fill:#e6ffe6,stroke:#2e8b2e,stroke-width:2px class RFC,FD fileLayer - class DECLS,ANNOT,EXCREF extracted - class DBP,SPR,CFR,ATR typeDeps - class EXCDECL,RESOLVED excDeps + class DECLS,ANNOT,VREFS,TREFS,CFI extracted + class DBP,SPR,I2I,I2I2,INT2IMP typeDeps + class EXCREF,EXCDECL,RESOLVED excDeps class REFS output diff --git a/analysis/reanalyze/diagrams/reactive-pipeline.svg b/analysis/reanalyze/diagrams/reactive-pipeline.svg index dfeacb4ba0..bc932f903f 100644 --- a/analysis/reanalyze/diagrams/reactive-pipeline.svg +++ b/analysis/reanalyze/diagrams/reactive-pipeline.svg @@ -1 +1 @@ -

Combined Output

ReactiveExceptionRefs

ReactiveTypeDeps

Extracted Collections

Per-File Data

File Layer

process_files
(detect changes)

flatMap
(extract)

flatMap
(extract)

flatMap
(extract)

flatMap

flatMap

join

flatMap

join

join

ReactiveFileCollection
(file change detection)

file_data
(path → file_data option)

decls
(pos → Decl.t)

annotations
(pos → annotation)

exception_refs
(path → loc_from)

decl_by_path
(path → decl list)

same_path_refs
(pos → PosSet)

cross_file_refs
(pos → PosSet)

all_type_refs
(pos → PosSet)

exception_decls
(path → loc)

resolved_refs
(pos → PosSet)

All refs
→ Ready for solver

\ No newline at end of file +

Output

ReactiveExceptionRefs

ReactiveTypeDeps

Extracted

Per-File Data

File Layer

process

flatMap

flatMap

flatMap

flatMap

flatMap

flatMap

flatMap

join

join

join

flatMap

flatMap

join

join

RFC

FD

D

A

VR

TR

CFI

DBP

SPR

I2I

I2I₂

I→I

ER

ED

RR

REFS

\ No newline at end of file diff --git a/analysis/reanalyze/src/Declarations.ml b/analysis/reanalyze/src/Declarations.ml index 0bcaa36b16..6b8dfedc7d 100644 --- a/analysis/reanalyze/src/Declarations.ml +++ b/analysis/reanalyze/src/Declarations.ml @@ -42,3 +42,5 @@ let find_opt (t : t) pos = PosHash.find_opt t pos let fold f (t : t) init = PosHash.fold f t init let iter f (t : t) = PosHash.iter f t + +let length (t : t) = PosHash.length t diff --git a/analysis/reanalyze/src/Declarations.mli b/analysis/reanalyze/src/Declarations.mli index 1d5180dc53..e6362ee2e9 100644 --- a/analysis/reanalyze/src/Declarations.mli +++ b/analysis/reanalyze/src/Declarations.mli @@ -38,3 +38,5 @@ val create_from_hashtbl : Decl.t PosHash.t -> t val find_opt : t -> Lexing.position -> Decl.t option val fold : (Lexing.position -> Decl.t -> 'a -> 'a) -> t -> 'a -> 'a val iter : (Lexing.position -> Decl.t -> unit) -> t -> unit + +val length : t -> int diff --git a/analysis/reanalyze/src/FileAnnotations.ml b/analysis/reanalyze/src/FileAnnotations.ml index 046805b564..60e78a0bb9 100644 --- a/analysis/reanalyze/src/FileAnnotations.ml +++ b/analysis/reanalyze/src/FileAnnotations.ml @@ -53,3 +53,7 @@ let is_annotated_gentype_or_dead (state : t) pos = match PosHash.find_opt state pos with | Some (Dead | GenType) -> true | Some Live | None -> false + +let length (t : t) = PosHash.length t + +let iter f (t : t) = PosHash.iter f t diff --git a/analysis/reanalyze/src/FileAnnotations.mli b/analysis/reanalyze/src/FileAnnotations.mli index 756264813e..292b5b5c12 100644 --- a/analysis/reanalyze/src/FileAnnotations.mli +++ b/analysis/reanalyze/src/FileAnnotations.mli @@ -9,8 +9,7 @@ (** {2 Types} *) -type annotated_as = GenType | Dead | Live -(** Annotation type *) +type annotated_as = GenType | Dead | Live (** Annotation type *) type t (** Immutable annotations - for solver (read-only) *) @@ -41,3 +40,5 @@ val create_from_hashtbl : annotated_as PosHash.t -> t val is_annotated_dead : t -> Lexing.position -> 
bool val is_annotated_gentype_or_live : t -> Lexing.position -> bool val is_annotated_gentype_or_dead : t -> Lexing.position -> bool +val length : t -> int +val iter : (Lexing.position -> annotated_as -> unit) -> t -> unit diff --git a/analysis/reanalyze/src/FileDeps.ml b/analysis/reanalyze/src/FileDeps.ml index 7c0440b687..ec83cb2896 100644 --- a/analysis/reanalyze/src/FileDeps.ml +++ b/analysis/reanalyze/src/FileDeps.ml @@ -69,7 +69,8 @@ let merge_all (builders : builder list) : t = let builder_files (builder : builder) : FileSet.t = builder.files let builder_deps_to_list (builder : builder) : (string * FileSet.t) list = - FileHash.fold (fun from_file to_files acc -> (from_file, to_files) :: acc) + FileHash.fold + (fun from_file to_files acc -> (from_file, to_files) :: acc) builder.deps [] let create ~files ~deps : t = {files; deps} @@ -87,6 +88,10 @@ let iter_deps (t : t) f = FileHash.iter f t.deps let file_exists (t : t) file = FileHash.mem t.deps file +let files_count (t : t) = FileSet.cardinal t.files + +let deps_count (t : t) = FileHash.length t.deps + (** {2 Topological ordering} *) let iter_files_from_roots_to_leaves (t : t) iterFun = diff --git a/analysis/reanalyze/src/FileDeps.mli b/analysis/reanalyze/src/FileDeps.mli index 2de875017e..1536d66451 100644 --- a/analysis/reanalyze/src/FileDeps.mli +++ b/analysis/reanalyze/src/FileDeps.mli @@ -65,6 +65,12 @@ val iter_deps : t -> (string -> FileSet.t -> unit) -> unit val file_exists : t -> string -> bool (** Check if a file exists in the graph. *) +val files_count : t -> int +(** Count of files in the file set. *) + +val deps_count : t -> int +(** Count of dependencies (number of from_file entries). 
*) + (** {2 Topological ordering} *) val iter_files_from_roots_to_leaves : t -> (string -> unit) -> unit diff --git a/analysis/reanalyze/src/ReactiveAnalysis.ml b/analysis/reanalyze/src/ReactiveAnalysis.ml index 48fe11b197..962b173771 100644 --- a/analysis/reanalyze/src/ReactiveAnalysis.ml +++ b/analysis/reanalyze/src/ReactiveAnalysis.ml @@ -20,7 +20,7 @@ type t = (Cmt_format.cmt_infos, cmt_file_result option) ReactiveFileCollection.t (** The reactive collection type *) (** Process cmt_infos into a file result *) -let process_cmt_infos ~config cmt_infos : cmt_file_result option = +let process_cmt_infos ~config ~cmtFilePath cmt_infos : cmt_file_result option = let excludePath sourceFile = config.DceConfig.cli.exclude_paths |> List.exists (fun prefix_ -> @@ -54,7 +54,7 @@ let process_cmt_infos ~config cmt_infos : cmt_file_result option = Some (cmt_infos |> DceFileProcessing.process_cmt_file ~config ~file:dce_file_context - ~cmtFilePath:"") + ~cmtFilePath) else None in let exception_data = @@ -70,7 +70,8 @@ let process_cmt_infos ~config cmt_infos : cmt_file_result option = (** Create a new reactive collection *) let create ~config : t = ReactiveFileCollection.create ~read_file:Cmt_format.read_cmt - ~process:(process_cmt_infos ~config) + ~process:(fun path cmt_infos -> + process_cmt_infos ~config ~cmtFilePath:path cmt_infos) (** Process all files incrementally using ReactiveFileCollection. First run processes all files. Subsequent runs only process changed files. 
*) diff --git a/analysis/reanalyze/src/ReactiveExceptionRefs.ml b/analysis/reanalyze/src/ReactiveExceptionRefs.ml index d2bf89da2c..675d5e0a9d 100644 --- a/analysis/reanalyze/src/ReactiveExceptionRefs.ml +++ b/analysis/reanalyze/src/ReactiveExceptionRefs.ml @@ -49,7 +49,10 @@ let create ~(decls : (Lexing.position, Decl.t) Reactive.t) match loc_to_opt with | Some loc_to -> (* Add value reference: pos_to -> pos_from *) - [(loc_to.Location.loc_start, PosSet.singleton loc_from.Location.loc_start)] + [ + ( loc_to.Location.loc_start, + PosSet.singleton loc_from.Location.loc_start ); + ] | None -> []) ~merge:PosSet.union () in @@ -79,4 +82,3 @@ let add_to_file_deps_builder (t : t) ~(file_deps : FileDeps.builder) : unit = FileDeps.add_dep file_deps ~from_file ~to_file) posFromSet) t.resolved_refs - diff --git a/analysis/reanalyze/src/ReactiveExceptionRefs.mli b/analysis/reanalyze/src/ReactiveExceptionRefs.mli index 2e7f583497..95f24a34c9 100644 --- a/analysis/reanalyze/src/ReactiveExceptionRefs.mli +++ b/analysis/reanalyze/src/ReactiveExceptionRefs.mli @@ -31,7 +31,10 @@ (** {1 Types} *) -type t +type t = { + exception_decls: (DcePath.t, Location.t) Reactive.t; + resolved_refs: (Lexing.position, PosSet.t) Reactive.t; +} (** Reactive exception ref collections *) (** {1 Creation} *) @@ -51,4 +54,3 @@ val add_to_refs_builder : t -> refs:References.builder -> unit val add_to_file_deps_builder : t -> file_deps:FileDeps.builder -> unit (** Add file dependencies for resolved refs. 
*) - diff --git a/analysis/reanalyze/src/ReactiveMerge.ml b/analysis/reanalyze/src/ReactiveMerge.ml index 5bd111d09a..9f1319de4e 100644 --- a/analysis/reanalyze/src/ReactiveMerge.ml +++ b/analysis/reanalyze/src/ReactiveMerge.ml @@ -13,6 +13,9 @@ type t = { cross_file_items: (string, CrossFileItems.t) Reactive.t; file_deps_map: (string, FileSet.t) Reactive.t; files: (string, unit) Reactive.t; + (* Reactive type/exception dependencies *) + type_deps: ReactiveTypeDeps.t; + exception_refs: ReactiveExceptionRefs.t; } (** All derived reactive collections from per-file data *) @@ -38,7 +41,8 @@ let create (source : (string, DceFileProcessing.file_data option) Reactive.t) : match file_data_opt with | None -> [] | Some file_data -> - FileAnnotations.builder_to_list file_data.DceFileProcessing.annotations) + FileAnnotations.builder_to_list + file_data.DceFileProcessing.annotations) () in @@ -96,21 +100,55 @@ let create (source : (string, DceFileProcessing.file_data option) Reactive.t) : ~merge:FileSet.union () in - (* Files set: (path, ()) - just track which files exist *) + (* Files set: (source_path, ()) - just track which source files exist *) let files = Reactive.flatMap source - ~f:(fun path file_data_opt -> + ~f:(fun _cmt_path file_data_opt -> match file_data_opt with | None -> [] | Some file_data -> - (* Include the file and all files it references *) - let file_set = FileDeps.builder_files file_data.DceFileProcessing.file_deps in - let entries = FileSet.fold (fun f acc -> (f, ()) :: acc) file_set [] in - (path, ()) :: entries) + (* Include all source files from file_deps (NOT the CMT path) *) + let file_set = + FileDeps.builder_files file_data.DceFileProcessing.file_deps + in + FileSet.fold (fun f acc -> (f, ()) :: acc) file_set []) + () + in + + (* Extract exception_refs from cross_file_items for ReactiveExceptionRefs *) + let exception_refs_collection = + Reactive.flatMap cross_file_items + ~f:(fun _path items -> + items.CrossFileItems.exception_refs + |> List.map 
(fun (r : CrossFileItems.exception_ref) -> + (r.exception_path, r.loc_from))) () in - {decls; annotations; value_refs; type_refs; cross_file_items; file_deps_map; files} + (* Create reactive type-label dependencies *) + let type_deps = + ReactiveTypeDeps.create ~decls + ~report_types_dead_only_in_interface: + DeadCommon.Config.reportTypesDeadOnlyInInterface + in + + (* Create reactive exception refs resolution *) + let exception_refs = + ReactiveExceptionRefs.create ~decls + ~exception_refs:exception_refs_collection + in + + { + decls; + annotations; + value_refs; + type_refs; + cross_file_items; + file_deps_map; + files; + type_deps; + exception_refs; + } (** {1 Conversion to solver-ready format} *) @@ -126,14 +164,41 @@ let freeze_annotations (t : t) : FileAnnotations.t = Reactive.iter (fun pos ann -> PosHash.replace result pos ann) t.annotations; FileAnnotations.create_from_hashtbl result -(** Convert reactive refs to References.t for solver *) +(** Convert reactive refs to References.t for solver. + Includes type-label deps and exception refs from reactive computations. 
*) let freeze_refs (t : t) : References.t = let value_refs = PosHash.create 256 in let type_refs = PosHash.create 256 in + (* Helper to merge refs into a hashtable *) + let merge_into tbl posTo posFromSet = + let existing = + match PosHash.find_opt tbl posTo with + | Some s -> s + | None -> PosSet.empty + in + PosHash.replace tbl posTo (PosSet.union existing posFromSet) + in + (* Merge per-file value refs *) + Reactive.iter (fun pos refs -> merge_into value_refs pos refs) t.value_refs; + (* Merge per-file type refs *) + Reactive.iter (fun pos refs -> merge_into type_refs pos refs) t.type_refs; + (* Add type-label dependency refs from all sources *) + Reactive.iter + (fun pos refs -> merge_into type_refs pos refs) + t.type_deps.same_path_refs; + Reactive.iter + (fun pos refs -> merge_into type_refs pos refs) + t.type_deps.cross_file_refs; Reactive.iter - (fun pos refs -> PosHash.replace value_refs pos refs) - t.value_refs; - Reactive.iter (fun pos refs -> PosHash.replace type_refs pos refs) t.type_refs; + (fun pos refs -> merge_into type_refs pos refs) + t.type_deps.impl_to_intf_refs_path2; + Reactive.iter + (fun pos refs -> merge_into type_refs pos refs) + t.type_deps.intf_to_impl_refs; + (* Add exception refs (to value refs) *) + Reactive.iter + (fun pos refs -> merge_into value_refs pos refs) + t.exception_refs.resolved_refs; References.create ~value_refs ~type_refs (** Collect all cross-file items *) @@ -154,7 +219,8 @@ let collect_cross_file_items (t : t) : CrossFileItems.t = function_refs = !function_refs; } -(** Convert reactive file deps to FileDeps.t for solver *) +(** Convert reactive file deps to FileDeps.t for solver. + Includes file deps from exception refs. 
*) let freeze_file_deps (t : t) : FileDeps.t = let files = let result = ref FileSet.empty in @@ -163,7 +229,24 @@ let freeze_file_deps (t : t) : FileDeps.t = in let deps = FileDeps.FileHash.create 256 in Reactive.iter - (fun from_file to_files -> FileDeps.FileHash.replace deps from_file to_files) + (fun from_file to_files -> + FileDeps.FileHash.replace deps from_file to_files) t.file_deps_map; + (* Add file deps from exception refs *) + Reactive.iter + (fun posTo posFromSet -> + PosSet.iter + (fun posFrom -> + let from_file = posFrom.Lexing.pos_fname in + let to_file = posTo.Lexing.pos_fname in + if from_file <> to_file then + let existing = + match FileDeps.FileHash.find_opt deps from_file with + | Some s -> s + | None -> FileSet.empty + in + FileDeps.FileHash.replace deps from_file + (FileSet.add to_file existing)) + posFromSet) + t.exception_refs.resolved_refs; FileDeps.create ~files ~deps - diff --git a/analysis/reanalyze/src/ReactiveMerge.mli b/analysis/reanalyze/src/ReactiveMerge.mli index 03dd06bb44..6f0c3503b8 100644 --- a/analysis/reanalyze/src/ReactiveMerge.mli +++ b/analysis/reanalyze/src/ReactiveMerge.mli @@ -32,6 +32,9 @@ type t = { cross_file_items: (string, CrossFileItems.t) Reactive.t; file_deps_map: (string, FileSet.t) Reactive.t; files: (string, unit) Reactive.t; + (* Reactive type/exception dependencies *) + type_deps: ReactiveTypeDeps.t; + exception_refs: ReactiveExceptionRefs.t; } (** All derived reactive collections from per-file data *) @@ -57,4 +60,3 @@ val collect_cross_file_items : t -> CrossFileItems.t val freeze_file_deps : t -> FileDeps.t (** Convert reactive file deps to FileDeps.t for solver *) - diff --git a/analysis/reanalyze/src/ReactiveTypeDeps.ml b/analysis/reanalyze/src/ReactiveTypeDeps.ml index bb9aa03931..f42102c11d 100644 --- a/analysis/reanalyze/src/ReactiveTypeDeps.ml +++ b/analysis/reanalyze/src/ReactiveTypeDeps.ml @@ -36,6 +36,9 @@ type t = { same_path_refs: (Lexing.position, PosSet.t) Reactive.t; cross_file_refs: 
(Lexing.position, PosSet.t) Reactive.t; all_type_refs: (Lexing.position, PosSet.t) Reactive.t; + (* Additional cross-file sources for complete coverage *) + impl_to_intf_refs_path2: (Lexing.position, PosSet.t) Reactive.t; + intf_to_impl_refs: (Lexing.position, PosSet.t) Reactive.t; } (** All reactive collections for type-label dependencies *) @@ -59,17 +62,17 @@ let create ~(decls : (Lexing.position, Decl.t) Reactive.t) match decls with | [] | [_] -> [] | first :: rest -> - (* Connect each decl to the first one (and vice-versa if needed) *) + (* Connect each decl to the first one (and vice-versa if needed). + Original: extendTypeDependencies loc loc0 adds posTo=loc, posFrom=loc0 + So: posTo=other, posFrom=first *) rest |> List.concat_map (fun other -> - let refs = - [(first.pos, PosSet.singleton other.pos); - (other.pos, PosSet.singleton first.pos)] - in - if report_types_dead_only_in_interface then - (* Only first -> other *) - [(other.pos, PosSet.singleton first.pos)] - else refs)) + (* Always add: other -> first (posTo=other, posFrom=first) *) + let refs = [(other.pos, PosSet.singleton first.pos)] in + if report_types_dead_only_in_interface then refs + else + (* Also add: first -> other (posTo=first, posFrom=other) *) + (first.pos, PosSet.singleton other.pos) :: refs)) ~merge:PosSet.union () in @@ -93,23 +96,22 @@ let create ~(decls : (Lexing.position, Decl.t) Reactive.t) () in - (* Join impl decls with decl_by_path to find intf *) + (* Join impl decls with decl_by_path to find intf. 
+ Original: extendTypeDependencies loc loc1 where loc=impl, loc1=intf + adds posTo=impl, posFrom=intf *) let impl_to_intf_refs = Reactive.join impl_decls decl_by_path ~key_of:(fun _pos (_, intf_path1, _) -> intf_path1) - ~f:(fun _pos (info, _intf_path1, intf_path2) intf_decls_opt -> + ~f:(fun _pos (info, _intf_path1, _intf_path2) intf_decls_opt -> match intf_decls_opt with | Some (intf_info :: _) -> - (* Found at path1, connect impl <-> intf *) - if report_types_dead_only_in_interface then - [(intf_info.pos, PosSet.singleton info.pos)] + (* Found at path1: posTo=impl, posFrom=intf *) + let refs = [(info.pos, PosSet.singleton intf_info.pos)] in + if report_types_dead_only_in_interface then refs else - [(info.pos, PosSet.singleton intf_info.pos); - (intf_info.pos, PosSet.singleton info.pos)] - | _ -> - (* Try path2 - need second join, but for now return placeholder *) - (* We'll handle path2 with a separate join below *) - [(info.pos, (intf_path2, info))] |> List.filter_map (fun _ -> None)) + (* Also: posTo=intf, posFrom=impl *) + (intf_info.pos, PosSet.singleton info.pos) :: refs + | _ -> []) ~merge:PosSet.union () in @@ -130,16 +132,19 @@ let create ~(decls : (Lexing.position, Decl.t) Reactive.t) ~f:(fun _pos (info, _) intf_decls_opt -> match intf_decls_opt with | Some (intf_info :: _) -> - if report_types_dead_only_in_interface then - [(intf_info.pos, PosSet.singleton info.pos)] - else - [(info.pos, PosSet.singleton intf_info.pos); - (intf_info.pos, PosSet.singleton info.pos)] + (* posTo=impl, posFrom=intf *) + let refs = [(info.pos, PosSet.singleton intf_info.pos)] in + if report_types_dead_only_in_interface then refs + else (intf_info.pos, PosSet.singleton info.pos) :: refs | _ -> []) ~merge:PosSet.union () in - (* Also handle intf -> impl direction *) + (* Also handle intf -> impl direction. + Original: extendTypeDependencies loc1 loc where loc=impl, loc1=intf + adds posTo=impl, posFrom=intf (note: same direction!) 
+ The intf->impl code in original only runs when isInterface=true, + and the lookup is for finding the impl. *) let intf_decls = Reactive.flatMap decls ~f:(fun _pos decl -> @@ -148,7 +153,9 @@ let create ~(decls : (Lexing.position, Decl.t) Reactive.t) match info.path with | [] -> [] | typeLabelName :: pathToType -> - let impl_path = typeLabelName :: DcePath.moduleToImplementation pathToType in + let impl_path = + typeLabelName :: DcePath.moduleToImplementation pathToType + in [(info.pos, (info, impl_path))]) | _ -> []) () @@ -157,90 +164,48 @@ let create ~(decls : (Lexing.position, Decl.t) Reactive.t) let intf_to_impl_refs = Reactive.join intf_decls decl_by_path ~key_of:(fun _pos (_, impl_path) -> impl_path) - ~f:(fun _pos (info, _) impl_decls_opt -> + ~f:(fun _pos (intf_info, _) impl_decls_opt -> match impl_decls_opt with | Some (impl_info :: _) -> - if report_types_dead_only_in_interface then - [(info.pos, PosSet.singleton impl_info.pos)] - else - [(impl_info.pos, PosSet.singleton info.pos); - (info.pos, PosSet.singleton impl_info.pos)] + (* Original: extendTypeDependencies loc1 loc where loc1=intf, loc=impl + But wait, looking at the original code more carefully: + + if isInterface then + match find_one path1 with + | None -> () + | Some loc1 -> + extendTypeDependencies ~config ~refs loc1 loc; + if not Config.reportTypesDeadOnlyInInterface then + extendTypeDependencies ~config ~refs loc loc1 + + Here loc is the current intf decl, loc1 is the found impl. 
+ So extendTypeDependencies loc1 loc means posTo=loc1=impl, posFrom=loc=intf + *) + let refs = [(impl_info.pos, PosSet.singleton intf_info.pos)] in + if report_types_dead_only_in_interface then refs + else (intf_info.pos, PosSet.singleton impl_info.pos) :: refs | _ -> []) ~merge:PosSet.union () in - (* Combine all cross-file refs *) - let cross_file_refs = - Reactive.flatMap impl_to_intf_refs - ~f:(fun pos refs -> [(pos, refs)]) - ~merge:PosSet.union () - in - (* Merge in path2 refs *) - let cross_file_refs = - Reactive.flatMap impl_to_intf_refs_path2 - ~f:(fun pos refs -> [(pos, refs)]) - ~merge:PosSet.union () - |> fun refs2 -> - Reactive.flatMap cross_file_refs - ~f:(fun pos refs -> - let additional = - match Reactive.get refs2 pos with - | Some r -> r - | None -> PosSet.empty - in - [(pos, PosSet.union refs additional)]) - ~merge:PosSet.union () - in - (* Merge in intf->impl refs *) - let cross_file_refs = - Reactive.flatMap intf_to_impl_refs - ~f:(fun pos refs -> [(pos, refs)]) - ~merge:PosSet.union () - |> fun refs3 -> - Reactive.flatMap cross_file_refs - ~f:(fun pos refs -> - let additional = - match Reactive.get refs3 pos with - | Some r -> r - | None -> PosSet.empty - in - [(pos, PosSet.union refs additional)]) - ~merge:PosSet.union () - in - - (* Step 4: Combine same-path and cross-file refs *) - let all_type_refs = - Reactive.flatMap same_path_refs - ~f:(fun pos refs -> - let cross = - match Reactive.get cross_file_refs pos with - | Some r -> r - | None -> PosSet.empty - in - [(pos, PosSet.union refs cross)]) - ~merge:PosSet.union () - in - (* Also include cross-file refs that don't have same-path refs *) - let all_type_refs = - Reactive.flatMap cross_file_refs - ~f:(fun pos refs -> - match Reactive.get same_path_refs pos with - | Some _ -> [] (* Already included above *) - | None -> [(pos, refs)]) - ~merge:PosSet.union () - |> fun extra_refs -> - Reactive.flatMap all_type_refs - ~f:(fun pos refs -> - let extra = - match Reactive.get extra_refs pos with 
- | Some r -> r - | None -> PosSet.empty - in - [(pos, PosSet.union refs extra)]) - ~merge:PosSet.union () - in - - {decl_by_path; same_path_refs; cross_file_refs; all_type_refs} + (* Cross-file refs are the combination of: + - impl_to_intf_refs (path1 matches) + - impl_to_intf_refs_path2 (path2 fallback) + - intf_to_impl_refs *) + let cross_file_refs = impl_to_intf_refs in + + (* All type refs = same_path_refs + all cross-file sources. + We expose these separately and merge in freeze_refs. *) + let all_type_refs = same_path_refs in + + { + decl_by_path; + same_path_refs; + cross_file_refs; + all_type_refs; + impl_to_intf_refs_path2; + intf_to_impl_refs; + } (** {1 Freezing for solver} *) @@ -252,4 +217,3 @@ let add_to_refs_builder (t : t) ~(refs : References.builder) : unit = (fun posFrom -> References.add_type_ref refs ~posTo ~posFrom) posFromSet) t.all_type_refs - diff --git a/analysis/reanalyze/src/ReactiveTypeDeps.mli b/analysis/reanalyze/src/ReactiveTypeDeps.mli index 7c9e19c77d..5836719baa 100644 --- a/analysis/reanalyze/src/ReactiveTypeDeps.mli +++ b/analysis/reanalyze/src/ReactiveTypeDeps.mli @@ -29,9 +29,25 @@ (** {1 Types} *) -type t +type t = { + decl_by_path: (DcePath.t, decl_info list) Reactive.t; + same_path_refs: (Lexing.position, PosSet.t) Reactive.t; + cross_file_refs: (Lexing.position, PosSet.t) Reactive.t; + all_type_refs: (Lexing.position, PosSet.t) Reactive.t; + (* Additional cross-file sources for complete coverage *) + impl_to_intf_refs_path2: (Lexing.position, PosSet.t) Reactive.t; + intf_to_impl_refs: (Lexing.position, PosSet.t) Reactive.t; +} (** Reactive type-label dependency collections *) +and decl_info = { + pos: Lexing.position; + pos_end: Lexing.position; + path: DcePath.t; + is_interface: bool; +} +(** Simplified decl info for type-label processing *) + (** {1 Creation} *) val create : @@ -52,4 +68,3 @@ val add_to_refs_builder : t -> refs:References.builder -> unit Call this after processing files to get the current type refs. 
The builder will contain all type-label dependency refs. *) - diff --git a/analysis/reanalyze/src/Reanalyze.ml b/analysis/reanalyze/src/Reanalyze.ml index 0b0359c050..927f264eed 100644 --- a/analysis/reanalyze/src/Reanalyze.ml +++ b/analysis/reanalyze/src/Reanalyze.ml @@ -288,38 +288,51 @@ let runAnalysis ~dce_config ~cmtRoot ~reactive_collection ~reactive_merge = (dce_data_list |> List.map (fun fd -> fd.DceFileProcessing.cross_file)) ) in - (* Merge refs and file_deps into builders for cross-file items processing. - This still needs the file_data iteration for post-processing. *) - let refs_builder = References.create_builder () in - let file_deps_builder = FileDeps.create_builder () in - (match reactive_collection with - | Some collection -> - ReactiveAnalysis.iter_file_data collection (fun fd -> - References.merge_into_builder ~from:fd.DceFileProcessing.refs - ~into:refs_builder; - FileDeps.merge_into_builder - ~from:fd.DceFileProcessing.file_deps - ~into:file_deps_builder) - | None -> - dce_data_list - |> List.iter (fun fd -> - References.merge_into_builder - ~from:fd.DceFileProcessing.refs ~into:refs_builder; - FileDeps.merge_into_builder - ~from:fd.DceFileProcessing.file_deps - ~into:file_deps_builder)); - (* Compute type-label dependencies after merge *) - DeadType.process_type_label_dependencies ~config:dce_config ~decls - ~refs:refs_builder; - let find_exception = - DeadException.find_exception_from_decls decls + (* Compute refs and file_deps. + In reactive mode, ReactiveMerge handles type deps and exception refs. + In non-reactive mode, use the imperative processing. 
*) + let refs, file_deps = + match reactive_merge with + | Some merged -> + (* Reactive mode: freeze_refs includes type deps and exception refs *) + let refs = ReactiveMerge.freeze_refs merged in + let file_deps = ReactiveMerge.freeze_file_deps merged in + (refs, file_deps) + | None -> + (* Non-reactive mode: build refs/file_deps imperatively *) + let refs_builder = References.create_builder () in + let file_deps_builder = FileDeps.create_builder () in + (match reactive_collection with + | Some collection -> + ReactiveAnalysis.iter_file_data collection (fun fd -> + References.merge_into_builder + ~from:fd.DceFileProcessing.refs ~into:refs_builder; + FileDeps.merge_into_builder + ~from:fd.DceFileProcessing.file_deps + ~into:file_deps_builder) + | None -> + dce_data_list + |> List.iter (fun fd -> + References.merge_into_builder + ~from:fd.DceFileProcessing.refs ~into:refs_builder; + FileDeps.merge_into_builder + ~from:fd.DceFileProcessing.file_deps + ~into:file_deps_builder)); + (* Compute type-label dependencies after merge *) + DeadType.process_type_label_dependencies ~config:dce_config + ~decls ~refs:refs_builder; + let find_exception = + DeadException.find_exception_from_decls decls + in + (* Process cross-file exception refs *) + CrossFileItems.process_exception_refs cross_file + ~refs:refs_builder ~file_deps:file_deps_builder + ~find_exception ~config:dce_config; + (* Freeze refs and file_deps for solver *) + let refs = References.freeze_builder refs_builder in + let file_deps = FileDeps.freeze_builder file_deps_builder in + (refs, file_deps) in - (* Process cross-file exception refs *) - CrossFileItems.process_exception_refs cross_file ~refs:refs_builder - ~file_deps:file_deps_builder ~find_exception ~config:dce_config; - (* Freeze refs and file_deps for solver *) - let refs = References.freeze_builder refs_builder in - let file_deps = FileDeps.freeze_builder file_deps_builder in (annotations, decls, cross_file, refs, file_deps)) in (* Solving phase: run the 
solver and collect issues *) diff --git a/analysis/reanalyze/src/References.ml b/analysis/reanalyze/src/References.ml index 60fd7bfafd..c566aedd9b 100644 --- a/analysis/reanalyze/src/References.ml +++ b/analysis/reanalyze/src/References.ml @@ -67,3 +67,7 @@ let create ~value_refs ~type_refs : t = {value_refs; type_refs} let find_value_refs (t : t) pos = findSet t.value_refs pos let find_type_refs (t : t) pos = findSet t.type_refs pos + +let value_refs_length (t : t) = PosHash.length t.value_refs + +let type_refs_length (t : t) = PosHash.length t.type_refs diff --git a/analysis/reanalyze/src/References.mli b/analysis/reanalyze/src/References.mli index 5776ca615c..89f653657d 100644 --- a/analysis/reanalyze/src/References.mli +++ b/analysis/reanalyze/src/References.mli @@ -47,3 +47,7 @@ val create : value_refs:PosSet.t PosHash.t -> type_refs:PosSet.t PosHash.t -> t val find_value_refs : t -> Lexing.position -> PosSet.t val find_type_refs : t -> Lexing.position -> PosSet.t + +val value_refs_length : t -> int + +val type_refs_length : t -> int From 038d0a0b05103b1e3021f3b47c43bed68ce1c79f Mon Sep 17 00:00:00 2001 From: Cristiano Calcagno Date: Tue, 16 Dec 2025 08:54:15 +0100 Subject: [PATCH 9/9] Add store abstractions to eliminate merge overhead in reactive mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce five new store modules that wrap reactive collections directly, eliminating O(N) freeze/copy operations in the merge phase: - AnnotationStore: wraps FileAnnotations reactive collection - DeclarationStore: wraps Declarations reactive collection - ReferenceStore: combines 7 reactive sources (value_refs, type_refs, type_deps.*, exception_refs) without copying - FileDepsStore: wraps file deps reactive collections - CrossFileItemsStore: iterates reactive collection directly without intermediate allocation Performance improvement on 4900-file benchmark: - Merge time: 37ms → 0.002ms (eliminated) - Warm run total: 165ms → 
129ms (22% faster) The stores provide a unified interface that works with both frozen (non-reactive) and reactive data, dispatching to the appropriate implementation at runtime. Signed-off-by: Cristiano Calcagno --- analysis/reanalyze/src/AnnotationStore.ml | 34 +++++++ analysis/reanalyze/src/AnnotationStore.mli | 19 ++++ analysis/reanalyze/src/CrossFileItems.ml | 39 -------- analysis/reanalyze/src/CrossFileItems.mli | 5 - analysis/reanalyze/src/CrossFileItemsStore.ml | 68 ++++++++++++++ .../reanalyze/src/CrossFileItemsStore.mli | 30 ++++++ analysis/reanalyze/src/DeadCommon.ml | 65 ++++++------- analysis/reanalyze/src/DeadOptionalArgs.ml | 4 +- analysis/reanalyze/src/DeclarationStore.ml | 33 +++++++ analysis/reanalyze/src/DeclarationStore.mli | 27 ++++++ analysis/reanalyze/src/FileDepsStore.ml | 46 ++++++++++ analysis/reanalyze/src/FileDepsStore.mli | 28 ++++++ analysis/reanalyze/src/Reanalyze.ml | 92 +++++++++++++------ analysis/reanalyze/src/ReferenceStore.ml | 68 ++++++++++++++ analysis/reanalyze/src/ReferenceStore.mli | 27 ++++++ 15 files changed, 476 insertions(+), 109 deletions(-) create mode 100644 analysis/reanalyze/src/AnnotationStore.ml create mode 100644 analysis/reanalyze/src/AnnotationStore.mli create mode 100644 analysis/reanalyze/src/CrossFileItemsStore.ml create mode 100644 analysis/reanalyze/src/CrossFileItemsStore.mli create mode 100644 analysis/reanalyze/src/DeclarationStore.ml create mode 100644 analysis/reanalyze/src/DeclarationStore.mli create mode 100644 analysis/reanalyze/src/FileDepsStore.ml create mode 100644 analysis/reanalyze/src/FileDepsStore.mli create mode 100644 analysis/reanalyze/src/ReferenceStore.ml create mode 100644 analysis/reanalyze/src/ReferenceStore.mli diff --git a/analysis/reanalyze/src/AnnotationStore.ml b/analysis/reanalyze/src/AnnotationStore.ml new file mode 100644 index 0000000000..b34dbce8e7 --- /dev/null +++ b/analysis/reanalyze/src/AnnotationStore.ml @@ -0,0 +1,34 @@ +(** Abstraction over annotation storage. 
+ + Allows the solver to work with either: + - [Frozen]: Traditional [FileAnnotations.t] (copied from reactive) + - [Reactive]: Direct [Reactive.t] (no copy, zero-cost on warm runs) *) + +type t = + | Frozen of FileAnnotations.t + | Reactive of (Lexing.position, FileAnnotations.annotated_as) Reactive.t + +let of_frozen ann = Frozen ann + +let of_reactive reactive = Reactive reactive + +let is_annotated_dead t pos = + match t with + | Frozen ann -> FileAnnotations.is_annotated_dead ann pos + | Reactive reactive -> Reactive.get reactive pos = Some FileAnnotations.Dead + +let is_annotated_gentype_or_live t pos = + match t with + | Frozen ann -> FileAnnotations.is_annotated_gentype_or_live ann pos + | Reactive reactive -> ( + match Reactive.get reactive pos with + | Some (FileAnnotations.Live | FileAnnotations.GenType) -> true + | Some FileAnnotations.Dead | None -> false) + +let is_annotated_gentype_or_dead t pos = + match t with + | Frozen ann -> FileAnnotations.is_annotated_gentype_or_dead ann pos + | Reactive reactive -> ( + match Reactive.get reactive pos with + | Some (FileAnnotations.Dead | FileAnnotations.GenType) -> true + | Some FileAnnotations.Live | None -> false) diff --git a/analysis/reanalyze/src/AnnotationStore.mli b/analysis/reanalyze/src/AnnotationStore.mli new file mode 100644 index 0000000000..0c8e099fd8 --- /dev/null +++ b/analysis/reanalyze/src/AnnotationStore.mli @@ -0,0 +1,19 @@ +(** Abstraction over annotation storage. 
+ + Allows the solver to work with either: + - [Frozen]: Traditional [FileAnnotations.t] (copied from reactive) + - [Reactive]: Direct [Reactive.t] (no copy, zero-cost on warm runs) *) + +type t +(** Abstract annotation store *) + +val of_frozen : FileAnnotations.t -> t +(** Wrap a frozen [FileAnnotations.t] *) + +val of_reactive : + (Lexing.position, FileAnnotations.annotated_as) Reactive.t -> t +(** Wrap a reactive collection directly (no copy) *) + +val is_annotated_dead : t -> Lexing.position -> bool +val is_annotated_gentype_or_live : t -> Lexing.position -> bool +val is_annotated_gentype_or_dead : t -> Lexing.position -> bool diff --git a/analysis/reanalyze/src/CrossFileItems.ml b/analysis/reanalyze/src/CrossFileItems.ml index 8b72d84120..f51e55a468 100644 --- a/analysis/reanalyze/src/CrossFileItems.ml +++ b/analysis/reanalyze/src/CrossFileItems.ml @@ -78,42 +78,3 @@ let process_exception_refs (t : t) ~refs ~file_deps ~find_exception ~config = DeadCommon.addValueReference ~config ~refs ~file_deps ~binding:Location.none ~addFileReference:true ~locFrom:loc_from ~locTo:loc_to) - -(** Compute optional args state from calls and function references. - Returns a map from position to final OptionalArgs.t state. - Pure function - does not mutate declarations. 
*) -let compute_optional_args_state (t : t) ~decls ~is_live : OptionalArgsState.t = - let state = OptionalArgsState.create () in - (* Initialize state from declarations *) - let get_state pos = - match OptionalArgsState.find_opt state pos with - | Some s -> s - | None -> ( - match Declarations.find_opt decls pos with - | Some {declKind = Value {optionalArgs}} -> optionalArgs - | _ -> OptionalArgs.empty) - in - let set_state pos s = OptionalArgsState.set state pos s in - (* Process optional arg calls *) - t.optional_arg_calls - |> List.iter (fun {pos_from; pos_to; arg_names; arg_names_maybe} -> - if is_live pos_from then - let current = get_state pos_to in - let updated = - OptionalArgs.apply_call ~argNames:arg_names - ~argNamesMaybe:arg_names_maybe current - in - set_state pos_to updated); - (* Process function references *) - t.function_refs - |> List.iter (fun {pos_from; pos_to} -> - if is_live pos_from then - let state_from = get_state pos_from in - let state_to = get_state pos_to in - if not (OptionalArgs.isEmpty state_to) then ( - let updated_from, updated_to = - OptionalArgs.combine_pair state_from state_to - in - set_state pos_from updated_from; - set_state pos_to updated_to)); - state diff --git a/analysis/reanalyze/src/CrossFileItems.mli b/analysis/reanalyze/src/CrossFileItems.mli index f7517d9974..93141b1004 100644 --- a/analysis/reanalyze/src/CrossFileItems.mli +++ b/analysis/reanalyze/src/CrossFileItems.mli @@ -74,11 +74,6 @@ val process_exception_refs : (** {2 Optional Args State} *) -val compute_optional_args_state : - t -> - decls:Declarations.t -> - is_live:(Lexing.position -> bool) -> - OptionalArgsState.t (** Compute final optional args state from calls and function references, taking into account caller liveness via the [is_live] predicate. Pure function - does not mutate declarations. 
*) diff --git a/analysis/reanalyze/src/CrossFileItemsStore.ml b/analysis/reanalyze/src/CrossFileItemsStore.ml new file mode 100644 index 0000000000..33e5a756d6 --- /dev/null +++ b/analysis/reanalyze/src/CrossFileItemsStore.ml @@ -0,0 +1,68 @@ +(** Abstraction over cross-file items storage. + + Allows iteration over optional arg calls and function refs from either: + - [Frozen]: Collected [CrossFileItems.t] + - [Reactive]: Direct iteration over reactive collection (no intermediate allocation) *) + +type t = + | Frozen of CrossFileItems.t + | Reactive of (string, CrossFileItems.t) Reactive.t + +let of_frozen cfi = Frozen cfi + +let of_reactive reactive = Reactive reactive + +let iter_optional_arg_calls t f = + match t with + | Frozen cfi -> List.iter f cfi.CrossFileItems.optional_arg_calls + | Reactive r -> + Reactive.iter + (fun _path items -> List.iter f items.CrossFileItems.optional_arg_calls) + r + +let iter_function_refs t f = + match t with + | Frozen cfi -> List.iter f cfi.CrossFileItems.function_refs + | Reactive r -> + Reactive.iter + (fun _path items -> List.iter f items.CrossFileItems.function_refs) + r + +(** Compute optional args state from calls and function references. + Returns a map from position to final OptionalArgs.t state. + Pure function - does not mutate declarations. 
*) +let compute_optional_args_state (store : t) ~find_decl ~is_live : + OptionalArgsState.t = + let state = OptionalArgsState.create () in + (* Initialize state from declarations *) + let get_state pos = + match OptionalArgsState.find_opt state pos with + | Some s -> s + | None -> ( + match find_decl pos with + | Some {Decl.declKind = Value {optionalArgs}} -> optionalArgs + | _ -> OptionalArgs.empty) + in + let set_state pos s = OptionalArgsState.set state pos s in + (* Process optional arg calls *) + iter_optional_arg_calls store + (fun {CrossFileItems.pos_from; pos_to; arg_names; arg_names_maybe} -> + if is_live pos_from then + let current = get_state pos_to in + let updated = + OptionalArgs.apply_call ~argNames:arg_names + ~argNamesMaybe:arg_names_maybe current + in + set_state pos_to updated); + (* Process function references *) + iter_function_refs store (fun {CrossFileItems.pos_from; pos_to} -> + if is_live pos_from then + let state_from = get_state pos_from in + let state_to = get_state pos_to in + if not (OptionalArgs.isEmpty state_to) then ( + let updated_from, updated_to = + OptionalArgs.combine_pair state_from state_to + in + set_state pos_from updated_from; + set_state pos_to updated_to)); + state diff --git a/analysis/reanalyze/src/CrossFileItemsStore.mli b/analysis/reanalyze/src/CrossFileItemsStore.mli new file mode 100644 index 0000000000..98eda6d3d7 --- /dev/null +++ b/analysis/reanalyze/src/CrossFileItemsStore.mli @@ -0,0 +1,30 @@ +(** Abstraction over cross-file items storage. 
+ + Allows iteration over optional arg calls and function refs from either: + - [Frozen]: Collected [CrossFileItems.t] + - [Reactive]: Direct iteration over reactive collection (no intermediate allocation) *) + +type t = + | Frozen of CrossFileItems.t + | Reactive of (string, CrossFileItems.t) Reactive.t + (** Cross-file items store with exposed constructors for pattern matching *) + +val of_frozen : CrossFileItems.t -> t +(** Wrap a frozen [CrossFileItems.t] *) + +val of_reactive : (string, CrossFileItems.t) Reactive.t -> t +(** Wrap reactive collection directly (no intermediate collection) *) + +val iter_optional_arg_calls : + t -> (CrossFileItems.optional_arg_call -> unit) -> unit +(** Iterate over all optional arg calls *) + +val iter_function_refs : t -> (CrossFileItems.function_ref -> unit) -> unit +(** Iterate over all function refs *) + +val compute_optional_args_state : + t -> + find_decl:(Lexing.position -> Decl.t option) -> + is_live:(Lexing.position -> bool) -> + OptionalArgsState.t +(** Compute optional args state from calls and function references *) diff --git a/analysis/reanalyze/src/DeadCommon.ml b/analysis/reanalyze/src/DeadCommon.ml index 9f3ad1f21a..a63c212c51 100644 --- a/analysis/reanalyze/src/DeadCommon.ml +++ b/analysis/reanalyze/src/DeadCommon.ml @@ -88,11 +88,6 @@ let addValueReference ~config ~refs ~file_deps ~(binding : Location.t) FileDeps.add_dep file_deps ~from_file:effectiveFrom.loc_start.pos_fname ~to_file:locTo.loc_start.pos_fname) -(* NOTE: iterFilesFromRootsToLeaves moved to FileDeps.iter_files_from_roots_to_leaves *) - -let iterFilesFromRootsToLeaves ~file_deps iterFun = - FileDeps.iter_files_from_roots_to_leaves file_deps iterFun - let addDeclaration_ ~config ~decls ~(file : FileContext.t) ?posEnd ?posStart ~declKind ~path ~(loc : Location.t) ?(posAdjustment = Decl.Nothing) ~moduleLoc (name : Name.t) = @@ -162,7 +157,7 @@ let isInsideReportedValue (ctx : ReportingContext.t) decl = (** Report a dead declaration. 
Returns list of issues (dead module first, then dead value). Caller is responsible for logging. *) -let reportDeclaration ~config ~refs (ctx : ReportingContext.t) decl : +let reportDeclaration ~config ~ref_store (ctx : ReportingContext.t) decl : Issue.t list = let insideReportedValue = decl |> isInsideReportedValue ctx in if not decl.report then [] @@ -197,7 +192,7 @@ let reportDeclaration ~config ~refs (ctx : ReportingContext.t) decl : (WarningDeadType, "is a variant case which is never constructed") in let hasRefBelow () = - let decl_refs = References.find_value_refs refs decl.pos in + let decl_refs = ReferenceStore.find_value_refs ref_store decl.pos in let refIsBelow (pos : Lexing.position) = decl.pos.pos_fname <> pos.pos_fname || decl.pos.pos_cnum < pos.pos_cnum @@ -227,20 +222,19 @@ let reportDeclaration ~config ~refs (ctx : ReportingContext.t) decl : | None -> [dead_value_issue] else [] -let declIsDead ~annotations ~refs decl = +let declIsDead ~ann_store ~refs decl = let liveRefs = refs |> PosSet.filter (fun p -> - not (FileAnnotations.is_annotated_dead annotations p)) + not (AnnotationStore.is_annotated_dead ann_store p)) in liveRefs |> PosSet.cardinal = 0 - && not - (FileAnnotations.is_annotated_gentype_or_live annotations decl.Decl.pos) + && not (AnnotationStore.is_annotated_gentype_or_live ann_store decl.Decl.pos) -let doReportDead ~annotations pos = - not (FileAnnotations.is_annotated_gentype_or_dead annotations pos) +let doReportDead ~ann_store pos = + not (AnnotationStore.is_annotated_gentype_or_dead ann_store pos) -let rec resolveRecursiveRefs ~all_refs ~annotations ~config ~decls +let rec resolveRecursiveRefs ~ref_store ~ann_store ~config ~decl_store ~checkOptionalArg: (checkOptionalArgFn : config:DceConfig.t -> Decl.t -> Issue.t list) ~deadDeclarations ~issues ~level ~orderedFiles ~refs ~refsBeingResolved decl @@ -275,7 +269,7 @@ let rec resolveRecursiveRefs ~all_refs ~annotations ~config ~decls (decl.path |> DcePath.toString); false) else - match 
Declarations.find_opt decls pos with + match DeclarationStore.find_opt decl_store pos with | None -> if Config.recursiveDebug then Log_.item "recursiveDebug can't find decl for %s@." @@ -284,20 +278,20 @@ let rec resolveRecursiveRefs ~all_refs ~annotations ~config ~decls | Some xDecl -> let xRefs = match xDecl.declKind |> Decl.Kind.isType with - | true -> References.find_type_refs all_refs pos - | false -> References.find_value_refs all_refs pos + | true -> ReferenceStore.find_type_refs ref_store pos + | false -> ReferenceStore.find_value_refs ref_store pos in let xDeclIsDead = xDecl - |> resolveRecursiveRefs ~all_refs ~annotations ~config ~decls - ~checkOptionalArg:checkOptionalArgFn ~deadDeclarations - ~issues ~level:(level + 1) ~orderedFiles ~refs:xRefs - ~refsBeingResolved + |> resolveRecursiveRefs ~ref_store ~ann_store ~config + ~decl_store ~checkOptionalArg:checkOptionalArgFn + ~deadDeclarations ~issues ~level:(level + 1) + ~orderedFiles ~refs:xRefs ~refsBeingResolved in if xDecl.resolvedDead = None then allDepsResolved := false; not xDeclIsDead) in - let isDead = decl |> declIsDead ~annotations ~refs:newRefs in + let isDead = decl |> declIsDead ~ann_store ~refs:newRefs in let isResolved = (not isDead) || !allDepsResolved || level = 0 in if isResolved then ( decl.resolvedDead <- Some isDead; @@ -306,7 +300,7 @@ let rec resolveRecursiveRefs ~all_refs ~annotations ~config ~decls |> DeadModules.markDead ~config ~isType:(decl.declKind |> Decl.Kind.isType) ~loc:decl.moduleLoc; - if not (doReportDead ~annotations decl.pos) then decl.report <- false; + if not (doReportDead ~ann_store decl.pos) then decl.report <- false; deadDeclarations := decl :: !deadDeclarations) else ( (* Collect optional args issues *) @@ -316,7 +310,7 @@ let rec resolveRecursiveRefs ~all_refs ~annotations ~config ~decls |> DeadModules.markLive ~config ~isType:(decl.declKind |> Decl.Kind.isType) ~loc:decl.moduleLoc; - if FileAnnotations.is_annotated_dead annotations decl.pos then ( + if 
AnnotationStore.is_annotated_dead ann_store decl.pos then ( (* Collect incorrect @dead annotation issue *) let issue = makeDeadIssue ~decl ~message:" is annotated @dead but is live" @@ -342,22 +336,23 @@ let rec resolveRecursiveRefs ~all_refs ~annotations ~config ~decls refsString level); isDead -let solveDead ~annotations ~config ~decls ~refs ~file_deps ~optional_args_state +let solveDead ~ann_store ~config ~decl_store ~ref_store ~file_deps_store + ~optional_args_state ~checkOptionalArg: (checkOptionalArgFn : optional_args_state:OptionalArgsState.t -> - annotations:FileAnnotations.t -> + ann_store:AnnotationStore.t -> config:DceConfig.t -> Decl.t -> Issue.t list) : AnalysisResult.t = let iterDeclInOrder ~deadDeclarations ~issues ~orderedFiles decl = let decl_refs = match decl |> Decl.isValue with - | true -> References.find_value_refs refs decl.pos - | false -> References.find_type_refs refs decl.pos + | true -> ReferenceStore.find_value_refs ref_store decl.pos + | false -> ReferenceStore.find_type_refs ref_store decl.pos in - resolveRecursiveRefs ~all_refs:refs ~annotations ~config ~decls - ~checkOptionalArg:(checkOptionalArgFn ~optional_args_state ~annotations) + resolveRecursiveRefs ~ref_store ~ann_store ~config ~decl_store + ~checkOptionalArg:(checkOptionalArgFn ~optional_args_state ~ann_store) ~deadDeclarations ~issues ~level:0 ~orderedFiles ~refsBeingResolved:(ref PosSet.empty) ~refs:decl_refs decl |> ignore @@ -365,7 +360,7 @@ let solveDead ~annotations ~config ~decls ~refs ~file_deps ~optional_args_state if config.DceConfig.cli.debug then ( Log_.item "@.File References@.@."; let fileList = ref [] in - FileDeps.iter_deps file_deps (fun file files -> + FileDepsStore.iter_deps file_deps_store (fun file files -> fileList := (file, files) :: !fileList); !fileList |> List.sort (fun (f1, _) (f2, _) -> String.compare f1 f2) @@ -375,12 +370,12 @@ let solveDead ~annotations ~config ~decls ~refs ~file_deps ~optional_args_state (files |> FileSet.elements |> List.map 
Filename.basename |> String.concat ", "))); let declarations = - Declarations.fold + DeclarationStore.fold (fun _pos decl declarations -> decl :: declarations) - decls [] + decl_store [] in let orderedFiles = Hashtbl.create 256 in - iterFilesFromRootsToLeaves ~file_deps + FileDepsStore.iter_files_from_roots_to_leaves file_deps_store (let current = ref 0 in fun fileName -> incr current; @@ -402,7 +397,7 @@ let solveDead ~annotations ~config ~decls ~refs ~file_deps ~optional_args_state let dead_issues = sortedDeadDeclarations |> List.concat_map (fun decl -> - reportDeclaration ~config ~refs reporting_ctx decl) + reportDeclaration ~config ~ref_store reporting_ctx decl) in (* Combine all issues: inline issues first (they were logged during analysis), then dead declaration issues *) diff --git a/analysis/reanalyze/src/DeadOptionalArgs.ml b/analysis/reanalyze/src/DeadOptionalArgs.ml index c7fcc93b8e..71bef0ac99 100644 --- a/analysis/reanalyze/src/DeadOptionalArgs.ml +++ b/analysis/reanalyze/src/DeadOptionalArgs.ml @@ -59,12 +59,12 @@ let addReferences ~config ~cross_file ~(locFrom : Location.t) (** Check for optional args issues. Returns issues instead of logging. Uses optional_args_state map for final computed state. *) -let check ~optional_args_state ~annotations ~config:_ decl : Issue.t list = +let check ~optional_args_state ~ann_store ~config:_ decl : Issue.t list = match decl with | {Decl.declKind = Value {optionalArgs}} when active () && not - (FileAnnotations.is_annotated_gentype_or_live annotations decl.pos) + (AnnotationStore.is_annotated_gentype_or_live ann_store decl.pos) -> (* Look up computed state from map, fall back to declaration's initial state *) let state = diff --git a/analysis/reanalyze/src/DeclarationStore.ml b/analysis/reanalyze/src/DeclarationStore.ml new file mode 100644 index 0000000000..7b0043c541 --- /dev/null +++ b/analysis/reanalyze/src/DeclarationStore.ml @@ -0,0 +1,33 @@ +(** Abstraction over declaration storage. 
+ + Allows the solver to work with either: + - [Frozen]: Traditional [Declarations.t] (copied from reactive) + - [Reactive]: Direct [Reactive.t] (no copy, zero-cost on warm runs) + + This eliminates the O(N) freeze step when using reactive mode. *) + +type t = + | Frozen of Declarations.t + | Reactive of (Lexing.position, Decl.t) Reactive.t + +let of_frozen decls = Frozen decls + +let of_reactive reactive = Reactive reactive + +let find_opt t pos = + match t with + | Frozen decls -> Declarations.find_opt decls pos + | Reactive reactive -> Reactive.get reactive pos + +let fold f t init = + match t with + | Frozen decls -> Declarations.fold f decls init + | Reactive reactive -> + let acc = ref init in + Reactive.iter (fun pos decl -> acc := f pos decl !acc) reactive; + !acc + +let iter f t = + match t with + | Frozen decls -> Declarations.iter f decls + | Reactive reactive -> Reactive.iter f reactive diff --git a/analysis/reanalyze/src/DeclarationStore.mli b/analysis/reanalyze/src/DeclarationStore.mli new file mode 100644 index 0000000000..c50583aca1 --- /dev/null +++ b/analysis/reanalyze/src/DeclarationStore.mli @@ -0,0 +1,27 @@ +(** Abstraction over declaration storage. + + Allows the solver to work with either: + - [Frozen]: Traditional [Declarations.t] (copied from reactive) + - [Reactive]: Direct [Reactive.t] (no copy, zero-cost on warm runs) + + This eliminates the O(N) freeze step when using reactive mode. 
*) + +type t = + | Frozen of Declarations.t + | Reactive of (Lexing.position, Decl.t) Reactive.t + (** Declaration store - either frozen or reactive *) + +val of_frozen : Declarations.t -> t +(** Wrap a frozen [Declarations.t] *) + +val of_reactive : (Lexing.position, Decl.t) Reactive.t -> t +(** Wrap a reactive collection directly (no copy) *) + +val find_opt : t -> Lexing.position -> Decl.t option +(** Look up a declaration by position *) + +val fold : (Lexing.position -> Decl.t -> 'a -> 'a) -> t -> 'a -> 'a +(** Fold over all declarations *) + +val iter : (Lexing.position -> Decl.t -> unit) -> t -> unit +(** Iterate over all declarations *) diff --git a/analysis/reanalyze/src/FileDepsStore.ml b/analysis/reanalyze/src/FileDepsStore.ml new file mode 100644 index 0000000000..5c16bbacde --- /dev/null +++ b/analysis/reanalyze/src/FileDepsStore.ml @@ -0,0 +1,46 @@ +(** Abstraction over file dependency storage. + + Allows the solver to work with either: + - [Frozen]: Traditional [FileDeps.t] (copied from reactive) + - [Reactive]: Direct reactive collections (no copy, zero-cost on warm runs) *) + +type t = + | Frozen of FileDeps.t + | Reactive of { + files: (string, unit) Reactive.t; + deps: (string, FileSet.t) Reactive.t; + } + +let of_frozen fd = Frozen fd + +let of_reactive ~files ~deps = Reactive {files; deps} + +let get_deps t file = + match t with + | Frozen fd -> FileDeps.get_deps fd file + | Reactive r -> ( + match Reactive.get r.deps file with + | Some s -> s + | None -> FileSet.empty) + +let iter_deps t f = + match t with + | Frozen fd -> FileDeps.iter_deps fd f + | Reactive r -> Reactive.iter f r.deps + +(** Topological iteration from roots to leaves. + Works for both frozen and reactive - builds temporary structures as needed. 
*) +let iter_files_from_roots_to_leaves t iterFun = + match t with + | Frozen fd -> FileDeps.iter_files_from_roots_to_leaves fd iterFun + | Reactive r -> + (* Build temporary FileDeps.t from reactive collections for topo sort *) + let files = ref FileSet.empty in + Reactive.iter (fun f () -> files := FileSet.add f !files) r.files; + let deps = FileDeps.FileHash.create 256 in + Reactive.iter + (fun from_file to_files -> + FileDeps.FileHash.replace deps from_file to_files) + r.deps; + let fd = FileDeps.create ~files:!files ~deps in + FileDeps.iter_files_from_roots_to_leaves fd iterFun diff --git a/analysis/reanalyze/src/FileDepsStore.mli b/analysis/reanalyze/src/FileDepsStore.mli new file mode 100644 index 0000000000..93983030a0 --- /dev/null +++ b/analysis/reanalyze/src/FileDepsStore.mli @@ -0,0 +1,28 @@ +(** Abstraction over file dependency storage. + + Allows the solver to work with either: + - [Frozen]: Traditional [FileDeps.t] (copied from reactive) + - [Reactive]: Direct reactive collections (no copy, zero-cost on warm runs) *) + +type t = + | Frozen of FileDeps.t + | Reactive of { + files: (string, unit) Reactive.t; + deps: (string, FileSet.t) Reactive.t; + } (** File deps store with exposed constructors for pattern matching *) + +val of_frozen : FileDeps.t -> t +(** Wrap a frozen [FileDeps.t] *) + +val of_reactive : + files:(string, unit) Reactive.t -> deps:(string, FileSet.t) Reactive.t -> t +(** Wrap reactive collections directly *) + +val get_deps : t -> string -> FileSet.t +(** Get dependencies for a file *) + +val iter_deps : t -> (string -> FileSet.t -> unit) -> unit +(** Iterate over all dependencies *) + +val iter_files_from_roots_to_leaves : t -> (string -> unit) -> unit +(** Iterate files in topological order (roots first) *) diff --git a/analysis/reanalyze/src/Reanalyze.ml b/analysis/reanalyze/src/Reanalyze.ml index 927f264eed..5c6d965c39 100644 --- a/analysis/reanalyze/src/Reanalyze.ml +++ b/analysis/reanalyze/src/Reanalyze.ml @@ -268,38 +268,71 
@@ let runAnalysis ~dce_config ~cmtRoot ~reactive_collection ~reactive_merge = let analysis_result = if dce_config.DceConfig.run.dce then (* Merging phase: combine all builders -> immutable data *) - let annotations, decls, cross_file, refs, file_deps = + let ann_store, decl_store, cross_file_store, ref_store, file_deps_store = Timing.time_phase `Merging (fun () -> (* Use reactive merge if available, otherwise list-based merge *) - let annotations, decls, cross_file = + let ann_store, decl_store, cross_file_store = match reactive_merge with | Some merged -> - ( ReactiveMerge.freeze_annotations merged, - ReactiveMerge.freeze_decls merged, - ReactiveMerge.collect_cross_file_items merged ) + (* Reactive mode: use stores directly, skip freeze! *) + ( AnnotationStore.of_reactive merged.ReactiveMerge.annotations, + DeclarationStore.of_reactive merged.ReactiveMerge.decls, + CrossFileItemsStore.of_reactive + merged.ReactiveMerge.cross_file_items ) | None -> - ( FileAnnotations.merge_all - (dce_data_list - |> List.map (fun fd -> fd.DceFileProcessing.annotations)), + (* Non-reactive mode: freeze into data, wrap in store *) + let decls = Declarations.merge_all (dce_data_list - |> List.map (fun fd -> fd.DceFileProcessing.decls)), - CrossFileItems.merge_all - (dce_data_list - |> List.map (fun fd -> fd.DceFileProcessing.cross_file)) ) + |> List.map (fun fd -> fd.DceFileProcessing.decls)) + in + ( AnnotationStore.of_frozen + (FileAnnotations.merge_all + (dce_data_list + |> List.map (fun fd -> fd.DceFileProcessing.annotations) + )), + DeclarationStore.of_frozen decls, + CrossFileItemsStore.of_frozen + (CrossFileItems.merge_all + (dce_data_list + |> List.map (fun fd -> fd.DceFileProcessing.cross_file))) + ) in (* Compute refs and file_deps. - In reactive mode, ReactiveMerge handles type deps and exception refs. + In reactive mode, use stores directly (skip freeze!). In non-reactive mode, use the imperative processing. 
*) - let refs, file_deps = + let ref_store, file_deps_store = match reactive_merge with | Some merged -> - (* Reactive mode: freeze_refs includes type deps and exception refs *) - let refs = ReactiveMerge.freeze_refs merged in - let file_deps = ReactiveMerge.freeze_file_deps merged in - (refs, file_deps) + (* Reactive mode: use stores directly, skip freeze! *) + let ref_store = + ReferenceStore.of_reactive ~value_refs:merged.value_refs + ~type_refs:merged.type_refs ~type_deps:merged.type_deps + ~exception_refs:merged.exception_refs + in + let file_deps_store = + FileDepsStore.of_reactive ~files:merged.files + ~deps:merged.file_deps_map + in + (ref_store, file_deps_store) | None -> (* Non-reactive mode: build refs/file_deps imperatively *) + (* Need Declarations.t for type deps processing *) + let decls = + match decl_store with + | DeclarationStore.Frozen d -> d + | DeclarationStore.Reactive _ -> + failwith + "unreachable: non-reactive path with reactive store" + in + (* Need CrossFileItems.t for exception refs processing *) + let cross_file = + match cross_file_store with + | CrossFileItemsStore.Frozen cfi -> cfi + | CrossFileItemsStore.Reactive _ -> + failwith + "unreachable: non-reactive path with reactive store" + in let refs_builder = References.create_builder () in let file_deps_builder = FileDeps.create_builder () in (match reactive_collection with @@ -331,41 +364,44 @@ let runAnalysis ~dce_config ~cmtRoot ~reactive_collection ~reactive_merge = (* Freeze refs and file_deps for solver *) let refs = References.freeze_builder refs_builder in let file_deps = FileDeps.freeze_builder file_deps_builder in - (refs, file_deps) + ( ReferenceStore.of_frozen refs, + FileDepsStore.of_frozen file_deps ) in - (annotations, decls, cross_file, refs, file_deps)) + (ann_store, decl_store, cross_file_store, ref_store, file_deps_store)) in (* Solving phase: run the solver and collect issues *) Timing.time_phase `Solving (fun () -> let empty_optional_args_state = 
OptionalArgsState.create () in let analysis_result_core = - DeadCommon.solveDead ~annotations ~decls ~refs ~file_deps - ~optional_args_state:empty_optional_args_state ~config:dce_config + DeadCommon.solveDead ~ann_store ~decl_store ~ref_store + ~file_deps_store ~optional_args_state:empty_optional_args_state + ~config:dce_config ~checkOptionalArg:(fun - ~optional_args_state:_ ~annotations:_ ~config:_ _ -> []) + ~optional_args_state:_ ~ann_store:_ ~config:_ _ -> []) in (* Compute liveness-aware optional args state *) let is_live pos = - match Declarations.find_opt decls pos with + match DeclarationStore.find_opt decl_store pos with | Some decl -> Decl.isLive decl | None -> true in let optional_args_state = - CrossFileItems.compute_optional_args_state cross_file ~decls + CrossFileItemsStore.compute_optional_args_state cross_file_store + ~find_decl:(DeclarationStore.find_opt decl_store) ~is_live in (* Collect optional args issues only for live declarations *) let optional_args_issues = - Declarations.fold + DeclarationStore.fold (fun _pos decl acc -> if Decl.isLive decl then let issues = - DeadOptionalArgs.check ~optional_args_state ~annotations + DeadOptionalArgs.check ~optional_args_state ~ann_store ~config:dce_config decl in List.rev_append issues acc else acc) - decls [] + decl_store [] |> List.rev in Some diff --git a/analysis/reanalyze/src/ReferenceStore.ml b/analysis/reanalyze/src/ReferenceStore.ml new file mode 100644 index 0000000000..1cff4a1918 --- /dev/null +++ b/analysis/reanalyze/src/ReferenceStore.ml @@ -0,0 +1,68 @@ +(** Abstraction over reference storage. + + Allows the solver to work with either: + - [Frozen]: Traditional [References.t] (copied from reactive) + - [Reactive]: Direct reactive collections (no copy, zero-cost on warm runs) + + This eliminates the O(N) freeze step when using reactive mode. 
*) + +type t = + | Frozen of References.t + | Reactive of { + value_refs: (Lexing.position, PosSet.t) Reactive.t; + type_refs: (Lexing.position, PosSet.t) Reactive.t; + (* Type deps sources *) + same_path_refs: (Lexing.position, PosSet.t) Reactive.t; + cross_file_refs: (Lexing.position, PosSet.t) Reactive.t; + impl_to_intf_refs_path2: (Lexing.position, PosSet.t) Reactive.t; + intf_to_impl_refs: (Lexing.position, PosSet.t) Reactive.t; + (* Exception refs source *) + exception_resolved_refs: (Lexing.position, PosSet.t) Reactive.t; + } + +let of_frozen refs = Frozen refs + +let of_reactive ~value_refs ~type_refs ~type_deps ~exception_refs = + Reactive + { + value_refs; + type_refs; + same_path_refs = type_deps.ReactiveTypeDeps.same_path_refs; + cross_file_refs = type_deps.ReactiveTypeDeps.cross_file_refs; + impl_to_intf_refs_path2 = + type_deps.ReactiveTypeDeps.impl_to_intf_refs_path2; + intf_to_impl_refs = type_deps.ReactiveTypeDeps.intf_to_impl_refs; + exception_resolved_refs = + exception_refs.ReactiveExceptionRefs.resolved_refs; + } + +(** Helper to get from reactive and default to empty *) +let get_or_empty reactive pos = + match Reactive.get reactive pos with + | Some s -> s + | None -> PosSet.empty + +let find_value_refs t pos = + match t with + | Frozen refs -> References.find_value_refs refs pos + | Reactive r -> + (* Combine: per-file value_refs + exception resolved_refs *) + let from_file = get_or_empty r.value_refs pos in + let from_exceptions = get_or_empty r.exception_resolved_refs pos in + PosSet.union from_file from_exceptions + +let find_type_refs t pos = + match t with + | Frozen refs -> References.find_type_refs refs pos + | Reactive r -> + (* Combine: per-file type_refs + all type_deps sources *) + let from_file = get_or_empty r.type_refs pos in + let from_same_path = get_or_empty r.same_path_refs pos in + let from_cross_file = get_or_empty r.cross_file_refs pos in + let from_impl_intf2 = get_or_empty r.impl_to_intf_refs_path2 pos in + let 
from_intf_impl = get_or_empty r.intf_to_impl_refs pos in + from_file + |> PosSet.union from_same_path + |> PosSet.union from_cross_file + |> PosSet.union from_impl_intf2 + |> PosSet.union from_intf_impl diff --git a/analysis/reanalyze/src/ReferenceStore.mli b/analysis/reanalyze/src/ReferenceStore.mli new file mode 100644 index 0000000000..a0e88b9fb8 --- /dev/null +++ b/analysis/reanalyze/src/ReferenceStore.mli @@ -0,0 +1,27 @@ +(** Abstraction over reference storage. + + Allows the solver to work with either: + - [Frozen]: Traditional [References.t] (copied from reactive) + - [Reactive]: Direct reactive collections (no copy, zero-cost on warm runs) + + This eliminates the O(N) freeze step when using reactive mode. *) + +type t +(** Abstract reference store *) + +val of_frozen : References.t -> t +(** Wrap a frozen [References.t] *) + +val of_reactive : + value_refs:(Lexing.position, PosSet.t) Reactive.t -> + type_refs:(Lexing.position, PosSet.t) Reactive.t -> + type_deps:ReactiveTypeDeps.t -> + exception_refs:ReactiveExceptionRefs.t -> + t +(** Wrap reactive collections directly (no copy) *) + +val find_value_refs : t -> Lexing.position -> PosSet.t +(** Find value references to a position *) + +val find_type_refs : t -> Lexing.position -> PosSet.t +(** Find type references to a position *)