From 09959152150971b3d575b0c7e95daa7c910c2c93 Mon Sep 17 00:00:00 2001 From: Shinpei Hayashi Date: Tue, 24 Mar 2026 22:01:16 +0900 Subject: [PATCH 1/4] fix: use ConcurrentHashMap, do shutdown --- .../ac/titech/c/se/stein/rewriter/RepositoryRewriter.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java b/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java index 1bd6e72..368c0c7 100644 --- a/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java +++ b/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java @@ -68,7 +68,7 @@ private static Map createEntryMapping(long memoryBudget) { /** * Root tree-to-tree mapping. */ - protected Map rootTreeMapping = new HashMap<>(); + protected Map rootTreeMapping = new ConcurrentHashMap<>(); /** * Commit-to-commit mapping. @@ -227,10 +227,11 @@ protected void rewriteRootTrees(final RevWalk walk, final Context c) { } Try.io(walk::memoReset); + final ForkJoinPool pool = new ForkJoinPool(config.nthreads); try (walk) { final int characteristics = Spliterator.DISTINCT | Spliterator.IMMUTABLE | Spliterator.NONNULL | Spliterator.SIZED; final Spliterator split = Spliterators.spliterator(walk.iterator(), count, characteristics); - new ForkJoinPool(config.nthreads).submit(() -> { + pool.submit(() -> { final Stream stream = StreamSupport.stream(split, true); stream.forEach(commit -> { final long id = Thread.currentThread().getId(); @@ -239,6 +240,8 @@ protected void rewriteRootTrees(final RevWalk walk, final Context c) { rewriteRootTree(commit.getTree().getId(), uuc); }); }).join(); + } finally { + pool.shutdown(); } // finalize From 3d563ad615b9b1581346a5b339f925a8bde7170a Mon Sep 17 00:00:00 2001 From: Shinpei Hayashi Date: Wed, 25 Mar 2026 01:45:33 +0900 Subject: [PATCH 2/4] fix: avoid closing RevWalk multiple times --- .../c/se/stein/rewriter/RepositoryRewriter.java | 12 +++++------- 1 file 
changed, 5 insertions(+), 7 deletions(-) diff --git a/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java b/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java index 368c0c7..3965449 100644 --- a/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java +++ b/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java @@ -156,8 +156,7 @@ public void initialize(final Repository sourceRepo, final Repository targetRepo) public void rewrite(final Context c) { setUp(c); - try { - final RevWalk walk = prepareRevisionWalk(c); + try (final RevWalk walk = prepareRevisionWalk(c)) { if (config.nthreads >= 2) { log.debug("Parallel rewriting"); rewriteRootTrees(walk, c); @@ -219,16 +218,15 @@ protected void rewriteCommits(final RevWalk walk, final Context c) { protected void rewriteRootTrees(final RevWalk walk, final Context c) { final Map cxts = new ConcurrentHashMap<>(); + // Count commits without closing the walk long count = 0; - try (walk) { - for (final RevCommit commit : walk) { - count++; - } + for (final RevCommit ignored : walk) { + count++; } Try.io(walk::memoReset); final ForkJoinPool pool = new ForkJoinPool(config.nthreads); - try (walk) { + try { final int characteristics = Spliterator.DISTINCT | Spliterator.IMMUTABLE | Spliterator.NONNULL | Spliterator.SIZED; final Spliterator split = Spliterators.spliterator(walk.iterator(), count, characteristics); pool.submit(() -> { From fec5f922340e428f970abd6595f39556bfb08a04 Mon Sep 17 00:00:00 2001 From: Shinpei Hayashi Date: Wed, 25 Mar 2026 10:35:05 +0900 Subject: [PATCH 3/4] use `nthreads` threads instead of `nthreads - 1` --- README.md | 17 ++++++++++++++++- .../jp/ac/titech/c/se/stein/Application.java | 3 +-- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 59e323a..6b82da0 100644 --- a/README.md +++ b/README.md @@ -100,7 +100,7 @@ public class MyTranslator implements BlobTranslator { - `-d`, `--duplicate`: 
Duplicate the source repository and overwrites it. **Requires `-o`**. - `--clean`: Delete the target repository before applying the transformation if it exists. **Requires `-o`**. - `--bare`: Treat that the specified repositories are bare. -- `-j`, `--jobs=<nthreads>`: Rewrites trees in parallel using `<nthreads>` threads. If the number of threads is omitted (just `-j` is given), _total number of processors - 1_ is used. +- `-j`, `--jobs=<nthreads>`: Rewrites trees in parallel using `<nthreads>` threads (see [Parallel Rewriting](#parallel-rewriting)). If the number of threads is omitted (just `-j` is given), the number of available processors is used. - `-n`, `--dry-run`: Do not actually modify the target repository. - `--stream-size-limit={,K,M,G}`: increase the stream size limit. - `--no-notes`: Stop noting the source commit ID to the commits in the target repository (see [Notes](#notes)). @@ -261,6 +261,21 @@ A no-op rewriter that copies all objects without transformation. Useful for verifying that the rewriting pipeline preserves repository content. +## Parallel Rewriting + +With `-j`, git-stein rewrites trees in parallel using multiple threads. +The rewriting is done in two passes over the commit history: + +1. **Tree rewriting pass** (parallel): all root trees are rewritten in parallel using a `ForkJoinPool`. +The commit list is split into contiguous chunks, and each chunk is processed by a worker thread. +Consecutive commits within a chunk share many tree entries, so the entry mapping cache is effective within each chunk. +2. **Commit writing pass** (sequential): commits are written in topological order. +Since each commit depends on its parent's ID, this pass must be sequential. +The tree rewriting results are looked up from the first pass. + +The number of threads can be specified explicitly (e.g., `-j4`) or left to default (`-j` alone uses all available processors). + + ## Chaining Commands Multiple commands can be listed on a single command line.
diff --git a/src/main/java/jp/ac/titech/c/se/stein/Application.java b/src/main/java/jp/ac/titech/c/se/stein/Application.java index 7ff8e1e..75fc33b 100644 --- a/src/main/java/jp/ac/titech/c/se/stein/Application.java +++ b/src/main/java/jp/ac/titech/c/se/stein/Application.java @@ -77,8 +77,7 @@ public static class OutputOptions { void setNumberOfThreads(final int nthreads) { this.nthreads = nthreads; if (nthreads == 0) { - final int nprocs = Runtime.getRuntime().availableProcessors(); - this.nthreads = nprocs > 1 ? nprocs - 1 : 1; + this.nthreads = Runtime.getRuntime().availableProcessors(); } } public int nthreads = 1; From 2efa2afc7409563cbc048515989c2642fdcd8488 Mon Sep 17 00:00:00 2001 From: Shinpei Hayashi Date: Wed, 25 Mar 2026 10:35:12 +0900 Subject: [PATCH 4/4] more logging --- .../jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java b/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java index 3965449..fc58eb0 100644 --- a/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java +++ b/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java @@ -158,7 +158,6 @@ public void rewrite(final Context c) { setUp(c); try (final RevWalk walk = prepareRevisionWalk(c)) { if (config.nthreads >= 2) { - log.debug("Parallel rewriting"); rewriteRootTrees(walk, c); Try.io(walk::memoReset); } @@ -187,6 +186,7 @@ public void rewrite(final Context c) { blobHit, blobTotal, String.format("%.1f", blobTotal > 0 ? blobHit * 100.0 / blobTotal : 0), treeHit, treeTotal, String.format("%.1f", treeTotal > 0 ? 
treeHit * 100.0 / treeTotal : 0), hits, total, String.format("%.1f", hits * 100.0 / total)); + log.info("Entry mapping size: {}, root tree mapping size: {}", entryMapping.size(), rootTreeMapping.size()); } if (entryCache != null) { entryCache.close(); @@ -224,6 +224,7 @@ protected void rewriteRootTrees(final RevWalk walk, final Context c) { count++; } Try.io(walk::memoReset); + log.info("Parallel rewriting: {} commits with {} threads", count, config.nthreads); final ForkJoinPool pool = new ForkJoinPool(config.nthreads); try { @@ -239,6 +240,7 @@ protected void rewriteRootTrees(final RevWalk walk, final Context c) { }); }).join(); } finally { + log.debug("Pool stats: steal={}, threads={}", pool.getStealCount(), cxts.size()); pool.shutdown(); }