Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ public class MyTranslator implements BlobTranslator {
- `-d`, `--duplicate`: Duplicate the source repository and overwrite it. **Requires `-o`**.
- `--clean`: Delete the target repository before applying the transformation if it exists. **Requires `-o`**.
- `--bare`: Treat the specified repositories as bare.
- `-j`, `--jobs=<nthreads>`: Rewrites trees in parallel using `<nthreads>` threads. If the number of threads is omitted (just `-j` is given), _total number of processors - 1_ is used.
- `-j`, `--jobs=<nthreads>`: Rewrites trees in parallel using `<nthreads>` threads (see [Parallel Rewriting](#parallel-rewriting)). If the number of threads is omitted (just `-j` is given), the number of available processors is used.
- `-n`, `--dry-run`: Do not actually modify the target repository.
- `--stream-size-limit=<num>{,K,M,G}`: Increase the stream size limit.
- `--no-notes`: Stop noting the source commit ID to the commits in the target repository (see [Notes](#notes)).
Expand Down Expand Up @@ -261,6 +261,21 @@ A no-op rewriter that copies all objects without transformation.
Useful for verifying that the rewriting pipeline preserves repository content.


## Parallel Rewriting

With `-j`, git-stein rewrites trees in parallel using multiple threads.
The rewriting is done in two passes over the commit history:

1. **Tree rewriting pass** (parallel): all root trees are rewritten in parallel using a `ForkJoinPool`.
The commit list is split into contiguous chunks, and each chunk is processed by a worker thread.
Consecutive commits within a chunk share many tree entries, so the entry mapping cache is effective within each chunk.
2. **Commit writing pass** (sequential): commits are written in topological order.
Since each commit depends on its parents' IDs, this pass must be sequential.
The tree rewriting results are looked up from the first pass.

The number of threads can be specified explicitly (e.g., `-j4`) or left to the default (`-j` alone uses all available processors).


## Chaining Commands

Multiple commands can be listed on a single command line.
Expand Down
3 changes: 1 addition & 2 deletions src/main/java/jp/ac/titech/c/se/stein/Application.java
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,7 @@ public static class OutputOptions {
/**
 * Sets the number of threads to use.
 *
 * <p>The given value is stored as-is, except that {@code 0} is replaced by a
 * default derived from {@link Runtime#availableProcessors()}.
 *
 * @param nthreads the requested number of threads; {@code 0} selects the default
 */
void setNumberOfThreads(final int nthreads) {
this.nthreads = nthreads;
if (nthreads == 0) {
// NOTE(review): the next two lines compute "processors - 1" (minimum 1), but that
// value is immediately overwritten by the final assignment below. They look like the
// removed side of a diff hunk shown next to its replacement — confirm before relying on them.
final int nprocs = Runtime.getRuntime().availableProcessors();
this.nthreads = nprocs > 1 ? nprocs - 1 : 1;
this.nthreads = Runtime.getRuntime().availableProcessors();
}
}
public int nthreads = 1;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ private static Map<Entry, AnyColdEntry> createEntryMapping(long memoryBudget) {
/**
* Root tree-to-tree mapping.
*/
protected Map<ObjectId, ObjectId> rootTreeMapping = new HashMap<>();
protected Map<ObjectId, ObjectId> rootTreeMapping = new ConcurrentHashMap<>();

/**
* Commit-to-commit mapping.
Expand Down Expand Up @@ -156,10 +156,8 @@ public void initialize(final Repository sourceRepo, final Repository targetRepo)

public void rewrite(final Context c) {
setUp(c);
try {
final RevWalk walk = prepareRevisionWalk(c);
try (final RevWalk walk = prepareRevisionWalk(c)) {
if (config.nthreads >= 2) {
log.debug("Parallel rewriting");
rewriteRootTrees(walk, c);
Try.io(walk::memoReset);
}
Expand Down Expand Up @@ -188,6 +186,7 @@ public void rewrite(final Context c) {
blobHit, blobTotal, String.format("%.1f", blobTotal > 0 ? blobHit * 100.0 / blobTotal : 0),
treeHit, treeTotal, String.format("%.1f", treeTotal > 0 ? treeHit * 100.0 / treeTotal : 0),
hits, total, String.format("%.1f", hits * 100.0 / total));
log.info("Entry mapping size: {}, root tree mapping size: {}", entryMapping.size(), rootTreeMapping.size());
}
if (entryCache != null) {
entryCache.close();
Expand Down Expand Up @@ -219,18 +218,19 @@ protected void rewriteCommits(final RevWalk walk, final Context c) {
protected void rewriteRootTrees(final RevWalk walk, final Context c) {
final Map<Long, Context> cxts = new ConcurrentHashMap<>();

// Count commits without closing the walk
long count = 0;
try (walk) {
for (final RevCommit commit : walk) {
count++;
}
for (final RevCommit ignored : walk) {
count++;
}
Try.io(walk::memoReset);
log.info("Parallel rewriting: {} commits with {} threads", count, config.nthreads);

try (walk) {
final ForkJoinPool pool = new ForkJoinPool(config.nthreads);
try {
final int characteristics = Spliterator.DISTINCT | Spliterator.IMMUTABLE | Spliterator.NONNULL | Spliterator.SIZED;
final Spliterator<RevCommit> split = Spliterators.spliterator(walk.iterator(), count, characteristics);
new ForkJoinPool(config.nthreads).submit(() -> {
pool.submit(() -> {
final Stream<RevCommit> stream = StreamSupport.stream(split, true);
stream.forEach(commit -> {
final long id = Thread.currentThread().getId();
Expand All @@ -239,6 +239,9 @@ protected void rewriteRootTrees(final RevWalk walk, final Context c) {
rewriteRootTree(commit.getTree().getId(), uuc);
});
}).join();
} finally {
log.debug("Pool stats: steal={}, threads={}", pool.getStealCount(), cxts.size());
pool.shutdown();
}

// finalize
Expand Down
Loading