From f505bc2e44886404f07cdcb7e3844a5609b2f818 Mon Sep 17 00:00:00 2001
From: Shinpei Hayashi
Date: Mon, 23 Mar 2026 19:15:37 +0900
Subject: [PATCH 01/28] benchmark scripts for incremental transformation
---
scripts/bench-incremental.sh | 107 +++++++++++++++++++++++++++++++++++
scripts/setup-incremental.sh | 91 +++++++++++++++++++++++++++++
2 files changed, 198 insertions(+)
create mode 100755 scripts/bench-incremental.sh
create mode 100755 scripts/setup-incremental.sh
diff --git a/scripts/bench-incremental.sh b/scripts/bench-incremental.sh
new file mode 100755
index 0000000..035830b
--- /dev/null
+++ b/scripts/bench-incremental.sh
@@ -0,0 +1,107 @@
+#!/bin/sh
+# Run incremental transformation benchmarks.
+# Usage: ./bench-incremental.sh <jar> <work-dir> <command> [cache-opts...]
+#
+# Example:
+# ./bench-incremental.sh ./build/libs/git-stein-all.jar ./work @historage-jdt
+# ./bench-incremental.sh ./build/libs/git-stein-all.jar ./work @historage-jdt --cache commit,blob
+#
+# Runs two experiments:
+# A) Incremental over splits (1 -> 2 -> ... -> N)
+# B) Independent deltas from base (base+10, base+20, ...)
+set -eu
+
+JAR="${1:?Usage: bench-incremental.sh [cache-opts...]}"
+WORK_DIR="${2:?}"
+COMMAND="${3:?}"
+shift 3
+CACHE_OPTS="$*"
+
+RESULTS_DIR="$WORK_DIR/results"
+mkdir -p "$RESULTS_DIR"
+TIMESTAMP=$(date +%Y%m%d-%H%M%S)
+LABEL=$(echo "$CACHE_OPTS" | tr ' ' '_')
+[ -z "$LABEL" ] && LABEL="none"
+
+TIME=/usr/bin/time
+
+run_stein() {
+ java -Xmx1g -jar "$JAR" --bare --log=WARN $CACHE_OPTS -o "$2" "$1" "$COMMAND"
+}
+
+# Capture wall-clock seconds from "time -p"
+time_run_stein() {
+ $TIME -p sh -c "run_stein='java -Xmx1g -jar $JAR --bare --log=WARN $CACHE_OPTS -o $2 $1 $COMMAND'; eval \"\$run_stein\"" 2>&1 | grep '^real ' | awk '{print $2}'
+}
+
+# ============================================================
+# Experiment A: incremental over splits
+# ============================================================
+echo "=== Experiment A: Incremental splits (cache: ${CACHE_OPTS:-none}) ==="
+RESULT_A="$RESULTS_DIR/${TIMESTAMP}_splits_${LABEL}.csv"
+echo "step,commits,time_seconds" > "$RESULT_A"
+
+SPLITS_DIR="$WORK_DIR/splits"
+DEST_A="$WORK_DIR/dest_splits_${LABEL}"
+rm -rf "$DEST_A"
+
+SPLITS=$(ls -1d "$SPLITS_DIR"/[0-9]* 2>/dev/null | wc -l | tr -d ' ')
+
+for i in $(seq 1 "$SPLITS"); do
+ SOURCE="$SPLITS_DIR/$i"
+ [ -d "$SOURCE" ] || continue
+ NCOMMITS=$(git -C "$SOURCE" rev-list --all 2>/dev/null | wc -l | tr -d ' ')
+ printf " Split %d/%d (%d commits) ... " "$i" "$SPLITS" "$NCOMMITS"
+
+ ELAPSED=$(time_run_stein "$SOURCE" "$DEST_A")
+
+ echo "${ELAPSED}s"
+ echo "$i,$NCOMMITS,$ELAPSED" >> "$RESULT_A"
+done
+echo "Results: $RESULT_A"
+rm -rf "$DEST_A"
+
+# ============================================================
+# Experiment B: independent deltas from base
+# ============================================================
+echo ""
+echo "=== Experiment B: Deltas from base (cache: ${CACHE_OPTS:-none}) ==="
+RESULT_B="$RESULTS_DIR/${TIMESTAMP}_deltas_${LABEL}.csv"
+echo "delta,commits,time_seconds" > "$RESULT_B"
+
+DELTAS_DIR="$WORK_DIR/deltas"
+BASE_SOURCE="$DELTAS_DIR/base"
+
+# First, create the base destination
+DEST_BASE="$WORK_DIR/dest_deltas_base_${LABEL}"
+rm -rf "$DEST_BASE"
+printf " Building base ... "
+BASE_TIME=$(time_run_stein "$BASE_SOURCE" "$DEST_BASE")
+BASE_COMMITS=$(git -C "$BASE_SOURCE" rev-list --all 2>/dev/null | wc -l | tr -d ' ')
+echo "$BASE_COMMITS commits, ${BASE_TIME}s"
+echo "0,$BASE_COMMITS,$BASE_TIME" >> "$RESULT_B"
+
+# Run deltas independently (cp base, then incremental transform)
+DELTAS=$(ls -1d "$DELTAS_DIR"/[0-9]* 2>/dev/null | sort -n | while read d; do basename "$d"; done)
+
+for i in $DELTAS; do
+ DELTA_SOURCE="$DELTAS_DIR/$i"
+ [ -d "$DELTA_SOURCE" ] || continue
+ NCOMMITS=$(git -C "$DELTA_SOURCE" rev-list --all 2>/dev/null | wc -l | tr -d ' ')
+ DIFF=$(( NCOMMITS - BASE_COMMITS ))
+ printf " Delta %s (+%d commits, total %d) ... " "$i" "$DIFF" "$NCOMMITS"
+
+ DEST_DELTA="$WORK_DIR/dest_deltas_${LABEL}_${i}"
+ cp -r "$DEST_BASE" "$DEST_DELTA"
+
+ ELAPSED=$(time_run_stein "$DELTA_SOURCE" "$DEST_DELTA")
+
+ echo "${ELAPSED}s"
+ echo "$i,$NCOMMITS,$ELAPSED" >> "$RESULT_B"
+ rm -rf "$DEST_DELTA"
+done
+echo "Results: $RESULT_B"
+rm -rf "$DEST_BASE"
+
+echo ""
+echo "Done."
diff --git a/scripts/setup-incremental.sh b/scripts/setup-incremental.sh
new file mode 100755
index 0000000..cad608f
--- /dev/null
+++ b/scripts/setup-incremental.sh
@@ -0,0 +1,91 @@
+#!/bin/sh
+# Setup: clone target repo and generate sub-repositories by truncating at commit boundaries.
+# Usage: ./setup-incremental.sh <repo-url> <work-dir> [splits] [delta-base-frac] [delta-step] [delta-count]
+#
+# Example:
+# ./setup-incremental.sh https://github.com/google/gson.git ./work 10 0.5 10 10
+#
+# This creates:
+# work/source.git -- bare clone of the repo
+# work/splits/1 .. N -- repos truncated at 1/N, 2/N, ..., (N-1)/N of commits
+# work/deltas/base -- repo at delta-base-frac of total commits
+# work/deltas/1 .. M -- repos at base + delta-step*1, base + delta-step*2, ...
+set -eu
+
+REPO_URL="${1:?Usage: setup-incremental.sh [splits] [delta-base-frac] [delta-step] [delta-count]}"
+WORK_DIR="${2:?}"
+SPLITS="${3:-10}"
+DELTA_BASE_FRAC="${4:-0.5}"
+DELTA_STEP="${5:-10}"
+DELTA_COUNT="${6:-10}"
+
+mkdir -p "$WORK_DIR"
+
+# Clone source
+SOURCE="$WORK_DIR/source.git"
+if [ ! -d "$SOURCE" ]; then
+ echo "Cloning $REPO_URL ..."
+ git clone --bare "$REPO_URL" "$SOURCE"
+fi
+
+# Get first-parent commit list (oldest first)
+COMMITS_FILE="$WORK_DIR/commits.txt"
+git -C "$SOURCE" rev-list --first-parent HEAD | sed '1!G;h;$!d' > "$COMMITS_FILE"
+TOTAL=$(wc -l < "$COMMITS_FILE" | tr -d ' ')
+echo "Total first-parent commits: $TOTAL"
+
+# Helper: create a repo truncated at commit N
+create_truncated() {
+ n="$1"
+ dest="$2"
+ sha=$(sed -n "${n}p" "$COMMITS_FILE")
+
+ if [ -d "$dest" ]; then
+ echo " $dest already exists, skipping"
+ return
+ fi
+
+ git clone --bare --no-tags "$SOURCE" "$dest" 2>/dev/null
+ git -C "$dest" update-ref refs/heads/main "$sha"
+ # Remove all other refs
+ git -C "$dest" for-each-ref --format='%(refname)' | grep -v '^refs/heads/main$' | while read ref; do
+ git -C "$dest" update-ref -d "$ref" 2>/dev/null || true
+ done
+ git -C "$dest" gc --prune=now --quiet 2>/dev/null || true
+}
+
+# Experiment A: splits
+echo ""
+echo "=== Creating $SPLITS splits ==="
+mkdir -p "$WORK_DIR/splits"
+STEP=$(( TOTAL / SPLITS ))
+for i in $(seq 1 $(( SPLITS - 1 ))); do
+ N=$(( STEP * i ))
+ echo "Split $i/$SPLITS: $N commits"
+ create_truncated "$N" "$WORK_DIR/splits/$i"
+done
+# Last split = full repo
+if [ ! -d "$WORK_DIR/splits/$SPLITS" ]; then
+ cp -r "$SOURCE" "$WORK_DIR/splits/$SPLITS"
+fi
+
+# Experiment B: deltas
+echo ""
+echo "=== Creating delta repos (base + step*N) ==="
+mkdir -p "$WORK_DIR/deltas"
+BASE_N=$(python3 -c "print(int($TOTAL * $DELTA_BASE_FRAC))")
+echo "Base: $BASE_N commits"
+create_truncated "$BASE_N" "$WORK_DIR/deltas/base"
+
+for i in $(seq 1 "$DELTA_COUNT"); do
+ N=$(( BASE_N + DELTA_STEP * i ))
+ if [ "$N" -gt "$TOTAL" ]; then
+ echo "Delta $i: $N exceeds total ($TOTAL), stopping"
+ break
+ fi
+ echo "Delta $i: $N commits (+$(( DELTA_STEP * i )))"
+ create_truncated "$N" "$WORK_DIR/deltas/$i"
+done
+
+echo ""
+echo "Setup complete: $WORK_DIR"
From a898255d9fc5db9152599f2649ff12897aaf99cc Mon Sep 17 00:00:00 2001
From: Shinpei Hayashi
Date: Mon, 23 Mar 2026 19:16:46 +0900
Subject: [PATCH 02/28] MVStore implementation for cache
---
.gitignore | 1 +
build.gradle | 3 +
.../jp/ac/titech/c/se/stein/Application.java | 5 +
.../titech/c/se/stein/core/CacheProvider.java | 30 +++++
.../c/se/stein/core/MVStoreCacheProvider.java | 108 ++++++++++++++++++
.../c/se/stein/core/SQLiteCacheProvider.java | 2 +-
.../se/stein/rewriter/RepositoryRewriter.java | 7 +-
.../c/se/stein/testing/RewriteBenchmark.java | 68 ++++++++---
8 files changed, 208 insertions(+), 16 deletions(-)
create mode 100644 src/main/java/jp/ac/titech/c/se/stein/core/CacheProvider.java
create mode 100644 src/main/java/jp/ac/titech/c/se/stein/core/MVStoreCacheProvider.java
diff --git a/.gitignore b/.gitignore
index ed65bc2..3768c93 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,7 @@
/gradle.properties
/.settings/
/.idea/
+__*
*~
.DS_Store
diff --git a/build.gradle b/build.gradle
index 49be582..3846acc 100644
--- a/build.gradle
+++ b/build.gradle
@@ -40,6 +40,8 @@ dependencies {
implementation 'org.xerial:sqlite-jdbc:3.51.3.0'
implementation 'com.j256.ormlite:ormlite-jdbc:5.7'
+ implementation 'com.h2database:h2-mvstore:2.3.232'
+
testImplementation 'org.junit.jupiter:junit-jupiter:5.14.3'
testRuntimeOnly 'org.junit.platform:junit-platform-launcher'
testImplementation 'org.mockito:mockito-junit-jupiter:5.23.0'
@@ -87,6 +89,7 @@ tasks.register('benchmark', JavaExec) {
def benchArgs = project.hasProperty('benchRepo') ? [project.property('benchRepo')] : ['.']
if (project.hasProperty('alternates')) benchArgs.add('--alternates')
+ if (project.hasProperty('cache')) benchArgs.add('--cache')
args = benchArgs
jvmArgs = ['-Xmx1g']
}
diff --git a/src/main/java/jp/ac/titech/c/se/stein/Application.java b/src/main/java/jp/ac/titech/c/se/stein/Application.java
index bf956b2..9731d67 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/Application.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/Application.java
@@ -105,6 +105,11 @@ public enum AlternatesMode { relative, absolute }
@Option(names = "--cache", split = ",", paramLabel = "", description = "cache level (${COMPLETION-CANDIDATES}. default: none)", order = MIDDLE)
public EnumSet cacheLevel = EnumSet.noneOf(RepositoryRewriter.CacheLevel.class);
+ public enum CacheBackend { sqlite, mvstore }
+
+ @Option(names = "--cache-backend", paramLabel = "", description = "cache backend (${COMPLETION-CANDIDATES}. default: sqlite)", order = MIDDLE)
+ public CacheBackend cacheBackend = CacheBackend.sqlite;
+
@Option(names = "--extra-attributes", description = "rewrite encoding and signature in commits", order = MIDDLE)
public boolean isRewritingExtraAttributes = false;
diff --git a/src/main/java/jp/ac/titech/c/se/stein/core/CacheProvider.java b/src/main/java/jp/ac/titech/c/se/stein/core/CacheProvider.java
new file mode 100644
index 0000000..a3d8991
--- /dev/null
+++ b/src/main/java/jp/ac/titech/c/se/stein/core/CacheProvider.java
@@ -0,0 +1,30 @@
+package jp.ac.titech.c.se.stein.core;
+
+import jp.ac.titech.c.se.stein.entry.AnyColdEntry;
+import jp.ac.titech.c.se.stein.entry.Entry;
+import org.eclipse.jgit.lib.ObjectId;
+
+import java.util.Map;
+
+/**
+ * Common interface for cache providers that persist object mappings.
+ */
+public interface CacheProvider {
+ boolean isInitial();
+
+ Map getCommitMapping();
+
+ Map getEntryMapping();
+
+ Map getRefEntryMapping();
+
+ default void inTransaction(java.util.concurrent.Callable fn) {
+ try {
+ fn.call();
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ default void close() {}
+}
diff --git a/src/main/java/jp/ac/titech/c/se/stein/core/MVStoreCacheProvider.java b/src/main/java/jp/ac/titech/c/se/stein/core/MVStoreCacheProvider.java
new file mode 100644
index 0000000..50805cf
--- /dev/null
+++ b/src/main/java/jp/ac/titech/c/se/stein/core/MVStoreCacheProvider.java
@@ -0,0 +1,108 @@
+package jp.ac.titech.c.se.stein.core;
+
+import jp.ac.titech.c.se.stein.entry.AnyColdEntry;
+import jp.ac.titech.c.se.stein.entry.Entry;
+import org.eclipse.jgit.lib.ObjectId;
+import org.eclipse.jgit.lib.Repository;
+import org.h2.mvstore.MVMap;
+import org.h2.mvstore.MVStore;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.AbstractMap;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/**
+ * Cache provider backed by H2 MVStore.
+ * Data is stored in a single file ({@code cache.mv.db}) in the target repository's .git directory.
+ */
+public class MVStoreCacheProvider implements CacheProvider {
+ private static final Logger log = LoggerFactory.getLogger(MVStoreCacheProvider.class);
+
+ private final MVStore store;
+ private final boolean initial;
+
+ public MVStoreCacheProvider(final Repository target) {
+ final Path dbFile = target.getDirectory().toPath().resolve("cache.mv.db");
+ initial = !Files.exists(dbFile);
+ store = new MVStore.Builder()
+ .fileName(dbFile.toString())
+ .open();
+ }
+
+ @Override
+ public boolean isInitial() {
+ return initial;
+ }
+
+ @Override
+ public Map getCommitMapping() {
+ final Marshaler m = new Marshaler.ObjectIdMarshaler();
+ return new MVMapAdapter<>(store.openMap("commits"), m, m);
+ }
+
+ @Override
+ public Map getEntryMapping() {
+ final Marshaler km = new Marshaler.JavaSerializerMarshaler<>();
+ final Marshaler vm = new Marshaler.JavaSerializerMarshaler<>();
+ return new MVMapAdapter<>(store.openMap("entries"), km, vm);
+ }
+
+ @Override
+ public Map getRefEntryMapping() {
+ final Marshaler m = new Marshaler.JavaSerializerMarshaler<>();
+ return new MVMapAdapter<>(store.openMap("refs"), m, m);
+ }
+
+ @Override
+ public void close() {
+ if (store != null && !store.isClosed()) {
+ store.close();
+ }
+ }
+
+ /**
+ * Map adapter that serializes keys/values via Marshaler and stores them in an MVMap.
+ */
+ static class MVMapAdapter extends AbstractMap {
+ private final MVMap map;
+ private final Marshaler keyMarshaler;
+ private final Marshaler valueMarshaler;
+
+ MVMapAdapter(MVMap map, Marshaler keyMarshaler, Marshaler valueMarshaler) {
+ this.map = map;
+ this.keyMarshaler = keyMarshaler;
+ this.valueMarshaler = valueMarshaler;
+ }
+
+ @Override
+ public V get(final Object key) {
+ @SuppressWarnings("unchecked")
+ final K k = (K) key;
+ final byte[] raw = map.get(keyMarshaler.marshal(k));
+ return raw != null ? valueMarshaler.unmarshal(raw) : null;
+ }
+
+ @Override
+ public V put(final K key, final V value) {
+ map.put(keyMarshaler.marshal(key), valueMarshaler.marshal(value));
+ return value;
+ }
+
+ @Override
+ public Set> entrySet() {
+ return map.entrySet().stream()
+ .map(e -> new SimpleEntry<>(keyMarshaler.unmarshal(e.getKey()), valueMarshaler.unmarshal(e.getValue())))
+ .collect(Collectors.toSet());
+ }
+
+ @Override
+ public void clear() {
+ map.clear();
+ }
+ }
+}
diff --git a/src/main/java/jp/ac/titech/c/se/stein/core/SQLiteCacheProvider.java b/src/main/java/jp/ac/titech/c/se/stein/core/SQLiteCacheProvider.java
index 12d8690..0f66a42 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/core/SQLiteCacheProvider.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/core/SQLiteCacheProvider.java
@@ -29,7 +29,7 @@
import java.util.function.Supplier;
import java.util.stream.Collectors;
-public class SQLiteCacheProvider {
+public class SQLiteCacheProvider implements CacheProvider {
private final static Logger log = LoggerFactory.getLogger(SQLiteCacheProvider.class);
static class KeyValue {
diff --git a/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java b/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
index 02c5b5c..a908b68 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
@@ -78,7 +78,7 @@ public enum CacheLevel {
blob, tree, commit
}
- protected SQLiteCacheProvider cacheProvider;
+ protected CacheProvider cacheProvider;
public void initialize(final Repository sourceRepo, final Repository targetRepo) {
source = new RepositoryAccess(sourceRepo);
@@ -92,7 +92,10 @@ public void initialize(final Repository sourceRepo, final Repository targetRepo)
target.setDryRunning(true);
}
if (!config.cacheLevel.isEmpty()) {
- cacheProvider = new SQLiteCacheProvider(targetRepo);
+ cacheProvider = switch (config.cacheBackend) {
+ case mvstore -> new MVStoreCacheProvider(targetRepo);
+ default -> new SQLiteCacheProvider(targetRepo);
+ };
if (config.cacheLevel.contains(CacheLevel.commit)) {
log.info("Stored mapping (commit-mapping) is available");
commitMapping = new Cache<>(commitMapping, cacheProvider.getCommitMapping(), !cacheProvider.isInitial(), true);
diff --git a/src/test/java/jp/ac/titech/c/se/stein/testing/RewriteBenchmark.java b/src/test/java/jp/ac/titech/c/se/stein/testing/RewriteBenchmark.java
index 2ca7d6b..10fd1d6 100644
--- a/src/test/java/jp/ac/titech/c/se/stein/testing/RewriteBenchmark.java
+++ b/src/test/java/jp/ac/titech/c/se/stein/testing/RewriteBenchmark.java
@@ -24,6 +24,7 @@
import java.time.Duration;
import java.time.Instant;
import java.util.ArrayList;
+import java.util.EnumSet;
import java.util.List;
/**
@@ -43,17 +44,20 @@ public static void main(String[] args) throws Exception {
}
final boolean alternates = Arrays.asList(args).contains("--alternates");
+ final boolean cache = Arrays.asList(args).contains("--cache");
- System.out.println("Benchmarking: " + sourceDir.getAbsolutePath() + (alternates ? " (alternates)" : ""));
+ System.out.println("Benchmarking: " + sourceDir.getAbsolutePath()
+ + (alternates ? " (alternates)" : "")
+ + (cache ? " (cache)" : ""));
System.out.println();
final List results = new ArrayList<>();
- results.add(benchmark("identity", sourceDir, new Identity(), alternates));
- results.add(benchmark("tokenize-jdt", sourceDir, new TokenizeViaJDT().toRewriter(), alternates));
- results.add(benchmark("historage-jdt", sourceDir, new HistorageViaJDT().toRewriter(), alternates));
+ results.add(benchmark("identity", sourceDir, Identity::new, alternates, cache));
+ results.add(benchmark("tokenize-jdt", sourceDir, () -> new TokenizeViaJDT().toRewriter(), alternates, cache));
+ results.add(benchmark("historage-jdt", sourceDir, () -> new HistorageViaJDT().toRewriter(), alternates, cache));
results.add(benchmark("historage+tokenize", sourceDir,
- new BlobTranslator.Composite(new HistorageViaJDT(), new TokenizeViaJDT()), alternates));
+ () -> new BlobTranslator.Composite(new HistorageViaJDT(), new TokenizeViaJDT()), alternates, cache));
// summary
System.out.println();
@@ -76,7 +80,13 @@ public static void main(String[] args) throws Exception {
System.out.println(GSON.toJson(report));
}
- static JsonObject benchmark(String name, File sourceDir, RepositoryRewriter rewriter, boolean useAlternates) throws IOException {
+ @FunctionalInterface
+ interface RewriterFactory {
+ RepositoryRewriter create();
+ }
+
+ static JsonObject benchmark(String name, File sourceDir, RewriterFactory factory,
+ boolean useAlternates, boolean useCache) throws IOException {
System.out.printf("Running %-25s ... ", name);
System.out.flush();
@@ -96,7 +106,12 @@ static JsonObject benchmark(String name, File sourceDir, RepositoryRewriter rewr
targetRepo = openRepository(tmp.getPath().toFile(), true);
}
- rewriter.setConfig(new Application.Config());
+ final Application.Config config = new Application.Config();
+ if (useCache) {
+ config.cacheLevel = EnumSet.allOf(RepositoryRewriter.CacheLevel.class);
+ }
+ final RepositoryRewriter rewriter = factory.create();
+ rewriter.setConfig(config);
rewriter.initialize(sourceRepo, targetRepo);
System.gc();
@@ -105,18 +120,45 @@ static JsonObject benchmark(String name, File sourceDir, RepositoryRewriter rewr
final Instant start = Instant.now();
rewriter.rewrite(Context.init());
final Instant end = Instant.now();
+ final long timeMs = Duration.between(start, end).toMillis();
+ final long heapMb = Math.max(0, (usedHeap() - heapBefore) / (1024 * 1024));
+ final int commits = countCommits(targetRepo);
- final JsonObject result = new JsonObject();
+ System.out.printf("%d ms, %d MB heap%n", timeMs, heapMb);
+
+ // If cache is enabled, run a second time (incremental) with a fresh rewriter
+ JsonObject result = new JsonObject();
result.addProperty("name", name);
- result.addProperty("timeMs", Duration.between(start, end).toMillis());
- result.addProperty("heapMb", Math.max(0, (usedHeap() - heapBefore) / (1024 * 1024)));
- result.addProperty("commits", countCommits(targetRepo));
+ result.addProperty("timeMs", timeMs);
+ result.addProperty("heapMb", heapMb);
+ result.addProperty("commits", commits);
+
+ if (useCache) {
+ // Second run: reuse the same target (cache.db is there)
+ System.out.printf(" (cached) %-22s ... ", name);
+ System.out.flush();
+
+ final RepositoryRewriter rewriter2 = factory.create();
+ rewriter2.setConfig(config);
+ rewriter2.initialize(sourceRepo, targetRepo);
+
+ System.gc();
+ final long heapBefore2 = usedHeap();
+ final Instant start2 = Instant.now();
+ rewriter2.rewrite(Context.init());
+ final Instant end2 = Instant.now();
+ final long timeMs2 = Duration.between(start2, end2).toMillis();
+ final long heapMb2 = Math.max(0, (usedHeap() - heapBefore2) / (1024 * 1024));
+
+ System.out.printf("%d ms, %d MB heap%n", timeMs2, heapMb2);
+
+ result.addProperty("cachedTimeMs", timeMs2);
+ result.addProperty("cachedHeapMb", heapMb2);
+ }
sourceRepo.close();
targetRepo.close();
- System.out.printf("%d ms, %d MB heap%n",
- result.get("timeMs").getAsLong(), result.get("heapMb").getAsLong());
return result;
}
}
From 5b2cb1e551b25534a8350241a1066d77d922422c Mon Sep 17 00:00:00 2001
From: Shinpei Hayashi
Date: Mon, 23 Mar 2026 19:51:50 +0900
Subject: [PATCH 03/28] Disable autocommit, use internal serializer
---
.../c/se/stein/core/MVStoreCacheProvider.java | 64 +++----------------
.../se/stein/rewriter/RepositoryRewriter.java | 3 +
2 files changed, 11 insertions(+), 56 deletions(-)
diff --git a/src/main/java/jp/ac/titech/c/se/stein/core/MVStoreCacheProvider.java b/src/main/java/jp/ac/titech/c/se/stein/core/MVStoreCacheProvider.java
index 50805cf..ed4cf02 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/core/MVStoreCacheProvider.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/core/MVStoreCacheProvider.java
@@ -4,25 +4,17 @@
import jp.ac.titech.c.se.stein.entry.Entry;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.Repository;
-import org.h2.mvstore.MVMap;
import org.h2.mvstore.MVStore;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import java.nio.file.Files;
import java.nio.file.Path;
-import java.util.AbstractMap;
import java.util.Map;
-import java.util.Set;
-import java.util.stream.Collectors;
/**
* Cache provider backed by H2 MVStore.
* Data is stored in a single file ({@code cache.mv.db}) in the target repository's .git directory.
*/
public class MVStoreCacheProvider implements CacheProvider {
- private static final Logger log = LoggerFactory.getLogger(MVStoreCacheProvider.class);
-
private final MVStore store;
private final boolean initial;
@@ -31,6 +23,7 @@ public MVStoreCacheProvider(final Repository target) {
initial = !Files.exists(dbFile);
store = new MVStore.Builder()
.fileName(dbFile.toString())
+ .autoCommitDisabled()
.open();
}
@@ -40,69 +33,28 @@ public boolean isInitial() {
}
@Override
+ @SuppressWarnings("unchecked")
public Map getCommitMapping() {
- final Marshaler m = new Marshaler.ObjectIdMarshaler();
- return new MVMapAdapter<>(store.openMap("commits"), m, m);
+ return store.openMap("commits");
}
@Override
+ @SuppressWarnings("unchecked")
public Map getEntryMapping() {
- final Marshaler km = new Marshaler.JavaSerializerMarshaler<>();
- final Marshaler vm = new Marshaler.JavaSerializerMarshaler<>();
- return new MVMapAdapter<>(store.openMap("entries"), km, vm);
+ return store.openMap("entries");
}
@Override
+ @SuppressWarnings("unchecked")
public Map getRefEntryMapping() {
- final Marshaler m = new Marshaler.JavaSerializerMarshaler<>();
- return new MVMapAdapter<>(store.openMap("refs"), m, m);
+ return store.openMap("refs");
}
@Override
public void close() {
if (store != null && !store.isClosed()) {
+ store.commit();
store.close();
}
}
-
- /**
- * Map adapter that serializes keys/values via Marshaler and stores them in an MVMap.
- */
- static class MVMapAdapter extends AbstractMap {
- private final MVMap map;
- private final Marshaler keyMarshaler;
- private final Marshaler valueMarshaler;
-
- MVMapAdapter(MVMap map, Marshaler keyMarshaler, Marshaler valueMarshaler) {
- this.map = map;
- this.keyMarshaler = keyMarshaler;
- this.valueMarshaler = valueMarshaler;
- }
-
- @Override
- public V get(final Object key) {
- @SuppressWarnings("unchecked")
- final K k = (K) key;
- final byte[] raw = map.get(keyMarshaler.marshal(k));
- return raw != null ? valueMarshaler.unmarshal(raw) : null;
- }
-
- @Override
- public V put(final K key, final V value) {
- map.put(keyMarshaler.marshal(key), valueMarshaler.marshal(value));
- return value;
- }
-
- @Override
- public Set> entrySet() {
- return map.entrySet().stream()
- .map(e -> new SimpleEntry<>(keyMarshaler.unmarshal(e.getKey()), valueMarshaler.unmarshal(e.getValue())))
- .collect(Collectors.toSet());
- }
-
- @Override
- public void clear() {
- map.clear();
- }
- }
}
diff --git a/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java b/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
index a908b68..d671ebe 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
@@ -135,6 +135,9 @@ public void rewrite(final Context c) {
updateRefs(c);
}
target.writeNotes(target.getDefaultNotes(), c);
+ if (cacheProvider != null) {
+ cacheProvider.close();
+ }
cleanUp(c);
}
From b431d0571f5ed840c6271814c3c27133018f645e Mon Sep 17 00:00:00 2001
From: Shinpei Hayashi
Date: Mon, 23 Mar 2026 19:56:00 +0900
Subject: [PATCH 04/28] switch the default cache backend
---
src/main/java/jp/ac/titech/c/se/stein/Application.java | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/main/java/jp/ac/titech/c/se/stein/Application.java b/src/main/java/jp/ac/titech/c/se/stein/Application.java
index 9731d67..408e648 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/Application.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/Application.java
@@ -107,8 +107,8 @@ public enum AlternatesMode { relative, absolute }
public enum CacheBackend { sqlite, mvstore }
- @Option(names = "--cache-backend", paramLabel = "", description = "cache backend (${COMPLETION-CANDIDATES}. default: sqlite)", order = MIDDLE)
- public CacheBackend cacheBackend = CacheBackend.sqlite;
+ @Option(names = "--cache-backend", paramLabel = "", description = "cache backend (${COMPLETION-CANDIDATES}. default: mvstore)", order = MIDDLE)
+ public CacheBackend cacheBackend = CacheBackend.mvstore;
@Option(names = "--extra-attributes", description = "rewrite encoding and signature in commits", order = MIDDLE)
public boolean isRewritingExtraAttributes = false;
From 6e4d732acd6909646a52276661339bde3628dbb0 Mon Sep 17 00:00:00 2001
From: Shinpei Hayashi
Date: Mon, 23 Mar 2026 19:57:26 +0900
Subject: [PATCH 05/28] ensure to close even for exceptional cases
---
.../se/stein/rewriter/RepositoryRewriter.java | 37 ++++++++++---------
1 file changed, 20 insertions(+), 17 deletions(-)
diff --git a/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java b/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
index d671ebe..b7067f9 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
@@ -118,27 +118,30 @@ public void initialize(final Repository sourceRepo, final Repository targetRepo)
public void rewrite(final Context c) {
setUp(c);
- final RevWalk walk = prepareRevisionWalk(c);
- if (cacheProvider != null) {
- cacheProvider.inTransaction(() -> {
+ try {
+ final RevWalk walk = prepareRevisionWalk(c);
+ if (cacheProvider != null) {
+ cacheProvider.inTransaction(() -> {
+ rewriteCommits(walk, c);
+ updateRefs(c);
+ return null;
+ });
+ } else {
+ if (config.nthreads >= 2) {
+ log.debug("Parallel rewriting");
+ rewriteRootTrees(walk, c);
+ Try.io(walk::memoReset);
+ }
rewriteCommits(walk, c);
updateRefs(c);
- return null;
- });
- } else {
- if (config.nthreads >= 2) {
- log.debug("Parallel rewriting");
- rewriteRootTrees(walk, c);
- Try.io(walk::memoReset);
}
- rewriteCommits(walk, c);
- updateRefs(c);
- }
- target.writeNotes(target.getDefaultNotes(), c);
- if (cacheProvider != null) {
- cacheProvider.close();
+ target.writeNotes(target.getDefaultNotes(), c);
+ } finally {
+ if (cacheProvider != null) {
+ cacheProvider.close();
+ }
+ cleanUp(c);
}
- cleanUp(c);
}
protected void setUp(final Context c) {}
From 3c488ec4e05634f681166446baa3026801d5dcb0 Mon Sep 17 00:00:00 2001
From: Shinpei Hayashi
Date: Mon, 23 Mar 2026 20:07:23 +0900
Subject: [PATCH 06/28] Add tests for CacheProvider
---
.../c/se/stein/core/CacheProviderTest.java | 120 ++++++++++++++++++
1 file changed, 120 insertions(+)
create mode 100644 src/test/java/jp/ac/titech/c/se/stein/core/CacheProviderTest.java
diff --git a/src/test/java/jp/ac/titech/c/se/stein/core/CacheProviderTest.java b/src/test/java/jp/ac/titech/c/se/stein/core/CacheProviderTest.java
new file mode 100644
index 0000000..e9ef15c
--- /dev/null
+++ b/src/test/java/jp/ac/titech/c/se/stein/core/CacheProviderTest.java
@@ -0,0 +1,120 @@
+package jp.ac.titech.c.se.stein.core;
+
+import jp.ac.titech.c.se.stein.Application;
+import jp.ac.titech.c.se.stein.app.Identity;
+import jp.ac.titech.c.se.stein.app.blob.HistorageViaJDT;
+import jp.ac.titech.c.se.stein.rewriter.BlobTranslator;
+import jp.ac.titech.c.se.stein.rewriter.RepositoryRewriter;
+import jp.ac.titech.c.se.stein.testing.TestRepo;
+import org.eclipse.jgit.lib.Repository;
+import org.eclipse.jgit.revwalk.RevCommit;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.EnumSet;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+public class CacheProviderTest {
+ static RepositoryAccess source;
+
+ @BeforeAll
+ static void setUp() throws IOException {
+ source = TestRepo.createSample(true);
+ }
+
+ @AfterAll
+ static void tearDown() {
+ source.close();
+ }
+
+ private Application.Config cacheConfig() {
+ final Application.Config config = new Application.Config();
+ config.cacheLevel = EnumSet.allOf(RepositoryRewriter.CacheLevel.class);
+ config.cacheBackend = Application.Config.CacheBackend.mvstore;
+ return config;
+ }
+
+ private void rewriteWithCache(RepositoryRewriter rewriter, Repository targetRepo) {
+ rewriter.setConfig(cacheConfig());
+ rewriter.initialize(source.repo, targetRepo);
+ rewriter.rewrite(Context.init());
+ }
+
+ @Test
+ public void testCacheProducesCorrectResult() {
+ try (RepositoryAccess target = TestRepo.create(true)) {
+ rewriteWithCache(new Identity(), target.repo);
+ final List firstRun = target.collectCommits("refs/heads/main");
+
+ assertTrue(new File(target.repo.getDirectory(), "cache.mv.db").exists());
+
+ rewriteWithCache(new Identity(), target.repo);
+ final List secondRun = target.collectCommits("refs/heads/main");
+
+ assertEquals(firstRun.size(), secondRun.size());
+ for (int i = 0; i < firstRun.size(); i++) {
+ assertEquals(firstRun.get(i).getId(), secondRun.get(i).getId());
+ }
+ }
+ }
+
+ @Test
+ public void testCacheMatchesNonCachedResult() {
+ try (RepositoryAccess noCacheResult = TestRepo.rewrite(source, new Identity())) {
+ final List noCacheCommits = noCacheResult.collectCommits("refs/heads/main");
+
+ try (RepositoryAccess target = TestRepo.create(true)) {
+ rewriteWithCache(new Identity(), target.repo);
+ final List cachedCommits = target.collectCommits("refs/heads/main");
+
+ assertEquals(noCacheCommits.size(), cachedCommits.size());
+ for (int i = 0; i < noCacheCommits.size(); i++) {
+ assertEquals(noCacheCommits.get(i).getId(), cachedCommits.get(i).getId());
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testCacheWithHistorage() {
+ try (RepositoryAccess target = TestRepo.create(true)) {
+ rewriteWithCache(new HistorageViaJDT().toRewriter(), target.repo);
+ final List firstRun = target.collectCommits("refs/heads/main");
+ assertFalse(firstRun.isEmpty());
+
+ rewriteWithCache(new HistorageViaJDT().toRewriter(), target.repo);
+ final List secondRun = target.collectCommits("refs/heads/main");
+
+ assertEquals(firstRun.size(), secondRun.size());
+ for (int i = 0; i < firstRun.size(); i++) {
+ assertEquals(firstRun.get(i).getId(), secondRun.get(i).getId());
+ }
+ }
+ }
+
+ @Test
+ public void testSecondRunHasZeroTranslations() {
+ final AtomicInteger count = new AtomicInteger();
+ final BlobTranslator counting = (entry, c) -> {
+ count.incrementAndGet();
+ return entry;
+ };
+
+ try (RepositoryAccess target = TestRepo.create(true)) {
+ count.set(0);
+ rewriteWithCache(counting.toRewriter(), target.repo);
+ assertTrue(count.get() > 0, "First run should translate blobs");
+
+ count.set(0);
+ rewriteWithCache(counting.toRewriter(), target.repo);
+ assertEquals(0, count.get(),
+ "Second run should have 100% cache hit (0 translations), but got " + count.get());
+ }
+ }
+}
From 935ce4eb995cff1ab43f5995149766c5760dd306 Mon Sep 17 00:00:00 2001
From: Shinpei Hayashi
Date: Mon, 23 Mar 2026 21:30:47 +0900
Subject: [PATCH 07/28] fix: make SingleEntry fully consistent with equals() to
ensure the assumption of ObjectDataType
---
.../jp/ac/titech/c/se/stein/entry/SingleEntry.java | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)
diff --git a/src/main/java/jp/ac/titech/c/se/stein/entry/SingleEntry.java b/src/main/java/jp/ac/titech/c/se/stein/entry/SingleEntry.java
index 9c5f377..fa08694 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/entry/SingleEntry.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/entry/SingleEntry.java
@@ -3,6 +3,8 @@
import org.eclipse.jgit.lib.FileMode;
import org.eclipse.jgit.lib.ObjectId;
+import java.util.Comparator;
+
/**
* Common interface for a single tree entry.
*
@@ -93,11 +95,18 @@ default String sortKey() {
return isTree() ? getName() + "/" : getName();
}
+ Comparator<SingleEntry> COMPARATOR = Comparator
+ .comparing(SingleEntry::sortKey)
+ .thenComparing(SingleEntry::getId)
+ .thenComparingInt(SingleEntry::getMode)
+ .thenComparing(SingleEntry::getDirectory, Comparator.nullsFirst(Comparator.naturalOrder()));
+
/**
- * Compares entries by their {@link #sortKey()}.
+ * Compares entries by their {@link #sortKey()}, then by mode, object ID, and directory
+ * to ensure consistency with {@code equals}.
*/
@Override
default int compareTo(final SingleEntry other) {
- return sortKey().compareTo(other.sortKey());
+ return COMPARATOR.compare(this, other);
}
}
From 6eab52d1f3e4630b10bb1937d781b18a301797df Mon Sep 17 00:00:00 2001
From: Shinpei Hayashi
Date: Mon, 23 Mar 2026 22:50:11 +0900
Subject: [PATCH 08/28] Make RefEntry comparable
---
.../java/jp/ac/titech/c/se/stein/core/RefEntry.java | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/src/main/java/jp/ac/titech/c/se/stein/core/RefEntry.java b/src/main/java/jp/ac/titech/c/se/stein/core/RefEntry.java
index 6e4a3c1..097f8cd 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/core/RefEntry.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/core/RefEntry.java
@@ -1,6 +1,7 @@
package jp.ac.titech.c.se.stein.core;
import java.io.Serializable;
+import java.util.Comparator;
import lombok.EqualsAndHashCode;
import org.eclipse.jgit.lib.ObjectId;
@@ -14,7 +15,7 @@
* A symbolic ref has a non-null {@link #target} and a null {@link #id}.
*/
@EqualsAndHashCode
-public class RefEntry implements Serializable {
+public class RefEntry implements Serializable, Comparable<RefEntry> {
/**
* The ref name (e.g., {@code "refs/heads/main"} or {@code "HEAD"}).
*/
@@ -76,4 +77,14 @@ public boolean isSymbolic() {
public String toString() {
return String.format("<%s -> %s>", name, target != null ? target : id.name());
}
+
+ private static final Comparator<RefEntry> COMPARATOR = Comparator
+ .comparing((RefEntry r) -> r.name, Comparator.nullsFirst(Comparator.naturalOrder()))
+ .thenComparing(r -> r.id, Comparator.nullsFirst(Comparator.naturalOrder()))
+ .thenComparing(r -> r.target, Comparator.nullsFirst(Comparator.naturalOrder()));
+
+ @Override
+ public int compareTo(final RefEntry other) {
+ return COMPARATOR.compare(this, other);
+ }
}
From c868b3caf8352f4be2a85913d364fcb784719907 Mon Sep 17 00:00:00 2001
From: Shinpei Hayashi
Date: Mon, 23 Mar 2026 23:20:19 +0900
Subject: [PATCH 09/28] New provider: guava cache
---
.../jp/ac/titech/c/se/stein/Application.java | 2 +-
.../titech/c/se/stein/core/CacheProvider.java | 2 +-
.../c/se/stein/core/GuavaCacheProvider.java | 53 +++++++++++++++++++
.../se/stein/rewriter/RepositoryRewriter.java | 1 +
4 files changed, 56 insertions(+), 2 deletions(-)
create mode 100644 src/main/java/jp/ac/titech/c/se/stein/core/GuavaCacheProvider.java
diff --git a/src/main/java/jp/ac/titech/c/se/stein/Application.java b/src/main/java/jp/ac/titech/c/se/stein/Application.java
index 408e648..79e66b6 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/Application.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/Application.java
@@ -105,7 +105,7 @@ public enum AlternatesMode { relative, absolute }
@Option(names = "--cache", split = ",", paramLabel = "<level>", description = "cache level (${COMPLETION-CANDIDATES}. default: none)", order = MIDDLE)
public EnumSet<RepositoryRewriter.CacheLevel> cacheLevel = EnumSet.noneOf(RepositoryRewriter.CacheLevel.class);
- public enum CacheBackend { sqlite, mvstore }
+ public enum CacheBackend { sqlite, mvstore, guava }
@Option(names = "--cache-backend", paramLabel = "<backend>", description = "cache backend (${COMPLETION-CANDIDATES}. default: mvstore)", order = MIDDLE)
public CacheBackend cacheBackend = CacheBackend.mvstore;
diff --git a/src/main/java/jp/ac/titech/c/se/stein/core/CacheProvider.java b/src/main/java/jp/ac/titech/c/se/stein/core/CacheProvider.java
index a3d8991..93148b2 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/core/CacheProvider.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/core/CacheProvider.java
@@ -7,7 +7,7 @@
import java.util.Map;
/**
- * Common interface for cache providers that persist object mappings.
+ * Common interface for cache providers that manage object mappings.
*/
public interface CacheProvider {
boolean isInitial();
diff --git a/src/main/java/jp/ac/titech/c/se/stein/core/GuavaCacheProvider.java b/src/main/java/jp/ac/titech/c/se/stein/core/GuavaCacheProvider.java
new file mode 100644
index 0000000..f886cc9
--- /dev/null
+++ b/src/main/java/jp/ac/titech/c/se/stein/core/GuavaCacheProvider.java
@@ -0,0 +1,53 @@
+package jp.ac.titech.c.se.stein.core;
+
+import com.google.common.cache.CacheBuilder;
+import jp.ac.titech.c.se.stein.entry.AnyColdEntry;
+import jp.ac.titech.c.se.stein.entry.Entry;
+import org.eclipse.jgit.lib.ObjectId;
+
+import java.util.Map;
+
+/**
+ * Non-persistent cache provider backed by Guava Cache with LRU eviction.
+ */
+public class GuavaCacheProvider implements CacheProvider {
+ private static final double HEAP_FRACTION = 0.25;
+ private static final int BYTES_PER_ENTRY = 300;
+
+ private final long maxEntries;
+
+ public GuavaCacheProvider() {
+ final long budget = (long) (Runtime.getRuntime().maxMemory() * HEAP_FRACTION);
+ this.maxEntries = Math.max(1000, budget / BYTES_PER_ENTRY);
+ }
+
+ @Override
+ public boolean isInitial() {
+ return true;
+ }
+
+ @Override
+ public Map<ObjectId, ObjectId> getCommitMapping() {
+ return CacheBuilder.newBuilder()
+ .maximumSize(maxEntries)
+ .build()
+ .asMap();
+ }
+
+ @Override
+ public Map<Entry, AnyColdEntry> getEntryMapping() {
+ return CacheBuilder.newBuilder()
+ .maximumWeight(maxEntries)
+ .weigher((Entry k, AnyColdEntry v) -> v.size())
+ .build()
+ .asMap();
+ }
+
+ @Override
+ public Map<RefEntry, RefEntry> getRefEntryMapping() {
+ return CacheBuilder.newBuilder()
+ .maximumSize(maxEntries)
+ .build()
+ .asMap();
+ }
+}
diff --git a/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java b/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
index b7067f9..ee01a6d 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
@@ -94,6 +94,7 @@ public void initialize(final Repository sourceRepo, final Repository targetRepo)
if (!config.cacheLevel.isEmpty()) {
cacheProvider = switch (config.cacheBackend) {
case mvstore -> new MVStoreCacheProvider(targetRepo);
+ case guava -> new GuavaCacheProvider();
default -> new SQLiteCacheProvider(targetRepo);
};
if (config.cacheLevel.contains(CacheLevel.commit)) {
From 1580b11291b73595a1f7a7f9a2b32a3e4eb8178a Mon Sep 17 00:00:00 2001
From: Shinpei Hayashi
Date: Mon, 23 Mar 2026 23:44:58 +0900
Subject: [PATCH 10/28] refactor: move class (extract package)
---
.../java/jp/ac/titech/c/se/stein/core/{ => cache}/Cache.java | 2 +-
.../ac/titech/c/se/stein/core/{ => cache}/CacheProvider.java | 4 +++-
.../c/se/stein/core/{ => cache}/GuavaCacheProvider.java | 4 +++-
.../c/se/stein/core/{ => cache}/MVStoreCacheProvider.java | 4 +++-
.../jp/ac/titech/c/se/stein/core/{ => cache}/Marshaler.java | 2 +-
.../c/se/stein/core/{ => cache}/SQLiteCacheProvider.java | 4 +++-
.../jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java | 1 +
.../c/se/stein/core/{ => cache}/CacheProviderTest.java | 5 ++++-
8 files changed, 19 insertions(+), 7 deletions(-)
rename src/main/java/jp/ac/titech/c/se/stein/core/{ => cache}/Cache.java (98%)
rename src/main/java/jp/ac/titech/c/se/stein/core/{ => cache}/CacheProvider.java (88%)
rename src/main/java/jp/ac/titech/c/se/stein/core/{ => cache}/GuavaCacheProvider.java (94%)
rename src/main/java/jp/ac/titech/c/se/stein/core/{ => cache}/MVStoreCacheProvider.java (94%)
rename src/main/java/jp/ac/titech/c/se/stein/core/{ => cache}/Marshaler.java (98%)
rename src/main/java/jp/ac/titech/c/se/stein/core/{ => cache}/SQLiteCacheProvider.java (98%)
rename src/test/java/jp/ac/titech/c/se/stein/core/{ => cache}/CacheProviderTest.java (96%)
diff --git a/src/main/java/jp/ac/titech/c/se/stein/core/Cache.java b/src/main/java/jp/ac/titech/c/se/stein/core/cache/Cache.java
similarity index 98%
rename from src/main/java/jp/ac/titech/c/se/stein/core/Cache.java
rename to src/main/java/jp/ac/titech/c/se/stein/core/cache/Cache.java
index de42ac0..e329de7 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/core/Cache.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/core/cache/Cache.java
@@ -1,4 +1,4 @@
-package jp.ac.titech.c.se.stein.core;
+package jp.ac.titech.c.se.stein.core.cache;
import lombok.AllArgsConstructor;
diff --git a/src/main/java/jp/ac/titech/c/se/stein/core/CacheProvider.java b/src/main/java/jp/ac/titech/c/se/stein/core/cache/CacheProvider.java
similarity index 88%
rename from src/main/java/jp/ac/titech/c/se/stein/core/CacheProvider.java
rename to src/main/java/jp/ac/titech/c/se/stein/core/cache/CacheProvider.java
index 93148b2..ea646ff 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/core/CacheProvider.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/core/cache/CacheProvider.java
@@ -1,4 +1,6 @@
-package jp.ac.titech.c.se.stein.core;
+package jp.ac.titech.c.se.stein.core.cache;
+
+import jp.ac.titech.c.se.stein.core.RefEntry;
import jp.ac.titech.c.se.stein.entry.AnyColdEntry;
import jp.ac.titech.c.se.stein.entry.Entry;
diff --git a/src/main/java/jp/ac/titech/c/se/stein/core/GuavaCacheProvider.java b/src/main/java/jp/ac/titech/c/se/stein/core/cache/GuavaCacheProvider.java
similarity index 94%
rename from src/main/java/jp/ac/titech/c/se/stein/core/GuavaCacheProvider.java
rename to src/main/java/jp/ac/titech/c/se/stein/core/cache/GuavaCacheProvider.java
index f886cc9..af8ddd7 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/core/GuavaCacheProvider.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/core/cache/GuavaCacheProvider.java
@@ -1,4 +1,6 @@
-package jp.ac.titech.c.se.stein.core;
+package jp.ac.titech.c.se.stein.core.cache;
+
+import jp.ac.titech.c.se.stein.core.RefEntry;
import com.google.common.cache.CacheBuilder;
import jp.ac.titech.c.se.stein.entry.AnyColdEntry;
diff --git a/src/main/java/jp/ac/titech/c/se/stein/core/MVStoreCacheProvider.java b/src/main/java/jp/ac/titech/c/se/stein/core/cache/MVStoreCacheProvider.java
similarity index 94%
rename from src/main/java/jp/ac/titech/c/se/stein/core/MVStoreCacheProvider.java
rename to src/main/java/jp/ac/titech/c/se/stein/core/cache/MVStoreCacheProvider.java
index ed4cf02..d6fadee 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/core/MVStoreCacheProvider.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/core/cache/MVStoreCacheProvider.java
@@ -1,4 +1,6 @@
-package jp.ac.titech.c.se.stein.core;
+package jp.ac.titech.c.se.stein.core.cache;
+
+import jp.ac.titech.c.se.stein.core.RefEntry;
import jp.ac.titech.c.se.stein.entry.AnyColdEntry;
import jp.ac.titech.c.se.stein.entry.Entry;
diff --git a/src/main/java/jp/ac/titech/c/se/stein/core/Marshaler.java b/src/main/java/jp/ac/titech/c/se/stein/core/cache/Marshaler.java
similarity index 98%
rename from src/main/java/jp/ac/titech/c/se/stein/core/Marshaler.java
rename to src/main/java/jp/ac/titech/c/se/stein/core/cache/Marshaler.java
index 7a78e05..795e014 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/core/Marshaler.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/core/cache/Marshaler.java
@@ -1,4 +1,4 @@
-package jp.ac.titech.c.se.stein.core;
+package jp.ac.titech.c.se.stein.core.cache;
import org.eclipse.jgit.lib.ObjectId;
import org.slf4j.Logger;
diff --git a/src/main/java/jp/ac/titech/c/se/stein/core/SQLiteCacheProvider.java b/src/main/java/jp/ac/titech/c/se/stein/core/cache/SQLiteCacheProvider.java
similarity index 98%
rename from src/main/java/jp/ac/titech/c/se/stein/core/SQLiteCacheProvider.java
rename to src/main/java/jp/ac/titech/c/se/stein/core/cache/SQLiteCacheProvider.java
index 0f66a42..22590a2 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/core/SQLiteCacheProvider.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/core/cache/SQLiteCacheProvider.java
@@ -1,4 +1,6 @@
-package jp.ac.titech.c.se.stein.core;
+package jp.ac.titech.c.se.stein.core.cache;
+
+import jp.ac.titech.c.se.stein.core.RefEntry;
import com.j256.ormlite.dao.Dao;
import com.j256.ormlite.dao.DaoManager;
diff --git a/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java b/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
index ee01a6d..2697a54 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
@@ -10,6 +10,7 @@
import java.util.stream.StreamSupport;
import jp.ac.titech.c.se.stein.core.*;
+import jp.ac.titech.c.se.stein.core.cache.*;
import jp.ac.titech.c.se.stein.entry.*;
import jp.ac.titech.c.se.stein.jgit.RevWalk;
import lombok.Setter;
diff --git a/src/test/java/jp/ac/titech/c/se/stein/core/CacheProviderTest.java b/src/test/java/jp/ac/titech/c/se/stein/core/cache/CacheProviderTest.java
similarity index 96%
rename from src/test/java/jp/ac/titech/c/se/stein/core/CacheProviderTest.java
rename to src/test/java/jp/ac/titech/c/se/stein/core/cache/CacheProviderTest.java
index e9ef15c..c885479 100644
--- a/src/test/java/jp/ac/titech/c/se/stein/core/CacheProviderTest.java
+++ b/src/test/java/jp/ac/titech/c/se/stein/core/cache/CacheProviderTest.java
@@ -1,4 +1,7 @@
-package jp.ac.titech.c.se.stein.core;
+package jp.ac.titech.c.se.stein.core.cache;
+
+import jp.ac.titech.c.se.stein.core.RepositoryAccess;
+import jp.ac.titech.c.se.stein.core.Context;
import jp.ac.titech.c.se.stein.Application;
import jp.ac.titech.c.se.stein.app.Identity;
From 6530090498456c82237e51f3c83f487eec1a8e15 Mon Sep 17 00:00:00 2001
From: Shinpei Hayashi
Date: Mon, 23 Mar 2026 23:51:27 +0900
Subject: [PATCH 11/28] remove dependencies to SQLite
20M build/libs/git-stein.jar
34M build/libs/git-stein-before.jar
---
build.gradle | 4 -
.../jp/ac/titech/c/se/stein/Application.java | 2 +-
.../c/se/stein/core/cache/Marshaler.java | 91 ---------
.../stein/core/cache/SQLiteCacheProvider.java | 189 ------------------
.../se/stein/rewriter/RepositoryRewriter.java | 1 -
5 files changed, 1 insertion(+), 286 deletions(-)
delete mode 100644 src/main/java/jp/ac/titech/c/se/stein/core/cache/Marshaler.java
delete mode 100644 src/main/java/jp/ac/titech/c/se/stein/core/cache/SQLiteCacheProvider.java
diff --git a/build.gradle b/build.gradle
index 3846acc..e7146f0 100644
--- a/build.gradle
+++ b/build.gradle
@@ -37,9 +37,6 @@ dependencies {
implementation 'org.jgrapht:jgrapht-core:1.5.2'
implementation 'org.jgrapht:jgrapht-io:1.5.2'
- implementation 'org.xerial:sqlite-jdbc:3.51.3.0'
- implementation 'com.j256.ormlite:ormlite-jdbc:5.7'
-
implementation 'com.h2database:h2-mvstore:2.3.232'
testImplementation 'org.junit.jupiter:junit-jupiter:5.14.3'
@@ -77,7 +74,6 @@ publishing {
shadowJar {
minimize {
- exclude(dependency('org.xerial:sqlite-jdbc:.*'))
exclude(dependency('ch.qos.logback:logback-classic:.*'))
}
}
diff --git a/src/main/java/jp/ac/titech/c/se/stein/Application.java b/src/main/java/jp/ac/titech/c/se/stein/Application.java
index 79e66b6..655522e 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/Application.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/Application.java
@@ -105,7 +105,7 @@ public enum AlternatesMode { relative, absolute }
@Option(names = "--cache", split = ",", paramLabel = "<level>", description = "cache level (${COMPLETION-CANDIDATES}. default: none)", order = MIDDLE)
public EnumSet<RepositoryRewriter.CacheLevel> cacheLevel = EnumSet.noneOf(RepositoryRewriter.CacheLevel.class);
- public enum CacheBackend { sqlite, mvstore, guava }
+ public enum CacheBackend { mvstore, guava }
@Option(names = "--cache-backend", paramLabel = "<backend>", description = "cache backend (${COMPLETION-CANDIDATES}. default: mvstore)", order = MIDDLE)
public CacheBackend cacheBackend = CacheBackend.mvstore;
diff --git a/src/main/java/jp/ac/titech/c/se/stein/core/cache/Marshaler.java b/src/main/java/jp/ac/titech/c/se/stein/core/cache/Marshaler.java
deleted file mode 100644
index 795e014..0000000
--- a/src/main/java/jp/ac/titech/c/se/stein/core/cache/Marshaler.java
+++ /dev/null
@@ -1,91 +0,0 @@
-package jp.ac.titech.c.se.stein.core.cache;
-
-import org.eclipse.jgit.lib.ObjectId;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.ObjectInputStream;
-import java.io.ObjectOutputStream;
-import java.io.OutputStream;
-
-/**
- * Converts an object to a byte array and vice versa.
- */
-public interface Marshaler<T> {
- Logger log = LoggerFactory.getLogger(Marshaler.class);
-
- /**
- * Marshals an object and write it to the given stream.
- */
- void writeObject(final T object, final OutputStream stream);
-
- /**
- * Reads from the given stream and unmarshals it to an object.
- */
- T readObject(final InputStream stream);
-
- /**
- * Marshals an object.
- */
- default byte[] marshal(final T object) {
- final ByteArrayOutputStream stream = new ByteArrayOutputStream();
- writeObject(object, stream);
- return stream.toByteArray();
- }
-
- /**
- * Unmarshals an object.
- */
- default T unmarshal(final byte[] binary) {
- return readObject(new ByteArrayInputStream(binary));
- }
-
- class JavaSerializerMarshaler<T> implements Marshaler<T> {
- @Override
- public void writeObject(final T object, final OutputStream stream) {
- try (final ObjectOutputStream output = new ObjectOutputStream(stream)) {
- output.writeObject(object);
- } catch (final IOException e) {
- log.error(e.getMessage(), e);
- }
- }
-
- @Override
- public T readObject(final InputStream stream) {
- try (final ObjectInputStream input = new ObjectInputStream(stream)) {
- @SuppressWarnings("unchecked")
- final T result = (T) input.readObject();
- return result;
- } catch (final IOException | ClassNotFoundException e) {
- log.error(e.getMessage(), e);
- return null;
- }
- }
- }
-
- class ObjectIdMarshaler implements Marshaler<ObjectId> {
- @Override
- public void writeObject(final ObjectId object, final OutputStream stream) {
- try {
- object.copyRawTo(stream);
- } catch (final IOException e) {
- log.error(e.getMessage(), e);
- }
- }
-
- @Override
- public ObjectId readObject(final InputStream stream) {
- try {
- final byte[] bytes = stream.readAllBytes();
- return ObjectId.fromRaw(bytes);
- } catch (final IOException e) {
- log.error(e.getMessage(), e);
- return null;
- }
- }
- }
-}
diff --git a/src/main/java/jp/ac/titech/c/se/stein/core/cache/SQLiteCacheProvider.java b/src/main/java/jp/ac/titech/c/se/stein/core/cache/SQLiteCacheProvider.java
deleted file mode 100644
index 22590a2..0000000
--- a/src/main/java/jp/ac/titech/c/se/stein/core/cache/SQLiteCacheProvider.java
+++ /dev/null
@@ -1,189 +0,0 @@
-package jp.ac.titech.c.se.stein.core.cache;
-
-import jp.ac.titech.c.se.stein.core.RefEntry;
-
-import com.j256.ormlite.dao.Dao;
-import com.j256.ormlite.dao.DaoManager;
-import com.j256.ormlite.field.DataType;
-import com.j256.ormlite.field.DatabaseField;
-import com.j256.ormlite.jdbc.JdbcConnectionSource;
-import com.j256.ormlite.logger.Slf4jLoggingLogBackend;
-import com.j256.ormlite.misc.TransactionManager;
-import com.j256.ormlite.table.DatabaseTable;
-import com.j256.ormlite.table.TableUtils;
-import jp.ac.titech.c.se.stein.entry.AnyColdEntry;
-import jp.ac.titech.c.se.stein.entry.Entry;
-import org.eclipse.jgit.lib.ObjectId;
-import org.eclipse.jgit.lib.Repository;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.File;
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.sql.SQLException;
-import java.util.AbstractMap;
-import java.util.Collections;
-import java.util.Map;
-import java.util.Set;
-import java.util.concurrent.Callable;
-import java.util.function.Supplier;
-import java.util.stream.Collectors;
-
-public class SQLiteCacheProvider implements CacheProvider {
- private final static Logger log = LoggerFactory.getLogger(SQLiteCacheProvider.class);
-
- static class KeyValue {
- @DatabaseField(id = true, dataType = DataType.BYTE_ARRAY)
- byte[] source;
- @DatabaseField(dataType = DataType.BYTE_ARRAY)
- byte[] target;
- }
-
- @DatabaseTable(tableName = "commits")
- static class CommitRow extends KeyValue {}
-
- @DatabaseTable(tableName = "entries")
- static class EntryRow extends KeyValue {}
-
- @DatabaseTable(tableName = "refs")
- static class RefRow extends KeyValue {}
-
- JdbcConnectionSource connectionSource = null;
-
- Dao<CommitRow, byte[]> commitDao;
-
- Dao<EntryRow, byte[]> entryDao;
-
- Dao<RefRow, byte[]> refDao;
-
- final boolean initial;
-
- public SQLiteCacheProvider(final Repository target) {
- com.j256.ormlite.logger.LoggerFactory.setLogBackendFactory(new Slf4jLoggingLogBackend.Slf4jLoggingLogBackendFactory());
- com.j256.ormlite.logger.Logger.setGlobalLogLevel(com.j256.ormlite.logger.Level.FATAL);
-
- final File dotGitDir = target.getDirectory().getAbsoluteFile();
- final Path dbFile = dotGitDir.toPath().resolve("cache.db");
- initial = !Files.exists(dbFile);
- try {
- connectionSource = new JdbcConnectionSource("jdbc:sqlite:" + dbFile);
- commitDao = DaoManager.createDao(connectionSource, CommitRow.class);
- TableUtils.createTableIfNotExists(connectionSource, CommitRow.class);
- entryDao = DaoManager.createDao(connectionSource, EntryRow.class);
- TableUtils.createTableIfNotExists(connectionSource, EntryRow.class);
- refDao = DaoManager.createDao(connectionSource, RefRow.class);
- TableUtils.createTableIfNotExists(connectionSource, RefRow.class);
- } catch (final SQLException e) {
- log.error(e.getMessage(), e);
- } finally {
- try {
- if (connectionSource != null) {
- connectionSource.close();
- }
- } catch (final IOException e) {
- log.error("Failed to close connection to Database", e);
- }
- }
- }
-
- public void inTransaction(final Callable fn) {
- try {
- TransactionManager.callInTransaction(connectionSource, fn);
- } catch (final SQLException e) {
- log.error(e.getMessage(), e);
- }
- }
-
- public boolean isInitial() {
- return initial;
- }
-
- public Map<ObjectId, ObjectId> getCommitMapping() {
- final Marshaler<ObjectId> m = new Marshaler.ObjectIdMarshaler();
- return new MapAdapter<>(commitDao, CommitRow::new, m, m);
- }
-
- public Map<Entry, AnyColdEntry> getEntryMapping() {
- final Marshaler<Entry> km = new Marshaler.JavaSerializerMarshaler<>();
- final Marshaler<AnyColdEntry> vm = new Marshaler.JavaSerializerMarshaler<>();
- return new MapAdapter<>(entryDao, EntryRow::new, km, vm);
- }
-
- public Map<RefEntry, RefEntry> getRefEntryMapping() {
- final Marshaler<RefEntry> m = new Marshaler.JavaSerializerMarshaler<>();
- return new MapAdapter<>(refDao, RefRow::new, m, m);
- }
-
- /**
- * Map interface using the SQLite cache.
- */
- static class MapAdapter<K, V, Row extends KeyValue> extends AbstractMap<K, V> {
-
- final Dao<Row, byte[]> dao;
-
- final Supplier<Row> constructor;
-
- final Marshaler<K> keyMarshaler;
-
- final Marshaler<V> valueMarshaler;
-
- public MapAdapter(final Dao<Row, byte[]> dao, final Supplier<Row> constructor, final Marshaler<K> keyMarshaler, Marshaler<V> valueMarshaler) {
- this.dao = dao;
- this.constructor = constructor;
- this.keyMarshaler = keyMarshaler;
- this.valueMarshaler = valueMarshaler;
- }
-
- @Override
- public V get(final Object key) {
- @SuppressWarnings("unchecked")
- final K k = (K) key;
- try {
- final byte[] source = keyMarshaler.marshal(k);
- final Row row = dao.queryForId(source);
- return row != null ? valueMarshaler.unmarshal(row.target) : null;
- } catch (final SQLException e) {
- log.warn(e.getMessage(), e);
- return null;
- }
- }
-
- @Override
- public V put(final K key, final V value) {
- try {
- final Row row = constructor.get();
- row.source = keyMarshaler.marshal(key);
- row.target = valueMarshaler.marshal(value);
- dao.createIfNotExists(row);
- } catch (final SQLException e) {
- log.error(e.getMessage(), e);
- }
- return value;
- }
-
- @Override
- public Set<Map.Entry<K, V>> entrySet() {
- try {
- return dao
- .queryForAll()
- .stream()
- .map(r -> new AbstractMap.SimpleEntry<>(keyMarshaler.unmarshal(r.source), valueMarshaler.unmarshal(r.target)))
- .collect(Collectors.toSet());
- } catch (final SQLException e) {
- log.error(e.getMessage(), e);
- return Collections.emptySet();
- }
- }
-
- @Override
- public void clear() {
- try {
- dao.deleteBuilder().delete();
- } catch (final SQLException e) {
- log.error(e.getMessage(), e);
- }
- }
- }
-}
diff --git a/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java b/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
index 2697a54..a13cc56 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
@@ -96,7 +96,6 @@ public void initialize(final Repository sourceRepo, final Repository targetRepo)
cacheProvider = switch (config.cacheBackend) {
case mvstore -> new MVStoreCacheProvider(targetRepo);
case guava -> new GuavaCacheProvider();
- default -> new SQLiteCacheProvider(targetRepo);
};
if (config.cacheLevel.contains(CacheLevel.commit)) {
log.info("Stored mapping (commit-mapping) is available");
From 083ff51eba57a9da809d2040b0da071b4acef144 Mon Sep 17 00:00:00 2001
From: Shinpei Hayashi
Date: Tue, 24 Mar 2026 13:06:54 +0900
Subject: [PATCH 12/28] Use git-notes for the source of commit mapping at the
previous stage
---
.../c/se/stein/core/cache/CommitMapping.java | 174 ++++++++++++++++++
.../se/stein/rewriter/RepositoryRewriter.java | 23 +--
2 files changed, 182 insertions(+), 15 deletions(-)
create mode 100644 src/main/java/jp/ac/titech/c/se/stein/core/cache/CommitMapping.java
diff --git a/src/main/java/jp/ac/titech/c/se/stein/core/cache/CommitMapping.java b/src/main/java/jp/ac/titech/c/se/stein/core/cache/CommitMapping.java
new file mode 100644
index 0000000..29037ee
--- /dev/null
+++ b/src/main/java/jp/ac/titech/c/se/stein/core/cache/CommitMapping.java
@@ -0,0 +1,174 @@
+package jp.ac.titech.c.se.stein.core.cache;
+
+import jp.ac.titech.c.se.stein.core.RepositoryAccess;
+import org.eclipse.jgit.lib.Constants;
+import org.eclipse.jgit.lib.ObjectId;
+import org.eclipse.jgit.lib.Ref;
+import org.eclipse.jgit.notes.NoteMap;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.AbstractMap;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Manages source-to-target commit ID mapping with support for notes-based restoration.
+ *
+ * <p>When an incremental transformation is performed, commits that were already
+ * transformed in a previous run should not be re-processed. git-stein records the
+ * source commit ID as a git note on each target commit, so the target repository
+ * itself serves as persistent storage for commit mappings.
+ *
+ * <p>Loading is two-phase. On initialization, only the ref tips of the target are
+ * examined: for each target ref, the note on the tip commit is read to recover the
+ * corresponding source commit ID. These are registered in the mapping and also
+ * collected as "uninteresting" points so that the source RevWalk stops at
+ * already-processed commits. This covers the common case (linear history, no merges
+ * from old branches). If a merge commit references an old source commit not reachable
+ * from any current ref tip, the mapping will miss, and a full scan of all target notes
+ * is triggered lazily (at most once) to load the remaining entries.
+ *
+ * <p>Each entry is ~100 bytes (two ObjectIds + HashMap overhead). Even for repositories
+ * with 100K commits, this is ~10 MB, which is small compared to the entryMapping.
+ */
+public class CommitMapping extends AbstractMap<ObjectId, ObjectId> {
+ private static final Logger log = LoggerFactory.getLogger(CommitMapping.class);
+
+ private final Map<ObjectId, ObjectId> map = new HashMap<>();
+ private final List<ObjectId> previousSourceTips = new ArrayList<>();
+
+ private NoteObjectIdMap notesMap;
+ private boolean notesFullyLoaded = false;
+
+ /**
+ * Restores commit mapping from the target repository's notes.
+ * Only ref tips are read eagerly.
+ */
+ public void restoreFromTarget(RepositoryAccess target) {
+ final List<Ref> targetRefs = target.getRefs();
+ if (targetRefs.isEmpty()) {
+ return;
+ }
+
+ notesMap = new NoteObjectIdMap(target.getDefaultNotes(), target);
+
+ for (final Ref ref : targetRefs) {
+ final ObjectId targetTipId = target.getRefTarget(ref);
+ if (targetTipId == null || target.getObjectType(targetTipId) != Constants.OBJ_COMMIT) {
+ continue;
+ }
+ final ObjectId sourceTipId = notesMap.get(targetTipId);
+ if (sourceTipId == null) {
+ continue;
+ }
+ map.put(sourceTipId, targetTipId);
+ previousSourceTips.add(sourceTipId);
+ log.debug("Restored commit mapping from note: {} -> {} (ref: {})",
+ sourceTipId.name(), targetTipId.name(), ref.getName());
+ }
+
+ if (!previousSourceTips.isEmpty()) {
+ log.info("Restored {} commit mappings from target notes", previousSourceTips.size());
+ }
+ }
+
+ /**
+ * Returns the source commit IDs of previously processed ref tips.
+ * These should be marked as uninteresting in the source RevWalk.
+ */
+ public List<ObjectId> getPreviousSourceTips() {
+ return previousSourceTips;
+ }
+
+ @Override
+ public ObjectId get(Object key) {
+ final ObjectId v = map.get(key);
+ if (v != null) {
+ return v;
+ }
+ if (!notesFullyLoaded && notesMap != null) {
+ loadAllNotes();
+ return map.get(key);
+ }
+ return null;
+ }
+
+ @Override
+ public ObjectId put(ObjectId key, ObjectId value) {
+ return map.put(key, value);
+ }
+
+ @Override
+ public int size() {
+ return map.size();
+ }
+
+ @Override
+ public Set<Map.Entry<ObjectId, ObjectId>> entrySet() {
+ return map.entrySet();
+ }
+
+ /**
+ * Loads all notes into the mapping. Called at most once, when a lookup
+ * misses on a commit not reachable from any ref tip (e.g., old merge parent).
+ */
+ private synchronized void loadAllNotes() {
+ if (notesFullyLoaded) {
+ return;
+ }
+ log.info("Loading full notes for commit mapping fallback");
+ notesMap.forEach((targetId, sourceId) -> map.put(sourceId, targetId));
+ log.info("Loaded commit mappings, total {} entries", map.size());
+ notesFullyLoaded = true;
+ }
+
+ /**
+ * A read-only {@code Map} view over a JGit NoteMap.
+ * Keys are annotated commit IDs, values are note bodies parsed as ObjectIds.
+ */
+ private static class NoteObjectIdMap extends AbstractMap<ObjectId, ObjectId> {
+ private final NoteMap notes;
+ private final RepositoryAccess ra;
+
+ NoteObjectIdMap(NoteMap notes, RepositoryAccess ra) {
+ this.notes = notes;
+ this.ra = ra;
+ }
+
+ @Override
+ public ObjectId get(Object key) {
+ if (!(key instanceof ObjectId id)) {
+ return null;
+ }
+ return parseObjectId(ra.readNote(notes, id));
+ }
+
+ @Override
+ public Set<Map.Entry<ObjectId, ObjectId>> entrySet() {
+ final Set<Map.Entry<ObjectId, ObjectId>> result = new HashSet<>();
+ ra.forEachNote(notes, (annotatedId, body) -> {
+ final ObjectId bodyId = parseObjectId(body);
+ if (bodyId != null) {
+ result.add(new SimpleEntry<>(annotatedId, bodyId));
+ }
+ });
+ return result;
+ }
+
+ private static ObjectId parseObjectId(byte[] body) {
+ if (body == null) {
+ return null;
+ }
+ try {
+ return ObjectId.fromString(new String(body));
+ } catch (Exception e) {
+ return null;
+ }
+ }
+ }
+}
diff --git a/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java b/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
index a13cc56..7e62453 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
@@ -54,7 +54,7 @@ public class RepositoryRewriter implements RewriterCommand {
/**
* Commit-to-commit mapping.
*/
- protected Map<ObjectId, ObjectId> commitMapping = new HashMap<>();
+ protected final CommitMapping commitMapping = new CommitMapping();
/**
* Tag-to-tag mapping.
@@ -92,16 +92,14 @@ public void initialize(final Repository sourceRepo, final Repository targetRepo)
source.setDryRunning(true);
target.setDryRunning(true);
}
+ if (config.isAddingNotes && !isOverwriting) {
+ commitMapping.restoreFromTarget(target);
+ }
if (!config.cacheLevel.isEmpty()) {
cacheProvider = switch (config.cacheBackend) {
case mvstore -> new MVStoreCacheProvider(targetRepo);
case guava -> new GuavaCacheProvider();
};
- if (config.cacheLevel.contains(CacheLevel.commit)) {
- log.info("Stored mapping (commit-mapping) is available");
- commitMapping = new Cache<>(commitMapping, cacheProvider.getCommitMapping(), !cacheProvider.isInitial(), true);
- refEntryMapping = new Cache<>(refEntryMapping, cacheProvider.getRefEntryMapping(), !cacheProvider.isInitial(), true);
- }
if (config.cacheLevel.contains(CacheLevel.blob) || config.cacheLevel.contains(CacheLevel.tree)) {
log.info("Stored mapping (entry-mapping) is available");
Map storedEntryMapping = cacheProvider.getEntryMapping();
@@ -242,16 +240,11 @@ protected List filterRefs(final List refs, @SuppressWarnings("unused")
* Collects the set of commit Ids used as uninteresting points.
*/
protected Collection collectUninterestings(@SuppressWarnings("unused") final Context c) {
- final List result = new ArrayList<>();
- for (final Map.Entry e : refEntryMapping.entrySet()) {
- final RefEntry ref = e.getKey();
- if (ref.id != null) {
- log.debug("Previous Ref {}: added as an uninteresting point (commit: {})", ref.name, ref.id.name());
- result.add(ref.id);
- }
+ final List tips = commitMapping.getPreviousSourceTips();
+ if (!tips.isEmpty()) {
+ log.info("Using {} previous source tips as uninteresting points", tips.size());
}
- refEntryMapping.clear(); // ref entries might be removed when updated.
- return result;
+ return tips;
}
/**
From f26ee3bce7102c3f616ab9b602bbad4ba88b42eb Mon Sep 17 00:00:00 2001
From: Shinpei Hayashi
Date: Tue, 24 Mar 2026 17:37:09 +0900
Subject: [PATCH 13/28] Introduce CheckStyle
---
build.gradle | 6 ++++++
config/checkstyle/checkstyle.xml | 13 +++++++++++++
2 files changed, 19 insertions(+)
create mode 100644 config/checkstyle/checkstyle.xml
diff --git a/build.gradle b/build.gradle
index e7146f0..75c09c8 100644
--- a/build.gradle
+++ b/build.gradle
@@ -4,6 +4,12 @@ plugins {
id 'maven-publish'
id 'com.gradleup.shadow' version '9.4.0'
id 'com.github.ben-manes.versions' version '0.53.0'
+ id 'checkstyle'
+}
+
+checkstyle {
+ toolVersion = '10.21.4'
+ configFile = file("${rootDir}/config/checkstyle/checkstyle.xml")
}
repositories {
diff --git a/config/checkstyle/checkstyle.xml b/config/checkstyle/checkstyle.xml
new file mode 100644
index 0000000..b996137
--- /dev/null
+++ b/config/checkstyle/checkstyle.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
From 6527099c6d7069858d462489cfecd118cac2542c Mon Sep 17 00:00:00 2001
From: Shinpei Hayashi
Date: Tue, 24 Mar 2026 17:38:35 +0900
Subject: [PATCH 14/28] Remove unused imports
---
src/main/java/jp/ac/titech/c/se/stein/PorcelainAPI.java | 1 -
src/main/java/jp/ac/titech/c/se/stein/app/blob/Tokenize.java | 1 -
.../java/jp/ac/titech/c/se/stein/app/blob/TokenizeViaJDT.java | 2 --
src/main/java/jp/ac/titech/c/se/stein/app/blob/Untokenize.java | 2 --
src/main/java/jp/ac/titech/c/se/stein/entry/TreeEntry.java | 1 -
.../java/jp/ac/titech/c/se/stein/rewriter/BlobTranslator.java | 1 -
.../java/jp/ac/titech/c/se/stein/app/blob/ConvertBlobTest.java | 1 -
.../jp/ac/titech/c/se/stein/app/blob/HistorageViaJDTTest.java | 1 -
.../jp/ac/titech/c/se/stein/rewriter/BlobTranslatorTest.java | 2 --
9 files changed, 12 deletions(-)
diff --git a/src/main/java/jp/ac/titech/c/se/stein/PorcelainAPI.java b/src/main/java/jp/ac/titech/c/se/stein/PorcelainAPI.java
index 959b4b6..f9d3863 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/PorcelainAPI.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/PorcelainAPI.java
@@ -5,7 +5,6 @@
import org.eclipse.jgit.api.ResetCommand.ResetType;
import org.eclipse.jgit.api.errors.*;
import org.eclipse.jgit.internal.storage.file.FileRepository;
-import org.eclipse.jgit.internal.storage.file.GC;
import jp.ac.titech.c.se.stein.core.Try;
diff --git a/src/main/java/jp/ac/titech/c/se/stein/app/blob/Tokenize.java b/src/main/java/jp/ac/titech/c/se/stein/app/blob/Tokenize.java
index 1de8689..007ff0a 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/app/blob/Tokenize.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/app/blob/Tokenize.java
@@ -8,7 +8,6 @@
import lombok.ToString;
import picocli.CommandLine.Command;
-import java.nio.charset.StandardCharsets;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
diff --git a/src/main/java/jp/ac/titech/c/se/stein/app/blob/TokenizeViaJDT.java b/src/main/java/jp/ac/titech/c/se/stein/app/blob/TokenizeViaJDT.java
index 7672b36..2f68715 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/app/blob/TokenizeViaJDT.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/app/blob/TokenizeViaJDT.java
@@ -1,7 +1,5 @@
package jp.ac.titech.c.se.stein.app.blob;
-import java.nio.charset.StandardCharsets;
-
import jp.ac.titech.c.se.stein.entry.AnyHotEntry;
import jp.ac.titech.c.se.stein.core.SourceText;
import jp.ac.titech.c.se.stein.entry.BlobEntry;
diff --git a/src/main/java/jp/ac/titech/c/se/stein/app/blob/Untokenize.java b/src/main/java/jp/ac/titech/c/se/stein/app/blob/Untokenize.java
index 4d087b3..fb8bb05 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/app/blob/Untokenize.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/app/blob/Untokenize.java
@@ -10,8 +10,6 @@
import picocli.CommandLine.Command;
import picocli.CommandLine.Mixin;
-import java.nio.charset.StandardCharsets;
-
/**
* Restores linetoken-encoded source files back to their original form.
* The inverse of {@link Tokenize}: removes line breaks between tokens and
diff --git a/src/main/java/jp/ac/titech/c/se/stein/entry/TreeEntry.java b/src/main/java/jp/ac/titech/c/se/stein/entry/TreeEntry.java
index 7a74cfe..4bdb6f5 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/entry/TreeEntry.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/entry/TreeEntry.java
@@ -11,7 +11,6 @@
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
-import java.util.stream.Stream;
/**
* A Hot entry representing a tree (directory).
diff --git a/src/main/java/jp/ac/titech/c/se/stein/rewriter/BlobTranslator.java b/src/main/java/jp/ac/titech/c/se/stein/rewriter/BlobTranslator.java
index 9eceeef..bea0e9c 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/rewriter/BlobTranslator.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/rewriter/BlobTranslator.java
@@ -8,7 +8,6 @@
import lombok.Getter;
import lombok.ToString;
-import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.stream.Collectors;
import java.util.function.Function;
diff --git a/src/test/java/jp/ac/titech/c/se/stein/app/blob/ConvertBlobTest.java b/src/test/java/jp/ac/titech/c/se/stein/app/blob/ConvertBlobTest.java
index f481438..0bf3695 100644
--- a/src/test/java/jp/ac/titech/c/se/stein/app/blob/ConvertBlobTest.java
+++ b/src/test/java/jp/ac/titech/c/se/stein/app/blob/ConvertBlobTest.java
@@ -6,7 +6,6 @@
import jp.ac.titech.c.se.stein.entry.BlobEntry;
import jp.ac.titech.c.se.stein.entry.HotEntry;
import jp.ac.titech.c.se.stein.util.ProcessRunner;
-import org.eclipse.jgit.lib.FileMode;
import org.junit.jupiter.api.Test;
import java.net.InetSocketAddress;
diff --git a/src/test/java/jp/ac/titech/c/se/stein/app/blob/HistorageViaJDTTest.java b/src/test/java/jp/ac/titech/c/se/stein/app/blob/HistorageViaJDTTest.java
index 63e72ac..b54eb68 100644
--- a/src/test/java/jp/ac/titech/c/se/stein/app/blob/HistorageViaJDTTest.java
+++ b/src/test/java/jp/ac/titech/c/se/stein/app/blob/HistorageViaJDTTest.java
@@ -9,7 +9,6 @@
import jp.ac.titech.c.se.stein.entry.HotEntry;
import jp.ac.titech.c.se.stein.core.RepositoryAccess;
import jp.ac.titech.c.se.stein.testing.TestRepo;
-import org.eclipse.jgit.lib.FileMode;
import org.eclipse.jgit.revwalk.RevCommit;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
diff --git a/src/test/java/jp/ac/titech/c/se/stein/rewriter/BlobTranslatorTest.java b/src/test/java/jp/ac/titech/c/se/stein/rewriter/BlobTranslatorTest.java
index d6ad813..5e772ef 100644
--- a/src/test/java/jp/ac/titech/c/se/stein/rewriter/BlobTranslatorTest.java
+++ b/src/test/java/jp/ac/titech/c/se/stein/rewriter/BlobTranslatorTest.java
@@ -4,12 +4,10 @@
import jp.ac.titech.c.se.stein.app.blob.TokenizeViaJDT;
import jp.ac.titech.c.se.stein.core.Context;
import jp.ac.titech.c.se.stein.entry.AnyHotEntry;
-import jp.ac.titech.c.se.stein.entry.BlobEntry;
import jp.ac.titech.c.se.stein.entry.Entry;
import jp.ac.titech.c.se.stein.entry.HotEntry;
import jp.ac.titech.c.se.stein.core.RepositoryAccess;
import jp.ac.titech.c.se.stein.testing.TestRepo;
-import org.eclipse.jgit.lib.FileMode;
import org.eclipse.jgit.revwalk.RevCommit;
import org.junit.jupiter.api.Test;
From 533323314e030bb896e25eaf5d649103aa171059 Mon Sep 17 00:00:00 2001
From: Shinpei Hayashi
Date: Tue, 24 Mar 2026 18:24:47 +0900
Subject: [PATCH 15/28] Use block
---
.../java/jp/ac/titech/c/se/stein/jgit/TreeFormatter.java | 9 ++++++---
.../jp/ac/titech/c/se/stein/app/blob/CregitTest.java | 8 ++++++--
.../jp/ac/titech/c/se/stein/app/blob/HistorageTest.java | 8 ++++++--
3 files changed, 18 insertions(+), 7 deletions(-)
diff --git a/src/main/java/jp/ac/titech/c/se/stein/jgit/TreeFormatter.java b/src/main/java/jp/ac/titech/c/se/stein/jgit/TreeFormatter.java
index 5d7fa94..3351472 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/jgit/TreeFormatter.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/jgit/TreeFormatter.java
@@ -56,8 +56,9 @@ private void append(byte[] nameBuf, int nameLen, byte[] mode, AnyObjectId id) {
}
private boolean fmtBuf(byte[] nameBuf, int nameLen, byte[] mode) {
- if (buf == null || buf.length < ptr + entrySize(mode, nameLen))
+ if (buf == null || buf.length < ptr + entrySize(mode, nameLen)) {
return false;
+ }
//mode.copyTo(buf, ptr);
//ptr += mode.copyToLength();
System.arraycopy(mode, 0, buf, ptr, mode.length);
@@ -83,15 +84,17 @@ private void fmtOverflowBuffer(byte[] nameBuf, int nameLen, byte[] mode) throws
}
public ObjectId insertTo(ObjectInserter ins) throws IOException {
- if (buf != null)
+ if (buf != null) {
return ins.insert(OBJ_TREE, buf, 0, ptr);
+ }
final long len = overflowBuffer.length();
return ins.insert(OBJ_TREE, len, overflowBuffer.openInputStream());
}
public ObjectId computeId(ObjectInserter ins) {
- if (buf != null)
+ if (buf != null) {
return ins.idFor(OBJ_TREE, buf, 0, ptr);
+ }
final long len = overflowBuffer.length();
try {
return ins.idFor(OBJ_TREE, len, overflowBuffer.openInputStream());
diff --git a/src/test/java/jp/ac/titech/c/se/stein/app/blob/CregitTest.java b/src/test/java/jp/ac/titech/c/se/stein/app/blob/CregitTest.java
index bb9115a..dd5fe0a 100644
--- a/src/test/java/jp/ac/titech/c/se/stein/app/blob/CregitTest.java
+++ b/src/test/java/jp/ac/titech/c/se/stein/app/blob/CregitTest.java
@@ -27,8 +27,12 @@ static void setUp() throws IOException {
@AfterAll
static void tearDown() {
- if (result != null) result.close();
- if (source != null) source.close();
+ if (result != null) {
+ result.close();
+ }
+ if (source != null) {
+ source.close();
+ }
}
static RepositoryAccess getResult() {
diff --git a/src/test/java/jp/ac/titech/c/se/stein/app/blob/HistorageTest.java b/src/test/java/jp/ac/titech/c/se/stein/app/blob/HistorageTest.java
index ae27d3a..690b5f2 100644
--- a/src/test/java/jp/ac/titech/c/se/stein/app/blob/HistorageTest.java
+++ b/src/test/java/jp/ac/titech/c/se/stein/app/blob/HistorageTest.java
@@ -27,8 +27,12 @@ static void setUp() throws IOException {
@AfterAll
static void tearDown() {
- if (result != null) result.close();
- if (source != null) source.close();
+ if (result != null) {
+ result.close();
+ }
+ if (source != null) {
+ source.close();
+ }
}
static RepositoryAccess getResult() {
From 6621ac0e77b6a3850e22a4ea5d9e23460725a657 Mon Sep 17 00:00:00 2001
From: Shinpei Hayashi
Date: Tue, 24 Mar 2026 18:27:38 +0900
Subject: [PATCH 16/28] Store both prev and orig notes
---
.../c/se/stein/core/cache/CommitMapping.java | 72 +++-------------
.../se/stein/core/cache/NoteObjectIdMap.java | 83 +++++++++++++++++++
.../se/stein/rewriter/RepositoryRewriter.java | 72 ++++++++++++----
3 files changed, 152 insertions(+), 75 deletions(-)
create mode 100644 src/main/java/jp/ac/titech/c/se/stein/core/cache/NoteObjectIdMap.java
diff --git a/src/main/java/jp/ac/titech/c/se/stein/core/cache/CommitMapping.java b/src/main/java/jp/ac/titech/c/se/stein/core/cache/CommitMapping.java
index 29037ee..221964f 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/core/cache/CommitMapping.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/core/cache/CommitMapping.java
@@ -1,17 +1,16 @@
package jp.ac.titech.c.se.stein.core.cache;
import jp.ac.titech.c.se.stein.core.RepositoryAccess;
+import lombok.Getter;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.Ref;
-import org.eclipse.jgit.notes.NoteMap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.HashMap;
-import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -32,30 +31,35 @@
* from old branches). If a merge commit references an old source commit not reachable
* from any current ref tip, the mapping will miss, and a full scan of all target notes
* is triggered lazily (at most once) to load the remaining entries.
- *
- *
Each entry is ~100 bytes (two ObjectIds + HashMap overhead). Even for repositories
- * with 100K commits, this is ~10 MB, which is small compared to the entryMapping.
*/
public class CommitMapping extends AbstractMap {
private static final Logger log = LoggerFactory.getLogger(CommitMapping.class);
private final Map map = new HashMap<>();
+
+ /**
+ * Source commit IDs of previously processed ref tips.
+ * These should be marked as uninteresting in the source RevWalk.
+ */
+ @Getter
private final List previousSourceTips = new ArrayList<>();
private NoteObjectIdMap notesMap;
- private boolean notesFullyLoaded = false;
+ private volatile boolean notesFullyLoaded = false;
/**
* Restores commit mapping from the target repository's notes.
* Only ref tips are read eagerly.
+ *
+ * @param notesRef the notes ref to read from (e.g., {@code refs/notes/git-stein-prev})
*/
- public void restoreFromTarget(RepositoryAccess target) {
+ public void restoreFromTarget(RepositoryAccess target, String notesRef) {
final List targetRefs = target.getRefs();
if (targetRefs.isEmpty()) {
return;
}
- notesMap = new NoteObjectIdMap(target.getDefaultNotes(), target);
+ notesMap = new NoteObjectIdMap(target.readNotes(notesRef), target);
for (final Ref ref : targetRefs) {
final ObjectId targetTipId = target.getRefTarget(ref);
@@ -77,13 +81,6 @@ public void restoreFromTarget(RepositoryAccess target) {
}
}
- /**
- * Returns the source commit IDs of previously processed ref tips.
- * These should be marked as uninteresting in the source RevWalk.
- */
- public List getPreviousSourceTips() {
- return previousSourceTips;
- }
@Override
public ObjectId get(Object key) {
@@ -126,49 +123,4 @@ private synchronized void loadAllNotes() {
log.info("Loaded commit mappings, total {} entries", map.size());
notesFullyLoaded = true;
}
-
- /**
- * A read-only {@code Map} view over a JGit NoteMap.
- * Keys are annotated commit IDs, values are note bodies parsed as ObjectIds.
- */
- private static class NoteObjectIdMap extends AbstractMap {
- private final NoteMap notes;
- private final RepositoryAccess ra;
-
- NoteObjectIdMap(NoteMap notes, RepositoryAccess ra) {
- this.notes = notes;
- this.ra = ra;
- }
-
- @Override
- public ObjectId get(Object key) {
- if (!(key instanceof ObjectId id)) {
- return null;
- }
- return parseObjectId(ra.readNote(notes, id));
- }
-
- @Override
- public Set> entrySet() {
- final Set> result = new HashSet<>();
- ra.forEachNote(notes, (annotatedId, body) -> {
- final ObjectId bodyId = parseObjectId(body);
- if (bodyId != null) {
- result.add(new SimpleEntry<>(annotatedId, bodyId));
- }
- });
- return result;
- }
-
- private static ObjectId parseObjectId(byte[] body) {
- if (body == null) {
- return null;
- }
- try {
- return ObjectId.fromString(new String(body));
- } catch (Exception e) {
- return null;
- }
- }
- }
}
diff --git a/src/main/java/jp/ac/titech/c/se/stein/core/cache/NoteObjectIdMap.java b/src/main/java/jp/ac/titech/c/se/stein/core/cache/NoteObjectIdMap.java
new file mode 100644
index 0000000..ffb0f74
--- /dev/null
+++ b/src/main/java/jp/ac/titech/c/se/stein/core/cache/NoteObjectIdMap.java
@@ -0,0 +1,83 @@
+package jp.ac.titech.c.se.stein.core.cache;
+
+import jp.ac.titech.c.se.stein.core.Context;
+import jp.ac.titech.c.se.stein.core.RepositoryAccess;
+import org.eclipse.jgit.lib.Constants;
+import org.eclipse.jgit.lib.ObjectId;
+import org.eclipse.jgit.notes.NoteMap;
+
+import java.util.function.BiConsumer;
+
+/**
+ * A view over a JGit NoteMap that interprets note bodies as ObjectIds (hex-encoded).
+ * Supports both reading (get, forEach) and writing (add).
+ */
+public class NoteObjectIdMap {
+ private final NoteMap notes;
+ private final RepositoryAccess ra;
+
+ public NoteObjectIdMap(NoteMap notes, RepositoryAccess ra) {
+ this.notes = notes;
+ this.ra = ra;
+ }
+
+ /**
+ * Returns the NoteMap backing this view.
+ */
+ public NoteMap getNoteMap() {
+ return notes;
+ }
+
+ /**
+ * Reads the note on the given commit as an ObjectId.
+ */
+ public ObjectId get(ObjectId commitId) {
+ return parseObjectId(ra.readNote(notes, commitId));
+ }
+
+ /**
+ * Adds a note recording the given value as the note body on the given commit.
+ */
+ public void add(ObjectId commitId, ObjectId value, Context c) {
+ final byte[] content = new byte[Constants.OBJECT_ID_STRING_LENGTH];
+ value.copyTo(content, 0);
+ ra.addNote(notes, commitId, content, c);
+ }
+
+ /**
+ * Adds a note by forwarding raw note bytes (for chain forwarding).
+ */
+ public void addRaw(ObjectId commitId, byte[] rawNote, Context c) {
+ ra.addNote(notes, commitId, rawNote, c);
+ }
+
+ /**
+ * Iterates all notes, passing (annotatedId, bodyAsObjectId) pairs.
+ */
+ public void forEach(BiConsumer consumer) {
+ ra.forEachNote(notes, (annotatedId, body) -> {
+ final ObjectId bodyId = parseObjectId(body);
+ if (bodyId != null) {
+ consumer.accept(annotatedId, bodyId);
+ }
+ });
+ }
+
+ /**
+ * Writes the notes to the repository under the given ref.
+ */
+ public void write(String ref, Context c) {
+ ra.writeNotes(notes, ref, c);
+ }
+
+ private static ObjectId parseObjectId(byte[] body) {
+ if (body == null) {
+ return null;
+ }
+ try {
+ return ObjectId.fromString(new String(body));
+ } catch (Exception e) {
+ return null;
+ }
+ }
+}
diff --git a/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java b/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
index 7e62453..66c9cd9 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
@@ -66,8 +66,38 @@ public class RepositoryRewriter implements RewriterCommand {
*/
protected Map refEntryMapping = new HashMap<>();
+ /**
+ * Notes ref for the immediate source commit ID (for incremental transformation).
+ */
+ public static final String R_NOTES_PREV = "refs/notes/git-stein-prev";
+
+ /**
+ * Notes ref for the original source commit ID (through the chain).
+ */
+ public static final String R_NOTES_ORIG = "refs/notes/git-stein-orig";
+
protected RepositoryAccess source, target;
+ /**
+ * Whether source is a chained transformation (has git-stein-orig notes).
+ */
+ private boolean isChained = false;
+
+ /**
+ * Notes for prev (always the immediate source commit ID).
+ */
+ private NoteObjectIdMap prevNotes;
+
+ /**
+ * Notes for orig (forwarded from source, or same as prev for single).
+ */
+ private NoteObjectIdMap origNotes;
+
+ /**
+ * Source's orig notes (for chain forwarding). Cached at initialization.
+ */
+ private NoteObjectIdMap sourceOrigNotes;
+
protected boolean isOverwriting = false;
protected boolean isPathSensitive = false;
@@ -93,7 +123,15 @@ public void initialize(final Repository sourceRepo, final Repository targetRepo)
target.setDryRunning(true);
}
if (config.isAddingNotes && !isOverwriting) {
- commitMapping.restoreFromTarget(target);
+ isChained = source.getRef(R_NOTES_ORIG) != null;
+ prevNotes = new NoteObjectIdMap(target.readNotes(R_NOTES_PREV), target);
+ if (isChained) {
+ origNotes = new NoteObjectIdMap(target.readNotes(R_NOTES_ORIG), target);
+ sourceOrigNotes = new NoteObjectIdMap(source.readNotes(R_NOTES_ORIG), source);
+ } else {
+ origNotes = prevNotes;
+ }
+ commitMapping.restoreFromTarget(target, R_NOTES_PREV);
}
if (!config.cacheLevel.isEmpty()) {
cacheProvider = switch (config.cacheBackend) {
@@ -134,7 +172,19 @@ public void rewrite(final Context c) {
rewriteCommits(walk, c);
updateRefs(c);
}
- target.writeNotes(target.getDefaultNotes(), c);
+ if (config.isAddingNotes) {
+ prevNotes.write(R_NOTES_PREV, c);
+ if (isChained) {
+ origNotes.write(R_NOTES_ORIG, c);
+ } else {
+ // Single transformation: orig = prev, share the same ref
+ target.applyRefUpdate(new RefEntry(R_NOTES_ORIG, target.getRef(R_NOTES_PREV).getObjectId()));
+ }
+ // Default notes = orig (for git log display)
+ target.applyRefUpdate(new RefEntry(Constants.R_NOTES_COMMITS, target.getRef(R_NOTES_ORIG).getObjectId()));
+ } else {
+ target.writeNotes(target.getDefaultNotes(), c);
+ }
} finally {
if (cacheProvider != null) {
cacheProvider.close();
@@ -275,23 +325,15 @@ protected ObjectId rewriteCommit(final RevCommit commit, final Context c) {
log.debug("Rewrite commit: {} -> {} {}", oldId.name(), newId.name(), c);
if (config.isAddingNotes) {
- target.addNote(target.getDefaultNotes(), newId, getNote(oldId, c), uc);
+ prevNotes.add(newId, oldId, uc);
+ if (isChained) {
+ final ObjectId origId = sourceOrigNotes.get(oldId);
+ origNotes.add(newId, origId != null ? origId : oldId, uc);
+ }
}
return newId;
}
- /**
- * Returns a note for a commit.
- */
- protected byte[] getNote(final ObjectId oldCommitId, @SuppressWarnings("unused") final Context c) {
- final byte[] note = source.readNote(source.getDefaultNotes(), oldCommitId);
- if (note != null) {
- return note;
- }
- final byte[] blob = new byte[Constants.OBJECT_ID_STRING_LENGTH];
- oldCommitId.copyTo(blob, 0);
- return blob;
- }
/**
* Rewrites the parents of a commit.
From a0405377f091dab20458745489d0e350210596c5 Mon Sep 17 00:00:00 2001
From: Shinpei Hayashi
Date: Tue, 24 Mar 2026 18:28:13 +0900
Subject: [PATCH 17/28] memory profile
---
build.gradle | 12 ++
.../c/se/stein/testing/MemoryProfile.java | 123 ++++++++++++++++++
2 files changed, 135 insertions(+)
create mode 100644 src/test/java/jp/ac/titech/c/se/stein/testing/MemoryProfile.java
diff --git a/build.gradle b/build.gradle
index 75c09c8..2dcae7a 100644
--- a/build.gradle
+++ b/build.gradle
@@ -96,6 +96,18 @@ tasks.register('benchmark', JavaExec) {
jvmArgs = ['-Xmx1g']
}
+tasks.register('memoryProfile', JavaExec) {
+ dependsOn 'testClasses'
+ classpath = sourceSets.test.runtimeClasspath
+ mainClass = 'jp.ac.titech.c.se.stein.testing.MemoryProfile'
+
+ def profArgs = project.hasProperty('benchRepo') ? [project.property('benchRepo')] : ['.']
+ if (project.hasProperty('command')) profArgs.add(project.property('command'))
+ args = profArgs
+ def heap = project.hasProperty('heap') ? project.property('heap') : '4g'
+ jvmArgs = ["-Xmx${heap}", '-XX:+UseSerialGC', '-XX:+CrashOnOutOfMemoryError']
+}
+
tasks.register('executableJar') {
dependsOn 'shadowJar'
// cf. https://ujun.hatenablog.com/entry/2017/09/22/010209
diff --git a/src/test/java/jp/ac/titech/c/se/stein/testing/MemoryProfile.java b/src/test/java/jp/ac/titech/c/se/stein/testing/MemoryProfile.java
new file mode 100644
index 0000000..2eac5bf
--- /dev/null
+++ b/src/test/java/jp/ac/titech/c/se/stein/testing/MemoryProfile.java
@@ -0,0 +1,123 @@
+package jp.ac.titech.c.se.stein.testing;
+
+import jp.ac.titech.c.se.stein.Application;
+import jp.ac.titech.c.se.stein.app.Identity;
+import jp.ac.titech.c.se.stein.app.blob.HistorageViaJDT;
+import jp.ac.titech.c.se.stein.core.Context;
+import jp.ac.titech.c.se.stein.rewriter.RepositoryRewriter;
+import jp.ac.titech.c.se.stein.util.TemporaryFile;
+import org.eclipse.jgit.internal.storage.file.FileRepository;
+import org.eclipse.jgit.storage.file.FileRepositoryBuilder;
+
+import java.io.File;
+import java.io.IOException;
+import java.lang.reflect.Field;
+import java.util.Map;
+
+/**
+ * Profiles memory usage of entryMapping during rewrite.
+ * Usage: java -Xmx4g -cp ... MemoryProfile [command]
+ * command: identity (default) or historage
+ */
+public class MemoryProfile {
+ public static void main(String[] args) throws Exception {
+ final String repoPath = args.length > 0 ? args[0] : ".";
+ final String command = args.length > 1 ? args[1] : "identity";
+ final File sourceDir = new File(repoPath);
+
+ if (!new File(sourceDir, ".git").exists() && !new File(sourceDir, "HEAD").exists()) {
+ System.err.println("Not a git repository: " + sourceDir.getAbsolutePath());
+ System.exit(1);
+ }
+
+ final Runtime rt = Runtime.getRuntime();
+ System.out.printf("Max heap: %d MB%n", rt.maxMemory() / (1024 * 1024));
+ System.out.printf("Repo: %s%n", sourceDir.getAbsolutePath());
+ System.out.printf("Command: %s%n%n", command);
+
+ final boolean isBare = !new File(sourceDir, ".git").exists();
+ final FileRepository sourceRepo = openRepository(sourceDir, isBare);
+
+ try (TemporaryFile.Directory tmp = TemporaryFile.directoryOf("mem-profile-")) {
+ final FileRepository targetRepo = createRepository(tmp.getPath().toFile());
+
+ final RepositoryRewriter rewriter = switch (command) {
+ case "historage" -> new HistorageViaJDT().toRewriter();
+ default -> new Identity();
+ };
+ rewriter.setConfig(new Application.Config());
+ rewriter.initialize(sourceRepo, targetRepo);
+
+ // Before
+ System.gc();
+ Thread.sleep(500);
+ System.gc();
+ final long heapBefore = usedHeap();
+ System.out.printf("Before rewrite:%n");
+ System.out.printf(" Heap used: %d MB%n%n", heapBefore / (1024 * 1024));
+
+ // Run
+ rewriter.rewrite(Context.init());
+
+ // After (before GC)
+ final long heapAfterNoGC = usedHeap();
+
+ // After (after GC)
+ System.gc();
+ Thread.sleep(500);
+ System.gc();
+ final long heapAfterGC = usedHeap();
+
+ // entryMapping size
+ final int entryMappingSize = getEntryMappingSize(rewriter);
+
+ System.out.printf("After rewrite:%n");
+ System.out.printf(" Heap used (before GC): %d MB%n", heapAfterNoGC / (1024 * 1024));
+ System.out.printf(" Heap used (after GC): %d MB%n", heapAfterGC / (1024 * 1024));
+ System.out.printf(" Heap delta (after GC): %d MB%n", (heapAfterGC - heapBefore) / (1024 * 1024));
+ System.out.printf(" entryMapping size: %d entries%n", entryMappingSize);
+ if (entryMappingSize > 0) {
+ final long deltaBytes = heapAfterGC - heapBefore;
+ System.out.printf(" Approx bytes/entry: %d bytes%n", deltaBytes / entryMappingSize);
+ }
+
+ sourceRepo.close();
+ targetRepo.close();
+ }
+ }
+
+ static int getEntryMappingSize(RepositoryRewriter rewriter) {
+ try {
+ Field f = RepositoryRewriter.class.getDeclaredField("entryMapping");
+ f.setAccessible(true);
+ Map, ?> map = (Map, ?>) f.get(rewriter);
+ return map.size();
+ } catch (Exception e) {
+ System.err.println("Could not access entryMapping: " + e.getMessage());
+ return -1;
+ }
+ }
+
+ static long usedHeap() {
+ final Runtime rt = Runtime.getRuntime();
+ return rt.totalMemory() - rt.freeMemory();
+ }
+
+ static FileRepository openRepository(File dir, boolean isBare) throws IOException {
+ final FileRepositoryBuilder builder = new FileRepositoryBuilder();
+ if (isBare) {
+ builder.setGitDir(dir).setBare();
+ } else {
+ builder.setWorkTree(dir).setGitDir(new File(dir, ".git"));
+ }
+ return (FileRepository) builder.readEnvironment().build();
+ }
+
+ static FileRepository createRepository(File dir) throws IOException {
+ final FileRepositoryBuilder builder = new FileRepositoryBuilder();
+ builder.setGitDir(dir).setBare();
+ final FileRepository repo = (FileRepository) builder.build();
+ repo.create(true);
+ return repo;
+ }
+}
From 6caad9b6cc1c56bad83ae9ea41fad650e1405d9a Mon Sep 17 00:00:00 2001
From: Shinpei Hayashi
Date: Tue, 24 Mar 2026 19:17:49 +0900
Subject: [PATCH 18/28] use Guava cache for entry mapping. --mapping-mem option
to specify its memory size
---
.../jp/ac/titech/c/se/stein/Application.java | 8 +++--
.../c/se/stein/app/blob/FilterBlob.java | 29 +---------------
.../se/stein/rewriter/RepositoryRewriter.java | 28 ++++++++++++----
.../titech/c/se/stein/util/SizeConverter.java | 33 +++++++++++++++++++
.../c/se/stein/app/blob/FilterBlobTest.java | 3 +-
5 files changed, 63 insertions(+), 38 deletions(-)
create mode 100644 src/main/java/jp/ac/titech/c/se/stein/util/SizeConverter.java
diff --git a/src/main/java/jp/ac/titech/c/se/stein/Application.java b/src/main/java/jp/ac/titech/c/se/stein/Application.java
index 655522e..3b982cd 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/Application.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/Application.java
@@ -10,12 +10,12 @@
import java.util.concurrent.Callable;
import java.util.stream.Collectors;
-import jp.ac.titech.c.se.stein.app.blob.FilterBlob;
import jp.ac.titech.c.se.stein.rewriter.BlobTranslator;
import jp.ac.titech.c.se.stein.app.Identity;
import jp.ac.titech.c.se.stein.rewriter.RewriterCommand;
import jp.ac.titech.c.se.stein.util.SettableHelpCommand;
import jp.ac.titech.c.se.stein.util.Loader;
+import jp.ac.titech.c.se.stein.util.SizeConverter;
import org.apache.commons.io.FileUtils;
import org.eclipse.jgit.internal.storage.file.FileRepository;
import org.eclipse.jgit.lib.Constants;
@@ -110,12 +110,16 @@ public enum CacheBackend { mvstore, guava }
@Option(names = "--cache-backend", paramLabel = "", description = "cache backend (${COMPLETION-CANDIDATES}. default: mvstore)", order = MIDDLE)
public CacheBackend cacheBackend = CacheBackend.mvstore;
+ @Option(names = "--mapping-mem", paramLabel = "{,K,M,G}", description = "max memory for entry mapping (default: 25%% of max heap)", order = MIDDLE,
+ converter = SizeConverter.class)
+ public long entryMappingMemory = -1;
+
@Option(names = "--extra-attributes", description = "rewrite encoding and signature in commits", order = MIDDLE)
public boolean isRewritingExtraAttributes = false;
@SuppressWarnings("unused")
@Option(names = "--stream-size-limit", paramLabel = "{,K,M,G}", description = "increase stream size limit", order = MIDDLE,
- converter = FilterBlob.SizeConverter.class)
+ converter = SizeConverter.class)
void setSizeLimit(final long limit) {
// default: 50MB is too small
final int intLimit = (int) Math.min(limit, Integer.MAX_VALUE);
diff --git a/src/main/java/jp/ac/titech/c/se/stein/app/blob/FilterBlob.java b/src/main/java/jp/ac/titech/c/se/stein/app/blob/FilterBlob.java
index 17cad4b..04c0d07 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/app/blob/FilterBlob.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/app/blob/FilterBlob.java
@@ -4,6 +4,7 @@
import jp.ac.titech.c.se.stein.entry.BlobEntry;
import jp.ac.titech.c.se.stein.rewriter.BlobTranslator;
import jp.ac.titech.c.se.stein.rewriter.NameFilter;
+import jp.ac.titech.c.se.stein.util.SizeConverter;
import lombok.ToString;
import lombok.extern.slf4j.Slf4j;
@@ -11,7 +12,6 @@
import org.apache.commons.io.FileUtils;
import picocli.CommandLine.Command;
import picocli.CommandLine.Mixin;
-import picocli.CommandLine.ITypeConverter;
import picocli.CommandLine.Option;
@@ -51,31 +51,4 @@ public AnyHotEntry rewriteBlobEntry(final BlobEntry entry, final Context c) {
return entry;
}
-
- public static class SizeConverter implements ITypeConverter {
- @Override
- public Long convert(final String value) {
- if (value.isEmpty()) {
- throw new IllegalArgumentException("Empty value is given");
- }
- final int len = value.length();
- final char unit = Character.toUpperCase(value.charAt(len - 1));
- final String num = value.substring(0, len - 1);
- return switch (unit) {
- case 'B' -> convert(num);
- case 'K' -> displaySizeToByteCount(num, 1024);
- case 'M' -> displaySizeToByteCount(num, 1024 * 1024);
- case 'G' -> displaySizeToByteCount(num, 1024 * 1024 * 1024);
- default -> displaySizeToByteCount(value, 1);
- };
- }
-
- protected long displaySizeToByteCount(final String value, final long base) {
- if (value.contains(".")) {
- return (long) (Double.parseDouble(value) * base);
- } else {
- return Long.parseLong(value) * base;
- }
- }
- }
}
diff --git a/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java b/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
index 66c9cd9..37c23ac 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
@@ -9,6 +9,7 @@
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
+import com.google.common.cache.CacheBuilder;
import jp.ac.titech.c.se.stein.core.*;
import jp.ac.titech.c.se.stein.core.cache.*;
import jp.ac.titech.c.se.stein.entry.*;
@@ -42,9 +43,21 @@ public class RepositoryRewriter implements RewriterCommand {
protected static final ObjectId ZERO = ObjectId.zeroId();
/**
- * Entry-to-entries mapping.
+ * Entry-to-entries mapping. Backed by Guava Cache with LRU eviction
+ * to bound memory usage proportional to available heap.
*/
- protected Map entryMapping = new HashMap<>();
+ protected Map entryMapping;
+
+ private static final int BYTES_PER_ENTRY = 300;
+
+ private static Map createEntryMapping(long memoryBudget) {
+ final long maxWeight = Math.max(1000, memoryBudget / BYTES_PER_ENTRY);
+ return CacheBuilder.newBuilder()
+ .maximumWeight(maxWeight)
+ .weigher((Entry k, AnyColdEntry v) -> v.size())
+ .build()
+ .asMap();
+ }
/**
* Root tree-to-tree mapping.
@@ -114,24 +127,25 @@ public enum CacheLevel {
public void initialize(final Repository sourceRepo, final Repository targetRepo) {
source = new RepositoryAccess(sourceRepo);
target = new RepositoryAccess(targetRepo);
+ // memory budget: defaults to 25% of max heap if not specified
+ entryMapping = createEntryMapping(config.entryMappingMemory >= 0 ? config.entryMappingMemory : Runtime.getRuntime().maxMemory() / 4);
isOverwriting = sourceRepo == targetRepo;
- if (config.nthreads > 1) {
- this.entryMapping = new ConcurrentHashMap<>();
- }
if (config.isDryRunning) {
source.setDryRunning(true);
target.setDryRunning(true);
}
if (config.isAddingNotes && !isOverwriting) {
isChained = source.getRef(R_NOTES_ORIG) != null;
- prevNotes = new NoteObjectIdMap(target.readNotes(R_NOTES_PREV), target);
+ // Fall back to refs/notes/commits when git-stein-prev does not exist (old format)
+ final String prevRef = target.getRef(R_NOTES_PREV) != null ? R_NOTES_PREV : Constants.R_NOTES_COMMITS;
+ prevNotes = new NoteObjectIdMap(target.readNotes(prevRef), target);
if (isChained) {
origNotes = new NoteObjectIdMap(target.readNotes(R_NOTES_ORIG), target);
sourceOrigNotes = new NoteObjectIdMap(source.readNotes(R_NOTES_ORIG), source);
} else {
origNotes = prevNotes;
}
- commitMapping.restoreFromTarget(target, R_NOTES_PREV);
+ commitMapping.restoreFromTarget(target, prevRef);
}
if (!config.cacheLevel.isEmpty()) {
cacheProvider = switch (config.cacheBackend) {
diff --git a/src/main/java/jp/ac/titech/c/se/stein/util/SizeConverter.java b/src/main/java/jp/ac/titech/c/se/stein/util/SizeConverter.java
new file mode 100644
index 0000000..35f5963
--- /dev/null
+++ b/src/main/java/jp/ac/titech/c/se/stein/util/SizeConverter.java
@@ -0,0 +1,33 @@
+package jp.ac.titech.c.se.stein.util;
+
+import picocli.CommandLine.ITypeConverter;
+
+/**
+ * Converts a human-readable size string (e.g., "10", "1K", "256M", "1.5G") to bytes.
+ */
+public class SizeConverter implements ITypeConverter {
+ @Override
+ public Long convert(final String value) {
+ if (value.isEmpty()) {
+ throw new IllegalArgumentException("Empty value is given");
+ }
+ final int len = value.length();
+ final char unit = Character.toUpperCase(value.charAt(len - 1));
+ final String num = value.substring(0, len - 1);
+ return switch (unit) {
+ case 'B' -> convert(num);
+ case 'K' -> displaySizeToByteCount(num, 1024);
+ case 'M' -> displaySizeToByteCount(num, 1024 * 1024);
+ case 'G' -> displaySizeToByteCount(num, 1024 * 1024 * 1024);
+ default -> displaySizeToByteCount(value, 1);
+ };
+ }
+
+ protected long displaySizeToByteCount(final String value, final long base) {
+ if (value.contains(".")) {
+ return (long) (Double.parseDouble(value) * base);
+ } else {
+ return Long.parseLong(value) * base;
+ }
+ }
+}
diff --git a/src/test/java/jp/ac/titech/c/se/stein/app/blob/FilterBlobTest.java b/src/test/java/jp/ac/titech/c/se/stein/app/blob/FilterBlobTest.java
index 89b9106..5fdc696 100644
--- a/src/test/java/jp/ac/titech/c/se/stein/app/blob/FilterBlobTest.java
+++ b/src/test/java/jp/ac/titech/c/se/stein/app/blob/FilterBlobTest.java
@@ -1,6 +1,7 @@
package jp.ac.titech.c.se.stein.app.blob;
import jp.ac.titech.c.se.stein.entry.Entry;
+import jp.ac.titech.c.se.stein.util.SizeConverter;
import jp.ac.titech.c.se.stein.core.RepositoryAccess;
import jp.ac.titech.c.se.stein.testing.TestRepo;
import org.eclipse.jgit.revwalk.RevCommit;
@@ -28,7 +29,7 @@ static void tearDown() {
@Test
public void testSizeConverter() {
- final FilterBlob.SizeConverter converter = new FilterBlob.SizeConverter();
+ final SizeConverter converter = new SizeConverter();
assertEquals(Long.valueOf(10), converter.convert("10"));
assertEquals(Long.valueOf(10), converter.convert("10B"));
assertEquals(Long.valueOf(1024), converter.convert("1K"));
From 83c7f555ea896ca24397c25c1eee3a2c893fa778 Mon Sep 17 00:00:00 2001
From: Shinpei Hayashi
Date: Tue, 24 Mar 2026 19:26:37 +0900
Subject: [PATCH 19/28] Remove fallback
---
.../ac/titech/c/se/stein/rewriter/RepositoryRewriter.java | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java b/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
index 37c23ac..de12029 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
@@ -136,16 +136,14 @@ public void initialize(final Repository sourceRepo, final Repository targetRepo)
}
if (config.isAddingNotes && !isOverwriting) {
isChained = source.getRef(R_NOTES_ORIG) != null;
- // Fall back to refs/notes/commits when git-stein-prev does not exist (old format)
- final String prevRef = target.getRef(R_NOTES_PREV) != null ? R_NOTES_PREV : Constants.R_NOTES_COMMITS;
- prevNotes = new NoteObjectIdMap(target.readNotes(prevRef), target);
+ prevNotes = new NoteObjectIdMap(target.readNotes(R_NOTES_PREV), target);
if (isChained) {
origNotes = new NoteObjectIdMap(target.readNotes(R_NOTES_ORIG), target);
sourceOrigNotes = new NoteObjectIdMap(source.readNotes(R_NOTES_ORIG), source);
} else {
origNotes = prevNotes;
}
- commitMapping.restoreFromTarget(target, prevRef);
+ commitMapping.restoreFromTarget(target, R_NOTES_PREV);
}
if (!config.cacheLevel.isEmpty()) {
cacheProvider = switch (config.cacheBackend) {
From b2ddefa6ca47edce00580af534eed52e5d2d3b4d Mon Sep 17 00:00:00 2001
From: Shinpei Hayashi
Date: Tue, 24 Mar 2026 19:28:57 +0900
Subject: [PATCH 20/28] Remove commit/refentry mappings from cache
---
.../c/se/stein/core/cache/CacheProvider.java | 7 -------
.../stein/core/cache/GuavaCacheProvider.java | 19 -------------------
.../core/cache/MVStoreCacheProvider.java | 15 ---------------
.../se/stein/rewriter/RepositoryRewriter.java | 2 +-
4 files changed, 1 insertion(+), 42 deletions(-)
diff --git a/src/main/java/jp/ac/titech/c/se/stein/core/cache/CacheProvider.java b/src/main/java/jp/ac/titech/c/se/stein/core/cache/CacheProvider.java
index ea646ff..d05b952 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/core/cache/CacheProvider.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/core/cache/CacheProvider.java
@@ -1,10 +1,7 @@
package jp.ac.titech.c.se.stein.core.cache;
-import jp.ac.titech.c.se.stein.core.RefEntry;
-
import jp.ac.titech.c.se.stein.entry.AnyColdEntry;
import jp.ac.titech.c.se.stein.entry.Entry;
-import org.eclipse.jgit.lib.ObjectId;
import java.util.Map;
@@ -14,12 +11,8 @@
public interface CacheProvider {
boolean isInitial();
- Map getCommitMapping();
-
Map getEntryMapping();
- Map getRefEntryMapping();
-
default void inTransaction(java.util.concurrent.Callable fn) {
try {
fn.call();
diff --git a/src/main/java/jp/ac/titech/c/se/stein/core/cache/GuavaCacheProvider.java b/src/main/java/jp/ac/titech/c/se/stein/core/cache/GuavaCacheProvider.java
index af8ddd7..4c3b9d7 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/core/cache/GuavaCacheProvider.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/core/cache/GuavaCacheProvider.java
@@ -1,11 +1,8 @@
package jp.ac.titech.c.se.stein.core.cache;
-import jp.ac.titech.c.se.stein.core.RefEntry;
-
import com.google.common.cache.CacheBuilder;
import jp.ac.titech.c.se.stein.entry.AnyColdEntry;
import jp.ac.titech.c.se.stein.entry.Entry;
-import org.eclipse.jgit.lib.ObjectId;
import java.util.Map;
@@ -28,14 +25,6 @@ public boolean isInitial() {
return true;
}
- @Override
- public Map getCommitMapping() {
- return CacheBuilder.newBuilder()
- .maximumSize(maxEntries)
- .build()
- .asMap();
- }
-
@Override
public Map getEntryMapping() {
return CacheBuilder.newBuilder()
@@ -44,12 +33,4 @@ public Map getEntryMapping() {
.build()
.asMap();
}
-
- @Override
- public Map getRefEntryMapping() {
- return CacheBuilder.newBuilder()
- .maximumSize(maxEntries)
- .build()
- .asMap();
- }
}
diff --git a/src/main/java/jp/ac/titech/c/se/stein/core/cache/MVStoreCacheProvider.java b/src/main/java/jp/ac/titech/c/se/stein/core/cache/MVStoreCacheProvider.java
index d6fadee..86d97e8 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/core/cache/MVStoreCacheProvider.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/core/cache/MVStoreCacheProvider.java
@@ -1,10 +1,7 @@
package jp.ac.titech.c.se.stein.core.cache;
-import jp.ac.titech.c.se.stein.core.RefEntry;
-
import jp.ac.titech.c.se.stein.entry.AnyColdEntry;
import jp.ac.titech.c.se.stein.entry.Entry;
-import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.Repository;
import org.h2.mvstore.MVStore;
@@ -34,24 +31,12 @@ public boolean isInitial() {
return initial;
}
- @Override
- @SuppressWarnings("unchecked")
- public Map getCommitMapping() {
- return store.openMap("commits");
- }
-
@Override
@SuppressWarnings("unchecked")
public Map getEntryMapping() {
return store.openMap("entries");
}
- @Override
- @SuppressWarnings("unchecked")
- public Map getRefEntryMapping() {
- return store.openMap("refs");
- }
-
@Override
public void close() {
if (store != null && !store.isClosed()) {
diff --git a/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java b/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
index de12029..28e5724 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
@@ -119,7 +119,7 @@ private static Map createEntryMapping(long memoryBudget) {
protected Config config;
public enum CacheLevel {
- blob, tree, commit
+ blob, tree
}
protected CacheProvider cacheProvider;
From 0d1a1987c8773af72d98fb0aa04fda121996e955 Mon Sep 17 00:00:00 2001
From: Shinpei Hayashi
Date: Tue, 24 Mar 2026 19:32:51 +0900
Subject: [PATCH 21/28] Unify tree/blob caches
---
.../jp/ac/titech/c/se/stein/Application.java | 5 ++-
.../titech/c/se/stein/core/cache/Cache.java | 34 -------------------
.../se/stein/rewriter/RepositoryRewriter.java | 21 +++---------
.../stein/core/cache/CacheProviderTest.java | 3 +-
.../c/se/stein/testing/RewriteBenchmark.java | 3 +-
5 files changed, 8 insertions(+), 58 deletions(-)
diff --git a/src/main/java/jp/ac/titech/c/se/stein/Application.java b/src/main/java/jp/ac/titech/c/se/stein/Application.java
index 3b982cd..e62c055 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/Application.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/Application.java
@@ -5,7 +5,6 @@
import java.time.Duration;
import java.time.Instant;
import java.util.ArrayList;
-import java.util.EnumSet;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.stream.Collectors;
@@ -102,8 +101,8 @@ public enum AlternatesMode { relative, absolute }
fallbackValue = "relative", order = MIDDLE, arity = "0..1")
public AlternatesMode alternatesMode;
- @Option(names = "--cache", split = ",", paramLabel = "", description = "cache level (${COMPLETION-CANDIDATES}. default: none)", order = MIDDLE)
- public EnumSet cacheLevel = EnumSet.noneOf(RepositoryRewriter.CacheLevel.class);
+ @Option(names = "--cache", description = "enable persistent entry caching", order = MIDDLE)
+ public boolean isCachingEnabled = false;
public enum CacheBackend { mvstore, guava }
diff --git a/src/main/java/jp/ac/titech/c/se/stein/core/cache/Cache.java b/src/main/java/jp/ac/titech/c/se/stein/core/cache/Cache.java
index e329de7..1ae64e9 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/core/cache/Cache.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/core/cache/Cache.java
@@ -7,8 +7,6 @@
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
-import java.util.function.Predicate;
-
@AllArgsConstructor
public class Cache extends AbstractMap {
private final Map frontend, readingBackend, writingBackend;
@@ -50,38 +48,6 @@ public void clear() {
writingBackend.clear();
}
- public static class Filter extends AbstractMap {
- private final Predicate condition;
- private final Map delegatee;
-
- public Filter(final Predicate condition, final Map delegatee) {
- this.condition = condition;
- this.delegatee = delegatee;
- }
-
- public static Map apply(final Predicate condition, final Map delegatee) {
- return new Filter<>(condition, delegatee);
- }
-
- @Override
- public V get(final Object key) {
- @SuppressWarnings("unchecked")
- final K k = (K) key;
- return condition.test(k) ? delegatee.get(key) : null;
- }
-
- @Override
- public V put(final K key, final V value) {
- return condition.test(key) ? delegatee.put(key, value) : value;
- }
-
- @Override
-
- public Set> entrySet() {
- return delegatee.entrySet();
- }
- }
-
public static class NullObjectMap extends AbstractMap {
@Override
public V get(final Object key) {
diff --git a/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java b/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
index 28e5724..04ffdfc 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
@@ -118,10 +118,6 @@ private static Map createEntryMapping(long memoryBudget) {
@Setter
protected Config config;
- public enum CacheLevel {
- blob, tree
- }
-
protected CacheProvider cacheProvider;
public void initialize(final Repository sourceRepo, final Repository targetRepo) {
@@ -145,23 +141,14 @@ public void initialize(final Repository sourceRepo, final Repository targetRepo)
}
commitMapping.restoreFromTarget(target, R_NOTES_PREV);
}
- if (!config.cacheLevel.isEmpty()) {
+ if (config.isCachingEnabled) {
cacheProvider = switch (config.cacheBackend) {
case mvstore -> new MVStoreCacheProvider(targetRepo);
case guava -> new GuavaCacheProvider();
};
- if (config.cacheLevel.contains(CacheLevel.blob) || config.cacheLevel.contains(CacheLevel.tree)) {
- log.info("Stored mapping (entry-mapping) is available");
- Map storedEntryMapping = cacheProvider.getEntryMapping();
- if (!config.cacheLevel.contains(CacheLevel.tree)) {
- log.info("Stored mapping (entry-mapping): blob-only filtering");
- storedEntryMapping = Cache.Filter.apply(e -> !e.isTree(), storedEntryMapping);
- } else if (!config.cacheLevel.contains(CacheLevel.blob)) {
- log.info("Stored mapping (entry-mapping): tree-only filtering");
- storedEntryMapping = Cache.Filter.apply(Entry::isTree, storedEntryMapping);
- }
- entryMapping = new Cache<>(entryMapping, storedEntryMapping, !cacheProvider.isInitial(), true);
- }
+ log.info("Stored mapping (entry-mapping) is available");
+ final Map storedEntryMapping = cacheProvider.getEntryMapping();
+ entryMapping = new Cache<>(entryMapping, storedEntryMapping, !cacheProvider.isInitial(), true);
}
}
diff --git a/src/test/java/jp/ac/titech/c/se/stein/core/cache/CacheProviderTest.java b/src/test/java/jp/ac/titech/c/se/stein/core/cache/CacheProviderTest.java
index c885479..989e4ea 100644
--- a/src/test/java/jp/ac/titech/c/se/stein/core/cache/CacheProviderTest.java
+++ b/src/test/java/jp/ac/titech/c/se/stein/core/cache/CacheProviderTest.java
@@ -17,7 +17,6 @@
import java.io.File;
import java.io.IOException;
-import java.util.EnumSet;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
@@ -38,7 +37,7 @@ static void tearDown() {
private Application.Config cacheConfig() {
final Application.Config config = new Application.Config();
- config.cacheLevel = EnumSet.allOf(RepositoryRewriter.CacheLevel.class);
+ config.isCachingEnabled = true;
config.cacheBackend = Application.Config.CacheBackend.mvstore;
return config;
}
diff --git a/src/test/java/jp/ac/titech/c/se/stein/testing/RewriteBenchmark.java b/src/test/java/jp/ac/titech/c/se/stein/testing/RewriteBenchmark.java
index 10fd1d6..198e239 100644
--- a/src/test/java/jp/ac/titech/c/se/stein/testing/RewriteBenchmark.java
+++ b/src/test/java/jp/ac/titech/c/se/stein/testing/RewriteBenchmark.java
@@ -24,7 +24,6 @@
import java.time.Duration;
import java.time.Instant;
import java.util.ArrayList;
-import java.util.EnumSet;
import java.util.List;
/**
@@ -108,7 +107,7 @@ static JsonObject benchmark(String name, File sourceDir, RewriterFactory factory
final Application.Config config = new Application.Config();
if (useCache) {
- config.cacheLevel = EnumSet.allOf(RepositoryRewriter.CacheLevel.class);
+ config.isCachingEnabled = true;
}
final RepositoryRewriter rewriter = factory.create();
rewriter.setConfig(config);
From 24b4709c9ba2ad449ed0eafa81b2df7c9f5ee07e Mon Sep 17 00:00:00 2001
From: Shinpei Hayashi
Date: Tue, 24 Mar 2026 19:37:23 +0900
Subject: [PATCH 22/28] Remove --cache-backend option
---
.../jp/ac/titech/c/se/stein/Application.java | 5 ---
.../stein/core/cache/GuavaCacheProvider.java | 36 -------------------
.../se/stein/rewriter/RepositoryRewriter.java | 5 +--
.../stein/core/cache/CacheProviderTest.java | 1 -
4 files changed, 1 insertion(+), 46 deletions(-)
delete mode 100644 src/main/java/jp/ac/titech/c/se/stein/core/cache/GuavaCacheProvider.java
diff --git a/src/main/java/jp/ac/titech/c/se/stein/Application.java b/src/main/java/jp/ac/titech/c/se/stein/Application.java
index e62c055..7ff8e1e 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/Application.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/Application.java
@@ -104,11 +104,6 @@ public enum AlternatesMode { relative, absolute }
@Option(names = "--cache", description = "enable persistent entry caching", order = MIDDLE)
public boolean isCachingEnabled = false;
- public enum CacheBackend { mvstore, guava }
-
- @Option(names = "--cache-backend", paramLabel = "", description = "cache backend (${COMPLETION-CANDIDATES}. default: mvstore)", order = MIDDLE)
- public CacheBackend cacheBackend = CacheBackend.mvstore;
-
@Option(names = "--mapping-mem", paramLabel = "{,K,M,G}", description = "max memory for entry mapping (default: 25%% of max heap)", order = MIDDLE,
converter = SizeConverter.class)
public long entryMappingMemory = -1;
diff --git a/src/main/java/jp/ac/titech/c/se/stein/core/cache/GuavaCacheProvider.java b/src/main/java/jp/ac/titech/c/se/stein/core/cache/GuavaCacheProvider.java
deleted file mode 100644
index 4c3b9d7..0000000
--- a/src/main/java/jp/ac/titech/c/se/stein/core/cache/GuavaCacheProvider.java
+++ /dev/null
@@ -1,36 +0,0 @@
-package jp.ac.titech.c.se.stein.core.cache;
-
-import com.google.common.cache.CacheBuilder;
-import jp.ac.titech.c.se.stein.entry.AnyColdEntry;
-import jp.ac.titech.c.se.stein.entry.Entry;
-
-import java.util.Map;
-
-/**
- * Non-persistent cache provider backed by Guava Cache with LRU eviction.
- */
-public class GuavaCacheProvider implements CacheProvider {
- private static final double HEAP_FRACTION = 0.25;
- private static final int BYTES_PER_ENTRY = 300;
-
- private final long maxEntries;
-
- public GuavaCacheProvider() {
- final long budget = (long) (Runtime.getRuntime().maxMemory() * HEAP_FRACTION);
- this.maxEntries = Math.max(1000, budget / BYTES_PER_ENTRY);
- }
-
- @Override
- public boolean isInitial() {
- return true;
- }
-
- @Override
- public Map getEntryMapping() {
- return CacheBuilder.newBuilder()
- .maximumWeight(maxEntries)
- .weigher((Entry k, AnyColdEntry v) -> v.size())
- .build()
- .asMap();
- }
-}
diff --git a/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java b/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
index 04ffdfc..cd800a6 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
@@ -142,10 +142,7 @@ public void initialize(final Repository sourceRepo, final Repository targetRepo)
commitMapping.restoreFromTarget(target, R_NOTES_PREV);
}
if (config.isCachingEnabled) {
- cacheProvider = switch (config.cacheBackend) {
- case mvstore -> new MVStoreCacheProvider(targetRepo);
- case guava -> new GuavaCacheProvider();
- };
+ cacheProvider = new MVStoreCacheProvider(targetRepo);
log.info("Stored mapping (entry-mapping) is available");
final Map storedEntryMapping = cacheProvider.getEntryMapping();
entryMapping = new Cache<>(entryMapping, storedEntryMapping, !cacheProvider.isInitial(), true);
diff --git a/src/test/java/jp/ac/titech/c/se/stein/core/cache/CacheProviderTest.java b/src/test/java/jp/ac/titech/c/se/stein/core/cache/CacheProviderTest.java
index 989e4ea..ffb674a 100644
--- a/src/test/java/jp/ac/titech/c/se/stein/core/cache/CacheProviderTest.java
+++ b/src/test/java/jp/ac/titech/c/se/stein/core/cache/CacheProviderTest.java
@@ -38,7 +38,6 @@ static void tearDown() {
private Application.Config cacheConfig() {
final Application.Config config = new Application.Config();
config.isCachingEnabled = true;
- config.cacheBackend = Application.Config.CacheBackend.mvstore;
return config;
}
From 24461251d1477ebbd9e26bb28a8d31ef43b1d83c Mon Sep 17 00:00:00 2001
From: Shinpei Hayashi
Date: Tue, 24 Mar 2026 19:38:45 +0900
Subject: [PATCH 23/28] Remove inTransaction block
---
.../c/se/stein/core/cache/CacheProvider.java | 8 --------
.../se/stein/rewriter/RepositoryRewriter.java | 20 ++++++-------------
2 files changed, 6 insertions(+), 22 deletions(-)
diff --git a/src/main/java/jp/ac/titech/c/se/stein/core/cache/CacheProvider.java b/src/main/java/jp/ac/titech/c/se/stein/core/cache/CacheProvider.java
index d05b952..6fa96fa 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/core/cache/CacheProvider.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/core/cache/CacheProvider.java
@@ -13,13 +13,5 @@ public interface CacheProvider {
Map getEntryMapping();
- default void inTransaction(java.util.concurrent.Callable fn) {
- try {
- fn.call();
- } catch (Exception e) {
- throw new RuntimeException(e);
- }
- }
-
default void close() {}
}
diff --git a/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java b/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
index cd800a6..b59168f 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
@@ -153,21 +153,13 @@ public void rewrite(final Context c) {
setUp(c);
try {
final RevWalk walk = prepareRevisionWalk(c);
- if (cacheProvider != null) {
- cacheProvider.inTransaction(() -> {
- rewriteCommits(walk, c);
- updateRefs(c);
- return null;
- });
- } else {
- if (config.nthreads >= 2) {
- log.debug("Parallel rewriting");
- rewriteRootTrees(walk, c);
- Try.io(walk::memoReset);
- }
- rewriteCommits(walk, c);
- updateRefs(c);
+ if (config.nthreads >= 2) {
+ log.debug("Parallel rewriting");
+ rewriteRootTrees(walk, c);
+ Try.io(walk::memoReset);
}
+ rewriteCommits(walk, c);
+ updateRefs(c);
if (config.isAddingNotes) {
prevNotes.write(R_NOTES_PREV, c);
if (isChained) {
From 350df2a015707caa0794811a38625d46407585ce Mon Sep 17 00:00:00 2001
From: Shinpei Hayashi
Date: Tue, 24 Mar 2026 19:45:56 +0900
Subject: [PATCH 24/28] Simplify classes
---
.../c/se/stein/core/cache/CacheProvider.java | 17 -----------------
...VStoreCacheProvider.java => EntryCache.java} | 15 ++++++---------
.../c/se/stein/rewriter/RepositoryRewriter.java | 12 ++++++------
...cheProviderTest.java => EntryCacheTest.java} | 2 +-
4 files changed, 13 insertions(+), 33 deletions(-)
delete mode 100644 src/main/java/jp/ac/titech/c/se/stein/core/cache/CacheProvider.java
rename src/main/java/jp/ac/titech/c/se/stein/core/cache/{MVStoreCacheProvider.java => EntryCache.java} (80%)
rename src/test/java/jp/ac/titech/c/se/stein/core/cache/{CacheProviderTest.java => EntryCacheTest.java} (99%)
diff --git a/src/main/java/jp/ac/titech/c/se/stein/core/cache/CacheProvider.java b/src/main/java/jp/ac/titech/c/se/stein/core/cache/CacheProvider.java
deleted file mode 100644
index 6fa96fa..0000000
--- a/src/main/java/jp/ac/titech/c/se/stein/core/cache/CacheProvider.java
+++ /dev/null
@@ -1,17 +0,0 @@
-package jp.ac.titech.c.se.stein.core.cache;
-
-import jp.ac.titech.c.se.stein.entry.AnyColdEntry;
-import jp.ac.titech.c.se.stein.entry.Entry;
-
-import java.util.Map;
-
-/**
- * Common interface for cache providers that manage object mappings.
- */
-public interface CacheProvider {
- boolean isInitial();
-
- Map getEntryMapping();
-
- default void close() {}
-}
diff --git a/src/main/java/jp/ac/titech/c/se/stein/core/cache/MVStoreCacheProvider.java b/src/main/java/jp/ac/titech/c/se/stein/core/cache/EntryCache.java
similarity index 80%
rename from src/main/java/jp/ac/titech/c/se/stein/core/cache/MVStoreCacheProvider.java
rename to src/main/java/jp/ac/titech/c/se/stein/core/cache/EntryCache.java
index 86d97e8..1ac3d20 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/core/cache/MVStoreCacheProvider.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/core/cache/EntryCache.java
@@ -2,6 +2,7 @@
import jp.ac.titech.c.se.stein.entry.AnyColdEntry;
import jp.ac.titech.c.se.stein.entry.Entry;
+import lombok.Getter;
import org.eclipse.jgit.lib.Repository;
import org.h2.mvstore.MVStore;
@@ -10,14 +11,16 @@
import java.util.Map;
/**
- * Cache provider backed by H2 MVStore.
+ * Persistent entry cache backed by H2 MVStore.
* Data is stored in a single file ({@code cache.mv.db}) in the target repository's .git directory.
*/
-public class MVStoreCacheProvider implements CacheProvider {
+public class EntryCache implements AutoCloseable {
private final MVStore store;
+
+ @Getter
private final boolean initial;
- public MVStoreCacheProvider(final Repository target) {
+ public EntryCache(final Repository target) {
final Path dbFile = target.getDirectory().toPath().resolve("cache.mv.db");
initial = !Files.exists(dbFile);
store = new MVStore.Builder()
@@ -26,12 +29,6 @@ public MVStoreCacheProvider(final Repository target) {
.open();
}
- @Override
- public boolean isInitial() {
- return initial;
- }
-
- @Override
@SuppressWarnings("unchecked")
public Map getEntryMapping() {
return store.openMap("entries");
diff --git a/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java b/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
index b59168f..ab5e6e3 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
@@ -118,7 +118,7 @@ private static Map createEntryMapping(long memoryBudget) {
@Setter
protected Config config;
- protected CacheProvider cacheProvider;
+ protected EntryCache entryCache;
public void initialize(final Repository sourceRepo, final Repository targetRepo) {
source = new RepositoryAccess(sourceRepo);
@@ -142,10 +142,10 @@ public void initialize(final Repository sourceRepo, final Repository targetRepo)
commitMapping.restoreFromTarget(target, R_NOTES_PREV);
}
if (config.isCachingEnabled) {
- cacheProvider = new MVStoreCacheProvider(targetRepo);
+ entryCache = new EntryCache(targetRepo);
log.info("Stored mapping (entry-mapping) is available");
- final Map storedEntryMapping = cacheProvider.getEntryMapping();
- entryMapping = new Cache<>(entryMapping, storedEntryMapping, !cacheProvider.isInitial(), true);
+ final Map storedEntryMapping = entryCache.getEntryMapping();
+ entryMapping = new Cache<>(entryMapping, storedEntryMapping, !entryCache.isInitial(), true);
}
}
@@ -174,8 +174,8 @@ public void rewrite(final Context c) {
target.writeNotes(target.getDefaultNotes(), c);
}
} finally {
- if (cacheProvider != null) {
- cacheProvider.close();
+ if (entryCache != null) {
+ entryCache.close();
}
cleanUp(c);
}
diff --git a/src/test/java/jp/ac/titech/c/se/stein/core/cache/CacheProviderTest.java b/src/test/java/jp/ac/titech/c/se/stein/core/cache/EntryCacheTest.java
similarity index 99%
rename from src/test/java/jp/ac/titech/c/se/stein/core/cache/CacheProviderTest.java
rename to src/test/java/jp/ac/titech/c/se/stein/core/cache/EntryCacheTest.java
index ffb674a..73b16c8 100644
--- a/src/test/java/jp/ac/titech/c/se/stein/core/cache/CacheProviderTest.java
+++ b/src/test/java/jp/ac/titech/c/se/stein/core/cache/EntryCacheTest.java
@@ -22,7 +22,7 @@
import static org.junit.jupiter.api.Assertions.*;
-public class CacheProviderTest {
+public class EntryCacheTest {
static RepositoryAccess source;
@BeforeAll
From 696db5cc32997281c0912d727171003024421ab4 Mon Sep 17 00:00:00 2001
From: Shinpei Hayashi
Date: Tue, 24 Mar 2026 20:15:13 +0900
Subject: [PATCH 25/28] Use memory-budget constrained persistent cache
---
.../titech/c/se/stein/core/cache/Cache.java | 68 -------------------
.../c/se/stein/core/cache/EntryCache.java | 18 ++++-
.../se/stein/rewriter/RepositoryRewriter.java | 11 ++-
.../c/se/stein/testing/RewriteBenchmark.java | 10 +--
4 files changed, 26 insertions(+), 81 deletions(-)
delete mode 100644 src/main/java/jp/ac/titech/c/se/stein/core/cache/Cache.java
diff --git a/src/main/java/jp/ac/titech/c/se/stein/core/cache/Cache.java b/src/main/java/jp/ac/titech/c/se/stein/core/cache/Cache.java
deleted file mode 100644
index 1ae64e9..0000000
--- a/src/main/java/jp/ac/titech/c/se/stein/core/cache/Cache.java
+++ /dev/null
@@ -1,68 +0,0 @@
-package jp.ac.titech.c.se.stein.core.cache;
-
-import lombok.AllArgsConstructor;
-
-import java.util.AbstractMap;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.Map;
-import java.util.Set;
-@AllArgsConstructor
-public class Cache extends AbstractMap {
- private final Map frontend, readingBackend, writingBackend;
-
- public Cache(final Map frontend, final Map backend) {
- this(frontend, backend, backend);
- }
-
- public Cache(final Map frontend, final Map backend, final boolean readFrom, final boolean writeTo) {
- this(frontend, readFrom ? backend : new NullObjectMap<>(),
- writeTo ? backend : new NullObjectMap<>());
- }
-
- @Override
- public V get(final Object key) {
- @SuppressWarnings("unchecked")
- final K k = (K) key;
- return frontend.computeIfAbsent(k, readingBackend::get);
- }
-
- @Override
- public V put(final K key, final V value) {
- writingBackend.put(key, value);
- return frontend.put(key, value);
- }
-
- @Override
-
- public Set> entrySet() {
- final Set> result = new HashSet<>();
- result.addAll(frontend.entrySet());
- result.addAll(readingBackend.entrySet());
- return result;
- }
-
- @Override
- public void clear() {
- frontend.clear();
- writingBackend.clear();
- }
-
- public static class NullObjectMap extends AbstractMap {
- @Override
- public V get(final Object key) {
- return null;
- }
-
- @Override
- public V put(final K key, final V value) {
- return value;
- }
-
- @Override
-
- public Set> entrySet() {
- return Collections.emptySet();
- }
- }
-}
diff --git a/src/main/java/jp/ac/titech/c/se/stein/core/cache/EntryCache.java b/src/main/java/jp/ac/titech/c/se/stein/core/cache/EntryCache.java
index 1ac3d20..3c38ab7 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/core/cache/EntryCache.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/core/cache/EntryCache.java
@@ -15,17 +15,31 @@
* Data is stored in a single file ({@code cache.mv.db}) in the target repository's .git directory.
*/
public class EntryCache implements AutoCloseable {
+ /**
+ * Fraction of memoryBudget allocated to the read page cache.
+ */
+ private static final double READ_CACHE_RATIO = 1.0;
+
+ /**
+ * Fraction of memoryBudget allocated to the write buffer (auto-commit threshold).
+ * Worst-case total memory usage is (READ_CACHE_RATIO + WRITE_BUFFER_RATIO) times the budget.
+ */
+ private static final double WRITE_BUFFER_RATIO = 0.5;
+
private final MVStore store;
@Getter
private final boolean initial;
- public EntryCache(final Repository target) {
+ public EntryCache(final Repository target, final long memoryBudget) {
final Path dbFile = target.getDirectory().toPath().resolve("cache.mv.db");
initial = !Files.exists(dbFile);
+ final int cacheSizeMB = (int) Math.max(1, (long) (memoryBudget * READ_CACHE_RATIO) / (1024 * 1024));
+ final int autoCommitBufferSizeKB = (int) Math.max(1, (long) (memoryBudget * WRITE_BUFFER_RATIO) / 1024);
store = new MVStore.Builder()
.fileName(dbFile.toString())
- .autoCommitDisabled()
+ .cacheSize(cacheSizeMB)
+ .autoCommitBufferSize(autoCommitBufferSizeKB)
.open();
}
diff --git a/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java b/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
index ab5e6e3..3a350d7 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
@@ -123,8 +123,6 @@ private static Map createEntryMapping(long memoryBudget) {
public void initialize(final Repository sourceRepo, final Repository targetRepo) {
source = new RepositoryAccess(sourceRepo);
target = new RepositoryAccess(targetRepo);
- // memory budget: defaults to 25% of max heap if not specified
- entryMapping = createEntryMapping(config.entryMappingMemory >= 0 ? config.entryMappingMemory : Runtime.getRuntime().maxMemory() / 4);
isOverwriting = sourceRepo == targetRepo;
if (config.isDryRunning) {
source.setDryRunning(true);
@@ -141,11 +139,12 @@ public void initialize(final Repository sourceRepo, final Repository targetRepo)
}
commitMapping.restoreFromTarget(target, R_NOTES_PREV);
}
+ final long budget = config.entryMappingMemory >= 0 ? config.entryMappingMemory : Runtime.getRuntime().maxMemory() / 4;
if (config.isCachingEnabled) {
- entryCache = new EntryCache(targetRepo);
- log.info("Stored mapping (entry-mapping) is available");
- final Map storedEntryMapping = entryCache.getEntryMapping();
- entryMapping = new Cache<>(entryMapping, storedEntryMapping, !entryCache.isInitial(), true);
+ entryCache = new EntryCache(targetRepo, budget);
+ entryMapping = entryCache.getEntryMapping();
+ } else {
+ entryMapping = createEntryMapping(budget);
}
}
diff --git a/src/test/java/jp/ac/titech/c/se/stein/testing/RewriteBenchmark.java b/src/test/java/jp/ac/titech/c/se/stein/testing/RewriteBenchmark.java
index 198e239..1cc4539 100644
--- a/src/test/java/jp/ac/titech/c/se/stein/testing/RewriteBenchmark.java
+++ b/src/test/java/jp/ac/titech/c/se/stein/testing/RewriteBenchmark.java
@@ -132,9 +132,9 @@ static JsonObject benchmark(String name, File sourceDir, RewriterFactory factory
result.addProperty("heapMb", heapMb);
result.addProperty("commits", commits);
- if (useCache) {
- // Second run: reuse the same target (cache.db is there)
- System.out.printf(" (cached) %-22s ... ", name);
+ // Second run: incremental (notes skip already-processed commits)
+ {
+ System.out.printf(" (2nd run) %-21s ... ", name);
System.out.flush();
final RepositoryRewriter rewriter2 = factory.create();
@@ -151,8 +151,8 @@ static JsonObject benchmark(String name, File sourceDir, RewriterFactory factory
System.out.printf("%d ms, %d MB heap%n", timeMs2, heapMb2);
- result.addProperty("cachedTimeMs", timeMs2);
- result.addProperty("cachedHeapMb", heapMb2);
+ result.addProperty("secondTimeMs", timeMs2);
+ result.addProperty("secondHeapMb", heapMb2);
}
sourceRepo.close();
From 0ab2c72ee76c76e23f7d9160ee804c8c4a820c06 Mon Sep 17 00:00:00 2001
From: Shinpei Hayashi
Date: Tue, 24 Mar 2026 20:19:01 +0900
Subject: [PATCH 26/28] refactor: rename
---
.../cache/{EntryCache.java => PersistentEntryCache.java} | 4 ++--
.../ac/titech/c/se/stein/rewriter/RepositoryRewriter.java | 8 ++++----
...{EntryCacheTest.java => PersistentEntryCacheTest.java} | 2 +-
3 files changed, 7 insertions(+), 7 deletions(-)
rename src/main/java/jp/ac/titech/c/se/stein/core/cache/{EntryCache.java => PersistentEntryCache.java} (92%)
rename src/test/java/jp/ac/titech/c/se/stein/core/cache/{EntryCacheTest.java => PersistentEntryCacheTest.java} (99%)
diff --git a/src/main/java/jp/ac/titech/c/se/stein/core/cache/EntryCache.java b/src/main/java/jp/ac/titech/c/se/stein/core/cache/PersistentEntryCache.java
similarity index 92%
rename from src/main/java/jp/ac/titech/c/se/stein/core/cache/EntryCache.java
rename to src/main/java/jp/ac/titech/c/se/stein/core/cache/PersistentEntryCache.java
index 3c38ab7..879a99a 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/core/cache/EntryCache.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/core/cache/PersistentEntryCache.java
@@ -14,7 +14,7 @@
* Persistent entry cache backed by H2 MVStore.
* Data is stored in a single file ({@code cache.mv.db}) in the target repository's .git directory.
*/
-public class EntryCache implements AutoCloseable {
+public class PersistentEntryCache implements AutoCloseable {
/**
* Fraction of memoryBudget allocated to the read page cache.
*/
@@ -31,7 +31,7 @@ public class EntryCache implements AutoCloseable {
@Getter
private final boolean initial;
- public EntryCache(final Repository target, final long memoryBudget) {
+ public PersistentEntryCache(final Repository target, final long memoryBudget) {
final Path dbFile = target.getDirectory().toPath().resolve("cache.mv.db");
initial = !Files.exists(dbFile);
final int cacheSizeMB = (int) Math.max(1, (long) (memoryBudget * READ_CACHE_RATIO) / (1024 * 1024));
diff --git a/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java b/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
index 3a350d7..f61bd6e 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
@@ -43,8 +43,8 @@ public class RepositoryRewriter implements RewriterCommand {
protected static final ObjectId ZERO = ObjectId.zeroId();
/**
- * Entry-to-entries mapping. Backed by Guava Cache with LRU eviction
- * to bound memory usage proportional to available heap.
+ * Entry-to-entries mapping. When {@code --cache} is disabled, uses an in-memory
+ * Guava Cache with LRU eviction. When enabled, uses a persistent MVStore map.
*/
protected Map entryMapping;
@@ -118,7 +118,7 @@ private static Map createEntryMapping(long memoryBudget) {
@Setter
protected Config config;
- protected EntryCache entryCache;
+ protected PersistentEntryCache entryCache;
public void initialize(final Repository sourceRepo, final Repository targetRepo) {
source = new RepositoryAccess(sourceRepo);
@@ -141,7 +141,7 @@ public void initialize(final Repository sourceRepo, final Repository targetRepo)
}
final long budget = config.entryMappingMemory >= 0 ? config.entryMappingMemory : Runtime.getRuntime().maxMemory() / 4;
if (config.isCachingEnabled) {
- entryCache = new EntryCache(targetRepo, budget);
+ entryCache = new PersistentEntryCache(targetRepo, budget);
entryMapping = entryCache.getEntryMapping();
} else {
entryMapping = createEntryMapping(budget);
diff --git a/src/test/java/jp/ac/titech/c/se/stein/core/cache/EntryCacheTest.java b/src/test/java/jp/ac/titech/c/se/stein/core/cache/PersistentEntryCacheTest.java
similarity index 99%
rename from src/test/java/jp/ac/titech/c/se/stein/core/cache/EntryCacheTest.java
rename to src/test/java/jp/ac/titech/c/se/stein/core/cache/PersistentEntryCacheTest.java
index 73b16c8..7d3ecf7 100644
--- a/src/test/java/jp/ac/titech/c/se/stein/core/cache/EntryCacheTest.java
+++ b/src/test/java/jp/ac/titech/c/se/stein/core/cache/PersistentEntryCacheTest.java
@@ -22,7 +22,7 @@
import static org.junit.jupiter.api.Assertions.*;
-public class EntryCacheTest {
+public class PersistentEntryCacheTest {
static RepositoryAccess source;
@BeforeAll
From 3f63adb1a42923abf096bd81715cdf083fa91fa7 Mon Sep 17 00:00:00 2001
From: Shinpei Hayashi
Date: Tue, 24 Mar 2026 20:58:43 +0900
Subject: [PATCH 27/28] Update README
---
README.md | 113 ++++++++++++++++++++++++++++++++++++++++--------------
1 file changed, 84 insertions(+), 29 deletions(-)
diff --git a/README.md b/README.md
index 96a2869..59e323a 100644
--- a/README.md
+++ b/README.md
@@ -34,11 +34,7 @@ $ git stein [options...] # When subcommand available
## Recipes
-### Chaining commands
-
-Multiple commands can be listed on the command line.
-They are applied sequentially; intermediate repositories are created under `.git/.git-stein.N` in the target directory and cleaned up automatically.
-As an optimization, consecutive blob translators are composed into a single pass.
+### Splitting and converting to cregit
Split Java files into method-level modules, then convert each to cregit format:
```
@@ -72,18 +68,6 @@ $ git stein path/to/repo -o path/to/out \
@convert --endpoint=http://localhost:8080/convert --pattern='*.java'
```
-### Tracking original commit IDs
-
-When git-stein rewrites a repository, it records the original commit ID in Git notes (enabled by default).
-`@note-commit` reads these notes and prepends the original commit ID to each commit message.
-
-A typical workflow is to first transform, then apply `@note-commit`:
-```
-$ git stein path/to/repo -o path/to/out @historage-jdt @note-commit
-```
-After this, each commit message in `step2` starts with the original commit ID from `repo`.
-This works even after multiple transformations — the notes trace back to the original.
-
### Writing a custom blob translator
Implement the `BlobTranslator` interface to define your own transformation.
@@ -119,12 +103,13 @@ public class MyTranslator implements BlobTranslator {
- `-j`, `--jobs=`: Rewrites trees in parallel using `` threads. If the number of threads is omitted (just `-j` is given), _total number of processors - 1_ is used.
- `-n`, `--dry-run`: Do not actually modify the target repository.
- `--stream-size-limit={,K,M,G}`: increase the stream size limit.
-- `--no-notes`: Stop noting the source commit ID to the commits in the target repository.
+- `--no-notes`: Stop noting the source commit ID to the commits in the target repository (see [Notes](#notes)).
- `--no-pack`: Stop packing objects after transformation finished.
- `--alternates`: Share source objects via Git alternates to skip writing unchanged objects, which speeds up transformations where many objects are unchanged. The target repository will depend on the source's object store until repacked.
-- `--no-composite`: Stop composing multiple blob translators.
+- `--no-composite`: Stop composing multiple blob translators (see [Chaining Commands](#chaining-commands)).
- `--extra-attributes`: Allow opportunity to rewrite the encoding and the signature fields in commits.
-- `--cache=,...`: Specify the object types for caching (`commit`, `blob`, `tree`. See [Incremental transformation](#incremental-transformation) for the details). Default: none. `commit` is recommended.
+- `--cache`: Enable persistent entry caching (see [Caching](#caching)).
+- `--mapping-mem={,K,M,G}`: Max memory for entry mapping cache. Default: 25% of max heap (see [Caching](#caching)).
- `--cmdpath=:...`: Add packages for search for commands.
- `--log=`: Specify log level (default: `INFO`).
- `-q`, `--quiet`: Quiet mode (same as `--log=ERROR`).
@@ -143,19 +128,10 @@ The git-stein supports three rewriting modes.
- _duplicate_ mode (` -o -d`): given a source repository and a path for the target repository, copying the source repository into the given path and applying overwrite mode to the target repository.
-## Incremental Transformation
-In case the source repository to be transformed has been evolving, git-stein can transform only newly added objects.
-With the option `--cache=`, an SQLite3 cache file "cache.db" will be stored in the `.git` directory of the destination repository.
-This file records the correspondence between objects before and after transformation, according to the specified option.
-Correspondences between commits (`--cache=commit`), between trees (`--cache=tree`), and between files (`--cache=blob`) are stored.
-This cache can save the re-transformation of remaining objects during the second and subsequent transformation trials.
-
-
## Bundle Apps
### Blob Translators
_Blob translators_ provide a blob-to-blob(s) translations.
-Multiple blob translators can be composed and applied in a single pass.
#### @historage
Generates a [Historage](https://github.com/hideakihata/git2historage)-like repository using [Universal Ctags](https://ctags.io/).
@@ -285,6 +261,85 @@ A no-op rewriter that copies all objects without transformation.
Useful for verifying that the rewriting pipeline preserves repository content.
+## Chaining Commands
+
+Multiple commands can be listed on a single command line.
+They are applied sequentially as separate transformation steps.
+For example, with three commands `@A @B @C`:
+```
+source → target/.git/.git-stein.1 → target/.git/.git-stein.2 → target
+ (@A) (@B) (@C)
+```
+Intermediate repositories (`.git-stein.N`) are bare repositories created under the target's `.git` directory.
+
+As an optimization, consecutive blob translators are composed into a single pass rather than creating intermediate repositories for each one.
+This behavior can be disabled with `--no-composite`.
+For example, the following runs `@historage-jdt` and `@cregit` as a single composed blob translator, then `@note-commit` as a separate commit translator step:
+```
+$ git stein path/to/repo -o path/to/out \
+ @historage-jdt --no-original --no-classes \
+ @cregit --pattern='*.cjava' --ignore-case \
+ @note-commit
+```
+
+
+## Notes
+
+git-stein records the original commit ID as a git note on each target commit (enabled by default).
+Each note stores the source commit ID as a 40-character hex string.
+This provides the standard way to trace a target commit back to its source, and is visible in `git log` without any extra options (via `refs/notes/commits`).
+Notes are also used for [Incremental Transformation](#incremental-transformation) to skip already-processed commits on subsequent runs.
+
+`@note-commit` reads the note on each commit and embeds the original commit ID into the commit message.
+Place it at the end of the command list:
+```
+$ git stein path/to/repo -o path/to/out @historage-jdt @note-commit
+```
+
+git-stein uses three notes refs:
+`refs/notes/git-stein-prev` stores the immediate source commit ID (i.e., the commit in the input repository of this transformation step),
+`refs/notes/git-stein-orig` stores the original source commit ID (traces back through chained transformations to the very first source),
+and `refs/notes/commits` points to the same object as `git-stein-orig` (visible in `git log` by default).
+For a single transformation, all three refs point to the same notes object.
+In a chained transformation (see [Chaining Commands](#chaining-commands)), `git-stein-prev` and `git-stein-orig` may differ.
+For example, in `.git-stein.2`, `git-stein-prev` points to the commit in `.git-stein.1`, while `git-stein-orig` points to the commit in the original source.
+
+If `--no-notes` is used, no notes are written, and incremental transformation will not be available on subsequent runs.
+The target will be fully rewritten each time.
+
+
+## Incremental Transformation
+
+git-stein supports incremental transformation:
+when the target repository already contains results from a previous run, only new commits are processed.
+
+On subsequent runs, git-stein reads the notes from the target repository to reconstruct the commit mapping and skips already-processed commits.
+
+New commits still need to be transformed.
+To speed up the transformation of these new commits by reusing previously computed entry mappings, use `--cache` (see [Persistent cache](#persistent-cache-cache)).
+
+
+## Caching
+
+git-stein uses two levels of caching to avoid redundant work:
+an in-memory cache for the current run and an optional persistent cache for repeated runs.
+
+### In-memory cache
+
+During a single run, git-stein keeps an in-memory entry mapping (source entry → transformed entry) backed by a Guava Cache with LRU eviction.
+This avoids re-transforming identical entries within the same execution.
+The memory budget is controlled by `--mapping-mem` (default: 25% of max heap).
+
+### Persistent cache (`--cache`)
+
+When `--cache` is enabled, the entry mapping is stored in an MVStore (H2) file (`cache.mv.db`) in the target repository's `.git` directory.
+This persists entry mappings across runs, so entries that were already transformed in a previous run can be reused without re-computation.
+The `--mapping-mem` option also controls the MVStore page cache and write buffer sizes.
+
+`--cache` and the in-memory cache are mutually exclusive:
+when `--cache` is enabled, MVStore replaces the in-memory Guava Cache entirely.
+
+
## Publications
The following article includes the details of the incremental transformation (and a brief introduction to git-stein).
Those who have used git-stein in their academic work may be encouraged to cite the following in their work:
From bd440a45e334bb5d95d9a206dfd8e0776cd4c351 Mon Sep 17 00:00:00 2001
From: Shinpei Hayashi
Date: Tue, 24 Mar 2026 21:46:01 +0900
Subject: [PATCH 28/28] Report cache hit rate.
Guava's built-in stats feature was not used because it cannot report blob and tree hit rates separately
---
.../se/stein/rewriter/RepositoryRewriter.java | 24 ++++++++++++++++---
1 file changed, 21 insertions(+), 3 deletions(-)
diff --git a/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java b/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
index f61bd6e..1bd6e72 100644
--- a/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
+++ b/src/main/java/jp/ac/titech/c/se/stein/rewriter/RepositoryRewriter.java
@@ -5,6 +5,7 @@
import java.nio.charset.Charset;
import java.util.*;
import java.util.concurrent.*;
+import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
@@ -48,6 +49,11 @@ public class RepositoryRewriter implements RewriterCommand {
*/
protected Map entryMapping;
+ private final AtomicLong blobCacheHits = new AtomicLong();
+ private final AtomicLong blobCacheMisses = new AtomicLong();
+ private final AtomicLong treeCacheHits = new AtomicLong();
+ private final AtomicLong treeCacheMisses = new AtomicLong();
+
private static final int BYTES_PER_ENTRY = 300;
private static Map createEntryMapping(long memoryBudget) {
@@ -173,6 +179,16 @@ public void rewrite(final Context c) {
target.writeNotes(target.getDefaultNotes(), c);
}
} finally {
+ final long blobHit = blobCacheHits.get(), blobMiss = blobCacheMisses.get();
+ final long treeHit = treeCacheHits.get(), treeMiss = treeCacheMisses.get();
+ final long blobTotal = blobHit + blobMiss, treeTotal = treeHit + treeMiss, total = blobTotal + treeTotal;
+ if (total > 0) {
+ final long hits = blobHit + treeHit;
+ log.info("Entry mapping cache hit: blob {}/{} ({}%), tree {}/{} ({}%), total {}/{} ({}%)",
+ blobHit, blobTotal, String.format("%.1f", blobTotal > 0 ? blobHit * 100.0 / blobTotal : 0),
+ treeHit, treeTotal, String.format("%.1f", treeTotal > 0 ? treeHit * 100.0 / treeTotal : 0),
+ hits, total, String.format("%.1f", hits * 100.0 / total));
+ }
if (entryCache != null) {
entryCache.close();
}
@@ -364,10 +380,12 @@ protected ObjectId rewriteRootTree(final ObjectId treeId, final Context c) {
*/
protected AnyColdEntry getEntry(final Entry entry, final Context c) {
// computeIfAbsent is unsuitable because this may be invoked recursively
- final AnyColdEntry cache = entryMapping.get(entry);
- if (cache != null) {
- return cache;
+ final AnyColdEntry cached = entryMapping.get(entry);
+ if (cached != null) {
+ (entry.isTree() ? treeCacheHits : blobCacheHits).incrementAndGet();
+ return cached;
}
+ (entry.isTree() ? treeCacheMisses : blobCacheMisses).incrementAndGet();
final AnyColdEntry result = rewriteEntry(entry, c);
entryMapping.put(entry, result);
return result;