Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
5e30506
chore: add monorepo migration script
meltsufin Dec 22, 2025
2d2b80c
chore: fix path
meltsufin Dec 22, 2025
c9aa394
chore: refine migration script for atomic commits and isolation
meltsufin Dec 22, 2025
98e420b
chore: remove pom.xml update logic from migration script
meltsufin Dec 23, 2025
5c333b8
chore: implement GitHub Actions workflow migration using paths-filter
meltsufin Dec 23, 2025
a63523e
chore: skip redundant workflows and update generation_config.yaml
meltsufin Dec 23, 2025
6ce68c9
chore: expand workflow skip list to include samples, release, and more
meltsufin Dec 23, 2025
3f2f4ba
chore: make workflow transformation logic a permanent script and rena…
meltsufin Dec 23, 2025
46cc566
chore: refine versions.txt consolidation to only append data lines
meltsufin Dec 23, 2025
cf9914a
chore: remove clirr job from workflow transformation
meltsufin Dec 23, 2025
6ddf4df
chore: add copyright header fix to migration script
meltsufin Dec 23, 2025
8d103f2
impl: automate reporting removal, build verification, and dynamic par…
meltsufin Dec 24, 2025
30833e3
feat: add CLI binary checks to migrate.sh
chingor13 Jan 5, 2026
004af07
chore: allow setting SOURCE_REPO_URL via environment variable in migr…
chingor13 Jan 6, 2026
0d824c4
feat: add guard for checking if the repository is already migrated
chingor13 Jan 6, 2026
e7df2ba
feat: add environment variable for specifying codeowner team to migrate
chingor13 Jan 6, 2026
2bd0821
feat: insert new module in root pom.xml
chingor13 Jan 7, 2026
f7232a8
chore: exclude common files from source root in migration script
chingor13 Jan 7, 2026
bf352f2
refactor: extract inline python to a .py file
chingor13 Jan 8, 2026
83eaa22
chore: add license headers
chingor13 Jan 8, 2026
9c82d20
fix: use 2026 when fixing copyright headers
chingor13 Jan 8, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
264 changes: 264 additions & 0 deletions monorepo-migration/migrate.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,264 @@
#!/bin/bash
# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# Abort the whole script as soon as any command returns a non-zero status.
set -o errexit

#######################################
# Ensure an external command is available before the migration starts.
# Arguments: $1 - name of the command to look up
# Outputs:   an error message on stderr when the command cannot be found
# Returns:   0 when the command exists; otherwise exits the script with 1
#######################################
check_command() {
  local tool="$1"

  if command -v "$tool" >/dev/null 2>&1; then
    return 0
  fi

  echo "Error: $tool is not installed or not in PATH." >&2
  exit 1
}

# Fail fast when any CLI binary used further down is missing.
for required_tool in git python3 mvn; do
  check_command "$required_tool"
done

# Configuration
# SOURCE_REPO_URL and CODEOWNER can be overridden through the environment.
MONOREPO_URL="https://github.com/googleapis/google-cloud-java"
SOURCE_REPO_URL="${SOURCE_REPO_URL:-https://github.com/googleapis/java-logging}"
CODEOWNER="${CODEOWNER:-}"

#######################################
# Print the repository name encoded in a clone URL.
# Trailing slashes and a trailing ".git" are ignored so that
# ".../java-logging", ".../java-logging/" and ".../java-logging.git"
# all yield "java-logging".
# Arguments: $1 - repository URL
# Outputs:   the repository name on stdout (empty if none can be derived)
#######################################
repo_name_from_url() {
  local url="$1"
  while [[ "$url" == */ ]]; do
    url="${url%/}"
  done
  url="${url%.git}"
  printf '%s\n' "${url##*/}"
}

# Derive names from URLs to avoid duplication
SOURCE_REPO_NAME="$(repo_name_from_url "$SOURCE_REPO_URL")"
MONOREPO_NAME="$(repo_name_from_url "$MONOREPO_URL")"

# An empty name would turn later paths such as "$SOURCE_REPO_NAME/.github"
# into absolute paths, so refuse to continue.
if [ -z "$SOURCE_REPO_NAME" ]; then
  echo "Error: could not derive a repository name from SOURCE_REPO_URL='$SOURCE_REPO_URL'." >&2
  exit 1
fi

# Use a temporary working directory two levels above the current directory
# (a sibling of the monorepo when run from the script's own directory).
# It is resolved to an absolute path because the script later changes
# directory while still using SOURCE_DIR and TARGET_DIR.
WORKING_PARENT="$(cd ../.. && pwd)"
WORKING_DIR="${WORKING_PARENT%/}/migration-work"
SOURCE_DIR="$WORKING_DIR/$SOURCE_REPO_NAME-source"
TARGET_DIR="$WORKING_DIR/$MONOREPO_NAME-target"

# Get absolute path to the transformation script before any cd
TRANSFORM_SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
TRANSFORM_SCRIPT="$TRANSFORM_SCRIPT_DIR/transform_workflow.py"
MODERNIZE_POM_SCRIPT="$TRANSFORM_SCRIPT_DIR/modernize_pom.py"
UPDATE_ROOT_POM_SCRIPT="$TRANSFORM_SCRIPT_DIR/update_root_pom.py"

echo "Starting migration using git read-tree with isolated clones..."

# 0. Create working directory
mkdir -p "$WORKING_DIR"

#######################################
# Make sure a clone exists at the given directory and matches origin/main.
# A missing directory is cloned; an existing one is fetched, force-checked
# out on main, hard-reset to origin/main and cleaned of untracked files.
# Arguments: $1 - repository URL
#            $2 - clone directory
#            $3 - wording used in the "Cloning ..." message
#            $4 - wording used in the "... directory already exists" message
#######################################
sync_clone() {
  local url="$1"
  local dir="$2"
  local clone_label="$3"
  local dir_label="$4"

  if [ -d "$dir" ]; then
    echo "$dir_label directory $dir already exists. Ensuring it is clean and up-to-date..."
    # The subshell keeps the caller's working directory untouched.
    (
      cd "$dir"
      git fetch origin
      git checkout -f main
      git reset --hard origin/main
      git clean -fd
    )
  else
    echo "Cloning $clone_label: $url into $dir"
    git clone "$url" "$dir"
  fi
}

# 1. Clone the source repository
sync_clone "$SOURCE_REPO_URL" "$SOURCE_DIR" "source repo" "Source"

# 2. Clone the target monorepo (the "isolated clone")
sync_clone "$MONOREPO_URL" "$TARGET_DIR" "target monorepo" "Target"

# Everything from here on runs from the root of the target clone.
cd "$TARGET_DIR"

# Put the target clone on a pristine main: discard local changes, switch to
# main, match origin/main, then drop untracked and ignored files.
echo "Ensuring clean state in target monorepo..."
git fetch origin
git reset --hard HEAD
git clean -fd
git checkout -f main
git reset --hard origin/main
git clean -fdx

# Refuse to continue when a directory named after the source repository is
# already present in the monorepo.
if [[ -d "$SOURCE_REPO_NAME" ]]; then
  {
    echo "Error: Directory $SOURCE_REPO_NAME already exists in the monorepo."
    echo "This repository seems to have already been migrated."
  } >&2
  exit 1
fi


# 2.5 Create a new feature branch for the migration
BRANCH_NAME="migrate-$SOURCE_REPO_NAME"
echo "Creating feature branch: $BRANCH_NAME"
# Look only at local branches: a tag or other ref with the same name must not
# trigger 'git branch -D', which would fail and abort the script.
if git show-ref --verify --quiet "refs/heads/$BRANCH_NAME"; then
  git branch -D "$BRANCH_NAME"
fi
git checkout -b "$BRANCH_NAME"

# 3. Add the source repo as a remote
echo "Adding remote for $SOURCE_REPO_NAME: $SOURCE_DIR"
# -F -x compares whole lines literally, so characters such as '.' in the
# repository name are not interpreted as a regular expression.
if git remote | grep -Fxq -- "$SOURCE_REPO_NAME"; then
  git remote remove "$SOURCE_REPO_NAME"
fi
# The source clone lives next to the target clone inside the working directory.
git remote add "$SOURCE_REPO_NAME" "../$SOURCE_REPO_NAME-source"

# 4. Fetch the source repo
echo "Fetching $SOURCE_REPO_NAME..."
git fetch "$SOURCE_REPO_NAME"

# 5. Merge the histories using 'ours' strategy to keep monorepo content.
# --no-commit leaves the merge open so the source tree can be added to the
# index before the merge commit is recorded.
echo "Merging histories (strategy: ours)..."
git merge --allow-unrelated-histories --no-ff "$SOURCE_REPO_NAME/main" -s ours --no-commit -m "chore($SOURCE_REPO_NAME): migrate $SOURCE_REPO_NAME into monorepo"

# 6. Read the tree from the source repo into the desired subdirectory
# (-u also checks the files out into the working tree).
echo "Reading tree into prefix $SOURCE_REPO_NAME/..."
git read-tree --prefix="$SOURCE_REPO_NAME/" -u "$SOURCE_REPO_NAME/main"

# 6.5 Remove common files from the root of the migrated library
echo "Removing common files from the root of $SOURCE_REPO_NAME/..."
rm -f "$SOURCE_REPO_NAME/.gitignore"
rm -f "$SOURCE_REPO_NAME/renovate.json"
rm -f "$SOURCE_REPO_NAME/LICENSE"
rm -f "$SOURCE_REPO_NAME/java.header"
rm -f "$SOURCE_REPO_NAME/license-checks.xml"
# Top-level Markdown files other than CHANGELOG.md and README.md are dropped.
find "$SOURCE_REPO_NAME" -maxdepth 1 -name "*.md" ! -name "CHANGELOG.md" ! -name "README.md" -delete

# read-tree already put the removed files into the index; stage the deletions
# so they are left out of the migration commit instead of surfacing in a
# later, unrelated commit.
git add -u -- "$SOURCE_REPO_NAME"

# 7. Commit the migration
echo "Committing migration..."
git commit -n --no-gpg-sign -m "chore($SOURCE_REPO_NAME): migrate $SOURCE_REPO_NAME into monorepo"

# 7.1 Update CODEOWNERS
# Only done when a CODEOWNER was supplied; the entry is appended to the
# monorepo-wide .github/CODEOWNERS file and committed on its own.
if [[ -n "$CODEOWNER" ]]; then
  echo "Updating .github/CODEOWNERS..."
  mkdir -p .github
  printf '%s\n' "/$SOURCE_REPO_NAME/ $CODEOWNER @googleapis/cloud-java-team-teamsync" \
    >> .github/CODEOWNERS

  echo "Committing CODEOWNERS update..."
  git add .github/CODEOWNERS
  git commit -n --no-gpg-sign \
    -m "chore($SOURCE_REPO_NAME): add code owners for $SOURCE_REPO_NAME"
fi

# 7.2 Update root pom.xml modules
# The helper script receives the root pom.xml and the new module name.
echo "Updating root pom.xml modules..."
python3 "$UPDATE_ROOT_POM_SCRIPT" "pom.xml" "$SOURCE_REPO_NAME"

echo "Committing root pom.xml modules update..."
git add pom.xml
git commit -n --no-gpg-sign \
  -m "chore($SOURCE_REPO_NAME): add module to root pom.xml"


# 7.5 Migrate GitHub Actions workflows
# Each workflow of the source repository is passed through the transformation
# script and written to the monorepo's .github/workflows/ under a name
# prefixed with the source repository name.
echo "Checking for GitHub Actions workflows..."
if [ -d "$SOURCE_REPO_NAME/.github/workflows" ]; then
  echo "Migrating workflows to root .github/workflows/..."
  mkdir -p .github/workflows

  for workflow in "$SOURCE_REPO_NAME/.github/workflows/"*; do
    # Skips sub-directories and the literal pattern left by an unmatched glob.
    [ -f "$workflow" ] || continue
    filename=$(basename "$workflow")

    # Skip redundant workflows as requested by user
    case "$filename" in
      "hermetic_library_generation.yaml" | "update_generation_config.yaml" | \
      "approve-readme.yaml" | "auto-release.yaml" | "renovate_config_check.yaml" | \
      "samples.yaml" | "unmanaged_dependency_check.yaml")
        echo "Skipping redundant workflow: $filename"
        continue
        ;;
    esac

    new_filename="${SOURCE_REPO_NAME}-${filename}"
    target_path=".github/workflows/$new_filename"

    echo "Migrating and adapting $filename to $target_path"
    python3 "$TRANSFORM_SCRIPT" "$SOURCE_REPO_NAME" < "$workflow" > "$target_path"
  done

  # Remove the migrated library's whole .github directory, including anything
  # in it that was not migrated above.
  rm -rf "$SOURCE_REPO_NAME/.github"

  echo "Committing workflow migration..."
  git add .github/workflows
  # Stage the removal as well, so it belongs to this commit and the commit is
  # never empty when every workflow was on the skip list.
  git add -u -- "$SOURCE_REPO_NAME/.github"
  git commit -n --no-gpg-sign -m "chore($SOURCE_REPO_NAME): migrate and adapt GitHub Actions workflows"
fi

# 7.6 Update generation_config.yaml

#######################################
# Print the first library entry of a generation_config.yaml, ready to be
# appended to the monorepo configuration.
# The entry starts at the first "- api_shortname:" list item, whatever its
# indentation, and ends before the next non-blank line indented no deeper
# than that item. The item's indentation is stripped from every line so the
# "-" ends up in column 0, and the "repo:" / "repo_short:" keys are dropped.
# Arguments: $1 - path to the source generation_config.yaml
# Outputs:   the cleaned entry on stdout (nothing when no entry is found)
#######################################
extract_library_entry() {
  awk '
    # Number of leading spaces of s.
    function lead(s,    t) { t = s; sub(/^ +/, "", t); return length(s) - length(t) }

    !found {
      if ($0 ~ /^ *- api_shortname:/) {
        found = 1
        base = lead($0)
        print substr($0, base + 1)
      }
      next
    }
    /^ *$/ { print ""; next }
    lead($0) <= base { exit }
    /^ *repo(_short)?:/ { next }
    { print substr($0, base + 1) }
  ' "$1"
}

echo "Updating generation_config.yaml..."
SOURCE_CONFIG="$SOURCE_REPO_NAME/generation_config.yaml"
if [ -f "$SOURCE_CONFIG" ]; then
  # This assumes the source config only has one library or we want the first one
  CLEAN_ENTRY=$(extract_library_entry "$SOURCE_CONFIG")

  if [ -n "$CLEAN_ENTRY" ]; then
    # Append to target generation_config.yaml, separated by a blank line
    echo "" >> generation_config.yaml
    printf '%s\n' "$CLEAN_ENTRY" >> generation_config.yaml

    echo "Committing generation_config.yaml update..."
    git add generation_config.yaml
    git commit -n --no-gpg-sign -m "chore($SOURCE_REPO_NAME): add library to generation_config.yaml"
  else
    # Nothing was extracted: leave the monorepo config alone rather than
    # committing stray blank lines.
    echo "Warning: no library entry found in $SOURCE_CONFIG; generation_config.yaml left unchanged." >&2
  fi
fi

# 7.7 Consolidate versions.txt

#######################################
# Append the data lines of one versions.txt to another.
# Only lines starting with a letter or digit are copied (comments, headers
# and blank lines are skipped). If the destination does not end with a
# newline, one is added first so the first appended entry starts on its own
# line. A source without any data lines is not an error.
# Arguments: $1 - source versions.txt
#            $2 - destination versions.txt
# Returns:   0 on success, non-zero when grep fails to read the source
#######################################
append_version_lines() {
  local src="$1"
  local dst="$2"

  # Command substitution strips a trailing newline, so a non-empty result
  # means the last byte of the destination is not a newline.
  if [ -s "$dst" ] && [ -n "$(tail -c 1 "$dst")" ]; then
    echo >> "$dst"
  fi

  # grep exits with 1 when nothing matches; only a real error (>1) may fail.
  grep "^[a-zA-Z0-9]" "$src" >> "$dst" || [ "$?" -eq 1 ]
}

echo "Consolidating versions.txt..."
SOURCE_VERSIONS="$SOURCE_REPO_NAME/versions.txt"
if [ -f "$SOURCE_VERSIONS" ]; then
  # Append data lines only to root versions.txt (exclude comments/headers)
  append_version_lines "$SOURCE_VERSIONS" versions.txt

  # Remove the migrated subdirectory's versions.txt
  rm "$SOURCE_VERSIONS"

  echo "Committing versions.txt update..."
  # Adding the removed path stages its deletion alongside the root update.
  git add versions.txt "$SOURCE_VERSIONS"
  git commit -n --no-gpg-sign -m "chore($SOURCE_REPO_NAME): consolidate versions.txt into root"
fi

# 7.8 Fix copyright headers in Java files
# Every "Copyright <year> Google Inc." / "Copyright <year> Google LLC" notice
# becomes "Copyright 2026 Google LLC". Files are handled as bytes so their
# encoding and line endings are preserved, only files that actually change
# are rewritten, and '{} +' lets one python process handle many files.
echo "Fixing copyright headers in Java files..."
find "$SOURCE_REPO_NAME" -type f -name "*.java" -exec python3 -c '
import re
import sys

pattern = re.compile(rb"Copyright \d{4} Google (Inc\.|LLC)")
for path in sys.argv[1:]:
    with open(path, "rb") as handle:
        original = handle.read()
    updated = pattern.sub(b"Copyright 2026 Google LLC", original)
    if updated != original:
        with open(path, "wb") as handle:
            handle.write(updated)
' {} +

echo "Committing copyright header fixes..."
git add "$SOURCE_REPO_NAME"
# 'git commit' fails when nothing is staged; skip it if no file changed.
if git diff --cached --quiet; then
  echo "No copyright header changes to commit."
else
  git commit -n --no-gpg-sign -m "chore($SOURCE_REPO_NAME): update copyright headers to 2026 Google LLC"
fi

# 7.9 Modernize root pom.xml
# Rewrites the migrated library's top-level pom.xml. The parent version is
# read from the first <version> line of google-cloud-jar-parent/pom.xml that
# carries the x-version-update marker for google-cloud-java.
echo "Modernizing root pom.xml..."
PARENT_VERSION=$(grep -m 1 "<version>.*{x-version-update:google-cloud-java:current}" google-cloud-jar-parent/pom.xml | sed -E 's/.*<version>(.*)<\/version>.*/\1/')
# The pipeline's status is sed's, so a missing file or an unmatched pattern
# only shows up as an empty value; stop here instead of writing an empty
# <version> into the library pom.
if [ -z "$PARENT_VERSION" ]; then
  echo "Error: could not determine the parent version from google-cloud-jar-parent/pom.xml." >&2
  exit 1
fi
python3 "$MODERNIZE_POM_SCRIPT" "$SOURCE_REPO_NAME/pom.xml" "$PARENT_VERSION"

echo "Committing root pom.xml modernization..."
git add "$SOURCE_REPO_NAME/pom.xml"
git commit -n --no-gpg-sign -m "chore($SOURCE_REPO_NAME): modernize root pom.xml"

# 7.10 Verify compilation
# Maven runs inside the migrated module; the subshell keeps the script's own
# working directory unchanged.
echo "Verifying compilation..."
(
  cd "$SOURCE_REPO_NAME" || exit 1
  mvn compile -DskipTests -T 1C
)

# 8. Cleanup
# Only the source clone is removed; the target clone holds the result.
echo "Cleaning up temporary source clone..."
rm -rf "$SOURCE_DIR"

cat <<EOF
Migration complete!
The migrated codebase is available in: $TARGET_DIR
You are on the $BRANCH_NAME branch in that clone.
EOF
116 changes: 116 additions & 0 deletions monorepo-migration/modernize_pom.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import re

def modernize_pom(file_path, parent_version):
    """Rewrite a migrated library's pom.xml in place for use inside the monorepo.

    The file is processed line by line with simple substring checks (it is
    not parsed as XML), applying three transformations:

    * The content of the ``<parent>`` element is replaced with a reference to
      ``com.google.cloud:google-cloud-jar-parent`` at ``parent_version``.
    * Inside ``<dependencyManagement>``, only ``<dependency>`` entries that
      contain an ``{x-version-update:`` marker are kept; other entries and any
      other non-blank lines (such as comments) are dropped.
    * The ``<reporting>`` section is removed.

    Finally, runs of blank lines are collapsed to a single blank line.

    Args:
        file_path: Path of the pom.xml to rewrite. Read and written as UTF-8.
        parent_version: Version string written into the new ``<parent>``.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    new_lines = []
    in_parent = False
    in_dep_mgmt = False
    in_dependencies = False
    in_dependency = False
    in_reporting = False
    current_dependency_lines = []
    has_x_version_update = False

    for line in lines:
        # Parent section modernization
        if '<parent>' in line and not in_parent:
            indent = line[:line.find('<')]
            new_lines.append(f"{indent}<parent>\n")
            new_lines.append(f"{indent} <groupId>com.google.cloud</groupId>\n")
            new_lines.append(f"{indent} <artifactId>google-cloud-jar-parent</artifactId>\n")
            new_lines.append(f"{indent} <version>{parent_version}</version><!-- {{x-version-update:google-cloud-java:current}} -->\n")
            new_lines.append(f"{indent} <relativePath>../google-cloud-jar-parent/pom.xml</relativePath>\n")
            if '</parent>' in line:
                # The whole element sits on one line: close it right away
                # instead of skipping everything up to a tag that never comes.
                new_lines.append(f"{indent}</parent>\n")
            else:
                in_parent = True
            continue
        if '</parent>' in line and in_parent:
            in_parent = False
            new_lines.append(line)
            continue
        if in_parent:
            continue  # skip original parent content

        # Dependency Management pruning
        if '<dependencyManagement>' in line:
            in_dep_mgmt = True
            new_lines.append(line)
            continue
        if '</dependencyManagement>' in line:
            in_dep_mgmt = False
            new_lines.append(line)
            continue

        if in_dep_mgmt:
            if '<dependencies>' in line:
                in_dependencies = True
                new_lines.append(line)
                continue
            if '</dependencies>' in line:
                in_dependencies = False
                new_lines.append(line)
                continue

            if in_dependencies:
                if '<dependency>' in line:
                    # Buffer the entry; whether it is kept is decided at its
                    # closing tag.
                    in_dependency = True
                    current_dependency_lines = [line]
                    has_x_version_update = False
                    continue
                if '</dependency>' in line:
                    in_dependency = False
                    current_dependency_lines.append(line)
                    if has_x_version_update:
                        new_lines.extend(current_dependency_lines)
                    continue

                if in_dependency:
                    current_dependency_lines.append(line)
                    if '{x-version-update:' in line:
                        has_x_version_update = True
                    continue

            # Prune comments and extra whitespace in depMgmt for a cleaner
            # result: blank lines are kept, everything else is dropped.
            if not line.strip():
                new_lines.append(line)
            continue

        # Reporting section removal
        if '<reporting>' in line:
            # Only start skipping when the element is not closed on this line.
            in_reporting = '</reporting>' not in line
            continue
        if '</reporting>' in line:
            in_reporting = False
            continue
        if in_reporting:
            continue

        new_lines.append(line)

    # Clean up double empty lines potentially introduced by pruning. This is
    # computed before the file is reopened so that a failure here cannot leave
    # a truncated pom.xml behind.
    content = "".join(new_lines)
    content = re.sub(r'\n\s*\n\s*\n', '\n\n', content)

    with open(file_path, 'w', encoding='utf-8') as f:
        f.write(content)

# Command-line entry point: expects the pom.xml path and the parent version.
if __name__ == "__main__":
    if len(sys.argv) <= 2:
        # Too few arguments: show the expected invocation and fail.
        print("Usage: python3 modernize_pom.py <file_path> <parent_version>")
        sys.exit(1)
    modernize_pom(sys.argv[1], sys.argv[2])
Loading
Loading