#!/usr/bin/env python3
# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Rewrite Google copyright lines in Java sources to "Copyright 2026 Google LLC"."""

import os
import re
import sys

# Matches "Copyright <4-digit year> Google Inc." and
# "Copyright <4-digit year> Google LLC". The pattern is pure ASCII, so it is
# applied to raw bytes: that keeps every other byte of the file (encoding,
# CRLF/LF line endings, trailing newline) exactly as it was.
_COPYRIGHT_RE = re.compile(rb'Copyright \d{4} Google (Inc\.|LLC)')
_COPYRIGHT_REPLACEMENT = b'Copyright 2026 Google LLC'


def fix_copyright(path):
    """Update copyright lines in a ``.java`` file or in every one under a directory.

    Args:
        path: A file or directory path. A file is processed only if its name
            ends with ``.java``; a directory is walked recursively. Paths that
            are neither an existing file nor a directory are ignored.
    """
    if os.path.isfile(path):
        if path.endswith(".java"):
            _fix_file(path)
    elif os.path.isdir(path):
        for root, _, files in os.walk(path):
            for name in files:
                if name.endswith(".java"):
                    _fix_file(os.path.join(root, name))


def _fix_file(file_path):
    """Rewrite matching copyright lines in *file_path*; leave the file alone if none match."""
    with open(file_path, 'rb') as f:
        content = f.read()

    new_content = _COPYRIGHT_RE.sub(_COPYRIGHT_REPLACEMENT, content)

    # Only touch the file (and its mtime) when something actually changed.
    if new_content != content:
        with open(file_path, 'wb') as f:
            f.write(new_content)
        print(f"Updated copyright in {file_path}")


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: fix_copyright_headers.py <file-or-directory> ...",
              file=sys.stderr)
        sys.exit(1)

    for arg in sys.argv[1:]:
        fix_copyright(arg)
#!/bin/bash
# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Migrates a standalone repository into the google-cloud-java monorepo.
#
# The source repository's history is merged (strategy "ours") and its tree is
# read into a sub-directory named after the repository. Follow-up commits then
# adapt CODEOWNERS, the root pom.xml, GitHub workflows, generation_config.yaml,
# versions.txt, copyright headers and the library's own pom.xml.
#
# Environment variables (prompted for when unset):
#   SOURCE_REPO_URL  URL of the repository to migrate.
#   CODEOWNER        Owner appended to .github/CODEOWNERS (optional).
#   WORKING_DIR      Scratch directory holding both clones
#                    (default: ../../migration-work, relative to the
#                    directory the script is started from).

# Exit on error
set -e

# Abort unless command $1 is available on PATH.
check_command() {
  if ! command -v "$1" >/dev/null 2>&1; then
    echo "Error: $1 is not installed or not in PATH." >&2
    exit 1
  fi
}

# Append a newline to file $1 when it is non-empty and its last byte is not a
# newline, so that a following ">>" starts on a fresh line.
ensure_trailing_newline() {
  if [ -s "$1" ] && [ -n "$(tail -c 1 "$1")" ]; then
    echo >> "$1"
  fi
}

# Commit the index with message $1, or report and continue when nothing is
# staged ("git commit" would otherwise fail and abort the run under set -e).
commit_if_staged() {
  if git diff --cached --quiet; then
    echo "Nothing staged; skipping commit: $1"
  else
    git commit -n --no-gpg-sign -m "$1"
  fi
}

# Clone URL $2 into directory $3, or force an existing clone back to a clean
# origin/main. $1 is a label used in the progress messages.
clone_or_refresh() {
  local label="$1" url="$2" dir="$3"
  if [ ! -d "$dir" ]; then
    echo "Cloning $label: $url into $dir"
    git clone "$url" "$dir"
  else
    echo "Directory $dir already exists. Ensuring it is clean and up-to-date..."
    (
      cd "$dir"
      git fetch origin
      git checkout -f main
      git reset --hard origin/main
      git clean -fd
    )
  fi
}

# Check for necessary CLI binaries
check_command git
check_command python3
check_command mvn

# Configuration
MONOREPO_URL="https://github.com/googleapis/google-cloud-java"
if [ -z "${SOURCE_REPO_URL:-}" ]; then
  # "|| true": on EOF (non-interactive run) fall back to the default.
  read -r -p "Enter SOURCE_REPO_URL [https://github.com/googleapis/java-logging]: " input_url || true
  SOURCE_REPO_URL="${input_url:-https://github.com/googleapis/java-logging}"
fi
CODEOWNER="${CODEOWNER:-}"
if [ -z "$CODEOWNER" ]; then
  # CODEOWNER is optional; an empty answer skips the CODEOWNERS update.
  read -r -p "Enter CODEOWNER (e.g., @chingor13): " CODEOWNER || true
fi

# Derive names from URLs to avoid duplication. A trailing "/" or ".git" is
# dropped so that ".../java-logging/" and ".../java-logging.git" both yield
# "java-logging".
SOURCE_REPO_URL="${SOURCE_REPO_URL%/}"
SOURCE_REPO_NAME="${SOURCE_REPO_URL##*/}"
SOURCE_REPO_NAME="${SOURCE_REPO_NAME%.git}"
MONOREPO_NAME="${MONOREPO_URL##*/}"
if [ -z "$SOURCE_REPO_NAME" ]; then
  echo "Error: could not derive a repository name from '$SOURCE_REPO_URL'." >&2
  exit 1
fi

# Get absolute path to the transformation scripts before any cd
TRANSFORM_SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
TRANSFORM_SCRIPT="$TRANSFORM_SCRIPT_DIR/transform_workflow.py"
MODERNIZE_POM_SCRIPT="$TRANSFORM_SCRIPT_DIR/modernize_pom.py"
UPDATE_ROOT_POM_SCRIPT="$TRANSFORM_SCRIPT_DIR/update_root_pom.py"
FIX_COPYRIGHT_SCRIPT="$TRANSFORM_SCRIPT_DIR/fix_copyright_headers.py"

echo "Starting migration using git read-tree with isolated clones..."

# 0. Create working directory and resolve it to an absolute path, so that the
#    paths below stay valid after the script changes directory.
WORKING_DIR="${WORKING_DIR:-../../migration-work}"
mkdir -p "$WORKING_DIR"
WORKING_DIR="$(cd "$WORKING_DIR" && pwd)"
SOURCE_DIR="$WORKING_DIR/$SOURCE_REPO_NAME-source"
TARGET_DIR="$WORKING_DIR/$MONOREPO_NAME-target"

# 1. Clone the source repository
clone_or_refresh "source repo" "$SOURCE_REPO_URL" "$SOURCE_DIR"

# 2. Clone the target monorepo (the "isolated clone")
clone_or_refresh "target monorepo" "$MONOREPO_URL" "$TARGET_DIR"

cd "$TARGET_DIR"

# Ensure we are on a clean main branch in the target clone
echo "Ensuring clean state in target monorepo..."
git fetch origin
git checkout -f main
git reset --hard origin/main
git clean -fdx

# Check if the repository is already migrated
if [ -d "$SOURCE_REPO_NAME" ]; then
  echo "Error: Directory $SOURCE_REPO_NAME already exists in the monorepo." >&2
  echo "This repository seems to have already been migrated." >&2
  exit 1
fi

# 2.5 Create a new feature branch for the migration
BRANCH_NAME="migrate-$SOURCE_REPO_NAME"
echo "Creating feature branch: $BRANCH_NAME"
if git show-ref --verify --quiet "refs/heads/$BRANCH_NAME"; then
  git branch -D "$BRANCH_NAME"
fi
git checkout -b "$BRANCH_NAME"

# 3. Add the source repo as a remote
echo "Adding remote for $SOURCE_REPO_NAME: $SOURCE_DIR"
if git remote get-url "$SOURCE_REPO_NAME" >/dev/null 2>&1; then
  git remote remove "$SOURCE_REPO_NAME"
fi
git remote add "$SOURCE_REPO_NAME" "$SOURCE_DIR"

# 4. Fetch the source repo
echo "Fetching $SOURCE_REPO_NAME..."
git fetch "$SOURCE_REPO_NAME"

# 5. Merge the histories using 'ours' strategy to keep monorepo content
echo "Merging histories (strategy: ours)..."
git merge --allow-unrelated-histories --no-ff "$SOURCE_REPO_NAME/main" -s ours --no-commit -m "chore($SOURCE_REPO_NAME): migrate $SOURCE_REPO_NAME into monorepo"

# 6. Read the tree from the source repo into the desired subdirectory
echo "Reading tree into prefix $SOURCE_REPO_NAME/..."
git read-tree --prefix="$SOURCE_REPO_NAME/" -u "$SOURCE_REPO_NAME/main"

# 6.5 Remove common files from the root of the migrated library
echo "Removing common files from the root of $SOURCE_REPO_NAME/..."
rm -f "$SOURCE_REPO_NAME/.gitignore"
rm -f "$SOURCE_REPO_NAME/renovate.json"
rm -f "$SOURCE_REPO_NAME/LICENSE"
rm -f "$SOURCE_REPO_NAME/java.header"
rm -f "$SOURCE_REPO_NAME/license-checks.xml"
find "$SOURCE_REPO_NAME" -maxdepth 1 -name "*.md" ! -name "CHANGELOG.md" ! -name "README.md" -delete
# read-tree already put these files in the index; stage their removal so the
# migration commit itself omits them instead of a later, unrelated commit.
git add -A -- "$SOURCE_REPO_NAME"

# 7. Commit the migration (always committed: this concludes the merge)
echo "Committing migration..."
git commit -n --no-gpg-sign -m "chore($SOURCE_REPO_NAME): migrate $SOURCE_REPO_NAME into monorepo"

# 7.1 Update CODEOWNERS
if [ -n "$CODEOWNER" ]; then
  echo "Updating .github/CODEOWNERS..."
  mkdir -p .github
  ensure_trailing_newline .github/CODEOWNERS
  echo "/$SOURCE_REPO_NAME/ $CODEOWNER @googleapis/cloud-java-team-teamsync" >> .github/CODEOWNERS

  echo "Committing CODEOWNERS update..."
  git add .github/CODEOWNERS
  commit_if_staged "chore($SOURCE_REPO_NAME): add code owners for $SOURCE_REPO_NAME"
fi

# 7.2 Update root pom.xml modules
echo "Updating root pom.xml modules..."
python3 "$UPDATE_ROOT_POM_SCRIPT" "pom.xml" "$SOURCE_REPO_NAME"

echo "Committing root pom.xml modules update..."
git add pom.xml
commit_if_staged "chore($SOURCE_REPO_NAME): add module to root pom.xml"

# 7.5 Migrate GitHub Actions workflows
echo "Checking for GitHub Actions workflows..."
if [ -d "$SOURCE_REPO_NAME/.github/workflows" ]; then
  echo "Migrating workflows to root .github/workflows/..."
  mkdir -p .github/workflows

  for workflow in "$SOURCE_REPO_NAME/.github/workflows/"*; do
    if [ -f "$workflow" ]; then
      filename=$(basename "$workflow")

      # Skip workflows that are redundant in the monorepo
      case "$filename" in
        "hermetic_library_generation.yaml" | "update_generation_config.yaml" | \
        "approve-readme.yaml" | "auto-release.yaml" | "renovate_config_check.yaml" | \
        "samples.yaml" | "unmanaged_dependency_check.yaml")
          echo "Skipping redundant workflow: $filename"
          continue
          ;;
      esac

      new_filename="${SOURCE_REPO_NAME}-${filename}"
      target_path=".github/workflows/$new_filename"

      echo "Migrating and adapting $filename to $target_path"
      python3 "$TRANSFORM_SCRIPT" "$SOURCE_REPO_NAME" < "$workflow" > "$target_path"
    fi
  done

  # Remove the library's own .github directory and stage that removal together
  # with the new root-level workflows.
  rm -rf "$SOURCE_REPO_NAME/.github"

  echo "Committing workflow migration..."
  git add -A -- "$SOURCE_REPO_NAME/.github"
  git add .github/workflows
  commit_if_staged "chore($SOURCE_REPO_NAME): migrate and adapt GitHub Actions workflows"
fi

# 7.6 Update generation_config.yaml
echo "Updating generation_config.yaml..."
SOURCE_CONFIG="$SOURCE_REPO_NAME/generation_config.yaml"
if [ -f "$SOURCE_CONFIG" ]; then
  # Extract the first library entry (starts with "  - api_shortname").
  # NOTE(review): assumes library entries in the source config are indented
  # by two spaces and by zero in the monorepo config - confirm.
  ENTRY=$(awk '/^  - api_shortname:/{flag=1; print $0; next} /^  - / && flag{flag=0} flag' "$SOURCE_CONFIG")

  if [ -n "$ENTRY" ]; then
    # Simple cleanup: remove the repo and repo_short keys if they exist and
    # shift the entry left to match the monorepo (0 spaces for "-").
    CLEAN_ENTRY=$(printf '%s\n' "$ENTRY" | sed '/^ *repo:/d' | sed '/^ *repo_short:/d' | sed 's/^  //')

    # Append to target generation_config.yaml
    ensure_trailing_newline generation_config.yaml
    echo "" >> generation_config.yaml
    printf '%s\n' "$CLEAN_ENTRY" >> generation_config.yaml

    echo "Committing generation_config.yaml update..."
    git add generation_config.yaml
    commit_if_staged "chore($SOURCE_REPO_NAME): add library to generation_config.yaml"
  else
    echo "No library entry found in $SOURCE_CONFIG; leaving generation_config.yaml unchanged."
  fi
fi

# 7.7 Consolidate versions.txt
echo "Consolidating versions.txt..."
SOURCE_VERSIONS="$SOURCE_REPO_NAME/versions.txt"
if [ -f "$SOURCE_VERSIONS" ]; then
  # Append data lines only to root versions.txt (exclude comments/headers).
  # grep exits with 1 when nothing matches; that is not an error here, but
  # any other non-zero status still aborts the script.
  ensure_trailing_newline versions.txt
  grep "^[a-zA-Z0-9]" "$SOURCE_VERSIONS" >> versions.txt || [ $? -eq 1 ]

  # Remove the migrated subdirectory's versions.txt
  rm "$SOURCE_VERSIONS"

  echo "Committing versions.txt update..."
  git add versions.txt "$SOURCE_VERSIONS"
  commit_if_staged "chore($SOURCE_REPO_NAME): consolidate versions.txt into root"
fi

# 7.8 Fix copyright headers in Java files
echo "Fixing copyright headers in Java files..."
python3 "$FIX_COPYRIGHT_SCRIPT" "$SOURCE_REPO_NAME"

echo "Committing copyright header fixes..."
git add "$SOURCE_REPO_NAME"
commit_if_staged "chore($SOURCE_REPO_NAME): update copyright headers to 2026 Google LLC"

# 7.9 Modernize the migrated library's root pom.xml
echo "Modernizing root pom.xml..."
# Take the version from the first <version> line in the jar parent pom that
# carries the google-cloud-java x-version-update marker.
PARENT_VERSION=$(grep -m 1 "<version>.*{x-version-update:google-cloud-java:current}" google-cloud-jar-parent/pom.xml | sed -E 's/.*<version>(.*)<\/version>.*/\1/')
if [ -z "$PARENT_VERSION" ]; then
  echo "Error: could not determine the parent version from google-cloud-jar-parent/pom.xml." >&2
  exit 1
fi
python3 "$MODERNIZE_POM_SCRIPT" "$SOURCE_REPO_NAME/pom.xml" "$PARENT_VERSION" "$SOURCE_REPO_NAME"

echo "Committing root pom.xml modernization..."
git add "$SOURCE_REPO_NAME/pom.xml"
commit_if_staged "chore($SOURCE_REPO_NAME): modernize root pom.xml"

# 7.10 Verify compilation
echo "Verifying compilation..."
(cd "$SOURCE_REPO_NAME" && mvn compile -DskipTests -T 1C)

# 8. Cleanup
echo "Cleaning up temporary source clone..."
rm -rf "$SOURCE_DIR"

echo "Migration complete!"
echo "The migrated codebase is available in: $TARGET_DIR"
echo "You are on the $BRANCH_NAME branch in that clone."
# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Adapt a migrated library's root pom.xml to the google-cloud-java monorepo."""

import sys
import re

_URL_TAGS = ('<url>', '<connection>', '<developerConnection>')
_MONOREPO_HTTPS = 'https://github.com/googleapis/google-cloud-java'
_MONOREPO_SSH = 'git@github.com:googleapis/google-cloud-java.git'


def _modernize_urls(line, source_repo_name):
    """Point googleapis GitHub URLs on a url/connection line at the monorepo."""
    if not any(tag in line for tag in _URL_TAGS):
        return line
    if 'github.com' not in line or 'googleapis/' not in line:
        return line

    if source_repo_name:
        repo = re.escape(source_repo_name)
    else:
        repo = r'[a-zA-Z0-9-]+'
    # The repository name must end here; otherwise "java-logging" would also
    # rewrite the prefix of "java-logging-logback".
    end = r'(?![\w-])'

    # scm:git:https URLs first, consuming an optional ".git". Doing the plain
    # HTTPS replacement first would leave nothing for this rule to match (or,
    # with the generic pattern, produce "google-cloud-java.git.git").
    line = re.sub(
        r'scm:git:https://github\.com/googleapis/' + repo + r'(?:\.git)?' + end,
        'scm:git:' + _MONOREPO_HTTPS + '.git',
        line
    )
    # Plain HTTPS URLs (those with an scm:git: prefix were handled above).
    line = re.sub(
        r'(?<!scm:git:)https://github\.com/googleapis/' + repo + end,
        _MONOREPO_HTTPS,
        line
    )
    # Git SSH URLs.
    line = re.sub(
        r'git@github\.com:googleapis/' + repo + r'(?:\.git)?' + end,
        _MONOREPO_SSH,
        line
    )
    return line


def _keep_managed_dependency(group_id, artifact_id, has_version, has_marker):
    """Decide whether a dependencyManagement entry survives pruning.

    An entry is kept when
      1. it carries an x-version-update comment, or
      2. its group is outside com.google* and it declares a version, or
      3. it is com.google.cloud:google-cloud-* and it declares a version.
    """
    if has_marker:
        return True
    if not has_version or not group_id:
        return False
    if not group_id.startswith('com.google'):
        return True
    return (group_id == 'com.google.cloud'
            and bool(artifact_id)
            and artifact_id.startswith('google-cloud-'))


def modernize_pom(file_path, parent_version, source_repo_name=None):
    """Rewrite *file_path* in place for use inside the monorepo.

    The file is processed line by line (it is not parsed as XML):
      * googleapis GitHub URLs on <url>/<connection>/<developerConnection>
        lines are pointed at google-cloud-java;
      * the <parent> section is replaced by google-cloud-jar-parent at
        *parent_version*;
      * <dependencyManagement> entries are pruned (see
        ``_keep_managed_dependency``);
      * the <reporting> section is removed.

    Args:
        file_path: Path of the pom.xml to rewrite.
        parent_version: Version written into the new <parent> section.
        source_repo_name: Name of the migrated repository. When omitted, any
            repository name under github.com/googleapis is rewritten.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    new_lines = []
    in_parent = False
    in_dep_mgmt = False
    in_dependencies = False
    in_dependency = False
    in_exclusions = False
    in_reporting = False
    current_dependency_lines = []
    should_preserve = False
    current_group_id = None
    current_artifact_id = None
    has_version = False

    for line in lines:
        line = _modernize_urls(line, source_repo_name)

        # Parent section modernization
        if '<parent>' in line and not in_parent:
            indent = line[:line.find('<')]
            new_lines.append(f"{indent}<parent>\n")
            new_lines.append(f"{indent}  <groupId>com.google.cloud</groupId>\n")
            new_lines.append(f"{indent}  <artifactId>google-cloud-jar-parent</artifactId>\n")
            # NOTE(review): confirm whether release tooling needs an
            # x-version-update marker comment on this version line.
            new_lines.append(f"{indent}  <version>{parent_version}</version>\n")
            new_lines.append(f"{indent}  <relativePath>../google-cloud-jar-parent/pom.xml</relativePath>\n")
            if '</parent>' in line:
                # Whole section on one line: close it here, otherwise every
                # following line would be treated as parent content.
                new_lines.append(f"{indent}</parent>\n")
            else:
                in_parent = True
            continue
        if '</parent>' in line and in_parent:
            in_parent = False
            new_lines.append(line)
            continue
        if in_parent:
            continue  # skip original parent content

        # Dependency Management pruning
        if '<dependencyManagement>' in line:
            in_dep_mgmt = True
            new_lines.append(line)
            continue
        if '</dependencyManagement>' in line:
            in_dep_mgmt = False
            new_lines.append(line)
            continue

        if in_dep_mgmt:
            if '<dependencies>' in line:
                in_dependencies = True
                new_lines.append(line)
                continue
            if '</dependencies>' in line:
                in_dependencies = False
                new_lines.append(line)
                continue

            if in_dependencies:
                if not in_dependency and '<dependency>' in line:
                    in_dependency = True
                    in_exclusions = False
                    current_dependency_lines = []
                    should_preserve = False
                    current_group_id = None
                    current_artifact_id = None
                    has_version = False

                if in_dependency:
                    current_dependency_lines.append(line)
                    if '{x-version-update:' in line:
                        should_preserve = True
                    if '<exclusions>' in line:
                        in_exclusions = True
                    # Coordinates inside <exclusions> belong to the excluded
                    # artifact, not to the dependency being judged.
                    if not in_exclusions:
                        match = re.search(r'<groupId>(.*?)</groupId>', line)
                        if match:
                            current_group_id = match.group(1).strip()
                        match = re.search(r'<artifactId>(.*?)</artifactId>', line)
                        if match:
                            current_artifact_id = match.group(1).strip()
                    if '</exclusions>' in line:
                        in_exclusions = False
                    if '<version>' in line:
                        has_version = True

                    if '</dependency>' in line:
                        in_dependency = False
                        if _keep_managed_dependency(current_group_id,
                                                    current_artifact_id,
                                                    has_version,
                                                    should_preserve):
                            new_lines.extend(current_dependency_lines)
                    continue

        # Reporting section removal
        if '<reporting>' in line:
            # Stay in "reporting" mode unless the section closes on this line.
            in_reporting = '</reporting>' not in line
            continue
        if '</reporting>' in line:
            in_reporting = False
            continue
        if in_reporting:
            continue

        new_lines.append(line)

    # Clean up double empty lines potentially introduced by pruning
    content = "".join(new_lines)
    content = re.sub(r'\n\s*\n\s*\n', '\n\n', content)

    with open(file_path, 'w', encoding='utf-8') as f:
        f.write(content)


if __name__ == "__main__":
    if len(sys.argv) > 2:
        source_repo = sys.argv[3] if len(sys.argv) > 3 else None
        modernize_pom(sys.argv[1], sys.argv[2], source_repo)
    else:
        print("Usage: python3 modernize_pom.py <file_path> <parent_version> [source_repo_name]",
              file=sys.stderr)
        sys.exit(1)
# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Adapt a GitHub Actions workflow of a migrated library to the monorepo."""

import sys
import re

# A job header is a key indented by exactly two spaces below "jobs:".
_JOB_HEADER_RE = re.compile(r'^  ([\w-]+):')


def _format_name(name):
    """Return *name* as a YAML scalar, double-quoted when a plain scalar would be misread."""
    if ': ' in name or ' #' in name or name.endswith(':'):
        escaped = name.replace('\\', '\\\\').replace('"', '\\"')
        return f'"{escaped}"'
    return name


def transform(content, lib_name):
    """Return the workflow text *content* adapted for library *lib_name*.

    The workflow is edited line by line (it is not parsed as YAML):
      * the top-level ``name:`` is prefixed with *lib_name*;
      * a ``defaults.run.working-directory`` of *lib_name* is inserted before
        ``jobs:``;
      * a ``filter`` job (dorny/paths-filter on ``<lib_name>/**``) is added as
        the first job, and every other job gets ``needs: filter`` plus an
        ``if:`` on the filter output;
      * the ``clirr`` job is dropped.

    Limitations: jobs must be indented by two spaces; a job that already
    declares ``needs:`` or ``if:``, or a workflow that already has a top-level
    ``defaults:``, ends up with a duplicated key and needs manual review.

    Args:
        content: Text of the original workflow file.
        lib_name: Directory name of the library inside the monorepo.

    Returns:
        The transformed workflow text, without a trailing newline.
    """
    new_lines = []
    inserted_defaults = False

    filter_job = f"""  filter:
    runs-on: ubuntu-latest
    outputs:
      library: ${{{{ steps.filter.outputs.library }}}}
    steps:
      - uses: actions/checkout@v4
      - uses: dorny/paths-filter@v3
        id: filter
        with:
          filters: |
            library:
              - '{lib_name}/**'"""

    in_jobs = False
    skip_current_job = False
    for line in content.splitlines():
        if not in_jobs and line.startswith('name:'):
            # Remove surrounding quotes if they exist; the combined name is
            # re-quoted below when YAML requires it.
            orig_name = line[len('name:'):].strip().strip("\"'")
            new_lines.append("name: " + _format_name(f"{lib_name} {orig_name}"))
            continue

        if line.startswith('jobs:'):
            if not inserted_defaults:
                new_lines.append("defaults:")
                new_lines.append("  run:")
                new_lines.append(f"    working-directory: {lib_name}")
                inserted_defaults = True
            new_lines.append(line)
            new_lines.append(filter_job)
            in_jobs = True
            continue

        if (in_jobs and line.strip() and not line[0].isspace()
                and not line.startswith('#')):
            # A new top-level key ends the jobs section; do not let a skipped
            # job swallow it.
            skip_current_job = False

        job_match = _JOB_HEADER_RE.match(line) if in_jobs else None
        if job_match:
            job_name = job_match.group(1)
            skip_current_job = job_name == 'clirr'
            if skip_current_job:
                continue

            if job_name != 'filter':
                new_lines.append(line)
                new_lines.append("    needs: filter")
                new_lines.append(f"    if: ${{{{ needs.filter.outputs.library == 'true' }}}}")
                continue

        if not skip_current_job:
            new_lines.append(line)
    return "\n".join(new_lines)


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: python3 transform_workflow.py <lib_name>", file=sys.stderr)
        sys.exit(1)
    lib = sys.argv[1]
    print(transform(sys.stdin.read(), lib))
#!/usr/bin/env python3
# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Register a migrated library as a <module> in the monorepo's root pom.xml."""

import sys


def update_root_pom(pom_path, module_name):
    """Add ``<module>module_name</module>`` to the first <modules> section of a pom.

    The pom is edited as text. Blank lines inside the section are dropped.
    If the section contains ``java-*`` modules, the lines from the first to
    the last of them are sorted, with the new module inserted among them;
    otherwise the module is appended at the end of the section. A module
    that is already listed is not added again.

    Args:
        pom_path: Path of the pom.xml to rewrite in place.
        module_name: Module (directory) name to register.

    Raises:
        SystemExit: With status 1 when the file has no <modules> section.
    """
    start_tag = '<modules>'
    end_tag = '</modules>'

    # newline='' keeps the file's own line endings on the way in and out.
    with open(pom_path, 'r', encoding='utf-8', newline='') as f:
        content = f.read()

    start_idx = content.find(start_tag)
    # Look for the closing tag only after the opening one.
    end_idx = content.find(end_tag, start_idx + len(start_tag)) if start_idx != -1 else -1

    if start_idx == -1 or end_idx == -1:
        print(f"Error: {start_tag} or {end_tag} not found in {pom_path}",
              file=sys.stderr)
        sys.exit(1)

    section_start = start_idx + len(start_tag)
    modules_section = content[section_start:end_idx]
    eol = '\r\n' if '\r\n' in content else '\n'

    # Non-blank lines of the section, each guaranteed to end with a newline so
    # that appended or re-ordered lines never run together.
    lines = [
        ln if ln.endswith(('\n', '\r')) else ln + eol
        for ln in modules_section.splitlines(keepends=True)
        if ln.strip()
    ]

    entry = f'<module>{module_name}</module>'
    already_listed = any(entry in ln for ln in lines)

    # Reuse the indentation of the existing module lines (default: 4 spaces).
    module_lines = [ln for ln in lines if '<module>' in ln]
    if module_lines:
        first = module_lines[0]
        indent = first[:len(first) - len(first.lstrip())]
    else:
        indent = '    '
    new_module = f'{indent}{entry}{eol}'

    java_indices = [i for i, ln in enumerate(lines) if '<module>java-' in ln]
    if java_indices:
        start_java = java_indices[0]
        end_java = java_indices[-1] + 1
        java_lines = lines[start_java:end_java]
        if not already_listed:
            java_lines.append(new_module)
        java_lines.sort()
        lines[start_java:end_java] = java_lines
    elif not already_listed:
        lines.append(new_module)

    # Keep the indentation that preceded </modules>; fall back to two spaces
    # when the closing tag shared a line with other content.
    tail = modules_section.rpartition('\n')[2]
    if '\n' in modules_section and not tail.strip():
        closing_indent = tail
    else:
        closing_indent = '  '

    new_content = (content[:section_start] + eol + ''.join(lines)
                   + closing_indent + content[end_idx:])
    with open(pom_path, 'w', encoding='utf-8', newline='') as f:
        f.write(new_content)


if __name__ == "__main__":
    if len(sys.argv) != 3:
        print("Usage: update_root_pom.py <pom_path> <module_name>",
              file=sys.stderr)
        sys.exit(1)
    update_root_pom(sys.argv[1], sys.argv[2])