From 1acee2e0247e9b838e2f31babf6a175574c35105 Mon Sep 17 00:00:00 2001 From: bnsoni Date: Wed, 8 Apr 2026 13:05:49 +0300 Subject: [PATCH 1/4] feat: add AMI-based PR validation workflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New spa-pr-validation-new.yml that launches fresh EC2 from pre-built AMI per PR run instead of deploying to shared OCI VMs. Flow: acquire stg slot (1-4) via domain-lock → launch EC2 from AMI → deploy PR mentor image + prod auth → run 4 browser tests in parallel → save status → release slot → terminate EC2 Key differences from existing workflow: - Isolated environment per PR (not shared VMs) - All 4 browsers always parallel (no sequential mode) - EC2 terminated after tests (clean slate every run) - Uses reusable-service-update.yml for infra launch - Domain-number maps to stg1-4 infrastructure Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/spa-pr-validation-new.yml | 655 ++++++++++++++++++++ 1 file changed, 655 insertions(+) create mode 100644 .github/workflows/spa-pr-validation-new.yml diff --git a/.github/workflows/spa-pr-validation-new.yml b/.github/workflows/spa-pr-validation-new.yml new file mode 100644 index 00000000..3a6f785b --- /dev/null +++ b/.github/workflows/spa-pr-validation-new.yml @@ -0,0 +1,655 @@ +name: SPA PR Validation (AMI-based) + +on: + pull_request: + branches: [main] + types: [labeled] + +run-name: 'PR #${{ github.event.pull_request.number }} - Mentor Test (AMI)' + +concurrency: + group: pr-validation-mentor-ami-${{ github.event.pull_request.number }} + cancel-in-progress: false + +permissions: + contents: read + pull-requests: write + statuses: write + id-token: write + +jobs: + # ============================================================ + # GATE: Detect what changed, decide what to build/test + # ============================================================ + gate: + if: github.event.label.name == 'run-tests' && !startsWith(github.head_ref, 
'release/') + runs-on: ubuntu-22.04 + environment: iblai.app + outputs: + should-build-app: ${{ steps.decide.outputs.should-build-app }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Detect changes + id: changes + uses: dorny/paths-filter@v3 + with: + filters: | + app: + - 'src/**' + - 'app/**' + - 'components/**' + - 'features/**' + - 'hooks/**' + - 'lib/**' + - 'actions/**' + - 'contexts/**' + - 'public/**' + - 'next.config.*' + - 'package.json' + - 'pnpm-lock.yaml' + - 'tsconfig*.json' + - 'tailwind.config.*' + - 'Dockerfile' + tests: + - 'e2e/**' + + - name: Decide build strategy + id: decide + run: | + APP_CHANGED="${{ steps.changes.outputs.app }}" + if [[ "$APP_CHANGED" == "true" && "${{ vars.ENABLE_TESTING }}" != "true" ]]; then + echo "should-build-app=true" >> $GITHUB_OUTPUT + else + echo "should-build-app=false" >> $GITHUB_OUTPUT + fi + + # ============================================================ + # TEST RESUMPTION CHECK + # ============================================================ + check-resumption: + needs: [gate] + uses: iblai/iblai-web-ops/.github/workflows/reusable-test-resumption.yml@main + with: + app-name: mentor + pr-number: ${{ github.event.pull_request.number }} + test-dir: e2e/tests + secrets: + s3-bucket: ${{ secrets.S3_LOGS_BUCKET }} + aws-access-key-id: ${{ secrets.S3_LOGS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.S3_LOGS_SECRET_ACCESS_KEY }} + aws-region: ${{ vars.AWS_REGION }} + + # ============================================================ + # ACQUIRE STG SLOT (domain-lock picks stg1-4) + # ============================================================ + acquire-stg-slot: + needs: [gate] + uses: iblai/iblai-web-ops/.github/workflows/reusable-domain-lock.yml@main + secrets: inherit + with: + action: acquire + app-type: testing + context: 'PR #${{ github.event.pull_request.number }} - Mentor AMI Testing' + allowed-domains: '1,2,3,4' + max-wait: 18000 + + # 
============================================================ + # CODE QUALITY (parallel after slot acquired) + # ============================================================ + pinned-versions: + needs: [gate, acquire-stg-slot] + if: always() && needs.acquire-stg-slot.result == 'success' + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v4 + - name: Check all dependency versions are pinned + run: ./scripts/check-pinned-versions.sh + + commitlint: + needs: [gate, acquire-stg-slot] + if: always() && needs.acquire-stg-slot.result == 'success' + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - uses: actions/setup-node@v4 + with: + node-version: '20' + - run: npm install --global @commitlint/cli @commitlint/config-conventional + - run: npx commitlint --from origin/${{ github.base_ref }} --to HEAD --verbose + + lint: + needs: [gate, acquire-stg-slot] + if: always() && needs.acquire-stg-slot.result == 'success' + uses: iblai/iblai-web-ops/.github/workflows/reusable-linting.yml@main + secrets: inherit + + typecheck: + needs: [gate, acquire-stg-slot] + if: always() && needs.acquire-stg-slot.result == 'success' + uses: iblai/iblai-web-ops/.github/workflows/reusable-typecheck.yml@main + secrets: inherit + + coverage: + needs: [gate, acquire-stg-slot] + if: always() && needs.acquire-stg-slot.result == 'success' + uses: iblai/iblai-web-ops/.github/workflows/reusable-unit-test-coverage.yml@main + with: + coverage-threshold: 95 + secrets: inherit + + e2e-coverage-check: + name: 'E2E Journey Coverage' + needs: [gate, acquire-stg-slot] + if: always() && needs.acquire-stg-slot.result == 'success' + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - uses: actions/setup-node@v4 + with: + node-version: '20' + - run: node e2e/scripts/check-journey-coverage.mjs --all --no-regress --base origin/${{ github.base_ref }} + + claude-review-coverage: + name: 'Claude E2E Coverage Review' + needs: [gate, 
acquire-stg-slot] + if: always() && needs.acquire-stg-slot.result == 'success' + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Get changed files + id: changes + run: | + CHANGED=$(git diff --name-only origin/${{ github.base_ref }}...HEAD -- app/ components/ hooks/ lib/ providers/ | head -100) + echo "files<<EOF" >> $GITHUB_OUTPUT # opens the multiline "files" output; closed by the EOF line below + echo "$CHANGED" >> $GITHUB_OUTPUT + echo "EOF" >> $GITHUB_OUTPUT + echo "count=$(echo "$CHANGED" | grep -c . || true)" >> $GITHUB_OUTPUT # grep -c prints 0 itself on no match; only its exit status 1 needs absorbing + - name: Claude coverage analysis + if: steps.changes.outputs.count != '0' + uses: anthropics/claude-code-action@beta + with: + anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} + direct_prompt: | + You are an E2E coverage analyst. Analyze the changed files in this PR and determine if e2e/coverage.json and e2e/COVERAGE.md are properly updated. + + Changed files: + ${{ steps.changes.outputs.files }} + + Tasks: + 1. Read e2e/coverage.json and e2e/COVERAGE.md + 2. For each changed file in app/ or components/: + - Check if it's listed in a journey's sourceFiles in coverage.json + - If it's a new page.tsx route, verify a journey covers it + - If it adds new user-visible behavior, check for corresponding checkpoints + 3. Compare checkpoint counts: current branch vs base branch + 4. Post a summary comment on the PR with: + - Files covered by existing journeys + - Files missing coverage (if any) + - Checkpoint count change (regression warning if decreased) + - Recommended actions (new journeys or checkpoints to add) + + If all changes are covered, approve. If there are gaps, request changes with specific instructions.
+ + claude-review-quality: + needs: [gate, acquire-stg-slot] + if: always() && needs.acquire-stg-slot.result == 'success' + uses: iblai/iblai-web-ops/.github/workflows/reusable-claude-review-quality.yml@main + with: + pr-number: ${{ github.event.pull_request.number }} + secrets: + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + + claude-review-uiux: + needs: [gate, acquire-stg-slot] + if: always() && needs.acquire-stg-slot.result == 'success' + uses: iblai/iblai-web-ops/.github/workflows/reusable-claude-review-uiux.yml@main + with: + pr-number: ${{ github.event.pull_request.number }} + secrets: + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + + # ============================================================ + # CHECK PRODUCTION VERSIONS (fallback images) + # ============================================================ + check-prod-versions: + needs: [gate, acquire-stg-slot] + if: always() && needs.acquire-stg-slot.result == 'success' + runs-on: image-deploy-ops-new + environment: iblai.app + outputs: + mentor-uri: ${{ steps.read-mentor.outputs.image-uri }} + auth-uri: ${{ steps.read-auth.outputs.image-uri }} + steps: + - uses: actions/checkout@v4 + with: + repository: iblai/iblai-web-ops + token: ${{ secrets.GIT_TOKEN || github.token }} + path: .ops + - uses: ./.ops/.github/actions/setup-ssh + with: + ssh-private-key: ${{ secrets.SSH_PRIVATE_DEPLOY_OPS }} + - id: read-mentor + uses: ./.ops/.github/actions/read-prod-versions + with: + app-name: mentor + nodes-json: ${{ vars.NODES_SPA_CHECK }} + - id: read-auth + uses: ./.ops/.github/actions/read-prod-versions + with: + app-name: auth + nodes-json: ${{ vars.NODES_SPA_CHECK }} + + # ============================================================ + # BUILD DOCKER IMAGES + # ============================================================ + build-app-image: + needs: [gate, acquire-stg-slot] + if: >- + always() && + needs.acquire-stg-slot.result == 'success' && + needs.gate.outputs.should-build-app == 'true' + uses: 
iblai/iblai-web-ops/.github/workflows/reusable-pr-docker-build.yml@main + secrets: inherit + with: + dockerfile-path: Dockerfile + image-name: ibl-mentor-spa-pro + registry-type: ecr + event-name: pull_request + pr-number: ${{ github.event.pull_request.number }} + + build-playwright-image: + needs: [gate, acquire-stg-slot] + if: always() && needs.acquire-stg-slot.result == 'success' + uses: iblai/iblai-web-ops/.github/workflows/reusable-pr-docker-build.yml@main + secrets: inherit + with: + dockerfile-path: e2e/Dockerfile + build-context: . + image-name: ibl-mentor-playwright + registry-type: ocir + event-name: pull_request + pr-number: ${{ github.event.pull_request.number }} + + # ============================================================ + # LAUNCH INFRASTRUCTURE (EC2 from AMI) + # ============================================================ + launch-infra: + needs: [gate, acquire-stg-slot] + if: always() && needs.acquire-stg-slot.result == 'success' + uses: iblai/iblai-web-ops/.github/workflows/reusable-service-update.yml@main + with: + ami-id: ${{ needs.acquire-stg-slot.outputs.domain-number == '1' && vars.STG1_AMI_ID || needs.acquire-stg-slot.outputs.domain-number == '2' && vars.STG2_AMI_ID || needs.acquire-stg-slot.outputs.domain-number == '3' && vars.STG3_AMI_ID || vars.STG4_AMI_ID }} + subnet-id: ${{ needs.acquire-stg-slot.outputs.domain-number == '1' && vars.STG1_SUBNET_ID || needs.acquire-stg-slot.outputs.domain-number == '2' && vars.STG2_SUBNET_ID || needs.acquire-stg-slot.outputs.domain-number == '3' && vars.STG3_SUBNET_ID || vars.STG4_SUBNET_ID }} + security-group-id: ${{ needs.acquire-stg-slot.outputs.domain-number == '1' && vars.STG1_SG_ID || needs.acquire-stg-slot.outputs.domain-number == '2' && vars.STG2_SG_ID || needs.acquire-stg-slot.outputs.domain-number == '3' && vars.STG3_SG_ID || vars.STG4_SG_ID }} + target-group-arn: ${{ needs.acquire-stg-slot.outputs.domain-number == '1' && vars.STG1_TG_ARN || needs.acquire-stg-slot.outputs.domain-number 
== '2' && vars.STG2_TG_ARN || needs.acquire-stg-slot.outputs.domain-number == '3' && vars.STG3_TG_ARN || vars.STG4_TG_ARN }} + key-pair-name: ${{ needs.acquire-stg-slot.outputs.domain-number == '1' && vars.STG1_KEY_PAIR || needs.acquire-stg-slot.outputs.domain-number == '2' && vars.STG2_KEY_PAIR || needs.acquire-stg-slot.outputs.domain-number == '3' && vars.STG3_KEY_PAIR || vars.STG4_KEY_PAIR }} + project-name: mentor-pr-${{ github.event.pull_request.number }} + secrets: + aws-access-key-id: ${{ secrets.SERVICE_UPDATE_ACCESS_KEY }} + aws-secret-access-key: ${{ secrets.SERVICE_UPDATE_SECRET_KEY }} + ssh-private-key: ${{ needs.acquire-stg-slot.outputs.domain-number == '1' && secrets.STG1_SSH_KEY || needs.acquire-stg-slot.outputs.domain-number == '2' && secrets.STG2_SSH_KEY || needs.acquire-stg-slot.outputs.domain-number == '3' && secrets.STG3_SSH_KEY || secrets.STG4_SSH_KEY }} + git-token: ${{ secrets.GIT_TOKEN }} + + # ============================================================ + # DEPLOY PR MENTOR IMAGE onto launched instance + # ============================================================ + deploy-app: + needs: [gate, acquire-stg-slot, lint, build-app-image, check-prod-versions, launch-infra] + if: >- + always() && + needs.launch-infra.result == 'success' && + needs.lint.result == 'success' && + needs.build-app-image.result != 'failure' + uses: iblai/iblai-web-ops/.github/workflows/reusable-spa-deployment.yml@main + with: + app-name: MENTOR + image-uri: ${{ vars.ENABLE_TESTING == 'true' && vars.MENTOR_IMAGE || needs.build-app-image.outputs.image-uri || needs.check-prod-versions.outputs.mentor-uri }} + deployment-path: /ibl/app/ibl-spa/mentor + nodes-json: '[{"name": "stg${{ needs.acquire-stg-slot.outputs.domain-number }}", "host": "${{ needs.launch-infra.outputs.instance-ip }}"}]' + venv-name: ibl-cli-ops + secrets: + ssh-private-key: ${{ needs.acquire-stg-slot.outputs.domain-number == '1' && secrets.STG1_SSH_KEY || needs.acquire-stg-slot.outputs.domain-number == 
'2' && secrets.STG2_SSH_KEY || needs.acquire-stg-slot.outputs.domain-number == '3' && secrets.STG3_SSH_KEY || secrets.STG4_SSH_KEY }} + git-token: ${{ secrets.GIT_TOKEN }} + + # ============================================================ + # DEPLOY AUTH (prod image) onto launched instance + # ============================================================ + deploy-auth: + needs: [gate, acquire-stg-slot, check-prod-versions, launch-infra, deploy-app] + if: always() && needs.launch-infra.result == 'success' && needs.deploy-app.result == 'success' + uses: iblai/iblai-web-ops/.github/workflows/reusable-spa-deployment.yml@main + with: + app-name: AUTH + image-uri: ${{ needs.check-prod-versions.outputs.auth-uri }} + deployment-path: /ibl/app/ibl-spa/auth + nodes-json: '[{"name": "stg${{ needs.acquire-stg-slot.outputs.domain-number }}", "host": "${{ needs.launch-infra.outputs.instance-ip }}"}]' + venv-name: ibl-cli-ops + secrets: + ssh-private-key: ${{ needs.acquire-stg-slot.outputs.domain-number == '1' && secrets.STG1_SSH_KEY || needs.acquire-stg-slot.outputs.domain-number == '2' && secrets.STG2_SSH_KEY || needs.acquire-stg-slot.outputs.domain-number == '3' && secrets.STG3_SSH_KEY || secrets.STG4_SSH_KEY }} + git-token: ${{ secrets.GIT_TOKEN }} + + # ============================================================ + # HEALTH CHECK + # ============================================================ + verify: + needs: [gate, acquire-stg-slot, deploy-auth, build-app-image, check-prod-versions, launch-infra] + if: always() && needs.deploy-auth.result == 'success' + uses: iblai/iblai-web-ops/.github/workflows/reusable-pre-test-health-check.yml@main + with: + app-name: MENTOR + expected-image-uri: ${{ vars.ENABLE_TESTING == 'true' && vars.MENTOR_IMAGE || needs.build-app-image.outputs.image-uri || needs.check-prod-versions.outputs.mentor-uri }} + deployment-path: /ibl/app/ibl-spa/mentor + nodes-json: '[{"name": "stg${{ needs.acquire-stg-slot.outputs.domain-number }}", "host": "${{ 
needs.launch-infra.outputs.instance-ip }}"}]' + venv-name: ibl-cli-ops + health-port: '5001' + secrets: + ssh-private-key: ${{ needs.acquire-stg-slot.outputs.domain-number == '1' && secrets.STG1_SSH_KEY || needs.acquire-stg-slot.outputs.domain-number == '2' && secrets.STG2_SSH_KEY || needs.acquire-stg-slot.outputs.domain-number == '3' && secrets.STG3_SSH_KEY || secrets.STG4_SSH_KEY }} + git-token: ${{ secrets.GIT_TOKEN }} + + # ============================================================ + # PARALLEL BROWSER TESTS (all 4 after health check) + # ============================================================ + test-chrome: + needs: [gate, check-resumption, acquire-stg-slot, deploy-auth, build-playwright-image, verify] + if: >- + always() && + needs.verify.result == 'success' && + needs.check-resumption.outputs.chrome-resumption-mode != 'all-passed' + uses: iblai/iblai-web-ops/.github/workflows/reusable-oci-test-runner.yml@main + secrets: inherit + with: + domain-number: ${{ needs.acquire-stg-slot.outputs.domain-number }} + app-type: mentor + playwright-image: ${{ needs.build-playwright-image.outputs.image-uri }} + pr-number: ${{ github.event.pull_request.number }} + max-wait: 5400 + total-shards: 1 + run-type: main-chrome + browsers: 'chrome' + test-files: ${{ needs.check-resumption.outputs.chrome-test-files }} + workers: '3' + + test-firefox: + needs: [gate, check-resumption, acquire-stg-slot, deploy-auth, build-playwright-image, verify] + if: >- + always() && + needs.verify.result == 'success' && + needs.check-resumption.outputs.firefox-resumption-mode != 'all-passed' + uses: iblai/iblai-web-ops/.github/workflows/reusable-oci-test-runner.yml@main + secrets: inherit + with: + domain-number: ${{ needs.acquire-stg-slot.outputs.domain-number }} + app-type: mentor + playwright-image: ${{ needs.build-playwright-image.outputs.image-uri }} + pr-number: ${{ github.event.pull_request.number }} + max-wait: 5400 + total-shards: 1 + run-type: main-firefox + browsers: 'firefox' + 
test-files: ${{ needs.check-resumption.outputs.firefox-test-files }} + workers: '3' + + test-safari: + needs: [gate, check-resumption, acquire-stg-slot, deploy-auth, build-playwright-image, verify] + if: >- + always() && + needs.verify.result == 'success' && + needs.check-resumption.outputs.safari-resumption-mode != 'all-passed' + uses: iblai/iblai-web-ops/.github/workflows/reusable-oci-test-runner.yml@main + secrets: inherit + with: + domain-number: ${{ needs.acquire-stg-slot.outputs.domain-number }} + app-type: mentor + playwright-image: ${{ needs.build-playwright-image.outputs.image-uri }} + pr-number: ${{ github.event.pull_request.number }} + max-wait: 5400 + total-shards: 1 + run-type: main-safari + browsers: 'safari' + test-files: ${{ needs.check-resumption.outputs.safari-test-files }} + workers: '3' + + test-edge: + needs: [gate, check-resumption, acquire-stg-slot, deploy-auth, build-playwright-image, verify] + if: >- + always() && + needs.verify.result == 'success' && + needs.check-resumption.outputs.edge-resumption-mode != 'all-passed' + uses: iblai/iblai-web-ops/.github/workflows/reusable-oci-test-runner.yml@main + secrets: inherit + with: + domain-number: ${{ needs.acquire-stg-slot.outputs.domain-number }} + app-type: mentor + playwright-image: ${{ needs.build-playwright-image.outputs.image-uri }} + pr-number: ${{ github.event.pull_request.number }} + max-wait: 5400 + total-shards: 1 + run-type: main-edge + browsers: 'edge' + test-files: ${{ needs.check-resumption.outputs.edge-test-files }} + workers: '3' + + # ============================================================ + # SAVE TEST STATUS (for resumption on next run) + # ============================================================ + save-status: + name: 'Save Mentor Status' + needs: [gate, test-chrome, test-firefox, test-safari, test-edge] + if: always() + runs-on: ubuntu-22.04 + environment: iblai.app + steps: + - name: Save browser status to S3 + env: + CHROME_RESULT: ${{ needs.test-chrome.result }} 
+ FIREFOX_RESULT: ${{ needs.test-firefox.result }} + SAFARI_RESULT: ${{ needs.test-safari.result }} + EDGE_RESULT: ${{ needs.test-edge.result }} + S3_BUCKET: ${{ secrets.S3_LOGS_BUCKET }} + AWS_ACCESS_KEY_ID: ${{ secrets.S3_LOGS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.S3_LOGS_SECRET_ACCESS_KEY }} + AWS_REGION: ${{ vars.AWS_REGION || 'us-east-1' }} + PR_NUMBER: ${{ github.event.pull_request.number }} + COMMIT_SHA: ${{ github.sha }} + run: | + PREV_STATUS="/tmp/prev-status.json" + S3_PATH="s3://$S3_BUCKET/pr/$PR_NUMBER/mentor/run-status.json" + aws s3 cp "$S3_PATH" "$PREV_STATUS" --region "$AWS_REGION" 2>/dev/null || echo '{}' > "$PREV_STATUS" + + get_status() { + local result=$1 browser=$2 + if [ "$result" = "skipped" ]; then + jq -r ".browsers.${browser} // \"pending\"" "$PREV_STATUS" 2>/dev/null || echo "pending" + return + fi + if [ "$result" != "success" ]; then echo "failed"; return; fi + local results_file="/tmp/test-results-${browser}.json" + local results_path="s3://$S3_BUCKET/pr/$PR_NUMBER/mentor/test-results-${browser}.json" + if aws s3 cp "$results_path" "$results_file" --region "$AWS_REGION" >/dev/null 2>&1; then + local failed_count + failed_count=$(jq '[.tests | to_entries[] | select(.value == "failed")] | length' "$results_file" 2>/dev/null || echo "-1") + if [ "$failed_count" = "0" ] || [ "$failed_count" = "-1" ]; then echo "passed"; else echo "failed"; fi + else + echo "passed" + fi + } + + cat > /tmp/run-status.json <> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "**PR:** #${{ github.event.pull_request.number }}" >> $GITHUB_STEP_SUMMARY + echo "**Stg slot:** stg${{ needs.acquire-stg-slot.outputs.domain-number }}" >> $GITHUB_STEP_SUMMARY + echo "**Instance:** ${{ needs.launch-infra.outputs.instance-id }} (${{ needs.launch-infra.outputs.instance-ip }})" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + + echo "## Code Quality" >> $GITHUB_STEP_SUMMARY + echo "| Check | Status |" >> $GITHUB_STEP_SUMMARY + echo 
"|-------|:------:|" >> $GITHUB_STEP_SUMMARY + + for PAIR in \ + "Pinned Versions:${{ needs.pinned-versions.result }}" \ + "Conventional Commits:${{ needs.commitlint.result }}" \ + "Linting:${{ needs.lint.outputs.result }}" \ + "TypeCheck:${{ needs.typecheck.outputs.result }}" \ + "Unit Coverage:${{ needs.coverage.outputs.result }}" \ + "E2E Coverage:${{ needs.e2e-coverage-check.result }}"; do + CHECK="${PAIR%%:*}" + RESULT="${PAIR#*:}" + if [[ "$RESULT" == "success" ]]; then + echo "| $CHECK | :white_check_mark: PASSED |" >> $GITHUB_STEP_SUMMARY + elif [[ "$RESULT" == "skipped" ]]; then + echo "| $CHECK | :fast_forward: SKIPPED |" >> $GITHUB_STEP_SUMMARY + else + echo "| $CHECK | :x: FAILED |" >> $GITHUB_STEP_SUMMARY + fi + done + echo "" >> $GITHUB_STEP_SUMMARY + + echo "## Infrastructure" >> $GITHUB_STEP_SUMMARY + echo "| Step | Status |" >> $GITHUB_STEP_SUMMARY + echo "|------|:------:|" >> $GITHUB_STEP_SUMMARY + for PAIR in \ + "Launch Infra:${{ needs.launch-infra.result }}" \ + "Deploy App:${{ needs.deploy-app.result }}" \ + "Deploy Auth:${{ needs.deploy-auth.result }}" \ + "Health Check:${{ needs.verify.result }}" \ + "Terminate:${{ needs.terminate-infra.result }}"; do + CHECK="${PAIR%%:*}" + RESULT="${PAIR#*:}" + if [[ "$RESULT" == "success" ]]; then + echo "| $CHECK | :white_check_mark: |" >> $GITHUB_STEP_SUMMARY + else + echo "| $CHECK | :x: $RESULT |" >> $GITHUB_STEP_SUMMARY + fi + done + echo "" >> $GITHUB_STEP_SUMMARY + + echo "## Browser Tests (Parallel)" >> $GITHUB_STEP_SUMMARY + echo "| Browser | Status |" >> $GITHUB_STEP_SUMMARY + echo "|---------|:------:|" >> $GITHUB_STEP_SUMMARY + for PAIR in \ + "Chrome:${{ needs.test-chrome.outputs.result }}" \ + "Firefox:${{ needs.test-firefox.outputs.result }}" \ + "Safari:${{ needs.test-safari.outputs.result }}" \ + "Edge:${{ needs.test-edge.outputs.result }}"; do + BROWSER="${PAIR%%:*}" + RESULT="${PAIR#*:}" + if [[ "$RESULT" == "success" ]]; then + echo "| $BROWSER | :white_check_mark: PASSED |" >> 
$GITHUB_STEP_SUMMARY + elif [[ -z "$RESULT" ]]; then + echo "| $BROWSER | :fast_forward: SKIPPED |" >> $GITHUB_STEP_SUMMARY + else + echo "| $BROWSER | :x: FAILED |" >> $GITHUB_STEP_SUMMARY + fi + done + + - name: Check results + id: check-results + if: always() + run: | + FAILED=false + for R in "${{ needs.pinned-versions.result }}" "${{ needs.commitlint.result }}" "${{ needs.lint.outputs.result }}" "${{ needs.typecheck.outputs.result }}" "${{ needs.coverage.outputs.result }}"; do + [[ "$R" != "success" ]] && FAILED=true + done + [[ "${{ needs.e2e-coverage-check.result }}" == "failure" ]] && FAILED=true + for R in "${{ needs.test-chrome.outputs.result }}" "${{ needs.test-firefox.outputs.result }}" "${{ needs.test-safari.outputs.result }}" "${{ needs.test-edge.outputs.result }}"; do + [[ -n "$R" && "$R" != "success" ]] && FAILED=true + done + if [[ "$FAILED" == "true" ]]; then exit 1; else exit 0; fi + + - name: Update PR gate status + if: always() + uses: actions/github-script@v7 + with: + script: | + const passed = '${{ steps.check-results.outcome }}' === 'success'; + await github.rest.repos.createCommitStatus({ + owner: context.repo.owner, + repo: context.repo.repo, + sha: context.payload.pull_request.head.sha, + state: passed ? 'success' : 'failure', + context: 'PR Validation', + description: passed ? 'All checks passed' : 'Some checks failed', + }); From 8262adf603ac6d89bfcb66a3550937c7c84db29e Mon Sep 17 00:00:00 2001 From: bnsoni Date: Wed, 8 Apr 2026 14:04:45 +0300 Subject: [PATCH 2/4] refactor: inline launch/deploy steps to support per-stg SSH keys GitHub Actions doesn't allow dynamic secret selection in reusable workflow secrets: blocks. Replaced reusable-service-update.yml + reusable-spa-deployment.yml calls with a single inline launch-and-deploy job that: 1. Resolves stg config based on domain-lock slot (1-4) 2. Selects the correct SSH key (STG1-4_SSH_KEY) via case statement 3. Opens SSH SG rule for runner IP 4. Launches EC2 from AMI (AWS CLI) 5.
Waits for SSH + runs service-update (Ansible) 6. Registers in ALB target group 7. Deploys PR mentor image + prod auth image 8. Waits for mentor health (with auto-restart on empty reply) 9. Revokes SSH SG rule Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/spa-pr-validation-new.yml | 277 +++++++++++++------- 1 file changed, 189 insertions(+), 88 deletions(-) diff --git a/.github/workflows/spa-pr-validation-new.yml b/.github/workflows/spa-pr-validation-new.yml index 3a6f785b..0ac1ad74 100644 --- a/.github/workflows/spa-pr-validation-new.yml +++ b/.github/workflows/spa-pr-validation-new.yml @@ -280,89 +280,189 @@ jobs: pr-number: ${{ github.event.pull_request.number }} # ============================================================ - # LAUNCH INFRASTRUCTURE (EC2 from AMI) + # LAUNCH INFRA + DEPLOY + VERIFY (single job, inline) # ============================================================ - launch-infra: - needs: [gate, acquire-stg-slot] - if: always() && needs.acquire-stg-slot.result == 'success' - uses: iblai/iblai-web-ops/.github/workflows/reusable-service-update.yml@main - with: - ami-id: ${{ needs.acquire-stg-slot.outputs.domain-number == '1' && vars.STG1_AMI_ID || needs.acquire-stg-slot.outputs.domain-number == '2' && vars.STG2_AMI_ID || needs.acquire-stg-slot.outputs.domain-number == '3' && vars.STG3_AMI_ID || vars.STG4_AMI_ID }} - subnet-id: ${{ needs.acquire-stg-slot.outputs.domain-number == '1' && vars.STG1_SUBNET_ID || needs.acquire-stg-slot.outputs.domain-number == '2' && vars.STG2_SUBNET_ID || needs.acquire-stg-slot.outputs.domain-number == '3' && vars.STG3_SUBNET_ID || vars.STG4_SUBNET_ID }} - security-group-id: ${{ needs.acquire-stg-slot.outputs.domain-number == '1' && vars.STG1_SG_ID || needs.acquire-stg-slot.outputs.domain-number == '2' && vars.STG2_SG_ID || needs.acquire-stg-slot.outputs.domain-number == '3' && vars.STG3_SG_ID || vars.STG4_SG_ID }} - target-group-arn: ${{ needs.acquire-stg-slot.outputs.domain-number == '1' && 
vars.STG1_TG_ARN || needs.acquire-stg-slot.outputs.domain-number == '2' && vars.STG2_TG_ARN || needs.acquire-stg-slot.outputs.domain-number == '3' && vars.STG3_TG_ARN || vars.STG4_TG_ARN }} - key-pair-name: ${{ needs.acquire-stg-slot.outputs.domain-number == '1' && vars.STG1_KEY_PAIR || needs.acquire-stg-slot.outputs.domain-number == '2' && vars.STG2_KEY_PAIR || needs.acquire-stg-slot.outputs.domain-number == '3' && vars.STG3_KEY_PAIR || vars.STG4_KEY_PAIR }} - project-name: mentor-pr-${{ github.event.pull_request.number }} - secrets: - aws-access-key-id: ${{ secrets.SERVICE_UPDATE_ACCESS_KEY }} - aws-secret-access-key: ${{ secrets.SERVICE_UPDATE_SECRET_KEY }} - ssh-private-key: ${{ needs.acquire-stg-slot.outputs.domain-number == '1' && secrets.STG1_SSH_KEY || needs.acquire-stg-slot.outputs.domain-number == '2' && secrets.STG2_SSH_KEY || needs.acquire-stg-slot.outputs.domain-number == '3' && secrets.STG3_SSH_KEY || secrets.STG4_SSH_KEY }} - git-token: ${{ secrets.GIT_TOKEN }} - - # ============================================================ - # DEPLOY PR MENTOR IMAGE onto launched instance - # ============================================================ - deploy-app: - needs: [gate, acquire-stg-slot, lint, build-app-image, check-prod-versions, launch-infra] + launch-and-deploy: + needs: [gate, acquire-stg-slot, lint, build-app-image, check-prod-versions] if: >- always() && - needs.launch-infra.result == 'success' && + needs.acquire-stg-slot.result == 'success' && needs.lint.result == 'success' && needs.build-app-image.result != 'failure' - uses: iblai/iblai-web-ops/.github/workflows/reusable-spa-deployment.yml@main - with: - app-name: MENTOR - image-uri: ${{ vars.ENABLE_TESTING == 'true' && vars.MENTOR_IMAGE || needs.build-app-image.outputs.image-uri || needs.check-prod-versions.outputs.mentor-uri }} - deployment-path: /ibl/app/ibl-spa/mentor - nodes-json: '[{"name": "stg${{ needs.acquire-stg-slot.outputs.domain-number }}", "host": "${{ 
needs.launch-infra.outputs.instance-ip }}"}]' - venv-name: ibl-cli-ops - secrets: - ssh-private-key: ${{ needs.acquire-stg-slot.outputs.domain-number == '1' && secrets.STG1_SSH_KEY || needs.acquire-stg-slot.outputs.domain-number == '2' && secrets.STG2_SSH_KEY || needs.acquire-stg-slot.outputs.domain-number == '3' && secrets.STG3_SSH_KEY || secrets.STG4_SSH_KEY }} - git-token: ${{ secrets.GIT_TOKEN }} + runs-on: ubuntu-22.04  # pinned like the other jobs in this workflow; the steps below rely on the image's preinstalled aws CLI, pip and ssh + timeout-minutes: 45 + outputs: + instance-id: ${{ steps.launch.outputs.instance-id }} + instance-ip: ${{ steps.launch.outputs.instance-ip }} + env: + AWS_ACCESS_KEY_ID: ${{ secrets.SERVICE_UPDATE_ACCESS_KEY }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.SERVICE_UPDATE_SECRET_KEY }} + AWS_DEFAULT_REGION: us-east-1 + steps: + - name: Resolve stg config + id: config + run: | + SLOT="${{ needs.acquire-stg-slot.outputs.domain-number }}" + case "$SLOT" in [1-4]) echo "Acquired stg slot: $SLOT" ;; *) echo "::error::Unexpected stg slot '$SLOT' (expected 1-4)"; exit 1 ;; esac # fail fast instead of emitting empty ami/subnet/sg outputs + + declare -A AMIS SUBNETS SGS TGS KEYS + AMIS[1]="${{ vars.STG1_AMI_ID }}"; AMIS[2]="${{ vars.STG2_AMI_ID }}"; AMIS[3]="${{ vars.STG3_AMI_ID }}"; AMIS[4]="${{ vars.STG4_AMI_ID }}" + SUBNETS[1]="${{ vars.STG1_SUBNET_ID }}"; SUBNETS[2]="${{ vars.STG2_SUBNET_ID }}"; SUBNETS[3]="${{ vars.STG3_SUBNET_ID }}"; SUBNETS[4]="${{ vars.STG4_SUBNET_ID }}" + SGS[1]="${{ vars.STG1_SG_ID }}"; SGS[2]="${{ vars.STG2_SG_ID }}"; SGS[3]="${{ vars.STG3_SG_ID }}"; SGS[4]="${{ vars.STG4_SG_ID }}" + TGS[1]="${{ vars.STG1_TG_ARN }}"; TGS[2]="${{ vars.STG2_TG_ARN }}"; TGS[3]="${{ vars.STG3_TG_ARN }}"; TGS[4]="${{ vars.STG4_TG_ARN }}" + KEYS[1]="${{ vars.STG1_KEY_PAIR }}"; KEYS[2]="${{ vars.STG2_KEY_PAIR }}"; KEYS[3]="${{ vars.STG3_KEY_PAIR }}"; KEYS[4]="${{ vars.STG4_KEY_PAIR }}" + + echo "ami-id=${AMIS[$SLOT]}" >> $GITHUB_OUTPUT + echo "subnet-id=${SUBNETS[$SLOT]}" >> $GITHUB_OUTPUT + echo "security-group-id=${SGS[$SLOT]}" >> $GITHUB_OUTPUT + echo "target-group-arn=${TGS[$SLOT]}" >> $GITHUB_OUTPUT + echo "key-pair-name=${KEYS[$SLOT]}" >> $GITHUB_OUTPUT + + - name: Write SSH key + run: | + mkdir -p ~/.ssh +
SLOT="${{ needs.acquire-stg-slot.outputs.domain-number }}" + case "$SLOT" in + 1) echo "${{ secrets.STG1_SSH_KEY }}" > ~/.ssh/deploy-key ;; + 2) echo "${{ secrets.STG2_SSH_KEY }}" > ~/.ssh/deploy-key ;; + 3) echo "${{ secrets.STG3_SSH_KEY }}" > ~/.ssh/deploy-key ;; + 4) echo "${{ secrets.STG4_SSH_KEY }}" > ~/.ssh/deploy-key ;; + esac + chmod 600 ~/.ssh/deploy-key + + - name: Get runner IP and open SSH + id: runner-ip + run: | + RUNNER_IP=$(curl -s https://checkip.amazonaws.com) + echo "ip=$RUNNER_IP" >> $GITHUB_OUTPUT + aws ec2 authorize-security-group-ingress \ + --group-id ${{ steps.config.outputs.security-group-id }} \ + --protocol tcp --port 22 --cidr ${RUNNER_IP}/32 2>&1 || true # NOTE(review): '|| true' hides every failure here, presumably to tolerate an already-existing rule - confirm + + - name: Launch EC2 from AMI + id: launch + run: | + INSTANCE_ID=$(aws ec2 run-instances \ + --image-id ${{ steps.config.outputs.ami-id }} \ + --instance-type t3.2xlarge \ + --key-name ${{ steps.config.outputs.key-pair-name }} \ + --subnet-id ${{ steps.config.outputs.subnet-id }} \ + --security-group-ids ${{ steps.config.outputs.security-group-id }} \ + --block-device-mappings '[{"DeviceName":"/dev/sda1","Ebs":{"VolumeSize":200,"VolumeType":"gp3","Encrypted":true}}]' \ + --tag-specifications "ResourceType=instance,Tags=[{Key=Name,Value=mentor-pr-${{ github.event.pull_request.number }}}]" \ + --query 'Instances[0].InstanceId' --output text) + echo "instance-id=$INSTANCE_ID" >> $GITHUB_OUTPUT + echo "Instance: $INSTANCE_ID" + + aws ec2 wait instance-running --instance-ids $INSTANCE_ID + IP=$(aws ec2 describe-instances --instance-ids $INSTANCE_ID --query 'Reservations[0].Instances[0].PublicIpAddress' --output text) + echo "instance-ip=$IP" >> $GITHUB_OUTPUT + echo "IP: $IP" + + - name: Wait for SSH + run: | + i=1 # attempt counter; up to 15 tries, 15s apart + until ssh -o ConnectTimeout=10 -o StrictHostKeyChecking=no -i ~/.ssh/deploy-key ubuntu@${{ steps.launch.outputs.instance-ip }} "echo ok" 2>/dev/null; do + if [ "$i" -ge 15 ]; then echo "::error::SSH not reachable after $i attempts"; exit 1; fi # fail this step instead of letting later steps hit a dead host + echo "Attempt $i - retrying in 15s..."
+ sleep 15 + i=$((i + 1)) + done + echo "SSH ready on attempt $i" - # ============================================================ - # DEPLOY AUTH (prod image) onto launched instance - # ============================================================ - deploy-auth: - needs: [gate, acquire-stg-slot, check-prod-versions, launch-infra, deploy-app] - if: always() && needs.launch-infra.result == 'success' && needs.deploy-app.result == 'success' - uses: iblai/iblai-web-ops/.github/workflows/reusable-spa-deployment.yml@main - with: - app-name: AUTH - image-uri: ${{ needs.check-prod-versions.outputs.auth-uri }} - deployment-path: /ibl/app/ibl-spa/auth - nodes-json: '[{"name": "stg${{ needs.acquire-stg-slot.outputs.domain-number }}", "host": "${{ needs.launch-infra.outputs.instance-ip }}"}]' - venv-name: ibl-cli-ops - secrets: - ssh-private-key: ${{ needs.acquire-stg-slot.outputs.domain-number == '1' && secrets.STG1_SSH_KEY || needs.acquire-stg-slot.outputs.domain-number == '2' && secrets.STG2_SSH_KEY || needs.acquire-stg-slot.outputs.domain-number == '3' && secrets.STG3_SSH_KEY || secrets.STG4_SSH_KEY }} - git-token: ${{ secrets.GIT_TOKEN }} + - name: Checkout infra CLI + uses: actions/checkout@v4 + with: + repository: iblai/iblai-infra-cli + token: ${{ secrets.GIT_TOKEN }} + path: .infra - # ============================================================ - # HEALTH CHECK - # ============================================================ - verify: - needs: [gate, acquire-stg-slot, deploy-auth, build-app-image, check-prod-versions, launch-infra] - if: always() && needs.deploy-auth.result == 'success' - uses: iblai/iblai-web-ops/.github/workflows/reusable-pre-test-health-check.yml@main - with: - app-name: MENTOR - expected-image-uri: ${{ vars.ENABLE_TESTING == 'true' && vars.MENTOR_IMAGE || needs.build-app-image.outputs.image-uri || needs.check-prod-versions.outputs.mentor-uri }} - deployment-path: /ibl/app/ibl-spa/mentor - nodes-json: '[{"name": "stg${{ needs.acquire-stg-slot.outputs.domain-number }}", "host": "${{
needs.launch-infra.outputs.instance-ip }}"}]' - venv-name: ibl-cli-ops - health-port: '5001' - secrets: - ssh-private-key: ${{ needs.acquire-stg-slot.outputs.domain-number == '1' && secrets.STG1_SSH_KEY || needs.acquire-stg-slot.outputs.domain-number == '2' && secrets.STG2_SSH_KEY || needs.acquire-stg-slot.outputs.domain-number == '3' && secrets.STG3_SSH_KEY || secrets.STG4_SSH_KEY }} - git-token: ${{ secrets.GIT_TOKEN }} + - name: Install infra CLI + run: | + pip install ./.infra + pip install ansible-core + + - name: Run service update (Ansible) + env: + PYTHONUNBUFFERED: "1" + TERM: dumb + run: | + iblai infra service-update \ + --host ${{ steps.launch.outputs.instance-ip }} \ + --ssh-key ~/.ssh/deploy-key \ + --git-token ${{ secrets.GIT_TOKEN }} + + - name: Register in target group + run: | + aws elbv2 register-targets \ + --target-group-arn ${{ steps.config.outputs.target-group-arn }} \ + --targets Id=${{ steps.launch.outputs.instance-id }},Port=80 + echo "Registered in target group" + + - name: Deploy PR mentor image + run: | + IMAGE="${{ vars.ENABLE_TESTING == 'true' && vars.MENTOR_IMAGE || needs.build-app-image.outputs.image-uri || needs.check-prod-versions.outputs.mentor-uri }}" + echo "Deploying mentor image: $IMAGE" + ssh -o StrictHostKeyChecking=no -i ~/.ssh/deploy-key ubuntu@${{ steps.launch.outputs.instance-ip }} " + export PYENV_ROOT=/home/ubuntu/.pyenv + export PATH=\$PYENV_ROOT/bin:\$PATH + eval \"\$(pyenv init -)\" + eval \"\$(pyenv virtualenv-init -)\" + pyenv activate ibl-cli-ops + export IBL_ROOT=/ibl/ + + # Login to ECR + aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin 765174860755.dkr.ecr.us-east-1.amazonaws.com + + # Deploy mentor + cd /ibl/app/ibl-spa/mentor/ + docker compose pull 2>/dev/null || true + echo 'DOCKER_IMAGE=$IMAGE' > .env.deploy + docker compose down 2>/dev/null + docker compose up -d + echo 'Mentor deployed' + + # Restart auth + cd /ibl/app/ibl-spa/auth/ + docker compose down 
2>/dev/null + docker compose up -d + echo 'Auth restarted' + " + + - name: Wait for mentor health + run: | + for i in $(seq 1 20); do + HTTP=$(ssh -o StrictHostKeyChecking=no -i ~/.ssh/deploy-key ubuntu@${{ steps.launch.outputs.instance-ip }} \ + "curl -s -o /dev/null -w '%{http_code}' http://localhost:5001/" 2>/dev/null) + echo "Attempt $i: HTTP $HTTP" + if echo "$HTTP" | grep -qE '^(200|301|302)$'; then + echo "Mentor healthy!" + break + fi + if [ "$i" -eq 10 ]; then + echo "Restarting mentor..." + ssh -o StrictHostKeyChecking=no -i ~/.ssh/deploy-key ubuntu@${{ steps.launch.outputs.instance-ip }} \ + "cd /ibl/app/ibl-spa/mentor/ && docker compose restart" 2>/dev/null + fi + sleep 15 + done + + - name: Revoke runner SSH access + if: always() + run: | + aws ec2 revoke-security-group-ingress \ + --group-id ${{ steps.config.outputs.security-group-id }} \ + --protocol tcp --port 22 \ + --cidr ${{ steps.runner-ip.outputs.ip }}/32 2>&1 || true # ============================================================ # PARALLEL BROWSER TESTS (all 4 after health check) # ============================================================ test-chrome: - needs: [gate, check-resumption, acquire-stg-slot, deploy-auth, build-playwright-image, verify] + needs: [gate, check-resumption, acquire-stg-slot, launch-and-deploy, build-playwright-image] if: >- always() && - needs.verify.result == 'success' && + needs.launch-and-deploy.result == 'success' && needs.check-resumption.outputs.chrome-resumption-mode != 'all-passed' uses: iblai/iblai-web-ops/.github/workflows/reusable-oci-test-runner.yml@main secrets: inherit @@ -379,10 +479,10 @@ jobs: workers: '3' test-firefox: - needs: [gate, check-resumption, acquire-stg-slot, deploy-auth, build-playwright-image, verify] + needs: [gate, check-resumption, acquire-stg-slot, launch-and-deploy, build-playwright-image] if: >- always() && - needs.verify.result == 'success' && + needs.launch-and-deploy.result == 'success' && 
needs.check-resumption.outputs.firefox-resumption-mode != 'all-passed' uses: iblai/iblai-web-ops/.github/workflows/reusable-oci-test-runner.yml@main secrets: inherit @@ -399,10 +499,10 @@ jobs: workers: '3' test-safari: - needs: [gate, check-resumption, acquire-stg-slot, deploy-auth, build-playwright-image, verify] + needs: [gate, check-resumption, acquire-stg-slot, launch-and-deploy, build-playwright-image] if: >- always() && - needs.verify.result == 'success' && + needs.launch-and-deploy.result == 'success' && needs.check-resumption.outputs.safari-resumption-mode != 'all-passed' uses: iblai/iblai-web-ops/.github/workflows/reusable-oci-test-runner.yml@main secrets: inherit @@ -419,10 +519,10 @@ jobs: workers: '3' test-edge: - needs: [gate, check-resumption, acquire-stg-slot, deploy-auth, build-playwright-image, verify] + needs: [gate, check-resumption, acquire-stg-slot, launch-and-deploy, build-playwright-image] if: >- always() && - needs.verify.result == 'success' && + needs.launch-and-deploy.result == 'success' && needs.check-resumption.outputs.edge-resumption-mode != 'all-passed' uses: iblai/iblai-web-ops/.github/workflows/reusable-oci-test-runner.yml@main secrets: inherit @@ -507,8 +607,7 @@ jobs: gate, acquire-stg-slot, pinned-versions, commitlint, lint, typecheck, coverage, e2e-coverage-check, claude-review-coverage, claude-review-quality, claude-review-uiux, check-prod-versions, - build-app-image, build-playwright-image, launch-infra, - deploy-app, deploy-auth, verify, + build-app-image, build-playwright-image, launch-and-deploy, test-chrome, test-firefox, test-safari, test-edge, save-status, ] @@ -524,8 +623,8 @@ jobs: # TERMINATE INFRASTRUCTURE # ============================================================ terminate-infra: - needs: [launch-infra, save-status, release-stg-slot] - if: always() && needs.launch-infra.outputs.instance-id != '' + needs: [launch-and-deploy, save-status, release-stg-slot] + if: always() && needs.launch-and-deploy.result != 
'skipped' runs-on: ubuntu-latest env: AWS_ACCESS_KEY_ID: ${{ secrets.SERVICE_UPDATE_ACCESS_KEY }} @@ -534,8 +633,14 @@ jobs: steps: - name: Terminate EC2 instance run: | - echo "Terminating instance ${{ needs.launch-infra.outputs.instance-id }}..." - aws ec2 terminate-instances --instance-ids ${{ needs.launch-infra.outputs.instance-id }} + INSTANCE_ID="${{ needs.launch-and-deploy.outputs.instance-id }}" + if [ -n "$INSTANCE_ID" ]; then + echo "Terminating instance $INSTANCE_ID..." + aws ec2 terminate-instances --instance-ids $INSTANCE_ID + echo "Instance terminated" + else + echo "No instance to terminate" + fi echo "Instance terminated" # ============================================================ @@ -547,8 +652,7 @@ jobs: gate, acquire-stg-slot, pinned-versions, commitlint, lint, typecheck, coverage, e2e-coverage-check, claude-review-coverage, claude-review-quality, claude-review-uiux, check-prod-versions, - build-app-image, build-playwright-image, launch-infra, - deploy-app, deploy-auth, verify, + build-app-image, build-playwright-image, launch-and-deploy, test-chrome, test-firefox, test-safari, test-edge, save-status, release-stg-slot, terminate-infra, ] @@ -561,7 +665,7 @@ jobs: echo "" >> $GITHUB_STEP_SUMMARY echo "**PR:** #${{ github.event.pull_request.number }}" >> $GITHUB_STEP_SUMMARY echo "**Stg slot:** stg${{ needs.acquire-stg-slot.outputs.domain-number }}" >> $GITHUB_STEP_SUMMARY - echo "**Instance:** ${{ needs.launch-infra.outputs.instance-id }} (${{ needs.launch-infra.outputs.instance-ip }})" >> $GITHUB_STEP_SUMMARY + echo "**Instance:** ${{ needs.launch-and-deploy.outputs.instance-id }} (${{ needs.launch-and-deploy.outputs.instance-ip }})" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY echo "## Code Quality" >> $GITHUB_STEP_SUMMARY @@ -591,10 +695,7 @@ jobs: echo "| Step | Status |" >> $GITHUB_STEP_SUMMARY echo "|------|:------:|" >> $GITHUB_STEP_SUMMARY for PAIR in \ - "Launch Infra:${{ needs.launch-infra.result }}" \ - "Deploy App:${{ 
needs.deploy-app.result }}" \ - "Deploy Auth:${{ needs.deploy-auth.result }}" \ - "Health Check:${{ needs.verify.result }}" \ + "Launch & Deploy:${{ needs.launch-and-deploy.result }}" \ "Terminate:${{ needs.terminate-infra.result }}"; do CHECK="${PAIR%%:*}" RESULT="${PAIR#*:}" From c46f771b2acd3cfc28b09561f2a94e22d5843d21 Mon Sep 17 00:00:00 2001 From: bnsoni Date: Wed, 8 Apr 2026 14:31:45 +0300 Subject: [PATCH 3/4] feat: add workflow_dispatch trigger for manual testing Allows running the workflow from the Actions tab with a stg-slot input (1-4). Skips domain-lock and code quality checks for dispatch. Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/spa-pr-validation-new.yml | 33 +++++++++++++++------ 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/.github/workflows/spa-pr-validation-new.yml b/.github/workflows/spa-pr-validation-new.yml index 0ac1ad74..39af3ece 100644 --- a/.github/workflows/spa-pr-validation-new.yml +++ b/.github/workflows/spa-pr-validation-new.yml @@ -4,6 +4,12 @@ on: pull_request: branches: [main] types: [labeled] + workflow_dispatch: + inputs: + stg-slot: + description: 'Stg slot (1-4) — skips domain-lock' + required: false + default: '1' run-name: 'PR #${{ github.event.pull_request.number }} - Mentor Test (AMI)' @@ -22,7 +28,9 @@ jobs: # GATE: Detect what changed, decide what to build/test # ============================================================ gate: - if: github.event.label.name == 'run-tests' && !startsWith(github.head_ref, 'release/') + if: >- + github.event_name == 'workflow_dispatch' || + (github.event.label.name == 'run-tests' && !startsWith(github.head_ref, 'release/')) runs-on: ubuntu-22.04 environment: iblai.app outputs: @@ -87,6 +95,7 @@ jobs: # ============================================================ acquire-stg-slot: needs: [gate] + if: github.event_name != 'workflow_dispatch' uses: iblai/iblai-web-ops/.github/workflows/reusable-domain-lock.yml@main secrets: inherit with: @@ -286,14 
+295,15 @@ jobs: needs: [gate, acquire-stg-slot, lint, build-app-image, check-prod-versions] if: >- always() && - needs.acquire-stg-slot.result == 'success' && - needs.lint.result == 'success' && + (needs.acquire-stg-slot.result == 'success' || github.event_name == 'workflow_dispatch') && + (needs.lint.result == 'success' || github.event_name == 'workflow_dispatch') && needs.build-app-image.result != 'failure' runs-on: ubuntu-latest timeout-minutes: 45 outputs: instance-id: ${{ steps.launch.outputs.instance-id }} instance-ip: ${{ steps.launch.outputs.instance-ip }} + stg-slot: ${{ steps.config.outputs.stg-slot }} env: AWS_ACCESS_KEY_ID: ${{ secrets.SERVICE_UPDATE_ACCESS_KEY }} AWS_SECRET_ACCESS_KEY: ${{ secrets.SERVICE_UPDATE_SECRET_KEY }} @@ -302,7 +312,11 @@ jobs: - name: Resolve stg config id: config run: | - SLOT="${{ needs.acquire-stg-slot.outputs.domain-number }}" + if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then + SLOT="${{ inputs.stg-slot }}" + else + SLOT="${{ needs.acquire-stg-slot.outputs.domain-number }}" + fi echo "Acquired stg slot: $SLOT" declare -A AMIS SUBNETS SGS TGS KEYS @@ -317,6 +331,7 @@ jobs: echo "security-group-id=${SGS[$SLOT]}" >> $GITHUB_OUTPUT echo "target-group-arn=${TGS[$SLOT]}" >> $GITHUB_OUTPUT echo "key-pair-name=${KEYS[$SLOT]}" >> $GITHUB_OUTPUT + echo "stg-slot=$SLOT" >> $GITHUB_OUTPUT - name: Write SSH key run: | @@ -467,7 +482,7 @@ jobs: uses: iblai/iblai-web-ops/.github/workflows/reusable-oci-test-runner.yml@main secrets: inherit with: - domain-number: ${{ needs.acquire-stg-slot.outputs.domain-number }} + domain-number: ${{ needs.launch-and-deploy.outputs.stg-slot }} app-type: mentor playwright-image: ${{ needs.build-playwright-image.outputs.image-uri }} pr-number: ${{ github.event.pull_request.number }} @@ -487,7 +502,7 @@ jobs: uses: iblai/iblai-web-ops/.github/workflows/reusable-oci-test-runner.yml@main secrets: inherit with: - domain-number: ${{ needs.acquire-stg-slot.outputs.domain-number }} + domain-number: 
${{ needs.launch-and-deploy.outputs.stg-slot }} app-type: mentor playwright-image: ${{ needs.build-playwright-image.outputs.image-uri }} pr-number: ${{ github.event.pull_request.number }} @@ -507,7 +522,7 @@ jobs: uses: iblai/iblai-web-ops/.github/workflows/reusable-oci-test-runner.yml@main secrets: inherit with: - domain-number: ${{ needs.acquire-stg-slot.outputs.domain-number }} + domain-number: ${{ needs.launch-and-deploy.outputs.stg-slot }} app-type: mentor playwright-image: ${{ needs.build-playwright-image.outputs.image-uri }} pr-number: ${{ github.event.pull_request.number }} @@ -527,7 +542,7 @@ jobs: uses: iblai/iblai-web-ops/.github/workflows/reusable-oci-test-runner.yml@main secrets: inherit with: - domain-number: ${{ needs.acquire-stg-slot.outputs.domain-number }} + domain-number: ${{ needs.launch-and-deploy.outputs.stg-slot }} app-type: mentor playwright-image: ${{ needs.build-playwright-image.outputs.image-uri }} pr-number: ${{ github.event.pull_request.number }} @@ -617,7 +632,7 @@ jobs: with: action: release app-type: testing - domain-number: ${{ needs.acquire-stg-slot.outputs.domain-number }} + domain-number: ${{ needs.launch-and-deploy.outputs.stg-slot }} # ============================================================ # TERMINATE INFRASTRUCTURE From ea9cb4f30d7d0bdba3abe11bd0d89a828ece9c82 Mon Sep 17 00:00:00 2001 From: bnsoni Date: Wed, 8 Apr 2026 16:33:34 +0300 Subject: [PATCH 4/4] fix: deregister old ALB targets before registering new instance Prevents split-brain routing where ALB sends some requests to old instance with stale OAuth creds. Now deregisters all existing targets from the target group before registering the new instance. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- .github/workflows/spa-pr-validation-new.yml | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/.github/workflows/spa-pr-validation-new.yml b/.github/workflows/spa-pr-validation-new.yml index 39af3ece..fe8c5acc 100644 --- a/.github/workflows/spa-pr-validation-new.yml +++ b/.github/workflows/spa-pr-validation-new.yml @@ -407,12 +407,22 @@ jobs: --ssh-key ~/.ssh/deploy-key \ --git-token ${{ secrets.GIT_TOKEN }} - - name: Register in target group + - name: Deregister old targets and register new instance run: | - aws elbv2 register-targets \ - --target-group-arn ${{ steps.config.outputs.target-group-arn }} \ - --targets Id=${{ steps.launch.outputs.instance-id }},Port=80 - echo "Registered in target group" + TG_ARN="${{ steps.config.outputs.target-group-arn }}" + NEW_ID="${{ steps.launch.outputs.instance-id }}" + + # Deregister any existing targets to prevent split-brain routing + OLD_TARGETS=$(aws elbv2 describe-target-health --target-group-arn "$TG_ARN" \ + --query "TargetHealthDescriptions[?Target.Id!='$NEW_ID'].Target.Id" --output text 2>/dev/null) + for OLD_ID in $OLD_TARGETS; do + echo "Deregistering old target: $OLD_ID" + aws elbv2 deregister-targets --target-group-arn "$TG_ARN" --targets Id=$OLD_ID 2>/dev/null || true + done + + # Register new instance + aws elbv2 register-targets --target-group-arn "$TG_ARN" --targets Id=$NEW_ID,Port=80 + echo "Registered $NEW_ID in target group" - name: Deploy PR mentor image run: |