From 1acee2e0247e9b838e2f31babf6a175574c35105 Mon Sep 17 00:00:00 2001
From: bnsoni <bhargavsoni8@gmail.com>
Date: Wed, 8 Apr 2026 13:05:49 +0300
Subject: [PATCH 1/4] feat: add AMI-based PR validation workflow
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add spa-pr-validation-new.yml, which launches a fresh EC2 instance from a
pre-built AMI for each PR run instead of deploying to shared OCI VMs.

Flow: acquire stg slot (1-4) via domain-lock → launch EC2 from AMI →
deploy PR mentor image + prod auth → run 4 browser tests in parallel →
save status → release slot → terminate EC2

Key differences from existing workflow:
- Isolated environment per PR (not shared VMs)
- All 4 browsers always run in parallel (no sequential mode)
- EC2 terminated after tests (clean slate every run)
- Uses reusable-service-update.yml for infra launch
- Domain-number maps to stg1-4 infrastructure

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .github/workflows/spa-pr-validation-new.yml | 655 ++++++++++++++++++++
 1 file changed, 655 insertions(+)
 create mode 100644 .github/workflows/spa-pr-validation-new.yml

diff --git a/.github/workflows/spa-pr-validation-new.yml b/.github/workflows/spa-pr-validation-new.yml
new file mode 100644
index 00000000..3a6f785b
--- /dev/null
+++ b/.github/workflows/spa-pr-validation-new.yml
@@ -0,0 +1,655 @@
+name: SPA PR Validation (AMI-based)
+
+on:
+  pull_request:
+    branches: [main]
+    types: [labeled]  # fires for every label event; the gate job filters on 'run-tests'
+
+run-name: 'PR #${{ github.event.pull_request.number }} - Mentor Test (AMI)'
+
+concurrency:  # label name is part of the group so an unrelated label event cannot cancel a pending run-tests run
+  group: pr-validation-mentor-ami-${{ github.event.pull_request.number }}-${{ github.event.label.name }}
+  cancel-in-progress: false  # a new run queues behind the in-progress one instead of cancelling it
+
+permissions:
+  contents: read
+  pull-requests: write  # the Claude coverage review is asked to comment on the PR
+  statuses: write  # the summary job publishes the 'PR Validation' commit status
+  id-token: write
+
+jobs:
+  # ============================================================
+  # GATE: runs only for the 'run-tests' label on non-release branches; decides whether a PR app image must be built
+  # ============================================================
+  gate:
+    if: github.event.label.name == 'run-tests' && !startsWith(github.head_ref, 'release/')
+    runs-on: ubuntu-22.04
+    environment: iblai.app
+    outputs:
+      should-build-app: ${{ steps.decide.outputs.should-build-app }}  # string 'true' or 'false'
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Detect changes
+        id: changes
+        uses: dorny/paths-filter@v3
+        with:
+          filters: |
+            app:
+              - 'src/**'
+              - 'app/**'
+              - 'components/**'
+              - 'features/**'
+              - 'hooks/**'
+              - 'lib/**'
+              - 'actions/**'
+              - 'contexts/**'
+              - 'public/**'
+              - 'next.config.*'
+              - 'package.json'
+              - 'pnpm-lock.yaml'
+              - 'tsconfig*.json'
+              - 'tailwind.config.*'
+              - 'Dockerfile'
+            tests:
+              - 'e2e/**'
+
+      - name: Decide build strategy  # build only when app paths changed and vars.ENABLE_TESTING is not 'true'
+        id: decide
+        run: |
+          APP_CHANGED="${{ steps.changes.outputs.app }}"
+          if [[ "$APP_CHANGED" == "true" && "${{ vars.ENABLE_TESTING }}" != "true" ]]; then
+            echo "should-build-app=true" >> $GITHUB_OUTPUT
+          else
+            echo "should-build-app=false" >> $GITHUB_OUTPUT
+          fi
+
+  # ============================================================
+  # TEST RESUMPTION CHECK: supplies the per-browser resumption-mode / test-files outputs read by the test jobs
+  # ============================================================
+  check-resumption:
+    needs: [gate]
+    uses: iblai/iblai-web-ops/.github/workflows/reusable-test-resumption.yml@main
+    with:
+      app-name: mentor
+      pr-number: ${{ github.event.pull_request.number }}
+      test-dir: e2e/tests
+    secrets:
+      s3-bucket: ${{ secrets.S3_LOGS_BUCKET }}
+      aws-access-key-id: ${{ secrets.S3_LOGS_ACCESS_KEY_ID }}
+      aws-secret-access-key: ${{ secrets.S3_LOGS_SECRET_ACCESS_KEY }}
+      aws-region: ${{ vars.AWS_REGION || 'us-east-1' }}  # same fallback as save-status, which uses the same bucket secret
+
+  # ============================================================
+  # ACQUIRE STG SLOT (domain-lock picks stg1-4); the slot is handed back by release-stg-slot
+  # ============================================================
+  acquire-stg-slot:
+    needs: [gate]
+    uses: iblai/iblai-web-ops/.github/workflows/reusable-domain-lock.yml@main
+    secrets: inherit
+    with:
+      action: acquire
+      app-type: testing
+      context: 'PR #${{ github.event.pull_request.number }} - Mentor AMI Testing'
+      allowed-domains: '1,2,3,4'  # candidate slots; downstream jobs read the choice from outputs.domain-number
+      max-wait: 18000  # presumably seconds (5 h) - confirm against reusable-domain-lock.yml
+
+  # ============================================================
+  # CODE QUALITY (parallel after slot acquired)
+  # ============================================================
+  pinned-versions:  # runs the repository's pinned-dependency check script
+    needs: [gate, acquire-stg-slot]
+    if: always() && needs.acquire-stg-slot.result == 'success'  # effective condition: a stg slot was acquired
+    runs-on: ubuntu-22.04
+    steps:
+      - uses: actions/checkout@v4
+      - name: Check all dependency versions are pinned
+        run: ./scripts/check-pinned-versions.sh
+
+  commitlint:  # lints the commits between the base branch and HEAD
+    needs: [gate, acquire-stg-slot]
+    if: always() && needs.acquire-stg-slot.result == 'success'  # effective condition: a stg slot was acquired
+    runs-on: ubuntu-22.04
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # full history so origin/<base> exists for the --from/--to range
+      - uses: actions/setup-node@v4
+        with:
+          node-version: '20'
+      - run: npm install --global @commitlint/cli @commitlint/config-conventional  # NOTE(review): versions are unpinned - confirm intended
+      - run: npx commitlint --from origin/${{ github.base_ref }} --to HEAD --verbose
+
+  lint:  # shared linting workflow from iblai-web-ops; deploy-app requires this job to succeed
+    needs: [gate, acquire-stg-slot]
+    if: always() && needs.acquire-stg-slot.result == 'success'  # effective condition: a stg slot was acquired
+    uses: iblai/iblai-web-ops/.github/workflows/reusable-linting.yml@main
+    secrets: inherit
+
+  typecheck:  # shared type-check workflow from iblai-web-ops
+    needs: [gate, acquire-stg-slot]
+    if: always() && needs.acquire-stg-slot.result == 'success'  # effective condition: a stg slot was acquired
+    uses: iblai/iblai-web-ops/.github/workflows/reusable-typecheck.yml@main
+    secrets: inherit
+
+  coverage:  # shared unit-test coverage workflow from iblai-web-ops
+    needs: [gate, acquire-stg-slot]
+    if: always() && needs.acquire-stg-slot.result == 'success'  # effective condition: a stg slot was acquired
+    uses: iblai/iblai-web-ops/.github/workflows/reusable-unit-test-coverage.yml@main
+    with:
+      coverage-threshold: 95  # presumably a percentage - confirm against the reusable workflow
+    secrets: inherit
+
+  e2e-coverage-check:  # runs the repo's journey-coverage script against the base branch
+    name: 'E2E Journey Coverage'
+    needs: [gate, acquire-stg-slot]
+    if: always() && needs.acquire-stg-slot.result == 'success'  # effective condition: a stg slot was acquired
+    runs-on: ubuntu-22.04
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # full history so origin/<base> is available to the script
+      - uses: actions/setup-node@v4
+        with:
+          node-version: '20'
+      - run: node e2e/scripts/check-journey-coverage.mjs --all --no-regress --base origin/${{ github.base_ref }}
+
+  claude-review-coverage:  # asks Claude to check that E2E coverage metadata matches the changed source files
+    name: 'Claude E2E Coverage Review'
+    needs: [gate, acquire-stg-slot]
+    if: always() && needs.acquire-stg-slot.result == 'success'  # effective condition: a stg slot was acquired
+    runs-on: ubuntu-22.04
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # full history so the three-dot diff against origin/<base> works
+      - name: Get changed files  # outputs: files (newline list, max 100) and count
+        id: changes
+        run: |
+          CHANGED=$(git diff --name-only origin/${{ github.base_ref }}...HEAD -- app/ components/ hooks/ lib/ providers/ | head -100)
+          echo "files<<EOF" >> "$GITHUB_OUTPUT"
+          echo "$CHANGED" >> "$GITHUB_OUTPUT"
+          echo "EOF" >> "$GITHUB_OUTPUT"
+          echo "count=$(echo "$CHANGED" | grep -c . || true)" >> "$GITHUB_OUTPUT"  # grep -c prints 0 itself on no match; '|| echo 0' added a second, malformed line
+      - name: Claude coverage analysis
+        if: steps.changes.outputs.count != '0'
+        uses: anthropics/claude-code-action@beta
+        with:
+          anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
+          direct_prompt: |
+            You are an E2E coverage analyst. Analyze the changed files in this PR and determine if e2e/coverage.json and e2e/COVERAGE.md are properly updated.
+
+            Changed files:
+            ${{ steps.changes.outputs.files }}
+
+            Tasks:
+            1. Read e2e/coverage.json and e2e/COVERAGE.md
+            2. For each changed file in app/ or components/:
+               - Check if it's listed in a journey's sourceFiles in coverage.json
+               - If it's a new page.tsx route, verify a journey covers it
+               - If it adds new user-visible behavior, check for corresponding checkpoints
+            3. Compare checkpoint counts: current branch vs base branch
+            4. Post a summary comment on the PR with:
+               - Files covered by existing journeys
+               - Files missing coverage (if any)
+               - Checkpoint count change (regression warning if decreased)
+               - Recommended actions (new journeys or checkpoints to add)
+
+            If all changes are covered, approve. If there are gaps, request changes with specific instructions.
+
+  claude-review-quality:  # shared Claude quality-review workflow from iblai-web-ops
+    needs: [gate, acquire-stg-slot]
+    if: always() && needs.acquire-stg-slot.result == 'success'  # effective condition: a stg slot was acquired
+    uses: iblai/iblai-web-ops/.github/workflows/reusable-claude-review-quality.yml@main
+    with:
+      pr-number: ${{ github.event.pull_request.number }}
+    secrets:
+      ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}  # only this secret is forwarded (no 'inherit')
+
+  claude-review-uiux:  # shared Claude UI/UX-review workflow from iblai-web-ops
+    needs: [gate, acquire-stg-slot]
+    if: always() && needs.acquire-stg-slot.result == 'success'  # effective condition: a stg slot was acquired
+    uses: iblai/iblai-web-ops/.github/workflows/reusable-claude-review-uiux.yml@main
+    with:
+      pr-number: ${{ github.event.pull_request.number }}
+    secrets:
+      ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}  # only this secret is forwarded (no 'inherit')
+
+  # ============================================================
+  # CHECK PRODUCTION VERSIONS: reads the mentor/auth image URIs used as fallback (mentor) and as the auth deployment image
+  # ============================================================
+  check-prod-versions:
+    needs: [gate, acquire-stg-slot]
+    if: always() && needs.acquire-stg-slot.result == 'success'  # effective condition: a stg slot was acquired
+    runs-on: image-deploy-ops-new  # custom runner label (not a GitHub-hosted image name)
+    environment: iblai.app
+    outputs:
+      mentor-uri: ${{ steps.read-mentor.outputs.image-uri }}
+      auth-uri: ${{ steps.read-auth.outputs.image-uri }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          repository: iblai/iblai-web-ops
+          token: ${{ secrets.GIT_TOKEN || github.token }}  # falls back to the workflow token when GIT_TOKEN is unset
+          path: .ops  # the local actions below are loaded from this checkout
+      - uses: ./.ops/.github/actions/setup-ssh
+        with:
+          ssh-private-key: ${{ secrets.SSH_PRIVATE_DEPLOY_OPS }}
+      - id: read-mentor
+        uses: ./.ops/.github/actions/read-prod-versions
+        with:
+          app-name: mentor
+          nodes-json: ${{ vars.NODES_SPA_CHECK }}
+      - id: read-auth
+        uses: ./.ops/.github/actions/read-prod-versions
+        with:
+          app-name: auth
+          nodes-json: ${{ vars.NODES_SPA_CHECK }}
+
+  # ============================================================
+  # BUILD DOCKER IMAGES
+  # ============================================================
+  build-app-image:  # builds the PR's mentor image; skipped unless gate reports should-build-app == 'true'
+    needs: [gate, acquire-stg-slot]
+    if: >-
+      always() &&
+      needs.acquire-stg-slot.result == 'success' &&
+      needs.gate.outputs.should-build-app == 'true'
+    uses: iblai/iblai-web-ops/.github/workflows/reusable-pr-docker-build.yml@main
+    secrets: inherit
+    with:
+      dockerfile-path: Dockerfile
+      image-name: ibl-mentor-spa-pro
+      registry-type: ecr  # the Playwright image below is pushed with registry-type ocir instead
+      event-name: pull_request
+      pr-number: ${{ github.event.pull_request.number }}
+
+  build-playwright-image:  # builds the E2E test-runner image consumed by the browser test jobs
+    needs: [gate, acquire-stg-slot]
+    if: always() && needs.acquire-stg-slot.result == 'success'  # built on every run, regardless of should-build-app
+    uses: iblai/iblai-web-ops/.github/workflows/reusable-pr-docker-build.yml@main
+    secrets: inherit
+    with:
+      dockerfile-path: e2e/Dockerfile
+      build-context: .  # repository root, although the Dockerfile lives under e2e/
+      image-name: ibl-mentor-playwright
+      registry-type: ocir
+      event-name: pull_request
+      pr-number: ${{ github.event.pull_request.number }}
+
+  # ============================================================
+  # LAUNCH INFRASTRUCTURE (EC2 from AMI)
+  # ============================================================
+  launch-infra:
+    needs: [gate, acquire-stg-slot]
+    if: always() && needs.acquire-stg-slot.result == 'success'
+    uses: iblai/iblai-web-ops/.github/workflows/reusable-service-update.yml@main
+    with:
+      ami-id: ${{ needs.acquire-stg-slot.outputs.domain-number == '1' && vars.STG1_AMI_ID || needs.acquire-stg-slot.outputs.domain-number == '2' && vars.STG2_AMI_ID || needs.acquire-stg-slot.outputs.domain-number == '3' && vars.STG3_AMI_ID || vars.STG4_AMI_ID }}
+      subnet-id: ${{ needs.acquire-stg-slot.outputs.domain-number == '1' && vars.STG1_SUBNET_ID || needs.acquire-stg-slot.outputs.domain-number == '2' && vars.STG2_SUBNET_ID || needs.acquire-stg-slot.outputs.domain-number == '3' && vars.STG3_SUBNET_ID || vars.STG4_SUBNET_ID }}
+      security-group-id: ${{ needs.acquire-stg-slot.outputs.domain-number == '1' && vars.STG1_SG_ID || needs.acquire-stg-slot.outputs.domain-number == '2' && vars.STG2_SG_ID || needs.acquire-stg-slot.outputs.domain-number == '3' && vars.STG3_SG_ID || vars.STG4_SG_ID }}
+      target-group-arn: ${{ needs.acquire-stg-slot.outputs.domain-number == '1' && vars.STG1_TG_ARN || needs.acquire-stg-slot.outputs.domain-number == '2' && vars.STG2_TG_ARN || needs.acquire-stg-slot.outputs.domain-number == '3' && vars.STG3_TG_ARN || vars.STG4_TG_ARN }}
+      key-pair-name: ${{ needs.acquire-stg-slot.outputs.domain-number == '1' && vars.STG1_KEY_PAIR || needs.acquire-stg-slot.outputs.domain-number == '2' && vars.STG2_KEY_PAIR || needs.acquire-stg-slot.outputs.domain-number == '3' && vars.STG3_KEY_PAIR || vars.STG4_KEY_PAIR }}
+      project-name: mentor-pr-${{ github.event.pull_request.number }}
+    secrets:
+      aws-access-key-id: ${{ secrets.SERVICE_UPDATE_ACCESS_KEY }}
+      aws-secret-access-key: ${{ secrets.SERVICE_UPDATE_SECRET_KEY }}
+      ssh-private-key: ${{ needs.acquire-stg-slot.outputs.domain-number == '1' && secrets.STG1_SSH_KEY || needs.acquire-stg-slot.outputs.domain-number == '2' && secrets.STG2_SSH_KEY || needs.acquire-stg-slot.outputs.domain-number == '3' && secrets.STG3_SSH_KEY || secrets.STG4_SSH_KEY }}
+      git-token: ${{ secrets.GIT_TOKEN }}
+
+  # ============================================================
+  # DEPLOY PR MENTOR IMAGE onto launched instance
+  # ============================================================
+  deploy-app:
+    needs: [gate, acquire-stg-slot, lint, build-app-image, check-prod-versions, launch-infra]
+    if: >-
+      always() &&
+      needs.launch-infra.result == 'success' &&
+      needs.lint.result == 'success' &&
+      needs.build-app-image.result != 'failure'
+    uses: iblai/iblai-web-ops/.github/workflows/reusable-spa-deployment.yml@main
+    with:
+      app-name: MENTOR
+      image-uri: ${{ vars.ENABLE_TESTING == 'true' && vars.MENTOR_IMAGE || needs.build-app-image.outputs.image-uri || needs.check-prod-versions.outputs.mentor-uri }}
+      deployment-path: /ibl/app/ibl-spa/mentor
+      nodes-json: '[{"name": "stg${{ needs.acquire-stg-slot.outputs.domain-number }}", "host": "${{ needs.launch-infra.outputs.instance-ip }}"}]'
+      venv-name: ibl-cli-ops
+    secrets:
+      ssh-private-key: ${{ needs.acquire-stg-slot.outputs.domain-number == '1' && secrets.STG1_SSH_KEY || needs.acquire-stg-slot.outputs.domain-number == '2' && secrets.STG2_SSH_KEY || needs.acquire-stg-slot.outputs.domain-number == '3' && secrets.STG3_SSH_KEY || secrets.STG4_SSH_KEY }}
+      git-token: ${{ secrets.GIT_TOKEN }}
+
+  # ============================================================
+  # DEPLOY AUTH (prod image) onto launched instance
+  # ============================================================
+  deploy-auth:
+    needs: [gate, acquire-stg-slot, check-prod-versions, launch-infra, deploy-app]
+    if: always() && needs.launch-infra.result == 'success' && needs.deploy-app.result == 'success'
+    uses: iblai/iblai-web-ops/.github/workflows/reusable-spa-deployment.yml@main
+    with:
+      app-name: AUTH
+      image-uri: ${{ needs.check-prod-versions.outputs.auth-uri }}
+      deployment-path: /ibl/app/ibl-spa/auth
+      nodes-json: '[{"name": "stg${{ needs.acquire-stg-slot.outputs.domain-number }}", "host": "${{ needs.launch-infra.outputs.instance-ip }}"}]'
+      venv-name: ibl-cli-ops
+    secrets:
+      ssh-private-key: ${{ needs.acquire-stg-slot.outputs.domain-number == '1' && secrets.STG1_SSH_KEY || needs.acquire-stg-slot.outputs.domain-number == '2' && secrets.STG2_SSH_KEY || needs.acquire-stg-slot.outputs.domain-number == '3' && secrets.STG3_SSH_KEY || secrets.STG4_SSH_KEY }}
+      git-token: ${{ secrets.GIT_TOKEN }}
+
+  # ============================================================
+  # HEALTH CHECK
+  # ============================================================
+  verify:
+    needs: [gate, acquire-stg-slot, deploy-auth, build-app-image, check-prod-versions, launch-infra]
+    if: always() && needs.deploy-auth.result == 'success'
+    uses: iblai/iblai-web-ops/.github/workflows/reusable-pre-test-health-check.yml@main
+    with:
+      app-name: MENTOR
+      expected-image-uri: ${{ vars.ENABLE_TESTING == 'true' && vars.MENTOR_IMAGE || needs.build-app-image.outputs.image-uri || needs.check-prod-versions.outputs.mentor-uri }}
+      deployment-path: /ibl/app/ibl-spa/mentor
+      nodes-json: '[{"name": "stg${{ needs.acquire-stg-slot.outputs.domain-number }}", "host": "${{ needs.launch-infra.outputs.instance-ip }}"}]'
+      venv-name: ibl-cli-ops
+      health-port: '5001'
+    secrets:
+      ssh-private-key: ${{ needs.acquire-stg-slot.outputs.domain-number == '1' && secrets.STG1_SSH_KEY || needs.acquire-stg-slot.outputs.domain-number == '2' && secrets.STG2_SSH_KEY || needs.acquire-stg-slot.outputs.domain-number == '3' && secrets.STG3_SSH_KEY || secrets.STG4_SSH_KEY }}
+      git-token: ${{ secrets.GIT_TOKEN }}
+
+  # ============================================================
+  # PARALLEL BROWSER TESTS (all 4 after health check)
+  # ============================================================
+  test-chrome:
+    needs: [gate, check-resumption, acquire-stg-slot, deploy-auth, build-playwright-image, verify]
+    if: >-
+      always() &&
+      needs.verify.result == 'success' &&
+      needs.check-resumption.outputs.chrome-resumption-mode != 'all-passed'
+    uses: iblai/iblai-web-ops/.github/workflows/reusable-oci-test-runner.yml@main
+    secrets: inherit
+    with:
+      domain-number: ${{ needs.acquire-stg-slot.outputs.domain-number }}
+      app-type: mentor
+      playwright-image: ${{ needs.build-playwright-image.outputs.image-uri }}
+      pr-number: ${{ github.event.pull_request.number }}
+      max-wait: 5400
+      total-shards: 1
+      run-type: main-chrome
+      browsers: 'chrome'
+      test-files: ${{ needs.check-resumption.outputs.chrome-test-files }}
+      workers: '3'
+
+  test-firefox:  # Firefox run; skipped when the resumption check reports 'all-passed' for firefox
+    needs: [gate, check-resumption, acquire-stg-slot, deploy-auth, build-playwright-image, verify]
+    if: >-
+      always() &&
+      needs.verify.result == 'success' &&
+      needs.check-resumption.outputs.firefox-resumption-mode != 'all-passed'
+    uses: iblai/iblai-web-ops/.github/workflows/reusable-oci-test-runner.yml@main
+    secrets: inherit
+    with:
+      domain-number: ${{ needs.acquire-stg-slot.outputs.domain-number }}  # stg slot picked by acquire-stg-slot
+      app-type: mentor
+      playwright-image: ${{ needs.build-playwright-image.outputs.image-uri }}
+      pr-number: ${{ github.event.pull_request.number }}
+      max-wait: 5400  # presumably seconds - confirm against reusable-oci-test-runner.yml
+      total-shards: 1
+      run-type: main-firefox
+      browsers: 'firefox'
+      test-files: ${{ needs.check-resumption.outputs.firefox-test-files }}  # file selection supplied by check-resumption
+      workers: '3'
+
+  test-safari:  # Safari run; skipped when the resumption check reports 'all-passed' for safari
+    needs: [gate, check-resumption, acquire-stg-slot, deploy-auth, build-playwright-image, verify]
+    if: >-
+      always() &&
+      needs.verify.result == 'success' &&
+      needs.check-resumption.outputs.safari-resumption-mode != 'all-passed'
+    uses: iblai/iblai-web-ops/.github/workflows/reusable-oci-test-runner.yml@main
+    secrets: inherit
+    with:
+      domain-number: ${{ needs.acquire-stg-slot.outputs.domain-number }}  # stg slot picked by acquire-stg-slot
+      app-type: mentor
+      playwright-image: ${{ needs.build-playwright-image.outputs.image-uri }}
+      pr-number: ${{ github.event.pull_request.number }}
+      max-wait: 5400  # presumably seconds - confirm against reusable-oci-test-runner.yml
+      total-shards: 1
+      run-type: main-safari
+      browsers: 'safari'
+      test-files: ${{ needs.check-resumption.outputs.safari-test-files }}  # file selection supplied by check-resumption
+      workers: '3'
+
+  test-edge:  # Edge run; skipped when the resumption check reports 'all-passed' for edge
+    needs: [gate, check-resumption, acquire-stg-slot, deploy-auth, build-playwright-image, verify]
+    if: >-
+      always() &&
+      needs.verify.result == 'success' &&
+      needs.check-resumption.outputs.edge-resumption-mode != 'all-passed'
+    uses: iblai/iblai-web-ops/.github/workflows/reusable-oci-test-runner.yml@main
+    secrets: inherit
+    with:
+      domain-number: ${{ needs.acquire-stg-slot.outputs.domain-number }}  # stg slot picked by acquire-stg-slot
+      app-type: mentor
+      playwright-image: ${{ needs.build-playwright-image.outputs.image-uri }}
+      pr-number: ${{ github.event.pull_request.number }}
+      max-wait: 5400  # presumably seconds - confirm against reusable-oci-test-runner.yml
+      total-shards: 1
+      run-type: main-edge
+      browsers: 'edge'
+      test-files: ${{ needs.check-resumption.outputs.edge-test-files }}  # file selection supplied by check-resumption
+      workers: '3'
+
+  # ============================================================
+  # SAVE TEST STATUS: writes per-browser passed/failed/pending to S3 (run-status.json) for resumption on the next run
+  # ============================================================
+  save-status:
+    name: 'Save Mentor Status'
+    needs: [gate, test-chrome, test-firefox, test-safari, test-edge]
+    if: always() && needs.gate.result != 'skipped'  # bare always() also ran for unrelated labels / release branches
+    runs-on: ubuntu-22.04
+    environment: iblai.app
+    steps:
+      - name: Save browser status to S3
+        env:
+          CHROME_RESULT: ${{ needs.test-chrome.result }}
+          FIREFOX_RESULT: ${{ needs.test-firefox.result }}
+          SAFARI_RESULT: ${{ needs.test-safari.result }}
+          EDGE_RESULT: ${{ needs.test-edge.result }}
+          S3_BUCKET: ${{ secrets.S3_LOGS_BUCKET }}
+          AWS_ACCESS_KEY_ID: ${{ secrets.S3_LOGS_ACCESS_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.S3_LOGS_SECRET_ACCESS_KEY }}
+          AWS_REGION: ${{ vars.AWS_REGION || 'us-east-1' }}
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          COMMIT_SHA: ${{ github.sha }}  # NOTE(review): on pull_request this is the merge commit, not the PR head - confirm intended
+        run: |
+          PREV_STATUS="/tmp/prev-status.json"
+          S3_PATH="s3://$S3_BUCKET/pr/$PR_NUMBER/mentor/run-status.json"
+          aws s3 cp "$S3_PATH" "$PREV_STATUS" --region "$AWS_REGION" 2>/dev/null || echo '{}' > "$PREV_STATUS"  # no previous file -> empty object
+
+          get_status() {  # args: <job result> <browser>; prints passed | failed | pending (or the carried-over value)
+            local result=$1 browser=$2
+            if [ "$result" = "skipped" ]; then  # job did not run: carry over the previously stored status
+              jq -r ".browsers.${browser} // \"pending\"" "$PREV_STATUS" 2>/dev/null || echo "pending"
+              return
+            fi
+            if [ "$result" != "success" ]; then echo "failed"; return; fi  # failure / cancelled
+            local results_file="/tmp/test-results-${browser}.json"
+            local results_path="s3://$S3_BUCKET/pr/$PR_NUMBER/mentor/test-results-${browser}.json"
+            if aws s3 cp "$results_path" "$results_file" --region "$AWS_REGION" >/dev/null 2>&1; then
+              local failed_count
+              failed_count=$(jq '[.tests | to_entries[] | select(.value == "failed")] | length' "$results_file" 2>/dev/null || echo "-1")
+              if [ "$failed_count" = "0" ] || [ "$failed_count" = "-1" ]; then echo "passed"; else echo "failed"; fi  # -1 = unparsable results, treated as passed
+            else
+              echo "passed"  # job succeeded but no per-test results file was found
+            fi
+          }
+
+          cat > /tmp/run-status.json <<EOF
+          {
+            "commit": "$COMMIT_SHA",
+            "browsers": {
+              "chrome": "$(get_status "$CHROME_RESULT" "chrome")",
+              "firefox": "$(get_status "$FIREFOX_RESULT" "firefox")",
+              "safari": "$(get_status "$SAFARI_RESULT" "safari")",
+              "edge": "$(get_status "$EDGE_RESULT" "edge")"
+            }
+          }
+          EOF
+
+          aws s3 cp /tmp/run-status.json "$S3_PATH" --region "$AWS_REGION" --content-type "application/json"
+          echo "Status saved"
+
+  # ============================================================
+  # RELEASE STG SLOT: waits for all quality/build/deploy/test jobs (whatever their result), then frees the slot
+  # ============================================================
+  release-stg-slot:
+    needs:
+      [
+        gate, acquire-stg-slot, pinned-versions, commitlint, lint, typecheck,
+        coverage, e2e-coverage-check, claude-review-coverage,
+        claude-review-quality, claude-review-uiux, check-prod-versions,
+        build-app-image, build-playwright-image, launch-infra,
+        deploy-app, deploy-auth, verify,
+        test-chrome, test-firefox, test-safari, test-edge,
+        save-status,
+      ]
+    if: always() && needs.acquire-stg-slot.result == 'success'  # release only what was actually acquired
+    uses: iblai/iblai-web-ops/.github/workflows/reusable-domain-lock.yml@main
+    secrets: inherit
+    with:
+      action: release
+      app-type: testing
+      domain-number: ${{ needs.acquire-stg-slot.outputs.domain-number }}  # NOTE(review): terminate-infra runs after this job, so the slot is free while the instance still exists - confirm acceptable
+
+  # ============================================================
+  # TERMINATE INFRASTRUCTURE (runs only when launch-infra reported an instance id)
+  # ============================================================
+  terminate-infra:
+    needs: [launch-infra, save-status, release-stg-slot]
+    if: always() && needs.launch-infra.outputs.instance-id != ''
+    runs-on: ubuntu-22.04  # pinned like the other jobs instead of the floating ubuntu-latest
+    env:
+      AWS_ACCESS_KEY_ID: ${{ secrets.SERVICE_UPDATE_ACCESS_KEY }}
+      AWS_SECRET_ACCESS_KEY: ${{ secrets.SERVICE_UPDATE_SECRET_KEY }}
+      AWS_DEFAULT_REGION: us-east-1
+      INSTANCE_ID: ${{ needs.launch-infra.outputs.instance-id }}  # passed via env rather than spliced into the script text
+    steps:
+      - name: Terminate EC2 instance
+        run: |
+          echo "Terminating instance $INSTANCE_ID..."
+          aws ec2 terminate-instances --instance-ids "$INSTANCE_ID" --query 'TerminatingInstances[].CurrentState.Name' --output text  # prints the state the API reports (e.g. shutting-down); the call only requests termination
+
+  # ============================================================
+  # FINAL SUMMARY: writes the step summary, fails on any failed check, and publishes the 'PR Validation' commit status
+  # ============================================================
+  summary:
+    needs:
+      [
+        gate, acquire-stg-slot, pinned-versions, commitlint, lint, typecheck,
+        coverage, e2e-coverage-check, claude-review-coverage,
+        claude-review-quality, claude-review-uiux, check-prod-versions,
+        build-app-image, build-playwright-image, launch-infra,
+        deploy-app, deploy-auth, verify,
+        test-chrome, test-firefox, test-safari, test-edge,
+        save-status, release-stg-slot, terminate-infra,
+      ]
+    if: always() && needs.gate.result != 'skipped'  # bare always() published a failing status for unrelated labels
+    runs-on: ubuntu-22.04
+    steps:
+      - name: Generate summary
+        run: |
+          echo "# Mentor PR Test Validation Results (AMI)" >> $GITHUB_STEP_SUMMARY
+          echo "" >> $GITHUB_STEP_SUMMARY
+          echo "**PR:** #${{ github.event.pull_request.number }}" >> $GITHUB_STEP_SUMMARY
+          echo "**Stg slot:** stg${{ needs.acquire-stg-slot.outputs.domain-number }}" >> $GITHUB_STEP_SUMMARY
+          echo "**Instance:** ${{ needs.launch-infra.outputs.instance-id }} (${{ needs.launch-infra.outputs.instance-ip }})" >> $GITHUB_STEP_SUMMARY
+          echo "" >> $GITHUB_STEP_SUMMARY
+
+          echo "## Code Quality" >> $GITHUB_STEP_SUMMARY
+          echo "| Check | Status |" >> $GITHUB_STEP_SUMMARY
+          echo "|-------|:------:|" >> $GITHUB_STEP_SUMMARY
+
+          for PAIR in \
+            "Pinned Versions:${{ needs.pinned-versions.result }}" \
+            "Conventional Commits:${{ needs.commitlint.result }}" \
+            "Linting:${{ needs.lint.outputs.result }}" \
+            "TypeCheck:${{ needs.typecheck.outputs.result }}" \
+            "Unit Coverage:${{ needs.coverage.outputs.result }}" \
+            "E2E Coverage:${{ needs.e2e-coverage-check.result }}"; do
+            CHECK="${PAIR%%:*}"  # text before the first ':' is the label
+            RESULT="${PAIR#*:}"  # text after the first ':' is the result
+            if [[ "$RESULT" == "success" ]]; then
+              echo "| $CHECK | :white_check_mark: PASSED |" >> $GITHUB_STEP_SUMMARY
+            elif [[ "$RESULT" == "skipped" ]]; then
+              echo "| $CHECK | :fast_forward: SKIPPED |" >> $GITHUB_STEP_SUMMARY
+            else
+              echo "| $CHECK | :x: FAILED |" >> $GITHUB_STEP_SUMMARY
+            fi
+          done
+          echo "" >> $GITHUB_STEP_SUMMARY
+
+          echo "## Infrastructure" >> $GITHUB_STEP_SUMMARY
+          echo "| Step | Status |" >> $GITHUB_STEP_SUMMARY
+          echo "|------|:------:|" >> $GITHUB_STEP_SUMMARY
+          for PAIR in \
+            "Launch Infra:${{ needs.launch-infra.result }}" \
+            "Deploy App:${{ needs.deploy-app.result }}" \
+            "Deploy Auth:${{ needs.deploy-auth.result }}" \
+            "Health Check:${{ needs.verify.result }}" \
+            "Terminate:${{ needs.terminate-infra.result }}"; do
+            CHECK="${PAIR%%:*}"
+            RESULT="${PAIR#*:}"
+            if [[ "$RESULT" == "success" ]]; then
+              echo "| $CHECK | :white_check_mark: |" >> $GITHUB_STEP_SUMMARY
+            else
+              echo "| $CHECK | :x: $RESULT |" >> $GITHUB_STEP_SUMMARY
+            fi
+          done
+          echo "" >> $GITHUB_STEP_SUMMARY
+
+          echo "## Browser Tests (Parallel)" >> $GITHUB_STEP_SUMMARY
+          echo "| Browser | Status |" >> $GITHUB_STEP_SUMMARY
+          echo "|---------|:------:|" >> $GITHUB_STEP_SUMMARY
+          for PAIR in \
+            "Chrome:${{ needs.test-chrome.outputs.result }}" \
+            "Firefox:${{ needs.test-firefox.outputs.result }}" \
+            "Safari:${{ needs.test-safari.outputs.result }}" \
+            "Edge:${{ needs.test-edge.outputs.result }}"; do
+            BROWSER="${PAIR%%:*}"
+            RESULT="${PAIR#*:}"
+            if [[ "$RESULT" == "success" ]]; then
+              echo "| $BROWSER | :white_check_mark: PASSED |" >> $GITHUB_STEP_SUMMARY
+            elif [[ -z "$RESULT" ]]; then  # an empty output is reported as skipped
+              echo "| $BROWSER | :fast_forward: SKIPPED |" >> $GITHUB_STEP_SUMMARY
+            else
+              echo "| $BROWSER | :x: FAILED |" >> $GITHUB_STEP_SUMMARY
+            fi
+          done
+
+      - name: Check results  # exits 1 when any required check or any executed browser run did not succeed
+        id: check-results
+        if: always()
+        run: |
+          FAILED=false
+          for R in "${{ needs.pinned-versions.result }}" "${{ needs.commitlint.result }}" "${{ needs.lint.outputs.result }}" "${{ needs.typecheck.outputs.result }}" "${{ needs.coverage.outputs.result }}"; do
+            [[ "$R" != "success" ]] && FAILED=true
+          done
+          [[ "${{ needs.e2e-coverage-check.result }}" == "failure" ]] && FAILED=true
+          for R in "${{ needs.test-chrome.outputs.result }}" "${{ needs.test-firefox.outputs.result }}" "${{ needs.test-safari.outputs.result }}" "${{ needs.test-edge.outputs.result }}"; do
+            [[ -n "$R" && "$R" != "success" ]] && FAILED=true
+          done
+          if [[ "$FAILED" == "true" ]]; then exit 1; else exit 0; fi
+
+      - name: Update PR gate status
+        if: always()
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const passed = '${{ steps.check-results.outcome }}' === 'success';  // outcome of the "Check results" step
+            await github.rest.repos.createCommitStatus({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              sha: context.payload.pull_request.head.sha,
+              state: passed ? 'success' : 'failure',
+              context: 'PR Validation',
+              description: passed ? 'All checks passed' : 'Some checks failed',
+            });

From 8262adf603ac6d89bfcb66a3550937c7c84db29e Mon Sep 17 00:00:00 2001
From: bnsoni <bhargavsoni8@gmail.com>
Date: Wed, 8 Apr 2026 14:04:45 +0300
Subject: [PATCH 2/4] refactor: inline launch/deploy steps to support per-stg
 SSH keys

GitHub Actions doesn't allow dynamic secret selection in reusable
workflow secrets: blocks. Replaced reusable-service-update.yml +
reusable-spa-deployment.yml calls with a single inline
launch-and-deploy job that:

1. Resolves stg config based on domain-lock slot (1-4)
2. Selects the correct SSH key (STG1-4_SSH_KEY) via case statement
3. Opens SSH SG rule for runner IP
4. Launches EC2 from AMI (boto3)
5. Waits for SSH + runs service-update (Ansible)
6. Registers in ALB target group
7. Deploys PR mentor image + prod auth image
8. Waits for mentor health (with auto-restart on empty reply)
9. Revokes SSH SG rule

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .github/workflows/spa-pr-validation-new.yml | 277 +++++++++++++-------
 1 file changed, 189 insertions(+), 88 deletions(-)

diff --git a/.github/workflows/spa-pr-validation-new.yml b/.github/workflows/spa-pr-validation-new.yml
index 3a6f785b..0ac1ad74 100644
--- a/.github/workflows/spa-pr-validation-new.yml
+++ b/.github/workflows/spa-pr-validation-new.yml
@@ -280,89 +280,189 @@ jobs:
       pr-number: ${{ github.event.pull_request.number }}
 
   # ============================================================
-  # LAUNCH INFRASTRUCTURE (EC2 from AMI)
+  # LAUNCH INFRA + DEPLOY + VERIFY (single job, inline)
   # ============================================================
-  launch-infra:
-    needs: [gate, acquire-stg-slot]
-    if: always() && needs.acquire-stg-slot.result == 'success'
-    uses: iblai/iblai-web-ops/.github/workflows/reusable-service-update.yml@main
-    with:
-      ami-id: ${{ needs.acquire-stg-slot.outputs.domain-number == '1' && vars.STG1_AMI_ID || needs.acquire-stg-slot.outputs.domain-number == '2' && vars.STG2_AMI_ID || needs.acquire-stg-slot.outputs.domain-number == '3' && vars.STG3_AMI_ID || vars.STG4_AMI_ID }}
-      subnet-id: ${{ needs.acquire-stg-slot.outputs.domain-number == '1' && vars.STG1_SUBNET_ID || needs.acquire-stg-slot.outputs.domain-number == '2' && vars.STG2_SUBNET_ID || needs.acquire-stg-slot.outputs.domain-number == '3' && vars.STG3_SUBNET_ID || vars.STG4_SUBNET_ID }}
-      security-group-id: ${{ needs.acquire-stg-slot.outputs.domain-number == '1' && vars.STG1_SG_ID || needs.acquire-stg-slot.outputs.domain-number == '2' && vars.STG2_SG_ID || needs.acquire-stg-slot.outputs.domain-number == '3' && vars.STG3_SG_ID || vars.STG4_SG_ID }}
-      target-group-arn: ${{ needs.acquire-stg-slot.outputs.domain-number == '1' && vars.STG1_TG_ARN || needs.acquire-stg-slot.outputs.domain-number == '2' && vars.STG2_TG_ARN || needs.acquire-stg-slot.outputs.domain-number == '3' && vars.STG3_TG_ARN || vars.STG4_TG_ARN }}
-      key-pair-name: ${{ needs.acquire-stg-slot.outputs.domain-number == '1' && vars.STG1_KEY_PAIR || needs.acquire-stg-slot.outputs.domain-number == '2' && vars.STG2_KEY_PAIR || needs.acquire-stg-slot.outputs.domain-number == '3' && vars.STG3_KEY_PAIR || vars.STG4_KEY_PAIR }}
-      project-name: mentor-pr-${{ github.event.pull_request.number }}
-    secrets:
-      aws-access-key-id: ${{ secrets.SERVICE_UPDATE_ACCESS_KEY }}
-      aws-secret-access-key: ${{ secrets.SERVICE_UPDATE_SECRET_KEY }}
-      ssh-private-key: ${{ needs.acquire-stg-slot.outputs.domain-number == '1' && secrets.STG1_SSH_KEY || needs.acquire-stg-slot.outputs.domain-number == '2' && secrets.STG2_SSH_KEY || needs.acquire-stg-slot.outputs.domain-number == '3' && secrets.STG3_SSH_KEY || secrets.STG4_SSH_KEY }}
-      git-token: ${{ secrets.GIT_TOKEN }}
-
-  # ============================================================
-  # DEPLOY PR MENTOR IMAGE onto launched instance
-  # ============================================================
-  deploy-app:
-    needs: [gate, acquire-stg-slot, lint, build-app-image, check-prod-versions, launch-infra]
+  launch-and-deploy:
+    needs: [gate, acquire-stg-slot, lint, build-app-image, check-prod-versions]
     if: >-
       always() &&
-      needs.launch-infra.result == 'success' &&
+      needs.acquire-stg-slot.result == 'success' &&
       needs.lint.result == 'success' &&
       needs.build-app-image.result != 'failure'
-    uses: iblai/iblai-web-ops/.github/workflows/reusable-spa-deployment.yml@main
-    with:
-      app-name: MENTOR
-      image-uri: ${{ vars.ENABLE_TESTING == 'true' && vars.MENTOR_IMAGE || needs.build-app-image.outputs.image-uri || needs.check-prod-versions.outputs.mentor-uri }}
-      deployment-path: /ibl/app/ibl-spa/mentor
-      nodes-json: '[{"name": "stg${{ needs.acquire-stg-slot.outputs.domain-number }}", "host": "${{ needs.launch-infra.outputs.instance-ip }}"}]'
-      venv-name: ibl-cli-ops
-    secrets:
-      ssh-private-key: ${{ needs.acquire-stg-slot.outputs.domain-number == '1' && secrets.STG1_SSH_KEY || needs.acquire-stg-slot.outputs.domain-number == '2' && secrets.STG2_SSH_KEY || needs.acquire-stg-slot.outputs.domain-number == '3' && secrets.STG3_SSH_KEY || secrets.STG4_SSH_KEY }}
-      git-token: ${{ secrets.GIT_TOKEN }}
+    runs-on: ubuntu-latest
+    timeout-minutes: 45
+    outputs:
+      instance-id: ${{ steps.launch.outputs.instance-id }}
+      instance-ip: ${{ steps.launch.outputs.instance-ip }}
+    env:
+      AWS_ACCESS_KEY_ID: ${{ secrets.SERVICE_UPDATE_ACCESS_KEY }}
+      AWS_SECRET_ACCESS_KEY: ${{ secrets.SERVICE_UPDATE_SECRET_KEY }}
+      AWS_DEFAULT_REGION: us-east-1
+    steps:
+      - name: Resolve stg config
+        id: config
+        run: |
+          SLOT="${{ needs.acquire-stg-slot.outputs.domain-number }}"
+          echo "Acquired stg slot: $SLOT"
+
+          declare -A AMIS SUBNETS SGS TGS KEYS
+          AMIS[1]="${{ vars.STG1_AMI_ID }}"; AMIS[2]="${{ vars.STG2_AMI_ID }}"; AMIS[3]="${{ vars.STG3_AMI_ID }}"; AMIS[4]="${{ vars.STG4_AMI_ID }}"
+          SUBNETS[1]="${{ vars.STG1_SUBNET_ID }}"; SUBNETS[2]="${{ vars.STG2_SUBNET_ID }}"; SUBNETS[3]="${{ vars.STG3_SUBNET_ID }}"; SUBNETS[4]="${{ vars.STG4_SUBNET_ID }}"
+          SGS[1]="${{ vars.STG1_SG_ID }}"; SGS[2]="${{ vars.STG2_SG_ID }}"; SGS[3]="${{ vars.STG3_SG_ID }}"; SGS[4]="${{ vars.STG4_SG_ID }}"
+          TGS[1]="${{ vars.STG1_TG_ARN }}"; TGS[2]="${{ vars.STG2_TG_ARN }}"; TGS[3]="${{ vars.STG3_TG_ARN }}"; TGS[4]="${{ vars.STG4_TG_ARN }}"
+          KEYS[1]="${{ vars.STG1_KEY_PAIR }}"; KEYS[2]="${{ vars.STG2_KEY_PAIR }}"; KEYS[3]="${{ vars.STG3_KEY_PAIR }}"; KEYS[4]="${{ vars.STG4_KEY_PAIR }}"
+
+          echo "ami-id=${AMIS[$SLOT]}" >> $GITHUB_OUTPUT
+          echo "subnet-id=${SUBNETS[$SLOT]}" >> $GITHUB_OUTPUT
+          echo "security-group-id=${SGS[$SLOT]}" >> $GITHUB_OUTPUT
+          echo "target-group-arn=${TGS[$SLOT]}" >> $GITHUB_OUTPUT
+          echo "key-pair-name=${KEYS[$SLOT]}" >> $GITHUB_OUTPUT
+
+      - name: Write SSH key
+        run: |
+          mkdir -p ~/.ssh
+          SLOT="${{ needs.acquire-stg-slot.outputs.domain-number }}"
+          case "$SLOT" in
+            1) echo "${{ secrets.STG1_SSH_KEY }}" > ~/.ssh/deploy-key ;;
+            2) echo "${{ secrets.STG2_SSH_KEY }}" > ~/.ssh/deploy-key ;;
+            3) echo "${{ secrets.STG3_SSH_KEY }}" > ~/.ssh/deploy-key ;;
+            4) echo "${{ secrets.STG4_SSH_KEY }}" > ~/.ssh/deploy-key ;;
+          esac
+          chmod 600 ~/.ssh/deploy-key
+
+      - name: Get runner IP and open SSH
+        id: runner-ip
+        run: |
+          RUNNER_IP=$(curl -s https://checkip.amazonaws.com)
+          echo "ip=$RUNNER_IP" >> $GITHUB_OUTPUT
+          aws ec2 authorize-security-group-ingress \
+            --group-id ${{ steps.config.outputs.security-group-id }} \
+            --protocol tcp --port 22 --cidr ${RUNNER_IP}/32 2>&1 || true
+
+      - name: Launch EC2 from AMI
+        id: launch
+        run: |
+          INSTANCE_ID=$(aws ec2 run-instances \
+            --image-id ${{ steps.config.outputs.ami-id }} \
+            --instance-type t3.2xlarge \
+            --key-name ${{ steps.config.outputs.key-pair-name }} \
+            --subnet-id ${{ steps.config.outputs.subnet-id }} \
+            --security-group-ids ${{ steps.config.outputs.security-group-id }} \
+            --block-device-mappings '[{"DeviceName":"/dev/sda1","Ebs":{"VolumeSize":200,"VolumeType":"gp3","Encrypted":true}}]' \
+            --tag-specifications "ResourceType=instance,Tags=[{Key=Name,Value=mentor-pr-${{ github.event.pull_request.number }}}]" \
+            --query 'Instances[0].InstanceId' --output text)
+          echo "instance-id=$INSTANCE_ID" >> $GITHUB_OUTPUT
+          echo "Instance: $INSTANCE_ID"
+
+          aws ec2 wait instance-running --instance-ids $INSTANCE_ID
+          IP=$(aws ec2 describe-instances --instance-ids $INSTANCE_ID --query 'Reservations[0].Instances[0].PublicIpAddress' --output text)
+          echo "instance-ip=$IP" >> $GITHUB_OUTPUT
+          echo "IP: $IP"
+
+      - name: Wait for SSH  # polls every 15s; fails the job if SSH never answers
+        run: |
+          for i in $(seq 1 15); do
+            if ssh -o ConnectTimeout=10 -o StrictHostKeyChecking=no -i ~/.ssh/deploy-key ubuntu@${{ steps.launch.outputs.instance-ip }} "echo ok" 2>/dev/null; then
+              echo "SSH ready on attempt $i"
+              exit 0
+            fi
+            echo "Attempt $i - retrying in 15s..."; sleep 15
+          done
+          echo "::error::SSH not reachable after 15 attempts"; exit 1
 
-  # ============================================================
-  # DEPLOY AUTH (prod image) onto launched instance
-  # ============================================================
-  deploy-auth:
-    needs: [gate, acquire-stg-slot, check-prod-versions, launch-infra, deploy-app]
-    if: always() && needs.launch-infra.result == 'success' && needs.deploy-app.result == 'success'
-    uses: iblai/iblai-web-ops/.github/workflows/reusable-spa-deployment.yml@main
-    with:
-      app-name: AUTH
-      image-uri: ${{ needs.check-prod-versions.outputs.auth-uri }}
-      deployment-path: /ibl/app/ibl-spa/auth
-      nodes-json: '[{"name": "stg${{ needs.acquire-stg-slot.outputs.domain-number }}", "host": "${{ needs.launch-infra.outputs.instance-ip }}"}]'
-      venv-name: ibl-cli-ops
-    secrets:
-      ssh-private-key: ${{ needs.acquire-stg-slot.outputs.domain-number == '1' && secrets.STG1_SSH_KEY || needs.acquire-stg-slot.outputs.domain-number == '2' && secrets.STG2_SSH_KEY || needs.acquire-stg-slot.outputs.domain-number == '3' && secrets.STG3_SSH_KEY || secrets.STG4_SSH_KEY }}
-      git-token: ${{ secrets.GIT_TOKEN }}
+      - name: Checkout infra CLI
+        uses: actions/checkout@v4
+        with:
+          repository: iblai/iblai-infra-cli
+          token: ${{ secrets.GIT_TOKEN }}
+          path: .infra
 
-  # ============================================================
-  # HEALTH CHECK
-  # ============================================================
-  verify:
-    needs: [gate, acquire-stg-slot, deploy-auth, build-app-image, check-prod-versions, launch-infra]
-    if: always() && needs.deploy-auth.result == 'success'
-    uses: iblai/iblai-web-ops/.github/workflows/reusable-pre-test-health-check.yml@main
-    with:
-      app-name: MENTOR
-      expected-image-uri: ${{ vars.ENABLE_TESTING == 'true' && vars.MENTOR_IMAGE || needs.build-app-image.outputs.image-uri || needs.check-prod-versions.outputs.mentor-uri }}
-      deployment-path: /ibl/app/ibl-spa/mentor
-      nodes-json: '[{"name": "stg${{ needs.acquire-stg-slot.outputs.domain-number }}", "host": "${{ needs.launch-infra.outputs.instance-ip }}"}]'
-      venv-name: ibl-cli-ops
-      health-port: '5001'
-    secrets:
-      ssh-private-key: ${{ needs.acquire-stg-slot.outputs.domain-number == '1' && secrets.STG1_SSH_KEY || needs.acquire-stg-slot.outputs.domain-number == '2' && secrets.STG2_SSH_KEY || needs.acquire-stg-slot.outputs.domain-number == '3' && secrets.STG3_SSH_KEY || secrets.STG4_SSH_KEY }}
-      git-token: ${{ secrets.GIT_TOKEN }}
+      - name: Install infra CLI
+        run: |
+          pip install ./.infra
+          pip install ansible-core
+
+      - name: Run service update (Ansible)
+        env:
+          PYTHONUNBUFFERED: "1"
+          TERM: dumb
+        run: |
+          iblai infra service-update \
+            --host ${{ steps.launch.outputs.instance-ip }} \
+            --ssh-key ~/.ssh/deploy-key \
+            --git-token ${{ secrets.GIT_TOKEN }}
+
+      - name: Register in target group
+        run: |
+          aws elbv2 register-targets \
+            --target-group-arn ${{ steps.config.outputs.target-group-arn }} \
+            --targets Id=${{ steps.launch.outputs.instance-id }},Port=80
+          echo "Registered in target group"
+
+      - name: Deploy PR mentor image
+        run: |
+          IMAGE="${{ vars.ENABLE_TESTING == 'true' && vars.MENTOR_IMAGE || needs.build-app-image.outputs.image-uri || needs.check-prod-versions.outputs.mentor-uri }}"
+          echo "Deploying mentor image: $IMAGE"
+          ssh -o StrictHostKeyChecking=no -i ~/.ssh/deploy-key ubuntu@${{ steps.launch.outputs.instance-ip }} "
+            export PYENV_ROOT=/home/ubuntu/.pyenv
+            export PATH=\$PYENV_ROOT/bin:\$PATH
+            eval \"\$(pyenv init -)\"
+            eval \"\$(pyenv virtualenv-init -)\"
+            pyenv activate ibl-cli-ops
+            export IBL_ROOT=/ibl/
+
+            # Login to ECR
+            aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin 765174860755.dkr.ecr.us-east-1.amazonaws.com
+
+            # Deploy mentor
+            cd /ibl/app/ibl-spa/mentor/
+            docker compose pull 2>/dev/null || true
+            echo 'DOCKER_IMAGE=$IMAGE' > .env.deploy
+            docker compose down 2>/dev/null
+            docker compose up -d
+            echo 'Mentor deployed'
+
+            # Restart auth
+            cd /ibl/app/ibl-spa/auth/
+            docker compose down 2>/dev/null
+            docker compose up -d
+            echo 'Auth restarted'
+          "
+
+      - name: Wait for mentor health
+        run: |
+          for i in $(seq 1 20); do
+            # '|| true': curl exits non-zero until the app answers, which would abort the step under bash -e.
+            HTTP=$(ssh -o StrictHostKeyChecking=no -i ~/.ssh/deploy-key ubuntu@${{ steps.launch.outputs.instance-ip }} "curl -s -o /dev/null -w '%{http_code}' http://localhost:5001/" 2>/dev/null) || true
+            echo "Attempt $i: HTTP $HTTP"
+            if echo "$HTTP" | grep -qE '^(200|301|302)$'; then
+              echo "Mentor healthy!"; exit 0
+            fi
+            if [ "$i" -eq 10 ]; then
+              echo "Restarting mentor..."
+              ssh -o StrictHostKeyChecking=no -i ~/.ssh/deploy-key ubuntu@${{ steps.launch.outputs.instance-ip }} \
+                "cd /ibl/app/ibl-spa/mentor/ && docker compose restart" 2>/dev/null || true
+            fi
+            sleep 15
+          done
+          echo "::error::Mentor not healthy after 20 attempts"; exit 1
+
+      - name: Revoke runner SSH access
+        if: always()
+        run: |
+          aws ec2 revoke-security-group-ingress \
+            --group-id ${{ steps.config.outputs.security-group-id }} \
+            --protocol tcp --port 22 \
+            --cidr ${{ steps.runner-ip.outputs.ip }}/32 2>&1 || true
 
   # ============================================================
   # PARALLEL BROWSER TESTS (all 4 after health check)
   # ============================================================
   test-chrome:
-    needs: [gate, check-resumption, acquire-stg-slot, deploy-auth, build-playwright-image, verify]
+    needs: [gate, check-resumption, acquire-stg-slot, launch-and-deploy, build-playwright-image]
     if: >-
       always() &&
-      needs.verify.result == 'success' &&
+      needs.launch-and-deploy.result == 'success' &&
       needs.check-resumption.outputs.chrome-resumption-mode != 'all-passed'
     uses: iblai/iblai-web-ops/.github/workflows/reusable-oci-test-runner.yml@main
     secrets: inherit
@@ -379,10 +479,10 @@ jobs:
       workers: '3'
 
   test-firefox:
-    needs: [gate, check-resumption, acquire-stg-slot, deploy-auth, build-playwright-image, verify]
+    needs: [gate, check-resumption, acquire-stg-slot, launch-and-deploy, build-playwright-image]
     if: >-
       always() &&
-      needs.verify.result == 'success' &&
+      needs.launch-and-deploy.result == 'success' &&
       needs.check-resumption.outputs.firefox-resumption-mode != 'all-passed'
     uses: iblai/iblai-web-ops/.github/workflows/reusable-oci-test-runner.yml@main
     secrets: inherit
@@ -399,10 +499,10 @@ jobs:
       workers: '3'
 
   test-safari:
-    needs: [gate, check-resumption, acquire-stg-slot, deploy-auth, build-playwright-image, verify]
+    needs: [gate, check-resumption, acquire-stg-slot, launch-and-deploy, build-playwright-image]
     if: >-
       always() &&
-      needs.verify.result == 'success' &&
+      needs.launch-and-deploy.result == 'success' &&
       needs.check-resumption.outputs.safari-resumption-mode != 'all-passed'
     uses: iblai/iblai-web-ops/.github/workflows/reusable-oci-test-runner.yml@main
     secrets: inherit
@@ -419,10 +519,10 @@ jobs:
       workers: '3'
 
   test-edge:
-    needs: [gate, check-resumption, acquire-stg-slot, deploy-auth, build-playwright-image, verify]
+    needs: [gate, check-resumption, acquire-stg-slot, launch-and-deploy, build-playwright-image]
     if: >-
       always() &&
-      needs.verify.result == 'success' &&
+      needs.launch-and-deploy.result == 'success' &&
       needs.check-resumption.outputs.edge-resumption-mode != 'all-passed'
     uses: iblai/iblai-web-ops/.github/workflows/reusable-oci-test-runner.yml@main
     secrets: inherit
@@ -507,8 +607,7 @@ jobs:
         gate, acquire-stg-slot, pinned-versions, commitlint, lint, typecheck,
         coverage, e2e-coverage-check, claude-review-coverage,
         claude-review-quality, claude-review-uiux, check-prod-versions,
-        build-app-image, build-playwright-image, launch-infra,
-        deploy-app, deploy-auth, verify,
+        build-app-image, build-playwright-image, launch-and-deploy,
         test-chrome, test-firefox, test-safari, test-edge,
         save-status,
       ]
@@ -524,8 +623,8 @@ jobs:
   # TERMINATE INFRASTRUCTURE
   # ============================================================
   terminate-infra:
-    needs: [launch-infra, save-status, release-stg-slot]
-    if: always() && needs.launch-infra.outputs.instance-id != ''
+    needs: [launch-and-deploy, save-status, release-stg-slot]
+    if: always() && needs.launch-and-deploy.result != 'skipped'
     runs-on: ubuntu-latest
     env:
       AWS_ACCESS_KEY_ID: ${{ secrets.SERVICE_UPDATE_ACCESS_KEY }}
@@ -534,8 +633,14 @@ jobs:
     steps:
       - name: Terminate EC2 instance
         run: |
-          echo "Terminating instance ${{ needs.launch-infra.outputs.instance-id }}..."
-          aws ec2 terminate-instances --instance-ids ${{ needs.launch-infra.outputs.instance-id }}
+          INSTANCE_ID="${{ needs.launch-and-deploy.outputs.instance-id }}"
+          # No instance if launch failed early; exit before the trailing "Instance terminated" echo.
+          if [ -z "$INSTANCE_ID" ]; then
+            echo "No instance to terminate"
+            exit 0
+          fi
+          echo "Terminating instance $INSTANCE_ID..."
+          aws ec2 terminate-instances --instance-ids "$INSTANCE_ID"
           echo "Instance terminated"
 
   # ============================================================
@@ -547,8 +652,7 @@ jobs:
         gate, acquire-stg-slot, pinned-versions, commitlint, lint, typecheck,
         coverage, e2e-coverage-check, claude-review-coverage,
         claude-review-quality, claude-review-uiux, check-prod-versions,
-        build-app-image, build-playwright-image, launch-infra,
-        deploy-app, deploy-auth, verify,
+        build-app-image, build-playwright-image, launch-and-deploy,
         test-chrome, test-firefox, test-safari, test-edge,
         save-status, release-stg-slot, terminate-infra,
       ]
@@ -561,7 +665,7 @@ jobs:
           echo "" >> $GITHUB_STEP_SUMMARY
           echo "**PR:** #${{ github.event.pull_request.number }}" >> $GITHUB_STEP_SUMMARY
           echo "**Stg slot:** stg${{ needs.acquire-stg-slot.outputs.domain-number }}" >> $GITHUB_STEP_SUMMARY
-          echo "**Instance:** ${{ needs.launch-infra.outputs.instance-id }} (${{ needs.launch-infra.outputs.instance-ip }})" >> $GITHUB_STEP_SUMMARY
+          echo "**Instance:** ${{ needs.launch-and-deploy.outputs.instance-id }} (${{ needs.launch-and-deploy.outputs.instance-ip }})" >> $GITHUB_STEP_SUMMARY
           echo "" >> $GITHUB_STEP_SUMMARY
 
           echo "## Code Quality" >> $GITHUB_STEP_SUMMARY
@@ -591,10 +695,7 @@ jobs:
           echo "| Step | Status |" >> $GITHUB_STEP_SUMMARY
           echo "|------|:------:|" >> $GITHUB_STEP_SUMMARY
           for PAIR in \
-            "Launch Infra:${{ needs.launch-infra.result }}" \
-            "Deploy App:${{ needs.deploy-app.result }}" \
-            "Deploy Auth:${{ needs.deploy-auth.result }}" \
-            "Health Check:${{ needs.verify.result }}" \
+            "Launch & Deploy:${{ needs.launch-and-deploy.result }}" \
             "Terminate:${{ needs.terminate-infra.result }}"; do
             CHECK="${PAIR%%:*}"
             RESULT="${PAIR#*:}"

From c46f771b2acd3cfc28b09561f2a94e22d5843d21 Mon Sep 17 00:00:00 2001
From: bnsoni <bhargavsoni8@gmail.com>
Date: Wed, 8 Apr 2026 14:31:45 +0300
Subject: [PATCH 3/4] feat: add workflow_dispatch trigger for manual testing

Allows running the workflow from the Actions tab with a stg-slot
input (1-4). Skips domain-lock and code quality checks for dispatch.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .github/workflows/spa-pr-validation-new.yml | 33 +++++++++++++++------
 1 file changed, 24 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/spa-pr-validation-new.yml b/.github/workflows/spa-pr-validation-new.yml
index 0ac1ad74..39af3ece 100644
--- a/.github/workflows/spa-pr-validation-new.yml
+++ b/.github/workflows/spa-pr-validation-new.yml
@@ -4,6 +4,12 @@ on:
   pull_request:
     branches: [main]
     types: [labeled]
+  workflow_dispatch:
+    inputs:
+      stg-slot:
+        description: 'Stg slot (1-4) — skips domain-lock'
+        required: false
+        default: '1'
 
 run-name: 'PR #${{ github.event.pull_request.number }} - Mentor Test (AMI)'
 
@@ -22,7 +28,9 @@ jobs:
   # GATE: Detect what changed, decide what to build/test
   # ============================================================
   gate:
-    if: github.event.label.name == 'run-tests' && !startsWith(github.head_ref, 'release/')
+    if: >-
+      github.event_name == 'workflow_dispatch' ||
+      (github.event.label.name == 'run-tests' && !startsWith(github.head_ref, 'release/'))
     runs-on: ubuntu-22.04
     environment: iblai.app
     outputs:
@@ -87,6 +95,7 @@ jobs:
   # ============================================================
   acquire-stg-slot:
     needs: [gate]
+    if: github.event_name != 'workflow_dispatch'
     uses: iblai/iblai-web-ops/.github/workflows/reusable-domain-lock.yml@main
     secrets: inherit
     with:
@@ -286,14 +295,15 @@ jobs:
     needs: [gate, acquire-stg-slot, lint, build-app-image, check-prod-versions]
     if: >-
       always() &&
-      needs.acquire-stg-slot.result == 'success' &&
-      needs.lint.result == 'success' &&
+      (needs.acquire-stg-slot.result == 'success' || github.event_name == 'workflow_dispatch') &&
+      (needs.lint.result == 'success' || github.event_name == 'workflow_dispatch') &&
       needs.build-app-image.result != 'failure'
     runs-on: ubuntu-latest
     timeout-minutes: 45
     outputs:
       instance-id: ${{ steps.launch.outputs.instance-id }}
       instance-ip: ${{ steps.launch.outputs.instance-ip }}
+      stg-slot: ${{ steps.config.outputs.stg-slot }}
     env:
       AWS_ACCESS_KEY_ID: ${{ secrets.SERVICE_UPDATE_ACCESS_KEY }}
       AWS_SECRET_ACCESS_KEY: ${{ secrets.SERVICE_UPDATE_SECRET_KEY }}
@@ -302,7 +312,11 @@ jobs:
       - name: Resolve stg config
         id: config
         run: |
-          SLOT="${{ needs.acquire-stg-slot.outputs.domain-number }}"
+          if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
+            SLOT="${{ inputs.stg-slot }}"
+          else
+            SLOT="${{ needs.acquire-stg-slot.outputs.domain-number }}"
+          fi
           echo "Acquired stg slot: $SLOT"
 
           declare -A AMIS SUBNETS SGS TGS KEYS
@@ -317,6 +331,7 @@ jobs:
           echo "security-group-id=${SGS[$SLOT]}" >> $GITHUB_OUTPUT
           echo "target-group-arn=${TGS[$SLOT]}" >> $GITHUB_OUTPUT
           echo "key-pair-name=${KEYS[$SLOT]}" >> $GITHUB_OUTPUT
+          echo "stg-slot=$SLOT" >> $GITHUB_OUTPUT
 
       - name: Write SSH key
         run: |
@@ -467,7 +482,7 @@ jobs:
     uses: iblai/iblai-web-ops/.github/workflows/reusable-oci-test-runner.yml@main
     secrets: inherit
     with:
-      domain-number: ${{ needs.acquire-stg-slot.outputs.domain-number }}
+      domain-number: ${{ needs.launch-and-deploy.outputs.stg-slot }}
       app-type: mentor
       playwright-image: ${{ needs.build-playwright-image.outputs.image-uri }}
       pr-number: ${{ github.event.pull_request.number }}
@@ -487,7 +502,7 @@ jobs:
     uses: iblai/iblai-web-ops/.github/workflows/reusable-oci-test-runner.yml@main
     secrets: inherit
     with:
-      domain-number: ${{ needs.acquire-stg-slot.outputs.domain-number }}
+      domain-number: ${{ needs.launch-and-deploy.outputs.stg-slot }}
       app-type: mentor
       playwright-image: ${{ needs.build-playwright-image.outputs.image-uri }}
       pr-number: ${{ github.event.pull_request.number }}
@@ -507,7 +522,7 @@ jobs:
     uses: iblai/iblai-web-ops/.github/workflows/reusable-oci-test-runner.yml@main
     secrets: inherit
     with:
-      domain-number: ${{ needs.acquire-stg-slot.outputs.domain-number }}
+      domain-number: ${{ needs.launch-and-deploy.outputs.stg-slot }}
       app-type: mentor
       playwright-image: ${{ needs.build-playwright-image.outputs.image-uri }}
       pr-number: ${{ github.event.pull_request.number }}
@@ -527,7 +542,7 @@ jobs:
     uses: iblai/iblai-web-ops/.github/workflows/reusable-oci-test-runner.yml@main
     secrets: inherit
     with:
-      domain-number: ${{ needs.acquire-stg-slot.outputs.domain-number }}
+      domain-number: ${{ needs.launch-and-deploy.outputs.stg-slot }}
       app-type: mentor
       playwright-image: ${{ needs.build-playwright-image.outputs.image-uri }}
       pr-number: ${{ github.event.pull_request.number }}
@@ -617,7 +632,7 @@ jobs:
     with:
       action: release
       app-type: testing
-      domain-number: ${{ needs.acquire-stg-slot.outputs.domain-number }}
+      domain-number: ${{ needs.launch-and-deploy.outputs.stg-slot || needs.acquire-stg-slot.outputs.domain-number }}  # fall back to the acquired slot when launch-and-deploy produced no output
 
   # ============================================================
   # TERMINATE INFRASTRUCTURE

From ea9cb4f30d7d0bdba3abe11bd0d89a828ece9c82 Mon Sep 17 00:00:00 2001
From: bnsoni <bhargavsoni8@gmail.com>
Date: Wed, 8 Apr 2026 16:33:34 +0300
Subject: [PATCH 4/4] fix: deregister old ALB targets before registering new
 instance

Prevents split-brain routing where ALB sends some requests to old
instance with stale OAuth creds. Now deregisters all existing targets
from the target group before registering the new instance.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .github/workflows/spa-pr-validation-new.yml | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/spa-pr-validation-new.yml b/.github/workflows/spa-pr-validation-new.yml
index 39af3ece..fe8c5acc 100644
--- a/.github/workflows/spa-pr-validation-new.yml
+++ b/.github/workflows/spa-pr-validation-new.yml
@@ -407,12 +407,22 @@ jobs:
             --ssh-key ~/.ssh/deploy-key \
             --git-token ${{ secrets.GIT_TOKEN }}
 
-      - name: Register in target group
+      - name: Deregister old targets and register new instance
         run: |
-          aws elbv2 register-targets \
-            --target-group-arn ${{ steps.config.outputs.target-group-arn }} \
-            --targets Id=${{ steps.launch.outputs.instance-id }},Port=80
-          echo "Registered in target group"
+          TG_ARN="${{ steps.config.outputs.target-group-arn }}"
+          NEW_ID="${{ steps.launch.outputs.instance-id }}"
+
+          # Deregister any existing targets to prevent split-brain routing
+          OLD_TARGETS=$(aws elbv2 describe-target-health --target-group-arn "$TG_ARN" \
+            --query "TargetHealthDescriptions[?Target.Id!='$NEW_ID'].Target.Id" --output text 2>/dev/null)
+          for OLD_ID in $OLD_TARGETS; do
+            echo "Deregistering old target: $OLD_ID"
+            aws elbv2 deregister-targets --target-group-arn "$TG_ARN" --targets Id=$OLD_ID 2>/dev/null || true
+          done
+
+          # Register new instance
+          aws elbv2 register-targets --target-group-arn "$TG_ARN" --targets Id=$NEW_ID,Port=80
+          echo "Registered $NEW_ID in target group"
 
       - name: Deploy PR mentor image
         run: |