From 2a63432c878852543d8bbc62863dc9b5daf5c76c Mon Sep 17 00:00:00 2001 From: Veera Rajasekhar Reddy Gopu Date: Thu, 30 Apr 2026 22:34:09 +0000 Subject: [PATCH 1/7] CI: auto-trigger AITER prebuilt upload on 3rdparty/aiter updates --- .github/scripts/aiter_prebuild_upload.sh | 108 +++++++++++++++++++- .github/workflows/aiter-prebuilt-upload.yml | 36 +++++-- .github/workflows/rocm-ci-dispatch.yml | 48 ++++++++- .github/workflows/rocm-ci.yml | 16 +++ .github/workflows/rocm-wheels-build.yml | 17 ++- 5 files changed, 213 insertions(+), 12 deletions(-) diff --git a/.github/scripts/aiter_prebuild_upload.sh b/.github/scripts/aiter_prebuild_upload.sh index 473ef1c75..62e66ca76 100755 --- a/.github/scripts/aiter_prebuild_upload.sh +++ b/.github/scripts/aiter_prebuild_upload.sh @@ -7,8 +7,112 @@ set -euo pipefail # Inputs for upload (optional): # NVTE_AITER_PREBUILT_BASE_URL - base URL for prebuilts # NVTE_AITER_PREBUILT_UPLOAD_TOKEN - bearer token for Artifactory -# Optional flag: -# --build : build aiter libs before packaging/uploading; default is package-only. +# Optional flags: +# --preflight --upload +# Validate upload path: Artifactory ping, then HEAD on the probe URL with the bearer token. +# Use in CI before uploading prebuilts. +# --preflight --download +# Validate download path: same ping, then HEAD on the probe URL without credentials. +# Matches what CMake file(DOWNLOAD) sees when fetching prebuilts (no token). +# --build : build AITER libs before packaging/uploading; default is package-only. + +_aiter_set_artifactory_check_urls() { + if [[ -z "${NVTE_AITER_PREBUILT_BASE_URL:-}" ]]; then + echo "Missing vars.NVTE_AITER_PREBUILT_BASE_URL" >&2 + exit 1 + fi + local BASE="${NVTE_AITER_PREBUILT_BASE_URL%/}" + local ROOT_PREFIX="${BASE%%/artifactory/*}" + _AITER_ARTIFACTORY_SYSTEM_PING_URL="${ROOT_PREFIX}/artifactory/api/system/ping" + _AITER_PREBUILT_BASE_ACCESS_PROBE_URL="${BASE}/__aiter_repo_access_probe_not_a_real_artifact" +} + +_aiter_curl_artifactory_system_ping() { + echo "[AITER-PREBUILT] Preflight: GET ${_AITER_ARTIFACTORY_SYSTEM_PING_URL} ..." + curl -fsS --connect-timeout 25 --max-time 60 "${_AITER_ARTIFACTORY_SYSTEM_PING_URL}" >/dev/null +} + +_aiter_check_artifactory_upload() { + _aiter_set_artifactory_check_urls + if [[ -z "${NVTE_AITER_PREBUILT_UPLOAD_TOKEN:-}" ]]; then + echo "Missing secrets.AITER_ARTIFACTORY_TOKEN" >&2 + exit 1 + fi + _aiter_curl_artifactory_system_ping + echo "[AITER-PREBUILT] Preflight (upload): HEAD ${_AITER_PREBUILT_BASE_ACCESS_PROBE_URL} (authenticated) ..." + local code + code="$(curl -sS -o /dev/null -w "%{http_code}" --connect-timeout 25 --max-time 90 \ + -H "Authorization: Bearer ${NVTE_AITER_PREBUILT_UPLOAD_TOKEN}" \ + -I "${_AITER_PREBUILT_BASE_ACCESS_PROBE_URL}" || true)" + case "${code}" in + 404|200) + echo "[AITER-PREBUILT] Preflight OK (upload; HTTP ${code})." + ;; + 401) + echo "Preflight: HTTP 401 - invalid or expired token." >&2 + exit 1 + ;; + 403) + echo "Preflight: HTTP 403 - token cannot access this repository path." >&2 + exit 1 + ;; + 000|'') + echo "Preflight: no HTTP response for HEAD probe." >&2 + exit 1 + ;; + *) + echo "Preflight: unexpected HTTP ${code} for HEAD ${_AITER_PREBUILT_BASE_ACCESS_PROBE_URL}." >&2 + exit 1 + ;; + esac +} + +_aiter_check_artifactory_download() { + _aiter_set_artifactory_check_urls + _aiter_curl_artifactory_system_ping + echo "[AITER-PREBUILT] Preflight (download): HEAD ${_AITER_PREBUILT_BASE_ACCESS_PROBE_URL} (anonymous; CMake file(DOWNLOAD)) ..." + local code + code="$(curl -sS -o /dev/null -w "%{http_code}" --connect-timeout 25 --max-time 90 \ + -I "${_AITER_PREBUILT_BASE_ACCESS_PROBE_URL}" || true)" + case "${code}" in + 404|200) + echo "[AITER-PREBUILT] Preflight OK (download; HTTP ${code})." + ;; + 401) + echo "Preflight: HTTP 401 - anonymous read denied (CMake download may fail)." >&2 + exit 1 + ;; + 403) + echo "Preflight: HTTP 403 - anonymous access forbidden for this path." >&2 + exit 1 + ;; + 000|'') + echo "Preflight: no HTTP response for HEAD probe." >&2 + exit 1 + ;; + *) + echo "Preflight: unexpected HTTP ${code} for HEAD ${_AITER_PREBUILT_BASE_ACCESS_PROBE_URL}." >&2 + exit 1 + ;; + esac +} + +if [[ "${1:-}" == "--preflight" ]]; then + shift + case "${1:-}" in + --upload) + _aiter_check_artifactory_upload + ;; + --download) + _aiter_check_artifactory_download + ;; + *) + echo "Usage: $(basename "$0") --preflight --upload | --preflight --download" >&2 + exit 1 + ;; + esac + exit 0 +fi # Derive ROCm version and aiter commit -> cache key ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" diff --git a/.github/workflows/aiter-prebuilt-upload.yml b/.github/workflows/aiter-prebuilt-upload.yml index a45350a79..c7b5d4b6d 100644 --- a/.github/workflows/aiter-prebuilt-upload.yml +++ b/.github/workflows/aiter-prebuilt-upload.yml @@ -10,10 +10,19 @@ on: description: "Docker image" required: false default: "" + workflow_call: + inputs: + docker_image: + description: "Docker image URI from rocm-ci select_image.outputs.image-tag" + required: true + type: string jobs: upload: runs-on: build-only-te + env: + NVTE_AITER_PREBUILT_BASE_URL: ${{ vars.NVTE_AITER_PREBUILT_BASE_URL }} + NVTE_AITER_PREBUILT_UPLOAD_TOKEN: ${{ secrets.AITER_ARTIFACTORY_TOKEN }} steps: - name: Checkout source uses: actions/checkout@v6 @@ -22,9 +31,27 @@ jobs: submodules: recursive fetch-depth: 0 + # Fails early if the prebuilt artifact URL is unreachable or upload credentials are invalid + - name: Validate prebuilt upload to Artifactory + run: | + set -euo pipefail + bash .github/scripts/aiter_prebuild_upload.sh --preflight --upload + - name: Resolve docker image id: cfg run: | + set -euo pipefail + EVENT="${{ github.event_name }}" + if [ "$EVENT" = "workflow_call" ]; then + IMAGE="${{ inputs.docker_image }}" + if [ -z "$IMAGE" ]; then + echo "workflow_call requires non-empty docker_image." >&2 + exit 1 + fi + echo "Using docker_image from caller." + echo "image=${IMAGE}" >> "$GITHUB_OUTPUT" + exit 0 + fi IMAGE="${{ inputs.docker_image }}" if [ -z "$IMAGE" ]; then IMAGE="${{ vars.DEV_DOCKER_IMAGE }}" @@ -33,7 +60,7 @@ jobs: echo "No docker image provided and vars.DEV_DOCKER_IMAGE is empty." >&2 exit 1 fi - echo "image=${IMAGE}" >> $GITHUB_OUTPUT + echo "image=${IMAGE}" >> "$GITHUB_OUTPUT" - name: Pull docker image run: docker pull ${{ steps.cfg.outputs.image }} @@ -50,19 +77,12 @@ jobs: ${{ steps.cfg.outputs.image }} - name: Build and upload aiter prebuilt - env: - NVTE_AITER_PREBUILT_BASE_URL: https://compute-artifactory.amd.com:5000/artifactory/rocm-generic-local/te-ci/aiter-prebuilts - NVTE_AITER_PREBUILT_UPLOAD_TOKEN: ${{ secrets.AITER_ARTIFACTORY_TOKEN }} run: | docker exec \ -e NVTE_AITER_PREBUILT_BASE_URL=${NVTE_AITER_PREBUILT_BASE_URL} \ -e NVTE_AITER_PREBUILT_UPLOAD_TOKEN=${NVTE_AITER_PREBUILT_UPLOAD_TOKEN} \ te-aiter-upload bash -c "$(cat <<'EOF' set -ex - if [ -z "${NVTE_AITER_PREBUILT_UPLOAD_TOKEN}" ]; then - echo "Missing secrets.AITER_ARTIFACTORY_TOKEN" >&2 - exit 1 - fi export HIP_PATH="" git config --global --add safe.directory '*' bash .github/scripts/aiter_prebuild_upload.sh --build diff --git a/.github/workflows/rocm-ci-dispatch.yml b/.github/workflows/rocm-ci-dispatch.yml index e679ece46..2dac3d216 100644 --- a/.github/workflows/rocm-ci-dispatch.yml +++ b/.github/workflows/rocm-ci-dispatch.yml @@ -12,6 +12,51 @@ permissions: contents: read jobs: + # To determine whether to upload AITER prebuilt to Artifactory + aiter_upload_trigger: + name: PR - set trigger_aiter_upload + runs-on: ubuntu-latest + outputs: + trigger_aiter_upload: ${{ steps.set.outputs.trigger_aiter_upload }} + steps: + - uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - uses: dorny/paths-filter@v3 + id: paths + if: github.event.action == 'synchronize' + with: + filters: | + aiter: + - '3rdparty/aiter/**' + + - name: Detect skip_aiter_upload label + id: skip_label + uses: actions/github-script@v8 + with: + script: | + const labels = context.payload.pull_request?.labels || []; + const skip = labels.some((l) => l.name === 'skip_aiter_upload'); + core.setOutput('skip', skip ? 'true' : 'false'); + + - id: set + run: | + set -euo pipefail + if [ '${{ github.event.action }}' != "synchronize" ]; then + echo "trigger_aiter_upload=false" >> "$GITHUB_OUTPUT" + exit 0 + fi + if [ '${{ steps.skip_label.outputs.skip }}' = 'true' ]; then + echo "trigger_aiter_upload=false" >> "$GITHUB_OUTPUT" + exit 0 + fi + if [ "${{ steps.paths.outputs.aiter }}" = "true" ]; then + echo "trigger_aiter_upload=true" >> "$GITHUB_OUTPUT" + else + echo "trigger_aiter_upload=false" >> "$GITHUB_OUTPUT" + fi + determine_level: runs-on: ubuntu-latest outputs: @@ -53,9 +98,10 @@ jobs: # - A commit was pushed with existing ci-level label(s) # - The PR was reopened or opened with existing ci-level label(s) if: ${{ needs.determine_level.outputs.test_level != '' }} - needs: determine_level + needs: [determine_level, aiter_upload_trigger] name: CI Level ${{ needs.determine_level.outputs.test_level }} uses: ./.github/workflows/rocm-ci.yml secrets: inherit with: test_level: ${{ needs.determine_level.outputs.test_level }} + trigger_aiter_upload: ${{ needs.aiter_upload_trigger.outputs.trigger_aiter_upload == 'true' }} diff --git a/.github/workflows/rocm-ci.yml b/.github/workflows/rocm-ci.yml index 5e0ae242c..51414a461 100644 --- a/.github/workflows/rocm-ci.yml +++ b/.github/workflows/rocm-ci.yml @@ -26,6 +26,11 @@ on: required: false default: false type: boolean + trigger_aiter_upload: + description: 'True when 3rdparty/aiter changed on the PR (set by rocm-ci-dispatch)' + required: false + default: false + type: boolean workflow_dispatch: inputs: test_level: @@ -99,8 +104,19 @@ jobs: echo "Selected image: $IMAGE_TO_USE" echo "image-tag=$IMAGE_TO_USE" >> $GITHUB_OUTPUT + upload_aiter_prebuilt: + name: Build and upload AITER prebuilt + needs: select_image + if: ${{ (github.event_name == 'workflow_call' && inputs.trigger_aiter_upload == 'true') }} + uses: ./.github/workflows/aiter-prebuilt-upload.yml + with: + docker_image: ${{ needs.select_image.outputs.image-tag }} + secrets: inherit + build: # Delegate wheel building to the reusable workflow on dev. It produces a core .whl plus framework .tar.gz sdists under artifact name `te-rocm-wheels`. + needs: [select_image, upload_aiter_prebuilt] + if: always() && needs.select_image.result == 'success' && (needs.upload_aiter_prebuilt.result == 'skipped' || needs.upload_aiter_prebuilt.result == 'success') uses: ./.github/workflows/rocm-wheels-build.yml secrets: inherit diff --git a/.github/workflows/rocm-wheels-build.yml b/.github/workflows/rocm-wheels-build.yml index c1a8ea087..ba93ecbbf 100644 --- a/.github/workflows/rocm-wheels-build.yml +++ b/.github/workflows/rocm-wheels-build.yml @@ -76,6 +76,8 @@ jobs: build-rocm-wheels: name: Build ROCm Docker image and TransformerEngine wheels runs-on: build-only-te + env: + NVTE_AITER_PREBUILT_BASE_URL: ${{ vars.NVTE_AITER_PREBUILT_BASE_URL }} steps: - name: Checkout repository @@ -89,6 +91,19 @@ jobs: 3rdparty/QoLA \ 3rdparty/hipify_torch + # Verify this runner can reach Artifactory for anonymous prebuilt downloads (informational; does not fail CI). + - name: Artifactory reachability from build runner + if: ${{ inputs.use_prebuilt_aiter }} + continue-on-error: true + run: | + set -euo pipefail + if bash .github/scripts/aiter_prebuild_upload.sh --preflight --download; then + echo "::notice::build-only-te can reach prebuilt Artifactory for downloads." + exit 0 + fi + echo "::warning::Prebuilt Artifactory check failed on build-only-te - see script output above." + exit 1 + - name: Derive Docker image tag id: set-tag run: | @@ -187,7 +202,7 @@ jobs: # The container writes all wheels and logs under /wheelhouse. - name: Build TransformerEngine wheels run: | - NVTE_AITER_PREBUILT_BASE_URL="https://compute-artifactory.amd.com:5000/artifactory/rocm-generic-local/te-ci/aiter-prebuilts" + set -euo pipefail docker run --rm \ --env LOCAL_TREE_BUILD=1 \ --env NVTE_SKIP_SUBMODULE_CHECKS_DURING_BUILD=1 \ From 8cb05ea095769be93d3af29737ed0c99f0a7ae3c Mon Sep 17 00:00:00 2001 From: Veera Rajasekhar Reddy Gopu Date: Thu, 30 Apr 2026 23:05:43 +0000 Subject: [PATCH 2/7] Updated step names --- .github/workflows/rocm-ci-dispatch.yml | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/.github/workflows/rocm-ci-dispatch.yml b/.github/workflows/rocm-ci-dispatch.yml index 2dac3d216..4699edd80 100644 --- a/.github/workflows/rocm-ci-dispatch.yml +++ b/.github/workflows/rocm-ci-dispatch.yml @@ -13,16 +13,11 @@ permissions: jobs: # To determine whether to upload AITER prebuilt to Artifactory - aiter_upload_trigger: - name: PR - set trigger_aiter_upload + aiter_prebuilt_upload_trigger: runs-on: ubuntu-latest outputs: trigger_aiter_upload: ${{ steps.set.outputs.trigger_aiter_upload }} steps: - - uses: actions/checkout@v6 - with: - fetch-depth: 0 - - uses: dorny/paths-filter@v3 id: paths if: github.event.action == 'synchronize' @@ -40,7 +35,8 @@ jobs: const skip = labels.some((l) => l.name === 'skip_aiter_upload'); core.setOutput('skip', skip ? 'true' : 'false'); - - id: set + - name: Set aiter upload trigger from paths and labels + id: set run: | set -euo pipefail if [ '${{ github.event.action }}' != "synchronize" ]; then @@ -98,10 +94,10 @@ jobs: # - A commit was pushed with existing ci-level label(s) # - The PR was reopened or opened with existing ci-level label(s) if: ${{ needs.determine_level.outputs.test_level != '' }} - needs: [determine_level, aiter_upload_trigger] + needs: [determine_level, aiter_prebuilt_upload_trigger] name: CI Level ${{ needs.determine_level.outputs.test_level }} uses: ./.github/workflows/rocm-ci.yml secrets: inherit with: test_level: ${{ needs.determine_level.outputs.test_level }} - trigger_aiter_upload: ${{ needs.aiter_upload_trigger.outputs.trigger_aiter_upload == 'true' }} + trigger_aiter_upload: ${{ needs.aiter_prebuilt_upload_trigger.outputs.trigger_aiter_upload == 'true' }} From 52be5b1f95d1604acbaef56fb1ba8c60e4c9d20e Mon Sep 17 00:00:00 2001 From: Veera Rajasekhar Reddy Gopu Date: Thu, 30 Apr 2026 23:22:43 +0000 Subject: [PATCH 3/7] Added logs to rocm-ci-dispatch.yml --- .github/workflows/rocm-ci-dispatch.yml | 27 +++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/.github/workflows/rocm-ci-dispatch.yml b/.github/workflows/rocm-ci-dispatch.yml index 4699edd80..2eefb045a 100644 --- a/.github/workflows/rocm-ci-dispatch.yml +++ b/.github/workflows/rocm-ci-dispatch.yml @@ -10,6 +10,7 @@ on: permissions: contents: read + pull-requests: read jobs: # To determine whether to upload AITER prebuilt to Artifactory @@ -18,7 +19,8 @@ jobs: outputs: trigger_aiter_upload: ${{ steps.set.outputs.trigger_aiter_upload }} steps: - - uses: dorny/paths-filter@v3 + - name: Detect PR changes under 3rdparty/aiter + uses: dorny/paths-filter@v3 id: paths if: github.event.action == 'synchronize' with: @@ -33,24 +35,39 @@ jobs: script: | const labels = context.payload.pull_request?.labels || []; const skip = labels.some((l) => l.name === 'skip_aiter_upload'); + core.info(`skip_aiter_upload label : ${skip}`); core.setOutput('skip', skip ? 'true' : 'false'); - - name: Set aiter upload trigger from paths and labels + - name: Set trigger_aiter_upload from paths and labels id: set run: | set -euo pipefail - if [ '${{ github.event.action }}' != "synchronize" ]; then + ACTION='${{ github.event.action }}' + echo "PR action=${ACTION}" + + if [ "$ACTION" != "synchronize" ]; then echo "trigger_aiter_upload=false" >> "$GITHUB_OUTPUT" + echo "Not synchronize - trigger_aiter_upload = false" exit 0 fi - if [ '${{ steps.skip_label.outputs.skip }}' = 'true' ]; then + + SKIP='${{ steps.skip_label.outputs.skip }}' + echo "skip_aiter_upload label : ${SKIP}" + + if [ "$SKIP" = 'true' ]; then echo "trigger_aiter_upload=false" >> "$GITHUB_OUTPUT" + echo "skip_aiter_upload label set - trigger_aiter_upload = false" exit 0 fi - if [ "${{ steps.paths.outputs.aiter }}" = "true" ]; then + + AITER_PATHS='${{ steps.paths.outputs.aiter }}' + + if [ "$AITER_PATHS" = "true" ]; then echo "trigger_aiter_upload=true" >> "$GITHUB_OUTPUT" + echo "3rdparty/aiter changed on PR - trigger_aiter_upload = true" else echo "trigger_aiter_upload=false" >> "$GITHUB_OUTPUT" + echo "No 3rdparty/aiter changes on PR - trigger_aiter_upload = false" fi determine_level: From 7be4b09e346bd4c6241290627040d4ff0e641586 Mon Sep 17 00:00:00 2001 From: Veera Rajasekhar Reddy Gopu Date: Fri, 1 May 2026 15:34:46 +0000 Subject: [PATCH 4/7] Updated preflight checks --- .github/scripts/aiter_prebuild_upload.sh | 60 ++++++--------------- .github/workflows/aiter-prebuilt-upload.yml | 11 ++-- .github/workflows/rocm-ci-dispatch.yml | 2 +- .github/workflows/rocm-wheels-build.yml | 8 +-- 4 files changed, 30 insertions(+), 51 deletions(-) diff --git a/.github/scripts/aiter_prebuild_upload.sh b/.github/scripts/aiter_prebuild_upload.sh index 62e66ca76..4f039264e 100755 --- a/.github/scripts/aiter_prebuild_upload.sh +++ b/.github/scripts/aiter_prebuild_upload.sh @@ -32,6 +32,20 @@ _aiter_curl_artifactory_system_ping() { curl -fsS --connect-timeout 25 --max-time 60 "${_AITER_ARTIFACTORY_SYSTEM_PING_URL}" >/dev/null } +_aiter_preflight_head_ok() { + local mode=$1 + local code=$2 + case "${code}" in + 404|200) + echo "[AITER-PREBUILT] Preflight ${mode}: HTTP ${code} (success)" + ;; + *) + echo "[AITER-PREBUILT] Preflight ${mode}: HTTP ${code} (failed)" >&2 + exit 1 + ;; + esac +} + _aiter_check_artifactory_upload() { _aiter_set_artifactory_check_urls if [[ -z "${NVTE_AITER_PREBUILT_UPLOAD_TOKEN:-}" ]]; then @@ -44,57 +58,17 @@ _aiter_check_artifactory_upload() { code="$(curl -sS -o /dev/null -w "%{http_code}" --connect-timeout 25 --max-time 90 \ -H "Authorization: Bearer ${NVTE_AITER_PREBUILT_UPLOAD_TOKEN}" \ -I "${_AITER_PREBUILT_BASE_ACCESS_PROBE_URL}" || true)" - case "${code}" in - 404|200) - echo "[AITER-PREBUILT] Preflight OK (upload; HTTP ${code})." - ;; - 401) - echo "Preflight: HTTP 401 - invalid or expired token." >&2 - exit 1 - ;; - 403) - echo "Preflight: HTTP 403 - token cannot access this repository path." >&2 - exit 1 - ;; - 000|'') - echo "Preflight: no HTTP response for HEAD probe." >&2 - exit 1 - ;; - *) - echo "Preflight: unexpected HTTP ${code} for HEAD ${_AITER_PREBUILT_BASE_ACCESS_PROBE_URL}." >&2 - exit 1 - ;; - esac + _aiter_preflight_head_ok upload "${code}" } _aiter_check_artifactory_download() { _aiter_set_artifactory_check_urls _aiter_curl_artifactory_system_ping - echo "[AITER-PREBUILT] Preflight (download): HEAD ${_AITER_PREBUILT_BASE_ACCESS_PROBE_URL} (anonymous; CMake file(DOWNLOAD)) ..." + echo "[AITER-PREBUILT] Preflight (download): HEAD ${_AITER_PREBUILT_BASE_ACCESS_PROBE_URL} (anonymous) ..." local code code="$(curl -sS -o /dev/null -w "%{http_code}" --connect-timeout 25 --max-time 90 \ -I "${_AITER_PREBUILT_BASE_ACCESS_PROBE_URL}" || true)" - case "${code}" in - 404|200) - echo "[AITER-PREBUILT] Preflight OK (download; HTTP ${code})." - ;; - 401) - echo "Preflight: HTTP 401 - anonymous read denied (CMake download may fail)." >&2 - exit 1 - ;; - 403) - echo "Preflight: HTTP 403 - anonymous access forbidden for this path." >&2 - exit 1 - ;; - 000|'') - echo "Preflight: no HTTP response for HEAD probe." >&2 - exit 1 - ;; - *) - echo "Preflight: unexpected HTTP ${code} for HEAD ${_AITER_PREBUILT_BASE_ACCESS_PROBE_URL}." >&2 - exit 1 - ;; - esac + _aiter_preflight_head_ok download "${code}" } if [[ "${1:-}" == "--preflight" ]]; then diff --git a/.github/workflows/aiter-prebuilt-upload.yml b/.github/workflows/aiter-prebuilt-upload.yml index c7b5d4b6d..7ee13a919 100644 --- a/.github/workflows/aiter-prebuilt-upload.yml +++ b/.github/workflows/aiter-prebuilt-upload.yml @@ -31,11 +31,16 @@ jobs: submodules: recursive fetch-depth: 0 - # Fails early if the prebuilt artifact URL is unreachable or upload credentials are invalid - - name: Validate prebuilt upload to Artifactory + # Verify this runner can reach Artifactory for uploads + - name: "Preflight: Artifactory upload reachability" run: | set -euo pipefail - bash .github/scripts/aiter_prebuild_upload.sh --preflight --upload + if bash .github/scripts/aiter_prebuild_upload.sh --preflight --upload; then + echo "::notice::Preflight upload reachability succeeded" + exit 0 + fi + echo "::error::Preflight upload reachability failed" + exit 1 - name: Resolve docker image id: cfg diff --git a/.github/workflows/rocm-ci-dispatch.yml b/.github/workflows/rocm-ci-dispatch.yml index 2eefb045a..7b1718608 100644 --- a/.github/workflows/rocm-ci-dispatch.yml +++ b/.github/workflows/rocm-ci-dispatch.yml @@ -20,7 +20,7 @@ jobs: trigger_aiter_upload: ${{ steps.set.outputs.trigger_aiter_upload }} steps: - name: Detect PR changes under 3rdparty/aiter - uses: dorny/paths-filter@v3 + uses: dorny/paths-filter@v4 id: paths if: github.event.action == 'synchronize' with: diff --git a/.github/workflows/rocm-wheels-build.yml b/.github/workflows/rocm-wheels-build.yml index ba93ecbbf..afaf5024f 100644 --- a/.github/workflows/rocm-wheels-build.yml +++ b/.github/workflows/rocm-wheels-build.yml @@ -91,17 +91,17 @@ jobs: 3rdparty/QoLA \ 3rdparty/hipify_torch - # Verify this runner can reach Artifactory for anonymous prebuilt downloads (informational; does not fail CI). - - name: Artifactory reachability from build runner + # Verify this runner can reach Artifactory for anonymous prebuilt downloads + - name: "Preflight: Artifactory download reachability" if: ${{ inputs.use_prebuilt_aiter }} continue-on-error: true run: | set -euo pipefail if bash .github/scripts/aiter_prebuild_upload.sh --preflight --download; then - echo "::notice::build-only-te can reach prebuilt Artifactory for downloads." + echo "::notice::Preflight download reachability succeeded" exit 0 fi - echo "::warning::Prebuilt Artifactory check failed on build-only-te - see script output above." + echo "::warning::Preflight download reachability failed" exit 1 - name: Derive Docker image tag From c08e310cef48a12fbeb790015e8dde18739f27a7 Mon Sep 17 00:00:00 2001 From: Veera Rajasekhar Reddy Gopu Date: Sat, 2 May 2026 01:27:02 +0000 Subject: [PATCH 5/7] Updated event name --- .github/workflows/rocm-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/rocm-ci.yml b/.github/workflows/rocm-ci.yml index 51414a461..55e8a8be6 100644 --- a/.github/workflows/rocm-ci.yml +++ b/.github/workflows/rocm-ci.yml @@ -107,7 +107,7 @@ jobs: upload_aiter_prebuilt: name: Build and upload AITER prebuilt needs: select_image - if: ${{ (github.event_name == 'workflow_call' && inputs.trigger_aiter_upload == 'true') }} + if: ${{ (github.event_name == 'pull_request' && inputs.trigger_aiter_upload == 'true') }} uses: ./.github/workflows/aiter-prebuilt-upload.yml with: docker_image: ${{ needs.select_image.outputs.image-tag }} From c63d1fa800eeef56d3da23e273af987df9c8a10d Mon Sep 17 00:00:00 2001 From: Veera Rajasekhar Reddy Gopu Date: Thu, 7 May 2026 18:14:20 +0000 Subject: [PATCH 6/7] CI: AITER prebuilt auto-upload, shared Docker image selection Wire PR Automatic CI to optionally build and upload AITER prebuilts to Artifactory when a synchronize touches 3rdparty/aiter and the upload-success cache misses. Reuse select-docker-image for consistent image resolution across dispatch, rocm-ci, and the upload workflow. - rocm-ci-dispatch: gate on aiter paths, compute aiter_upload_cache_key from Docker image slug and submodule SHA, restore cache before deciding trigger_aiter_upload, pass docker_image_override and flags into rocm-ci. - rocm-ci: call aiter-prebuilt-upload on pull_request when trigger_aiter_upload; pass image tag and cache key; keep build gated on select_image and upload success or skip. - aiter-prebuilt-upload: always run select_docker_image; pull/run using needs.select_docker_image.outputs.image_tag; optional cache save after success. - Add select-docker-image reusable workflow for ci/ci_config.json resolution. --- .github/workflows/aiter-prebuilt-upload.yml | 67 ++++++++------- .github/workflows/rocm-ci-dispatch.yml | 93 +++++++++++---------- .github/workflows/rocm-ci.yml | 69 ++++----------- .github/workflows/select-docker-image.yml | 74 ++++++++++++++++ 4 files changed, 175 insertions(+), 128 deletions(-) create mode 100644 .github/workflows/select-docker-image.yml diff --git a/.github/workflows/aiter-prebuilt-upload.yml b/.github/workflows/aiter-prebuilt-upload.yml index 7ee13a919..70447e8eb 100644 --- a/.github/workflows/aiter-prebuilt-upload.yml +++ b/.github/workflows/aiter-prebuilt-upload.yml @@ -6,19 +6,36 @@ name: AITER Prebuilt Upload on: workflow_dispatch: inputs: - docker_image: - description: "Docker image" + docker_image_override: + description: "Manual Docker Image (Leave empty to use config file value)" required: false - default: "" + type: string workflow_call: inputs: - docker_image: - description: "Docker image URI from rocm-ci select_image.outputs.image-tag" + docker_image_override: + description: "Manual Docker Image (Leave empty to use config file value)" required: true type: string + aiter_upload_cache_key: + description: "If non-empty, save Actions cache after success (same key as rocm-ci-dispatch restore)." + required: false + default: '' + type: string + +permissions: + actions: write + contents: read jobs: + # Same resolver as rocm-ci / dispatch; override comes from inputs for both workflow_dispatch and workflow_call. + select_docker_image: + uses: ./.github/workflows/select-docker-image.yml + with: + docker_image_override: ${{ inputs.docker_image_override }} + test_config_from_source: true + upload: + needs: [select_docker_image] runs-on: build-only-te env: NVTE_AITER_PREBUILT_BASE_URL: ${{ vars.NVTE_AITER_PREBUILT_BASE_URL }} @@ -42,33 +59,8 @@ jobs: echo "::error::Preflight upload reachability failed" exit 1 - - name: Resolve docker image - id: cfg - run: | - set -euo pipefail - EVENT="${{ github.event_name }}" - if [ "$EVENT" = "workflow_call" ]; then - IMAGE="${{ inputs.docker_image }}" - if [ -z "$IMAGE" ]; then - echo "workflow_call requires non-empty docker_image." >&2 - exit 1 - fi - echo "Using docker_image from caller." - echo "image=${IMAGE}" >> "$GITHUB_OUTPUT" - exit 0 - fi - IMAGE="${{ inputs.docker_image }}" - if [ -z "$IMAGE" ]; then - IMAGE="${{ vars.DEV_DOCKER_IMAGE }}" - fi - if [ -z "$IMAGE" ]; then - echo "No docker image provided and vars.DEV_DOCKER_IMAGE is empty." >&2 - exit 1 - fi - echo "image=${IMAGE}" >> "$GITHUB_OUTPUT" - - name: Pull docker image - run: docker pull ${{ steps.cfg.outputs.image }} + run: docker pull ${{ needs.select_docker_image.outputs.image_tag }} - name: Run container run: | @@ -79,7 +71,7 @@ jobs: --pid=host \ -v "${{ github.workspace }}:/workspace" \ -w /workspace \ - ${{ steps.cfg.outputs.image }} + ${{ needs.select_docker_image.outputs.image_tag }} - name: Build and upload aiter prebuilt run: | @@ -94,6 +86,17 @@ jobs: EOF )" + - name: Record successful AITER prebuilt upload (cache marker) + if: success() && inputs.aiter_upload_cache_key != '' + run: echo ok > .aiter-upload-success + + - name: Save AITER upload success cache + if: success() && inputs.aiter_upload_cache_key != '' + uses: actions/cache/save@v4 + with: + path: .aiter-upload-success + key: ${{ inputs.aiter_upload_cache_key }} + - name: Cleanup container if: always() run: docker rm -f te-aiter-upload || true diff --git a/.github/workflows/rocm-ci-dispatch.yml b/.github/workflows/rocm-ci-dispatch.yml index 7b1718608..422ff2078 100644 --- a/.github/workflows/rocm-ci-dispatch.yml +++ b/.github/workflows/rocm-ci-dispatch.yml @@ -11,13 +11,21 @@ on: permissions: contents: read pull-requests: read + actions: write jobs: - # To determine whether to upload AITER prebuilt to Artifactory - aiter_prebuilt_upload_trigger: + # Resolve Docker image tag + select_ci_image: + uses: ./.github/workflows/select-docker-image.yml + with: + docker_image_override: '' + test_config_from_source: true + + # Whether the PR touches 3rdparty/aiter + aiter_gate: runs-on: ubuntu-latest outputs: - trigger_aiter_upload: ${{ steps.set.outputs.trigger_aiter_upload }} + aiter_paths: ${{ steps.paths.outputs.aiter }} steps: - name: Detect PR changes under 3rdparty/aiter uses: dorny/paths-filter@v4 @@ -28,47 +36,43 @@ jobs: aiter: - '3rdparty/aiter/**' - - name: Detect skip_aiter_upload label - id: skip_label - uses: actions/github-script@v8 + # Whether to upload AITER prebuilt to Artifactory + aiter_prebuilt_upload_trigger: + needs: [aiter_gate, select_ci_image] + runs-on: ubuntu-latest + outputs: + # true only on synchronize + aiter paths + cache miss (default false via expression) + trigger_aiter_upload: ${{ github.event.action == 'synchronize' && needs.aiter_gate.outputs.aiter_paths == 'true' && steps.aiter_upload_cache.outputs.cache-hit != 'true' }} + aiter_upload_cache_key: ${{ steps.aiter_key.outputs.cache_key }} + steps: + - name: Checkout PR head + if: ${{ github.event.action == 'synchronize' && needs.aiter_gate.outputs.aiter_paths == 'true' }} + uses: actions/checkout@v6 with: - script: | - const labels = context.payload.pull_request?.labels || []; - const skip = labels.some((l) => l.name === 'skip_aiter_upload'); - core.info(`skip_aiter_upload label : ${skip}`); - core.setOutput('skip', skip ? 'true' : 'false'); - - - name: Set trigger_aiter_upload from paths and labels - id: set + ref: ${{ github.event.pull_request.head.sha }} + fetch-depth: 1 + + - name: Compute AITER upload cache key + id: aiter_key + if: ${{ github.event.action == 'synchronize' && needs.aiter_gate.outputs.aiter_paths == 'true' }} + env: + IMAGE_TO_USE: ${{ needs.select_ci_image.outputs.image_tag }} run: | set -euo pipefail - ACTION='${{ github.event.action }}' - echo "PR action=${ACTION}" - - if [ "$ACTION" != "synchronize" ]; then - echo "trigger_aiter_upload=false" >> "$GITHUB_OUTPUT" - echo "Not synchronize - trigger_aiter_upload = false" - exit 0 - fi - - SKIP='${{ steps.skip_label.outputs.skip }}' - echo "skip_aiter_upload label : ${SKIP}" - - if [ "$SKIP" = 'true' ]; then - echo "trigger_aiter_upload=false" >> "$GITHUB_OUTPUT" - echo "skip_aiter_upload label set - trigger_aiter_upload = false" - exit 0 - fi - - AITER_PATHS='${{ steps.paths.outputs.aiter }}' - - if [ "$AITER_PATHS" = "true" ]; then - echo "trigger_aiter_upload=true" >> "$GITHUB_OUTPUT" - echo "3rdparty/aiter changed on PR - trigger_aiter_upload = true" - else - echo "trigger_aiter_upload=false" >> "$GITHUB_OUTPUT" - echo "No 3rdparty/aiter changes on PR - trigger_aiter_upload = false" - fi + AITER_SHA=$(git rev-parse HEAD:3rdparty/aiter) + IMAGE_SLUG=$(printf '%s' "$IMAGE_TO_USE" | sha256sum | awk '{print $1}') + echo "aiter_sha=$AITER_SHA" >> "$GITHUB_OUTPUT" + echo "image_slug=$IMAGE_SLUG" >> "$GITHUB_OUTPUT" + echo "cache_key=aiter-prebuilt-upload-ok-${IMAGE_SLUG}-${AITER_SHA}" >> "$GITHUB_OUTPUT" + echo "Resolved Docker image for cache key (select-docker-image.yml): $IMAGE_TO_USE" + + - name: AITER upload cache validation + id: aiter_upload_cache + if: ${{ github.event.action == 'synchronize' && needs.aiter_gate.outputs.aiter_paths == 'true' }} + uses: actions/cache/restore@v4 + with: + path: .aiter-upload-success + key: ${{ steps.aiter_key.outputs.cache_key }} determine_level: runs-on: ubuntu-latest @@ -110,11 +114,14 @@ jobs: # - A ci-level label higher than any existing ci-level label(s) was added # - A commit was pushed with existing ci-level label(s) # - The PR was reopened or opened with existing ci-level label(s) - if: ${{ needs.determine_level.outputs.test_level != '' }} - needs: [determine_level, aiter_prebuilt_upload_trigger] + if: ${{ always() && needs.select_ci_image.result == 'success' && needs.determine_level.outputs.test_level != '' }} + needs: [determine_level, aiter_prebuilt_upload_trigger, select_ci_image] name: CI Level ${{ needs.determine_level.outputs.test_level }} uses: ./.github/workflows/rocm-ci.yml secrets: inherit with: test_level: ${{ needs.determine_level.outputs.test_level }} trigger_aiter_upload: ${{ needs.aiter_prebuilt_upload_trigger.outputs.trigger_aiter_upload == 'true' }} + aiter_upload_cache_key: ${{ needs.aiter_prebuilt_upload_trigger.outputs.aiter_upload_cache_key }} + docker_image_override: ${{ needs.select_ci_image.outputs.image_tag }} + test_config_from_source: true diff --git a/.github/workflows/rocm-ci.yml b/.github/workflows/rocm-ci.yml index 55e8a8be6..e28008d77 100644 --- a/.github/workflows/rocm-ci.yml +++ b/.github/workflows/rocm-ci.yml @@ -19,7 +19,7 @@ on: type: string docker_image_override: description: 'Manual Docker Image (Leave empty to use config file value)' - required: false + required: true type: string test_config_from_source: description: 'DEBUG: Use config.json from current source branch instead of dev' @@ -31,6 +31,11 @@ on: required: false default: false type: boolean + aiter_upload_cache_key: + description: 'Actions cache key for upload success (computed in rocm-ci-dispatch; empty otherwise)' + required: false + default: '' + type: string workflow_dispatch: inputs: test_level: @@ -56,53 +61,10 @@ env: jobs: select_image: name: Select Docker Image - runs-on: ubuntu-latest - timeout-minutes: 10 - outputs: - image-tag: ${{ steps.select-image.outputs.image-tag }} - steps: - - name: Checkout repository - uses: actions/checkout@v6 - with: - ref: ${{ inputs.test_config_from_source && github.ref_name || github.event.repository.default_branch || 'dev' }} - sparse-checkout: ci/ci_config.json - sparse-checkout-cone-mode: false - - - name: Select Docker Image Tag - id: select-image - run: | - if [[ "${{ inputs.test_config_from_source }}" == "true" ]]; then - echo "::notice::Debugging mode: Using ci/ci_config.json from ${{ github.ref_name }}" - else - echo "::notice::Using ci/ci_config.json from ${{ github.event.repository.default_branch || 'dev' }}" - fi - - if [[ ! -f "ci/ci_config.json" ]]; then - echo "::error::Config file not found in checkout." - exit 1 - fi - - BRANCH_NAME="${{ github.base_ref || github.ref_name }}" - echo "Determining image for branch: $BRANCH_NAME" - VERSION_KEY="$BRANCH_NAME" - - if jq -e --arg key "$VERSION_KEY" '.docker_images[$key]' ci/ci_config.json > /dev/null; then - JSON_KEY="$VERSION_KEY" - else - JSON_KEY="default" - fi - - echo "Selected config key: $JSON_KEY" - IMAGE_TO_USE=$(jq -r --arg key "$JSON_KEY" '.docker_images[$key]' ci/ci_config.json) - - MANUAL_OVERRIDE="${{ inputs.docker_image_override }}" - if [[ -n "$MANUAL_OVERRIDE" ]]; then - echo "::notice::Manual override detected: $MANUAL_OVERRIDE" - IMAGE_TO_USE="$MANUAL_OVERRIDE" - fi - - echo "Selected image: $IMAGE_TO_USE" - echo "image-tag=$IMAGE_TO_USE" >> $GITHUB_OUTPUT + uses: ./.github/workflows/select-docker-image.yml + with: + docker_image_override: ${{ inputs.docker_image_override }} + test_config_from_source: ${{ inputs.test_config_from_source }} upload_aiter_prebuilt: name: Build and upload AITER prebuilt @@ -110,7 +72,8 @@ jobs: if: ${{ (github.event_name == 'pull_request' && inputs.trigger_aiter_upload == 'true') }} uses: ./.github/workflows/aiter-prebuilt-upload.yml with: - docker_image: ${{ needs.select_image.outputs.image-tag }} + docker_image_override: ${{ needs.select_image.outputs.image_tag }} + aiter_upload_cache_key: ${{ inputs.aiter_upload_cache_key }} secrets: inherit build: @@ -156,7 +119,7 @@ jobs: - name: Pull Docker Image run: | - docker pull ${{ needs.select_image.outputs.image-tag }} + docker pull ${{ needs.select_image.outputs.image_tag }} - name: Run Container run: | @@ -171,7 +134,7 @@ jobs: --group-add $(getent group video | cut -d: -f3) \ -v "${{ github.workspace }}:/workspace" \ -w /workspace \ - ${{ needs.select_image.outputs.image-tag }} + ${{ needs.select_image.outputs.image_tag }} - name: Install packages run: | @@ -352,7 +315,7 @@ jobs: - name: Pull Docker Image run: | - docker pull ${{ needs.select_image.outputs.image-tag }} + docker pull ${{ needs.select_image.outputs.image_tag }} - name: Run Container run: | @@ -367,7 +330,7 @@ jobs: --group-add $(getent group video | cut -d: -f3) \ -v "${{ github.workspace }}:/workspace" \ -w /workspace \ - ${{ needs.select_image.outputs.image-tag }} + ${{ needs.select_image.outputs.image_tag }} - name: Install packages env: diff --git a/.github/workflows/select-docker-image.yml b/.github/workflows/select-docker-image.yml new file mode 100644 index 000000000..950805412 --- /dev/null +++ b/.github/workflows/select-docker-image.yml @@ -0,0 +1,74 @@ +# Copyright (c) 2026, Advanced Micro Devices, Inc. All rights reserved. +# +# See LICENSE for license information. + +name: Select Docker Image + +on: + workflow_call: + inputs: + docker_image_override: + description: 'Manual Docker Image (Leave empty to use config file value)' + required: false + default: '' + type: string + test_config_from_source: + description: 'Use ci/ci_config.json from the caller ref instead of default branch' + required: false + default: false + type: boolean + outputs: + image_tag: + description: 'Docker image URI from ci/ci_config.json (or override)' + value: ${{ jobs.select.outputs.image_tag }} + +jobs: + select: + name: Select Docker Image + runs-on: ubuntu-latest + timeout-minutes: 10 + outputs: + image_tag: ${{ steps.select.outputs.image_tag }} + steps: + - name: Checkout repository + uses: actions/checkout@v6 + with: + ref: ${{ inputs.test_config_from_source && github.ref_name || github.event.repository.default_branch || 'dev' }} + sparse-checkout: ci/ci_config.json + sparse-checkout-cone-mode: false + + - name: Select Docker Image Tag + id: select + run: | + if [[ "${{ inputs.test_config_from_source }}" == "true" ]]; then + echo "::notice::Debugging mode: Using ci/ci_config.json from ${{ github.ref_name }}" + else + echo "::notice::Using ci/ci_config.json from ${{ github.event.repository.default_branch || 'dev' }}" + fi + + if [[ ! -f "ci/ci_config.json" ]]; then + echo "::error::Config file not found in checkout." + exit 1 + fi + + BRANCH_NAME="${{ github.base_ref || github.ref_name }}" + echo "Determining image for branch: $BRANCH_NAME" + VERSION_KEY="$BRANCH_NAME" + + if jq -e --arg key "$VERSION_KEY" '.docker_images[$key]' ci/ci_config.json > /dev/null; then + JSON_KEY="$VERSION_KEY" + else + JSON_KEY="default" + fi + + echo "Selected config key: $JSON_KEY" + IMAGE_TO_USE=$(jq -r --arg key "$JSON_KEY" '.docker_images[$key]' ci/ci_config.json) + + MANUAL_OVERRIDE="${{ inputs.docker_image_override }}" + if [[ -n "$MANUAL_OVERRIDE" ]]; then + echo "::notice::Manual override detected: $MANUAL_OVERRIDE" + IMAGE_TO_USE="$MANUAL_OVERRIDE" + fi + + echo "Selected image: $IMAGE_TO_USE" + echo "image_tag=$IMAGE_TO_USE" >> "$GITHUB_OUTPUT" From 015b7050134e7ace92ce1a45eaf4f3b80ef12dc8 Mon Sep 17 00:00:00 2001 From: Veera Rajasekhar Reddy Gopu Date: Thu, 7 May 2026 18:21:04 +0000 Subject: [PATCH 7/7] Fixed git fetch issue --- .github/workflows/select-docker-image.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/select-docker-image.yml b/.github/workflows/select-docker-image.yml index 950805412..d1a12c2cf 100644 --- a/.github/workflows/select-docker-image.yml +++ b/.github/workflows/select-docker-image.yml @@ -33,7 +33,7 @@ jobs: - name: Checkout repository uses: actions/checkout@v6 with: - ref: ${{ inputs.test_config_from_source && github.ref_name || github.event.repository.default_branch || 'dev' }} + ref: ${{ inputs.test_config_from_source && github.ref || github.event.repository.default_branch || 'dev' }} sparse-checkout: ci/ci_config.json sparse-checkout-cone-mode: false @@ -41,7 +41,7 @@ jobs: id: select run: | if [[ "${{ inputs.test_config_from_source }}" == "true" ]]; then - echo "::notice::Debugging mode: Using ci/ci_config.json from ${{ github.ref_name }}" + echo "::notice::Debugging mode: Using ci/ci_config.json from ${{ github.ref }} (${{ github.ref_name }})" else echo "::notice::Using ci/ci_config.json from ${{ github.event.repository.default_branch || 'dev' }}" fi