Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 80 additions & 2 deletions .github/scripts/aiter_prebuild_upload.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,86 @@ set -euo pipefail
# Inputs for upload (optional):
# NVTE_AITER_PREBUILT_BASE_URL - base URL for prebuilts
# NVTE_AITER_PREBUILT_UPLOAD_TOKEN - bearer token for Artifactory
# Optional flag:
# --build : build aiter libs before packaging/uploading; default is package-only.
# Optional flags:
# --preflight --upload
# Validate upload path: Artifactory ping, then HEAD on the probe URL with the bearer token.
# Use in CI before uploading prebuilts.
# --preflight --download
# Validate download path: same ping, then HEAD on the probe URL without credentials.
# Matches what CMake file(DOWNLOAD) sees when fetching prebuilts (no token).
# --build : build AITER libs before packaging/uploading; default is package-only.

# Derive the URLs used by the preflight checks from the prebuilt base URL.
# Globals:
#   NVTE_AITER_PREBUILT_BASE_URL          (read)    base URL for prebuilts
#   _AITER_ARTIFACTORY_SYSTEM_PING_URL    (written) <server root>/artifactory/api/system/ping
#   _AITER_PREBUILT_BASE_ACCESS_PROBE_URL (written) <base>/__aiter_repo_access_probe_not_a_real_artifact
# Exits the shell with status 1 when the base URL is unset or empty.
_aiter_set_artifactory_check_urls() {
  local prebuilt_base="${NVTE_AITER_PREBUILT_BASE_URL:-}"
  [[ -n "${prebuilt_base}" ]] || {
    echo "Missing vars.NVTE_AITER_PREBUILT_BASE_URL" >&2
    exit 1
  }

  # Drop one trailing slash so the joins below never produce "//".
  prebuilt_base="${prebuilt_base%/}"

  # Keep only what precedes the first "/artifactory/" segment; when the base
  # has no such segment the whole base is used unchanged.
  local server_root="${prebuilt_base%%/artifactory/*}"

  _AITER_PREBUILT_BASE_ACCESS_PROBE_URL="${prebuilt_base}/__aiter_repo_access_probe_not_a_real_artifact"
  _AITER_ARTIFACTORY_SYSTEM_PING_URL="${server_root}/artifactory/api/system/ping"
}

# Issue a GET against the Artifactory system ping endpoint.
# Globals:  _AITER_ARTIFACTORY_SYSTEM_PING_URL (read)
# Outputs:  one progress line on stdout; the response body is discarded.
# Returns:  curl's exit status (--fail makes HTTP error responses non-zero).
_aiter_curl_artifactory_system_ping() {
  local ping_url="${_AITER_ARTIFACTORY_SYSTEM_PING_URL}"
  printf '%s\n' "[AITER-PREBUILT] Preflight: GET ${ping_url} ..."
  curl --fail --silent --show-error \
    --connect-timeout 25 --max-time 60 \
    "${ping_url}" >/dev/null
}

# Judge the HTTP status returned by a preflight HEAD probe.
# Arguments:
#   $1 - label of the check being reported (callers pass "upload" or "download")
#   $2 - HTTP status code string produced by curl
# Outputs:  a success line on stdout, or a failure line on stderr.
# Exits the shell with status 1 for any status other than 200 or 404.
# NOTE(review): 404 is presumably accepted because the probe path names an
# artifact that does not exist, so 404 still shows the repository answered.
_aiter_preflight_head_ok() {
  local check_name=$1
  local http_status=$2

  if [[ "${http_status}" == "200" || "${http_status}" == "404" ]]; then
    echo "[AITER-PREBUILT] Preflight ${check_name}: HTTP ${http_status} (success)"
    return
  fi

  echo "[AITER-PREBUILT] Preflight ${check_name}: HTTP ${http_status} (failed)" >&2
  exit 1
}

# Preflight for the upload path: ping Artifactory, then send an authenticated
# HEAD request to the probe URL and judge the returned HTTP status.
# Globals:
#   NVTE_AITER_PREBUILT_UPLOAD_TOKEN      (read) bearer token for Artifactory
#   _AITER_PREBUILT_BASE_ACCESS_PROBE_URL (read; set by _aiter_set_artifactory_check_urls)
# Exits the shell with status 1 when the token is missing or the probe status
# is rejected by _aiter_preflight_head_ok.
_aiter_check_artifactory_upload() {
  _aiter_set_artifactory_check_urls
  if [[ -z "${NVTE_AITER_PREBUILT_UPLOAD_TOKEN:-}" ]]; then
    echo "Missing secrets.AITER_ARTIFACTORY_TOKEN" >&2
    exit 1
  fi
  # A failed ping aborts the script only through the file's `set -e`.
  _aiter_curl_artifactory_system_ping
  echo "[AITER-PREBUILT] Preflight (upload): HEAD ${_AITER_PREBUILT_BASE_ACCESS_PROBE_URL} (authenticated) ..."
  local code
  # The Authorization header is fed through stdin (`-H @-`, curl >= 7.55.0)
  # rather than placed on the command line, so the token never shows up in
  # the process list or in a `set -x` trace of curl's arguments.
  # `|| true` is deliberate: a transport failure leaves a non-2xx/404 code
  # (or an empty string) for _aiter_preflight_head_ok to reject and report.
  code="$(curl -sS -o /dev/null -w "%{http_code}" --connect-timeout 25 --max-time 90 \
    -H @- \
    -I "${_AITER_PREBUILT_BASE_ACCESS_PROBE_URL}" \
    <<<"Authorization: Bearer ${NVTE_AITER_PREBUILT_UPLOAD_TOKEN}" || true)"
  _aiter_preflight_head_ok upload "${code}"
}

# Preflight for the download path: ping Artifactory, then send a HEAD request
# to the probe URL with no credentials and judge the returned HTTP status.
# Globals:  _AITER_PREBUILT_BASE_ACCESS_PROBE_URL (read; set by
#           _aiter_set_artifactory_check_urls)
# Exits the shell with status 1 when _aiter_preflight_head_ok rejects the status.
_aiter_check_artifactory_download() {
  _aiter_set_artifactory_check_urls
  _aiter_curl_artifactory_system_ping

  local probe_url="${_AITER_PREBUILT_BASE_ACCESS_PROBE_URL}"
  echo "[AITER-PREBUILT] Preflight (download): HEAD ${probe_url} (anonymous) ..."

  # No Authorization header here: the request is sent anonymously.
  local -a head_request=(
    curl --silent --show-error --output /dev/null --write-out "%{http_code}"
    --connect-timeout 25 --max-time 90
    --head "${probe_url}"
  )

  # `|| true` keeps a transport failure from aborting here; the resulting
  # status string is left for _aiter_preflight_head_ok to reject.
  local http_status
  http_status="$("${head_request[@]}" || true)"
  _aiter_preflight_head_ok download "${http_status}"
}

if [[ "${1:-}" == "--preflight" ]]; then
shift
case "${1:-}" in
--upload)
_aiter_check_artifactory_upload
;;
--download)
_aiter_check_artifactory_download
;;
*)
echo "Usage: $(basename "$0") --preflight --upload | --preflight --download" >&2
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Usage string omits the script's primary modes (no-arg package-only and --build). When someone misuses --preflight and lands on this branch they'll be told only the preflight forms exist.

Suggested change
echo "Usage: $(basename "$0") --preflight --upload | --preflight --download" >&2
echo "Usage: $(basename "$0") [--build]" >&2
echo " $(basename "$0") --preflight --upload | --preflight --download" >&2

exit 1
;;
esac
exit 0
fi

# Derive ROCm version and aiter commit -> cache key
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
Expand Down
72 changes: 50 additions & 22 deletions .github/workflows/aiter-prebuilt-upload.yml
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pushing to dev is too late. Binaries are expected to be cached when the PR is created; otherwise, the PR CI will have to rebuild them.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I initially started with an idea to provide a PR comment trigger which does this. Do you think it is better? In this case I might need to provide a way to force-push if the user needs to trigger multiple times while working on the PR.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if it can be driven by CI labels + filter by specific path modification. I.e. on the first CI run after aiter commit update, it first builds and uploads AITER and then goes further with CI.

Copy link
Copy Markdown
Contributor

@Micky774 Micky774 Apr 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On that note, can we have an upload as a side effect during the build-and-test workflow? That would provide a relatively simple way to implement this.

Specifically, we can do this more-or-less as-is by using the following filtering for a prebuilt cache upload:

  on:                                                                                                                                       
    pull_request: 
      paths:
        - '3rdparty/aiter'
        - '3rdparty/aiter/**'

Not sure which one exactly is needed since aiter is a submodule but one should work. Still, I'd be more interested in conditionally checking whether the AITER submodule was built from source, and then uploading if it was in the build-and-test flow.

Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,40 @@ name: AITER Prebuilt Upload
on:
workflow_dispatch:
inputs:
docker_image:
description: "Docker image"
docker_image_override:
description: "Manual Docker Image (Leave empty to use config file value)"
required: false
default: ""
type: string
workflow_call:
inputs:
docker_image_override:
description: "Manual Docker Image (Leave empty to use config file value)"
required: true
type: string
aiter_upload_cache_key:
description: "If non-empty, save Actions cache after success (same key as rocm-ci-dispatch restore)."
required: false
default: ''
type: string

permissions:
actions: write
contents: read

jobs:
# Same resolver as rocm-ci / dispatch; override comes from inputs for both workflow_dispatch and workflow_call.
select_docker_image:
uses: ./.github/workflows/select-docker-image.yml
with:
docker_image_override: ${{ inputs.docker_image_override }}
test_config_from_source: true

upload:
needs: [select_docker_image]
runs-on: build-only-te
env:
NVTE_AITER_PREBUILT_BASE_URL: ${{ vars.NVTE_AITER_PREBUILT_BASE_URL }}
NVTE_AITER_PREBUILT_UPLOAD_TOKEN: ${{ secrets.AITER_ARTIFACTORY_TOKEN }}
steps:
- name: Checkout source
uses: actions/checkout@v6
Expand All @@ -22,21 +48,19 @@ jobs:
submodules: recursive
fetch-depth: 0

- name: Resolve docker image
id: cfg
# Verify this runner can reach Artifactory for uploads
- name: "Preflight: Artifactory upload reachability"
run: |
IMAGE="${{ inputs.docker_image }}"
if [ -z "$IMAGE" ]; then
IMAGE="${{ vars.DEV_DOCKER_IMAGE }}"
fi
if [ -z "$IMAGE" ]; then
echo "No docker image provided and vars.DEV_DOCKER_IMAGE is empty." >&2
exit 1
set -euo pipefail
if bash .github/scripts/aiter_prebuild_upload.sh --preflight --upload; then
echo "::notice::Preflight upload reachability succeeded"
exit 0
fi
echo "image=${IMAGE}" >> $GITHUB_OUTPUT
echo "::error::Preflight upload reachability failed"
exit 1

- name: Pull docker image
run: docker pull ${{ steps.cfg.outputs.image }}
run: docker pull ${{ needs.select_docker_image.outputs.image_tag }}

- name: Run container
run: |
Expand All @@ -47,28 +71,32 @@ jobs:
--pid=host \
-v "${{ github.workspace }}:/workspace" \
-w /workspace \
${{ steps.cfg.outputs.image }}
${{ needs.select_docker_image.outputs.image_tag }}

- name: Build and upload aiter prebuilt
env:
NVTE_AITER_PREBUILT_BASE_URL: https://compute-artifactory.amd.com:5000/artifactory/rocm-generic-local/te-ci/aiter-prebuilts
NVTE_AITER_PREBUILT_UPLOAD_TOKEN: ${{ secrets.AITER_ARTIFACTORY_TOKEN }}
run: |
docker exec \
-e NVTE_AITER_PREBUILT_BASE_URL=${NVTE_AITER_PREBUILT_BASE_URL} \
-e NVTE_AITER_PREBUILT_UPLOAD_TOKEN=${NVTE_AITER_PREBUILT_UPLOAD_TOKEN} \
te-aiter-upload bash -c "$(cat <<'EOF'
set -ex
if [ -z "${NVTE_AITER_PREBUILT_UPLOAD_TOKEN}" ]; then
echo "Missing secrets.AITER_ARTIFACTORY_TOKEN" >&2
exit 1
fi
export HIP_PATH=""
git config --global --add safe.directory '*'
bash .github/scripts/aiter_prebuild_upload.sh --build
EOF
)"

- name: Record successful AITER prebuilt upload (cache marker)
if: success() && inputs.aiter_upload_cache_key != ''
run: echo ok > .aiter-upload-success

- name: Save AITER upload success cache
if: success() && inputs.aiter_upload_cache_key != ''
uses: actions/cache/save@v4
with:
path: .aiter-upload-success
key: ${{ inputs.aiter_upload_cache_key }}

- name: Cleanup container
if: always()
run: docker rm -f te-aiter-upload || true
70 changes: 68 additions & 2 deletions .github/workflows/rocm-ci-dispatch.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,70 @@ on:

permissions:
contents: read
pull-requests: read
actions: write

jobs:
# Resolve Docker image tag
select_ci_image:
uses: ./.github/workflows/select-docker-image.yml
with:
docker_image_override: ''
test_config_from_source: true

# Whether the PR touches 3rdparty/aiter
aiter_gate:
runs-on: ubuntu-latest
outputs:
aiter_paths: ${{ steps.paths.outputs.aiter }}
steps:
- name: Detect PR changes under 3rdparty/aiter
uses: dorny/paths-filter@v4
id: paths
if: github.event.action == 'synchronize'
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor: this if: is on the step, not the job, so aiter_gate still spins up an ubuntu-latest runner on every labeled and reopened event just to skip its only step. Moving the gate to the job level (and letting the downstream aiter_prebuilt_upload_trigger see an empty aiter_paths output, which the existing expression already treats as falsy) would avoid that.

Suggested change
if: github.event.action == 'synchronize'
- name: Detect PR changes under 3rdparty/aiter
uses: dorny/paths-filter@v4
id: paths
with:
filters: |
aiter:
- '3rdparty/aiter/**'

…paired with if: github.event.action == 'synchronize' on the aiter_gate job itself.

with:
filters: |
aiter:
- '3rdparty/aiter/**'

# Whether to upload AITER prebuilt to Artifactory
aiter_prebuilt_upload_trigger:
needs: [aiter_gate, select_ci_image]
runs-on: ubuntu-latest
outputs:
# true only on synchronize + aiter paths + cache miss (default false via expression)
trigger_aiter_upload: ${{ github.event.action == 'synchronize' && needs.aiter_gate.outputs.aiter_paths == 'true' && steps.aiter_upload_cache.outputs.cache-hit != 'true' }}
aiter_upload_cache_key: ${{ steps.aiter_key.outputs.cache_key }}
steps:
- name: Checkout PR head
if: ${{ github.event.action == 'synchronize' && needs.aiter_gate.outputs.aiter_paths == 'true' }}
uses: actions/checkout@v6
with:
ref: ${{ github.event.pull_request.head.sha }}
fetch-depth: 1

- name: Compute AITER upload cache key
id: aiter_key
if: ${{ github.event.action == 'synchronize' && needs.aiter_gate.outputs.aiter_paths == 'true' }}
env:
IMAGE_TO_USE: ${{ needs.select_ci_image.outputs.image_tag }}
run: |
set -euo pipefail
AITER_SHA=$(git rev-parse HEAD:3rdparty/aiter)
IMAGE_SLUG=$(printf '%s' "$IMAGE_TO_USE" | sha256sum | awk '{print $1}')
echo "aiter_sha=$AITER_SHA" >> "$GITHUB_OUTPUT"
echo "image_slug=$IMAGE_SLUG" >> "$GITHUB_OUTPUT"
echo "cache_key=aiter-prebuilt-upload-ok-${IMAGE_SLUG}-${AITER_SHA}" >> "$GITHUB_OUTPUT"
echo "Resolved Docker image for cache key (select-docker-image.yml): $IMAGE_TO_USE"

- name: AITER upload cache validation
id: aiter_upload_cache
if: ${{ github.event.action == 'synchronize' && needs.aiter_gate.outputs.aiter_paths == 'true' }}
uses: actions/cache/restore@v4
with:
path: .aiter-upload-success
key: ${{ steps.aiter_key.outputs.cache_key }}

determine_level:
runs-on: ubuntu-latest
outputs:
Expand Down Expand Up @@ -52,10 +114,14 @@ jobs:
# - A ci-level label higher than any existing ci-level label(s) was added
# - A commit was pushed with existing ci-level label(s)
# - The PR was reopened or opened with existing ci-level label(s)
if: ${{ needs.determine_level.outputs.test_level != '' }}
needs: determine_level
if: ${{ always() && needs.select_ci_image.result == 'success' && needs.determine_level.outputs.test_level != '' }}
needs: [determine_level, aiter_prebuilt_upload_trigger, select_ci_image]
name: CI Level ${{ needs.determine_level.outputs.test_level }}
uses: ./.github/workflows/rocm-ci.yml
secrets: inherit
with:
test_level: ${{ needs.determine_level.outputs.test_level }}
trigger_aiter_upload: ${{ needs.aiter_prebuilt_upload_trigger.outputs.trigger_aiter_upload == 'true' }}
aiter_upload_cache_key: ${{ needs.aiter_prebuilt_upload_trigger.outputs.aiter_upload_cache_key }}
docker_image_override: ${{ needs.select_ci_image.outputs.image_tag }}
test_config_from_source: true
Loading
Loading