diff --git a/.github/workflows/README_BUILD_PACKAGES.md b/.github/workflows/README_BUILD_PACKAGES.md
index 2490f1b7..bbaecd83 100644
--- a/.github/workflows/README_BUILD_PACKAGES.md
+++ b/.github/workflows/README_BUILD_PACKAGES.md
@@ -93,13 +93,26 @@ sudo rpm -i --replacefiles --nodeps build/amdrocm7-transferbench-*.rpm
 
 ### Any Linux (TGZ — relocatable install tree, requires ROCm runtime on target)
 
+End-user instructions (pre-install ROCm, runtime dependencies, extract,
+`PATH` / `LD_LIBRARY_PATH`, troubleshooting) live in the project docs at
+[docs/install/INSTALL_TGZ.rst](../../docs/install/INSTALL_TGZ.rst).
+
+Quick smoke test from the repo root after a successful build:
+
 ```bash
 sudo mkdir -p /opt/rocm/extras-7
 sudo tar -xzf build/amdrocm7-transferbench-*.tar.gz -C /opt/rocm/extras-7 --strip-components=1
 export PATH=/opt/rocm/extras-7/bin:$PATH
+# With no args, TransferBench prints version, usage, available presets,
+# and detected topology — a fast end-to-end check that the binary loads
+# its ROCm libs correctly.
 TransferBench
 ```
 
+> Once the `help` and `smoketest` presets land on `develop` (currently on
+> `candidate`), prefer `TransferBench help` for usage and
+> `TransferBench smoketest` for a real correctness check.
+
 ## S3 upload (OIDC)
 
 S3 upload runs only when:
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2b6591d3..23eb07a6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -9,7 +9,41 @@ if (NOT CMAKE_TOOLCHAIN_FILE)
   message(STATUS "CMAKE_TOOLCHAIN_FILE: ${CMAKE_TOOLCHAIN_FILE}")
 endif()
 
-set(VERSION_STRING "1.66.02")
+set(TRANSFERBENCH_VERSION_MAJOR 1)
+set(TRANSFERBENCH_VERSION_MINOR 66)
+set(TRANSFERBENCH_VERSION_PATCH_FALLBACK "02")
+
+# Derive the patch level from git: the number of commits on HEAD since the
+# most recent v<major>.<minor>.* tag (mirrors the RVS flow). The fallback is
+# kept when git is missing, .git is absent, or no matching tag is found.
+set(TRANSFERBENCH_VERSION_PATCH "${TRANSFERBENCH_VERSION_PATCH_FALLBACK}")
+find_package(Git QUIET)
+if(GIT_FOUND AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git")
+  execute_process(
+    COMMAND "${GIT_EXECUTABLE}" describe --tags --abbrev=0 --match
+            "v${TRANSFERBENCH_VERSION_MAJOR}.${TRANSFERBENCH_VERSION_MINOR}.*"
+    WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
+    RESULT_VARIABLE _tb_tag_status
+    OUTPUT_VARIABLE _tb_base_tag
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+    ERROR_QUIET)
+  if(_tb_base_tag AND _tb_tag_status EQUAL 0)
+    execute_process(
+      COMMAND "${GIT_EXECUTABLE}" rev-list --count "${_tb_base_tag}..HEAD"
+      WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
+      RESULT_VARIABLE _tb_count_status
+      OUTPUT_VARIABLE _tb_commits_since_tag
+      OUTPUT_STRIP_TRAILING_WHITESPACE
+      ERROR_QUIET)
+    if(_tb_commits_since_tag MATCHES "^[0-9]+$" AND _tb_count_status EQUAL 0)
+      set(TRANSFERBENCH_VERSION_PATCH "${_tb_commits_since_tag}")
+    endif()
+  endif()
+endif()
+
+set(VERSION_STRING
+    "${TRANSFERBENCH_VERSION_MAJOR}.${TRANSFERBENCH_VERSION_MINOR}.${TRANSFERBENCH_VERSION_PATCH}")
+message(STATUS "TransferBench version: ${VERSION_STRING}")
 project(TransferBench VERSION ${VERSION_STRING} LANGUAGES CXX)
 
 ## Load CMake modules
@@ -272,6 +306,22 @@ if(BUILD_RELOCATABLE_PACKAGE)
     set(ROCM_MAJOR_VERSION "7")
   endif()
 
+  # Relocatable RPATH (matches the RVS reference flow). Mirrors what
+  # build_packages_local.sh used to inject via -DCMAKE_INSTALL_RPATH=, so
+  # plain `cmake -DBUILD_RELOCATABLE_PACKAGE=ON ..` now produces the same
+  # RPATH as a CI/packaged build.
+  set(CMAKE_SKIP_RPATH FALSE)
+  set(_tb_relocatable_rpath
+      "\$ORIGIN:\$ORIGIN/../lib:/opt/rocm/extras-${ROCM_MAJOR_VERSION}/lib:/opt/rocm/lib:/opt/rocm/lib/llvm/lib:/opt/rocm/core-${ROCM_MAJOR_VERSION}/lib:/opt/rocm/core-${ROCM_MAJOR_VERSION}/lib/llvm/lib")
+  # CMAKE_*_RPATH variables only seed targets created after they are set and
+  # TransferBench already exists, so set its properties directly. The last
+  # property (CMake >= 3.16) keeps build-host SDK paths out of the package.
+  set_target_properties(TransferBench PROPERTIES
+    INSTALL_RPATH "${_tb_relocatable_rpath}"
+    BUILD_RPATH "${_tb_relocatable_rpath}"
+    INSTALL_RPATH_USE_LINK_PATH FALSE
+    INSTALL_REMOVE_ENVIRONMENT_RPATH TRUE)
+
   install(TARGETS TransferBench RUNTIME DESTINATION bin COMPONENT devel)
 
   set(CPACK_PACKAGE_NAME "amdrocm${ROCM_MAJOR_VERSION}-transferbench")
diff --git a/build_packages_local.sh b/build_packages_local.sh
index d10ec86e..cf12b831 100755
--- a/build_packages_local.sh
+++ b/build_packages_local.sh
@@ -167,17 +167,29 @@ printf -v ROCM_LIBPATCH_VERSION '%02d%02d' "${ROCM_MAJOR}" "${ROCM_MINOR}"
 export ROCM_MAJOR ROCM_MINOR ROCM_LIBPATCH_VERSION
 log "ROCm major=${ROCM_MAJOR} minor=${ROCM_MINOR} libpatch=${ROCM_LIBPATCH_VERSION}"
 
-# Package release string: branch.commit for dev, run_number for release branches
+# Package release string. Format mirrors the RVS reference flow:
+#   default (push/schedule/dispatch/local): r<libpatch>.<YYYYMMDD>
+#   pull request: r<libpatch>.<YYYYMMDD>.<branch>.<commit>
+#   release/* branch (non-PR): ${GITHUB_RUN_NUMBER} (fallback 1)
 GIT_BRANCH="${GITHUB_REF_NAME:-$(git -C "${REPO_ROOT}" rev-parse --abbrev-ref HEAD 2>/dev/null || echo unknown)}"
 GIT_COMMIT="$(git -C "${REPO_ROOT}" rev-parse --short HEAD 2>/dev/null || echo unknown)"
-if [[ "${GIT_BRANCH}" == rel* ]] || [[ "${GIT_BRANCH}" == release/* ]]; then
-  PKG_RELEASE="${GITHUB_RUN_NUMBER}"
+BUILD_DATE_UTC="$(date -u +%Y%m%d)"
+
+# Collapse non-alphanumerics into single dots and trim — DEB/RPM release
+# fields reject most punctuation.
+sanitize_release() {
+  local s  # LC_ALL=C below: [[:alnum:]] must mean ASCII letters/digits only
+  s="$(printf '%s' "$1" | LC_ALL=C sed -E 's/[^[:alnum:]]+/./g; s/^\.+//; s/\.+$//')"
+  printf '%s' "${s:-unknown}"
+}
+
+if [[ "${GITHUB_EVENT_NAME:-}" == "pull_request" ]]; then
+  PR_BRANCH="$(sanitize_release "${GITHUB_HEAD_REF:-${GIT_BRANCH}}")"
+  PKG_RELEASE="r${ROCM_LIBPATCH_VERSION}.${BUILD_DATE_UTC}.${PR_BRANCH}.${GIT_COMMIT}"
+elif [[ "${GIT_BRANCH}" == release/* ]]; then
+  PKG_RELEASE="${GITHUB_RUN_NUMBER:-1}"
 else
-  # Sanitize: DEB/RPM release fields disallow many punctuation chars.
-  # Collapse anything that's not [A-Za-z0-9] into a single dot, then trim.
-  SAFE_BRANCH="$(printf '%s' "${GIT_BRANCH}" | sed -E 's/[^[:alnum:]]+/./g; s/^\.+//; s/\.+$//')"
-  SAFE_BRANCH="${SAFE_BRANCH:-unknown}"
-  PKG_RELEASE="${SAFE_BRANCH}.${GIT_COMMIT}"
+  PKG_RELEASE="r${ROCM_LIBPATCH_VERSION}.${BUILD_DATE_UTC}"
 fi
 export CPACK_DEBIAN_PACKAGE_RELEASE="${CPACK_DEBIAN_PACKAGE_RELEASE:-$PKG_RELEASE}"
 export CPACK_RPM_PACKAGE_RELEASE="${CPACK_RPM_PACKAGE_RELEASE:-$PKG_RELEASE}"
@@ -185,11 +197,8 @@ log "Package release tag: ${PKG_RELEASE}"
 
 # -------- configure --------
 INSTALL_PREFIX="/opt/rocm/extras-${ROCM_MAJOR}"
-# Relocatable RPATH: $ORIGIN-relative + install prefix + the conventional
-# install-time ROCm locations. Do NOT embed ${ROCM_PATH} (the ephemeral
-# build-time SDK download path) — that would leak CI paths into the
-# packaged binary and break relocatability.
-RPATH_LIST="\$ORIGIN:\$ORIGIN/../lib:${INSTALL_PREFIX}/lib:/opt/rocm/lib:/opt/rocm/lib64"
+# Relocatable RPATH defaults live in CMakeLists.txt under
+# if(BUILD_RELOCATABLE_PACKAGE); enabling that option below activates them.
 
 log "Configuring CMake..."
 rm -rf "${BUILD_DIR}"
@@ -204,9 +213,6 @@ CMAKE_ARGS=(
   -DHIP_PLATFORM=amd
   -DCMAKE_INSTALL_PREFIX="${INSTALL_PREFIX}"
   -DCPACK_PACKAGING_INSTALL_PREFIX="${INSTALL_PREFIX}"
-  -DCMAKE_SKIP_RPATH=FALSE
-  -DCMAKE_INSTALL_RPATH_USE_LINK_PATH=FALSE
-  -DCMAKE_INSTALL_RPATH="${RPATH_LIST}"
   -DCMAKE_VERBOSE_MAKEFILE=ON
   -DBUILD_RELOCATABLE_PACKAGE=ON
   -DBUILD_LOCAL_GPU_TARGET_ONLY=OFF
diff --git a/docs/install/INSTALL_TGZ.rst b/docs/install/INSTALL_TGZ.rst
new file mode 100644
index 00000000..b365cef4
--- /dev/null
+++ b/docs/install/INSTALL_TGZ.rst
@@ -0,0 +1,136 @@
+:orphan:
+
+.. meta::
+   :description: Install the relocatable TransferBench TGZ archive on any Linux distribution
+   :keywords: TransferBench, TGZ, tarball, install, relocatable
+
+.. _install-transferbench-tgz:
+
+------------------------------------------------
+Installing TransferBench from the TGZ archive
+------------------------------------------------
+
+The TransferBench TGZ archive (``amdrocm<major>-transferbench-*.tar.gz``) is a
+relocatable install tree that works on any Linux distribution where a
+compatible ROCm runtime is already present. Use it when you cannot or do not
+want to install the DEB or RPM package — for example on a distribution
+without a native ROCm package, or inside a non-root container.
+
+The TGZ ships only the ``TransferBench`` binary and its supporting files. It
+does **not** bundle ROCm; the host system must already provide the ROCm
+runtime libraries (``hsa-rocr`` and the HIP runtime).
+
+Pre-install: ROCm
+-----------------
+
+Install ROCm on the target system before extracting the TGZ. Follow the
+official AMD documentation:
+
+* `ROCm documentation <https://rocm.docs.amd.com/>`_
+* `Linux install guide <https://rocm.docs.amd.com/projects/install-on-linux/en/latest/>`_
+
+After installing, ``ROCM_PATH`` (typically ``/opt/rocm``) must be set
+correctly and the ROCm libraries must be loadable by the dynamic linker.
+
+Install runtime dependencies
+----------------------------
+
+The DEB and RPM packages declare these runtime dependencies; TGZ users must
+install them manually on the target host.
+
+.. list-table::
+   :header-rows: 1
+   :widths: 30 70
+
+   * - Family
+     - Required packages
+   * - Debian / Ubuntu
+     - ``numactl``, ``libnuma1``, plus the ROCm runtime (``hsa-rocr``)
+   * - RHEL / Rocky / AlmaLinux
+     - ``numactl``, plus the ROCm runtime (``hsa-rocr``)
+
+Install commands:
+
+.. code-block:: bash
+
+   # Ubuntu / Debian
+   sudo apt update && sudo apt install -y numactl libnuma1
+
+   # RHEL / Rocky / AlmaLinux
+   sudo dnf install -y numactl
+
+The ROCm packages (``hsa-rocr`` and friends) come from the ROCm repo
+configured in the pre-install step above.
+
+Extract the TGZ
+---------------
+
+Extract the archive into ``/opt/rocm/extras-<major>``, where ``<major>`` is
+the ROCm major version the package was built against (encoded in the package
+name, for example ``amdrocm7-transferbench-*.tar.gz`` → major ``7``).
+
+.. code-block:: bash
+
+   # Example for ROCm major 7 — match your package
+   sudo mkdir -p /opt/rocm/extras-7
+   sudo tar -xzf amdrocm7-transferbench-*.tar.gz -C /opt/rocm/extras-7 --strip-components=1
+
+The ``--strip-components=1`` option discards the top-level directory inside
+the tarball so files land directly under ``/opt/rocm/extras-7/{bin,lib,...}``.
+
+Configure ``PATH`` and ``LD_LIBRARY_PATH``
+------------------------------------------
+
+Point the shell at the extracted prefix and your ROCm install. Copy and paste
+the block as one unit (replace paths with your real ``ROCM_PATH`` and major
+version):
+
+.. code-block:: bash
+
+   export ROCM_PATH=/opt/rocm   # or your real ROCm root
+   export PATH=/opt/rocm/extras-7/bin:$ROCM_PATH/bin:$PATH
+   export LD_LIBRARY_PATH=/opt/rocm/extras-7/lib:$ROCM_PATH/lib:$ROCM_PATH/lib/llvm/lib:$LD_LIBRARY_PATH
+
+The ``TransferBench`` binary embeds an ``RPATH`` covering ``$ORIGIN``,
+``$ORIGIN/../lib``, ``/opt/rocm/extras-<major>/lib``, ``/opt/rocm/lib``,
+``/opt/rocm/lib/llvm/lib``, ``/opt/rocm/core-<major>/lib``, and
+``/opt/rocm/core-<major>/lib/llvm/lib``. The ``LD_LIBRARY_PATH`` export above
+is mainly defensive — useful if your ROCm tree lives somewhere non-standard
+or if you want to override which copy of a library is loaded for
+troubleshooting.
+
+Verify the install
+------------------
+
+.. code-block:: bash
+
+   TransferBench
+
+Run with no arguments, ``TransferBench`` prints its version, usage, the
+list of available preset benchmarks, and the detected GPU/CPU topology,
+then exits. Seeing that output confirms the binary loaded its ROCm
+libraries correctly.
+
+If the binary fails to load a shared library, inspect:
+
+.. code-block:: bash
+
+   ldd /opt/rocm/extras-7/bin/TransferBench
+   readelf -d /opt/rocm/extras-7/bin/TransferBench | grep -E 'RPATH|RUNPATH'
+
+Make a persistent shell setup
+-----------------------------
+
+To avoid re-exporting these variables in every new shell, drop them into a profile script:
+
+.. code-block:: bash
+
+   sudo tee /etc/profile.d/transferbench.sh >/dev/null <<'EOF'
+   export ROCM_PATH=/opt/rocm
+   export PATH=/opt/rocm/extras-7/bin:$ROCM_PATH/bin:$PATH
+   export LD_LIBRARY_PATH=/opt/rocm/extras-7/lib:$ROCM_PATH/lib:$ROCM_PATH/lib/llvm/lib:$LD_LIBRARY_PATH
+   EOF
+   sudo chmod 0644 /etc/profile.d/transferbench.sh
+
+Log out and back in (or ``source /etc/profile.d/transferbench.sh``) for the
+changes to apply.
diff --git a/docs/install/install.rst b/docs/install/install.rst index 4a44ff59..367fc307 100644 --- a/docs/install/install.rst +++ b/docs/install/install.rst @@ -83,3 +83,9 @@ TransferBench looks for NVCC in ``/usr/local/cuda`` by default. To modify the lo .. code-block:: bash CUDA_PATH=/usr/local/cuda make + +Installing from the relocatable TGZ archive +------------------------------------------- + +If you want to install a pre-built TransferBench binary on a system where +you cannot install the DEB or RPM package, see :ref:`install-transferbench-tgz`.