From 94e581dcbe00790d30dfa6a68e038f7d36f70d0b Mon Sep 17 00:00:00 2001 From: AtlantaPepsi Date: Wed, 8 Apr 2026 06:04:49 +0000 Subject: [PATCH 1/2] ibv dynamic loading --- CMakeLists.txt | 9 +++ Makefile | 9 +++ src/header/TransferBench.hpp | 136 ++++++++++++++++++++++++++++++++--- 3 files changed, 143 insertions(+), 11 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ba5ed980..18f52dca 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,6 +24,7 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") #================================================================================================== option(BUILD_LOCAL_GPU_TARGET_ONLY "Build only for GPUs detected on this machine" OFF) option(ENABLE_NIC_EXEC "Enable RDMA NIC Executor in TransferBench" OFF) +option(ENABLE_IBV_DIRECT "Link libibverbs symbols directly (OFF: resolve via dlsym)" ON) option(ENABLE_MPI_COMM "Enable MPI Communicator support" OFF) option(ENABLE_DMA_BUF "Enable DMA-BUF support for GPU Direct RDMA" OFF) option(ENABLE_AMD_SMI "Enable AMD-SMI pod membership queries" OFF) @@ -146,6 +147,11 @@ else() set_target_properties(ibverbs PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${IBVERBS_INCLUDE_DIR}" IMPORTED_LOCATION "${IBVERBS_LIBRARY}" INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${IBVERBS_INCLUDE_DIR}") set(IBVERBS_FOUND 1) message(STATUS "- Building with NIC executor support. Can set DISABLE_NIC_EXEC=1 to disable") + if(ENABLE_IBV_DIRECT) + message(STATUS "- IBV_DIRECT enabled (direct libibverbs linkage); set -DENABLE_IBV_DIRECT=OFF for dlsym path") + else() + message(STATUS "- IBV_DIRECT disabled: libibverbs symbols resolved via dlsym at runtime") + endif() else() if(NOT IBVERBS_LIBRARY) message(WARNING "- IBVerbs library not found") @@ -318,6 +324,9 @@ if(IBVERBS_FOUND) target_include_directories(TransferBench PRIVATE ${IBVERBS_INCLUDE_DIR}) target_link_libraries(TransferBench PRIVATE ${IBVERBS_LIBRARY}) target_compile_definitions(TransferBench PRIVATE NIC_EXEC_ENABLED) + if(ENABLE_IBV_DIRECT) + target_compile_definitions(TransferBench PRIVATE IBV_DIRECT=1) + endif() endif() if(MPI_COMM_FOUND) if(TARGET MPI::MPI_CXX) diff --git a/Makefile b/Makefile index 993eedd5..3e7bdb27 100644 --- a/Makefile +++ b/Makefile @@ -9,6 +9,7 @@ MPI_PATH ?= /usr/local/openmpi # Optional features (set to 0 to disable, 1 to enable) # DISABLE_NIC_EXEC: Disable RDMA/NIC executor support (default: 0) +# DISABLE_IBV_DIRECT: When NIC support is on, use dlsym for libibverbs instead of direct linkage (default: 0) # DISABLE_MPI_COMM: Disable MPI communicator support (default: 0) # DISABLE_DMA_BUF: Disable DMA-BUF support for GPU Direct RDMA (default: 1) # DISABLE_AMD_SMI: Disable AMD-SMI pod membership checking support (default: 0) @@ -85,7 +86,9 @@ ifeq ($(filter clean,$(MAKECMDGOALS)),) # 1) DISABLE_NIC_EXEC is not set to 1 # 2) IBVerbs is found in the Dynamic Linker cache # 3) infiniband/verbs.h is found in the default include path + # When enabled, -DIBV_DIRECT=1 is added unless DISABLE_IBV_DIRECT=1 (verbs via direct link + constexpr pfn_*) DISABLE_NIC_EXEC ?= 0 + DISABLE_IBV_DIRECT ?= 0 ifneq ($(DISABLE_NIC_EXEC),1) $(info Attempting to build with NIC executor support) ifeq ("$(shell ldconfig -p | grep -c ibverbs)", "0") @@ -96,6 +99,9 @@ ifeq ($(filter clean,$(MAKECMDGOALS)),) COMMON_FLAGS += -DNIC_EXEC_ENABLED LDFLAGS += -libverbs NIC_ENABLED = 1 + ifneq ($(DISABLE_IBV_DIRECT),1) + COMMON_FLAGS += -DIBV_DIRECT=1 + endif # Disable DMA-BUF support by default (set DISABLE_DMA_BUF=0 to enable) DISABLE_DMA_BUF ?= 1 @@ -123,6 +129,9 @@ ifeq ($(filter clean,$(MAKECMDGOALS)),) $(info - To use the TransferBench RDMA executor, check if your system has NICs, the NIC drivers are installed, and libibverbs-dev is installed) else $(info - Building with NIC executor support. Can set DISABLE_NIC_EXEC=1 to disable) + ifeq ($(DISABLE_IBV_DIRECT),1) + $(info - IBV_DIRECT disabled: libibverbs via dlsym, DISABLE_IBV_DIRECT=1) + endif endif endif diff --git a/src/header/TransferBench.hpp b/src/header/TransferBench.hpp index 023991b3..a5467b50 100644 --- a/src/header/TransferBench.hpp +++ b/src/header/TransferBench.hpp @@ -55,6 +55,12 @@ THE SOFTWARE. #ifdef NIC_EXEC_ENABLED #include +#if IBV_DIRECT + #define IBV_FN(name, rettype, arglist) constexpr rettype(*pfn_##name)arglist = name; +#else + #include + #define IBV_FN(name, rettype, arglist) rettype(*pfn_##name)arglist = nullptr; +#endif #endif #ifdef MPI_COMM_ENABLED @@ -736,11 +742,36 @@ namespace TransferBench return false; \ } while (0) +namespace { + IBV_FN(ibv_alloc_pd, ibv_pd*, (ibv_context*)); + IBV_FN(ibv_close_device, int, (ibv_context*)); + IBV_FN(ibv_create_cq, ibv_cq*, (ibv_context*, int, void*, ibv_comp_channel*, int)); + IBV_FN(ibv_create_qp, ibv_qp*, (ibv_pd*, ibv_qp_init_attr*)); + IBV_FN(ibv_dealloc_pd, int, (ibv_pd*)); + IBV_FN(ibv_dereg_mr, int, (ibv_mr*)); + IBV_FN(ibv_destroy_cq, int, (ibv_cq*)); + IBV_FN(ibv_destroy_qp, int, (ibv_qp*)); + IBV_FN(ibv_free_device_list, void, (ibv_device**)); + IBV_FN(ibv_get_device_list, ibv_device**, (int*)); + IBV_FN(ibv_get_device_name, const char*, (ibv_device*)); + IBV_FN(ibv_modify_qp, int, (ibv_qp*, ibv_qp_attr*, int)); + IBV_FN(ibv_open_device, ibv_context*, (ibv_device*)); + IBV_FN(ibv_poll_cq, int, (ibv_cq*, int, ibv_wc*)); + IBV_FN(ibv_post_send, int, (ibv_qp*, ibv_send_wr*, ibv_send_wr**)); + IBV_FN(ibv_query_device, int, (ibv_context*, ibv_device_attr*)); + IBV_FN(ibv_query_gid, int, (ibv_context*, uint8_t, int, ibv_gid*)); + IBV_FN(ibv_query_port, int, (ibv_context*, uint8_t, ibv_port_attr*)); +#ifdef HAVE_DMABUF_SUPPORT + IBV_FN(ibv_reg_dmabuf_mr, ibv_mr*, (ibv_pd*, uint64_t, size_t, uint64_t, int, int)); +#endif + IBV_FN(ibv_reg_mr, ibv_mr*, (ibv_pd*, void*, size_t, int)); +} + // Helper macros for calling RDMA functions and reporting errors #ifdef VERBS_DEBUG #define IBV_CALL(__func__, ...) \ do { \ - int error = __func__(__VA_ARGS__); \ + int error = pfn_##__func__(__VA_ARGS__); \ if (error != 0) { \ return {ERR_FATAL, "Encountered IbVerbs error (%d) at line (%d) " \ "and function (%s)", (error), __LINE__, #__func__}; \ @@ -749,7 +780,7 @@ namespace TransferBench #define IBV_PTR_CALL(__ptr__, __func__, ...) \ do { \ - __ptr__ = __func__(__VA_ARGS__); \ + __ptr__ = pfn_##__func__(__VA_ARGS__); \ if (__ptr__ == nullptr) { \ return {ERR_FATAL, "Encountered IbVerbs nullptr error at line (%d) " \ "and function (%s)", __LINE__, #__func__}; \ @@ -758,7 +789,7 @@ namespace TransferBench #else #define IBV_CALL(__func__, ...) \ do { \ - int error = __func__(__VA_ARGS__); \ + int error = pfn_##__func__(__VA_ARGS__); \ if (error != 0) { \ return {ERR_FATAL, "Encountered IbVerbs error (%d=%s) in func (%s)" \ , error, strerror(errno), #__func__}; \ @@ -767,7 +798,7 @@ namespace TransferBench #define IBV_PTR_CALL(__ptr__, __func__, ...) \ do { \ - __ptr__ = __func__(__VA_ARGS__); \ + __ptr__ = pfn_##__func__(__VA_ARGS__); \ if (__ptr__ == nullptr) { \ return {ERR_FATAL, "Encountered IbVerbs nullptr error (%s) in func (%s) " \ , strerror(errno), #__func__}; \ @@ -1006,6 +1037,7 @@ namespace { bool IsSamePod(int targetRank, int sourceRank) const; std::string GetExecutorName(ExeDevice exeDevice) const; int NicIsActive(int nicIndex, int targetRank) const; + bool IbvLoaded() const; #if !defined(__NVCC__) ErrResult GetHsaAgent(ExeDevice const& exeDevice, hsa_agent_t& agent) const; @@ -1032,6 +1064,8 @@ namespace { bool verbose = false; bool rankDoesOutput = true; FILE* dumpCfgFile = nullptr; + bool ibvLoaded = false; + void* ibvLibHandle = nullptr; #if !defined(__NVCC__) std::vector cpuAgents; @@ -2341,6 +2375,11 @@ namespace { case EXE_NIC: case EXE_NIC_NEAREST: #ifdef NIC_EXEC_ENABLED { + if (!System::Get().IbvLoaded()) { + errors.push_back({ERR_FATAL, "Transfer %d: NIC executor is requested but IB verbs is not loaded.", i}); + hasFatalError = true; + break; + } // NIC Executors can only execute a copy operation if (t.srcs.size() != 1 || t.dsts.size() != 1) { errors.push_back({ERR_FATAL, "Transfer %d: NIC executor requires single SRC and single DST", i}); @@ -2671,7 +2710,7 @@ namespace { #ifdef NIC_EXEC_ENABLED // Function to collect information about IBV devices //======================================================================================== -static bool IsConfiguredGid(union ibv_gid const& gid) + static bool IsConfiguredGid(union ibv_gid const& gid) { const struct in6_addr *a = (struct in6_addr *) gid.raw; int trailer = (a->s6_addr32[1] | a->s6_addr32[2] | a->s6_addr32[3]); @@ -2696,7 +2735,8 @@ static bool IsConfiguredGid(union ibv_gid const& gid) int const& gidIndex, int& version) { - char const* deviceName = ibv_get_device_name(context->device); + char const* deviceName; + IBV_PTR_CALL(deviceName, ibv_get_device_name, context->device); char gidRoceVerStr[16] = {}; char roceTypePath[PATH_MAX] = {}; sprintf(roceTypePath, "/sys/class/infiniband/%s/ports/%d/gid_attrs/types/%d", @@ -2778,17 +2818,68 @@ static bool IsConfiguredGid(union ibv_gid const& gid) return ERR_NONE; } + // Should only be called with IBV_DIRECT guard + static void* Ibvdl() { + static void* ibvLibHandle = nullptr; + if (ibvLibHandle) return ibvLibHandle; + + void *handle = dlopen("libibverbs.so.1", RTLD_NOW); + if (handle != nullptr) { + struct Symbol { void **ppfn; char const *name; }; + Symbol symbols[] = { + { (void**)&pfn_ibv_alloc_pd, "ibv_alloc_pd" }, + { (void**)&pfn_ibv_close_device, "ibv_close_device" }, + { (void**)&pfn_ibv_create_cq, "ibv_create_cq" }, + { (void**)&pfn_ibv_create_qp, "ibv_create_qp" }, + { (void**)&pfn_ibv_dealloc_pd, "ibv_dealloc_pd" }, + { (void**)&pfn_ibv_dereg_mr, "ibv_dereg_mr" }, + { (void**)&pfn_ibv_destroy_cq, "ibv_destroy_cq" }, + { (void**)&pfn_ibv_destroy_qp, "ibv_destroy_qp" }, + { (void**)&pfn_ibv_free_device_list, "ibv_free_device_list" }, + { (void**)&pfn_ibv_get_device_list, "ibv_get_device_list" }, + { (void**)&pfn_ibv_get_device_name, "ibv_get_device_name" }, + { (void**)&pfn_ibv_modify_qp, "ibv_modify_qp" }, + { (void**)&pfn_ibv_open_device, "ibv_open_device" }, + { (void**)&pfn_ibv_poll_cq, "ibv_poll_cq" }, + { (void**)&pfn_ibv_post_send, "ibv_post_send" }, + { (void**)&pfn_ibv_query_device, "ibv_query_device" }, + { (void**)&pfn_ibv_query_gid, "ibv_query_gid" }, + { (void**)&pfn_ibv_query_port, "ibv_query_port" }, +#ifdef HAVE_DMABUF_SUPPORT + { (void**)&pfn_ibv_reg_dmabuf_mr, "ibv_reg_dmabuf_mr" }, +#endif + { (void**)&pfn_ibv_reg_mr, "ibv_reg_mr" }, + }; + for (size_t i = 0; i < sizeof(symbols) / sizeof(symbols[0]); i++) { + *symbols[i].ppfn = dlsym(handle, symbols[i].name); + if (*symbols[i].ppfn == nullptr) { + // Log("[WARN] Failed to load symbol %s", symbols[i].name); + dlclose(handle); + break; + } + } + ibvLibHandle = handle; + } + + return ibvLibHandle; + } + static vector& GetIbvDeviceList() { static bool isInitialized = false; static vector ibvDeviceList = {}; +#if !defined(IBV_DIRECT) + if (ibvLibHandle == nullptr) { + return ibvDeviceList; + } +#endif // Build list on first use if (!isInitialized) { // Query the number of IBV devices int numIbvDevices = 0; - ibv_device** deviceList = ibv_get_device_list(&numIbvDevices); + ibv_device** deviceList = pfn_ibv_get_device_list(&numIbvDevices); // Check for TB_NIC_FILTER // By default, accept all NIC names @@ -2806,15 +2897,15 @@ static bool IsConfiguredGid(union ibv_gid const& gid) ibvDevice.name = deviceList[i]->name; ibvDevice.hasActivePort = false; { - struct ibv_context *context = ibv_open_device(ibvDevice.devicePtr); + struct ibv_context *context = pfn_ibv_open_device(ibvDevice.devicePtr); if (context) { struct ibv_device_attr deviceAttr; - if (!ibv_query_device(context, &deviceAttr)) { + if (!pfn_ibv_query_device(context, &deviceAttr)) { int activePort; ibvDevice.gidIndex = -1; for (int port = 1; port <= deviceAttr.phys_port_cnt; ++port) { struct ibv_port_attr portAttr; - if (ibv_query_port(context, port, &portAttr)) continue; + if (pfn_ibv_query_port(context, port, &portAttr)) continue; if (portAttr.state == IBV_PORT_ACTIVE) { activePort = port; ibvDevice.hasActivePort = true; @@ -2831,7 +2922,7 @@ static bool IsConfiguredGid(union ibv_gid const& gid) } } } - ibv_close_device(context); + pfn_ibv_close_device(context); } } ibvDevice.busId = ""; @@ -5810,6 +5901,19 @@ static bool IsConfiguredGid(union ibv_gid const& gid) Log("[INFO] Running in single node mode\n"); } +#ifdef NIC_EXEC_ENABLED +#if IBV_DIRECT + ibvLoaded = true; +#else + ibvLoaded = true; + ibvLibHandle = Ibvdl(); + if (ibvLibHandle == nullptr) { + Log("[WARN] Failed to open libibverbs.so.1"); + ibvLoaded = false; + } +#endif +#endif + // Collect topology and distribute across all ranks CollectTopology(); } @@ -5842,6 +5946,11 @@ static bool IsConfiguredGid(union ibv_gid const& gid) fclose(dumpCfgFile); } + if (ibvLibHandle) { + dlclose(ibvLibHandle); + ibvLibHandle = nullptr; + } + #ifdef AMD_SMI_ENABLED amdsmi_shut_down(); #elif defined(__NVCC__) && defined(POD_COMM_ENABLED) @@ -7016,6 +7125,11 @@ static bool IsConfiguredGid(union ibv_gid const& gid) return rankInfo[targetRank].nicIsActive.at(nicIndex); } + bool System::IbvLoaded() const + { + return ibvLoaded; + } + int GetNumExecutors(ExeType exeType, int targetRank) { return System::Get().GetNumExecutors(exeType, targetRank); From ebde17fb282ab8c2818a1104b16c3f1402fbc9f5 Mon Sep 17 00:00:00 2001 From: AtlantaPepsi Date: Mon, 27 Apr 2026 06:30:14 +0000 Subject: [PATCH 2/2] ibv dynamicloading; addition of CUMEM flag and compilation fixes --- Makefile | 23 ++++- src/header/IbvDynload.hpp | 187 +++++++++++++++++++++++++++++++++++ src/header/TransferBench.hpp | 150 +++++++--------------------- 3 files changed, 242 insertions(+), 118 deletions(-) create mode 100644 src/header/IbvDynload.hpp diff --git a/Makefile b/Makefile index 3e7bdb27..c60e3b8b 100644 --- a/Makefile +++ b/Makefile @@ -14,6 +14,8 @@ MPI_PATH ?= /usr/local/openmpi # DISABLE_DMA_BUF: Disable DMA-BUF support for GPU Direct RDMA (default: 1) # DISABLE_AMD_SMI: Disable AMD-SMI pod membership checking support (default: 0) # DISABLE_NVML: Disable NVML pod membership detection for CUDA builds (default: 0) +# DISABLE_POD_COMM: Disable pod communication support (default: 0) +# DISABLE_CUMEM: Disable CUDA driver API (default: 0). On CUDA, POD_COMM_ENABLED requires CUMEM_ENABLED. HIPCC ?= $(ROCM_PATH)/bin/amdclang++ NVCC ?= $(CUDA_PATH)/bin/nvcc @@ -227,6 +229,18 @@ ifeq ($(filter clean,$(MAKECMDGOALS)),) endif endif + # TransferBenchCuda: CUDA driver API (libcuda). Independent of POD, but POD on CUDA requires CUMEM. + DISABLE_CUMEM ?= 0 + ifeq ($(MAKECMDGOALS),TransferBenchCuda) + ifneq ($(DISABLE_CUMEM),1) + $(info - Building with CUMEM_ENABLED (CUDA driver API, -lcuda)) + COMMON_FLAGS += -DCUMEM_ENABLED + LDFLAGS += -lcuda + else + $(info - CUDA driver API disabled (DISABLE_CUMEM=1); POD comm unavailable on CUDA) + endif + endif + POD_ENABLED = 0 # Compile with pod support if # 1) DISABLE_POD_COMM is not set to 1 @@ -254,9 +268,12 @@ ifeq ($(filter clean,$(MAKECMDGOALS)),) ifeq ($(CUDA_VERSION_OK),yes) $(info - Detected CUDA version $(CUDA_MAJOR).$(CUDA_MINOR) which has MNNVL support) - COMMON_FLAGS += -DPOD_COMM_ENABLED - LDFLAGS += -lcuda - POD_ENABLED = 1 + ifeq ($(DISABLE_CUMEM),1) + $(info - Pod communication skipped on CUDA: requires CUMEM_ENABLED (DISABLE_CUMEM=1)) + else + COMMON_FLAGS += -DPOD_COMM_ENABLED + POD_ENABLED = 1 + endif else $(info - Detected CUDA version $(CUDA_MAJOR).$(CUDA_MINOR) which does not have MNNVL support) $(info - Pod support will require CUDA version of at least $(CUDA_MIN_MAJOR).$(CUDA_MIN_MINOR)) diff --git a/src/header/IbvDynload.hpp b/src/header/IbvDynload.hpp new file mode 100644 index 00000000..eac46548 --- /dev/null +++ b/src/header/IbvDynload.hpp @@ -0,0 +1,187 @@ +/* +Copyright (c) Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/// @file IbvDynload.hpp +/// @brief libibverbs function pointers and optional dlopen/dlsym when not IBV_DIRECT. +/// @note Include when `NIC_EXEC_ENABLED` is defined (e.g. from `TransferBench.hpp` alongside other headers). + +#pragma once + +#include +#include +#include + +#if IBV_DIRECT +#define IBV_FN(name, rettype, arglist) constexpr rettype(*pfn_##name)arglist = name; +#else +#define IBV_FN(name, rettype, arglist) rettype(*pfn_##name)arglist = nullptr; +#endif + +namespace { + +IBV_FN(ibv_alloc_pd, ibv_pd*, (ibv_context*)) +IBV_FN(ibv_close_device, int, (ibv_context*)) +IBV_FN(ibv_create_cq, ibv_cq*, (ibv_context*, int, void*, ibv_comp_channel*, int)) +IBV_FN(ibv_create_qp, ibv_qp*, (ibv_pd*, ibv_qp_init_attr*)) +IBV_FN(ibv_dealloc_pd, int, (ibv_pd*)) +IBV_FN(ibv_dereg_mr, int, (ibv_mr*)) +IBV_FN(ibv_destroy_cq, int, (ibv_cq*)) +IBV_FN(ibv_destroy_qp, int, (ibv_qp*)) +IBV_FN(ibv_free_device_list, void, (ibv_device**)) +IBV_FN(ibv_get_device_list, ibv_device**, (int*)) +IBV_FN(ibv_get_device_name, const char*, (ibv_device*)) +IBV_FN(ibv_modify_qp, int, (ibv_qp*, ibv_qp_attr*, int)) +IBV_FN(ibv_open_device, ibv_context*, (ibv_device*)) +IBV_FN(ibv_poll_cq, int, (ibv_cq*, int, ibv_wc*)) +IBV_FN(ibv_post_send, int, (ibv_qp*, ibv_send_wr*, ibv_send_wr**)) +IBV_FN(ibv_query_device, int, (ibv_context*, ibv_device_attr*)) +IBV_FN(ibv_query_gid, int, (ibv_context*, uint8_t, int, ibv_gid*)) +#if IBV_DIRECT +// On older versions of libibverbs, ibv_query_port is not defined in the header file. +constexpr int (*pfn_ibv_query_port)(ibv_context*, uint8_t, ibv_port_attr*) = ___ibv_query_port; +#else +IBV_FN(ibv_query_port, int, (ibv_context*, uint8_t, ibv_port_attr*)) +#endif +#ifdef HAVE_DMABUF_SUPPORT +IBV_FN(ibv_reg_dmabuf_mr, ibv_mr*, (ibv_pd*, uint64_t, size_t, uint64_t, int, int)) +#endif +IBV_FN(ibv_reg_mr, ibv_mr*, (ibv_pd*, void*, size_t, int)) + +} // namespace + +#if IBV_DIRECT + +inline void TbIbvEnsureLoaded() {} +inline bool TbIbvSymbolsReady() { return true; } +inline void* TbIbvDlHandle() { return nullptr; } +inline void TbIbvUnload() {} + +#else + +struct IbvDynloadState { + std::once_flag once{}; + void* handle = nullptr; + bool loaded = false; + + void tryLoad() + { + handle = dlopen("libibverbs.so.1", RTLD_NOW); + if (handle == nullptr) + return; + + struct Symbol { void **ppfn; char const *name; }; + + Symbol symbols[] = { + {(void**)&pfn_ibv_alloc_pd, "ibv_alloc_pd"}, + {(void**)&pfn_ibv_close_device, "ibv_close_device"}, + {(void**)&pfn_ibv_create_cq, "ibv_create_cq"}, + {(void**)&pfn_ibv_create_qp, "ibv_create_qp"}, + {(void**)&pfn_ibv_dealloc_pd, "ibv_dealloc_pd"}, + {(void**)&pfn_ibv_dereg_mr, "ibv_dereg_mr"}, + {(void**)&pfn_ibv_destroy_cq, "ibv_destroy_cq"}, + {(void**)&pfn_ibv_destroy_qp, "ibv_destroy_qp"}, + {(void**)&pfn_ibv_free_device_list, "ibv_free_device_list"}, + {(void**)&pfn_ibv_get_device_list, "ibv_get_device_list"}, + {(void**)&pfn_ibv_get_device_name, "ibv_get_device_name"}, + {(void**)&pfn_ibv_modify_qp, "ibv_modify_qp"}, + {(void**)&pfn_ibv_open_device, "ibv_open_device"}, + {(void**)&pfn_ibv_poll_cq, "ibv_poll_cq"}, + {(void**)&pfn_ibv_post_send, "ibv_post_send"}, + {(void**)&pfn_ibv_query_device, "ibv_query_device"}, + {(void**)&pfn_ibv_query_gid, "ibv_query_gid"}, + {(void**)&pfn_ibv_query_port, "ibv_query_port"}, +#ifdef HAVE_DMABUF_SUPPORT + {(void**)&pfn_ibv_reg_dmabuf_mr, "ibv_reg_dmabuf_mr"}, +#endif + {(void**)&pfn_ibv_reg_mr, "ibv_reg_mr"}, + }; + + for (Symbol const& s : symbols) { + void* sym = dlsym(handle, s.name); + if (sym == nullptr) { + dlclose(handle); + handle = nullptr; + return; + } + *s.ppfn = sym; + } + loaded = true; + } +}; + +inline IbvDynloadState& ibvDynloadState() +{ + static IbvDynloadState s; + return s; +} + +inline void TbIbvEnsureLoaded() +{ + IbvDynloadState& st = ibvDynloadState(); + std::call_once(st.once, [&]() { st.tryLoad(); }); +} + +inline bool TbIbvSymbolsReady() +{ + TbIbvEnsureLoaded(); + return ibvDynloadState().loaded; +} + +inline void* TbIbvDlHandle() +{ + TbIbvEnsureLoaded(); + return ibvDynloadState().handle; +} + +inline void TbIbvUnload() +{ + IbvDynloadState& st = ibvDynloadState(); + if (st.handle != nullptr) { + dlclose(st.handle); + st.handle = nullptr; + st.loaded = false; + pfn_ibv_alloc_pd = nullptr; + pfn_ibv_close_device = nullptr; + pfn_ibv_create_cq = nullptr; + pfn_ibv_create_qp = nullptr; + pfn_ibv_dealloc_pd = nullptr; + pfn_ibv_dereg_mr = nullptr; + pfn_ibv_destroy_cq = nullptr; + pfn_ibv_destroy_qp = nullptr; + pfn_ibv_free_device_list = nullptr; + pfn_ibv_get_device_list = nullptr; + pfn_ibv_get_device_name = nullptr; + pfn_ibv_modify_qp = nullptr; + pfn_ibv_open_device = nullptr; + pfn_ibv_poll_cq = nullptr; + pfn_ibv_post_send = nullptr; + pfn_ibv_query_device = nullptr; + pfn_ibv_query_gid = nullptr; + pfn_ibv_query_port = nullptr; +#ifdef HAVE_DMABUF_SUPPORT + pfn_ibv_reg_dmabuf_mr = nullptr; +#endif + pfn_ibv_reg_mr = nullptr; + } +} + +#endif // !IBV_DIRECT \ No newline at end of file diff --git a/src/header/TransferBench.hpp b/src/header/TransferBench.hpp index a5467b50..339d97c7 100644 --- a/src/header/TransferBench.hpp +++ b/src/header/TransferBench.hpp @@ -54,13 +54,7 @@ THE SOFTWARE. #include #ifdef NIC_EXEC_ENABLED -#include -#if IBV_DIRECT - #define IBV_FN(name, rettype, arglist) constexpr rettype(*pfn_##name)arglist = name; -#else - #include - #define IBV_FN(name, rettype, arglist) rettype(*pfn_##name)arglist = nullptr; -#endif +#include "IbvDynload.hpp" #endif #ifdef MPI_COMM_ENABLED @@ -742,32 +736,8 @@ namespace TransferBench return false; \ } while (0) -namespace { - IBV_FN(ibv_alloc_pd, ibv_pd*, (ibv_context*)); - IBV_FN(ibv_close_device, int, (ibv_context*)); - IBV_FN(ibv_create_cq, ibv_cq*, (ibv_context*, int, void*, ibv_comp_channel*, int)); - IBV_FN(ibv_create_qp, ibv_qp*, (ibv_pd*, ibv_qp_init_attr*)); - IBV_FN(ibv_dealloc_pd, int, (ibv_pd*)); - IBV_FN(ibv_dereg_mr, int, (ibv_mr*)); - IBV_FN(ibv_destroy_cq, int, (ibv_cq*)); - IBV_FN(ibv_destroy_qp, int, (ibv_qp*)); - IBV_FN(ibv_free_device_list, void, (ibv_device**)); - IBV_FN(ibv_get_device_list, ibv_device**, (int*)); - IBV_FN(ibv_get_device_name, const char*, (ibv_device*)); - IBV_FN(ibv_modify_qp, int, (ibv_qp*, ibv_qp_attr*, int)); - IBV_FN(ibv_open_device, ibv_context*, (ibv_device*)); - IBV_FN(ibv_poll_cq, int, (ibv_cq*, int, ibv_wc*)); - IBV_FN(ibv_post_send, int, (ibv_qp*, ibv_send_wr*, ibv_send_wr**)); - IBV_FN(ibv_query_device, int, (ibv_context*, ibv_device_attr*)); - IBV_FN(ibv_query_gid, int, (ibv_context*, uint8_t, int, ibv_gid*)); - IBV_FN(ibv_query_port, int, (ibv_context*, uint8_t, ibv_port_attr*)); -#ifdef HAVE_DMABUF_SUPPORT - IBV_FN(ibv_reg_dmabuf_mr, ibv_mr*, (ibv_pd*, uint64_t, size_t, uint64_t, int, int)); -#endif - IBV_FN(ibv_reg_mr, ibv_mr*, (ibv_pd*, void*, size_t, int)); -} - // Helper macros for calling RDMA functions and reporting errors +#ifdef NIC_EXEC_ENABLED #ifdef VERBS_DEBUG #define IBV_CALL(__func__, ...) \ do { \ @@ -805,6 +775,7 @@ namespace { } \ } while (0) #endif +#endif // NIC_EXEC_ENABLED namespace TransferBench { @@ -1065,7 +1036,6 @@ namespace { bool rankDoesOutput = true; FILE* dumpCfgFile = nullptr; bool ibvLoaded = false; - void* ibvLibHandle = nullptr; #if !defined(__NVCC__) std::vector cpuAgents; @@ -2818,60 +2788,14 @@ namespace { return ERR_NONE; } - // Should only be called with IBV_DIRECT guard - static void* Ibvdl() { - static void* ibvLibHandle = nullptr; - if (ibvLibHandle) return ibvLibHandle; - - void *handle = dlopen("libibverbs.so.1", RTLD_NOW); - if (handle != nullptr) { - struct Symbol { void **ppfn; char const *name; }; - Symbol symbols[] = { - { (void**)&pfn_ibv_alloc_pd, "ibv_alloc_pd" }, - { (void**)&pfn_ibv_close_device, "ibv_close_device" }, - { (void**)&pfn_ibv_create_cq, "ibv_create_cq" }, - { (void**)&pfn_ibv_create_qp, "ibv_create_qp" }, - { (void**)&pfn_ibv_dealloc_pd, "ibv_dealloc_pd" }, - { (void**)&pfn_ibv_dereg_mr, "ibv_dereg_mr" }, - { (void**)&pfn_ibv_destroy_cq, "ibv_destroy_cq" }, - { (void**)&pfn_ibv_destroy_qp, "ibv_destroy_qp" }, - { (void**)&pfn_ibv_free_device_list, "ibv_free_device_list" }, - { (void**)&pfn_ibv_get_device_list, "ibv_get_device_list" }, - { (void**)&pfn_ibv_get_device_name, "ibv_get_device_name" }, - { (void**)&pfn_ibv_modify_qp, "ibv_modify_qp" }, - { (void**)&pfn_ibv_open_device, "ibv_open_device" }, - { (void**)&pfn_ibv_poll_cq, "ibv_poll_cq" }, - { (void**)&pfn_ibv_post_send, "ibv_post_send" }, - { (void**)&pfn_ibv_query_device, "ibv_query_device" }, - { (void**)&pfn_ibv_query_gid, "ibv_query_gid" }, - { (void**)&pfn_ibv_query_port, "ibv_query_port" }, -#ifdef HAVE_DMABUF_SUPPORT - { (void**)&pfn_ibv_reg_dmabuf_mr, "ibv_reg_dmabuf_mr" }, -#endif - { (void**)&pfn_ibv_reg_mr, "ibv_reg_mr" }, - }; - for (size_t i = 0; i < sizeof(symbols) / sizeof(symbols[0]); i++) { - *symbols[i].ppfn = dlsym(handle, symbols[i].name); - if (*symbols[i].ppfn == nullptr) { - // Log("[WARN] Failed to load symbol %s", symbols[i].name); - dlclose(handle); - break; - } - } - ibvLibHandle = handle; - } - - return ibvLibHandle; - } - static vector& GetIbvDeviceList() { static bool isInitialized = false; static vector ibvDeviceList = {}; #if !defined(IBV_DIRECT) - if (ibvLibHandle == nullptr) { - return ibvDeviceList; + if (!TbIbvSymbolsReady() && !isInitialized) { + isInitialized = true; } #endif // Build list on first use @@ -2879,7 +2803,7 @@ namespace { // Query the number of IBV devices int numIbvDevices = 0; - ibv_device** deviceList = pfn_ibv_get_device_list(&numIbvDevices); + ibv_device** deviceList = ibv_get_device_list(&numIbvDevices); // Check for TB_NIC_FILTER // By default, accept all NIC names @@ -2897,15 +2821,15 @@ namespace { ibvDevice.name = deviceList[i]->name; ibvDevice.hasActivePort = false; { - struct ibv_context *context = pfn_ibv_open_device(ibvDevice.devicePtr); + struct ibv_context *context = ibv_open_device(ibvDevice.devicePtr); if (context) { struct ibv_device_attr deviceAttr; - if (!pfn_ibv_query_device(context, &deviceAttr)) { + if (!ibv_query_device(context, &deviceAttr)) { int activePort; ibvDevice.gidIndex = -1; for (int port = 1; port <= deviceAttr.phys_port_cnt; ++port) { struct ibv_port_attr portAttr; - if (pfn_ibv_query_port(context, port, &portAttr)) continue; + if (ibv_query_port(context, port, &portAttr)) continue; if (portAttr.state == IBV_PORT_ACTIVE) { activePort = port; ibvDevice.hasActivePort = true; @@ -2922,7 +2846,7 @@ namespace { } } } - pfn_ibv_close_device(context); + ibv_close_device(context); } } ibvDevice.busId = ""; @@ -4977,7 +4901,7 @@ namespace { // Use DMA copy engine do { -#if defined(__NVCC__) +#if defined(CUMEM_ENABLED) ERR_CHECK(cuMemcpyAsync((CUdeviceptr)resources.dstMem[0], (CUdeviceptr)resources.srcMem[0], resources.numBytes, stream)); @@ -5114,32 +5038,32 @@ namespace { } } -#if defined(__NVCC__) - ErrResult::ErrResult(CUresult err) +#if !defined(__NVCC__) + ErrResult::ErrResult(hsa_status_t err) { - if (err == CUDA_SUCCESS) { + if (err == HSA_STATUS_SUCCESS) { this->errType = ERR_NONE; this->errMsg = ""; } else { - const char *errString = NULL, *errName = NULL; - cuGetErrorName(err, &errName); - cuGetErrorString(err, &errString); + const char *errString = NULL; + hsa_status_string(err, &errString); this->errType = ERR_FATAL; - this->errMsg = std::string("CUDA Driver Error: ") + errName - + " (" + errString + ")"; + this->errMsg = std::string("HSA Error: ") + errString; } } -#else - ErrResult::ErrResult(hsa_status_t err) +#elif defined(CUMEM_ENABLED) + ErrResult::ErrResult(CUresult err) { - if (err == HSA_STATUS_SUCCESS) { + if (err == CUDA_SUCCESS) { this->errType = ERR_NONE; this->errMsg = ""; } else { - const char *errString = NULL; - hsa_status_string(err, &errString); + const char *errString = NULL, *errName = NULL; + cuGetErrorName(err, &errName); + cuGetErrorString(err, &errString); this->errType = ERR_FATAL; - this->errMsg = std::string("HSA Error: ") + errString; + this->errMsg = std::string("CUDA Driver Error: ") + errName + + " (" + errString + ")"; } } #endif @@ -5902,15 +5826,12 @@ namespace { } #ifdef NIC_EXEC_ENABLED -#if IBV_DIRECT - ibvLoaded = true; -#else - ibvLoaded = true; - ibvLibHandle = Ibvdl(); - if (ibvLibHandle == nullptr) { - Log("[WARN] Failed to open libibverbs.so.1"); - ibvLoaded = false; - } + TbIbvEnsureLoaded(); + ibvLoaded = TbIbvSymbolsReady(); +#if !defined(IBV_DIRECT) + if (!ibvLoaded) { + Log("[WARN] Failed to load libibverbs.so.1 or required symbols\n"); + } #endif #endif @@ -5946,14 +5867,13 @@ namespace { fclose(dumpCfgFile); } - if (ibvLibHandle) { - dlclose(ibvLibHandle); - ibvLibHandle = nullptr; - } +#ifdef NIC_EXEC_ENABLED + TbIbvUnload(); +#endif #ifdef AMD_SMI_ENABLED amdsmi_shut_down(); -#elif defined(__NVCC__) && defined(POD_COMM_ENABLED) +#elif defined(NVML_ENABLED) nvmlShutdown(); #endif }