diff --git a/source/adapters/level_zero/v2/api.cpp b/source/adapters/level_zero/v2/api.cpp index 88fde2cfac..1b8da4acb4 100644 --- a/source/adapters/level_zero/v2/api.cpp +++ b/source/adapters/level_zero/v2/api.cpp @@ -239,17 +239,6 @@ ur_result_t urBindlessImagesReleaseExternalSemaphoreExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t urCommandBufferAppendUSMMemcpyExp( - ur_exp_command_buffer_handle_t hCommandBuffer, void *pDst, const void *pSrc, - size_t size, uint32_t numSyncPointsInWaitList, - const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - uint32_t NumEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, - ur_exp_command_buffer_command_handle_t *phCommand) { - logger::error("{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - ur_result_t urCommandBufferAppendUSMFillExp( ur_exp_command_buffer_handle_t hCommandBuffer, void *pMemory, const void *pPattern, size_t patternSize, size_t size, diff --git a/source/adapters/level_zero/v2/command_buffer.cpp b/source/adapters/level_zero/v2/command_buffer.cpp index c451d2cdac..2c92e54848 100644 --- a/source/adapters/level_zero/v2/command_buffer.cpp +++ b/source/adapters/level_zero/v2/command_buffer.cpp @@ -138,6 +138,33 @@ ur_result_t urCommandBufferAppendKernelLaunchExp( return exceptionToResult(std::current_exception()); } +ur_result_t urCommandBufferAppendUSMMemcpyExp( + ur_exp_command_buffer_handle_t hCommandBuffer, void *pDst, const void *pSrc, + size_t size, uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, + ur_exp_command_buffer_command_handle_t *phCommand) try { + /* Records a copy of size bytes from pSrc to pDst by forwarding to hCommandBuffer->commandListManager.appendUSMMemcpy with blocking=false, an empty wait list and no output event; any exception is converted to a ur_result_t by the function-level catch. */ + // the same issue as in urCommandBufferAppendKernelLaunchExp + std::ignore = numEventsInWaitList; 
+ std::ignore = phEventWaitList; + std::ignore = phEvent; + // sync mechanic can be ignored, because all lists are in-order + std::ignore = numSyncPointsInWaitList; + std::ignore = pSyncPointWaitList; + std::ignore = pSyncPoint; + /* phCommand is accepted but never written here, so no command handle is returned for this append */ + std::ignore = phCommand; + // Responsibility of UMD to offload to copy engine + UR_CALL(hCommandBuffer->commandListManager.appendUSMMemcpy( + false, pDst, pSrc, size, 0, nullptr, nullptr)); + + return UR_RESULT_SUCCESS; +} catch (...) { + return exceptionToResult(std::current_exception()); +} + ur_result_t urCommandBufferGetInfoExp(ur_exp_command_buffer_handle_t hCommandBuffer, ur_exp_command_buffer_info_t propName, diff --git a/source/adapters/level_zero/v2/command_list_manager.cpp b/source/adapters/level_zero/v2/command_list_manager.cpp index 09be360631..b57c53344f 100644 --- a/source/adapters/level_zero/v2/command_list_manager.cpp +++ b/source/adapters/level_zero/v2/command_list_manager.cpp @@ -102,6 +102,30 @@ ur_result_t ur_command_list_manager::appendKernelLaunch( return UR_RESULT_SUCCESS; } +ur_result_t ur_command_list_manager::appendUSMMemcpy( + bool blocking, void *pDst, const void *pSrc, size_t size, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + TRACK_SCOPE_LATENCY("ur_command_list_manager::appendUSMMemcpy"); + /* this->Mutex is held until the function returns, covering both the append and the optional host synchronize below */ + std::scoped_lock lock(this->Mutex); + /* zeSignalEvent is passed below as the signal-event argument of the copy; NOTE(review): how getSignalEvent treats a null phEvent (as passed by the command-buffer path) is not visible in this patch - confirm */ + auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_USM_MEMCPY); + + auto [pWaitEvents, numWaitEvents] = + getWaitListView(phEventWaitList, numEventsInWaitList); + + ZE2UR_CALL(zeCommandListAppendMemoryCopy, + (zeCommandList.get(), pDst, pSrc, size, zeSignalEvent, + numWaitEvents, pWaitEvents)); + /* blocking callers wait on the host for the command list, passing UINT64_MAX as the timeout argument */ + if (blocking) { + ZE2UR_CALL(zeCommandListHostSynchronize, (zeCommandList.get(), UINT64_MAX)); + } + + return UR_RESULT_SUCCESS; +} + ze_command_list_handle_t ur_command_list_manager::getZeCommandList() { return zeCommandList.get(); } diff --git a/source/adapters/level_zero/v2/command_list_manager.hpp 
b/source/adapters/level_zero/v2/command_list_manager.hpp index 60de1363c7..975a3a792c 100644 --- a/source/adapters/level_zero/v2/command_list_manager.hpp +++ b/source/adapters/level_zero/v2/command_list_manager.hpp @@ -47,6 +47,11 @@ struct ur_command_list_manager : public _ur_object { const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); + /* Appends a copy of size bytes from pSrc to pDst to the managed command list; when blocking is true the call also host-synchronizes the list before returning. */ ur_result_t appendUSMMemcpy(bool blocking, void *pDst, const void *pSrc, + size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); + ze_command_list_handle_t getZeCommandList(); wait_list_view getWaitListView(const ur_event_handle_t *phWaitEvents, diff --git a/source/adapters/level_zero/v2/queue_immediate_in_order.cpp b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp index 4d66607250..94f2f90b4c 100644 --- a/source/adapters/level_zero/v2/queue_immediate_in_order.cpp +++ b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp @@ -698,21 +698,9 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueUSMMemcpy( // TODO: parametrize latency tracking with 'blocking' TRACK_SCOPE_LATENCY("ur_queue_immediate_in_order_t::enqueueUSMMemcpy"); - std::scoped_lock lock(this->Mutex); - - auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_USM_MEMCPY); - - auto [pWaitEvents, numWaitEvents] = - getWaitListView(phEventWaitList, numEventsInWaitList); - - ZE2UR_CALL(zeCommandListAppendMemoryCopy, - (commandListManager.getZeCommandList(), pDst, pSrc, size, - zeSignalEvent, numWaitEvents, pWaitEvents)); - - if (blocking) { - ZE2UR_CALL(zeCommandListHostSynchronize, - (commandListManager.getZeCommandList(), UINT64_MAX)); - } + /* the wait-list and signal-event setup, the append and the optional blocking wait removed above now happen inside ur_command_list_manager::appendUSMMemcpy, which locks the Mutex of the command list manager instead of the Mutex of this queue - NOTE(review): confirm nothing else here relied on the queue lock */ UR_CALL(commandListManager.appendUSMMemcpy(blocking, pDst, pSrc, size, + numEventsInWaitList, + phEventWaitList, phEvent)); return UR_RESULT_SUCCESS; }