diff --git a/source/adapters/level_zero/v2/api.cpp b/source/adapters/level_zero/v2/api.cpp index 1b8da4acb4..9b74ec67e2 100644 --- a/source/adapters/level_zero/v2/api.cpp +++ b/source/adapters/level_zero/v2/api.cpp @@ -251,83 +251,6 @@ ur_result_t urCommandBufferAppendUSMFillExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t urCommandBufferAppendMemBufferCopyExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hSrcMem, - ur_mem_handle_t hDstMem, size_t srcOffset, size_t dstOffset, size_t size, - uint32_t numSyncPointsInWaitList, - const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - uint32_t NumEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, - ur_exp_command_buffer_command_handle_t *phCommand) { - logger::error("{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -ur_result_t urCommandBufferAppendMemBufferWriteExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, - size_t offset, size_t size, const void *pSrc, - uint32_t numSyncPointsInWaitList, - const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - uint32_t NumEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, - ur_exp_command_buffer_command_handle_t *phCommand) { - logger::error("{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -ur_result_t urCommandBufferAppendMemBufferReadExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, - size_t offset, size_t size, void *pDst, uint32_t numSyncPointsInWaitList, - const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - uint32_t NumEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, - ur_exp_command_buffer_command_handle_t *phCommand) { - logger::error("{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -ur_result_t urCommandBufferAppendMemBufferCopyRectExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hSrcMem, - ur_mem_handle_t hDstMem, ur_rect_offset_t srcOrigin, - ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch, - size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, - uint32_t numSyncPointsInWaitList, - const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - uint32_t NumEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, - ur_exp_command_buffer_command_handle_t *phCommand) { - logger::error("{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -ur_result_t urCommandBufferAppendMemBufferWriteRectExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, - ur_rect_offset_t bufferOffset, ur_rect_offset_t hostOffset, - ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, - size_t hostRowPitch, size_t hostSlicePitch, void *pSrc, - uint32_t numSyncPointsInWaitList, - const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - uint32_t NumEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, - ur_exp_command_buffer_command_handle_t *phCommand) { - logger::error("{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -ur_result_t urCommandBufferAppendMemBufferReadRectExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, - ur_rect_offset_t bufferOffset, ur_rect_offset_t hostOffset, - ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, - size_t hostRowPitch, size_t hostSlicePitch, void *pDst, - uint32_t numSyncPointsInWaitList, - const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - uint32_t NumEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, - ur_exp_command_buffer_command_handle_t *phCommand) { - logger::error("{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - ur_result_t urCommandBufferAppendMemBufferFillExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, const void *pPattern, size_t patternSize, size_t offset, size_t size, diff --git a/source/adapters/level_zero/v2/command_buffer.cpp b/source/adapters/level_zero/v2/command_buffer.cpp index 9ccdc1f1eb..d101fb4be8 100644 --- a/source/adapters/level_zero/v2/command_buffer.cpp +++ b/source/adapters/level_zero/v2/command_buffer.cpp @@ -165,6 +165,187 @@ ur_result_t urCommandBufferAppendUSMMemcpyExp( return exceptionToResult(std::current_exception()); } +ur_result_t urCommandBufferAppendMemBufferCopyExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hSrcMem, + ur_mem_handle_t hDstMem, size_t srcOffset, size_t dstOffset, size_t size, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, + ur_exp_command_buffer_command_handle_t *phCommand) try { + + // the same issue as in urCommandBufferAppendKernelLaunchExp + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + // sync mechanic can be ignored, because all lists are in-order + std::ignore = numSyncPointsInWaitList; + std::ignore = pSyncPointWaitList; + std::ignore = pSyncPoint; + + std::ignore = phCommand; + // Responsibility of UMD to offload to copy engine + UR_CALL(hCommandBuffer->commandListManager.appendMemBufferCopy( + hSrcMem, hDstMem, srcOffset, dstOffset, size, 0, nullptr, nullptr)); + + return UR_RESULT_SUCCESS; +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +ur_result_t urCommandBufferAppendMemBufferWriteExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, + size_t offset, size_t size, const void *pSrc, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, + ur_exp_command_buffer_command_handle_t *phCommand) try { + + // the same issue as in urCommandBufferAppendKernelLaunchExp + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + // sync mechanic can be ignored, because all lists are in-order + std::ignore = numSyncPointsInWaitList; + std::ignore = pSyncPointWaitList; + std::ignore = pSyncPoint; + + std::ignore = phCommand; + // Responsibility of UMD to offload to copy engine + UR_CALL(hCommandBuffer->commandListManager.appendMemBufferWrite( + hBuffer, false, offset, size, pSrc, 0, nullptr, nullptr)); + + return UR_RESULT_SUCCESS; +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +ur_result_t urCommandBufferAppendMemBufferReadExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, + size_t offset, size_t size, void *pDst, uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, + ur_exp_command_buffer_command_handle_t *phCommand) try { + + // the same issue as in urCommandBufferAppendKernelLaunchExp + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + // sync mechanic can be ignored, because all lists are in-order + std::ignore = numSyncPointsInWaitList; + std::ignore = pSyncPointWaitList; + std::ignore = pSyncPoint; + + std::ignore = phCommand; + + // Responsibility of UMD to offload to copy engine + UR_CALL(hCommandBuffer->commandListManager.appendMemBufferRead( + hBuffer, false, offset, size, pDst, 0, nullptr, nullptr)); + + return UR_RESULT_SUCCESS; +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +ur_result_t urCommandBufferAppendMemBufferCopyRectExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hSrcMem, + ur_mem_handle_t hDstMem, ur_rect_offset_t srcOrigin, + ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch, + size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, + ur_exp_command_buffer_command_handle_t *phCommand) try { + + // the same issue as in urCommandBufferAppendKernelLaunchExp + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + // sync mechanic can be ignored, because all lists are in-order + std::ignore = numSyncPointsInWaitList; + std::ignore = pSyncPointWaitList; + std::ignore = pSyncPoint; + + std::ignore = phCommand; + // Responsibility of UMD to offload to copy engine + UR_CALL(hCommandBuffer->commandListManager.appendMemBufferCopyRect( + hSrcMem, hDstMem, srcOrigin, dstOrigin, region, srcRowPitch, + srcSlicePitch, dstRowPitch, dstSlicePitch, 0, nullptr, nullptr)); + + return UR_RESULT_SUCCESS; +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +ur_result_t urCommandBufferAppendMemBufferWriteRectExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, + ur_rect_offset_t bufferOffset, ur_rect_offset_t hostOffset, + ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, + size_t hostRowPitch, size_t hostSlicePitch, void *pSrc, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, + ur_exp_command_buffer_command_handle_t *phCommand) try { + + // the same issue as in urCommandBufferAppendKernelLaunchExp + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + // sync mechanic can be ignored, because all lists are in-order + std::ignore = numSyncPointsInWaitList; + std::ignore = pSyncPointWaitList; + std::ignore = pSyncPoint; + + std::ignore = phCommand; + + // Responsibility of UMD to offload to copy engine + UR_CALL(hCommandBuffer->commandListManager.appendMemBufferWriteRect( + hBuffer, false, bufferOffset, hostOffset, region, bufferRowPitch, + bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc, 0, nullptr, + nullptr)); + + return UR_RESULT_SUCCESS; +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +ur_result_t urCommandBufferAppendMemBufferReadRectExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, + ur_rect_offset_t bufferOffset, ur_rect_offset_t hostOffset, + ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, + size_t hostRowPitch, size_t hostSlicePitch, void *pDst, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, + ur_exp_command_buffer_command_handle_t *phCommand) try { + + // the same issue as in urCommandBufferAppendKernelLaunchExp + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + // sync mechanic can be ignored, because all lists are in-order + std::ignore = numSyncPointsInWaitList; + std::ignore = pSyncPointWaitList; + std::ignore = pSyncPoint; + + std::ignore = phCommand; + + // Responsibility of UMD to offload to copy engine + UR_CALL(hCommandBuffer->commandListManager.appendMemBufferReadRect( + hBuffer, false, bufferOffset, hostOffset, region, bufferRowPitch, + bufferSlicePitch, hostRowPitch, hostSlicePitch, pDst, 0, nullptr, + nullptr)); + + return UR_RESULT_SUCCESS; +} catch (...) { + return exceptionToResult(std::current_exception()); +} + ur_result_t urCommandBufferGetInfoExp(ur_exp_command_buffer_handle_t hCommandBuffer, ur_exp_command_buffer_info_t propName, diff --git a/source/adapters/level_zero/v2/command_list_manager.cpp b/source/adapters/level_zero/v2/command_list_manager.cpp index 80daf8f408..d50afbe083 100644 --- a/source/adapters/level_zero/v2/command_list_manager.cpp +++ b/source/adapters/level_zero/v2/command_list_manager.cpp @@ -10,6 +10,7 @@ #include "command_list_manager.hpp" #include "../helpers/kernel_helpers.hpp" +#include "../helpers/memory_helpers.hpp" #include "../ur_interface_loader.hpp" #include "context.hpp" #include "kernel.hpp" @@ -30,6 +31,88 @@ ur_command_list_manager::~ur_command_list_manager() { ur::level_zero::urDeviceRelease(device); } +ur_result_t ur_command_list_manager::appendGenericCopyUnlocked( + ur_mem_handle_t src, ur_mem_handle_t dst, bool blocking, size_t srcOffset, + size_t dstOffset, size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent, + ur_command_t commandType) { + auto zeSignalEvent = getSignalEvent(phEvent, commandType); + + auto waitListView = getWaitListView(phEventWaitList, numEventsInWaitList); + + auto pSrc = ur_cast(src->getDevicePtr( + device, ur_mem_handle_t_::device_access_mode_t::read_only, srcOffset, + size, [&](void *src, void *dst, size_t size) { + ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy, + (zeCommandList.get(), dst, src, size, nullptr, + waitListView.num, waitListView.handles)); + waitListView.clear(); + })); + + auto pDst = ur_cast(dst->getDevicePtr( + device, ur_mem_handle_t_::device_access_mode_t::write_only, dstOffset, + size, [&](void *src, void *dst, size_t size) { + ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy, + (zeCommandList.get(), dst, src, size, nullptr, + waitListView.num, waitListView.handles)); + waitListView.clear(); + })); + + ZE2UR_CALL(zeCommandListAppendMemoryCopy, + (zeCommandList.get(), pDst, pSrc, size, zeSignalEvent, + waitListView.num, waitListView.handles)); + + if (blocking) { + ZE2UR_CALL(zeCommandListHostSynchronize, (zeCommandList.get(), UINT64_MAX)); + } + + return UR_RESULT_SUCCESS; +} + +ur_result_t ur_command_list_manager::appendRegionCopyUnlocked( + ur_mem_handle_t src, ur_mem_handle_t dst, bool blocking, + ur_rect_offset_t srcOrigin, ur_rect_offset_t dstOrigin, + ur_rect_region_t region, size_t srcRowPitch, size_t srcSlicePitch, + size_t dstRowPitch, size_t dstSlicePitch, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent, + ur_command_t commandType) { + auto zeParams = ur2zeRegionParams(srcOrigin, dstOrigin, region, srcRowPitch, + dstRowPitch, srcSlicePitch, dstSlicePitch); + + auto zeSignalEvent = getSignalEvent(phEvent, commandType); + + auto waitListView = getWaitListView(phEventWaitList, numEventsInWaitList); + + auto pSrc = ur_cast(src->getDevicePtr( + device, ur_mem_handle_t_::device_access_mode_t::read_only, 0, + src->getSize(), [&](void *src, void *dst, size_t size) { + ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy, + (zeCommandList.get(), dst, src, size, nullptr, + waitListView.num, waitListView.handles)); + waitListView.clear(); + })); + auto pDst = ur_cast(dst->getDevicePtr( + device, ur_mem_handle_t_::device_access_mode_t::write_only, 0, + dst->getSize(), [&](void *src, void *dst, size_t size) { + ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy, + (zeCommandList.get(), dst, src, size, nullptr, + waitListView.num, waitListView.handles)); + waitListView.clear(); + })); + + ZE2UR_CALL(zeCommandListAppendMemoryCopyRegion, + (zeCommandList.get(), pDst, &zeParams.dstRegion, zeParams.dstPitch, + zeParams.dstSlicePitch, pSrc, &zeParams.srcRegion, + zeParams.srcPitch, zeParams.srcSlicePitch, zeSignalEvent, + waitListView.num, waitListView.handles)); + + if (blocking) { + ZE2UR_CALL(zeCommandListHostSynchronize, (zeCommandList.get(), UINT64_MAX)); + } + + return UR_RESULT_SUCCESS; +} + wait_list_view ur_command_list_manager::getWaitListView(const ur_event_handle_t *phWaitEvents, uint32_t numWaitEvents) { @@ -126,6 +209,139 @@ ur_result_t ur_command_list_manager::appendUSMMemcpy( return UR_RESULT_SUCCESS; } +ur_result_t ur_command_list_manager::appendMemBufferRead( + ur_mem_handle_t hBuffer, bool blockingRead, size_t offset, size_t size, + void *pDst, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + TRACK_SCOPE_LATENCY("ur_command_list_manager::appendMemBufferRead"); + + UR_ASSERT(offset + size <= hBuffer->getSize(), UR_RESULT_ERROR_INVALID_SIZE); + + ur_usm_handle_t_ dstHandle(context, size, pDst); + + std::scoped_lock lock(this->Mutex, + hBuffer->getMutex()); + + return appendGenericCopyUnlocked(hBuffer, &dstHandle, blockingRead, offset, 0, + size, numEventsInWaitList, phEventWaitList, + phEvent, UR_COMMAND_MEM_BUFFER_READ); +} + +ur_result_t ur_command_list_manager::appendMemBufferWrite( + ur_mem_handle_t hBuffer, bool blockingWrite, size_t offset, size_t size, + const void *pSrc, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + TRACK_SCOPE_LATENCY("ur_command_list_manager::appendMemBufferWrite"); + + UR_ASSERT(offset + size <= hBuffer->getSize(), UR_RESULT_ERROR_INVALID_SIZE); + + ur_usm_handle_t_ srcHandle(context, size, pSrc); + + std::scoped_lock lock(this->Mutex, + hBuffer->getMutex()); + + return appendGenericCopyUnlocked( + &srcHandle, hBuffer, blockingWrite, 0, offset, size, numEventsInWaitList, + phEventWaitList, phEvent, UR_COMMAND_MEM_BUFFER_WRITE); +} + +ur_result_t ur_command_list_manager::appendMemBufferCopy( + ur_mem_handle_t hBufferSrc, ur_mem_handle_t hBufferDst, size_t srcOffset, + size_t dstOffset, size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + TRACK_SCOPE_LATENCY("ur_command_list_manager::appendMemBufferCopy"); + + UR_ASSERT(srcOffset + size <= hBufferSrc->getSize(), + UR_RESULT_ERROR_INVALID_SIZE); + UR_ASSERT(dstOffset + size <= hBufferDst->getSize(), + UR_RESULT_ERROR_INVALID_SIZE); + + std::scoped_lock lock( + this->Mutex, hBufferSrc->getMutex(), hBufferDst->getMutex()); + + return appendGenericCopyUnlocked(hBufferSrc, hBufferDst, false, srcOffset, + dstOffset, size, numEventsInWaitList, + phEventWaitList, phEvent, + UR_COMMAND_MEM_BUFFER_COPY); +} + +ur_result_t ur_command_list_manager::appendMemBufferReadRect( + ur_mem_handle_t hBuffer, bool blockingRead, ur_rect_offset_t bufferOrigin, + ur_rect_offset_t hostOrigin, ur_rect_region_t region, size_t bufferRowPitch, + size_t bufferSlicePitch, size_t hostRowPitch, size_t hostSlicePitch, + void *pDst, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + TRACK_SCOPE_LATENCY("ur_command_list_manager::appendMemBufferReadRect"); + + ur_usm_handle_t_ dstHandle(context, 0, pDst); + + std::scoped_lock lock(this->Mutex, + hBuffer->getMutex()); + + return appendRegionCopyUnlocked( + hBuffer, &dstHandle, blockingRead, bufferOrigin, hostOrigin, region, + bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, + numEventsInWaitList, phEventWaitList, phEvent, + UR_COMMAND_MEM_BUFFER_READ_RECT); +} + +ur_result_t ur_command_list_manager::appendMemBufferWriteRect( + ur_mem_handle_t hBuffer, bool blockingWrite, ur_rect_offset_t bufferOrigin, + ur_rect_offset_t hostOrigin, ur_rect_region_t region, size_t bufferRowPitch, + size_t bufferSlicePitch, size_t hostRowPitch, size_t hostSlicePitch, + void *pSrc, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + TRACK_SCOPE_LATENCY("ur_command_list_manager::appendMemBufferWriteRect"); + + ur_usm_handle_t_ srcHandle(context, 0, pSrc); + + std::scoped_lock lock(this->Mutex, + hBuffer->getMutex()); + + return appendRegionCopyUnlocked( + &srcHandle, hBuffer, blockingWrite, hostOrigin, bufferOrigin, region, + hostRowPitch, hostSlicePitch, bufferRowPitch, bufferSlicePitch, + numEventsInWaitList, phEventWaitList, phEvent, + UR_COMMAND_MEM_BUFFER_WRITE_RECT); +} + +ur_result_t ur_command_list_manager::appendMemBufferCopyRect( + ur_mem_handle_t hBufferSrc, ur_mem_handle_t hBufferDst, + ur_rect_offset_t srcOrigin, ur_rect_offset_t dstOrigin, + ur_rect_region_t region, size_t srcRowPitch, size_t srcSlicePitch, + size_t dstRowPitch, size_t dstSlicePitch, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + TRACK_SCOPE_LATENCY("ur_command_list_manager::appendMemBufferCopyRect"); + + std::scoped_lock lock( + this->Mutex, hBufferSrc->getMutex(), hBufferDst->getMutex()); + + return appendRegionCopyUnlocked( + hBufferSrc, hBufferDst, false, srcOrigin, dstOrigin, region, srcRowPitch, + srcSlicePitch, dstRowPitch, dstSlicePitch, numEventsInWaitList, + phEventWaitList, phEvent, UR_COMMAND_MEM_BUFFER_COPY_RECT); +} + +ur_result_t ur_command_list_manager::appendUSMMemcpy2D( + bool blocking, void *pDst, size_t dstPitch, const void *pSrc, + size_t srcPitch, size_t width, size_t height, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + TRACK_SCOPE_LATENCY("ur_command_list_manager::appendUSMMemcpy2D"); + + ur_rect_offset_t zeroOffset{0, 0, 0}; + ur_rect_region_t region{width, height, 0}; + + std::scoped_lock lock(this->Mutex); + + ur_usm_handle_t_ srcHandle(context, 0, pSrc); + ur_usm_handle_t_ dstHandle(context, 0, pDst); + + return appendRegionCopyUnlocked(&srcHandle, &dstHandle, blocking, zeroOffset, + zeroOffset, region, srcPitch, 0, dstPitch, 0, + numEventsInWaitList, phEventWaitList, phEvent, + UR_COMMAND_MEM_BUFFER_COPY_RECT); +} + ze_command_list_handle_t ur_command_list_manager::getZeCommandList() { return zeCommandList.get(); } diff --git a/source/adapters/level_zero/v2/command_list_manager.hpp b/source/adapters/level_zero/v2/command_list_manager.hpp index 975a3a792c..645e35fa03 100644 --- a/source/adapters/level_zero/v2/command_list_manager.hpp +++ b/source/adapters/level_zero/v2/command_list_manager.hpp @@ -51,6 +51,52 @@ struct ur_command_list_manager : public _ur_object { size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); + ur_result_t appendMemBufferRead(ur_mem_handle_t hBuffer, bool blockingRead, + size_t offset, size_t size, void *pDst, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); + + ur_result_t appendMemBufferWrite(ur_mem_handle_t hBuffer, bool blockingWrite, + size_t offset, size_t size, const void *pSrc, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); + + ur_result_t appendMemBufferCopy(ur_mem_handle_t hBufferSrc, + ur_mem_handle_t hBufferDst, size_t srcOffset, + size_t dstOffset, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); + + ur_result_t appendMemBufferReadRect( + ur_mem_handle_t hBuffer, bool blockingRead, ur_rect_offset_t bufferOrigin, + ur_rect_offset_t hostOrigin, ur_rect_region_t region, + size_t bufferRowPitch, size_t bufferSlicePitch, size_t hostRowPitch, + size_t hostSlicePitch, void *pDst, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); + + ur_result_t appendMemBufferWriteRect( + ur_mem_handle_t hBuffer, bool blockingWrite, + ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin, + ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, + size_t hostRowPitch, size_t hostSlicePitch, void *pSrc, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); + + ur_result_t appendMemBufferCopyRect( + ur_mem_handle_t hBufferSrc, ur_mem_handle_t hBufferDst, + ur_rect_offset_t srcOrigin, ur_rect_offset_t dstOrigin, + ur_rect_region_t region, size_t srcRowPitch, size_t srcSlicePitch, + size_t dstRowPitch, size_t dstSlicePitch, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); + + ur_result_t appendUSMMemcpy2D(bool blocking, void *pDst, size_t dstPitch, + const void *pSrc, size_t srcPitch, size_t width, + size_t height, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); ze_command_list_handle_t getZeCommandList(); @@ -60,6 +106,19 @@ struct ur_command_list_manager : public _ur_object { ur_command_t commandType); private: + ur_result_t appendGenericCopyUnlocked( + ur_mem_handle_t src, ur_mem_handle_t dst, bool blocking, size_t srcOffset, + size_t dstOffset, size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent, + ur_command_t commandType); + + ur_result_t appendRegionCopyUnlocked( + ur_mem_handle_t src, ur_mem_handle_t dst, bool blocking, + ur_rect_offset_t srcOrigin, ur_rect_offset_t dstOrigin, + ur_rect_region_t region, size_t srcRowPitch, size_t srcSlicePitch, + size_t dstRowPitch, size_t dstSlicePitch, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent, + ur_command_t commandType); // UR context associated with this command-buffer ur_context_handle_t context; // Device associated with this command-buffer diff --git a/source/adapters/level_zero/v2/queue_immediate_in_order.cpp b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp index 80151fa416..9e7d2030c4 100644 --- a/source/adapters/level_zero/v2/queue_immediate_in_order.cpp +++ b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp @@ -16,7 +16,6 @@ #include "../common/latency_tracker.hpp" #include "../helpers/kernel_helpers.hpp" -#include "../helpers/memory_helpers.hpp" #include "../program.hpp" #include "../ur_interface_loader.hpp" @@ -284,63 +283,15 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueEventsWaitWithBarrierExt( phEvent); } -ur_result_t ur_queue_immediate_in_order_t::enqueueGenericCopyUnlocked( - ur_mem_handle_t src, ur_mem_handle_t dst, bool blocking, size_t srcOffset, - size_t dstOffset, size_t size, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent, - ur_command_t commandType) { - auto zeSignalEvent = getSignalEvent(phEvent, commandType); - - auto waitListView = getWaitListView(phEventWaitList, numEventsInWaitList); - - auto pSrc = ur_cast(src->getDevicePtr( - hDevice, ur_mem_handle_t_::device_access_mode_t::read_only, srcOffset, - size, [&](void *src, void *dst, size_t size) { - ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy, - (commandListManager.getZeCommandList(), dst, src, - size, nullptr, waitListView.num, - waitListView.handles)); - waitListView.clear(); - })); - - auto pDst = ur_cast(dst->getDevicePtr( - hDevice, ur_mem_handle_t_::device_access_mode_t::write_only, dstOffset, - size, [&](void *src, void *dst, size_t size) { - ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy, - (commandListManager.getZeCommandList(), dst, src, - size, nullptr, waitListView.num, - waitListView.handles)); - waitListView.clear(); - })); - - ZE2UR_CALL(zeCommandListAppendMemoryCopy, - (commandListManager.getZeCommandList(), pDst, pSrc, size, - zeSignalEvent, waitListView.num, waitListView.handles)); - - if (blocking) { - ZE2UR_CALL(zeCommandListHostSynchronize, - (commandListManager.getZeCommandList(), UINT64_MAX)); - } - - return UR_RESULT_SUCCESS; -} - ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferRead( ur_mem_handle_t hBuffer, bool blockingRead, size_t offset, size_t size, void *pDst, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { TRACK_SCOPE_LATENCY("ur_queue_immediate_in_order_t::enqueueMemBufferRead"); - - UR_ASSERT(offset + size <= hBuffer->getSize(), UR_RESULT_ERROR_INVALID_SIZE); - - ur_usm_handle_t_ dstHandle(hContext, size, pDst); - - std::scoped_lock lock(this->Mutex, - hBuffer->getMutex()); - - return enqueueGenericCopyUnlocked( - hBuffer, &dstHandle, blockingRead, offset, 0, size, numEventsInWaitList, - phEventWaitList, phEvent, UR_COMMAND_MEM_BUFFER_READ); + UR_CALL(commandListManager.appendMemBufferRead( + hBuffer, blockingRead, offset, size, pDst, numEventsInWaitList, + phEventWaitList, phEvent)); + return UR_RESULT_SUCCESS; } ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferWrite( @@ -348,63 +299,9 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferWrite( const void *pSrc, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { TRACK_SCOPE_LATENCY("ur_queue_immediate_in_order_t::enqueueMemBufferWrite"); - - UR_ASSERT(offset + size <= hBuffer->getSize(), UR_RESULT_ERROR_INVALID_SIZE); - - ur_usm_handle_t_ srcHandle(hContext, size, pSrc); - - std::scoped_lock lock(this->Mutex, - hBuffer->getMutex()); - - return enqueueGenericCopyUnlocked( - &srcHandle, hBuffer, blockingWrite, 0, offset, size, numEventsInWaitList, - phEventWaitList, phEvent, UR_COMMAND_MEM_BUFFER_WRITE); -} - -ur_result_t ur_queue_immediate_in_order_t::enqueueRegionCopyUnlocked( - ur_mem_handle_t src, ur_mem_handle_t dst, bool blocking, - ur_rect_offset_t srcOrigin, ur_rect_offset_t dstOrigin, - ur_rect_region_t region, size_t srcRowPitch, size_t srcSlicePitch, - size_t dstRowPitch, size_t dstSlicePitch, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent, - ur_command_t commandType) { - auto zeParams = ur2zeRegionParams(srcOrigin, dstOrigin, region, srcRowPitch, - dstRowPitch, srcSlicePitch, dstSlicePitch); - - auto zeSignalEvent = getSignalEvent(phEvent, commandType); - - auto waitListView = getWaitListView(phEventWaitList, numEventsInWaitList); - - auto pSrc = ur_cast(src->getDevicePtr( - hDevice, ur_mem_handle_t_::device_access_mode_t::read_only, 0, - src->getSize(), [&](void *src, void *dst, size_t size) { - ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy, - (commandListManager.getZeCommandList(), dst, src, - size, nullptr, waitListView.num, - waitListView.handles)); - waitListView.clear(); - })); - auto pDst = ur_cast(dst->getDevicePtr( - hDevice, ur_mem_handle_t_::device_access_mode_t::write_only, 0, - dst->getSize(), [&](void *src, void *dst, size_t size) { - ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy, - (commandListManager.getZeCommandList(), dst, src, - size, nullptr, waitListView.num, - waitListView.handles)); - waitListView.clear(); - })); - - ZE2UR_CALL(zeCommandListAppendMemoryCopyRegion, - (commandListManager.getZeCommandList(), pDst, &zeParams.dstRegion, - zeParams.dstPitch, zeParams.dstSlicePitch, pSrc, - &zeParams.srcRegion, zeParams.srcPitch, zeParams.srcSlicePitch, - zeSignalEvent, waitListView.num, waitListView.handles)); - - if (blocking) { - ZE2UR_CALL(zeCommandListHostSynchronize, - (commandListManager.getZeCommandList(), UINT64_MAX)); - } - + UR_CALL(commandListManager.appendMemBufferWrite( + hBuffer, blockingWrite, offset, size, pSrc, numEventsInWaitList, + phEventWaitList, phEvent)); return UR_RESULT_SUCCESS; } @@ -417,16 +314,12 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferReadRect( TRACK_SCOPE_LATENCY( "ur_queue_immediate_in_order_t::enqueueMemBufferReadRect"); - ur_usm_handle_t_ dstHandle(hContext, 0, pDst); - - std::scoped_lock lock(this->Mutex, - hBuffer->getMutex()); + UR_CALL(commandListManager.appendMemBufferReadRect( + hBuffer, blockingRead, bufferOrigin, hostOrigin, region, bufferRowPitch, + bufferSlicePitch, hostRowPitch, hostSlicePitch, pDst, numEventsInWaitList, + phEventWaitList, phEvent)); - return enqueueRegionCopyUnlocked( - hBuffer, &dstHandle, blockingRead, bufferOrigin, hostOrigin, region, - bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, - numEventsInWaitList, phEventWaitList, phEvent, - UR_COMMAND_MEM_BUFFER_READ_RECT); + return UR_RESULT_SUCCESS; } ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferWriteRect( @@ -438,16 +331,12 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferWriteRect( TRACK_SCOPE_LATENCY( "ur_queue_immediate_in_order_t::enqueueMemBufferWriteRect"); - ur_usm_handle_t_ srcHandle(hContext, 0, pSrc); - - std::scoped_lock lock(this->Mutex, - hBuffer->getMutex()); + UR_CALL(commandListManager.appendMemBufferWriteRect( + hBuffer, blockingWrite, bufferOrigin, hostOrigin, region, bufferRowPitch, + bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc, numEventsInWaitList, + phEventWaitList, phEvent)); - return enqueueRegionCopyUnlocked( - &srcHandle, hBuffer, blockingWrite, hostOrigin, bufferOrigin, region, - hostRowPitch, hostSlicePitch, bufferRowPitch, bufferSlicePitch, - numEventsInWaitList, phEventWaitList, phEvent, - UR_COMMAND_MEM_BUFFER_WRITE_RECT); + return UR_RESULT_SUCCESS; } ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferCopy( @@ -456,18 +345,10 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferCopy( const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { TRACK_SCOPE_LATENCY("ur_queue_immediate_in_order_t::enqueueMemBufferCopy"); - UR_ASSERT(srcOffset + size <= hBufferSrc->getSize(), - UR_RESULT_ERROR_INVALID_SIZE); - UR_ASSERT(dstOffset + size <= hBufferDst->getSize(), - UR_RESULT_ERROR_INVALID_SIZE); - - std::scoped_lock lock( - this->Mutex, hBufferSrc->getMutex(), hBufferDst->getMutex()); - - return enqueueGenericCopyUnlocked(hBufferSrc, hBufferDst, false, srcOffset, - dstOffset, size, numEventsInWaitList, - phEventWaitList, phEvent, - UR_COMMAND_MEM_BUFFER_COPY); + UR_CALL(commandListManager.appendMemBufferCopy( + hBufferSrc, hBufferDst, srcOffset, dstOffset, size, numEventsInWaitList, + phEventWaitList, phEvent)); + return UR_RESULT_SUCCESS; } ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferCopyRect( @@ -479,13 +360,11 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferCopyRect( TRACK_SCOPE_LATENCY( "ur_queue_immediate_in_order_t::enqueueMemBufferCopyRect"); - std::scoped_lock lock( - this->Mutex, hBufferSrc->getMutex(), hBufferDst->getMutex()); - - return enqueueRegionCopyUnlocked( - hBufferSrc, hBufferDst, false, srcOrigin, dstOrigin, region, srcRowPitch, + UR_CALL(commandListManager.appendMemBufferCopyRect( + hBufferSrc, hBufferDst, srcOrigin, dstOrigin, region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, numEventsInWaitList, - phEventWaitList, phEvent, UR_COMMAND_MEM_BUFFER_COPY_RECT); + phEventWaitList, phEvent)); + return UR_RESULT_SUCCESS; } ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferFill( @@ -791,19 +670,10 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueUSMMemcpy2D( size_t srcPitch, size_t width, size_t height, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { TRACK_SCOPE_LATENCY("ur_queue_immediate_in_order_t::enqueueUSMMemcpy2D"); - - ur_rect_offset_t zeroOffset{0, 0, 0}; - ur_rect_region_t region{width, height, 0}; - - std::scoped_lock lock(this->Mutex); - - ur_usm_handle_t_ srcHandle(hContext, 0, pSrc); - ur_usm_handle_t_ dstHandle(hContext, 0, pDst); - - return enqueueRegionCopyUnlocked(&srcHandle, &dstHandle, blocking, zeroOffset, - zeroOffset, region, srcPitch, 0, dstPitch, 0, - numEventsInWaitList, phEventWaitList, - phEvent, UR_COMMAND_MEM_BUFFER_COPY_RECT); + UR_CALL(commandListManager.appendUSMMemcpy2D( + blocking, pDst, dstPitch, pSrc, srcPitch, width, height, + numEventsInWaitList, phEventWaitList, phEvent)); + return UR_RESULT_SUCCESS; } static void *getGlobalPointerFromModule(ze_module_handle_t hModule, diff --git a/source/adapters/level_zero/v2/queue_immediate_in_order.hpp b/source/adapters/level_zero/v2/queue_immediate_in_order.hpp index 6137fae405..12fa056184 100644 --- a/source/adapters/level_zero/v2/queue_immediate_in_order.hpp +++ b/source/adapters/level_zero/v2/queue_immediate_in_order.hpp @@ -43,20 +43,6 @@ struct ur_queue_immediate_in_order_t : _ur_object, public ur_queue_t_ { void deferEventFree(ur_event_handle_t hEvent) override; - ur_result_t enqueueRegionCopyUnlocked( - ur_mem_handle_t src, ur_mem_handle_t dst, bool blocking, - ur_rect_offset_t srcOrigin, ur_rect_offset_t dstOrigin, - ur_rect_region_t region, size_t srcRowPitch, size_t srcSlicePitch, - size_t dstRowPitch, size_t dstSlicePitch, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent, - ur_command_t commandType); - - ur_result_t enqueueGenericCopyUnlocked( - ur_mem_handle_t src, ur_mem_handle_t dst, bool blocking, size_t srcOffset, - size_t dstOffset, size_t size, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent, - ur_command_t commandType); - ur_result_t enqueueGenericFillUnlocked( ur_mem_handle_t hBuffer, size_t offset, size_t patternSize, const void *pPattern, size_t size, uint32_t numEventsInWaitList,