Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
61 commits
Select commit Hold shift + click to select a range
519c9c3
Prepare ground for command_buffer in v2
Dec 18, 2024
f87741e
Enforce in order list usage, and add initialization and destruction t…
Dec 20, 2024
94ce521
Merge branch 'oneapi-src:main' into add-command-buffer-support
Xewar313 Dec 30, 2024
159ebc8
Add initial support of command buffers to adapter v2
Dec 30, 2024
6d0d8b3
Merge branch 'add-command-buffer-support' of github.com:Xewar313/unif…
Dec 30, 2024
bb90ee5
Update UR calls handling
Jan 8, 2025
84ef0df
Remove unnecessary comment
Jan 8, 2025
1716db3
Move not implemented command buffer commands to previous position
Jan 8, 2025
7da53d8
Fix most issues with code
Jan 10, 2025
895f5c6
Fix formatting and modify queue_api template
Jan 13, 2025
384326c
Move command buffer cleanup to destructor
Jan 13, 2025
a1dd428
Use cached command lists instead of created ones
Jan 13, 2025
d03e88e
update GitHub Cache action to 4.2.0
pbalcer Jan 16, 2025
4e3072a
Remove not needed function and change phrasing
Jan 20, 2025
cbfba58
Add initial implementation of command list manager
Jan 20, 2025
1de57ef
Use list manager instead of custom implementation in queue
Jan 20, 2025
de2f273
Optimalize imports
Jan 21, 2025
d979f6a
Remove not needed destructor
Jan 21, 2025
021c0e4
Merge branch 'main' into add-command-buffer-support
Jan 21, 2025
b1b0c60
Add barriers to the SignalCommandList that guarantee that resetting t…
Jan 20, 2025
545e577
Fix formatting
Jan 21, 2025
ea643b3
Revert "Fix formatting"
Jan 21, 2025
8b7b269
Move command list close to the command buffer
Jan 21, 2025
95f978c
Moved try outside function block
Jan 21, 2025
30f2f91
Move enqueue generic command list back to queue
Jan 21, 2025
c00d960
Share events and lists between queue and command list manager
Jan 21, 2025
eb6487d
[CUDA][HIP] Fix kernel arguments being overriden when added out of order
Jan 14, 2025
e3dcfc3
Rename Indices member variable to ArgPointers
Jan 21, 2025
06e7807
Use ze events instead of ur in getSignalEvent
Jan 21, 2025
9f53547
Remove not needed structs and reformat code
Jan 21, 2025
a451646
Fix PR comments
Jan 22, 2025
f98229f
Fix formatting
Jan 22, 2025
67f7162
Fix ze function calling
Jan 22, 2025
9de10cd
Rename variable ArgIndices to ArgPointers
Jan 22, 2025
eeff9f4
Enable creation of bindless images backed by host USM
Jan 8, 2025
f28f707
Fix access to some fields in command buffer v2
Jan 23, 2025
8ba4703
Merge branch 'main' into add-command-buffer-support
Jan 23, 2025
48d1890
Fix compilation
Jan 23, 2025
c685944
Merge pull request #2589 from Bensuo/fabio/fix_potential_race_condition
kbenzie Jan 23, 2025
b841691
Merge pull request #2559 from Bensuo/fix_kernel_arg_indices
kbenzie Jan 23, 2025
f71ef62
Fix passing struct object by value
kbenzie Jan 23, 2025
9a64274
remove benchmark output from markdown
EuphoricThinking Jan 23, 2025
04cb8f1
Merge pull request #2611 from EuphoricThinking/benchmark_no_markdown_…
pbalcer Jan 23, 2025
07001aa
fix parseDisjointPoolConfig and add tests
bratpiorka Jan 24, 2025
707bcde
Move urMemImageGetInfo success test from a switch to individual test.
martygrant Jan 13, 2025
f85255e
Merge pull request #2549 from martygrant/martin/memimage-info-unswitch
martygrant Jan 24, 2025
d18935c
Revert "Move urMemImageGetInfo success test from a switch to individu…
martygrant Jan 24, 2025
ff85879
Merge pull request #2572 from pbalcer/update-cache-action
pbalcer Jan 24, 2025
2f5f4bb
Merge pull request #2613 from oneapi-src/revert-2549-martin/memimage-…
martygrant Jan 24, 2025
e370a2b
Merge pull request #2609 from kbenzie/benie/fix-large-obj-noref-arg
kbenzie Jan 24, 2025
0bb6789
Merge pull request #2551 from przemektmalon/przemek/bindless-images-h…
kbenzie Jan 27, 2025
095e846
Merge pull request #2574 from bratpiorka/rrudnick_fix_usm_pool_config…
kbenzie Jan 27, 2025
241636f
Remove unnecessary OpenCL KNOWN_FAILURE from urKernelGetInfoTest.
Jan 27, 2025
9824163
Merge pull request #2625 from aarongreig/aaron/removeUnnecessaryCLSkip
Jan 28, 2025
43e7f2d
enqueueMemBufferMap bugfix
mateuszpn Jan 28, 2025
4a916cc
# This is a combination of 4 commits.
Dec 18, 2024
78f6fbd
parent 982416300132e778138f9d02bbbb2cde4e9f6249
mateuszpn Jan 28, 2025
2aff645
# This is a combination of 3 commits.
mateuszpn Jan 28, 2025
af06203
# This is a combination of 6 commits.
mateuszpn Jan 28, 2025
55055ae
rebase
mateuszpn Jan 28, 2025
17c2589
bug fix
mateuszpn Jan 28, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/benchmarks-reusable.yml
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ jobs:

- name: Upload HTML report
if: ${{ always() && inputs.upload_report }}
uses: actions/cache/save@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2
uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
with:
path: ur-repo/benchmark_results.html
key: benchmark-results-${{ matrix.adapter.str_name }}-${{ github.run_id }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ jobs:

- name: Download benchmark HTML
id: download-bench-html
uses: actions/cache/restore@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2
uses: actions/cache/restore@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
with:
path: ur-repo/benchmark_results.html
key: benchmark-results-
Expand Down
1 change: 1 addition & 0 deletions Testing/Temporary/CTestCostData.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
---
21 changes: 21 additions & 0 deletions ft.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# add_test([=[enqueue-adapter_level_zero_v2]=] "/home/mateuszpn/pr2532/build/bin/test-enqueue" "--gtest_filter=*Level_Zero*")
# set_tests_properties([=[enqueue-adapter_level_zero_v2]=] PROPERTIES ENVIRONMENT
# "UR_ADAPTERS_FORCE_LOAD=\"/home/mateuszpn/pr2532/build/lib/libur_adapter_level_zero_v2.so.0.12.0\""
# "LABELS "conformance;adapter_level_zero_v2"
# WORKING_DIRECTORY "/home/mateuszpn/pr2532/build/test/conformance/enqueue" _BACKTRACE_TRIPLES "/home/mateuszpn/pr2532/test/conformance/CMakeLists.txt;22;add_test;/home/mateuszpn/pr2532/test/conformance/CMakeLists.txt;32;do_add_test;/home/mateuszpn/pr2532/test/conformance/CMakeLists.txt;67;add_test_adapter;/home/mateuszpn/pr2532/test/conformance/CMakeLists.txt;78;add_conformance_test;/home/mateuszpn/pr2532/test/conformance/enqueue/CMakeLists.txt;6;add_conformance_test_with_kernels_environment;/home/mateuszpn/pr2532/test/conformance/enqueue/CMakeLists.txt;0;")

# Set environment variable
export UR_ADAPTERS_FORCE_LOAD="/home/mateuszpn/pr2532/build/lib/libur_adapter_level_zero_v2.so.0.12.0"

# Set working directory
#cd /home/mateuszpn/pr2532/build/test/conformance/$1

# Run the test with the specified filter
#/home/mateuszpn/pr2532/build/bin/test-$1 --gtest_filter=*Level_Zero*

# Set working directory
cd /home/mateuszpn/pr2532/build/test/conformance/enqueue

# Run the test with the specified filter
#/home/mateuszpn/pr2532/build/bin/test-enqueue --gtest_filter=urEnqueueMemBufferMapTestWithParam.MapSignalEvent*Level_Zero*
/home/mateuszpn/pr2532/build/bin/test-enqueue --gtest_filter=*Level_Zero*
5 changes: 1 addition & 4 deletions scripts/benchmarks/output_markdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def generate_markdown_details(results: list[Result]):

markdown_sections.append(f"""
<details>
<summary>Benchmark details - environment, command, output...</summary>
<summary>Benchmark details - environment, command...</summary>
""")

for res in results:
Expand All @@ -42,9 +42,6 @@ def generate_markdown_details(results: list[Result]):
#### Command:
{' '.join(res.command)}

#### Output:
{res.stdout}

</details>
""")
markdown_sections.append(f"""
Expand Down
2 changes: 2 additions & 0 deletions scripts/templates/queue_api.cpp.mako
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ from templates import helper as th
*
*/

// Do not edit. This file is auto generated from a template: scripts/templates/queue_api.cpp.mako

#include "queue_api.hpp"
#include "ur_util.hpp"

Expand Down
7 changes: 7 additions & 0 deletions scripts/templates/queue_api.hpp.mako
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,12 @@ from templates import helper as th
*
*/

// Do not edit. This file is auto generated from a template: scripts/templates/queue_api.hpp.mako

#pragma once

#include <ur_api.h>
#include <ze_api.h>

struct ur_queue_handle_t_ {
virtual ~ur_queue_handle_t_();
Expand All @@ -32,4 +35,8 @@ struct ur_queue_handle_t_ {
%for obj in th.get_queue_related_functions(specs, n, tags):
virtual ${x}_result_t ${th.transform_queue_related_function_name(n, tags, obj, format=["type"])} = 0;
%endfor

virtual ur_result_t
enqueueCommandBuffer(ze_command_list_handle_t, ur_event_handle_t *,
uint32_t, const ur_event_handle_t *) = 0;
};
6 changes: 3 additions & 3 deletions source/adapters/cuda/command_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -523,7 +523,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
ThreadsPerBlock, BlocksPerGrid));

// Set node param structure with the kernel related data
auto &ArgIndices = hKernel->getArgIndices();
auto &ArgPointers = hKernel->getArgPointers();
CUDA_KERNEL_NODE_PARAMS NodeParams = {};
NodeParams.func = CuFunc;
NodeParams.gridDimX = BlocksPerGrid[0];
Expand All @@ -533,7 +533,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
NodeParams.blockDimY = ThreadsPerBlock[1];
NodeParams.blockDimZ = ThreadsPerBlock[2];
NodeParams.sharedMemBytes = LocalSize;
NodeParams.kernelParams = const_cast<void **>(ArgIndices.data());
NodeParams.kernelParams = const_cast<void **>(ArgPointers.data());

// Create and add an new kernel node to the Cuda graph
UR_CHECK_ERROR(cuGraphAddKernelNode(&GraphNode, hCommandBuffer->CudaGraph,
Expand Down Expand Up @@ -1398,7 +1398,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp(
Params.blockDimZ = ThreadsPerBlock[2];
Params.sharedMemBytes = KernelCommandHandle->Kernel->getLocalSize();
Params.kernelParams =
const_cast<void **>(KernelCommandHandle->Kernel->getArgIndices().data());
const_cast<void **>(KernelCommandHandle->Kernel->getArgPointers().data());

CUgraphNode Node = KernelCommandHandle->Node;
CUgraphExec CudaGraphExec = CommandBuffer->CudaGraphExec;
Expand Down
8 changes: 4 additions & 4 deletions source/adapters/cuda/enqueue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -492,11 +492,11 @@ enqueueKernelLaunch(ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel,
UR_CHECK_ERROR(RetImplEvent->start());
}

auto &ArgIndices = hKernel->getArgIndices();
auto &ArgPointers = hKernel->getArgPointers();
UR_CHECK_ERROR(cuLaunchKernel(
CuFunc, BlocksPerGrid[0], BlocksPerGrid[1], BlocksPerGrid[2],
ThreadsPerBlock[0], ThreadsPerBlock[1], ThreadsPerBlock[2], LocalSize,
CuStream, const_cast<void **>(ArgIndices.data()), nullptr));
CuStream, const_cast<void **>(ArgPointers.data()), nullptr));

if (phEvent) {
UR_CHECK_ERROR(RetImplEvent->record());
Expand Down Expand Up @@ -680,7 +680,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp(
UR_CHECK_ERROR(RetImplEvent->start());
}

auto &ArgIndices = hKernel->getArgIndices();
auto &ArgPointers = hKernel->getArgPointers();

CUlaunchConfig launch_config;
launch_config.gridDimX = BlocksPerGrid[0];
Expand All @@ -696,7 +696,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp(
launch_config.numAttrs = launch_attribute.size();

UR_CHECK_ERROR(cuLaunchKernelEx(&launch_config, CuFunc,
const_cast<void **>(ArgIndices.data()),
const_cast<void **>(ArgPointers.data()),
nullptr));

if (phEvent) {
Expand Down
6 changes: 4 additions & 2 deletions source/adapters/cuda/image.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -533,8 +533,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp(
image_res_desc.resType = CU_RESOURCE_TYPE_MIPMAPPED_ARRAY;
image_res_desc.res.mipmap.hMipmappedArray = (CUmipmappedArray)hImageMem;
}
} else if (mem_type == CU_MEMORYTYPE_DEVICE) {
// We have a USM pointer
} else if (mem_type == CU_MEMORYTYPE_DEVICE ||
mem_type == CU_MEMORYTYPE_HOST) {
// We have a USM pointer.
// Images may be created from device or host USM.
if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D) {
image_res_desc.resType = CU_RESOURCE_TYPE_LINEAR;
image_res_desc.res.linear.devPtr = (CUdeviceptr)hImageMem;
Expand Down
62 changes: 36 additions & 26 deletions source/adapters/cuda/kernel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,10 @@ struct ur_kernel_handle_t_ {
args_t Storage;
/// Aligned size of each parameter, including padding.
args_size_t ParamSizes;
/// Byte offset into /p Storage allocation for each parameter.
args_index_t Indices;
/// Byte offset into /p Storage allocation for each argument.
args_index_t ArgPointers;
/// Position in the Storage array where the next argument should added.
size_t InsertPos = 0;
/// Aligned size in bytes for each local memory parameter after padding has
/// been added. Zero if the argument at the index isn't a local memory
/// argument.
Expand All @@ -90,33 +92,43 @@ struct ur_kernel_handle_t_ {
std::uint32_t ImplicitOffsetArgs[3] = {0, 0, 0};

arguments() {
// Place the implicit offset index at the end of the indicies collection
Indices.emplace_back(&ImplicitOffsetArgs);
// Place the implicit offset index at the end of the ArgPointers
// collection.
ArgPointers.emplace_back(&ImplicitOffsetArgs);
}

/// Add an argument to the kernel.
/// If the argument existed before, it is replaced.
/// Otherwise, it is added.
/// Gaps are filled with empty arguments.
/// Implicit offset argument is kept at the back of the indices collection.
/// Implicit offset argument is kept at the back of the ArgPointers
/// collection.
void addArg(size_t Index, size_t Size, const void *Arg,
size_t LocalSize = 0) {
if (Index + 2 > Indices.size()) {
// Expand storage to accommodate this Index if needed.
if (Index + 2 > ArgPointers.size()) {
// Move implicit offset argument index with the end
Indices.resize(Index + 2, Indices.back());
ArgPointers.resize(Index + 2, ArgPointers.back());
// Ensure enough space for the new argument
ParamSizes.resize(Index + 1);
AlignedLocalMemSize.resize(Index + 1);
OriginalLocalMemSize.resize(Index + 1);
}
ParamSizes[Index] = Size;
// calculate the insertion point on the array
size_t InsertPos = std::accumulate(std::begin(ParamSizes),
std::begin(ParamSizes) + Index, 0);
// Update the stored value for the argument
std::memcpy(&Storage[InsertPos], Arg, Size);
Indices[Index] = &Storage[InsertPos];
AlignedLocalMemSize[Index] = LocalSize;

// Copy new argument to storage if it hasn't been added before.
if (ParamSizes[Index] == 0) {
ParamSizes[Index] = Size;
std::memcpy(&Storage[InsertPos], Arg, Size);
ArgPointers[Index] = &Storage[InsertPos];
AlignedLocalMemSize[Index] = LocalSize;
InsertPos += Size;
}
// Otherwise, update the existing argument.
else {
std::memcpy(ArgPointers[Index], Arg, Size);
AlignedLocalMemSize[Index] = LocalSize;
assert(Size == ParamSizes[Index]);
}
}

/// Returns the padded size and offset of a local memory argument.
Expand All @@ -128,7 +140,7 @@ struct ur_kernel_handle_t_ {
std::pair<size_t, size_t> calcAlignedLocalArgument(size_t Index,
size_t Size) {
// Store the unpadded size of the local argument
if (Index + 2 > Indices.size()) {
if (Index + 2 > ArgPointers.size()) {
AlignedLocalMemSize.resize(Index + 1);
OriginalLocalMemSize.resize(Index + 1);
}
Expand Down Expand Up @@ -158,10 +170,11 @@ struct ur_kernel_handle_t_ {
return std::make_pair(AlignedLocalSize, AlignedLocalOffset);
}

// Iterate over all existing local argument which follows StartIndex
// Iterate over each existing local argument which follows StartIndex
// index, update the offset and pointer into the kernel local memory.
void updateLocalArgOffset(size_t StartIndex) {
const size_t NumArgs = Indices.size() - 1; // Accounts for implicit arg
const size_t NumArgs =
ArgPointers.size() - 1; // Accounts for implicit arg
for (auto SuccIndex = StartIndex; SuccIndex < NumArgs; SuccIndex++) {
const size_t OriginalLocalSize = OriginalLocalMemSize[SuccIndex];
if (OriginalLocalSize == 0) {
Expand All @@ -177,10 +190,7 @@ struct ur_kernel_handle_t_ {
AlignedLocalMemSize[SuccIndex] = SuccAlignedLocalSize;

// Store new offset into local data
const size_t InsertPos =
std::accumulate(std::begin(ParamSizes),
std::begin(ParamSizes) + SuccIndex, size_t{0});
std::memcpy(&Storage[InsertPos], &SuccAlignedLocalOffset,
std::memcpy(ArgPointers[SuccIndex], &SuccAlignedLocalOffset,
sizeof(size_t));
}
}
Expand Down Expand Up @@ -228,7 +238,7 @@ struct ur_kernel_handle_t_ {
std::memcpy(ImplicitOffsetArgs, ImplicitOffset, Size);
}

const args_index_t &getIndices() const noexcept { return Indices; }
const args_index_t &getArgPointers() const noexcept { return ArgPointers; }

uint32_t getLocalSize() const {
return std::accumulate(std::begin(AlignedLocalMemSize),
Expand Down Expand Up @@ -299,7 +309,7 @@ struct ur_kernel_handle_t_ {
/// real one required by the kernel, since this cannot be queried from
/// the CUDA Driver API
uint32_t getNumArgs() const noexcept {
return static_cast<uint32_t>(Args.Indices.size() - 1);
return static_cast<uint32_t>(Args.ArgPointers.size() - 1);
}

void setKernelArg(int Index, size_t Size, const void *Arg) {
Expand All @@ -314,8 +324,8 @@ struct ur_kernel_handle_t_ {
return Args.setImplicitOffset(Size, ImplicitOffset);
}

const arguments::args_index_t &getArgIndices() const {
return Args.getIndices();
const arguments::args_index_t &getArgPointers() const {
return Args.getArgPointers();
}

void setWorkGroupMemory(size_t MemSize) { Args.setWorkGroupMemory(MemSize); }
Expand Down
6 changes: 3 additions & 3 deletions source/adapters/hip/command_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
pLocalWorkSize, hKernel, HIPFunc, ThreadsPerBlock, BlocksPerGrid));

// Set node param structure with the kernel related data
auto &ArgIndices = hKernel->getArgIndices();
auto &ArgPointers = hKernel->getArgPointers();
hipKernelNodeParams NodeParams;
NodeParams.func = HIPFunc;
NodeParams.gridDim.x = BlocksPerGrid[0];
Expand All @@ -388,7 +388,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
NodeParams.blockDim.y = ThreadsPerBlock[1];
NodeParams.blockDim.z = ThreadsPerBlock[2];
NodeParams.sharedMemBytes = LocalSize;
NodeParams.kernelParams = const_cast<void **>(ArgIndices.data());
NodeParams.kernelParams = const_cast<void **>(ArgPointers.data());
NodeParams.extra = nullptr;

// Create and add an new kernel node to the HIP graph
Expand Down Expand Up @@ -1098,7 +1098,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp(
Params.blockDim.z = ThreadsPerBlock[2];
Params.sharedMemBytes = hCommand->Kernel->getLocalSize();
Params.kernelParams =
const_cast<void **>(hCommand->Kernel->getArgIndices().data());
const_cast<void **>(hCommand->Kernel->getArgPointers().data());

hipGraphNode_t Node = hCommand->Node;
hipGraphExec_t HipGraphExec = CommandBuffer->HIPGraphExec;
Expand Down
4 changes: 2 additions & 2 deletions source/adapters/hip/enqueue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
}
}

auto ArgIndices = hKernel->getArgIndices();
auto ArgPointers = hKernel->getArgPointers();

// If migration of mem across buffer is needed, an event must be associated
// with this command, implicitly if phEvent is nullptr
Expand All @@ -322,7 +322,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
UR_CHECK_ERROR(hipModuleLaunchKernel(
HIPFunc, BlocksPerGrid[0], BlocksPerGrid[1], BlocksPerGrid[2],
ThreadsPerBlock[0], ThreadsPerBlock[1], ThreadsPerBlock[2],
hKernel->getLocalSize(), HIPStream, ArgIndices.data(), nullptr));
hKernel->getLocalSize(), HIPStream, ArgPointers.data(), nullptr));

if (phEvent) {
UR_CHECK_ERROR(RetImplEvent->record());
Expand Down
Loading