Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/AMDGPU.jl
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ export workitemIdx, workgroupIdx, workgroupDim, gridItemDim, gridGroupDim
export sync_workgroup, sync_workgroup_count, sync_workgroup_and, sync_workgroup_or

struct KernelState
exception_info::Ptr{Device.ExceptionInfo}
exception_info::Ptr{UInt64}
malloc_hc::Ptr{Cvoid}
free_hc::Ptr{Cvoid}
output_context::Ptr{Cvoid}
Expand Down
173 changes: 78 additions & 95 deletions src/device/exceptions.jl
Original file line number Diff line number Diff line change
@@ -1,114 +1,97 @@
"""
- `status::Int32`: whether exception has been thrown (0 - no, 1 - yes).
"""
struct ExceptionInfo
status::Int32
output_lock::Int32
# Exception reason codes — encoded in bits [7:0] of the packed exception UInt64
# 0 means no exception
module ExceptionCode
    # One UInt8 code per recognized device-side exception kind.
    # 0x00 is reserved to mean "no exception recorded".
    const NONE           = 0x00
    const UNKNOWN        = 0x01
    const BOUNDS_ERROR   = 0x02
    const DOMAIN_ERROR   = 0x03
    const OVERFLOW_ERROR = 0x04
    const INEXACT_ERROR  = 0x05
    const ARGUMENT_ERROR = 0x06
    const DIVIDE_ERROR   = 0x07
    const DIM_MISMATCH   = 0x08
end

thread::@NamedTuple{x::UInt32, y::UInt32, z::UInt32}
block::@NamedTuple{x::UInt32, y::UInt32, z::UInt32}
# Host-side lookup table mapping each exception code to a human-readable
# message (used when formatting a caught GPU exception for the user).
const EXCEPTION_REASON_STRINGS = Dict{UInt8, String}(
    code => msg for (code, msg) in (
        (ExceptionCode.NONE, "No exception"),
        (ExceptionCode.UNKNOWN, "Unknown exception"),
        (ExceptionCode.BOUNDS_ERROR, "BoundsError: Out-of-bounds array access"),
        (ExceptionCode.DOMAIN_ERROR, "DomainError"),
        (ExceptionCode.OVERFLOW_ERROR, "OverflowError"),
        (ExceptionCode.INEXACT_ERROR, "InexactError: Inexact conversion"),
        (ExceptionCode.ARGUMENT_ERROR, "ArgumentError"),
        (ExceptionCode.DIVIDE_ERROR, "DivideError: Integer division error"),
        (ExceptionCode.DIM_MISMATCH, "DimensionMismatch"),
    )
)

reason::LLVMPtr{UInt8, AS.Global}
reason_length::Int64
# Packed exception format (UInt64):
# [63:48] workgroup_x (16 bits)
# [47:32] workgroup_y (16 bits)
# [31:16] workgroup_z (16 bits)
# [15:8] reserved
# [7:0] error code (non-zero = exception occurred)

ExceptionInfo() = new(
Int32(0), Int32(0),
(; x=UInt32(0), y=UInt32(0), z=UInt32(0)),
(; x=UInt32(0), y=UInt32(0), z=UInt32(0)),
LLVMPtr{UInt8, AS.Global}(), 0)
# Pack `code` together with the current workgroup index into a single
# UInt64 following the layout documented above: workgroup x/y/z in bits
# [63:48]/[47:32]/[31:16], error code in bits [7:0]. Indices are truncated
# to 16 bits (`% UInt16`) — TODO confirm grids never exceed 65535 groups
# per axis for accurate reporting.
@inline function pack_exception(code::UInt8)
    idx = workgroupIdx()
    packed = UInt64(code)
    packed |= UInt64(idx.x % UInt16) << 48
    packed |= UInt64(idx.y % UInt16) << 32
    packed |= UInt64(idx.z % UInt16) << 16
    return packed
end

@inline function Base.getproperty(ei::Ptr{ExceptionInfo}, field::Symbol)
if field == :status
unsafe_load(convert(Ptr{Int32}, ei))
elseif field == :output_lock
unsafe_load(convert(Ptr{Int32}, ei + sizeof(Int32)))
elseif field == :output_lock_ptr
reinterpret(LLVMPtr{Int32, AS.Generic}, ei + sizeof(Int32))
elseif field == :thread
offset = 2 * sizeof(Int32)
unsafe_load(convert(Ptr{@NamedTuple{x::UInt32, y::UInt32, z::UInt32}}, ei + offset))
elseif field == :block
offset = 2 * sizeof(Int32) + sizeof(@NamedTuple{x::UInt32, y::UInt32, z::UInt32})
unsafe_load(convert(Ptr{@NamedTuple{x::UInt32, y::UInt32, z::UInt32}}, ei + offset))
elseif field == :reason
offset =
2 * sizeof(Int32) +
2 * sizeof(@NamedTuple{x::UInt32, y::UInt32, z::UInt32})
unsafe_load(convert(Ptr{LLVMPtr{UInt8, AS.Global}}, ei + offset))
elseif field == :reason_length
offset =
2 * sizeof(Int32) +
2 * sizeof(@NamedTuple{x::UInt32, y::UInt32, z::UInt32}) +
sizeof(LLVMPtr{UInt8, AS.Global})
unsafe_load(convert(Ptr{Int64}, ei + offset))
else
getfield(ei, field)
end
# Inverse of `pack_exception`: split a packed exception word back into the
# workgroup index components and the error code. `% T` truncates to the
# low bits of the shifted value, so no explicit masking is needed.
@inline function unpack_exception(packed::UInt64)
    code = packed % UInt8
    wg_z = (packed >> 16) % UInt16
    wg_y = (packed >> 32) % UInt16
    wg_x = (packed >> 48) % UInt16
    return (; wg_x, wg_y, wg_z, code)
end

@inline function Base.setproperty!(ei::Ptr{ExceptionInfo}, field::Symbol, value)
if field == :status
unsafe_store!(convert(Ptr{Int32}, ei), value)
elseif field == :output_lock
unsafe_store!(convert(Ptr{Int32}, ei + sizeof(Int32)), value)
elseif field == :thread
offset = 2 * sizeof(Int32)
unsafe_store!(convert(Ptr{@NamedTuple{x::UInt32, y::UInt32, z::UInt32}}, ei + offset), value)
elseif field == :block
offset = 2 * sizeof(Int32) + sizeof(@NamedTuple{x::UInt32, y::UInt32, z::UInt32})
unsafe_store!(convert(Ptr{@NamedTuple{x::UInt32, y::UInt32, z::UInt32}}, ei + offset), value)
elseif field == :reason
offset =
2 * sizeof(Int32) +
2 * sizeof(@NamedTuple{x::UInt32, y::UInt32, z::UInt32})
unsafe_store!(convert(Ptr{LLVMPtr{UInt8, AS.Global}}, ei + offset), value)
elseif field == :reason_length
offset =
2 * sizeof(Int32) +
2 * sizeof(@NamedTuple{x::UInt32, y::UInt32, z::UInt32}) +
sizeof(LLVMPtr{UInt8, AS.Global})
unsafe_store!(convert(Ptr{Int64}, ei + offset), value)
else
setfield!(ei, field, value)
end
end
# Backwards-compatibility alias: the former `ExceptionInfo` struct has been
# replaced by a single packed `UInt64` (workgroup index + error code), so
# existing references to the `ExceptionInfo` name keep working.
const ExceptionInfo = UInt64

function alloc_exception_info()
ei_ptr = Mem.HostBuffer(sizeof(ExceptionInfo), HIP.hipHostAllocDefault)
unsafe_store!(convert(Ptr{ExceptionInfo}, ei_ptr), ExceptionInfo())
ei_ptr = Mem.HostBuffer(sizeof(UInt64), HIP.hipHostAllocDefault)
unsafe_store!(convert(Ptr{UInt64}, ei_ptr), UInt64(0))
return ei_ptr
end

@inline function lock_output!(ei::Ptr{ExceptionInfo})
# if llvm_atomic_cas(ei.output_lock_ptr, zero(Int32), one(Int32)) == zero(Int32)
if llvm_atomic_cas(ei.output_lock_ptr, Int32(0x0), Int32(0x1)) == Int32(0x0)
# Take the lock & write thread info.
ei.thread = workitemIdx()
ei.block = workgroupIdx()
sync_workgroup()
return true
elseif (
ei.output_lock == Int32(0x1) &&
ei.thread == workitemIdx() &&
ei.block == workgroupIdx()
)
# Thread already has the lock.
return true
else
# Other thread has the lock.
return false
end
# Record `code` (plus this thread's workgroup index) in the shared exception
# slot `ei` and terminate the wavefront.
@inline function signal_exception!(ei::Ptr{UInt64}, code::UInt8)
    # First writer wins: only a CAS from 0 succeeds, so losing threads
    # leave the originally recorded exception untouched.
    slot = reinterpret(LLVMPtr{UInt64, AS.Generic}, ei)
    llvm_atomic_cas(slot, UInt64(0), pack_exception(code))
    # NOTE(review): without endpgm a hardware exception follows — confirm.
    endpgm()
    return
end

macro gpu_throw(reason)
code = _reason_to_code(reason)
quote
ei = kernel_state().exception_info
if lock_output!(ei)
reason_ptr, reason_length = @strptr $reason
ei.reason = reason_ptr
ei.reason_length = reason_length
end
throw(nothing)
signal_exception!(ei, $code)
throw(nothing) # unreachable, but keeps Julia's type system happy
end
end

# Map reason strings to error codes at macro expansion time
# Translate an exception reason string into its `ExceptionCode` at macro
# expansion time by matching well-known Julia exception-name prefixes.
# Unrecognized strings (and non-string reasons) map to `UNKNOWN`.
function _reason_to_code(reason::String)
    prefix_codes = (
        "BoundsError" => ExceptionCode.BOUNDS_ERROR,
        "DomainError" => ExceptionCode.DOMAIN_ERROR,
        "OverflowError" => ExceptionCode.OVERFLOW_ERROR,
        "InexactError" => ExceptionCode.INEXACT_ERROR,
        "ArgumentError" => ExceptionCode.ARGUMENT_ERROR,
        "DivideError" => ExceptionCode.DIVIDE_ERROR,
        "DimensionMismatch" => ExceptionCode.DIM_MISMATCH,
    )
    for (prefix, code) in prefix_codes
        startswith(reason, prefix) && return code
    end
    return ExceptionCode.UNKNOWN
end
_reason_to_code(reason) = ExceptionCode.UNKNOWN
5 changes: 1 addition & 4 deletions src/device/runtime.jl
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,7 @@ end

function signal_exception()
ei = kernel_state().exception_info
ei.status = Int32(0x1)
# Lock in case it was not locked before, to get workitem and workgroup info.
lock_output!(ei)
endpgm() # Without endpgm we'll get hardware exception.
signal_exception!(ei, ExceptionCode.UNKNOWN)
return
end

Expand Down
30 changes: 8 additions & 22 deletions src/exception_handler.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,44 +2,30 @@
const GLOBAL_EXCEPTION_INFO = Dict{UInt, Mem.HostBuffer}()

# TODO RT_LOCK?
function exception_info(dev::HIPDevice)::Ptr{Device.ExceptionInfo}
function exception_info(dev::HIPDevice)::Ptr{UInt64}
ei = get!(
() -> Device.alloc_exception_info(),
GLOBAL_EXCEPTION_INFO, hash(dev))
return convert(Ptr{Device.ExceptionInfo}, Mem.device_ptr(ei))
return convert(Ptr{UInt64}, Mem.device_ptr(ei))
end

function has_exception(dev::HIPDevice)::Bool
return exception_info(dev).status != 0
return unsafe_load(exception_info(dev)) != UInt64(0)
end

function reset_exception_info!(dev::HIPDevice)
unsafe_store!(exception_info(dev), Device.ExceptionInfo())
unsafe_store!(exception_info(dev), UInt64(0))
return
end

function device_str_to_host(str_ptr, str_length)
str_length == 0 && return ""

buf = Vector{UInt8}(undef, str_length)
HSA.memory_copy(
convert(Ptr{Cvoid}, pointer(buf)),
reinterpret(Ptr{Cvoid}, str_ptr), str_length) |> Runtime.check
return String(buf)
end

"""
    get_exception_info_string(dev::HIPDevice) -> String

Build a human-readable description of the exception recorded for `dev`.
Loads the packed exception word from the device's exception slot, decodes
the error code and workgroup index via `Device.unpack_exception`, and maps
the code to a message through `Device.EXCEPTION_REASON_STRINGS`.
"""
function get_exception_info_string(dev::HIPDevice)
    packed = unsafe_load(exception_info(dev))
    info = Device.unpack_exception(packed)
    # Fall back to a generic message for codes without a registered string.
    # (Fixes typo: was "Unkown error code".)
    reason = get(Device.EXCEPTION_REASON_STRINGS, info.code,
        "Unknown error code $(info.code)")
    return """GPU Kernel Exception:
    $reason
    workgroupIdx: ($(info.wg_x), $(info.wg_y), $(info.wg_z))"""
end

function throw_if_exception(dev::HIPDevice)
Expand Down
47 changes: 45 additions & 2 deletions test/device/exceptions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,52 @@ using StaticArrays
AMDGPU.synchronize()
catch err
@test err isa ErrorException
@test occursin("GPU Kernel Exception", err.msg)
end
# TODO check exception message
# TODO check specific exception type
end

@testset "Exception codegen" begin
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should start using FileCheck.jl for these.

# Kernel with multiple div() calls — each generates an error path
function div_kernel(X, a, b, c, d)
i = workitemIdx().x
x = div(a, b)
y = div(c, d)
X[i] = x + y
return
end

iob = IOBuffer()
AMDGPU.code_native(iob, div_kernel, Tuple{
Device.ROCDeviceArray{Int64, 1, 1},
Int64, Int64, Int64, Int64,
}; kernel=true)
asm = String(take!(iob))

# The new lightweight exception path should NOT generate flat_store_byte
# instructions for writing ExceptionInfo fields. Previously each div check
# inlined ~20 flat_store_byte for the 56-byte ExceptionInfo struct.
n_flat_store_byte = count("flat_store_byte", asm)
@test n_flat_store_byte == 0

# Should use global_atomic_cmpswap for the exception flag instead
@test occursin("global_atomic_cmpswap", asm) || occursin("flat_atomic_cmpswap", asm)

# Kernel with bounds-checked array access
function boundscheck_kernel(X, Y)
i = workitemIdx().x
X[i] = Y[i] + Y[i+1]
return
end

iob2 = IOBuffer()
AMDGPU.code_native(iob2, boundscheck_kernel, Tuple{
Device.ROCDeviceArray{Float64, 1, 1},
Device.ROCDeviceArray{Float64, 1, 1},
}; kernel=true)
asm2 = String(take!(iob2))

@test count("flat_store_byte", asm2) == 0
@test occursin("global_atomic_cmpswap", asm2) || occursin("flat_atomic_cmpswap", asm2)
end

if VERSION ≥ v"1.11-"
Expand Down