Skip to content

Commit

Permalink
Add support for USM.
Browse files Browse the repository at this point in the history
  • Loading branch information
VarLad authored and maleadt committed Jan 20, 2025
1 parent 4ec3ac3 commit 02735df
Show file tree
Hide file tree
Showing 28 changed files with 2,123 additions and 593 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Adapt = "4"
GPUArrays = "11.2.1"
GPUCompiler = "0.27, 1"
KernelAbstractions = "0.9.1"
KernelAbstractions = "0.9.2"
LLVM = "9.1"
LinearAlgebra = "1"
OpenCL_jll = "=2024.5.8"
Expand Down
7 changes: 5 additions & 2 deletions lib/cl/CL.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
module cl

using Printf

include("api.jl")

# OpenCL wrapper objects are expected to have an `id` field containing a handle pointer
Expand All @@ -15,9 +17,10 @@ include("device.jl")
include("context.jl")
include("cmdqueue.jl")
include("event.jl")
include("memory.jl")
include("pointer.jl")
include("modified_fns.jl")
include("memory/memory.jl")
include("buffer.jl")
include("svm.jl")
include("program.jl")
include("kernel.jl")

Expand Down
73 changes: 73 additions & 0 deletions lib/cl/buffer.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,76 @@
# OpenCL Memory Object

# Common supertype of the OpenCL memory object wrappers in this file.
abstract type AbstractMemory <: CLObject end

# Every subtype is expected to carry the `cl_mem` handle in an `id` field, e.g.:
#   mutable struct MemoryType <: AbstractMemory
#       id::cl_mem
#       ...
#   end

# for passing buffers to OpenCL APIs: use the underlying handle
Base.unsafe_convert(::Type{cl_mem}, mem::AbstractMemory) = mem.id

# for passing buffers to kernels: keep the buffer, it's handled by `cl.set_arg!`
Base.unsafe_convert(::Type{<:Ptr}, mem::AbstractMemory) = mem

# Size of the memory object; forwards to the `size` property of `mem`.
Base.sizeof(mem::AbstractMemory) = mem.size

# Context of the memory object; forwards to the `context` property of `mem`.
context(mem::AbstractMemory) = mem.context

# Property access for memory objects. The symbols handled here are answered by
# querying `clGetMemObjectInfo`; every other symbol falls through to `getfield`.
#
#   :context          -> `Context` wrapping the queried handle (created with `retain=true`)
#   :mem_type         -> raw `cl_mem_object_type` value
#   :mem_flags        -> tuple of symbols decoded from the `cl_mem_flags` bitmask
#   :size             -> queried `Csize_t` value
#   :reference_count  -> queried `Cuint` value converted to `Int`
#   :map_count        -> queried `Cuint` value converted to `Int`
function Base.getproperty(mem::AbstractMemory, s::Symbol)
    if s === :context
        handle = Ref{cl_context}()
        clGetMemObjectInfo(mem, CL_MEM_CONTEXT, sizeof(cl_context), handle, C_NULL)
        return Context(handle[], retain=true)
    end

    if s === :mem_type
        kind = Ref{cl_mem_object_type}()
        clGetMemObjectInfo(mem, CL_MEM_TYPE, sizeof(cl_mem_object_type), kind, C_NULL)
        return kind[]
    end

    if s === :mem_flags
        bits = Ref{cl_mem_flags}()
        clGetMemObjectInfo(mem, CL_MEM_FLAGS, sizeof(cl_mem_flags), bits, C_NULL)
        mask = bits[]
        # Order matters: it determines the order of the symbols in the result.
        table = (
            (CL_MEM_READ_WRITE, :rw),
            (CL_MEM_WRITE_ONLY, :w),
            (CL_MEM_READ_ONLY, :r),
            (CL_MEM_USE_HOST_PTR, :use),
            (CL_MEM_ALLOC_HOST_PTR, :alloc),
            (CL_MEM_COPY_HOST_PTR, :copy),
        )
        names = Symbol[]
        for (bit, name) in table
            (mask & bit) != 0 && push!(names, name)
        end
        return tuple(names...)
    end

    if s === :size
        nbytes = Ref{Csize_t}()
        clGetMemObjectInfo(mem, CL_MEM_SIZE, sizeof(Csize_t), nbytes, C_NULL)
        return nbytes[]
    end

    if s === :reference_count || s === :map_count
        # Both counters are reported as `Cuint` and handed back as `Int`.
        param = s === :reference_count ? CL_MEM_REFERENCE_COUNT : CL_MEM_MAP_COUNT
        counter = Ref{Cuint}()
        clGetMemObjectInfo(mem, param, sizeof(Cuint), counter, C_NULL)
        return Int(counter[])
    end

    return getfield(mem, s)
end

#TODO: enqueue_migrate_mem_objects(queue, mem_objects, flags=0, wait_for=None)
#TODO: enqueue_migrate_mem_objects_ext(queue, mem_objects, flags=0, wait_for=None)

# OpenCL.Buffer

mutable struct Buffer{T} <: AbstractMemory
Expand Down
88 changes: 74 additions & 14 deletions lib/cl/device.jl
Original file line number Diff line number Diff line change
Expand Up @@ -140,17 +140,17 @@ end
end

if s == :max_image2d_shape
width = Ref{Csize_t}()
width = Ref{Csize_t}()
height = Ref{Csize_t}()
clGetDeviceInfo(d, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(Csize_t), width, C_NULL)
clGetDeviceInfo(d, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(Csize_t), width, C_NULL)
clGetDeviceInfo(d, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(Csize_t), height, C_NULL)
return (width[], height[])
end

if s == :max_image3d_shape
width = Ref{Csize_t}()
width = Ref{Csize_t}()
height = Ref{Csize_t}()
depth = Ref{Csize_t}()
depth = Ref{Csize_t}()
clGetDeviceInfo(d, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(Csize_t), width, C_NULL)
clGetDeviceInfo(d, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(Csize_t), height, C_NULL)
clGetDeviceInfo(d, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(Csize_t), depth, C_NULL)
Expand All @@ -164,42 +164,102 @@ function queue_properties(d::Device, type=:host)
result = Ref{cl_command_queue_properties}()
if type === :host
clGetDeviceInfo(d, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES,
sizeof(cl_command_queue_properties), result, C_NULL)
sizeof(cl_command_queue_properties), result, C_NULL)
elseif type === :device
clGetDeviceInfo(d, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES,
sizeof(cl_command_queue_properties), result, C_NULL)
sizeof(cl_command_queue_properties), result, C_NULL)
else
throw(ArgumentError("Unknown queue type: $type"))
end
mask = result[]

return (;
out_of_order_exec = mask & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE != 0,
profiling = mask & CL_QUEUE_PROFILING_ENABLE != 0
out_of_order_exec=mask & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE != 0,
profiling=mask & CL_QUEUE_PROFILING_ENABLE != 0
)
end

function exec_capabilities(d::Device)
result = Ref{cl_device_exec_capabilities}()
clGetDeviceInfo(d, CL_DEVICE_EXECUTION_CAPABILITIES,
sizeof(cl_device_exec_capabilities), result, C_NULL)
sizeof(cl_device_exec_capabilities), result, C_NULL)
mask = result[]

return (;
native_kernel = mask & CL_EXEC_NATIVE_KERNEL != 0,
native_kernel=mask & CL_EXEC_NATIVE_KERNEL != 0,
)
end

"""
    usm_capabilities(d::Device)

Query the Intel unified shared memory (USM) capability bitmasks of device `d` through
`clGetDeviceInfo` and decode them.

Returns a named tuple with the entries `usm_host_capabilities`,
`usm_device_capabilities`, `usm_single_device_capabilities`, `usm_shared_capabilities`
and `usm_cross_device_capabilities`. Each entry is a named tuple of `Bool`s telling
which access bits are set in the corresponding bitmask.

If any query throws, an error is logged (with the caught exception attached, so the
actual cause stays visible) and `nothing` is returned.
"""
function usm_capabilities(d::Device)
    # Fetch a single capability bitmask from the device.
    function query(param)
        result = Ref{cl_device_unified_shared_memory_capabilities_intel}()
        clGetDeviceInfo(
            d, param,
            sizeof(cl_device_unified_shared_memory_capabilities_intel), result, C_NULL
        )
        return result[]
    end

    # Decode a capability bitmask into named flags.
    # NOTE: `usm_concurrent_atomic_acces` is misspelled, but the name is part of the
    # returned value and therefore kept for backward compatibility.
    function retmask(m)
        return (;
            usm_access=m & CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL != 0,
            usm_atomic_access=m & CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL != 0,
            usm_concurrent_access=m & CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL != 0,
            usm_concurrent_atomic_acces=m & CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL != 0,
        )
    end

    try
        host = query(CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL)
        device = query(CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL)
        single = query(CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL)
        shared = query(CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL)
        cross = query(CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL)

        return (;
            usm_host_capabilities=retmask(host),
            usm_device_capabilities=retmask(device),
            usm_single_device_capabilities=retmask(single),
            usm_shared_capabilities=retmask(shared),
            usm_cross_device_capabilities=retmask(cross),
        )
    catch err
        # Keep the best-effort contract (log and return `nothing`), but do not drop
        # the exception: attach it so unrelated failures are not mistaken for a
        # missing extension.
        @error(
            "USM extension not available for device $(d.name)",
            exception=(err, catch_backtrace())
        )
        return nothing
    end
end

function svm_capabilities(d::Device)
result = Ref{cl_device_svm_capabilities}()
clGetDeviceInfo(d, CL_DEVICE_SVM_CAPABILITIES,
sizeof(cl_device_svm_capabilities), result, C_NULL)
sizeof(cl_device_svm_capabilities), result, C_NULL)
mask = result[]

return (;
coarse_grain_buffer = mask & CL_DEVICE_SVM_COARSE_GRAIN_BUFFER != 0,
fine_grain_buffer = mask & CL_DEVICE_SVM_FINE_GRAIN_BUFFER != 0,
fine_grain_system = mask & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM != 0,
coarse_grain_buffer=mask & CL_DEVICE_SVM_COARSE_GRAIN_BUFFER != 0,
fine_grain_buffer=mask & CL_DEVICE_SVM_FINE_GRAIN_BUFFER != 0,
fine_grain_system=mask & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM != 0,
)
end

Expand Down
Loading

0 comments on commit 02735df

Please sign in to comment.