Skip to content

Commit

Permalink
Add 1.11 support (#635)
Browse files Browse the repository at this point in the history
  • Loading branch information
pxl-th authored May 18, 2024
1 parent 451ca7d commit d7358ce
Show file tree
Hide file tree
Showing 11 changed files with 85 additions and 37 deletions.
41 changes: 41 additions & 0 deletions .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,47 @@ steps:
JULIA_AMDGPU_HIP_MUST_LOAD: "1"
JULIA_AMDGPU_DISABLE_ARTIFACTS: "1"

# CI job: Julia 1.11 with no JULIA_LLVM_ARGS override (labelled "typed pointers").
- label: "Julia 1.11 typed pointers"
plugins:
- JuliaCI/julia#v1:
version: "1.11"
- JuliaCI/julia-test#v1:
- JuliaCI/julia-coverage#v1:
codecov: true
agents:
queue: "juliagpu"
rocm: "*"
rocmgpu: "*"
# Skipped when the build message contains "[skip tests]".
if: build.message !~ /\[skip tests\]/
command: "julia --project -e 'using Pkg; Pkg.update()'"
timeout_in_minutes: 180
env:
JULIA_NUM_THREADS: 4
JULIA_AMDGPU_CORE_MUST_LOAD: "1"
JULIA_AMDGPU_HIP_MUST_LOAD: "1"
JULIA_AMDGPU_DISABLE_ARTIFACTS: "1"

# CI job: Julia 1.11 with "-opaque-pointers" passed to LLVM through JULIA_LLVM_ARGS.
- label: "Julia 1.11 opaque pointers"
plugins:
- JuliaCI/julia#v1:
version: "1.11"
- JuliaCI/julia-test#v1:
- JuliaCI/julia-coverage#v1:
codecov: true
agents:
queue: "juliagpu"
rocm: "*"
rocmgpu: "*"
# Skipped when the build message contains "[skip tests]".
if: build.message !~ /\[skip tests\]/
command: "julia --project -e 'using Pkg; Pkg.update()'"
timeout_in_minutes: 180
env:
JULIA_LLVM_ARGS: "-opaque-pointers"
JULIA_NUM_THREADS: 4
JULIA_AMDGPU_CORE_MUST_LOAD: "1"
JULIA_AMDGPU_HIP_MUST_LOAD: "1"
JULIA_AMDGPU_DISABLE_ARTIFACTS: "1"

- label: "GPU-less environment"
plugins:
- JuliaCI/julia#v1:
Expand Down
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@

docs/build
docs/Manifest.toml
Manifest.toml
Manifest*.toml
LocalPreferences.toml
1 change: 0 additions & 1 deletion src/AMDGPU.jl
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,6 @@ import .Runtime: Mem, ROCDim, ROCDim3

include("memory.jl")

const ci_cache = GPUCompiler.CodeCache()
Base.Experimental.@MethodTable(method_table)

# Device sources must load _before_ the compiler infrastructure,
Expand Down
2 changes: 0 additions & 2 deletions src/compiler/codegen.jl
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@ end

GPUCompiler.runtime_module(@nospecialize(::HIPCompilerJob)) = AMDGPU

GPUCompiler.ci_cache(@nospecialize(::HIPCompilerJob)) = AMDGPU.ci_cache

GPUCompiler.method_table(@nospecialize(::HIPCompilerJob)) = AMDGPU.method_table

GPUCompiler.kernel_state_type(@nospecialize(::HIPCompilerJob)) = AMDGPU.KernelState
Expand Down
13 changes: 6 additions & 7 deletions src/compiler/zeroinit_lds.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,11 @@ function zeroinit_lds!(mod::LLVM.Module, entry::LLVM.Function)
to_init = []
for gbl in LLVM.globals(mod)
if startswith(LLVM.name(gbl), "__zeroinit")
as = LLVM.addrspace(value_type(gbl))
vt = value_type(gbl)
as = LLVM.addrspace(vt)
if as == AMDGPU.Device.AS.Local
push!(to_init, gbl)
sz = llvmsize(global_value_type(gbl))
push!(to_init, (gbl, sz))
end
end
end
Expand All @@ -37,13 +39,10 @@ function zeroinit_lds!(mod::LLVM.Module, entry::LLVM.Function)
position!(builder, instruction)

# Use memset to clear all values to 0.
for gbl in to_init
sz = llvmsize(eltype(value_type(gbl)))
for (gbl, sz) in to_init
sz == 0 && continue

LLVM.memset!(builder, gbl,
ConstantInt(UInt8(0)), ConstantInt(sz),
LLVM.alignment(gbl))
ConstantInt(UInt8(0)), ConstantInt(sz), LLVM.alignment(gbl))
end

# Synchronize the workgroup to prevent races.
Expand Down
9 changes: 5 additions & 4 deletions src/device/gcn/assertion.jl
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,11 @@ macro rocassert(ex, msgs...)
msg = :(Main.Base.string($(Expr(:quote,msg))))
end

return :($(esc(ex)) ? $(nothing)
: rocassert_fail($(Val(Symbol(msg))),
$(Val(__source__.file)),
$(Val(__source__.line))))
return :($(esc(ex)) ? $(nothing) :
rocassert_fail(
$(Val(Symbol(msg))),
$(Val(__source__.file)),
$(Val(__source__.line))))
end

assert_counter = 0
Expand Down
8 changes: 6 additions & 2 deletions src/device/gcn/math.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,11 @@ for jltype in (Float64, Float32, Float16)
type_suffix = fntypes[jltype]

for (mod, intrinsic) in DEFINED_UNARY_INTRNISICS
# sin(Float16) is broken, we override it manually at the very bottom.
# sin(::Float16) is broken, we override it manually at the very bottom.
jltype == Float16 && intrinsic == :sin && continue
# log(::Float16) is broken, we override it manually at the very bottom.
# https://github.com/ROCm/llvm-project/blob/592734c97a3ddcb7ca4009ac94550595a52450ce/amd/device-libs/ocml/src/logH.cl#L15
jltype == Float16 && intrinsic == :log && continue

fname = "extern __ocml_$(intrinsic)_$(type_suffix)"
if isnothing(mod)
Expand Down Expand Up @@ -132,4 +135,5 @@ end
end

# sin(::Float16) and log(::Float16) are broken, so compute in Float32 and convert the result back to Float16; see #177 for sin.
@device_override Base.sin(x::Float16) = sin(Float32(x))
@device_override Base.sin(x::Float16) = Float16(sin(Float32(x)))
@device_override Base.log(x::Float16) = Float16(log(Float32(x)))
3 changes: 1 addition & 2 deletions src/device/gcn/memory_static.jl
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,7 @@
entry = BasicBlock(llvm_f, "entry")
position!(builder, entry)

ptr_with_as = gep!(builder, gv_typ, gv,
[ConstantInt(0), ConstantInt(0)])
ptr_with_as = gep!(builder, gv_typ, gv, [ConstantInt(0), ConstantInt(0)])
ptr = bitcast!(builder, ptr_with_as, T_ptr_i8)
ret!(builder, ptr)
end
Expand Down
33 changes: 17 additions & 16 deletions src/device/random.jl
Original file line number Diff line number Diff line change
Expand Up @@ -63,27 +63,18 @@ struct Philox2x32{R} <: RandomNumbers.AbstractRNG{UInt64}
@inline function Philox2x32{R}() where R
rng = new{R}()
if rng.key == 0
# initialize the key. this happens when first accessing the (0-initialized)
# shared memory key from each block. if we ever want to make the device seed
# controllable from the host, this would be the place to read a global seed.
#
# note however that it is undefined how shared memory persists across e.g.
# launches, so we may not be able to rely on the zero initialization then.
rng.key = Random.make_seed()
# initialize the key. this happens when first accessing
# the (0-initialized) shared memory key from each block.
@static if VERSION >= v"1.11-"
Random.seed!(rng, nothing)
else
rng.key = Random.make_seed()
end
end
return rng
end
end

if VERSION >= v"1.11-"
# `Random.seed!(::AbstractRNG)` now passes a `nothing` seed value
Random.seed!(rng::Philox2x32, seed::Nothing) =
Random.seed!(rng, Base.unsafe_trunc(UInt32, readcyclecounter()))
else
# ... where it used to call `Random.make_seed()`
@device_override Random.make_seed() = Base.unsafe_trunc(UInt32, readcyclecounter())
end

# default to 7 rounds; enough to pass SmallCrush
@inline Philox2x32() = Philox2x32{7}()

Expand Down Expand Up @@ -135,6 +126,7 @@ function Random.seed!(rng::Philox2x32, seed::Integer, counter::Integer=0)
rng.ctr1 = counter
return
end

# seeding the implicit default RNG
if VERSION >= v"1.11-"
@device_override Random.seed!(seed) =
Expand All @@ -144,6 +136,15 @@ else
Random.seed!(Random.default_rng(), seed)
end

# Default seed for the device RNG: the value of `readcyclecounter()` truncated
# to `UInt32`. Which hook provides it depends on the Julia version.
if VERSION >= v"1.11-"
# `Random.seed!(::AbstractRNG)` now passes a `nothing` seed value
Random.seed!(rng::Philox2x32, seed::Nothing) =
Random.seed!(rng, Base.unsafe_trunc(UInt32, readcyclecounter()))
else
# ... where it used to call `Random.make_seed()`
@device_override Random.make_seed() = Base.unsafe_trunc(UInt32, readcyclecounter())
end

"""
Random.rand(rng::Philox2x32, UInt32)
Expand Down
5 changes: 4 additions & 1 deletion src/discovery/discovery.jl
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,10 @@ function __init__()
else
# Detect HIP version, which will influence what device libraries to use.
hip_version = Base.thisminor(_hip_runtime_version())
hip_version > v"5.4" ? true : use_artifacts()
# If opaque pointers are enabled, do not force artifacts; fall back to `use_artifacts()` instead.
llvm_args = get(ENV, "JULIA_LLVM_ARGS", "")
enabled_opaque_pointers = occursin("-opaque-pointers", llvm_args)
(hip_version > v"5.4" && !enabled_opaque_pointers) ? true : use_artifacts()
end
# If ROCm 5.5+ - use artifact device libraries.
global libdevice_libs = get_device_libs(from_artifact; rocm_path)
Expand Down
5 changes: 4 additions & 1 deletion src/kernels/indexing.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
Base.to_index(::ROCArray, I::AbstractArray{Bool}) = findall(I)

# TODO Julia 1.11 specifics
# For a single boolean-mask index, forward to `Base.to_index`, whose `ROCArray`
# method calls `findall` on the mask.
# NOTE(review): presumably needed because newer Julia builds no longer reach
# `to_index` for this case — confirm against the corresponding Base change.
if VERSION >= v"1.11.0-DEV.1157"
Base.to_indices(x::ROCArray, I::Tuple{AbstractArray{Bool}}) =
(Base.to_index(x, I[1]),)
end

function Base.findall(bools::AnyROCArray{Bool})
I = keytype(bools)
Expand Down

0 comments on commit d7358ce

Please sign in to comment.