From 9c17a13c48f872865553061d119534aec44e1545 Mon Sep 17 00:00:00 2001 From: Anton Smirnov Date: Fri, 3 Feb 2023 19:55:58 +0200 Subject: [PATCH 1/4] Add AMDGPU extension --- Project.toml | 9 +++++- docs/src/index.md | 4 ++- ext/AMDGPUExt/AMDGPUExt.jl | 61 ++++++++++++++++++++++++++++++++++++ ext/AMDGPUExt/activations.jl | 16 ++++++++++ ext/AMDGPUExt/conv.jl | 50 +++++++++++++++++++++++++++++ ext/AMDGPUExt/pool.jl | 43 +++++++++++++++++++++++++ ext/AMDGPUExt/softmax.jl | 11 +++++++ test/amd/activations.jl | 10 ++++++ test/amd/batched_mul.jl | 34 ++++++++++++++++++++ test/amd/batched_repr.jl | 43 +++++++++++++++++++++++++ test/amd/conv.jl | 9 ++++++ test/amd/pool.jl | 11 +++++++ test/amd/runtests.jl | 54 +++++++++++++++++++++++++++++++ test/amd/softmax.jl | 17 ++++++++++ test/amd/storage_type.jl | 13 ++++++++ test/runtests.jl | 13 ++++++++ 16 files changed, 396 insertions(+), 2 deletions(-) create mode 100644 ext/AMDGPUExt/AMDGPUExt.jl create mode 100644 ext/AMDGPUExt/activations.jl create mode 100644 ext/AMDGPUExt/conv.jl create mode 100644 ext/AMDGPUExt/pool.jl create mode 100644 ext/AMDGPUExt/softmax.jl create mode 100644 test/amd/activations.jl create mode 100644 test/amd/batched_mul.jl create mode 100644 test/amd/batched_repr.jl create mode 100644 test/amd/conv.jl create mode 100644 test/amd/pool.jl create mode 100644 test/amd/runtests.jl create mode 100644 test/amd/softmax.jl create mode 100644 test/amd/storage_type.jl diff --git a/Project.toml b/Project.toml index 753a0faf2..b24c66814 100644 --- a/Project.toml +++ b/Project.toml @@ -11,7 +11,14 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Requires = "ae029012-a4dd-5104-9daa-d747884805df" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +[weakdeps] +AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" + +[extensions] +AMDGPUExt = "AMDGPU" + [compat] +AMDGPU = "0.4.5" Adapt = "2, 3.2" ChainRulesCore = "1.13" Requires = "0.5, 1.0" @@ -32,4 +39,4 @@ UnicodePlots = "b8865327-cd53-5732-bb35-84acbb429228" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [targets] -test = ["ChainRulesTestUtils", "CUDA", "Documenter", "FiniteDifferences", "ForwardDiff", "Logging", "NNlibCUDA", "Random", "StableRNGs", "Test", "UnicodePlots", "Zygote"] +test = ["ChainRulesTestUtils", "AMDGPU", "CUDA", "Documenter", "FiniteDifferences", "ForwardDiff", "Logging", "NNlibCUDA", "Random", "StableRNGs", "Test", "UnicodePlots", "Zygote"] diff --git a/docs/src/index.md b/docs/src/index.md index 0eea8ddbb..63168abe7 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -4,4 +4,6 @@ For use with automatic differentiation, this package defines gradients using [ChainRules.jl](https://github.com/JuliaDiff/ChainRules.jl). These will be seen by various packages including [Zygote.jl](https://github.com/FluxML/Zygote.jl). -To use these functions with [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl) you will need [NNlibCUDA.jl](https://github.com/FluxML/NNlibCUDA.jl) as well. +To use these functions with [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl) +or [AMDGPU.jl](https://github.com/JuliaGPU/AMDGPU.jl) you will need to load them +and NNlib in the same Julia session. 
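For illustration, once this extension is in place, a session on Julia 1.9+ with a working ROCm/MIOpen stack might look roughly like the following (array sizes and functions are arbitrary examples, not anything mandated by the patch):

    using AMDGPU, NNlib        # loading both packages activates the AMDGPUExt extension
    x = ROCArray(rand(Float32, 16, 16, 4, 2))
    y = relu.(x)               # single-argument broadcast is intercepted and runs via MIOpen
    z = softmax(y; dims=1)     # dispatches to the MIOpen-backed NNlib.softmax method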
diff --git a/ext/AMDGPUExt/AMDGPUExt.jl b/ext/AMDGPUExt/AMDGPUExt.jl new file mode 100644 index 000000000..f141c6f8a --- /dev/null +++ b/ext/AMDGPUExt/AMDGPUExt.jl @@ -0,0 +1,61 @@ +module AMDGPUExt + +using Adapt +using AMDGPU +using AMDGPU.MIOpen +using ChainRulesCore +using NNlib +using NNlib: BatchedAdjoint, BatchedTranspose, BatchedAdjOrTrans +using NNlib: DenseConvDims, PoolDims + +const MIOPENFloat = Union{Float16, Float32} + +const ROCBatchedAdjoint{T} = BatchedAdjoint{T, <: ROCArray{T}} +const ROCBatchedTranspose{T} = BatchedTranspose{T, <: ROCArray{T}} +const ROCBatchedAdjOrTrans{T} = Union{ROCBatchedAdjoint{T}, ROCBatchedTranspose{T}} +const WrappedROCBatchedAdjOrTrans{T, N} = Adapt.WrappedArray{T, N, ROCBatchedAdjOrTrans{T}, ROCBatchedAdjOrTrans{T}} +const AnyROCBatchedAdjOrTrans = Union{ROCBatchedAdjOrTrans, WrappedROCBatchedAdjOrTrans} + +function Base.convert(::Type{T}, b::AnyROCBatchedAdjOrTrans) where {T <: Array} + Base.convert(T, adapt(Array, b)) +end + +function Base.Array{T, N}(b::AnyROCBatchedAdjOrTrans) where {T, N} + Array{T, N}(adapt(Array, b)) +end + +Base.collect(b::AnyROCBatchedAdjOrTrans) = collect(adapt(Array, b)) + +function Base.show( + io::IO, mime::MIME{Symbol("text/plain")}, x::AnyROCBatchedAdjOrTrans, +) + show(io, mime, adapt(Array, x)) +end + +Base.show(io::IO, x::AnyROCBatchedAdjOrTrans) = show(io, adapt(Array, x)) + +Base.display(x::AnyROCBatchedAdjOrTrans) = display(adapt(Array, x)) + +function NNlib._batched_gemm!( + ::Type{<: ROCArray}, transA::Char, transB::Char, α, A, B, β, C, +) + AMDGPU.rocBLAS.gemm_batched!(transA, transB, α, A, B, β, C) +end + +function nnlib_padding(dims) + pd = NNlib.padding(dims) + if !all(pd[1:2:end] .== pd[2:2:end]) + @warn """ + MIOpen does not support asymmetric padding, defaulting to symmetric choice: + $pd -> $(pd[1:2:end]). + """ maxlog=1 + end + pd[1:2:end] +end + +include("conv.jl") +include("pool.jl") +include("softmax.jl") +include("activations.jl") + +end diff --git a/ext/AMDGPUExt/activations.jl b/ext/AMDGPUExt/activations.jl new file mode 100644 index 000000000..1563bb45e --- /dev/null +++ b/ext/AMDGPUExt/activations.jl @@ -0,0 +1,16 @@ +for (f, op) in [ + NNlib.relu => MIOpen.relu, + NNlib.relu6 => x -> MIOpen.clippedrelu(x, 6), + NNlib.softplus => MIOpen.softrelu, + NNlib.σ => MIOpen.sigmoid, + Base.tanh => MIOpen.tanh, + # TODO define for leakyrelu, elu, etc.? 
+] + @eval function Base.materialize( + bc::Broadcast.Broadcasted{<:Any,<:Any,typeof($f),<:Tuple{ROCArray{<:MIOPENFloat}}} + ) + return $op(bc.args[1]) + end +end + +Base.broadcasted(::typeof(identity), x::ROCArray{T}) where {T<:MIOPENFloat} = x diff --git a/ext/AMDGPUExt/conv.jl b/ext/AMDGPUExt/conv.jl new file mode 100644 index 000000000..b0cebff87 --- /dev/null +++ b/ext/AMDGPUExt/conv.jl @@ -0,0 +1,50 @@ +function NNlib.conv!( + y::ROCArray{T, N}, x::ROCArray{T, N}, w::ROCArray{T, N}, cdims::DenseConvDims, +) where {T <: MIOPENFloat, N} + NNlib.flipkernel(cdims) || throw(ArgumentError( + "MIOpen supports only cross-correlation as its convolution implementation.")) + + nd = max(0, 4 - N) + ncdims = NNlib.insert_singleton_spatial_dimension(cdims, nd) + MIOpen.convolution!( + NNlib.insert_singleton_spatial_dimension(y, nd), + NNlib.insert_singleton_spatial_dimension(x, nd), + NNlib.insert_singleton_spatial_dimension(w, nd); + padding=nnlib_padding(ncdims), stride=NNlib.stride(ncdims), + dilation=NNlib.dilation(ncdims), groups=NNlib.groupcount(ncdims)) + return y +end + +function NNlib.∇conv_data!( + dx::ROCArray{T, N}, dy::ROCArray{T, N}, w::ROCArray{T, N}, cdims::DenseConvDims, +) where {T <: MIOPENFloat, N} + NNlib.flipkernel(cdims) || throw(ArgumentError( + "MIOpen supports only cross-correlation as its convolution implementation.")) + + nd = max(0, 4 - N) + ncdims = NNlib.insert_singleton_spatial_dimension(cdims, nd) + MIOpen.∇convolution_data!( + NNlib.insert_singleton_spatial_dimension(dx, nd), + NNlib.insert_singleton_spatial_dimension(dy, nd), + NNlib.insert_singleton_spatial_dimension(w, nd); + padding=nnlib_padding(ncdims), stride=NNlib.stride(ncdims), + dilation=NNlib.dilation(ncdims), groups=NNlib.groupcount(ncdims)) + return dx +end + +function NNlib.∇conv_filter!( + dw::ROCArray{T, N}, x::ROCArray{T, N}, dy::ROCArray{T, N}, cdims::DenseConvDims, +) where {T <: MIOPENFloat, N} + NNlib.flipkernel(cdims) || throw(ArgumentError( + "MIOpen supports only cross-correlation as its convolution implementation.")) + + nd = max(0, 4 - N) + ncdims = NNlib.insert_singleton_spatial_dimension(cdims, nd) + MIOpen.∇convolution_weight!( + NNlib.insert_singleton_spatial_dimension(dw, nd), + NNlib.insert_singleton_spatial_dimension(dy, nd), + NNlib.insert_singleton_spatial_dimension(x, nd); + padding=nnlib_padding(ncdims), stride=NNlib.stride(ncdims), + dilation=NNlib.dilation(ncdims), groups=NNlib.groupcount(ncdims)) + return dw +end diff --git a/ext/AMDGPUExt/pool.jl b/ext/AMDGPUExt/pool.jl new file mode 100644 index 000000000..5549bab1c --- /dev/null +++ b/ext/AMDGPUExt/pool.jl @@ -0,0 +1,43 @@ +for poolname in (:maxpool, :meanpool) + @eval function NNlib.$(poolname)( + x::ROCArray{T, N}, pdims::PoolDims, + ) where {T <: MIOPENFloat, N} + y = similar(x, NNlib.output_size(pdims)..., NNlib.channels_out(pdims), size(x, N)) + nd = max(0, 4 - N) + npdims = NNlib.insert_singleton_spatial_dimension(pdims, nd) + MIOpen.$(Symbol("$(poolname)!"))( + NNlib.insert_singleton_spatial_dimension(y, nd), + NNlib.insert_singleton_spatial_dimension(x, nd); + dims=NNlib.kernel_size(npdims), padding=nnlib_padding(npdims), + stride=NNlib.stride(npdims), do_backward=false) + return y + end + + @eval function ChainRulesCore.rrule( + ::typeof(NNlib.$(poolname)), x::ROCArray{T, N}, pdims::PoolDims, + ) where {T <: MIOPENFloat, N} + y = similar(x, NNlib.output_size(pdims)..., NNlib.channels_out(pdims), size(x, N)) + nd = max(0, 4 - N) + npdims = NNlib.insert_singleton_spatial_dimension(pdims, nd) + + # `workspace` is 
used in the pullback. + _, workspace = MIOpen.$(Symbol("$(poolname)!"))( + NNlib.insert_singleton_spatial_dimension(y, nd), + NNlib.insert_singleton_spatial_dimension(x, nd); + dims=NNlib.kernel_size(npdims), padding=nnlib_padding(npdims), + stride=NNlib.stride(npdims)) + + function _pooling_pullback(Δ) + dx = similar(x) + MIOpen.$(Symbol("∇$(poolname)!"))( + NNlib.insert_singleton_spatial_dimension(dx, nd), + NNlib.insert_singleton_spatial_dimension(unthunk(Δ), nd), + NNlib.insert_singleton_spatial_dimension(y, nd), + NNlib.insert_singleton_spatial_dimension(x, nd); + dims=NNlib.kernel_size(npdims), padding=nnlib_padding(npdims), + stride=NNlib.stride(npdims), workspace) + return NoTangent(), dx, NoTangent() + end + y, _pooling_pullback + end +end diff --git a/ext/AMDGPUExt/softmax.jl b/ext/AMDGPUExt/softmax.jl new file mode 100644 index 000000000..de75f9748 --- /dev/null +++ b/ext/AMDGPUExt/softmax.jl @@ -0,0 +1,11 @@ +for fname in (:softmax, :logsoftmax) + @eval function NNlib.$(fname)(x::ROCArray{T}; dims = 1) where T <: MIOPENFloat + MIOpen.$(fname)(x; dims) + end + + @eval function NNlib.$(Symbol("∇$(fname)"))( + dy::ROCArray{T, N}, x::ROCArray{T, N}, y::ROCArray{T, N}; dims = 1, + ) where {T <: MIOPENFloat, N} + MIOpen.$(Symbol("∇$(fname)!"))(dy, y; dims) + end +end diff --git a/test/amd/activations.jl b/test/amd/activations.jl new file mode 100644 index 000000000..2abb0c272 --- /dev/null +++ b/test/amd/activations.jl @@ -0,0 +1,10 @@ +@testset "Compare CPU & GPU" begin + for (T, atol) in ((Float16, 1f-2), (Float32, 1f-5)) + x = randn(T, 16) + gputest(x -> NNlib.relu.(x), x; atol) + gputest(x -> NNlib.relu6.(x), x; atol) + gputest(x -> NNlib.softplus.(x), x; atol) + gputest(x -> tanh.(x), x; atol) + gputest(x -> identity.(x), x; atol) + end +end diff --git a/test/amd/batched_mul.jl b/test/amd/batched_mul.jl new file mode 100644 index 000000000..bc9dae899 --- /dev/null +++ b/test/amd/batched_mul.jl @@ -0,0 +1,34 @@ +@testset "batched_mul" begin + A = rand(Float32, 3, 3, 2) + B = rand(Float32, 3, 3, 2) + dA, dB = ROCArray.((A, B)) + + C = batched_mul(A, B) + @test ROCArray(C) ≈ batched_mul(dA, dB) + + Ct = batched_mul(batched_transpose(A), B) + @test ROCArray(Ct) ≈ batched_mul(batched_transpose(dA), dB) + + Ca = batched_mul(A, batched_adjoint(B)) + @test ROCArray(Ca) ≈ batched_mul(dA, batched_adjoint(dB)) + + # 5-arg batched_mul! 
+ C .= pi + batched_mul!(C, A, B, 2f0, 3f0) + Cpi = ROCArray(similar(C)) .= pi + @test ROCArray(C) ≈ batched_mul!(Cpi, dA, dB, 2f0, 3f0) + + # PermutedDimsArray + @test ROCArray(Ct) ≈ batched_mul(PermutedDimsArray(dA, (2, 1, 3)), dB) + + # FIXME same but with (1, 3, 2) errors + D = permutedims(B, (2, 1, 3)) + Cp = batched_mul(batched_adjoint(A), B) + @test ROCArray(Cp) ≈ batched_mul( + batched_adjoint(dA), PermutedDimsArray(ROCArray(D), (2, 1, 3))) + + # Methods which reshape + M = randn(Float32, 3, 3) + Cm = batched_mul(A, M) + @test ROCArray(Cm) ≈ batched_mul(dA, ROCArray(M)) +end diff --git a/test/amd/batched_repr.jl b/test/amd/batched_repr.jl new file mode 100644 index 000000000..dfdbc558b --- /dev/null +++ b/test/amd/batched_repr.jl @@ -0,0 +1,43 @@ +function print_array_strs(x) + str = sprint((io, x)->show(io, MIME"text/plain"(), x), x) + return @view split(str, '\n')[2:end] +end + +@testset "BatchedAdjOrTrans" begin + x = rand(Float32, 3, 4, 2) + y = ROCArray(x) + + bax = batched_adjoint(x) + btx = batched_transpose(x) + bay = batched_adjoint(y) + bty = batched_transpose(y) + + @test sprint(show, bax) == sprint(show, bay) + @test sprint(show, btx) == sprint(show, bty) + + @test print_array_strs(bax) == print_array_strs(bay) + @test print_array_strs(btx) == print_array_strs(bty) + + @test Array(bax) == Array(bay) + @test collect(bax) == collect(bay) + @test Array(btx) == Array(bty) + @test collect(btx) == collect(bty) + + for shape in (:, (12, 2)) + rbax = reshape(bax, shape) + rbtx = reshape(btx, shape) + rbay = reshape(bay, shape) + rbty = reshape(bty, shape) + + @test sprint(show, rbax) == sprint(show, rbay) + @test sprint(show, rbtx) == sprint(show, rbty) + + @test print_array_strs(rbax) == print_array_strs(rbay) + @test print_array_strs(rbtx) == print_array_strs(rbty) + + @test Array(rbax) == Array(rbay) + @test collect(rbax) == collect(rbay) + @test Array(rbtx) == Array(rbty) + @test collect(rbtx) == collect(rbty) + end +end diff --git a/test/amd/conv.jl b/test/amd/conv.jl new file mode 100644 index 000000000..b6be3fd39 --- /dev/null +++ b/test/amd/conv.jl @@ -0,0 +1,9 @@ +@testset "Compare CPU & GPU" begin + channels, batch = 3, 2 + for T in (Float16, Float32), nd in (1, 2, 3) + x = rand(Float32, fill(4, nd)..., 3, 1) + w = rand(Float32, fill(2, nd)..., channels, 4) + cdims = DenseConvDims(x, w, flipkernel=true) + gputest((x, w) -> NNlib.conv(x, w, cdims), x, w; atol=1e-4) + end +end diff --git a/test/amd/pool.jl b/test/amd/pool.jl new file mode 100644 index 000000000..c32f67298 --- /dev/null +++ b/test/amd/pool.jl @@ -0,0 +1,11 @@ +@testset "Compare CPU & GPU" begin + channels, batch = 3, 2 + for T in (Float16, Float32), nd in (1, 2, 3) + x = rand(T, fill(8, nd)..., channels, batch) + pdims = PoolDims(x, 2) + # NOTE: Disable grad check for maxpool as *sometimes* + # it does not *completely* agree with CPU :/ + gputest(x -> NNlib.maxpool(x, pdims), x; checkgrad=false) + gputest(x -> NNlib.meanpool(x, pdims), x) + end +end diff --git a/test/amd/runtests.jl b/test/amd/runtests.jl new file mode 100644 index 000000000..fd15e6274 --- /dev/null +++ b/test/amd/runtests.jl @@ -0,0 +1,54 @@ +using NNlib: batched_adjoint, batched_mul, batched_mul!, batched_transpose +using NNlib: is_strided, storage_type +using LinearAlgebra + +AMDGPU.allowscalar(false) + +function gputest(f, xs...; checkgrad=true, atol=1e-6, kws...) + cpu_in = xs + gpu_in = ROCArray.(xs) + + cpu_out = f(cpu_in...; kws...) + gpu_out = f(gpu_in...; kws...) 
+    @test collect(cpu_out) ≈ collect(gpu_out)
+
+    if checkgrad
+        cpu_grad = gradient((x...) -> sum(f(x...; kws...)), cpu_in...)
+        gpu_grad = gradient((x...) -> sum(f(x...; kws...)), gpu_in...)
+        for (cpu_g, gpu_g) in zip(cpu_grad, gpu_grad)
+            if cpu_g === nothing
+                @test gpu_g === nothing
+            else
+                @test collect(cpu_g) ≈ collect(gpu_g) atol=atol
+            end
+        end
+    end
+end
+
+@testset "Storage types" begin
+    include("storage_type.jl")
+end
+
+@testset "Batched repr" begin
+    include("batched_repr.jl")
+end
+
+@testset "Batched multiplication" begin
+    include("batched_mul.jl")
+end
+
+@testset "Convolution" begin
+    include("conv.jl")
+end
+
+@testset "Pooling" begin
+    include("pool.jl")
+end
+
+@testset "Softmax" begin
+    include("softmax.jl")
+end
+
+@testset "Activations" begin
+    include("activations.jl")
+end
diff --git a/test/amd/softmax.jl b/test/amd/softmax.jl
new file mode 100644
index 000000000..cd8545223
--- /dev/null
+++ b/test/amd/softmax.jl
@@ -0,0 +1,17 @@
+@testset "Compare CPU & GPU" begin
+    for (T, atol) in ((Float16, 1f-2), (Float32, 1f-5))
+        for (sz, dims) in [
+            ((5,), :), ((5,), 1),
+            ((5, 5), :), ((5, 5), 1), ((5, 5), 2),
+            ((5, 5, 5, 5), (2, 3)), ((5, 5, 5, 5), (2, 4)),
+        ]
+            if T == Float16
+                x = ones(T, sz) # Really low precision.
+            else
+                x = randn(T, sz)
+            end
+            gputest(NNlib.softmax, x; atol)
+            gputest(NNlib.logsoftmax, x; atol)
+        end
+    end
+end
diff --git a/test/amd/storage_type.jl b/test/amd/storage_type.jl
new file mode 100644
index 000000000..d884ddd7f
--- /dev/null
+++ b/test/amd/storage_type.jl
@@ -0,0 +1,13 @@
+@testset "NNlib storage type" begin
+    x = ROCArray(ones(Float32, 10, 10))
+    @test storage_type(x) <: ROCArray{Float32, 2}
+    @test storage_type(reshape(view(x, 1:2:10,:), 10, :)) <: ROCArray{Float32, 2}
+
+    @test is_strided(x)
+    @test is_strided(view(x, 1:2:5,:))
+    @test is_strided(PermutedDimsArray(x, (2, 1)))
+
+    @test !is_strided(reshape(view(x, 1:2:10, :), 10, :))
+    @test !is_strided((x .+ im)')
+    @test !is_strided(Diagonal(ROCArray(ones(3))))
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 357b96f4a..e4f2f518a 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -26,6 +26,19 @@ include("test_utils.jl")
         @info "Insufficient version or CUDA not found; Skipping CUDA tests"
     end
 
+    if get(ENV, "NNLIB_TEST_AMDGPU", "false") == "true"
+        using AMDGPU
+        if AMDGPU.functional() && AMDGPU.functional(:MIOpen)
+            @testset "AMDGPU" begin
+                include("amd/runtests.jl")
+            end
+        else
+            @info "AMDGPU.jl package is not functional. Skipping AMDGPU tests."
+        end
+    else
+        @info "Skipping AMDGPU tests, set NNLIB_TEST_AMDGPU=true to run them."
+    end
+
     if VERSION < v"1.6"
         @info "skipping doctests, on Julia $VERSION"
     else

From c68bc4bfed349c2cede19cd1651f605a89b420ed Mon Sep 17 00:00:00 2001
From: Anton Smirnov
Date: Mon, 6 Feb 2023 18:32:17 +0200
Subject: [PATCH 2/4] Add compat bounds, buildkite AMDGPU job & fix docs

When running the Buildkite job for AMDGPU, we add AMDGPU to the 'test'
target before running the tests.
--- .buildkite/pipeline.yml | 29 +++++++++++++++++++++++++++++ Project.toml | 5 +++-- docs/src/index.md | 5 ++--- 3 files changed, 34 insertions(+), 5 deletions(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index a047715b9..a06af0e80 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -55,6 +55,35 @@ steps: if: build.pull_request.labels includes "benchmark" timeout_in_minutes: 30 + - label: "AMDGPU - Julia 1.9 - No Artifacts" + plugins: + - JuliaCI/julia#v1: + version: 1.9-nightly + - JuliaCI/julia-test#v1: + - JuliaCI/julia-coverage#v1: + codecov: true + dirs: + - src + - ext + agents: + queue: "juliagpu" + rocm: "*" + rocmgpu: "*" + command: + - julia -e """ + using TOML + conf = TOML.parse(read(\"Project.toml\", String)) + push!(conf[\"targets\"][\"test\"], \"AMDGPU\") + open(io -> TOML.print(io, conf), \"Project.toml\", \"w\") + """ + - julia --project -e 'using Pkg; Pkg.update()' + timeout_in_minutes: 30 + env: + JULIA_AMDGPU_CORE_MUST_LOAD: "1" + JULIA_AMDGPU_HIP_MUST_LOAD: "1" + JULIA_AMDGPU_DISABLE_ARTIFACTS: "1" + NNLIB_TEST_AMDGPU: true + # - label: "GPU julia nightly" # plugins: # - JuliaCI/julia#v1: diff --git a/Project.toml b/Project.toml index b24c66814..1e2d73edf 100644 --- a/Project.toml +++ b/Project.toml @@ -18,13 +18,14 @@ AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" AMDGPUExt = "AMDGPU" [compat] -AMDGPU = "0.4.5" +AMDGPU = "0.4.7" Adapt = "2, 3.2" ChainRulesCore = "1.13" Requires = "0.5, 1.0" julia = "1.6" [extras] +AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" @@ -39,4 +40,4 @@ UnicodePlots = "b8865327-cd53-5732-bb35-84acbb429228" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [targets] -test = ["ChainRulesTestUtils", "AMDGPU", "CUDA", "Documenter", "FiniteDifferences", "ForwardDiff", "Logging", "NNlibCUDA", "Random", "StableRNGs", "Test", "UnicodePlots", "Zygote"] +test = ["ChainRulesTestUtils", "CUDA", "Documenter", "FiniteDifferences", "ForwardDiff", "Logging", "NNlibCUDA", "Random", "StableRNGs", "Test", "UnicodePlots", "Zygote"] diff --git a/docs/src/index.md b/docs/src/index.md index 63168abe7..91adcee0c 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -4,6 +4,5 @@ For use with automatic differentiation, this package defines gradients using [ChainRules.jl](https://github.com/JuliaDiff/ChainRules.jl). These will be seen by various packages including [Zygote.jl](https://github.com/FluxML/Zygote.jl). -To use these functions with [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl) -or [AMDGPU.jl](https://github.com/JuliaGPU/AMDGPU.jl) you will need to load them -and NNlib in the same Julia session. +To use these functions with [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl) you will need [NNlibCUDA.jl](https://github.com/FluxML/NNlibCUDA.jl) as well. +For [AMDGPU.jl](https://github.com/JuliaGPU/AMDGPU.jl) you will need to load it and NNlib in the same Julia session. 
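For reference, a rough local equivalent of the Buildkite step above might look like this (a sketch only; it assumes the NNlib repository is the working directory and that a functional ROCm/MIOpen install is present):

    using TOML, Pkg
    # Mirror the CI step: add AMDGPU to the [targets] test list, since it is
    # otherwise only a weak dependency and not available to the test environment.
    conf = TOML.parsefile("Project.toml")
    push!(conf["targets"]["test"], "AMDGPU")
    open(io -> TOML.print(io, conf), "Project.toml", "w")
    # Opt in to the AMDGPU suite that test/runtests.jl guards behind this flag.
    withenv("NNLIB_TEST_AMDGPU" => "true") do
        Pkg.activate(".")
        Pkg.test()
    end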
From d4bd704fe52b2cd5de1a56dac8b68d01436ba7f7 Mon Sep 17 00:00:00 2001 From: Anton Smirnov Date: Fri, 10 Feb 2023 00:34:34 +0200 Subject: [PATCH 3/4] Update buildkite config --- .buildkite/pipeline.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index a06af0e80..eff9249b2 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -76,7 +76,6 @@ steps: push!(conf[\"targets\"][\"test\"], \"AMDGPU\") open(io -> TOML.print(io, conf), \"Project.toml\", \"w\") """ - - julia --project -e 'using Pkg; Pkg.update()' timeout_in_minutes: 30 env: JULIA_AMDGPU_CORE_MUST_LOAD: "1" From 44f7b3dcf580568d00c4454c212760f279a8f2e8 Mon Sep 17 00:00:00 2001 From: Anton Smirnov Date: Fri, 10 Feb 2023 00:37:57 +0200 Subject: [PATCH 4/4] Update buildkite config --- .buildkite/pipeline.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index eff9249b2..61fe79362 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -71,10 +71,10 @@ steps: rocmgpu: "*" command: - julia -e """ - using TOML - conf = TOML.parse(read(\"Project.toml\", String)) - push!(conf[\"targets\"][\"test\"], \"AMDGPU\") - open(io -> TOML.print(io, conf), \"Project.toml\", \"w\") + using TOML; + conf = TOML.parse(read(\"Project.toml\", String)); + push!(conf[\"targets\"][\"test\"], \"AMDGPU\"); + open(io -> TOML.print(io, conf), \"Project.toml\", \"w\"); """ timeout_in_minutes: 30 env: