diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 187695a..0c785da 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -3,8 +3,13 @@ on:
   push:
     branches:
       - master
+    tags: '*'
   pull_request:
-
+concurrency:
+  # Skip intermediate builds: always.
+  # Cancel intermediate builds: only if it is a pull request build.
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
 jobs:
   test:
     name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ matrix.julia-threads }} thread(s) - ${{ github.event_name }}
diff --git a/Project.toml b/Project.toml
index 93c5d95..c2e2667 100644
--- a/Project.toml
+++ b/Project.toml
@@ -11,18 +11,16 @@ GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
 GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
-NNlibCUDA = "a00861dc-f156-4864-bf3c-e6376f28a68d"
 Requires = "ae029012-a4dd-5104-9daa-d747884805df"
 Static = "aedffcd0-7271-4cad-89d0-dc628f76c6d3"
 
 [compat]
-Adapt = "3.3"
-CUDA = "3, 4"
+Adapt = "3, 4"
+CUDA = "3, 4, 5"
 ChainRulesCore = "1.3"
-GPUArrays = "8"
+GPUArrays = "8, 9, 10"
 GPUArraysCore = "0.1"
-NNlib = "0.7, 0.8"
-NNlibCUDA = "0.2"
+NNlib = "0.7, 0.8, 0.9"
 Requires = "1.1"
 Static = "0.7, 0.8"
 julia = "1.6"
diff --git a/src/NeuralAttentionlib.jl b/src/NeuralAttentionlib.jl
index 8d40af9..8827d94 100644
--- a/src/NeuralAttentionlib.jl
+++ b/src/NeuralAttentionlib.jl
@@ -9,7 +9,6 @@ import GPUArraysCore
 
 using ChainRulesCore
 using NNlib
-using NNlibCUDA
 
 using Requires
 
diff --git a/src/mask/mask.jl b/src/mask/mask.jl
index afc2371..ab16786 100644
--- a/src/mask/mask.jl
+++ b/src/mask/mask.jl
@@ -138,7 +138,7 @@ Base.@propagate_inbounds Base.getindex(m::M, I::Integer...) where {M <: Union{<:
 Base.@propagate_inbounds Base.getindex(m::MaskIndexer, i::CartesianIndex) = m[Tuple(i)]
 Base.@propagate_inbounds Base.getindex(m::MaskIndexer, I::Tuple) = m[I...]
 
-Adapt.adapt(to::CUDA.Adaptor, m::AbstractArrayMask) = Indexer{typeof(m)}(map(Base.Fix1(Adapt.adapt, to), GetIndexer(m).__fields))
+Adapt.adapt(to, m::AbstractArrayMask) = Indexer{typeof(m)}(map(Base.Fix1(Adapt.adapt, to), GetIndexer(m).__fields))
 
 randomness(::AbstractMask) = static(false)
 require_dest(::AbstractMask) = static(false)
diff --git a/src/mask/wrapper.jl b/src/mask/wrapper.jl
index 0b0e3aa..46fc3c9 100644
--- a/src/mask/wrapper.jl
+++ b/src/mask/wrapper.jl
@@ -9,7 +9,7 @@ AttenMask(m::FlipMask) = FlipMask(AttenMask(m.mask))
 Base.:!(m::AbstractMask) = FlipMask(m)
 Base.:!(m::FlipMask) = m.mask
 
-Adapt.adapt(to::CUDA.Adaptor, m::FlipMask) = Indexer{typeof(m)}((mask = adapt(to, m.mask),))
+Adapt.adapt(to, m::FlipMask) = Indexer{typeof(m)}((mask = adapt(to, m.mask),))
 adapt_structure(to, x::FlipMask) = FlipMask(adapt(to, x.mask))
 
 GetIndexer(m::FlipMask, dest_size = nothing) = Indexer{typeof(m)}((mask = GetIndexer(m.mask, dest_size),), dest_size)
@@ -43,7 +43,7 @@ Base.:|(::Nothing, m::AbstractMask) = nothing
 Base.:&(m::AbstractMask, ::Nothing) = m
 Base.:&(::Nothing, m::AbstractMask) = m
 
-Adapt.adapt(to::CUDA.Adaptor, m::CombinedMask) = Indexer{typeof(m)}((f = adapt(to, m.f),
+Adapt.adapt(to, m::CombinedMask) = Indexer{typeof(m)}((f = adapt(to, m.f),
                                                              masks = map(Base.Fix1(adapt, to), m.masks)))
 adapt_structure(to, x::CombinedMask) = CombinedMask(x.f, adapt(to, x.masks))
 GetIndexer(m::CombinedMask, dest_size = nothing) = Indexer{typeof(m)}((m.f, masks = map(Base.Fix2(GetIndexer, dest_size), m.masks)))
@@ -101,7 +101,7 @@ function BatchedMask(mask)
     return BatchedMask(mask, batch_dim)
 end
 
-Adapt.adapt(to::CUDA.Adaptor, m::BatchedMask) = Indexer{typeof(m)}((mask = adapt(to, m.mask), batch_dim = static(m.batch_dim)))
+Adapt.adapt(to, m::BatchedMask) = Indexer{typeof(m)}((mask = adapt(to, m.mask), batch_dim = static(m.batch_dim)))
 adapt_structure(to, x::BatchedMask) = BatchedMask(adapt(to, x.mask), x.batch_dim)
 
 GetIndexer(m::BatchedMask, dest_size = nothing) = Indexer{typeof(m)}((mask = GetIndexer(m.mask, dest_size), batch_dim = static(m.batch_dim)))
@@ -138,7 +138,7 @@ end
 
 AttenMask(r::RepeatMask) = RepeatMask(AttenMask(r.mask), r.num)
 
-Adapt.adapt(to::CUDA.Adaptor, m::RepeatMask) = Indexer{typeof(m)}((mask = adapt(to, m.mask), num = m.num))
+Adapt.adapt(to, m::RepeatMask) = Indexer{typeof(m)}((mask = adapt(to, m.mask), num = m.num))
 adapt_structure(to, x::RepeatMask) = RepeatMask(adapt(to, x.mask), x.num)
 
 GetIndexer(m::RepeatMask, dest_size = nothing) = Indexer{typeof(m)}((mask = GetIndexer(m.mask, dest_size), num = m.num))
@@ -176,7 +176,7 @@ struct BiSequenceMask{QM<:AbstractMask, KM<:AbstractMask} <: AbstractWrapperMask
     k_mask::KM
 end
 
-Adapt.adapt(to::CUDA.Adaptor, m::BiSequenceMask) = Indexer{typeof(m)}((q_mask = adapt(to, m.q_mask), k_mask = adapt(to, m.k_mask)))
+Adapt.adapt(to, m::BiSequenceMask) = Indexer{typeof(m)}((q_mask = adapt(to, m.q_mask), k_mask = adapt(to, m.k_mask)))
 adapt_structure(to, x::BiSequenceMask) = BiSequenceMask(adapt(to, x.q_mask), adapt(to, x.k_mask))
 
 bi_dest_size(::Nothing, is_q) = nothing
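Not part of the diff: a minimal, self-contained sketch of the Adapt.jl dispatch pattern the mask changes rely on. Dropping the ::CUDA.Adaptor constraint means any back end's adaptor can be passed through, so the package no longer needs to reference CUDA internals. DeviceAdaptor and ToyMask below are hypothetical stand-ins used only for illustration; the package itself overloads Adapt.adapt directly for its mask types, as the hunks above show, but the idea of an untyped first argument is the same.

using Adapt

struct DeviceAdaptor end   # hypothetical adaptor; a GPU back end would supply its own
Adapt.adapt_storage(::DeviceAdaptor, x::AbstractArray) = copy(x)   # stand-in for a real device upload

struct ToyMask{A <: AbstractArray}   # hypothetical array-backed mask
    mask::A
end

# Untyped first argument, as in the diff: works for any adaptor, so no reference
# to CUDA.Adaptor (or any other back end's internals) is needed here.
Adapt.adapt_structure(to, m::ToyMask) = ToyMask(Adapt.adapt(to, m.mask))

m = ToyMask(rand(Bool, 4, 4))
adapted = Adapt.adapt(DeviceAdaptor(), m)   # field adapted via DeviceAdaptor, wrapper rebuilt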