diff --git a/.gitignore b/.gitignore index 8c960ec..3f02ca7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ *.jl.cov *.jl.*.cov *.jl.mem +Manifest.toml diff --git a/src/Missings.jl b/src/Missings.jl index 77482f7..fda0525 100644 --- a/src/Missings.jl +++ b/src/Missings.jl @@ -2,7 +2,7 @@ module Missings export allowmissing, disallowmissing, ismissing, missing, missings, Missing, MissingException, levels, coalesce, passmissing, nonmissingtype, - skipmissings + skipmissings, spreadmissings using Base: ismissing, missing, Missing, MissingException using Base: @deprecate @@ -208,6 +208,448 @@ missing """ passmissing(f) = PassMissing{Core.Typeof(f)}(f) +abstract type AbstractSpread end +struct SpreadDefault <: AbstractSpread end +struct SpreadNonMissing <: AbstractSpread end +struct SpreadNone <: AbstractSpread end +struct SpreadAll <: AbstractSpread end + +struct SpreadMissings{F, S <: AbstractSpread} <: Function + f::F + spread::S + function SpreadMissings(f, spread::AbstractSpread) + if !(spread isa AbstractSpread) + throw(ArgumentError("spread must be either SpreadDefault(), SpreadNonMissing(), or SpreadNone()")) + end + new{Core.Typeof(f), typeof(spread)}(f, spread) + end +end + +""" + nomissing_subarray(a::AbstractVector, nonmissinginds::AbstractVector) + +Given an input vector `a` where `nonmissinginds` is guaranteed +to not include any missing values, return a `SubArray` referencing +the `nonmissinginds`. The element type of the returned output +does not include `missing`. 
+""" +function nomissing_subarray(a::AbstractVector, nonmissinginds::AbstractVector) + T = nonmissingtype(eltype(a)) # Element type + N = 1 # Dimension of view + P = typeof(a) # Type of parent array + I = Tuple{typeof(nonmissinginds)} # Type of the non-missing indices + L = false # A view over a vector of indices cannot use fast linear indexing + SubArray{T, N, P, I, L}(a, (nonmissinginds,), 0, 1) +end + +function new_args_subarray(args::Tuple, nonmissinginds::AbstractVector) + newargs = map(args) do a + if a isa AbstractVector + nomissing_subarray(a, nonmissinginds) + else + a + end + end +end + +function spread_missing( + res::AbstractVector{T}, + vecs::Tuple, + nonmissinginds::AbstractVector{<:Integer}, + nonmissingmask::AbstractVector{<:Bool})::AbstractVector{Union{Missing, T}} where {T} + + if length(res) != length(nonmissinginds) + s = "When spreading a vector result, " * + "length of output must match number of jointly non-" * + "missing values in inputs " * + "(got $(length(res)) and $(length(nonmissinginds)))."
+ + throw(DimensionMismatch(s)) + end + out = similar(res, Union{eltype(res), Missing}, length(vecs[1])) + fill!(out, missing) + out[nonmissingmask] .= res + out +end + +function maybespread_missing( + res::T, + spread::SpreadDefault, + vecs::Tuple, + nonmissinginds::AbstractVector{<:Integer}, + nonmissingmask::AbstractVector{<:Bool})::T where{T} + + res +end + +function maybespread_missing( + res::AbstractVector{T}, + spread::SpreadDefault, + vecs::Tuple, + nonmissinginds::AbstractVector{<:Integer}, + nonmissingmask::AbstractVector{<:Bool})::AbstractVector{Union{Missing, T}} where {T} + + spread_missing(res, vecs, nonmissinginds, nonmissingmask) +end + +function maybespread_missing( + res::T, + spread::SpreadNonMissing, + vecs::Tuple, + nonmissinginds::AbstractVector{<:Integer}, + nonmissingmask::AbstractVector{<:Bool})::AbstractVector{Union{Missing, T}} where{T} + + out = Vector{Union{typeof(res), Missing}}(undef, length(vecs[1])) + fill!(out, missing) + out[nonmissinginds] .= Ref(res) + out +end + +function maybespread_missing( + res::AbstractVector{T}, + spread::SpreadNonMissing, + vecs::Tuple, + nonmissinginds::AbstractVector{<:Integer}, + nonmissingmask::AbstractVector{<:Bool})::AbstractVector{Union{Missing, T}} where {T} + + spread_missing(res, vecs, nonmissinginds, nonmissingmask) +end + +function maybespread_missing( + res::T, + spread::SpreadNone, + vecs::Tuple, + nonmissinginds::AbstractVector{<:Integer}, + nonmissingmask::AbstractVector{<:Bool})::T where {T} + + res +end + +function maybespread_missing( + res::T, + spread::SpreadAll, + vecs::Tuple, + nonmissinginds::AbstractVector{<:Integer}, + nonmissingmask::AbstractVector{<:Bool})::AbstractVector{T} where {T} + + out = Vector{typeof(res)}(undef, length(first(vecs))) + out .= Ref(res) + out +end + +function maybespread_missing( + res::AbstractVector, + spread::SpreadAll, + vecs::Tuple, + nonmissinginds::AbstractVector{<:Integer}, + nonmissingmask::AbstractVector{<:Bool}) + + 
throw(ArgumentError("spreadmissings with :all on vector output is reserved")) +end + +function spread_nomissing( + res::AbstractVector{T}, + vecs::Tuple)::typeof(res) where {T} + + if length(res) != length(first(vecs)) + s = "When spreading a vector result, " * + "length of output must match number of jointly non-" * + "missing values in inputs " * + "(got $(length(res)) and $(length(first(vecs))))." + throw(DimensionMismatch(s)) + end + res +end + +function maybespread_nomissing( + res::T, + spread::SpreadDefault, + vecs::Tuple)::T where{T} + + res +end + +function maybespread_nomissing( + res::AbstractVector{T}, + spread::SpreadDefault, + vecs::Tuple)::typeof(res) where {T} + + spread_nomissing(res, vecs) +end + +function maybespread_nomissing( + res::T, + spread::SpreadNonMissing, + vecs::Tuple)::Vector{T} where{T} + + out = Vector{typeof(res)}(undef, length(vecs[1])) + fill!(out, res) + out +end + +function maybespread_nomissing( + res::AbstractVector{T}, + spread::SpreadNonMissing, + vecs::Tuple)::typeof(res) where {T} + + spread_nomissing(res, vecs) +end + +function maybespread_nomissing( + res::T, + spread::SpreadNone, + vecs::Tuple)::T where {T} + + res +end + +function maybespread_nomissing( + res::T, + spread::SpreadAll, + vecs::Tuple)::AbstractVector{T} where {T} + + out = Vector{typeof(res)}(undef, length(first(vecs))) + out .= Ref(res) + out +end + +function maybespread_nomissing( + res::AbstractVector, + spread::SpreadAll, + vecs::Tuple) + + throw(ArgumentError("spreadmissings with :all on vector output is reserved")) +end + +function check_indices_match(vecs...) + Base.require_one_based_indexing(vecs...) + eachindex(vecs...) + nothing +end + +function (f::SpreadMissings{F, S})(args...; kwargs...) where {F, S} + kwargs_vals = values(values(kwargs)) + xs = tuple(args..., kwargs_vals...)
+ + if any(ismissing, xs) + s = "Using `spreadmissings` with a positional or keyword argument" * + " that is `missing` is reserved" + throw(ArgumentError(s)) + end + + # Detect vector inputs which contain missing in + # either the main arguments or keyword arguments + if any(x -> x isa AbstractVector{>:Missing}, xs) + # Check that all vector inputs have the + # same indices. Collect these vector inputs + # into a single object. + # + # TODO: Allow users to protect vector inputs + vecs = Base.filter(x -> x isa AbstractVector, xs) + check_indices_match(vecs...) + # Determine which indices in our collection of + # vector inputs have no missing values in + # all our inputs. + nonmissingmask = fill(true, length(vecs[1])) + for v in vecs + nonmissingmask .&= .!ismissing.(v) + end + nonmissinginds = findall(nonmissingmask) + # Construct new versions of arguments + # with SubArrays whose eltypes do not allow Missing + newargs = new_args_subarray(args, nonmissinginds) + new_kwargs_vals = new_args_subarray(kwargs_vals, nonmissinginds) + + new_kwargs = (k => v for (k, v) in zip(keys(kwargs), new_kwargs_vals)) + res = f.f(newargs...; new_kwargs...) + maybespread_missing(res, f.spread, vecs, nonmissinginds, nonmissingmask) + # There is at least one vector, but none of the vectors can contain missing + elseif any(x -> x isa AbstractVector, xs) + vecs = Base.filter(x -> x isa AbstractVector, xs) + check_indices_match(vecs...) + res = f.f(args...; kwargs...) + maybespread_nomissing(res, f.spread, vecs) + else + f.f(args...; kwargs...) + end +end + +""" + spreadmissings(f; spread = :default) + +Return a function which calls function `f` after skipping entries +corresponding to missing values in `AbstractVector` arguments. + +All input vectors must have the same length. Non-`AbstractVector` +arguments are left untouched. + +If `spread` is `:default` or `:nonmissing` and `f` returns a vector, +its length must be equal to the number of jointly non-missing +entries in the vector inputs.
A vector of the same length as +vector inputs is returned, filling positions corresponding +to missing values with `missing`. + +If `spread` is `:none`, or if `spread` is `:default` and `f` returns a +value other than a vector, the output is returned as-is. + +For each vector argument, `f` is passed a `SubArray` +view with an element type equal to `nonmissingtype(T)`, +with `T` the element type of the original argument. + +If none of the input arguments are vectors of any kind, +`spreadmissings(f)` behaves exactly the same as `f`. No +pre or post-processing is done. + +### Examples + +```julia-repl +julia> using Statistics; + +julia> xmiss = [1, 2, 3, missing]; + +julia> ymiss = [missing, 200, 300, 400]; + +julia> summeans(x, y) = mean(x) + mean(y); + +julia> spreadmissings(summeans)(xmiss, ymiss) +252.5 + +julia> xmiss = [10, 20, 30, missing]; + +julia> ymiss = [missing, 500, 400, 300]; + +julia> cor(xmiss, ymiss) +missing + +julia> spreadmissings(cor)(xmiss, ymiss) +-1.0 + +julia> standardize(xmiss) +4-element Vector{Missing}: + missing + missing + missing + missing + +julia> spreadmissings(standardize)(xmiss) +4-element Vector{Union{Missing, Float64}}: + -10.0 + 0.0 + 10.0 + missing +``` + +# Extended help + +The behavior of `spreadmissings` can be illustrated using an example. The call + +``` +spreadmissings(f)(x::AbstractVector, y::Integer, z::AbstractVector) +``` + +finds the indices at which neither `x` nor `z` is `missing`. Then `f` is +applied on the `SubArray`s of `x` and `z` restricted to those indices, +which contain only non-missing values. This is essentially equivalent to: + +``` +inds = .!ismissing.(x) .& .!ismissing.(z) +sx = view(x, inds); sz = view(z, inds) +f(sx, y, sz) +``` + + +`spreadmissings` does not use the default `view` behavior. Rather, +it constructs a `SubArray` directly such that the eltype of the new +inputs does not include `Missing`. + +# `spread` keyword argument + +The `spread` keyword argument controls whether the output from +`f` is "spread" over non-missing values.
+ +* `:default`: + * If output is not a vector, it is returned directly. + * If output is a vector with the same length as the number of + jointly non-missing elements of the inputs, it is "spread" + to match the non-missing elements of the inputs. + * Otherwise a `DimensionMismatch` error is thrown. + +* `:nonmissing`: + * If output is not a vector, it is spread over non-missing + elements of the inputs. + * If output is a vector, behavior is the same as `:default`. + * In both cases, positions at which any input is `missing` are filled + with `missing` in the output. +* `:none`: output is returned directly, whether a vector or not. +* `:all`: + * If output is not a vector, it is spread over the full + length of the input vectors, not only the indices with + non-missing values in the inputs. + * If the output is a vector, an error is thrown. + +A summary of the behavior is given in the table below: + +| spread \\ output type | Vector | Non-vector | +|:---------------------- |:------------------------------- |:------------------------------------| +| :default | spread over non-missing indices | return | +| :nonmissing | spread over non-missing indices | spread over non-missing indices | +| :none | return | return | +| :all | error | spread over all indices | + +If there are `AbstractVector` inputs but none of these inputs are +`AbstractVector{>:Missing}`, the returned vectors will not allow +for `missing`. + +If none of the arguments are `AbstractVector`s, `spreadmissings(f)` +behaves the same as `f` regardless of `spread`. + +!!! note + `spreadmissings` has a subtly different behavior than common uses of + `skipmissing`.
Compare the two functions below + + ```julia-repl + julia> function fillmean_skip(x) + m = mean(skipmissing(x)) + fill(m, length(x)) + end; + + julia> fillmean(x) = fill(mean(x), length(x)); + + julia> x = [2, missing]; + + julia> fillmean_skip(x) + 2-element Vector{Float64}: + 2.0 + 2.0 + + julia> spreadmissings(fillmean)(x) + 2-element Vector{Union{Missing, Float64}}: + 2.0 + missing + ``` + + 1. `fillmean_skip` fills all entries of the original vector `x` with the mean, + excluding `missing`s. By contrast, `spreadmissings(fillmean)` only fills non-missing + elements of the original `x`. + 2. `fillmean_skip` returns a vector which does not allow for `missing`, while + `spreadmissings(fillmean)` does. + + Use the keyword `spread = :all` to emulate the `skipmissing` behavior. +""" +function spreadmissings(f; spread::Symbol = :default) + if spread === :default + SpreadMissings(f, SpreadDefault()) + elseif spread === :nonmissing + SpreadMissings(f, SpreadNonMissing()) + elseif spread === :none + SpreadMissings(f, SpreadNone()) + elseif spread === :all + SpreadMissings(f, SpreadAll()) + else + throw(ArgumentError("`spread` must be one of `:default`, `:nonmissing`, `:none`, or `:all`")) + end +end + """ skipmissings(args...) @@ -229,7 +671,6 @@ julia> collect(tx) 2-element Array{Int64,1}: 1 2 - ``` """ function skipmissings(args...) 
@@ -258,7 +699,7 @@ struct SkipMissings{V, T} others::T end -Base.@propagate_inbounds function _anymissingindex(others::Tuple{Vararg{AbstractArray}}, i) +Base.@propagate_inbounds function _anymissingindex(others::Tuple{Vararg{AbstractArray}}, i) for oth in others oth[i] === missing && return true end @@ -267,7 +708,7 @@ Base.@propagate_inbounds function _anymissingindex(others::Tuple{Vararg{Abstract end @inline function _anymissingiterate(others::Tuple, state) - for oth in others + for oth in others y = iterate(oth, state) y !== nothing && first(y) === missing && return true end @@ -278,7 +719,7 @@ end const SkipMissingsofArrays = SkipMissings{V, T} where {V <: AbstractArray, T <: Tuple{Vararg{AbstractArray}}} -function Base.show(io::IO, mime::MIME"text/plain", itr::SkipMissings{V}) where V +function Base.show(io::IO, mime::MIME"text/plain", itr::SkipMissings{V}) where V print(io, SkipMissings, '{', V, '}', '(', itr.x, ')', " comprised of " * "$(length(itr.others) + 1) iterators") end @@ -335,7 +776,7 @@ end @inline function Base.getindex(itr::SkipMissingsofArrays, i) @boundscheck checkbounds(itr.x, i) @inbounds xi = itr.x[i] - if xi === missing || @inbounds _anymissingindex(itr.others, i) + if xi === missing || @inbounds _anymissingindex(itr.others, i) throw(MissingException("the value at index $i is missing for some element")) end return xi @@ -380,9 +821,9 @@ Base.mapreduce_impl(f, op, A::SkipMissingsofArrays, ifirst::Integer, ilast::Inte A = itr.x if ifirst == ilast @inbounds a1 = A[ifirst] - if a1 === missing + if a1 === missing return nothing - elseif _anymissingindex(itr.others, ifirst) + elseif _anymissingindex(itr.others, ifirst) return nothing else return Some(Base.mapreduce_first(f, op, a1)) @@ -436,7 +877,7 @@ end Return a vector similar to the array wrapped by the given `SkipMissings` iterator but skipping all elements with a `missing` value in one of the iterators passed to `skipmissing` and elements for which `f` returns `false`. 
This method -only applies when all iterators passed to `skipmissings` are arrays. +only applies when all iterators passed to `skipmissings` are arrays. # Examples ``` diff --git a/test/.ipynb_checkpoints/runtests-checkpoint.jl b/test/.ipynb_checkpoints/runtests-checkpoint.jl new file mode 100644 index 0000000..7c18225 --- /dev/null +++ b/test/.ipynb_checkpoints/runtests-checkpoint.jl @@ -0,0 +1,227 @@ +using Test, SparseArrays, Missings + +# Must be defined outside testset on v1.0 +struct CubeRooter end +(::CubeRooter)(x) = cbrt(x) + +@testset "Missings" begin + x = Missings.replace([1, 2, missing, 4], 3) + @test eltype(x) === Int + @test length(x) == 4 + @test size(x) == (4,) + @test collect(x) == collect(1:4) + @test collect(x) isa Vector{Int} + x = Missings.replace([1, 2, missing, 4], 3.0) + @test eltype(x) === Int + @test length(x) == 4 + @test size(x) == (4,) + @test collect(x) == collect(1:4) + @test collect(x) isa Vector{Int} + x = Missings.replace([1 2; missing 4], 3) + @test eltype(x) === Int + @test length(x) == 4 + @test size(x) == (2, 2) + @test collect(x) == [1 2; 3 4] + @test collect(x) isa Matrix{Int} + x = Missings.replace((v for v in [missing, 1, missing, 2, 4]), 0) + @test length(x) == 5 + @test size(x) == (5,) + @test eltype(x) === Any + @test collect(x) == [0, 1, 0, 2, 4] + @test collect(x) isa Vector{Int} + + x = Missings.fail([1, 2, 3, 4]) + @test eltype(x) === Int + @test length(x) == 4 + @test size(x) == (4,) + @test collect(x) == [1, 2, 3, 4] + @test collect(x) isa Vector{Int} + x = Missings.fail([1 2; 3 4]) + @test eltype(x) === Int + @test length(x) == 4 + @test size(x) == (2, 2) + @test collect(x) == [1 2; 3 4] + @test collect(x) isa Matrix{Int} + @test_throws MissingException collect(Missings.fail([1, 2, missing, 4])) + x = Missings.fail(v for v in [1, 2, 4]) + @test eltype(x) === Any + @test length(x) == 3 + @test size(x) == (3,) + @test collect(x) == [1, 2, 4] + @test collect(x) isa Vector{Int} + + x = skipmissing([1, 2, missing, 4]) 
+ @test eltype(x) === Int + @test collect(x) == [1, 2, 4] + @test collect(x) isa Vector{Int} + x = skipmissing([1 2; missing 4]) + @test eltype(x) === Int + @test collect(x) == [1, 2, 4] + @test collect(x) isa Vector{Int} + x = collect(skipmissing([missing])) + @test eltype(x) === Union{} + @test isempty(collect(x)) + @test collect(x) isa Vector{Union{}} + x = collect(skipmissing(Union{Int, Missing}[])) + @test eltype(x) === Int + @test isempty(collect(x)) + @test collect(x) isa Vector{Int} + x = skipmissing([missing, missing, 1, 2, missing, 4, missing, missing]) + @test eltype(x) === Int + @test collect(x) == [1, 2, 4] + @test collect(x) isa Vector{Int} + x = skipmissing(v for v in [missing, 1, missing, 2, 4]) + @test eltype(x) === Any + @test collect(x) == [1, 2, 4] + @test collect(x) isa Vector{Int} + + x = [1, 2, missing, 4] + y = ["a", "b", "c", missing] + z = [missing, missing, 3.1, 4.5] + l = [1, 2, 3, 4, 5] + @test_throws ArgumentError skipmissings(x, l) + mx, my = skipmissings(x, y) + iobuf = IOBuffer() + show(iobuf, MIME("text/plain"), mx) + s = String(take!(iobuf)) + @test s == "Missings.SkipMissings{Array{Union{Missing, $Int},1}}(" * + "Union{Missing, $Int}[1, 2, missing, 4]) comprised of 2 iterators" + @test collect(mx) == [1, 2] + @test collect(mx) isa Vector{Int} + @test reduce(+, mx) === reduce(+, collect(mx)) === sum(mx) === + mapreduce(identity, +, mx) === 3 + @test mapreduce(x -> x^2, +, mx) === mapreduce(x -> x^2, +, collect(mx)) === 5 + mx, my, mz = skipmissings(x, y, z) + @test eltype(mx) == Int + @test eltype(my) == String + @test eltype(mz) == Float64 + @test isempty(collect(mx)) + @test sum(mx) === 0 + x = [missing 4; 2 5; 3 6] + y = [1 4; missing 5; 3 6] + mx, my = skipmissings(x, y) + @test collect(mx) == [3, 4, 5, 6] + @test mx[3] == 3 + @test_throws MissingException mx[1] + @test reduce(+, mx) === 18 + @test isapprox(mapreduce(cos, *, collect(mx)), mapreduce(cos, *, mx)) + @static if VERSION >= v"1.4.0-DEV" + @inferred Union{Float64, 
Missing} mapreduce(cos, *, mx) + @inferred Union{Float64, Missing} sum(mx) + @inferred Union{Float64, Missing} reduce(+, mx) + end + + x = [missing missing missing] + y = [1, 2, 3] + mx, my = skipmissings(x, y) + @test_throws ArgumentError reduce(x -> x/2, mx) + @test_throws ArgumentError mapreduce(x -> x/2, +, mx) + @test_throws MethodError length(mx) + @test IndexStyle(typeof(mx)) == IndexStyle(typeof(x)) + x = [isodd(i) ? missing : i for i in 1:64] + y = [isodd(i) ? missing : i for i in 65:128] + mx, my = skipmissings(x, y) + @test sum(mx) === 1056 + @static if VERSION >= v"1.4.0-DEV" + @inferred Union{Missing, Int} sum(mx) + @inferred Union{Missing, Int} reduce(+, mx) + end + + @test levels(1:1) == levels([1]) == levels([1, missing]) == levels([missing, 1]) == [1] + @test levels(2:-1:1) == levels([2, 1]) == levels([2, missing, 1]) == [1, 2] + @test levels([missing, "a", "c", missing, "b"]) == ["a", "b", "c"] + @test levels([Complex(0, 1), Complex(1, 0), missing]) == [Complex(0, 1), Complex(1, 0)] + @test levels(sparse([0 3 2])) == [0, 2, 3] + @test typeof(levels([1])) === typeof(levels([1, missing])) === Vector{Int} + @test typeof(levels(["a"])) === typeof(levels(["a", missing])) === Vector{String} + @test typeof(levels(sparse([1]))) === Vector{Int} + @test isempty(levels([missing])) + @test isempty(levels([])) + + @test nonmissingtype(Union{Int, Missing}) == Int + @test nonmissingtype(Any) == Any + @test nonmissingtype(Missing) == Union{} + @test nonmissingtype(Union{Array{Int}, Missing}) == Array{Int} + + @test isequal(missings(1), [missing]) + @test isequal(missings(Any, 1), [missing]) + @test isequal(missings(Int, 1), [missing]) + @test missings(Int, 1) isa Vector{Union{Int, Missing}} + @test missings(Any, 1) isa Vector{Union{Any, Missing}} + @test isequal(missings(Union{Int, Missing}, 1, 2), [missing missing]) + @test missings(Union{Int, Missing}, 1, 2) isa Matrix{Union{Int, Missing}} + @test Union{Int, Missing}[1,2,3] == (Union{Int, Missing})[1,2,3] + x = 
missings(Int, (1, 2)) + @test isa(x, Matrix{Union{Int, Missing}}) + @test isequal(x, [missing missing]) + x = missings((1, 2)) + @test isa(x, Matrix{Missing}) + @test isequal(x, [missing missing]) + + @test allowmissing([1]) == [1] + @test allowmissing([1]) isa AbstractVector{Union{Int, Missing}} + @test allowmissing(Any[:a]) == [:a] + @test allowmissing(Any[:a]) isa AbstractVector{Any} + @test isequal(allowmissing([1, missing]), [1, missing]) + @test allowmissing([1, missing]) isa AbstractVector{Union{Int, Missing}} + @test isequal(allowmissing([missing]), [missing]) + @test allowmissing([missing]) isa AbstractVector{Missing} + + @test allowmissing([1 1]) == [1 1] + @test allowmissing([1 1]) isa AbstractArray{Union{Int, Missing}, 2} + @test allowmissing([:a 1]) == [:a 1] + @test allowmissing([:a 1]) isa AbstractArray{Any, 2} + @test isequal(allowmissing([1 missing]), [1 missing]) + @test allowmissing([1 missing]) isa AbstractArray{Union{Int, Missing}, 2} + @test isequal(allowmissing([missing missing]), [missing missing]) + @test allowmissing([missing missing]) isa AbstractArray{Missing, 2} + + @test disallowmissing(Union{Int, Missing}[1]) == [1] + @test disallowmissing(Union{Int, Missing}[1]) isa AbstractVector{Int} + @test disallowmissing([1]) == [1] + @test disallowmissing([1]) isa AbstractVector{Int} + @test disallowmissing(Any[:a]) == [:a] + @test disallowmissing(Any[:a]) isa AbstractVector{Any} + @test_throws MethodError disallowmissing([1, missing]) + @test_throws MethodError disallowmissing([missing]) + + @test disallowmissing(Union{Int, Missing}[1 1]) == [1 1] + @test disallowmissing(Union{Int, Missing}[1 1]) isa AbstractArray{Int, 2} + @test disallowmissing([1 1]) == [1 1] + @test disallowmissing([1 1]) isa AbstractArray{Int, 2} + @test disallowmissing([:a 1]) == [:a 1] + @test disallowmissing([:a 1]) isa AbstractArray{Any, 2} + @test_throws MethodError disallowmissing([1 missing]) + @test_throws MethodError disallowmissing([missing missing]) + + # 
Lifting + ## functor + cuberoot = CubeRooter() # defined at top of file + @test passmissing(cuberoot)(27) == 3.0 + @test isequal(passmissing(cuberoot)(missing), missing) + ## type + @test passmissing(Int)(1.0) == 1 + @test isequal(passmissing(Int)(missing), missing) + ## function + @test passmissing(sqrt)(4) == 2.0 + @test isequal(passmissing(sqrt)(missing), missing) + @test isequal(passmissing(sqrt).([missing, 4]), [missing, 2.0]) + @test passmissing((x,y)->"$x $y")(1, 2) == "1 2" + @test isequal(passmissing((x,y)->"$x $y")(missing), missing) + if VERSION >= v"1.4.0-DEV" + @test_throws MethodError passmissing(string)(missing, base=2) + else + @test_throws ErrorException passmissing(string)(missing, base=2) + end + + @test passmissing(sin) === Missings.PassMissing{typeof(sin)}(sin) + @test passmissing(Int) === Missings.PassMissing{Type{Int}}(Int) + @test passmissing(cuberoot) === Missings.PassMissing{CubeRooter}(cuberoot) + + @testset "deprecated" begin + # The (unexported) `Missings.T` was deprecated to `Missings.nonmissingtype` + for x in (Union{Int, Missing}, Any, Missing, Union{Array{Int}, Missing}) + @test Missings.T(x) == Missings.nonmissingtype(x) + end + end +end diff --git a/test/runtests.jl b/test/runtests.jl index 7c18225..f642921 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,6 +4,8 @@ using Test, SparseArrays, Missings struct CubeRooter end (::CubeRooter)(x) = cbrt(x) +include("spreadmissings.jl") + @testset "Missings" begin x = Missings.replace([1, 2, missing, 4], 3) @test eltype(x) === Int diff --git a/test/spreadmissings.jl b/test/spreadmissings.jl new file mode 100644 index 0000000..3733c9b --- /dev/null +++ b/test/spreadmissings.jl @@ -0,0 +1,212 @@ +module SpreadMissingTests + +using Test, Missings, CategoricalArrays + +const ≈ = isequal + +function small_vec(args...; kwargs...) + [1, 2] +end + +function right_vec(args...; kwargs...) + kwargs_vals = values(values(kwargs)) + xs = tuple(args..., kwargs_vals...) 
+ vecs = Base.filter(x -> x isa AbstractVector, xs) + if !isempty(vecs) + collect(1:length(first(vecs))) + else + [-1] + end +end + +function scalar(args...; kwargs...) + 1 +end + +xmiss = [1, 2, 3, missing] +x = [1, 2, 3, 4] + +ymiss = [missing, 200, 300, 400] +y = [100, 200, 300, 400] + +s = [1000, 2000] + +@testset "vector, :default" begin + ### missings in main arg only + t = spreadmissings(right_vec)(xmiss) + @test t ≈ [1, 2, 3, missing] + ### missing in keyword arg only + t = spreadmissings(right_vec)(; z = ymiss) + @test t ≈ [missing, 1, 2, 3] + ### missing in both main arg and keyword arg + t = spreadmissings(right_vec)(x, xmiss; z = ymiss) + @test t ≈ [missing, 1, 2, missing] + ### missings nowhere + t = spreadmissings(right_vec)(x; z = y) + @test t ≈ [1, 2, 3, 4] + @test t isa Vector{Int} + ### Mis-matched vector lengths + @test_throws DimensionMismatch spreadmissings(right_vec)(x, s) + ### no vectors + t = spreadmissings(right_vec)(1, 2; z = 9) + @test t == [-1] +end + +@testset "vector, :nonmissing" begin + ### missings in main arg only + t = spreadmissings(right_vec; spread = :nonmissing)(xmiss) + @test t ≈ [1, 2, 3, missing] + ### missing in keyword arg only + t = spreadmissings(right_vec; spread = :nonmissing)(; z = ymiss) + @test t ≈ [missing, 1, 2, 3] + ### missing in both main arg and keyword arg + t = spreadmissings(right_vec; spread = :nonmissing)(x, xmiss; z = ymiss) + @test t ≈ [missing, 1, 2, missing] + ### missings nowhere + t = spreadmissings(right_vec; spread = :nonmissing)(x; z = y) + @test t ≈ [1, 2, 3, 4] + @test t isa Vector{Int} + ### Mis-matched vector lengths + @test_throws DimensionMismatch spreadmissings(right_vec; spread = :nonmissing)(x, s) + ### no vectors + t = spreadmissings(right_vec)(1, 2; z = 9) + @test t == [-1] +end + +@testset "vector, :none" begin + t = spreadmissings(right_vec; spread = :none)(xmiss) + @test t ≈ [1, 2, 3] + ### missing in keyword arg only + t = spreadmissings(right_vec; spread = :none)(; z = ymiss) + 
@test t ≈ [1, 2, 3] + ### missing in both main arg and keyword arg + t = spreadmissings(right_vec; spread = :none)(x, xmiss; z = ymiss) + @test t ≈ [1, 2] + ### missings nowhere + t = spreadmissings(right_vec; spread = :none)(x; z = y) + @test t ≈ [1, 2, 3, 4] + @test t isa Vector{Int} + ### Mis-matched vector lengths + @test_throws DimensionMismatch spreadmissings(right_vec; spread = :none)(x, s) +end + +@testset "vector, :all" begin + ### missings in main arg only + @test_throws ArgumentError spreadmissings(right_vec; spread = :all)(xmiss) + + ### missing in keyword arg only + @test_throws ArgumentError spreadmissings(right_vec; spread = :all)(; z = ymiss) + + ### missing in both main arg and keyword arg + @test_throws ArgumentError spreadmissings(right_vec; spread = :all)(x, xmiss; z = ymiss) + + ### missings nowhere + @test_throws ArgumentError spreadmissings(right_vec; spread = :all)(x; z = y) + + ### Mis-matched vector lengths + @test_throws DimensionMismatch spreadmissings(right_vec)(x, s) + ### no vectors + t = spreadmissings(right_vec)(1, 2; z = 9) + @test t == [-1] +end + +@testset "non-vector, :default" begin + ### missings in main arg only + t = spreadmissings(scalar; spread = :default)(xmiss) + @test t ≈ 1 + ### missing in keyword arg only + t = spreadmissings(scalar; spread = :default)(; z = ymiss) + @test t ≈ 1 + ### missing in both main arg and keyword arg + t = spreadmissings(scalar; spread = :default)(x, xmiss; z = ymiss) + @test t ≈ 1 + ### missings nowhere + t = spreadmissings(scalar; spread = :default)(x; z = y) + @test t ≈ 1 + ### Mis-matched vector lengths + @test_throws DimensionMismatch spreadmissings(scalar; spread = :default)(x, s) + ### no vectors + t = spreadmissings(scalar; spread = :default)(1, 2; z = 9) + @test t == 1 +end + +@testset "non-vector, :nonmissing" begin + ### missings in main arg only + t = spreadmissings(scalar; spread = :nonmissing)(xmiss) + @test t ≈ [1, 1, 1, missing] + ### missing in keyword arg only + t = 
spreadmissings(scalar; spread = :nonmissing)(; z = ymiss) + @test t ≈ [missing, 1, 1, 1] + ### missing in both main arg and keyword arg + t = spreadmissings(scalar; spread = :nonmissing)(x, xmiss; z = ymiss) + @test t ≈ [missing, 1, 1, missing] + ### missings nowhere + t = spreadmissings(scalar; spread = :nonmissing)(x; z = y) + @test t ≈ [1, 1, 1, 1] + ### Mis-matched vector lengths + @test_throws DimensionMismatch spreadmissings(scalar; spread = :nonmissing)(x, s) + ### no vectors + t = spreadmissings(scalar; spread = :nonmissing)(1, 2; z = 9) + @test t == 1 +end + +@testset "non-vector, :none" begin + ### missings in main arg only + t = spreadmissings(scalar; spread = :none)(xmiss) + @test t ≈ 1 + ### missing in keyword arg only + t = spreadmissings(scalar; spread = :none)(; z = ymiss) + @test t ≈ 1 + ### missing in both main arg and keyword arg + t = spreadmissings(scalar; spread = :none)(x, xmiss; z = ymiss) + @test t ≈ 1 + ### missings nowhere + t = spreadmissings(scalar; spread = :none)(x; z = y) + @test t ≈ 1 + ### Mis-matched vector lengths + @test_throws DimensionMismatch spreadmissings(scalar; spread = :none)(x, s) + ### no vectors + t = spreadmissings(scalar; spread = :none)(1, 2; z = 9) + @test t == 1 +end + +@testset "non-vector, :all" begin + ### missings in main arg only + t = spreadmissings(scalar; spread = :all)(xmiss) + @test t ≈ [1, 1, 1, 1] + ### missing in keyword arg only + t = spreadmissings(scalar; spread = :all)(; z = ymiss) + @test t ≈ [1, 1, 1, 1] + ### missing in both main arg and keyword arg + t = spreadmissings(scalar; spread = :all)(x, xmiss; z = ymiss) + @test t ≈ [1, 1, 1, 1] + ### missings nowhere + t = spreadmissings(scalar; spread = :all)(x; z = y) + @test t ≈ [1, 1, 1, 1] + ### Mis-matched vector lengths + @test_throws DimensionMismatch spreadmissings(scalar; spread = :all)(x, s) + ### no vectors + t = spreadmissings(scalar; spread = :all)(1, 2; z = 9) + @test t == 1 +end + +@testset "categorical" begin + x = [1, 2, 3] + t = 
spreadmissings(categorical)(x) + @test t == categorical([1, 2, 3]) + @test typeof(t) == typeof(categorical([1, 2, 3])) + + x = [1, 2, 3, missing] + t = spreadmissings(categorical)(x) + @test t ≈ categorical([1, 2, 3, missing]) + @test typeof(t) == typeof(categorical([1, 2, 3, missing])) +end + +# Error on missing +alwaysone(args...; kwargs...) = 1 +@test_throws ArgumentError spreadmissings(alwaysone)(missing) +@test_throws ArgumentError spreadmissings(alwaysone)(; a = missing) +@test_throws ArgumentError spreadmissings(alwaysone)([1, 2], missing) +@test_throws ArgumentError spreadmissings(alwaysone)([1, 2]; a = missing) + +end # module \ No newline at end of file