diff --git a/.travis.yml b/.travis.yml index 3497f72..1f98ac8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,6 @@ language: julia julia: - - 0.4 - - 0.5 + # - 0.6 - nightly os: - linux diff --git a/REQUIRE b/REQUIRE index b4b9b4d..e75c726 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1,4 +1,5 @@ -julia 0.4 -StatsBase 0.3 +julia 0.6- Compat 0.8.6 +StatsBase 0.3 Reexport +SpecialFunctions diff --git a/appveyor.yml b/appveyor.yml index cd60c55..8332183 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,9 +1,7 @@ environment: matrix: - - JULIAVERSION: "julialang/bin/winnt/x86/0.4/julia-0.4-latest-win32.exe" - - JULIAVERSION: "julialang/bin/winnt/x64/0.4/julia-0.4-latest-win64.exe" - - JULIAVERSION: "julialang/bin/winnt/x86/0.5/julia-0.5-latest-win32.exe" - - JULIAVERSION: "julialang/bin/winnt/x64/0.5/julia-0.5-latest-win64.exe" + # - JULIAVERSION: "julialang/bin/winnt/x86/0.6/julia-0.6-latest-win32.exe" + # - JULIAVERSION: "julialang/bin/winnt/x64/0.6/julia-0.6-latest-win64.exe" - JULIAVERSION: "julianightlies/bin/winnt/x86/julia-latest-win32.exe" - JULIAVERSION: "julianightlies/bin/winnt/x64/julia-latest-win64.exe" diff --git a/src/DataArrays.jl b/src/DataArrays.jl index f0574cb..8d9292f 100644 --- a/src/DataArrays.jl +++ b/src/DataArrays.jl @@ -1,14 +1,14 @@ __precompile__() module DataArrays + using Base: promote_op using Base.Cartesian, Compat, Reexport - import Compat.String @reexport using StatsBase + using SpecialFunctions const DEFAULT_POOLED_REF_TYPE = UInt32 - import Base: ==, !=, >, <, >=, <=, +, -, *, !, &, |, $, ^, /, - .==, .!=, .>, .<, .>=, .<=, .+, .-, .*, .%, ./, .\, .^ + import Base: ==, !=, >, <, >=, <=, +, -, *, !, &, |, $, ^, / import StatsBase: autocor, inverse_rle, rle @@ -80,10 +80,4 @@ module DataArrays include("predicates.jl") include("literals.jl") include("deprecated.jl") - - Base.@deprecate removeNA dropna - Base.@deprecate each_failNA each_failna - Base.@deprecate each_replaceNA each_replacena - Base.@deprecate set_levels setlevels - Base.@deprecate set_levels! setlevels! end diff --git a/src/abstractdataarray.jl b/src/abstractdataarray.jl index 6321ea4..2c74ff4 100644 --- a/src/abstractdataarray.jl +++ b/src/abstractdataarray.jl @@ -2,17 +2,17 @@ #' #' An AbstractDataArray is an Array whose entries can take on #' values of type `T` or the value `NA`. -abstract AbstractDataArray{T, N} <: AbstractArray{T, N} +abstract type AbstractDataArray{T, N} <: AbstractArray{T, N} end #' @description #' #' An AbstractDataVector is an AbstractDataArray of order 1. -typealias AbstractDataVector{T} AbstractDataArray{T, 1} +const AbstractDataVector{T} = AbstractDataArray{T, 1} #' @description #' #' An AbstractDataMatrix is an AbstractDataArray of order 2. -typealias AbstractDataMatrix{T} AbstractDataArray{T, 2} +const AbstractDataMatrix{T} = AbstractDataArray{T, 2} #' @description #' Determine the type of the elements of an AbstractDataArray. @@ -121,7 +121,7 @@ dropna(v::AbstractVector) = copy(v) # -> AbstractVector # TODO: Use values() # Use DataValueIterator type? -type EachFailNA{T} +struct EachFailNA{T} da::AbstractDataArray{T} end each_failna{T}(da::AbstractDataArray{T}) = EachFailNA(da) @@ -136,7 +136,7 @@ function Base.next(itr::EachFailNA, ind::Integer) end end -type EachDropNA{T} +struct EachDropNA{T} da::AbstractDataArray{T} end each_dropna{T}(da::AbstractDataArray{T}) = EachDropNA(da) @@ -154,7 +154,7 @@ function Base.next(itr::EachDropNA, ind::Int) (itr.da[ind], _next_nonna_ind(itr.da, ind)) end -type EachReplaceNA{S, T} +struct EachReplaceNA{S, T} da::AbstractDataArray{S} replacement::T end diff --git a/src/broadcast.jl b/src/broadcast.jl index e58623a..903fa81 100644 --- a/src/broadcast.jl +++ b/src/broadcast.jl @@ -3,34 +3,7 @@ using Base: @get!, promote_eltype using Base.Broadcast: bitcache_chunks, bitcache_size, dumpbitcache using Compat: promote_eltype_op -if isdefined(Base, :OneTo) - if VERSION < v"0.6.0-dev.1121" - _broadcast_shape(x...) = Base.to_shape(Base.Broadcast.broadcast_shape(x...)) - else - _broadcast_shape(x...) = Base.to_shape(Base.Broadcast.broadcast_indices(x...)) - end -else - const _broadcast_shape = Base.Broadcast.broadcast_shape -end - -# Check that all arguments are broadcast compatible with shape -# Differs from Base in that we check for exact matches -function check_broadcast_shape(shape::Dims, As::(@compat Union{AbstractArray,Number})...) - samesize = true - for A in As - if ndims(A) > length(shape) - throw(DimensionMismatch("cannot broadcast array to have fewer dimensions")) - end - for k in 1:length(shape) - n, nA = shape[k], size(A, k) - samesize &= (n == nA) - if n != nA != 1 - throw(DimensionMismatch("array could not be broadcast to match destination")) - end - end - end - samesize -end +_broadcast_shape(x...) = Base.to_shape(Base.Broadcast.broadcast_indices(x...)) # Get ref for value for a PooledDataArray, adding to the pool if # necessary @@ -44,7 +17,9 @@ end # Generate a branch for each possible combination of NA/not NA. This # gives good performance at the cost of 2^narrays branches. -function gen_na_conds(f, nd, arrtype, outtype, daidx=find([arrtype...] .!= AbstractArray), pos=1, isna=()) +function gen_na_conds(f, nd, arrtype, outtype, + daidx=find(t -> t <: DataArray || t <: PooledDataArray, arrtype), pos=1, isna=()) + if pos > length(daidx) args = Any[Symbol("v_$(k)") for k = 1:length(arrtype)] for i = 1:length(daidx) @@ -57,9 +32,9 @@ function gen_na_conds(f, nd, arrtype, outtype, daidx=find([arrtype...] .!= Abstr val = gensym("val") quote $val = $(Expr(:call, f, args...)) - $(if outtype == DataArray + $(if outtype <: DataArray :(@inbounds unsafe_dasetindex!(Bdata, Bc, $val, ind)) - elseif outtype == PooledDataArray + elseif outtype <: PooledDataArray :(@inbounds (@nref $nd Brefs i) = _unsafe_pdaref!(Bpool, Brefdict, $val)) end) end @@ -69,7 +44,7 @@ function gen_na_conds(f, nd, arrtype, outtype, daidx=find([arrtype...] .!= Abstr if $(Symbol("isna_$(k)")) $(gen_na_conds(f, nd, arrtype, outtype, daidx, pos+1, tuple(isna..., true))) else - $(if arrtype[k] == DataArray + $(if arrtype[k] <: DataArray :(@inbounds $(Symbol("v_$(k)")) = $(Symbol("data_$(k)"))[$(Symbol("state_$(k)_0"))]) else :(@inbounds $(Symbol("v_$(k)")) = $(Symbol("pool_$(k)"))[$(Symbol("r_$(k)"))]) @@ -84,271 +59,141 @@ end # # TODO: Fall back on faster implementation for same-sized inputs when # it is safe to do so. -function gen_broadcast_dataarray(nd::Int, arrtype::@compat(Tuple{Vararg{DataType}}), outtype, f::Function) - F = Expr(:quote, f) - narrays = length(arrtype) - As = [Symbol("A_$(i)") for i = 1:narrays] - dataarrays = find([arrtype...] .== DataArray) - abstractdataarrays = find([arrtype...] .!= AbstractArray) - have_fastpath = outtype == DataArray && all(x->!(x <: PooledDataArray), arrtype) +Base.map!{F}(f::F, B::Union{DataArray, PooledDataArray}, A0::AbstractArray, As::AbstractArray...) = + broadcast!(f, B, A0, As...) +Base.map!{F}(f::F, B::Union{DataArray, PooledDataArray}, A0, As...) = + broadcast!(f, B, A0, As...) - @eval let - function _F_(B::$(outtype), $(As...)) - @assert ndims(B) == $nd +@generated function _broadcast!(f, B::Union{DataArray, PooledDataArray}, As...) - # Set up input DataArray/PooledDataArrays - $(Expr(:block, [ - arrtype[k] == DataArray ? quote - $(Symbol("na_$(k)")) = $(Symbol("A_$(k)")).na.chunks - $(Symbol("data_$(k)")) = $(Symbol("A_$(k)")).data - $(Symbol("state_$(k)_0")) = $(Symbol("state_$(k)_$(nd)")) = 1 - @nexprs $nd d->($(Symbol("skip_$(k)_d")) = size($(Symbol("data_$(k)")), d) == 1) - end : arrtype[k] == PooledDataArray ? quote - $(Symbol("refs_$(k)")) = $(Symbol("A_$(k)")).refs - $(Symbol("pool_$(k)")) = $(Symbol("A_$(k)")).pool - end : nothing - for k = 1:narrays]...)) + F = :(f) + nd = ndims(B) + N = length(As) - # Set up output DataArray/PooledDataArray - $(if outtype == DataArray - quote - Bdata = B.data - # Copy in case aliased - # TODO: check for aliasing? - Bna = falses(size(Bdata)) - Bc = Bna.chunks - ind = 1 - end - elseif outtype == PooledDataArray - quote - Bpool = B.pool = similar(B.pool, 0) - Brefs = B.refs - Brefdict = Dict{eltype(Bpool),eltype(Brefs)}() - end - end) + dataarrays = find(t -> t <: DataArray, As) - @nloops($nd, i, $(outtype == DataArray ? (:Bdata) : (:Brefs)), - # pre - d->($(Expr(:block, [ - arrtype[k] == DataArray ? quote - $(Symbol("state_$(k)_")){d-1} = $(Symbol("state_$(k)_d")); - $(Symbol("j_$(k)_d")) = $(Symbol("skip_$(k)_d")) ? 1 : i_d + quote + @boundscheck Base.Broadcast.check_broadcast_indices(indices(B), As...) + # check_broadcast_shape(size(B), As...) + @nexprs $N i->(A_i = As[i]) + + @assert ndims(B) == $nd + + # Set up input DataArray/PooledDataArrays + $(Expr(:block, [ + As[k] <: DataArray ? quote + $(Symbol("na_$(k)")) = $(Symbol("A_$(k)")).na.chunks + $(Symbol("data_$(k)")) = $(Symbol("A_$(k)")).data + $(Symbol("state_$(k)_0")) = $(Symbol("state_$(k)_$(nd)")) = 1 + @nexprs $nd d->($(Symbol("skip_$(k)_d")) = size($(Symbol("data_$(k)")), d) == 1) + end : As[k] <: PooledDataArray ? quote + $(Symbol("refs_$(k)")) = $(Symbol("A_$(k)")).refs + $(Symbol("pool_$(k)")) = $(Symbol("A_$(k)")).pool + end : nothing + for k = 1:N]...)) + + # Set up output DataArray/PooledDataArray + $(if B <: DataArray + quote + Bdata = B.data + # Copy in case aliased + # TODO: check for aliasing? + Bna = falses(size(Bdata)) + Bc = Bna.chunks + ind = 1 + end + elseif B <: PooledDataArray + quote + Bpool = B.pool = similar(B.pool, 0) + Brefs = B.refs + Brefdict = Dict{eltype(Bpool),eltype(Brefs)}() + end + end) + + @nloops($nd, i, $(B <: DataArray ? (:Bdata) : (:Brefs)), + # pre + d->($(Expr(:block, [ + As[k] <: DataArray ? quote + $(Symbol("state_$(k)_")){d-1} = $(Symbol("state_$(k)_d")); + $(Symbol("j_$(k)_d")) = $(Symbol("skip_$(k)_d")) ? 1 : i_d + end : (As[k] <: AbstractArray ? quote + $(Symbol("j_$(k)_d")) = size($(Symbol("A_$(k)")), d) == 1 ? 1 : i_d + end : quote + $(Symbol("j_$(k)_d")) = 1 + end) + for k = 1:N]...))), + + # post + d->($(Expr(:block, [quote + $(Symbol("skip_$(k)_d")) || ($(Symbol("state_$(k)_d")) = $(Symbol("state_$(k)_0"))) + end for k in dataarrays]...))), + + # body + begin + # Advance iterators for DataArray and determine NA status + $(Expr(:block, [ + As[k] <: DataArray ? quote + @inbounds $(Symbol("isna_$(k)")) = Base.unsafe_bitgetindex($(Symbol("na_$(k)")), $(Symbol("state_$(k)_0"))) + end : As[k] <: PooledDataArray ? quote + @inbounds $(Symbol("r_$(k)")) = @nref $nd $(Symbol("refs_$(k)")) d->$(Symbol("j_$(k)_d")) + $(Symbol("isna_$(k)")) = $(Symbol("r_$(k)")) == 0 + end : nothing + for k = 1:N]...)) + + # Extract values for other type + $(Expr(:block, [ + As[k] <: AbstractArray && !(As[k] <: AbstractDataArray) ? quote + # ordinary AbstractArrays + @inbounds $(Symbol("v_$(k)")) = @nref $nd $(Symbol("A_$(k)")) d->$(Symbol("j_$(k)_d")) end : quote - $(Symbol("j_$(k)_d")) = size($(Symbol("A_$(k)")), d) == 1 ? 1 : i_d + # non AbstractArrays (e.g. Strings and Numbers) + @inbounds $(Symbol("v_$(k)")) = $(Symbol("A_$(k)")) end - for k = 1:narrays]...))), - - # post - d->($(Expr(:block, [quote - $(Symbol("skip_$(k)_d")) || ($(Symbol("state_$(k)_d")) = $(Symbol("state_$(k)_0"))) - end for k in dataarrays]...))), - - # body - begin - # Advance iterators for DataArray and determine NA status - $(Expr(:block, [ - arrtype[k] == DataArray ? quote - @inbounds $(Symbol("isna_$(k)")) = Base.unsafe_bitgetindex($(Symbol("na_$(k)")), $(Symbol("state_$(k)_0"))) - end : arrtype[k] == PooledDataArray ? quote - @inbounds $(Symbol("r_$(k)")) = @nref $nd $(Symbol("refs_$(k)")) d->$(Symbol("j_$(k)_d")) - $(Symbol("isna_$(k)")) = $(Symbol("r_$(k)")) == 0 - end : nothing - for k = 1:narrays]...)) - - # Extract values for ordinary AbstractArrays - $(Expr(:block, [ - :(@inbounds $(Symbol("v_$(k)")) = @nref $nd $(Symbol("A_$(k)")) d->$(Symbol("j_$(k)_d"))) - for k = find([arrtype...] .== AbstractArray)]...)) + for k = 1:N]...)) - # Compute and store return value - $(gen_na_conds(F, nd, arrtype, outtype)) + # Compute and store return value + $(gen_na_conds(F, nd, As, B)) - # Increment state - $(Expr(:block, [:($(Symbol("state_$(k)_0")) += 1) for k in dataarrays]...)) - $(if outtype == DataArray - :(ind += 1) - end) + # Increment state + $(Expr(:block, [:($(Symbol("state_$(k)_0")) += 1) for k in dataarrays]...)) + $(if B <: DataArray + :(ind += 1) end) - - $(if outtype == DataArray - :(B.na = Bna) end) - end - _F_ - end -end - -datype(A_1::PooledDataArray, As...) = tuple(PooledDataArray, datype(As...)...) -datype(A_1::DataArray, As...) = tuple(DataArray, datype(As...)...) -datype(A_1, As...) = tuple(AbstractArray, datype(As...)...) -datype() = () - -datype_int(A_1::PooledDataArray, As...) = (@compat(UInt64(2)) | (datype_int(As...) << 2)) -datype_int(A_1::DataArray, As...) = (@compat(UInt64(1)) | (datype_int(As...) << 2)) -datype_int(A_1, As...) = (datype_int(As...) << 2) -datype_int() = @compat UInt64(0) - -# The following four methods are to avoid ambiguity warnings on 0.4 -Base.map!(f::Base.Callable, B::DataArray) = - invoke(map!, Tuple{Base.Callable, AbstractArray}, f, B) -Base.map!(f::Base.Callable, B::PooledDataArray) = - invoke(map!, Tuple{Base.Callable, AbstractArray}, f, B) -Base.broadcast!(f::Base.Function, B::DataArray) = - invoke(map!, Tuple{Base.Callable, AbstractArray}, f, B) -Base.broadcast!(f::Base.Function, B::PooledDataArray) = - invoke(map!, Tuple{Base.Callable, AbstractArray}, f, B) - -for bsig in (DataArray, PooledDataArray), asig in (Union{Array,BitArray,Number},DataArray, PooledDataArray,) - @eval let cache = Dict{Function,Dict{UInt64,Dict{Int,Function}}}() - function Base.map!(f::Base.Callable, B::$bsig, As::$asig...) - nd = ndims(B) - length(As) <= 8 || throw(ArgumentError("too many arguments")) - samesize = check_broadcast_shape(size(B), As...) - samesize || throw(DimensionMismatch("Argument dimensions must match")) - arrtype = datype_int(As...) - - cache_f = @get! cache f Dict{UInt64,Dict{Int,Function}}() - cache_f_na = @get! cache_f arrtype Dict{Int,Function}() - func = @get! cache_f_na nd gen_broadcast_dataarray(nd, datype(As...), $bsig, f) - func(B, As...) - B - end - - function Base.broadcast!(f::Function, B::$bsig, As::$asig...) - nd = ndims(B) - length(As) <= 8 || throw(ArgumentError("too many arguments")) - samesize = check_broadcast_shape(size(B), As...) - arrtype = datype_int(As...) - - cache_f = @get! cache f Dict{UInt64,Dict{Int,Function}}() - cache_f_na = @get! cache_f arrtype Dict{Int,Function}() - func = @get! cache_f_na nd gen_broadcast_dataarray(nd, datype(As...), $bsig, f) - - # println(code_typed(func, typeof(tuple(B, As...)))) - func(B, As...) - B - end - end -end + $(if B <: DataArray + :(B.na = Bna) + end) -databroadcast(f::Function, As...) = broadcast!(f, DataArray(promote_eltype(As...), _broadcast_shape(As...)), As...) -pdabroadcast(f::Function, As...) = broadcast!(f, PooledDataArray(promote_eltype(As...), _broadcast_shape(As...)), As...) - -function exreplace!(ex::Expr, search, rep) - for i = 1:length(ex.args) - if ex.args[i] == search - splice!(ex.args, i, rep) - break - else - exreplace!(ex.args[i], search, rep) - end + return B end - ex end -exreplace!(ex, search, rep) = ex - -macro da_broadcast_vararg(func) - if (func.head != :function && func.head != :(=)) || - func.args[1].head != :call || !isa(func.args[1].args[end], Expr) || - func.args[1].args[end].head != :... - throw(ArgumentError("@da_broadcast_vararg may only be applied to vararg functions")) - end - - va = func.args[1].args[end] - defs = Any[] - for n = 1:4, aa = 0:n-1 - def = deepcopy(func) - rep = Any[Symbol("A_$(i)") for i = 1:n] - push!(rep, va) - exreplace!(def.args[2], va, rep) - rep = Vector{Any}(n+1) - for i = 1:aa - rep[i] = Expr(:(::), Symbol("A_$i"), AbstractArray) - end - for i = aa+1:n - rep[i] = Expr(:(::), Symbol("A_$i"), (@compat Union{DataArray, PooledDataArray})) - end - rep[end] = Expr(:..., Expr(:(::), va.args[1], AbstractArray)) - exreplace!(def.args[1], va, rep) - push!(defs, def) - end - esc(Expr(:block, defs...)) -end - -macro da_broadcast_binary(func) - if (func.head != :function && func.head != :(=)) || - func.args[1].head != :call || - length(func.args[1].args) != 3 - throw(ArgumentError("@da_broadcast_binary may only be applied to two-argument functions")) - end - (ff, A, B) = func.args[1].args - f = esc(ff) - body = func.args[2] - quote - ($f)($A::(@compat Union{DataArray, PooledDataArray}), $B::(@compat Union{DataArray, PooledDataArray})) = $(body) - ($f)($A::(@compat Union{DataArray, PooledDataArray}), $B::AbstractArray) = $(body) - ($f)($A::AbstractArray, $B::(@compat Union{DataArray, PooledDataArray})) = $(body) - end -end - -# Broadcasting DataArrays returns a DataArray -@da_broadcast_vararg Base.broadcast(f::Function, As...) = databroadcast(f, As...) - -# Definitions for operators, -(.*)(A::BitArray, B::(@compat Union{DataArray{Bool}, PooledDataArray{Bool}})) = databroadcast(*, A, B) -(.*)(A::(@compat Union{DataArray{Bool}, PooledDataArray{Bool}}), B::BitArray) = databroadcast(*, A, B) -@da_broadcast_vararg (.*)(As...) = databroadcast(*, As...) -@da_broadcast_binary (.%)(A, B) = databroadcast(%, A, B) -@da_broadcast_vararg (.+)(As...) = broadcast!(+, DataArray(promote_eltype_op(@functorize(+), As...), _broadcast_shape(As...)), As...) -@da_broadcast_binary (.-)(A, B) = - broadcast!(-, DataArray(promote_op(@functorize(-), eltype(A), eltype(B)), - _broadcast_shape(A,B)), A, B) -@da_broadcast_binary (./)(A, B) = - broadcast!(/, DataArray(promote_op(@functorize(/), eltype(A), eltype(B)), - _broadcast_shape(A, B)), A, B) -@da_broadcast_binary (.\)(A, B) = - broadcast!(\, DataArray(promote_op(@functorize(\), eltype(A), eltype(B)), - _broadcast_shape(A, B)), A, B) -(.^)(A::(@compat Union{DataArray{Bool}, PooledDataArray{Bool}}), B::(@compat Union{DataArray{Bool}, PooledDataArray{Bool}})) = databroadcast(>=, A, B) -(.^)(A::BitArray, B::(@compat Union{DataArray{Bool}, PooledDataArray{Bool}})) = databroadcast(>=, A, B) -(.^)(A::AbstractArray{Bool}, B::(@compat Union{DataArray{Bool}, PooledDataArray{Bool}})) = databroadcast(>=, A, B) -(.^)(A::(@compat Union{DataArray{Bool}, PooledDataArray{Bool}}), B::BitArray) = databroadcast(>=, A, B) -(.^)(A::(@compat Union{DataArray{Bool}, PooledDataArray{Bool}}), B::AbstractArray{Bool}) = databroadcast(>=, A, B) -@da_broadcast_binary (.^)(A, B) = - broadcast!(^, DataArray(promote_op(@functorize(^), eltype(A), eltype(B)), - _broadcast_shape(A, B)), A, B) - -# XXX is a PDA the right return type for these? -Base.broadcast(f::Function, As::PooledDataArray...) = pdabroadcast(f, As...) -(.*)(As::PooledDataArray...) = pdabroadcast(*, As...) -(.%)(A::PooledDataArray, B::PooledDataArray) = pdabroadcast(%, A, B) -(.+)(As::PooledDataArray...) = - broadcast!(+, PooledDataArray(promote_eltype_op(@functorize(+), As...), _broadcast_shape(As...)), As...) -(.-)(A::PooledDataArray, B::PooledDataArray) = - broadcast!(-, PooledDataArray(promote_op(@functorize(-), eltype(A), eltype(B)), - _broadcast_shape(A,B)), A, B) -(./)(A::PooledDataArray, B::PooledDataArray) = - broadcast!(/, PooledDataArray(promote_op(@functorize(/), eltype(A), eltype(B)), - _broadcast_shape(A, B)), A, B) -(.\)(A::PooledDataArray, B::PooledDataArray) = - broadcast!(\, PooledDataArray(promote_op(@functorize(\), eltype(A), eltype(B)), - _broadcast_shape(A, B)), A, B) -(.^)(A::PooledDataArray{Bool}, B::PooledDataArray{Bool}) = databroadcast(>=, A, B) -(.^)(A::PooledDataArray, B::PooledDataArray) = - broadcast!(^, PooledDataArray(promote_op(@functorize(^), eltype(A), eltype(B)), - _broadcast_shape(A, B)), A, B) - -for (sf, vf) in zip(scalar_comparison_operators, array_comparison_operators) - @eval begin - # ambiguity - $(vf)(A::(@compat Union{PooledDataArray{Bool},DataArray{Bool}}), B::(@compat Union{PooledDataArray{Bool},DataArray{Bool}})) = - broadcast!($sf, DataArray(Bool, _broadcast_shape(A, B)), A, B) - $(vf)(A::(@compat Union{PooledDataArray{Bool},DataArray{Bool}}), B::AbstractArray{Bool}) = - broadcast!($sf, DataArray(Bool, _broadcast_shape(A, B)), A, B) - $(vf)(A::AbstractArray{Bool}, B::(@compat Union{PooledDataArray{Bool},DataArray{Bool}})) = - broadcast!($sf, DataArray(Bool, _broadcast_shape(A, B)), A, B) - - @da_broadcast_binary $(vf)(A, B) = broadcast!($sf, DataArray(Bool, _broadcast_shape(A, B)), A, B) - end +Base.Broadcast.broadcast!(f, B::Union{DataArray, PooledDataArray}, ::Type{T}, As...) where T = + _broadcast!((t...) -> f(T, t...), B, As...) +Base.Broadcast.broadcast!(f, B::Union{DataArray, PooledDataArray}, A0::Number, As::Number...) = + _broadcast!(f, B, A0, As...) +Base.Broadcast.broadcast!(f, B::Union{DataArray, PooledDataArray}, A0, As...) = + _broadcast!(f, B, A0, As...) + +Base.Broadcast.promote_containertype(::Type{DataArray}, ::Type{DataArray}) = DataArray +Base.Broadcast.promote_containertype(::Type{PooledDataArray}, ::Type{PooledDataArray}) = PooledDataArray +Base.Broadcast.promote_containertype(::Type{DataArray}, ::Type{Array}) = DataArray +Base.Broadcast.promote_containertype(::Type{PooledDataArray}, ::Type{Array}) = PooledDataArray +Base.Broadcast.promote_containertype(::Type{Array}, ::Type{DataArray}) = DataArray +Base.Broadcast.promote_containertype(::Type{Array}, ::Type{PooledDataArray}) = PooledDataArray +Base.Broadcast.promote_containertype(::Type{DataArray}, ::Type{PooledDataArray}) = DataArray +Base.Broadcast.promote_containertype(::Type{PooledDataArray}, ::Type{DataArray}) = DataArray +Base.Broadcast.promote_containertype(::Type{DataArray}, ct) = DataArray +Base.Broadcast.promote_containertype(::Type{PooledDataArray}, ct) = PooledDataArray +Base.Broadcast.promote_containertype(ct, ::Type{DataArray}) = DataArray +Base.Broadcast.promote_containertype(ct, ::Type{PooledDataArray}) = PooledDataArray +Base.Broadcast._containertype(::Type{T}) where T<:DataArray = DataArray +Base.Broadcast._containertype(::Type{T}) where T<:PooledDataArray = PooledDataArray +Base.Broadcast.broadcast_indices(::Type{T}, A) where T<:AbstractDataArray = indices(A) + +@inline function Base.Broadcast.broadcast_c{S<:AbstractDataArray}(f, ::Type{S}, A, Bs...) + T = Base.Broadcast._broadcast_eltype(f, A, Bs...) + shape = Base.Broadcast.broadcast_indices(A, Bs...) + dest = S(T, Base.index_lengths(shape...)) + return broadcast!(f, dest, A, Bs...) end diff --git a/src/dataarray.jl b/src/dataarray.jl index e875a5b..22afd47 100644 --- a/src/dataarray.jl +++ b/src/dataarray.jl @@ -18,11 +18,11 @@ #' dv = DataArray([1, 2, 3], [false, false, true]) #' #' dm = DataArray([1 2; 3 4], [false false; true false]) -type DataArray{T, N} <: AbstractDataArray{T, N} +mutable struct DataArray{T, N} <: AbstractDataArray{T, N} data::Array{T, N} na::BitArray{N} - function DataArray(d::Array{T, N}, m::BitArray{N}) + function DataArray{T,N}(d::Array{T, N}, m::BitArray{N}) where {T, N} # Ensure data values and missingness metadata match if size(d) != size(m) msg = "Data and missingness arrays must be the same size" @@ -45,12 +45,12 @@ end #' @description #' #' An DataVector is an DataArray of order 1. -typealias DataVector{T} DataArray{T, 1} +const DataVector{T} = DataArray{T, 1} #' @description #' #' An DataMatrix is an DataArray of order 2. -typealias DataMatrix{T} DataArray{T, 2} +const DataMatrix{T} = DataArray{T, 2} #' @description #' @@ -108,7 +108,7 @@ end #' #' da = DataArray(Int, 2, 2) function DataArray(T::Type, dims::Integer...) # -> DataArray{T} - return DataArray(Array(T, dims...), trues(dims...)) + return DataArray(Array{T}(dims...), trues(dims...)) end #' @description @@ -125,7 +125,7 @@ end #' #' da = DataArray(Int, (2, 2)) function DataArray{N}(T::Type, dims::NTuple{N, Int}) # -> DataArray{T} - return DataArray(Array(T, dims...), trues(dims...)) + return DataArray(Array{T}(dims...), trues(dims...)) end #' @description @@ -176,11 +176,7 @@ function Base.copy!(dest::DataArray, doffs::Integer, src::DataArray, soffs::Inte if n == 0 return dest elseif n < 0 - if VERSION >= v"0.5.0-dev+4711" - throw(ArgumentError("tried to copy n=$n elements, but n should be nonnegative")) - else - throw(BoundsError()) - end + throw(ArgumentError("tried to copy n=$n elements, but n should be nonnegative")) end if isbits(eltype(src)) copy!(dest.data, doffs, src.data, soffs, n) @@ -250,7 +246,7 @@ end #' dv = @data [false, false, true, false] #' dv_new = similar(dv, Float64, 2, 2, 2) function Base.similar(da::DataArray, T::Type, dims::Dims) #-> DataArray{T} - return DataArray(Array(T, dims), trues(dims)) + return DataArray(Array{T}(dims), trues(dims)) end #' @description @@ -329,7 +325,7 @@ function Base.find(da::DataArray{Bool}) # -> Array{Int} @inbounds @bitenumerate da.na i na begin ntrue += !na && data[i] end - res = Array(Int, ntrue) + res = Vector{Int}(ntrue) count = 1 @inbounds @bitenumerate da.na i na begin if !na && data[i] @@ -389,7 +385,7 @@ function Base.convert{S, T, N}( replacement::Any ) # -> Array{S, N} replacementS = convert(S, replacement) - res = Array(S, size(da)) + res = Array{S}(size(da)) for i in 1:length(da) if da.na[i] res[i] = replacementS @@ -428,7 +424,7 @@ end #' #' dv = @data [1, 2, NA, 4] #' v = dropna(dv) -dropna(dv::DataVector) = copy(dv.data[!dv.na]) # -> Vector +dropna(dv::DataVector) = dv.data[.!dv.na] # -> Vector #' @description #' @@ -497,23 +493,6 @@ anyna(da::DataArray) = any(da.na) # -> Bool #' allna(da) allna(da::DataArray) = all(da.na) # -> Bool -#' @description -#' -#' Determine if the entries of an DataArray are `NaN`. -#' -#' @param da::DataArray{T, N} The DataArray whose elements will -#' be assessed. -#' -#' @returns na::DataArray{Bool} Elementwise Boolean whether entry is `NaN`. -#' -#' @examples -#' -#' da = @data([1, 2, 3]) -#' isnan(da) -function Base.isnan(da::DataArray) # -> DataArray{Bool} - return @compat DataArray(isnan.(da.data), copy(da.na)) -end - #' @description #' #' Determine if the entries of an DataArray are finite, which means @@ -530,7 +509,7 @@ end #' isfinite(da) function Base.isfinite(da::DataArray) # -> DataArray{Bool} n = length(da) - res = Array(Bool, size(da)) + res = Array{Bool}(size(da)) for i in 1:n if !da.na[i] res[i] = isfinite(da.data[i]) @@ -657,7 +636,7 @@ end #' dv = @data [1, 2, NA, 4] #' distinct_values, firstna = finduniques(dv) function finduniques{T}(da::DataArray{T}) # -> Vector{T}, Int - out = Array(T,0) + out = Vector{T}(0) seen = Set{T}() n = length(da) firstna = 0 @@ -696,7 +675,7 @@ function Base.unique{T}(da::DataArray{T}) # -> DataVector{T} unique_values, firstna = finduniques(da) n = length(unique_values) if firstna > 0 - res = DataArray(Array(T, n + 1)) + res = DataArray(Vector{T}(n + 1)) i = 1 for val in unique_values if i == firstna diff --git a/src/datavector.jl b/src/datavector.jl index f30e5a7..5a4a6e2 100644 --- a/src/datavector.jl +++ b/src/datavector.jl @@ -44,14 +44,14 @@ function Base.shift!{T}(dv::DataVector{T}) end end -function Base.splice!(dv::DataVector, inds::(@compat Union{Integer, UnitRange{Int}})) +function Base.splice!(dv::DataVector, inds::Union{Integer, UnitRange{Int}}) v = dv[inds] deleteat!(dv.data, inds) deleteat!(dv.na, inds) v end -function Base.splice!(dv::DataVector, inds::(@compat Union{Integer, UnitRange{Int}}), ins::AbstractVector) +function Base.splice!(dv::DataVector, inds::Union{Integer, UnitRange{Int}}, ins::AbstractVector) # We cannot merely use the implementation in Base because this # needs to handle NA in the replacement vector v = dv[inds] @@ -68,32 +68,14 @@ function Base.splice!(dv::DataVector, inds::(@compat Union{Integer, UnitRange{In l = last(inds) d = length(inds) - if VERSION >= v"0.5.0-dev+5022" - if m < d - delta = d - m - i = (f - 1 < n - l) ? f : (l - delta + 1) - Base._deleteat!(a, i, delta) - elseif m > d - delta = m - d - i = (f - 1 < n - l) ? f : (l + 1) - Base._growat!(a, i, delta) - end - else - if m < d - delta = d - m - if f-1 < n-l - Base._deleteat_beg!(a, f, delta) - else - Base._deleteat_end!(a, l-delta+1, delta) - end - elseif m > d - delta = m - d - if f-1 < n-l - Base._growat_beg!(a, f, delta) - else - Base._growat_end!(a, l+1, delta) - end - end + if m < d + delta = d - m + i = (f - 1 < n - l) ? f : (l - delta + 1) + Base._deleteat!(a, i, delta) + elseif m > d + delta = m - d + i = (f - 1 < n - l) ? f : (l + 1) + Base._growat!(a, i, delta) end for k = 1:m @@ -156,13 +138,13 @@ Base.shift!(pdv::PooledDataVector) = pdv.pool[shift!(pdv.refs)] Base.reverse(x::AbstractDataVector) = x[end:-1:1] -function Base.splice!(pdv::PooledDataVector, inds::(@compat Union{Integer, UnitRange{Int}})) +function Base.splice!(pdv::PooledDataVector, inds::Union{Integer, UnitRange{Int}}) v = pdv[inds] deleteat!(pdv.refs, inds) v end -function Base.splice!(pdv::PooledDataVector, inds::(@compat Union{Integer, UnitRange{Int}}), ins::AbstractVector) +function Base.splice!(pdv::PooledDataVector, inds::Union{Integer, UnitRange{Int}}, ins::AbstractVector) v = pdv[inds] splice!(pdv.refs, inds, [getpoolidx(pdv, v) for v in ins]) v diff --git a/src/deprecated.jl b/src/deprecated.jl index ec0af60..ed4a288 100644 --- a/src/deprecated.jl +++ b/src/deprecated.jl @@ -1,146 +1,5 @@ -# Note: These methods need a more helpfull error message than a `NoMethodError`, -# when the deprecation is removed -import Base.@deprecate -import Base.Operators: / -@deprecate (/)(x::(@compat Union{NAtype,Number}),A::AbstractDataArray) x ./ A - -#' @description -#' -#' Turn a DataArray into an Array. Raises an error if NA's are encountered. -#' -#' @param da::DataArray{T} DataArray that will be converted to an Array. -#' -#' @returns a::Array{T} Array containing values of `da`. -#' -#' @examples -#' -#' dv = @data [1, 2, 3, 4] -#' v = convert(Vector, dv) -#' -#' dm = @data [1 2; 3 4] -#' m = convert(Matrix, dm) -function array{T}(da::DataArray{T}) # -> Array{T} - Base.depwarn( - """ - array(da::DataArray{T}) is deprecated. - Use convert(Array, da). - """, - :array - ) - res = Array(T, size(da)) - for i in 1:length(da) - if da.na[i] - throw(NAException()) - else - res[i] = da.data[i] - end - end - return res +# Deprecate in Julia 0.6 cycle +function Base.isnan(da::DataArray) + Base.depwarn("vectorized method isnan(da) is deprecated, use isnan.(da) instead", :isnan) + return isnan.(da) end - -#' @description -#' -#' Turn a DataArray into an Array. Replace any NA's with the value -#' of second argument, `replacement`. -#' -#' @param da::DataArray{T} DataArray that will be converted to an Array. -#' @param replacement::T Value that will replace NA's in `da`. -#' -#' @returns a::Array{T} Array containing values of `da` plus replacements. -#' -#' @examples -#' -#' dv = @data [1, 2, NA, 4] -#' v = convert(Vector, dv, 3) -#' -#' dm = @data [1 2; NA 4] -#' m = convert(Matrix, dm, 3) -function array{T}(da::DataArray{T}, replacement::Any) # -> Array{T} - Base.depwarn( - """ - array(da::DataArray{T}, replacement::Any) is deprecated. - Use convert(Array, da, replacement) instead. - """, - :array - ) - res = Array(T, size(da)) - replacementT = convert(T, replacement) - for i in 1:length(da) - if da.na[i] - res[i] = replacementT - else - res[i] = da.data[i] - end - end - return res -end - -# Turn a PooledDataArray into an Array. Fail on NA -function array{T, R}(da::PooledDataArray{T, R}) - Base.depwarn( - """ - array(pda::PooledDataArray{T, R}) is deprecated. - Use convert(Array, pda) instead. - """, - :array - ) - n = length(da) - res = Array(T, size(da)) - for i in 1:n - if da.refs[i] == zero(R) - throw(NAException()) - else - res[i] = da.pool[da.refs[i]] - end - end - return res -end - -function array{T, R}(da::PooledDataArray{T, R}, replacement::T) - Base.depwarn( - """ - array(pda::PooledDataArray{T, R}, replacement::T) is deprecated. - Use convert(Array, pda, replacement) instead. - """, - :array - ) - n = length(da) - res = Array(T, size(da)) - for i in 1:n - if da.refs[i] == zero(R) - res[i] = replacement - else - res[i] = da.pool[da.refs[i]] - end - end - return res -end - -@deprecate head(dv::AbstractDataVector) dv[1:min(6, end)] -@deprecate tail(dv::AbstractDataVector) dv[max(end-6, 1):end] - -function rep{T <: Integer}(x::AbstractVector, lengths::AbstractVector{T}) - Base.depwarn( - """ - rep{T <: Integer}(x::AbstractVector, lengths::AbstractVector{T}) is deprecated. - """, - :rep - ) - if length(x) != length(lengths) - throw(DimensionMismatch("vector lengths must match")) - end - res = similar(x, sum(lengths)) - i = 1 - for idx in 1:length(x) - tmp = x[idx] - for kdx in 1:lengths[idx] - res[i] = tmp - i += 1 - end - end - return res -end - -@deprecate rep(x::AbstractVector, times::Integer, each::Integer = 1) Compat.repeat(x; inner=each, outer=times) -@deprecate rep(x::AbstractVector; times::Integer = 1, each::Integer = 1) Compat.repeat(x; inner=each, outer=times) -@deprecate rep(x::Any, times::Integer) Compat.repeat(x; inner=times) diff --git a/src/extras.jl b/src/extras.jl index f5b98a7..18d9aad 100644 --- a/src/extras.jl +++ b/src/extras.jl @@ -20,11 +20,11 @@ function StatsBase.addcounts!{T,U,W}(cm::Dict{U,W}, x::AbstractDataArray{T}, wv: end function StatsBase.countmap{T}(x::AbstractDataArray{T}) - addcounts!(Dict{(@compat Union{T, NAtype}), Int}(), x) + addcounts!(Dict{Union{T, NAtype}, Int}(), x) end function StatsBase.countmap{T,W}(x::AbstractDataArray{T}, wv::WeightVec{W}) - addcounts!(Dict{(@compat Union{T, NAtype}), W}(), x, wv) + addcounts!(Dict{Union{T, NAtype}, W}(), x, wv) end function cut{S, T}(x::AbstractVector{S}, breaks::Vector{T}) @@ -49,7 +49,7 @@ function cut{S, T}(x::AbstractVector{S}, breaks::Vector{T}) n = length(breaks) from = map(x -> sprint(showcompact, x), breaks[1:(n - 1)]) to = map(x -> sprint(showcompact, x), breaks[2:n]) - pool = Array(String, n - 1) + pool = Vector{String}(n - 1) if breaks[1] == min_x pool[1] = string("[", from[1], ",", to[1], "]") else diff --git a/src/indexing.jl b/src/indexing.jl index 6e1adc7..e93c39f 100644 --- a/src/indexing.jl +++ b/src/indexing.jl @@ -20,9 +20,9 @@ unsafe_getindex_notna(a, extr, idx::Real) = Base.unsafe_getindex(a, idx) # Set NA or data portion of DataArray unsafe_bitsettrue!(chunks::Vector{UInt64}, idx::Real) = - chunks[Base._div64(@compat(Int(idx))-1)+1] |= (@compat(UInt64(1)) << Base._mod64(@compat(Int(idx))-1)) + chunks[Base._div64(Int(idx)-1)+1] |= (UInt64(1) << Base._mod64(Int(idx)-1)) unsafe_bitsetfalse!(chunks::Vector{UInt64}, idx::Real) = - chunks[Base._div64(@compat(Int(idx))-1)+1] &= ~(@compat(UInt64(1)) << Base._mod64(@compat(Int(idx))-1)) + chunks[Base._div64(Int(idx)-1)+1] &= ~(UInt64(1) << Base._mod64(Int(idx)-1)) unsafe_setna!(da::DataArray, extr, idx::Real) = unsafe_bitsettrue!(extr[2], idx) unsafe_setna!(da::PooledDataArray, extr, idx::Real) = setindex!(extr[1], 0, idx) @@ -62,7 +62,7 @@ function combine_pools!(pool, newpool) end # Find pool elements in existing array, or add them - poolidx = Array(Int, length(newpool)) + poolidx = Vector{Int}(length(newpool)) for j = 1:length(newpool) poolidx[j] = Base.@get!(seen, newpool[j], (push!(pool, newpool[j]); i += 1)) end @@ -114,6 +114,8 @@ Base.getindex(t::AbstractDataArray, i::Real) = ## getindex: DataArray +Base.IndexStyle(::Type{<:AbstractDataArray}) = Base.IndexLinear() + # Scalar case function Base.getindex(da::DataArray, I::Real) if getindex(da.na, I) @@ -122,61 +124,33 @@ function Base.getindex(da::DataArray, I::Real) return getindex(da.data, I) end end -@nsplat N function Base.getindex(da::DataArray, I::NTuple{N,Real}...) - if getindex(da.na, I...) - return NA - else - return getindex(da.data, I...) - end -end -if VERSION > v"0.5-" - Base.unsafe_getindex(x::Number, i::Int) = (@inbounds r = x[i]; r) -end +Base.unsafe_getindex(x::Number, i) = (@inbounds xi = x[i]; xi) -# Vector case -@generated function _getindex!(dest::DataArray, src::DataArray, I::Union{Real, AbstractArray, Colon}...) +@generated function Base._unsafe_getindex!(dest::DataArray, src::DataArray, I::Union{Real, AbstractArray}...) N = length(I) quote $(Expr(:meta, :inline)) - idxlens = index_lengths(src, I...) # TODO: unsplat? + flipbits!(dest.na) # similar initializes with NAs + @nexprs $N d->(J_d = I[d]) srcextr = daextract(src) destextr = daextract(dest) srcsz = size(src) - k = 1 - @nloops $N i d->(1:idxlens[d]) d->(@inbounds j_d = getindex(I[d], i_d)) begin + D = eachindex(dest) + Ds = start(D) + @nloops $N j d->J_d begin offset_0 = @ncall $N sub2ind srcsz j + d, Ds = next(D, Ds) if unsafe_isna(src, srcextr, offset_0) - unsafe_dasetindex!(dest, destextr, NA, k) + unsafe_dasetindex!(dest, destextr, NA, d) else - unsafe_dasetindex!(dest, destextr, unsafe_getindex_notna(src, srcextr, offset_0), k) + unsafe_dasetindex!(dest, destextr, unsafe_getindex_notna(src, srcextr, offset_0), d) end - k += 1 end dest end end -function _getindex{T}(A::DataArray{T}, I::@compat Tuple{Vararg{Union{Int,AbstractVector}}}) - shape = _index_shape(A, I...) - _getindex!(DataArray(Array(T, shape), falses(shape)), A, I...) -end - -@nsplat N function Base.getindex(A::DataArray, I::NTuple{N,(@compat Union{Real,AbstractVector})}...) - checkbounds(A, I...) - _getindex(A, Base.to_indexes(I...)) -end - -# Dispatch our implementation for these cases instead of Base -function Base.getindex(A::DataArray, I::AbstractVector) - checkbounds(A, I) - _getindex(A, (Base.to_index(I),)) -end -function Base.getindex(A::DataArray, I::AbstractArray) - checkbounds(A, I) - _getindex(A, (Base.to_index(I),)) -end - ## getindex: PooledDataArray # Scalar case @@ -187,7 +161,8 @@ function Base.getindex(pda::PooledDataArray, I::Real) return pda.pool[getindex(pda.refs, I)] end end -@nsplat N function Base.getindex(pda::PooledDataArray, I::NTuple{N,Real}...) + +@inline function Base.getindex(pda::PooledDataArray, I::Integer...) if getindex(pda.refs, I...) == 0 return NA else @@ -196,16 +171,10 @@ end end # Vector case -@nsplat N function Base.getindex(A::PooledDataArray, I::NTuple{N,(@compat Union{Real,AbstractVector})}...) +@inline function Base.getindex(A::PooledDataArray, I::Union{AbstractVector,Colon}...) PooledDataArray(RefArray(getindex(A.refs, I...)), copy(A.pool)) end -# Dispatch our implementation for these cases instead of Base -Base.getindex(A::PooledDataArray, I::AbstractVector) = - PooledDataArray(RefArray(getindex(A.refs, I)), copy(A.pool)) -Base.getindex(A::PooledDataArray, I::AbstractArray) = - PooledDataArray(RefArray(getindex(A.refs, I)), copy(A.pool)) - ## setindex!: DataArray function Base.setindex!(da::DataArray, val::NAtype, i::Real) @@ -233,57 +202,59 @@ end ## setindex!: both DataArray and PooledDataArray -@ngenerate N typeof(A) function Base.setindex!(A::AbstractDataArray, x, - J::NTuple{N,(@compat Union{Real,AbstractArray})}...) - if !isa(x, AbstractArray) && isa(A, PooledDataArray) - # Only perform one pool lookup when assigning a scalar value in - # a PooledDataArray - setindex!(A.refs, getpoolidx(A, x), J...) - return A - end - - Aextr = daextract(A) - @ncall N checkbounds A J - @nexprs N d->(I_d = Base.to_index(J_d)) - stride_1 = 1 - @nexprs N d->(stride_{d+1} = stride_d*size(A,d)) - @nexprs N d->(offset_d = 1) # really only need offset_$N = 1 - if !isa(x, AbstractArray) - @nloops N i d->(1:length(I_d)) d->(@inbounds offset_{d-1} = offset_d + (Base.unsafe_getindex(I_d, i_d)-1)*stride_d) begin - if isa(x, NAtype) - @inbounds unsafe_setna!(A, Aextr, offset_0) - else - @inbounds unsafe_setnotna!(A, Aextr, offset_0) - @inbounds unsafe_dasetindex!(A, Aextr, x, offset_0) - end +@generated function Base.setindex!(A::AbstractDataArray, x, J::Union{Real,Colon,AbstractArray}...) + N = length(J) + quote + if !isa(x, AbstractArray) && isa(A, PooledDataArray) + # Only perform one pool lookup when assigning a scalar value in + # a PooledDataArray + setindex!(A.refs, getpoolidx(A, x), J...) + return A end - else - X = x - idxlens = @ncall N index_lengths A I - @ncall N setindex_shape_check X (d->idxlens[d]) - k = 1 - if isa(A, PooledDataArray) && isa(X, PooledDataArray) - # When putting one PDA into another, first unify the pools - # and then translate the references - poolmap = combine_pools!(A.pool, X.pool) - Arefs = A.refs - Xrefs = X.refs - @nloops N i d->(1:idxlens[d]) d->(@inbounds offset_{d-1} = offset_d + (Base.unsafe_getindex(I_d, i_d)-1)*stride_d) begin - @inbounds Arefs[offset_0] = Xrefs[k] == 0 ? 0 : poolmap[Xrefs[k]] - k += 1 + + Aextr = daextract(A) + @nexprs $N d->(I_d = Base.to_indices(A, J)[d]) + @ncall $N checkbounds A I + stride_1 = 1 + @nexprs $N d->(stride_{d+1} = stride_d*size(A,d)) + @nexprs $N d->(offset_d = 1) # really only need offset_$N = 1 + if !isa(x, AbstractArray) + @nloops $N i d->I_d d->(@inbounds offset_{d-1} = offset_d + (i_d - 1)*stride_d) begin + if isa(x, NAtype) + @inbounds unsafe_setna!(A, Aextr, offset_0) + else + @inbounds unsafe_setnotna!(A, Aextr, offset_0) + @inbounds unsafe_dasetindex!(A, Aextr, x, offset_0) + end end else - Xextr = daextract(X) - @nloops N i d->(1:idxlens[d]) d->(@inbounds offset_{d-1} = offset_d + (Base.unsafe_getindex(I_d, i_d)-1)*stride_d) begin - @inbounds if isa(X, AbstractDataArray) && unsafe_isna(X, Xextr, k) - unsafe_setna!(A, Aextr, offset_0) - else - unsafe_setnotna!(A, Aextr, offset_0) - unsafe_dasetindex!(A, Aextr, unsafe_getindex_notna(X, Xextr, k), offset_0) + X = x + idxlens = @ncall $N index_lengths I + @ncall $N setindex_shape_check X (d->idxlens[d]) + k = 1 + if isa(A, PooledDataArray) && isa(X, PooledDataArray) + # When putting one PDA into another, first unify the pools + # and then translate the references + poolmap = combine_pools!(A.pool, X.pool) + Arefs = A.refs + Xrefs = X.refs + @nloops $N i d->I_d d->(@inbounds offset_{d-1} = offset_d + (i_d - 1)*stride_d) begin + @inbounds Arefs[offset_0] = Xrefs[k] == 0 ? 0 : poolmap[Xrefs[k]] + k += 1 + end + else + Xextr = daextract(X) + @nloops $N i d->I_d d->(@inbounds offset_{d-1} = offset_d + (i_d - 1)*stride_d) begin + @inbounds if isa(X, AbstractDataArray) && unsafe_isna(X, Xextr, k) + unsafe_setna!(A, Aextr, offset_0) + else + unsafe_setnotna!(A, Aextr, offset_0) + unsafe_dasetindex!(A, Aextr, unsafe_getindex_notna(X, Xextr, k), offset_0) + end + k += 1 end - k += 1 end end + A end - A end diff --git a/src/literals.jl b/src/literals.jl index 9ae272a..019479a 100644 --- a/src/literals.jl +++ b/src/literals.jl @@ -1,6 +1,6 @@ function fixargs(args::Vector{Any}, stub::Any) n = length(args) - data = Array(Any, n) + data = Array{Any}(n) na = BitArray(n) for i in 1:n if args[i] == :NA @@ -47,12 +47,12 @@ end function parsevector(ex::Expr) if ex.head in (:ref, :typed_hcat, :typed_vcat) data, na = fixargs(ex.args[2:end], :(zero($(ex.args[1])))) - return Expr(ex.head, ex.args[1], data...), ex.head == :typed_hcat ? na' : na + return Expr(ex.head, ex.args[1], data...), ex.head == :typed_hcat ? reshape(na, 1, length(na)) : na else stub = findstub_vector(ex) data, na = fixargs(ex.args, stub) if ex.head == :hcat - na = na' + na = reshape(na, 1, length(na)) end if isequal(stub, NA) @@ -73,8 +73,8 @@ function parsematrix(ex::Expr) end nrows = length(rows) - datarows = Array(Expr, nrows) - narows = Array(Expr, nrows) + datarows = Array{Expr}(nrows) + narows = Array{Expr}(nrows) for irow in 1:nrows data, na = fixargs(ex.args[rows[irow]].args, stub) datarows[irow] = Expr(:row, data...) @@ -91,7 +91,7 @@ end function parsedata(ex::Expr) if length(ex.args) == 0 - return :([]), Expr(:call, :Array, :Bool, 0) + return :([]), Expr(:call, :(Array{Bool}), 0) end if ex.head == :typed_vcat || (isa(ex.args[1], Expr) && ex.args[1].head == :row) return parsematrix(ex) @@ -104,7 +104,7 @@ macro data(ex) if !(ex.head in (:vect, :vcat, :hcat, :ref, :typed_vcat, :typed_hcat)) return quote tmp = $(esc(ex)) - DataArray(tmp, bitbroadcast(x->isequal(x, NA), tmp)) + DataArray(tmp, broadcast(x->isequal(x, NA), tmp)) end end dataexpr, naexpr = parsedata(ex) @@ -115,7 +115,7 @@ macro pdata(ex) if !(ex.head in (:vect, :vcat, :hcat, :ref, :typed_vcat, :typed_hcat)) return quote tmp = $(esc(ex)) - PooledDataArray(tmp, bitbroadcast(x->isequal(x, NA), tmp)) + PooledDataArray(tmp, broadcast(x->isequal(x, NA), tmp)) end end dataexpr, naexpr = parsedata(ex) diff --git a/src/natype.jl b/src/natype.jl index 2f48f6f..490786b 100644 --- a/src/natype.jl +++ b/src/natype.jl @@ -14,24 +14,28 @@ ## ############################################################################## -type NAtype +struct NAtype end const NA = NAtype() Base.show(io::IO, x::NAtype) = print(io, "NA") -type NAException <: Exception +struct NAException <: Exception msg::String end NAException() = NAException("NA found") Base.length(x::NAtype) = 1 Base.size(x::NAtype) = () +Base.size(x::NAtype, i::Integer) = i < 1 ? throw(BoundsError()) : 1 Base.ndims(x::NAtype) = 0 +Base.getindex(x::NAtype, i) = i == 1 ? NA : throw(BoundsError()) isna(x::NAtype) = true isna(x::Any) = false # TODO: Rethink this rule Base.promote_rule{T}(::Type{T}, ::Type{NAtype} ) = T + +Base.isnan(::NAtype) = NA diff --git a/src/operators.jl b/src/operators.jl index bdc27fe..b6e21b2 100644 --- a/src/operators.jl +++ b/src/operators.jl @@ -1,98 +1,3 @@ -promote_op{R,S}(f::Any, ::Type{R}, ::Type{S}) = - Base.promote_op(f, R, S) - -# Required for /(::Int, ::Int) -if VERSION < v"0.5.0-dev" - promote_op{R<:Integer,S<:Integer}(op, ::Type{R}, ::Type{S}) = typeof(op(one(R), one(S))) -end - -const unary_operators = [:+, :-, :!, :*] - -const numeric_unary_operators = [:+, :-] - -const logical_unary_operators = [:!] - -const elementary_functions = [:(Base.abs), - :(Base.abs2), - :(Base.sign), - :(Base.acos), - :(Base.acosh), - :(Base.asin), - :(Base.asinh), - :(Base.atan), - :(Base.atanh), - :(Base.sin), - :(Base.sinh), - :(Base.conj), - :(Base.cos), - :(Base.cosh), - :(Base.tan), - :(Base.tanh), - :(Base.ceil), - :(Base.floor), - :(Base.round), - :(Base.trunc), - :(Base.exp), - :(Base.exp2), - :(Base.expm1), - :(Base.log), - :(Base.log10), - :(Base.log1p), - :(Base.log2), - :(Base.exponent), - :(Base.sqrt), - :(Base.gamma), - :(Base.lgamma), - :(Base.digamma), - :(Base.erf), - :(Base.erfc)] - -const two_argument_elementary_functions = [:(Base.round), - :(Base.ceil), - :(Base.floor), - :(Base.trunc)] - -const special_comparison_operators = [:(Base.isless)] - -const comparison_operators = [:(==),:(.==),:(!=),:(.!=),:(>),:(.>),:(>=),:(.>=),:(<),:(.<),:(<=),:(.<=)] - -const scalar_comparison_operators = [:(==),:(!=),:(>),:(>=),:(<),:(<=)] - -const array_comparison_operators = [:(.==),:(.!=),:(.>),:(.>=),:(.<),:(.<=)] - -const vectorized_comparison_operators = [:(.==),:(==),:(.!=),:(!=),:(.>),:(>),:(.>=),:(>=),:(.<),:(<),:(.<=),:(<=)] - -const binary_operators = [:(+),:(.+),:(-),:(.-),:(*),:(.*),:(/),:(./),:(.^), - :(Base.div), - :(Base.mod), - :(Base.fld), - :(Base.rem)] - -const induced_binary_operators = [(:^)] - -const arithmetic_operators = [:(+),:(.+),:(-),:(.-),:(*),:(.*),:(/),:(./),:(.^), - :(Base.div), - :(Base.mod), - :(Base.fld), - :(Base.rem)] - -const induced_arithmetic_operators = [:(^)] - -const biscalar_operators = [:(Base.maximum), - :(Base.minimum)] - -const scalar_arithmetic_operators = [:(+),:(-),:(*),:(/), - :(Base.div), - :(Base.mod), - :(Base.fld), - :(Base.rem)] - -const induced_scalar_arithmetic_operators = [:(^)] - -const array_arithmetic_operators = [:(+),:(.+),:(-),:(.-),:(.*),:(.^)] - -const bit_operators = [:(&),:(|),:($)] - const unary_vector_operators = [:(Base.median), :(StatsBase.mad), :(Base.norm), @@ -101,16 +6,6 @@ const unary_vector_operators = [:(Base.median), # TODO: dist, iqr -const pairwise_vector_operators = [:(Base.diff)] - #:(Base.reldiff), - #:(Base.percent_change)] - -const cumulative_vector_operators = [:(Base.cumprod), - :(Base.cumsum), - :(Base.cumsum_kbn), - :(Base.cummin), - :(Base.cummax)] - const ffts = [:(Base.fft)] const binary_vector_operators = [:(Base.dot), @@ -129,23 +24,9 @@ const rowwise_operators = [:rowminimums, :rowffts, :rownorms] -const columnar_operators = [:colminimums, - :colmaxs, - :colprods, - :colsums, - :colmeans, - :colmedians, - :colstds, - :colvars, - :colffts, - :colnorms] - -const boolean_operators = [:(Base.any), - :(Base.all)] - # Swap arguments to fname() anywhere in AST. Returns the number of # arguments swapped -function swapargs(ast::Expr, fname::(@compat Union{Expr, Symbol})) +function swapargs(ast::Expr, fname::Union{Expr, Symbol}) if ast.head == :call && (ast.args[1] == fname || (isa(ast.args[1], Expr) && ast.args[1].head == :curly && @@ -162,7 +43,7 @@ function swapargs(ast::Expr, fname::(@compat Union{Expr, Symbol})) n end end -function swapargs(ast, fname::(@compat Union{Expr, Symbol})) +function swapargs(ast, fname::Union{Expr, Symbol}) ast 0 end @@ -232,7 +113,7 @@ macro dataarray_binary_scalar(vectorfunc, scalarfunc, outtype, swappable) Any[ begin if outtype == :nothing - outtype = :(promote_op(@functorize($scalarfunc), + outtype = :(promote_op($scalarfunc, eltype(a), eltype(b))) end fns = Any[ @@ -257,7 +138,7 @@ macro dataarray_binary_scalar(vectorfunc, scalarfunc, outtype, swappable) if swappable # For /, Array/Number is valid but not Number/Array # All other operators should be swappable - map!(x->Expr(:macrocall, Symbol("@swappable"), x, scalarfunc), fns) + map!(x->Expr(:macrocall, Symbol("@swappable"), x, scalarfunc), fns, fns) end Expr(:block, fns...) end @@ -275,8 +156,8 @@ macro dataarray_binary_array(vectorfunc, scalarfunc) function $(vectorfunc)(a::$atype, b::$btype) data1 = $(atype == :DataArray || atype == :(DataArray{Bool}) ? :(a.data) : :a) data2 = $(btype == :DataArray || btype == :(DataArray{Bool}) ? :(b.data) : :b) - res = Array(promote_op(@functorize($vectorfunc), eltype(a), eltype(b)), - promote_shape(size(a), size(b))) + res = Array{promote_op($vectorfunc, eltype(a), eltype(b))}( + promote_shape(size(a), size(b))) resna = $narule @bitenumerate resna i na begin if !na @@ -288,7 +169,7 @@ macro dataarray_binary_array(vectorfunc, scalarfunc) end for (atype, btype, narule) in ((:(DataArray), :(Range), :(copy(a.na))), (:(Range), :(DataArray), :(copy(b.na))), - (:DataArray, :DataArray, :(a.na | b.na)), + (:DataArray, :DataArray, :(a.na .| b.na)), (:DataArray, :AbstractArray, :(copy(a.na))), (:AbstractArray, :DataArray, :(copy(b.na)))) ]..., @@ -298,7 +179,7 @@ macro dataarray_binary_array(vectorfunc, scalarfunc) quote function $(vectorfunc)(a::$atype, b::$btype) res = similar($(asim ? :a : :b), - promote_op(@functorize($vectorfunc), eltype(a), eltype(b)), + promote_op($vectorfunc, eltype(a), eltype(b)), promote_shape(size(a), size(b))) for i = 1:length(a) res[i] = $(scalarfunc)(a[i], b[i]) @@ -318,7 +199,7 @@ macro dataarray_binary_array(vectorfunc, scalarfunc) end # Unary operators, NA -for f in unary_operators +for f in [:+,:-,:*,:/] @eval $(f)(d::NAtype) = NA end @@ -330,7 +211,8 @@ end # Treat ctranspose and * in a special way for (f, elf) in ((:(Base.ctranspose), :conj), (:(Base.transpose), :identity)) @eval begin - function $(f){T}(d::(@compat Union{DataVector{T}, DataMatrix{T}})) + $(f)(::NAtype) = NA + function $(f){T}(d::DataMatrix{T}) # (c)transpose in Base uses a cache-friendly algorithm for # numeric arrays, which is faster than our naive algorithm, # but chokes on undefined values in the data array. @@ -360,8 +242,8 @@ end # But we're getting 10x R while maintaining NA's for (adata, bdata) in ((true, false), (false, true), (true, true)) @eval begin - function (*)(a::$(adata ? :((@compat Union{DataVector, DataMatrix})) : :((@compat Union{Vector, Matrix}))), - b::$(bdata ? :((@compat Union{DataVector, DataMatrix})) : :(@compat Union{Vector, Matrix}))) + function (*)(a::$(adata ? :(Union{DataVector, DataMatrix}) : :(Union{Vector, Matrix})), + b::$(bdata ? :(Union{DataVector, DataMatrix}) : :(Union{Vector, Matrix}))) c = $(adata ? :(a.data) : :a) * $(bdata ? :(b.data) : :b) res = DataArray(c, falses(size(c))) # Propagation can be made more efficient by storing record of corrupt @@ -421,11 +303,19 @@ for f in (:(Base.abs), :(Base.abs2), :(Base.conj), :(Base.sign)) end # One-argument elementary functions that always return floating points +## Base for f in (:(Base.acos), :(Base.acosh), :(Base.asin), :(Base.asinh), :(Base.atan), :(Base.atanh), :(Base.sin), :(Base.sinh), :(Base.cos), :(Base.cosh), :(Base.tan), :(Base.tanh), :(Base.exp), :(Base.exp2), :(Base.expm1), :(Base.log), :(Base.log10), :(Base.log1p), - :(Base.log2), :(Base.exponent), :(Base.sqrt), :(Base.gamma), :(Base.lgamma), - :(Base.digamma), :(Base.erf), :(Base.erfc)) + :(Base.log2), :(Base.exponent), :(Base.sqrt), :(Base.gamma), :(Base.lgamma)) + @eval begin + ($f)(::NAtype) = NA + @dataarray_unary $(f) AbstractFloat T + @dataarray_unary $(f) Real Float64 + end +end +## SpecialFunctions (should be a conditional module when supported) +for f in (:(SpecialFunctions.digamma), :(SpecialFunctions.erf), :(SpecialFunctions.erfc)) @eval begin ($f)(::NAtype) = NA @dataarray_unary $(f) AbstractFloat T @@ -473,7 +363,7 @@ end # To avoid ambiguity warning @swappable (|)(a::NAtype, b::Function) = NA -for f in (:(&), :(|), :($)) +for f in (:(&), :(|), :(Base.xor)) @eval begin # Scalar with NA ($f)(::NAtype, ::NAtype) = NA @@ -481,17 +371,17 @@ for f in (:(&), :(|), :($)) end end -# DataArray with DataArray -(&)(a::DataArray{Bool}, b::DataArray{Bool}) = - DataArray(a.data & b.data, (a.na & b.na) | (a.na & b.data) | (b.na & a.data)) -(|)(a::DataArray{Bool}, b::DataArray{Bool}) = - DataArray(a.data | b.data, (a.na & b.na) | (a.na & !b.data) | (b.na & !a.data)) -($)(a::DataArray{Bool}, b::DataArray{Bool}) = - DataArray(a.data $ b.data, a.na | b.na) +# # DataArray with DataArray +# (&)(a::DataArray{Bool}, b::DataArray{Bool}) = +# DataArray(a.data & b.data, (a.na & b.na) | (a.na & b.data) | (b.na & a.data)) +# (|)(a::DataArray{Bool}, b::DataArray{Bool}) = +# DataArray(a.data | b.data, (a.na & b.na) | (a.na & !b.data) | (b.na & !a.data)) +# ($)(a::DataArray{Bool}, b::DataArray{Bool}) = +# DataArray(a.data $ b.data, a.na | b.na) # DataArray with non-DataArray # Need explicit definition for BitArray to avoid ambiguity -for t in (:(BitArray), :(Range{Bool}), :((@compat Union{AbstractArray{Bool}, Bool}))) +for t in (:(BitArray), :(Range{Bool}), :(Union{AbstractArray{Bool}, Bool})) @eval begin @swappable (&)(a::DataArray{Bool}, b::$t) = DataArray(convert(Array{Bool}, a.data & b), a.na & b) @swappable (|)(a::DataArray{Bool}, b::$t) = DataArray(convert(Array{Bool}, a.data | b), a.na & !b) @@ -597,19 +487,11 @@ end # ambiguity @swappable (==)(::NAtype, ::WeakRef) = NA -for (sf,vf) in zip(scalar_comparison_operators, array_comparison_operators) +for sf in [:(==),:(!=),:(>),:(>=),:(<),:(<=)] @eval begin - # Array with NA - @swappable ($(vf)){T,N}(::NAtype, b::AbstractArray{T,N}) = - DataArray(Array(Bool, size(b)), trues(size(b))) - # Scalar with NA - ($(vf))(::NAtype, ::NAtype) = NA ($(sf))(::NAtype, ::NAtype) = NA - @swappable ($(vf))(::NAtype, b) = NA @swappable ($(sf))(::NAtype, b) = NA - - @dataarray_binary_scalar $(vf) $(sf) Bool true end end @@ -617,14 +499,8 @@ end # Binary operators # -# Necessary to avoid ambiguity warnings -(.^)(::Irrational{:e}, B::DataArray) = exp(B) -(.^)(::Irrational{:e}, B::AbstractDataArray) = exp(B) - -for f in (:(+), :(.+), :(-), :(.-), - :(*), :(.*), :(/), :(./), :(.^), :(Base.div), - :(Base.mod), :(Base.fld), :(Base.rem), :(Base.min), - :(Base.max)) +for f in (:(+), :(-), :(*), :(/), + :(Base.div), :(Base.mod), :(Base.fld), :(Base.rem), :(Base.min), :(Base.max)) @eval begin # Scalar with NA ($f)(::NAtype, ::NAtype) = NA @@ -700,12 +576,11 @@ end end # if isdefined(Base, :UniformScaling) -for f in (:(.+), :(.-), :(*), :(.*), :(./), - :(.^), :(Base.div), :(Base.mod), :(Base.fld), :(Base.rem)) +for f in (:(*), :(Base.div), :(Base.mod), :(Base.fld), :(Base.rem)) @eval begin # Array with NA @swappable $(f){T,N}(::NAtype, b::AbstractArray{T,N}) = - DataArray(Array(T, size(b)), trues(size(b))) + DataArray(Array{T,N}(size(b)), trues(size(b))) # DataArray with scalar @dataarray_binary_scalar $f $f nothing true @@ -715,7 +590,7 @@ end for f in (:(+), :(-)) # Array with NA @eval @swappable $(f){T,N}(::NAtype, b::AbstractArray{T,N}) = - DataArray(Array(T, size(b)), trues(size(b))) + DataArray(Array{T,N}(size(b)), trues(size(b))) end (^)(::NAtype, ::NAtype) = NA @@ -735,47 +610,59 @@ end # / is defined separately since it is not swappable (/){T,N}(b::AbstractArray{T,N}, ::NAtype) = - DataArray(Array(T, size(b)), trues(size(b))) + DataArray(Array{T,N}(size(b)), trues(size(b))) @dataarray_binary_scalar(/, /, nothing, false) -for f in biscalar_operators +for f in [:(Base.maximum), :(Base.minimum)] @eval begin ($f)(::NAtype, ::NAtype) = NA @swappable $(f)(::Number, ::NAtype) = NA end end -for f in pairwise_vector_operators - @eval function ($f)(dv::DataVector) - n = length(dv) - new_data = ($f)(dv.data) - new_na = falses(n - 1) - new_na[1] = dv.na[1] - for i = 2:(n - 1) - if dv.na[i] - new_na[i - 1] = true - new_na[i] = true - end +function Base.LinAlg.diff(dv::DataVector) + n = length(dv) + new_data = diff(dv.data) + new_na = falses(n - 1) + new_na[1] = dv.na[1] + for i = 2:(n - 1) + if dv.na[i] + new_na[i - 1] = true + new_na[i] = true end - new_na[n - 1] = new_na[n - 1] || dv.na[n] - return DataArray(new_data, new_na) end -end - -for f in cumulative_vector_operators - @eval function ($f)(dv::DataVector) - new_data = ($f)(dv.data) - new_na = falses(length(dv)) - hitna = false - @bitenumerate dv.na i na begin - hitna |= na - if hitna - new_na[i] = true - end + new_na[n - 1] = new_na[n - 1] || dv.na[n] + return DataArray(new_data, new_na) +end + +# for f in cumulative_vector_operators +# @eval function ($f)(dv::DataVector) +# new_data = ($f)(dv.data) +# new_na = falses(length(dv)) +# hitna = false +# @bitenumerate dv.na i na begin +# hitna |= na +# if hitna +# new_na[i] = true +# end +# end +# return DataArray(new_data, new_na) +# end +# end +function Base.accumulate(f, dv::DataVector) + new_data = accumulate(f, dv.data) + new_na = falses(length(dv)) + hitna = false + @bitenumerate dv.na i na begin + hitna |= na + if hitna + new_na[i] = true end - return DataArray(new_data, new_na) end + return DataArray(new_data, new_na) end +Base.cumsum(dv::DataVector) = accumulate(+, dv) +Base.cumprod(dv::DataVector) = accumulate(*, dv) for f in [unary_vector_operators; ffts] @eval ($f)(dv::DataVector) = any(dv.na) ? NA : ($f)(dv.data) @@ -900,7 +787,7 @@ function rle{T}(v::AbstractDataVector{T}) current_length = 1 values = DataArray(T, n) total_values = 1 - lengths = Array(Int16, n) + lengths = Vector{Int16}(n) total_lengths = 1 for i in 2:n if isna(v[i]) || isna(current_value) diff --git a/src/pooleddataarray.jl b/src/pooleddataarray.jl index 7b8cb0f..dcdaeed 100644 --- a/src/pooleddataarray.jl +++ b/src/pooleddataarray.jl @@ -15,16 +15,15 @@ # This is used as a wrapper during PooledDataArray construction only, to distinguish # arrays of pool indices from normal arrays -type RefArray{R<:Integer,N} +struct RefArray{R<:Integer,N} a::Array{R,N} end -type PooledDataArray{T, R<:Integer, N} <: AbstractDataArray{T, N} +mutable struct PooledDataArray{T, R<:Integer, N} <: AbstractDataArray{T, N} refs::Array{R, N} pool::Vector{T} - function PooledDataArray(rs::RefArray{R, N}, - p::Vector{T}) + function PooledDataArray{T,R,N}(rs::RefArray{R, N}, p::Vector{T}) where {T,R,N} # refs mustn't overflow pool if length(rs.a) > 0 && maximum(rs.a) > prod(size(p)) throw(ArgumentError("Reference array points beyond the end of the pool")) @@ -32,8 +31,8 @@ type PooledDataArray{T, R<:Integer, N} <: AbstractDataArray{T, N} new(rs.a,p) end end -typealias PooledDataVector{T,R} PooledDataArray{T,R,1} -typealias PooledDataMatrix{T,R} PooledDataArray{T,R,2} +const PooledDataVector{T,R} = PooledDataArray{T,R,1} +const PooledDataMatrix{T,R} = PooledDataArray{T,R,2} ############################################################################## ## @@ -70,7 +69,7 @@ function PooledDataArray{T,R<:Integer,N}(d::AbstractArray{T, N}, throw(ArgumentError("Cannot construct a PooledDataVector with type $R with a pool of size $(length(pool))")) end - newrefs = Array(R, size(d)) + newrefs = Array{R,N}(size(d)) poolref = Dict{T, R}() # loop through once to fill the poolref dict @@ -93,7 +92,7 @@ end function PooledDataArray{T,R<:Integer,N}(d::AbstractArray{T, N}, m::AbstractArray{Bool, N}, r::Type{R} = DEFAULT_POOLED_REF_TYPE) - pool = convert(Array, unique(d[!m])) + pool = convert(Array, unique(d[.!m])) if method_exists(isless, (T, T)) sort!(pool) end @@ -101,14 +100,14 @@ function PooledDataArray{T,R<:Integer,N}(d::AbstractArray{T, N}, end # Construct an all-NA PooledDataVector of a specific type -PooledDataArray(t::Type, dims::@compat Tuple{Vararg{Int}}) = PooledDataArray(Array(t, dims), trues(dims)) -PooledDataArray(t::Type, dims::Int...) = PooledDataArray(Array(t, dims), trues(dims)) -PooledDataArray{R<:Integer}(t::Type, r::Type{R}, dims::@compat Tuple{Vararg{Int}}) = PooledDataArray(Array(t, dims), trues(dims), r) +PooledDataArray(t::Type, dims::Tuple{Vararg{Int}}) = PooledDataArray(Array{t}(dims), trues(dims)) +PooledDataArray(t::Type, dims::Int...) = PooledDataArray(Array{t}(dims), trues(dims)) +PooledDataArray{R<:Integer}(t::Type, r::Type{R}, dims::Tuple{Vararg{Int}}) = PooledDataArray(Array{t}(dims), trues(dims), r) PooledDataArray{R<:Integer}(t::Type, r::Type{R}, dims::Int...) = PooledDataArray(Array(t, dims), trues(dims), r) # Construct an empty PooledDataVector of a specific type -PooledDataArray(t::Type) = PooledDataArray(similar(Array(t,1),0), trues(0)) -PooledDataArray{R<:Integer}(t::Type, r::Type{R}) = PooledDataArray(similar(Array(t,1),0), trues(0), r) +PooledDataArray(t::Type) = PooledDataArray(similar(Vector{t}(1),0), trues(0)) +PooledDataArray{R<:Integer}(t::Type, r::Type{R}) = PooledDataArray(similar(Vector{t}(1),0), trues(0), r) # Convert a BitArray to an Array{Bool} (m = missingness) # For some reason an additional method is needed but even that doesn't work @@ -289,7 +288,7 @@ end function Base.unique{T}(pda::PooledDataArray{T}) n = length(pda) nlevels = length(pda.pool) - unique_values = Array(T, 0) + unique_values = Vector{T}(0) sizehint!(unique_values, nlevels) seen = Set{eltype(pda.refs)}() @@ -312,7 +311,7 @@ function Base.unique{T}(pda::PooledDataArray{T}) end if firstna > 0 - res = DataArray(Array(T, nlevels + 1)) + res = DataArray(Vector{T}(nlevels + 1)) i = 0 for val in unique_values i += 1 @@ -600,7 +599,7 @@ end Base.sort(pda::PooledDataArray; kw...) = pda[sortperm(pda; kw...)] -type FastPerm{O<:Base.Sort.Ordering,V<:AbstractVector} <: Base.Sort.Ordering +struct FastPerm{O<:Base.Sort.Ordering,V<:AbstractVector} <: Base.Sort.Ordering ord::O vec::V end @@ -659,8 +658,8 @@ function PooledDataVecs(v1::AbstractArray, ## Return two PooledDataVecs that share the same pool. ## TODO: allow specification of REFTYPE - refs1 = Array(DEFAULT_POOLED_REF_TYPE, size(v1)) - refs2 = Array(DEFAULT_POOLED_REF_TYPE, size(v2)) + refs1 = Array{DEFAULT_POOLED_REF_TYPE}(size(v1)) + refs2 = Array{DEFAULT_POOLED_REF_TYPE}(size(v2)) poolref = Dict{promote_type(eltype(v1), eltype(v2)), DEFAULT_POOLED_REF_TYPE}() maxref = 0 @@ -738,7 +737,7 @@ Base.convert(::Type{PooledDataArray}, a::AbstractArray) = function Base.convert{S,T,R<:Integer,N}(::Type{DataArray{S,N}}, pda::PooledDataArray{T,R,N}) - res = DataArray(Array(S, size(pda)), BitArray(size(pda))) + res = DataArray(Array{S}(size(pda)), BitArray(size(pda))) for i in 1:length(pda) r = pda.refs[i] if r == 0 # TODO: Use zero(R) @@ -759,9 +758,9 @@ pdata(a::AbstractArray) = convert(PooledDataArray, a) function Base.convert{S, T, R, N}( ::Type{Array{S, N}}, - pda::PooledDataArray{T, R, N} -) - res = Array(S, size(pda)) + pda::PooledDataArray{T, R, N}) + + res = Array{S}(size(pda)) for i in 1:length(pda) if pda.refs[i] == zero(R) throw(NAException()) @@ -787,9 +786,9 @@ end function Base.convert{S, T, R, N}( ::Type{Array{S, N}}, pda::PooledDataArray{T, R, N}, - replacement::Any -) - res = Array(S, size(pda)) + replacement::Any) + + res = Array{S}(size(pda)) replacementS = convert(S, replacement) for i in 1:length(pda) if pda.refs[i] == zero(R) @@ -815,7 +814,7 @@ end function dropna{T}(pdv::PooledDataVector{T}) n = length(pdv) - res = Array(T, n) + res = Array{T}(n) total = 0 for i in 1:n if pdv.refs[i] > 0 diff --git a/src/reduce.jl b/src/reduce.jl index 6eeda48..1ff0926 100644 --- a/src/reduce.jl +++ b/src/reduce.jl @@ -64,7 +64,7 @@ end mapreduce_impl_skipna{T}(f, op, A::DataArray{T}) = mapreduce_seq_impl_skipna(f, op, T, A, 1, length(A.data)) -mapreduce_impl_skipna(f, op::typeof(@functorize(+)), A::DataArray) = +mapreduce_impl_skipna(f, op::typeof(+), A::DataArray) = mapreduce_pairwise_impl_skipna(f, op, A, 1, length(A.na.chunks), length(A.na)-countnz(A.na), max(128, sum_pairwise_blocksize(f))) @@ -87,48 +87,48 @@ end # NA, it returns NA. Otherwise we will fall back to the implementation # in Base, which is slow because it's type-unstable, but guarantees the # correct semantics -typealias SafeMapFuns @compat Union{typeof(@functorize(identity)), typeof(@functorize(abs)), typeof(@functorize(abs2)), - typeof(@functorize(exp)), typeof(@functorize(log)), typeof(@functorize(centralizedabs2fun))} -typealias SafeReduceFuns @compat Union{typeof(@functorize(+)), typeof(@functorize(*)), typeof(@functorize(max)), typeof(@functorize(min))} +const SafeMapFuns = Union{typeof(identity), typeof(abs), typeof(abs2), + typeof(exp), typeof(log), typeof(Base.centralizedabs2fun)} +const SafeReduceFuns = Union{typeof(+), typeof(*), typeof(max), typeof(min)} function Base._mapreduce(f::SafeMapFuns, op::SafeReduceFuns, A::DataArray) any(A.na) && return NA Base._mapreduce(f, op, A.data) end function Base.mapreduce(f, op::Function, A::DataArray; skipna::Bool=false) - (op === +) ? (skipna ? _mapreduce_skipna(f, @functorize(+), A) : Base._mapreduce(f, @functorize(+), A)) : - (op === *) ? (skipna ? _mapreduce_skipna(f, @functorize(*), A) : Base._mapreduce(f, @functorize(*), A)) : - (op === &) ? (skipna ? _mapreduce_skipna(f, @functorize(&), A) : Base._mapreduce(f, @functorize(&), A)) : - (op === |) ? (skipna ? _mapreduce_skipna(f, @functorize(|), A) : Base._mapreduce(f, @functorize(|), A)) : + (op === +) ? (skipna ? _mapreduce_skipna(f, +, A) : Base._mapreduce(f, +, A)) : + (op === *) ? (skipna ? _mapreduce_skipna(f, *, A) : Base._mapreduce(f, *, A)) : + (op === &) ? (skipna ? _mapreduce_skipna(f, &, A) : Base._mapreduce(f, &, A)) : + (op === |) ? (skipna ? _mapreduce_skipna(f, |, A) : Base._mapreduce(f, |, A)) : skipna ? _mapreduce_skipna(f, op, A) : Base._mapreduce(f, op, A) end # To silence deprecations, but could be more efficient -Base.mapreduce(f, op::(@compat Union{typeof(@functorize(|)), typeof(@functorize(&))}), A::DataArray; skipna::Bool=false) = +Base.mapreduce(f, op::Union{typeof(|), typeof(&)}, A::DataArray; skipna::Bool=false) = skipna ? _mapreduce_skipna(f, op, A) : Base._mapreduce(f, op, A) Base.mapreduce(f, op, A::DataArray; skipna::Bool=false) = skipna ? _mapreduce_skipna(f, op, A) : Base._mapreduce(f, op, A) Base.reduce(op, A::DataArray; skipna::Bool=false) = - mapreduce(@functorize(identity), op, A; skipna=skipna) + mapreduce(identity, op, A; skipna=skipna) ## usual reductions -for (fn, op) in ((:(Base.sum), @functorize(+)), - (:(Base.prod), @functorize(*)), - (:(Base.minimum), @functorize(min)), - (:(Base.maximum), @functorize(max))) +for (fn, op) in ((:(Base.sum), +), + (:(Base.prod), *), + (:(Base.minimum), min), + (:(Base.maximum), max)) @eval begin - $fn(f::(@compat Union{Function,$(supertype(typeof(@functorize(abs))))}), a::DataArray; skipna::Bool=false) = + $fn(f::Union{Function,$(supertype(typeof(abs)))}, a::DataArray; skipna::Bool=false) = mapreduce(f, $op, a; skipna=skipna) $fn(a::DataArray; skipna::Bool=false) = - mapreduce(@functorize(identity), $op, a; skipna=skipna) + mapreduce(identity, $op, a; skipna=skipna) end end -for (fn, f, op) in ((:(Base.sumabs), @functorize(abs), @functorize(+)), - (:(Base.sumabs2), @functorize(abs2), @functorize(+))) +for (fn, f, op) in ((:(Base.sumabs), abs, +), + (:(Base.sumabs2), abs2, +)) @eval $fn(a::DataArray; skipna::Bool=false) = mapreduce($f, $op, a; skipna=skipna) end @@ -147,11 +147,11 @@ function Base.varm{T}(A::DataArray{T}, m::Number; corrected::Bool=true, skipna:: nna = countnz(na) nna == n && return convert(Base.momenttype(T), NaN) nna == n-1 && return convert(Base.momenttype(T), - abs2(A.data[Base.findnextnot(na, 1)] - m)/(1 - @compat(Int(corrected)))) + abs2(A.data[Base.findnextnot(na, 1)] - m)/(1 - corrected)) /(nna == 0 ? Base.centralize_sumabs2(A.data, m, 1, n) : - mapreduce_impl_skipna(@functorize(centralizedabs2fun)(m), @functorize(+), A), - n - nna - @compat(Int(corrected))) + mapreduce_impl_skipna(Base.centralizedabs2fun(m), +, A), + n - nna - corrected) else any(A.na) && return NA Base.varm(A.data, m; corrected=corrected) @@ -162,7 +162,7 @@ Base.varm{T}(A::DataArray{T}, m::NAtype; corrected::Bool=true, skipna::Bool=fals function Base.var(A::DataArray; corrected::Bool=true, mean=nothing, skipna::Bool=false) mean == 0 ? Base.varm(A, 0; corrected=corrected, skipna=skipna) : mean == nothing ? varm(A, Base.mean(A; skipna=skipna); corrected=corrected, skipna=skipna) : - isa(mean, (@compat Union{Number, NAtype})) ? + isa(mean, Union{Number, NAtype}) ? varm(A, mean; corrected=corrected, skipna=skipna) : throw(ErrorException("Invalid value of mean.")) end diff --git a/src/reducedim.jl b/src/reducedim.jl index 1e1a499..82e6362 100644 --- a/src/reducedim.jl +++ b/src/reducedim.jl @@ -5,7 +5,7 @@ using Base.check_reducedims # This is a substantially faster implementation of the "all" reduction # across dimensions for reducing a BitArray to an Array{Bool}. We use # this below for implementing MaxFun and MinFun with skipna=true. -@ngenerate N typeof(R) function Base._mapreducedim!{N}(f, op::typeof(@functorize(&)), R::Array{Bool}, A::BitArray{N}) +@ngenerate N typeof(R) function Base._mapreducedim!{N}(f, op::typeof(&), R::Array{Bool}, A::BitArray{N}) lsiz = check_reducedims(R, A) isempty(A) && return R @nextract N sizeR d->size(R, d) @@ -167,7 +167,7 @@ _getdata(A::DataArray) = A.data # mapreduce across a dimension. If specified, C contains the number of # non-NA values reduced into each element of R. @ngenerate N typeof(R) function _mapreducedim_skipna_impl!{T,N}(f, op, R::AbstractArray, - C::(@compat Union{Array{Int}, Void}), + C::Union{Array{Int}, Void}, A::DataArray{T,N}) data = A.data na = A.na @@ -226,11 +226,11 @@ _mapreducedim_skipna!(f, op, R::AbstractArray, A::DataArray) = _mapreducedim_skipna_impl!(f, op, R, nothing, A) # for MinFun/MaxFun, min or max is NA if all values along a dimension are NA -function _mapreducedim_skipna!(f, op::(@compat Union{typeof(@functorize(min)), typeof(@functorize(max))}), R::DataArray, A::DataArray) +function _mapreducedim_skipna!(f, op::Union{typeof(min), typeof(max)}, R::DataArray, A::DataArray) R.na = BitArray(all!(fill(true, size(R)), A.na)) _mapreducedim_skipna_impl!(f, op, R, nothing, A) end -function _mapreducedim_skipna!(f, op::(@compat Union{typeof(@functorize(min)), typeof(@functorize(max))}), R::AbstractArray, A::DataArray) +function _mapreducedim_skipna!(f, op::Union{typeof(min), typeof(max)}, R::AbstractArray, A::DataArray) if any(all!(fill(true, size(R)), A.na)) throw(NAException("all values along specified dimension are NA for one element of reduced dimension; cannot reduce to non-DataArray")) end @@ -239,7 +239,7 @@ end ## general reducedim interface -for op in (@functorize(+), @functorize(*), @functorize(&), @functorize(|),@functorize(scalarmin), @functorize(scalarmax), @functorize(min), @functorize(max)) +for op in (+, *, &, |, min, max) @eval begin function Base.initarray!{T}(a::DataArray{T}, op::typeof($op), init::Bool) if init @@ -251,45 +251,22 @@ for op in (@functorize(+), @functorize(*), @functorize(&), @functorize(|),@funct end end -# min and max defunctorize to ElementwiseMin/MaxFun which don't have initarray! -# or reducedim_init methods on 0.4. -if VERSION < v"0.5.0-dev+3701" - Base.initarray!(a::AbstractArray, ::Base.ElementwiseMaxFun, init::Bool) = - Base.initarray!(a, Base.MaxFun(), init) - Base.initarray!(a::AbstractArray, ::Base.ElementwiseMinFun, init::Bool) = - Base.initarray!(a, Base.MinFun(), init) - Base.reducedim_init(f, ::Base.ElementwiseMaxFun, a::AbstractArray, dim) = Base.reducedim_init(f, Base.MaxFun(), a, dim) - Base.reducedim_init(f, ::Base.ElementwiseMinFun, a::AbstractArray, dim) = Base.reducedim_init(f, Base.MinFun(), a, dim) -end - function Base.reducedim_initarray{R}(A::DataArray, region, v0, ::Type{R}) - if VERSION < v"0.6.0-dev.1121" - rd = Base.reduced_dims(A.data, region) - else - rd = length.(Base.reduced_indices(A.data, region)) - end + rd = length.(Base.reduced_indices(A.data, region)) DataArray(fill!(similar(A.data, R, rd), v0), falses(rd)) end function Base.reducedim_initarray0{R}(A::DataArray, region, v0, ::Type{R}) - if VERSION < v"0.6.0-dev.1121" - rd = Base.reduced_dims0(A,region) - else - rd = length.(Base.reduced_indices0(A,region)) - end + rd = length.(Base.reduced_indices0(A,region)) DataArray(fill!(similar(A.data, R, rd), v0), falses(rd)) end function Base.mapreducedim!(f::Function, op, R::AbstractArray, A::DataArray; skipna::Bool=false) - (op === +) ? (skipna ? _mapreducedim_skipna!(f, @functorize(+), R, A) : _mapreducedim!(f, @functorize(+), R, A)) : - (op === *) ? (skipna ? _mapreducedim_skipna!(f, @functorize(*), R, A) : _mapreducedim!(f, @functorize(*), R, A)) : - (op === &) ? (skipna ? _mapreducedim_skipna!(f, @functorize(&), R, A) : _mapreducedim!(f, @functorize(&), R, A)) : - (op === |) ? (skipna ? _mapreducedim_skipna!(f, @functorize(|), R, A) : _mapreducedim!(f, @functorize(|), R, A)) : skipna ? _mapreducedim_skipna!(f, op, R, A) : _mapreducedim!(f, op, R, A) end Base.mapreducedim!(f, op, R::AbstractArray, A::DataArray; skipna::Bool=false) = skipna ? _mapreducedim_skipna!(f, op, R, A) : _mapreducedim!(f, op, R, A) Base.reducedim!{RT}(op, R::DataArray{RT}, A::AbstractArray; skipna::Bool=false) = - Base.mapreducedim!(@functorize(identity), op, R, A, zero(RT); skipna=skipna) + Base.mapreducedim!(identity, op, R, A, zero(RT); skipna=skipna) Base.mapreducedim(f, op, A::DataArray, region, v0; skipna::Bool=false) = Base.mapreducedim!(f, op, Base.reducedim_initarray(A, region, v0), A; skipna=skipna) @@ -297,35 +274,35 @@ Base.mapreducedim{T}(f, op, A::DataArray{T}, region; skipna::Bool=false) = Base.mapreducedim!(f, op, Base.reducedim_init(f, op, A, region), A; skipna=skipna) Base.reducedim(op, A::DataArray, region, v0; skipna::Bool=false) = - Base.mapreducedim(@functorize(identity), op, A, region, v0; skipna=skipna) + Base.mapreducedim(identity, op, A, region, v0; skipna=skipna) Base.reducedim(op, A::DataArray, region; skipna::Bool=false) = - Base.mapreducedim(@functorize(identity), op, A, region; skipna=skipna) + Base.mapreducedim(identity, op, A, region; skipna=skipna) ## usual reductions -for (basfn, Op) in [(:sum, @functorize(+)), (:prod, @functorize(*)), - (:maximum, @functorize(max)), (:minimum, @functorize(min)), - (:all, @functorize(&)), (:any, @functorize(|))] +for (basfn, Op) in [(:sum, +), (:prod, *), + (:maximum, max), (:minimum, min), + (:all, &), (:any, |)] fname = Expr(:., :Base, Base.Meta.quot(basfn)) fname! = Expr(:., :Base, Base.Meta.quot(Symbol(string(basfn, '!')))) @eval begin - $(fname!)(f::(@compat Union{Function,$(supertype(typeof(@functorize(abs))))}), r::AbstractArray, A::DataArray; + $(fname!)(f::Union{Function,$(supertype(typeof(abs)))}, r::AbstractArray, A::DataArray; init::Bool=true, skipna::Bool=false) = Base.mapreducedim!(f, $(Op), Base.initarray!(r, $(Op), init), A; skipna=skipna) $(fname!)(r::AbstractArray, A::DataArray; init::Bool=true, skipna::Bool=false) = - $(fname!)(@functorize(identity), r, A; init=init, skipna=skipna) + $(fname!)(identity, r, A; init=init, skipna=skipna) - $(fname)(f::(@compat Union{Function,$(supertype(typeof(@functorize(abs))))}), A::DataArray, region; skipna::Bool=false) = + $(fname)(f::Union{Function,$(supertype(typeof(abs)))}, A::DataArray, region; skipna::Bool=false) = Base.mapreducedim(f, $(Op), A, region; skipna=skipna) $(fname)(A::DataArray, region; skipna::Bool=false) = - $(fname)(@functorize(identity), A, region; skipna=skipna) + $(fname)(identity, A, region; skipna=skipna) end end -for (basfn, fbase, Fun) in [(:sumabs, :sum, @functorize(abs)), - (:sumabs2, :sum, @functorize(abs2)), - (:maxabs, :maximum, @functorize(abs)), - (:minabs, :minimum, @functorize(abs))] +for (basfn, fbase, Fun) in [(:sumabs, :sum, abs), + (:sumabs2, :sum, abs2), + (:maxabs, :maximum, abs), + (:minabs, :minimum, abs)] fname = Expr(:., :Base, Base.Meta.quot(basfn)) fname! = Expr(:., :Base, Base.Meta.quot(Symbol(string(basfn, '!')))) fbase! = Expr(:., :Base, Base.Meta.quot(Symbol(string(fbase, '!')))) @@ -343,8 +320,8 @@ function Base.mean!{T}(R::AbstractArray{T}, A::DataArray; skipna::Bool=false, init::Bool=true) init && fill!(R, zero(eltype(R))) if skipna - C = Array(Int, size(R)) - _mapreducedim_skipna_impl!(@functorize(identity), @functorize(+), R, C, A) + C = Array{Int}(size(R)) + _mapreducedim_skipna_impl!(identity, +, R, C, A) broadcast!(/, R, R, C) else sum!(R, A; skipna=false) @@ -363,7 +340,7 @@ immutable MapReduceDim2ArgHelperFun{F,T} f::F val::T end -@compat (f::MapReduceDim2ArgHelperFun)(x) = f.f(x, f.val) +(f::MapReduceDim2ArgHelperFun)(x) = f.f(x, f.val) # A version of _mapreducedim! that accepts an array S of the same size # as R, the elements of which are passed as a second argument to f. @@ -429,7 +406,7 @@ end # A version of _mapreducedim_skipna! that accepts an array S of the same size # as R, the elements of which are passed as a second argument to f. @ngenerate N typeof(R) function _mapreducedim_skipna_2arg!{T,N}(f, op, R::AbstractArray, - C::(@compat Union{Array{Int}, Void}), + C::Union{Array{Int}, Void}, A::DataArray{T,N}, S::AbstractArray) data = A.data na = A.na @@ -502,7 +479,7 @@ end end immutable Abs2MinusFun end -@compat (::Abs2MinusFun)(x, m) = abs2(x - m) +(::Abs2MinusFun)(x, m) = abs2(x - m) function Base.varm!(R::AbstractArray, A::DataArray, m::AbstractArray; corrected::Bool=true, skipna::Bool=false, init::Bool=true) @@ -511,10 +488,10 @@ function Base.varm!(R::AbstractArray, A::DataArray, m::AbstractArray; corrected: else init && fill!(R, zero(eltype(R))) if skipna - C = Array(Int, size(R)) + C = Array{Int}(size(R)) # Compute R = abs2(A-m) - _mapreducedim_skipna_2arg!(Abs2MinusFun(), @functorize(+), R, C, A, m) + _mapreducedim_skipna_2arg!(Abs2MinusFun(), +, R, C, A, m) # Divide by number of non-NA values if corrected @@ -525,10 +502,10 @@ function Base.varm!(R::AbstractArray, A::DataArray, m::AbstractArray; corrected: broadcast!(/, R, R, C) else # Compute R = abs2(A-m) - _mapreducedim_2arg!(Abs2MinusFun(), @functorize(+), R, A, m) + _mapreducedim_2arg!(Abs2MinusFun(), +, R, A, m) # Divide by number of values - broadcast!(/, R, R, div(length(A), length(R)) - @compat(Int(corrected))) + broadcast!(/, R, R, div(length(A), length(R)) - corrected) end end end @@ -538,7 +515,7 @@ Base.varm{T}(A::DataArray{T}, m::AbstractArray, region; corrected::Bool=true, Base.varm!(Base.reducedim_initarray(A, region, zero(Base.momenttype(T))), A, m; corrected=corrected, skipna=skipna, init=false) -function Base.var{T}(A::DataArray{T}, region::(@compat Union{Integer, AbstractArray, Tuple}); +function Base.var{T}(A::DataArray{T}, region::Union{Integer, AbstractArray, Tuple}; corrected::Bool=true, mean=nothing, skipna::Bool=false) if mean == 0 Base.varm(A, Base.reducedim_initarray(A, region, zero(Base.momenttype(T))), region; @@ -546,11 +523,7 @@ function Base.var{T}(A::DataArray{T}, region::(@compat Union{Integer, AbstractAr elseif mean == nothing if skipna # Can reduce mean into ordinary array - if VERSION < v"0.6.0-dev.1121" - m = zeros(Base.momenttype(T), Base.reduced_dims(A, region)) - else - m = zeros(Base.momenttype(T), length.(Base.reduced_indices(A, region))) - end + m = zeros(Base.momenttype(T), length.(Base.reduced_indices(A, region))) Base.varm(A, Base.mean!(m, A; skipna=skipna), region; corrected=corrected, skipna=skipna) else diff --git a/src/sort.jl b/src/sort.jl index 40c47d0..45d7177 100644 --- a/src/sort.jl +++ b/src/sort.jl @@ -15,7 +15,7 @@ end datachunks(o::Base.Order.Perm, v::AbstractVector{Int}) = (v, o.data.na.chunks) datachunks(o::Base.Order.DirectOrdering, v::DataVector) = (v.data, v.na.chunks) -function nas2left!(v::(@compat Union{AbstractVector{Int}, DataVector}), o::Base.Order.Ordering, lo::Int=1, hi::Int=length(v)) +function nas2left!(v::Union{AbstractVector{Int}, DataVector}, o::Base.Order.Ordering, lo::Int=1, hi::Int=length(v)) data, chunks = datachunks(o, v) i = lo @@ -37,7 +37,7 @@ function nas2left!(v::(@compat Union{AbstractVector{Int}, DataVector}), o::Base. return i, hi end -function nas2right!(v::(@compat Union{AbstractVector{Int}, DataVector}), o::Base.Order.Ordering, lo::Int=1, hi::Int=length(v)) +function nas2right!(v::Union{AbstractVector{Int}, DataVector}, o::Base.Order.Ordering, lo::Int=1, hi::Int=length(v)) data, chunks = datachunks(o, v) i = hi diff --git a/test/broadcast.jl b/test/broadcast.jl index 24ad887..e0db9b5 100644 --- a/test/broadcast.jl +++ b/test/broadcast.jl @@ -7,7 +7,7 @@ as_dataarray_bigfloat(x) = convert(DataArray{BigFloat}, x) as_pda(x) = convert(PooledDataArray, x) as_pda_bigfloat(x) = convert(PooledDataArray{BigFloat}, x) -bittest(f::Function, ewf::Function, a...) = (@test ewf(a...) == +bittest(f::Function, a...) = (@test broadcast(f, a...) == invoke(broadcast, Tuple{Function,ntuple(x->AbstractArray, length(a))...}, f, a...)) n1 = 21 n2 = 32 @@ -20,7 +20,6 @@ rb = 1:5 @test broadcast!(+, DataArray(Int, 2, 2), [1, 0], [1 4]) == [2 5; 1 4] @test broadcast!(+, DataArray(Int, 2), [1, 0], [1, 4]) == [2, 4] @test broadcast!(+, DataArray(Int, 2), [1, 0], 2) == [3, 2] -@test broadcast!(abs, @data([-1, -2])) == @data([1, 2]) for arr in (identity, as_dataarray, as_pda, as_dataarray_bigfloat, as_pda_bigfloat) @test broadcast(+, arr(eye(2)), arr([1, 4])) == [2 1; 4 5] @test broadcast(+, arr(eye(2)), arr([1 4])) == [2 4; 1 5] @@ -81,21 +80,18 @@ for arr in (identity, as_dataarray, as_pda, as_dataarray_bigfloat, as_pda_bigflo # @test A == diagm(10:12) # @test_throws BoundsError broadcast_setindex!(A, 7, [1,-1], [1 2]) - for (f, ewf) in (((==), (.==)), - ((<) , (.<) ), - ((!=), (.!=)), - ((<=), (.<=))) - bittest(f, ewf, arr(eye(2)), arr([1, 4])) - bittest(f, ewf, arr(eye(2)), arr([1 4])) - bittest(f, ewf, arr([0, 1]), arr([1 4])) - bittest(f, ewf, arr([0 1]), arr([1, 4])) - bittest(f, ewf, arr([1, 0]), arr([1, 4])) + for f in (==, (<), (!=), (<=)) + bittest(f, arr(eye(2)), arr([1, 4])) + bittest(f, arr(eye(2)), arr([1 4])) + bittest(f, arr([0, 1]), arr([1 4])) + bittest(f, arr([0 1]), arr([1, 4])) + bittest(f, arr([1, 0]), arr([1, 4])) # these should work once indexing is fixed - #bittest(f, ewf, arr(rand(rb, n1, n2, n3)), arr(rand(rb, n1, n2, n3))) - #bittest(f, ewf, arr(rand(rb, 1, n2, n3)), arr(rand(rb, n1, 1, n3))) - #bittest(f, ewf, arr(rand(rb, 1, n2, 1)), arr(rand(rb, n1, 1, n3))) - #bittest(f, ewf, arr(bitrand(n1, n2, n3)), arr(bitrand(n1, n2, n3))) + bittest(f, arr(rand(rb, n1, n2, n3)), arr(rand(rb, n1, n2, n3))) + bittest(f, arr(rand(rb, 1, n2, n3)), arr(rand(rb, n1, 1, n3))) + bittest(f, arr(rand(rb, 1, n2, 1)), arr(rand(rb, n1, 1, n3))) + bittest(f, arr(bitrand(n1, n2, n3)), arr(bitrand(n1, n2, n3))) end end @@ -104,19 +100,22 @@ r2 = 1:5 ratio = @data [1,1/2,1/3,1/4,1/5] @test r1.*r2 == collect(1:5) @test r1./r2 == ratio -m = @data [1,2]' +m = @data [1 2] @test m.*r2 == DataArray([1:5 2:2:10]) -@test_approx_eq m./r2 [ratio 2ratio] -@test_approx_eq m./collect(r2) [ratio 2ratio] +@test m./r2 ≈ [ratio 2ratio] +@test m./collect(r2) ≈ [ratio 2ratio] @test @inferred([0,1.2].+reshape([0,-2],1,1,2)) == reshape([0 -2; 1.2 -0.8],2,1,2) rt = Base.return_types(.+, (DataArray{Float64, 3}, DataArray{Int, 1})) @test length(rt) == 1 && rt[1] == DataArray{Float64, 3} -rt = Base.return_types(broadcast, (Function, Array{Float64, 3}, DataArray{Int, 1})) +rt = Base.return_types(broadcast, (typeof(+), Array{Float64, 3}, DataArray{Int, 1})) @test length(rt) == 1 && rt[1] == DataArray{Float64, 3} -rt = Base.return_types(broadcast!, (Function, DataArray{Float64, 3}, Array{Float64, 3}, Array{Int, 1})) +rt = Base.return_types(broadcast!, (typeof(+), DataArray{Float64, 3}, Array{Float64, 3}, Array{Int, 1})) @test length(rt) == 1 && rt[1] == DataArray{Float64, 3} +# Test String broadcast +@test broadcast(==, @data(["a", "b", "c", "d"]), "a") == @data([true,false,false,false]) + # Test broadcasting of functions that do something besides propagate NA @test isequal(broadcast(isequal, @data([NA, 1]), @data([NA 1])), @data([true false; false true])) @test isequal(broadcast(isequal, @pdata([NA, 1]), @data([NA 1])), @data([true false; false true])) @@ -126,9 +125,9 @@ rt = Base.return_types(broadcast!, (Function, DataArray{Float64, 3}, Array{Float @test isequal(broadcast(|, @data([NA, false]), @data([NA true false])), @data([NA true NA; NA true false])) # Test map! -@test_throws DimensionMismatch map!(+, DataArray(Float64, 2, 2), @data([1, 2]), @data([1 2])) @test map!(+, DataArray(Float64, 2), @data([1, 2]), @data([1, 2])) == @data([2, 4]) -@test map!(abs, @data([-1, -2])) == @data([1, 2]) +x = @data([-1, -2]) +@test map!(abs, x, x) == @data([1, 2]) @test isequal(map!(+, DataArray(Float64, 3), @data([1, NA, 3]), @data([NA, 2, 3])), @data([NA, NA, 6])) @test map!(isequal, DataArray(Float64, 3), @data([1, NA, NA]), @data([1, NA, 3])) == @data([true, true, false]) end diff --git a/test/data.jl b/test/data.jl index c1371ff..31a8431 100644 --- a/test/data.jl +++ b/test/data.jl @@ -26,21 +26,12 @@ module TestData @assert isa(dvint2, DataVector{Int}) @assert isa(dvint3, DataVector{Int}) @assert isa(dvflt, DataVector{Float64}) - if VERSION < v"0.5.0-dev+3876" - @assert isa(dvstr, DataVector{ASCIIString}) - else - @assert isa(dvstr, DataVector{String}) - end - # @test throws_exception(DataArray([5:8], falses(2)), Exception) + @assert isa(dvstr, DataVector{String}) + @test_throws ArgumentError DataArray([5:8], falses(2)) #test_group("PooledDataVector creation") pdvstr = @pdata ["one", "one", "two", "two", NA, "one", "one"] - if VERSION < v"0.5.0-dev+3876" - @assert isa(pdvstr, PooledDataVector{ASCIIString}) - else - @assert isa(pdvstr, PooledDataVector{String}) - end - # @test throws_exception(PooledDataVector["one", "one", 9], Exception) + @assert isa(pdvstr, PooledDataVector{String}) @assert isequal(PooledDataArray(pdvstr), pdvstr) #test_group("PooledDataVector creation with predetermined pool") @@ -92,11 +83,7 @@ module TestData @assert size(pdvstr) == (7,) @assert length(pdvstr) == 7 @assert sum(isna(pdvstr)) == 1 - if VERSION < v"0.5.0-dev+3876" - @assert eltype(pdvstr) == ASCIIString - else - @assert eltype(pdvstr) == String - end + @assert eltype(pdvstr) == String #test_group("DataVector operations") @assert isequal(dvint .+ 1, DataArray([2, 3, 4, 5], [false, false, true, false])) @@ -118,7 +105,7 @@ module TestData @assert all(convert(Vector{Int}, dvint2) .== [5:8;]) @assert all([i + 1 for i in dvint2] .== [6:9;]) @assert all([length(x)::Int for x in dvstr] == [3, 3, 1, 4]) - @assert repr(dvint) == "[1,2,NA,4]" + @assert repr(dvint) == "[1, 2, NA, 4]" #test_group("PooledDataVector to something else") @assert all(dropna(pdvstr) .== ["one", "one", "two", "two", "one", "one"]) diff --git a/test/dataarray.jl b/test/dataarray.jl index d9d88ba..fcf7088 100644 --- a/test/dataarray.jl +++ b/test/dataarray.jl @@ -8,7 +8,7 @@ module TestDataArray m = [1 2; 3 4] dm = DataArray(m, falses(size(m))) - t = Array(Int, 2, 2, 2) + t = Array{Int}(2, 2, 2) t[1:2, 1:2, 1:2] = 1 dt = DataArray(t, falses(size(t))) @@ -59,9 +59,9 @@ module TestDataArray function nonbits(dv) ret = similar(dv, Integer) for i = 1:length(dv) - if !isna(dv, i) + # if !isna(dv, i) ret[i] = dv[i] - end + # end end ret end @@ -92,11 +92,7 @@ module TestDataArray @test_throws BoundsError copy!(dest, 1, src, idx, 1) end - if VERSION >= v"0.5.0-dev+4711" - @test_throws ArgumentError copy!(dest, 1, src, 1, -1) - else - @test_throws BoundsError copy!(dest, 1, src, 1, -1) - end + @test_throws ArgumentError copy!(dest, 1, src, 1, -1) @test_throws BoundsError copy!(dest, bigsrc) diff --git a/test/datamatrix.jl b/test/datamatrix.jl index c2bb6df..dde5879 100644 --- a/test/datamatrix.jl +++ b/test/datamatrix.jl @@ -44,10 +44,10 @@ module TestDataMatrix b[1, 1] = NA res = a * b[1:1, :] @assert all(isna(res[:, 1])) - @assert all(!isna(res[:, 2])) - @assert all(!isna(res[:, 3])) + @assert all(.!(isna(res[:, 2]))) + @assert all(.!(isna(res[:, 3]))) res = a * b[2:2, :] - @assert all(!isna(res)) + @assert all(.!(isna(res))) # # DataMatrix w NA's * DataVector @@ -55,8 +55,8 @@ module TestDataMatrix res = b * a @assert isna(res[1]) - @assert !isna(res[2]) - @assert !isna(res[3]) + @assert .!(isna(res[2])) + @assert .!(isna(res[3])) # # DataMatrix * DataMatrix @@ -71,11 +71,11 @@ module TestDataMatrix @assert isna(res[1, 2]) @assert isna(res[1, 3]) @assert isna(res[2, 1]) - @assert !isna(res[2, 2]) - @assert !isna(res[2, 3]) + @assert .!(isna(res[2, 2])) + @assert .!(isna(res[2, 3])) @assert isna(res[3, 1]) - @assert !isna(res[3, 2]) - @assert !isna(res[3, 3]) + @assert .!(isna(res[3, 2])) + @assert .!(isna(res[3, 3])) res = b * @data eye(3) # 3x3 Float64 DataMatrix: @@ -85,12 +85,12 @@ module TestDataMatrix @assert isna(res[1, 1]) @assert isna(res[1, 2]) @assert isna(res[1, 3]) - @assert !isna(res[2, 1]) - @assert !isna(res[2, 2]) - @assert !isna(res[2, 3]) - @assert !isna(res[3, 1]) - @assert !isna(res[3, 2]) - @assert !isna(res[3, 3]) + @assert .!(isna(res[2, 1])) + @assert .!(isna(res[2, 2])) + @assert .!(isna(res[2, 3])) + @assert .!(isna(res[3, 1])) + @assert .!(isna(res[3, 2])) + @assert .!(isna(res[3, 3])) res = (@data eye(3)) * b # julia> dataeye(3) * b @@ -99,14 +99,14 @@ module TestDataMatrix # NA 1.0 0.0 # NA 0.0 1.0 @assert isna(res[1, 1]) - @assert !isna(res[1, 2]) - @assert !isna(res[1, 3]) + @assert .!(isna(res[1, 2])) + @assert .!(isna(res[1, 3])) @assert isna(res[2, 1]) - @assert !isna(res[2, 2]) - @assert !isna(res[2, 3]) + @assert .!(isna(res[2, 2])) + @assert .!(isna(res[2, 3])) @assert isna(res[3, 1]) - @assert !isna(res[3, 2]) - @assert !isna(res[3, 3]) + @assert .!(isna(res[3, 2])) + @assert .!(isna(res[3, 3])) # Test row operations dm = @data eye(6, 2) diff --git a/test/extras.jl b/test/extras.jl index b23f87f..be72d0b 100644 --- a/test/extras.jl +++ b/test/extras.jl @@ -10,8 +10,8 @@ module TestExtras d = @data [NA,3,3] w = weights([1.1,2.2,3.3]) - cm = Dict{(@compat Union{Int, NAtype}), Int}([(NA, 1), (3, 2)]) - cmw = Dict{(@compat Union{Int, NAtype}), Real}([(NA, 1.1), (3, 5.5)]) + cm = Dict{Union{Int, NAtype}, Int}([(NA, 1), (3, 2)]) + cmw = Dict{Union{Int, NAtype}, Real}([(NA, 1.1), (3, 5.5)]) @assert isequal(countmap(d), cm) @assert isequal(countmap(d, w), cmw) diff --git a/test/newtests/dataarray.jl b/test/newtests/dataarray.jl index 240b615..4f652d9 100644 --- a/test/newtests/dataarray.jl +++ b/test/newtests/dataarray.jl @@ -240,19 +240,19 @@ module TestDataArrays da[[1, 2]] = 5 # isna(a::AbstractArray) - isna([1, 2]) - isna(repeat([1, 2], outer = [1, 2])) - isna(repeat([1, 2], outer = [1, 2, 2])) + isna.([1, 2]) + isna.(repeat([1, 2], outer = [1, 2])) + isna.(repeat([1, 2], outer = [1, 2, 2])) # isna(da::DataArray) - isna(DataArray([1, 2], falses(2))) - isna(DataArray(repeat([1, 2], outer = [1, 2]), falses(2, 2))) - isna(DataArray(repeat([1, 2], outer = [1, 2, 2]), falses(2, 2, 2))) + isna.(DataArray([1, 2], falses(2))) + isna.(DataArray(repeat([1, 2], outer = [1, 2]), falses(2, 2))) + isna.(DataArray(repeat([1, 2], outer = [1, 2, 2]), falses(2, 2, 2))) # Base.isnan(da::DataArray) - isnan(DataArray([1, 2], falses(2))) - isnan(DataArray(repeat([1, 2], outer = [1, 2]), falses(2, 2))) - isnan(DataArray(repeat([1, 2], outer = [1, 2, 2]), falses(2, 2, 2))) + isnan.(DataArray([1, 2], falses(2))) + isnan.(DataArray(repeat([1, 2], outer = [1, 2]), falses(2, 2))) + isnan.(DataArray(repeat([1, 2], outer = [1, 2, 2]), falses(2, 2, 2))) # Base.isfinite(da::DataArray) isfinite(DataArray([1, 2], falses(2))) @@ -312,19 +312,19 @@ module TestDataArrays convert(DataArray, DataArray(repeat([1, 2], outer = [1, 2, 2]), falses(2, 2, 2))) # round(Int, da::DataArray) - round(Int, DataArray([1, 2], falses(2))) - round(Int, DataArray(repeat([1, 2], outer = [1, 2]), falses(2, 2))) - round(Int, DataArray(repeat([1, 2], outer = [1, 2, 2]), falses(2, 2, 2))) + round.(Int, DataArray([1, 2], falses(2))) + round.(Int, DataArray(repeat([1, 2], outer = [1, 2]), falses(2, 2))) + round.(Int, DataArray(repeat([1, 2], outer = [1, 2, 2]), falses(2, 2, 2))) # float(da::DataArray) - float(DataArray([1, 2], falses(2))) - float(DataArray(repeat([1, 2], outer = [1, 2]), falses(2, 2))) - float(DataArray(repeat([1, 2], outer = [1, 2, 2]), falses(2, 2, 2))) + float.(DataArray([1, 2], falses(2))) + float.(DataArray(repeat([1, 2], outer = [1, 2]), falses(2, 2))) + float.(DataArray(repeat([1, 2], outer = [1, 2, 2]), falses(2, 2, 2))) # map(Bool, da::DataArray) - @compat map(Bool, DataArray([1, 0], falses(2))) - @compat map(Bool, DataArray(repeat([1, 0], outer = [1, 2]), falses(2, 2))) - @compat map(Bool, DataArray(repeat([1, 0], outer = [1, 2, 2]), falses(2, 2, 2))) + map(Bool, DataArray([1, 0], falses(2))) + map(Bool, DataArray(repeat([1, 0], outer = [1, 2]), falses(2, 2))) + map(Bool, DataArray(repeat([1, 0], outer = [1, 2, 2]), falses(2, 2, 2))) # Base.hash(a::AbstractDataArray) hash(DataArray([1, 2], falses(2))) diff --git a/test/operators.jl b/test/operators.jl index 95ef746..0c696dc 100644 --- a/test/operators.jl +++ b/test/operators.jl @@ -2,6 +2,47 @@ module TestOperators using Base.Test using DataArrays, StatsBase + const bit_operators = [:(&),:(|),:(⊻)] + + const arithmetic_operators = [:(+),:(-),:(*),:(/), :(Base.div), :(Base.mod), :(Base.fld), :(Base.rem)] + + const comparison_operators = [:(==),:(!=),:(>),:(>=),:(<),:(<=)] + + const elementary_functions = [:(abs), + :(abs2), + :(sign), + :(acos), + :(acosh), + :(asin), + :(asinh), + :(atan), + :(atanh), + :(sin), + :(sinh), + :(conj), + :(cos), + :(cosh), + :(tan), + :(tanh), + :(ceil), + :(floor), + :(round), + :(trunc), + :(exp), + :(exp2), + :(expm1), + :(log), + :(log10), + :(log1p), + :(log2), + :(exponent), + :(sqrt), + :(gamma), + :(lgamma), + :(digamma), + :(erf), + :(erfc)] + macro test_da_pda(da, code) esc(quote let $da = copy($da) @@ -14,47 +55,47 @@ module TestOperators end # All unary operators return NA when evaluating NA - for f in map(eval, DataArrays.unary_operators) + for f in [+,-,*,/] @assert isna(f(NA)) end # All elementary functions return NA when evaluating NA - for f in map(eval, DataArrays.elementary_functions) - @assert isna(f(NA)) + for f in elementary_functions + @assert @eval isna(($f)(NA)) end # All comparison operators return NA when comparing NA with NA # All comparison operators return NA when comparing scalars with NA # All comparison operators return NA when comparing NA with scalars - for f in map(eval, DataArrays.comparison_operators) - @assert isna(f(NA, NA)) - @assert isna(f(NA, 1)) - @assert isna(f(1, NA)) + for f in comparison_operators + @assert @eval isna(($f)(NA, NA)) + @assert @eval isna(($f)(NA, 1)) + @assert @eval isna(($f)(1, NA)) end # All arithmetic operators7 return NA when operating on two NA's # All arithmetic operators return NA when operating on a scalar and an NA # All arithmetic operators return NA when operating on an NA and a scalar - for f in map(eval, DataArrays.arithmetic_operators) - @assert isna(f(NA, NA)) - @assert isna(f(1, NA)) - @assert isna(f(NA, 1)) + for f in arithmetic_operators + @assert @eval isna(($f)(NA, NA)) + @assert @eval isna(($f)(1, NA)) + @assert @eval isna(($f)(NA, 1)) end # All bit operators return NA when operating on two NA's # All bit operators return NA when operating on a scalar and an NA # All bit operators return NA when operating on an NA and a scalar - for f in map(eval, DataArrays.bit_operators) - @assert isna(f(NA, NA)) - @assert isna(f(1, NA)) - @assert isna(f(NA, 1)) + for f in bit_operators + @assert @eval isna(($f)(NA, NA)) + @assert @eval isna(($f)(1, NA)) + @assert @eval isna(($f)(NA, 1)) end # Unary operators on DataVector's should be equivalent to elementwise # application of those same operators dv = @data ones(5) @test_da_pda dv begin - for f in map(eval, DataArrays.numeric_unary_operators) + for f in [+,-] for i in 1:length(dv) @assert f(dv)[i] == f(dv[i]) end @@ -62,7 +103,7 @@ module TestOperators end dv = convert(DataArray, trues(5)) @test_da_pda dv begin - for f in map(eval, DataArrays.logical_unary_operators) + for f in [!] for i in 1:length(dv) @assert f(dv)[i] == f(dv[i]) end @@ -99,9 +140,9 @@ module TestOperators # Elementary functions on DataVector's dv = convert(DataArray, ones(5)) @test_da_pda dv begin - for f in map(eval, DataArrays.elementary_functions) + for f in elementary_functions for i in 1:length(dv) - @assert f(dv)[i] == f(dv[i]) + @assert @eval ($f).(dv)[$i] == ($f)(dv[$i]) end end end @@ -109,18 +150,7 @@ module TestOperators # Broadcasting operations between NA's and DataVector's dv = convert(DataArray, ones(5)) @test_da_pda dv begin - for f in map(eval, [:(.+), - :(+), - :(.-), - :(-), - :(*), - :(.*), - :(./), - :(.^), - :(Base.div), - :(Base.mod), - :(Base.fld), - :(Base.rem)]) + for f in [+, *, Base.div, Base.mod, Base.fld, Base.rem] for i in 1:length(dv) @assert isna(f(dv, NA)[i]) @assert isna(f(NA, dv)[i]) @@ -128,6 +158,14 @@ module TestOperators @assert f(1, dv)[i] == f(1, dv[i]) end end + for f in arithmetic_operators + for i in 1:length(dv) + @assert @eval isna(($f).(dv, NA)[$i]) + @assert @eval isna(($f).(NA, dv)[$i]) + @assert @eval ($f).(dv, 1)[$i] == ($f)(dv[$i], 1) + @assert @eval ($f).(1, dv)[$i] == ($f)(1, dv[$i]) + end + end end @test_da_pda dv begin @@ -138,10 +176,10 @@ module TestOperators end dv = @data([false, true, false, true, false]) - for f in map(eval, DataArrays.bit_operators) + for f in bit_operators for i in 1:length(dv) - @assert f(dv, true)[i] == f(dv[i], true) - @assert f(true, dv)[i] == f(true, dv[i]) + @assert @eval $(f).(dv, true)[$i] == ($f).(dv[$i], true) + @assert @eval $(f).(true, dv)[$i] == ($f).(true, dv[$i]) end end @@ -153,20 +191,20 @@ module TestOperators bbv = BitArray([true, false, false, true, true]) bdv = @data [false, true, false, false, true] @test_da_pda dv begin - for f in map(eval, DataArrays.array_arithmetic_operators) + for f in [:(+),:(-),:(*),:(^)] for i in 1:length(dv) - @assert isna(f(v, dv)[i]) && isna(dv[i]) || - f(v, dv)[i] == f(v[i], dv[i]) - @assert isna(f(dv, v)[i]) && isna(dv[i]) || - f(dv, v)[i] == f(dv[i], v[i]) + @assert @eval isna(($f).(v, dv)[$i]) && isna(dv[$i]) || + ($f).(v, dv)[$i] == ($f)(v[$i], dv[$i]) + @assert @eval isna(($f).(dv, v)[$i]) && isna(dv[$i]) || + ($f).(dv, v)[$i] == ($f)(dv[$i], v[$i]) end end - for f in map(eval, DataArrays.bit_operators) + for f in bit_operators for i in 1:length(bdv) - @assert f(bv, bdv)[i] == f(bv[i], bdv[i]) - @assert f(bdv, bv)[i] == f(bdv[i], bv[i]) - @assert f(bbv, bdv)[i] == f(bbv[i], bdv[i]) - @assert f(bdv, bbv)[i] == f(bdv[i], bbv[i]) + @assert @eval ($f).(bv, bdv)[$i] == ($f).(bv[$i], bdv[$i]) + @assert @eval ($f).(bdv, bv)[$i] == ($f).(bdv[$i], bv[$i]) + @assert @eval ($f).(bbv, bdv)[$i] == ($f).(bbv[$i], bdv[$i]) + @assert @eval ($f).(bdv, bbv)[$i] == ($f).(bdv[$i], bbv[$i]) end end end @@ -177,15 +215,21 @@ module TestOperators dvd = @data([Base.Date("2000-01-01"), Base.Date("2010-01-01"), Base.Date("2010-01-05")]) dv[1] = dvd[1] = NA @test_da_pda dv begin - for f in map(eval, DataArrays.array_arithmetic_operators) + for f in [:(+),:(-),:(*),:(^)] for i in 1:length(dv) - @assert isna(f(dv, dv)[i]) && isna(dv[i]) || - f(dv, dv)[i] == f(dv[i], dv[i]) + @assert @eval isna(($f).(dv, dv)[$i]) && isna(dv[$i]) || + ($f).(dv, dv)[$i] == ($f)(dv[$i], dv[$i]) end end - for f in map(eval, DataArrays.bit_operators) + for f in [+,-] + for i in 1:length(dv) + @assert isna((f)(dv, dv)[i]) && isna(dv[i]) || + (f)(dv, dv)[i] == (f)(dv[i], dv[i]) + end + end + for f in bit_operators for i in 1:length(bv) - @assert f(bv, bv)[i] == f(bv[i], bv[i]) + @assert @eval ($f).(bv, bv)[$i] == ($f).(bv[$i], bv[$i]) end end for i in 1:length(dvd) @@ -233,16 +277,18 @@ module TestOperators end # Pairwise vector operators on DataVector's + const pairwise_vector_operators = [diff] + dv = @data([911, 269, 835.0, 448, 772]) # Dates are an example of type for which operations return a different type from their inputs dvd = @data([Base.Date("2000-01-01"), Base.Date("2010-01-01"), Base.Date("2010-01-05")]) - for f in map(eval, DataArrays.pairwise_vector_operators) + for f in pairwise_vector_operators @assert isequal(f(dv), f(dv.data)) @assert isequal(f(dvd), f(dvd.data)) end dv = @data([NA, 269, 835.0, 448, 772]) dvd = @data([NA, Base.Date("2000-01-01"), Base.Date("2010-01-01"), Base.Date("2010-01-05")]) - for f in map(eval, DataArrays.pairwise_vector_operators) + for f in pairwise_vector_operators v = f(dv) @assert isna(v[1]) @assert isequal(v[2:4], f(dv.data)[2:4]) @@ -253,7 +299,7 @@ module TestOperators end dv = @data([911, NA, 835.0, 448, 772]) dvd = @data([Base.Date("2000-01-01"), NA, Base.Date("2010-01-01"), Base.Date("2010-01-05")]) - for f in map(eval, DataArrays.pairwise_vector_operators) + for f in pairwise_vector_operators v = f(dv) @assert isna(v[1]) @assert isna(v[2]) @@ -266,7 +312,7 @@ module TestOperators end dv = @data([911, 269, 835.0, 448, NA]) dvd = @data([Base.Date("2000-01-01"), Base.Date("2010-01-01"), Base.Date("2010-01-05"), NA]) - for f in map(eval, DataArrays.pairwise_vector_operators) + for f in pairwise_vector_operators v = f(dv) @assert isna(v[4]) @assert isequal(v[1:3], f(dv.data)[1:3]) @@ -278,13 +324,13 @@ module TestOperators # Cumulative vector operators on DataVector's dv = convert(DataArray, ones(5)) - for f in map(eval, DataArrays.cumulative_vector_operators) + for f in [Base.cumprod, Base.cumsum, t -> accumulate(min, t), t -> accumulate(max, t)] for i in 1:length(dv) @assert f(dv)[i] == f(dv.data)[i] end end dv[4] = NA - for f in map(eval, DataArrays.cumulative_vector_operators) + for f in [Base.cumprod, Base.cumsum] for i in 1:3 @assert f(dv)[i] == f(dv.data)[i] end @@ -445,10 +491,10 @@ module TestOperators @assert isequal(dv, rdv) # Issue #90 - a = @data([false, true, false, true]); - b = @data([false, false, true, true]); - a[:] = NA; - b[:] = NA; - @test allna(a & b) - @test allna(a | b) + a = @data([false, true, false, true]) + b = @data([false, false, true, true]) + a[:] = NA + b[:] = NA + @test allna(a .& b) + @test allna(a .| b) end diff --git a/test/reduce.jl b/test/reduce.jl index aa7d21d..16fa026 100644 --- a/test/reduce.jl +++ b/test/reduce.jl @@ -6,37 +6,20 @@ srand(1337) ## extended test of sum for skipna in (true, false) - if VERSION < v"0.5-" - @test sum(@data(Int8[]); skipna=skipna) === 0 - @test sum(@data(Int[]); skipna=skipna) === 0 - @test sum(@data(Float64[]); skipna=skipna) === 0.0 - - @test sum(@data([@compat(Int8(3))]); skipna=skipna) === 3 - @test sum(@data([3]); skipna=skipna) === 3 - @test sum(@data([3.0]); skipna=skipna) === 3.0 - - z = DataArray(reshape(1:16, (2,2,2,2))) - fz = convert(DataArray{Float64}, z) - bfz = convert(DataArray{BigFloat}, z) - @test sum(z) === 136 - @test sum(fz) === 136.0 - @test sum(bfz) == 136 - else - @test sum(@data(Int8[]); skipna=skipna) === Int32(0) - @test sum(@data(Int[]); skipna=skipna) === 0 - @test sum(@data(Float64[]); skipna=skipna) === 0.0 - - @test sum(@data([@compat(Int8(3))]); skipna=skipna) === Int32(3) - @test sum(@data([3]); skipna=skipna) === 3 - @test sum(@data([3.0]); skipna=skipna) === 3.0 - - z = DataArray(reshape(1:16, (2,2,2,2))) - fz = convert(DataArray{Float64}, z) - bfz = convert(DataArray{BigFloat}, z) - @test sum(z) === 136 - @test sum(fz) === 136.0 - @test sum(bfz) == 136 - end + @test sum(@data(Int8[]); skipna=skipna) === Int32(0) + @test sum(@data(Int[]); skipna=skipna) === 0 + @test sum(@data(Float64[]); skipna=skipna) === 0.0 + + @test sum(@data([Int8(3)]); skipna=skipna) === Int32(3) + @test sum(@data([3]); skipna=skipna) === 3 + @test sum(@data([3.0]); skipna=skipna) === 3.0 + + z = DataArray(reshape(1:16, (2,2,2,2))) + fz = convert(DataArray{Float64}, z) + bfz = convert(DataArray{BigFloat}, z) + @test sum(z) === 136 + @test sum(fz) === 136.0 + @test sum(bfz) == 136 end @test sum(@data(Int[NA])) === NA @@ -58,30 +41,30 @@ bfz = convert(DataArray{BigFloat}, z) @test sum(fz; skipna=true) === 130.0 @test sum(bfz; skipna=true) == 130 -bs = DataArrays.sum_pairwise_blocksize(@functorize(identity)) +bs = DataArrays.sum_pairwise_blocksize(identity) for n in [bs-64, bs-1, bs, bs+1, bs+2, 2*bs-2:2*bs+3..., 4*bs-2:4*bs+3...] da = DataArray(randn(n)) s = sum(da.data) - @test_approx_eq sum(da) s - @test_approx_eq sum(da; skipna=true) s + @test sum(da) ≈ s + @test sum(da; skipna=true) ≈ s da2 = copy(da) da2[1:2:end] = NA @test isna(sum(da2)) - @test_approx_eq sum(da2; skipna=true) sum(dropna(da2)) + @test sum(da2; skipna=true) ≈ sum(dropna(da2)) da2 = convert(DataArray{BigFloat}, da2) @test isna(sum(da2)) - @test_approx_eq sum(da2; skipna=true) sum(dropna(da2)) + @test sum(da2; skipna=true) ≈ sum(dropna(da2)) da2 = copy(da) da2[2:2:end] = NA @test isna(sum(da2)) - @test_approx_eq sum(da2; skipna=true) sum(dropna(da2)) + @test sum(da2; skipna=true) ≈ sum(dropna(da2)) da2 = convert(DataArray{BigFloat}, da2) @test isna(sum(da2)) - @test_approx_eq sum(da2; skipna=true) sum(dropna(da2)) + @test sum(da2; skipna=true) ≈ sum(dropna(da2)) end ## other reductions @@ -93,7 +76,7 @@ macro same_behavior(ex1, ex2) catch e e end - isa(v, Exception) ? @test_throws(typeof(v), $ex1) : @test_approx_eq($ex1, v) + isa(v, Exception) ? @test_throws(typeof(v), $ex1) : @test isapprox($ex1, v) end end @@ -137,9 +120,9 @@ end for fn in (+, *, |, &) da = convert(DataArray, bitrand(10)) - s = mapreduce(@functorize(identity), fn, da.data) - @test mapreduce(@functorize(identity), fn, da) == s - @test mapreduce(@functorize(identity), fn, da; skipna=true) == s + s = mapreduce(identity, fn, da.data) + @test mapreduce(identity, fn, da) == s + @test mapreduce(identity, fn, da; skipna=true) == s @test reduce(fn, da) == s @test reduce(fn, da; skipna=true) == s end @@ -159,11 +142,11 @@ da2 = DataArray(randn(128)) @same_behavior mean(da1, weights(da2.data); skipna=true) mean(da1.data, weights(da2.data)) da1[1:3:end] = NA -@same_behavior mean(da1, weights(da2); skipna=true) mean(dropna(da1), weights(da2.data[!da1.na])) -@same_behavior mean(da1, weights(da2.data); skipna=true) mean(dropna(da1), weights(da2.data[!da1.na])) +@same_behavior mean(da1, weights(da2); skipna=true) mean(dropna(da1), weights(da2.data[(!).(da1.na)])) +@same_behavior mean(da1, weights(da2.data); skipna=true) mean(dropna(da1), weights(da2.data[(!).(da1.na)])) da2[1:2:end] = NA -keep = !da1.na & !da2.na +keep = .!da1.na .& .!da2.na @test isna(mean(da1, weights(da2))) @same_behavior mean(da1, weights(da2); skipna=true) mean(da1.data[keep], weights(da2.data[keep])) end diff --git a/test/reducedim.jl b/test/reducedim.jl index 4061e27..c79310f 100644 --- a/test/reducedim.jl +++ b/test/reducedim.jl @@ -105,13 +105,13 @@ end macro test_da_approx_eq(da1, da2) quote - v1 = $da1 - v2 = $da2 + v1 = $(esc(da1)) + v2 = $(esc(da2)) na = isna(v1) @test na == isna(v2) - defined = !na + defined = (!).(na) if any(defined) - @test_approx_eq v1[defined] v2[defined] + @test isapprox(v1[defined], v2[defined], nans = true) end end end @@ -127,11 +127,7 @@ for Areduc in (DataArray(rand(3, 4, 5, 6)), (1, 2, 3), (1, 3, 4), (2, 3, 4), (1, 2, 3, 4)] # println("region = $region, skipna = $skipna") - if VERSION < v"0.6.0-dev.1121" - outputs = Any[DataArray(fill(NaN, Base.reduced_dims(size(Areduc), region)))] - else - outputs = Any[DataArray(fill(NaN, length.(Base.reduced_indices(indices(Areduc), region))))] - end + outputs = Any[DataArray(fill(NaN, length.(Base.reduced_indices(indices(Areduc), region))))] has_na = anyna(Areduc) if has_na && !skipna # Should throw an error reducing to non-DataArray diff --git a/test/sort.jl b/test/sort.jl index 58e969a..c094c81 100644 --- a/test/sort.jl +++ b/test/sort.jl @@ -15,9 +15,9 @@ for T in (Float64, BigFloat) n = 1000 na = bitrand(n) nna = sum(na) - a = Array(T, n) + a = Vector{T}(n) ra = randn(n-nna) - a[!na] = ra + a[.!na] = ra for da in (DataArray(a, na), PooledDataArray(a, na), (pda = PooledDataArray(a, na); setlevels!(pda, shuffle!(pda.pool)))) @test isequal(sort(da), [DataArray(sort(dropna(da))); DataArray(T, nna)]) @test isequal(sort(da; lt=(x,y)->isless(x,y)), [DataArray(sort(dropna(da))); DataArray(T, nna)])