Skip to content

Fix constructors and conversion between categorical arrays #63

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 20, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 44 additions & 51 deletions src/array.jl
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,8 @@ for (A, V, M) in ((:CategoricalArray, :CategoricalVector, :CategoricalMatrix),
$As(A::NullableCategoricalArray; ordered::Bool=false)

If `A` is already a `CategoricalArray` or a `NullableCategoricalArray`, its levels
and their order are preserved. The reference type is also preserved unless `compress`
is provided. On the contrary, the `ordered` keyword argument takes precedence over
the corresponding property of the input array, even when not provided.

In all cases, a copy of `A` is made: use `convert` to avoid making copies when
unnecessary.
are preserved; the same applies to the ordered property and the reference type unless
explicitly overridden.
""" ->
function $A end

Expand Down Expand Up @@ -88,12 +84,8 @@ for (A, V, M) in ((:CategoricalArray, :CategoricalVector, :CategoricalMatrix),
$Vs(A::NullableCategoricalVector; ordered::Bool=false)

If `A` is already a `CategoricalVector` or a `NullableCategoricalVector`, its levels
and their order are preserved. The reference type is also preserved unless `compress`
is provided. On the contrary, the `ordered` keyword argument takes precedence over
the corresponding property of the input array, even when not provided.

In all cases, a copy of `A` is made: use `convert` to avoid making copies when
unnecessary.
are preserved; the same applies to the ordered property and the reference type unless
explicitly overridden.
""" ->
function $V end

Expand All @@ -118,16 +110,12 @@ for (A, V, M) in ((:CategoricalArray, :CategoricalVector, :CategoricalMatrix),
argument determines whether the array values can be compared according to the
ordering of levels or not (see [`isordered`](@ref)).

$Ms(A::CategoricalMatrix; ordered::Bool=false)
$Ms(A::NullableCategoricalMatrix; ordered::Bool=false)
$Ms(A::CategoricalMatrix; ordered::Bool=isordered(A))
$Ms(A::NullableCategoricalMatrix; ordered::Bool=isordered(A))

If `A` is already a `CategoricalMatrix` or a `NullableCategoricalMatrix`, its levels
and their order are preserved. The reference type is also preserved unless `compress`
is provided. On the contrary, the `ordered` keyword argument takes precedence over
the corresponding property of the input array, even when not provided.

In all cases, a copy of `A` is made: use `convert` to avoid making copies when
unnecessary.
are preserved; the same applies to the ordered property and the reference type unless
explicitly overridden.
""" ->
function $M end

Expand Down Expand Up @@ -187,11 +175,16 @@ for (A, V, M) in ((:CategoricalArray, :CategoricalVector, :CategoricalMatrix),

## Constructors from arrays

# This method is needed to ensure ordered!() only mutates a copy of A
@compat (::Type{$A{T, N, R}}){T, N, R}(A::$A{T, N, R}; ordered=_isordered(A)) =
ordered!(copy(A), ordered)
# This method is needed to ensure that a copy of the pool is always made
# so that ordered!() does not affect the original array
#
# Constructor from any CatArray with matching dimensionality N; the source
# element type S and reference type Q may differ from the target T and R.
# `ordered` defaults to `_isordered(A)` and is applied to the result via ordered!().
@compat function (::Type{$A{T, N, R}}){S, T, N, Q, R}(A::CatArray{S, N, Q}; ordered=_isordered(A))
# Delegate the actual element/reference type conversion to convert()
res = convert($A{T, N, R}, A)
if res.pool === A.pool # convert() only makes a copy when necessary
# The result still shares A's pool object: rebuild it around a deep copy of the
# pool (res.refs is passed through as-is) so the ordered!() call below acts on
# the copy rather than on A's pool
res = $A{T, N, R}(res.refs, deepcopy(res.pool))
end
# Last expression, so its value is what the constructor returns
ordered!(res, ordered)
end

# Note this method is also used for CategoricalArrays when T, N or R don't match
@compat (::Type{$A{T, N, R}}){T, N, R}(A::AbstractArray; ordered=_isordered(A)) =
ordered!(convert($A{T, N, R}, A), ordered)

Expand All @@ -218,21 +211,21 @@ for (A, V, M) in ((:CategoricalArray, :CategoricalVector, :CategoricalMatrix),
$A{T, 2}(A, ordered=ordered)

# From CategoricalArray (preserve R)
@compat (::Type{$A{T, N}}){S, T, N, R}(A::$A{S, N, R}; ordered=_isordered(A)) =
@compat (::Type{$A{T, N}}){S, T, N, R}(A::CatArray{S, N, R}; ordered=_isordered(A)) =
$A{T, N, R}(A, ordered=ordered)
@compat (::Type{$A{T}}){S, T, N, R}(A::$A{S, N, R}; ordered=_isordered(A)) =
@compat (::Type{$A{T}}){S, T, N, R}(A::CatArray{S, N, R}; ordered=_isordered(A)) =
$A{T, N, R}(A, ordered=ordered)
@compat (::Type{$A}){T, N, R}(A::$A{T, N, R}; ordered=_isordered(A)) =
@compat (::Type{$A}){T, N, R}(A::CatArray{T, N, R}; ordered=_isordered(A)) =
$A{T, N, R}(A, ordered=ordered)

@compat (::Type{$V{T}}){S, T, R}(A::$V{S, R}; ordered=_isordered(A)) =
@compat (::Type{$V{T}}){S, T, R}(A::CatArray{S, 1, R}; ordered=_isordered(A)) =
$A{T, 1, R}(A, ordered=ordered)
@compat (::Type{$V}){T, R}(A::$V{T, R}; ordered=_isordered(A)) =
@compat (::Type{$V}){T, R}(A::CatArray{T, 1, R}; ordered=_isordered(A)) =
$A{T, 1, R}(A, ordered=ordered)

@compat (::Type{$M{T}}){S, T, R}(A::$M{S, R}; ordered=_isordered(A)) =
@compat (::Type{$M{T}}){S, T, R}(A::CatArray{S, 2, R}; ordered=_isordered(A)) =
$A{T, 2, R}(A, ordered=ordered)
@compat (::Type{$M}){T, R}(A::$M{T, R}; ordered=_isordered(A)) =
@compat (::Type{$M}){T, R}(A::CatArray{T, 2, R}; ordered=_isordered(A)) =
$A{T, 2, R}(A, ordered=ordered)


Expand Down Expand Up @@ -270,21 +263,25 @@ for (A, V, M) in ((:CategoricalArray, :CategoricalVector, :CategoricalMatrix),
res
end

# From CategoricalArray (preserve R)
function convert{S, T, N, R}(::Type{$A{T, N, R}}, A::$A{S, N})
# From CategoricalArray (preserve levels, ordering and R)
function convert{S, T, N, R}(::Type{$A{T, N, R}}, A::CatArray{S, N})
if length(A.pool) > typemax(R)
throw(LevelsException{T, R}(levels(A)[typemax(R)+1:end]))
end

if $A <: CategoricalArray && isa(A, NullableCategoricalArray)
any(x -> x == 0, A.refs) && throw(NullException())
end

refs = convert(Array{R, N}, A.refs)
pool = convert(CategoricalPool{T, R}, A.pool)
ordered!($A(refs, pool), isordered(A))
$A(refs, pool)
end
convert{S, T, N, R}(::Type{$A{T, N}}, A::$A{S, N, R}) =
convert{S, T, N, R}(::Type{$A{T, N}}, A::CatArray{S, N, R}) =
convert($A{T, N, R}, A)
convert{S, T, N, R}(::Type{$A{T}}, A::$A{S, N, R}) =
convert{S, T, N, R}(::Type{$A{T}}, A::CatArray{S, N, R}) =
convert($A{T, N, R}, A)
convert{T, N, R}(::Type{$A}, A::$A{T, N, R}) =
convert{T, N, R}(::Type{$A}, A::CatArray{T, N, R}) =
convert($A{T, N, R}, A)

# R<:Integer is needed for this method to be considered more specific
Expand Down Expand Up @@ -658,37 +655,33 @@ this parameter will also introduce a type instability which can affect performan
the function where the call is made. Therefore, use this option with caution (the
one-argument version does not suffer from this problem).

categorical{T}(A::CategoricalArray{T}[, compress::Bool]; ordered::Bool=false)
categorical{T}(A::NullableCategoricalArray{T}[, compress::Bool]; ordered::Bool=false)
categorical{T}(A::CategoricalArray{T}[, compress::Bool]; ordered::Bool=isordered(A))
categorical{T}(A::NullableCategoricalArray{T}[, compress::Bool]; ordered::Bool=isordered(A))

If `A` is already a `CategoricalArray` or a `NullableCategoricalArray`, its levels
are preserved. The reference type is also preserved unless `compress` is provided.
On the contrary, the `ordered` keyword argument takes precedence over the
corresponding property of the input array, even when not provided.

In all cases, a copy of `A` is made: use `convert` to avoid making copies when
unnecessary.
are preserved; the same applies to the ordered property, and to the reference type
unless `compress` is passed.
"""
function categorical end

categorical(A::AbstractArray; ordered=false) = CategoricalArray(A, ordered=ordered)
categorical{T<:Nullable}(A::AbstractArray{T}; ordered=false) =
categorical(A::AbstractArray; ordered=_isordered(A)) = CategoricalArray(A, ordered=ordered)
categorical{T<:Nullable}(A::AbstractArray{T}; ordered=_isordered(A)) =
NullableCategoricalArray(A, ordered=ordered)

# Type-unstable methods
function categorical{T, N}(A::AbstractArray{T, N}, compress; ordered=false)
function categorical{T, N}(A::AbstractArray{T, N}, compress; ordered=_isordered(A))
# The reference type depends on the runtime value of `compress` (hence the
# "type-unstable" label): when true it is obtained from reftype() applied to the
# number of unique values in A, otherwise DefaultRefType is used
RefType = compress ? reftype(length(unique(A))) : DefaultRefType
# Forward to the fully parameterized CategoricalArray constructor
CategoricalArray{T, N, RefType}(A, ordered=ordered)
end
function categorical{T<:Nullable, N}(A::AbstractArray{T, N}, compress; ordered=false)
function categorical{T<:Nullable, N}(A::AbstractArray{T, N}, compress; ordered=_isordered(A))
# Same reference-type selection as the non-Nullable method: reftype() of the
# number of unique values in A when `compress` is true, DefaultRefType otherwise
RefType = compress ? reftype(length(unique(A))) : DefaultRefType
# Arrays with Nullable elements are forwarded to the NullableCategoricalArray constructor
NullableCategoricalArray{T, N, RefType}(A, ordered=ordered)
end
function categorical{T, N, R}(A::CategoricalArray{T, N, R}, compress; ordered=false)
function categorical{T, N, R}(A::CategoricalArray{T, N, R}, compress; ordered=_isordered(A))
# For an input that is already categorical, the reference type R of A is kept
# unless `compress` is true, in which case it is obtained from reftype() applied
# to the number of levels of A
RefType = compress ? reftype(length(levels(A))) : R
# Forward to the fully parameterized CategoricalArray constructor
CategoricalArray{T, N, RefType}(A, ordered=ordered)
end
function categorical{T, N, R}(A::NullableCategoricalArray{T, N, R}, compress; ordered=false)
function categorical{T, N, R}(A::NullableCategoricalArray{T, N, R}, compress; ordered=_isordered(A))
# The reference type R of A is kept unless `compress` is true, in which case it
# is obtained from reftype() applied to the number of levels of A
RefType = compress ? reftype(length(levels(A))) : R
# Forward to the fully parameterized NullableCategoricalArray constructor
NullableCategoricalArray{T, N, RefType}(A, ordered=ordered)
end
Expand Down
2 changes: 1 addition & 1 deletion src/pool.jl
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ function Base.convert{S, R}(::Type{CategoricalPool{S, R}}, pool::CategoricalPool
indexS = convert(Vector{S}, pool.index)
invindexS = convert(Dict{S, R}, pool.invindex)
order = convert(Vector{R}, pool.order)
return CategoricalPool(indexS, invindexS, order)
return CategoricalPool(indexS, invindexS, order, pool.ordered)
end

function Base.show{T, R}(io::IO, pool::CategoricalPool{T, R})
Expand Down
4 changes: 4 additions & 0 deletions test/05_convert.jl
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,8 @@ module TestConvert
@test promote(1, v1) === (1, 1)
@test promote(1.0, v1) === (1.0, 1.0)
@test promote(0x1, v1) === (1, 1)

# Test that ordered property is preserved
pool = CategoricalPool([1, 2, 3], true)
@test convert(CategoricalPool{Float64, UInt8}, pool).ordered === true
end
66 changes: 0 additions & 66 deletions test/11_array.jl
Original file line number Diff line number Diff line change
Expand Up @@ -30,28 +30,6 @@ for ordered in (false, true)
@test convert(CategoricalVector{String, DefaultRefType}, x) == x
@test convert(CategoricalVector{String, UInt8}, x) == x

for y in (CategoricalArray(x, ordered=ordered),
CategoricalArray{String}(x, ordered=ordered),
CategoricalArray{String, 1}(x, ordered=ordered),
CategoricalArray{String, 1, R}(x, ordered=ordered),
CategoricalArray{String, 1, DefaultRefType}(x, ordered=ordered),
CategoricalArray{String, 1, UInt8}(x, ordered=ordered),
CategoricalVector(x, ordered=ordered),
CategoricalVector{String}(x, ordered=ordered),
CategoricalVector{String, R}(x, ordered=ordered),
CategoricalVector{String, DefaultRefType}(x, ordered=ordered),
CategoricalVector{String, UInt8}(x, ordered=ordered),
categorical(x, ordered=ordered),
categorical(x, false, ordered=ordered),
categorical(x, true, ordered=ordered))
@test isa(y, CategoricalVector{String})
@test isordered(y) === ordered
@test y == x
@test y !== x
@test y.refs !== x.refs
@test y.pool !== x.pool
end

@test convert(CategoricalArray, a) == x
@test convert(CategoricalArray{String}, a) == x
@test convert(CategoricalArray{String, 1}, a) == x
Expand Down Expand Up @@ -254,28 +232,6 @@ for ordered in (false, true)
@test convert(CategoricalVector{Float64, DefaultRefType}, x) == x
@test convert(CategoricalVector{Float64, UInt8}, x) == x

for y in (CategoricalArray(x, ordered=ordered),
CategoricalArray{Float64}(x, ordered=ordered),
CategoricalArray{Float64, 1}(x, ordered=ordered),
CategoricalArray{Float64, 1, R}(x, ordered=ordered),
CategoricalArray{Float64, 1, DefaultRefType}(x, ordered=ordered),
CategoricalArray{Float64, 1, UInt8}(x, ordered=ordered),
CategoricalVector(x, ordered=ordered),
CategoricalVector{Float64}(x, ordered=ordered),
CategoricalVector{Float64, R}(x, ordered=ordered),
CategoricalVector{Float64, DefaultRefType}(x, ordered=ordered),
CategoricalVector{Float64, UInt8}(x, ordered=ordered),
categorical(x, ordered=ordered),
categorical(x, false, ordered=ordered),
categorical(x, true, ordered=ordered))
@test isa(y, CategoricalVector{Float64})
@test isordered(y) === ordered
@test y == x
@test y !== x
@test y.refs !== x.refs
@test y.pool !== x.pool
end

@test convert(CategoricalArray, a) == x
@test convert(CategoricalArray{Float64}, a) == x
@test convert(CategoricalArray{Float32}, a) == x
Expand Down Expand Up @@ -438,28 +394,6 @@ for ordered in (false, true)
@test convert(CategoricalMatrix{String, DefaultRefType}, x) == x
@test convert(CategoricalMatrix{String, UInt8}, x) == x

for y in (CategoricalArray(x, ordered=ordered),
CategoricalArray{String}(x, ordered=ordered),
CategoricalArray{String, 2}(x, ordered=ordered),
CategoricalArray{String, 2, R}(x, ordered=ordered),
CategoricalArray{String, 2, DefaultRefType}(x, ordered=ordered),
CategoricalArray{String, 2, UInt8}(x, ordered=ordered),
CategoricalMatrix(x, ordered=ordered),
CategoricalMatrix{String}(x, ordered=ordered),
CategoricalMatrix{String, R}(x, ordered=ordered),
CategoricalMatrix{String, DefaultRefType}(x, ordered=ordered),
CategoricalMatrix{String, UInt8}(x, ordered=ordered),
categorical(x, ordered=ordered),
categorical(x, false, ordered=ordered),
categorical(x, true, ordered=ordered))
@test isa(y, CategoricalMatrix{String})
@test isordered(y) === ordered
@test y == x
@test y !== x
@test y.refs !== x.refs
@test y.pool !== x.pool
end

@test convert(CategoricalArray, a) == x
@test convert(CategoricalArray{String}, a) == x
@test convert(CategoricalArray{String, 2, R}, a) == x
Expand Down
Loading