Skip to content
This repository was archived by the owner on May 5, 2019. It is now read-only.

Commit 5f611bf

Browse files
cjprybol authored and nalimilan committed
Update colwise (#28)
Return a vector when a single function is passed, and a matrix when several functions are passed. Rely on comprehensions to infer the return type, and return a NullableArray when applicable. Allow passing a tuple of functions, not only a vector.
1 parent d49f76b commit 5f611bf

File tree

2 files changed

+113
-17
lines changed

2 files changed

+113
-17
lines changed

src/groupeddatatable/grouping.jl

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -232,21 +232,31 @@ dt = DataTable(a = repeat([1, 2, 3, 4], outer=[2]),
232232
b = repeat([2, 1], outer=[4]),
233233
c = randn(8))
234234
colwise(sum, dt)
235+
colwise([sum, length], dt)
236+
colwise((minimum, maximum), dt)
235237
colwise(sum, groupby(dt, :a))
236238
```
237239
238240
"""
239-
colwise(f::Function, d::AbstractDataTable) = Any[vcat(f(d[idx])) for idx in 1:size(d, 2)]
240-
colwise(f::Function, gd::GroupedDataTable) = map(colwise(f), gd)
241-
colwise(f::Function) = x -> colwise(f, x)
242-
colwise(f) = x -> colwise(f, x)
241+
function colwise(f, d::AbstractDataTable)
242+
x = [f(d[i]) for i in 1:ncol(d)]
243+
if eltype(x) <: Nullable
244+
return NullableArray(x)
245+
else
246+
return x
247+
end
248+
end
243249
# apply several functions to each column in a DataTable
244-
colwise{T<:Function}(fns::Vector{T}, d::AbstractDataTable) =
245-
reshape(Any[vcat(f(d[idx])) for f in fns, idx in 1:size(d, 2)],
246-
length(fns)*size(d, 2))
247-
colwise{T<:Function}(fns::Vector{T}, gd::GroupedDataTable) = map(colwise(fns), gd)
248-
colwise{T<:Function}(fns::Vector{T}) = x -> colwise(fns, x)
249-
250+
function colwise(fns::Union{AbstractVector, Tuple}, d::AbstractDataTable)
251+
x = [f(d[i]) for f in fns, i in 1:ncol(d)]
252+
if eltype(x) <: Nullable
253+
return NullableArray(x)
254+
else
255+
return x
256+
end
257+
end
258+
colwise(f, gd::GroupedDataTable) = [colwise(f, g) for g in gd]
259+
colwise(f) = x -> colwise(f, x)
250260

251261
"""
252262
Split-apply-combine in one step; apply `f` to each grouping in `d`
@@ -371,12 +381,11 @@ end
371381

372382
function _makeheaders{T<:Function}(fs::Vector{T}, cn::Vector{Symbol})
373383
fnames = _fnames(fs) # see other/utils.jl
374-
reshape([Symbol(colname,'_',fname) for fname in fnames, colname in cn],
375-
length(fnames)*length(cn))
384+
[Symbol(colname,'_',fname) for fname in fnames for colname in cn]
376385
end
377386

378387
function _aggregate{T<:Function}(d::AbstractDataTable, fs::Vector{T}, headers::Vector{Symbol}, sort::Bool=false)
379-
res = DataTable(colwise(fs, d), headers)
388+
res = DataTable(Any[vcat(f(d[i])) for f in fs for i in 1:size(d, 2)], headers)
380389
sort && sort!(res, cols=headers)
381390
res
382391
end

test/grouping.jl

Lines changed: 91 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,104 @@ module TestGrouping
22
using Base.Test
33
using DataTables
44

5+
srand(1)
56
dt = DataTable(a = repeat([1, 2, 3, 4], outer=[2]),
67
b = repeat([2, 1], outer=[4]),
78
c = randn(8))
89
#dt[6, :a] = Nullable()
910
#dt[7, :b] = Nullable()
1011

11-
cols = [:a, :b]
12+
nullfree = DataTable(Any[collect(1:10)], [:x1])
13+
@testset "colwise" begin
14+
@testset "::Function, ::AbstractDataTable" begin
15+
cw = colwise(sum, dt)
16+
answer = NullableArray([20, 12, -0.4283098098931877])
17+
@test isa(cw, NullableArray{Any, 1})
18+
@test size(cw) == (ncol(dt),)
19+
@test isequal(cw, answer)
20+
21+
cw = colwise(sum, nullfree)
22+
answer = [55]
23+
@test isa(cw, Array{Int, 1})
24+
@test size(cw) == (ncol(nullfree),)
25+
@test cw == answer
26+
end
27+
28+
@testset "::Function, ::GroupedDataTable" begin
29+
gd = groupby(DataTable(A = [:A, :A, :B, :B], B = 1:4), :A)
30+
@test colwise(length, gd) == [[2,2], [2,2]]
31+
end
32+
33+
@testset "::Vector, ::AbstractDataTable" begin
34+
cw = colwise([sum], dt)
35+
answer = NullableArray([20 12 -0.4283098098931877])
36+
@test isa(cw, NullableArray{Any, 2})
37+
@test size(cw) == (length([sum]),ncol(dt))
38+
@test isequal(cw, answer)
39+
40+
cw = colwise([sum, minimum], nullfree)
41+
answer = reshape([55, 1], (2,1))
42+
@test isa(cw, Array{Int, 2})
43+
@test size(cw) == (length([sum, minimum]), ncol(nullfree))
44+
@test cw == answer
45+
46+
cw = colwise([NullableArray], nullfree)
47+
answer = reshape([NullableArray(1:10)], (1,1))
48+
@test isa(cw, Array{NullableArray{Int,1},2})
49+
@test size(cw) == (length([NullableArray]), ncol(nullfree))
50+
@test isequal(cw, answer)
51+
52+
@test_throws MethodError colwise(["Bob", :Susie], DataTable(A = 1:10, B = 11:20))
53+
end
54+
55+
@testset "::Vector, ::GroupedDataTable" begin
56+
gd = groupby(DataTable(A = [:A, :A, :B, :B], B = 1:4), :A)
57+
@test colwise([length], gd) == [[2 2], [2 2]]
58+
end
59+
60+
@testset "::Tuple, ::AbstractDataTable" begin
61+
cw = colwise((sum, length), dt)
62+
answer = Any[Nullable(20) Nullable(12) Nullable(-0.4283098098931877); 8 8 8]
63+
@test isa(cw, Array{Any, 2})
64+
@test size(cw) == (length((sum, length)), ncol(dt))
65+
@test isequal(cw, answer)
66+
67+
cw = colwise((sum, length), nullfree)
68+
answer = reshape([55, 10], (2,1))
69+
@test isa(cw, Array{Int, 2})
70+
@test size(cw) == (length((sum, length)), ncol(nullfree))
71+
@test cw == answer
72+
73+
cw = colwise((CategoricalArray, NullableArray), nullfree)
74+
answer = reshape([CategoricalArray(1:10), NullableArray(1:10)],
75+
(length((CategoricalArray, NullableArray)), ncol(nullfree)))
76+
@test typeof(cw) == Array{AbstractVector,2}
77+
@test size(cw) == (length((CategoricalArray, NullableArray)), ncol(nullfree))
78+
@test isequal(cw, answer)
79+
80+
@test_throws MethodError colwise(("Bob", :Susie), DataTable(A = 1:10, B = 11:20))
81+
end
82+
83+
@testset "::Tuple, ::GroupedDataTable" begin
84+
gd = groupby(DataTable(A = [:A, :A, :B, :B], B = 1:4), :A)
85+
@test colwise((length,), gd) == [[2 2], [2 2]]  # `(length)` is just `length`; the trailing comma makes a 1-tuple so the Tuple method (matrix result per group) is exercised
86+
end
87+
88+
@testset "::Function" begin
89+
cw = map(colwise(sum), (nullfree, dt))
90+
answer = ([55], NullableArray(Any[20, 12, -0.4283098098931877]))
91+
@test isequal(cw, answer)
92+
93+
cw = map(colwise((sum, length)), (nullfree, dt))
94+
answer = (reshape([55, 10], (2,1)), Any[Nullable(20) Nullable(12) Nullable(-0.4283098098931877); 8 8 8])
95+
@test isequal(cw, answer)
96+
97+
cw = map(colwise([sum, length]), (nullfree, dt))
98+
@test isequal(cw, answer)
99+
end
100+
end
12101

102+
cols = [:a, :b]
13103
f(dt) = DataTable(cmax = maximum(dt[:c]))
14104

15105
sdt = unique(dt[cols])
@@ -24,9 +114,6 @@ module TestGrouping
24114

25115
byf = by(dt, :a, dt -> DataTable(bsum = sum(dt[:b])))
26116

27-
@test all(T -> T <: AbstractVector, map(typeof, colwise([sum], dt)))
28-
@test all(T -> T <: AbstractVector, map(typeof, colwise(sum, dt)))
29-
30117
# groupby() without groups sorting
31118
gd = groupby(dt, cols)
32119
ga = map(f, gd)

0 commit comments

Comments
 (0)