Skip to content
This repository was archived by the owner on May 4, 2019. It is now read-only.

Commit d78a6d3

Browse files
committed
Merge pull request #101 from JuliaStats/sjk/reduce
Implement reductions with optional skipna argument
2 parents e0e4a70 + 0381da7 commit d78a6d3

13 files changed

+1182
-15
lines changed

benchmark/operators.jl

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ macro perf(fn, replications, idx...)
3535
quote
3636
println($name)
3737
gc_disable()
38-
df = compare([()->$fn for i=$idx], $replications)
38+
df = compare([let i=i; ()->$fn; end for i=$idx], $replications)
3939
gc_enable()
4040
gc()
4141
df[:Function] = TEST_NAMES[$idx]
@@ -71,7 +71,6 @@ const Bool2 = make_test_types(make_bools, 1000)
7171
@perf Bool1[i] $ Bool2[i] 100
7272

7373
# Vector operators
74-
@perf sum(Float1[i]) 250 1:div(length(Float1), 2)
7574
@perf diff(Float1[i]) 50 1:div(length(Float1), 2)
7675
@perf cumsum(Float1[i]) 50 1:div(length(Float1), 2)
7776
end

benchmark/reduce.jl

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
module ReduceBenchmark
2+
using DataArrays, Benchmark
3+
4+
# seed rng for more consistent timings
5+
srand(1776)
6+
7+
const TEST_NAMES = [
8+
"Vector",
9+
"DataVector No NA skipna=false",
10+
"DataVector No NA skipna=true",
11+
"DataVector Half NA skipna=false",
12+
"DataVector Half NA skipna=true"
13+
]
14+
15+
# Build the three inputs that `@perf` benchmarks, returned as a tuple:
#   1. the raw result of `genfunc(sz)`,
#   2. that result wrapped in a DataArray with no mask argument,
#   3. that result wrapped in a DataArray together with the mask `na`.
# NOTE(review): `genfunc` is assumed to return a vector of length `sz` (it is
# called with `rand` below) -- confirm before passing other generators.
function make_test_types(genfunc, sz)
16+
mat = genfunc(sz)
17+
# floor(sz/2) `true`s plus ceil(sz/2) `false`s, shuffled in place, so the mask
# always has `sz` entries even when `sz` is odd.
# NOTE(review): relies on `[a, b]` concatenating the two bit vectors, and
# presumably `true` marks an NA entry ("Half NA" in TEST_NAMES) -- confirm.
na = shuffle!([trues(ifloor(sz/2)), falses(iceil(sz/2))])
18+
(
19+
mat,
20+
DataArray(mat),
21+
DataArray(mat, na)
22+
)
23+
end
24+
25+
const Data = make_test_types(rand, 100000000)
26+
27+
# Benchmark the reduction `fn` over the three inputs in `Data`.
#   fn           -- the function to time; spliced into each closure below
#   replications -- forwarded unchanged as the second argument of `compare`
# The generated code prints `fn`, times five closures, and prints the
# resulting table.
macro perf(fn, replications)
28+
quote
29+
println($fn)
30+
# Five cases, listed in the same order as the labels in TEST_NAMES:
# Data[1] as-is, then Data[2] and Data[3] each without and with skipna=true.
fns = [()->$fn(Data[1]),
31+
()->$fn(Data[2]),
32+
()->$fn(Data[2]; skipna=true),
33+
()->$fn(Data[3]),
34+
()->$fn(Data[3]; skipna=true)]
35+
# The timed section runs between gc_disable() and gc_enable(); an explicit
# gc() follows once timing is done.
gc_disable()
36+
df = compare(fns, $replications)
37+
gc_enable()
38+
gc()
39+
# Replace the :Function column with the human-readable case labels.
df[:Function] = TEST_NAMES
40+
# Each case's :Average divided by the first row's :Average (the Data[1] case).
df[:Relative] = df[:Average]./df[1, :Average]
41+
println(df)
42+
end
43+
end
44+
45+
@perf sum 10
46+
@perf maximum 10
47+
@perf mean 10
48+
@perf var 10
49+
end

benchmark/reducedim.jl

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
module ReducedimBenchmark
2+
using DataArrays, Benchmark
3+
4+
# seed rng for more consistent timings
5+
srand(1776)
6+
7+
const TEST_NAMES = [
8+
"Matrix",
9+
"DataMatrix No NA skipna=false",
10+
"DataMatrix No NA skipna=true",
11+
"DataMatrix Half NA skipna=false",
12+
"DataMatrix Half NA skipna=true"
13+
]
14+
15+
# Build the three `sz`-by-`sz` inputs that `@perf` benchmarks, returned as a
# tuple:
#   1. `genfunc(abs2(sz))` reshaped to `sz` x `sz`,
#   2. that matrix wrapped in a DataArray with no mask argument,
#   3. that matrix wrapped in a DataArray together with the reshaped mask `na`.
# NOTE(review): `genfunc` is assumed to return a vector of length `abs2(sz)`
# (it is called with `rand` below) -- confirm before passing other generators.
function make_test_types(genfunc, sz)
16+
# abs2(sz) is used as the total element count of the sz x sz matrix.
mat = genfunc(abs2(sz))
17+
# floor(n/2) `true`s plus ceil(n/2) `false`s with n = abs2(sz), shuffled in
# place, so the mask always has n entries even when n is odd.
# NOTE(review): relies on `[a, b]` concatenating the two bit vectors, and
# presumably `true` marks an NA entry ("Half NA" in TEST_NAMES) -- confirm.
na = shuffle!([trues(ifloor(abs2(sz)/2)), falses(iceil(abs2(sz)/2))])
18+
(
19+
reshape(mat, sz, sz),
20+
DataArray(reshape(mat, sz, sz)),
21+
DataArray(reshape(mat, sz, sz), reshape(na, sz, sz))
22+
)
23+
end
24+
25+
const Data = make_test_types(rand, 10000)
26+
27+
# Benchmark the dimension-wise reduction `fn` over the three inputs in `Data`.
#   fn           -- the function to time; spliced into each closure below
#   dim          -- passed as the second positional argument of every `fn`
#                   call and echoed in the header line as the "region"
#   replications -- forwarded unchanged as the second argument of `compare`
# The generated code prints a header, times five closures, and prints the
# resulting table.
macro perf(fn, dim, replications)
28+
quote
29+
println($fn, " (region = ", $dim, ")")
30+
# Five cases, listed in the same order as the labels in TEST_NAMES:
# Data[1] as-is, then Data[2] and Data[3] each without and with skipna=true.
fns = [()->$fn(Data[1], $dim),
31+
()->$fn(Data[2], $dim),
32+
()->$fn(Data[2], $dim; skipna=true),
33+
()->$fn(Data[3], $dim),
34+
()->$fn(Data[3], $dim; skipna=true)]
35+
# The timed section runs between gc_disable() and gc_enable(); an explicit
# gc() follows once timing is done.
gc_disable()
36+
df = compare(fns, $replications)
37+
gc_enable()
38+
gc()
39+
# Replace the :Function column with the human-readable case labels.
df[:Function] = TEST_NAMES
40+
# Each case's :Average divided by the first row's :Average (the Data[1] case).
df[:Relative] = df[:Average]./df[1, :Average]
41+
println(df)
42+
end
43+
end
44+
45+
@perf sum 1 10
46+
@perf sum 2 10
47+
@perf maximum 1 10
48+
@perf maximum 2 10
49+
@perf mean 1 10
50+
@perf mean 2 10
51+
@perf var 1 10
52+
@perf var 2 10
53+
end

src/DataArrays.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,8 @@ module DataArrays
6363
include("datamatrix.jl")
6464
include("linalg.jl")
6565
include("operators.jl")
66+
include("reduce.jl")
67+
include("reducedim.jl")
6668
include("broadcast.jl")
6769
include("sort.jl")
6870
include("extras.jl")

src/broadcast.jl

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
using DataArrays, Base.Cartesian, Base.@get!
1+
using DataArrays, Base.@get!
22
using Base.Broadcast: bitcache_chunks, bitcache_size, dumpbitcache,
33
promote_eltype, broadcast_shape, eltype_plus, type_minus, type_div,
44
type_pow
@@ -103,9 +103,11 @@ function gen_broadcast_dataarray(nd::Int, arrtype::(DataType...), outtype, f::Fu
103103
# Set up output DataArray/PooledDataArray
104104
$(if outtype == DataArray
105105
quote
106-
Bc = B.na.chunks
107-
fill!(Bc, 0)
108106
Bdata = B.data
107+
# Copy in case aliased
108+
# TODO: check for aliasing?
109+
Bna = falses(size(Bdata))
110+
Bc = Bna.chunks
109111
ind = 1
110112
end
111113
elseif outtype == PooledDataArray
@@ -158,6 +160,10 @@ function gen_broadcast_dataarray(nd::Int, arrtype::(DataType...), outtype, f::Fu
158160
:(ind += 1)
159161
end)
160162
end)
163+
164+
$(if outtype == DataArray
165+
:(B.na = Bna)
166+
end)
161167
end
162168
_F_
163169
end

src/dataarray.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,9 @@ function Base.copy!(dest::DataArray, src::DataArray) # -> DataArray{T}
152152
dest
153153
end
154154

155+
# Fill with NA: sets every bit of the mask `A.na` to true and returns `A`.
# `A.data` is not touched.
Base.fill!(A::DataArray, ::NAtype) = (fill!(A.na, true); A)
156+
# Fill with a value: overwrites `A.data` with `v`, clears every bit of the
# mask `A.na`, and returns `A`.
Base.fill!(A::DataArray, v) = (fill!(A.data, v); fill!(A.na, false); A)
157+
155158
#' @description
156159
#'
157160
#' Create a deep copy of a DataArray.

src/indexing.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ daextract(a) = nothing
1212
# Check for NA
1313
unsafe_isna(da::DataArray, extr, idx::Real) = Base.unsafe_bitgetindex(extr[2], idx)
1414
unsafe_isna(pda::PooledDataArray, extr, idx::Real) = extr[1][idx] == 0
15+
# Fallback for any `a` not matched by the DataArray/PooledDataArray methods
# above: always reports false, ignoring `extr` and `idx`.
unsafe_isna(a, extr, idx::Real) = false
1516
unsafe_getindex_notna(da::DataArray, extr, idx::Real) = getindex(extr[1], idx)
1617
unsafe_getindex_notna(pda::PooledDataArray, extr, idx::Real) = getindex(extr[2], extr[1][idx])
1718
unsafe_getindex_notna(a, extr, idx::Real) = Base.unsafe_getindex(a, idx)
@@ -43,6 +44,7 @@ unsafe_dasetindex!(da::PooledDataArray, extr, val::NAtype, idx::Real) = nothing
4344
unsafe_dasetindex!(da::DataArray, extr, val, idx::Real) = setindex!(extr[1], val, idx)
4445
unsafe_dasetindex!(pda::PooledDataArray, extr, val, idx::Real) =
4546
setindex!(extr[1], getpoolidx(pda, val), idx)
47+
# Generic AbstractArray method: stores `val` directly into `a` at `idx` with
# `setindex!`; `extr` is accepted for signature parity but not used.
unsafe_dasetindex!(a::AbstractArray, extr, val, idx::Real) = setindex!(a, val, idx)
4648

4749
## PooledDataArray helper functions
4850

src/operators.jl

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ const numeric_unary_operators = [:(Base.(:+)),
99
const logical_unary_operators = [:(Base.(:!))]
1010

1111
const elementary_functions = [:(Base.abs),
12+
:(Base.abs2),
1213
:(Base.sign),
1314
:(Base.acos),
1415
:(Base.acosh),
@@ -146,14 +147,7 @@ const bit_operators = [:(Base.(:&)),
146147
:(Base.(:|)),
147148
:(Base.(:$))]
148149

149-
const unary_vector_operators = [:(Base.minimum),
150-
:(Base.maximum),
151-
:(Base.prod),
152-
:(Base.sum),
153-
:(Base.mean),
154-
:(Base.median),
155-
:(Base.std),
156-
:(Base.var),
150+
const unary_vector_operators = [:(Base.median),
157151
:(StatsBase.mad),
158152
:(Base.norm),
159153
:(StatsBase.skewness),
@@ -460,9 +454,17 @@ end
460454
# XXX: The below should be revisited once we have a way to infer what
461455
# the proper return type of an array should be.
462456

457+
# One-argument elementary functions whose result type differs from the
# input type when the input is
458+
# Complex: the third macro argument is `T.parameters[1]` rather than `T`.
459+
# NOTE(review): presumably this makes the result element type the first type
# parameter of the Complex element type `T` -- confirm against the definition
# of `@dataarray_unary`, which is not visible here.
for f in (:(Base.abs), :(Base.abs2))
460+
@eval begin
461+
@dataarray_unary $(f) Complex T.parameters[1]
462+
end
463+
end
464+
463465
# One-argument elementary functions that return the same type as their
464466
# inputs
465-
for f in (:(Base.abs), :(Base.conj), :(Base.sign))
467+
for f in (:(Base.abs), :(Base.abs2), :(Base.conj), :(Base.sign))
466468
@eval begin
467469
$(f)(::NAtype) = NA
468470
@dataarray_unary $(f) Number T
@@ -672,7 +674,8 @@ Base.(:.^)(::MathConst{:e}, B::AbstractDataArray) = exp(B)
672674

673675
for f in (:(Base.(:+)), :(Base.(:.+)), :(Base.(:-)), :(Base.(:.-)),
674676
:(Base.(:*)), :(Base.(:.*)), :(Base.(:.^)), :(Base.div),
675-
:(Base.mod), :(Base.fld), :(Base.rem))
677+
:(Base.mod), :(Base.fld), :(Base.rem), :(Base.min),
678+
:(Base.max))
676679
@eval begin
677680
# Scalar with NA
678681
($f)(::NAtype, ::NAtype) = NA

0 commit comments

Comments
 (0)