Skip to content
This repository was archived by the owner on May 4, 2019. It is now read-only.

Commit d520b1d

Browse files
cjprybol authored and ararslan committed
Move describe functions for DataArrays from DataFrames to DataArrays (#248)
1 parent 278f3c4 commit d520b1d

File tree

3 files changed

+95
-1
lines changed

3 files changed

+95
-1
lines changed

REQUIRE

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
julia 0.6-
22
Compat 0.8.6
3-
StatsBase 0.3
3+
StatsBase 0.14.1
44
Reexport
55
SpecialFunctions

src/statistics.jl

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -35,3 +35,31 @@ function gl(n::Integer, k::Integer, l::Integer)
3535
end
3636

3737
# Two-argument convenience method: forwards to the three-argument `gl`,
# passing `n*k` as the third argument.
gl(n::Integer, k::Integer) = gl(n, k, n*k)
38+
39+
"""
    describe(X::DataVector)

Print a summary of `X` to `STDOUT` by delegating to the two-argument
`describe(STDOUT, X)` method.
"""
function StatsBase.describe(X::DataVector)
    return StatsBase.describe(STDOUT, X)
end
40+
41+
"""
    describe(io::IO, X::AbstractDataVector{<:Real})

Write a summary of the real-valued data vector `X` to `io`.

When at least one entry of `X` is not `NA`, the summary of the non-`NA`
values is produced by `describe(io, dropna(X))`. Otherwise (every entry is
`NA`, or `X` is empty) only a header and the element type are written.
In both cases the number and percentage of missing entries are appended.

Returns `nothing`.
"""
function StatsBase.describe{T<:Real}(io::IO, X::AbstractDataVector{T})
    nacount = sum(isna.(X))
    nobs = length(X)
    # NOTE(review): for an empty `X` this is 0/0, so the percentage line is
    # expected to show NaN -- confirm that is the desired output.
    pna = 100nacount/nobs
    # Compare the integer counts rather than testing `pna != 100`: for an
    # empty vector `pna` is NaN, `NaN != 100` is true, and the summary would
    # then be requested for an empty vector, which is the case that fails.
    if nacount != nobs
        describe(io, dropna(X))
    else
        println(io, "Summary Stats:")
        println(io, "Type: $(eltype(X))")
    end
    println(io, "Number Missing: $(nacount)")
    @printf(io, "%% Missing: %.6f\n", pna)
    return
end
54+
55+
"""
    describe(io::IO, X::AbstractDataVector)

Write a generic summary of the data vector `X` to `io`: its length, element
type, number of unique values, and the count and percentage of `NA` entries.

Returns `nothing`.
"""
function StatsBase.describe(io::IO, X::AbstractDataVector)
    # Tally the NA entries one element at a time.
    nacount = count(isna, X)
    n = length(X)
    pna = 100 * nacount / n
    println(io, "Summary Stats:")
    println(io, "Length: $(length(X))")
    println(io, "Type: $(eltype(X))")
    println(io, "Number Unique: $(length(unique(X)))")
    println(io, "Number Missing: $(nacount)")
    @printf(io, "%% Missing: %.6f\n", pna)
    return nothing
end

test/statistics.jl

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,69 @@
11
# Smoke test: calls `autocor` on a DataArray of integers; the result is not
# asserted, so this only checks that the call does not throw.
@testset "Stats" begin
22
autocor(DataArray([1, 2, 3, 4, 5]))
33
end
4+
5+
# Checks the text `describe(io, x)` writes for several data vectors by
# capturing the output in an IOBuffer and comparing it to an expected string.
# NOTE(review): the expected strings below appear whitespace-collapsed in this
# view -- confirm column padding against the real output before relying on them.
@testset "describe" begin
6+
io = IOBuffer()
7+
# Case 1: integer vector with no NA entries.
describe(io, @data(collect(1:10)))
8+
@test String(take!(io)) == """
9+
Summary Stats:
10+
Mean: 5.500000
11+
Minimum: 1.000000
12+
1st Quartile: 3.250000
13+
Median: 5.500000
14+
3rd Quartile: 7.750000
15+
Maximum: 10.000000
16+
Length: 10
17+
Type: $Int
18+
Number Missing: 0
19+
% Missing: 0.000000
20+
"""
21+
# Case 2: integer vector with one NA; the expected statistics cover only the
# non-NA value (Length: 1) while the missing count reports the NA.
describe(io, @data([1, NA]))
22+
@test String(take!(io)) == """
23+
Summary Stats:
24+
Mean: 1.000000
25+
Minimum: 1.000000
26+
1st Quartile: 1.000000
27+
Median: 1.000000
28+
3rd Quartile: 1.000000
29+
Maximum: 1.000000
30+
Length: 1
31+
Type: $Int
32+
Number Missing: 1
33+
% Missing: 50.000000
34+
"""
35+
# Case 3: string vector with no NA entries (generic, non-numeric summary).
describe(io, @data(["s"]))
36+
@test String(take!(io)) == """
37+
Summary Stats:
38+
Length: 1
39+
Type: String
40+
Number Unique: 1
41+
Number Missing: 0
42+
% Missing: 0.000000
43+
"""
44+
# Case 4: string vector with one NA; the expected "Number Unique: 2" implies
# NA is counted among the unique values.
describe(io, @data(["s", NA]))
45+
@test String(take!(io)) == """
46+
Summary Stats:
47+
Length: 2
48+
Type: String
49+
Number Unique: 2
50+
Number Missing: 1
51+
% Missing: 50.000000
52+
"""
53+
# Case 5: `DataArray(Any, 5)`; the expected output reports all 5 entries missing.
describe(io, DataArray(Any, 5))
54+
@test String(take!(io)) == """
55+
Summary Stats:
56+
Length: 5
57+
Type: Any
58+
Number Unique: 1
59+
Number Missing: 5
60+
% Missing: 100.000000
61+
"""
62+
# Case 6: `DataArray(Float64, 5)`; all entries missing, so only the header,
# the type and the missing counts are expected (no numeric statistics).
describe(io, DataArray(Float64, 5))
63+
@test String(take!(io)) == """
64+
Summary Stats:
65+
Type: Float64
66+
Number Missing: 5
67+
% Missing: 100.000000
68+
"""
69+
end

0 commit comments

Comments
 (0)