Skip to content

Commit 4620545

Browse files
authored
Enhance ranking code (#589)
* ordinalrank!(): use eachindex()
* ranking: use _rank() helper, @inbounds
  The _rank() helper provides:
  1) correct support for n-dimensional (n > 1) input arrays
  2) less code duplication
  3) passthrough of sortperm() keyword arguments
  4) macro-less support for missing values
* replace while-loops with for-loops
* rankings: expand sortkwargs in docstring
* rankings: clean up docstring
1 parent ed3b86e commit 4620545

File tree

1 file changed

+59
-79
lines changed

1 file changed

+59
-79
lines changed

src/ranking.jl

Lines changed: 59 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -6,65 +6,70 @@
66
# The implementations here follow this wikipedia page.
77
#
88

9-
109
# Verify that the rank buffer `rks`, the data `x` and the permutation `p` all hold
# the same number of elements, and return that common length.
# Calls `raise_dimerror()` when the lengths disagree.
function _check_randparams(rks, x, p)
    n = length(rks)
    if length(x) != length(p) || length(p) != n
        raise_dimerror()
    end
    return n
end
1514

15+
# Shared driver for the ranking functions.
#
# Computes the sorting permutation of `x` (flattened with `vec`, so arrays of any
# dimensionality are accepted), allocates an output array shaped like `x` with element
# type `R`, and delegates the rank assignment to `f!(ranks, x, permutation)`.
# All keyword arguments are forwarded unchanged to `sortperm`.
# Returns whatever `f!` returns.
function _rank(f!, x::AbstractArray, R::Type=Int; sortkwargs...)
    out = similar(x, R)
    perm = sortperm(vec(x); sortkwargs...)
    # give the permutation the same shape as `x` before handing it to `f!`
    return f!(out, x, reshape(perm, size(x)))
end
1621

22+
# Shared driver for the ranking functions, for arrays whose element type admits `missing`.
#
# Only the non-missing entries of `x` are sorted and ranked by `f!`; every missing
# entry keeps the rank `missing`. The result has the same size as `x` and is created
# by `missings(R, size(x))`. Keyword arguments are forwarded unchanged to `sortperm`.
function _rank(f!, x::AbstractArray{>: Missing}, R::Type=Int; sortkwargs...)
    ranks = missings(R, size(x))
    flat = vec(x)
    present = findall(!ismissing, flat)   # linear indices of the non-missing entries
    if !isempty(present)
        vals = disallowmissing(view(flat, present))
        perm = sortperm(vals; sortkwargs...)
        # write ranks only into the slots that hold actual values
        f!(view(ranks, present), vals, perm)
    end
    return ranks
end
1732

1833
# Ordinal ranking ("1234" ranking) -- use the literal order resulting from the sort
19-
function ordinalrank!(rks::AbstractArray, x::AbstractArray, p::IntegerArray)
20-
n = _check_randparams(rks, x, p)
21-
22-
if n > 0
23-
i = 1
24-
while i <= n
25-
rks[p[i]] = i
26-
i += 1
27-
end
34+
function _ordinalrank!(rks::AbstractArray, x::AbstractArray, p::IntegerArray)
35+
_check_randparams(rks, x, p)
36+
@inbounds for i in eachindex(p)
37+
rks[p[i]] = i
2838
end
29-
3039
return rks
3140
end
3241

3342

3443
"""
35-
ordinalrank(x; lt = isless, rev::Bool = false)
44+
ordinalrank(x; lt=isless, by=identity, rev::Bool=false, ...)
3645
3746
Return the [ordinal ranking](https://en.wikipedia.org/wiki/Ranking#Ordinal_ranking_.28.221234.22_ranking.29)
38-
("1234" ranking) of an array. The `lt` keyword allows providing a custom "less
39-
than" function; use `rev=true` to reverse the sorting order.
40-
All items in `x` are given distinct, successive ranks based on their
41-
position in `sort(x; lt = lt, rev = rev)`.
47+
("1234" ranking) of an array. Supports the same keyword arguments as the `sort` function.
48+
All items in `x` are given distinct, successive ranks based on their position
49+
in the sorted vector.
4250
Missing values are assigned rank `missing`.
4351
"""
44-
ordinalrank(x::AbstractArray; lt = isless, rev::Bool = false) =
45-
ordinalrank!(Array{Int}(undef, size(x)), x, sortperm(x; lt = lt, rev = rev))
52+
function ordinalrank(x::AbstractArray; sortkwargs...)
    # `_rank` computes the sorting permutation (forwarding `sortkwargs` to `sortperm`)
    # and lets `_ordinalrank!` fill in the ranks.
    return _rank(_ordinalrank!, x; sortkwargs...)
end
4654

4755

4856
# Competition ranking ("1224" ranking) -- resolve tied ranks using min
49-
function competerank!(rks::AbstractArray, x::AbstractArray, p::IntegerArray)
57+
function _competerank!(rks::AbstractArray, x::AbstractArray, p::IntegerArray)
5058
n = _check_randparams(rks, x, p)
5159

52-
if n > 0
60+
@inbounds if n > 0
5361
p1 = p[1]
5462
v = x[p1]
5563
rks[p1] = k = 1
5664

57-
i = 2
58-
while i <= n
65+
for i in 2:n
5966
pi = p[i]
6067
xi = x[pi]
61-
if xi == v
62-
rks[pi] = k
63-
else
64-
rks[pi] = k = i
68+
if xi != v
6569
v = xi
70+
k = i
6671
end
67-
i += 1
72+
rks[pi] = k
6873
end
6974
end
7075

@@ -73,39 +78,35 @@ end
7378

7479

7580
"""
76-
competerank(x; lt = isless, rev::Bool = false)
81+
competerank(x; lt=isless, by=identity, rev::Bool=false, ...)
7782
7883
Return the [standard competition ranking](http://en.wikipedia.org/wiki/Ranking#Standard_competition_ranking_.28.221224.22_ranking.29)
79-
("1224" ranking) of an array. The `lt` keyword allows providing a custom "less
80-
than" function; use `rev=true` to reverse the sorting order.
81-
Items that compare equal are given the same rank, then a gap is left
82-
in the rankings the size of the number of tied items - 1.
84+
("1224" ranking) of an array. Supports the same keyword arguments as the `sort` function.
85+
Equal (*"tied"*) items are given the same rank, and the next rank comes after a gap
86+
that is equal to the number of tied items - 1.
8387
Missing values are assigned rank `missing`.
8488
"""
85-
competerank(x::AbstractArray; lt = isless, rev::Bool = false) =
86-
competerank!(Array{Int}(undef, size(x)), x, sortperm(x; lt = lt, rev = rev))
89+
function competerank(x::AbstractArray; sortkwargs...)
    # `_rank` computes the sorting permutation (forwarding `sortkwargs` to `sortperm`)
    # and lets `_competerank!` fill in the ranks.
    return _rank(_competerank!, x; sortkwargs...)
end
8791

8892

8993
# Dense ranking ("1223" ranking) -- tied items share a rank; the next rank follows with no gap
90-
function denserank!(rks::AbstractArray, x::AbstractArray, p::IntegerArray)
94+
function _denserank!(rks::AbstractArray, x::AbstractArray, p::IntegerArray)
9195
n = _check_randparams(rks, x, p)
9296

93-
if n > 0
97+
@inbounds if n > 0
9498
p1 = p[1]
9599
v = x[p1]
96100
rks[p1] = k = 1
97101

98-
i = 2
99-
while i <= n
102+
for i in 2:n
100103
pi = p[i]
101104
xi = x[pi]
102-
if xi == v
103-
rks[pi] = k
104-
else
105-
rks[pi] = (k += 1)
105+
if xi != v
106106
v = xi
107+
k += 1
107108
end
108-
i += 1
109+
rks[pi] = k
109110
end
110111
end
111112

@@ -114,29 +115,27 @@ end
114115

115116

116117
"""
117-
denserank(x)
118+
denserank(x; lt=isless, by=identity, rev::Bool=false, ...)
118119
119120
Return the [dense ranking](http://en.wikipedia.org/wiki/Ranking#Dense_ranking_.28.221223.22_ranking.29)
120-
("1223" ranking) of an array. The `lt` keyword allows providing a custom "less
121-
than" function; use `rev=true` to reverse the sorting order. Items that
122-
compare equal receive the same ranking, and the next subsequent rank is
121+
("1223" ranking) of an array. Supports the same keyword arguments as the `sort` function.
122+
Equal items receive the same rank, and the next subsequent rank is
123123
assigned with no gap.
124124
Missing values are assigned rank `missing`.
125125
"""
126-
denserank(x::AbstractArray; lt = isless, rev::Bool = false) =
127-
denserank!(Array{Int}(undef, size(x)), x, sortperm(x; lt = lt, rev = rev))
126+
function denserank(x::AbstractArray; sortkwargs...)
    # `_rank` computes the sorting permutation (forwarding `sortkwargs` to `sortperm`)
    # and lets `_denserank!` fill in the ranks.
    return _rank(_denserank!, x; sortkwargs...)
end
128128

129129

130130
# Tied ranking ("1 2.5 2.5 4" ranking) -- resolve tied ranks using average
131-
function tiedrank!(rks::AbstractArray, x::AbstractArray, p::IntegerArray)
131+
function _tiedrank!(rks::AbstractArray, x::AbstractArray, p::IntegerArray)
132132
n = _check_randparams(rks, x, p)
133133

134-
if n > 0
134+
@inbounds if n > 0
135135
v = x[p[1]]
136136

137137
s = 1 # starting index of current range
138-
e = 2 # pass-by-end index of current range
139-
while e <= n
138+
for e in 2:n # e is pass-by-end index of current range
140139
cx = x[p[e]]
141140
if cx != v
142141
# fill average rank to s : e-1
@@ -148,10 +147,9 @@ function tiedrank!(rks::AbstractArray, x::AbstractArray, p::IntegerArray)
148147
s = e
149148
v = cx
150149
end
151-
e += 1
152150
end
153151

154-
# the last range (e == n+1)
152+
# the last range
155153
ar = (s + n) / 2
156154
for i = s : n
157155
rks[p[i]] = ar
@@ -161,33 +159,15 @@ function tiedrank!(rks::AbstractArray, x::AbstractArray, p::IntegerArray)
161159
return rks
162160
end
163161

164-
# order (aka. rank), resolving ties using the mean rank
165162
"""
166-
tiedrank(x)
163+
tiedrank(x; lt=isless, by=identity, rev::Bool=false, ...)
167164
168165
Return the [tied ranking](http://en.wikipedia.org/wiki/Ranking#Fractional_ranking_.28.221_2.5_2.5_4.22_ranking.29),
169166
also called fractional or "1 2.5 2.5 4" ranking,
170-
of an array. The `lt` keyword allows providing a custom "less
171-
than" function; use `rev=true` to reverse the sorting order.
172-
Items that compare equal receive the mean of the
173-
rankings they would have been assigned under ordinal ranking.
167+
of an array. Supports the same keyword arguments as the `sort` function.
168+
Equal (*"tied"*) items receive the mean of the ranks they would
169+
have been assigned under the ordinal ranking (see [`ordinalrank`](@ref)).
174170
Missing values are assigned rank `missing`.
175171
"""
176-
tiedrank(x::AbstractArray; lt = isless, rev::Bool = false) =
177-
tiedrank!(Array{Float64}(undef, size(x)), x, sortperm(x; lt = lt, rev = rev))
178-
179-
for (f, f!, S) in zip([:ordinalrank, :competerank, :denserank, :tiedrank],
180-
[:ordinalrank!, :competerank!, :denserank!, :tiedrank!],
181-
[Int, Int, Int, Float64])
182-
@eval begin
183-
function $f(x::AbstractArray{>: Missing}; lt = isless, rev::Bool = false)
184-
inds = findall(!ismissing, x)
185-
isempty(inds) && return missings($S, size(x))
186-
xv = disallowmissing(view(x, inds))
187-
sp = sortperm(xv; lt = lt, rev = rev)
188-
rks = missings($S, length(x))
189-
$(f!)(view(rks, inds), xv, sp)
190-
rks
191-
end
192-
end
193-
end
172+
function tiedrank(x::AbstractArray; sortkwargs...)
    # Ranks are stored as Float64 because tied items receive an averaged rank.
    # `_rank` computes the sorting permutation (forwarding `sortkwargs` to `sortperm`)
    # and lets `_tiedrank!` fill in the ranks.
    return _rank(_tiedrank!, x, Float64; sortkwargs...)
end

0 commit comments

Comments
 (0)