@@ -4,99 +4,61 @@ fastpermute!(v::AbstractArray, p::AbstractVector) = copyto!(v, v[p])
4
4
# Specializations of `fastpermute!` that forward straight to `permute!`
# for `StructArray` and `PooledArray` arguments.
function fastpermute!(v::StructArray, p::AbstractVector)
    return permute!(v, p)
end

function fastpermute!(v::PooledArray, p::AbstractVector)
    return permute!(v, p)
end
6
6
7
- optimize_isequal (v:: AbstractArray ) = v
8
- optimize_isequal (v:: PooledArray ) = v. refs
9
- optimize_isequal (v:: StructArray{<:Union{Tuple, NamedTuple}} ) = StructArray (map (optimize_isequal, fieldarrays (v)))
10
-
11
- recover_original (v:: AbstractArray , el) = el
12
- recover_original (v:: PooledArray , el) = v. pool[el]
13
- recover_original (v:: StructArray{T} , el) where {T<: Union{Tuple, NamedTuple} } = T (map (recover_original, fieldarrays (v), el))
14
-
15
- pool (v:: AbstractArray , condition = ! isbitstype∘ eltype) = condition (v) ? convert (PooledArray, v) : v
16
- pool (v:: StructArray , condition = ! isbitstype∘ eltype) = replace_storage (t -> pool (t, condition), v)
17
-
18
7
"""
    permute!(c::StructArray, p::AbstractVector)

Apply `fastpermute!(column, p)` to every field array of `c` (via `foreachfield`)
and return `c`.
"""
function Base.permute!(c::StructArray, p::AbstractVector)
    foreachfield(c) do column
        fastpermute!(column, p)
    end
    return c
end
22
11
23
- struct TiedIndices{T<: AbstractVector , V<: AbstractVector{<:Integer} , U<: AbstractUnitRange }
24
- vec:: T
25
- perm:: V
12
"""
    pool(v, condition = !isbitstype ∘ eltype)

For a plain `AbstractArray`, return `convert(PooledArray, v)` when `condition(v)`
holds and `v` itself otherwise. For a `StructArray`, apply `pool` with the same
`condition` to each storage array through `replace_storage`.
"""
function pool(v::AbstractArray, condition = !isbitstype ∘ eltype)
    if condition(v)
        return convert(PooledArray, v)
    else
        return v
    end
end

function pool(v::StructArray, condition = !isbitstype ∘ eltype)
    return replace_storage(v) do column
        pool(column, condition)
    end
end
14
+
15
"""
    GroupPerm(vec, perm, within)

Bundle a vector `vec`, an integer permutation vector `perm`, and a unit range
`within` of positions in `perm`.

# Fields
- `vec`: the vector whose entries are compared.
- `perm`: integer indices into `vec`.
- `within`: the range of positions of `perm` that iteration is restricted to.
"""
struct GroupPerm{Vec<:AbstractVector,Perm<:AbstractVector{<:Integer},Rng<:AbstractUnitRange}
    vec::Vec
    perm::Perm
    within::Rng
end
28
20
29
- TiedIndices (vec:: AbstractVector , perm= sortperm (vec)) =
30
- TiedIndices (vec, perm, axes (vec, 1 ))
31
-
32
- Base. IteratorSize (:: Type{<:TiedIndices} ) = Base. SizeUnknown ()
21
# Convenience constructor: `perm` defaults to `sortperm(vec)` and the range
# spans the whole first axis of `vec`.
function GroupPerm(vec, perm = sortperm(vec))
    return GroupPerm(vec, perm, axes(vec, 1))
end
33
22
34
- Base. eltype (:: Type{<:TiedIndices{T}} ) where {T} =
35
- Pair{eltype (T), UnitRange{Int}}
23
# The permutation stored in a `GroupPerm` is returned as-is (no copy, no re-sort).
function Base.sortperm(g::GroupPerm)
    return g.perm
end
36
24
37
- Base. sortperm (t:: TiedIndices ) = t. perm
38
-
39
- function Base. iterate (n:: TiedIndices , i = first (n. within))
40
- vec, perm = n. vec, n. perm
41
- l = last (n. within)
25
"""
    iterate(g::GroupPerm, i = first(g.within))

Starting at position `i` of `g.perm`, extend a range over consecutive positions
whose rows of `g.vec` compare equal (per `roweq`) to the row at `g.perm[i]`.
Return `(range, next_position)`, or `nothing` once `i` is past `last(g.within)`.
"""
function Base.iterate(g::GroupPerm, i = first(g.within))
    stop = last(g.within)
    if i > stop
        return nothing
    end
    rows, order = g.vec, g.perm
    # Row index that every following candidate is compared against.
    @inbounds anchor = order[i]
    next = i + 1
    @inbounds while next <= stop && roweq(rows, anchor, order[next])
        next += 1
    end
    return (i:(next - 1), next)
end
50
36
51
- """
52
- `tiedindices(v, perm=sortperm(v))`
53
-
54
- Given an abstract vector `v` and a permutation vector `perm`, return an iterator
55
- of pairs `val => range` where `range` is a maximal interval such as `v[perm[range]]`
56
- is constant: `val` is the unique value of `v[perm[range]]`.
57
- """
58
- tiedindices (v, perm= sortperm (v)) = TiedIndices (v, perm)
59
-
60
- """
61
- `maptiedindices(f, v, perm)`
62
-
63
- Given a function `f`, compute the iterator `tiedindices(v, perm)` and return
64
- in iterable object which yields `f(val, idxs)` where `val => idxs` are the pairs
65
- iterated by `tiedindices(v, perm)`.
66
-
67
- ## Examples
68
-
69
- `maptiedindices` is a low level building block that can be used to define grouping
70
- operators. For example:
71
-
72
- ```jldoctest
73
- julia> function mygroupby(f, keys, data)
74
- perm = sortperm(keys)
75
- StructArrays.maptiedindices(keys, perm) do key, idxs
76
- key => f(data[perm[idxs]])
77
- end
78
- end
79
- mygroupby (generic function with 1 method)
80
-
81
- julia> StructArray(mygroupby(sum, [1, 2, 1, 3], [1, 4, 10, 11]))
82
- 3-element StructArray{Pair{Int64,Int64},1,NamedTuple{(:first, :second),Tuple{Array{Int64,1},Array{Int64,1}}}}:
83
- 1 => 11
84
- 2 => 4
85
- 3 => 11
86
- ```
87
- """
88
- function maptiedindices (f, v, perm)
89
- fast_v = optimize_isequal (v)
90
- itr = TiedIndices (fast_v, perm)
91
- (f (recover_original (v, val), idxs) for (val, idxs) in itr)
37
# Iterator traits for `GroupPerm`: the number of groups is not known up front,
# and each iteration item is declared to be a `UnitRange{Int}`.
function Base.IteratorSize(::Type{<:GroupPerm})
    return Base.SizeUnknown()
end

function Base.eltype(::Type{<:GroupPerm})
    return UnitRange{Int}
end
40
+
41
# Generic row comparison: `isequal` on the two entries, with bounds checks elided.
@inline function roweq(x::AbstractVector, i, j)
    @inbounds same = isequal(x[i], x[j])
    return same
end
42
# `PooledArray` row comparison: compares the `refs` entries at `i` and `j` with
# `==` instead of looking up the pooled values (bounds checks elided).
@inline function roweq(a::PooledArray, i, j)
    @inbounds same = a.refs[i] == a.refs[j]
    return same
end
43
# `StructVector` row comparison. At generation time this builds the chain
# `roweq(col_1, i, j) && roweq(col_2, i, j) && ...` over the positional entries
# of `fieldarrays(c)`, one term per field of the type parameter `C`.
# The generator body deliberately uses a plain loop: generated functions may not
# contain closures or comprehensions.
@generated function roweq(c::StructVector{D,C}, i, j) where {D,C}
    ncols = fieldcount(C)
    chain = :(roweq(getfield(fieldarrays(c), 1), i, j))
    col = 2
    while col <= ncols
        chain = Expr(:&&, chain, :(roweq(getfield(fieldarrays(c), $col), i, j)))
        col += 1
    end
    return chain
end
93
51
94
52
"""
    uniquesorted(keys, perm = sortperm(keys))

Return a lazy generator that yields, for each group produced by
`GroupPerm(keys, perm)`, the element of `keys` at the group's first permuted
position.
"""
function uniquesorted(keys, perm = sortperm(keys))
    groups = GroupPerm(keys, perm)
    return (keys[perm[rng[1]]] for rng in groups)
end
97
55
98
56
"""
    finduniquesorted(keys, perm = sortperm(keys))

Return a lazy generator of pairs `key => rows`, one per group produced by
`GroupPerm(keys, perm)`. `rows` is `perm` indexed by the group's range, and
`key` is the element of `keys` at the first of those rows.
"""
function finduniquesorted(keys, perm = sortperm(keys))
    # Map one group's position range to its `key => rows` pair.
    function keyrows(rng)
        rows = perm[rng]
        return keys[rows[1]] => rows
    end
    return (keyrows(rng) for rng in GroupPerm(keys, perm))
end
101
63
102
64
function Base. sortperm (c:: StructVector{T} ) where {T<: Union{Tuple, NamedTuple} }
@@ -126,7 +88,7 @@ function refine_perm!(p, cols, c, x, y′, lo, hi)
126
88
order = Perm (Forward, y′)
127
89
y = something (forward_vec (order), y′)
128
90
nc = length (cols)
129
- for (_, idxs) in TiedIndices ( optimize_isequal (x) , p, lo: hi)
91
+ for idxs in GroupPerm (x , p, lo: hi)
130
92
i, i1 = extrema (idxs)
131
93
if i1 > i
132
94
sort_sub_by! (p, i, i1, y, order, temp)
0 commit comments