15
15
// specific language governing permissions and limitations
16
16
// under the License.
17
17
18
+ use std:: collections:: VecDeque ;
19
+ use std:: iter;
18
20
use std:: mem:: size_of;
19
21
use std:: sync:: Arc ;
20
22
21
- use arrow:: array:: { ArrayRef , AsArray , BooleanArray , PrimitiveArray } ;
23
+ use arrow:: array:: { ArrayRef , ArrowNativeTypeOp , AsArray , BooleanArray , PrimitiveArray } ;
22
24
use arrow:: buffer:: NullBuffer ;
23
25
use arrow:: compute;
24
26
use arrow:: datatypes:: ArrowPrimitiveType ;
25
27
use arrow:: datatypes:: DataType ;
26
28
use datafusion_common:: { internal_datafusion_err, DataFusionError , Result } ;
27
29
use datafusion_expr_common:: groups_accumulator:: { EmitTo , GroupsAccumulator } ;
28
30
29
- use super :: accumulate:: FlatNullState ;
31
+ use crate :: aggregate:: groups_accumulator:: accumulate:: NullStateAdapter ;
32
+ use crate :: aggregate:: groups_accumulator:: { ensure_room_enough_for_blocks, Block } ;
30
33
31
34
/// An accumulator that implements a single operation over
32
35
/// [`ArrowPrimitiveType`] where the accumulated state is the same as
44
47
F : Fn ( & mut T :: Native , T :: Native ) + Send + Sync ,
45
48
{
46
49
/// values per group, stored as the native type
47
- values : Vec < T :: Native > ,
50
+ values : VecDeque < Vec < T :: Native > > ,
48
51
49
52
/// The output type (needed for Decimal precision and scale)
50
53
data_type : DataType ,
@@ -53,10 +56,12 @@ where
53
56
starting_value : T :: Native ,
54
57
55
58
/// Track nulls in the input / filters
56
- null_state : FlatNullState ,
59
+ null_state : NullStateAdapter ,
57
60
58
61
/// Function that computes the primitive result
59
62
prim_fn : F ,
63
+
64
+ block_size : Option < usize > ,
60
65
}
61
66
62
67
impl < T , F > PrimitiveGroupsAccumulator < T , F >
@@ -66,11 +71,12 @@ where
66
71
{
67
72
pub fn new ( data_type : & DataType , prim_fn : F ) -> Self {
68
73
Self {
69
- values : vec ! [ ] ,
74
+ values : VecDeque :: new ( ) ,
70
75
data_type : data_type. clone ( ) ,
71
- null_state : FlatNullState :: new ( ) ,
76
+ null_state : NullStateAdapter :: new ( None ) ,
72
77
starting_value : T :: default_value ( ) ,
73
78
prim_fn,
79
+ block_size : None ,
74
80
}
75
81
}
76
82
@@ -97,16 +103,34 @@ where
97
103
let values = values[ 0 ] . as_primitive :: < T > ( ) ;
98
104
99
105
// update values
100
- self . values . resize ( total_num_groups, self . starting_value ) ;
106
+ if let Some ( blk_size) = self . block_size {
107
+ let new_block = |block_size : usize | Vec :: with_capacity ( block_size) ;
108
+ ensure_room_enough_for_blocks (
109
+ & mut self . values ,
110
+ total_num_groups,
111
+ blk_size,
112
+ new_block,
113
+ self . starting_value ,
114
+ ) ;
115
+ } else {
116
+ if self . values . is_empty ( ) {
117
+ self . values . push_back ( Vec :: new ( ) ) ;
118
+ }
119
+
120
+ self . values
121
+ . back_mut ( )
122
+ . unwrap ( )
123
+ . resize ( total_num_groups, self . starting_value ) ;
124
+ }
101
125
102
126
// NullState dispatches / handles tracking nulls and groups that saw no values
103
127
self . null_state . accumulate (
104
128
group_indices,
105
129
values,
106
130
opt_filter,
107
131
total_num_groups,
108
- |_ , group_index , new_value| {
109
- let value = & mut self . values [ group_index as usize ] ;
132
+ |block_id , block_offset , new_value| {
133
+ let value = & mut self . values [ block_id as usize ] [ block_offset as usize ] ;
110
134
( self . prim_fn ) ( value, new_value) ;
111
135
} ,
112
136
) ;
@@ -115,7 +139,7 @@ where
115
139
}
116
140
117
141
fn evaluate ( & mut self , emit_to : EmitTo ) -> Result < ArrayRef > {
118
- let values = emit_to. take_needed_rows ( & mut self . values ) ;
142
+ let values = emit_to. take_needed ( & mut self . values , self . block_size . is_some ( ) ) ;
119
143
let nulls = self . null_state . build ( emit_to) ;
120
144
let values = PrimitiveArray :: < T > :: new ( values. into ( ) , Some ( nulls) ) // no copy
121
145
. with_data_type ( self . data_type . clone ( ) ) ;
@@ -198,4 +222,28 @@ where
198
222
fn size ( & self ) -> usize {
199
223
self . values . capacity ( ) * size_of :: < T :: Native > ( ) + self . null_state . size ( )
200
224
}
225
+
226
+ fn supports_blocked_groups ( & self ) -> bool {
227
+ true
228
+ }
229
+
230
+ fn alter_block_size ( & mut self , block_size : Option < usize > ) -> Result < ( ) > {
231
+ self . values . clear ( ) ;
232
+ self . null_state = NullStateAdapter :: new ( block_size) ;
233
+ self . block_size = block_size;
234
+
235
+ Ok ( ( ) )
236
+ }
237
+ }
238
+
239
+ impl < N : ArrowNativeTypeOp > Block for Vec < N > {
240
+ type T = N ;
241
+
242
+ fn len ( & self ) -> usize {
243
+ self . len ( )
244
+ }
245
+
246
+ fn fill_default_value ( & mut self , fill_len : usize , default_value : Self :: T ) {
247
+ self . extend ( iter:: repeat ( default_value. clone ( ) ) . take ( fill_len) ) ;
248
+ }
201
249
}
0 commit comments