@@ -54,12 +54,13 @@ use datafusion_physical_expr::utils::{collect_columns, merge_vectors};
54
54
use datafusion_physical_expr:: {
55
55
LexOrdering , PhysicalExpr , PhysicalExprRef , PhysicalSortExpr ,
56
56
} ;
57
+ use hashbrown:: hash_table:: Entry :: { Occupied , Vacant } ;
58
+ use hashbrown:: HashTable ;
57
59
58
60
use crate :: joins:: SharedBitmapBuilder ;
59
61
use crate :: projection:: ProjectionExec ;
60
62
use futures:: future:: { BoxFuture , Shared } ;
61
63
use futures:: { ready, FutureExt } ;
62
- use hashbrown:: raw:: RawTable ;
63
64
use parking_lot:: Mutex ;
64
65
65
66
/// Maps a `u64` hash value based on the build side ["on" values] to a list of indices with this key's value.
@@ -126,20 +127,20 @@ use parking_lot::Mutex;
126
127
/// ```
127
128
pub struct JoinHashMap {
128
129
// Maps each hash value to the 1-based index (`row + 1`) of the row most
// recently inserted with that hash; 0 is reserved as the end-of-chain marker.
129
- map : RawTable < ( u64 , u64 ) > ,
130
+ map : HashTable < ( u64 , u64 ) > ,
130
131
// Chained list of indices: when a hash is inserted again, the slot for the
// new row receives the 1-based index previously stored in `map`; a slot
// holding 0 means the chain ends there.
131
132
next : Vec < u64 > ,
132
133
}
133
134
134
135
impl JoinHashMap {
135
136
/// Test-only constructor: builds a `JoinHashMap` directly from an existing
/// hash table and `next` vector, storing both as given.
#[ cfg( test) ]
136
- pub ( crate ) fn new ( map : RawTable < ( u64 , u64 ) > , next : Vec < u64 > ) -> Self {
137
+ pub ( crate ) fn new ( map : HashTable < ( u64 , u64 ) > , next : Vec < u64 > ) -> Self {
137
138
Self { map, next }
138
139
}
139
140
140
141
/// Creates a `JoinHashMap` whose hash table is pre-sized for `capacity`
/// entries and whose `next` list holds `capacity` zeros.
pub ( crate ) fn with_capacity ( capacity : usize ) -> Self {
141
142
JoinHashMap {
142
- map : RawTable :: with_capacity ( capacity) ,
143
+ map : HashTable :: with_capacity ( capacity) ,
143
144
// Every slot starts at 0, the value used as the end-of-chain marker.
next : vec ! [ 0 ; capacity] ,
144
145
}
145
146
}
@@ -199,9 +200,9 @@ pub trait JoinHashMapType {
199
200
/// Extends the implementor's storage with zeros, sized by `len`.
/// NOTE(review): presumably this grows the chained `next` list; the
/// `JoinHashMap` implementation is a no-op, so confirm against the other
/// implementors.
200
201
fn extend_zero ( & mut self , len : usize ) ;
201
202
/// Returns mutable references to the hash table and to the chained `next`
/// list, as a tuple in that order.
202
- fn get_mut ( & mut self ) -> ( & mut RawTable < ( u64 , u64 ) > , & mut Self :: NextType ) ;
203
+ fn get_mut ( & mut self ) -> ( & mut HashTable < ( u64 , u64 ) > , & mut Self :: NextType ) ;
203
204
/// Returns a shared reference to the hash table of `(hash, index)` pairs.
204
- fn get_map ( & self ) -> & RawTable < ( u64 , u64 ) > ;
205
+ fn get_map ( & self ) -> & HashTable < ( u64 , u64 ) > ;
205
206
/// Returns a shared reference to the chained `next` list.
206
207
fn get_list ( & self ) -> & Self :: NextType ;
207
208
@@ -212,24 +213,28 @@ pub trait JoinHashMapType {
212
213
deleted_offset : usize ,
213
214
) {
214
215
let ( mut_map, mut_list) = self . get_mut ( ) ;
215
- for ( row, hash_value) in iter {
216
- let item = mut_map. get_mut ( * hash_value, |( hash, _) | * hash_value == * hash) ;
217
- if let Some ( ( _, index) ) = item {
218
- // Already exists: add index to next array
219
- let prev_index = * index;
220
- // Store new value inside hashmap
221
- * index = ( row + 1 ) as u64 ;
222
- // Update chained Vec at `row` with previous value
223
- mut_list[ row - deleted_offset] = prev_index;
224
- } else {
225
- mut_map. insert (
226
- * hash_value,
227
- // store the value + 1 as 0 value reserved for end of list
228
- ( * hash_value, ( row + 1 ) as u64 ) ,
229
- |( hash, _) | * hash,
230
- ) ;
231
- // chained list at `row` is already initialized with 0
232
- // meaning end of list
216
+ for ( row, & hash_value) in iter {
217
+ let entry = mut_map. entry (
218
+ hash_value,
219
+ |& ( hash, _) | hash_value == hash,
220
+ |& ( hash, _) | hash,
221
+ ) ;
222
+
223
+ match entry {
224
+ Occupied ( mut occupied_entry) => {
225
+ // Already exists: add index to next array
226
+ let ( _, index) = occupied_entry. get_mut ( ) ;
227
+ let prev_index = * index;
228
+ // Store new value inside hashmap
229
+ * index = ( row + 1 ) as u64 ;
230
+ // Update chained Vec at `row` with previous value
231
+ mut_list[ row - deleted_offset] = prev_index;
232
+ }
233
+ Vacant ( vacant_entry) => {
234
+ vacant_entry. insert ( ( hash_value, ( row + 1 ) as u64 ) ) ;
235
+ // chained list at `row` is already initialized with 0
236
+ // meaning end of list
237
+ }
233
238
}
234
239
}
235
240
}
@@ -251,7 +256,7 @@ pub trait JoinHashMapType {
251
256
for ( row_idx, hash_value) in iter {
252
257
// Get the hash and find it in the index
253
258
if let Some ( ( _, index) ) =
254
- hash_map. get ( * hash_value, |( hash, _) | * hash_value == * hash)
259
+ hash_map. find ( * hash_value, |( hash, _) | * hash_value == * hash)
255
260
{
256
261
let mut i = * index - 1 ;
257
262
loop {
@@ -299,7 +304,7 @@ pub trait JoinHashMapType {
299
304
300
305
let mut remaining_output = limit;
301
306
302
- let hash_map: & RawTable < ( u64 , u64 ) > = self . get_map ( ) ;
307
+ let hash_map: & HashTable < ( u64 , u64 ) > = self . get_map ( ) ;
303
308
let next_chain = self . get_list ( ) ;
304
309
305
310
// Calculate initial `hash_values` index before iterating
@@ -330,7 +335,7 @@ pub trait JoinHashMapType {
330
335
let mut row_idx = to_skip;
331
336
for hash_value in & hash_values[ to_skip..] {
332
337
if let Some ( ( _, index) ) =
333
- hash_map. get ( * hash_value, |( hash, _) | * hash_value == * hash)
338
+ hash_map. find ( * hash_value, |( hash, _) | * hash_value == * hash)
334
339
{
335
340
chain_traverse ! (
336
341
input_indices,
@@ -358,12 +363,12 @@ impl JoinHashMapType for JoinHashMap {
358
363
// No-op for `JoinHashMap`: the requested length is ignored.
fn extend_zero ( & mut self , _: usize ) { }
359
364
360
365
/// Returns mutable references to this map's hash table and its `next`
/// vector, as a tuple in that order.
361
- fn get_mut ( & mut self ) -> ( & mut RawTable < ( u64 , u64 ) > , & mut Self :: NextType ) {
366
+ fn get_mut ( & mut self ) -> ( & mut HashTable < ( u64 , u64 ) > , & mut Self :: NextType ) {
362
367
( & mut self . map , & mut self . next )
363
368
}
364
369
365
370
/// Returns a shared reference to this map's hash table.
366
- fn get_map ( & self ) -> & RawTable < ( u64 , u64 ) > {
371
+ fn get_map ( & self ) -> & HashTable < ( u64 , u64 ) > {
367
372
& self . map
368
373
}
369
374
0 commit comments