@@ -87,6 +87,8 @@ struct JoinLeftData {
87
87
hash_map : JoinHashMap ,
88
88
/// The input rows for the build side
89
89
batch : RecordBatch ,
90
+ /// Values of the build-side `on` expressions, evaluated against the build-side batch
91
+ values : Vec < ArrayRef > ,
90
92
/// Shared bitmap builder for visited left indices
91
93
visited_indices_bitmap : SharedBitmapBuilder ,
92
94
/// Counter of running probe-threads, potentially
@@ -104,13 +106,15 @@ impl JoinLeftData {
104
106
fn new (
105
107
hash_map : JoinHashMap ,
106
108
batch : RecordBatch ,
109
+ values : Vec < ArrayRef > ,
107
110
visited_indices_bitmap : SharedBitmapBuilder ,
108
111
probe_threads_counter : AtomicUsize ,
109
112
reservation : MemoryReservation ,
110
113
) -> Self {
111
114
Self {
112
115
hash_map,
113
116
batch,
117
+ values,
114
118
visited_indices_bitmap,
115
119
probe_threads_counter,
116
120
_reservation : reservation,
@@ -127,6 +131,11 @@ impl JoinLeftData {
127
131
& self . batch
128
132
}
129
133
134
+ /// returns a reference to the evaluated build-side `on` expression values
135
+ fn values ( & self ) -> & [ ArrayRef ] {
136
+ & self . values
137
+ }
138
+
130
139
/// returns a reference to the visited indices bitmap
131
140
fn visited_indices_bitmap ( & self ) -> & SharedBitmapBuilder {
132
141
& self . visited_indices_bitmap
@@ -853,7 +862,6 @@ impl ExecutionPlan for HashJoinExec {
853
862
854
863
Ok ( Box :: pin ( HashJoinStream {
855
864
schema : self . schema ( ) ,
856
- on_left,
857
865
on_right,
858
866
filter : self . filter . clone ( ) ,
859
867
join_type : self . join_type ,
@@ -984,9 +992,18 @@ async fn collect_left_input(
984
992
BooleanBufferBuilder :: new ( 0 )
985
993
} ;
986
994
995
+ let left_values = on_left
996
+ . iter ( )
997
+ . map ( |c| {
998
+ c. evaluate ( & single_batch) ?
999
+ . into_array ( single_batch. num_rows ( ) )
1000
+ } )
1001
+ . collect :: < Result < Vec < _ > > > ( ) ?;
1002
+
987
1003
let data = JoinLeftData :: new (
988
1004
hashmap,
989
1005
single_batch,
1006
+ left_values,
990
1007
Mutex :: new ( visited_indices_bitmap) ,
991
1008
AtomicUsize :: new ( probe_threads_count) ,
992
1009
reservation,
@@ -1136,6 +1153,8 @@ impl HashJoinStreamState {
1136
1153
struct ProcessProbeBatchState {
1137
1154
/// Current probe-side batch
1138
1155
batch : RecordBatch ,
1156
+ /// Evaluated values of the probe-side `on` expressions
1157
+ values : Vec < ArrayRef > ,
1139
1158
/// Starting offset for JoinHashMap lookups
1140
1159
offset : JoinHashMapOffset ,
1141
1160
/// Max joined probe-side index from current batch
@@ -1162,8 +1181,6 @@ impl ProcessProbeBatchState {
1162
1181
struct HashJoinStream {
1163
1182
/// Input schema
1164
1183
schema : Arc < Schema > ,
1165
- /// equijoin columns from the left (build side)
1166
- on_left : Vec < PhysicalExprRef > ,
1167
1184
/// equijoin columns from the right (probe side)
1168
1185
on_right : Vec < PhysicalExprRef > ,
1169
1186
/// optional join filter
@@ -1249,27 +1266,13 @@ impl RecordBatchStream for HashJoinStream {
1249
1266
#[ allow( clippy:: too_many_arguments) ]
1250
1267
fn lookup_join_hashmap (
1251
1268
build_hashmap : & JoinHashMap ,
1252
- build_input_buffer : & RecordBatch ,
1253
- probe_batch : & RecordBatch ,
1254
- build_on : & [ PhysicalExprRef ] ,
1255
- probe_on : & [ PhysicalExprRef ] ,
1269
+ build_side_values : & [ ArrayRef ] ,
1270
+ probe_side_values : & [ ArrayRef ] ,
1256
1271
null_equals_null : bool ,
1257
1272
hashes_buffer : & [ u64 ] ,
1258
1273
limit : usize ,
1259
1274
offset : JoinHashMapOffset ,
1260
1275
) -> Result < ( UInt64Array , UInt32Array , Option < JoinHashMapOffset > ) > {
1261
- let keys_values = probe_on
1262
- . iter ( )
1263
- . map ( |c| c. evaluate ( probe_batch) ?. into_array ( probe_batch. num_rows ( ) ) )
1264
- . collect :: < Result < Vec < _ > > > ( ) ?;
1265
- let build_join_values = build_on
1266
- . iter ( )
1267
- . map ( |c| {
1268
- c. evaluate ( build_input_buffer) ?
1269
- . into_array ( build_input_buffer. num_rows ( ) )
1270
- } )
1271
- . collect :: < Result < Vec < _ > > > ( ) ?;
1272
-
1273
1276
let ( probe_indices, build_indices, next_offset) = build_hashmap
1274
1277
. get_matched_indices_with_limit_offset ( hashes_buffer, None , limit, offset) ;
1275
1278
@@ -1279,8 +1282,8 @@ fn lookup_join_hashmap(
1279
1282
let ( build_indices, probe_indices) = equal_rows_arr (
1280
1283
& build_indices,
1281
1284
& probe_indices,
1282
- & build_join_values ,
1283
- & keys_values ,
1285
+ build_side_values ,
1286
+ probe_side_values ,
1284
1287
null_equals_null,
1285
1288
) ?;
1286
1289
@@ -1430,6 +1433,7 @@ impl HashJoinStream {
1430
1433
self . state =
1431
1434
HashJoinStreamState :: ProcessProbeBatch ( ProcessProbeBatchState {
1432
1435
batch,
1436
+ values : keys_values,
1433
1437
offset : ( 0 , None ) ,
1434
1438
joined_probe_idx : None ,
1435
1439
} ) ;
@@ -1454,10 +1458,8 @@ impl HashJoinStream {
1454
1458
// get the matched by join keys indices
1455
1459
let ( left_indices, right_indices, next_offset) = lookup_join_hashmap (
1456
1460
build_side. left_data . hash_map ( ) ,
1457
- build_side. left_data . batch ( ) ,
1458
- & state. batch ,
1459
- & self . on_left ,
1460
- & self . on_right ,
1461
+ build_side. left_data . values ( ) ,
1462
+ & state. values ,
1461
1463
self . null_equals_null ,
1462
1464
& self . hashes_buffer ,
1463
1465
self . batch_size ,
@@ -3297,17 +3299,20 @@ mod tests {
3297
3299
3298
3300
let join_hash_map = JoinHashMap :: new ( hashmap_left, next) ;
3299
3301
3302
+ let left_keys_values = key_column. evaluate ( & left) ?. into_array ( left. num_rows ( ) ) ?;
3300
3303
let right_keys_values =
3301
3304
key_column. evaluate ( & right) ?. into_array ( right. num_rows ( ) ) ?;
3302
3305
let mut hashes_buffer = vec ! [ 0 ; right. num_rows( ) ] ;
3303
- create_hashes ( & [ right_keys_values] , & random_state, & mut hashes_buffer) ?;
3306
+ create_hashes (
3307
+ & [ Arc :: clone ( & right_keys_values) ] ,
3308
+ & random_state,
3309
+ & mut hashes_buffer,
3310
+ ) ?;
3304
3311
3305
3312
let ( l, r, _) = lookup_join_hashmap (
3306
3313
& join_hash_map,
3307
- & left,
3308
- & right,
3309
- & [ Arc :: clone ( & key_column) ] ,
3310
- & [ key_column] ,
3314
+ & [ left_keys_values] ,
3315
+ & [ right_keys_values] ,
3311
3316
false ,
3312
3317
& hashes_buffer,
3313
3318
8192 ,
0 commit comments