Skip to content

Commit 266daf8

Browse files
authored
doc: Add comments to clarify algorithm for MarkJoins (#16436)
* doc: Add doc to clarify algorithm for `MarkJoin`s * fix: fmt * fix: NullEquality merge
1 parent e6df27c commit 266daf8

File tree

1 file changed

+15
-0
lines changed

1 file changed

+15
-0
lines changed

datafusion/physical-plan/src/joins/symmetric_hash_join.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -810,6 +810,21 @@ where
810810
{
811811
// Store the result in a tuple
812812
let result = match (build_side, join_type) {
813+
// For a mark join we "mark" each build-side row with a dummy 0 in the probe-side index
814+
// if it ever matched. For example, if
815+
//
816+
// prune_length = 5
817+
// deleted_offset = 0
818+
// visited_rows = {1, 3}
819+
//
820+
// then we produce:
821+
//
822+
// build_indices = [0, 1, 2, 3, 4]
823+
// probe_indices = [None, Some(0), None, Some(0), None]
824+
//
825+
// In other words, for each build row i in [0, 5):
826+
// - We always output its own index i in `build_indices`
826+
// - We output `Some(0)` in `probe_indices[i]` if row i was ever visited, otherwise `None`
813828
(JoinSide::Left, JoinType::LeftMark) => {
814829
let build_indices = (0..prune_length)
815830
.map(L::Native::from_usize)

0 commit comments

Comments
 (0)