Skip to content

Commit 52930cd

Browse files
authored
Minor: Add arrow-rs ticket reference and turn some comments into docstrings (#4088)
1 parent 1287529 commit 52930cd

File tree

1 file changed

+56
-52
lines changed
  • datafusion/core/src/physical_plan/file_format

1 file changed

+56
-52
lines changed

datafusion/core/src/physical_plan/file_format/parquet.rs

Lines changed: 56 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -585,51 +585,53 @@ impl FileOpener for ParquetOpener {
585585
}
586586
}
587587

588-
// For example:
589-
// > ┏━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━
590-
// > ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┃
591-
// > ┃ ┌──────────────┐ │ ┌──────────────┐ │ ┃
592-
// > ┃ │ │ │ │ │ │ ┃
593-
// > ┃ │ │ │ │ Page │ │
594-
// > │ │ │ │ │ 3 │ ┃
595-
// > ┃ │ │ │ │ min: "A" │ │ ┃
596-
// > ┃ │ │ │ │ │ max: "C" │ ┃
597-
// > ┃ │ Page │ │ │ first_row: 0 │ │
598-
// > │ │ 1 │ │ │ │ ┃
599-
// > ┃ │ min: 10 │ │ └──────────────┘ │ ┃
600-
// > ┃ │ │ max: 20 │ │ ┌──────────────┐ ┃
601-
// > ┃ │ first_row: 0 │ │ │ │ │
602-
// > │ │ │ │ │ Page │ ┃
603-
// > ┃ │ │ │ │ 4 │ │ ┃
604-
// > ┃ │ │ │ │ │ min: "D" │ ┃
605-
// > ┃ │ │ │ │ max: "G" │ │
606-
// > │ │ │ │ │first_row: 100│ ┃
607-
// > ┃ └──────────────┘ │ │ │ │ ┃
608-
// > ┃ │ ┌──────────────┐ │ │ │ ┃
609-
// > ┃ │ │ │ └──────────────┘ │
610-
// > │ │ Page │ │ ┌──────────────┐ ┃
611-
// > ┃ │ 2 │ │ │ │ │ ┃
612-
// > ┃ │ │ min: 30 │ │ │ Page │ ┃
613-
// > ┃ │ max: 40 │ │ │ 5 │ │
614-
// > │ │first_row: 200│ │ │ min: "H" │ ┃
615-
// > ┃ │ │ │ │ max: "Z" │ │ ┃
616-
// > ┃ │ │ │ │ │first_row: 250│ ┃
617-
// > ┃ └──────────────┘ │ │ │ │
618-
// > │ │ └──────────────┘ ┃
619-
// > ┃ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘ ┃
620-
// > ┃ ColumnChunk ColumnChunk ┃
621-
// > ┃ A B
622-
// > ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━┛
623-
// >
624-
// > Total rows: 300
625-
//
626-
// Given the predicate 'A > 35 AND B = "F"':
627-
// using `extract_page_index_push_down_predicates` get two single column predicate:
628-
// Using 'A > 35': could get RowSelector1: [ Skip(0~199), Read(200~299)]
629-
// Using B = "F": could get RowSelector2: [ Skip(0~99), Read(100~249), Skip(250~299)]
630-
//
631-
// As the Final selection is the intersection of each columns RowSelectors:
632-
// final_selection:[ Skip(0~199), Read(200~249), Skip(250~299)]
588+
/// For example:
589+
/// ```text
590+
/// ┏━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━
591+
/// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┃
592+
/// ┃ ┌──────────────┐ │ ┌──────────────┐ │ ┃
593+
/// ┃ │ │ │ │ │ │ ┃
594+
/// ┃ │ │ │ │ Page │ │
595+
/// │ │ │ │ │ 3 │ ┃
596+
/// ┃ │ │ │ │ min: "A" │ │ ┃
597+
/// ┃ │ │ │ │ │ max: "C" │ ┃
598+
/// ┃ │ Page │ │ │ first_row: 0 │ │
599+
/// │ │ 1 │ │ │ │ ┃
600+
/// ┃ │ min: 10 │ │ └──────────────┘ │ ┃
601+
/// ┃ │ │ max: 20 │ │ ┌──────────────┐ ┃
602+
/// ┃ │ first_row: 0 │ │ │ │ │
603+
/// │ │ │ │ │ Page │ ┃
604+
/// ┃ │ │ │ │ 4 │ │ ┃
605+
/// ┃ │ │ │ │ │ min: "D" │ ┃
606+
/// ┃ │ │ │ │ max: "G" │ │
607+
/// │ │ │ │ │first_row: 100│ ┃
608+
/// ┃ └──────────────┘ │ │ │ │ ┃
609+
/// ┃ │ ┌──────────────┐ │ │ │ ┃
610+
/// ┃ │ │ │ └──────────────┘ │
611+
/// │ │ Page │ │ ┌──────────────┐ ┃
612+
/// ┃ │ 2 │ │ │ │ │ ┃
613+
/// ┃ │ │ min: 30 │ │ │ Page │ ┃
614+
/// ┃ │ max: 40 │ │ │ 5 │ │
615+
/// │ │first_row: 200│ │ │ min: "H" │ ┃
616+
/// ┃ │ │ │ │ max: "Z" │ │ ┃
617+
/// ┃ │ │ │ │ │first_row: 250│ ┃
618+
/// ┃ └──────────────┘ │ │ │ │
619+
/// │ │ └──────────────┘ ┃
620+
/// ┃ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘ ┃
621+
/// ┃ ColumnChunk ColumnChunk ┃
622+
/// ┃ A B
623+
/// ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━┛
624+
///
625+
/// Total rows: 300
626+
/// ```
627+
///
628+
/// Given the predicate 'A > 35 AND B = "F"':
629+
/// using `extract_page_index_push_down_predicates` we get two single-column predicates:
630+
/// Using 'A > 35': could get `RowSelector1: [ Skip(0~199), Read(200~299)]`
631+
/// Using 'B = "F"': could get `RowSelector2: [ Skip(0~99), Read(100~249), Skip(250~299)]`
632+
///
633+
/// As the final selection is the intersection of each column's `RowSelectors`:
634+
/// `final_selection:[ Skip(0~199), Read(200~249), Skip(250~299)]`
633635
fn combine_multi_col_selection(
634636
row_selections: VecDeque<Vec<RowSelector>>,
635637
) -> Vec<RowSelector> {
@@ -639,13 +641,15 @@ fn combine_multi_col_selection(
639641
.unwrap()
640642
}
641643

642-
// combine two `RowSelection` return the intersection
643-
// For example:
644-
// self: NNYYYYNNY
645-
// other: NYNNNNNNY
646-
//
647-
// returned: NNNNNNNNY
648-
// set `need_combine` true will combine result: Select(2) + Select(1) + Skip(2) -> Select(3) + Skip(2)
644+
/// Combine two `RowSelection`s and return their intersection.
645+
/// For example:
646+
/// self: NNYYYYNNY
647+
/// other: NYNNNNNNY
648+
///
649+
/// returned: NNNNNNNNY
650+
/// Setting `need_combine` to true will combine the result: Select(2) + Select(1) + Skip(2) -> Select(3) + Skip(2)
651+
///
652+
/// Move to arrow-rs: https://github.com/apache/arrow-rs/issues/3003
649653
pub(crate) fn intersect_row_selection(
650654
left: Vec<RowSelector>,
651655
right: Vec<RowSelector>,

0 commit comments

Comments
 (0)