improve comments.

Rachelint · Rachelint · commit 426e2eeb5777 · 2025-05-04T02:03:36.000+08:00
diff --git a/datafusion-examples/examples/advanced_udaf.rs b/datafusion-examples/examples/advanced_udaf.rs
@@ -248,7 +248,10 @@ impl GroupsAccumulator for GeometricMeanGroupsAccumulator {
         // increment counts, update sums
         self.counts.resize(total_num_groups, 0);
         self.prods.resize(total_num_groups, 1.0);
-        // Use the `NullState` structure to generate specialized code for null / non null input elements
+        // Use the `NullState` structure to generate specialized code for null / non null input elements.
+        // `block_id` is ignored in `value_fn`, because `GeometricMeanGroupsAccumulator`
+        // does not yet support blocked groups.
+        // For more details, see `GroupsAccumulator::supports_blocked_groups`.
         self.null_state.accumulate(
             group_indices,
             values,
@@ -280,6 +283,9 @@ impl GroupsAccumulator for GeometricMeanGroupsAccumulator {
         let partial_counts = values[1].as_primitive::<UInt32Type>();
         // update counts with partial counts
         self.counts.resize(total_num_groups, 0);
+        // `block_id` is ignored in `value_fn`, because `GeometricMeanGroupsAccumulator`
+        // does not yet support blocked groups.
+        // For more details, see `GroupsAccumulator::supports_blocked_groups`.
         self.null_state.accumulate(
             group_indices,
             partial_counts,
diff --git a/datafusion/expr-common/src/groups_accumulator.rs b/datafusion/expr-common/src/groups_accumulator.rs
@@ -41,6 +41,14 @@ pub enum EmitTo {
 
 impl EmitTo {
     /// Remove and return `needed values` from `values`.
+    ///
+    /// Inputs:
+    ///   - `values`, the emitting source.
+    ///   - `is_blocked_groups`, whether `values` is organized in the `single`
+    ///     or the `blocked` approach. For more details, see
+    ///     [`GroupsAccumulator::supports_blocked_groups`].
+    ///
+    ///
     pub fn take_needed<T>(
         &self,
         values: &mut VecDeque<Vec<T>>,
@@ -290,6 +298,25 @@ pub trait GroupsAccumulator: Send {
     fn size(&self) -> usize;
 
     /// Returns `true` if this accumulator supports blocked groups.
+    ///
+    /// Blocked groups (also called the blocked management approach) is an
+    /// optimization to reduce the cost of managing aggregation intermediate states.
+    ///
+    /// Here is a brief introduction to the two state management approaches:
+    ///   - Blocked approach: states are stored and managed in multiple `Vec`s,
+    ///     which we call `Block`s. Organizing states this way avoids resizing a `Vec`;
+    ///     a new `Vec` is allocated instead, which reduces cost and improves performance.
+    ///     When locating data in `Block`s, we first use `block_id` to locate the
+    ///     needed `Block`, and then use `block_offset` to locate the needed
+    ///     data within that `Block`.
+    ///
+    ///   - Single approach: all states are stored and managed in a single large `Block`.
+    ///     So when locating data, `block_id` will always be 0, and we only need `block_offset`
+    ///     to locate data in the single `Block`.
+    ///
+    /// For more details, see:
+    /// <https://github.com/apache/datafusion/issues/7065>
+    ///
     fn supports_blocked_groups(&self) -> bool {
         false
     }
diff --git a/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/accumulate.rs b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/accumulate.rs
@@ -245,7 +245,10 @@ pub trait SeenValues: Default + Debug + Send {
 }
 
 /// [`SeenValues`] for `flat groups input`
-///
+///
+/// For an introduction to `block_id` and `block_offset`, first see
+/// [`GroupsAccumulator::supports_blocked_groups`].
+///
 /// The `flat groups input` are organized like:
 ///
 /// ```text
@@ -261,7 +264,9 @@ pub trait SeenValues: Default + Debug + Send {
 ///
 /// For `set_bit(block_id, block_offset, value)`, `block_id` is unused,
 /// `block_offset` will be set to `group_index`.
-///
+///
+/// [`GroupsAccumulator::supports_blocked_groups`]: datafusion_expr_common::groups_accumulator::GroupsAccumulator::supports_blocked_groups
+///
 #[derive(Debug)]
 pub struct FlatSeenValues {
     builder: BooleanBufferBuilder,
@@ -318,6 +323,9 @@ impl SeenValues for FlatSeenValues {
 
 /// [`SeenValues`] for `blocked groups input`
 ///
+/// For an introduction to `block_id` and `block_offset`, first see
+/// [`GroupsAccumulator::supports_blocked_groups`].
+///
 /// The `flat groups input` are organized like:
 ///
 /// ```text
@@ -328,10 +336,12 @@ impl SeenValues for FlatSeenValues {
 ///     row_n (block_id_n, block_offset_n)    
 /// ```
 ///
-/// If ` row_x (block_id_x, block_offset_x)` is not filtered
+/// If `row_x (block_id_x, block_offset_x)` is not filtered
 /// (`block_id_x, block_offset_x` is seen), `seen_values[block_id_x][block_offset_x]`
 /// will be set to `true`.
 ///
+/// [`GroupsAccumulator::supports_blocked_groups`]: datafusion_expr_common::groups_accumulator::GroupsAccumulator::supports_blocked_groups
+///
 #[derive(Debug, Default)]
 pub struct BlockedSeenValues {
     blocked_builders: VecDeque<BooleanBufferBuilder>,
diff --git a/datafusion/functions-aggregate/src/average.rs b/datafusion/functions-aggregate/src/average.rs
@@ -579,6 +579,10 @@ where
         // increment counts, update sums
         self.counts.resize(total_num_groups, 0);
         self.sums.resize(total_num_groups, T::default_value());
+
+        // `block_id` is ignored in `value_fn`, because `AvgGroupsAccumulator`
+        // does not yet support blocked groups.
+        // For more details, see `GroupsAccumulator::supports_blocked_groups`.
         self.null_state.accumulate(
             group_indices,
             values,
@@ -663,6 +667,10 @@ where
         let partial_sums = values[1].as_primitive::<T>();
         // update counts with partial counts
         self.counts.resize(total_num_groups, 0);
+
+        // `block_id` is ignored in `value_fn`, because `AvgGroupsAccumulator`
+        // does not yet support blocked groups.
+        // For more details, see `GroupsAccumulator::supports_blocked_groups`.
         self.null_state.accumulate(
             group_indices,
             partial_counts,
@@ -675,6 +683,9 @@ where
 
         // update sums
         self.sums.resize(total_num_groups, T::default_value());
+        // `block_id` is ignored in `value_fn`, because `AvgGroupsAccumulator`
+        // does not yet support blocked groups.
+        // For more details, see `GroupsAccumulator::supports_blocked_groups`.
         self.null_state.accumulate(
             group_indices,
             partial_sums,
diff --git a/datafusion/physical-plan/src/aggregates/group_values/mod.rs b/datafusion/physical-plan/src/aggregates/group_values/mod.rs
@@ -112,6 +112,25 @@ pub(crate) trait GroupValues: Send {
     fn clear_shrink(&mut self, batch: &RecordBatch);
 
     /// Returns `true` if this accumulator supports blocked groups.
+    ///
+    /// Blocked groups (also called the blocked management approach) is an
+    /// optimization to reduce the cost of managing aggregation intermediate states.
+    ///
+    /// Here is a brief introduction to the two state management approaches:
+    ///   - Blocked approach: states are stored and managed in multiple `Vec`s,
+    ///     which we call `Block`s. Organizing states this way avoids resizing a `Vec`;
+    ///     a new `Vec` is allocated instead, which reduces cost and improves performance.
+    ///     When locating data in `Block`s, we first use `block_id` to locate the
+    ///     needed `Block`, and then use `block_offset` to locate the needed
+    ///     data within that `Block`.
+    ///
+    ///   - Single approach: all states are stored and managed in a single large `Block`.
+    ///     So when locating data, `block_id` will always be 0, and we only need `block_offset`
+    ///     to locate data in the single `Block`.
+    ///
+    /// For more details, see:
+    /// <https://github.com/apache/datafusion/issues/7065>
+    ///
     fn supports_blocked_groups(&self) -> bool {
         false
     }
diff --git a/datafusion/physical-plan/src/aggregates/row_hash.rs b/datafusion/physical-plan/src/aggregates/row_hash.rs
@@ -360,11 +360,16 @@ impl SkipAggregationProbe {
 /// (also with the same predefined block size based capacity)
 /// instead of expanding the current one and copying the data.
 /// This method eliminates unnecessary copies and significantly improves performance.
-/// For a nice introduction to the blocked approach, maybe you can see [#7065].
+///
+/// You can find some implementation details (such as how to locate data in the two approaches)
+/// in [`GroupsAccumulator::supports_blocked_groups`] and [`GroupValues::supports_blocked_groups`].
+///
+/// For a more detailed introduction to the design of the blocked approach, see [#7065].
 ///
 /// The conditions that trigger the blocked groups optimization can be found in
 /// [`maybe_enable_blocked_groups`].
 ///  
+/// [`GroupsAccumulator::supports_blocked_groups`]: GroupsAccumulator::supports_blocked_groups
 /// [`group_values`]: Self::group_values
 /// [`accumulators`]: Self::accumulators
 /// [#7065]: https://github.com/apache/datafusion/issues/7065
diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt
@@ -676,14 +676,16 @@ DROP VIEW test.xyz
 
 
 # show_external_create_table()
-statement error DataFusion error: Object Store error: Object at location /Users/kamille/Desktop/github/datafusion/testing/data/csv/aggregate_test_100\.csv not found: No such file or directory \(os error 2\)
+statement ok
 CREATE EXTERNAL TABLE abc
 STORED AS CSV
 LOCATION '../../testing/data/csv/aggregate_test_100.csv'
 OPTIONS ('format.has_header' 'true');
 
-query error DataFusion error: Error during planning: table 'datafusion\.public\.abc' not found
+query TTTT
 SHOW CREATE TABLE abc;
+----
+datafusion public abc CREATE EXTERNAL TABLE abc STORED AS CSV LOCATION ../../testing/data/csv/aggregate_test_100.csv
 
 # string_agg has different arg_types but same return type. Test avoiding duplicate entries for the same function.
 query TTT
diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md
@@ -97,8 +97,8 @@ Environment variables are read during `SessionConfig` initialisation so they mus
 | datafusion.execution.skip_partial_aggregation_probe_rows_threshold      | 100000                    | Number of input rows partial aggregation partition should process, before aggregation ratio check and trying to switch to skipping aggregation mode                                                                                                                                                                                                                                                                                                                                                                                                                      |
 | datafusion.execution.use_row_number_estimates_to_optimize_partitioning  | false                     | Should DataFusion use row number estimates at the input to decide whether increasing parallelism is beneficial or not. By default, only exact row numbers (not estimates) are used for this decision. Setting this flag to `true` will likely produce better plans. if the source of statistics is accurate. We plan to make this the default in the future.                                                                                                                                                                                                             |
 | datafusion.execution.enforce_batch_size_in_joins                        | false                     | Should DataFusion enforce batch size in joins or not. By default, DataFusion will not enforce batch size in joins. Enforcing batch size in joins can reduce memory usage when joining large tables with a highly-selective join filter, but is also slightly slower.                                                                                                                                                                                                                                                                                                     |
-| datafusion.execution.enable_aggregation_blocked_groups                  | true                      | Should DataFusion use a blocked approach to manage grouping state. By default, the blocked approach is used which allocates capacity based on a predefined block size firstly. When the block reaches its limit, we allocate a new block (also with the same predefined block size based capacity) instead of expanding the current one and copying the data. If `false`, a single allocation approach is used, where values are managed within a single large memory block. As this block grows, it often triggers numerous copies, resulting in poor performance.      |
 | datafusion.execution.objectstore_writer_buffer_size                     | 10485760                  | Size (bytes) of data buffer DataFusion uses when writing output files. This affects the size of the data chunks that are uploaded to remote object stores (e.g. AWS S3). If very large (>= 100 GiB) output files are being written, it may be necessary to increase this size to avoid errors from the remote end point.                                                                                                                                                                                                                                                 |
+| datafusion.execution.enable_aggregation_blocked_groups                  | true                      | Should DataFusion use a blocked approach to manage grouping state. By default, the blocked approach is used which allocates capacity based on a predefined block size firstly. When the block reaches its limit, we allocate a new block (also with the same predefined block size based capacity) instead of expanding the current one and copying the data. If `false`, a single allocation approach is used, where values are managed within a single large memory block. As this block grows, it often triggers numerous copies, resulting in poor performance.      |
 | datafusion.optimizer.enable_distinct_aggregation_soft_limit             | true                      | When set to true, the optimizer will push a limit operation into grouped aggregations which have no aggregate expressions, as a soft limit, emitting groups once the limit is reached, before all rows in the group are read.                                                                                                                                                                                                                                                                                                                                            |
 | datafusion.optimizer.enable_round_robin_repartition                     | true                      | When set to true, the physical plan optimizer will try to add round robin repartitioning to increase parallelism to leverage more CPU cores                                                                                                                                                                                                                                                                                                                                                                                                                              |
 | datafusion.optimizer.enable_topk_aggregation                            | true                      | When set to true, the optimizer will attempt to perform limit operations during aggregations, if possible                                                                                                                                                                                                                                                                                                                                                                                                                                                                |