Skip to content

Commit 329f95d

Browse files
jayzhan211 authored and alamb committed
Minor: Convert Count's name to lowercase (apache#11028)
* push down non-unnest only
  Signed-off-by: jayzhan211 <[email protected]>
* add doc
  Signed-off-by: jayzhan211 <[email protected]>
* to lowercase
  Signed-off-by: jayzhan211 <[email protected]>
* fix tpch
  Signed-off-by: jayzhan211 <[email protected]>
* Update test
* fix test
  Signed-off-by: jayzhan211 <[email protected]>
---------
Signed-off-by: jayzhan211 <[email protected]>
Co-authored-by: Andrew Lamb <[email protected]>
1 parent 80efd2b commit 329f95d

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+359
-361
lines changed

datafusion/core/src/dataframe/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2018,7 +2018,7 @@ mod tests {
20182018

20192019
assert_batches_sorted_eq!(
20202020
["+----+-----------------------------+-----------------------------+-----------------------------+-----------------------------+-------------------------------+----------------------------------------+",
2021-
"| c1 | MIN(aggregate_test_100.c12) | MAX(aggregate_test_100.c12) | AVG(aggregate_test_100.c12) | sum(aggregate_test_100.c12) | COUNT(aggregate_test_100.c12) | COUNT(DISTINCT aggregate_test_100.c12) |",
2021+
"| c1 | MIN(aggregate_test_100.c12) | MAX(aggregate_test_100.c12) | AVG(aggregate_test_100.c12) | sum(aggregate_test_100.c12) | count(aggregate_test_100.c12) | count(DISTINCT aggregate_test_100.c12) |",
20222022
"+----+-----------------------------+-----------------------------+-----------------------------+-----------------------------+-------------------------------+----------------------------------------+",
20232023
"| a | 0.02182578039211991 | 0.9800193410444061 | 0.48754517466109415 | 10.238448667882977 | 21 | 21 |",
20242024
"| b | 0.04893135681998029 | 0.9185813970744787 | 0.41040709263815384 | 7.797734760124923 | 19 | 19 |",
@@ -3171,7 +3171,7 @@ mod tests {
31713171

31723172
let sql = r#"
31733173
SELECT
3174-
COUNT(1)
3174+
count(1)
31753175
FROM
31763176
test
31773177
GROUP BY

datafusion/core/src/execution/context/csv.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -110,12 +110,12 @@ mod tests {
110110
)
111111
.await?;
112112
let results =
113-
plan_and_collect(&ctx, "SELECT sum(c1), sum(c2), COUNT(*) FROM test").await?;
113+
plan_and_collect(&ctx, "SELECT sum(c1), sum(c2), count(*) FROM test").await?;
114114

115115
assert_eq!(results.len(), 1);
116116
let expected = [
117117
"+--------------+--------------+----------+",
118-
"| sum(test.c1) | sum(test.c2) | COUNT(*) |",
118+
"| sum(test.c1) | sum(test.c2) | count(*) |",
119119
"+--------------+--------------+----------+",
120120
"| 10 | 110 | 20 |",
121121
"+--------------+--------------+----------+",

datafusion/core/src/physical_optimizer/aggregate_statistics.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -141,7 +141,7 @@ fn take_optimizable_column_and_table_count(
141141
) -> Option<(ScalarValue, String)> {
142142
let col_stats = &stats.column_statistics;
143143
if let Some(agg_expr) = agg_expr.as_any().downcast_ref::<AggregateFunctionExpr>() {
144-
if agg_expr.fun().name() == "COUNT" && !agg_expr.is_distinct() {
144+
if agg_expr.fun().name() == "count" && !agg_expr.is_distinct() {
145145
if let Precision::Exact(num_rows) = stats.num_rows {
146146
let exprs = agg_expr.expressions();
147147
if exprs.len() == 1 {

datafusion/core/tests/custom_sources_cases/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -278,7 +278,7 @@ async fn optimizers_catch_all_statistics() {
278278

279279
let expected = RecordBatch::try_new(
280280
Arc::new(Schema::new(vec![
281-
Field::new("COUNT(*)", DataType::Int64, false),
281+
Field::new("count(*)", DataType::Int64, false),
282282
Field::new("MIN(test.c1)", DataType::Int32, false),
283283
Field::new("MAX(test.c1)", DataType::Int32, false),
284284
])),

datafusion/core/tests/dataframe/mod.rs

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -170,7 +170,7 @@ async fn test_count_wildcard_on_window() -> Result<()> {
170170
let ctx = create_join_context()?;
171171

172172
let sql_results = ctx
173-
.sql("select COUNT(*) OVER(ORDER BY a DESC RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING) from t1")
173+
.sql("select count(*) OVER(ORDER BY a DESC RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING) from t1")
174174
.await?
175175
.explain(false, false)?
176176
.collect()
@@ -211,7 +211,7 @@ async fn test_count_wildcard_on_aggregate() -> Result<()> {
211211
let sql_results = ctx
212212
.sql("select count(*) from t1")
213213
.await?
214-
.select(vec![col("COUNT(*)")])?
214+
.select(vec![col("count(*)")])?
215215
.explain(false, false)?
216216
.collect()
217217
.await?;
@@ -604,7 +604,7 @@ async fn test_grouping_sets() -> Result<()> {
604604

605605
let expected = vec![
606606
"+-----------+-----+---------------+",
607-
"| a | b | COUNT(test.a) |",
607+
"| a | b | count(test.a) |",
608608
"+-----------+-----+---------------+",
609609
"| | 100 | 1 |",
610610
"| | 10 | 2 |",
@@ -645,7 +645,7 @@ async fn test_grouping_sets_count() -> Result<()> {
645645

646646
let expected = vec![
647647
"+----+----+-----------------+",
648-
"| c1 | c2 | COUNT(Int32(1)) |",
648+
"| c1 | c2 | count(Int32(1)) |",
649649
"+----+----+-----------------+",
650650
"| | 5 | 14 |",
651651
"| | 4 | 23 |",
@@ -1233,7 +1233,7 @@ async fn unnest_aggregate_columns() -> Result<()> {
12331233
.await?;
12341234
let expected = [
12351235
r#"+-------------+"#,
1236-
r#"| COUNT(tags) |"#,
1236+
r#"| count(tags) |"#,
12371237
r#"+-------------+"#,
12381238
r#"| 9 |"#,
12391239
r#"+-------------+"#,

datafusion/core/tests/path_partition.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -120,7 +120,7 @@ async fn parquet_distinct_partition_col() -> Result<()> {
120120
//3. limit is not contained within a single partition
121121
//The id column is included to ensure that the parquet file is actually scanned.
122122
let results = ctx
123-
.sql("SELECT COUNT(*) as num_rows_per_month, month, MAX(id) from t group by month order by num_rows_per_month desc")
123+
.sql("SELECT count(*) as num_rows_per_month, month, MAX(id) from t group by month order by num_rows_per_month desc")
124124
.await?
125125
.collect()
126126
.await?;
@@ -339,7 +339,7 @@ async fn csv_grouping_by_partition() -> Result<()> {
339339

340340
let expected = [
341341
"+------------+----------+----------------------+",
342-
"| date | COUNT(*) | COUNT(DISTINCT t.c1) |",
342+
"| date | count(*) | count(DISTINCT t.c1) |",
343343
"+------------+----------+----------------------+",
344344
"| 2021-10-26 | 100 | 5 |",
345345
"| 2021-10-27 | 100 | 5 |",

datafusion/core/tests/sql/aggregates.rs

Lines changed: 18 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -69,12 +69,12 @@ async fn csv_query_array_agg_distinct() -> Result<()> {
6969
#[tokio::test]
7070
async fn count_partitioned() -> Result<()> {
7171
let results =
72-
execute_with_partition("SELECT COUNT(c1), COUNT(c2) FROM test", 4).await?;
72+
execute_with_partition("SELECT count(c1), count(c2) FROM test", 4).await?;
7373
assert_eq!(results.len(), 1);
7474

7575
let expected = [
7676
"+----------------+----------------+",
77-
"| COUNT(test.c1) | COUNT(test.c2) |",
77+
"| count(test.c1) | count(test.c2) |",
7878
"+----------------+----------------+",
7979
"| 40 | 40 |",
8080
"+----------------+----------------+",
@@ -86,11 +86,11 @@ async fn count_partitioned() -> Result<()> {
8686
#[tokio::test]
8787
async fn count_aggregated() -> Result<()> {
8888
let results =
89-
execute_with_partition("SELECT c1, COUNT(c2) FROM test GROUP BY c1", 4).await?;
89+
execute_with_partition("SELECT c1, count(c2) FROM test GROUP BY c1", 4).await?;
9090

9191
let expected = [
9292
"+----+----------------+",
93-
"| c1 | COUNT(test.c2) |",
93+
"| c1 | count(test.c2) |",
9494
"+----+----------------+",
9595
"| 0 | 10 |",
9696
"| 1 | 10 |",
@@ -105,14 +105,14 @@ async fn count_aggregated() -> Result<()> {
105105
#[tokio::test]
106106
async fn count_aggregated_cube() -> Result<()> {
107107
let results = execute_with_partition(
108-
"SELECT c1, c2, COUNT(c3) FROM test GROUP BY CUBE (c1, c2) ORDER BY c1, c2",
108+
"SELECT c1, c2, count(c3) FROM test GROUP BY CUBE (c1, c2) ORDER BY c1, c2",
109109
4,
110110
)
111111
.await?;
112112

113113
let expected = vec![
114114
"+----+----+----------------+",
115-
"| c1 | c2 | COUNT(test.c3) |",
115+
"| c1 | c2 | count(test.c3) |",
116116
"+----+----+----------------+",
117117
"| | | 40 |",
118118
"| | 1 | 4 |",
@@ -222,15 +222,15 @@ async fn run_count_distinct_integers_aggregated_scenario(
222222
"
223223
SELECT
224224
c_group,
225-
COUNT(c_uint64),
226-
COUNT(DISTINCT c_int8),
227-
COUNT(DISTINCT c_int16),
228-
COUNT(DISTINCT c_int32),
229-
COUNT(DISTINCT c_int64),
230-
COUNT(DISTINCT c_uint8),
231-
COUNT(DISTINCT c_uint16),
232-
COUNT(DISTINCT c_uint32),
233-
COUNT(DISTINCT c_uint64)
225+
count(c_uint64),
226+
count(DISTINCT c_int8),
227+
count(DISTINCT c_int16),
228+
count(DISTINCT c_int32),
229+
count(DISTINCT c_int64),
230+
count(DISTINCT c_uint8),
231+
count(DISTINCT c_uint16),
232+
count(DISTINCT c_uint32),
233+
count(DISTINCT c_uint64)
234234
FROM test
235235
GROUP BY c_group
236236
",
@@ -260,7 +260,7 @@ async fn count_distinct_integers_aggregated_single_partition() -> Result<()> {
260260
let results = run_count_distinct_integers_aggregated_scenario(partitions).await?;
261261

262262
let expected = ["+---------+----------------------+-----------------------------+------------------------------+------------------------------+------------------------------+------------------------------+-------------------------------+-------------------------------+-------------------------------+",
263-
"| c_group | COUNT(test.c_uint64) | COUNT(DISTINCT test.c_int8) | COUNT(DISTINCT test.c_int16) | COUNT(DISTINCT test.c_int32) | COUNT(DISTINCT test.c_int64) | COUNT(DISTINCT test.c_uint8) | COUNT(DISTINCT test.c_uint16) | COUNT(DISTINCT test.c_uint32) | COUNT(DISTINCT test.c_uint64) |",
263+
"| c_group | count(test.c_uint64) | count(DISTINCT test.c_int8) | count(DISTINCT test.c_int16) | count(DISTINCT test.c_int32) | count(DISTINCT test.c_int64) | count(DISTINCT test.c_uint8) | count(DISTINCT test.c_uint16) | count(DISTINCT test.c_uint32) | count(DISTINCT test.c_uint64) |",
264264
"+---------+----------------------+-----------------------------+------------------------------+------------------------------+------------------------------+------------------------------+-------------------------------+-------------------------------+-------------------------------+",
265265
"| a | 3 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 |",
266266
"| b | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |",
@@ -284,7 +284,7 @@ async fn count_distinct_integers_aggregated_multiple_partitions() -> Result<()>
284284
let results = run_count_distinct_integers_aggregated_scenario(partitions).await?;
285285

286286
let expected = ["+---------+----------------------+-----------------------------+------------------------------+------------------------------+------------------------------+------------------------------+-------------------------------+-------------------------------+-------------------------------+",
287-
"| c_group | COUNT(test.c_uint64) | COUNT(DISTINCT test.c_int8) | COUNT(DISTINCT test.c_int16) | COUNT(DISTINCT test.c_int32) | COUNT(DISTINCT test.c_int64) | COUNT(DISTINCT test.c_uint8) | COUNT(DISTINCT test.c_uint16) | COUNT(DISTINCT test.c_uint32) | COUNT(DISTINCT test.c_uint64) |",
287+
"| c_group | count(test.c_uint64) | count(DISTINCT test.c_int8) | count(DISTINCT test.c_int16) | count(DISTINCT test.c_int32) | count(DISTINCT test.c_int64) | count(DISTINCT test.c_uint8) | count(DISTINCT test.c_uint16) | count(DISTINCT test.c_uint32) | count(DISTINCT test.c_uint64) |",
288288
"+---------+----------------------+-----------------------------+------------------------------+------------------------------+------------------------------+------------------------------+-------------------------------+-------------------------------+-------------------------------+",
289289
"| a | 5 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 |",
290290
"| b | 5 | 4 | 4 | 4 | 4 | 4 | 4 | 4 | 4 |",
@@ -301,7 +301,7 @@ async fn test_accumulator_row_accumulator() -> Result<()> {
301301
let ctx = SessionContext::new_with_config(config);
302302
register_aggregate_csv(&ctx).await?;
303303

304-
let sql = "SELECT c1, c2, MIN(c13) as min1, MIN(c9) as min2, MAX(c13) as max1, MAX(c9) as max2, AVG(c9) as avg1, MIN(c13) as min3, COUNT(C9) as cnt1, 0.5*SUM(c9-c8) as sum1
304+
let sql = "SELECT c1, c2, MIN(c13) as min1, MIN(c9) as min2, MAX(c13) as max1, MAX(c9) as max2, AVG(c9) as avg1, MIN(c13) as min3, count(C9) as cnt1, 0.5*SUM(c9-c8) as sum1
305305
FROM aggregate_test_100
306306
GROUP BY c1, c2
307307
ORDER BY c1, c2

datafusion/core/tests/sql/explain_analyze.rs

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -81,7 +81,7 @@ async fn explain_analyze_baseline_metrics() {
8181
);
8282
assert_metrics!(
8383
&formatted,
84-
"ProjectionExec: expr=[COUNT(*)",
84+
"ProjectionExec: expr=[count(*)",
8585
"metrics=[output_rows=1, elapsed_compute="
8686
);
8787
assert_metrics!(
@@ -700,7 +700,7 @@ async fn csv_explain_analyze() {
700700
// Only test basic plumbing and try to avoid having to change too
701701
// many things. explain_analyze_baseline_metrics covers the values
702702
// in greater depth
703-
let needle = "AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[COUNT(*)], metrics=[output_rows=5";
703+
let needle = "AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[count(*)], metrics=[output_rows=5";
704704
assert_contains!(&formatted, needle);
705705

706706
let verbose_needle = "Output Rows";
@@ -793,7 +793,7 @@ async fn explain_logical_plan_only() {
793793
let expected = vec![
794794
vec![
795795
"logical_plan",
796-
"Aggregate: groupBy=[[]], aggr=[[COUNT(Int64(1)) AS COUNT(*)]]\
796+
"Aggregate: groupBy=[[]], aggr=[[count(Int64(1)) AS count(*)]]\
797797
\n SubqueryAlias: t\
798798
\n Projection: \
799799
\n Values: (Utf8(\"a\"), Int64(1), Int64(100)), (Utf8(\"a\"), Int64(2), Int64(150))"
@@ -812,7 +812,7 @@ async fn explain_physical_plan_only() {
812812

813813
let expected = vec![vec![
814814
"physical_plan",
815-
"ProjectionExec: expr=[2 as COUNT(*)]\
815+
"ProjectionExec: expr=[2 as count(*)]\
816816
\n PlaceholderRowExec\
817817
\n",
818818
]];

datafusion/functions-aggregate/src/count.rs

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -77,7 +77,6 @@ pub fn count_distinct(expr: Expr) -> datafusion_expr::Expr {
7777

7878
pub struct Count {
7979
signature: Signature,
80-
aliases: Vec<String>,
8180
}
8281

8382
impl Debug for Count {
@@ -98,7 +97,6 @@ impl Default for Count {
9897
impl Count {
9998
pub fn new() -> Self {
10099
Self {
101-
aliases: vec!["count".to_string()],
102100
signature: Signature::variadic_any(Volatility::Immutable),
103101
}
104102
}
@@ -110,7 +108,7 @@ impl AggregateUDFImpl for Count {
110108
}
111109

112110
fn name(&self) -> &str {
113-
"COUNT"
111+
"count"
114112
}
115113

116114
fn signature(&self) -> &Signature {
@@ -249,7 +247,7 @@ impl AggregateUDFImpl for Count {
249247
}
250248

251249
fn aliases(&self) -> &[String] {
252-
&self.aliases
250+
&[]
253251
}
254252

255253
fn groups_accumulator_supported(&self, args: AccumulatorArgs) -> bool {

0 commit comments

Comments
 (0)