|
18 | 18 | use std::sync::Arc;
|
19 | 19 |
|
20 | 20 | use crate::physical_optimizer::test_utils::{
|
21 |
| - aggregate_exec, bounded_window_exec, check_integrity, coalesce_batches_exec, |
| 21 | + aggregate_exec, bounded_window_exec, bounded_window_exec_non_set_monotonic, |
| 22 | + bounded_window_exec_with_partition, check_integrity, coalesce_batches_exec, |
22 | 23 | coalesce_partitions_exec, create_test_schema, create_test_schema2,
|
23 |
| - create_test_schema3, filter_exec, global_limit_exec, hash_join_exec, limit_exec, |
24 |
| - local_limit_exec, memory_exec, parquet_exec, repartition_exec, sort_exec, sort_expr, |
25 |
| - sort_expr_options, sort_merge_join_exec, sort_preserving_merge_exec, |
26 |
| - spr_repartition_exec, stream_exec_ordered, union_exec, RequirementsTestExec, |
| 24 | + create_test_schema3, create_test_schema4, filter_exec, global_limit_exec, |
| 25 | + hash_join_exec, limit_exec, local_limit_exec, memory_exec, parquet_exec, |
| 26 | + repartition_exec, sort_exec, sort_expr, sort_expr_options, sort_merge_join_exec, |
| 27 | + sort_preserving_merge_exec, spr_repartition_exec, stream_exec_ordered, union_exec, |
| 28 | + RequirementsTestExec, |
27 | 29 | };
|
28 | 30 |
|
29 | 31 | use datafusion_physical_plan::displayable;
|
@@ -238,6 +240,208 @@ async fn test_remove_unnecessary_sort5() -> Result<()> {
|
238 | 240 | Ok(())
|
239 | 241 | }
|
240 | 242 |
|
| 243 | +#[tokio::test] |
| 244 | +async fn test_bounded_window_set_monotonic_no_partition() -> Result<()> { |
| 245 | + let schema = create_test_schema()?; |
| 246 | + |
| 247 | + let source = parquet_exec_sorted(&schema, vec![]); |
| 248 | + |
| 249 | + let sort_exprs = vec![sort_expr_options( |
| 250 | + "nullable_col", |
| 251 | + &schema, |
| 252 | + SortOptions { |
| 253 | + descending: true, |
| 254 | + nulls_first: false, |
| 255 | + }, |
| 256 | + )]; |
| 257 | + let sort = sort_exec(sort_exprs.clone(), source); |
| 258 | + |
| 259 | + let bounded_window = bounded_window_exec("nullable_col", vec![], sort); |
| 260 | + |
| 261 | + let output_schema = bounded_window.schema(); |
| 262 | + let sort_exprs2 = vec![sort_expr_options( |
| 263 | + "count", |
| 264 | + &output_schema, |
| 265 | + SortOptions { |
| 266 | + descending: false, |
| 267 | + nulls_first: false, |
| 268 | + }, |
| 269 | + )]; |
| 270 | + let physical_plan = sort_exec(sort_exprs2.clone(), bounded_window); |
| 271 | + |
| 272 | + let expected_input = [ |
| 273 | + "SortExec: expr=[count@2 ASC NULLS LAST], preserve_partitioning=[false]", |
| 274 | + " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", |
| 275 | + " SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]", |
| 276 | + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", |
| 277 | + ]; |
| 278 | + let expected_optimized = [ |
| 279 | + "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", |
| 280 | + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", |
| 281 | + ]; |
| 282 | + assert_optimized!(expected_input, expected_optimized, physical_plan, true); |
| 283 | + |
| 284 | + Ok(()) |
| 285 | +} |
| 286 | + |
| 287 | +#[tokio::test] |
| 288 | +async fn test_bounded_plain_window_set_monotonic_with_partitions() -> Result<()> { |
| 289 | + let schema = create_test_schema()?; |
| 290 | + |
| 291 | + let source = parquet_exec_sorted(&schema, vec![]); |
| 292 | + |
| 293 | + let sort_exprs = vec![sort_expr_options( |
| 294 | + "nullable_col", |
| 295 | + &schema, |
| 296 | + SortOptions { |
| 297 | + descending: true, |
| 298 | + nulls_first: false, |
| 299 | + }, |
| 300 | + )]; |
| 301 | + let sort = sort_exec(sort_exprs.clone(), source); |
| 302 | + |
| 303 | + let partition_bys = &[col("nullable_col", &schema)?]; |
| 304 | + let bounded_window = bounded_window_exec_with_partition( |
| 305 | + "non_nullable_col", |
| 306 | + vec![], |
| 307 | + partition_bys, |
| 308 | + sort, |
| 309 | + false, |
| 310 | + ); |
| 311 | + |
| 312 | + let output_schema = bounded_window.schema(); |
| 313 | + let sort_exprs2 = vec![sort_expr_options( |
| 314 | + "count", |
| 315 | + &output_schema, |
| 316 | + SortOptions { |
| 317 | + descending: false, |
| 318 | + nulls_first: false, |
| 319 | + }, |
| 320 | + )]; |
| 321 | + let physical_plan = sort_exec(sort_exprs2.clone(), bounded_window); |
| 322 | + |
| 323 | + let expected_input = [ |
| 324 | + "SortExec: expr=[count@2 ASC NULLS LAST], preserve_partitioning=[false]", |
| 325 | + " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", |
| 326 | + " SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]", |
| 327 | + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", |
| 328 | + ]; |
| 329 | + let expected_optimized = [ |
| 330 | + "SortExec: expr=[count@2 ASC NULLS LAST], preserve_partitioning=[false]", |
| 331 | + " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", |
| 332 | + " SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", |
| 333 | + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", |
| 334 | + ]; |
| 335 | + assert_optimized!(expected_input, expected_optimized, physical_plan, true); |
| 336 | + |
| 337 | + Ok(()) |
| 338 | +} |
| 339 | + |
| 340 | +#[tokio::test] |
| 341 | +async fn test_bounded_plain_window_set_monotonic_with_partitions_partial() -> Result<()> { |
| 342 | + let schema = create_test_schema()?; |
| 343 | + |
| 344 | + let source = parquet_exec_sorted(&schema, vec![]); |
| 345 | + |
| 346 | + let sort_exprs = vec![sort_expr_options( |
| 347 | + "nullable_col", |
| 348 | + &schema, |
| 349 | + SortOptions { |
| 350 | + descending: true, |
| 351 | + nulls_first: false, |
| 352 | + }, |
| 353 | + )]; |
| 354 | + let sort = sort_exec(sort_exprs.clone(), source); |
| 355 | + |
| 356 | + let partition_bys = &[col("nullable_col", &schema)?]; |
| 357 | + let bounded_window = bounded_window_exec_with_partition( |
| 358 | + "non_nullable_col", |
| 359 | + vec![], |
| 360 | + partition_bys, |
| 361 | + sort, |
| 362 | + false, |
| 363 | + ); |
| 364 | + |
| 365 | + let output_schema = bounded_window.schema(); |
| 366 | + let sort_exprs2 = vec![ |
| 367 | + sort_expr_options( |
| 368 | + "nullable_col", |
| 369 | + &output_schema, |
| 370 | + SortOptions { |
| 371 | + descending: true, |
| 372 | + nulls_first: false, |
| 373 | + }, |
| 374 | + ), |
| 375 | + sort_expr_options( |
| 376 | + "count", |
| 377 | + &output_schema, |
| 378 | + SortOptions { |
| 379 | + descending: false, |
| 380 | + nulls_first: false, |
| 381 | + }, |
| 382 | + ), |
| 383 | + ]; |
| 384 | + let physical_plan = sort_exec(sort_exprs2.clone(), bounded_window); |
| 385 | + |
| 386 | + let expected_input = [ |
| 387 | + "SortExec: expr=[nullable_col@0 DESC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false]", |
| 388 | + " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", |
| 389 | + " SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]", |
| 390 | + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", |
| 391 | + ]; |
| 392 | + let expected_optimized = [ |
| 393 | + "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", |
| 394 | + " SortExec: expr=[nullable_col@0 DESC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false]", |
| 395 | + " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", |
| 396 | + ]; |
| 397 | + assert_optimized!(expected_input, expected_optimized, physical_plan, true); |
| 398 | + |
| 399 | + Ok(()) |
| 400 | +} |
| 401 | + |
| 402 | +#[tokio::test] |
| 403 | +async fn test_bounded_window_non_set_monotonic_sort() -> Result<()> { |
| 404 | + let schema = create_test_schema4()?; |
| 405 | + let sort_exprs = vec![sort_expr_options( |
| 406 | + "a", |
| 407 | + &schema, |
| 408 | + SortOptions { |
| 409 | + descending: true, |
| 410 | + nulls_first: false, |
| 411 | + }, |
| 412 | + )]; |
| 413 | + let source = parquet_exec_sorted(&schema, sort_exprs.clone()); |
| 414 | + let sort = sort_exec(sort_exprs.clone(), source); |
| 415 | + |
| 416 | + let bounded_window = |
| 417 | + bounded_window_exec_non_set_monotonic("a", sort_exprs.clone(), sort); |
| 418 | + let output_schema = bounded_window.schema(); |
| 419 | + let sort_exprs2 = vec![sort_expr_options( |
| 420 | + "avg", |
| 421 | + &output_schema, |
| 422 | + SortOptions { |
| 423 | + descending: false, |
| 424 | + nulls_first: false, |
| 425 | + }, |
| 426 | + )]; |
| 427 | + let physical_plan = sort_exec(sort_exprs2.clone(), bounded_window); |
| 428 | + |
| 429 | + let expected_input = [ |
| 430 | + "SortExec: expr=[avg@5 ASC NULLS LAST], preserve_partitioning=[false]", |
| 431 | + " BoundedWindowAggExec: wdw=[avg: Ok(Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", |
| 432 | + " SortExec: expr=[a@0 DESC NULLS LAST], preserve_partitioning=[false]", |
| 433 | + " ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 DESC NULLS LAST]", |
| 434 | + ]; |
| 435 | + let expected_optimized = [ |
| 436 | + "SortExec: expr=[avg@5 ASC NULLS LAST], preserve_partitioning=[false]", |
| 437 | + " BoundedWindowAggExec: wdw=[avg: Ok(Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", |
| 438 | + " ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 DESC NULLS LAST]", |
| 439 | + ]; |
| 440 | + assert_optimized!(expected_input, expected_optimized, physical_plan, true); |
| 441 | + |
| 442 | + Ok(()) |
| 443 | +} |
| 444 | + |
241 | 445 | #[tokio::test]
|
242 | 446 | async fn test_do_not_remove_sort_with_limit() -> Result<()> {
|
243 | 447 | let schema = create_test_schema()?;
|
|
0 commit comments