15
15
// specific language governing permissions and limitations
16
16
// under the License.
17
17
18
- use arrow_array:: builder:: Int32Builder ;
18
+ use arrow_array:: builder:: { Date32Builder , Decimal128Builder , Int32Builder } ;
19
19
use arrow_array:: { builder:: StringBuilder , RecordBatch } ;
20
20
use arrow_schema:: { DataType , Field , Schema } ;
21
- use comet:: execution:: shuffle:: { write_ipc_compressed , CompressionCodec , ShuffleWriterExec } ;
21
+ use comet:: execution:: shuffle:: { CompressionCodec , ShuffleBlockWriter , ShuffleWriterExec } ;
22
22
use criterion:: { criterion_group, criterion_main, Criterion } ;
23
23
use datafusion:: physical_plan:: metrics:: Time ;
24
24
use datafusion:: {
@@ -31,67 +31,56 @@ use std::sync::Arc;
31
31
use tokio:: runtime:: Runtime ;
32
32
33
33
fn criterion_benchmark ( c : & mut Criterion ) {
34
+ let batch = create_batch ( 8192 , true ) ;
34
35
let mut group = c. benchmark_group ( "shuffle_writer" ) ;
35
- group. bench_function ( "shuffle_writer: encode (no compression))" , |b| {
36
- let batch = create_batch ( 8192 , true ) ;
37
- let mut buffer = vec ! [ ] ;
38
- let ipc_time = Time :: default ( ) ;
39
- b. iter ( || {
40
- buffer. clear ( ) ;
41
- let mut cursor = Cursor :: new ( & mut buffer) ;
42
- write_ipc_compressed ( & batch, & mut cursor, & CompressionCodec :: None , & ipc_time)
43
- } ) ;
44
- } ) ;
45
- group. bench_function ( "shuffle_writer: encode and compress (snappy)" , |b| {
46
- let batch = create_batch ( 8192 , true ) ;
47
- let mut buffer = vec ! [ ] ;
48
- let ipc_time = Time :: default ( ) ;
49
- b. iter ( || {
50
- buffer. clear ( ) ;
51
- let mut cursor = Cursor :: new ( & mut buffer) ;
52
- write_ipc_compressed ( & batch, & mut cursor, & CompressionCodec :: Snappy , & ipc_time)
53
- } ) ;
54
- } ) ;
55
- group. bench_function ( "shuffle_writer: encode and compress (lz4)" , |b| {
56
- let batch = create_batch ( 8192 , true ) ;
57
- let mut buffer = vec ! [ ] ;
58
- let ipc_time = Time :: default ( ) ;
59
- b. iter ( || {
60
- buffer. clear ( ) ;
61
- let mut cursor = Cursor :: new ( & mut buffer) ;
62
- write_ipc_compressed ( & batch, & mut cursor, & CompressionCodec :: Lz4Frame , & ipc_time)
63
- } ) ;
64
- } ) ;
65
- group. bench_function ( "shuffle_writer: encode and compress (zstd level 1)" , |b| {
66
- let batch = create_batch ( 8192 , true ) ;
67
- let mut buffer = vec ! [ ] ;
68
- let ipc_time = Time :: default ( ) ;
69
- b. iter ( || {
70
- buffer. clear ( ) ;
71
- let mut cursor = Cursor :: new ( & mut buffer) ;
72
- write_ipc_compressed ( & batch, & mut cursor, & CompressionCodec :: Zstd ( 1 ) , & ipc_time)
73
- } ) ;
74
- } ) ;
75
- group. bench_function ( "shuffle_writer: encode and compress (zstd level 6)" , |b| {
76
- let batch = create_batch ( 8192 , true ) ;
77
- let mut buffer = vec ! [ ] ;
78
- let ipc_time = Time :: default ( ) ;
79
- b. iter ( || {
80
- buffer. clear ( ) ;
81
- let mut cursor = Cursor :: new ( & mut buffer) ;
82
- write_ipc_compressed ( & batch, & mut cursor, & CompressionCodec :: Zstd ( 6 ) , & ipc_time)
83
- } ) ;
84
- } ) ;
85
- group. bench_function ( "shuffle_writer: end to end" , |b| {
86
- let ctx = SessionContext :: new ( ) ;
87
- let exec = create_shuffle_writer_exec ( CompressionCodec :: Zstd ( 1 ) ) ;
88
- b. iter ( || {
89
- let task_ctx = ctx. task_ctx ( ) ;
90
- let stream = exec. execute ( 0 , task_ctx) . unwrap ( ) ;
91
- let rt = Runtime :: new ( ) . unwrap ( ) ;
92
- criterion:: black_box ( rt. block_on ( collect ( stream) ) . unwrap ( ) ) ;
93
- } ) ;
94
- } ) ;
36
+ for compression_codec in & [
37
+ CompressionCodec :: None ,
38
+ CompressionCodec :: Lz4Frame ,
39
+ CompressionCodec :: Snappy ,
40
+ CompressionCodec :: Zstd ( 1 ) ,
41
+ CompressionCodec :: Zstd ( 6 ) ,
42
+ ] {
43
+ for enable_fast_encoding in [ true , false ] {
44
+ let name = format ! ( "shuffle_writer: write encoded (enable_fast_encoding={enable_fast_encoding}, compression={compression_codec:?})" ) ;
45
+ group. bench_function ( name, |b| {
46
+ let mut buffer = vec ! [ ] ;
47
+ let ipc_time = Time :: default ( ) ;
48
+ let w = ShuffleBlockWriter :: try_new (
49
+ & batch. schema ( ) ,
50
+ enable_fast_encoding,
51
+ compression_codec. clone ( ) ,
52
+ )
53
+ . unwrap ( ) ;
54
+ b. iter ( || {
55
+ buffer. clear ( ) ;
56
+ let mut cursor = Cursor :: new ( & mut buffer) ;
57
+ w. write_batch ( & batch, & mut cursor, & ipc_time) . unwrap ( ) ;
58
+ } ) ;
59
+ } ) ;
60
+ }
61
+ }
62
+
63
+ for compression_codec in [
64
+ CompressionCodec :: None ,
65
+ CompressionCodec :: Lz4Frame ,
66
+ CompressionCodec :: Snappy ,
67
+ CompressionCodec :: Zstd ( 1 ) ,
68
+ CompressionCodec :: Zstd ( 6 ) ,
69
+ ] {
70
+ group. bench_function (
71
+ format ! ( "shuffle_writer: end to end (compression = {compression_codec:?}" ) ,
72
+ |b| {
73
+ let ctx = SessionContext :: new ( ) ;
74
+ let exec = create_shuffle_writer_exec ( compression_codec. clone ( ) ) ;
75
+ b. iter ( || {
76
+ let task_ctx = ctx. task_ctx ( ) ;
77
+ let stream = exec. execute ( 0 , task_ctx) . unwrap ( ) ;
78
+ let rt = Runtime :: new ( ) . unwrap ( ) ;
79
+ rt. block_on ( collect ( stream) ) . unwrap ( ) ;
80
+ } ) ;
81
+ } ,
82
+ ) ;
83
+ }
95
84
}
96
85
97
86
fn create_shuffle_writer_exec ( compression_codec : CompressionCodec ) -> ShuffleWriterExec {
@@ -104,6 +93,7 @@ fn create_shuffle_writer_exec(compression_codec: CompressionCodec) -> ShuffleWri
104
93
compression_codec,
105
94
"/tmp/data.out" . to_string ( ) ,
106
95
"/tmp/index.out" . to_string ( ) ,
96
+ true ,
107
97
)
108
98
. unwrap ( )
109
99
}
@@ -121,11 +111,19 @@ fn create_batch(num_rows: usize, allow_nulls: bool) -> RecordBatch {
121
111
let schema = Arc :: new ( Schema :: new ( vec ! [
122
112
Field :: new( "c0" , DataType :: Int32 , true ) ,
123
113
Field :: new( "c1" , DataType :: Utf8 , true ) ,
114
+ Field :: new( "c2" , DataType :: Date32 , true ) ,
115
+ Field :: new( "c3" , DataType :: Decimal128 ( 11 , 2 ) , true ) ,
124
116
] ) ) ;
125
117
let mut a = Int32Builder :: new ( ) ;
126
118
let mut b = StringBuilder :: new ( ) ;
119
+ let mut c = Date32Builder :: new ( ) ;
120
+ let mut d = Decimal128Builder :: new ( )
121
+ . with_precision_and_scale ( 11 , 2 )
122
+ . unwrap ( ) ;
127
123
for i in 0 ..num_rows {
128
124
a. append_value ( i as i32 ) ;
125
+ c. append_value ( i as i32 ) ;
126
+ d. append_value ( ( i * 1000000 ) as i128 ) ;
129
127
if allow_nulls && i % 10 == 0 {
130
128
b. append_null ( ) ;
131
129
} else {
@@ -134,7 +132,13 @@ fn create_batch(num_rows: usize, allow_nulls: bool) -> RecordBatch {
134
132
}
135
133
let a = a. finish ( ) ;
136
134
let b = b. finish ( ) ;
137
- RecordBatch :: try_new ( schema. clone ( ) , vec ! [ Arc :: new( a) , Arc :: new( b) ] ) . unwrap ( )
135
+ let c = c. finish ( ) ;
136
+ let d = d. finish ( ) ;
137
+ RecordBatch :: try_new (
138
+ schema. clone ( ) ,
139
+ vec ! [ Arc :: new( a) , Arc :: new( b) , Arc :: new( c) , Arc :: new( d) ] ,
140
+ )
141
+ . unwrap ( )
138
142
}
139
143
140
144
fn config ( ) -> Criterion {
0 commit comments