@@ -42,7 +42,7 @@ use crate::error::Result;
42
42
use crate :: execution:: context:: SessionState ;
43
43
use crate :: physical_plan:: { ExecutionPlan , Statistics } ;
44
44
45
- use arrow_schema:: { DataType , Field , Schema } ;
45
+ use arrow_schema:: { DataType , Field , FieldRef , Schema } ;
46
46
use datafusion_common:: file_options:: file_type:: FileType ;
47
47
use datafusion_common:: { internal_err, not_impl_err, GetExt } ;
48
48
use datafusion_expr:: Expr ;
@@ -235,6 +235,12 @@ pub fn file_type_to_format(
235
235
}
236
236
}
237
237
238
+ /// Return a new [`FieldRef`] that is a clone of `field` with its data type
239
+ /// replaced by `new_type`; every other property of the input field is kept
240
+ fn field_with_new_type ( field : & FieldRef , new_type : DataType ) -> FieldRef {
241
+ Arc :: new ( field. as_ref ( ) . clone ( ) . with_data_type ( new_type) ) // clones the inner Field, so the shared input is left untouched
242
+ }
243
+
238
244
/// Transform a schema to use view types for Utf8 and Binary
239
245
///
240
246
/// See [parquet::ParquetFormat::force_view_types] for details
@@ -243,14 +249,12 @@ pub fn transform_schema_to_view(schema: &Schema) -> Schema {
243
249
. fields
244
250
. iter ( )
245
251
. map ( |field| match field. data_type ( ) {
246
- DataType :: Utf8 | DataType :: LargeUtf8 => Arc :: new (
247
- Field :: new ( field. name ( ) , DataType :: Utf8View , field. is_nullable ( ) )
248
- . with_metadata ( field. metadata ( ) . to_owned ( ) ) ,
249
- ) ,
250
- DataType :: Binary | DataType :: LargeBinary => Arc :: new (
251
- Field :: new ( field. name ( ) , DataType :: BinaryView , field. is_nullable ( ) )
252
- . with_metadata ( field. metadata ( ) . to_owned ( ) ) ,
253
- ) ,
252
+ DataType :: Utf8 | DataType :: LargeUtf8 => {
253
+ field_with_new_type ( field, DataType :: Utf8View )
254
+ }
255
+ DataType :: Binary | DataType :: LargeBinary => {
256
+ field_with_new_type ( field, DataType :: BinaryView )
257
+ }
254
258
_ => field. clone ( ) ,
255
259
} )
256
260
. collect ( ) ;
@@ -276,6 +280,7 @@ pub(crate) fn coerce_file_schema_to_view_type(
276
280
( f. name ( ) , dt)
277
281
} )
278
282
. collect ( ) ;
283
+
279
284
if !transform {
280
285
return None ;
281
286
}
@@ -285,14 +290,13 @@ pub(crate) fn coerce_file_schema_to_view_type(
285
290
. iter ( )
286
291
. map (
287
292
|field| match ( table_fields. get ( field. name ( ) ) , field. data_type ( ) ) {
288
- ( Some ( DataType :: Utf8View ) , DataType :: Utf8 )
289
- | ( Some ( DataType :: Utf8View ) , DataType :: LargeUtf8 ) => Arc :: new (
290
- Field :: new ( field. name ( ) , DataType :: Utf8View , field. is_nullable ( ) ) ,
291
- ) ,
292
- ( Some ( DataType :: BinaryView ) , DataType :: Binary )
293
- | ( Some ( DataType :: BinaryView ) , DataType :: LargeBinary ) => Arc :: new (
294
- Field :: new ( field. name ( ) , DataType :: BinaryView , field. is_nullable ( ) ) ,
295
- ) ,
293
+ ( Some ( DataType :: Utf8View ) , DataType :: Utf8 | DataType :: LargeUtf8 ) => {
294
+ field_with_new_type ( field, DataType :: Utf8View )
295
+ }
296
+ (
297
+ Some ( DataType :: BinaryView ) ,
298
+ DataType :: Binary | DataType :: LargeBinary ,
299
+ ) => field_with_new_type ( field, DataType :: BinaryView ) ,
296
300
_ => field. clone ( ) ,
297
301
} ,
298
302
)
@@ -310,18 +314,9 @@ pub fn transform_binary_to_string(schema: &Schema) -> Schema {
310
314
. fields
311
315
. iter ( )
312
316
. map ( |field| match field. data_type ( ) {
313
- DataType :: Binary => Arc :: new (
314
- Field :: new ( field. name ( ) , DataType :: Utf8 , field. is_nullable ( ) )
315
- . with_metadata ( field. metadata ( ) . to_owned ( ) ) ,
316
- ) ,
317
- DataType :: LargeBinary => Arc :: new (
318
- Field :: new ( field. name ( ) , DataType :: LargeUtf8 , field. is_nullable ( ) )
319
- . with_metadata ( field. metadata ( ) . to_owned ( ) ) ,
320
- ) ,
321
- DataType :: BinaryView => Arc :: new (
322
- Field :: new ( field. name ( ) , DataType :: BinaryView , field. is_nullable ( ) )
323
- . with_metadata ( field. metadata ( ) . to_owned ( ) ) ,
324
- ) ,
317
+ DataType :: Binary => field_with_new_type ( field, DataType :: Utf8 ) ,
318
+ DataType :: LargeBinary => field_with_new_type ( field, DataType :: LargeUtf8 ) ,
319
+ DataType :: BinaryView => field_with_new_type ( field, DataType :: Utf8View ) ,
325
320
_ => field. clone ( ) ,
326
321
} )
327
322
. collect ( ) ;
@@ -347,35 +342,28 @@ pub(crate) fn coerce_file_schema_to_string_type(
347
342
. map (
348
343
|field| match ( table_fields. get ( field. name ( ) ) , field. data_type ( ) ) {
349
344
// table schema uses string type, coerce the file schema to use string type
350
- ( Some ( DataType :: Utf8 ) ,
351
- DataType :: Binary | DataType :: LargeBinary | DataType :: BinaryView ) => {
345
+ (
346
+ Some ( DataType :: Utf8 ) ,
347
+ DataType :: Binary | DataType :: LargeBinary | DataType :: BinaryView ,
348
+ ) => {
352
349
transform = true ;
353
- Arc :: new ( Field :: new (
354
- field. name ( ) ,
355
- DataType :: Utf8 ,
356
- field. is_nullable ( ) ,
357
- ) )
350
+ field_with_new_type ( field, DataType :: Utf8 )
358
351
}
359
352
// table schema uses large string type, coerce the file schema to use large string type
360
- ( Some ( DataType :: LargeUtf8 ) , DataType :: Binary | DataType :: LargeBinary | DataType :: BinaryView ) => {
353
+ (
354
+ Some ( DataType :: LargeUtf8 ) ,
355
+ DataType :: Binary | DataType :: LargeBinary | DataType :: BinaryView ,
356
+ ) => {
361
357
transform = true ;
362
- Arc :: new ( Field :: new (
363
- field. name ( ) ,
364
- DataType :: LargeUtf8 ,
365
- field. is_nullable ( ) ,
366
- ) )
358
+ field_with_new_type ( field, DataType :: LargeUtf8 )
367
359
}
368
360
// table schema uses string view type, coerce the file schema to use view type
369
361
(
370
362
Some ( DataType :: Utf8View ) ,
371
363
DataType :: Binary | DataType :: LargeBinary | DataType :: BinaryView ,
372
364
) => {
373
365
transform = true ;
374
- Arc :: new ( Field :: new (
375
- field. name ( ) ,
376
- DataType :: Utf8View ,
377
- field. is_nullable ( ) ,
378
- ) )
366
+ field_with_new_type ( field, DataType :: Utf8View )
379
367
}
380
368
_ => field. clone ( ) ,
381
369
} ,
0 commit comments