Skip to content

Commit a20ac87

Browse files
committed
DRY refactor
1 parent 97b686a commit a20ac87

File tree

1 file changed

+35
-47
lines changed
  • datafusion/core/src/datasource/file_format

1 file changed

+35
-47
lines changed

datafusion/core/src/datasource/file_format/mod.rs

Lines changed: 35 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ use crate::error::Result;
4242
use crate::execution::context::SessionState;
4343
use crate::physical_plan::{ExecutionPlan, Statistics};
4444

45-
use arrow_schema::{DataType, Field, Schema};
45+
use arrow_schema::{DataType, Field, FieldRef, Schema};
4646
use datafusion_common::file_options::file_type::FileType;
4747
use datafusion_common::{internal_err, not_impl_err, GetExt};
4848
use datafusion_expr::Expr;
@@ -235,6 +235,12 @@ pub fn file_type_to_format(
235235
}
236236
}
237237

238+
/// Create a new field with the specified data type, copying the other
239+
/// properties from the input field
240+
fn field_with_new_type(field: &FieldRef, new_type: DataType) -> FieldRef {
241+
Arc::new(field.as_ref().clone().with_data_type(new_type))
242+
}
243+
238244
/// Transform a schema to use view types for Utf8 and Binary
239245
///
240246
/// See [parquet::ParquetFormat::force_view_types] for details
@@ -243,14 +249,12 @@ pub fn transform_schema_to_view(schema: &Schema) -> Schema {
243249
.fields
244250
.iter()
245251
.map(|field| match field.data_type() {
246-
DataType::Utf8 | DataType::LargeUtf8 => Arc::new(
247-
Field::new(field.name(), DataType::Utf8View, field.is_nullable())
248-
.with_metadata(field.metadata().to_owned()),
249-
),
250-
DataType::Binary | DataType::LargeBinary => Arc::new(
251-
Field::new(field.name(), DataType::BinaryView, field.is_nullable())
252-
.with_metadata(field.metadata().to_owned()),
253-
),
252+
DataType::Utf8 | DataType::LargeUtf8 => {
253+
field_with_new_type(field, DataType::Utf8View)
254+
}
255+
DataType::Binary | DataType::LargeBinary => {
256+
field_with_new_type(field, DataType::BinaryView)
257+
}
254258
_ => field.clone(),
255259
})
256260
.collect();
@@ -276,6 +280,7 @@ pub(crate) fn coerce_file_schema_to_view_type(
276280
(f.name(), dt)
277281
})
278282
.collect();
283+
279284
if !transform {
280285
return None;
281286
}
@@ -285,14 +290,13 @@ pub(crate) fn coerce_file_schema_to_view_type(
285290
.iter()
286291
.map(
287292
|field| match (table_fields.get(field.name()), field.data_type()) {
288-
(Some(DataType::Utf8View), DataType::Utf8)
289-
| (Some(DataType::Utf8View), DataType::LargeUtf8) => Arc::new(
290-
Field::new(field.name(), DataType::Utf8View, field.is_nullable()),
291-
),
292-
(Some(DataType::BinaryView), DataType::Binary)
293-
| (Some(DataType::BinaryView), DataType::LargeBinary) => Arc::new(
294-
Field::new(field.name(), DataType::BinaryView, field.is_nullable()),
295-
),
293+
(Some(DataType::Utf8View), DataType::Utf8 | DataType::LargeUtf8) => {
294+
field_with_new_type(field, DataType::Utf8View)
295+
}
296+
(
297+
Some(DataType::BinaryView),
298+
DataType::Binary | DataType::LargeBinary,
299+
) => field_with_new_type(field, DataType::BinaryView),
296300
_ => field.clone(),
297301
},
298302
)
@@ -310,18 +314,9 @@ pub fn transform_binary_to_string(schema: &Schema) -> Schema {
310314
.fields
311315
.iter()
312316
.map(|field| match field.data_type() {
313-
DataType::Binary => Arc::new(
314-
Field::new(field.name(), DataType::Utf8, field.is_nullable())
315-
.with_metadata(field.metadata().to_owned()),
316-
),
317-
DataType::LargeBinary => Arc::new(
318-
Field::new(field.name(), DataType::LargeUtf8, field.is_nullable())
319-
.with_metadata(field.metadata().to_owned()),
320-
),
321-
DataType::BinaryView => Arc::new(
322-
Field::new(field.name(), DataType::BinaryView, field.is_nullable())
323-
.with_metadata(field.metadata().to_owned()),
324-
),
317+
DataType::Binary => field_with_new_type(field, DataType::Utf8),
318+
DataType::LargeBinary => field_with_new_type(field, DataType::LargeUtf8),
319+
DataType::BinaryView => field_with_new_type(field, DataType::Utf8View),
325320
_ => field.clone(),
326321
})
327322
.collect();
@@ -347,35 +342,28 @@ pub(crate) fn coerce_file_schema_to_string_type(
347342
.map(
348343
|field| match (table_fields.get(field.name()), field.data_type()) {
349344
// table schema uses string type, coerce the file schema to use string type
350-
(Some(DataType::Utf8),
351-
DataType::Binary | DataType::LargeBinary | DataType::BinaryView) => {
345+
(
346+
Some(DataType::Utf8),
347+
DataType::Binary | DataType::LargeBinary | DataType::BinaryView,
348+
) => {
352349
transform = true;
353-
Arc::new(Field::new(
354-
field.name(),
355-
DataType::Utf8,
356-
field.is_nullable(),
357-
))
350+
field_with_new_type(field, DataType::Utf8)
358351
}
359352
// table schema uses large string type, coerce the file schema to use large string type
360-
(Some(DataType::LargeUtf8), DataType::Binary | DataType::LargeBinary | DataType::BinaryView) => {
353+
(
354+
Some(DataType::LargeUtf8),
355+
DataType::Binary | DataType::LargeBinary | DataType::BinaryView,
356+
) => {
361357
transform = true;
362-
Arc::new(Field::new(
363-
field.name(),
364-
DataType::LargeUtf8,
365-
field.is_nullable(),
366-
))
358+
field_with_new_type(field, DataType::LargeUtf8)
367359
}
368360
// table schema uses string view type, coerce the file schema to use view type
369361
(
370362
Some(DataType::Utf8View),
371363
DataType::Binary | DataType::LargeBinary | DataType::BinaryView,
372364
) => {
373365
transform = true;
374-
Arc::new(Field::new(
375-
field.name(),
376-
DataType::Utf8View,
377-
field.is_nullable(),
378-
))
366+
field_with_new_type(field, DataType::Utf8View)
379367
}
380368
_ => field.clone(),
381369
},

0 commit comments

Comments
 (0)