Skip to content

Commit cc6c3db

Browse files
committed
Consolidate remaining parquet config options into ConfigOptions
1 parent 900e15f commit cc6c3db

File tree

12 files changed

+175
-105
lines changed

12 files changed

+175
-105
lines changed

datafusion-examples/examples/flight_server.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,11 +67,12 @@ impl FlightService for FlightServiceImpl {
6767
) -> Result<Response<SchemaResult>, Status> {
6868
let request = request.into_inner();
6969

70-
let listing_options = ListingOptions::new(Arc::new(ParquetFormat::default()));
70+
let ctx = SessionContext::new();
71+
let format = Arc::new(ParquetFormat::new(ctx.config_options()));
72+
let listing_options = ListingOptions::new(format);
7173
let table_path =
7274
ListingTableUrl::parse(&request.path[0]).map_err(to_tonic_err)?;
7375

74-
let ctx = SessionContext::new();
7576
let schema = listing_options
7677
.infer_schema(&ctx.state(), &table_path)
7778
.await

datafusion-examples/examples/parquet_sql_multiple_files.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ async fn main() -> Result<()> {
3232
let testdata = datafusion::test_util::parquet_test_data();
3333

3434
// Configure listing options
35-
let file_format = ParquetFormat::default().with_enable_pruning(true);
35+
let file_format = ParquetFormat::new(ctx.config_options());
3636
let listing_options = ListingOptions {
3737
file_extension: FileType::PARQUET.get_ext(),
3838
format: Arc::new(file_format),

datafusion/core/src/config.rs

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,16 @@ pub const OPT_PARQUET_REORDER_FILTERS: &str =
6060
pub const OPT_PARQUET_ENABLE_PAGE_INDEX: &str =
6161
"datafusion.execution.parquet.enable_page_index";
6262

63+
/// Configuration option "datafusion.execution.parquet.pruning"
64+
pub const OPT_PARQUET_ENABLE_PRUNING: &str = "datafusion.execution.parquet.pruning";
65+
66+
/// Configuration option "datafusion.execution.parquet.skip_metadata"
67+
pub const OPT_PARQUET_SKIP_METADATA: &str = "datafusion.execution.parquet.skip_metadata";
68+
69+
/// Configuration option "datafusion.execution.parquet.metadata_size_hint"
70+
pub const OPT_PARQUET_METADATA_SIZE_HINT: &str =
71+
"datafusion.execution.parquet.metadata_size_hint";
72+
6373
/// Configuration option "datafusion.optimizer.skip_failed_rules"
6474
pub const OPT_OPTIMIZER_SKIP_FAILED_RULES: &str =
6575
"datafusion.optimizer.skip_failed_rules";
@@ -234,6 +244,28 @@ impl BuiltInConfigs {
234244
to reduce the number of rows decoded.",
235245
false,
236246
),
247+
ConfigDefinition::new_bool(
248+
OPT_PARQUET_ENABLE_PRUNING,
249+
"If true, the parquet reader attempts to skip entire row groups based \
250+
on the predicate in the query.",
251+
true,
252+
),
253+
ConfigDefinition::new_bool(
254+
OPT_PARQUET_SKIP_METADATA,
255+
"If true, the parquet reader skips the optional embedded metadata that may be in \
256+
the file Schema. This setting can help avoid schema conflicts when querying \
257+
multiple parquet files with schemas containing compatible types but different metadata.",
258+
true,
259+
),
260+
ConfigDefinition::new(
261+
OPT_PARQUET_METADATA_SIZE_HINT,
262+
"If specified, the parquet reader will try and fetch the last `size_hint` \
263+
bytes of the parquet file optimistically. If not specified, two reads are required: \
264+
One read to fetch the 8-byte parquet footer and \
265+
another to fetch the metadata, whose length is encoded in the footer.",
266+
DataType::Boolean,
267+
ScalarValue::Boolean(None),
268+
),
237269
ConfigDefinition::new_bool(
238270
OPT_OPTIMIZER_SKIP_FAILED_RULES,
239271
"When set to true, the logical plan optimizer will produce warning \

0 commit comments

Comments
 (0)