Skip to content

Commit 527fbd1

Browse files
committed
feat: add hint for missing fields
1 parent 5239d1a commit 527fbd1

File tree

5 files changed

+51
-6
lines changed

5 files changed

+51
-6
lines changed

datafusion/common/src/dfschema.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1069,7 +1069,7 @@ mod tests {
10691069
Column names are case sensitive. \
10701070
You can use double quotes to refer to the \"\"t1.c0\"\" column \
10711071
or set the datafusion.sql_parser.enable_ident_normalization configuration. \
1072-
Valid fields are t1.c0, t1.c1.";
1072+
Did you mean 't1.c0'?.";
10731073
assert_eq!(err.strip_backtrace(), expected);
10741074
Ok(())
10751075
}

datafusion/common/src/error.rs

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ use std::io;
2626
use std::result;
2727
use std::sync::Arc;
2828

29+
use crate::utils::datafusion_strsim::normalized_levenshtein;
2930
use crate::utils::quote_identifier;
3031
use crate::{Column, DFSchema, Diagnostic, TableReference};
3132
#[cfg(feature = "avro")]
@@ -176,6 +177,11 @@ impl Display for SchemaError {
176177
.iter()
177178
.map(|column| column.flat_name().to_lowercase())
178179
.collect::<Vec<String>>();
180+
181+
let valid_fields_names = valid_fields
182+
.iter()
183+
.map(|column| column.flat_name())
184+
.collect::<Vec<String>>();
179185
if lower_valid_fields.contains(&field.flat_name().to_lowercase()) {
180186
write!(
181187
f,
@@ -184,7 +190,15 @@ impl Display for SchemaError {
184190
field.quoted_flat_name()
185191
)?;
186192
}
187-
if !valid_fields.is_empty() {
193+
let field_name = field.name();
194+
if let Some(matched) = valid_fields_names
195+
.iter()
196+
.filter(|str| normalized_levenshtein(str, field_name) > 0.5)
197+
.collect::<Vec<&String>>()
198+
.first()
199+
{
200+
write!(f, ". Did you mean '{matched}'?")?;
201+
} else if !valid_fields.is_empty() {
188202
write!(
189203
f,
190204
". Valid fields are {}",

datafusion/common/src/utils/mod.rs

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -736,6 +736,27 @@ pub mod datafusion_strsim {
736736
pub fn levenshtein(a: &str, b: &str) -> usize {
737737
generic_levenshtein(&StringWrapper(a), &StringWrapper(b))
738738
}
739+
740+
/// Calculates the normalized Levenshtein similarity between two strings.
741+
/// The result is `1.0 - levenshtein(a, b) / max(char count of a, char count of b)`,
742+
/// a value between 0.0 and 1.0 where 1.0 means identical and 0.0 means no similarity.
743+
///
744+
/// ```
745+
/// use datafusion_common::utils::datafusion_strsim::normalized_levenshtein;
746+
///
747+
/// assert!((normalized_levenshtein("kitten", "sitting") - 0.57142).abs() < 0.00001);
748+
///
749+
/// assert!(normalized_levenshtein("", "second").abs() < 0.00001);
750+
///
751+
/// assert!((normalized_levenshtein("kitten", "sitten") - 0.833).abs() < 0.001);
752+
/// ```
753+
pub fn normalized_levenshtein(a: &str, b: &str) -> f64 {
754+
// Two empty strings count as identical; returning early also avoids the
// 0.0 / 0.0 division the formula below would otherwise perform.
if a.is_empty() && b.is_empty() {
755+
return 1.0;
756+
}
757+
// Scale the edit distance by the longer input's length in chars, then
// subtract from 1.0 so that a larger result means a closer match.
1.0 - (levenshtein(a, b) as f64)
758+
/ (a.chars().count().max(b.chars().count()) as f64)
759+
}
739760
}
740761

741762
/// Merges collections `first` and `second`, removes duplicates and sorts the

datafusion/sqllogictest/test_files/errors.slt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,3 +161,13 @@ create table records (timestamp timestamp, value float) as values (
161161
'2021-01-01 00:00:00', 1.0,
162162
'2021-01-01 00:00:00', 2.0
163163
);
164+
165+
166+
statement ok
167+
create table a(timestamp int, birthday int);
168+
169+
query error DataFusion error: Schema error: No field named timetamp\. Did you mean 'a\.timestamp'\?\.
170+
select timetamp from a;
171+
172+
query error DataFusion error: Schema error: No field named dadsada\. Valid fields are a\.timestamp, a\.birthday\.
173+
select dadsada from a;

datafusion/sqllogictest/test_files/identifiers.slt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -90,16 +90,16 @@ drop table case_insensitive_test
9090
statement ok
9191
CREATE TABLE test("Column1" string) AS VALUES ('content1');
9292

93-
statement error DataFusion error: Schema error: No field named column1. Valid fields are test\."Column1"\.
93+
statement error DataFusion error: Schema error: No field named column1\. Valid fields are test\."Column1"\.
9494
SELECT COLumn1 from test
9595

96-
statement error DataFusion error: Schema error: No field named column1. Valid fields are test\."Column1"\.
96+
statement error DataFusion error: Schema error: No field named column1\. Valid fields are test\."Column1"\.
9797
SELECT Column1 from test
9898

99-
statement error DataFusion error: Schema error: No field named column1. Valid fields are test\."Column1"\.
99+
statement error DataFusion error: Schema error: No field named column1\. Valid fields are test\."Column1"\.
100100
SELECT column1 from test
101101

102-
statement error DataFusion error: Schema error: No field named column1. Valid fields are test\."Column1"\.
102+
statement error DataFusion error: Schema error: No field named column1\. Valid fields are test\."Column1"\.
103103
SELECT "column1" from test
104104

105105
statement ok

0 commit comments

Comments
 (0)