File tree 5 files changed +51
-6
lines changed
5 files changed +51
-6
lines changed Original file line number Diff line number Diff line change @@ -1069,7 +1069,7 @@ mod tests {
1069
1069
Column names are case sensitive. \
1070
1070
You can use double quotes to refer to the \" \" t1.c0\" \" column \
1071
1071
or set the datafusion.sql_parser.enable_ident_normalization configuration. \
1072
- Valid fields are t1.c0, t1.c1 .";
1072
+ Did you mean ' t1.c0'? .";
1073
1073
assert_eq ! ( err. strip_backtrace( ) , expected) ;
1074
1074
Ok ( ( ) )
1075
1075
}
Original file line number Diff line number Diff line change @@ -26,6 +26,7 @@ use std::io;
26
26
use std:: result;
27
27
use std:: sync:: Arc ;
28
28
29
+ use crate :: utils:: datafusion_strsim:: normalized_levenshtein;
29
30
use crate :: utils:: quote_identifier;
30
31
use crate :: { Column , DFSchema , Diagnostic , TableReference } ;
31
32
#[ cfg( feature = "avro" ) ]
@@ -176,6 +177,11 @@ impl Display for SchemaError {
176
177
. iter ( )
177
178
. map ( |column| column. flat_name ( ) . to_lowercase ( ) )
178
179
. collect :: < Vec < String > > ( ) ;
180
+
181
+ let valid_fields_names = valid_fields
182
+ . iter ( )
183
+ . map ( |column| column. flat_name ( ) )
184
+ . collect :: < Vec < String > > ( ) ;
179
185
if lower_valid_fields. contains ( & field. flat_name ( ) . to_lowercase ( ) ) {
180
186
write ! (
181
187
f,
@@ -184,7 +190,15 @@ impl Display for SchemaError {
184
190
field. quoted_flat_name( )
185
191
) ?;
186
192
}
187
- if !valid_fields. is_empty ( ) {
193
+ let field_name = field. name ( ) ;
194
+ if let Some ( matched) = valid_fields_names
195
+ . iter ( )
196
+ . filter ( |str| normalized_levenshtein ( str, field_name) > 0.5 )
197
+ . collect :: < Vec < & String > > ( )
198
+ . first ( )
199
+ {
200
+ write ! ( f, ". Did you mean '{matched}'?" ) ?;
201
+ } else if !valid_fields. is_empty ( ) {
188
202
write ! (
189
203
f,
190
204
". Valid fields are {}" ,
Original file line number Diff line number Diff line change @@ -736,6 +736,27 @@ pub mod datafusion_strsim {
736
736
pub fn levenshtein ( a : & str , b : & str ) -> usize {
737
737
generic_levenshtein ( & StringWrapper ( a) , & StringWrapper ( b) )
738
738
}
739
+
740
+ /// Calculates the normalized Levenshtein distance between two strings.
741
+ /// The normalized distance is a value between 0.0 and 1.0, where 1.0 indicates
742
+ /// that the strings are identical and 0.0 indicates no similarity.
743
+ ///
744
+ /// ```
745
+ /// use datafusion_common::utils::datafusion_strsim::normalized_levenshtein;
746
+ ///
747
+ /// assert!((normalized_levenshtein("kitten", "sitting") - 0.57142).abs() < 0.00001);
748
+ ///
749
+ /// assert!(normalized_levenshtein("", "second").abs() < 0.00001);
750
+ ///
751
+ /// assert!((normalized_levenshtein("kitten", "sitten") - 0.833).abs() < 0.001);
752
+ /// ```
753
+ pub fn normalized_levenshtein ( a : & str , b : & str ) -> f64 {
754
+ if a. is_empty ( ) && b. is_empty ( ) {
755
+ return 1.0 ;
756
+ }
757
+ 1.0 - ( levenshtein ( a, b) as f64 )
758
+ / ( a. chars ( ) . count ( ) . max ( b. chars ( ) . count ( ) ) as f64 )
759
+ }
739
760
}
740
761
741
762
/// Merges collections `first` and `second`, removes duplicates and sorts the
Original file line number Diff line number Diff line change @@ -161,3 +161,13 @@ create table records (timestamp timestamp, value float) as values (
161
161
'2021-01-01 00:00:00', 1.0,
162
162
'2021-01-01 00:00:00', 2.0
163
163
);
164
+
165
+
166
+ statement ok
167
+ create table a(timestamp int, birthday int);
168
+
169
+ query error DataFusion error: Schema error: No field named timetamp\. Did you mean 'a\.timestamp'\?\.
170
+ select timetamp from a;
171
+
172
+ query error DataFusion error: Schema error: No field named dadsada\. Valid fields are a\.timestamp, a\.birthday\.
173
+ select dadsada from a;
Original file line number Diff line number Diff line change @@ -90,16 +90,16 @@ drop table case_insensitive_test
90
90
statement ok
91
91
CREATE TABLE test("Column1" string) AS VALUES ('content1');
92
92
93
- statement error DataFusion error: Schema error: No field named column1. Valid fields are test\."Column1"\.
93
+ statement error DataFusion error: Schema error: No field named column1\. Valid fields are test\."Column1"\.
94
94
SELECT COLumn1 from test
95
95
96
- statement error DataFusion error: Schema error: No field named column1. Valid fields are test\."Column1"\.
96
+ statement error DataFusion error: Schema error: No field named column1\. Valid fields are test\."Column1"\.
97
97
SELECT Column1 from test
98
98
99
- statement error DataFusion error: Schema error: No field named column1. Valid fields are test\."Column1"\.
99
+ statement error DataFusion error: Schema error: No field named column1\. Valid fields are test\."Column1"\.
100
100
SELECT column1 from test
101
101
102
- statement error DataFusion error: Schema error: No field named column1. Valid fields are test\."Column1"\.
102
+ statement error DataFusion error: Schema error: No field named column1\. Valid fields are test\."Column1"\.
103
103
SELECT "column1" from test
104
104
105
105
statement ok
You can’t perform that action at this time.
0 commit comments