@@ -30,22 +30,38 @@ pub enum ExtractSubstrssError {
30
30
SubstringNotFound ( Regex , String ) ,
31
31
#[ error( transparent) ]
32
32
RegexError ( #[ from] fancy_regex:: Error ) ,
33
+ #[ error( "Invalid regex in parts, index {part_index}: '{regex_def}' - {error}" ) ]
34
+ InvalidRegexPart {
35
+ part_index : usize ,
36
+ regex_def : String ,
37
+ error : fancy_regex:: Error ,
38
+ } ,
33
39
}
34
40
35
41
pub fn extract_substr_idxes (
36
42
input_str : & str ,
37
43
regex_config : & DecomposedRegexConfig ,
38
44
reveal_private : bool ,
39
45
) -> Result < Vec < ( usize , usize ) > , ExtractSubstrssError > {
46
+ // Validate each regex part individually, to throw better errors
47
+ for ( i, part) in regex_config. parts . iter ( ) . enumerate ( ) {
48
+ Regex :: new ( & part. regex_def ) . map_err ( |e| ExtractSubstrssError :: InvalidRegexPart {
49
+ part_index : i,
50
+ regex_def : part. regex_def . clone ( ) ,
51
+ error : e,
52
+ } ) ?;
53
+ }
54
+
40
55
// Construct the full regex pattern with groups for each part
41
56
let mut entire_regex_str = String :: new ( ) ;
42
57
for ( _, part) in regex_config. parts . iter ( ) . enumerate ( ) {
43
58
let adjusted_regex_def = part. regex_def . replace ( "(" , "(?:" ) ;
44
- entire_regex_str += & format ! ( "({})" , adjusted_regex_def) ; // Wrap each part in a group
59
+ entire_regex_str += & format ! ( "({})" , adjusted_regex_def) ;
45
60
}
46
61
47
62
// Compile the entire regex
48
- let entire_regex = Regex :: new ( & entire_regex_str) ?;
63
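+ // This is expected not to fail, since the separate regex parts were validated above. NOTE(review): the "(" -> "(?:" rewrite can still change a valid part (e.g. "(?=a)" becomes "(?:?=a)") — confirm before relying on unwrap.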
64
+ let entire_regex = Regex :: new ( & entire_regex_str) . unwrap ( ) ;
49
65
50
66
// Find the match for the entire regex
51
67
let entire_captures = entire_regex
@@ -267,6 +283,34 @@ mod test {
267
283
assert_eq ! ( idxes, vec![ ( 21 , 27 ) ] ) ;
268
284
}
269
285
286
/// Verifies that a malformed regex part is reported with its index and
/// definition rather than as an opaque error for the combined pattern.
#[test]
fn test_error_handling() {
    // Three parts; only the middle one (index 1) is malformed: its
    // character class `[^,+` is never closed.
    let parts = vec![
        RegexPartConfig {
            is_public: false,
            regex_def: "Hello ".to_string(),
        },
        RegexPartConfig {
            is_public: true,
            regex_def: "[^,+".to_string(),
        },
        RegexPartConfig {
            is_public: false,
            regex_def: "!".to_string(),
        },
    ];
    let broken_config = DecomposedRegexConfig { parts };

    let outcome = extract_substr_idxes("Hello Mamba!", &broken_config, false);

    // The call must fail, and the rendered error must name the offending
    // part index and pattern, followed by the underlying regex error text.
    assert!(outcome.is_err());
    let rendered = outcome.unwrap_err().to_string();
    assert_eq!(
        "Invalid regex in parts, index 1: '[^,+' - Parsing error at position 4: Invalid character class",
        rendered
    );
}
313
+
270
314
#[ test]
271
315
fn test_body_hash_valid ( ) {
272
316
let input_str = "dkim-signature:v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20230601; t=1694989812; x=1695594612; dara=google.com; h=to:subject:message-id:date:from:mime-version:from:to:cc:subject :date:message-id:reply-to; bh=BWETwQ9JDReS4GyR2v2TTR8Bpzj9ayumsWQJ3q7vehs=; b=" ;
0 commit comments