@@ -28,6 +28,13 @@ extern "C" {
28
28
*/
29
29
typedef struct rure rure ;
30
30
31
+ /*
32
+ * rure_set is the type of a set of compiled regular expressions.
33
+ *
34
+ * A rure_set can be safely used from multiple threads simultaneously.
35
+ */
36
+ typedef struct rure_set rure_set ;
37
+
31
38
/*
32
39
* rure_options is the set of non-flag configuration options for compiling
33
40
* a regular expression. Currently, only two options are available: setting
@@ -165,7 +172,7 @@ rure *rure_compile(const uint8_t *pattern, size_t length,
165
172
/*
166
173
* rure_free frees the given compiled regular expression.
167
174
*
168
- * This must be called at most once.
175
+ * This must be called at most once for any rure.
169
176
*/
170
177
void rure_free (rure * re );
171
178
@@ -446,6 +453,90 @@ void rure_options_size_limit(rure_options *options, size_t limit);
446
453
*/
447
454
void rure_options_dfa_size_limit (rure_options * options , size_t limit );
448
455
456
+ /*
457
+ * rure_compile_set compiles the given list of patterns into a single regular
458
+ * expression which can be matched in a linear-scan. Each pattern in patterns
459
+ * must be valid UTF-8 and the length of each pattern in patterns corresponds
460
+ * to a byte length in patterns_lengths.
461
+ *
462
+ * The number of patterns to compile is specified by patterns_count. Both
463
+ * patterns and patterns_lengths must contain at least this many entries.
464
+ *
465
+ * flags is a bitfield. Valid values are constants declared with prefix
466
+ * RURE_FLAG_.
467
+ *
468
+ * options contains non-flag configuration settings. If it's NULL, default
469
+ * settings are used. options may be freed immediately after a call to
470
+ * rure_compile_set.
471
+ *
472
+ * error is set if there was a problem compiling any of the patterns.
473
+ *
474
+ * The compiled expression set returned may be used from multiple threads.
475
+ */
476
+ rure_set * rure_compile_set (const uint8_t * * patterns ,
477
+ const size_t * patterns_lengths ,
478
+ size_t patterns_count ,
479
+ uint32_t flags ,
480
+ rure_options * options ,
481
+ rure_error * error );
482
+
483
+ /*
484
+ * rure_set_free frees the given compiled regular expression set.
485
+ *
486
+ * This must be called at most once for any rure_set.
487
+ */
488
+ void rure_set_free (rure_set * re );
489
+
490
+ /*
491
+ * rure_set_is_match returns true if and only if any regexes within the set
492
+ * match anywhere in the haystack. Once a match has been located, the
493
+ * matching engine will quit immediately.
494
+ *
495
+ * haystack may contain arbitrary bytes, but ASCII compatible text is more
496
+ * useful. UTF-8 is even more useful. Other text encodings aren't supported.
497
+ * length should be the number of bytes in haystack.
498
+ *
499
+ * start is the position at which to start searching. Note that setting the
500
+ * start position is distinct from incrementing the pointer, since the regex
501
+ * engine may look at bytes before the start position to determine match
502
+ * information. For example, if the start position is greater than 0, then the
503
+ * \A ("begin text") anchor can never match.
504
+ */
505
+ bool rure_set_is_match (rure_set * re , const uint8_t * haystack , size_t length ,
506
+ size_t start );
507
+
508
+ /*
509
+ * rure_set_matches compares each regex in the set against the haystack and
510
+ * modifies matches with the match result of each pattern. Match results are
511
+ * ordered in the same way as the rure_set was compiled. For example,
512
+ * index 0 of matches corresponds to the first pattern passed to
513
+ * `rure_compile_set`.
514
+ *
515
+ * haystack may contain arbitrary bytes, but ASCII compatible text is more
516
+ * useful. UTF-8 is even more useful. Other text encodings aren't supported.
517
+ * length should be the number of bytes in haystack.
518
+ *
519
+ * start is the position at which to start searching. Note that setting the
520
+ * start position is distinct from incrementing the pointer, since the regex
521
+ * engine may look at bytes before the start position to determine match
522
+ * information. For example, if the start position is greater than 0, then the
523
+ * \A ("begin text") anchor can never match.
524
+ *
525
+ * matches must point to an array whose length is greater than or equal to
526
+ * the number of patterns the rure_set was compiled with.
527
+ *
528
+ * Only use this function if you specifically need to know which regexes
529
+ * matched within the set. To determine if any of the regexes matched without
530
+ * caring which, use rure_set_is_match.
531
+ */
532
+ bool rure_set_matches (rure_set * re , const uint8_t * haystack , size_t length ,
533
+ size_t start , bool * matches );
534
+
535
+ /*
536
+ * rure_set_len returns the number of patterns rure_set was compiled with.
537
+ */
538
+ size_t rure_set_len (rure_set * re );
539
+
449
540
/*
450
541
* rure_error_new allocates space for an error.
451
542
*
0 commit comments