@@ -172,6 +172,10 @@ pub struct Dfa<'a, 'b, 'c: 'b, 'm: 'b> {
172
172
search : & ' b mut Search < ' c , ' m > ,
173
173
/// The current position in the input.
174
174
at : usize ,
175
+ /// The last state that matched.
176
+ ///
177
+ /// When no match has occurred, this is set to STATE_UNKNOWN.
178
+ last_match_si : StatePtr ,
175
179
/// The input position of the last cache flush. We use this to determine
176
180
/// if we're thrashing in the cache too often. If so, the DFA quits so
177
181
/// that we can fall back to the NFA algorithm.
@@ -352,6 +356,7 @@ impl<'a, 'b, 'c, 'm> Dfa<'a, 'b, 'c, 'm> {
352
356
start : 0 , // filled in below
353
357
search : search,
354
358
at : at,
359
+ last_match_si : STATE_UNKNOWN ,
355
360
last_cache_flush : at,
356
361
compiled : & mut cache. compiled ,
357
362
states : & mut cache. states ,
@@ -370,8 +375,18 @@ impl<'a, 'b, 'c, 'm> Dfa<'a, 'b, 'c, 'm> {
370
375
} else {
371
376
dfa. exec_at ( & mut cache. qcur , & mut cache. qnext , text)
372
377
} ;
373
- if result. is_match ( ) && !dfa. search . find_many_matches ( ) {
374
- dfa. search . set_match ( 0 ) ;
378
+ if result. is_match ( ) {
379
+ if dfa. search . find_one_match ( ) {
380
+ dfa. search . set_match ( 0 ) ;
381
+ } else {
382
+ debug_assert ! ( dfa. last_match_si != STATE_UNKNOWN ) ;
383
+ debug_assert ! ( dfa. last_match_si != STATE_DEAD ) ;
384
+ for & ip in & dfa. states [ dfa. last_match_si as usize ] . insts {
385
+ if let Inst :: Match ( slot) = dfa. prog [ ip as usize ] {
386
+ dfa. search . set_match ( slot) ;
387
+ }
388
+ }
389
+ }
375
390
}
376
391
result
377
392
}
@@ -457,6 +472,7 @@ impl<'a, 'b, 'c, 'm> Dfa<'a, 'b, 'c, 'm> {
457
472
}
458
473
si = next_si;
459
474
if self . states [ si as usize ] . is_match {
475
+ self . last_match_si = si;
460
476
if self . search . quit_after_first_match ( ) {
461
477
return DfaResult :: Match ;
462
478
}
@@ -475,19 +491,13 @@ impl<'a, 'b, 'c, 'm> Dfa<'a, 'b, 'c, 'm> {
475
491
return result;
476
492
}
477
493
if self . states [ si as usize ] . is_match {
494
+ self . last_match_si = si;
478
495
if self . search . quit_after_first_match ( ) {
479
496
return DfaResult :: Match ;
480
497
}
481
498
result = DfaResult :: Match ;
482
499
self . search . set_end ( Some ( text. len ( ) ) ) ;
483
500
}
484
- if result. is_match ( ) && !self . search . find_one_match ( ) {
485
- for & ip in & self . states [ si as usize ] . insts {
486
- if let Inst :: Match ( slot) = self . prog [ ip as usize ] {
487
- self . search . set_match ( slot) ;
488
- }
489
- }
490
- }
491
501
result
492
502
}
493
503
@@ -529,6 +539,7 @@ impl<'a, 'b, 'c, 'm> Dfa<'a, 'b, 'c, 'm> {
529
539
}
530
540
si = next_si;
531
541
if self . states [ si as usize ] . is_match {
542
+ self . last_match_si = si;
532
543
if self . search . quit_after_first_match ( ) {
533
544
return DfaResult :: NoMatch ;
534
545
}
@@ -546,6 +557,7 @@ impl<'a, 'b, 'c, 'm> Dfa<'a, 'b, 'c, 'm> {
546
557
return result;
547
558
}
548
559
if self . states [ si as usize ] . is_match {
560
+ self . last_match_si = si;
549
561
if self . search . quit_after_first_match ( ) {
550
562
return DfaResult :: Match ;
551
563
}
@@ -632,12 +644,17 @@ impl<'a, 'b, 'c, 'm> Dfa<'a, 'b, 'c, 'm> {
632
644
is_match = true ;
633
645
if !self . continue_past_first_match ( ) {
634
646
break ;
635
- } else if !self . search . find_one_match ( ) {
647
+ } else if !self . search . find_one_match ( )
648
+ && !qnext. contains_ip ( ip as usize ) {
636
649
// If we are continuing on to find other matches,
637
650
// then keep a record of the match states we've seen.
638
- if !qnext. contains_ip ( ip as usize ) {
639
- qnext. add ( ip) ;
640
- }
651
+ qnext. add ( ip) ;
652
+ // BREADCRUMBS:
653
+ // Perhaps we need another sparse set here and track
654
+ // these "recorded" matches separately. They should
655
+ // still make their way into cached states, but perhaps
656
+ // they shouldn't prevent a DEAD state from
657
+ // occurring.
641
658
}
642
659
}
643
660
Bytes ( ref inst) => {
0 commit comments