Skip to content

Commit 97d958c

Browse files
committed
Fix #187
Add regression test. Light refactoring in the DFA.
1 parent df76fad commit 97d958c

File tree

2 files changed

+39
-13
lines changed

2 files changed

+39
-13
lines changed

src/dfa.rs

Lines changed: 30 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,10 @@ pub struct Dfa<'a, 'b, 'c: 'b, 'm: 'b> {
172172
search: &'b mut Search<'c, 'm>,
173173
/// The current position in the input.
174174
at: usize,
175+
/// The last state that matched.
176+
///
177+
/// When no match has occurred, this is set to STATE_UNKNOWN.
178+
last_match_si: StatePtr,
175179
/// The input position of the last cache flush. We use this to determine
176180
/// if we're thrashing in the cache too often. If so, the DFA quits so
177181
/// that we can fall back to the NFA algorithm.
@@ -352,6 +356,7 @@ impl<'a, 'b, 'c, 'm> Dfa<'a, 'b, 'c, 'm> {
352356
start: 0, // filled in below
353357
search: search,
354358
at: at,
359+
last_match_si: STATE_UNKNOWN,
355360
last_cache_flush: at,
356361
compiled: &mut cache.compiled,
357362
states: &mut cache.states,
@@ -370,8 +375,18 @@ impl<'a, 'b, 'c, 'm> Dfa<'a, 'b, 'c, 'm> {
370375
} else {
371376
dfa.exec_at(&mut cache.qcur, &mut cache.qnext, text)
372377
};
373-
if result.is_match() && !dfa.search.find_many_matches() {
374-
dfa.search.set_match(0);
378+
if result.is_match() {
379+
if dfa.search.find_one_match() {
380+
dfa.search.set_match(0);
381+
} else {
382+
debug_assert!(dfa.last_match_si != STATE_UNKNOWN);
383+
debug_assert!(dfa.last_match_si != STATE_DEAD);
384+
for &ip in &dfa.states[dfa.last_match_si as usize].insts {
385+
if let Inst::Match(slot) = dfa.prog[ip as usize] {
386+
dfa.search.set_match(slot);
387+
}
388+
}
389+
}
375390
}
376391
result
377392
}
@@ -457,6 +472,7 @@ impl<'a, 'b, 'c, 'm> Dfa<'a, 'b, 'c, 'm> {
457472
}
458473
si = next_si;
459474
if self.states[si as usize].is_match {
475+
self.last_match_si = si;
460476
if self.search.quit_after_first_match() {
461477
return DfaResult::Match;
462478
}
@@ -475,19 +491,13 @@ impl<'a, 'b, 'c, 'm> Dfa<'a, 'b, 'c, 'm> {
475491
return result;
476492
}
477493
if self.states[si as usize].is_match {
494+
self.last_match_si = si;
478495
if self.search.quit_after_first_match() {
479496
return DfaResult::Match;
480497
}
481498
result = DfaResult::Match;
482499
self.search.set_end(Some(text.len()));
483500
}
484-
if result.is_match() && !self.search.find_one_match() {
485-
for &ip in &self.states[si as usize].insts {
486-
if let Inst::Match(slot) = self.prog[ip as usize] {
487-
self.search.set_match(slot);
488-
}
489-
}
490-
}
491501
result
492502
}
493503

@@ -529,6 +539,7 @@ impl<'a, 'b, 'c, 'm> Dfa<'a, 'b, 'c, 'm> {
529539
}
530540
si = next_si;
531541
if self.states[si as usize].is_match {
542+
self.last_match_si = si;
532543
if self.search.quit_after_first_match() {
533544
return DfaResult::NoMatch;
534545
}
@@ -546,6 +557,7 @@ impl<'a, 'b, 'c, 'm> Dfa<'a, 'b, 'c, 'm> {
546557
return result;
547558
}
548559
if self.states[si as usize].is_match {
560+
self.last_match_si = si;
549561
if self.search.quit_after_first_match() {
550562
return DfaResult::Match;
551563
}
@@ -632,12 +644,17 @@ impl<'a, 'b, 'c, 'm> Dfa<'a, 'b, 'c, 'm> {
632644
is_match = true;
633645
if !self.continue_past_first_match() {
634646
break;
635-
} else if !self.search.find_one_match() {
647+
} else if !self.search.find_one_match()
648+
&& !qnext.contains_ip(ip as usize) {
636649
// If we are continuing on to find other matches,
637650
// then keep a record of the match states we've seen.
638-
if !qnext.contains_ip(ip as usize) {
639-
qnext.add(ip);
640-
}
651+
qnext.add(ip);
652+
// BREADCRUMBS:
653+
// Perhaps we need another sparse set here to track
654+
// these "recorded" matches separately. They should
655+
// still make their way into cached states, but perhaps
656+
// they shouldn't prevent a DEAD state from
657+
// occurring.
641658
}
642659
}
643660
Bytes(ref inst) => {

tests/set.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,12 @@ matset!(set14, &[r".*", "a"], "zzzzzz", 0);
1515

1616
nomatset!(nset1, &["a", "a"], "b");
1717
nomatset!(nset2, &["^foo", "bar$"], "bar foo");
18+
19+
// See: https://github.com/rust-lang-nursery/regex/issues/187
20+
#[test]
21+
fn regression_subsequent_matches() {
22+
let set = regex_set!(&["ab", "b"]);
23+
let text = text!("ba");
24+
assert!(set.matches(text).matched(1));
25+
assert!(set.matches(text).matched(1));
26+
}

0 commit comments

Comments
 (0)