1
1
#include < algorithm>
2
2
#include < array>
3
- #include < cstddef>
4
3
#include < cctype>
4
+ #include < cstddef>
5
5
#include < iterator>
6
6
#include < numeric>
7
7
#include < string_view>
@@ -25,22 +25,24 @@ static constexpr auto blacklist_words = std::to_array<std::string_view>(
25
25
static const auto blacklist_words_set = FrozenHashSet(blacklist_words);
26
26
}; // namespace
27
27
28
- static bool find_blacklisted_words (const std::string &content, const std::array<std::size_t , 4 > indices) noexcept {
28
+ static bool
29
+ find_blacklisted_words (const std::string &content,
30
+ const std::array<std::size_t , 4 > indices) noexcept {
29
31
30
32
for (std::size_t i = 1 ; i < 4 ; ++i) {
31
33
for (std::size_t j = 0 ; j < i; ++j) {
32
34
auto begin = indices[j];
33
35
auto end = indices[i] - begin;
34
36
35
37
if (end > content.size ())
36
- end = content.size () - begin - 1 ;
38
+ end = content.size () - begin - 1 ;
37
39
38
40
std::string target = content.substr (begin, end);
39
41
std::transform (target.begin (), target.end (), target.begin (),
40
42
[](unsigned char c) { return std::tolower (c); });
41
43
42
44
if (blacklist_words_set.contains (target))
43
- return true ;
45
+ return true ;
44
46
}
45
47
}
46
48
@@ -104,14 +106,25 @@ void CPRDetector::check_leap_year(const std::string &cpr,
104
106
reset (state);
105
107
}
106
108
109
+ std::string CPRDetector::format_cpr (std::string &cpr,
110
+ char separator = 0 ) const noexcept {
111
+ if (separator == 0 ) {
112
+ return cpr;
113
+ } else {
114
+ return std::string (cpr, 0 , 6 ) + separator + std::string (cpr, 6 , 4 );
115
+ }
116
+ }
117
+
107
118
void CPRDetector::check_and_append_cpr (std::string &cpr, MatchResults &results,
108
- size_t begin, size_t end) noexcept {
119
+ size_t begin, size_t end,
120
+ char separator = 0 ) noexcept {
109
121
// Convert the 4 control digits to an int.
110
122
int control = std::stoi (std::string (cpr, 6 , 4 ));
111
123
112
124
// We reject the control sequence '0000'.
113
125
if (control > 0 ) {
114
- MatchResult result (cpr, begin, end, CPRDetector::sensitivity);
126
+ MatchResult result (format_cpr (cpr, separator), begin, end,
127
+ CPRDetector::sensitivity);
115
128
116
129
if (check_mod11_ && !check_mod11 (result))
117
130
return ;
@@ -146,11 +159,11 @@ bool CPRDetector::examine_context(const std::string &content) noexcept {
146
159
indices[4 - spaces] = i;
147
160
--spaces;
148
161
if (spaces == 0 ) {
149
- if (find_blacklisted_words (content, indices))
150
- return true ;
151
-
152
- spaces = 3 ;
153
- indices[0 ] = indices[3 ] + 1 ;
162
+ if (find_blacklisted_words (content, indices))
163
+ return true ;
164
+
165
+ spaces = 3 ;
166
+ indices[0 ] = indices[3 ] + 1 ;
154
167
}
155
168
}
156
169
}
@@ -176,8 +189,9 @@ MatchResults CPRDetector::find_matches(const std::string &content) noexcept {
176
189
CPRDetectorState state = CPRDetectorState::Empty;
177
190
std::string cpr (10 , 0 );
178
191
char previous = 0 ;
179
- std:: size_t count = 0 ;
192
+ char separator = 0 ;
180
193
std::size_t begin = 0 ;
194
+ std::size_t end = 0 ;
181
195
bool allow_separator, leap_year = false ;
182
196
Predicate is_acceptable = [](char ) { return false ; };
183
197
@@ -195,7 +209,8 @@ MatchResults CPRDetector::find_matches(const std::string &content) noexcept {
195
209
196
210
if (state == CPRDetectorState::First) {
197
211
cpr[0 ] = *it;
198
- begin = count;
212
+ begin =
213
+ static_cast <std::size_t >(std::distance (std::begin (content), it));
199
214
}
200
215
201
216
break ;
@@ -209,13 +224,11 @@ MatchResults CPRDetector::find_matches(const std::string &content) noexcept {
209
224
} else {
210
225
reset (state);
211
226
previous = *it;
212
- ++count;
213
227
continue ;
214
228
}
215
229
216
230
previous = cpr[1 ] =
217
231
update (*it, CPRDetectorState::Second, state, is_acceptable);
218
-
219
232
if (previous != 0 )
220
233
// Next time, we allow a space.
221
234
allow_separator = true ;
@@ -235,7 +248,6 @@ MatchResults CPRDetector::find_matches(const std::string &content) noexcept {
235
248
} else {
236
249
reset (state);
237
250
previous = 0 ;
238
- ++count;
239
251
continue ;
240
252
}
241
253
@@ -274,8 +286,8 @@ MatchResults CPRDetector::find_matches(const std::string &content) noexcept {
274
286
case CPRDetectorState::Sixth:
275
287
if (allow_separator && is_separator (*it)) {
276
288
// Skip one of the valid separator characters.
289
+ separator = *it;
277
290
allow_separator = false ;
278
- ++count;
279
291
continue ;
280
292
}
281
293
@@ -304,17 +316,16 @@ MatchResults CPRDetector::find_matches(const std::string &content) noexcept {
304
316
cpr[9 ] = update (*it, CPRDetectorState::Match, state, is_acceptable);
305
317
306
318
auto ahead = it;
307
- if (is_previous_ok (*(++ahead)))
308
- check_and_append_cpr (cpr, results, begin, count);
309
-
319
+ if (is_previous_ok (*(++ahead))) {
320
+ end = static_cast <std::size_t >(std::distance (std::begin (content), it));
321
+ check_and_append_cpr (cpr, results, begin, end, separator);
322
+ }
310
323
previous = *it;
311
324
allow_separator = false ;
312
325
reset (state);
313
326
314
327
break ;
315
328
}
316
-
317
- ++count;
318
329
}
319
330
320
331
return results;
0 commit comments