Skip to content
This repository was archived by the owner on Oct 2, 2023. It is now read-only.

Commit 881acc7

Browse files
author
Tomas Hagenau Andersen
committed
Version 0.0.9 - Delete obsolete reference, use std::distance and handle separators.
1 parent 3dd6a44 commit 881acc7

14 files changed

+43
-31190
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
cmake_minimum_required(VERSION 3.20)
22

3-
project(OS2DSRules VERSION 0.0.8)
3+
project(OS2DSRules VERSION 0.0.9)
44

55
# Detect available compiler on the host system.
66
set(gcc_like_cxx "$<COMPILE_LANG_AND_ID:CXX,ARMClang,AppleClang,Clang,GNU,LCC>")

include/cpr-detector.hpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ constexpr bool is_digit(char c) noexcept { return '0' <= c && c <= '9'; }
2121

2222
const auto is_separator = make_predicate(' ', '-', '/', '\t');
2323

24-
const auto is_previous_ok = make_predicate(char(0), ' ', '.', ',', '\n', '\t', '\0');
24+
const auto is_previous_ok =
25+
make_predicate(char(0), ' ', '.', ',', '\n', '\t', '\0');
2526

2627
constexpr bool is_space(const char c) noexcept { return c == ' '; }
2728

@@ -49,10 +50,11 @@ class CPRDetector {
4950
char update(char, CPRDetectorState, CPRDetectorState &, Predicate) noexcept;
5051
bool check_day_month(const std::string &, CPRDetectorState &) noexcept;
5152
void check_leap_year(const std::string &, CPRDetectorState &) noexcept;
52-
void check_and_append_cpr(std::string &, MatchResults &, size_t,
53-
size_t) noexcept;
53+
void check_and_append_cpr(std::string &, MatchResults &, size_t, size_t,
54+
char) noexcept;
5455
bool check_mod11(const MatchResult &) noexcept;
5556
bool examine_context(const std::string &) noexcept;
57+
[[nodiscard]] std::string format_cpr(std::string &, char) const noexcept;
5658

5759
public:
5860
constexpr CPRDetector(bool check_mod11 = false,

lib/cpr-detector.cpp

Lines changed: 33 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#include <algorithm>
22
#include <array>
3-
#include <cstddef>
43
#include <cctype>
4+
#include <cstddef>
55
#include <iterator>
66
#include <numeric>
77
#include <string_view>
@@ -25,22 +25,24 @@ static constexpr auto blacklist_words = std::to_array<std::string_view>(
2525
static const auto blacklist_words_set = FrozenHashSet(blacklist_words);
2626
}; // namespace
2727

28-
static bool find_blacklisted_words(const std::string &content, const std::array<std::size_t, 4> indices) noexcept {
28+
static bool
29+
find_blacklisted_words(const std::string &content,
30+
const std::array<std::size_t, 4> indices) noexcept {
2931

3032
for (std::size_t i = 1; i < 4; ++i) {
3133
for (std::size_t j = 0; j < i; ++j) {
3234
auto begin = indices[j];
3335
auto end = indices[i] - begin;
3436

3537
if (end > content.size())
36-
end = content.size() - begin - 1;
38+
end = content.size() - begin - 1;
3739

3840
std::string target = content.substr(begin, end);
3941
std::transform(target.begin(), target.end(), target.begin(),
4042
[](unsigned char c) { return std::tolower(c); });
4143

4244
if (blacklist_words_set.contains(target))
43-
return true;
45+
return true;
4446
}
4547
}
4648

@@ -104,14 +106,25 @@ void CPRDetector::check_leap_year(const std::string &cpr,
104106
reset(state);
105107
}
106108

109+
std::string CPRDetector::format_cpr(std::string &cpr,
110+
char separator = 0) const noexcept {
111+
if (separator == 0) {
112+
return cpr;
113+
} else {
114+
return std::string(cpr, 0, 6) + separator + std::string(cpr, 6, 4);
115+
}
116+
}
117+
107118
void CPRDetector::check_and_append_cpr(std::string &cpr, MatchResults &results,
108-
size_t begin, size_t end) noexcept {
119+
size_t begin, size_t end,
120+
char separator = 0) noexcept {
109121
// Convert the 4 control digits to an int.
110122
int control = std::stoi(std::string(cpr, 6, 4));
111123

112124
// We reject the control sequence '0000'.
113125
if (control > 0) {
114-
MatchResult result(cpr, begin, end, CPRDetector::sensitivity);
126+
MatchResult result(format_cpr(cpr, separator), begin, end,
127+
CPRDetector::sensitivity);
115128

116129
if (check_mod11_ && !check_mod11(result))
117130
return;
@@ -146,11 +159,11 @@ bool CPRDetector::examine_context(const std::string &content) noexcept {
146159
indices[4 - spaces] = i;
147160
--spaces;
148161
if (spaces == 0) {
149-
if (find_blacklisted_words(content, indices))
150-
return true;
151-
152-
spaces = 3;
153-
indices[0] = indices[3] + 1;
162+
if (find_blacklisted_words(content, indices))
163+
return true;
164+
165+
spaces = 3;
166+
indices[0] = indices[3] + 1;
154167
}
155168
}
156169
}
@@ -176,8 +189,9 @@ MatchResults CPRDetector::find_matches(const std::string &content) noexcept {
176189
CPRDetectorState state = CPRDetectorState::Empty;
177190
std::string cpr(10, 0);
178191
char previous = 0;
179-
std::size_t count = 0;
192+
char separator = 0;
180193
std::size_t begin = 0;
194+
std::size_t end = 0;
181195
bool allow_separator, leap_year = false;
182196
Predicate is_acceptable = [](char) { return false; };
183197

@@ -195,7 +209,8 @@ MatchResults CPRDetector::find_matches(const std::string &content) noexcept {
195209

196210
if (state == CPRDetectorState::First) {
197211
cpr[0] = *it;
198-
begin = count;
212+
begin =
213+
static_cast<std::size_t>(std::distance(std::begin(content), it));
199214
}
200215

201216
break;
@@ -209,13 +224,11 @@ MatchResults CPRDetector::find_matches(const std::string &content) noexcept {
209224
} else {
210225
reset(state);
211226
previous = *it;
212-
++count;
213227
continue;
214228
}
215229

216230
previous = cpr[1] =
217231
update(*it, CPRDetectorState::Second, state, is_acceptable);
218-
219232
if (previous != 0)
220233
// Next time, we allow a space.
221234
allow_separator = true;
@@ -235,7 +248,6 @@ MatchResults CPRDetector::find_matches(const std::string &content) noexcept {
235248
} else {
236249
reset(state);
237250
previous = 0;
238-
++count;
239251
continue;
240252
}
241253

@@ -274,8 +286,8 @@ MatchResults CPRDetector::find_matches(const std::string &content) noexcept {
274286
case CPRDetectorState::Sixth:
275287
if (allow_separator && is_separator(*it)) {
276288
// Skip one of the valid separator characters.
289+
separator = *it;
277290
allow_separator = false;
278-
++count;
279291
continue;
280292
}
281293

@@ -304,17 +316,16 @@ MatchResults CPRDetector::find_matches(const std::string &content) noexcept {
304316
cpr[9] = update(*it, CPRDetectorState::Match, state, is_acceptable);
305317

306318
auto ahead = it;
307-
if (is_previous_ok(*(++ahead)))
308-
check_and_append_cpr(cpr, results, begin, count);
309-
319+
if (is_previous_ok(*(++ahead))) {
320+
end = static_cast<std::size_t>(std::distance(std::begin(content), it));
321+
check_and_append_cpr(cpr, results, begin, end, separator);
322+
}
310323
previous = *it;
311324
allow_separator = false;
312325
reset(state);
313326

314327
break;
315328
}
316-
317-
++count;
318329
}
319330

320331
return results;

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "os2ds-rules"
7-
version = "0.0.8"
7+
version = "0.0.9"
88
authors = [
99
{ name="HackTheOxidation", email="[email protected]" },
1010
]

reference/cpr/__init__.py

Whitespace-only changes.

reference/cpr/detector/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)