Skip to content

Commit 3456367

Browse files
committed
implement generate regular expression methods
1 parent 80056a6 commit 3456367

File tree

3 files changed

+180
-6
lines changed

3 files changed

+180
-6
lines changed

include/ada/url_pattern-inl.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ inline bool url_pattern::has_regexp_groups() const ada_lifetime_bound {
7777
}
7878

7979
// Reports whether this part is a matching group with a custom regular
// expression. In this diff view the "-" line is the previous string
// comparison and the "+" line is its replacement, which compares the part's
// type against url_pattern_part_type::REGEXP.
inline bool url_pattern_part::is_regexp() const noexcept {
80-
return type == "regexp";
80+
return type == url_pattern_part_type::REGEXP;
8181
}
8282

8383
} // namespace ada

include/ada/url_pattern.h

+43-1
Original file line numberDiff line numberDiff line change
@@ -98,10 +98,42 @@ struct url_pattern_init {
9898
std::optional<std::string> base_url;
9999
};
100100

101+
// The kind of a URL pattern part.
// @see https://urlpattern.spec.whatwg.org/#part-type
enum class url_pattern_part_type : uint8_t {
  // The part represents a simple fixed text string.
  FIXED_TEXT,
  // Misspelled legacy name for FIXED_TEXT. It is kept as an alias with the
  // same underlying value so existing code that refers to it keeps compiling;
  // new code should use FIXED_TEXT.
  FIXED_TEST = FIXED_TEXT,
  // The part represents a matching group with a custom regular expression.
  REGEXP,
  // The part represents a matching group that matches code points up to the
  // next separator code point. This is typically used for a named group like
  // ":foo" that does not have a custom regular expression.
  SEGMENT_WILDCARD,
  // The part represents a matching group that greedily matches all code
  // points. This is typically used for the "*" wildcard matching group.
  FULL_WILDCARD,
};
114+
115+
/// Repetition modifier attached to a URL pattern part.
enum class url_pattern_part_modifier : uint8_t {
  NONE = 0,          ///< No modifier.
  OPTIONAL = 1,      ///< Optional modifier, written as U+003F (?).
  ZERO_OR_MORE = 2,  ///< "Zero or more" modifier, written as U+002A (*).
  ONE_OR_MORE = 3,   ///< "One or more" modifier, written as U+002B (+).
};
127+
101128
// @see https://urlpattern.spec.whatwg.org/#part
102129
struct url_pattern_part {
103130
// A part has an associated type, a string, which must be set upon creation.
104-
std::string type;
131+
url_pattern_part_type type;
132+
// A part has an associated value, a string, which must be set upon creation.
133+
std::string value;
134+
// A part has an associated modifier, a string, which must be set upon
135+
// creation.
136+
url_pattern_part_modifier modifier;
105137
// A part has an associated name, a string, initially the empty string.
106138
std::string name{};
107139
// A part has an associated prefix, a string, initially the empty string.
@@ -410,6 +442,9 @@ std::string process_base_url_string(std::string_view input,
410442
// @see https://urlpattern.spec.whatwg.org/#escape-a-pattern-string
411443
std::string escape_pattern(std::string_view input);
412444

445+
// @see https://urlpattern.spec.whatwg.org/#escape-a-regexp-string
446+
std::string escape_regexp_string(std::string_view input);
447+
413448
// @see https://urlpattern.spec.whatwg.org/#is-an-absolute-pathname
414449
constexpr bool is_absolute_pathname(std::string_view input,
415450
std::string_view type) noexcept;
@@ -439,6 +474,13 @@ constexpr bool is_ipv6_address(std::string_view input) noexcept;
439474
// https://urlpattern.spec.whatwg.org/#protocol-component-matches-a-special-scheme
440475
bool protocol_component_matches_special_scheme(std::string_view input);
441476

477+
// @see https://urlpattern.spec.whatwg.org/#convert-a-modifier-to-a-string
478+
std::string convert_modifier_to_string(url_pattern_part_modifier modifier);
479+
480+
// @see https://urlpattern.spec.whatwg.org/#generate-a-segment-wildcard-regexp
481+
std::string generate_segment_wildcard_regexp(
482+
url_pattern_compile_component_options options);
483+
442484
} // namespace url_pattern_helpers
443485

444486
} // namespace ada

src/url_pattern.cpp

+136-4
Original file line numberDiff line numberDiff line change
@@ -660,6 +660,12 @@ std::string escape_pattern(std::string_view input) {
660660
return result;
661661
}
662662

663+
// Escapes `input` so it can be embedded literally in a regular expression.
//
// Every character with syntactic meaning in a regular expression
// (. + * ? ^ $ { } ( ) [ ] | / \) is prefixed with a backslash; all other
// characters are copied through unchanged. An empty input yields an empty
// string.
//
// @param input the text to escape.
// @return a copy of `input` with the special characters backslash-escaped.
// @see https://urlpattern.spec.whatwg.org/#escape-a-regexp-string
std::string escape_regexp_string(std::string_view input) {
  // The set of code points the algorithm requires to be escaped.
  constexpr std::string_view special_characters = ".+*?^${}()[]|/\\";

  std::string result;
  // Lower bound only: each escaped character adds one extra byte.
  result.reserve(input.size());
  for (const char c : input) {
    if (special_characters.find(c) != std::string_view::npos) {
      result.push_back('\\');
    }
    result.push_back(c);
  }
  return result;
}
668+
663669
std::string process_base_url_string(std::string_view input,
664670
std::string_view type) {
665671
// Assert: input is not null.
@@ -763,10 +769,103 @@ std::tuple<std::string, std::vector<std::string>>
763769
generate_regular_expression_and_name_list(
764770
std::vector<url_pattern_part>& part_list,
765771
url_pattern_compile_component_options options) {
766-
// TODO: Implement this
767-
(void)part_list;
768-
(void)options;
769-
return {"", {}};
772+
// Let result be "^"
773+
std::string result = "^";
774+
775+
// Let name list be a new list
776+
std::vector<std::string> name_list;
777+
const std::string full_wildcard_regexp_value = ".*";
778+
779+
// For each part of part list:
780+
for (const url_pattern_part& part : part_list) {
781+
// If part's type is "fixed-text":
782+
if (part.type == url_pattern_part_type::FIXED_TEST) {
783+
// If part's modifier is "none"
784+
if (part.modifier == url_pattern_part_modifier::NONE) {
785+
// Append the result of running escape a regexp string given part's
786+
// value
787+
result += escape_regexp_string(part.value);
788+
} else {
789+
// A "fixed-text" part with a modifier uses a non capturing group
790+
// (?:<fixed text>)<modifier>
791+
result += "(?:" + escape_regexp_string(part.value) + ")" +
792+
convert_modifier_to_string(part.modifier);
793+
}
794+
continue;
795+
}
796+
797+
// Assert: part's name is not the empty string
798+
ADA_ASSERT_TRUE(!part.name.empty());
799+
800+
// Append part's name to name list
801+
name_list.push_back(part.name);
802+
803+
// Let regexp value be part's value
804+
std::string regexp_value = part.value;
805+
806+
// If part's type is "segment-wildcard"
807+
if (part.type == url_pattern_part_type::SEGMENT_WILDCARD) {
808+
regexp_value = generate_segment_wildcard_regexp(options);
809+
}
810+
// Otherwise if part's type is "full-wildcard"
811+
else if (part.type == url_pattern_part_type::FULL_WILDCARD) {
812+
regexp_value = full_wildcard_regexp_value;
813+
}
814+
815+
// If part's prefix is the empty string and part's suffix is the empty
816+
// string
817+
if (part.prefix.empty() && part.suffix.empty()) {
818+
// If part's modifier is "none" or "optional"
819+
if (part.modifier == url_pattern_part_modifier::NONE ||
820+
part.modifier == url_pattern_part_modifier::OPTIONAL) {
821+
// (<regexp value>)<modifier>
822+
result += "(" + regexp_value + ")" +
823+
convert_modifier_to_string(part.modifier);
824+
} else {
825+
// ((?:<regexp value>)<modifier>)
826+
result += "((?:" + regexp_value + ")" +
827+
convert_modifier_to_string(part.modifier) + ")";
828+
}
829+
continue;
830+
}
831+
832+
// If part's modifier is "none" or "optional"
833+
if (part.modifier == url_pattern_part_modifier::NONE ||
834+
part.modifier == url_pattern_part_modifier::OPTIONAL) {
835+
// (?:<prefix>(<regexp value>)<suffix>)<modifier>
836+
result += "(?:" + escape_regexp_string(part.prefix) + "(" + regexp_value +
837+
")" + escape_regexp_string(part.suffix) + ")" +
838+
convert_modifier_to_string(part.modifier);
839+
continue;
840+
}
841+
842+
// Assert: part's modifier is "zero-or-more" or "one-or-more"
843+
ADA_ASSERT_TRUE(part.modifier == url_pattern_part_modifier::ZERO_OR_MORE ||
844+
part.modifier == url_pattern_part_modifier::ONE_OR_MORE);
845+
846+
// Assert: part's prefix is not the empty string or part's suffix is not the
847+
// empty string
848+
ADA_ASSERT_TRUE(!part.prefix.empty() || !part.suffix.empty());
849+
850+
// (?:<prefix>((?:<regexp value>)(?:<suffix><prefix>(?:<regexp
851+
// value>))*)<suffix>)?
852+
result += "(?:" + escape_regexp_string(part.prefix) +
853+
"((?:" + regexp_value +
854+
")(?:" + escape_regexp_string(part.suffix) +
855+
escape_regexp_string(part.prefix) + "(?:" + regexp_value +
856+
"))*)" + escape_regexp_string(part.suffix) + ")";
857+
858+
// If part's modifier is "zero-or-more" then append "?" to the end of result
859+
if (part.modifier == url_pattern_part_modifier::ZERO_OR_MORE) {
860+
result += "?";
861+
}
862+
}
863+
864+
// Append "$" to the end of result
865+
result += "$";
866+
867+
// Return (result, name list)
868+
return {result, name_list};
770869
}
771870

772871
constexpr bool is_ipv6_address(std::string_view input) noexcept {
@@ -786,6 +885,39 @@ constexpr bool is_ipv6_address(std::string_view input) noexcept {
786885
return false;
787886
}
788887

888+
// Converts a part modifier into the regular-expression quantifier that
// represents it.
//
// @param modifier the modifier to convert.
// @return "*" for ZERO_OR_MORE, "?" for OPTIONAL, "+" for ONE_OR_MORE, and the
//         empty string otherwise (NONE).
// @see https://urlpattern.spec.whatwg.org/#convert-a-modifier-to-a-string
std::string convert_modifier_to_string(url_pattern_part_modifier modifier) {
  // Every enumerator is listed explicitly (no `default:`) so the compiler can
  // warn if a new modifier is added without being handled here.
  switch (modifier) {
    // If modifier is "zero-or-more", then return "*".
    case url_pattern_part_modifier::ZERO_OR_MORE:
      return "*";
    // If modifier is "optional", then return "?".
    case url_pattern_part_modifier::OPTIONAL:
      return "?";
    // If modifier is "one-or-more", then return "+".
    case url_pattern_part_modifier::ONE_OR_MORE:
      return "+";
    // "none" has no quantifier.
    case url_pattern_part_modifier::NONE:
      break;
  }
  // Return the empty string.
  return "";
}
905+
906+
// Builds the regular expression used for a segment-wildcard part: a lazy,
// one-or-more match of any character other than the delimiter, i.e.
// "[^<escaped delimiter>]+?".
//
// @param options compile options; only `options.delimiter` is read.
// @return the segment wildcard regular expression. When no delimiter is set
//         the result is "[^]+?".
// @see https://urlpattern.spec.whatwg.org/#generate-a-segment-wildcard-regexp
std::string generate_segment_wildcard_regexp(
    url_pattern_compile_component_options options) {
  // Let result be "[^".
  std::string result = "[^";
  // Append the result of running escape a regexp string given options's
  // delimiter code point to the end of result.
  //
  // An unset delimiter contributes nothing instead of being asserted on and
  // dereferenced with value(), which would abort or throw.
  // NOTE(review): the URL Pattern spec's default options use an empty
  // delimiter; presumably that is modelled here as an unset optional --
  // confirm against the definition of url_pattern_compile_component_options.
  if (options.delimiter.has_value()) {
    result.append(
        escape_regexp_string(std::string_view(&*options.delimiter, 1)));
  }
  // Append "]+?" to the end of result.
  result.append("]+?");
  // Return result.
  return result;
}
920+
789921
bool protocol_component_matches_special_scheme(std::string_view input) {
790922
// TODO: Optimize this.
791923
std::regex rx(input.begin(), input.size());

0 commit comments

Comments
 (0)