|
11 | 11 |
|
12 | 12 | namespace ada { |
13 | 13 |
|
14 | | -namespace url_pattern { |
15 | | - |
16 | | -enum class errors { type_error }; |
17 | | - |
18 | | -// @see https://urlpattern.spec.whatwg.org/#canonicalize-a-protocol |
19 | | -std::optional<std::string> canonicalize_protocol(std::string_view input); |
20 | | - |
21 | | -// @see https://wicg.github.io/urlpattern/#canonicalize-a-username |
22 | | -std::optional<std::string> canonicalize_username(std::string_view input); |
23 | | - |
24 | | -// @see https://wicg.github.io/urlpattern/#canonicalize-a-password |
25 | | -std::optional<std::string> canonicalize_password(std::string_view input); |
26 | | - |
27 | | -// @see https://wicg.github.io/urlpattern/#canonicalize-a-password |
28 | | -std::optional<std::string> canonicalize_hostname(std::string_view input); |
29 | | - |
30 | | -// @see https://wicg.github.io/urlpattern/#canonicalize-an-ipv6-hostname |
31 | | -std::optional<std::string> canonicalize_ipv6_hostname(std::string_view input); |
32 | | - |
33 | | -// @see https://wicg.github.io/urlpattern/#canonicalize-a-port |
34 | | -std::optional<std::string> canonicalize_port( |
35 | | - std::string_view input, std::string_view protocol = "fake"); |
36 | | - |
37 | | -// @see https://wicg.github.io/urlpattern/#canonicalize-a-pathname |
38 | | -std::optional<std::string> canonicalize_pathname(std::string_view input); |
39 | | - |
40 | | -// @see https://wicg.github.io/urlpattern/#canonicalize-an-opaque-pathname |
41 | | -std::optional<std::string> canonicalize_opaque_pathname(std::string_view input); |
42 | | - |
43 | | -// @see https://wicg.github.io/urlpattern/#canonicalize-a-search |
44 | | -std::optional<std::string> canonicalize_search(std::string_view input); |
45 | | - |
46 | | -// @see https://wicg.github.io/urlpattern/#canonicalize-a-hash |
47 | | -std::optional<std::string> canonicalize_hash(std::string_view input); |
48 | | - |
49 | | -} // namespace url_pattern |
50 | | - |
51 | 14 | // URLPattern is a Web Platform standard API for matching URLs against a |
52 | 15 | // pattern syntax (think of it as a regular expression for URLs). It is |
53 | 16 | // defined in https://wicg.github.io/urlpattern. |
@@ -175,6 +138,137 @@ class URLPattern { |
175 | 138 | bool ignore_case_ = false; |
176 | 139 | }; |
177 | 140 |
|
| 141 | +namespace url_pattern { |
| 142 | + |
// Error kinds declared for the url_pattern namespace. Only type_error exists
// so far.
enum class errors {
  type_error,
};
| 144 | + |
// A single token produced while tokenizing a pattern string.
// @see https://urlpattern.spec.whatwg.org/#tokens
struct Token {
  // Selects how tokenization errors are handled. Per the spec, "strict"
  // reports the error while "lenient" records an invalid-char token instead.
  // @see https://urlpattern.spec.whatwg.org/#tokenize-policy
  enum Policy {
    STRICT,
    LENIENT,
  };

  // The kind of token. Enumerators are numbered sequentially from 0; the
  // trailing comments spell the values out for quick reference.
  // @see https://urlpattern.spec.whatwg.org/#token
  enum Type {
    INVALID_CHAR,    // 0
    OPEN,            // 1
    CLOSE,           // 2
    REGEXP,          // 3
    NAME,            // 4
    CHAR,            // 5
    ESCAPED_CHAR,    // 6
    OTHER_MODIFIER,  // 7
    ASTERISK,        // 8
    END,             // 9
  };

  // has an associated type, initially "invalid-char".
  Type type = INVALID_CHAR;
  // has an associated index, a number, initially 0. It is the position of the
  // first code point in the pattern string represented by the token.
  size_t index = 0;
  // has an associated value, a string, initially the empty string. It holds
  // the code points from the pattern string represented by the token.
  std::string value{};
};
| 167 | + |
| 168 | +// @see https://urlpattern.spec.whatwg.org/#tokenizer |
| 169 | +struct Tokenizer { |
| 170 | + explicit Tokenizer(std::string_view input, Token::Policy policy) |
| 171 | + : input(input), policy(std::move(policy)); |
| 172 | + |
| 173 | + // has an associated input, a pattern string, initially the empty string. |
| 174 | + std::string input{}; |
| 175 | + // has an associated policy, a tokenize policy, initially "strict". |
| 176 | + Token::Policy policy = Token::Policy::STRICT; |
| 177 | + // has an associated token list, a token list, initially an empty list. |
| 178 | + std::vector<Token> token_list{}; |
| 179 | + // has an associated index, a number, initially 0. |
| 180 | + size_t index = 0; |
| 181 | + // has an associated next index, a number, initially 0. |
| 182 | + size_t next_index = 0; |
| 183 | + // has an associated code point, a Unicode code point, initially null. |
| 184 | + char* code_point = nullptr; |
| 185 | +}; |
| 186 | + |
| 187 | +// @see https://urlpattern.spec.whatwg.org/#constructor-string-parser |
| 188 | +struct ConstructorStringParser { |
| 189 | + explicit ConstructorStringParser(std::string_view input, |
| 190 | + std::vector<Token>& token_list); |
| 191 | + |
| 192 | + private: |
| 193 | + // @see https://urlpattern.spec.whatwg.org/#constructor-string-parser-state |
| 194 | + enum State { |
| 195 | + INIT, |
| 196 | + PROTOCOL, |
| 197 | + AUTHORITY, |
| 198 | + PASSWORD, |
| 199 | + HOSTNAME, |
| 200 | + PORT, |
| 201 | + PATHNAME, |
| 202 | + SEARCH, |
| 203 | + HASH, |
| 204 | + DONE, |
| 205 | + }; |
| 206 | + // has an associated input, a string, which must be set upon creation. |
| 207 | + std::string input; |
| 208 | + // has an associated token list, a token list, which must be set upon |
| 209 | + // creation. |
| 210 | + std::vector<Token> token_list; |
| 211 | + // has an associated result, a URLPatternInit, initially set to a new |
| 212 | + // URLPatternInit. |
| 213 | + URLPattern::Init result{}; |
| 214 | + // has an associated component start, a number, initially set to 0. |
| 215 | + size_t component_start = 0; |
| 216 | + // has an associated token index, a number, initially set to 0. |
| 217 | + size_t token_index = 0; |
| 218 | + // has an associated token increment, a number, initially set to 1. |
| 219 | + size_t token_increment = 1; |
| 220 | + // has an associated group depth, a number, initially set to 0. |
| 221 | + size_t group_depth = 0; |
| 222 | + // has an associated hostname IPv6 bracket depth, a number, initially set to |
| 223 | + // 0. |
| 224 | + size_t hostname_ipv6_bracket_depth = 0; |
| 225 | + // has an associated protocol matches a special scheme flag, a boolean, |
| 226 | + // initially set to false. |
| 227 | + bool protocol_matches_a_special_scheme_flag = false; |
| 228 | + // has an associated state, a string, initially set to "init". It must be one |
| 229 | + // of the following: |
| 230 | + State state = INIT; |
| 231 | +}; |
| 232 | + |
// Canonicalization (encoding) callbacks, one per URL component. Each takes the
// raw component text and yields a std::optional<std::string>.
// NOTE(review): presumably std::nullopt signals a canonicalization failure --
// confirm against the implementation.

// @see https://urlpattern.spec.whatwg.org/#canonicalize-a-protocol
std::optional<std::string> canonicalize_protocol(std::string_view input);

// @see https://urlpattern.spec.whatwg.org/#canonicalize-a-username
std::optional<std::string> canonicalize_username(std::string_view input);

// @see https://urlpattern.spec.whatwg.org/#canonicalize-a-password
std::optional<std::string> canonicalize_password(std::string_view input);

// @see https://urlpattern.spec.whatwg.org/#canonicalize-a-hostname
std::optional<std::string> canonicalize_hostname(std::string_view input);

// @see https://urlpattern.spec.whatwg.org/#canonicalize-an-ipv6-hostname
std::optional<std::string> canonicalize_ipv6_hostname(std::string_view input);

// `protocol` is optional for callers; "fake" is the placeholder used when none
// is supplied.
// @see https://urlpattern.spec.whatwg.org/#canonicalize-a-port
std::optional<std::string> canonicalize_port(
    std::string_view input, std::string_view protocol = "fake");

// @see https://urlpattern.spec.whatwg.org/#canonicalize-a-pathname
std::optional<std::string> canonicalize_pathname(std::string_view input);

// @see https://urlpattern.spec.whatwg.org/#canonicalize-an-opaque-pathname
std::optional<std::string> canonicalize_opaque_pathname(std::string_view input);

// @see https://urlpattern.spec.whatwg.org/#canonicalize-a-search
std::optional<std::string> canonicalize_search(std::string_view input);

// @see https://urlpattern.spec.whatwg.org/#canonicalize-a-hash
std::optional<std::string> canonicalize_hash(std::string_view input);
| 263 | + |
| 264 | +// @see https://urlpattern.spec.whatwg.org/#parse-a-constructor-string |
| 265 | +URLPattern::Init parse_constructor_string(std::string_view input); |
| 266 | + |
| 267 | +// @see https://urlpattern.spec.whatwg.org/#tokenize |
| 268 | +std::string tokenize(std::string_view input, Token::Policy policy); |
| 269 | + |
| 270 | +} // namespace url_pattern |
| 271 | + |
178 | 272 | } // namespace ada |
179 | 273 |
|
180 | 274 | #endif |
0 commit comments