Skip to content

Commit 47e44b1

Browse files
committed
optimize urlpattern::test
1 parent 236e519 commit 47e44b1

File tree

2 files changed

+106
-22
lines changed

2 files changed

+106
-22
lines changed

include/ada/url_pattern-inl.h

Lines changed: 103 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -46,10 +46,11 @@ url_pattern_component<regex_provider>::create_component_match_result(
4646
// says we should start from 1. This case is handled by the
4747
// std_regex_provider.
4848
for (size_t index = 0; index < exec_result.size(); index++) {
49-
result.groups.insert({
49+
// Optimized: Use emplace instead of insert for better performance
50+
result.groups.emplace(
5051
group_name_list[index],
51-
std::move(exec_result[index]),
52-
});
52+
std::move(exec_result[index])
53+
);
5354
}
5455
return result;
5556
}
@@ -204,16 +205,100 @@ result<std::optional<url_pattern_result>> url_pattern<regex_provider>::exec(
204205

205206
template <url_pattern_regex::regex_concept regex_provider>
206207
result<bool> url_pattern<regex_provider>::test(
207-
const url_pattern_input& input, const std::string_view* base_url) {
208-
// TODO: Optimization opportunity. Rather than returning `url_pattern_result`
209-
// Implement a fast path just like `can_parse()` in ada_url.
210-
// Let result be the result of match given this's associated URL pattern,
211-
// input, and baseURL if given.
212-
// If result is null, return false.
213-
if (auto result = match(input, base_url); result.has_value()) {
214-
return result->has_value();
208+
const url_pattern_input& input, const std::string_view* base_url_string) {
209+
// Fast path: Use regex_match instead of full match() to avoid building result objects
210+
std::string protocol{};
211+
std::string username{};
212+
std::string password{};
213+
std::string hostname{};
214+
std::string port{};
215+
std::string pathname{};
216+
std::string search{};
217+
std::string hash{};
218+
219+
// If input is a URLPatternInit then:
220+
if (std::holds_alternative<url_pattern_init>(input)) {
221+
// If baseURLString was given, throw a TypeError.
222+
if (base_url_string) {
223+
return tl::unexpected(errors::type_error);
224+
}
225+
226+
auto apply_result = url_pattern_init::process(
227+
std::get<url_pattern_init>(input), url_pattern_init::process_type::url,
228+
protocol, username, password, hostname, port, pathname, search, hash);
229+
230+
if (!apply_result.has_value()) {
231+
return false;
232+
}
233+
234+
ADA_ASSERT_TRUE(apply_result->protocol.has_value());
235+
protocol = std::move(apply_result->protocol.value());
236+
ADA_ASSERT_TRUE(apply_result->username.has_value());
237+
username = std::move(apply_result->username.value());
238+
ADA_ASSERT_TRUE(apply_result->password.has_value());
239+
password = std::move(apply_result->password.value());
240+
ADA_ASSERT_TRUE(apply_result->hostname.has_value());
241+
hostname = std::move(apply_result->hostname.value());
242+
ADA_ASSERT_TRUE(apply_result->port.has_value());
243+
port = std::move(apply_result->port.value());
244+
ADA_ASSERT_TRUE(apply_result->pathname.has_value());
245+
pathname = std::move(apply_result->pathname.value());
246+
ADA_ASSERT_TRUE(apply_result->search.has_value());
247+
if (apply_result->search->starts_with("?")) {
248+
search = apply_result->search->substr(1);
249+
} else {
250+
search = std::move(apply_result->search.value());
251+
}
252+
ADA_ASSERT_TRUE(apply_result->hash.has_value());
253+
ADA_ASSERT_TRUE(!apply_result->hash->starts_with("#"));
254+
hash = std::move(apply_result->hash.value());
255+
} else {
256+
ADA_ASSERT_TRUE(std::holds_alternative<std::string_view>(input));
257+
258+
result<url_aggregator> base_url;
259+
260+
if (base_url_string) {
261+
base_url = ada::parse<url_aggregator>(*base_url_string, nullptr);
262+
if (!base_url) {
263+
return false;
264+
}
265+
}
266+
267+
url_aggregator* base_url_value =
268+
base_url.has_value() ? &*base_url : nullptr;
269+
270+
auto url = ada::parse<url_aggregator>(std::get<std::string_view>(input),
271+
base_url_value);
272+
273+
if (!url) {
274+
return false;
275+
}
276+
277+
protocol = url->get_protocol().substr(0, url->get_protocol().size() - 1);
278+
username = url->get_username();
279+
password = url->get_password();
280+
hostname = url->get_hostname();
281+
port = url->get_port();
282+
pathname = url->get_pathname();
283+
if (url->has_search()) {
284+
auto view = url->get_search();
285+
search = view.starts_with("?") ? url->get_search().substr(1) : view;
286+
}
287+
if (url->has_hash()) {
288+
auto view = url->get_hash();
289+
hash = view.starts_with("#") ? url->get_hash().substr(1) : view;
290+
}
215291
}
216-
return tl::unexpected(errors::type_error);
292+
293+
// Fast path: Just use regex_match for boolean testing, no need to extract groups
294+
return regex_provider::regex_match(protocol, protocol_component.regexp) &&
295+
regex_provider::regex_match(username, username_component.regexp) &&
296+
regex_provider::regex_match(password, password_component.regexp) &&
297+
regex_provider::regex_match(hostname, hostname_component.regexp) &&
298+
regex_provider::regex_match(port, port_component.regexp) &&
299+
regex_provider::regex_match(pathname, pathname_component.regexp) &&
300+
regex_provider::regex_match(search, search_component.regexp) &&
301+
regex_provider::regex_match(hash, hash_component.regexp);
217302
}
218303

219304
template <url_pattern_regex::regex_concept regex_provider>
@@ -331,7 +416,8 @@ result<std::optional<url_pattern_result>> url_pattern<regex_provider>::match(
331416
// IMPORTANT: Not documented on the URLPattern spec, but protocol suffix ':'
332417
// is removed. Similar work was done on workerd:
333418
// https://github.com/cloudflare/workerd/blob/8620d14012513a6ce04d079e401d3becac3c67bd/src/workerd/jsg/url.c%2B%2B#L2038
334-
protocol = url->get_protocol().substr(0, url->get_protocol().size() - 1);
419+
auto protocol_view = url->get_protocol();
420+
protocol.assign(protocol_view.data(), protocol_view.size() - 1);
335421
// Set username to url's username.
336422
username = url->get_username();
337423
// Set password to url's password.
@@ -349,16 +435,16 @@ result<std::optional<url_pattern_result>> url_pattern<regex_provider>::match(
349435
// is removed. Similar work was done on workerd:
350436
// https://github.com/cloudflare/workerd/blob/8620d14012513a6ce04d079e401d3becac3c67bd/src/workerd/jsg/url.c%2B%2B#L2232
351437
if (url->has_search()) {
352-
auto view = url->get_search();
353-
search = view.starts_with("?") ? url->get_search().substr(1) : view;
438+
auto search_view = url->get_search();
439+
search = search_view.starts_with("?") ? search_view.substr(1) : search_view;
354440
}
355441
// Set hash to url's fragment or the empty string if the value is null.
356442
// IMPORTANT: Not documented on the URLPattern spec, but hash prefix '#' is
357443
// removed. Similar work was done on workerd:
358444
// https://github.com/cloudflare/workerd/blob/8620d14012513a6ce04d079e401d3becac3c67bd/src/workerd/jsg/url.c%2B%2B#L2242
359445
if (url->has_hash()) {
360-
auto view = url->get_hash();
361-
hash = view.starts_with("#") ? url->get_hash().substr(1) : view;
446+
auto hash_view = url->get_hash();
447+
hash = hash_view.starts_with("#") ? hash_view.substr(1) : hash_view;
362448
}
363449
}
364450

src/url_pattern_regex.cpp

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -25,11 +25,9 @@ std::optional<std::regex> std_regex_provider::create_instance(
2525
std::optional<std::vector<std::optional<std::string>>>
2626
std_regex_provider::regex_search(std::string_view input,
2727
const std::regex& pattern) {
28-
std::string input_str(
29-
input.begin(),
30-
input.end()); // Convert string_view to string for regex_search
31-
std::smatch match_result;
32-
if (!std::regex_search(input_str, match_result, pattern,
28+
// Use iterator-based regex_search to avoid string allocation
29+
std::match_results<std::string_view::const_iterator> match_result;
30+
if (!std::regex_search(input.begin(), input.end(), match_result, pattern,
3331
std::regex_constants::match_any)) {
3432
return std::nullopt;
3533
}

0 commit comments

Comments
 (0)