5#ifndef ADA_URL_AGGREGATOR_INL_H
6#define ADA_URL_AGGREGATOR_INL_H
// url_aggregator::update_base_authority -- fragment of a source listing; some
// original lines (the arguments of the substr call, several closing braces and
// guards) are not present here.
//
// What the visible code does:
//  * takes a slice `input` of `base_buffer` and logs it,
//  * erases the bytes between components.protocol_end and
//    components.host_start from `buffer` and resets components.username_end,
//  * re-inserts "//" when `input` began with it, then the username and, when a
//    ':' is found, ":" + password,
//  * moves host_start/host_end/pathname_start/search_start/hash_start forward
//    by `diff`.
//
// Parameters:
//  base_buffer - text the credentials are sliced from.
//  base        - offsets describing `base_buffer` (only base.host_start is
//                read in the visible lines).
22inline void url_aggregator::update_base_authority(
23 std::string_view base_buffer,
const ada::url_components &base) {
24 std::string_view input = base_buffer.substr(
26 ada_log(
"url_aggregator::update_base_authority ", input);
// NOTE(review): despite the name, this flag records a leading "//" (two
// slashes), not dashes.
28 bool input_starts_with_dash = input.starts_with(
"//");
// Length of the authority prefix that is about to be removed.
29 uint32_t diff = components.host_start - components.protocol_end;
31 buffer.erase(components.protocol_end,
32 components.host_start - components.protocol_end);
33 components.username_end = components.protocol_end;
35 if (input_starts_with_dash) {
36 input.remove_prefix(2);
38 buffer.insert(components.protocol_end,
"//");
39 components.username_end += 2;
// The first ':' in the remaining input separates username from password.
42 size_t password_delimiter = input.find(
':');
46 if (password_delimiter != std::string_view::npos) {
48 std::string_view username = input.substr(0, password_delimiter);
49 std::string_view password = input.substr(password_delimiter + 1);
51 buffer.insert(components.protocol_end + diff, username);
52 diff += uint32_t(username.size());
53 buffer.insert(components.protocol_end + diff,
":");
// username_end is left pointing at the ':' that was just inserted.
54 components.username_end = components.protocol_end + diff;
55 buffer.insert(components.protocol_end + diff + 1, password);
56 diff += uint32_t(password.size()) + 1;
57 }
// No ':' found: the whole non-empty input is treated as the username.
else if (!input.empty()) {
59 buffer.insert(components.protocol_end + diff, input);
60 components.username_end =
61 components.protocol_end + diff + uint32_t(input.size());
62 diff += uint32_t(input.size());
65 components.host_start += diff;
// NOTE(review): the test indexes this object's `buffer` with the *base* URL's
// host_start, while the insertion below uses components.host_start -- confirm
// that mixing the two offsets is intended.
67 if (buffer.size() >
base.host_start && buffer[
base.host_start] !=
'@') {
68 buffer.insert(components.host_start,
"@");
71 components.host_end += diff;
72 components.pathname_start += diff;
// The guards around the next two updates are not visible in this listing.
74 components.search_start += diff;
77 components.hash_start += diff;
// url_aggregator::update_unencoded_base_hash -- fragment; the arguments of the
// percent_encode call and the body of the `!encoding_required` branch are not
// present in this listing.
//
// Visible behaviour: truncates `buffer` at components.hash_start, re-derives
// hash_start from the new buffer size, then calls
// unicode::percent_encode<true>(...) and branches on its boolean result.
//
// Parameter:
//  input - the new hash text (unencoded, per the function name).
81inline void url_aggregator::update_unencoded_base_hash(std::string_view input) {
82 ada_log(
"url_aggregator::update_unencoded_base_hash ", input,
" [",
83 input.size(),
" bytes], buffer is '", buffer,
"' [", buffer.size(),
84 " bytes] components.hash_start = ", components.hash_start);
// Drop everything from the current hash offset onwards.
88 buffer.resize(components.hash_start);
90 components.hash_start = uint32_t(buffer.size());
92 bool encoding_required = unicode::percent_encode<true>(
96 if (!encoding_required) {
99 ada_log(
"url_aggregator::update_unencoded_base_hash final buffer is '",
100 buffer,
"' [", buffer.size(),
" bytes]");
// Tail of a function signature; the line carrying the return type and name is
// not present in this listing. Presumably url_aggregator::replace_and_resize,
// which other visible code calls with (start, end, input) -- TODO confirm.
//
// Visible behaviour: replaces the bytes [start, end) of `buffer` with `input`
// and returns input.size() - (end - start) as a uint32_t.
105 uint32_t start, uint32_t end, std::string_view input) {
106 uint32_t current_length = end - start;
107 uint32_t input_size = uint32_t(input.size());
// Unsigned arithmetic: when the input is shorter than the replaced range this
// wraps around, so adding it to a uint32_t offset moves that offset backwards.
108 uint32_t new_difference = input_size - current_length;
// Nothing to overwrite: plain insertion.
110 if (current_length == 0) {
111 buffer.insert(start, input);
112 }
// Same length: overwrite in place.
else if (input_size == current_length) {
113 buffer.replace(start, input_size, input);
114 }
// Shorter input: shrink the range first, then overwrite what remains.
else if (input_size < current_length) {
115 buffer.erase(start, current_length - input_size);
116 buffer.replace(start, input_size, input);
// Remaining case (the `else` line itself is not visible): overwrite the
// existing range with the input's prefix, then insert the rest after it.
118 buffer.replace(start, current_length, input.substr(0, current_length));
119 buffer.insert(start + current_length, input.substr(current_length));
122 return new_difference;
// url_aggregator::update_base_hostname -- fragment; the condition guarding the
// "@" insertion and the guards around the search/hash updates are not present
// in this listing.
//
// Visible behaviour: ensures the authority slashes exist, replaces the bytes
// [host_start, host_end) with `input` via replace_and_resize, and shifts the
// following component offsets by the returned difference.
125inline void url_aggregator::update_base_hostname(
const std::string_view input) {
126 ada_log(
"url_aggregator::update_base_hostname ", input,
" [", input.size(),
127 " bytes], buffer is '", buffer,
"' [", buffer.size(),
" bytes]");
132 add_authority_slashes_if_needed();
// True when host_start lies more than two bytes after protocol_end.
134 bool has_credentials = components.protocol_end + 2 < components.host_start;
135 uint32_t new_difference =
136 replace_and_resize(components.host_start, components.host_end, input);
// NOTE(review): the `if` selecting this insertion is missing from the listing;
// presumably it depends on has_credentials -- TODO confirm.
139 buffer.insert(components.host_start,
"@");
142 components.host_end += new_difference;
143 components.pathname_start += new_difference;
145 components.search_start += new_difference;
148 components.hash_start += new_difference;
// Body fragment of url_aggregator::get_pathname_length (name taken from the log
// string; the signature and the conditions choosing between the assignments
// below are not present in this listing).
//
// Visible behaviour: returns ending_index - components.pathname_start, where
// ending_index starts as buffer.size() and may be replaced by search_start or
// hash_start.
155 ada_log(
"url_aggregator::get_pathname_length");
156 uint32_t ending_index = uint32_t(buffer.size());
158 ending_index = components.search_start;
160 ending_index = components.hash_start;
162 return ending_index - components.pathname_start;
// url_aggregator::update_base_search(std::string_view) -- fragment; the
// branch conditions separating the two strategies below are not present in
// this listing.
//
// Visible behaviour: strips one leading '?' from `input`, then either
//  (a) appends the query at the end of `buffer` (search_start = buffer.size()),
//  or
//  (b) erases the bytes [search_start, hash_start) and inserts "?" + input at
//      search_start, moving hash_start forward by input.size() + 1.
170inline void url_aggregator::update_base_search(std::string_view input) {
171 ada_log(
"url_aggregator::update_base_search ", input);
// NOTE(review): input[0] is read here; any emptiness guard that precedes it is
// not visible in this listing -- confirm one exists.
179 if (input[0] ==
'?') {
180 input.remove_prefix(1);
// Strategy (a): the query goes at the very end of the buffer.
185 components.search_start = uint32_t(buffer.size());
// Grows the buffer by one byte (presumably for the '?'; the line writing that
// byte is not visible).
188 buffer.resize(components.search_start + 1);
191 buffer.append(input);
// Strategy (b): replace the query in the middle of the buffer.
194 components.search_start = components.hash_start;
196 buffer.erase(components.search_start,
197 components.hash_start - components.search_start);
198 components.hash_start = components.search_start;
201 buffer.insert(components.search_start,
"?");
202 buffer.insert(components.search_start + 1, input);
203 components.hash_start += uint32_t(input.size() + 1);
// url_aggregator::update_base_search(std::string_view, const uint8_t[]) --
// fragment; the branch conditions, the remainder of the first log call and the
// definition of `idx` are not present in this listing.
//
// Parameters:
//  input                    - the query text to store.
//  query_percent_encode_set - table passed through to the percent_encode
//                             helpers.
//
// Visible behaviour: either appends the (possibly percent-encoded) query at
// the end of `buffer`, or erases [search_start, hash_start) and inserts
// "?" followed by the query in front of the hash, advancing hash_start by the
// number of bytes inserted.
209inline void url_aggregator::update_base_search(
210 std::string_view input,
const uint8_t query_percent_encode_set[]) {
211 ada_log(
"url_aggregator::update_base_search ", input,
// Append-at-end strategy.
218 components.search_start = uint32_t(buffer.size());
221 buffer.resize(components.search_start + 1);
// percent_encode<true> receives `buffer` as its third argument; when it
// returns false the raw input is appended instead.
224 bool encoding_required =
225 unicode::percent_encode<true>(input, query_percent_encode_set, buffer);
228 if (!encoding_required) {
229 buffer.append(input);
// Insert-before-hash strategy.
233 components.search_start = components.hash_start;
235 buffer.erase(components.search_start,
236 components.hash_start - components.search_start);
237 components.hash_start = components.search_start;
240 buffer.insert(components.search_start,
"?");
// `idx` is defined on a line missing from this listing. When it equals
// input.size(), the input is inserted verbatim.
243 if (idx == input.size()) {
244 buffer.insert(components.search_start + 1, input);
245 components.hash_start += uint32_t(input.size() + 1);
// Otherwise the first idx bytes are inserted verbatim and only the remainder
// is percent-encoded before being inserted after them.
247 buffer.insert(components.search_start + 1, input, 0, idx);
248 input.remove_prefix(idx);
251 std::string encoded =
252 ada::unicode::percent_encode(input, query_percent_encode_set);
253 buffer.insert(components.search_start + idx + 1, encoded);
254 components.hash_start +=
255 uint32_t(encoded.size() + idx + 1);
// url_aggregator::update_base_pathname -- fragment; the rest of the log call,
// the remaining arguments of replace_and_resize and several guards are not
// present in this listing.
//
// Visible behaviour: computes whether `input` starts with "//", tests
// has_dash_dot(), inserts "/." at pathname_start (shifting
// pathname_start/search_start/hash_start by 2), then replaces the pathname
// through replace_and_resize and shifts search_start/hash_start by the
// returned difference.
262inline void url_aggregator::update_base_pathname(
const std::string_view input) {
263 ada_log(
"url_aggregator::update_base_pathname '", input,
"' [", input.size(),
// NOTE(review): "dashdash" in the name refers to a leading "//".
268 const bool begins_with_dashdash = input.starts_with(
"//");
269 if (!begins_with_dashdash && has_dash_dot()) {
// NOTE(review): lines 270-278 of the original are missing, so the condition
// under which "/." is inserted cannot be determined from this listing.
279 buffer.insert(components.pathname_start,
"/.");
280 components.pathname_start += 2;
282 components.search_start += 2;
285 components.hash_start += 2;
289 uint32_t difference = replace_and_resize(
290 components.pathname_start,
293 components.search_start += difference;
296 components.hash_start += difference;
// url_aggregator::append_base_pathname -- fragment; the declarations of
// path_expected/path_after, the conditions choosing ending_index and the
// assertion macro name are not present in this listing.
//
// Visible behaviour: inserts `input` into `buffer` at ending_index (initially
// buffer.size(), possibly replaced by search_start or hash_start) and advances
// search_start/hash_start by input.size(). Under ADA_DEVELOPMENT_CHECKS the
// result is compared with an expected path.
301inline void url_aggregator::append_base_pathname(
const std::string_view input) {
302 ada_log(
"url_aggregator::append_base_pathname ", input,
" ",
to_string(),
306#if ADA_DEVELOPMENT_CHECKS
309 path_expected.append(input);
311 uint32_t ending_index = uint32_t(buffer.size());
313 ending_index = components.search_start;
315 ending_index = components.hash_start;
317 buffer.insert(ending_index, input);
320 components.search_start += uint32_t(input.size());
323 components.hash_start += uint32_t(input.size());
325#if ADA_DEVELOPMENT_CHECKS
328 path_expected, path_after,
329 "append_base_pathname problem after inserting " + std::string(input));
// url_aggregator::update_base_username -- fragment; the end of the log call,
// the bodies' closing braces and the search/hash guards are not present in
// this listing.
//
// Visible behaviour: ensures the authority slashes exist, replaces the bytes
// [protocol_end + 2, username_end) with `input`, then adds or removes the '@'
// separator at host_start depending on whether the new username is empty, and
// shifts the later offsets by `diff`.
334inline void url_aggregator::update_base_username(
const std::string_view input) {
335 ada_log(
"url_aggregator::update_base_username '", input,
"' ",
to_string(),
340 add_authority_slashes_if_needed();
// Sampled before the buffer is modified.
343 bool host_starts_with_at = buffer.size() > components.host_start &&
344 buffer[components.host_start] ==
'@';
345 uint32_t diff = replace_and_resize(components.protocol_end + 2,
346 components.username_end, input);
348 components.username_end += diff;
349 components.host_start += diff;
// Non-empty username but no separator yet: add '@'.
351 if (!input.empty() && !host_starts_with_at) {
352 buffer.insert(components.host_start,
"@");
354 }
// Username removed and the visible condition finds no password (the call
// parentheses after has_password are not shown): drop the '@'.
else if (input.empty() && host_starts_with_at && !
has_password) {
357 buffer.erase(components.host_start, 1);
361 components.host_end += diff;
362 components.pathname_start += diff;
364 components.search_start += diff;
367 components.hash_start += diff;
// url_aggregator::append_base_username -- fragment; the declarations of
// username_expected/username_after, an early-return region (lines 382-387 of
// the original) and the search/hash guards are not present in this listing.
//
// Visible behaviour: ensures the authority slashes exist, inserts `input` at
// username_end, advances username_end/host_start by input.size(), inserts '@'
// at host_start when the byte there is not '@' and the host is non-empty, and
// shifts the remaining offsets by the same amount.
372inline void url_aggregator::append_base_username(
const std::string_view input) {
373 ada_log(
"url_aggregator::append_base_username ", input);
376#if ADA_DEVELOPMENT_CHECKS
379 username_expected.append(input);
381 add_authority_slashes_if_needed();
388 uint32_t difference = uint32_t(input.size());
389 buffer.insert(components.username_end, input);
390 components.username_end += difference;
391 components.host_start += difference;
393 if (buffer[components.host_start] !=
'@' &&
394 components.host_start != components.host_end) {
395 buffer.insert(components.host_start,
"@");
399 components.host_end += difference;
400 components.pathname_start += difference;
402 components.search_start += difference;
405 components.hash_start += difference;
407#if ADA_DEVELOPMENT_CHECKS
410 username_expected, username_after,
411 "append_base_username problem after inserting " + std::string(input));
// url_aggregator::clear_password -- fragment; lines 418-422 of the original
// (between the log call and the computation of `diff`) are not present in this
// listing, nor are the search/hash guards.
//
// Visible behaviour: erases the bytes [username_end, host_start) from `buffer`
// and moves host_start and every later offset back by that many bytes.
416constexpr void url_aggregator::clear_password() {
417 ada_log(
"url_aggregator::clear_password ",
to_string());
423 uint32_t diff = components.host_start - components.username_end;
424 buffer.erase(components.username_end, diff);
425 components.host_start -= diff;
426 components.host_end -= diff;
427 components.pathname_start -= diff;
429 components.search_start -= diff;
432 components.hash_start -= diff;
// url_aggregator::update_base_password -- fragment; the definition of
// `password_exists`, the condition around update_base_username("") and the
// `else` separating the two password branches are not present in this listing.
//
// Visible behaviour: ensures the authority slashes exist; when a password
// already exists its bytes (after the ':' at username_end) are erased,
// otherwise a ':' is inserted at username_end; the new password is then
// inserted at username_end + 1, an '@' is added at host_start if missing, and
// the later offsets are shifted by `difference`.
436inline void url_aggregator::update_base_password(
const std::string_view input) {
437 ada_log(
"url_aggregator::update_base_password ", input);
441 add_authority_slashes_if_needed();
// NOTE(review): the condition under which the username is cleared (original
// lines 442-448) is not visible here.
449 update_base_username(
"");
456 uint32_t difference = uint32_t(input.size());
458 if (password_exists) {
// Length of the existing password, excluding the ':' separator.
459 uint32_t current_length =
460 components.host_start - components.username_end - 1;
461 buffer.erase(components.username_end + 1, current_length);
// Unsigned subtraction: wraps when the old password was longer than the new.
462 difference -= current_length;
464 buffer.insert(components.username_end,
":");
468 buffer.insert(components.username_end + 1, input);
469 components.host_start += difference;
474 if (buffer[components.host_start] !=
'@') {
475 buffer.insert(components.host_start,
"@");
479 components.host_end += difference;
480 components.pathname_start += difference;
482 components.search_start += difference;
485 components.hash_start += difference;
// url_aggregator::append_base_password -- fragment; the condition choosing
// between the two insertion strategies, the declaration of password_after and
// the search/hash guards are not present in this listing.
//
// Visible behaviour: ensures the authority slashes exist, then either inserts
// `input` directly at host_start, or inserts ":" at username_end followed by
// `input`; host_start is advanced, an '@' is added at host_start if missing,
// and the later offsets are shifted by `difference`. Under
// ADA_DEVELOPMENT_CHECKS the result is compared with get_password() + input.
490inline void url_aggregator::append_base_password(
const std::string_view input) {
491 ada_log(
"url_aggregator::append_base_password ", input,
" ",
to_string(),
495#if ADA_DEVELOPMENT_CHECKS
497 std::string password_expected = std::string(
get_password());
498 password_expected.append(input);
500 add_authority_slashes_if_needed();
507 uint32_t difference = uint32_t(input.size());
// Presumably the branch for an already-present password -- TODO confirm.
509 buffer.insert(components.host_start, input);
// Presumably the branch for no password yet; a ':' separator is added first.
512 buffer.insert(components.username_end,
":");
513 buffer.insert(components.username_end + 1, input);
515 components.host_start += difference;
520 if (buffer[components.host_start] !=
'@') {
521 buffer.insert(components.host_start,
"@");
525 components.host_end += difference;
526 components.pathname_start += difference;
528 components.search_start += difference;
531 components.hash_start += difference;
533#if ADA_DEVELOPMENT_CHECKS
536 password_expected, password_after,
537 "append_base_password problem after inserting " + std::string(input));
// url_aggregator::update_base_port -- fragment; the lines between the log call
// and the construction of `value`, and the condition guarding the erase, are
// not present in this listing.
//
// Visible behaviour: builds ":" + decimal(input), removes the bytes
// [host_end, pathname_start) from `buffer`, inserts the new text at host_end,
// shifts pathname_start/search_start/hash_start by the net size change and
// stores `input` in components.port.
542inline void url_aggregator::update_base_port(uint32_t input) {
543 ada_log(
"url_aggregator::update_base_port");
551 std::string value = helpers::concat(
":", std::to_string(input));
552 uint32_t difference = uint32_t(value.size());
// Subtract the length of the text being replaced (unsigned arithmetic).
555 difference -= components.pathname_start - components.host_end;
556 buffer.erase(components.host_end,
557 components.pathname_start - components.host_end);
560 buffer.insert(components.host_end, value);
561 components.pathname_start += difference;
563 components.search_start += difference;
566 components.hash_start += difference;
568 components.port = input;
// Body fragment of url_aggregator::clear_port (name taken from the log string;
// the signature, original lines 574-577 and the search/hash guards are not
// present in this listing).
//
// Visible behaviour: erases the bytes [host_end, pathname_start) from `buffer`
// and moves pathname_start, search_start and hash_start back by that length.
573 ada_log(
"url_aggregator::clear_port");
578 uint32_t length = components.pathname_start - components.host_end;
579 buffer.erase(components.host_end, length);
580 components.pathname_start -= length;
582 components.search_start -= length;
585 components.hash_start -= length;
591[[nodiscard]]
inline uint32_t url_aggregator::retrieve_base_port()
const {
592 ada_log(
"url_aggregator::retrieve_base_port");
593 return components.
port;
// Body fragment of url_aggregator::clear_search (name taken from the log
// string; the signature, the branch conditions and the assertion macro name
// are not present in this listing).
//
// Visible behaviour: either truncates `buffer` at search_start, or erases the
// bytes [search_start, hash_start) and sets hash_start to search_start. Under
// ADA_DEVELOPMENT_CHECKS a diagnostic message is built from the buffer, the
// components and to_diagram().
597 ada_log(
"url_aggregator::clear_search");
// Presumably the "no hash follows" case -- TODO confirm.
604 buffer.resize(components.search_start);
// Presumably the "hash follows" case: remove only the query bytes.
606 buffer.erase(components.search_start,
607 components.hash_start - components.search_start);
608 components.hash_start = components.search_start;
613#if ADA_DEVELOPMENT_CHECKS
615 "search should have been cleared on buffer=" + buffer +
616 " with " + components.to_string() +
"\n" +
to_diagram());
// Body fragment of url_aggregator::clear_hash (name taken from the log string;
// the signature, original lines 623-626 and the assertion macro name are not
// present in this listing).
//
// Visible behaviour: truncates `buffer` at components.hash_start. Under
// ADA_DEVELOPMENT_CHECKS a diagnostic message is built from the buffer, the
// components and to_diagram().
622 ada_log(
"url_aggregator::clear_hash");
627 buffer.resize(components.hash_start);
630#if ADA_DEVELOPMENT_CHECKS
632 "hash should have been cleared on buffer=" + buffer +
633 " with " + components.to_string() +
"\n" +
to_diagram());
// url_aggregator::clear_pathname -- fragment; the conditions adjusting
// ending_index (original lines 642-646), part of the "/." handling and the
// search/hash guards are not present in this listing.
//
// Visible behaviour: erases the bytes [pathname_start, ending_index) from
// `buffer`; when pathname_start == host_end + 2 and the two bytes at host_end
// are "/.", those are erased as well and pathname_start moves back by 2;
// search_start and hash_start are moved back by `difference`.
638constexpr void url_aggregator::clear_pathname() {
639 ada_log(
"url_aggregator::clear_pathname");
641 uint32_t ending_index = uint32_t(buffer.size());
647 uint32_t pathname_length = ending_index - components.pathname_start;
648 buffer.erase(components.pathname_start, pathname_length);
649 uint32_t difference = pathname_length;
// A "/." pair sitting between host_end and pathname_start is removed too.
650 if (components.pathname_start == components.host_end + 2 &&
651 buffer[components.host_end] ==
'/' &&
652 buffer[components.host_end + 1] ==
'.') {
653 components.pathname_start -= 2;
654 buffer.erase(components.host_end, 2);
// NOTE(review): the line that would add 2 to `difference` (original line 655)
// is not visible -- confirm it exists upstream.
658 components.search_start -= difference;
661 components.hash_start -= difference;
663 ada_log(
"url_aggregator::clear_pathname completed, running checks...");
664#if ADA_DEVELOPMENT_CHECKS
666 "pathname should have been cleared on buffer=" + buffer +
667 " with " + components.to_string() +
"\n" +
to_diagram());
670 ada_log(
"url_aggregator::clear_pathname completed, running checks... ok");
// url_aggregator::clear_hostname -- fragment; the body of the has_authority()
// guard, the body of the '@' test and the assertion macro names are not
// present in this listing.
//
// Visible behaviour: erases `hostname_length` bytes at `start` from `buffer`,
// sets host_end to `start` and moves pathname_start/search_start/hash_start
// back by `hostname_length`.
673constexpr void url_aggregator::clear_hostname() {
674 ada_log(
"url_aggregator::clear_hostname");
676 if (!has_authority()) {
681 uint32_t hostname_length = components.host_end - components.host_start;
682 uint32_t start = components.host_start;
// NOTE(review): the body of this test (original lines 686-688) is missing;
// presumably it skips the leading '@' -- TODO confirm.
685 if (hostname_length > 0 && buffer[start] ==
'@') {
689 buffer.erase(start, hostname_length);
690 components.host_end = start;
691 components.pathname_start -= hostname_length;
693 components.search_start -= hostname_length;
696 components.hash_start -= hostname_length;
698#if ADA_DEVELOPMENT_CHECKS
700 "hostname should have been cleared on buffer=" + buffer +
701 " with " + components.to_string() +
"\n" +
to_diagram());
705 "hostname should have been cleared on buffer=" + buffer +
706 " with " + components.to_string() +
"\n" +
to_diagram());
// Only the trace call of url_aggregator::has_hash survives in this listing;
// the signature and the return expression are not present.
711 ada_log(
"url_aggregator::has_hash");
// Only the trace call of url_aggregator::has_search survives in this listing;
// the signature and the return expression are not present.
716 ada_log(
"url_aggregator::has_search");
// Only the trace call of url_aggregator::has_credentials survives in this
// listing; the signature and the return expression are not present.
721 ada_log(
"url_aggregator::has_credentials");
// url_aggregator::cannot_have_credentials_or_port -- only the signature and
// the trace call survive in this listing; the return expression is not
// present, so the predicate's exact rule cannot be stated from here.
725constexpr bool url_aggregator::cannot_have_credentials_or_port()
const {
726 ada_log(
"url_aggregator::cannot_have_credentials_or_port");
// ada::url_aggregator::has_authority -- the line carrying the qualifiers and
// the opening brace (original line 737) is not present in this listing.
//
// Returns true when host_start lies at least two bytes after protocol_end and
// the two bytes of `buffer` starting at protocol_end are "//".
736[[nodiscard]]
constexpr bool ada::url_aggregator::has_authority()
738 ada_log(
"url_aggregator::has_authority");
741 return components.protocol_end + 2 <= components.host_start &&
742 helpers::substring(buffer, components.protocol_end,
743 components.protocol_end + 2) ==
"//";
// ada::url_aggregator::add_authority_slashes_if_needed -- fragment; the body
// of the has_authority() guard and the search/hash guards are not present in
// this listing.
//
// Visible behaviour: inserts "//" into `buffer` at protocol_end and advances
// username_end, host_start, host_end, pathname_start, search_start and
// hash_start by 2.
746inline void ada::url_aggregator::add_authority_slashes_if_needed() {
747 ada_log(
"url_aggregator::add_authority_slashes_if_needed");
// Presumably an early return when the slashes already exist (the body of this
// `if` is not visible) -- TODO confirm.
752 if (has_authority()) {
758 buffer.insert(components.protocol_end,
"//");
759 components.username_end += 2;
760 components.host_start += 2;
761 components.host_end += 2;
762 components.pathname_start += 2;
764 components.search_start += 2;
767 components.hash_start += 2;
772constexpr void ada::url_aggregator::reserve(uint32_t capacity) {
773 buffer.reserve(capacity);
// Body fragment of url_aggregator::has_non_empty_username (name taken from the
// log string; the signature is not present in this listing).
//
// Returns true when username_end lies more than two bytes after protocol_end.
777 ada_log(
"url_aggregator::has_non_empty_username");
778 return components.protocol_end + 2 < components.username_end;
// Body fragment of url_aggregator::has_non_empty_password (name taken from the
// log string; the signature is not present in this listing).
//
// Returns true when host_start is strictly greater than username_end.
782 ada_log(
"url_aggregator::has_non_empty_password");
783 return components.host_start > components.username_end;
// Body fragment of url_aggregator::has_password (name taken from the log
// string; the signature is not present in this listing).
//
// Returns true when host_start is past username_end and the byte of `buffer`
// at username_end is ':'.
787 ada_log(
"url_aggregator::has_password");
789 return components.host_start > components.username_end &&
790 buffer[components.username_end] ==
':';
// Body fragment without signature or trace call. Presumably
// url_aggregator::has_empty_hostname -- TODO confirm against the full source.
// The bodies of the two `if` statements are not present in this listing.
//
// Visible logic: tests for an empty host range, then for a host range longer
// than one byte, and finally returns whether username_end differs from
// host_start.
797 if (components.host_start == components.host_end) {
800 if (components.host_end > components.host_start + 1) {
803 return components.username_end != components.host_start;
// Single surviving statement of a function whose signature is not present in
// this listing. Presumably url_aggregator::has_hostname -- TODO confirm.
// It forwards the result of has_authority().
807 return has_authority();
// Body fragment of url_aggregator::has_port (name taken from the log string;
// the signature is not present in this listing).
//
// Returns true when has_hostname() holds and pathname_start differs from
// host_end, i.e. some bytes sit between the host and the path.
811 ada_log(
"url_aggregator::has_port");
814 return has_hostname() && components.pathname_start != components.host_end;
// url_aggregator::has_dash_dot -- fragment; most of the
// ADA_DEVELOPMENT_CHECKS section and one operand of the final return (original
// line 845) are not present in this listing.
//
// NOTE(review): "dash dot" refers to the two characters "/." -- the visible
// comparisons are against '/' and '.', not '-'.
//
// Visible result: true requires pathname_start == host_end + 2 and the byte at
// host_end + 1 to be '.'.
817[[nodiscard]]
constexpr bool url_aggregator::has_dash_dot() const noexcept {
821 ada_log(
"url_aggregator::has_dash_dot");
822#if ADA_DEVELOPMENT_CHECKS
// The remaining lines up to the final return are pieces of development-only
// consistency checks; their enclosing statements are incomplete here.
828 buffer[components.
host_end + 1] ==
'.') ||
829 (buffer[components.
host_end] ==
':' &&
833 buffer[components.
host_end] ==
'/' &&
834 buffer[components.
host_end + 1] ==
'.') {
844 return components.pathname_start == components.host_end + 2 &&
846 buffer[components.host_end + 1] ==
'.';
// Only the trace call of url_aggregator::get_href survives in this listing;
// the signature and the return statement are not present.
851 ada_log(
"url_aggregator::get_href");
// url_aggregator::parse_port -- fragment; the return type, every `return`
// statement, the declaration of `is_valid` and several closing braces are not
// present in this listing.
//
// Parameters:
//  view                   - text expected to start with a decimal port.
//  check_trailing_content - when true, the character following the digits is
//                           checked against an allowed set (end of input, '/',
//                           '?', or '\\' for special schemes).
//
// Visible behaviour: rejects a leading '-', parses into a uint16_t with
// std::from_chars (values that do not fit report result_out_of_range), and,
// when parsing succeeded and the port is acceptable, stores it through
// update_base_port().
856url_aggregator::parse_port(std::string_view view,
bool check_trailing_content) {
857 ada_log(
"url_aggregator::parse_port('", view,
"') ", view.size());
// NOTE(review): the log text below says "view[0] == '0' && view.size() > 1"
// but the condition actually tested is a leading '-'. The message looks stale
// and should be aligned with the condition.
858 if (!view.empty() && view[0] ==
'-') {
859 ada_log(
"parse_port: view[0] == '0' && view.size() > 1");
863 uint16_t parsed_port{};
864 auto r = std::from_chars(view.data(), view.data() + view.size(), parsed_port);
865 if (r.ec == std::errc::result_out_of_range) {
866 ada_log(
"parse_port: r.ec == std::errc::result_out_of_range");
870 ada_log(
"parse_port: ", parsed_port);
// Number of characters from_chars consumed.
871 const size_t consumed = size_t(r.ptr - view.data());
872 ada_log(
"parse_port: consumed ", consumed);
873 if (check_trailing_content) {
875 (consumed == view.size() || view[consumed] ==
'/' ||
876 view[consumed] ==
'?' || (
is_special() && view[consumed] ==
'\\'));
878 ada_log(
"parse_port: is_valid = ",
is_valid);
880 ada_log(
"parse_port", r.ec == std::errc());
// A port is stored only if it differs from the scheme's default, or if both
// the default and the parsed value are 0.
882 auto default_port = scheme_default_port();
883 bool is_port_valid = (default_port == 0 && parsed_port == 0) ||
884 (default_port != parsed_port);
885 if (r.ec == std::errc() && is_port_valid) {
886 update_base_port(parsed_port);
// url_aggregator::set_protocol_as_file -- fragment; the `else` between the two
// buffer strategies and the search/hash guards are not present in this
// listing.
//
// Visible behaviour: makes the buffer start with "file:" (appending to an
// empty buffer, otherwise replacing the first protocol_end bytes), sets
// protocol_end to 5 and shifts every later offset by `new_difference`.
894constexpr void url_aggregator::set_protocol_as_file() {
895 ada_log(
"url_aggregator::set_protocol_as_file ");
// 5 is the length of "file:". Unsigned arithmetic: this wraps when the current
// protocol is longer than 5 bytes, which moves the offsets backwards when
// added below.
900 uint32_t new_difference = 5 - components.protocol_end;
902 if (buffer.empty()) {
903 buffer.append(
"file:");
905 buffer.erase(0, components.protocol_end);
906 buffer.insert(0,
"file:");
908 components.protocol_end = 5;
911 components.username_end += new_difference;
912 components.host_start += new_difference;
913 components.host_end += new_difference;
914 components.pathname_start += new_difference;
916 components.search_start += new_difference;
919 components.hash_start += new_difference;
// Body fragment of url_aggregator::validate (name taken from the log strings).
// The signature, every `return` statement, many `if` headers, closing braces
// and the tails of several log calls are not present in this listing.
//
// Visible checks, in order:
//  1. components.check_offset_consistency();
//  2. a series of "omitted" diagnostics, one per offset (their conditions are
//     missing);
//  3. each of protocol_end, username_end, host_start, host_end and
//     pathname_start must not exceed buffer.size();
//  4. the delimiter characters found at those offsets (':', '@', "//", "/.",
//     '/', '?', '#') must match the layout the offsets imply.
928 if (!components.check_offset_consistency()) {
929 ada_log(
"url_aggregator::validate inconsistent components \n",
// --- "omitted" diagnostics (guards not visible) ---
948 ada_log(
"url_aggregator::validate omitted protocol_end \n",
to_diagram());
952 ada_log(
"url_aggregator::validate omitted username_end \n",
to_diagram());
956 ada_log(
"url_aggregator::validate omitted host_start \n",
to_diagram());
960 ada_log(
"url_aggregator::validate omitted host_end \n",
to_diagram());
964 ada_log(
"url_aggregator::validate omitted pathname_start \n",
to_diagram());
// --- bounds checks: no offset may point past the end of the buffer ---
968 if (components.protocol_end > buffer.size()) {
969 ada_log(
"url_aggregator::validate protocol_end overflow \n",
to_diagram());
972 if (components.username_end > buffer.size()) {
973 ada_log(
"url_aggregator::validate username_end overflow \n",
to_diagram());
976 if (components.host_start > buffer.size()) {
977 ada_log(
"url_aggregator::validate host_start overflow \n",
to_diagram());
980 if (components.host_end > buffer.size()) {
981 ada_log(
"url_aggregator::validate host_end overflow \n",
to_diagram());
984 if (components.pathname_start > buffer.size()) {
985 ada_log(
"url_aggregator::validate pathname_start overflow \n",
// --- delimiter checks ---
// A non-empty protocol must end with ':'.
990 if (components.protocol_end > 0) {
991 if (buffer[components.protocol_end - 1] !=
':') {
993 "url_aggregator::validate missing : at the end of the protocol \n",
// A non-empty username must be followed by ':' or '@'.
999 if (components.username_end != buffer.size() &&
1000 components.username_end > components.protocol_end + 2) {
1001 if (buffer[components.username_end] !=
':' &&
1002 buffer[components.username_end] !=
'@') {
1004 "url_aggregator::validate missing : or @ at the end of the username "
1011 if (components.host_start != buffer.size()) {
// host_start past username_end means a password is present; it must be
// terminated by '@'.
1012 if (components.host_start > components.username_end) {
1013 if (buffer[components.host_start] !=
'@') {
1015 "url_aggregator::validate missing @ at the end of the password \n",
1019 }
else if (components.host_start == components.username_end &&
1020 components.host_end > components.host_start) {
// No credentials: the two bytes after the protocol must be "//".
1021 if (components.host_start == components.protocol_end + 2) {
1022 if (buffer[components.protocol_end] !=
'/' ||
1023 buffer[components.protocol_end + 1] !=
'/') {
1025 "url_aggregator::validate missing // between protocol and host "
1031 if (components.host_start > components.protocol_end &&
1032 buffer[components.host_start] !=
'@') {
1034 "url_aggregator::validate missing @ at the end of the username "
1041 if (components.host_end != components.host_start) {
1042 ada_log(
"url_aggregator::validate expected omitted host \n",
// Bytes between host_end and pathname_start are either a "/." marker (which
// requires the path to begin with "//") or a port introduced by ':'.
1048 if (components.host_end != buffer.size() &&
1049 components.pathname_start > components.host_end) {
1050 if (components.pathname_start == components.host_end + 2 &&
1051 buffer[components.host_end] ==
'/' &&
1052 buffer[components.host_end + 1] ==
'.') {
1053 if (components.pathname_start + 1 >= buffer.size() ||
1054 buffer[components.pathname_start] !=
'/' ||
1055 buffer[components.pathname_start + 1] !=
'/') {
1057 "url_aggregator::validate expected the path to begin with // \n",
1061 }
else if (buffer[components.host_end] !=
':') {
1062 ada_log(
"url_aggregator::validate missing : at the port \n",
// A non-opaque, non-empty path must begin with '/'.
1067 if (components.pathname_start != buffer.size() &&
1068 components.pathname_start < components.search_start &&
1069 components.pathname_start < components.hash_start && !
has_opaque_path) {
1070 if (buffer[components.pathname_start] !=
'/') {
1071 ada_log(
"url_aggregator::validate missing / at the path \n",
// The guards deciding whether a search/hash is present are not visible.
1077 if (buffer[components.search_start] !=
'?') {
1078 ada_log(
"url_aggregator::validate missing ? at the search \n",
1084 if (buffer[components.hash_start] !=
'#') {
1085 ada_log(
"url_aggregator::validate missing # at the hash \n",
// Body fragment of url_aggregator::get_pathname (name taken from the log
// string; the signature and the conditions choosing between the two
// ending_index assignments are not present in this listing).
//
// Visible behaviour: returns helpers::substring(buffer, pathname_start,
// ending_index), where ending_index starts as buffer.size() and may be
// replaced by search_start or hash_start.
1096 ada_log(
"url_aggregator::get_pathname pathname_start = ",
1097 components.pathname_start,
" buffer.size() = ", buffer.size(),
1098 " components.search_start = ", components.search_start,
1099 " components.hash_start = ", components.hash_start);
1100 auto ending_index = uint32_t(buffer.size());
1102 ending_index = components.search_start;
1104 ending_index = components.hash_start;
1106 return helpers::substring(buffer, components.pathname_start, ending_index);
// url_aggregator::update_host_to_base_host -- fragment; the first branch of
// the if/else chain (original lines 1116-1122) and the lines between the two
// calls below are not present in this listing.
//
// Visible behaviour: when has_dash_dot() holds, the authority slashes are
// added if needed; update_base_hostname(input) is called further down (whether
// unconditionally cannot be determined from this listing).
1114void url_aggregator::update_host_to_base_host(
const std::string_view input) {
1115 ada_log(
"url_aggregator::update_host_to_base_host ", input);
1123 }
else if (has_dash_dot()) {
1124 add_authority_slashes_if_needed();
1130 update_base_hostname(input);
Definitions of the character sets used by unicode functions.
#define ADA_ASSERT_TRUE(COND)
#define ada_lifetime_bound
#define ADA_ASSERT_EQUAL(LHS, RHS, MESSAGE)
#define ada_really_inline
Definitions for helper functions used within Ada.
constexpr uint8_t FRAGMENT_PERCENT_ENCODE[32]
constexpr bool is_digit(char x) noexcept
ada_really_inline size_t percent_encode_index(const std::string_view input, const uint8_t character_set[])
std::ostream & operator<<(std::ostream &out, const ada::url &u)
URL scheme type definitions and utilities.
Memory-efficient URL representation using a single buffer.
ada_really_inline const url_components & get_components() const noexcept
constexpr bool has_non_empty_password() const noexcept
constexpr bool validate() const noexcept
void clear_search() override
std::string_view get_search() const ada_lifetime_bound
std::string_view get_hash() const ada_lifetime_bound
std::string to_string() const override
std::string_view get_password() const ada_lifetime_bound
std::string_view get_username() const ada_lifetime_bound
std::string to_diagram() const
constexpr bool has_hostname() const noexcept
constexpr bool has_search() const noexcept override
constexpr std::string_view get_href() const noexcept ada_lifetime_bound
constexpr bool has_empty_hostname() const noexcept
constexpr bool has_password() const noexcept
ada_really_inline uint32_t get_pathname_length() const noexcept
constexpr bool has_hash() const noexcept override
constexpr std::string_view get_pathname() const ada_lifetime_bound
std::string_view get_hostname() const ada_lifetime_bound
constexpr bool has_port() const noexcept
ada_really_inline constexpr bool has_credentials() const noexcept
constexpr bool has_non_empty_username() const noexcept
ada_really_inline constexpr bool is_special() const noexcept
Stores byte offsets for URL components within a buffer.
static constexpr uint32_t omitted
Definitions for unicode operations.
Declaration for the ada::url_aggregator class.
Declaration for the URL Components.