// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5 #ifdef UNSAFE_BUFFERS_BUILD
6 // TODO(crbug.com/350788890): Remove this and spanify to fix the errors.
7 #pragma allow_unsafe_buffers
8 #endif
9
10 #ifndef URL_URL_PARSE_INTERNAL_H_
11 #define URL_URL_PARSE_INTERNAL_H_
12
// Contains common inline helper functions used by the URL parsing routines.
14
15 #include "url/third_party/mozilla/url_parse.h"
16
17 namespace url {
18
// Returns true if `ch` is a URL separator, i.e. '/' or '\'.
//
// The motivation: the URL Standard contains many condition checks of the
// following form, and this helper gives them a single spelling:
//
// > If url is special and c is U+002F (/) or U+005C (\), ...
//
// Usable in constant expressions.
constexpr bool IsSlashOrBackslash(char16_t ch) {
  return ch == '/' || ch == '\\';
}
// Narrow-character overload: converts `ch` to char16_t and defers to the
// overload above.
constexpr bool IsSlashOrBackslash(char ch) {
  return IsSlashOrBackslash(static_cast<char16_t>(ch));
}
31
// Returns true if we should trim this character from the URL because it is a
// space or a control character, i.e. any code unit less than or equal to
// U+0020 (space).
//
// Usable in constant expressions.
constexpr bool ShouldTrimFromURL(char16_t ch) {
  return ch <= ' ';
}
// Narrow-character overload. The conversion to char16_t happens before the
// comparison, so a `char` holding a negative value (possible where `char` is
// signed, e.g. for a non-ASCII UTF-8 byte) becomes a large code unit and is
// not treated as trimmable.
constexpr bool ShouldTrimFromURL(char ch) {
  return ShouldTrimFromURL(static_cast<char16_t>(ch));
}
40
// Given an already-initialized begin index and length, this shrinks the range
// to eliminate "should-be-trimmed" characters (see ShouldTrimFromURL). Note
// that the length does *not* indicate the length of untrimmed data from
// |*begin|, but rather the end position in the input string (so the string
// starts at character |*begin| in the spec, and goes until |*len|).
//
// Leading trimmable characters are always skipped by advancing |*begin|.
// Trailing trimmable characters are removed by decreasing |*len| only when
// |trim_path_end| is true; otherwise |*len| is left untouched.
template <typename CHAR>
inline void TrimURL(const CHAR* spec,
                    int* begin,
                    int* len,
                    bool trim_path_end = true) {
  // Strip leading whitespace and control characters.
  while (*begin < *len && ShouldTrimFromURL(spec[*begin])) {
    ++(*begin);
  }

  if (!trim_path_end) {
    return;
  }

  // Strip trailing whitespace and control characters. The `*len > *begin`
  // test is needed for when the input string is all blanks: the loop above
  // has then advanced |*begin| up to |*len|, and we must not back up past it.
  while (*len > *begin && ShouldTrimFromURL(spec[*len - 1])) {
    --(*len);
  }
}
61
// Counts the number of consecutive slashes or backslashes starting at the
// given offset in the given string of the given length. Slashes and
// backslashes can be mixed. Returns 0 if `begin_offset` is not less than
// `str_len` or the character at `begin_offset` is neither '/' nor '\'.
//
// TODO(crbug.com/40063064): Rename this function to
// `CountConsecutiveSlashesOrBackslashes`.
template <typename CHAR>
inline int CountConsecutiveSlashes(const CHAR* str,
                                   int begin_offset,
                                   int str_len) {
  int count = 0;
  // Stop at the end of the string or at the first non-separator character.
  while (begin_offset + count < str_len &&
         IsSlashOrBackslash(str[begin_offset + count])) {
    ++count;
  }
  return count;
}
79
// Returns true if `ch` is a forward slash ('/'). Unlike IsSlashOrBackslash,
// a backslash does not match.
//
// Usable in constant expressions.
constexpr bool IsSlash(char16_t ch) {
  return ch == '/';
}
// Narrow-character overload: converts `ch` to char16_t and defers to the
// overload above.
constexpr bool IsSlash(char ch) {
  return IsSlash(static_cast<char16_t>(ch));
}
87
// Counts the number of consecutive forward slashes starting at the given
// offset in the given string of the given length. Backslashes are not
// counted and end the run. Returns 0 if `begin_offset` is not less than
// `str_len` or the character at `begin_offset` is not '/'.
//
// TODO(crbug.com/40063064): Rename this function to
// `CountConsecutiveSlashes` after the current `CountConsecutiveSlashes` is
// renamed to `CountConsecutiveSlashesOrBackslashes`.
template <typename CHAR>
inline int CountConsecutiveSlashesButNotCountBackslashes(const CHAR* str,
                                                         int begin_offset,
                                                         int str_len) {
  int count = 0;
  // Stop at the end of the string or at the first character that is not '/'.
  while (begin_offset + count < str_len &&
         IsSlash(str[begin_offset + count])) {
    ++count;
  }
  return count;
}
104
105 // Internal functions in url_parse.cc that parse the path, that is, everything
106 // following the authority section. The input is the range of everything
107 // following the authority section, and the output is the identified ranges.
108 //
109 // This is designed for the file URL parser or other consumers who may do
110 // special stuff at the beginning, but want regular path parsing, it just
111 // maps to the internal parsing function for paths.
112 void ParsePathInternal(const char* spec,
113 const Component& path,
114 Component* filepath,
115 Component* query,
116 Component* ref);
117 void ParsePathInternal(const char16_t* spec,
118 const Component& path,
119 Component* filepath,
120 Component* query,
121 Component* ref);
122
123 // Internal functions in url_parse.cc that parse non-special URLs, which are
124 // similar to `ParseNonSpecialURL` functions in url_parse.h, but with
125 // `trim_path_end` parameter that controls whether to trim path end or not.
126 Parsed ParseNonSpecialURLInternal(std::string_view url, bool trim_path_end);
127 Parsed ParseNonSpecialURLInternal(std::u16string_view url, bool trim_path_end);
128
129 // Given a spec and a pointer to the character after the colon following the
130 // special scheme, this parses it and fills in the structure, Every item in the
131 // parsed structure is filled EXCEPT for the scheme, which is untouched.
132 void ParseAfterSpecialScheme(const char* spec,
133 int spec_len,
134 int after_scheme,
135 Parsed* parsed);
136 void ParseAfterSpecialScheme(const char16_t* spec,
137 int spec_len,
138 int after_scheme,
139 Parsed* parsed);
140
141 // Given a spec and a pointer to the character after the colon following the
142 // non-special scheme, this parses it and fills in the structure, Every item in
143 // the parsed structure is filled EXCEPT for the scheme, which is untouched.
144 void ParseAfterNonSpecialScheme(const char* spec,
145 int spec_len,
146 int after_scheme,
147 Parsed* parsed);
148 void ParseAfterNonSpecialScheme(const char16_t* spec,
149 int spec_len,
150 int after_scheme,
151 Parsed* parsed);
152
153 } // namespace url
154
155 #endif // URL_URL_PARSE_INTERNAL_H_
156