• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2013 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifdef UNSAFE_BUFFERS_BUILD
6 // TODO(crbug.com/350788890): Remove this and spanify to fix the errors.
7 #pragma allow_unsafe_buffers
8 #endif
9 
10 #ifndef URL_URL_PARSE_INTERNAL_H_
11 #define URL_URL_PARSE_INTERNAL_H_
12 
13 // Contains common inline helper functions used by the URL parsing routines.
14 
15 #include "url/third_party/mozilla/url_parse.h"
16 
17 namespace url {
18 
// Returns true if |ch| is a URL separator, that is, '/' or '\'.
//
// The motivation: the URL Standard contains many condition checks like the
// following:
//
// > If url is special and c is U+002F (/) or U+005C (\), ...
//
// This helper gives those checks a single named implementation.
constexpr bool IsSlashOrBackslash(char16_t ch) {
  return ch == u'/' || ch == u'\\';
}
// Narrow-character overload; forwards to the char16_t overload.
constexpr bool IsSlashOrBackslash(char ch) {
  return IsSlashOrBackslash(static_cast<char16_t>(ch));
}
31 
// Returns true if |ch| should be trimmed from the URL because it is a space or
// a control character, i.e. its value is less than or equal to ' '.
constexpr bool ShouldTrimFromURL(char16_t ch) {
  return ch <= u' ';
}
// Narrow-character overload. The value is converted to char16_t before the
// comparison, so on platforms where char is signed, bytes with the high bit
// set convert to large char16_t values and are not trimmed.
constexpr bool ShouldTrimFromURL(char ch) {
  return ShouldTrimFromURL(static_cast<char16_t>(ch));
}
40 
// Given an already-initialized begin index and length, this shrinks the range
// to eliminate "should-be-trimmed" characters (those for which
// ShouldTrimFromURL() returns true).
//
// Note that |*len| does *not* indicate the length of untrimmed data starting
// at |*begin|, but rather the end position in the input string: the string
// starts at character |*begin| in |spec| and goes until |*len|.
//
// Leading characters are always trimmed. Trailing characters are trimmed only
// when |trim_path_end| is true.
template <typename CHAR>
inline void TrimURL(const CHAR* spec,
                    int* begin,
                    int* len,
                    bool trim_path_end = true) {
  // Strip leading whitespace and control characters.
  while (*begin < *len && ShouldTrimFromURL(spec[*begin])) {
    ++(*begin);
  }

  if (trim_path_end) {
    // Strip trailing whitespace and control characters. The |*len > *begin|
    // test is needed for when the input string is all blanks; we don't want
    // to back up past the (already advanced) beginning of the input.
    while (*len > *begin && ShouldTrimFromURL(spec[*len - 1])) {
      --(*len);
    }
  }
}
61 
// Counts the number of consecutive slashes or backslashes in |str|, starting
// at |begin_offset| and never examining positions at or beyond |str_len|.
// Slashes and backslashes may be mixed within the counted run.
//
// TODO(crbug.com/40063064): Rename this function to
// `CountConsecutiveSlashesOrBackslashes`.
template <typename CHAR>
inline int CountConsecutiveSlashes(const CHAR* str,
                                   int begin_offset,
                                   int str_len) {
  int count = 0;
  // Stop at the end of the string or at the first non-separator character.
  while (begin_offset + count < str_len &&
         IsSlashOrBackslash(str[begin_offset + count])) {
    ++count;
  }
  return count;
}
79 
// Returns true if |ch| is a forward slash ('/'). Unlike IsSlashOrBackslash(),
// a backslash does not match.
constexpr bool IsSlash(char16_t ch) {
  return ch == u'/';
}
// Narrow-character overload; forwards to the char16_t overload.
constexpr bool IsSlash(char ch) {
  return IsSlash(static_cast<char16_t>(ch));
}
87 
// Counts the number of consecutive slashes ('/' only; backslashes end the
// run) in |str|, starting at |begin_offset| and never examining positions at
// or beyond |str_len|.
//
// TODO(crbug.com/40063064): Rename this function to
// `CountConsecutiveSlashes` after the current `CountConsecutiveSlashes` is
// renamed to `CountConsecutiveSlashesOrBackslashes`.
template <typename CHAR>
inline int CountConsecutiveSlashesButNotCountBackslashes(const CHAR* str,
                                                         int begin_offset,
                                                         int str_len) {
  int count = 0;
  // Stop at the end of the string or at the first character that is not '/'.
  while (begin_offset + count < str_len &&
         IsSlash(str[begin_offset + count])) {
    ++count;
  }
  return count;
}
104 
105 // Internal functions in url_parse.cc that parse the path, that is, everything
106 // following the authority section. The input is the range of everything
107 // following the authority section, and the output is the identified ranges.
108 //
109 // This is designed for the file URL parser or other consumers who may do
110 // special stuff at the beginning, but want regular path parsing, it just
111 // maps to the internal parsing function for paths.
112 void ParsePathInternal(const char* spec,
113                        const Component& path,
114                        Component* filepath,
115                        Component* query,
116                        Component* ref);
117 void ParsePathInternal(const char16_t* spec,
118                        const Component& path,
119                        Component* filepath,
120                        Component* query,
121                        Component* ref);
122 
123 // Internal functions in url_parse.cc that parse non-special URLs, which are
124 // similar to `ParseNonSpecialURL` functions in url_parse.h, but with
125 // `trim_path_end` parameter that controls whether to trim path end or not.
126 Parsed ParseNonSpecialURLInternal(std::string_view url, bool trim_path_end);
127 Parsed ParseNonSpecialURLInternal(std::u16string_view url, bool trim_path_end);
128 
129 // Given a spec and a pointer to the character after the colon following the
130 // special scheme, this parses it and fills in the structure, Every item in the
131 // parsed structure is filled EXCEPT for the scheme, which is untouched.
132 void ParseAfterSpecialScheme(const char* spec,
133                              int spec_len,
134                              int after_scheme,
135                              Parsed* parsed);
136 void ParseAfterSpecialScheme(const char16_t* spec,
137                              int spec_len,
138                              int after_scheme,
139                              Parsed* parsed);
140 
141 // Given a spec and a pointer to the character after the colon following the
142 // non-special scheme, this parses it and fills in the structure, Every item in
143 // the parsed structure is filled EXCEPT for the scheme, which is untouched.
144 void ParseAfterNonSpecialScheme(const char* spec,
145                                 int spec_len,
146                                 int after_scheme,
147                                 Parsed* parsed);
148 void ParseAfterNonSpecialScheme(const char16_t* spec,
149                                 int spec_len,
150                                 int after_scheme,
151                                 Parsed* parsed);
152 
153 }  // namespace url
154 
155 #endif  // URL_URL_PARSE_INTERNAL_H_
156