• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright 2015 gRPC authors.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //     http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 
17 #include <grpc/support/port_platform.h>
18 
19 #include "src/core/lib/uri/uri_parser.h"
20 
21 #include <ctype.h>
22 #include <stddef.h>
23 
24 #include <algorithm>
25 #include <functional>
26 #include <map>
27 #include <string>
28 #include <utility>
29 
30 #include "absl/status/status.h"
31 #include "absl/strings/ascii.h"
32 #include "absl/strings/escaping.h"
33 #include "absl/strings/match.h"
34 #include "absl/strings/str_cat.h"
35 #include "absl/strings/str_format.h"
36 #include "absl/strings/str_join.h"
37 #include "absl/strings/str_split.h"
38 #include "absl/strings/strip.h"
39 
40 #include <grpc/support/log.h>
41 
42 namespace grpc_core {
43 
44 namespace {
45 
// Reports whether `c` is one of the RFC 3986 "sub-delims" characters:
//   ! $ & ' ( ) * + , ; =
// See https://datatracker.ietf.org/doc/html/rfc3986#section-2.2
bool IsSubDelimChar(char c) {
  static constexpr char kSubDelims[] = {'!', '$', '&', '\'', '(', ')',
                                        '*', '+', ',', ';',  '='};
  for (const char delim : kSubDelims) {
    if (c == delim) return true;
  }
  return false;
}
65 
66 // Returns true for any unreserved character, as defined in:
67 // https://datatracker.ietf.org/doc/html/rfc3986#section-2.3
IsUnreservedChar(char c)68 bool IsUnreservedChar(char c) {
69   if (absl::ascii_isalnum(c)) return true;
70   switch (c) {
71     case '-':
72     case '.':
73     case '_':
74     case '~':
75       return true;
76   }
77   return false;
78 }
79 
80 // Returns true for any character in scheme, as defined in:
81 // https://datatracker.ietf.org/doc/html/rfc3986#section-3.1
IsSchemeChar(char c)82 bool IsSchemeChar(char c) {
83   if (absl::ascii_isalnum(c)) return true;
84   switch (c) {
85     case '+':
86     case '-':
87     case '.':
88       return true;
89   }
90   return false;
91 }
92 
93 // Returns true for any character in authority, as defined in:
94 // https://datatracker.ietf.org/doc/html/rfc3986#section-3.2
IsAuthorityChar(char c)95 bool IsAuthorityChar(char c) {
96   if (IsUnreservedChar(c)) return true;
97   if (IsSubDelimChar(c)) return true;
98   switch (c) {
99     case ':':
100     case '[':
101     case ']':
102     case '@':
103       return true;
104   }
105   return false;
106 }
107 
108 // Returns true for any character in pchar, as defined in:
109 // https://datatracker.ietf.org/doc/html/rfc3986#section-3.3
IsPChar(char c)110 bool IsPChar(char c) {
111   if (IsUnreservedChar(c)) return true;
112   if (IsSubDelimChar(c)) return true;
113   switch (c) {
114     case ':':
115     case '@':
116       return true;
117   }
118   return false;
119 }
120 
121 // Returns true for any character allowed in a URI path, as defined in:
122 // https://datatracker.ietf.org/doc/html/rfc3986#section-3.3
IsPathChar(char c)123 bool IsPathChar(char c) { return IsPChar(c) || c == '/'; }
124 
125 // Returns true for any character allowed in a URI query or fragment,
126 // as defined in:
127 // See https://tools.ietf.org/html/rfc3986#section-3.4
IsQueryOrFragmentChar(char c)128 bool IsQueryOrFragmentChar(char c) {
129   return IsPChar(c) || c == '/' || c == '?';
130 }
131 
132 // Same as IsQueryOrFragmentChar(), but excludes '&' and '='.
IsQueryKeyOrValueChar(char c)133 bool IsQueryKeyOrValueChar(char c) {
134   return c != '&' && c != '=' && IsQueryOrFragmentChar(c);
135 }
136 
137 // Returns a copy of str, percent-encoding any character for which
138 // is_allowed_char() returns false.
PercentEncode(absl::string_view str,std::function<bool (char)> is_allowed_char)139 std::string PercentEncode(absl::string_view str,
140                           std::function<bool(char)> is_allowed_char) {
141   std::string out;
142   for (char c : str) {
143     if (!is_allowed_char(c)) {
144       std::string hex = absl::BytesToHexString(absl::string_view(&c, 1));
145       GPR_ASSERT(hex.size() == 2);
146       // BytesToHexString() returns lower case, but
147       // https://datatracker.ietf.org/doc/html/rfc3986#section-6.2.2.1 says
148       // to prefer upper-case.
149       absl::AsciiStrToUpper(&hex);
150       out.push_back('%');
151       out.append(hex);
152     } else {
153       out.push_back(c);
154     }
155   }
156   return out;
157 }
158 
159 // Checks if this string is made up of query/fragment chars and '%' exclusively.
160 // See https://tools.ietf.org/html/rfc3986#section-3.4
IsQueryOrFragmentString(absl::string_view str)161 bool IsQueryOrFragmentString(absl::string_view str) {
162   for (char c : str) {
163     if (!IsQueryOrFragmentChar(c) && c != '%') return false;
164   }
165   return true;
166 }
167 
MakeInvalidURIStatus(absl::string_view part_name,absl::string_view uri,absl::string_view extra)168 absl::Status MakeInvalidURIStatus(absl::string_view part_name,
169                                   absl::string_view uri,
170                                   absl::string_view extra) {
171   return absl::InvalidArgumentError(absl::StrFormat(
172       "Could not parse '%s' from uri '%s'. %s", part_name, uri, extra));
173 }
174 
175 }  // namespace
176 
PercentEncodeAuthority(absl::string_view str)177 std::string URI::PercentEncodeAuthority(absl::string_view str) {
178   return PercentEncode(str, IsAuthorityChar);
179 }
180 
PercentEncodePath(absl::string_view str)181 std::string URI::PercentEncodePath(absl::string_view str) {
182   return PercentEncode(str, IsPathChar);
183 }
184 
185 // Similar to `grpc_permissive_percent_decode_slice`, this %-decodes all valid
186 // triplets, and passes through the rest verbatim.
PercentDecode(absl::string_view str)187 std::string URI::PercentDecode(absl::string_view str) {
188   if (str.empty() || !absl::StrContains(str, "%")) {
189     return std::string(str);
190   }
191   std::string out;
192   std::string unescaped;
193   out.reserve(str.size());
194   for (size_t i = 0; i < str.length(); i++) {
195     unescaped = "";
196     if (str[i] == '%' && i + 3 <= str.length() &&
197         absl::CUnescape(absl::StrCat("\\x", str.substr(i + 1, 2)),
198                         &unescaped) &&
199         unescaped.length() == 1) {
200       out += unescaped[0];
201       i += 2;
202     } else {
203       out += str[i];
204     }
205   }
206   return out;
207 }
208 
Parse(absl::string_view uri_text)209 absl::StatusOr<URI> URI::Parse(absl::string_view uri_text) {
210   absl::string_view remaining = uri_text;
211   // parse scheme
212   size_t offset = remaining.find(':');
213   if (offset == remaining.npos || offset == 0) {
214     return MakeInvalidURIStatus("scheme", uri_text, "Scheme not found.");
215   }
216   std::string scheme(remaining.substr(0, offset));
217   if (scheme.find_first_not_of("ABCDEFGHIJKLMNOPQRSTUVWXYZ"
218                                "abcdefghijklmnopqrstuvwxyz"
219                                "0123456789+-.") != std::string::npos) {
220     return MakeInvalidURIStatus("scheme", uri_text,
221                                 "Scheme contains invalid characters.");
222   }
223   if (!isalpha(scheme[0])) {
224     return MakeInvalidURIStatus(
225         "scheme", uri_text,
226         "Scheme must begin with an alpha character [A-Za-z].");
227   }
228   remaining.remove_prefix(offset + 1);
229   // parse authority
230   std::string authority;
231   if (absl::ConsumePrefix(&remaining, "//")) {
232     offset = remaining.find_first_of("/?#");
233     authority = PercentDecode(remaining.substr(0, offset));
234     if (offset == remaining.npos) {
235       remaining = "";
236     } else {
237       remaining.remove_prefix(offset);
238     }
239   }
240   // parse path
241   std::string path;
242   if (!remaining.empty()) {
243     offset = remaining.find_first_of("?#");
244     path = PercentDecode(remaining.substr(0, offset));
245     if (offset == remaining.npos) {
246       remaining = "";
247     } else {
248       remaining.remove_prefix(offset);
249     }
250   }
251   // parse query
252   std::vector<QueryParam> query_param_pairs;
253   if (absl::ConsumePrefix(&remaining, "?")) {
254     offset = remaining.find('#');
255     absl::string_view tmp_query = remaining.substr(0, offset);
256     if (tmp_query.empty()) {
257       return MakeInvalidURIStatus("query", uri_text, "Invalid query string.");
258     }
259     if (!IsQueryOrFragmentString(tmp_query)) {
260       return MakeInvalidURIStatus("query string", uri_text,
261                                   "Query string contains invalid characters.");
262     }
263     for (absl::string_view query_param : absl::StrSplit(tmp_query, '&')) {
264       const std::pair<absl::string_view, absl::string_view> possible_kv =
265           absl::StrSplit(query_param, absl::MaxSplits('=', 1));
266       if (possible_kv.first.empty()) continue;
267       query_param_pairs.push_back({PercentDecode(possible_kv.first),
268                                    PercentDecode(possible_kv.second)});
269     }
270     if (offset == remaining.npos) {
271       remaining = "";
272     } else {
273       remaining.remove_prefix(offset);
274     }
275   }
276   std::string fragment;
277   if (absl::ConsumePrefix(&remaining, "#")) {
278     if (!IsQueryOrFragmentString(remaining)) {
279       return MakeInvalidURIStatus("fragment", uri_text,
280                                   "Fragment contains invalid characters.");
281     }
282     fragment = PercentDecode(remaining);
283   }
284   return URI(std::move(scheme), std::move(authority), std::move(path),
285              std::move(query_param_pairs), std::move(fragment));
286 }
287 
Create(std::string scheme,std::string authority,std::string path,std::vector<QueryParam> query_parameter_pairs,std::string fragment)288 absl::StatusOr<URI> URI::Create(std::string scheme, std::string authority,
289                                 std::string path,
290                                 std::vector<QueryParam> query_parameter_pairs,
291                                 std::string fragment) {
292   if (!authority.empty() && !path.empty() && path[0] != '/') {
293     return absl::InvalidArgumentError(
294         "if authority is present, path must start with a '/'");
295   }
296   return URI(std::move(scheme), std::move(authority), std::move(path),
297              std::move(query_parameter_pairs), std::move(fragment));
298 }
299 
URI(std::string scheme,std::string authority,std::string path,std::vector<QueryParam> query_parameter_pairs,std::string fragment)300 URI::URI(std::string scheme, std::string authority, std::string path,
301          std::vector<QueryParam> query_parameter_pairs, std::string fragment)
302     : scheme_(std::move(scheme)),
303       authority_(std::move(authority)),
304       path_(std::move(path)),
305       query_parameter_pairs_(std::move(query_parameter_pairs)),
306       fragment_(std::move(fragment)) {
307   for (const auto& kv : query_parameter_pairs_) {
308     query_parameter_map_[kv.key] = kv.value;
309   }
310 }
311 
// Copy constructor. Copies every component and then builds the query
// parameter map from this object's own copy of the pairs rather than
// copying other's map.
URI::URI(const URI& other)
    : scheme_(other.scheme_),
      authority_(other.authority_),
      path_(other.path_),
      query_parameter_pairs_(other.query_parameter_pairs_),
      fragment_(other.fragment_) {
  for (auto it = query_parameter_pairs_.begin();
       it != query_parameter_pairs_.end(); ++it) {
    query_parameter_map_[it->key] = it->value;
  }
}
322 
// Copy assignment. Copies every component from `other` and rebuilds the
// query parameter map from this object's own copy of the pairs.
// Self-assignment is a no-op.
URI& URI::operator=(const URI& other) {
  if (this == &other) {
    return *this;
  }
  // Drop the entries that describe the previous value. Without this, keys
  // that are absent from `other` would survive the assignment.
  // NOTE(review): if the map holds views into query_parameter_pairs_, the
  // old entries would also dangle once the pairs are overwritten below --
  // confirm against the header. Clearing first is correct either way.
  query_parameter_map_.clear();
  scheme_ = other.scheme_;
  authority_ = other.authority_;
  path_ = other.path_;
  query_parameter_pairs_ = other.query_parameter_pairs_;
  fragment_ = other.fragment_;
  for (const auto& kv : query_parameter_pairs_) {
    query_parameter_map_[kv.key] = kv.value;
  }
  return *this;
}
337 
338 namespace {
339 
340 // A pair formatter for use with absl::StrJoin() for formatting query params.
341 struct QueryParameterFormatter {
operator ()grpc_core::__anon430cfb3d0211::QueryParameterFormatter342   void operator()(std::string* out, const URI::QueryParam& query_param) const {
343     out->append(
344         absl::StrCat(PercentEncode(query_param.key, IsQueryKeyOrValueChar), "=",
345                      PercentEncode(query_param.value, IsQueryKeyOrValueChar)));
346   }
347 };
348 
349 }  // namespace
350 
ToString() const351 std::string URI::ToString() const {
352   std::vector<std::string> parts = {PercentEncode(scheme_, IsSchemeChar), ":"};
353   if (!authority_.empty()) {
354     parts.emplace_back("//");
355     parts.emplace_back(PercentEncode(authority_, IsAuthorityChar));
356   }
357   if (!path_.empty()) {
358     parts.emplace_back(PercentEncode(path_, IsPathChar));
359   }
360   if (!query_parameter_pairs_.empty()) {
361     parts.push_back("?");
362     parts.push_back(
363         absl::StrJoin(query_parameter_pairs_, "&", QueryParameterFormatter()));
364   }
365   if (!fragment_.empty()) {
366     parts.push_back("#");
367     parts.push_back(PercentEncode(fragment_, IsQueryOrFragmentChar));
368   }
369   return absl::StrJoin(parts, "");
370 }
371 
372 }  // namespace grpc_core
373