• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright 2015 gRPC authors.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //     http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 
17 #include "src/core/util/uri.h"
18 
19 #include <ctype.h>
20 #include <grpc/support/port_platform.h>
21 #include <stddef.h>
22 
23 #include <algorithm>
24 #include <functional>
25 #include <map>
26 #include <string>
27 #include <utility>
28 
29 #include "absl/log/check.h"
30 #include "absl/status/status.h"
31 #include "absl/strings/ascii.h"
32 #include "absl/strings/escaping.h"
33 #include "absl/strings/match.h"
34 #include "absl/strings/str_cat.h"
35 #include "absl/strings/str_format.h"
36 #include "absl/strings/str_join.h"
37 #include "absl/strings/str_split.h"
38 #include "absl/strings/strip.h"
39 
40 namespace grpc_core {
41 
42 namespace {
43 
// Reports whether `c` is one of the RFC 3986 "sub-delims" characters:
//   ! $ & ' ( ) * + , ; =
// See https://datatracker.ietf.org/doc/html/rfc3986#section-2.2
bool IsSubDelimChar(char c) {
  static constexpr char kSubDelims[] = {'!', '$', '&', '\'', '(', ')',
                                        '*', '+', ',', ';',  '='};
  for (const char delim : kSubDelims) {
    if (c == delim) return true;
  }
  return false;
}
63 
64 // Returns true for any unreserved character, as defined in:
65 // https://datatracker.ietf.org/doc/html/rfc3986#section-2.3
IsUnreservedChar(char c)66 bool IsUnreservedChar(char c) {
67   if (absl::ascii_isalnum(c)) return true;
68   switch (c) {
69     case '-':
70     case '.':
71     case '_':
72     case '~':
73       return true;
74   }
75   return false;
76 }
77 
78 // Returns true for any character in scheme, as defined in:
79 // https://datatracker.ietf.org/doc/html/rfc3986#section-3.1
IsSchemeChar(char c)80 bool IsSchemeChar(char c) {
81   if (absl::ascii_isalnum(c)) return true;
82   switch (c) {
83     case '+':
84     case '-':
85     case '.':
86       return true;
87   }
88   return false;
89 }
90 
91 // Returns true for any character in authority, as defined in:
92 // https://datatracker.ietf.org/doc/html/rfc3986#section-3.2
IsAuthorityChar(char c)93 bool IsAuthorityChar(char c) {
94   if (IsUnreservedChar(c)) return true;
95   if (IsSubDelimChar(c)) return true;
96   switch (c) {
97     case ':':
98     case '[':
99     case ']':
100     case '@':
101       return true;
102   }
103   return false;
104 }
105 
106 // Returns true for any character in pchar, as defined in:
107 // https://datatracker.ietf.org/doc/html/rfc3986#section-3.3
IsPChar(char c)108 bool IsPChar(char c) {
109   if (IsUnreservedChar(c)) return true;
110   if (IsSubDelimChar(c)) return true;
111   switch (c) {
112     case ':':
113     case '@':
114       return true;
115   }
116   return false;
117 }
118 
119 // Returns true for any character allowed in a URI path, as defined in:
120 // https://datatracker.ietf.org/doc/html/rfc3986#section-3.3
IsPathChar(char c)121 bool IsPathChar(char c) { return IsPChar(c) || c == '/'; }
122 
123 // Returns true for any character allowed in a URI query or fragment,
124 // as defined in:
125 // See https://tools.ietf.org/html/rfc3986#section-3.4
IsQueryOrFragmentChar(char c)126 bool IsQueryOrFragmentChar(char c) {
127   return IsPChar(c) || c == '/' || c == '?';
128 }
129 
130 // Same as IsQueryOrFragmentChar(), but excludes '&' and '='.
IsQueryKeyOrValueChar(char c)131 bool IsQueryKeyOrValueChar(char c) {
132   return c != '&' && c != '=' && IsQueryOrFragmentChar(c);
133 }
134 
135 // Returns a copy of str, percent-encoding any character for which
136 // is_allowed_char() returns false.
PercentEncode(absl::string_view str,std::function<bool (char)> is_allowed_char)137 std::string PercentEncode(absl::string_view str,
138                           std::function<bool(char)> is_allowed_char) {
139   std::string out;
140   for (char c : str) {
141     if (!is_allowed_char(c)) {
142       std::string hex = absl::BytesToHexString(absl::string_view(&c, 1));
143       CHECK_EQ(hex.size(), 2u);
144       // BytesToHexString() returns lower case, but
145       // https://datatracker.ietf.org/doc/html/rfc3986#section-6.2.2.1 says
146       // to prefer upper-case.
147       absl::AsciiStrToUpper(&hex);
148       out.push_back('%');
149       out.append(hex);
150     } else {
151       out.push_back(c);
152     }
153   }
154   return out;
155 }
156 
157 // Checks if this string is made up of query/fragment chars and '%' exclusively.
158 // See https://tools.ietf.org/html/rfc3986#section-3.4
IsQueryOrFragmentString(absl::string_view str)159 bool IsQueryOrFragmentString(absl::string_view str) {
160   for (char c : str) {
161     if (!IsQueryOrFragmentChar(c) && c != '%') return false;
162   }
163   return true;
164 }
165 
MakeInvalidURIStatus(absl::string_view part_name,absl::string_view uri,absl::string_view extra)166 absl::Status MakeInvalidURIStatus(absl::string_view part_name,
167                                   absl::string_view uri,
168                                   absl::string_view extra) {
169   return absl::InvalidArgumentError(absl::StrFormat(
170       "Could not parse '%s' from uri '%s'. %s", part_name, uri, extra));
171 }
172 
173 }  // namespace
174 
PercentEncodeAuthority(absl::string_view str)175 std::string URI::PercentEncodeAuthority(absl::string_view str) {
176   return PercentEncode(str, IsAuthorityChar);
177 }
178 
PercentEncodePath(absl::string_view str)179 std::string URI::PercentEncodePath(absl::string_view str) {
180   return PercentEncode(str, IsPathChar);
181 }
182 
183 // Similar to `grpc_permissive_percent_decode_slice`, this %-decodes all valid
184 // triplets, and passes through the rest verbatim.
PercentDecode(absl::string_view str)185 std::string URI::PercentDecode(absl::string_view str) {
186   if (str.empty() || !absl::StrContains(str, "%")) {
187     return std::string(str);
188   }
189   std::string out;
190   std::string unescaped;
191   out.reserve(str.size());
192   for (size_t i = 0; i < str.length(); i++) {
193     unescaped = "";
194     if (str[i] == '%' && i + 3 <= str.length() &&
195         absl::CUnescape(absl::StrCat("\\x", str.substr(i + 1, 2)),
196                         &unescaped) &&
197         unescaped.length() == 1) {
198       out += unescaped[0];
199       i += 2;
200     } else {
201       out += str[i];
202     }
203   }
204   return out;
205 }
206 
Parse(absl::string_view uri_text)207 absl::StatusOr<URI> URI::Parse(absl::string_view uri_text) {
208   absl::string_view remaining = uri_text;
209   // parse scheme
210   size_t offset = remaining.find(':');
211   if (offset == remaining.npos || offset == 0) {
212     return MakeInvalidURIStatus("scheme", uri_text, "Scheme not found.");
213   }
214   std::string scheme(remaining.substr(0, offset));
215   if (scheme.find_first_not_of("ABCDEFGHIJKLMNOPQRSTUVWXYZ"
216                                "abcdefghijklmnopqrstuvwxyz"
217                                "0123456789+-.") != std::string::npos) {
218     return MakeInvalidURIStatus("scheme", uri_text,
219                                 "Scheme contains invalid characters.");
220   }
221   if (!isalpha(scheme[0])) {
222     return MakeInvalidURIStatus(
223         "scheme", uri_text,
224         "Scheme must begin with an alpha character [A-Za-z].");
225   }
226   remaining.remove_prefix(offset + 1);
227   // parse authority
228   std::string authority;
229   if (absl::ConsumePrefix(&remaining, "//")) {
230     offset = remaining.find_first_of("/?#");
231     authority = PercentDecode(remaining.substr(0, offset));
232     if (offset == remaining.npos) {
233       remaining = "";
234     } else {
235       remaining.remove_prefix(offset);
236     }
237   }
238   // parse path
239   std::string path;
240   if (!remaining.empty()) {
241     offset = remaining.find_first_of("?#");
242     path = PercentDecode(remaining.substr(0, offset));
243     if (offset == remaining.npos) {
244       remaining = "";
245     } else {
246       remaining.remove_prefix(offset);
247     }
248   }
249   // parse query
250   std::vector<QueryParam> query_param_pairs;
251   if (absl::ConsumePrefix(&remaining, "?")) {
252     offset = remaining.find('#');
253     absl::string_view tmp_query = remaining.substr(0, offset);
254     if (tmp_query.empty()) {
255       return MakeInvalidURIStatus("query", uri_text, "Invalid query string.");
256     }
257     if (!IsQueryOrFragmentString(tmp_query)) {
258       return MakeInvalidURIStatus("query string", uri_text,
259                                   "Query string contains invalid characters.");
260     }
261     for (absl::string_view query_param : absl::StrSplit(tmp_query, '&')) {
262       const std::pair<absl::string_view, absl::string_view> possible_kv =
263           absl::StrSplit(query_param, absl::MaxSplits('=', 1));
264       if (possible_kv.first.empty()) continue;
265       query_param_pairs.push_back({PercentDecode(possible_kv.first),
266                                    PercentDecode(possible_kv.second)});
267     }
268     if (offset == remaining.npos) {
269       remaining = "";
270     } else {
271       remaining.remove_prefix(offset);
272     }
273   }
274   std::string fragment;
275   if (absl::ConsumePrefix(&remaining, "#")) {
276     if (!IsQueryOrFragmentString(remaining)) {
277       return MakeInvalidURIStatus("fragment", uri_text,
278                                   "Fragment contains invalid characters.");
279     }
280     fragment = PercentDecode(remaining);
281   }
282   return URI(std::move(scheme), std::move(authority), std::move(path),
283              std::move(query_param_pairs), std::move(fragment));
284 }
285 
Create(std::string scheme,std::string authority,std::string path,std::vector<QueryParam> query_parameter_pairs,std::string fragment)286 absl::StatusOr<URI> URI::Create(std::string scheme, std::string authority,
287                                 std::string path,
288                                 std::vector<QueryParam> query_parameter_pairs,
289                                 std::string fragment) {
290   if (!authority.empty() && !path.empty() && path[0] != '/') {
291     return absl::InvalidArgumentError(
292         "if authority is present, path must start with a '/'");
293   }
294   return URI(std::move(scheme), std::move(authority), std::move(path),
295              std::move(query_parameter_pairs), std::move(fragment));
296 }
297 
URI(std::string scheme,std::string authority,std::string path,std::vector<QueryParam> query_parameter_pairs,std::string fragment)298 URI::URI(std::string scheme, std::string authority, std::string path,
299          std::vector<QueryParam> query_parameter_pairs, std::string fragment)
300     : scheme_(std::move(scheme)),
301       authority_(std::move(authority)),
302       path_(std::move(path)),
303       query_parameter_pairs_(std::move(query_parameter_pairs)),
304       fragment_(std::move(fragment)) {
305   for (const auto& kv : query_parameter_pairs_) {
306     query_parameter_map_[kv.key] = kv.value;
307   }
308 }
309 
// Copy constructor: copies every component from `other`, then rebuilds the
// lookup map from this object's own copy of the query parameters rather than
// copying `other`'s map.
URI::URI(const URI& other)
    : scheme_(other.scheme_),
      authority_(other.authority_),
      path_(other.path_),
      query_parameter_pairs_(other.query_parameter_pairs_),
      fragment_(other.fragment_) {
  // Each assignment overwrites, so for a repeated key the last value wins.
  for (const auto& param : query_parameter_pairs_) {
    query_parameter_map_[param.key] = param.value;
  }
}
320 
// Copy assignment: replaces every component with a copy of `other`'s and
// rebuilds the lookup map from this object's own query parameters.
// Self-assignment is a no-op.
URI& URI::operator=(const URI& other) {
  if (this == &other) {
    return *this;
  }
  // Discard the old lookup entries before touching the parameter list.
  // Without this, keys that exist only in the previous value would survive
  // the assignment and the map would no longer match
  // query_parameter_pairs_.
  // NOTE(review): the map's declaration is not visible here; if it stores
  // views into query_parameter_pairs_, stale entries would also dangle once
  // the list is replaced below -- confirm against the header.
  query_parameter_map_.clear();
  scheme_ = other.scheme_;
  authority_ = other.authority_;
  path_ = other.path_;
  query_parameter_pairs_ = other.query_parameter_pairs_;
  fragment_ = other.fragment_;
  // Each assignment overwrites, so for a repeated key the last value wins.
  for (const auto& kv : query_parameter_pairs_) {
    query_parameter_map_[kv.key] = kv.value;
  }
  return *this;
}
335 
336 namespace {
337 
338 // A pair formatter for use with absl::StrJoin() for formatting query params.
339 struct QueryParameterFormatter {
operator ()grpc_core::__anon47292c7f0211::QueryParameterFormatter340   void operator()(std::string* out, const URI::QueryParam& query_param) const {
341     out->append(
342         absl::StrCat(PercentEncode(query_param.key, IsQueryKeyOrValueChar), "=",
343                      PercentEncode(query_param.value, IsQueryKeyOrValueChar)));
344   }
345 };
346 
347 }  // namespace
348 
ToString() const349 std::string URI::ToString() const {
350   std::vector<std::string> parts = {PercentEncode(scheme_, IsSchemeChar), ":"};
351   if (!authority_.empty()) {
352     parts.emplace_back("//");
353     parts.emplace_back(PercentEncode(authority_, IsAuthorityChar));
354   }
355   parts.emplace_back(EncodedPathAndQueryParams());
356   if (!fragment_.empty()) {
357     parts.push_back("#");
358     parts.push_back(PercentEncode(fragment_, IsQueryOrFragmentChar));
359   }
360   return absl::StrJoin(parts, "");
361 }
362 
EncodedPathAndQueryParams() const363 std::string URI::EncodedPathAndQueryParams() const {
364   std::vector<std::string> parts;
365   if (!path_.empty()) {
366     parts.emplace_back(PercentEncode(path_, IsPathChar));
367   }
368   if (!query_parameter_pairs_.empty()) {
369     parts.push_back("?");
370     parts.push_back(
371         absl::StrJoin(query_parameter_pairs_, "&", QueryParameterFormatter()));
372   }
373   return absl::StrJoin(parts, "");
374 }
375 
376 }  // namespace grpc_core
377