1 //
2 // Copyright 2015 gRPC authors.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16
17 #include "src/core/util/uri.h"
18
19 #include <ctype.h>
20 #include <grpc/support/port_platform.h>
21 #include <stddef.h>
22
23 #include <algorithm>
24 #include <functional>
25 #include <map>
26 #include <string>
27 #include <utility>
28
29 #include "absl/log/check.h"
30 #include "absl/status/status.h"
31 #include "absl/strings/ascii.h"
32 #include "absl/strings/escaping.h"
33 #include "absl/strings/match.h"
34 #include "absl/strings/str_cat.h"
35 #include "absl/strings/str_format.h"
36 #include "absl/strings/str_join.h"
37 #include "absl/strings/str_split.h"
38 #include "absl/strings/strip.h"
39
40 namespace grpc_core {
41
42 namespace {
43
44 // Returns true for any sub-delim character, as defined in:
45 // https://datatracker.ietf.org/doc/html/rfc3986#section-2.2
IsSubDelimChar(char c)46 bool IsSubDelimChar(char c) {
47 switch (c) {
48 case '!':
49 case '$':
50 case '&':
51 case '\'':
52 case '(':
53 case ')':
54 case '*':
55 case '+':
56 case ',':
57 case ';':
58 case '=':
59 return true;
60 }
61 return false;
62 }
63
64 // Returns true for any unreserved character, as defined in:
65 // https://datatracker.ietf.org/doc/html/rfc3986#section-2.3
IsUnreservedChar(char c)66 bool IsUnreservedChar(char c) {
67 if (absl::ascii_isalnum(c)) return true;
68 switch (c) {
69 case '-':
70 case '.':
71 case '_':
72 case '~':
73 return true;
74 }
75 return false;
76 }
77
78 // Returns true for any character in scheme, as defined in:
79 // https://datatracker.ietf.org/doc/html/rfc3986#section-3.1
IsSchemeChar(char c)80 bool IsSchemeChar(char c) {
81 if (absl::ascii_isalnum(c)) return true;
82 switch (c) {
83 case '+':
84 case '-':
85 case '.':
86 return true;
87 }
88 return false;
89 }
90
91 // Returns true for any character in authority, as defined in:
92 // https://datatracker.ietf.org/doc/html/rfc3986#section-3.2
IsAuthorityChar(char c)93 bool IsAuthorityChar(char c) {
94 if (IsUnreservedChar(c)) return true;
95 if (IsSubDelimChar(c)) return true;
96 switch (c) {
97 case ':':
98 case '[':
99 case ']':
100 case '@':
101 return true;
102 }
103 return false;
104 }
105
106 // Returns true for any character in pchar, as defined in:
107 // https://datatracker.ietf.org/doc/html/rfc3986#section-3.3
IsPChar(char c)108 bool IsPChar(char c) {
109 if (IsUnreservedChar(c)) return true;
110 if (IsSubDelimChar(c)) return true;
111 switch (c) {
112 case ':':
113 case '@':
114 return true;
115 }
116 return false;
117 }
118
119 // Returns true for any character allowed in a URI path, as defined in:
120 // https://datatracker.ietf.org/doc/html/rfc3986#section-3.3
IsPathChar(char c)121 bool IsPathChar(char c) { return IsPChar(c) || c == '/'; }
122
123 // Returns true for any character allowed in a URI query or fragment,
124 // as defined in:
125 // See https://tools.ietf.org/html/rfc3986#section-3.4
IsQueryOrFragmentChar(char c)126 bool IsQueryOrFragmentChar(char c) {
127 return IsPChar(c) || c == '/' || c == '?';
128 }
129
130 // Same as IsQueryOrFragmentChar(), but excludes '&' and '='.
IsQueryKeyOrValueChar(char c)131 bool IsQueryKeyOrValueChar(char c) {
132 return c != '&' && c != '=' && IsQueryOrFragmentChar(c);
133 }
134
135 // Returns a copy of str, percent-encoding any character for which
136 // is_allowed_char() returns false.
PercentEncode(absl::string_view str,std::function<bool (char)> is_allowed_char)137 std::string PercentEncode(absl::string_view str,
138 std::function<bool(char)> is_allowed_char) {
139 std::string out;
140 for (char c : str) {
141 if (!is_allowed_char(c)) {
142 std::string hex = absl::BytesToHexString(absl::string_view(&c, 1));
143 CHECK_EQ(hex.size(), 2u);
144 // BytesToHexString() returns lower case, but
145 // https://datatracker.ietf.org/doc/html/rfc3986#section-6.2.2.1 says
146 // to prefer upper-case.
147 absl::AsciiStrToUpper(&hex);
148 out.push_back('%');
149 out.append(hex);
150 } else {
151 out.push_back(c);
152 }
153 }
154 return out;
155 }
156
157 // Checks if this string is made up of query/fragment chars and '%' exclusively.
158 // See https://tools.ietf.org/html/rfc3986#section-3.4
IsQueryOrFragmentString(absl::string_view str)159 bool IsQueryOrFragmentString(absl::string_view str) {
160 for (char c : str) {
161 if (!IsQueryOrFragmentChar(c) && c != '%') return false;
162 }
163 return true;
164 }
165
MakeInvalidURIStatus(absl::string_view part_name,absl::string_view uri,absl::string_view extra)166 absl::Status MakeInvalidURIStatus(absl::string_view part_name,
167 absl::string_view uri,
168 absl::string_view extra) {
169 return absl::InvalidArgumentError(absl::StrFormat(
170 "Could not parse '%s' from uri '%s'. %s", part_name, uri, extra));
171 }
172
173 } // namespace
174
PercentEncodeAuthority(absl::string_view str)175 std::string URI::PercentEncodeAuthority(absl::string_view str) {
176 return PercentEncode(str, IsAuthorityChar);
177 }
178
PercentEncodePath(absl::string_view str)179 std::string URI::PercentEncodePath(absl::string_view str) {
180 return PercentEncode(str, IsPathChar);
181 }
182
183 // Similar to `grpc_permissive_percent_decode_slice`, this %-decodes all valid
184 // triplets, and passes through the rest verbatim.
PercentDecode(absl::string_view str)185 std::string URI::PercentDecode(absl::string_view str) {
186 if (str.empty() || !absl::StrContains(str, "%")) {
187 return std::string(str);
188 }
189 std::string out;
190 std::string unescaped;
191 out.reserve(str.size());
192 for (size_t i = 0; i < str.length(); i++) {
193 unescaped = "";
194 if (str[i] == '%' && i + 3 <= str.length() &&
195 absl::CUnescape(absl::StrCat("\\x", str.substr(i + 1, 2)),
196 &unescaped) &&
197 unescaped.length() == 1) {
198 out += unescaped[0];
199 i += 2;
200 } else {
201 out += str[i];
202 }
203 }
204 return out;
205 }
206
Parse(absl::string_view uri_text)207 absl::StatusOr<URI> URI::Parse(absl::string_view uri_text) {
208 absl::string_view remaining = uri_text;
209 // parse scheme
210 size_t offset = remaining.find(':');
211 if (offset == remaining.npos || offset == 0) {
212 return MakeInvalidURIStatus("scheme", uri_text, "Scheme not found.");
213 }
214 std::string scheme(remaining.substr(0, offset));
215 if (scheme.find_first_not_of("ABCDEFGHIJKLMNOPQRSTUVWXYZ"
216 "abcdefghijklmnopqrstuvwxyz"
217 "0123456789+-.") != std::string::npos) {
218 return MakeInvalidURIStatus("scheme", uri_text,
219 "Scheme contains invalid characters.");
220 }
221 if (!isalpha(scheme[0])) {
222 return MakeInvalidURIStatus(
223 "scheme", uri_text,
224 "Scheme must begin with an alpha character [A-Za-z].");
225 }
226 remaining.remove_prefix(offset + 1);
227 // parse authority
228 std::string authority;
229 if (absl::ConsumePrefix(&remaining, "//")) {
230 offset = remaining.find_first_of("/?#");
231 authority = PercentDecode(remaining.substr(0, offset));
232 if (offset == remaining.npos) {
233 remaining = "";
234 } else {
235 remaining.remove_prefix(offset);
236 }
237 }
238 // parse path
239 std::string path;
240 if (!remaining.empty()) {
241 offset = remaining.find_first_of("?#");
242 path = PercentDecode(remaining.substr(0, offset));
243 if (offset == remaining.npos) {
244 remaining = "";
245 } else {
246 remaining.remove_prefix(offset);
247 }
248 }
249 // parse query
250 std::vector<QueryParam> query_param_pairs;
251 if (absl::ConsumePrefix(&remaining, "?")) {
252 offset = remaining.find('#');
253 absl::string_view tmp_query = remaining.substr(0, offset);
254 if (tmp_query.empty()) {
255 return MakeInvalidURIStatus("query", uri_text, "Invalid query string.");
256 }
257 if (!IsQueryOrFragmentString(tmp_query)) {
258 return MakeInvalidURIStatus("query string", uri_text,
259 "Query string contains invalid characters.");
260 }
261 for (absl::string_view query_param : absl::StrSplit(tmp_query, '&')) {
262 const std::pair<absl::string_view, absl::string_view> possible_kv =
263 absl::StrSplit(query_param, absl::MaxSplits('=', 1));
264 if (possible_kv.first.empty()) continue;
265 query_param_pairs.push_back({PercentDecode(possible_kv.first),
266 PercentDecode(possible_kv.second)});
267 }
268 if (offset == remaining.npos) {
269 remaining = "";
270 } else {
271 remaining.remove_prefix(offset);
272 }
273 }
274 std::string fragment;
275 if (absl::ConsumePrefix(&remaining, "#")) {
276 if (!IsQueryOrFragmentString(remaining)) {
277 return MakeInvalidURIStatus("fragment", uri_text,
278 "Fragment contains invalid characters.");
279 }
280 fragment = PercentDecode(remaining);
281 }
282 return URI(std::move(scheme), std::move(authority), std::move(path),
283 std::move(query_param_pairs), std::move(fragment));
284 }
285
Create(std::string scheme,std::string authority,std::string path,std::vector<QueryParam> query_parameter_pairs,std::string fragment)286 absl::StatusOr<URI> URI::Create(std::string scheme, std::string authority,
287 std::string path,
288 std::vector<QueryParam> query_parameter_pairs,
289 std::string fragment) {
290 if (!authority.empty() && !path.empty() && path[0] != '/') {
291 return absl::InvalidArgumentError(
292 "if authority is present, path must start with a '/'");
293 }
294 return URI(std::move(scheme), std::move(authority), std::move(path),
295 std::move(query_parameter_pairs), std::move(fragment));
296 }
297
URI(std::string scheme,std::string authority,std::string path,std::vector<QueryParam> query_parameter_pairs,std::string fragment)298 URI::URI(std::string scheme, std::string authority, std::string path,
299 std::vector<QueryParam> query_parameter_pairs, std::string fragment)
300 : scheme_(std::move(scheme)),
301 authority_(std::move(authority)),
302 path_(std::move(path)),
303 query_parameter_pairs_(std::move(query_parameter_pairs)),
304 fragment_(std::move(fragment)) {
305 for (const auto& kv : query_parameter_pairs_) {
306 query_parameter_map_[kv.key] = kv.value;
307 }
308 }
309
URI(const URI & other)310 URI::URI(const URI& other)
311 : scheme_(other.scheme_),
312 authority_(other.authority_),
313 path_(other.path_),
314 query_parameter_pairs_(other.query_parameter_pairs_),
315 fragment_(other.fragment_) {
316 for (const auto& kv : query_parameter_pairs_) {
317 query_parameter_map_[kv.key] = kv.value;
318 }
319 }
320
operator =(const URI & other)321 URI& URI::operator=(const URI& other) {
322 if (this == &other) {
323 return *this;
324 }
325 scheme_ = other.scheme_;
326 authority_ = other.authority_;
327 path_ = other.path_;
328 query_parameter_pairs_ = other.query_parameter_pairs_;
329 fragment_ = other.fragment_;
330 for (const auto& kv : query_parameter_pairs_) {
331 query_parameter_map_[kv.key] = kv.value;
332 }
333 return *this;
334 }
335
336 namespace {
337
338 // A pair formatter for use with absl::StrJoin() for formatting query params.
339 struct QueryParameterFormatter {
operator ()grpc_core::__anon47292c7f0211::QueryParameterFormatter340 void operator()(std::string* out, const URI::QueryParam& query_param) const {
341 out->append(
342 absl::StrCat(PercentEncode(query_param.key, IsQueryKeyOrValueChar), "=",
343 PercentEncode(query_param.value, IsQueryKeyOrValueChar)));
344 }
345 };
346
347 } // namespace
348
ToString() const349 std::string URI::ToString() const {
350 std::vector<std::string> parts = {PercentEncode(scheme_, IsSchemeChar), ":"};
351 if (!authority_.empty()) {
352 parts.emplace_back("//");
353 parts.emplace_back(PercentEncode(authority_, IsAuthorityChar));
354 }
355 parts.emplace_back(EncodedPathAndQueryParams());
356 if (!fragment_.empty()) {
357 parts.push_back("#");
358 parts.push_back(PercentEncode(fragment_, IsQueryOrFragmentChar));
359 }
360 return absl::StrJoin(parts, "");
361 }
362
EncodedPathAndQueryParams() const363 std::string URI::EncodedPathAndQueryParams() const {
364 std::vector<std::string> parts;
365 if (!path_.empty()) {
366 parts.emplace_back(PercentEncode(path_, IsPathChar));
367 }
368 if (!query_parameter_pairs_.empty()) {
369 parts.push_back("?");
370 parts.push_back(
371 absl::StrJoin(query_parameter_pairs_, "&", QueryParameterFormatter()));
372 }
373 return absl::StrJoin(parts, "");
374 }
375
376 } // namespace grpc_core
377