1 //
2 // Copyright 2015 gRPC authors.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16
17 #include <grpc/support/port_platform.h>
18
19 #include "src/core/lib/uri/uri_parser.h"
20
21 #include <ctype.h>
22 #include <stddef.h>
23
24 #include <algorithm>
25 #include <functional>
26 #include <map>
27 #include <string>
28 #include <utility>
29
30 #include "absl/status/status.h"
31 #include "absl/strings/ascii.h"
32 #include "absl/strings/escaping.h"
33 #include "absl/strings/match.h"
34 #include "absl/strings/str_cat.h"
35 #include "absl/strings/str_format.h"
36 #include "absl/strings/str_join.h"
37 #include "absl/strings/str_split.h"
38 #include "absl/strings/strip.h"
39
40 #include <grpc/support/log.h>
41
42 namespace grpc_core {
43
44 namespace {
45
// Reports whether `c` is one of the RFC 3986 "sub-delims" characters:
//   "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
// https://datatracker.ietf.org/doc/html/rfc3986#section-2.2
bool IsSubDelimChar(char c) {
  return c == '!' || c == '$' || c == '&' || c == '\'' || c == '(' ||
         c == ')' || c == '*' || c == '+' || c == ',' || c == ';' || c == '=';
}
65
66 // Returns true for any unreserved character, as defined in:
67 // https://datatracker.ietf.org/doc/html/rfc3986#section-2.3
IsUnreservedChar(char c)68 bool IsUnreservedChar(char c) {
69 if (absl::ascii_isalnum(c)) return true;
70 switch (c) {
71 case '-':
72 case '.':
73 case '_':
74 case '~':
75 return true;
76 }
77 return false;
78 }
79
80 // Returns true for any character in scheme, as defined in:
81 // https://datatracker.ietf.org/doc/html/rfc3986#section-3.1
IsSchemeChar(char c)82 bool IsSchemeChar(char c) {
83 if (absl::ascii_isalnum(c)) return true;
84 switch (c) {
85 case '+':
86 case '-':
87 case '.':
88 return true;
89 }
90 return false;
91 }
92
93 // Returns true for any character in authority, as defined in:
94 // https://datatracker.ietf.org/doc/html/rfc3986#section-3.2
IsAuthorityChar(char c)95 bool IsAuthorityChar(char c) {
96 if (IsUnreservedChar(c)) return true;
97 if (IsSubDelimChar(c)) return true;
98 switch (c) {
99 case ':':
100 case '[':
101 case ']':
102 case '@':
103 return true;
104 }
105 return false;
106 }
107
108 // Returns true for any character in pchar, as defined in:
109 // https://datatracker.ietf.org/doc/html/rfc3986#section-3.3
IsPChar(char c)110 bool IsPChar(char c) {
111 if (IsUnreservedChar(c)) return true;
112 if (IsSubDelimChar(c)) return true;
113 switch (c) {
114 case ':':
115 case '@':
116 return true;
117 }
118 return false;
119 }
120
121 // Returns true for any character allowed in a URI path, as defined in:
122 // https://datatracker.ietf.org/doc/html/rfc3986#section-3.3
IsPathChar(char c)123 bool IsPathChar(char c) { return IsPChar(c) || c == '/'; }
124
125 // Returns true for any character allowed in a URI query or fragment,
126 // as defined in:
127 // See https://tools.ietf.org/html/rfc3986#section-3.4
IsQueryOrFragmentChar(char c)128 bool IsQueryOrFragmentChar(char c) {
129 return IsPChar(c) || c == '/' || c == '?';
130 }
131
132 // Same as IsQueryOrFragmentChar(), but excludes '&' and '='.
IsQueryKeyOrValueChar(char c)133 bool IsQueryKeyOrValueChar(char c) {
134 return c != '&' && c != '=' && IsQueryOrFragmentChar(c);
135 }
136
137 // Returns a copy of str, percent-encoding any character for which
138 // is_allowed_char() returns false.
PercentEncode(absl::string_view str,std::function<bool (char)> is_allowed_char)139 std::string PercentEncode(absl::string_view str,
140 std::function<bool(char)> is_allowed_char) {
141 std::string out;
142 for (char c : str) {
143 if (!is_allowed_char(c)) {
144 std::string hex = absl::BytesToHexString(absl::string_view(&c, 1));
145 GPR_ASSERT(hex.size() == 2);
146 // BytesToHexString() returns lower case, but
147 // https://datatracker.ietf.org/doc/html/rfc3986#section-6.2.2.1 says
148 // to prefer upper-case.
149 absl::AsciiStrToUpper(&hex);
150 out.push_back('%');
151 out.append(hex);
152 } else {
153 out.push_back(c);
154 }
155 }
156 return out;
157 }
158
159 // Checks if this string is made up of query/fragment chars and '%' exclusively.
160 // See https://tools.ietf.org/html/rfc3986#section-3.4
IsQueryOrFragmentString(absl::string_view str)161 bool IsQueryOrFragmentString(absl::string_view str) {
162 for (char c : str) {
163 if (!IsQueryOrFragmentChar(c) && c != '%') return false;
164 }
165 return true;
166 }
167
MakeInvalidURIStatus(absl::string_view part_name,absl::string_view uri,absl::string_view extra)168 absl::Status MakeInvalidURIStatus(absl::string_view part_name,
169 absl::string_view uri,
170 absl::string_view extra) {
171 return absl::InvalidArgumentError(absl::StrFormat(
172 "Could not parse '%s' from uri '%s'. %s", part_name, uri, extra));
173 }
174
175 } // namespace
176
PercentEncodeAuthority(absl::string_view str)177 std::string URI::PercentEncodeAuthority(absl::string_view str) {
178 return PercentEncode(str, IsAuthorityChar);
179 }
180
PercentEncodePath(absl::string_view str)181 std::string URI::PercentEncodePath(absl::string_view str) {
182 return PercentEncode(str, IsPathChar);
183 }
184
185 // Similar to `grpc_permissive_percent_decode_slice`, this %-decodes all valid
186 // triplets, and passes through the rest verbatim.
PercentDecode(absl::string_view str)187 std::string URI::PercentDecode(absl::string_view str) {
188 if (str.empty() || !absl::StrContains(str, "%")) {
189 return std::string(str);
190 }
191 std::string out;
192 std::string unescaped;
193 out.reserve(str.size());
194 for (size_t i = 0; i < str.length(); i++) {
195 unescaped = "";
196 if (str[i] == '%' && i + 3 <= str.length() &&
197 absl::CUnescape(absl::StrCat("\\x", str.substr(i + 1, 2)),
198 &unescaped) &&
199 unescaped.length() == 1) {
200 out += unescaped[0];
201 i += 2;
202 } else {
203 out += str[i];
204 }
205 }
206 return out;
207 }
208
Parse(absl::string_view uri_text)209 absl::StatusOr<URI> URI::Parse(absl::string_view uri_text) {
210 absl::string_view remaining = uri_text;
211 // parse scheme
212 size_t offset = remaining.find(':');
213 if (offset == remaining.npos || offset == 0) {
214 return MakeInvalidURIStatus("scheme", uri_text, "Scheme not found.");
215 }
216 std::string scheme(remaining.substr(0, offset));
217 if (scheme.find_first_not_of("ABCDEFGHIJKLMNOPQRSTUVWXYZ"
218 "abcdefghijklmnopqrstuvwxyz"
219 "0123456789+-.") != std::string::npos) {
220 return MakeInvalidURIStatus("scheme", uri_text,
221 "Scheme contains invalid characters.");
222 }
223 if (!isalpha(scheme[0])) {
224 return MakeInvalidURIStatus(
225 "scheme", uri_text,
226 "Scheme must begin with an alpha character [A-Za-z].");
227 }
228 remaining.remove_prefix(offset + 1);
229 // parse authority
230 std::string authority;
231 if (absl::ConsumePrefix(&remaining, "//")) {
232 offset = remaining.find_first_of("/?#");
233 authority = PercentDecode(remaining.substr(0, offset));
234 if (offset == remaining.npos) {
235 remaining = "";
236 } else {
237 remaining.remove_prefix(offset);
238 }
239 }
240 // parse path
241 std::string path;
242 if (!remaining.empty()) {
243 offset = remaining.find_first_of("?#");
244 path = PercentDecode(remaining.substr(0, offset));
245 if (offset == remaining.npos) {
246 remaining = "";
247 } else {
248 remaining.remove_prefix(offset);
249 }
250 }
251 // parse query
252 std::vector<QueryParam> query_param_pairs;
253 if (absl::ConsumePrefix(&remaining, "?")) {
254 offset = remaining.find('#');
255 absl::string_view tmp_query = remaining.substr(0, offset);
256 if (tmp_query.empty()) {
257 return MakeInvalidURIStatus("query", uri_text, "Invalid query string.");
258 }
259 if (!IsQueryOrFragmentString(tmp_query)) {
260 return MakeInvalidURIStatus("query string", uri_text,
261 "Query string contains invalid characters.");
262 }
263 for (absl::string_view query_param : absl::StrSplit(tmp_query, '&')) {
264 const std::pair<absl::string_view, absl::string_view> possible_kv =
265 absl::StrSplit(query_param, absl::MaxSplits('=', 1));
266 if (possible_kv.first.empty()) continue;
267 query_param_pairs.push_back({PercentDecode(possible_kv.first),
268 PercentDecode(possible_kv.second)});
269 }
270 if (offset == remaining.npos) {
271 remaining = "";
272 } else {
273 remaining.remove_prefix(offset);
274 }
275 }
276 std::string fragment;
277 if (absl::ConsumePrefix(&remaining, "#")) {
278 if (!IsQueryOrFragmentString(remaining)) {
279 return MakeInvalidURIStatus("fragment", uri_text,
280 "Fragment contains invalid characters.");
281 }
282 fragment = PercentDecode(remaining);
283 }
284 return URI(std::move(scheme), std::move(authority), std::move(path),
285 std::move(query_param_pairs), std::move(fragment));
286 }
287
Create(std::string scheme,std::string authority,std::string path,std::vector<QueryParam> query_parameter_pairs,std::string fragment)288 absl::StatusOr<URI> URI::Create(std::string scheme, std::string authority,
289 std::string path,
290 std::vector<QueryParam> query_parameter_pairs,
291 std::string fragment) {
292 if (!authority.empty() && !path.empty() && path[0] != '/') {
293 return absl::InvalidArgumentError(
294 "if authority is present, path must start with a '/'");
295 }
296 return URI(std::move(scheme), std::move(authority), std::move(path),
297 std::move(query_parameter_pairs), std::move(fragment));
298 }
299
URI(std::string scheme,std::string authority,std::string path,std::vector<QueryParam> query_parameter_pairs,std::string fragment)300 URI::URI(std::string scheme, std::string authority, std::string path,
301 std::vector<QueryParam> query_parameter_pairs, std::string fragment)
302 : scheme_(std::move(scheme)),
303 authority_(std::move(authority)),
304 path_(std::move(path)),
305 query_parameter_pairs_(std::move(query_parameter_pairs)),
306 fragment_(std::move(fragment)) {
307 for (const auto& kv : query_parameter_pairs_) {
308 query_parameter_map_[kv.key] = kv.value;
309 }
310 }
311
// Copy constructor. Copies every component of `other` and then rebuilds the
// lookup map from this object's own copy of the query parameter pairs rather
// than copying `other`'s map.
// NOTE(review): presumably the map refers to storage owned by
// query_parameter_pairs_, which would make a plain map copy unsafe -- the
// member declarations are not visible here; confirm against the header.
URI::URI(const URI& other)
    : scheme_(other.scheme_),
      authority_(other.authority_),
      path_(other.path_),
      query_parameter_pairs_(other.query_parameter_pairs_),
      fragment_(other.fragment_) {
  // When a key occurs more than once, the last occurrence wins.
  for (const auto& param : query_parameter_pairs_) {
    query_parameter_map_[param.key] = param.value;
  }
}
322
// Copy assignment. Copies every component of `other` and rebuilds the lookup
// map from this object's own copy of the query parameter pairs.
// Self-assignment is a no-op.
URI& URI::operator=(const URI& other) {
  if (this == &other) {
    return *this;
  }
  // Drop the index built for the previous value before anything else changes.
  // Without this, keys that existed only in the old value would still be
  // reachable through the map after the assignment.
  // NOTE(review): if the map stores views into query_parameter_pairs_ (the
  // member declarations are not visible here), those stale entries would also
  // dangle once the vector is reassigned below -- confirm against the header.
  query_parameter_map_.clear();
  scheme_ = other.scheme_;
  authority_ = other.authority_;
  path_ = other.path_;
  query_parameter_pairs_ = other.query_parameter_pairs_;
  fragment_ = other.fragment_;
  // When a key occurs more than once, the last occurrence wins.
  for (const auto& kv : query_parameter_pairs_) {
    query_parameter_map_[kv.key] = kv.value;
  }
  return *this;
}
337
338 namespace {
339
340 // A pair formatter for use with absl::StrJoin() for formatting query params.
341 struct QueryParameterFormatter {
operator ()grpc_core::__anon430cfb3d0211::QueryParameterFormatter342 void operator()(std::string* out, const URI::QueryParam& query_param) const {
343 out->append(
344 absl::StrCat(PercentEncode(query_param.key, IsQueryKeyOrValueChar), "=",
345 PercentEncode(query_param.value, IsQueryKeyOrValueChar)));
346 }
347 };
348
349 } // namespace
350
ToString() const351 std::string URI::ToString() const {
352 std::vector<std::string> parts = {PercentEncode(scheme_, IsSchemeChar), ":"};
353 if (!authority_.empty()) {
354 parts.emplace_back("//");
355 parts.emplace_back(PercentEncode(authority_, IsAuthorityChar));
356 }
357 if (!path_.empty()) {
358 parts.emplace_back(PercentEncode(path_, IsPathChar));
359 }
360 if (!query_parameter_pairs_.empty()) {
361 parts.push_back("?");
362 parts.push_back(
363 absl::StrJoin(query_parameter_pairs_, "&", QueryParameterFormatter()));
364 }
365 if (!fragment_.empty()) {
366 parts.push_back("#");
367 parts.push_back(PercentEncode(fragment_, IsQueryOrFragmentChar));
368 }
369 return absl::StrJoin(parts, "");
370 }
371
372 } // namespace grpc_core
373