1 /*
2  *
3  * Copyright 2015 gRPC authors.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  */
18 
19 #include <grpc/support/port_platform.h>
20 
21 #include "src/core/lib/uri/uri_parser.h"
22 
23 #include <string.h>
24 
25 #include <map>
26 #include <string>
27 
28 #include "absl/strings/escaping.h"
29 #include "absl/strings/str_format.h"
30 #include "absl/strings/str_split.h"
31 
32 #include <grpc/support/log.h>
33 
34 #include "src/core/lib/gpr/string.h"
35 
36 namespace grpc_core {
37 namespace {
38 
39 // Similar to `grpc_permissive_percent_decode_slice`, this %-decodes all valid
40 // triplets, and passes through the rest verbatim.
PercentDecode(absl::string_view str)41 std::string PercentDecode(absl::string_view str) {
42   if (str.empty() || !absl::StrContains(str, "%")) {
43     return std::string(str);
44   }
45   std::string out;
46   std::string unescaped;
47   out.reserve(str.size());
48   for (size_t i = 0; i < str.length(); i++) {
49     unescaped = "";
50     if (str[i] != '%') {
51       out += str[i];
52       continue;
53     }
54     if (i + 3 >= str.length() ||
55         !absl::CUnescape(absl::StrCat("\\x", str.substr(i + 1, 2)),
56                          &unescaped) ||
57         unescaped.length() > 1) {
58       out += str[i];
59     } else {
60       out += unescaped[0];
61       i += 2;
62     }
63   }
64   return out;
65 }
66 
67 // Checks if this string is made up of pchars, '/', '?', and '%' exclusively.
68 // See https://tools.ietf.org/html/rfc3986#section-3.4
IsPCharString(absl::string_view str)69 bool IsPCharString(absl::string_view str) {
70   return (str.find_first_not_of("ABCDEFGHIJKLMNOPQRSTUVWXYZ"
71                                 "abcdefghijklmnopqrstuvwxyz"
72                                 "0123456789"
73                                 "?/:@\\-._~!$&'()*+,;=%") ==
74           absl::string_view::npos);
75 }
76 
MakeInvalidURIStatus(absl::string_view part_name,absl::string_view uri,absl::string_view extra)77 absl::Status MakeInvalidURIStatus(absl::string_view part_name,
78                                   absl::string_view uri,
79                                   absl::string_view extra) {
80   return absl::InvalidArgumentError(absl::StrFormat(
81       "Could not parse '%s' from uri '%s'. %s", part_name, uri, extra));
82 }
83 }  // namespace
84 
Parse(absl::string_view uri_text)85 absl::StatusOr<URI> URI::Parse(absl::string_view uri_text) {
86   absl::StatusOr<std::string> decoded;
87   absl::string_view remaining = uri_text;
88   // parse scheme
89   size_t idx = remaining.find(':');
90   if (idx == remaining.npos || idx == 0) {
91     return MakeInvalidURIStatus("scheme", uri_text, "Scheme not found.");
92   }
93   std::string scheme(remaining.substr(0, idx));
94   if (scheme.find_first_not_of("ABCDEFGHIJKLMNOPQRSTUVWXYZ"
95                                "abcdefghijklmnopqrstuvwxyz"
96                                "0123456789+-.") != std::string::npos) {
97     return MakeInvalidURIStatus("scheme", uri_text,
98                                 "Scheme contains invalid characters.");
99   }
100   if (!isalpha(scheme[0])) {
101     return MakeInvalidURIStatus(
102         "scheme", uri_text,
103         "Scheme must begin with an alpha character [A-Za-z].");
104   }
105   remaining.remove_prefix(scheme.length() + 1);
106   // parse authority
107   std::string authority;
108   if (absl::StartsWith(remaining, "//")) {
109     remaining.remove_prefix(2);
110     authority =
111         PercentDecode(remaining.substr(0, remaining.find_first_of("/?#")));
112     remaining.remove_prefix(authority.length());
113   }
114   // parse path
115   std::string path;
116   if (!remaining.empty()) {
117     path = PercentDecode(remaining.substr(0, remaining.find_first_of("?#")));
118     remaining.remove_prefix(path.length());
119   }
120   // parse query
121   std::vector<QueryParam> query_param_pairs;
122   if (!remaining.empty() && remaining[0] == '?') {
123     remaining.remove_prefix(1);
124     absl::string_view tmp_query = remaining.substr(0, remaining.find('#'));
125     if (tmp_query.empty()) {
126       return MakeInvalidURIStatus("query", uri_text, "Invalid query string.");
127     }
128     if (!IsPCharString(tmp_query)) {
129       return MakeInvalidURIStatus("query string", uri_text,
130                                   "Query string contains invalid characters.");
131     }
132     for (absl::string_view query_param : absl::StrSplit(tmp_query, '&')) {
133       const std::pair<absl::string_view, absl::string_view> possible_kv =
134           absl::StrSplit(query_param, absl::MaxSplits('=', 1));
135       if (possible_kv.first.empty()) continue;
136       query_param_pairs.push_back({PercentDecode(possible_kv.first),
137                                    PercentDecode(possible_kv.second)});
138     }
139     remaining.remove_prefix(tmp_query.length());
140   }
141   std::string fragment;
142   if (!remaining.empty() && remaining[0] == '#') {
143     remaining.remove_prefix(1);
144     if (!IsPCharString(remaining)) {
145       return MakeInvalidURIStatus("fragment", uri_text,
146                                   "Fragment contains invalid characters.");
147     }
148     fragment = PercentDecode(remaining);
149   }
150   return URI(std::move(scheme), std::move(authority), std::move(path),
151              std::move(query_param_pairs), std::move(fragment));
152 }
153 
URI(std::string scheme,std::string authority,std::string path,std::vector<QueryParam> query_parameter_pairs,std::string fragment)154 URI::URI(std::string scheme, std::string authority, std::string path,
155          std::vector<QueryParam> query_parameter_pairs, std::string fragment)
156     : scheme_(std::move(scheme)),
157       authority_(std::move(authority)),
158       path_(std::move(path)),
159       query_parameter_pairs_(std::move(query_parameter_pairs)),
160       fragment_(std::move(fragment)) {
161   for (const auto& kv : query_parameter_pairs_) {
162     query_parameter_map_[kv.key] = kv.value;
163   }
164 }
165 
// Copy constructor. Copies every component from `other` and then rebuilds the
// key -> value lookup map from this object's own copy of the query pairs
// instead of copying `other`'s map.
// NOTE(review): presumably the map refers to storage owned by
// query_parameter_pairs_, which would make a member-wise copy unsafe --
// confirm against the declaration in the header.
URI::URI(const URI& other)
    : scheme_(other.scheme_),
      authority_(other.authority_),
      path_(other.path_),
      query_parameter_pairs_(other.query_parameter_pairs_),
      fragment_(other.fragment_) {
  // Visit the pairs in order; a repeated key ends up mapped to the value of
  // its last occurrence.
  for (size_t idx = 0; idx < query_parameter_pairs_.size(); ++idx) {
    const auto& param = query_parameter_pairs_[idx];
    query_parameter_map_[param.key] = param.value;
  }
}
176 
// Copy assignment. Replaces every component of this URI with a copy of the
// corresponding component of `other` and rebuilds the key -> value lookup map
// from this object's own query pairs. Self-assignment is a no-op.
// Returns *this.
URI& URI::operator=(const URI& other) {
  if (this == &other) {
    return *this;
  }
  scheme_ = other.scheme_;
  authority_ = other.authority_;
  path_ = other.path_;
  query_parameter_pairs_ = other.query_parameter_pairs_;
  fragment_ = other.fragment_;
  // Discard the entries belonging to the previous value before re-indexing.
  // Without this, any key that existed in the old URI but not in `other`
  // would survive the assignment and still be reported by the map.
  query_parameter_map_.clear();
  // Pairs are visited in order; a repeated key ends up mapped to the value of
  // its last occurrence.
  for (const auto& kv : query_parameter_pairs_) {
    query_parameter_map_[kv.key] = kv.value;
  }
  return *this;
}
191 }  // namespace grpc_core
192