• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *
3  * Copyright 2015 gRPC authors.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  */
18 
19 #include <grpc/support/port_platform.h>
20 
21 #include "src/core/lib/uri/uri_parser.h"
22 
23 #include <string.h>
24 
25 #include <string>
26 
27 #include "absl/strings/str_format.h"
28 
29 #include <grpc/slice_buffer.h>
30 #include <grpc/support/alloc.h>
31 #include <grpc/support/log.h>
32 
33 #include "src/core/lib/gpr/string.h"
34 #include "src/core/lib/slice/percent_encoding.h"
35 #include "src/core/lib/slice/slice_internal.h"
36 #include "src/core/lib/slice/slice_string_helpers.h"
37 
/** Sentinel meaning "this offset has not been set": a size_t with every bit
 * set to 1, i.e. the largest value size_t can hold. */
39 #define NOT_SET (~(size_t)0)
40 
bad_uri(const char * uri_text,size_t pos,const char * section,bool suppress_errors)41 static grpc_uri* bad_uri(const char* uri_text, size_t pos, const char* section,
42                          bool suppress_errors) {
43   if (!suppress_errors) {
44     std::string line_prefix = absl::StrFormat("bad uri.%s: '", section);
45     gpr_log(GPR_ERROR, "%s%s'", line_prefix.c_str(), uri_text);
46     size_t pfx_len = line_prefix.size() + pos;
47     gpr_log(GPR_ERROR, "%s^ here", std::string(pfx_len, ' ').c_str());
48   }
49   return nullptr;
50 }
51 
52 /** Returns a copy of percent decoded \a src[begin, end) */
decode_and_copy_component(const char * src,size_t begin,size_t end)53 static char* decode_and_copy_component(const char* src, size_t begin,
54                                        size_t end) {
55   grpc_slice component =
56       (begin == NOT_SET || end == NOT_SET)
57           ? grpc_empty_slice()
58           : grpc_slice_from_copied_buffer(src + begin, end - begin);
59   grpc_slice decoded_component =
60       grpc_permissive_percent_decode_slice(component);
61   char* out = grpc_dump_slice(decoded_component, GPR_DUMP_ASCII);
62   grpc_slice_unref_internal(component);
63   grpc_slice_unref_internal(decoded_component);
64   return out;
65 }
66 
valid_hex(char c)67 static bool valid_hex(char c) {
68   return ((c >= 'a') && (c <= 'f')) || ((c >= 'A') && (c <= 'F')) ||
69          ((c >= '0') && (c <= '9'));
70 }
71 
72 /** Returns how many chars to advance if \a uri_text[i] begins a valid \a pchar
73  * production. If \a uri_text[i] introduces an invalid \a pchar (such as percent
74  * sign not followed by two hex digits), NOT_SET is returned. */
parse_pchar(const char * uri_text,size_t i)75 static size_t parse_pchar(const char* uri_text, size_t i) {
76   /* pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
77    * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
78    * pct-encoded = "%" HEXDIG HEXDIG
79    * sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
80    / "*" / "+" / "," / ";" / "=" */
81   char c = uri_text[i];
82   switch (c) {
83     default:
84       if (((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z')) ||
85           ((c >= '0') && (c <= '9'))) {
86         return 1;
87       }
88       break;
89     case ':':
90     case '@':
91     case '-':
92     case '.':
93     case '_':
94     case '~':
95     case '!':
96     case '$':
97     case '&':
98     case '\'':
99     case '(':
100     case ')':
101     case '*':
102     case '+':
103     case ',':
104     case ';':
105     case '=':
106       return 1;
107     case '%': /* pct-encoded */
108       if (valid_hex(uri_text[i + 1]) && valid_hex(uri_text[i + 2])) {
109         return 2;
110       }
111       return NOT_SET;
112   }
113   return 0;
114 }
115 
116 /* *( pchar / "?" / "/" ) */
parse_fragment_or_query(const char * uri_text,size_t * i)117 static int parse_fragment_or_query(const char* uri_text, size_t* i) {
118   char c;
119   while ((c = uri_text[*i]) != 0) {
120     const size_t advance = parse_pchar(uri_text, *i); /* pchar */
121     switch (advance) {
122       case 0: /* uri_text[i] isn't in pchar */
123         /* maybe it's ? or / */
124         if (uri_text[*i] == '?' || uri_text[*i] == '/') {
125           (*i)++;
126           break;
127         } else {
128           return 1;
129         }
130         GPR_UNREACHABLE_CODE(return 0);
131       default:
132         (*i) += advance;
133         break;
134       case NOT_SET: /* uri_text[i] introduces an invalid URI */
135         return 0;
136     }
137   }
138   /* *i is the first uri_text position past the \a query production, maybe \0 */
139   return 1;
140 }
141 
parse_query_parts(grpc_uri * uri)142 static void parse_query_parts(grpc_uri* uri) {
143   static const char* QUERY_PARTS_SEPARATOR = "&";
144   static const char* QUERY_PARTS_VALUE_SEPARATOR = "=";
145   GPR_ASSERT(uri->query != nullptr);
146   if (uri->query[0] == '\0') {
147     uri->query_parts = nullptr;
148     uri->query_parts_values = nullptr;
149     uri->num_query_parts = 0;
150     return;
151   }
152 
153   gpr_string_split(uri->query, QUERY_PARTS_SEPARATOR, &uri->query_parts,
154                    &uri->num_query_parts);
155   uri->query_parts_values =
156       static_cast<char**>(gpr_malloc(uri->num_query_parts * sizeof(char**)));
157   for (size_t i = 0; i < uri->num_query_parts; i++) {
158     char** query_param_parts;
159     size_t num_query_param_parts;
160     char* full = uri->query_parts[i];
161     gpr_string_split(full, QUERY_PARTS_VALUE_SEPARATOR, &query_param_parts,
162                      &num_query_param_parts);
163     GPR_ASSERT(num_query_param_parts > 0);
164     uri->query_parts[i] = query_param_parts[0];
165     if (num_query_param_parts > 1) {
166       /* TODO(dgq): only the first value after the separator is considered.
167        * Perhaps all chars after the first separator for the query part should
168        * be included, even if they include the separator. */
169       uri->query_parts_values[i] = query_param_parts[1];
170     } else {
171       uri->query_parts_values[i] = nullptr;
172     }
173     for (size_t j = 2; j < num_query_param_parts; j++) {
174       gpr_free(query_param_parts[j]);
175     }
176     gpr_free(query_param_parts);
177     gpr_free(full);
178   }
179 }
180 
grpc_uri_parse(const char * uri_text,bool suppress_errors)181 grpc_uri* grpc_uri_parse(const char* uri_text, bool suppress_errors) {
182   grpc_uri* uri;
183   size_t scheme_begin = 0;
184   size_t scheme_end = NOT_SET;
185   size_t authority_begin = NOT_SET;
186   size_t authority_end = NOT_SET;
187   size_t path_begin = NOT_SET;
188   size_t path_end = NOT_SET;
189   size_t query_begin = NOT_SET;
190   size_t query_end = NOT_SET;
191   size_t fragment_begin = NOT_SET;
192   size_t fragment_end = NOT_SET;
193   size_t i;
194 
195   for (i = scheme_begin; uri_text[i] != 0; i++) {
196     if (uri_text[i] == ':') {
197       scheme_end = i;
198       break;
199     }
200     if (uri_text[i] >= 'a' && uri_text[i] <= 'z') continue;
201     if (uri_text[i] >= 'A' && uri_text[i] <= 'Z') continue;
202     if (i != scheme_begin) {
203       if (uri_text[i] >= '0' && uri_text[i] <= '9') continue;
204       if (uri_text[i] == '+') continue;
205       if (uri_text[i] == '-') continue;
206       if (uri_text[i] == '.') continue;
207     }
208     break;
209   }
210   if (scheme_end == NOT_SET) {
211     return bad_uri(uri_text, i, "scheme", suppress_errors);
212   }
213 
214   if (uri_text[scheme_end + 1] == '/' && uri_text[scheme_end + 2] == '/') {
215     authority_begin = scheme_end + 3;
216     for (i = authority_begin; uri_text[i] != 0 && authority_end == NOT_SET;
217          i++) {
218       if (uri_text[i] == '/' || uri_text[i] == '?' || uri_text[i] == '#') {
219         authority_end = i;
220       }
221     }
222     if (authority_end == NOT_SET && uri_text[i] == 0) {
223       authority_end = i;
224     }
225     if (authority_end == NOT_SET) {
226       return bad_uri(uri_text, i, "authority", suppress_errors);
227     }
228     /* TODO(ctiller): parse the authority correctly */
229     path_begin = authority_end;
230   } else {
231     path_begin = scheme_end + 1;
232   }
233 
234   for (i = path_begin; uri_text[i] != 0; i++) {
235     if (uri_text[i] == '?' || uri_text[i] == '#') {
236       path_end = i;
237       break;
238     }
239   }
240   if (path_end == NOT_SET && uri_text[i] == 0) {
241     path_end = i;
242   }
243   if (path_end == NOT_SET) {
244     return bad_uri(uri_text, i, "path", suppress_errors);
245   }
246 
247   if (uri_text[i] == '?') {
248     query_begin = ++i;
249     if (!parse_fragment_or_query(uri_text, &i)) {
250       return bad_uri(uri_text, i, "query", suppress_errors);
251     } else if (uri_text[i] != 0 && uri_text[i] != '#') {
252       /* We must be at the end or at the beginning of a fragment */
253       return bad_uri(uri_text, i, "query", suppress_errors);
254     }
255     query_end = i;
256   }
257   if (uri_text[i] == '#') {
258     fragment_begin = ++i;
259     if (!parse_fragment_or_query(uri_text, &i)) {
260       return bad_uri(uri_text, i - fragment_end, "fragment", suppress_errors);
261     } else if (uri_text[i] != 0) {
262       /* We must be at the end */
263       return bad_uri(uri_text, i, "fragment", suppress_errors);
264     }
265     fragment_end = i;
266   }
267 
268   uri = static_cast<grpc_uri*>(gpr_zalloc(sizeof(*uri)));
269   uri->scheme = decode_and_copy_component(uri_text, scheme_begin, scheme_end);
270   uri->authority =
271       decode_and_copy_component(uri_text, authority_begin, authority_end);
272   uri->path = decode_and_copy_component(uri_text, path_begin, path_end);
273   uri->query = decode_and_copy_component(uri_text, query_begin, query_end);
274   uri->fragment =
275       decode_and_copy_component(uri_text, fragment_begin, fragment_end);
276   parse_query_parts(uri);
277 
278   return uri;
279 }
280 
grpc_uri_get_query_arg(const grpc_uri * uri,const char * key)281 const char* grpc_uri_get_query_arg(const grpc_uri* uri, const char* key) {
282   GPR_ASSERT(key != nullptr);
283   if (key[0] == '\0') return nullptr;
284 
285   for (size_t i = 0; i < uri->num_query_parts; ++i) {
286     if (0 == strcmp(key, uri->query_parts[i])) {
287       return uri->query_parts_values[i];
288     }
289   }
290   return nullptr;
291 }
292 
grpc_uri_destroy(grpc_uri * uri)293 void grpc_uri_destroy(grpc_uri* uri) {
294   if (!uri) return;
295   gpr_free(uri->scheme);
296   gpr_free(uri->authority);
297   gpr_free(uri->path);
298   gpr_free(uri->query);
299   for (size_t i = 0; i < uri->num_query_parts; ++i) {
300     gpr_free(uri->query_parts[i]);
301     gpr_free(uri->query_parts_values[i]);
302   }
303   gpr_free(uri->query_parts);
304   gpr_free(uri->query_parts_values);
305   gpr_free(uri->fragment);
306   gpr_free(uri);
307 }
308