• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright 2014-2022 The GmSSL Project. All Rights Reserved.
3  *
4  *  Licensed under the Apache License, Version 2.0 (the License); you may
5  *  not use this file except in compliance with the License.
6  *
7  *  http://www.apache.org/licenses/LICENSE-2.0
8  */
9 
10 
11 #include "url_parser.h"
12 #include <errno.h>
13 #include <stdlib.h>
14 #include <string.h>
15 
/*
 * Find the first occurrence of the NUL-terminated string `needle` within the
 * first `s_len` bytes of `s`.
 *
 * Returns a pointer to the start of the match, or NULL if there is none.
 * An empty needle matches at `s`.
 */
static const char *_strnstr(const char *s, size_t s_len, const char *needle)
{
	size_t needle_len = strlen(needle);
	const char *last;
	const char *p;

	/*
	 * A needle longer than the haystack can never match.  Rejecting it up
	 * front also keeps the pointer arithmetic below inside [s, s + s_len];
	 * computing "end - needle_len + 1" for an oversized needle would form a
	 * pointer before the start of the buffer, which is undefined behaviour.
	 */
	if (needle_len > s_len) {
		return NULL;
	}

	/* Last position at which a full-length match still fits. */
	last = s + (s_len - needle_len);

	for (p = s; p <= last; p++) {
		if (strncmp(p, needle, needle_len) == 0) {
			return p;
		}
	}

	return NULL;
}
32 
/*
 * Scan the first `s_len` bytes of `s` from the front and return a pointer to
 * the first byte that equals any character of the NUL-terminated set `chars`.
 * Returns NULL when no byte matches (including when `chars` is empty).
 */
static const char *find_chars(const char *s, size_t s_len, const char *chars)
{
	const size_t set_len = strlen(chars);
	size_t off;

	for (off = 0; off < s_len; off++) {
		/* Membership test over the set, excluding its terminating NUL. */
		if (memchr(chars, s[off], set_len) != NULL) {
			return s + off;
		}
	}

	return NULL;
}
52 
/*
 * Scan the first `s_len` bytes of `s` from the back and return a pointer to
 * the last byte that equals any character of the NUL-terminated set `chars`.
 * Returns NULL when no byte matches or when `s_len` is 0.
 */
static const char *find_chars_reverse(const char *s, size_t s_len, const char *chars)
{
	size_t chars_n = strlen(chars);
	const char *p = s + s_len;
	size_t i;

	/*
	 * Step back *before* each test so that p never moves below s.  Starting
	 * at "end - 1" and looping while "p >= s" forms a pointer one before the
	 * buffer (on an empty range, and after the last iteration of a miss),
	 * which is undefined behaviour.
	 */
	while (p > s) {
		p--;
		for (i = 0; i < chars_n; i++) {
			if (*p == chars[i]) {
				return p;
			}
		}
	}

	return NULL;
}
72 
/* Return 1 if `c` is an ASCII letter ('a'-'z' or 'A'-'Z'), otherwise 0. */
static int is_alpha(char c)
{
	const int lower = (c >= 'a' && c <= 'z');
	const int upper = (c >= 'A' && c <= 'Z');

	return lower || upper;
}
81 
/* Return 1 if `c` is an ASCII decimal digit ('0'-'9'), otherwise 0. */
static int is_digit(char c)
{
	return (c >= '0' && c <= '9') ? 1 : 0;
}
89 
/*
 * Return 1 if `c` is an ASCII control character (0x00-0x1f or 0x7f),
 * otherwise 0.  Bytes with the high bit set are not treated as control
 * characters.
 */
static int is_control(char c)
{
	/* Compare as an unsigned byte so the result does not depend on whether
	 * plain char is signed. */
	const unsigned char byte = (unsigned char) c;

	return byte <= 0x1f || byte == 0x7f;
}
98 
/*
 * Look for a leading URL scheme in the NUL-terminated string `s`.
 *
 * A scheme is one letter followed by any number of letters, digits, '+',
 * '-' or '.', terminated by ':'.
 *
 * Returns a pointer to the terminating ':' when `s` starts with a scheme,
 * or NULL otherwise (empty string, bad first character, a disallowed
 * character before the ':', or no ':' at all).
 */
static const char *lookup_scheme(const char *s)
{
	const char *cur;

	/* The first character must be a letter; this also rejects "". */
	if (!is_alpha(s[0])) {
		return NULL;
	}

	for (cur = s + 1; *cur != '\0'; cur++) {
		const char ch = *cur;

		if (ch == ':') {
			return cur;
		}
		if (is_alpha(ch) || is_digit(ch)) {
			continue;
		}
		if (ch == '+' || ch == '-' || ch == '.') {
			continue;
		}
		return NULL;
	}

	/* Reached the end of the string without seeing ':'. */
	return NULL;
}
129 
/*
 * Split the userinfo text s[0 .. s_len) at its first ':' and store
 * heap-allocated copies in `c`.
 *
 * With a ':' present, the part before it goes to c->user and the part after
 * it to c->password.  Without one, the whole text goes to c->user and
 * c->password is left untouched.
 *
 * Returns 0 on success, -1 if a copy could not be allocated.  On failure
 * c->user may already have been set; this function does not release it.
 */
static int parse_user_password(const char *s, size_t s_len, URL_COMPONENTS *c)
{
	const char *colon = _strnstr(s, s_len, ":");

	if (colon == NULL) {
		/* No separator: everything is the user name. */
		c->user = strndup(s, s_len);
		return (c->user != NULL) ? 0 : -1;	/* ENOMEM */
	}

	c->user = strndup(s, (size_t) (colon - s));
	if (c->user == NULL) {
		return -1;	/* ENOMEM */
	}

	c->password = strndup(colon + 1, (size_t) ((s + s_len) - colon - 1));
	if (c->password == NULL) {
		return -1;	/* ENOMEM */
	}

	return 0;
}
154 
/*
 * Parse the authority text s[0 .. s_len), i.e. "[userinfo@]host[:port]",
 * and store the pieces in `c`.
 *
 *  - c->port is reset to -1 first and stays -1 when no port digits are given
 *    (including a trailing ':' with nothing after it).
 *  - An empty authority (s_len == 0) is accepted and stores nothing else.
 *  - Text before the first '@' is handed to parse_user_password().
 *  - A host starting with '[' is treated as an IP-literal; the stored host
 *    keeps its surrounding brackets.
 *  - Otherwise the host ends at the last ':' of the remaining text (or at the
 *    end of it) and must not contain '[' or ']'.
 *  - The host must be non-empty and must not contain a space.
 *  - The port must consist of decimal digits only and be at most 65535.
 *
 * Returns 0 on success and -1 on failure.  Malformed input sets errno to
 * EINVAL; on allocation failure errno is whatever strndup() left behind.
 * Strings already stored in `c` before a failure are not released here.
 */
static int parse_authority(const char *s, size_t s_len, URL_COMPONENTS *c)
{
	const char *end = s + s_len;
	const char *p, *found, *host_start, *host_end;
	int port;

	c->port = -1;

	if (s_len == 0) {	/* empty authority */
		return 0;
	}

	found = _strnstr(s, s_len, "@");
	if (found) {
		if (parse_user_password(s, (size_t) (found - s), c) == -1) {
			return -1;
		}

		host_start = found + 1;
	} else {
		host_start = s;
	}

	/*
	 * host_start equals end when the authority finishes with '@'.  Check the
	 * bound before dereferencing so nothing outside s[0 .. s_len) is read;
	 * the empty host is then rejected further down.
	 */
	if (host_start < end && *host_start == '[') {
		/* IP-literal host */
		if (find_chars(host_start + 1, (size_t) (end - host_start - 1), "[")) {
			errno = EINVAL;
			return -1;
		}
		host_end = find_chars(host_start + 1, (size_t) (end - host_start - 1), "]");
		if (!host_end) {
			errno = EINVAL;
			return -1;
		}
		/* The character after ']' must be the end of the text or ':'. */
		if (host_end + 1 != end && host_end[1] != ':') {
			errno = EINVAL;
			return -1;
		}
		host_end++;	/* keep the closing ']' as part of the host */
	} else {
		/* IPv4address / reg-name host */
		host_end = find_chars_reverse(host_start, (size_t) (end - host_start), ":");
		if (host_end == NULL) {
			host_end = end;
		}
		if (find_chars(host_start, (size_t) (host_end - host_start), "[]")) {
			errno = EINVAL;
			return -1;
		}
	}
	if (find_chars(host_start, (size_t) (host_end - host_start), " ")) {
		errno = EINVAL;
		return -1;
	}

	/* ASSERT: host_end == end or *host_end == ':' */

	if (host_end == end) {
		/* without port number */
		if (host_start == end) {	/* empty host */
			errno = EINVAL;
			return -1;
		}
		c->host = strndup(host_start, (size_t) (end - host_start));
		if (c->host == NULL) {
			return -1;	/* ENOMEM */
		}
		return 0;
	}

	/* ASSERT: *host_end == ':' */

	/* host and port */

	if (host_start == host_end) {	/* empty host */
		errno = EINVAL;
		return -1;
	}

	if (host_end + 1 < end) {
		port = 0;
		for (p = host_end + 1; p < end; p++) {
			if (*p < '0' || *p > '9') {
				errno = EINVAL;
				return -1;
			}

			/*
			 * port is at most 65535 on entry, so this cannot
			 * overflow an int before the range check below.
			 */
			port = port * 10 + (*p - '0');
			if (port > 65535) {
				errno = EINVAL;
				return -1;
			}
		}
	} else {
		/* empty port number */
		port = -1;
	}

	c->host = strndup(host_start, (size_t) (host_end - host_start));
	if (c->host == NULL) {
		return -1;	/* ENOMEM */
	}
	c->port = port;

	return 0;
}
265 
parse_url(const char * url)266 URL_COMPONENTS *parse_url(const char *url)
267 {
268 	URL_COMPONENTS *c;
269 	const char *p;
270 	const char *end = url + strlen(url);
271 	const char *found;
272 	size_t len;
273 
274 	for (p = url ; p < end ; p++) {
275 		if (is_control(*p)) {
276 			errno = EINVAL;
277 			return NULL;
278 		}
279 	}
280 
281 	c = malloc(sizeof(URL_COMPONENTS));
282 	if (!c) {
283 		return NULL;
284 	}
285 	memset(c, 0, sizeof(URL_COMPONENTS));
286 	c->port = -1;
287 
288 	p = url;
289 
290 	/* lookup scheme */
291 	found = lookup_scheme(p);
292 	if (found) {
293 		c->scheme = strndup(url, (size_t) (found - p));
294 		if (c->scheme == NULL) {
295 			goto error;
296 		}
297 		p = found + 1;	/* skip a colon */
298 		if (p >= end) {
299 			return c;
300 		}
301 	}
302 
303 	if (strlen(p) >= 2 &&
304 	    p[0] == '/' && p[1] == '/') {
305 		/* authority */
306 		p = p + 2;
307 		found = find_chars(p, strlen(p), "/?#");
308 		if (found == NULL) {
309 			len = strlen(p);
310 		} else {
311 			len = (size_t) (found - p);
312 		}
313 		if (parse_authority(p, len, c) == -1) {
314 			goto error;	/* ENOMEM,EINVAL */
315 		}
316 
317 		if (!found) {
318 			return c;
319 		}
320 
321 		p = found;
322 	}
323 
324 	if (*p != '?' && *p != '#') {
325 		/* path */
326 		found = find_chars(p, strlen(p), "?#");
327 		found = NULL;
328 		if (found == NULL) {
329 			c->path = strdup(p);
330 			if (c->path == NULL) {
331 				goto error;
332 			}
333 		} else
334 		{
335 			if (found != p) {
336 				c->path = strndup(p,  (size_t) (found - p));
337 				if (c->path == NULL) {
338 					goto error;
339 				}
340 			}
341 		}
342 
343 		if (!found) {
344 			return c;
345 		}
346 
347 		p = found;
348 	}
349 
350 	/* ASSERT: *p is '?' or '#' */
351 #if 0
352 	if (*p == '?') {
353 		/* query */
354 		p = p + 1;
355 		found = find_chars(p, strlen(p), "#");
356 		if (found == NULL) {
357 			c->query = strdup(p);
358 		} else {
359 			c->query = strndup(p,  (size_t) (found - p));
360 		}
361 
362 		if (c->query == NULL) {
363 			goto error;
364 		}
365 
366 		if (!found) {
367 			return c;
368 		}
369 
370 		p = found;
371 	}
372 #endif
373 
374 	/* ASSERT: *p is '#' */
375 
376 	/* fragment */
377 	p = p + 1;
378 	c->fragment = strdup(p);
379 	if (c->fragment == NULL) {
380 		goto error;
381 	}
382 
383 	return c;
384 
385 error:
386 	free(c);
387 
388 	return NULL;
389 }
390 
/*
 * Release a URL_COMPONENTS object together with every component string it
 * holds.  Passing NULL is allowed and does nothing, like free().
 */
void free_url_components(URL_COMPONENTS *c)
{
	if (c == NULL) {
		return;
	}

	/* free(NULL) is a no-op, so unset components need no guard. */
	free(c->scheme);
	free(c->user);
	free(c->password);
	free(c->host);
	free(c->path);
	free(c->query);
	free(c->fragment);
	free(c);
}
416 
417