• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /* soup-uri.c : utility functions to parse URLs */
3 
4 /*
5  * Copyright 1999-2003 Ximian, Inc.
6  */
7 
8 #ifdef HAVE_CONFIG_H
9 #include <config.h>
10 #endif
11 
12 #include <string.h>
13 #include <stdlib.h>
14 
15 #include "soup-uri.h"
16 #include "soup.h"
17 #include "soup-misc-private.h"
18 
19 /**
20  * SECTION:soup-uri
21  * @short_description: URIs
22  *
23  * A #SoupURI represents a (parsed) URI.
24  *
25  * Many applications will not need to use #SoupURI directly at all; on
26  * the client side, soup_message_new() takes a stringified URI, and on
27  * the server side, the path and query components are provided for you
28  * in the server callback.
29  **/
30 
31 /**
32  * SoupURI:
33  * @scheme: the URI scheme (eg, "http")
34  * @user: a username, or %NULL
35  * @password: a password, or %NULL
36  * @host: the hostname or IP address, or %NULL
37  * @port: the port number on @host
38  * @path: the path on @host
39  * @query: a query for @path, or %NULL
40  * @fragment: a fragment identifier within @path, or %NULL
41  *
42  * A #SoupURI represents a (parsed) URI. #SoupURI supports RFC 3986
43  * (URI Generic Syntax), and can parse any valid URI. However, libsoup
44  * only uses "http" and "https" URIs internally; you can use
45  * SOUP_URI_VALID_FOR_HTTP() to test if a #SoupURI is a valid HTTP
46  * URI.
47  *
48  * @scheme will always be set in any URI. It is an interned string and
49  * is always all lowercase. (If you parse a URI with a non-lowercase
50  * scheme, it will be converted to lowercase.) The macros
51  * %SOUP_URI_SCHEME_HTTP and %SOUP_URI_SCHEME_HTTPS provide the
52  * interned values for "http" and "https" and can be compared against
53  * URI @scheme values.
54  *
55  * @user and @password are parsed as defined in the older URI specs
56  * (ie, separated by a colon; RFC 3986 only talks about a single
57  * "userinfo" field). Note that @password is not included in the
58  * output of soup_uri_to_string(). libsoup does not normally use these
59  * fields; authentication is handled via #SoupSession signals.
60  *
61  * @host contains the hostname, and @port the port specified in the
62  * URI. If the URI doesn't contain a hostname, @host will be %NULL,
63  * and if it doesn't specify a port, @port may be 0. However, for
64  * "http" and "https" URIs, @host is guaranteed to be non-%NULL
65  * (trying to parse an http URI with no @host will return %NULL), and
66  * @port will always be non-0 (because libsoup knows the default value
67  * to use when it is not specified in the URI).
68  *
69  * @path is always non-%NULL. For http/https URIs, @path will never be
70  * an empty string either; if the input URI has no path, the parsed
71  * #SoupURI will have a @path of "/".
72  *
73  * @query and @fragment are optional for all URI types.
74  * soup_form_decode() may be useful for parsing @query.
75  *
76  * Note that @path, @query, and @fragment may contain
77  * %<!-- -->-encoded characters. soup_uri_new() calls
78  * soup_uri_normalize() on them, but not soup_uri_decode(). This is
79  * necessary to ensure that soup_uri_to_string() will generate a URI
80  * that has exactly the same meaning as the original. (In theory,
81  * #SoupURI should leave @user, @password, and @host partially-encoded
82  * as well, but this would be more annoying than useful.)
83  **/
84 
85 /**
86  * SOUP_URI_IS_VALID:
87  * @uri: a #SoupURI
88  *
89  * Tests whether @uri is a valid #SoupURI; that is, that it is non-%NULL
90  * and its @scheme and @path members are also non-%NULL.
91  *
92  * This macro does not check whether http and https URIs have a non-%NULL
93  * @host member.
94  *
95  * Return value: %TRUE if @uri is valid for use.
96  *
97  * Since: 2.38
98  **/
99 
100 /**
101  * SOUP_URI_VALID_FOR_HTTP:
102  * @uri: a #SoupURI
103  *
104  * Tests if @uri is a valid #SoupURI for HTTP communication; that is, if
105  * it can be used to construct a #SoupMessage.
106  *
107  * Return value: %TRUE if @uri is a valid "http" or "https" URI.
108  *
109  * Since: 2.24
110  **/
111 
112 /**
113  * SOUP_URI_SCHEME_HTTP:
114  *
115  * "http" as an interned string; you can compare this directly to a
116  * #SoupURI's <literal>scheme</literal> field using
117  * <literal>==</literal>.
118  */
119 /**
120  * SOUP_URI_SCHEME_HTTPS:
121  *
122  * "https" as an interned string; you can compare this directly to a
123  * #SoupURI's <literal>scheme</literal> field using
124  * <literal>==</literal>.
125  */
126 /**
127  * SOUP_URI_SCHEME_FTP:
128  *
129  * "ftp" as an interned string; you can compare this directly to a
130  * #SoupURI's <literal>scheme</literal> field using
131  * <literal>==</literal>.
132  *
133  * Since: 2.30
134  */
135 /**
136  * SOUP_URI_SCHEME_FILE:
137  *
138  * "file" as an interned string; you can compare this directly to a
139  * #SoupURI's <literal>scheme</literal> field using
140  * <literal>==</literal>.
141  *
142  * Since: 2.30
143  */
144 /**
145  * SOUP_URI_SCHEME_DATA:
146  *
147  * "data" as an interned string; you can compare this directly to a
148  * #SoupURI's <literal>scheme</literal> field using
149  * <literal>==</literal>.
150  *
151  * Since: 2.30
152  */
153 /**
154  * SOUP_URI_SCHEME_RESOURCE:
155  *
156  * "resource" as an interned string; you can compare this directly to a
157  * #SoupURI's <literal>scheme</literal> field using
158  * <literal>==</literal>.
159  *
160  * Since: 2.42
161  */
162 /**
163  * SOUP_URI_SCHEME_WS:
164  *
165  * "ws" (WebSocket) as an interned string; you can compare this
166  * directly to a #SoupURI's <literal>scheme</literal> field using
167  * <literal>==</literal>.
168  *
169  * Since: 2.50
170  */
171 /**
172  * SOUP_URI_SCHEME_WSS:
173  *
174  * "wss" (WebSocket over TLS) as an interned string; you can compare
175  * this directly to a #SoupURI's <literal>scheme</literal> field using
176  * <literal>==</literal>.
177  *
178  * Since: 2.50
179  */
180 
181 static void append_uri_encoded (GString *str, const char *in, const char *extra_enc_chars);
182 static char *uri_normalized_copy (const char *str, int length, const char *unescape_extra);
183 
184 gpointer _SOUP_URI_SCHEME_HTTP, _SOUP_URI_SCHEME_HTTPS;
185 gpointer _SOUP_URI_SCHEME_WS, _SOUP_URI_SCHEME_WSS;
186 gpointer _SOUP_URI_SCHEME_FTP;
187 gpointer _SOUP_URI_SCHEME_FILE, _SOUP_URI_SCHEME_DATA, _SOUP_URI_SCHEME_RESOURCE;
188 
189 static inline const char *
soup_uri_parse_scheme(const char * scheme,int len)190 soup_uri_parse_scheme (const char *scheme, int len)
191 {
192 	if (len == 4 && !g_ascii_strncasecmp (scheme, "http", len)) {
193 		return SOUP_URI_SCHEME_HTTP;
194 	} else if (len == 5 && !g_ascii_strncasecmp (scheme, "https", len)) {
195 		return SOUP_URI_SCHEME_HTTPS;
196 	} else if (len == 8 && !g_ascii_strncasecmp (scheme, "resource", len)) {
197 		return SOUP_URI_SCHEME_RESOURCE;
198 	} else if (len == 2 && !g_ascii_strncasecmp (scheme, "ws", len)) {
199 		return SOUP_URI_SCHEME_WS;
200 	} else if (len == 3 && !g_ascii_strncasecmp (scheme, "wss", len)) {
201 		return SOUP_URI_SCHEME_WSS;
202 	} else {
203 		char *lower_scheme;
204 
205 		lower_scheme = g_ascii_strdown (scheme, len);
206 		scheme = g_intern_static_string (lower_scheme);
207 		if (scheme != (const char *)lower_scheme)
208 			g_free (lower_scheme);
209 		return scheme;
210 	}
211 }
212 
213 static inline guint
soup_scheme_default_port(const char * scheme)214 soup_scheme_default_port (const char *scheme)
215 {
216 	if (scheme == SOUP_URI_SCHEME_HTTP || scheme == SOUP_URI_SCHEME_WS)
217 		return 80;
218 	else if (scheme == SOUP_URI_SCHEME_HTTPS || scheme == SOUP_URI_SCHEME_WSS)
219 		return 443;
220 	else if (scheme == SOUP_URI_SCHEME_FTP)
221 		return 21;
222 	else
223 		return 0;
224 }
225 
226 /**
227  * soup_uri_new_with_base: (constructor)
228  * @base: a base URI
229  * @uri_string: the URI
230  *
231  * Parses @uri_string relative to @base.
232  *
233  * Returns: a parsed #SoupURI.
234  **/
235 SoupURI *
soup_uri_new_with_base(SoupURI * base,const char * uri_string)236 soup_uri_new_with_base (SoupURI *base, const char *uri_string)
237 {
238 	SoupURI *uri, fixed_base;
239 	const char *end, *hash, *colon, *at, *path, *question;
240 	const char *p, *hostend;
241 	gboolean remove_dot_segments = TRUE;
242 	int len;
243 
244 	g_return_val_if_fail (uri_string != NULL, NULL);
245 
246 	/* Allow a %NULL path in @base, for compatibility */
247 	if (base && base->scheme && !base->path) {
248 		g_warn_if_fail (SOUP_URI_IS_VALID (base));
249 
250 		memcpy (&fixed_base, base, sizeof (SoupURI));
251 		fixed_base.path = "";
252 		base = &fixed_base;
253 	}
254 
255 	g_return_val_if_fail (base == NULL || SOUP_URI_IS_VALID (base), NULL);
256 
257 	/* First some cleanup steps (which are supposed to all be no-ops,
258 	 * but...). Skip initial whitespace, strip out internal tabs and
259 	 * line breaks, and ignore trailing whitespace.
260 	 */
261 	while (g_ascii_isspace (*uri_string))
262 		uri_string++;
263 
264 	len = strcspn (uri_string, "\t\n\r");
265 	if (uri_string[len]) {
266 		char *clean = g_malloc (strlen (uri_string) + 1), *d;
267 		const char *s;
268 
269 		for (s = uri_string, d = clean; *s; s++) {
270 			if (*s != '\t' && *s != '\n' && *s != '\r')
271 				*d++ = *s;
272 		}
273 		*d = '\0';
274 
275 		uri = soup_uri_new_with_base (base, clean);
276 		g_free (clean);
277 		return uri;
278 	}
279 	end = uri_string + len;
280 	while (end > uri_string && g_ascii_isspace (end[-1]))
281 		end--;
282 
283 	uri = g_slice_new0 (SoupURI);
284 
285 	/* Find fragment. */
286 	hash = strchr (uri_string, '#');
287 	if (hash) {
288 		uri->fragment = uri_normalized_copy (hash + 1, end - hash + 1,
289 						     NULL);
290 		end = hash;
291 	}
292 
293 	/* Find scheme */
294 	p = uri_string;
295 	while (p < end && (g_ascii_isalpha (*p) ||
296 			   (p > uri_string && (g_ascii_isdigit (*p) ||
297 					       *p == '.' ||
298 					       *p == '+' ||
299 					       *p == '-'))))
300 		p++;
301 
302 	if (p > uri_string && *p == ':') {
303 		uri->scheme = soup_uri_parse_scheme (uri_string, p - uri_string);
304 		uri_string = p + 1;
305 	}
306 
307 	if (uri_string == end && !base && !uri->fragment) {
308 		uri->path = g_strdup ("");
309 		return uri;
310         }
311 
312 	/* Check for authority */
313 	if (strncmp (uri_string, "//", 2) == 0) {
314 		uri_string += 2;
315 
316 		path = uri_string + strcspn (uri_string, "/?#");
317 		if (path > end)
318 			path = end;
319 		at = strchr (uri_string, '@');
320 		if (at && at < path) {
321 			colon = strchr (uri_string, ':');
322 			if (colon && colon < at) {
323 				uri->password = soup_uri_decoded_copy (colon + 1,
324 								       at - colon - 1, NULL);
325 			} else {
326 				uri->password = NULL;
327 				colon = at;
328 			}
329 
330 			uri->user = soup_uri_decoded_copy (uri_string,
331 							   colon - uri_string, NULL);
332 			uri_string = at + 1;
333 		} else
334 			uri->user = uri->password = NULL;
335 
336 		/* Find host and port. */
337 		if (*uri_string == '[') {
338 			const char *pct;
339 
340 			uri_string++;
341 			hostend = strchr (uri_string, ']');
342 			if (!hostend || hostend > path) {
343 				soup_uri_free (uri);
344 				return NULL;
345 			}
346 			if (*(hostend + 1) == ':')
347 				colon = hostend + 1;
348 			else
349 				colon = NULL;
350 
351 			pct = memchr (uri_string, '%', hostend - uri_string);
352 			if (!pct || (pct[1] == '2' && pct[2] == '5')) {
353 				uri->host = soup_uri_decoded_copy (uri_string,
354 								   hostend - uri_string, NULL);
355 			} else
356 				uri->host = g_strndup (uri_string, hostend - uri_string);
357 		} else {
358 			colon = memchr (uri_string, ':', path - uri_string);
359 			hostend = colon ? colon : path;
360 			uri->host = soup_uri_decoded_copy (uri_string,
361 							   hostend - uri_string, NULL);
362 		}
363 
364 		if (colon && colon != path - 1) {
365 			char *portend;
366 			uri->port = strtoul (colon + 1, &portend, 10);
367 			if (portend != (char *)path) {
368 				soup_uri_free (uri);
369 				return NULL;
370 			}
371 		}
372 
373 		uri_string = path;
374 	}
375 
376 	/* Find query */
377 	question = memchr (uri_string, '?', end - uri_string);
378 	if (question) {
379 		uri->query = uri_normalized_copy (question + 1,
380 						  end - (question + 1),
381 						  NULL);
382 		end = question;
383 	}
384 
385 	if (end != uri_string) {
386 		uri->path = uri_normalized_copy (uri_string, end - uri_string,
387 						 NULL);
388 	}
389 
390 	/* Apply base URI. This is spelled out in RFC 3986. */
391 	if (base && !uri->scheme && uri->host)
392 		uri->scheme = base->scheme;
393 	else if (base && !uri->scheme) {
394 		uri->scheme = base->scheme;
395 		uri->user = g_strdup (base->user);
396 		uri->password = g_strdup (base->password);
397 		uri->host = g_strdup (base->host);
398 		uri->port = base->port;
399 
400 		if (!uri->path) {
401 			uri->path = g_strdup (base->path);
402 			if (!uri->query)
403 				uri->query = g_strdup (base->query);
404 			remove_dot_segments = FALSE;
405 		} else if (*uri->path != '/') {
406 			char *newpath, *last;
407 
408 			last = strrchr (base->path, '/');
409 			if (last) {
410 				newpath = g_strdup_printf ("%.*s%s",
411 							   (int)(last + 1 - base->path),
412 							   base->path,
413 							   uri->path);
414 			} else
415 				newpath = g_strdup_printf ("/%s", uri->path);
416 
417 			g_free (uri->path);
418 			uri->path = newpath;
419 		}
420 	}
421 
422 	if (remove_dot_segments && uri->path && *uri->path) {
423 		char *p, *q;
424 
425 		/* Remove "./" where "." is a complete segment. */
426 		for (p = uri->path + 1; *p; ) {
427 			if (*(p - 1) == '/' &&
428 			    *p == '.' && *(p + 1) == '/')
429 				memmove (p, p + 2, strlen (p + 2) + 1);
430 			else
431 				p++;
432 		}
433 		/* Remove "." at end. */
434 		if (p > uri->path + 2 &&
435 		    *(p - 1) == '.' && *(p - 2) == '/')
436 			*(p - 1) = '\0';
437 
438 		/* Remove "<segment>/../" where <segment> != ".." */
439 		for (p = uri->path + 1; *p; ) {
440 			if (!strncmp (p, "../", 3)) {
441 				p += 3;
442 				continue;
443 			}
444 			q = strchr (p + 1, '/');
445 			if (!q)
446 				break;
447 			if (strncmp (q, "/../", 4) != 0) {
448 				p = q + 1;
449 				continue;
450 			}
451 			memmove (p, q + 4, strlen (q + 4) + 1);
452 			p = uri->path + 1;
453 		}
454 		/* Remove "<segment>/.." at end where <segment> != ".." */
455 		q = strrchr (uri->path, '/');
456 		if (q && q != uri->path && !strcmp (q, "/..")) {
457 			p = q - 1;
458 			while (p > uri->path && *p != '/')
459 				p--;
460 			if (strncmp (p, "/../", 4) != 0)
461 				*(p + 1) = 0;
462 		}
463 
464 		/* Remove extraneous initial "/.."s */
465 		while (!strncmp (uri->path, "/../", 4))
466 			memmove (uri->path, uri->path + 3, strlen (uri->path) - 2);
467 		if (!strcmp (uri->path, "/.."))
468 			uri->path[1] = '\0';
469 	}
470 
471 	/* HTTP-specific stuff */
472 	if (uri->scheme == SOUP_URI_SCHEME_HTTP ||
473 	    uri->scheme == SOUP_URI_SCHEME_HTTPS) {
474 		if (!uri->path)
475 			uri->path = g_strdup ("/");
476 		if (!SOUP_URI_VALID_FOR_HTTP (uri)) {
477 			soup_uri_free (uri);
478 			return NULL;
479 		}
480 	}
481 
482 	if (uri->scheme == SOUP_URI_SCHEME_FTP) {
483 		if (!uri->host) {
484 			soup_uri_free (uri);
485 			return NULL;
486 		}
487 	}
488 
489 	if (!uri->port)
490 		uri->port = soup_scheme_default_port (uri->scheme);
491 	if (!uri->path)
492 		uri->path = g_strdup ("");
493 
494 	return uri;
495 }
496 
497 /**
498  * soup_uri_new:
499  * @uri_string: (allow-none): a URI
500  *
501  * Parses an absolute URI.
502  *
503  * You can also pass %NULL for @uri_string if you want to get back an
504  * "empty" #SoupURI that you can fill in by hand. (You will need to
505  * call at least soup_uri_set_scheme() and soup_uri_set_path(), since
506  * those fields are required.)
507  *
508  * Return value: (nullable): a #SoupURI, or %NULL if the given string
509  *  was found to be invalid.
510  **/
511 SoupURI *
soup_uri_new(const char * uri_string)512 soup_uri_new (const char *uri_string)
513 {
514 	SoupURI *uri;
515 
516 	if (!uri_string)
517 		return g_slice_new0 (SoupURI);
518 
519 	uri = soup_uri_new_with_base (NULL, uri_string);
520 	if (!uri)
521 		return NULL;
522 	if (!SOUP_URI_IS_VALID (uri)) {
523 		soup_uri_free (uri);
524 		return NULL;
525 	}
526 
527 	return uri;
528 }
529 
530 
531 char *
soup_uri_to_string_internal(SoupURI * uri,gboolean just_path_and_query,gboolean include_password,gboolean force_port)532 soup_uri_to_string_internal (SoupURI *uri, gboolean just_path_and_query,
533 			     gboolean include_password, gboolean force_port)
534 {
535 	GString *str;
536 	char *return_result;
537 
538 	g_return_val_if_fail (uri != NULL, NULL);
539 	g_warn_if_fail (SOUP_URI_IS_VALID (uri));
540 
541 	str = g_string_sized_new (40);
542 
543 	if (uri->scheme && !just_path_and_query)
544 		g_string_append_printf (str, "%s:", uri->scheme);
545 	if (uri->host && !just_path_and_query) {
546 		g_string_append (str, "//");
547 		if (uri->user) {
548 			append_uri_encoded (str, uri->user, ":;@?/");
549 			if (uri->password && include_password) {
550 				g_string_append_c (str, ':');
551 				append_uri_encoded (str, uri->password, ";@?/");
552 			}
553 			g_string_append_c (str, '@');
554 		}
555 		if (strchr (uri->host, ':')) {
556 			const char *pct;
557 
558 			g_string_append_c (str, '[');
559 			pct = strchr (uri->host, '%');
560 			if (pct) {
561 				g_string_append_printf (str, "%.*s%%25%s",
562 							(int) (pct - uri->host),
563 							uri->host, pct + 1);
564 			} else
565 				g_string_append (str, uri->host);
566 			g_string_append_c (str, ']');
567 		} else
568 			append_uri_encoded (str, uri->host, ":/");
569 		if (uri->port && (force_port || uri->port != soup_scheme_default_port (uri->scheme)))
570 			g_string_append_printf (str, ":%u", uri->port);
571 		if (!uri->path && (uri->query || uri->fragment))
572 			g_string_append_c (str, '/');
573 		else if ((!uri->path || !*uri->path) &&
574 			 (uri->scheme == SOUP_URI_SCHEME_HTTP ||
575 			  uri->scheme == SOUP_URI_SCHEME_HTTPS))
576 			g_string_append_c (str, '/');
577 	}
578 
579 	if (uri->path && *uri->path)
580 		g_string_append (str, uri->path);
581 	else if (just_path_and_query)
582 		g_string_append_c (str, '/');
583 
584 	if (uri->query) {
585 		g_string_append_c (str, '?');
586 		g_string_append (str, uri->query);
587 	}
588 	if (uri->fragment && !just_path_and_query) {
589 		g_string_append_c (str, '#');
590 		g_string_append (str, uri->fragment);
591 	}
592 
593 	return_result = str->str;
594 	g_string_free (str, FALSE);
595 
596 	return return_result;
597 }
598 
599 /**
600  * soup_uri_to_string:
601  * @uri: a #SoupURI
602  * @just_path_and_query: if %TRUE, output just the path and query portions
603  *
604  * Returns a string representing @uri.
605  *
606  * If @just_path_and_query is %TRUE, this concatenates the path and query
607  * together. That is, it constructs the string that would be needed in
608  * the Request-Line of an HTTP request for @uri.
609  *
610  * Note that the output will never contain a password, even if @uri
611  * does.
612  *
613  * Return value: a string representing @uri, which the caller must free.
614  **/
615 char *
soup_uri_to_string(SoupURI * uri,gboolean just_path_and_query)616 soup_uri_to_string (SoupURI *uri, gboolean just_path_and_query)
617 {
618 	return soup_uri_to_string_internal (uri, just_path_and_query, FALSE, FALSE);
619 }
620 
621 /**
622  * soup_uri_copy:
623  * @uri: a #SoupURI
624  *
625  * Copies @uri
626  *
627  * Return value: a copy of @uri, which must be freed with soup_uri_free()
628  **/
629 SoupURI *
soup_uri_copy(SoupURI * uri)630 soup_uri_copy (SoupURI *uri)
631 {
632 	SoupURI *dup;
633 
634 	g_return_val_if_fail (uri != NULL, NULL);
635 	g_warn_if_fail (SOUP_URI_IS_VALID (uri));
636 
637 	dup = g_slice_new0 (SoupURI);
638 	dup->scheme   = uri->scheme;
639 	dup->user     = g_strdup (uri->user);
640 	dup->password = g_strdup (uri->password);
641 	dup->host     = g_strdup (uri->host);
642 	dup->port     = uri->port;
643 	dup->path     = g_strdup (uri->path);
644 	dup->query    = g_strdup (uri->query);
645 	dup->fragment = g_strdup (uri->fragment);
646 
647 	return dup;
648 }
649 
650 static inline gboolean
parts_equal(const char * one,const char * two,gboolean insensitive)651 parts_equal (const char *one, const char *two, gboolean insensitive)
652 {
653 	if (!one && !two)
654 		return TRUE;
655 	if (!one || !two)
656 		return FALSE;
657 	return insensitive ? !g_ascii_strcasecmp (one, two) : !strcmp (one, two);
658 }
659 
660 /**
661  * soup_uri_equal:
662  * @uri1: a #SoupURI
663  * @uri2: another #SoupURI
664  *
665  * Tests whether or not @uri1 and @uri2 are equal in all parts
666  *
667  * Return value: %TRUE or %FALSE
668  **/
669 gboolean
soup_uri_equal(SoupURI * uri1,SoupURI * uri2)670 soup_uri_equal (SoupURI *uri1, SoupURI *uri2)
671 {
672 	g_return_val_if_fail (uri1 != NULL, FALSE);
673 	g_return_val_if_fail (uri2 != NULL, FALSE);
674 	g_warn_if_fail (SOUP_URI_IS_VALID (uri1));
675 	g_warn_if_fail (SOUP_URI_IS_VALID (uri2));
676 
677 	if (uri1->scheme != uri2->scheme                         ||
678 	    uri1->port   != uri2->port                           ||
679 	    !parts_equal (uri1->user, uri2->user, FALSE)         ||
680 	    !parts_equal (uri1->password, uri2->password, FALSE) ||
681 	    !parts_equal (uri1->host, uri2->host, TRUE)          ||
682 	    !parts_equal (uri1->path, uri2->path, FALSE)         ||
683 	    !parts_equal (uri1->query, uri2->query, FALSE)       ||
684 	    !parts_equal (uri1->fragment, uri2->fragment, FALSE))
685 		return FALSE;
686 
687 	return TRUE;
688 }
689 
690 /**
691  * soup_uri_free:
692  * @uri: a #SoupURI
693  *
694  * Frees @uri.
695  **/
696 void
soup_uri_free(SoupURI * uri)697 soup_uri_free (SoupURI *uri)
698 {
699 	g_return_if_fail (uri != NULL);
700 
701 	g_free (uri->user);
702 	g_free (uri->password);
703 	g_free (uri->host);
704 	g_free (uri->path);
705 	g_free (uri->query);
706 	g_free (uri->fragment);
707 
708 	g_slice_free (SoupURI, uri);
709 }
710 
711 static void
append_uri_encoded(GString * str,const char * in,const char * extra_enc_chars)712 append_uri_encoded (GString *str, const char *in, const char *extra_enc_chars)
713 {
714 	const unsigned char *s = (const unsigned char *)in;
715 
716 	while (*s) {
717 		if (soup_char_is_uri_percent_encoded (*s) ||
718 		    soup_char_is_uri_gen_delims (*s) ||
719 		    (extra_enc_chars && strchr (extra_enc_chars, *s)))
720 			g_string_append_printf (str, "%%%02X", (int)*s++);
721 		else
722 			g_string_append_c (str, *s++);
723 	}
724 }
725 
726 /**
727  * soup_uri_encode:
728  * @part: a URI part
729  * @escape_extra: (allow-none): additional reserved characters to
730  * escape (or %NULL)
731  *
732  * This %<!-- -->-encodes the given URI part and returns the escaped
733  * version in allocated memory, which the caller must free when it is
734  * done.
735  *
736  * Return value: the encoded URI part
737  **/
738 char *
soup_uri_encode(const char * part,const char * escape_extra)739 soup_uri_encode (const char *part, const char *escape_extra)
740 {
741 	GString *str;
742 	char *encoded;
743 
744 	g_return_val_if_fail (part != NULL, NULL);
745 
746 	str = g_string_new (NULL);
747 	append_uri_encoded (str, part, escape_extra);
748 	encoded = str->str;
749 	g_string_free (str, FALSE);
750 
751 	return encoded;
752 }
753 
/* XDIGIT: numeric value of the hex digit character @c. @c must already
 * be a valid hex digit and is evaluated more than once, so it must not
 * have side effects. Masking with 0x4F folds 'a'..'f' onto 'A'..'F'.
 */
#define XDIGIT(c) ((c) <= '9' ? (c) - '0' : ((c) & 0x4F) - 'A' + 10)
/* HEXCHAR: byte value of the "%XY" escape starting at @s, i.e. the two
 * hex digits at s[1] and s[2]. @s is parenthesized so that pointer
 * expressions such as `p + 1` index correctly.
 */
#define HEXCHAR(s) ((XDIGIT ((s)[1]) << 4) + XDIGIT ((s)[2]))
756 
757 /* length must be set (e.g. from strchr()) such that [part, part + length]
758  * contains no nul bytes */
759 char *
soup_uri_decoded_copy(const char * part,int length,int * decoded_length)760 soup_uri_decoded_copy (const char *part, int length, int *decoded_length)
761 {
762 	unsigned char *s, *d;
763 	char *decoded;
764 
765 	g_return_val_if_fail (part != NULL, NULL);
766 
767 	decoded = g_strndup (part, length);
768 	s = d = (unsigned char *)decoded;
769 	do {
770 		if (*s == '%') {
771 			if (s[1] == '\0' ||
772 			    s[2] == '\0' ||
773 			    !g_ascii_isxdigit (s[1]) ||
774 			    !g_ascii_isxdigit (s[2])) {
775 				*d++ = *s;
776 				continue;
777 			}
778 			*d++ = HEXCHAR (s);
779 			s += 2;
780 		} else
781 			*d++ = *s;
782 	} while (*s++);
783 
784 	if (decoded_length)
785 		*decoded_length = d - (unsigned char *)decoded - 1;
786 
787 	return decoded;
788 }
789 
/**
 * soup_uri_decode:
 * @part: a URI part
 *
 * Fully %<!-- -->-decodes @part.
 *
 * Earlier versions returned %NULL when @part contained invalid
 * percent-encoding; now such sequences are simply left as they are
 * (which is what soup_uri_new() already did).
 *
 * Return value: the decoded URI part.
 */
char *
soup_uri_decode (const char *part)
{
	int part_len;

	g_return_val_if_fail (part != NULL, NULL);

	part_len = strlen (part);
	return soup_uri_decoded_copy (part, part_len, NULL);
}
809 
810 /* length must be set (e.g. from strchr()) such that [part, part + length]
811  * contains no nul bytes */
812 static char *
uri_normalized_copy(const char * part,int length,const char * unescape_extra)813 uri_normalized_copy (const char *part, int length,
814 		     const char *unescape_extra)
815 {
816 	unsigned char *s, *d, c;
817 	char *normalized = g_strndup (part, length);
818 	gboolean need_fixup = FALSE;
819 
820 	if (!unescape_extra)
821 		unescape_extra = "";
822 
823 	s = d = (unsigned char *)normalized;
824 	while (*s) {
825 		if (*s == '%') {
826 			if (s[1] == '\0' ||
827 			    s[2] == '\0' ||
828 			    !g_ascii_isxdigit (s[1]) ||
829 			    !g_ascii_isxdigit (s[2])) {
830 				*d++ = *s++;
831 				continue;
832 			}
833 
834 			c = HEXCHAR (s);
835 			if (soup_char_is_uri_unreserved (c) ||
836 			    (c && strchr (unescape_extra, c))) {
837 				*d++ = c;
838 				s += 3;
839 			} else {
840 				/* We leave it unchanged. We used to uppercase percent-encoded
841 				 * triplets but we do not do it any more as RFC3986 Section 6.2.2.1
842 				 * says that they only SHOULD be case normalized.
843 				 */
844 				*d++ = *s++;
845 				*d++ = *s++;
846 				*d++ = *s++;
847 			}
848 		} else {
849 			if (!g_ascii_isgraph (*s) &&
850 			    !strchr (unescape_extra, *s))
851 				need_fixup = TRUE;
852 			*d++ = *s++;
853 		}
854 	}
855 	*d = '\0';
856 
857 	if (need_fixup) {
858 		GString *fixed;
859 
860 		fixed = g_string_new (NULL);
861 		s = (guchar *)normalized;
862 		while (*s) {
863 			if (g_ascii_isgraph (*s) ||
864 			    strchr (unescape_extra, *s))
865 				g_string_append_c (fixed, *s);
866 			else
867 				g_string_append_printf (fixed, "%%%02X", (int)*s);
868 			s++;
869 		}
870 		g_free (normalized);
871 		normalized = g_string_free (fixed, FALSE);
872 	}
873 
874 	return normalized;
875 }
876 
/**
 * soup_uri_normalize:
 * @part: a URI part
 * @unescape_extra: (allow-none): reserved characters to unescape (or %NULL)
 *
 * %<!-- -->-decodes any "unreserved" characters (or characters in
 * @unescape_extra) in @part, and %<!-- -->-encodes any non-ASCII
 * characters, spaces, and non-printing characters in @part.
 *
 * "Unreserved" characters are the ones the URI spec does not allow to
 * be used as punctuation. Letters, for instance, are unreserved, so
 * <literal>http://example.com/foo/b%<!-- -->61r</literal> becomes
 * <literal>http://example.com/foo/bar</literal>, which is guaranteed
 * to mean the same thing. "/" on the other hand is "reserved", so
 * <literal>http://example.com/foo%<!-- -->2Fbar</literal> is left
 * alone, because decoding it might change what the server sees.
 *
 * Earlier versions returned %NULL when @part contained invalid
 * percent-encoding; now such sequences are simply left as they are
 * (which is what soup_uri_new() already did).
 *
 * Return value: the normalized URI part
 */
char *
soup_uri_normalize (const char *part, const char *unescape_extra)
{
	int part_len;

	g_return_val_if_fail (part != NULL, NULL);

	part_len = strlen (part);
	return uri_normalized_copy (part, part_len, unescape_extra);
}
909 
910 
911 /**
912  * soup_uri_uses_default_port:
913  * @uri: a #SoupURI
914  *
915  * Tests if @uri uses the default port for its scheme. (Eg, 80 for
916  * http.) (This only works for http, https and ftp; libsoup does not know
917  * the default ports of other protocols.)
918  *
919  * Return value: %TRUE or %FALSE
920  **/
921 gboolean
soup_uri_uses_default_port(SoupURI * uri)922 soup_uri_uses_default_port (SoupURI *uri)
923 {
924 	g_return_val_if_fail (uri != NULL, FALSE);
925 	g_warn_if_fail (SOUP_URI_IS_VALID (uri));
926 
927 	return uri->port == soup_scheme_default_port (uri->scheme);
928 }
929 
930 /**
931  * soup_uri_get_scheme:
932  * @uri: a #SoupURI
933  *
934  * Gets @uri's scheme.
935  *
936  * Return value: @uri's scheme.
937  *
938  * Since: 2.32
939  **/
940 const char *
soup_uri_get_scheme(SoupURI * uri)941 soup_uri_get_scheme (SoupURI *uri)
942 {
943 	g_return_val_if_fail (uri != NULL, NULL);
944 
945 	return uri->scheme;
946 }
947 
948 /**
949  * soup_uri_set_scheme:
950  * @uri: a #SoupURI
951  * @scheme: the URI scheme
952  *
953  * Sets @uri's scheme to @scheme. This will also set @uri's port to
954  * the default port for @scheme, if known.
955  **/
956 void
soup_uri_set_scheme(SoupURI * uri,const char * scheme)957 soup_uri_set_scheme (SoupURI *uri, const char *scheme)
958 {
959 	g_return_if_fail (uri != NULL);
960 	g_return_if_fail (scheme != NULL);
961 
962 	uri->scheme = soup_uri_parse_scheme (scheme, strlen (scheme));
963 	uri->port = soup_scheme_default_port (uri->scheme);
964 }
965 
966 /**
967  * soup_uri_get_user:
968  * @uri: a #SoupURI
969  *
970  * Gets @uri's user.
971  *
972  * Return value: @uri's user.
973  *
974  * Since: 2.32
975  **/
976 const char *
soup_uri_get_user(SoupURI * uri)977 soup_uri_get_user (SoupURI *uri)
978 {
979 	g_return_val_if_fail (uri != NULL, NULL);
980 
981 	return uri->user;
982 }
983 
984 /**
985  * soup_uri_set_user:
986  * @uri: a #SoupURI
987  * @user: (allow-none): the username, or %NULL
988  *
989  * Sets @uri's user to @user.
990  **/
991 void
soup_uri_set_user(SoupURI * uri,const char * user)992 soup_uri_set_user (SoupURI *uri, const char *user)
993 {
994 	g_return_if_fail (uri != NULL);
995 
996 	g_free (uri->user);
997 	uri->user = g_strdup (user);
998 }
999 
1000 /**
1001  * soup_uri_get_password:
1002  * @uri: a #SoupURI
1003  *
1004  * Gets @uri's password.
1005  *
1006  * Return value: @uri's password.
1007  *
1008  * Since: 2.32
1009  **/
1010 const char *
soup_uri_get_password(SoupURI * uri)1011 soup_uri_get_password (SoupURI *uri)
1012 {
1013 	g_return_val_if_fail (uri != NULL, NULL);
1014 
1015 	return uri->password;
1016 }
1017 
1018 /**
1019  * soup_uri_set_password:
1020  * @uri: a #SoupURI
1021  * @password: (allow-none): the password, or %NULL
1022  *
1023  * Sets @uri's password to @password.
1024  **/
1025 void
soup_uri_set_password(SoupURI * uri,const char * password)1026 soup_uri_set_password (SoupURI *uri, const char *password)
1027 {
1028 	g_return_if_fail (uri != NULL);
1029 
1030 	g_free (uri->password);
1031 	uri->password = g_strdup (password);
1032 }
1033 
1034 /**
1035  * soup_uri_get_host:
1036  * @uri: a #SoupURI
1037  *
1038  * Gets @uri's host.
1039  *
1040  * Return value: @uri's host.
1041  *
1042  * Since: 2.32
1043  **/
1044 const char *
soup_uri_get_host(SoupURI * uri)1045 soup_uri_get_host (SoupURI *uri)
1046 {
1047 	g_return_val_if_fail (uri != NULL, NULL);
1048 
1049 	return uri->host;
1050 }
1051 
1052 /**
1053  * soup_uri_set_host:
1054  * @uri: a #SoupURI
1055  * @host: (allow-none): the hostname or IP address, or %NULL
1056  *
1057  * Sets @uri's host to @host.
1058  *
1059  * If @host is an IPv6 IP address, it should not include the brackets
1060  * required by the URI syntax; they will be added automatically when
1061  * converting @uri to a string.
1062  *
1063  * http and https URIs should not have a %NULL @host.
1064  **/
1065 void
soup_uri_set_host(SoupURI * uri,const char * host)1066 soup_uri_set_host (SoupURI *uri, const char *host)
1067 {
1068 	g_return_if_fail (uri != NULL);
1069 
1070 	g_free (uri->host);
1071 	uri->host = g_strdup (host);
1072 }
1073 
1074 /**
1075  * soup_uri_get_port:
1076  * @uri: a #SoupURI
1077  *
1078  * Gets @uri's port.
1079  *
1080  * Return value: @uri's port.
1081  *
1082  * Since: 2.32
1083  **/
1084 guint
soup_uri_get_port(SoupURI * uri)1085 soup_uri_get_port (SoupURI *uri)
1086 {
1087 	g_return_val_if_fail (uri != NULL, 0);
1088 
1089 	return uri->port;
1090 }
1091 
1092 /**
1093  * soup_uri_set_port:
1094  * @uri: a #SoupURI
1095  * @port: the port, or 0
1096  *
1097  * Sets @uri's port to @port. If @port is 0, @uri will not have an
1098  * explicitly-specified port.
1099  **/
1100 void
soup_uri_set_port(SoupURI * uri,guint port)1101 soup_uri_set_port (SoupURI *uri, guint port)
1102 {
1103 	g_return_if_fail (uri != NULL);
1104 
1105 	uri->port = port;
1106 }
1107 
1108 /**
1109  * soup_uri_get_path:
1110  * @uri: a #SoupURI
1111  *
1112  * Gets @uri's path.
1113  *
1114  * Return value: @uri's path.
1115  *
1116  * Since: 2.32
1117  **/
1118 const char *
soup_uri_get_path(SoupURI * uri)1119 soup_uri_get_path (SoupURI *uri)
1120 {
1121 	g_return_val_if_fail (uri != NULL, NULL);
1122 
1123 	return uri->path;
1124 }
1125 
1126 /**
1127  * soup_uri_set_path:
1128  * @uri: a #SoupURI
1129  * @path: the non-%NULL path
1130  *
1131  * Sets @uri's path to @path.
1132  **/
1133 void
soup_uri_set_path(SoupURI * uri,const char * path)1134 soup_uri_set_path (SoupURI *uri, const char *path)
1135 {
1136 	g_return_if_fail (uri != NULL);
1137 
1138 	/* We allow a NULL path for compatibility, but warn about it. */
1139 	if (!path) {
1140 		g_warn_if_fail (path != NULL);
1141 		path = "";
1142 	}
1143 
1144 	g_free (uri->path);
1145 	uri->path = g_strdup (path);
1146 }
1147 
1148 /**
1149  * soup_uri_get_query:
1150  * @uri: a #SoupURI
1151  *
1152  * Gets @uri's query.
1153  *
1154  * Return value: @uri's query.
1155  *
1156  * Since: 2.32
1157  **/
1158 const char *
soup_uri_get_query(SoupURI * uri)1159 soup_uri_get_query (SoupURI *uri)
1160 {
1161 	g_return_val_if_fail (uri != NULL, NULL);
1162 
1163 	return uri->query;
1164 }
1165 
1166 /**
1167  * soup_uri_set_query:
1168  * @uri: a #SoupURI
1169  * @query: (allow-none): the query
1170  *
1171  * Sets @uri's query to @query.
1172  **/
1173 void
soup_uri_set_query(SoupURI * uri,const char * query)1174 soup_uri_set_query (SoupURI *uri, const char *query)
1175 {
1176 	g_return_if_fail (uri != NULL);
1177 
1178 	g_free (uri->query);
1179 	uri->query = g_strdup (query);
1180 }
1181 
1182 /**
1183  * soup_uri_set_query_from_form:
1184  * @uri: a #SoupURI
1185  * @form: (element-type utf8 utf8): a #GHashTable containing HTML form
1186  * information
1187  *
1188  * Sets @uri's query to the result of encoding @form according to the
1189  * HTML form rules. See soup_form_encode_hash() for more information.
1190  **/
1191 void
soup_uri_set_query_from_form(SoupURI * uri,GHashTable * form)1192 soup_uri_set_query_from_form (SoupURI *uri, GHashTable *form)
1193 {
1194 	g_return_if_fail (uri != NULL);
1195 
1196 	g_free (uri->query);
1197 	uri->query = soup_form_encode_hash (form);
1198 }
1199 
1200 /**
1201  * soup_uri_set_query_from_fields:
1202  * @uri: a #SoupURI
1203  * @first_field: name of the first form field to encode into query
1204  * @...: value of @first_field, followed by additional field names
1205  * and values, terminated by %NULL.
1206  *
1207  * Sets @uri's query to the result of encoding the given form fields
1208  * and values according to the * HTML form rules. See
1209  * soup_form_encode() for more information.
1210  **/
1211 void
soup_uri_set_query_from_fields(SoupURI * uri,const char * first_field,...)1212 soup_uri_set_query_from_fields (SoupURI    *uri,
1213 				const char *first_field,
1214 				...)
1215 {
1216 	va_list args;
1217 
1218 	g_return_if_fail (uri != NULL);
1219 
1220 	g_free (uri->query);
1221 	va_start (args, first_field);
1222 	uri->query = soup_form_encode_valist (first_field, args);
1223 	va_end (args);
1224 }
1225 
1226 /**
1227  * soup_uri_get_fragment:
1228  * @uri: a #SoupURI
1229  *
1230  * Gets @uri's fragment.
1231  *
1232  * Return value: @uri's fragment.
1233  *
1234  * Since: 2.32
1235  **/
1236 const char *
soup_uri_get_fragment(SoupURI * uri)1237 soup_uri_get_fragment (SoupURI *uri)
1238 {
1239 	g_return_val_if_fail (uri != NULL, NULL);
1240 
1241 	return uri->fragment;
1242 }
1243 
1244 /**
1245  * soup_uri_set_fragment:
1246  * @uri: a #SoupURI
1247  * @fragment: (allow-none): the fragment
1248  *
1249  * Sets @uri's fragment to @fragment.
1250  **/
1251 void
soup_uri_set_fragment(SoupURI * uri,const char * fragment)1252 soup_uri_set_fragment (SoupURI *uri, const char *fragment)
1253 {
1254 	g_return_if_fail (uri != NULL);
1255 
1256 	g_free (uri->fragment);
1257 	uri->fragment = g_strdup (fragment);
1258 }
1259 
1260 /**
1261  * soup_uri_copy_host:
1262  * @uri: a #SoupURI
1263  *
1264  * Makes a copy of @uri, considering only the protocol, host, and port
1265  *
1266  * Return value: the new #SoupURI
1267  *
1268  * Since: 2.28
1269  **/
1270 SoupURI *
soup_uri_copy_host(SoupURI * uri)1271 soup_uri_copy_host (SoupURI *uri)
1272 {
1273 	SoupURI *dup;
1274 
1275 	g_return_val_if_fail (uri != NULL, NULL);
1276 	g_warn_if_fail (SOUP_URI_IS_VALID (uri));
1277 
1278 	dup = soup_uri_new (NULL);
1279 	dup->scheme = uri->scheme;
1280 	dup->host   = g_strdup (uri->host);
1281 	dup->port   = uri->port;
1282 	dup->path   = g_strdup ("");
1283 
1284 	return dup;
1285 }
1286 
1287 /**
1288  * soup_uri_host_hash:
1289  * @key: (type Soup.URI): a #SoupURI with a non-%NULL @host member
1290  *
1291  * Hashes @key, considering only the scheme, host, and port.
1292  *
1293  * Return value: a hash
1294  *
1295  * Since: 2.28
1296  **/
1297 guint
soup_uri_host_hash(gconstpointer key)1298 soup_uri_host_hash (gconstpointer key)
1299 {
1300 	const SoupURI *uri = key;
1301 
1302 	g_return_val_if_fail (uri != NULL && uri->host != NULL, 0);
1303 	g_warn_if_fail (SOUP_URI_IS_VALID (uri));
1304 
1305 	return GPOINTER_TO_UINT (uri->scheme) + uri->port +
1306 		soup_str_case_hash (uri->host);
1307 }
1308 
1309 /**
1310  * soup_uri_host_equal:
1311  * @v1: (type Soup.URI): a #SoupURI with a non-%NULL @host member
1312  * @v2: (type Soup.URI): a #SoupURI with a non-%NULL @host member
1313  *
1314  * Compares @v1 and @v2, considering only the scheme, host, and port.
1315  *
1316  * Return value: whether or not the URIs are equal in scheme, host,
1317  * and port.
1318  *
1319  * Since: 2.28
1320  **/
1321 gboolean
soup_uri_host_equal(gconstpointer v1,gconstpointer v2)1322 soup_uri_host_equal (gconstpointer v1, gconstpointer v2)
1323 {
1324 	const SoupURI *one = v1;
1325 	const SoupURI *two = v2;
1326 
1327 	g_return_val_if_fail (one != NULL && two != NULL, one == two);
1328 	g_return_val_if_fail (one->host != NULL && two->host != NULL, one->host == two->host);
1329 	g_warn_if_fail (SOUP_URI_IS_VALID (one));
1330 	g_warn_if_fail (SOUP_URI_IS_VALID (two));
1331 
1332 	if (one->scheme != two->scheme)
1333 		return FALSE;
1334 	if (one->port != two->port)
1335 		return FALSE;
1336 
1337 	return g_ascii_strcasecmp (one->host, two->host) == 0;
1338 }
1339 
1340 gboolean
soup_uri_is_http(SoupURI * uri,char ** aliases)1341 soup_uri_is_http (SoupURI *uri, char **aliases)
1342 {
1343 	int i;
1344 
1345 	if (uri->scheme == SOUP_URI_SCHEME_HTTP ||
1346 	    uri->scheme == SOUP_URI_SCHEME_WS)
1347 		return TRUE;
1348 	else if (uri->scheme == SOUP_URI_SCHEME_HTTPS ||
1349 		 uri->scheme == SOUP_URI_SCHEME_WSS)
1350 		return FALSE;
1351 	else if (!aliases)
1352 		return FALSE;
1353 
1354 	for (i = 0; aliases[i]; i++) {
1355 		if (uri->scheme == aliases[i])
1356 			return TRUE;
1357 	}
1358 
1359 	if (aliases[0] && !aliases[1] && !strcmp (aliases[0], "*"))
1360 		return TRUE;
1361 	else
1362 		return FALSE;
1363 }
1364 
1365 gboolean
soup_uri_is_https(SoupURI * uri,char ** aliases)1366 soup_uri_is_https (SoupURI *uri, char **aliases)
1367 {
1368 	int i;
1369 
1370 	if (uri->scheme == SOUP_URI_SCHEME_HTTPS ||
1371 	    uri->scheme == SOUP_URI_SCHEME_WSS)
1372 		return TRUE;
1373 	else if (uri->scheme == SOUP_URI_SCHEME_HTTP ||
1374 		 uri->scheme == SOUP_URI_SCHEME_WS)
1375 		return FALSE;
1376 	else if (!aliases)
1377 		return FALSE;
1378 
1379 	for (i = 0; aliases[i]; i++) {
1380 		if (uri->scheme == aliases[i])
1381 			return TRUE;
1382 	}
1383 
1384 	return FALSE;
1385 }
1386 
/* Registers SoupURI as a boxed type, with soup_uri_copy() as its copy
 * function and soup_uri_free() as its free function.
 */
G_DEFINE_BOXED_TYPE (SoupURI, soup_uri, soup_uri_copy, soup_uri_free)
1388