1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /* soup-uri.c : utility functions to parse URLs */
3
4 /*
5 * Copyright 1999-2003 Ximian, Inc.
6 */
7
8 #ifdef HAVE_CONFIG_H
9 #include <config.h>
10 #endif
11
12 #include <string.h>
13 #include <stdlib.h>
14
15 #include "soup-uri.h"
16 #include "soup.h"
17 #include "soup-misc-private.h"
18
19 /**
20 * SECTION:soup-uri
21 * @short_description: URIs
22 *
23 * A #SoupURI represents a (parsed) URI.
24 *
25 * Many applications will not need to use #SoupURI directly at all; on
26 * the client side, soup_message_new() takes a stringified URI, and on
27 * the server side, the path and query components are provided for you
28 * in the server callback.
29 **/
30
31 /**
32 * SoupURI:
33 * @scheme: the URI scheme (eg, "http")
34 * @user: a username, or %NULL
35 * @password: a password, or %NULL
36 * @host: the hostname or IP address, or %NULL
37 * @port: the port number on @host
38 * @path: the path on @host
39 * @query: a query for @path, or %NULL
40 * @fragment: a fragment identifier within @path, or %NULL
41 *
42 * A #SoupURI represents a (parsed) URI. #SoupURI supports RFC 3986
43 * (URI Generic Syntax), and can parse any valid URI. However, libsoup
 * only uses "http" and "https" URIs internally; you can use
45 * SOUP_URI_VALID_FOR_HTTP() to test if a #SoupURI is a valid HTTP
46 * URI.
47 *
48 * @scheme will always be set in any URI. It is an interned string and
49 * is always all lowercase. (If you parse a URI with a non-lowercase
50 * scheme, it will be converted to lowercase.) The macros
51 * %SOUP_URI_SCHEME_HTTP and %SOUP_URI_SCHEME_HTTPS provide the
52 * interned values for "http" and "https" and can be compared against
53 * URI @scheme values.
54 *
55 * @user and @password are parsed as defined in the older URI specs
56 * (ie, separated by a colon; RFC 3986 only talks about a single
57 * "userinfo" field). Note that @password is not included in the
58 * output of soup_uri_to_string(). libsoup does not normally use these
59 * fields; authentication is handled via #SoupSession signals.
60 *
61 * @host contains the hostname, and @port the port specified in the
62 * URI. If the URI doesn't contain a hostname, @host will be %NULL,
63 * and if it doesn't specify a port, @port may be 0. However, for
64 * "http" and "https" URIs, @host is guaranteed to be non-%NULL
65 * (trying to parse an http URI with no @host will return %NULL), and
66 * @port will always be non-0 (because libsoup knows the default value
67 * to use when it is not specified in the URI).
68 *
69 * @path is always non-%NULL. For http/https URIs, @path will never be
70 * an empty string either; if the input URI has no path, the parsed
71 * #SoupURI will have a @path of "/".
72 *
73 * @query and @fragment are optional for all URI types.
74 * soup_form_decode() may be useful for parsing @query.
75 *
76 * Note that @path, @query, and @fragment may contain
77 * %<!-- -->-encoded characters. soup_uri_new() calls
78 * soup_uri_normalize() on them, but not soup_uri_decode(). This is
79 * necessary to ensure that soup_uri_to_string() will generate a URI
80 * that has exactly the same meaning as the original. (In theory,
81 * #SoupURI should leave @user, @password, and @host partially-encoded
82 * as well, but this would be more annoying than useful.)
83 **/
84
85 /**
86 * SOUP_URI_IS_VALID:
87 * @uri: a #SoupURI
88 *
89 * Tests whether @uri is a valid #SoupURI; that is, that it is non-%NULL
90 * and its @scheme and @path members are also non-%NULL.
91 *
92 * This macro does not check whether http and https URIs have a non-%NULL
93 * @host member.
94 *
95 * Return value: %TRUE if @uri is valid for use.
96 *
97 * Since: 2.38
98 **/
99
100 /**
101 * SOUP_URI_VALID_FOR_HTTP:
102 * @uri: a #SoupURI
103 *
104 * Tests if @uri is a valid #SoupURI for HTTP communication; that is, if
105 * it can be used to construct a #SoupMessage.
106 *
107 * Return value: %TRUE if @uri is a valid "http" or "https" URI.
108 *
109 * Since: 2.24
110 **/
111
112 /**
113 * SOUP_URI_SCHEME_HTTP:
114 *
115 * "http" as an interned string; you can compare this directly to a
116 * #SoupURI's <literal>scheme</literal> field using
117 * <literal>==</literal>.
118 */
119 /**
120 * SOUP_URI_SCHEME_HTTPS:
121 *
122 * "https" as an interned string; you can compare this directly to a
123 * #SoupURI's <literal>scheme</literal> field using
124 * <literal>==</literal>.
125 */
126 /**
127 * SOUP_URI_SCHEME_FTP:
128 *
129 * "ftp" as an interned string; you can compare this directly to a
130 * #SoupURI's <literal>scheme</literal> field using
131 * <literal>==</literal>.
132 *
133 * Since: 2.30
134 */
135 /**
136 * SOUP_URI_SCHEME_FILE:
137 *
138 * "file" as an interned string; you can compare this directly to a
139 * #SoupURI's <literal>scheme</literal> field using
140 * <literal>==</literal>.
141 *
142 * Since: 2.30
143 */
144 /**
145 * SOUP_URI_SCHEME_DATA:
146 *
147 * "data" as an interned string; you can compare this directly to a
148 * #SoupURI's <literal>scheme</literal> field using
149 * <literal>==</literal>.
150 *
151 * Since: 2.30
152 */
153 /**
154 * SOUP_URI_SCHEME_RESOURCE:
155 *
 * "resource" as an interned string; you can compare this directly to a
157 * #SoupURI's <literal>scheme</literal> field using
158 * <literal>==</literal>.
159 *
160 * Since: 2.42
161 */
162 /**
163 * SOUP_URI_SCHEME_WS:
164 *
165 * "ws" (WebSocket) as an interned string; you can compare this
166 * directly to a #SoupURI's <literal>scheme</literal> field using
167 * <literal>==</literal>.
168 *
169 * Since: 2.50
170 */
171 /**
172 * SOUP_URI_SCHEME_WSS:
173 *
174 * "wss" (WebSocket over TLS) as an interned string; you can compare
175 * this directly to a #SoupURI's <literal>scheme</literal> field using
176 * <literal>==</literal>.
177 *
178 * Since: 2.50
179 */
180
/* Forward declarations of static helpers that are defined later in this file. */
static void append_uri_encoded (GString *str, const char *in, const char *extra_enc_chars);
static char *uri_normalized_copy (const char *str, int length, const char *unescape_extra);

/* Global pointer slots, one per well-known scheme. NOTE(review): these
 * presumably hold the interned strings behind the SOUP_URI_SCHEME_*
 * macros (declared outside this file) -- confirm against soup-uri.h.
 */
gpointer _SOUP_URI_SCHEME_HTTP, _SOUP_URI_SCHEME_HTTPS;
gpointer _SOUP_URI_SCHEME_WS, _SOUP_URI_SCHEME_WSS;
gpointer _SOUP_URI_SCHEME_FTP;
gpointer _SOUP_URI_SCHEME_FILE, _SOUP_URI_SCHEME_DATA, _SOUP_URI_SCHEME_RESOURCE;
188
189 static inline const char *
soup_uri_parse_scheme(const char * scheme,int len)190 soup_uri_parse_scheme (const char *scheme, int len)
191 {
192 if (len == 4 && !g_ascii_strncasecmp (scheme, "http", len)) {
193 return SOUP_URI_SCHEME_HTTP;
194 } else if (len == 5 && !g_ascii_strncasecmp (scheme, "https", len)) {
195 return SOUP_URI_SCHEME_HTTPS;
196 } else if (len == 8 && !g_ascii_strncasecmp (scheme, "resource", len)) {
197 return SOUP_URI_SCHEME_RESOURCE;
198 } else if (len == 2 && !g_ascii_strncasecmp (scheme, "ws", len)) {
199 return SOUP_URI_SCHEME_WS;
200 } else if (len == 3 && !g_ascii_strncasecmp (scheme, "wss", len)) {
201 return SOUP_URI_SCHEME_WSS;
202 } else {
203 char *lower_scheme;
204
205 lower_scheme = g_ascii_strdown (scheme, len);
206 scheme = g_intern_static_string (lower_scheme);
207 if (scheme != (const char *)lower_scheme)
208 g_free (lower_scheme);
209 return scheme;
210 }
211 }
212
213 static inline guint
soup_scheme_default_port(const char * scheme)214 soup_scheme_default_port (const char *scheme)
215 {
216 if (scheme == SOUP_URI_SCHEME_HTTP || scheme == SOUP_URI_SCHEME_WS)
217 return 80;
218 else if (scheme == SOUP_URI_SCHEME_HTTPS || scheme == SOUP_URI_SCHEME_WSS)
219 return 443;
220 else if (scheme == SOUP_URI_SCHEME_FTP)
221 return 21;
222 else
223 return 0;
224 }
225
226 /**
227 * soup_uri_new_with_base: (constructor)
228 * @base: a base URI
229 * @uri_string: the URI
230 *
231 * Parses @uri_string relative to @base.
232 *
233 * Returns: a parsed #SoupURI.
234 **/
235 SoupURI *
soup_uri_new_with_base(SoupURI * base,const char * uri_string)236 soup_uri_new_with_base (SoupURI *base, const char *uri_string)
237 {
238 SoupURI *uri, fixed_base;
239 const char *end, *hash, *colon, *at, *path, *question;
240 const char *p, *hostend;
241 gboolean remove_dot_segments = TRUE;
242 int len;
243
244 g_return_val_if_fail (uri_string != NULL, NULL);
245
246 /* Allow a %NULL path in @base, for compatibility */
247 if (base && base->scheme && !base->path) {
248 g_warn_if_fail (SOUP_URI_IS_VALID (base));
249
250 memcpy (&fixed_base, base, sizeof (SoupURI));
251 fixed_base.path = "";
252 base = &fixed_base;
253 }
254
255 g_return_val_if_fail (base == NULL || SOUP_URI_IS_VALID (base), NULL);
256
257 /* First some cleanup steps (which are supposed to all be no-ops,
258 * but...). Skip initial whitespace, strip out internal tabs and
259 * line breaks, and ignore trailing whitespace.
260 */
261 while (g_ascii_isspace (*uri_string))
262 uri_string++;
263
264 len = strcspn (uri_string, "\t\n\r");
265 if (uri_string[len]) {
266 char *clean = g_malloc (strlen (uri_string) + 1), *d;
267 const char *s;
268
269 for (s = uri_string, d = clean; *s; s++) {
270 if (*s != '\t' && *s != '\n' && *s != '\r')
271 *d++ = *s;
272 }
273 *d = '\0';
274
275 uri = soup_uri_new_with_base (base, clean);
276 g_free (clean);
277 return uri;
278 }
279 end = uri_string + len;
280 while (end > uri_string && g_ascii_isspace (end[-1]))
281 end--;
282
283 uri = g_slice_new0 (SoupURI);
284
285 /* Find fragment. */
286 hash = strchr (uri_string, '#');
287 if (hash) {
288 uri->fragment = uri_normalized_copy (hash + 1, end - hash + 1,
289 NULL);
290 end = hash;
291 }
292
293 /* Find scheme */
294 p = uri_string;
295 while (p < end && (g_ascii_isalpha (*p) ||
296 (p > uri_string && (g_ascii_isdigit (*p) ||
297 *p == '.' ||
298 *p == '+' ||
299 *p == '-'))))
300 p++;
301
302 if (p > uri_string && *p == ':') {
303 uri->scheme = soup_uri_parse_scheme (uri_string, p - uri_string);
304 uri_string = p + 1;
305 }
306
307 if (uri_string == end && !base && !uri->fragment) {
308 uri->path = g_strdup ("");
309 return uri;
310 }
311
312 /* Check for authority */
313 if (strncmp (uri_string, "//", 2) == 0) {
314 uri_string += 2;
315
316 path = uri_string + strcspn (uri_string, "/?#");
317 if (path > end)
318 path = end;
319 at = strchr (uri_string, '@');
320 if (at && at < path) {
321 colon = strchr (uri_string, ':');
322 if (colon && colon < at) {
323 uri->password = soup_uri_decoded_copy (colon + 1,
324 at - colon - 1, NULL);
325 } else {
326 uri->password = NULL;
327 colon = at;
328 }
329
330 uri->user = soup_uri_decoded_copy (uri_string,
331 colon - uri_string, NULL);
332 uri_string = at + 1;
333 } else
334 uri->user = uri->password = NULL;
335
336 /* Find host and port. */
337 if (*uri_string == '[') {
338 const char *pct;
339
340 uri_string++;
341 hostend = strchr (uri_string, ']');
342 if (!hostend || hostend > path) {
343 soup_uri_free (uri);
344 return NULL;
345 }
346 if (*(hostend + 1) == ':')
347 colon = hostend + 1;
348 else
349 colon = NULL;
350
351 pct = memchr (uri_string, '%', hostend - uri_string);
352 if (!pct || (pct[1] == '2' && pct[2] == '5')) {
353 uri->host = soup_uri_decoded_copy (uri_string,
354 hostend - uri_string, NULL);
355 } else
356 uri->host = g_strndup (uri_string, hostend - uri_string);
357 } else {
358 colon = memchr (uri_string, ':', path - uri_string);
359 hostend = colon ? colon : path;
360 uri->host = soup_uri_decoded_copy (uri_string,
361 hostend - uri_string, NULL);
362 }
363
364 if (colon && colon != path - 1) {
365 char *portend;
366 uri->port = strtoul (colon + 1, &portend, 10);
367 if (portend != (char *)path) {
368 soup_uri_free (uri);
369 return NULL;
370 }
371 }
372
373 uri_string = path;
374 }
375
376 /* Find query */
377 question = memchr (uri_string, '?', end - uri_string);
378 if (question) {
379 uri->query = uri_normalized_copy (question + 1,
380 end - (question + 1),
381 NULL);
382 end = question;
383 }
384
385 if (end != uri_string) {
386 uri->path = uri_normalized_copy (uri_string, end - uri_string,
387 NULL);
388 }
389
390 /* Apply base URI. This is spelled out in RFC 3986. */
391 if (base && !uri->scheme && uri->host)
392 uri->scheme = base->scheme;
393 else if (base && !uri->scheme) {
394 uri->scheme = base->scheme;
395 uri->user = g_strdup (base->user);
396 uri->password = g_strdup (base->password);
397 uri->host = g_strdup (base->host);
398 uri->port = base->port;
399
400 if (!uri->path) {
401 uri->path = g_strdup (base->path);
402 if (!uri->query)
403 uri->query = g_strdup (base->query);
404 remove_dot_segments = FALSE;
405 } else if (*uri->path != '/') {
406 char *newpath, *last;
407
408 last = strrchr (base->path, '/');
409 if (last) {
410 newpath = g_strdup_printf ("%.*s%s",
411 (int)(last + 1 - base->path),
412 base->path,
413 uri->path);
414 } else
415 newpath = g_strdup_printf ("/%s", uri->path);
416
417 g_free (uri->path);
418 uri->path = newpath;
419 }
420 }
421
422 if (remove_dot_segments && uri->path && *uri->path) {
423 char *p, *q;
424
425 /* Remove "./" where "." is a complete segment. */
426 for (p = uri->path + 1; *p; ) {
427 if (*(p - 1) == '/' &&
428 *p == '.' && *(p + 1) == '/')
429 memmove (p, p + 2, strlen (p + 2) + 1);
430 else
431 p++;
432 }
433 /* Remove "." at end. */
434 if (p > uri->path + 2 &&
435 *(p - 1) == '.' && *(p - 2) == '/')
436 *(p - 1) = '\0';
437
438 /* Remove "<segment>/../" where <segment> != ".." */
439 for (p = uri->path + 1; *p; ) {
440 if (!strncmp (p, "../", 3)) {
441 p += 3;
442 continue;
443 }
444 q = strchr (p + 1, '/');
445 if (!q)
446 break;
447 if (strncmp (q, "/../", 4) != 0) {
448 p = q + 1;
449 continue;
450 }
451 memmove (p, q + 4, strlen (q + 4) + 1);
452 p = uri->path + 1;
453 }
454 /* Remove "<segment>/.." at end where <segment> != ".." */
455 q = strrchr (uri->path, '/');
456 if (q && q != uri->path && !strcmp (q, "/..")) {
457 p = q - 1;
458 while (p > uri->path && *p != '/')
459 p--;
460 if (strncmp (p, "/../", 4) != 0)
461 *(p + 1) = 0;
462 }
463
464 /* Remove extraneous initial "/.."s */
465 while (!strncmp (uri->path, "/../", 4))
466 memmove (uri->path, uri->path + 3, strlen (uri->path) - 2);
467 if (!strcmp (uri->path, "/.."))
468 uri->path[1] = '\0';
469 }
470
471 /* HTTP-specific stuff */
472 if (uri->scheme == SOUP_URI_SCHEME_HTTP ||
473 uri->scheme == SOUP_URI_SCHEME_HTTPS) {
474 if (!uri->path)
475 uri->path = g_strdup ("/");
476 if (!SOUP_URI_VALID_FOR_HTTP (uri)) {
477 soup_uri_free (uri);
478 return NULL;
479 }
480 }
481
482 if (uri->scheme == SOUP_URI_SCHEME_FTP) {
483 if (!uri->host) {
484 soup_uri_free (uri);
485 return NULL;
486 }
487 }
488
489 if (!uri->port)
490 uri->port = soup_scheme_default_port (uri->scheme);
491 if (!uri->path)
492 uri->path = g_strdup ("");
493
494 return uri;
495 }
496
497 /**
498 * soup_uri_new:
499 * @uri_string: (allow-none): a URI
500 *
501 * Parses an absolute URI.
502 *
503 * You can also pass %NULL for @uri_string if you want to get back an
504 * "empty" #SoupURI that you can fill in by hand. (You will need to
505 * call at least soup_uri_set_scheme() and soup_uri_set_path(), since
506 * those fields are required.)
507 *
508 * Return value: (nullable): a #SoupURI, or %NULL if the given string
509 * was found to be invalid.
510 **/
511 SoupURI *
soup_uri_new(const char * uri_string)512 soup_uri_new (const char *uri_string)
513 {
514 SoupURI *uri;
515
516 if (!uri_string)
517 return g_slice_new0 (SoupURI);
518
519 uri = soup_uri_new_with_base (NULL, uri_string);
520 if (!uri)
521 return NULL;
522 if (!SOUP_URI_IS_VALID (uri)) {
523 soup_uri_free (uri);
524 return NULL;
525 }
526
527 return uri;
528 }
529
530
531 char *
soup_uri_to_string_internal(SoupURI * uri,gboolean just_path_and_query,gboolean include_password,gboolean force_port)532 soup_uri_to_string_internal (SoupURI *uri, gboolean just_path_and_query,
533 gboolean include_password, gboolean force_port)
534 {
535 GString *str;
536 char *return_result;
537
538 g_return_val_if_fail (uri != NULL, NULL);
539 g_warn_if_fail (SOUP_URI_IS_VALID (uri));
540
541 str = g_string_sized_new (40);
542
543 if (uri->scheme && !just_path_and_query)
544 g_string_append_printf (str, "%s:", uri->scheme);
545 if (uri->host && !just_path_and_query) {
546 g_string_append (str, "//");
547 if (uri->user) {
548 append_uri_encoded (str, uri->user, ":;@?/");
549 if (uri->password && include_password) {
550 g_string_append_c (str, ':');
551 append_uri_encoded (str, uri->password, ";@?/");
552 }
553 g_string_append_c (str, '@');
554 }
555 if (strchr (uri->host, ':')) {
556 const char *pct;
557
558 g_string_append_c (str, '[');
559 pct = strchr (uri->host, '%');
560 if (pct) {
561 g_string_append_printf (str, "%.*s%%25%s",
562 (int) (pct - uri->host),
563 uri->host, pct + 1);
564 } else
565 g_string_append (str, uri->host);
566 g_string_append_c (str, ']');
567 } else
568 append_uri_encoded (str, uri->host, ":/");
569 if (uri->port && (force_port || uri->port != soup_scheme_default_port (uri->scheme)))
570 g_string_append_printf (str, ":%u", uri->port);
571 if (!uri->path && (uri->query || uri->fragment))
572 g_string_append_c (str, '/');
573 else if ((!uri->path || !*uri->path) &&
574 (uri->scheme == SOUP_URI_SCHEME_HTTP ||
575 uri->scheme == SOUP_URI_SCHEME_HTTPS))
576 g_string_append_c (str, '/');
577 }
578
579 if (uri->path && *uri->path)
580 g_string_append (str, uri->path);
581 else if (just_path_and_query)
582 g_string_append_c (str, '/');
583
584 if (uri->query) {
585 g_string_append_c (str, '?');
586 g_string_append (str, uri->query);
587 }
588 if (uri->fragment && !just_path_and_query) {
589 g_string_append_c (str, '#');
590 g_string_append (str, uri->fragment);
591 }
592
593 return_result = str->str;
594 g_string_free (str, FALSE);
595
596 return return_result;
597 }
598
599 /**
600 * soup_uri_to_string:
601 * @uri: a #SoupURI
602 * @just_path_and_query: if %TRUE, output just the path and query portions
603 *
604 * Returns a string representing @uri.
605 *
606 * If @just_path_and_query is %TRUE, this concatenates the path and query
607 * together. That is, it constructs the string that would be needed in
608 * the Request-Line of an HTTP request for @uri.
609 *
610 * Note that the output will never contain a password, even if @uri
611 * does.
612 *
613 * Return value: a string representing @uri, which the caller must free.
614 **/
615 char *
soup_uri_to_string(SoupURI * uri,gboolean just_path_and_query)616 soup_uri_to_string (SoupURI *uri, gboolean just_path_and_query)
617 {
618 return soup_uri_to_string_internal (uri, just_path_and_query, FALSE, FALSE);
619 }
620
621 /**
622 * soup_uri_copy:
623 * @uri: a #SoupURI
624 *
625 * Copies @uri
626 *
627 * Return value: a copy of @uri, which must be freed with soup_uri_free()
628 **/
629 SoupURI *
soup_uri_copy(SoupURI * uri)630 soup_uri_copy (SoupURI *uri)
631 {
632 SoupURI *dup;
633
634 g_return_val_if_fail (uri != NULL, NULL);
635 g_warn_if_fail (SOUP_URI_IS_VALID (uri));
636
637 dup = g_slice_new0 (SoupURI);
638 dup->scheme = uri->scheme;
639 dup->user = g_strdup (uri->user);
640 dup->password = g_strdup (uri->password);
641 dup->host = g_strdup (uri->host);
642 dup->port = uri->port;
643 dup->path = g_strdup (uri->path);
644 dup->query = g_strdup (uri->query);
645 dup->fragment = g_strdup (uri->fragment);
646
647 return dup;
648 }
649
650 static inline gboolean
parts_equal(const char * one,const char * two,gboolean insensitive)651 parts_equal (const char *one, const char *two, gboolean insensitive)
652 {
653 if (!one && !two)
654 return TRUE;
655 if (!one || !two)
656 return FALSE;
657 return insensitive ? !g_ascii_strcasecmp (one, two) : !strcmp (one, two);
658 }
659
660 /**
661 * soup_uri_equal:
662 * @uri1: a #SoupURI
663 * @uri2: another #SoupURI
664 *
665 * Tests whether or not @uri1 and @uri2 are equal in all parts
666 *
667 * Return value: %TRUE or %FALSE
668 **/
669 gboolean
soup_uri_equal(SoupURI * uri1,SoupURI * uri2)670 soup_uri_equal (SoupURI *uri1, SoupURI *uri2)
671 {
672 g_return_val_if_fail (uri1 != NULL, FALSE);
673 g_return_val_if_fail (uri2 != NULL, FALSE);
674 g_warn_if_fail (SOUP_URI_IS_VALID (uri1));
675 g_warn_if_fail (SOUP_URI_IS_VALID (uri2));
676
677 if (uri1->scheme != uri2->scheme ||
678 uri1->port != uri2->port ||
679 !parts_equal (uri1->user, uri2->user, FALSE) ||
680 !parts_equal (uri1->password, uri2->password, FALSE) ||
681 !parts_equal (uri1->host, uri2->host, TRUE) ||
682 !parts_equal (uri1->path, uri2->path, FALSE) ||
683 !parts_equal (uri1->query, uri2->query, FALSE) ||
684 !parts_equal (uri1->fragment, uri2->fragment, FALSE))
685 return FALSE;
686
687 return TRUE;
688 }
689
690 /**
691 * soup_uri_free:
692 * @uri: a #SoupURI
693 *
694 * Frees @uri.
695 **/
696 void
soup_uri_free(SoupURI * uri)697 soup_uri_free (SoupURI *uri)
698 {
699 g_return_if_fail (uri != NULL);
700
701 g_free (uri->user);
702 g_free (uri->password);
703 g_free (uri->host);
704 g_free (uri->path);
705 g_free (uri->query);
706 g_free (uri->fragment);
707
708 g_slice_free (SoupURI, uri);
709 }
710
711 static void
append_uri_encoded(GString * str,const char * in,const char * extra_enc_chars)712 append_uri_encoded (GString *str, const char *in, const char *extra_enc_chars)
713 {
714 const unsigned char *s = (const unsigned char *)in;
715
716 while (*s) {
717 if (soup_char_is_uri_percent_encoded (*s) ||
718 soup_char_is_uri_gen_delims (*s) ||
719 (extra_enc_chars && strchr (extra_enc_chars, *s)))
720 g_string_append_printf (str, "%%%02X", (int)*s++);
721 else
722 g_string_append_c (str, *s++);
723 }
724 }
725
726 /**
727 * soup_uri_encode:
728 * @part: a URI part
729 * @escape_extra: (allow-none): additional reserved characters to
730 * escape (or %NULL)
731 *
732 * This %<!-- -->-encodes the given URI part and returns the escaped
733 * version in allocated memory, which the caller must free when it is
734 * done.
735 *
736 * Return value: the encoded URI part
737 **/
738 char *
soup_uri_encode(const char * part,const char * escape_extra)739 soup_uri_encode (const char *part, const char *escape_extra)
740 {
741 GString *str;
742 char *encoded;
743
744 g_return_val_if_fail (part != NULL, NULL);
745
746 str = g_string_new (NULL);
747 append_uri_encoded (str, part, escape_extra);
748 encoded = str->str;
749 g_string_free (str, FALSE);
750
751 return encoded;
752 }
753
/* XDIGIT(c): numeric value (0-15) of the ASCII hex digit @c. The result is
 * only meaningful when @c really is a hex digit, and @c is evaluated more
 * than once, so it must not have side effects.
 *
 * HEXCHAR(s): byte value encoded by the "%XX" triplet that @s points at
 * (s[0] is the '%', s[1] and s[2] are the two hex digits). The argument
 * is parenthesized so that pointer expressions such as `p + 1` expand
 * correctly.
 */
#define XDIGIT(c) ((c) <= '9' ? (c) - '0' : ((c) & 0x4F) - 'A' + 10)
#define HEXCHAR(s) ((XDIGIT ((s)[1]) << 4) + XDIGIT ((s)[2]))
756
757 /* length must be set (e.g. from strchr()) such that [part, part + length]
758 * contains no nul bytes */
759 char *
soup_uri_decoded_copy(const char * part,int length,int * decoded_length)760 soup_uri_decoded_copy (const char *part, int length, int *decoded_length)
761 {
762 unsigned char *s, *d;
763 char *decoded;
764
765 g_return_val_if_fail (part != NULL, NULL);
766
767 decoded = g_strndup (part, length);
768 s = d = (unsigned char *)decoded;
769 do {
770 if (*s == '%') {
771 if (s[1] == '\0' ||
772 s[2] == '\0' ||
773 !g_ascii_isxdigit (s[1]) ||
774 !g_ascii_isxdigit (s[2])) {
775 *d++ = *s;
776 continue;
777 }
778 *d++ = HEXCHAR (s);
779 s += 2;
780 } else
781 *d++ = *s;
782 } while (*s++);
783
784 if (decoded_length)
785 *decoded_length = d - (unsigned char *)decoded - 1;
786
787 return decoded;
788 }
789
/**
 * soup_uri_decode:
 * @part: a URI part
 *
 * Fully %<!-- -->-decodes @part.
 *
 * Invalid percent-encoding in @part is not an error: it is simply left
 * as it is (matching what soup_uri_new() does). In the past this
 * function returned %NULL in that case.
 *
 * Return value: the decoded URI part.
 */
char *
soup_uri_decode (const char *part)
{
	int part_length;

	g_return_val_if_fail (part != NULL, NULL);

	part_length = strlen (part);
	return soup_uri_decoded_copy (part, part_length, NULL);
}
809
810 /* length must be set (e.g. from strchr()) such that [part, part + length]
811 * contains no nul bytes */
812 static char *
uri_normalized_copy(const char * part,int length,const char * unescape_extra)813 uri_normalized_copy (const char *part, int length,
814 const char *unescape_extra)
815 {
816 unsigned char *s, *d, c;
817 char *normalized = g_strndup (part, length);
818 gboolean need_fixup = FALSE;
819
820 if (!unescape_extra)
821 unescape_extra = "";
822
823 s = d = (unsigned char *)normalized;
824 while (*s) {
825 if (*s == '%') {
826 if (s[1] == '\0' ||
827 s[2] == '\0' ||
828 !g_ascii_isxdigit (s[1]) ||
829 !g_ascii_isxdigit (s[2])) {
830 *d++ = *s++;
831 continue;
832 }
833
834 c = HEXCHAR (s);
835 if (soup_char_is_uri_unreserved (c) ||
836 (c && strchr (unescape_extra, c))) {
837 *d++ = c;
838 s += 3;
839 } else {
840 /* We leave it unchanged. We used to uppercase percent-encoded
841 * triplets but we do not do it any more as RFC3986 Section 6.2.2.1
842 * says that they only SHOULD be case normalized.
843 */
844 *d++ = *s++;
845 *d++ = *s++;
846 *d++ = *s++;
847 }
848 } else {
849 if (!g_ascii_isgraph (*s) &&
850 !strchr (unescape_extra, *s))
851 need_fixup = TRUE;
852 *d++ = *s++;
853 }
854 }
855 *d = '\0';
856
857 if (need_fixup) {
858 GString *fixed;
859
860 fixed = g_string_new (NULL);
861 s = (guchar *)normalized;
862 while (*s) {
863 if (g_ascii_isgraph (*s) ||
864 strchr (unescape_extra, *s))
865 g_string_append_c (fixed, *s);
866 else
867 g_string_append_printf (fixed, "%%%02X", (int)*s);
868 s++;
869 }
870 g_free (normalized);
871 normalized = g_string_free (fixed, FALSE);
872 }
873
874 return normalized;
875 }
876
/**
 * soup_uri_normalize:
 * @part: a URI part
 * @unescape_extra: (allow-none): reserved characters to unescape (or %NULL)
 *
 * Returns a copy of @part in which "unreserved" characters (and any
 * characters listed in @unescape_extra) have been %<!-- -->-decoded,
 * while non-ASCII characters, spaces, and non-printing characters have
 * been %<!-- -->-encoded.
 *
 * "Unreserved" characters are those that are not allowed to be used
 * for punctuation according to the URI spec. Letters, for example, are
 * unreserved, so soup_uri_normalize() turns
 * <literal>http://example.com/foo/b%<!-- -->61r</literal> into
 * <literal>http://example.com/foo/bar</literal>, which is guaranteed
 * to mean the same thing. "/", on the other hand, is "reserved", so
 * <literal>http://example.com/foo%<!-- -->2Fbar</literal> is left
 * alone, because changing it might alter its meaning to the server.
 *
 * Invalid percent-encoding in @part is not an error: it is simply left
 * as it is (matching what soup_uri_new() does). In the past this
 * function returned %NULL in that case.
 *
 * Return value: the normalized URI part
 */
char *
soup_uri_normalize (const char *part, const char *unescape_extra)
{
	int part_length;

	g_return_val_if_fail (part != NULL, NULL);

	part_length = strlen (part);
	return uri_normalized_copy (part, part_length, unescape_extra);
}
909
910
911 /**
912 * soup_uri_uses_default_port:
913 * @uri: a #SoupURI
914 *
915 * Tests if @uri uses the default port for its scheme. (Eg, 80 for
916 * http.) (This only works for http, https and ftp; libsoup does not know
917 * the default ports of other protocols.)
918 *
919 * Return value: %TRUE or %FALSE
920 **/
921 gboolean
soup_uri_uses_default_port(SoupURI * uri)922 soup_uri_uses_default_port (SoupURI *uri)
923 {
924 g_return_val_if_fail (uri != NULL, FALSE);
925 g_warn_if_fail (SOUP_URI_IS_VALID (uri));
926
927 return uri->port == soup_scheme_default_port (uri->scheme);
928 }
929
930 /**
931 * soup_uri_get_scheme:
932 * @uri: a #SoupURI
933 *
934 * Gets @uri's scheme.
935 *
936 * Return value: @uri's scheme.
937 *
938 * Since: 2.32
939 **/
940 const char *
soup_uri_get_scheme(SoupURI * uri)941 soup_uri_get_scheme (SoupURI *uri)
942 {
943 g_return_val_if_fail (uri != NULL, NULL);
944
945 return uri->scheme;
946 }
947
948 /**
949 * soup_uri_set_scheme:
950 * @uri: a #SoupURI
951 * @scheme: the URI scheme
952 *
953 * Sets @uri's scheme to @scheme. This will also set @uri's port to
954 * the default port for @scheme, if known.
955 **/
956 void
soup_uri_set_scheme(SoupURI * uri,const char * scheme)957 soup_uri_set_scheme (SoupURI *uri, const char *scheme)
958 {
959 g_return_if_fail (uri != NULL);
960 g_return_if_fail (scheme != NULL);
961
962 uri->scheme = soup_uri_parse_scheme (scheme, strlen (scheme));
963 uri->port = soup_scheme_default_port (uri->scheme);
964 }
965
966 /**
967 * soup_uri_get_user:
968 * @uri: a #SoupURI
969 *
970 * Gets @uri's user.
971 *
972 * Return value: @uri's user.
973 *
974 * Since: 2.32
975 **/
976 const char *
soup_uri_get_user(SoupURI * uri)977 soup_uri_get_user (SoupURI *uri)
978 {
979 g_return_val_if_fail (uri != NULL, NULL);
980
981 return uri->user;
982 }
983
984 /**
985 * soup_uri_set_user:
986 * @uri: a #SoupURI
987 * @user: (allow-none): the username, or %NULL
988 *
989 * Sets @uri's user to @user.
990 **/
991 void
soup_uri_set_user(SoupURI * uri,const char * user)992 soup_uri_set_user (SoupURI *uri, const char *user)
993 {
994 g_return_if_fail (uri != NULL);
995
996 g_free (uri->user);
997 uri->user = g_strdup (user);
998 }
999
1000 /**
1001 * soup_uri_get_password:
1002 * @uri: a #SoupURI
1003 *
1004 * Gets @uri's password.
1005 *
1006 * Return value: @uri's password.
1007 *
1008 * Since: 2.32
1009 **/
1010 const char *
soup_uri_get_password(SoupURI * uri)1011 soup_uri_get_password (SoupURI *uri)
1012 {
1013 g_return_val_if_fail (uri != NULL, NULL);
1014
1015 return uri->password;
1016 }
1017
1018 /**
1019 * soup_uri_set_password:
1020 * @uri: a #SoupURI
1021 * @password: (allow-none): the password, or %NULL
1022 *
1023 * Sets @uri's password to @password.
1024 **/
1025 void
soup_uri_set_password(SoupURI * uri,const char * password)1026 soup_uri_set_password (SoupURI *uri, const char *password)
1027 {
1028 g_return_if_fail (uri != NULL);
1029
1030 g_free (uri->password);
1031 uri->password = g_strdup (password);
1032 }
1033
1034 /**
1035 * soup_uri_get_host:
1036 * @uri: a #SoupURI
1037 *
1038 * Gets @uri's host.
1039 *
1040 * Return value: @uri's host.
1041 *
1042 * Since: 2.32
1043 **/
1044 const char *
soup_uri_get_host(SoupURI * uri)1045 soup_uri_get_host (SoupURI *uri)
1046 {
1047 g_return_val_if_fail (uri != NULL, NULL);
1048
1049 return uri->host;
1050 }
1051
1052 /**
1053 * soup_uri_set_host:
1054 * @uri: a #SoupURI
1055 * @host: (allow-none): the hostname or IP address, or %NULL
1056 *
1057 * Sets @uri's host to @host.
1058 *
1059 * If @host is an IPv6 IP address, it should not include the brackets
1060 * required by the URI syntax; they will be added automatically when
1061 * converting @uri to a string.
1062 *
1063 * http and https URIs should not have a %NULL @host.
1064 **/
1065 void
soup_uri_set_host(SoupURI * uri,const char * host)1066 soup_uri_set_host (SoupURI *uri, const char *host)
1067 {
1068 g_return_if_fail (uri != NULL);
1069
1070 g_free (uri->host);
1071 uri->host = g_strdup (host);
1072 }
1073
1074 /**
1075 * soup_uri_get_port:
1076 * @uri: a #SoupURI
1077 *
1078 * Gets @uri's port.
1079 *
1080 * Return value: @uri's port.
1081 *
1082 * Since: 2.32
1083 **/
1084 guint
soup_uri_get_port(SoupURI * uri)1085 soup_uri_get_port (SoupURI *uri)
1086 {
1087 g_return_val_if_fail (uri != NULL, 0);
1088
1089 return uri->port;
1090 }
1091
1092 /**
1093 * soup_uri_set_port:
1094 * @uri: a #SoupURI
1095 * @port: the port, or 0
1096 *
1097 * Sets @uri's port to @port. If @port is 0, @uri will not have an
1098 * explicitly-specified port.
1099 **/
1100 void
soup_uri_set_port(SoupURI * uri,guint port)1101 soup_uri_set_port (SoupURI *uri, guint port)
1102 {
1103 g_return_if_fail (uri != NULL);
1104
1105 uri->port = port;
1106 }
1107
1108 /**
1109 * soup_uri_get_path:
1110 * @uri: a #SoupURI
1111 *
1112 * Gets @uri's path.
1113 *
1114 * Return value: @uri's path.
1115 *
1116 * Since: 2.32
1117 **/
1118 const char *
soup_uri_get_path(SoupURI * uri)1119 soup_uri_get_path (SoupURI *uri)
1120 {
1121 g_return_val_if_fail (uri != NULL, NULL);
1122
1123 return uri->path;
1124 }
1125
1126 /**
1127 * soup_uri_set_path:
1128 * @uri: a #SoupURI
1129 * @path: the non-%NULL path
1130 *
1131 * Sets @uri's path to @path.
1132 **/
1133 void
soup_uri_set_path(SoupURI * uri,const char * path)1134 soup_uri_set_path (SoupURI *uri, const char *path)
1135 {
1136 g_return_if_fail (uri != NULL);
1137
1138 /* We allow a NULL path for compatibility, but warn about it. */
1139 if (!path) {
1140 g_warn_if_fail (path != NULL);
1141 path = "";
1142 }
1143
1144 g_free (uri->path);
1145 uri->path = g_strdup (path);
1146 }
1147
1148 /**
1149 * soup_uri_get_query:
1150 * @uri: a #SoupURI
1151 *
1152 * Gets @uri's query.
1153 *
1154 * Return value: @uri's query.
1155 *
1156 * Since: 2.32
1157 **/
1158 const char *
soup_uri_get_query(SoupURI * uri)1159 soup_uri_get_query (SoupURI *uri)
1160 {
1161 g_return_val_if_fail (uri != NULL, NULL);
1162
1163 return uri->query;
1164 }
1165
1166 /**
1167 * soup_uri_set_query:
1168 * @uri: a #SoupURI
1169 * @query: (allow-none): the query
1170 *
1171 * Sets @uri's query to @query.
1172 **/
1173 void
soup_uri_set_query(SoupURI * uri,const char * query)1174 soup_uri_set_query (SoupURI *uri, const char *query)
1175 {
1176 g_return_if_fail (uri != NULL);
1177
1178 g_free (uri->query);
1179 uri->query = g_strdup (query);
1180 }
1181
1182 /**
1183 * soup_uri_set_query_from_form:
1184 * @uri: a #SoupURI
1185 * @form: (element-type utf8 utf8): a #GHashTable containing HTML form
1186 * information
1187 *
1188 * Sets @uri's query to the result of encoding @form according to the
1189 * HTML form rules. See soup_form_encode_hash() for more information.
1190 **/
1191 void
soup_uri_set_query_from_form(SoupURI * uri,GHashTable * form)1192 soup_uri_set_query_from_form (SoupURI *uri, GHashTable *form)
1193 {
1194 g_return_if_fail (uri != NULL);
1195
1196 g_free (uri->query);
1197 uri->query = soup_form_encode_hash (form);
1198 }
1199
1200 /**
1201 * soup_uri_set_query_from_fields:
1202 * @uri: a #SoupURI
1203 * @first_field: name of the first form field to encode into query
1204 * @...: value of @first_field, followed by additional field names
1205 * and values, terminated by %NULL.
1206 *
1207 * Sets @uri's query to the result of encoding the given form fields
1208 * and values according to the HTML form rules. See
1209 * soup_form_encode() for more information.
1210 **/
1211 void
soup_uri_set_query_from_fields(SoupURI * uri,const char * first_field,...)1212 soup_uri_set_query_from_fields (SoupURI *uri,
1213 const char *first_field,
1214 ...)
1215 {
1216 va_list args;
1217
1218 g_return_if_fail (uri != NULL);
1219
1220 g_free (uri->query);
1221 va_start (args, first_field);
1222 uri->query = soup_form_encode_valist (first_field, args);
1223 va_end (args);
1224 }
1225
1226 /**
1227 * soup_uri_get_fragment:
1228 * @uri: a #SoupURI
1229 *
1230 * Gets @uri's fragment.
1231 *
1232 * Return value: @uri's fragment.
1233 *
1234 * Since: 2.32
1235 **/
1236 const char *
soup_uri_get_fragment(SoupURI * uri)1237 soup_uri_get_fragment (SoupURI *uri)
1238 {
1239 g_return_val_if_fail (uri != NULL, NULL);
1240
1241 return uri->fragment;
1242 }
1243
1244 /**
1245 * soup_uri_set_fragment:
1246 * @uri: a #SoupURI
1247 * @fragment: (allow-none): the fragment
1248 *
1249 * Sets @uri's fragment to @fragment.
1250 **/
1251 void
soup_uri_set_fragment(SoupURI * uri,const char * fragment)1252 soup_uri_set_fragment (SoupURI *uri, const char *fragment)
1253 {
1254 g_return_if_fail (uri != NULL);
1255
1256 g_free (uri->fragment);
1257 uri->fragment = g_strdup (fragment);
1258 }
1259
1260 /**
1261 * soup_uri_copy_host:
1262 * @uri: a #SoupURI
1263 *
1264 * Makes a copy of @uri, considering only the scheme, host, and port.
1265 *
1266 * Return value: the new #SoupURI
1267 *
1268 * Since: 2.28
1269 **/
1270 SoupURI *
soup_uri_copy_host(SoupURI * uri)1271 soup_uri_copy_host (SoupURI *uri)
1272 {
1273 SoupURI *dup;
1274
1275 g_return_val_if_fail (uri != NULL, NULL);
1276 g_warn_if_fail (SOUP_URI_IS_VALID (uri));
1277
1278 dup = soup_uri_new (NULL);
1279 dup->scheme = uri->scheme;
1280 dup->host = g_strdup (uri->host);
1281 dup->port = uri->port;
1282 dup->path = g_strdup ("");
1283
1284 return dup;
1285 }
1286
1287 /**
1288 * soup_uri_host_hash:
1289 * @key: (type Soup.URI): a #SoupURI with a non-%NULL @host member
1290 *
1291 * Hashes @key, considering only the scheme, host, and port.
1292 *
1293 * Return value: a hash
1294 *
1295 * Since: 2.28
1296 **/
1297 guint
soup_uri_host_hash(gconstpointer key)1298 soup_uri_host_hash (gconstpointer key)
1299 {
1300 const SoupURI *uri = key;
1301
1302 g_return_val_if_fail (uri != NULL && uri->host != NULL, 0);
1303 g_warn_if_fail (SOUP_URI_IS_VALID (uri));
1304
1305 return GPOINTER_TO_UINT (uri->scheme) + uri->port +
1306 soup_str_case_hash (uri->host);
1307 }
1308
1309 /**
1310 * soup_uri_host_equal:
1311 * @v1: (type Soup.URI): a #SoupURI with a non-%NULL @host member
1312 * @v2: (type Soup.URI): a #SoupURI with a non-%NULL @host member
1313 *
1314 * Compares @v1 and @v2, considering only the scheme, host, and port.
1315 *
1316 * Return value: whether or not the URIs are equal in scheme, host,
1317 * and port.
1318 *
1319 * Since: 2.28
1320 **/
1321 gboolean
soup_uri_host_equal(gconstpointer v1,gconstpointer v2)1322 soup_uri_host_equal (gconstpointer v1, gconstpointer v2)
1323 {
1324 const SoupURI *one = v1;
1325 const SoupURI *two = v2;
1326
1327 g_return_val_if_fail (one != NULL && two != NULL, one == two);
1328 g_return_val_if_fail (one->host != NULL && two->host != NULL, one->host == two->host);
1329 g_warn_if_fail (SOUP_URI_IS_VALID (one));
1330 g_warn_if_fail (SOUP_URI_IS_VALID (two));
1331
1332 if (one->scheme != two->scheme)
1333 return FALSE;
1334 if (one->port != two->port)
1335 return FALSE;
1336
1337 return g_ascii_strcasecmp (one->host, two->host) == 0;
1338 }
1339
1340 gboolean
soup_uri_is_http(SoupURI * uri,char ** aliases)1341 soup_uri_is_http (SoupURI *uri, char **aliases)
1342 {
1343 int i;
1344
1345 if (uri->scheme == SOUP_URI_SCHEME_HTTP ||
1346 uri->scheme == SOUP_URI_SCHEME_WS)
1347 return TRUE;
1348 else if (uri->scheme == SOUP_URI_SCHEME_HTTPS ||
1349 uri->scheme == SOUP_URI_SCHEME_WSS)
1350 return FALSE;
1351 else if (!aliases)
1352 return FALSE;
1353
1354 for (i = 0; aliases[i]; i++) {
1355 if (uri->scheme == aliases[i])
1356 return TRUE;
1357 }
1358
1359 if (aliases[0] && !aliases[1] && !strcmp (aliases[0], "*"))
1360 return TRUE;
1361 else
1362 return FALSE;
1363 }
1364
1365 gboolean
soup_uri_is_https(SoupURI * uri,char ** aliases)1366 soup_uri_is_https (SoupURI *uri, char **aliases)
1367 {
1368 int i;
1369
1370 if (uri->scheme == SOUP_URI_SCHEME_HTTPS ||
1371 uri->scheme == SOUP_URI_SCHEME_WSS)
1372 return TRUE;
1373 else if (uri->scheme == SOUP_URI_SCHEME_HTTP ||
1374 uri->scheme == SOUP_URI_SCHEME_WS)
1375 return FALSE;
1376 else if (!aliases)
1377 return FALSE;
1378
1379 for (i = 0; aliases[i]; i++) {
1380 if (uri->scheme == aliases[i])
1381 return TRUE;
1382 }
1383
1384 return FALSE;
1385 }
1386
/* Registers SoupURI as a GLib boxed type under the soup_uri prefix,
 * naming soup_uri_copy() and soup_uri_free() as its copy and free
 * functions.
 */
1387 G_DEFINE_BOXED_TYPE (SoupURI, soup_uri, soup_uri_copy, soup_uri_free)
1388