• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /*
3  * soup-headers.c: HTTP message header parsing
4  *
5  * Copyright (C) 2001-2003, Ximian, Inc.
6  */
7 
8 #ifdef HAVE_CONFIG_H
9 #include <config.h>
10 #endif
11 
12 #include <stdlib.h>
13 #include <string.h>
14 
15 #include "soup-headers.h"
16 #include "soup.h"
17 
18 /**
19  * soup_headers_parse:
20  * @str: the header string (including the Request-Line or Status-Line,
21  *   but not the trailing blank line)
22  * @len: length of @str
23  * @dest: #SoupMessageHeaders to store the header values in
24  *
25  * Parses the headers of an HTTP request or response in @str and
26  * stores the results in @dest. Beware that @dest may be modified even
27  * on failure.
28  *
29  * This is a low-level method; normally you would use
30  * soup_headers_parse_request() or soup_headers_parse_response().
31  *
32  * Return value: success or failure
33  *
34  * Since: 2.26
35  **/
36 gboolean
soup_headers_parse(const char * str,int len,SoupMessageHeaders * dest)37 soup_headers_parse (const char *str, int len, SoupMessageHeaders *dest)
38 {
39 	const char *headers_start;
40 	char *headers_copy, *name, *name_end, *value, *value_end;
41 	char *eol, *sol, *p;
42 	gsize copy_len;
43 	gboolean success = FALSE;
44 
45 	g_return_val_if_fail (str != NULL, FALSE);
46 	g_return_val_if_fail (dest != NULL, FALSE);
47 
48 	/* As per RFC 2616 section 19.3, we treat '\n' as the
49 	 * line terminator, and '\r', if it appears, merely as
50 	 * ignorable trailing whitespace.
51 	 */
52 
53 	/* Skip over the Request-Line / Status-Line */
54 	headers_start = memchr (str, '\n', len);
55 	if (!headers_start)
56 		return FALSE;
57 	/* No '\0's in the Request-Line / Status-Line */
58 	if (memchr (str, '\0', headers_start - str))
59 		return FALSE;
60 
61 	/* We work on a copy of the headers, which we can write '\0's
62 	 * into, so that we don't have to individually g_strndup and
63 	 * then g_free each header name and value.
64 	 */
65 	copy_len = len - (headers_start - str);
66 	headers_copy = g_malloc (copy_len + 1);
67 	memcpy (headers_copy, headers_start, copy_len);
68 	headers_copy[copy_len] = '\0';
69 	value_end = headers_copy;
70 
71 	/* There shouldn't be any '\0's in the headers already, but
72 	 * this is the web we're talking about.
73 	 */
74 	while ((p = memchr (headers_copy, '\0', copy_len))) {
75 		memmove (p, p + 1, copy_len - (p - headers_copy));
76 		copy_len--;
77 	}
78 
79 	while (*(value_end + 1)) {
80 		name = value_end + 1;
81 		name_end = strchr (name, ':');
82 
83 		/* Reject if there is no ':', or the header name is
84 		 * empty, or it contains whitespace.
85 		 */
86 		if (!name_end ||
87 		    name_end == name ||
88 		    name + strcspn (name, " \t\r\n") < name_end) {
89 			/* Ignore this line. Note that if it has
90 			 * continuation lines, we'll end up ignoring
91 			 * them too since they'll start with spaces.
92 			 */
93 			value_end = strchr (name, '\n');
94 			if (!value_end)
95 				goto done;
96 			continue;
97 		}
98 
99 		/* Find the end of the value; ie, an end-of-line that
100 		 * isn't followed by a continuation line.
101 		 */
102 		value = name_end + 1;
103 		value_end = strchr (name, '\n');
104 		if (!value_end)
105 			goto done;
106 		while (*(value_end + 1) == ' ' || *(value_end + 1) == '\t') {
107 			value_end = strchr (value_end + 1, '\n');
108 			if (!value_end)
109 				goto done;
110 		}
111 
112 		*name_end = '\0';
113 		*value_end = '\0';
114 
115 		/* Skip leading whitespace */
116 		while (value < value_end &&
117 		       (*value == ' ' || *value == '\t' ||
118 			*value == '\r' || *value == '\n'))
119 			value++;
120 
121 		/* Collapse continuation lines */
122 		while ((eol = strchr (value, '\n'))) {
123 			/* find start of next line */
124 			sol = eol + 1;
125 			while (*sol == ' ' || *sol == '\t')
126 				sol++;
127 
128 			/* back up over trailing whitespace on current line */
129 			while (eol[-1] == ' ' || eol[-1] == '\t' || eol[-1] == '\r')
130 				eol--;
131 
132 			/* Delete all but one SP */
133 			*eol = ' ';
134 			memmove (eol + 1, sol, strlen (sol) + 1);
135 		}
136 
137 		/* clip trailing whitespace */
138 		eol = strchr (value, '\0');
139 		while (eol > value &&
140 		       (eol[-1] == ' ' || eol[-1] == '\t' || eol[-1] == '\r'))
141 			eol--;
142 		*eol = '\0';
143 
144 		/* convert (illegal) '\r's to spaces */
145 		for (p = strchr (value, '\r'); p; p = strchr (p, '\r'))
146 			*p = ' ';
147 
148 		soup_message_headers_append (dest, name, value);
149         }
150 	success = TRUE;
151 
152 done:
153 	g_free (headers_copy);
154 	return success;
155 }
156 
157 /**
158  * soup_headers_parse_request:
159  * @str: the headers (up to, but not including, the trailing blank line)
160  * @len: length of @str
161  * @req_headers: #SoupMessageHeaders to store the header values in
162  * @req_method: (out) (allow-none): if non-%NULL, will be filled in with the
163  * request method
164  * @req_path: (out) (allow-none): if non-%NULL, will be filled in with the
165  * request path
166  * @ver: (out) (allow-none): if non-%NULL, will be filled in with the HTTP
167  * version
168  *
169  * Parses the headers of an HTTP request in @str and stores the
170  * results in @req_method, @req_path, @ver, and @req_headers.
171  *
172  * Beware that @req_headers may be modified even on failure.
173  *
174  * Return value: %SOUP_STATUS_OK if the headers could be parsed, or an
175  * HTTP error to be returned to the client if they could not be.
176  **/
177 guint
soup_headers_parse_request(const char * str,int len,SoupMessageHeaders * req_headers,char ** req_method,char ** req_path,SoupHTTPVersion * ver)178 soup_headers_parse_request (const char          *str,
179 			    int                  len,
180 			    SoupMessageHeaders  *req_headers,
181 			    char               **req_method,
182 			    char               **req_path,
183 			    SoupHTTPVersion     *ver)
184 {
185 	const char *method, *method_end, *path, *path_end;
186 	const char *version, *version_end, *headers;
187 	unsigned long major_version, minor_version;
188 	char *p;
189 
190 	g_return_val_if_fail (str != NULL, SOUP_STATUS_MALFORMED);
191 
192 	/* RFC 2616 4.1 "servers SHOULD ignore any empty line(s)
193 	 * received where a Request-Line is expected."
194 	 */
195 	while ((*str == '\r' || *str == '\n') && len > 0) {
196 		str++;
197 		len--;
198 	}
199 	if (!len)
200 		return SOUP_STATUS_BAD_REQUEST;
201 
202 	/* RFC 2616 19.3 "[servers] SHOULD accept any amount of SP or
203 	 * HT characters between [Request-Line] fields"
204 	 */
205 
206 	method = method_end = str;
207 	while (method_end < str + len && *method_end != ' ' && *method_end != '\t')
208 		method_end++;
209 	if (method_end >= str + len)
210 		return SOUP_STATUS_BAD_REQUEST;
211 
212 	path = method_end;
213 	while (path < str + len && (*path == ' ' || *path == '\t'))
214 		path++;
215 	if (path >= str + len)
216 		return SOUP_STATUS_BAD_REQUEST;
217 
218 	path_end = path;
219 	while (path_end < str + len && *path_end != ' ' && *path_end != '\t')
220 		path_end++;
221 	if (path_end >= str + len)
222 		return SOUP_STATUS_BAD_REQUEST;
223 
224 	version = path_end;
225 	while (version < str + len && (*version == ' ' || *version == '\t'))
226 		version++;
227 	if (version + 8 >= str + len)
228 		return SOUP_STATUS_BAD_REQUEST;
229 
230 	if (strncmp (version, "HTTP/", 5) != 0 ||
231 	    !g_ascii_isdigit (version[5]))
232 		return SOUP_STATUS_BAD_REQUEST;
233 	major_version = strtoul (version + 5, &p, 10);
234 	if (*p != '.' || !g_ascii_isdigit (p[1]))
235 		return SOUP_STATUS_BAD_REQUEST;
236 	minor_version = strtoul (p + 1, &p, 10);
237 	version_end = p;
238 	if (major_version != 1)
239 		return SOUP_STATUS_HTTP_VERSION_NOT_SUPPORTED;
240 	if (minor_version > 1)
241 		return SOUP_STATUS_HTTP_VERSION_NOT_SUPPORTED;
242 
243 	headers = version_end;
244 	while (headers < str + len && (*headers == '\r' || *headers == ' '))
245 		headers++;
246 	if (headers >= str + len || *headers != '\n')
247 		return SOUP_STATUS_BAD_REQUEST;
248 
249 	if (!soup_headers_parse (str, len, req_headers))
250 		return SOUP_STATUS_BAD_REQUEST;
251 
252 	if (soup_message_headers_get_expectations (req_headers) &
253 	    SOUP_EXPECTATION_UNRECOGNIZED)
254 		return SOUP_STATUS_EXPECTATION_FAILED;
255 	/* RFC 2616 14.10 */
256 	if (minor_version == 0)
257 		soup_message_headers_clean_connection_headers (req_headers);
258 
259 	if (req_method)
260 		*req_method = g_strndup (method, method_end - method);
261 	if (req_path)
262 		*req_path = g_strndup (path, path_end - path);
263 	if (ver)
264 		*ver = (minor_version == 0) ? SOUP_HTTP_1_0 : SOUP_HTTP_1_1;
265 
266 	return SOUP_STATUS_OK;
267 }
268 
269 /**
270  * soup_headers_parse_status_line:
271  * @status_line: an HTTP Status-Line
272  * @ver: (out) (allow-none): if non-%NULL, will be filled in with the HTTP
273  * version
274  * @status_code: (out) (allow-none): if non-%NULL, will be filled in with
275  * the status code
276  * @reason_phrase: (out) (allow-none): if non-%NULL, will be filled in with
277  * the reason phrase
278  *
279  * Parses the HTTP Status-Line string in @status_line into @ver,
280  * @status_code, and @reason_phrase. @status_line must be terminated by
281  * either "\0" or "\r\n".
282  *
283  * Return value: %TRUE if @status_line was parsed successfully.
284  **/
285 gboolean
soup_headers_parse_status_line(const char * status_line,SoupHTTPVersion * ver,guint * status_code,char ** reason_phrase)286 soup_headers_parse_status_line (const char       *status_line,
287 				SoupHTTPVersion  *ver,
288 				guint            *status_code,
289 				char            **reason_phrase)
290 {
291 	unsigned long major_version, minor_version, code;
292 	const char *code_start, *code_end, *phrase_start, *phrase_end;
293 	char *p;
294 
295 	g_return_val_if_fail (status_line != NULL, FALSE);
296 
297 	if (strncmp (status_line, "HTTP/", 5) == 0 &&
298 	    g_ascii_isdigit (status_line[5])) {
299 		major_version = strtoul (status_line + 5, &p, 10);
300 		if (*p != '.' || !g_ascii_isdigit (p[1]))
301 			return FALSE;
302 		minor_version = strtoul (p + 1, &p, 10);
303 		if (major_version != 1)
304 			return FALSE;
305 		if (minor_version > 1)
306 			return FALSE;
307 		if (ver)
308 			*ver = (minor_version == 0) ? SOUP_HTTP_1_0 : SOUP_HTTP_1_1;
309 	} else if (!strncmp (status_line, "ICY", 3)) {
310 		/* Shoutcast not-quite-HTTP format */
311 		if (ver)
312 			*ver = SOUP_HTTP_1_0;
313 		p = (char *)status_line + 3;
314 	} else
315 		return FALSE;
316 
317 	code_start = p;
318 	while (*code_start == ' ' || *code_start == '\t')
319 		code_start++;
320 	code_end = code_start;
321 	while (*code_end >= '0' && *code_end <= '9')
322 		code_end++;
323 	if (code_end != code_start + 3)
324 		return FALSE;
325 	code = atoi (code_start);
326 	if (code < 100 || code > 999)
327 		return FALSE;
328 	if (status_code)
329 		*status_code = code;
330 
331 	phrase_start = code_end;
332 	while (*phrase_start == ' ' || *phrase_start == '\t')
333 		phrase_start++;
334 	phrase_end = phrase_start + strcspn (phrase_start, "\n");
335 	while (phrase_end > phrase_start &&
336 	       (phrase_end[-1] == '\r' || phrase_end[-1] == ' ' || phrase_end[-1] == '\t'))
337 		phrase_end--;
338 	if (reason_phrase)
339 		*reason_phrase = g_strndup (phrase_start, phrase_end - phrase_start);
340 
341 	return TRUE;
342 }
343 
344 /**
345  * soup_headers_parse_response:
346  * @str: the headers (up to, but not including, the trailing blank line)
347  * @len: length of @str
348  * @headers: #SoupMessageHeaders to store the header values in
349  * @ver: (out) (allow-none): if non-%NULL, will be filled in with the HTTP
350  * version
351  * @status_code: (out) (allow-none): if non-%NULL, will be filled in with
352  * the status code
353  * @reason_phrase: (out) (allow-none): if non-%NULL, will be filled in with
354  * the reason phrase
355  *
356  * Parses the headers of an HTTP response in @str and stores the
357  * results in @ver, @status_code, @reason_phrase, and @headers.
358  *
359  * Beware that @headers may be modified even on failure.
360  *
361  * Return value: success or failure.
362  **/
363 gboolean
soup_headers_parse_response(const char * str,int len,SoupMessageHeaders * headers,SoupHTTPVersion * ver,guint * status_code,char ** reason_phrase)364 soup_headers_parse_response (const char          *str,
365 			     int                  len,
366 			     SoupMessageHeaders  *headers,
367 			     SoupHTTPVersion     *ver,
368 			     guint               *status_code,
369 			     char               **reason_phrase)
370 {
371 	SoupHTTPVersion version;
372 
373 	g_return_val_if_fail (str != NULL, FALSE);
374 
375 	/* Workaround for broken servers that send extra line breaks
376 	 * after a response, which we then see prepended to the next
377 	 * response on that connection.
378 	 */
379 	while ((*str == '\r' || *str == '\n') && len > 0) {
380 		str++;
381 		len--;
382 	}
383 	if (!len)
384 		return FALSE;
385 
386 	if (!soup_headers_parse (str, len, headers))
387 		return FALSE;
388 
389 	if (!soup_headers_parse_status_line (str,
390 					     &version,
391 					     status_code,
392 					     reason_phrase))
393 		return FALSE;
394 	if (ver)
395 		*ver = version;
396 
397 	/* RFC 2616 14.10 */
398 	if (version == SOUP_HTTP_1_0)
399 		soup_message_headers_clean_connection_headers (headers);
400 
401 	return TRUE;
402 }
403 
404 
405 /*
406  * Parsing of specific HTTP header types
407  */
408 
/* Returns the first position at or after @s that g_ascii_isspace()
 * does not classify as whitespace.
 */
static const char *
skip_lws (const char *s)
{
	const char *cursor;

	for (cursor = s; g_ascii_isspace (*cursor); cursor++)
		;
	return cursor;
}
416 
/* Walks backwards from @s over whitespace, without going before
 * @start, and returns the position just after the last
 * non-whitespace character.
 */
static const char *
unskip_lws (const char *s, const char *start)
{
	const char *cursor = s;

	while (cursor > start) {
		if (!g_ascii_isspace (cursor[-1]))
			break;
		cursor--;
	}
	return cursor;
}
424 
/* Returns the first position at or after @s that is neither
 * whitespace nor @delim.
 */
static const char *
skip_delims (const char *s, char delim)
{
	const char *cursor;

	/* The grammar allows for multiple delimiters */
	for (cursor = s; *cursor == delim || g_ascii_isspace (*cursor); cursor++)
		;
	return cursor;
}
433 
434 static const char *
skip_item(const char * s,char delim)435 skip_item (const char *s, char delim)
436 {
437 	gboolean quoted = FALSE;
438 	const char *start = s;
439 
440 	/* A list item ends at the last non-whitespace character
441 	 * before a delimiter which is not inside a quoted-string. Or
442 	 * at the end of the string.
443 	 */
444 
445 	while (*s) {
446 		if (*s == '"')
447 			quoted = !quoted;
448 		else if (quoted) {
449 			if (*s == '\\' && *(s + 1))
450 				s++;
451 		} else {
452 			if (*s == delim)
453 				break;
454 		}
455 		s++;
456 	}
457 
458 	return unskip_lws (s, start);
459 }
460 
461 static GSList *
parse_list(const char * header,char delim)462 parse_list (const char *header, char delim)
463 {
464 	GSList *list = NULL;
465 	const char *end;
466 
467 	header = skip_delims (header, delim);
468 	while (*header) {
469 		end = skip_item (header, delim);
470 		list = g_slist_prepend (list, g_strndup (header, end - header));
471 		header = skip_delims (end, delim);
472 	}
473 
474 	return g_slist_reverse (list);
475 }
476 
477 /**
478  * soup_header_parse_list:
479  * @header: a header value
480  *
481  * Parses a header whose content is described by RFC2616 as
482  * "#something", where "something" does not itself contain commas,
483  * except as part of quoted-strings.
484  *
485  * Return value: (transfer full) (element-type utf8): a #GSList of
486  * list elements, as allocated strings
487  **/
488 GSList *
soup_header_parse_list(const char * header)489 soup_header_parse_list (const char *header)
490 {
491 	g_return_val_if_fail (header != NULL, NULL);
492 
493 	return parse_list (header, ',');
494 }
495 
/* One entry of a quality list while it is being sorted */
typedef struct {
	char *item;	/* the list element */
	double qval;	/* its qvalue */
} QualityItem;

/* qsort() comparator that orders #QualityItem<!-- -->s by descending
 * qvalue.
 */
static int
sort_by_qval (const void *a, const void *b)
{
	const QualityItem *lhs = a;
	const QualityItem *rhs = b;

	if (lhs->qval < rhs->qval)
		return 1;
	if (lhs->qval == rhs->qval)
		return 0;
	return -1;
}
514 
515 /**
516  * soup_header_parse_quality_list:
517  * @header: a header value
518  * @unacceptable: (out) (allow-none) (transfer full) (element-type utf8): on
519  * return, will contain a list of unacceptable values
520  *
521  * Parses a header whose content is a list of items with optional
522  * "qvalue"s (eg, Accept, Accept-Charset, Accept-Encoding,
523  * Accept-Language, TE).
524  *
525  * If @unacceptable is not %NULL, then on return, it will contain the
526  * items with qvalue 0. Either way, those items will be removed from
527  * the main list.
528  *
529  * Return value: (transfer full) (element-type utf8): a #GSList of
530  * acceptable values (as allocated strings), highest-qvalue first.
531  **/
532 GSList *
soup_header_parse_quality_list(const char * header,GSList ** unacceptable)533 soup_header_parse_quality_list (const char *header, GSList **unacceptable)
534 {
535 	GSList *unsorted;
536 	QualityItem *array;
537 	GSList *sorted, *iter;
538 	char *item, *semi;
539 	const char *param, *equal, *value;
540 	double qval;
541 	int n;
542 
543 	g_return_val_if_fail (header != NULL, NULL);
544 
545 	if (unacceptable)
546 		*unacceptable = NULL;
547 
548 	unsorted = soup_header_parse_list (header);
549 	array = g_new0 (QualityItem, g_slist_length (unsorted));
550 	for (iter = unsorted, n = 0; iter; iter = iter->next) {
551 		item = iter->data;
552 		qval = 1.0;
553 		for (semi = strchr (item, ';'); semi; semi = strchr (semi + 1, ';')) {
554 			param = skip_lws (semi + 1);
555 			if (*param != 'q')
556 				continue;
557 			equal = skip_lws (param + 1);
558 			if (!equal || *equal != '=')
559 				continue;
560 			value = skip_lws (equal + 1);
561 			if (!value)
562 				continue;
563 
564 			if (value[0] != '0' && value[0] != '1')
565 				continue;
566 			qval = (double)(value[0] - '0');
567 			if (value[0] == '0' && value[1] == '.') {
568 				if (g_ascii_isdigit (value[2])) {
569 					qval += (double)(value[2] - '0') / 10;
570 					if (g_ascii_isdigit (value[3])) {
571 						qval += (double)(value[3] - '0') / 100;
572 						if (g_ascii_isdigit (value[4]))
573 							qval += (double)(value[4] - '0') / 1000;
574 					}
575 				}
576 			}
577 
578 			*semi = '\0';
579 			break;
580 		}
581 
582 		if (qval == 0.0) {
583 			if (unacceptable) {
584 				*unacceptable = g_slist_prepend (*unacceptable,
585 								 item);
586 			}
587 		} else {
588 			array[n].item = item;
589 			array[n].qval = qval;
590 			n++;
591 		}
592 	}
593 	g_slist_free (unsorted);
594 
595 	qsort (array, n, sizeof (QualityItem), sort_by_qval);
596 	sorted = NULL;
597 	while (n--)
598 		sorted = g_slist_prepend (sorted, array[n].item);
599 	g_free (array);
600 
601 	return sorted;
602 }
603 
604 /**
605  * soup_header_free_list: (skip)
606  * @list: a #GSList returned from soup_header_parse_list() or
607  * soup_header_parse_quality_list()
608  *
609  * Frees @list.
610  **/
611 void
soup_header_free_list(GSList * list)612 soup_header_free_list (GSList *list)
613 {
614 	g_slist_free_full (list, g_free);
615 }
616 
617 /**
618  * soup_header_contains:
619  * @header: An HTTP header suitable for parsing with
620  * soup_header_parse_list()
621  * @token: a token
622  *
623  * Parses @header to see if it contains the token @token (matched
624  * case-insensitively). Note that this can't be used with lists
625  * that have qvalues.
626  *
627  * Return value: whether or not @header contains @token
628  **/
629 gboolean
soup_header_contains(const char * header,const char * token)630 soup_header_contains (const char *header, const char *token)
631 {
632 	const char *end;
633 	guint len;
634 
635 	g_return_val_if_fail (header != NULL, FALSE);
636 	g_return_val_if_fail (token != NULL, FALSE);
637 
638 	len = strlen (token);
639 
640 	header = skip_delims (header, ',');
641 	while (*header) {
642 		end = skip_item (header, ',');
643 		if (end - header == len &&
644 		    !g_ascii_strncasecmp (header, token, len))
645 			return TRUE;
646 		header = skip_delims (end, ',');
647 	}
648 
649 	return FALSE;
650 }
651 
/* Decodes a quoted-string in place: drops the opening quote, resolves
 * backslash escapes, and ends the result at the closing quote (or at
 * the end of the string if there is none).
 *
 * The first byte of @quoted_string is skipped unconditionally, so
 * callers must only pass strings that start with '"'.
 */
static void
decode_quoted_string (char *quoted_string)
{
	const char *in = quoted_string + 1;
	char *out = quoted_string;

	for (; *in != '\0' && *in != '"'; in++) {
		/* A backslash that is the last byte stands for
		 * itself.
		 */
		if (*in == '\\' && in[1] != '\0')
			in++;
		*out++ = *in;
	}
	*out = '\0';
}
666 
667 static gboolean
decode_rfc5987(char * encoded_string)668 decode_rfc5987 (char *encoded_string)
669 {
670 	char *q, *decoded;
671 	gboolean iso_8859_1 = FALSE;
672 
673 	q = strchr (encoded_string, '\'');
674 	if (!q)
675 		return FALSE;
676 	if (g_ascii_strncasecmp (encoded_string, "UTF-8",
677 				 q - encoded_string) == 0)
678 		;
679 	else if (g_ascii_strncasecmp (encoded_string, "iso-8859-1",
680 				      q - encoded_string) == 0)
681 		iso_8859_1 = TRUE;
682 	else
683 		return FALSE;
684 
685 	q = strchr (q + 1, '\'');
686 	if (!q)
687 		return FALSE;
688 
689 	decoded = soup_uri_decode (q + 1);
690 	if (iso_8859_1) {
691 		char *utf8 =  g_convert_with_fallback (decoded, -1, "UTF-8",
692 						       "iso-8859-1", "_",
693 						       NULL, NULL, NULL);
694 		g_free (decoded);
695 		if (!utf8)
696 			return FALSE;
697 		decoded = utf8;
698 	}
699 
700 	/* If encoded_string was UTF-8, then each 3-character %-escape
701 	 * will be converted to a single byte, and so decoded is
702 	 * shorter than encoded_string. If encoded_string was
703 	 * iso-8859-1, then each 3-character %-escape will be
704 	 * converted into at most 2 bytes in UTF-8, and so it's still
705 	 * shorter.
706 	 */
707 	strcpy (encoded_string, decoded);
708 	g_free (decoded);
709 	return TRUE;
710 }
711 
712 static GHashTable *
parse_param_list(const char * header,char delim,gboolean strict)713 parse_param_list (const char *header, char delim, gboolean strict)
714 {
715 	GHashTable *params;
716 	GSList *list, *iter;
717 	char *item, *eq, *name_end, *value;
718 	gboolean override, duplicated;
719 
720 	params = g_hash_table_new_full (soup_str_case_hash,
721 					soup_str_case_equal,
722 					g_free, NULL);
723 
724 	list = parse_list (header, delim);
725 	for (iter = list; iter; iter = iter->next) {
726 		item = iter->data;
727 		override = FALSE;
728 
729 		eq = strchr (item, '=');
730 		if (eq) {
731 			name_end = (char *)unskip_lws (eq, item);
732 			if (name_end == item) {
733 				/* That's no good... */
734 				g_free (item);
735 				continue;
736 			}
737 
738 			*name_end = '\0';
739 
740 			value = (char *)skip_lws (eq + 1);
741 
742 			if (name_end[-1] == '*' && name_end > item + 1) {
743 				name_end[-1] = '\0';
744 				if (!decode_rfc5987 (value)) {
745 					g_free (item);
746 					continue;
747 				}
748 				override = TRUE;
749 			} else if (*value == '"')
750 				decode_quoted_string (value);
751 		} else
752 			value = NULL;
753 
754 		duplicated = g_hash_table_lookup_extended (params, item, NULL, NULL);
755 
756 		if (strict && duplicated) {
757 			soup_header_free_param_list (params);
758 			params = NULL;
759 			g_slist_foreach (iter, (GFunc)g_free, NULL);
760 			break;
761 		} else if (override || !duplicated)
762 			g_hash_table_replace (params, item, value);
763 		else
764 			g_free (item);
765 	}
766 
767 	g_slist_free (list);
768 	return params;
769 }
770 
771 /**
772  * soup_header_parse_param_list:
773  * @header: a header value
774  *
775  * Parses a header which is a comma-delimited list of something like:
776  * <literal>token [ "=" ( token | quoted-string ) ]</literal>.
777  *
778  * Tokens that don't have an associated value will still be added to
779  * the resulting hash table, but with a %NULL value.
780  *
781  * This also handles RFC5987 encoding (which in HTTP is mostly used
782  * for giving UTF8-encoded filenames in the Content-Disposition
783  * header).
784  *
785  * Return value: (element-type utf8 utf8) (transfer full): a
786  * #GHashTable of list elements, which can be freed with
787  * soup_header_free_param_list().
788  **/
789 GHashTable *
soup_header_parse_param_list(const char * header)790 soup_header_parse_param_list (const char *header)
791 {
792 	g_return_val_if_fail (header != NULL, NULL);
793 
794 	return parse_param_list (header, ',', FALSE);
795 }
796 
797 /**
798  * soup_header_parse_semi_param_list:
799  * @header: a header value
800  *
801  * Parses a header which is a semicolon-delimited list of something
802  * like: <literal>token [ "=" ( token | quoted-string ) ]</literal>.
803  *
804  * Tokens that don't have an associated value will still be added to
805  * the resulting hash table, but with a %NULL value.
806  *
807  * This also handles RFC5987 encoding (which in HTTP is mostly used
808  * for giving UTF8-encoded filenames in the Content-Disposition
809  * header).
810  *
811  * Return value: (element-type utf8 utf8) (transfer full): a
812  * #GHashTable of list elements, which can be freed with
813  * soup_header_free_param_list().
814  *
815  * Since: 2.24
816  **/
817 GHashTable *
soup_header_parse_semi_param_list(const char * header)818 soup_header_parse_semi_param_list (const char *header)
819 {
820 	g_return_val_if_fail (header != NULL, NULL);
821 
822 	return parse_param_list (header, ';', FALSE);
823 }
824 
825 /**
826  * soup_header_parse_param_list_strict:
827  * @header: a header value
828  *
829  * A strict version of soup_header_parse_param_list()
830  * that bails out if there are duplicate parameters.
831  * Note that this function will treat RFC5987-encoded
832  * parameters as duplicated if an ASCII version is also
833  * present. For header fields that might contain
834  * RFC5987-encoded parameters, use
835  * soup_header_parse_param_list() instead.
836  *
837  * Return value: (element-type utf8 utf8) (transfer full) (nullable):
838  * a #GHashTable of list elements, which can be freed with
839  * soup_header_free_param_list() or %NULL if there are duplicate
840  * elements.
841  *
842  * Since: 2.66
843  **/
844 GHashTable *
soup_header_parse_param_list_strict(const char * header)845 soup_header_parse_param_list_strict (const char *header)
846 {
847 	g_return_val_if_fail (header != NULL, NULL);
848 
849 	return parse_param_list (header, ',', TRUE);
850 }
851 
852 /**
853  * soup_header_parse_semi_param_list_strict:
854  * @header: a header value
855  *
856  * A strict version of soup_header_parse_semi_param_list()
857  * that bails out if there are duplicate parameters.
858  * Note that this function will treat RFC5987-encoded
859  * parameters as duplicated if an ASCII version is also
860  * present. For header fields that might contain
861  * RFC5987-encoded parameters, use
862  * soup_header_parse_semi_param_list() instead.
863  *
864  * Return value: (element-type utf8 utf8) (transfer full) (nullable):
865  * a #GHashTable of list elements, which can be freed with
866  * soup_header_free_param_list() or %NULL if there are duplicate
867  * elements.
868  *
869  * Since: 2.66
870  **/
871 GHashTable *
soup_header_parse_semi_param_list_strict(const char * header)872 soup_header_parse_semi_param_list_strict (const char *header)
873 {
874 	g_return_val_if_fail (header != NULL, NULL);
875 
876 	return parse_param_list (header, ';', TRUE);
877 }
878 
879 /**
880  * soup_header_free_param_list:
881  * @param_list: (element-type utf8 utf8): a #GHashTable returned from soup_header_parse_param_list()
882  * or soup_header_parse_semi_param_list()
883  *
884  * Frees @param_list.
885  **/
886 void
soup_header_free_param_list(GHashTable * param_list)887 soup_header_free_param_list (GHashTable *param_list)
888 {
889 	g_return_if_fail (param_list != NULL);
890 
891 	g_hash_table_destroy (param_list);
892 }
893 
894 static void
append_param_rfc5987(GString * string,const char * name,const char * value)895 append_param_rfc5987 (GString    *string,
896 		      const char *name,
897 		      const char *value)
898 {
899 	char *encoded;
900 
901 	g_string_append (string, name);
902 	g_string_append (string, "*=UTF-8''");
903 	encoded = soup_uri_encode (value, " *'%()<>@,;:\\\"/[]?=");
904 	g_string_append (string, encoded);
905 	g_free (encoded);
906 }
907 
908 static void
append_param_quoted(GString * string,const char * name,const char * value)909 append_param_quoted (GString    *string,
910 		     const char *name,
911 		     const char *value)
912 {
913 	int len;
914 
915 	g_string_append (string, name);
916 	g_string_append (string, "=\"");
917 	while (*value) {
918 		while (*value == '\\' || *value == '"') {
919 			g_string_append_c (string, '\\');
920 			g_string_append_c (string, *value++);
921 		}
922 		len = strcspn (value, "\\\"");
923 		g_string_append_len (string, value, len);
924 		value += len;
925 	}
926 	g_string_append_c (string, '"');
927 }
928 
929 static void
append_param_internal(GString * string,const char * name,const char * value,gboolean allow_token)930 append_param_internal (GString    *string,
931 		       const char *name,
932 		       const char *value,
933 		       gboolean    allow_token)
934 {
935 	const char *v;
936 	gboolean use_token = allow_token;
937 
938 	for (v = value; *v; v++) {
939 		if (*v & 0x80) {
940 			if (g_utf8_validate (value, -1, NULL)) {
941 				append_param_rfc5987 (string, name, value);
942 				return;
943 			} else {
944 				use_token = FALSE;
945 				break;
946 			}
947 		} else if (!soup_char_is_token (*v))
948 			use_token = FALSE;
949 	}
950 
951 	if (use_token) {
952 		g_string_append (string, name);
953 		g_string_append_c (string, '=');
954 		g_string_append (string, value);
955 	} else
956 		append_param_quoted (string, name, value);
957 }
958 
959 /**
960  * soup_header_g_string_append_param_quoted:
961  * @string: a #GString being used to construct an HTTP header value
962  * @name: a parameter name
963  * @value: a parameter value
964  *
965  * Appends something like <literal>@name="@value"</literal> to
966  * @string, taking care to escape any quotes or backslashes in @value.
967  *
968  * If @value is (non-ASCII) UTF-8, this will instead use RFC 5987
969  * encoding, just like soup_header_g_string_append_param().
970  *
971  * Since: 2.30
972  **/
973 void
soup_header_g_string_append_param_quoted(GString * string,const char * name,const char * value)974 soup_header_g_string_append_param_quoted (GString    *string,
975 					  const char *name,
976 					  const char *value)
977 {
978 	g_return_if_fail (string != NULL);
979 	g_return_if_fail (name != NULL);
980 	g_return_if_fail (value != NULL);
981 
982 	append_param_internal (string, name, value, FALSE);
983 }
984 
985 /**
986  * soup_header_g_string_append_param:
987  * @string: a #GString being used to construct an HTTP header value
988  * @name: a parameter name
989  * @value: a parameter value, or %NULL
990  *
991  * Appends something like <literal>@name=@value</literal> to @string,
992  * taking care to quote @value if needed, and if so, to escape any
993  * quotes or backslashes in @value.
994  *
995  * Alternatively, if @value is a non-ASCII UTF-8 string, it will be
996  * appended using RFC5987 syntax. Although in theory this is supposed
997  * to work anywhere in HTTP that uses this style of parameter, in
998  * reality, it can only be used portably with the Content-Disposition
999  * "filename" parameter.
1000  *
1001  * If @value is %NULL, this will just append @name to @string.
1002  *
1003  * Since: 2.26
1004  **/
1005 void
soup_header_g_string_append_param(GString * string,const char * name,const char * value)1006 soup_header_g_string_append_param (GString    *string,
1007 				   const char *name,
1008 				   const char *value)
1009 {
1010 	g_return_if_fail (string != NULL);
1011 	g_return_if_fail (name != NULL);
1012 
1013 	if (!value) {
1014 		g_string_append (string, name);
1015 		return;
1016 	}
1017 
1018 	append_param_internal (string, name, value, TRUE);
1019 }
1020