1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /*
3 * soup-headers.c: HTTP message header parsing
4 *
5 * Copyright (C) 2001-2003, Ximian, Inc.
6 */
7
8 #ifdef HAVE_CONFIG_H
9 #include <config.h>
10 #endif
11
12 #include <stdlib.h>
13 #include <string.h>
14
15 #include "soup-headers.h"
16 #include "soup.h"
17
18 /**
19 * soup_headers_parse:
20 * @str: the header string (including the Request-Line or Status-Line,
21 * but not the trailing blank line)
22 * @len: length of @str
23 * @dest: #SoupMessageHeaders to store the header values in
24 *
25 * Parses the headers of an HTTP request or response in @str and
26 * stores the results in @dest. Beware that @dest may be modified even
27 * on failure.
28 *
29 * This is a low-level method; normally you would use
30 * soup_headers_parse_request() or soup_headers_parse_response().
31 *
32 * Return value: success or failure
33 *
34 * Since: 2.26
35 **/
36 gboolean
soup_headers_parse(const char * str,int len,SoupMessageHeaders * dest)37 soup_headers_parse (const char *str, int len, SoupMessageHeaders *dest)
38 {
39 const char *headers_start;
40 char *headers_copy, *name, *name_end, *value, *value_end;
41 char *eol, *sol, *p;
42 gsize copy_len;
43 gboolean success = FALSE;
44
45 g_return_val_if_fail (str != NULL, FALSE);
46 g_return_val_if_fail (dest != NULL, FALSE);
47
48 /* As per RFC 2616 section 19.3, we treat '\n' as the
49 * line terminator, and '\r', if it appears, merely as
50 * ignorable trailing whitespace.
51 */
52
53 /* Skip over the Request-Line / Status-Line */
54 headers_start = memchr (str, '\n', len);
55 if (!headers_start)
56 return FALSE;
57 /* No '\0's in the Request-Line / Status-Line */
58 if (memchr (str, '\0', headers_start - str))
59 return FALSE;
60
61 /* We work on a copy of the headers, which we can write '\0's
62 * into, so that we don't have to individually g_strndup and
63 * then g_free each header name and value.
64 */
65 copy_len = len - (headers_start - str);
66 headers_copy = g_malloc (copy_len + 1);
67 memcpy (headers_copy, headers_start, copy_len);
68 headers_copy[copy_len] = '\0';
69 value_end = headers_copy;
70
71 /* There shouldn't be any '\0's in the headers already, but
72 * this is the web we're talking about.
73 */
74 while ((p = memchr (headers_copy, '\0', copy_len))) {
75 memmove (p, p + 1, copy_len - (p - headers_copy));
76 copy_len--;
77 }
78
79 while (*(value_end + 1)) {
80 name = value_end + 1;
81 name_end = strchr (name, ':');
82
83 /* Reject if there is no ':', or the header name is
84 * empty, or it contains whitespace.
85 */
86 if (!name_end ||
87 name_end == name ||
88 name + strcspn (name, " \t\r\n") < name_end) {
89 /* Ignore this line. Note that if it has
90 * continuation lines, we'll end up ignoring
91 * them too since they'll start with spaces.
92 */
93 value_end = strchr (name, '\n');
94 if (!value_end)
95 goto done;
96 continue;
97 }
98
99 /* Find the end of the value; ie, an end-of-line that
100 * isn't followed by a continuation line.
101 */
102 value = name_end + 1;
103 value_end = strchr (name, '\n');
104 if (!value_end)
105 goto done;
106 while (*(value_end + 1) == ' ' || *(value_end + 1) == '\t') {
107 value_end = strchr (value_end + 1, '\n');
108 if (!value_end)
109 goto done;
110 }
111
112 *name_end = '\0';
113 *value_end = '\0';
114
115 /* Skip leading whitespace */
116 while (value < value_end &&
117 (*value == ' ' || *value == '\t' ||
118 *value == '\r' || *value == '\n'))
119 value++;
120
121 /* Collapse continuation lines */
122 while ((eol = strchr (value, '\n'))) {
123 /* find start of next line */
124 sol = eol + 1;
125 while (*sol == ' ' || *sol == '\t')
126 sol++;
127
128 /* back up over trailing whitespace on current line */
129 while (eol[-1] == ' ' || eol[-1] == '\t' || eol[-1] == '\r')
130 eol--;
131
132 /* Delete all but one SP */
133 *eol = ' ';
134 memmove (eol + 1, sol, strlen (sol) + 1);
135 }
136
137 /* clip trailing whitespace */
138 eol = strchr (value, '\0');
139 while (eol > value &&
140 (eol[-1] == ' ' || eol[-1] == '\t' || eol[-1] == '\r'))
141 eol--;
142 *eol = '\0';
143
144 /* convert (illegal) '\r's to spaces */
145 for (p = strchr (value, '\r'); p; p = strchr (p, '\r'))
146 *p = ' ';
147
148 soup_message_headers_append (dest, name, value);
149 }
150 success = TRUE;
151
152 done:
153 g_free (headers_copy);
154 return success;
155 }
156
157 /**
158 * soup_headers_parse_request:
159 * @str: the headers (up to, but not including, the trailing blank line)
160 * @len: length of @str
161 * @req_headers: #SoupMessageHeaders to store the header values in
162 * @req_method: (out) (allow-none): if non-%NULL, will be filled in with the
163 * request method
164 * @req_path: (out) (allow-none): if non-%NULL, will be filled in with the
165 * request path
166 * @ver: (out) (allow-none): if non-%NULL, will be filled in with the HTTP
167 * version
168 *
169 * Parses the headers of an HTTP request in @str and stores the
170 * results in @req_method, @req_path, @ver, and @req_headers.
171 *
172 * Beware that @req_headers may be modified even on failure.
173 *
174 * Return value: %SOUP_STATUS_OK if the headers could be parsed, or an
175 * HTTP error to be returned to the client if they could not be.
176 **/
177 guint
soup_headers_parse_request(const char * str,int len,SoupMessageHeaders * req_headers,char ** req_method,char ** req_path,SoupHTTPVersion * ver)178 soup_headers_parse_request (const char *str,
179 int len,
180 SoupMessageHeaders *req_headers,
181 char **req_method,
182 char **req_path,
183 SoupHTTPVersion *ver)
184 {
185 const char *method, *method_end, *path, *path_end;
186 const char *version, *version_end, *headers;
187 unsigned long major_version, minor_version;
188 char *p;
189
190 g_return_val_if_fail (str != NULL, SOUP_STATUS_MALFORMED);
191
192 /* RFC 2616 4.1 "servers SHOULD ignore any empty line(s)
193 * received where a Request-Line is expected."
194 */
195 while ((*str == '\r' || *str == '\n') && len > 0) {
196 str++;
197 len--;
198 }
199 if (!len)
200 return SOUP_STATUS_BAD_REQUEST;
201
202 /* RFC 2616 19.3 "[servers] SHOULD accept any amount of SP or
203 * HT characters between [Request-Line] fields"
204 */
205
206 method = method_end = str;
207 while (method_end < str + len && *method_end != ' ' && *method_end != '\t')
208 method_end++;
209 if (method_end >= str + len)
210 return SOUP_STATUS_BAD_REQUEST;
211
212 path = method_end;
213 while (path < str + len && (*path == ' ' || *path == '\t'))
214 path++;
215 if (path >= str + len)
216 return SOUP_STATUS_BAD_REQUEST;
217
218 path_end = path;
219 while (path_end < str + len && *path_end != ' ' && *path_end != '\t')
220 path_end++;
221 if (path_end >= str + len)
222 return SOUP_STATUS_BAD_REQUEST;
223
224 version = path_end;
225 while (version < str + len && (*version == ' ' || *version == '\t'))
226 version++;
227 if (version + 8 >= str + len)
228 return SOUP_STATUS_BAD_REQUEST;
229
230 if (strncmp (version, "HTTP/", 5) != 0 ||
231 !g_ascii_isdigit (version[5]))
232 return SOUP_STATUS_BAD_REQUEST;
233 major_version = strtoul (version + 5, &p, 10);
234 if (*p != '.' || !g_ascii_isdigit (p[1]))
235 return SOUP_STATUS_BAD_REQUEST;
236 minor_version = strtoul (p + 1, &p, 10);
237 version_end = p;
238 if (major_version != 1)
239 return SOUP_STATUS_HTTP_VERSION_NOT_SUPPORTED;
240 if (minor_version > 1)
241 return SOUP_STATUS_HTTP_VERSION_NOT_SUPPORTED;
242
243 headers = version_end;
244 while (headers < str + len && (*headers == '\r' || *headers == ' '))
245 headers++;
246 if (headers >= str + len || *headers != '\n')
247 return SOUP_STATUS_BAD_REQUEST;
248
249 if (!soup_headers_parse (str, len, req_headers))
250 return SOUP_STATUS_BAD_REQUEST;
251
252 if (soup_message_headers_get_expectations (req_headers) &
253 SOUP_EXPECTATION_UNRECOGNIZED)
254 return SOUP_STATUS_EXPECTATION_FAILED;
255 /* RFC 2616 14.10 */
256 if (minor_version == 0)
257 soup_message_headers_clean_connection_headers (req_headers);
258
259 if (req_method)
260 *req_method = g_strndup (method, method_end - method);
261 if (req_path)
262 *req_path = g_strndup (path, path_end - path);
263 if (ver)
264 *ver = (minor_version == 0) ? SOUP_HTTP_1_0 : SOUP_HTTP_1_1;
265
266 return SOUP_STATUS_OK;
267 }
268
269 /**
270 * soup_headers_parse_status_line:
271 * @status_line: an HTTP Status-Line
272 * @ver: (out) (allow-none): if non-%NULL, will be filled in with the HTTP
273 * version
274 * @status_code: (out) (allow-none): if non-%NULL, will be filled in with
275 * the status code
276 * @reason_phrase: (out) (allow-none): if non-%NULL, will be filled in with
277 * the reason phrase
278 *
279 * Parses the HTTP Status-Line string in @status_line into @ver,
280 * @status_code, and @reason_phrase. @status_line must be terminated by
281 * either "\0" or "\r\n".
282 *
283 * Return value: %TRUE if @status_line was parsed successfully.
284 **/
285 gboolean
soup_headers_parse_status_line(const char * status_line,SoupHTTPVersion * ver,guint * status_code,char ** reason_phrase)286 soup_headers_parse_status_line (const char *status_line,
287 SoupHTTPVersion *ver,
288 guint *status_code,
289 char **reason_phrase)
290 {
291 unsigned long major_version, minor_version, code;
292 const char *code_start, *code_end, *phrase_start, *phrase_end;
293 char *p;
294
295 g_return_val_if_fail (status_line != NULL, FALSE);
296
297 if (strncmp (status_line, "HTTP/", 5) == 0 &&
298 g_ascii_isdigit (status_line[5])) {
299 major_version = strtoul (status_line + 5, &p, 10);
300 if (*p != '.' || !g_ascii_isdigit (p[1]))
301 return FALSE;
302 minor_version = strtoul (p + 1, &p, 10);
303 if (major_version != 1)
304 return FALSE;
305 if (minor_version > 1)
306 return FALSE;
307 if (ver)
308 *ver = (minor_version == 0) ? SOUP_HTTP_1_0 : SOUP_HTTP_1_1;
309 } else if (!strncmp (status_line, "ICY", 3)) {
310 /* Shoutcast not-quite-HTTP format */
311 if (ver)
312 *ver = SOUP_HTTP_1_0;
313 p = (char *)status_line + 3;
314 } else
315 return FALSE;
316
317 code_start = p;
318 while (*code_start == ' ' || *code_start == '\t')
319 code_start++;
320 code_end = code_start;
321 while (*code_end >= '0' && *code_end <= '9')
322 code_end++;
323 if (code_end != code_start + 3)
324 return FALSE;
325 code = atoi (code_start);
326 if (code < 100 || code > 999)
327 return FALSE;
328 if (status_code)
329 *status_code = code;
330
331 phrase_start = code_end;
332 while (*phrase_start == ' ' || *phrase_start == '\t')
333 phrase_start++;
334 phrase_end = phrase_start + strcspn (phrase_start, "\n");
335 while (phrase_end > phrase_start &&
336 (phrase_end[-1] == '\r' || phrase_end[-1] == ' ' || phrase_end[-1] == '\t'))
337 phrase_end--;
338 if (reason_phrase)
339 *reason_phrase = g_strndup (phrase_start, phrase_end - phrase_start);
340
341 return TRUE;
342 }
343
344 /**
345 * soup_headers_parse_response:
346 * @str: the headers (up to, but not including, the trailing blank line)
347 * @len: length of @str
348 * @headers: #SoupMessageHeaders to store the header values in
349 * @ver: (out) (allow-none): if non-%NULL, will be filled in with the HTTP
350 * version
351 * @status_code: (out) (allow-none): if non-%NULL, will be filled in with
352 * the status code
353 * @reason_phrase: (out) (allow-none): if non-%NULL, will be filled in with
354 * the reason phrase
355 *
356 * Parses the headers of an HTTP response in @str and stores the
357 * results in @ver, @status_code, @reason_phrase, and @headers.
358 *
359 * Beware that @headers may be modified even on failure.
360 *
361 * Return value: success or failure.
362 **/
363 gboolean
soup_headers_parse_response(const char * str,int len,SoupMessageHeaders * headers,SoupHTTPVersion * ver,guint * status_code,char ** reason_phrase)364 soup_headers_parse_response (const char *str,
365 int len,
366 SoupMessageHeaders *headers,
367 SoupHTTPVersion *ver,
368 guint *status_code,
369 char **reason_phrase)
370 {
371 SoupHTTPVersion version;
372
373 g_return_val_if_fail (str != NULL, FALSE);
374
375 /* Workaround for broken servers that send extra line breaks
376 * after a response, which we then see prepended to the next
377 * response on that connection.
378 */
379 while ((*str == '\r' || *str == '\n') && len > 0) {
380 str++;
381 len--;
382 }
383 if (!len)
384 return FALSE;
385
386 if (!soup_headers_parse (str, len, headers))
387 return FALSE;
388
389 if (!soup_headers_parse_status_line (str,
390 &version,
391 status_code,
392 reason_phrase))
393 return FALSE;
394 if (ver)
395 *ver = version;
396
397 /* RFC 2616 14.10 */
398 if (version == SOUP_HTTP_1_0)
399 soup_message_headers_clean_connection_headers (headers);
400
401 return TRUE;
402 }
403
404
405 /*
406 * Parsing of specific HTTP header types
407 */
408
/* Returns a pointer to the first character at or after @s that is not
 * ASCII whitespace (possibly the terminating '\0').
 */
static const char *
skip_lws (const char *s)
{
	for (; g_ascii_isspace (*s); s++)
		;
	return s;
}
416
/* Walks @s backwards over ASCII whitespace, never going before @start,
 * and returns the resulting position (one past the last non-whitespace
 * character of the range).
 */
static const char *
unskip_lws (const char *s, const char *start)
{
	for (;;) {
		if (s <= start)
			break;
		if (!g_ascii_isspace (s[-1]))
			break;
		s--;
	}
	return s;
}
424
/* Advances @s over any mix of ASCII whitespace and @delim characters;
 * the grammar allows for multiple delimiters in a row.
 */
static const char *
skip_delims (const char *s, char delim)
{
	for (; *s == delim || g_ascii_isspace (*s); s++)
		;
	return s;
}
433
434 static const char *
skip_item(const char * s,char delim)435 skip_item (const char *s, char delim)
436 {
437 gboolean quoted = FALSE;
438 const char *start = s;
439
440 /* A list item ends at the last non-whitespace character
441 * before a delimiter which is not inside a quoted-string. Or
442 * at the end of the string.
443 */
444
445 while (*s) {
446 if (*s == '"')
447 quoted = !quoted;
448 else if (quoted) {
449 if (*s == '\\' && *(s + 1))
450 s++;
451 } else {
452 if (*s == delim)
453 break;
454 }
455 s++;
456 }
457
458 return unskip_lws (s, start);
459 }
460
461 static GSList *
parse_list(const char * header,char delim)462 parse_list (const char *header, char delim)
463 {
464 GSList *list = NULL;
465 const char *end;
466
467 header = skip_delims (header, delim);
468 while (*header) {
469 end = skip_item (header, delim);
470 list = g_slist_prepend (list, g_strndup (header, end - header));
471 header = skip_delims (end, delim);
472 }
473
474 return g_slist_reverse (list);
475 }
476
477 /**
478 * soup_header_parse_list:
479 * @header: a header value
480 *
481 * Parses a header whose content is described by RFC2616 as
482 * "#something", where "something" does not itself contain commas,
483 * except as part of quoted-strings.
484 *
485 * Return value: (transfer full) (element-type utf8): a #GSList of
486 * list elements, as allocated strings
487 **/
488 GSList *
soup_header_parse_list(const char * header)489 soup_header_parse_list (const char *header)
490 {
491 g_return_val_if_fail (header != NULL, NULL);
492
493 return parse_list (header, ',');
494 }
495
/* One parsed list element together with its qvalue */
typedef struct {
	char *item;	/* the element text */
	double qval;	/* its quality value */
} QualityItem;

/* qsort() comparator for #QualityItem: orders by descending qvalue. */
static int
sort_by_qval (const void *a, const void *b)
{
	const QualityItem *lhs = a;
	const QualityItem *rhs = b;

	if (lhs->qval < rhs->qval)
		return 1;
	if (lhs->qval == rhs->qval)
		return 0;
	return -1;
}
514
515 /**
516 * soup_header_parse_quality_list:
517 * @header: a header value
518 * @unacceptable: (out) (allow-none) (transfer full) (element-type utf8): on
519 * return, will contain a list of unacceptable values
520 *
521 * Parses a header whose content is a list of items with optional
522 * "qvalue"s (eg, Accept, Accept-Charset, Accept-Encoding,
523 * Accept-Language, TE).
524 *
525 * If @unacceptable is not %NULL, then on return, it will contain the
526 * items with qvalue 0. Either way, those items will be removed from
527 * the main list.
528 *
529 * Return value: (transfer full) (element-type utf8): a #GSList of
530 * acceptable values (as allocated strings), highest-qvalue first.
531 **/
532 GSList *
soup_header_parse_quality_list(const char * header,GSList ** unacceptable)533 soup_header_parse_quality_list (const char *header, GSList **unacceptable)
534 {
535 GSList *unsorted;
536 QualityItem *array;
537 GSList *sorted, *iter;
538 char *item, *semi;
539 const char *param, *equal, *value;
540 double qval;
541 int n;
542
543 g_return_val_if_fail (header != NULL, NULL);
544
545 if (unacceptable)
546 *unacceptable = NULL;
547
548 unsorted = soup_header_parse_list (header);
549 array = g_new0 (QualityItem, g_slist_length (unsorted));
550 for (iter = unsorted, n = 0; iter; iter = iter->next) {
551 item = iter->data;
552 qval = 1.0;
553 for (semi = strchr (item, ';'); semi; semi = strchr (semi + 1, ';')) {
554 param = skip_lws (semi + 1);
555 if (*param != 'q')
556 continue;
557 equal = skip_lws (param + 1);
558 if (!equal || *equal != '=')
559 continue;
560 value = skip_lws (equal + 1);
561 if (!value)
562 continue;
563
564 if (value[0] != '0' && value[0] != '1')
565 continue;
566 qval = (double)(value[0] - '0');
567 if (value[0] == '0' && value[1] == '.') {
568 if (g_ascii_isdigit (value[2])) {
569 qval += (double)(value[2] - '0') / 10;
570 if (g_ascii_isdigit (value[3])) {
571 qval += (double)(value[3] - '0') / 100;
572 if (g_ascii_isdigit (value[4]))
573 qval += (double)(value[4] - '0') / 1000;
574 }
575 }
576 }
577
578 *semi = '\0';
579 break;
580 }
581
582 if (qval == 0.0) {
583 if (unacceptable) {
584 *unacceptable = g_slist_prepend (*unacceptable,
585 item);
586 }
587 } else {
588 array[n].item = item;
589 array[n].qval = qval;
590 n++;
591 }
592 }
593 g_slist_free (unsorted);
594
595 qsort (array, n, sizeof (QualityItem), sort_by_qval);
596 sorted = NULL;
597 while (n--)
598 sorted = g_slist_prepend (sorted, array[n].item);
599 g_free (array);
600
601 return sorted;
602 }
603
604 /**
605 * soup_header_free_list: (skip)
606 * @list: a #GSList returned from soup_header_parse_list() or
607 * soup_header_parse_quality_list()
608 *
609 * Frees @list.
610 **/
611 void
soup_header_free_list(GSList * list)612 soup_header_free_list (GSList *list)
613 {
614 g_slist_free_full (list, g_free);
615 }
616
617 /**
618 * soup_header_contains:
619 * @header: An HTTP header suitable for parsing with
620 * soup_header_parse_list()
621 * @token: a token
622 *
623 * Parses @header to see if it contains the token @token (matched
624 * case-insensitively). Note that this can't be used with lists
625 * that have qvalues.
626 *
627 * Return value: whether or not @header contains @token
628 **/
629 gboolean
soup_header_contains(const char * header,const char * token)630 soup_header_contains (const char *header, const char *token)
631 {
632 const char *end;
633 guint len;
634
635 g_return_val_if_fail (header != NULL, FALSE);
636 g_return_val_if_fail (token != NULL, FALSE);
637
638 len = strlen (token);
639
640 header = skip_delims (header, ',');
641 while (*header) {
642 end = skip_item (header, ',');
643 if (end - header == len &&
644 !g_ascii_strncasecmp (header, token, len))
645 return TRUE;
646 header = skip_delims (end, ',');
647 }
648
649 return FALSE;
650 }
651
/* Decodes a quoted-string in place: drops the opening quote, resolves
 * backslash escapes, and stops at the closing quote or the end of the
 * string. The first character of @quoted_string is skipped without
 * being looked at, so the caller must make sure it is the opening '"'.
 */
static void
decode_quoted_string (char *quoted_string)
{
	const char *in;
	char *out = quoted_string;

	for (in = quoted_string + 1; *in != '\0' && *in != '"'; in++) {
		/* A backslash makes the next character literal, unless it
		 * is the very last character of the string.
		 */
		if (in[0] == '\\' && in[1] != '\0')
			in++;
		*out++ = *in;
	}
	*out = '\0';
}
666
667 static gboolean
decode_rfc5987(char * encoded_string)668 decode_rfc5987 (char *encoded_string)
669 {
670 char *q, *decoded;
671 gboolean iso_8859_1 = FALSE;
672
673 q = strchr (encoded_string, '\'');
674 if (!q)
675 return FALSE;
676 if (g_ascii_strncasecmp (encoded_string, "UTF-8",
677 q - encoded_string) == 0)
678 ;
679 else if (g_ascii_strncasecmp (encoded_string, "iso-8859-1",
680 q - encoded_string) == 0)
681 iso_8859_1 = TRUE;
682 else
683 return FALSE;
684
685 q = strchr (q + 1, '\'');
686 if (!q)
687 return FALSE;
688
689 decoded = soup_uri_decode (q + 1);
690 if (iso_8859_1) {
691 char *utf8 = g_convert_with_fallback (decoded, -1, "UTF-8",
692 "iso-8859-1", "_",
693 NULL, NULL, NULL);
694 g_free (decoded);
695 if (!utf8)
696 return FALSE;
697 decoded = utf8;
698 }
699
700 /* If encoded_string was UTF-8, then each 3-character %-escape
701 * will be converted to a single byte, and so decoded is
702 * shorter than encoded_string. If encoded_string was
703 * iso-8859-1, then each 3-character %-escape will be
704 * converted into at most 2 bytes in UTF-8, and so it's still
705 * shorter.
706 */
707 strcpy (encoded_string, decoded);
708 g_free (decoded);
709 return TRUE;
710 }
711
712 static GHashTable *
parse_param_list(const char * header,char delim,gboolean strict)713 parse_param_list (const char *header, char delim, gboolean strict)
714 {
715 GHashTable *params;
716 GSList *list, *iter;
717 char *item, *eq, *name_end, *value;
718 gboolean override, duplicated;
719
720 params = g_hash_table_new_full (soup_str_case_hash,
721 soup_str_case_equal,
722 g_free, NULL);
723
724 list = parse_list (header, delim);
725 for (iter = list; iter; iter = iter->next) {
726 item = iter->data;
727 override = FALSE;
728
729 eq = strchr (item, '=');
730 if (eq) {
731 name_end = (char *)unskip_lws (eq, item);
732 if (name_end == item) {
733 /* That's no good... */
734 g_free (item);
735 continue;
736 }
737
738 *name_end = '\0';
739
740 value = (char *)skip_lws (eq + 1);
741
742 if (name_end[-1] == '*' && name_end > item + 1) {
743 name_end[-1] = '\0';
744 if (!decode_rfc5987 (value)) {
745 g_free (item);
746 continue;
747 }
748 override = TRUE;
749 } else if (*value == '"')
750 decode_quoted_string (value);
751 } else
752 value = NULL;
753
754 duplicated = g_hash_table_lookup_extended (params, item, NULL, NULL);
755
756 if (strict && duplicated) {
757 soup_header_free_param_list (params);
758 params = NULL;
759 g_slist_foreach (iter, (GFunc)g_free, NULL);
760 break;
761 } else if (override || !duplicated)
762 g_hash_table_replace (params, item, value);
763 else
764 g_free (item);
765 }
766
767 g_slist_free (list);
768 return params;
769 }
770
771 /**
772 * soup_header_parse_param_list:
773 * @header: a header value
774 *
775 * Parses a header which is a comma-delimited list of something like:
776 * <literal>token [ "=" ( token | quoted-string ) ]</literal>.
777 *
778 * Tokens that don't have an associated value will still be added to
779 * the resulting hash table, but with a %NULL value.
780 *
781 * This also handles RFC5987 encoding (which in HTTP is mostly used
782 * for giving UTF8-encoded filenames in the Content-Disposition
783 * header).
784 *
785 * Return value: (element-type utf8 utf8) (transfer full): a
786 * #GHashTable of list elements, which can be freed with
787 * soup_header_free_param_list().
788 **/
789 GHashTable *
soup_header_parse_param_list(const char * header)790 soup_header_parse_param_list (const char *header)
791 {
792 g_return_val_if_fail (header != NULL, NULL);
793
794 return parse_param_list (header, ',', FALSE);
795 }
796
797 /**
798 * soup_header_parse_semi_param_list:
799 * @header: a header value
800 *
801 * Parses a header which is a semicolon-delimited list of something
802 * like: <literal>token [ "=" ( token | quoted-string ) ]</literal>.
803 *
804 * Tokens that don't have an associated value will still be added to
805 * the resulting hash table, but with a %NULL value.
806 *
807 * This also handles RFC5987 encoding (which in HTTP is mostly used
808 * for giving UTF8-encoded filenames in the Content-Disposition
809 * header).
810 *
811 * Return value: (element-type utf8 utf8) (transfer full): a
812 * #GHashTable of list elements, which can be freed with
813 * soup_header_free_param_list().
814 *
815 * Since: 2.24
816 **/
817 GHashTable *
soup_header_parse_semi_param_list(const char * header)818 soup_header_parse_semi_param_list (const char *header)
819 {
820 g_return_val_if_fail (header != NULL, NULL);
821
822 return parse_param_list (header, ';', FALSE);
823 }
824
825 /**
826 * soup_header_parse_param_list_strict:
827 * @header: a header value
828 *
829 * A strict version of soup_header_parse_param_list()
830 * that bails out if there are duplicate parameters.
831 * Note that this function will treat RFC5987-encoded
832 * parameters as duplicated if an ASCII version is also
833 * present. For header fields that might contain
834 * RFC5987-encoded parameters, use
835 * soup_header_parse_param_list() instead.
836 *
837 * Return value: (element-type utf8 utf8) (transfer full) (nullable):
838 * a #GHashTable of list elements, which can be freed with
839 * soup_header_free_param_list() or %NULL if there are duplicate
840 * elements.
841 *
842 * Since: 2.66
843 **/
844 GHashTable *
soup_header_parse_param_list_strict(const char * header)845 soup_header_parse_param_list_strict (const char *header)
846 {
847 g_return_val_if_fail (header != NULL, NULL);
848
849 return parse_param_list (header, ',', TRUE);
850 }
851
852 /**
853 * soup_header_parse_semi_param_list_strict:
854 * @header: a header value
855 *
856 * A strict version of soup_header_parse_semi_param_list()
857 * that bails out if there are duplicate parameters.
858 * Note that this function will treat RFC5987-encoded
859 * parameters as duplicated if an ASCII version is also
860 * present. For header fields that might contain
861 * RFC5987-encoded parameters, use
862 * soup_header_parse_semi_param_list() instead.
863 *
864 * Return value: (element-type utf8 utf8) (transfer full) (nullable):
865 * a #GHashTable of list elements, which can be freed with
866 * soup_header_free_param_list() or %NULL if there are duplicate
867 * elements.
868 *
869 * Since: 2.66
870 **/
871 GHashTable *
soup_header_parse_semi_param_list_strict(const char * header)872 soup_header_parse_semi_param_list_strict (const char *header)
873 {
874 g_return_val_if_fail (header != NULL, NULL);
875
876 return parse_param_list (header, ';', TRUE);
877 }
878
879 /**
880 * soup_header_free_param_list:
881 * @param_list: (element-type utf8 utf8): a #GHashTable returned from soup_header_parse_param_list()
882 * or soup_header_parse_semi_param_list()
883 *
884 * Frees @param_list.
885 **/
886 void
soup_header_free_param_list(GHashTable * param_list)887 soup_header_free_param_list (GHashTable *param_list)
888 {
889 g_return_if_fail (param_list != NULL);
890
891 g_hash_table_destroy (param_list);
892 }
893
894 static void
append_param_rfc5987(GString * string,const char * name,const char * value)895 append_param_rfc5987 (GString *string,
896 const char *name,
897 const char *value)
898 {
899 char *encoded;
900
901 g_string_append (string, name);
902 g_string_append (string, "*=UTF-8''");
903 encoded = soup_uri_encode (value, " *'%()<>@,;:\\\"/[]?=");
904 g_string_append (string, encoded);
905 g_free (encoded);
906 }
907
908 static void
append_param_quoted(GString * string,const char * name,const char * value)909 append_param_quoted (GString *string,
910 const char *name,
911 const char *value)
912 {
913 int len;
914
915 g_string_append (string, name);
916 g_string_append (string, "=\"");
917 while (*value) {
918 while (*value == '\\' || *value == '"') {
919 g_string_append_c (string, '\\');
920 g_string_append_c (string, *value++);
921 }
922 len = strcspn (value, "\\\"");
923 g_string_append_len (string, value, len);
924 value += len;
925 }
926 g_string_append_c (string, '"');
927 }
928
929 static void
append_param_internal(GString * string,const char * name,const char * value,gboolean allow_token)930 append_param_internal (GString *string,
931 const char *name,
932 const char *value,
933 gboolean allow_token)
934 {
935 const char *v;
936 gboolean use_token = allow_token;
937
938 for (v = value; *v; v++) {
939 if (*v & 0x80) {
940 if (g_utf8_validate (value, -1, NULL)) {
941 append_param_rfc5987 (string, name, value);
942 return;
943 } else {
944 use_token = FALSE;
945 break;
946 }
947 } else if (!soup_char_is_token (*v))
948 use_token = FALSE;
949 }
950
951 if (use_token) {
952 g_string_append (string, name);
953 g_string_append_c (string, '=');
954 g_string_append (string, value);
955 } else
956 append_param_quoted (string, name, value);
957 }
958
959 /**
960 * soup_header_g_string_append_param_quoted:
961 * @string: a #GString being used to construct an HTTP header value
962 * @name: a parameter name
963 * @value: a parameter value
964 *
965 * Appends something like <literal>@name="@value"</literal> to
966 * @string, taking care to escape any quotes or backslashes in @value.
967 *
968 * If @value is (non-ASCII) UTF-8, this will instead use RFC 5987
969 * encoding, just like soup_header_g_string_append_param().
970 *
971 * Since: 2.30
972 **/
973 void
soup_header_g_string_append_param_quoted(GString * string,const char * name,const char * value)974 soup_header_g_string_append_param_quoted (GString *string,
975 const char *name,
976 const char *value)
977 {
978 g_return_if_fail (string != NULL);
979 g_return_if_fail (name != NULL);
980 g_return_if_fail (value != NULL);
981
982 append_param_internal (string, name, value, FALSE);
983 }
984
985 /**
986 * soup_header_g_string_append_param:
987 * @string: a #GString being used to construct an HTTP header value
988 * @name: a parameter name
989 * @value: a parameter value, or %NULL
990 *
991 * Appends something like <literal>@name=@value</literal> to @string,
992 * taking care to quote @value if needed, and if so, to escape any
993 * quotes or backslashes in @value.
994 *
995 * Alternatively, if @value is a non-ASCII UTF-8 string, it will be
996 * appended using RFC5987 syntax. Although in theory this is supposed
997 * to work anywhere in HTTP that uses this style of parameter, in
998 * reality, it can only be used portably with the Content-Disposition
999 * "filename" parameter.
1000 *
1001 * If @value is %NULL, this will just append @name to @string.
1002 *
1003 * Since: 2.26
1004 **/
1005 void
soup_header_g_string_append_param(GString * string,const char * name,const char * value)1006 soup_header_g_string_append_param (GString *string,
1007 const char *name,
1008 const char *value)
1009 {
1010 g_return_if_fail (string != NULL);
1011 g_return_if_fail (name != NULL);
1012
1013 if (!value) {
1014 g_string_append (string, name);
1015 return;
1016 }
1017
1018 append_param_internal (string, name, value, TRUE);
1019 }
1020