1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /* soup-form.c : utility functions for HTML forms */
3
4 /*
5 * Copyright 2008 Red Hat, Inc.
6 */
7
8 #ifdef HAVE_CONFIG_H
9 #include <config.h>
10 #endif
11
12 #include <string.h>
13
14 #include "soup-form.h"
15 #include "soup.h"
16
17 /**
18 * SECTION:soup-form
19 * @short_description: HTML form handling
20 * @see_also: #SoupMultipart
21 *
 * libsoup contains several helper methods for processing HTML forms as
23 * defined by <ulink
24 * url="http://www.w3.org/TR/html401/interact/forms.html#h-17.13">the
25 * HTML 4.01 specification</ulink>.
26 **/
27
28 /**
29 * SOUP_FORM_MIME_TYPE_URLENCODED:
30 *
31 * A macro containing the value
32 * <literal>"application/x-www-form-urlencoded"</literal>; the default
33 * MIME type for POSTing HTML form data.
34 *
35 * Since: 2.26
36 **/
37
38 /**
39 * SOUP_FORM_MIME_TYPE_MULTIPART:
40 *
41 * A macro containing the value
42 * <literal>"multipart/form-data"</literal>; the MIME type used for
43 * posting form data that contains files to be uploaded.
44 *
45 * Since: 2.26
46 **/
47
/* Numeric value (0-15) of the hexadecimal digit character c. Only
 * meaningful when c is a valid ASCII hex digit: '0'-'9' map directly;
 * for letters the 0x4F mask clears (among others) the 0x20 bit, so
 * 'a'-'f' are folded onto 'A'-'F' before subtracting 'A'.
 */
#define XDIGIT(c) ((c) <= '9' ? (c) - '0' : ((c) & 0x4F) - 'A' + 10)

/* Byte value encoded by the two hex digits at s[1] and s[2], i.e. the
 * "XX" of a "%XX" escape that s points at. The argument is
 * parenthesized so that pointer expressions such as HEXCHAR (p + 1)
 * expand correctly.
 */
#define HEXCHAR(s) ((XDIGIT ((s)[1]) << 4) + XDIGIT ((s)[2]))
50
51 static gboolean
form_decode(char * part)52 form_decode (char *part)
53 {
54 unsigned char *s, *d;
55
56 s = d = (unsigned char *)part;
57 do {
58 if (*s == '%') {
59 if (!g_ascii_isxdigit (s[1]) ||
60 !g_ascii_isxdigit (s[2]))
61 return FALSE;
62 *d++ = HEXCHAR (s);
63 s += 2;
64 } else if (*s == '+')
65 *d++ = ' ';
66 else
67 *d++ = *s;
68 } while (*s++);
69
70 return TRUE;
71 }
72
73 /**
74 * soup_form_decode:
75 * @encoded_form: data of type "application/x-www-form-urlencoded"
76 *
77 * Decodes @form, which is an urlencoded dataset as defined in the
78 * HTML 4.01 spec.
79 *
80 * Return value: (element-type utf8 utf8) (transfer container): a hash
81 * table containing the name/value pairs from @encoded_form, which you
82 * can free with g_hash_table_destroy().
83 **/
84 GHashTable *
soup_form_decode(const char * encoded_form)85 soup_form_decode (const char *encoded_form)
86 {
87 GHashTable *form_data_set;
88 char **pairs, *eq, *name, *value;
89 int i;
90
91 form_data_set = g_hash_table_new_full (g_str_hash, g_str_equal,
92 g_free, NULL);
93 pairs = g_strsplit (encoded_form, "&", -1);
94 for (i = 0; pairs[i]; i++) {
95 name = pairs[i];
96 eq = strchr (name, '=');
97 if (eq) {
98 *eq = '\0';
99 value = eq + 1;
100 } else
101 value = NULL;
102 if (!value || !form_decode (name) || !form_decode (value)) {
103 g_free (name);
104 continue;
105 }
106
107 g_hash_table_replace (form_data_set, name, value);
108 }
109 g_free (pairs);
110
111 return form_data_set;
112 }
113
114 /**
115 * soup_form_decode_multipart:
116 * @msg: a #SoupMessage containing a "multipart/form-data" request body
117 * @file_control_name: (allow-none): the name of the HTML file upload control, or %NULL
118 * @filename: (out) (allow-none): return location for the name of the uploaded file, or %NULL
119 * @content_type: (out) (allow-none): return location for the MIME type of the uploaded file, or %NULL
120 * @file: (out) (allow-none): return location for the uploaded file data, or %NULL
121 *
122 * Decodes the "multipart/form-data" request in @msg; this is a
123 * convenience method for the case when you have a single file upload
124 * control in a form. (Or when you don't have any file upload
125 * controls, but are still using "multipart/form-data" anyway.) Pass
126 * the name of the file upload control in @file_control_name, and
127 * soup_form_decode_multipart() will extract the uploaded file data
128 * into @filename, @content_type, and @file. All of the other form
129 * control data will be returned (as strings, as with
130 * soup_form_decode()) in the returned #GHashTable.
131 *
132 * You may pass %NULL for @filename, @content_type and/or @file if you do not
133 * care about those fields. soup_form_decode_multipart() may also
134 * return %NULL in those fields if the client did not provide that
135 * information. You must free the returned filename and content-type
136 * with g_free(), and the returned file data with soup_buffer_free().
137 *
138 * If you have a form with more than one file upload control, you will
139 * need to decode it manually, using soup_multipart_new_from_message()
140 * and soup_multipart_get_part().
141 *
142 * Return value: (nullable) (element-type utf8 utf8) (transfer container):
143 * a hash table containing the name/value pairs (other than
144 * @file_control_name) from @msg, which you can free with
145 * g_hash_table_destroy(). On error, it will return %NULL.
146 *
147 * Since: 2.26
148 **/
149 GHashTable *
soup_form_decode_multipart(SoupMessage * msg,const char * file_control_name,char ** filename,char ** content_type,SoupBuffer ** file)150 soup_form_decode_multipart (SoupMessage *msg, const char *file_control_name,
151 char **filename, char **content_type,
152 SoupBuffer **file)
153 {
154 SoupMultipart *multipart;
155 GHashTable *form_data_set, *params;
156 SoupMessageHeaders *part_headers;
157 SoupBuffer *part_body;
158 char *disposition, *name;
159 int i;
160
161 g_return_val_if_fail (SOUP_IS_MESSAGE (msg), NULL);
162
163 multipart = soup_multipart_new_from_message (msg->request_headers,
164 msg->request_body);
165 if (!multipart)
166 return NULL;
167
168 if (filename)
169 *filename = NULL;
170 if (content_type)
171 *content_type = NULL;
172 if (file)
173 *file = NULL;
174
175 form_data_set = g_hash_table_new_full (g_str_hash, g_str_equal,
176 g_free, g_free);
177 for (i = 0; i < soup_multipart_get_length (multipart); i++) {
178 soup_multipart_get_part (multipart, i, &part_headers, &part_body);
179 if (!soup_message_headers_get_content_disposition (
180 part_headers, &disposition, ¶ms))
181 continue;
182 name = g_hash_table_lookup (params, "name");
183 if (g_ascii_strcasecmp (disposition, "form-data") != 0 ||
184 !name) {
185 g_free (disposition);
186 g_hash_table_destroy (params);
187 continue;
188 }
189
190 if (file_control_name && !strcmp (name, file_control_name)) {
191 if (filename)
192 *filename = g_strdup (g_hash_table_lookup (params, "filename"));
193 if (content_type)
194 *content_type = g_strdup (soup_message_headers_get_content_type (part_headers, NULL));
195 if (file)
196 *file = soup_buffer_copy (part_body);
197 } else {
198 g_hash_table_insert (form_data_set,
199 g_strdup (name),
200 g_strndup (part_body->data,
201 part_body->length));
202 }
203
204 g_free (disposition);
205 g_hash_table_destroy (params);
206 }
207
208 soup_multipart_free (multipart);
209 return form_data_set;
210 }
211
212 static void
append_form_encoded(GString * str,const char * in)213 append_form_encoded (GString *str, const char *in)
214 {
215 const unsigned char *s = (const unsigned char *)in;
216
217 while (*s) {
218 if (*s == ' ') {
219 g_string_append_c (str, '+');
220 s++;
221 } else if (!g_ascii_isalnum (*s) && (*s != '-') && (*s != '_')
222 && (*s != '.'))
223 g_string_append_printf (str, "%%%02X", (int)*s++);
224 else
225 g_string_append_c (str, *s++);
226 }
227 }
228
229 static void
encode_pair(GString * str,const char * name,const char * value)230 encode_pair (GString *str, const char *name, const char *value)
231 {
232 g_return_if_fail (name != NULL);
233 g_return_if_fail (value != NULL);
234
235 if (str->len)
236 g_string_append_c (str, '&');
237 append_form_encoded (str, name);
238 g_string_append_c (str, '=');
239 append_form_encoded (str, value);
240 }
241
/**
 * soup_form_encode:
 * @first_field: name of the first form field
 * @...: value of @first_field, followed by additional field names
 * and values, terminated by %NULL.
 *
 * Encodes the given field names and values into a value of type
 * "application/x-www-form-urlencoded", as defined in the HTML 4.01
 * spec. This is the varargs front end to soup_form_encode_valist().
 *
 * Because the fields are passed as arguments, their names (or at the
 * very least, their total number) must be known at compile time; for
 * working with dynamic forms, use soup_form_encode_hash() or
 * soup_form_encode_datalist().
 *
 * Return value: the encoded form
 **/
char *
soup_form_encode (const char *first_field, ...)
{
	va_list ap;
	char *result;

	va_start (ap, first_field);
	result = soup_form_encode_valist (first_field, ap);
	va_end (ap);

	return result;
}
271
272 /**
273 * soup_form_encode_hash:
274 * @form_data_set: (element-type utf8 utf8): a hash table containing
275 * name/value pairs (as strings)
276 *
277 * Encodes @form_data_set into a value of type
278 * "application/x-www-form-urlencoded", as defined in the HTML 4.01
279 * spec.
280 *
281 * Note that the HTML spec states that "The control names/values are
282 * listed in the order they appear in the document." Since this method
283 * takes a hash table, it cannot enforce that; if you care about the
284 * ordering of the form fields, use soup_form_encode_datalist().
285 *
286 * Return value: the encoded form
287 **/
288 char *
soup_form_encode_hash(GHashTable * form_data_set)289 soup_form_encode_hash (GHashTable *form_data_set)
290 {
291 GString *str = g_string_new (NULL);
292 GHashTableIter iter;
293 gpointer name, value;
294
295 g_hash_table_iter_init (&iter, form_data_set);
296 while (g_hash_table_iter_next (&iter, &name, &value))
297 encode_pair (str, name, value);
298 return g_string_free (str, FALSE);
299 }
300
301 static void
datalist_encode_foreach(GQuark key_id,gpointer value,gpointer str)302 datalist_encode_foreach (GQuark key_id, gpointer value, gpointer str)
303 {
304 encode_pair (str, g_quark_to_string (key_id), value);
305 }
306
307 /**
308 * soup_form_encode_datalist:
309 * @form_data_set: a datalist containing name/value pairs
310 *
311 * Encodes @form_data_set into a value of type
312 * "application/x-www-form-urlencoded", as defined in the HTML 4.01
313 * spec. Unlike soup_form_encode_hash(), this preserves the ordering
314 * of the form elements, which may be required in some situations.
315 *
316 * Return value: the encoded form
317 **/
318 char *
soup_form_encode_datalist(GData ** form_data_set)319 soup_form_encode_datalist (GData **form_data_set)
320 {
321 GString *str = g_string_new (NULL);
322
323 g_datalist_foreach (form_data_set, datalist_encode_foreach, str);
324 return g_string_free (str, FALSE);
325 }
326
327 /**
328 * soup_form_encode_valist:
329 * @first_field: name of the first form field
330 * @args: pointer to additional values, as in soup_form_encode()
331 *
332 * See soup_form_encode(). This is mostly an internal method, used by
333 * various other methods such as soup_uri_set_query_from_fields() and
334 * soup_form_request_new().
335 *
336 * Return value: the encoded form
337 **/
338 char *
soup_form_encode_valist(const char * first_field,va_list args)339 soup_form_encode_valist (const char *first_field, va_list args)
340 {
341 GString *str = g_string_new (NULL);
342 const char *name, *value;
343
344 name = first_field;
345 value = va_arg (args, const char *);
346 while (name && value) {
347 encode_pair (str, name, value);
348
349 name = va_arg (args, const char *);
350 if (name)
351 value = va_arg (args, const char *);
352 }
353
354 return g_string_free (str, FALSE);
355 }
356
357 static SoupMessage *
soup_form_request_for_data(const char * method,const char * uri_string,char * form_data)358 soup_form_request_for_data (const char *method, const char *uri_string,
359 char *form_data)
360 {
361 SoupMessage *msg;
362 SoupURI *uri;
363
364 uri = soup_uri_new (uri_string);
365 if (!uri)
366 return NULL;
367
368 if (!strcmp (method, "GET")) {
369 g_free (uri->query);
370 uri->query = form_data;
371
372 msg = soup_message_new_from_uri (method, uri);
373 } else if (!strcmp (method, "POST") || !strcmp (method, "PUT")) {
374 msg = soup_message_new_from_uri (method, uri);
375
376 soup_message_set_request (
377 msg, SOUP_FORM_MIME_TYPE_URLENCODED,
378 SOUP_MEMORY_TAKE,
379 form_data, strlen (form_data));
380 } else {
381 g_warning ("invalid method passed to soup_form_request_new");
382 g_free (form_data);
383
384 /* Don't crash */
385 msg = soup_message_new_from_uri (method, uri);
386 }
387 soup_uri_free (uri);
388
389 return msg;
390 }
391
392 /**
393 * soup_form_request_new:
394 * @method: the HTTP method, either "GET" or "POST"
395 * @uri: the URI to send the form data to
396 * @first_field: name of the first form field
397 * @...: value of @first_field, followed by additional field names
398 * and values, terminated by %NULL.
399 *
400 * Creates a new %SoupMessage and sets it up to send the given data
401 * to @uri via @method. (That is, if @method is "GET", it will encode
402 * the form data into @uri's query field, and if @method is "POST", it
403 * will encode it into the %SoupMessage's request_body.)
404 *
405 * Return value: (transfer full): the new %SoupMessage
406 **/
407 SoupMessage *
soup_form_request_new(const char * method,const char * uri,const char * first_field,...)408 soup_form_request_new (const char *method, const char *uri,
409 const char *first_field, ...)
410 {
411 va_list args;
412 char *form_data;
413
414 va_start (args, first_field);
415 form_data = soup_form_encode_valist (first_field, args);
416 va_end (args);
417
418 return soup_form_request_for_data (method, uri, form_data);
419 }
420
421 /**
422 * soup_form_request_new_from_hash:
423 * @method: the HTTP method, either "GET" or "POST"
424 * @uri: the URI to send the form data to
425 * @form_data_set: (element-type utf8 utf8): the data to send to @uri
426 *
427 * Creates a new %SoupMessage and sets it up to send @form_data_set to
428 * @uri via @method, as with soup_form_request_new().
429 *
430 * Return value: (transfer full): the new %SoupMessage
431 **/
432 SoupMessage *
soup_form_request_new_from_hash(const char * method,const char * uri,GHashTable * form_data_set)433 soup_form_request_new_from_hash (const char *method, const char *uri,
434 GHashTable *form_data_set)
435 {
436 return soup_form_request_for_data (
437 method, uri, soup_form_encode_hash (form_data_set));
438 }
439
440 /**
441 * soup_form_request_new_from_datalist:
442 * @method: the HTTP method, either "GET" or "POST"
443 * @uri: the URI to send the form data to
444 * @form_data_set: the data to send to @uri
445 *
446 * Creates a new %SoupMessage and sets it up to send @form_data_set to
447 * @uri via @method, as with soup_form_request_new().
448 *
449 * Return value: (transfer full): the new %SoupMessage
450 **/
451 SoupMessage *
soup_form_request_new_from_datalist(const char * method,const char * uri,GData ** form_data_set)452 soup_form_request_new_from_datalist (const char *method, const char *uri,
453 GData **form_data_set)
454 {
455 return soup_form_request_for_data (
456 method, uri, soup_form_encode_datalist (form_data_set));
457 }
458
459 /**
460 * soup_form_request_new_from_multipart:
461 * @uri: the URI to send the form data to
462 * @multipart: a "multipart/form-data" #SoupMultipart
463 *
464 * Creates a new %SoupMessage and sets it up to send @multipart to
465 * @uri via POST.
466 *
467 * To send a <literal>"multipart/form-data"</literal> POST, first
468 * create a #SoupMultipart, using %SOUP_FORM_MIME_TYPE_MULTIPART as
469 * the MIME type. Then use soup_multipart_append_form_string() and
470 * soup_multipart_append_form_file() to add the value of each form
471 * control to the multipart. (These are just convenience methods, and
472 * you can use soup_multipart_append_part() if you need greater
473 * control over the part headers.) Finally, call
474 * soup_form_request_new_from_multipart() to serialize the multipart
475 * structure and create a #SoupMessage.
476 *
477 * Return value: (transfer full): the new %SoupMessage
478 *
479 * Since: 2.26
480 **/
481 SoupMessage *
soup_form_request_new_from_multipart(const char * uri,SoupMultipart * multipart)482 soup_form_request_new_from_multipart (const char *uri,
483 SoupMultipart *multipart)
484 {
485 SoupMessage *msg;
486
487 msg = soup_message_new ("POST", uri);
488 soup_multipart_to_message (multipart, msg->request_headers,
489 msg->request_body);
490 return msg;
491 }
492