1 /* GLIB - Library of useful routines for C programming
2 *
3 * gconvert.c: Convert between character sets using iconv
4 * Copyright Red Hat Inc., 2000
5 * Authors: Havoc Pennington <hp@redhat.com>, Owen Taylor <otaylor@redhat.com>
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 */
20
21 #include "config.h"
22 #include "glibconfig.h"
23
24 #ifndef G_OS_WIN32
25 #include <iconv.h>
26 #endif
27 #include <errno.h>
28 #include <stdio.h>
29 #include <string.h>
30 #include <stdlib.h>
31
32 #ifdef G_OS_WIN32
33 #include "win_iconv.c"
34 #endif
35
36 #ifdef G_PLATFORM_WIN32
37 #define STRICT
38 #include <windows.h>
39 #undef STRICT
40 #endif
41
42 #include "gconvert.h"
43
44 #include "gcharsetprivate.h"
45 #include "gslist.h"
46 #include "gstrfuncs.h"
47 #include "gtestutils.h"
48 #include "gthread.h"
49 #include "gthreadprivate.h"
50 #include "gunicode.h"
51 #include "gfileutils.h"
52
53 #include "glibintl.h"
54
55
56 /**
57 * SECTION:conversions
58 * @title: Character Set Conversion
59 * @short_description: convert strings between different character sets
60 *
 * The g_convert() family of functions wraps the functionality of iconv().
62 * In addition to pure character set conversions, GLib has functions to
63 * deal with the extra complications of encodings for file names.
64 *
65 * ## File Name Encodings
66 *
67 * Historically, UNIX has not had a defined encoding for file names:
68 * a file name is valid as long as it does not have path separators
69 * in it ("/"). However, displaying file names may require conversion:
70 * from the character set in which they were created, to the character
71 * set in which the application operates. Consider the Spanish file name
72 * "Presentación.sxi". If the application which created it uses
73 * ISO-8859-1 for its encoding,
74 * |[
75 * Character: P r e s e n t a c i ó n . s x i
76 * Hex code: 50 72 65 73 65 6e 74 61 63 69 f3 6e 2e 73 78 69
77 * ]|
 * However, if the application uses UTF-8, the actual file name on
79 * disk would look like this:
80 * |[
81 * Character: P r e s e n t a c i ó n . s x i
82 * Hex code: 50 72 65 73 65 6e 74 61 63 69 c3 b3 6e 2e 73 78 69
83 * ]|
 * GLib uses UTF-8 for its strings, and GUI toolkits like GTK+ that use
85 * GLib do the same thing. If you get a file name from the file system,
86 * for example, from readdir() or from g_dir_read_name(), and you wish
87 * to display the file name to the user, you will need to convert it
88 * into UTF-8. The opposite case is when the user types the name of a
89 * file they wish to save: the toolkit will give you that string in
90 * UTF-8 encoding, and you will need to convert it to the character
91 * set used for file names before you can create the file with open()
92 * or fopen().
93 *
94 * By default, GLib assumes that file names on disk are in UTF-8
95 * encoding. This is a valid assumption for file systems which
96 * were created relatively recently: most applications use UTF-8
97 * encoding for their strings, and that is also what they use for
98 * the file names they create. However, older file systems may
99 * still contain file names created in "older" encodings, such as
100 * ISO-8859-1. In this case, for compatibility reasons, you may want
101 * to instruct GLib to use that particular encoding for file names
102 * rather than UTF-8. You can do this by specifying the encoding for
103 * file names in the [`G_FILENAME_ENCODING`][G_FILENAME_ENCODING]
104 * environment variable. For example, if your installation uses
105 * ISO-8859-1 for file names, you can put this in your `~/.profile`:
106 * |[
107 * export G_FILENAME_ENCODING=ISO-8859-1
108 * ]|
109 * GLib provides the functions g_filename_to_utf8() and
110 * g_filename_from_utf8() to perform the necessary conversions.
111 * These functions convert file names from the encoding specified
112 * in `G_FILENAME_ENCODING` to UTF-8 and vice-versa. This
113 * [diagram][file-name-encodings-diagram] illustrates how
114 * these functions are used to convert between UTF-8 and the
115 * encoding for file names in the file system.
116 *
 * ## Conversion between file name encodings # {#file-name-encodings-diagram}
118 *
119 * 
120 *
121 * ## Checklist for Application Writers
122 *
123 * This section is a practical summary of the detailed
124 * things to do to make sure your applications process file
125 * name encodings correctly.
126 *
127 * 1. If you get a file name from the file system from a function
128 * such as readdir() or gtk_file_chooser_get_filename(), you do
129 * not need to do any conversion to pass that file name to
130 * functions like open(), rename(), or fopen() -- those are "raw"
131 * file names which the file system understands.
132 *
133 * 2. If you need to display a file name, convert it to UTF-8 first
134 * by using g_filename_to_utf8(). If conversion fails, display a
135 * string like "Unknown file name". Do not convert this string back
136 * into the encoding used for file names if you wish to pass it to
137 * the file system; use the original file name instead.
138 *
139 * For example, the document window of a word processor could display
140 * "Unknown file name" in its title bar but still let the user save
141 * the file, as it would keep the raw file name internally. This
142 * can happen if the user has not set the `G_FILENAME_ENCODING`
 * environment variable even though they have files whose names are
144 * not encoded in UTF-8.
145 *
146 * 3. If your user interface lets the user type a file name for saving
147 * or renaming, convert it to the encoding used for file names in
148 * the file system by using g_filename_from_utf8(). Pass the converted
149 * file name to functions like fopen(). If conversion fails, ask the
150 * user to enter a different file name. This can happen if the user
151 * types Japanese characters when `G_FILENAME_ENCODING` is set to
152 * `ISO-8859-1`, for example.
153 */
154
/* We try to terminate strings in unknown charsets with this many zero bytes
 * to ensure that multibyte strings really are nul-terminated when we return
 * them from g_convert() and friends.
 */
#define NUL_TERMINATOR_LENGTH 4
160
G_DEFINE_QUARK(g_convert_error,g_convert_error)161 G_DEFINE_QUARK (g_convert_error, g_convert_error)
162
163 static gboolean
164 try_conversion (const char *to_codeset,
165 const char *from_codeset,
166 iconv_t *cd)
167 {
168 *cd = iconv_open (to_codeset, from_codeset);
169
170 if (*cd == (iconv_t)-1 && errno == EINVAL)
171 return FALSE;
172 else
173 return TRUE;
174 }
175
176 static gboolean
try_to_aliases(const char ** to_aliases,const char * from_codeset,iconv_t * cd)177 try_to_aliases (const char **to_aliases,
178 const char *from_codeset,
179 iconv_t *cd)
180 {
181 if (to_aliases)
182 {
183 const char **p = to_aliases;
184 while (*p)
185 {
186 if (try_conversion (*p, from_codeset, cd))
187 return TRUE;
188
189 p++;
190 }
191 }
192
193 return FALSE;
194 }
195
196 /**
197 * g_iconv_open: (skip)
198 * @to_codeset: destination codeset
199 * @from_codeset: source codeset
200 *
201 * Same as the standard UNIX routine iconv_open(), but
202 * may be implemented via libiconv on UNIX flavors that lack
203 * a native implementation.
204 *
205 * GLib provides g_convert() and g_locale_to_utf8() which are likely
206 * more convenient than the raw iconv wrappers.
207 *
208 * Returns: a "conversion descriptor", or (GIConv)-1 if
209 * opening the converter failed.
210 **/
211 GIConv
g_iconv_open(const gchar * to_codeset,const gchar * from_codeset)212 g_iconv_open (const gchar *to_codeset,
213 const gchar *from_codeset)
214 {
215 iconv_t cd;
216
217 if (!try_conversion (to_codeset, from_codeset, &cd))
218 {
219 const char **to_aliases = _g_charset_get_aliases (to_codeset);
220 const char **from_aliases = _g_charset_get_aliases (from_codeset);
221
222 if (from_aliases)
223 {
224 const char **p = from_aliases;
225 while (*p)
226 {
227 if (try_conversion (to_codeset, *p, &cd))
228 goto out;
229
230 if (try_to_aliases (to_aliases, *p, &cd))
231 goto out;
232
233 p++;
234 }
235 }
236
237 if (try_to_aliases (to_aliases, from_codeset, &cd))
238 goto out;
239 }
240
241 out:
242 return (cd == (iconv_t)-1) ? (GIConv)-1 : (GIConv)cd;
243 }
244
245 /**
246 * g_iconv: (skip)
247 * @converter: conversion descriptor from g_iconv_open()
248 * @inbuf: bytes to convert
249 * @inbytes_left: inout parameter, bytes remaining to convert in @inbuf
250 * @outbuf: converted output bytes
251 * @outbytes_left: inout parameter, bytes available to fill in @outbuf
252 *
253 * Same as the standard UNIX routine iconv(), but
254 * may be implemented via libiconv on UNIX flavors that lack
255 * a native implementation.
256 *
257 * GLib provides g_convert() and g_locale_to_utf8() which are likely
258 * more convenient than the raw iconv wrappers.
259 *
260 * Note that the behaviour of iconv() for characters which are valid in the
261 * input character set, but which have no representation in the output character
262 * set, is implementation defined. This function may return success (with a
263 * positive number of non-reversible conversions as replacement characters were
264 * used), or it may return -1 and set an error such as %EILSEQ, in such a
265 * situation.
266 *
267 * Returns: count of non-reversible conversions, or -1 on error
268 **/
269 gsize
g_iconv(GIConv converter,gchar ** inbuf,gsize * inbytes_left,gchar ** outbuf,gsize * outbytes_left)270 g_iconv (GIConv converter,
271 gchar **inbuf,
272 gsize *inbytes_left,
273 gchar **outbuf,
274 gsize *outbytes_left)
275 {
276 iconv_t cd = (iconv_t)converter;
277
278 return iconv (cd, inbuf, inbytes_left, outbuf, outbytes_left);
279 }
280
281 /**
282 * g_iconv_close: (skip)
283 * @converter: a conversion descriptor from g_iconv_open()
284 *
285 * Same as the standard UNIX routine iconv_close(), but
286 * may be implemented via libiconv on UNIX flavors that lack
287 * a native implementation. Should be called to clean up
288 * the conversion descriptor from g_iconv_open() when
289 * you are done converting things.
290 *
291 * GLib provides g_convert() and g_locale_to_utf8() which are likely
292 * more convenient than the raw iconv wrappers.
293 *
294 * Returns: -1 on error, 0 on success
295 **/
296 gint
g_iconv_close(GIConv converter)297 g_iconv_close (GIConv converter)
298 {
299 iconv_t cd = (iconv_t)converter;
300
301 return iconv_close (cd);
302 }
303
304 static GIConv
open_converter(const gchar * to_codeset,const gchar * from_codeset,GError ** error)305 open_converter (const gchar *to_codeset,
306 const gchar *from_codeset,
307 GError **error)
308 {
309 GIConv cd;
310
311 cd = g_iconv_open (to_codeset, from_codeset);
312
313 if (cd == (GIConv) -1)
314 {
315 /* Something went wrong. */
316 if (error)
317 {
318 if (errno == EINVAL)
319 g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_NO_CONVERSION,
320 _("Conversion from character set “%s” to “%s” is not supported"),
321 from_codeset, to_codeset);
322 else
323 g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED,
324 _("Could not open converter from “%s” to “%s”"),
325 from_codeset, to_codeset);
326 }
327 }
328
329 return cd;
330 }
331
332 static int
close_converter(GIConv cd)333 close_converter (GIConv cd)
334 {
335 if (cd == (GIConv) -1)
336 return 0;
337
338 return g_iconv_close (cd);
339 }
340
341 /**
342 * g_convert_with_iconv: (skip)
343 * @str: (array length=len) (element-type guint8):
344 * the string to convert.
345 * @len: the length of the string in bytes, or -1 if the string is
346 * nul-terminated (Note that some encodings may allow nul
347 * bytes to occur inside strings. In that case, using -1
348 * for the @len parameter is unsafe)
349 * @converter: conversion descriptor from g_iconv_open()
350 * @bytes_read: (out) (optional): location to store the number of bytes in
351 * the input string that were successfully converted, or %NULL.
352 * Even if the conversion was successful, this may be
353 * less than @len if there were partial characters
354 * at the end of the input. If the error
355 * #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
356 * stored will be the byte offset after the last valid
357 * input sequence.
358 * @bytes_written: (out) (optional): the number of bytes stored in
359 * the output buffer (not including the terminating nul).
360 * @error: location to store the error occurring, or %NULL to ignore
361 * errors. Any of the errors in #GConvertError may occur.
362 *
363 * Converts a string from one character set to another.
364 *
365 * Note that you should use g_iconv() for streaming conversions.
366 * Despite the fact that @bytes_read can return information about partial
367 * characters, the g_convert_... functions are not generally suitable
368 * for streaming. If the underlying converter maintains internal state,
369 * then this won't be preserved across successive calls to g_convert(),
370 * g_convert_with_iconv() or g_convert_with_fallback(). (An example of
371 * this is the GNU C converter for CP1255 which does not emit a base
372 * character until it knows that the next character is not a mark that
373 * could combine with the base character.)
374 *
375 * Characters which are valid in the input character set, but which have no
376 * representation in the output character set will result in a
377 * %G_CONVERT_ERROR_ILLEGAL_SEQUENCE error. This is in contrast to the iconv()
378 * specification, which leaves this behaviour implementation defined. Note that
379 * this is the same error code as is returned for an invalid byte sequence in
380 * the input character set. To get defined behaviour for conversion of
381 * unrepresentable characters, use g_convert_with_fallback().
382 *
383 * Returns: (array length=bytes_written) (element-type guint8) (transfer full):
384 * If the conversion was successful, a newly allocated buffer
385 * containing the converted string, which must be freed with
386 * g_free(). Otherwise %NULL and @error will be set.
387 **/
388 gchar*
g_convert_with_iconv(const gchar * str,gssize len,GIConv converter,gsize * bytes_read,gsize * bytes_written,GError ** error)389 g_convert_with_iconv (const gchar *str,
390 gssize len,
391 GIConv converter,
392 gsize *bytes_read,
393 gsize *bytes_written,
394 GError **error)
395 {
396 gchar *dest;
397 gchar *outp;
398 const gchar *p;
399 gsize inbytes_remaining;
400 gsize outbytes_remaining;
401 gsize err;
402 gsize outbuf_size;
403 gboolean have_error = FALSE;
404 gboolean done = FALSE;
405 gboolean reset = FALSE;
406
407 g_return_val_if_fail (converter != (GIConv) -1, NULL);
408
409 if (len < 0)
410 len = strlen (str);
411
412 p = str;
413 inbytes_remaining = len;
414 outbuf_size = len + NUL_TERMINATOR_LENGTH;
415
416 outbytes_remaining = outbuf_size - NUL_TERMINATOR_LENGTH;
417 outp = dest = g_malloc (outbuf_size);
418
419 while (!done && !have_error)
420 {
421 if (reset)
422 err = g_iconv (converter, NULL, &inbytes_remaining, &outp, &outbytes_remaining);
423 else
424 err = g_iconv (converter, (char **)&p, &inbytes_remaining, &outp, &outbytes_remaining);
425
426 if (err == (gsize) -1)
427 {
428 switch (errno)
429 {
430 case EINVAL:
431 /* Incomplete text, do not report an error */
432 done = TRUE;
433 break;
434 case E2BIG:
435 {
436 gsize used = outp - dest;
437
438 outbuf_size *= 2;
439 dest = g_realloc (dest, outbuf_size);
440
441 outp = dest + used;
442 outbytes_remaining = outbuf_size - used - NUL_TERMINATOR_LENGTH;
443 }
444 break;
445 case EILSEQ:
446 g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
447 _("Invalid byte sequence in conversion input"));
448 have_error = TRUE;
449 break;
450 default:
451 {
452 int errsv = errno;
453
454 g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED,
455 _("Error during conversion: %s"),
456 g_strerror (errsv));
457 }
458 have_error = TRUE;
459 break;
460 }
461 }
462 else if (err > 0)
463 {
464 /* @err gives the number of replacement characters used. */
465 g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
466 _("Unrepresentable character in conversion input"));
467 have_error = TRUE;
468 }
469 else
470 {
471 if (!reset)
472 {
473 /* call g_iconv with NULL inbuf to cleanup shift state */
474 reset = TRUE;
475 inbytes_remaining = 0;
476 }
477 else
478 done = TRUE;
479 }
480 }
481
482 memset (outp, 0, NUL_TERMINATOR_LENGTH);
483
484 if (bytes_read)
485 *bytes_read = p - str;
486 else
487 {
488 if ((p - str) != len)
489 {
490 if (!have_error)
491 {
492 g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT,
493 _("Partial character sequence at end of input"));
494 have_error = TRUE;
495 }
496 }
497 }
498
499 if (bytes_written)
500 *bytes_written = outp - dest; /* Doesn't include '\0' */
501
502 if (have_error)
503 {
504 g_free (dest);
505 return NULL;
506 }
507 else
508 return dest;
509 }
510
511 /**
512 * g_convert:
513 * @str: (array length=len) (element-type guint8):
514 * the string to convert.
515 * @len: the length of the string in bytes, or -1 if the string is
516 * nul-terminated (Note that some encodings may allow nul
517 * bytes to occur inside strings. In that case, using -1
518 * for the @len parameter is unsafe)
519 * @to_codeset: name of character set into which to convert @str
520 * @from_codeset: character set of @str.
521 * @bytes_read: (out) (optional): location to store the number of bytes in
522 * the input string that were successfully converted, or %NULL.
523 * Even if the conversion was successful, this may be
524 * less than @len if there were partial characters
525 * at the end of the input. If the error
526 * #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
527 * stored will be the byte offset after the last valid
528 * input sequence.
529 * @bytes_written: (out) (optional): the number of bytes stored in
530 * the output buffer (not including the terminating nul).
531 * @error: location to store the error occurring, or %NULL to ignore
532 * errors. Any of the errors in #GConvertError may occur.
533 *
534 * Converts a string from one character set to another.
535 *
536 * Note that you should use g_iconv() for streaming conversions.
537 * Despite the fact that @bytes_read can return information about partial
538 * characters, the g_convert_... functions are not generally suitable
539 * for streaming. If the underlying converter maintains internal state,
540 * then this won't be preserved across successive calls to g_convert(),
541 * g_convert_with_iconv() or g_convert_with_fallback(). (An example of
542 * this is the GNU C converter for CP1255 which does not emit a base
543 * character until it knows that the next character is not a mark that
544 * could combine with the base character.)
545 *
546 * Using extensions such as "//TRANSLIT" may not work (or may not work
547 * well) on many platforms. Consider using g_str_to_ascii() instead.
548 *
549 * Returns: (array length=bytes_written) (element-type guint8) (transfer full):
550 * If the conversion was successful, a newly allocated buffer
551 * containing the converted string, which must be freed with g_free().
552 * Otherwise %NULL and @error will be set.
553 **/
554 gchar*
g_convert(const gchar * str,gssize len,const gchar * to_codeset,const gchar * from_codeset,gsize * bytes_read,gsize * bytes_written,GError ** error)555 g_convert (const gchar *str,
556 gssize len,
557 const gchar *to_codeset,
558 const gchar *from_codeset,
559 gsize *bytes_read,
560 gsize *bytes_written,
561 GError **error)
562 {
563 gchar *res;
564 GIConv cd;
565
566 g_return_val_if_fail (str != NULL, NULL);
567 g_return_val_if_fail (to_codeset != NULL, NULL);
568 g_return_val_if_fail (from_codeset != NULL, NULL);
569
570 cd = open_converter (to_codeset, from_codeset, error);
571
572 if (cd == (GIConv) -1)
573 {
574 if (bytes_read)
575 *bytes_read = 0;
576
577 if (bytes_written)
578 *bytes_written = 0;
579
580 return NULL;
581 }
582
583 res = g_convert_with_iconv (str, len, cd,
584 bytes_read, bytes_written,
585 error);
586
587 close_converter (cd);
588
589 return res;
590 }
591
592 /**
593 * g_convert_with_fallback:
594 * @str: (array length=len) (element-type guint8):
595 * the string to convert.
596 * @len: the length of the string in bytes, or -1 if the string is
597 * nul-terminated (Note that some encodings may allow nul
598 * bytes to occur inside strings. In that case, using -1
599 * for the @len parameter is unsafe)
600 * @to_codeset: name of character set into which to convert @str
601 * @from_codeset: character set of @str.
602 * @fallback: UTF-8 string to use in place of characters not
603 * present in the target encoding. (The string must be
604 * representable in the target encoding).
605 * If %NULL, characters not in the target encoding will
606 * be represented as Unicode escapes \uxxxx or \Uxxxxyyyy.
607 * @bytes_read: (out) (optional): location to store the number of bytes in
608 * the input string that were successfully converted, or %NULL.
609 * Even if the conversion was successful, this may be
610 * less than @len if there were partial characters
611 * at the end of the input.
612 * @bytes_written: (out) (optional): the number of bytes stored in
613 * the output buffer (not including the terminating nul).
614 * @error: location to store the error occurring, or %NULL to ignore
615 * errors. Any of the errors in #GConvertError may occur.
616 *
617 * Converts a string from one character set to another, possibly
618 * including fallback sequences for characters not representable
619 * in the output. Note that it is not guaranteed that the specification
620 * for the fallback sequences in @fallback will be honored. Some
621 * systems may do an approximate conversion from @from_codeset
622 * to @to_codeset in their iconv() functions,
623 * in which case GLib will simply return that approximate conversion.
624 *
625 * Note that you should use g_iconv() for streaming conversions.
626 * Despite the fact that @bytes_read can return information about partial
627 * characters, the g_convert_... functions are not generally suitable
628 * for streaming. If the underlying converter maintains internal state,
629 * then this won't be preserved across successive calls to g_convert(),
630 * g_convert_with_iconv() or g_convert_with_fallback(). (An example of
631 * this is the GNU C converter for CP1255 which does not emit a base
632 * character until it knows that the next character is not a mark that
633 * could combine with the base character.)
634 *
635 * Returns: (array length=bytes_written) (element-type guint8) (transfer full):
636 * If the conversion was successful, a newly allocated buffer
637 * containing the converted string, which must be freed with g_free().
638 * Otherwise %NULL and @error will be set.
639 **/
640 gchar*
g_convert_with_fallback(const gchar * str,gssize len,const gchar * to_codeset,const gchar * from_codeset,const gchar * fallback,gsize * bytes_read,gsize * bytes_written,GError ** error)641 g_convert_with_fallback (const gchar *str,
642 gssize len,
643 const gchar *to_codeset,
644 const gchar *from_codeset,
645 const gchar *fallback,
646 gsize *bytes_read,
647 gsize *bytes_written,
648 GError **error)
649 {
650 gchar *utf8;
651 gchar *dest;
652 gchar *outp;
653 const gchar *insert_str = NULL;
654 const gchar *p;
655 gsize inbytes_remaining;
656 const gchar *save_p = NULL;
657 gsize save_inbytes = 0;
658 gsize outbytes_remaining;
659 gsize err;
660 GIConv cd;
661 gsize outbuf_size;
662 gboolean have_error = FALSE;
663 gboolean done = FALSE;
664
665 GError *local_error = NULL;
666
667 g_return_val_if_fail (str != NULL, NULL);
668 g_return_val_if_fail (to_codeset != NULL, NULL);
669 g_return_val_if_fail (from_codeset != NULL, NULL);
670
671 if (len < 0)
672 len = strlen (str);
673
674 /* Try an exact conversion; we only proceed if this fails
675 * due to an illegal sequence in the input string.
676 */
677 dest = g_convert (str, len, to_codeset, from_codeset,
678 bytes_read, bytes_written, &local_error);
679 if (!local_error)
680 return dest;
681
682 if (!g_error_matches (local_error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE))
683 {
684 g_propagate_error (error, local_error);
685 return NULL;
686 }
687 else
688 g_error_free (local_error);
689
690 local_error = NULL;
691
692 /* No go; to proceed, we need a converter from "UTF-8" to
693 * to_codeset, and the string as UTF-8.
694 */
695 cd = open_converter (to_codeset, "UTF-8", error);
696 if (cd == (GIConv) -1)
697 {
698 if (bytes_read)
699 *bytes_read = 0;
700
701 if (bytes_written)
702 *bytes_written = 0;
703
704 return NULL;
705 }
706
707 utf8 = g_convert (str, len, "UTF-8", from_codeset,
708 bytes_read, &inbytes_remaining, error);
709 if (!utf8)
710 {
711 close_converter (cd);
712 if (bytes_written)
713 *bytes_written = 0;
714 return NULL;
715 }
716
717 /* Now the heart of the code. We loop through the UTF-8 string, and
718 * whenever we hit an offending character, we form fallback, convert
719 * the fallback to the target codeset, and then go back to
720 * converting the original string after finishing with the fallback.
721 *
722 * The variables save_p and save_inbytes store the input state
723 * for the original string while we are converting the fallback
724 */
725 p = utf8;
726
727 outbuf_size = len + NUL_TERMINATOR_LENGTH;
728 outbytes_remaining = outbuf_size - NUL_TERMINATOR_LENGTH;
729 outp = dest = g_malloc (outbuf_size);
730
731 while (!done && !have_error)
732 {
733 gsize inbytes_tmp = inbytes_remaining;
734 err = g_iconv (cd, (char **)&p, &inbytes_tmp, &outp, &outbytes_remaining);
735 inbytes_remaining = inbytes_tmp;
736
737 if (err == (gsize) -1)
738 {
739 switch (errno)
740 {
741 case EINVAL:
742 g_assert_not_reached();
743 break;
744 case E2BIG:
745 {
746 gsize used = outp - dest;
747
748 outbuf_size *= 2;
749 dest = g_realloc (dest, outbuf_size);
750
751 outp = dest + used;
752 outbytes_remaining = outbuf_size - used - NUL_TERMINATOR_LENGTH;
753
754 break;
755 }
756 case EILSEQ:
757 if (save_p)
758 {
759 /* Error converting fallback string - fatal
760 */
761 g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
762 _("Cannot convert fallback “%s” to codeset “%s”"),
763 insert_str, to_codeset);
764 have_error = TRUE;
765 break;
766 }
767 else if (p)
768 {
769 if (!fallback)
770 {
771 gunichar ch = g_utf8_get_char (p);
772 insert_str = g_strdup_printf (ch < 0x10000 ? "\\u%04x" : "\\U%08x",
773 ch);
774 }
775 else
776 insert_str = fallback;
777
778 save_p = g_utf8_next_char (p);
779 save_inbytes = inbytes_remaining - (save_p - p);
780 p = insert_str;
781 inbytes_remaining = strlen (p);
782 break;
783 }
784 /* if p is null */
785 G_GNUC_FALLTHROUGH;
786 default:
787 {
788 int errsv = errno;
789
790 g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED,
791 _("Error during conversion: %s"),
792 g_strerror (errsv));
793 }
794
795 have_error = TRUE;
796 break;
797 }
798 }
799 else
800 {
801 if (save_p)
802 {
803 if (!fallback)
804 g_free ((gchar *)insert_str);
805 p = save_p;
806 inbytes_remaining = save_inbytes;
807 save_p = NULL;
808 }
809 else if (p)
810 {
811 /* call g_iconv with NULL inbuf to cleanup shift state */
812 p = NULL;
813 inbytes_remaining = 0;
814 }
815 else
816 done = TRUE;
817 }
818 }
819
820 /* Cleanup
821 */
822 memset (outp, 0, NUL_TERMINATOR_LENGTH);
823
824 close_converter (cd);
825
826 if (bytes_written)
827 *bytes_written = outp - dest; /* Doesn't include '\0' */
828
829 g_free (utf8);
830
831 if (have_error)
832 {
833 if (save_p && !fallback)
834 g_free ((gchar *)insert_str);
835 g_free (dest);
836 return NULL;
837 }
838 else
839 return dest;
840 }
841
842 /*
843 * g_locale_to_utf8
844 *
845 *
846 */
847
848 /*
849 * Validate @string as UTF-8. @len can be negative if @string is
850 * nul-terminated, or a non-negative value in bytes. If @string ends in an
851 * incomplete sequence, or contains any illegal sequences or nul codepoints,
852 * %NULL will be returned and the error set to
853 * %G_CONVERT_ERROR_ILLEGAL_SEQUENCE.
854 * On success, @bytes_read and @bytes_written, if provided, will be set to
855 * the number of bytes in @string up to @len or the terminating nul byte.
856 * On error, @bytes_read will be set to the byte offset after the last valid
857 * and non-nul UTF-8 sequence in @string, and @bytes_written will be set to 0.
858 */
859 static gchar *
strdup_len(const gchar * string,gssize len,gsize * bytes_read,gsize * bytes_written,GError ** error)860 strdup_len (const gchar *string,
861 gssize len,
862 gsize *bytes_read,
863 gsize *bytes_written,
864 GError **error)
865 {
866 gsize real_len;
867 const gchar *end_valid;
868
869 if (!g_utf8_validate (string, len, &end_valid))
870 {
871 if (bytes_read)
872 *bytes_read = end_valid - string;
873 if (bytes_written)
874 *bytes_written = 0;
875
876 g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
877 _("Invalid byte sequence in conversion input"));
878 return NULL;
879 }
880
881 real_len = end_valid - string;
882
883 if (bytes_read)
884 *bytes_read = real_len;
885 if (bytes_written)
886 *bytes_written = real_len;
887
888 return g_strndup (string, real_len);
889 }
890
/* Bit flags selecting which embedded-nul checks convert_checked() applies. */
typedef enum
{
  /* Reject input that contains a nul byte within the given length. */
  CONVERT_CHECK_NO_NULS_IN_INPUT  = 1 << 0,
  /* Reject converted output that contains a nul byte. */
  CONVERT_CHECK_NO_NULS_IN_OUTPUT = 1 << 1
} ConvertCheckFlags;
896
897 /*
898 * Convert from @string in the encoding identified by @from_codeset,
899 * returning a string in the encoding identifed by @to_codeset.
900 * @len can be negative if @string is nul-terminated, or a non-negative
901 * value in bytes. Flags defined in #ConvertCheckFlags can be set in @flags
902 * to check the input, the output, or both, for embedded nul bytes.
903 * On success, @bytes_read, if provided, will be set to the number of bytes
904 * in @string up to @len or the terminating nul byte, and @bytes_written, if
905 * provided, will be set to the number of output bytes written into the
906 * returned buffer, excluding the terminating nul sequence.
907 * On error, @bytes_read will be set to the byte offset after the last valid
908 * sequence in @string, and @bytes_written will be set to 0.
909 */
910 static gchar *
convert_checked(const gchar * string,gssize len,const gchar * to_codeset,const gchar * from_codeset,ConvertCheckFlags flags,gsize * bytes_read,gsize * bytes_written,GError ** error)911 convert_checked (const gchar *string,
912 gssize len,
913 const gchar *to_codeset,
914 const gchar *from_codeset,
915 ConvertCheckFlags flags,
916 gsize *bytes_read,
917 gsize *bytes_written,
918 GError **error)
919 {
920 gchar *out;
921 gsize outbytes;
922
923 if ((flags & CONVERT_CHECK_NO_NULS_IN_INPUT) && len > 0)
924 {
925 const gchar *early_nul = memchr (string, '\0', len);
926 if (early_nul != NULL)
927 {
928 if (bytes_read)
929 *bytes_read = early_nul - string;
930 if (bytes_written)
931 *bytes_written = 0;
932
933 g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
934 _("Embedded NUL byte in conversion input"));
935 return NULL;
936 }
937 }
938
939 out = g_convert (string, len, to_codeset, from_codeset,
940 bytes_read, &outbytes, error);
941 if (out == NULL)
942 {
943 if (bytes_written)
944 *bytes_written = 0;
945 return NULL;
946 }
947
948 if ((flags & CONVERT_CHECK_NO_NULS_IN_OUTPUT)
949 && memchr (out, '\0', outbytes) != NULL)
950 {
951 g_free (out);
952 if (bytes_written)
953 *bytes_written = 0;
954 g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_EMBEDDED_NUL,
955 _("Embedded NUL byte in conversion output"));
956 return NULL;
957 }
958
959 if (bytes_written)
960 *bytes_written = outbytes;
961 return out;
962 }
963
964 /**
965 * g_locale_to_utf8:
966 * @opsysstring: (array length=len) (element-type guint8): a string in the
967 * encoding of the current locale. On Windows
968 * this means the system codepage.
969 * @len: the length of the string, or -1 if the string is
970 * nul-terminated (Note that some encodings may allow nul
971 * bytes to occur inside strings. In that case, using -1
972 * for the @len parameter is unsafe)
973 * @bytes_read: (out) (optional): location to store the number of bytes in the
974 * input string that were successfully converted, or %NULL.
975 * Even if the conversion was successful, this may be
976 * less than @len if there were partial characters
977 * at the end of the input. If the error
978 * %G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
979 * stored will be the byte offset after the last valid
980 * input sequence.
981 * @bytes_written: (out) (optional): the number of bytes stored in the output
982 * buffer (not including the terminating nul).
983 * @error: location to store the error occurring, or %NULL to ignore
984 * errors. Any of the errors in #GConvertError may occur.
985 *
986 * Converts a string which is in the encoding used for strings by
987 * the C runtime (usually the same as that used by the operating
988 * system) in the [current locale][setlocale] into a UTF-8 string.
989 *
990 * If the source encoding is not UTF-8 and the conversion output contains a
991 * nul character, the error %G_CONVERT_ERROR_EMBEDDED_NUL is set and the
992 * function returns %NULL.
993 * If the source encoding is UTF-8, an embedded nul character is treated with
994 * the %G_CONVERT_ERROR_ILLEGAL_SEQUENCE error for backward compatibility with
995 * earlier versions of this library. Use g_convert() to produce output that
996 * may contain embedded nul characters.
997 *
998 * Returns: (type utf8): The converted string, or %NULL on an error.
999 **/
1000 gchar *
g_locale_to_utf8(const gchar * opsysstring,gssize len,gsize * bytes_read,gsize * bytes_written,GError ** error)1001 g_locale_to_utf8 (const gchar *opsysstring,
1002 gssize len,
1003 gsize *bytes_read,
1004 gsize *bytes_written,
1005 GError **error)
1006 {
1007 const char *charset;
1008
1009 if (g_get_charset (&charset))
1010 return strdup_len (opsysstring, len, bytes_read, bytes_written, error);
1011 else
1012 return convert_checked (opsysstring, len, "UTF-8", charset,
1013 CONVERT_CHECK_NO_NULS_IN_OUTPUT,
1014 bytes_read, bytes_written, error);
1015 }
1016
1017 /**
1018 * g_locale_from_utf8:
1019 * @utf8string: a UTF-8 encoded string
1020 * @len: the length of the string, or -1 if the string is
1021 * nul-terminated.
1022 * @bytes_read: (out) (optional): location to store the number of bytes in the
1023 * input string that were successfully converted, or %NULL.
1024 * Even if the conversion was successful, this may be
1025 * less than @len if there were partial characters
1026 * at the end of the input. If the error
1027 * %G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
1028 * stored will be the byte offset after the last valid
1029 * input sequence.
1030 * @bytes_written: (out) (optional): the number of bytes stored in the output
1031 * buffer (not including the terminating nul).
1032 * @error: location to store the error occurring, or %NULL to ignore
1033 * errors. Any of the errors in #GConvertError may occur.
1034 *
1035 * Converts a string from UTF-8 to the encoding used for strings by
1036 * the C runtime (usually the same as that used by the operating
1037 * system) in the [current locale][setlocale]. On Windows this means
1038 * the system codepage.
1039 *
1040 * The input string shall not contain nul characters even if the @len
1041 * argument is positive. A nul character found inside the string will result
1042 * in error %G_CONVERT_ERROR_ILLEGAL_SEQUENCE. Use g_convert() to convert
1043 * input that may contain embedded nul characters.
1044 *
1045 * Returns: (array length=bytes_written) (element-type guint8) (transfer full):
1046 * A newly-allocated buffer containing the converted string,
1047 * or %NULL on an error, and error will be set.
1048 **/
1049 gchar *
g_locale_from_utf8(const gchar * utf8string,gssize len,gsize * bytes_read,gsize * bytes_written,GError ** error)1050 g_locale_from_utf8 (const gchar *utf8string,
1051 gssize len,
1052 gsize *bytes_read,
1053 gsize *bytes_written,
1054 GError **error)
1055 {
1056 const gchar *charset;
1057
1058 if (g_get_charset (&charset))
1059 return strdup_len (utf8string, len, bytes_read, bytes_written, error);
1060 else
1061 return convert_checked (utf8string, len, charset, "UTF-8",
1062 CONVERT_CHECK_NO_NULS_IN_INPUT,
1063 bytes_read, bytes_written, error);
1064 }
1065
1066 #ifndef G_PLATFORM_WIN32
1067
1068 typedef struct _GFilenameCharsetCache GFilenameCharsetCache;
1069
1070 struct _GFilenameCharsetCache {
1071 gboolean is_utf8;
1072 gchar *charset;
1073 gchar **filename_charsets;
1074 };
1075
1076 static void
filename_charset_cache_free(gpointer data)1077 filename_charset_cache_free (gpointer data)
1078 {
1079 GFilenameCharsetCache *cache = data;
1080 g_free (cache->charset);
1081 g_strfreev (cache->filename_charsets);
1082 g_free (cache);
1083 }
1084
1085 /**
1086 * g_get_filename_charsets:
1087 * @filename_charsets: (out) (transfer none) (array zero-terminated=1):
1088 * return location for the %NULL-terminated list of encoding names
1089 *
1090 * Determines the preferred character sets used for filenames.
 * The first character set from @filename_charsets is the filename encoding, the
1092 * subsequent character sets are used when trying to generate a displayable
1093 * representation of a filename, see g_filename_display_name().
1094 *
1095 * On Unix, the character sets are determined by consulting the
1096 * environment variables `G_FILENAME_ENCODING` and `G_BROKEN_FILENAMES`.
1097 * On Windows, the character set used in the GLib API is always UTF-8
1098 * and said environment variables have no effect.
1099 *
1100 * `G_FILENAME_ENCODING` may be set to a comma-separated list of
1101 * character set names. The special token "\@locale" is taken
1102 * to mean the character set for the [current locale][setlocale].
1103 * If `G_FILENAME_ENCODING` is not set, but `G_BROKEN_FILENAMES` is,
1104 * the character set of the current locale is taken as the filename
1105 * encoding. If neither environment variable is set, UTF-8 is taken
1106 * as the filename encoding, but the character set of the current locale
1107 * is also put in the list of encodings.
1108 *
 * The returned @filename_charsets belong to GLib and must not be freed.
1110 *
1111 * Note that on Unix, regardless of the locale character set or
1112 * `G_FILENAME_ENCODING` value, the actual file names present
1113 * on a system might be in any random encoding or just gibberish.
1114 *
1115 * Returns: %TRUE if the filename encoding is UTF-8.
1116 *
1117 * Since: 2.6
1118 */
1119 gboolean
g_get_filename_charsets(const gchar *** filename_charsets)1120 g_get_filename_charsets (const gchar ***filename_charsets)
1121 {
1122 static GPrivate cache_private = G_PRIVATE_INIT (filename_charset_cache_free);
1123 GFilenameCharsetCache *cache = g_private_get (&cache_private);
1124 const gchar *charset;
1125
1126 if (!cache)
1127 cache = g_private_set_alloc0 (&cache_private, sizeof (GFilenameCharsetCache));
1128
1129 g_get_charset (&charset);
1130
1131 if (!(cache->charset && strcmp (cache->charset, charset) == 0))
1132 {
1133 const gchar *new_charset;
1134 gchar *p;
1135 gint i;
1136
1137 g_free (cache->charset);
1138 g_strfreev (cache->filename_charsets);
1139 cache->charset = g_strdup (charset);
1140
1141 p = getenv ("G_FILENAME_ENCODING");
1142 if (p != NULL && p[0] != '\0')
1143 {
1144 cache->filename_charsets = g_strsplit (p, ",", 0);
1145 cache->is_utf8 = (strcmp (cache->filename_charsets[0], "UTF-8") == 0);
1146
1147 for (i = 0; cache->filename_charsets[i]; i++)
1148 {
1149 if (strcmp ("@locale", cache->filename_charsets[i]) == 0)
1150 {
1151 g_get_charset (&new_charset);
1152 g_free (cache->filename_charsets[i]);
1153 cache->filename_charsets[i] = g_strdup (new_charset);
1154 }
1155 }
1156 }
1157 else if (getenv ("G_BROKEN_FILENAMES") != NULL)
1158 {
1159 cache->filename_charsets = g_new0 (gchar *, 2);
1160 cache->is_utf8 = g_get_charset (&new_charset);
1161 cache->filename_charsets[0] = g_strdup (new_charset);
1162 }
1163 else
1164 {
1165 cache->filename_charsets = g_new0 (gchar *, 3);
1166 cache->is_utf8 = TRUE;
1167 cache->filename_charsets[0] = g_strdup ("UTF-8");
1168 if (!g_get_charset (&new_charset))
1169 cache->filename_charsets[1] = g_strdup (new_charset);
1170 }
1171 }
1172
1173 if (filename_charsets)
1174 *filename_charsets = (const gchar **)cache->filename_charsets;
1175
1176 return cache->is_utf8;
1177 }
1178
1179 #else /* G_PLATFORM_WIN32 */
1180
1181 gboolean
g_get_filename_charsets(const gchar *** filename_charsets)1182 g_get_filename_charsets (const gchar ***filename_charsets)
1183 {
1184 static const gchar *charsets[] = {
1185 "UTF-8",
1186 NULL
1187 };
1188
1189 #ifdef G_OS_WIN32
1190 /* On Windows GLib pretends that the filename charset is UTF-8 */
1191 if (filename_charsets)
1192 *filename_charsets = charsets;
1193
1194 return TRUE;
1195 #else
1196 gboolean result;
1197
1198 /* Cygwin works like before */
1199 result = g_get_charset (&(charsets[0]));
1200
1201 if (filename_charsets)
1202 *filename_charsets = charsets;
1203
1204 return result;
1205 #endif
1206 }
1207
1208 #endif /* G_PLATFORM_WIN32 */
1209
1210 static gboolean
get_filename_charset(const gchar ** filename_charset)1211 get_filename_charset (const gchar **filename_charset)
1212 {
1213 const gchar **charsets;
1214 gboolean is_utf8;
1215
1216 is_utf8 = g_get_filename_charsets (&charsets);
1217
1218 if (filename_charset)
1219 *filename_charset = charsets[0];
1220
1221 return is_utf8;
1222 }
1223
1224 /**
1225 * g_filename_to_utf8:
1226 * @opsysstring: (type filename): a string in the encoding for filenames
1227 * @len: the length of the string, or -1 if the string is
1228 * nul-terminated (Note that some encodings may allow nul
1229 * bytes to occur inside strings. In that case, using -1
1230 * for the @len parameter is unsafe)
1231 * @bytes_read: (out) (optional): location to store the number of bytes in the
1232 * input string that were successfully converted, or %NULL.
1233 * Even if the conversion was successful, this may be
1234 * less than @len if there were partial characters
1235 * at the end of the input. If the error
1236 * %G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
1237 * stored will be the byte offset after the last valid
1238 * input sequence.
1239 * @bytes_written: (out) (optional): the number of bytes stored in the output
1240 * buffer (not including the terminating nul).
1241 * @error: location to store the error occurring, or %NULL to ignore
1242 * errors. Any of the errors in #GConvertError may occur.
1243 *
1244 * Converts a string which is in the encoding used by GLib for
1245 * filenames into a UTF-8 string. Note that on Windows GLib uses UTF-8
1246 * for filenames; on other platforms, this function indirectly depends on
1247 * the [current locale][setlocale].
1248 *
1249 * The input string shall not contain nul characters even if the @len
1250 * argument is positive. A nul character found inside the string will result
1251 * in error %G_CONVERT_ERROR_ILLEGAL_SEQUENCE.
1252 * If the source encoding is not UTF-8 and the conversion output contains a
1253 * nul character, the error %G_CONVERT_ERROR_EMBEDDED_NUL is set and the
1254 * function returns %NULL. Use g_convert() to produce output that
1255 * may contain embedded nul characters.
1256 *
1257 * Returns: (type utf8): The converted string, or %NULL on an error.
1258 **/
1259 gchar*
g_filename_to_utf8(const gchar * opsysstring,gssize len,gsize * bytes_read,gsize * bytes_written,GError ** error)1260 g_filename_to_utf8 (const gchar *opsysstring,
1261 gssize len,
1262 gsize *bytes_read,
1263 gsize *bytes_written,
1264 GError **error)
1265 {
1266 const gchar *charset;
1267
1268 g_return_val_if_fail (opsysstring != NULL, NULL);
1269
1270 if (get_filename_charset (&charset))
1271 return strdup_len (opsysstring, len, bytes_read, bytes_written, error);
1272 else
1273 return convert_checked (opsysstring, len, "UTF-8", charset,
1274 CONVERT_CHECK_NO_NULS_IN_INPUT |
1275 CONVERT_CHECK_NO_NULS_IN_OUTPUT,
1276 bytes_read, bytes_written, error);
1277 }
1278
1279 /**
1280 * g_filename_from_utf8:
1281 * @utf8string: (type utf8): a UTF-8 encoded string.
1282 * @len: the length of the string, or -1 if the string is
1283 * nul-terminated.
1284 * @bytes_read: (out) (optional): location to store the number of bytes in
1285 * the input string that were successfully converted, or %NULL.
1286 * Even if the conversion was successful, this may be
1287 * less than @len if there were partial characters
1288 * at the end of the input. If the error
1289 * %G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
1290 * stored will be the byte offset after the last valid
1291 * input sequence.
1292 * @bytes_written: (out) (optional): the number of bytes stored in
1293 * the output buffer (not including the terminating nul).
1294 * @error: location to store the error occurring, or %NULL to ignore
1295 * errors. Any of the errors in #GConvertError may occur.
1296 *
1297 * Converts a string from UTF-8 to the encoding GLib uses for
1298 * filenames. Note that on Windows GLib uses UTF-8 for filenames;
1299 * on other platforms, this function indirectly depends on the
1300 * [current locale][setlocale].
1301 *
1302 * The input string shall not contain nul characters even if the @len
1303 * argument is positive. A nul character found inside the string will result
1304 * in error %G_CONVERT_ERROR_ILLEGAL_SEQUENCE. If the filename encoding is
1305 * not UTF-8 and the conversion output contains a nul character, the error
1306 * %G_CONVERT_ERROR_EMBEDDED_NUL is set and the function returns %NULL.
1307 *
1308 * Returns: (type filename):
1309 * The converted string, or %NULL on an error.
1310 **/
1311 gchar*
g_filename_from_utf8(const gchar * utf8string,gssize len,gsize * bytes_read,gsize * bytes_written,GError ** error)1312 g_filename_from_utf8 (const gchar *utf8string,
1313 gssize len,
1314 gsize *bytes_read,
1315 gsize *bytes_written,
1316 GError **error)
1317 {
1318 const gchar *charset;
1319
1320 if (get_filename_charset (&charset))
1321 return strdup_len (utf8string, len, bytes_read, bytes_written, error);
1322 else
1323 return convert_checked (utf8string, len, charset, "UTF-8",
1324 CONVERT_CHECK_NO_NULS_IN_INPUT |
1325 CONVERT_CHECK_NO_NULS_IN_OUTPUT,
1326 bytes_read, bytes_written, error);
1327 }
1328
1329 /* Test of haystack has the needle prefix, comparing case
1330 * insensitive. haystack may be UTF-8, but needle must
1331 * contain only ascii. */
1332 static gboolean
has_case_prefix(const gchar * haystack,const gchar * needle)1333 has_case_prefix (const gchar *haystack, const gchar *needle)
1334 {
1335 const gchar *h, *n;
1336
1337 /* Eat one character at a time. */
1338 h = haystack;
1339 n = needle;
1340
1341 while (*n && *h &&
1342 g_ascii_tolower (*n) == g_ascii_tolower (*h))
1343 {
1344 n++;
1345 h++;
1346 }
1347
1348 return *n == '\0';
1349 }
1350
/* Bit selectors into the `acceptable` table: a character is left unescaped
 * for a given set when the corresponding bit is set in its table entry. */
typedef enum {
  UNSAFE_ALL        = 1 << 0,  /* Escape all unsafe characters */
  UNSAFE_ALLOW_PLUS = 1 << 1,  /* Allows '+' */
  UNSAFE_PATH       = 1 << 3,  /* Allows '/', '&', '=', ':', '@', '+', '$' and ',' */
  UNSAFE_HOST       = 1 << 4,  /* Allows '/' and ':' and '@' */
  UNSAFE_SLASHES    = 1 << 5   /* Allows all characters except for '/' and '%' */
} UnsafeCharacterSet;
1358
1359 static const guchar acceptable[96] = {
1360 /* A table of the ASCII chars from space (32) to DEL (127) */
1361 /* ! " # $ % & ' ( ) * + , - . / */
1362 0x00,0x3F,0x20,0x20,0x28,0x00,0x2C,0x3F,0x3F,0x3F,0x3F,0x2A,0x28,0x3F,0x3F,0x1C,
1363 /* 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
1364 0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x38,0x20,0x20,0x2C,0x20,0x20,
1365 /* @ A B C D E F G H I J K L M N O */
1366 0x38,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,
1367 /* P Q R S T U V W X Y Z [ \ ] ^ _ */
1368 0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x20,0x20,0x20,0x20,0x3F,
1369 /* ` a b c d e f g h i j k l m n o */
1370 0x20,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,
1371 /* p q r s t u v w x y z { | } ~ DEL */
1372 0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x20,0x20,0x20,0x3F,0x20
1373 };
1374
1375 static const gchar hex[16] = "0123456789ABCDEF";
1376
1377 /* Note: This escape function works on file: URIs, but if you want to
1378 * escape something else, please read RFC-2396 */
1379 static gchar *
g_escape_uri_string(const gchar * string,UnsafeCharacterSet mask)1380 g_escape_uri_string (const gchar *string,
1381 UnsafeCharacterSet mask)
1382 {
1383 #define ACCEPTABLE(a) ((a)>=32 && (a)<128 && (acceptable[(a)-32] & use_mask))
1384
1385 const gchar *p;
1386 gchar *q;
1387 gchar *result;
1388 int c;
1389 gint unacceptable;
1390 UnsafeCharacterSet use_mask;
1391
1392 g_return_val_if_fail (mask == UNSAFE_ALL
1393 || mask == UNSAFE_ALLOW_PLUS
1394 || mask == UNSAFE_PATH
1395 || mask == UNSAFE_HOST
1396 || mask == UNSAFE_SLASHES, NULL);
1397
1398 unacceptable = 0;
1399 use_mask = mask;
1400 for (p = string; *p != '\0'; p++)
1401 {
1402 c = (guchar) *p;
1403 if (!ACCEPTABLE (c))
1404 unacceptable++;
1405 }
1406
1407 result = g_malloc (p - string + unacceptable * 2 + 1);
1408
1409 use_mask = mask;
1410 for (q = result, p = string; *p != '\0'; p++)
1411 {
1412 c = (guchar) *p;
1413
1414 if (!ACCEPTABLE (c))
1415 {
1416 *q++ = '%'; /* means hex coming */
1417 *q++ = hex[c >> 4];
1418 *q++ = hex[c & 15];
1419 }
1420 else
1421 *q++ = *p;
1422 }
1423
1424 *q = '\0';
1425
1426 return result;
1427 }
1428
1429
1430 static gchar *
g_escape_file_uri(const gchar * hostname,const gchar * pathname)1431 g_escape_file_uri (const gchar *hostname,
1432 const gchar *pathname)
1433 {
1434 char *escaped_hostname = NULL;
1435 char *escaped_path;
1436 char *res;
1437
1438 #ifdef G_OS_WIN32
1439 char *p, *backslash;
1440
1441 /* Turn backslashes into forward slashes. That's what Netscape
1442 * does, and they are actually more or less equivalent in Windows.
1443 */
1444
1445 pathname = g_strdup (pathname);
1446 p = (char *) pathname;
1447
1448 while ((backslash = strchr (p, '\\')) != NULL)
1449 {
1450 *backslash = '/';
1451 p = backslash + 1;
1452 }
1453 #endif
1454
1455 if (hostname && *hostname != '\0')
1456 {
1457 escaped_hostname = g_escape_uri_string (hostname, UNSAFE_HOST);
1458 }
1459
1460 escaped_path = g_escape_uri_string (pathname, UNSAFE_PATH);
1461
1462 res = g_strconcat ("file://",
1463 (escaped_hostname) ? escaped_hostname : "",
1464 (*escaped_path != '/') ? "/" : "",
1465 escaped_path,
1466 NULL);
1467
1468 #ifdef G_OS_WIN32
1469 g_free ((char *) pathname);
1470 #endif
1471
1472 g_free (escaped_hostname);
1473 g_free (escaped_path);
1474
1475 return res;
1476 }
1477
/* Decodes the two hexadecimal digits at scanner[0] and scanner[1] into a
 * byte value.  Returns -1 if either character is not a hex digit; the
 * second character is not inspected when the first one is invalid. */
static int
unescape_character (const char *scanner)
{
  const int high = g_ascii_xdigit_value (scanner[0]);
  int low;

  if (high < 0)
    return -1;

  low = g_ascii_xdigit_value (scanner[1]);
  if (low < 0)
    return -1;

  return (high << 4) | low;
}
1494
1495 static gchar *
g_unescape_uri_string(const char * escaped,int len,const char * illegal_escaped_characters,gboolean ascii_must_not_be_escaped)1496 g_unescape_uri_string (const char *escaped,
1497 int len,
1498 const char *illegal_escaped_characters,
1499 gboolean ascii_must_not_be_escaped)
1500 {
1501 const gchar *in, *in_end;
1502 gchar *out, *result;
1503 int c;
1504
1505 if (escaped == NULL)
1506 return NULL;
1507
1508 if (len < 0)
1509 len = strlen (escaped);
1510
1511 result = g_malloc (len + 1);
1512
1513 out = result;
1514 for (in = escaped, in_end = escaped + len; in < in_end; in++)
1515 {
1516 c = *in;
1517
1518 if (c == '%')
1519 {
1520 /* catch partial escape sequences past the end of the substring */
1521 if (in + 3 > in_end)
1522 break;
1523
1524 c = unescape_character (in + 1);
1525
1526 /* catch bad escape sequences and NUL characters */
1527 if (c <= 0)
1528 break;
1529
1530 /* catch escaped ASCII */
1531 if (ascii_must_not_be_escaped && c <= 0x7F)
1532 break;
1533
1534 /* catch other illegal escaped characters */
1535 if (strchr (illegal_escaped_characters, c) != NULL)
1536 break;
1537
1538 in += 2;
1539 }
1540
1541 *out++ = c;
1542 }
1543
1544 g_assert (out - result <= len);
1545 *out = '\0';
1546
1547 if (in != in_end)
1548 {
1549 g_free (result);
1550 return NULL;
1551 }
1552
1553 return result;
1554 }
1555
1556 static gboolean
is_asciialphanum(gunichar c)1557 is_asciialphanum (gunichar c)
1558 {
1559 return c <= 0x7F && g_ascii_isalnum (c);
1560 }
1561
1562 static gboolean
is_asciialpha(gunichar c)1563 is_asciialpha (gunichar c)
1564 {
1565 return c <= 0x7F && g_ascii_isalpha (c);
1566 }
1567
1568 /* allows an empty string */
1569 static gboolean
hostname_validate(const char * hostname)1570 hostname_validate (const char *hostname)
1571 {
1572 const char *p;
1573 gunichar c, first_char, last_char;
1574
1575 p = hostname;
1576 if (*p == '\0')
1577 return TRUE;
1578 do
1579 {
1580 /* read in a label */
1581 c = g_utf8_get_char (p);
1582 p = g_utf8_next_char (p);
1583 if (!is_asciialphanum (c))
1584 return FALSE;
1585 first_char = c;
1586 do
1587 {
1588 last_char = c;
1589 c = g_utf8_get_char (p);
1590 p = g_utf8_next_char (p);
1591 }
1592 while (is_asciialphanum (c) || c == '-');
1593 if (last_char == '-')
1594 return FALSE;
1595
1596 /* if that was the last label, check that it was a toplabel */
1597 if (c == '\0' || (c == '.' && *p == '\0'))
1598 return is_asciialpha (first_char);
1599 }
1600 while (c == '.');
1601 return FALSE;
1602 }
1603
1604 /**
1605 * g_filename_from_uri:
1606 * @uri: a uri describing a filename (escaped, encoded in ASCII).
1607 * @hostname: (out) (optional) (nullable): Location to store hostname for the URI.
1608 * If there is no hostname in the URI, %NULL will be
1609 * stored in this location.
1610 * @error: location to store the error occurring, or %NULL to ignore
1611 * errors. Any of the errors in #GConvertError may occur.
1612 *
1613 * Converts an escaped ASCII-encoded URI to a local filename in the
1614 * encoding used for filenames.
1615 *
1616 * Returns: (type filename): a newly-allocated string holding
1617 * the resulting filename, or %NULL on an error.
1618 **/
1619 gchar *
g_filename_from_uri(const gchar * uri,gchar ** hostname,GError ** error)1620 g_filename_from_uri (const gchar *uri,
1621 gchar **hostname,
1622 GError **error)
1623 {
1624 const char *path_part;
1625 const char *host_part;
1626 char *unescaped_hostname;
1627 char *result;
1628 char *filename;
1629 int offs;
1630 #ifdef G_OS_WIN32
1631 char *p, *slash;
1632 #endif
1633
1634 if (hostname)
1635 *hostname = NULL;
1636
1637 if (!has_case_prefix (uri, "file:/"))
1638 {
1639 g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_BAD_URI,
1640 _("The URI “%s” is not an absolute URI using the “file” scheme"),
1641 uri);
1642 return NULL;
1643 }
1644
1645 path_part = uri + strlen ("file:");
1646
1647 if (strchr (path_part, '#') != NULL)
1648 {
1649 g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_BAD_URI,
1650 _("The local file URI “%s” may not include a “#”"),
1651 uri);
1652 return NULL;
1653 }
1654
1655 if (has_case_prefix (path_part, "///"))
1656 path_part += 2;
1657 else if (has_case_prefix (path_part, "//"))
1658 {
1659 path_part += 2;
1660 host_part = path_part;
1661
1662 path_part = strchr (path_part, '/');
1663
1664 if (path_part == NULL)
1665 {
1666 g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_BAD_URI,
1667 _("The URI “%s” is invalid"),
1668 uri);
1669 return NULL;
1670 }
1671
1672 unescaped_hostname = g_unescape_uri_string (host_part, path_part - host_part, "", TRUE);
1673
1674 if (unescaped_hostname == NULL ||
1675 !hostname_validate (unescaped_hostname))
1676 {
1677 g_free (unescaped_hostname);
1678 g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_BAD_URI,
1679 _("The hostname of the URI “%s” is invalid"),
1680 uri);
1681 return NULL;
1682 }
1683
1684 if (hostname)
1685 *hostname = unescaped_hostname;
1686 else
1687 g_free (unescaped_hostname);
1688 }
1689
1690 filename = g_unescape_uri_string (path_part, -1, "/", FALSE);
1691
1692 if (filename == NULL)
1693 {
1694 g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_BAD_URI,
1695 _("The URI “%s” contains invalidly escaped characters"),
1696 uri);
1697 return NULL;
1698 }
1699
1700 offs = 0;
1701 #ifdef G_OS_WIN32
1702 /* Drop localhost */
1703 if (hostname && *hostname != NULL &&
1704 g_ascii_strcasecmp (*hostname, "localhost") == 0)
1705 {
1706 g_free (*hostname);
1707 *hostname = NULL;
1708 }
1709
1710 /* Turn slashes into backslashes, because that's the canonical spelling */
1711 p = filename;
1712 while ((slash = strchr (p, '/')) != NULL)
1713 {
1714 *slash = '\\';
1715 p = slash + 1;
1716 }
1717
1718 /* Windows URIs with a drive letter can be like "file://host/c:/foo"
1719 * or "file://host/c|/foo" (some Netscape versions). In those cases, start
1720 * the filename from the drive letter.
1721 */
1722 if (g_ascii_isalpha (filename[1]))
1723 {
1724 if (filename[2] == ':')
1725 offs = 1;
1726 else if (filename[2] == '|')
1727 {
1728 filename[2] = ':';
1729 offs = 1;
1730 }
1731 }
1732 #endif
1733
1734 result = g_strdup (filename + offs);
1735 g_free (filename);
1736
1737 return result;
1738 }
1739
1740 /**
1741 * g_filename_to_uri:
1742 * @filename: (type filename): an absolute filename specified in the GLib file
1743 * name encoding, which is the on-disk file name bytes on Unix, and UTF-8
1744 * on Windows
1745 * @hostname: (nullable): A UTF-8 encoded hostname, or %NULL for none.
1746 * @error: location to store the error occurring, or %NULL to ignore
1747 * errors. Any of the errors in #GConvertError may occur.
1748 *
1749 * Converts an absolute filename to an escaped ASCII-encoded URI, with the path
1750 * component following Section 3.3. of RFC 2396.
1751 *
1752 * Returns: a newly-allocated string holding the resulting
1753 * URI, or %NULL on an error.
1754 **/
1755 gchar *
g_filename_to_uri(const gchar * filename,const gchar * hostname,GError ** error)1756 g_filename_to_uri (const gchar *filename,
1757 const gchar *hostname,
1758 GError **error)
1759 {
1760 char *escaped_uri;
1761
1762 g_return_val_if_fail (filename != NULL, NULL);
1763
1764 if (!g_path_is_absolute (filename))
1765 {
1766 g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_NOT_ABSOLUTE_PATH,
1767 _("The pathname “%s” is not an absolute path"),
1768 filename);
1769 return NULL;
1770 }
1771
1772 if (hostname &&
1773 !(g_utf8_validate (hostname, -1, NULL)
1774 && hostname_validate (hostname)))
1775 {
1776 g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
1777 _("Invalid hostname"));
1778 return NULL;
1779 }
1780
1781 #ifdef G_OS_WIN32
1782 /* Don't use localhost unnecessarily */
1783 if (hostname && g_ascii_strcasecmp (hostname, "localhost") == 0)
1784 hostname = NULL;
1785 #endif
1786
1787 escaped_uri = g_escape_file_uri (hostname, filename);
1788
1789 return escaped_uri;
1790 }
1791
1792 /**
1793 * g_uri_list_extract_uris:
1794 * @uri_list: an URI list
1795 *
1796 * Splits an URI list conforming to the text/uri-list
1797 * mime type defined in RFC 2483 into individual URIs,
1798 * discarding any comments. The URIs are not validated.
1799 *
1800 * Returns: (transfer full): a newly allocated %NULL-terminated list
1801 * of strings holding the individual URIs. The array should be freed
1802 * with g_strfreev().
1803 *
1804 * Since: 2.6
1805 */
1806 gchar **
g_uri_list_extract_uris(const gchar * uri_list)1807 g_uri_list_extract_uris (const gchar *uri_list)
1808 {
1809 GSList *uris, *u;
1810 const gchar *p, *q;
1811 gchar **result;
1812 gint n_uris = 0;
1813
1814 uris = NULL;
1815
1816 p = uri_list;
1817
1818 /* We don't actually try to validate the URI according to RFC
1819 * 2396, or even check for allowed characters - we just ignore
1820 * comments and trim whitespace off the ends. We also
1821 * allow LF delimination as well as the specified CRLF.
1822 *
1823 * We do allow comments like specified in RFC 2483.
1824 */
1825 while (p)
1826 {
1827 if (*p != '#')
1828 {
1829 while (g_ascii_isspace (*p))
1830 p++;
1831
1832 q = p;
1833 while (*q && (*q != '\n') && (*q != '\r'))
1834 q++;
1835
1836 if (q > p)
1837 {
1838 q--;
1839 while (q > p && g_ascii_isspace (*q))
1840 q--;
1841
1842 if (q > p)
1843 {
1844 uris = g_slist_prepend (uris, g_strndup (p, q - p + 1));
1845 n_uris++;
1846 }
1847 }
1848 }
1849 p = strchr (p, '\n');
1850 if (p)
1851 p++;
1852 }
1853
1854 result = g_new (gchar *, n_uris + 1);
1855
1856 result[n_uris--] = NULL;
1857 for (u = uris; u; u = u->next)
1858 result[n_uris--] = u->data;
1859
1860 g_slist_free (uris);
1861
1862 return result;
1863 }
1864
1865 /**
1866 * g_filename_display_basename:
1867 * @filename: (type filename): an absolute pathname in the
1868 * GLib file name encoding
1869 *
1870 * Returns the display basename for the particular filename, guaranteed
1871 * to be valid UTF-8. The display name might not be identical to the filename,
1872 * for instance there might be problems converting it to UTF-8, and some files
1873 * can be translated in the display.
1874 *
1875 * If GLib cannot make sense of the encoding of @filename, as a last resort it
1876 * replaces unknown characters with U+FFFD, the Unicode replacement character.
1877 * You can search the result for the UTF-8 encoding of this character (which is
1878 * "\357\277\275" in octal notation) to find out if @filename was in an invalid
1879 * encoding.
1880 *
 * You must pass the whole absolute pathname to this function so that
1882 * translation of well known locations can be done.
1883 *
1884 * This function is preferred over g_filename_display_name() if you know the
1885 * whole path, as it allows translation.
1886 *
1887 * Returns: a newly allocated string containing
1888 * a rendition of the basename of the filename in valid UTF-8
1889 *
1890 * Since: 2.6
1891 **/
1892 gchar *
g_filename_display_basename(const gchar * filename)1893 g_filename_display_basename (const gchar *filename)
1894 {
1895 char *basename;
1896 char *display_name;
1897
1898 g_return_val_if_fail (filename != NULL, NULL);
1899
1900 basename = g_path_get_basename (filename);
1901 display_name = g_filename_display_name (basename);
1902 g_free (basename);
1903 return display_name;
1904 }
1905
1906 /**
1907 * g_filename_display_name:
1908 * @filename: (type filename): a pathname hopefully in the
1909 * GLib file name encoding
1910 *
1911 * Converts a filename into a valid UTF-8 string. The conversion is
1912 * not necessarily reversible, so you should keep the original around
1913 * and use the return value of this function only for display purposes.
1914 * Unlike g_filename_to_utf8(), the result is guaranteed to be non-%NULL
1915 * even if the filename actually isn't in the GLib file name encoding.
1916 *
1917 * If GLib cannot make sense of the encoding of @filename, as a last resort it
1918 * replaces unknown characters with U+FFFD, the Unicode replacement character.
1919 * You can search the result for the UTF-8 encoding of this character (which is
1920 * "\357\277\275" in octal notation) to find out if @filename was in an invalid
1921 * encoding.
1922 *
1923 * If you know the whole pathname of the file you should use
1924 * g_filename_display_basename(), since that allows location-based
1925 * translation of filenames.
1926 *
1927 * Returns: a newly allocated string containing
1928 * a rendition of the filename in valid UTF-8
1929 *
1930 * Since: 2.6
1931 **/
1932 gchar *
g_filename_display_name(const gchar * filename)1933 g_filename_display_name (const gchar *filename)
1934 {
1935 gint i;
1936 const gchar **charsets;
1937 gchar *display_name = NULL;
1938 gboolean is_utf8;
1939
1940 is_utf8 = g_get_filename_charsets (&charsets);
1941
1942 if (is_utf8)
1943 {
1944 if (g_utf8_validate (filename, -1, NULL))
1945 display_name = g_strdup (filename);
1946 }
1947
1948 if (!display_name)
1949 {
1950 /* Try to convert from the filename charsets to UTF-8.
1951 * Skip the first charset if it is UTF-8.
1952 */
1953 for (i = is_utf8 ? 1 : 0; charsets[i]; i++)
1954 {
1955 display_name = g_convert (filename, -1, "UTF-8", charsets[i],
1956 NULL, NULL, NULL);
1957
1958 if (display_name)
1959 break;
1960 }
1961 }
1962
1963 /* if all conversions failed, we replace invalid UTF-8
1964 * by a question mark
1965 */
1966 if (!display_name)
1967 display_name = g_utf8_make_valid (filename, -1);
1968
1969 return display_name;
1970 }
1971
1972 #ifdef G_OS_WIN32
1973
1974 /* Binary compatibility versions. Not for newly compiled code. */
1975
1976 _GLIB_EXTERN gchar *g_filename_to_utf8_utf8 (const gchar *opsysstring,
1977 gssize len,
1978 gsize *bytes_read,
1979 gsize *bytes_written,
1980 GError **error) G_GNUC_MALLOC;
1981 _GLIB_EXTERN gchar *g_filename_from_utf8_utf8 (const gchar *utf8string,
1982 gssize len,
1983 gsize *bytes_read,
1984 gsize *bytes_written,
1985 GError **error) G_GNUC_MALLOC;
1986 _GLIB_EXTERN gchar *g_filename_from_uri_utf8 (const gchar *uri,
1987 gchar **hostname,
1988 GError **error) G_GNUC_MALLOC;
1989 _GLIB_EXTERN gchar *g_filename_to_uri_utf8 (const gchar *filename,
1990 const gchar *hostname,
1991 GError **error) G_GNUC_MALLOC;
1992
1993 gchar *
g_filename_to_utf8_utf8(const gchar * opsysstring,gssize len,gsize * bytes_read,gsize * bytes_written,GError ** error)1994 g_filename_to_utf8_utf8 (const gchar *opsysstring,
1995 gssize len,
1996 gsize *bytes_read,
1997 gsize *bytes_written,
1998 GError **error)
1999 {
2000 return g_filename_to_utf8 (opsysstring, len, bytes_read, bytes_written, error);
2001 }
2002
2003 gchar *
g_filename_from_utf8_utf8(const gchar * utf8string,gssize len,gsize * bytes_read,gsize * bytes_written,GError ** error)2004 g_filename_from_utf8_utf8 (const gchar *utf8string,
2005 gssize len,
2006 gsize *bytes_read,
2007 gsize *bytes_written,
2008 GError **error)
2009 {
2010 return g_filename_from_utf8 (utf8string, len, bytes_read, bytes_written, error);
2011 }
2012
2013 gchar *
g_filename_from_uri_utf8(const gchar * uri,gchar ** hostname,GError ** error)2014 g_filename_from_uri_utf8 (const gchar *uri,
2015 gchar **hostname,
2016 GError **error)
2017 {
2018 return g_filename_from_uri (uri, hostname, error);
2019 }
2020
2021 gchar *
g_filename_to_uri_utf8(const gchar * filename,const gchar * hostname,GError ** error)2022 g_filename_to_uri_utf8 (const gchar *filename,
2023 const gchar *hostname,
2024 GError **error)
2025 {
2026 return g_filename_to_uri (filename, hostname, error);
2027 }
2028
2029 #endif
2030