• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GLIB - Library of useful routines for C programming
2  *
3  * gconvert.c: Convert between character sets using iconv
4  * Copyright Red Hat Inc., 2000
5  * Authors: Havoc Pennington <hp@redhat.com>, Owen Taylor <otaylor@redhat.com>
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with this library; if not, write to the
19  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20  * Boston, MA 02111-1307, USA.
21  */
22 
23 #include "config.h"
24 
25 #include "glib.h"
26 
27 
28 #ifndef ANDROID_STUB
29 #ifndef G_OS_WIN32
30 #include <iconv.h>
31 #endif
32 #endif
33 
34 #include <errno.h>
35 #include <stdio.h>
36 #include <string.h>
37 #include <stdlib.h>
38 
39 #include "gprintfint.h"
40 #include "gthreadprivate.h"
41 #include "gunicode.h"
42 
43 #ifdef G_OS_WIN32
44 #include "win_iconv.c"
45 #endif
46 
47 #ifdef G_PLATFORM_WIN32
48 #define STRICT
49 #include <windows.h>
50 #undef STRICT
51 #endif
52 
53 #include "glibintl.h"
54 
55 #if defined(USE_LIBICONV_GNU) && !defined (_LIBICONV_H)
56 #error GNU libiconv in use but included iconv.h not from libiconv
57 #endif
58 #if !defined(USE_LIBICONV_GNU) && defined (_LIBICONV_H)
59 #error GNU libiconv not in use but included iconv.h is from libiconv
60 #endif
61 
62 #include "galias.h"
63 
64 typedef void iconv_t;
65 
66 GQuark
g_convert_error_quark(void)67 g_convert_error_quark (void)
68 {
69   return g_quark_from_static_string ("g_convert_error");
70 }
71 
72 static gboolean
try_conversion(const char * to_codeset,const char * from_codeset,iconv_t * cd)73 try_conversion (const char *to_codeset,
74 		const char *from_codeset,
75 		iconv_t    *cd)
76 {
77 #ifndef ANDROID_STUB
78   *cd = iconv_open (to_codeset, from_codeset);
79 
80   if (*cd == (iconv_t)-1 && errno == EINVAL)
81     return FALSE;
82   else
83     return TRUE;
84 #else
85   return FALSE;
86 #endif
87 }
88 
89 static gboolean
try_to_aliases(const char ** to_aliases,const char * from_codeset,iconv_t * cd)90 try_to_aliases (const char **to_aliases,
91 		const char  *from_codeset,
92 		iconv_t     *cd)
93 {
94   if (to_aliases)
95     {
96       const char **p = to_aliases;
97       while (*p)
98 	{
99 	  if (try_conversion (*p, from_codeset, cd))
100 	    return TRUE;
101 
102 	  p++;
103 	}
104     }
105 
106   return FALSE;
107 }
108 
109 #ifndef ANDROID_STUB
110 G_GNUC_INTERNAL extern const char **
111 _g_charset_get_aliases (const char *canonical_name);
112 #endif
113 
114 /**
115  * g_iconv_open:
116  * @to_codeset: destination codeset
117  * @from_codeset: source codeset
118  *
119  * Same as the standard UNIX routine iconv_open(), but
120  * may be implemented via libiconv on UNIX flavors that lack
121  * a native implementation.
122  *
123  * GLib provides g_convert() and g_locale_to_utf8() which are likely
124  * more convenient than the raw iconv wrappers.
125  *
126  * Return value: a "conversion descriptor", or (GIConv)-1 if
127  *  opening the converter failed.
128  **/
129 GIConv
g_iconv_open(const gchar * to_codeset,const gchar * from_codeset)130 g_iconv_open (const gchar  *to_codeset,
131 	      const gchar  *from_codeset)
132 {
133 #ifndef ANDROID_STUB
134   iconv_t cd;
135 
136   if (!try_conversion (to_codeset, from_codeset, &cd))
137     {
138       const char **to_aliases = _g_charset_get_aliases (to_codeset);
139       const char **from_aliases = _g_charset_get_aliases (from_codeset);
140 
141       if (from_aliases)
142 	{
143 	  const char **p = from_aliases;
144 	  while (*p)
145 	    {
146 	      if (try_conversion (to_codeset, *p, &cd))
147 		goto out;
148 
149 	      if (try_to_aliases (to_aliases, *p, &cd))
150 		goto out;
151 
152 	      p++;
153 	    }
154 	}
155 
156       if (try_to_aliases (to_aliases, from_codeset, &cd))
157 	goto out;
158     }
159 
160  out:
161   return (cd == (iconv_t)-1) ? (GIConv)-1 : (GIConv)cd;
162 #else
163   return (GIConv) -1;
164 #endif
165 }
166 
167 /**
168  * g_iconv:
169  * @converter: conversion descriptor from g_iconv_open()
170  * @inbuf: bytes to convert
171  * @inbytes_left: inout parameter, bytes remaining to convert in @inbuf
172  * @outbuf: converted output bytes
173  * @outbytes_left: inout parameter, bytes available to fill in @outbuf
174  *
175  * Same as the standard UNIX routine iconv(), but
176  * may be implemented via libiconv on UNIX flavors that lack
177  * a native implementation.
178  *
179  * GLib provides g_convert() and g_locale_to_utf8() which are likely
180  * more convenient than the raw iconv wrappers.
181  *
182  * Return value: count of non-reversible conversions, or -1 on error
183  **/
184 gsize
g_iconv(GIConv converter,gchar ** inbuf,gsize * inbytes_left,gchar ** outbuf,gsize * outbytes_left)185 g_iconv (GIConv   converter,
186 	 gchar  **inbuf,
187 	 gsize   *inbytes_left,
188 	 gchar  **outbuf,
189 	 gsize   *outbytes_left)
190 {
191 #ifndef ANDROID_STUB
192   iconv_t cd = (iconv_t)converter;
193 
194   return iconv (cd, inbuf, inbytes_left, outbuf, outbytes_left);
195 #else
196   return -1;
197 #endif
198 }
199 
200 /**
201  * g_iconv_close:
202  * @converter: a conversion descriptor from g_iconv_open()
203  *
204  * Same as the standard UNIX routine iconv_close(), but
205  * may be implemented via libiconv on UNIX flavors that lack
206  * a native implementation. Should be called to clean up
207  * the conversion descriptor from g_iconv_open() when
208  * you are done converting things.
209  *
210  * GLib provides g_convert() and g_locale_to_utf8() which are likely
211  * more convenient than the raw iconv wrappers.
212  *
213  * Return value: -1 on error, 0 on success
214  **/
215 gint
g_iconv_close(GIConv converter)216 g_iconv_close (GIConv converter)
217 {
218 #ifndef ANDROID_STUB
219   iconv_t cd = (iconv_t)converter;
220 
221   return iconv_close (cd);
222 #else
223   return -1;
224 #endif
225 }
226 
227 
228 #ifdef NEED_ICONV_CACHE
229 
/* Bucket-count threshold: iconv_cache_expire_unused() drops unreferenced
 * buckets while the cache holds at least this many entries.
 */
#define ICONV_CACHE_SIZE   (16)

/* One cache entry per "from:to" codeset pair. */
struct _iconv_cache_bucket {
  gchar *key;        /* "from:to" key; owned by the bucket, also the iconv_cache hash key */
  guint32 refcount;  /* descriptors currently handed out for this key */
  gboolean used;     /* TRUE while the cached descriptor @cd itself is handed out */
  GIConv cd;         /* the cached descriptor; closed when the bucket expires */
};

/* All of the following are protected by iconv_cache_lock. */
static GList *iconv_cache_list;        /* every bucket, most recently created first */
static GHashTable *iconv_cache;        /* key string -> bucket */
static GHashTable *iconv_open_hash;    /* handed-out descriptor -> owning bucket's key */
static guint iconv_cache_size = 0;     /* number of buckets currently in the cache */
G_LOCK_DEFINE_STATIC (iconv_cache_lock);
244 
245 /* caller *must* hold the iconv_cache_lock */
246 static void
iconv_cache_init(void)247 iconv_cache_init (void)
248 {
249   static gboolean initialized = FALSE;
250 
251   if (initialized)
252     return;
253 
254   iconv_cache_list = NULL;
255   iconv_cache = g_hash_table_new (g_str_hash, g_str_equal);
256   iconv_open_hash = g_hash_table_new (g_direct_hash, g_direct_equal);
257 
258   initialized = TRUE;
259 }
260 
261 
262 /*
263  * iconv_cache_bucket_new:
264  * @key: cache key
265  * @cd: iconv descriptor
266  *
267  * Creates a new cache bucket, inserts it into the cache and
268  * increments the cache size.
269  *
270  * This assumes ownership of @key.
271  *
272  * Returns a pointer to the newly allocated cache bucket.
273  **/
274 static struct _iconv_cache_bucket *
iconv_cache_bucket_new(gchar * key,GIConv cd)275 iconv_cache_bucket_new (gchar *key, GIConv cd)
276 {
277   struct _iconv_cache_bucket *bucket;
278 
279   bucket = g_new (struct _iconv_cache_bucket, 1);
280   bucket->key = key;
281   bucket->refcount = 1;
282   bucket->used = TRUE;
283   bucket->cd = cd;
284 
285   g_hash_table_insert (iconv_cache, bucket->key, bucket);
286 
287   /* FIXME: if we sorted the list so items with few refcounts were
288      first, then we could expire them faster in iconv_cache_expire_unused () */
289   iconv_cache_list = g_list_prepend (iconv_cache_list, bucket);
290 
291   iconv_cache_size++;
292 
293   return bucket;
294 }
295 
296 
297 /*
298  * iconv_cache_bucket_expire:
299  * @node: cache bucket's node
300  * @bucket: cache bucket
301  *
302  * Expires a single cache bucket @bucket. This should only ever be
303  * called on a bucket that currently has no used iconv descriptors
304  * open.
305  *
306  * @node is not a required argument. If @node is not supplied, we
307  * search for it ourselves.
308  **/
309 static void
iconv_cache_bucket_expire(GList * node,struct _iconv_cache_bucket * bucket)310 iconv_cache_bucket_expire (GList *node, struct _iconv_cache_bucket *bucket)
311 {
312   g_hash_table_remove (iconv_cache, bucket->key);
313 
314   if (node == NULL)
315     node = g_list_find (iconv_cache_list, bucket);
316 
317   g_assert (node != NULL);
318 
319   if (node->prev)
320     {
321       node->prev->next = node->next;
322       if (node->next)
323         node->next->prev = node->prev;
324     }
325   else
326     {
327       iconv_cache_list = node->next;
328       if (node->next)
329         node->next->prev = NULL;
330     }
331 
332   g_list_free_1 (node);
333 
334   g_free (bucket->key);
335   g_iconv_close (bucket->cd);
336   g_free (bucket);
337 
338   iconv_cache_size--;
339 }
340 
341 
342 /*
343  * iconv_cache_expire_unused:
344  *
345  * Expires as many unused cache buckets as it needs to in order to get
346  * the total number of buckets < ICONV_CACHE_SIZE.
347  **/
348 static void
iconv_cache_expire_unused(void)349 iconv_cache_expire_unused (void)
350 {
351   struct _iconv_cache_bucket *bucket;
352   GList *node, *next;
353 
354   node = iconv_cache_list;
355   while (node && iconv_cache_size >= ICONV_CACHE_SIZE)
356     {
357       next = node->next;
358 
359       bucket = node->data;
360       if (bucket->refcount == 0)
361         iconv_cache_bucket_expire (node, bucket);
362 
363       node = next;
364     }
365 }
366 
367 static GIConv
open_converter(const gchar * to_codeset,const gchar * from_codeset,GError ** error)368 open_converter (const gchar *to_codeset,
369 		const gchar *from_codeset,
370 		GError     **error)
371 {
372   struct _iconv_cache_bucket *bucket;
373   gchar *key, *dyn_key, auto_key[80];
374   GIConv cd;
375   gsize len_from_codeset, len_to_codeset;
376 
377   /* create our key */
378   len_from_codeset = strlen (from_codeset);
379   len_to_codeset = strlen (to_codeset);
380   if (len_from_codeset + len_to_codeset + 2 < sizeof (auto_key))
381     {
382       key = auto_key;
383       dyn_key = NULL;
384     }
385   else
386     key = dyn_key = g_malloc (len_from_codeset + len_to_codeset + 2);
387   memcpy (key, from_codeset, len_from_codeset);
388   key[len_from_codeset] = ':';
389   strcpy (key + len_from_codeset + 1, to_codeset);
390 
391   G_LOCK (iconv_cache_lock);
392 
393   /* make sure the cache has been initialized */
394   iconv_cache_init ();
395 
396   bucket = g_hash_table_lookup (iconv_cache, key);
397   if (bucket)
398     {
399       g_free (dyn_key);
400 
401       if (bucket->used)
402         {
403           cd = g_iconv_open (to_codeset, from_codeset);
404           if (cd == (GIConv) -1)
405             goto error;
406         }
407       else
408         {
409 	  /* Apparently iconv on Solaris <= 7 segfaults if you pass in
410 	   * NULL for anything but inbuf; work around that. (NULL outbuf
411 	   * or NULL *outbuf is allowed by Unix98.)
412 	   */
413 	  gsize inbytes_left = 0;
414 	  gchar *outbuf = NULL;
415 	  gsize outbytes_left = 0;
416 
417           cd = bucket->cd;
418           bucket->used = TRUE;
419 
420           /* reset the descriptor */
421           g_iconv (cd, NULL, &inbytes_left, &outbuf, &outbytes_left);
422         }
423 
424       bucket->refcount++;
425     }
426   else
427     {
428       cd = g_iconv_open (to_codeset, from_codeset);
429       if (cd == (GIConv) -1)
430 	{
431 	  g_free (dyn_key);
432 	  goto error;
433 	}
434 
435       iconv_cache_expire_unused ();
436 
437       bucket = iconv_cache_bucket_new (dyn_key ? dyn_key : g_strdup (key), cd);
438     }
439 
440   g_hash_table_insert (iconv_open_hash, cd, bucket->key);
441 
442   G_UNLOCK (iconv_cache_lock);
443 
444   return cd;
445 
446  error:
447 
448   G_UNLOCK (iconv_cache_lock);
449 
450   /* Something went wrong.  */
451   if (error)
452     {
453       if (errno == EINVAL)
454 	g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_NO_CONVERSION,
455 		     _("Conversion from character set '%s' to '%s' is not supported"),
456 		     from_codeset, to_codeset);
457       else
458 	g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED,
459 		     _("Could not open converter from '%s' to '%s'"),
460 		     from_codeset, to_codeset);
461     }
462 
463   return cd;
464 }
465 
466 static int
close_converter(GIConv converter)467 close_converter (GIConv converter)
468 {
469   struct _iconv_cache_bucket *bucket;
470   const gchar *key;
471   GIConv cd;
472 
473   cd = converter;
474 
475   if (cd == (GIConv) -1)
476     return 0;
477 
478   G_LOCK (iconv_cache_lock);
479 
480   key = g_hash_table_lookup (iconv_open_hash, cd);
481   if (key)
482     {
483       g_hash_table_remove (iconv_open_hash, cd);
484 
485       bucket = g_hash_table_lookup (iconv_cache, key);
486       g_assert (bucket);
487 
488       bucket->refcount--;
489 
490       if (cd == bucket->cd)
491         bucket->used = FALSE;
492       else
493         g_iconv_close (cd);
494 
495       if (!bucket->refcount && iconv_cache_size > ICONV_CACHE_SIZE)
496         {
497           /* expire this cache bucket */
498           iconv_cache_bucket_expire (NULL, bucket);
499         }
500     }
501   else
502     {
503       G_UNLOCK (iconv_cache_lock);
504 
505       g_warning ("This iconv context wasn't opened using open_converter");
506 
507       return g_iconv_close (converter);
508     }
509 
510   G_UNLOCK (iconv_cache_lock);
511 
512   return 0;
513 }
514 
515 #else  /* !NEED_ICONV_CACHE */
516 
517 static GIConv
open_converter(const gchar * to_codeset,const gchar * from_codeset,GError ** error)518 open_converter (const gchar *to_codeset,
519 		const gchar *from_codeset,
520 		GError     **error)
521 {
522   GIConv cd;
523 
524   cd = g_iconv_open (to_codeset, from_codeset);
525 
526   if (cd == (GIConv) -1)
527     {
528       /* Something went wrong.  */
529       if (error)
530 	{
531 	  if (errno == EINVAL)
532 	    g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_NO_CONVERSION,
533 			 _("Conversion from character set '%s' to '%s' is not supported"),
534 			 from_codeset, to_codeset);
535 	  else
536 	    g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED,
537 			 _("Could not open converter from '%s' to '%s'"),
538 			 from_codeset, to_codeset);
539 	}
540     }
541 
542   return cd;
543 }
544 
545 static int
close_converter(GIConv cd)546 close_converter (GIConv cd)
547 {
548   if (cd == (GIConv) -1)
549     return 0;
550 
551   return g_iconv_close (cd);
552 }
553 
554 #endif /* NEED_ICONV_CACHE */
555 
556 /**
557  * g_convert_with_iconv:
558  * @str:           the string to convert
559  * @len:           the length of the string, or -1 if the string is
560  *                 nul-terminated<footnoteref linkend="nul-unsafe"/>.
561  * @converter:     conversion descriptor from g_iconv_open()
562  * @bytes_read:    location to store the number of bytes in the
563  *                 input string that were successfully converted, or %NULL.
564  *                 Even if the conversion was successful, this may be
565  *                 less than @len if there were partial characters
566  *                 at the end of the input. If the error
567  *                 #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
568  *                 stored will the byte offset after the last valid
569  *                 input sequence.
570  * @bytes_written: the number of bytes stored in the output buffer (not
571  *                 including the terminating nul).
572  * @error:         location to store the error occuring, or %NULL to ignore
573  *                 errors. Any of the errors in #GConvertError may occur.
574  *
575  * Converts a string from one character set to another.
576  *
577  * Note that you should use g_iconv() for streaming
578  * conversions<footnote id="streaming-state">
579  *  <para>
580  * Despite the fact that @byes_read can return information about partial
581  * characters, the <literal>g_convert_...</literal> functions
582  * are not generally suitable for streaming. If the underlying converter
583  * being used maintains internal state, then this won't be preserved
584  * across successive calls to g_convert(), g_convert_with_iconv() or
585  * g_convert_with_fallback(). (An example of this is the GNU C converter
586  * for CP1255 which does not emit a base character until it knows that
587  * the next character is not a mark that could combine with the base
588  * character.)
589  *  </para>
590  * </footnote>.
591  *
592  * Return value: If the conversion was successful, a newly allocated
593  *               nul-terminated string, which must be freed with
594  *               g_free(). Otherwise %NULL and @error will be set.
595  **/
596 gchar*
g_convert_with_iconv(const gchar * str,gssize len,GIConv converter,gsize * bytes_read,gsize * bytes_written,GError ** error)597 g_convert_with_iconv (const gchar *str,
598 		      gssize       len,
599 		      GIConv       converter,
600 		      gsize       *bytes_read,
601 		      gsize       *bytes_written,
602 		      GError     **error)
603 {
604   gchar *dest;
605   gchar *outp;
606   const gchar *p;
607   gsize inbytes_remaining;
608   gsize outbytes_remaining;
609   gsize err;
610   gsize outbuf_size;
611   gboolean have_error = FALSE;
612   gboolean done = FALSE;
613   gboolean reset = FALSE;
614 
615   g_return_val_if_fail (converter != (GIConv) -1, NULL);
616 
617   if (len < 0)
618     len = strlen (str);
619 
620   p = str;
621   inbytes_remaining = len;
622   outbuf_size = len + 1; /* + 1 for nul in case len == 1 */
623 
624   outbytes_remaining = outbuf_size - 1; /* -1 for nul */
625   outp = dest = g_malloc (outbuf_size);
626 
627   while (!done && !have_error)
628     {
629       if (reset)
630         err = g_iconv (converter, NULL, &inbytes_remaining, &outp, &outbytes_remaining);
631       else
632         err = g_iconv (converter, (char **)&p, &inbytes_remaining, &outp, &outbytes_remaining);
633 
634       if (err == (gsize) -1)
635 	{
636 	  switch (errno)
637 	    {
638 	    case EINVAL:
639 	      /* Incomplete text, do not report an error */
640 	      done = TRUE;
641 	      break;
642 	    case E2BIG:
643 	      {
644 		gsize used = outp - dest;
645 
646 		outbuf_size *= 2;
647 		dest = g_realloc (dest, outbuf_size);
648 
649 		outp = dest + used;
650 		outbytes_remaining = outbuf_size - used - 1; /* -1 for nul */
651 	      }
652 	      break;
653 	    case EILSEQ:
654 	      if (error)
655 		g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
656                                      _("Invalid byte sequence in conversion input"));
657 	      have_error = TRUE;
658 	      break;
659 	    default:
660 	      if (error)
661 		g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED,
662 			     _("Error during conversion: %s"),
663 			     g_strerror (errno));
664 	      have_error = TRUE;
665 	      break;
666 	    }
667 	}
668       else
669 	{
670 	  if (!reset)
671 	    {
672 	      /* call g_iconv with NULL inbuf to cleanup shift state */
673 	      reset = TRUE;
674 	      inbytes_remaining = 0;
675 	    }
676 	  else
677 	    done = TRUE;
678 	}
679     }
680 
681   *outp = '\0';
682 
683   if (bytes_read)
684     *bytes_read = p - str;
685   else
686     {
687       if ((p - str) != len)
688 	{
689           if (!have_error)
690             {
691 	      if (error)
692 		g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT,
693                                      _("Partial character sequence at end of input"));
694               have_error = TRUE;
695             }
696 	}
697     }
698 
699   if (bytes_written)
700     *bytes_written = outp - dest;	/* Doesn't include '\0' */
701 
702   if (have_error)
703     {
704       g_free (dest);
705       return NULL;
706     }
707   else
708     return dest;
709 }
710 
711 /**
712  * g_convert:
713  * @str:           the string to convert
714  * @len:           the length of the string, or -1 if the string is
715  *                 nul-terminated<footnote id="nul-unsafe">
716                      <para>
717                        Note that some encodings may allow nul bytes to
718                        occur inside strings. In that case, using -1 for
719                        the @len parameter is unsafe.
720                      </para>
721                    </footnote>.
722  * @to_codeset:    name of character set into which to convert @str
723  * @from_codeset:  character set of @str.
724  * @bytes_read:    location to store the number of bytes in the
725  *                 input string that were successfully converted, or %NULL.
726  *                 Even if the conversion was successful, this may be
727  *                 less than @len if there were partial characters
728  *                 at the end of the input. If the error
729  *                 #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
730  *                 stored will the byte offset after the last valid
731  *                 input sequence.
732  * @bytes_written: the number of bytes stored in the output buffer (not
733  *                 including the terminating nul).
734  * @error:         location to store the error occuring, or %NULL to ignore
735  *                 errors. Any of the errors in #GConvertError may occur.
736  *
737  * Converts a string from one character set to another.
738  *
739  * Note that you should use g_iconv() for streaming
740  * conversions<footnoteref linkend="streaming-state"/>.
741  *
742  * Return value: If the conversion was successful, a newly allocated
743  *               nul-terminated string, which must be freed with
744  *               g_free(). Otherwise %NULL and @error will be set.
745  **/
746 gchar*
g_convert(const gchar * str,gssize len,const gchar * to_codeset,const gchar * from_codeset,gsize * bytes_read,gsize * bytes_written,GError ** error)747 g_convert (const gchar *str,
748            gssize       len,
749            const gchar *to_codeset,
750            const gchar *from_codeset,
751            gsize       *bytes_read,
752 	   gsize       *bytes_written,
753 	   GError     **error)
754 {
755   gchar *res;
756   GIConv cd;
757 
758   g_return_val_if_fail (str != NULL, NULL);
759   g_return_val_if_fail (to_codeset != NULL, NULL);
760   g_return_val_if_fail (from_codeset != NULL, NULL);
761 
762   cd = open_converter (to_codeset, from_codeset, error);
763 
764   if (cd == (GIConv) -1)
765     {
766       if (bytes_read)
767         *bytes_read = 0;
768 
769       if (bytes_written)
770         *bytes_written = 0;
771 
772       return NULL;
773     }
774 
775   res = g_convert_with_iconv (str, len, cd,
776 			      bytes_read, bytes_written,
777 			      error);
778 
779   close_converter (cd);
780 
781   return res;
782 }
783 
784 /**
785  * g_convert_with_fallback:
786  * @str:          the string to convert
787  * @len:          the length of the string, or -1 if the string is
788  *                nul-terminated<footnoteref linkend="nul-unsafe"/>.
789  * @to_codeset:   name of character set into which to convert @str
790  * @from_codeset: character set of @str.
791  * @fallback:     UTF-8 string to use in place of character not
792  *                present in the target encoding. (The string must be
793  *                representable in the target encoding).
794                   If %NULL, characters not in the target encoding will
795                   be represented as Unicode escapes \uxxxx or \Uxxxxyyyy.
796  * @bytes_read:   location to store the number of bytes in the
797  *                input string that were successfully converted, or %NULL.
798  *                Even if the conversion was successful, this may be
799  *                less than @len if there were partial characters
800  *                at the end of the input.
801  * @bytes_written: the number of bytes stored in the output buffer (not
802  *                including the terminating nul).
803  * @error:        location to store the error occuring, or %NULL to ignore
804  *                errors. Any of the errors in #GConvertError may occur.
805  *
806  * Converts a string from one character set to another, possibly
807  * including fallback sequences for characters not representable
808  * in the output. Note that it is not guaranteed that the specification
809  * for the fallback sequences in @fallback will be honored. Some
810  * systems may do an approximate conversion from @from_codeset
811  * to @to_codeset in their iconv() functions,
812  * in which case GLib will simply return that approximate conversion.
813  *
814  * Note that you should use g_iconv() for streaming
815  * conversions<footnoteref linkend="streaming-state"/>.
816  *
817  * Return value: If the conversion was successful, a newly allocated
818  *               nul-terminated string, which must be freed with
819  *               g_free(). Otherwise %NULL and @error will be set.
820  **/
821 gchar*
g_convert_with_fallback(const gchar * str,gssize len,const gchar * to_codeset,const gchar * from_codeset,gchar * fallback,gsize * bytes_read,gsize * bytes_written,GError ** error)822 g_convert_with_fallback (const gchar *str,
823 			 gssize       len,
824 			 const gchar *to_codeset,
825 			 const gchar *from_codeset,
826 			 gchar       *fallback,
827 			 gsize       *bytes_read,
828 			 gsize       *bytes_written,
829 			 GError     **error)
830 {
831   gchar *utf8;
832   gchar *dest;
833   gchar *outp;
834   const gchar *insert_str = NULL;
835   const gchar *p;
836   gsize inbytes_remaining;
837   const gchar *save_p = NULL;
838   gsize save_inbytes = 0;
839   gsize outbytes_remaining;
840   gsize err;
841   GIConv cd;
842   gsize outbuf_size;
843   gboolean have_error = FALSE;
844   gboolean done = FALSE;
845 
846   GError *local_error = NULL;
847 
848   g_return_val_if_fail (str != NULL, NULL);
849   g_return_val_if_fail (to_codeset != NULL, NULL);
850   g_return_val_if_fail (from_codeset != NULL, NULL);
851 
852   if (len < 0)
853     len = strlen (str);
854 
855   /* Try an exact conversion; we only proceed if this fails
856    * due to an illegal sequence in the input string.
857    */
858   dest = g_convert (str, len, to_codeset, from_codeset,
859 		    bytes_read, bytes_written, &local_error);
860   if (!local_error)
861     return dest;
862 
863   if (!g_error_matches (local_error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE))
864     {
865       g_propagate_error (error, local_error);
866       return NULL;
867     }
868   else
869     g_error_free (local_error);
870 
871   local_error = NULL;
872 
873   /* No go; to proceed, we need a converter from "UTF-8" to
874    * to_codeset, and the string as UTF-8.
875    */
876   cd = open_converter (to_codeset, "UTF-8", error);
877   if (cd == (GIConv) -1)
878     {
879       if (bytes_read)
880         *bytes_read = 0;
881 
882       if (bytes_written)
883         *bytes_written = 0;
884 
885       return NULL;
886     }
887 
888   utf8 = g_convert (str, len, "UTF-8", from_codeset,
889 		    bytes_read, &inbytes_remaining, error);
890   if (!utf8)
891     {
892       close_converter (cd);
893       if (bytes_written)
894         *bytes_written = 0;
895       return NULL;
896     }
897 
898   /* Now the heart of the code. We loop through the UTF-8 string, and
899    * whenever we hit an offending character, we form fallback, convert
900    * the fallback to the target codeset, and then go back to
901    * converting the original string after finishing with the fallback.
902    *
903    * The variables save_p and save_inbytes store the input state
904    * for the original string while we are converting the fallback
905    */
906   p = utf8;
907 
908   outbuf_size = len + 1; /* + 1 for nul in case len == 1 */
909   outbytes_remaining = outbuf_size - 1; /* -1 for nul */
910   outp = dest = g_malloc (outbuf_size);
911 
912   while (!done && !have_error)
913     {
914       gsize inbytes_tmp = inbytes_remaining;
915       err = g_iconv (cd, (char **)&p, &inbytes_tmp, &outp, &outbytes_remaining);
916       inbytes_remaining = inbytes_tmp;
917 
918       if (err == (gsize) -1)
919 	{
920 	  switch (errno)
921 	    {
922 	    case EINVAL:
923 	      g_assert_not_reached();
924 	      break;
925 	    case E2BIG:
926 	      {
927 		gsize used = outp - dest;
928 
929 		outbuf_size *= 2;
930 		dest = g_realloc (dest, outbuf_size);
931 
932 		outp = dest + used;
933 		outbytes_remaining = outbuf_size - used - 1; /* -1 for nul */
934 
935 		break;
936 	      }
937 	    case EILSEQ:
938 	      if (save_p)
939 		{
940 		  /* Error converting fallback string - fatal
941 		   */
942 		  g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
943 			       _("Cannot convert fallback '%s' to codeset '%s'"),
944 			       insert_str, to_codeset);
945 		  have_error = TRUE;
946 		  break;
947 		}
948 	      else if (p)
949 		{
950 		  if (!fallback)
951 		    {
952 		      gunichar ch = g_utf8_get_char (p);
953 		      insert_str = g_strdup_printf (ch < 0x10000 ? "\\u%04x" : "\\U%08x",
954 						    ch);
955 		    }
956 		  else
957 		    insert_str = fallback;
958 
959 		  save_p = g_utf8_next_char (p);
960 		  save_inbytes = inbytes_remaining - (save_p - p);
961 		  p = insert_str;
962 		  inbytes_remaining = strlen (p);
963 		  break;
964 		}
965 	      /* fall thru if p is NULL */
966 	    default:
967 	      g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED,
968 			   _("Error during conversion: %s"),
969 			   g_strerror (errno));
970 	      have_error = TRUE;
971 	      break;
972 	    }
973 	}
974       else
975 	{
976 	  if (save_p)
977 	    {
978 	      if (!fallback)
979 		g_free ((gchar *)insert_str);
980 	      p = save_p;
981 	      inbytes_remaining = save_inbytes;
982 	      save_p = NULL;
983 	    }
984 	  else if (p)
985 	    {
986 	      /* call g_iconv with NULL inbuf to cleanup shift state */
987 	      p = NULL;
988 	      inbytes_remaining = 0;
989 	    }
990 	  else
991 	    done = TRUE;
992 	}
993     }
994 
995   /* Cleanup
996    */
997   *outp = '\0';
998 
999   close_converter (cd);
1000 
1001   if (bytes_written)
1002     *bytes_written = outp - dest;	/* Doesn't include '\0' */
1003 
1004   g_free (utf8);
1005 
1006   if (have_error)
1007     {
1008       if (save_p && !fallback)
1009 	g_free ((gchar *)insert_str);
1010       g_free (dest);
1011       return NULL;
1012     }
1013   else
1014     return dest;
1015 }
1016 
1017 /*
1018  * g_locale_to_utf8
1019  *
1020  *
1021  */
1022 
1023 static gchar *
strdup_len(const gchar * string,gssize len,gsize * bytes_written,gsize * bytes_read,GError ** error)1024 strdup_len (const gchar *string,
1025 	    gssize       len,
1026 	    gsize       *bytes_written,
1027 	    gsize       *bytes_read,
1028 	    GError      **error)
1029 
1030 {
1031   gsize real_len;
1032 
1033   if (!g_utf8_validate (string, len, NULL))
1034     {
1035       if (bytes_read)
1036 	*bytes_read = 0;
1037       if (bytes_written)
1038 	*bytes_written = 0;
1039 
1040       g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
1041                            _("Invalid byte sequence in conversion input"));
1042       return NULL;
1043     }
1044 
1045   if (len < 0)
1046     real_len = strlen (string);
1047   else
1048     {
1049       real_len = 0;
1050 
1051       while (real_len < len && string[real_len])
1052 	real_len++;
1053     }
1054 
1055   if (bytes_read)
1056     *bytes_read = real_len;
1057   if (bytes_written)
1058     *bytes_written = real_len;
1059 
1060   return g_strndup (string, real_len);
1061 }
1062 
1063 /**
1064  * g_locale_to_utf8:
1065  * @opsysstring:   a string in the encoding of the current locale. On Windows
1066  *                 this means the system codepage.
1067  * @len:           the length of the string, or -1 if the string is
1068  *                 nul-terminated<footnoteref linkend="nul-unsafe"/>.
1069  * @bytes_read:    location to store the number of bytes in the
1070  *                 input string that were successfully converted, or %NULL.
1071  *                 Even if the conversion was successful, this may be
1072  *                 less than @len if there were partial characters
1073  *                 at the end of the input. If the error
1074  *                 #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
1075  *                 stored will the byte offset after the last valid
1076  *                 input sequence.
1077  * @bytes_written: the number of bytes stored in the output buffer (not
1078  *                 including the terminating nul).
1079  * @error:         location to store the error occuring, or %NULL to ignore
1080  *                 errors. Any of the errors in #GConvertError may occur.
1081  *
1082  * Converts a string which is in the encoding used for strings by
1083  * the C runtime (usually the same as that used by the operating
1084  * system) in the <link linkend="setlocale">current locale</link> into a
1085  * UTF-8 string.
1086  *
1087  * Return value: The converted string, or %NULL on an error.
1088  **/
1089 gchar *
g_locale_to_utf8(const gchar * opsysstring,gssize len,gsize * bytes_read,gsize * bytes_written,GError ** error)1090 g_locale_to_utf8 (const gchar  *opsysstring,
1091 		  gssize        len,
1092 		  gsize        *bytes_read,
1093 		  gsize        *bytes_written,
1094 		  GError      **error)
1095 {
1096   const char *charset;
1097 
1098   if (g_get_charset (&charset))
1099     return strdup_len (opsysstring, len, bytes_read, bytes_written, error);
1100   else
1101     return g_convert (opsysstring, len,
1102 		      "UTF-8", charset, bytes_read, bytes_written, error);
1103 }
1104 
1105 /**
1106  * g_locale_from_utf8:
1107  * @utf8string:    a UTF-8 encoded string
1108  * @len:           the length of the string, or -1 if the string is
1109  *                 nul-terminated<footnoteref linkend="nul-unsafe"/>.
1110  * @bytes_read:    location to store the number of bytes in the
1111  *                 input string that were successfully converted, or %NULL.
1112  *                 Even if the conversion was successful, this may be
1113  *                 less than @len if there were partial characters
1114  *                 at the end of the input. If the error
1115  *                 #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
1116  *                 stored will the byte offset after the last valid
1117  *                 input sequence.
1118  * @bytes_written: the number of bytes stored in the output buffer (not
1119  *                 including the terminating nul).
1120  * @error:         location to store the error occuring, or %NULL to ignore
1121  *                 errors. Any of the errors in #GConvertError may occur.
1122  *
1123  * Converts a string from UTF-8 to the encoding used for strings by
1124  * the C runtime (usually the same as that used by the operating
1125  * system) in the <link linkend="setlocale">current locale</link>. On
1126  * Windows this means the system codepage.
1127  *
1128  * Return value: The converted string, or %NULL on an error.
1129  **/
1130 gchar *
g_locale_from_utf8(const gchar * utf8string,gssize len,gsize * bytes_read,gsize * bytes_written,GError ** error)1131 g_locale_from_utf8 (const gchar *utf8string,
1132 		    gssize       len,
1133 		    gsize       *bytes_read,
1134 		    gsize       *bytes_written,
1135 		    GError     **error)
1136 {
1137   const gchar *charset;
1138 
1139   if (g_get_charset (&charset))
1140     return strdup_len (utf8string, len, bytes_read, bytes_written, error);
1141   else
1142     return g_convert (utf8string, len,
1143 		      charset, "UTF-8", bytes_read, bytes_written, error);
1144 }
1145 
1146 #ifndef G_PLATFORM_WIN32
1147 
1148 typedef struct _GFilenameCharsetCache GFilenameCharsetCache;
1149 
1150 struct _GFilenameCharsetCache {
1151   gboolean is_utf8;
1152   gchar *charset;
1153   gchar **filename_charsets;
1154 };
1155 
1156 static void
filename_charset_cache_free(gpointer data)1157 filename_charset_cache_free (gpointer data)
1158 {
1159   GFilenameCharsetCache *cache = data;
1160   g_free (cache->charset);
1161   g_strfreev (cache->filename_charsets);
1162   g_free (cache);
1163 }
1164 
1165 /**
1166  * g_get_filename_charsets:
1167  * @charsets: return location for the %NULL-terminated list of encoding names
1168  *
1169  * Determines the preferred character sets used for filenames.
1170  * The first character set from the @charsets is the filename encoding, the
1171  * subsequent character sets are used when trying to generate a displayable
1172  * representation of a filename, see g_filename_display_name().
1173  *
1174  * On Unix, the character sets are determined by consulting the
1175  * environment variables <envar>G_FILENAME_ENCODING</envar> and
1176  * <envar>G_BROKEN_FILENAMES</envar>. On Windows, the character set
1177  * used in the GLib API is always UTF-8 and said environment variables
1178  * have no effect.
1179  *
1180  * <envar>G_FILENAME_ENCODING</envar> may be set to a comma-separated list
1181  * of character set names. The special token "&commat;locale" is taken to
1182  * mean the character set for the <link linkend="setlocale">current
1183  * locale</link>. If <envar>G_FILENAME_ENCODING</envar> is not set, but
1184  * <envar>G_BROKEN_FILENAMES</envar> is, the character set of the current
1185  * locale is taken as the filename encoding. If neither environment variable
1186  * is set, UTF-8 is taken as the filename encoding, but the character
1187  * set of the current locale is also put in the list of encodings.
1188  *
1189  * The returned @charsets belong to GLib and must not be freed.
1190  *
1191  * Note that on Unix, regardless of the locale character set or
1192  * <envar>G_FILENAME_ENCODING</envar> value, the actual file names present
1193  * on a system might be in any random encoding or just gibberish.
1194  *
1195  * Return value: %TRUE if the filename encoding is UTF-8.
1196  *
1197  * Since: 2.6
1198  */
1199 gboolean
g_get_filename_charsets(G_CONST_RETURN gchar *** filename_charsets)1200 g_get_filename_charsets (G_CONST_RETURN gchar ***filename_charsets)
1201 {
1202   static GStaticPrivate cache_private = G_STATIC_PRIVATE_INIT;
1203   GFilenameCharsetCache *cache = g_static_private_get (&cache_private);
1204   const gchar *charset;
1205 
1206   if (!cache)
1207     {
1208       cache = g_new0 (GFilenameCharsetCache, 1);
1209       g_static_private_set (&cache_private, cache, filename_charset_cache_free);
1210     }
1211 
1212   g_get_charset (&charset);
1213 
1214   if (!(cache->charset && strcmp (cache->charset, charset) == 0))
1215     {
1216       const gchar *new_charset;
1217       gchar *p;
1218       gint i;
1219 
1220       g_free (cache->charset);
1221       g_strfreev (cache->filename_charsets);
1222       cache->charset = g_strdup (charset);
1223 
1224       p = getenv ("G_FILENAME_ENCODING");
1225       if (p != NULL && p[0] != '\0')
1226 	{
1227 	  cache->filename_charsets = g_strsplit (p, ",", 0);
1228 	  cache->is_utf8 = (strcmp (cache->filename_charsets[0], "UTF-8") == 0);
1229 
1230 	  for (i = 0; cache->filename_charsets[i]; i++)
1231 	    {
1232 	      if (strcmp ("@locale", cache->filename_charsets[i]) == 0)
1233 		{
1234 		  g_get_charset (&new_charset);
1235 		  g_free (cache->filename_charsets[i]);
1236 		  cache->filename_charsets[i] = g_strdup (new_charset);
1237 		}
1238 	    }
1239 	}
1240       else if (getenv ("G_BROKEN_FILENAMES") != NULL)
1241 	{
1242 	  cache->filename_charsets = g_new0 (gchar *, 2);
1243 	  cache->is_utf8 = g_get_charset (&new_charset);
1244 	  cache->filename_charsets[0] = g_strdup (new_charset);
1245 	}
1246       else
1247 	{
1248 	  cache->filename_charsets = g_new0 (gchar *, 3);
1249 	  cache->is_utf8 = TRUE;
1250 	  cache->filename_charsets[0] = g_strdup ("UTF-8");
1251 	  if (!g_get_charset (&new_charset))
1252 	    cache->filename_charsets[1] = g_strdup (new_charset);
1253 	}
1254     }
1255 
1256   if (filename_charsets)
1257     *filename_charsets = (const gchar **)cache->filename_charsets;
1258 
1259   return cache->is_utf8;
1260 }
1261 
1262 #else /* G_PLATFORM_WIN32 */
1263 
1264 gboolean
g_get_filename_charsets(G_CONST_RETURN gchar *** filename_charsets)1265 g_get_filename_charsets (G_CONST_RETURN gchar ***filename_charsets)
1266 {
1267   static const gchar *charsets[] = {
1268     "UTF-8",
1269     NULL
1270   };
1271 
1272 #ifdef G_OS_WIN32
1273   /* On Windows GLib pretends that the filename charset is UTF-8 */
1274   if (filename_charsets)
1275     *filename_charsets = charsets;
1276 
1277   return TRUE;
1278 #else
1279   gboolean result;
1280 
1281   /* Cygwin works like before */
1282   result = g_get_charset (&(charsets[0]));
1283 
1284   if (filename_charsets)
1285     *filename_charsets = charsets;
1286 
1287   return result;
1288 #endif
1289 }
1290 
1291 #endif /* G_PLATFORM_WIN32 */
1292 
1293 static gboolean
get_filename_charset(const gchar ** filename_charset)1294 get_filename_charset (const gchar **filename_charset)
1295 {
1296   const gchar **charsets;
1297   gboolean is_utf8;
1298 
1299   is_utf8 = g_get_filename_charsets (&charsets);
1300 
1301   if (filename_charset)
1302     *filename_charset = charsets[0];
1303 
1304   return is_utf8;
1305 }
1306 
1307 /* This is called from g_thread_init(). It's used to
1308  * initialize some static data in a threadsafe way.
1309  */
1310 void
_g_convert_thread_init(void)1311 _g_convert_thread_init (void)
1312 {
1313   const gchar **dummy;
1314   (void) g_get_filename_charsets (&dummy);
1315 }
1316 
1317 /**
1318  * g_filename_to_utf8:
1319  * @opsysstring:   a string in the encoding for filenames
1320  * @len:           the length of the string, or -1 if the string is
1321  *                 nul-terminated<footnoteref linkend="nul-unsafe"/>.
1322  * @bytes_read:    location to store the number of bytes in the
1323  *                 input string that were successfully converted, or %NULL.
1324  *                 Even if the conversion was successful, this may be
1325  *                 less than @len if there were partial characters
1326  *                 at the end of the input. If the error
1327  *                 #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
1328  *                 stored will the byte offset after the last valid
1329  *                 input sequence.
1330  * @bytes_written: the number of bytes stored in the output buffer (not
1331  *                 including the terminating nul).
1332  * @error:         location to store the error occuring, or %NULL to ignore
1333  *                 errors. Any of the errors in #GConvertError may occur.
1334  *
1335  * Converts a string which is in the encoding used by GLib for
1336  * filenames into a UTF-8 string. Note that on Windows GLib uses UTF-8
1337  * for filenames; on other platforms, this function indirectly depends on
1338  * the <link linkend="setlocale">current locale</link>.
1339  *
1340  * Return value: The converted string, or %NULL on an error.
1341  **/
1342 gchar*
g_filename_to_utf8(const gchar * opsysstring,gssize len,gsize * bytes_read,gsize * bytes_written,GError ** error)1343 g_filename_to_utf8 (const gchar *opsysstring,
1344 		    gssize       len,
1345 		    gsize       *bytes_read,
1346 		    gsize       *bytes_written,
1347 		    GError     **error)
1348 {
1349   const gchar *charset;
1350 
1351   if (get_filename_charset (&charset))
1352     return strdup_len (opsysstring, len, bytes_read, bytes_written, error);
1353   else
1354     return g_convert (opsysstring, len,
1355 		      "UTF-8", charset, bytes_read, bytes_written, error);
1356 }
1357 
1358 #if defined (G_OS_WIN32) && !defined (_WIN64)
1359 
1360 #undef g_filename_to_utf8
1361 
1362 /* Binary compatibility version. Not for newly compiled code. Also not needed for
1363  * 64-bit versions as there should be no old deployed binaries that would use
1364  * the old versions.
1365  */
1366 
1367 gchar*
g_filename_to_utf8(const gchar * opsysstring,gssize len,gsize * bytes_read,gsize * bytes_written,GError ** error)1368 g_filename_to_utf8 (const gchar *opsysstring,
1369 		    gssize       len,
1370 		    gsize       *bytes_read,
1371 		    gsize       *bytes_written,
1372 		    GError     **error)
1373 {
1374   const gchar *charset;
1375 
1376   if (g_get_charset (&charset))
1377     return strdup_len (opsysstring, len, bytes_read, bytes_written, error);
1378   else
1379     return g_convert (opsysstring, len,
1380 		      "UTF-8", charset, bytes_read, bytes_written, error);
1381 }
1382 
1383 #endif
1384 
1385 /**
1386  * g_filename_from_utf8:
1387  * @utf8string:    a UTF-8 encoded string.
1388  * @len:           the length of the string, or -1 if the string is
1389  *                 nul-terminated.
1390  * @bytes_read:    location to store the number of bytes in the
1391  *                 input string that were successfully converted, or %NULL.
1392  *                 Even if the conversion was successful, this may be
1393  *                 less than @len if there were partial characters
1394  *                 at the end of the input. If the error
1395  *                 #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
1396  *                 stored will the byte offset after the last valid
1397  *                 input sequence.
1398  * @bytes_written: the number of bytes stored in the output buffer (not
1399  *                 including the terminating nul).
1400  * @error:         location to store the error occuring, or %NULL to ignore
1401  *                 errors. Any of the errors in #GConvertError may occur.
1402  *
1403  * Converts a string from UTF-8 to the encoding GLib uses for
1404  * filenames. Note that on Windows GLib uses UTF-8 for filenames;
1405  * on other platforms, this function indirectly depends on the
1406  * <link linkend="setlocale">current locale</link>.
1407  *
1408  * Return value: The converted string, or %NULL on an error.
1409  **/
1410 gchar*
g_filename_from_utf8(const gchar * utf8string,gssize len,gsize * bytes_read,gsize * bytes_written,GError ** error)1411 g_filename_from_utf8 (const gchar *utf8string,
1412 		      gssize       len,
1413 		      gsize       *bytes_read,
1414 		      gsize       *bytes_written,
1415 		      GError     **error)
1416 {
1417   const gchar *charset;
1418 
1419   if (get_filename_charset (&charset))
1420     return strdup_len (utf8string, len, bytes_read, bytes_written, error);
1421   else
1422     return g_convert (utf8string, len,
1423 		      charset, "UTF-8", bytes_read, bytes_written, error);
1424 }
1425 
1426 #if defined (G_OS_WIN32) && !defined (_WIN64)
1427 
1428 #undef g_filename_from_utf8
1429 
1430 /* Binary compatibility version. Not for newly compiled code. */
1431 
1432 gchar*
g_filename_from_utf8(const gchar * utf8string,gssize len,gsize * bytes_read,gsize * bytes_written,GError ** error)1433 g_filename_from_utf8 (const gchar *utf8string,
1434 		      gssize       len,
1435 		      gsize       *bytes_read,
1436 		      gsize       *bytes_written,
1437 		      GError     **error)
1438 {
1439   const gchar *charset;
1440 
1441   if (g_get_charset (&charset))
1442     return strdup_len (utf8string, len, bytes_read, bytes_written, error);
1443   else
1444     return g_convert (utf8string, len,
1445 		      charset, "UTF-8", bytes_read, bytes_written, error);
1446 }
1447 
1448 #endif
1449 
1450 /* Test of haystack has the needle prefix, comparing case
1451  * insensitive. haystack may be UTF-8, but needle must
1452  * contain only ascii. */
1453 static gboolean
has_case_prefix(const gchar * haystack,const gchar * needle)1454 has_case_prefix (const gchar *haystack, const gchar *needle)
1455 {
1456   const gchar *h, *n;
1457 
1458   /* Eat one character at a time. */
1459   h = haystack;
1460   n = needle;
1461 
1462   while (*n && *h &&
1463 	 g_ascii_tolower (*n) == g_ascii_tolower (*h))
1464     {
1465       n++;
1466       h++;
1467     }
1468 
1469   return *n == '\0';
1470 }
1471 
/* Escaping policies understood by g_escape_uri_string(). Each member is
 * a single bit that is tested against the per-character entries of the
 * acceptable[] table; a character is left unescaped when its entry has
 * the bit of the chosen policy set. Bit 2 (0x4) has no member here.
 */
typedef enum {
  UNSAFE_ALL        = 1 << 0, /* 0x1:  Escape all unsafe characters   */
  UNSAFE_ALLOW_PLUS = 1 << 1, /* 0x2:  Allows '+'  */
  UNSAFE_PATH       = 1 << 3, /* 0x8:  Allows '/', '&', '=', ':', '@', '+', '$' and ',' */
  UNSAFE_HOST       = 1 << 4, /* 0x10: Allows '/' and ':' and '@' */
  UNSAFE_SLASHES    = 1 << 5  /* 0x20: Allows all characters except for '/' and '%' */
} UnsafeCharacterSet;
1479 
1480 static const guchar acceptable[96] = {
1481   /* A table of the ASCII chars from space (32) to DEL (127) */
1482   /*      !    "    #    $    %    &    '    (    )    *    +    ,    -    .    / */
1483   0x00,0x3F,0x20,0x20,0x28,0x00,0x2C,0x3F,0x3F,0x3F,0x3F,0x2A,0x28,0x3F,0x3F,0x1C,
1484   /* 0    1    2    3    4    5    6    7    8    9    :    ;    <    =    >    ? */
1485   0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x38,0x20,0x20,0x2C,0x20,0x20,
1486   /* @    A    B    C    D    E    F    G    H    I    J    K    L    M    N    O */
1487   0x38,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,
1488   /* P    Q    R    S    T    U    V    W    X    Y    Z    [    \    ]    ^    _ */
1489   0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x20,0x20,0x20,0x20,0x3F,
1490   /* `    a    b    c    d    e    f    g    h    i    j    k    l    m    n    o */
1491   0x20,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,
1492   /* p    q    r    s    t    u    v    w    x    y    z    {    |    }    ~  DEL */
1493   0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x20,0x20,0x20,0x3F,0x20
1494 };
1495 
1496 static const gchar hex[16] = "0123456789ABCDEF";
1497 
1498 /* Note: This escape function works on file: URIs, but if you want to
1499  * escape something else, please read RFC-2396 */
1500 static gchar *
g_escape_uri_string(const gchar * string,UnsafeCharacterSet mask)1501 g_escape_uri_string (const gchar *string,
1502 		     UnsafeCharacterSet mask)
1503 {
1504 #define ACCEPTABLE(a) ((a)>=32 && (a)<128 && (acceptable[(a)-32] & use_mask))
1505 
1506   const gchar *p;
1507   gchar *q;
1508   gchar *result;
1509   int c;
1510   gint unacceptable;
1511   UnsafeCharacterSet use_mask;
1512 
1513   g_return_val_if_fail (mask == UNSAFE_ALL
1514 			|| mask == UNSAFE_ALLOW_PLUS
1515 			|| mask == UNSAFE_PATH
1516 			|| mask == UNSAFE_HOST
1517 			|| mask == UNSAFE_SLASHES, NULL);
1518 
1519   unacceptable = 0;
1520   use_mask = mask;
1521   for (p = string; *p != '\0'; p++)
1522     {
1523       c = (guchar) *p;
1524       if (!ACCEPTABLE (c))
1525 	unacceptable++;
1526     }
1527 
1528   result = g_malloc (p - string + unacceptable * 2 + 1);
1529 
1530   use_mask = mask;
1531   for (q = result, p = string; *p != '\0'; p++)
1532     {
1533       c = (guchar) *p;
1534 
1535       if (!ACCEPTABLE (c))
1536 	{
1537 	  *q++ = '%'; /* means hex coming */
1538 	  *q++ = hex[c >> 4];
1539 	  *q++ = hex[c & 15];
1540 	}
1541       else
1542 	*q++ = *p;
1543     }
1544 
1545   *q = '\0';
1546 
1547   return result;
1548 }
1549 
1550 
1551 static gchar *
g_escape_file_uri(const gchar * hostname,const gchar * pathname)1552 g_escape_file_uri (const gchar *hostname,
1553 		   const gchar *pathname)
1554 {
1555   char *escaped_hostname = NULL;
1556   char *escaped_path;
1557   char *res;
1558 
1559 #ifdef G_OS_WIN32
1560   char *p, *backslash;
1561 
1562   /* Turn backslashes into forward slashes. That's what Netscape
1563    * does, and they are actually more or less equivalent in Windows.
1564    */
1565 
1566   pathname = g_strdup (pathname);
1567   p = (char *) pathname;
1568 
1569   while ((backslash = strchr (p, '\\')) != NULL)
1570     {
1571       *backslash = '/';
1572       p = backslash + 1;
1573     }
1574 #endif
1575 
1576   if (hostname && *hostname != '\0')
1577     {
1578       escaped_hostname = g_escape_uri_string (hostname, UNSAFE_HOST);
1579     }
1580 
1581   escaped_path = g_escape_uri_string (pathname, UNSAFE_PATH);
1582 
1583   res = g_strconcat ("file://",
1584 		     (escaped_hostname) ? escaped_hostname : "",
1585 		     (*escaped_path != '/') ? "/" : "",
1586 		     escaped_path,
1587 		     NULL);
1588 
1589 #ifdef G_OS_WIN32
1590   g_free ((char *) pathname);
1591 #endif
1592 
1593   g_free (escaped_hostname);
1594   g_free (escaped_path);
1595 
1596   return res;
1597 }
1598 
/* Decodes the two hex digits at scanner[0] and scanner[1] (the part of
 * a %XX escape after the '%'). Returns the byte value 0..255, or -1 if
 * either character is not a hex digit. scanner[1] is only examined when
 * scanner[0] is a valid digit.
 */
static int
unescape_character (const char *scanner)
{
  int high_nibble;
  int low_nibble;

  high_nibble = g_ascii_xdigit_value (scanner[0]);
  if (high_nibble >= 0)
    {
      low_nibble = g_ascii_xdigit_value (scanner[1]);
      if (low_nibble >= 0)
        return (high_nibble << 4) | low_nibble;
    }

  return -1;
}
1615 
1616 static gchar *
g_unescape_uri_string(const char * escaped,int len,const char * illegal_escaped_characters,gboolean ascii_must_not_be_escaped)1617 g_unescape_uri_string (const char *escaped,
1618 		       int         len,
1619 		       const char *illegal_escaped_characters,
1620 		       gboolean    ascii_must_not_be_escaped)
1621 {
1622   const gchar *in, *in_end;
1623   gchar *out, *result;
1624   int c;
1625 
1626   if (escaped == NULL)
1627     return NULL;
1628 
1629   if (len < 0)
1630     len = strlen (escaped);
1631 
1632   result = g_malloc (len + 1);
1633 
1634   out = result;
1635   for (in = escaped, in_end = escaped + len; in < in_end; in++)
1636     {
1637       c = *in;
1638 
1639       if (c == '%')
1640 	{
1641 	  /* catch partial escape sequences past the end of the substring */
1642 	  if (in + 3 > in_end)
1643 	    break;
1644 
1645 	  c = unescape_character (in + 1);
1646 
1647 	  /* catch bad escape sequences and NUL characters */
1648 	  if (c <= 0)
1649 	    break;
1650 
1651 	  /* catch escaped ASCII */
1652 	  if (ascii_must_not_be_escaped && c <= 0x7F)
1653 	    break;
1654 
1655 	  /* catch other illegal escaped characters */
1656 	  if (strchr (illegal_escaped_characters, c) != NULL)
1657 	    break;
1658 
1659 	  in += 2;
1660 	}
1661 
1662       *out++ = c;
1663     }
1664 
1665   g_assert (out - result <= len);
1666   *out = '\0';
1667 
1668   if (in != in_end)
1669     {
1670       g_free (result);
1671       return NULL;
1672     }
1673 
1674   return result;
1675 }
1676 
1677 static gboolean
is_asciialphanum(gunichar c)1678 is_asciialphanum (gunichar c)
1679 {
1680   return c <= 0x7F && g_ascii_isalnum (c);
1681 }
1682 
1683 static gboolean
is_asciialpha(gunichar c)1684 is_asciialpha (gunichar c)
1685 {
1686   return c <= 0x7F && g_ascii_isalpha (c);
1687 }
1688 
1689 /* allows an empty string */
1690 static gboolean
hostname_validate(const char * hostname)1691 hostname_validate (const char *hostname)
1692 {
1693   const char *p;
1694   gunichar c, first_char, last_char;
1695 
1696   p = hostname;
1697   if (*p == '\0')
1698     return TRUE;
1699   do
1700     {
1701       /* read in a label */
1702       c = g_utf8_get_char (p);
1703       p = g_utf8_next_char (p);
1704       if (!is_asciialphanum (c))
1705 	return FALSE;
1706       first_char = c;
1707       do
1708 	{
1709 	  last_char = c;
1710 	  c = g_utf8_get_char (p);
1711 	  p = g_utf8_next_char (p);
1712 	}
1713       while (is_asciialphanum (c) || c == '-');
1714       if (last_char == '-')
1715 	return FALSE;
1716 
1717       /* if that was the last label, check that it was a toplabel */
1718       if (c == '\0' || (c == '.' && *p == '\0'))
1719 	return is_asciialpha (first_char);
1720     }
1721   while (c == '.');
1722   return FALSE;
1723 }
1724 
1725 /**
1726  * g_filename_from_uri:
1727  * @uri: a uri describing a filename (escaped, encoded in ASCII).
1728  * @hostname: Location to store hostname for the URI, or %NULL.
1729  *            If there is no hostname in the URI, %NULL will be
1730  *            stored in this location.
1731  * @error: location to store the error occuring, or %NULL to ignore
1732  *         errors. Any of the errors in #GConvertError may occur.
1733  *
1734  * Converts an escaped ASCII-encoded URI to a local filename in the
1735  * encoding used for filenames.
1736  *
1737  * Return value: a newly-allocated string holding the resulting
1738  *               filename, or %NULL on an error.
1739  **/
1740 gchar *
g_filename_from_uri(const gchar * uri,gchar ** hostname,GError ** error)1741 g_filename_from_uri (const gchar *uri,
1742 		     gchar      **hostname,
1743 		     GError     **error)
1744 {
1745   const char *path_part;
1746   const char *host_part;
1747   char *unescaped_hostname;
1748   char *result;
1749   char *filename;
1750   int offs;
1751 #ifdef G_OS_WIN32
1752   char *p, *slash;
1753 #endif
1754 
1755   if (hostname)
1756     *hostname = NULL;
1757 
1758   if (!has_case_prefix (uri, "file:/"))
1759     {
1760       g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_BAD_URI,
1761 		   _("The URI '%s' is not an absolute URI using the \"file\" scheme"),
1762 		   uri);
1763       return NULL;
1764     }
1765 
1766   path_part = uri + strlen ("file:");
1767 
1768   if (strchr (path_part, '#') != NULL)
1769     {
1770       g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_BAD_URI,
1771 		   _("The local file URI '%s' may not include a '#'"),
1772 		   uri);
1773       return NULL;
1774     }
1775 
1776   if (has_case_prefix (path_part, "///"))
1777     path_part += 2;
1778   else if (has_case_prefix (path_part, "//"))
1779     {
1780       path_part += 2;
1781       host_part = path_part;
1782 
1783       path_part = strchr (path_part, '/');
1784 
1785       if (path_part == NULL)
1786 	{
1787 	  g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_BAD_URI,
1788 		       _("The URI '%s' is invalid"),
1789 		       uri);
1790 	  return NULL;
1791 	}
1792 
1793       unescaped_hostname = g_unescape_uri_string (host_part, path_part - host_part, "", TRUE);
1794 
1795       if (unescaped_hostname == NULL ||
1796 	  !hostname_validate (unescaped_hostname))
1797 	{
1798 	  g_free (unescaped_hostname);
1799 	  g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_BAD_URI,
1800 		       _("The hostname of the URI '%s' is invalid"),
1801 		       uri);
1802 	  return NULL;
1803 	}
1804 
1805       if (hostname)
1806 	*hostname = unescaped_hostname;
1807       else
1808 	g_free (unescaped_hostname);
1809     }
1810 
1811   filename = g_unescape_uri_string (path_part, -1, "/", FALSE);
1812 
1813   if (filename == NULL)
1814     {
1815       g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_BAD_URI,
1816 		   _("The URI '%s' contains invalidly escaped characters"),
1817 		   uri);
1818       return NULL;
1819     }
1820 
1821   offs = 0;
1822 #ifdef G_OS_WIN32
1823   /* Drop localhost */
1824   if (hostname && *hostname != NULL &&
1825       g_ascii_strcasecmp (*hostname, "localhost") == 0)
1826     {
1827       g_free (*hostname);
1828       *hostname = NULL;
1829     }
1830 
1831   /* Turn slashes into backslashes, because that's the canonical spelling */
1832   p = filename;
1833   while ((slash = strchr (p, '/')) != NULL)
1834     {
1835       *slash = '\\';
1836       p = slash + 1;
1837     }
1838 
1839   /* Windows URIs with a drive letter can be like "file://host/c:/foo"
1840    * or "file://host/c|/foo" (some Netscape versions). In those cases, start
1841    * the filename from the drive letter.
1842    */
1843   if (g_ascii_isalpha (filename[1]))
1844     {
1845       if (filename[2] == ':')
1846 	offs = 1;
1847       else if (filename[2] == '|')
1848 	{
1849 	  filename[2] = ':';
1850 	  offs = 1;
1851 	}
1852     }
1853 #endif
1854 
1855   result = g_strdup (filename + offs);
1856   g_free (filename);
1857 
1858   return result;
1859 }
1860 
1861 #if defined (G_OS_WIN32) && !defined (_WIN64)
1862 
1863 #undef g_filename_from_uri
1864 
1865 gchar *
g_filename_from_uri(const gchar * uri,gchar ** hostname,GError ** error)1866 g_filename_from_uri (const gchar *uri,
1867 		     gchar      **hostname,
1868 		     GError     **error)
1869 {
1870   gchar *utf8_filename;
1871   gchar *retval = NULL;
1872 
1873   utf8_filename = g_filename_from_uri_utf8 (uri, hostname, error);
1874   if (utf8_filename)
1875     {
1876       retval = g_locale_from_utf8 (utf8_filename, -1, NULL, NULL, error);
1877       g_free (utf8_filename);
1878     }
1879   return retval;
1880 }
1881 
1882 #endif
1883 
1884 /**
1885  * g_filename_to_uri:
1886  * @filename: an absolute filename specified in the GLib file name encoding,
1887  *            which is the on-disk file name bytes on Unix, and UTF-8 on
1888  *            Windows
1889  * @hostname: A UTF-8 encoded hostname, or %NULL for none.
1890  * @error: location to store the error occuring, or %NULL to ignore
1891  *         errors. Any of the errors in #GConvertError may occur.
1892  *
1893  * Converts an absolute filename to an escaped ASCII-encoded URI, with the path
1894  * component following Section 3.3. of RFC 2396.
1895  *
1896  * Return value: a newly-allocated string holding the resulting
1897  *               URI, or %NULL on an error.
1898  **/
1899 gchar *
g_filename_to_uri(const gchar * filename,const gchar * hostname,GError ** error)1900 g_filename_to_uri (const gchar *filename,
1901 		   const gchar *hostname,
1902 		   GError     **error)
1903 {
1904   char *escaped_uri;
1905 
1906   g_return_val_if_fail (filename != NULL, NULL);
1907 
1908   if (!g_path_is_absolute (filename))
1909     {
1910       g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_NOT_ABSOLUTE_PATH,
1911 		   _("The pathname '%s' is not an absolute path"),
1912 		   filename);
1913       return NULL;
1914     }
1915 
1916   if (hostname &&
1917       !(g_utf8_validate (hostname, -1, NULL)
1918 	&& hostname_validate (hostname)))
1919     {
1920       g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
1921                            _("Invalid hostname"));
1922       return NULL;
1923     }
1924 
1925 #ifdef G_OS_WIN32
1926   /* Don't use localhost unnecessarily */
1927   if (hostname && g_ascii_strcasecmp (hostname, "localhost") == 0)
1928     hostname = NULL;
1929 #endif
1930 
1931   escaped_uri = g_escape_file_uri (hostname, filename);
1932 
1933   return escaped_uri;
1934 }
1935 
1936 #if defined (G_OS_WIN32) && !defined (_WIN64)
1937 
1938 #undef g_filename_to_uri
1939 
1940 gchar *
g_filename_to_uri(const gchar * filename,const gchar * hostname,GError ** error)1941 g_filename_to_uri (const gchar *filename,
1942 		   const gchar *hostname,
1943 		   GError     **error)
1944 {
1945   gchar *utf8_filename;
1946   gchar *retval = NULL;
1947 
1948   utf8_filename = g_locale_to_utf8 (filename, -1, NULL, NULL, error);
1949 
1950   if (utf8_filename)
1951     {
1952       retval = g_filename_to_uri_utf8 (utf8_filename, hostname, error);
1953       g_free (utf8_filename);
1954     }
1955 
1956   return retval;
1957 }
1958 
1959 #endif
1960 
1961 /**
1962  * g_uri_list_extract_uris:
1963  * @uri_list: an URI list
1964  *
1965  * Splits an URI list conforming to the text/uri-list
1966  * mime type defined in RFC 2483 into individual URIs,
1967  * discarding any comments. The URIs are not validated.
1968  *
1969  * Returns: a newly allocated %NULL-terminated list of
1970  *   strings holding the individual URIs. The array should
1971  *   be freed with g_strfreev().
1972  *
1973  * Since: 2.6
1974  */
1975 gchar **
g_uri_list_extract_uris(const gchar * uri_list)1976 g_uri_list_extract_uris (const gchar *uri_list)
1977 {
1978   GSList *uris, *u;
1979   const gchar *p, *q;
1980   gchar **result;
1981   gint n_uris = 0;
1982 
1983   uris = NULL;
1984 
1985   p = uri_list;
1986 
1987   /* We don't actually try to validate the URI according to RFC
1988    * 2396, or even check for allowed characters - we just ignore
1989    * comments and trim whitespace off the ends.  We also
1990    * allow LF delimination as well as the specified CRLF.
1991    *
1992    * We do allow comments like specified in RFC 2483.
1993    */
1994   while (p)
1995     {
1996       if (*p != '#')
1997 	{
1998 	  while (g_ascii_isspace (*p))
1999 	    p++;
2000 
2001 	  q = p;
2002 	  while (*q && (*q != '\n') && (*q != '\r'))
2003 	    q++;
2004 
2005 	  if (q > p)
2006 	    {
2007 	      q--;
2008 	      while (q > p && g_ascii_isspace (*q))
2009 		q--;
2010 
2011 	      if (q > p)
2012 		{
2013 		  uris = g_slist_prepend (uris, g_strndup (p, q - p + 1));
2014 		  n_uris++;
2015 		}
2016 	    }
2017 	}
2018       p = strchr (p, '\n');
2019       if (p)
2020 	p++;
2021     }
2022 
2023   result = g_new (gchar *, n_uris + 1);
2024 
2025   result[n_uris--] = NULL;
2026   for (u = uris; u; u = u->next)
2027     result[n_uris--] = u->data;
2028 
2029   g_slist_free (uris);
2030 
2031   return result;
2032 }
2033 
2034 /**
2035  * g_filename_display_basename:
2036  * @filename: an absolute pathname in the GLib file name encoding
2037  *
2038  * Returns the display basename for the particular filename, guaranteed
2039  * to be valid UTF-8. The display name might not be identical to the filename,
2040  * for instance there might be problems converting it to UTF-8, and some files
2041  * can be translated in the display.
2042  *
2043  * If GLib can not make sense of the encoding of @filename, as a last resort it
2044  * replaces unknown characters with U+FFFD, the Unicode replacement character.
2045  * You can search the result for the UTF-8 encoding of this character (which is
2046  * "\357\277\275" in octal notation) to find out if @filename was in an invalid
2047  * encoding.
2048  *
2049  * You must pass the whole absolute pathname to this functions so that
2050  * translation of well known locations can be done.
2051  *
2052  * This function is preferred over g_filename_display_name() if you know the
2053  * whole path, as it allows translation.
2054  *
2055  * Return value: a newly allocated string containing
2056  *   a rendition of the basename of the filename in valid UTF-8
2057  *
2058  * Since: 2.6
2059  **/
2060 gchar *
g_filename_display_basename(const gchar * filename)2061 g_filename_display_basename (const gchar *filename)
2062 {
2063   char *basename;
2064   char *display_name;
2065 
2066   g_return_val_if_fail (filename != NULL, NULL);
2067 
2068   basename = g_path_get_basename (filename);
2069   display_name = g_filename_display_name (basename);
2070   g_free (basename);
2071   return display_name;
2072 }
2073 
2074 /**
2075  * g_filename_display_name:
2076  * @filename: a pathname hopefully in the GLib file name encoding
2077  *
2078  * Converts a filename into a valid UTF-8 string. The conversion is
2079  * not necessarily reversible, so you should keep the original around
2080  * and use the return value of this function only for display purposes.
2081  * Unlike g_filename_to_utf8(), the result is guaranteed to be non-%NULL
2082  * even if the filename actually isn't in the GLib file name encoding.
2083  *
2084  * If GLib can not make sense of the encoding of @filename, as a last resort it
2085  * replaces unknown characters with U+FFFD, the Unicode replacement character.
2086  * You can search the result for the UTF-8 encoding of this character (which is
2087  * "\357\277\275" in octal notation) to find out if @filename was in an invalid
2088  * encoding.
2089  *
2090  * If you know the whole pathname of the file you should use
2091  * g_filename_display_basename(), since that allows location-based
2092  * translation of filenames.
2093  *
2094  * Return value: a newly allocated string containing
2095  *   a rendition of the filename in valid UTF-8
2096  *
2097  * Since: 2.6
2098  **/
2099 gchar *
g_filename_display_name(const gchar * filename)2100 g_filename_display_name (const gchar *filename)
2101 {
2102   gint i;
2103   const gchar **charsets;
2104   gchar *display_name = NULL;
2105   gboolean is_utf8;
2106 
2107   is_utf8 = g_get_filename_charsets (&charsets);
2108 
2109   if (is_utf8)
2110     {
2111       if (g_utf8_validate (filename, -1, NULL))
2112 	display_name = g_strdup (filename);
2113     }
2114 
2115   if (!display_name)
2116     {
2117       /* Try to convert from the filename charsets to UTF-8.
2118        * Skip the first charset if it is UTF-8.
2119        */
2120       for (i = is_utf8 ? 1 : 0; charsets[i]; i++)
2121 	{
2122 	  display_name = g_convert (filename, -1, "UTF-8", charsets[i],
2123 				    NULL, NULL, NULL);
2124 
2125 	  if (display_name)
2126 	    break;
2127 	}
2128     }
2129 
2130   /* if all conversions failed, we replace invalid UTF-8
2131    * by a question mark
2132    */
2133   if (!display_name)
2134     display_name = _g_utf8_make_valid (filename);
2135 
2136   return display_name;
2137 }
2138 
2139 #define __G_CONVERT_C__
2140 #include "galiasdef.c"
2141