/* GIO - GLib Input, Output and Streaming Library
 *
 * Copyright (C) 2009 Red Hat, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General
 * Public License along with this library; if not, see <http://www.gnu.org/licenses/>.
 *
 * Author: Alexander Larsson <alexl@redhat.com>
 */
20 
21 #include "config.h"
22 
23 #include "gcharsetconverter.h"
24 
25 #include <errno.h>
26 
27 #include "ginitable.h"
28 #include "gioerror.h"
29 #include "glibintl.h"
30 
31 
/* Property identifiers used by the get/set property handlers.  PROP_0 is
 * only a placeholder: no property is installed under it. */
enum
{
  PROP_0,
  PROP_FROM_CHARSET,
  PROP_TO_CHARSET,
  PROP_USE_FALLBACK
};
38 
/**
 * SECTION:gcharsetconverter
 * @short_description: Convert between charsets
 * @include: gio/gio.h
 *
 * #GCharsetConverter is an implementation of #GConverter based on
 * GIConv.
 */
47 
48 static void g_charset_converter_iface_init          (GConverterIface *iface);
49 static void g_charset_converter_initable_iface_init (GInitableIface  *iface);
50 
51 /**
52  * GCharsetConverter:
53  *
54  * Conversions between character sets.
55  */
56 struct _GCharsetConverter
57 {
58   GObject parent_instance;
59 
60   char *from;
61   char *to;
62   GIConv iconv;
63   gboolean use_fallback;
64   guint n_fallback_errors;
65 };
66 
/* Register GCharsetConverter as a GObject subclass that implements the
 * GConverter and GInitable interfaces. */
G_DEFINE_TYPE_WITH_CODE (GCharsetConverter, g_charset_converter, G_TYPE_OBJECT,
                         G_IMPLEMENT_INTERFACE (G_TYPE_CONVERTER,
                                                g_charset_converter_iface_init);
                         G_IMPLEMENT_INTERFACE (G_TYPE_INITABLE,
                                                g_charset_converter_initable_iface_init))
72 
73 static void
74 g_charset_converter_finalize (GObject *object)
75 {
76   GCharsetConverter *conv;
77 
78   conv = G_CHARSET_CONVERTER (object);
79 
80   g_free (conv->from);
81   g_free (conv->to);
82   if (conv->iconv)
83     g_iconv_close (conv->iconv);
84 
85   G_OBJECT_CLASS (g_charset_converter_parent_class)->finalize (object);
86 }
87 
88 static void
g_charset_converter_set_property(GObject * object,guint prop_id,const GValue * value,GParamSpec * pspec)89 g_charset_converter_set_property (GObject      *object,
90 				  guint         prop_id,
91 				  const GValue *value,
92 				  GParamSpec   *pspec)
93 {
94   GCharsetConverter *conv;
95 
96   conv = G_CHARSET_CONVERTER (object);
97 
98   switch (prop_id)
99     {
100     case PROP_TO_CHARSET:
101       g_free (conv->to);
102       conv->to = g_value_dup_string (value);
103       break;
104 
105     case PROP_FROM_CHARSET:
106       g_free (conv->from);
107       conv->from = g_value_dup_string (value);
108       break;
109 
110     case PROP_USE_FALLBACK:
111       conv->use_fallback = g_value_get_boolean (value);
112       break;
113 
114     default:
115       G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
116       break;
117     }
118 
119 }
120 
121 static void
g_charset_converter_get_property(GObject * object,guint prop_id,GValue * value,GParamSpec * pspec)122 g_charset_converter_get_property (GObject    *object,
123 				  guint       prop_id,
124 				  GValue     *value,
125 				  GParamSpec *pspec)
126 {
127   GCharsetConverter *conv;
128 
129   conv = G_CHARSET_CONVERTER (object);
130 
131   switch (prop_id)
132     {
133     case PROP_TO_CHARSET:
134       g_value_set_string (value, conv->to);
135       break;
136 
137     case PROP_FROM_CHARSET:
138       g_value_set_string (value, conv->from);
139       break;
140 
141     case PROP_USE_FALLBACK:
142       g_value_set_boolean (value, conv->use_fallback);
143       break;
144 
145     default:
146       G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
147       break;
148     }
149 }
150 
151 static void
g_charset_converter_class_init(GCharsetConverterClass * klass)152 g_charset_converter_class_init (GCharsetConverterClass *klass)
153 {
154   GObjectClass *gobject_class = G_OBJECT_CLASS (klass);
155 
156   gobject_class->finalize = g_charset_converter_finalize;
157   gobject_class->get_property = g_charset_converter_get_property;
158   gobject_class->set_property = g_charset_converter_set_property;
159 
160   g_object_class_install_property (gobject_class,
161 				   PROP_TO_CHARSET,
162 				   g_param_spec_string ("to-charset",
163 							P_("To Charset"),
164 							P_("The character encoding to convert to"),
165 							NULL,
166 							G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY |
167 							G_PARAM_STATIC_STRINGS));
168   g_object_class_install_property (gobject_class,
169 				   PROP_FROM_CHARSET,
170 				   g_param_spec_string ("from-charset",
171 							P_("From Charset"),
172 							P_("The character encoding to convert from"),
173 							NULL,
174 							G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY |
175 							G_PARAM_STATIC_STRINGS));
176   g_object_class_install_property (gobject_class,
177 				   PROP_USE_FALLBACK,
178 				   g_param_spec_boolean ("use-fallback",
179 							 P_("Fallback enabled"),
180 							 P_("Use fallback (of form \\<hexval>) for invalid bytes"),
181 							 FALSE,
182 							 G_PARAM_READWRITE |
183 							 G_PARAM_CONSTRUCT |
184 							 G_PARAM_STATIC_STRINGS));
185 }
186 
187 static void
g_charset_converter_init(GCharsetConverter * local)188 g_charset_converter_init (GCharsetConverter *local)
189 {
190 }
191 
192 
193 /**
194  * g_charset_converter_new:
195  * @to_charset: destination charset
196  * @from_charset: source charset
197  * @error: #GError for error reporting, or %NULL to ignore.
198  *
199  * Creates a new #GCharsetConverter.
200  *
201  * Returns: a new #GCharsetConverter or %NULL on error.
202  *
203  * Since: 2.24
204  **/
205 GCharsetConverter *
g_charset_converter_new(const gchar * to_charset,const gchar * from_charset,GError ** error)206 g_charset_converter_new (const gchar *to_charset,
207 			 const gchar *from_charset,
208 			 GError      **error)
209 {
210   GCharsetConverter *conv;
211 
212   conv = g_initable_new (G_TYPE_CHARSET_CONVERTER,
213 			 NULL, error,
214 			 "to-charset", to_charset,
215 			 "from-charset", from_charset,
216 			 NULL);
217 
218   return conv;
219 }
220 
221 static void
g_charset_converter_reset(GConverter * converter)222 g_charset_converter_reset (GConverter *converter)
223 {
224   GCharsetConverter *conv = G_CHARSET_CONVERTER (converter);
225 
226   if (conv->iconv == NULL)
227     {
228       g_warning ("Invalid object, not initialized");
229       return;
230     }
231 
232   g_iconv (conv->iconv, NULL, NULL, NULL, NULL);
233   conv->n_fallback_errors = 0;
234 }
235 
236 static GConverterResult
g_charset_converter_convert(GConverter * converter,const void * inbuf,gsize inbuf_size,void * outbuf,gsize outbuf_size,GConverterFlags flags,gsize * bytes_read,gsize * bytes_written,GError ** error)237 g_charset_converter_convert (GConverter       *converter,
238 			     const void       *inbuf,
239 			     gsize             inbuf_size,
240 			     void             *outbuf,
241 			     gsize             outbuf_size,
242 			     GConverterFlags   flags,
243 			     gsize            *bytes_read,
244 			     gsize            *bytes_written,
245 			     GError          **error)
246 {
247   GCharsetConverter  *conv;
248   gsize res;
249   GConverterResult ret;
250   gchar *inbufp, *outbufp;
251   gsize in_left, out_left;
252   int errsv;
253   gboolean reset;
254 
255   conv = G_CHARSET_CONVERTER (converter);
256 
257   if (conv->iconv == NULL)
258     {
259       g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_NOT_INITIALIZED,
260 			   _("Invalid object, not initialized"));
261       return G_CONVERTER_ERROR;
262     }
263 
264   inbufp = (char *)inbuf;
265   outbufp = (char *)outbuf;
266   in_left = inbuf_size;
267   out_left = outbuf_size;
268   reset = FALSE;
269 
270   /* if there is not input try to flush the data */
271   if (inbuf_size == 0)
272     {
273       if (flags & G_CONVERTER_INPUT_AT_END ||
274           flags & G_CONVERTER_FLUSH)
275         {
276           reset = TRUE;
277         }
278       else
279         {
280           g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_PARTIAL_INPUT,
281                                _("Incomplete multibyte sequence in input"));
282           return G_CONVERTER_ERROR;
283         }
284     }
285 
286   if (reset)
287     /* call g_iconv with NULL inbuf to cleanup shift state */
288     res = g_iconv (conv->iconv,
289                    NULL, &in_left,
290                    &outbufp, &out_left);
291   else
292     res = g_iconv (conv->iconv,
293                    &inbufp, &in_left,
294                    &outbufp, &out_left);
295 
296   *bytes_read = inbufp - (char *)inbuf;
297   *bytes_written = outbufp - (char *)outbuf;
298 
299   /* Don't report error if we converted anything */
300   if (res == (gsize) -1 && *bytes_read == 0)
301     {
302       errsv = errno;
303 
304       switch (errsv)
305 	{
306 	case EINVAL:
307 	  /* Incomplete input text */
308 	  g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_PARTIAL_INPUT,
309 			       _("Incomplete multibyte sequence in input"));
310 	  break;
311 
312 	case E2BIG:
313 	  /* Not enough destination space */
314 	  g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_NO_SPACE,
315 			       _("Not enough space in destination"));
316 	  break;
317 
318 	case EILSEQ:
319 	  /* Invalid code sequence */
320 	  if (conv->use_fallback)
321 	    {
322 	      if (outbuf_size < 3)
323 		g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_NO_SPACE,
324 				     _("Not enough space in destination"));
325 	      else
326 		{
327 		  const char hex[] = "0123456789ABCDEF";
328 		  guint8 v = *(guint8 *)inbuf;
329 		  guint8 *out = (guint8 *)outbuf;
330 		  out[0] = '\\';
331 		  out[1] = hex[(v & 0xf0) >> 4];
332 		  out[2] = hex[(v & 0x0f) >> 0];
333 		  *bytes_read = 1;
334 		  *bytes_written = 3;
335 		  in_left--;
336 		  conv->n_fallback_errors++;
337 		  goto ok;
338 		}
339 	    }
340 	  else
341 	    g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_INVALID_DATA,
342 				 _("Invalid byte sequence in conversion input"));
343 	  break;
344 
345 	default:
346 	  g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
347 		       _("Error during conversion: %s"),
348 		       g_strerror (errsv));
349 	  break;
350 	}
351       ret = G_CONVERTER_ERROR;
352     }
353   else
354     {
355     ok:
356       ret = G_CONVERTER_CONVERTED;
357 
358       if (reset &&
359 	  (flags & G_CONVERTER_INPUT_AT_END))
360         ret = G_CONVERTER_FINISHED;
361       else if (reset &&
362 	       (flags & G_CONVERTER_FLUSH))
363         ret = G_CONVERTER_FLUSHED;
364     }
365 
366   return ret;
367 }
368 
369 /**
370  * g_charset_converter_set_use_fallback:
371  * @converter: a #GCharsetConverter
372  * @use_fallback: %TRUE to use fallbacks
373  *
374  * Sets the #GCharsetConverter:use-fallback property.
375  *
376  * Since: 2.24
377  */
378 void
g_charset_converter_set_use_fallback(GCharsetConverter * converter,gboolean use_fallback)379 g_charset_converter_set_use_fallback (GCharsetConverter *converter,
380 				      gboolean           use_fallback)
381 {
382   use_fallback = !!use_fallback;
383 
384   if (converter->use_fallback != use_fallback)
385     {
386       converter->use_fallback = use_fallback;
387       g_object_notify (G_OBJECT (converter), "use-fallback");
388     }
389 }
390 
391 /**
392  * g_charset_converter_get_use_fallback:
393  * @converter: a #GCharsetConverter
394  *
395  * Gets the #GCharsetConverter:use-fallback property.
396  *
397  * Returns: %TRUE if fallbacks are used by @converter
398  *
399  * Since: 2.24
400  */
401 gboolean
g_charset_converter_get_use_fallback(GCharsetConverter * converter)402 g_charset_converter_get_use_fallback (GCharsetConverter *converter)
403 {
404   return converter->use_fallback;
405 }
406 
407 /**
408  * g_charset_converter_get_num_fallbacks:
409  * @converter: a #GCharsetConverter
410  *
411  * Gets the number of fallbacks that @converter has applied so far.
412  *
413  * Returns: the number of fallbacks that @converter has applied
414  *
415  * Since: 2.24
416  */
417 guint
g_charset_converter_get_num_fallbacks(GCharsetConverter * converter)418 g_charset_converter_get_num_fallbacks (GCharsetConverter *converter)
419 {
420   return converter->n_fallback_errors;
421 }
422 
423 static void
g_charset_converter_iface_init(GConverterIface * iface)424 g_charset_converter_iface_init (GConverterIface *iface)
425 {
426   iface->convert = g_charset_converter_convert;
427   iface->reset = g_charset_converter_reset;
428 }
429 
430 static gboolean
g_charset_converter_initable_init(GInitable * initable,GCancellable * cancellable,GError ** error)431 g_charset_converter_initable_init (GInitable     *initable,
432 				   GCancellable  *cancellable,
433 				   GError       **error)
434 {
435   GCharsetConverter  *conv;
436   int errsv;
437 
438   g_return_val_if_fail (G_IS_CHARSET_CONVERTER (initable), FALSE);
439 
440   conv = G_CHARSET_CONVERTER (initable);
441 
442   if (cancellable != NULL)
443     {
444       g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_NOT_SUPPORTED,
445 			   _("Cancellable initialization not supported"));
446       return FALSE;
447     }
448 
449   conv->iconv = g_iconv_open (conv->to, conv->from);
450   errsv = errno;
451 
452   if (conv->iconv == (GIConv)-1)
453     {
454       if (errsv == EINVAL)
455 	g_set_error (error, G_IO_ERROR, G_IO_ERROR_NOT_SUPPORTED,
456 		     _("Conversion from character set “%s” to “%s” is not supported"),
457 		     conv->from, conv->to);
458       else
459 	g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
460 		     _("Could not open converter from “%s” to “%s”"),
461 		     conv->from, conv->to);
462       return FALSE;
463     }
464 
465   return TRUE;
466 }
467 
468 static void
g_charset_converter_initable_iface_init(GInitableIface * iface)469 g_charset_converter_initable_iface_init (GInitableIface *iface)
470 {
471   iface->init = g_charset_converter_initable_init;
472 }
473