1 /* GIO - GLib Input, Output and Streaming Library
2 *
3 * Copyright (C) 2009 Red Hat, Inc.
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General
16 * Public License along with this library; if not, see <http://www.gnu.org/licenses/>.
17 *
18 * Author: Alexander Larsson <alexl@redhat.com>
19 */
20
21 #include "config.h"
22
23 #include "gcharsetconverter.h"
24
25 #include <errno.h>
26
27 #include "ginitable.h"
28 #include "gioerror.h"
29 #include "glibintl.h"
30
31
/* GObject property identifiers installed by the class initialiser. */
enum {
  PROP_0,             /* placeholder so that real property IDs start at 1 */
  PROP_FROM_CHARSET,  /* "from-charset" */
  PROP_TO_CHARSET,    /* "to-charset" */
  PROP_USE_FALLBACK   /* "use-fallback" */
};
38
39 /**
40 * SECTION:gcharsetconverter
41 * @short_description: Convert between charsets
42 * @include: gio/gio.h
43 *
44 * #GCharsetConverter is an implementation of #GConverter based on
45 * GIConv.
46 */
47
48 static void g_charset_converter_iface_init (GConverterIface *iface);
49 static void g_charset_converter_initable_iface_init (GInitableIface *iface);
50
51 /**
52 * GCharsetConverter:
53 *
54 * Conversions between character sets.
55 */
56 struct _GCharsetConverter
57 {
58 GObject parent_instance;
59
60 char *from;
61 char *to;
62 GIConv iconv;
63 gboolean use_fallback;
64 guint n_fallback_errors;
65 };
66
/* Register GCharsetConverter as a GObject subclass that implements both
 * the GConverter and the GInitable interfaces. */
G_DEFINE_TYPE_WITH_CODE (GCharsetConverter, g_charset_converter, G_TYPE_OBJECT,
                         G_IMPLEMENT_INTERFACE (G_TYPE_CONVERTER, g_charset_converter_iface_init);
                         G_IMPLEMENT_INTERFACE (G_TYPE_INITABLE, g_charset_converter_initable_iface_init))
72
73 static void
74 g_charset_converter_finalize (GObject *object)
75 {
76 GCharsetConverter *conv;
77
78 conv = G_CHARSET_CONVERTER (object);
79
80 g_free (conv->from);
81 g_free (conv->to);
82 if (conv->iconv)
83 g_iconv_close (conv->iconv);
84
85 G_OBJECT_CLASS (g_charset_converter_parent_class)->finalize (object);
86 }
87
88 static void
g_charset_converter_set_property(GObject * object,guint prop_id,const GValue * value,GParamSpec * pspec)89 g_charset_converter_set_property (GObject *object,
90 guint prop_id,
91 const GValue *value,
92 GParamSpec *pspec)
93 {
94 GCharsetConverter *conv;
95
96 conv = G_CHARSET_CONVERTER (object);
97
98 switch (prop_id)
99 {
100 case PROP_TO_CHARSET:
101 g_free (conv->to);
102 conv->to = g_value_dup_string (value);
103 break;
104
105 case PROP_FROM_CHARSET:
106 g_free (conv->from);
107 conv->from = g_value_dup_string (value);
108 break;
109
110 case PROP_USE_FALLBACK:
111 conv->use_fallback = g_value_get_boolean (value);
112 break;
113
114 default:
115 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
116 break;
117 }
118
119 }
120
121 static void
g_charset_converter_get_property(GObject * object,guint prop_id,GValue * value,GParamSpec * pspec)122 g_charset_converter_get_property (GObject *object,
123 guint prop_id,
124 GValue *value,
125 GParamSpec *pspec)
126 {
127 GCharsetConverter *conv;
128
129 conv = G_CHARSET_CONVERTER (object);
130
131 switch (prop_id)
132 {
133 case PROP_TO_CHARSET:
134 g_value_set_string (value, conv->to);
135 break;
136
137 case PROP_FROM_CHARSET:
138 g_value_set_string (value, conv->from);
139 break;
140
141 case PROP_USE_FALLBACK:
142 g_value_set_boolean (value, conv->use_fallback);
143 break;
144
145 default:
146 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
147 break;
148 }
149 }
150
151 static void
g_charset_converter_class_init(GCharsetConverterClass * klass)152 g_charset_converter_class_init (GCharsetConverterClass *klass)
153 {
154 GObjectClass *gobject_class = G_OBJECT_CLASS (klass);
155
156 gobject_class->finalize = g_charset_converter_finalize;
157 gobject_class->get_property = g_charset_converter_get_property;
158 gobject_class->set_property = g_charset_converter_set_property;
159
160 g_object_class_install_property (gobject_class,
161 PROP_TO_CHARSET,
162 g_param_spec_string ("to-charset",
163 P_("To Charset"),
164 P_("The character encoding to convert to"),
165 NULL,
166 G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY |
167 G_PARAM_STATIC_STRINGS));
168 g_object_class_install_property (gobject_class,
169 PROP_FROM_CHARSET,
170 g_param_spec_string ("from-charset",
171 P_("From Charset"),
172 P_("The character encoding to convert from"),
173 NULL,
174 G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY |
175 G_PARAM_STATIC_STRINGS));
176 g_object_class_install_property (gobject_class,
177 PROP_USE_FALLBACK,
178 g_param_spec_boolean ("use-fallback",
179 P_("Fallback enabled"),
180 P_("Use fallback (of form \\<hexval>) for invalid bytes"),
181 FALSE,
182 G_PARAM_READWRITE |
183 G_PARAM_CONSTRUCT |
184 G_PARAM_STATIC_STRINGS));
185 }
186
187 static void
g_charset_converter_init(GCharsetConverter * local)188 g_charset_converter_init (GCharsetConverter *local)
189 {
190 }
191
192
193 /**
194 * g_charset_converter_new:
195 * @to_charset: destination charset
196 * @from_charset: source charset
197 * @error: #GError for error reporting, or %NULL to ignore.
198 *
199 * Creates a new #GCharsetConverter.
200 *
201 * Returns: a new #GCharsetConverter or %NULL on error.
202 *
203 * Since: 2.24
204 **/
205 GCharsetConverter *
g_charset_converter_new(const gchar * to_charset,const gchar * from_charset,GError ** error)206 g_charset_converter_new (const gchar *to_charset,
207 const gchar *from_charset,
208 GError **error)
209 {
210 GCharsetConverter *conv;
211
212 conv = g_initable_new (G_TYPE_CHARSET_CONVERTER,
213 NULL, error,
214 "to-charset", to_charset,
215 "from-charset", from_charset,
216 NULL);
217
218 return conv;
219 }
220
221 static void
g_charset_converter_reset(GConverter * converter)222 g_charset_converter_reset (GConverter *converter)
223 {
224 GCharsetConverter *conv = G_CHARSET_CONVERTER (converter);
225
226 if (conv->iconv == NULL)
227 {
228 g_warning ("Invalid object, not initialized");
229 return;
230 }
231
232 g_iconv (conv->iconv, NULL, NULL, NULL, NULL);
233 conv->n_fallback_errors = 0;
234 }
235
236 static GConverterResult
g_charset_converter_convert(GConverter * converter,const void * inbuf,gsize inbuf_size,void * outbuf,gsize outbuf_size,GConverterFlags flags,gsize * bytes_read,gsize * bytes_written,GError ** error)237 g_charset_converter_convert (GConverter *converter,
238 const void *inbuf,
239 gsize inbuf_size,
240 void *outbuf,
241 gsize outbuf_size,
242 GConverterFlags flags,
243 gsize *bytes_read,
244 gsize *bytes_written,
245 GError **error)
246 {
247 GCharsetConverter *conv;
248 gsize res;
249 GConverterResult ret;
250 gchar *inbufp, *outbufp;
251 gsize in_left, out_left;
252 int errsv;
253 gboolean reset;
254
255 conv = G_CHARSET_CONVERTER (converter);
256
257 if (conv->iconv == NULL)
258 {
259 g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_NOT_INITIALIZED,
260 _("Invalid object, not initialized"));
261 return G_CONVERTER_ERROR;
262 }
263
264 inbufp = (char *)inbuf;
265 outbufp = (char *)outbuf;
266 in_left = inbuf_size;
267 out_left = outbuf_size;
268 reset = FALSE;
269
270 /* if there is not input try to flush the data */
271 if (inbuf_size == 0)
272 {
273 if (flags & G_CONVERTER_INPUT_AT_END ||
274 flags & G_CONVERTER_FLUSH)
275 {
276 reset = TRUE;
277 }
278 else
279 {
280 g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_PARTIAL_INPUT,
281 _("Incomplete multibyte sequence in input"));
282 return G_CONVERTER_ERROR;
283 }
284 }
285
286 if (reset)
287 /* call g_iconv with NULL inbuf to cleanup shift state */
288 res = g_iconv (conv->iconv,
289 NULL, &in_left,
290 &outbufp, &out_left);
291 else
292 res = g_iconv (conv->iconv,
293 &inbufp, &in_left,
294 &outbufp, &out_left);
295
296 *bytes_read = inbufp - (char *)inbuf;
297 *bytes_written = outbufp - (char *)outbuf;
298
299 /* Don't report error if we converted anything */
300 if (res == (gsize) -1 && *bytes_read == 0)
301 {
302 errsv = errno;
303
304 switch (errsv)
305 {
306 case EINVAL:
307 /* Incomplete input text */
308 g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_PARTIAL_INPUT,
309 _("Incomplete multibyte sequence in input"));
310 break;
311
312 case E2BIG:
313 /* Not enough destination space */
314 g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_NO_SPACE,
315 _("Not enough space in destination"));
316 break;
317
318 case EILSEQ:
319 /* Invalid code sequence */
320 if (conv->use_fallback)
321 {
322 if (outbuf_size < 3)
323 g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_NO_SPACE,
324 _("Not enough space in destination"));
325 else
326 {
327 const char hex[] = "0123456789ABCDEF";
328 guint8 v = *(guint8 *)inbuf;
329 guint8 *out = (guint8 *)outbuf;
330 out[0] = '\\';
331 out[1] = hex[(v & 0xf0) >> 4];
332 out[2] = hex[(v & 0x0f) >> 0];
333 *bytes_read = 1;
334 *bytes_written = 3;
335 in_left--;
336 conv->n_fallback_errors++;
337 goto ok;
338 }
339 }
340 else
341 g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_INVALID_DATA,
342 _("Invalid byte sequence in conversion input"));
343 break;
344
345 default:
346 g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
347 _("Error during conversion: %s"),
348 g_strerror (errsv));
349 break;
350 }
351 ret = G_CONVERTER_ERROR;
352 }
353 else
354 {
355 ok:
356 ret = G_CONVERTER_CONVERTED;
357
358 if (reset &&
359 (flags & G_CONVERTER_INPUT_AT_END))
360 ret = G_CONVERTER_FINISHED;
361 else if (reset &&
362 (flags & G_CONVERTER_FLUSH))
363 ret = G_CONVERTER_FLUSHED;
364 }
365
366 return ret;
367 }
368
369 /**
370 * g_charset_converter_set_use_fallback:
371 * @converter: a #GCharsetConverter
372 * @use_fallback: %TRUE to use fallbacks
373 *
374 * Sets the #GCharsetConverter:use-fallback property.
375 *
376 * Since: 2.24
377 */
378 void
g_charset_converter_set_use_fallback(GCharsetConverter * converter,gboolean use_fallback)379 g_charset_converter_set_use_fallback (GCharsetConverter *converter,
380 gboolean use_fallback)
381 {
382 use_fallback = !!use_fallback;
383
384 if (converter->use_fallback != use_fallback)
385 {
386 converter->use_fallback = use_fallback;
387 g_object_notify (G_OBJECT (converter), "use-fallback");
388 }
389 }
390
391 /**
392 * g_charset_converter_get_use_fallback:
393 * @converter: a #GCharsetConverter
394 *
395 * Gets the #GCharsetConverter:use-fallback property.
396 *
397 * Returns: %TRUE if fallbacks are used by @converter
398 *
399 * Since: 2.24
400 */
401 gboolean
g_charset_converter_get_use_fallback(GCharsetConverter * converter)402 g_charset_converter_get_use_fallback (GCharsetConverter *converter)
403 {
404 return converter->use_fallback;
405 }
406
407 /**
408 * g_charset_converter_get_num_fallbacks:
409 * @converter: a #GCharsetConverter
410 *
411 * Gets the number of fallbacks that @converter has applied so far.
412 *
413 * Returns: the number of fallbacks that @converter has applied
414 *
415 * Since: 2.24
416 */
417 guint
g_charset_converter_get_num_fallbacks(GCharsetConverter * converter)418 g_charset_converter_get_num_fallbacks (GCharsetConverter *converter)
419 {
420 return converter->n_fallback_errors;
421 }
422
423 static void
g_charset_converter_iface_init(GConverterIface * iface)424 g_charset_converter_iface_init (GConverterIface *iface)
425 {
426 iface->convert = g_charset_converter_convert;
427 iface->reset = g_charset_converter_reset;
428 }
429
430 static gboolean
g_charset_converter_initable_init(GInitable * initable,GCancellable * cancellable,GError ** error)431 g_charset_converter_initable_init (GInitable *initable,
432 GCancellable *cancellable,
433 GError **error)
434 {
435 GCharsetConverter *conv;
436 int errsv;
437
438 g_return_val_if_fail (G_IS_CHARSET_CONVERTER (initable), FALSE);
439
440 conv = G_CHARSET_CONVERTER (initable);
441
442 if (cancellable != NULL)
443 {
444 g_set_error_literal (error, G_IO_ERROR, G_IO_ERROR_NOT_SUPPORTED,
445 _("Cancellable initialization not supported"));
446 return FALSE;
447 }
448
449 conv->iconv = g_iconv_open (conv->to, conv->from);
450 errsv = errno;
451
452 if (conv->iconv == (GIConv)-1)
453 {
454 if (errsv == EINVAL)
455 g_set_error (error, G_IO_ERROR, G_IO_ERROR_NOT_SUPPORTED,
456 _("Conversion from character set “%s” to “%s” is not supported"),
457 conv->from, conv->to);
458 else
459 g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
460 _("Could not open converter from “%s” to “%s”"),
461 conv->from, conv->to);
462 return FALSE;
463 }
464
465 return TRUE;
466 }
467
468 static void
g_charset_converter_initable_iface_init(GInitableIface * iface)469 g_charset_converter_initable_iface_init (GInitableIface *iface)
470 {
471 iface->init = g_charset_converter_initable_init;
472 }
473