1 /* Message list charset and locale charset handling.
2 Copyright (C) 2001-2003, 2005-2009, 2019-2020 Free Software Foundation, Inc.
3 Written by Bruno Haible <haible@clisp.cons.org>, 2001.
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17
18
19 #ifdef HAVE_CONFIG_H
20 # include "config.h"
21 #endif
22 #include <alloca.h>
23
24 /* Specification. */
25 #include "msgl-iconv.h"
26
27 #include <stdbool.h>
28 #include <stdlib.h>
29 #include <string.h>
30
31 #if HAVE_ICONV
32 # include <iconv.h>
33 #endif
34
35 #include "noreturn.h"
36 #include "progname.h"
37 #include "basename-lgpl.h"
38 #include "message.h"
39 #include "po-charset.h"
40 #include "xstriconv.h"
41 #include "xstriconveh.h"
42 #include "msgl-ascii.h"
43 #include "xalloc.h"
44 #include "xmalloca.h"
45 #include "c-strstr.h"
46 #include "xvasprintf.h"
47 #include "po-xerror.h"
48 #include "gettext.h"
49
50 #define _(str) gettext (str)
51
52
53 #if HAVE_ICONV
54
55 _GL_NORETURN_FUNC static void conversion_error (const struct conversion_context* context);
56 static void
conversion_error(const struct conversion_context * context)57 conversion_error (const struct conversion_context* context)
58 {
59 if (context->to_code == po_charset_utf8)
60 /* If a conversion to UTF-8 fails, the problem lies in the input. */
61 po_xerror (PO_SEVERITY_FATAL_ERROR, context->message, NULL, 0, 0, false,
62 xasprintf (_("%s: input is not valid in \"%s\" encoding"),
63 context->from_filename, context->from_code));
64 else
65 po_xerror (PO_SEVERITY_FATAL_ERROR, context->message, NULL, 0, 0, false,
66 xasprintf (_("%s: error while converting from \"%s\" encoding to \"%s\" encoding"),
67 context->from_filename, context->from_code,
68 context->to_code));
69 /* NOTREACHED */
70 abort ();
71 }
72
/* Convert the NUL-terminated STRING through the iconv descriptor CD and
   return the buffer produced by xmem_cd_iconv.  If the conversion fails, or
   if the output does not contain exactly one NUL byte located at its end,
   the failure is reported through conversion_error using CONTEXT.  */
char *
convert_string_directly (iconv_t cd, const char *string,
                         const struct conversion_context* context)
{
  char *converted = NULL;
  size_t converted_len = 0;
  /* The terminating NUL is converted together with the text.  */
  size_t input_len = strlen (string) + 1;

  if (xmem_cd_iconv (string, input_len, cd, &converted, &converted_len) != 0
      || converted_len == 0
      || converted[converted_len - 1] != '\0'
      || strlen (converted) != converted_len - 1)
    {
      conversion_error (context);
      /* NOTREACHED */
      return NULL;
    }

  return converted;
}
91
92 static char *
convert_string(const iconveh_t * cd,const char * string,const struct conversion_context * context)93 convert_string (const iconveh_t *cd, const char *string,
94 const struct conversion_context* context)
95 {
96 size_t len = strlen (string) + 1;
97 char *result = NULL;
98 size_t resultlen = 0;
99
100 if (xmem_cd_iconveh (string, len, cd, iconveh_error, NULL,
101 &result, &resultlen) == 0)
102 /* Verify the result has exactly one NUL byte, at the end. */
103 if (resultlen > 0 && result[resultlen - 1] == '\0'
104 && strlen (result) == resultlen - 1)
105 return result;
106
107 conversion_error (context);
108 /* NOTREACHED */
109 return NULL;
110 }
111
112 static void
convert_string_list(const iconveh_t * cd,string_list_ty * slp,const struct conversion_context * context)113 convert_string_list (const iconveh_t *cd, string_list_ty *slp,
114 const struct conversion_context* context)
115 {
116 size_t i;
117
118 if (slp != NULL)
119 for (i = 0; i < slp->nitems; i++)
120 slp->item[i] = convert_string (cd, slp->item[i], context);
121 }
122
123 static void
convert_prev_msgid(const iconveh_t * cd,message_ty * mp,const struct conversion_context * context)124 convert_prev_msgid (const iconveh_t *cd, message_ty *mp,
125 const struct conversion_context* context)
126 {
127 if (mp->prev_msgctxt != NULL)
128 mp->prev_msgctxt = convert_string (cd, mp->prev_msgctxt, context);
129 if (mp->prev_msgid != NULL)
130 mp->prev_msgid = convert_string (cd, mp->prev_msgid, context);
131 if (mp->prev_msgid_plural != NULL)
132 mp->prev_msgid_plural = convert_string (cd, mp->prev_msgid_plural, context);
133 }
134
135 static void
convert_msgid(const iconveh_t * cd,message_ty * mp,const struct conversion_context * context)136 convert_msgid (const iconveh_t *cd, message_ty *mp,
137 const struct conversion_context* context)
138 {
139 if (mp->msgctxt != NULL)
140 mp->msgctxt = convert_string (cd, mp->msgctxt, context);
141 mp->msgid = convert_string (cd, mp->msgid, context);
142 if (mp->msgid_plural != NULL)
143 mp->msgid_plural = convert_string (cd, mp->msgid_plural, context);
144 }
145
146 static void
convert_msgstr(const iconveh_t * cd,message_ty * mp,const struct conversion_context * context)147 convert_msgstr (const iconveh_t *cd, message_ty *mp,
148 const struct conversion_context* context)
149 {
150 char *result = NULL;
151 size_t resultlen = 0;
152
153 if (!(mp->msgstr_len > 0 && mp->msgstr[mp->msgstr_len - 1] == '\0'))
154 abort ();
155
156 if (xmem_cd_iconveh (mp->msgstr, mp->msgstr_len, cd, iconveh_error, NULL,
157 &result, &resultlen) == 0)
158 /* Verify the result has a NUL byte at the end. */
159 if (resultlen > 0 && result[resultlen - 1] == '\0')
160 /* Verify the result has the same number of NUL bytes. */
161 {
162 const char *p;
163 const char *pend;
164 int nulcount1;
165 int nulcount2;
166
167 for (p = mp->msgstr, pend = p + mp->msgstr_len, nulcount1 = 0;
168 p < pend;
169 p += strlen (p) + 1, nulcount1++);
170 for (p = result, pend = p + resultlen, nulcount2 = 0;
171 p < pend;
172 p += strlen (p) + 1, nulcount2++);
173
174 if (nulcount1 == nulcount2)
175 {
176 mp->msgstr = result;
177 mp->msgstr_len = resultlen;
178 return;
179 }
180 }
181
182 conversion_error (context);
183 }
184
185 #endif
186
187
188 static bool
iconv_message_list_internal(message_list_ty * mlp,const char * canon_from_code,const char * canon_to_code,bool update_header,const char * from_filename)189 iconv_message_list_internal (message_list_ty *mlp,
190 const char *canon_from_code,
191 const char *canon_to_code,
192 bool update_header,
193 const char *from_filename)
194 {
195 bool canon_from_code_overridden = (canon_from_code != NULL);
196 bool msgids_changed;
197 size_t j;
198
199 /* If the list is empty, nothing to do. */
200 if (mlp->nitems == 0)
201 return false;
202
203 /* Search the header entry, and extract and replace the charset name. */
204 for (j = 0; j < mlp->nitems; j++)
205 if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
206 {
207 const char *header = mlp->item[j]->msgstr;
208
209 if (header != NULL)
210 {
211 const char *charsetstr = c_strstr (header, "charset=");
212
213 if (charsetstr != NULL)
214 {
215 size_t len;
216 char *charset;
217 const char *canon_charset;
218
219 charsetstr += strlen ("charset=");
220 len = strcspn (charsetstr, " \t\n");
221 charset = (char *) xmalloca (len + 1);
222 memcpy (charset, charsetstr, len);
223 charset[len] = '\0';
224
225 canon_charset = po_charset_canonicalize (charset);
226 if (canon_charset == NULL)
227 {
228 if (!canon_from_code_overridden)
229 {
230 /* Don't give an error for POT files, because POT
231 files usually contain only ASCII msgids. */
232 const char *filename = from_filename;
233 size_t filenamelen;
234
235 if (filename != NULL
236 && (filenamelen = strlen (filename)) >= 4
237 && memcmp (filename + filenamelen - 4, ".pot", 4)
238 == 0
239 && strcmp (charset, "CHARSET") == 0)
240 canon_charset = po_charset_ascii;
241 else
242 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0,
243 false,
244 xasprintf (_("present charset \"%s\" is not a portable encoding name"),
245 charset));
246 }
247 }
248 else
249 {
250 if (canon_from_code == NULL)
251 canon_from_code = canon_charset;
252 else if (canon_from_code != canon_charset)
253 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0,
254 false,
255 xasprintf (_("two different charsets \"%s\" and \"%s\" in input file"),
256 canon_from_code, canon_charset));
257 }
258 freea (charset);
259
260 if (update_header)
261 {
262 size_t len1, len2, len3;
263 char *new_header;
264
265 len1 = charsetstr - header;
266 len2 = strlen (canon_to_code);
267 len3 = (header + strlen (header)) - (charsetstr + len);
268 new_header = XNMALLOC (len1 + len2 + len3 + 1, char);
269 memcpy (new_header, header, len1);
270 memcpy (new_header + len1, canon_to_code, len2);
271 memcpy (new_header + len1 + len2, charsetstr + len,
272 len3 + 1);
273 mlp->item[j]->msgstr = new_header;
274 mlp->item[j]->msgstr_len = len1 + len2 + len3 + 1;
275 }
276 }
277 }
278 }
279 if (canon_from_code == NULL)
280 {
281 if (is_ascii_message_list (mlp))
282 canon_from_code = po_charset_ascii;
283 else
284 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
285 _("input file doesn't contain a header entry with a charset specification"));
286 }
287
288 msgids_changed = false;
289
290 /* If the two encodings are the same, nothing to do. */
291 if (canon_from_code != canon_to_code)
292 {
293 #if HAVE_ICONV
294 iconveh_t cd;
295 struct conversion_context context;
296
297 if (iconveh_open (canon_to_code, canon_from_code, &cd) < 0)
298 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
299 xasprintf (_("Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), and iconv() does not support this conversion."),
300 canon_from_code, canon_to_code,
301 last_component (program_name)));
302
303 context.from_code = canon_from_code;
304 context.to_code = canon_to_code;
305 context.from_filename = from_filename;
306
307 for (j = 0; j < mlp->nitems; j++)
308 {
309 message_ty *mp = mlp->item[j];
310
311 if ((mp->msgctxt != NULL && !is_ascii_string (mp->msgctxt))
312 || !is_ascii_string (mp->msgid))
313 msgids_changed = true;
314 context.message = mp;
315 convert_string_list (&cd, mp->comment, &context);
316 convert_string_list (&cd, mp->comment_dot, &context);
317 convert_prev_msgid (&cd, mp, &context);
318 convert_msgid (&cd, mp, &context);
319 convert_msgstr (&cd, mp, &context);
320 }
321
322 iconveh_close (&cd);
323
324 if (msgids_changed)
325 if (message_list_msgids_changed (mlp))
326 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
327 xasprintf (_("Conversion from \"%s\" to \"%s\" introduces duplicates: some different msgids become equal."),
328 canon_from_code, canon_to_code));
329 #else
330 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
331 xasprintf (_("Cannot convert from \"%s\" to \"%s\". %s relies on iconv(). This version was built without iconv()."),
332 canon_from_code, canon_to_code,
333 last_component (program_name)));
334 #endif
335 }
336
337 return msgids_changed;
338 }
339
340 bool
iconv_message_list(message_list_ty * mlp,const char * canon_from_code,const char * canon_to_code,const char * from_filename)341 iconv_message_list (message_list_ty *mlp,
342 const char *canon_from_code, const char *canon_to_code,
343 const char *from_filename)
344 {
345 return iconv_message_list_internal (mlp,
346 canon_from_code, canon_to_code, true,
347 from_filename);
348 }
349
350 msgdomain_list_ty *
iconv_msgdomain_list(msgdomain_list_ty * mdlp,const char * to_code,bool update_header,const char * from_filename)351 iconv_msgdomain_list (msgdomain_list_ty *mdlp,
352 const char *to_code,
353 bool update_header,
354 const char *from_filename)
355 {
356 const char *canon_to_code;
357 size_t k;
358
359 /* Canonicalize target encoding. */
360 canon_to_code = po_charset_canonicalize (to_code);
361 if (canon_to_code == NULL)
362 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
363 xasprintf (_("target charset \"%s\" is not a portable encoding name."),
364 to_code));
365
366 for (k = 0; k < mdlp->nitems; k++)
367 iconv_message_list_internal (mdlp->item[k]->messages,
368 mdlp->encoding, canon_to_code, update_header,
369 from_filename);
370
371 mdlp->encoding = canon_to_code;
372 return mdlp;
373 }
374
375 #if HAVE_ICONV
376
377 static bool
iconvable_string(const iconveh_t * cd,const char * string)378 iconvable_string (const iconveh_t *cd, const char *string)
379 {
380 size_t len = strlen (string) + 1;
381 char *result = NULL;
382 size_t resultlen = 0;
383
384 if (xmem_cd_iconveh (string, len, cd, iconveh_error, NULL,
385 &result, &resultlen) == 0)
386 {
387 /* Test if the result has exactly one NUL byte, at the end. */
388 bool ok = (resultlen > 0 && result[resultlen - 1] == '\0'
389 && strlen (result) == resultlen - 1);
390 free (result);
391 return ok;
392 }
393 return false;
394 }
395
396 static bool
iconvable_string_list(const iconveh_t * cd,string_list_ty * slp)397 iconvable_string_list (const iconveh_t *cd, string_list_ty *slp)
398 {
399 size_t i;
400
401 if (slp != NULL)
402 for (i = 0; i < slp->nitems; i++)
403 if (!iconvable_string (cd, slp->item[i]))
404 return false;
405 return true;
406 }
407
408 static bool
iconvable_prev_msgid(const iconveh_t * cd,message_ty * mp)409 iconvable_prev_msgid (const iconveh_t *cd, message_ty *mp)
410 {
411 if (mp->prev_msgctxt != NULL)
412 if (!iconvable_string (cd, mp->prev_msgctxt))
413 return false;
414 if (mp->prev_msgid != NULL)
415 if (!iconvable_string (cd, mp->prev_msgid))
416 return false;
417 if (mp->prev_msgid_plural != NULL)
418 if (!iconvable_string (cd, mp->prev_msgid_plural))
419 return false;
420 return true;
421 }
422
423 static bool
iconvable_msgid(const iconveh_t * cd,message_ty * mp)424 iconvable_msgid (const iconveh_t *cd, message_ty *mp)
425 {
426 if (mp->msgctxt != NULL)
427 if (!iconvable_string (cd, mp->msgctxt))
428 return false;
429 if (!iconvable_string (cd, mp->msgid))
430 return false;
431 if (mp->msgid_plural != NULL)
432 if (!iconvable_string (cd, mp->msgid_plural))
433 return false;
434 return true;
435 }
436
437 static bool
iconvable_msgstr(const iconveh_t * cd,message_ty * mp)438 iconvable_msgstr (const iconveh_t *cd, message_ty *mp)
439 {
440 char *result = NULL;
441 size_t resultlen = 0;
442
443 if (!(mp->msgstr_len > 0 && mp->msgstr[mp->msgstr_len - 1] == '\0'))
444 abort ();
445
446 if (xmem_cd_iconveh (mp->msgstr, mp->msgstr_len, cd, iconveh_error, NULL,
447 &result, &resultlen) == 0)
448 {
449 bool ok = false;
450
451 /* Test if the result has a NUL byte at the end. */
452 if (resultlen > 0 && result[resultlen - 1] == '\0')
453 /* Test if the result has the same number of NUL bytes. */
454 {
455 const char *p;
456 const char *pend;
457 int nulcount1;
458 int nulcount2;
459
460 for (p = mp->msgstr, pend = p + mp->msgstr_len, nulcount1 = 0;
461 p < pend;
462 p += strlen (p) + 1, nulcount1++);
463 for (p = result, pend = p + resultlen, nulcount2 = 0;
464 p < pend;
465 p += strlen (p) + 1, nulcount2++);
466
467 if (nulcount1 == nulcount2)
468 ok = true;
469 }
470
471 free (result);
472 return ok;
473 }
474 return false;
475 }
476
477 #endif
478
479 bool
is_message_list_iconvable(message_list_ty * mlp,const char * canon_from_code,const char * canon_to_code)480 is_message_list_iconvable (message_list_ty *mlp,
481 const char *canon_from_code,
482 const char *canon_to_code)
483 {
484 bool canon_from_code_overridden = (canon_from_code != NULL);
485 size_t j;
486
487 /* If the list is empty, nothing to check. */
488 if (mlp->nitems == 0)
489 return true;
490
491 /* Search the header entry, and extract the charset name. */
492 for (j = 0; j < mlp->nitems; j++)
493 if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
494 {
495 const char *header = mlp->item[j]->msgstr;
496
497 if (header != NULL)
498 {
499 const char *charsetstr = c_strstr (header, "charset=");
500
501 if (charsetstr != NULL)
502 {
503 size_t len;
504 char *charset;
505 const char *canon_charset;
506
507 charsetstr += strlen ("charset=");
508 len = strcspn (charsetstr, " \t\n");
509 charset = (char *) xmalloca (len + 1);
510 memcpy (charset, charsetstr, len);
511 charset[len] = '\0';
512
513 canon_charset = po_charset_canonicalize (charset);
514 if (canon_charset == NULL)
515 {
516 if (!canon_from_code_overridden)
517 {
518 /* Don't give an error for POT files, because POT
519 files usually contain only ASCII msgids. */
520 if (strcmp (charset, "CHARSET") == 0)
521 canon_charset = po_charset_ascii;
522 else
523 {
524 /* charset is not a portable encoding name. */
525 freea (charset);
526 return false;
527 }
528 }
529 }
530 else
531 {
532 if (canon_from_code == NULL)
533 canon_from_code = canon_charset;
534 else if (canon_from_code != canon_charset)
535 {
536 /* Two different charsets in input file. */
537 freea (charset);
538 return false;
539 }
540 }
541 freea (charset);
542 }
543 }
544 }
545 if (canon_from_code == NULL)
546 {
547 if (is_ascii_message_list (mlp))
548 canon_from_code = po_charset_ascii;
549 else
550 /* Input file lacks a header entry with a charset specification. */
551 return false;
552 }
553
554 /* If the two encodings are the same, nothing to check. */
555 if (canon_from_code != canon_to_code)
556 {
557 #if HAVE_ICONV
558 iconveh_t cd;
559
560 if (iconveh_open (canon_to_code, canon_from_code, &cd) < 0)
561 /* iconv() doesn't support this conversion. */
562 return false;
563
564 for (j = 0; j < mlp->nitems; j++)
565 {
566 message_ty *mp = mlp->item[j];
567
568 if (!(iconvable_string_list (&cd, mp->comment)
569 && iconvable_string_list (&cd, mp->comment_dot)
570 && iconvable_prev_msgid (&cd, mp)
571 && iconvable_msgid (&cd, mp)
572 && iconvable_msgstr (&cd, mp)))
573 return false;
574 }
575
576 iconveh_close (&cd);
577 #else
578 /* This version was built without iconv(). */
579 return false;
580 #endif
581 }
582
583 return true;
584 }
585