• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Message list charset and locale charset handling.
2    Copyright (C) 2001-2003, 2005-2009, 2019-2020 Free Software Foundation, Inc.
3    Written by Bruno Haible <haible@clisp.cons.org>, 2001.
4 
5    This program is free software: you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 3 of the License, or
8    (at your option) any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
17 
18 
19 #ifdef HAVE_CONFIG_H
20 # include "config.h"
21 #endif
22 #include <alloca.h>
23 
24 /* Specification.  */
25 #include "msgl-iconv.h"
26 
27 #include <stdbool.h>
28 #include <stdlib.h>
29 #include <string.h>
30 
31 #if HAVE_ICONV
32 # include <iconv.h>
33 #endif
34 
35 #include "noreturn.h"
36 #include "progname.h"
37 #include "basename-lgpl.h"
38 #include "message.h"
39 #include "po-charset.h"
40 #include "xstriconv.h"
41 #include "xstriconveh.h"
42 #include "msgl-ascii.h"
43 #include "xalloc.h"
44 #include "xmalloca.h"
45 #include "c-strstr.h"
46 #include "xvasprintf.h"
47 #include "po-xerror.h"
48 #include "gettext.h"
49 
50 #define _(str) gettext (str)
51 
52 
53 #if HAVE_ICONV
54 
55 _GL_NORETURN_FUNC static void conversion_error (const struct conversion_context* context);
56 static void
conversion_error(const struct conversion_context * context)57 conversion_error (const struct conversion_context* context)
58 {
59   if (context->to_code == po_charset_utf8)
60     /* If a conversion to UTF-8 fails, the problem lies in the input.  */
61     po_xerror (PO_SEVERITY_FATAL_ERROR, context->message, NULL, 0, 0, false,
62                xasprintf (_("%s: input is not valid in \"%s\" encoding"),
63                           context->from_filename, context->from_code));
64   else
65     po_xerror (PO_SEVERITY_FATAL_ERROR, context->message, NULL, 0, 0, false,
66                xasprintf (_("%s: error while converting from \"%s\" encoding to \"%s\" encoding"),
67                           context->from_filename, context->from_code,
68                           context->to_code));
69   /* NOTREACHED */
70   abort ();
71 }
72 
/* Convert the NUL-terminated STRING through the conversion descriptor CD.
   Returns the converted string as produced by xmem_cd_iconv.  If the
   conversion fails, or the output does not contain exactly one NUL byte
   located at its end, conversion_error is invoked with CONTEXT.  */
char *
convert_string_directly (iconv_t cd, const char *string,
                         const struct conversion_context *context)
{
  char *converted = NULL;
  size_t converted_size = 0;
  /* The terminating NUL is converted along with the text.  */
  size_t input_size = strlen (string) + 1;
  bool ok;

  ok = (xmem_cd_iconv (string, input_size, cd,
                       &converted, &converted_size) == 0);
  if (ok)
    /* Accept the output only if its sole NUL byte is the final byte.  */
    ok = (converted_size > 0
          && converted[converted_size - 1] == '\0'
          && strlen (converted) == converted_size - 1);

  if (!ok)
    {
      conversion_error (context);
      /* NOTREACHED */
      return NULL;
    }

  return converted;
}
91 
92 static char *
convert_string(const iconveh_t * cd,const char * string,const struct conversion_context * context)93 convert_string (const iconveh_t *cd, const char *string,
94                 const struct conversion_context* context)
95 {
96   size_t len = strlen (string) + 1;
97   char *result = NULL;
98   size_t resultlen = 0;
99 
100   if (xmem_cd_iconveh (string, len, cd, iconveh_error, NULL,
101                        &result, &resultlen) == 0)
102     /* Verify the result has exactly one NUL byte, at the end.  */
103     if (resultlen > 0 && result[resultlen - 1] == '\0'
104         && strlen (result) == resultlen - 1)
105       return result;
106 
107   conversion_error (context);
108   /* NOTREACHED */
109   return NULL;
110 }
111 
112 static void
convert_string_list(const iconveh_t * cd,string_list_ty * slp,const struct conversion_context * context)113 convert_string_list (const iconveh_t *cd, string_list_ty *slp,
114                      const struct conversion_context* context)
115 {
116   size_t i;
117 
118   if (slp != NULL)
119     for (i = 0; i < slp->nitems; i++)
120       slp->item[i] = convert_string (cd, slp->item[i], context);
121 }
122 
123 static void
convert_prev_msgid(const iconveh_t * cd,message_ty * mp,const struct conversion_context * context)124 convert_prev_msgid (const iconveh_t *cd, message_ty *mp,
125                     const struct conversion_context* context)
126 {
127   if (mp->prev_msgctxt != NULL)
128     mp->prev_msgctxt = convert_string (cd, mp->prev_msgctxt, context);
129   if (mp->prev_msgid != NULL)
130     mp->prev_msgid = convert_string (cd, mp->prev_msgid, context);
131   if (mp->prev_msgid_plural != NULL)
132     mp->prev_msgid_plural = convert_string (cd, mp->prev_msgid_plural, context);
133 }
134 
135 static void
convert_msgid(const iconveh_t * cd,message_ty * mp,const struct conversion_context * context)136 convert_msgid (const iconveh_t *cd, message_ty *mp,
137                const struct conversion_context* context)
138 {
139   if (mp->msgctxt != NULL)
140     mp->msgctxt = convert_string (cd, mp->msgctxt, context);
141   mp->msgid = convert_string (cd, mp->msgid, context);
142   if (mp->msgid_plural != NULL)
143     mp->msgid_plural = convert_string (cd, mp->msgid_plural, context);
144 }
145 
146 static void
convert_msgstr(const iconveh_t * cd,message_ty * mp,const struct conversion_context * context)147 convert_msgstr (const iconveh_t *cd, message_ty *mp,
148                 const struct conversion_context* context)
149 {
150   char *result = NULL;
151   size_t resultlen = 0;
152 
153   if (!(mp->msgstr_len > 0 && mp->msgstr[mp->msgstr_len - 1] == '\0'))
154     abort ();
155 
156   if (xmem_cd_iconveh (mp->msgstr, mp->msgstr_len, cd, iconveh_error, NULL,
157                        &result, &resultlen) == 0)
158     /* Verify the result has a NUL byte at the end.  */
159     if (resultlen > 0 && result[resultlen - 1] == '\0')
160       /* Verify the result has the same number of NUL bytes.  */
161       {
162         const char *p;
163         const char *pend;
164         int nulcount1;
165         int nulcount2;
166 
167         for (p = mp->msgstr, pend = p + mp->msgstr_len, nulcount1 = 0;
168              p < pend;
169              p += strlen (p) + 1, nulcount1++);
170         for (p = result, pend = p + resultlen, nulcount2 = 0;
171              p < pend;
172              p += strlen (p) + 1, nulcount2++);
173 
174         if (nulcount1 == nulcount2)
175           {
176             mp->msgstr = result;
177             mp->msgstr_len = resultlen;
178             return;
179           }
180       }
181 
182   conversion_error (context);
183 }
184 
185 #endif
186 
187 
188 static bool
iconv_message_list_internal(message_list_ty * mlp,const char * canon_from_code,const char * canon_to_code,bool update_header,const char * from_filename)189 iconv_message_list_internal (message_list_ty *mlp,
190                              const char *canon_from_code,
191                              const char *canon_to_code,
192                              bool update_header,
193                              const char *from_filename)
194 {
195   bool canon_from_code_overridden = (canon_from_code != NULL);
196   bool msgids_changed;
197   size_t j;
198 
199   /* If the list is empty, nothing to do.  */
200   if (mlp->nitems == 0)
201     return false;
202 
203   /* Search the header entry, and extract and replace the charset name.  */
204   for (j = 0; j < mlp->nitems; j++)
205     if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
206       {
207         const char *header = mlp->item[j]->msgstr;
208 
209         if (header != NULL)
210           {
211             const char *charsetstr = c_strstr (header, "charset=");
212 
213             if (charsetstr != NULL)
214               {
215                 size_t len;
216                 char *charset;
217                 const char *canon_charset;
218 
219                 charsetstr += strlen ("charset=");
220                 len = strcspn (charsetstr, " \t\n");
221                 charset = (char *) xmalloca (len + 1);
222                 memcpy (charset, charsetstr, len);
223                 charset[len] = '\0';
224 
225                 canon_charset = po_charset_canonicalize (charset);
226                 if (canon_charset == NULL)
227                   {
228                     if (!canon_from_code_overridden)
229                       {
230                         /* Don't give an error for POT files, because POT
231                            files usually contain only ASCII msgids.  */
232                         const char *filename = from_filename;
233                         size_t filenamelen;
234 
235                         if (filename != NULL
236                             && (filenamelen = strlen (filename)) >= 4
237                             && memcmp (filename + filenamelen - 4, ".pot", 4)
238                                == 0
239                             && strcmp (charset, "CHARSET") == 0)
240                           canon_charset = po_charset_ascii;
241                         else
242                           po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0,
243                                      false,
244                                      xasprintf (_("present charset \"%s\" is not a portable encoding name"),
245                                                 charset));
246                       }
247                   }
248                 else
249                   {
250                     if (canon_from_code == NULL)
251                       canon_from_code = canon_charset;
252                     else if (canon_from_code != canon_charset)
253                       po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0,  0,
254                                  false,
255                                  xasprintf (_("two different charsets \"%s\" and \"%s\" in input file"),
256                                             canon_from_code, canon_charset));
257                   }
258                 freea (charset);
259 
260                 if (update_header)
261                   {
262                     size_t len1, len2, len3;
263                     char *new_header;
264 
265                     len1 = charsetstr - header;
266                     len2 = strlen (canon_to_code);
267                     len3 = (header + strlen (header)) - (charsetstr + len);
268                     new_header = XNMALLOC (len1 + len2 + len3 + 1, char);
269                     memcpy (new_header, header, len1);
270                     memcpy (new_header + len1, canon_to_code, len2);
271                     memcpy (new_header + len1 + len2, charsetstr + len,
272                             len3 + 1);
273                     mlp->item[j]->msgstr = new_header;
274                     mlp->item[j]->msgstr_len = len1 + len2 + len3 + 1;
275                   }
276               }
277           }
278       }
279   if (canon_from_code == NULL)
280     {
281       if (is_ascii_message_list (mlp))
282         canon_from_code = po_charset_ascii;
283       else
284         po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
285                    _("input file doesn't contain a header entry with a charset specification"));
286     }
287 
288   msgids_changed = false;
289 
290   /* If the two encodings are the same, nothing to do.  */
291   if (canon_from_code != canon_to_code)
292     {
293 #if HAVE_ICONV
294       iconveh_t cd;
295       struct conversion_context context;
296 
297       if (iconveh_open (canon_to_code, canon_from_code, &cd) < 0)
298         po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
299                    xasprintf (_("Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), and iconv() does not support this conversion."),
300                               canon_from_code, canon_to_code,
301                               last_component (program_name)));
302 
303       context.from_code = canon_from_code;
304       context.to_code = canon_to_code;
305       context.from_filename = from_filename;
306 
307       for (j = 0; j < mlp->nitems; j++)
308         {
309           message_ty *mp = mlp->item[j];
310 
311           if ((mp->msgctxt != NULL && !is_ascii_string (mp->msgctxt))
312               || !is_ascii_string (mp->msgid))
313             msgids_changed = true;
314           context.message = mp;
315           convert_string_list (&cd, mp->comment, &context);
316           convert_string_list (&cd, mp->comment_dot, &context);
317           convert_prev_msgid (&cd, mp, &context);
318           convert_msgid (&cd, mp, &context);
319           convert_msgstr (&cd, mp, &context);
320         }
321 
322       iconveh_close (&cd);
323 
324       if (msgids_changed)
325         if (message_list_msgids_changed (mlp))
326           po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
327                      xasprintf (_("Conversion from \"%s\" to \"%s\" introduces duplicates: some different msgids become equal."),
328                                 canon_from_code, canon_to_code));
329 #else
330           po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
331                      xasprintf (_("Cannot convert from \"%s\" to \"%s\". %s relies on iconv(). This version was built without iconv()."),
332                                 canon_from_code, canon_to_code,
333                                 last_component (program_name)));
334 #endif
335     }
336 
337   return msgids_changed;
338 }
339 
340 bool
iconv_message_list(message_list_ty * mlp,const char * canon_from_code,const char * canon_to_code,const char * from_filename)341 iconv_message_list (message_list_ty *mlp,
342                     const char *canon_from_code, const char *canon_to_code,
343                     const char *from_filename)
344 {
345   return iconv_message_list_internal (mlp,
346                                       canon_from_code, canon_to_code, true,
347                                       from_filename);
348 }
349 
350 msgdomain_list_ty *
iconv_msgdomain_list(msgdomain_list_ty * mdlp,const char * to_code,bool update_header,const char * from_filename)351 iconv_msgdomain_list (msgdomain_list_ty *mdlp,
352                       const char *to_code,
353                       bool update_header,
354                       const char *from_filename)
355 {
356   const char *canon_to_code;
357   size_t k;
358 
359   /* Canonicalize target encoding.  */
360   canon_to_code = po_charset_canonicalize (to_code);
361   if (canon_to_code == NULL)
362     po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
363                xasprintf (_("target charset \"%s\" is not a portable encoding name."),
364                           to_code));
365 
366   for (k = 0; k < mdlp->nitems; k++)
367     iconv_message_list_internal (mdlp->item[k]->messages,
368                                  mdlp->encoding, canon_to_code, update_header,
369                                  from_filename);
370 
371   mdlp->encoding = canon_to_code;
372   return mdlp;
373 }
374 
375 #if HAVE_ICONV
376 
377 static bool
iconvable_string(const iconveh_t * cd,const char * string)378 iconvable_string (const iconveh_t *cd, const char *string)
379 {
380   size_t len = strlen (string) + 1;
381   char *result = NULL;
382   size_t resultlen = 0;
383 
384   if (xmem_cd_iconveh (string, len, cd, iconveh_error, NULL,
385                        &result, &resultlen) == 0)
386     {
387       /* Test if the result has exactly one NUL byte, at the end.  */
388       bool ok = (resultlen > 0 && result[resultlen - 1] == '\0'
389                  && strlen (result) == resultlen - 1);
390       free (result);
391       return ok;
392     }
393   return false;
394 }
395 
396 static bool
iconvable_string_list(const iconveh_t * cd,string_list_ty * slp)397 iconvable_string_list (const iconveh_t *cd, string_list_ty *slp)
398 {
399   size_t i;
400 
401   if (slp != NULL)
402     for (i = 0; i < slp->nitems; i++)
403       if (!iconvable_string (cd, slp->item[i]))
404         return false;
405   return true;
406 }
407 
408 static bool
iconvable_prev_msgid(const iconveh_t * cd,message_ty * mp)409 iconvable_prev_msgid (const iconveh_t *cd, message_ty *mp)
410 {
411   if (mp->prev_msgctxt != NULL)
412     if (!iconvable_string (cd, mp->prev_msgctxt))
413       return false;
414   if (mp->prev_msgid != NULL)
415     if (!iconvable_string (cd, mp->prev_msgid))
416       return false;
417   if (mp->prev_msgid_plural != NULL)
418     if (!iconvable_string (cd, mp->prev_msgid_plural))
419       return false;
420   return true;
421 }
422 
423 static bool
iconvable_msgid(const iconveh_t * cd,message_ty * mp)424 iconvable_msgid (const iconveh_t *cd, message_ty *mp)
425 {
426   if (mp->msgctxt != NULL)
427     if (!iconvable_string (cd, mp->msgctxt))
428       return false;
429   if (!iconvable_string (cd, mp->msgid))
430     return false;
431   if (mp->msgid_plural != NULL)
432     if (!iconvable_string (cd, mp->msgid_plural))
433       return false;
434   return true;
435 }
436 
437 static bool
iconvable_msgstr(const iconveh_t * cd,message_ty * mp)438 iconvable_msgstr (const iconveh_t *cd, message_ty *mp)
439 {
440   char *result = NULL;
441   size_t resultlen = 0;
442 
443   if (!(mp->msgstr_len > 0 && mp->msgstr[mp->msgstr_len - 1] == '\0'))
444     abort ();
445 
446   if (xmem_cd_iconveh (mp->msgstr, mp->msgstr_len, cd, iconveh_error, NULL,
447                        &result, &resultlen) == 0)
448     {
449       bool ok = false;
450 
451       /* Test if the result has a NUL byte at the end.  */
452       if (resultlen > 0 && result[resultlen - 1] == '\0')
453         /* Test if the result has the same number of NUL bytes.  */
454         {
455           const char *p;
456           const char *pend;
457           int nulcount1;
458           int nulcount2;
459 
460           for (p = mp->msgstr, pend = p + mp->msgstr_len, nulcount1 = 0;
461                p < pend;
462                p += strlen (p) + 1, nulcount1++);
463           for (p = result, pend = p + resultlen, nulcount2 = 0;
464                p < pend;
465                p += strlen (p) + 1, nulcount2++);
466 
467           if (nulcount1 == nulcount2)
468             ok = true;
469         }
470 
471       free (result);
472       return ok;
473     }
474   return false;
475 }
476 
477 #endif
478 
479 bool
is_message_list_iconvable(message_list_ty * mlp,const char * canon_from_code,const char * canon_to_code)480 is_message_list_iconvable (message_list_ty *mlp,
481                            const char *canon_from_code,
482                            const char *canon_to_code)
483 {
484   bool canon_from_code_overridden = (canon_from_code != NULL);
485   size_t j;
486 
487   /* If the list is empty, nothing to check.  */
488   if (mlp->nitems == 0)
489     return true;
490 
491   /* Search the header entry, and extract the charset name.  */
492   for (j = 0; j < mlp->nitems; j++)
493     if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
494       {
495         const char *header = mlp->item[j]->msgstr;
496 
497         if (header != NULL)
498           {
499             const char *charsetstr = c_strstr (header, "charset=");
500 
501             if (charsetstr != NULL)
502               {
503                 size_t len;
504                 char *charset;
505                 const char *canon_charset;
506 
507                 charsetstr += strlen ("charset=");
508                 len = strcspn (charsetstr, " \t\n");
509                 charset = (char *) xmalloca (len + 1);
510                 memcpy (charset, charsetstr, len);
511                 charset[len] = '\0';
512 
513                 canon_charset = po_charset_canonicalize (charset);
514                 if (canon_charset == NULL)
515                   {
516                     if (!canon_from_code_overridden)
517                       {
518                         /* Don't give an error for POT files, because POT
519                            files usually contain only ASCII msgids.  */
520                         if (strcmp (charset, "CHARSET") == 0)
521                           canon_charset = po_charset_ascii;
522                         else
523                           {
524                             /* charset is not a portable encoding name.  */
525                             freea (charset);
526                             return false;
527                           }
528                       }
529                   }
530                 else
531                   {
532                     if (canon_from_code == NULL)
533                       canon_from_code = canon_charset;
534                     else if (canon_from_code != canon_charset)
535                       {
536                         /* Two different charsets in input file.  */
537                         freea (charset);
538                         return false;
539                       }
540                   }
541                 freea (charset);
542               }
543           }
544       }
545   if (canon_from_code == NULL)
546     {
547       if (is_ascii_message_list (mlp))
548         canon_from_code = po_charset_ascii;
549       else
550         /* Input file lacks a header entry with a charset specification.  */
551         return false;
552     }
553 
554   /* If the two encodings are the same, nothing to check.  */
555   if (canon_from_code != canon_to_code)
556     {
557 #if HAVE_ICONV
558       iconveh_t cd;
559 
560       if (iconveh_open (canon_to_code, canon_from_code, &cd) < 0)
561         /* iconv() doesn't support this conversion.  */
562         return false;
563 
564       for (j = 0; j < mlp->nitems; j++)
565         {
566           message_ty *mp = mlp->item[j];
567 
568           if (!(iconvable_string_list (&cd, mp->comment)
569                 && iconvable_string_list (&cd, mp->comment_dot)
570                 && iconvable_prev_msgid (&cd, mp)
571                 && iconvable_msgid (&cd, mp)
572                 && iconvable_msgstr (&cd, mp)))
573             return false;
574         }
575 
576       iconveh_close (&cd);
577 #else
578       /* This version was built without iconv().  */
579       return false;
580 #endif
581     }
582 
583   return true;
584 }
585