• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Checking of messages in PO files.
2    Copyright (C) 1995-1998, 2000-2008, 2010-2016, 2019 Free Software Foundation, Inc.
3    Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, April 1995.
4 
5    This program is free software: you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 3 of the License, or
8    (at your option) any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
17 
18 #ifdef HAVE_CONFIG_H
19 # include <config.h>
20 #endif
21 
22 /* Specification.  */
23 #include "msgl-check.h"
24 
25 #include <limits.h>
26 #include <setjmp.h>
27 #include <signal.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <stdarg.h>
32 
33 #include "c-ctype.h"
34 #include "xalloc.h"
35 #include "xvasprintf.h"
36 #include "po-xerror.h"
37 #include "format.h"
38 #include "plural-exp.h"
39 #include "plural-eval.h"
40 #include "plural-table.h"
41 #include "c-strstr.h"
42 #include "message.h"
43 #include "quote.h"
44 #include "sentence.h"
45 #include "unictype.h"
46 #include "unistr.h"
47 #include "gettext.h"
48 
49 #define _(str) gettext (str)
50 
51 #define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
52 
53 
54 /* Evaluates the plural formula for min <= n <= max
55    and returns the estimated number of times the value j was assumed.  */
56 static unsigned int
plural_expression_histogram(const struct plural_distribution * self,int min,int max,unsigned long j)57 plural_expression_histogram (const struct plural_distribution *self,
58                              int min, int max, unsigned long j)
59 {
60   if (min < 0)
61     min = 0;
62   /* Limit the number of evaluations.  Nothing interesting happens beyond
63      1000.  */
64   if (max - min > 1000)
65     max = min + 1000;
66   if (min <= max)
67     {
68       const struct expression *expr = self->expr;
69       unsigned long n;
70       unsigned int count;
71 
72       /* Protect against arithmetic exceptions.  */
73       install_sigfpe_handler ();
74 
75       count = 0;
76       for (n = min; n <= max; n++)
77         {
78           unsigned long val = plural_eval (expr, n);
79 
80           if (val == j)
81             count++;
82         }
83 
84       /* End of protection against arithmetic exceptions.  */
85       uninstall_sigfpe_handler ();
86 
87       return count;
88     }
89   else
90     return 0;
91 }
92 
93 
94 /* Check the values returned by plural_eval.
95    Signals the errors through po_xerror.
96    Return the number of errors that were seen.
97    If no errors, returns in *DISTRIBUTION information about the plural_eval
98    values distribution.  */
99 int
check_plural_eval(const struct expression * plural_expr,unsigned long nplurals_value,const message_ty * header,struct plural_distribution * distribution)100 check_plural_eval (const struct expression *plural_expr,
101                    unsigned long nplurals_value,
102                    const message_ty *header,
103                    struct plural_distribution *distribution)
104 {
105   /* Do as if the plural formula assumes a value N infinitely often if it
106      assumes it at least 5 times.  */
107 #define OFTEN 5
108   unsigned char * volatile array;
109 
110   /* Allocate a distribution array.  */
111   if (nplurals_value <= 100)
112     array = XCALLOC (nplurals_value, unsigned char);
113   else
114     /* nplurals_value is nonsense.  Don't risk an out-of-memory.  */
115     array = NULL;
116 
117   if (sigsetjmp (sigfpe_exit, 1) == 0)
118     {
119       unsigned long n;
120 
121       /* Protect against arithmetic exceptions.  */
122       install_sigfpe_handler ();
123 
124       for (n = 0; n <= 1000; n++)
125         {
126           unsigned long val = plural_eval (plural_expr, n);
127 
128           if ((long) val < 0)
129             {
130               /* End of protection against arithmetic exceptions.  */
131               uninstall_sigfpe_handler ();
132 
133               po_xerror (PO_SEVERITY_ERROR, header, NULL, 0, 0, false,
134                          _("plural expression can produce negative values"));
135               free (array);
136               return 1;
137             }
138           else if (val >= nplurals_value)
139             {
140               char *msg;
141 
142               /* End of protection against arithmetic exceptions.  */
143               uninstall_sigfpe_handler ();
144 
145               msg = xasprintf (_("nplurals = %lu but plural expression can produce values as large as %lu"),
146                                nplurals_value, val);
147               po_xerror (PO_SEVERITY_ERROR, header, NULL, 0, 0, false, msg);
148               free (msg);
149               free (array);
150               return 1;
151             }
152 
153           if (array != NULL && array[val] < OFTEN)
154             array[val]++;
155         }
156 
157       /* End of protection against arithmetic exceptions.  */
158       uninstall_sigfpe_handler ();
159 
160       /* Normalize the array[val] statistics.  */
161       if (array != NULL)
162         {
163           unsigned long val;
164 
165           for (val = 0; val < nplurals_value; val++)
166             array[val] = (array[val] == OFTEN ? 1 : 0);
167         }
168 
169       distribution->expr = plural_expr;
170       distribution->often = array;
171       distribution->often_length = (array != NULL ? nplurals_value : 0);
172       distribution->histogram = plural_expression_histogram;
173 
174       return 0;
175     }
176   else
177     {
178       /* Caught an arithmetic exception.  */
179       const char *msg;
180 
181       /* End of protection against arithmetic exceptions.  */
182       uninstall_sigfpe_handler ();
183 
184 #if USE_SIGINFO
185       switch (sigfpe_code)
186 #endif
187         {
188 #if USE_SIGINFO
189 # ifdef FPE_INTDIV
190         case FPE_INTDIV:
191           msg = _("plural expression can produce division by zero");
192           break;
193 # endif
194 # ifdef FPE_INTOVF
195         case FPE_INTOVF:
196           msg = _("plural expression can produce integer overflow");
197           break;
198 # endif
199         default:
200 #endif
201           msg = _("plural expression can produce arithmetic exceptions, possibly division by zero");
202         }
203 
204       po_xerror (PO_SEVERITY_ERROR, header, NULL, 0, 0, false, msg);
205 
206       free (array);
207 
208       return 1;
209     }
210 #undef OFTEN
211 }
212 
213 
214 /* Try to help the translator by looking up the right plural formula for her.
215    Return a freshly allocated multiline help string, or NULL.  */
216 static char *
plural_help(const char * nullentry)217 plural_help (const char *nullentry)
218 {
219   struct plural_table_entry *ptentry = NULL;
220 
221   {
222     const char *language;
223 
224     language = c_strstr (nullentry, "Language: ");
225     if (language != NULL)
226       {
227         size_t len;
228 
229         language += 10;
230         len = strcspn (language, " \t\n");
231         if (len > 0)
232           {
233             size_t j;
234 
235             for (j = 0; j < plural_table_size; j++)
236               if (len == strlen (plural_table[j].lang)
237                   && strncmp (language, plural_table[j].lang, len) == 0)
238                 {
239                   ptentry = &plural_table[j];
240                   break;
241                 }
242           }
243       }
244   }
245 
246   if (ptentry == NULL)
247     {
248       const char *language;
249 
250       language = c_strstr (nullentry, "Language-Team: ");
251       if (language != NULL)
252         {
253           size_t j;
254 
255           language += 15;
256           for (j = 0; j < plural_table_size; j++)
257             if (strncmp (language,
258                          plural_table[j].language,
259                          strlen (plural_table[j].language)) == 0)
260               {
261                 ptentry = &plural_table[j];
262                 break;
263               }
264         }
265     }
266 
267   if (ptentry != NULL)
268     {
269       char *helpline1 =
270         xasprintf (_("Try using the following, valid for %s:"),
271                    ptentry->language);
272       char *help =
273         xasprintf ("%s\n\"Plural-Forms: %s\\n\"\n",
274                    helpline1, ptentry->value);
275       free (helpline1);
276       return help;
277     }
278   return NULL;
279 }
280 
281 
282 /* Perform plural expression checking.
283    Return the number of errors that were seen.
284    If no errors, returns in *DISTRIBUTION information about the plural_eval
285    values distribution.  */
286 static int
check_plural(message_list_ty * mlp,int ignore_untranslated_messages,int ignore_fuzzy_messages,struct plural_distribution * distributionp)287 check_plural (message_list_ty *mlp,
288               int ignore_untranslated_messages,
289               int ignore_fuzzy_messages,
290               struct plural_distribution *distributionp)
291 {
292   int seen_errors = 0;
293   const message_ty *has_plural;
294   unsigned long min_nplurals;
295   const message_ty *min_pos;
296   unsigned long max_nplurals;
297   const message_ty *max_pos;
298   struct plural_distribution distribution;
299   size_t j;
300   message_ty *header;
301 
302   /* Determine whether mlp has plural entries.  */
303   has_plural = NULL;
304   min_nplurals = ULONG_MAX;
305   min_pos = NULL;
306   max_nplurals = 0;
307   max_pos = NULL;
308   distribution.expr = NULL;
309   distribution.often = NULL;
310   distribution.often_length = 0;
311   distribution.histogram = NULL;
312   for (j = 0; j < mlp->nitems; j++)
313     {
314       message_ty *mp = mlp->item[j];
315 
316       if (!mp->obsolete
317           && !(ignore_untranslated_messages && mp->msgstr[0] == '\0')
318           && !(ignore_fuzzy_messages && (mp->is_fuzzy && !is_header (mp)))
319           && mp->msgid_plural != NULL)
320         {
321           const char *p;
322           const char *p_end;
323           unsigned long n;
324 
325           if (has_plural == NULL)
326             has_plural = mp;
327 
328           n = 0;
329           for (p = mp->msgstr, p_end = p + mp->msgstr_len;
330                p < p_end;
331                p += strlen (p) + 1)
332             n++;
333           if (min_nplurals > n)
334             {
335               min_nplurals = n;
336               min_pos = mp;
337             }
338           if (max_nplurals < n)
339             {
340               max_nplurals = n;
341               max_pos = mp;
342             }
343         }
344     }
345 
346   /* Look at the plural entry for this domain.
347      Cf, function extract_plural_expression.  */
348   header = message_list_search (mlp, NULL, "");
349   if (header != NULL && !header->obsolete)
350     {
351       const char *nullentry;
352       const char *plural;
353       const char *nplurals;
354 
355       nullentry = header->msgstr;
356 
357       plural = c_strstr (nullentry, "plural=");
358       nplurals = c_strstr (nullentry, "nplurals=");
359       if (plural == NULL && has_plural != NULL)
360         {
361           const char *msg1 =
362             _("message catalog has plural form translations");
363           const char *msg2 =
364             _("but header entry lacks a \"plural=EXPRESSION\" attribute");
365           char *help = plural_help (nullentry);
366 
367           if (help != NULL)
368             {
369               char *msg2ext = xasprintf ("%s\n%s", msg2, help);
370               po_xerror2 (PO_SEVERITY_ERROR,
371                           has_plural, NULL, 0, 0, false, msg1,
372                           header, NULL, 0, 0, true, msg2ext);
373               free (msg2ext);
374               free (help);
375             }
376           else
377             po_xerror2 (PO_SEVERITY_ERROR,
378                         has_plural, NULL, 0, 0, false, msg1,
379                         header, NULL, 0, 0, false, msg2);
380 
381           seen_errors++;
382         }
383       if (nplurals == NULL && has_plural != NULL)
384         {
385           const char *msg1 =
386             _("message catalog has plural form translations");
387           const char *msg2 =
388             _("but header entry lacks a \"nplurals=INTEGER\" attribute");
389           char *help = plural_help (nullentry);
390 
391           if (help != NULL)
392             {
393               char *msg2ext = xasprintf ("%s\n%s", msg2, help);
394               po_xerror2 (PO_SEVERITY_ERROR,
395                           has_plural, NULL, 0, 0, false, msg1,
396                           header, NULL, 0, 0, true, msg2ext);
397               free (msg2ext);
398               free (help);
399             }
400           else
401             po_xerror2 (PO_SEVERITY_ERROR,
402                         has_plural, NULL, 0, 0, false, msg1,
403                         header, NULL, 0, 0, false, msg2);
404 
405           seen_errors++;
406         }
407       if (plural != NULL && nplurals != NULL)
408         {
409           const char *endp;
410           unsigned long int nplurals_value;
411           struct parse_args args;
412           const struct expression *plural_expr;
413 
414           /* First check the number.  */
415           nplurals += 9;
416           while (*nplurals != '\0' && c_isspace ((unsigned char) *nplurals))
417             ++nplurals;
418           endp = nplurals;
419           nplurals_value = 0;
420           if (*nplurals >= '0' && *nplurals <= '9')
421             nplurals_value = strtoul (nplurals, (char **) &endp, 10);
422           if (nplurals == endp)
423             {
424               const char *msg = _("invalid nplurals value");
425               char *help = plural_help (nullentry);
426 
427               if (help != NULL)
428                 {
429                   char *msgext = xasprintf ("%s\n%s", msg, help);
430                   po_xerror (PO_SEVERITY_ERROR, header, NULL, 0, 0, true,
431                              msgext);
432                   free (msgext);
433                   free (help);
434                 }
435               else
436                 po_xerror (PO_SEVERITY_ERROR, header, NULL, 0, 0, false, msg);
437 
438               seen_errors++;
439             }
440 
441           /* Then check the expression.  */
442           plural += 7;
443           args.cp = plural;
444           if (parse_plural_expression (&args) != 0)
445             {
446               const char *msg = _("invalid plural expression");
447               char *help = plural_help (nullentry);
448 
449               if (help != NULL)
450                 {
451                   char *msgext = xasprintf ("%s\n%s", msg, help);
452                   po_xerror (PO_SEVERITY_ERROR, header, NULL, 0, 0, true,
453                              msgext);
454                   free (msgext);
455                   free (help);
456                 }
457               else
458                 po_xerror (PO_SEVERITY_ERROR, header, NULL, 0, 0, false, msg);
459 
460               seen_errors++;
461             }
462           plural_expr = args.res;
463 
464           /* See whether nplurals and plural fit together.  */
465           if (!seen_errors)
466             seen_errors =
467               check_plural_eval (plural_expr, nplurals_value, header,
468                                  &distribution);
469 
470           /* Check the number of plurals of the translations.  */
471           if (!seen_errors)
472             {
473               if (min_nplurals < nplurals_value)
474                 {
475                   char *msg1 =
476                     xasprintf (_("nplurals = %lu"), nplurals_value);
477                   char *msg2 =
478                     xasprintf (ngettext ("but some messages have only one plural form",
479                                          "but some messages have only %lu plural forms",
480                                          min_nplurals),
481                                min_nplurals);
482                   po_xerror2 (PO_SEVERITY_ERROR,
483                               header, NULL, 0, 0, false, msg1,
484                               min_pos, NULL, 0, 0, false, msg2);
485                   free (msg2);
486                   free (msg1);
487                   seen_errors++;
488                 }
489               else if (max_nplurals > nplurals_value)
490                 {
491                   char *msg1 =
492                     xasprintf (_("nplurals = %lu"), nplurals_value);
493                   char *msg2 =
494                     xasprintf (ngettext ("but some messages have one plural form",
495                                          "but some messages have %lu plural forms",
496                                          max_nplurals),
497                                max_nplurals);
498                   po_xerror2 (PO_SEVERITY_ERROR,
499                               header, NULL, 0, 0, false, msg1,
500                               max_pos, NULL, 0, 0, false, msg2);
501                   free (msg2);
502                   free (msg1);
503                   seen_errors++;
504                 }
505               /* The only valid case is max_nplurals <= n <= min_nplurals,
506                  which means either has_plural == NULL or
507                  max_nplurals = n = min_nplurals.  */
508             }
509         }
510       else
511         goto no_plural;
512     }
513   else
514     {
515       if (has_plural != NULL)
516         {
517           po_xerror (PO_SEVERITY_ERROR, has_plural, NULL, 0, 0, false,
518                      _("message catalog has plural form translations, but lacks a header entry with \"Plural-Forms: nplurals=INTEGER; plural=EXPRESSION;\""));
519           seen_errors++;
520         }
521      no_plural:
522       /* By default, the Germanic formula (n != 1) is used.  */
523       distribution.expr = &germanic_plural;
524       {
525         unsigned char *array = XCALLOC (2, unsigned char);
526         array[1] = 1;
527         distribution.often = array;
528       }
529       distribution.often_length = 2;
530       distribution.histogram = plural_expression_histogram;
531     }
532 
533   /* distribution is not needed if we report errors.
534      Also, if there was an error due to  max_nplurals > nplurals_value,
535      we must not use distribution because we would be doing out-of-bounds
536      array accesses.  */
537   if (seen_errors > 0)
538     free ((unsigned char *) distribution.often);
539   else
540     *distributionp = distribution;
541 
542   return seen_errors;
543 }
544 
545 
546 /* Signal an error when checking format strings.  */
547 static const message_ty *curr_mp;
548 static lex_pos_ty curr_msgid_pos;
549 static void
550 formatstring_error_logger (const char *format, ...)
551 #if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ >= 7) || __GNUC__ > 2)
552      __attribute__ ((__format__ (__printf__, 1, 2)))
553 #endif
554 ;
555 static void
formatstring_error_logger(const char * format,...)556 formatstring_error_logger (const char *format, ...)
557 {
558   va_list args;
559   char *msg;
560 
561   va_start (args, format);
562   if (vasprintf (&msg, format, args) < 0)
563     error (EXIT_FAILURE, 0, _("memory exhausted"));
564   va_end (args);
565   po_xerror (PO_SEVERITY_ERROR,
566              curr_mp, curr_msgid_pos.file_name, curr_msgid_pos.line_number,
567              (size_t)(-1), false, msg);
568   free (msg);
569 }
570 
571 
572 /* Perform miscellaneous checks on a message.
573    PLURAL_DISTRIBUTION is either NULL or an array of nplurals elements,
574    PLURAL_DISTRIBUTION[j] being true if the value j appears to be assumed
575    infinitely often by the plural formula.
576    PLURAL_DISTRIBUTION_LENGTH is the length of the PLURAL_DISTRIBUTION
577    array.  */
578 static int
check_pair(const message_ty * mp,const char * msgid,const lex_pos_ty * msgid_pos,const char * msgid_plural,const char * msgstr,size_t msgstr_len,const enum is_format is_format[NFORMATS],int check_newlines,int check_format_strings,const struct plural_distribution * distribution,int check_compatibility,int check_accelerators,char accelerator_char)579 check_pair (const message_ty *mp,
580             const char *msgid,
581             const lex_pos_ty *msgid_pos,
582             const char *msgid_plural,
583             const char *msgstr, size_t msgstr_len,
584             const enum is_format is_format[NFORMATS],
585             int check_newlines,
586             int check_format_strings,
587             const struct plural_distribution *distribution,
588             int check_compatibility,
589             int check_accelerators, char accelerator_char)
590 {
591   int seen_errors;
592   int has_newline;
593   unsigned int j;
594 
595   /* If the msgid string is empty we have the special entry reserved for
596      information about the translation.  */
597   if (msgid[0] == '\0')
598     return 0;
599 
600   seen_errors = 0;
601 
602   if (check_newlines)
603     {
604       /* Test 1: check whether all or none of the strings begin with a '\n'.  */
605       has_newline = (msgid[0] == '\n');
606 #define TEST_NEWLINE(p) (p[0] == '\n')
607       if (msgid_plural != NULL)
608         {
609           const char *p;
610 
611           if (TEST_NEWLINE(msgid_plural) != has_newline)
612             {
613               po_xerror (PO_SEVERITY_ERROR,
614                          mp, msgid_pos->file_name, msgid_pos->line_number,
615                          (size_t)(-1), false,
616                          _("'msgid' and 'msgid_plural' entries do not both begin with '\\n'"));
617               seen_errors++;
618             }
619           for (p = msgstr, j = 0; p < msgstr + msgstr_len; p += strlen (p) + 1, j++)
620             if (TEST_NEWLINE(p) != has_newline)
621               {
622                 char *msg =
623                   xasprintf (_("'msgid' and 'msgstr[%u]' entries do not both begin with '\\n'"),
624                              j);
625                 po_xerror (PO_SEVERITY_ERROR,
626                            mp, msgid_pos->file_name, msgid_pos->line_number,
627                            (size_t)(-1), false, msg);
628                 free (msg);
629                 seen_errors++;
630               }
631         }
632       else
633         {
634           if (TEST_NEWLINE(msgstr) != has_newline)
635             {
636               po_xerror (PO_SEVERITY_ERROR,
637                          mp, msgid_pos->file_name, msgid_pos->line_number,
638                          (size_t)(-1), false,
639                          _("'msgid' and 'msgstr' entries do not both begin with '\\n'"));
640               seen_errors++;
641             }
642         }
643 #undef TEST_NEWLINE
644 
645       /* Test 2: check whether all or none of the strings end with a '\n'.  */
646       has_newline = (msgid[strlen (msgid) - 1] == '\n');
647 #define TEST_NEWLINE(p) (p[0] != '\0' && p[strlen (p) - 1] == '\n')
648       if (msgid_plural != NULL)
649         {
650           const char *p;
651 
652           if (TEST_NEWLINE(msgid_plural) != has_newline)
653             {
654               po_xerror (PO_SEVERITY_ERROR,
655                          mp, msgid_pos->file_name, msgid_pos->line_number,
656                          (size_t)(-1), false,
657                          _("'msgid' and 'msgid_plural' entries do not both end with '\\n'"));
658               seen_errors++;
659             }
660           for (p = msgstr, j = 0; p < msgstr + msgstr_len; p += strlen (p) + 1, j++)
661             if (TEST_NEWLINE(p) != has_newline)
662               {
663                 char *msg =
664                   xasprintf (_("'msgid' and 'msgstr[%u]' entries do not both end with '\\n'"),
665                              j);
666                 po_xerror (PO_SEVERITY_ERROR,
667                            mp, msgid_pos->file_name, msgid_pos->line_number,
668                            (size_t)(-1), false, msg);
669                 free (msg);
670                 seen_errors++;
671               }
672         }
673       else
674         {
675           if (TEST_NEWLINE(msgstr) != has_newline)
676             {
677               po_xerror (PO_SEVERITY_ERROR,
678                          mp, msgid_pos->file_name, msgid_pos->line_number,
679                          (size_t)(-1), false,
680                          _("'msgid' and 'msgstr' entries do not both end with '\\n'"));
681               seen_errors++;
682             }
683         }
684 #undef TEST_NEWLINE
685     }
686 
687   if (check_compatibility && msgid_plural != NULL)
688     {
689       po_xerror (PO_SEVERITY_ERROR,
690                  mp, msgid_pos->file_name, msgid_pos->line_number,
691                  (size_t)(-1), false,
692                  _("plural handling is a GNU gettext extension"));
693       seen_errors++;
694     }
695 
696   if (check_format_strings)
697     /* Test 3: Check whether both formats strings contain the same number
698        of format specifications.  */
699     {
700       curr_mp = mp;
701       curr_msgid_pos = *msgid_pos;
702       seen_errors +=
703         check_msgid_msgstr_format (msgid, msgid_plural, msgstr, msgstr_len,
704                                    is_format, mp->range, distribution,
705                                    formatstring_error_logger);
706     }
707 
708   if (check_accelerators && msgid_plural == NULL)
709     /* Test 4: Check that if msgid is a menu item with a keyboard accelerator,
710        the msgstr has an accelerator as well.  A keyboard accelerator is
711        designated by an immediately preceding '&'.  We cannot check whether
712        two accelerators collide, only whether the translator has bothered
713        thinking about them.  */
714     {
715       const char *p;
716 
717       /* We are only interested in msgids that contain exactly one '&'.  */
718       p = strchr (msgid, accelerator_char);
719       if (p != NULL && strchr (p + 1, accelerator_char) == NULL)
720         {
721           /* Count the number of '&' in msgstr, but ignore '&&'.  */
722           unsigned int count = 0;
723 
724           for (p = msgstr; (p = strchr (p, accelerator_char)) != NULL; p++)
725             if (p[1] == accelerator_char)
726               p++;
727             else
728               count++;
729 
730           if (count == 0)
731             {
732               char *msg =
733                 xasprintf (_("msgstr lacks the keyboard accelerator mark '%c'"),
734                            accelerator_char);
735               po_xerror (PO_SEVERITY_ERROR,
736                          mp, msgid_pos->file_name, msgid_pos->line_number,
737                          (size_t)(-1), false, msg);
738               free (msg);
739               seen_errors++;
740             }
741           else if (count > 1)
742             {
743               char *msg =
744                 xasprintf (_("msgstr has too many keyboard accelerator marks '%c'"),
745                            accelerator_char);
746               po_xerror (PO_SEVERITY_ERROR,
747                          mp, msgid_pos->file_name, msgid_pos->line_number,
748                          (size_t)(-1), false, msg);
749               free (msg);
750               seen_errors++;
751             }
752         }
753     }
754 
755   return seen_errors;
756 }
757 
758 
759 /* Perform miscellaneous checks on a header entry.  */
760 static int
check_header_entry(const message_ty * mp,const char * msgstr_string)761 check_header_entry (const message_ty *mp, const char *msgstr_string)
762 {
763   static const char *required_fields[] =
764   {
765     "Project-Id-Version", "PO-Revision-Date", "Last-Translator",
766     "Language-Team", "MIME-Version", "Content-Type",
767     "Content-Transfer-Encoding",
768     /* These are recommended but not yet required.  */
769     "Language"
770   };
771   static const char *default_values[] =
772   {
773     "PACKAGE VERSION", "YEAR-MO-DA HO:MI+ZONE", "FULL NAME <EMAIL@ADDRESS>", "LANGUAGE <LL@li.org>", NULL,
774     "text/plain; charset=CHARSET", "ENCODING",
775     ""
776   };
777   const size_t nfields = SIZEOF (required_fields);
778   /* FIXME: We could check if a required header field is missing and
779      report it as error.  However, it's could be too rigorous and
780      break backward compatibility.  */
781 #if 0
782   const size_t nrequiredfields = nfields - 1;
783 #endif
784   int seen_errors = 0;
785   int cnt;
786 
787   for (cnt = 0; cnt < nfields; ++cnt)
788     {
789 #if 0
790       int severity =
791         (cnt < nrequiredfields ? PO_SEVERITY_ERROR : PO_SEVERITY_WARNING);
792 #else
793       int severity =
794         PO_SEVERITY_WARNING;
795 #endif
796       const char *field = required_fields[cnt];
797       size_t len = strlen (field);
798       const char *line;
799 
800       for (line = msgstr_string; *line != '\0'; )
801         {
802           if (strncmp (line, field, len) == 0 && line[len] == ':')
803             {
804               const char *p = line + len + 1;
805 
806               /* Test whether the field's value, starting at p, is the default
807                  value.  */
808               if (*p == ' ')
809                 p++;
810               if (default_values[cnt] != NULL
811                   && strncmp (p, default_values[cnt],
812                               strlen (default_values[cnt])) == 0)
813                 {
814                   p += strlen (default_values[cnt]);
815                   if (*p == '\0' || *p == '\n')
816                     {
817                       char *msg =
818                         xasprintf (_("header field '%s' still has the initial default value\n"),
819                                    field);
820                       po_xerror (severity, mp, NULL, 0, 0, true, msg);
821                       free (msg);
822                       if (severity == PO_SEVERITY_ERROR)
823                         seen_errors++;
824                     }
825                 }
826               break;
827             }
828           line = strchrnul (line, '\n');
829           if (*line == '\n')
830             line++;
831         }
832       if (*line == '\0')
833         {
834           char *msg =
835             xasprintf (_("header field '%s' missing in header\n"),
836                        field);
837           po_xerror (severity, mp, NULL, 0, 0, true, msg);
838           free (msg);
839           if (severity == PO_SEVERITY_ERROR)
840             seen_errors++;
841         }
842     }
843   return seen_errors;
844 }
845 
846 
847 /* Perform all checks on a non-obsolete message.
848    Return the number of errors that were seen.  */
849 int
check_message(const message_ty * mp,const lex_pos_ty * msgid_pos,int check_newlines,int check_format_strings,const struct plural_distribution * distribution,int check_header,int check_compatibility,int check_accelerators,char accelerator_char)850 check_message (const message_ty *mp,
851                const lex_pos_ty *msgid_pos,
852                int check_newlines,
853                int check_format_strings,
854                const struct plural_distribution *distribution,
855                int check_header,
856                int check_compatibility,
857                int check_accelerators, char accelerator_char)
858 {
859   int seen_errors = 0;
860 
861   if (check_header && is_header (mp))
862     seen_errors += check_header_entry (mp, mp->msgstr);
863 
864   seen_errors += check_pair (mp,
865                              mp->msgid, msgid_pos, mp->msgid_plural,
866                              mp->msgstr, mp->msgstr_len,
867                              mp->is_format,
868                              check_newlines,
869                              check_format_strings,
870                              distribution,
871                              check_compatibility,
872                              check_accelerators, accelerator_char);
873   return seen_errors;
874 }
875 
876 
877 /* Perform all checks on a message list.
878    Return the number of errors that were seen.  */
879 int
check_message_list(message_list_ty * mlp,int ignore_untranslated_messages,int ignore_fuzzy_messages,int check_newlines,int check_format_strings,int check_header,int check_compatibility,int check_accelerators,char accelerator_char)880 check_message_list (message_list_ty *mlp,
881                     int ignore_untranslated_messages,
882                     int ignore_fuzzy_messages,
883                     int check_newlines,
884                     int check_format_strings,
885                     int check_header,
886                     int check_compatibility,
887                     int check_accelerators, char accelerator_char)
888 {
889   int seen_errors = 0;
890   struct plural_distribution distribution;
891   size_t j;
892 
893   distribution.expr = NULL;
894   distribution.often = NULL;
895   distribution.often_length = 0;
896   distribution.histogram = NULL;
897 
898   if (check_header)
899     seen_errors += check_plural (mlp, ignore_untranslated_messages,
900                                  ignore_fuzzy_messages, &distribution);
901 
902   for (j = 0; j < mlp->nitems; j++)
903     {
904       message_ty *mp = mlp->item[j];
905 
906       if (!mp->obsolete
907           && !(ignore_untranslated_messages && mp->msgstr[0] == '\0')
908           && !(ignore_fuzzy_messages && (mp->is_fuzzy && !is_header (mp))))
909         seen_errors += check_message (mp, &mp->pos,
910                                       check_newlines,
911                                       check_format_strings,
912                                       &distribution,
913                                       check_header, check_compatibility,
914                                       check_accelerators, accelerator_char);
915     }
916 
917   return seen_errors;
918 }
919 
920 
921 static int
syntax_check_ellipsis_unicode(const message_ty * mp,const char * msgid)922 syntax_check_ellipsis_unicode (const message_ty *mp, const char *msgid)
923 {
924   const char *str = msgid;
925   const char *str_limit = str + strlen (msgid);
926   int seen_errors = 0;
927 
928   while (str < str_limit)
929     {
930       const char *end, *cp;
931       ucs4_t ending_char;
932 
933       end = sentence_end (str, &ending_char);
934 
935       /* sentence_end doesn't treat '...' specially.  */
936       cp = end - (ending_char == '.' ? 2 : 3);
937       if (cp >= str && memcmp (cp, "...", 3) == 0)
938         {
939           po_xerror (PO_SEVERITY_ERROR, mp, NULL, 0, 0, false,
940                      _("ASCII ellipsis ('...') instead of Unicode"));
941           seen_errors++;
942         }
943 
944       str = end + 1;
945     }
946 
947   return seen_errors;
948 }
949 
950 
951 static int
syntax_check_space_ellipsis(const message_ty * mp,const char * msgid)952 syntax_check_space_ellipsis (const message_ty *mp, const char *msgid)
953 {
954   const char *str = msgid;
955   const char *str_limit = str + strlen (msgid);
956   int seen_errors = 0;
957 
958   while (str < str_limit)
959     {
960       const char *end, *ellipsis = NULL;
961       ucs4_t ending_char;
962 
963       end = sentence_end (str, &ending_char);
964 
965       if (ending_char == 0x2026)
966         ellipsis = end;
967       else if (ending_char == '.')
968         {
969           /* sentence_end doesn't treat '...' specially.  */
970           const char *cp = end - 2;
971           if (cp >= str && memcmp (cp, "...", 3) == 0)
972             ellipsis = cp;
973         }
974       else
975         {
976           /* Look for a '...'.  */
977           const char *cp = end - 3;
978           if (cp >= str && memcmp (cp, "...", 3) == 0)
979             ellipsis = cp;
980           else
981             {
982               ucs4_t uc = 0xfffd;
983 
984               /* Look for a U+2026.  */
985               for (cp = end - 1; cp >= str; cp--)
986                 {
987                   u8_mbtouc (&uc, (const unsigned char *) cp, end - cp);
988                   if (uc != 0xfffd)
989                     break;
990                 }
991 
992               if (uc == 0x2026)
993                 ellipsis = cp;
994             }
995         }
996 
997       if (ellipsis)
998         {
999           const char *cp;
1000           ucs4_t uc = 0xfffd;
1001 
1002           /* Look at the character before ellipsis.  */
1003           for (cp = ellipsis - 1; cp >= str; cp--)
1004             {
1005               u8_mbtouc (&uc, (const unsigned char *) cp, ellipsis - cp);
1006               if (uc != 0xfffd)
1007                 break;
1008             }
1009 
1010           if (uc != 0xfffd && uc_is_space (uc))
1011             {
1012               po_xerror (PO_SEVERITY_ERROR, mp, NULL, 0, 0, false,
1013                          _("space before ellipsis found in user visible strings"));
1014               seen_errors++;
1015             }
1016         }
1017 
1018       str = end + 1;
1019     }
1020 
1021   return seen_errors;
1022 }
1023 
1024 
1025 struct callback_arg
1026 {
1027   const message_ty *mp;
1028   int seen_errors;
1029 };
1030 
1031 static void
syntax_check_quote_unicode_callback(char quote,const char * quoted,size_t quoted_length,void * data)1032 syntax_check_quote_unicode_callback (char quote, const char *quoted,
1033                                      size_t quoted_length, void *data)
1034 {
1035   struct callback_arg *arg = data;
1036 
1037   switch (quote)
1038     {
1039     case '"':
1040       po_xerror (PO_SEVERITY_ERROR, arg->mp, NULL, 0, 0, false,
1041                  _("ASCII double quote used instead of Unicode"));
1042       arg->seen_errors++;
1043       break;
1044 
1045     case '\'':
1046       po_xerror (PO_SEVERITY_ERROR, arg->mp, NULL, 0, 0, false,
1047                  _("ASCII single quote used instead of Unicode"));
1048       arg->seen_errors++;
1049       break;
1050 
1051     default:
1052       break;
1053     }
1054 }
1055 
1056 static int
syntax_check_quote_unicode(const message_ty * mp,const char * msgid)1057 syntax_check_quote_unicode (const message_ty *mp, const char *msgid)
1058 {
1059   struct callback_arg arg;
1060 
1061   arg.mp = mp;
1062   arg.seen_errors = 0;
1063 
1064   scan_quoted (msgid, strlen (msgid),
1065                syntax_check_quote_unicode_callback, &arg);
1066 
1067   return arg.seen_errors;
1068 }
1069 
/* One nesting level of an ASCII bullet list, as tracked by
   syntax_check_bullet_unicode.  */
struct bullet_ty
{
  /* The bullet character seen at this level ('*' or '-').  */
  int c;
  /* Number of bytes between the start of the line and the bullet.  */
  size_t depth;
};
1075 
/* Growable stack of bullet nesting levels.  */
struct bullet_stack_ty
{
  /* Storage for the levels; enlarged with xrealloc when full.  */
  struct bullet_ty *items;
  /* Number of levels currently in use.  */
  size_t nitems;
  /* Number of levels ITEMS has room for.  */
  size_t nitems_max;
};

/* The stack used by syntax_check_bullet_unicode.  Its storage is kept
   between calls; only the NITEMS count is reset at the start of each
   call.  */
static struct bullet_stack_ty bullet_stack;
1084 
1085 static int
syntax_check_bullet_unicode(const message_ty * mp,const char * msgid)1086 syntax_check_bullet_unicode (const message_ty *mp, const char *msgid)
1087 {
1088   const char *str = msgid;
1089   const char *str_limit = str + strlen (msgid);
1090   struct bullet_ty *last_bullet = NULL;
1091   bool seen_error = false;
1092 
1093   bullet_stack.nitems = 0;
1094 
1095   while (str < str_limit)
1096     {
1097       const char *p = str, *end;
1098 
1099       while (p < str_limit && c_isspace (*p))
1100         p++;
1101 
1102       if ((*p == '*' || *p == '-') && *(p + 1) == ' ')
1103         {
1104           size_t depth = p - str;
1105           if (last_bullet == NULL || depth > last_bullet->depth)
1106             {
1107               struct bullet_ty bullet;
1108 
1109               bullet.c = *p;
1110               bullet.depth = depth;
1111 
1112               if (bullet_stack.nitems >= bullet_stack.nitems_max)
1113                 {
1114                   bullet_stack.nitems_max = 2 * bullet_stack.nitems_max + 4;
1115                   bullet_stack.items = xrealloc (bullet_stack.items,
1116                                                  bullet_stack.nitems_max
1117                                                  * sizeof (struct bullet_ty));
1118                 }
1119 
1120               last_bullet = &bullet_stack.items[bullet_stack.nitems++];
1121               memcpy (last_bullet, &bullet, sizeof (struct bullet_ty));
1122             }
1123           else
1124             {
1125               if (depth < last_bullet->depth)
1126                 {
1127                   if (bullet_stack.nitems > 1)
1128                     {
1129                       bullet_stack.nitems--;
1130                       last_bullet =
1131                         &bullet_stack.items[bullet_stack.nitems - 1];
1132                     }
1133                   else
1134                     last_bullet = NULL;
1135                 }
1136 
1137               if (last_bullet && depth == last_bullet->depth)
1138                 {
1139                   if (last_bullet->c != *p)
1140                     last_bullet->c = *p;
1141                   else
1142                     {
1143                       seen_error = true;
1144                       break;
1145                     }
1146                 }
1147             }
1148         }
1149       else
1150         {
1151           bullet_stack.nitems = 0;
1152           last_bullet = NULL;
1153         }
1154 
1155       end = strchrnul (str, '\n');
1156       str = end + 1;
1157     }
1158 
1159   if (seen_error)
1160     {
1161       char *msg;
1162       msg = xasprintf (_("ASCII bullet ('%c') instead of Unicode"),
1163                        last_bullet->c);
1164       po_xerror (PO_SEVERITY_ERROR, mp, NULL, 0, 0, false, msg);
1165       free (msg);
1166       return 1;
1167     }
1168 
1169   return 0;
1170 }
1171 
1172 
1173 typedef int (* syntax_check_function) (const message_ty *mp, const char *msgid);
1174 static const syntax_check_function sc_funcs[NSYNTAXCHECKS] =
1175 {
1176   syntax_check_ellipsis_unicode,
1177   syntax_check_space_ellipsis,
1178   syntax_check_quote_unicode,
1179   syntax_check_bullet_unicode
1180 };
1181 
1182 /* Perform all syntax checks on a non-obsolete message.
1183    Return the number of errors that were seen.  */
1184 static int
syntax_check_message(const message_ty * mp)1185 syntax_check_message (const message_ty *mp)
1186 {
1187   int seen_errors = 0;
1188   int i;
1189 
1190   for (i = 0; i < NSYNTAXCHECKS; i++)
1191     {
1192       if (mp->do_syntax_check[i] == yes)
1193         {
1194           seen_errors += sc_funcs[i] (mp, mp->msgid);
1195           if (mp->msgid_plural)
1196             seen_errors += sc_funcs[i] (mp, mp->msgid_plural);
1197         }
1198     }
1199 
1200   return seen_errors;
1201 }
1202 
1203 
1204 /* Perform all syntax checks on a message list.
1205    Return the number of errors that were seen.  */
1206 int
syntax_check_message_list(message_list_ty * mlp)1207 syntax_check_message_list (message_list_ty *mlp)
1208 {
1209   int seen_errors = 0;
1210   size_t j;
1211 
1212   for (j = 0; j < mlp->nitems; j++)
1213     {
1214       message_ty *mp = mlp->item[j];
1215 
1216       if (!is_header (mp))
1217         seen_errors += syntax_check_message (mp);
1218     }
1219 
1220   return seen_errors;
1221 }
1222