1 /* Checking of messages in PO files.
2 Copyright (C) 1995-1998, 2000-2008, 2010-2016, 2019 Free Software Foundation, Inc.
3 Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, April 1995.
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17
18 #ifdef HAVE_CONFIG_H
19 # include <config.h>
20 #endif
21
22 /* Specification. */
23 #include "msgl-check.h"
24
25 #include <limits.h>
26 #include <setjmp.h>
27 #include <signal.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <stdarg.h>
32
33 #include "c-ctype.h"
34 #include "xalloc.h"
35 #include "xvasprintf.h"
36 #include "po-xerror.h"
37 #include "format.h"
38 #include "plural-exp.h"
39 #include "plural-eval.h"
40 #include "plural-table.h"
41 #include "c-strstr.h"
42 #include "message.h"
43 #include "quote.h"
44 #include "sentence.h"
45 #include "unictype.h"
46 #include "unistr.h"
47 #include "gettext.h"
48
49 #define _(str) gettext (str)
50
51 #define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
52
53
54 /* Evaluates the plural formula for min <= n <= max
55 and returns the estimated number of times the value j was assumed. */
56 static unsigned int
plural_expression_histogram(const struct plural_distribution * self,int min,int max,unsigned long j)57 plural_expression_histogram (const struct plural_distribution *self,
58 int min, int max, unsigned long j)
59 {
60 if (min < 0)
61 min = 0;
62 /* Limit the number of evaluations. Nothing interesting happens beyond
63 1000. */
64 if (max - min > 1000)
65 max = min + 1000;
66 if (min <= max)
67 {
68 const struct expression *expr = self->expr;
69 unsigned long n;
70 unsigned int count;
71
72 /* Protect against arithmetic exceptions. */
73 install_sigfpe_handler ();
74
75 count = 0;
76 for (n = min; n <= max; n++)
77 {
78 unsigned long val = plural_eval (expr, n);
79
80 if (val == j)
81 count++;
82 }
83
84 /* End of protection against arithmetic exceptions. */
85 uninstall_sigfpe_handler ();
86
87 return count;
88 }
89 else
90 return 0;
91 }
92
93
94 /* Check the values returned by plural_eval.
95 Signals the errors through po_xerror.
96 Return the number of errors that were seen.
97 If no errors, returns in *DISTRIBUTION information about the plural_eval
98 values distribution. */
99 int
check_plural_eval(const struct expression * plural_expr,unsigned long nplurals_value,const message_ty * header,struct plural_distribution * distribution)100 check_plural_eval (const struct expression *plural_expr,
101 unsigned long nplurals_value,
102 const message_ty *header,
103 struct plural_distribution *distribution)
104 {
105 /* Do as if the plural formula assumes a value N infinitely often if it
106 assumes it at least 5 times. */
107 #define OFTEN 5
108 unsigned char * volatile array;
109
110 /* Allocate a distribution array. */
111 if (nplurals_value <= 100)
112 array = XCALLOC (nplurals_value, unsigned char);
113 else
114 /* nplurals_value is nonsense. Don't risk an out-of-memory. */
115 array = NULL;
116
117 if (sigsetjmp (sigfpe_exit, 1) == 0)
118 {
119 unsigned long n;
120
121 /* Protect against arithmetic exceptions. */
122 install_sigfpe_handler ();
123
124 for (n = 0; n <= 1000; n++)
125 {
126 unsigned long val = plural_eval (plural_expr, n);
127
128 if ((long) val < 0)
129 {
130 /* End of protection against arithmetic exceptions. */
131 uninstall_sigfpe_handler ();
132
133 po_xerror (PO_SEVERITY_ERROR, header, NULL, 0, 0, false,
134 _("plural expression can produce negative values"));
135 free (array);
136 return 1;
137 }
138 else if (val >= nplurals_value)
139 {
140 char *msg;
141
142 /* End of protection against arithmetic exceptions. */
143 uninstall_sigfpe_handler ();
144
145 msg = xasprintf (_("nplurals = %lu but plural expression can produce values as large as %lu"),
146 nplurals_value, val);
147 po_xerror (PO_SEVERITY_ERROR, header, NULL, 0, 0, false, msg);
148 free (msg);
149 free (array);
150 return 1;
151 }
152
153 if (array != NULL && array[val] < OFTEN)
154 array[val]++;
155 }
156
157 /* End of protection against arithmetic exceptions. */
158 uninstall_sigfpe_handler ();
159
160 /* Normalize the array[val] statistics. */
161 if (array != NULL)
162 {
163 unsigned long val;
164
165 for (val = 0; val < nplurals_value; val++)
166 array[val] = (array[val] == OFTEN ? 1 : 0);
167 }
168
169 distribution->expr = plural_expr;
170 distribution->often = array;
171 distribution->often_length = (array != NULL ? nplurals_value : 0);
172 distribution->histogram = plural_expression_histogram;
173
174 return 0;
175 }
176 else
177 {
178 /* Caught an arithmetic exception. */
179 const char *msg;
180
181 /* End of protection against arithmetic exceptions. */
182 uninstall_sigfpe_handler ();
183
184 #if USE_SIGINFO
185 switch (sigfpe_code)
186 #endif
187 {
188 #if USE_SIGINFO
189 # ifdef FPE_INTDIV
190 case FPE_INTDIV:
191 msg = _("plural expression can produce division by zero");
192 break;
193 # endif
194 # ifdef FPE_INTOVF
195 case FPE_INTOVF:
196 msg = _("plural expression can produce integer overflow");
197 break;
198 # endif
199 default:
200 #endif
201 msg = _("plural expression can produce arithmetic exceptions, possibly division by zero");
202 }
203
204 po_xerror (PO_SEVERITY_ERROR, header, NULL, 0, 0, false, msg);
205
206 free (array);
207
208 return 1;
209 }
210 #undef OFTEN
211 }
212
213
214 /* Try to help the translator by looking up the right plural formula for her.
215 Return a freshly allocated multiline help string, or NULL. */
216 static char *
plural_help(const char * nullentry)217 plural_help (const char *nullentry)
218 {
219 struct plural_table_entry *ptentry = NULL;
220
221 {
222 const char *language;
223
224 language = c_strstr (nullentry, "Language: ");
225 if (language != NULL)
226 {
227 size_t len;
228
229 language += 10;
230 len = strcspn (language, " \t\n");
231 if (len > 0)
232 {
233 size_t j;
234
235 for (j = 0; j < plural_table_size; j++)
236 if (len == strlen (plural_table[j].lang)
237 && strncmp (language, plural_table[j].lang, len) == 0)
238 {
239 ptentry = &plural_table[j];
240 break;
241 }
242 }
243 }
244 }
245
246 if (ptentry == NULL)
247 {
248 const char *language;
249
250 language = c_strstr (nullentry, "Language-Team: ");
251 if (language != NULL)
252 {
253 size_t j;
254
255 language += 15;
256 for (j = 0; j < plural_table_size; j++)
257 if (strncmp (language,
258 plural_table[j].language,
259 strlen (plural_table[j].language)) == 0)
260 {
261 ptentry = &plural_table[j];
262 break;
263 }
264 }
265 }
266
267 if (ptentry != NULL)
268 {
269 char *helpline1 =
270 xasprintf (_("Try using the following, valid for %s:"),
271 ptentry->language);
272 char *help =
273 xasprintf ("%s\n\"Plural-Forms: %s\\n\"\n",
274 helpline1, ptentry->value);
275 free (helpline1);
276 return help;
277 }
278 return NULL;
279 }
280
281
282 /* Perform plural expression checking.
283 Return the number of errors that were seen.
284 If no errors, returns in *DISTRIBUTION information about the plural_eval
285 values distribution. */
286 static int
check_plural(message_list_ty * mlp,int ignore_untranslated_messages,int ignore_fuzzy_messages,struct plural_distribution * distributionp)287 check_plural (message_list_ty *mlp,
288 int ignore_untranslated_messages,
289 int ignore_fuzzy_messages,
290 struct plural_distribution *distributionp)
291 {
292 int seen_errors = 0;
293 const message_ty *has_plural;
294 unsigned long min_nplurals;
295 const message_ty *min_pos;
296 unsigned long max_nplurals;
297 const message_ty *max_pos;
298 struct plural_distribution distribution;
299 size_t j;
300 message_ty *header;
301
302 /* Determine whether mlp has plural entries. */
303 has_plural = NULL;
304 min_nplurals = ULONG_MAX;
305 min_pos = NULL;
306 max_nplurals = 0;
307 max_pos = NULL;
308 distribution.expr = NULL;
309 distribution.often = NULL;
310 distribution.often_length = 0;
311 distribution.histogram = NULL;
312 for (j = 0; j < mlp->nitems; j++)
313 {
314 message_ty *mp = mlp->item[j];
315
316 if (!mp->obsolete
317 && !(ignore_untranslated_messages && mp->msgstr[0] == '\0')
318 && !(ignore_fuzzy_messages && (mp->is_fuzzy && !is_header (mp)))
319 && mp->msgid_plural != NULL)
320 {
321 const char *p;
322 const char *p_end;
323 unsigned long n;
324
325 if (has_plural == NULL)
326 has_plural = mp;
327
328 n = 0;
329 for (p = mp->msgstr, p_end = p + mp->msgstr_len;
330 p < p_end;
331 p += strlen (p) + 1)
332 n++;
333 if (min_nplurals > n)
334 {
335 min_nplurals = n;
336 min_pos = mp;
337 }
338 if (max_nplurals < n)
339 {
340 max_nplurals = n;
341 max_pos = mp;
342 }
343 }
344 }
345
346 /* Look at the plural entry for this domain.
347 Cf, function extract_plural_expression. */
348 header = message_list_search (mlp, NULL, "");
349 if (header != NULL && !header->obsolete)
350 {
351 const char *nullentry;
352 const char *plural;
353 const char *nplurals;
354
355 nullentry = header->msgstr;
356
357 plural = c_strstr (nullentry, "plural=");
358 nplurals = c_strstr (nullentry, "nplurals=");
359 if (plural == NULL && has_plural != NULL)
360 {
361 const char *msg1 =
362 _("message catalog has plural form translations");
363 const char *msg2 =
364 _("but header entry lacks a \"plural=EXPRESSION\" attribute");
365 char *help = plural_help (nullentry);
366
367 if (help != NULL)
368 {
369 char *msg2ext = xasprintf ("%s\n%s", msg2, help);
370 po_xerror2 (PO_SEVERITY_ERROR,
371 has_plural, NULL, 0, 0, false, msg1,
372 header, NULL, 0, 0, true, msg2ext);
373 free (msg2ext);
374 free (help);
375 }
376 else
377 po_xerror2 (PO_SEVERITY_ERROR,
378 has_plural, NULL, 0, 0, false, msg1,
379 header, NULL, 0, 0, false, msg2);
380
381 seen_errors++;
382 }
383 if (nplurals == NULL && has_plural != NULL)
384 {
385 const char *msg1 =
386 _("message catalog has plural form translations");
387 const char *msg2 =
388 _("but header entry lacks a \"nplurals=INTEGER\" attribute");
389 char *help = plural_help (nullentry);
390
391 if (help != NULL)
392 {
393 char *msg2ext = xasprintf ("%s\n%s", msg2, help);
394 po_xerror2 (PO_SEVERITY_ERROR,
395 has_plural, NULL, 0, 0, false, msg1,
396 header, NULL, 0, 0, true, msg2ext);
397 free (msg2ext);
398 free (help);
399 }
400 else
401 po_xerror2 (PO_SEVERITY_ERROR,
402 has_plural, NULL, 0, 0, false, msg1,
403 header, NULL, 0, 0, false, msg2);
404
405 seen_errors++;
406 }
407 if (plural != NULL && nplurals != NULL)
408 {
409 const char *endp;
410 unsigned long int nplurals_value;
411 struct parse_args args;
412 const struct expression *plural_expr;
413
414 /* First check the number. */
415 nplurals += 9;
416 while (*nplurals != '\0' && c_isspace ((unsigned char) *nplurals))
417 ++nplurals;
418 endp = nplurals;
419 nplurals_value = 0;
420 if (*nplurals >= '0' && *nplurals <= '9')
421 nplurals_value = strtoul (nplurals, (char **) &endp, 10);
422 if (nplurals == endp)
423 {
424 const char *msg = _("invalid nplurals value");
425 char *help = plural_help (nullentry);
426
427 if (help != NULL)
428 {
429 char *msgext = xasprintf ("%s\n%s", msg, help);
430 po_xerror (PO_SEVERITY_ERROR, header, NULL, 0, 0, true,
431 msgext);
432 free (msgext);
433 free (help);
434 }
435 else
436 po_xerror (PO_SEVERITY_ERROR, header, NULL, 0, 0, false, msg);
437
438 seen_errors++;
439 }
440
441 /* Then check the expression. */
442 plural += 7;
443 args.cp = plural;
444 if (parse_plural_expression (&args) != 0)
445 {
446 const char *msg = _("invalid plural expression");
447 char *help = plural_help (nullentry);
448
449 if (help != NULL)
450 {
451 char *msgext = xasprintf ("%s\n%s", msg, help);
452 po_xerror (PO_SEVERITY_ERROR, header, NULL, 0, 0, true,
453 msgext);
454 free (msgext);
455 free (help);
456 }
457 else
458 po_xerror (PO_SEVERITY_ERROR, header, NULL, 0, 0, false, msg);
459
460 seen_errors++;
461 }
462 plural_expr = args.res;
463
464 /* See whether nplurals and plural fit together. */
465 if (!seen_errors)
466 seen_errors =
467 check_plural_eval (plural_expr, nplurals_value, header,
468 &distribution);
469
470 /* Check the number of plurals of the translations. */
471 if (!seen_errors)
472 {
473 if (min_nplurals < nplurals_value)
474 {
475 char *msg1 =
476 xasprintf (_("nplurals = %lu"), nplurals_value);
477 char *msg2 =
478 xasprintf (ngettext ("but some messages have only one plural form",
479 "but some messages have only %lu plural forms",
480 min_nplurals),
481 min_nplurals);
482 po_xerror2 (PO_SEVERITY_ERROR,
483 header, NULL, 0, 0, false, msg1,
484 min_pos, NULL, 0, 0, false, msg2);
485 free (msg2);
486 free (msg1);
487 seen_errors++;
488 }
489 else if (max_nplurals > nplurals_value)
490 {
491 char *msg1 =
492 xasprintf (_("nplurals = %lu"), nplurals_value);
493 char *msg2 =
494 xasprintf (ngettext ("but some messages have one plural form",
495 "but some messages have %lu plural forms",
496 max_nplurals),
497 max_nplurals);
498 po_xerror2 (PO_SEVERITY_ERROR,
499 header, NULL, 0, 0, false, msg1,
500 max_pos, NULL, 0, 0, false, msg2);
501 free (msg2);
502 free (msg1);
503 seen_errors++;
504 }
505 /* The only valid case is max_nplurals <= n <= min_nplurals,
506 which means either has_plural == NULL or
507 max_nplurals = n = min_nplurals. */
508 }
509 }
510 else
511 goto no_plural;
512 }
513 else
514 {
515 if (has_plural != NULL)
516 {
517 po_xerror (PO_SEVERITY_ERROR, has_plural, NULL, 0, 0, false,
518 _("message catalog has plural form translations, but lacks a header entry with \"Plural-Forms: nplurals=INTEGER; plural=EXPRESSION;\""));
519 seen_errors++;
520 }
521 no_plural:
522 /* By default, the Germanic formula (n != 1) is used. */
523 distribution.expr = &germanic_plural;
524 {
525 unsigned char *array = XCALLOC (2, unsigned char);
526 array[1] = 1;
527 distribution.often = array;
528 }
529 distribution.often_length = 2;
530 distribution.histogram = plural_expression_histogram;
531 }
532
533 /* distribution is not needed if we report errors.
534 Also, if there was an error due to max_nplurals > nplurals_value,
535 we must not use distribution because we would be doing out-of-bounds
536 array accesses. */
537 if (seen_errors > 0)
538 free ((unsigned char *) distribution.often);
539 else
540 *distributionp = distribution;
541
542 return seen_errors;
543 }
544
545
546 /* Signal an error when checking format strings. */
547 static const message_ty *curr_mp;
548 static lex_pos_ty curr_msgid_pos;
549 static void
550 formatstring_error_logger (const char *format, ...)
551 #if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ >= 7) || __GNUC__ > 2)
552 __attribute__ ((__format__ (__printf__, 1, 2)))
553 #endif
554 ;
555 static void
formatstring_error_logger(const char * format,...)556 formatstring_error_logger (const char *format, ...)
557 {
558 va_list args;
559 char *msg;
560
561 va_start (args, format);
562 if (vasprintf (&msg, format, args) < 0)
563 error (EXIT_FAILURE, 0, _("memory exhausted"));
564 va_end (args);
565 po_xerror (PO_SEVERITY_ERROR,
566 curr_mp, curr_msgid_pos.file_name, curr_msgid_pos.line_number,
567 (size_t)(-1), false, msg);
568 free (msg);
569 }
570
571
572 /* Perform miscellaneous checks on a message.
573 PLURAL_DISTRIBUTION is either NULL or an array of nplurals elements,
574 PLURAL_DISTRIBUTION[j] being true if the value j appears to be assumed
575 infinitely often by the plural formula.
576 PLURAL_DISTRIBUTION_LENGTH is the length of the PLURAL_DISTRIBUTION
577 array. */
578 static int
check_pair(const message_ty * mp,const char * msgid,const lex_pos_ty * msgid_pos,const char * msgid_plural,const char * msgstr,size_t msgstr_len,const enum is_format is_format[NFORMATS],int check_newlines,int check_format_strings,const struct plural_distribution * distribution,int check_compatibility,int check_accelerators,char accelerator_char)579 check_pair (const message_ty *mp,
580 const char *msgid,
581 const lex_pos_ty *msgid_pos,
582 const char *msgid_plural,
583 const char *msgstr, size_t msgstr_len,
584 const enum is_format is_format[NFORMATS],
585 int check_newlines,
586 int check_format_strings,
587 const struct plural_distribution *distribution,
588 int check_compatibility,
589 int check_accelerators, char accelerator_char)
590 {
591 int seen_errors;
592 int has_newline;
593 unsigned int j;
594
595 /* If the msgid string is empty we have the special entry reserved for
596 information about the translation. */
597 if (msgid[0] == '\0')
598 return 0;
599
600 seen_errors = 0;
601
602 if (check_newlines)
603 {
604 /* Test 1: check whether all or none of the strings begin with a '\n'. */
605 has_newline = (msgid[0] == '\n');
606 #define TEST_NEWLINE(p) (p[0] == '\n')
607 if (msgid_plural != NULL)
608 {
609 const char *p;
610
611 if (TEST_NEWLINE(msgid_plural) != has_newline)
612 {
613 po_xerror (PO_SEVERITY_ERROR,
614 mp, msgid_pos->file_name, msgid_pos->line_number,
615 (size_t)(-1), false,
616 _("'msgid' and 'msgid_plural' entries do not both begin with '\\n'"));
617 seen_errors++;
618 }
619 for (p = msgstr, j = 0; p < msgstr + msgstr_len; p += strlen (p) + 1, j++)
620 if (TEST_NEWLINE(p) != has_newline)
621 {
622 char *msg =
623 xasprintf (_("'msgid' and 'msgstr[%u]' entries do not both begin with '\\n'"),
624 j);
625 po_xerror (PO_SEVERITY_ERROR,
626 mp, msgid_pos->file_name, msgid_pos->line_number,
627 (size_t)(-1), false, msg);
628 free (msg);
629 seen_errors++;
630 }
631 }
632 else
633 {
634 if (TEST_NEWLINE(msgstr) != has_newline)
635 {
636 po_xerror (PO_SEVERITY_ERROR,
637 mp, msgid_pos->file_name, msgid_pos->line_number,
638 (size_t)(-1), false,
639 _("'msgid' and 'msgstr' entries do not both begin with '\\n'"));
640 seen_errors++;
641 }
642 }
643 #undef TEST_NEWLINE
644
645 /* Test 2: check whether all or none of the strings end with a '\n'. */
646 has_newline = (msgid[strlen (msgid) - 1] == '\n');
647 #define TEST_NEWLINE(p) (p[0] != '\0' && p[strlen (p) - 1] == '\n')
648 if (msgid_plural != NULL)
649 {
650 const char *p;
651
652 if (TEST_NEWLINE(msgid_plural) != has_newline)
653 {
654 po_xerror (PO_SEVERITY_ERROR,
655 mp, msgid_pos->file_name, msgid_pos->line_number,
656 (size_t)(-1), false,
657 _("'msgid' and 'msgid_plural' entries do not both end with '\\n'"));
658 seen_errors++;
659 }
660 for (p = msgstr, j = 0; p < msgstr + msgstr_len; p += strlen (p) + 1, j++)
661 if (TEST_NEWLINE(p) != has_newline)
662 {
663 char *msg =
664 xasprintf (_("'msgid' and 'msgstr[%u]' entries do not both end with '\\n'"),
665 j);
666 po_xerror (PO_SEVERITY_ERROR,
667 mp, msgid_pos->file_name, msgid_pos->line_number,
668 (size_t)(-1), false, msg);
669 free (msg);
670 seen_errors++;
671 }
672 }
673 else
674 {
675 if (TEST_NEWLINE(msgstr) != has_newline)
676 {
677 po_xerror (PO_SEVERITY_ERROR,
678 mp, msgid_pos->file_name, msgid_pos->line_number,
679 (size_t)(-1), false,
680 _("'msgid' and 'msgstr' entries do not both end with '\\n'"));
681 seen_errors++;
682 }
683 }
684 #undef TEST_NEWLINE
685 }
686
687 if (check_compatibility && msgid_plural != NULL)
688 {
689 po_xerror (PO_SEVERITY_ERROR,
690 mp, msgid_pos->file_name, msgid_pos->line_number,
691 (size_t)(-1), false,
692 _("plural handling is a GNU gettext extension"));
693 seen_errors++;
694 }
695
696 if (check_format_strings)
697 /* Test 3: Check whether both formats strings contain the same number
698 of format specifications. */
699 {
700 curr_mp = mp;
701 curr_msgid_pos = *msgid_pos;
702 seen_errors +=
703 check_msgid_msgstr_format (msgid, msgid_plural, msgstr, msgstr_len,
704 is_format, mp->range, distribution,
705 formatstring_error_logger);
706 }
707
708 if (check_accelerators && msgid_plural == NULL)
709 /* Test 4: Check that if msgid is a menu item with a keyboard accelerator,
710 the msgstr has an accelerator as well. A keyboard accelerator is
711 designated by an immediately preceding '&'. We cannot check whether
712 two accelerators collide, only whether the translator has bothered
713 thinking about them. */
714 {
715 const char *p;
716
717 /* We are only interested in msgids that contain exactly one '&'. */
718 p = strchr (msgid, accelerator_char);
719 if (p != NULL && strchr (p + 1, accelerator_char) == NULL)
720 {
721 /* Count the number of '&' in msgstr, but ignore '&&'. */
722 unsigned int count = 0;
723
724 for (p = msgstr; (p = strchr (p, accelerator_char)) != NULL; p++)
725 if (p[1] == accelerator_char)
726 p++;
727 else
728 count++;
729
730 if (count == 0)
731 {
732 char *msg =
733 xasprintf (_("msgstr lacks the keyboard accelerator mark '%c'"),
734 accelerator_char);
735 po_xerror (PO_SEVERITY_ERROR,
736 mp, msgid_pos->file_name, msgid_pos->line_number,
737 (size_t)(-1), false, msg);
738 free (msg);
739 seen_errors++;
740 }
741 else if (count > 1)
742 {
743 char *msg =
744 xasprintf (_("msgstr has too many keyboard accelerator marks '%c'"),
745 accelerator_char);
746 po_xerror (PO_SEVERITY_ERROR,
747 mp, msgid_pos->file_name, msgid_pos->line_number,
748 (size_t)(-1), false, msg);
749 free (msg);
750 seen_errors++;
751 }
752 }
753 }
754
755 return seen_errors;
756 }
757
758
759 /* Perform miscellaneous checks on a header entry. */
760 static int
check_header_entry(const message_ty * mp,const char * msgstr_string)761 check_header_entry (const message_ty *mp, const char *msgstr_string)
762 {
763 static const char *required_fields[] =
764 {
765 "Project-Id-Version", "PO-Revision-Date", "Last-Translator",
766 "Language-Team", "MIME-Version", "Content-Type",
767 "Content-Transfer-Encoding",
768 /* These are recommended but not yet required. */
769 "Language"
770 };
771 static const char *default_values[] =
772 {
773 "PACKAGE VERSION", "YEAR-MO-DA HO:MI+ZONE", "FULL NAME <EMAIL@ADDRESS>", "LANGUAGE <LL@li.org>", NULL,
774 "text/plain; charset=CHARSET", "ENCODING",
775 ""
776 };
777 const size_t nfields = SIZEOF (required_fields);
778 /* FIXME: We could check if a required header field is missing and
779 report it as error. However, it's could be too rigorous and
780 break backward compatibility. */
781 #if 0
782 const size_t nrequiredfields = nfields - 1;
783 #endif
784 int seen_errors = 0;
785 int cnt;
786
787 for (cnt = 0; cnt < nfields; ++cnt)
788 {
789 #if 0
790 int severity =
791 (cnt < nrequiredfields ? PO_SEVERITY_ERROR : PO_SEVERITY_WARNING);
792 #else
793 int severity =
794 PO_SEVERITY_WARNING;
795 #endif
796 const char *field = required_fields[cnt];
797 size_t len = strlen (field);
798 const char *line;
799
800 for (line = msgstr_string; *line != '\0'; )
801 {
802 if (strncmp (line, field, len) == 0 && line[len] == ':')
803 {
804 const char *p = line + len + 1;
805
806 /* Test whether the field's value, starting at p, is the default
807 value. */
808 if (*p == ' ')
809 p++;
810 if (default_values[cnt] != NULL
811 && strncmp (p, default_values[cnt],
812 strlen (default_values[cnt])) == 0)
813 {
814 p += strlen (default_values[cnt]);
815 if (*p == '\0' || *p == '\n')
816 {
817 char *msg =
818 xasprintf (_("header field '%s' still has the initial default value\n"),
819 field);
820 po_xerror (severity, mp, NULL, 0, 0, true, msg);
821 free (msg);
822 if (severity == PO_SEVERITY_ERROR)
823 seen_errors++;
824 }
825 }
826 break;
827 }
828 line = strchrnul (line, '\n');
829 if (*line == '\n')
830 line++;
831 }
832 if (*line == '\0')
833 {
834 char *msg =
835 xasprintf (_("header field '%s' missing in header\n"),
836 field);
837 po_xerror (severity, mp, NULL, 0, 0, true, msg);
838 free (msg);
839 if (severity == PO_SEVERITY_ERROR)
840 seen_errors++;
841 }
842 }
843 return seen_errors;
844 }
845
846
847 /* Perform all checks on a non-obsolete message.
848 Return the number of errors that were seen. */
849 int
check_message(const message_ty * mp,const lex_pos_ty * msgid_pos,int check_newlines,int check_format_strings,const struct plural_distribution * distribution,int check_header,int check_compatibility,int check_accelerators,char accelerator_char)850 check_message (const message_ty *mp,
851 const lex_pos_ty *msgid_pos,
852 int check_newlines,
853 int check_format_strings,
854 const struct plural_distribution *distribution,
855 int check_header,
856 int check_compatibility,
857 int check_accelerators, char accelerator_char)
858 {
859 int seen_errors = 0;
860
861 if (check_header && is_header (mp))
862 seen_errors += check_header_entry (mp, mp->msgstr);
863
864 seen_errors += check_pair (mp,
865 mp->msgid, msgid_pos, mp->msgid_plural,
866 mp->msgstr, mp->msgstr_len,
867 mp->is_format,
868 check_newlines,
869 check_format_strings,
870 distribution,
871 check_compatibility,
872 check_accelerators, accelerator_char);
873 return seen_errors;
874 }
875
876
877 /* Perform all checks on a message list.
878 Return the number of errors that were seen. */
879 int
check_message_list(message_list_ty * mlp,int ignore_untranslated_messages,int ignore_fuzzy_messages,int check_newlines,int check_format_strings,int check_header,int check_compatibility,int check_accelerators,char accelerator_char)880 check_message_list (message_list_ty *mlp,
881 int ignore_untranslated_messages,
882 int ignore_fuzzy_messages,
883 int check_newlines,
884 int check_format_strings,
885 int check_header,
886 int check_compatibility,
887 int check_accelerators, char accelerator_char)
888 {
889 int seen_errors = 0;
890 struct plural_distribution distribution;
891 size_t j;
892
893 distribution.expr = NULL;
894 distribution.often = NULL;
895 distribution.often_length = 0;
896 distribution.histogram = NULL;
897
898 if (check_header)
899 seen_errors += check_plural (mlp, ignore_untranslated_messages,
900 ignore_fuzzy_messages, &distribution);
901
902 for (j = 0; j < mlp->nitems; j++)
903 {
904 message_ty *mp = mlp->item[j];
905
906 if (!mp->obsolete
907 && !(ignore_untranslated_messages && mp->msgstr[0] == '\0')
908 && !(ignore_fuzzy_messages && (mp->is_fuzzy && !is_header (mp))))
909 seen_errors += check_message (mp, &mp->pos,
910 check_newlines,
911 check_format_strings,
912 &distribution,
913 check_header, check_compatibility,
914 check_accelerators, accelerator_char);
915 }
916
917 return seen_errors;
918 }
919
920
921 static int
syntax_check_ellipsis_unicode(const message_ty * mp,const char * msgid)922 syntax_check_ellipsis_unicode (const message_ty *mp, const char *msgid)
923 {
924 const char *str = msgid;
925 const char *str_limit = str + strlen (msgid);
926 int seen_errors = 0;
927
928 while (str < str_limit)
929 {
930 const char *end, *cp;
931 ucs4_t ending_char;
932
933 end = sentence_end (str, &ending_char);
934
935 /* sentence_end doesn't treat '...' specially. */
936 cp = end - (ending_char == '.' ? 2 : 3);
937 if (cp >= str && memcmp (cp, "...", 3) == 0)
938 {
939 po_xerror (PO_SEVERITY_ERROR, mp, NULL, 0, 0, false,
940 _("ASCII ellipsis ('...') instead of Unicode"));
941 seen_errors++;
942 }
943
944 str = end + 1;
945 }
946
947 return seen_errors;
948 }
949
950
951 static int
syntax_check_space_ellipsis(const message_ty * mp,const char * msgid)952 syntax_check_space_ellipsis (const message_ty *mp, const char *msgid)
953 {
954 const char *str = msgid;
955 const char *str_limit = str + strlen (msgid);
956 int seen_errors = 0;
957
958 while (str < str_limit)
959 {
960 const char *end, *ellipsis = NULL;
961 ucs4_t ending_char;
962
963 end = sentence_end (str, &ending_char);
964
965 if (ending_char == 0x2026)
966 ellipsis = end;
967 else if (ending_char == '.')
968 {
969 /* sentence_end doesn't treat '...' specially. */
970 const char *cp = end - 2;
971 if (cp >= str && memcmp (cp, "...", 3) == 0)
972 ellipsis = cp;
973 }
974 else
975 {
976 /* Look for a '...'. */
977 const char *cp = end - 3;
978 if (cp >= str && memcmp (cp, "...", 3) == 0)
979 ellipsis = cp;
980 else
981 {
982 ucs4_t uc = 0xfffd;
983
984 /* Look for a U+2026. */
985 for (cp = end - 1; cp >= str; cp--)
986 {
987 u8_mbtouc (&uc, (const unsigned char *) cp, end - cp);
988 if (uc != 0xfffd)
989 break;
990 }
991
992 if (uc == 0x2026)
993 ellipsis = cp;
994 }
995 }
996
997 if (ellipsis)
998 {
999 const char *cp;
1000 ucs4_t uc = 0xfffd;
1001
1002 /* Look at the character before ellipsis. */
1003 for (cp = ellipsis - 1; cp >= str; cp--)
1004 {
1005 u8_mbtouc (&uc, (const unsigned char *) cp, ellipsis - cp);
1006 if (uc != 0xfffd)
1007 break;
1008 }
1009
1010 if (uc != 0xfffd && uc_is_space (uc))
1011 {
1012 po_xerror (PO_SEVERITY_ERROR, mp, NULL, 0, 0, false,
1013 _("space before ellipsis found in user visible strings"));
1014 seen_errors++;
1015 }
1016 }
1017
1018 str = end + 1;
1019 }
1020
1021 return seen_errors;
1022 }
1023
1024
1025 struct callback_arg
1026 {
1027 const message_ty *mp;
1028 int seen_errors;
1029 };
1030
1031 static void
syntax_check_quote_unicode_callback(char quote,const char * quoted,size_t quoted_length,void * data)1032 syntax_check_quote_unicode_callback (char quote, const char *quoted,
1033 size_t quoted_length, void *data)
1034 {
1035 struct callback_arg *arg = data;
1036
1037 switch (quote)
1038 {
1039 case '"':
1040 po_xerror (PO_SEVERITY_ERROR, arg->mp, NULL, 0, 0, false,
1041 _("ASCII double quote used instead of Unicode"));
1042 arg->seen_errors++;
1043 break;
1044
1045 case '\'':
1046 po_xerror (PO_SEVERITY_ERROR, arg->mp, NULL, 0, 0, false,
1047 _("ASCII single quote used instead of Unicode"));
1048 arg->seen_errors++;
1049 break;
1050
1051 default:
1052 break;
1053 }
1054 }
1055
1056 static int
syntax_check_quote_unicode(const message_ty * mp,const char * msgid)1057 syntax_check_quote_unicode (const message_ty *mp, const char *msgid)
1058 {
1059 struct callback_arg arg;
1060
1061 arg.mp = mp;
1062 arg.seen_errors = 0;
1063
1064 scan_quoted (msgid, strlen (msgid),
1065 syntax_check_quote_unicode_callback, &arg);
1066
1067 return arg.seen_errors;
1068 }
1069
1070 struct bullet_ty
1071 {
1072 int c;
1073 size_t depth;
1074 };
1075
1076 struct bullet_stack_ty
1077 {
1078 struct bullet_ty *items;
1079 size_t nitems;
1080 size_t nitems_max;
1081 };
1082
1083 static struct bullet_stack_ty bullet_stack;
1084
1085 static int
syntax_check_bullet_unicode(const message_ty * mp,const char * msgid)1086 syntax_check_bullet_unicode (const message_ty *mp, const char *msgid)
1087 {
1088 const char *str = msgid;
1089 const char *str_limit = str + strlen (msgid);
1090 struct bullet_ty *last_bullet = NULL;
1091 bool seen_error = false;
1092
1093 bullet_stack.nitems = 0;
1094
1095 while (str < str_limit)
1096 {
1097 const char *p = str, *end;
1098
1099 while (p < str_limit && c_isspace (*p))
1100 p++;
1101
1102 if ((*p == '*' || *p == '-') && *(p + 1) == ' ')
1103 {
1104 size_t depth = p - str;
1105 if (last_bullet == NULL || depth > last_bullet->depth)
1106 {
1107 struct bullet_ty bullet;
1108
1109 bullet.c = *p;
1110 bullet.depth = depth;
1111
1112 if (bullet_stack.nitems >= bullet_stack.nitems_max)
1113 {
1114 bullet_stack.nitems_max = 2 * bullet_stack.nitems_max + 4;
1115 bullet_stack.items = xrealloc (bullet_stack.items,
1116 bullet_stack.nitems_max
1117 * sizeof (struct bullet_ty));
1118 }
1119
1120 last_bullet = &bullet_stack.items[bullet_stack.nitems++];
1121 memcpy (last_bullet, &bullet, sizeof (struct bullet_ty));
1122 }
1123 else
1124 {
1125 if (depth < last_bullet->depth)
1126 {
1127 if (bullet_stack.nitems > 1)
1128 {
1129 bullet_stack.nitems--;
1130 last_bullet =
1131 &bullet_stack.items[bullet_stack.nitems - 1];
1132 }
1133 else
1134 last_bullet = NULL;
1135 }
1136
1137 if (last_bullet && depth == last_bullet->depth)
1138 {
1139 if (last_bullet->c != *p)
1140 last_bullet->c = *p;
1141 else
1142 {
1143 seen_error = true;
1144 break;
1145 }
1146 }
1147 }
1148 }
1149 else
1150 {
1151 bullet_stack.nitems = 0;
1152 last_bullet = NULL;
1153 }
1154
1155 end = strchrnul (str, '\n');
1156 str = end + 1;
1157 }
1158
1159 if (seen_error)
1160 {
1161 char *msg;
1162 msg = xasprintf (_("ASCII bullet ('%c') instead of Unicode"),
1163 last_bullet->c);
1164 po_xerror (PO_SEVERITY_ERROR, mp, NULL, 0, 0, false, msg);
1165 free (msg);
1166 return 1;
1167 }
1168
1169 return 0;
1170 }
1171
1172
1173 typedef int (* syntax_check_function) (const message_ty *mp, const char *msgid);
1174 static const syntax_check_function sc_funcs[NSYNTAXCHECKS] =
1175 {
1176 syntax_check_ellipsis_unicode,
1177 syntax_check_space_ellipsis,
1178 syntax_check_quote_unicode,
1179 syntax_check_bullet_unicode
1180 };
1181
1182 /* Perform all syntax checks on a non-obsolete message.
1183 Return the number of errors that were seen. */
1184 static int
syntax_check_message(const message_ty * mp)1185 syntax_check_message (const message_ty *mp)
1186 {
1187 int seen_errors = 0;
1188 int i;
1189
1190 for (i = 0; i < NSYNTAXCHECKS; i++)
1191 {
1192 if (mp->do_syntax_check[i] == yes)
1193 {
1194 seen_errors += sc_funcs[i] (mp, mp->msgid);
1195 if (mp->msgid_plural)
1196 seen_errors += sc_funcs[i] (mp, mp->msgid_plural);
1197 }
1198 }
1199
1200 return seen_errors;
1201 }
1202
1203
1204 /* Perform all syntax checks on a message list.
1205 Return the number of errors that were seen. */
1206 int
syntax_check_message_list(message_list_ty * mlp)1207 syntax_check_message_list (message_list_ty *mlp)
1208 {
1209 int seen_errors = 0;
1210 size_t j;
1211
1212 for (j = 0; j < mlp->nitems; j++)
1213 {
1214 message_ty *mp = mlp->item[j];
1215
1216 if (!is_header (mp))
1217 seen_errors += syntax_check_message (mp);
1218 }
1219
1220 return seen_errors;
1221 }
1222