1 /* Extracting a message. Accumulating the message list.
2 Copyright (C) 2001-2020 Free Software Foundation, Inc.
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
16
17 #ifdef HAVE_CONFIG_H
18 # include <config.h>
19 #endif
20
21 /* Specification. */
22 #include "xg-message.h"
23
24 #include <stdio.h>
25
26 #include "c-strstr.h"
27 #include "error-progname.h"
28 #include "format.h"
29 #include "read-catalog-abstract.h"
30 #include "xalloc.h"
31 #include "xerror.h"
32 #include "xvasprintf.h"
33
34 #include "xgettext.h"
35
36 #include "gettext.h"
37 #define _(str) gettext (str)
38
39
/* Pass STRING through from_current_source_encoding, using the lexical
   context LCONTEXT, and store the result back into STRING.
   The macro relies on a variable 'pos' being in scope at the point of use;
   its file_name and line_number members are passed along.
   The callers in this file skip the conversion when the string is already
   flagged as UTF-8, so the target encoding is presumably UTF-8 (see
   from_current_source_encoding to confirm).
   The body is wrapped in do ... while (0) so that an invocation followed by
   a semicolon is exactly one statement, also inside an unbraced if/else.  */
#define CONVERT_STRING(string, lcontext) \
  do                                                                        \
    {                                                                       \
      (string) = from_current_source_encoding ((string), (lcontext),       \
                                               pos->file_name,             \
                                               pos->line_number);          \
    }                                                                       \
  while (0)
43
44
45 /* Update the is_format[] flags depending on the information given in the
46 context. */
47 static void
set_format_flags_from_context(enum is_format is_format[NFORMATS],flag_context_ty context,const char * string,lex_pos_ty * pos,const char * pretty_msgstr)48 set_format_flags_from_context (enum is_format is_format[NFORMATS],
49 flag_context_ty context, const char *string,
50 lex_pos_ty *pos, const char *pretty_msgstr)
51 {
52 size_t i;
53
54 if (context.is_format1 != undecided
55 || context.is_format2 != undecided
56 || context.is_format3 != undecided)
57 for (i = 0; i < NFORMATS; i++)
58 {
59 if (is_format[i] == undecided)
60 {
61 if (formatstring_parsers[i] == current_formatstring_parser1
62 && context.is_format1 != undecided)
63 is_format[i] = (enum is_format) context.is_format1;
64 if (formatstring_parsers[i] == current_formatstring_parser2
65 && context.is_format2 != undecided)
66 is_format[i] = (enum is_format) context.is_format2;
67 if (formatstring_parsers[i] == current_formatstring_parser3
68 && context.is_format3 != undecided)
69 is_format[i] = (enum is_format) context.is_format3;
70 }
71 if (possible_format_p (is_format[i]))
72 {
73 struct formatstring_parser *parser = formatstring_parsers[i];
74 char *invalid_reason = NULL;
75 void *descr = parser->parse (string, false, NULL, &invalid_reason);
76
77 if (descr != NULL)
78 parser->free (descr);
79 else
80 {
81 /* The string is not a valid format string. */
82 if (is_format[i] != possible)
83 {
84 char buffer[21];
85
86 error_with_progname = false;
87 if (pos->line_number == (size_t)(-1))
88 buffer[0] = '\0';
89 else
90 sprintf (buffer, ":%ld", (long) pos->line_number);
91 multiline_warning (xasprintf (_("%s%s: warning: "),
92 pos->file_name, buffer),
93 xasprintf (is_format[i] == yes_according_to_context
94 ? _("Although being used in a format string position, the %s is not a valid %s format string. Reason: %s\n")
95 : _("Although declared as such, the %s is not a valid %s format string. Reason: %s\n"),
96 pretty_msgstr,
97 format_language_pretty[i],
98 invalid_reason));
99 error_with_progname = true;
100 }
101
102 is_format[i] = impossible;
103 free (invalid_reason);
104 }
105 }
106 }
107 }
108
109
110 void
decide_is_format(message_ty * mp)111 decide_is_format (message_ty *mp)
112 {
113 size_t i;
114
115 /* If it is not already decided, through programmer comments, whether the
116 msgid is a format string, examine the msgid. This is a heuristic. */
117 for (i = 0; i < NFORMATS; i++)
118 {
119 if (mp->is_format[i] == undecided
120 && (formatstring_parsers[i] == current_formatstring_parser1
121 || formatstring_parsers[i] == current_formatstring_parser2
122 || formatstring_parsers[i] == current_formatstring_parser3)
123 /* But avoid redundancy: objc-format is stronger than c-format. */
124 && !(i == format_c && possible_format_p (mp->is_format[format_objc]))
125 && !(i == format_objc && possible_format_p (mp->is_format[format_c]))
126 /* Avoid flagging a string as c-format when it's known to be a
127 qt-format or qt-plural-format or kde-format or boost-format
128 string. */
129 && !(i == format_c
130 && (possible_format_p (mp->is_format[format_qt])
131 || possible_format_p (mp->is_format[format_qt_plural])
132 || possible_format_p (mp->is_format[format_kde])
133 || possible_format_p (mp->is_format[format_kde_kuit])
134 || possible_format_p (mp->is_format[format_boost])))
135 /* Avoid flagging a string as kde-format when it's known to
136 be a kde-kuit-format string. */
137 && !(i == format_kde
138 && possible_format_p (mp->is_format[format_kde_kuit]))
139 /* Avoid flagging a string as kde-kuit-format when it's
140 known to be a kde-format string. Note that this relies
141 on the fact that format_kde < format_kde_kuit, so a
142 string will be marked as kde-format if both are
143 undecided. */
144 && !(i == format_kde_kuit
145 && possible_format_p (mp->is_format[format_kde])))
146 {
147 struct formatstring_parser *parser = formatstring_parsers[i];
148 char *invalid_reason = NULL;
149 void *descr = parser->parse (mp->msgid, false, NULL, &invalid_reason);
150
151 if (descr != NULL)
152 {
153 /* msgid is a valid format string. We mark only those msgids
154 as format strings which contain at least one format directive
155 and thus are format strings with a high probability. We
156 don't mark strings without directives as format strings,
157 because that would force the programmer to add
158 "xgettext: no-c-format" anywhere where a translator wishes
159 to use a percent sign. So, the msgfmt checking will not be
160 perfect. Oh well. */
161 if (parser->get_number_of_directives (descr) > 0
162 && !(parser->is_unlikely_intentional != NULL
163 && parser->is_unlikely_intentional (descr)))
164 mp->is_format[i] = possible;
165
166 parser->free (descr);
167 }
168 else
169 {
170 /* msgid is not a valid format string. */
171 mp->is_format[i] = impossible;
172 free (invalid_reason);
173 }
174 }
175 }
176 }
177
178 void
intersect_range(message_ty * mp,const struct argument_range * range)179 intersect_range (message_ty *mp, const struct argument_range *range)
180 {
181 if (has_range_p (*range))
182 {
183 if (has_range_p (mp->range))
184 {
185 if (range->min < mp->range.min)
186 mp->range.min = range->min;
187 if (range->max > mp->range.max)
188 mp->range.max = range->max;
189 }
190 else
191 mp->range = *range;
192 }
193 }
194
195 void
decide_do_wrap(message_ty * mp)196 decide_do_wrap (message_ty *mp)
197 {
198 /* By default we wrap. */
199 mp->do_wrap = (mp->do_wrap == no ? no : yes);
200 }
201
202 void
decide_syntax_check(message_ty * mp)203 decide_syntax_check (message_ty *mp)
204 {
205 size_t i;
206
207 for (i = 0; i < NSYNTAXCHECKS; i++)
208 if (mp->do_syntax_check[i] == undecided)
209 mp->do_syntax_check[i] = default_syntax_check[i] == yes ? yes : no;
210 }
211
212
213 static void
warn_format_string(enum is_format is_format[NFORMATS],const char * string,lex_pos_ty * pos,const char * pretty_msgstr)214 warn_format_string (enum is_format is_format[NFORMATS], const char *string,
215 lex_pos_ty *pos, const char *pretty_msgstr)
216 {
217 if (possible_format_p (is_format[format_python])
218 && get_python_format_unnamed_arg_count (string) > 1)
219 {
220 char buffer[21];
221
222 error_with_progname = false;
223 if (pos->line_number == (size_t)(-1))
224 buffer[0] = '\0';
225 else
226 sprintf (buffer, ":%ld", (long) pos->line_number);
227 multiline_warning (xasprintf (_("%s%s: warning: "),
228 pos->file_name, buffer),
229 xasprintf (_("\
230 '%s' format string with unnamed arguments cannot be properly localized:\n\
231 The translator cannot reorder the arguments.\n\
232 Please consider using a format string with named arguments,\n\
233 and a mapping instead of a tuple for the arguments.\n"),
234 pretty_msgstr));
235 error_with_progname = true;
236 }
237 }
238
239
240 message_ty *
remember_a_message(message_list_ty * mlp,char * msgctxt,char * msgid,bool is_utf8,bool pluralp,flag_context_ty context,lex_pos_ty * pos,const char * extracted_comment,refcounted_string_list_ty * comment,bool comment_is_utf8)241 remember_a_message (message_list_ty *mlp, char *msgctxt, char *msgid,
242 bool is_utf8, bool pluralp, flag_context_ty context,
243 lex_pos_ty *pos,
244 const char *extracted_comment,
245 refcounted_string_list_ty *comment, bool comment_is_utf8)
246 {
247 enum is_format is_format[NFORMATS];
248 struct argument_range range;
249 enum is_wrap do_wrap;
250 enum is_syntax_check do_syntax_check[NSYNTAXCHECKS];
251 message_ty *mp;
252 char *msgstr;
253 size_t i;
254
255 /* See whether we shall exclude this message. */
256 if (exclude != NULL && message_list_search (exclude, msgctxt, msgid) != NULL)
257 {
258 /* Tell the lexer to reset its comment buffer, so that the next
259 message gets the correct comments. */
260 xgettext_comment_reset ();
261 savable_comment_reset ();
262
263 if (msgctxt != NULL)
264 free (msgctxt);
265 free (msgid);
266
267 return NULL;
268 }
269
270 savable_comment_to_xgettext_comment (comment);
271
272 for (i = 0; i < NFORMATS; i++)
273 is_format[i] = undecided;
274 range.min = -1;
275 range.max = -1;
276 do_wrap = undecided;
277 for (i = 0; i < NSYNTAXCHECKS; i++)
278 do_syntax_check[i] = undecided;
279
280 if (!is_utf8)
281 {
282 if (msgctxt != NULL)
283 CONVERT_STRING (msgctxt, lc_string);
284 CONVERT_STRING (msgid, lc_string);
285 }
286
287 if (msgctxt == NULL && msgid[0] == '\0' && !xgettext_omit_header)
288 {
289 char buffer[21];
290
291 error_with_progname = false;
292 if (pos->line_number == (size_t)(-1))
293 buffer[0] = '\0';
294 else
295 sprintf (buffer, ":%ld", (long) pos->line_number);
296 multiline_warning (xasprintf (_("%s%s: warning: "), pos->file_name,
297 buffer),
298 xstrdup (_("\
299 Empty msgid. It is reserved by GNU gettext:\n\
300 gettext(\"\") returns the header entry with\n\
301 meta information, not the empty string.\n")));
302 error_with_progname = true;
303 }
304
305 /* See if we have seen this message before. */
306 mp = message_list_search (mlp, msgctxt, msgid);
307 if (mp != NULL)
308 {
309 if (pluralp != (mp->msgid_plural != NULL))
310 {
311 lex_pos_ty pos1;
312 lex_pos_ty pos2;
313 char buffer1[21];
314 char buffer2[21];
315
316 if (pluralp)
317 {
318 pos1 = mp->pos;
319 pos2 = *pos;
320 }
321 else
322 {
323 pos1 = *pos;
324 pos2 = mp->pos;
325 }
326
327 if (pos1.line_number == (size_t)(-1))
328 buffer1[0] = '\0';
329 else
330 sprintf (buffer1, ":%ld", (long) pos1.line_number);
331 if (pos2.line_number == (size_t)(-1))
332 buffer2[0] = '\0';
333 else
334 sprintf (buffer2, ":%ld", (long) pos2.line_number);
335 multiline_warning (xstrdup (_("warning: ")),
336 xasprintf ("%s\n%s\n%s\n%s\n",
337 xasprintf (_("msgid '%s' is used without plural and with plural."),
338 msgid),
339 xasprintf (_("%s%s: Here is the occurrence without plural."),
340 pos1.file_name, buffer1),
341 xasprintf (_("%s%s: Here is the occurrence with plural."),
342 pos2.file_name, buffer2),
343 xstrdup (_("Workaround: If the msgid is a sentence, change the wording of the sentence; otherwise, use contexts for disambiguation."))));
344 }
345
346 if (msgctxt != NULL)
347 free (msgctxt);
348 free (msgid);
349 for (i = 0; i < NFORMATS; i++)
350 is_format[i] = mp->is_format[i];
351 do_wrap = mp->do_wrap;
352 for (i = 0; i < NSYNTAXCHECKS; i++)
353 do_syntax_check[i] = mp->do_syntax_check[i];
354 }
355 else
356 {
357 /* Construct the msgstr from the prefix and suffix, otherwise use the
358 empty string. */
359 if (msgstr_prefix)
360 msgstr = xasprintf ("%s%s%s", msgstr_prefix, msgid, msgstr_suffix);
361 else
362 msgstr = "";
363
364 /* Allocate a new message and append the message to the list. */
365 mp = message_alloc (msgctxt, msgid, NULL, msgstr, strlen (msgstr) + 1,
366 pos);
367 /* Do not free msgctxt and msgid. */
368 message_list_append (mlp, mp);
369 }
370
371 /* Determine whether the context specifies that the msgid is a format
372 string. */
373 set_format_flags_from_context (is_format, context, mp->msgid, pos, "msgid");
374
375 /* Ask the lexer for the comments it has seen. */
376 {
377 size_t nitems_before;
378 size_t nitems_after;
379 int j;
380 bool add_all_remaining_comments;
381 /* The string before the comment tag. For example, If "** TRANSLATORS:"
382 is seen and the comment tag is "TRANSLATORS:",
383 then comment_tag_prefix is set to "** ". */
384 const char *comment_tag_prefix = "";
385 size_t comment_tag_prefix_length = 0;
386
387 nitems_before = (mp->comment_dot != NULL ? mp->comment_dot->nitems : 0);
388
389 if (extracted_comment != NULL)
390 {
391 char *copy = xstrdup (extracted_comment);
392 char *rest;
393
394 rest = copy;
395 while (*rest != '\0')
396 {
397 char *newline = strchr (rest, '\n');
398
399 if (newline != NULL)
400 {
401 *newline = '\0';
402 message_comment_dot_append (mp, rest);
403 rest = newline + 1;
404 }
405 else
406 {
407 message_comment_dot_append (mp, rest);
408 break;
409 }
410 }
411 free (copy);
412 }
413
414 add_all_remaining_comments = add_all_comments;
415 for (j = 0; ; ++j)
416 {
417 const char *s = xgettext_comment (j);
418 const char *t;
419 if (s == NULL)
420 break;
421
422 if (!comment_is_utf8)
423 CONVERT_STRING (s, lc_comment);
424
425 /* To reduce the possibility of unwanted matches we do a two
426 step match: the line must contain 'xgettext:' and one of
427 the possible format description strings. */
428 if ((t = c_strstr (s, "xgettext:")) != NULL)
429 {
430 bool tmp_fuzzy;
431 enum is_format tmp_format[NFORMATS];
432 struct argument_range tmp_range;
433 enum is_wrap tmp_wrap;
434 enum is_syntax_check tmp_syntax_check[NSYNTAXCHECKS];
435 bool interesting;
436
437 t += strlen ("xgettext:");
438
439 po_parse_comment_special (t, &tmp_fuzzy, tmp_format, &tmp_range,
440 &tmp_wrap, tmp_syntax_check);
441
442 interesting = false;
443 for (i = 0; i < NFORMATS; i++)
444 if (tmp_format[i] != undecided)
445 {
446 is_format[i] = tmp_format[i];
447 interesting = true;
448 }
449 if (has_range_p (tmp_range))
450 {
451 range = tmp_range;
452 interesting = true;
453 }
454 if (tmp_wrap != undecided)
455 {
456 do_wrap = tmp_wrap;
457 interesting = true;
458 }
459 for (i = 0; i < NSYNTAXCHECKS; i++)
460 if (tmp_syntax_check[i] != undecided)
461 {
462 do_syntax_check[i] = tmp_syntax_check[i];
463 interesting = true;
464 }
465
466 /* If the "xgettext:" marker was followed by an interesting
467 keyword, and we updated our is_format/do_wrap variables,
468 we don't print the comment as a #. comment. */
469 if (interesting)
470 continue;
471 }
472
473 if (!add_all_remaining_comments && comment_tag != NULL)
474 {
475 /* When the comment tag is seen, it drags in not only the line
476 which it starts, but all remaining comment lines. */
477 if ((t = c_strstr (s, comment_tag)) != NULL)
478 {
479 add_all_remaining_comments = true;
480 comment_tag_prefix = s;
481 comment_tag_prefix_length = t - s;
482 }
483 }
484
485 if (add_all_remaining_comments)
486 {
487 if (strncmp (s, comment_tag_prefix, comment_tag_prefix_length) == 0)
488 s += comment_tag_prefix_length;
489 message_comment_dot_append (mp, s);
490 }
491 }
492
493 nitems_after = (mp->comment_dot != NULL ? mp->comment_dot->nitems : 0);
494
495 /* Don't add the comments if they are a repetition of the tail of the
496 already present comments. This avoids unneeded duplication if the
497 same message appears several times, each time with the same comment. */
498 if (nitems_before < nitems_after)
499 {
500 size_t added = nitems_after - nitems_before;
501
502 if (added <= nitems_before)
503 {
504 bool repeated = true;
505
506 for (i = 0; i < added; i++)
507 if (strcmp (mp->comment_dot->item[nitems_before - added + i],
508 mp->comment_dot->item[nitems_before + i]) != 0)
509 {
510 repeated = false;
511 break;
512 }
513
514 if (repeated)
515 {
516 for (i = 0; i < added; i++)
517 free ((char *) mp->comment_dot->item[nitems_before + i]);
518 mp->comment_dot->nitems = nitems_before;
519 }
520 }
521 }
522 }
523
524 for (i = 0; i < NFORMATS; i++)
525 mp->is_format[i] = is_format[i];
526 decide_is_format (mp);
527
528 intersect_range (mp, &range);
529
530 mp->do_wrap = do_wrap;
531 decide_do_wrap (mp);
532
533 for (i = 0; i < NSYNTAXCHECKS; i++)
534 mp->do_syntax_check[i] = do_syntax_check[i];
535 decide_syntax_check (mp);
536
537 /* Warn about the use of non-reorderable format strings when the programming
538 language also provides reorderable format strings. */
539 warn_format_string (is_format, mp->msgid, pos, "msgid");
540
541 /* Remember where we saw this msgid. */
542 message_comment_filepos (mp, pos->file_name, pos->line_number);
543
544 /* Tell the lexer to reset its comment buffer, so that the next
545 message gets the correct comments. */
546 xgettext_comment_reset ();
547 savable_comment_reset ();
548
549 return mp;
550 }
551
552
553 void
remember_a_message_plural(message_ty * mp,char * string,bool is_utf8,flag_context_ty context,lex_pos_ty * pos,refcounted_string_list_ty * comment,bool comment_is_utf8)554 remember_a_message_plural (message_ty *mp, char *string, bool is_utf8,
555 flag_context_ty context, lex_pos_ty *pos,
556 refcounted_string_list_ty *comment,
557 bool comment_is_utf8)
558 {
559 char *msgid_plural;
560 char *msgstr1;
561 size_t msgstr1_len;
562 char *msgstr;
563 size_t i;
564
565 msgid_plural = string;
566
567 savable_comment_to_xgettext_comment (comment);
568
569 if (!is_utf8)
570 CONVERT_STRING (msgid_plural, lc_string);
571
572 /* See if the message is already a plural message. */
573 if (mp->msgid_plural == NULL)
574 {
575 mp->msgid_plural = msgid_plural;
576
577 /* Construct the first plural form from the prefix and suffix,
578 otherwise use the empty string. The translator will have to
579 provide additional plural forms. */
580 if (msgstr_prefix)
581 msgstr1 =
582 xasprintf ("%s%s%s", msgstr_prefix, msgid_plural, msgstr_suffix);
583 else
584 msgstr1 = "";
585 msgstr1_len = strlen (msgstr1) + 1;
586 msgstr = XNMALLOC (mp->msgstr_len + msgstr1_len, char);
587 memcpy (msgstr, mp->msgstr, mp->msgstr_len);
588 memcpy (msgstr + mp->msgstr_len, msgstr1, msgstr1_len);
589 mp->msgstr = msgstr;
590 mp->msgstr_len = mp->msgstr_len + msgstr1_len;
591 if (msgstr_prefix)
592 free (msgstr1);
593
594 /* Determine whether the context specifies that the msgid_plural is a
595 format string. */
596 set_format_flags_from_context (mp->is_format, context, mp->msgid_plural,
597 pos, "msgid_plural");
598
599 /* If it is not already decided, through programmer comments or
600 the msgid, whether the msgid is a format string, examine the
601 msgid_plural. This is a heuristic. */
602 for (i = 0; i < NFORMATS; i++)
603 if ((formatstring_parsers[i] == current_formatstring_parser1
604 || formatstring_parsers[i] == current_formatstring_parser2
605 || formatstring_parsers[i] == current_formatstring_parser3)
606 && (mp->is_format[i] == undecided || mp->is_format[i] == possible)
607 /* But avoid redundancy: objc-format is stronger than c-format. */
608 && !(i == format_c
609 && possible_format_p (mp->is_format[format_objc]))
610 && !(i == format_objc
611 && possible_format_p (mp->is_format[format_c]))
612 /* Avoid flagging a string as c-format when it's known to be a
613 qt-format or qt-plural-format or boost-format string. */
614 && !(i == format_c
615 && (possible_format_p (mp->is_format[format_qt])
616 || possible_format_p (mp->is_format[format_qt_plural])
617 || possible_format_p (mp->is_format[format_kde])
618 || possible_format_p (mp->is_format[format_kde_kuit])
619 || possible_format_p (mp->is_format[format_boost])))
620 /* Avoid flagging a string as kde-format when it's known
621 to be a kde-kuit-format string. */
622 && !(i == format_kde
623 && possible_format_p (mp->is_format[format_kde_kuit]))
624 /* Avoid flagging a string as kde-kuit-format when it's
625 known to be a kde-format string. Note that this relies
626 on the fact that format_kde < format_kde_kuit, so a
627 string will be marked as kde-format if both are
628 undecided. */
629 && !(i == format_kde_kuit
630 && possible_format_p (mp->is_format[format_kde])))
631 {
632 struct formatstring_parser *parser = formatstring_parsers[i];
633 char *invalid_reason = NULL;
634 void *descr =
635 parser->parse (mp->msgid_plural, false, NULL, &invalid_reason);
636
637 if (descr != NULL)
638 {
639 /* Same heuristic as in remember_a_message. */
640 if (parser->get_number_of_directives (descr) > 0
641 && !(parser->is_unlikely_intentional != NULL
642 && parser->is_unlikely_intentional (descr)))
643 mp->is_format[i] = possible;
644
645 parser->free (descr);
646 }
647 else
648 {
649 /* msgid_plural is not a valid format string. */
650 mp->is_format[i] = impossible;
651 free (invalid_reason);
652 }
653 }
654
655 /* Warn about the use of non-reorderable format strings when the programming
656 language also provides reorderable format strings. */
657 warn_format_string (mp->is_format, mp->msgid_plural, pos, "msgid_plural");
658 }
659 else
660 free (msgid_plural);
661
662 /* Tell the lexer to reset its comment buffer, so that the next
663 message gets the correct comments. */
664 xgettext_comment_reset ();
665 savable_comment_reset ();
666 }
667