• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Extracting a message.  Accumulating the message list.
2    Copyright (C) 2001-2020 Free Software Foundation, Inc.
3 
4    This program is free software: you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; either version 3 of the License, or
7    (at your option) any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
16 
17 #ifdef HAVE_CONFIG_H
18 # include <config.h>
19 #endif
20 
21 /* Specification.  */
22 #include "xg-message.h"
23 
24 #include <stdio.h>
25 
26 #include "c-strstr.h"
27 #include "error-progname.h"
28 #include "format.h"
29 #include "read-catalog-abstract.h"
30 #include "xalloc.h"
31 #include "xerror.h"
32 #include "xvasprintf.h"
33 
34 #include "xgettext.h"
35 
36 #include "gettext.h"
37 #define _(str) gettext (str)
38 
39 
/* Replace STRING (which must be an assignable lvalue) by the result of
   from_current_source_encoding applied to it with context LCONTEXT.
   The expansion refers to a variable named 'pos' that must be in scope at
   the point of use and must point to an object with 'file_name' and
   'line_number' members.
   The body is wrapped in do { } while (0) and carries no trailing semicolon,
   so that "CONVERT_STRING (s, c);" is exactly one statement and can be used
   safely as the body of an if/else.  */
#define CONVERT_STRING(string, lcontext) \
  do                                                                        \
    {                                                                       \
      (string) = from_current_source_encoding ((string), (lcontext),        \
                                               pos->file_name,              \
                                               pos->line_number);           \
    }                                                                       \
  while (0)
43 
44 
45 /* Update the is_format[] flags depending on the information given in the
46    context.  */
47 static void
set_format_flags_from_context(enum is_format is_format[NFORMATS],flag_context_ty context,const char * string,lex_pos_ty * pos,const char * pretty_msgstr)48 set_format_flags_from_context (enum is_format is_format[NFORMATS],
49                                flag_context_ty context, const char *string,
50                                lex_pos_ty *pos, const char *pretty_msgstr)
51 {
52   size_t i;
53 
54   if (context.is_format1 != undecided
55       || context.is_format2 != undecided
56       || context.is_format3 != undecided)
57     for (i = 0; i < NFORMATS; i++)
58       {
59         if (is_format[i] == undecided)
60           {
61             if (formatstring_parsers[i] == current_formatstring_parser1
62                 && context.is_format1 != undecided)
63               is_format[i] = (enum is_format) context.is_format1;
64             if (formatstring_parsers[i] == current_formatstring_parser2
65                 && context.is_format2 != undecided)
66               is_format[i] = (enum is_format) context.is_format2;
67             if (formatstring_parsers[i] == current_formatstring_parser3
68                 && context.is_format3 != undecided)
69               is_format[i] = (enum is_format) context.is_format3;
70           }
71         if (possible_format_p (is_format[i]))
72           {
73             struct formatstring_parser *parser = formatstring_parsers[i];
74             char *invalid_reason = NULL;
75             void *descr = parser->parse (string, false, NULL, &invalid_reason);
76 
77             if (descr != NULL)
78               parser->free (descr);
79             else
80               {
81                 /* The string is not a valid format string.  */
82                 if (is_format[i] != possible)
83                   {
84                     char buffer[21];
85 
86                     error_with_progname = false;
87                     if (pos->line_number == (size_t)(-1))
88                       buffer[0] = '\0';
89                     else
90                       sprintf (buffer, ":%ld", (long) pos->line_number);
91                     multiline_warning (xasprintf (_("%s%s: warning: "),
92                                                   pos->file_name, buffer),
93                                        xasprintf (is_format[i] == yes_according_to_context
94                                                   ? _("Although being used in a format string position, the %s is not a valid %s format string. Reason: %s\n")
95                                                   : _("Although declared as such, the %s is not a valid %s format string. Reason: %s\n"),
96                                                   pretty_msgstr,
97                                                   format_language_pretty[i],
98                                                   invalid_reason));
99                     error_with_progname = true;
100                   }
101 
102                 is_format[i] = impossible;
103                 free (invalid_reason);
104               }
105           }
106       }
107 }
108 
109 
110 void
decide_is_format(message_ty * mp)111 decide_is_format (message_ty *mp)
112 {
113   size_t i;
114 
115   /* If it is not already decided, through programmer comments, whether the
116      msgid is a format string, examine the msgid.  This is a heuristic.  */
117   for (i = 0; i < NFORMATS; i++)
118     {
119       if (mp->is_format[i] == undecided
120           && (formatstring_parsers[i] == current_formatstring_parser1
121               || formatstring_parsers[i] == current_formatstring_parser2
122               || formatstring_parsers[i] == current_formatstring_parser3)
123           /* But avoid redundancy: objc-format is stronger than c-format.  */
124           && !(i == format_c && possible_format_p (mp->is_format[format_objc]))
125           && !(i == format_objc && possible_format_p (mp->is_format[format_c]))
126           /* Avoid flagging a string as c-format when it's known to be a
127              qt-format or qt-plural-format or kde-format or boost-format
128              string.  */
129           && !(i == format_c
130                && (possible_format_p (mp->is_format[format_qt])
131                    || possible_format_p (mp->is_format[format_qt_plural])
132                    || possible_format_p (mp->is_format[format_kde])
133                    || possible_format_p (mp->is_format[format_kde_kuit])
134                    || possible_format_p (mp->is_format[format_boost])))
135           /* Avoid flagging a string as kde-format when it's known to
136              be a kde-kuit-format string.  */
137           && !(i == format_kde
138                && possible_format_p (mp->is_format[format_kde_kuit]))
139           /* Avoid flagging a string as kde-kuit-format when it's
140              known to be a kde-format string.  Note that this relies
141              on the fact that format_kde < format_kde_kuit, so a
142              string will be marked as kde-format if both are
143              undecided.  */
144           && !(i == format_kde_kuit
145                && possible_format_p (mp->is_format[format_kde])))
146         {
147           struct formatstring_parser *parser = formatstring_parsers[i];
148           char *invalid_reason = NULL;
149           void *descr = parser->parse (mp->msgid, false, NULL, &invalid_reason);
150 
151           if (descr != NULL)
152             {
153               /* msgid is a valid format string.  We mark only those msgids
154                  as format strings which contain at least one format directive
155                  and thus are format strings with a high probability.  We
156                  don't mark strings without directives as format strings,
157                  because that would force the programmer to add
158                  "xgettext: no-c-format" anywhere where a translator wishes
159                  to use a percent sign.  So, the msgfmt checking will not be
160                  perfect.  Oh well.  */
161               if (parser->get_number_of_directives (descr) > 0
162                   && !(parser->is_unlikely_intentional != NULL
163                        && parser->is_unlikely_intentional (descr)))
164                 mp->is_format[i] = possible;
165 
166               parser->free (descr);
167             }
168           else
169             {
170               /* msgid is not a valid format string.  */
171               mp->is_format[i] = impossible;
172               free (invalid_reason);
173             }
174         }
175     }
176 }
177 
178 void
intersect_range(message_ty * mp,const struct argument_range * range)179 intersect_range (message_ty *mp, const struct argument_range *range)
180 {
181   if (has_range_p (*range))
182     {
183       if (has_range_p (mp->range))
184         {
185           if (range->min < mp->range.min)
186             mp->range.min = range->min;
187           if (range->max > mp->range.max)
188             mp->range.max = range->max;
189         }
190       else
191         mp->range = *range;
192     }
193 }
194 
195 void
decide_do_wrap(message_ty * mp)196 decide_do_wrap (message_ty *mp)
197 {
198   /* By default we wrap.  */
199   mp->do_wrap = (mp->do_wrap == no ? no : yes);
200 }
201 
202 void
decide_syntax_check(message_ty * mp)203 decide_syntax_check (message_ty *mp)
204 {
205   size_t i;
206 
207   for (i = 0; i < NSYNTAXCHECKS; i++)
208     if (mp->do_syntax_check[i] == undecided)
209       mp->do_syntax_check[i] = default_syntax_check[i] == yes ? yes : no;
210 }
211 
212 
213 static void
warn_format_string(enum is_format is_format[NFORMATS],const char * string,lex_pos_ty * pos,const char * pretty_msgstr)214 warn_format_string (enum is_format is_format[NFORMATS], const char *string,
215                     lex_pos_ty *pos, const char *pretty_msgstr)
216 {
217   if (possible_format_p (is_format[format_python])
218       && get_python_format_unnamed_arg_count (string) > 1)
219     {
220       char buffer[21];
221 
222       error_with_progname = false;
223       if (pos->line_number == (size_t)(-1))
224         buffer[0] = '\0';
225       else
226         sprintf (buffer, ":%ld", (long) pos->line_number);
227       multiline_warning (xasprintf (_("%s%s: warning: "),
228                                     pos->file_name, buffer),
229                          xasprintf (_("\
230 '%s' format string with unnamed arguments cannot be properly localized:\n\
231 The translator cannot reorder the arguments.\n\
232 Please consider using a format string with named arguments,\n\
233 and a mapping instead of a tuple for the arguments.\n"),
234                                     pretty_msgstr));
235       error_with_progname = true;
236     }
237 }
238 
239 
240 message_ty *
remember_a_message(message_list_ty * mlp,char * msgctxt,char * msgid,bool is_utf8,bool pluralp,flag_context_ty context,lex_pos_ty * pos,const char * extracted_comment,refcounted_string_list_ty * comment,bool comment_is_utf8)241 remember_a_message (message_list_ty *mlp, char *msgctxt, char *msgid,
242                     bool is_utf8, bool pluralp, flag_context_ty context,
243                     lex_pos_ty *pos,
244                     const char *extracted_comment,
245                     refcounted_string_list_ty *comment, bool comment_is_utf8)
246 {
247   enum is_format is_format[NFORMATS];
248   struct argument_range range;
249   enum is_wrap do_wrap;
250   enum is_syntax_check do_syntax_check[NSYNTAXCHECKS];
251   message_ty *mp;
252   char *msgstr;
253   size_t i;
254 
255   /* See whether we shall exclude this message.  */
256   if (exclude != NULL && message_list_search (exclude, msgctxt, msgid) != NULL)
257     {
258       /* Tell the lexer to reset its comment buffer, so that the next
259          message gets the correct comments.  */
260       xgettext_comment_reset ();
261       savable_comment_reset ();
262 
263       if (msgctxt != NULL)
264         free (msgctxt);
265       free (msgid);
266 
267       return NULL;
268     }
269 
270   savable_comment_to_xgettext_comment (comment);
271 
272   for (i = 0; i < NFORMATS; i++)
273     is_format[i] = undecided;
274   range.min = -1;
275   range.max = -1;
276   do_wrap = undecided;
277   for (i = 0; i < NSYNTAXCHECKS; i++)
278     do_syntax_check[i] = undecided;
279 
280   if (!is_utf8)
281     {
282       if (msgctxt != NULL)
283         CONVERT_STRING (msgctxt, lc_string);
284       CONVERT_STRING (msgid, lc_string);
285     }
286 
287   if (msgctxt == NULL && msgid[0] == '\0' && !xgettext_omit_header)
288     {
289       char buffer[21];
290 
291       error_with_progname = false;
292       if (pos->line_number == (size_t)(-1))
293         buffer[0] = '\0';
294       else
295         sprintf (buffer, ":%ld", (long) pos->line_number);
296       multiline_warning (xasprintf (_("%s%s: warning: "), pos->file_name,
297                                     buffer),
298                          xstrdup (_("\
299 Empty msgid.  It is reserved by GNU gettext:\n\
300 gettext(\"\") returns the header entry with\n\
301 meta information, not the empty string.\n")));
302       error_with_progname = true;
303     }
304 
305   /* See if we have seen this message before.  */
306   mp = message_list_search (mlp, msgctxt, msgid);
307   if (mp != NULL)
308     {
309       if (pluralp != (mp->msgid_plural != NULL))
310         {
311           lex_pos_ty pos1;
312           lex_pos_ty pos2;
313           char buffer1[21];
314           char buffer2[21];
315 
316           if (pluralp)
317             {
318               pos1 = mp->pos;
319               pos2 = *pos;
320             }
321           else
322             {
323               pos1 = *pos;
324               pos2 = mp->pos;
325             }
326 
327           if (pos1.line_number == (size_t)(-1))
328             buffer1[0] = '\0';
329           else
330             sprintf (buffer1, ":%ld", (long) pos1.line_number);
331           if (pos2.line_number == (size_t)(-1))
332             buffer2[0] = '\0';
333           else
334             sprintf (buffer2, ":%ld", (long) pos2.line_number);
335           multiline_warning (xstrdup (_("warning: ")),
336                              xasprintf ("%s\n%s\n%s\n%s\n",
337                                         xasprintf (_("msgid '%s' is used without plural and with plural."),
338                                                    msgid),
339                                         xasprintf (_("%s%s: Here is the occurrence without plural."),
340                                                    pos1.file_name, buffer1),
341                                         xasprintf (_("%s%s: Here is the occurrence with plural."),
342                                                    pos2.file_name, buffer2),
343                                         xstrdup (_("Workaround: If the msgid is a sentence, change the wording of the sentence; otherwise, use contexts for disambiguation."))));
344         }
345 
346       if (msgctxt != NULL)
347         free (msgctxt);
348       free (msgid);
349       for (i = 0; i < NFORMATS; i++)
350         is_format[i] = mp->is_format[i];
351       do_wrap = mp->do_wrap;
352       for (i = 0; i < NSYNTAXCHECKS; i++)
353         do_syntax_check[i] = mp->do_syntax_check[i];
354     }
355   else
356     {
357       /* Construct the msgstr from the prefix and suffix, otherwise use the
358          empty string.  */
359       if (msgstr_prefix)
360         msgstr = xasprintf ("%s%s%s", msgstr_prefix, msgid, msgstr_suffix);
361       else
362         msgstr = "";
363 
364       /* Allocate a new message and append the message to the list.  */
365       mp = message_alloc (msgctxt, msgid, NULL, msgstr, strlen (msgstr) + 1,
366                           pos);
367       /* Do not free msgctxt and msgid.  */
368       message_list_append (mlp, mp);
369     }
370 
371   /* Determine whether the context specifies that the msgid is a format
372      string.  */
373   set_format_flags_from_context (is_format, context, mp->msgid, pos, "msgid");
374 
375   /* Ask the lexer for the comments it has seen.  */
376   {
377     size_t nitems_before;
378     size_t nitems_after;
379     int j;
380     bool add_all_remaining_comments;
381     /* The string before the comment tag.  For example, If "** TRANSLATORS:"
382        is seen and the comment tag is "TRANSLATORS:",
383        then comment_tag_prefix is set to "** ".  */
384     const char *comment_tag_prefix = "";
385     size_t comment_tag_prefix_length = 0;
386 
387     nitems_before = (mp->comment_dot != NULL ? mp->comment_dot->nitems : 0);
388 
389     if (extracted_comment != NULL)
390       {
391         char *copy = xstrdup (extracted_comment);
392         char *rest;
393 
394         rest = copy;
395         while (*rest != '\0')
396           {
397             char *newline = strchr (rest, '\n');
398 
399             if (newline != NULL)
400               {
401                 *newline = '\0';
402                 message_comment_dot_append (mp, rest);
403                 rest = newline + 1;
404               }
405             else
406               {
407                 message_comment_dot_append (mp, rest);
408                 break;
409               }
410           }
411         free (copy);
412       }
413 
414     add_all_remaining_comments = add_all_comments;
415     for (j = 0; ; ++j)
416       {
417         const char *s = xgettext_comment (j);
418         const char *t;
419         if (s == NULL)
420           break;
421 
422         if (!comment_is_utf8)
423           CONVERT_STRING (s, lc_comment);
424 
425         /* To reduce the possibility of unwanted matches we do a two
426            step match: the line must contain 'xgettext:' and one of
427            the possible format description strings.  */
428         if ((t = c_strstr (s, "xgettext:")) != NULL)
429           {
430             bool tmp_fuzzy;
431             enum is_format tmp_format[NFORMATS];
432             struct argument_range tmp_range;
433             enum is_wrap tmp_wrap;
434             enum is_syntax_check tmp_syntax_check[NSYNTAXCHECKS];
435             bool interesting;
436 
437             t += strlen ("xgettext:");
438 
439             po_parse_comment_special (t, &tmp_fuzzy, tmp_format, &tmp_range,
440                                       &tmp_wrap, tmp_syntax_check);
441 
442             interesting = false;
443             for (i = 0; i < NFORMATS; i++)
444               if (tmp_format[i] != undecided)
445                 {
446                   is_format[i] = tmp_format[i];
447                   interesting = true;
448                 }
449             if (has_range_p (tmp_range))
450               {
451                 range = tmp_range;
452                 interesting = true;
453               }
454             if (tmp_wrap != undecided)
455               {
456                 do_wrap = tmp_wrap;
457                 interesting = true;
458               }
459             for (i = 0; i < NSYNTAXCHECKS; i++)
460               if (tmp_syntax_check[i] != undecided)
461                 {
462                   do_syntax_check[i] = tmp_syntax_check[i];
463                   interesting = true;
464                 }
465 
466             /* If the "xgettext:" marker was followed by an interesting
467                keyword, and we updated our is_format/do_wrap variables,
468                we don't print the comment as a #. comment.  */
469             if (interesting)
470               continue;
471           }
472 
473         if (!add_all_remaining_comments && comment_tag != NULL)
474           {
475             /* When the comment tag is seen, it drags in not only the line
476                which it starts, but all remaining comment lines.  */
477             if ((t = c_strstr (s, comment_tag)) != NULL)
478               {
479                 add_all_remaining_comments = true;
480                 comment_tag_prefix = s;
481                 comment_tag_prefix_length = t - s;
482               }
483           }
484 
485         if (add_all_remaining_comments)
486           {
487             if (strncmp (s, comment_tag_prefix, comment_tag_prefix_length) == 0)
488               s += comment_tag_prefix_length;
489             message_comment_dot_append (mp, s);
490           }
491       }
492 
493     nitems_after = (mp->comment_dot != NULL ? mp->comment_dot->nitems : 0);
494 
495     /* Don't add the comments if they are a repetition of the tail of the
496        already present comments.  This avoids unneeded duplication if the
497        same message appears several times, each time with the same comment.  */
498     if (nitems_before < nitems_after)
499       {
500         size_t added = nitems_after - nitems_before;
501 
502         if (added <= nitems_before)
503           {
504             bool repeated = true;
505 
506             for (i = 0; i < added; i++)
507               if (strcmp (mp->comment_dot->item[nitems_before - added + i],
508                           mp->comment_dot->item[nitems_before + i]) != 0)
509                 {
510                   repeated = false;
511                   break;
512                 }
513 
514             if (repeated)
515               {
516                 for (i = 0; i < added; i++)
517                   free ((char *) mp->comment_dot->item[nitems_before + i]);
518                 mp->comment_dot->nitems = nitems_before;
519               }
520           }
521       }
522   }
523 
524   for (i = 0; i < NFORMATS; i++)
525     mp->is_format[i] = is_format[i];
526   decide_is_format (mp);
527 
528   intersect_range (mp, &range);
529 
530   mp->do_wrap = do_wrap;
531   decide_do_wrap (mp);
532 
533   for (i = 0; i < NSYNTAXCHECKS; i++)
534     mp->do_syntax_check[i] = do_syntax_check[i];
535   decide_syntax_check (mp);
536 
537   /* Warn about the use of non-reorderable format strings when the programming
538      language also provides reorderable format strings.  */
539   warn_format_string (is_format, mp->msgid, pos, "msgid");
540 
541   /* Remember where we saw this msgid.  */
542   message_comment_filepos (mp, pos->file_name, pos->line_number);
543 
544   /* Tell the lexer to reset its comment buffer, so that the next
545      message gets the correct comments.  */
546   xgettext_comment_reset ();
547   savable_comment_reset ();
548 
549   return mp;
550 }
551 
552 
553 void
remember_a_message_plural(message_ty * mp,char * string,bool is_utf8,flag_context_ty context,lex_pos_ty * pos,refcounted_string_list_ty * comment,bool comment_is_utf8)554 remember_a_message_plural (message_ty *mp, char *string, bool is_utf8,
555                            flag_context_ty context, lex_pos_ty *pos,
556                            refcounted_string_list_ty *comment,
557                            bool comment_is_utf8)
558 {
559   char *msgid_plural;
560   char *msgstr1;
561   size_t msgstr1_len;
562   char *msgstr;
563   size_t i;
564 
565   msgid_plural = string;
566 
567   savable_comment_to_xgettext_comment (comment);
568 
569   if (!is_utf8)
570     CONVERT_STRING (msgid_plural, lc_string);
571 
572   /* See if the message is already a plural message.  */
573   if (mp->msgid_plural == NULL)
574     {
575       mp->msgid_plural = msgid_plural;
576 
577       /* Construct the first plural form from the prefix and suffix,
578          otherwise use the empty string.  The translator will have to
579          provide additional plural forms.  */
580       if (msgstr_prefix)
581         msgstr1 =
582           xasprintf ("%s%s%s", msgstr_prefix, msgid_plural, msgstr_suffix);
583       else
584         msgstr1 = "";
585       msgstr1_len = strlen (msgstr1) + 1;
586       msgstr = XNMALLOC (mp->msgstr_len + msgstr1_len, char);
587       memcpy (msgstr, mp->msgstr, mp->msgstr_len);
588       memcpy (msgstr + mp->msgstr_len, msgstr1, msgstr1_len);
589       mp->msgstr = msgstr;
590       mp->msgstr_len = mp->msgstr_len + msgstr1_len;
591       if (msgstr_prefix)
592         free (msgstr1);
593 
594       /* Determine whether the context specifies that the msgid_plural is a
595          format string.  */
596       set_format_flags_from_context (mp->is_format, context, mp->msgid_plural,
597                                      pos, "msgid_plural");
598 
599       /* If it is not already decided, through programmer comments or
600          the msgid, whether the msgid is a format string, examine the
601          msgid_plural.  This is a heuristic.  */
602       for (i = 0; i < NFORMATS; i++)
603         if ((formatstring_parsers[i] == current_formatstring_parser1
604              || formatstring_parsers[i] == current_formatstring_parser2
605              || formatstring_parsers[i] == current_formatstring_parser3)
606             && (mp->is_format[i] == undecided || mp->is_format[i] == possible)
607             /* But avoid redundancy: objc-format is stronger than c-format.  */
608             && !(i == format_c
609                  && possible_format_p (mp->is_format[format_objc]))
610             && !(i == format_objc
611                  && possible_format_p (mp->is_format[format_c]))
612             /* Avoid flagging a string as c-format when it's known to be a
613                qt-format or qt-plural-format or boost-format string.  */
614             && !(i == format_c
615                  && (possible_format_p (mp->is_format[format_qt])
616                      || possible_format_p (mp->is_format[format_qt_plural])
617                      || possible_format_p (mp->is_format[format_kde])
618                      || possible_format_p (mp->is_format[format_kde_kuit])
619                      || possible_format_p (mp->is_format[format_boost])))
620             /* Avoid flagging a string as kde-format when it's known
621                to be a kde-kuit-format string.  */
622             && !(i == format_kde
623                  && possible_format_p (mp->is_format[format_kde_kuit]))
624             /* Avoid flagging a string as kde-kuit-format when it's
625                known to be a kde-format string.  Note that this relies
626                on the fact that format_kde < format_kde_kuit, so a
627                string will be marked as kde-format if both are
628                undecided.  */
629             && !(i == format_kde_kuit
630                  && possible_format_p (mp->is_format[format_kde])))
631           {
632             struct formatstring_parser *parser = formatstring_parsers[i];
633             char *invalid_reason = NULL;
634             void *descr =
635               parser->parse (mp->msgid_plural, false, NULL, &invalid_reason);
636 
637             if (descr != NULL)
638               {
639                 /* Same heuristic as in remember_a_message.  */
640                 if (parser->get_number_of_directives (descr) > 0
641                     && !(parser->is_unlikely_intentional != NULL
642                          && parser->is_unlikely_intentional (descr)))
643                   mp->is_format[i] = possible;
644 
645                 parser->free (descr);
646               }
647             else
648               {
649                 /* msgid_plural is not a valid format string.  */
650                 mp->is_format[i] = impossible;
651                 free (invalid_reason);
652               }
653           }
654 
655       /* Warn about the use of non-reorderable format strings when the programming
656          language also provides reorderable format strings.  */
657       warn_format_string (mp->is_format, mp->msgid_plural, pos, "msgid_plural");
658     }
659   else
660     free (msgid_plural);
661 
662   /* Tell the lexer to reset its comment buffer, so that the next
663      message gets the correct comments.  */
664   xgettext_comment_reset ();
665   savable_comment_reset ();
666 }
667