OpenHarmony-v3.2.1-Release/s

/* Extracting a message.  Accumulating the message list.
   Copyright (C) 2001-2020 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

#ifdef HAVE_CONFIG_H
# include <config.h>
#endif

/* Specification.  */
#include "xg-message.h"

#include <stdio.h>

#include "c-strstr.h"
#include "error-progname.h"
#include "format.h"
#include "read-catalog-abstract.h"
#include "xalloc.h"
#include "xerror.h"
#include "xvasprintf.h"

#include "xgettext.h"

#include "gettext.h"
#define _(str) gettext (str)


#define CONVERT_STRING(string, lcontext) \
  string = from_current_source_encoding (string, lcontext, pos->file_name, \
                                         pos->line_number);


/* Update the is_format[] flags depending on the information given in the
   context.  */
static void
set_format_flags_from_context (enum is_format is_format[NFORMATS],
                               flag_context_ty context, const char *string,
                               lex_pos_ty *pos, const char *pretty_msgstr)
{
  size_t i;

  if (context.is_format1 != undecided
      || context.is_format2 != undecided
      || context.is_format3 != undecided)
    for (i = 0; i < NFORMATS; i++)
      {
        if (is_format[i] == undecided)
          {
            if (formatstring_parsers[i] == current_formatstring_parser1
                && context.is_format1 != undecided)
              is_format[i] = (enum is_format) context.is_format1;
            if (formatstring_parsers[i] == current_formatstring_parser2
                && context.is_format2 != undecided)
              is_format[i] = (enum is_format) context.is_format2;
            if (formatstring_parsers[i] == current_formatstring_parser3
                && context.is_format3 != undecided)
              is_format[i] = (enum is_format) context.is_format3;
          }
        if (possible_format_p (is_format[i]))
          {
            struct formatstring_parser *parser = formatstring_parsers[i];
            char *invalid_reason = NULL;
            void *descr = parser->parse (string, false, NULL, &invalid_reason);

            if (descr != NULL)
              parser->free (descr);
            else
              {
                /* The string is not a valid format string.  */
                if (is_format[i] != possible)
                  {
                    char buffer[21];

                    error_with_progname = false;
                    if (pos->line_number == (size_t)(-1))
                      buffer[0] = '\0';
                    else
                      sprintf (buffer, ":%ld", (long) pos->line_number);
                    multiline_warning (xasprintf (_("%s%s: warning: "),
                                                  pos->file_name, buffer),
                                       xasprintf (is_format[i] == yes_according_to_context
                                                  ? _("Although being used in a format string position, the %s is not a valid %s format string. Reason: %s\n")
                                                  : _("Although declared as such, the %s is not a valid %s format string. Reason: %s\n"),
                                                  pretty_msgstr,
                                                  format_language_pretty[i],
                                                  invalid_reason));
                    error_with_progname = true;
                  }

                is_format[i] = impossible;
                free (invalid_reason);
              }
          }
      }
}


void
decide_is_format (message_ty *mp)
{
  size_t i;

  /* If it is not already decided, through programmer comments, whether the
     msgid is a format string, examine the msgid.  This is a heuristic.  */
  for (i = 0; i < NFORMATS; i++)
    {
      if (mp->is_format[i] == undecided
          && (formatstring_parsers[i] == current_formatstring_parser1
              || formatstring_parsers[i] == current_formatstring_parser2
              || formatstring_parsers[i] == current_formatstring_parser3)
          /* But avoid redundancy: objc-format is stronger than c-format.  */
          && !(i == format_c && possible_format_p (mp->is_format[format_objc]))
          && !(i == format_objc && possible_format_p (mp->is_format[format_c]))
          /* Avoid flagging a string as c-format when it's known to be a
             qt-format or qt-plural-format or kde-format or boost-format
             string.  */
          && !(i == format_c
               && (possible_format_p (mp->is_format[format_qt])
                   || possible_format_p (mp->is_format[format_qt_plural])
                   || possible_format_p (mp->is_format[format_kde])
                   || possible_format_p (mp->is_format[format_kde_kuit])
                   || possible_format_p (mp->is_format[format_boost])))
          /* Avoid flagging a string as kde-format when it's known to
             be a kde-kuit-format string.  */
          && !(i == format_kde
               && possible_format_p (mp->is_format[format_kde_kuit]))
          /* Avoid flagging a string as kde-kuit-format when it's
             known to be a kde-format string.  Note that this relies
             on the fact that format_kde < format_kde_kuit, so a
             string will be marked as kde-format if both are
             undecided.  */
          && !(i == format_kde_kuit
               && possible_format_p (mp->is_format[format_kde])))
        {
          struct formatstring_parser *parser = formatstring_parsers[i];
          char *invalid_reason = NULL;
          void *descr = parser->parse (mp->msgid, false, NULL, &invalid_reason);

          if (descr != NULL)
            {
              /* msgid is a valid format string.  We mark only those msgids
                 as format strings which contain at least one format directive
                 and thus are format strings with a high probability.  We
                 don't mark strings without directives as format strings,
                 because that would force the programmer to add
                 "xgettext: no-c-format" anywhere where a translator wishes
                 to use a percent sign.  So, the msgfmt checking will not be
                 perfect.  Oh well.  */
              if (parser->get_number_of_directives (descr) > 0
                  && !(parser->is_unlikely_intentional != NULL
                       && parser->is_unlikely_intentional (descr)))
                mp->is_format[i] = possible;

              parser->free (descr);
            }
          else
            {
              /* msgid is not a valid format string.  */
              mp->is_format[i] = impossible;
              free (invalid_reason);
            }
        }
    }
}

void
intersect_range (message_ty *mp, const struct argument_range *range)
{
  if (has_range_p (*range))
    {
      if (has_range_p (mp->range))
        {
          if (range->min < mp->range.min)
            mp->range.min = range->min;
          if (range->max > mp->range.max)
            mp->range.max = range->max;
        }
      else
        mp->range = *range;
    }
}

void
decide_do_wrap (message_ty *mp)
{
  /* By default we wrap.  */
  mp->do_wrap = (mp->do_wrap == no ? no : yes);
}

void
decide_syntax_check (message_ty *mp)
{
  size_t i;

  for (i = 0; i < NSYNTAXCHECKS; i++)
    if (mp->do_syntax_check[i] == undecided)
      mp->do_syntax_check[i] = default_syntax_check[i] == yes ? yes : no;
}


static void
warn_format_string (enum is_format is_format[NFORMATS], const char *string,
                    lex_pos_ty *pos, const char *pretty_msgstr)
{
  if (possible_format_p (is_format[format_python])
      && get_python_format_unnamed_arg_count (string) > 1)
    {
      char buffer[21];

      error_with_progname = false;
      if (pos->line_number == (size_t)(-1))
        buffer[0] = '\0';
      else
        sprintf (buffer, ":%ld", (long) pos->line_number);
      multiline_warning (xasprintf (_("%s%s: warning: "),
                                    pos->file_name, buffer),
                         xasprintf (_("\
'%s' format string with unnamed arguments cannot be properly localized:\n\
The translator cannot reorder the arguments.\n\
Please consider using a format string with named arguments,\n\
and a mapping instead of a tuple for the arguments.\n"),
                                    pretty_msgstr));
      error_with_progname = true;
    }
}


message_ty *
remember_a_message (message_list_ty *mlp, char *msgctxt, char *msgid,
                    bool is_utf8, bool pluralp, flag_context_ty context,
                    lex_pos_ty *pos,
                    const char *extracted_comment,
                    refcounted_string_list_ty *comment, bool comment_is_utf8)
{
  enum is_format is_format[NFORMATS];
  struct argument_range range;
  enum is_wrap do_wrap;
  enum is_syntax_check do_syntax_check[NSYNTAXCHECKS];
  message_ty *mp;
  char *msgstr;
  size_t i;

  /* See whether we shall exclude this message.  */
  if (exclude != NULL && message_list_search (exclude, msgctxt, msgid) != NULL)
    {
      /* Tell the lexer to reset its comment buffer, so that the next
         message gets the correct comments.  */
      xgettext_comment_reset ();
      savable_comment_reset ();

      if (msgctxt != NULL)
        free (msgctxt);
      free (msgid);

      return NULL;
    }

  savable_comment_to_xgettext_comment (comment);

  for (i = 0; i < NFORMATS; i++)
    is_format[i] = undecided;
  range.min = -1;
  range.max = -1;
  do_wrap = undecided;
  for (i = 0; i < NSYNTAXCHECKS; i++)
    do_syntax_check[i] = undecided;

  if (!is_utf8)
    {
      if (msgctxt != NULL)
        CONVERT_STRING (msgctxt, lc_string);
      CONVERT_STRING (msgid, lc_string);
    }

  if (msgctxt == NULL && msgid[0] == '\0' && !xgettext_omit_header)
    {
      char buffer[21];

      error_with_progname = false;
      if (pos->line_number == (size_t)(-1))
        buffer[0] = '\0';
      else
        sprintf (buffer, ":%ld", (long) pos->line_number);
      multiline_warning (xasprintf (_("%s%s: warning: "), pos->file_name,
                                    buffer),
                         xstrdup (_("\
Empty msgid.  It is reserved by GNU gettext:\n\
gettext(\"\") returns the header entry with\n\
meta information, not the empty string.\n")));
      error_with_progname = true;
    }

  /* See if we have seen this message before.  */
  mp = message_list_search (mlp, msgctxt, msgid);
  if (mp != NULL)
    {
      if (pluralp != (mp->msgid_plural != NULL))
        {
          lex_pos_ty pos1;
          lex_pos_ty pos2;
          char buffer1[21];
          char buffer2[21];

          if (pluralp)
            {
              pos1 = mp->pos;
              pos2 = *pos;
            }
          else
            {
              pos1 = *pos;
              pos2 = mp->pos;
            }

          if (pos1.line_number == (size_t)(-1))
            buffer1[0] = '\0';
          else
            sprintf (buffer1, ":%ld", (long) pos1.line_number);
          if (pos2.line_number == (size_t)(-1))
            buffer2[0] = '\0';
          else
            sprintf (buffer2, ":%ld", (long) pos2.line_number);
          multiline_warning (xstrdup (_("warning: ")),
                             xasprintf ("%s\n%s\n%s\n%s\n",
                                        xasprintf (_("msgid '%s' is used without plural and with plural."),
                                                   msgid),
                                        xasprintf (_("%s%s: Here is the occurrence without plural."),
                                                   pos1.file_name, buffer1),
                                        xasprintf (_("%s%s: Here is the occurrence with plural."),
                                                   pos2.file_name, buffer2),
                                        xstrdup (_("Workaround: If the msgid is a sentence, change the wording of the sentence; otherwise, use contexts for disambiguation."))));
        }

      if (msgctxt != NULL)
        free (msgctxt);
      free (msgid);
      for (i = 0; i < NFORMATS; i++)
        is_format[i] = mp->is_format[i];
      do_wrap = mp->do_wrap;
      for (i = 0; i < NSYNTAXCHECKS; i++)
        do_syntax_check[i] = mp->do_syntax_check[i];
    }
  else
    {
      /* Construct the msgstr from the prefix and suffix, otherwise use the
         empty string.  */
      if (msgstr_prefix)
        msgstr = xasprintf ("%s%s%s", msgstr_prefix, msgid, msgstr_suffix);
      else
        msgstr = "";

      /* Allocate a new message and append the message to the list.  */
      mp = message_alloc (msgctxt, msgid, NULL, msgstr, strlen (msgstr) + 1,
                          pos);
      /* Do not free msgctxt and msgid.  */
      message_list_append (mlp, mp);
    }

  /* Determine whether the context specifies that the msgid is a format
     string.  */
  set_format_flags_from_context (is_format, context, mp->msgid, pos, "msgid");

  /* Ask the lexer for the comments it has seen.  */
  {
    size_t nitems_before;
    size_t nitems_after;
    int j;
    bool add_all_remaining_comments;
    /* The string before the comment tag.  For example, If "** TRANSLATORS:"
       is seen and the comment tag is "TRANSLATORS:",
       then comment_tag_prefix is set to "** ".  */
    const char *comment_tag_prefix = "";
    size_t comment_tag_prefix_length = 0;

    nitems_before = (mp->comment_dot != NULL ? mp->comment_dot->nitems : 0);

    if (extracted_comment != NULL)
      {
        char *copy = xstrdup (extracted_comment);
        char *rest;

        rest = copy;
        while (*rest != '\0')
          {
            char *newline = strchr (rest, '\n');

            if (newline != NULL)
              {
                *newline = '\0';
                message_comment_dot_append (mp, rest);
                rest = newline + 1;
              }
            else
              {
                message_comment_dot_append (mp, rest);
                break;
              }
          }
        free (copy);
      }

    add_all_remaining_comments = add_all_comments;
    for (j = 0; ; ++j)
      {
        const char *s = xgettext_comment (j);
        const char *t;
        if (s == NULL)
          break;

        if (!comment_is_utf8)
          CONVERT_STRING (s, lc_comment);

        /* To reduce the possibility of unwanted matches we do a two
           step match: the line must contain 'xgettext:' and one of
           the possible format description strings.  */
        if ((t = c_strstr (s, "xgettext:")) != NULL)
          {
            bool tmp_fuzzy;
            enum is_format tmp_format[NFORMATS];
            struct argument_range tmp_range;
            enum is_wrap tmp_wrap;
            enum is_syntax_check tmp_syntax_check[NSYNTAXCHECKS];
            bool interesting;

            t += strlen ("xgettext:");

            po_parse_comment_special (t, &tmp_fuzzy, tmp_format, &tmp_range,
                                      &tmp_wrap, tmp_syntax_check);

            interesting = false;
            for (i = 0; i < NFORMATS; i++)
              if (tmp_format[i] != undecided)
                {
                  is_format[i] = tmp_format[i];
                  interesting = true;
                }
            if (has_range_p (tmp_range))
              {
                range = tmp_range;
                interesting = true;
              }
            if (tmp_wrap != undecided)
              {
                do_wrap = tmp_wrap;
                interesting = true;
              }
            for (i = 0; i < NSYNTAXCHECKS; i++)
              if (tmp_syntax_check[i] != undecided)
                {
                  do_syntax_check[i] = tmp_syntax_check[i];
                  interesting = true;
                }

            /* If the "xgettext:" marker was followed by an interesting
               keyword, and we updated our is_format/do_wrap variables,
               we don't print the comment as a #. comment.  */
            if (interesting)
              continue;
          }

        if (!add_all_remaining_comments && comment_tag != NULL)
          {
            /* When the comment tag is seen, it drags in not only the line
               which it starts, but all remaining comment lines.  */
            if ((t = c_strstr (s, comment_tag)) != NULL)
              {
                add_all_remaining_comments = true;
                comment_tag_prefix = s;
                comment_tag_prefix_length = t - s;
              }
          }

        if (add_all_remaining_comments)
          {
            if (strncmp (s, comment_tag_prefix, comment_tag_prefix_length) == 0)
              s += comment_tag_prefix_length;
            message_comment_dot_append (mp, s);
          }
      }

    nitems_after = (mp->comment_dot != NULL ? mp->comment_dot->nitems : 0);

    /* Don't add the comments if they are a repetition of the tail of the
       already present comments.  This avoids unneeded duplication if the
       same message appears several times, each time with the same comment.  */
    if (nitems_before < nitems_after)
      {
        size_t added = nitems_after - nitems_before;

        if (added <= nitems_before)
          {
            bool repeated = true;

            for (i = 0; i < added; i++)
              if (strcmp (mp->comment_dot->item[nitems_before - added + i],
                          mp->comment_dot->item[nitems_before + i]) != 0)
                {
                  repeated = false;
                  break;
                }

            if (repeated)
              {
                for (i = 0; i < added; i++)
                  free ((char *) mp->comment_dot->item[nitems_before + i]);
                mp->comment_dot->nitems = nitems_before;
              }
          }
      }
  }

  for (i = 0; i < NFORMATS; i++)
    mp->is_format[i] = is_format[i];
  decide_is_format (mp);

  intersect_range (mp, &range);

  mp->do_wrap = do_wrap;
  decide_do_wrap (mp);

  for (i = 0; i < NSYNTAXCHECKS; i++)
    mp->do_syntax_check[i] = do_syntax_check[i];
  decide_syntax_check (mp);

  /* Warn about the use of non-reorderable format strings when the programming
     language also provides reorderable format strings.  */
  warn_format_string (is_format, mp->msgid, pos, "msgid");

  /* Remember where we saw this msgid.  */
  message_comment_filepos (mp, pos->file_name, pos->line_number);

  /* Tell the lexer to reset its comment buffer, so that the next
     message gets the correct comments.  */
  xgettext_comment_reset ();
  savable_comment_reset ();

  return mp;
}


void
remember_a_message_plural (message_ty *mp, char *string, bool is_utf8,
                           flag_context_ty context, lex_pos_ty *pos,
                           refcounted_string_list_ty *comment,
                           bool comment_is_utf8)
{
  char *msgid_plural;
  char *msgstr1;
  size_t msgstr1_len;
  char *msgstr;
  size_t i;

  msgid_plural = string;

  savable_comment_to_xgettext_comment (comment);

  if (!is_utf8)
    CONVERT_STRING (msgid_plural, lc_string);

  /* See if the message is already a plural message.  */
  if (mp->msgid_plural == NULL)
    {
      mp->msgid_plural = msgid_plural;

      /* Construct the first plural form from the prefix and suffix,
         otherwise use the empty string.  The translator will have to
         provide additional plural forms.  */
      if (msgstr_prefix)
        msgstr1 =
          xasprintf ("%s%s%s", msgstr_prefix, msgid_plural, msgstr_suffix);
      else
        msgstr1 = "";
      msgstr1_len = strlen (msgstr1) + 1;
      msgstr = XNMALLOC (mp->msgstr_len + msgstr1_len, char);
      memcpy (msgstr, mp->msgstr, mp->msgstr_len);
      memcpy (msgstr + mp->msgstr_len, msgstr1, msgstr1_len);
      mp->msgstr = msgstr;
      mp->msgstr_len = mp->msgstr_len + msgstr1_len;
      if (msgstr_prefix)
        free (msgstr1);

      /* Determine whether the context specifies that the msgid_plural is a
         format string.  */
      set_format_flags_from_context (mp->is_format, context, mp->msgid_plural,
                                     pos, "msgid_plural");

      /* If it is not already decided, through programmer comments or
         the msgid, whether the msgid is a format string, examine the
         msgid_plural.  This is a heuristic.  */
      for (i = 0; i < NFORMATS; i++)
        if ((formatstring_parsers[i] == current_formatstring_parser1
             || formatstring_parsers[i] == current_formatstring_parser2
             || formatstring_parsers[i] == current_formatstring_parser3)
            && (mp->is_format[i] == undecided || mp->is_format[i] == possible)
            /* But avoid redundancy: objc-format is stronger than c-format.  */
            && !(i == format_c
                 && possible_format_p (mp->is_format[format_objc]))
            && !(i == format_objc
                 && possible_format_p (mp->is_format[format_c]))
            /* Avoid flagging a string as c-format when it's known to be a
               qt-format or qt-plural-format or boost-format string.  */
            && !(i == format_c
                 && (possible_format_p (mp->is_format[format_qt])
                     || possible_format_p (mp->is_format[format_qt_plural])
                     || possible_format_p (mp->is_format[format_kde])
                     || possible_format_p (mp->is_format[format_kde_kuit])
                     || possible_format_p (mp->is_format[format_boost])))
            /* Avoid flagging a string as kde-format when it's known
               to be a kde-kuit-format string.  */
            && !(i == format_kde
                 && possible_format_p (mp->is_format[format_kde_kuit]))
            /* Avoid flagging a string as kde-kuit-format when it's
               known to be a kde-format string.  Note that this relies
               on the fact that format_kde < format_kde_kuit, so a
               string will be marked as kde-format if both are
               undecided.  */
            && !(i == format_kde_kuit
                 && possible_format_p (mp->is_format[format_kde])))
          {
            struct formatstring_parser *parser = formatstring_parsers[i];
            char *invalid_reason = NULL;
            void *descr =
              parser->parse (mp->msgid_plural, false, NULL, &invalid_reason);

            if (descr != NULL)
              {
                /* Same heuristic as in remember_a_message.  */
                if (parser->get_number_of_directives (descr) > 0
                    && !(parser->is_unlikely_intentional != NULL
                         && parser->is_unlikely_intentional (descr)))
                  mp->is_format[i] = possible;

                parser->free (descr);
              }
            else
              {
                /* msgid_plural is not a valid format string.  */
                mp->is_format[i] = impossible;
                free (invalid_reason);
              }
          }

      /* Warn about the use of non-reorderable format strings when the programming
         language also provides reorderable format strings.  */
      warn_format_string (mp->is_format, mp->msgid_plural, pos, "msgid_plural");
    }
  else
    free (msgid_plural);

  /* Tell the lexer to reset its comment buffer, so that the next
     message gets the correct comments.  */
  xgettext_comment_reset ();
  savable_comment_reset ();
}