gettext-tools/src/msgl-check.c

/* Checking of messages in PO files.
   Copyright (C) 1995-1998, 2000-2008, 2010-2016, 2019 Free Software Foundation, Inc.
   Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, April 1995.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

#ifdef HAVE_CONFIG_H
# include <config.h>
#endif

/* Specification.  */
#include "msgl-check.h"

#include <limits.h>
#include <setjmp.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>

#include "c-ctype.h"
#include "xalloc.h"
#include "xvasprintf.h"
#include "po-xerror.h"
#include "format.h"
#include "plural-exp.h"
#include "plural-eval.h"
#include "plural-table.h"
#include "c-strstr.h"
#include "message.h"
#include "quote.h"
#include "sentence.h"
#include "unictype.h"
#include "unistr.h"
#include "gettext.h"

#define _(str) gettext (str)

#define SIZEOF(a) (sizeof(a) / sizeof(a[0]))


/* Evaluates the plural formula for min <= n <= max
   and returns the estimated number of times the value j was assumed.  */
static unsigned int
plural_expression_histogram (const struct plural_distribution *self,
                             int min, int max, unsigned long j)
{
  if (min < 0)
    min = 0;
  /* Limit the number of evaluations.  Nothing interesting happens beyond
     1000.  */
  if (max - min > 1000)
    max = min + 1000;
  if (min <= max)
    {
      const struct expression *expr = self->expr;
      unsigned long n;
      unsigned int count;

      /* Protect against arithmetic exceptions.  */
      install_sigfpe_handler ();

      count = 0;
      for (n = min; n <= max; n++)
        {
          unsigned long val = plural_eval (expr, n);

          if (val == j)
            count++;
        }

      /* End of protection against arithmetic exceptions.  */
      uninstall_sigfpe_handler ();

      return count;
    }
  else
    return 0;
}


/* Check the values returned by plural_eval.
   Signals the errors through po_xerror.
   Return the number of errors that were seen.
   If no errors, returns in *DISTRIBUTION information about the plural_eval
   values distribution.  */
int
check_plural_eval (const struct expression *plural_expr,
                   unsigned long nplurals_value,
                   const message_ty *header,
                   struct plural_distribution *distribution)
{
  /* Do as if the plural formula assumes a value N infinitely often if it
     assumes it at least 5 times.  */
#define OFTEN 5
  unsigned char * volatile array;

  /* Allocate a distribution array.  */
  if (nplurals_value <= 100)
    array = XCALLOC (nplurals_value, unsigned char);
  else
    /* nplurals_value is nonsense.  Don't risk an out-of-memory.  */
    array = NULL;

  if (sigsetjmp (sigfpe_exit, 1) == 0)
    {
      unsigned long n;

      /* Protect against arithmetic exceptions.  */
      install_sigfpe_handler ();

      for (n = 0; n <= 1000; n++)
        {
          unsigned long val = plural_eval (plural_expr, n);

          if ((long) val < 0)
            {
              /* End of protection against arithmetic exceptions.  */
              uninstall_sigfpe_handler ();

              po_xerror (PO_SEVERITY_ERROR, header, NULL, 0, 0, false,
                         _("plural expression can produce negative values"));
              free (array);
              return 1;
            }
          else if (val >= nplurals_value)
            {
              char *msg;

              /* End of protection against arithmetic exceptions.  */
              uninstall_sigfpe_handler ();

              msg = xasprintf (_("nplurals = %lu but plural expression can produce values as large as %lu"),
                               nplurals_value, val);
              po_xerror (PO_SEVERITY_ERROR, header, NULL, 0, 0, false, msg);
              free (msg);
              free (array);
              return 1;
            }

          if (array != NULL && array[val] < OFTEN)
            array[val]++;
        }

      /* End of protection against arithmetic exceptions.  */
      uninstall_sigfpe_handler ();

      /* Normalize the array[val] statistics.  */
      if (array != NULL)
        {
          unsigned long val;

          for (val = 0; val < nplurals_value; val++)
            array[val] = (array[val] == OFTEN ? 1 : 0);
        }

      distribution->expr = plural_expr;
      distribution->often = array;
      distribution->often_length = (array != NULL ? nplurals_value : 0);
      distribution->histogram = plural_expression_histogram;

      return 0;
    }
  else
    {
      /* Caught an arithmetic exception.  */
      const char *msg;

      /* End of protection against arithmetic exceptions.  */
      uninstall_sigfpe_handler ();

#if USE_SIGINFO
      switch (sigfpe_code)
#endif
        {
#if USE_SIGINFO
# ifdef FPE_INTDIV
        case FPE_INTDIV:
          msg = _("plural expression can produce division by zero");
          break;
# endif
# ifdef FPE_INTOVF
        case FPE_INTOVF:
          msg = _("plural expression can produce integer overflow");
          break;
# endif
        default:
#endif
          msg = _("plural expression can produce arithmetic exceptions, possibly division by zero");
        }

      po_xerror (PO_SEVERITY_ERROR, header, NULL, 0, 0, false, msg);

      free (array);

      return 1;
    }
#undef OFTEN
}


/* Try to help the translator by looking up the right plural formula for her.
   Return a freshly allocated multiline help string, or NULL.  */
static char *
plural_help (const char *nullentry)
{
  struct plural_table_entry *ptentry = NULL;

  {
    const char *language;

    language = c_strstr (nullentry, "Language: ");
    if (language != NULL)
      {
        size_t len;

        language += 10;
        len = strcspn (language, " \t\n");
        if (len > 0)
          {
            size_t j;

            for (j = 0; j < plural_table_size; j++)
              if (len == strlen (plural_table[j].lang)
                  && strncmp (language, plural_table[j].lang, len) == 0)
                {
                  ptentry = &plural_table[j];
                  break;
                }
          }
      }
  }

  if (ptentry == NULL)
    {
      const char *language;

      language = c_strstr (nullentry, "Language-Team: ");
      if (language != NULL)
        {
          size_t j;

          language += 15;
          for (j = 0; j < plural_table_size; j++)
            if (strncmp (language,
                         plural_table[j].language,
                         strlen (plural_table[j].language)) == 0)
              {
                ptentry = &plural_table[j];
                break;
              }
        }
    }

  if (ptentry != NULL)
    {
      char *helpline1 =
        xasprintf (_("Try using the following, valid for %s:"),
                   ptentry->language);
      char *help =
        xasprintf ("%s\n\"Plural-Forms: %s\\n\"\n",
                   helpline1, ptentry->value);
      free (helpline1);
      return help;
    }
  return NULL;
}


/* Perform plural expression checking.
   Return the number of errors that were seen.
   If no errors, returns in *DISTRIBUTION information about the plural_eval
   values distribution.  */
static int
check_plural (message_list_ty *mlp,
              int ignore_untranslated_messages,
              int ignore_fuzzy_messages,
              struct plural_distribution *distributionp)
{
  int seen_errors = 0;
  const message_ty *has_plural;
  unsigned long min_nplurals;
  const message_ty *min_pos;
  unsigned long max_nplurals;
  const message_ty *max_pos;
  struct plural_distribution distribution;
  size_t j;
  message_ty *header;

  /* Determine whether mlp has plural entries.  */
  has_plural = NULL;
  min_nplurals = ULONG_MAX;
  min_pos = NULL;
  max_nplurals = 0;
  max_pos = NULL;
  distribution.expr = NULL;
  distribution.often = NULL;
  distribution.often_length = 0;
  distribution.histogram = NULL;
  for (j = 0; j < mlp->nitems; j++)
    {
      message_ty *mp = mlp->item[j];

      if (!mp->obsolete
          && !(ignore_untranslated_messages && mp->msgstr[0] == '\0')
          && !(ignore_fuzzy_messages && (mp->is_fuzzy && !is_header (mp)))
          && mp->msgid_plural != NULL)
        {
          const char *p;
          const char *p_end;
          unsigned long n;

          if (has_plural == NULL)
            has_plural = mp;

          n = 0;
          for (p = mp->msgstr, p_end = p + mp->msgstr_len;
               p < p_end;
               p += strlen (p) + 1)
            n++;
          if (min_nplurals > n)
            {
              min_nplurals = n;
              min_pos = mp;
            }
          if (max_nplurals < n)
            {
              max_nplurals = n;
              max_pos = mp;
            }
        }
    }

  /* Look at the plural entry for this domain.
     Cf, function extract_plural_expression.  */
  header = message_list_search (mlp, NULL, "");
  if (header != NULL && !header->obsolete)
    {
      const char *nullentry;
      const char *plural;
      const char *nplurals;

      nullentry = header->msgstr;

      plural = c_strstr (nullentry, "plural=");
      nplurals = c_strstr (nullentry, "nplurals=");
      if (plural == NULL && has_plural != NULL)
        {
          const char *msg1 =
            _("message catalog has plural form translations");
          const char *msg2 =
            _("but header entry lacks a \"plural=EXPRESSION\" attribute");
          char *help = plural_help (nullentry);

          if (help != NULL)
            {
              char *msg2ext = xasprintf ("%s\n%s", msg2, help);
              po_xerror2 (PO_SEVERITY_ERROR,
                          has_plural, NULL, 0, 0, false, msg1,
                          header, NULL, 0, 0, true, msg2ext);
              free (msg2ext);
              free (help);
            }
          else
            po_xerror2 (PO_SEVERITY_ERROR,
                        has_plural, NULL, 0, 0, false, msg1,
                        header, NULL, 0, 0, false, msg2);

          seen_errors++;
        }
      if (nplurals == NULL && has_plural != NULL)
        {
          const char *msg1 =
            _("message catalog has plural form translations");
          const char *msg2 =
            _("but header entry lacks a \"nplurals=INTEGER\" attribute");
          char *help = plural_help (nullentry);

          if (help != NULL)
            {
              char *msg2ext = xasprintf ("%s\n%s", msg2, help);
              po_xerror2 (PO_SEVERITY_ERROR,
                          has_plural, NULL, 0, 0, false, msg1,
                          header, NULL, 0, 0, true, msg2ext);
              free (msg2ext);
              free (help);
            }
          else
            po_xerror2 (PO_SEVERITY_ERROR,
                        has_plural, NULL, 0, 0, false, msg1,
                        header, NULL, 0, 0, false, msg2);

          seen_errors++;
        }
      if (plural != NULL && nplurals != NULL)
        {
          const char *endp;
          unsigned long int nplurals_value;
          struct parse_args args;
          const struct expression *plural_expr;

          /* First check the number.  */
          nplurals += 9;
          while (*nplurals != '\0' && c_isspace ((unsigned char) *nplurals))
            ++nplurals;
          endp = nplurals;
          nplurals_value = 0;
          if (*nplurals >= '0' && *nplurals <= '9')
            nplurals_value = strtoul (nplurals, (char **) &endp, 10);
          if (nplurals == endp)
            {
              const char *msg = _("invalid nplurals value");
              char *help = plural_help (nullentry);

              if (help != NULL)
                {
                  char *msgext = xasprintf ("%s\n%s", msg, help);
                  po_xerror (PO_SEVERITY_ERROR, header, NULL, 0, 0, true,
                             msgext);
                  free (msgext);
                  free (help);
                }
              else
                po_xerror (PO_SEVERITY_ERROR, header, NULL, 0, 0, false, msg);

              seen_errors++;
            }

          /* Then check the expression.  */
          plural += 7;
          args.cp = plural;
          if (parse_plural_expression (&args) != 0)
            {
              const char *msg = _("invalid plural expression");
              char *help = plural_help (nullentry);

              if (help != NULL)
                {
                  char *msgext = xasprintf ("%s\n%s", msg, help);
                  po_xerror (PO_SEVERITY_ERROR, header, NULL, 0, 0, true,
                             msgext);
                  free (msgext);
                  free (help);
                }
              else
                po_xerror (PO_SEVERITY_ERROR, header, NULL, 0, 0, false, msg);

              seen_errors++;
            }
          plural_expr = args.res;

          /* See whether nplurals and plural fit together.  */
          if (!seen_errors)
            seen_errors =
              check_plural_eval (plural_expr, nplurals_value, header,
                                 &distribution);

          /* Check the number of plurals of the translations.  */
          if (!seen_errors)
            {
              if (min_nplurals < nplurals_value)
                {
                  char *msg1 =
                    xasprintf (_("nplurals = %lu"), nplurals_value);
                  char *msg2 =
                    xasprintf (ngettext ("but some messages have only one plural form",
                                         "but some messages have only %lu plural forms",
                                         min_nplurals),
                               min_nplurals);
                  po_xerror2 (PO_SEVERITY_ERROR,
                              header, NULL, 0, 0, false, msg1,
                              min_pos, NULL, 0, 0, false, msg2);
                  free (msg2);
                  free (msg1);
                  seen_errors++;
                }
              else if (max_nplurals > nplurals_value)
                {
                  char *msg1 =
                    xasprintf (_("nplurals = %lu"), nplurals_value);
                  char *msg2 =
                    xasprintf (ngettext ("but some messages have one plural form",
                                         "but some messages have %lu plural forms",
                                         max_nplurals),
                               max_nplurals);
                  po_xerror2 (PO_SEVERITY_ERROR,
                              header, NULL, 0, 0, false, msg1,
                              max_pos, NULL, 0, 0, false, msg2);
                  free (msg2);
                  free (msg1);
                  seen_errors++;
                }
              /* The only valid case is max_nplurals <= n <= min_nplurals,
                 which means either has_plural == NULL or
                 max_nplurals = n = min_nplurals.  */
            }
        }
      else
        goto no_plural;
    }
  else
    {
      if (has_plural != NULL)
        {
          po_xerror (PO_SEVERITY_ERROR, has_plural, NULL, 0, 0, false,
                     _("message catalog has plural form translations, but lacks a header entry with \"Plural-Forms: nplurals=INTEGER; plural=EXPRESSION;\""));
          seen_errors++;
        }
     no_plural:
      /* By default, the Germanic formula (n != 1) is used.  */
      distribution.expr = &germanic_plural;
      {
        unsigned char *array = XCALLOC (2, unsigned char);
        array[1] = 1;
        distribution.often = array;
      }
      distribution.often_length = 2;
      distribution.histogram = plural_expression_histogram;
    }

  /* distribution is not needed if we report errors.
     Also, if there was an error due to  max_nplurals > nplurals_value,
     we must not use distribution because we would be doing out-of-bounds
     array accesses.  */
  if (seen_errors > 0)
    free ((unsigned char *) distribution.often);
  else
    *distributionp = distribution;

  return seen_errors;
}


/* Signal an error when checking format strings.  */
static const message_ty *curr_mp;
static lex_pos_ty curr_msgid_pos;
static void
formatstring_error_logger (const char *format, ...)
#if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ >= 7) || __GNUC__ > 2)
     __attribute__ ((__format__ (__printf__, 1, 2)))
#endif
;
static void
formatstring_error_logger (const char *format, ...)
{
  va_list args;
  char *msg;

  va_start (args, format);
  if (vasprintf (&msg, format, args) < 0)
    error (EXIT_FAILURE, 0, _("memory exhausted"));
  va_end (args);
  po_xerror (PO_SEVERITY_ERROR,
             curr_mp, curr_msgid_pos.file_name, curr_msgid_pos.line_number,
             (size_t)(-1), false, msg);
  free (msg);
}


/* Perform miscellaneous checks on a message.
   PLURAL_DISTRIBUTION is either NULL or an array of nplurals elements,
   PLURAL_DISTRIBUTION[j] being true if the value j appears to be assumed
   infinitely often by the plural formula.
   PLURAL_DISTRIBUTION_LENGTH is the length of the PLURAL_DISTRIBUTION
   array.  */
static int
check_pair (const message_ty *mp,
            const char *msgid,
            const lex_pos_ty *msgid_pos,
            const char *msgid_plural,
            const char *msgstr, size_t msgstr_len,
            const enum is_format is_format[NFORMATS],
            int check_newlines,
            int check_format_strings,
            const struct plural_distribution *distribution,
            int check_compatibility,
            int check_accelerators, char accelerator_char)
{
  int seen_errors;
  int has_newline;
  unsigned int j;

  /* If the msgid string is empty we have the special entry reserved for
     information about the translation.  */
  if (msgid[0] == '\0')
    return 0;

  seen_errors = 0;

  if (check_newlines)
    {
      /* Test 1: check whether all or none of the strings begin with a '\n'.  */
      has_newline = (msgid[0] == '\n');
#define TEST_NEWLINE(p) (p[0] == '\n')
      if (msgid_plural != NULL)
        {
          const char *p;

          if (TEST_NEWLINE(msgid_plural) != has_newline)
            {
              po_xerror (PO_SEVERITY_ERROR,
                         mp, msgid_pos->file_name, msgid_pos->line_number,
                         (size_t)(-1), false,
                         _("'msgid' and 'msgid_plural' entries do not both begin with '\\n'"));
              seen_errors++;
            }
          for (p = msgstr, j = 0; p < msgstr + msgstr_len; p += strlen (p) + 1, j++)
            if (TEST_NEWLINE(p) != has_newline)
              {
                char *msg =
                  xasprintf (_("'msgid' and 'msgstr[%u]' entries do not both begin with '\\n'"),
                             j);
                po_xerror (PO_SEVERITY_ERROR,
                           mp, msgid_pos->file_name, msgid_pos->line_number,
                           (size_t)(-1), false, msg);
                free (msg);
                seen_errors++;
              }
        }
      else
        {
          if (TEST_NEWLINE(msgstr) != has_newline)
            {
              po_xerror (PO_SEVERITY_ERROR,
                         mp, msgid_pos->file_name, msgid_pos->line_number,
                         (size_t)(-1), false,
                         _("'msgid' and 'msgstr' entries do not both begin with '\\n'"));
              seen_errors++;
            }
        }
#undef TEST_NEWLINE

      /* Test 2: check whether all or none of the strings end with a '\n'.  */
      has_newline = (msgid[strlen (msgid) - 1] == '\n');
#define TEST_NEWLINE(p) (p[0] != '\0' && p[strlen (p) - 1] == '\n')
      if (msgid_plural != NULL)
        {
          const char *p;

          if (TEST_NEWLINE(msgid_plural) != has_newline)
            {
              po_xerror (PO_SEVERITY_ERROR,
                         mp, msgid_pos->file_name, msgid_pos->line_number,
                         (size_t)(-1), false,
                         _("'msgid' and 'msgid_plural' entries do not both end with '\\n'"));
              seen_errors++;
            }
          for (p = msgstr, j = 0; p < msgstr + msgstr_len; p += strlen (p) + 1, j++)
            if (TEST_NEWLINE(p) != has_newline)
              {
                char *msg =
                  xasprintf (_("'msgid' and 'msgstr[%u]' entries do not both end with '\\n'"),
                             j);
                po_xerror (PO_SEVERITY_ERROR,
                           mp, msgid_pos->file_name, msgid_pos->line_number,
                           (size_t)(-1), false, msg);
                free (msg);
                seen_errors++;
              }
        }
      else
        {
          if (TEST_NEWLINE(msgstr) != has_newline)
            {
              po_xerror (PO_SEVERITY_ERROR,
                         mp, msgid_pos->file_name, msgid_pos->line_number,
                         (size_t)(-1), false,
                         _("'msgid' and 'msgstr' entries do not both end with '\\n'"));
              seen_errors++;
            }
        }
#undef TEST_NEWLINE
    }

  if (check_compatibility && msgid_plural != NULL)
    {
      po_xerror (PO_SEVERITY_ERROR,
                 mp, msgid_pos->file_name, msgid_pos->line_number,
                 (size_t)(-1), false,
                 _("plural handling is a GNU gettext extension"));
      seen_errors++;
    }

  if (check_format_strings)
    /* Test 3: Check whether both formats strings contain the same number
       of format specifications.  */
    {
      curr_mp = mp;
      curr_msgid_pos = *msgid_pos;
      seen_errors +=
        check_msgid_msgstr_format (msgid, msgid_plural, msgstr, msgstr_len,
                                   is_format, mp->range, distribution,
                                   formatstring_error_logger);
    }

  if (check_accelerators && msgid_plural == NULL)
    /* Test 4: Check that if msgid is a menu item with a keyboard accelerator,
       the msgstr has an accelerator as well.  A keyboard accelerator is
       designated by an immediately preceding '&'.  We cannot check whether
       two accelerators collide, only whether the translator has bothered
       thinking about them.  */
    {
      const char *p;

      /* We are only interested in msgids that contain exactly one '&'.  */
      p = strchr (msgid, accelerator_char);
      if (p != NULL && strchr (p + 1, accelerator_char) == NULL)
        {
          /* Count the number of '&' in msgstr, but ignore '&&'.  */
          unsigned int count = 0;

          for (p = msgstr; (p = strchr (p, accelerator_char)) != NULL; p++)
            if (p[1] == accelerator_char)
              p++;
            else
              count++;

          if (count == 0)
            {
              char *msg =
                xasprintf (_("msgstr lacks the keyboard accelerator mark '%c'"),
                           accelerator_char);
              po_xerror (PO_SEVERITY_ERROR,
                         mp, msgid_pos->file_name, msgid_pos->line_number,
                         (size_t)(-1), false, msg);
              free (msg);
              seen_errors++;
            }
          else if (count > 1)
            {
              char *msg =
                xasprintf (_("msgstr has too many keyboard accelerator marks '%c'"),
                           accelerator_char);
              po_xerror (PO_SEVERITY_ERROR,
                         mp, msgid_pos->file_name, msgid_pos->line_number,
                         (size_t)(-1), false, msg);
              free (msg);
              seen_errors++;
            }
        }
    }

  return seen_errors;
}


/* Perform miscellaneous checks on a header entry.  */
static int
check_header_entry (const message_ty *mp, const char *msgstr_string)
{
  static const char *required_fields[] =
  {
    "Project-Id-Version", "PO-Revision-Date", "Last-Translator",
    "Language-Team", "MIME-Version", "Content-Type",
    "Content-Transfer-Encoding",
    /* These are recommended but not yet required.  */
    "Language"
  };
  static const char *default_values[] =
  {
    "PACKAGE VERSION", "YEAR-MO-DA HO:MI+ZONE", "FULL NAME <EMAIL@ADDRESS>", "LANGUAGE <LL@li.org>", NULL,
    "text/plain; charset=CHARSET", "ENCODING",
    ""
  };
  const size_t nfields = SIZEOF (required_fields);
  /* FIXME: We could check if a required header field is missing and
     report it as error.  However, it's could be too rigorous and
     break backward compatibility.  */
#if 0
  const size_t nrequiredfields = nfields - 1;
#endif
  int seen_errors = 0;
  int cnt;

  for (cnt = 0; cnt < nfields; ++cnt)
    {
#if 0
      int severity =
        (cnt < nrequiredfields ? PO_SEVERITY_ERROR : PO_SEVERITY_WARNING);
#else
      int severity =
        PO_SEVERITY_WARNING;
#endif
      const char *field = required_fields[cnt];
      size_t len = strlen (field);
      const char *line;

      for (line = msgstr_string; *line != '\0'; )
        {
          if (strncmp (line, field, len) == 0 && line[len] == ':')
            {
              const char *p = line + len + 1;

              /* Test whether the field's value, starting at p, is the default
                 value.  */
              if (*p == ' ')
                p++;
              if (default_values[cnt] != NULL
                  && strncmp (p, default_values[cnt],
                              strlen (default_values[cnt])) == 0)
                {
                  p += strlen (default_values[cnt]);
                  if (*p == '\0' || *p == '\n')
                    {
                      char *msg =
                        xasprintf (_("header field '%s' still has the initial default value\n"),
                                   field);
                      po_xerror (severity, mp, NULL, 0, 0, true, msg);
                      free (msg);
                      if (severity == PO_SEVERITY_ERROR)
                        seen_errors++;
                    }
                }
              break;
            }
          line = strchrnul (line, '\n');
          if (*line == '\n')
            line++;
        }
      if (*line == '\0')
        {
          char *msg =
            xasprintf (_("header field '%s' missing in header\n"),
                       field);
          po_xerror (severity, mp, NULL, 0, 0, true, msg);
          free (msg);
          if (severity == PO_SEVERITY_ERROR)
            seen_errors++;
        }
    }
  return seen_errors;
}


/* Perform all checks on a non-obsolete message.
   Return the number of errors that were seen.  */
int
check_message (const message_ty *mp,
               const lex_pos_ty *msgid_pos,
               int check_newlines,
               int check_format_strings,
               const struct plural_distribution *distribution,
               int check_header,
               int check_compatibility,
               int check_accelerators, char accelerator_char)
{
  int seen_errors = 0;

  if (check_header && is_header (mp))
    seen_errors += check_header_entry (mp, mp->msgstr);

  seen_errors += check_pair (mp,
                             mp->msgid, msgid_pos, mp->msgid_plural,
                             mp->msgstr, mp->msgstr_len,
                             mp->is_format,
                             check_newlines,
                             check_format_strings,
                             distribution,
                             check_compatibility,
                             check_accelerators, accelerator_char);
  return seen_errors;
}


/* Perform all checks on a message list.
   Return the number of errors that were seen.  */
int
check_message_list (message_list_ty *mlp,
                    int ignore_untranslated_messages,
                    int ignore_fuzzy_messages,
                    int check_newlines,
                    int check_format_strings,
                    int check_header,
                    int check_compatibility,
                    int check_accelerators, char accelerator_char)
{
  int seen_errors = 0;
  struct plural_distribution distribution;
  size_t j;

  distribution.expr = NULL;
  distribution.often = NULL;
  distribution.often_length = 0;
  distribution.histogram = NULL;

  if (check_header)
    seen_errors += check_plural (mlp, ignore_untranslated_messages,
                                 ignore_fuzzy_messages, &distribution);

  for (j = 0; j < mlp->nitems; j++)
    {
      message_ty *mp = mlp->item[j];

      if (!mp->obsolete
          && !(ignore_untranslated_messages && mp->msgstr[0] == '\0')
          && !(ignore_fuzzy_messages && (mp->is_fuzzy && !is_header (mp))))
        seen_errors += check_message (mp, &mp->pos,
                                      check_newlines,
                                      check_format_strings,
                                      &distribution,
                                      check_header, check_compatibility,
                                      check_accelerators, accelerator_char);
    }

  return seen_errors;
}


static int
syntax_check_ellipsis_unicode (const message_ty *mp, const char *msgid)
{
  const char *str = msgid;
  const char *str_limit = str + strlen (msgid);
  int seen_errors = 0;

  while (str < str_limit)
    {
      const char *end, *cp;
      ucs4_t ending_char;

      end = sentence_end (str, &ending_char);

      /* sentence_end doesn't treat '...' specially.  */
      cp = end - (ending_char == '.' ? 2 : 3);
      if (cp >= str && memcmp (cp, "...", 3) == 0)
        {
          po_xerror (PO_SEVERITY_ERROR, mp, NULL, 0, 0, false,
                     _("ASCII ellipsis ('...') instead of Unicode"));
          seen_errors++;
        }

      str = end + 1;
    }

  return seen_errors;
}


static int
syntax_check_space_ellipsis (const message_ty *mp, const char *msgid)
{
  const char *str = msgid;
  const char *str_limit = str + strlen (msgid);
  int seen_errors = 0;

  while (str < str_limit)
    {
      const char *end, *ellipsis = NULL;
      ucs4_t ending_char;

      end = sentence_end (str, &ending_char);

      if (ending_char == 0x2026)
        ellipsis = end;
      else if (ending_char == '.')
        {
          /* sentence_end doesn't treat '...' specially.  */
          const char *cp = end - 2;
          if (cp >= str && memcmp (cp, "...", 3) == 0)
            ellipsis = cp;
        }
      else
        {
          /* Look for a '...'.  */
          const char *cp = end - 3;
          if (cp >= str && memcmp (cp, "...", 3) == 0)
            ellipsis = cp;
          else
            {
              ucs4_t uc = 0xfffd;

              /* Look for a U+2026.  */
              for (cp = end - 1; cp >= str; cp--)
                {
                  u8_mbtouc (&uc, (const unsigned char *) cp, end - cp);
                  if (uc != 0xfffd)
                    break;
                }

              if (uc == 0x2026)
                ellipsis = cp;
            }
        }

      if (ellipsis)
        {
          const char *cp;
          ucs4_t uc = 0xfffd;

          /* Look at the character before ellipsis.  */
          for (cp = ellipsis - 1; cp >= str; cp--)
            {
              u8_mbtouc (&uc, (const unsigned char *) cp, ellipsis - cp);
              if (uc != 0xfffd)
                break;
            }

          if (uc != 0xfffd && uc_is_space (uc))
            {
              po_xerror (PO_SEVERITY_ERROR, mp, NULL, 0, 0, false,
                         _("space before ellipsis found in user visible strings"));
              seen_errors++;
            }
        }

      str = end + 1;
    }

  return seen_errors;
}


struct callback_arg
{
  const message_ty *mp;
  int seen_errors;
};

static void
syntax_check_quote_unicode_callback (char quote, const char *quoted,
                                     size_t quoted_length, void *data)
{
  struct callback_arg *arg = data;

  switch (quote)
    {
    case '"':
      po_xerror (PO_SEVERITY_ERROR, arg->mp, NULL, 0, 0, false,
                 _("ASCII double quote used instead of Unicode"));
      arg->seen_errors++;
      break;

    case '\'':
      po_xerror (PO_SEVERITY_ERROR, arg->mp, NULL, 0, 0, false,
                 _("ASCII single quote used instead of Unicode"));
      arg->seen_errors++;
      break;

    default:
      break;
    }
}

static int
syntax_check_quote_unicode (const message_ty *mp, const char *msgid)
{
  struct callback_arg arg;

  arg.mp = mp;
  arg.seen_errors = 0;

  scan_quoted (msgid, strlen (msgid),
               syntax_check_quote_unicode_callback, &arg);

  return arg.seen_errors;
}

struct bullet_ty
{
  int c;
  size_t depth;
};

struct bullet_stack_ty
{
  struct bullet_ty *items;
  size_t nitems;
  size_t nitems_max;
};

static struct bullet_stack_ty bullet_stack;

static int
syntax_check_bullet_unicode (const message_ty *mp, const char *msgid)
{
  const char *str = msgid;
  const char *str_limit = str + strlen (msgid);
  struct bullet_ty *last_bullet = NULL;
  bool seen_error = false;

  bullet_stack.nitems = 0;

  while (str < str_limit)
    {
      const char *p = str, *end;

      while (p < str_limit && c_isspace (*p))
        p++;

      if ((*p == '*' || *p == '-') && *(p + 1) == ' ')
        {
          size_t depth = p - str;
          if (last_bullet == NULL || depth > last_bullet->depth)
            {
              struct bullet_ty bullet;

              bullet.c = *p;
              bullet.depth = depth;

              if (bullet_stack.nitems >= bullet_stack.nitems_max)
                {
                  bullet_stack.nitems_max = 2 * bullet_stack.nitems_max + 4;
                  bullet_stack.items = xrealloc (bullet_stack.items,
                                                 bullet_stack.nitems_max
                                                 * sizeof (struct bullet_ty));
                }

              last_bullet = &bullet_stack.items[bullet_stack.nitems++];
              memcpy (last_bullet, &bullet, sizeof (struct bullet_ty));
            }
          else
            {
              if (depth < last_bullet->depth)
                {
                  if (bullet_stack.nitems > 1)
                    {
                      bullet_stack.nitems--;
                      last_bullet =
                        &bullet_stack.items[bullet_stack.nitems - 1];
                    }
                  else
                    last_bullet = NULL;
                }

              if (last_bullet && depth == last_bullet->depth)
                {
                  if (last_bullet->c != *p)
                    last_bullet->c = *p;
                  else
                    {
                      seen_error = true;
                      break;
                    }
                }
            }
        }
      else
        {
          bullet_stack.nitems = 0;
          last_bullet = NULL;
        }

      end = strchrnul (str, '\n');
      str = end + 1;
    }

  if (seen_error)
    {
      char *msg;
      msg = xasprintf (_("ASCII bullet ('%c') instead of Unicode"),
                       last_bullet->c);
      po_xerror (PO_SEVERITY_ERROR, mp, NULL, 0, 0, false, msg);
      free (msg);
      return 1;
    }

  return 0;
}


typedef int (* syntax_check_function) (const message_ty *mp, const char *msgid);
static const syntax_check_function sc_funcs[NSYNTAXCHECKS] =
{
  syntax_check_ellipsis_unicode,
  syntax_check_space_ellipsis,
  syntax_check_quote_unicode,
  syntax_check_bullet_unicode
};

/* Perform all syntax checks on a non-obsolete message.
   Return the number of errors that were seen.  */
static int
syntax_check_message (const message_ty *mp)
{
  int seen_errors = 0;
  int i;

  for (i = 0; i < NSYNTAXCHECKS; i++)
    {
      if (mp->do_syntax_check[i] == yes)
        {
          seen_errors += sc_funcs[i] (mp, mp->msgid);
          if (mp->msgid_plural)
            seen_errors += sc_funcs[i] (mp, mp->msgid_plural);
        }
    }

  return seen_errors;
}


/* Perform all syntax checks on a message list.
   Return the number of errors that were seen.  */
int
syntax_check_message_list (message_list_ty *mlp)
{
  int seen_errors = 0;
  size_t j;

  for (j = 0; j < mlp->nitems; j++)
    {
      message_ty *mp = mlp->item[j];

      if (!is_header (mp))
        seen_errors += syntax_check_message (mp);
    }

  return seen_errors;
}