• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Java printf format strings.
2    Copyright (C) 2001-2004, 2006-2007, 2009-2010, 2018-2020 Free Software
3    Foundation, Inc.
4    Written by Bruno Haible <haible@clisp.cons.org>, 2001.
5 
6    This program is free software: you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10 
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
18 
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
22 
23 #include <stdbool.h>
24 #include <stdlib.h>
25 
26 #include "format.h"
27 #include "c-ctype.h"
28 #include "xalloc.h"
29 #include "xvasprintf.h"
30 #include "format-invalid.h"
31 #include "gettext.h"
32 
33 #define _(str) gettext (str)
34 
35 /* Java printf format strings are described in java/util/Formatter.html.
36    A directive
37    - starts with '%' or '%<' or '%m$' where m is a positive integer,
38    - is optionally followed by any of the characters '#', '0', '-', ' ', '+',
39      ',', '(',
40    - is optionally followed by a width specification: a nonempty digit sequence,
41    - is optionally followed by '.' and a precision specification: a nonempty
42      digit sequence,
43    - is finished by a specifier
44        - '%', 'n', that need no argument,
45          Restrictions:
46          - For '%': flags other than '-' are invalid, and a precision is
47                     invalid.
48          - For 'n': flags, width, and precision are invalid.
49        - 'b', 'B', 'h', 'H', 's', 'S', that need a general argument.
50          Restrictions:
51          Flags other than '#' and '-' are invalid.
52        - 'c', 'C', that need a character argument,
53          Restrictions:
54          Flags other than '-' are invalid.
55          A precision is invalid.
56        - 'd', 'o', 'x', 'X', that need an integer argument,
57          Restrictions:
58          - For 'd': The flag '#' is invalid.
59          - For 'o', 'x', 'X': The flag ',' is invalid.
60          A precision is invalid.
61        - 'e', 'E', 'f', 'g', 'G', 'a', 'A', that need a floating-point argument,
62          Restrictions:
63          - For 'a', 'A': The flags ',', '(' are invalid.
64        - 't', 'T', followed by one of
65            'H', 'I', 'k', 'l', 'M', 'S', 'L', 'N', 'p', 'z', 'Z', 's', 'Q',
66            'B', 'b', 'h', 'A', 'a', 'C', 'Y', 'y', 'j', 'm', 'd', 'e',
67            'R', 'T', 'r', 'D', 'F', 'c'
68          that need a date/time argument.
69          Restrictions:
70          Flags other than '-' are invalid.
71          A precision is invalid.
72    Numbered ('%m$') and unnumbered argument specifications can be mixed in the
73    same string.  Numbered argument specifications have no influence on the
74    unnumbered argument counter.
75  */
76 
/* Kind of argument that a directive consumes.  FAT_NONE marks directives
   that take no argument ('%%' and '%n'); format_parse also uses it as the
   merged type of two references to the same argument whose kinds
   disagree.  */
enum format_arg_type
{
  FAT_NONE = 0,         /* no argument */
  FAT_GENERAL,          /* 'b' 'B' 'h' 'H' 's' 'S' */
  FAT_CHARACTER,        /* 'c' 'C' */
  FAT_INTEGER,          /* 'd' 'o' 'x' 'X' */
  FAT_FLOATINGPOINT,    /* 'e' 'E' 'f' 'g' 'G' 'a' 'A' */
  FAT_DATETIME          /* 't' 'T' followed by a suffix character */
};

/* Storage type for a format_arg_type value: a plain int when compiled as
   C++, the enum itself otherwise.  */
#ifdef __cplusplus
typedef int format_arg_type_t;
#else
typedef enum format_arg_type format_arg_type_t;
#endif
92 
/* Bit mask describing what was seen between the '%' (or argument selector)
   and the conversion character of one directive.  Each enumerator is a
   distinct single bit.  */
enum
{
  /* Flag characters.  */
  FAT_ALTERNATE         = 0x001, /* '#' */
  FAT_ZERO_PADDED       = 0x002, /* '0' */
  FAT_LEFT_JUSTIFIED    = 0x004, /* '-' */
  FAT_SPACE_SIGN        = 0x008, /* ' ' */
  FAT_SIGN              = 0x010, /* '+' */
  FAT_OBEY_LOCALE       = 0x020, /* ',' */
  FAT_MONETARY          = 0x040, /* '(' */
  /* A width was given.  */
  FAT_WIDTH             = 0x080,
  /* A precision was given.  */
  FAT_PRECISION         = 0x100
};
108 
/* One argument reference collected from a format string.  */
struct numbered_arg
{
  /* Position of the argument.  format_parse only stores values >= 1:
     an explicit '%0$' is rejected, and implicit positions are produced by
     pre-incrementing a counter that starts at 0.  */
  unsigned int number;
  /* Kind of value the directive expects (one of the FAT_* basic types).  */
  format_arg_type_t type;
};
114 
/* Result of parsing one format string.  */
struct spec
{
  /* Number of directives seen, i.e. of '%' characters that start a
     directive; this includes the argument-less '%%' and '%n'.  */
  unsigned int directives;
  /* Number of valid elements in NUMBERED.  */
  unsigned int numbered_arg_count;
  /* Heap-allocated array of argument references, or NULL if there are
     none.  After a successful format_parse it is sorted by ascending
     argument number with at most one entry per number.  */
  struct numbered_arg *numbered;
};
121 
/* Locale-independent test for an ASCII decimal digit.
   Unlike <ctype.h>'s isdigit, whose argument must be representable as an
   'unsigned char', this accepts a plain 'char' as well: characters below
   '0' wrap around to a large unsigned value and fail the comparison.  */
#undef isdigit
#define isdigit(c) ((unsigned int) ((c) - '0') <= 9)
127 
128 
129 static int
numbered_arg_compare(const void * p1,const void * p2)130 numbered_arg_compare (const void *p1, const void *p2)
131 {
132   unsigned int n1 = ((const struct numbered_arg *) p1)->number;
133   unsigned int n2 = ((const struct numbered_arg *) p2)->number;
134 
135   return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0);
136 }
137 
/* Each macro below expands to an xasprintf call that builds a translated
   diagnostic for directive number DIRECTIVE_NUMBER.  format_parse stores
   the result in *invalid_reason.  */

/* '%<' was used although no earlier directive consumed an argument.  */
#define INVALID_LAST_ARG(directive_number) \
  xasprintf (_("In the directive number %u, the reference to the argument of the previous directive is invalid."), directive_number)

/* A '.' was not followed by a digit.  */
#define INVALID_PRECISION_MISSING(directive_number) \
  xasprintf (_("In the directive number %u, the precision is missing."), directive_number)

/* The flag FLAG_CHAR is not allowed with the conversion CONV_CHAR.  */
#define INVALID_FLAG_FOR(directive_number,flag_char,conv_char) \
  xasprintf (_("In the directive number %u, the flag '%c' is invalid for the conversion '%c'."), directive_number, flag_char, conv_char)

/* A width is not allowed with the conversion CONV_CHAR.  */
#define INVALID_WIDTH_FOR(directive_number,conv_char) \
  xasprintf (_("In the directive number %u, a width is invalid for the conversion '%c'."), directive_number, conv_char)

/* A precision is not allowed with the conversion CONV_CHAR.  */
#define INVALID_PRECISION_FOR(directive_number,conv_char) \
  xasprintf (_("In the directive number %u, a precision is invalid for the conversion '%c'."), directive_number, conv_char)
152 
/* Builds the diagnostic for a 't'/'T' conversion (CONV_CHAR) followed by
   the invalid suffix character SUFFIX_CHAR.
   The printability test is made on SUFFIX_CHAR, the character that is
   actually in doubt: CONV_CHAR is always 't' or 'T' at the call site.  An
   unprintable suffix (e.g. a control character or a byte of a multibyte
   sequence) is therefore not echoed with '%c'; the second wording, which
   does not quote it, is used instead.  */
#define INVALID_DATETIME_CONVERSION_SUFFIX(directive_number,conv_char,suffix_char) \
  (c_isprint (suffix_char) \
   ? xasprintf (_("In the directive number %u, for the conversion '%c', the character '%c' is not a valid conversion suffix."), directive_number, conv_char, suffix_char) \
   : xasprintf (_("The character that terminates the directive number %u, for the conversion '%c', is not a valid conversion suffix."), directive_number, conv_char))
157 
158 static void *
format_parse(const char * format,bool translated,char * fdi,char ** invalid_reason)159 format_parse (const char *format, bool translated, char *fdi,
160               char **invalid_reason)
161 {
162   const char *const format_start = format;
163   struct spec spec;
164   unsigned int numbered_allocated;
165   struct spec *result;
166   unsigned int unnumbered_arg_count;
167   unsigned int last_arg_number;
168 
169   spec.directives = 0;
170   spec.numbered_arg_count = 0;
171   spec.numbered = NULL;
172   numbered_allocated = 0;
173   unnumbered_arg_count = 0;
174   last_arg_number = 0;
175 
176   for (; *format != '\0';)
177     if (*format++ == '%')
178       {
179         /* A directive.  */
180         unsigned int number = 0;
181         unsigned int flags;
182         format_arg_type_t type;
183         unsigned int invalid_flags;
184 
185         FDI_SET (format - 1, FMTDIR_START);
186         spec.directives++;
187 
188         if (*format == '<')
189           {
190             if (last_arg_number == 0)
191               {
192                 *invalid_reason = INVALID_LAST_ARG (spec.directives);
193                 FDI_SET (format, FMTDIR_ERROR);
194                 goto bad_format;
195               }
196             number = last_arg_number;
197             format++;
198           }
199         else if (isdigit (*format))
200           {
201             const char *f = format;
202             unsigned int m = 0;
203 
204             do
205               {
206                 m = 10 * m + (*f - '0');
207                 f++;
208               }
209             while (isdigit (*f));
210 
211             if (*f == '$')
212               {
213                 if (m == 0)
214                   {
215                     *invalid_reason = INVALID_ARGNO_0 (spec.directives);
216                     FDI_SET (f, FMTDIR_ERROR);
217                     goto bad_format;
218                   }
219                 number = m;
220                 format = ++f;
221               }
222           }
223 
224         flags = 0;
225 
226         /* Parse flags.  */
227         for (;;)
228           {
229             if (*format == '#')
230               {
231                 flags |= FAT_ALTERNATE;
232                 format++;
233               }
234             else if (*format == '0')
235               {
236                 flags |= FAT_ZERO_PADDED;
237                 format++;
238               }
239             else if (*format == '-')
240               {
241                 flags |= FAT_LEFT_JUSTIFIED;
242                 format++;
243               }
244             else if (*format == ' ')
245               {
246                 flags |= FAT_SPACE_SIGN;
247                 format++;
248               }
249             else if (*format == '+')
250               {
251                 flags |= FAT_SIGN;
252                 format++;
253               }
254             else if (*format == ',')
255               {
256                 flags |= FAT_OBEY_LOCALE;
257                 format++;
258               }
259             else if (*format == '(')
260               {
261                 flags |= FAT_MONETARY;
262                 format++;
263               }
264             else
265               break;
266           }
267 
268         /* Parse width.  */
269         if (isdigit (*format))
270           {
271             do format++; while (isdigit (*format));
272             flags |= FAT_WIDTH;
273           }
274 
275         /* Parse precision.  */
276         if (*format == '.')
277           {
278             format++;
279 
280             if (!isdigit (*format))
281               {
282                 if (*format == '\0')
283                   {
284                     *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
285                     FDI_SET (format - 1, FMTDIR_ERROR);
286                   }
287                 else
288                   {
289                     *invalid_reason = INVALID_PRECISION_MISSING (spec.directives);
290                     FDI_SET (format, FMTDIR_ERROR);
291                   }
292                 goto bad_format;
293               }
294 
295             do format++; while (isdigit (*format));
296             flags |= FAT_PRECISION;
297           }
298 
299         /* Parse conversion.  */
300         switch (*format)
301           {
302           case '%':
303             type = FAT_NONE;
304             invalid_flags = (FAT_ALTERNATE | FAT_ZERO_PADDED | FAT_SPACE_SIGN
305                              | FAT_SIGN | FAT_OBEY_LOCALE | FAT_MONETARY)
306                             | FAT_PRECISION;
307             break;
308           case 'n':
309             type = FAT_NONE;
310             invalid_flags = (FAT_ALTERNATE | FAT_ZERO_PADDED | FAT_LEFT_JUSTIFIED
311                              | FAT_SPACE_SIGN | FAT_SIGN | FAT_OBEY_LOCALE
312                              | FAT_MONETARY)
313                             | FAT_WIDTH | FAT_PRECISION;
314             break;
315           case 'b': case 'B':
316           case 'h': case 'H':
317           case 's': case 'S':
318             type = FAT_GENERAL;
319             invalid_flags = (FAT_ZERO_PADDED | FAT_SPACE_SIGN | FAT_SIGN
320                              | FAT_OBEY_LOCALE | FAT_MONETARY);
321             break;
322           case 'c': case 'C':
323             type = FAT_CHARACTER;
324             invalid_flags = (FAT_ALTERNATE | FAT_ZERO_PADDED | FAT_SPACE_SIGN
325                              | FAT_SIGN | FAT_OBEY_LOCALE | FAT_MONETARY)
326                             | FAT_PRECISION;
327             break;
328           case 'd':
329             type = FAT_INTEGER;
330             invalid_flags = FAT_ALTERNATE | FAT_PRECISION;
331             break;
332           case 'o': case 'x': case 'X':
333             type = FAT_INTEGER;
334             invalid_flags = FAT_OBEY_LOCALE | FAT_PRECISION;
335             break;
336           case 'e': case 'E':
337           case 'f':
338           case 'g': case 'G':
339             type = FAT_FLOATINGPOINT;
340             invalid_flags = 0;
341             break;
342           case 'a': case 'A':
343             type = FAT_FLOATINGPOINT;
344             invalid_flags = FAT_OBEY_LOCALE | FAT_MONETARY;
345             break;
346           case 't': case 'T':
347             type = FAT_DATETIME;
348             invalid_flags = (FAT_ALTERNATE | FAT_ZERO_PADDED | FAT_SPACE_SIGN
349                              | FAT_SIGN | FAT_OBEY_LOCALE | FAT_MONETARY)
350                             | FAT_PRECISION;
351             break;
352           default:
353             if (*format == '\0')
354               {
355                 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
356                 FDI_SET (format - 1, FMTDIR_ERROR);
357               }
358             else
359               {
360                 *invalid_reason =
361                   INVALID_CONVERSION_SPECIFIER (spec.directives, *format);
362                 FDI_SET (format, FMTDIR_ERROR);
363               }
364             goto bad_format;
365           }
366 
367         /* Report invalid flags, width, precision.  */
368         invalid_flags &= flags;
369         if (invalid_flags & FAT_ALTERNATE)
370           {
371             *invalid_reason = INVALID_FLAG_FOR (spec.directives, '#', *format);
372             FDI_SET (format, FMTDIR_ERROR);
373             goto bad_format;
374           }
375         if (invalid_flags & FAT_ZERO_PADDED)
376           {
377             *invalid_reason = INVALID_FLAG_FOR (spec.directives, '0', *format);
378             FDI_SET (format, FMTDIR_ERROR);
379             goto bad_format;
380           }
381         if (invalid_flags & FAT_LEFT_JUSTIFIED)
382           {
383             *invalid_reason = INVALID_FLAG_FOR (spec.directives, '-', *format);
384             FDI_SET (format, FMTDIR_ERROR);
385             goto bad_format;
386           }
387         if (invalid_flags & FAT_SPACE_SIGN)
388           {
389             *invalid_reason = INVALID_FLAG_FOR (spec.directives, ' ', *format);
390             FDI_SET (format, FMTDIR_ERROR);
391             goto bad_format;
392           }
393         if (invalid_flags & FAT_SIGN)
394           {
395             *invalid_reason = INVALID_FLAG_FOR (spec.directives, '+', *format);
396             FDI_SET (format, FMTDIR_ERROR);
397             goto bad_format;
398           }
399         if (invalid_flags & FAT_OBEY_LOCALE)
400           {
401             *invalid_reason = INVALID_FLAG_FOR (spec.directives, ',', *format);
402             FDI_SET (format, FMTDIR_ERROR);
403             goto bad_format;
404           }
405         if (invalid_flags & FAT_MONETARY)
406           {
407             *invalid_reason = INVALID_FLAG_FOR (spec.directives, '(', *format);
408             FDI_SET (format, FMTDIR_ERROR);
409             goto bad_format;
410           }
411         if (invalid_flags & FAT_WIDTH)
412           {
413             *invalid_reason = INVALID_WIDTH_FOR (spec.directives, *format);
414             FDI_SET (format, FMTDIR_ERROR);
415             goto bad_format;
416           }
417         if (invalid_flags & FAT_PRECISION)
418           {
419             *invalid_reason = INVALID_PRECISION_FOR (spec.directives, *format);
420             FDI_SET (format, FMTDIR_ERROR);
421             goto bad_format;
422           }
423 
424         if (type == FAT_DATETIME)
425           {
426             format++;
427 
428             /* Parse conversion suffix.  */
429             switch (*format)
430               {
431               case 'H': case 'I': case 'k': case 'l': case 'M': case 'S':
432               case 'L': case 'N': case 'p': case 'z': case 'Z': case 's':
433               case 'Q':
434               case 'B': case 'b': case 'h': case 'A': case 'a': case 'C':
435               case 'Y': case 'y': case 'j': case 'm': case 'd': case 'e':
436               case 'R': case 'T': case 'r': case 'D': case 'F': case 'c':
437                 break;
438               default:
439                 if (*format == '\0')
440                   {
441                     *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
442                     FDI_SET (format - 1, FMTDIR_ERROR);
443                   }
444                 else
445                   {
446                     *invalid_reason =
447                       INVALID_DATETIME_CONVERSION_SUFFIX (spec.directives,
448                                                           format[-1], *format);
449                     FDI_SET (format, FMTDIR_ERROR);
450                   }
451                 goto bad_format;
452               }
453           }
454 
455         if (type != FAT_NONE)
456           {
457             if (number == 0)
458               number = ++unnumbered_arg_count;
459 
460             if (numbered_allocated == spec.numbered_arg_count)
461               {
462                 numbered_allocated = 2 * numbered_allocated + 1;
463                 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, numbered_allocated * sizeof (struct numbered_arg));
464               }
465             spec.numbered[spec.numbered_arg_count].number = number;
466             spec.numbered[spec.numbered_arg_count].type = type;
467             spec.numbered_arg_count++;
468 
469             last_arg_number = number;
470           }
471 
472         FDI_SET (format, FMTDIR_END);
473 
474         format++;
475       }
476 
477   /* Sort the numbered argument array, and eliminate duplicates.  */
478   if (spec.numbered_arg_count > 1)
479     {
480       unsigned int i, j;
481       bool err;
482 
483       qsort (spec.numbered, spec.numbered_arg_count,
484              sizeof (struct numbered_arg), numbered_arg_compare);
485 
486       /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i.  */
487       err = false;
488       for (i = j = 0; i < spec.numbered_arg_count; i++)
489         if (j > 0 && spec.numbered[i].number == spec.numbered[j-1].number)
490           {
491             enum format_arg_type type1 = spec.numbered[i].type;
492             enum format_arg_type type2 = spec.numbered[j-1].type;
493             enum format_arg_type type_both;
494 
495             if (type1 == type2)
496               type_both = type1;
497             else
498               {
499                 /* Incompatible types.  */
500                 type_both = FAT_NONE;
501                 if (!err)
502                   *invalid_reason =
503                     INVALID_INCOMPATIBLE_ARG_TYPES (spec.numbered[i].number);
504                 err = true;
505               }
506 
507             spec.numbered[j-1].type = type_both;
508           }
509         else
510           {
511             if (j < i)
512               {
513                 spec.numbered[j].number = spec.numbered[i].number;
514                 spec.numbered[j].type = spec.numbered[i].type;
515               }
516             j++;
517           }
518       spec.numbered_arg_count = j;
519       if (err)
520         /* *invalid_reason has already been set above.  */
521         goto bad_format;
522     }
523 
524   result = XMALLOC (struct spec);
525   *result = spec;
526   return result;
527 
528  bad_format:
529   if (spec.numbered != NULL)
530     free (spec.numbered);
531   return NULL;
532 }
533 
534 static void
format_free(void * descr)535 format_free (void *descr)
536 {
537   struct spec *spec = (struct spec *) descr;
538 
539   if (spec->numbered != NULL)
540     free (spec->numbered);
541   free (spec);
542 }
543 
544 static int
format_get_number_of_directives(void * descr)545 format_get_number_of_directives (void *descr)
546 {
547   struct spec *spec = (struct spec *) descr;
548 
549   return spec->directives;
550 }
551 
552 static bool
format_check(void * msgid_descr,void * msgstr_descr,bool equality,formatstring_error_logger_t error_logger,const char * pretty_msgid,const char * pretty_msgstr)553 format_check (void *msgid_descr, void *msgstr_descr, bool equality,
554               formatstring_error_logger_t error_logger,
555               const char *pretty_msgid, const char *pretty_msgstr)
556 {
557   struct spec *spec1 = (struct spec *) msgid_descr;
558   struct spec *spec2 = (struct spec *) msgstr_descr;
559   bool err = false;
560 
561   if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0)
562     {
563       unsigned int i, j;
564       unsigned int n1 = spec1->numbered_arg_count;
565       unsigned int n2 = spec2->numbered_arg_count;
566 
567       /* Check the argument names are the same.
568          Both arrays are sorted.  We search for the first difference.  */
569       for (i = 0, j = 0; i < n1 || j < n2; )
570         {
571           int cmp = (i >= n1 ? 1 :
572                      j >= n2 ? -1 :
573                      spec1->numbered[i].number > spec2->numbered[j].number ? 1 :
574                      spec1->numbered[i].number < spec2->numbered[j].number ? -1 :
575                      0);
576 
577           if (cmp > 0)
578             {
579               if (error_logger)
580                 error_logger (_("a format specification for argument %u, as in '%s', doesn't exist in '%s'"),
581                               spec2->numbered[j].number, pretty_msgstr,
582                               pretty_msgid);
583               err = true;
584               break;
585             }
586           else if (cmp < 0)
587             {
588               if (equality)
589                 {
590                   if (error_logger)
591                     error_logger (_("a format specification for argument %u doesn't exist in '%s'"),
592                                   spec1->numbered[i].number, pretty_msgstr);
593                   err = true;
594                   break;
595                 }
596               else
597                 i++;
598             }
599           else
600             j++, i++;
601         }
602       /* Check the argument types are the same.  */
603       if (!err)
604         for (i = 0, j = 0; j < n2; )
605           {
606             if (spec1->numbered[i].number == spec2->numbered[j].number)
607               {
608                 if (spec1->numbered[i].type != spec2->numbered[j].type)
609                   {
610                     if (error_logger)
611                       error_logger (_("format specifications in '%s' and '%s' for argument %u are not the same"),
612                                     pretty_msgid, pretty_msgstr,
613                                     spec2->numbered[j].number);
614                     err = true;
615                     break;
616                   }
617                 j++, i++;
618               }
619             else
620               i++;
621           }
622     }
623 
624   return err;
625 }
626 
627 
/* Parser table entry that exposes the Java printf format functions of this
   file.  The initializers are positional; their meaning is fixed by the
   declaration of 'struct formatstring_parser', which is not in this
   file.  */
struct formatstring_parser formatstring_java_printf =
{
  format_parse,                         /* parse a format string */
  format_free,                          /* release a parse result */
  format_get_number_of_directives,      /* count of directives */
  NULL,                                 /* hook not provided by this parser */
  format_check                          /* compare msgid and msgstr */
};
636 
637 
638 #ifdef TEST
639 
640 /* Test program: Print the argument list specification returned by
641    format_parse for strings read from standard input.  */
642 
643 #include <stdio.h>
644 
645 static void
format_print(void * descr)646 format_print (void *descr)
647 {
648   struct spec *spec = (struct spec *) descr;
649   unsigned int i;
650 
651   if (spec == NULL)
652     {
653       printf ("INVALID");
654       return;
655     }
656 
657   printf ("(");
658   for (i = 0; i < spec->numbered_arg_count; i++)
659     {
660       if (i > 0)
661         printf (" ");
662       switch (spec->numbered[i].type)
663         {
664         case FAT_GENERAL:
665           printf ("s");
666           break;
667         case FAT_CHARACTER:
668           printf ("c");
669           break;
670         case FAT_INTEGER:
671           printf ("d");
672           break;
673         case FAT_FLOATINGPOINT:
674           printf ("f");
675           break;
676         case FAT_DATETIME:
677           printf ("t");
678           break;
679         default:
680           abort ();
681         }
682     }
683   printf (")");
684 }
685 
/* Test driver: reads format strings from standard input, one per line,
   and for each prints the argument list found by format_parse, followed
   by the reason on a separate line when the string is invalid.

   The getline buffer is reused across iterations and released once at the
   end, which also covers the buffer getline may have allocated on the
   final, failing call.  Each descriptor returned by format_parse is
   released with format_free.  */
int
main ()
{
  char *line = NULL;
  size_t line_size = 0;

  for (;;)
    {
      int line_len;
      char *invalid_reason;
      void *descr;

      line_len = getline (&line, &line_size, stdin);
      if (line_len < 0)
        break;
      /* Strip the trailing newline, if any.  */
      if (line_len > 0 && line[line_len - 1] == '\n')
        line[--line_len] = '\0';

      invalid_reason = NULL;
      descr = format_parse (line, false, NULL, &invalid_reason);

      format_print (descr);
      printf ("\n");
      if (descr == NULL)
        printf ("%s\n", invalid_reason);
      else
        /* format_free dereferences its argument, hence only for
           non-NULL descriptors.  */
        format_free (descr);

      free (invalid_reason);
    }

  free (line);

  return 0;
}
717 
718 /*
719  * For Emacs M-x compile
720  * Local Variables:
721  * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../../gettext-runtime/intl -DHAVE_CONFIG_H -DTEST format-java-printf.c ../gnulib-lib/libgettextlib.la"
722  * End:
723  */
724 
725 #endif /* TEST */
726