1 /* Java printf format strings.
2 Copyright (C) 2001-2004, 2006-2007, 2009-2010, 2018-2020 Free Software
3 Foundation, Inc.
4 Written by Bruno Haible <haible@clisp.cons.org>, 2001.
5
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <https://www.gnu.org/licenses/>. */
18
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
22
23 #include <stdbool.h>
24 #include <stdlib.h>
25
26 #include "format.h"
27 #include "c-ctype.h"
28 #include "xalloc.h"
29 #include "xvasprintf.h"
30 #include "format-invalid.h"
31 #include "gettext.h"
32
33 #define _(str) gettext (str)
34
35 /* Java printf format strings are described in java/util/Formatter.html.
36 A directive
37 - starts with '%' or '%<' or '%m$' where m is a positive integer,
38 - is optionally followed by any of the characters '#', '0', '-', ' ', '+',
39 ',', '(',
40 - is optionally followed by a width specification: a nonempty digit sequence,
41 - is optionally followed by '.' and a precision specification: a nonempty
42 digit sequence,
43 - is finished by a specifier
44 - '%', 'n', that need no argument,
45 Restrictions:
46 - For '%': flags other than '-' are invalid, and a precision is
47 invalid.
48 - For 'n': flags, width, and precision are invalid.
49 - 'b', 'B', 'h', 'H', 's', 'S', that need a general argument.
50 Restrictions:
51 Flags other than '#' and '-' are invalid.
52 - 'c', 'C', that need a character argument,
53 Restrictions:
54 Flags other than '-' are invalid.
55 A precision is invalid.
56 - 'd', 'o', 'x', 'X', that need an integer argument,
57 Restrictions:
58 - For 'd': The flag '#' is invalid.
59 - For 'o', 'x', 'X': The flag ',' is invalid.
60 A precision is invalid.
61 - 'e', 'E', 'f', 'g', 'G', 'a', 'A', that need a floating-point argument,
62 Restrictions:
63 - For 'a', 'A': The flags ',', '(' are invalid.
64 - 't', 'T', followed by one of
65 'H', 'I', 'k', 'l', 'M', 'S', 'L', 'N', 'p', 'z', 'Z', 's', 'Q',
66 'B', 'b', 'h', 'A', 'a', 'C', 'Y', 'y', 'j', 'm', 'd', 'e',
67 'R', 'T', 'r', 'D', 'F', 'c'
68 that need a date/time argument.
69 Restrictions:
70 Flags other than '-' are invalid.
71 A precision is invalid.
72 Numbered ('%m$') and unnumbered argument specifications can be mixed in the
73 same string. Numbered argument specifications have no influence on the
74 unnumbered argument counter.
75 */
76
/* Kind of argument that a directive consumes.  */
enum format_arg_type
{
  FAT_NONE          = 0,  /* the directive consumes no argument */
  /* Basic types */
  FAT_GENERAL       = 1,
  FAT_CHARACTER     = 2,
  FAT_INTEGER       = 3,
  FAT_FLOATINGPOINT = 4,
  FAT_DATETIME      = 5
};

/* Type used to store an 'enum format_arg_type' value.  When compiled as
   C++ it is a plain 'int'.  */
#ifdef __cplusplus
typedef int format_arg_type_t;
#else
typedef enum format_arg_type format_arg_type_t;
#endif
92
/* Bit mask describing which flags, width and precision were seen in a
   directive.  Each constant occupies its own bit.  */
enum
{
  /* Flags */
  FAT_ALTERNATE      = 0x001, /* '#' */
  FAT_ZERO_PADDED    = 0x002, /* '0' */
  FAT_LEFT_JUSTIFIED = 0x004, /* '-' */
  FAT_SPACE_SIGN     = 0x008, /* ' ' */
  FAT_SIGN           = 0x010, /* '+' */
  FAT_OBEY_LOCALE    = 0x020, /* ',' */
  FAT_MONETARY       = 0x040, /* '(' */
  /* Width */
  FAT_WIDTH          = 0x080,
  /* Precision */
  FAT_PRECISION      = 0x100,
};
108
109 struct numbered_arg
110 {
111 unsigned int number;
112 format_arg_type_t type;
113 };
114
/* Result of parsing one format string.  */
struct spec
{
  unsigned int directives;          /* number of '%' directives seen */
  unsigned int numbered_arg_count;  /* number of used entries in 'numbered' */
  struct numbered_arg *numbered;    /* heap array of arguments, or NULL */
};
121
/* Test for an ASCII decimal digit, independently of the locale.
   Unlike <ctype.h> isdigit, which requires an 'unsigned char' value, the
   argument may be a plain 'char' as well: the subtraction result is
   converted to 'unsigned int', so anything below '0' becomes a huge
   value and fails the range test.  */
#undef isdigit
#define isdigit(c) ((unsigned int) ((c) - '0') <= 9)
127
128
129 static int
numbered_arg_compare(const void * p1,const void * p2)130 numbered_arg_compare (const void *p1, const void *p2)
131 {
132 unsigned int n1 = ((const struct numbered_arg *) p1)->number;
133 unsigned int n2 = ((const struct numbered_arg *) p2)->number;
134
135 return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0);
136 }
137
/* Constructors for the error messages specific to this parser.  Each one
   expands to an xasprintf call on a translated template; DIRECTIVE_NUMBER
   is the 1-based index of the offending directive.  */

/* '%<' was used although no previous directive consumed an argument.  */
#define INVALID_LAST_ARG(directive_number) \
  xasprintf (_("In the directive number %u, the reference to the argument of the previous directive is invalid."), directive_number)

/* A '.' was not followed by a digit.  */
#define INVALID_PRECISION_MISSING(directive_number) \
  xasprintf (_("In the directive number %u, the precision is missing."), directive_number)

/* The flag FLAG_CHAR is not allowed with the conversion CONV_CHAR.  */
#define INVALID_FLAG_FOR(directive_number,flag_char,conv_char) \
  xasprintf (_("In the directive number %u, the flag '%c' is invalid for the conversion '%c'."), directive_number, flag_char, conv_char)

/* A width is not allowed with the conversion CONV_CHAR.  */
#define INVALID_WIDTH_FOR(directive_number,conv_char) \
  xasprintf (_("In the directive number %u, a width is invalid for the conversion '%c'."), directive_number, conv_char)

/* A precision is not allowed with the conversion CONV_CHAR.  */
#define INVALID_PRECISION_FOR(directive_number,conv_char) \
  xasprintf (_("In the directive number %u, a precision is invalid for the conversion '%c'."), directive_number, conv_char)

/* SUFFIX_CHAR is not a valid suffix after the date/time conversion
   CONV_CHAR.  The printability test is made on SUFFIX_CHAR, because that
   is the character which comes from arbitrary input and which the first
   message prints; when it is not printable, the second message, which
   does not show it, is used instead.  */
#define INVALID_DATETIME_CONVERSION_SUFFIX(directive_number,conv_char,suffix_char) \
  (c_isprint (suffix_char) \
   ? xasprintf (_("In the directive number %u, for the conversion '%c', the character '%c' is not a valid conversion suffix."), directive_number, conv_char, suffix_char) \
   : xasprintf (_("The character that terminates the directive number %u, for the conversion '%c', is not a valid conversion suffix."), directive_number, conv_char))
157
158 static void *
format_parse(const char * format,bool translated,char * fdi,char ** invalid_reason)159 format_parse (const char *format, bool translated, char *fdi,
160 char **invalid_reason)
161 {
162 const char *const format_start = format;
163 struct spec spec;
164 unsigned int numbered_allocated;
165 struct spec *result;
166 unsigned int unnumbered_arg_count;
167 unsigned int last_arg_number;
168
169 spec.directives = 0;
170 spec.numbered_arg_count = 0;
171 spec.numbered = NULL;
172 numbered_allocated = 0;
173 unnumbered_arg_count = 0;
174 last_arg_number = 0;
175
176 for (; *format != '\0';)
177 if (*format++ == '%')
178 {
179 /* A directive. */
180 unsigned int number = 0;
181 unsigned int flags;
182 format_arg_type_t type;
183 unsigned int invalid_flags;
184
185 FDI_SET (format - 1, FMTDIR_START);
186 spec.directives++;
187
188 if (*format == '<')
189 {
190 if (last_arg_number == 0)
191 {
192 *invalid_reason = INVALID_LAST_ARG (spec.directives);
193 FDI_SET (format, FMTDIR_ERROR);
194 goto bad_format;
195 }
196 number = last_arg_number;
197 format++;
198 }
199 else if (isdigit (*format))
200 {
201 const char *f = format;
202 unsigned int m = 0;
203
204 do
205 {
206 m = 10 * m + (*f - '0');
207 f++;
208 }
209 while (isdigit (*f));
210
211 if (*f == '$')
212 {
213 if (m == 0)
214 {
215 *invalid_reason = INVALID_ARGNO_0 (spec.directives);
216 FDI_SET (f, FMTDIR_ERROR);
217 goto bad_format;
218 }
219 number = m;
220 format = ++f;
221 }
222 }
223
224 flags = 0;
225
226 /* Parse flags. */
227 for (;;)
228 {
229 if (*format == '#')
230 {
231 flags |= FAT_ALTERNATE;
232 format++;
233 }
234 else if (*format == '0')
235 {
236 flags |= FAT_ZERO_PADDED;
237 format++;
238 }
239 else if (*format == '-')
240 {
241 flags |= FAT_LEFT_JUSTIFIED;
242 format++;
243 }
244 else if (*format == ' ')
245 {
246 flags |= FAT_SPACE_SIGN;
247 format++;
248 }
249 else if (*format == '+')
250 {
251 flags |= FAT_SIGN;
252 format++;
253 }
254 else if (*format == ',')
255 {
256 flags |= FAT_OBEY_LOCALE;
257 format++;
258 }
259 else if (*format == '(')
260 {
261 flags |= FAT_MONETARY;
262 format++;
263 }
264 else
265 break;
266 }
267
268 /* Parse width. */
269 if (isdigit (*format))
270 {
271 do format++; while (isdigit (*format));
272 flags |= FAT_WIDTH;
273 }
274
275 /* Parse precision. */
276 if (*format == '.')
277 {
278 format++;
279
280 if (!isdigit (*format))
281 {
282 if (*format == '\0')
283 {
284 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
285 FDI_SET (format - 1, FMTDIR_ERROR);
286 }
287 else
288 {
289 *invalid_reason = INVALID_PRECISION_MISSING (spec.directives);
290 FDI_SET (format, FMTDIR_ERROR);
291 }
292 goto bad_format;
293 }
294
295 do format++; while (isdigit (*format));
296 flags |= FAT_PRECISION;
297 }
298
299 /* Parse conversion. */
300 switch (*format)
301 {
302 case '%':
303 type = FAT_NONE;
304 invalid_flags = (FAT_ALTERNATE | FAT_ZERO_PADDED | FAT_SPACE_SIGN
305 | FAT_SIGN | FAT_OBEY_LOCALE | FAT_MONETARY)
306 | FAT_PRECISION;
307 break;
308 case 'n':
309 type = FAT_NONE;
310 invalid_flags = (FAT_ALTERNATE | FAT_ZERO_PADDED | FAT_LEFT_JUSTIFIED
311 | FAT_SPACE_SIGN | FAT_SIGN | FAT_OBEY_LOCALE
312 | FAT_MONETARY)
313 | FAT_WIDTH | FAT_PRECISION;
314 break;
315 case 'b': case 'B':
316 case 'h': case 'H':
317 case 's': case 'S':
318 type = FAT_GENERAL;
319 invalid_flags = (FAT_ZERO_PADDED | FAT_SPACE_SIGN | FAT_SIGN
320 | FAT_OBEY_LOCALE | FAT_MONETARY);
321 break;
322 case 'c': case 'C':
323 type = FAT_CHARACTER;
324 invalid_flags = (FAT_ALTERNATE | FAT_ZERO_PADDED | FAT_SPACE_SIGN
325 | FAT_SIGN | FAT_OBEY_LOCALE | FAT_MONETARY)
326 | FAT_PRECISION;
327 break;
328 case 'd':
329 type = FAT_INTEGER;
330 invalid_flags = FAT_ALTERNATE | FAT_PRECISION;
331 break;
332 case 'o': case 'x': case 'X':
333 type = FAT_INTEGER;
334 invalid_flags = FAT_OBEY_LOCALE | FAT_PRECISION;
335 break;
336 case 'e': case 'E':
337 case 'f':
338 case 'g': case 'G':
339 type = FAT_FLOATINGPOINT;
340 invalid_flags = 0;
341 break;
342 case 'a': case 'A':
343 type = FAT_FLOATINGPOINT;
344 invalid_flags = FAT_OBEY_LOCALE | FAT_MONETARY;
345 break;
346 case 't': case 'T':
347 type = FAT_DATETIME;
348 invalid_flags = (FAT_ALTERNATE | FAT_ZERO_PADDED | FAT_SPACE_SIGN
349 | FAT_SIGN | FAT_OBEY_LOCALE | FAT_MONETARY)
350 | FAT_PRECISION;
351 break;
352 default:
353 if (*format == '\0')
354 {
355 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
356 FDI_SET (format - 1, FMTDIR_ERROR);
357 }
358 else
359 {
360 *invalid_reason =
361 INVALID_CONVERSION_SPECIFIER (spec.directives, *format);
362 FDI_SET (format, FMTDIR_ERROR);
363 }
364 goto bad_format;
365 }
366
367 /* Report invalid flags, width, precision. */
368 invalid_flags &= flags;
369 if (invalid_flags & FAT_ALTERNATE)
370 {
371 *invalid_reason = INVALID_FLAG_FOR (spec.directives, '#', *format);
372 FDI_SET (format, FMTDIR_ERROR);
373 goto bad_format;
374 }
375 if (invalid_flags & FAT_ZERO_PADDED)
376 {
377 *invalid_reason = INVALID_FLAG_FOR (spec.directives, '0', *format);
378 FDI_SET (format, FMTDIR_ERROR);
379 goto bad_format;
380 }
381 if (invalid_flags & FAT_LEFT_JUSTIFIED)
382 {
383 *invalid_reason = INVALID_FLAG_FOR (spec.directives, '-', *format);
384 FDI_SET (format, FMTDIR_ERROR);
385 goto bad_format;
386 }
387 if (invalid_flags & FAT_SPACE_SIGN)
388 {
389 *invalid_reason = INVALID_FLAG_FOR (spec.directives, ' ', *format);
390 FDI_SET (format, FMTDIR_ERROR);
391 goto bad_format;
392 }
393 if (invalid_flags & FAT_SIGN)
394 {
395 *invalid_reason = INVALID_FLAG_FOR (spec.directives, '+', *format);
396 FDI_SET (format, FMTDIR_ERROR);
397 goto bad_format;
398 }
399 if (invalid_flags & FAT_OBEY_LOCALE)
400 {
401 *invalid_reason = INVALID_FLAG_FOR (spec.directives, ',', *format);
402 FDI_SET (format, FMTDIR_ERROR);
403 goto bad_format;
404 }
405 if (invalid_flags & FAT_MONETARY)
406 {
407 *invalid_reason = INVALID_FLAG_FOR (spec.directives, '(', *format);
408 FDI_SET (format, FMTDIR_ERROR);
409 goto bad_format;
410 }
411 if (invalid_flags & FAT_WIDTH)
412 {
413 *invalid_reason = INVALID_WIDTH_FOR (spec.directives, *format);
414 FDI_SET (format, FMTDIR_ERROR);
415 goto bad_format;
416 }
417 if (invalid_flags & FAT_PRECISION)
418 {
419 *invalid_reason = INVALID_PRECISION_FOR (spec.directives, *format);
420 FDI_SET (format, FMTDIR_ERROR);
421 goto bad_format;
422 }
423
424 if (type == FAT_DATETIME)
425 {
426 format++;
427
428 /* Parse conversion suffix. */
429 switch (*format)
430 {
431 case 'H': case 'I': case 'k': case 'l': case 'M': case 'S':
432 case 'L': case 'N': case 'p': case 'z': case 'Z': case 's':
433 case 'Q':
434 case 'B': case 'b': case 'h': case 'A': case 'a': case 'C':
435 case 'Y': case 'y': case 'j': case 'm': case 'd': case 'e':
436 case 'R': case 'T': case 'r': case 'D': case 'F': case 'c':
437 break;
438 default:
439 if (*format == '\0')
440 {
441 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
442 FDI_SET (format - 1, FMTDIR_ERROR);
443 }
444 else
445 {
446 *invalid_reason =
447 INVALID_DATETIME_CONVERSION_SUFFIX (spec.directives,
448 format[-1], *format);
449 FDI_SET (format, FMTDIR_ERROR);
450 }
451 goto bad_format;
452 }
453 }
454
455 if (type != FAT_NONE)
456 {
457 if (number == 0)
458 number = ++unnumbered_arg_count;
459
460 if (numbered_allocated == spec.numbered_arg_count)
461 {
462 numbered_allocated = 2 * numbered_allocated + 1;
463 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, numbered_allocated * sizeof (struct numbered_arg));
464 }
465 spec.numbered[spec.numbered_arg_count].number = number;
466 spec.numbered[spec.numbered_arg_count].type = type;
467 spec.numbered_arg_count++;
468
469 last_arg_number = number;
470 }
471
472 FDI_SET (format, FMTDIR_END);
473
474 format++;
475 }
476
477 /* Sort the numbered argument array, and eliminate duplicates. */
478 if (spec.numbered_arg_count > 1)
479 {
480 unsigned int i, j;
481 bool err;
482
483 qsort (spec.numbered, spec.numbered_arg_count,
484 sizeof (struct numbered_arg), numbered_arg_compare);
485
486 /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */
487 err = false;
488 for (i = j = 0; i < spec.numbered_arg_count; i++)
489 if (j > 0 && spec.numbered[i].number == spec.numbered[j-1].number)
490 {
491 enum format_arg_type type1 = spec.numbered[i].type;
492 enum format_arg_type type2 = spec.numbered[j-1].type;
493 enum format_arg_type type_both;
494
495 if (type1 == type2)
496 type_both = type1;
497 else
498 {
499 /* Incompatible types. */
500 type_both = FAT_NONE;
501 if (!err)
502 *invalid_reason =
503 INVALID_INCOMPATIBLE_ARG_TYPES (spec.numbered[i].number);
504 err = true;
505 }
506
507 spec.numbered[j-1].type = type_both;
508 }
509 else
510 {
511 if (j < i)
512 {
513 spec.numbered[j].number = spec.numbered[i].number;
514 spec.numbered[j].type = spec.numbered[i].type;
515 }
516 j++;
517 }
518 spec.numbered_arg_count = j;
519 if (err)
520 /* *invalid_reason has already been set above. */
521 goto bad_format;
522 }
523
524 result = XMALLOC (struct spec);
525 *result = spec;
526 return result;
527
528 bad_format:
529 if (spec.numbered != NULL)
530 free (spec.numbered);
531 return NULL;
532 }
533
534 static void
format_free(void * descr)535 format_free (void *descr)
536 {
537 struct spec *spec = (struct spec *) descr;
538
539 if (spec->numbered != NULL)
540 free (spec->numbered);
541 free (spec);
542 }
543
544 static int
format_get_number_of_directives(void * descr)545 format_get_number_of_directives (void *descr)
546 {
547 struct spec *spec = (struct spec *) descr;
548
549 return spec->directives;
550 }
551
552 static bool
format_check(void * msgid_descr,void * msgstr_descr,bool equality,formatstring_error_logger_t error_logger,const char * pretty_msgid,const char * pretty_msgstr)553 format_check (void *msgid_descr, void *msgstr_descr, bool equality,
554 formatstring_error_logger_t error_logger,
555 const char *pretty_msgid, const char *pretty_msgstr)
556 {
557 struct spec *spec1 = (struct spec *) msgid_descr;
558 struct spec *spec2 = (struct spec *) msgstr_descr;
559 bool err = false;
560
561 if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0)
562 {
563 unsigned int i, j;
564 unsigned int n1 = spec1->numbered_arg_count;
565 unsigned int n2 = spec2->numbered_arg_count;
566
567 /* Check the argument names are the same.
568 Both arrays are sorted. We search for the first difference. */
569 for (i = 0, j = 0; i < n1 || j < n2; )
570 {
571 int cmp = (i >= n1 ? 1 :
572 j >= n2 ? -1 :
573 spec1->numbered[i].number > spec2->numbered[j].number ? 1 :
574 spec1->numbered[i].number < spec2->numbered[j].number ? -1 :
575 0);
576
577 if (cmp > 0)
578 {
579 if (error_logger)
580 error_logger (_("a format specification for argument %u, as in '%s', doesn't exist in '%s'"),
581 spec2->numbered[j].number, pretty_msgstr,
582 pretty_msgid);
583 err = true;
584 break;
585 }
586 else if (cmp < 0)
587 {
588 if (equality)
589 {
590 if (error_logger)
591 error_logger (_("a format specification for argument %u doesn't exist in '%s'"),
592 spec1->numbered[i].number, pretty_msgstr);
593 err = true;
594 break;
595 }
596 else
597 i++;
598 }
599 else
600 j++, i++;
601 }
602 /* Check the argument types are the same. */
603 if (!err)
604 for (i = 0, j = 0; j < n2; )
605 {
606 if (spec1->numbered[i].number == spec2->numbered[j].number)
607 {
608 if (spec1->numbered[i].type != spec2->numbered[j].type)
609 {
610 if (error_logger)
611 error_logger (_("format specifications in '%s' and '%s' for argument %u are not the same"),
612 pretty_msgid, pretty_msgstr,
613 spec2->numbered[j].number);
614 err = true;
615 break;
616 }
617 j++, i++;
618 }
619 else
620 i++;
621 }
622 }
623
624 return err;
625 }
626
627
628 struct formatstring_parser formatstring_java_printf =
629 {
630 format_parse,
631 format_free,
632 format_get_number_of_directives,
633 NULL,
634 format_check
635 };
636
637
638 #ifdef TEST
639
640 /* Test program: Print the argument list specification returned by
641 format_parse for strings read from standard input. */
642
643 #include <stdio.h>
644
645 static void
format_print(void * descr)646 format_print (void *descr)
647 {
648 struct spec *spec = (struct spec *) descr;
649 unsigned int i;
650
651 if (spec == NULL)
652 {
653 printf ("INVALID");
654 return;
655 }
656
657 printf ("(");
658 for (i = 0; i < spec->numbered_arg_count; i++)
659 {
660 if (i > 0)
661 printf (" ");
662 switch (spec->numbered[i].type)
663 {
664 case FAT_GENERAL:
665 printf ("s");
666 break;
667 case FAT_CHARACTER:
668 printf ("c");
669 break;
670 case FAT_INTEGER:
671 printf ("d");
672 break;
673 case FAT_FLOATINGPOINT:
674 printf ("f");
675 break;
676 case FAT_DATETIME:
677 printf ("t");
678 break;
679 default:
680 abort ();
681 }
682 }
683 printf (")");
684 }
685
/* Test driver: read format strings from standard input, one per line,
   and print for each the argument list found by format_parse, followed
   by the reason on a separate line if the string was rejected.  */
int
main (void)
{
  for (;;)
    {
      char *line = NULL;
      size_t line_size = 0;
      /* getline returns ssize_t; an 'int' could truncate the length of a
         very long line.  */
      ssize_t line_len;
      char *invalid_reason;
      void *descr;

      line_len = getline (&line, &line_size, stdin);
      if (line_len < 0)
        {
          /* getline may have allocated a buffer even though it failed.  */
          free (line);
          break;
        }
      /* Strip the trailing newline, if any.  */
      if (line_len > 0 && line[line_len - 1] == '\n')
        line[--line_len] = '\0';

      invalid_reason = NULL;
      descr = format_parse (line, false, NULL, &invalid_reason);

      format_print (descr);
      printf ("\n");
      if (descr == NULL)
        printf ("%s\n", invalid_reason);

      free (invalid_reason);
      free (line);
    }

  return 0;
}
717
718 /*
719 * For Emacs M-x compile
720 * Local Variables:
721 * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../../gettext-runtime/intl -DHAVE_CONFIG_H -DTEST format-java-printf.c ../gnulib-lib/libgettextlib.la"
722 * End:
723 */
724
725 #endif /* TEST */
726