1 /* Perl format strings.
2 Copyright (C) 2004, 2006-2007, 2009, 2019-2020 Free Software Foundation, Inc.
3 Written by Bruno Haible <bruno@clisp.org>, 2003.
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17
18 #ifdef HAVE_CONFIG_H
19 # include <config.h>
20 #endif
21
22 #include <stdbool.h>
23 #include <stdlib.h>
24
25 #include "format.h"
26 #include "c-ctype.h"
27 #include "xalloc.h"
28 #include "xvasprintf.h"
29 #include "format-invalid.h"
30 #include "gettext.h"
31
32 #define _(str) gettext (str)
33
34 /* Perl format strings are implemented in function Perl_sv_vcatpvfn in
35 perl-5.8.0/sv.c.
36 A directive
37 - starts with '%' or '%m$' where m is a positive integer starting with a
38 nonzero digit,
39 - is optionally followed by any of the characters '#', '0', '-', ' ', '+',
40 each of which acts as a flag,
41 - is optionally followed by a vector specification: 'v' or '*v' (reads an
42 argument) or '*m$v' where m is a positive integer starting with a nonzero
43 digit,
44 - is optionally followed by a width specification: '*' (reads an argument)
45 or '*m$' where m is a positive integer starting with a nonzero digit or
46 a nonempty digit sequence starting with a nonzero digit,
47 - is optionally followed by '.' and a precision specification: '*' (reads
48 an argument) or '*m$' where m is a positive integer starting with a
49 nonzero digit or a digit sequence,
50 - is optionally followed by a size specifier, one of 'h' 'l' 'll' 'L' 'q'
51 'V' 'I32' 'I64' 'I',
52 - is finished by a specifier
53 - '%', that needs no argument,
54 - 'c', that needs a small integer argument,
55 - 's', that needs a string argument,
56 - '_', that needs a scalar vector argument,
57 - 'p', that needs a pointer argument,
58 - 'i', 'd', 'D', that need an integer argument,
59 - 'u', 'U', 'b', 'o', 'O', 'x', 'X', that need an unsigned integer
60 argument,
61 - 'e', 'E', 'f', 'F', 'g', 'G', that need a floating-point argument,
62 - 'n', that needs a pointer to integer.
63 So there can be numbered argument specifications:
64 - '%m$' for the format string,
65 - '*m$v' for the vector,
66 - '*m$' for the width,
67 - '.*m$' for the precision.
68 Numbered and unnumbered argument specifications can be used in the same
69 string. The effect of '%m$' is to take argument number m, without affecting
70 the current argument number. The current argument number is incremented
71 after processing a directive with an unnumbered argument specification.
72 */
73
/* Classification of one argument consumed by a Perl format directive.
   A value consists of one basic type (values 0..7), optionally combined
   with FAT_UNSIGNED and with at most one FAT_SIZE_* value.  */
enum format_arg_type
{
  /* No argument at all (the '%%' directive).  */
  FAT_NONE              = 0,

  /* Basic types.  */
  FAT_INTEGER           = 1,    /* 'i' 'd' 'D' and the unsigned conversions */
  FAT_DOUBLE            = 2,    /* 'e' 'E' 'f' 'F' 'g' 'G' */
  FAT_CHAR              = 3,    /* 'c' */
  FAT_STRING            = 4,    /* 's' */
  FAT_SCALAR_VECTOR     = 5,    /* '_', and the arguments of vector flags */
  FAT_POINTER           = 6,    /* 'p' */
  FAT_COUNT_POINTER     = 7,    /* 'n' */

  /* Signedness flag, combined with FAT_INTEGER.  */
  FAT_UNSIGNED          = 0x08,

  /* Size values; they share the bits covered by FAT_SIZE_MASK.  */
  FAT_SIZE_SHORT        = 0x10, /* 'h' */
  FAT_SIZE_V            = 0x20, /* 'V', implied by 'D' 'U' 'O' */
  FAT_SIZE_PTR          = 0x30, /* 'I' */
  FAT_SIZE_LONG         = 0x40, /* 'l' */
  FAT_SIZE_LONGLONG     = 0x50, /* 'll' 'L' 'q' 'I64' */

  /* Bitmask that extracts the size value from a combined type.  */
  FAT_SIZE_MASK         = (FAT_SIZE_SHORT
                           | FAT_SIZE_V
                           | FAT_SIZE_PTR
                           | FAT_SIZE_LONG
                           | FAT_SIZE_LONGLONG)
};
/* In C++ the bitwise combinations above have type 'int', not the enum
   type, so a plain 'int' is used to hold them there.  */
#ifdef __cplusplus
typedef int format_arg_type_t;
#else
typedef enum format_arg_type format_arg_type_t;
#endif
101
/* One argument reference found in a format string.  */
struct numbered_arg
{
  /* Position of the argument in the argument list.  Explicit positions
     come from 'm$' specifications; unnumbered directives are assigned
     consecutive positions starting at 1.  */
  unsigned int number;
  /* Type expected for that argument: a basic type, possibly combined
     with FAT_UNSIGNED and a FAT_SIZE_* value.  */
  format_arg_type_t type;
};
107
/* Result of parsing one format string (see format_parse).  */
struct spec
{
  /* Number of directives encountered; every '%' that starts a directive
     is counted, including '%%'.  */
  unsigned int directives;
  /* Number of elements in the 'numbered' array.  */
  unsigned int numbered_arg_count;
  /* Argument references, sorted by argument number with duplicates
     merged.  Heap-allocated and released by format_free; NULL when the
     format string consumes no arguments.  */
  struct numbered_arg *numbered;
};
114
/* Locale independent test for a decimal digit.
   Argument can be 'char' or 'unsigned char'.  (Whereas the argument of
   <ctype.h> isdigit must be an 'unsigned char'.)
   The difference to '0' is converted to 'unsigned int', so characters
   below '0' become large values and one comparison checks both bounds.
   Any isdigit macro that is already defined is discarded first.  */
#undef isdigit
#define isdigit(c) ((unsigned int) ((c) - '0') < 10)

/* Locale independent test for a nonzero decimal digit ('1'..'9'), using
   the same single-comparison technique as isdigit above.  */
#define isnonzerodigit(c) ((unsigned int) ((c) - '1') < 9)
123
124
125 static int
numbered_arg_compare(const void * p1,const void * p2)126 numbered_arg_compare (const void *p1, const void *p2)
127 {
128 unsigned int n1 = ((const struct numbered_arg *) p1)->number;
129 unsigned int n2 = ((const struct numbered_arg *) p2)->number;
130
131 return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0);
132 }
133
134 static void *
format_parse(const char * format,bool translated,char * fdi,char ** invalid_reason)135 format_parse (const char *format, bool translated, char *fdi,
136 char **invalid_reason)
137 {
138 const char *const format_start = format;
139 unsigned int directives;
140 unsigned int numbered_arg_count;
141 struct numbered_arg *numbered;
142 unsigned int numbered_allocated;
143 unsigned int unnumbered_arg_count;
144 struct spec *result;
145
146 directives = 0;
147 numbered_arg_count = 0;
148 numbered = NULL;
149 numbered_allocated = 0;
150 unnumbered_arg_count = 0;
151
152 for (; *format != '\0';)
153 if (*format++ == '%')
154 {
155 /* A directive. */
156 unsigned int number = 0;
157 bool vectorize = false;
158 format_arg_type_t type;
159 format_arg_type_t size;
160
161 FDI_SET (format - 1, FMTDIR_START);
162 directives++;
163
164 if (isnonzerodigit (*format))
165 {
166 const char *f = format;
167 unsigned int m = 0;
168
169 do
170 {
171 m = 10 * m + (*f - '0');
172 f++;
173 }
174 while (isdigit (*f));
175
176 if (*f == '$')
177 {
178 number = m;
179 format = ++f;
180 }
181 }
182
183 /* Parse flags. */
184 while (*format == ' ' || *format == '+' || *format == '-'
185 || *format == '#' || *format == '0')
186 format++;
187
188 /* Parse vector. */
189 if (*format == 'v')
190 {
191 format++;
192 vectorize = true;
193 }
194 else if (*format == '*')
195 {
196 const char *f = format;
197
198 f++;
199 if (*f == 'v')
200 {
201 format = ++f;
202 vectorize = true;
203
204 /* Unnumbered argument. */
205 if (numbered_allocated == numbered_arg_count)
206 {
207 numbered_allocated = 2 * numbered_allocated + 1;
208 numbered = (struct numbered_arg *) xrealloc (numbered, numbered_allocated * sizeof (struct numbered_arg));
209 }
210 numbered[numbered_arg_count].number = ++unnumbered_arg_count;
211 numbered[numbered_arg_count].type = FAT_SCALAR_VECTOR; /* or FAT_STRING? */
212 numbered_arg_count++;
213 }
214 else if (isnonzerodigit (*f))
215 {
216 unsigned int m = 0;
217
218 do
219 {
220 m = 10 * m + (*f - '0');
221 f++;
222 }
223 while (isdigit (*f));
224
225 if (*f == '$')
226 {
227 f++;
228 if (*f == 'v')
229 {
230 unsigned int vector_number = m;
231
232 format = ++f;
233 vectorize = true;
234
235 /* Numbered argument. */
236 /* Note: As of perl-5.8.0, this is not correctly
237 implemented in perl's sv.c. */
238 if (numbered_allocated == numbered_arg_count)
239 {
240 numbered_allocated = 2 * numbered_allocated + 1;
241 numbered = (struct numbered_arg *) xrealloc (numbered, numbered_allocated * sizeof (struct numbered_arg));
242 }
243 numbered[numbered_arg_count].number = vector_number;
244 numbered[numbered_arg_count].type = FAT_SCALAR_VECTOR; /* or FAT_STRING? */
245 numbered_arg_count++;
246 }
247 }
248 }
249 }
250
251 if (vectorize)
252 {
253 /* Numbered or unnumbered argument. */
254 if (numbered_allocated == numbered_arg_count)
255 {
256 numbered_allocated = 2 * numbered_allocated + 1;
257 numbered = (struct numbered_arg *) xrealloc (numbered, numbered_allocated * sizeof (struct numbered_arg));
258 }
259 numbered[numbered_arg_count].number = (number ? number : ++unnumbered_arg_count);
260 numbered[numbered_arg_count].type = FAT_SCALAR_VECTOR;
261 numbered_arg_count++;
262 }
263
264 /* Parse width. */
265 if (*format == '*')
266 {
267 unsigned int width_number = 0;
268
269 format++;
270
271 if (isnonzerodigit (*format))
272 {
273 const char *f = format;
274 unsigned int m = 0;
275
276 do
277 {
278 m = 10 * m + (*f - '0');
279 f++;
280 }
281 while (isdigit (*f));
282
283 if (*f == '$')
284 {
285 width_number = m;
286 format = ++f;
287 }
288 }
289
290 /* Numbered or unnumbered argument. */
291 /* Note: As of perl-5.8.0, this is not correctly
292 implemented in perl's sv.c. */
293 if (numbered_allocated == numbered_arg_count)
294 {
295 numbered_allocated = 2 * numbered_allocated + 1;
296 numbered = (struct numbered_arg *) xrealloc (numbered, numbered_allocated * sizeof (struct numbered_arg));
297 }
298 numbered[numbered_arg_count].number = (width_number ? width_number : ++unnumbered_arg_count);
299 numbered[numbered_arg_count].type = FAT_INTEGER;
300 numbered_arg_count++;
301 }
302 else if (isnonzerodigit (*format))
303 {
304 do format++; while (isdigit (*format));
305 }
306
307 /* Parse precision. */
308 if (*format == '.')
309 {
310 format++;
311
312 if (*format == '*')
313 {
314 unsigned int precision_number = 0;
315
316 format++;
317
318 if (isnonzerodigit (*format))
319 {
320 const char *f = format;
321 unsigned int m = 0;
322
323 do
324 {
325 m = 10 * m + (*f - '0');
326 f++;
327 }
328 while (isdigit (*f));
329
330 if (*f == '$')
331 {
332 precision_number = m;
333 format = ++f;
334 }
335 }
336
337 /* Numbered or unnumbered argument. */
338 if (numbered_allocated == numbered_arg_count)
339 {
340 numbered_allocated = 2 * numbered_allocated + 1;
341 numbered = (struct numbered_arg *) xrealloc (numbered, numbered_allocated * sizeof (struct numbered_arg));
342 }
343 numbered[numbered_arg_count].number = (precision_number ? precision_number : ++unnumbered_arg_count);
344 numbered[numbered_arg_count].type = FAT_INTEGER;
345 numbered_arg_count++;
346 }
347 else
348 {
349 while (isdigit (*format)) format++;
350 }
351 }
352
353 /* Parse size. */
354 size = 0;
355 if (*format == 'h')
356 {
357 size = FAT_SIZE_SHORT;
358 format++;
359 }
360 else if (*format == 'l')
361 {
362 if (format[1] == 'l')
363 {
364 size = FAT_SIZE_LONGLONG;
365 format += 2;
366 }
367 else
368 {
369 size = FAT_SIZE_LONG;
370 format++;
371 }
372 }
373 else if (*format == 'L' || *format == 'q')
374 {
375 size = FAT_SIZE_LONGLONG;
376 format++;
377 }
378 else if (*format == 'V')
379 {
380 size = FAT_SIZE_V;
381 format++;
382 }
383 else if (*format == 'I')
384 {
385 if (format[1] == '6' && format[2] == '4')
386 {
387 size = FAT_SIZE_LONGLONG;
388 format += 3;
389 }
390 else if (format[1] == '3' && format[2] == '2')
391 {
392 size = 0; /* FAT_SIZE_INT */
393 format += 3;
394 }
395 else
396 {
397 size = FAT_SIZE_PTR;
398 format++;
399 }
400 }
401
402 switch (*format)
403 {
404 case '%':
405 type = FAT_NONE;
406 break;
407 case 'c':
408 type = FAT_CHAR;
409 break;
410 case 's':
411 type = FAT_STRING;
412 break;
413 case '_':
414 type = FAT_SCALAR_VECTOR;
415 break;
416 case 'D':
417 type = FAT_INTEGER | FAT_SIZE_V;
418 break;
419 case 'i': case 'd':
420 type = FAT_INTEGER | size;
421 break;
422 case 'U': case 'O':
423 type = FAT_INTEGER | FAT_UNSIGNED | FAT_SIZE_V;
424 break;
425 case 'u': case 'b': case 'o': case 'x': case 'X':
426 type = FAT_INTEGER | FAT_UNSIGNED | size;
427 break;
428 case 'e': case 'E': case 'f': case 'F': case 'g': case 'G':
429 if (size == FAT_SIZE_SHORT || size == FAT_SIZE_LONG)
430 {
431 *invalid_reason =
432 xasprintf (_("In the directive number %u, the size specifier is incompatible with the conversion specifier '%c'."), directives, *format);
433 FDI_SET (format, FMTDIR_ERROR);
434 goto bad_format;
435 }
436 type = FAT_DOUBLE | size;
437 break;
438 case 'p':
439 type = FAT_POINTER;
440 break;
441 case 'n':
442 type = FAT_COUNT_POINTER | size;
443 break;
444 default:
445 if (*format == '\0')
446 {
447 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
448 FDI_SET (format - 1, FMTDIR_ERROR);
449 }
450 else
451 {
452 *invalid_reason =
453 INVALID_CONVERSION_SPECIFIER (directives, *format);
454 FDI_SET (format, FMTDIR_ERROR);
455 }
456 goto bad_format;
457 }
458
459 if (type != FAT_NONE && !vectorize)
460 {
461 /* Numbered or unnumbered argument. */
462 if (numbered_allocated == numbered_arg_count)
463 {
464 numbered_allocated = 2 * numbered_allocated + 1;
465 numbered = (struct numbered_arg *) xrealloc (numbered, numbered_allocated * sizeof (struct numbered_arg));
466 }
467 numbered[numbered_arg_count].number = (number ? number : ++unnumbered_arg_count);
468 numbered[numbered_arg_count].type = type;
469 numbered_arg_count++;
470 }
471
472 FDI_SET (format, FMTDIR_END);
473
474 format++;
475 }
476
477 /* Sort the numbered argument array, and eliminate duplicates. */
478 if (numbered_arg_count > 1)
479 {
480 unsigned int i, j;
481 bool err;
482
483 qsort (numbered, numbered_arg_count,
484 sizeof (struct numbered_arg), numbered_arg_compare);
485
486 /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */
487 err = false;
488 for (i = j = 0; i < numbered_arg_count; i++)
489 if (j > 0 && numbered[i].number == numbered[j-1].number)
490 {
491 format_arg_type_t type1 = numbered[i].type;
492 format_arg_type_t type2 = numbered[j-1].type;
493 format_arg_type_t type_both;
494
495 if (type1 == type2)
496 type_both = type1;
497 else
498 {
499 /* Incompatible types. */
500 type_both = FAT_NONE;
501 if (!err)
502 *invalid_reason =
503 INVALID_INCOMPATIBLE_ARG_TYPES (numbered[i].number);
504 err = true;
505 }
506
507 numbered[j-1].type = type_both;
508 }
509 else
510 {
511 if (j < i)
512 {
513 numbered[j].number = numbered[i].number;
514 numbered[j].type = numbered[i].type;
515 }
516 j++;
517 }
518 numbered_arg_count = j;
519 if (err)
520 /* *invalid_reason has already been set above. */
521 goto bad_format;
522 }
523
524 result = XMALLOC (struct spec);
525 result->directives = directives;
526 result->numbered_arg_count = numbered_arg_count;
527 result->numbered = numbered;
528 return result;
529
530 bad_format:
531 if (numbered != NULL)
532 free (numbered);
533 return NULL;
534 }
535
536 static void
format_free(void * descr)537 format_free (void *descr)
538 {
539 struct spec *spec = (struct spec *) descr;
540
541 if (spec->numbered != NULL)
542 free (spec->numbered);
543 free (spec);
544 }
545
546 static int
format_get_number_of_directives(void * descr)547 format_get_number_of_directives (void *descr)
548 {
549 struct spec *spec = (struct spec *) descr;
550
551 return spec->directives;
552 }
553
554 static bool
format_check(void * msgid_descr,void * msgstr_descr,bool equality,formatstring_error_logger_t error_logger,const char * pretty_msgid,const char * pretty_msgstr)555 format_check (void *msgid_descr, void *msgstr_descr, bool equality,
556 formatstring_error_logger_t error_logger,
557 const char *pretty_msgid, const char *pretty_msgstr)
558 {
559 struct spec *spec1 = (struct spec *) msgid_descr;
560 struct spec *spec2 = (struct spec *) msgstr_descr;
561 bool err = false;
562
563 if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0)
564 {
565 unsigned int i, j;
566 unsigned int n1 = spec1->numbered_arg_count;
567 unsigned int n2 = spec2->numbered_arg_count;
568
569 /* Check the argument names are the same.
570 Both arrays are sorted. We search for the first difference. */
571 for (i = 0, j = 0; i < n1 || j < n2; )
572 {
573 int cmp = (i >= n1 ? 1 :
574 j >= n2 ? -1 :
575 spec1->numbered[i].number > spec2->numbered[j].number ? 1 :
576 spec1->numbered[i].number < spec2->numbered[j].number ? -1 :
577 0);
578
579 if (cmp > 0)
580 {
581 if (error_logger)
582 error_logger (_("a format specification for argument %u, as in '%s', doesn't exist in '%s'"),
583 spec2->numbered[j].number, pretty_msgstr,
584 pretty_msgid);
585 err = true;
586 break;
587 }
588 else if (cmp < 0)
589 {
590 if (equality)
591 {
592 if (error_logger)
593 error_logger (_("a format specification for argument %u doesn't exist in '%s'"),
594 spec1->numbered[i].number, pretty_msgstr);
595 err = true;
596 break;
597 }
598 else
599 i++;
600 }
601 else
602 j++, i++;
603 }
604 /* Check the argument types are the same. */
605 if (!err)
606 for (i = 0, j = 0; j < n2; )
607 {
608 if (spec1->numbered[i].number == spec2->numbered[j].number)
609 {
610 if (spec1->numbered[i].type != spec2->numbered[j].type)
611 {
612 if (error_logger)
613 error_logger (_("format specifications in '%s' and '%s' for argument %u are not the same"),
614 pretty_msgid, pretty_msgstr,
615 spec2->numbered[j].number);
616 err = true;
617 break;
618 }
619 j++, i++;
620 }
621 else
622 i++;
623 }
624 }
625
626 return err;
627 }
628
629
/* Parser descriptor for Perl format strings.  It has external linkage so
   that code outside this file can reach the static functions above.
   The initializer is positional; the order of the slots is dictated by
   the declaration of 'struct formatstring_parser', which is not part of
   this file.  */
struct formatstring_parser formatstring_perl =
{
  format_parse,
  format_free,
  format_get_number_of_directives,
  NULL, /* slot left unset; NOTE(review): see the struct declaration for
           its meaning */
  format_check
};
638
639
640 #ifdef TEST
641
642 /* Test program: Print the argument list specification returned by
643 format_parse for strings read from standard input. */
644
645 #include <stdio.h>
646
647 static void
format_print(void * descr)648 format_print (void *descr)
649 {
650 struct spec *spec = (struct spec *) descr;
651 unsigned int last;
652 unsigned int i;
653
654 if (spec == NULL)
655 {
656 printf ("INVALID");
657 return;
658 }
659
660 printf ("(");
661 last = 1;
662 for (i = 0; i < spec->numbered_arg_count; i++)
663 {
664 unsigned int number = spec->numbered[i].number;
665
666 if (i > 0)
667 printf (" ");
668 if (number < last)
669 abort ();
670 for (; last < number; last++)
671 printf ("_ ");
672 if (spec->numbered[i].type & FAT_UNSIGNED)
673 printf ("[unsigned]");
674 switch (spec->numbered[i].type & FAT_SIZE_MASK)
675 {
676 case 0:
677 break;
678 case FAT_SIZE_SHORT:
679 printf ("[short]");
680 break;
681 case FAT_SIZE_V:
682 printf ("[IV]");
683 break;
684 case FAT_SIZE_PTR:
685 printf ("[PTR]");
686 break;
687 case FAT_SIZE_LONG:
688 printf ("[long]");
689 break;
690 case FAT_SIZE_LONGLONG:
691 printf ("[long long]");
692 break;
693 default:
694 abort ();
695 }
696 switch (spec->numbered[i].type & ~(FAT_UNSIGNED | FAT_SIZE_MASK))
697 {
698 case FAT_INTEGER:
699 printf ("i");
700 break;
701 case FAT_DOUBLE:
702 printf ("f");
703 break;
704 case FAT_CHAR:
705 printf ("c");
706 break;
707 case FAT_STRING:
708 printf ("s");
709 break;
710 case FAT_SCALAR_VECTOR:
711 printf ("sv");
712 break;
713 case FAT_POINTER:
714 printf ("p");
715 break;
716 case FAT_COUNT_POINTER:
717 printf ("n");
718 break;
719 default:
720 abort ();
721 }
722 last = number + 1;
723 }
724 printf (")");
725 }
726
727 int
main()728 main ()
729 {
730 for (;;)
731 {
732 char *line = NULL;
733 size_t line_size = 0;
734 int line_len;
735 char *invalid_reason;
736 void *descr;
737
738 line_len = getline (&line, &line_size, stdin);
739 if (line_len < 0)
740 break;
741 if (line_len > 0 && line[line_len - 1] == '\n')
742 line[--line_len] = '\0';
743
744 invalid_reason = NULL;
745 descr = format_parse (line, false, NULL, &invalid_reason);
746
747 format_print (descr);
748 printf ("\n");
749 if (descr == NULL)
750 printf ("%s\n", invalid_reason);
751
752 free (invalid_reason);
753 free (line);
754 }
755
756 return 0;
757 }
758
759 /*
760 * For Emacs M-x compile
761 * Local Variables:
762 * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../../gettext-runtime/intl -DHAVE_CONFIG_H -DTEST format-perl.c ../gnulib-lib/libgettextlib.la"
763 * End:
764 */
765
766 #endif /* TEST */
767