1 /* Python format strings.
2 Copyright (C) 2001-2004, 2006-2009, 2019-2020 Free Software Foundation, Inc.
3 Written by Bruno Haible <haible@clisp.cons.org>, 2001.
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17
18 #ifdef HAVE_CONFIG_H
19 # include <config.h>
20 #endif
21
22 #include <stdbool.h>
23 #include <stdlib.h>
24 #include <string.h>
25
26 #include "format.h"
27 #include "c-ctype.h"
28 #include "xalloc.h"
29 #include "xvasprintf.h"
30 #include "format-invalid.h"
31 #include "gettext.h"
32
33 #define _(str) gettext (str)
34
35 /* Python format strings are described in
36 Python Library reference
37 2. Built-in Types, Exceptions and Functions
38 2.1. Built-in Types
39 2.1.5. Sequence Types
40 2.1.5.2. String Formatting Operations
41 Any string or Unicode string can act as format string via the '%' operator,
42 implemented in stringobject.c and unicodeobject.c.
43 A directive
44 - starts with '%'
45 - is optionally followed by '(ident)' where ident is any sequence of
46 characters with balanced left and right parentheses,
47 - is optionally followed by any of the characters '-' (left justification),
48 '+' (sign), ' ' (blank), '#' (alt), '0' (zero), each of which acts as a
49 flag,
50 - is optionally followed by a width specification: '*' (reads an argument)
51 or a nonempty digit sequence,
52 - is optionally followed by '.' and a precision specification: '*' (reads
53 an argument) or a nonempty digit sequence,
54 - is optionally followed by a size specifier, one of 'h' 'l' 'L'.
55 - is finished by a specifier
56 - '%', that needs no argument,
57 - 'c', that needs a character argument,
58 - 's', 'r', that need a string argument (or, when a precision of 0 is
59 given, an argument of any type),
60 - 'i', 'd', 'u', 'o', 'x', 'X', that need an integer argument,
61 - 'e', 'E', 'f', 'g', 'G', that need a floating-point argument.
62 Use of '(ident)' and use of unnamed argument specifications are exclusive,
63 because the first requires a mapping as argument, while the second requires
64 a tuple as argument. When unnamed arguments are used, the number of
65 arguments in the format string and the number of elements in the argument
66 tuple (to the right of the '%' operator) must be the same.
67 */
68
/* Classification of the argument consumed by one format directive.
   The numeric values are the ones the compiler would assign implicitly.  */
enum format_arg_type
{
  FAT_NONE      = 0,  /* '%%' directive; also stored for a name that is
                         used with incompatible types */
  FAT_ANY       = 1,  /* 's' or 'r' with an all-zero precision */
  FAT_CHARACTER = 2,  /* 'c' */
  FAT_STRING    = 3,  /* 's', 'r' */
  FAT_INTEGER   = 4,  /* 'i' 'd' 'u' 'o' 'x' 'X', and '*' width/precision */
  FAT_FLOAT     = 5   /* 'e' 'E' 'f' 'g' 'G' */
};
78
79 struct named_arg
80 {
81 char *name;
82 enum format_arg_type type;
83 };
84
85 struct unnamed_arg
86 {
87 enum format_arg_type type;
88 };
89
/* Parsed description of a Python format string.
   format_parse rejects strings that mix named and unnamed arguments, so
   at most one of the two arrays is non-empty.  */
struct spec
{
  /* Number of '%' directives seen, '%%' included.  */
  unsigned int directives;
  /* Number of valid elements in NAMED.  */
  unsigned int named_arg_count;
  /* Number of valid elements in UNNAMED.  */
  unsigned int unnamed_arg_count;
  /* Named arguments; format_parse sorts them by name and merges
     duplicates before returning.  NULL when there are none.  */
  struct named_arg *named;
  /* Unnamed arguments in order of appearance.  NULL when there are none.  */
  struct unnamed_arg *unnamed;
};
98
/* Locale independent decimal digit test.
   Works for 'char' as well as 'unsigned char' arguments (unlike the
   <ctype.h> isdigit, whose argument must be an 'unsigned char').
   Values below '0' wrap around to large unsigned numbers, so a single
   comparison covers both ends of the range.  */
#undef isdigit
#define isdigit(c) ((unsigned int) ((c) - '0') <= 9)
104
105
106 static int
named_arg_compare(const void * p1,const void * p2)107 named_arg_compare (const void *p1, const void *p2)
108 {
109 return strcmp (((const struct named_arg *) p1)->name,
110 ((const struct named_arg *) p2)->name);
111 }
112
/* Expands to an xstrdup'ed copy of the translated message reported when
   a format string uses both '%(name)' and unnamed directives.  The result
   is stored in *invalid_reason by format_parse.  */
#define INVALID_MIXES_NAMED_UNNAMED() \
  xstrdup (_("The string refers to arguments both through argument names and through unnamed argument specifications."))
115
116 static void *
format_parse(const char * format,bool translated,char * fdi,char ** invalid_reason)117 format_parse (const char *format, bool translated, char *fdi,
118 char **invalid_reason)
119 {
120 const char *const format_start = format;
121 struct spec spec;
122 unsigned int allocated;
123 struct spec *result;
124
125 spec.directives = 0;
126 spec.named_arg_count = 0;
127 spec.unnamed_arg_count = 0;
128 spec.named = NULL;
129 spec.unnamed = NULL;
130 allocated = 0;
131
132 for (; *format != '\0';)
133 if (*format++ == '%')
134 {
135 /* A directive. */
136 char *name = NULL;
137 bool zero_precision = false;
138 enum format_arg_type type;
139
140 FDI_SET (format - 1, FMTDIR_START);
141 spec.directives++;
142
143 if (*format == '(')
144 {
145 unsigned int depth;
146 const char *name_start;
147 const char *name_end;
148 size_t n;
149
150 name_start = ++format;
151 depth = 0;
152 for (; *format != '\0'; format++)
153 {
154 if (*format == '(')
155 depth++;
156 else if (*format == ')')
157 {
158 if (depth == 0)
159 break;
160 else
161 depth--;
162 }
163 }
164 if (*format == '\0')
165 {
166 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
167 FDI_SET (format - 1, FMTDIR_ERROR);
168 goto bad_format;
169 }
170 name_end = format++;
171
172 n = name_end - name_start;
173 name = XNMALLOC (n + 1, char);
174 memcpy (name, name_start, n);
175 name[n] = '\0';
176 }
177
178 while (*format == '-' || *format == '+' || *format == ' '
179 || *format == '#' || *format == '0')
180 format++;
181
182 if (*format == '*')
183 {
184 format++;
185
186 /* Named and unnamed specifications are exclusive. */
187 if (spec.named_arg_count > 0)
188 {
189 *invalid_reason = INVALID_MIXES_NAMED_UNNAMED ();
190 FDI_SET (format - 1, FMTDIR_ERROR);
191 goto bad_format;
192 }
193
194 if (allocated == spec.unnamed_arg_count)
195 {
196 allocated = 2 * allocated + 1;
197 spec.unnamed = (struct unnamed_arg *) xrealloc (spec.unnamed, allocated * sizeof (struct unnamed_arg));
198 }
199 spec.unnamed[spec.unnamed_arg_count].type = FAT_INTEGER;
200 spec.unnamed_arg_count++;
201 }
202 else if (isdigit (*format))
203 {
204 do format++; while (isdigit (*format));
205 }
206
207 if (*format == '.')
208 {
209 format++;
210
211 if (*format == '*')
212 {
213 format++;
214
215 /* Named and unnamed specifications are exclusive. */
216 if (spec.named_arg_count > 0)
217 {
218 *invalid_reason = INVALID_MIXES_NAMED_UNNAMED ();
219 FDI_SET (format - 1, FMTDIR_ERROR);
220 goto bad_format;
221 }
222
223 if (allocated == spec.unnamed_arg_count)
224 {
225 allocated = 2 * allocated + 1;
226 spec.unnamed = (struct unnamed_arg *) xrealloc (spec.unnamed, allocated * sizeof (struct unnamed_arg));
227 }
228 spec.unnamed[spec.unnamed_arg_count].type = FAT_INTEGER;
229 spec.unnamed_arg_count++;
230 }
231 else if (isdigit (*format))
232 {
233 zero_precision = true;
234 do
235 {
236 if (*format != '0')
237 zero_precision = false;
238 format++;
239 }
240 while (isdigit (*format));
241 }
242 }
243
244 if (*format == 'h' || *format == 'l' || *format == 'L')
245 format++;
246
247 switch (*format)
248 {
249 case '%':
250 type = FAT_NONE;
251 break;
252 case 'c':
253 type = FAT_CHARACTER;
254 break;
255 case 's': case 'r':
256 type = (zero_precision ? FAT_ANY : FAT_STRING);
257 break;
258 case 'i': case 'd': case 'u': case 'o': case 'x': case 'X':
259 type = FAT_INTEGER;
260 break;
261 case 'e': case 'E': case 'f': case 'g': case 'G':
262 type = FAT_FLOAT;
263 break;
264 default:
265 if (*format == '\0')
266 {
267 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
268 FDI_SET (format - 1, FMTDIR_ERROR);
269 }
270 else
271 {
272 *invalid_reason =
273 INVALID_CONVERSION_SPECIFIER (spec.directives, *format);
274 FDI_SET (format, FMTDIR_ERROR);
275 }
276 goto bad_format;
277 }
278
279 if (name != NULL)
280 {
281 /* Named argument. */
282
283 /* Named and unnamed specifications are exclusive. */
284 if (spec.unnamed_arg_count > 0)
285 {
286 *invalid_reason = INVALID_MIXES_NAMED_UNNAMED ();
287 FDI_SET (format, FMTDIR_ERROR);
288 goto bad_format;
289 }
290
291 if (allocated == spec.named_arg_count)
292 {
293 allocated = 2 * allocated + 1;
294 spec.named = (struct named_arg *) xrealloc (spec.named, allocated * sizeof (struct named_arg));
295 }
296 spec.named[spec.named_arg_count].name = name;
297 spec.named[spec.named_arg_count].type = type;
298 spec.named_arg_count++;
299 }
300 else if (*format != '%')
301 {
302 /* Unnamed argument. */
303
304 /* Named and unnamed specifications are exclusive. */
305 if (spec.named_arg_count > 0)
306 {
307 *invalid_reason = INVALID_MIXES_NAMED_UNNAMED ();
308 FDI_SET (format, FMTDIR_ERROR);
309 goto bad_format;
310 }
311
312 if (allocated == spec.unnamed_arg_count)
313 {
314 allocated = 2 * allocated + 1;
315 spec.unnamed = (struct unnamed_arg *) xrealloc (spec.unnamed, allocated * sizeof (struct unnamed_arg));
316 }
317 spec.unnamed[spec.unnamed_arg_count].type = type;
318 spec.unnamed_arg_count++;
319 }
320
321 FDI_SET (format, FMTDIR_END);
322
323 format++;
324 }
325
326 /* Sort the named argument array, and eliminate duplicates. */
327 if (spec.named_arg_count > 1)
328 {
329 unsigned int i, j;
330 bool err;
331
332 qsort (spec.named, spec.named_arg_count, sizeof (struct named_arg),
333 named_arg_compare);
334
335 /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */
336 err = false;
337 for (i = j = 0; i < spec.named_arg_count; i++)
338 if (j > 0 && strcmp (spec.named[i].name, spec.named[j-1].name) == 0)
339 {
340 enum format_arg_type type1 = spec.named[i].type;
341 enum format_arg_type type2 = spec.named[j-1].type;
342 enum format_arg_type type_both;
343
344 if (type1 == type2 || type2 == FAT_ANY)
345 type_both = type1;
346 else if (type1 == FAT_ANY)
347 type_both = type2;
348 else
349 {
350 /* Incompatible types. */
351 type_both = FAT_NONE;
352 if (!err)
353 *invalid_reason =
354 xasprintf (_("The string refers to the argument named '%s' in incompatible ways."), spec.named[i].name);
355 err = true;
356 }
357
358 spec.named[j-1].type = type_both;
359 free (spec.named[i].name);
360 }
361 else
362 {
363 if (j < i)
364 {
365 spec.named[j].name = spec.named[i].name;
366 spec.named[j].type = spec.named[i].type;
367 }
368 j++;
369 }
370 spec.named_arg_count = j;
371 if (err)
372 /* *invalid_reason has already been set above. */
373 goto bad_format;
374 }
375
376 result = XMALLOC (struct spec);
377 *result = spec;
378 return result;
379
380 bad_format:
381 if (spec.named != NULL)
382 {
383 unsigned int i;
384 for (i = 0; i < spec.named_arg_count; i++)
385 free (spec.named[i].name);
386 free (spec.named);
387 }
388 if (spec.unnamed != NULL)
389 free (spec.unnamed);
390 return NULL;
391 }
392
393 static void
format_free(void * descr)394 format_free (void *descr)
395 {
396 struct spec *spec = (struct spec *) descr;
397
398 if (spec->named != NULL)
399 {
400 unsigned int i;
401 for (i = 0; i < spec->named_arg_count; i++)
402 free (spec->named[i].name);
403 free (spec->named);
404 }
405 if (spec->unnamed != NULL)
406 free (spec->unnamed);
407 free (spec);
408 }
409
410 static int
format_get_number_of_directives(void * descr)411 format_get_number_of_directives (void *descr)
412 {
413 struct spec *spec = (struct spec *) descr;
414
415 return spec->directives;
416 }
417
418 static bool
format_check(void * msgid_descr,void * msgstr_descr,bool equality,formatstring_error_logger_t error_logger,const char * pretty_msgid,const char * pretty_msgstr)419 format_check (void *msgid_descr, void *msgstr_descr, bool equality,
420 formatstring_error_logger_t error_logger,
421 const char *pretty_msgid, const char *pretty_msgstr)
422 {
423 struct spec *spec1 = (struct spec *) msgid_descr;
424 struct spec *spec2 = (struct spec *) msgstr_descr;
425 bool err = false;
426
427 if (spec1->named_arg_count > 0 && spec2->unnamed_arg_count > 0)
428 {
429 if (error_logger)
430 error_logger (_("format specifications in '%s' expect a mapping, those in '%s' expect a tuple"),
431 pretty_msgid, pretty_msgstr);
432 err = true;
433 }
434 else if (spec1->unnamed_arg_count > 0 && spec2->named_arg_count > 0)
435 {
436 if (error_logger)
437 error_logger (_("format specifications in '%s' expect a tuple, those in '%s' expect a mapping"),
438 pretty_msgid, pretty_msgstr);
439 err = true;
440 }
441 else
442 {
443 if (spec1->named_arg_count + spec2->named_arg_count > 0)
444 {
445 unsigned int i, j;
446 unsigned int n1 = spec1->named_arg_count;
447 unsigned int n2 = spec2->named_arg_count;
448
449 /* Check the argument names are the same.
450 Both arrays are sorted. We search for the first difference. */
451 for (i = 0, j = 0; i < n1 || j < n2; )
452 {
453 int cmp = (i >= n1 ? 1 :
454 j >= n2 ? -1 :
455 strcmp (spec1->named[i].name, spec2->named[j].name));
456
457 if (cmp > 0)
458 {
459 if (error_logger)
460 error_logger (_("a format specification for argument '%s', as in '%s', doesn't exist in '%s'"),
461 spec2->named[j].name, pretty_msgstr,
462 pretty_msgid);
463 err = true;
464 break;
465 }
466 else if (cmp < 0)
467 {
468 if (equality)
469 {
470 if (error_logger)
471 error_logger (_("a format specification for argument '%s' doesn't exist in '%s'"),
472 spec1->named[i].name, pretty_msgstr);
473 err = true;
474 break;
475 }
476 else
477 i++;
478 }
479 else
480 j++, i++;
481 }
482 /* Check the argument types are the same. */
483 if (!err)
484 for (i = 0, j = 0; j < n2; )
485 {
486 if (strcmp (spec1->named[i].name, spec2->named[j].name) == 0)
487 {
488 if (!(spec1->named[i].type == spec2->named[j].type
489 || (!equality
490 && (spec1->named[i].type == FAT_ANY
491 || spec2->named[j].type == FAT_ANY))))
492 {
493 if (error_logger)
494 error_logger (_("format specifications in '%s' and '%s' for argument '%s' are not the same"),
495 pretty_msgid, pretty_msgstr,
496 spec2->named[j].name);
497 err = true;
498 break;
499 }
500 j++, i++;
501 }
502 else
503 i++;
504 }
505 }
506
507 if (spec1->unnamed_arg_count + spec2->unnamed_arg_count > 0)
508 {
509 unsigned int i;
510
511 /* Check the argument types are the same. */
512 if (spec1->unnamed_arg_count != spec2->unnamed_arg_count)
513 {
514 if (error_logger)
515 error_logger (_("number of format specifications in '%s' and '%s' does not match"),
516 pretty_msgid, pretty_msgstr);
517 err = true;
518 }
519 else
520 for (i = 0; i < spec2->unnamed_arg_count; i++)
521 if (!(spec1->unnamed[i].type == spec2->unnamed[i].type
522 || (!equality
523 && (spec1->unnamed[i].type == FAT_ANY
524 || spec2->unnamed[i].type == FAT_ANY))))
525 {
526 if (error_logger)
527 error_logger (_("format specifications in '%s' and '%s' for argument %u are not the same"),
528 pretty_msgid, pretty_msgstr, i + 1);
529 err = true;
530 }
531 }
532 }
533
534 return err;
535 }
536
537
538 struct formatstring_parser formatstring_python =
539 {
540 format_parse,
541 format_free,
542 format_get_number_of_directives,
543 NULL,
544 format_check
545 };
546
547
548 unsigned int
get_python_format_unnamed_arg_count(const char * string)549 get_python_format_unnamed_arg_count (const char *string)
550 {
551 /* Parse the format string. */
552 char *invalid_reason = NULL;
553 struct spec *descr =
554 (struct spec *) format_parse (string, false, NULL, &invalid_reason);
555
556 if (descr != NULL)
557 {
558 unsigned int result = descr->unnamed_arg_count;
559
560 format_free (descr);
561 return result;
562 }
563 else
564 {
565 free (invalid_reason);
566 return 0;
567 }
568 }
569
570
571 #ifdef TEST
572
573 /* Test program: Print the argument list specification returned by
574 format_parse for strings read from standard input. */
575
576 #include <stdio.h>
577
578 static void
format_print(void * descr)579 format_print (void *descr)
580 {
581 struct spec *spec = (struct spec *) descr;
582 unsigned int i;
583
584 if (spec == NULL)
585 {
586 printf ("INVALID");
587 return;
588 }
589
590 if (spec->named_arg_count > 0)
591 {
592 if (spec->unnamed_arg_count > 0)
593 abort ();
594
595 printf ("{");
596 for (i = 0; i < spec->named_arg_count; i++)
597 {
598 if (i > 0)
599 printf (", ");
600 printf ("'%s':", spec->named[i].name);
601 switch (spec->named[i].type)
602 {
603 case FAT_ANY:
604 printf ("*");
605 break;
606 case FAT_CHARACTER:
607 printf ("c");
608 break;
609 case FAT_STRING:
610 printf ("s");
611 break;
612 case FAT_INTEGER:
613 printf ("i");
614 break;
615 case FAT_FLOAT:
616 printf ("f");
617 break;
618 default:
619 abort ();
620 }
621 }
622 printf ("}");
623 }
624 else
625 {
626 printf ("(");
627 for (i = 0; i < spec->unnamed_arg_count; i++)
628 {
629 if (i > 0)
630 printf (" ");
631 switch (spec->unnamed[i].type)
632 {
633 case FAT_ANY:
634 printf ("*");
635 break;
636 case FAT_CHARACTER:
637 printf ("c");
638 break;
639 case FAT_STRING:
640 printf ("s");
641 break;
642 case FAT_INTEGER:
643 printf ("i");
644 break;
645 case FAT_FLOAT:
646 printf ("f");
647 break;
648 default:
649 abort ();
650 }
651 }
652 printf (")");
653 }
654 }
655
656 int
main()657 main ()
658 {
659 for (;;)
660 {
661 char *line = NULL;
662 size_t line_size = 0;
663 int line_len;
664 char *invalid_reason;
665 void *descr;
666
667 line_len = getline (&line, &line_size, stdin);
668 if (line_len < 0)
669 break;
670 if (line_len > 0 && line[line_len - 1] == '\n')
671 line[--line_len] = '\0';
672
673 invalid_reason = NULL;
674 descr = format_parse (line, false, NULL, &invalid_reason);
675
676 format_print (descr);
677 printf ("\n");
678 if (descr == NULL)
679 printf ("%s\n", invalid_reason);
680
681 free (invalid_reason);
682 free (line);
683 }
684
685 return 0;
686 }
687
688 /*
689 * For Emacs M-x compile
690 * Local Variables:
691 * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../../gettext-runtime/intl -DHAVE_CONFIG_H -DTEST format-python.c ../gnulib-lib/libgettextlib.la"
692 * End:
693 */
694
695 #endif /* TEST */
696