• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Python brace format strings.
2    Copyright (C) 2004, 2006-2007, 2013-2014, 2016, 2019 Free Software Foundation,
3    Inc.
4    Written by Daiki Ueno <ueno@gnu.org>, 2013.
5 
6    This program is free software: you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10 
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
18 
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
22 
23 #include <stdbool.h>
24 #include <stdlib.h>
25 #include <string.h>
26 
27 #include "format.h"
28 #include "c-ctype.h"
29 #include "xalloc.h"
30 #include "xvasprintf.h"
31 #include "format-invalid.h"
32 #include "gettext.h"
33 
34 #define _(str) gettext (str)
35 
36 /* Python brace format strings are defined by PEP3101 together with
37    'format' method of string class.
38    A format string directive here consists of
39      - an opening brace '{',
40      - an identifier [_A-Za-z][_0-9A-Za-z]*|[0-9]+,
41      - an optional getattr ('.') or getitem ('['..']') operator with
42        an identifier as argument,
43      - an optional format specifier starting with ':', with a
44        (unnested) format string as argument,
45      - a closing brace '}'.
46    Brace characters '{' and '}' can be escaped by doubles '{{' and '}}'.
47 */
48 
/* One argument referenced by a directive of the format string.  */
struct named_arg
{
  /* NUL-terminated heap copy of the directive text, created in
     parse_directive and released by free_named_args (or by the
     duplicate elimination in format_parse).  */
  char *name;
};
53 
/* Description of a parsed format string, as returned by format_parse.  */
struct spec
{
  /* Number of top-level directives seen; incremented by parse_directive.  */
  unsigned int directives;
  /* Number of entries of NAMED that are in use.  */
  unsigned int named_arg_count;
  /* Number of entries for which NAMED has room.  */
  unsigned int allocated;
  /* Heap-allocated array of arguments; format_parse sorts it by name and
     removes duplicates.  NULL as long as nothing was stored.  */
  struct named_arg *named;
};
61 
62 
63 static bool parse_upto (struct spec *spec, const char **formatp,
64                         bool is_toplevel, char terminator,
65                         bool translated, char *fdi, char **invalid_reason);
66 static void free_named_args (struct spec *spec);
67 
68 
69 /* All the parse_* functions (except parse_upto) follow the same
70    calling convention.  FORMATP shall point to the beginning of a token.
71    If parsing succeeds, FORMATP will point to the next character after
72    the token, and true is returned.  Otherwise, FORMATP will be
73    unchanged and false is returned.  */
74 
/* Parse an identifier of the form [_A-Za-z][_0-9A-Za-z]*.
   On success, advance *FORMATP past the identifier and return true.
   Otherwise leave *FORMATP alone and return false.
   SPEC, TRANSLATED, FDI and INVALID_REASON are accepted for uniformity
   with the other parse_* functions and are not used here.  */
static bool
parse_named_field (struct spec *spec,
                   const char **formatp, bool translated, char *fdi,
                   char **invalid_reason)
{
  const char *cursor = *formatp;

  /* The first character must be a letter or an underscore.  */
  if (!((*cursor >= 'A' && *cursor <= 'Z')
        || (*cursor >= 'a' && *cursor <= 'z')
        || *cursor == '_'))
    return false;

  /* Consume the remaining letters, digits and underscores.  */
  for (cursor++; ; cursor++)
    {
      char ch = *cursor;

      if (!((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')
            || ch == '_' || (ch >= '0' && ch <= '9')))
        break;
    }

  *formatp = cursor;
  return true;
}
95 
/* Parse a non-empty run of ASCII decimal digits.
   On success, advance *FORMATP past the digits and return true.
   Otherwise leave *FORMATP alone and return false.
   SPEC, TRANSLATED, FDI and INVALID_REASON are accepted for uniformity
   with the other parse_* functions and are not used here.  */
static bool
parse_numeric_field (struct spec *spec,
                     const char **formatp, bool translated, char *fdi,
                     char **invalid_reason)
{
  const char *cursor = *formatp;

  if (!(*cursor >= '0' && *cursor <= '9'))
    return false;

  /* The first digit is known to be present; skip it and all followers.  */
  cursor++;
  while (*cursor >= '0' && *cursor <= '9')
    cursor++;

  *formatp = cursor;
  return true;
}
115 
116 static bool
parse_directive(struct spec * spec,const char ** formatp,bool is_toplevel,bool translated,char * fdi,char ** invalid_reason)117 parse_directive (struct spec *spec,
118                  const char **formatp, bool is_toplevel,
119                  bool translated, char *fdi, char **invalid_reason)
120 {
121   const char *format = *formatp;
122   const char *const format_start = format;
123   const char *name_start;
124   char c;
125 
126   c = *++format;
127   if (c == '{')
128     {
129       *formatp = ++format;
130       return true;
131     }
132 
133   name_start = format;
134   if (!parse_named_field (spec, &format, translated, fdi, invalid_reason)
135       && !parse_numeric_field (spec, &format, translated, fdi, invalid_reason))
136     {
137       *invalid_reason =
138         xasprintf (_("In the directive number %u, '%c' cannot start a field name."), spec->directives, *format);
139       FDI_SET (format, FMTDIR_ERROR);
140       return false;
141     }
142 
143   /* Parse '.' (getattr) or '[..]' (getitem) operators followed by a
144      name.  If must not recurse, but can be specifed in a chain, such
145      as "foo.bar.baz[0]".  */
146   for (;;)
147     {
148       c = *format;
149 
150       if (c == '.')
151         {
152           format++;
153           if (!parse_named_field (spec, &format, translated, fdi,
154                                   invalid_reason))
155             {
156               *invalid_reason =
157                 xasprintf (_("In the directive number %u, '%c' cannot start a getattr argument."), spec->directives, *format);
158               FDI_SET (format, FMTDIR_ERROR);
159               return false;
160             }
161         }
162       else if (c == '[')
163         {
164           format++;
165           if (!parse_named_field (spec, &format, translated, fdi,
166                                   invalid_reason)
167               && !parse_numeric_field (spec, &format, translated, fdi,
168                                        invalid_reason))
169             {
170               *invalid_reason =
171                 xasprintf (_("In the directive number %u, '%c' cannot start a getitem argument."), spec->directives, *format);
172               FDI_SET (format, FMTDIR_ERROR);
173               return false;
174             }
175 
176           c = *format++;
177           if (c != ']')
178             {
179               *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
180               FDI_SET (format, FMTDIR_ERROR);
181               return false;
182             }
183         }
184       else
185         break;
186     }
187 
188   if (c == ':')
189     {
190       if (!is_toplevel)
191         {
192           *invalid_reason =
193             xasprintf (_("In the directive number %u, no more nesting is allowed in a format specifier."), spec->directives);
194           FDI_SET (format, FMTDIR_ERROR);
195           return false;
196         }
197 
198       /* Format specifiers.  Although a format specifier can be any
199          string in theory, we can only recognize two types of format
200          specifiers below, because otherwise we would need to evaluate
201          Python expressions by ourselves:
202 
203            - A nested format directive expanding to the whole string
204            - The Standard Format Specifiers, as described in PEP3101,
205              not including a nested format directive  */
206       format++;
207       if (*format == '{')
208         {
209           /* Nested format directive.  */
210           if (!parse_directive (spec, &format, false, translated, fdi,
211                                 invalid_reason))
212             {
213               /* FDI and INVALID_REASON will be set by a recursive call of
214                  parse_directive.  */
215               return false;
216             }
217 
218           if (*format != '}')
219             {
220               *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
221               FDI_SET (format, FMTDIR_ERROR);
222               return false;
223             }
224         }
225       else
226         {
227           /* Standard format specifiers is in the form:
228              [[fill]align][sign][#][0][minimumwidth][.precision][type]  */
229 
230           /* Look ahead two characters to skip [[fill]align].  */
231           int c1, c2;
232 
233           c1 = format[0];
234           c2 = format[1];
235 
236           if (c2 == '<' || c2 == '>' || c2 == '=' || c2 == '^')
237             format += 2;
238           else if (c1 == '<' || c1 == '>' || c1 == '=' || c1 == '^')
239             format++;
240           if (*format == '+' || *format == '-' || *format == ' ')
241             format++;
242           if (*format == '#')
243             format++;
244           if (*format == '0')
245             format++;
246           while (c_isdigit (*format))
247             format++;
248           if (*format == '.')
249             {
250               format++;
251               while (c_isdigit (*format))
252                 format++;
253             }
254           switch (*format)
255             {
256             case 'b': case 'c': case 'd': case 'o': case 'x': case 'X':
257             case 'n':
258             case 'e': case 'E': case 'f': case 'F': case 'g': case 'G':
259             case '%':
260               format++;
261               break;
262             default:
263               break;
264             }
265           if (*format != '}')
266             {
267               *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
268               FDI_SET (format, FMTDIR_ERROR);
269               return false;
270             }
271         }
272       c = *format;
273     }
274 
275   if (c != '}')
276     {
277       *invalid_reason =
278         xasprintf (_("In the directive number %u, there is an unterminated format directive."), spec->directives);
279       FDI_SET (format, FMTDIR_ERROR);
280       return false;
281     }
282 
283   if (is_toplevel)
284     {
285       char *name;
286       size_t n = format - name_start;
287 
288       FDI_SET (name_start - 1, FMTDIR_START);
289 
290       name = XNMALLOC (n + 1, char);
291       memcpy (name, name_start, n);
292       name[n] = '\0';
293 
294       spec->directives++;
295 
296       if (spec->allocated == spec->named_arg_count)
297         {
298           spec->allocated = 2 * spec->allocated + 1;
299           spec->named = (struct named_arg *) xrealloc (spec->named, spec->allocated * sizeof (struct named_arg));
300         }
301       spec->named[spec->named_arg_count].name = name;
302       spec->named_arg_count++;
303 
304       FDI_SET (format, FMTDIR_END);
305     }
306 
307   *formatp = ++format;
308   return true;
309 }
310 
/* Scan the format string starting at *FORMATP until TERMINATOR or the end
   of the string is reached, handing every '{' to parse_directive.
   On success, store the stop position in *FORMATP and return true.
   If a directive fails to parse, return false and leave *FORMATP alone;
   the failure details are reported by parse_directive.  */
static bool
parse_upto (struct spec *spec,
            const char **formatp, bool is_toplevel, char terminator,
            bool translated, char *fdi, char **invalid_reason)
{
  const char *cursor = *formatp;

  while (*cursor != '\0' && *cursor != terminator)
    {
      if (*cursor != '{')
        {
          /* Ordinary character: nothing to parse.  */
          cursor++;
          continue;
        }

      if (!parse_directive (spec, &cursor, is_toplevel, translated, fdi,
                            invalid_reason))
        return false;
    }

  *formatp = cursor;
  return true;
}
333 
334 static int
named_arg_compare(const void * p1,const void * p2)335 named_arg_compare (const void *p1, const void *p2)
336 {
337   return strcmp (((const struct named_arg *) p1)->name,
338                  ((const struct named_arg *) p2)->name);
339 }
340 
341 static void *
format_parse(const char * format,bool translated,char * fdi,char ** invalid_reason)342 format_parse (const char *format, bool translated, char *fdi,
343               char **invalid_reason)
344 {
345   struct spec spec;
346   struct spec *result;
347 
348   spec.directives = 0;
349   spec.named_arg_count = 0;
350   spec.allocated = 0;
351   spec.named = NULL;
352 
353   if (!parse_upto (&spec, &format, true, '\0', translated, fdi, invalid_reason))
354     {
355       free_named_args (&spec);
356       return NULL;
357     }
358 
359   /* Sort the named argument array, and eliminate duplicates.  */
360   if (spec.named_arg_count > 1)
361     {
362       unsigned int i, j;
363 
364       qsort (spec.named, spec.named_arg_count, sizeof (struct named_arg),
365              named_arg_compare);
366 
367       /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i.  */
368       for (i = j = 0; i < spec.named_arg_count; i++)
369         if (j > 0 && strcmp (spec.named[i].name, spec.named[j-1].name) == 0)
370           free (spec.named[i].name);
371         else
372           {
373             if (j < i)
374               spec.named[j].name = spec.named[i].name;
375             j++;
376           }
377       spec.named_arg_count = j;
378     }
379 
380   result = XMALLOC (struct spec);
381   *result = spec;
382   return result;
383 }
384 
385 static void
free_named_args(struct spec * spec)386 free_named_args (struct spec *spec)
387 {
388   if (spec->named != NULL)
389     {
390       unsigned int i;
391       for (i = 0; i < spec->named_arg_count; i++)
392         free (spec->named[i].name);
393       free (spec->named);
394     }
395 }
396 
/* Release a descriptor returned by format_parse: first the argument
   names and their array, then the descriptor itself.  */
static void
format_free (void *descr)
{
  struct spec *const parsed = (struct spec *) descr;

  free_named_args (parsed);
  free (parsed);
}
405 
406 static int
format_get_number_of_directives(void * descr)407 format_get_number_of_directives (void *descr)
408 {
409   struct spec *spec = (struct spec *) descr;
410 
411   return spec->directives;
412 }
413 
414 static bool
format_check(void * msgid_descr,void * msgstr_descr,bool equality,formatstring_error_logger_t error_logger,const char * pretty_msgid,const char * pretty_msgstr)415 format_check (void *msgid_descr, void *msgstr_descr, bool equality,
416               formatstring_error_logger_t error_logger,
417               const char *pretty_msgid, const char *pretty_msgstr)
418 {
419   struct spec *spec1 = (struct spec *) msgid_descr;
420   struct spec *spec2 = (struct spec *) msgstr_descr;
421   bool err = false;
422 
423   if (spec1->named_arg_count + spec2->named_arg_count > 0)
424     {
425       unsigned int i, j;
426       unsigned int n1 = spec1->named_arg_count;
427       unsigned int n2 = spec2->named_arg_count;
428 
429       /* Check the argument names in spec1 are contained in those of spec2.
430          Both arrays are sorted.  We search for the differences.  */
431       for (i = 0, j = 0; i < n1 || j < n2; )
432         {
433           int cmp = (i >= n1 ? 1 :
434                      j >= n2 ? -1 :
435                      strcmp (spec1->named[i].name, spec2->named[j].name));
436 
437           if (cmp > 0)
438             {
439               if (equality)
440                 {
441                   if (error_logger)
442                     error_logger (_("a format specification for argument '%s' doesn't exist in '%s'"),
443                                   spec2->named[i].name, pretty_msgid);
444                   err = true;
445                   break;
446                 }
447               else
448                 j++;
449             }
450           else if (cmp < 0)
451             {
452               if (equality)
453                 {
454                   if (error_logger)
455                     error_logger (_("a format specification for argument '%s' doesn't exist in '%s'"),
456                                   spec1->named[i].name, pretty_msgstr);
457                   err = true;
458                   break;
459                 }
460               else
461                 i++;
462             }
463           else
464             j++, i++;
465         }
466     }
467 
468   return err;
469 }
470 
471 
/* Parser description for Python brace format strings.  The initializer is
   positional; the meaning of each slot is fixed by the declaration of
   'struct formatstring_parser' (not visible in this file).  */
struct formatstring_parser formatstring_python_brace =
{
  format_parse,
  format_free,
  format_get_number_of_directives,
  NULL,                         /* No function is provided for this slot.  */
  format_check
};
480 
481 
482 #ifdef TEST
483 
484 /* Test program: Print the argument list specification returned by
485    format_parse for strings read from standard input.  */
486 
487 #include <stdio.h>
488 
489 static void
format_print(void * descr)490 format_print (void *descr)
491 {
492   struct spec *spec = (struct spec *) descr;
493   unsigned int i;
494 
495   if (spec == NULL)
496     {
497       printf ("INVALID");
498       return;
499     }
500 
501   printf ("{");
502   for (i = 0; i < spec->named_arg_count; i++)
503     {
504       if (i > 0)
505         printf (", ");
506       printf ("'%s'", spec->named[i].name);
507     }
508   printf ("}");
509 }
510 
511 int
main()512 main ()
513 {
514   for (;;)
515     {
516       char *line = NULL;
517       size_t line_size = 0;
518       int line_len;
519       char *invalid_reason;
520       void *descr;
521 
522       line_len = getline (&line, &line_size, stdin);
523       if (line_len < 0)
524         break;
525       if (line_len > 0 && line[line_len - 1] == '\n')
526         line[--line_len] = '\0';
527 
528       invalid_reason = NULL;
529       descr = format_parse (line, false, NULL, &invalid_reason);
530 
531       format_print (descr);
532       printf ("\n");
533       if (descr == NULL)
534         printf ("%s\n", invalid_reason);
535 
536       free (invalid_reason);
537       free (line);
538     }
539 
540   return 0;
541 }
542 
543 /*
544  * For Emacs M-x compile
545  * Local Variables:
546  * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../../gettext-runtime/intl -DHAVE_CONFIG_H -DTEST format-python-brace.c ../gnulib-lib/libgettextlib.la"
547  * End:
548  */
549 
550 #endif /* TEST */
551