1 /* Python brace format strings.
2 Copyright (C) 2004, 2006-2007, 2013-2014, 2016, 2019 Free Software Foundation,
3 Inc.
4 Written by Daiki Ueno <ueno@gnu.org>, 2013.
5
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <https://www.gnu.org/licenses/>. */
18
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
22
23 #include <stdbool.h>
24 #include <stdlib.h>
25 #include <string.h>
26
27 #include "format.h"
28 #include "c-ctype.h"
29 #include "xalloc.h"
30 #include "xvasprintf.h"
31 #include "format-invalid.h"
32 #include "gettext.h"
33
34 #define _(str) gettext (str)
35
/* Python brace format strings are defined by PEP 3101 together with the
   'format' method of the string class.
   A format string directive here consists of
     - an opening brace '{',
     - an identifier [_A-Za-z][_0-9A-Za-z]*|[0-9]+,
     - optional getattr ('.') or getitem ('['..']') operators, which may be
       chained, each with an identifier (or, for getitem, a number) as
       argument,
     - an optional format specifier starting with ':', with either a
       standard (unnested) format string or a single nested directive as
       argument,
     - a closing brace '}'.
   Brace characters '{' and '}' can be escaped by doubles '{{' and '}}'.
 */
48
/* One argument referenced by a top-level format directive.  */
struct named_arg
{
  /* Heap-allocated, NUL-terminated copy of the directive text found
     between the opening '{' and the closing '}'.  It is created by
     parse_directive and released by free_named_args.  */
  char *name;
};
53
/* Result of parsing one format string.  */
struct spec
{
  /* Number of top-level directives seen; incremented by parse_directive
     each time it records a directive.  */
  unsigned int directives;
  /* Number of entries of NAMED that are in use.  */
  unsigned int named_arg_count;
  /* Number of entries for which NAMED currently has room.  */
  unsigned int allocated;
  /* Growable array of the recorded arguments, or NULL while empty.
     format_parse sorts it by name and removes duplicates.  */
  struct named_arg *named;
};
61
62
/* Forward declarations; both functions are defined further down in this
   file.  */
static bool parse_upto (struct spec *spec, const char **formatp,
                        bool is_toplevel, char terminator,
                        bool translated, char *fdi, char **invalid_reason);
static void free_named_args (struct spec *spec);
67
68
69 /* All the parse_* functions (except parse_upto) follow the same
70 calling convention. FORMATP shall point to the beginning of a token.
71 If parsing succeeds, FORMATP will point to the next character after
72 the token, and true is returned. Otherwise, FORMATP will be
73 unchanged and false is returned. */
74
/* Parse an identifier of the form [_A-Za-z][_0-9A-Za-z]*.
   On success, advance *FORMATP past the identifier and return true.
   Otherwise leave *FORMATP alone and return false.
   SPEC, TRANSLATED, FDI and INVALID_REASON are accepted only for the sake
   of the common parse_* signature; they are not used here.  */
static bool
parse_named_field (struct spec *spec,
                   const char **formatp, bool translated, char *fdi,
                   char **invalid_reason)
{
  const char *cursor = *formatp;

  /* The first character must be a letter or an underscore.  */
  if (!((*cursor >= 'A' && *cursor <= 'Z')
        || (*cursor >= 'a' && *cursor <= 'z')
        || *cursor == '_'))
    return false;

  /* Swallow the remaining letters, digits and underscores.  */
  for (cursor++;
       (*cursor >= 'A' && *cursor <= 'Z')
       || (*cursor >= 'a' && *cursor <= 'z')
       || (*cursor >= '0' && *cursor <= '9')
       || *cursor == '_';
       cursor++)
    ;

  *formatp = cursor;
  return true;
}
95
/* Parse a non-empty run of decimal digits, [0-9]+.
   On success, advance *FORMATP past the digits and return true.
   Otherwise leave *FORMATP alone and return false.
   SPEC, TRANSLATED, FDI and INVALID_REASON are accepted only for the sake
   of the common parse_* signature; they are not used here.  */
static bool
parse_numeric_field (struct spec *spec,
                     const char **formatp, bool translated, char *fdi,
                     char **invalid_reason)
{
  const char *const begin = *formatp;
  const char *end = begin;

  while (*end >= '0' && *end <= '9')
    end++;

  /* No digit at all: this is not a numeric field.  */
  if (end == begin)
    return false;

  *formatp = end;
  return true;
}
115
116 static bool
parse_directive(struct spec * spec,const char ** formatp,bool is_toplevel,bool translated,char * fdi,char ** invalid_reason)117 parse_directive (struct spec *spec,
118 const char **formatp, bool is_toplevel,
119 bool translated, char *fdi, char **invalid_reason)
120 {
121 const char *format = *formatp;
122 const char *const format_start = format;
123 const char *name_start;
124 char c;
125
126 c = *++format;
127 if (c == '{')
128 {
129 *formatp = ++format;
130 return true;
131 }
132
133 name_start = format;
134 if (!parse_named_field (spec, &format, translated, fdi, invalid_reason)
135 && !parse_numeric_field (spec, &format, translated, fdi, invalid_reason))
136 {
137 *invalid_reason =
138 xasprintf (_("In the directive number %u, '%c' cannot start a field name."), spec->directives, *format);
139 FDI_SET (format, FMTDIR_ERROR);
140 return false;
141 }
142
143 /* Parse '.' (getattr) or '[..]' (getitem) operators followed by a
144 name. If must not recurse, but can be specifed in a chain, such
145 as "foo.bar.baz[0]". */
146 for (;;)
147 {
148 c = *format;
149
150 if (c == '.')
151 {
152 format++;
153 if (!parse_named_field (spec, &format, translated, fdi,
154 invalid_reason))
155 {
156 *invalid_reason =
157 xasprintf (_("In the directive number %u, '%c' cannot start a getattr argument."), spec->directives, *format);
158 FDI_SET (format, FMTDIR_ERROR);
159 return false;
160 }
161 }
162 else if (c == '[')
163 {
164 format++;
165 if (!parse_named_field (spec, &format, translated, fdi,
166 invalid_reason)
167 && !parse_numeric_field (spec, &format, translated, fdi,
168 invalid_reason))
169 {
170 *invalid_reason =
171 xasprintf (_("In the directive number %u, '%c' cannot start a getitem argument."), spec->directives, *format);
172 FDI_SET (format, FMTDIR_ERROR);
173 return false;
174 }
175
176 c = *format++;
177 if (c != ']')
178 {
179 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
180 FDI_SET (format, FMTDIR_ERROR);
181 return false;
182 }
183 }
184 else
185 break;
186 }
187
188 if (c == ':')
189 {
190 if (!is_toplevel)
191 {
192 *invalid_reason =
193 xasprintf (_("In the directive number %u, no more nesting is allowed in a format specifier."), spec->directives);
194 FDI_SET (format, FMTDIR_ERROR);
195 return false;
196 }
197
198 /* Format specifiers. Although a format specifier can be any
199 string in theory, we can only recognize two types of format
200 specifiers below, because otherwise we would need to evaluate
201 Python expressions by ourselves:
202
203 - A nested format directive expanding to the whole string
204 - The Standard Format Specifiers, as described in PEP3101,
205 not including a nested format directive */
206 format++;
207 if (*format == '{')
208 {
209 /* Nested format directive. */
210 if (!parse_directive (spec, &format, false, translated, fdi,
211 invalid_reason))
212 {
213 /* FDI and INVALID_REASON will be set by a recursive call of
214 parse_directive. */
215 return false;
216 }
217
218 if (*format != '}')
219 {
220 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
221 FDI_SET (format, FMTDIR_ERROR);
222 return false;
223 }
224 }
225 else
226 {
227 /* Standard format specifiers is in the form:
228 [[fill]align][sign][#][0][minimumwidth][.precision][type] */
229
230 /* Look ahead two characters to skip [[fill]align]. */
231 int c1, c2;
232
233 c1 = format[0];
234 c2 = format[1];
235
236 if (c2 == '<' || c2 == '>' || c2 == '=' || c2 == '^')
237 format += 2;
238 else if (c1 == '<' || c1 == '>' || c1 == '=' || c1 == '^')
239 format++;
240 if (*format == '+' || *format == '-' || *format == ' ')
241 format++;
242 if (*format == '#')
243 format++;
244 if (*format == '0')
245 format++;
246 while (c_isdigit (*format))
247 format++;
248 if (*format == '.')
249 {
250 format++;
251 while (c_isdigit (*format))
252 format++;
253 }
254 switch (*format)
255 {
256 case 'b': case 'c': case 'd': case 'o': case 'x': case 'X':
257 case 'n':
258 case 'e': case 'E': case 'f': case 'F': case 'g': case 'G':
259 case '%':
260 format++;
261 break;
262 default:
263 break;
264 }
265 if (*format != '}')
266 {
267 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
268 FDI_SET (format, FMTDIR_ERROR);
269 return false;
270 }
271 }
272 c = *format;
273 }
274
275 if (c != '}')
276 {
277 *invalid_reason =
278 xasprintf (_("In the directive number %u, there is an unterminated format directive."), spec->directives);
279 FDI_SET (format, FMTDIR_ERROR);
280 return false;
281 }
282
283 if (is_toplevel)
284 {
285 char *name;
286 size_t n = format - name_start;
287
288 FDI_SET (name_start - 1, FMTDIR_START);
289
290 name = XNMALLOC (n + 1, char);
291 memcpy (name, name_start, n);
292 name[n] = '\0';
293
294 spec->directives++;
295
296 if (spec->allocated == spec->named_arg_count)
297 {
298 spec->allocated = 2 * spec->allocated + 1;
299 spec->named = (struct named_arg *) xrealloc (spec->named, spec->allocated * sizeof (struct named_arg));
300 }
301 spec->named[spec->named_arg_count].name = name;
302 spec->named_arg_count++;
303
304 FDI_SET (format, FMTDIR_END);
305 }
306
307 *formatp = ++format;
308 return true;
309 }
310
/* Scan the text at *FORMATP until TERMINATOR or the end of the string is
   reached, handing every '{' found on the way to parse_directive.
   IS_TOPLEVEL, TRANSLATED, FDI and INVALID_REASON are passed through to
   parse_directive unchanged.
   Return true and store the stop position in *FORMATP on success.
   Return false, leaving *FORMATP alone, as soon as a directive fails to
   parse.  */
static bool
parse_upto (struct spec *spec,
            const char **formatp, bool is_toplevel, char terminator,
            bool translated, char *fdi, char **invalid_reason)
{
  const char *cursor = *formatp;

  while (*cursor != '\0' && *cursor != terminator)
    {
      /* Ordinary characters are simply stepped over.  */
      if (*cursor != '{')
        {
          cursor++;
          continue;
        }

      /* parse_directive moves CURSOR past the directive on success.  */
      if (!parse_directive (spec, &cursor, is_toplevel, translated, fdi,
                            invalid_reason))
        return false;
    }

  *formatp = cursor;
  return true;
}
333
334 static int
named_arg_compare(const void * p1,const void * p2)335 named_arg_compare (const void *p1, const void *p2)
336 {
337 return strcmp (((const struct named_arg *) p1)->name,
338 ((const struct named_arg *) p2)->name);
339 }
340
341 static void *
format_parse(const char * format,bool translated,char * fdi,char ** invalid_reason)342 format_parse (const char *format, bool translated, char *fdi,
343 char **invalid_reason)
344 {
345 struct spec spec;
346 struct spec *result;
347
348 spec.directives = 0;
349 spec.named_arg_count = 0;
350 spec.allocated = 0;
351 spec.named = NULL;
352
353 if (!parse_upto (&spec, &format, true, '\0', translated, fdi, invalid_reason))
354 {
355 free_named_args (&spec);
356 return NULL;
357 }
358
359 /* Sort the named argument array, and eliminate duplicates. */
360 if (spec.named_arg_count > 1)
361 {
362 unsigned int i, j;
363
364 qsort (spec.named, spec.named_arg_count, sizeof (struct named_arg),
365 named_arg_compare);
366
367 /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */
368 for (i = j = 0; i < spec.named_arg_count; i++)
369 if (j > 0 && strcmp (spec.named[i].name, spec.named[j-1].name) == 0)
370 free (spec.named[i].name);
371 else
372 {
373 if (j < i)
374 spec.named[j].name = spec.named[i].name;
375 j++;
376 }
377 spec.named_arg_count = j;
378 }
379
380 result = XMALLOC (struct spec);
381 *result = spec;
382 return result;
383 }
384
385 static void
free_named_args(struct spec * spec)386 free_named_args (struct spec *spec)
387 {
388 if (spec->named != NULL)
389 {
390 unsigned int i;
391 for (i = 0; i < spec->named_arg_count; i++)
392 free (spec->named[i].name);
393 free (spec->named);
394 }
395 }
396
/* Release a description returned by format_parse: first its argument
   names and array, then the 'struct spec' itself.  */
static void
format_free (void *descr)
{
  struct spec *parsed = (struct spec *) descr;

  free_named_args (parsed);
  free (parsed);
}
405
406 static int
format_get_number_of_directives(void * descr)407 format_get_number_of_directives (void *descr)
408 {
409 struct spec *spec = (struct spec *) descr;
410
411 return spec->directives;
412 }
413
414 static bool
format_check(void * msgid_descr,void * msgstr_descr,bool equality,formatstring_error_logger_t error_logger,const char * pretty_msgid,const char * pretty_msgstr)415 format_check (void *msgid_descr, void *msgstr_descr, bool equality,
416 formatstring_error_logger_t error_logger,
417 const char *pretty_msgid, const char *pretty_msgstr)
418 {
419 struct spec *spec1 = (struct spec *) msgid_descr;
420 struct spec *spec2 = (struct spec *) msgstr_descr;
421 bool err = false;
422
423 if (spec1->named_arg_count + spec2->named_arg_count > 0)
424 {
425 unsigned int i, j;
426 unsigned int n1 = spec1->named_arg_count;
427 unsigned int n2 = spec2->named_arg_count;
428
429 /* Check the argument names in spec1 are contained in those of spec2.
430 Both arrays are sorted. We search for the differences. */
431 for (i = 0, j = 0; i < n1 || j < n2; )
432 {
433 int cmp = (i >= n1 ? 1 :
434 j >= n2 ? -1 :
435 strcmp (spec1->named[i].name, spec2->named[j].name));
436
437 if (cmp > 0)
438 {
439 if (equality)
440 {
441 if (error_logger)
442 error_logger (_("a format specification for argument '%s' doesn't exist in '%s'"),
443 spec2->named[i].name, pretty_msgid);
444 err = true;
445 break;
446 }
447 else
448 j++;
449 }
450 else if (cmp < 0)
451 {
452 if (equality)
453 {
454 if (error_logger)
455 error_logger (_("a format specification for argument '%s' doesn't exist in '%s'"),
456 spec1->named[i].name, pretty_msgstr);
457 err = true;
458 break;
459 }
460 else
461 i++;
462 }
463 else
464 j++, i++;
465 }
466 }
467
468 return err;
469 }
470
471
/* Parser description for Python brace format strings, made up of the
   functions defined in this file.  The order of the entries follows the
   member order of 'struct formatstring_parser', which is declared outside
   this file.  */
struct formatstring_parser formatstring_python_brace =
{
  format_parse,
  format_free,
  format_get_number_of_directives,
  NULL, /* NOTE(review): this slot is left empty; its meaning is given by
           'struct formatstring_parser' -- confirm against format.h.  */
  format_check
};
480
481
482 #ifdef TEST
483
484 /* Test program: Print the argument list specification returned by
485 format_parse for strings read from standard input. */
486
487 #include <stdio.h>
488
489 static void
format_print(void * descr)490 format_print (void *descr)
491 {
492 struct spec *spec = (struct spec *) descr;
493 unsigned int i;
494
495 if (spec == NULL)
496 {
497 printf ("INVALID");
498 return;
499 }
500
501 printf ("{");
502 for (i = 0; i < spec->named_arg_count; i++)
503 {
504 if (i > 0)
505 printf (", ");
506 printf ("'%s'", spec->named[i].name);
507 }
508 printf ("}");
509 }
510
/* Test driver: read format strings from standard input, one per line,
   parse each one and print the resulting argument list.  For an invalid
   string, the reason is printed on the following line.  */
int
main ()
{
  for (;;)
    {
      char *line = NULL;
      size_t line_size = 0;
      char *invalid_reason = NULL;
      void *descr;
      int line_len = getline (&line, &line_size, stdin);

      /* End of input, or a read error.  */
      if (line_len < 0)
        break;

      /* Chop off the trailing newline, if there is one.  */
      if (line_len > 0 && line[line_len - 1] == '\n')
        {
          line_len--;
          line[line_len] = '\0';
        }

      descr = format_parse (line, false, NULL, &invalid_reason);

      format_print (descr);
      printf ("\n");
      if (descr == NULL)
        printf ("%s\n", invalid_reason);

      free (invalid_reason);
      free (line);
    }

  return 0;
}
542
543 /*
544 * For Emacs M-x compile
545 * Local Variables:
546 * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../../gettext-runtime/intl -DHAVE_CONFIG_H -DTEST format-python-brace.c ../gnulib-lib/libgettextlib.la"
547 * End:
548 */
549
550 #endif /* TEST */
551