1 /* Tcl format strings.
2 Copyright (C) 2001-2004, 2006-2007, 2009, 2019-2020 Free Software Foundation, Inc.
3 Written by Bruno Haible <haible@clisp.cons.org>, 2002.
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17
18 #ifdef HAVE_CONFIG_H
19 # include <config.h>
20 #endif
21
22 #include <stdbool.h>
23 #include <stdlib.h>
24
25 #include "format.h"
26 #include "c-ctype.h"
27 #include "xalloc.h"
28 #include "xvasprintf.h"
29 #include "format-invalid.h"
30 #include "gettext.h"
31
32 #define _(str) gettext (str)
33
34 /* Tcl format strings are described in the tcl8.3.3/doc/format.n manual
35 page and implemented in the function Tcl_FormatObjCmd in
36 tcl8.3.3/generic/tclCmdAH.c.
37 A directive
38 - starts with '%' or '%m$' where m is a positive integer,
39 - is optionally followed by any of the characters '#', '0', '-', ' ', '+',
40 each of which acts as a flag,
41 - is optionally followed by a width specification: '*' (reads an argument)
42 or a nonempty digit sequence,
43 - is optionally followed by '.' and a precision specification: '*' (reads
44 an argument) or a nonempty digit sequence,
45 - is optionally followed by a size specifier, 'h' or 'l'. 'l' is ignored.
46 - is finished by a specifier
47 - '%', that needs no argument,
48 - 'c', that needs a character argument,
49 - 's', that needs a string argument,
50 - 'i', 'd', that need a signed integer argument,
51 - 'o', 'u', 'x', 'X', that need an unsigned integer argument,
52 - 'e', 'E', 'f', 'g', 'G', that need a floating-point argument.
53 Numbered ('%m$') and unnumbered argument specifications cannot be used
54 in the same string.
55 */
56
/* Kinds of argument that a Tcl format directive can consume.  The
   values are assigned by format_parse according to the conversion
   character and the optional 'h' size specifier.  */
enum format_arg_type
{
  FAT_NONE,                     /* No usable type; stored when two uses of
                                   the same argument number disagree.  */
  FAT_CHARACTER,                /* 'c' */
  FAT_STRING,                   /* 's' */
  FAT_INTEGER,                  /* 'i', 'd'; also a '*' width or precision */
  FAT_UNSIGNED_INTEGER,         /* 'u', 'o', 'x', 'X' */
  FAT_SHORT_INTEGER,            /* 'i', 'd' preceded by 'h' */
  FAT_SHORT_UNSIGNED_INTEGER,   /* 'u', 'o', 'x', 'X' preceded by 'h' */
  FAT_FLOAT                     /* 'e', 'E', 'f', 'g', 'G' */
};
68
/* One argument consumed by a format string.  */
struct numbered_arg
{
  /* Position of the argument.  Counting starts at 1; format_parse
     rejects an explicit '%0$'.  */
  unsigned int number;
  /* Type the directive expects for this argument.  */
  enum format_arg_type type;
};
74
/* Result of parsing one format string.  */
struct spec
{
  /* Number of directives seen, including '%%'.  */
  unsigned int directives;
  /* Number of valid elements in NUMBERED.  */
  unsigned int numbered_arg_count;
  /* Heap-allocated array of consumed arguments, or NULL when the string
     consumes none.  After a successful format_parse it is sorted by
     argument number and contains each number at most once.  */
  struct numbered_arg *numbered;
};
81
/* ASCII-only decimal digit test that does not depend on the locale.
   Unlike <ctype.h> isdigit, which requires an 'unsigned char' value,
   this accepts plain 'char' as well: subtracting '0' and converting to
   unsigned maps every non-digit to a value above 9.  The argument is
   evaluated exactly once.  */
#undef isdigit
#define isdigit(c) ((unsigned int) ((c) - '0') <= 9u)
87
88
89 static int
numbered_arg_compare(const void * p1,const void * p2)90 numbered_arg_compare (const void *p1, const void *p2)
91 {
92 unsigned int n1 = ((const struct numbered_arg *) p1)->number;
93 unsigned int n2 = ((const struct numbered_arg *) p2)->number;
94
95 return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0);
96 }
97
98 static void *
format_parse(const char * format,bool translated,char * fdi,char ** invalid_reason)99 format_parse (const char *format, bool translated, char *fdi,
100 char **invalid_reason)
101 {
102 const char *const format_start = format;
103 struct spec spec;
104 unsigned int numbered_allocated;
105 struct spec *result;
106 bool seen_numbered_arg;
107 bool seen_unnumbered_arg;
108 unsigned int number;
109
110 spec.directives = 0;
111 spec.numbered_arg_count = 0;
112 spec.numbered = NULL;
113 numbered_allocated = 0;
114 seen_numbered_arg = false;
115 seen_unnumbered_arg = false;
116 number = 1;
117
118 for (; *format != '\0';)
119 if (*format++ == '%')
120 {
121 /* A directive. */
122 FDI_SET (format - 1, FMTDIR_START);
123 spec.directives++;
124
125 if (*format != '%')
126 {
127 bool is_numbered_arg;
128 bool short_flag;
129 enum format_arg_type type;
130
131 is_numbered_arg = false;
132 if (isdigit (*format))
133 {
134 const char *f = format;
135 unsigned int m = 0;
136
137 do
138 {
139 m = 10 * m + (*f - '0');
140 f++;
141 }
142 while (isdigit (*f));
143
144 if (*f == '$')
145 {
146 if (m == 0)
147 {
148 *invalid_reason = INVALID_ARGNO_0 (spec.directives);
149 FDI_SET (f, FMTDIR_ERROR);
150 goto bad_format;
151 }
152 number = m;
153 format = ++f;
154
155 /* Numbered and unnumbered specifications are exclusive. */
156 if (seen_unnumbered_arg)
157 {
158 *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
159 FDI_SET (format - 1, FMTDIR_ERROR);
160 goto bad_format;
161 }
162 is_numbered_arg = true;
163 seen_numbered_arg = true;
164 }
165 }
166
167 /* Numbered and unnumbered specifications are exclusive. */
168 if (!is_numbered_arg)
169 {
170 if (seen_numbered_arg)
171 {
172 *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
173 FDI_SET (format - 1, FMTDIR_ERROR);
174 goto bad_format;
175 }
176 seen_unnumbered_arg = true;
177 }
178
179 /* Parse flags. */
180 while (*format == ' ' || *format == '+' || *format == '-'
181 || *format == '#' || *format == '0')
182 format++;
183
184 /* Parse width. */
185 if (*format == '*')
186 {
187 format++;
188
189 if (numbered_allocated == spec.numbered_arg_count)
190 {
191 numbered_allocated = 2 * numbered_allocated + 1;
192 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, numbered_allocated * sizeof (struct numbered_arg));
193 }
194 spec.numbered[spec.numbered_arg_count].number = number;
195 spec.numbered[spec.numbered_arg_count].type = FAT_INTEGER;
196 spec.numbered_arg_count++;
197
198 number++;
199 }
200 else if (isdigit (*format))
201 {
202 do format++; while (isdigit (*format));
203 }
204
205 /* Parse precision. */
206 if (*format == '.')
207 {
208 format++;
209
210 if (*format == '*')
211 {
212 format++;
213
214 if (numbered_allocated == spec.numbered_arg_count)
215 {
216 numbered_allocated = 2 * numbered_allocated + 1;
217 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, numbered_allocated * sizeof (struct numbered_arg));
218 }
219 spec.numbered[spec.numbered_arg_count].number = number;
220 spec.numbered[spec.numbered_arg_count].type = FAT_INTEGER;
221 spec.numbered_arg_count++;
222
223 number++;
224 }
225 else if (isdigit (*format))
226 {
227 do format++; while (isdigit (*format));
228 }
229 }
230
231 /* Parse optional size specification. */
232 short_flag = false;
233 if (*format == 'h')
234 short_flag = true, format++;
235 else if (*format == 'l')
236 format++;
237
238 switch (*format)
239 {
240 case 'c':
241 type = FAT_CHARACTER;
242 break;
243 case 's':
244 type = FAT_STRING;
245 break;
246 case 'i': case 'd':
247 type = (short_flag ? FAT_SHORT_INTEGER : FAT_INTEGER);
248 break;
249 case 'u': case 'o': case 'x': case 'X':
250 type = (short_flag ? FAT_SHORT_UNSIGNED_INTEGER : FAT_UNSIGNED_INTEGER);
251 break;
252 case 'e': case 'E': case 'f': case 'g': case 'G':
253 type = FAT_FLOAT;
254 break;
255 default:
256 if (*format == '\0')
257 {
258 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
259 FDI_SET (format - 1, FMTDIR_ERROR);
260 }
261 else
262 {
263 *invalid_reason =
264 INVALID_CONVERSION_SPECIFIER (spec.directives, *format);
265 FDI_SET (format, FMTDIR_ERROR);
266 }
267 goto bad_format;
268 }
269
270 if (numbered_allocated == spec.numbered_arg_count)
271 {
272 numbered_allocated = 2 * numbered_allocated + 1;
273 spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, numbered_allocated * sizeof (struct numbered_arg));
274 }
275 spec.numbered[spec.numbered_arg_count].number = number;
276 spec.numbered[spec.numbered_arg_count].type = type;
277 spec.numbered_arg_count++;
278
279 number++;
280 }
281
282 FDI_SET (format, FMTDIR_END);
283
284 format++;
285 }
286
287 /* Sort the numbered argument array, and eliminate duplicates. */
288 if (spec.numbered_arg_count > 1)
289 {
290 unsigned int i, j;
291 bool err;
292
293 qsort (spec.numbered, spec.numbered_arg_count,
294 sizeof (struct numbered_arg), numbered_arg_compare);
295
296 /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */
297 err = false;
298 for (i = j = 0; i < spec.numbered_arg_count; i++)
299 if (j > 0 && spec.numbered[i].number == spec.numbered[j-1].number)
300 {
301 enum format_arg_type type1 = spec.numbered[i].type;
302 enum format_arg_type type2 = spec.numbered[j-1].type;
303 enum format_arg_type type_both;
304
305 if (type1 == type2)
306 type_both = type1;
307 else
308 {
309 /* Incompatible types. */
310 type_both = FAT_NONE;
311 if (!err)
312 *invalid_reason =
313 INVALID_INCOMPATIBLE_ARG_TYPES (spec.numbered[i].number);
314 err = true;
315 }
316
317 spec.numbered[j-1].type = type_both;
318 }
319 else
320 {
321 if (j < i)
322 {
323 spec.numbered[j].number = spec.numbered[i].number;
324 spec.numbered[j].type = spec.numbered[i].type;
325 }
326 j++;
327 }
328 spec.numbered_arg_count = j;
329 if (err)
330 /* *invalid_reason has already been set above. */
331 goto bad_format;
332 }
333
334 result = XMALLOC (struct spec);
335 *result = spec;
336 return result;
337
338 bad_format:
339 if (spec.numbered != NULL)
340 free (spec.numbered);
341 return NULL;
342 }
343
344 static void
format_free(void * descr)345 format_free (void *descr)
346 {
347 struct spec *spec = (struct spec *) descr;
348
349 if (spec->numbered != NULL)
350 free (spec->numbered);
351 free (spec);
352 }
353
354 static int
format_get_number_of_directives(void * descr)355 format_get_number_of_directives (void *descr)
356 {
357 struct spec *spec = (struct spec *) descr;
358
359 return spec->directives;
360 }
361
362 static bool
format_check(void * msgid_descr,void * msgstr_descr,bool equality,formatstring_error_logger_t error_logger,const char * pretty_msgid,const char * pretty_msgstr)363 format_check (void *msgid_descr, void *msgstr_descr, bool equality,
364 formatstring_error_logger_t error_logger,
365 const char *pretty_msgid, const char *pretty_msgstr)
366 {
367 struct spec *spec1 = (struct spec *) msgid_descr;
368 struct spec *spec2 = (struct spec *) msgstr_descr;
369 bool err = false;
370
371 if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0)
372 {
373 unsigned int i, j;
374 unsigned int n1 = spec1->numbered_arg_count;
375 unsigned int n2 = spec2->numbered_arg_count;
376
377 /* Check the argument names are the same.
378 Both arrays are sorted. We search for the first difference. */
379 for (i = 0, j = 0; i < n1 || j < n2; )
380 {
381 int cmp = (i >= n1 ? 1 :
382 j >= n2 ? -1 :
383 spec1->numbered[i].number > spec2->numbered[j].number ? 1 :
384 spec1->numbered[i].number < spec2->numbered[j].number ? -1 :
385 0);
386
387 if (cmp > 0)
388 {
389 if (error_logger)
390 error_logger (_("a format specification for argument %u, as in '%s', doesn't exist in '%s'"),
391 spec2->numbered[j].number, pretty_msgstr,
392 pretty_msgid);
393 err = true;
394 break;
395 }
396 else if (cmp < 0)
397 {
398 if (equality)
399 {
400 if (error_logger)
401 error_logger (_("a format specification for argument %u doesn't exist in '%s'"),
402 spec1->numbered[i].number, pretty_msgstr);
403 err = true;
404 break;
405 }
406 else
407 i++;
408 }
409 else
410 j++, i++;
411 }
412 /* Check the argument types are the same. */
413 if (!err)
414 for (i = 0, j = 0; j < n2; )
415 {
416 if (spec1->numbered[i].number == spec2->numbered[j].number)
417 {
418 if (spec1->numbered[i].type != spec2->numbered[j].type)
419 {
420 if (error_logger)
421 error_logger (_("format specifications in '%s' and '%s' for argument %u are not the same"),
422 pretty_msgid, pretty_msgstr,
423 spec2->numbered[j].number);
424 err = true;
425 break;
426 }
427 j++, i++;
428 }
429 else
430 i++;
431 }
432 }
433
434 return err;
435 }
436
437
/* Parser table for Tcl format strings, filled with this file's entry
   points.  The initializer is positional, so the order must follow the
   member order of struct formatstring_parser.
   NOTE(review): that struct is presumably declared in format.h - confirm
   the meaning of each slot there.  */
struct formatstring_parser formatstring_tcl =
{
  format_parse,
  format_free,
  format_get_number_of_directives,
  NULL,         /* This slot is left unset for Tcl.  */
  format_check
};
446
447
448 #ifdef TEST
449
450 /* Test program: Print the argument list specification returned by
451 format_parse for strings read from standard input. */
452
453 #include <stdio.h>
454
455 static void
format_print(void * descr)456 format_print (void *descr)
457 {
458 struct spec *spec = (struct spec *) descr;
459 unsigned int last;
460 unsigned int i;
461
462 if (spec == NULL)
463 {
464 printf ("INVALID");
465 return;
466 }
467
468 printf ("(");
469 last = 1;
470 for (i = 0; i < spec->numbered_arg_count; i++)
471 {
472 unsigned int number = spec->numbered[i].number;
473
474 if (i > 0)
475 printf (" ");
476 if (number < last)
477 abort ();
478 for (; last < number; last++)
479 printf ("_ ");
480 switch (spec->numbered[i].type)
481 {
482 case FAT_CHARACTER:
483 printf ("c");
484 break;
485 case FAT_STRING:
486 printf ("s");
487 break;
488 case FAT_INTEGER:
489 printf ("i");
490 break;
491 case FAT_UNSIGNED_INTEGER:
492 printf ("[unsigned]i");
493 break;
494 case FAT_SHORT_INTEGER:
495 printf ("hi");
496 break;
497 case FAT_SHORT_UNSIGNED_INTEGER:
498 printf ("[unsigned]hi");
499 break;
500 case FAT_FLOAT:
501 printf ("f");
502 break;
503 default:
504 abort ();
505 }
506 last = number + 1;
507 }
508 printf (")");
509 }
510
511 int
main()512 main ()
513 {
514 for (;;)
515 {
516 char *line = NULL;
517 size_t line_size = 0;
518 int line_len;
519 char *invalid_reason;
520 void *descr;
521
522 line_len = getline (&line, &line_size, stdin);
523 if (line_len < 0)
524 break;
525 if (line_len > 0 && line[line_len - 1] == '\n')
526 line[--line_len] = '\0';
527
528 invalid_reason = NULL;
529 descr = format_parse (line, false, NULL, &invalid_reason);
530
531 format_print (descr);
532 printf ("\n");
533 if (descr == NULL)
534 printf ("%s\n", invalid_reason);
535
536 free (invalid_reason);
537 free (line);
538 }
539
540 return 0;
541 }
542
543 /*
544 * For Emacs M-x compile
545 * Local Variables:
546 * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../../gettext-runtime/intl -DHAVE_CONFIG_H -DTEST format-tcl.c ../gnulib-lib/libgettextlib.la"
547 * End:
548 */
549
550 #endif /* TEST */
551