1 /* PHP format strings.
2 Copyright (C) 2001-2004, 2006-2007, 2009, 2019-2020 Free Software Foundation, Inc.
3 Written by Bruno Haible <bruno@clisp.org>, 2002.
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17
18 #ifdef HAVE_CONFIG_H
19 # include <config.h>
20 #endif
21
22 #include <stdbool.h>
23 #include <stdlib.h>
24
25 #include "format.h"
26 #include "c-ctype.h"
27 #include "xalloc.h"
28 #include "xvasprintf.h"
29 #include "format-invalid.h"
30 #include "gettext.h"
31
32 #define _(str) gettext (str)
33
34 /* PHP format strings are described in phpdoc-4.0.6, file
35 phpdoc/manual/function.sprintf.html, and are implemented in
36 php-4.1.0/ext/standard/formatted_print.c.
37 A directive
38 - starts with '%' or '%m$' where m is a positive integer,
39 - is optionally followed by any of the characters '0', '-', ' ', or
40 "'<anychar>", each of which acts as a flag,
41 - is optionally followed by a width specification: a nonempty digit
42 sequence,
43 - is optionally followed by '.' and a precision specification: a nonempty
44 digit sequence,
45 - is optionally followed by a size specifier 'l', which is ignored,
46 - is finished by a specifier
47 - 's', that needs a string argument,
48 - 'b', 'd', 'u', 'o', 'x', 'X', that need an integer argument,
49 - 'e', 'f', that need a floating-point argument,
50 - 'c', that needs a character argument.
51 Additionally there is the directive '%%', which takes no argument.
52 Numbered and unnumbered argument specifications can be used in the same
53 string. Numbered argument specifications have no influence on the
54 "current argument index", that is incremented each time an argument is read.
55 */
56
/* Kind of argument consumed by a directive, as assigned by format_parse
   from the conversion specifier.  */
enum format_arg_type
{
  FAT_INTEGER = 0,      /* 'b', 'd', 'u', 'o', 'x', 'X' */
  FAT_FLOAT = 1,        /* 'e', 'f' */
  FAT_CHARACTER = 2,    /* 'c' */
  FAT_STRING = 3        /* 's' */
};
64
65 struct numbered_arg
66 {
67 unsigned int number;
68 enum format_arg_type type;
69 };
70
/* Result of parsing one format string.  */
struct spec
{
  /* Number of directives seen, including '%%'.  */
  unsigned int directives;
  /* Number of valid entries in NUMBERED.  */
  unsigned int numbered_arg_count;
  /* Argument descriptions; released by format_free.  May be NULL when the
     format string references no arguments.  */
  struct numbered_arg *numbered;
};
77
/* Locale independent test for an ASCII decimal digit.
   The argument may be a 'char' or an 'unsigned char' (unlike <ctype.h>
   isdigit, whose argument must be an 'unsigned char' value).  It is
   evaluated exactly once.  Values below '0' wrap around to large unsigned
   numbers, so a single comparison covers both ends of the range.  */
#undef isdigit
#define isdigit(c) \
  ((unsigned int) (c) - (unsigned int) '0' <= 9U)
83
84
85 static int
numbered_arg_compare(const void * p1,const void * p2)86 numbered_arg_compare (const void *p1, const void *p2)
87 {
88 unsigned int n1 = ((const struct numbered_arg *) p1)->number;
89 unsigned int n2 = ((const struct numbered_arg *) p2)->number;
90
91 return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0);
92 }
93
94 static void *
format_parse(const char * format,bool translated,char * fdi,char ** invalid_reason)95 format_parse (const char *format, bool translated, char *fdi,
96 char **invalid_reason)
97 {
98 const char *const format_start = format;
99 unsigned int directives;
100 unsigned int numbered_arg_count;
101 struct numbered_arg *numbered;
102 unsigned int numbered_allocated;
103 unsigned int unnumbered_arg_count;
104 struct spec *result;
105
106 directives = 0;
107 numbered_arg_count = 0;
108 numbered = NULL;
109 numbered_allocated = 0;
110 unnumbered_arg_count = 0;
111
112 for (; *format != '\0';)
113 if (*format++ == '%')
114 {
115 /* A directive. */
116 FDI_SET (format - 1, FMTDIR_START);
117 directives++;
118
119 if (*format != '%')
120 {
121 /* A complex directive. */
122 unsigned int number;
123 enum format_arg_type type;
124
125 number = ++unnumbered_arg_count;
126 if (isdigit (*format))
127 {
128 const char *f = format;
129 unsigned int m = 0;
130
131 do
132 {
133 m = 10 * m + (*f - '0');
134 f++;
135 }
136 while (isdigit (*f));
137
138 if (*f == '$')
139 {
140 if (m == 0)
141 {
142 *invalid_reason = INVALID_ARGNO_0 (directives);
143 FDI_SET (f, FMTDIR_ERROR);
144 goto bad_format;
145 }
146 number = m;
147 format = ++f;
148 --unnumbered_arg_count;
149 }
150 }
151
152 /* Parse flags. */
153 for (;;)
154 {
155 if (*format == '0' || *format == '-' || *format == ' ')
156 format++;
157 else if (*format == '\'')
158 {
159 format++;
160 if (*format == '\0')
161 {
162 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
163 FDI_SET (format - 1, FMTDIR_ERROR);
164 goto bad_format;
165 }
166 format++;
167 }
168 else
169 break;
170 }
171
172 /* Parse width. */
173 if (isdigit (*format))
174 {
175 do
176 format++;
177 while (isdigit (*format));
178 }
179
180 /* Parse precision. */
181 if (*format == '.')
182 {
183 format++;
184
185 if (isdigit (*format))
186 {
187 do
188 format++;
189 while (isdigit (*format));
190 }
191 else
192 --format; /* will jump to bad_format */
193 }
194
195 /* Parse size. */
196 if (*format == 'l')
197 format++;
198
199 switch (*format)
200 {
201 case 'b': case 'd': case 'u': case 'o': case 'x': case 'X':
202 type = FAT_INTEGER;
203 break;
204 case 'e': case 'f':
205 type = FAT_FLOAT;
206 break;
207 case 'c':
208 type = FAT_CHARACTER;
209 break;
210 case 's':
211 type = FAT_STRING;
212 break;
213 default:
214 if (*format == '\0')
215 {
216 *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
217 FDI_SET (format - 1, FMTDIR_ERROR);
218 }
219 else
220 {
221 *invalid_reason =
222 INVALID_CONVERSION_SPECIFIER (directives, *format);
223 FDI_SET (format, FMTDIR_ERROR);
224 }
225 goto bad_format;
226 }
227
228 if (numbered_allocated == numbered_arg_count)
229 {
230 numbered_allocated = 2 * numbered_allocated + 1;
231 numbered = (struct numbered_arg *) xrealloc (numbered, numbered_allocated * sizeof (struct numbered_arg));
232 }
233 numbered[numbered_arg_count].number = number;
234 numbered[numbered_arg_count].type = type;
235 numbered_arg_count++;
236 }
237
238 FDI_SET (format, FMTDIR_END);
239
240 format++;
241 }
242
243 /* Sort the numbered argument array, and eliminate duplicates. */
244 if (numbered_arg_count > 1)
245 {
246 unsigned int i, j;
247 bool err;
248
249 qsort (numbered, numbered_arg_count,
250 sizeof (struct numbered_arg), numbered_arg_compare);
251
252 /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */
253 err = false;
254 for (i = j = 0; i < numbered_arg_count; i++)
255 if (j > 0 && numbered[i].number == numbered[j-1].number)
256 {
257 enum format_arg_type type1 = numbered[i].type;
258 enum format_arg_type type2 = numbered[j-1].type;
259 enum format_arg_type type_both;
260
261 if (type1 == type2)
262 type_both = type1;
263 else
264 {
265 /* Incompatible types. */
266 type_both = type1;
267 if (!err)
268 *invalid_reason =
269 INVALID_INCOMPATIBLE_ARG_TYPES (numbered[i].number);
270 err = true;
271 }
272
273 numbered[j-1].type = type_both;
274 }
275 else
276 {
277 if (j < i)
278 {
279 numbered[j].number = numbered[i].number;
280 numbered[j].type = numbered[i].type;
281 }
282 j++;
283 }
284 numbered_arg_count = j;
285 if (err)
286 /* *invalid_reason has already been set above. */
287 goto bad_format;
288 }
289
290 result = XMALLOC (struct spec);
291 result->directives = directives;
292 result->numbered_arg_count = numbered_arg_count;
293 result->numbered = numbered;
294 return result;
295
296 bad_format:
297 if (numbered != NULL)
298 free (numbered);
299 return NULL;
300 }
301
302 static void
format_free(void * descr)303 format_free (void *descr)
304 {
305 struct spec *spec = (struct spec *) descr;
306
307 if (spec->numbered != NULL)
308 free (spec->numbered);
309 free (spec);
310 }
311
312 static int
format_get_number_of_directives(void * descr)313 format_get_number_of_directives (void *descr)
314 {
315 struct spec *spec = (struct spec *) descr;
316
317 return spec->directives;
318 }
319
320 static bool
format_check(void * msgid_descr,void * msgstr_descr,bool equality,formatstring_error_logger_t error_logger,const char * pretty_msgid,const char * pretty_msgstr)321 format_check (void *msgid_descr, void *msgstr_descr, bool equality,
322 formatstring_error_logger_t error_logger,
323 const char *pretty_msgid, const char *pretty_msgstr)
324 {
325 struct spec *spec1 = (struct spec *) msgid_descr;
326 struct spec *spec2 = (struct spec *) msgstr_descr;
327 bool err = false;
328
329 if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0)
330 {
331 unsigned int i, j;
332 unsigned int n1 = spec1->numbered_arg_count;
333 unsigned int n2 = spec2->numbered_arg_count;
334
335 /* Check the argument names are the same.
336 Both arrays are sorted. We search for the first difference. */
337 for (i = 0, j = 0; i < n1 || j < n2; )
338 {
339 int cmp = (i >= n1 ? 1 :
340 j >= n2 ? -1 :
341 spec1->numbered[i].number > spec2->numbered[j].number ? 1 :
342 spec1->numbered[i].number < spec2->numbered[j].number ? -1 :
343 0);
344
345 if (cmp > 0)
346 {
347 if (error_logger)
348 error_logger (_("a format specification for argument %u, as in '%s', doesn't exist in '%s'"),
349 spec2->numbered[j].number, pretty_msgstr,
350 pretty_msgid);
351 err = true;
352 break;
353 }
354 else if (cmp < 0)
355 {
356 if (equality)
357 {
358 if (error_logger)
359 error_logger (_("a format specification for argument %u doesn't exist in '%s'"),
360 spec1->numbered[i].number, pretty_msgstr);
361 err = true;
362 break;
363 }
364 else
365 i++;
366 }
367 else
368 j++, i++;
369 }
370 /* Check the argument types are the same. */
371 if (!err)
372 for (i = 0, j = 0; j < n2; )
373 {
374 if (spec1->numbered[i].number == spec2->numbered[j].number)
375 {
376 if (spec1->numbered[i].type != spec2->numbered[j].type)
377 {
378 if (error_logger)
379 error_logger (_("format specifications in '%s' and '%s' for argument %u are not the same"),
380 pretty_msgid, pretty_msgstr,
381 spec2->numbered[j].number);
382 err = true;
383 break;
384 }
385 j++, i++;
386 }
387 else
388 i++;
389 }
390 }
391
392 return err;
393 }
394
395
396 struct formatstring_parser formatstring_php =
397 {
398 format_parse,
399 format_free,
400 format_get_number_of_directives,
401 NULL,
402 format_check
403 };
404
405
406 #ifdef TEST
407
408 /* Test program: Print the argument list specification returned by
409 format_parse for strings read from standard input. */
410
411 #include <stdio.h>
412
413 static void
format_print(void * descr)414 format_print (void *descr)
415 {
416 struct spec *spec = (struct spec *) descr;
417 unsigned int last;
418 unsigned int i;
419
420 if (spec == NULL)
421 {
422 printf ("INVALID");
423 return;
424 }
425
426 printf ("(");
427 last = 1;
428 for (i = 0; i < spec->numbered_arg_count; i++)
429 {
430 unsigned int number = spec->numbered[i].number;
431
432 if (i > 0)
433 printf (" ");
434 if (number < last)
435 abort ();
436 for (; last < number; last++)
437 printf ("_ ");
438 switch (spec->numbered[i].type)
439 {
440 case FAT_INTEGER:
441 printf ("i");
442 break;
443 case FAT_FLOAT:
444 printf ("f");
445 break;
446 case FAT_CHARACTER:
447 printf ("c");
448 break;
449 case FAT_STRING:
450 printf ("s");
451 break;
452 default:
453 abort ();
454 }
455 last = number + 1;
456 }
457 printf (")");
458 }
459
/* Test driver: read format strings from standard input, one per line, and
   print for each the argument list found by format_parse, followed by the
   reason when the string is invalid.  */
int
main (void)
{
  for (;;)
    {
      char *line = NULL;
      size_t line_size = 0;
      /* getline returns ssize_t; an 'int' could truncate the length.  */
      ssize_t line_len;
      char *invalid_reason;
      void *descr;

      line_len = getline (&line, &line_size, stdin);
      if (line_len < 0)
        {
          /* getline may have allocated a buffer even though it failed.  */
          free (line);
          break;
        }
      /* Strip the trailing newline, if any.  */
      if (line_len > 0 && line[line_len - 1] == '\n')
        line[--line_len] = '\0';

      invalid_reason = NULL;
      descr = format_parse (line, false, NULL, &invalid_reason);

      format_print (descr);
      printf ("\n");
      if (descr == NULL)
        printf ("%s\n", invalid_reason);
      else
        format_free (descr);

      free (invalid_reason);
      free (line);
    }

  return 0;
}
491
492 /*
493 * For Emacs M-x compile
494 * Local Variables:
495 * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../../gettext-runtime/intl -DHAVE_CONFIG_H -DTEST format-php.c ../gnulib-lib/libgettextlib.la"
496 * End:
497 */
498
499 #endif /* TEST */
500