• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/* PHP format strings.
   Copyright (C) 2001-2004, 2006-2007, 2009, 2019-2020 Free Software Foundation, Inc.
   Written by Bruno Haible <bruno@clisp.org>, 2002.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
17 
18 #ifdef HAVE_CONFIG_H
19 # include <config.h>
20 #endif
21 
22 #include <stdbool.h>
23 #include <stdlib.h>
24 
25 #include "format.h"
26 #include "c-ctype.h"
27 #include "xalloc.h"
28 #include "xvasprintf.h"
29 #include "format-invalid.h"
30 #include "gettext.h"
31 
32 #define _(str) gettext (str)
33 
/* PHP format strings are described in phpdoc-4.0.6, file
   phpdoc/manual/function.sprintf.html, and are implemented in
   php-4.1.0/ext/standard/formatted_print.c.
   A directive
   - starts with '%' or '%m$' where m is a positive integer,
   - is optionally followed by any of the characters '0', '-', ' ', or
     "'<anychar>", each of which acts as a flag,
   - is optionally followed by a width specification: a nonempty digit
     sequence,
   - is optionally followed by '.' and a precision specification: a nonempty
     digit sequence,
   - is optionally followed by a size specifier 'l', which is ignored,
   - is finished by a specifier
       - 's', that needs a string argument,
       - 'b', 'd', 'u', 'o', 'x', 'X', that need an integer argument,
       - 'e', 'f', that need a floating-point argument,
       - 'c', that needs a character argument.
   Additionally there is the directive '%%', which takes no argument.
   Numbered and unnumbered argument specifications can be used in the same
   string.  Numbered argument specifications have no influence on the
   "current argument index", that is incremented each time an argument is read.
 */
56 
/* Kind of argument that a directive consumes.  */
enum format_arg_type
{
  FAT_INTEGER,          /* 'b', 'd', 'u', 'o', 'x', 'X' */
  FAT_FLOAT,            /* 'e', 'f' */
  FAT_CHARACTER,        /* 'c' */
  FAT_STRING            /* 's' */
};
64 
65 struct numbered_arg
66 {
67   unsigned int number;
68   enum format_arg_type type;
69 };
70 
/* Result of parsing one format string.  */
struct spec
{
  /* Number of directives seen, including '%%'.  */
  unsigned int directives;
  /* Number of valid elements in NUMBERED.  */
  unsigned int numbered_arg_count;
  /* Argument descriptions, sorted by argument number without duplicates;
     may be NULL when the format string takes no argument.  */
  struct numbered_arg *numbered;
};
77 
/* Locale independent test for a decimal digit.
   The argument may be a 'char' or an 'unsigned char': the difference to '0'
   is converted to 'unsigned int', so values below '0' become huge and fail
   the '< 10' test.  (The <ctype.h> isdigit, in contrast, requires an
   'unsigned char' argument.)  The argument is evaluated exactly once.  */
#undef isdigit
#define isdigit(c) ((unsigned int) ((c) - '0') < 10)
83 
84 
85 static int
numbered_arg_compare(const void * p1,const void * p2)86 numbered_arg_compare (const void *p1, const void *p2)
87 {
88   unsigned int n1 = ((const struct numbered_arg *) p1)->number;
89   unsigned int n2 = ((const struct numbered_arg *) p2)->number;
90 
91   return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0);
92 }
93 
94 static void *
format_parse(const char * format,bool translated,char * fdi,char ** invalid_reason)95 format_parse (const char *format, bool translated, char *fdi,
96               char **invalid_reason)
97 {
98   const char *const format_start = format;
99   unsigned int directives;
100   unsigned int numbered_arg_count;
101   struct numbered_arg *numbered;
102   unsigned int numbered_allocated;
103   unsigned int unnumbered_arg_count;
104   struct spec *result;
105 
106   directives = 0;
107   numbered_arg_count = 0;
108   numbered = NULL;
109   numbered_allocated = 0;
110   unnumbered_arg_count = 0;
111 
112   for (; *format != '\0';)
113     if (*format++ == '%')
114       {
115         /* A directive.  */
116         FDI_SET (format - 1, FMTDIR_START);
117         directives++;
118 
119         if (*format != '%')
120           {
121             /* A complex directive.  */
122             unsigned int number;
123             enum format_arg_type type;
124 
125             number = ++unnumbered_arg_count;
126             if (isdigit (*format))
127               {
128                 const char *f = format;
129                 unsigned int m = 0;
130 
131                 do
132                   {
133                     m = 10 * m + (*f - '0');
134                     f++;
135                   }
136                 while (isdigit (*f));
137 
138                 if (*f == '$')
139                   {
140                     if (m == 0)
141                       {
142                         *invalid_reason = INVALID_ARGNO_0 (directives);
143                         FDI_SET (f, FMTDIR_ERROR);
144                         goto bad_format;
145                       }
146                     number = m;
147                     format = ++f;
148                     --unnumbered_arg_count;
149                   }
150               }
151 
152             /* Parse flags.  */
153             for (;;)
154               {
155                 if (*format == '0' || *format == '-' || *format == ' ')
156                   format++;
157                 else if (*format == '\'')
158                   {
159                     format++;
160                     if (*format == '\0')
161                       {
162                         *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
163                         FDI_SET (format - 1, FMTDIR_ERROR);
164                         goto bad_format;
165                       }
166                     format++;
167                   }
168                 else
169                   break;
170               }
171 
172             /* Parse width.  */
173             if (isdigit (*format))
174               {
175                 do
176                   format++;
177                 while (isdigit (*format));
178               }
179 
180             /* Parse precision.  */
181             if (*format == '.')
182               {
183                 format++;
184 
185                 if (isdigit (*format))
186                   {
187                     do
188                       format++;
189                     while (isdigit (*format));
190                   }
191                 else
192                   --format;     /* will jump to bad_format */
193               }
194 
195             /* Parse size.  */
196             if (*format == 'l')
197               format++;
198 
199             switch (*format)
200               {
201               case 'b': case 'd': case 'u': case 'o': case 'x': case 'X':
202                 type = FAT_INTEGER;
203                 break;
204               case 'e': case 'f':
205                 type = FAT_FLOAT;
206                 break;
207               case 'c':
208                 type = FAT_CHARACTER;
209                 break;
210               case 's':
211                 type = FAT_STRING;
212                 break;
213               default:
214                 if (*format == '\0')
215                   {
216                     *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
217                     FDI_SET (format - 1, FMTDIR_ERROR);
218                   }
219                 else
220                   {
221                     *invalid_reason =
222                       INVALID_CONVERSION_SPECIFIER (directives, *format);
223                     FDI_SET (format, FMTDIR_ERROR);
224                   }
225                 goto bad_format;
226               }
227 
228             if (numbered_allocated == numbered_arg_count)
229               {
230                 numbered_allocated = 2 * numbered_allocated + 1;
231                 numbered = (struct numbered_arg *) xrealloc (numbered, numbered_allocated * sizeof (struct numbered_arg));
232               }
233             numbered[numbered_arg_count].number = number;
234             numbered[numbered_arg_count].type = type;
235             numbered_arg_count++;
236           }
237 
238         FDI_SET (format, FMTDIR_END);
239 
240         format++;
241       }
242 
243   /* Sort the numbered argument array, and eliminate duplicates.  */
244   if (numbered_arg_count > 1)
245     {
246       unsigned int i, j;
247       bool err;
248 
249       qsort (numbered, numbered_arg_count,
250              sizeof (struct numbered_arg), numbered_arg_compare);
251 
252       /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i.  */
253       err = false;
254       for (i = j = 0; i < numbered_arg_count; i++)
255         if (j > 0 && numbered[i].number == numbered[j-1].number)
256           {
257             enum format_arg_type type1 = numbered[i].type;
258             enum format_arg_type type2 = numbered[j-1].type;
259             enum format_arg_type type_both;
260 
261             if (type1 == type2)
262               type_both = type1;
263             else
264               {
265                 /* Incompatible types.  */
266                 type_both = type1;
267                 if (!err)
268                   *invalid_reason =
269                     INVALID_INCOMPATIBLE_ARG_TYPES (numbered[i].number);
270                 err = true;
271               }
272 
273             numbered[j-1].type = type_both;
274           }
275         else
276           {
277             if (j < i)
278               {
279                 numbered[j].number = numbered[i].number;
280                 numbered[j].type = numbered[i].type;
281               }
282             j++;
283           }
284       numbered_arg_count = j;
285       if (err)
286         /* *invalid_reason has already been set above.  */
287         goto bad_format;
288     }
289 
290   result = XMALLOC (struct spec);
291   result->directives = directives;
292   result->numbered_arg_count = numbered_arg_count;
293   result->numbered = numbered;
294   return result;
295 
296  bad_format:
297   if (numbered != NULL)
298     free (numbered);
299   return NULL;
300 }
301 
302 static void
format_free(void * descr)303 format_free (void *descr)
304 {
305   struct spec *spec = (struct spec *) descr;
306 
307   if (spec->numbered != NULL)
308     free (spec->numbered);
309   free (spec);
310 }
311 
312 static int
format_get_number_of_directives(void * descr)313 format_get_number_of_directives (void *descr)
314 {
315   struct spec *spec = (struct spec *) descr;
316 
317   return spec->directives;
318 }
319 
320 static bool
format_check(void * msgid_descr,void * msgstr_descr,bool equality,formatstring_error_logger_t error_logger,const char * pretty_msgid,const char * pretty_msgstr)321 format_check (void *msgid_descr, void *msgstr_descr, bool equality,
322               formatstring_error_logger_t error_logger,
323               const char *pretty_msgid, const char *pretty_msgstr)
324 {
325   struct spec *spec1 = (struct spec *) msgid_descr;
326   struct spec *spec2 = (struct spec *) msgstr_descr;
327   bool err = false;
328 
329   if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0)
330     {
331       unsigned int i, j;
332       unsigned int n1 = spec1->numbered_arg_count;
333       unsigned int n2 = spec2->numbered_arg_count;
334 
335       /* Check the argument names are the same.
336          Both arrays are sorted.  We search for the first difference.  */
337       for (i = 0, j = 0; i < n1 || j < n2; )
338         {
339           int cmp = (i >= n1 ? 1 :
340                      j >= n2 ? -1 :
341                      spec1->numbered[i].number > spec2->numbered[j].number ? 1 :
342                      spec1->numbered[i].number < spec2->numbered[j].number ? -1 :
343                      0);
344 
345           if (cmp > 0)
346             {
347               if (error_logger)
348                 error_logger (_("a format specification for argument %u, as in '%s', doesn't exist in '%s'"),
349                               spec2->numbered[j].number, pretty_msgstr,
350                               pretty_msgid);
351               err = true;
352               break;
353             }
354           else if (cmp < 0)
355             {
356               if (equality)
357                 {
358                   if (error_logger)
359                     error_logger (_("a format specification for argument %u doesn't exist in '%s'"),
360                                   spec1->numbered[i].number, pretty_msgstr);
361                   err = true;
362                   break;
363                 }
364               else
365                 i++;
366             }
367           else
368             j++, i++;
369         }
370       /* Check the argument types are the same.  */
371       if (!err)
372         for (i = 0, j = 0; j < n2; )
373           {
374             if (spec1->numbered[i].number == spec2->numbered[j].number)
375               {
376                 if (spec1->numbered[i].type != spec2->numbered[j].type)
377                   {
378                     if (error_logger)
379                       error_logger (_("format specifications in '%s' and '%s' for argument %u are not the same"),
380                                     pretty_msgid, pretty_msgstr,
381                                     spec2->numbered[j].number);
382                     err = true;
383                     break;
384                   }
385                 j++, i++;
386               }
387             else
388               i++;
389           }
390     }
391 
392   return err;
393 }
394 
395 
396 struct formatstring_parser formatstring_php =
397 {
398   format_parse,
399   format_free,
400   format_get_number_of_directives,
401   NULL,
402   format_check
403 };
404 
405 
406 #ifdef TEST
407 
408 /* Test program: Print the argument list specification returned by
409    format_parse for strings read from standard input.  */
410 
411 #include <stdio.h>
412 
413 static void
format_print(void * descr)414 format_print (void *descr)
415 {
416   struct spec *spec = (struct spec *) descr;
417   unsigned int last;
418   unsigned int i;
419 
420   if (spec == NULL)
421     {
422       printf ("INVALID");
423       return;
424     }
425 
426   printf ("(");
427   last = 1;
428   for (i = 0; i < spec->numbered_arg_count; i++)
429     {
430       unsigned int number = spec->numbered[i].number;
431 
432       if (i > 0)
433         printf (" ");
434       if (number < last)
435         abort ();
436       for (; last < number; last++)
437         printf ("_ ");
438       switch (spec->numbered[i].type)
439         {
440         case FAT_INTEGER:
441           printf ("i");
442           break;
443         case FAT_FLOAT:
444           printf ("f");
445           break;
446         case FAT_CHARACTER:
447           printf ("c");
448           break;
449         case FAT_STRING:
450           printf ("s");
451           break;
452         default:
453           abort ();
454         }
455       last = number + 1;
456     }
457   printf (")");
458 }
459 
/* Test driver: read format strings from standard input, one per line, and
   print for each the argument list found by format_parse, followed by the
   reason on a line of its own when the string is invalid.
   Stops at end of input or on a read error and returns 0.  */
int
main (void)
{
  for (;;)
    {
      char *line = NULL;
      size_t line_size = 0;
      int line_len;
      char *invalid_reason;
      void *descr;

      line_len = getline (&line, &line_size, stdin);
      if (line_len < 0)
        {
          /* getline may have allocated the buffer even though it failed,
             so release it before leaving the loop.  */
          free (line);
          break;
        }
      /* Strip the trailing newline, if any.  */
      if (line_len > 0 && line[line_len - 1] == '\n')
        line[--line_len] = '\0';

      invalid_reason = NULL;
      descr = format_parse (line, false, NULL, &invalid_reason);

      format_print (descr);
      printf ("\n");
      if (descr == NULL)
        printf ("%s\n", invalid_reason);
      else
        /* format_free dereferences its argument, hence only call it for a
           successfully parsed descriptor.  */
        format_free (descr);

      free (invalid_reason);
      free (line);
    }

  return 0;
}
491 
492 /*
493  * For Emacs M-x compile
494  * Local Variables:
495  * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../../gettext-runtime/intl -DHAVE_CONFIG_H -DTEST format-php.c ../gnulib-lib/libgettextlib.la"
496  * End:
497  */
498 
499 #endif /* TEST */
500