1 /* Formatted output to strings.
2 Copyright (C) 1999-2000, 2002-2003, 2006-2012 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License along
15 with this program; if not, see <http://www.gnu.org/licenses/>. */
16
17 /* This file can be parametrized with the following macros:
18 CHAR_T The element type of the format string.
19 CHAR_T_ONLY_ASCII Set to 1 to enable verification that all characters
20 in the format string are ASCII.
21 DIRECTIVE Structure denoting a format directive.
22 Depends on CHAR_T.
23 DIRECTIVES Structure denoting the set of format directives of a
24 format string. Depends on CHAR_T.
25 PRINTF_PARSE Function that parses a format string.
26 Depends on CHAR_T.
27 STATIC Set to 'static' to declare the function static.
28 ENABLE_UNISTDIO Set to 1 to enable the unistdio extensions. */
29
30 #ifndef PRINTF_PARSE
31 # include <config.h>
32 #endif
33
34 /* Specification. */
35 #ifndef PRINTF_PARSE
36 # include "printf-parse.h"
37 #endif
38
39 /* Default parameters. */
40 #ifndef PRINTF_PARSE
41 # define PRINTF_PARSE printf_parse
42 # define CHAR_T char
43 # define DIRECTIVE char_directive
44 # define DIRECTIVES char_directives
45 #endif
46
47 /* Get size_t, NULL. */
48 #include <stddef.h>
49
50 /* Get intmax_t. */
51 #if defined IN_LIBINTL || defined IN_LIBASPRINTF
52 # if HAVE_STDINT_H_WITH_UINTMAX
53 # include <stdint.h>
54 # endif
55 # if HAVE_INTTYPES_H_WITH_UINTMAX
56 # include <inttypes.h>
57 # endif
58 #else
59 # include <stdint.h>
60 #endif
61
62 /* malloc(), realloc(), free(). */
63 #include <stdlib.h>
64
65 /* memcpy(). */
66 #include <string.h>
67
68 /* errno. */
69 #include <errno.h>
70
71 /* Checked size_t computations. */
72 #include "xsize.h"
73
74 #if CHAR_T_ONLY_ASCII
75 /* c_isascii(). */
76 # include "c-ctype.h"
77 #endif
78
79 #ifdef STATIC
80 STATIC
81 #endif
82 int
PRINTF_PARSE(const CHAR_T * format,DIRECTIVES * d,arguments * a)83 PRINTF_PARSE (const CHAR_T *format, DIRECTIVES *d, arguments *a)
84 {
85 const CHAR_T *cp = format; /* pointer into format */
86 size_t arg_posn = 0; /* number of regular arguments consumed */
87 size_t d_allocated; /* allocated elements of d->dir */
88 size_t a_allocated; /* allocated elements of a->arg */
89 size_t max_width_length = 0;
90 size_t max_precision_length = 0;
91
92 d->count = 0;
93 d_allocated = N_DIRECT_ALLOC_DIRECTIVES;
94 d->dir = d->direct_alloc_dir;
95
96 a->count = 0;
97 a_allocated = N_DIRECT_ALLOC_ARGUMENTS;
98 a->arg = a->direct_alloc_arg;
99
100 #define REGISTER_ARG(_index_,_type_) \
101 { \
102 size_t n = (_index_); \
103 if (n >= a_allocated) \
104 { \
105 size_t memory_size; \
106 argument *memory; \
107 \
108 a_allocated = xtimes (a_allocated, 2); \
109 if (a_allocated <= n) \
110 a_allocated = xsum (n, 1); \
111 memory_size = xtimes (a_allocated, sizeof (argument)); \
112 if (size_overflow_p (memory_size)) \
113 /* Overflow, would lead to out of memory. */ \
114 goto out_of_memory; \
115 memory = (argument *) (a->arg != a->direct_alloc_arg \
116 ? realloc (a->arg, memory_size) \
117 : malloc (memory_size)); \
118 if (memory == NULL) \
119 /* Out of memory. */ \
120 goto out_of_memory; \
121 if (a->arg == a->direct_alloc_arg) \
122 memcpy (memory, a->arg, a->count * sizeof (argument)); \
123 a->arg = memory; \
124 } \
125 while (a->count <= n) \
126 a->arg[a->count++].type = TYPE_NONE; \
127 if (a->arg[n].type == TYPE_NONE) \
128 a->arg[n].type = (_type_); \
129 else if (a->arg[n].type != (_type_)) \
130 /* Ambiguous type for positional argument. */ \
131 goto error; \
132 }
133
134 while (*cp != '\0')
135 {
136 CHAR_T c = *cp++;
137 if (c == '%')
138 {
139 size_t arg_index = ARG_NONE;
140 DIRECTIVE *dp = &d->dir[d->count]; /* pointer to next directive */
141
142 /* Initialize the next directive. */
143 dp->dir_start = cp - 1;
144 dp->flags = 0;
145 dp->width_start = NULL;
146 dp->width_end = NULL;
147 dp->width_arg_index = ARG_NONE;
148 dp->precision_start = NULL;
149 dp->precision_end = NULL;
150 dp->precision_arg_index = ARG_NONE;
151 dp->arg_index = ARG_NONE;
152
153 /* Test for positional argument. */
154 if (*cp >= '0' && *cp <= '9')
155 {
156 const CHAR_T *np;
157
158 for (np = cp; *np >= '0' && *np <= '9'; np++)
159 ;
160 if (*np == '$')
161 {
162 size_t n = 0;
163
164 for (np = cp; *np >= '0' && *np <= '9'; np++)
165 n = xsum (xtimes (n, 10), *np - '0');
166 if (n == 0)
167 /* Positional argument 0. */
168 goto error;
169 if (size_overflow_p (n))
170 /* n too large, would lead to out of memory later. */
171 goto error;
172 arg_index = n - 1;
173 cp = np + 1;
174 }
175 }
176
177 /* Read the flags. */
178 for (;;)
179 {
180 if (*cp == '\'')
181 {
182 dp->flags |= FLAG_GROUP;
183 cp++;
184 }
185 else if (*cp == '-')
186 {
187 dp->flags |= FLAG_LEFT;
188 cp++;
189 }
190 else if (*cp == '+')
191 {
192 dp->flags |= FLAG_SHOWSIGN;
193 cp++;
194 }
195 else if (*cp == ' ')
196 {
197 dp->flags |= FLAG_SPACE;
198 cp++;
199 }
200 else if (*cp == '#')
201 {
202 dp->flags |= FLAG_ALT;
203 cp++;
204 }
205 else if (*cp == '0')
206 {
207 dp->flags |= FLAG_ZERO;
208 cp++;
209 }
210 #if __GLIBC__ >= 2 && !defined __UCLIBC__
211 else if (*cp == 'I')
212 {
213 dp->flags |= FLAG_LOCALIZED;
214 cp++;
215 }
216 #endif
217 else
218 break;
219 }
220
221 /* Parse the field width. */
222 if (*cp == '*')
223 {
224 dp->width_start = cp;
225 cp++;
226 dp->width_end = cp;
227 if (max_width_length < 1)
228 max_width_length = 1;
229
230 /* Test for positional argument. */
231 if (*cp >= '0' && *cp <= '9')
232 {
233 const CHAR_T *np;
234
235 for (np = cp; *np >= '0' && *np <= '9'; np++)
236 ;
237 if (*np == '$')
238 {
239 size_t n = 0;
240
241 for (np = cp; *np >= '0' && *np <= '9'; np++)
242 n = xsum (xtimes (n, 10), *np - '0');
243 if (n == 0)
244 /* Positional argument 0. */
245 goto error;
246 if (size_overflow_p (n))
247 /* n too large, would lead to out of memory later. */
248 goto error;
249 dp->width_arg_index = n - 1;
250 cp = np + 1;
251 }
252 }
253 if (dp->width_arg_index == ARG_NONE)
254 {
255 dp->width_arg_index = arg_posn++;
256 if (dp->width_arg_index == ARG_NONE)
257 /* arg_posn wrapped around. */
258 goto error;
259 }
260 REGISTER_ARG (dp->width_arg_index, TYPE_INT);
261 }
262 else if (*cp >= '0' && *cp <= '9')
263 {
264 size_t width_length;
265
266 dp->width_start = cp;
267 for (; *cp >= '0' && *cp <= '9'; cp++)
268 ;
269 dp->width_end = cp;
270 width_length = dp->width_end - dp->width_start;
271 if (max_width_length < width_length)
272 max_width_length = width_length;
273 }
274
275 /* Parse the precision. */
276 if (*cp == '.')
277 {
278 cp++;
279 if (*cp == '*')
280 {
281 dp->precision_start = cp - 1;
282 cp++;
283 dp->precision_end = cp;
284 if (max_precision_length < 2)
285 max_precision_length = 2;
286
287 /* Test for positional argument. */
288 if (*cp >= '0' && *cp <= '9')
289 {
290 const CHAR_T *np;
291
292 for (np = cp; *np >= '0' && *np <= '9'; np++)
293 ;
294 if (*np == '$')
295 {
296 size_t n = 0;
297
298 for (np = cp; *np >= '0' && *np <= '9'; np++)
299 n = xsum (xtimes (n, 10), *np - '0');
300 if (n == 0)
301 /* Positional argument 0. */
302 goto error;
303 if (size_overflow_p (n))
304 /* n too large, would lead to out of memory
305 later. */
306 goto error;
307 dp->precision_arg_index = n - 1;
308 cp = np + 1;
309 }
310 }
311 if (dp->precision_arg_index == ARG_NONE)
312 {
313 dp->precision_arg_index = arg_posn++;
314 if (dp->precision_arg_index == ARG_NONE)
315 /* arg_posn wrapped around. */
316 goto error;
317 }
318 REGISTER_ARG (dp->precision_arg_index, TYPE_INT);
319 }
320 else
321 {
322 size_t precision_length;
323
324 dp->precision_start = cp - 1;
325 for (; *cp >= '0' && *cp <= '9'; cp++)
326 ;
327 dp->precision_end = cp;
328 precision_length = dp->precision_end - dp->precision_start;
329 if (max_precision_length < precision_length)
330 max_precision_length = precision_length;
331 }
332 }
333
334 {
335 arg_type type;
336
337 /* Parse argument type/size specifiers. */
338 {
339 int flags = 0;
340
341 for (;;)
342 {
343 if (*cp == 'h')
344 {
345 flags |= (1 << (flags & 1));
346 cp++;
347 }
348 else if (*cp == 'L')
349 {
350 flags |= 4;
351 cp++;
352 }
353 else if (*cp == 'l')
354 {
355 flags += 8;
356 cp++;
357 }
358 else if (*cp == 'j')
359 {
360 if (sizeof (intmax_t) > sizeof (long))
361 {
362 /* intmax_t = long long */
363 flags += 16;
364 }
365 else if (sizeof (intmax_t) > sizeof (int))
366 {
367 /* intmax_t = long */
368 flags += 8;
369 }
370 cp++;
371 }
372 else if (*cp == 'z' || *cp == 'Z')
373 {
374 /* 'z' is standardized in ISO C 99, but glibc uses 'Z'
375 because the warning facility in gcc-2.95.2 understands
376 only 'Z' (see gcc-2.95.2/gcc/c-common.c:1784). */
377 if (sizeof (size_t) > sizeof (long))
378 {
379 /* size_t = long long */
380 flags += 16;
381 }
382 else if (sizeof (size_t) > sizeof (int))
383 {
384 /* size_t = long */
385 flags += 8;
386 }
387 cp++;
388 }
389 else if (*cp == 't')
390 {
391 if (sizeof (ptrdiff_t) > sizeof (long))
392 {
393 /* ptrdiff_t = long long */
394 flags += 16;
395 }
396 else if (sizeof (ptrdiff_t) > sizeof (int))
397 {
398 /* ptrdiff_t = long */
399 flags += 8;
400 }
401 cp++;
402 }
403 #if defined __APPLE__ && defined __MACH__
404 /* On Mac OS X 10.3, PRIdMAX is defined as "qd".
405 We cannot change it to "lld" because PRIdMAX must also
406 be understood by the system's printf routines. */
407 else if (*cp == 'q')
408 {
409 if (64 / 8 > sizeof (long))
410 {
411 /* int64_t = long long */
412 flags += 16;
413 }
414 else
415 {
416 /* int64_t = long */
417 flags += 8;
418 }
419 cp++;
420 }
421 #endif
422 #if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__
423 /* On native Windows, PRIdMAX is defined as "I64d".
424 We cannot change it to "lld" because PRIdMAX must also
425 be understood by the system's printf routines. */
426 else if (*cp == 'I' && cp[1] == '6' && cp[2] == '4')
427 {
428 if (64 / 8 > sizeof (long))
429 {
430 /* __int64 = long long */
431 flags += 16;
432 }
433 else
434 {
435 /* __int64 = long */
436 flags += 8;
437 }
438 cp += 3;
439 }
440 #endif
441 else
442 break;
443 }
444
445 /* Read the conversion character. */
446 c = *cp++;
447 switch (c)
448 {
449 case 'd': case 'i':
450 #if HAVE_LONG_LONG_INT
451 /* If 'long long' exists and is larger than 'long': */
452 if (flags >= 16 || (flags & 4))
453 type = TYPE_LONGLONGINT;
454 else
455 #endif
456 /* If 'long long' exists and is the same as 'long', we parse
457 "lld" into TYPE_LONGINT. */
458 if (flags >= 8)
459 type = TYPE_LONGINT;
460 else if (flags & 2)
461 type = TYPE_SCHAR;
462 else if (flags & 1)
463 type = TYPE_SHORT;
464 else
465 type = TYPE_INT;
466 break;
467 case 'o': case 'u': case 'x': case 'X':
468 #if HAVE_LONG_LONG_INT
469 /* If 'long long' exists and is larger than 'long': */
470 if (flags >= 16 || (flags & 4))
471 type = TYPE_ULONGLONGINT;
472 else
473 #endif
474 /* If 'unsigned long long' exists and is the same as
475 'unsigned long', we parse "llu" into TYPE_ULONGINT. */
476 if (flags >= 8)
477 type = TYPE_ULONGINT;
478 else if (flags & 2)
479 type = TYPE_UCHAR;
480 else if (flags & 1)
481 type = TYPE_USHORT;
482 else
483 type = TYPE_UINT;
484 break;
485 case 'f': case 'F': case 'e': case 'E': case 'g': case 'G':
486 case 'a': case 'A':
487 if (flags >= 16 || (flags & 4))
488 type = TYPE_LONGDOUBLE;
489 else
490 type = TYPE_DOUBLE;
491 break;
492 case 'c':
493 if (flags >= 8)
494 #if HAVE_WINT_T
495 type = TYPE_WIDE_CHAR;
496 #else
497 goto error;
498 #endif
499 else
500 type = TYPE_CHAR;
501 break;
502 #if HAVE_WINT_T
503 case 'C':
504 type = TYPE_WIDE_CHAR;
505 c = 'c';
506 break;
507 #endif
508 case 's':
509 if (flags >= 8)
510 #if HAVE_WCHAR_T
511 type = TYPE_WIDE_STRING;
512 #else
513 goto error;
514 #endif
515 else
516 type = TYPE_STRING;
517 break;
518 #if HAVE_WCHAR_T
519 case 'S':
520 type = TYPE_WIDE_STRING;
521 c = 's';
522 break;
523 #endif
524 case 'p':
525 type = TYPE_POINTER;
526 break;
527 case 'n':
528 #if HAVE_LONG_LONG_INT
529 /* If 'long long' exists and is larger than 'long': */
530 if (flags >= 16 || (flags & 4))
531 type = TYPE_COUNT_LONGLONGINT_POINTER;
532 else
533 #endif
534 /* If 'long long' exists and is the same as 'long', we parse
535 "lln" into TYPE_COUNT_LONGINT_POINTER. */
536 if (flags >= 8)
537 type = TYPE_COUNT_LONGINT_POINTER;
538 else if (flags & 2)
539 type = TYPE_COUNT_SCHAR_POINTER;
540 else if (flags & 1)
541 type = TYPE_COUNT_SHORT_POINTER;
542 else
543 type = TYPE_COUNT_INT_POINTER;
544 break;
545 #if ENABLE_UNISTDIO
546 /* The unistdio extensions. */
547 case 'U':
548 if (flags >= 16)
549 type = TYPE_U32_STRING;
550 else if (flags >= 8)
551 type = TYPE_U16_STRING;
552 else
553 type = TYPE_U8_STRING;
554 break;
555 #endif
556 case '%':
557 type = TYPE_NONE;
558 break;
559 default:
560 /* Unknown conversion character. */
561 goto error;
562 }
563 }
564
565 if (type != TYPE_NONE)
566 {
567 dp->arg_index = arg_index;
568 if (dp->arg_index == ARG_NONE)
569 {
570 dp->arg_index = arg_posn++;
571 if (dp->arg_index == ARG_NONE)
572 /* arg_posn wrapped around. */
573 goto error;
574 }
575 REGISTER_ARG (dp->arg_index, type);
576 }
577 dp->conversion = c;
578 dp->dir_end = cp;
579 }
580
581 d->count++;
582 if (d->count >= d_allocated)
583 {
584 size_t memory_size;
585 DIRECTIVE *memory;
586
587 d_allocated = xtimes (d_allocated, 2);
588 memory_size = xtimes (d_allocated, sizeof (DIRECTIVE));
589 if (size_overflow_p (memory_size))
590 /* Overflow, would lead to out of memory. */
591 goto out_of_memory;
592 memory = (DIRECTIVE *) (d->dir != d->direct_alloc_dir
593 ? realloc (d->dir, memory_size)
594 : malloc (memory_size));
595 if (memory == NULL)
596 /* Out of memory. */
597 goto out_of_memory;
598 if (d->dir == d->direct_alloc_dir)
599 memcpy (memory, d->dir, d->count * sizeof (DIRECTIVE));
600 d->dir = memory;
601 }
602 }
603 #if CHAR_T_ONLY_ASCII
604 else if (!c_isascii (c))
605 {
606 /* Non-ASCII character. Not supported. */
607 goto error;
608 }
609 #endif
610 }
611 d->dir[d->count].dir_start = cp;
612
613 d->max_width_length = max_width_length;
614 d->max_precision_length = max_precision_length;
615 return 0;
616
617 error:
618 if (a->arg != a->direct_alloc_arg)
619 free (a->arg);
620 if (d->dir != d->direct_alloc_dir)
621 free (d->dir);
622 errno = EINVAL;
623 return -1;
624
625 out_of_memory:
626 if (a->arg != a->direct_alloc_arg)
627 free (a->arg);
628 if (d->dir != d->direct_alloc_dir)
629 free (d->dir);
630 errno = ENOMEM;
631 return -1;
632 }
633
/* Drop the parametrization macros, and the REGISTER_ARG helper macro that
   the body of PRINTF_PARSE defines, so that nothing from this file leaks
   into the including translation unit and the file can be included again
   with different parameters.  */
#undef REGISTER_ARG
#undef PRINTF_PARSE
#undef DIRECTIVES
#undef DIRECTIVE
#undef CHAR_T_ONLY_ASCII
#undef CHAR_T
639