1 /* $OpenBSD: vfscanf.c,v 1.21 2006/01/13 21:33:28 millert Exp $ */
2 /*-
3 * Copyright (c) 1990, 1993
4 * The Regents of the University of California. All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * Chris Torek.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34 #include <ctype.h>
35 #include <inttypes.h>
36 #include <stdarg.h>
37 #include <stddef.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include "local.h"
41
42 #ifdef FLOATING_POINT
43 #include "floatio.h"
44 #endif
45
46 #define BUF 513 /* Maximum length of numeric string. */
47
48 /*
49 * Flags used during conversion.
50 */
51 #define LONG 0x00001 /* l: long or double */
52 #define LONGDBL 0x00002 /* L: long double; unimplemented */
53 #define SHORT 0x00004 /* h: short */
54 #define SHORTSHORT 0x00008 /* hh: 8 bit integer */
55 #define LLONG 0x00010 /* ll: long long (+ deprecated q: quad) */
56 #define POINTER 0x00020 /* p: void * (as hex) */
57 #define SIZEINT 0x00040 /* z: (signed) size_t */
58 #define MAXINT 0x00080 /* j: intmax_t */
59 #define PTRINT 0x00100 /* t: ptrdiff_t */
60 #define NOSKIP 0x00200 /* [ or c: do not skip blanks */
61 #define SUPPRESS 0x00400 /* *: suppress assignment */
62 #define UNSIGNED 0x00800 /* %[oupxX] conversions */
63
/*
 * The following are used in numeric conversions only:
 * SIGNOK, HAVESIGN, NDIGITS, DPTOK, and EXPOK are for floating point;
 * SIGNOK, HAVESIGN, NDIGITS, PFXOK, and NZDIGITS are for integral.
 *
 * DPTOK/EXPOK have the same bit values as PFXOK/NZDIGITS.  The floating
 * point scanner only tests the former pair and the integer scanner only
 * tests the latter, and the flag word is reset for every conversion, so
 * the two meanings of each bit are never live at the same time.
 */
#define SIGNOK 0x01000 /* +/- is (still) legal */
#define HAVESIGN 0x02000 /* sign detected */
#define NDIGITS 0x04000 /* no digits detected */

#define DPTOK 0x08000 /* (float) decimal point is still legal */
#define EXPOK 0x10000 /* (float) exponent (e+3, etc) still legal */

#define PFXOK 0x08000 /* 0x prefix is (still) legal */
#define NZDIGITS 0x10000 /* no zero digits detected */
78
79 /*
80 * Conversion types.
81 */
82 #define CT_CHAR 0 /* %c conversion */
83 #define CT_CCL 1 /* %[...] conversion */
84 #define CT_STRING 2 /* %s conversion */
85 #define CT_INT 3 /* integer, i.e., strtoimax or strtoumax */
86 #define CT_FLOAT 4 /* floating, i.e., strtod */
87
88 #define u_char unsigned char
89 #define u_long unsigned long
90
91 static u_char *__sccl(char *, u_char *);
92
93 #if !defined(VFSCANF)
94 #define VFSCANF vfscanf
95 #endif
96
97 /*
98 * vfscanf
99 */
100 int
VFSCANF(FILE * fp,const char * fmt0,__va_list ap)101 VFSCANF(FILE *fp, const char *fmt0, __va_list ap)
102 {
103 u_char *fmt = (u_char *)fmt0;
104 int c; /* character from format, or conversion */
105 size_t width; /* field width, or 0 */
106 char *p; /* points into all kinds of strings */
107 int n; /* handy integer */
108 int flags; /* flags as defined above */
109 char *p0; /* saves original value of p when necessary */
110 int nassigned; /* number of fields assigned */
111 int nread; /* number of characters consumed from fp */
112 int base; /* base argument to strtoimax/strtouimax */
113 char ccltab[256]; /* character class table for %[...] */
114 char buf[BUF]; /* buffer for numeric conversions */
115
116 /* `basefix' is used to avoid `if' tests in the integer scanner */
117 static short basefix[17] =
118 { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
119
120 _SET_ORIENTATION(fp, -1);
121
122 nassigned = 0;
123 nread = 0;
124 base = 0; /* XXX just to keep gcc happy */
125 for (;;) {
126 c = *fmt++;
127 if (c == 0)
128 return (nassigned);
129 if (isspace(c)) {
130 while ((fp->_r > 0 || __srefill(fp) == 0) &&
131 isspace(*fp->_p))
132 nread++, fp->_r--, fp->_p++;
133 continue;
134 }
135 if (c != '%')
136 goto literal;
137 width = 0;
138 flags = 0;
139 /*
140 * switch on the format. continue if done;
141 * break once format type is derived.
142 */
143 again: c = *fmt++;
144 switch (c) {
145 case '%':
146 literal:
147 if (fp->_r <= 0 && __srefill(fp))
148 goto input_failure;
149 if (*fp->_p != c)
150 goto match_failure;
151 fp->_r--, fp->_p++;
152 nread++;
153 continue;
154
155 case '*':
156 flags |= SUPPRESS;
157 goto again;
158 case 'j':
159 flags |= MAXINT;
160 goto again;
161 case 'L':
162 flags |= LONGDBL;
163 goto again;
164 case 'h':
165 if (*fmt == 'h') {
166 fmt++;
167 flags |= SHORTSHORT;
168 } else {
169 flags |= SHORT;
170 }
171 goto again;
172 case 'l':
173 if (*fmt == 'l') {
174 fmt++;
175 flags |= LLONG;
176 } else {
177 flags |= LONG;
178 }
179 goto again;
180 case 'q':
181 flags |= LLONG; /* deprecated */
182 goto again;
183 case 't':
184 flags |= PTRINT;
185 goto again;
186 case 'z':
187 flags |= SIZEINT;
188 goto again;
189
190 case '0': case '1': case '2': case '3': case '4':
191 case '5': case '6': case '7': case '8': case '9':
192 width = width * 10 + c - '0';
193 goto again;
194
195 /*
196 * Conversions.
197 * Those marked `compat' are for 4.[123]BSD compatibility.
198 *
199 * (According to ANSI, E and X formats are supposed
200 * to the same as e and x. Sorry about that.)
201 */
202 case 'D': /* compat */
203 flags |= LONG;
204 /* FALLTHROUGH */
205 case 'd':
206 c = CT_INT;
207 base = 10;
208 break;
209
210 case 'i':
211 c = CT_INT;
212 base = 0;
213 break;
214
215 case 'O': /* compat */
216 flags |= LONG;
217 /* FALLTHROUGH */
218 case 'o':
219 c = CT_INT;
220 flags |= UNSIGNED;
221 base = 8;
222 break;
223
224 case 'u':
225 c = CT_INT;
226 flags |= UNSIGNED;
227 base = 10;
228 break;
229
230 case 'X':
231 case 'x':
232 flags |= PFXOK; /* enable 0x prefixing */
233 c = CT_INT;
234 flags |= UNSIGNED;
235 base = 16;
236 break;
237
238 #ifdef FLOATING_POINT
239 case 'E':
240 case 'G':
241 case 'e':
242 case 'f':
243 case 'g':
244 c = CT_FLOAT;
245 break;
246 #endif
247
248 case 's':
249 c = CT_STRING;
250 break;
251
252 case '[':
253 fmt = __sccl(ccltab, fmt);
254 flags |= NOSKIP;
255 c = CT_CCL;
256 break;
257
258 case 'c':
259 flags |= NOSKIP;
260 c = CT_CHAR;
261 break;
262
263 case 'p': /* pointer format is like hex */
264 flags |= POINTER | PFXOK;
265 c = CT_INT;
266 flags |= UNSIGNED;
267 base = 16;
268 break;
269
270 case 'n':
271 if (flags & SUPPRESS)
272 continue;
273 if (flags & SHORTSHORT)
274 *va_arg(ap, __signed char *) = nread;
275 else if (flags & SHORT)
276 *va_arg(ap, short *) = nread;
277 else if (flags & LONG)
278 *va_arg(ap, long *) = nread;
279 else if (flags & SIZEINT)
280 *va_arg(ap, ssize_t *) = nread;
281 else if (flags & PTRINT)
282 *va_arg(ap, ptrdiff_t *) = nread;
283 else if (flags & LLONG)
284 *va_arg(ap, long long *) = nread;
285 else if (flags & MAXINT)
286 *va_arg(ap, intmax_t *) = nread;
287 else
288 *va_arg(ap, int *) = nread;
289 continue;
290
291 /*
292 * Disgusting backwards compatibility hacks. XXX
293 */
294 case '\0': /* compat */
295 return (EOF);
296
297 default: /* compat */
298 if (isupper(c))
299 flags |= LONG;
300 c = CT_INT;
301 base = 10;
302 break;
303 }
304
305 /*
306 * We have a conversion that requires input.
307 */
308 if (fp->_r <= 0 && __srefill(fp))
309 goto input_failure;
310
311 /*
312 * Consume leading white space, except for formats
313 * that suppress this.
314 */
315 if ((flags & NOSKIP) == 0) {
316 while (isspace(*fp->_p)) {
317 nread++;
318 if (--fp->_r > 0)
319 fp->_p++;
320 else if (__srefill(fp))
321 goto input_failure;
322 }
323 /*
324 * Note that there is at least one character in
325 * the buffer, so conversions that do not set NOSKIP
326 * ca no longer result in an input failure.
327 */
328 }
329
330 /*
331 * Do the conversion.
332 */
333 switch (c) {
334
335 case CT_CHAR:
336 /* scan arbitrary characters (sets NOSKIP) */
337 if (width == 0)
338 width = 1;
339 if (flags & SUPPRESS) {
340 size_t sum = 0;
341 for (;;) {
342 if ((n = fp->_r) < (int)width) {
343 sum += n;
344 width -= n;
345 fp->_p += n;
346 if (__srefill(fp)) {
347 if (sum == 0)
348 goto input_failure;
349 break;
350 }
351 } else {
352 sum += width;
353 fp->_r -= width;
354 fp->_p += width;
355 break;
356 }
357 }
358 nread += sum;
359 } else {
360 size_t r = fread((void *)va_arg(ap, char *), 1,
361 width, fp);
362
363 if (r == 0)
364 goto input_failure;
365 nread += r;
366 nassigned++;
367 }
368 break;
369
370 case CT_CCL:
371 /* scan a (nonempty) character class (sets NOSKIP) */
372 if (width == 0)
373 width = (size_t)~0; /* `infinity' */
374 /* take only those things in the class */
375 if (flags & SUPPRESS) {
376 n = 0;
377 while (ccltab[*fp->_p]) {
378 n++, fp->_r--, fp->_p++;
379 if (--width == 0)
380 break;
381 if (fp->_r <= 0 && __srefill(fp)) {
382 if (n == 0)
383 goto input_failure;
384 break;
385 }
386 }
387 if (n == 0)
388 goto match_failure;
389 } else {
390 p0 = p = va_arg(ap, char *);
391 while (ccltab[*fp->_p]) {
392 fp->_r--;
393 *p++ = *fp->_p++;
394 if (--width == 0)
395 break;
396 if (fp->_r <= 0 && __srefill(fp)) {
397 if (p == p0)
398 goto input_failure;
399 break;
400 }
401 }
402 n = p - p0;
403 if (n == 0)
404 goto match_failure;
405 *p = '\0';
406 nassigned++;
407 }
408 nread += n;
409 break;
410
411 case CT_STRING:
412 /* like CCL, but zero-length string OK, & no NOSKIP */
413 if (width == 0)
414 width = (size_t)~0;
415 if (flags & SUPPRESS) {
416 n = 0;
417 while (!isspace(*fp->_p)) {
418 n++, fp->_r--, fp->_p++;
419 if (--width == 0)
420 break;
421 if (fp->_r <= 0 && __srefill(fp))
422 break;
423 }
424 nread += n;
425 } else {
426 p0 = p = va_arg(ap, char *);
427 while (!isspace(*fp->_p)) {
428 fp->_r--;
429 *p++ = *fp->_p++;
430 if (--width == 0)
431 break;
432 if (fp->_r <= 0 && __srefill(fp))
433 break;
434 }
435 *p = '\0';
436 nread += p - p0;
437 nassigned++;
438 }
439 continue;
440
441 case CT_INT:
442 /* scan an integer as if by strtoimax/strtoumax */
443 #ifdef hardway
444 if (width == 0 || width > sizeof(buf) - 1)
445 width = sizeof(buf) - 1;
446 #else
447 /* size_t is unsigned, hence this optimisation */
448 if (--width > sizeof(buf) - 2)
449 width = sizeof(buf) - 2;
450 width++;
451 #endif
452 flags |= SIGNOK | NDIGITS | NZDIGITS;
453 for (p = buf; width; width--) {
454 c = *fp->_p;
455 /*
456 * Switch on the character; `goto ok'
457 * if we accept it as a part of number.
458 */
459 switch (c) {
460
461 /*
462 * The digit 0 is always legal, but is
463 * special. For %i conversions, if no
464 * digits (zero or nonzero) have been
465 * scanned (only signs), we will have
466 * base==0. In that case, we should set
467 * it to 8 and enable 0x prefixing.
468 * Also, if we have not scanned zero digits
469 * before this, do not turn off prefixing
470 * (someone else will turn it off if we
471 * have scanned any nonzero digits).
472 */
473 case '0':
474 if (base == 0) {
475 base = 8;
476 flags |= PFXOK;
477 }
478 if (flags & NZDIGITS)
479 flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
480 else
481 flags &= ~(SIGNOK|PFXOK|NDIGITS);
482 goto ok;
483
484 /* 1 through 7 always legal */
485 case '1': case '2': case '3':
486 case '4': case '5': case '6': case '7':
487 base = basefix[base];
488 flags &= ~(SIGNOK | PFXOK | NDIGITS);
489 goto ok;
490
491 /* digits 8 and 9 ok iff decimal or hex */
492 case '8': case '9':
493 base = basefix[base];
494 if (base <= 8)
495 break; /* not legal here */
496 flags &= ~(SIGNOK | PFXOK | NDIGITS);
497 goto ok;
498
499 /* letters ok iff hex */
500 case 'A': case 'B': case 'C':
501 case 'D': case 'E': case 'F':
502 case 'a': case 'b': case 'c':
503 case 'd': case 'e': case 'f':
504 /* no need to fix base here */
505 if (base <= 10)
506 break; /* not legal here */
507 flags &= ~(SIGNOK | PFXOK | NDIGITS);
508 goto ok;
509
510 /* sign ok only as first character */
511 case '+': case '-':
512 if (flags & SIGNOK) {
513 flags &= ~SIGNOK;
514 flags |= HAVESIGN;
515 goto ok;
516 }
517 break;
518
519 /*
520 * x ok iff flag still set and 2nd char (or
521 * 3rd char if we have a sign).
522 */
523 case 'x': case 'X':
524 if ((flags & PFXOK) && p ==
525 buf + 1 + !!(flags & HAVESIGN)) {
526 base = 16; /* if %i */
527 flags &= ~PFXOK;
528 goto ok;
529 }
530 break;
531 }
532
533 /*
534 * If we got here, c is not a legal character
535 * for a number. Stop accumulating digits.
536 */
537 break;
538 ok:
539 /*
540 * c is legal: store it and look at the next.
541 */
542 *p++ = c;
543 if (--fp->_r > 0)
544 fp->_p++;
545 else if (__srefill(fp))
546 break; /* EOF */
547 }
548 /*
549 * If we had only a sign, it is no good; push
550 * back the sign. If the number ends in `x',
551 * it was [sign] '0' 'x', so push back the x
552 * and treat it as [sign] '0'.
553 */
554 if (flags & NDIGITS) {
555 if (p > buf)
556 (void) ungetc(*(u_char *)--p, fp);
557 goto match_failure;
558 }
559 c = ((u_char *)p)[-1];
560 if (c == 'x' || c == 'X') {
561 --p;
562 (void) ungetc(c, fp);
563 }
564 if ((flags & SUPPRESS) == 0) {
565 uintmax_t res;
566
567 *p = '\0';
568 if (flags & UNSIGNED)
569 res = strtoumax(buf, NULL, base);
570 else
571 res = strtoimax(buf, NULL, base);
572 if (flags & POINTER)
573 *va_arg(ap, void **) =
574 (void *)(uintptr_t)res;
575 else if (flags & MAXINT)
576 *va_arg(ap, intmax_t *) = res;
577 else if (flags & LLONG)
578 *va_arg(ap, long long *) = res;
579 else if (flags & SIZEINT)
580 *va_arg(ap, ssize_t *) = res;
581 else if (flags & PTRINT)
582 *va_arg(ap, ptrdiff_t *) = res;
583 else if (flags & LONG)
584 *va_arg(ap, long *) = res;
585 else if (flags & SHORT)
586 *va_arg(ap, short *) = res;
587 else if (flags & SHORTSHORT)
588 *va_arg(ap, __signed char *) = res;
589 else
590 *va_arg(ap, int *) = res;
591 nassigned++;
592 }
593 nread += p - buf;
594 break;
595
596 #ifdef FLOATING_POINT
597 case CT_FLOAT:
598 /* scan a floating point number as if by strtod */
599 #ifdef hardway
600 if (width == 0 || width > sizeof(buf) - 1)
601 width = sizeof(buf) - 1;
602 #else
603 /* size_t is unsigned, hence this optimisation */
604 if (--width > sizeof(buf) - 2)
605 width = sizeof(buf) - 2;
606 width++;
607 #endif
608 flags |= SIGNOK | NDIGITS | DPTOK | EXPOK;
609 for (p = buf; width; width--) {
610 c = *fp->_p;
611 /*
612 * This code mimicks the integer conversion
613 * code, but is much simpler.
614 */
615 switch (c) {
616
617 case '0': case '1': case '2': case '3':
618 case '4': case '5': case '6': case '7':
619 case '8': case '9':
620 flags &= ~(SIGNOK | NDIGITS);
621 goto fok;
622
623 case '+': case '-':
624 if (flags & SIGNOK) {
625 flags &= ~SIGNOK;
626 goto fok;
627 }
628 break;
629 case '.':
630 if (flags & DPTOK) {
631 flags &= ~(SIGNOK | DPTOK);
632 goto fok;
633 }
634 break;
635 case 'e': case 'E':
636 /* no exponent without some digits */
637 if ((flags&(NDIGITS|EXPOK)) == EXPOK) {
638 flags =
639 (flags & ~(EXPOK|DPTOK)) |
640 SIGNOK | NDIGITS;
641 goto fok;
642 }
643 break;
644 }
645 break;
646 fok:
647 *p++ = c;
648 if (--fp->_r > 0)
649 fp->_p++;
650 else if (__srefill(fp))
651 break; /* EOF */
652 }
653 /*
654 * If no digits, might be missing exponent digits
655 * (just give back the exponent) or might be missing
656 * regular digits, but had sign and/or decimal point.
657 */
658 if (flags & NDIGITS) {
659 if (flags & EXPOK) {
660 /* no digits at all */
661 while (p > buf)
662 ungetc(*(u_char *)--p, fp);
663 goto match_failure;
664 }
665 /* just a bad exponent (e and maybe sign) */
666 c = *(u_char *)--p;
667 if (c != 'e' && c != 'E') {
668 (void) ungetc(c, fp);/* sign */
669 c = *(u_char *)--p;
670 }
671 (void) ungetc(c, fp);
672 }
673 if ((flags & SUPPRESS) == 0) {
674 double res;
675
676 *p = '\0';
677 res = strtod(buf, (char **) NULL);
678 if (flags & LONGDBL)
679 *va_arg(ap, long double *) = res;
680 else if (flags & LONG)
681 *va_arg(ap, double *) = res;
682 else
683 *va_arg(ap, float *) = res;
684 nassigned++;
685 }
686 nread += p - buf;
687 break;
688 #endif /* FLOATING_POINT */
689 }
690 }
691 input_failure:
692 return (nassigned ? nassigned : -1);
693 match_failure:
694 return (nassigned);
695 }
696
/*
 * Fill in the given table from the scanset at the given format
 * (just after `[').  Return a pointer to the character past the
 * closing `]', or to the terminating NUL if the format ends before
 * the scanset is closed.  The table has a 1 wherever characters
 * should be considered part of the scanset and a 0 elsewhere.
 *
 * tab must have room for 256 entries; every one of them is written.
 * fmt is read as unsigned characters so each can index tab directly.
 */
static unsigned char *
__sccl(char *tab, unsigned char *fmt)
{
	int c, n, v;

	/* first `clear' the whole table */
	c = *fmt++;		/* first char hat => negated scanset */
	if (c == '^') {
		v = 1;		/* default => accept */
		c = *fmt++;	/* get new first char */
	} else
		v = 0;		/* default => reject */
	for (n = 0; n < 256; n++)
		tab[n] = v;
	if (c == 0)
		return (fmt - 1);/* format ended before closing ] */

	/*
	 * Now set the entries corresponding to the actual scanset
	 * to the opposite of the above.
	 *
	 * The first character may be ']' (or '-') without being special;
	 * the last character may be '-'.
	 */
	v = 1 - v;
	for (;;) {
		tab[c] = v;		/* take character c */
doswitch:
		n = *fmt++;		/* and examine the next */
		switch (n) {

		case 0:			/* format ended too soon */
			return (fmt - 1);

		case '-':
			/*
			 * A scanset of the form
			 *	[01+-]
			 * is defined as `the digit 0, the digit 1,
			 * the character +, the character -', but
			 * the effect of a scanset such as
			 *	[a-zA-Z0-9]
			 * is implementation defined.  The V7 Unix
			 * scanf treats `a-z' as `the letters a through
			 * z', but treats `a-a' as `the letter a, the
			 * character -, and the letter a'.
			 *
			 * For compatibility, the `-' is not considered
			 * to define a range if the character following
			 * it is either a close bracket (required by ANSI)
			 * or is not numerically greater than the character
			 * we just stored in the table (c).
			 *
			 * The test must be `<=', not `<': with n == c the
			 * fill loop below would still store one entry past
			 * c, accepting an extra character and, for c == 255,
			 * writing beyond the end of the table.
			 */
			n = *fmt;
			if (n == ']' || n <= c) {
				c = '-';
				break;	/* resume the for(;;) */
			}
			fmt++;
			/* fill in the range; n > c is guaranteed here */
			while (c < n)
				tab[++c] = v;
			/*
			 * XXX another disgusting compatibility hack:
			 * the V7 Unix scanf also treats formats such as
			 * [a-c-e] as `the letters a through e', so look
			 * at the next character without storing anything.
			 * This too is permitted by the standard....
			 */
			goto doswitch;

		case ']':		/* end of scanset */
			return (fmt);

		default:		/* just another character */
			c = n;
			break;
		}
	}
	/* NOTREACHED */
}
792