• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*	$OpenBSD: vfwscanf.c,v 1.4 2014/03/19 05:17:01 guenther Exp $ */
2 /*-
3  * Copyright (c) 1990, 1993
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * Chris Torek.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 #include <inttypes.h>
35 #include <limits.h>
36 #include <locale.h>
37 #include <stdarg.h>
38 #include <stddef.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <wctype.h>
43 #include "local.h"
44 
45 #include <platform/bionic/macros.h>
46 
#define BUF 513 /* Maximum length of numeric string. */

/*
 * Flags used during conversion.
 * Each flag is a distinct bit so they can be OR-ed together in `flags`.
 */
#define LONG 0x00001       /* l: long or double */
#define LONGDBL 0x00002    /* L: long double */
#define SHORT 0x00004      /* h: short */
#define SHORTSHORT 0x00008 /* hh: 8 bit integer */
#define LLONG 0x00010      /* ll: long long (+ deprecated q: quad) */
#define POINTER 0x00020    /* p: void * (as hex) */
#define SIZEINT 0x00040    /* z: (signed) size_t */
#define MAXINT 0x00080     /* j: intmax_t */
#define PTRINT 0x00100     /* t: ptrdiff_t */
#define NOSKIP 0x00200     /* [ or c: do not skip blanks */
#define SUPPRESS 0x00400   /* *: suppress assignment */
#define UNSIGNED 0x00800   /* %[oupxX] conversions */

/*
 * The following are used in numeric conversions only:
 * SIGNOK, HAVESIGN, NDIGITS, DPTOK, and EXPOK are for floating point;
 * SIGNOK, HAVESIGN, NDIGITS, PFBOK, PFXOK, and NZDIGITS are for integral.
 */
#define SIGNOK   0x01000 /* +/- is (still) legal */
#define HAVESIGN 0x02000 /* sign detected */
#define NDIGITS  0x04000 /* no digits detected */

#define DPTOK    0x08000 /* (float) decimal point is still legal */
#define EXPOK    0x10000 /* (float) exponent (e+3, etc) still legal */

#define PFBOK    0x20000 /* 0b prefix is (still) legal */
#define PFXOK    0x40000 /* 0x prefix is (still) legal */
#define NZDIGITS 0x80000 /* no zero digits detected */

/*
 * Conversion types.
 */
#define CT_CHAR 0   /* %c conversion */
#define CT_CCL 1    /* %[...] conversion */
#define CT_STRING 2 /* %s conversion */
#define CT_INT 3    /* integer, i.e., wcstoimax or wcstoumax */
#define CT_FLOAT 4  /* floating, i.e., wcstod */
89 
// An interpretive version of __sccl from vfscanf.c --- a table of all wchar_t values would
// be a little too expensive, and some kind of compressed version isn't worth the trouble.
//
// Reports whether `wc` is accepted by the scanf character class starting at `ccl`.
// `ccl` points at the first character after the '[' of a "%[...]" directive, so it may
// begin with '^' (negation). Parsing of the set stops at the closing ']' or at the
// terminating NUL, whichever comes first.
//
// Returns true if `wc` is accepted: for a plain set that means `wc` is listed (literally
// or through an "a-z" style range); for a negated set it means `wc` is not listed.
static inline bool in_ccl(wchar_t wc, const wchar_t* ccl) {
  // Is this a negated set?
  bool member_result = true;
  if (*ccl == '^') {
    member_result = false;
    ++ccl;
  }

  // The first character may be ']' or '-' without being special.
  if (*ccl == '-' || *ccl == ']') {
    // A literal match?
    if (*ccl == wc) return member_result;
    ++ccl;
  }

  while (*ccl && *ccl != ']') {
    // The last character may be '-' without being special.
    if (*ccl == '-' && ccl[1] != '\0' && ccl[1] != ']') {
      // A '-' is only reached here after at least one set character has been
      // stepped over, so ccl[-1] is always inside the set.
      wchar_t first = *(ccl - 1);
      wchar_t last = *(ccl + 1);
      if (first <= last) {
        // In the range?
        if (wc >= first && wc <= last) return member_result;
        // Skip both the '-' and the range's upper bound.
        ccl += 2;
        continue;
      }
      // A '-' is not considered to be part of a range if the character after
      // is not greater than the character before, so fall through...
    }
    // A literal match?
    if (*ccl == wc) return member_result;
    ++ccl;
  }
  // Ran off the end of the set without finding `wc`.
  return !member_result;
}
127 
128 #pragma GCC diagnostic push
129 #pragma GCC diagnostic ignored "-Wframe-larger-than="
130 
131 /*
132  * vfwscanf
133  */
__vfwscanf(FILE * __restrict fp,const wchar_t * __restrict fmt,__va_list ap)134 int __vfwscanf(FILE* __restrict fp, const wchar_t* __restrict fmt, __va_list ap) {
135   wint_t c;               /* character from format, or conversion */
136   size_t width;           /* field width, or 0 */
137   wchar_t* p;             /* points into all kinds of strings */
138   int n;                  /* handy integer */
139   int flags;              /* flags as defined above */
140   wchar_t* p0;            /* saves original value of p when necessary */
141   int nassigned;          /* number of fields assigned */
142   int nconversions;       /* number of conversions */
143   int nread;              /* number of characters consumed from fp */
144   int base;               /* base argument to strtoimax/strtouimax */
145   wchar_t buf[BUF];       /* buffer for numeric conversions */
146   const wchar_t* ccl;
147   wint_t wi;              /* handy wint_t */
148   char* mbp;              /* multibyte string pointer for %c %s %[ */
149   size_t nconv;           /* number of bytes in mb. conversion */
150   char mbbuf[MB_LEN_MAX]; /* temporary mb. character buffer */
151   mbstate_t mbs;
152 
153   _SET_ORIENTATION(fp, 1);
154 
155   nassigned = 0;
156   nconversions = 0;
157   nread = 0;
158   base = 0; /* XXX just to keep gcc happy */
159   for (;;) {
160     c = *fmt++;
161     if (c == 0) {
162       return (nassigned);
163     }
164     if (iswspace(c)) {
165       while ((c = __fgetwc_unlock(fp)) != WEOF && iswspace(c))
166         ;
167       if (c != WEOF) __ungetwc(c, fp);
168       continue;
169     }
170     if (c != '%') goto literal;
171     width = 0;
172     flags = 0;
173     /*
174      * switch on the format.  continue if done;
175      * break once format type is derived.
176      */
177   again:
178     c = *fmt++;
179     switch (c) {
180       case '%':
181       literal:
182         if ((wi = __fgetwc_unlock(fp)) == WEOF) goto input_failure;
183         if (wi != c) {
184           __ungetwc(wi, fp);
185           goto match_failure;
186         }
187         nread++;
188         continue;
189 
190       case '*':
191         flags |= SUPPRESS;
192         goto again;
193       case 'j':
194         flags |= MAXINT;
195         goto again;
196       case 'L':
197         flags |= LONGDBL;
198         goto again;
199       case 'h':
200         if (*fmt == 'h') {
201           fmt++;
202           flags |= SHORTSHORT;
203         } else {
204           flags |= SHORT;
205         }
206         goto again;
207       case 'l':
208         if (*fmt == 'l') {
209           fmt++;
210           flags |= LLONG;
211         } else {
212           flags |= LONG;
213         }
214         goto again;
215       case 'q':
216         flags |= LLONG; /* deprecated */
217         goto again;
218       case 't':
219         flags |= PTRINT;
220         goto again;
221       case 'z':
222         flags |= SIZEINT;
223         goto again;
224 
225       case '0':
226       case '1':
227       case '2':
228       case '3':
229       case '4':
230       case '5':
231       case '6':
232       case '7':
233       case '8':
234       case '9':
235         width = width * 10 + c - '0';
236         goto again;
237 
238       /*
239        * Conversions.
240        * Those marked `compat' are for 4.[123]BSD compatibility.
241        */
242       case 'b':
243         c = CT_INT;
244         base = 2;
245         flags |= PFBOK; /* enable 0b prefixing */
246         break;
247 
248       case 'D': /* compat */
249         flags |= LONG;
250         __BIONIC_FALLTHROUGH;
251       case 'd':
252         c = CT_INT;
253         base = 10;
254         break;
255 
256       case 'i':
257         c = CT_INT;
258         base = 0;
259         break;
260 
261       case 'O': /* compat */
262         flags |= LONG;
263         __BIONIC_FALLTHROUGH;
264       case 'o':
265         c = CT_INT;
266         flags |= UNSIGNED;
267         base = 8;
268         break;
269 
270       case 'u':
271         c = CT_INT;
272         flags |= UNSIGNED;
273         base = 10;
274         break;
275 
276       case 'X':
277       case 'x':
278         flags |= PFXOK; /* enable 0x prefixing */
279         c = CT_INT;
280         flags |= UNSIGNED;
281         base = 16;
282         break;
283 
284       case 'e':
285       case 'E':
286       case 'f':
287       case 'F':
288       case 'g':
289       case 'G':
290       case 'a':
291       case 'A':
292         c = CT_FLOAT;
293         break;
294 
295       case 's':
296         c = CT_STRING;
297         break;
298 
299       case '[':
300         ccl = fmt;
301         if (*fmt == '^') fmt++;
302         if (*fmt == ']') fmt++;
303         while (*fmt != '\0' && *fmt != ']') fmt++;
304         fmt++;
305         flags |= NOSKIP;
306         c = CT_CCL;
307         break;
308 
309       case 'c':
310         flags |= NOSKIP;
311         c = CT_CHAR;
312         break;
313 
314       case 'p': /* pointer format is like hex */
315         flags |= POINTER | PFXOK;
316         c = CT_INT;
317         flags |= UNSIGNED;
318         base = 16;
319         break;
320 
321       case 'n':
322         nconversions++;
323         if (flags & SUPPRESS) continue;
324         if (flags & SHORTSHORT)
325           *va_arg(ap, signed char*) = nread;
326         else if (flags & SHORT)
327           *va_arg(ap, short*) = nread;
328         else if (flags & LONG)
329           *va_arg(ap, long*) = nread;
330         else if (flags & SIZEINT)
331           *va_arg(ap, ssize_t*) = nread;
332         else if (flags & PTRINT)
333           *va_arg(ap, ptrdiff_t*) = nread;
334         else if (flags & LLONG)
335           *va_arg(ap, long long*) = nread;
336         else if (flags & MAXINT)
337           *va_arg(ap, intmax_t*) = nread;
338         else
339           *va_arg(ap, int*) = nread;
340         continue;
341 
342       /*
343        * Disgusting backwards compatibility hacks.	XXX
344        */
345       case '\0': /* compat */
346         return (EOF);
347 
348       default: /* compat */
349         if (iswupper(c)) flags |= LONG;
350         c = CT_INT;
351         base = 10;
352         break;
353     }
354 
355     /*
356      * Consume leading white space, except for formats
357      * that suppress this.
358      */
359     if ((flags & NOSKIP) == 0) {
360       while ((wi = __fgetwc_unlock(fp)) != WEOF && iswspace(wi)) nread++;
361       if (wi == WEOF) goto input_failure;
362       __ungetwc(wi, fp);
363     }
364 
365     /*
366      * Do the conversion.
367      */
368     switch (c) {
369       case CT_CHAR:
370         /* scan arbitrary characters (sets NOSKIP) */
371         if (width == 0) width = 1;
372         if (flags & LONG) {
373           if (!(flags & SUPPRESS)) p = va_arg(ap, wchar_t*);
374           n = 0;
375           while (width-- != 0 && (wi = __fgetwc_unlock(fp)) != WEOF) {
376             if (!(flags & SUPPRESS)) *p++ = (wchar_t)wi;
377             n++;
378           }
379           if (n == 0) goto input_failure;
380           nread += n;
381           if (!(flags & SUPPRESS)) nassigned++;
382         } else {
383           if (!(flags & SUPPRESS)) mbp = va_arg(ap, char*);
384           n = 0;
385           memset(&mbs, 0, sizeof(mbs));
386           while (width != 0 && (wi = __fgetwc_unlock(fp)) != WEOF) {
387             if (width >= MB_CUR_MAX && !(flags & SUPPRESS)) {
388               nconv = wcrtomb(mbp, wi, &mbs);
389               if (nconv == (size_t)-1) goto input_failure;
390             } else {
391               nconv = wcrtomb(mbbuf, wi, &mbs);
392               if (nconv == (size_t)-1) goto input_failure;
393               if (nconv > width) {
394                 __ungetwc(wi, fp);
395                 break;
396               }
397               if (!(flags & SUPPRESS)) memcpy(mbp, mbbuf, nconv);
398             }
399             if (!(flags & SUPPRESS)) mbp += nconv;
400             width -= nconv;
401             n++;
402           }
403           if (n == 0) goto input_failure;
404           nread += n;
405           if (!(flags & SUPPRESS)) nassigned++;
406         }
407         nconversions++;
408         break;
409 
410       case CT_CCL:
411       case CT_STRING:
412         // CT_CCL: scan a (nonempty) character class (sets NOSKIP).
413         // CT_STRING: like CCL, but zero-length string OK, & no NOSKIP.
414         if (width == 0) width = (size_t)~0; // 'infinity'.
415         if ((flags & SUPPRESS) && (flags & LONG)) {
416           n = 0;
417           while ((wi = __fgetwc_unlock(fp)) != WEOF && width-- != 0 && ((c == CT_CCL && in_ccl(wi, ccl)) || (c == CT_STRING && !iswspace(wi)))) n++;
418           if (wi != WEOF) __ungetwc(wi, fp);
419         } else if (flags & LONG) {
420           p0 = p = va_arg(ap, wchar_t*);
421           while ((wi = __fgetwc_unlock(fp)) != WEOF && width-- != 0 && ((c == CT_CCL && in_ccl(wi, ccl)) || (c == CT_STRING && !iswspace(wi)))) {
422             *p++ = (wchar_t)wi;
423           }
424           if (wi != WEOF) __ungetwc(wi, fp);
425           n = p - p0;
426         } else {
427           if (!(flags & SUPPRESS)) mbp = va_arg(ap, char*);
428           n = 0;
429           memset(&mbs, 0, sizeof(mbs));
430           while ((wi = __fgetwc_unlock(fp)) != WEOF && width != 0 && ((c == CT_CCL && in_ccl(wi, ccl)) || (c == CT_STRING && !iswspace(wi)))) {
431             if (width >= MB_CUR_MAX && !(flags & SUPPRESS)) {
432               nconv = wcrtomb(mbp, wi, &mbs);
433               if (nconv == (size_t)-1) goto input_failure;
434             } else {
435               nconv = wcrtomb(mbbuf, wi, &mbs);
436               if (nconv == (size_t)-1) goto input_failure;
437               if (nconv > width) break;
438               if (!(flags & SUPPRESS)) memcpy(mbp, mbbuf, nconv);
439             }
440             if (!(flags & SUPPRESS)) mbp += nconv;
441             width -= nconv;
442             n++;
443           }
444           if (wi != WEOF) __ungetwc(wi, fp);
445         }
446         if (c == CT_CCL && n == 0) goto match_failure;
447         if (!(flags & SUPPRESS)) {
448           if (flags & LONG) {
449             *p = L'\0';
450           } else {
451             *mbp = '\0';
452           }
453           ++nassigned;
454         }
455         nread += n;
456         nconversions++;
457         break;
458 
459       case CT_INT:
460         /* scan an integer as if by strtoimax/strtoumax */
461         if (width == 0 || width > sizeof(buf) / sizeof(*buf) - 1)
462           width = sizeof(buf) / sizeof(*buf) - 1;
463         flags |= SIGNOK | NDIGITS | NZDIGITS;
464         for (p = buf; width; width--) {
465           c = __fgetwc_unlock(fp);
466           /*
467            * Switch on the character; `goto ok'
468            * if we accept it as a part of number.
469            */
470           switch (c) {
471             /*
472              * The digit 0 is always legal, but is
473              * special.  For %i conversions, if no
474              * digits (zero or nonzero) have been
475              * scanned (only signs), we will have
476              * base==0.  In that case, we should set
477              * it to 8 and enable 0b/0x prefixing.
478              * Also, if we have not scanned zero digits
479              * before this, do not turn off prefixing
480              * (someone else will turn it off if we
481              * have scanned any nonzero digits).
482              */
483             case '0':
484               if (base == 0) {
485                 base = 8;
486                 flags |= PFBOK | PFXOK;
487               }
488               if (flags & NZDIGITS) {
489                 flags &= ~(SIGNOK | NZDIGITS | NDIGITS);
490               } else {
491                 flags &= ~(SIGNOK | PFBOK | PFXOK | NDIGITS);
492               }
493               goto ok;
494 
495             /* 1 through 7 always legal */
496             case 'B':
497             case 'b':
498               // Is this 'b' potentially part of an "0b" prefix?
499               if ((flags & PFBOK) && p == buf + 1 + !!(flags & HAVESIGN)) {
500                 base = 2;
501                 flags &= ~PFBOK;
502                 goto ok;
503               }
504               // No? Fall through and see if it's a hex digit instead then...
505               __BIONIC_FALLTHROUGH;
506             case '1':
507             case '2':
508             case '3':
509             case '4':
510             case '5':
511             case '6':
512             case '7':
513             case '8':
514             case '9':
515             case 'A':
516             case 'C':
517             case 'D':
518             case 'E':
519             case 'F':
520             case 'a':
521             case 'c':
522             case 'd':
523             case 'e':
524             case 'f':
525               if (base == 0) base = 10;
526               if (base != 16 && (int)(c - '0') >= base) break; /* not legal here */
527               flags &= ~(SIGNOK | PFBOK | PFXOK | NDIGITS);
528               goto ok;
529 
530             /* sign ok only as first character */
531             case '+':
532             case '-':
533               if (flags & SIGNOK) {
534                 flags &= ~SIGNOK;
535                 flags |= HAVESIGN;
536                 goto ok;
537               }
538               break;
539 
540             /*
541              * x ok iff flag still set and 2nd char (or
542              * 3rd char if we have a sign).
543              */
544             case 'x':
545             case 'X':
546               if ((flags & PFXOK) && p == buf + 1 + !!(flags & HAVESIGN)) {
547                 base = 16; /* if %i */
548                 flags &= ~PFXOK;
549                 goto ok;
550               }
551               break;
552           }
553 
554           /*
555            * If we got here, c is not a legal character
556            * for a number.  Stop accumulating digits.
557            */
558           if (c != WEOF) __ungetwc(c, fp);
559           break;
560         ok:
561           /*
562            * c is legal: store it and look at the next.
563            */
564           *p++ = (wchar_t)c;
565         }
566         /*
567          * If we had only a sign, it is no good; push back the sign.
568          * If the number was `[-+]0[BbXx]`, push back and treat it
569          * as `[-+]0`.
570          */
571         if (flags & NDIGITS) {
572           if (p > buf) __ungetwc(*--p, fp);
573           goto match_failure;
574         }
575         c = p[-1];
576         if ((base == 2 && (c == 'b' || c == 'B')) || c == 'x' || c == 'X') {
577           --p;
578           __ungetwc(c, fp);
579         }
580         if ((flags & SUPPRESS) == 0) {
581           uintmax_t res;
582 
583           *p = '\0';
584           if (flags & UNSIGNED)
585             res = wcstoimax(buf, NULL, base);
586           else
587             res = wcstoumax(buf, NULL, base);
588           if (flags & POINTER)
589             *va_arg(ap, void**) = (void*)(uintptr_t)res;
590           else if (flags & MAXINT)
591             *va_arg(ap, intmax_t*) = res;
592           else if (flags & LLONG)
593             *va_arg(ap, long long*) = res;
594           else if (flags & SIZEINT)
595             *va_arg(ap, ssize_t*) = res;
596           else if (flags & PTRINT)
597             *va_arg(ap, ptrdiff_t*) = res;
598           else if (flags & LONG)
599             *va_arg(ap, long*) = res;
600           else if (flags & SHORT)
601             *va_arg(ap, short*) = res;
602           else if (flags & SHORTSHORT)
603             *va_arg(ap, signed char*) = res;
604           else
605             *va_arg(ap, int*) = res;
606           nassigned++;
607         }
608         nread += p - buf;
609         nconversions++;
610         break;
611 
612       case CT_FLOAT:
613         /* scan a floating point number as if by strtod */
614         if (width == 0 || width > sizeof(buf) / sizeof(*buf) - 1)
615           width = sizeof(buf) / sizeof(*buf) - 1;
616         if ((width = wparsefloat(fp, buf, buf + width)) == 0) goto match_failure;
617         if ((flags & SUPPRESS) == 0) {
618           if (flags & LONGDBL) {
619             long double res = wcstold(buf, &p);
620             *va_arg(ap, long double*) = res;
621           } else if (flags & LONG) {
622             double res = wcstod(buf, &p);
623             *va_arg(ap, double*) = res;
624           } else {
625             float res = wcstof(buf, &p);
626             *va_arg(ap, float*) = res;
627           }
628           if (p - buf != (ptrdiff_t)width) abort();
629           nassigned++;
630         }
631         nread += width;
632         nconversions++;
633         break;
634     }
635   }
636 input_failure:
637   return (nconversions != 0 ? nassigned : EOF);
638 match_failure:
639   return (nassigned);
640 }
641 #pragma GCC diagnostic pop
642