1 /* $OpenBSD: vfwscanf.c,v 1.4 2014/03/19 05:17:01 guenther Exp $ */
2 /*-
3 * Copyright (c) 1990, 1993
4 * The Regents of the University of California. All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * Chris Torek.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34 #include <inttypes.h>
35 #include <limits.h>
36 #include <locale.h>
37 #include <stdarg.h>
38 #include <stddef.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <wctype.h>
43 #include "local.h"
44
45 #include <platform/bionic/macros.h>
46
47 #define BUF 513 /* Size in wide characters of the numeric scratch buffer; field widths are clamped to BUF - 1. */
48
49 /*
50 * Flags used during conversion (OR-ed together in the scanner's `flags`).
51 */
52 #define LONG 0x00001 /* l: long or double; also selects wchar_t output for %c, %s and %[ */
53 #define LONGDBL 0x00002 /* L: long double */
54 #define SHORT 0x00004 /* h: short */
55 #define SHORTSHORT 0x00008 /* hh: 8 bit integer */
56 #define LLONG 0x00010 /* ll: long long (+ deprecated q: quad) */
57 #define POINTER 0x00020 /* p: void * (as hex) */
58 #define SIZEINT 0x00040 /* z: (signed) size_t */
59 #define MAXINT 0x00080 /* j: intmax_t */
60 #define PTRINT 0x00100 /* t: ptrdiff_t */
61 #define NOSKIP 0x00200 /* [ or c: do not skip blanks */
62 #define SUPPRESS 0x00400 /* *: suppress assignment */
63 #define UNSIGNED 0x00800 /* %[oupxX] conversions */
64
65 /*
66 * The following are used in numeric conversions only:
67 * SIGNOK, HAVESIGN, NDIGITS, DPTOK, and EXPOK are for floating point;
68 * SIGNOK, HAVESIGN, NDIGITS, PFBOK, PFXOK, and NZDIGITS are for integral.
69 */
70 #define SIGNOK 0x01000 /* +/- is (still) legal */
71 #define HAVESIGN 0x02000 /* sign detected */
72 #define NDIGITS 0x04000 /* no digits detected */
73
74 #define DPTOK 0x08000 /* (float) decimal point is still legal */
75 #define EXPOK 0x10000 /* (float) exponent (e+3, etc) still legal */
76
77 #define PFBOK 0x20000 /* 0b prefix is (still) legal */
78 #define PFXOK 0x40000 /* 0x prefix is (still) legal */
79 #define NZDIGITS 0x80000 /* no zero digits detected */
80
81 /*
82 * Conversion types (stored in `c` once the format specifier has been parsed).
83 */
84 #define CT_CHAR 0 /* %c conversion */
85 #define CT_CCL 1 /* %[...] conversion */
86 #define CT_STRING 2 /* %s conversion */
87 #define CT_INT 3 /* integer, i.e., wcstoimax or wcstoumax */
88 #define CT_FLOAT 4 /* floating, i.e., wcstof, wcstod or wcstold */
89
// Scanset membership test for %[...], interpreting the set text directly on
// every call. (vfscanf.c's __sccl builds a lookup table instead, but a table
// covering every wchar_t value would be too expensive, and a compressed form
// isn't worth the trouble.)
//
// `ccl` points just past the '[' of the conversion; the set runs up to the
// closing ']' or the end of the string. Returns true if `wc` is accepted by
// the set, taking a leading '^' (negation) into account.
static inline bool in_ccl(wchar_t wc, const wchar_t* ccl) {
  // A leading '^' inverts the sense of the whole set.
  const bool negated = (*ccl == '^');
  if (negated) ++ccl;
  const bool on_hit = !negated;

  // In the leading position, ']' and '-' are ordinary members.
  if (*ccl == ']' || *ccl == '-') {
    if (wc == *ccl) return on_hit;
    ++ccl;
  }

  for (; *ccl != '\0' && *ccl != ']'; ++ccl) {
    // A '-' only introduces a range when something other than the end of the
    // set follows it; a trailing '-' is an ordinary member.
    const bool dash_inside_set = (*ccl == '-' && ccl[1] != '\0' && ccl[1] != ']');
    if (dash_inside_set) {
      const wchar_t lo = ccl[-1];
      const wchar_t hi = ccl[1];
      if (lo <= hi) {
        if (lo <= wc && wc <= hi) return on_hit;
        // Together with the loop increment this skips both the '-' and the
        // upper bound of the range.
        ++ccl;
        continue;
      }
      // Bounds are in descending order, so this is not a range: treat the
      // '-' as a literal below.
    }
    if (wc == *ccl) return on_hit;
  }

  // Nothing in the set matched.
  return !on_hit;
}
127
128 #pragma GCC diagnostic push
129 #pragma GCC diagnostic ignored "-Wframe-larger-than="
130
131 /*
132 * vfwscanf
133 */
__vfwscanf(FILE * __restrict fp,const wchar_t * __restrict fmt,__va_list ap)134 int __vfwscanf(FILE* __restrict fp, const wchar_t* __restrict fmt, __va_list ap) {
135 wint_t c; /* character from format, or conversion */
136 size_t width; /* field width, or 0 */
137 wchar_t* p; /* points into all kinds of strings */
138 int n; /* handy integer */
139 int flags; /* flags as defined above */
140 wchar_t* p0; /* saves original value of p when necessary */
141 int nassigned; /* number of fields assigned */
142 int nconversions; /* number of conversions */
143 int nread; /* number of characters consumed from fp */
144 int base; /* base argument to strtoimax/strtouimax */
145 wchar_t buf[BUF]; /* buffer for numeric conversions */
146 const wchar_t* ccl;
147 wint_t wi; /* handy wint_t */
148 char* mbp; /* multibyte string pointer for %c %s %[ */
149 size_t nconv; /* number of bytes in mb. conversion */
150 char mbbuf[MB_LEN_MAX]; /* temporary mb. character buffer */
151 mbstate_t mbs;
152
153 _SET_ORIENTATION(fp, 1);
154
155 nassigned = 0;
156 nconversions = 0;
157 nread = 0;
158 base = 0; /* XXX just to keep gcc happy */
159 for (;;) {
160 c = *fmt++;
161 if (c == 0) {
162 return (nassigned);
163 }
164 if (iswspace(c)) {
165 while ((c = __fgetwc_unlock(fp)) != WEOF && iswspace(c))
166 ;
167 if (c != WEOF) __ungetwc(c, fp);
168 continue;
169 }
170 if (c != '%') goto literal;
171 width = 0;
172 flags = 0;
173 /*
174 * switch on the format. continue if done;
175 * break once format type is derived.
176 */
177 again:
178 c = *fmt++;
179 switch (c) {
180 case '%':
181 literal:
182 if ((wi = __fgetwc_unlock(fp)) == WEOF) goto input_failure;
183 if (wi != c) {
184 __ungetwc(wi, fp);
185 goto match_failure;
186 }
187 nread++;
188 continue;
189
190 case '*':
191 flags |= SUPPRESS;
192 goto again;
193 case 'j':
194 flags |= MAXINT;
195 goto again;
196 case 'L':
197 flags |= LONGDBL;
198 goto again;
199 case 'h':
200 if (*fmt == 'h') {
201 fmt++;
202 flags |= SHORTSHORT;
203 } else {
204 flags |= SHORT;
205 }
206 goto again;
207 case 'l':
208 if (*fmt == 'l') {
209 fmt++;
210 flags |= LLONG;
211 } else {
212 flags |= LONG;
213 }
214 goto again;
215 case 'q':
216 flags |= LLONG; /* deprecated */
217 goto again;
218 case 't':
219 flags |= PTRINT;
220 goto again;
221 case 'z':
222 flags |= SIZEINT;
223 goto again;
224
225 case '0':
226 case '1':
227 case '2':
228 case '3':
229 case '4':
230 case '5':
231 case '6':
232 case '7':
233 case '8':
234 case '9':
235 width = width * 10 + c - '0';
236 goto again;
237
238 /*
239 * Conversions.
240 * Those marked `compat' are for 4.[123]BSD compatibility.
241 */
242 case 'b':
243 c = CT_INT;
244 base = 2;
245 flags |= PFBOK; /* enable 0b prefixing */
246 break;
247
248 case 'D': /* compat */
249 flags |= LONG;
250 __BIONIC_FALLTHROUGH;
251 case 'd':
252 c = CT_INT;
253 base = 10;
254 break;
255
256 case 'i':
257 c = CT_INT;
258 base = 0;
259 break;
260
261 case 'O': /* compat */
262 flags |= LONG;
263 __BIONIC_FALLTHROUGH;
264 case 'o':
265 c = CT_INT;
266 flags |= UNSIGNED;
267 base = 8;
268 break;
269
270 case 'u':
271 c = CT_INT;
272 flags |= UNSIGNED;
273 base = 10;
274 break;
275
276 case 'X':
277 case 'x':
278 flags |= PFXOK; /* enable 0x prefixing */
279 c = CT_INT;
280 flags |= UNSIGNED;
281 base = 16;
282 break;
283
284 case 'e':
285 case 'E':
286 case 'f':
287 case 'F':
288 case 'g':
289 case 'G':
290 case 'a':
291 case 'A':
292 c = CT_FLOAT;
293 break;
294
295 case 's':
296 c = CT_STRING;
297 break;
298
299 case '[':
300 ccl = fmt;
301 if (*fmt == '^') fmt++;
302 if (*fmt == ']') fmt++;
303 while (*fmt != '\0' && *fmt != ']') fmt++;
304 fmt++;
305 flags |= NOSKIP;
306 c = CT_CCL;
307 break;
308
309 case 'c':
310 flags |= NOSKIP;
311 c = CT_CHAR;
312 break;
313
314 case 'p': /* pointer format is like hex */
315 flags |= POINTER | PFXOK;
316 c = CT_INT;
317 flags |= UNSIGNED;
318 base = 16;
319 break;
320
321 case 'n':
322 nconversions++;
323 if (flags & SUPPRESS) continue;
324 if (flags & SHORTSHORT)
325 *va_arg(ap, signed char*) = nread;
326 else if (flags & SHORT)
327 *va_arg(ap, short*) = nread;
328 else if (flags & LONG)
329 *va_arg(ap, long*) = nread;
330 else if (flags & SIZEINT)
331 *va_arg(ap, ssize_t*) = nread;
332 else if (flags & PTRINT)
333 *va_arg(ap, ptrdiff_t*) = nread;
334 else if (flags & LLONG)
335 *va_arg(ap, long long*) = nread;
336 else if (flags & MAXINT)
337 *va_arg(ap, intmax_t*) = nread;
338 else
339 *va_arg(ap, int*) = nread;
340 continue;
341
342 /*
343 * Disgusting backwards compatibility hacks. XXX
344 */
345 case '\0': /* compat */
346 return (EOF);
347
348 default: /* compat */
349 if (iswupper(c)) flags |= LONG;
350 c = CT_INT;
351 base = 10;
352 break;
353 }
354
355 /*
356 * Consume leading white space, except for formats
357 * that suppress this.
358 */
359 if ((flags & NOSKIP) == 0) {
360 while ((wi = __fgetwc_unlock(fp)) != WEOF && iswspace(wi)) nread++;
361 if (wi == WEOF) goto input_failure;
362 __ungetwc(wi, fp);
363 }
364
365 /*
366 * Do the conversion.
367 */
368 switch (c) {
369 case CT_CHAR:
370 /* scan arbitrary characters (sets NOSKIP) */
371 if (width == 0) width = 1;
372 if (flags & LONG) {
373 if (!(flags & SUPPRESS)) p = va_arg(ap, wchar_t*);
374 n = 0;
375 while (width-- != 0 && (wi = __fgetwc_unlock(fp)) != WEOF) {
376 if (!(flags & SUPPRESS)) *p++ = (wchar_t)wi;
377 n++;
378 }
379 if (n == 0) goto input_failure;
380 nread += n;
381 if (!(flags & SUPPRESS)) nassigned++;
382 } else {
383 if (!(flags & SUPPRESS)) mbp = va_arg(ap, char*);
384 n = 0;
385 memset(&mbs, 0, sizeof(mbs));
386 while (width != 0 && (wi = __fgetwc_unlock(fp)) != WEOF) {
387 if (width >= MB_CUR_MAX && !(flags & SUPPRESS)) {
388 nconv = wcrtomb(mbp, wi, &mbs);
389 if (nconv == (size_t)-1) goto input_failure;
390 } else {
391 nconv = wcrtomb(mbbuf, wi, &mbs);
392 if (nconv == (size_t)-1) goto input_failure;
393 if (nconv > width) {
394 __ungetwc(wi, fp);
395 break;
396 }
397 if (!(flags & SUPPRESS)) memcpy(mbp, mbbuf, nconv);
398 }
399 if (!(flags & SUPPRESS)) mbp += nconv;
400 width -= nconv;
401 n++;
402 }
403 if (n == 0) goto input_failure;
404 nread += n;
405 if (!(flags & SUPPRESS)) nassigned++;
406 }
407 nconversions++;
408 break;
409
410 case CT_CCL:
411 case CT_STRING:
412 // CT_CCL: scan a (nonempty) character class (sets NOSKIP).
413 // CT_STRING: like CCL, but zero-length string OK, & no NOSKIP.
414 if (width == 0) width = (size_t)~0; // 'infinity'.
415 if ((flags & SUPPRESS) && (flags & LONG)) {
416 n = 0;
417 while ((wi = __fgetwc_unlock(fp)) != WEOF && width-- != 0 && ((c == CT_CCL && in_ccl(wi, ccl)) || (c == CT_STRING && !iswspace(wi)))) n++;
418 if (wi != WEOF) __ungetwc(wi, fp);
419 } else if (flags & LONG) {
420 p0 = p = va_arg(ap, wchar_t*);
421 while ((wi = __fgetwc_unlock(fp)) != WEOF && width-- != 0 && ((c == CT_CCL && in_ccl(wi, ccl)) || (c == CT_STRING && !iswspace(wi)))) {
422 *p++ = (wchar_t)wi;
423 }
424 if (wi != WEOF) __ungetwc(wi, fp);
425 n = p - p0;
426 } else {
427 if (!(flags & SUPPRESS)) mbp = va_arg(ap, char*);
428 n = 0;
429 memset(&mbs, 0, sizeof(mbs));
430 while ((wi = __fgetwc_unlock(fp)) != WEOF && width != 0 && ((c == CT_CCL && in_ccl(wi, ccl)) || (c == CT_STRING && !iswspace(wi)))) {
431 if (width >= MB_CUR_MAX && !(flags & SUPPRESS)) {
432 nconv = wcrtomb(mbp, wi, &mbs);
433 if (nconv == (size_t)-1) goto input_failure;
434 } else {
435 nconv = wcrtomb(mbbuf, wi, &mbs);
436 if (nconv == (size_t)-1) goto input_failure;
437 if (nconv > width) break;
438 if (!(flags & SUPPRESS)) memcpy(mbp, mbbuf, nconv);
439 }
440 if (!(flags & SUPPRESS)) mbp += nconv;
441 width -= nconv;
442 n++;
443 }
444 if (wi != WEOF) __ungetwc(wi, fp);
445 }
446 if (c == CT_CCL && n == 0) goto match_failure;
447 if (!(flags & SUPPRESS)) {
448 if (flags & LONG) {
449 *p = L'\0';
450 } else {
451 *mbp = '\0';
452 }
453 ++nassigned;
454 }
455 nread += n;
456 nconversions++;
457 break;
458
459 case CT_INT:
460 /* scan an integer as if by strtoimax/strtoumax */
461 if (width == 0 || width > sizeof(buf) / sizeof(*buf) - 1)
462 width = sizeof(buf) / sizeof(*buf) - 1;
463 flags |= SIGNOK | NDIGITS | NZDIGITS;
464 for (p = buf; width; width--) {
465 c = __fgetwc_unlock(fp);
466 /*
467 * Switch on the character; `goto ok'
468 * if we accept it as a part of number.
469 */
470 switch (c) {
471 /*
472 * The digit 0 is always legal, but is
473 * special. For %i conversions, if no
474 * digits (zero or nonzero) have been
475 * scanned (only signs), we will have
476 * base==0. In that case, we should set
477 * it to 8 and enable 0b/0x prefixing.
478 * Also, if we have not scanned zero digits
479 * before this, do not turn off prefixing
480 * (someone else will turn it off if we
481 * have scanned any nonzero digits).
482 */
483 case '0':
484 if (base == 0) {
485 base = 8;
486 flags |= PFBOK | PFXOK;
487 }
488 if (flags & NZDIGITS) {
489 flags &= ~(SIGNOK | NZDIGITS | NDIGITS);
490 } else {
491 flags &= ~(SIGNOK | PFBOK | PFXOK | NDIGITS);
492 }
493 goto ok;
494
495 /* 1 through 7 always legal */
496 case 'B':
497 case 'b':
498 // Is this 'b' potentially part of an "0b" prefix?
499 if ((flags & PFBOK) && p == buf + 1 + !!(flags & HAVESIGN)) {
500 base = 2;
501 flags &= ~PFBOK;
502 goto ok;
503 }
504 // No? Fall through and see if it's a hex digit instead then...
505 __BIONIC_FALLTHROUGH;
506 case '1':
507 case '2':
508 case '3':
509 case '4':
510 case '5':
511 case '6':
512 case '7':
513 case '8':
514 case '9':
515 case 'A':
516 case 'C':
517 case 'D':
518 case 'E':
519 case 'F':
520 case 'a':
521 case 'c':
522 case 'd':
523 case 'e':
524 case 'f':
525 if (base == 0) base = 10;
526 if (base != 16 && (int)(c - '0') >= base) break; /* not legal here */
527 flags &= ~(SIGNOK | PFBOK | PFXOK | NDIGITS);
528 goto ok;
529
530 /* sign ok only as first character */
531 case '+':
532 case '-':
533 if (flags & SIGNOK) {
534 flags &= ~SIGNOK;
535 flags |= HAVESIGN;
536 goto ok;
537 }
538 break;
539
540 /*
541 * x ok iff flag still set and 2nd char (or
542 * 3rd char if we have a sign).
543 */
544 case 'x':
545 case 'X':
546 if ((flags & PFXOK) && p == buf + 1 + !!(flags & HAVESIGN)) {
547 base = 16; /* if %i */
548 flags &= ~PFXOK;
549 goto ok;
550 }
551 break;
552 }
553
554 /*
555 * If we got here, c is not a legal character
556 * for a number. Stop accumulating digits.
557 */
558 if (c != WEOF) __ungetwc(c, fp);
559 break;
560 ok:
561 /*
562 * c is legal: store it and look at the next.
563 */
564 *p++ = (wchar_t)c;
565 }
566 /*
567 * If we had only a sign, it is no good; push back the sign.
568 * If the number was `[-+]0[BbXx]`, push back and treat it
569 * as `[-+]0`.
570 */
571 if (flags & NDIGITS) {
572 if (p > buf) __ungetwc(*--p, fp);
573 goto match_failure;
574 }
575 c = p[-1];
576 if ((base == 2 && (c == 'b' || c == 'B')) || c == 'x' || c == 'X') {
577 --p;
578 __ungetwc(c, fp);
579 }
580 if ((flags & SUPPRESS) == 0) {
581 uintmax_t res;
582
583 *p = '\0';
584 if (flags & UNSIGNED)
585 res = wcstoimax(buf, NULL, base);
586 else
587 res = wcstoumax(buf, NULL, base);
588 if (flags & POINTER)
589 *va_arg(ap, void**) = (void*)(uintptr_t)res;
590 else if (flags & MAXINT)
591 *va_arg(ap, intmax_t*) = res;
592 else if (flags & LLONG)
593 *va_arg(ap, long long*) = res;
594 else if (flags & SIZEINT)
595 *va_arg(ap, ssize_t*) = res;
596 else if (flags & PTRINT)
597 *va_arg(ap, ptrdiff_t*) = res;
598 else if (flags & LONG)
599 *va_arg(ap, long*) = res;
600 else if (flags & SHORT)
601 *va_arg(ap, short*) = res;
602 else if (flags & SHORTSHORT)
603 *va_arg(ap, signed char*) = res;
604 else
605 *va_arg(ap, int*) = res;
606 nassigned++;
607 }
608 nread += p - buf;
609 nconversions++;
610 break;
611
612 case CT_FLOAT:
613 /* scan a floating point number as if by strtod */
614 if (width == 0 || width > sizeof(buf) / sizeof(*buf) - 1)
615 width = sizeof(buf) / sizeof(*buf) - 1;
616 if ((width = wparsefloat(fp, buf, buf + width)) == 0) goto match_failure;
617 if ((flags & SUPPRESS) == 0) {
618 if (flags & LONGDBL) {
619 long double res = wcstold(buf, &p);
620 *va_arg(ap, long double*) = res;
621 } else if (flags & LONG) {
622 double res = wcstod(buf, &p);
623 *va_arg(ap, double*) = res;
624 } else {
625 float res = wcstof(buf, &p);
626 *va_arg(ap, float*) = res;
627 }
628 if (p - buf != (ptrdiff_t)width) abort();
629 nassigned++;
630 }
631 nread += width;
632 nconversions++;
633 break;
634 }
635 }
636 input_failure:
637 return (nconversions != 0 ? nassigned : EOF);
638 match_failure:
639 return (nassigned);
640 }
641 #pragma GCC diagnostic pop
642