/* $OpenBSD: vfscanf.c,v 1.31 2014/03/19 05:17:01 guenther Exp $ */
/*-
 * Copyright (c) 1990, 1993
 * The Regents of the University of California. All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Chris Torek.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
33
#include <ctype.h>
#include <inttypes.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/param.h>
#include <wctype.h>
#include "local.h"

#include <private/bionic_fortify.h>
#include <platform/bionic/macros.h>
#include <private/bionic_mbstate.h>
48
#define BUF 513 /* Maximum length of numeric string. */

// Flags used during conversion. Each flag is a distinct bit so they can be
// OR-ed together in the `flags` word of __svfscanf.
// Size/type:
#define LONG 0x00001        // l: long or double
#define LONGDBL 0x00002     // L: long double
#define SHORT 0x00004       // h: short
#define SHORTSHORT 0x00008  // hh: 8 bit integer
#define LLONG 0x00010       // ll: long long (+ deprecated q: quad)
#define POINTER 0x00020     // p: void* (as hex)
#define SIZEINT 0x00040     // z: (signed) size_t
#define MAXINT 0x00080      // j: intmax_t
#define PTRINT 0x00100      // t: ptrdiff_t
#define NOSKIP 0x00200      // [ or c: do not skip blanks
// Modifiers:
#define SUPPRESS 0x00400  // *: suppress assignment
#define UNSIGNED 0x00800  // %[oupxX] conversions
#define ALLOCATE 0x01000  // m: allocate a char*
// Internal use during integer parsing:
#define SIGNOK 0x02000    // +/- is (still) legal
#define HAVESIGN 0x04000  // Sign detected
#define NDIGITS 0x08000   // No digits detected
#define PFXOK 0x10000     // "0x" prefix is (still) legal
#define PFBOK 0x20000     // "0b" prefix is (still) legal
#define NZDIGITS 0x40000  // No zero digits detected

// Conversion types. The order matters: __svfscanf rejects the 'm' modifier
// for every type greater than CT_STRING.
#define CT_CHAR 0    // %c conversion
#define CT_CCL 1     // %[...] conversion
#define CT_STRING 2  // %s conversion
#define CT_INT 3     // Integer: strtoimax/strtoumax
#define CT_FLOAT 4   // Float: strtod

// Builds the 256-entry scanset table for a %[...] conversion (defined below).
static const unsigned char* __sccl(char*, const unsigned char*);
83
84 /*
85 * Internal, unlocked version of vfscanf
86 */
__svfscanf(FILE * fp,const char * fmt0,va_list ap)87 int __svfscanf(FILE* fp, const char* fmt0, va_list ap) {
88 const unsigned char* fmt = reinterpret_cast<const unsigned char*>(fmt0);
89 int c; /* character from format, or conversion */
90 size_t width; /* field width, or 0 */
91 char* p;
92 wchar_t* wcp;
93 size_t n;
94 int flags; /* flags as defined above */
95 int nassigned; /* number of fields assigned */
96 int nread; /* number of characters consumed from fp */
97 int base; /* base argument to strtoimax/strtouimax */
98 char ccltab[256]; /* character class table for %[...] */
99 char buf[BUF]; /* buffer for numeric conversions */
100 size_t nconv; /* length of multibyte sequence converted */
101 mbstate_t mbs;
102 void* allocation = nullptr; // Allocated but unassigned result for %mc/%ms/%m[.
103 size_t capacity = 0; // Number of char/wchar_t units allocated in `allocation`.
104
105 _SET_ORIENTATION(fp, -1);
106
107 nassigned = 0;
108 nread = 0;
109 for (;;) {
110 c = *fmt++;
111 if (c == 0) return nassigned;
112 if (isspace(c)) {
113 while ((fp->_r > 0 || __srefill(fp) == 0) && isspace(*fp->_p)) nread++, fp->_r--, fp->_p++;
114 continue;
115 }
116 if (c != '%') goto literal;
117 width = 0;
118 flags = 0;
119 /*
120 * switch on the format. continue if done;
121 * break once format type is derived.
122 */
123 again:
124 c = *fmt++;
125 switch (c) {
126 case '%':
127 literal:
128 if (fp->_r <= 0 && __srefill(fp)) goto input_failure;
129 if (*fp->_p != c) goto match_failure;
130 fp->_r--, fp->_p++;
131 nread++;
132 continue;
133
134 case '*':
135 flags |= SUPPRESS;
136 goto again;
137 case 'j':
138 flags |= MAXINT;
139 goto again;
140 case 'L':
141 flags |= LONGDBL;
142 goto again;
143 case 'h':
144 if (*fmt == 'h') {
145 fmt++;
146 flags |= SHORTSHORT;
147 } else {
148 flags |= SHORT;
149 }
150 goto again;
151 case 'l':
152 if (*fmt == 'l') {
153 fmt++;
154 flags |= LLONG;
155 } else {
156 flags |= LONG;
157 }
158 goto again;
159 case 'm':
160 flags |= ALLOCATE;
161 goto again;
162 case 'q':
163 flags |= LLONG; /* deprecated */
164 goto again;
165 case 't':
166 flags |= PTRINT;
167 goto again;
168 case 'z':
169 flags |= SIZEINT;
170 goto again;
171
172 case '0':
173 case '1':
174 case '2':
175 case '3':
176 case '4':
177 case '5':
178 case '6':
179 case '7':
180 case '8':
181 case '9':
182 width = width * 10 + c - '0';
183 goto again;
184
185 /*
186 * Conversions.
187 * Those marked `compat' are for 4.[123]BSD compatibility.
188 */
189 case 'b':
190 c = CT_INT;
191 base = 2;
192 flags |= PFBOK; /* enable 0b prefixing */
193 break;
194
195 case 'D': /* compat */
196 flags |= LONG;
197 __BIONIC_FALLTHROUGH;
198 case 'd':
199 c = CT_INT;
200 base = 10;
201 break;
202
203 case 'i':
204 c = CT_INT;
205 base = 0;
206 break;
207
208 case 'O': /* compat */
209 flags |= LONG;
210 __BIONIC_FALLTHROUGH;
211 case 'o':
212 c = CT_INT;
213 flags |= UNSIGNED;
214 base = 8;
215 break;
216
217 case 'u':
218 c = CT_INT;
219 flags |= UNSIGNED;
220 base = 10;
221 break;
222
223 case 'X':
224 case 'x':
225 flags |= PFXOK; /* enable 0x prefixing */
226 c = CT_INT;
227 flags |= UNSIGNED;
228 base = 16;
229 break;
230
231 case 'e':
232 case 'E':
233 case 'f':
234 case 'F':
235 case 'g':
236 case 'G':
237 case 'a':
238 case 'A':
239 c = CT_FLOAT;
240 break;
241
242 case 's':
243 memset(ccltab, 1, 256);
244 ccltab['\t'] = ccltab['\n'] = ccltab['\v'] = ccltab['\f'] = ccltab['\r'] = ccltab[' '] = 0;
245 c = CT_STRING;
246 break;
247
248 case '[':
249 fmt = __sccl(ccltab, fmt);
250 flags |= NOSKIP;
251 c = CT_CCL;
252 break;
253
254 case 'c':
255 flags |= NOSKIP;
256 c = CT_CHAR;
257 break;
258
259 case 'p': /* pointer format is like hex */
260 flags |= POINTER | PFXOK;
261 c = CT_INT;
262 flags |= UNSIGNED;
263 base = 16;
264 break;
265
266 case 'n':
267 if (flags & SUPPRESS) continue;
268 if (flags & SHORTSHORT) {
269 *va_arg(ap, signed char*) = nread;
270 } else if (flags & SHORT) {
271 *va_arg(ap, short*) = nread;
272 } else if (flags & LONG) {
273 *va_arg(ap, long*) = nread;
274 } else if (flags & SIZEINT) {
275 *va_arg(ap, ssize_t*) = nread;
276 } else if (flags & PTRINT) {
277 *va_arg(ap, ptrdiff_t*) = nread;
278 } else if (flags & LLONG) {
279 *va_arg(ap, long long*) = nread;
280 } else if (flags & MAXINT) {
281 *va_arg(ap, intmax_t*) = nread;
282 } else {
283 *va_arg(ap, int*) = nread;
284 }
285 continue;
286
287 /*
288 * Disgusting backwards compatibility hacks. XXX
289 */
290 case '\0': /* compat */
291 return EOF;
292
293 default: /* compat */
294 if (isupper(c)) flags |= LONG;
295 c = CT_INT;
296 base = 10;
297 break;
298 }
299
300 if ((flags & ALLOCATE) != 0 && c > CT_STRING) {
301 __fortify_fatal("scanf 'm' only works with %%c/%%s/%%[");
302 }
303 if ((flags & (ALLOCATE|SUPPRESS)) == (ALLOCATE|SUPPRESS)) {
304 __fortify_fatal("scanf 'm' makes no sense with '*'");
305 }
306
307 /*
308 * We have a conversion that requires input.
309 */
310 if (fp->_r <= 0 && __srefill(fp)) goto input_failure;
311
312 /*
313 * Consume leading white space, except for formats
314 * that suppress this.
315 */
316 if ((flags & NOSKIP) == 0) {
317 while (isspace(*fp->_p)) {
318 nread++;
319 if (--fp->_r > 0) {
320 fp->_p++;
321 } else if (__srefill(fp)) {
322 goto input_failure;
323 }
324 }
325 /*
326 * Note that there is at least one character in
327 * the buffer, so conversions that do not set NOSKIP
328 * ca no longer result in an input failure.
329 */
330 }
331
332 /*
333 * Do the conversion.
334 */
335 switch (c) {
336 case CT_CHAR:
337 /* scan arbitrary characters (sets NOSKIP) */
338 if (width == 0) width = 1;
339 if (flags & LONG) {
340 if (flags & ALLOCATE) {
341 allocation = wcp = reinterpret_cast<wchar_t*>(malloc(width * sizeof(wchar_t)));
342 if (allocation == nullptr) goto allocation_failure;
343 } else if (flags & SUPPRESS) {
344 wcp = nullptr;
345 } else {
346 wcp = va_arg(ap, wchar_t*);
347 }
348 size_t bytes = 0;
349 while (width != 0) {
350 if (bytes == MB_CUR_MAX) {
351 fp->_flags |= __SERR;
352 goto input_failure;
353 }
354 buf[bytes++] = *fp->_p;
355 fp->_p++;
356 fp->_r--;
357 memset(&mbs, 0, sizeof(mbs));
358 nconv = mbrtowc(wcp, buf, bytes, &mbs);
359 if (nconv == __MB_ERR_ILLEGAL_SEQUENCE) {
360 fp->_flags |= __SERR;
361 goto input_failure;
362 }
363 if (nconv == 0 && !(flags & SUPPRESS)) *wcp = L'\0';
364 if (nconv != __MB_ERR_INCOMPLETE_SEQUENCE) {
365 nread += bytes;
366 width--;
367 if (!(flags & SUPPRESS)) wcp++;
368 bytes = 0;
369 }
370 if (fp->_r <= 0 && __srefill(fp)) {
371 if (bytes != 0) {
372 fp->_flags |= __SERR;
373 goto input_failure;
374 }
375 break;
376 }
377 }
378 if (allocation != nullptr) {
379 *va_arg(ap, wchar_t**) = reinterpret_cast<wchar_t*>(allocation);
380 allocation = nullptr;
381 }
382 if (!(flags & SUPPRESS)) nassigned++;
383 } else if (flags & SUPPRESS) {
384 size_t sum = 0;
385 for (;;) {
386 if ((n = fp->_r) < width) {
387 sum += n;
388 width -= n;
389 fp->_p += n;
390 if (__srefill(fp)) {
391 if (sum == 0) goto input_failure;
392 break;
393 }
394 } else {
395 sum += width;
396 fp->_r -= width;
397 fp->_p += width;
398 break;
399 }
400 }
401 nread += sum;
402 } else {
403 if (flags & ALLOCATE) {
404 allocation = p = reinterpret_cast<char*>(malloc(width));
405 if (allocation == nullptr) goto allocation_failure;
406 } else {
407 p = va_arg(ap, char*);
408 }
409 size_t r = fread(p, 1, width, fp);
410 if (r == 0) goto input_failure;
411 if (allocation != nullptr) {
412 *va_arg(ap, char**) = reinterpret_cast<char*>(allocation);
413 allocation = nullptr;
414 }
415 nread += r;
416 nassigned++;
417 }
418 break;
419
420 case CT_CCL:
421 case CT_STRING:
422 // CT_CCL: scan a (nonempty) character class (sets NOSKIP).
423 // CT_STRING: like CCL, but zero-length string OK, & no NOSKIP.
424 if (width == 0) width = SIZE_MAX;
425 if (flags & LONG) {
426 // TODO: since no-one cares, replace this with a simple fgetwc loop?
427 n = 0;
428 if (flags & ALLOCATE) {
429 capacity = MIN(width, 32);
430 allocation = wcp = reinterpret_cast<wchar_t*>(malloc(sizeof(wchar_t) * capacity));
431 if (allocation == nullptr) goto allocation_failure;
432 } else if (flags & SUPPRESS) {
433 wcp = nullptr;
434 } else {
435 wcp = va_arg(ap, wchar_t*);
436 }
437 size_t bytes = 0;
438 while ((c == CT_CCL || !isspace(*fp->_p)) && width != 0) {
439 if (bytes == MB_CUR_MAX) {
440 fp->_flags |= __SERR;
441 goto input_failure;
442 }
443 buf[bytes++] = *fp->_p;
444 fp->_p++;
445 fp->_r--;
446 wchar_t wc = L'\0';
447 memset(&mbs, 0, sizeof(mbs));
448 nconv = mbrtowc(&wc, buf, bytes, &mbs);
449 if (nconv == __MB_ERR_ILLEGAL_SEQUENCE) {
450 fp->_flags |= __SERR;
451 goto input_failure;
452 }
453 if (nconv != __MB_ERR_INCOMPLETE_SEQUENCE) {
454 if ((c == CT_CCL && wctob(wc) != EOF && !ccltab[wctob(wc)]) || (c == CT_STRING && iswspace(wc))) {
455 while (bytes != 0) {
456 bytes--;
457 ungetc(buf[bytes], fp);
458 }
459 break;
460 }
461 if (wcp) wcp[n] = wc;
462 n++;
463 if (allocation != nullptr && n == capacity) {
464 capacity *= 2;
465 wchar_t* new_allocation =
466 reinterpret_cast<wchar_t*>(realloc(allocation, sizeof(wchar_t) * capacity));
467 if (new_allocation == nullptr) goto allocation_failure;
468 allocation = wcp = new_allocation;
469 }
470 nread += bytes;
471 width--;
472 bytes = 0;
473 }
474 if (fp->_r <= 0 && __srefill(fp)) {
475 if (bytes != 0) {
476 fp->_flags |= __SERR;
477 goto input_failure;
478 }
479 break;
480 }
481 }
482 if (c == CT_CCL && bytes != 0) {
483 fp->_flags |= __SERR;
484 goto input_failure;
485 }
486 if (allocation != nullptr) {
487 *va_arg(ap, wchar_t**) = reinterpret_cast<wchar_t*>(allocation);
488 allocation = nullptr;
489 }
490 } else if (flags & SUPPRESS) {
491 n = 0;
492 while (ccltab[*fp->_p]) {
493 n++, fp->_r--, fp->_p++;
494 if (--width == 0) break;
495 if (fp->_r <= 0 && __srefill(fp)) {
496 if (c == CT_CCL && n == 0) goto input_failure;
497 break;
498 }
499 }
500 nread += n;
501 } else {
502 if (flags & ALLOCATE) {
503 capacity = MIN(width, 32);
504 allocation = p = reinterpret_cast<char*>(malloc(capacity));
505 if (allocation == nullptr) goto allocation_failure;
506 } else {
507 p = va_arg(ap, char*);
508 }
509 n = 0;
510 while (ccltab[*fp->_p]) {
511 fp->_r--;
512 p[n++] = *fp->_p++;
513 if (allocation != nullptr && n == capacity) {
514 capacity *= 2;
515 char* new_allocation = reinterpret_cast<char*>(realloc(allocation, capacity));
516 if (new_allocation == nullptr) goto allocation_failure;
517 allocation = p = new_allocation;
518 }
519 if (--width == 0) break;
520 if (fp->_r <= 0 && __srefill(fp)) {
521 if (c == CT_CCL && n == 0) goto input_failure;
522 break;
523 }
524 }
525 nread += n;
526 if (allocation != nullptr) {
527 *va_arg(ap, char**) = reinterpret_cast<char*>(allocation);
528 allocation = nullptr;
529 }
530 }
531 if (c == CT_CCL && n == 0) goto match_failure;
532 if (!(flags & SUPPRESS)) {
533 if (flags & LONG) {
534 wcp[n] = L'\0';
535 } else {
536 p[n] = '\0';
537 }
538 ++nassigned;
539 }
540 break;
541
542 case CT_INT:
543 /* scan an integer as if by strtoimax/strtoumax */
544 #ifdef hardway
545 if (width == 0 || width > sizeof(buf) - 1) width = sizeof(buf) - 1;
546 #else
547 /* size_t is unsigned, hence this optimisation */
548 if (--width > sizeof(buf) - 2) width = sizeof(buf) - 2;
549 width++;
550 #endif
551 flags |= SIGNOK | NDIGITS | NZDIGITS;
552 for (p = buf; width; width--) {
553 c = *fp->_p;
554 /*
555 * Switch on the character; `goto ok'
556 * if we accept it as a part of number.
557 */
558 switch (c) {
559 /*
560 * The digit 0 is always legal, but is
561 * special. For %i conversions, if no
562 * digits (zero or nonzero) have been
563 * scanned (only signs), we will have
564 * base==0. In that case, we should set
565 * it to 8 and enable 0b/0x prefixing.
566 * Also, if we have not scanned zero digits
567 * before this, do not turn off prefixing
568 * (someone else will turn it off if we
569 * have scanned any nonzero digits).
570 */
571 case '0':
572 if (base == 0) {
573 base = 8;
574 flags |= PFBOK | PFXOK;
575 }
576 if (flags & NZDIGITS) {
577 flags &= ~(SIGNOK | NZDIGITS | NDIGITS);
578 } else {
579 flags &= ~(SIGNOK | PFBOK | PFXOK | NDIGITS);
580 }
581 goto ok;
582 case 'B':
583 case 'b':
584 // Is this 'b' or 'B' potentially part of an "0b" prefix?
585 if ((flags & PFBOK) && p == buf + 1 + !!(flags & HAVESIGN)) {
586 base = 2;
587 flags &= ~PFBOK;
588 goto ok;
589 }
590 // No? Fall through and see if it's a hex digit instead then...
591 __BIONIC_FALLTHROUGH;
592 case '1':
593 case '2':
594 case '3':
595 case '4':
596 case '5':
597 case '6':
598 case '7':
599 case '8':
600 case '9':
601 case 'A':
602 case 'C':
603 case 'D':
604 case 'E':
605 case 'F':
606 case 'a':
607 case 'c':
608 case 'd':
609 case 'e':
610 case 'f':
611 if (base == 0) base = 10;
612 if (base != 16 && (c - '0') >= base) break; /* not legal here */
613 flags &= ~(SIGNOK | PFBOK | PFXOK | NDIGITS);
614 goto ok;
615
616 /* sign ok only as first character */
617 case '+':
618 case '-':
619 if (flags & SIGNOK) {
620 flags &= ~SIGNOK;
621 flags |= HAVESIGN;
622 goto ok;
623 }
624 break;
625
626 /*
627 * x ok iff flag still set and 2nd char (or
628 * 3rd char if we have a sign).
629 */
630 case 'x':
631 case 'X':
632 if ((flags & PFXOK) && p == buf + 1 + !!(flags & HAVESIGN)) {
633 base = 16; /* if %i */
634 flags &= ~PFXOK;
635 goto ok;
636 }
637 break;
638 }
639
640 /*
641 * If we got here, c is not a legal character
642 * for a number. Stop accumulating digits.
643 */
644 break;
645 ok:
646 /*
647 * c is legal: store it and look at the next.
648 */
649 *p++ = c;
650 if (--fp->_r > 0)
651 fp->_p++;
652 else if (__srefill(fp))
653 break; /* EOF */
654 }
655 /*
656 * If we had only a sign, it is no good; push back the sign.
657 * If the number was `[-+]0[BbXx]`, push back and treat it
658 * as `[-+]0`.
659 */
660 if (flags & NDIGITS) {
661 if (p > buf) (void)ungetc(*(u_char*)--p, fp);
662 goto match_failure;
663 }
664 c = ((u_char*)p)[-1];
665 if ((base == 2 && (c == 'b' || c == 'B')) || c == 'x' || c == 'X') {
666 --p;
667 (void)ungetc(c, fp);
668 }
669 if ((flags & SUPPRESS) == 0) {
670 uintmax_t res;
671
672 *p = '\0';
673 if (flags & UNSIGNED) {
674 res = strtoumax(buf, nullptr, base);
675 } else {
676 res = strtoimax(buf, nullptr, base);
677 }
678 if (flags & POINTER) {
679 *va_arg(ap, void**) = (void*)(uintptr_t)res;
680 } else if (flags & MAXINT) {
681 *va_arg(ap, intmax_t*) = res;
682 } else if (flags & LLONG) {
683 *va_arg(ap, long long*) = res;
684 } else if (flags & SIZEINT) {
685 *va_arg(ap, ssize_t*) = res;
686 } else if (flags & PTRINT) {
687 *va_arg(ap, ptrdiff_t*) = res;
688 } else if (flags & LONG) {
689 *va_arg(ap, long*) = res;
690 } else if (flags & SHORT) {
691 *va_arg(ap, short*) = res;
692 } else if (flags & SHORTSHORT) {
693 *va_arg(ap, signed char*) = res;
694 } else {
695 *va_arg(ap, int*) = res;
696 }
697 nassigned++;
698 }
699 nread += p - buf;
700 break;
701
702 case CT_FLOAT:
703 /* scan a floating point number as if by strtod */
704 if (width == 0 || width > sizeof(buf) - 1) width = sizeof(buf) - 1;
705 if ((width = parsefloat(fp, buf, buf + width)) == 0) goto match_failure;
706 if ((flags & SUPPRESS) == 0) {
707 if (flags & LONGDBL) {
708 long double res = strtold(buf, &p);
709 *va_arg(ap, long double*) = res;
710 } else if (flags & LONG) {
711 double res = strtod(buf, &p);
712 *va_arg(ap, double*) = res;
713 } else {
714 float res = strtof(buf, &p);
715 *va_arg(ap, float*) = res;
716 }
717 if ((size_t)(p - buf) != width) abort();
718 nassigned++;
719 }
720 nread += width;
721 break;
722 }
723 }
724 allocation_failure:
725 input_failure:
726 free(allocation);
727 if (nassigned == 0) nassigned = -1;
728 match_failure:
729 return nassigned;
730 }
731
/*
 * Fill in the given table from the scanset at the given format
 * (just after `[').  Return a pointer to the character past the
 * closing `]', or to the terminating NUL if the format ends before
 * the scanset is closed.  The table has a 1 wherever characters
 * should be considered part of the scanset.
 *
 * `tab' must have room for 256 entries, one per unsigned char value.
 */
static const unsigned char* __sccl(char* tab, const unsigned char* fmt) {
  int c, n, v;

  /* first `clear' the whole table */
  c = *fmt++; /* first char hat => negated scanset */
  if (c == '^') {
    v = 1;      /* default => accept */
    c = *fmt++; /* get new first char */
  } else {
    v = 0; /* default => reject */
  }
  memset(tab, v, 256);
  if (c == 0) return (fmt - 1); /* format ended before closing ] */

  /*
   * Now set the entries corresponding to the actual scanset
   * to the opposite of the above.
   *
   * The first character may be ']' (or '-') without being special;
   * the last character may be '-'.
   */
  v = 1 - v;
  for (;;) {
    tab[c] = v; /* take character c */
  doswitch:
    n = *fmt++; /* and examine the next */
    switch (n) {
      case 0: /* format ended too soon */
        return (fmt - 1);

      case '-':
        /*
         * A scanset of the form
         *	[01+-]
         * is defined as `the digit 0, the digit 1,
         * the character +, the character -', but
         * the effect of a scanset such as
         *	[a-zA-Z0-9]
         * is implementation defined.  The V7 Unix
         * scanf treats `a-z' as `the letters a through
         * z', but treats `a-a' as `the letter a, the
         * character -, and the letter a'.
         *
         * For compatibility, the `-' is not considered
         * to define a range if the character following
         * it is either a close bracket (required by ANSI)
         * or is not numerically greater than the character
         * we just stored in the table (c).
         *
         * The comparison must be `<=': with n == c the fill
         * loop below would mark c + 1 instead, and for
         * c == 255 it would write past the end of the table.
         */
        n = *fmt;
        if (n == ']' || n <= c) {
          c = '-';
          break; /* resume the for(;;) */
        }
        fmt++;
        do { /* fill in the range */
          tab[++c] = v;
        } while (c < n);
        /*
         * Alas, the V7 Unix scanf also treats formats
         * such as [a-c-e] as `the letters a through e'.
         * This too is permitted by the standard, so look
         * at the next character with c left at the range end.
         */
        goto doswitch;

      case ']': /* end of scanset */
        return fmt;

      default: /* just another character */
        c = n;
        break;
    }
  }
  /* NOTREACHED */
}
820