1 /* $OpenBSD: vfscanf.c,v 1.31 2014/03/19 05:17:01 guenther Exp $ */
2 /*-
3 * Copyright (c) 1990, 1993
4 * The Regents of the University of California. All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * Chris Torek.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34 #include <ctype.h>
35 #include <inttypes.h>
36 #include <stdarg.h>
37 #include <stddef.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <sys/param.h>
42 #include <wctype.h>
43 #include "local.h"
44
45 #include <private/bionic_fortify.h>
46 #include <platform/bionic/macros.h>
47 #include <private/bionic_mbstate.h>
48
/*
 * Size of the scratch buffer `buf` in __svfscanf. Integer and float
 * conversions clamp their field width to BUF - 1 so that the collected
 * text plus a terminating NUL always fits.
 */
#define BUF 513 /* Maximum length of numeric string. */

// Flags used during conversion. They are OR-ed together into the `flags`
// variable of __svfscanf while a single conversion specification is parsed
// and executed; `flags` is reset to 0 at every '%'.
// Size/type:
#define LONG 0x00001 // l: long or double
#define LONGDBL 0x00002 // L: long double
#define SHORT 0x00004 // h: short
#define SHORTSHORT 0x00008 // hh: 8 bit integer
#define LLONG 0x00010 // ll: long long (+ deprecated q: quad)
#define POINTER 0x00020 // p: void* (as hex)
#define SIZEINT 0x00040 // z: (signed) size_t
#define MAXINT 0x00080 // j: intmax_t
#define PTRINT 0x00100 // t: ptrdiff_t
#define NOSKIP 0x00200 // [ or c: do not skip blanks
// Modifiers:
#define SUPPRESS 0x00400 // *: suppress assignment
#define UNSIGNED 0x00800 // %[oupxX] conversions
#define ALLOCATE 0x01000 // m: allocate a char* (rejected together with '*')
// Internal use during integer parsing:
#define SIGNOK 0x02000 // +/- is (still) legal
#define HAVESIGN 0x04000 // Sign detected
#define NDIGITS 0x08000 // No digits detected (cleared by the first digit)
#define PFXOK 0x10000 // "0x" prefix is (still) legal
#define NZDIGITS 0x20000 // No zero digits detected (cleared by the first '0')

// Conversion types. Once the conversion character has been identified,
// __svfscanf stores one of these in `c`. The order matters: the 'm'
// modifier is only accepted for types <= CT_STRING.
#define CT_CHAR 0 // %c conversion
#define CT_CCL 1 // %[...] conversion
#define CT_STRING 2 // %s conversion
#define CT_INT 3 // Integer: strtoimax/strtoumax
#define CT_FLOAT 4 // Float: strtod

// Scanset parser for %[...]; fills a 256-entry table (defined below).
static const unsigned char* __sccl(char*, const unsigned char*);
82
83 /*
84 * Internal, unlocked version of vfscanf
85 */
__svfscanf(FILE * fp,const char * fmt0,va_list ap)86 int __svfscanf(FILE* fp, const char* fmt0, va_list ap) {
87 const unsigned char* fmt = reinterpret_cast<const unsigned char*>(fmt0);
88 int c; /* character from format, or conversion */
89 size_t width; /* field width, or 0 */
90 char* p;
91 wchar_t* wcp;
92 size_t n;
93 int flags; /* flags as defined above */
94 int nassigned; /* number of fields assigned */
95 int nread; /* number of characters consumed from fp */
96 int base; /* base argument to strtoimax/strtouimax */
97 char ccltab[256]; /* character class table for %[...] */
98 char buf[BUF]; /* buffer for numeric conversions */
99 size_t nconv; /* length of multibyte sequence converted */
100 mbstate_t mbs;
101 void* allocation = nullptr; // Allocated but unassigned result for %mc/%ms/%m[.
102 size_t capacity = 0; // Number of char/wchar_t units allocated in `allocation`.
103
104 /* `basefix' is used to avoid `if' tests in the integer scanner */
105 static short basefix[17] = { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
106
107 _SET_ORIENTATION(fp, -1);
108
109 nassigned = 0;
110 nread = 0;
111 for (;;) {
112 c = *fmt++;
113 if (c == 0) return nassigned;
114 if (isspace(c)) {
115 while ((fp->_r > 0 || __srefill(fp) == 0) && isspace(*fp->_p)) nread++, fp->_r--, fp->_p++;
116 continue;
117 }
118 if (c != '%') goto literal;
119 width = 0;
120 flags = 0;
121 /*
122 * switch on the format. continue if done;
123 * break once format type is derived.
124 */
125 again:
126 c = *fmt++;
127 switch (c) {
128 case '%':
129 literal:
130 if (fp->_r <= 0 && __srefill(fp)) goto input_failure;
131 if (*fp->_p != c) goto match_failure;
132 fp->_r--, fp->_p++;
133 nread++;
134 continue;
135
136 case '*':
137 flags |= SUPPRESS;
138 goto again;
139 case 'j':
140 flags |= MAXINT;
141 goto again;
142 case 'L':
143 flags |= LONGDBL;
144 goto again;
145 case 'h':
146 if (*fmt == 'h') {
147 fmt++;
148 flags |= SHORTSHORT;
149 } else {
150 flags |= SHORT;
151 }
152 goto again;
153 case 'l':
154 if (*fmt == 'l') {
155 fmt++;
156 flags |= LLONG;
157 } else {
158 flags |= LONG;
159 }
160 goto again;
161 case 'm':
162 flags |= ALLOCATE;
163 goto again;
164 case 'q':
165 flags |= LLONG; /* deprecated */
166 goto again;
167 case 't':
168 flags |= PTRINT;
169 goto again;
170 case 'z':
171 flags |= SIZEINT;
172 goto again;
173
174 case '0':
175 case '1':
176 case '2':
177 case '3':
178 case '4':
179 case '5':
180 case '6':
181 case '7':
182 case '8':
183 case '9':
184 width = width * 10 + c - '0';
185 goto again;
186
187 /*
188 * Conversions.
189 * Those marked `compat' are for 4.[123]BSD compatibility.
190 */
191 case 'D': /* compat */
192 flags |= LONG;
193 __BIONIC_FALLTHROUGH;
194 case 'd':
195 c = CT_INT;
196 base = 10;
197 break;
198
199 case 'i':
200 c = CT_INT;
201 base = 0;
202 break;
203
204 case 'O': /* compat */
205 flags |= LONG;
206 __BIONIC_FALLTHROUGH;
207 case 'o':
208 c = CT_INT;
209 flags |= UNSIGNED;
210 base = 8;
211 break;
212
213 case 'u':
214 c = CT_INT;
215 flags |= UNSIGNED;
216 base = 10;
217 break;
218
219 case 'X':
220 case 'x':
221 flags |= PFXOK; /* enable 0x prefixing */
222 c = CT_INT;
223 flags |= UNSIGNED;
224 base = 16;
225 break;
226
227 case 'e':
228 case 'E':
229 case 'f':
230 case 'F':
231 case 'g':
232 case 'G':
233 case 'a':
234 case 'A':
235 c = CT_FLOAT;
236 break;
237
238 case 's':
239 memset(ccltab, 1, 256);
240 ccltab['\t'] = ccltab['\n'] = ccltab['\v'] = ccltab['\f'] = ccltab['\r'] = ccltab[' '] = 0;
241 c = CT_STRING;
242 break;
243
244 case '[':
245 fmt = __sccl(ccltab, fmt);
246 flags |= NOSKIP;
247 c = CT_CCL;
248 break;
249
250 case 'c':
251 flags |= NOSKIP;
252 c = CT_CHAR;
253 break;
254
255 case 'p': /* pointer format is like hex */
256 flags |= POINTER | PFXOK;
257 c = CT_INT;
258 flags |= UNSIGNED;
259 base = 16;
260 break;
261
262 case 'n':
263 if (flags & SUPPRESS) continue;
264 if (flags & SHORTSHORT) {
265 *va_arg(ap, signed char*) = nread;
266 } else if (flags & SHORT) {
267 *va_arg(ap, short*) = nread;
268 } else if (flags & LONG) {
269 *va_arg(ap, long*) = nread;
270 } else if (flags & SIZEINT) {
271 *va_arg(ap, ssize_t*) = nread;
272 } else if (flags & PTRINT) {
273 *va_arg(ap, ptrdiff_t*) = nread;
274 } else if (flags & LLONG) {
275 *va_arg(ap, long long*) = nread;
276 } else if (flags & MAXINT) {
277 *va_arg(ap, intmax_t*) = nread;
278 } else {
279 *va_arg(ap, int*) = nread;
280 }
281 continue;
282
283 /*
284 * Disgusting backwards compatibility hacks. XXX
285 */
286 case '\0': /* compat */
287 return EOF;
288
289 default: /* compat */
290 if (isupper(c)) flags |= LONG;
291 c = CT_INT;
292 base = 10;
293 break;
294 }
295
296 if ((flags & ALLOCATE) != 0 && c > CT_STRING) {
297 __fortify_fatal("scanf 'm' only works with %%c/%%s/%%[");
298 }
299 if ((flags & (ALLOCATE|SUPPRESS)) == (ALLOCATE|SUPPRESS)) {
300 __fortify_fatal("scanf 'm' makes no sense with '*'");
301 }
302
303 /*
304 * We have a conversion that requires input.
305 */
306 if (fp->_r <= 0 && __srefill(fp)) goto input_failure;
307
308 /*
309 * Consume leading white space, except for formats
310 * that suppress this.
311 */
312 if ((flags & NOSKIP) == 0) {
313 while (isspace(*fp->_p)) {
314 nread++;
315 if (--fp->_r > 0) {
316 fp->_p++;
317 } else if (__srefill(fp)) {
318 goto input_failure;
319 }
320 }
321 /*
322 * Note that there is at least one character in
323 * the buffer, so conversions that do not set NOSKIP
324 * ca no longer result in an input failure.
325 */
326 }
327
328 /*
329 * Do the conversion.
330 */
331 switch (c) {
332 case CT_CHAR:
333 /* scan arbitrary characters (sets NOSKIP) */
334 if (width == 0) width = 1;
335 if (flags & LONG) {
336 if (flags & ALLOCATE) {
337 allocation = wcp = reinterpret_cast<wchar_t*>(malloc(width * sizeof(wchar_t)));
338 if (allocation == nullptr) goto allocation_failure;
339 } else if (flags & SUPPRESS) {
340 wcp = nullptr;
341 } else {
342 wcp = va_arg(ap, wchar_t*);
343 }
344 size_t bytes = 0;
345 while (width != 0) {
346 if (bytes == MB_CUR_MAX) {
347 fp->_flags |= __SERR;
348 goto input_failure;
349 }
350 buf[bytes++] = *fp->_p;
351 fp->_p++;
352 fp->_r--;
353 memset(&mbs, 0, sizeof(mbs));
354 nconv = mbrtowc(wcp, buf, bytes, &mbs);
355 if (nconv == __MB_ERR_ILLEGAL_SEQUENCE) {
356 fp->_flags |= __SERR;
357 goto input_failure;
358 }
359 if (nconv == 0 && !(flags & SUPPRESS)) *wcp = L'\0';
360 if (nconv != __MB_ERR_INCOMPLETE_SEQUENCE) {
361 nread += bytes;
362 width--;
363 if (!(flags & SUPPRESS)) wcp++;
364 bytes = 0;
365 }
366 if (fp->_r <= 0 && __srefill(fp)) {
367 if (bytes != 0) {
368 fp->_flags |= __SERR;
369 goto input_failure;
370 }
371 break;
372 }
373 }
374 if (allocation != nullptr) {
375 *va_arg(ap, wchar_t**) = reinterpret_cast<wchar_t*>(allocation);
376 allocation = nullptr;
377 }
378 if (!(flags & SUPPRESS)) nassigned++;
379 } else if (flags & SUPPRESS) {
380 size_t sum = 0;
381 for (;;) {
382 if ((n = fp->_r) < width) {
383 sum += n;
384 width -= n;
385 fp->_p += n;
386 if (__srefill(fp)) {
387 if (sum == 0) goto input_failure;
388 break;
389 }
390 } else {
391 sum += width;
392 fp->_r -= width;
393 fp->_p += width;
394 break;
395 }
396 }
397 nread += sum;
398 } else {
399 if (flags & ALLOCATE) {
400 allocation = p = reinterpret_cast<char*>(malloc(width));
401 if (allocation == nullptr) goto allocation_failure;
402 } else {
403 p = va_arg(ap, char*);
404 }
405 size_t r = fread(p, 1, width, fp);
406 if (r == 0) goto input_failure;
407 if (allocation != nullptr) {
408 *va_arg(ap, char**) = reinterpret_cast<char*>(allocation);
409 allocation = nullptr;
410 }
411 nread += r;
412 nassigned++;
413 }
414 break;
415
416 case CT_CCL:
417 case CT_STRING:
418 // CT_CCL: scan a (nonempty) character class (sets NOSKIP).
419 // CT_STRING: like CCL, but zero-length string OK, & no NOSKIP.
420 if (width == 0) width = SIZE_MAX;
421 if (flags & LONG) {
422 // TODO: since no-one cares, replace this with a simple fgetwc loop?
423 n = 0;
424 if (flags & ALLOCATE) {
425 capacity = MIN(width, 32);
426 allocation = wcp = reinterpret_cast<wchar_t*>(malloc(sizeof(wchar_t) * capacity));
427 if (allocation == nullptr) goto allocation_failure;
428 } else if (flags & SUPPRESS) {
429 wcp = nullptr;
430 } else {
431 wcp = va_arg(ap, wchar_t*);
432 }
433 size_t bytes = 0;
434 while ((c == CT_CCL || !isspace(*fp->_p)) && width != 0) {
435 if (bytes == MB_CUR_MAX) {
436 fp->_flags |= __SERR;
437 goto input_failure;
438 }
439 buf[bytes++] = *fp->_p;
440 fp->_p++;
441 fp->_r--;
442 wchar_t wc = L'\0';
443 memset(&mbs, 0, sizeof(mbs));
444 nconv = mbrtowc(&wc, buf, bytes, &mbs);
445 if (nconv == __MB_ERR_ILLEGAL_SEQUENCE) {
446 fp->_flags |= __SERR;
447 goto input_failure;
448 }
449 if (nconv != __MB_ERR_INCOMPLETE_SEQUENCE) {
450 if ((c == CT_CCL && wctob(wc) != EOF && !ccltab[wctob(wc)]) || (c == CT_STRING && iswspace(wc))) {
451 while (bytes != 0) {
452 bytes--;
453 ungetc(buf[bytes], fp);
454 }
455 break;
456 }
457 if (wcp) wcp[n] = wc;
458 n++;
459 if (allocation != nullptr && n == capacity) {
460 capacity *= 2;
461 wchar_t* new_allocation =
462 reinterpret_cast<wchar_t*>(realloc(allocation, sizeof(wchar_t) * capacity));
463 if (new_allocation == nullptr) goto allocation_failure;
464 allocation = wcp = new_allocation;
465 }
466 nread += bytes;
467 width--;
468 bytes = 0;
469 }
470 if (fp->_r <= 0 && __srefill(fp)) {
471 if (bytes != 0) {
472 fp->_flags |= __SERR;
473 goto input_failure;
474 }
475 break;
476 }
477 }
478 if (c == CT_CCL && bytes != 0) {
479 fp->_flags |= __SERR;
480 goto input_failure;
481 }
482 if (allocation != nullptr) {
483 *va_arg(ap, wchar_t**) = reinterpret_cast<wchar_t*>(allocation);
484 allocation = nullptr;
485 }
486 } else if (flags & SUPPRESS) {
487 n = 0;
488 while (ccltab[*fp->_p]) {
489 n++, fp->_r--, fp->_p++;
490 if (--width == 0) break;
491 if (fp->_r <= 0 && __srefill(fp)) {
492 if (c == CT_CCL && n == 0) goto input_failure;
493 break;
494 }
495 }
496 nread += n;
497 } else {
498 if (flags & ALLOCATE) {
499 capacity = MIN(width, 32);
500 allocation = p = reinterpret_cast<char*>(malloc(capacity));
501 if (allocation == nullptr) goto allocation_failure;
502 } else {
503 p = va_arg(ap, char*);
504 }
505 n = 0;
506 while (ccltab[*fp->_p]) {
507 fp->_r--;
508 p[n++] = *fp->_p++;
509 if (allocation != nullptr && n == capacity) {
510 capacity *= 2;
511 char* new_allocation = reinterpret_cast<char*>(realloc(allocation, capacity));
512 if (new_allocation == nullptr) goto allocation_failure;
513 allocation = p = new_allocation;
514 }
515 if (--width == 0) break;
516 if (fp->_r <= 0 && __srefill(fp)) {
517 if (c == CT_CCL && n == 0) goto input_failure;
518 break;
519 }
520 }
521 nread += n;
522 if (allocation != nullptr) {
523 *va_arg(ap, char**) = reinterpret_cast<char*>(allocation);
524 allocation = nullptr;
525 }
526 }
527 if (c == CT_CCL && n == 0) goto match_failure;
528 if (!(flags & SUPPRESS)) {
529 if (flags & LONG) {
530 wcp[n] = L'\0';
531 } else {
532 p[n] = '\0';
533 }
534 ++nassigned;
535 }
536 break;
537
538 case CT_INT:
539 /* scan an integer as if by strtoimax/strtoumax */
540 #ifdef hardway
541 if (width == 0 || width > sizeof(buf) - 1) width = sizeof(buf) - 1;
542 #else
543 /* size_t is unsigned, hence this optimisation */
544 if (--width > sizeof(buf) - 2) width = sizeof(buf) - 2;
545 width++;
546 #endif
547 flags |= SIGNOK | NDIGITS | NZDIGITS;
548 for (p = buf; width; width--) {
549 c = *fp->_p;
550 /*
551 * Switch on the character; `goto ok'
552 * if we accept it as a part of number.
553 */
554 switch (c) {
555 /*
556 * The digit 0 is always legal, but is
557 * special. For %i conversions, if no
558 * digits (zero or nonzero) have been
559 * scanned (only signs), we will have
560 * base==0. In that case, we should set
561 * it to 8 and enable 0x prefixing.
562 * Also, if we have not scanned zero digits
563 * before this, do not turn off prefixing
564 * (someone else will turn it off if we
565 * have scanned any nonzero digits).
566 */
567 case '0':
568 if (base == 0) {
569 base = 8;
570 flags |= PFXOK;
571 }
572 if (flags & NZDIGITS)
573 flags &= ~(SIGNOK | NZDIGITS | NDIGITS);
574 else
575 flags &= ~(SIGNOK | PFXOK | NDIGITS);
576 goto ok;
577
578 /* 1 through 7 always legal */
579 case '1':
580 case '2':
581 case '3':
582 case '4':
583 case '5':
584 case '6':
585 case '7':
586 base = basefix[base];
587 flags &= ~(SIGNOK | PFXOK | NDIGITS);
588 goto ok;
589
590 /* digits 8 and 9 ok iff decimal or hex */
591 case '8':
592 case '9':
593 base = basefix[base];
594 if (base <= 8) break; /* not legal here */
595 flags &= ~(SIGNOK | PFXOK | NDIGITS);
596 goto ok;
597
598 /* letters ok iff hex */
599 case 'A':
600 case 'B':
601 case 'C':
602 case 'D':
603 case 'E':
604 case 'F':
605 case 'a':
606 case 'b':
607 case 'c':
608 case 'd':
609 case 'e':
610 case 'f':
611 /* no need to fix base here */
612 if (base <= 10) break; /* not legal here */
613 flags &= ~(SIGNOK | PFXOK | NDIGITS);
614 goto ok;
615
616 /* sign ok only as first character */
617 case '+':
618 case '-':
619 if (flags & SIGNOK) {
620 flags &= ~SIGNOK;
621 flags |= HAVESIGN;
622 goto ok;
623 }
624 break;
625
626 /*
627 * x ok iff flag still set and 2nd char (or
628 * 3rd char if we have a sign).
629 */
630 case 'x':
631 case 'X':
632 if ((flags & PFXOK) && p == buf + 1 + !!(flags & HAVESIGN)) {
633 base = 16; /* if %i */
634 flags &= ~PFXOK;
635 goto ok;
636 }
637 break;
638 }
639
640 /*
641 * If we got here, c is not a legal character
642 * for a number. Stop accumulating digits.
643 */
644 break;
645 ok:
646 /*
647 * c is legal: store it and look at the next.
648 */
649 *p++ = c;
650 if (--fp->_r > 0)
651 fp->_p++;
652 else if (__srefill(fp))
653 break; /* EOF */
654 }
655 /*
656 * If we had only a sign, it is no good; push
657 * back the sign. If the number ends in `x',
658 * it was [sign] '0' 'x', so push back the x
659 * and treat it as [sign] '0'.
660 */
661 if (flags & NDIGITS) {
662 if (p > buf) (void)ungetc(*(u_char*)--p, fp);
663 goto match_failure;
664 }
665 c = ((u_char*)p)[-1];
666 if (c == 'x' || c == 'X') {
667 --p;
668 (void)ungetc(c, fp);
669 }
670 if ((flags & SUPPRESS) == 0) {
671 uintmax_t res;
672
673 *p = '\0';
674 if (flags & UNSIGNED) {
675 res = strtoumax(buf, nullptr, base);
676 } else {
677 res = strtoimax(buf, nullptr, base);
678 }
679 if (flags & POINTER) {
680 *va_arg(ap, void**) = (void*)(uintptr_t)res;
681 } else if (flags & MAXINT) {
682 *va_arg(ap, intmax_t*) = res;
683 } else if (flags & LLONG) {
684 *va_arg(ap, long long*) = res;
685 } else if (flags & SIZEINT) {
686 *va_arg(ap, ssize_t*) = res;
687 } else if (flags & PTRINT) {
688 *va_arg(ap, ptrdiff_t*) = res;
689 } else if (flags & LONG) {
690 *va_arg(ap, long*) = res;
691 } else if (flags & SHORT) {
692 *va_arg(ap, short*) = res;
693 } else if (flags & SHORTSHORT) {
694 *va_arg(ap, signed char*) = res;
695 } else {
696 *va_arg(ap, int*) = res;
697 }
698 nassigned++;
699 }
700 nread += p - buf;
701 break;
702
703 case CT_FLOAT:
704 /* scan a floating point number as if by strtod */
705 if (width == 0 || width > sizeof(buf) - 1) width = sizeof(buf) - 1;
706 if ((width = parsefloat(fp, buf, buf + width)) == 0) goto match_failure;
707 if ((flags & SUPPRESS) == 0) {
708 if (flags & LONGDBL) {
709 long double res = strtold(buf, &p);
710 *va_arg(ap, long double*) = res;
711 } else if (flags & LONG) {
712 double res = strtod(buf, &p);
713 *va_arg(ap, double*) = res;
714 } else {
715 float res = strtof(buf, &p);
716 *va_arg(ap, float*) = res;
717 }
718 if ((size_t)(p - buf) != width) abort();
719 nassigned++;
720 }
721 nread += width;
722 break;
723 }
724 }
725 allocation_failure:
726 input_failure:
727 free(allocation);
728 if (nassigned == 0) nassigned = -1;
729 match_failure:
730 return nassigned;
731 }
732
/*
 * Fill in the given table from the scanset at the given format
 * (just after `[').  Return a pointer to the character past the
 * closing `]', or to the terminating NUL if the format ends before
 * a closing `]' is seen.
 *
 * `tab' must have room for 256 entries, one per unsigned char value.
 * On return it holds a 1 wherever characters should be considered part
 * of the scanset and a 0 everywhere else.
 */
static const unsigned char* __sccl(char* tab, const unsigned char* fmt) {
  int c, n, v;

  /* first `clear' the whole table */
  c = *fmt++; /* first char hat => negated scanset */
  if (c == '^') {
    v = 1;      /* default => accept */
    c = *fmt++; /* get new first char */
  } else {
    v = 0; /* default => reject */
  }
  memset(tab, v, 256);
  if (c == 0) return (fmt - 1); /* format ended before closing ] */

  /*
   * Now set the entries corresponding to the actual scanset
   * to the opposite of the above.
   *
   * The first character may be ']' (or '-') without being special;
   * the last character may be '-'.
   */
  v = 1 - v;
  for (;;) {
    tab[c] = v; /* take character c */
  doswitch:
    n = *fmt++; /* and examine the next */
    switch (n) {
      case 0: /* format ended too soon */
        return (fmt - 1);

      case '-':
        /*
         * A scanset of the form
         *	[01+-]
         * is defined as `the digit 0, the digit 1,
         * the character +, the character -', but
         * the effect of a scanset such as
         *	[a-zA-Z0-9]
         * is implementation defined.  The V7 Unix
         * scanf treats `a-z' as `the letters a through
         * z', but treats `a-a' as `the letter a, the
         * character -, and the letter a'.
         *
         * For compatibility, the `-' is not considered
         * to define a range if the character following
         * it is either a close bracket (required by ANSI)
         * or is not numerically greater than the character
         * we just stored in the table (c).
         *
         * The comparison must be `<=': with equal endpoints
         * the fill loop below would otherwise mark c + 1,
         * which for c == 255 lies outside the table.
         */
        n = *fmt;
        if (n == ']' || n <= c) {
          c = '-';
          break; /* resume the for(;;) */
        }
        fmt++;
        do { /* fill in the range; n > c here, so ++c never exceeds n */
          tab[++c] = v;
        } while (c < n);
        /*
         * Alas, the V7 Unix scanf also treats formats
         * such as [a-c-e] as `the letters a through e'.
         * This too is permitted by the standard, so keep
         * c as the range end and look at the next character
         * without storing anything.
         */
        goto doswitch;

      case ']': /* end of scanset */
        return fmt;

      default: /* just another character */
        c = n;
        break;
    }
  }
  /* NOTREACHED */
}
821