1 /* $OpenBSD: vfscanf.c,v 1.31 2014/03/19 05:17:01 guenther Exp $ */
2 /*-
3 * Copyright (c) 1990, 1993
4 * The Regents of the University of California. All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * Chris Torek.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34 #include <inttypes.h>
35 #include <stdarg.h>
36 #include <stddef.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <sys/param.h>
41 #include <wctype.h>
42 #include "local.h"
43
44 #include <private/bionic_ctype.h>
45 #include <private/bionic_fortify.h>
46 #include <private/bionic_mbstate.h>
47
#define BUF 513 /* Maximum length of numeric string. */

// Flags used during conversion.
// Each flag is a distinct bit; __svfscanf ORs them together in its `flags` variable.
// Size/type:
#define LONG 0x00001       // l: long or double
#define LONGDBL 0x00002    // L: long double
#define SHORT 0x00004      // h: short
#define SHORTSHORT 0x00008 // hh: 8 bit integer
#define LLONG 0x00010      // ll: long long (+ deprecated q: quad)
#define POINTER 0x00020    // p: void* (as hex)
#define SIZEINT 0x00040    // z: (signed) size_t
#define MAXINT 0x00080     // j: intmax_t
#define PTRINT 0x00100     // t: ptrdiff_t
#define NOSKIP 0x00200     // [ or c: do not skip blanks
// Modifiers:
#define SUPPRESS 0x00400   // *: suppress assignment
#define UNSIGNED 0x00800   // %[oupxX] conversions
#define ALLOCATE 0x01000   // m: allocate a char*
// Internal use during integer parsing:
#define SIGNOK 0x02000     // +/- is (still) legal
#define HAVESIGN 0x04000   // Sign detected
#define NDIGITS 0x08000    // No digits detected (set before scanning, cleared by the first digit)
#define PFXOK 0x10000      // "0x" prefix is (still) legal
#define NZDIGITS 0x20000   // No zero digits detected (cleared when the first '0' is scanned)

// Conversion types.
// The 'm' (ALLOCATE) sanity check in __svfscanf tests `c > CT_STRING`, so the
// three conversions that may allocate must keep the lowest values.
#define CT_CHAR 0   // %c conversion
#define CT_CCL 1    // %[...] conversion
#define CT_STRING 2 // %s conversion
#define CT_INT 3    // Integer: strtoimax/strtoumax
#define CT_FLOAT 4  // Float: strtod

// Scanset parser for %[...]; defined after __svfscanf.
static const unsigned char* __sccl(char*, const unsigned char*);
81
82 /*
83 * Internal, unlocked version of vfscanf
84 */
__svfscanf(FILE * fp,const char * fmt0,va_list ap)85 int __svfscanf(FILE* fp, const char* fmt0, va_list ap) {
86 const unsigned char* fmt = reinterpret_cast<const unsigned char*>(fmt0);
87 int c; /* character from format, or conversion */
88 size_t width; /* field width, or 0 */
89 char* p;
90 wchar_t* wcp;
91 size_t n;
92 int flags; /* flags as defined above */
93 int nassigned; /* number of fields assigned */
94 int nread; /* number of characters consumed from fp */
95 int base; /* base argument to strtoimax/strtouimax */
96 char ccltab[256]; /* character class table for %[...] */
97 char buf[BUF]; /* buffer for numeric conversions */
98 size_t nconv; /* length of multibyte sequence converted */
99 mbstate_t mbs;
100 void* allocation = NULL; // Allocated but unassigned result for %mc/%ms/%m[.
101 size_t capacity = 0; // Number of char/wchar_t units allocated in `allocation`.
102
103 /* `basefix' is used to avoid `if' tests in the integer scanner */
104 static short basefix[17] = { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
105
106 _SET_ORIENTATION(fp, -1);
107
108 nassigned = 0;
109 nread = 0;
110 for (;;) {
111 c = *fmt++;
112 if (c == 0) return nassigned;
113 if (IsSpace(c)) {
114 while ((fp->_r > 0 || __srefill(fp) == 0) && IsSpace(*fp->_p)) nread++, fp->_r--, fp->_p++;
115 continue;
116 }
117 if (c != '%') goto literal;
118 width = 0;
119 flags = 0;
120 /*
121 * switch on the format. continue if done;
122 * break once format type is derived.
123 */
124 again:
125 c = *fmt++;
126 switch (c) {
127 case '%':
128 literal:
129 if (fp->_r <= 0 && __srefill(fp)) goto input_failure;
130 if (*fp->_p != c) goto match_failure;
131 fp->_r--, fp->_p++;
132 nread++;
133 continue;
134
135 case '*':
136 flags |= SUPPRESS;
137 goto again;
138 case 'j':
139 flags |= MAXINT;
140 goto again;
141 case 'L':
142 flags |= LONGDBL;
143 goto again;
144 case 'h':
145 if (*fmt == 'h') {
146 fmt++;
147 flags |= SHORTSHORT;
148 } else {
149 flags |= SHORT;
150 }
151 goto again;
152 case 'l':
153 if (*fmt == 'l') {
154 fmt++;
155 flags |= LLONG;
156 } else {
157 flags |= LONG;
158 }
159 goto again;
160 case 'm':
161 flags |= ALLOCATE;
162 goto again;
163 case 'q':
164 flags |= LLONG; /* deprecated */
165 goto again;
166 case 't':
167 flags |= PTRINT;
168 goto again;
169 case 'z':
170 flags |= SIZEINT;
171 goto again;
172
173 case '0':
174 case '1':
175 case '2':
176 case '3':
177 case '4':
178 case '5':
179 case '6':
180 case '7':
181 case '8':
182 case '9':
183 width = width * 10 + c - '0';
184 goto again;
185
186 /*
187 * Conversions.
188 * Those marked `compat' are for 4.[123]BSD compatibility.
189 */
190 case 'D': /* compat */
191 flags |= LONG;
192 /* FALLTHROUGH */
193 case 'd':
194 c = CT_INT;
195 base = 10;
196 break;
197
198 case 'i':
199 c = CT_INT;
200 base = 0;
201 break;
202
203 case 'O': /* compat */
204 flags |= LONG;
205 /* FALLTHROUGH */
206 case 'o':
207 c = CT_INT;
208 flags |= UNSIGNED;
209 base = 8;
210 break;
211
212 case 'u':
213 c = CT_INT;
214 flags |= UNSIGNED;
215 base = 10;
216 break;
217
218 case 'X':
219 case 'x':
220 flags |= PFXOK; /* enable 0x prefixing */
221 c = CT_INT;
222 flags |= UNSIGNED;
223 base = 16;
224 break;
225
226 case 'e':
227 case 'E':
228 case 'f':
229 case 'F':
230 case 'g':
231 case 'G':
232 case 'a':
233 case 'A':
234 c = CT_FLOAT;
235 break;
236
237 case 's':
238 memset(ccltab, 1, 256);
239 ccltab['\t'] = ccltab['\n'] = ccltab['\v'] = ccltab['\f'] = ccltab['\r'] = ccltab[' '] = 0;
240 c = CT_STRING;
241 break;
242
243 case '[':
244 fmt = __sccl(ccltab, fmt);
245 flags |= NOSKIP;
246 c = CT_CCL;
247 break;
248
249 case 'c':
250 flags |= NOSKIP;
251 c = CT_CHAR;
252 break;
253
254 case 'p': /* pointer format is like hex */
255 flags |= POINTER | PFXOK;
256 c = CT_INT;
257 flags |= UNSIGNED;
258 base = 16;
259 break;
260
261 case 'n':
262 if (flags & SUPPRESS) continue;
263 if (flags & SHORTSHORT) {
264 *va_arg(ap, signed char*) = nread;
265 } else if (flags & SHORT) {
266 *va_arg(ap, short*) = nread;
267 } else if (flags & LONG) {
268 *va_arg(ap, long*) = nread;
269 } else if (flags & SIZEINT) {
270 *va_arg(ap, ssize_t*) = nread;
271 } else if (flags & PTRINT) {
272 *va_arg(ap, ptrdiff_t*) = nread;
273 } else if (flags & LLONG) {
274 *va_arg(ap, long long*) = nread;
275 } else if (flags & MAXINT) {
276 *va_arg(ap, intmax_t*) = nread;
277 } else {
278 *va_arg(ap, int*) = nread;
279 }
280 continue;
281
282 /*
283 * Disgusting backwards compatibility hacks. XXX
284 */
285 case '\0': /* compat */
286 return EOF;
287
288 default: /* compat */
289 if (IsUpper(c)) flags |= LONG;
290 c = CT_INT;
291 base = 10;
292 break;
293 }
294
295 if ((flags & ALLOCATE) != 0 && c > CT_STRING) {
296 __fortify_fatal("scanf 'm' only works with %%c/%%s/%%[");
297 }
298 if ((flags & (ALLOCATE|SUPPRESS)) == (ALLOCATE|SUPPRESS)) {
299 __fortify_fatal("scanf 'm' makes no sense with '*'");
300 }
301
302 /*
303 * We have a conversion that requires input.
304 */
305 if (fp->_r <= 0 && __srefill(fp)) goto input_failure;
306
307 /*
308 * Consume leading white space, except for formats
309 * that suppress this.
310 */
311 if ((flags & NOSKIP) == 0) {
312 while (IsSpace(*fp->_p)) {
313 nread++;
314 if (--fp->_r > 0) {
315 fp->_p++;
316 } else if (__srefill(fp)) {
317 goto input_failure;
318 }
319 }
320 /*
321 * Note that there is at least one character in
322 * the buffer, so conversions that do not set NOSKIP
323 * ca no longer result in an input failure.
324 */
325 }
326
327 /*
328 * Do the conversion.
329 */
330 switch (c) {
331 case CT_CHAR:
332 /* scan arbitrary characters (sets NOSKIP) */
333 if (width == 0) width = 1;
334 if (flags & LONG) {
335 if (flags & ALLOCATE) {
336 allocation = wcp = reinterpret_cast<wchar_t*>(malloc(width * sizeof(wchar_t)));
337 if (allocation == NULL) goto allocation_failure;
338 } else if (flags & SUPPRESS) {
339 wcp = NULL;
340 } else {
341 wcp = va_arg(ap, wchar_t*);
342 }
343 size_t bytes = 0;
344 while (width != 0) {
345 if (bytes == MB_CUR_MAX) {
346 fp->_flags |= __SERR;
347 goto input_failure;
348 }
349 buf[bytes++] = *fp->_p;
350 fp->_p++;
351 fp->_r--;
352 memset(&mbs, 0, sizeof(mbs));
353 nconv = mbrtowc(wcp, buf, bytes, &mbs);
354 if (nconv == __MB_ERR_ILLEGAL_SEQUENCE) {
355 fp->_flags |= __SERR;
356 goto input_failure;
357 }
358 if (nconv == 0 && !(flags & SUPPRESS)) *wcp = L'\0';
359 if (nconv != __MB_ERR_INCOMPLETE_SEQUENCE) {
360 nread += bytes;
361 width--;
362 if (!(flags & SUPPRESS)) wcp++;
363 bytes = 0;
364 }
365 if (fp->_r <= 0 && __srefill(fp)) {
366 if (bytes != 0) {
367 fp->_flags |= __SERR;
368 goto input_failure;
369 }
370 break;
371 }
372 }
373 if (allocation != NULL) {
374 *va_arg(ap, wchar_t**) = reinterpret_cast<wchar_t*>(allocation);
375 allocation = NULL;
376 }
377 if (!(flags & SUPPRESS)) nassigned++;
378 } else if (flags & SUPPRESS) {
379 size_t sum = 0;
380 for (;;) {
381 if ((n = fp->_r) < width) {
382 sum += n;
383 width -= n;
384 fp->_p += n;
385 if (__srefill(fp)) {
386 if (sum == 0) goto input_failure;
387 break;
388 }
389 } else {
390 sum += width;
391 fp->_r -= width;
392 fp->_p += width;
393 break;
394 }
395 }
396 nread += sum;
397 } else {
398 if (flags & ALLOCATE) {
399 allocation = p = reinterpret_cast<char*>(malloc(width));
400 if (allocation == NULL) goto allocation_failure;
401 } else {
402 p = va_arg(ap, char*);
403 }
404 size_t r = fread(p, 1, width, fp);
405 if (r == 0) goto input_failure;
406 if (allocation != NULL) {
407 *va_arg(ap, char**) = reinterpret_cast<char*>(allocation);
408 allocation = NULL;
409 }
410 nread += r;
411 nassigned++;
412 }
413 break;
414
415 case CT_CCL:
416 case CT_STRING:
417 // CT_CCL: scan a (nonempty) character class (sets NOSKIP).
418 // CT_STRING: like CCL, but zero-length string OK, & no NOSKIP.
419 if (width == 0) width = SIZE_MAX;
420 if (flags & LONG) {
421 // TODO: since no-one cares, replace this with a simple fgetwc loop?
422 n = 0;
423 if (flags & ALLOCATE) {
424 capacity = MIN(width, 32);
425 allocation = wcp = reinterpret_cast<wchar_t*>(malloc(sizeof(wchar_t) * capacity));
426 if (allocation == NULL) goto allocation_failure;
427 } else if (flags & SUPPRESS) {
428 wcp = NULL;
429 } else {
430 wcp = va_arg(ap, wchar_t*);
431 }
432 size_t bytes = 0;
433 while ((c == CT_CCL || !IsSpace(*fp->_p)) && width != 0) {
434 if (bytes == MB_CUR_MAX) {
435 fp->_flags |= __SERR;
436 goto input_failure;
437 }
438 buf[bytes++] = *fp->_p;
439 fp->_p++;
440 fp->_r--;
441 wchar_t wc = L'\0';
442 memset(&mbs, 0, sizeof(mbs));
443 nconv = mbrtowc(&wc, buf, bytes, &mbs);
444 if (nconv == __MB_ERR_ILLEGAL_SEQUENCE) {
445 fp->_flags |= __SERR;
446 goto input_failure;
447 }
448 if (nconv != __MB_ERR_INCOMPLETE_SEQUENCE) {
449 if ((c == CT_CCL && wctob(wc) != EOF && !ccltab[wctob(wc)]) || (c == CT_STRING && iswspace(wc))) {
450 while (bytes != 0) {
451 bytes--;
452 ungetc(buf[bytes], fp);
453 }
454 break;
455 }
456 if (wcp) wcp[n] = wc;
457 n++;
458 if (allocation != NULL && n == capacity) {
459 capacity *= 2;
460 wchar_t* new_allocation =
461 reinterpret_cast<wchar_t*>(realloc(allocation, sizeof(wchar_t) * capacity));
462 if (new_allocation == NULL) goto allocation_failure;
463 allocation = wcp = new_allocation;
464 }
465 nread += bytes;
466 width--;
467 bytes = 0;
468 }
469 if (fp->_r <= 0 && __srefill(fp)) {
470 if (bytes != 0) {
471 fp->_flags |= __SERR;
472 goto input_failure;
473 }
474 break;
475 }
476 }
477 if (c == CT_CCL && bytes != 0) {
478 fp->_flags |= __SERR;
479 goto input_failure;
480 }
481 if (allocation != NULL) {
482 *va_arg(ap, wchar_t**) = reinterpret_cast<wchar_t*>(allocation);
483 allocation = NULL;
484 }
485 } else if (flags & SUPPRESS) {
486 n = 0;
487 while (ccltab[*fp->_p]) {
488 n++, fp->_r--, fp->_p++;
489 if (--width == 0) break;
490 if (fp->_r <= 0 && __srefill(fp)) {
491 if (c == CT_CCL && n == 0) goto input_failure;
492 break;
493 }
494 }
495 nread += n;
496 } else {
497 if (flags & ALLOCATE) {
498 capacity = MIN(width, 32);
499 allocation = p = reinterpret_cast<char*>(malloc(capacity));
500 if (allocation == NULL) goto allocation_failure;
501 } else {
502 p = va_arg(ap, char*);
503 }
504 n = 0;
505 while (ccltab[*fp->_p]) {
506 fp->_r--;
507 p[n++] = *fp->_p++;
508 if (allocation != NULL && n == capacity) {
509 capacity *= 2;
510 char* new_allocation = reinterpret_cast<char*>(realloc(allocation, capacity));
511 if (new_allocation == NULL) goto allocation_failure;
512 allocation = p = new_allocation;
513 }
514 if (--width == 0) break;
515 if (fp->_r <= 0 && __srefill(fp)) {
516 if (c == CT_CCL && n == 0) goto input_failure;
517 break;
518 }
519 }
520 nread += n;
521 if (allocation != NULL) {
522 *va_arg(ap, char**) = reinterpret_cast<char*>(allocation);
523 allocation = NULL;
524 }
525 }
526 if (c == CT_CCL && n == 0) goto match_failure;
527 if (!(flags & SUPPRESS)) {
528 if (flags & LONG) {
529 wcp[n] = L'\0';
530 } else {
531 p[n] = '\0';
532 }
533 ++nassigned;
534 }
535 break;
536
537 case CT_INT:
538 /* scan an integer as if by strtoimax/strtoumax */
539 #ifdef hardway
540 if (width == 0 || width > sizeof(buf) - 1) width = sizeof(buf) - 1;
541 #else
542 /* size_t is unsigned, hence this optimisation */
543 if (--width > sizeof(buf) - 2) width = sizeof(buf) - 2;
544 width++;
545 #endif
546 flags |= SIGNOK | NDIGITS | NZDIGITS;
547 for (p = buf; width; width--) {
548 c = *fp->_p;
549 /*
550 * Switch on the character; `goto ok'
551 * if we accept it as a part of number.
552 */
553 switch (c) {
554 /*
555 * The digit 0 is always legal, but is
556 * special. For %i conversions, if no
557 * digits (zero or nonzero) have been
558 * scanned (only signs), we will have
559 * base==0. In that case, we should set
560 * it to 8 and enable 0x prefixing.
561 * Also, if we have not scanned zero digits
562 * before this, do not turn off prefixing
563 * (someone else will turn it off if we
564 * have scanned any nonzero digits).
565 */
566 case '0':
567 if (base == 0) {
568 base = 8;
569 flags |= PFXOK;
570 }
571 if (flags & NZDIGITS)
572 flags &= ~(SIGNOK | NZDIGITS | NDIGITS);
573 else
574 flags &= ~(SIGNOK | PFXOK | NDIGITS);
575 goto ok;
576
577 /* 1 through 7 always legal */
578 case '1':
579 case '2':
580 case '3':
581 case '4':
582 case '5':
583 case '6':
584 case '7':
585 base = basefix[base];
586 flags &= ~(SIGNOK | PFXOK | NDIGITS);
587 goto ok;
588
589 /* digits 8 and 9 ok iff decimal or hex */
590 case '8':
591 case '9':
592 base = basefix[base];
593 if (base <= 8) break; /* not legal here */
594 flags &= ~(SIGNOK | PFXOK | NDIGITS);
595 goto ok;
596
597 /* letters ok iff hex */
598 case 'A':
599 case 'B':
600 case 'C':
601 case 'D':
602 case 'E':
603 case 'F':
604 case 'a':
605 case 'b':
606 case 'c':
607 case 'd':
608 case 'e':
609 case 'f':
610 /* no need to fix base here */
611 if (base <= 10) break; /* not legal here */
612 flags &= ~(SIGNOK | PFXOK | NDIGITS);
613 goto ok;
614
615 /* sign ok only as first character */
616 case '+':
617 case '-':
618 if (flags & SIGNOK) {
619 flags &= ~SIGNOK;
620 flags |= HAVESIGN;
621 goto ok;
622 }
623 break;
624
625 /*
626 * x ok iff flag still set and 2nd char (or
627 * 3rd char if we have a sign).
628 */
629 case 'x':
630 case 'X':
631 if ((flags & PFXOK) && p == buf + 1 + !!(flags & HAVESIGN)) {
632 base = 16; /* if %i */
633 flags &= ~PFXOK;
634 goto ok;
635 }
636 break;
637 }
638
639 /*
640 * If we got here, c is not a legal character
641 * for a number. Stop accumulating digits.
642 */
643 break;
644 ok:
645 /*
646 * c is legal: store it and look at the next.
647 */
648 *p++ = c;
649 if (--fp->_r > 0)
650 fp->_p++;
651 else if (__srefill(fp))
652 break; /* EOF */
653 }
654 /*
655 * If we had only a sign, it is no good; push
656 * back the sign. If the number ends in `x',
657 * it was [sign] '0' 'x', so push back the x
658 * and treat it as [sign] '0'.
659 */
660 if (flags & NDIGITS) {
661 if (p > buf) (void)ungetc(*(u_char*)--p, fp);
662 goto match_failure;
663 }
664 c = ((u_char*)p)[-1];
665 if (c == 'x' || c == 'X') {
666 --p;
667 (void)ungetc(c, fp);
668 }
669 if ((flags & SUPPRESS) == 0) {
670 uintmax_t res;
671
672 *p = '\0';
673 if (flags & UNSIGNED) {
674 res = strtoumax(buf, NULL, base);
675 } else {
676 res = strtoimax(buf, NULL, base);
677 }
678 if (flags & POINTER) {
679 *va_arg(ap, void**) = (void*)(uintptr_t)res;
680 } else if (flags & MAXINT) {
681 *va_arg(ap, intmax_t*) = res;
682 } else if (flags & LLONG) {
683 *va_arg(ap, long long*) = res;
684 } else if (flags & SIZEINT) {
685 *va_arg(ap, ssize_t*) = res;
686 } else if (flags & PTRINT) {
687 *va_arg(ap, ptrdiff_t*) = res;
688 } else if (flags & LONG) {
689 *va_arg(ap, long*) = res;
690 } else if (flags & SHORT) {
691 *va_arg(ap, short*) = res;
692 } else if (flags & SHORTSHORT) {
693 *va_arg(ap, signed char*) = res;
694 } else {
695 *va_arg(ap, int*) = res;
696 }
697 nassigned++;
698 }
699 nread += p - buf;
700 break;
701
702 case CT_FLOAT:
703 /* scan a floating point number as if by strtod */
704 if (width == 0 || width > sizeof(buf) - 1) width = sizeof(buf) - 1;
705 if ((width = parsefloat(fp, buf, buf + width)) == 0) goto match_failure;
706 if ((flags & SUPPRESS) == 0) {
707 if (flags & LONGDBL) {
708 long double res = strtold(buf, &p);
709 *va_arg(ap, long double*) = res;
710 } else if (flags & LONG) {
711 double res = strtod(buf, &p);
712 *va_arg(ap, double*) = res;
713 } else {
714 float res = strtof(buf, &p);
715 *va_arg(ap, float*) = res;
716 }
717 if ((size_t)(p - buf) != width) abort();
718 nassigned++;
719 }
720 nread += width;
721 break;
722 }
723 }
724 allocation_failure:
725 input_failure:
726 free(allocation);
727 if (nassigned == 0) nassigned = -1;
728 match_failure:
729 return nassigned;
730 }
731
/*
 * Fill in the given table from the scanset at the given format
 * (just after `[').  Return a pointer to the character past the
 * closing `]'.  The table has a 1 wherever characters should be
 * considered part of the scanset.
 *
 * `tab' must have room for 256 entries, one per unsigned char value.
 * If the format ends before the closing `]', the returned pointer
 * refers to the terminating NUL instead.
 */
static const unsigned char* __sccl(char* tab, const unsigned char* fmt) {
  int c, n, v;

  /* first `clear' the whole table */
  c = *fmt++; /* first char hat => negated scanset */
  if (c == '^') {
    v = 1;      /* default => accept */
    c = *fmt++; /* get new first char */
  } else {
    v = 0; /* default => reject */
  }
  memset(tab, v, 256);
  if (c == 0) return (fmt - 1); /* format ended before closing ] */

  /*
   * Now set the entries corresponding to the actual scanset
   * to the opposite of the above.
   *
   * The first character may be ']' (or '-') without being special;
   * the last character may be '-'.
   */
  v = 1 - v;
  for (;;) {
    tab[c] = v; /* take character c */
  doswitch:
    n = *fmt++; /* and examine the next */
    switch (n) {
      case 0: /* format ended too soon */
        return (fmt - 1);

      case '-':
        /*
         * A scanset of the form
         *	[01+-]
         * is defined as `the digit 0, the digit 1,
         * the character +, the character -', but
         * the effect of a scanset such as
         *	[a-zA-Z0-9]
         * is implementation defined.  The V7 Unix
         * scanf treats `a-z' as `the letters a through
         * z', but treats `a-a' as `the letter a, the
         * character -, and the letter a'.
         *
         * For compatibility, the `-' is not considered
         * to define a range if the character following
         * it is either a close bracket (required by ANSI)
         * or is not numerically greater than the character
         * we just stored in the table (c).
         *
         * The comparison must be `<=': with `<', an end point
         * equal to c would enter the fill loop below, which
         * pre-increments c and would therefore mark c+1 (and
         * write tab[256] when c is 255).
         */
        n = *fmt;
        if (n == ']' || n <= c) {
          c = '-';
          break; /* resume the for(;;) */
        }
        fmt++;
        /* Here c < n <= 255, so every index written stays inside the table. */
        do { /* fill in the range */
          tab[++c] = v;
        } while (c < n);
        /*
         * XXX another disgusting compatibility hack:
         * alas, the V7 Unix scanf also treats formats
         * such as [a-c-e] as `the letters a through e'.
         * This too is permitted by the standard....
         */
        goto doswitch;

      case ']': /* end of scanset */
        return fmt;

      default: /* just another character */
        c = n;
        break;
    }
  }
  /* NOTREACHED */
}
820