/*
 * vsscanf.c
 *
 * vsscanf(), from which the rest of the scanf()
 * family is built
 */
7
8 #include <ctype.h>
9 #include <stdarg.h>
10 #include <stddef.h>
11 #include <inttypes.h>
12 #include <string.h>
13 #include <limits.h>
14 #include <stdio.h>
15 #include <sys/bitops.h>
16
/* Number of bits in a long; only defined here if nothing else defined it. */
#if !defined(LONG_BIT)
# define LONG_BIT (sizeof(long) * CHAR_BIT)
#endif
20
/* Option bits gathered while a single '%' directive is being parsed. */
enum flags {
    FL_SPLAT = 1 << 0,  /* '*' given: drop the value, do not assign */
    FL_WIDTH = 1 << 1,  /* An explicit field width was specified */
    FL_MINUS = 1 << 2,  /* Negative number */
};
26
/*
 * Integer "rank" of a conversion's destination.  The ranks are consecutive
 * so that each 'h' modifier can step one down and each 'l' one up from
 * rank_int.
 */
enum ranks {
    rank_char = -2,
    rank_short,             /* -1 */
    rank_int,               /*  0 */
    rank_long,              /*  1 */
    rank_longlong,          /*  2 */
    rank_ptr = INT_MAX      /* Special value used for pointers */
};

/* Bounds that an accumulated rank is clamped to */
#define MIN_RANK        rank_char
#define MAX_RANK        rank_longlong

/* Ranks selected by the 'j', 'z' and 't' length modifiers */
#define INTMAX_RANK     rank_longlong
#define SIZE_T_RANK     rank_long
#define PTRDIFF_T_RANK  rank_long
42
/* Reason the scanner stopped before reaching the end of the format. */
enum bail {
    bail_none = 0,  /* No error condition */
    bail_eof  = 1,  /* Hit EOF */
    bail_err  = 2   /* Conversion mismatch */
};
48
vsscanf(const char * buffer,const char * format,va_list ap)49 int vsscanf(const char *buffer, const char *format, va_list ap)
50 {
51 const char *p = format;
52 char ch;
53 const char *q = buffer;
54 const char *qq;
55 uintmax_t val = 0;
56 int rank = rank_int; /* Default rank */
57 unsigned int width = UINT_MAX;
58 int base;
59 enum flags flags = 0;
60 enum {
61 st_normal, /* Ground state */
62 st_flags, /* Special flags */
63 st_width, /* Field width */
64 st_modifiers, /* Length or conversion modifiers */
65 st_match_init, /* Initial state of %[ sequence */
66 st_match, /* Main state of %[ sequence */
67 st_match_range, /* After - in a %[ sequence */
68 } state = st_normal;
69 char *sarg = NULL; /* %s %c or %[ string argument */
70 enum bail bail = bail_none;
71 int converted = 0; /* Successful conversions */
72 unsigned long matchmap[((1 << CHAR_BIT) + (LONG_BIT - 1)) / LONG_BIT];
73 int matchinv = 0; /* Is match map inverted? */
74 unsigned char range_start = 0;
75
76 while ((ch = *p++) && !bail) {
77 switch (state) {
78 case st_normal:
79 if (ch == '%') {
80 state = st_flags;
81 flags = 0;
82 rank = rank_int;
83 width = UINT_MAX;
84 } else if (isspace((unsigned char)ch)) {
85 q = skipspace(q);
86 } else {
87 if (*q == ch)
88 q++;
89 else
90 bail = bail_err; /* Match failure */
91 }
92 break;
93
94 case st_flags:
95 switch (ch) {
96 case '*':
97 flags |= FL_SPLAT;
98 break;
99 case '0' ... '9':
100 width = (ch - '0');
101 state = st_width;
102 flags |= FL_WIDTH;
103 break;
104 default:
105 state = st_modifiers;
106 p--; /* Process this character again */
107 break;
108 }
109 break;
110
111 case st_width:
112 if (ch >= '0' && ch <= '9') {
113 width = width * 10 + (ch - '0');
114 } else {
115 state = st_modifiers;
116 p--; /* Process this character again */
117 }
118 break;
119
120 case st_modifiers:
121 switch (ch) {
122 /* Length modifiers - nonterminal sequences */
123 case 'h':
124 rank--; /* Shorter rank */
125 break;
126 case 'l':
127 rank++; /* Longer rank */
128 break;
129 case 'j':
130 rank = INTMAX_RANK;
131 break;
132 case 'z':
133 rank = SIZE_T_RANK;
134 break;
135 case 't':
136 rank = PTRDIFF_T_RANK;
137 break;
138 case 'L':
139 case 'q':
140 rank = rank_longlong; /* long double/long long */
141 break;
142
143 default:
144 /* Output modifiers - terminal sequences */
145 state = st_normal; /* Next state will be normal */
146 if (rank < MIN_RANK) /* Canonicalize rank */
147 rank = MIN_RANK;
148 else if (rank > MAX_RANK)
149 rank = MAX_RANK;
150
151 switch (ch) {
152 case 'P': /* Upper case pointer */
153 case 'p': /* Pointer */
154 #if 0 /* Enable this to allow null pointers by name */
155 q = skipspace(q);
156 if (!isdigit((unsigned char)*q)) {
157 static const char *const nullnames[] =
158 { "null", "nul", "nil", "(null)", "(nul)", "(nil)",
159 0 };
160 const char *const *np;
161
162 /* Check to see if it's a null pointer by name */
163 for (np = nullnames; *np; np++) {
164 if (!strncasecmp(q, *np, strlen(*np))) {
165 val = (uintmax_t) ((void *)NULL);
166 goto set_integer;
167 }
168 }
169 /* Failure */
170 bail = bail_err;
171 break;
172 }
173 /* else */
174 #endif
175 rank = rank_ptr;
176 base = 0;
177 goto scan_int;
178
179 case 'i': /* Base-independent integer */
180 base = 0;
181 goto scan_int;
182
183 case 'd': /* Decimal integer */
184 base = 10;
185 goto scan_int;
186
187 case 'o': /* Octal integer */
188 base = 8;
189 goto scan_int;
190
191 case 'u': /* Unsigned decimal integer */
192 base = 10;
193 goto scan_int;
194
195 case 'x': /* Hexadecimal integer */
196 case 'X':
197 base = 16;
198 goto scan_int;
199
200 case 'n': /* Number of characters consumed */
201 val = (q - buffer);
202 goto set_integer;
203
204 scan_int:
205 q = skipspace(q);
206 if (!*q) {
207 bail = bail_eof;
208 break;
209 }
210 val = strntoumax(q, (char **)&qq, base, width);
211 if (qq == q) {
212 bail = bail_err;
213 break;
214 }
215 q = qq;
216 converted++;
217 /* fall through */
218
219 set_integer:
220 if (!(flags & FL_SPLAT)) {
221 switch (rank) {
222 case rank_char:
223 *va_arg(ap, unsigned char *) = (unsigned char)val;
224 break;
225 case rank_short:
226 *va_arg(ap, unsigned short *) = (unsigned short)val;
227 break;
228 case rank_int:
229 *va_arg(ap, unsigned int *) = (unsigned int)val;
230 break;
231 case rank_long:
232 *va_arg(ap, unsigned long *) = (unsigned long)val;
233 break;
234 case rank_longlong:
235 *va_arg(ap, unsigned long long *) =
236 (unsigned long long)val;
237 break;
238 case rank_ptr:
239 *va_arg(ap, void **) = (void *)(uintptr_t) val;
240 break;
241 }
242 }
243 break;
244
245 case 'c': /* Character */
246 width = (flags & FL_WIDTH) ? width : 1; /* Default width == 1 */
247 sarg = va_arg(ap, char *);
248 while (width--) {
249 if (!*q) {
250 bail = bail_eof;
251 break;
252 }
253 *sarg++ = *q++;
254 }
255 if (!bail)
256 converted++;
257 break;
258
259 case 's': /* String */
260 {
261 char *sp;
262 sp = sarg = va_arg(ap, char *);
263 while (width-- && *q && !isspace((unsigned char)*q)) {
264 *sp++ = *q++;
265 }
266 if (sarg != sp) {
267 *sp = '\0'; /* Terminate output */
268 converted++;
269 } else {
270 bail = bail_eof;
271 }
272 }
273 break;
274
275 case '[': /* Character range */
276 sarg = va_arg(ap, char *);
277 state = st_match_init;
278 matchinv = 0;
279 memset(matchmap, 0, sizeof matchmap);
280 break;
281
282 case '%': /* %% sequence */
283 if (*q == '%')
284 q++;
285 else
286 bail = bail_err;
287 break;
288
289 default: /* Anything else */
290 bail = bail_err; /* Unknown sequence */
291 break;
292 }
293 }
294 break;
295
296 case st_match_init: /* Initial state for %[ match */
297 if (ch == '^' && !matchinv) {
298 matchinv = 1;
299 } else {
300 range_start = (unsigned char)ch;
301 set_bit((unsigned char)ch, matchmap);
302 state = st_match;
303 }
304 break;
305
306 case st_match: /* Main state for %[ match */
307 if (ch == ']') {
308 goto match_run;
309 } else if (ch == '-') {
310 state = st_match_range;
311 } else {
312 range_start = (unsigned char)ch;
313 set_bit((unsigned char)ch, matchmap);
314 }
315 break;
316
317 case st_match_range: /* %[ match after - */
318 if (ch == ']') {
319 set_bit((unsigned char)'-', matchmap); /* - was last character */
320 goto match_run;
321 } else {
322 int i;
323 for (i = range_start; i <= (unsigned char)ch; i++)
324 set_bit(i, matchmap);
325 state = st_match;
326 }
327 break;
328
329 match_run: /* Match expression finished */
330 qq = q;
331 while (width && *q
332 && test_bit((unsigned char)*q, matchmap) ^ matchinv) {
333 *sarg++ = *q++;
334 }
335 if (q != qq) {
336 *sarg = '\0';
337 converted++;
338 } else {
339 bail = *q ? bail_err : bail_eof;
340 }
341 break;
342 }
343 }
344
345 if (bail == bail_eof && !converted)
346 converted = -1; /* Return EOF (-1) */
347
348 return converted;
349 }
350