• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 *   Copyright (C) 1998-2016, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 *******************************************************************************
10 *
11 * File uscanf_p.c
12 *
13 * Modification History:
14 *
15 *   Date        Name        Description
16 *   12/02/98    stephen        Creation.
17 *   03/13/99    stephen     Modified for new C API.
18 *******************************************************************************
19 */
20 
21 #include "unicode/utypes.h"
22 
23 #if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_CONVERSION
24 
25 #include "unicode/uchar.h"
26 #include "unicode/ustring.h"
27 #include "unicode/unum.h"
28 #include "unicode/udat.h"
29 #include "unicode/uset.h"
30 #include "uscanf.h"
31 #include "ufmt_cmn.h"
32 #include "ufile.h"
33 #include "locbund.h"
34 
35 #include "cmemory.h"
36 #include "ustr_cnv.h"
37 
/* flag characters for u_scanf */
#define FLAG_ASTERISK 0x002A
#define FLAG_PAREN 0x0028

/* Nonzero when s is one of the flag characters above.  The expansion is
   fully parenthesized so that it stays a single operand when combined
   with !, && or ?: at the point of use. */
#define ISFLAG(s)    ((s) == FLAG_ASTERISK || \
            (s) == FLAG_PAREN)

/* special characters for u_scanf */
#define SPEC_DOLLARSIGN 0x0024
47 
/* unicode digits */
#define DIGIT_ZERO 0x0030
#define DIGIT_ONE 0x0031
#define DIGIT_TWO 0x0032
#define DIGIT_THREE 0x0033
#define DIGIT_FOUR 0x0034
#define DIGIT_FIVE 0x0035
#define DIGIT_SIX 0x0036
#define DIGIT_SEVEN 0x0037
#define DIGIT_EIGHT 0x0038
#define DIGIT_NINE 0x0039

/* Nonzero when s is one of the ten digits above.  The digit constants are
   contiguous, so a range check is equivalent to comparing against each one.
   The expansion is fully parenthesized so it can be negated or combined
   with other operators safely. */
#define ISDIGIT(s)    ((s) >= DIGIT_ZERO && (s) <= DIGIT_NINE)
70 
/* u_scanf modifiers */
#define MOD_H 0x0068
#define MOD_LOWERL 0x006C
#define MOD_L 0x004C

/* Nonzero when s is one of the length modifiers above ('h', 'l', 'L').
   The expansion is fully parenthesized so it can be negated or combined
   with other operators safely. */
#define ISMOD(s)    ((s) == MOD_H || \
            (s) == MOD_LOWERL || \
            (s) == MOD_L)
79 
80 /**
81  * Struct encapsulating a single uscanf format specification.
82  */
83 typedef struct u_scanf_spec_info {
84     int32_t fWidth;         /* Width  */
85 
86     UChar   fSpec;          /* Format specification  */
87 
88     UChar   fPadChar;       /* Padding character  */
89 
90     UBool   fSkipArg;       /* TRUE if arg should be skipped */
91     UBool   fIsLongDouble;  /* L flag  */
92     UBool   fIsShort;       /* h flag  */
93     UBool   fIsLong;        /* l flag  */
94     UBool   fIsLongLong;    /* ll flag  */
95     UBool   fIsString;      /* TRUE if this is a NULL-terminated string. */
96 } u_scanf_spec_info;
97 
98 
99 /**
100  * Struct encapsulating a single u_scanf format specification.
101  */
102 typedef struct u_scanf_spec {
103     u_scanf_spec_info    fInfo;        /* Information on this spec */
104     int32_t        fArgPos;    /* Position of data in arg list */
105 } u_scanf_spec;
106 
107 /**
108  * Parse a single u_scanf format specifier in Unicode.
109  * @param fmt A pointer to a '%' character in a u_scanf format specification.
110  * @param spec A pointer to a <TT>u_scanf_spec</TT> to receive the parsed
111  * format specifier.
112  * @return The number of characters contained in this specifier.
113  */
114 static int32_t
u_scanf_parse_spec(const UChar * fmt,u_scanf_spec * spec)115 u_scanf_parse_spec (const UChar     *fmt,
116             u_scanf_spec    *spec)
117 {
118     const UChar *s = fmt;
119     const UChar *backup;
120     u_scanf_spec_info *info = &(spec->fInfo);
121 
122     /* initialize spec to default values */
123     spec->fArgPos             = -1;
124 
125     info->fWidth        = -1;
126     info->fSpec         = 0x0000;
127     info->fPadChar      = 0x0020;
128     info->fSkipArg      = FALSE;
129     info->fIsLongDouble = FALSE;
130     info->fIsShort      = FALSE;
131     info->fIsLong       = FALSE;
132     info->fIsLongLong   = FALSE;
133     info->fIsString     = TRUE;
134 
135 
136     /* skip over the initial '%' */
137     s++;
138 
139     /* Check for positional argument */
140     if(ISDIGIT(*s)) {
141 
142         /* Save the current position */
143         backup = s;
144 
145         /* handle positional parameters */
146         if(ISDIGIT(*s)) {
147             spec->fArgPos = (int) (*s++ - DIGIT_ZERO);
148 
149             while(ISDIGIT(*s)) {
150                 spec->fArgPos *= 10;
151                 spec->fArgPos += (int) (*s++ - DIGIT_ZERO);
152             }
153         }
154 
155         /* if there is no '$', don't read anything */
156         if(*s != SPEC_DOLLARSIGN) {
157             spec->fArgPos = -1;
158             s = backup;
159         }
160         /* munge the '$' */
161         else
162             s++;
163     }
164 
165     /* Get any format flags */
166     while(ISFLAG(*s)) {
167         switch(*s++) {
168 
169             /* skip argument */
170         case FLAG_ASTERISK:
171             info->fSkipArg = TRUE;
172             break;
173 
174             /* pad character specified */
175         case FLAG_PAREN:
176 
177             /* first four characters are hex values for pad char */
178             info->fPadChar = (UChar)ufmt_digitvalue(*s++);
179             info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++));
180             info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++));
181             info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++));
182 
183             /* final character is ignored */
184             s++;
185 
186             break;
187         }
188     }
189 
190     /* Get the width */
191     if(ISDIGIT(*s)){
192         info->fWidth = (int) (*s++ - DIGIT_ZERO);
193 
194         while(ISDIGIT(*s)) {
195             info->fWidth *= 10;
196             info->fWidth += (int) (*s++ - DIGIT_ZERO);
197         }
198     }
199 
200     /* Get any modifiers */
201     if(ISMOD(*s)) {
202         switch(*s++) {
203 
204             /* short */
205         case MOD_H:
206             info->fIsShort = TRUE;
207             break;
208 
209             /* long or long long */
210         case MOD_LOWERL:
211             if(*s == MOD_LOWERL) {
212                 info->fIsLongLong = TRUE;
213                 /* skip over the next 'l' */
214                 s++;
215             }
216             else
217                 info->fIsLong = TRUE;
218             break;
219 
220             /* long double */
221         case MOD_L:
222             info->fIsLongDouble = TRUE;
223             break;
224         }
225     }
226 
227     /* finally, get the specifier letter */
228     info->fSpec = *s++;
229 
230     /* return # of characters in this specifier */
231     return (int32_t)(s - fmt);
232 }
233 
/* U+0025 PERCENT SIGN */
#define UP_PERCENT 0x0025


/*
 * Initializers pairing an argument type with its handler function.
 * The conversion character each one serves is given on the right.
 */

/* ANSI style formatting */
/* Use US-ASCII characters only for formatting */

#define UFMT_SIMPLE_PERCENT {ufmt_simple_percent, u_scanf_simple_percent_handler}   /* % */
#define UFMT_STRING         {ufmt_string, u_scanf_string_handler}                   /* s */
#define UFMT_CHAR           {ufmt_string, u_scanf_char_handler}                     /* c */
#define UFMT_INT            {ufmt_int, u_scanf_integer_handler}                     /* d, i */
#define UFMT_UINT           {ufmt_int, u_scanf_uinteger_handler}                    /* u */
#define UFMT_OCTAL          {ufmt_int, u_scanf_octal_handler}                       /* o */
#define UFMT_HEX            {ufmt_int, u_scanf_hex_handler}                         /* x, X */
#define UFMT_DOUBLE         {ufmt_double, u_scanf_double_handler}                   /* f */
#define UFMT_SCIENTIFIC     {ufmt_double, u_scanf_scientific_handler}               /* e, E */
#define UFMT_SCIDBL         {ufmt_double, u_scanf_scidbl_handler}                   /* g, G */
#define UFMT_COUNT          {ufmt_count, u_scanf_count_handler}                     /* n */
#define UFMT_SCANSET        {ufmt_string, u_scanf_scanset_handler}                  /* [ */

/* non-ANSI extensions */
/* Use US-ASCII characters only for formatting */

#define UFMT_POINTER        {ufmt_pointer, u_scanf_pointer_handler}                 /* p */
#define UFMT_SPELLOUT       {ufmt_double, u_scanf_spellout_handler}                 /* V */
#define UFMT_PERCENT        {ufmt_double, u_scanf_percent_handler}                  /* P */
#define UFMT_UCHAR          {ufmt_uchar, u_scanf_uchar_handler}                     /* C  K is old format */
#define UFMT_USTRING        {ufmt_ustring, u_scanf_ustring_handler}                 /* S  U is old format */


/* Entry with no handler */
#define UFMT_EMPTY {ufmt_empty, NULL}
281 
282 /**
283  * A u_scanf handler function.
284  * A u_scanf handler is responsible for handling a single u_scanf
285  * format specification, for example 'd' or 's'.
286  * @param stream The UFILE to which to write output.
287  * @param info A pointer to a <TT>u_scanf_spec_info</TT> struct containing
288  * information on the format specification.
289  * @param args A pointer to the argument data
290  * @param fmt A pointer to the first character in the format string
291  * following the spec.
292  * @param fmtConsumed On output, set to the number of characters consumed
293  * in <TT>fmt</TT>. Do nothing, if the argument isn't variable width.
294  * @param argConverted The number of arguments converted and assigned, or -1 if an
295  * error occurred.
296  * @return The number of code points consumed during reading.
297  */
298 typedef int32_t (*u_scanf_handler) (UFILE   *stream,
299                    u_scanf_spec_info  *info,
300                    ufmt_args                *args,
301                    const UChar              *fmt,
302                    int32_t                  *fmtConsumed,
303                    int32_t                  *argConverted);
304 
305 typedef struct u_scanf_info {
306     ufmt_type_info info;
307     u_scanf_handler handler;
308 } u_scanf_info;
309 
/* Number of format handlers (presumably the size of a handler table
   defined later in this file -- TODO confirm) */
#define USCANF_NUM_FMT_HANDLERS 108

/* Capacity, in UChars, of the buffers that receive number-format symbols */
#define USCANF_SYMBOL_BUFFER_SIZE 8

/* We do not use handlers for 0-0x1f */
#define USCANF_BASE_FMT_HANDLERS 0x20
315 
316 
317 static int32_t
u_scanf_skip_leading_ws(UFILE * input,UChar pad)318 u_scanf_skip_leading_ws(UFILE   *input,
319                         UChar   pad)
320 {
321     UChar   c;
322     int32_t count = 0;
323     UBool isNotEOF;
324 
325     /* skip all leading ws in the input */
326     while( ((isNotEOF = ufile_getch(input, &c)) == TRUE) && (c == pad || u_isWhitespace(c)) )
327     {
328         count++;
329     }
330 
331     /* put the final character back on the input */
332     if(isNotEOF)
333         u_fungetc(c, input);
334 
335     return count;
336 }
337 
338 /* TODO: Is always skipping the prefix symbol as a positive sign a good idea in all locales? */
339 static int32_t
u_scanf_skip_leading_positive_sign(UFILE * input,UNumberFormat * format,UErrorCode * status)340 u_scanf_skip_leading_positive_sign(UFILE   *input,
341                                    UNumberFormat *format,
342                                    UErrorCode *status)
343 {
344     UChar   c;
345     int32_t count = 0;
346     UBool isNotEOF;
347     UChar plusSymbol[USCANF_SYMBOL_BUFFER_SIZE];
348     int32_t symbolLen;
349     UErrorCode localStatus = U_ZERO_ERROR;
350 
351     if (U_SUCCESS(*status)) {
352         symbolLen = unum_getSymbol(format,
353             UNUM_PLUS_SIGN_SYMBOL,
354             plusSymbol,
355             UPRV_LENGTHOF(plusSymbol),
356             &localStatus);
357 
358         if (U_SUCCESS(localStatus)) {
359             /* skip all leading ws in the input */
360             while( ((isNotEOF = ufile_getch(input, &c)) == TRUE) && (count < symbolLen && c == plusSymbol[count]) )
361             {
362                 count++;
363             }
364 
365             /* put the final character back on the input */
366             if(isNotEOF) {
367                 u_fungetc(c, input);
368             }
369         }
370     }
371 
372     return count;
373 }
374 
375 static int32_t
u_scanf_simple_percent_handler(UFILE * input,u_scanf_spec_info * info,ufmt_args * args,const UChar * fmt,int32_t * fmtConsumed,int32_t * argConverted)376 u_scanf_simple_percent_handler(UFILE        *input,
377                                u_scanf_spec_info *info,
378                                ufmt_args    *args,
379                                const UChar  *fmt,
380                                int32_t      *fmtConsumed,
381                                int32_t      *argConverted)
382 {
383     (void)info;
384     (void)args;
385     (void)fmt;
386     (void)fmtConsumed;
387 
388     /* make sure the next character in the input is a percent */
389     *argConverted = 0;
390     if(u_fgetc(input) != 0x0025) {
391         *argConverted = -1;
392     }
393     return 1;
394 }
395 
396 static int32_t
u_scanf_count_handler(UFILE * input,u_scanf_spec_info * info,ufmt_args * args,const UChar * fmt,int32_t * fmtConsumed,int32_t * argConverted)397 u_scanf_count_handler(UFILE         *input,
398                       u_scanf_spec_info *info,
399                       ufmt_args     *args,
400                       const UChar   *fmt,
401                       int32_t       *fmtConsumed,
402                       int32_t       *argConverted)
403 {
404     (void)input;
405     (void)fmt;
406     (void)fmtConsumed;
407 
408     /* in the special case of count, the u_scanf_spec_info's width */
409     /* will contain the # of items converted thus far */
410     if (!info->fSkipArg) {
411         if (info->fIsShort)
412             *(int16_t*)(args[0].ptrValue) = (int16_t)(UINT16_MAX & info->fWidth);
413         else if (info->fIsLongLong)
414             *(int64_t*)(args[0].ptrValue) = info->fWidth;
415         else
416             *(int32_t*)(args[0].ptrValue) = (int32_t)(UINT32_MAX & info->fWidth);
417     }
418     *argConverted = 0;
419 
420     /* we converted 0 args */
421     return 0;
422 }
423 
424 static int32_t
u_scanf_double_handler(UFILE * input,u_scanf_spec_info * info,ufmt_args * args,const UChar * fmt,int32_t * fmtConsumed,int32_t * argConverted)425 u_scanf_double_handler(UFILE        *input,
426                        u_scanf_spec_info *info,
427                        ufmt_args    *args,
428                        const UChar  *fmt,
429                        int32_t      *fmtConsumed,
430                        int32_t      *argConverted)
431 {
432     (void)fmt;
433     (void)fmtConsumed;
434 
435     int32_t         len;
436     double          num;
437     UNumberFormat   *format;
438     int32_t         parsePos    = 0;
439     int32_t         skipped;
440     UErrorCode      status      = U_ZERO_ERROR;
441 
442 
443     /* skip all ws in the input */
444     skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
445 
446     /* fill the input's internal buffer */
447     ufile_fill_uchar_buffer(input);
448 
449     /* determine the size of the input's buffer */
450     len = (int32_t)(input->str.fLimit - input->str.fPos);
451 
452     /* truncate to the width, if specified */
453     if(info->fWidth != -1)
454         len = ufmt_min(len, info->fWidth);
455 
456     /* get the formatter */
457     format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL);
458 
459     /* handle error */
460     if(format == 0)
461         return 0;
462 
463     /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
464     skipped += u_scanf_skip_leading_positive_sign(input, format, &status);
465 
466     /* parse the number */
467     num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
468 
469     if (!info->fSkipArg) {
470         if (info->fIsLong)
471             *(double*)(args[0].ptrValue) = num;
472         else if (info->fIsLongDouble)
473             *(long double*)(args[0].ptrValue) = num;
474         else
475             *(float*)(args[0].ptrValue) = (float)num;
476     }
477 
478     /* mask off any necessary bits */
479     /*  if(! info->fIsLong_double)
480     num &= DBL_MAX;*/
481 
482     /* update the input's position to reflect consumed data */
483     input->str.fPos += parsePos;
484 
485     /* we converted 1 arg */
486     *argConverted = !info->fSkipArg;
487     return parsePos + skipped;
488 }
489 
490 #define UPRINTF_SYMBOL_BUFFER_SIZE 8
491 
492 static int32_t
u_scanf_scientific_handler(UFILE * input,u_scanf_spec_info * info,ufmt_args * args,const UChar * fmt,int32_t * fmtConsumed,int32_t * argConverted)493 u_scanf_scientific_handler(UFILE        *input,
494                            u_scanf_spec_info *info,
495                            ufmt_args    *args,
496                            const UChar  *fmt,
497                            int32_t      *fmtConsumed,
498                            int32_t      *argConverted)
499 {
500     (void)fmt;
501     (void)fmtConsumed;
502 
503     int32_t         len;
504     double          num;
505     UNumberFormat   *format;
506     int32_t         parsePos    = 0;
507     int32_t         skipped;
508     UErrorCode      status      = U_ZERO_ERROR;
509     UChar srcExpBuf[UPRINTF_SYMBOL_BUFFER_SIZE];
510     int32_t srcLen, expLen;
511     UChar expBuf[UPRINTF_SYMBOL_BUFFER_SIZE];
512 
513 
514     /* skip all ws in the input */
515     skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
516 
517     /* fill the input's internal buffer */
518     ufile_fill_uchar_buffer(input);
519 
520     /* determine the size of the input's buffer */
521     len = (int32_t)(input->str.fLimit - input->str.fPos);
522 
523     /* truncate to the width, if specified */
524     if(info->fWidth != -1)
525         len = ufmt_min(len, info->fWidth);
526 
527     /* get the formatter */
528     format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC);
529 
530     /* handle error */
531     if(format == 0)
532         return 0;
533 
534     /* set the appropriate flags on the formatter */
535 
536     srcLen = unum_getSymbol(format,
537         UNUM_EXPONENTIAL_SYMBOL,
538         srcExpBuf,
539         sizeof(srcExpBuf),
540         &status);
541 
542     /* Upper/lower case the e */
543     if (info->fSpec == (UChar)0x65 /* e */) {
544         expLen = u_strToLower(expBuf, (int32_t)sizeof(expBuf),
545             srcExpBuf, srcLen,
546             input->str.fBundle.fLocale,
547             &status);
548     }
549     else {
550         expLen = u_strToUpper(expBuf, (int32_t)sizeof(expBuf),
551             srcExpBuf, srcLen,
552             input->str.fBundle.fLocale,
553             &status);
554     }
555 
556     unum_setSymbol(format,
557         UNUM_EXPONENTIAL_SYMBOL,
558         expBuf,
559         expLen,
560         &status);
561 
562 
563 
564 
565     /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
566     skipped += u_scanf_skip_leading_positive_sign(input, format, &status);
567 
568     /* parse the number */
569     num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
570 
571     if (!info->fSkipArg) {
572         if (info->fIsLong)
573             *(double*)(args[0].ptrValue) = num;
574         else if (info->fIsLongDouble)
575             *(long double*)(args[0].ptrValue) = num;
576         else
577             *(float*)(args[0].ptrValue) = (float)num;
578     }
579 
580     /* mask off any necessary bits */
581     /*  if(! info->fIsLong_double)
582     num &= DBL_MAX;*/
583 
584     /* update the input's position to reflect consumed data */
585     input->str.fPos += parsePos;
586 
587     /* we converted 1 arg */
588     *argConverted = !info->fSkipArg;
589     return parsePos + skipped;
590 }
591 
592 static int32_t
u_scanf_scidbl_handler(UFILE * input,u_scanf_spec_info * info,ufmt_args * args,const UChar * fmt,int32_t * fmtConsumed,int32_t * argConverted)593 u_scanf_scidbl_handler(UFILE        *input,
594                        u_scanf_spec_info *info,
595                        ufmt_args    *args,
596                        const UChar  *fmt,
597                        int32_t      *fmtConsumed,
598                        int32_t      *argConverted)
599 {
600     (void)fmt;
601     (void)fmtConsumed;
602 
603     int32_t       len;
604     double        num;
605     UNumberFormat *scientificFormat, *genericFormat;
606     /*int32_t       scientificResult, genericResult;*/
607     double        scientificResult, genericResult;
608     int32_t       scientificParsePos = 0, genericParsePos = 0, parsePos = 0;
609     int32_t       skipped;
610     UErrorCode    scientificStatus = U_ZERO_ERROR;
611     UErrorCode    genericStatus = U_ZERO_ERROR;
612 
613 
614     /* since we can't determine by scanning the characters whether */
615     /* a number was formatted in the 'f' or 'g' styles, parse the */
616     /* string with both formatters, and assume whichever one */
617     /* parsed the most is the correct formatter to use */
618 
619 
620     /* skip all ws in the input */
621     skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
622 
623     /* fill the input's internal buffer */
624     ufile_fill_uchar_buffer(input);
625 
626     /* determine the size of the input's buffer */
627     len = (int32_t)(input->str.fLimit - input->str.fPos);
628 
629     /* truncate to the width, if specified */
630     if(info->fWidth != -1)
631         len = ufmt_min(len, info->fWidth);
632 
633     /* get the formatters */
634     scientificFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC);
635     genericFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL);
636 
637     /* handle error */
638     if(scientificFormat == 0 || genericFormat == 0)
639         return 0;
640 
641     /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
642     skipped += u_scanf_skip_leading_positive_sign(input, genericFormat, &genericStatus);
643 
644     /* parse the number using each format*/
645 
646     scientificResult = unum_parseDouble(scientificFormat, input->str.fPos, len,
647         &scientificParsePos, &scientificStatus);
648 
649     genericResult = unum_parseDouble(genericFormat, input->str.fPos, len,
650         &genericParsePos, &genericStatus);
651 
652     /* determine which parse made it farther */
653     if(scientificParsePos > genericParsePos) {
654         /* stash the result in num */
655         num = scientificResult;
656         /* update the input's position to reflect consumed data */
657         parsePos += scientificParsePos;
658     }
659     else {
660         /* stash the result in num */
661         num = genericResult;
662         /* update the input's position to reflect consumed data */
663         parsePos += genericParsePos;
664     }
665     input->str.fPos += parsePos;
666 
667     if (!info->fSkipArg) {
668         if (info->fIsLong)
669             *(double*)(args[0].ptrValue) = num;
670         else if (info->fIsLongDouble)
671             *(long double*)(args[0].ptrValue) = num;
672         else
673             *(float*)(args[0].ptrValue) = (float)num;
674     }
675 
676     /* mask off any necessary bits */
677     /*  if(! info->fIsLong_double)
678     num &= DBL_MAX;*/
679 
680     /* we converted 1 arg */
681     *argConverted = !info->fSkipArg;
682     return parsePos + skipped;
683 }
684 
685 static int32_t
u_scanf_integer_handler(UFILE * input,u_scanf_spec_info * info,ufmt_args * args,const UChar * fmt,int32_t * fmtConsumed,int32_t * argConverted)686 u_scanf_integer_handler(UFILE       *input,
687                         u_scanf_spec_info *info,
688                         ufmt_args   *args,
689                         const UChar *fmt,
690                         int32_t     *fmtConsumed,
691                         int32_t     *argConverted)
692 {
693     (void)fmt;
694     (void)fmtConsumed;
695 
696     int32_t         len;
697     void            *num        = (void*) (args[0].ptrValue);
698     UNumberFormat   *format, *localFormat;
699     int32_t         parsePos    = 0;
700     int32_t         skipped;
701     int32_t         parseIntOnly = 0;
702     UErrorCode      status      = U_ZERO_ERROR;
703     int64_t         result;
704 
705 
706     /* skip all ws in the input */
707     skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
708 
709     /* fill the input's internal buffer */
710     ufile_fill_uchar_buffer(input);
711 
712     /* determine the size of the input's buffer */
713     len = (int32_t)(input->str.fLimit - input->str.fPos);
714 
715     /* truncate to the width, if specified */
716     if(info->fWidth != -1)
717         len = ufmt_min(len, info->fWidth);
718 
719     /* get the formatter */
720     format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL);
721 
722     /* handle error */
723     if(format == 0)
724         return 0;
725 
726     /* for integer types, do not attempt to parse fractions */
727     localFormat = unum_clone(format, &status);
728     if(U_FAILURE(status))
729         return 0;
730 
731     if(info->fSpec == 'd' || info->fSpec == 'i' || info->fSpec == 'u')
732         parseIntOnly = 1;
733     unum_setAttribute(localFormat, UNUM_PARSE_INT_ONLY, parseIntOnly);
734 
735     /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
736     skipped += u_scanf_skip_leading_positive_sign(input, localFormat, &status);
737 
738     /* parse the number */
739     result = unum_parseInt64(localFormat, input->str.fPos, len, &parsePos, &status);
740 
741     /* mask off any necessary bits */
742     if (!info->fSkipArg) {
743         if (info->fIsShort)
744             *(int16_t*)num = (int16_t)(UINT16_MAX & result);
745         else if (info->fIsLongLong)
746             *(int64_t*)num = result;
747         else
748             *(int32_t*)num = (int32_t)(UINT32_MAX & result);
749     }
750 
751     /* update the input's position to reflect consumed data */
752     input->str.fPos += parsePos;
753 
754     /* cleanup cloned formatter */
755     unum_close(localFormat);
756 
757     /* we converted 1 arg */
758     *argConverted = !info->fSkipArg;
759     return parsePos + skipped;
760 }
761 
762 static int32_t
u_scanf_uinteger_handler(UFILE * input,u_scanf_spec_info * info,ufmt_args * args,const UChar * fmt,int32_t * fmtConsumed,int32_t * argConverted)763 u_scanf_uinteger_handler(UFILE          *input,
764                          u_scanf_spec_info *info,
765                          ufmt_args      *args,
766                          const UChar    *fmt,
767                          int32_t        *fmtConsumed,
768                          int32_t        *argConverted)
769 {
770     /* TODO Fix this when Numberformat handles uint64_t */
771     return u_scanf_integer_handler(input, info, args, fmt, fmtConsumed, argConverted);
772 }
773 
774 static int32_t
u_scanf_percent_handler(UFILE * input,u_scanf_spec_info * info,ufmt_args * args,const UChar * fmt,int32_t * fmtConsumed,int32_t * argConverted)775 u_scanf_percent_handler(UFILE       *input,
776                         u_scanf_spec_info *info,
777                         ufmt_args   *args,
778                         const UChar *fmt,
779                         int32_t     *fmtConsumed,
780                         int32_t     *argConverted)
781 {
782     (void)fmt;
783     (void)fmtConsumed;
784 
785     int32_t         len;
786     double          num;
787     UNumberFormat   *format;
788     int32_t         parsePos    = 0;
789     UErrorCode      status      = U_ZERO_ERROR;
790 
791 
792     /* skip all ws in the input */
793     u_scanf_skip_leading_ws(input, info->fPadChar);
794 
795     /* fill the input's internal buffer */
796     ufile_fill_uchar_buffer(input);
797 
798     /* determine the size of the input's buffer */
799     len = (int32_t)(input->str.fLimit - input->str.fPos);
800 
801     /* truncate to the width, if specified */
802     if(info->fWidth != -1)
803         len = ufmt_min(len, info->fWidth);
804 
805     /* get the formatter */
806     format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_PERCENT);
807 
808     /* handle error */
809     if(format == 0)
810         return 0;
811 
812     /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
813     u_scanf_skip_leading_positive_sign(input, format, &status);
814 
815     /* parse the number */
816     num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
817 
818     if (!info->fSkipArg) {
819         *(double*)(args[0].ptrValue) = num;
820     }
821 
822     /* mask off any necessary bits */
823     /*  if(! info->fIsLong_double)
824     num &= DBL_MAX;*/
825 
826     /* update the input's position to reflect consumed data */
827     input->str.fPos += parsePos;
828 
829     /* we converted 1 arg */
830     *argConverted = !info->fSkipArg;
831     return parsePos;
832 }
833 
834 static int32_t
u_scanf_string_handler(UFILE * input,u_scanf_spec_info * info,ufmt_args * args,const UChar * fmt,int32_t * fmtConsumed,int32_t * argConverted)835 u_scanf_string_handler(UFILE        *input,
836                        u_scanf_spec_info *info,
837                        ufmt_args    *args,
838                        const UChar  *fmt,
839                        int32_t      *fmtConsumed,
840                        int32_t      *argConverted)
841 {
842     (void)fmt;
843     (void)fmtConsumed;
844 
845     const UChar *source;
846     UConverter  *conv;
847     char        *arg    = (char*)(args[0].ptrValue);
848     char        *alias  = arg;
849     char        *limit;
850     UErrorCode  status  = U_ZERO_ERROR;
851     int32_t     count;
852     int32_t     skipped = 0;
853     UChar       c;
854     UBool       isNotEOF = FALSE;
855 
856     /* skip all ws in the input */
857     if (info->fIsString) {
858         skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
859     }
860 
861     /* get the string one character at a time, truncating to the width */
862     count = 0;
863 
864     /* open the default converter */
865     conv = u_getDefaultConverter(&status);
866 
867     if(U_FAILURE(status))
868         return -1;
869 
870     while( (info->fWidth == -1 || count < info->fWidth)
871         && ((isNotEOF = ufile_getch(input, &c)) == TRUE)
872         && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c))))
873     {
874 
875         if (!info->fSkipArg) {
876             /* put the character from the input onto the target */
877             source = &c;
878             /* Since we do this one character at a time, do it this way. */
879             if (info->fWidth > 0) {
880                 limit = alias + info->fWidth - count;
881             }
882             else {
883                 limit = alias + ucnv_getMaxCharSize(conv);
884             }
885 
886             /* convert the character to the default codepage */
887             ucnv_fromUnicode(conv, &alias, limit, &source, source + 1,
888                 NULL, TRUE, &status);
889 
890             if(U_FAILURE(status)) {
891                 /* clean up */
892                 u_releaseDefaultConverter(conv);
893                 return -1;
894             }
895         }
896 
897         /* increment the count */
898         ++count;
899     }
900 
901     /* put the final character we read back on the input */
902     if (!info->fSkipArg) {
903         if ((info->fWidth == -1 || count < info->fWidth) && isNotEOF)
904             u_fungetc(c, input);
905 
906         /* add the terminator */
907         if (info->fIsString) {
908             *alias = 0x00;
909         }
910     }
911 
912     /* clean up */
913     u_releaseDefaultConverter(conv);
914 
915     /* we converted 1 arg */
916     *argConverted = !info->fSkipArg;
917     return count + skipped;
918 }
919 
920 static int32_t
u_scanf_char_handler(UFILE * input,u_scanf_spec_info * info,ufmt_args * args,const UChar * fmt,int32_t * fmtConsumed,int32_t * argConverted)921 u_scanf_char_handler(UFILE          *input,
922                      u_scanf_spec_info *info,
923                      ufmt_args      *args,
924                      const UChar    *fmt,
925                      int32_t        *fmtConsumed,
926                      int32_t        *argConverted)
927 {
928     if (info->fWidth < 0) {
929         info->fWidth = 1;
930     }
931     info->fIsString = FALSE;
932     return u_scanf_string_handler(input, info, args, fmt, fmtConsumed, argConverted);
933 }
934 
935 static int32_t
u_scanf_ustring_handler(UFILE * input,u_scanf_spec_info * info,ufmt_args * args,const UChar * fmt,int32_t * fmtConsumed,int32_t * argConverted)936 u_scanf_ustring_handler(UFILE       *input,
937                         u_scanf_spec_info *info,
938                         ufmt_args   *args,
939                         const UChar *fmt,
940                         int32_t     *fmtConsumed,
941                         int32_t     *argConverted)
942 {
943     (void)fmt;
944     (void)fmtConsumed;
945 
946     UChar   *arg     = (UChar*)(args[0].ptrValue);
947     UChar   *alias     = arg;
948     int32_t count;
949     int32_t skipped = 0;
950     UChar   c;
951     UBool   isNotEOF = FALSE;
952 
953     /* skip all ws in the input */
954     if (info->fIsString) {
955         skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
956     }
957 
958     /* get the string one character at a time, truncating to the width */
959     count = 0;
960 
961     while( (info->fWidth == -1 || count < info->fWidth)
962         && ((isNotEOF = ufile_getch(input, &c)) == TRUE)
963         && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c))))
964     {
965 
966         /* put the character from the input onto the target */
967         if (!info->fSkipArg) {
968             *alias++ = c;
969         }
970 
971         /* increment the count */
972         ++count;
973     }
974 
975     /* put the final character we read back on the input */
976     if (!info->fSkipArg) {
977         if((info->fWidth == -1 || count < info->fWidth) && isNotEOF) {
978             u_fungetc(c, input);
979         }
980 
981         /* add the terminator */
982         if (info->fIsString) {
983             *alias = 0x0000;
984         }
985     }
986 
987     /* we converted 1 arg */
988     *argConverted = !info->fSkipArg;
989     return count + skipped;
990 }
991 
992 static int32_t
u_scanf_uchar_handler(UFILE * input,u_scanf_spec_info * info,ufmt_args * args,const UChar * fmt,int32_t * fmtConsumed,int32_t * argConverted)993 u_scanf_uchar_handler(UFILE         *input,
994                       u_scanf_spec_info *info,
995                       ufmt_args     *args,
996                       const UChar   *fmt,
997                       int32_t       *fmtConsumed,
998                       int32_t       *argConverted)
999 {
1000     if (info->fWidth < 0) {
1001         info->fWidth = 1;
1002     }
1003     info->fIsString = FALSE;
1004     return u_scanf_ustring_handler(input, info, args, fmt, fmtConsumed, argConverted);
1005 }
1006 
1007 static int32_t
u_scanf_spellout_handler(UFILE * input,u_scanf_spec_info * info,ufmt_args * args,const UChar * fmt,int32_t * fmtConsumed,int32_t * argConverted)1008 u_scanf_spellout_handler(UFILE          *input,
1009                          u_scanf_spec_info *info,
1010                          ufmt_args      *args,
1011                          const UChar    *fmt,
1012                          int32_t        *fmtConsumed,
1013                          int32_t        *argConverted)
1014 {
1015     (void)fmt;
1016     (void)fmtConsumed;
1017 
1018     int32_t         len;
1019     double          num;
1020     UNumberFormat   *format;
1021     int32_t         parsePos    = 0;
1022     int32_t         skipped;
1023     UErrorCode      status      = U_ZERO_ERROR;
1024 
1025 
1026     /* skip all ws in the input */
1027     skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
1028 
1029     /* fill the input's internal buffer */
1030     ufile_fill_uchar_buffer(input);
1031 
1032     /* determine the size of the input's buffer */
1033     len = (int32_t)(input->str.fLimit - input->str.fPos);
1034 
1035     /* truncate to the width, if specified */
1036     if(info->fWidth != -1)
1037         len = ufmt_min(len, info->fWidth);
1038 
1039     /* get the formatter */
1040     format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SPELLOUT);
1041 
1042     /* handle error */
1043     if(format == 0)
1044         return 0;
1045 
1046     /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
1047     /* This is not applicable to RBNF. */
1048     /*skipped += u_scanf_skip_leading_positive_sign(input, format, &status);*/
1049 
1050     /* parse the number */
1051     num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
1052 
1053     if (!info->fSkipArg) {
1054         *(double*)(args[0].ptrValue) = num;
1055     }
1056 
1057     /* mask off any necessary bits */
1058     /*  if(! info->fIsLong_double)
1059     num &= DBL_MAX;*/
1060 
1061     /* update the input's position to reflect consumed data */
1062     input->str.fPos += parsePos;
1063 
1064     /* we converted 1 arg */
1065     *argConverted = !info->fSkipArg;
1066     return parsePos + skipped;
1067 }
1068 
1069 static int32_t
u_scanf_hex_handler(UFILE * input,u_scanf_spec_info * info,ufmt_args * args,const UChar * fmt,int32_t * fmtConsumed,int32_t * argConverted)1070 u_scanf_hex_handler(UFILE       *input,
1071                     u_scanf_spec_info *info,
1072                     ufmt_args   *args,
1073                     const UChar *fmt,
1074                     int32_t     *fmtConsumed,
1075                     int32_t     *argConverted)
1076 {
1077     (void)fmt;
1078     (void)fmtConsumed;
1079 
1080     int32_t     len;
1081     int32_t     skipped;
1082     void        *num    = (void*) (args[0].ptrValue);
1083     int64_t     result;
1084 
1085     /* skip all ws in the input */
1086     skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
1087 
1088     /* fill the input's internal buffer */
1089     ufile_fill_uchar_buffer(input);
1090 
1091     /* determine the size of the input's buffer */
1092     len = (int32_t)(input->str.fLimit - input->str.fPos);
1093 
1094     /* truncate to the width, if specified */
1095     if(info->fWidth != -1)
1096         len = ufmt_min(len, info->fWidth);
1097 
1098     /* check for alternate form */
1099     if( *(input->str.fPos) == 0x0030 &&
1100         (*(input->str.fPos + 1) == 0x0078 || *(input->str.fPos + 1) == 0x0058) ) {
1101 
1102         /* skip the '0' and 'x' or 'X' if present */
1103         input->str.fPos += 2;
1104         len -= 2;
1105     }
1106 
1107     /* parse the number */
1108     result = ufmt_uto64(input->str.fPos, &len, 16);
1109 
1110     /* update the input's position to reflect consumed data */
1111     input->str.fPos += len;
1112 
1113     /* mask off any necessary bits */
1114     if (!info->fSkipArg) {
1115         if (info->fIsShort)
1116             *(int16_t*)num = (int16_t)(UINT16_MAX & result);
1117         else if (info->fIsLongLong)
1118             *(int64_t*)num = result;
1119         else
1120             *(int32_t*)num = (int32_t)(UINT32_MAX & result);
1121     }
1122 
1123     /* we converted 1 arg */
1124     *argConverted = !info->fSkipArg;
1125     return len + skipped;
1126 }
1127 
1128 static int32_t
u_scanf_octal_handler(UFILE * input,u_scanf_spec_info * info,ufmt_args * args,const UChar * fmt,int32_t * fmtConsumed,int32_t * argConverted)1129 u_scanf_octal_handler(UFILE         *input,
1130                       u_scanf_spec_info *info,
1131                       ufmt_args     *args,
1132                       const UChar   *fmt,
1133                       int32_t       *fmtConsumed,
1134                       int32_t       *argConverted)
1135 {
1136     (void)fmt;
1137     (void)fmtConsumed;
1138 
1139     int32_t     len;
1140     int32_t     skipped;
1141     void        *num         = (void*) (args[0].ptrValue);
1142     int64_t     result;
1143 
1144     /* skip all ws in the input */
1145     skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
1146 
1147     /* fill the input's internal buffer */
1148     ufile_fill_uchar_buffer(input);
1149 
1150     /* determine the size of the input's buffer */
1151     len = (int32_t)(input->str.fLimit - input->str.fPos);
1152 
1153     /* truncate to the width, if specified */
1154     if(info->fWidth != -1)
1155         len = ufmt_min(len, info->fWidth);
1156 
1157     /* parse the number */
1158     result = ufmt_uto64(input->str.fPos, &len, 8);
1159 
1160     /* update the input's position to reflect consumed data */
1161     input->str.fPos += len;
1162 
1163     /* mask off any necessary bits */
1164     if (!info->fSkipArg) {
1165         if (info->fIsShort)
1166             *(int16_t*)num = (int16_t)(UINT16_MAX & result);
1167         else if (info->fIsLongLong)
1168             *(int64_t*)num = result;
1169         else
1170             *(int32_t*)num = (int32_t)(UINT32_MAX & result);
1171     }
1172 
1173     /* we converted 1 arg */
1174     *argConverted = !info->fSkipArg;
1175     return len + skipped;
1176 }
1177 
1178 static int32_t
u_scanf_pointer_handler(UFILE * input,u_scanf_spec_info * info,ufmt_args * args,const UChar * fmt,int32_t * fmtConsumed,int32_t * argConverted)1179 u_scanf_pointer_handler(UFILE       *input,
1180                         u_scanf_spec_info *info,
1181                         ufmt_args   *args,
1182                         const UChar *fmt,
1183                         int32_t     *fmtConsumed,
1184                         int32_t     *argConverted)
1185 {
1186     (void)fmt;
1187     (void)fmtConsumed;
1188 
1189     int32_t len;
1190     int32_t skipped;
1191     void    *result;
1192     void    **p     = (void**)(args[0].ptrValue);
1193 
1194 
1195     /* skip all ws in the input */
1196     skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
1197 
1198     /* fill the input's internal buffer */
1199     ufile_fill_uchar_buffer(input);
1200 
1201     /* determine the size of the input's buffer */
1202     len = (int32_t)(input->str.fLimit - input->str.fPos);
1203 
1204     /* truncate to the width, if specified */
1205     if(info->fWidth != -1) {
1206         len = ufmt_min(len, info->fWidth);
1207     }
1208 
1209     /* Make sure that we don't consume too much */
1210     if (len > (int32_t)(sizeof(void*)*2)) {
1211         len = (int32_t)(sizeof(void*)*2);
1212     }
1213 
1214     /* parse the pointer - assign to temporary value */
1215     result = ufmt_utop(input->str.fPos, &len);
1216 
1217     if (!info->fSkipArg) {
1218         *p = result;
1219     }
1220 
1221     /* update the input's position to reflect consumed data */
1222     input->str.fPos += len;
1223 
1224     /* we converted 1 arg */
1225     *argConverted = !info->fSkipArg;
1226     return len + skipped;
1227 }
1228 
1229 static int32_t
u_scanf_scanset_handler(UFILE * input,u_scanf_spec_info * info,ufmt_args * args,const UChar * fmt,int32_t * fmtConsumed,int32_t * argConverted)1230 u_scanf_scanset_handler(UFILE       *input,
1231                         u_scanf_spec_info *info,
1232                         ufmt_args   *args,
1233                         const UChar *fmt,
1234                         int32_t     *fmtConsumed,
1235                         int32_t     *argConverted)
1236 {
1237     USet        *scanset;
1238     UErrorCode  status = U_ZERO_ERROR;
1239     int32_t     chLeft = INT32_MAX;
1240     UChar32     c;
1241     UChar       *alias = (UChar*) (args[0].ptrValue);
1242     UBool       isNotEOF = FALSE;
1243     UBool       readCharacter = FALSE;
1244 
1245     /* Create an empty set */
1246     scanset = uset_open(0, -1);
1247 
1248     /* Back up one to get the [ */
1249     fmt--;
1250 
1251     /* truncate to the width, if specified and alias the target */
1252     if(info->fWidth >= 0) {
1253         chLeft = info->fWidth;
1254     }
1255 
1256     /* parse the scanset from the fmt string */
1257     *fmtConsumed = uset_applyPattern(scanset, fmt, -1, 0, &status);
1258 
1259     /* verify that the parse was successful */
1260     if (U_SUCCESS(status)) {
1261         c=0;
1262 
1263         /* grab characters one at a time and make sure they are in the scanset */
1264         while(chLeft > 0) {
1265             if ( ((isNotEOF = ufile_getch32(input, &c)) == TRUE) && uset_contains(scanset, c) ) {
1266                 readCharacter = TRUE;
1267                 if (!info->fSkipArg) {
1268                     int32_t idx = 0;
1269                     UBool isError = FALSE;
1270 
1271                     U16_APPEND(alias, idx, chLeft, c, isError);
1272                     if (isError) {
1273                         break;
1274                     }
1275                     alias += idx;
1276                 }
1277                 chLeft -= (1 + U_IS_SUPPLEMENTARY(c));
1278             }
1279             else {
1280                 /* if the character's not in the scanset, break out */
1281                 break;
1282             }
1283         }
1284 
1285         /* put the final character we read back on the input */
1286         if(isNotEOF && chLeft > 0) {
1287             u_fungetc(c, input);
1288         }
1289     }
1290 
1291     uset_close(scanset);
1292 
1293     /* if we didn't match at least 1 character, fail */
1294     if(!readCharacter)
1295         return -1;
1296     /* otherwise, add the terminator */
1297     else if (!info->fSkipArg) {
1298         *alias = 0x00;
1299     }
1300 
1301     /* we converted 1 arg */
1302     *argConverted = !info->fSkipArg;
1303     return (info->fWidth >= 0 ? info->fWidth : INT32_MAX) - chLeft;
1304 }
1305 
1306 /* Use US-ASCII characters only for formatting. Most codepages have
1307  characters 20-7F from Unicode. Using any other codepage specific
1308  characters will make it very difficult to format the string on
1309  non-Unicode machines */
1310 static const u_scanf_info g_u_scanf_infos[USCANF_NUM_FMT_HANDLERS] = {
1311 /* 0x20 */
1312     UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1313     UFMT_EMPTY,         UFMT_SIMPLE_PERCENT,UFMT_EMPTY,         UFMT_EMPTY,
1314     UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1315     UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1316 
1317 /* 0x30 */
1318     UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1319     UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1320     UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1321     UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1322 
1323 /* 0x40 */
1324     UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_UCHAR,
1325     UFMT_EMPTY,         UFMT_SCIENTIFIC,    UFMT_EMPTY,         UFMT_SCIDBL,
1326 #ifdef U_USE_OBSOLETE_IO_FORMATTING
1327     UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_UCHAR/*deprecated*/,
1328 #else
1329     UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1330 #endif
1331     UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1332 
1333 /* 0x50 */
1334     UFMT_PERCENT,       UFMT_EMPTY,         UFMT_EMPTY,         UFMT_USTRING,
1335 #ifdef U_USE_OBSOLETE_IO_FORMATTING
1336     UFMT_EMPTY,         UFMT_USTRING/*deprecated*/,UFMT_SPELLOUT,      UFMT_EMPTY,
1337 #else
1338     UFMT_EMPTY,         UFMT_EMPTY,         UFMT_SPELLOUT,      UFMT_EMPTY,
1339 #endif
1340     UFMT_HEX,           UFMT_EMPTY,         UFMT_EMPTY,         UFMT_SCANSET,
1341     UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1342 
1343 /* 0x60 */
1344     UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_CHAR,
1345     UFMT_INT,           UFMT_SCIENTIFIC,    UFMT_DOUBLE,        UFMT_SCIDBL,
1346     UFMT_EMPTY,         UFMT_INT,           UFMT_EMPTY,         UFMT_EMPTY,
1347     UFMT_EMPTY,         UFMT_EMPTY,         UFMT_COUNT,         UFMT_OCTAL,
1348 
1349 /* 0x70 */
1350     UFMT_POINTER,       UFMT_EMPTY,         UFMT_EMPTY,         UFMT_STRING,
1351     UFMT_EMPTY,         UFMT_UINT,          UFMT_EMPTY,         UFMT_EMPTY,
1352     UFMT_HEX,           UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1353     UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1354 };
1355 
1356 U_CFUNC int32_t
u_scanf_parse(UFILE * f,const UChar * patternSpecification,va_list ap)1357 u_scanf_parse(UFILE     *f,
1358             const UChar *patternSpecification,
1359             va_list     ap)
1360 {
1361     const UChar     *alias;
1362     int32_t         count, converted, argConsumed, cpConsumed;
1363     uint16_t        handlerNum;
1364 
1365     ufmt_args       args;
1366     u_scanf_spec    spec;
1367     ufmt_type_info  info;
1368     u_scanf_handler handler;
1369 
1370     /* alias the pattern */
1371     alias = patternSpecification;
1372 
1373     /* haven't converted anything yet */
1374     argConsumed = 0;
1375     converted = 0;
1376     cpConsumed = 0;
1377 
1378     /* iterate through the pattern */
1379     for(;;) {
1380 
1381         /* match any characters up to the next '%' */
1382         while(*alias != UP_PERCENT && *alias != 0x0000 && u_fgetc(f) == *alias) {
1383             alias++;
1384         }
1385 
1386         /* if we aren't at a '%', or if we're at end of string, break*/
1387         if(*alias != UP_PERCENT || *alias == 0x0000)
1388             break;
1389 
1390         /* parse the specifier */
1391         count = u_scanf_parse_spec(alias, &spec);
1392 
1393         /* update the pointer in pattern */
1394         alias += count;
1395 
1396         handlerNum = (uint16_t)(spec.fInfo.fSpec - USCANF_BASE_FMT_HANDLERS);
1397         if (handlerNum < USCANF_NUM_FMT_HANDLERS) {
1398             /* skip the argument, if necessary */
1399             /* query the info function for argument information */
1400             info = g_u_scanf_infos[ handlerNum ].info;
1401             if (info != ufmt_count && u_feof(f)) {
1402                 break;
1403             }
1404             else if(spec.fInfo.fSkipArg) {
1405                 args.ptrValue = NULL;
1406             }
1407             else {
1408                 switch(info) {
1409                 case ufmt_count:
1410                     /* set the spec's width to the # of items converted */
1411                     spec.fInfo.fWidth = cpConsumed;
1412                     U_FALLTHROUGH;
1413                 case ufmt_char:
1414                 case ufmt_uchar:
1415                 case ufmt_int:
1416                 case ufmt_string:
1417                 case ufmt_ustring:
1418                 case ufmt_pointer:
1419                 case ufmt_float:
1420                 case ufmt_double:
1421                     args.ptrValue = va_arg(ap, void*);
1422                     break;
1423 
1424                 default:
1425                     /* else args is ignored */
1426                     args.ptrValue = NULL;
1427                     break;
1428                 }
1429             }
1430 
1431             /* call the handler function */
1432             handler = g_u_scanf_infos[ handlerNum ].handler;
1433             if(handler != 0) {
1434 
1435                 /* reset count to 1 so that += for alias works. */
1436                 count = 1;
1437 
1438                 cpConsumed += (*handler)(f, &spec.fInfo, &args, alias, &count, &argConsumed);
1439 
1440                 /* if the handler encountered an error condition, break */
1441                 if(argConsumed < 0) {
1442                     converted = -1;
1443                     break;
1444                 }
1445 
1446                 /* add to the # of items converted */
1447                 converted += argConsumed;
1448 
1449                 /* update the pointer in pattern */
1450                 alias += count-1;
1451             }
1452             /* else do nothing */
1453         }
1454         /* else do nothing */
1455 
1456         /* just ignore unknown tags */
1457     }
1458 
1459     /* return # of items converted */
1460     return converted;
1461 }
1462 
#endif /* #if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_CONVERSION */
1464