• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2006 Google Inc.
2 // All Rights Reserved.
3 // Author: renn
4 //
5 // The fscanf, vfscanf and creat functions are implemented so that their
6 // functionality is mostly like their stdio counterparts. However, currently
7 // these functions do not use any buffering, making them rather slow.
8 // File streams are thus processed one character at a time.
9 // Although the implementations of the scanf functions do lack a few minor
10 // features, they should be sufficient for their use in tesseract.
11 //
12 // Licensed under the Apache License, Version 2.0 (the "License");
13 // you may not use this file except in compliance with the License.
14 // You may obtain a copy of the License at
15 // http://www.apache.org/licenses/LICENSE-2.0
16 // Unless required by applicable law or agreed to in writing, software
17 // distributed under the License is distributed on an "AS IS" BASIS,
18 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 // See the License for the specific language governing permissions and
20 // limitations under the License.
21 
22 #include <ctype.h>
23 #include <stdarg.h>
24 #include <stddef.h>
25 #include <inttypes.h>
26 #include <string.h>
27 #include <limits.h>
28 #include <stdio.h>
29 #include <sys/types.h>
30 #include <sys/stat.h>
31 #include <fcntl.h>
32 
33 #include "scanutils.h"
34 #include "tprintf.h"
35 
// Bit flags describing the conversion specification currently being parsed.
// The values are distinct bits so they can be OR-ed together in one int.
enum Flags {
  FL_SPLAT = 1 << 0,  // '*' given: drop the value, do not assign
  FL_INV   = 1 << 1,  // Character set with inverse
  FL_WIDTH = 1 << 2,  // An explicit field width was specified
  FL_MINUS = 1 << 3   // Negative number
};
42 
// Size "rank" of an integer conversion target. Each 'h' length modifier
// lowers the rank by one and each 'l' raises it by one, starting from
// RANK_INT.
enum Ranks {
  RANK_INT      = 0,
  RANK_SHORT    = RANK_INT - 1,
  RANK_CHAR     = RANK_INT - 2,
  RANK_LONG     = RANK_INT + 1,
  RANK_LONGLONG = RANK_INT + 2,
  RANK_PTR      = INT_MAX  // Special value used for pointers
};
51 
52 const enum Ranks kMinRank = RANK_CHAR;
53 const enum Ranks kMaxRank = RANK_LONGLONG;
54 
55 const enum Ranks kIntMaxRank = RANK_LONGLONG;
56 const enum Ranks kSizeTRank = RANK_LONG;
57 const enum Ranks kPtrDiffRank = RANK_LONG;
58 
// Reason why scanning stopped before the format string was exhausted.
enum Bail {
  BAIL_NONE = 0,  // No error condition
  BAIL_EOF  = 1,  // Hit EOF
  BAIL_ERR  = 2   // Conversion mismatch
};
64 
65 // Helper functions ------------------------------------------------------------
// Returns the number of bits in a long, i.e. the number of bitmap entries
// that fit into one word of the match map.
inline size_t LongBit() {
  const size_t bytes_per_long = sizeof(long);
  return bytes_per_long * CHAR_BIT;
}
69 
// Consumes whitespace from the stream and returns the first character that
// is not whitespace (or EOF). That character is pushed back so the next read
// sees it again.
static inline int
SkipSpace(FILE *s)
{
  int next = fgetc(s);
  while (isspace(next)) {
    next = fgetc(s);
  }
  ungetc(next, s);  // Keep the terminating character available for reading
  return next;
}
78 
79 static inline void
SetBit(unsigned long * bitmap,unsigned int bit)80 SetBit(unsigned long *bitmap, unsigned int bit)
81 {
82   bitmap[bit/LongBit()] |= 1UL << (bit%LongBit());
83 }
84 
85 static inline int
TestBit(unsigned long * bitmap,unsigned int bit)86 TestBit(unsigned long *bitmap, unsigned int bit)
87 {
88   return static_cast<int>(bitmap[bit/LongBit()] >> (bit%LongBit())) & 1;
89 }
90 
// Maps a character to its numeric digit value: '0'-'9' give 0-9 and letters
// of either case give 10-35. Any other input (including EOF) yields -1.
static inline int DigitValue(int ch)
{
  if ('0' <= ch && ch <= '9')
    return ch - '0';
  if ('A' <= ch && ch <= 'Z')
    return 10 + (ch - 'A');
  if ('a' <= ch && ch <= 'z')
    return 10 + (ch - 'a');
  return -1;
}
103 
104 // IO (re-)implementations -----------------------------------------------------
streamtoumax(FILE * s,int base)105 uintmax_t streamtoumax(FILE* s, int base)
106 {
107   int minus = 0;
108   uintmax_t v = 0;
109   int d, c = 0;
110 
111   for (c = fgetc(s);
112     isspace(static_cast<unsigned char>(c)) && (c != EOF);
113     c = fgetc(s))
114 
115   // Single optional + or -
116   if (c == '-' || c == '+') {
117     minus = (c == '-');
118     c = fgetc(s);
119   }
120 
121   // Assign correct base
122   if (base == 0) {
123     if (c == '0') {
124       c = fgetc(s);
125       if (c == 'x' || c == 'X') {
126         base = 16;
127         c = fgetc(s);
128       } else {
129         base = 8;
130       }
131     }
132   } else if (base == 16) {
133     if (c == '0') {
134       c = fgetc(s);
135       if (c == 'x' && c == 'X') c = fgetc(s);
136     }
137   }
138 
139   // Actual number parsing
140   for (; (c != EOF) && (d = DigitValue(c)) >= 0 && d < base; c = fgetc(s))
141     v = v*base + d;
142 
143   ungetc(c, s);
144   return minus ? -v : v;
145 }
146 
streamtofloat(FILE * s)147 double streamtofloat(FILE* s)
148 {
149   int minus = 0;
150   int v = 0;
151   int d, c = 0;
152   int k = 1;
153   int w = 0;
154 
155   for (c = fgetc(s);
156     isspace(static_cast<unsigned char>(c)) && (c != EOF);
157     c = fgetc(s));
158 
159   // Single optional + or -
160   if (c == '-' || c == '+') {
161     minus = (c == '-');
162     c = fgetc(s);
163   }
164 
165   // Actual number parsing
166   for (; (c != EOF) && (d = DigitValue(c)) >= 0; c = fgetc(s))
167     v = v*10 + d;
168   if (c == '.') {
169     for (c = fgetc(s); (c != EOF) && (d = DigitValue(c)) >= 0; c = fgetc(s)) {
170       w = w*10 + d;
171       k *= 10;
172     }
173   } else if (c == 'e' || c == 'E')
174     tprintf("WARNING: Scientific Notation not supported!");
175 
176   ungetc(c, s);
177   double f  = static_cast<double>(v)
178             + static_cast<double>(w) / static_cast<double>(k);
179 
180   return minus ? -f : f;
181 }
182 
strtofloat(const char * s)183 double strtofloat(const char* s)
184 {
185   int minus = 0;
186   int v = 0;
187   int d;
188   int k = 1;
189   int w = 0;
190 
191   while(*s && isspace(static_cast<unsigned char>(*s))) s++;
192 
193   // Single optional + or -
194   if (*s == '-' || *s == '+') {
195     minus = (*s == '-');
196     s++;
197   }
198 
199   // Actual number parsing
200   for (; *s && (d = DigitValue(*s)) >= 0; s++)
201     v = v*10 + d;
202   if (*s == '.') {
203     for (++s; *s && (d = DigitValue(*s)) >= 0; s++) {
204       w = w*10 + d;
205       k *= 10;
206     }
207   } else if (*s == 'e' || *s == 'E')
208     tprintf("WARNING: Scientific Notation not supported!");
209 
210   double f  = static_cast<double>(v)
211             + static_cast<double>(w) / static_cast<double>(k);
212 
213   return minus ? -f : f;
214 }
215 
// Variadic front end for vfscanf: packs the trailing arguments into a
// va_list, forwards everything, and returns vfscanf's result unchanged.
int fscanf(FILE* stream, const char *format, ...)
{
  va_list args;
  va_start(args, format);
  const int result = vfscanf(stream, format, args);
  va_end(args);
  return result;
}
227 
vfscanf(FILE * stream,const char * format,va_list ap)228 int vfscanf(FILE* stream, const char *format, va_list ap)
229 {
230   const char *p = format;
231   char ch;
232   int q = 0;
233   uintmax_t val = 0;
234   int rank = RANK_INT;    // Default rank
235   unsigned int width = ~0;
236   int base;
237   int flags = 0;
238   enum {
239     ST_NORMAL,        // Ground state
240     ST_FLAGS,         // Special flags
241     ST_WIDTH,         // Field width
242     ST_MODIFIERS,     // Length or conversion modifiers
243     ST_MATCH_INIT,    // Initial state of %[ sequence
244     ST_MATCH,         // Main state of %[ sequence
245     ST_MATCH_RANGE,   // After - in a %[ sequence
246   } state = ST_NORMAL;
247   char *sarg = NULL;    // %s %c or %[ string argument
248   enum Bail bail = BAIL_NONE;
249   int sign;
250   int converted = 0;    // Successful conversions
251   unsigned long matchmap[((1 << CHAR_BIT)+(LongBit()-1))/LongBit()];
252   int matchinv = 0;   // Is match map inverted?
253   unsigned char range_start = 0;
254   off_t start_off = ftell(stream);
255 
256   // Skip leading spaces
257   SkipSpace(stream);
258 
259   while ((ch = *p++) && !bail) {
260     switch (state) {
261       case ST_NORMAL:
262         if (ch == '%') {
263           state = ST_FLAGS;
264           flags = 0; rank = RANK_INT; width = ~0;
265         } else if (isspace(static_cast<unsigned char>(ch))) {
266           SkipSpace(stream);
267         } else {
268           if (fgetc(stream) != ch)
269             bail = BAIL_ERR;  // Match failure
270         }
271         break;
272 
273       case ST_FLAGS:
274         switch (ch) {
275           case '*':
276             flags |= FL_SPLAT;
277           break;
278 
279           case '0' ... '9':
280             width = (ch-'0');
281             state = ST_WIDTH;
282             flags |= FL_WIDTH;
283           break;
284 
285           default:
286             state = ST_MODIFIERS;
287             p--;      // Process this character again
288           break;
289         }
290       break;
291 
292       case ST_WIDTH:
293         if (ch >= '0' && ch <= '9') {
294           width = width*10+(ch-'0');
295         } else {
296           state = ST_MODIFIERS;
297           p--;      // Process this character again
298         }
299       break;
300 
301       case ST_MODIFIERS:
302         switch (ch) {
303           // Length modifiers - nonterminal sequences
304           case 'h':
305             rank--;     // Shorter rank
306           break;
307           case 'l':
308             rank++;     // Longer rank
309           break;
310           case 'j':
311             rank = kIntMaxRank;
312           break;
313           case 'z':
314             rank = kSizeTRank;
315           break;
316           case 't':
317             rank = kPtrDiffRank;
318           break;
319           case 'L':
320           case 'q':
321             rank = RANK_LONGLONG; // long double/long long
322           break;
323 
324           default:
325             // Output modifiers - terminal sequences
326             state = ST_NORMAL;  // Next state will be normal
327             if (rank < kMinRank)  // Canonicalize rank
328               rank = kMinRank;
329             else if (rank > kMaxRank)
330               rank = kMaxRank;
331 
332           switch (ch) {
333             case 'P':   // Upper case pointer
334             case 'p':   // Pointer
335               rank = RANK_PTR;
336               base = 0; sign = 0;
337             goto scan_int;
338 
339             case 'i':   // Base-independent integer
340               base = 0; sign = 1;
341             goto scan_int;
342 
343             case 'd':   // Decimal integer
344               base = 10; sign = 1;
345             goto scan_int;
346 
347             case 'o':   // Octal integer
348               base = 8; sign = 0;
349             goto scan_int;
350 
351             case 'u':   // Unsigned decimal integer
352               base = 10; sign = 0;
353             goto scan_int;
354 
355             case 'x':   // Hexadecimal integer
356             case 'X':
357               base = 16; sign = 0;
358             goto scan_int;
359 
360             case 'n':   // Number of characters consumed
361               val = ftell(stream) - start_off;
362             goto set_integer;
363 
364             scan_int:
365               q = SkipSpace(stream);
366               if ( q <= 0 ) {
367                 bail = BAIL_EOF;
368                 break;
369               }
370               val = streamtoumax(stream, base);
371               converted++;
372               // fall through
373 
374             set_integer:
375               if (!(flags & FL_SPLAT)) {
376                 switch(rank) {
377                   case RANK_CHAR:
378                     *va_arg(ap, unsigned char *)
379                       = static_cast<unsigned char>(val);
380                   break;
381                   case RANK_SHORT:
382                     *va_arg(ap, unsigned short *)
383                       = static_cast<unsigned short>(val);
384                   break;
385                   case RANK_INT:
386                     *va_arg(ap, unsigned int *)
387                       = static_cast<unsigned int>(val);
388                   break;
389                   case RANK_LONG:
390                     *va_arg(ap, unsigned long *)
391                       = static_cast<unsigned long>(val);
392                   break;
393                   case RANK_LONGLONG:
394                     *va_arg(ap, unsigned long long *)
395                       = static_cast<unsigned long long>(val);
396                   break;
397                   case RANK_PTR:
398                     *va_arg(ap, void **)
399                       = reinterpret_cast<void *>(static_cast<uintptr_t>(val));
400                   break;
401                 }
402               }
403             break;
404 
405             case 'f':   // Preliminary float value parsing
406             case 'g':
407             case 'G':
408             case 'e':
409             case 'E':
410               q = SkipSpace(stream);
411               if (q <= 0) {
412                 bail = BAIL_EOF;
413                 break;
414               }
415 
416               {
417               double fval = streamtofloat(stream);
418               switch(rank) {
419                 case RANK_INT:
420                   *va_arg(ap, float *) = static_cast<float>(fval);
421                 break;
422                 case RANK_LONG:
423                   *va_arg(ap, double *) = static_cast<double>(fval);
424                 break;
425               }
426               converted++;
427               }
428             break;
429 
430             case 'c':               // Character
431               width = (flags & FL_WIDTH) ? width : 1; // Default width == 1
432               sarg = va_arg(ap, char *);
433               while (width--) {
434                 if ((q = fgetc(stream)) <= 0) {
435                   bail = BAIL_EOF;
436                   break;
437                 }
438                 *sarg++ = q;
439               }
440               if (!bail)
441                 converted++;
442             break;
443 
444             case 's':               // String
445             {
446               char *sp;
447               sp = sarg = va_arg(ap, char *);
448               while (width--) {
449                 q = fgetc(stream);
450                 if (isspace(static_cast<unsigned char>(q)) || q <= 0) {
451                   ungetc(q, stream);
452                   break;
453                 }
454                 *sp++ = q;
455               }
456               if (sarg != sp) {
457                 *sp = '\0'; // Terminate output
458                 converted++;
459               } else {
460                 bail = BAIL_EOF;
461               }
462             }
463             break;
464 
465             case '[':   // Character range
466               sarg = va_arg(ap, char *);
467               state = ST_MATCH_INIT;
468               matchinv = 0;
469               memset(matchmap, 0, sizeof matchmap);
470             break;
471 
472             case '%':   // %% sequence
473               if (fgetc(stream) != '%' )
474                 bail = BAIL_ERR;
475             break;
476 
477             default:    // Anything else
478               bail = BAIL_ERR;  // Unknown sequence
479             break;
480           }
481         }
482       break;
483 
484       case ST_MATCH_INIT:   // Initial state for %[ match
485         if (ch == '^' && !(flags & FL_INV)) {
486           matchinv = 1;
487         } else {
488           SetBit(matchmap, static_cast<unsigned char>(ch));
489           state = ST_MATCH;
490         }
491       break;
492 
493       case ST_MATCH:    // Main state for %[ match
494         if (ch == ']') {
495           goto match_run;
496         } else if (ch == '-') {
497           range_start = static_cast<unsigned char>(ch);
498           state = ST_MATCH_RANGE;
499         } else {
500           SetBit(matchmap, static_cast<unsigned char>(ch));
501         }
502       break;
503 
504       case ST_MATCH_RANGE:    // %[ match after -
505         if (ch == ']') {
506           SetBit(matchmap, static_cast<unsigned char>('-'));
507           goto match_run;
508         } else {
509           int i;
510           for (i = range_start ; i < (static_cast<unsigned char>(ch)) ; i++)
511           SetBit(matchmap, i);
512           state = ST_MATCH;
513         }
514       break;
515 
516       match_run:      // Match expression finished
517         char* oarg = sarg;
518         while (width) {
519           q = fgetc(stream);
520           unsigned char qc = static_cast<unsigned char>(q);
521           if (q <= 0 || !(TestBit(matchmap, qc)^matchinv)) {
522             ungetc(q, stream);
523             break;
524           }
525           *sarg++ = q;
526         }
527         if (oarg != sarg) {
528           *sarg = '\0';
529           converted++;
530         } else {
531           bail = (q <= 0) ? BAIL_EOF : BAIL_ERR;
532         }
533       break;
534     }
535   }
536 
537   if (bail == BAIL_EOF && !converted)
538     converted = -1;   // Return EOF (-1)
539 
540   return converted;
541 }
542 
// Replacement for creat(): opens `pathname` write-only, creating it with
// permissions `mode` if necessary and truncating any existing content.
// Returns whatever open() returns.
int creat(const char *pathname, mode_t mode)
{
  const int create_flags = O_WRONLY | O_CREAT | O_TRUNC;
  return open(pathname, create_flags, mode);
}
547