• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1//===-- sanitizer_common_interceptors_scanf.inc -----------------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Scanf implementation for use in *Sanitizer interceptors.
11// Follows http://pubs.opengroup.org/onlinepubs/9699919799/functions/fscanf.html
12// with a few common GNU extensions.
13//
14//===----------------------------------------------------------------------===//
15#include <stdarg.h>
16
// Parsed form of a single scanf conversion directive.
struct ScanfDirective {
  // Argument index taken from a "%n$" prefix, or -1 if not specified.
  int argIdx;
  // Field width given in the directive; 0 when the directive has none.
  int fieldWidth;
  // True when assignment is suppressed ("*").
  bool suppressed;
  // True when the directive asks for allocated space ("m").
  bool allocate;
  // Length modifier characters, e.g. {'l', 'l'} for "ll"; unused slots are 0.
  char lengthModifier[2];
  // Conversion specifier character ('d', 's', '[', ...).
  char convSpecifier;
  // True when the directive may be the old GNU allocating form
  // "%as", "%aS" or "%a[...]".
  bool maybeGnuMalloc;
};
26
27static const char *parse_number(const char *p, int *out) {
28  *out = internal_atoll(p);
29  while (*p >= '0' && *p <= '9')
30    ++p;
31  return p;
32}
33
34static bool char_is_one_of(char c, const char *s) {
35  return !!internal_strchr(s, c);
36}
37
38// Parse scanf format string. If a valid directive in encountered, it is
39// returned in dir. This function returns the pointer to the first
40// unprocessed character, or 0 in case of error.
41// In case of the end-of-string, a pointer to the closing \0 is returned.
42static const char *scanf_parse_next(const char *p, bool allowGnuMalloc,
43                                    ScanfDirective *dir) {
44  internal_memset(dir, 0, sizeof(*dir));
45  dir->argIdx = -1;
46
47  while (*p) {
48    if (*p != '%') {
49      ++p;
50      continue;
51    }
52    ++p;
53    // %%
54    if (*p == '%') {
55      ++p;
56      continue;
57    }
58    if (*p == '\0') {
59      return 0;
60    }
61    // %n$
62    if (*p >= '0' && *p <= '9') {
63      int number;
64      const char *q = parse_number(p, &number);
65      if (*q == '$') {
66        dir->argIdx = number;
67        p = q + 1;
68      }
69      // Otherwise, do not change p. This will be re-parsed later as the field
70      // width.
71    }
72    // *
73    if (*p == '*') {
74      dir->suppressed = true;
75      ++p;
76    }
77    // Field width.
78    if (*p >= '0' && *p <= '9') {
79      p = parse_number(p, &dir->fieldWidth);
80      if (dir->fieldWidth <= 0)
81        return 0;
82    }
83    // m
84    if (*p == 'm') {
85      dir->allocate = true;
86      ++p;
87    }
88    // Length modifier.
89    if (char_is_one_of(*p, "jztLq")) {
90      dir->lengthModifier[0] = *p;
91      ++p;
92    } else if (*p == 'h') {
93      dir->lengthModifier[0] = 'h';
94      ++p;
95      if (*p == 'h') {
96        dir->lengthModifier[1] = 'h';
97        ++p;
98      }
99    } else if (*p == 'l') {
100      dir->lengthModifier[0] = 'l';
101      ++p;
102      if (*p == 'l') {
103        dir->lengthModifier[1] = 'l';
104        ++p;
105      }
106    }
107    // Conversion specifier.
108    dir->convSpecifier = *p++;
109    // Consume %[...] expression.
110    if (dir->convSpecifier == '[') {
111      if (*p == '^')
112        ++p;
113      if (*p == ']')
114        ++p;
115      while (*p && *p != ']')
116        ++p;
117      if (*p == 0)
118        return 0; // unexpected end of string
119                  // Consume the closing ']'.
120      ++p;
121    }
122    // This is unfortunately ambiguous between old GNU extension
123    // of %as, %aS and %a[...] and newer POSIX %a followed by
124    // letters s, S or [.
125    if (allowGnuMalloc && dir->convSpecifier == 'a' &&
126        !dir->lengthModifier[0]) {
127      if (*p == 's' || *p == 'S') {
128        dir->maybeGnuMalloc = true;
129        ++p;
130      } else if (*p == '[') {
131        // Watch for %a[h-j%d], if % appears in the
132        // [...] range, then we need to give up, we don't know
133        // if scanf will parse it as POSIX %a [h-j %d ] or
134        // GNU allocation of string with range dh-j plus %.
135        const char *q = p + 1;
136        if (*q == '^')
137          ++q;
138        if (*q == ']')
139          ++q;
140        while (*q && *q != ']' && *q != '%')
141          ++q;
142        if (*q == 0 || *q == '%')
143          return 0;
144        p = q + 1; // Consume the closing ']'.
145        dir->maybeGnuMalloc = true;
146      }
147    }
148    break;
149  }
150  return p;
151}
152
153// Returns true if the character is an integer conversion specifier.
154static bool scanf_is_integer_conv(char c) {
155  return char_is_one_of(c, "diouxXn");
156}
157
158// Returns true if the character is an floating point conversion specifier.
159static bool scanf_is_float_conv(char c) {
160  return char_is_one_of(c, "aAeEfFgG");
161}
162
163// Returns string output character size for string-like conversions,
164// or 0 if the conversion is invalid.
165static int scanf_get_char_size(ScanfDirective *dir) {
166  if (char_is_one_of(dir->convSpecifier, "CS")) {
167    // wchar_t
168    return 0;
169  }
170
171  if (char_is_one_of(dir->convSpecifier, "cs[")) {
172    if (dir->lengthModifier[0] == 'l')
173      // wchar_t
174      return 0;
175    else if (dir->lengthModifier[0] == 0)
176      return sizeof(char);
177    else
178      return 0;
179  }
180
181  return 0;
182}
183
// Special (non-positive) results of scanf_get_store_size().
enum ScanfStoreSize {
  // The conversion specifier is invalid.
  SSS_INVALID = 0,
  // The store size is not known in advance; it can be calculated as strlen()
  // of the destination buffer.
  SSS_STRLEN = -1
};
191
192// Returns the store size of a scanf directive (if >0), or a value of
193// ScanfStoreSize.
194static int scanf_get_store_size(ScanfDirective *dir) {
195  if (dir->allocate) {
196    if (!char_is_one_of(dir->convSpecifier, "cCsS["))
197      return SSS_INVALID;
198    return sizeof(char *);
199  }
200
201  if (dir->maybeGnuMalloc) {
202    if (dir->convSpecifier != 'a' || dir->lengthModifier[0])
203      return SSS_INVALID;
204    // This is ambiguous, so check the smaller size of char * (if it is
205    // a GNU extension of %as, %aS or %a[...]) and float (if it is
206    // POSIX %a followed by s, S or [ letters).
207    return sizeof(char *) < sizeof(float) ? sizeof(char *) : sizeof(float);
208  }
209
210  if (scanf_is_integer_conv(dir->convSpecifier)) {
211    switch (dir->lengthModifier[0]) {
212    case 'h':
213      return dir->lengthModifier[1] == 'h' ? sizeof(char) : sizeof(short);
214    case 'l':
215      return dir->lengthModifier[1] == 'l' ? sizeof(long long) : sizeof(long);
216    case 'L':
217      return sizeof(long long);
218    case 'j':
219      return sizeof(INTMAX_T);
220    case 'z':
221      return sizeof(SIZE_T);
222    case 't':
223      return sizeof(PTRDIFF_T);
224    case 0:
225      return sizeof(int);
226    default:
227      return SSS_INVALID;
228    }
229  }
230
231  if (scanf_is_float_conv(dir->convSpecifier)) {
232    switch (dir->lengthModifier[0]) {
233    case 'L':
234    case 'q':
235      return sizeof(long double);
236    case 'l':
237      return dir->lengthModifier[1] == 'l' ? sizeof(long double)
238                                           : sizeof(double);
239    case 0:
240      return sizeof(float);
241    default:
242      return SSS_INVALID;
243    }
244  }
245
246  if (char_is_one_of(dir->convSpecifier, "sS[")) {
247    unsigned charSize = scanf_get_char_size(dir);
248    if (charSize == 0)
249      return SSS_INVALID;
250    if (dir->fieldWidth == 0)
251      return SSS_STRLEN;
252    return (dir->fieldWidth + 1) * charSize;
253  }
254
255  if (char_is_one_of(dir->convSpecifier, "cC")) {
256    unsigned charSize = scanf_get_char_size(dir);
257    if (charSize == 0)
258      return SSS_INVALID;
259    if (dir->fieldWidth == 0)
260      return charSize;
261    return dir->fieldWidth * charSize;
262  }
263
264  if (dir->convSpecifier == 'p') {
265    if (dir->lengthModifier[1] != 0)
266      return SSS_INVALID;
267    return sizeof(void *);
268  }
269
270  return SSS_INVALID;
271}
272
273// Common part of *scanf interceptors.
274// Process format string and va_list, and report all store ranges.
275// Stops when "consuming" n_inputs input items.
276static void scanf_common(void *ctx, int n_inputs, bool allowGnuMalloc,
277                         const char *format, va_list aq) {
278  CHECK_GT(n_inputs, 0);
279  const char *p = format;
280
281  while (*p && n_inputs) {
282    ScanfDirective dir;
283    p = scanf_parse_next(p, allowGnuMalloc, &dir);
284    if (!p)
285      break;
286    if (dir.convSpecifier == 0) {
287      // This can only happen at the end of the format string.
288      CHECK_EQ(*p, 0);
289      break;
290    }
291    // Here the directive is valid. Do what it says.
292    if (dir.argIdx != -1) {
293      // Unsupported.
294      break;
295    }
296    if (dir.suppressed)
297      continue;
298    int size = scanf_get_store_size(&dir);
299    if (size == SSS_INVALID)
300      break;
301    void *argp = va_arg(aq, void *);
302    if (dir.convSpecifier != 'n')
303      --n_inputs;
304    if (size == SSS_STRLEN) {
305      size = internal_strlen((const char *)argp) + 1;
306    }
307    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
308  }
309}
310