1//===-- sanitizer_common_interceptors_format.inc ----------------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Scanf/printf implementation for use in *Sanitizer interceptors. 11// Follows http://pubs.opengroup.org/onlinepubs/9699919799/functions/fscanf.html 12// and http://pubs.opengroup.org/onlinepubs/9699919799/functions/fprintf.html 13// with a few common GNU extensions. 14// 15//===----------------------------------------------------------------------===// 16#include <stdarg.h> 17 18static const char *parse_number(const char *p, int *out) { 19 *out = internal_atoll(p); 20 while (*p >= '0' && *p <= '9') 21 ++p; 22 return p; 23} 24 25static const char *maybe_parse_param_index(const char *p, int *out) { 26 // n$ 27 if (*p >= '0' && *p <= '9') { 28 int number; 29 const char *q = parse_number(p, &number); 30 CHECK(q); 31 if (*q == '$') { 32 *out = number; 33 p = q + 1; 34 } 35 } 36 37 // Otherwise, do not change p. This will be re-parsed later as the field 38 // width. 39 return p; 40} 41 42static bool char_is_one_of(char c, const char *s) { 43 return !!internal_strchr(s, c); 44} 45 46static const char *maybe_parse_length_modifier(const char *p, char ll[2]) { 47 if (char_is_one_of(*p, "jztLq")) { 48 ll[0] = *p; 49 ++p; 50 } else if (*p == 'h') { 51 ll[0] = 'h'; 52 ++p; 53 if (*p == 'h') { 54 ll[1] = 'h'; 55 ++p; 56 } 57 } else if (*p == 'l') { 58 ll[0] = 'l'; 59 ++p; 60 if (*p == 'l') { 61 ll[1] = 'l'; 62 ++p; 63 } 64 } 65 return p; 66} 67 68// Returns true if the character is an integer conversion specifier. 69static bool format_is_integer_conv(char c) { 70 return char_is_one_of(c, "diouxXn"); 71} 72 73// Returns true if the character is an floating point conversion specifier. 74static bool format_is_float_conv(char c) { 75 return char_is_one_of(c, "aAeEfFgG"); 76} 77 78// Returns string output character size for string-like conversions, 79// or 0 if the conversion is invalid. 80static int format_get_char_size(char convSpecifier, 81 const char lengthModifier[2]) { 82 if (char_is_one_of(convSpecifier, "CS")) { 83 return sizeof(wchar_t); 84 } 85 86 if (char_is_one_of(convSpecifier, "cs[")) { 87 if (lengthModifier[0] == 'l' && lengthModifier[1] == '\0') 88 return sizeof(wchar_t); 89 else if (lengthModifier[0] == '\0') 90 return sizeof(char); 91 } 92 93 return 0; 94} 95 96enum FormatStoreSize { 97 // Store size not known in advance; can be calculated as wcslen() of the 98 // destination buffer. 99 FSS_WCSLEN = -2, 100 // Store size not known in advance; can be calculated as strlen() of the 101 // destination buffer. 102 FSS_STRLEN = -1, 103 // Invalid conversion specifier. 104 FSS_INVALID = 0 105}; 106 107// Returns the memory size of a format directive (if >0), or a value of 108// FormatStoreSize. 109static int format_get_value_size(char convSpecifier, 110 const char lengthModifier[2], 111 bool promote_float) { 112 if (format_is_integer_conv(convSpecifier)) { 113 switch (lengthModifier[0]) { 114 case 'h': 115 return lengthModifier[1] == 'h' ? sizeof(char) : sizeof(short); 116 case 'l': 117 return lengthModifier[1] == 'l' ? sizeof(long long) : sizeof(long); 118 case 'q': 119 return sizeof(long long); 120 case 'L': 121 return sizeof(long long); 122 case 'j': 123 return sizeof(INTMAX_T); 124 case 'z': 125 return sizeof(SIZE_T); 126 case 't': 127 return sizeof(PTRDIFF_T); 128 case 0: 129 return sizeof(int); 130 default: 131 return FSS_INVALID; 132 } 133 } 134 135 if (format_is_float_conv(convSpecifier)) { 136 switch (lengthModifier[0]) { 137 case 'L': 138 case 'q': 139 return sizeof(long double); 140 case 'l': 141 return lengthModifier[1] == 'l' ? sizeof(long double) 142 : sizeof(double); 143 case 0: 144 // Printf promotes floats to doubles but scanf does not 145 return promote_float ? sizeof(double) : sizeof(float); 146 default: 147 return FSS_INVALID; 148 } 149 } 150 151 if (convSpecifier == 'p') { 152 if (lengthModifier[0] != 0) 153 return FSS_INVALID; 154 return sizeof(void *); 155 } 156 157 return FSS_INVALID; 158} 159 160struct ScanfDirective { 161 int argIdx; // argument index, or -1 if not specified ("%n$") 162 int fieldWidth; 163 const char *begin; 164 const char *end; 165 bool suppressed; // suppress assignment ("*") 166 bool allocate; // allocate space ("m") 167 char lengthModifier[2]; 168 char convSpecifier; 169 bool maybeGnuMalloc; 170}; 171 172// Parse scanf format string. If a valid directive in encountered, it is 173// returned in dir. This function returns the pointer to the first 174// unprocessed character, or 0 in case of error. 175// In case of the end-of-string, a pointer to the closing \0 is returned. 176static const char *scanf_parse_next(const char *p, bool allowGnuMalloc, 177 ScanfDirective *dir) { 178 internal_memset(dir, 0, sizeof(*dir)); 179 dir->argIdx = -1; 180 181 while (*p) { 182 if (*p != '%') { 183 ++p; 184 continue; 185 } 186 dir->begin = p; 187 ++p; 188 // %% 189 if (*p == '%') { 190 ++p; 191 continue; 192 } 193 if (*p == '\0') { 194 return 0; 195 } 196 // %n$ 197 p = maybe_parse_param_index(p, &dir->argIdx); 198 CHECK(p); 199 // * 200 if (*p == '*') { 201 dir->suppressed = true; 202 ++p; 203 } 204 // Field width 205 if (*p >= '0' && *p <= '9') { 206 p = parse_number(p, &dir->fieldWidth); 207 CHECK(p); 208 if (dir->fieldWidth <= 0) // Width if at all must be non-zero 209 return 0; 210 } 211 // m 212 if (*p == 'm') { 213 dir->allocate = true; 214 ++p; 215 } 216 // Length modifier. 217 p = maybe_parse_length_modifier(p, dir->lengthModifier); 218 // Conversion specifier. 219 dir->convSpecifier = *p++; 220 // Consume %[...] expression. 221 if (dir->convSpecifier == '[') { 222 if (*p == '^') 223 ++p; 224 if (*p == ']') 225 ++p; 226 while (*p && *p != ']') 227 ++p; 228 if (*p == 0) 229 return 0; // unexpected end of string 230 // Consume the closing ']'. 231 ++p; 232 } 233 // This is unfortunately ambiguous between old GNU extension 234 // of %as, %aS and %a[...] and newer POSIX %a followed by 235 // letters s, S or [. 236 if (allowGnuMalloc && dir->convSpecifier == 'a' && 237 !dir->lengthModifier[0]) { 238 if (*p == 's' || *p == 'S') { 239 dir->maybeGnuMalloc = true; 240 ++p; 241 } else if (*p == '[') { 242 // Watch for %a[h-j%d], if % appears in the 243 // [...] range, then we need to give up, we don't know 244 // if scanf will parse it as POSIX %a [h-j %d ] or 245 // GNU allocation of string with range dh-j plus %. 246 const char *q = p + 1; 247 if (*q == '^') 248 ++q; 249 if (*q == ']') 250 ++q; 251 while (*q && *q != ']' && *q != '%') 252 ++q; 253 if (*q == 0 || *q == '%') 254 return 0; 255 p = q + 1; // Consume the closing ']'. 256 dir->maybeGnuMalloc = true; 257 } 258 } 259 dir->end = p; 260 break; 261 } 262 return p; 263} 264 265static int scanf_get_value_size(ScanfDirective *dir) { 266 if (dir->allocate) { 267 if (!char_is_one_of(dir->convSpecifier, "cCsS[")) 268 return FSS_INVALID; 269 return sizeof(char *); 270 } 271 272 if (dir->maybeGnuMalloc) { 273 if (dir->convSpecifier != 'a' || dir->lengthModifier[0]) 274 return FSS_INVALID; 275 // This is ambiguous, so check the smaller size of char * (if it is 276 // a GNU extension of %as, %aS or %a[...]) and float (if it is 277 // POSIX %a followed by s, S or [ letters). 278 return sizeof(char *) < sizeof(float) ? sizeof(char *) : sizeof(float); 279 } 280 281 if (char_is_one_of(dir->convSpecifier, "cCsS[")) { 282 bool needsTerminator = char_is_one_of(dir->convSpecifier, "sS["); 283 unsigned charSize = 284 format_get_char_size(dir->convSpecifier, dir->lengthModifier); 285 if (charSize == 0) 286 return FSS_INVALID; 287 if (dir->fieldWidth == 0) { 288 if (!needsTerminator) 289 return charSize; 290 return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN; 291 } 292 return (dir->fieldWidth + needsTerminator) * charSize; 293 } 294 295 return format_get_value_size(dir->convSpecifier, dir->lengthModifier, false); 296} 297 298// Common part of *scanf interceptors. 299// Process format string and va_list, and report all store ranges. 300// Stops when "consuming" n_inputs input items. 301static void scanf_common(void *ctx, int n_inputs, bool allowGnuMalloc, 302 const char *format, va_list aq) { 303 CHECK_GT(n_inputs, 0); 304 const char *p = format; 305 306 COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1); 307 308 while (*p) { 309 ScanfDirective dir; 310 p = scanf_parse_next(p, allowGnuMalloc, &dir); 311 if (!p) 312 break; 313 if (dir.convSpecifier == 0) { 314 // This can only happen at the end of the format string. 315 CHECK_EQ(*p, 0); 316 break; 317 } 318 // Here the directive is valid. Do what it says. 319 if (dir.argIdx != -1) { 320 // Unsupported. 321 break; 322 } 323 if (dir.suppressed) 324 continue; 325 int size = scanf_get_value_size(&dir); 326 if (size == FSS_INVALID) { 327 Report("WARNING: unexpected format specifier in scanf interceptor: " 328 "%.*s\n", dir.end - dir.begin, dir.begin); 329 break; 330 } 331 void *argp = va_arg(aq, void *); 332 if (dir.convSpecifier != 'n') 333 --n_inputs; 334 if (n_inputs < 0) 335 break; 336 if (size == FSS_STRLEN) { 337 size = internal_strlen((const char *)argp) + 1; 338 } else if (size == FSS_WCSLEN) { 339 // FIXME: actually use wcslen() to calculate it. 340 size = 0; 341 } 342 COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size); 343 } 344} 345 346#if SANITIZER_INTERCEPT_PRINTF 347 348struct PrintfDirective { 349 int fieldWidth; 350 int fieldPrecision; 351 int argIdx; // width argument index, or -1 if not specified ("%*n$") 352 int precisionIdx; // precision argument index, or -1 if not specified (".*n$") 353 const char *begin; 354 const char *end; 355 bool starredWidth; 356 bool starredPrecision; 357 char lengthModifier[2]; 358 char convSpecifier; 359}; 360 361static const char *maybe_parse_number(const char *p, int *out) { 362 if (*p >= '0' && *p <= '9') 363 p = parse_number(p, out); 364 return p; 365} 366 367static const char *maybe_parse_number_or_star(const char *p, int *out, 368 bool *star) { 369 if (*p == '*') { 370 *star = true; 371 ++p; 372 } else { 373 *star = false; 374 p = maybe_parse_number(p, out); 375 } 376 return p; 377} 378 379// Parse printf format string. Same as scanf_parse_next. 380static const char *printf_parse_next(const char *p, PrintfDirective *dir) { 381 internal_memset(dir, 0, sizeof(*dir)); 382 dir->argIdx = -1; 383 dir->precisionIdx = -1; 384 385 while (*p) { 386 if (*p != '%') { 387 ++p; 388 continue; 389 } 390 dir->begin = p; 391 ++p; 392 // %% 393 if (*p == '%') { 394 ++p; 395 continue; 396 } 397 if (*p == '\0') { 398 return 0; 399 } 400 // %n$ 401 p = maybe_parse_param_index(p, &dir->precisionIdx); 402 CHECK(p); 403 // Flags 404 while (char_is_one_of(*p, "'-+ #0")) { 405 ++p; 406 } 407 // Field width 408 p = maybe_parse_number_or_star(p, &dir->fieldWidth, 409 &dir->starredWidth); 410 if (!p) 411 return 0; 412 // Precision 413 if (*p == '.') { 414 ++p; 415 // Actual precision is optional (surprise!) 416 p = maybe_parse_number_or_star(p, &dir->fieldPrecision, 417 &dir->starredPrecision); 418 if (!p) 419 return 0; 420 // m$ 421 if (dir->starredPrecision) { 422 p = maybe_parse_param_index(p, &dir->precisionIdx); 423 CHECK(p); 424 } 425 } 426 // Length modifier. 427 p = maybe_parse_length_modifier(p, dir->lengthModifier); 428 // Conversion specifier. 429 dir->convSpecifier = *p++; 430 dir->end = p; 431 break; 432 } 433 return p; 434} 435 436static int printf_get_value_size(PrintfDirective *dir) { 437 if (dir->convSpecifier == 'm') { 438 return sizeof(char *); 439 } 440 441 if (char_is_one_of(dir->convSpecifier, "cCsS")) { 442 unsigned charSize = 443 format_get_char_size(dir->convSpecifier, dir->lengthModifier); 444 if (charSize == 0) 445 return FSS_INVALID; 446 if (char_is_one_of(dir->convSpecifier, "sS")) { 447 return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN; 448 } 449 return charSize; 450 } 451 452 return format_get_value_size(dir->convSpecifier, dir->lengthModifier, true); 453} 454 455#define SKIP_SCALAR_ARG(aq, convSpecifier, size) \ 456 do { \ 457 if (format_is_float_conv(convSpecifier)) { \ 458 switch (size) { \ 459 case 8: \ 460 va_arg(*aq, double); \ 461 break; \ 462 case 12: \ 463 va_arg(*aq, long double); \ 464 break; \ 465 case 16: \ 466 va_arg(*aq, long double); \ 467 break; \ 468 default: \ 469 Report("WARNING: unexpected floating-point arg size" \ 470 " in printf interceptor: %d\n", size); \ 471 return; \ 472 } \ 473 } else { \ 474 switch (size) { \ 475 case 1: \ 476 case 2: \ 477 case 4: \ 478 va_arg(*aq, u32); \ 479 break; \ 480 case 8: \ 481 va_arg(*aq, u64); \ 482 break; \ 483 default: \ 484 Report("WARNING: unexpected arg size" \ 485 " in printf interceptor: %d\n", size); \ 486 return; \ 487 } \ 488 } \ 489 } while (0) 490 491// Common part of *printf interceptors. 492// Process format string and va_list, and report all load ranges. 493static void printf_common(void *ctx, const char *format, va_list aq) { 494 COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1); 495 496 const char *p = format; 497 498 while (*p) { 499 PrintfDirective dir; 500 p = printf_parse_next(p, &dir); 501 if (!p) 502 break; 503 if (dir.convSpecifier == 0) { 504 // This can only happen at the end of the format string. 505 CHECK_EQ(*p, 0); 506 break; 507 } 508 // Here the directive is valid. Do what it says. 509 if (dir.argIdx != -1 || dir.precisionIdx != -1) { 510 // Unsupported. 511 break; 512 } 513 if (dir.starredWidth) { 514 // Dynamic width 515 SKIP_SCALAR_ARG(&aq, 'd', sizeof(int)); 516 } 517 if (dir.starredPrecision) { 518 // Dynamic precision 519 SKIP_SCALAR_ARG(&aq, 'd', sizeof(int)); 520 } 521 int size = printf_get_value_size(&dir); 522 if (size == FSS_INVALID) { 523 Report("WARNING: unexpected format specifier in printf " 524 "interceptor: %.*s\n", dir.end - dir.begin, dir.begin); 525 break; 526 } 527 if (dir.convSpecifier == 'n') { 528 void *argp = va_arg(aq, void *); 529 COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size); 530 continue; 531 } else if (size == FSS_STRLEN) { 532 if (void *argp = va_arg(aq, void *)) { 533 if (dir.starredPrecision) { 534 // FIXME: properly support starred precision for strings. 535 size = 0; 536 } else if (dir.fieldPrecision > 0) { 537 // Won't read more than "precision" symbols. 538 size = internal_strnlen((const char *)argp, dir.fieldPrecision); 539 if (size < dir.fieldPrecision) size++; 540 } else { 541 // Whole string will be accessed. 542 size = internal_strlen((const char *)argp) + 1; 543 } 544 COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size); 545 } 546 } else if (size == FSS_WCSLEN) { 547 if (void *argp = va_arg(aq, void *)) { 548 // FIXME: Properly support wide-character strings (via wcsrtombs). 549 size = 0; 550 COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size); 551 } 552 } else { 553 // Skip non-pointer args 554 SKIP_SCALAR_ARG(&aq, dir.convSpecifier, size); 555 } 556 } 557} 558 559#endif // SANITIZER_INTERCEPT_PRINTF 560