//===-- sanitizer_common_interceptors_format.inc ----------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // Scanf/printf implementation for use in *Sanitizer interceptors. // Follows http://pubs.opengroup.org/onlinepubs/9699919799/functions/fscanf.html // and http://pubs.opengroup.org/onlinepubs/9699919799/functions/fprintf.html // with a few common GNU extensions. // //===----------------------------------------------------------------------===// #include static const char *parse_number(const char *p, int *out) { *out = internal_atoll(p); while (*p >= '0' && *p <= '9') ++p; return p; } static const char *maybe_parse_param_index(const char *p, int *out) { // n$ if (*p >= '0' && *p <= '9') { int number; const char *q = parse_number(p, &number); CHECK(q); if (*q == '$') { *out = number; p = q + 1; } } // Otherwise, do not change p. This will be re-parsed later as the field // width. return p; } static bool char_is_one_of(char c, const char *s) { return !!internal_strchr(s, c); } static const char *maybe_parse_length_modifier(const char *p, char ll[2]) { if (char_is_one_of(*p, "jztLq")) { ll[0] = *p; ++p; } else if (*p == 'h') { ll[0] = 'h'; ++p; if (*p == 'h') { ll[1] = 'h'; ++p; } } else if (*p == 'l') { ll[0] = 'l'; ++p; if (*p == 'l') { ll[1] = 'l'; ++p; } } return p; } // Returns true if the character is an integer conversion specifier. static bool format_is_integer_conv(char c) { return char_is_one_of(c, "diouxXn"); } // Returns true if the character is an floating point conversion specifier. static bool format_is_float_conv(char c) { return char_is_one_of(c, "aAeEfFgG"); } // Returns string output character size for string-like conversions, // or 0 if the conversion is invalid. static int format_get_char_size(char convSpecifier, const char lengthModifier[2]) { if (char_is_one_of(convSpecifier, "CS")) { return sizeof(wchar_t); } if (char_is_one_of(convSpecifier, "cs[")) { if (lengthModifier[0] == 'l' && lengthModifier[1] == '\0') return sizeof(wchar_t); else if (lengthModifier[0] == '\0') return sizeof(char); } return 0; } enum FormatStoreSize { // Store size not known in advance; can be calculated as wcslen() of the // destination buffer. FSS_WCSLEN = -2, // Store size not known in advance; can be calculated as strlen() of the // destination buffer. FSS_STRLEN = -1, // Invalid conversion specifier. FSS_INVALID = 0 }; // Returns the memory size of a format directive (if >0), or a value of // FormatStoreSize. static int format_get_value_size(char convSpecifier, const char lengthModifier[2], bool promote_float) { if (format_is_integer_conv(convSpecifier)) { switch (lengthModifier[0]) { case 'h': return lengthModifier[1] == 'h' ? sizeof(char) : sizeof(short); case 'l': return lengthModifier[1] == 'l' ? sizeof(long long) : sizeof(long); case 'q': return sizeof(long long); case 'L': return sizeof(long long); case 'j': return sizeof(INTMAX_T); case 'z': return sizeof(SIZE_T); case 't': return sizeof(PTRDIFF_T); case 0: return sizeof(int); default: return FSS_INVALID; } } if (format_is_float_conv(convSpecifier)) { switch (lengthModifier[0]) { case 'L': case 'q': return sizeof(long double); case 'l': return lengthModifier[1] == 'l' ? sizeof(long double) : sizeof(double); case 0: // Printf promotes floats to doubles but scanf does not return promote_float ? sizeof(double) : sizeof(float); default: return FSS_INVALID; } } if (convSpecifier == 'p') { if (lengthModifier[0] != 0) return FSS_INVALID; return sizeof(void *); } return FSS_INVALID; } struct ScanfDirective { int argIdx; // argument index, or -1 if not specified ("%n$") int fieldWidth; const char *begin; const char *end; bool suppressed; // suppress assignment ("*") bool allocate; // allocate space ("m") char lengthModifier[2]; char convSpecifier; bool maybeGnuMalloc; }; // Parse scanf format string. If a valid directive in encountered, it is // returned in dir. This function returns the pointer to the first // unprocessed character, or 0 in case of error. // In case of the end-of-string, a pointer to the closing \0 is returned. static const char *scanf_parse_next(const char *p, bool allowGnuMalloc, ScanfDirective *dir) { internal_memset(dir, 0, sizeof(*dir)); dir->argIdx = -1; while (*p) { if (*p != '%') { ++p; continue; } dir->begin = p; ++p; // %% if (*p == '%') { ++p; continue; } if (*p == '\0') { return 0; } // %n$ p = maybe_parse_param_index(p, &dir->argIdx); CHECK(p); // * if (*p == '*') { dir->suppressed = true; ++p; } // Field width if (*p >= '0' && *p <= '9') { p = parse_number(p, &dir->fieldWidth); CHECK(p); if (dir->fieldWidth <= 0) // Width if at all must be non-zero return 0; } // m if (*p == 'm') { dir->allocate = true; ++p; } // Length modifier. p = maybe_parse_length_modifier(p, dir->lengthModifier); // Conversion specifier. dir->convSpecifier = *p++; // Consume %[...] expression. if (dir->convSpecifier == '[') { if (*p == '^') ++p; if (*p == ']') ++p; while (*p && *p != ']') ++p; if (*p == 0) return 0; // unexpected end of string // Consume the closing ']'. ++p; } // This is unfortunately ambiguous between old GNU extension // of %as, %aS and %a[...] and newer POSIX %a followed by // letters s, S or [. if (allowGnuMalloc && dir->convSpecifier == 'a' && !dir->lengthModifier[0]) { if (*p == 's' || *p == 'S') { dir->maybeGnuMalloc = true; ++p; } else if (*p == '[') { // Watch for %a[h-j%d], if % appears in the // [...] range, then we need to give up, we don't know // if scanf will parse it as POSIX %a [h-j %d ] or // GNU allocation of string with range dh-j plus %. const char *q = p + 1; if (*q == '^') ++q; if (*q == ']') ++q; while (*q && *q != ']' && *q != '%') ++q; if (*q == 0 || *q == '%') return 0; p = q + 1; // Consume the closing ']'. dir->maybeGnuMalloc = true; } } dir->end = p; break; } return p; } static int scanf_get_value_size(ScanfDirective *dir) { if (dir->allocate) { if (!char_is_one_of(dir->convSpecifier, "cCsS[")) return FSS_INVALID; return sizeof(char *); } if (dir->maybeGnuMalloc) { if (dir->convSpecifier != 'a' || dir->lengthModifier[0]) return FSS_INVALID; // This is ambiguous, so check the smaller size of char * (if it is // a GNU extension of %as, %aS or %a[...]) and float (if it is // POSIX %a followed by s, S or [ letters). return sizeof(char *) < sizeof(float) ? sizeof(char *) : sizeof(float); } if (char_is_one_of(dir->convSpecifier, "cCsS[")) { bool needsTerminator = char_is_one_of(dir->convSpecifier, "sS["); unsigned charSize = format_get_char_size(dir->convSpecifier, dir->lengthModifier); if (charSize == 0) return FSS_INVALID; if (dir->fieldWidth == 0) { if (!needsTerminator) return charSize; return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN; } return (dir->fieldWidth + needsTerminator) * charSize; } return format_get_value_size(dir->convSpecifier, dir->lengthModifier, false); } // Common part of *scanf interceptors. // Process format string and va_list, and report all store ranges. // Stops when "consuming" n_inputs input items. static void scanf_common(void *ctx, int n_inputs, bool allowGnuMalloc, const char *format, va_list aq) { CHECK_GT(n_inputs, 0); const char *p = format; COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1); while (*p) { ScanfDirective dir; p = scanf_parse_next(p, allowGnuMalloc, &dir); if (!p) break; if (dir.convSpecifier == 0) { // This can only happen at the end of the format string. CHECK_EQ(*p, 0); break; } // Here the directive is valid. Do what it says. if (dir.argIdx != -1) { // Unsupported. break; } if (dir.suppressed) continue; int size = scanf_get_value_size(&dir); if (size == FSS_INVALID) { Report("WARNING: unexpected format specifier in scanf interceptor: " "%.*s\n", dir.end - dir.begin, dir.begin); break; } void *argp = va_arg(aq, void *); if (dir.convSpecifier != 'n') --n_inputs; if (n_inputs < 0) break; if (size == FSS_STRLEN) { size = internal_strlen((const char *)argp) + 1; } else if (size == FSS_WCSLEN) { // FIXME: actually use wcslen() to calculate it. size = 0; } COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size); } } #if SANITIZER_INTERCEPT_PRINTF struct PrintfDirective { int fieldWidth; int fieldPrecision; int argIdx; // width argument index, or -1 if not specified ("%*n$") int precisionIdx; // precision argument index, or -1 if not specified (".*n$") const char *begin; const char *end; bool starredWidth; bool starredPrecision; char lengthModifier[2]; char convSpecifier; }; static const char *maybe_parse_number(const char *p, int *out) { if (*p >= '0' && *p <= '9') p = parse_number(p, out); return p; } static const char *maybe_parse_number_or_star(const char *p, int *out, bool *star) { if (*p == '*') { *star = true; ++p; } else { *star = false; p = maybe_parse_number(p, out); } return p; } // Parse printf format string. Same as scanf_parse_next. static const char *printf_parse_next(const char *p, PrintfDirective *dir) { internal_memset(dir, 0, sizeof(*dir)); dir->argIdx = -1; dir->precisionIdx = -1; while (*p) { if (*p != '%') { ++p; continue; } dir->begin = p; ++p; // %% if (*p == '%') { ++p; continue; } if (*p == '\0') { return 0; } // %n$ p = maybe_parse_param_index(p, &dir->precisionIdx); CHECK(p); // Flags while (char_is_one_of(*p, "'-+ #0")) { ++p; } // Field width p = maybe_parse_number_or_star(p, &dir->fieldWidth, &dir->starredWidth); if (!p) return 0; // Precision if (*p == '.') { ++p; // Actual precision is optional (surprise!) p = maybe_parse_number_or_star(p, &dir->fieldPrecision, &dir->starredPrecision); if (!p) return 0; // m$ if (dir->starredPrecision) { p = maybe_parse_param_index(p, &dir->precisionIdx); CHECK(p); } } // Length modifier. p = maybe_parse_length_modifier(p, dir->lengthModifier); // Conversion specifier. dir->convSpecifier = *p++; dir->end = p; break; } return p; } static int printf_get_value_size(PrintfDirective *dir) { if (dir->convSpecifier == 'm') { return sizeof(char *); } if (char_is_one_of(dir->convSpecifier, "cCsS")) { unsigned charSize = format_get_char_size(dir->convSpecifier, dir->lengthModifier); if (charSize == 0) return FSS_INVALID; if (char_is_one_of(dir->convSpecifier, "sS")) { return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN; } return charSize; } return format_get_value_size(dir->convSpecifier, dir->lengthModifier, true); } #define SKIP_SCALAR_ARG(aq, convSpecifier, size) \ do { \ if (format_is_float_conv(convSpecifier)) { \ switch (size) { \ case 8: \ va_arg(*aq, double); \ break; \ case 12: \ va_arg(*aq, long double); \ break; \ case 16: \ va_arg(*aq, long double); \ break; \ default: \ Report("WARNING: unexpected floating-point arg size" \ " in printf interceptor: %d\n", size); \ return; \ } \ } else { \ switch (size) { \ case 1: \ case 2: \ case 4: \ va_arg(*aq, u32); \ break; \ case 8: \ va_arg(*aq, u64); \ break; \ default: \ Report("WARNING: unexpected arg size" \ " in printf interceptor: %d\n", size); \ return; \ } \ } \ } while (0) // Common part of *printf interceptors. // Process format string and va_list, and report all load ranges. static void printf_common(void *ctx, const char *format, va_list aq) { COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1); const char *p = format; while (*p) { PrintfDirective dir; p = printf_parse_next(p, &dir); if (!p) break; if (dir.convSpecifier == 0) { // This can only happen at the end of the format string. CHECK_EQ(*p, 0); break; } // Here the directive is valid. Do what it says. if (dir.argIdx != -1 || dir.precisionIdx != -1) { // Unsupported. break; } if (dir.starredWidth) { // Dynamic width SKIP_SCALAR_ARG(&aq, 'd', sizeof(int)); } if (dir.starredPrecision) { // Dynamic precision SKIP_SCALAR_ARG(&aq, 'd', sizeof(int)); } int size = printf_get_value_size(&dir); if (size == FSS_INVALID) { Report("WARNING: unexpected format specifier in printf " "interceptor: %.*s\n", dir.end - dir.begin, dir.begin); break; } if (dir.convSpecifier == 'n') { void *argp = va_arg(aq, void *); COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size); continue; } else if (size == FSS_STRLEN) { if (void *argp = va_arg(aq, void *)) { if (dir.starredPrecision) { // FIXME: properly support starred precision for strings. size = 0; } else if (dir.fieldPrecision > 0) { // Won't read more than "precision" symbols. size = internal_strnlen((const char *)argp, dir.fieldPrecision); if (size < dir.fieldPrecision) size++; } else { // Whole string will be accessed. size = internal_strlen((const char *)argp) + 1; } COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size); } } else if (size == FSS_WCSLEN) { if (void *argp = va_arg(aq, void *)) { // FIXME: Properly support wide-character strings (via wcsrtombs). size = 0; COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size); } } else { // Skip non-pointer args SKIP_SCALAR_ARG(&aq, dir.convSpecifier, size); } } } #endif // SANITIZER_INTERCEPT_PRINTF