1 // Copyright (c) 2010 Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // basic_source_line_resolver.cc: BasicSourceLineResolver implementation.
31 //
32 // See basic_source_line_resolver.h and basic_source_line_resolver_types.h
33 // for documentation.
34
35 #include <assert.h>
36 #include <stdio.h>
37 #include <string.h>
38 #include <sys/types.h>
39 #include <sys/stat.h>
40
41 #include <limits>
42 #include <map>
43 #include <utility>
44 #include <vector>
45
46 #include "google_breakpad/processor/basic_source_line_resolver.h"
47 #include "processor/basic_source_line_resolver_types.h"
48 #include "processor/module_factory.h"
49
50 #include "processor/tokenize.h"
51
52 using std::map;
53 using std::vector;
54 using std::make_pair;
55
56 namespace google_breakpad {
57
58 #ifdef _WIN32
59 #define strtok_r strtok_s
60 #define strtoull _strtoui64
61 #endif
62
// Characters that separate the fields of a single symbol-file record.
// The pointer itself is const so it cannot be re-pointed by accident.
static const char* const kWhitespace = " \r\n";
// LogParseError stops printing once this many errors have been counted.
static const int kMaxErrorsPrinted = 5;
// LoadMapFromMemory gives up once the error count exceeds this value.
static const int kMaxErrorsBeforeBailing = 100;
66
BasicSourceLineResolver()67 BasicSourceLineResolver::BasicSourceLineResolver() :
68 SourceLineResolverBase(new BasicModuleFactory) { }
69
70 // static
LogParseError(const string & message,int line_number,int * num_errors)71 void BasicSourceLineResolver::Module::LogParseError(
72 const string &message,
73 int line_number,
74 int *num_errors) {
75 if (++(*num_errors) <= kMaxErrorsPrinted) {
76 if (line_number > 0) {
77 BPLOG(ERROR) << "Line " << line_number << ": " << message;
78 } else {
79 BPLOG(ERROR) << message;
80 }
81 }
82 }
83
LoadMapFromMemory(char * memory_buffer,size_t memory_buffer_size)84 bool BasicSourceLineResolver::Module::LoadMapFromMemory(
85 char *memory_buffer,
86 size_t memory_buffer_size) {
87 linked_ptr<Function> cur_func;
88 int line_number = 0;
89 int num_errors = 0;
90 char *save_ptr;
91
92 // If the length is 0, we can still pretend we have a symbol file. This is
93 // for scenarios that want to test symbol lookup, but don't necessarily care
94 // if certain modules do not have any information, like system libraries.
95 if (memory_buffer_size == 0) {
96 return true;
97 }
98
99 // Make sure the last character is null terminator.
100 size_t last_null_terminator = memory_buffer_size - 1;
101 if (memory_buffer[last_null_terminator] != '\0') {
102 memory_buffer[last_null_terminator] = '\0';
103 }
104
105 // Skip any null terminators at the end of the memory buffer, and make sure
106 // there are no other null terminators in the middle of the memory buffer.
107 bool has_null_terminator_in_the_middle = false;
108 while (last_null_terminator > 0 &&
109 memory_buffer[last_null_terminator - 1] == '\0') {
110 last_null_terminator--;
111 }
112 for (size_t i = 0; i < last_null_terminator; i++) {
113 if (memory_buffer[i] == '\0') {
114 memory_buffer[i] = '_';
115 has_null_terminator_in_the_middle = true;
116 }
117 }
118 if (has_null_terminator_in_the_middle) {
119 LogParseError(
120 "Null terminator is not expected in the middle of the symbol data",
121 line_number,
122 &num_errors);
123 }
124
125 char *buffer;
126 buffer = strtok_r(memory_buffer, "\r\n", &save_ptr);
127
128 while (buffer != NULL) {
129 ++line_number;
130
131 if (strncmp(buffer, "FILE ", 5) == 0) {
132 if (!ParseFile(buffer)) {
133 LogParseError("ParseFile on buffer failed", line_number, &num_errors);
134 }
135 } else if (strncmp(buffer, "STACK ", 6) == 0) {
136 if (!ParseStackInfo(buffer)) {
137 LogParseError("ParseStackInfo failed", line_number, &num_errors);
138 }
139 } else if (strncmp(buffer, "FUNC ", 5) == 0) {
140 cur_func.reset(ParseFunction(buffer));
141 if (!cur_func.get()) {
142 LogParseError("ParseFunction failed", line_number, &num_errors);
143 } else {
144 // StoreRange will fail if the function has an invalid address or size.
145 // We'll silently ignore this, the function and any corresponding lines
146 // will be destroyed when cur_func is released.
147 functions_.StoreRange(cur_func->address, cur_func->size, cur_func);
148 }
149 } else if (strncmp(buffer, "PUBLIC ", 7) == 0) {
150 // Clear cur_func: public symbols don't contain line number information.
151 cur_func.reset();
152
153 if (!ParsePublicSymbol(buffer)) {
154 LogParseError("ParsePublicSymbol failed", line_number, &num_errors);
155 }
156 } else if (strncmp(buffer, "MODULE ", 7) == 0) {
157 // Ignore these. They're not of any use to BasicSourceLineResolver,
158 // which is fed modules by a SymbolSupplier. These lines are present to
159 // aid other tools in properly placing symbol files so that they can
160 // be accessed by a SymbolSupplier.
161 //
162 // MODULE <guid> <age> <filename>
163 } else if (strncmp(buffer, "INFO ", 5) == 0) {
164 // Ignore these as well, they're similarly just for housekeeping.
165 //
166 // INFO CODE_ID <code id> <filename>
167 } else {
168 if (!cur_func.get()) {
169 LogParseError("Found source line data without a function",
170 line_number, &num_errors);
171 } else {
172 Line *line = ParseLine(buffer);
173 if (!line) {
174 LogParseError("ParseLine failed", line_number, &num_errors);
175 } else {
176 cur_func->lines.StoreRange(line->address, line->size,
177 linked_ptr<Line>(line));
178 }
179 }
180 }
181 if (num_errors > kMaxErrorsBeforeBailing) {
182 break;
183 }
184 buffer = strtok_r(NULL, "\r\n", &save_ptr);
185 }
186 is_corrupt_ = num_errors > 0;
187 return true;
188 }
189
LookupAddress(StackFrame * frame) const190 void BasicSourceLineResolver::Module::LookupAddress(StackFrame *frame) const {
191 MemAddr address = frame->instruction - frame->module->base_address();
192
193 // First, look for a FUNC record that covers address. Use
194 // RetrieveNearestRange instead of RetrieveRange so that, if there
195 // is no such function, we can use the next function to bound the
196 // extent of the PUBLIC symbol we find, below. This does mean we
197 // need to check that address indeed falls within the function we
198 // find; do the range comparison in an overflow-friendly way.
199 linked_ptr<Function> func;
200 linked_ptr<PublicSymbol> public_symbol;
201 MemAddr function_base;
202 MemAddr function_size;
203 MemAddr public_address;
204 if (functions_.RetrieveNearestRange(address, &func,
205 &function_base, &function_size) &&
206 address >= function_base && address - function_base < function_size) {
207 frame->function_name = func->name;
208 frame->function_base = frame->module->base_address() + function_base;
209
210 linked_ptr<Line> line;
211 MemAddr line_base;
212 if (func->lines.RetrieveRange(address, &line, &line_base, NULL)) {
213 FileMap::const_iterator it = files_.find(line->source_file_id);
214 if (it != files_.end()) {
215 frame->source_file_name = files_.find(line->source_file_id)->second;
216 }
217 frame->source_line = line->line;
218 frame->source_line_base = frame->module->base_address() + line_base;
219 }
220 } else if (public_symbols_.Retrieve(address,
221 &public_symbol, &public_address) &&
222 (!func.get() || public_address > function_base)) {
223 frame->function_name = public_symbol->name;
224 frame->function_base = frame->module->base_address() + public_address;
225 }
226 }
227
FindWindowsFrameInfo(const StackFrame * frame) const228 WindowsFrameInfo *BasicSourceLineResolver::Module::FindWindowsFrameInfo(
229 const StackFrame *frame) const {
230 MemAddr address = frame->instruction - frame->module->base_address();
231 scoped_ptr<WindowsFrameInfo> result(new WindowsFrameInfo());
232
233 // We only know about WindowsFrameInfo::STACK_INFO_FRAME_DATA and
234 // WindowsFrameInfo::STACK_INFO_FPO. Prefer them in this order.
235 // WindowsFrameInfo::STACK_INFO_FRAME_DATA is the newer type that
236 // includes its own program string.
237 // WindowsFrameInfo::STACK_INFO_FPO is the older type
238 // corresponding to the FPO_DATA struct. See stackwalker_x86.cc.
239 linked_ptr<WindowsFrameInfo> frame_info;
240 if ((windows_frame_info_[WindowsFrameInfo::STACK_INFO_FRAME_DATA]
241 .RetrieveRange(address, &frame_info))
242 || (windows_frame_info_[WindowsFrameInfo::STACK_INFO_FPO]
243 .RetrieveRange(address, &frame_info))) {
244 result->CopyFrom(*frame_info.get());
245 return result.release();
246 }
247
248 // Even without a relevant STACK line, many functions contain
249 // information about how much space their parameters consume on the
250 // stack. Use RetrieveNearestRange instead of RetrieveRange, so that
251 // we can use the function to bound the extent of the PUBLIC symbol,
252 // below. However, this does mean we need to check that ADDRESS
253 // falls within the retrieved function's range; do the range
254 // comparison in an overflow-friendly way.
255 linked_ptr<Function> function;
256 MemAddr function_base, function_size;
257 if (functions_.RetrieveNearestRange(address, &function,
258 &function_base, &function_size) &&
259 address >= function_base && address - function_base < function_size) {
260 result->parameter_size = function->parameter_size;
261 result->valid |= WindowsFrameInfo::VALID_PARAMETER_SIZE;
262 return result.release();
263 }
264
265 // PUBLIC symbols might have a parameter size. Use the function we
266 // found above to limit the range the public symbol covers.
267 linked_ptr<PublicSymbol> public_symbol;
268 MemAddr public_address;
269 if (public_symbols_.Retrieve(address, &public_symbol, &public_address) &&
270 (!function.get() || public_address > function_base)) {
271 result->parameter_size = public_symbol->parameter_size;
272 }
273
274 return NULL;
275 }
276
FindCFIFrameInfo(const StackFrame * frame) const277 CFIFrameInfo *BasicSourceLineResolver::Module::FindCFIFrameInfo(
278 const StackFrame *frame) const {
279 MemAddr address = frame->instruction - frame->module->base_address();
280 MemAddr initial_base, initial_size;
281 string initial_rules;
282
283 // Find the initial rule whose range covers this address. That
284 // provides an initial set of register recovery rules. Then, walk
285 // forward from the initial rule's starting address to frame's
286 // instruction address, applying delta rules.
287 if (!cfi_initial_rules_.RetrieveRange(address, &initial_rules,
288 &initial_base, &initial_size)) {
289 return NULL;
290 }
291
292 // Create a frame info structure, and populate it with the rules from
293 // the STACK CFI INIT record.
294 scoped_ptr<CFIFrameInfo> rules(new CFIFrameInfo());
295 if (!ParseCFIRuleSet(initial_rules, rules.get()))
296 return NULL;
297
298 // Find the first delta rule that falls within the initial rule's range.
299 map<MemAddr, string>::const_iterator delta =
300 cfi_delta_rules_.lower_bound(initial_base);
301
302 // Apply delta rules up to and including the frame's address.
303 while (delta != cfi_delta_rules_.end() && delta->first <= address) {
304 ParseCFIRuleSet(delta->second, rules.get());
305 delta++;
306 }
307
308 return rules.release();
309 }
310
ParseFile(char * file_line)311 bool BasicSourceLineResolver::Module::ParseFile(char *file_line) {
312 long index;
313 char *filename;
314 if (SymbolParseHelper::ParseFile(file_line, &index, &filename)) {
315 files_.insert(make_pair(index, string(filename)));
316 return true;
317 }
318 return false;
319 }
320
321 BasicSourceLineResolver::Function*
ParseFunction(char * function_line)322 BasicSourceLineResolver::Module::ParseFunction(char *function_line) {
323 uint64_t address;
324 uint64_t size;
325 long stack_param_size;
326 char *name;
327 if (SymbolParseHelper::ParseFunction(function_line, &address, &size,
328 &stack_param_size, &name)) {
329 return new Function(name, address, size, stack_param_size);
330 }
331 return NULL;
332 }
333
ParseLine(char * line_line)334 BasicSourceLineResolver::Line* BasicSourceLineResolver::Module::ParseLine(
335 char *line_line) {
336 uint64_t address;
337 uint64_t size;
338 long line_number;
339 long source_file;
340
341 if (SymbolParseHelper::ParseLine(line_line, &address, &size, &line_number,
342 &source_file)) {
343 return new Line(address, size, source_file, line_number);
344 }
345 return NULL;
346 }
347
ParsePublicSymbol(char * public_line)348 bool BasicSourceLineResolver::Module::ParsePublicSymbol(char *public_line) {
349 uint64_t address;
350 long stack_param_size;
351 char *name;
352
353 if (SymbolParseHelper::ParsePublicSymbol(public_line, &address,
354 &stack_param_size, &name)) {
355 // A few public symbols show up with an address of 0. This has been seen
356 // in the dumped output of ntdll.pdb for symbols such as _CIlog, _CIpow,
357 // RtlDescribeChunkLZNT1, and RtlReserveChunkLZNT1. They would conflict
358 // with one another if they were allowed into the public_symbols_ map,
359 // but since the address is obviously invalid, gracefully accept them
360 // as input without putting them into the map.
361 if (address == 0) {
362 return true;
363 }
364
365 linked_ptr<PublicSymbol> symbol(new PublicSymbol(name, address,
366 stack_param_size));
367 return public_symbols_.Store(address, symbol);
368 }
369 return false;
370 }
371
ParseStackInfo(char * stack_info_line)372 bool BasicSourceLineResolver::Module::ParseStackInfo(char *stack_info_line) {
373 // Skip "STACK " prefix.
374 stack_info_line += 6;
375
376 // Find the token indicating what sort of stack frame walking
377 // information this is.
378 while (*stack_info_line == ' ')
379 stack_info_line++;
380 const char *platform = stack_info_line;
381 while (!strchr(kWhitespace, *stack_info_line))
382 stack_info_line++;
383 *stack_info_line++ = '\0';
384
385 // MSVC stack frame info.
386 if (strcmp(platform, "WIN") == 0) {
387 int type = 0;
388 uint64_t rva, code_size;
389 linked_ptr<WindowsFrameInfo>
390 stack_frame_info(WindowsFrameInfo::ParseFromString(stack_info_line,
391 type,
392 rva,
393 code_size));
394 if (stack_frame_info == NULL)
395 return false;
396
397 // TODO(mmentovai): I wanted to use StoreRange's return value as this
398 // method's return value, but MSVC infrequently outputs stack info that
399 // violates the containment rules. This happens with a section of code
400 // in strncpy_s in test_app.cc (testdata/minidump2). There, problem looks
401 // like this:
402 // STACK WIN 4 4242 1a a 0 ... (STACK WIN 4 base size prolog 0 ...)
403 // STACK WIN 4 4243 2e 9 0 ...
404 // ContainedRangeMap treats these two blocks as conflicting. In reality,
405 // when the prolog lengths are taken into account, the actual code of
406 // these blocks doesn't conflict. However, we can't take the prolog lengths
407 // into account directly here because we'd wind up with a different set
408 // of range conflicts when MSVC outputs stack info like this:
409 // STACK WIN 4 1040 73 33 0 ...
410 // STACK WIN 4 105a 59 19 0 ...
411 // because in both of these entries, the beginning of the code after the
412 // prolog is at 0x1073, and the last byte of contained code is at 0x10b2.
413 // Perhaps we could get away with storing ranges by rva + prolog_size
414 // if ContainedRangeMap were modified to allow replacement of
415 // already-stored values.
416
417 windows_frame_info_[type].StoreRange(rva, code_size, stack_frame_info);
418 return true;
419 } else if (strcmp(platform, "CFI") == 0) {
420 // DWARF CFI stack frame info
421 return ParseCFIFrameInfo(stack_info_line);
422 } else {
423 // Something unrecognized.
424 return false;
425 }
426 }
427
ParseCFIFrameInfo(char * stack_info_line)428 bool BasicSourceLineResolver::Module::ParseCFIFrameInfo(
429 char *stack_info_line) {
430 char *cursor;
431
432 // Is this an INIT record or a delta record?
433 char *init_or_address = strtok_r(stack_info_line, " \r\n", &cursor);
434 if (!init_or_address)
435 return false;
436
437 if (strcmp(init_or_address, "INIT") == 0) {
438 // This record has the form "STACK INIT <address> <size> <rules...>".
439 char *address_field = strtok_r(NULL, " \r\n", &cursor);
440 if (!address_field) return false;
441
442 char *size_field = strtok_r(NULL, " \r\n", &cursor);
443 if (!size_field) return false;
444
445 char *initial_rules = strtok_r(NULL, "\r\n", &cursor);
446 if (!initial_rules) return false;
447
448 MemAddr address = strtoul(address_field, NULL, 16);
449 MemAddr size = strtoul(size_field, NULL, 16);
450 cfi_initial_rules_.StoreRange(address, size, initial_rules);
451 return true;
452 }
453
454 // This record has the form "STACK <address> <rules...>".
455 char *address_field = init_or_address;
456 char *delta_rules = strtok_r(NULL, "\r\n", &cursor);
457 if (!delta_rules) return false;
458 MemAddr address = strtoul(address_field, NULL, 16);
459 cfi_delta_rules_[address] = delta_rules;
460 return true;
461 }
462
463 // static
ParseFile(char * file_line,long * index,char ** filename)464 bool SymbolParseHelper::ParseFile(char *file_line, long *index,
465 char **filename) {
466 // FILE <id> <filename>
467 assert(strncmp(file_line, "FILE ", 5) == 0);
468 file_line += 5; // skip prefix
469
470 vector<char*> tokens;
471 if (!Tokenize(file_line, kWhitespace, 2, &tokens)) {
472 return false;
473 }
474
475 char *after_number;
476 *index = strtol(tokens[0], &after_number, 10);
477 if (!IsValidAfterNumber(after_number) || *index < 0 ||
478 *index == std::numeric_limits<long>::max()) {
479 return false;
480 }
481
482 *filename = tokens[1];
483 if (!filename) {
484 return false;
485 }
486
487 return true;
488 }
489
490 // static
ParseFunction(char * function_line,uint64_t * address,uint64_t * size,long * stack_param_size,char ** name)491 bool SymbolParseHelper::ParseFunction(char *function_line, uint64_t *address,
492 uint64_t *size, long *stack_param_size,
493 char **name) {
494 // FUNC <address> <size> <stack_param_size> <name>
495 assert(strncmp(function_line, "FUNC ", 5) == 0);
496 function_line += 5; // skip prefix
497
498 vector<char*> tokens;
499 if (!Tokenize(function_line, kWhitespace, 4, &tokens)) {
500 return false;
501 }
502
503 char *after_number;
504 *address = strtoull(tokens[0], &after_number, 16);
505 if (!IsValidAfterNumber(after_number) ||
506 *address == std::numeric_limits<unsigned long long>::max()) {
507 return false;
508 }
509 *size = strtoull(tokens[1], &after_number, 16);
510 if (!IsValidAfterNumber(after_number) ||
511 *size == std::numeric_limits<unsigned long long>::max()) {
512 return false;
513 }
514 *stack_param_size = strtol(tokens[2], &after_number, 16);
515 if (!IsValidAfterNumber(after_number) ||
516 *stack_param_size == std::numeric_limits<long>::max() ||
517 *stack_param_size < 0) {
518 return false;
519 }
520 *name = tokens[3];
521
522 return true;
523 }
524
525 // static
ParseLine(char * line_line,uint64_t * address,uint64_t * size,long * line_number,long * source_file)526 bool SymbolParseHelper::ParseLine(char *line_line, uint64_t *address,
527 uint64_t *size, long *line_number,
528 long *source_file) {
529 // <address> <size> <line number> <source file id>
530 vector<char*> tokens;
531 if (!Tokenize(line_line, kWhitespace, 4, &tokens)) {
532 return false;
533 }
534
535 char *after_number;
536 *address = strtoull(tokens[0], &after_number, 16);
537 if (!IsValidAfterNumber(after_number) ||
538 *address == std::numeric_limits<unsigned long long>::max()) {
539 return false;
540 }
541 *size = strtoull(tokens[1], &after_number, 16);
542 if (!IsValidAfterNumber(after_number) ||
543 *size == std::numeric_limits<unsigned long long>::max()) {
544 return false;
545 }
546 *line_number = strtol(tokens[2], &after_number, 10);
547 if (!IsValidAfterNumber(after_number) ||
548 *line_number == std::numeric_limits<long>::max()) {
549 return false;
550 }
551 *source_file = strtol(tokens[3], &after_number, 10);
552 if (!IsValidAfterNumber(after_number) || *source_file < 0 ||
553 *source_file == std::numeric_limits<long>::max()) {
554 return false;
555 }
556
557 // Valid line numbers normally start from 1, however there are functions that
558 // are associated with a source file but not associated with any line number
559 // (block helper function) and for such functions the symbol file contains 0
560 // for the line numbers. Hence, 0 should be treated as a valid line number.
561 // For more information on block helper functions, please, take a look at:
562 // http://clang.llvm.org/docs/Block-ABI-Apple.html
563 if (*line_number < 0) {
564 return false;
565 }
566
567 return true;
568 }
569
570 // static
ParsePublicSymbol(char * public_line,uint64_t * address,long * stack_param_size,char ** name)571 bool SymbolParseHelper::ParsePublicSymbol(char *public_line,
572 uint64_t *address,
573 long *stack_param_size,
574 char **name) {
575 // PUBLIC <address> <stack_param_size> <name>
576 assert(strncmp(public_line, "PUBLIC ", 7) == 0);
577 public_line += 7; // skip prefix
578
579 vector<char*> tokens;
580 if (!Tokenize(public_line, kWhitespace, 3, &tokens)) {
581 return false;
582 }
583
584 char *after_number;
585 *address = strtoull(tokens[0], &after_number, 16);
586 if (!IsValidAfterNumber(after_number) ||
587 *address == std::numeric_limits<unsigned long long>::max()) {
588 return false;
589 }
590 *stack_param_size = strtol(tokens[1], &after_number, 16);
591 if (!IsValidAfterNumber(after_number) ||
592 *stack_param_size == std::numeric_limits<long>::max() ||
593 *stack_param_size < 0) {
594 return false;
595 }
596 *name = tokens[2];
597
598 return true;
599 }
600
601 // static
IsValidAfterNumber(char * after_number)602 bool SymbolParseHelper::IsValidAfterNumber(char *after_number) {
603 if (after_number != NULL && strchr(kWhitespace, *after_number) != NULL) {
604 return true;
605 }
606 return false;
607 }
608
609 } // namespace google_breakpad
610