1 // Copyright (c) 2006, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 #include "common/windows/pdb_source_line_writer.h"
31
32 #include <windows.h>
33 #include <winnt.h>
34 #include <atlbase.h>
35 #include <dia2.h>
36 #include <diacreate.h>
37 #include <ImageHlp.h>
38 #include <stdio.h>
39
40 #include <algorithm>
41 #include <limits>
42 #include <map>
43 #include <set>
44 #include <utility>
45
46 #include "common/windows/dia_util.h"
47 #include "common/windows/guid_string.h"
48 #include "common/windows/pe_util.h"
49 #include "common/windows/string_utils-inl.h"
50
51 // This constant may be missing from DbgHelp.h. See the documentation for
52 // IDiaSymbol::get_undecoratedNameEx.
53 #ifndef UNDNAME_NO_ECSU
54 #define UNDNAME_NO_ECSU 0x8000 // Suppresses enum/class/struct/union.
55 #endif // UNDNAME_NO_ECSU
56
57 namespace google_breakpad {
58
59 namespace {
60
61 using std::vector;
62
63 // The symbol (among possibly many) selected to represent an rva.
64 struct SelectedSymbol {
SelectedSymbolgoogle_breakpad::__anon36c75bf10111::SelectedSymbol65 SelectedSymbol(const CComPtr<IDiaSymbol>& symbol, bool is_public)
66 : symbol(symbol), is_public(is_public), is_multiple(false) {}
67
68 // The symbol to use for an rva.
69 CComPtr<IDiaSymbol> symbol;
70 // Whether this is a public or function symbol.
71 bool is_public;
72 // Whether the rva has multiple associated symbols. An rva will correspond to
73 // multiple symbols in the case of linker identical symbol folding.
74 bool is_multiple;
75 };
76
77 // Maps rva to the symbol to use for that address.
78 typedef std::map<DWORD, SelectedSymbol> SymbolMap;
79
80 // Record this in the map as the selected symbol for the rva if it satisfies the
81 // necessary conditions.
MaybeRecordSymbol(DWORD rva,const CComPtr<IDiaSymbol> symbol,bool is_public,SymbolMap * map)82 void MaybeRecordSymbol(DWORD rva,
83 const CComPtr<IDiaSymbol> symbol,
84 bool is_public,
85 SymbolMap* map) {
86 SymbolMap::iterator loc = map->find(rva);
87 if (loc == map->end()) {
88 map->insert(std::make_pair(rva, SelectedSymbol(symbol, is_public)));
89 return;
90 }
91
92 // Prefer function symbols to public symbols.
93 if (is_public && !loc->second.is_public) {
94 return;
95 }
96
97 loc->second.is_multiple = true;
98
99 // Take the 'least' symbol by lexicographical order of the decorated name. We
100 // use the decorated rather than undecorated name because computing the latter
101 // is expensive.
102 BSTR current_name, new_name;
103 loc->second.symbol->get_name(¤t_name);
104 symbol->get_name(&new_name);
105 if (wcscmp(new_name, current_name) < 0) {
106 loc->second.symbol = symbol;
107 loc->second.is_public = is_public;
108 }
109 }
110
111
112
SymbolsMatch(IDiaSymbol * a,IDiaSymbol * b)113 bool SymbolsMatch(IDiaSymbol* a, IDiaSymbol* b) {
114 DWORD a_section, a_offset, b_section, b_offset;
115 if (FAILED(a->get_addressSection(&a_section)) ||
116 FAILED(a->get_addressOffset(&a_offset)) ||
117 FAILED(b->get_addressSection(&b_section)) ||
118 FAILED(b->get_addressOffset(&b_offset)))
119 return false;
120 return a_section == b_section && a_offset == b_offset;
121 }
122
CreateDiaDataSourceInstance(CComPtr<IDiaDataSource> & data_source)123 bool CreateDiaDataSourceInstance(CComPtr<IDiaDataSource> &data_source) {
124 if (SUCCEEDED(data_source.CoCreateInstance(CLSID_DiaSource))) {
125 return true;
126 }
127
128 class DECLSPEC_UUID("B86AE24D-BF2F-4ac9-B5A2-34B14E4CE11D") DiaSource100;
129 class DECLSPEC_UUID("761D3BCD-1304-41D5-94E8-EAC54E4AC172") DiaSource110;
130 class DECLSPEC_UUID("3BFCEA48-620F-4B6B-81F7-B9AF75454C7D") DiaSource120;
131 class DECLSPEC_UUID("E6756135-1E65-4D17-8576-610761398C3C") DiaSource140;
132
133 // If the CoCreateInstance call above failed, msdia*.dll is not registered.
134 // We can try loading the DLL corresponding to the #included DIA SDK, but
135 // the DIA headers don't provide a version. Lets try to figure out which DIA
136 // version we're compiling against by comparing CLSIDs.
137 const wchar_t *msdia_dll = nullptr;
138 if (CLSID_DiaSource == _uuidof(DiaSource100)) {
139 msdia_dll = L"msdia100.dll";
140 } else if (CLSID_DiaSource == _uuidof(DiaSource110)) {
141 msdia_dll = L"msdia110.dll";
142 } else if (CLSID_DiaSource == _uuidof(DiaSource120)) {
143 msdia_dll = L"msdia120.dll";
144 } else if (CLSID_DiaSource == _uuidof(DiaSource140)) {
145 msdia_dll = L"msdia140.dll";
146 }
147
148 if (msdia_dll &&
149 SUCCEEDED(NoRegCoCreate(msdia_dll, CLSID_DiaSource, IID_IDiaDataSource,
150 reinterpret_cast<void **>(&data_source)))) {
151 return true;
152 }
153
154 return false;
155 }
156
157 } // namespace
158
PDBSourceLineWriter()159 PDBSourceLineWriter::PDBSourceLineWriter() : output_(NULL) {
160 }
161
~PDBSourceLineWriter()162 PDBSourceLineWriter::~PDBSourceLineWriter() {
163 Close();
164 }
165
SetCodeFile(const wstring & exe_file)166 bool PDBSourceLineWriter::SetCodeFile(const wstring &exe_file) {
167 if (code_file_.empty()) {
168 code_file_ = exe_file;
169 return true;
170 }
171 // Setting a different code file path is an error. It is success only if the
172 // file paths are the same.
173 return exe_file == code_file_;
174 }
175
Open(const wstring & file,FileFormat format)176 bool PDBSourceLineWriter::Open(const wstring &file, FileFormat format) {
177 Close();
178 code_file_.clear();
179
180 if (FAILED(CoInitialize(NULL))) {
181 fprintf(stderr, "CoInitialize failed\n");
182 return false;
183 }
184
185 CComPtr<IDiaDataSource> data_source;
186 if (!CreateDiaDataSourceInstance(data_source)) {
187 const int kGuidSize = 64;
188 wchar_t classid[kGuidSize] = {0};
189 StringFromGUID2(CLSID_DiaSource, classid, kGuidSize);
190 fprintf(stderr, "CoCreateInstance CLSID_DiaSource %S failed "
191 "(msdia*.dll unregistered?)\n", classid);
192 return false;
193 }
194
195 switch (format) {
196 case PDB_FILE:
197 if (FAILED(data_source->loadDataFromPdb(file.c_str()))) {
198 fprintf(stderr, "loadDataFromPdb failed for %ws\n", file.c_str());
199 return false;
200 }
201 break;
202 case EXE_FILE:
203 if (FAILED(data_source->loadDataForExe(file.c_str(), NULL, NULL))) {
204 fprintf(stderr, "loadDataForExe failed for %ws\n", file.c_str());
205 return false;
206 }
207 code_file_ = file;
208 break;
209 case ANY_FILE:
210 if (FAILED(data_source->loadDataFromPdb(file.c_str()))) {
211 if (FAILED(data_source->loadDataForExe(file.c_str(), NULL, NULL))) {
212 fprintf(stderr, "loadDataForPdb and loadDataFromExe failed for %ws\n",
213 file.c_str());
214 return false;
215 }
216 code_file_ = file;
217 }
218 break;
219 default:
220 fprintf(stderr, "Unknown file format\n");
221 return false;
222 }
223
224 if (FAILED(data_source->openSession(&session_))) {
225 fprintf(stderr, "openSession failed\n");
226 }
227
228 return true;
229 }
230
PrintLines(IDiaEnumLineNumbers * lines)231 bool PDBSourceLineWriter::PrintLines(IDiaEnumLineNumbers *lines) {
232 // The line number format is:
233 // <rva> <line number> <source file id>
234 CComPtr<IDiaLineNumber> line;
235 ULONG count;
236
237 while (SUCCEEDED(lines->Next(1, &line, &count)) && count == 1) {
238 DWORD rva;
239 if (FAILED(line->get_relativeVirtualAddress(&rva))) {
240 fprintf(stderr, "failed to get line rva\n");
241 return false;
242 }
243
244 DWORD length;
245 if (FAILED(line->get_length(&length))) {
246 fprintf(stderr, "failed to get line code length\n");
247 return false;
248 }
249
250 DWORD dia_source_id;
251 if (FAILED(line->get_sourceFileId(&dia_source_id))) {
252 fprintf(stderr, "failed to get line source file id\n");
253 return false;
254 }
255 // duplicate file names are coalesced to share one ID
256 DWORD source_id = GetRealFileID(dia_source_id);
257
258 DWORD line_num;
259 if (FAILED(line->get_lineNumber(&line_num))) {
260 fprintf(stderr, "failed to get line number\n");
261 return false;
262 }
263
264 AddressRangeVector ranges;
265 MapAddressRange(image_map_, AddressRange(rva, length), &ranges);
266 for (size_t i = 0; i < ranges.size(); ++i) {
267 fprintf(output_, "%lx %lx %lu %lu\n", ranges[i].rva, ranges[i].length,
268 line_num, source_id);
269 }
270 line.Release();
271 }
272 return true;
273 }
274
PrintFunction(IDiaSymbol * function,IDiaSymbol * block,bool has_multiple_symbols)275 bool PDBSourceLineWriter::PrintFunction(IDiaSymbol *function,
276 IDiaSymbol *block,
277 bool has_multiple_symbols) {
278 // The function format is:
279 // FUNC <address> <length> <param_stack_size> <function>
280 DWORD rva;
281 if (FAILED(block->get_relativeVirtualAddress(&rva))) {
282 fprintf(stderr, "couldn't get rva\n");
283 return false;
284 }
285
286 ULONGLONG length;
287 if (FAILED(block->get_length(&length))) {
288 fprintf(stderr, "failed to get function length\n");
289 return false;
290 }
291
292 if (length == 0) {
293 // Silently ignore zero-length functions, which can infrequently pop up.
294 return true;
295 }
296
297 CComBSTR name;
298 int stack_param_size;
299 if (!GetSymbolFunctionName(function, &name, &stack_param_size)) {
300 return false;
301 }
302
303 // If the decorated name didn't give the parameter size, try to
304 // calculate it.
305 if (stack_param_size < 0) {
306 stack_param_size = GetFunctionStackParamSize(function);
307 }
308
309 AddressRangeVector ranges;
310 MapAddressRange(image_map_, AddressRange(rva, static_cast<DWORD>(length)),
311 &ranges);
312 for (size_t i = 0; i < ranges.size(); ++i) {
313 const char* optional_multiple_field = has_multiple_symbols ? "m " : "";
314 fprintf(output_, "FUNC %s%lx %lx %x %ws\n", optional_multiple_field,
315 ranges[i].rva, ranges[i].length, stack_param_size, name.m_str);
316 }
317
318 CComPtr<IDiaEnumLineNumbers> lines;
319 if (FAILED(session_->findLinesByRVA(rva, DWORD(length), &lines))) {
320 return false;
321 }
322
323 if (!PrintLines(lines)) {
324 return false;
325 }
326 return true;
327 }
328
PrintSourceFiles()329 bool PDBSourceLineWriter::PrintSourceFiles() {
330 CComPtr<IDiaSymbol> global;
331 if (FAILED(session_->get_globalScope(&global))) {
332 fprintf(stderr, "get_globalScope failed\n");
333 return false;
334 }
335
336 CComPtr<IDiaEnumSymbols> compilands;
337 if (FAILED(global->findChildren(SymTagCompiland, NULL,
338 nsNone, &compilands))) {
339 fprintf(stderr, "findChildren failed\n");
340 return false;
341 }
342
343 CComPtr<IDiaSymbol> compiland;
344 ULONG count;
345 while (SUCCEEDED(compilands->Next(1, &compiland, &count)) && count == 1) {
346 CComPtr<IDiaEnumSourceFiles> source_files;
347 if (FAILED(session_->findFile(compiland, NULL, nsNone, &source_files))) {
348 return false;
349 }
350 CComPtr<IDiaSourceFile> file;
351 while (SUCCEEDED(source_files->Next(1, &file, &count)) && count == 1) {
352 DWORD file_id;
353 if (FAILED(file->get_uniqueId(&file_id))) {
354 return false;
355 }
356
357 CComBSTR file_name;
358 if (FAILED(file->get_fileName(&file_name))) {
359 return false;
360 }
361
362 wstring file_name_string(file_name);
363 if (!FileIDIsCached(file_name_string)) {
364 // this is a new file name, cache it and output a FILE line.
365 CacheFileID(file_name_string, file_id);
366 fwprintf(output_, L"FILE %d %ws\n", file_id, file_name_string.c_str());
367 } else {
368 // this file name has already been seen, just save this
369 // ID for later lookup.
370 StoreDuplicateFileID(file_name_string, file_id);
371 }
372 file.Release();
373 }
374 compiland.Release();
375 }
376 return true;
377 }
378
PrintFunctions()379 bool PDBSourceLineWriter::PrintFunctions() {
380 ULONG count = 0;
381 DWORD rva = 0;
382 CComPtr<IDiaSymbol> global;
383 HRESULT hr;
384
385 if (FAILED(session_->get_globalScope(&global))) {
386 fprintf(stderr, "get_globalScope failed\n");
387 return false;
388 }
389
390 CComPtr<IDiaEnumSymbols> symbols = NULL;
391
392 // Find all function symbols first.
393 SymbolMap rva_symbol;
394 hr = global->findChildren(SymTagFunction, NULL, nsNone, &symbols);
395
396 if (SUCCEEDED(hr)) {
397 CComPtr<IDiaSymbol> symbol = NULL;
398
399 while (SUCCEEDED(symbols->Next(1, &symbol, &count)) && count == 1) {
400 if (SUCCEEDED(symbol->get_relativeVirtualAddress(&rva))) {
401 // Potentially record this as the canonical symbol for this rva.
402 MaybeRecordSymbol(rva, symbol, false, &rva_symbol);
403 } else {
404 fprintf(stderr, "get_relativeVirtualAddress failed on the symbol\n");
405 return false;
406 }
407
408 symbol.Release();
409 }
410
411 symbols.Release();
412 }
413
414 // Find all public symbols and record public symbols that are not also private
415 // symbols.
416 hr = global->findChildren(SymTagPublicSymbol, NULL, nsNone, &symbols);
417
418 if (SUCCEEDED(hr)) {
419 CComPtr<IDiaSymbol> symbol = NULL;
420
421 while (SUCCEEDED(symbols->Next(1, &symbol, &count)) && count == 1) {
422 if (SUCCEEDED(symbol->get_relativeVirtualAddress(&rva))) {
423 // Potentially record this as the canonical symbol for this rva.
424 MaybeRecordSymbol(rva, symbol, true, &rva_symbol);
425 } else {
426 fprintf(stderr, "get_relativeVirtualAddress failed on the symbol\n");
427 return false;
428 }
429
430 symbol.Release();
431 }
432
433 symbols.Release();
434 }
435
436 // For each rva, dump the selected symbol at the address.
437 SymbolMap::iterator it;
438 for (it = rva_symbol.begin(); it != rva_symbol.end(); ++it) {
439 CComPtr<IDiaSymbol> symbol = it->second.symbol;
440 // Only print public symbols if there is no function symbol for the address.
441 if (!it->second.is_public) {
442 if (!PrintFunction(symbol, symbol, it->second.is_multiple))
443 return false;
444 } else {
445 if (!PrintCodePublicSymbol(symbol, it->second.is_multiple))
446 return false;
447 }
448 }
449
450 // When building with PGO, the compiler can split functions into
451 // "hot" and "cold" blocks, and move the "cold" blocks out to separate
452 // pages, so the function can be noncontiguous. To find these blocks,
453 // we have to iterate over all the compilands, and then find blocks
454 // that are children of them. We can then find the lexical parents
455 // of those blocks and print out an extra FUNC line for blocks
456 // that are not contained in their parent functions.
457 CComPtr<IDiaEnumSymbols> compilands;
458 if (FAILED(global->findChildren(SymTagCompiland, NULL,
459 nsNone, &compilands))) {
460 fprintf(stderr, "findChildren failed on the global\n");
461 return false;
462 }
463
464 CComPtr<IDiaSymbol> compiland;
465 while (SUCCEEDED(compilands->Next(1, &compiland, &count)) && count == 1) {
466 CComPtr<IDiaEnumSymbols> blocks;
467 if (FAILED(compiland->findChildren(SymTagBlock, NULL,
468 nsNone, &blocks))) {
469 fprintf(stderr, "findChildren failed on a compiland\n");
470 return false;
471 }
472
473 CComPtr<IDiaSymbol> block;
474 while (SUCCEEDED(blocks->Next(1, &block, &count)) && count == 1) {
475 // find this block's lexical parent function
476 CComPtr<IDiaSymbol> parent;
477 DWORD tag;
478 if (SUCCEEDED(block->get_lexicalParent(&parent)) &&
479 SUCCEEDED(parent->get_symTag(&tag)) &&
480 tag == SymTagFunction) {
481 // now get the block's offset and the function's offset and size,
482 // and determine if the block is outside of the function
483 DWORD func_rva, block_rva;
484 ULONGLONG func_length;
485 if (SUCCEEDED(block->get_relativeVirtualAddress(&block_rva)) &&
486 SUCCEEDED(parent->get_relativeVirtualAddress(&func_rva)) &&
487 SUCCEEDED(parent->get_length(&func_length))) {
488 if (block_rva < func_rva || block_rva > (func_rva + func_length)) {
489 if (!PrintFunction(parent, block, false)) {
490 return false;
491 }
492 }
493 }
494 }
495 parent.Release();
496 block.Release();
497 }
498 blocks.Release();
499 compiland.Release();
500 }
501
502 global.Release();
503 return true;
504 }
505
506 #undef max
507
PrintFrameDataUsingPDB()508 bool PDBSourceLineWriter::PrintFrameDataUsingPDB() {
509 // It would be nice if it were possible to output frame data alongside the
510 // associated function, as is done with line numbers, but the DIA API
511 // doesn't make it possible to get the frame data in that way.
512
513 CComPtr<IDiaEnumFrameData> frame_data_enum;
514 if (!FindTable(session_, &frame_data_enum))
515 return false;
516
517 DWORD last_type = std::numeric_limits<DWORD>::max();
518 DWORD last_rva = std::numeric_limits<DWORD>::max();
519 DWORD last_code_size = 0;
520 DWORD last_prolog_size = std::numeric_limits<DWORD>::max();
521
522 CComPtr<IDiaFrameData> frame_data;
523 ULONG count = 0;
524 while (SUCCEEDED(frame_data_enum->Next(1, &frame_data, &count)) &&
525 count == 1) {
526 DWORD type;
527 if (FAILED(frame_data->get_type(&type)))
528 return false;
529
530 DWORD rva;
531 if (FAILED(frame_data->get_relativeVirtualAddress(&rva)))
532 return false;
533
534 DWORD code_size;
535 if (FAILED(frame_data->get_lengthBlock(&code_size)))
536 return false;
537
538 DWORD prolog_size;
539 if (FAILED(frame_data->get_lengthProlog(&prolog_size)))
540 return false;
541
542 // parameter_size is the size of parameters passed on the stack. If any
543 // parameters are not passed on the stack (such as in registers), their
544 // sizes will not be included in parameter_size.
545 DWORD parameter_size;
546 if (FAILED(frame_data->get_lengthParams(¶meter_size)))
547 return false;
548
549 DWORD saved_register_size;
550 if (FAILED(frame_data->get_lengthSavedRegisters(&saved_register_size)))
551 return false;
552
553 DWORD local_size;
554 if (FAILED(frame_data->get_lengthLocals(&local_size)))
555 return false;
556
557 // get_maxStack can return S_FALSE, just use 0 in that case.
558 DWORD max_stack_size = 0;
559 if (FAILED(frame_data->get_maxStack(&max_stack_size)))
560 return false;
561
562 // get_programString can return S_FALSE, indicating that there is no
563 // program string. In that case, check whether %ebp is used.
564 HRESULT program_string_result;
565 CComBSTR program_string;
566 if (FAILED(program_string_result = frame_data->get_program(
567 &program_string))) {
568 return false;
569 }
570
571 // get_allocatesBasePointer can return S_FALSE, treat that as though
572 // %ebp is not used.
573 BOOL allocates_base_pointer = FALSE;
574 if (program_string_result != S_OK) {
575 if (FAILED(frame_data->get_allocatesBasePointer(
576 &allocates_base_pointer))) {
577 return false;
578 }
579 }
580
581 // Only print out a line if type, rva, code_size, or prolog_size have
582 // changed from the last line. It is surprisingly common (especially in
583 // system library PDBs) for DIA to return a series of identical
584 // IDiaFrameData objects. For kernel32.pdb from Windows XP SP2 on x86,
585 // this check reduces the size of the dumped symbol file by a third.
586 if (type != last_type || rva != last_rva || code_size != last_code_size ||
587 prolog_size != last_prolog_size) {
588 // The prolog and the code portions of the frame have to be treated
589 // independently as they may have independently changed in size, or may
590 // even have been split.
591 // NOTE: If epilog size is ever non-zero, we have to do something
592 // similar with it.
593
594 // Figure out where the prolog bytes have landed.
595 AddressRangeVector prolog_ranges;
596 if (prolog_size > 0) {
597 MapAddressRange(image_map_, AddressRange(rva, prolog_size),
598 &prolog_ranges);
599 }
600
601 // And figure out where the code bytes have landed.
602 AddressRangeVector code_ranges;
603 MapAddressRange(image_map_,
604 AddressRange(rva + prolog_size,
605 code_size - prolog_size),
606 &code_ranges);
607
608 struct FrameInfo {
609 DWORD rva;
610 DWORD code_size;
611 DWORD prolog_size;
612 };
613 std::vector<FrameInfo> frame_infos;
614
615 // Special case: The prolog and the code bytes remain contiguous. This is
616 // only done for compactness of the symbol file, and we could actually
617 // be outputting independent frame info for the prolog and code portions.
618 if (prolog_ranges.size() == 1 && code_ranges.size() == 1 &&
619 prolog_ranges[0].end() == code_ranges[0].rva) {
620 FrameInfo fi = { prolog_ranges[0].rva,
621 prolog_ranges[0].length + code_ranges[0].length,
622 prolog_ranges[0].length };
623 frame_infos.push_back(fi);
624 } else {
625 // Otherwise we output the prolog and code frame info independently.
626 for (size_t i = 0; i < prolog_ranges.size(); ++i) {
627 FrameInfo fi = { prolog_ranges[i].rva,
628 prolog_ranges[i].length,
629 prolog_ranges[i].length };
630 frame_infos.push_back(fi);
631 }
632 for (size_t i = 0; i < code_ranges.size(); ++i) {
633 FrameInfo fi = { code_ranges[i].rva, code_ranges[i].length, 0 };
634 frame_infos.push_back(fi);
635 }
636 }
637
638 for (size_t i = 0; i < frame_infos.size(); ++i) {
639 const FrameInfo& fi(frame_infos[i]);
640 fprintf(output_, "STACK WIN %lx %lx %lx %lx %x %lx %lx %lx %lx %d ",
641 type, fi.rva, fi.code_size, fi.prolog_size,
642 0 /* epilog_size */, parameter_size, saved_register_size,
643 local_size, max_stack_size, program_string_result == S_OK);
644 if (program_string_result == S_OK) {
645 fprintf(output_, "%ws\n", program_string.m_str);
646 } else {
647 fprintf(output_, "%d\n", allocates_base_pointer);
648 }
649 }
650
651 last_type = type;
652 last_rva = rva;
653 last_code_size = code_size;
654 last_prolog_size = prolog_size;
655 }
656
657 frame_data.Release();
658 }
659
660 return true;
661 }
662
PrintFrameDataUsingEXE()663 bool PDBSourceLineWriter::PrintFrameDataUsingEXE() {
664 if (code_file_.empty() && !FindPEFile()) {
665 fprintf(stderr, "Couldn't locate EXE or DLL file.\n");
666 return false;
667 }
668
669 return PrintPEFrameData(code_file_, output_);
670 }
671
PrintFrameData()672 bool PDBSourceLineWriter::PrintFrameData() {
673 PDBModuleInfo info;
674 if (GetModuleInfo(&info) && info.cpu == L"x86_64") {
675 return PrintFrameDataUsingEXE();
676 } else {
677 return PrintFrameDataUsingPDB();
678 }
679 return false;
680 }
681
PrintCodePublicSymbol(IDiaSymbol * symbol,bool has_multiple_symbols)682 bool PDBSourceLineWriter::PrintCodePublicSymbol(IDiaSymbol *symbol,
683 bool has_multiple_symbols) {
684 BOOL is_code;
685 if (FAILED(symbol->get_code(&is_code))) {
686 return false;
687 }
688 if (!is_code) {
689 return true;
690 }
691
692 DWORD rva;
693 if (FAILED(symbol->get_relativeVirtualAddress(&rva))) {
694 return false;
695 }
696
697 CComBSTR name;
698 int stack_param_size;
699 if (!GetSymbolFunctionName(symbol, &name, &stack_param_size)) {
700 return false;
701 }
702
703 AddressRangeVector ranges;
704 MapAddressRange(image_map_, AddressRange(rva, 1), &ranges);
705 for (size_t i = 0; i < ranges.size(); ++i) {
706 const char* optional_multiple_field = has_multiple_symbols ? "m " : "";
707 fprintf(output_, "PUBLIC %s%lx %x %ws\n", optional_multiple_field,
708 ranges[i].rva, stack_param_size > 0 ? stack_param_size : 0,
709 name.m_str);
710 }
711
712 // Now walk the function in the original untranslated space, asking DIA
713 // what function is at that location, stepping through OMAP blocks. If
714 // we're still in the same function, emit another entry, because the
715 // symbol could have been split into multiple pieces. If we've gotten to
716 // another symbol in the original address space, then we're done for
717 // this symbol. See https://crbug.com/678874.
718 for (;;) {
719 // This steps to the next block in the original image. Simply doing
720 // rva++ would also be correct, but would emit tons of unnecessary
721 // entries.
722 rva = image_map_.subsequent_rva_block[rva];
723 if (rva == 0)
724 break;
725
726 CComPtr<IDiaSymbol> next_sym = NULL;
727 LONG displacement;
728 if (FAILED(session_->findSymbolByRVAEx(rva, SymTagPublicSymbol, &next_sym,
729 &displacement))) {
730 break;
731 }
732
733 if (!SymbolsMatch(symbol, next_sym))
734 break;
735
736 AddressRangeVector next_ranges;
737 MapAddressRange(image_map_, AddressRange(rva, 1), &next_ranges);
738 for (size_t i = 0; i < next_ranges.size(); ++i) {
739 fprintf(output_, "PUBLIC %lx %x %ws\n", next_ranges[i].rva,
740 stack_param_size > 0 ? stack_param_size : 0, name.m_str);
741 }
742 }
743
744 return true;
745 }
746
PrintPDBInfo()747 bool PDBSourceLineWriter::PrintPDBInfo() {
748 PDBModuleInfo info;
749 if (!GetModuleInfo(&info)) {
750 return false;
751 }
752
753 // Hard-code "windows" for the OS because that's the only thing that makes
754 // sense for PDB files. (This might not be strictly correct for Windows CE
755 // support, but we don't care about that at the moment.)
756 fprintf(output_, "MODULE windows %ws %ws %ws\n",
757 info.cpu.c_str(), info.debug_identifier.c_str(),
758 info.debug_file.c_str());
759
760 return true;
761 }
762
PrintPEInfo()763 bool PDBSourceLineWriter::PrintPEInfo() {
764 PEModuleInfo info;
765 if (!GetPEInfo(&info)) {
766 return false;
767 }
768
769 fprintf(output_, "INFO CODE_ID %ws %ws\n",
770 info.code_identifier.c_str(),
771 info.code_file.c_str());
772 return true;
773 }
774
// wcstol_positive_strict is sort of like wcstol, but much stricter.  string
// should be a buffer pointing to a null-terminated string containing only
// decimal digits.  If the entire string can be converted to a non-negative
// integer without overflowing, and it contains no non-digit characters and no
// superfluous leading zeroes, *result is set to the value and this function
// returns true.  Otherwise, this function returns false and leaves *result
// untouched.  An empty string contains no digits and is rejected.  This is an
// alternative to the strtol, atoi, and scanf families, which are not as
// strict about input and in some cases don't provide a good way for the
// caller to determine if a conversion was successful.
static bool wcstol_positive_strict(const wchar_t *string, int *result) {
  // There must be at least one digit to convert.
  if (*string == L'\0') {
    return false;
  }

  int value = 0;
  for (const wchar_t *c = string; *c != L'\0'; ++c) {
    if (*c < L'0' || *c > L'9') {
      return false;
    }
    const int digit = static_cast<int>(*c - L'0');

    // Detect overflow before it happens: value * 10 + digit must not exceed
    // INT_MAX.  Signed overflow is undefined behavior, so it can't be
    // detected after the fact.
    if (value > (INT_MAX - digit) / 10) {
      return false;
    }
    value = value * 10 + digit;

    // Forbid leading zeroes unless the string is just "0".
    if (value == 0 && *(c + 1) != L'\0') {
      return false;
    }
  }

  *result = value;
  return true;
}
811
FindPEFile()812 bool PDBSourceLineWriter::FindPEFile() {
813 CComPtr<IDiaSymbol> global;
814 if (FAILED(session_->get_globalScope(&global))) {
815 fprintf(stderr, "get_globalScope failed\n");
816 return false;
817 }
818
819 CComBSTR symbols_file;
820 if (SUCCEEDED(global->get_symbolsFileName(&symbols_file))) {
821 wstring file(symbols_file);
822
823 // Look for an EXE or DLL file.
824 const wchar_t *extensions[] = { L"exe", L"dll" };
825 for (size_t i = 0; i < sizeof(extensions) / sizeof(extensions[0]); i++) {
826 size_t dot_pos = file.find_last_of(L".");
827 if (dot_pos != wstring::npos) {
828 file.replace(dot_pos + 1, wstring::npos, extensions[i]);
829 // Check if this file exists.
830 if (GetFileAttributesW(file.c_str()) != INVALID_FILE_ATTRIBUTES) {
831 code_file_ = file;
832 return true;
833 }
834 }
835 }
836 }
837
838 return false;
839 }
840
841 // static
GetSymbolFunctionName(IDiaSymbol * function,BSTR * name,int * stack_param_size)842 bool PDBSourceLineWriter::GetSymbolFunctionName(IDiaSymbol *function,
843 BSTR *name,
844 int *stack_param_size) {
845 *stack_param_size = -1;
846 const DWORD undecorate_options = UNDNAME_NO_MS_KEYWORDS |
847 UNDNAME_NO_FUNCTION_RETURNS |
848 UNDNAME_NO_ALLOCATION_MODEL |
849 UNDNAME_NO_ALLOCATION_LANGUAGE |
850 UNDNAME_NO_THISTYPE |
851 UNDNAME_NO_ACCESS_SPECIFIERS |
852 UNDNAME_NO_THROW_SIGNATURES |
853 UNDNAME_NO_MEMBER_TYPE |
854 UNDNAME_NO_RETURN_UDT_MODEL |
855 UNDNAME_NO_ECSU;
856
857 // Use get_undecoratedNameEx to get readable C++ names with arguments.
858 if (function->get_undecoratedNameEx(undecorate_options, name) != S_OK) {
859 if (function->get_name(name) != S_OK) {
860 fprintf(stderr, "failed to get function name\n");
861 return false;
862 }
863
864 // It's possible for get_name to return an empty string, so
865 // special-case that.
866 if (wcscmp(*name, L"") == 0) {
867 SysFreeString(*name);
868 // dwarf_cu_to_module.cc uses "<name omitted>", so match that.
869 *name = SysAllocString(L"<name omitted>");
870 return true;
871 }
872
873 // If a name comes from get_name because no undecorated form existed,
874 // it's already formatted properly to be used as output. Don't do any
875 // additional processing.
876 //
877 // MSVC7's DIA seems to not undecorate names in as many cases as MSVC8's.
878 // This will result in calling get_name for some C++ symbols, so
879 // all of the parameter and return type information may not be included in
880 // the name string.
881 } else {
882 // C++ uses a bogus "void" argument for functions and methods that don't
883 // take any parameters. Take it out of the undecorated name because it's
884 // ugly and unnecessary.
885 const wchar_t *replace_string = L"(void)";
886 const size_t replace_length = wcslen(replace_string);
887 const wchar_t *replacement_string = L"()";
888 size_t length = wcslen(*name);
889 if (length >= replace_length) {
890 wchar_t *name_end = *name + length - replace_length;
891 if (wcscmp(name_end, replace_string) == 0) {
892 WindowsStringUtils::safe_wcscpy(name_end, replace_length,
893 replacement_string);
894 length = wcslen(*name);
895 }
896 }
897
898 // Undecorate names used for stdcall and fastcall. These names prefix
899 // the identifier with '_' (stdcall) or '@' (fastcall) and suffix it
900 // with '@' followed by the number of bytes of parameters, in decimal.
901 // If such a name is found, take note of the size and undecorate it.
902 // Only do this for names that aren't C++, which is determined based on
903 // whether the undecorated name contains any ':' or '(' characters.
904 if (!wcschr(*name, ':') && !wcschr(*name, '(') &&
905 (*name[0] == '_' || *name[0] == '@')) {
906 wchar_t *last_at = wcsrchr(*name + 1, '@');
907 if (last_at && wcstol_positive_strict(last_at + 1, stack_param_size)) {
908 // If this function adheres to the fastcall convention, it accepts up
909 // to the first 8 bytes of parameters in registers (%ecx and %edx).
910 // We're only interested in the stack space used for parameters, so
911 // so subtract 8 and don't let the size go below 0.
912 if (*name[0] == '@') {
913 if (*stack_param_size > 8) {
914 *stack_param_size -= 8;
915 } else {
916 *stack_param_size = 0;
917 }
918 }
919
920 // Undecorate the name by moving it one character to the left in its
921 // buffer, and terminating it where the last '@' had been.
922 WindowsStringUtils::safe_wcsncpy(*name, length,
923 *name + 1, last_at - *name - 1);
924 } else if (*name[0] == '_') {
925 // This symbol's name is encoded according to the cdecl rules. The
926 // name doesn't end in a '@' character followed by a decimal positive
927 // integer, so it's not a stdcall name. Strip off the leading
928 // underscore.
929 WindowsStringUtils::safe_wcsncpy(*name, length, *name + 1, length);
930 }
931 }
932 }
933
934 return true;
935 }
936
937 // static
GetFunctionStackParamSize(IDiaSymbol * function)938 int PDBSourceLineWriter::GetFunctionStackParamSize(IDiaSymbol *function) {
939 // This implementation is highly x86-specific.
940
941 // Gather the symbols corresponding to data.
942 CComPtr<IDiaEnumSymbols> data_children;
943 if (FAILED(function->findChildren(SymTagData, NULL, nsNone,
944 &data_children))) {
945 return 0;
946 }
947
948 // lowest_base is the lowest %ebp-relative byte offset used for a parameter.
949 // highest_end is one greater than the highest offset (i.e. base + length).
950 // Stack parameters are assumed to be contiguous, because in reality, they
951 // are.
952 int lowest_base = INT_MAX;
953 int highest_end = INT_MIN;
954
955 CComPtr<IDiaSymbol> child;
956 DWORD count;
957 while (SUCCEEDED(data_children->Next(1, &child, &count)) && count == 1) {
958 // If any operation fails at this point, just proceed to the next child.
959 // Use the next_child label instead of continue because child needs to
960 // be released before it's reused. Declare constructable/destructable
961 // types early to avoid gotos that cross initializations.
962 CComPtr<IDiaSymbol> child_type;
963
964 // DataIsObjectPtr is only used for |this|. Because |this| can be passed
965 // as a stack parameter, look for it in addition to traditional
966 // parameters.
967 DWORD child_kind;
968 if (FAILED(child->get_dataKind(&child_kind)) ||
969 (child_kind != DataIsParam && child_kind != DataIsObjectPtr)) {
970 goto next_child;
971 }
972
973 // Only concentrate on register-relative parameters. Parameters may also
974 // be enregistered (passed directly in a register), but those don't
975 // consume any stack space, so they're not of interest.
976 DWORD child_location_type;
977 if (FAILED(child->get_locationType(&child_location_type)) ||
978 child_location_type != LocIsRegRel) {
979 goto next_child;
980 }
981
982 // Of register-relative parameters, the only ones that make any sense are
983 // %ebp- or %esp-relative. Note that MSVC's debugging information always
984 // gives parameters as %ebp-relative even when a function doesn't use a
985 // traditional frame pointer and stack parameters are accessed relative to
986 // %esp, so just look for %ebp-relative parameters. If you wanted to
987 // access parameters, you'd probably want to treat these %ebp-relative
988 // offsets as if they were relative to %esp before a function's prolog
989 // executed.
990 DWORD child_register;
991 if (FAILED(child->get_registerId(&child_register)) ||
992 child_register != CV_REG_EBP) {
993 goto next_child;
994 }
995
996 LONG child_register_offset;
997 if (FAILED(child->get_offset(&child_register_offset))) {
998 goto next_child;
999 }
1000
1001 // IDiaSymbol::get_type can succeed but still pass back a NULL value.
1002 if (FAILED(child->get_type(&child_type)) || !child_type) {
1003 goto next_child;
1004 }
1005
1006 ULONGLONG child_length;
1007 if (FAILED(child_type->get_length(&child_length))) {
1008 goto next_child;
1009 }
1010
1011 // Extra scope to avoid goto jumping over variable initialization
1012 {
1013 int child_end = child_register_offset + static_cast<ULONG>(child_length);
1014 if (child_register_offset < lowest_base) {
1015 lowest_base = child_register_offset;
1016 }
1017 if (child_end > highest_end) {
1018 highest_end = child_end;
1019 }
1020 }
1021
1022 next_child:
1023 child.Release();
1024 }
1025
1026 int param_size = 0;
1027 // Make sure lowest_base isn't less than 4, because [%esp+4] is the lowest
1028 // possible address to find a stack parameter before executing a function's
1029 // prolog (see above). Some optimizations cause parameter offsets to be
1030 // lower than 4, but we're not concerned with those because we're only
1031 // looking for parameters contained in addresses higher than where the
1032 // return address is stored.
1033 if (lowest_base < 4) {
1034 lowest_base = 4;
1035 }
1036 if (highest_end > lowest_base) {
1037 // All stack parameters are pushed as at least 4-byte quantities. If the
1038 // last type was narrower than 4 bytes, promote it. This assumes that all
1039 // parameters' offsets are 4-byte-aligned, which is always the case. Only
1040 // worry about the last type, because we're not summing the type sizes,
1041 // just looking at the lowest and highest offsets.
1042 int remainder = highest_end % 4;
1043 if (remainder) {
1044 highest_end += 4 - remainder;
1045 }
1046
1047 param_size = highest_end - lowest_base;
1048 }
1049
1050 return param_size;
1051 }
1052
WriteSymbols(FILE * symbol_file)1053 bool PDBSourceLineWriter::WriteSymbols(FILE *symbol_file) {
1054 output_ = symbol_file;
1055
1056 // Load the OMAP information, and disable auto-translation of addresses in
1057 // preference of doing it ourselves.
1058 OmapData omap_data;
1059 if (!GetOmapDataAndDisableTranslation(session_, &omap_data))
1060 return false;
1061 BuildImageMap(omap_data, &image_map_);
1062
1063 bool ret = PrintPDBInfo();
1064 // This is not a critical piece of the symbol file.
1065 PrintPEInfo();
1066 ret = ret &&
1067 PrintSourceFiles() &&
1068 PrintFunctions() &&
1069 PrintFrameData();
1070
1071 output_ = NULL;
1072 return ret;
1073 }
1074
Close()1075 void PDBSourceLineWriter::Close() {
1076 if (session_ != nullptr) {
1077 session_.Release();
1078 }
1079 }
1080
GetModuleInfo(PDBModuleInfo * info)1081 bool PDBSourceLineWriter::GetModuleInfo(PDBModuleInfo *info) {
1082 if (!info) {
1083 return false;
1084 }
1085
1086 info->debug_file.clear();
1087 info->debug_identifier.clear();
1088 info->cpu.clear();
1089
1090 CComPtr<IDiaSymbol> global;
1091 if (FAILED(session_->get_globalScope(&global))) {
1092 return false;
1093 }
1094
1095 DWORD machine_type;
1096 // get_machineType can return S_FALSE.
1097 if (global->get_machineType(&machine_type) == S_OK) {
1098 // The documentation claims that get_machineType returns a value from
1099 // the CV_CPU_TYPE_e enumeration, but that's not the case.
1100 // Instead, it returns one of the IMAGE_FILE_MACHINE values as
1101 // defined here:
1102 // http://msdn.microsoft.com/en-us/library/ms680313%28VS.85%29.aspx
1103 info->cpu = FileHeaderMachineToCpuString(static_cast<WORD>(machine_type));
1104 } else {
1105 // Unexpected, but handle gracefully.
1106 info->cpu = L"unknown";
1107 }
1108
1109 // DWORD* and int* are not compatible. This is clean and avoids a cast.
1110 DWORD age;
1111 if (FAILED(global->get_age(&age))) {
1112 return false;
1113 }
1114
1115 bool uses_guid;
1116 if (!UsesGUID(&uses_guid)) {
1117 return false;
1118 }
1119
1120 if (uses_guid) {
1121 GUID guid;
1122 if (FAILED(global->get_guid(&guid))) {
1123 return false;
1124 }
1125
1126 info->debug_identifier = GenerateDebugIdentifier(age, guid);
1127 } else {
1128 DWORD signature;
1129 if (FAILED(global->get_signature(&signature))) {
1130 return false;
1131 }
1132
1133 info->debug_identifier = GenerateDebugIdentifier(age, signature);
1134 }
1135
1136 CComBSTR debug_file_string;
1137 if (FAILED(global->get_symbolsFileName(&debug_file_string))) {
1138 return false;
1139 }
1140 info->debug_file =
1141 WindowsStringUtils::GetBaseName(wstring(debug_file_string));
1142
1143 return true;
1144 }
1145
GetPEInfo(PEModuleInfo * info)1146 bool PDBSourceLineWriter::GetPEInfo(PEModuleInfo *info) {
1147 if (!info) {
1148 return false;
1149 }
1150
1151 if (code_file_.empty() && !FindPEFile()) {
1152 fprintf(stderr, "Couldn't locate EXE or DLL file.\n");
1153 return false;
1154 }
1155
1156 return ReadPEInfo(code_file_, info);
1157 }
1158
UsesGUID(bool * uses_guid)1159 bool PDBSourceLineWriter::UsesGUID(bool *uses_guid) {
1160 if (!uses_guid)
1161 return false;
1162
1163 CComPtr<IDiaSymbol> global;
1164 if (FAILED(session_->get_globalScope(&global)))
1165 return false;
1166
1167 GUID guid;
1168 if (FAILED(global->get_guid(&guid)))
1169 return false;
1170
1171 DWORD signature;
1172 if (FAILED(global->get_signature(&signature)))
1173 return false;
1174
1175 // There are two possibilities for guid: either it's a real 128-bit GUID
1176 // as identified in a code module by a new-style CodeView record, or it's
1177 // a 32-bit signature (timestamp) as identified by an old-style record.
1178 // See MDCVInfoPDB70 and MDCVInfoPDB20 in minidump_format.h.
1179 //
1180 // Because DIA doesn't provide a way to directly determine whether a module
1181 // uses a GUID or a 32-bit signature, this code checks whether the first 32
1182 // bits of guid are the same as the signature, and if the rest of guid is
1183 // zero. If so, then with a pretty high degree of certainty, there's an
1184 // old-style CodeView record in use. This method will only falsely find an
1185 // an old-style CodeView record if a real 128-bit GUID has its first 32
1186 // bits set the same as the module's signature (timestamp) and the rest of
1187 // the GUID is set to 0. This is highly unlikely.
1188
1189 GUID signature_guid = {signature}; // 0-initializes other members
1190 *uses_guid = !IsEqualGUID(guid, signature_guid);
1191 return true;
1192 }
1193
1194 } // namespace google_breakpad
1195