• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2006, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 //     * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 //     * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 //     * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30 #include "common/windows/pdb_source_line_writer.h"
31 
32 #include <windows.h>
33 #include <winnt.h>
34 #include <atlbase.h>
35 #include <dia2.h>
36 #include <diacreate.h>
37 #include <ImageHlp.h>
38 #include <stdio.h>
39 
40 #include <algorithm>
41 #include <limits>
42 #include <map>
43 #include <set>
44 #include <utility>
45 
46 #include "common/windows/dia_util.h"
47 #include "common/windows/guid_string.h"
48 #include "common/windows/pe_util.h"
49 #include "common/windows/string_utils-inl.h"
50 
51 // This constant may be missing from DbgHelp.h.  See the documentation for
52 // IDiaSymbol::get_undecoratedNameEx.
53 #ifndef UNDNAME_NO_ECSU
54 #define UNDNAME_NO_ECSU 0x8000  // Suppresses enum/class/struct/union.
55 #endif  // UNDNAME_NO_ECSU
56 
57 namespace google_breakpad {
58 
59 namespace {
60 
61 using std::vector;
62 
63 // The symbol (among possibly many) selected to represent an rva.
64 struct SelectedSymbol {
SelectedSymbolgoogle_breakpad::__anon36c75bf10111::SelectedSymbol65   SelectedSymbol(const CComPtr<IDiaSymbol>& symbol, bool is_public)
66       : symbol(symbol), is_public(is_public), is_multiple(false) {}
67 
68   // The symbol to use for an rva.
69   CComPtr<IDiaSymbol> symbol;
70   // Whether this is a public or function symbol.
71   bool is_public;
72   // Whether the rva has multiple associated symbols. An rva will correspond to
73   // multiple symbols in the case of linker identical symbol folding.
74   bool is_multiple;
75 };
76 
77 // Maps rva to the symbol to use for that address.
78 typedef std::map<DWORD, SelectedSymbol> SymbolMap;
79 
80 // Record this in the map as the selected symbol for the rva if it satisfies the
81 // necessary conditions.
MaybeRecordSymbol(DWORD rva,const CComPtr<IDiaSymbol> symbol,bool is_public,SymbolMap * map)82 void MaybeRecordSymbol(DWORD rva,
83                        const CComPtr<IDiaSymbol> symbol,
84                        bool is_public,
85                        SymbolMap* map) {
86   SymbolMap::iterator loc = map->find(rva);
87   if (loc == map->end()) {
88     map->insert(std::make_pair(rva, SelectedSymbol(symbol, is_public)));
89     return;
90   }
91 
92   // Prefer function symbols to public symbols.
93   if (is_public && !loc->second.is_public) {
94     return;
95   }
96 
97   loc->second.is_multiple = true;
98 
99   // Take the 'least' symbol by lexicographical order of the decorated name. We
100   // use the decorated rather than undecorated name because computing the latter
101   // is expensive.
102   BSTR current_name, new_name;
103   loc->second.symbol->get_name(&current_name);
104   symbol->get_name(&new_name);
105   if (wcscmp(new_name, current_name) < 0) {
106     loc->second.symbol = symbol;
107     loc->second.is_public = is_public;
108   }
109 }
110 
111 
112 
SymbolsMatch(IDiaSymbol * a,IDiaSymbol * b)113 bool SymbolsMatch(IDiaSymbol* a, IDiaSymbol* b) {
114   DWORD a_section, a_offset, b_section, b_offset;
115   if (FAILED(a->get_addressSection(&a_section)) ||
116       FAILED(a->get_addressOffset(&a_offset)) ||
117       FAILED(b->get_addressSection(&b_section)) ||
118       FAILED(b->get_addressOffset(&b_offset)))
119     return false;
120   return a_section == b_section && a_offset == b_offset;
121 }
122 
CreateDiaDataSourceInstance(CComPtr<IDiaDataSource> & data_source)123 bool CreateDiaDataSourceInstance(CComPtr<IDiaDataSource> &data_source) {
124   if (SUCCEEDED(data_source.CoCreateInstance(CLSID_DiaSource))) {
125     return true;
126   }
127 
128   class DECLSPEC_UUID("B86AE24D-BF2F-4ac9-B5A2-34B14E4CE11D") DiaSource100;
129   class DECLSPEC_UUID("761D3BCD-1304-41D5-94E8-EAC54E4AC172") DiaSource110;
130   class DECLSPEC_UUID("3BFCEA48-620F-4B6B-81F7-B9AF75454C7D") DiaSource120;
131   class DECLSPEC_UUID("E6756135-1E65-4D17-8576-610761398C3C") DiaSource140;
132 
133   // If the CoCreateInstance call above failed, msdia*.dll is not registered.
134   // We can try loading the DLL corresponding to the #included DIA SDK, but
135   // the DIA headers don't provide a version. Lets try to figure out which DIA
136   // version we're compiling against by comparing CLSIDs.
137   const wchar_t *msdia_dll = nullptr;
138   if (CLSID_DiaSource == _uuidof(DiaSource100)) {
139     msdia_dll = L"msdia100.dll";
140   } else if (CLSID_DiaSource == _uuidof(DiaSource110)) {
141     msdia_dll = L"msdia110.dll";
142   } else if (CLSID_DiaSource == _uuidof(DiaSource120)) {
143     msdia_dll = L"msdia120.dll";
144   } else if (CLSID_DiaSource == _uuidof(DiaSource140)) {
145     msdia_dll = L"msdia140.dll";
146   }
147 
148   if (msdia_dll &&
149       SUCCEEDED(NoRegCoCreate(msdia_dll, CLSID_DiaSource, IID_IDiaDataSource,
150                               reinterpret_cast<void **>(&data_source)))) {
151     return true;
152   }
153 
154   return false;
155 }
156 
157 }  // namespace
158 
PDBSourceLineWriter()159 PDBSourceLineWriter::PDBSourceLineWriter() : output_(NULL) {
160 }
161 
~PDBSourceLineWriter()162 PDBSourceLineWriter::~PDBSourceLineWriter() {
163   Close();
164 }
165 
SetCodeFile(const wstring & exe_file)166 bool PDBSourceLineWriter::SetCodeFile(const wstring &exe_file) {
167   if (code_file_.empty()) {
168     code_file_ = exe_file;
169     return true;
170   }
171   // Setting a different code file path is an error.  It is success only if the
172   // file paths are the same.
173   return exe_file == code_file_;
174 }
175 
Open(const wstring & file,FileFormat format)176 bool PDBSourceLineWriter::Open(const wstring &file, FileFormat format) {
177   Close();
178   code_file_.clear();
179 
180   if (FAILED(CoInitialize(NULL))) {
181     fprintf(stderr, "CoInitialize failed\n");
182     return false;
183   }
184 
185   CComPtr<IDiaDataSource> data_source;
186   if (!CreateDiaDataSourceInstance(data_source)) {
187     const int kGuidSize = 64;
188     wchar_t classid[kGuidSize] = {0};
189     StringFromGUID2(CLSID_DiaSource, classid, kGuidSize);
190     fprintf(stderr, "CoCreateInstance CLSID_DiaSource %S failed "
191             "(msdia*.dll unregistered?)\n", classid);
192     return false;
193   }
194 
195   switch (format) {
196     case PDB_FILE:
197       if (FAILED(data_source->loadDataFromPdb(file.c_str()))) {
198         fprintf(stderr, "loadDataFromPdb failed for %ws\n", file.c_str());
199         return false;
200       }
201       break;
202     case EXE_FILE:
203       if (FAILED(data_source->loadDataForExe(file.c_str(), NULL, NULL))) {
204         fprintf(stderr, "loadDataForExe failed for %ws\n", file.c_str());
205         return false;
206       }
207       code_file_ = file;
208       break;
209     case ANY_FILE:
210       if (FAILED(data_source->loadDataFromPdb(file.c_str()))) {
211         if (FAILED(data_source->loadDataForExe(file.c_str(), NULL, NULL))) {
212           fprintf(stderr, "loadDataForPdb and loadDataFromExe failed for %ws\n",
213                   file.c_str());
214           return false;
215         }
216         code_file_ = file;
217       }
218       break;
219     default:
220       fprintf(stderr, "Unknown file format\n");
221       return false;
222   }
223 
224   if (FAILED(data_source->openSession(&session_))) {
225     fprintf(stderr, "openSession failed\n");
226   }
227 
228   return true;
229 }
230 
PrintLines(IDiaEnumLineNumbers * lines)231 bool PDBSourceLineWriter::PrintLines(IDiaEnumLineNumbers *lines) {
232   // The line number format is:
233   // <rva> <line number> <source file id>
234   CComPtr<IDiaLineNumber> line;
235   ULONG count;
236 
237   while (SUCCEEDED(lines->Next(1, &line, &count)) && count == 1) {
238     DWORD rva;
239     if (FAILED(line->get_relativeVirtualAddress(&rva))) {
240       fprintf(stderr, "failed to get line rva\n");
241       return false;
242     }
243 
244     DWORD length;
245     if (FAILED(line->get_length(&length))) {
246       fprintf(stderr, "failed to get line code length\n");
247       return false;
248     }
249 
250     DWORD dia_source_id;
251     if (FAILED(line->get_sourceFileId(&dia_source_id))) {
252       fprintf(stderr, "failed to get line source file id\n");
253       return false;
254     }
255     // duplicate file names are coalesced to share one ID
256     DWORD source_id = GetRealFileID(dia_source_id);
257 
258     DWORD line_num;
259     if (FAILED(line->get_lineNumber(&line_num))) {
260       fprintf(stderr, "failed to get line number\n");
261       return false;
262     }
263 
264     AddressRangeVector ranges;
265     MapAddressRange(image_map_, AddressRange(rva, length), &ranges);
266     for (size_t i = 0; i < ranges.size(); ++i) {
267       fprintf(output_, "%lx %lx %lu %lu\n", ranges[i].rva, ranges[i].length,
268               line_num, source_id);
269     }
270     line.Release();
271   }
272   return true;
273 }
274 
PrintFunction(IDiaSymbol * function,IDiaSymbol * block,bool has_multiple_symbols)275 bool PDBSourceLineWriter::PrintFunction(IDiaSymbol *function,
276                                         IDiaSymbol *block,
277                                         bool has_multiple_symbols) {
278   // The function format is:
279   // FUNC <address> <length> <param_stack_size> <function>
280   DWORD rva;
281   if (FAILED(block->get_relativeVirtualAddress(&rva))) {
282     fprintf(stderr, "couldn't get rva\n");
283     return false;
284   }
285 
286   ULONGLONG length;
287   if (FAILED(block->get_length(&length))) {
288     fprintf(stderr, "failed to get function length\n");
289     return false;
290   }
291 
292   if (length == 0) {
293     // Silently ignore zero-length functions, which can infrequently pop up.
294     return true;
295   }
296 
297   CComBSTR name;
298   int stack_param_size;
299   if (!GetSymbolFunctionName(function, &name, &stack_param_size)) {
300     return false;
301   }
302 
303   // If the decorated name didn't give the parameter size, try to
304   // calculate it.
305   if (stack_param_size < 0) {
306     stack_param_size = GetFunctionStackParamSize(function);
307   }
308 
309   AddressRangeVector ranges;
310   MapAddressRange(image_map_, AddressRange(rva, static_cast<DWORD>(length)),
311                   &ranges);
312   for (size_t i = 0; i < ranges.size(); ++i) {
313     const char* optional_multiple_field = has_multiple_symbols ? "m " : "";
314     fprintf(output_, "FUNC %s%lx %lx %x %ws\n", optional_multiple_field,
315             ranges[i].rva, ranges[i].length, stack_param_size, name.m_str);
316   }
317 
318   CComPtr<IDiaEnumLineNumbers> lines;
319   if (FAILED(session_->findLinesByRVA(rva, DWORD(length), &lines))) {
320     return false;
321   }
322 
323   if (!PrintLines(lines)) {
324     return false;
325   }
326   return true;
327 }
328 
PrintSourceFiles()329 bool PDBSourceLineWriter::PrintSourceFiles() {
330   CComPtr<IDiaSymbol> global;
331   if (FAILED(session_->get_globalScope(&global))) {
332     fprintf(stderr, "get_globalScope failed\n");
333     return false;
334   }
335 
336   CComPtr<IDiaEnumSymbols> compilands;
337   if (FAILED(global->findChildren(SymTagCompiland, NULL,
338                                   nsNone, &compilands))) {
339     fprintf(stderr, "findChildren failed\n");
340     return false;
341   }
342 
343   CComPtr<IDiaSymbol> compiland;
344   ULONG count;
345   while (SUCCEEDED(compilands->Next(1, &compiland, &count)) && count == 1) {
346     CComPtr<IDiaEnumSourceFiles> source_files;
347     if (FAILED(session_->findFile(compiland, NULL, nsNone, &source_files))) {
348       return false;
349     }
350     CComPtr<IDiaSourceFile> file;
351     while (SUCCEEDED(source_files->Next(1, &file, &count)) && count == 1) {
352       DWORD file_id;
353       if (FAILED(file->get_uniqueId(&file_id))) {
354         return false;
355       }
356 
357       CComBSTR file_name;
358       if (FAILED(file->get_fileName(&file_name))) {
359         return false;
360       }
361 
362       wstring file_name_string(file_name);
363       if (!FileIDIsCached(file_name_string)) {
364         // this is a new file name, cache it and output a FILE line.
365         CacheFileID(file_name_string, file_id);
366         fwprintf(output_, L"FILE %d %ws\n", file_id, file_name_string.c_str());
367       } else {
368         // this file name has already been seen, just save this
369         // ID for later lookup.
370         StoreDuplicateFileID(file_name_string, file_id);
371       }
372       file.Release();
373     }
374     compiland.Release();
375   }
376   return true;
377 }
378 
PrintFunctions()379 bool PDBSourceLineWriter::PrintFunctions() {
380   ULONG count = 0;
381   DWORD rva = 0;
382   CComPtr<IDiaSymbol> global;
383   HRESULT hr;
384 
385   if (FAILED(session_->get_globalScope(&global))) {
386     fprintf(stderr, "get_globalScope failed\n");
387     return false;
388   }
389 
390   CComPtr<IDiaEnumSymbols> symbols = NULL;
391 
392   // Find all function symbols first.
393   SymbolMap rva_symbol;
394   hr = global->findChildren(SymTagFunction, NULL, nsNone, &symbols);
395 
396   if (SUCCEEDED(hr)) {
397     CComPtr<IDiaSymbol> symbol = NULL;
398 
399     while (SUCCEEDED(symbols->Next(1, &symbol, &count)) && count == 1) {
400       if (SUCCEEDED(symbol->get_relativeVirtualAddress(&rva))) {
401         // Potentially record this as the canonical symbol for this rva.
402         MaybeRecordSymbol(rva, symbol, false, &rva_symbol);
403       } else {
404         fprintf(stderr, "get_relativeVirtualAddress failed on the symbol\n");
405         return false;
406       }
407 
408       symbol.Release();
409     }
410 
411     symbols.Release();
412   }
413 
414   // Find all public symbols and record public symbols that are not also private
415   // symbols.
416   hr = global->findChildren(SymTagPublicSymbol, NULL, nsNone, &symbols);
417 
418   if (SUCCEEDED(hr)) {
419     CComPtr<IDiaSymbol> symbol = NULL;
420 
421     while (SUCCEEDED(symbols->Next(1, &symbol, &count)) && count == 1) {
422       if (SUCCEEDED(symbol->get_relativeVirtualAddress(&rva))) {
423         // Potentially record this as the canonical symbol for this rva.
424         MaybeRecordSymbol(rva, symbol, true, &rva_symbol);
425       } else {
426         fprintf(stderr, "get_relativeVirtualAddress failed on the symbol\n");
427         return false;
428       }
429 
430       symbol.Release();
431     }
432 
433     symbols.Release();
434   }
435 
436   // For each rva, dump the selected symbol at the address.
437   SymbolMap::iterator it;
438   for (it = rva_symbol.begin(); it != rva_symbol.end(); ++it) {
439     CComPtr<IDiaSymbol> symbol = it->second.symbol;
440     // Only print public symbols if there is no function symbol for the address.
441     if (!it->second.is_public) {
442       if (!PrintFunction(symbol, symbol, it->second.is_multiple))
443         return false;
444     } else {
445       if (!PrintCodePublicSymbol(symbol, it->second.is_multiple))
446         return false;
447     }
448   }
449 
450   // When building with PGO, the compiler can split functions into
451   // "hot" and "cold" blocks, and move the "cold" blocks out to separate
452   // pages, so the function can be noncontiguous. To find these blocks,
453   // we have to iterate over all the compilands, and then find blocks
454   // that are children of them. We can then find the lexical parents
455   // of those blocks and print out an extra FUNC line for blocks
456   // that are not contained in their parent functions.
457   CComPtr<IDiaEnumSymbols> compilands;
458   if (FAILED(global->findChildren(SymTagCompiland, NULL,
459                                   nsNone, &compilands))) {
460     fprintf(stderr, "findChildren failed on the global\n");
461     return false;
462   }
463 
464   CComPtr<IDiaSymbol> compiland;
465   while (SUCCEEDED(compilands->Next(1, &compiland, &count)) && count == 1) {
466     CComPtr<IDiaEnumSymbols> blocks;
467     if (FAILED(compiland->findChildren(SymTagBlock, NULL,
468                                        nsNone, &blocks))) {
469       fprintf(stderr, "findChildren failed on a compiland\n");
470       return false;
471     }
472 
473     CComPtr<IDiaSymbol> block;
474     while (SUCCEEDED(blocks->Next(1, &block, &count)) && count == 1) {
475       // find this block's lexical parent function
476       CComPtr<IDiaSymbol> parent;
477       DWORD tag;
478       if (SUCCEEDED(block->get_lexicalParent(&parent)) &&
479           SUCCEEDED(parent->get_symTag(&tag)) &&
480           tag == SymTagFunction) {
481         // now get the block's offset and the function's offset and size,
482         // and determine if the block is outside of the function
483         DWORD func_rva, block_rva;
484         ULONGLONG func_length;
485         if (SUCCEEDED(block->get_relativeVirtualAddress(&block_rva)) &&
486             SUCCEEDED(parent->get_relativeVirtualAddress(&func_rva)) &&
487             SUCCEEDED(parent->get_length(&func_length))) {
488           if (block_rva < func_rva || block_rva > (func_rva + func_length)) {
489             if (!PrintFunction(parent, block, false)) {
490               return false;
491             }
492           }
493         }
494       }
495       parent.Release();
496       block.Release();
497     }
498     blocks.Release();
499     compiland.Release();
500   }
501 
502   global.Release();
503   return true;
504 }
505 
506 #undef max
507 
PrintFrameDataUsingPDB()508 bool PDBSourceLineWriter::PrintFrameDataUsingPDB() {
509   // It would be nice if it were possible to output frame data alongside the
510   // associated function, as is done with line numbers, but the DIA API
511   // doesn't make it possible to get the frame data in that way.
512 
513   CComPtr<IDiaEnumFrameData> frame_data_enum;
514   if (!FindTable(session_, &frame_data_enum))
515     return false;
516 
517   DWORD last_type = std::numeric_limits<DWORD>::max();
518   DWORD last_rva = std::numeric_limits<DWORD>::max();
519   DWORD last_code_size = 0;
520   DWORD last_prolog_size = std::numeric_limits<DWORD>::max();
521 
522   CComPtr<IDiaFrameData> frame_data;
523   ULONG count = 0;
524   while (SUCCEEDED(frame_data_enum->Next(1, &frame_data, &count)) &&
525          count == 1) {
526     DWORD type;
527     if (FAILED(frame_data->get_type(&type)))
528       return false;
529 
530     DWORD rva;
531     if (FAILED(frame_data->get_relativeVirtualAddress(&rva)))
532       return false;
533 
534     DWORD code_size;
535     if (FAILED(frame_data->get_lengthBlock(&code_size)))
536       return false;
537 
538     DWORD prolog_size;
539     if (FAILED(frame_data->get_lengthProlog(&prolog_size)))
540       return false;
541 
542     // parameter_size is the size of parameters passed on the stack.  If any
543     // parameters are not passed on the stack (such as in registers), their
544     // sizes will not be included in parameter_size.
545     DWORD parameter_size;
546     if (FAILED(frame_data->get_lengthParams(&parameter_size)))
547       return false;
548 
549     DWORD saved_register_size;
550     if (FAILED(frame_data->get_lengthSavedRegisters(&saved_register_size)))
551       return false;
552 
553     DWORD local_size;
554     if (FAILED(frame_data->get_lengthLocals(&local_size)))
555       return false;
556 
557     // get_maxStack can return S_FALSE, just use 0 in that case.
558     DWORD max_stack_size = 0;
559     if (FAILED(frame_data->get_maxStack(&max_stack_size)))
560       return false;
561 
562     // get_programString can return S_FALSE, indicating that there is no
563     // program string.  In that case, check whether %ebp is used.
564     HRESULT program_string_result;
565     CComBSTR program_string;
566     if (FAILED(program_string_result = frame_data->get_program(
567         &program_string))) {
568       return false;
569     }
570 
571     // get_allocatesBasePointer can return S_FALSE, treat that as though
572     // %ebp is not used.
573     BOOL allocates_base_pointer = FALSE;
574     if (program_string_result != S_OK) {
575       if (FAILED(frame_data->get_allocatesBasePointer(
576           &allocates_base_pointer))) {
577         return false;
578       }
579     }
580 
581     // Only print out a line if type, rva, code_size, or prolog_size have
582     // changed from the last line.  It is surprisingly common (especially in
583     // system library PDBs) for DIA to return a series of identical
584     // IDiaFrameData objects.  For kernel32.pdb from Windows XP SP2 on x86,
585     // this check reduces the size of the dumped symbol file by a third.
586     if (type != last_type || rva != last_rva || code_size != last_code_size ||
587         prolog_size != last_prolog_size) {
588       // The prolog and the code portions of the frame have to be treated
589       // independently as they may have independently changed in size, or may
590       // even have been split.
591       // NOTE: If epilog size is ever non-zero, we have to do something
592       //     similar with it.
593 
594       // Figure out where the prolog bytes have landed.
595       AddressRangeVector prolog_ranges;
596       if (prolog_size > 0) {
597         MapAddressRange(image_map_, AddressRange(rva, prolog_size),
598                         &prolog_ranges);
599       }
600 
601       // And figure out where the code bytes have landed.
602       AddressRangeVector code_ranges;
603       MapAddressRange(image_map_,
604                       AddressRange(rva + prolog_size,
605                                    code_size - prolog_size),
606                       &code_ranges);
607 
608       struct FrameInfo {
609         DWORD rva;
610         DWORD code_size;
611         DWORD prolog_size;
612       };
613       std::vector<FrameInfo> frame_infos;
614 
615       // Special case: The prolog and the code bytes remain contiguous. This is
616       // only done for compactness of the symbol file, and we could actually
617       // be outputting independent frame info for the prolog and code portions.
618       if (prolog_ranges.size() == 1 && code_ranges.size() == 1 &&
619           prolog_ranges[0].end() == code_ranges[0].rva) {
620         FrameInfo fi = { prolog_ranges[0].rva,
621                          prolog_ranges[0].length + code_ranges[0].length,
622                          prolog_ranges[0].length };
623         frame_infos.push_back(fi);
624       } else {
625         // Otherwise we output the prolog and code frame info independently.
626         for (size_t i = 0; i < prolog_ranges.size(); ++i) {
627           FrameInfo fi = { prolog_ranges[i].rva,
628                            prolog_ranges[i].length,
629                            prolog_ranges[i].length };
630           frame_infos.push_back(fi);
631         }
632         for (size_t i = 0; i < code_ranges.size(); ++i) {
633           FrameInfo fi = { code_ranges[i].rva, code_ranges[i].length, 0 };
634           frame_infos.push_back(fi);
635         }
636       }
637 
638       for (size_t i = 0; i < frame_infos.size(); ++i) {
639         const FrameInfo& fi(frame_infos[i]);
640         fprintf(output_, "STACK WIN %lx %lx %lx %lx %x %lx %lx %lx %lx %d ",
641                 type, fi.rva, fi.code_size, fi.prolog_size,
642                 0 /* epilog_size */, parameter_size, saved_register_size,
643                 local_size, max_stack_size, program_string_result == S_OK);
644         if (program_string_result == S_OK) {
645           fprintf(output_, "%ws\n", program_string.m_str);
646         } else {
647           fprintf(output_, "%d\n", allocates_base_pointer);
648         }
649       }
650 
651       last_type = type;
652       last_rva = rva;
653       last_code_size = code_size;
654       last_prolog_size = prolog_size;
655     }
656 
657     frame_data.Release();
658   }
659 
660   return true;
661 }
662 
PrintFrameDataUsingEXE()663 bool PDBSourceLineWriter::PrintFrameDataUsingEXE() {
664   if (code_file_.empty() && !FindPEFile()) {
665     fprintf(stderr, "Couldn't locate EXE or DLL file.\n");
666     return false;
667   }
668 
669   return PrintPEFrameData(code_file_, output_);
670 }
671 
PrintFrameData()672 bool PDBSourceLineWriter::PrintFrameData() {
673   PDBModuleInfo info;
674   if (GetModuleInfo(&info) && info.cpu == L"x86_64") {
675     return PrintFrameDataUsingEXE();
676   } else {
677     return PrintFrameDataUsingPDB();
678   }
679   return false;
680 }
681 
PrintCodePublicSymbol(IDiaSymbol * symbol,bool has_multiple_symbols)682 bool PDBSourceLineWriter::PrintCodePublicSymbol(IDiaSymbol *symbol,
683                                                 bool has_multiple_symbols) {
684   BOOL is_code;
685   if (FAILED(symbol->get_code(&is_code))) {
686     return false;
687   }
688   if (!is_code) {
689     return true;
690   }
691 
692   DWORD rva;
693   if (FAILED(symbol->get_relativeVirtualAddress(&rva))) {
694     return false;
695   }
696 
697   CComBSTR name;
698   int stack_param_size;
699   if (!GetSymbolFunctionName(symbol, &name, &stack_param_size)) {
700     return false;
701   }
702 
703   AddressRangeVector ranges;
704   MapAddressRange(image_map_, AddressRange(rva, 1), &ranges);
705   for (size_t i = 0; i < ranges.size(); ++i) {
706     const char* optional_multiple_field = has_multiple_symbols ? "m " : "";
707     fprintf(output_, "PUBLIC %s%lx %x %ws\n", optional_multiple_field,
708             ranges[i].rva, stack_param_size > 0 ? stack_param_size : 0,
709             name.m_str);
710   }
711 
712   // Now walk the function in the original untranslated space, asking DIA
713   // what function is at that location, stepping through OMAP blocks. If
714   // we're still in the same function, emit another entry, because the
715   // symbol could have been split into multiple pieces. If we've gotten to
716   // another symbol in the original address space, then we're done for
717   // this symbol. See https://crbug.com/678874.
718   for (;;) {
719     // This steps to the next block in the original image. Simply doing
720     // rva++ would also be correct, but would emit tons of unnecessary
721     // entries.
722     rva = image_map_.subsequent_rva_block[rva];
723     if (rva == 0)
724       break;
725 
726     CComPtr<IDiaSymbol> next_sym = NULL;
727     LONG displacement;
728     if (FAILED(session_->findSymbolByRVAEx(rva, SymTagPublicSymbol, &next_sym,
729                                            &displacement))) {
730       break;
731     }
732 
733     if (!SymbolsMatch(symbol, next_sym))
734       break;
735 
736     AddressRangeVector next_ranges;
737     MapAddressRange(image_map_, AddressRange(rva, 1), &next_ranges);
738     for (size_t i = 0; i < next_ranges.size(); ++i) {
739       fprintf(output_, "PUBLIC %lx %x %ws\n", next_ranges[i].rva,
740               stack_param_size > 0 ? stack_param_size : 0, name.m_str);
741     }
742   }
743 
744   return true;
745 }
746 
PrintPDBInfo()747 bool PDBSourceLineWriter::PrintPDBInfo() {
748   PDBModuleInfo info;
749   if (!GetModuleInfo(&info)) {
750     return false;
751   }
752 
753   // Hard-code "windows" for the OS because that's the only thing that makes
754   // sense for PDB files.  (This might not be strictly correct for Windows CE
755   // support, but we don't care about that at the moment.)
756   fprintf(output_, "MODULE windows %ws %ws %ws\n",
757           info.cpu.c_str(), info.debug_identifier.c_str(),
758           info.debug_file.c_str());
759 
760   return true;
761 }
762 
PrintPEInfo()763 bool PDBSourceLineWriter::PrintPEInfo() {
764   PEModuleInfo info;
765   if (!GetPEInfo(&info)) {
766     return false;
767   }
768 
769   fprintf(output_, "INFO CODE_ID %ws %ws\n",
770           info.code_identifier.c_str(),
771           info.code_file.c_str());
772   return true;
773 }
774 
// wcstol_positive_strict is sort of like wcstol, but much stricter.  string
// should be a buffer pointing to a null-terminated string containing only
// decimal digits.  If the entire string can be converted to an integer
// without overflowing, and there are no non-digit characters before the
// terminating null, *result is set to the value and this function returns
// true.  Otherwise, this function returns false and *result is left
// untouched.  Leading zeroes are rejected unless the string is exactly "0".
// An empty string is accepted and yields 0.  This is an alternative to the
// strtol, atoi, and scanf families, which are not as strict about input and
// in some cases don't provide a good way for the caller to determine if a
// conversion was successful.
static bool wcstol_positive_strict(wchar_t *string, int *result) {
  const int kMaxValue = std::numeric_limits<int>::max();
  int value = 0;
  for (wchar_t *c = string; *c != '\0'; ++c) {
    if (*c < '0' || *c > '9') {
      return false;
    }
    const int digit = *c - '0';
    // Detect overflow before it happens: signed overflow is undefined
    // behavior, so it can't be reliably detected after the fact.
    // value * 10 + digit fits in an int exactly when this holds.
    if (value > (kMaxValue - digit) / 10) {
      return false;
    }
    value = value * 10 + digit;
    // Forbid leading zeroes unless the string is just "0".
    if (value == 0 && *(c + 1) != '\0') {
      return false;
    }
  }
  *result = value;
  return true;
}
811 
FindPEFile()812 bool PDBSourceLineWriter::FindPEFile() {
813   CComPtr<IDiaSymbol> global;
814   if (FAILED(session_->get_globalScope(&global))) {
815     fprintf(stderr, "get_globalScope failed\n");
816     return false;
817   }
818 
819   CComBSTR symbols_file;
820   if (SUCCEEDED(global->get_symbolsFileName(&symbols_file))) {
821     wstring file(symbols_file);
822 
823     // Look for an EXE or DLL file.
824     const wchar_t *extensions[] = { L"exe", L"dll" };
825     for (size_t i = 0; i < sizeof(extensions) / sizeof(extensions[0]); i++) {
826       size_t dot_pos = file.find_last_of(L".");
827       if (dot_pos != wstring::npos) {
828         file.replace(dot_pos + 1, wstring::npos, extensions[i]);
829         // Check if this file exists.
830         if (GetFileAttributesW(file.c_str()) != INVALID_FILE_ATTRIBUTES) {
831           code_file_ = file;
832           return true;
833         }
834       }
835     }
836   }
837 
838   return false;
839 }
840 
841 // static
GetSymbolFunctionName(IDiaSymbol * function,BSTR * name,int * stack_param_size)842 bool PDBSourceLineWriter::GetSymbolFunctionName(IDiaSymbol *function,
843                                                 BSTR *name,
844                                                 int *stack_param_size) {
845   *stack_param_size = -1;
846   const DWORD undecorate_options = UNDNAME_NO_MS_KEYWORDS |
847                                    UNDNAME_NO_FUNCTION_RETURNS |
848                                    UNDNAME_NO_ALLOCATION_MODEL |
849                                    UNDNAME_NO_ALLOCATION_LANGUAGE |
850                                    UNDNAME_NO_THISTYPE |
851                                    UNDNAME_NO_ACCESS_SPECIFIERS |
852                                    UNDNAME_NO_THROW_SIGNATURES |
853                                    UNDNAME_NO_MEMBER_TYPE |
854                                    UNDNAME_NO_RETURN_UDT_MODEL |
855                                    UNDNAME_NO_ECSU;
856 
857   // Use get_undecoratedNameEx to get readable C++ names with arguments.
858   if (function->get_undecoratedNameEx(undecorate_options, name) != S_OK) {
859     if (function->get_name(name) != S_OK) {
860       fprintf(stderr, "failed to get function name\n");
861       return false;
862     }
863 
864     // It's possible for get_name to return an empty string, so
865     // special-case that.
866     if (wcscmp(*name, L"") == 0) {
867       SysFreeString(*name);
868       // dwarf_cu_to_module.cc uses "<name omitted>", so match that.
869       *name = SysAllocString(L"<name omitted>");
870       return true;
871     }
872 
873     // If a name comes from get_name because no undecorated form existed,
874     // it's already formatted properly to be used as output.  Don't do any
875     // additional processing.
876     //
877     // MSVC7's DIA seems to not undecorate names in as many cases as MSVC8's.
878     // This will result in calling get_name for some C++ symbols, so
879     // all of the parameter and return type information may not be included in
880     // the name string.
881   } else {
882     // C++ uses a bogus "void" argument for functions and methods that don't
883     // take any parameters.  Take it out of the undecorated name because it's
884     // ugly and unnecessary.
885     const wchar_t *replace_string = L"(void)";
886     const size_t replace_length = wcslen(replace_string);
887     const wchar_t *replacement_string = L"()";
888     size_t length = wcslen(*name);
889     if (length >= replace_length) {
890       wchar_t *name_end = *name + length - replace_length;
891       if (wcscmp(name_end, replace_string) == 0) {
892         WindowsStringUtils::safe_wcscpy(name_end, replace_length,
893                                         replacement_string);
894         length = wcslen(*name);
895       }
896     }
897 
898     // Undecorate names used for stdcall and fastcall.  These names prefix
899     // the identifier with '_' (stdcall) or '@' (fastcall) and suffix it
900     // with '@' followed by the number of bytes of parameters, in decimal.
901     // If such a name is found, take note of the size and undecorate it.
902     // Only do this for names that aren't C++, which is determined based on
903     // whether the undecorated name contains any ':' or '(' characters.
904     if (!wcschr(*name, ':') && !wcschr(*name, '(') &&
905         (*name[0] == '_' || *name[0] == '@')) {
906       wchar_t *last_at = wcsrchr(*name + 1, '@');
907       if (last_at && wcstol_positive_strict(last_at + 1, stack_param_size)) {
908         // If this function adheres to the fastcall convention, it accepts up
909         // to the first 8 bytes of parameters in registers (%ecx and %edx).
910         // We're only interested in the stack space used for parameters, so
911         // so subtract 8 and don't let the size go below 0.
912         if (*name[0] == '@') {
913           if (*stack_param_size > 8) {
914             *stack_param_size -= 8;
915           } else {
916             *stack_param_size = 0;
917           }
918         }
919 
920         // Undecorate the name by moving it one character to the left in its
921         // buffer, and terminating it where the last '@' had been.
922         WindowsStringUtils::safe_wcsncpy(*name, length,
923                                          *name + 1, last_at - *name - 1);
924      } else if (*name[0] == '_') {
925         // This symbol's name is encoded according to the cdecl rules.  The
926         // name doesn't end in a '@' character followed by a decimal positive
927         // integer, so it's not a stdcall name.  Strip off the leading
928         // underscore.
929         WindowsStringUtils::safe_wcsncpy(*name, length, *name + 1, length);
930       }
931     }
932   }
933 
934   return true;
935 }
936 
937 // static
GetFunctionStackParamSize(IDiaSymbol * function)938 int PDBSourceLineWriter::GetFunctionStackParamSize(IDiaSymbol *function) {
939   // This implementation is highly x86-specific.
940 
941   // Gather the symbols corresponding to data.
942   CComPtr<IDiaEnumSymbols> data_children;
943   if (FAILED(function->findChildren(SymTagData, NULL, nsNone,
944                                     &data_children))) {
945     return 0;
946   }
947 
948   // lowest_base is the lowest %ebp-relative byte offset used for a parameter.
949   // highest_end is one greater than the highest offset (i.e. base + length).
950   // Stack parameters are assumed to be contiguous, because in reality, they
951   // are.
952   int lowest_base = INT_MAX;
953   int highest_end = INT_MIN;
954 
955   CComPtr<IDiaSymbol> child;
956   DWORD count;
957   while (SUCCEEDED(data_children->Next(1, &child, &count)) && count == 1) {
958     // If any operation fails at this point, just proceed to the next child.
959     // Use the next_child label instead of continue because child needs to
960     // be released before it's reused.  Declare constructable/destructable
961     // types early to avoid gotos that cross initializations.
962     CComPtr<IDiaSymbol> child_type;
963 
964     // DataIsObjectPtr is only used for |this|.  Because |this| can be passed
965     // as a stack parameter, look for it in addition to traditional
966     // parameters.
967     DWORD child_kind;
968     if (FAILED(child->get_dataKind(&child_kind)) ||
969         (child_kind != DataIsParam && child_kind != DataIsObjectPtr)) {
970       goto next_child;
971     }
972 
973     // Only concentrate on register-relative parameters.  Parameters may also
974     // be enregistered (passed directly in a register), but those don't
975     // consume any stack space, so they're not of interest.
976     DWORD child_location_type;
977     if (FAILED(child->get_locationType(&child_location_type)) ||
978         child_location_type != LocIsRegRel) {
979       goto next_child;
980     }
981 
982     // Of register-relative parameters, the only ones that make any sense are
983     // %ebp- or %esp-relative.  Note that MSVC's debugging information always
984     // gives parameters as %ebp-relative even when a function doesn't use a
985     // traditional frame pointer and stack parameters are accessed relative to
986     // %esp, so just look for %ebp-relative parameters.  If you wanted to
987     // access parameters, you'd probably want to treat these %ebp-relative
988     // offsets as if they were relative to %esp before a function's prolog
989     // executed.
990     DWORD child_register;
991     if (FAILED(child->get_registerId(&child_register)) ||
992         child_register != CV_REG_EBP) {
993       goto next_child;
994     }
995 
996     LONG child_register_offset;
997     if (FAILED(child->get_offset(&child_register_offset))) {
998       goto next_child;
999     }
1000 
1001     // IDiaSymbol::get_type can succeed but still pass back a NULL value.
1002     if (FAILED(child->get_type(&child_type)) || !child_type) {
1003       goto next_child;
1004     }
1005 
1006     ULONGLONG child_length;
1007     if (FAILED(child_type->get_length(&child_length))) {
1008       goto next_child;
1009     }
1010 
1011     // Extra scope to avoid goto jumping over variable initialization
1012     {
1013       int child_end = child_register_offset + static_cast<ULONG>(child_length);
1014       if (child_register_offset < lowest_base) {
1015         lowest_base = child_register_offset;
1016       }
1017       if (child_end > highest_end) {
1018         highest_end = child_end;
1019       }
1020     }
1021 
1022 next_child:
1023     child.Release();
1024   }
1025 
1026   int param_size = 0;
1027   // Make sure lowest_base isn't less than 4, because [%esp+4] is the lowest
1028   // possible address to find a stack parameter before executing a function's
1029   // prolog (see above).  Some optimizations cause parameter offsets to be
1030   // lower than 4, but we're not concerned with those because we're only
1031   // looking for parameters contained in addresses higher than where the
1032   // return address is stored.
1033   if (lowest_base < 4) {
1034     lowest_base = 4;
1035   }
1036   if (highest_end > lowest_base) {
1037     // All stack parameters are pushed as at least 4-byte quantities.  If the
1038     // last type was narrower than 4 bytes, promote it.  This assumes that all
1039     // parameters' offsets are 4-byte-aligned, which is always the case.  Only
1040     // worry about the last type, because we're not summing the type sizes,
1041     // just looking at the lowest and highest offsets.
1042     int remainder = highest_end % 4;
1043     if (remainder) {
1044       highest_end += 4 - remainder;
1045     }
1046 
1047     param_size = highest_end - lowest_base;
1048   }
1049 
1050   return param_size;
1051 }
1052 
WriteSymbols(FILE * symbol_file)1053 bool PDBSourceLineWriter::WriteSymbols(FILE *symbol_file) {
1054   output_ = symbol_file;
1055 
1056   // Load the OMAP information, and disable auto-translation of addresses in
1057   // preference of doing it ourselves.
1058   OmapData omap_data;
1059   if (!GetOmapDataAndDisableTranslation(session_, &omap_data))
1060     return false;
1061   BuildImageMap(omap_data, &image_map_);
1062 
1063   bool ret = PrintPDBInfo();
1064   // This is not a critical piece of the symbol file.
1065   PrintPEInfo();
1066   ret = ret &&
1067       PrintSourceFiles() &&
1068       PrintFunctions() &&
1069       PrintFrameData();
1070 
1071   output_ = NULL;
1072   return ret;
1073 }
1074 
Close()1075 void PDBSourceLineWriter::Close() {
1076   if (session_ != nullptr) {
1077     session_.Release();
1078   }
1079 }
1080 
GetModuleInfo(PDBModuleInfo * info)1081 bool PDBSourceLineWriter::GetModuleInfo(PDBModuleInfo *info) {
1082   if (!info) {
1083     return false;
1084   }
1085 
1086   info->debug_file.clear();
1087   info->debug_identifier.clear();
1088   info->cpu.clear();
1089 
1090   CComPtr<IDiaSymbol> global;
1091   if (FAILED(session_->get_globalScope(&global))) {
1092     return false;
1093   }
1094 
1095   DWORD machine_type;
1096   // get_machineType can return S_FALSE.
1097   if (global->get_machineType(&machine_type) == S_OK) {
1098     // The documentation claims that get_machineType returns a value from
1099     // the CV_CPU_TYPE_e enumeration, but that's not the case.
1100     // Instead, it returns one of the IMAGE_FILE_MACHINE values as
1101     // defined here:
1102     // http://msdn.microsoft.com/en-us/library/ms680313%28VS.85%29.aspx
1103     info->cpu = FileHeaderMachineToCpuString(static_cast<WORD>(machine_type));
1104   } else {
1105     // Unexpected, but handle gracefully.
1106     info->cpu = L"unknown";
1107   }
1108 
1109   // DWORD* and int* are not compatible.  This is clean and avoids a cast.
1110   DWORD age;
1111   if (FAILED(global->get_age(&age))) {
1112     return false;
1113   }
1114 
1115   bool uses_guid;
1116   if (!UsesGUID(&uses_guid)) {
1117     return false;
1118   }
1119 
1120   if (uses_guid) {
1121     GUID guid;
1122     if (FAILED(global->get_guid(&guid))) {
1123       return false;
1124     }
1125 
1126     info->debug_identifier = GenerateDebugIdentifier(age, guid);
1127   } else {
1128     DWORD signature;
1129     if (FAILED(global->get_signature(&signature))) {
1130       return false;
1131     }
1132 
1133     info->debug_identifier = GenerateDebugIdentifier(age, signature);
1134   }
1135 
1136   CComBSTR debug_file_string;
1137   if (FAILED(global->get_symbolsFileName(&debug_file_string))) {
1138     return false;
1139   }
1140   info->debug_file =
1141       WindowsStringUtils::GetBaseName(wstring(debug_file_string));
1142 
1143   return true;
1144 }
1145 
GetPEInfo(PEModuleInfo * info)1146 bool PDBSourceLineWriter::GetPEInfo(PEModuleInfo *info) {
1147   if (!info) {
1148     return false;
1149   }
1150 
1151   if (code_file_.empty() && !FindPEFile()) {
1152     fprintf(stderr, "Couldn't locate EXE or DLL file.\n");
1153     return false;
1154   }
1155 
1156   return ReadPEInfo(code_file_, info);
1157 }
1158 
UsesGUID(bool * uses_guid)1159 bool PDBSourceLineWriter::UsesGUID(bool *uses_guid) {
1160   if (!uses_guid)
1161     return false;
1162 
1163   CComPtr<IDiaSymbol> global;
1164   if (FAILED(session_->get_globalScope(&global)))
1165     return false;
1166 
1167   GUID guid;
1168   if (FAILED(global->get_guid(&guid)))
1169     return false;
1170 
1171   DWORD signature;
1172   if (FAILED(global->get_signature(&signature)))
1173     return false;
1174 
1175   // There are two possibilities for guid: either it's a real 128-bit GUID
1176   // as identified in a code module by a new-style CodeView record, or it's
1177   // a 32-bit signature (timestamp) as identified by an old-style record.
1178   // See MDCVInfoPDB70 and MDCVInfoPDB20 in minidump_format.h.
1179   //
1180   // Because DIA doesn't provide a way to directly determine whether a module
1181   // uses a GUID or a 32-bit signature, this code checks whether the first 32
1182   // bits of guid are the same as the signature, and if the rest of guid is
1183   // zero.  If so, then with a pretty high degree of certainty, there's an
1184   // old-style CodeView record in use.  This method will only falsely find an
1185   // an old-style CodeView record if a real 128-bit GUID has its first 32
1186   // bits set the same as the module's signature (timestamp) and the rest of
1187   // the GUID is set to 0.  This is highly unlikely.
1188 
1189   GUID signature_guid = {signature};  // 0-initializes other members
1190   *uses_guid = !IsEqualGUID(guid, signature_guid);
1191   return true;
1192 }
1193 
1194 }  // namespace google_breakpad
1195