1 // Copyright (c) 2010 Google Inc. All Rights Reserved.
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions are
5 // met:
6 //
7 // * Redistributions of source code must retain the above copyright
8 // notice, this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above
10 // copyright notice, this list of conditions and the following disclaimer
11 // in the documentation and/or other materials provided with the
12 // distribution.
13 // * Neither the name of Google Inc. nor the names of its
14 // contributors may be used to endorse or promote products derived from
15 // this software without specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29 // CFI reader author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
30
31 // Implementation of dwarf2reader::LineInfo, dwarf2reader::CompilationUnit,
32 // and dwarf2reader::CallFrameInfo. See dwarf2reader.h for details.
33
34 #include "common/dwarf/dwarf2reader.h"
35
36 #include <assert.h>
37 #include <stdint.h>
38 #include <stdio.h>
39 #include <string.h>
40
41 #include <map>
42 #include <memory>
43 #include <stack>
44 #include <string>
45 #include <utility>
46
47 #include <sys/stat.h>
48
49 #include "common/dwarf/bytereader-inl.h"
50 #include "common/dwarf/bytereader.h"
51 #include "common/dwarf/line_state_machine.h"
52 #include "common/using_std_string.h"
53 #include "google_breakpad/common/breakpad_types.h"
54
55 namespace dwarf2reader {
56
CompilationUnit(const string & path,const SectionMap & sections,uint64_t offset,ByteReader * reader,Dwarf2Handler * handler)57 CompilationUnit::CompilationUnit(const string& path,
58 const SectionMap& sections, uint64_t offset,
59 ByteReader* reader, Dwarf2Handler* handler)
60 : path_(path), offset_from_section_start_(offset), reader_(reader),
61 sections_(sections), handler_(handler), abbrevs_(),
62 string_buffer_(NULL), string_buffer_length_(0),
63 str_offsets_buffer_(NULL), str_offsets_buffer_length_(0),
64 addr_buffer_(NULL), addr_buffer_length_(0),
65 is_split_dwarf_(false), dwo_id_(0), dwo_name_(),
66 skeleton_dwo_id_(0), ranges_base_(0), addr_base_(0),
67 have_checked_for_dwp_(false), dwp_path_(),
68 dwp_byte_reader_(), dwp_reader_() {}
69
70 // Initialize a compilation unit from a .dwo or .dwp file.
71 // In this case, we need the .debug_addr section from the
72 // executable file that contains the corresponding skeleton
73 // compilation unit. We also inherit the Dwarf2Handler from
74 // the executable file, and call it as if we were still
75 // processing the original compilation unit.
76
SetSplitDwarf(const uint8_t * addr_buffer,uint64_t addr_buffer_length,uint64_t addr_base,uint64_t ranges_base,uint64_t dwo_id)77 void CompilationUnit::SetSplitDwarf(const uint8_t* addr_buffer,
78 uint64_t addr_buffer_length,
79 uint64_t addr_base,
80 uint64_t ranges_base,
81 uint64_t dwo_id) {
82 is_split_dwarf_ = true;
83 addr_buffer_ = addr_buffer;
84 addr_buffer_length_ = addr_buffer_length;
85 addr_base_ = addr_base;
86 ranges_base_ = ranges_base;
87 skeleton_dwo_id_ = dwo_id;
88 }
89
90 // Read a DWARF2/3 abbreviation section.
91 // Each abbrev consists of an abbreviation number, a tag, a byte
92 // specifying whether the tag has children, and a list of
93 // attribute/form pairs.
94 // The list of forms is terminated by a 0 for the attribute, and a
95 // zero for the form. The entire abbreviation section is terminated
96 // by a zero for the code.
97
ReadAbbrevs()98 void CompilationUnit::ReadAbbrevs() {
99 if (abbrevs_)
100 return;
101
102 // First get the debug_abbrev section. ".debug_abbrev" is the name
103 // recommended in the DWARF spec, and used on Linux;
104 // "__debug_abbrev" is the name used in Mac OS X Mach-O files.
105 SectionMap::const_iterator iter = sections_.find(".debug_abbrev");
106 if (iter == sections_.end())
107 iter = sections_.find("__debug_abbrev");
108 assert(iter != sections_.end());
109
110 abbrevs_ = new std::vector<Abbrev>;
111 abbrevs_->resize(1);
112
113 // The only way to check whether we are reading over the end of the
114 // buffer would be to first compute the size of the leb128 data by
115 // reading it, then go back and read it again.
116 const uint8_t *abbrev_start = iter->second.first +
117 header_.abbrev_offset;
118 const uint8_t *abbrevptr = abbrev_start;
119 #ifndef NDEBUG
120 const uint64_t abbrev_length = iter->second.second - header_.abbrev_offset;
121 #endif
122
123 while (1) {
124 CompilationUnit::Abbrev abbrev;
125 size_t len;
126 const uint64_t number = reader_->ReadUnsignedLEB128(abbrevptr, &len);
127
128 if (number == 0)
129 break;
130 abbrev.number = number;
131 abbrevptr += len;
132
133 assert(abbrevptr < abbrev_start + abbrev_length);
134 const uint64_t tag = reader_->ReadUnsignedLEB128(abbrevptr, &len);
135 abbrevptr += len;
136 abbrev.tag = static_cast<enum DwarfTag>(tag);
137
138 assert(abbrevptr < abbrev_start + abbrev_length);
139 abbrev.has_children = reader_->ReadOneByte(abbrevptr);
140 abbrevptr += 1;
141
142 assert(abbrevptr < abbrev_start + abbrev_length);
143
144 while (1) {
145 const uint64_t nametemp = reader_->ReadUnsignedLEB128(abbrevptr, &len);
146 abbrevptr += len;
147
148 assert(abbrevptr < abbrev_start + abbrev_length);
149 const uint64_t formtemp = reader_->ReadUnsignedLEB128(abbrevptr, &len);
150 abbrevptr += len;
151 if (nametemp == 0 && formtemp == 0)
152 break;
153
154 const enum DwarfAttribute name =
155 static_cast<enum DwarfAttribute>(nametemp);
156 const enum DwarfForm form = static_cast<enum DwarfForm>(formtemp);
157 abbrev.attributes.push_back(std::make_pair(name, form));
158 }
159 assert(abbrev.number == abbrevs_->size());
160 abbrevs_->push_back(abbrev);
161 }
162 }
163
164 // Skips a single DIE's attributes.
SkipDIE(const uint8_t * start,const Abbrev & abbrev)165 const uint8_t *CompilationUnit::SkipDIE(const uint8_t* start,
166 const Abbrev& abbrev) {
167 for (AttributeList::const_iterator i = abbrev.attributes.begin();
168 i != abbrev.attributes.end();
169 i++) {
170 start = SkipAttribute(start, i->second);
171 }
172 return start;
173 }
174
175 // Skips a single attribute form's data.
SkipAttribute(const uint8_t * start,enum DwarfForm form)176 const uint8_t *CompilationUnit::SkipAttribute(const uint8_t *start,
177 enum DwarfForm form) {
178 size_t len;
179
180 switch (form) {
181 case DW_FORM_indirect:
182 form = static_cast<enum DwarfForm>(reader_->ReadUnsignedLEB128(start,
183 &len));
184 start += len;
185 return SkipAttribute(start, form);
186
187 case DW_FORM_flag_present:
188 return start;
189 case DW_FORM_data1:
190 case DW_FORM_flag:
191 case DW_FORM_ref1:
192 return start + 1;
193 case DW_FORM_ref2:
194 case DW_FORM_data2:
195 return start + 2;
196 case DW_FORM_ref4:
197 case DW_FORM_data4:
198 return start + 4;
199 case DW_FORM_ref8:
200 case DW_FORM_data8:
201 case DW_FORM_ref_sig8:
202 return start + 8;
203 case DW_FORM_string:
204 return start + strlen(reinterpret_cast<const char *>(start)) + 1;
205 case DW_FORM_udata:
206 case DW_FORM_ref_udata:
207 case DW_FORM_GNU_str_index:
208 case DW_FORM_GNU_addr_index:
209 reader_->ReadUnsignedLEB128(start, &len);
210 return start + len;
211
212 case DW_FORM_sdata:
213 reader_->ReadSignedLEB128(start, &len);
214 return start + len;
215 case DW_FORM_addr:
216 return start + reader_->AddressSize();
217 case DW_FORM_ref_addr:
218 // DWARF2 and 3/4 differ on whether ref_addr is address size or
219 // offset size.
220 assert(header_.version >= 2);
221 if (header_.version == 2) {
222 return start + reader_->AddressSize();
223 } else if (header_.version >= 3) {
224 return start + reader_->OffsetSize();
225 }
226 break;
227
228 case DW_FORM_block1:
229 return start + 1 + reader_->ReadOneByte(start);
230 case DW_FORM_block2:
231 return start + 2 + reader_->ReadTwoBytes(start);
232 case DW_FORM_block4:
233 return start + 4 + reader_->ReadFourBytes(start);
234 case DW_FORM_block:
235 case DW_FORM_exprloc: {
236 uint64_t size = reader_->ReadUnsignedLEB128(start, &len);
237 return start + size + len;
238 }
239 case DW_FORM_strp:
240 case DW_FORM_sec_offset:
241 return start + reader_->OffsetSize();
242 }
243 fprintf(stderr,"Unhandled form type");
244 return NULL;
245 }
246
247 // Read a DWARF2/3 header.
248 // The header is variable length in DWARF3 (and DWARF2 as extended by
249 // most compilers), and consists of a length field, a version number,
250 // the offset in the .debug_abbrev section for our abbrevs, and an
251 // address size.
ReadHeader()252 void CompilationUnit::ReadHeader() {
253 const uint8_t *headerptr = buffer_;
254 size_t initial_length_size;
255
256 assert(headerptr + 4 < buffer_ + buffer_length_);
257 const uint64_t initial_length
258 = reader_->ReadInitialLength(headerptr, &initial_length_size);
259 headerptr += initial_length_size;
260 header_.length = initial_length;
261
262 assert(headerptr + 2 < buffer_ + buffer_length_);
263 header_.version = reader_->ReadTwoBytes(headerptr);
264 headerptr += 2;
265
266 assert(headerptr + reader_->OffsetSize() < buffer_ + buffer_length_);
267 header_.abbrev_offset = reader_->ReadOffset(headerptr);
268 headerptr += reader_->OffsetSize();
269
270 // Compare against less than or equal because this may be the last
271 // section in the file.
272 assert(headerptr + 1 <= buffer_ + buffer_length_);
273 header_.address_size = reader_->ReadOneByte(headerptr);
274 reader_->SetAddressSize(header_.address_size);
275 headerptr += 1;
276
277 after_header_ = headerptr;
278
279 // This check ensures that we don't have to do checking during the
280 // reading of DIEs. header_.length does not include the size of the
281 // initial length.
282 assert(buffer_ + initial_length_size + header_.length <=
283 buffer_ + buffer_length_);
284 }
285
Start()286 uint64_t CompilationUnit::Start() {
287 // First get the debug_info section. ".debug_info" is the name
288 // recommended in the DWARF spec, and used on Linux; "__debug_info"
289 // is the name used in Mac OS X Mach-O files.
290 SectionMap::const_iterator iter = sections_.find(".debug_info");
291 if (iter == sections_.end())
292 iter = sections_.find("__debug_info");
293 assert(iter != sections_.end());
294
295 // Set up our buffer
296 buffer_ = iter->second.first + offset_from_section_start_;
297 buffer_length_ = iter->second.second - offset_from_section_start_;
298
299 // Read the header
300 ReadHeader();
301
302 // Figure out the real length from the end of the initial length to
303 // the end of the compilation unit, since that is the value we
304 // return.
305 uint64_t ourlength = header_.length;
306 if (reader_->OffsetSize() == 8)
307 ourlength += 12;
308 else
309 ourlength += 4;
310
311 // See if the user wants this compilation unit, and if not, just return.
312 if (!handler_->StartCompilationUnit(offset_from_section_start_,
313 reader_->AddressSize(),
314 reader_->OffsetSize(),
315 header_.length,
316 header_.version))
317 return ourlength;
318
319 // Otherwise, continue by reading our abbreviation entries.
320 ReadAbbrevs();
321
322 // Set the string section if we have one. ".debug_str" is the name
323 // recommended in the DWARF spec, and used on Linux; "__debug_str"
324 // is the name used in Mac OS X Mach-O files.
325 iter = sections_.find(".debug_str");
326 if (iter == sections_.end())
327 iter = sections_.find("__debug_str");
328 if (iter != sections_.end()) {
329 string_buffer_ = iter->second.first;
330 string_buffer_length_ = iter->second.second;
331 }
332
333 // Set the string offsets section if we have one.
334 iter = sections_.find(".debug_str_offsets");
335 if (iter != sections_.end()) {
336 str_offsets_buffer_ = iter->second.first;
337 str_offsets_buffer_length_ = iter->second.second;
338 }
339
340 // Set the address section if we have one.
341 iter = sections_.find(".debug_addr");
342 if (iter != sections_.end()) {
343 addr_buffer_ = iter->second.first;
344 addr_buffer_length_ = iter->second.second;
345 }
346
347 // Now that we have our abbreviations, start processing DIE's.
348 ProcessDIEs();
349
350 // If this is a skeleton compilation unit generated with split DWARF,
351 // and the client needs the full debug info, we need to find the full
352 // compilation unit in a .dwo or .dwp file.
353 if (!is_split_dwarf_
354 && dwo_name_ != NULL
355 && handler_->NeedSplitDebugInfo())
356 ProcessSplitDwarf();
357
358 return ourlength;
359 }
360
361 // If one really wanted, you could merge SkipAttribute and
362 // ProcessAttribute
363 // This is all boring data manipulation and calling of the handler.
ProcessAttribute(uint64_t dieoffset,const uint8_t * start,enum DwarfAttribute attr,enum DwarfForm form)364 const uint8_t *CompilationUnit::ProcessAttribute(
365 uint64_t dieoffset, const uint8_t *start, enum DwarfAttribute attr,
366 enum DwarfForm form) {
367 size_t len;
368
369 switch (form) {
370 // DW_FORM_indirect is never used because it is such a space
371 // waster.
372 case DW_FORM_indirect:
373 form = static_cast<enum DwarfForm>(reader_->ReadUnsignedLEB128(start,
374 &len));
375 start += len;
376 return ProcessAttribute(dieoffset, start, attr, form);
377
378 case DW_FORM_flag_present:
379 ProcessAttributeUnsigned(dieoffset, attr, form, 1);
380 return start;
381 case DW_FORM_data1:
382 case DW_FORM_flag:
383 ProcessAttributeUnsigned(dieoffset, attr, form,
384 reader_->ReadOneByte(start));
385 return start + 1;
386 case DW_FORM_data2:
387 ProcessAttributeUnsigned(dieoffset, attr, form,
388 reader_->ReadTwoBytes(start));
389 return start + 2;
390 case DW_FORM_data4:
391 ProcessAttributeUnsigned(dieoffset, attr, form,
392 reader_->ReadFourBytes(start));
393 return start + 4;
394 case DW_FORM_data8:
395 ProcessAttributeUnsigned(dieoffset, attr, form,
396 reader_->ReadEightBytes(start));
397 return start + 8;
398 case DW_FORM_string: {
399 const char *str = reinterpret_cast<const char *>(start);
400 ProcessAttributeString(dieoffset, attr, form, str);
401 return start + strlen(str) + 1;
402 }
403 case DW_FORM_udata:
404 ProcessAttributeUnsigned(dieoffset, attr, form,
405 reader_->ReadUnsignedLEB128(start, &len));
406 return start + len;
407
408 case DW_FORM_sdata:
409 ProcessAttributeSigned(dieoffset, attr, form,
410 reader_->ReadSignedLEB128(start, &len));
411 return start + len;
412 case DW_FORM_addr:
413 ProcessAttributeUnsigned(dieoffset, attr, form,
414 reader_->ReadAddress(start));
415 return start + reader_->AddressSize();
416 case DW_FORM_sec_offset:
417 ProcessAttributeUnsigned(dieoffset, attr, form,
418 reader_->ReadOffset(start));
419 return start + reader_->OffsetSize();
420
421 case DW_FORM_ref1:
422 handler_->ProcessAttributeReference(dieoffset, attr, form,
423 reader_->ReadOneByte(start)
424 + offset_from_section_start_);
425 return start + 1;
426 case DW_FORM_ref2:
427 handler_->ProcessAttributeReference(dieoffset, attr, form,
428 reader_->ReadTwoBytes(start)
429 + offset_from_section_start_);
430 return start + 2;
431 case DW_FORM_ref4:
432 handler_->ProcessAttributeReference(dieoffset, attr, form,
433 reader_->ReadFourBytes(start)
434 + offset_from_section_start_);
435 return start + 4;
436 case DW_FORM_ref8:
437 handler_->ProcessAttributeReference(dieoffset, attr, form,
438 reader_->ReadEightBytes(start)
439 + offset_from_section_start_);
440 return start + 8;
441 case DW_FORM_ref_udata:
442 handler_->ProcessAttributeReference(dieoffset, attr, form,
443 reader_->ReadUnsignedLEB128(start,
444 &len)
445 + offset_from_section_start_);
446 return start + len;
447 case DW_FORM_ref_addr:
448 // DWARF2 and 3/4 differ on whether ref_addr is address size or
449 // offset size.
450 assert(header_.version >= 2);
451 if (header_.version == 2) {
452 handler_->ProcessAttributeReference(dieoffset, attr, form,
453 reader_->ReadAddress(start));
454 return start + reader_->AddressSize();
455 } else if (header_.version >= 3) {
456 handler_->ProcessAttributeReference(dieoffset, attr, form,
457 reader_->ReadOffset(start));
458 return start + reader_->OffsetSize();
459 }
460 break;
461 case DW_FORM_ref_sig8:
462 handler_->ProcessAttributeSignature(dieoffset, attr, form,
463 reader_->ReadEightBytes(start));
464 return start + 8;
465
466 case DW_FORM_block1: {
467 uint64_t datalen = reader_->ReadOneByte(start);
468 handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 1,
469 datalen);
470 return start + 1 + datalen;
471 }
472 case DW_FORM_block2: {
473 uint64_t datalen = reader_->ReadTwoBytes(start);
474 handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 2,
475 datalen);
476 return start + 2 + datalen;
477 }
478 case DW_FORM_block4: {
479 uint64_t datalen = reader_->ReadFourBytes(start);
480 handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 4,
481 datalen);
482 return start + 4 + datalen;
483 }
484 case DW_FORM_block:
485 case DW_FORM_exprloc: {
486 uint64_t datalen = reader_->ReadUnsignedLEB128(start, &len);
487 handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + len,
488 datalen);
489 return start + datalen + len;
490 }
491 case DW_FORM_strp: {
492 assert(string_buffer_ != NULL);
493
494 const uint64_t offset = reader_->ReadOffset(start);
495 assert(string_buffer_ + offset < string_buffer_ + string_buffer_length_);
496
497 const char *str = reinterpret_cast<const char *>(string_buffer_ + offset);
498 ProcessAttributeString(dieoffset, attr, form, str);
499 return start + reader_->OffsetSize();
500 }
501
502 case DW_FORM_GNU_str_index: {
503 uint64_t str_index = reader_->ReadUnsignedLEB128(start, &len);
504 const uint8_t* offset_ptr =
505 str_offsets_buffer_ + str_index * reader_->OffsetSize();
506 const uint64_t offset = reader_->ReadOffset(offset_ptr);
507 if (offset >= string_buffer_length_) {
508 return NULL;
509 }
510
511 const char* str = reinterpret_cast<const char *>(string_buffer_) + offset;
512 ProcessAttributeString(dieoffset, attr, form, str);
513 return start + len;
514 break;
515 }
516 case DW_FORM_GNU_addr_index: {
517 uint64_t addr_index = reader_->ReadUnsignedLEB128(start, &len);
518 const uint8_t* addr_ptr =
519 addr_buffer_ + addr_base_ + addr_index * reader_->AddressSize();
520 ProcessAttributeUnsigned(dieoffset, attr, form,
521 reader_->ReadAddress(addr_ptr));
522 return start + len;
523 }
524 }
525 fprintf(stderr, "Unhandled form type\n");
526 return NULL;
527 }
528
ProcessDIE(uint64_t dieoffset,const uint8_t * start,const Abbrev & abbrev)529 const uint8_t *CompilationUnit::ProcessDIE(uint64_t dieoffset,
530 const uint8_t *start,
531 const Abbrev& abbrev) {
532 for (AttributeList::const_iterator i = abbrev.attributes.begin();
533 i != abbrev.attributes.end();
534 i++) {
535 start = ProcessAttribute(dieoffset, start, i->first, i->second);
536 }
537
538 // If this is a compilation unit in a split DWARF object, verify that
539 // the dwo_id matches. If it does not match, we will ignore this
540 // compilation unit.
541 if (abbrev.tag == DW_TAG_compile_unit
542 && is_split_dwarf_
543 && dwo_id_ != skeleton_dwo_id_) {
544 return NULL;
545 }
546
547 return start;
548 }
549
ProcessDIEs()550 void CompilationUnit::ProcessDIEs() {
551 const uint8_t *dieptr = after_header_;
552 size_t len;
553
554 // lengthstart is the place the length field is based on.
555 // It is the point in the header after the initial length field
556 const uint8_t *lengthstart = buffer_;
557
558 // In 64 bit dwarf, the initial length is 12 bytes, because of the
559 // 0xffffffff at the start.
560 if (reader_->OffsetSize() == 8)
561 lengthstart += 12;
562 else
563 lengthstart += 4;
564
565 std::stack<uint64_t> die_stack;
566
567 while (dieptr < (lengthstart + header_.length)) {
568 // We give the user the absolute offset from the beginning of
569 // debug_info, since they need it to deal with ref_addr forms.
570 uint64_t absolute_offset = (dieptr - buffer_) + offset_from_section_start_;
571
572 uint64_t abbrev_num = reader_->ReadUnsignedLEB128(dieptr, &len);
573
574 dieptr += len;
575
576 // Abbrev == 0 represents the end of a list of children, or padding
577 // at the end of the compilation unit.
578 if (abbrev_num == 0) {
579 if (die_stack.size() == 0)
580 // If it is padding, then we are done with the compilation unit's DIEs.
581 return;
582 const uint64_t offset = die_stack.top();
583 die_stack.pop();
584 handler_->EndDIE(offset);
585 continue;
586 }
587
588 const Abbrev& abbrev = abbrevs_->at(static_cast<size_t>(abbrev_num));
589 const enum DwarfTag tag = abbrev.tag;
590 if (!handler_->StartDIE(absolute_offset, tag)) {
591 dieptr = SkipDIE(dieptr, abbrev);
592 } else {
593 dieptr = ProcessDIE(absolute_offset, dieptr, abbrev);
594 }
595
596 if (abbrev.has_children) {
597 die_stack.push(absolute_offset);
598 } else {
599 handler_->EndDIE(absolute_offset);
600 }
601 }
602 }
603
604 // Check for a valid ELF file and return the Address size.
605 // Returns 0 if not a valid ELF file.
GetElfWidth(const ElfReader & elf)606 inline int GetElfWidth(const ElfReader& elf) {
607 if (elf.IsElf32File())
608 return 4;
609 if (elf.IsElf64File())
610 return 8;
611 return 0;
612 }
613
ProcessSplitDwarf()614 void CompilationUnit::ProcessSplitDwarf() {
615 struct stat statbuf;
616 if (!have_checked_for_dwp_) {
617 // Look for a .dwp file in the same directory as the executable.
618 have_checked_for_dwp_ = true;
619 string dwp_suffix(".dwp");
620 dwp_path_ = path_ + dwp_suffix;
621 if (stat(dwp_path_.c_str(), &statbuf) != 0) {
622 // Fall back to a split .debug file in the same directory.
623 string debug_suffix(".debug");
624 dwp_path_ = path_;
625 size_t found = path_.rfind(debug_suffix);
626 if (found + debug_suffix.length() == path_.length())
627 dwp_path_ = dwp_path_.replace(found, debug_suffix.length(), dwp_suffix);
628 }
629 if (stat(dwp_path_.c_str(), &statbuf) == 0) {
630 ElfReader* elf = new ElfReader(dwp_path_);
631 int width = GetElfWidth(*elf);
632 if (width != 0) {
633 dwp_byte_reader_.reset(new ByteReader(reader_->GetEndianness()));
634 dwp_byte_reader_->SetAddressSize(width);
635 dwp_reader_.reset(new DwpReader(*dwp_byte_reader_, elf));
636 dwp_reader_->Initialize();
637 } else {
638 delete elf;
639 }
640 }
641 }
642 bool found_in_dwp = false;
643 if (dwp_reader_) {
644 // If we have a .dwp file, read the debug sections for the requested CU.
645 SectionMap sections;
646 dwp_reader_->ReadDebugSectionsForCU(dwo_id_, §ions);
647 if (!sections.empty()) {
648 found_in_dwp = true;
649 CompilationUnit dwp_comp_unit(dwp_path_, sections, 0,
650 dwp_byte_reader_.get(), handler_);
651 dwp_comp_unit.SetSplitDwarf(addr_buffer_, addr_buffer_length_, addr_base_,
652 ranges_base_, dwo_id_);
653 dwp_comp_unit.Start();
654 }
655 }
656 if (!found_in_dwp) {
657 // If no .dwp file, try to open the .dwo file.
658 if (stat(dwo_name_, &statbuf) == 0) {
659 ElfReader elf(dwo_name_);
660 int width = GetElfWidth(elf);
661 if (width != 0) {
662 ByteReader reader(ENDIANNESS_LITTLE);
663 reader.SetAddressSize(width);
664 SectionMap sections;
665 ReadDebugSectionsFromDwo(&elf, §ions);
666 CompilationUnit dwo_comp_unit(dwo_name_, sections, 0, &reader,
667 handler_);
668 dwo_comp_unit.SetSplitDwarf(addr_buffer_, addr_buffer_length_,
669 addr_base_, ranges_base_, dwo_id_);
670 dwo_comp_unit.Start();
671 }
672 }
673 }
674 }
675
ReadDebugSectionsFromDwo(ElfReader * elf_reader,SectionMap * sections)676 void CompilationUnit::ReadDebugSectionsFromDwo(ElfReader* elf_reader,
677 SectionMap* sections) {
678 static const char* const section_names[] = {
679 ".debug_abbrev",
680 ".debug_info",
681 ".debug_str_offsets",
682 ".debug_str"
683 };
684 for (unsigned int i = 0u;
685 i < sizeof(section_names)/sizeof(*(section_names)); ++i) {
686 string base_name = section_names[i];
687 string dwo_name = base_name + ".dwo";
688 size_t section_size;
689 const char* section_data = elf_reader->GetSectionByName(dwo_name,
690 §ion_size);
691 if (section_data != NULL)
692 sections->insert(std::make_pair(
693 base_name, std::make_pair(
694 reinterpret_cast<const uint8_t *>(section_data),
695 section_size)));
696 }
697 }
698
DwpReader(const ByteReader & byte_reader,ElfReader * elf_reader)699 DwpReader::DwpReader(const ByteReader& byte_reader, ElfReader* elf_reader)
700 : elf_reader_(elf_reader), byte_reader_(byte_reader),
701 cu_index_(NULL), cu_index_size_(0), string_buffer_(NULL),
702 string_buffer_size_(0), version_(0), ncolumns_(0), nunits_(0),
703 nslots_(0), phash_(NULL), pindex_(NULL), shndx_pool_(NULL),
704 offset_table_(NULL), size_table_(NULL), abbrev_data_(NULL),
705 abbrev_size_(0), info_data_(NULL), info_size_(0),
706 str_offsets_data_(NULL), str_offsets_size_(0) {}
707
~DwpReader()708 DwpReader::~DwpReader() {
709 if (elf_reader_) delete elf_reader_;
710 }
711
Initialize()712 void DwpReader::Initialize() {
713 cu_index_ = elf_reader_->GetSectionByName(".debug_cu_index",
714 &cu_index_size_);
715 if (cu_index_ == NULL) {
716 return;
717 }
718 // The .debug_str.dwo section is shared by all CUs in the file.
719 string_buffer_ = elf_reader_->GetSectionByName(".debug_str.dwo",
720 &string_buffer_size_);
721
722 version_ = byte_reader_.ReadFourBytes(
723 reinterpret_cast<const uint8_t *>(cu_index_));
724
725 if (version_ == 1) {
726 nslots_ = byte_reader_.ReadFourBytes(
727 reinterpret_cast<const uint8_t *>(cu_index_)
728 + 3 * sizeof(uint32_t));
729 phash_ = cu_index_ + 4 * sizeof(uint32_t);
730 pindex_ = phash_ + nslots_ * sizeof(uint64_t);
731 shndx_pool_ = pindex_ + nslots_ * sizeof(uint32_t);
732 if (shndx_pool_ >= cu_index_ + cu_index_size_) {
733 version_ = 0;
734 }
735 } else if (version_ == 2) {
736 ncolumns_ = byte_reader_.ReadFourBytes(
737 reinterpret_cast<const uint8_t *>(cu_index_) + sizeof(uint32_t));
738 nunits_ = byte_reader_.ReadFourBytes(
739 reinterpret_cast<const uint8_t *>(cu_index_) + 2 * sizeof(uint32_t));
740 nslots_ = byte_reader_.ReadFourBytes(
741 reinterpret_cast<const uint8_t *>(cu_index_) + 3 * sizeof(uint32_t));
742 phash_ = cu_index_ + 4 * sizeof(uint32_t);
743 pindex_ = phash_ + nslots_ * sizeof(uint64_t);
744 offset_table_ = pindex_ + nslots_ * sizeof(uint32_t);
745 size_table_ = offset_table_ + ncolumns_ * (nunits_ + 1) * sizeof(uint32_t);
746 abbrev_data_ = elf_reader_->GetSectionByName(".debug_abbrev.dwo",
747 &abbrev_size_);
748 info_data_ = elf_reader_->GetSectionByName(".debug_info.dwo", &info_size_);
749 str_offsets_data_ = elf_reader_->GetSectionByName(".debug_str_offsets.dwo",
750 &str_offsets_size_);
751 if (size_table_ >= cu_index_ + cu_index_size_) {
752 version_ = 0;
753 }
754 }
755 }
756
ReadDebugSectionsForCU(uint64_t dwo_id,SectionMap * sections)757 void DwpReader::ReadDebugSectionsForCU(uint64_t dwo_id,
758 SectionMap* sections) {
759 if (version_ == 1) {
760 int slot = LookupCU(dwo_id);
761 if (slot == -1) {
762 return;
763 }
764
765 // The index table points to the section index pool, where we
766 // can read a list of section indexes for the debug sections
767 // for the CU whose dwo_id we are looking for.
768 int index = byte_reader_.ReadFourBytes(
769 reinterpret_cast<const uint8_t *>(pindex_)
770 + slot * sizeof(uint32_t));
771 const char* shndx_list = shndx_pool_ + index * sizeof(uint32_t);
772 for (;;) {
773 if (shndx_list >= cu_index_ + cu_index_size_) {
774 version_ = 0;
775 return;
776 }
777 unsigned int shndx = byte_reader_.ReadFourBytes(
778 reinterpret_cast<const uint8_t *>(shndx_list));
779 shndx_list += sizeof(uint32_t);
780 if (shndx == 0)
781 break;
782 const char* section_name = elf_reader_->GetSectionName(shndx);
783 size_t section_size;
784 const char* section_data;
785 // We're only interested in these four debug sections.
786 // The section names in the .dwo file end with ".dwo", but we
787 // add them to the sections table with their normal names.
788 if (!strncmp(section_name, ".debug_abbrev", strlen(".debug_abbrev"))) {
789 section_data = elf_reader_->GetSectionByIndex(shndx, §ion_size);
790 sections->insert(std::make_pair(
791 ".debug_abbrev",
792 std::make_pair(reinterpret_cast<const uint8_t *> (section_data),
793 section_size)));
794 } else if (!strncmp(section_name, ".debug_info", strlen(".debug_info"))) {
795 section_data = elf_reader_->GetSectionByIndex(shndx, §ion_size);
796 sections->insert(std::make_pair(
797 ".debug_info",
798 std::make_pair(reinterpret_cast<const uint8_t *> (section_data),
799 section_size)));
800 } else if (!strncmp(section_name, ".debug_str_offsets",
801 strlen(".debug_str_offsets"))) {
802 section_data = elf_reader_->GetSectionByIndex(shndx, §ion_size);
803 sections->insert(std::make_pair(
804 ".debug_str_offsets",
805 std::make_pair(reinterpret_cast<const uint8_t *> (section_data),
806 section_size)));
807 }
808 }
809 sections->insert(std::make_pair(
810 ".debug_str",
811 std::make_pair(reinterpret_cast<const uint8_t *> (string_buffer_),
812 string_buffer_size_)));
813 } else if (version_ == 2) {
814 uint32_t index = LookupCUv2(dwo_id);
815 if (index == 0) {
816 return;
817 }
818
819 // The index points to a row in each of the section offsets table
820 // and the section size table, where we can read the offsets and sizes
821 // of the contributions to each debug section from the CU whose dwo_id
822 // we are looking for. Row 0 of the section offsets table has the
823 // section ids for each column of the table. The size table begins
824 // with row 1.
825 const char* id_row = offset_table_;
826 const char* offset_row = offset_table_
827 + index * ncolumns_ * sizeof(uint32_t);
828 const char* size_row =
829 size_table_ + (index - 1) * ncolumns_ * sizeof(uint32_t);
830 if (size_row + ncolumns_ * sizeof(uint32_t) > cu_index_ + cu_index_size_) {
831 version_ = 0;
832 return;
833 }
834 for (unsigned int col = 0u; col < ncolumns_; ++col) {
835 uint32_t section_id =
836 byte_reader_.ReadFourBytes(reinterpret_cast<const uint8_t *>(id_row)
837 + col * sizeof(uint32_t));
838 uint32_t offset = byte_reader_.ReadFourBytes(
839 reinterpret_cast<const uint8_t *>(offset_row)
840 + col * sizeof(uint32_t));
841 uint32_t size = byte_reader_.ReadFourBytes(
842 reinterpret_cast<const uint8_t *>(size_row) + col * sizeof(uint32_t));
843 if (section_id == DW_SECT_ABBREV) {
844 sections->insert(std::make_pair(
845 ".debug_abbrev",
846 std::make_pair(reinterpret_cast<const uint8_t *> (abbrev_data_)
847 + offset, size)));
848 } else if (section_id == DW_SECT_INFO) {
849 sections->insert(std::make_pair(
850 ".debug_info",
851 std::make_pair(reinterpret_cast<const uint8_t *> (info_data_)
852 + offset, size)));
853 } else if (section_id == DW_SECT_STR_OFFSETS) {
854 sections->insert(std::make_pair(
855 ".debug_str_offsets",
856 std::make_pair(reinterpret_cast<const uint8_t *> (str_offsets_data_)
857 + offset, size)));
858 }
859 }
860 sections->insert(std::make_pair(
861 ".debug_str",
862 std::make_pair(reinterpret_cast<const uint8_t *> (string_buffer_),
863 string_buffer_size_)));
864 }
865 }
866
LookupCU(uint64_t dwo_id)867 int DwpReader::LookupCU(uint64_t dwo_id) {
868 uint32_t slot = static_cast<uint32_t>(dwo_id) & (nslots_ - 1);
869 uint64_t probe = byte_reader_.ReadEightBytes(
870 reinterpret_cast<const uint8_t *>(phash_) + slot * sizeof(uint64_t));
871 if (probe != 0 && probe != dwo_id) {
872 uint32_t secondary_hash =
873 (static_cast<uint32_t>(dwo_id >> 32) & (nslots_ - 1)) | 1;
874 do {
875 slot = (slot + secondary_hash) & (nslots_ - 1);
876 probe = byte_reader_.ReadEightBytes(
877 reinterpret_cast<const uint8_t *>(phash_) + slot * sizeof(uint64_t));
878 } while (probe != 0 && probe != dwo_id);
879 }
880 if (probe == 0)
881 return -1;
882 return slot;
883 }
884
LookupCUv2(uint64_t dwo_id)885 uint32_t DwpReader::LookupCUv2(uint64_t dwo_id) {
886 uint32_t slot = static_cast<uint32_t>(dwo_id) & (nslots_ - 1);
887 uint64_t probe = byte_reader_.ReadEightBytes(
888 reinterpret_cast<const uint8_t *>(phash_) + slot * sizeof(uint64_t));
889 uint32_t index = byte_reader_.ReadFourBytes(
890 reinterpret_cast<const uint8_t *>(pindex_) + slot * sizeof(uint32_t));
891 if (index != 0 && probe != dwo_id) {
892 uint32_t secondary_hash =
893 (static_cast<uint32_t>(dwo_id >> 32) & (nslots_ - 1)) | 1;
894 do {
895 slot = (slot + secondary_hash) & (nslots_ - 1);
896 probe = byte_reader_.ReadEightBytes(
897 reinterpret_cast<const uint8_t *>(phash_) + slot * sizeof(uint64_t));
898 index = byte_reader_.ReadFourBytes(
899 reinterpret_cast<const uint8_t *>(pindex_) + slot * sizeof(uint32_t));
900 } while (index != 0 && probe != dwo_id);
901 }
902 return index;
903 }
904
LineInfo(const uint8_t * buffer,uint64_t buffer_length,ByteReader * reader,LineInfoHandler * handler)905 LineInfo::LineInfo(const uint8_t *buffer, uint64_t buffer_length,
906 ByteReader* reader, LineInfoHandler* handler):
907 handler_(handler), reader_(reader), buffer_(buffer) {
908 #ifndef NDEBUG
909 buffer_length_ = buffer_length;
910 #endif
911 header_.std_opcode_lengths = NULL;
912 }
913
Start()914 uint64_t LineInfo::Start() {
915 ReadHeader();
916 ReadLines();
917 return after_header_ - buffer_;
918 }
919
920 // The header for a debug_line section is mildly complicated, because
921 // the line info is very tightly encoded.
ReadHeader()922 void LineInfo::ReadHeader() {
923 const uint8_t *lineptr = buffer_;
924 size_t initial_length_size;
925
926 const uint64_t initial_length
927 = reader_->ReadInitialLength(lineptr, &initial_length_size);
928
929 lineptr += initial_length_size;
930 header_.total_length = initial_length;
931 assert(buffer_ + initial_length_size + header_.total_length <=
932 buffer_ + buffer_length_);
933
934 // Address size *must* be set by CU ahead of time.
935 assert(reader_->AddressSize() != 0);
936
937 header_.version = reader_->ReadTwoBytes(lineptr);
938 lineptr += 2;
939
940 header_.prologue_length = reader_->ReadOffset(lineptr);
941 lineptr += reader_->OffsetSize();
942
943 header_.min_insn_length = reader_->ReadOneByte(lineptr);
944 lineptr += 1;
945
946 if (header_.version >= 4) {
947 __attribute__((unused)) uint8_t max_ops_per_insn =
948 reader_->ReadOneByte(lineptr);
949 ++lineptr;
950 assert(max_ops_per_insn == 1);
951 }
952
953 header_.default_is_stmt = reader_->ReadOneByte(lineptr);
954 lineptr += 1;
955
956 header_.line_base = *reinterpret_cast<const int8_t*>(lineptr);
957 lineptr += 1;
958
959 header_.line_range = reader_->ReadOneByte(lineptr);
960 lineptr += 1;
961
962 header_.opcode_base = reader_->ReadOneByte(lineptr);
963 lineptr += 1;
964
965 header_.std_opcode_lengths = new std::vector<unsigned char>;
966 header_.std_opcode_lengths->resize(header_.opcode_base + 1);
967 (*header_.std_opcode_lengths)[0] = 0;
968 for (int i = 1; i < header_.opcode_base; i++) {
969 (*header_.std_opcode_lengths)[i] = reader_->ReadOneByte(lineptr);
970 lineptr += 1;
971 }
972
973 // It is legal for the directory entry table to be empty.
974 if (*lineptr) {
975 uint32_t dirindex = 1;
976 while (*lineptr) {
977 const char *dirname = reinterpret_cast<const char *>(lineptr);
978 handler_->DefineDir(dirname, dirindex);
979 lineptr += strlen(dirname) + 1;
980 dirindex++;
981 }
982 }
983 lineptr++;
984
985 // It is also legal for the file entry table to be empty.
986 if (*lineptr) {
987 uint32_t fileindex = 1;
988 size_t len;
989 while (*lineptr) {
990 const char *filename = reinterpret_cast<const char *>(lineptr);
991 lineptr += strlen(filename) + 1;
992
993 uint64_t dirindex = reader_->ReadUnsignedLEB128(lineptr, &len);
994 lineptr += len;
995
996 uint64_t mod_time = reader_->ReadUnsignedLEB128(lineptr, &len);
997 lineptr += len;
998
999 uint64_t filelength = reader_->ReadUnsignedLEB128(lineptr, &len);
1000 lineptr += len;
1001 handler_->DefineFile(filename, fileindex, static_cast<uint32_t>(dirindex),
1002 mod_time, filelength);
1003 fileindex++;
1004 }
1005 }
1006 lineptr++;
1007
1008 after_header_ = lineptr;
1009 }
1010
1011 /* static */
ProcessOneOpcode(ByteReader * reader,LineInfoHandler * handler,const struct LineInfoHeader & header,const uint8_t * start,struct LineStateMachine * lsm,size_t * len,uintptr pc,bool * lsm_passes_pc)1012 bool LineInfo::ProcessOneOpcode(ByteReader* reader,
1013 LineInfoHandler* handler,
1014 const struct LineInfoHeader &header,
1015 const uint8_t *start,
1016 struct LineStateMachine* lsm,
1017 size_t* len,
1018 uintptr pc,
1019 bool *lsm_passes_pc) {
1020 size_t oplen = 0;
1021 size_t templen;
1022 uint8_t opcode = reader->ReadOneByte(start);
1023 oplen++;
1024 start++;
1025
1026 // If the opcode is great than the opcode_base, it is a special
1027 // opcode. Most line programs consist mainly of special opcodes.
1028 if (opcode >= header.opcode_base) {
1029 opcode -= header.opcode_base;
1030 const int64_t advance_address = (opcode / header.line_range)
1031 * header.min_insn_length;
1032 const int32_t advance_line = (opcode % header.line_range)
1033 + header.line_base;
1034
1035 // Check if the lsm passes "pc". If so, mark it as passed.
1036 if (lsm_passes_pc &&
1037 lsm->address <= pc && pc < lsm->address + advance_address) {
1038 *lsm_passes_pc = true;
1039 }
1040
1041 lsm->address += advance_address;
1042 lsm->line_num += advance_line;
1043 lsm->basic_block = true;
1044 *len = oplen;
1045 return true;
1046 }
1047
1048 // Otherwise, we have the regular opcodes
1049 switch (opcode) {
1050 case DW_LNS_copy: {
1051 lsm->basic_block = false;
1052 *len = oplen;
1053 return true;
1054 }
1055
1056 case DW_LNS_advance_pc: {
1057 uint64_t advance_address = reader->ReadUnsignedLEB128(start, &templen);
1058 oplen += templen;
1059
1060 // Check if the lsm passes "pc". If so, mark it as passed.
1061 if (lsm_passes_pc && lsm->address <= pc &&
1062 pc < lsm->address + header.min_insn_length * advance_address) {
1063 *lsm_passes_pc = true;
1064 }
1065
1066 lsm->address += header.min_insn_length * advance_address;
1067 }
1068 break;
1069 case DW_LNS_advance_line: {
1070 const int64_t advance_line = reader->ReadSignedLEB128(start, &templen);
1071 oplen += templen;
1072 lsm->line_num += static_cast<int32_t>(advance_line);
1073
1074 // With gcc 4.2.1, we can get the line_no here for the first time
1075 // since DW_LNS_advance_line is called after DW_LNE_set_address is
1076 // called. So we check if the lsm passes "pc" here, not in
1077 // DW_LNE_set_address.
1078 if (lsm_passes_pc && lsm->address == pc) {
1079 *lsm_passes_pc = true;
1080 }
1081 }
1082 break;
1083 case DW_LNS_set_file: {
1084 const uint64_t fileno = reader->ReadUnsignedLEB128(start, &templen);
1085 oplen += templen;
1086 lsm->file_num = static_cast<uint32_t>(fileno);
1087 }
1088 break;
1089 case DW_LNS_set_column: {
1090 const uint64_t colno = reader->ReadUnsignedLEB128(start, &templen);
1091 oplen += templen;
1092 lsm->column_num = static_cast<uint32_t>(colno);
1093 }
1094 break;
1095 case DW_LNS_negate_stmt: {
1096 lsm->is_stmt = !lsm->is_stmt;
1097 }
1098 break;
1099 case DW_LNS_set_basic_block: {
1100 lsm->basic_block = true;
1101 }
1102 break;
1103 case DW_LNS_fixed_advance_pc: {
1104 const uint16_t advance_address = reader->ReadTwoBytes(start);
1105 oplen += 2;
1106
1107 // Check if the lsm passes "pc". If so, mark it as passed.
1108 if (lsm_passes_pc &&
1109 lsm->address <= pc && pc < lsm->address + advance_address) {
1110 *lsm_passes_pc = true;
1111 }
1112
1113 lsm->address += advance_address;
1114 }
1115 break;
1116 case DW_LNS_const_add_pc: {
1117 const int64_t advance_address = header.min_insn_length
1118 * ((255 - header.opcode_base)
1119 / header.line_range);
1120
1121 // Check if the lsm passes "pc". If so, mark it as passed.
1122 if (lsm_passes_pc &&
1123 lsm->address <= pc && pc < lsm->address + advance_address) {
1124 *lsm_passes_pc = true;
1125 }
1126
1127 lsm->address += advance_address;
1128 }
1129 break;
1130 case DW_LNS_extended_op: {
1131 const uint64_t extended_op_len = reader->ReadUnsignedLEB128(start,
1132 &templen);
1133 start += templen;
1134 oplen += templen + extended_op_len;
1135
1136 const uint64_t extended_op = reader->ReadOneByte(start);
1137 start++;
1138
1139 switch (extended_op) {
1140 case DW_LNE_end_sequence: {
1141 lsm->end_sequence = true;
1142 *len = oplen;
1143 return true;
1144 }
1145 break;
1146 case DW_LNE_set_address: {
1147 // With gcc 4.2.1, we cannot tell the line_no here since
1148 // DW_LNE_set_address is called before DW_LNS_advance_line is
1149 // called. So we do not check if the lsm passes "pc" here. See
1150 // also the comment in DW_LNS_advance_line.
1151 uint64_t address = reader->ReadAddress(start);
1152 lsm->address = address;
1153 }
1154 break;
1155 case DW_LNE_define_file: {
1156 const char *filename = reinterpret_cast<const char *>(start);
1157
1158 templen = strlen(filename) + 1;
1159 start += templen;
1160
1161 uint64_t dirindex = reader->ReadUnsignedLEB128(start, &templen);
1162 oplen += templen;
1163
1164 const uint64_t mod_time = reader->ReadUnsignedLEB128(start,
1165 &templen);
1166 oplen += templen;
1167
1168 const uint64_t filelength = reader->ReadUnsignedLEB128(start,
1169 &templen);
1170 oplen += templen;
1171
1172 if (handler) {
1173 handler->DefineFile(filename, -1, static_cast<uint32_t>(dirindex),
1174 mod_time, filelength);
1175 }
1176 }
1177 break;
1178 }
1179 }
1180 break;
1181
1182 default: {
1183 // Ignore unknown opcode silently
1184 if (header.std_opcode_lengths) {
1185 for (int i = 0; i < (*header.std_opcode_lengths)[opcode]; i++) {
1186 reader->ReadUnsignedLEB128(start, &templen);
1187 start += templen;
1188 oplen += templen;
1189 }
1190 }
1191 }
1192 break;
1193 }
1194 *len = oplen;
1195 return false;
1196 }
1197
ReadLines()1198 void LineInfo::ReadLines() {
1199 struct LineStateMachine lsm;
1200
1201 // lengthstart is the place the length field is based on.
1202 // It is the point in the header after the initial length field
1203 const uint8_t *lengthstart = buffer_;
1204
1205 // In 64 bit dwarf, the initial length is 12 bytes, because of the
1206 // 0xffffffff at the start.
1207 if (reader_->OffsetSize() == 8)
1208 lengthstart += 12;
1209 else
1210 lengthstart += 4;
1211
1212 const uint8_t *lineptr = after_header_;
1213 lsm.Reset(header_.default_is_stmt);
1214
1215 // The LineInfoHandler interface expects each line's length along
1216 // with its address, but DWARF only provides addresses (sans
1217 // length), and an end-of-sequence address; one infers the length
1218 // from the next address. So we report a line only when we get the
1219 // next line's address, or the end-of-sequence address.
1220 bool have_pending_line = false;
1221 uint64_t pending_address = 0;
1222 uint32_t pending_file_num = 0, pending_line_num = 0, pending_column_num = 0;
1223
1224 while (lineptr < lengthstart + header_.total_length) {
1225 size_t oplength;
1226 bool add_row = ProcessOneOpcode(reader_, handler_, header_,
1227 lineptr, &lsm, &oplength, (uintptr)-1,
1228 NULL);
1229 if (add_row) {
1230 if (have_pending_line)
1231 handler_->AddLine(pending_address, lsm.address - pending_address,
1232 pending_file_num, pending_line_num,
1233 pending_column_num);
1234 if (lsm.end_sequence) {
1235 lsm.Reset(header_.default_is_stmt);
1236 have_pending_line = false;
1237 } else {
1238 pending_address = lsm.address;
1239 pending_file_num = lsm.file_num;
1240 pending_line_num = lsm.line_num;
1241 pending_column_num = lsm.column_num;
1242 have_pending_line = true;
1243 }
1244 }
1245 lineptr += oplength;
1246 }
1247
1248 after_header_ = lengthstart + header_.total_length;
1249 }
1250
RangeListReader(const uint8_t * buffer,uint64_t size,ByteReader * reader,RangeListHandler * handler)1251 RangeListReader::RangeListReader(const uint8_t *buffer, uint64_t size,
1252 ByteReader *reader, RangeListHandler *handler)
1253 : buffer_(buffer), size_(size), reader_(reader), handler_(handler) { }
1254
ReadRangeList(uint64_t offset)1255 bool RangeListReader::ReadRangeList(uint64_t offset) {
1256 const uint64_t max_address =
1257 (reader_->AddressSize() == 4) ? 0xffffffffUL
1258 : 0xffffffffffffffffULL;
1259 const uint64_t entry_size = reader_->AddressSize() * 2;
1260 bool list_end = false;
1261
1262 do {
1263 if (offset > size_ - entry_size) {
1264 return false; // Invalid range detected
1265 }
1266
1267 uint64_t start_address = reader_->ReadAddress(buffer_ + offset);
1268 uint64_t end_address =
1269 reader_->ReadAddress(buffer_ + offset + reader_->AddressSize());
1270
1271 if (start_address == max_address) { // Base address selection
1272 handler_->SetBaseAddress(end_address);
1273 } else if (start_address == 0 && end_address == 0) { // End-of-list
1274 handler_->Finish();
1275 list_end = true;
1276 } else { // Add a range entry
1277 handler_->AddRange(start_address, end_address);
1278 }
1279
1280 offset += entry_size;
1281 } while (!list_end);
1282
1283 return true;
1284 }
1285
1286 // A DWARF rule for recovering the address or value of a register, or
1287 // computing the canonical frame address. There is one subclass of this for
1288 // each '*Rule' member function in CallFrameInfo::Handler.
1289 //
1290 // It's annoying that we have to handle Rules using pointers (because
1291 // the concrete instances can have an arbitrary size). They're small,
1292 // so it would be much nicer if we could just handle them by value
1293 // instead of fretting about ownership and destruction.
1294 //
1295 // It seems like all these could simply be instances of std::tr1::bind,
1296 // except that we need instances to be EqualityComparable, too.
1297 //
1298 // This could logically be nested within State, but then the qualified names
1299 // get horrendous.
1300 class CallFrameInfo::Rule {
1301 public:
~Rule()1302 virtual ~Rule() { }
1303
1304 // Tell HANDLER that, at ADDRESS in the program, REG can be recovered using
1305 // this rule. If REG is kCFARegister, then this rule describes how to compute
1306 // the canonical frame address. Return what the HANDLER member function
1307 // returned.
1308 virtual bool Handle(Handler *handler,
1309 uint64_t address, int reg) const = 0;
1310
1311 // Equality on rules. We use these to decide which rules we need
1312 // to report after a DW_CFA_restore_state instruction.
1313 virtual bool operator==(const Rule &rhs) const = 0;
1314
operator !=(const Rule & rhs) const1315 bool operator!=(const Rule &rhs) const { return ! (*this == rhs); }
1316
1317 // Return a pointer to a copy of this rule.
1318 virtual Rule *Copy() const = 0;
1319
1320 // If this is a base+offset rule, change its base register to REG.
1321 // Otherwise, do nothing. (Ugly, but required for DW_CFA_def_cfa_register.)
SetBaseRegister(unsigned reg)1322 virtual void SetBaseRegister(unsigned reg) { }
1323
1324 // If this is a base+offset rule, change its offset to OFFSET. Otherwise,
1325 // do nothing. (Ugly, but required for DW_CFA_def_cfa_offset.)
SetOffset(long long offset)1326 virtual void SetOffset(long long offset) { }
1327 };
1328
1329 // Rule: the value the register had in the caller cannot be recovered.
1330 class CallFrameInfo::UndefinedRule: public CallFrameInfo::Rule {
1331 public:
UndefinedRule()1332 UndefinedRule() { }
~UndefinedRule()1333 ~UndefinedRule() { }
Handle(Handler * handler,uint64_t address,int reg) const1334 bool Handle(Handler *handler, uint64_t address, int reg) const {
1335 return handler->UndefinedRule(address, reg);
1336 }
operator ==(const Rule & rhs) const1337 bool operator==(const Rule &rhs) const {
1338 // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
1339 // been carefully considered; cheap RTTI-like workarounds are forbidden.
1340 const UndefinedRule *our_rhs = dynamic_cast<const UndefinedRule *>(&rhs);
1341 return (our_rhs != NULL);
1342 }
Copy() const1343 Rule *Copy() const { return new UndefinedRule(*this); }
1344 };
1345
1346 // Rule: the register's value is the same as that it had in the caller.
1347 class CallFrameInfo::SameValueRule: public CallFrameInfo::Rule {
1348 public:
SameValueRule()1349 SameValueRule() { }
~SameValueRule()1350 ~SameValueRule() { }
Handle(Handler * handler,uint64_t address,int reg) const1351 bool Handle(Handler *handler, uint64_t address, int reg) const {
1352 return handler->SameValueRule(address, reg);
1353 }
operator ==(const Rule & rhs) const1354 bool operator==(const Rule &rhs) const {
1355 // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
1356 // been carefully considered; cheap RTTI-like workarounds are forbidden.
1357 const SameValueRule *our_rhs = dynamic_cast<const SameValueRule *>(&rhs);
1358 return (our_rhs != NULL);
1359 }
Copy() const1360 Rule *Copy() const { return new SameValueRule(*this); }
1361 };
1362
1363 // Rule: the register is saved at OFFSET from BASE_REGISTER. BASE_REGISTER
1364 // may be CallFrameInfo::Handler::kCFARegister.
1365 class CallFrameInfo::OffsetRule: public CallFrameInfo::Rule {
1366 public:
OffsetRule(int base_register,long offset)1367 OffsetRule(int base_register, long offset)
1368 : base_register_(base_register), offset_(offset) { }
~OffsetRule()1369 ~OffsetRule() { }
Handle(Handler * handler,uint64_t address,int reg) const1370 bool Handle(Handler *handler, uint64_t address, int reg) const {
1371 return handler->OffsetRule(address, reg, base_register_, offset_);
1372 }
operator ==(const Rule & rhs) const1373 bool operator==(const Rule &rhs) const {
1374 // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
1375 // been carefully considered; cheap RTTI-like workarounds are forbidden.
1376 const OffsetRule *our_rhs = dynamic_cast<const OffsetRule *>(&rhs);
1377 return (our_rhs &&
1378 base_register_ == our_rhs->base_register_ &&
1379 offset_ == our_rhs->offset_);
1380 }
Copy() const1381 Rule *Copy() const { return new OffsetRule(*this); }
1382 // We don't actually need SetBaseRegister or SetOffset here, since they
1383 // are only ever applied to CFA rules, for DW_CFA_def_cfa_offset, and it
1384 // doesn't make sense to use OffsetRule for computing the CFA: it
1385 // computes the address at which a register is saved, not a value.
1386 private:
1387 int base_register_;
1388 long offset_;
1389 };
1390
1391 // Rule: the value the register had in the caller is the value of
1392 // BASE_REGISTER plus offset. BASE_REGISTER may be
1393 // CallFrameInfo::Handler::kCFARegister.
1394 class CallFrameInfo::ValOffsetRule: public CallFrameInfo::Rule {
1395 public:
ValOffsetRule(int base_register,long offset)1396 ValOffsetRule(int base_register, long offset)
1397 : base_register_(base_register), offset_(offset) { }
~ValOffsetRule()1398 ~ValOffsetRule() { }
Handle(Handler * handler,uint64_t address,int reg) const1399 bool Handle(Handler *handler, uint64_t address, int reg) const {
1400 return handler->ValOffsetRule(address, reg, base_register_, offset_);
1401 }
operator ==(const Rule & rhs) const1402 bool operator==(const Rule &rhs) const {
1403 // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
1404 // been carefully considered; cheap RTTI-like workarounds are forbidden.
1405 const ValOffsetRule *our_rhs = dynamic_cast<const ValOffsetRule *>(&rhs);
1406 return (our_rhs &&
1407 base_register_ == our_rhs->base_register_ &&
1408 offset_ == our_rhs->offset_);
1409 }
Copy() const1410 Rule *Copy() const { return new ValOffsetRule(*this); }
SetBaseRegister(unsigned reg)1411 void SetBaseRegister(unsigned reg) { base_register_ = reg; }
SetOffset(long long offset)1412 void SetOffset(long long offset) { offset_ = offset; }
1413 private:
1414 int base_register_;
1415 long offset_;
1416 };
1417
1418 // Rule: the register has been saved in another register REGISTER_NUMBER_.
1419 class CallFrameInfo::RegisterRule: public CallFrameInfo::Rule {
1420 public:
RegisterRule(int register_number)1421 explicit RegisterRule(int register_number)
1422 : register_number_(register_number) { }
~RegisterRule()1423 ~RegisterRule() { }
Handle(Handler * handler,uint64_t address,int reg) const1424 bool Handle(Handler *handler, uint64_t address, int reg) const {
1425 return handler->RegisterRule(address, reg, register_number_);
1426 }
operator ==(const Rule & rhs) const1427 bool operator==(const Rule &rhs) const {
1428 // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
1429 // been carefully considered; cheap RTTI-like workarounds are forbidden.
1430 const RegisterRule *our_rhs = dynamic_cast<const RegisterRule *>(&rhs);
1431 return (our_rhs && register_number_ == our_rhs->register_number_);
1432 }
Copy() const1433 Rule *Copy() const { return new RegisterRule(*this); }
1434 private:
1435 int register_number_;
1436 };
1437
1438 // Rule: EXPRESSION evaluates to the address at which the register is saved.
1439 class CallFrameInfo::ExpressionRule: public CallFrameInfo::Rule {
1440 public:
ExpressionRule(const string & expression)1441 explicit ExpressionRule(const string &expression)
1442 : expression_(expression) { }
~ExpressionRule()1443 ~ExpressionRule() { }
Handle(Handler * handler,uint64_t address,int reg) const1444 bool Handle(Handler *handler, uint64_t address, int reg) const {
1445 return handler->ExpressionRule(address, reg, expression_);
1446 }
operator ==(const Rule & rhs) const1447 bool operator==(const Rule &rhs) const {
1448 // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
1449 // been carefully considered; cheap RTTI-like workarounds are forbidden.
1450 const ExpressionRule *our_rhs = dynamic_cast<const ExpressionRule *>(&rhs);
1451 return (our_rhs && expression_ == our_rhs->expression_);
1452 }
Copy() const1453 Rule *Copy() const { return new ExpressionRule(*this); }
1454 private:
1455 string expression_;
1456 };
1457
1458 // Rule: EXPRESSION evaluates to the address at which the register is saved.
1459 class CallFrameInfo::ValExpressionRule: public CallFrameInfo::Rule {
1460 public:
ValExpressionRule(const string & expression)1461 explicit ValExpressionRule(const string &expression)
1462 : expression_(expression) { }
~ValExpressionRule()1463 ~ValExpressionRule() { }
Handle(Handler * handler,uint64_t address,int reg) const1464 bool Handle(Handler *handler, uint64_t address, int reg) const {
1465 return handler->ValExpressionRule(address, reg, expression_);
1466 }
operator ==(const Rule & rhs) const1467 bool operator==(const Rule &rhs) const {
1468 // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
1469 // been carefully considered; cheap RTTI-like workarounds are forbidden.
1470 const ValExpressionRule *our_rhs =
1471 dynamic_cast<const ValExpressionRule *>(&rhs);
1472 return (our_rhs && expression_ == our_rhs->expression_);
1473 }
Copy() const1474 Rule *Copy() const { return new ValExpressionRule(*this); }
1475 private:
1476 string expression_;
1477 };
1478
1479 // A map from register numbers to rules.
1480 class CallFrameInfo::RuleMap {
1481 public:
RuleMap()1482 RuleMap() : cfa_rule_(NULL) { }
RuleMap(const RuleMap & rhs)1483 RuleMap(const RuleMap &rhs) : cfa_rule_(NULL) { *this = rhs; }
~RuleMap()1484 ~RuleMap() { Clear(); }
1485
1486 RuleMap &operator=(const RuleMap &rhs);
1487
1488 // Set the rule for computing the CFA to RULE. Take ownership of RULE.
SetCFARule(Rule * rule)1489 void SetCFARule(Rule *rule) { delete cfa_rule_; cfa_rule_ = rule; }
1490
1491 // Return the current CFA rule. Unlike RegisterRule, this RuleMap retains
1492 // ownership of the rule. We use this for DW_CFA_def_cfa_offset and
1493 // DW_CFA_def_cfa_register, and for detecting references to the CFA before
1494 // a rule for it has been established.
CFARule() const1495 Rule *CFARule() const { return cfa_rule_; }
1496
1497 // Return the rule for REG, or NULL if there is none. The caller takes
1498 // ownership of the result.
1499 Rule *RegisterRule(int reg) const;
1500
1501 // Set the rule for computing REG to RULE. Take ownership of RULE.
1502 void SetRegisterRule(int reg, Rule *rule);
1503
1504 // Make all the appropriate calls to HANDLER as if we were changing from
1505 // this RuleMap to NEW_RULES at ADDRESS. We use this to implement
1506 // DW_CFA_restore_state, where lots of rules can change simultaneously.
1507 // Return true if all handlers returned true; otherwise, return false.
1508 bool HandleTransitionTo(Handler *handler, uint64_t address,
1509 const RuleMap &new_rules) const;
1510
1511 private:
1512 // A map from register numbers to Rules.
1513 typedef std::map<int, Rule *> RuleByNumber;
1514
1515 // Remove all register rules and clear cfa_rule_.
1516 void Clear();
1517
1518 // The rule for computing the canonical frame address. This RuleMap owns
1519 // this rule.
1520 Rule *cfa_rule_;
1521
1522 // A map from register numbers to postfix expressions to recover
1523 // their values. This RuleMap owns the Rules the map refers to.
1524 RuleByNumber registers_;
1525 };
1526
operator =(const RuleMap & rhs)1527 CallFrameInfo::RuleMap &CallFrameInfo::RuleMap::operator=(const RuleMap &rhs) {
1528 Clear();
1529 // Since each map owns the rules it refers to, assignment must copy them.
1530 if (rhs.cfa_rule_) cfa_rule_ = rhs.cfa_rule_->Copy();
1531 for (RuleByNumber::const_iterator it = rhs.registers_.begin();
1532 it != rhs.registers_.end(); it++)
1533 registers_[it->first] = it->second->Copy();
1534 return *this;
1535 }
1536
RegisterRule(int reg) const1537 CallFrameInfo::Rule *CallFrameInfo::RuleMap::RegisterRule(int reg) const {
1538 assert(reg != Handler::kCFARegister);
1539 RuleByNumber::const_iterator it = registers_.find(reg);
1540 if (it != registers_.end())
1541 return it->second->Copy();
1542 else
1543 return NULL;
1544 }
1545
SetRegisterRule(int reg,Rule * rule)1546 void CallFrameInfo::RuleMap::SetRegisterRule(int reg, Rule *rule) {
1547 assert(reg != Handler::kCFARegister);
1548 assert(rule);
1549 Rule **slot = ®isters_[reg];
1550 delete *slot;
1551 *slot = rule;
1552 }
1553
HandleTransitionTo(Handler * handler,uint64_t address,const RuleMap & new_rules) const1554 bool CallFrameInfo::RuleMap::HandleTransitionTo(
1555 Handler *handler,
1556 uint64_t address,
1557 const RuleMap &new_rules) const {
1558 // Transition from cfa_rule_ to new_rules.cfa_rule_.
1559 if (cfa_rule_ && new_rules.cfa_rule_) {
1560 if (*cfa_rule_ != *new_rules.cfa_rule_ &&
1561 !new_rules.cfa_rule_->Handle(handler, address,
1562 Handler::kCFARegister))
1563 return false;
1564 } else if (cfa_rule_) {
1565 // this RuleMap has a CFA rule but new_rules doesn't.
1566 // CallFrameInfo::Handler has no way to handle this --- and shouldn't;
1567 // it's garbage input. The instruction interpreter should have
1568 // detected this and warned, so take no action here.
1569 } else if (new_rules.cfa_rule_) {
1570 // This shouldn't be possible: NEW_RULES is some prior state, and
1571 // there's no way to remove entries.
1572 assert(0);
1573 } else {
1574 // Both CFA rules are empty. No action needed.
1575 }
1576
1577 // Traverse the two maps in order by register number, and report
1578 // whatever differences we find.
1579 RuleByNumber::const_iterator old_it = registers_.begin();
1580 RuleByNumber::const_iterator new_it = new_rules.registers_.begin();
1581 while (old_it != registers_.end() && new_it != new_rules.registers_.end()) {
1582 if (old_it->first < new_it->first) {
1583 // This RuleMap has an entry for old_it->first, but NEW_RULES
1584 // doesn't.
1585 //
1586 // This isn't really the right thing to do, but since CFI generally
1587 // only mentions callee-saves registers, and GCC's convention for
1588 // callee-saves registers is that they are unchanged, it's a good
1589 // approximation.
1590 if (!handler->SameValueRule(address, old_it->first))
1591 return false;
1592 old_it++;
1593 } else if (old_it->first > new_it->first) {
1594 // NEW_RULES has entry for new_it->first, but this RuleMap
1595 // doesn't. This shouldn't be possible: NEW_RULES is some prior
1596 // state, and there's no way to remove entries.
1597 assert(0);
1598 } else {
1599 // Both maps have an entry for this register. Report the new
1600 // rule if it is different.
1601 if (*old_it->second != *new_it->second &&
1602 !new_it->second->Handle(handler, address, new_it->first))
1603 return false;
1604 new_it++, old_it++;
1605 }
1606 }
1607 // Finish off entries from this RuleMap with no counterparts in new_rules.
1608 while (old_it != registers_.end()) {
1609 if (!handler->SameValueRule(address, old_it->first))
1610 return false;
1611 old_it++;
1612 }
1613 // Since we only make transitions from a rule set to some previously
1614 // saved rule set, and we can only add rules to the map, NEW_RULES
1615 // must have fewer rules than *this.
1616 assert(new_it == new_rules.registers_.end());
1617
1618 return true;
1619 }
1620
1621 // Remove all register rules and clear cfa_rule_.
Clear()1622 void CallFrameInfo::RuleMap::Clear() {
1623 delete cfa_rule_;
1624 cfa_rule_ = NULL;
1625 for (RuleByNumber::iterator it = registers_.begin();
1626 it != registers_.end(); it++)
1627 delete it->second;
1628 registers_.clear();
1629 }
1630
1631 // The state of the call frame information interpreter as it processes
1632 // instructions from a CIE and FDE.
1633 class CallFrameInfo::State {
1634 public:
1635 // Create a call frame information interpreter state with the given
1636 // reporter, reader, handler, and initial call frame info address.
State(ByteReader * reader,Handler * handler,Reporter * reporter,uint64_t address)1637 State(ByteReader *reader, Handler *handler, Reporter *reporter,
1638 uint64_t address)
1639 : reader_(reader), handler_(handler), reporter_(reporter),
1640 address_(address), entry_(NULL), cursor_(NULL) { }
1641
1642 // Interpret instructions from CIE, save the resulting rule set for
1643 // DW_CFA_restore instructions, and return true. On error, report
1644 // the problem to reporter_ and return false.
1645 bool InterpretCIE(const CIE &cie);
1646
1647 // Interpret instructions from FDE, and return true. On error,
1648 // report the problem to reporter_ and return false.
1649 bool InterpretFDE(const FDE &fde);
1650
1651 private:
1652 // The operands of a CFI instruction, for ParseOperands.
1653 struct Operands {
1654 unsigned register_number; // A register number.
1655 uint64_t offset; // An offset or address.
1656 long signed_offset; // A signed offset.
1657 string expression; // A DWARF expression.
1658 };
1659
1660 // Parse CFI instruction operands from STATE's instruction stream as
1661 // described by FORMAT. On success, populate OPERANDS with the
1662 // results, and return true. On failure, report the problem and
1663 // return false.
1664 //
1665 // Each character of FORMAT should be one of the following:
1666 //
1667 // 'r' unsigned LEB128 register number (OPERANDS->register_number)
1668 // 'o' unsigned LEB128 offset (OPERANDS->offset)
1669 // 's' signed LEB128 offset (OPERANDS->signed_offset)
1670 // 'a' machine-size address (OPERANDS->offset)
1671 // (If the CIE has a 'z' augmentation string, 'a' uses the
1672 // encoding specified by the 'R' argument.)
1673 // '1' a one-byte offset (OPERANDS->offset)
1674 // '2' a two-byte offset (OPERANDS->offset)
1675 // '4' a four-byte offset (OPERANDS->offset)
1676 // '8' an eight-byte offset (OPERANDS->offset)
1677 // 'e' a DW_FORM_block holding a (OPERANDS->expression)
1678 // DWARF expression
1679 bool ParseOperands(const char *format, Operands *operands);
1680
1681 // Interpret one CFI instruction from STATE's instruction stream, update
1682 // STATE, report any rule changes to handler_, and return true. On
1683 // failure, report the problem and return false.
1684 bool DoInstruction();
1685
1686 // The following Do* member functions are subroutines of DoInstruction,
1687 // factoring out the actual work of operations that have several
1688 // different encodings.
1689
1690 // Set the CFA rule to be the value of BASE_REGISTER plus OFFSET, and
1691 // return true. On failure, report and return false. (Used for
1692 // DW_CFA_def_cfa and DW_CFA_def_cfa_sf.)
1693 bool DoDefCFA(unsigned base_register, long offset);
1694
1695 // Change the offset of the CFA rule to OFFSET, and return true. On
1696 // failure, report and return false. (Subroutine for
1697 // DW_CFA_def_cfa_offset and DW_CFA_def_cfa_offset_sf.)
1698 bool DoDefCFAOffset(long offset);
1699
1700 // Specify that REG can be recovered using RULE, and return true. On
1701 // failure, report and return false.
1702 bool DoRule(unsigned reg, Rule *rule);
1703
1704 // Specify that REG can be found at OFFSET from the CFA, and return true.
1705 // On failure, report and return false. (Subroutine for DW_CFA_offset,
1706 // DW_CFA_offset_extended, and DW_CFA_offset_extended_sf.)
1707 bool DoOffset(unsigned reg, long offset);
1708
1709 // Specify that the caller's value for REG is the CFA plus OFFSET,
1710 // and return true. On failure, report and return false. (Subroutine
1711 // for DW_CFA_val_offset and DW_CFA_val_offset_sf.)
1712 bool DoValOffset(unsigned reg, long offset);
1713
1714 // Restore REG to the rule established in the CIE, and return true. On
1715 // failure, report and return false. (Subroutine for DW_CFA_restore and
1716 // DW_CFA_restore_extended.)
1717 bool DoRestore(unsigned reg);
1718
1719 // Return the section offset of the instruction at cursor. For use
1720 // in error messages.
CursorOffset()1721 uint64_t CursorOffset() { return entry_->offset + (cursor_ - entry_->start); }
1722
1723 // Report that entry_ is incomplete, and return false. For brevity.
ReportIncomplete()1724 bool ReportIncomplete() {
1725 reporter_->Incomplete(entry_->offset, entry_->kind);
1726 return false;
1727 }
1728
1729 // For reading multi-byte values with the appropriate endianness.
1730 ByteReader *reader_;
1731
1732 // The handler to which we should report the data we find.
1733 Handler *handler_;
1734
1735 // For reporting problems in the info we're parsing.
1736 Reporter *reporter_;
1737
1738 // The code address to which the next instruction in the stream applies.
1739 uint64_t address_;
1740
1741 // The entry whose instructions we are currently processing. This is
1742 // first a CIE, and then an FDE.
1743 const Entry *entry_;
1744
1745 // The next instruction to process.
1746 const uint8_t *cursor_;
1747
1748 // The current set of rules.
1749 RuleMap rules_;
1750
1751 // The set of rules established by the CIE, used by DW_CFA_restore
1752 // and DW_CFA_restore_extended. We set this after interpreting the
1753 // CIE's instructions.
1754 RuleMap cie_rules_;
1755
1756 // A stack of saved states, for DW_CFA_remember_state and
1757 // DW_CFA_restore_state.
1758 std::stack<RuleMap> saved_rules_;
1759 };
1760
InterpretCIE(const CIE & cie)1761 bool CallFrameInfo::State::InterpretCIE(const CIE &cie) {
1762 entry_ = &cie;
1763 cursor_ = entry_->instructions;
1764 while (cursor_ < entry_->end)
1765 if (!DoInstruction())
1766 return false;
1767 // Note the rules established by the CIE, for use by DW_CFA_restore
1768 // and DW_CFA_restore_extended.
1769 cie_rules_ = rules_;
1770 return true;
1771 }
1772
InterpretFDE(const FDE & fde)1773 bool CallFrameInfo::State::InterpretFDE(const FDE &fde) {
1774 entry_ = &fde;
1775 cursor_ = entry_->instructions;
1776 while (cursor_ < entry_->end)
1777 if (!DoInstruction())
1778 return false;
1779 return true;
1780 }
1781
ParseOperands(const char * format,Operands * operands)1782 bool CallFrameInfo::State::ParseOperands(const char *format,
1783 Operands *operands) {
1784 size_t len;
1785 const char *operand;
1786
1787 for (operand = format; *operand; operand++) {
1788 size_t bytes_left = entry_->end - cursor_;
1789 switch (*operand) {
1790 case 'r':
1791 operands->register_number = reader_->ReadUnsignedLEB128(cursor_, &len);
1792 if (len > bytes_left) return ReportIncomplete();
1793 cursor_ += len;
1794 break;
1795
1796 case 'o':
1797 operands->offset = reader_->ReadUnsignedLEB128(cursor_, &len);
1798 if (len > bytes_left) return ReportIncomplete();
1799 cursor_ += len;
1800 break;
1801
1802 case 's':
1803 operands->signed_offset = reader_->ReadSignedLEB128(cursor_, &len);
1804 if (len > bytes_left) return ReportIncomplete();
1805 cursor_ += len;
1806 break;
1807
1808 case 'a':
1809 operands->offset =
1810 reader_->ReadEncodedPointer(cursor_, entry_->cie->pointer_encoding,
1811 &len);
1812 if (len > bytes_left) return ReportIncomplete();
1813 cursor_ += len;
1814 break;
1815
1816 case '1':
1817 if (1 > bytes_left) return ReportIncomplete();
1818 operands->offset = static_cast<unsigned char>(*cursor_++);
1819 break;
1820
1821 case '2':
1822 if (2 > bytes_left) return ReportIncomplete();
1823 operands->offset = reader_->ReadTwoBytes(cursor_);
1824 cursor_ += 2;
1825 break;
1826
1827 case '4':
1828 if (4 > bytes_left) return ReportIncomplete();
1829 operands->offset = reader_->ReadFourBytes(cursor_);
1830 cursor_ += 4;
1831 break;
1832
1833 case '8':
1834 if (8 > bytes_left) return ReportIncomplete();
1835 operands->offset = reader_->ReadEightBytes(cursor_);
1836 cursor_ += 8;
1837 break;
1838
1839 case 'e': {
1840 size_t expression_length = reader_->ReadUnsignedLEB128(cursor_, &len);
1841 if (len > bytes_left || expression_length > bytes_left - len)
1842 return ReportIncomplete();
1843 cursor_ += len;
1844 operands->expression = string(reinterpret_cast<const char *>(cursor_),
1845 expression_length);
1846 cursor_ += expression_length;
1847 break;
1848 }
1849
1850 default:
1851 assert(0);
1852 }
1853 }
1854
1855 return true;
1856 }
1857
DoInstruction()1858 bool CallFrameInfo::State::DoInstruction() {
1859 CIE *cie = entry_->cie;
1860 Operands ops;
1861
1862 // Our entry's kind should have been set by now.
1863 assert(entry_->kind != kUnknown);
1864
1865 // We shouldn't have been invoked unless there were more
1866 // instructions to parse.
1867 assert(cursor_ < entry_->end);
1868
1869 unsigned opcode = *cursor_++;
1870 if ((opcode & 0xc0) != 0) {
1871 switch (opcode & 0xc0) {
1872 // Advance the address.
1873 case DW_CFA_advance_loc: {
1874 size_t code_offset = opcode & 0x3f;
1875 address_ += code_offset * cie->code_alignment_factor;
1876 break;
1877 }
1878
1879 // Find a register at an offset from the CFA.
1880 case DW_CFA_offset:
1881 if (!ParseOperands("o", &ops) ||
1882 !DoOffset(opcode & 0x3f, ops.offset * cie->data_alignment_factor))
1883 return false;
1884 break;
1885
1886 // Restore the rule established for a register by the CIE.
1887 case DW_CFA_restore:
1888 if (!DoRestore(opcode & 0x3f)) return false;
1889 break;
1890
1891 // The 'if' above should have excluded this possibility.
1892 default:
1893 assert(0);
1894 }
1895
1896 // Return here, so the big switch below won't be indented.
1897 return true;
1898 }
1899
1900 switch (opcode) {
1901 // Set the address.
1902 case DW_CFA_set_loc:
1903 if (!ParseOperands("a", &ops)) return false;
1904 address_ = ops.offset;
1905 break;
1906
1907 // Advance the address.
1908 case DW_CFA_advance_loc1:
1909 if (!ParseOperands("1", &ops)) return false;
1910 address_ += ops.offset * cie->code_alignment_factor;
1911 break;
1912
1913 // Advance the address.
1914 case DW_CFA_advance_loc2:
1915 if (!ParseOperands("2", &ops)) return false;
1916 address_ += ops.offset * cie->code_alignment_factor;
1917 break;
1918
1919 // Advance the address.
1920 case DW_CFA_advance_loc4:
1921 if (!ParseOperands("4", &ops)) return false;
1922 address_ += ops.offset * cie->code_alignment_factor;
1923 break;
1924
1925 // Advance the address.
1926 case DW_CFA_MIPS_advance_loc8:
1927 if (!ParseOperands("8", &ops)) return false;
1928 address_ += ops.offset * cie->code_alignment_factor;
1929 break;
1930
1931 // Compute the CFA by adding an offset to a register.
1932 case DW_CFA_def_cfa:
1933 if (!ParseOperands("ro", &ops) ||
1934 !DoDefCFA(ops.register_number, ops.offset))
1935 return false;
1936 break;
1937
1938 // Compute the CFA by adding an offset to a register.
1939 case DW_CFA_def_cfa_sf:
1940 if (!ParseOperands("rs", &ops) ||
1941 !DoDefCFA(ops.register_number,
1942 ops.signed_offset * cie->data_alignment_factor))
1943 return false;
1944 break;
1945
1946 // Change the base register used to compute the CFA.
1947 case DW_CFA_def_cfa_register: {
1948 if (!ParseOperands("r", &ops)) return false;
1949 Rule *cfa_rule = rules_.CFARule();
1950 if (!cfa_rule) {
1951 if (!DoDefCFA(ops.register_number, ops.offset)) {
1952 reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
1953 return false;
1954 }
1955 } else {
1956 cfa_rule->SetBaseRegister(ops.register_number);
1957 if (!cfa_rule->Handle(handler_, address_,
1958 Handler::kCFARegister))
1959 return false;
1960 }
1961 break;
1962 }
1963
1964 // Change the offset used to compute the CFA.
1965 case DW_CFA_def_cfa_offset:
1966 if (!ParseOperands("o", &ops) ||
1967 !DoDefCFAOffset(ops.offset))
1968 return false;
1969 break;
1970
1971 // Change the offset used to compute the CFA.
1972 case DW_CFA_def_cfa_offset_sf:
1973 if (!ParseOperands("s", &ops) ||
1974 !DoDefCFAOffset(ops.signed_offset * cie->data_alignment_factor))
1975 return false;
1976 break;
1977
1978 // Specify an expression whose value is the CFA.
1979 case DW_CFA_def_cfa_expression: {
1980 if (!ParseOperands("e", &ops))
1981 return false;
1982 Rule *rule = new ValExpressionRule(ops.expression);
1983 rules_.SetCFARule(rule);
1984 if (!rule->Handle(handler_, address_,
1985 Handler::kCFARegister))
1986 return false;
1987 break;
1988 }
1989
1990 // The register's value cannot be recovered.
1991 case DW_CFA_undefined: {
1992 if (!ParseOperands("r", &ops) ||
1993 !DoRule(ops.register_number, new UndefinedRule()))
1994 return false;
1995 break;
1996 }
1997
1998 // The register's value is unchanged from its value in the caller.
1999 case DW_CFA_same_value: {
2000 if (!ParseOperands("r", &ops) ||
2001 !DoRule(ops.register_number, new SameValueRule()))
2002 return false;
2003 break;
2004 }
2005
2006 // Find a register at an offset from the CFA.
2007 case DW_CFA_offset_extended:
2008 if (!ParseOperands("ro", &ops) ||
2009 !DoOffset(ops.register_number,
2010 ops.offset * cie->data_alignment_factor))
2011 return false;
2012 break;
2013
2014 // The register is saved at an offset from the CFA.
2015 case DW_CFA_offset_extended_sf:
2016 if (!ParseOperands("rs", &ops) ||
2017 !DoOffset(ops.register_number,
2018 ops.signed_offset * cie->data_alignment_factor))
2019 return false;
2020 break;
2021
2022 // The register is saved at an offset from the CFA.
2023 case DW_CFA_GNU_negative_offset_extended:
2024 if (!ParseOperands("ro", &ops) ||
2025 !DoOffset(ops.register_number,
2026 -ops.offset * cie->data_alignment_factor))
2027 return false;
2028 break;
2029
2030 // The register's value is the sum of the CFA plus an offset.
2031 case DW_CFA_val_offset:
2032 if (!ParseOperands("ro", &ops) ||
2033 !DoValOffset(ops.register_number,
2034 ops.offset * cie->data_alignment_factor))
2035 return false;
2036 break;
2037
2038 // The register's value is the sum of the CFA plus an offset.
2039 case DW_CFA_val_offset_sf:
2040 if (!ParseOperands("rs", &ops) ||
2041 !DoValOffset(ops.register_number,
2042 ops.signed_offset * cie->data_alignment_factor))
2043 return false;
2044 break;
2045
2046 // The register has been saved in another register.
2047 case DW_CFA_register: {
2048 if (!ParseOperands("ro", &ops) ||
2049 !DoRule(ops.register_number, new RegisterRule(ops.offset)))
2050 return false;
2051 break;
2052 }
2053
2054 // An expression yields the address at which the register is saved.
2055 case DW_CFA_expression: {
2056 if (!ParseOperands("re", &ops) ||
2057 !DoRule(ops.register_number, new ExpressionRule(ops.expression)))
2058 return false;
2059 break;
2060 }
2061
2062 // An expression yields the caller's value for the register.
2063 case DW_CFA_val_expression: {
2064 if (!ParseOperands("re", &ops) ||
2065 !DoRule(ops.register_number, new ValExpressionRule(ops.expression)))
2066 return false;
2067 break;
2068 }
2069
2070 // Restore the rule established for a register by the CIE.
2071 case DW_CFA_restore_extended:
2072 if (!ParseOperands("r", &ops) ||
2073 !DoRestore( ops.register_number))
2074 return false;
2075 break;
2076
2077 // Save the current set of rules on a stack.
2078 case DW_CFA_remember_state:
2079 saved_rules_.push(rules_);
2080 break;
2081
2082 // Pop the current set of rules off the stack.
2083 case DW_CFA_restore_state: {
2084 if (saved_rules_.empty()) {
2085 reporter_->EmptyStateStack(entry_->offset, entry_->kind,
2086 CursorOffset());
2087 return false;
2088 }
2089 const RuleMap &new_rules = saved_rules_.top();
2090 if (rules_.CFARule() && !new_rules.CFARule()) {
2091 reporter_->ClearingCFARule(entry_->offset, entry_->kind,
2092 CursorOffset());
2093 return false;
2094 }
2095 rules_.HandleTransitionTo(handler_, address_, new_rules);
2096 rules_ = new_rules;
2097 saved_rules_.pop();
2098 break;
2099 }
2100
2101 // No operation. (Padding instruction.)
2102 case DW_CFA_nop:
2103 break;
2104
2105 // A SPARC register window save: Registers 8 through 15 (%o0-%o7)
2106 // are saved in registers 24 through 31 (%i0-%i7), and registers
2107 // 16 through 31 (%l0-%l7 and %i0-%i7) are saved at CFA offsets
2108 // (0-15 * the register size). The register numbers must be
2109 // hard-coded. A GNU extension, and not a pretty one.
2110 case DW_CFA_GNU_window_save: {
2111 // Save %o0-%o7 in %i0-%i7.
2112 for (int i = 8; i < 16; i++)
2113 if (!DoRule(i, new RegisterRule(i + 16)))
2114 return false;
2115 // Save %l0-%l7 and %i0-%i7 at the CFA.
2116 for (int i = 16; i < 32; i++)
2117 // Assume that the byte reader's address size is the same as
2118 // the architecture's register size. !@#%*^ hilarious.
2119 if (!DoRule(i, new OffsetRule(Handler::kCFARegister,
2120 (i - 16) * reader_->AddressSize())))
2121 return false;
2122 break;
2123 }
2124
2125 // I'm not sure what this is. GDB doesn't use it for unwinding.
2126 case DW_CFA_GNU_args_size:
2127 if (!ParseOperands("o", &ops)) return false;
2128 break;
2129
2130 // An opcode we don't recognize.
2131 default: {
2132 reporter_->BadInstruction(entry_->offset, entry_->kind, CursorOffset());
2133 return false;
2134 }
2135 }
2136
2137 return true;
2138 }
2139
DoDefCFA(unsigned base_register,long offset)2140 bool CallFrameInfo::State::DoDefCFA(unsigned base_register, long offset) {
2141 Rule *rule = new ValOffsetRule(base_register, offset);
2142 rules_.SetCFARule(rule);
2143 return rule->Handle(handler_, address_,
2144 Handler::kCFARegister);
2145 }
2146
DoDefCFAOffset(long offset)2147 bool CallFrameInfo::State::DoDefCFAOffset(long offset) {
2148 Rule *cfa_rule = rules_.CFARule();
2149 if (!cfa_rule) {
2150 reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
2151 return false;
2152 }
2153 cfa_rule->SetOffset(offset);
2154 return cfa_rule->Handle(handler_, address_,
2155 Handler::kCFARegister);
2156 }
2157
DoRule(unsigned reg,Rule * rule)2158 bool CallFrameInfo::State::DoRule(unsigned reg, Rule *rule) {
2159 rules_.SetRegisterRule(reg, rule);
2160 return rule->Handle(handler_, address_, reg);
2161 }
2162
DoOffset(unsigned reg,long offset)2163 bool CallFrameInfo::State::DoOffset(unsigned reg, long offset) {
2164 if (!rules_.CFARule()) {
2165 reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
2166 return false;
2167 }
2168 return DoRule(reg,
2169 new OffsetRule(Handler::kCFARegister, offset));
2170 }
2171
DoValOffset(unsigned reg,long offset)2172 bool CallFrameInfo::State::DoValOffset(unsigned reg, long offset) {
2173 if (!rules_.CFARule()) {
2174 reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
2175 return false;
2176 }
2177 return DoRule(reg,
2178 new ValOffsetRule(Handler::kCFARegister, offset));
2179 }
2180
DoRestore(unsigned reg)2181 bool CallFrameInfo::State::DoRestore(unsigned reg) {
2182 // DW_CFA_restore and DW_CFA_restore_extended don't make sense in a CIE.
2183 if (entry_->kind == kCIE) {
2184 reporter_->RestoreInCIE(entry_->offset, CursorOffset());
2185 return false;
2186 }
2187 Rule *rule = cie_rules_.RegisterRule(reg);
2188 if (!rule) {
2189 // This isn't really the right thing to do, but since CFI generally
2190 // only mentions callee-saves registers, and GCC's convention for
2191 // callee-saves registers is that they are unchanged, it's a good
2192 // approximation.
2193 rule = new SameValueRule();
2194 }
2195 return DoRule(reg, rule);
2196 }
2197
ReadEntryPrologue(const uint8_t * cursor,Entry * entry)2198 bool CallFrameInfo::ReadEntryPrologue(const uint8_t *cursor, Entry *entry) {
2199 const uint8_t *buffer_end = buffer_ + buffer_length_;
2200
2201 // Initialize enough of ENTRY for use in error reporting.
2202 entry->offset = cursor - buffer_;
2203 entry->start = cursor;
2204 entry->kind = kUnknown;
2205 entry->end = NULL;
2206
2207 // Read the initial length. This sets reader_'s offset size.
2208 size_t length_size;
2209 uint64_t length = reader_->ReadInitialLength(cursor, &length_size);
2210 if (length_size > size_t(buffer_end - cursor))
2211 return ReportIncomplete(entry);
2212 cursor += length_size;
2213
2214 // In a .eh_frame section, a length of zero marks the end of the series
2215 // of entries.
2216 if (length == 0 && eh_frame_) {
2217 entry->kind = kTerminator;
2218 entry->end = cursor;
2219 return true;
2220 }
2221
2222 // Validate the length.
2223 if (length > size_t(buffer_end - cursor))
2224 return ReportIncomplete(entry);
2225
2226 // The length is the number of bytes after the initial length field;
2227 // we have that position handy at this point, so compute the end
2228 // now. (If we're parsing 64-bit-offset DWARF on a 32-bit machine,
2229 // and the length didn't fit in a size_t, we would have rejected it
2230 // above.)
2231 entry->end = cursor + length;
2232
2233 // Parse the next field: either the offset of a CIE or a CIE id.
2234 size_t offset_size = reader_->OffsetSize();
2235 if (offset_size > size_t(entry->end - cursor)) return ReportIncomplete(entry);
2236 entry->id = reader_->ReadOffset(cursor);
2237
2238 // Don't advance cursor past id field yet; in .eh_frame data we need
2239 // the id's position to compute the section offset of an FDE's CIE.
2240
2241 // Now we can decide what kind of entry this is.
2242 if (eh_frame_) {
2243 // In .eh_frame data, an ID of zero marks the entry as a CIE, and
2244 // anything else is an offset from the id field of the FDE to the start
2245 // of the CIE.
2246 if (entry->id == 0) {
2247 entry->kind = kCIE;
2248 } else {
2249 entry->kind = kFDE;
2250 // Turn the offset from the id into an offset from the buffer's start.
2251 entry->id = (cursor - buffer_) - entry->id;
2252 }
2253 } else {
2254 // In DWARF CFI data, an ID of ~0 (of the appropriate width, given the
2255 // offset size for the entry) marks the entry as a CIE, and anything
2256 // else is the offset of the CIE from the beginning of the section.
2257 if (offset_size == 4)
2258 entry->kind = (entry->id == 0xffffffff) ? kCIE : kFDE;
2259 else {
2260 assert(offset_size == 8);
2261 entry->kind = (entry->id == 0xffffffffffffffffULL) ? kCIE : kFDE;
2262 }
2263 }
2264
2265 // Now advance cursor past the id.
2266 cursor += offset_size;
2267
2268 // The fields specific to this kind of entry start here.
2269 entry->fields = cursor;
2270
2271 entry->cie = NULL;
2272
2273 return true;
2274 }
2275
ReadCIEFields(CIE * cie)2276 bool CallFrameInfo::ReadCIEFields(CIE *cie) {
2277 const uint8_t *cursor = cie->fields;
2278 size_t len;
2279
2280 assert(cie->kind == kCIE);
2281
2282 // Prepare for early exit.
2283 cie->version = 0;
2284 cie->augmentation.clear();
2285 cie->code_alignment_factor = 0;
2286 cie->data_alignment_factor = 0;
2287 cie->return_address_register = 0;
2288 cie->has_z_augmentation = false;
2289 cie->pointer_encoding = DW_EH_PE_absptr;
2290 cie->instructions = 0;
2291
2292 // Parse the version number.
2293 if (cie->end - cursor < 1)
2294 return ReportIncomplete(cie);
2295 cie->version = reader_->ReadOneByte(cursor);
2296 cursor++;
2297
2298 // If we don't recognize the version, we can't parse any more fields of the
2299 // CIE. For DWARF CFI, we handle versions 1 through 4 (there was never a
2300 // version 2 of CFI data). For .eh_frame, we handle versions 1 and 4 as well;
2301 // the difference between those versions seems to be the same as for
2302 // .debug_frame.
2303 if (cie->version < 1 || cie->version > 4) {
2304 reporter_->UnrecognizedVersion(cie->offset, cie->version);
2305 return false;
2306 }
2307
2308 const uint8_t *augmentation_start = cursor;
2309 const uint8_t *augmentation_end =
2310 reinterpret_cast<const uint8_t *>(memchr(augmentation_start, '\0',
2311 cie->end - augmentation_start));
2312 if (! augmentation_end) return ReportIncomplete(cie);
2313 cursor = augmentation_end;
2314 cie->augmentation = string(reinterpret_cast<const char *>(augmentation_start),
2315 cursor - augmentation_start);
2316 // Skip the terminating '\0'.
2317 cursor++;
2318
2319 // Is this CFI augmented?
2320 if (!cie->augmentation.empty()) {
2321 // Is it an augmentation we recognize?
2322 if (cie->augmentation[0] == DW_Z_augmentation_start) {
2323 // Linux C++ ABI 'z' augmentation, used for exception handling data.
2324 cie->has_z_augmentation = true;
2325 } else {
2326 // Not an augmentation we recognize. Augmentations can have arbitrary
2327 // effects on the form of rest of the content, so we have to give up.
2328 reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation);
2329 return false;
2330 }
2331 }
2332
2333 if (cie->version >= 4) {
2334 cie->address_size = *cursor++;
2335 if (cie->address_size != 8 && cie->address_size != 4) {
2336 reporter_->UnexpectedAddressSize(cie->offset, cie->address_size);
2337 return false;
2338 }
2339
2340 cie->segment_size = *cursor++;
2341 if (cie->segment_size != 0) {
2342 reporter_->UnexpectedSegmentSize(cie->offset, cie->segment_size);
2343 return false;
2344 }
2345 }
2346
2347 // Parse the code alignment factor.
2348 cie->code_alignment_factor = reader_->ReadUnsignedLEB128(cursor, &len);
2349 if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
2350 cursor += len;
2351
2352 // Parse the data alignment factor.
2353 cie->data_alignment_factor = reader_->ReadSignedLEB128(cursor, &len);
2354 if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
2355 cursor += len;
2356
2357 // Parse the return address register. This is a ubyte in version 1, and
2358 // a ULEB128 in version 3.
2359 if (cie->version == 1) {
2360 if (cursor >= cie->end) return ReportIncomplete(cie);
2361 cie->return_address_register = uint8_t(*cursor++);
2362 } else {
2363 cie->return_address_register = reader_->ReadUnsignedLEB128(cursor, &len);
2364 if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
2365 cursor += len;
2366 }
2367
2368 // If we have a 'z' augmentation string, find the augmentation data and
2369 // use the augmentation string to parse it.
2370 if (cie->has_z_augmentation) {
2371 uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &len);
2372 if (size_t(cie->end - cursor) < len + data_size)
2373 return ReportIncomplete(cie);
2374 cursor += len;
2375 const uint8_t *data = cursor;
2376 cursor += data_size;
2377 const uint8_t *data_end = cursor;
2378
2379 cie->has_z_lsda = false;
2380 cie->has_z_personality = false;
2381 cie->has_z_signal_frame = false;
2382
2383 // Walk the augmentation string, and extract values from the
2384 // augmentation data as the string directs.
2385 for (size_t i = 1; i < cie->augmentation.size(); i++) {
2386 switch (cie->augmentation[i]) {
2387 case DW_Z_has_LSDA:
2388 // The CIE's augmentation data holds the language-specific data
2389 // area pointer's encoding, and the FDE's augmentation data holds
2390 // the pointer itself.
2391 cie->has_z_lsda = true;
2392 // Fetch the LSDA encoding from the augmentation data.
2393 if (data >= data_end) return ReportIncomplete(cie);
2394 cie->lsda_encoding = DwarfPointerEncoding(*data++);
2395 if (!reader_->ValidEncoding(cie->lsda_encoding)) {
2396 reporter_->InvalidPointerEncoding(cie->offset, cie->lsda_encoding);
2397 return false;
2398 }
2399 // Don't check if the encoding is usable here --- we haven't
2400 // read the FDE's fields yet, so we're not prepared for
2401 // DW_EH_PE_funcrel, although that's a fine encoding for the
2402 // LSDA to use, since it appears in the FDE.
2403 break;
2404
2405 case DW_Z_has_personality_routine:
2406 // The CIE's augmentation data holds the personality routine
2407 // pointer's encoding, followed by the pointer itself.
2408 cie->has_z_personality = true;
2409 // Fetch the personality routine pointer's encoding from the
2410 // augmentation data.
2411 if (data >= data_end) return ReportIncomplete(cie);
2412 cie->personality_encoding = DwarfPointerEncoding(*data++);
2413 if (!reader_->ValidEncoding(cie->personality_encoding)) {
2414 reporter_->InvalidPointerEncoding(cie->offset,
2415 cie->personality_encoding);
2416 return false;
2417 }
2418 if (!reader_->UsableEncoding(cie->personality_encoding)) {
2419 reporter_->UnusablePointerEncoding(cie->offset,
2420 cie->personality_encoding);
2421 return false;
2422 }
2423 // Fetch the personality routine's pointer itself from the data.
2424 cie->personality_address =
2425 reader_->ReadEncodedPointer(data, cie->personality_encoding,
2426 &len);
2427 if (len > size_t(data_end - data))
2428 return ReportIncomplete(cie);
2429 data += len;
2430 break;
2431
2432 case DW_Z_has_FDE_address_encoding:
2433 // The CIE's augmentation data holds the pointer encoding to use
2434 // for addresses in the FDE.
2435 if (data >= data_end) return ReportIncomplete(cie);
2436 cie->pointer_encoding = DwarfPointerEncoding(*data++);
2437 if (!reader_->ValidEncoding(cie->pointer_encoding)) {
2438 reporter_->InvalidPointerEncoding(cie->offset,
2439 cie->pointer_encoding);
2440 return false;
2441 }
2442 if (!reader_->UsableEncoding(cie->pointer_encoding)) {
2443 reporter_->UnusablePointerEncoding(cie->offset,
2444 cie->pointer_encoding);
2445 return false;
2446 }
2447 break;
2448
2449 case DW_Z_is_signal_trampoline:
2450 // Frames using this CIE are signal delivery frames.
2451 cie->has_z_signal_frame = true;
2452 break;
2453
2454 default:
2455 // An augmentation we don't recognize.
2456 reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation);
2457 return false;
2458 }
2459 }
2460 }
2461
2462 // The CIE's instructions start here.
2463 cie->instructions = cursor;
2464
2465 return true;
2466 }
2467
ReadFDEFields(FDE * fde)2468 bool CallFrameInfo::ReadFDEFields(FDE *fde) {
2469 const uint8_t *cursor = fde->fields;
2470 size_t size;
2471
2472 fde->address = reader_->ReadEncodedPointer(cursor, fde->cie->pointer_encoding,
2473 &size);
2474 if (size > size_t(fde->end - cursor))
2475 return ReportIncomplete(fde);
2476 cursor += size;
2477 reader_->SetFunctionBase(fde->address);
2478
2479 // For the length, we strip off the upper nybble of the encoding used for
2480 // the starting address.
2481 DwarfPointerEncoding length_encoding =
2482 DwarfPointerEncoding(fde->cie->pointer_encoding & 0x0f);
2483 fde->size = reader_->ReadEncodedPointer(cursor, length_encoding, &size);
2484 if (size > size_t(fde->end - cursor))
2485 return ReportIncomplete(fde);
2486 cursor += size;
2487
2488 // If the CIE has a 'z' augmentation string, then augmentation data
2489 // appears here.
2490 if (fde->cie->has_z_augmentation) {
2491 uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &size);
2492 if (size_t(fde->end - cursor) < size + data_size)
2493 return ReportIncomplete(fde);
2494 cursor += size;
2495
2496 // In the abstract, we should walk the augmentation string, and extract
2497 // items from the FDE's augmentation data as we encounter augmentation
2498 // string characters that specify their presence: the ordering of items
2499 // in the augmentation string determines the arrangement of values in
2500 // the augmentation data.
2501 //
2502 // In practice, there's only ever one value in FDE augmentation data
2503 // that we support --- the LSDA pointer --- and we have to bail if we
2504 // see any unrecognized augmentation string characters. So if there is
2505 // anything here at all, we know what it is, and where it starts.
2506 if (fde->cie->has_z_lsda) {
2507 // Check whether the LSDA's pointer encoding is usable now: only once
2508 // we've parsed the FDE's starting address do we call reader_->
2509 // SetFunctionBase, so that the DW_EH_PE_funcrel encoding becomes
2510 // usable.
2511 if (!reader_->UsableEncoding(fde->cie->lsda_encoding)) {
2512 reporter_->UnusablePointerEncoding(fde->cie->offset,
2513 fde->cie->lsda_encoding);
2514 return false;
2515 }
2516
2517 fde->lsda_address =
2518 reader_->ReadEncodedPointer(cursor, fde->cie->lsda_encoding, &size);
2519 if (size > data_size)
2520 return ReportIncomplete(fde);
2521 // Ideally, we would also complain here if there were unconsumed
2522 // augmentation data.
2523 }
2524
2525 cursor += data_size;
2526 }
2527
2528 // The FDE's instructions start after those.
2529 fde->instructions = cursor;
2530
2531 return true;
2532 }
2533
// Walk every entry in buffer_. CIEs are skipped as they're encountered;
// for each FDE, parse the CIE it refers to, offer the FDE to handler_,
// and, if the handler is interested, interpret the CIE's and then the
// FDE's instructions, reporting the resulting rules to the handler.
//
// Return true if every entry visited was processed without error.
// Problems are reported to reporter_; a problem confined to one entry
// doesn't stop the traversal, but one that prevents finding the next
// entry does.
bool CallFrameInfo::Start() {
  const uint8_t *buffer_end = buffer_ + buffer_length_;
  const uint8_t *cursor;
  bool all_ok = true;
  const uint8_t *entry_end;
  bool ok;

  // Traverse all the entries in buffer_, skipping CIEs and offering
  // FDEs to the handler.
  //
  // The increment expression runs on every 'continue' as well as at
  // the bottom of the body: it steps to the next entry and folds this
  // entry's 'ok' into 'all_ok'. So entry_end and ok must both be set
  // before any 'continue'.
  for (cursor = buffer_; cursor < buffer_end;
       cursor = entry_end, all_ok = all_ok && ok) {
    FDE fde;

    // Make it easy to skip this entry with 'continue': assume that
    // things are not okay until we've checked all the data, and
    // prepare the address of the next entry.
    ok = false;

    // Read the entry's prologue.
    if (!ReadEntryPrologue(cursor, &fde)) {
      if (!fde.end) {
        // If we couldn't even figure out this entry's extent, then we
        // must stop processing entries altogether.
        all_ok = false;
        break;
      }
      entry_end = fde.end;
      continue;
    }

    // The next iteration picks up after this entry.
    entry_end = fde.end;

    // Did we see an .eh_frame terminating mark?
    if (fde.kind == kTerminator) {
      // If there appears to be more data left in the section after the
      // terminating mark, warn the user. But this is just a warning;
      // we leave all_ok true.
      if (fde.end < buffer_end) reporter_->EarlyEHTerminator(fde.offset);
      break;
    }

    // In this loop, we skip CIEs. We only parse them fully when we
    // parse an FDE that refers to them. This limits our memory
    // consumption (beyond the buffer itself) to that needed to
    // process the largest single entry.
    if (fde.kind != kFDE) {
      ok = true;
      continue;
    }

    // Validate the CIE pointer.
    if (fde.id > buffer_length_) {
      reporter_->CIEPointerOutOfRange(fde.offset, fde.id);
      continue;
    }

    CIE cie;

    // Parse this FDE's CIE header.
    if (!ReadEntryPrologue(buffer_ + fde.id, &cie))
      continue;
    // This had better be an actual CIE.
    if (cie.kind != kCIE) {
      reporter_->BadCIEId(fde.offset, fde.id);
      continue;
    }
    if (!ReadCIEFields(&cie))
      continue;

    // TODO(nbilling): This could lead to strange behavior if a single buffer
    // contained a mixture of DWARF versions as well as address sizes. Not
    // sure if it's worth handling such a case.

    // DWARF4 CIE specifies address_size, so use it for this call frame.
    if (cie.version >= 4) {
      reader_->SetAddressSize(cie.address_size);
    }

    // We now have the values that govern both the CIE and the FDE.
    // (The CIE's own cie pointer refers to itself, so code that
    // consults entry->cie works for either kind of entry.)
    cie.cie = &cie;
    fde.cie = &cie;

    // Parse the FDE's header.
    if (!ReadFDEFields(&fde))
      continue;

    // Call Entry to ask the consumer if they're interested.
    if (!handler_->Entry(fde.offset, fde.address, fde.size,
                         cie.version, cie.augmentation,
                         cie.return_address_register)) {
      // The handler isn't interested in this entry. That's not an error.
      ok = true;
      continue;
    }

    // In the checks below, a false return from the handler abandons
    // this entry with 'ok' still false.
    if (cie.has_z_augmentation) {
      // Report the personality routine address, if we have one.
      if (cie.has_z_personality) {
        if (!handler_
            ->PersonalityRoutine(cie.personality_address,
                                 IsIndirectEncoding(cie.personality_encoding)))
          continue;
      }

      // Report the language-specific data area address, if we have one.
      if (cie.has_z_lsda) {
        if (!handler_
            ->LanguageSpecificDataArea(fde.lsda_address,
                                       IsIndirectEncoding(cie.lsda_encoding)))
          continue;
      }

      // If this is a signal-handling frame, report that.
      if (cie.has_z_signal_frame) {
        if (!handler_->SignalHandler())
          continue;
      }
    }

    // Interpret the CIE's instructions, and then the FDE's instructions.
    State state(reader_, handler_, reporter_, fde.address);
    ok = state.InterpretCIE(cie) && state.InterpretFDE(fde);

    // Tell the ByteReader that the function start address from the
    // FDE header is no longer valid.
    reader_->ClearFunctionBase();

    // Report the end of the entry.
    handler_->End();
  }

  return all_ok;
}
2668
KindName(EntryKind kind)2669 const char *CallFrameInfo::KindName(EntryKind kind) {
2670 if (kind == CallFrameInfo::kUnknown)
2671 return "entry";
2672 else if (kind == CallFrameInfo::kCIE)
2673 return "common information entry";
2674 else if (kind == CallFrameInfo::kFDE)
2675 return "frame description entry";
2676 else {
2677 assert (kind == CallFrameInfo::kTerminator);
2678 return ".eh_frame sequence terminator";
2679 }
2680 }
2681
ReportIncomplete(Entry * entry)2682 bool CallFrameInfo::ReportIncomplete(Entry *entry) {
2683 reporter_->Incomplete(entry->offset, entry->kind);
2684 return false;
2685 }
2686
Incomplete(uint64_t offset,CallFrameInfo::EntryKind kind)2687 void CallFrameInfo::Reporter::Incomplete(uint64_t offset,
2688 CallFrameInfo::EntryKind kind) {
2689 fprintf(stderr,
2690 "%s: CFI %s at offset 0x%" PRIx64 " in '%s': entry ends early\n",
2691 filename_.c_str(), CallFrameInfo::KindName(kind), offset,
2692 section_.c_str());
2693 }
2694
EarlyEHTerminator(uint64_t offset)2695 void CallFrameInfo::Reporter::EarlyEHTerminator(uint64_t offset) {
2696 fprintf(stderr,
2697 "%s: CFI at offset 0x%" PRIx64 " in '%s': saw end-of-data marker"
2698 " before end of section contents\n",
2699 filename_.c_str(), offset, section_.c_str());
2700 }
2701
CIEPointerOutOfRange(uint64_t offset,uint64_t cie_offset)2702 void CallFrameInfo::Reporter::CIEPointerOutOfRange(uint64_t offset,
2703 uint64_t cie_offset) {
2704 fprintf(stderr,
2705 "%s: CFI frame description entry at offset 0x%" PRIx64 " in '%s':"
2706 " CIE pointer is out of range: 0x%" PRIx64 "\n",
2707 filename_.c_str(), offset, section_.c_str(), cie_offset);
2708 }
2709
BadCIEId(uint64_t offset,uint64_t cie_offset)2710 void CallFrameInfo::Reporter::BadCIEId(uint64_t offset, uint64_t cie_offset) {
2711 fprintf(stderr,
2712 "%s: CFI frame description entry at offset 0x%" PRIx64 " in '%s':"
2713 " CIE pointer does not point to a CIE: 0x%" PRIx64 "\n",
2714 filename_.c_str(), offset, section_.c_str(), cie_offset);
2715 }
2716
UnexpectedAddressSize(uint64_t offset,uint8_t address_size)2717 void CallFrameInfo::Reporter::UnexpectedAddressSize(uint64_t offset,
2718 uint8_t address_size) {
2719 fprintf(stderr,
2720 "%s: CFI frame description entry at offset 0x%" PRIx64 " in '%s':"
2721 " CIE specifies unexpected address size: %d\n",
2722 filename_.c_str(), offset, section_.c_str(), address_size);
2723 }
2724
UnexpectedSegmentSize(uint64_t offset,uint8_t segment_size)2725 void CallFrameInfo::Reporter::UnexpectedSegmentSize(uint64_t offset,
2726 uint8_t segment_size) {
2727 fprintf(stderr,
2728 "%s: CFI frame description entry at offset 0x%" PRIx64 " in '%s':"
2729 " CIE specifies unexpected segment size: %d\n",
2730 filename_.c_str(), offset, section_.c_str(), segment_size);
2731 }
2732
UnrecognizedVersion(uint64_t offset,int version)2733 void CallFrameInfo::Reporter::UnrecognizedVersion(uint64_t offset, int version) {
2734 fprintf(stderr,
2735 "%s: CFI frame description entry at offset 0x%" PRIx64 " in '%s':"
2736 " CIE specifies unrecognized version: %d\n",
2737 filename_.c_str(), offset, section_.c_str(), version);
2738 }
2739
UnrecognizedAugmentation(uint64_t offset,const string & aug)2740 void CallFrameInfo::Reporter::UnrecognizedAugmentation(uint64_t offset,
2741 const string &aug) {
2742 fprintf(stderr,
2743 "%s: CFI frame description entry at offset 0x%" PRIx64 " in '%s':"
2744 " CIE specifies unrecognized augmentation: '%s'\n",
2745 filename_.c_str(), offset, section_.c_str(), aug.c_str());
2746 }
2747
InvalidPointerEncoding(uint64_t offset,uint8_t encoding)2748 void CallFrameInfo::Reporter::InvalidPointerEncoding(uint64_t offset,
2749 uint8_t encoding) {
2750 fprintf(stderr,
2751 "%s: CFI common information entry at offset 0x%" PRIx64 " in '%s':"
2752 " 'z' augmentation specifies invalid pointer encoding: 0x%02x\n",
2753 filename_.c_str(), offset, section_.c_str(), encoding);
2754 }
2755
UnusablePointerEncoding(uint64_t offset,uint8_t encoding)2756 void CallFrameInfo::Reporter::UnusablePointerEncoding(uint64_t offset,
2757 uint8_t encoding) {
2758 fprintf(stderr,
2759 "%s: CFI common information entry at offset 0x%" PRIx64 " in '%s':"
2760 " 'z' augmentation specifies a pointer encoding for which"
2761 " we have no base address: 0x%02x\n",
2762 filename_.c_str(), offset, section_.c_str(), encoding);
2763 }
2764
RestoreInCIE(uint64_t offset,uint64_t insn_offset)2765 void CallFrameInfo::Reporter::RestoreInCIE(uint64_t offset, uint64_t insn_offset) {
2766 fprintf(stderr,
2767 "%s: CFI common information entry at offset 0x%" PRIx64 " in '%s':"
2768 " the DW_CFA_restore instruction at offset 0x%" PRIx64
2769 " cannot be used in a common information entry\n",
2770 filename_.c_str(), offset, section_.c_str(), insn_offset);
2771 }
2772
BadInstruction(uint64_t offset,CallFrameInfo::EntryKind kind,uint64_t insn_offset)2773 void CallFrameInfo::Reporter::BadInstruction(uint64_t offset,
2774 CallFrameInfo::EntryKind kind,
2775 uint64_t insn_offset) {
2776 fprintf(stderr,
2777 "%s: CFI %s at offset 0x%" PRIx64 " in section '%s':"
2778 " the instruction at offset 0x%" PRIx64 " is unrecognized\n",
2779 filename_.c_str(), CallFrameInfo::KindName(kind),
2780 offset, section_.c_str(), insn_offset);
2781 }
2782
NoCFARule(uint64_t offset,CallFrameInfo::EntryKind kind,uint64_t insn_offset)2783 void CallFrameInfo::Reporter::NoCFARule(uint64_t offset,
2784 CallFrameInfo::EntryKind kind,
2785 uint64_t insn_offset) {
2786 fprintf(stderr,
2787 "%s: CFI %s at offset 0x%" PRIx64 " in section '%s':"
2788 " the instruction at offset 0x%" PRIx64 " assumes that a CFA rule has"
2789 " been set, but none has been set\n",
2790 filename_.c_str(), CallFrameInfo::KindName(kind), offset,
2791 section_.c_str(), insn_offset);
2792 }
2793
EmptyStateStack(uint64_t offset,CallFrameInfo::EntryKind kind,uint64_t insn_offset)2794 void CallFrameInfo::Reporter::EmptyStateStack(uint64_t offset,
2795 CallFrameInfo::EntryKind kind,
2796 uint64_t insn_offset) {
2797 fprintf(stderr,
2798 "%s: CFI %s at offset 0x%" PRIx64 " in section '%s':"
2799 " the DW_CFA_restore_state instruction at offset 0x%" PRIx64
2800 " should pop a saved state from the stack, but the stack is empty\n",
2801 filename_.c_str(), CallFrameInfo::KindName(kind), offset,
2802 section_.c_str(), insn_offset);
2803 }
2804
ClearingCFARule(uint64_t offset,CallFrameInfo::EntryKind kind,uint64_t insn_offset)2805 void CallFrameInfo::Reporter::ClearingCFARule(uint64_t offset,
2806 CallFrameInfo::EntryKind kind,
2807 uint64_t insn_offset) {
2808 fprintf(stderr,
2809 "%s: CFI %s at offset 0x%" PRIx64 " in section '%s':"
2810 " the DW_CFA_restore_state instruction at offset 0x%" PRIx64
2811 " would clear the CFA rule in effect\n",
2812 filename_.c_str(), CallFrameInfo::KindName(kind), offset,
2813 section_.c_str(), insn_offset);
2814 }
2815
2816 } // namespace dwarf2reader
2817