1 // Copyright (c) 2010 Google Inc. All Rights Reserved.
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions are
5 // met:
6 //
7 // * Redistributions of source code must retain the above copyright
8 // notice, this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above
10 // copyright notice, this list of conditions and the following disclaimer
11 // in the documentation and/or other materials provided with the
12 // distribution.
13 // * Neither the name of Google Inc. nor the names of its
14 // contributors may be used to endorse or promote products derived from
15 // this software without specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29 // CFI reader author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
30
31 // Implementation of dwarf2reader::LineInfo, dwarf2reader::CompilationUnit,
32 // and dwarf2reader::CallFrameInfo. See dwarf2reader.h for details.
33
34 #include "common/dwarf/dwarf2reader.h"
35
36 #include <assert.h>
37 #include <stdint.h>
38 #include <stdio.h>
39 #include <string.h>
40
41 #include <map>
42 #include <memory>
43 #include <stack>
44 #include <string>
45 #include <utility>
46
47 #include "common/dwarf/bytereader-inl.h"
48 #include "common/dwarf/bytereader.h"
49 #include "common/dwarf/line_state_machine.h"
50 #include "common/using_std_string.h"
51
52 namespace dwarf2reader {
53
CompilationUnit(const SectionMap & sections,uint64 offset,ByteReader * reader,Dwarf2Handler * handler)54 CompilationUnit::CompilationUnit(const SectionMap& sections, uint64 offset,
55 ByteReader* reader, Dwarf2Handler* handler)
56 : offset_from_section_start_(offset), reader_(reader),
57 sections_(sections), handler_(handler), abbrevs_(NULL),
58 string_buffer_(NULL), string_buffer_length_(0) {}
59
60 // Read a DWARF2/3 abbreviation section.
61 // Each abbrev consists of a abbreviation number, a tag, a byte
62 // specifying whether the tag has children, and a list of
63 // attribute/form pairs.
64 // The list of forms is terminated by a 0 for the attribute, and a
65 // zero for the form. The entire abbreviation section is terminated
66 // by a zero for the code.
67
ReadAbbrevs()68 void CompilationUnit::ReadAbbrevs() {
69 if (abbrevs_)
70 return;
71
72 // First get the debug_abbrev section. ".debug_abbrev" is the name
73 // recommended in the DWARF spec, and used on Linux;
74 // "__debug_abbrev" is the name used in Mac OS X Mach-O files.
75 SectionMap::const_iterator iter = sections_.find(".debug_abbrev");
76 if (iter == sections_.end())
77 iter = sections_.find("__debug_abbrev");
78 assert(iter != sections_.end());
79
80 abbrevs_ = new std::vector<Abbrev>;
81 abbrevs_->resize(1);
82
83 // The only way to check whether we are reading over the end of the
84 // buffer would be to first compute the size of the leb128 data by
85 // reading it, then go back and read it again.
86 const char* abbrev_start = iter->second.first +
87 header_.abbrev_offset;
88 const char* abbrevptr = abbrev_start;
89 #ifndef NDEBUG
90 const uint64 abbrev_length = iter->second.second - header_.abbrev_offset;
91 #endif
92
93 while (1) {
94 CompilationUnit::Abbrev abbrev;
95 size_t len;
96 const uint64 number = reader_->ReadUnsignedLEB128(abbrevptr, &len);
97
98 if (number == 0)
99 break;
100 abbrev.number = number;
101 abbrevptr += len;
102
103 assert(abbrevptr < abbrev_start + abbrev_length);
104 const uint64 tag = reader_->ReadUnsignedLEB128(abbrevptr, &len);
105 abbrevptr += len;
106 abbrev.tag = static_cast<enum DwarfTag>(tag);
107
108 assert(abbrevptr < abbrev_start + abbrev_length);
109 abbrev.has_children = reader_->ReadOneByte(abbrevptr);
110 abbrevptr += 1;
111
112 assert(abbrevptr < abbrev_start + abbrev_length);
113
114 while (1) {
115 const uint64 nametemp = reader_->ReadUnsignedLEB128(abbrevptr, &len);
116 abbrevptr += len;
117
118 assert(abbrevptr < abbrev_start + abbrev_length);
119 const uint64 formtemp = reader_->ReadUnsignedLEB128(abbrevptr, &len);
120 abbrevptr += len;
121 if (nametemp == 0 && formtemp == 0)
122 break;
123
124 const enum DwarfAttribute name =
125 static_cast<enum DwarfAttribute>(nametemp);
126 const enum DwarfForm form = static_cast<enum DwarfForm>(formtemp);
127 abbrev.attributes.push_back(std::make_pair(name, form));
128 }
129 assert(abbrev.number == abbrevs_->size());
130 abbrevs_->push_back(abbrev);
131 }
132 }
133
134 // Skips a single DIE's attributes.
SkipDIE(const char * start,const Abbrev & abbrev)135 const char* CompilationUnit::SkipDIE(const char* start,
136 const Abbrev& abbrev) {
137 for (AttributeList::const_iterator i = abbrev.attributes.begin();
138 i != abbrev.attributes.end();
139 i++) {
140 start = SkipAttribute(start, i->second);
141 }
142 return start;
143 }
144
145 // Skips a single attribute form's data.
SkipAttribute(const char * start,enum DwarfForm form)146 const char* CompilationUnit::SkipAttribute(const char* start,
147 enum DwarfForm form) {
148 size_t len;
149
150 switch (form) {
151 case DW_FORM_indirect:
152 form = static_cast<enum DwarfForm>(reader_->ReadUnsignedLEB128(start,
153 &len));
154 start += len;
155 return SkipAttribute(start, form);
156
157 case DW_FORM_flag_present:
158 return start;
159 case DW_FORM_data1:
160 case DW_FORM_flag:
161 case DW_FORM_ref1:
162 return start + 1;
163 case DW_FORM_ref2:
164 case DW_FORM_data2:
165 return start + 2;
166 case DW_FORM_ref4:
167 case DW_FORM_data4:
168 return start + 4;
169 case DW_FORM_ref8:
170 case DW_FORM_data8:
171 case DW_FORM_ref_sig8:
172 return start + 8;
173 case DW_FORM_string:
174 return start + strlen(start) + 1;
175 case DW_FORM_udata:
176 case DW_FORM_ref_udata:
177 reader_->ReadUnsignedLEB128(start, &len);
178 return start + len;
179
180 case DW_FORM_sdata:
181 reader_->ReadSignedLEB128(start, &len);
182 return start + len;
183 case DW_FORM_addr:
184 return start + reader_->AddressSize();
185 case DW_FORM_ref_addr:
186 // DWARF2 and 3/4 differ on whether ref_addr is address size or
187 // offset size.
188 assert(header_.version >= 2);
189 if (header_.version == 2) {
190 return start + reader_->AddressSize();
191 } else if (header_.version >= 3) {
192 return start + reader_->OffsetSize();
193 }
194 break;
195
196 case DW_FORM_block1:
197 return start + 1 + reader_->ReadOneByte(start);
198 case DW_FORM_block2:
199 return start + 2 + reader_->ReadTwoBytes(start);
200 case DW_FORM_block4:
201 return start + 4 + reader_->ReadFourBytes(start);
202 case DW_FORM_block:
203 case DW_FORM_exprloc: {
204 uint64 size = reader_->ReadUnsignedLEB128(start, &len);
205 return start + size + len;
206 }
207 case DW_FORM_strp:
208 case DW_FORM_sec_offset:
209 return start + reader_->OffsetSize();
210 }
211 fprintf(stderr,"Unhandled form type");
212 return NULL;
213 }
214
215 // Read a DWARF2/3 header.
216 // The header is variable length in DWARF3 (and DWARF2 as extended by
217 // most compilers), and consists of an length field, a version number,
218 // the offset in the .debug_abbrev section for our abbrevs, and an
219 // address size.
ReadHeader()220 void CompilationUnit::ReadHeader() {
221 const char* headerptr = buffer_;
222 size_t initial_length_size;
223
224 assert(headerptr + 4 < buffer_ + buffer_length_);
225 const uint64 initial_length
226 = reader_->ReadInitialLength(headerptr, &initial_length_size);
227 headerptr += initial_length_size;
228 header_.length = initial_length;
229
230 assert(headerptr + 2 < buffer_ + buffer_length_);
231 header_.version = reader_->ReadTwoBytes(headerptr);
232 headerptr += 2;
233
234 assert(headerptr + reader_->OffsetSize() < buffer_ + buffer_length_);
235 header_.abbrev_offset = reader_->ReadOffset(headerptr);
236 headerptr += reader_->OffsetSize();
237
238 assert(headerptr + 1 < buffer_ + buffer_length_);
239 header_.address_size = reader_->ReadOneByte(headerptr);
240 reader_->SetAddressSize(header_.address_size);
241 headerptr += 1;
242
243 after_header_ = headerptr;
244
245 // This check ensures that we don't have to do checking during the
246 // reading of DIEs. header_.length does not include the size of the
247 // initial length.
248 assert(buffer_ + initial_length_size + header_.length <=
249 buffer_ + buffer_length_);
250 }
251
Start()252 uint64 CompilationUnit::Start() {
253 // First get the debug_info section. ".debug_info" is the name
254 // recommended in the DWARF spec, and used on Linux; "__debug_info"
255 // is the name used in Mac OS X Mach-O files.
256 SectionMap::const_iterator iter = sections_.find(".debug_info");
257 if (iter == sections_.end())
258 iter = sections_.find("__debug_info");
259 assert(iter != sections_.end());
260
261 // Set up our buffer
262 buffer_ = iter->second.first + offset_from_section_start_;
263 buffer_length_ = iter->second.second - offset_from_section_start_;
264
265 // Read the header
266 ReadHeader();
267
268 // Figure out the real length from the end of the initial length to
269 // the end of the compilation unit, since that is the value we
270 // return.
271 uint64 ourlength = header_.length;
272 if (reader_->OffsetSize() == 8)
273 ourlength += 12;
274 else
275 ourlength += 4;
276
277 // See if the user wants this compilation unit, and if not, just return.
278 if (!handler_->StartCompilationUnit(offset_from_section_start_,
279 reader_->AddressSize(),
280 reader_->OffsetSize(),
281 header_.length,
282 header_.version))
283 return ourlength;
284
285 // Otherwise, continue by reading our abbreviation entries.
286 ReadAbbrevs();
287
288 // Set the string section if we have one. ".debug_str" is the name
289 // recommended in the DWARF spec, and used on Linux; "__debug_str"
290 // is the name used in Mac OS X Mach-O files.
291 iter = sections_.find(".debug_str");
292 if (iter == sections_.end())
293 iter = sections_.find("__debug_str");
294 if (iter != sections_.end()) {
295 string_buffer_ = iter->second.first;
296 string_buffer_length_ = iter->second.second;
297 }
298
299 // Now that we have our abbreviations, start processing DIE's.
300 ProcessDIEs();
301
302 return ourlength;
303 }
304
305 // If one really wanted, you could merge SkipAttribute and
306 // ProcessAttribute
307 // This is all boring data manipulation and calling of the handler.
ProcessAttribute(uint64 dieoffset,const char * start,enum DwarfAttribute attr,enum DwarfForm form)308 const char* CompilationUnit::ProcessAttribute(
309 uint64 dieoffset, const char* start, enum DwarfAttribute attr,
310 enum DwarfForm form) {
311 size_t len;
312
313 switch (form) {
314 // DW_FORM_indirect is never used because it is such a space
315 // waster.
316 case DW_FORM_indirect:
317 form = static_cast<enum DwarfForm>(reader_->ReadUnsignedLEB128(start,
318 &len));
319 start += len;
320 return ProcessAttribute(dieoffset, start, attr, form);
321
322 case DW_FORM_flag_present:
323 handler_->ProcessAttributeUnsigned(dieoffset, attr, form, 1);
324 return start;
325 case DW_FORM_data1:
326 case DW_FORM_flag:
327 handler_->ProcessAttributeUnsigned(dieoffset, attr, form,
328 reader_->ReadOneByte(start));
329 return start + 1;
330 case DW_FORM_data2:
331 handler_->ProcessAttributeUnsigned(dieoffset, attr, form,
332 reader_->ReadTwoBytes(start));
333 return start + 2;
334 case DW_FORM_data4:
335 handler_->ProcessAttributeUnsigned(dieoffset, attr, form,
336 reader_->ReadFourBytes(start));
337 return start + 4;
338 case DW_FORM_data8:
339 handler_->ProcessAttributeUnsigned(dieoffset, attr, form,
340 reader_->ReadEightBytes(start));
341 return start + 8;
342 case DW_FORM_string: {
343 const char* str = start;
344 handler_->ProcessAttributeString(dieoffset, attr, form,
345 str);
346 return start + strlen(str) + 1;
347 }
348 case DW_FORM_udata:
349 handler_->ProcessAttributeUnsigned(dieoffset, attr, form,
350 reader_->ReadUnsignedLEB128(start,
351 &len));
352 return start + len;
353
354 case DW_FORM_sdata:
355 handler_->ProcessAttributeSigned(dieoffset, attr, form,
356 reader_->ReadSignedLEB128(start, &len));
357 return start + len;
358 case DW_FORM_addr:
359 handler_->ProcessAttributeUnsigned(dieoffset, attr, form,
360 reader_->ReadAddress(start));
361 return start + reader_->AddressSize();
362 case DW_FORM_sec_offset:
363 handler_->ProcessAttributeUnsigned(dieoffset, attr, form,
364 reader_->ReadOffset(start));
365 return start + reader_->OffsetSize();
366
367 case DW_FORM_ref1:
368 handler_->ProcessAttributeReference(dieoffset, attr, form,
369 reader_->ReadOneByte(start)
370 + offset_from_section_start_);
371 return start + 1;
372 case DW_FORM_ref2:
373 handler_->ProcessAttributeReference(dieoffset, attr, form,
374 reader_->ReadTwoBytes(start)
375 + offset_from_section_start_);
376 return start + 2;
377 case DW_FORM_ref4:
378 handler_->ProcessAttributeReference(dieoffset, attr, form,
379 reader_->ReadFourBytes(start)
380 + offset_from_section_start_);
381 return start + 4;
382 case DW_FORM_ref8:
383 handler_->ProcessAttributeReference(dieoffset, attr, form,
384 reader_->ReadEightBytes(start)
385 + offset_from_section_start_);
386 return start + 8;
387 case DW_FORM_ref_udata:
388 handler_->ProcessAttributeReference(dieoffset, attr, form,
389 reader_->ReadUnsignedLEB128(start,
390 &len)
391 + offset_from_section_start_);
392 return start + len;
393 case DW_FORM_ref_addr:
394 // DWARF2 and 3/4 differ on whether ref_addr is address size or
395 // offset size.
396 assert(header_.version >= 2);
397 if (header_.version == 2) {
398 handler_->ProcessAttributeReference(dieoffset, attr, form,
399 reader_->ReadAddress(start));
400 return start + reader_->AddressSize();
401 } else if (header_.version >= 3) {
402 handler_->ProcessAttributeReference(dieoffset, attr, form,
403 reader_->ReadOffset(start));
404 return start + reader_->OffsetSize();
405 }
406 break;
407 case DW_FORM_ref_sig8:
408 handler_->ProcessAttributeSignature(dieoffset, attr, form,
409 reader_->ReadEightBytes(start));
410 return start + 8;
411
412 case DW_FORM_block1: {
413 uint64 datalen = reader_->ReadOneByte(start);
414 handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 1,
415 datalen);
416 return start + 1 + datalen;
417 }
418 case DW_FORM_block2: {
419 uint64 datalen = reader_->ReadTwoBytes(start);
420 handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 2,
421 datalen);
422 return start + 2 + datalen;
423 }
424 case DW_FORM_block4: {
425 uint64 datalen = reader_->ReadFourBytes(start);
426 handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 4,
427 datalen);
428 return start + 4 + datalen;
429 }
430 case DW_FORM_block:
431 case DW_FORM_exprloc: {
432 uint64 datalen = reader_->ReadUnsignedLEB128(start, &len);
433 handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + len,
434 datalen);
435 return start + datalen + len;
436 }
437 case DW_FORM_strp: {
438 assert(string_buffer_ != NULL);
439
440 const uint64 offset = reader_->ReadOffset(start);
441 assert(string_buffer_ + offset < string_buffer_ + string_buffer_length_);
442
443 const char* str = string_buffer_ + offset;
444 handler_->ProcessAttributeString(dieoffset, attr, form,
445 str);
446 return start + reader_->OffsetSize();
447 }
448 }
449 fprintf(stderr, "Unhandled form type\n");
450 return NULL;
451 }
452
ProcessDIE(uint64 dieoffset,const char * start,const Abbrev & abbrev)453 const char* CompilationUnit::ProcessDIE(uint64 dieoffset,
454 const char* start,
455 const Abbrev& abbrev) {
456 for (AttributeList::const_iterator i = abbrev.attributes.begin();
457 i != abbrev.attributes.end();
458 i++) {
459 start = ProcessAttribute(dieoffset, start, i->first, i->second);
460 }
461 return start;
462 }
463
ProcessDIEs()464 void CompilationUnit::ProcessDIEs() {
465 const char* dieptr = after_header_;
466 size_t len;
467
468 // lengthstart is the place the length field is based on.
469 // It is the point in the header after the initial length field
470 const char* lengthstart = buffer_;
471
472 // In 64 bit dwarf, the initial length is 12 bytes, because of the
473 // 0xffffffff at the start.
474 if (reader_->OffsetSize() == 8)
475 lengthstart += 12;
476 else
477 lengthstart += 4;
478
479 std::stack<uint64> die_stack;
480
481 while (dieptr < (lengthstart + header_.length)) {
482 // We give the user the absolute offset from the beginning of
483 // debug_info, since they need it to deal with ref_addr forms.
484 uint64 absolute_offset = (dieptr - buffer_) + offset_from_section_start_;
485
486 uint64 abbrev_num = reader_->ReadUnsignedLEB128(dieptr, &len);
487
488 dieptr += len;
489
490 // Abbrev == 0 represents the end of a list of children, or padding
491 // at the end of the compilation unit.
492 if (abbrev_num == 0) {
493 if (die_stack.size() == 0)
494 // If it is padding, then we are done with the compilation unit's DIEs.
495 return;
496 const uint64 offset = die_stack.top();
497 die_stack.pop();
498 handler_->EndDIE(offset);
499 continue;
500 }
501
502 const Abbrev& abbrev = abbrevs_->at(static_cast<size_t>(abbrev_num));
503 const enum DwarfTag tag = abbrev.tag;
504 if (!handler_->StartDIE(absolute_offset, tag)) {
505 dieptr = SkipDIE(dieptr, abbrev);
506 } else {
507 dieptr = ProcessDIE(absolute_offset, dieptr, abbrev);
508 }
509
510 if (abbrev.has_children) {
511 die_stack.push(absolute_offset);
512 } else {
513 handler_->EndDIE(absolute_offset);
514 }
515 }
516 }
517
LineInfo(const char * buffer,uint64 buffer_length,ByteReader * reader,LineInfoHandler * handler)518 LineInfo::LineInfo(const char* buffer, uint64 buffer_length,
519 ByteReader* reader, LineInfoHandler* handler):
520 handler_(handler), reader_(reader), buffer_(buffer),
521 buffer_length_(buffer_length) {
522 header_.std_opcode_lengths = NULL;
523 }
524
Start()525 uint64 LineInfo::Start() {
526 ReadHeader();
527 ReadLines();
528 return after_header_ - buffer_;
529 }
530
531 // The header for a debug_line section is mildly complicated, because
532 // the line info is very tightly encoded.
ReadHeader()533 void LineInfo::ReadHeader() {
534 const char* lineptr = buffer_;
535 size_t initial_length_size;
536
537 const uint64 initial_length
538 = reader_->ReadInitialLength(lineptr, &initial_length_size);
539
540 lineptr += initial_length_size;
541 header_.total_length = initial_length;
542 assert(buffer_ + initial_length_size + header_.total_length <=
543 buffer_ + buffer_length_);
544
545 // Address size *must* be set by CU ahead of time.
546 assert(reader_->AddressSize() != 0);
547
548 header_.version = reader_->ReadTwoBytes(lineptr);
549 lineptr += 2;
550
551 header_.prologue_length = reader_->ReadOffset(lineptr);
552 lineptr += reader_->OffsetSize();
553
554 header_.min_insn_length = reader_->ReadOneByte(lineptr);
555 lineptr += 1;
556
557 header_.default_is_stmt = reader_->ReadOneByte(lineptr);
558 lineptr += 1;
559
560 header_.line_base = *reinterpret_cast<const int8*>(lineptr);
561 lineptr += 1;
562
563 header_.line_range = reader_->ReadOneByte(lineptr);
564 lineptr += 1;
565
566 header_.opcode_base = reader_->ReadOneByte(lineptr);
567 lineptr += 1;
568
569 header_.std_opcode_lengths = new std::vector<unsigned char>;
570 header_.std_opcode_lengths->resize(header_.opcode_base + 1);
571 (*header_.std_opcode_lengths)[0] = 0;
572 for (int i = 1; i < header_.opcode_base; i++) {
573 (*header_.std_opcode_lengths)[i] = reader_->ReadOneByte(lineptr);
574 lineptr += 1;
575 }
576
577 // It is legal for the directory entry table to be empty.
578 if (*lineptr) {
579 uint32 dirindex = 1;
580 while (*lineptr) {
581 const char* dirname = lineptr;
582 handler_->DefineDir(dirname, dirindex);
583 lineptr += strlen(dirname) + 1;
584 dirindex++;
585 }
586 }
587 lineptr++;
588
589 // It is also legal for the file entry table to be empty.
590 if (*lineptr) {
591 uint32 fileindex = 1;
592 size_t len;
593 while (*lineptr) {
594 const char* filename = lineptr;
595 lineptr += strlen(filename) + 1;
596
597 uint64 dirindex = reader_->ReadUnsignedLEB128(lineptr, &len);
598 lineptr += len;
599
600 uint64 mod_time = reader_->ReadUnsignedLEB128(lineptr, &len);
601 lineptr += len;
602
603 uint64 filelength = reader_->ReadUnsignedLEB128(lineptr, &len);
604 lineptr += len;
605 handler_->DefineFile(filename, fileindex, static_cast<uint32>(dirindex),
606 mod_time, filelength);
607 fileindex++;
608 }
609 }
610 lineptr++;
611
612 after_header_ = lineptr;
613 }
614
615 /* static */
ProcessOneOpcode(ByteReader * reader,LineInfoHandler * handler,const struct LineInfoHeader & header,const char * start,struct LineStateMachine * lsm,size_t * len,uintptr pc,bool * lsm_passes_pc)616 bool LineInfo::ProcessOneOpcode(ByteReader* reader,
617 LineInfoHandler* handler,
618 const struct LineInfoHeader &header,
619 const char* start,
620 struct LineStateMachine* lsm,
621 size_t* len,
622 uintptr pc,
623 bool *lsm_passes_pc) {
624 size_t oplen = 0;
625 size_t templen;
626 uint8 opcode = reader->ReadOneByte(start);
627 oplen++;
628 start++;
629
630 // If the opcode is great than the opcode_base, it is a special
631 // opcode. Most line programs consist mainly of special opcodes.
632 if (opcode >= header.opcode_base) {
633 opcode -= header.opcode_base;
634 const int64 advance_address = (opcode / header.line_range)
635 * header.min_insn_length;
636 const int32 advance_line = (opcode % header.line_range)
637 + header.line_base;
638
639 // Check if the lsm passes "pc". If so, mark it as passed.
640 if (lsm_passes_pc &&
641 lsm->address <= pc && pc < lsm->address + advance_address) {
642 *lsm_passes_pc = true;
643 }
644
645 lsm->address += advance_address;
646 lsm->line_num += advance_line;
647 lsm->basic_block = true;
648 *len = oplen;
649 return true;
650 }
651
652 // Otherwise, we have the regular opcodes
653 switch (opcode) {
654 case DW_LNS_copy: {
655 lsm->basic_block = false;
656 *len = oplen;
657 return true;
658 }
659
660 case DW_LNS_advance_pc: {
661 uint64 advance_address = reader->ReadUnsignedLEB128(start, &templen);
662 oplen += templen;
663
664 // Check if the lsm passes "pc". If so, mark it as passed.
665 if (lsm_passes_pc && lsm->address <= pc &&
666 pc < lsm->address + header.min_insn_length * advance_address) {
667 *lsm_passes_pc = true;
668 }
669
670 lsm->address += header.min_insn_length * advance_address;
671 }
672 break;
673 case DW_LNS_advance_line: {
674 const int64 advance_line = reader->ReadSignedLEB128(start, &templen);
675 oplen += templen;
676 lsm->line_num += static_cast<int32>(advance_line);
677
678 // With gcc 4.2.1, we can get the line_no here for the first time
679 // since DW_LNS_advance_line is called after DW_LNE_set_address is
680 // called. So we check if the lsm passes "pc" here, not in
681 // DW_LNE_set_address.
682 if (lsm_passes_pc && lsm->address == pc) {
683 *lsm_passes_pc = true;
684 }
685 }
686 break;
687 case DW_LNS_set_file: {
688 const uint64 fileno = reader->ReadUnsignedLEB128(start, &templen);
689 oplen += templen;
690 lsm->file_num = static_cast<uint32>(fileno);
691 }
692 break;
693 case DW_LNS_set_column: {
694 const uint64 colno = reader->ReadUnsignedLEB128(start, &templen);
695 oplen += templen;
696 lsm->column_num = static_cast<uint32>(colno);
697 }
698 break;
699 case DW_LNS_negate_stmt: {
700 lsm->is_stmt = !lsm->is_stmt;
701 }
702 break;
703 case DW_LNS_set_basic_block: {
704 lsm->basic_block = true;
705 }
706 break;
707 case DW_LNS_fixed_advance_pc: {
708 const uint16 advance_address = reader->ReadTwoBytes(start);
709 oplen += 2;
710
711 // Check if the lsm passes "pc". If so, mark it as passed.
712 if (lsm_passes_pc &&
713 lsm->address <= pc && pc < lsm->address + advance_address) {
714 *lsm_passes_pc = true;
715 }
716
717 lsm->address += advance_address;
718 }
719 break;
720 case DW_LNS_const_add_pc: {
721 const int64 advance_address = header.min_insn_length
722 * ((255 - header.opcode_base)
723 / header.line_range);
724
725 // Check if the lsm passes "pc". If so, mark it as passed.
726 if (lsm_passes_pc &&
727 lsm->address <= pc && pc < lsm->address + advance_address) {
728 *lsm_passes_pc = true;
729 }
730
731 lsm->address += advance_address;
732 }
733 break;
734 case DW_LNS_extended_op: {
735 const uint64 extended_op_len = reader->ReadUnsignedLEB128(start,
736 &templen);
737 start += templen;
738 oplen += templen + extended_op_len;
739
740 const uint64 extended_op = reader->ReadOneByte(start);
741 start++;
742
743 switch (extended_op) {
744 case DW_LNE_end_sequence: {
745 lsm->end_sequence = true;
746 *len = oplen;
747 return true;
748 }
749 break;
750 case DW_LNE_set_address: {
751 // With gcc 4.2.1, we cannot tell the line_no here since
752 // DW_LNE_set_address is called before DW_LNS_advance_line is
753 // called. So we do not check if the lsm passes "pc" here. See
754 // also the comment in DW_LNS_advance_line.
755 uint64 address = reader->ReadAddress(start);
756 lsm->address = address;
757 }
758 break;
759 case DW_LNE_define_file: {
760 const char* filename = start;
761
762 templen = strlen(filename) + 1;
763 start += templen;
764
765 uint64 dirindex = reader->ReadUnsignedLEB128(start, &templen);
766 oplen += templen;
767
768 const uint64 mod_time = reader->ReadUnsignedLEB128(start,
769 &templen);
770 oplen += templen;
771
772 const uint64 filelength = reader->ReadUnsignedLEB128(start,
773 &templen);
774 oplen += templen;
775
776 if (handler) {
777 handler->DefineFile(filename, -1, static_cast<uint32>(dirindex),
778 mod_time, filelength);
779 }
780 }
781 break;
782 }
783 }
784 break;
785
786 default: {
787 // Ignore unknown opcode silently
788 if (header.std_opcode_lengths) {
789 for (int i = 0; i < (*header.std_opcode_lengths)[opcode]; i++) {
790 reader->ReadUnsignedLEB128(start, &templen);
791 start += templen;
792 oplen += templen;
793 }
794 }
795 }
796 break;
797 }
798 *len = oplen;
799 return false;
800 }
801
ReadLines()802 void LineInfo::ReadLines() {
803 struct LineStateMachine lsm;
804
805 // lengthstart is the place the length field is based on.
806 // It is the point in the header after the initial length field
807 const char* lengthstart = buffer_;
808
809 // In 64 bit dwarf, the initial length is 12 bytes, because of the
810 // 0xffffffff at the start.
811 if (reader_->OffsetSize() == 8)
812 lengthstart += 12;
813 else
814 lengthstart += 4;
815
816 const char* lineptr = after_header_;
817 lsm.Reset(header_.default_is_stmt);
818
819 // The LineInfoHandler interface expects each line's length along
820 // with its address, but DWARF only provides addresses (sans
821 // length), and an end-of-sequence address; one infers the length
822 // from the next address. So we report a line only when we get the
823 // next line's address, or the end-of-sequence address.
824 bool have_pending_line = false;
825 uint64 pending_address = 0;
826 uint32 pending_file_num = 0, pending_line_num = 0, pending_column_num = 0;
827
828 while (lineptr < lengthstart + header_.total_length) {
829 size_t oplength;
830 bool add_row = ProcessOneOpcode(reader_, handler_, header_,
831 lineptr, &lsm, &oplength, (uintptr)-1,
832 NULL);
833 if (add_row) {
834 if (have_pending_line)
835 handler_->AddLine(pending_address, lsm.address - pending_address,
836 pending_file_num, pending_line_num,
837 pending_column_num);
838 if (lsm.end_sequence) {
839 lsm.Reset(header_.default_is_stmt);
840 have_pending_line = false;
841 } else {
842 pending_address = lsm.address;
843 pending_file_num = lsm.file_num;
844 pending_line_num = lsm.line_num;
845 pending_column_num = lsm.column_num;
846 have_pending_line = true;
847 }
848 }
849 lineptr += oplength;
850 }
851
852 after_header_ = lengthstart + header_.total_length;
853 }
854
855 // A DWARF rule for recovering the address or value of a register, or
856 // computing the canonical frame address. There is one subclass of this for
857 // each '*Rule' member function in CallFrameInfo::Handler.
858 //
859 // It's annoying that we have to handle Rules using pointers (because
860 // the concrete instances can have an arbitrary size). They're small,
861 // so it would be much nicer if we could just handle them by value
862 // instead of fretting about ownership and destruction.
863 //
864 // It seems like all these could simply be instances of std::tr1::bind,
865 // except that we need instances to be EqualityComparable, too.
866 //
867 // This could logically be nested within State, but then the qualified names
868 // get horrendous.
869 class CallFrameInfo::Rule {
870 public:
~Rule()871 virtual ~Rule() { }
872
873 // Tell HANDLER that, at ADDRESS in the program, REGISTER can be
874 // recovered using this rule. If REGISTER is kCFARegister, then this rule
875 // describes how to compute the canonical frame address. Return what the
876 // HANDLER member function returned.
877 virtual bool Handle(Handler *handler,
878 uint64 address, int register) const = 0;
879
880 // Equality on rules. We use these to decide which rules we need
881 // to report after a DW_CFA_restore_state instruction.
882 virtual bool operator==(const Rule &rhs) const = 0;
883
operator !=(const Rule & rhs) const884 bool operator!=(const Rule &rhs) const { return ! (*this == rhs); }
885
886 // Return a pointer to a copy of this rule.
887 virtual Rule *Copy() const = 0;
888
889 // If this is a base+offset rule, change its base register to REG.
890 // Otherwise, do nothing. (Ugly, but required for DW_CFA_def_cfa_register.)
SetBaseRegister(unsigned reg)891 virtual void SetBaseRegister(unsigned reg) { }
892
893 // If this is a base+offset rule, change its offset to OFFSET. Otherwise,
894 // do nothing. (Ugly, but required for DW_CFA_def_cfa_offset.)
SetOffset(long long offset)895 virtual void SetOffset(long long offset) { }
896 };
897
898 // Rule: the value the register had in the caller cannot be recovered.
899 class CallFrameInfo::UndefinedRule: public CallFrameInfo::Rule {
900 public:
UndefinedRule()901 UndefinedRule() { }
~UndefinedRule()902 ~UndefinedRule() { }
Handle(Handler * handler,uint64 address,int reg) const903 bool Handle(Handler *handler, uint64 address, int reg) const {
904 return handler->UndefinedRule(address, reg);
905 }
operator ==(const Rule & rhs) const906 bool operator==(const Rule &rhs) const {
907 // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
908 // been carefully considered; cheap RTTI-like workarounds are forbidden.
909 const UndefinedRule *our_rhs = dynamic_cast<const UndefinedRule *>(&rhs);
910 return (our_rhs != NULL);
911 }
Copy() const912 Rule *Copy() const { return new UndefinedRule(*this); }
913 };
914
915 // Rule: the register's value is the same as that it had in the caller.
916 class CallFrameInfo::SameValueRule: public CallFrameInfo::Rule {
917 public:
SameValueRule()918 SameValueRule() { }
~SameValueRule()919 ~SameValueRule() { }
Handle(Handler * handler,uint64 address,int reg) const920 bool Handle(Handler *handler, uint64 address, int reg) const {
921 return handler->SameValueRule(address, reg);
922 }
operator ==(const Rule & rhs) const923 bool operator==(const Rule &rhs) const {
924 // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
925 // been carefully considered; cheap RTTI-like workarounds are forbidden.
926 const SameValueRule *our_rhs = dynamic_cast<const SameValueRule *>(&rhs);
927 return (our_rhs != NULL);
928 }
Copy() const929 Rule *Copy() const { return new SameValueRule(*this); }
930 };
931
932 // Rule: the register is saved at OFFSET from BASE_REGISTER. BASE_REGISTER
933 // may be CallFrameInfo::Handler::kCFARegister.
934 class CallFrameInfo::OffsetRule: public CallFrameInfo::Rule {
935 public:
OffsetRule(int base_register,long offset)936 OffsetRule(int base_register, long offset)
937 : base_register_(base_register), offset_(offset) { }
~OffsetRule()938 ~OffsetRule() { }
Handle(Handler * handler,uint64 address,int reg) const939 bool Handle(Handler *handler, uint64 address, int reg) const {
940 return handler->OffsetRule(address, reg, base_register_, offset_);
941 }
operator ==(const Rule & rhs) const942 bool operator==(const Rule &rhs) const {
943 // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
944 // been carefully considered; cheap RTTI-like workarounds are forbidden.
945 const OffsetRule *our_rhs = dynamic_cast<const OffsetRule *>(&rhs);
946 return (our_rhs &&
947 base_register_ == our_rhs->base_register_ &&
948 offset_ == our_rhs->offset_);
949 }
Copy() const950 Rule *Copy() const { return new OffsetRule(*this); }
951 // We don't actually need SetBaseRegister or SetOffset here, since they
952 // are only ever applied to CFA rules, for DW_CFA_def_cfa_offset, and it
953 // doesn't make sense to use OffsetRule for computing the CFA: it
954 // computes the address at which a register is saved, not a value.
955 private:
956 int base_register_;
957 long offset_;
958 };
959
960 // Rule: the value the register had in the caller is the value of
961 // BASE_REGISTER plus offset. BASE_REGISTER may be
962 // CallFrameInfo::Handler::kCFARegister.
963 class CallFrameInfo::ValOffsetRule: public CallFrameInfo::Rule {
964 public:
ValOffsetRule(int base_register,long offset)965 ValOffsetRule(int base_register, long offset)
966 : base_register_(base_register), offset_(offset) { }
~ValOffsetRule()967 ~ValOffsetRule() { }
Handle(Handler * handler,uint64 address,int reg) const968 bool Handle(Handler *handler, uint64 address, int reg) const {
969 return handler->ValOffsetRule(address, reg, base_register_, offset_);
970 }
operator ==(const Rule & rhs) const971 bool operator==(const Rule &rhs) const {
972 // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
973 // been carefully considered; cheap RTTI-like workarounds are forbidden.
974 const ValOffsetRule *our_rhs = dynamic_cast<const ValOffsetRule *>(&rhs);
975 return (our_rhs &&
976 base_register_ == our_rhs->base_register_ &&
977 offset_ == our_rhs->offset_);
978 }
Copy() const979 Rule *Copy() const { return new ValOffsetRule(*this); }
SetBaseRegister(unsigned reg)980 void SetBaseRegister(unsigned reg) { base_register_ = reg; }
SetOffset(long long offset)981 void SetOffset(long long offset) { offset_ = offset; }
982 private:
983 int base_register_;
984 long offset_;
985 };
986
987 // Rule: the register has been saved in another register REGISTER_NUMBER_.
988 class CallFrameInfo::RegisterRule: public CallFrameInfo::Rule {
989 public:
RegisterRule(int register_number)990 explicit RegisterRule(int register_number)
991 : register_number_(register_number) { }
~RegisterRule()992 ~RegisterRule() { }
Handle(Handler * handler,uint64 address,int reg) const993 bool Handle(Handler *handler, uint64 address, int reg) const {
994 return handler->RegisterRule(address, reg, register_number_);
995 }
operator ==(const Rule & rhs) const996 bool operator==(const Rule &rhs) const {
997 // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
998 // been carefully considered; cheap RTTI-like workarounds are forbidden.
999 const RegisterRule *our_rhs = dynamic_cast<const RegisterRule *>(&rhs);
1000 return (our_rhs && register_number_ == our_rhs->register_number_);
1001 }
Copy() const1002 Rule *Copy() const { return new RegisterRule(*this); }
1003 private:
1004 int register_number_;
1005 };
1006
1007 // Rule: EXPRESSION evaluates to the address at which the register is saved.
1008 class CallFrameInfo::ExpressionRule: public CallFrameInfo::Rule {
1009 public:
ExpressionRule(const string & expression)1010 explicit ExpressionRule(const string &expression)
1011 : expression_(expression) { }
~ExpressionRule()1012 ~ExpressionRule() { }
Handle(Handler * handler,uint64 address,int reg) const1013 bool Handle(Handler *handler, uint64 address, int reg) const {
1014 return handler->ExpressionRule(address, reg, expression_);
1015 }
operator ==(const Rule & rhs) const1016 bool operator==(const Rule &rhs) const {
1017 // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
1018 // been carefully considered; cheap RTTI-like workarounds are forbidden.
1019 const ExpressionRule *our_rhs = dynamic_cast<const ExpressionRule *>(&rhs);
1020 return (our_rhs && expression_ == our_rhs->expression_);
1021 }
Copy() const1022 Rule *Copy() const { return new ExpressionRule(*this); }
1023 private:
1024 string expression_;
1025 };
1026
1027 // Rule: EXPRESSION evaluates to the address at which the register is saved.
1028 class CallFrameInfo::ValExpressionRule: public CallFrameInfo::Rule {
1029 public:
ValExpressionRule(const string & expression)1030 explicit ValExpressionRule(const string &expression)
1031 : expression_(expression) { }
~ValExpressionRule()1032 ~ValExpressionRule() { }
Handle(Handler * handler,uint64 address,int reg) const1033 bool Handle(Handler *handler, uint64 address, int reg) const {
1034 return handler->ValExpressionRule(address, reg, expression_);
1035 }
operator ==(const Rule & rhs) const1036 bool operator==(const Rule &rhs) const {
1037 // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
1038 // been carefully considered; cheap RTTI-like workarounds are forbidden.
1039 const ValExpressionRule *our_rhs =
1040 dynamic_cast<const ValExpressionRule *>(&rhs);
1041 return (our_rhs && expression_ == our_rhs->expression_);
1042 }
Copy() const1043 Rule *Copy() const { return new ValExpressionRule(*this); }
1044 private:
1045 string expression_;
1046 };
1047
1048 // A map from register numbers to rules.
1049 class CallFrameInfo::RuleMap {
1050 public:
RuleMap()1051 RuleMap() : cfa_rule_(NULL) { }
RuleMap(const RuleMap & rhs)1052 RuleMap(const RuleMap &rhs) : cfa_rule_(NULL) { *this = rhs; }
~RuleMap()1053 ~RuleMap() { Clear(); }
1054
1055 RuleMap &operator=(const RuleMap &rhs);
1056
1057 // Set the rule for computing the CFA to RULE. Take ownership of RULE.
SetCFARule(Rule * rule)1058 void SetCFARule(Rule *rule) { delete cfa_rule_; cfa_rule_ = rule; }
1059
1060 // Return the current CFA rule. Unlike RegisterRule, this RuleMap retains
1061 // ownership of the rule. We use this for DW_CFA_def_cfa_offset and
1062 // DW_CFA_def_cfa_register, and for detecting references to the CFA before
1063 // a rule for it has been established.
CFARule() const1064 Rule *CFARule() const { return cfa_rule_; }
1065
1066 // Return the rule for REG, or NULL if there is none. The caller takes
1067 // ownership of the result.
1068 Rule *RegisterRule(int reg) const;
1069
1070 // Set the rule for computing REG to RULE. Take ownership of RULE.
1071 void SetRegisterRule(int reg, Rule *rule);
1072
1073 // Make all the appropriate calls to HANDLER as if we were changing from
1074 // this RuleMap to NEW_RULES at ADDRESS. We use this to implement
1075 // DW_CFA_restore_state, where lots of rules can change simultaneously.
1076 // Return true if all handlers returned true; otherwise, return false.
1077 bool HandleTransitionTo(Handler *handler, uint64 address,
1078 const RuleMap &new_rules) const;
1079
1080 private:
1081 // A map from register numbers to Rules.
1082 typedef std::map<int, Rule *> RuleByNumber;
1083
1084 // Remove all register rules and clear cfa_rule_.
1085 void Clear();
1086
1087 // The rule for computing the canonical frame address. This RuleMap owns
1088 // this rule.
1089 Rule *cfa_rule_;
1090
1091 // A map from register numbers to postfix expressions to recover
1092 // their values. This RuleMap owns the Rules the map refers to.
1093 RuleByNumber registers_;
1094 };
1095
operator =(const RuleMap & rhs)1096 CallFrameInfo::RuleMap &CallFrameInfo::RuleMap::operator=(const RuleMap &rhs) {
1097 Clear();
1098 // Since each map owns the rules it refers to, assignment must copy them.
1099 if (rhs.cfa_rule_) cfa_rule_ = rhs.cfa_rule_->Copy();
1100 for (RuleByNumber::const_iterator it = rhs.registers_.begin();
1101 it != rhs.registers_.end(); it++)
1102 registers_[it->first] = it->second->Copy();
1103 return *this;
1104 }
1105
RegisterRule(int reg) const1106 CallFrameInfo::Rule *CallFrameInfo::RuleMap::RegisterRule(int reg) const {
1107 assert(reg != Handler::kCFARegister);
1108 RuleByNumber::const_iterator it = registers_.find(reg);
1109 if (it != registers_.end())
1110 return it->second->Copy();
1111 else
1112 return NULL;
1113 }
1114
SetRegisterRule(int reg,Rule * rule)1115 void CallFrameInfo::RuleMap::SetRegisterRule(int reg, Rule *rule) {
1116 assert(reg != Handler::kCFARegister);
1117 assert(rule);
1118 Rule **slot = ®isters_[reg];
1119 delete *slot;
1120 *slot = rule;
1121 }
1122
HandleTransitionTo(Handler * handler,uint64 address,const RuleMap & new_rules) const1123 bool CallFrameInfo::RuleMap::HandleTransitionTo(
1124 Handler *handler,
1125 uint64 address,
1126 const RuleMap &new_rules) const {
1127 // Transition from cfa_rule_ to new_rules.cfa_rule_.
1128 if (cfa_rule_ && new_rules.cfa_rule_) {
1129 if (*cfa_rule_ != *new_rules.cfa_rule_ &&
1130 !new_rules.cfa_rule_->Handle(handler, address,
1131 Handler::kCFARegister))
1132 return false;
1133 } else if (cfa_rule_) {
1134 // this RuleMap has a CFA rule but new_rules doesn't.
1135 // CallFrameInfo::Handler has no way to handle this --- and shouldn't;
1136 // it's garbage input. The instruction interpreter should have
1137 // detected this and warned, so take no action here.
1138 } else if (new_rules.cfa_rule_) {
1139 // This shouldn't be possible: NEW_RULES is some prior state, and
1140 // there's no way to remove entries.
1141 assert(0);
1142 } else {
1143 // Both CFA rules are empty. No action needed.
1144 }
1145
1146 // Traverse the two maps in order by register number, and report
1147 // whatever differences we find.
1148 RuleByNumber::const_iterator old_it = registers_.begin();
1149 RuleByNumber::const_iterator new_it = new_rules.registers_.begin();
1150 while (old_it != registers_.end() && new_it != new_rules.registers_.end()) {
1151 if (old_it->first < new_it->first) {
1152 // This RuleMap has an entry for old_it->first, but NEW_RULES
1153 // doesn't.
1154 //
1155 // This isn't really the right thing to do, but since CFI generally
1156 // only mentions callee-saves registers, and GCC's convention for
1157 // callee-saves registers is that they are unchanged, it's a good
1158 // approximation.
1159 if (!handler->SameValueRule(address, old_it->first))
1160 return false;
1161 old_it++;
1162 } else if (old_it->first > new_it->first) {
1163 // NEW_RULES has entry for new_it->first, but this RuleMap
1164 // doesn't. This shouldn't be possible: NEW_RULES is some prior
1165 // state, and there's no way to remove entries.
1166 assert(0);
1167 } else {
1168 // Both maps have an entry for this register. Report the new
1169 // rule if it is different.
1170 if (*old_it->second != *new_it->second &&
1171 !new_it->second->Handle(handler, address, new_it->first))
1172 return false;
1173 new_it++, old_it++;
1174 }
1175 }
1176 // Finish off entries from this RuleMap with no counterparts in new_rules.
1177 while (old_it != registers_.end()) {
1178 if (!handler->SameValueRule(address, old_it->first))
1179 return false;
1180 old_it++;
1181 }
1182 // Since we only make transitions from a rule set to some previously
1183 // saved rule set, and we can only add rules to the map, NEW_RULES
1184 // must have fewer rules than *this.
1185 assert(new_it == new_rules.registers_.end());
1186
1187 return true;
1188 }
1189
1190 // Remove all register rules and clear cfa_rule_.
Clear()1191 void CallFrameInfo::RuleMap::Clear() {
1192 delete cfa_rule_;
1193 cfa_rule_ = NULL;
1194 for (RuleByNumber::iterator it = registers_.begin();
1195 it != registers_.end(); it++)
1196 delete it->second;
1197 registers_.clear();
1198 }
1199
1200 // The state of the call frame information interpreter as it processes
1201 // instructions from a CIE and FDE.
1202 class CallFrameInfo::State {
1203 public:
1204 // Create a call frame information interpreter state with the given
1205 // reporter, reader, handler, and initial call frame info address.
State(ByteReader * reader,Handler * handler,Reporter * reporter,uint64 address)1206 State(ByteReader *reader, Handler *handler, Reporter *reporter,
1207 uint64 address)
1208 : reader_(reader), handler_(handler), reporter_(reporter),
1209 address_(address), entry_(NULL), cursor_(NULL) { }
1210
1211 // Interpret instructions from CIE, save the resulting rule set for
1212 // DW_CFA_restore instructions, and return true. On error, report
1213 // the problem to reporter_ and return false.
1214 bool InterpretCIE(const CIE &cie);
1215
1216 // Interpret instructions from FDE, and return true. On error,
1217 // report the problem to reporter_ and return false.
1218 bool InterpretFDE(const FDE &fde);
1219
1220 private:
1221 // The operands of a CFI instruction, for ParseOperands.
1222 struct Operands {
1223 unsigned register_number; // A register number.
1224 uint64 offset; // An offset or address.
1225 long signed_offset; // A signed offset.
1226 string expression; // A DWARF expression.
1227 };
1228
1229 // Parse CFI instruction operands from STATE's instruction stream as
1230 // described by FORMAT. On success, populate OPERANDS with the
1231 // results, and return true. On failure, report the problem and
1232 // return false.
1233 //
1234 // Each character of FORMAT should be one of the following:
1235 //
1236 // 'r' unsigned LEB128 register number (OPERANDS->register_number)
1237 // 'o' unsigned LEB128 offset (OPERANDS->offset)
1238 // 's' signed LEB128 offset (OPERANDS->signed_offset)
1239 // 'a' machine-size address (OPERANDS->offset)
1240 // (If the CIE has a 'z' augmentation string, 'a' uses the
1241 // encoding specified by the 'R' argument.)
1242 // '1' a one-byte offset (OPERANDS->offset)
1243 // '2' a two-byte offset (OPERANDS->offset)
1244 // '4' a four-byte offset (OPERANDS->offset)
1245 // '8' an eight-byte offset (OPERANDS->offset)
1246 // 'e' a DW_FORM_block holding a (OPERANDS->expression)
1247 // DWARF expression
1248 bool ParseOperands(const char *format, Operands *operands);
1249
1250 // Interpret one CFI instruction from STATE's instruction stream, update
1251 // STATE, report any rule changes to handler_, and return true. On
1252 // failure, report the problem and return false.
1253 bool DoInstruction();
1254
1255 // The following Do* member functions are subroutines of DoInstruction,
1256 // factoring out the actual work of operations that have several
1257 // different encodings.
1258
1259 // Set the CFA rule to be the value of BASE_REGISTER plus OFFSET, and
1260 // return true. On failure, report and return false. (Used for
1261 // DW_CFA_def_cfa and DW_CFA_def_cfa_sf.)
1262 bool DoDefCFA(unsigned base_register, long offset);
1263
1264 // Change the offset of the CFA rule to OFFSET, and return true. On
1265 // failure, report and return false. (Subroutine for
1266 // DW_CFA_def_cfa_offset and DW_CFA_def_cfa_offset_sf.)
1267 bool DoDefCFAOffset(long offset);
1268
1269 // Specify that REG can be recovered using RULE, and return true. On
1270 // failure, report and return false.
1271 bool DoRule(unsigned reg, Rule *rule);
1272
1273 // Specify that REG can be found at OFFSET from the CFA, and return true.
1274 // On failure, report and return false. (Subroutine for DW_CFA_offset,
1275 // DW_CFA_offset_extended, and DW_CFA_offset_extended_sf.)
1276 bool DoOffset(unsigned reg, long offset);
1277
1278 // Specify that the caller's value for REG is the CFA plus OFFSET,
1279 // and return true. On failure, report and return false. (Subroutine
1280 // for DW_CFA_val_offset and DW_CFA_val_offset_sf.)
1281 bool DoValOffset(unsigned reg, long offset);
1282
1283 // Restore REG to the rule established in the CIE, and return true. On
1284 // failure, report and return false. (Subroutine for DW_CFA_restore and
1285 // DW_CFA_restore_extended.)
1286 bool DoRestore(unsigned reg);
1287
1288 // Return the section offset of the instruction at cursor. For use
1289 // in error messages.
CursorOffset()1290 uint64 CursorOffset() { return entry_->offset + (cursor_ - entry_->start); }
1291
1292 // Report that entry_ is incomplete, and return false. For brevity.
ReportIncomplete()1293 bool ReportIncomplete() {
1294 reporter_->Incomplete(entry_->offset, entry_->kind);
1295 return false;
1296 }
1297
1298 // For reading multi-byte values with the appropriate endianness.
1299 ByteReader *reader_;
1300
1301 // The handler to which we should report the data we find.
1302 Handler *handler_;
1303
1304 // For reporting problems in the info we're parsing.
1305 Reporter *reporter_;
1306
1307 // The code address to which the next instruction in the stream applies.
1308 uint64 address_;
1309
1310 // The entry whose instructions we are currently processing. This is
1311 // first a CIE, and then an FDE.
1312 const Entry *entry_;
1313
1314 // The next instruction to process.
1315 const char *cursor_;
1316
1317 // The current set of rules.
1318 RuleMap rules_;
1319
1320 // The set of rules established by the CIE, used by DW_CFA_restore
1321 // and DW_CFA_restore_extended. We set this after interpreting the
1322 // CIE's instructions.
1323 RuleMap cie_rules_;
1324
1325 // A stack of saved states, for DW_CFA_remember_state and
1326 // DW_CFA_restore_state.
1327 std::stack<RuleMap> saved_rules_;
1328 };
1329
InterpretCIE(const CIE & cie)1330 bool CallFrameInfo::State::InterpretCIE(const CIE &cie) {
1331 entry_ = &cie;
1332 cursor_ = entry_->instructions;
1333 while (cursor_ < entry_->end)
1334 if (!DoInstruction())
1335 return false;
1336 // Note the rules established by the CIE, for use by DW_CFA_restore
1337 // and DW_CFA_restore_extended.
1338 cie_rules_ = rules_;
1339 return true;
1340 }
1341
InterpretFDE(const FDE & fde)1342 bool CallFrameInfo::State::InterpretFDE(const FDE &fde) {
1343 entry_ = &fde;
1344 cursor_ = entry_->instructions;
1345 while (cursor_ < entry_->end)
1346 if (!DoInstruction())
1347 return false;
1348 return true;
1349 }
1350
ParseOperands(const char * format,Operands * operands)1351 bool CallFrameInfo::State::ParseOperands(const char *format,
1352 Operands *operands) {
1353 size_t len;
1354 const char *operand;
1355
1356 for (operand = format; *operand; operand++) {
1357 size_t bytes_left = entry_->end - cursor_;
1358 switch (*operand) {
1359 case 'r':
1360 operands->register_number = reader_->ReadUnsignedLEB128(cursor_, &len);
1361 if (len > bytes_left) return ReportIncomplete();
1362 cursor_ += len;
1363 break;
1364
1365 case 'o':
1366 operands->offset = reader_->ReadUnsignedLEB128(cursor_, &len);
1367 if (len > bytes_left) return ReportIncomplete();
1368 cursor_ += len;
1369 break;
1370
1371 case 's':
1372 operands->signed_offset = reader_->ReadSignedLEB128(cursor_, &len);
1373 if (len > bytes_left) return ReportIncomplete();
1374 cursor_ += len;
1375 break;
1376
1377 case 'a':
1378 operands->offset =
1379 reader_->ReadEncodedPointer(cursor_, entry_->cie->pointer_encoding,
1380 &len);
1381 if (len > bytes_left) return ReportIncomplete();
1382 cursor_ += len;
1383 break;
1384
1385 case '1':
1386 if (1 > bytes_left) return ReportIncomplete();
1387 operands->offset = static_cast<unsigned char>(*cursor_++);
1388 break;
1389
1390 case '2':
1391 if (2 > bytes_left) return ReportIncomplete();
1392 operands->offset = reader_->ReadTwoBytes(cursor_);
1393 cursor_ += 2;
1394 break;
1395
1396 case '4':
1397 if (4 > bytes_left) return ReportIncomplete();
1398 operands->offset = reader_->ReadFourBytes(cursor_);
1399 cursor_ += 4;
1400 break;
1401
1402 case '8':
1403 if (8 > bytes_left) return ReportIncomplete();
1404 operands->offset = reader_->ReadEightBytes(cursor_);
1405 cursor_ += 8;
1406 break;
1407
1408 case 'e': {
1409 size_t expression_length = reader_->ReadUnsignedLEB128(cursor_, &len);
1410 if (len > bytes_left || expression_length > bytes_left - len)
1411 return ReportIncomplete();
1412 cursor_ += len;
1413 operands->expression = string(cursor_, expression_length);
1414 cursor_ += expression_length;
1415 break;
1416 }
1417
1418 default:
1419 assert(0);
1420 }
1421 }
1422
1423 return true;
1424 }
1425
DoInstruction()1426 bool CallFrameInfo::State::DoInstruction() {
1427 CIE *cie = entry_->cie;
1428 Operands ops;
1429
1430 // Our entry's kind should have been set by now.
1431 assert(entry_->kind != kUnknown);
1432
1433 // We shouldn't have been invoked unless there were more
1434 // instructions to parse.
1435 assert(cursor_ < entry_->end);
1436
1437 unsigned opcode = *cursor_++;
1438 if ((opcode & 0xc0) != 0) {
1439 switch (opcode & 0xc0) {
1440 // Advance the address.
1441 case DW_CFA_advance_loc: {
1442 size_t code_offset = opcode & 0x3f;
1443 address_ += code_offset * cie->code_alignment_factor;
1444 break;
1445 }
1446
1447 // Find a register at an offset from the CFA.
1448 case DW_CFA_offset:
1449 if (!ParseOperands("o", &ops) ||
1450 !DoOffset(opcode & 0x3f, ops.offset * cie->data_alignment_factor))
1451 return false;
1452 break;
1453
1454 // Restore the rule established for a register by the CIE.
1455 case DW_CFA_restore:
1456 if (!DoRestore(opcode & 0x3f)) return false;
1457 break;
1458
1459 // The 'if' above should have excluded this possibility.
1460 default:
1461 assert(0);
1462 }
1463
1464 // Return here, so the big switch below won't be indented.
1465 return true;
1466 }
1467
1468 switch (opcode) {
1469 // Set the address.
1470 case DW_CFA_set_loc:
1471 if (!ParseOperands("a", &ops)) return false;
1472 address_ = ops.offset;
1473 break;
1474
1475 // Advance the address.
1476 case DW_CFA_advance_loc1:
1477 if (!ParseOperands("1", &ops)) return false;
1478 address_ += ops.offset * cie->code_alignment_factor;
1479 break;
1480
1481 // Advance the address.
1482 case DW_CFA_advance_loc2:
1483 if (!ParseOperands("2", &ops)) return false;
1484 address_ += ops.offset * cie->code_alignment_factor;
1485 break;
1486
1487 // Advance the address.
1488 case DW_CFA_advance_loc4:
1489 if (!ParseOperands("4", &ops)) return false;
1490 address_ += ops.offset * cie->code_alignment_factor;
1491 break;
1492
1493 // Advance the address.
1494 case DW_CFA_MIPS_advance_loc8:
1495 if (!ParseOperands("8", &ops)) return false;
1496 address_ += ops.offset * cie->code_alignment_factor;
1497 break;
1498
1499 // Compute the CFA by adding an offset to a register.
1500 case DW_CFA_def_cfa:
1501 if (!ParseOperands("ro", &ops) ||
1502 !DoDefCFA(ops.register_number, ops.offset))
1503 return false;
1504 break;
1505
1506 // Compute the CFA by adding an offset to a register.
1507 case DW_CFA_def_cfa_sf:
1508 if (!ParseOperands("rs", &ops) ||
1509 !DoDefCFA(ops.register_number,
1510 ops.signed_offset * cie->data_alignment_factor))
1511 return false;
1512 break;
1513
1514 // Change the base register used to compute the CFA.
1515 case DW_CFA_def_cfa_register: {
1516 if (!ParseOperands("r", &ops)) return false;
1517 Rule *cfa_rule = rules_.CFARule();
1518 if (!cfa_rule) {
1519 if (!DoDefCFA(ops.register_number, ops.offset)) {
1520 reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
1521 return false;
1522 }
1523 } else {
1524 cfa_rule->SetBaseRegister(ops.register_number);
1525 if (!cfa_rule->Handle(handler_, address_,
1526 Handler::kCFARegister))
1527 return false;
1528 }
1529 break;
1530 }
1531
1532 // Change the offset used to compute the CFA.
1533 case DW_CFA_def_cfa_offset:
1534 if (!ParseOperands("o", &ops) ||
1535 !DoDefCFAOffset(ops.offset))
1536 return false;
1537 break;
1538
1539 // Change the offset used to compute the CFA.
1540 case DW_CFA_def_cfa_offset_sf:
1541 if (!ParseOperands("s", &ops) ||
1542 !DoDefCFAOffset(ops.signed_offset * cie->data_alignment_factor))
1543 return false;
1544 break;
1545
1546 // Specify an expression whose value is the CFA.
1547 case DW_CFA_def_cfa_expression: {
1548 if (!ParseOperands("e", &ops))
1549 return false;
1550 Rule *rule = new ValExpressionRule(ops.expression);
1551 rules_.SetCFARule(rule);
1552 if (!rule->Handle(handler_, address_,
1553 Handler::kCFARegister))
1554 return false;
1555 break;
1556 }
1557
1558 // The register's value cannot be recovered.
1559 case DW_CFA_undefined: {
1560 if (!ParseOperands("r", &ops) ||
1561 !DoRule(ops.register_number, new UndefinedRule()))
1562 return false;
1563 break;
1564 }
1565
1566 // The register's value is unchanged from its value in the caller.
1567 case DW_CFA_same_value: {
1568 if (!ParseOperands("r", &ops) ||
1569 !DoRule(ops.register_number, new SameValueRule()))
1570 return false;
1571 break;
1572 }
1573
1574 // Find a register at an offset from the CFA.
1575 case DW_CFA_offset_extended:
1576 if (!ParseOperands("ro", &ops) ||
1577 !DoOffset(ops.register_number,
1578 ops.offset * cie->data_alignment_factor))
1579 return false;
1580 break;
1581
1582 // The register is saved at an offset from the CFA.
1583 case DW_CFA_offset_extended_sf:
1584 if (!ParseOperands("rs", &ops) ||
1585 !DoOffset(ops.register_number,
1586 ops.signed_offset * cie->data_alignment_factor))
1587 return false;
1588 break;
1589
1590 // The register is saved at an offset from the CFA.
1591 case DW_CFA_GNU_negative_offset_extended:
1592 if (!ParseOperands("ro", &ops) ||
1593 !DoOffset(ops.register_number,
1594 -ops.offset * cie->data_alignment_factor))
1595 return false;
1596 break;
1597
1598 // The register's value is the sum of the CFA plus an offset.
1599 case DW_CFA_val_offset:
1600 if (!ParseOperands("ro", &ops) ||
1601 !DoValOffset(ops.register_number,
1602 ops.offset * cie->data_alignment_factor))
1603 return false;
1604 break;
1605
1606 // The register's value is the sum of the CFA plus an offset.
1607 case DW_CFA_val_offset_sf:
1608 if (!ParseOperands("rs", &ops) ||
1609 !DoValOffset(ops.register_number,
1610 ops.signed_offset * cie->data_alignment_factor))
1611 return false;
1612 break;
1613
1614 // The register has been saved in another register.
1615 case DW_CFA_register: {
1616 if (!ParseOperands("ro", &ops) ||
1617 !DoRule(ops.register_number, new RegisterRule(ops.offset)))
1618 return false;
1619 break;
1620 }
1621
1622 // An expression yields the address at which the register is saved.
1623 case DW_CFA_expression: {
1624 if (!ParseOperands("re", &ops) ||
1625 !DoRule(ops.register_number, new ExpressionRule(ops.expression)))
1626 return false;
1627 break;
1628 }
1629
1630 // An expression yields the caller's value for the register.
1631 case DW_CFA_val_expression: {
1632 if (!ParseOperands("re", &ops) ||
1633 !DoRule(ops.register_number, new ValExpressionRule(ops.expression)))
1634 return false;
1635 break;
1636 }
1637
1638 // Restore the rule established for a register by the CIE.
1639 case DW_CFA_restore_extended:
1640 if (!ParseOperands("r", &ops) ||
1641 !DoRestore( ops.register_number))
1642 return false;
1643 break;
1644
1645 // Save the current set of rules on a stack.
1646 case DW_CFA_remember_state:
1647 saved_rules_.push(rules_);
1648 break;
1649
1650 // Pop the current set of rules off the stack.
1651 case DW_CFA_restore_state: {
1652 if (saved_rules_.empty()) {
1653 reporter_->EmptyStateStack(entry_->offset, entry_->kind,
1654 CursorOffset());
1655 return false;
1656 }
1657 const RuleMap &new_rules = saved_rules_.top();
1658 if (rules_.CFARule() && !new_rules.CFARule()) {
1659 reporter_->ClearingCFARule(entry_->offset, entry_->kind,
1660 CursorOffset());
1661 return false;
1662 }
1663 rules_.HandleTransitionTo(handler_, address_, new_rules);
1664 rules_ = new_rules;
1665 saved_rules_.pop();
1666 break;
1667 }
1668
1669 // No operation. (Padding instruction.)
1670 case DW_CFA_nop:
1671 break;
1672
1673 // A SPARC register window save: Registers 8 through 15 (%o0-%o7)
1674 // are saved in registers 24 through 31 (%i0-%i7), and registers
1675 // 16 through 31 (%l0-%l7 and %i0-%i7) are saved at CFA offsets
1676 // (0-15 * the register size). The register numbers must be
1677 // hard-coded. A GNU extension, and not a pretty one.
1678 case DW_CFA_GNU_window_save: {
1679 // Save %o0-%o7 in %i0-%i7.
1680 for (int i = 8; i < 16; i++)
1681 if (!DoRule(i, new RegisterRule(i + 16)))
1682 return false;
1683 // Save %l0-%l7 and %i0-%i7 at the CFA.
1684 for (int i = 16; i < 32; i++)
1685 // Assume that the byte reader's address size is the same as
1686 // the architecture's register size. !@#%*^ hilarious.
1687 if (!DoRule(i, new OffsetRule(Handler::kCFARegister,
1688 (i - 16) * reader_->AddressSize())))
1689 return false;
1690 break;
1691 }
1692
1693 // I'm not sure what this is. GDB doesn't use it for unwinding.
1694 case DW_CFA_GNU_args_size:
1695 if (!ParseOperands("o", &ops)) return false;
1696 break;
1697
1698 // An opcode we don't recognize.
1699 default: {
1700 reporter_->BadInstruction(entry_->offset, entry_->kind, CursorOffset());
1701 return false;
1702 }
1703 }
1704
1705 return true;
1706 }
1707
DoDefCFA(unsigned base_register,long offset)1708 bool CallFrameInfo::State::DoDefCFA(unsigned base_register, long offset) {
1709 Rule *rule = new ValOffsetRule(base_register, offset);
1710 rules_.SetCFARule(rule);
1711 return rule->Handle(handler_, address_,
1712 Handler::kCFARegister);
1713 }
1714
DoDefCFAOffset(long offset)1715 bool CallFrameInfo::State::DoDefCFAOffset(long offset) {
1716 Rule *cfa_rule = rules_.CFARule();
1717 if (!cfa_rule) {
1718 reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
1719 return false;
1720 }
1721 cfa_rule->SetOffset(offset);
1722 return cfa_rule->Handle(handler_, address_,
1723 Handler::kCFARegister);
1724 }
1725
DoRule(unsigned reg,Rule * rule)1726 bool CallFrameInfo::State::DoRule(unsigned reg, Rule *rule) {
1727 rules_.SetRegisterRule(reg, rule);
1728 return rule->Handle(handler_, address_, reg);
1729 }
1730
DoOffset(unsigned reg,long offset)1731 bool CallFrameInfo::State::DoOffset(unsigned reg, long offset) {
1732 if (!rules_.CFARule()) {
1733 reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
1734 return false;
1735 }
1736 return DoRule(reg,
1737 new OffsetRule(Handler::kCFARegister, offset));
1738 }
1739
DoValOffset(unsigned reg,long offset)1740 bool CallFrameInfo::State::DoValOffset(unsigned reg, long offset) {
1741 if (!rules_.CFARule()) {
1742 reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
1743 return false;
1744 }
1745 return DoRule(reg,
1746 new ValOffsetRule(Handler::kCFARegister, offset));
1747 }
1748
DoRestore(unsigned reg)1749 bool CallFrameInfo::State::DoRestore(unsigned reg) {
1750 // DW_CFA_restore and DW_CFA_restore_extended don't make sense in a CIE.
1751 if (entry_->kind == kCIE) {
1752 reporter_->RestoreInCIE(entry_->offset, CursorOffset());
1753 return false;
1754 }
1755 Rule *rule = cie_rules_.RegisterRule(reg);
1756 if (!rule) {
1757 // This isn't really the right thing to do, but since CFI generally
1758 // only mentions callee-saves registers, and GCC's convention for
1759 // callee-saves registers is that they are unchanged, it's a good
1760 // approximation.
1761 rule = new SameValueRule();
1762 }
1763 return DoRule(reg, rule);
1764 }
1765
ReadEntryPrologue(const char * cursor,Entry * entry)1766 bool CallFrameInfo::ReadEntryPrologue(const char *cursor, Entry *entry) {
1767 const char *buffer_end = buffer_ + buffer_length_;
1768
1769 // Initialize enough of ENTRY for use in error reporting.
1770 entry->offset = cursor - buffer_;
1771 entry->start = cursor;
1772 entry->kind = kUnknown;
1773 entry->end = NULL;
1774
1775 // Read the initial length. This sets reader_'s offset size.
1776 size_t length_size;
1777 uint64 length = reader_->ReadInitialLength(cursor, &length_size);
1778 if (length_size > size_t(buffer_end - cursor))
1779 return ReportIncomplete(entry);
1780 cursor += length_size;
1781
1782 // In a .eh_frame section, a length of zero marks the end of the series
1783 // of entries.
1784 if (length == 0 && eh_frame_) {
1785 entry->kind = kTerminator;
1786 entry->end = cursor;
1787 return true;
1788 }
1789
1790 // Validate the length.
1791 if (length > size_t(buffer_end - cursor))
1792 return ReportIncomplete(entry);
1793
1794 // The length is the number of bytes after the initial length field;
1795 // we have that position handy at this point, so compute the end
1796 // now. (If we're parsing 64-bit-offset DWARF on a 32-bit machine,
1797 // and the length didn't fit in a size_t, we would have rejected it
1798 // above.)
1799 entry->end = cursor + length;
1800
1801 // Parse the next field: either the offset of a CIE or a CIE id.
1802 size_t offset_size = reader_->OffsetSize();
1803 if (offset_size > size_t(entry->end - cursor)) return ReportIncomplete(entry);
1804 entry->id = reader_->ReadOffset(cursor);
1805
1806 // Don't advance cursor past id field yet; in .eh_frame data we need
1807 // the id's position to compute the section offset of an FDE's CIE.
1808
1809 // Now we can decide what kind of entry this is.
1810 if (eh_frame_) {
1811 // In .eh_frame data, an ID of zero marks the entry as a CIE, and
1812 // anything else is an offset from the id field of the FDE to the start
1813 // of the CIE.
1814 if (entry->id == 0) {
1815 entry->kind = kCIE;
1816 } else {
1817 entry->kind = kFDE;
1818 // Turn the offset from the id into an offset from the buffer's start.
1819 entry->id = (cursor - buffer_) - entry->id;
1820 }
1821 } else {
1822 // In DWARF CFI data, an ID of ~0 (of the appropriate width, given the
1823 // offset size for the entry) marks the entry as a CIE, and anything
1824 // else is the offset of the CIE from the beginning of the section.
1825 if (offset_size == 4)
1826 entry->kind = (entry->id == 0xffffffff) ? kCIE : kFDE;
1827 else {
1828 assert(offset_size == 8);
1829 entry->kind = (entry->id == 0xffffffffffffffffULL) ? kCIE : kFDE;
1830 }
1831 }
1832
1833 // Now advance cursor past the id.
1834 cursor += offset_size;
1835
1836 // The fields specific to this kind of entry start here.
1837 entry->fields = cursor;
1838
1839 entry->cie = NULL;
1840
1841 return true;
1842 }
1843
ReadCIEFields(CIE * cie)1844 bool CallFrameInfo::ReadCIEFields(CIE *cie) {
1845 const char *cursor = cie->fields;
1846 size_t len;
1847
1848 assert(cie->kind == kCIE);
1849
1850 // Prepare for early exit.
1851 cie->version = 0;
1852 cie->augmentation.clear();
1853 cie->code_alignment_factor = 0;
1854 cie->data_alignment_factor = 0;
1855 cie->return_address_register = 0;
1856 cie->has_z_augmentation = false;
1857 cie->pointer_encoding = DW_EH_PE_absptr;
1858 cie->instructions = 0;
1859
1860 // Parse the version number.
1861 if (cie->end - cursor < 1)
1862 return ReportIncomplete(cie);
1863 cie->version = reader_->ReadOneByte(cursor);
1864 cursor++;
1865
1866 // If we don't recognize the version, we can't parse any more fields of the
1867 // CIE. For DWARF CFI, we handle versions 1 through 3 (there was never a
1868 // version 2 of CFI data). For .eh_frame, we handle versions 1 and 3 as well;
1869 // the difference between those versions seems to be the same as for
1870 // .debug_frame.
1871 if (cie->version < 1 || cie->version > 3) {
1872 reporter_->UnrecognizedVersion(cie->offset, cie->version);
1873 return false;
1874 }
1875
1876 const char *augmentation_start = cursor;
1877 const void *augmentation_end =
1878 memchr(augmentation_start, '\0', cie->end - augmentation_start);
1879 if (! augmentation_end) return ReportIncomplete(cie);
1880 cursor = static_cast<const char *>(augmentation_end);
1881 cie->augmentation = string(augmentation_start,
1882 cursor - augmentation_start);
1883 // Skip the terminating '\0'.
1884 cursor++;
1885
1886 // Is this CFI augmented?
1887 if (!cie->augmentation.empty()) {
1888 // Is it an augmentation we recognize?
1889 if (cie->augmentation[0] == DW_Z_augmentation_start) {
1890 // Linux C++ ABI 'z' augmentation, used for exception handling data.
1891 cie->has_z_augmentation = true;
1892 } else {
1893 // Not an augmentation we recognize. Augmentations can have arbitrary
1894 // effects on the form of rest of the content, so we have to give up.
1895 reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation);
1896 return false;
1897 }
1898 }
1899
1900 // Parse the code alignment factor.
1901 cie->code_alignment_factor = reader_->ReadUnsignedLEB128(cursor, &len);
1902 if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
1903 cursor += len;
1904
1905 // Parse the data alignment factor.
1906 cie->data_alignment_factor = reader_->ReadSignedLEB128(cursor, &len);
1907 if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
1908 cursor += len;
1909
1910 // Parse the return address register. This is a ubyte in version 1, and
1911 // a ULEB128 in version 3.
1912 if (cie->version == 1) {
1913 if (cursor >= cie->end) return ReportIncomplete(cie);
1914 cie->return_address_register = uint8(*cursor++);
1915 } else {
1916 cie->return_address_register = reader_->ReadUnsignedLEB128(cursor, &len);
1917 if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
1918 cursor += len;
1919 }
1920
1921 // If we have a 'z' augmentation string, find the augmentation data and
1922 // use the augmentation string to parse it.
1923 if (cie->has_z_augmentation) {
1924 uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &len);
1925 if (size_t(cie->end - cursor) < len + data_size)
1926 return ReportIncomplete(cie);
1927 cursor += len;
1928 const char *data = cursor;
1929 cursor += data_size;
1930 const char *data_end = cursor;
1931
1932 cie->has_z_lsda = false;
1933 cie->has_z_personality = false;
1934 cie->has_z_signal_frame = false;
1935
1936 // Walk the augmentation string, and extract values from the
1937 // augmentation data as the string directs.
1938 for (size_t i = 1; i < cie->augmentation.size(); i++) {
1939 switch (cie->augmentation[i]) {
1940 case DW_Z_has_LSDA:
1941 // The CIE's augmentation data holds the language-specific data
1942 // area pointer's encoding, and the FDE's augmentation data holds
1943 // the pointer itself.
1944 cie->has_z_lsda = true;
1945 // Fetch the LSDA encoding from the augmentation data.
1946 if (data >= data_end) return ReportIncomplete(cie);
1947 cie->lsda_encoding = DwarfPointerEncoding(*data++);
1948 if (!reader_->ValidEncoding(cie->lsda_encoding)) {
1949 reporter_->InvalidPointerEncoding(cie->offset, cie->lsda_encoding);
1950 return false;
1951 }
1952 // Don't check if the encoding is usable here --- we haven't
1953 // read the FDE's fields yet, so we're not prepared for
1954 // DW_EH_PE_funcrel, although that's a fine encoding for the
1955 // LSDA to use, since it appears in the FDE.
1956 break;
1957
1958 case DW_Z_has_personality_routine:
1959 // The CIE's augmentation data holds the personality routine
1960 // pointer's encoding, followed by the pointer itself.
1961 cie->has_z_personality = true;
1962 // Fetch the personality routine pointer's encoding from the
1963 // augmentation data.
1964 if (data >= data_end) return ReportIncomplete(cie);
1965 cie->personality_encoding = DwarfPointerEncoding(*data++);
1966 if (!reader_->ValidEncoding(cie->personality_encoding)) {
1967 reporter_->InvalidPointerEncoding(cie->offset,
1968 cie->personality_encoding);
1969 return false;
1970 }
1971 if (!reader_->UsableEncoding(cie->personality_encoding)) {
1972 reporter_->UnusablePointerEncoding(cie->offset,
1973 cie->personality_encoding);
1974 return false;
1975 }
1976 // Fetch the personality routine's pointer itself from the data.
1977 cie->personality_address =
1978 reader_->ReadEncodedPointer(data, cie->personality_encoding,
1979 &len);
1980 if (len > size_t(data_end - data))
1981 return ReportIncomplete(cie);
1982 data += len;
1983 break;
1984
1985 case DW_Z_has_FDE_address_encoding:
1986 // The CIE's augmentation data holds the pointer encoding to use
1987 // for addresses in the FDE.
1988 if (data >= data_end) return ReportIncomplete(cie);
1989 cie->pointer_encoding = DwarfPointerEncoding(*data++);
1990 if (!reader_->ValidEncoding(cie->pointer_encoding)) {
1991 reporter_->InvalidPointerEncoding(cie->offset,
1992 cie->pointer_encoding);
1993 return false;
1994 }
1995 if (!reader_->UsableEncoding(cie->pointer_encoding)) {
1996 reporter_->UnusablePointerEncoding(cie->offset,
1997 cie->pointer_encoding);
1998 return false;
1999 }
2000 break;
2001
2002 case DW_Z_is_signal_trampoline:
2003 // Frames using this CIE are signal delivery frames.
2004 cie->has_z_signal_frame = true;
2005 break;
2006
2007 default:
2008 // An augmentation we don't recognize.
2009 reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation);
2010 return false;
2011 }
2012 }
2013 }
2014
2015 // The CIE's instructions start here.
2016 cie->instructions = cursor;
2017
2018 return true;
2019 }
2020
ReadFDEFields(FDE * fde)2021 bool CallFrameInfo::ReadFDEFields(FDE *fde) {
2022 const char *cursor = fde->fields;
2023 size_t size;
2024
2025 fde->address = reader_->ReadEncodedPointer(cursor, fde->cie->pointer_encoding,
2026 &size);
2027 if (size > size_t(fde->end - cursor))
2028 return ReportIncomplete(fde);
2029 cursor += size;
2030 reader_->SetFunctionBase(fde->address);
2031
2032 // For the length, we strip off the upper nybble of the encoding used for
2033 // the starting address.
2034 DwarfPointerEncoding length_encoding =
2035 DwarfPointerEncoding(fde->cie->pointer_encoding & 0x0f);
2036 fde->size = reader_->ReadEncodedPointer(cursor, length_encoding, &size);
2037 if (size > size_t(fde->end - cursor))
2038 return ReportIncomplete(fde);
2039 cursor += size;
2040
2041 // If the CIE has a 'z' augmentation string, then augmentation data
2042 // appears here.
2043 if (fde->cie->has_z_augmentation) {
2044 uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &size);
2045 if (size_t(fde->end - cursor) < size + data_size)
2046 return ReportIncomplete(fde);
2047 cursor += size;
2048
2049 // In the abstract, we should walk the augmentation string, and extract
2050 // items from the FDE's augmentation data as we encounter augmentation
2051 // string characters that specify their presence: the ordering of items
2052 // in the augmentation string determines the arrangement of values in
2053 // the augmentation data.
2054 //
2055 // In practice, there's only ever one value in FDE augmentation data
2056 // that we support --- the LSDA pointer --- and we have to bail if we
2057 // see any unrecognized augmentation string characters. So if there is
2058 // anything here at all, we know what it is, and where it starts.
2059 if (fde->cie->has_z_lsda) {
2060 // Check whether the LSDA's pointer encoding is usable now: only once
2061 // we've parsed the FDE's starting address do we call reader_->
2062 // SetFunctionBase, so that the DW_EH_PE_funcrel encoding becomes
2063 // usable.
2064 if (!reader_->UsableEncoding(fde->cie->lsda_encoding)) {
2065 reporter_->UnusablePointerEncoding(fde->cie->offset,
2066 fde->cie->lsda_encoding);
2067 return false;
2068 }
2069
2070 fde->lsda_address =
2071 reader_->ReadEncodedPointer(cursor, fde->cie->lsda_encoding, &size);
2072 if (size > data_size)
2073 return ReportIncomplete(fde);
2074 // Ideally, we would also complain here if there were unconsumed
2075 // augmentation data.
2076 }
2077
2078 cursor += data_size;
2079 }
2080
2081 // The FDE's instructions start after those.
2082 fde->instructions = cursor;
2083
2084 return true;
2085 }
2086
Start()2087 bool CallFrameInfo::Start() {
2088 const char *buffer_end = buffer_ + buffer_length_;
2089 const char *cursor;
2090 bool all_ok = true;
2091 const char *entry_end;
2092 bool ok;
2093
2094 // Traverse all the entries in buffer_, skipping CIEs and offering
2095 // FDEs to the handler.
2096 for (cursor = buffer_; cursor < buffer_end;
2097 cursor = entry_end, all_ok = all_ok && ok) {
2098 FDE fde;
2099
2100 // Make it easy to skip this entry with 'continue': assume that
2101 // things are not okay until we've checked all the data, and
2102 // prepare the address of the next entry.
2103 ok = false;
2104
2105 // Read the entry's prologue.
2106 if (!ReadEntryPrologue(cursor, &fde)) {
2107 if (!fde.end) {
2108 // If we couldn't even figure out this entry's extent, then we
2109 // must stop processing entries altogether.
2110 all_ok = false;
2111 break;
2112 }
2113 entry_end = fde.end;
2114 continue;
2115 }
2116
2117 // The next iteration picks up after this entry.
2118 entry_end = fde.end;
2119
2120 // Did we see an .eh_frame terminating mark?
2121 if (fde.kind == kTerminator) {
2122 // If there appears to be more data left in the section after the
2123 // terminating mark, warn the user. But this is just a warning;
2124 // we leave all_ok true.
2125 if (fde.end < buffer_end) reporter_->EarlyEHTerminator(fde.offset);
2126 break;
2127 }
2128
2129 // In this loop, we skip CIEs. We only parse them fully when we
2130 // parse an FDE that refers to them. This limits our memory
2131 // consumption (beyond the buffer itself) to that needed to
2132 // process the largest single entry.
2133 if (fde.kind != kFDE) {
2134 ok = true;
2135 continue;
2136 }
2137
2138 // Validate the CIE pointer.
2139 if (fde.id > buffer_length_) {
2140 reporter_->CIEPointerOutOfRange(fde.offset, fde.id);
2141 continue;
2142 }
2143
2144 CIE cie;
2145
2146 // Parse this FDE's CIE header.
2147 if (!ReadEntryPrologue(buffer_ + fde.id, &cie))
2148 continue;
2149 // This had better be an actual CIE.
2150 if (cie.kind != kCIE) {
2151 reporter_->BadCIEId(fde.offset, fde.id);
2152 continue;
2153 }
2154 if (!ReadCIEFields(&cie))
2155 continue;
2156
2157 // We now have the values that govern both the CIE and the FDE.
2158 cie.cie = &cie;
2159 fde.cie = &cie;
2160
2161 // Parse the FDE's header.
2162 if (!ReadFDEFields(&fde))
2163 continue;
2164
2165 // Call Entry to ask the consumer if they're interested.
2166 if (!handler_->Entry(fde.offset, fde.address, fde.size,
2167 cie.version, cie.augmentation,
2168 cie.return_address_register)) {
2169 // The handler isn't interested in this entry. That's not an error.
2170 ok = true;
2171 continue;
2172 }
2173
2174 if (cie.has_z_augmentation) {
2175 // Report the personality routine address, if we have one.
2176 if (cie.has_z_personality) {
2177 if (!handler_
2178 ->PersonalityRoutine(cie.personality_address,
2179 IsIndirectEncoding(cie.personality_encoding)))
2180 continue;
2181 }
2182
2183 // Report the language-specific data area address, if we have one.
2184 if (cie.has_z_lsda) {
2185 if (!handler_
2186 ->LanguageSpecificDataArea(fde.lsda_address,
2187 IsIndirectEncoding(cie.lsda_encoding)))
2188 continue;
2189 }
2190
2191 // If this is a signal-handling frame, report that.
2192 if (cie.has_z_signal_frame) {
2193 if (!handler_->SignalHandler())
2194 continue;
2195 }
2196 }
2197
2198 // Interpret the CIE's instructions, and then the FDE's instructions.
2199 State state(reader_, handler_, reporter_, fde.address);
2200 ok = state.InterpretCIE(cie) && state.InterpretFDE(fde);
2201
2202 // Tell the ByteReader that the function start address from the
2203 // FDE header is no longer valid.
2204 reader_->ClearFunctionBase();
2205
2206 // Report the end of the entry.
2207 handler_->End();
2208 }
2209
2210 return all_ok;
2211 }
2212
KindName(EntryKind kind)2213 const char *CallFrameInfo::KindName(EntryKind kind) {
2214 if (kind == CallFrameInfo::kUnknown)
2215 return "entry";
2216 else if (kind == CallFrameInfo::kCIE)
2217 return "common information entry";
2218 else if (kind == CallFrameInfo::kFDE)
2219 return "frame description entry";
2220 else {
2221 assert (kind == CallFrameInfo::kTerminator);
2222 return ".eh_frame sequence terminator";
2223 }
2224 }
2225
ReportIncomplete(Entry * entry)2226 bool CallFrameInfo::ReportIncomplete(Entry *entry) {
2227 reporter_->Incomplete(entry->offset, entry->kind);
2228 return false;
2229 }
2230
Incomplete(uint64 offset,CallFrameInfo::EntryKind kind)2231 void CallFrameInfo::Reporter::Incomplete(uint64 offset,
2232 CallFrameInfo::EntryKind kind) {
2233 fprintf(stderr,
2234 "%s: CFI %s at offset 0x%llx in '%s': entry ends early\n",
2235 filename_.c_str(), CallFrameInfo::KindName(kind), offset,
2236 section_.c_str());
2237 }
2238
EarlyEHTerminator(uint64 offset)2239 void CallFrameInfo::Reporter::EarlyEHTerminator(uint64 offset) {
2240 fprintf(stderr,
2241 "%s: CFI at offset 0x%llx in '%s': saw end-of-data marker"
2242 " before end of section contents\n",
2243 filename_.c_str(), offset, section_.c_str());
2244 }
2245
CIEPointerOutOfRange(uint64 offset,uint64 cie_offset)2246 void CallFrameInfo::Reporter::CIEPointerOutOfRange(uint64 offset,
2247 uint64 cie_offset) {
2248 fprintf(stderr,
2249 "%s: CFI frame description entry at offset 0x%llx in '%s':"
2250 " CIE pointer is out of range: 0x%llx\n",
2251 filename_.c_str(), offset, section_.c_str(), cie_offset);
2252 }
2253
BadCIEId(uint64 offset,uint64 cie_offset)2254 void CallFrameInfo::Reporter::BadCIEId(uint64 offset, uint64 cie_offset) {
2255 fprintf(stderr,
2256 "%s: CFI frame description entry at offset 0x%llx in '%s':"
2257 " CIE pointer does not point to a CIE: 0x%llx\n",
2258 filename_.c_str(), offset, section_.c_str(), cie_offset);
2259 }
2260
UnrecognizedVersion(uint64 offset,int version)2261 void CallFrameInfo::Reporter::UnrecognizedVersion(uint64 offset, int version) {
2262 fprintf(stderr,
2263 "%s: CFI frame description entry at offset 0x%llx in '%s':"
2264 " CIE specifies unrecognized version: %d\n",
2265 filename_.c_str(), offset, section_.c_str(), version);
2266 }
2267
UnrecognizedAugmentation(uint64 offset,const string & aug)2268 void CallFrameInfo::Reporter::UnrecognizedAugmentation(uint64 offset,
2269 const string &aug) {
2270 fprintf(stderr,
2271 "%s: CFI frame description entry at offset 0x%llx in '%s':"
2272 " CIE specifies unrecognized augmentation: '%s'\n",
2273 filename_.c_str(), offset, section_.c_str(), aug.c_str());
2274 }
2275
InvalidPointerEncoding(uint64 offset,uint8 encoding)2276 void CallFrameInfo::Reporter::InvalidPointerEncoding(uint64 offset,
2277 uint8 encoding) {
2278 fprintf(stderr,
2279 "%s: CFI common information entry at offset 0x%llx in '%s':"
2280 " 'z' augmentation specifies invalid pointer encoding: 0x%02x\n",
2281 filename_.c_str(), offset, section_.c_str(), encoding);
2282 }
2283
UnusablePointerEncoding(uint64 offset,uint8 encoding)2284 void CallFrameInfo::Reporter::UnusablePointerEncoding(uint64 offset,
2285 uint8 encoding) {
2286 fprintf(stderr,
2287 "%s: CFI common information entry at offset 0x%llx in '%s':"
2288 " 'z' augmentation specifies a pointer encoding for which"
2289 " we have no base address: 0x%02x\n",
2290 filename_.c_str(), offset, section_.c_str(), encoding);
2291 }
2292
RestoreInCIE(uint64 offset,uint64 insn_offset)2293 void CallFrameInfo::Reporter::RestoreInCIE(uint64 offset, uint64 insn_offset) {
2294 fprintf(stderr,
2295 "%s: CFI common information entry at offset 0x%llx in '%s':"
2296 " the DW_CFA_restore instruction at offset 0x%llx"
2297 " cannot be used in a common information entry\n",
2298 filename_.c_str(), offset, section_.c_str(), insn_offset);
2299 }
2300
BadInstruction(uint64 offset,CallFrameInfo::EntryKind kind,uint64 insn_offset)2301 void CallFrameInfo::Reporter::BadInstruction(uint64 offset,
2302 CallFrameInfo::EntryKind kind,
2303 uint64 insn_offset) {
2304 fprintf(stderr,
2305 "%s: CFI %s at offset 0x%llx in section '%s':"
2306 " the instruction at offset 0x%llx is unrecognized\n",
2307 filename_.c_str(), CallFrameInfo::KindName(kind),
2308 offset, section_.c_str(), insn_offset);
2309 }
2310
NoCFARule(uint64 offset,CallFrameInfo::EntryKind kind,uint64 insn_offset)2311 void CallFrameInfo::Reporter::NoCFARule(uint64 offset,
2312 CallFrameInfo::EntryKind kind,
2313 uint64 insn_offset) {
2314 fprintf(stderr,
2315 "%s: CFI %s at offset 0x%llx in section '%s':"
2316 " the instruction at offset 0x%llx assumes that a CFA rule has"
2317 " been set, but none has been set\n",
2318 filename_.c_str(), CallFrameInfo::KindName(kind), offset,
2319 section_.c_str(), insn_offset);
2320 }
2321
EmptyStateStack(uint64 offset,CallFrameInfo::EntryKind kind,uint64 insn_offset)2322 void CallFrameInfo::Reporter::EmptyStateStack(uint64 offset,
2323 CallFrameInfo::EntryKind kind,
2324 uint64 insn_offset) {
2325 fprintf(stderr,
2326 "%s: CFI %s at offset 0x%llx in section '%s':"
2327 " the DW_CFA_restore_state instruction at offset 0x%llx"
2328 " should pop a saved state from the stack, but the stack is empty\n",
2329 filename_.c_str(), CallFrameInfo::KindName(kind), offset,
2330 section_.c_str(), insn_offset);
2331 }
2332
ClearingCFARule(uint64 offset,CallFrameInfo::EntryKind kind,uint64 insn_offset)2333 void CallFrameInfo::Reporter::ClearingCFARule(uint64 offset,
2334 CallFrameInfo::EntryKind kind,
2335 uint64 insn_offset) {
2336 fprintf(stderr,
2337 "%s: CFI %s at offset 0x%llx in section '%s':"
2338 " the DW_CFA_restore_state instruction at offset 0x%llx"
2339 " would clear the CFA rule in effect\n",
2340 filename_.c_str(), CallFrameInfo::KindName(kind), offset,
2341 section_.c_str(), insn_offset);
2342 }
2343
2344 } // namespace dwarf2reader
2345