1 // Copyright 2021 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/debug/dwarf_line_no.h"
6
7 #include "base/memory/raw_ref.h"
8
9 #ifdef USE_SYMBOLIZE
10 #include <algorithm>
11 #include <cstdint>
12 #include <limits>
13
14 #include <string.h>
15 #include <unistd.h>
16
17 #include "base/debug/buffered_dwarf_reader.h"
18 #include "base/debug/stack_trace.h"
19 #include "base/memory/raw_ptr.h"
20 #include "base/third_party/symbolize/symbolize.h"
21
22 namespace base {
23 namespace debug {
24
25 namespace {
26
// Sentinel for "offset not known": the largest representable offset.
// ReadProgramInfo() stores it in ProgramInfo::end_offset until the real
// program length has been read, and GetCompileUnitName() uses it as the
// initial value of the next-compilation-unit position.
constexpr uint64_t kMaxOffset = std::numeric_limits<uint64_t>::max();

// These numbers are suitable for most compilation units for chrome and
// content_shell. If a compilation unit has bigger number of directories or
// filenames, the additional directories/filenames will be ignored, and the
// stack frames pointing to these directories/filenames will not get line
// numbers. We can't set these numbers too big because they affect the size of
// ProgramInfo which is allocated in the stack.
constexpr int kMaxDirectories = 128;
constexpr size_t kMaxFilenames = 512;
37
38 // DWARF-4 line number program header, section 6.2.4
39 struct ProgramInfo {
40 uint64_t header_length;
41 uint64_t start_offset;
42 uint64_t end_offset;
43 uint8_t minimum_instruction_length;
44 uint8_t maximum_operations_per_instruction;
45 uint8_t default_is_stmt;
46 int8_t line_base;
47 uint8_t line_range;
48 uint8_t opcode_base;
49 uint8_t standard_opcode_lengths[256];
50 uint8_t include_directories_table_offset;
51 uint8_t file_names_table_offset;
52
53 // Store the directories as offsets.
54 int num_directories = 1;
55 uint64_t directory_offsets[kMaxDirectories];
56 uint64_t directory_sizes[kMaxDirectories];
57
58 // Store the file number table offsets.
59 mutable unsigned int num_filenames = 1;
60 mutable uint64_t filename_offsets[kMaxFilenames];
61 mutable uint8_t filename_dirs[kMaxFilenames];
62
OpcodeToAdvancebase::debug::__anon525df51b0111::ProgramInfo63 unsigned int OpcodeToAdvance(uint8_t adjusted_opcode) const {
64 // Special opcodes advance line numbers by an amount based on line_range
65 // and opcode_base. This calculation is taken from 6.2.5.1.
66 return static_cast<unsigned int>(adjusted_opcode) / line_range;
67 }
68 };
69
70 // DWARF-4 line number program registers, section 6.2.2
71 struct LineNumberRegisters {
72 // During the line number program evaluation, some instructions perform a
73 // "commit" which is when the registers have finished calculating a new row in
74 // the line-number table. This callback is executed and can be viewed as a
75 // iterator over all rows in the line number table.
76 class OnCommit {
77 public:
78 virtual void Do(LineNumberRegisters* registers) = 0;
79 };
80
81 raw_ptr<OnCommit> on_commit;
LineNumberRegistersbase::debug::__anon525df51b0111::LineNumberRegisters82 LineNumberRegisters(ProgramInfo info, OnCommit* on_commit)
83 : on_commit(on_commit), is_stmt(info.default_is_stmt) {}
84
85 // Current program counter.
86 uintptr_t address = 0;
87
88 // For VLIW architectures, the index of the operation in the VLIW instruction.
89 unsigned int op_index = 0;
90
91 // Identifies the source file relating to the address in the DWARF File name
92 // table.
93 uint64_t file = 0;
94
95 // Identifies the line number. Starts at 1. Can become 0 if instruction does
96 // not match any line in the file.
97 uint64_t line = 1;
98
99 // Identifies the column within the source line. Starts at 1 though "0"
100 // also means "left edge" of the line.
101 uint64_t column = 0;
102
103 // Boolean determining if this is a recommended spot for a breakpoint.
104 // Should be initialized by the program header.
105 bool is_stmt = false;
106
107 // Indicates start of a basic block.
108 bool basic_block = false;
109
110 // Indicates first byte after a sequence of machine instructions.
111 bool end_sequence = false;
112
113 // Indicates this may be where execution should stop if trying to break for
114 // entering a function.
115 bool prologue_end = false;
116
117 // Indicates this may be where execution should stop if trying to break for
118 // exiting a function.
119 bool epilogue_begin = false;
120
121 // Identifier for the instruction set of the current address.
122 uint64_t isa = 0;
123
124 // Identifies which block the current instruction belongs to.
125 uint64_t discriminator = 0;
126
127 // Values from the previously committed line. See OnCommit interface for more
128 // details. This conceptually should be a copy of the whole
129 // LineNumberRegisters but since only 4 pieces of data are needed, hacking
130 // it inline was easier.
131 uintptr_t last_address = 0;
132 uint64_t last_file = 0;
133 uint64_t last_line = 0;
134 uint64_t last_column = 0;
135
136 // This is the magical calculation for decompressing the line-number
137 // information. The `program_info` provides the parameters for the formula
138 // and the `op_advance` is the input value. See DWARF-4 sections 6.2.5.1 for
139 // the formula.
OpAdvancebase::debug::__anon525df51b0111::LineNumberRegisters140 void OpAdvance(const ProgramInfo* program_info, uint64_t op_advance) {
141 address += program_info->minimum_instruction_length *
142 ((op_index + op_advance) /
143 program_info->maximum_operations_per_instruction);
144
145 op_index = (op_index + op_advance) %
146 program_info->maximum_operations_per_instruction;
147 }
148
149 // Committing a line means the calculation has landed on a stable set of
150 // values that represent an actual entry in the line number table.
CommitLinebase::debug::__anon525df51b0111::LineNumberRegisters151 void CommitLine() {
152 on_commit->Do(this);
153
154 // Inlined or compiler generator code may have line number 0 which isn't
155 // useful to the user. Better to go up one line number.
156 if (line != 0) {
157 last_address = address;
158 last_file = file;
159 last_column = column;
160 last_line = line;
161 }
162 }
163 };
164
// Result record for one program counter lookup. `pc` is the input; the other
// fields stay 0 until a matching line-table row has been found.
struct LineNumberInfo {
  uint64_t pc{0};
  uint64_t line{0};
  uint64_t column{0};

  // Location of the directory and file name strings for the matching row.
  // The values are copied from the `directory_offsets`, `directory_sizes` and
  // `filename_offsets` tables of the ProgramInfo.
  uint64_t module_dir_offset{0};
  uint64_t dir_size{0};
  uint64_t module_filename_offset{0};
};
176
177 // Evaluates a Line Number Program as defined by the rules in section 6.2.5.
EvaluateLineNumberProgram(const int fd,LineNumberInfo * info,uint64_t base_address,uint64_t start,const ProgramInfo & program_info)178 void EvaluateLineNumberProgram(const int fd,
179 LineNumberInfo* info,
180 uint64_t base_address,
181 uint64_t start,
182 const ProgramInfo& program_info) {
183 BufferedDwarfReader reader(fd, start);
184 uint64_t module_relative_pc = info->pc - base_address;
185
186 // Helper that records the line-number table entry corresponding with the
187 // `module_relative_pc`. This is the thing that actually finds the line
188 // number for an address.
189 struct OnCommitImpl : public LineNumberRegisters::OnCommit {
190 private:
191 raw_ptr<LineNumberInfo> info;
192 uint64_t module_relative_pc;
193 const raw_ref<const ProgramInfo> program_info;
194
195 public:
196 OnCommitImpl(LineNumberInfo* info,
197 uint64_t module_relative_pc,
198 const ProgramInfo& program_info)
199 : info(info),
200 module_relative_pc(module_relative_pc),
201 program_info(program_info) {}
202
203 void Do(LineNumberRegisters* registers) override {
204 // When a line is committed, the program counter needs to check if it is
205 // in the [last_address, cur_addres) range. If yes, then the line pertains
206 // to the program counter.
207 if (registers->last_address == 0) {
208 // This is the first table entry so by definition, nothing is in its
209 // range.
210 return;
211 }
212
213 // If module_relative_pc is out of range, skip.
214 if (module_relative_pc < registers->last_address ||
215 module_relative_pc >= registers->address)
216 return;
217
218 if (registers->last_file < program_info->num_filenames) {
219 info->line = registers->last_line;
220 info->column = registers->last_column;
221
222 // Since DW_AT_name in the compile_unit is optional, it may be empty. If
223 // it is, guess that the file in entry 1 is the name. This does not
224 // follow spec, but seems to be common behavior. See the following LLVM
225 // bug for more info: https://reviews.llvm.org/D11003
226 if (registers->last_file == 0 &&
227 program_info->filename_offsets[0] == 0 &&
228 1 < program_info->num_filenames) {
229 program_info->filename_offsets[0] = program_info->filename_offsets[1];
230 program_info->filename_dirs[0] = program_info->filename_dirs[1];
231 }
232
233 if (registers->last_file < kMaxFilenames) {
234 info->module_filename_offset =
235 program_info->filename_offsets[registers->last_file];
236
237 uint8_t dir = program_info->filename_dirs[registers->last_file];
238 info->module_dir_offset = program_info->directory_offsets[dir];
239 info->dir_size = program_info->directory_sizes[dir];
240 }
241 }
242 }
243 } on_commit(info, module_relative_pc, program_info);
244
245 LineNumberRegisters registers(program_info, &on_commit);
246
247 // Special opcode range is [program_info.opcode_base, 255].
248 // Lines can be max incremented by [line_base + line range - 1].
249 // opcode = (desired line increment - line_base) + (line_range * operation
250 // advance) + opcode_base.
251 uint8_t opcode;
252 while (reader.position() < program_info.end_offset && info->line == 0) {
253 if (!reader.ReadInt8(opcode))
254 return;
255
256 // It's SPECIAL OPCODE TIME!. They're so special that they make up the
257 // vast majority of the opcodes and are the first thing described in the
258 // documentation.
259 //
260 // See DWARF-4 spec 6.2.5.1.
261 if (opcode >= program_info.opcode_base) {
262 uint8_t adjusted_opcode = opcode - program_info.opcode_base;
263 registers.OpAdvance(&program_info,
264 program_info.OpcodeToAdvance(adjusted_opcode));
265 const int line_adjust =
266 program_info.line_base + (adjusted_opcode % program_info.line_range);
267 if (line_adjust < 0) {
268 if (static_cast<uint64_t>(-line_adjust) > registers.line)
269 return;
270 registers.line -= static_cast<uint64_t>(-line_adjust);
271 } else {
272 registers.line += static_cast<uint64_t>(line_adjust);
273 }
274 registers.basic_block = false;
275 registers.prologue_end = false;
276 registers.epilogue_begin = false;
277 registers.discriminator = 0;
278 registers.CommitLine();
279 } else {
280 // Standard opcodes
281 switch (opcode) {
282 case 0: {
283 // Extended opcode.
284 uint64_t extended_opcode;
285 uint64_t extended_opcode_length;
286 if (!reader.ReadLeb128(extended_opcode_length))
287 return;
288 uint64_t next_opcode = reader.position() + extended_opcode_length;
289 if (!reader.ReadLeb128(extended_opcode))
290 return;
291 switch (extended_opcode) {
292 case 1: {
293 // DW_LNE_end_sequence
294 registers.end_sequence = true;
295 registers.CommitLine();
296 registers = LineNumberRegisters(program_info, &on_commit);
297 break;
298 }
299
300 case 2: {
301 // DW_LNE_set_address
302 uint32_t value;
303 if (!reader.ReadInt32(value))
304 return;
305 registers.address = value;
306 registers.op_index = 0;
307 break;
308 }
309
310 case 3: {
311 // DW_LNE_define_file
312 //
313 // This should only get used if the filename table itself is null.
314 // Record the module offset for the string and then drop the data.
315 uint64_t filename_offset = reader.position();
316 reader.ReadCString(program_info.end_offset, nullptr, 0);
317
318 // dir index
319 uint64_t value;
320 if (!reader.ReadLeb128(value))
321 return;
322 size_t cur_filename = program_info.num_filenames;
323 if (cur_filename < kMaxFilenames && value < kMaxDirectories) {
324 ++program_info.num_filenames;
325 // Store the offset from the start of file and skip the data to
326 // save memory.
327 program_info.filename_offsets[cur_filename] = filename_offset;
328 program_info.filename_dirs[cur_filename] =
329 static_cast<uint8_t>(value);
330 }
331
332 // modification time
333 if (!reader.ReadLeb128(value))
334 return;
335
336 // source file length
337 if (!reader.ReadLeb128(value))
338 return;
339 break;
340 }
341
342 case 4: {
343 // DW_LNE_set_discriminator
344 uint64_t value;
345 if (!reader.ReadLeb128(value))
346 return;
347 registers.discriminator = value;
348 break;
349 }
350
351 default:
352 abort();
353 }
354
355 // Skip any padding bytes in extended opcode.
356 reader.set_position(next_opcode);
357 break;
358 }
359
360 case 1: {
361 // DW_LNS_copy. This commits the registers to the line number table.
362 registers.CommitLine();
363 registers.discriminator = 0;
364 registers.basic_block = false;
365 registers.prologue_end = false;
366 registers.epilogue_begin = false;
367 break;
368 }
369
370 case 2: {
371 // DW_LNS_advance_pc
372 uint64_t op_advance;
373 if (!reader.ReadLeb128(op_advance))
374 return;
375 registers.OpAdvance(&program_info, op_advance);
376 break;
377 }
378
379 case 3: {
380 // DW_LNS_advance_line
381 int64_t line_advance;
382 if (!reader.ReadLeb128(line_advance))
383 return;
384 if (line_advance < 0) {
385 if (static_cast<uint64_t>(-line_advance) > registers.line)
386 return;
387 registers.line -= static_cast<uint64_t>(-line_advance);
388 } else {
389 registers.line += static_cast<uint64_t>(line_advance);
390 }
391 break;
392 }
393
394 case 4: {
395 // DW_LNS_set_file
396 uint64_t value;
397 if (!reader.ReadLeb128(value))
398 return;
399 registers.file = value;
400 break;
401 }
402
403 case 5: {
404 // DW_LNS_set_column
405 uint64_t value;
406 if (!reader.ReadLeb128(value))
407 return;
408 registers.column = value;
409 break;
410 }
411
412 case 6:
413 // DW_LNS_negate_stmt
414 registers.is_stmt = !registers.is_stmt;
415 break;
416
417 case 7:
418 // DW_LNS_set_basic_block
419 registers.basic_block = true;
420 break;
421
422 case 8:
423 // DW_LNS_const_add_pc
424 registers.OpAdvance(
425 &program_info,
426 program_info.OpcodeToAdvance(255 - program_info.opcode_base));
427 break;
428
429 case 9: {
430 // DW_LNS_fixed_advance_pc
431 uint16_t value;
432 if (!reader.ReadInt16(value))
433 return;
434 registers.address += value;
435 registers.op_index = 0;
436 break;
437 }
438
439 case 10:
440 // DW_LNS_set_prologue_end
441 registers.prologue_end = true;
442 break;
443
444 case 11:
445 // DW_LNS_set_epilogue_begin
446 registers.epilogue_begin = true;
447 break;
448
449 case 12: {
450 // DW_LNS_set_isa
451 uint64_t value;
452 if (!reader.ReadLeb128(value))
453 return;
454 registers.isa = value;
455 break;
456 }
457
458 default:
459 abort();
460 }
461 }
462 }
463 }
464
465 // Parses a 32-bit DWARF-4 line number program header per section 6.2.4.
466 // `cu_name_offset` is the module offset for the 0th entry of the file table.
ParseDwarf4ProgramInfo(BufferedDwarfReader * reader,bool is_64bit,uint64_t cu_name_offset,ProgramInfo * program_info)467 bool ParseDwarf4ProgramInfo(BufferedDwarfReader* reader,
468 bool is_64bit,
469 uint64_t cu_name_offset,
470 ProgramInfo* program_info) {
471 if (!reader->ReadOffset(is_64bit, program_info->header_length))
472 return false;
473 program_info->start_offset = reader->position() + program_info->header_length;
474
475 if (!reader->ReadInt8(program_info->minimum_instruction_length) ||
476 !reader->ReadInt8(program_info->maximum_operations_per_instruction) ||
477 !reader->ReadInt8(program_info->default_is_stmt) ||
478 !reader->ReadInt8(program_info->line_base) ||
479 !reader->ReadInt8(program_info->line_range) ||
480 !reader->ReadInt8(program_info->opcode_base)) {
481 return false;
482 }
483
484 for (int i = 0; i < (program_info->opcode_base - 1); i++) {
485 if (!reader->ReadInt8(program_info->standard_opcode_lengths[i]))
486 return false;
487 }
488
489 // Table ends with a single null line. This basically means search for 2
490 // contiguous empty bytes.
491 uint8_t last = 0, cur = 0;
492 for (;;) {
493 // Read a byte.
494 last = cur;
495 if (!reader->ReadInt8(cur))
496 return false;
497
498 if (last == 0 && cur == 0) {
499 // We're at the last entry where it's a double null.
500 break;
501 }
502
503 // Read in all of the filename.
504 int cur_dir = program_info->num_directories;
505 if (cur_dir < kMaxDirectories) {
506 ++program_info->num_directories;
507 // "-1" is because we have already read the first byte above.
508 program_info->directory_offsets[cur_dir] = reader->position() - 1;
509 program_info->directory_sizes[cur_dir] = 1;
510 }
511 do {
512 if (!reader->ReadInt8(cur))
513 return false;
514 if (cur_dir < kMaxDirectories)
515 ++program_info->directory_sizes[cur_dir];
516 } while (cur != '\0');
517 }
518
519 // Read filename table line-by-line.
520 last = 0;
521 cur = 0;
522 for (;;) {
523 // Read a byte.
524 last = cur;
525 if (!reader->ReadInt8(cur))
526 return false;
527
528 if (last == 0 && cur == 0) {
529 // We're at the last entry where it's a double null.
530 break;
531 }
532
533 // Read in all of the filename. "-1" is because we have already read the
534 // first byte of the filename above.
535 uint64_t filename_offset = reader->position() - 1;
536 do {
537 if (!reader->ReadInt8(cur))
538 return false;
539 } while (cur != '\0');
540
541 uint64_t value;
542
543 // Dir index
544 if (!reader->ReadLeb128(value))
545 return false;
546 size_t cur_filename = program_info->num_filenames;
547 if (cur_filename < kMaxFilenames && value < kMaxDirectories) {
548 ++program_info->num_filenames;
549 program_info->filename_offsets[cur_filename] = filename_offset;
550 program_info->filename_dirs[cur_filename] = static_cast<uint8_t>(value);
551 }
552
553 // Modification time
554 if (!reader->ReadLeb128(value))
555 return false;
556
557 // Bytes in file.
558 if (!reader->ReadLeb128(value))
559 return false;
560 }
561
562 // Set up the 0th filename.
563 program_info->filename_offsets[0] = cu_name_offset;
564 program_info->filename_dirs[0] = 0;
565 program_info->directory_offsets[0] = 0;
566
567 return true;
568 }
569
570 // Returns the offset of the next byte to read.
571 // `program_info.program_end` is guaranteed to be initlialized to either
572 // `kMaxOffset` if the program length could not be processed, or to
573 // the byte after the end of this program.
ReadProgramInfo(const int fd,uint64_t start,uint64_t cu_name_offset,ProgramInfo * program_info)574 bool ReadProgramInfo(const int fd,
575 uint64_t start,
576 uint64_t cu_name_offset,
577 ProgramInfo* program_info) {
578 BufferedDwarfReader reader(fd, start);
579 program_info->end_offset = kMaxOffset;
580
581 // Note that 64-bit dwarf does NOT imply a 64-bit binary and vice-versa. In
582 // fact many 64-bit binaries use 32-bit dwarf encoding.
583 bool is_64bit = false;
584 uint64_t data_length;
585 if (!reader.ReadInitialLength(is_64bit, data_length)) {
586 return false;
587 }
588
589 // Set the program end. This allows the search to recover by skipping an
590 // unparsable program.
591 program_info->end_offset = reader.position() + data_length;
592
593 uint16_t version;
594 if (!reader.ReadInt16(version)) {
595 return false;
596 }
597
598 if (version == 4) {
599 return ParseDwarf4ProgramInfo(&reader, is_64bit, cu_name_offset,
600 program_info);
601 }
602
603 // Currently does not support other DWARF versions.
604 return false;
605 }
606
607 // Attempts to find line-number info for all of |info|. Returns the number of
608 // entries that do not have info yet.
GetLineNumbersInProgram(const int fd,LineNumberInfo * info,uint64_t base_address,uint64_t start,uint64_t cu_name_offset)609 uint64_t GetLineNumbersInProgram(const int fd,
610 LineNumberInfo* info,
611 uint64_t base_address,
612 uint64_t start,
613 uint64_t cu_name_offset) {
614 // Open the program.
615 ProgramInfo program_info;
616 if (ReadProgramInfo(fd, start, cu_name_offset, &program_info)) {
617 EvaluateLineNumberProgram(fd, info, base_address, program_info.start_offset,
618 program_info);
619 }
620
621 return program_info.end_offset;
622 }
623
624 // Scans the .debug_abbrev entry until it finds the Attribute List matching the
625 // `wanted_abbreviation_code`. This is called when parsing a DIE in .debug_info.
AdvancedReaderToAttributeList(BufferedDwarfReader & reader,uint64_t table_end,uint64_t wanted_abbreviation_code,uint64_t & tag,bool & has_children)626 bool AdvancedReaderToAttributeList(BufferedDwarfReader& reader,
627 uint64_t table_end,
628 uint64_t wanted_abbreviation_code,
629 uint64_t& tag,
630 bool& has_children) {
631 // Abbreviation Table entries are:
632 // LEB128 - abbreviation code
633 // LEB128 - the entry's tag
634 // 1 byte - DW_CHILDREN_yes or DW_CHILDREN_no for if entry has children.
635 // [LEB128, LEB128] -- repeated set of attribute + form values in LEB128
636 // [0, 0] -- null entry terminating list is 2 LEB128 0s.
637 while (reader.position() < table_end) {
638 uint64_t abbreviation_code;
639 if (!reader.ReadLeb128(abbreviation_code)) {
640 return false;
641 }
642
643 if (!reader.ReadLeb128(tag)) {
644 return false;
645 }
646
647 uint8_t raw_has_children;
648 if (!reader.ReadInt8(raw_has_children)) {
649 return false;
650 }
651 if (raw_has_children == 0) {
652 has_children = false;
653 } else if (raw_has_children == 1) {
654 has_children = true;
655 } else {
656 return false;
657 }
658
659 if (abbreviation_code == wanted_abbreviation_code) {
660 return true;
661 }
662
663 // Incorrect Abbreviation entry. Skip all of its attributes.
664 uint64_t attr;
665 uint64_t form;
666 do {
667 if (!reader.ReadLeb128(attr) || !reader.ReadLeb128(form)) {
668 return false;
669 }
670 } while (attr != 0 || form != 0);
671 }
672
673 return false;
674 }
675
676 // This reads through a .debug_info compile unit entry to try and extract
677 // the `cu_name_offset` as well as the `debug_line_offset` (offset into the
678 // .debug_lines table` corresponding to `pc`.
679 //
680 // The .debug_info sections are a packed set of bytes whose format is defined
681 // by a corresponding .debug_abbrev entry. Basically .debug_abbrev describes
682 // a struct and .debug_info has a header that tells which struct it is followed
683 // by a bunch of bytes.
684 //
685 // The control flow is to find the .debug_abbrev entry for each .debug_info
686 // entry, then walk through the .debug_abbrev entry to parse the bytes of the
687 // .debug_info entry. A successful parse calculates the address range that the
688 // .debug_info entry covers. When that is retrieved, `pc` can be compared to
689 // the range and a corresponding .debug_info can be found.
690 //
691 // The `debug_info_start` be the start of the whole .debug_info section or an
692 // offset into the section if it was known ahead of time (perhaps by consulting
693 // .debug_aranges).
694 //
695 // To fully interpret this data, the .debug_ranges and .debug_str sections
696 // also need to be interpreted.
GetCompileUnitName(int fd,uint64_t debug_info_start,uint64_t debug_info_end,uint64_t pc,uint64_t module_base_address,uint64_t * debug_line_offset,uint64_t * cu_name_offset)697 bool GetCompileUnitName(int fd,
698 uint64_t debug_info_start,
699 uint64_t debug_info_end,
700 uint64_t pc,
701 uint64_t module_base_address,
702 uint64_t* debug_line_offset,
703 uint64_t* cu_name_offset) {
704 // Ensure defined `cu_name_offset` in case DW_AT_name is missing.
705 *cu_name_offset = 0;
706
707 // Open .debug_info and .debug_abbrev as both are needed to find the
708 // DW_AT_name for the DW_TAG_compile_unit or DW_TAG_partial_unit
709 // corresponding to the given address.
710
711 ElfW(Shdr) debug_abbrev;
712 constexpr static char kDebugAbbrevSectionName[] = ".debug_abbrev";
713 if (!google::GetSectionHeaderByName(fd, kDebugAbbrevSectionName,
714 sizeof(kDebugAbbrevSectionName),
715 &debug_abbrev)) {
716 return false;
717 }
718 uint64_t debug_abbrev_end = debug_abbrev.sh_offset + debug_abbrev.sh_size;
719
720 ElfW(Shdr) debug_str;
721 constexpr static char kDebugStrSectionName[] = ".debug_str";
722 if (!google::GetSectionHeaderByName(
723 fd, kDebugStrSectionName, sizeof(kDebugStrSectionName), &debug_str)) {
724 return false;
725 }
726 uint64_t debug_str_end = debug_str.sh_offset + debug_str.sh_size;
727
728 ElfW(Shdr) debug_ranges;
729 constexpr static char kDebugRangesSectionName[] = ".debug_ranges";
730 if (!google::GetSectionHeaderByName(fd, kDebugRangesSectionName,
731 sizeof(kDebugRangesSectionName),
732 &debug_ranges)) {
733 return false;
734 }
735 uint64_t debug_ranges_end = debug_ranges.sh_offset + debug_ranges.sh_size;
736
737 // Iterate Compile Units.
738 uint64_t next_compilation_unit = kMaxOffset;
739 for (BufferedDwarfReader reader(fd, debug_info_start);
740 reader.position() < debug_info_end;
741 reader.set_position(next_compilation_unit)) {
742 bool is_64bit;
743 uint64_t length;
744 uint16_t dwarf_version;
745 uint64_t abbrev_offset;
746 uint8_t address_size;
747 if (!reader.ReadCommonHeader(is_64bit, length, dwarf_version, abbrev_offset,
748 address_size, next_compilation_unit)) {
749 return false;
750 }
751
752 // Compilation Unit Header parsed. Now read the first tag which is either a
753 // DW_TAG_compile_unit or DW_TAG_partial_unit. The entry type is designated
754 // by a LEB128 number that needs to be cross-referenced in the abbreviations
755 // table to understand the format of the rest of the entry.
756 uint64_t abbreviation_code;
757 if (!reader.ReadLeb128(abbreviation_code)) {
758 return false;
759 }
760
761 // Find entry in the abbreviation table.
762 BufferedDwarfReader abbrev_reader(fd,
763 debug_abbrev.sh_offset + abbrev_offset);
764 uint64_t tag;
765 bool has_children;
766 AdvancedReaderToAttributeList(abbrev_reader, debug_abbrev_end,
767 abbreviation_code, tag, has_children);
768
769 // Ignore if it has children.
770 static constexpr int kDW_TAG_compile_unit = 0x11;
771 static constexpr int kDW_TAG_partial_unit = 0x3c;
772 if (tag != kDW_TAG_compile_unit && tag != kDW_TAG_partial_unit) {
773 return false;
774 }
775
776 // Use table to parse the name, high, and low attributes.
777 static constexpr int kDW_AT_name = 0x3; // string
778 static constexpr int kDW_AT_stmt_list = 0x10; // lineptr
779 static constexpr int kDW_AT_low_pc = 0x11; // address
780 static constexpr int kDW_AT_high_pc = 0x12; // address, constant
781 static constexpr int kDW_AT_ranges = 0x55; // rangelistptr
782 uint64_t attr;
783 uint64_t form;
784 uint64_t low_pc = 0;
785 uint64_t high_pc = 0;
786 bool high_pc_is_offset = false;
787 bool is_found_in_range = false;
788 do {
789 if (!abbrev_reader.ReadLeb128(attr)) {
790 return false;
791 }
792 if (!abbrev_reader.ReadLeb128(form)) {
793 return false;
794 }
795 // Table from 7.5.4, Figure 20.
796 enum Form {
797 kDW_FORM_addr = 0x01,
798 kDW_FORM_block2 = 0x03,
799 kDW_FORM_block4 = 0x04,
800 kDW_FORM_data2 = 0x05,
801 kDW_FORM_data4 = 0x06,
802 kDW_FORM_data8 = 0x07,
803 kDW_FORM_string = 0x08,
804 kDW_FORM_block = 0x09,
805 kDW_FORM_block1 = 0x0a,
806 kDW_FORM_data1 = 0x0b,
807 kDW_FORM_flag = 0x0c,
808 kDW_FORM_sdata = 0x0d,
809 kDW_FORM_strp = 0x0e,
810 kDW_FORM_udata = 0x0f,
811 kDW_FORM_ref_addr = 0x10,
812 kDW_FORM_ref1 = 0x11,
813 kDW_FORM_ref2 = 0x12,
814 kDW_FORM_ref4 = 0x13,
815 kDW_FORM_ref8 = 0x14,
816 kDW_FORM_ref_udata = 0x15,
817 kDW_FORM_ref_indrect = 0x16,
818 kDW_FORM_sec_offset = 0x17,
819 kDW_FORM_exprloc = 0x18,
820 kDW_FORM_flag_present = 0x19,
821 kDW_FORM_ref_sig8 = 0x20,
822 };
823
824 switch (form) {
825 case kDW_FORM_string: {
826 // Read the value into if necessary `out`
827 if (attr == kDW_AT_name) {
828 *cu_name_offset = reader.position();
829 }
830 if (!reader.ReadCString(debug_info_end, nullptr, 0)) {
831 return false;
832 }
833 } break;
834
835 case kDW_FORM_strp: {
836 uint64_t strp_offset;
837 if (!reader.ReadOffset(is_64bit, strp_offset)) {
838 return false;
839 }
840
841 if (attr == kDW_AT_name) {
842 uint64_t pos = debug_str.sh_offset + strp_offset;
843 if (pos >= debug_str_end) {
844 return false;
845 }
846 *cu_name_offset = pos;
847 }
848 } break;
849
850 case kDW_FORM_addr: {
851 uint64_t address;
852 if (!reader.ReadAddress(address_size, address)) {
853 return false;
854 }
855
856 if (attr == kDW_AT_low_pc) {
857 low_pc = address;
858 } else if (attr == kDW_AT_high_pc) {
859 high_pc_is_offset = false;
860 high_pc = address;
861 }
862 } break;
863
864 case kDW_FORM_data1: {
865 uint8_t data;
866 if (!reader.ReadInt8(data)) {
867 return false;
868 }
869 if (attr == kDW_AT_high_pc) {
870 high_pc_is_offset = true;
871 high_pc = data;
872 }
873 } break;
874
875 case kDW_FORM_data2: {
876 uint16_t data;
877 if (!reader.ReadInt16(data)) {
878 return false;
879 }
880 if (attr == kDW_AT_high_pc) {
881 high_pc_is_offset = true;
882 high_pc = data;
883 }
884 } break;
885
886 case kDW_FORM_data4: {
887 uint32_t data;
888 if (!reader.ReadInt32(data)) {
889 return false;
890 }
891 if (attr == kDW_AT_high_pc) {
892 high_pc_is_offset = true;
893 high_pc = data;
894 }
895 } break;
896
897 case kDW_FORM_data8: {
898 uint64_t data;
899 if (!reader.ReadInt64(data)) {
900 return false;
901 }
902 if (attr == kDW_AT_high_pc) {
903 high_pc_is_offset = true;
904 high_pc = data;
905 }
906 } break;
907
908 case kDW_FORM_sdata: {
909 int64_t data;
910 if (!reader.ReadLeb128(data)) {
911 return false;
912 }
913 if (attr == kDW_AT_high_pc) {
914 high_pc_is_offset = true;
915 high_pc = static_cast<uint64_t>(data);
916 }
917 } break;
918
919 case kDW_FORM_udata: {
920 uint64_t data;
921 if (!reader.ReadLeb128(data)) {
922 return false;
923 }
924 if (attr == kDW_AT_high_pc) {
925 high_pc_is_offset = true;
926 high_pc = data;
927 }
928 } break;
929
930 case kDW_FORM_ref_addr:
931 case kDW_FORM_sec_offset: {
932 uint64_t value;
933 if (!reader.ReadOffset(is_64bit, value)) {
934 return false;
935 }
936
937 if (attr == kDW_AT_ranges) {
938 uint64_t current_base_address = module_base_address;
939 BufferedDwarfReader ranges_reader(fd,
940 debug_ranges.sh_offset + value);
941
942 while (ranges_reader.position() < debug_ranges_end) {
943 // Ranges are 2 addresses in size.
944 uint64_t range_start;
945 uint64_t range_end;
946 if (!ranges_reader.ReadAddress(address_size, range_start)) {
947 return false;
948 }
949 if (!ranges_reader.ReadAddress(address_size, range_end)) {
950 return false;
951 }
952 uint64_t relative_pc = pc - current_base_address;
953
954 if (range_start == 0 && range_end == 0) {
955 if (!is_found_in_range) {
956 // Time to go to the next iteration.
957 goto next_cu;
958 }
959 break;
960 } else if (((address_size == 4) &&
961 (range_start == 0xffffffffUL)) ||
962 ((address_size == 8) &&
963 (range_start == 0xffffffffffffffffULL))) {
964 // Check if this is a new base add value. 2.17.3
965 current_base_address = range_end;
966 } else {
967 if (relative_pc >= range_start && relative_pc < range_end) {
968 is_found_in_range = true;
969 break;
970 }
971 }
972 }
973 } else if (attr == kDW_AT_stmt_list) {
974 *debug_line_offset = value;
975 }
976 } break;
977
978 case kDW_FORM_flag:
979 case kDW_FORM_ref1:
980 case kDW_FORM_block1: {
981 uint8_t dummy;
982 if (!reader.ReadInt8(dummy)) {
983 return false;
984 }
985 } break;
986
987 case kDW_FORM_ref2:
988 case kDW_FORM_block2: {
989 uint16_t dummy;
990 if (!reader.ReadInt16(dummy)) {
991 return false;
992 }
993 } break;
994
995 case kDW_FORM_ref4:
996 case kDW_FORM_block4: {
997 uint32_t dummy;
998 if (!reader.ReadInt32(dummy)) {
999 return false;
1000 }
1001 } break;
1002
1003 case kDW_FORM_ref8: {
1004 uint64_t dummy;
1005 if (!reader.ReadInt64(dummy)) {
1006 return false;
1007 }
1008 } break;
1009
1010 case kDW_FORM_ref_udata:
1011 case kDW_FORM_block: {
1012 uint64_t dummy;
1013 if (!reader.ReadLeb128(dummy)) {
1014 return false;
1015 }
1016 } break;
1017
1018 case kDW_FORM_exprloc: {
1019 uint64_t value;
1020 if (!reader.ReadLeb128(value)) {
1021 return false;
1022 }
1023 reader.set_position(reader.position() + value);
1024 } break;
1025 }
1026 } while (attr != 0 || form != 0);
1027
1028 // Because attributes can be in any order, most of the computations (minus
1029 // checking range list entries) cannot happen until everything is parsed for
1030 // the one .debug_info entry. Do the analysis here.
1031 if (is_found_in_range) {
      // Well-formed compile_unit and partial_unit tags have either a
      // DW_AT_ranges entry or a DW_AT_low_pc entry. If is_found_in_range
      // matched as true, then this entry matches the given pc.
1035 return true;
1036 }
1037
1038 // If high_pc_is_offset is 0, it was never found in the DIE. This indicates
1039 // a single address entry. Only look at the low_pc.
1040 {
1041 uint64_t module_relative_pc = pc - module_base_address;
1042 if (high_pc == 0 && module_relative_pc != low_pc) {
1043 goto next_cu;
1044 }
1045
1046 // Otherwise this is a contiguous range DIE. Normalize the meaning of the
1047 // high_pc field and check if it contains the pc.
1048 if (high_pc_is_offset) {
1049 high_pc = low_pc + high_pc;
1050 high_pc_is_offset = false;
1051 }
1052
1053 if (module_relative_pc >= low_pc && module_relative_pc < high_pc) {
1054 return true;
1055 }
1056 }
1057
1058 // Not found.
1059 next_cu:;
1060 }
1061 return false;
1062 }
1063
1064 // Thin wrapper over `GetCompileUnitName` that opens the .debug_info section.
ReadCompileUnit(int fd,uint64_t pc,uint64_t cu_offset,uint64_t base_address,uint64_t * debug_line_offset,uint64_t * cu_name_offset)1065 bool ReadCompileUnit(int fd,
1066 uint64_t pc,
1067 uint64_t cu_offset,
1068 uint64_t base_address,
1069 uint64_t* debug_line_offset,
1070 uint64_t* cu_name_offset) {
1071 if (cu_offset == 0) {
1072 return false;
1073 }
1074
1075 ElfW(Shdr) debug_info;
1076 constexpr static char kDebugInfoSectionName[] = ".debug_info";
1077 if (!google::GetSectionHeaderByName(fd, kDebugInfoSectionName,
1078 sizeof(kDebugInfoSectionName),
1079 &debug_info)) {
1080 return false;
1081 }
1082 uint64_t debug_info_end = debug_info.sh_offset + debug_info.sh_size;
1083
1084 return GetCompileUnitName(fd, debug_info.sh_offset + cu_offset,
1085 debug_info_end, pc, base_address, debug_line_offset,
1086 cu_name_offset);
1087 }
1088
1089 // Takes the information from `info` and renders the data located in the
1090 // object file `fd` into `out`. The format looks like:
1091 //
1092 // [../path/to/foo.cc:10:40]
1093 //
1094 // which would indicate line 10 column 40 in ../path/to/foo.cc
SerializeLineNumberInfoToString(int fd,const LineNumberInfo & info,char * out,size_t out_size)1095 void SerializeLineNumberInfoToString(int fd,
1096 const LineNumberInfo& info,
1097 char* out,
1098 size_t out_size) {
1099 size_t out_pos = 0;
1100 if (info.module_filename_offset) {
1101 BufferedDwarfReader reader(fd, info.module_dir_offset);
1102 if (info.module_dir_offset != 0) {
1103 out_pos +=
1104 reader.ReadCString(kMaxOffset, out + out_pos, out_size - out_pos);
1105 out[out_pos - 1] = '/';
1106 }
1107
1108 reader.set_position(info.module_filename_offset);
1109 out_pos +=
1110 reader.ReadCString(kMaxOffset, out + out_pos, out_size - out_pos);
1111 } else {
1112 out[out_pos++] = '\0';
1113 }
1114
1115 out[out_pos - 1] = ':';
1116 char* tmp = internal::itoa_r(static_cast<intptr_t>(info.line), out + out_pos,
1117 out_size - out_pos, 10, 0);
1118 out_pos += strlen(tmp) + 1;
1119 out[out_pos - 1] = ':';
1120 tmp = internal::itoa_r(static_cast<intptr_t>(info.column), out + out_pos,
1121 out_size - out_pos, 10, 0);
1122 out_pos += strlen(tmp) + 1;
1123 }
1124
1125 // Reads the Line Number info for a compile unit.
GetLineNumberInfoFromObject(int fd,uint64_t pc,uint64_t cu_offset,uint64_t base_address,char * out,size_t out_size)1126 bool GetLineNumberInfoFromObject(int fd,
1127 uint64_t pc,
1128 uint64_t cu_offset,
1129 uint64_t base_address,
1130 char* out,
1131 size_t out_size) {
1132 uint64_t cu_name_offset;
1133 uint64_t debug_line_offset;
1134 if (!ReadCompileUnit(fd, pc, cu_offset, base_address, &debug_line_offset,
1135 &cu_name_offset)) {
1136 return false;
1137 }
1138
1139 ElfW(Shdr) debug_line;
1140 constexpr static char kDebugLineSectionName[] = ".debug_line";
1141 if (!google::GetSectionHeaderByName(fd, kDebugLineSectionName,
1142 sizeof(kDebugLineSectionName),
1143 &debug_line)) {
1144 return false;
1145 }
1146
1147 LineNumberInfo info;
1148 info.pc = pc;
1149 uint64_t line_info_program_offset = debug_line.sh_offset + debug_line_offset;
1150 GetLineNumbersInProgram(fd, &info, base_address, line_info_program_offset,
1151 cu_name_offset);
1152
1153 if (info.line == 0) {
1154 // No matching line number or filename found.
1155 return false;
1156 }
1157
1158 SerializeLineNumberInfoToString(fd, info, out, out_size);
1159
1160 return true;
1161 }
1162
// Pairs the program counter of one stack frame with the slot that receives
// the offset of the compile unit covering it.
struct FrameInfo {
  // Destination for the compile unit offset. `ProcessFlatArangeSet` writes
  // through this pointer when it finds an address range containing `pc`.
  raw_ptr<uint64_t> cu_offset;
  // Program counter of the frame. `ProcessFlatArangeSet` subtracts the module
  // base address from it before comparing against address ranges.
  uintptr_t pc;
};
1167
1168 // Returns the number of frames still missing info.
1169 //
1170 // The aranges table is a mapping of ranges to compilation units. Given an array
1171 // of `frame_info`, this finds the compile units for each of the frames doing
1172 // only one pass over the table. It does not preserve the order of `frame_info`.
1173 //
1174 // The main benefit of this function is preserving the single pass through the
1175 // table which is important for performance.
ProcessFlatArangeSet(BufferedDwarfReader * reader,uint64_t next_set,uint8_t address_size,uint64_t base_address,uint64_t cu_offset,FrameInfo * frame_info,size_t num_frames)1176 size_t ProcessFlatArangeSet(BufferedDwarfReader* reader,
1177 uint64_t next_set,
1178 uint8_t address_size,
1179 uint64_t base_address,
1180 uint64_t cu_offset,
1181 FrameInfo* frame_info,
1182 size_t num_frames) {
1183 size_t unsorted_start = 0;
1184 while (unsorted_start < num_frames && reader->position() < next_set) {
1185 uint64_t start;
1186 uint64_t length;
1187 if (!reader->ReadAddress(address_size, start)) {
1188 break;
1189 }
1190 if (!reader->ReadAddress(address_size, length)) {
1191 break;
1192 }
1193 uint64_t end = start + length;
1194 for (size_t i = unsorted_start; i < num_frames; ++i) {
1195 uint64_t module_relative_pc = frame_info[i].pc - base_address;
1196 if (start <= module_relative_pc && module_relative_pc < end) {
1197 *frame_info[i].cu_offset = cu_offset;
1198 if (i != unsorted_start) {
1199 // Move to sorted section.
1200 std::swap(frame_info[i], frame_info[unsorted_start]);
1201 }
1202 unsorted_start++;
1203 }
1204 }
1205 }
1206
1207 return unsorted_start;
1208 }
1209
1210 // This is a pre-step that uses the .debug_aranges table to find all the compile
1211 // units for a given set of frames. This allows code to avoid iterating over
1212 // all compile units at a later step in the symbolization process.
PopulateCompileUnitOffsets(int fd,FrameInfo * frame_info,size_t num_frames,uint64_t base_address)1213 void PopulateCompileUnitOffsets(int fd,
1214 FrameInfo* frame_info,
1215 size_t num_frames,
1216 uint64_t base_address) {
1217 ElfW(Shdr) debug_aranges;
1218 constexpr static char kDebugArangesSectionName[] = ".debug_aranges";
1219 if (!google::GetSectionHeaderByName(fd, kDebugArangesSectionName,
1220 sizeof(kDebugArangesSectionName),
1221 &debug_aranges)) {
1222 return;
1223 }
1224 uint64_t debug_aranges_end = debug_aranges.sh_offset + debug_aranges.sh_size;
1225 uint64_t next_arange_set = kMaxOffset;
1226 size_t unsorted_start = 0;
1227 for (BufferedDwarfReader reader(fd, debug_aranges.sh_offset);
1228 unsorted_start < num_frames && reader.position() < debug_aranges_end;
1229 reader.set_position(next_arange_set)) {
1230 bool is_64bit;
1231 uint64_t length;
1232 uint16_t arange_version;
1233 uint64_t debug_info_offset;
1234 uint8_t address_size;
1235 if (!reader.ReadCommonHeader(is_64bit, length, arange_version,
1236 debug_info_offset, address_size,
1237 next_arange_set)) {
1238 return;
1239 }
1240
1241 uint8_t segment_size;
1242 if (!reader.ReadInt8(segment_size)) {
1243 return;
1244 }
1245
1246 if (segment_size != 0) {
1247 // Only flat namespaces are supported.
1248 return;
1249 }
1250
1251 // The tuple list is aligned, to a multiple of the tuple-size after the
1252 // section sstart. Because this code only supports flat address spaces, this
1253 // means 2*address_size.
1254 while (((reader.position() - debug_aranges.sh_offset) %
1255 (2 * address_size)) != 0) {
1256 uint8_t dummy;
1257 if (!reader.ReadInt8(dummy)) {
1258 return;
1259 }
1260 }
1261 unsorted_start += ProcessFlatArangeSet(
1262 &reader, next_arange_set, address_size, base_address, debug_info_offset,
1263 &frame_info[unsorted_start], num_frames - unsorted_start);
1264 }
1265 }
1266
1267 } // namespace
1268
GetDwarfSourceLineNumber(void * pc,uintptr_t cu_offset,char * out,size_t out_size)1269 bool GetDwarfSourceLineNumber(void* pc,
1270 uintptr_t cu_offset,
1271 char* out,
1272 size_t out_size) {
1273 uint64_t pc0 = reinterpret_cast<uint64_t>(pc);
1274 uint64_t object_start_address = 0;
1275 uint64_t object_base_address = 0;
1276
1277 google::FileDescriptor object_fd(google::FileDescriptor(
1278 google::OpenObjectFileContainingPcAndGetStartAddress(
1279 pc0, object_start_address, object_base_address, nullptr, 0)));
1280
1281 if (!object_fd.get()) {
1282 return false;
1283 }
1284
1285 if (!GetLineNumberInfoFromObject(object_fd.get(), pc0, cu_offset,
1286 object_base_address, out, out_size)) {
1287 return false;
1288 }
1289
1290 return true;
1291 }
1292
GetDwarfCompileUnitOffsets(void * const * trace,uint64_t * cu_offsets,size_t num_frames)1293 void GetDwarfCompileUnitOffsets(void* const* trace,
1294 uint64_t* cu_offsets,
1295 size_t num_frames) {
1296 // Ensure `cu_offsets` always has a known state.
1297 memset(cu_offsets, 0, sizeof(uint64_t) * num_frames);
1298
1299 FrameInfo* frame_info =
1300 static_cast<FrameInfo*>(alloca(sizeof(FrameInfo) * num_frames));
1301 for (size_t i = 0; i < num_frames; i++) {
1302 // The `cu_offset` also encodes the original sort order.
1303 frame_info[i].cu_offset = &cu_offsets[i];
1304 frame_info[i].pc = reinterpret_cast<uintptr_t>(trace[i]);
1305 }
1306 auto pc_comparator = [](const FrameInfo& lhs, const FrameInfo& rhs) {
1307 return lhs.pc < rhs.pc;
1308 };
1309
1310 // Use heapsort to avoid recursion in a signal handler.
1311 std::make_heap(&frame_info[0], &frame_info[num_frames - 1], pc_comparator);
1312 std::sort_heap(&frame_info[0], &frame_info[num_frames - 1], pc_comparator);
1313
1314 // Walk the frame_info one compilation unit at a time.
1315 for (size_t cur_frame = 0; cur_frame < num_frames; ++cur_frame) {
1316 uint64_t object_start_address = 0;
1317 uint64_t object_base_address = 0;
1318 google::FileDescriptor object_fd(google::FileDescriptor(
1319 google::OpenObjectFileContainingPcAndGetStartAddress(
1320 frame_info[cur_frame].pc, object_start_address, object_base_address,
1321 nullptr, 0)));
1322
1323 // TODO(https://crbug.com/1335630): Consider exposing the end address so a
1324 // range of frames can be bulk-populated. This was originally implemented,
1325 // but line number symbolization is currently broken by default (and also
1326 // broken in sandboxed processes). The various issues will be addressed
1327 // incrementally in follow-up patches, and the optimization here restored if
1328 // needed.
1329
1330 PopulateCompileUnitOffsets(object_fd.get(), &frame_info[cur_frame], 1,
1331 object_base_address);
1332 }
1333 }
1334
1335 } // namespace debug
1336 } // namespace base
1337
1338 #else // USE_SYMBOLIZE
1339
1340 #include <cstring>
1341
1342 namespace base {
1343 namespace debug {
1344
// Stub for builds without USE_SYMBOLIZE: no line information is available, so
// every lookup reports failure. None of the arguments are read and `out` is
// left untouched.
bool GetDwarfSourceLineNumber(void* pc,
                              uintptr_t cu_offset,
                              char* out,
                              size_t out_size) {
  constexpr bool kLineInfoAvailable = false;
  return kLineInfoAvailable;
}
1351
// Stub for builds without USE_SYMBOLIZE: no compile units can be looked up,
// so all `num_frames` entries of `cu_offsets` are set to 0. `trace` is not
// read.
void GetDwarfCompileUnitOffsets(void* const* trace,
                                uint64_t* cu_offsets,
                                size_t num_frames) {
  // Provide defined values even in the stub. The byte count must use the
  // element size: `sizeof(cu_offsets)` would be the size of the pointer, which
  // is smaller than an element on 32-bit targets.
  memset(cu_offsets, 0, sizeof(*cu_offsets) * num_frames);
}
1358
1359 } // namespace debug
1360 } // namespace base
1361
1362 #endif
1363