1 /* Copyright (C) 2007-2010 The Android Open Source Project
2 **
3 ** This software is licensed under the terms of the GNU General Public
4 ** License version 2, as published by the Free Software Foundation, and
5 ** may be copied, distributed, and modified under those terms.
6 **
7 ** This program is distributed in the hope that it will be useful,
8 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
9 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 ** GNU General Public License for more details.
11 */
12
13 /*
14 * Contains declarations of types, constants and structures
15 * describing DWARF format.
16 */
17
18 #ifndef ELFF_DWARF_DEFS_H_
19 #define ELFF_DWARF_DEFS_H_
20
21 #include "dwarf.h"
22 #include "elf_defs.h"
23
/* Packing attribute applied to the DWARF structures declared in this file,
 * so that they are packed to 1 byte. */
#define ELFF_PACKED __attribute__((packed))
26
27 /*
28 * Helper types for misc. DWARF variables.
29 */
30
/* DWARF abbreviation number. */
typedef uint32_t  Dwarf_AbbrNum;

/* DWARF tag ID. */
typedef uint16_t  Dwarf_Tag;

/* DWARF attribute ID. */
typedef uint16_t  Dwarf_At;

/* DWARF form ID. */
typedef uint16_t  Dwarf_Form;

/* Offset value as used in 32-bit DWARF. */
typedef uint32_t  Dwarf32_Off;

/* Offset value as used in 64-bit DWARF. */
typedef uint64_t  Dwarf64_Off;
48
/* Kinds of values that can be obtained while decoding a DWARF attribute.
 * The comment next to each enumerator names the data type of the value.
 */
typedef enum DwarfValueType {
  DWARF_VALUE_UNKNOWN = 1,   /* Undefined */
  DWARF_VALUE_U8      = 2,   /* uint8_t */
  DWARF_VALUE_S8      = 3,   /* int8_t */
  DWARF_VALUE_U16     = 4,   /* uint16_t */
  DWARF_VALUE_S16     = 5,   /* int16_t */
  DWARF_VALUE_U32     = 6,   /* uint32_t */
  DWARF_VALUE_S32     = 7,   /* int32_t */
  DWARF_VALUE_U64     = 8,   /* uint64_t */
  DWARF_VALUE_S64     = 9,   /* int64_t */
  DWARF_VALUE_STR     = 10,  /* const char* */
  DWARF_VALUE_PTR32   = 11,  /* 32-bit address */
  DWARF_VALUE_PTR64   = 12,  /* 64-bit address */
  DWARF_VALUE_BLOCK   = 13   /* Dwarf_Block */
} DwarfValueType;
90
91 /* Describes block of data, stored directly in the mapped .debug_info
92 * section. This type is used to represent an attribute encoded with
93 * DW_FORM_block# form.
94 */
95 typedef struct Dwarf_Block {
96 /* Pointer to the block data inside mapped .debug_info section. */
97 const void* block_ptr;
98
99 /* Byte size of the block data. */
100 Elf_Word block_size;
101 } Dwarf_Block;
102
103 /* Describes a value, obtained from the mapped .debug_info section
104 * during DWARF attribute decoding.
105 */
106 typedef struct Dwarf_Value {
107 /* Unites all possible data types for the value.
108 * See DwarfValueType for the list of types.
109 */
110 union {
111 Elf_Byte u8;
112 Elf_Sbyte s8;
113 Elf_Half u16;
114 Elf_Shalf s16;
115 Elf_Word u32;
116 Elf_Sword s32;
117 Elf_Xword u64;
118 Elf_Sxword s64;
119 Elf_Word ptr32;
120 Elf_Xword ptr64;
121 const char* str;
122 Dwarf_Block block;
123 };
124
125 /* Value type (defines which variable in the union abowe
126 * contains the value).
127 */
128 DwarfValueType type;
129
130 /* Number of bytes that encode this value in .debug_info section
131 * of ELF file.
132 */
133 Elf_Word encoded_size;
134 } Dwarf_Value;
135
136 /* DWARF's LEB128 data type. LEB128 is defined as:
137 * Variable Length Data. "Little Endian Base 128" (LEB128) numbers. LEB128 is
138 * a scheme for encoding integers densely that exploits the assumption that
139 * most integers are small in magnitude. (This encoding is equally suitable
140 * whether the target machine architecture represents data in big-endian or
141 * littleendian order. It is "little endian" only in the sense that it avoids
142 * using space to represent the "big" end of an unsigned integer, when the big
143 * end is all zeroes or sign extension bits).
144 *
145 * Unsigned LEB128 numbers are encoded as follows: start at the low order end
146 * of an unsigned integer and chop it into 7-bit chunks. Place each chunk into
147 * the low order 7 bits of a byte. Typically, several of the high order bytes
148 * will be zero; discard them. Emit the remaining bytes in a stream, starting
149 * with the low order byte; set the high order bit on each byte except the last
150 * emitted byte. The high bit of zero on the last byte indicates to the decoder
151 * that it has encountered the last byte. The integer zero is a special case,
152 * consisting of a single zero byte.
153 *
154 * The encoding for signed LEB128 numbers is similar, except that the criterion
155 * for discarding high order bytes is not whether they are zero, but whether
156 * they consist entirely of sign extension bits. Consider the 32-bit integer
157 * -2. The three high level bytes of the number are sign extension, thus LEB128
158 * would represent it as a single byte containing the low order 7 bits, with
159 * the high order bit cleared to indicate the end of the byte stream. Note that
160 * there is nothing within the LEB128 representation that indicates whether an
161 * encoded number is signed or unsigned. The decoder must know what type of
162 * number to expect.
163 *
164 * NOTE: It's assumed that LEB128 will not contain encodings for integers,
165 * larger than 64 bit.
166 */
167 typedef struct ELFF_PACKED Dwarf_Leb128 {
168 /* Beginning of the LEB128 block. */
169 Elf_Byte val;
170
171 /* Pulls actual value, encoded with this LEB128 block.
172 * Param:
173 * value - Upon return will contain value, encoded with this LEB128 block.
174 * sign - If true, the caller expects the LEB128 to contain a signed
175 * integer, otherwise, caller expects an unsigned integer value to be
176 * encoded with this LEB128 block.
177 */
get_commonDwarf_Leb128178 void get_common(Dwarf_Value* value, bool sign) const {
179 value->u64 = 0;
180 /* Integer zero is a special case. */
181 if (val == 0) {
182 value->type = sign ? DWARF_VALUE_S32 : DWARF_VALUE_U32;
183 value->encoded_size = 1;
184 return;
185 }
186
187 /* We've got to reconstruct the integer. */
188 value->type = DWARF_VALUE_UNKNOWN;
189 value->encoded_size = 0;
190
191 /* Byte by byte loop though the LEB128, reconstructing the integer from
192 * 7-bits chunks. Byte with 8-th bit set to zero indicates the end
193 * of the LEB128 block. For signed integers, 7-th bit of the last LEB128
194 * byte controls the sign. If 7-th bit of the last LEB128 byte is set,
195 * the integer is negative. If 7-th bit of the last LEB128 byte is not
196 * set, the integer is positive.
197 */
198 const Elf_Byte* cur = &val;
199 Elf_Word shift = 0;
200 while ((*cur & 0x80) != 0) {
201 value->u64 |= (static_cast<Elf_Xword>(*cur) & 0x7F) << shift;
202 shift += 7;
203 value->encoded_size++;
204 cur++;
205 }
206 value->u64 |= (static_cast<Elf_Xword>(*cur) & 0x7F) << shift;
207 value->encoded_size++;
208
209 /* LEB128 format doesn't carry any info of the sizeof of the integer it
210 * represents. We well guess it, judging by the highest bit set in the
211 * reconstucted integer.
212 */
213 if ((value->u64 & 0xFFFFFFFF00000000LL) == 0) {
214 /* 32-bit integer. */
215 if (sign) {
216 value->type = DWARF_VALUE_S32;
217 if (((*cur) & 0x40) != 0) {
218 // Value is negative.
219 value->u64 |= - (1 << (shift + 7));
220 } else if ((value->u32 & 0x80000000) != 0) {
221 // Make sure we don't report negative value in this case.
222 value->type = DWARF_VALUE_S64;
223 }
224 } else {
225 value->type = DWARF_VALUE_U32;
226 }
227 } else {
228 /* 64-bit integer. */
229 if (sign) {
230 value->type = DWARF_VALUE_S64;
231 if (((*cur) & 0x40) != 0) {
232 // Value is negative.
233 value->u64 |= - (1 << (shift + 7));
234 }
235 } else {
236 value->type = DWARF_VALUE_U64;
237 }
238 }
239 }
240
241 /* Pulls actual unsigned value, encoded with this LEB128 block.
242 * See get_common() for more info.
243 * Param:
244 * value - Upon return will contain unsigned value, encoded with
245 * this LEB128 block.
246 */
get_unsignedDwarf_Leb128247 void get_unsigned(Dwarf_Value* value) const {
248 get_common(value, false);
249 }
250
251 /* Pulls actual signed value, encoded with this LEB128 block.
252 * See get_common() for more info.
253 * Param:
254 * value - Upon return will contain signed value, encoded with
255 * this LEB128 block.
256 */
get_signedDwarf_Leb128257 void get_signed(Dwarf_Value* value) const {
258 get_common(value, true);
259 }
260
261 /* Pulls LEB128 value, advancing past this LEB128 block.
262 * See get_common() for more info.
263 * Return:
264 * Pointer to the byte past this LEB128 block.
265 */
processDwarf_Leb128266 const void* process(Dwarf_Value* value, bool sign) const {
267 get_common(value, sign);
268 return INC_CPTR(&val, value->encoded_size);
269 }
270
271 /* Pulls LEB128 unsigned value, advancing past this LEB128 block.
272 * See process() for more info.
273 */
process_unsignedDwarf_Leb128274 const void* process_unsigned(Dwarf_Value* value) const {
275 return process(value, false);
276 }
277
278 /* Pulls LEB128 signed value, advancing past this LEB128 block.
279 * See process() for more info.
280 */
process_signedDwarf_Leb128281 const void* process_signed(Dwarf_Value* value) const {
282 return process(value, true);
283 }
284 } Dwarf_Leb128;
285
286 /* DIE attribute descriptor in the .debug_abbrev section.
287 * Attribute descriptor contains two LEB128 values. First one provides
288 * attribute ID (one of DW_AT_XXX values), and the second one provides
289 * format (one of DW_FORMAT_XXX values), in which attribute value is
290 * encoded in the .debug_info section of the ELF file.
291 */
292 typedef struct ELFF_PACKED Dwarf_Abbr_AT {
293 /* Attribute ID (DW_AT_XXX).
294 * Attribute format (DW_FORMAT_XXX) follows immediately.
295 */
296 Dwarf_Leb128 at;
297
298 /* Checks if this is a separator descriptor.
299 * Zero is an invalid attribute ID, indicating the end of attribute
300 * list for the current DIE.
301 */
is_separatorDwarf_Abbr_AT302 bool is_separator() const {
303 return at.val == 0;
304 }
305
306 /* Pulls attribute data, advancing past this descriptor.
307 * Param:
308 * at_value - Upon return contains attribute value of this descriptor.
309 * form - Upon return contains form value of this descriptor.
310 * Return:
311 * Pointer to the byte past this descriptor block (usually, next
312 * attribute decriptor).
313 */
processDwarf_Abbr_AT314 const Dwarf_Abbr_AT* process(Dwarf_At* at_value, Dwarf_Form* form) const {
315 if (is_separator()) {
316 /* Size of separator descriptor is always 2 bytes. */
317 *at_value = 0;
318 *form = 0;
319 return INC_CPTR_T(Dwarf_Abbr_AT, &at.val, 2);
320 }
321
322 Dwarf_Value val;
323
324 /* Process attribute ID. */
325 const Dwarf_Leb128* next =
326 reinterpret_cast<const Dwarf_Leb128*>(at.process_unsigned(&val));
327 *at_value = val.u16;
328
329 /* Follow with processing the form. */
330 next = reinterpret_cast<const Dwarf_Leb128*>(next->process_unsigned(&val));
331 *form = val.u16;
332 return reinterpret_cast<const Dwarf_Abbr_AT*>(next);
333 }
334 } Dwarf_Abbr_AT;
335
336 /* DIE abbreviation descriptor in the .debug_abbrev section.
337 * DIE abbreviation descriptor contains three parameters. The first one is a
338 * LEB128 value, that encodes 1 - based abbreviation descriptor number.
339 * Abbreviation descriptor numbers seems to be always in sequential order, and
340 * are counted on per-compilation unit basis. I.e. abbreviation number for the
341 * first DIE abbreviation descriptor of each compilation unit is always 1.
342 *
343 * Besides abbreviation number, DIE abbreviation descriptor contains two more
344 * values. The first one (after abbr_num) is a LEB128 value containing DIE's
345 * tag value, and the second one is one byte flag specifying whether or not
346 * the DIE contains any cildren.
347 *
348 * This descriptor is immediately followed by a list of attribute descriptors
349 * (see Dwarf_Abbr_AT) for the DIE represented by this abbreviation descriptor.
350 */
351 typedef struct ELFF_PACKED Dwarf_Abbr_DIE {
352 /* 1 - based abbreviation number for the DIE. */
353 Dwarf_Leb128 abbr_num;
354
355 /* Gets abbreviation number for this descriptor. */
get_abbr_numDwarf_Abbr_DIE356 Dwarf_AbbrNum get_abbr_num() const {
357 Dwarf_Value val;
358 abbr_num.get_unsigned(&val);
359 return val.u16;
360 }
361
362 /* Gets DIE tag for this descriptor. */
get_tagDwarf_Abbr_DIE363 Dwarf_Tag get_tag() const {
364 Dwarf_Tag tag;
365 process(NULL, &tag);
366 return tag;
367 }
368
369 /* Pulls DIE abbreviation descriptor data, advancing past this descriptor.
370 * Param:
371 * abbr_index - Upon return contains abbreviation number for this
372 * descriptor. This parameter can be NULL, if the caller is not interested
373 * in this value.
374 * tag - Upon return contains tag of the DIE for this descriptor. This
375 * parameter can be NULL, if the caller is not interested in this value.
376 * form - Upon return contains form of the DIE for this descriptor.
377 * Return:
378 * Pointer to the list of attribute descriptors for the DIE.
379 */
processDwarf_Abbr_DIE380 const Dwarf_Abbr_AT* process(Dwarf_AbbrNum* abbr_index,
381 Dwarf_Tag* tag) const {
382 Dwarf_Value val;
383 const Dwarf_Leb128* next =
384 reinterpret_cast<const Dwarf_Leb128*>(abbr_num.process_unsigned(&val));
385 if (abbr_index != NULL) {
386 *abbr_index = val.u32;
387 }
388
389 /* Next one is a "tag". */
390 next = reinterpret_cast<const Dwarf_Leb128*>(next->process_unsigned(&val));
391 if (tag != NULL) {
392 *tag = val.u16;
393 }
394
395 /* Next one is a "has children" one byte flag. We're not interested in it,
396 * so jump to the list of attribute descriptors that immediately follows
397 * this DIE descriptor. */
398 return INC_CPTR_T(Dwarf_Abbr_AT, next, 1);
399 }
400 } Dwarf_Abbr_DIE;
401
402 /* DIE descriptor in the .debug_info section.
403 * DIE descriptor contains one LEB128-encoded value, containing DIE's
404 * abbreviation descriptor number in the .debug_abbrev section.
405 *
406 * DIE descriptor is immediately followed by the list of DIE attribute values,
407 * format of wich is defined by the list of attribute descriptors in the
408 * .debug_abbrev section, that immediately follow the DIE attribute descriptor,
409 * addressed by this descriptor's abbr_num LEB128.
410 */
411 typedef struct ELFF_PACKED Dwarf_DIE {
412 /* 1 - based index of DIE abbreviation descriptor (Dwarf_Abbr_DIE) for this
413 * DIE in the .debug_abbrev section.
414 *
415 * NOTE: DIE abbreviation descriptor indexes are tied to the compilation
416 * unit. In other words, each compilation unit restarts counting DIE
417 * abbreviation descriptors from 1.
418 *
419 * NOTE: Zero is invalid value for this field, indicating that this DIE is a
420 * separator (usually it ends a list of "child" DIEs)
421 */
422 Dwarf_Leb128 abbr_num;
423
424 /* Checks if this is a separator DIE. */
is_separatorDwarf_DIE425 bool is_separator() const {
426 return abbr_num.val == 0;
427 }
428
429 /* Gets (1 - based) abbreviation number for this DIE. */
get_abbr_numDwarf_DIE430 Dwarf_AbbrNum get_abbr_num() const {
431 Dwarf_Value val;
432 abbr_num.get_unsigned(&val);
433 return val.u16;
434 }
435
436 /* Pulls DIE information, advancing past this descriptor to DIE attributes.
437 * Param:
438 * abbr_num - Upon return contains abbreviation number for this DIE. This
439 * parameter can be NULL, if the caller is not interested in this value.
440 * Return:
441 * Pointer to the byte past this descriptor (the list of DIE attributes).
442 */
processDwarf_DIE443 const Elf_Byte* process(Dwarf_AbbrNum* abbr_number) const {
444 if (is_separator()) {
445 if (abbr_number != NULL) {
446 *abbr_number = 0;
447 }
448 // Size of a separator DIE is 1 byte.
449 return INC_CPTR_T(Elf_Byte, &abbr_num.val, 1);
450 }
451 Dwarf_Value val;
452 const void* ret = abbr_num.process_unsigned(&val);
453 if (abbr_number != NULL) {
454 *abbr_number = val.u32;
455 }
456 return reinterpret_cast<const Elf_Byte*>(ret);
457 }
458 } Dwarf_DIE;
459
460 /*
461 * Variable size headers.
462 * When encoding size value in DWARF, the first 32 bits of a "size" header
463 * define header type. If first 32 bits of the header contain 0xFFFFFFFF
464 * value, this is 64-bit size header with the following 64 bits encoding
465 * the size. Otherwise, if first 32 bits are not 0xFFFFFFFF, they contain
466 * 32-bit size value.
467 */
468
469 /* Size header for 32-bit DWARF. */
470 typedef struct ELFF_PACKED Dwarf32_SizeHdr {
471 /* Size value. */
472 Elf_Word size;
473 } Dwarf32_SizeHdr;
474
475 /* Size header for 64-bit DWARF. */
476 typedef struct ELFF_PACKED Dwarf64_SizeHdr {
477 /* Size selector. For 64-bit DWARF this field is set to 0xFFFFFFFF */
478 Elf_Word size_selector;
479
480 /* Actual size value. */
481 Elf_Xword size;
482 } Dwarf64_SizeHdr;
483
484 /* Compilation unit header in the .debug_info section.
485 * Template param:
486 * Dwarf_SizeHdr - Type for the header's size field. Must be Dwarf32_SizeHdr
487 * for 32-bit DWARF, or Dwarf64_SizeHdr for 64-bit DWARF.
488 * Elf_Off - Type for abbrev_offset field. Must be Elf_Word for for 32-bit
489 * DWARF, or Elf_Xword for 64-bit DWARF.
490 */
491 template <typename Dwarf_SizeHdr, typename Elf_Off>
492 struct ELFF_PACKED Dwarf_CUHdr {
493 /* Size of the compilation unit data in .debug_info section. */
494 Dwarf_SizeHdr size_hdr;
495
496 /* Compilation unit's DWARF version stamp. */
497 Elf_Half version;
498
499 /* Relative (to the beginning of .debug_abbrev section data) offset of the
500 * beginning of abbreviation sequence for this compilation unit.
501 */
502 Elf_Off abbrev_offset;
503
504 /* Pointer size for this compilation unit (should be 4, or 8). */
505 Elf_Byte address_size;
506 };
507 /* Compilation unit header in the .debug_info section for 32-bit DWARF. */
508 typedef Dwarf_CUHdr<Dwarf32_SizeHdr, Elf_Word> Dwarf32_CUHdr;
509 /* Compilation unit header in the .debug_info section for 64-bit DWARF. */
510 typedef Dwarf_CUHdr<Dwarf64_SizeHdr, Elf_Xword> Dwarf64_CUHdr;
511
512 /* CU STMTL header in the .debug_line section.
513 * Template param:
514 * Dwarf_SizeHdr - Type for the header's size field. Must be Dwarf32_SizeHdr
515 * for 32-bit DWARF, or Dwarf64_SizeHdr for 64-bit DWARF.
516 * Elf_Size - Type for header_length field. Must be Elf_Word for for 32-bit
517 * DWARF, or Elf_Xword for 64-bit DWARF.
518 */
519 template <typename Dwarf_SizeHdr, typename Elf_Size>
520 struct ELFF_PACKED Dwarf_STMTLHdr {
521 /* The size in bytes of the line number information for this compilation
522 * unit, not including the unit_length field itself. */
523 Dwarf_SizeHdr unit_length;
524
525 /* A version number. This number is specific to the line number information
526 * and is independent of the DWARF version number. */
527 Elf_Half version;
528
529 /* The number of bytes following the header_length field to the beginning of
530 * the first byte of the line number program itself. In the 32-bit DWARF
531 * format, this is a 4-byte unsigned length; in the 64-bit DWARF format,
532 * this field is an 8-byte unsigned length. */
533 Elf_Size header_length;
534
535 /* The size in bytes of the smallest target machine instruction. Line number
536 * program opcodes that alter the address register first multiply their
537 * operands by this value. */
538 Elf_Byte min_instruction_len;
539
540 /* The initial value of the is_stmt register. */
541 Elf_Byte default_is_stmt;
542
543 /* This parameter affects the meaning of the special opcodes. */
544 Elf_Sbyte line_base;
545
546 /* This parameter affects the meaning of the special opcodes. */
547 Elf_Byte line_range;
548
549 /* The number assigned to the first special opcode. */
550 Elf_Byte opcode_base;
551
552 /* This is first opcode in an array specifying the number of LEB128 operands
553 * for each of the standard opcodes. The first element of the array
554 * corresponds to the opcode whose value is 1, and the last element
555 * corresponds to the opcode whose value is opcode_base - 1. By increasing
556 * opcode_base, and adding elements to this array, new standard opcodes can
557 * be added, while allowing consumers who do not know about these new opcodes
558 * to be able to skip them. NOTE: this array points to the mapped
559 * .debug_line section. */
560 Elf_Byte standard_opcode_lengths;
561 };
562 /* CU STMTL header in the .debug_line section for 32-bit DWARF. */
563 typedef Dwarf_STMTLHdr<Dwarf32_SizeHdr, Elf_Word> Dwarf32_STMTLHdr;
564 /* CU STMTL header in the .debug_line section for 64-bit DWARF. */
565 typedef Dwarf_STMTLHdr<Dwarf64_SizeHdr, Elf_Xword> Dwarf64_STMTLHdr;
566
567 /* Source file descriptor in the .debug_line section.
568 * Descriptor begins with zero-terminated file name, followed by an ULEB128,
569 * encoding directory index in the list of included directories, followed by
570 * an ULEB12, encoding file modification time, followed by an ULEB12, encoding
571 * file size.
572 */
573 typedef struct ELFF_PACKED Dwarf_STMTL_FileDesc {
574 /* Zero-terminated file name. */
575 char file_name[1];
576
577 /* Checks of this descriptor ends the list. */
is_last_entryDwarf_STMTL_FileDesc578 bool is_last_entry() const {
579 return file_name[0] == '\0';
580 }
581
582 /* Gets file name. */
get_file_nameDwarf_STMTL_FileDesc583 const char* get_file_name() const {
584 return file_name;
585 }
586
587 /* Processes this descriptor, advancing to the next one.
588 * Param:
589 * dir_index - Upon return contains index of the parent directory in the
590 * list of included directories. Can be NULL if caller is not interested
591 * in this value.
592 * Return:
593 * Pointer to the next source file descriptor in the list.
594 */
processDwarf_STMTL_FileDesc595 const Dwarf_STMTL_FileDesc* process(Elf_Word* dir_index) const {
596 if (is_last_entry()) {
597 return this;
598 }
599
600 /* First parameter: include directory index. */
601 Dwarf_Value tmp;
602 const Dwarf_Leb128* leb =
603 INC_CPTR_T(Dwarf_Leb128, file_name, strlen(file_name) + 1);
604 leb = reinterpret_cast<const Dwarf_Leb128*>(leb->process_unsigned(&tmp));
605 if (dir_index != NULL) {
606 *dir_index = tmp.u32;
607 }
608 /* Process file time. */
609 leb = reinterpret_cast<const Dwarf_Leb128*>(leb->process_unsigned(&tmp));
610 /* Process file size. */
611 return reinterpret_cast<const Dwarf_STMTL_FileDesc*>(leb->process_unsigned(&tmp));
612 }
613
614 /* Gets directory index for this descriptor. */
get_dir_indexDwarf_STMTL_FileDesc615 Elf_Word get_dir_index() const {
616 assert(!is_last_entry());
617 if (is_last_entry()) {
618 return 0;
619 }
620 /* Get directory index. */
621 Dwarf_Value ret;
622 const Dwarf_Leb128* leb =
623 INC_CPTR_T(Dwarf_Leb128, file_name, strlen(file_name) + 1);
624 leb->process_unsigned(&ret);
625 return ret.u32;
626 }
627 } Dwarf_STMTL_FileDesc;
628
629 /* Encapsulates a DIE attribute, collected during ELF file parsing.
630 */
631 class DIEAttrib {
632 public:
633 /* Constructs DIEAttrib intance. */
DIEAttrib()634 DIEAttrib()
635 : at_(0),
636 form_(0) {
637 value_.type = DWARF_VALUE_UNKNOWN;
638 }
639
640 /* Destructs DIEAttrib intance. */
~DIEAttrib()641 ~DIEAttrib() {
642 }
643
644 /* Gets DWARF attribute ID (DW_AT_Xxx) for this property. */
at()645 Dwarf_At at() const {
646 return at_;
647 }
648
649 /* Gets DWARF form ID (DW_FORM_Xxx) for this property. */
form()650 Dwarf_Form form() const {
651 return form_;
652 }
653
654 /* Gets value of this property. */
value()655 const Dwarf_Value* value() const {
656 return &value_;
657 }
658
659 /* Value of this property. */
660 Dwarf_Value value_;
661
662 /* DWARF attribute ID (DW_AT_Xxx) for this property. */
663 Dwarf_At at_;
664
665 /* DWARF form ID (DW_FORM_Xxx) for this property. */
666 Dwarf_Form form_;
667 };
668
669 /* Parse tag context.
670 * This structure is used as an ELF file parsing parameter, limiting collected
671 * DIEs by the list of tags.
672 */
673 typedef struct DwarfParseContext {
674 /* Zero-terminated list of tags to collect DIEs for. If this field is NULL,
675 * DIEs for all tags will be collected during the parsing. */
676 const Dwarf_Tag* tags;
677 } DwarfParseContext;
678
679 /* Checks if a DIE with the given tag should be collected during the parsing.
680 * Param:
681 * parse_context - Parse context to check the tag against. This parameter can
682 * be NULL, indicating that all tags should be collected.
683 * tag - Tag to check.
684 * Return:
685 * true if a DIE with the given tag should be collected during the parsing,
686 * or false, if the DIE should not be collected.
687 */
688 static inline bool
collect_die(const DwarfParseContext * parse_context,Dwarf_Tag tag)689 collect_die(const DwarfParseContext* parse_context, Dwarf_Tag tag) {
690 if (parse_context == NULL || parse_context->tags == NULL) {
691 return true;
692 }
693 for (const Dwarf_Tag* tags = parse_context->tags; *tags != 0; tags++) {
694 if (*tags == tag) {
695 return true;
696 }
697 }
698 return false;
699 }
700
701 /* Encapsulates an array of Dwarf_Abbr_DIE pointers, cached for a compilation
702 * unit. Although Dwarf_Abbr_DIE descriptors in the .debug_abbrev section of
703 * the ELF file seems to be always in sequential order, DIE descriptors may
704 * reference them randomly. So, to provide better performance, we will cache
705 * all Dwarf_Abbr_DIE pointers, that were found for each DIE. Since all of the
706 * Dwarf_Abbr_DIE are sequential, an array is the best way to cache them.
707 *
708 * NOTE: Objects of this class are instantiated one per each CU, as all DIE
709 * abbreviation numberation is restarted from 1 for each new CU.
710 */
711 class DwarfAbbrDieArray {
712 public:
713 /* Constructs DwarfAbbrDieArray instance.
714 * Most of the CUs don't have too many unique Dwarf_Abbr_DIEs, so, in order
715 * to decrease the amount of memory allocation calls, we will preallocate
716 * a relatively small array for them along with the instance of this class,
717 * hopping, that all Dwarf_Abbr_DIEs for the CU will fit into it.
718 */
DwarfAbbrDieArray()719 DwarfAbbrDieArray()
720 : array_(&small_array_[0]),
721 array_size_(ELFF_ARRAY_SIZE(small_array_)),
722 count_(0) {
723 }
724
725 /* Destructs DwarfAbbrDieArray instance. */
~DwarfAbbrDieArray()726 ~DwarfAbbrDieArray() {
727 if (array_ != &small_array_[0]) {
728 delete[] array_;
729 }
730 }
731
732 /* Adds new entry to the array
733 * Param:
734 * abbr - New entry to add.
735 * num - Abbreviation number for the adding entry.
736 * NOTE: before adding, this method will verify that descriptor for the
737 * given abbreviation number has not been cached yet.
738 * NOTE: due to the nature of this array, entries MUST be added strictly
739 * in sequential order.
740 * Return:
741 * true on success, false on failure.
742 */
add(const Dwarf_Abbr_DIE * abbr,Dwarf_AbbrNum num)743 bool add(const Dwarf_Abbr_DIE* abbr, Dwarf_AbbrNum num) {
744 assert(num != 0);
745 if (num == 0) {
746 // Zero is illegal DIE abbreviation number.
747 _set_errno(EINVAL);
748 return false;
749 }
750
751 if (num <= count_) {
752 // Already cached.
753 return true;
754 }
755
756 // Enforce strict sequential order.
757 assert(num == (count_ + 1));
758 if (num != (count_ + 1)) {
759 _set_errno(EINVAL);
760 return false;
761 }
762
763 if (num >= array_size_) {
764 /* Expand the array. Make it 64 entries bigger than adding entry number.
765 * NOTE: that we don't check for an overflow here, since we secured
766 * ourselves from that by enforcing strict sequential order. So, an
767 * overflow may happen iff number of entries cached in this array is
768 * close to 4G, which is a) totally unreasonable, and b) we would die
769 * long before this amount of entries is cached.
770 */
771 Dwarf_AbbrNum new_size = num + 64;
772
773 // Reallocate.
774 const Dwarf_Abbr_DIE** new_array = new const Dwarf_Abbr_DIE*[new_size];
775 assert(new_array != NULL);
776 if (new_array == NULL) {
777 _set_errno(ENOMEM);
778 return false;
779 }
780 memcpy(new_array, array_, count_ * sizeof(const Dwarf_Abbr_DIE*));
781 if (array_ != &small_array_[0]) {
782 delete[] array_;
783 }
784 array_ = new_array;
785 array_size_ = new_size;
786 }
787
788 // Abbreviation numbers are 1-based.
789 array_[num - 1] = abbr;
790 count_++;
791 return true;
792 }
793
794 /* Adds new entry to the array
795 * Param:
796 * abbr - New entry to add.
797 * Return:
798 * true on success, false on failure.
799 */
add(const Dwarf_Abbr_DIE * abbr)800 bool add(const Dwarf_Abbr_DIE* abbr) {
801 return add(abbr, abbr->get_abbr_num());
802 }
803
804 /* Gets an entry from the array
805 * Param:
806 * num - 1-based index of an entry to get.
807 * Return:
808 * Entry on success, or NULL if num exceeds the number of entries
809 * contained in the array.
810 */
get(Dwarf_AbbrNum num)811 const Dwarf_Abbr_DIE* get(Dwarf_AbbrNum num) const {
812 assert(num != 0 && num <= count_);
813 if (num != 0 && num <= count_) {
814 return array_[num - 1];
815 } else {
816 _set_errno(EINVAL);
817 return NULL;
818 }
819 }
820
821 /* Caches Dwarf_Abbr_DIEs into this array up to the requested number.
822 * NOTE: This method cannot be called on an empty array. Usually, first
823 * entry is inserted into this array when CU object is initialized.
824 * Param:
825 * num - Entry number to cache entries up to.
826 * Return:
827 * Last cached entry (actually, an entry for the 'num' index).
828 */
cache_to(Dwarf_AbbrNum num)829 const Dwarf_Abbr_DIE* cache_to(Dwarf_AbbrNum num) {
830 /* Last cached DIE abbreviation. We always should have cached at least one
831 * abbreviation for the CU DIE itself, added via "add" method when CU
832 * object was initialized. */
833 const Dwarf_Abbr_DIE* cur_abbr = get(count_);
834 assert(cur_abbr != NULL);
835 if (cur_abbr == NULL) {
836 return NULL;
837 }
838
839 /* Starting with the last cached DIE abbreviation, loop through the
840 * remaining DIE abbreviations in the .debug_abbrev section of the
841 * mapped ELF file, caching them until we reach the requested
842 * abbreviation descriptor number. Normally, the very next DIE
843 * abbreviation will stop the loop. */
844 while (num > count_) {
845 Dwarf_AbbrNum abbr_num;
846 Dwarf_Tag tmp2;
847 Dwarf_Form tmp3;
848 Dwarf_At tmp4;
849
850 /* Process all AT abbreviations for the current DIE entry, reaching next
851 * DIE abbreviation. */
852 const Dwarf_Abbr_AT* abbr_at = cur_abbr->process(&abbr_num, &tmp2);
853 while (!abbr_at->is_separator()) {
854 abbr_at = abbr_at->process(&tmp4, &tmp3);
855 }
856
857 // Next DIE abbreviation is right after the separator AT abbreviation.
858 cur_abbr = reinterpret_cast<const Dwarf_Abbr_DIE*>
859 (abbr_at->process(&tmp4, &tmp3));
860 if (!add(cur_abbr)) {
861 return NULL;
862 }
863 }
864
865 return array_[num - 1];
866 }
867
868 /* Empties array and frees allocations. */
empty()869 void empty() {
870 if (array_ != &small_array_[0]) {
871 delete[] array_;
872 array_ = &small_array_[0];
873 array_size_ = sizeof(small_array_) / sizeof(small_array_[0]);
874 }
875 count_ = 0;
876 }
877
878 protected:
879 /* Array, preallocated in anticipation of relatively small number of
880 * DIE abbreviations in compilation unit. */
881 const Dwarf_Abbr_DIE* small_array_[64];
882
883 /* Array of Dwarf_Abbr_DIE pointers, cached for a compilation unit. */
884 const Dwarf_Abbr_DIE** array_;
885
886 /* Current size of the array. */
887 Dwarf_AbbrNum array_size_;
888
889 /* Number of entries, cached in the array. */
890 Dwarf_AbbrNum count_;
891 };
892
893 /* Encapsulates a state machine for the "Line Number Program", that is run
894 * on data conained in the mapped .debug_line section.
895 */
896 class DwarfStateMachine {
897 public:
898 /* Constructs DwarfStateMachine instance.
899 * Param:
900 * set_is_stmt - Matches value of default_is_stmt field in the STMTL header.
901 * see Dwarf_STMTL_HdrXX.
902 */
DwarfStateMachine(bool set_is_stmt)903 explicit DwarfStateMachine(bool set_is_stmt)
904 : address_(0),
905 file_(1),
906 line_(1),
907 column_(0),
908 discriminator_(0),
909 is_stmt_(set_is_stmt),
910 basic_block_(false),
911 end_sequence_(false),
912 prologue_end_(false),
913 epilogue_begin_(false),
914 isa_(0),
915 set_file_info_(NULL) {
916 }
917
918 /* Destructs DwarfStateMachine instance. */
~DwarfStateMachine()919 ~DwarfStateMachine() {
920 }
921
922 /* Resets the state to default.
923 * Param:
924 * set_is_stmt - Matches value of default_is_stmt field in the STMTL header.
925 * see Dwarf_STMTL_HdrXX.
926 */
reset(bool set_is_stmt)927 void reset(bool set_is_stmt) {
928 address_ = 0;
929 file_ = 1;
930 line_ = 1;
931 column_ = 0;
932 discriminator_ = 0;
933 is_stmt_ = set_is_stmt;
934 basic_block_ = false;
935 end_sequence_ = false;
936 prologue_end_ = false;
937 epilogue_begin_ = false;
938 isa_ = 0;
939 set_file_info_ = NULL;
940 }
941
942 /*
943 * Machine state.
944 */
945
946 /* Current address (current PC value). */
947 Elf_Xword address_;
948
949 /* Current index of source file descriptor. */
950 Elf_Word file_;
951
952 /* Current line in the current source file. */
953 Elf_Word line_;
954
955 /* Current column. */
956 Elf_Word column_;
957
958 /* Current discriminator value. */
959 Elf_Word discriminator_;
960
961 /* Current STMT flag. */
962 bool is_stmt_;
963
964 /* Current basic block flag. */
965 bool basic_block_;
966
967 /* Current end of sequence flag. */
968 bool end_sequence_;
969
970 /* Current end of prologue flag. */
971 bool prologue_end_;
972
973 /* Current epilogue begin flag. */
974 bool epilogue_begin_;
975
976 /* Current ISA value. */
977 Elf_Word isa_;
978
979 /* Current value for explicitly set current source file descriptor.
980 * If not NULL, this descriptor has priority over the descriptor, addressed
981 * by the file_ member of this class. */
982 const Dwarf_STMTL_FileDesc* set_file_info_;
983 };
984
985 /* Checks if given tag belongs to a routine. */
986 static inline bool
dwarf_tag_is_routine(Dwarf_Tag tag)987 dwarf_tag_is_routine(Dwarf_Tag tag) {
988 return tag == DW_TAG_inlined_subroutine ||
989 tag == DW_TAG_subprogram ||
990 tag == DW_AT_main_subprogram;
991 }
992
993 /* Checks if given tag belongs to a compilation unit. */
994 static inline bool
dwarf_tag_is_cu(Dwarf_Tag tag)995 dwarf_tag_is_cu(Dwarf_Tag tag) {
996 return tag == DW_TAG_compile_unit ||
997 tag == DW_TAG_partial_unit;
998 }
999
1000 #endif // ELFF_DWARF_DEFS_H_
1001