1 /**
2 * @file bfd_support.cpp
3 * BFD muck we have to deal with.
4 *
5 * @remark Copyright 2005 OProfile authors
6 * @remark Read the file COPYING
7 *
8 * @author John Levon
9 */
10
11 #include "bfd_support.h"
12
13 #include "op_bfd.h"
14 #include "op_fileio.h"
15 #include "op_config.h"
16 #include "string_manip.h"
17 #include "file_manip.h"
18 #include "cverb.h"
19 #include "locate_images.h"
20
21 #include <cstdlib>
22 #include <cstring>
23 #include <cassert>
24 #include <iostream>
25 #include <fstream>
26 #include <sstream>
27 #include <string>
28 #include <cstring>
29 #include <cstdlib>
30
31 using namespace std;
32
33 extern verbose vbfd;
34
35 namespace {
36
37
check_format(string const & file,bfd ** ibfd)38 void check_format(string const & file, bfd ** ibfd)
39 {
40 if (!bfd_check_format_matches(*ibfd, bfd_object, NULL)) {
41 cverb << vbfd << "BFD format failure for " << file << endl;
42 bfd_close(*ibfd);
43 *ibfd = NULL;
44 }
45 }
46
47
separate_debug_file_exists(string & name,unsigned long const crc,extra_images const & extra)48 bool separate_debug_file_exists(string & name, unsigned long const crc,
49 extra_images const & extra)
50 {
51 unsigned long file_crc = 0;
52 // The size of 2 * 1024 elements for the buffer is arbitrary.
53 char buffer[2 * 1024];
54
55 image_error img_ok;
56 string const image_path = extra.find_image_path(name, img_ok, true);
57
58 if (img_ok != image_ok)
59 return false;
60
61 name = image_path;
62
63 ifstream file(image_path.c_str());
64 if (!file)
65 return false;
66
67 cverb << vbfd << "found " << name;
68 while (file) {
69 file.read(buffer, sizeof(buffer));
70 file_crc = calc_crc32(file_crc,
71 reinterpret_cast<unsigned char *>(&buffer[0]),
72 file.gcount());
73 }
74 cverb << vbfd << " with crc32 = " << hex << file_crc << endl;
75 return crc == file_crc;
76 }
77
78
get_debug_link_info(bfd * ibfd,string & filename,unsigned long & crc32)79 bool get_debug_link_info(bfd * ibfd, string & filename, unsigned long & crc32)
80 {
81 asection * sect;
82
83 cverb << vbfd << "fetching .gnu_debuglink section" << endl;
84 sect = bfd_get_section_by_name(ibfd, ".gnu_debuglink");
85
86 if (sect == NULL)
87 return false;
88
89 bfd_size_type debuglink_size = bfd_section_size(ibfd, sect);
90 char contents[debuglink_size];
91 cverb << vbfd
92 << ".gnu_debuglink section has size " << debuglink_size << endl;
93
94 if (!bfd_get_section_contents(ibfd, sect,
95 reinterpret_cast<unsigned char *>(contents),
96 static_cast<file_ptr>(0), debuglink_size)) {
97 bfd_perror("bfd_get_section_contents:get_debug:");
98 exit(2);
99 }
100
101 /* CRC value is stored after the filename, aligned up to 4 bytes. */
102 size_t filename_len = strlen(contents);
103 size_t crc_offset = filename_len + 1;
104 crc_offset = (crc_offset + 3) & ~3;
105
106 crc32 = bfd_get_32(ibfd,
107 reinterpret_cast<bfd_byte *>(contents + crc_offset));
108 filename = string(contents, filename_len);
109 cverb << vbfd << ".gnu_debuglink filename is " << filename << endl;
110 return true;
111 }
112
113
114 /**
115 * With Objective C, we'll get strings like:
116 *
117 * _i_GSUnicodeString__rangeOfCharacterSetFromSet_options_range
118 *
119 * for the symbol name, and:
120 * -[GSUnicodeString rangeOfCharacterFromSet:options:range:]
121 *
122 * for the function name, so we have to do some looser matching
123 * than for other languages (unfortunately, it's not possible
124 * to demangle Objective C symbols).
125 */
objc_match(string const & sym,string const & method)126 bool objc_match(string const & sym, string const & method)
127 {
128 if (method.length() < 3)
129 return false;
130
131 string mangled;
132
133 if (is_prefix(method, "-[")) {
134 mangled += "_i_";
135 } else if (is_prefix(method, "+[")) {
136 mangled += "_c_";
137 } else {
138 return false;
139 }
140
141 string::const_iterator it = method.begin() + 2;
142 string::const_iterator const end = method.end();
143
144 bool found_paren = false;
145
146 for (; it != end; ++it) {
147 switch (*it) {
148 case ' ':
149 mangled += '_';
150 if (!found_paren)
151 mangled += '_';
152 break;
153 case ':':
154 mangled += '_';
155 break;
156 case ')':
157 case ']':
158 break;
159 case '(':
160 found_paren = true;
161 mangled += '_';
162 break;
163 default:
164 mangled += *it;
165 }
166 }
167
168 return sym == mangled;
169 }
170
171
172 /*
173 * With a binary image where some objects are missing debug
174 * info, we can end up attributing to a completely different
175 * function (#484660): bfd_nearest_line() will happily move from one
176 * symbol to the nearest one it can find with debug information.
177 * To mitigate this problem, we check that the symbol name
178 * matches the returned function name.
179 *
180 * However, this check fails in some cases it shouldn't:
181 * Objective C, and C++ static inline functions (as discussed in
182 * GCC bugzilla #11774). So, we have a looser check that
183 * accepts merely a substring, plus some magic for Objective C.
184 *
185 * If even the loose check fails, then we give up.
186 */
is_correct_function(string const & function,string const & name)187 bool is_correct_function(string const & function, string const & name)
188 {
189 if (name == function)
190 return true;
191
192 if (objc_match(name, function))
193 return true;
194
195 // warn the user if we had to use the loose check
196 if (name.find(function) != string::npos) {
197 static bool warned = false;
198 if (!warned) {
199 cerr << "warning: some functions compiled without "
200 << "debug information may have incorrect source "
201 << "line attributions" << endl;
202 warned = true;
203 }
204 cverb << vbfd << "is_correct_function(" << function << ", "
205 << name << ") fuzzy match." << endl;
206 return true;
207 }
208
209 return false;
210 }
211
212
213 /*
214 * binutils 2.12 and below have a small bug where functions without a
215 * debug entry at the prologue start do not give a useful line number
216 * from bfd_find_nearest_line(). This can happen with certain gcc
217 * versions such as 2.95.
218 *
219 * We work around this problem by scanning forward for a vma with valid
220 * linenr info, if we can't get a valid line number. Problem uncovered
221 * by Norbert Kaufmann. The work-around decreases, on the tincas
222 * application, the number of failure to retrieve linenr info from 835
223 * to 173. Most of the remaining are c++ inline functions mainly from
224 * the STL library. Fix #529622
225 */
fixup_linenr(bfd * abfd,asection * section,asymbol ** syms,string const & name,bfd_vma pc,char const ** filename,unsigned int * line)226 void fixup_linenr(bfd * abfd, asection * section, asymbol ** syms,
227 string const & name, bfd_vma pc,
228 char const ** filename, unsigned int * line)
229 {
230 char const * cfilename;
231 char const * function;
232 unsigned int linenr;
233
234 // FIXME: looking at debug info for all gcc version shows than
235 // the same problems can -perhaps- occur for epilog code: find a
236 // samples files with samples in epilog and try opreport -l -g
237 // on it, check it also with opannotate.
238
239 // first restrict the search on a sensible range of vma, 16 is
240 // an intuitive value based on epilog code look
241 size_t max_search = 16;
242 size_t section_size = bfd_section_size(abfd, section);
243 if (pc + max_search > section_size)
244 max_search = section_size - pc;
245
246 for (size_t i = 1; i < max_search; ++i) {
247 bool ret = bfd_find_nearest_line(abfd, section, syms, pc + i,
248 &cfilename, &function,
249 &linenr);
250
251 if (ret && cfilename && function && linenr != 0
252 && is_correct_function(function, name)) {
253 *filename = cfilename;
254 *line = linenr;
255 return;
256 }
257 }
258 }
259
260
261 } // namespace anon
262
263
open_bfd(string const & file)264 bfd * open_bfd(string const & file)
265 {
266 /* bfd keeps its own reference to the filename char *,
267 * so it must have a lifetime longer than the ibfd */
268 bfd * ibfd = bfd_openr(file.c_str(), NULL);
269 if (!ibfd) {
270 cverb << vbfd << "bfd_openr failed for " << file << endl;
271 return NULL;
272 }
273
274 check_format(file, &ibfd);
275
276 return ibfd;
277 }
278
279
fdopen_bfd(string const & file,int fd)280 bfd * fdopen_bfd(string const & file, int fd)
281 {
282 /* bfd keeps its own reference to the filename char *,
283 * so it must have a lifetime longer than the ibfd */
284 bfd * ibfd = bfd_fdopenr(file.c_str(), NULL, fd);
285 if (!ibfd) {
286 cverb << vbfd << "bfd_openr failed for " << file << endl;
287 return NULL;
288 }
289
290 check_format(file, &ibfd);
291
292 return ibfd;
293 }
294
295
find_separate_debug_file(bfd * ibfd,string const & filepath_in,string & debug_filename,extra_images const & extra)296 bool find_separate_debug_file(bfd * ibfd, string const & filepath_in,
297 string & debug_filename, extra_images const & extra)
298 {
299 string filepath(filepath_in);
300 string basename;
301 unsigned long crc32;
302
303 if (!get_debug_link_info(ibfd, basename, crc32))
304 return false;
305
306 // Work out the image file's directory prefix
307 string filedir = op_dirname(filepath);
308 // Make sure it starts with /
309 if (filedir.size() > 0 && filedir.at(filedir.size() - 1) != '/')
310 filedir += '/';
311
312 string first_try(filedir + ".debug/" + basename);
313 string second_try(DEBUGDIR + filedir + basename);
314 string third_try(filedir + basename);
315
316 cverb << vbfd << "looking for debugging file " << basename
317 << " with crc32 = " << hex << crc32 << endl;
318
319 if (separate_debug_file_exists(first_try, crc32, extra))
320 debug_filename = first_try;
321 else if (separate_debug_file_exists(second_try, crc32, extra))
322 debug_filename = second_try;
323 else if (separate_debug_file_exists(third_try, crc32, extra))
324 debug_filename = third_try;
325 else
326 return false;
327
328 return true;
329 }
330
331
interesting_symbol(asymbol * sym)332 bool interesting_symbol(asymbol * sym)
333 {
334 // #717720 some binutils are miscompiled by gcc 2.95, one of the
335 // typical symptom can be catched here.
336 if (!sym->section) {
337 ostringstream os;
338 os << "Your version of binutils seems to have a bug.\n"
339 << "Read http://oprofile.sf.net/faq/#binutilsbug\n";
340 throw op_runtime_error(os.str());
341 }
342
343 if (!(sym->section->flags & SEC_CODE))
344 return false;
345
346 // returning true for fix up in op_bfd_symbol()
347 if (!sym->name || sym->name[0] == '\0')
348 return true;
349 /* ARM assembler internal mapping symbols aren't interesting */
350 if ((strcmp("$a", sym->name) == 0) ||
351 (strcmp("$t", sym->name) == 0) ||
352 (strcmp("$d", sym->name) == 0))
353 return false;
354
355 // C++ exception stuff
356 if (sym->name[0] == '.' && sym->name[1] == 'L')
357 return false;
358
359 /* This case cannot be moved to boring_symbol(),
360 * because that's only used for duplicate VMAs,
361 * and sometimes this symbol appears at an address
362 * different from all other symbols.
363 */
364 if (!strcmp("gcc2_compiled.", sym->name))
365 return false;
366
367 if (sym->flags & BSF_SECTION_SYM)
368 return false;
369
370 if (!(sym->section->flags & SEC_LOAD))
371 return false;
372
373 return true;
374 }
375
376
boring_symbol(op_bfd_symbol const & first,op_bfd_symbol const & second)377 bool boring_symbol(op_bfd_symbol const & first, op_bfd_symbol const & second)
378 {
379 if (first.name() == "Letext")
380 return true;
381 else if (second.name() == "Letext")
382 return false;
383
384 if (first.name().substr(0, 2) == "??")
385 return true;
386 else if (second.name().substr(0, 2) == "??")
387 return false;
388
389 if (first.hidden() && !second.hidden())
390 return true;
391 else if (!first.hidden() && second.hidden())
392 return false;
393
394 if (first.name()[0] == '_' && second.name()[0] != '_')
395 return true;
396 else if (first.name()[0] != '_' && second.name()[0] == '_')
397 return false;
398
399 if (first.weak() && !second.weak())
400 return true;
401 else if (!first.weak() && second.weak())
402 return false;
403
404 return false;
405 }
406
407
has_debug_info() const408 bool bfd_info::has_debug_info() const
409 {
410 if (!valid())
411 return false;
412
413 for (asection const * sect = abfd->sections; sect; sect = sect->next) {
414 if (sect->flags & SEC_DEBUGGING)
415 return true;
416 }
417
418 return false;
419 }
420
421
~bfd_info()422 bfd_info::~bfd_info()
423 {
424 free(synth_syms);
425 close();
426 }
427
428
close()429 void bfd_info::close()
430 {
431 if (abfd)
432 bfd_close(abfd);
433 }
434
435 /**
436 * This function is only called when processing symbols retrieved from a
437 * debuginfo file that is separate from the actual runtime binary image.
438 * Separate debuginfo files may be needed in two different cases:
439 * 1) the real image is completely stripped, where there is no symbol
440 information at all
441 * 2) the real image has debuginfo stripped, and the user is requesting "-g"
442 * (src file/line num info)
443 * After all symbols are gathered up, there will be some filtering/removal of
444 * unnecessary symbols. In particular, the bfd_info::interesting_symbol()
445 * function filters out symbols whose section's flag value does not include
446 * SEC_LOAD. This filtering is required, so it must be retained. However,
447 * we run into a problem with symbols from debuginfo files, since the
448 * section flag does NOT include SEC_LOAD. To solve this problem, the
449 * translate_debuginfo_syms function maps the debuginfo symbol's sections to
450 * that of their corresponding real image.
451 */
translate_debuginfo_syms(asymbol ** dbg_syms,long nr_dbg_syms)452 void bfd_info::translate_debuginfo_syms(asymbol ** dbg_syms, long nr_dbg_syms)
453 {
454 unsigned int img_sect_cnt = 0;
455 bfd * image_bfd = image_bfd_info->abfd;
456 multimap<string, bfd_section *> image_sections;
457
458 for (bfd_section * sect = image_bfd->sections;
459 sect && img_sect_cnt < image_bfd->section_count;
460 sect = sect->next) {
461 // A comment section marks the end of the needed sections
462 if (strstr(sect->name, ".comment") == sect->name)
463 break;
464 image_sections.insert(pair<string, bfd_section *>(sect->name, sect));
465 img_sect_cnt++;
466 }
467
468 asymbol * sym = dbg_syms[0];
469 string prev_sect_name = "";
470 bfd_section * matched_section = NULL;
471 for (int i = 0; i < nr_dbg_syms; sym = dbg_syms[++i]) {
472 bool section_switch;
473
474 if (strcmp(prev_sect_name.c_str(), sym->section->name)) {
475 section_switch = true;
476 prev_sect_name = sym->section->name;
477 } else {
478 section_switch = false;
479 }
480 if (sym->section->owner && sym->section->owner == abfd) {
481 if (section_switch ) {
482 matched_section = NULL;
483 multimap<string, bfd_section *>::iterator it;
484 pair<multimap<string, bfd_section *>::iterator,
485 multimap<string, bfd_section *>::iterator> range;
486
487 range = image_sections.equal_range(sym->section->name);
488 for (it = range.first; it != range.second; it++) {
489 if ((*it).second->vma == sym->section->vma) {
490 matched_section = (*it).second;
491 break;
492 }
493 }
494 }
495 if (matched_section) {
496 sym->section = matched_section;
497 sym->the_bfd = image_bfd;
498 }
499 }
500 }
501 }
502
503 #if SYNTHESIZE_SYMBOLS
get_synth_symbols()504 bool bfd_info::get_synth_symbols()
505 {
506 extern const bfd_target bfd_elf64_powerpc_vec;
507 extern const bfd_target bfd_elf64_powerpcle_vec;
508 bool is_elf64_powerpc_target = (abfd->xvec == &bfd_elf64_powerpc_vec)
509 || (abfd->xvec == &bfd_elf64_powerpcle_vec);
510
511 if (!is_elf64_powerpc_target)
512 return false;
513
514 void * buf;
515 uint tmp;
516 long nr_mini_syms = bfd_read_minisymbols(abfd, 0, &buf, &tmp);
517 if (nr_mini_syms < 1)
518 return false;
519
520 asymbol ** mini_syms = (asymbol **)buf;
521 buf = NULL;
522 bfd * synth_bfd;
523
524 /* For ppc64, a debuginfo file by itself does not hold enough symbol
525 * information for us to properly attribute samples to symbols. If
526 * the image file's bfd has no symbols (as in a super-stripped library),
527 * then we need to do the extra processing in translate_debuginfo_syms.
528 */
529 if (image_bfd_info && image_bfd_info->nr_syms == 0) {
530 translate_debuginfo_syms(mini_syms, nr_mini_syms);
531 synth_bfd = image_bfd_info->abfd;
532 } else
533 synth_bfd = abfd;
534
535 long nr_synth_syms = bfd_get_synthetic_symtab(synth_bfd,
536 nr_mini_syms,
537 mini_syms, 0,
538 NULL, &synth_syms);
539
540 if (nr_synth_syms < 0) {
541 free(mini_syms);
542 return false;
543 }
544
545 cverb << vbfd << "mini_syms: " << dec << nr_mini_syms << hex << endl;
546 cverb << vbfd << "synth_syms: " << dec << nr_synth_syms << hex << endl;
547
548 nr_syms = nr_mini_syms + nr_synth_syms;
549 syms.reset(new asymbol *[nr_syms + 1]);
550
551 for (size_t i = 0; i < (size_t)nr_mini_syms; ++i)
552 syms[i] = mini_syms[i];
553
554
555 for (size_t i = 0; i < (size_t)nr_synth_syms; ++i)
556 syms[nr_mini_syms + i] = synth_syms + i;
557
558
559 free(mini_syms);
560
561 // bfd_canonicalize_symtab does this, so shall we
562 syms[nr_syms] = NULL;
563
564 return true;
565 }
566 #else
get_synth_symbols()567 bool bfd_info::get_synth_symbols()
568 {
569 return false;
570 }
571 #endif /* SYNTHESIZE_SYMBOLS */
572
573
get_symbols()574 void bfd_info::get_symbols()
575 {
576 if (!abfd)
577 return;
578
579 cverb << vbfd << "bfd_info::get_symbols() for "
580 << bfd_get_filename(abfd) << endl;
581
582 if (get_synth_symbols())
583 return;
584
585 if (bfd_get_file_flags(abfd) & HAS_SYMS)
586 nr_syms = bfd_get_symtab_upper_bound(abfd);
587
588 cverb << vbfd << "bfd_get_symtab_upper_bound: " << dec
589 << nr_syms << hex << endl;
590
591 nr_syms /= sizeof(asymbol *);
592
593 if (nr_syms < 1)
594 return;
595
596 syms.reset(new asymbol *[nr_syms]);
597
598 nr_syms = bfd_canonicalize_symtab(abfd, syms.get());
599
600 if (image_bfd_info)
601 translate_debuginfo_syms(syms.get(), nr_syms);
602
603 cverb << vbfd << "bfd_canonicalize_symtab: " << dec
604 << nr_syms << hex << endl;
605 }
606
607
608 linenr_info const
find_nearest_line(bfd_info const & b,op_bfd_symbol const & sym,bfd_vma offset,bool anon_obj)609 find_nearest_line(bfd_info const & b, op_bfd_symbol const & sym,
610 bfd_vma offset, bool anon_obj)
611 {
612 char const * function = "";
613 char const * cfilename = "";
614 unsigned int linenr = 0;
615 linenr_info info;
616 bfd * abfd;
617 asymbol ** syms;
618 asection * section;
619 bfd_vma pc;
620 bool ret;
621
622 if (!b.valid())
623 goto fail;
624
625 // take care about artificial symbol
626 if (!sym.symbol())
627 goto fail;
628
629 abfd = b.abfd;
630 syms = b.syms.get();
631 if (!syms)
632 goto fail;
633 section = sym.symbol()->section;
634 if (anon_obj)
635 pc = offset - sym.symbol()->section->vma;
636 else
637 pc = (sym.value() + offset) - sym.filepos();
638
639 if ((bfd_get_section_flags(abfd, section) & SEC_ALLOC) == 0)
640 goto fail;
641
642 if (pc >= bfd_section_size(abfd, section))
643 goto fail;
644
645 ret = bfd_find_nearest_line(abfd, section, syms, pc, &cfilename,
646 &function, &linenr);
647
648 if (!ret || !cfilename || !function)
649 goto fail;
650
651 /*
652 * is_correct_function does not handle the case of static inlines,
653 * but if the linenr is non-zero in the inline case, it is the correct
654 * line number.
655 */
656 if (linenr == 0 && !is_correct_function(function, sym.name()))
657 goto fail;
658
659 if (linenr == 0) {
660 fixup_linenr(abfd, section, syms, sym.name(), pc, &cfilename,
661 &linenr);
662 }
663
664 info.found = true;
665 info.filename = cfilename;
666 info.line = linenr;
667 return info;
668
669 fail:
670 info.found = false;
671 // some stl lacks string::clear()
672 info.filename.erase(info.filename.begin(), info.filename.end());
673 info.line = 0;
674 return info;
675 }
676