• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * @file bfd_support.cpp
3  * BFD muck we have to deal with.
4  *
5  * @remark Copyright 2005 OProfile authors
6  * @remark Read the file COPYING
7  *
8  * @author John Levon
9  */
10 
11 #include "bfd_support.h"
12 
13 #include "op_bfd.h"
14 #include "op_fileio.h"
15 #include "op_config.h"
16 #include "string_manip.h"
17 #include "file_manip.h"
18 #include "cverb.h"
19 #include "locate_images.h"
20 
21 #include <cstdlib>
22 #include <cstring>
23 #include <cassert>
24 #include <iostream>
25 #include <fstream>
26 #include <sstream>
27 #include <string>
28 #include <cstring>
29 #include <cstdlib>
30 
31 using namespace std;
32 
33 extern verbose vbfd;
34 
35 namespace {
36 
37 
check_format(string const & file,bfd ** ibfd)38 void check_format(string const & file, bfd ** ibfd)
39 {
40 	if (!bfd_check_format_matches(*ibfd, bfd_object, NULL)) {
41 		cverb << vbfd << "BFD format failure for " << file << endl;
42 		bfd_close(*ibfd);
43 		*ibfd = NULL;
44 	}
45 }
46 
47 
separate_debug_file_exists(string & name,unsigned long const crc,extra_images const & extra)48 bool separate_debug_file_exists(string & name, unsigned long const crc,
49                                 extra_images const & extra)
50 {
51 	unsigned long file_crc = 0;
52 	// The size of 2 * 1024 elements for the buffer is arbitrary.
53 	char buffer[2 * 1024];
54 
55 	image_error img_ok;
56 	string const image_path = extra.find_image_path(name, img_ok, true);
57 
58 	if (img_ok != image_ok)
59 		return false;
60 
61 	name = image_path;
62 
63 	ifstream file(image_path.c_str());
64 	if (!file)
65 		return false;
66 
67 	cverb << vbfd << "found " << name;
68 	while (file) {
69 		file.read(buffer, sizeof(buffer));
70 		file_crc = calc_crc32(file_crc,
71 				      reinterpret_cast<unsigned char *>(&buffer[0]),
72 				      file.gcount());
73 	}
74 	cverb << vbfd << " with crc32 = " << hex << file_crc << endl;
75 	return crc == file_crc;
76 }
77 
78 
get_debug_link_info(bfd * ibfd,string & filename,unsigned long & crc32)79 bool get_debug_link_info(bfd * ibfd, string & filename, unsigned long & crc32)
80 {
81 	asection * sect;
82 
83 	cverb << vbfd << "fetching .gnu_debuglink section" << endl;
84 	sect = bfd_get_section_by_name(ibfd, ".gnu_debuglink");
85 
86 	if (sect == NULL)
87 		return false;
88 
89 	bfd_size_type debuglink_size = bfd_section_size(ibfd, sect);
90 	char contents[debuglink_size];
91 	cverb << vbfd
92 	      << ".gnu_debuglink section has size " << debuglink_size << endl;
93 
94 	if (!bfd_get_section_contents(ibfd, sect,
95 				 reinterpret_cast<unsigned char *>(contents),
96 				 static_cast<file_ptr>(0), debuglink_size)) {
97 		bfd_perror("bfd_get_section_contents:get_debug:");
98 		exit(2);
99 	}
100 
101 	/* CRC value is stored after the filename, aligned up to 4 bytes. */
102 	size_t filename_len = strlen(contents);
103 	size_t crc_offset = filename_len + 1;
104 	crc_offset = (crc_offset + 3) & ~3;
105 
106 	crc32 = bfd_get_32(ibfd,
107 			       reinterpret_cast<bfd_byte *>(contents + crc_offset));
108 	filename = string(contents, filename_len);
109 	cverb << vbfd << ".gnu_debuglink filename is " << filename << endl;
110 	return true;
111 }
112 
113 
114 /**
115  * With Objective C, we'll get strings like:
116  *
117  * _i_GSUnicodeString__rangeOfCharacterSetFromSet_options_range
118  *
119  * for the symbol name, and:
120  * -[GSUnicodeString rangeOfCharacterFromSet:options:range:]
121  *
122  * for the function name, so we have to do some looser matching
123  * than for other languages (unfortunately, it's not possible
124  * to demangle Objective C symbols).
125  */
objc_match(string const & sym,string const & method)126 bool objc_match(string const & sym, string const & method)
127 {
128 	if (method.length() < 3)
129 		return false;
130 
131 	string mangled;
132 
133 	if (is_prefix(method, "-[")) {
134 		mangled += "_i_";
135 	} else if (is_prefix(method, "+[")) {
136 		mangled += "_c_";
137 	} else {
138 		return false;
139 	}
140 
141 	string::const_iterator it = method.begin() + 2;
142 	string::const_iterator const end = method.end();
143 
144 	bool found_paren = false;
145 
146 	for (; it != end; ++it) {
147 		switch (*it) {
148 		case ' ':
149 			mangled += '_';
150 			if (!found_paren)
151 				mangled += '_';
152 			break;
153 		case ':':
154 			mangled += '_';
155 			break;
156 		case ')':
157 		case ']':
158 			break;
159 		case '(':
160 			found_paren = true;
161 			mangled += '_';
162 			break;
163 		default:
164 			mangled += *it;
165 		}
166 	}
167 
168 	return sym == mangled;
169 }
170 
171 
172 /*
173  * With a binary image where some objects are missing debug
174  * info, we can end up attributing to a completely different
175  * function (#484660): bfd_nearest_line() will happily move from one
176  * symbol to the nearest one it can find with debug information.
177  * To mitigate this problem, we check that the symbol name
178  * matches the returned function name.
179  *
180  * However, this check fails in some cases it shouldn't:
181  * Objective C, and C++ static inline functions (as discussed in
182  * GCC bugzilla #11774). So, we have a looser check that
183  * accepts merely a substring, plus some magic for Objective C.
184  *
185  * If even the loose check fails, then we give up.
186  */
is_correct_function(string const & function,string const & name)187 bool is_correct_function(string const & function, string const & name)
188 {
189 	if (name == function)
190 		return true;
191 
192 	if (objc_match(name, function))
193 		return true;
194 
195 	// warn the user if we had to use the loose check
196 	if (name.find(function) != string::npos) {
197 		static bool warned = false;
198 		if (!warned) {
199 			cerr << "warning: some functions compiled without "
200 			     << "debug information may have incorrect source "
201 			     << "line attributions" << endl;
202 				warned = true;
203 		}
204 		cverb << vbfd << "is_correct_function(" << function << ", "
205 		      << name << ") fuzzy match." << endl;
206 		return true;
207 	}
208 
209 	return false;
210 }
211 
212 
213 /*
214  * binutils 2.12 and below have a small bug where functions without a
215  * debug entry at the prologue start do not give a useful line number
216  * from bfd_find_nearest_line(). This can happen with certain gcc
217  * versions such as 2.95.
218  *
219  * We work around this problem by scanning forward for a vma with valid
220  * linenr info, if we can't get a valid line number.  Problem uncovered
221  * by Norbert Kaufmann. The work-around decreases, on the tincas
222  * application, the number of failure to retrieve linenr info from 835
223  * to 173. Most of the remaining are c++ inline functions mainly from
224  * the STL library. Fix #529622
225  */
fixup_linenr(bfd * abfd,asection * section,asymbol ** syms,string const & name,bfd_vma pc,char const ** filename,unsigned int * line)226 void fixup_linenr(bfd * abfd, asection * section, asymbol ** syms,
227 		  string const & name, bfd_vma pc,
228                   char const ** filename, unsigned int * line)
229 {
230 	char const * cfilename;
231 	char const * function;
232 	unsigned int linenr;
233 
234 	// FIXME: looking at debug info for all gcc version shows than
235 	// the same problems can -perhaps- occur for epilog code: find a
236 	// samples files with samples in epilog and try opreport -l -g
237 	// on it, check it also with opannotate.
238 
239 	// first restrict the search on a sensible range of vma, 16 is
240 	// an intuitive value based on epilog code look
241 	size_t max_search = 16;
242 	size_t section_size = bfd_section_size(abfd, section);
243 	if (pc + max_search > section_size)
244 		max_search = section_size - pc;
245 
246 	for (size_t i = 1; i < max_search; ++i) {
247 		bool ret = bfd_find_nearest_line(abfd, section, syms, pc + i,
248 						 &cfilename, &function,
249 						 &linenr);
250 
251 		if (ret && cfilename && function && linenr != 0
252 		    && is_correct_function(function, name)) {
253 			*filename = cfilename;
254 			*line = linenr;
255 			return;
256 		}
257 	}
258 }
259 
260 
261 } // namespace anon
262 
263 
open_bfd(string const & file)264 bfd * open_bfd(string const & file)
265 {
266 	/* bfd keeps its own reference to the filename char *,
267 	 * so it must have a lifetime longer than the ibfd */
268 	bfd * ibfd = bfd_openr(file.c_str(), NULL);
269 	if (!ibfd) {
270 		cverb << vbfd << "bfd_openr failed for " << file << endl;
271 		return NULL;
272 	}
273 
274 	check_format(file, &ibfd);
275 
276 	return ibfd;
277 }
278 
279 
fdopen_bfd(string const & file,int fd)280 bfd * fdopen_bfd(string const & file, int fd)
281 {
282 	/* bfd keeps its own reference to the filename char *,
283 	 * so it must have a lifetime longer than the ibfd */
284 	bfd * ibfd = bfd_fdopenr(file.c_str(), NULL, fd);
285 	if (!ibfd) {
286 		cverb << vbfd << "bfd_openr failed for " << file << endl;
287 		return NULL;
288 	}
289 
290 	check_format(file, &ibfd);
291 
292 	return ibfd;
293 }
294 
295 
find_separate_debug_file(bfd * ibfd,string const & filepath_in,string & debug_filename,extra_images const & extra)296 bool find_separate_debug_file(bfd * ibfd, string const & filepath_in,
297                               string & debug_filename, extra_images const & extra)
298 {
299 	string filepath(filepath_in);
300 	string basename;
301 	unsigned long crc32;
302 
303 	if (!get_debug_link_info(ibfd, basename, crc32))
304 		return false;
305 
306 	// Work out the image file's directory prefix
307 	string filedir = op_dirname(filepath);
308 	// Make sure it starts with /
309 	if (filedir.size() > 0 && filedir.at(filedir.size() - 1) != '/')
310 		filedir += '/';
311 
312 	string first_try(filedir + ".debug/" + basename);
313 	string second_try(DEBUGDIR + filedir + basename);
314 	string third_try(filedir + basename);
315 
316 	cverb << vbfd << "looking for debugging file " << basename
317 	      << " with crc32 = " << hex << crc32 << endl;
318 
319 	if (separate_debug_file_exists(first_try, crc32, extra))
320 		debug_filename = first_try;
321 	else if (separate_debug_file_exists(second_try, crc32, extra))
322 		debug_filename = second_try;
323 	else if (separate_debug_file_exists(third_try, crc32, extra))
324 		debug_filename = third_try;
325 	else
326 		return false;
327 
328 	return true;
329 }
330 
331 
interesting_symbol(asymbol * sym)332 bool interesting_symbol(asymbol * sym)
333 {
334 	// #717720 some binutils are miscompiled by gcc 2.95, one of the
335 	// typical symptom can be catched here.
336 	if (!sym->section) {
337 		ostringstream os;
338 		os << "Your version of binutils seems to have a bug.\n"
339 		   << "Read http://oprofile.sf.net/faq/#binutilsbug\n";
340 		throw op_runtime_error(os.str());
341 	}
342 
343 	if (!(sym->section->flags & SEC_CODE))
344 		return false;
345 
346 	// returning true for fix up in op_bfd_symbol()
347 	if (!sym->name || sym->name[0] == '\0')
348 		return true;
349 	/* ARM assembler internal mapping symbols aren't interesting */
350 	if ((strcmp("$a", sym->name) == 0) ||
351 	    (strcmp("$t", sym->name) == 0) ||
352 	    (strcmp("$d", sym->name) == 0))
353 		return false;
354 
355 	// C++ exception stuff
356 	if (sym->name[0] == '.' && sym->name[1] == 'L')
357 		return false;
358 
359 	/* This case cannot be moved to boring_symbol(),
360 	 * because that's only used for duplicate VMAs,
361 	 * and sometimes this symbol appears at an address
362 	 * different from all other symbols.
363 	 */
364 	if (!strcmp("gcc2_compiled.", sym->name))
365 		return false;
366 
367         if (sym->flags & BSF_SECTION_SYM)
368                 return false;
369 
370 	if (!(sym->section->flags & SEC_LOAD))
371 		return false;
372 
373 	return true;
374 }
375 
376 
boring_symbol(op_bfd_symbol const & first,op_bfd_symbol const & second)377 bool boring_symbol(op_bfd_symbol const & first, op_bfd_symbol const & second)
378 {
379 	if (first.name() == "Letext")
380 		return true;
381 	else if (second.name() == "Letext")
382 		return false;
383 
384 	if (first.name().substr(0, 2) == "??")
385 		return true;
386 	else if (second.name().substr(0, 2) == "??")
387 		return false;
388 
389 	if (first.hidden() && !second.hidden())
390 		return true;
391 	else if (!first.hidden() && second.hidden())
392 		return false;
393 
394 	if (first.name()[0] == '_' && second.name()[0] != '_')
395 		return true;
396 	else if (first.name()[0] != '_' && second.name()[0] == '_')
397 		return false;
398 
399 	if (first.weak() && !second.weak())
400 		return true;
401 	else if (!first.weak() && second.weak())
402 		return false;
403 
404 	return false;
405 }
406 
407 
has_debug_info() const408 bool bfd_info::has_debug_info() const
409 {
410 	if (!valid())
411 		return false;
412 
413 	for (asection const * sect = abfd->sections; sect; sect = sect->next) {
414 		if (sect->flags & SEC_DEBUGGING)
415 			return true;
416 	}
417 
418 	return false;
419 }
420 
421 
~bfd_info()422 bfd_info::~bfd_info()
423 {
424 	free(synth_syms);
425 	close();
426 }
427 
428 
close()429 void bfd_info::close()
430 {
431 	if (abfd)
432 		bfd_close(abfd);
433 }
434 
435 /**
436  * This function is only called when processing symbols retrieved from a
437  * debuginfo file that is separate from the actual runtime binary image.
438  * Separate debuginfo files may be needed in two different cases:
439  *   1) the real image is completely stripped, where there is no symbol
440 	information at all
441  *   2) the real image has debuginfo stripped, and the user is requesting "-g"
442  *   (src file/line num info)
443  * After all symbols are gathered up, there will be some filtering/removal of
444  * unnecessary symbols.  In particular, the bfd_info::interesting_symbol()
445  * function filters out symbols whose section's flag value does not include
446  * SEC_LOAD.  This filtering is required, so it must be retained.  However,
447  * we run into a problem with symbols from debuginfo files, since the
448  * section flag does NOT include SEC_LOAD.  To solve this problem, the
449  * translate_debuginfo_syms function maps the debuginfo symbol's sections to
450  * that of their corresponding real image.
451 */
translate_debuginfo_syms(asymbol ** dbg_syms,long nr_dbg_syms)452 void bfd_info::translate_debuginfo_syms(asymbol ** dbg_syms, long nr_dbg_syms)
453 {
454 	unsigned int img_sect_cnt = 0;
455 	bfd * image_bfd = image_bfd_info->abfd;
456 	multimap<string, bfd_section *> image_sections;
457 
458 	for (bfd_section * sect = image_bfd->sections;
459 	     sect && img_sect_cnt < image_bfd->section_count;
460 	     sect = sect->next) {
461 		// A comment section marks the end of the needed sections
462 		if (strstr(sect->name, ".comment") == sect->name)
463 			break;
464 		image_sections.insert(pair<string, bfd_section *>(sect->name, sect));
465 		img_sect_cnt++;
466 	}
467 
468 	asymbol * sym = dbg_syms[0];
469 	string prev_sect_name = "";
470 	bfd_section * matched_section = NULL;
471 	for (int i = 0; i < nr_dbg_syms; sym = dbg_syms[++i]) {
472 		bool section_switch;
473 
474 		if (strcmp(prev_sect_name.c_str(), sym->section->name)) {
475 			section_switch = true;
476 			prev_sect_name = sym->section->name;
477 		} else {
478 			section_switch = false;
479 		}
480 		if (sym->section->owner && sym->section->owner == abfd) {
481 			if (section_switch ) {
482 				matched_section = NULL;
483 				multimap<string, bfd_section *>::iterator it;
484 				pair<multimap<string, bfd_section *>::iterator,
485 				     multimap<string, bfd_section *>::iterator> range;
486 
487 				range = image_sections.equal_range(sym->section->name);
488 				for (it = range.first; it != range.second; it++) {
489 					if ((*it).second->vma == sym->section->vma) {
490 						matched_section = (*it).second;
491 						break;
492 					}
493 				}
494 			}
495 			if (matched_section) {
496 				sym->section = matched_section;
497 				sym->the_bfd = image_bfd;
498 			}
499 		}
500 	}
501 }
502 
503 #if SYNTHESIZE_SYMBOLS
get_synth_symbols()504 bool bfd_info::get_synth_symbols()
505 {
506 	extern const bfd_target bfd_elf64_powerpc_vec;
507 	extern const bfd_target bfd_elf64_powerpcle_vec;
508 	bool is_elf64_powerpc_target = (abfd->xvec == &bfd_elf64_powerpc_vec)
509 		|| (abfd->xvec == &bfd_elf64_powerpcle_vec);
510 
511 	if (!is_elf64_powerpc_target)
512 		return false;
513 
514 	void * buf;
515 	uint tmp;
516 	long nr_mini_syms = bfd_read_minisymbols(abfd, 0, &buf, &tmp);
517 	if (nr_mini_syms < 1)
518 		return false;
519 
520 	asymbol ** mini_syms = (asymbol **)buf;
521 	buf = NULL;
522 	bfd * synth_bfd;
523 
524 	/* For ppc64, a debuginfo file by itself does not hold enough symbol
525 	 * information for us to properly attribute samples to symbols.  If
526 	 * the image file's bfd has no symbols (as in a super-stripped library),
527 	 * then we need to do the extra processing in translate_debuginfo_syms.
528 	 */
529 	if (image_bfd_info && image_bfd_info->nr_syms == 0) {
530 		translate_debuginfo_syms(mini_syms, nr_mini_syms);
531 		synth_bfd = image_bfd_info->abfd;
532 	} else
533 		synth_bfd = abfd;
534 
535 	long nr_synth_syms = bfd_get_synthetic_symtab(synth_bfd,
536 	                                              nr_mini_syms,
537 	                                              mini_syms, 0,
538 	                                              NULL, &synth_syms);
539 
540 	if (nr_synth_syms < 0) {
541 		free(mini_syms);
542 		return false;
543 	}
544 
545 	cverb << vbfd << "mini_syms: " << dec << nr_mini_syms << hex << endl;
546 	cverb << vbfd << "synth_syms: " << dec << nr_synth_syms << hex << endl;
547 
548 	nr_syms = nr_mini_syms + nr_synth_syms;
549 	syms.reset(new asymbol *[nr_syms + 1]);
550 
551 	for (size_t i = 0; i < (size_t)nr_mini_syms; ++i)
552 		syms[i] = mini_syms[i];
553 
554 
555 	for (size_t i = 0; i < (size_t)nr_synth_syms; ++i)
556 		syms[nr_mini_syms + i] = synth_syms + i;
557 
558 
559 	free(mini_syms);
560 
561 	// bfd_canonicalize_symtab does this, so shall we
562 	syms[nr_syms] = NULL;
563 
564 	return true;
565 }
566 #else
get_synth_symbols()567 bool bfd_info::get_synth_symbols()
568 {
569 	return false;
570 }
571 #endif /* SYNTHESIZE_SYMBOLS */
572 
573 
get_symbols()574 void bfd_info::get_symbols()
575 {
576 	if (!abfd)
577 		return;
578 
579 	cverb << vbfd << "bfd_info::get_symbols() for "
580 	      << bfd_get_filename(abfd) << endl;
581 
582 	if (get_synth_symbols())
583 		return;
584 
585 	if (bfd_get_file_flags(abfd) & HAS_SYMS)
586 		nr_syms = bfd_get_symtab_upper_bound(abfd);
587 
588 	cverb << vbfd << "bfd_get_symtab_upper_bound: " << dec
589 	      << nr_syms << hex << endl;
590 
591 	nr_syms /= sizeof(asymbol *);
592 
593 	if (nr_syms < 1)
594 		return;
595 
596 	syms.reset(new asymbol *[nr_syms]);
597 
598 	nr_syms = bfd_canonicalize_symtab(abfd, syms.get());
599 
600 	if (image_bfd_info)
601 		translate_debuginfo_syms(syms.get(), nr_syms);
602 
603 	cverb << vbfd << "bfd_canonicalize_symtab: " << dec
604 	      << nr_syms << hex << endl;
605 }
606 
607 
608 linenr_info const
find_nearest_line(bfd_info const & b,op_bfd_symbol const & sym,bfd_vma offset,bool anon_obj)609 find_nearest_line(bfd_info const & b, op_bfd_symbol const & sym,
610                   bfd_vma offset, bool anon_obj)
611 {
612 	char const * function = "";
613 	char const * cfilename = "";
614 	unsigned int linenr = 0;
615 	linenr_info info;
616 	bfd * abfd;
617 	asymbol ** syms;
618 	asection * section;
619 	bfd_vma pc;
620 	bool ret;
621 
622 	if (!b.valid())
623 		goto fail;
624 
625 	// take care about artificial symbol
626 	if (!sym.symbol())
627 		goto fail;
628 
629 	abfd = b.abfd;
630 	syms = b.syms.get();
631 	if (!syms)
632 		goto fail;
633 	section = sym.symbol()->section;
634 	if (anon_obj)
635 		pc = offset - sym.symbol()->section->vma;
636 	else
637 		pc = (sym.value() + offset) - sym.filepos();
638 
639 	if ((bfd_get_section_flags(abfd, section) & SEC_ALLOC) == 0)
640 		goto fail;
641 
642 	if (pc >= bfd_section_size(abfd, section))
643 		goto fail;
644 
645 	ret = bfd_find_nearest_line(abfd, section, syms, pc, &cfilename,
646 	                                 &function, &linenr);
647 
648 	if (!ret || !cfilename || !function)
649 		goto fail;
650 
651 	/*
652 	 * is_correct_function does not handle the case of static inlines,
653 	 * but if the linenr is non-zero in the inline case, it is the correct
654 	 * line number.
655 	 */
656 	if (linenr == 0 && !is_correct_function(function, sym.name()))
657 		goto fail;
658 
659 	if (linenr == 0) {
660 		fixup_linenr(abfd, section, syms, sym.name(), pc, &cfilename,
661 		             &linenr);
662 	}
663 
664 	info.found = true;
665 	info.filename = cfilename;
666 	info.line = linenr;
667 	return info;
668 
669 fail:
670 	info.found = false;
671 	// some stl lacks string::clear()
672 	info.filename.erase(info.filename.begin(), info.filename.end());
673 	info.line = 0;
674 	return info;
675 }
676