1 /*
2 * Copyright (c) 2016 GitHub, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include <sys/types.h>
17 #include <sys/stat.h>
18 #include <sys/mman.h>
19 #include <errno.h>
20 #include <fcntl.h>
21 #include <unistd.h>
22 #include <string.h>
23 #include <libgen.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <limits.h>
27 #ifdef HAVE_LIBDEBUGINFOD
28 #include <elfutils/debuginfod.h>
29 #endif
30
31 #include <gelf.h>
32 #include "bcc_elf.h"
33 #include "bcc_proc.h"
34 #include "bcc_syms.h"
35
36 #define NT_STAPSDT 3
37 #define ELF_ST_TYPE(x) (((uint32_t) x) & 0xf)
38
/*
 * Create a read-only libelf handle on top of an already-open descriptor.
 *
 * Returns 0 and stores the handle in *elf_out on success.  Returns -1 when
 * libelf rejects the API version (in which case *elf_out is left untouched)
 * or when elf_begin() fails (in which case *elf_out is set to NULL).
 * The descriptor itself is neither closed nor otherwise modified here.
 */
static int openelf_fd(int fd, Elf **elf_out) {
  Elf *handle;

  // libelf wants the version handshake before any other call.
  if (elf_version(EV_CURRENT) == EV_NONE)
    return -1;

  handle = elf_begin(fd, ELF_C_READ, 0);
  *elf_out = handle;

  return handle ? 0 : -1;
}
49
/*
 * Open `path` read-only and attach a libelf handle to it.
 *
 * On success returns 0; *fd_out holds the open descriptor and *elf_out the
 * handle, both to be released by the caller (elf_end() then close()).
 *
 * On failure returns -1 and *fd_out is negative.  Several callers in this
 * file clean up with `if (fd >= 0) close(fd)`, so the descriptor that is
 * closed here on a libelf failure must not be left behind in *fd_out, or it
 * would be closed a second time.
 */
static int openelf(const char *path, Elf **elf_out, int *fd_out) {
  *fd_out = open(path, O_RDONLY);
  if (*fd_out < 0)
    return -1;

  if (openelf_fd(*fd_out, elf_out) == -1) {
    close(*fd_out);
    *fd_out = -1;  // never hand a closed descriptor back to the caller
    return -1;
  }

  return 0;
}
62
/*
 * Decode one "stapsdt" note descriptor into `probe`.
 *
 * The descriptor starts with three addresses (pc, base address, semaphore),
 * each 4 bytes wide for ELFCLASS32 and 8 bytes wide otherwise, followed by
 * three NUL-terminated strings: provider, probe name and argument format.
 * The string fields of `probe` point into `desc`; nothing is copied.
 *
 * probes_shdr may be NULL; when it is given and the semaphore address is
 * non-zero, the semaphore's file offset is derived from that section header.
 *
 * Returns the address just past the third string, which the caller compares
 * with the end of the descriptor.  This function has no length information,
 * so the caller is responsible for making sure the strings are terminated.
 */
static const char *parse_stapsdt_note(struct bcc_elf_usdt *probe,
                                      GElf_Shdr *probes_shdr,
                                      const char *desc, int elf_class) {
  // The descriptor is only guaranteed byte-addressable, so copy the address
  // triple out instead of dereferencing a casted (possibly misaligned)
  // pointer.
  if (elf_class == ELFCLASS32) {
    uint32_t addrs[3];

    memcpy(addrs, desc, sizeof(addrs));
    probe->pc = addrs[0];
    probe->base_addr = addrs[1];
    probe->semaphore = addrs[2];
    desc += sizeof(addrs);
  } else {
    uint64_t addrs[3];

    memcpy(addrs, desc, sizeof(addrs));
    probe->pc = addrs[0];
    probe->base_addr = addrs[1];
    probe->semaphore = addrs[2];
    desc += sizeof(addrs);
  }

  // Offset from start of file
  if (probe->semaphore && probes_shdr)
    probe->semaphore_offset =
        probe->semaphore - probes_shdr->sh_addr + probes_shdr->sh_offset;
  else
    probe->semaphore_offset = 0;

  probe->provider = desc;
  desc += strlen(desc) + 1;

  probe->name = desc;
  desc += strlen(desc) + 1;

  probe->arg_fmt = desc;
  desc += strlen(desc) + 1;

  return desc;
}
96
/*
 * Walk every note in `section` and report each valid "stapsdt" probe through
 * `callback`.
 *
 * A note is considered when its type is NT_STAPSDT and its name is exactly
 * "stapsdt\0".  Probes whose pc lies below first_inst_offset are reported on
 * stderr and skipped.  The callback's return value is not inspected.
 *
 * Always returns 0.
 */
static int do_note_segment(Elf_Scn *section, GElf_Shdr *probes_shdr, int elf_class,
                           bcc_elf_probecb callback, const char *binpath,
                           uint64_t first_inst_offset, void *payload) {
  // Size of the pc/base/semaphore triple that opens every descriptor.
  const size_t addr_bytes = (elf_class == ELFCLASS32) ? 12 : 24;
  Elf_Data *data = NULL;

  while ((data = elf_getdata(section, data)) != 0) {
    size_t offset = 0;
    GElf_Nhdr hdr;
    size_t name_off, desc_off;

    while ((offset = gelf_getnote(data, offset, &hdr, &name_off, &desc_off)) !=
           0) {
      const char *desc, *desc_end, *cursor;
      struct bcc_elf_usdt probe;
      int terminators = 0;

      if (hdr.n_type != NT_STAPSDT)
        continue;

      if (hdr.n_namesz != 8)
        continue;

      if (memcmp((const char *)data->d_buf + name_off, "stapsdt", 8) != 0)
        continue;

      desc = (const char *)data->d_buf + desc_off;
      desc_end = desc + hdr.n_descsz;

      // parse_stapsdt_note() walks three strings with strlen() and has no
      // idea where the descriptor ends.  Make sure all three terminators are
      // inside the descriptor before letting it run, so a malformed note
      // cannot make it read past the note data.
      if (hdr.n_descsz < addr_bytes)
        continue;
      for (cursor = desc + addr_bytes; cursor < desc_end && terminators < 3;
           ++cursor) {
        if (*cursor == '\0')
          terminators++;
      }
      if (terminators < 3)
        continue;

      if (parse_stapsdt_note(&probe, probes_shdr, desc, elf_class) == desc_end) {
        if (probe.pc < first_inst_offset)
          // Cast so the conversion matches the argument regardless of how
          // wide `long` is on the target.
          fprintf(stderr,
                  "WARNING: invalid address 0x%llx for probe (%s,%s) in binary %s\n",
                  (unsigned long long)probe.pc, probe.provider, probe.name,
                  binpath);
        else
          callback(binpath, &probe, payload);
      }
    }
  }
  return 0;
}
136
/*
 * Enumerate the USDT probes of an ELF object.
 *
 * Three independent scans over the section table are made:
 *   1. find the address of the first executable section (used to reject
 *      probes with an implausible pc),
 *   2. find the ".probes" section header (used to translate semaphore
 *      addresses into file offsets; optional),
 *   3. hand every ".note.stapsdt" note section to do_note_segment().
 *
 * Each scan restarts from the first section.  Continuing from wherever the
 * previous scan stopped would only work when the sections happen to appear
 * in the order executable < .probes < .note.stapsdt.
 *
 * Returns 0 on success, -1 if the section-name string table cannot be
 * located or a note section fails to process.
 */
static int listprobes(Elf *e, bcc_elf_probecb callback, const char *binpath,
                      void *payload) {
  Elf_Scn *section;
  bool found_probes_shdr = false;
  size_t stridx;
  int elf_class = gelf_getclass(e);
  uint64_t first_inst_offset = 0;
  GElf_Shdr probes_shdr = {};

  if (elf_getshdrstrndx(e, &stridx) != 0)
    return -1;

  // Get the offset to the first instruction
  section = NULL;
  while ((section = elf_nextscn(e, section)) != 0) {
    GElf_Shdr header;

    if (!gelf_getshdr(section, &header))
      continue;

    // The elf file section layout is based on increasing virtual address,
    // getting the first section with SHF_EXECINSTR is enough.
    if (header.sh_flags & SHF_EXECINSTR) {
      first_inst_offset = header.sh_addr;
      break;
    }
  }

  // Locate the optional ".probes" section that holds the semaphores.
  section = NULL;
  while ((section = elf_nextscn(e, section)) != 0) {
    GElf_Shdr header;
    char *name;

    if (!gelf_getshdr(section, &header))
      continue;

    name = elf_strptr(e, stridx, header.sh_name);
    if (name && !strcmp(name, ".probes")) {
      probes_shdr = header;
      found_probes_shdr = true;
      break;
    }
  }

  // Process the probe notes themselves.
  section = NULL;
  while ((section = elf_nextscn(e, section)) != 0) {
    GElf_Shdr header;
    char *name;

    if (!gelf_getshdr(section, &header))
      continue;

    if (header.sh_type != SHT_NOTE)
      continue;

    name = elf_strptr(e, stridx, header.sh_name);
    if (name && !strcmp(name, ".note.stapsdt")) {
      GElf_Shdr *shdr_ptr = found_probes_shdr ? &probes_shdr : NULL;
      if (do_note_segment(section, shdr_ptr, elf_class, callback, binpath,
                          first_inst_offset, payload) < 0)
        return -1;
    }
  }

  return 0;
}
197
/*
 * Invoke `callback` for every USDT probe found in the ELF file at `path`.
 *
 * Returns -1 if the file cannot be opened as ELF; otherwise returns whatever
 * listprobes() reports.  `payload` is passed through to the callback as-is.
 */
int bcc_elf_foreach_usdt(const char *path, bcc_elf_probecb callback,
                         void *payload) {
  Elf *elf;
  int elf_fd;
  int status;

  if (openelf(path, &elf, &elf_fd) < 0)
    return -1;

  status = listprobes(elf, callback, path, payload);

  // Release the handle before the descriptor it was built on.
  elf_end(elf);
  close(elf_fd);

  return status;
}
212
get_section(Elf * e,const char * section_name,GElf_Shdr * section_hdr,size_t * section_idx)213 static Elf_Scn * get_section(Elf *e, const char *section_name,
214 GElf_Shdr *section_hdr, size_t *section_idx) {
215 Elf_Scn *section = NULL;
216 GElf_Shdr header;
217 char *name;
218
219 size_t stridx;
220 if (elf_getshdrstrndx(e, &stridx) != 0)
221 return NULL;
222
223 size_t index;
224 for (index = 1; (section = elf_nextscn(e, section)) != 0; index++) {
225 if (!gelf_getshdr(section, &header))
226 continue;
227
228 name = elf_strptr(e, stridx, header.sh_name);
229 if (name && !strcmp(name, section_name)) {
230 if (section_hdr)
231 *section_hdr = header;
232 if (section_idx)
233 *section_idx = index;
234 return section;
235 }
236 }
237
238 return NULL;
239 }
240
/*
 * Report the symbols of one symbol-table section.
 *
 * `stridx` is the index of the string table holding the symbol names and
 * `symsize` the size of one table entry (the section's sh_entsize).
 * Symbols that are unnamed, have a zero value, or whose type bit is not set
 * in option->use_symbol_type are skipped.  Depending on
 * option->lazy_symbolize either `callback_lazy` (string-table coordinates)
 * or `callback` (resolved name) is invoked.
 *
 * Returns  0 when the whole section was processed,
 *          1 when a callback returned a negative value (stop requested),
 *         -1 when the section is malformed: the entry size is zero or the
 *            data size is not a multiple of it.
 */
static int list_in_scn(Elf *e, Elf_Scn *section, size_t stridx, size_t symsize,
                       struct bcc_symbol_option *option,
                       bcc_elf_symcb callback, bcc_elf_symcb_lazy callback_lazy,
                       void *payload, bool debugfile) {
  Elf_Data *data = NULL;

#if defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  size_t opdidx = 0;
  Elf_Scn *opdsec = NULL;
  GElf_Shdr opdshdr = {};
  Elf_Data *opddata = NULL;

  opdsec = get_section(e, ".opd", &opdshdr, &opdidx);
  if (opdsec && opdshdr.sh_type == SHT_PROGBITS)
    opddata = elf_getdata(opdsec, NULL);
#endif

  while ((data = elf_getdata(section, data)) != 0) {
    size_t i, symcount;

    // sh_entsize comes straight from the file; a zero value would otherwise
    // be used as a divisor below.
    if (symsize == 0 || data->d_size % symsize)
      return -1;

    symcount = data->d_size / symsize;

    for (i = 0; i < symcount; ++i) {
      GElf_Sym sym;
      const char *name;
      size_t name_len;

      if (!gelf_getsym(data, (int)i, &sym))
        continue;

      if ((name = elf_strptr(e, stridx, sym.st_name)) == NULL)
        continue;
      if (name[0] == 0)
        continue;
      name_len = strlen(name);

      if (sym.st_value == 0)
        continue;

      uint32_t st_type = ELF_ST_TYPE(sym.st_info);
      if (!(option->use_symbol_type & (1 << st_type)))
        continue;

#ifdef __powerpc64__
#if defined(_CALL_ELF) && _CALL_ELF == 2
      if (option->use_symbol_type & (1 << STT_PPC64_ELFV2_SYM_LEP)) {
        /*
         * The PowerPC 64-bit ELF v2 ABI says that the 3 most significant bits
         * in the st_other field of the symbol table specifies the number of
         * instructions between a function's Global Entry Point (GEP) and Local
         * Entry Point (LEP).
         */
        switch (sym.st_other >> 5) {
          /* GEP and LEP are the same for 0 or 1, usage is reserved for 7 */
          /* If 2, LEP is 1 instruction past the GEP */
          case 2: sym.st_value += 4; break;
          /* If 3, LEP is 2 instructions past the GEP */
          case 3: sym.st_value += 8; break;
          /* If 4, LEP is 4 instructions past the GEP */
          case 4: sym.st_value += 16; break;
          /* If 5, LEP is 8 instructions past the GEP */
          case 5: sym.st_value += 32; break;
          /* If 6, LEP is 16 instructions past the GEP */
          case 6: sym.st_value += 64; break;
        }
      }
#else
      if (opddata && sym.st_shndx == opdidx) {
        size_t offset = sym.st_value - opdshdr.sh_addr;
        /* Find the function descriptor */
        uint64_t *descr = (uint64_t *)((char *)opddata->d_buf + offset);
        /* Read the actual entry point address from the descriptor */
        sym.st_value = *descr;
      }
#endif
#endif

      int ret;
      if (option->lazy_symbolize)
        ret = callback_lazy(stridx, sym.st_name, name_len, sym.st_value,
                            sym.st_size, debugfile, payload);
      else
        ret = callback(name, sym.st_value, sym.st_size, payload);
      if (ret < 0)
        return 1;      // signal termination to caller
    }
  }

  return 0;
}
332
/*
 * Feed every SHT_SYMTAB / SHT_DYNSYM section of `e` to list_in_scn().
 *
 * Iteration stops quietly (return 0) as soon as a callback asks for
 * termination; a negative result from list_in_scn() is returned unchanged.
 * Returns 0 once all sections have been visited.
 */
static int listsymbols(Elf *e, bcc_elf_symcb callback,
                       bcc_elf_symcb_lazy callback_lazy, void *payload,
                       struct bcc_symbol_option *option, bool debugfile) {
  Elf_Scn *scn;

  for (scn = elf_nextscn(e, NULL); scn != NULL; scn = elf_nextscn(e, scn)) {
    GElf_Shdr shdr;
    int rc;

    if (!gelf_getshdr(scn, &shdr))
      continue;

    // Only symbol tables are of interest here.
    if (!(shdr.sh_type == SHT_SYMTAB || shdr.sh_type == SHT_DYNSYM))
      continue;

    // sh_link names the string table, sh_entsize the size of one symbol.
    rc = list_in_scn(e, scn, shdr.sh_link, shdr.sh_entsize,
                     option, callback, callback_lazy, payload, debugfile);

    if (rc == 1)
      return 0;  // callback signaled termination

    if (rc < 0)
      return rc;
  }

  return 0;
}
358
get_section_elf_data(Elf * e,const char * section_name)359 static Elf_Data * get_section_elf_data(Elf *e, const char *section_name) {
360 Elf_Scn *section = get_section(e, section_name, NULL, NULL);
361 if (section)
362 return elf_getdata(section, NULL);
363 return NULL;
364 }
365
/*
 * Read the ".gnu_debuglink" section of `e`.
 *
 * On success returns 1, points *debug_file at the file name stored at the
 * start of the section (memory owned by libelf, not copied) and stores the
 * last four bytes of the section in *crc.  Returns 0 with *debug_file == NULL
 * and *crc == 0 when the section is missing or is 5 bytes or shorter.
 */
static int find_debuglink(Elf *e, char **debug_file, unsigned int *crc) {
  Elf_Data *link = get_section_elf_data(e, ".gnu_debuglink");
  char *contents;

  *debug_file = NULL;
  *crc = 0;

  if (link == NULL || link->d_size <= 5)
    return 0;

  contents = (char *)link->d_buf;
  *debug_file = contents;
  // The checksum occupies the final 4 bytes of the section.
  *crc = *(unsigned int *)(contents + link->d_size - 4);

  return contents != NULL;
}
381
/*
 * Extract the GNU build id of `e` as a lowercase hex string.
 *
 * The ".note.gnu.build-id" section is expected to hold a single note whose
 * name ("GNU") starts at byte 12 and whose descriptor (the id) starts at
 * byte 16.  Two hex digits are written to `buildid` per id byte, followed by
 * a terminating NUL.
 *
 * `buildid` carries no size, and every caller in this file passes a 128-byte
 * buffer.  Ids longer than 63 bytes would not fit such a buffer, so they are
 * rejected instead of being written past its end.
 *
 * Returns 1 when an id was written, 0 otherwise.
 */
static int find_buildid(Elf *e, char *buildid) {
  enum {
    NOTE_NAME_OFFSET = 12,  // after the namesz/descsz/type words
    NOTE_DESC_OFFSET = 16,  // after the 4-byte "GNU\0" name
    MAX_BUILDID_BYTES = 63  // 2 * 63 hex digits + NUL fit in 128 bytes
  };
  Elf_Data *data = get_section_elf_data(e, ".note.gnu.build-id");
  if (!data || data->d_size <= NOTE_DESC_OFFSET ||
      strcmp((char *)data->d_buf + NOTE_NAME_OFFSET, "GNU"))
    return 0;

  char *buf = (char *)data->d_buf + NOTE_DESC_OFFSET;
  size_t length = data->d_size - NOTE_DESC_OFFSET;
  size_t i = 0;

  if (length > MAX_BUILDID_BYTES)
    return 0;

  for (i = 0; i < length; ++i) {
    sprintf(buildid + (i * 2), "%02hhx", buf[i]);
  }

  return 1;
}
396
// The CRC algorithm used by GNU debuglink. Taken from:
//    https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html
//
// `crc` is the running checksum (0 for a fresh computation); the return value
// can be fed back in to continue over a following buffer.
static unsigned int gnu_debuglink_crc32(unsigned int crc,
                                        char *buf, size_t len) {
  static const unsigned int crc32_table[256] =
  {
    0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419,
    0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4,
    0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07,
    0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
    0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856,
    0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
    0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4,
    0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
    0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3,
    0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a,
    0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599,
    0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
    0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190,
    0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f,
    0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e,
    0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
    0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed,
    0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
    0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3,
    0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
    0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a,
    0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5,
    0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010,
    0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
    0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17,
    0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6,
    0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615,
    0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
    0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344,
    0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
    0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a,
    0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
    0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1,
    0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c,
    0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef,
    0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
    0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe,
    0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31,
    0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c,
    0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
    0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b,
    0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
    0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1,
    0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
    0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278,
    0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7,
    0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66,
    0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
    0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605,
    0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8,
    0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b,
    0x2d02ef8d
  };
  unsigned int state = ~crc & 0xffffffff;
  size_t pos;

  // One table lookup per input byte; only the low 8 bits select the slot.
  for (pos = 0; pos < len; ++pos) {
    unsigned int slot = (state ^ buf[pos]) & 0xff;

    state = crc32_table[slot] ^ (state >> 8);
  }

  return ~state & 0xffffffff;
}
463
/*
 * Check that the debuglink CRC of the file at `file` equals `crc`.
 *
 * Returns 1 on a match and 0 on a mismatch or when the file cannot be
 * opened, stat'ed or mapped.
 */
static int verify_checksum(const char *file, unsigned int crc) {
  struct stat st;
  int fd;
  void *buf;
  unsigned int actual;

  fd = open(file, O_RDONLY);
  if (fd < 0)
    return 0;

  if (fstat(fd, &st) < 0) {
    close(fd);
    return 0;
  }

  if (st.st_size == 0) {
    // A zero-length mapping is not allowed; the checksum over no bytes at
    // all is 0, so compare against that directly.
    close(fd);
    return crc == 0;
  }

  buf = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
  // mmap() reports failure with MAP_FAILED, not with a null pointer.
  if (buf == MAP_FAILED) {
    close(fd);
    return 0;
  }

  actual = gnu_debuglink_crc32(0, buf, st.st_size);

  munmap(buf, st.st_size);
  close(fd);
  return actual == crc;
}
491
// Tell whether two path names refer to one and the same file.  Both paths are
// resolved with stat(), so hard links and symbolic links to a file compare
// equal to it.  Returns false if either path cannot be stat'ed.
static bool same_file(char *a, const char *b)
{
  struct stat info_a, info_b;

  if (stat(a, &info_a) != 0)
    return false;
  if (stat(b, &info_b) != 0)
    return false;

  // Same device and same inode number means same file.
  return info_a.st_dev == info_b.st_dev && info_a.st_ino == info_b.st_ino;
}
506
/*
 * Locate the separate debuginfo file named by the ".gnu_debuglink" section.
 *
 * Candidates are tried in this order, the first existing one wins:
 *   <dir of binpath>/<name>          (unless it is the binary itself)
 *   <dir of binpath>/.debug/<name>
 *   /usr/lib/debug<dir of binpath>/<name>
 *
 * When check_crc is non-zero the chosen file must also match the CRC stored
 * in the debuglink section.
 *
 * Returns a malloc'ed path the caller must free, or NULL when nothing
 * suitable was found or memory ran out.
 */
static char *find_debug_via_debuglink(Elf *e, const char *binpath,
                                      int check_crc) {
  char fullpath[PATH_MAX];
  char *tmppath;
  char *bindir = NULL;
  char *res = NULL;
  unsigned int crc;
  char *name;  // the name of the debuginfo file

  if (!find_debuglink(e, &name, &crc))
    return NULL;

  // dirname() is given a private copy of the path.  Without the copy there
  // is no directory to search: dirname(NULL) would quietly answer "." and
  // send the lookup to the current working directory.
  tmppath = strdup(binpath);
  if (!tmppath)
    return NULL;
  bindir = dirname(tmppath);

  // Search for the file in 'binpath', but ignore the file we find if it
  // matches the binary itself: the binary will always be probed later on,
  // and it might contain poorer symbols (e.g. stripped or partial symbols)
  // than the external debuginfo that might be available elsewhere.
  snprintf(fullpath, sizeof(fullpath),"%s/%s", bindir, name);
  if (same_file(fullpath, binpath) != true && access(fullpath, F_OK) != -1) {
    res = strdup(fullpath);
    goto DONE;
  }

  // Search for the file in 'binpath'/.debug
  snprintf(fullpath, sizeof(fullpath), "%s/.debug/%s", bindir, name);
  if (access(fullpath, F_OK) != -1) {
    res = strdup(fullpath);
    goto DONE;
  }

  // Search for the file in the global debug directory /usr/lib/debug/'binpath'
  snprintf(fullpath, sizeof(fullpath), "/usr/lib/debug%s/%s", bindir, name);
  if (access(fullpath, F_OK) != -1) {
    res = strdup(fullpath);
    goto DONE;
  }

DONE:
  free(tmppath);
  if (res && check_crc && !verify_checksum(res, crc)) {
    free(res);
    return NULL;
  }
  return res;
}
554
/*
 * Look for separate debuginfo under the global build-id directory.
 *
 * The candidate is /usr/lib/debug/.build-id/mm/nnnn...nnnn.debug, where "mm"
 * is the first two hex digits of the build id and "nnnn..." the remainder.
 *
 * Returns a malloc'ed path (caller frees) if that file exists, NULL if the
 * object has no build id or the file is absent.
 */
static char *find_debug_via_buildid(Elf *e) {
  char id_hex[128];  // currently 40 seems to be default, let's be safe
  char candidate[PATH_MAX];

  if (!find_buildid(e, id_hex))
    return NULL;

  snprintf(candidate, sizeof(candidate), "/usr/lib/debug/.build-id/%c%c/%s.debug",
           id_hex[0], id_hex[1], id_hex + 2);

  return access(candidate, F_OK) != -1 ? strdup(candidate) : NULL;
}
574
/*
 * Look for a copy of `path` below the directory named by the BCC_SYMFS
 * environment variable.
 *
 * A leading "/proc/<pid>/root/" prefix is stripped from `path` before it is
 * appended to the symfs directory.  If `e` carries a build id, the candidate
 * is accepted only when it has the same build id; if `e` has none, the
 * candidate is accepted as soon as it can be opened as ELF.
 *
 * Returns a malloc'ed path (caller frees) or NULL.
 */
static char *find_debug_via_symfs(Elf *e, const char* path) {
  char candidate[PATH_MAX];
  char wanted_id[128];
  char found_id[128];
  Elf *candidate_elf = NULL;
  int candidate_fd = -1;
  char *result = NULL;
  int have_buildid;
  int prefix_len = 0;

  const char *root = getenv("BCC_SYMFS");
  if (root == NULL || *root == '\0')
    return NULL;

  have_buildid = find_buildid(e, wanted_id);

  // %n records how much of the namespace prefix matched; it stays 0 when the
  // path does not start with that prefix.
  sscanf(path, "/proc/%*u/root/%n", &prefix_len);
  path += prefix_len;

  snprintf(candidate, sizeof(candidate), "%s/%s", root, path);
  if (access(candidate, F_OK) == -1)
    return NULL;

  if (openelf(candidate, &candidate_elf, &candidate_fd) < 0)
    return NULL;

  if (!have_buildid ||
      (find_buildid(candidate_elf, found_id) &&
       strncmp(wanted_id, found_id, sizeof(wanted_id)) == 0))
    result = strdup(candidate);

  elf_end(candidate_elf);
  close(candidate_fd);

  return result;
}
626
627 #ifdef HAVE_LIBDEBUGINFOD
/*
 * Ask debuginfod for the debuginfo matching the build id of `e`.
 *
 * Returns the path reported by debuginfod_find_debuginfo(), or NULL when the
 * object has no build id, no client could be created, or the lookup failed.
 */
static char *find_debug_via_debuginfod(Elf *e){
  char id_hex[128];
  char *located = NULL;
  debuginfod_client *session;
  int cache_fd;

  if (!find_buildid(e, id_hex))
    return NULL;

  session = debuginfod_begin();
  if (session == NULL)
    return NULL;

  // The build id is handed over as the hex string produced by find_buildid(),
  // with a length argument of 0.
  // In case of an error, the function returns a negative error code and
  // `located` stays NULL.
  cache_fd = debuginfod_find_debuginfo(session, (const unsigned char *) id_hex,
                                       0, &located);
  // Only the path is needed; the descriptor that comes with it is not.
  if (cache_fd >= 0)
    close(cache_fd);

  debuginfod_end(session);
  return located;
}
650 #endif
651
/*
 * Find a separate debuginfo file for the ELF object `e` loaded from `path`.
 *
 * Returns a malloc'ed path (caller frees) from the first lookup strategy that
 * succeeds, or NULL if none does.
 */
static char *find_debug_file(Elf* e, const char* path, int check_crc) {
  char *found;

  // If there is a separate debuginfo file, try to locate and read it, first
  // using symfs, then using the build-id section, finally using the debuglink
  // section. These rules are what perf and gdb follow.
  // See:
  // - https://github.com/torvalds/linux/blob/v5.2/tools/perf/Documentation/perf-report.txt#L325
  // - https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html
  found = find_debug_via_symfs(e, path);
  if (found)
    return found;

  found = find_debug_via_buildid(e);
  if (found)
    return found;

  found = find_debug_via_debuglink(e, path, check_crc);
#ifdef HAVE_LIBDEBUGINFOD
  if (found)
    return found;

  // Last resort when built with debuginfod support.
  found = find_debug_via_debuginfod(e);
#endif

  return found;
}
673
674
/*
 * Shared implementation of bcc_elf_foreach_sym() and its lazy variant.
 *
 * When option->use_debug_file is set, the symbols of a separate debuginfo
 * file (if one can be found) are reported first, then those of `path`
 * itself.  The result of the debuginfo pass is not propagated.
 *
 * Returns -1 when `option` is NULL or `path` cannot be opened as ELF,
 * otherwise the result of listsymbols() for `path`.
 */
static int foreach_sym_core(const char *path, bcc_elf_symcb callback,
                            bcc_elf_symcb_lazy callback_lazy,
                            struct bcc_symbol_option *option, void *payload,
                            int is_debug_file) {
  Elf *elf;
  int elf_fd;
  int status;

  if (option == NULL)
    return -1;

  if (openelf(path, &elf, &elf_fd) < 0)
    return -1;

  // The is_debug_file argument helps avoid infinitely resolving debuginfo
  // files for debuginfo files and so on.
  if (!is_debug_file && option->use_debug_file) {
    char *companion = find_debug_file(elf, path, option->check_debug_file_crc);

    if (companion != NULL) {
      foreach_sym_core(companion, callback, callback_lazy, option, payload, 1);
      free(companion);
    }
  }

  status = listsymbols(elf, callback, callback_lazy, payload, option,
                       is_debug_file);

  elf_end(elf);
  close(elf_fd);
  return status;
}
705
/*
 * Call `callback` with the resolved name, address and size of every symbol
 * in the ELF file at `path` that passes the filter in `option`.
 *
 * `option` must point to a struct bcc_symbol_option; its lazy_symbolize
 * field is cleared by this call.  Returns -1 when `option` is NULL or the
 * file cannot be opened, otherwise the result of the symbol walk.
 */
int bcc_elf_foreach_sym(const char *path, bcc_elf_symcb callback,
                        void *option, void *payload) {
  struct bcc_symbol_option *o = option;

  // Reject a missing option block before writing through it.
  if (!o)
    return -1;

  o->lazy_symbolize = 0;
  return foreach_sym_core(path, callback, NULL, o, payload, 0);
}
712
/*
 * Lazy variant of bcc_elf_foreach_sym(): `callback` receives the string-table
 * coordinates of each symbol name instead of the name itself.
 *
 * `option` must point to a struct bcc_symbol_option; its lazy_symbolize
 * field is set by this call.  Returns -1 when `option` is NULL or the file
 * cannot be opened, otherwise the result of the symbol walk.
 */
int bcc_elf_foreach_sym_lazy(const char *path, bcc_elf_symcb_lazy callback,
                             void *option, void *payload) {
  struct bcc_symbol_option *o = option;

  // Reject a missing option block before writing through it.
  if (!o)
    return -1;

  o->lazy_symbolize = 1;
  return foreach_sym_core(path, NULL, callback, o, payload, 0);
}
719
bcc_elf_get_text_scn_info(const char * path,uint64_t * addr,uint64_t * offset)720 int bcc_elf_get_text_scn_info(const char *path, uint64_t *addr,
721 uint64_t *offset) {
722 Elf *e = NULL;
723 int fd = -1, err;
724 Elf_Scn *section = NULL;
725 GElf_Shdr header;
726 size_t stridx;
727 char *name;
728
729 if ((err = openelf(path, &e, &fd)) < 0 ||
730 (err = elf_getshdrstrndx(e, &stridx)) < 0)
731 goto exit;
732
733 err = -1;
734 while ((section = elf_nextscn(e, section)) != 0) {
735 if (!gelf_getshdr(section, &header))
736 continue;
737
738 name = elf_strptr(e, stridx, header.sh_name);
739 if (name && !strcmp(name, ".text")) {
740 *addr = (uint64_t)header.sh_addr;
741 *offset = (uint64_t)header.sh_offset;
742 err = 0;
743 break;
744 }
745 }
746
747 exit:
748 if (e)
749 elf_end(e);
750 if (fd >= 0)
751 close(fd);
752 return err;
753 }
754
/*
 * Call `callback` with (vaddr, memsz, file offset) of every executable
 * PT_LOAD program header of the ELF file at `path`.
 *
 * Returns  0 when all matching headers were reported,
 *          1 when a callback returned a negative value (iteration stops),
 *         -1 when the file cannot be opened as ELF or the number of program
 *            headers cannot be determined.
 */
int bcc_elf_foreach_load_section(const char *path,
                                 bcc_elf_load_sectioncb callback,
                                 void *payload) {
  Elf *e = NULL;
  int fd = -1, err = -1, res;
  size_t nhdrs, i;

  // Nothing is held when openelf() fails, so leave at once rather than going
  // through the cleanup path with a descriptor openelf() already closed.
  if (openelf(path, &e, &fd) < 0)
    return -1;

  if (elf_getphdrnum(e, &nhdrs) != 0)
    goto exit;

  GElf_Phdr header;
  for (i = 0; i < nhdrs; i++) {
    if (!gelf_getphdr(e, (int)i, &header))
      continue;
    if (header.p_type != PT_LOAD || !(header.p_flags & PF_X))
      continue;
    res = callback(header.p_vaddr, header.p_memsz, header.p_offset, payload);
    if (res < 0) {
      err = 1;
      goto exit;
    }
  }
  err = 0;

exit:
  elf_end(e);
  close(fd);
  return err;
}
789
bcc_elf_get_type(const char * path)790 int bcc_elf_get_type(const char *path) {
791 Elf *e;
792 GElf_Ehdr hdr;
793 int fd;
794 void* res = NULL;
795
796 if (openelf(path, &e, &fd) < 0)
797 return -1;
798
799 res = (void*)gelf_getehdr(e, &hdr);
800 elf_end(e);
801 close(fd);
802
803 if (!res)
804 return -1;
805 else
806 return hdr.e_type;
807 }
808
/*
 * Return 1 when `path` is an ELF file (of any type) that the calling process
 * may execute, 0 otherwise.
 */
int bcc_elf_is_exe(const char *path) {
  if (bcc_elf_get_type(path) == -1)
    return 0;

  return access(path, X_OK) == 0;
}
812
/*
 * Return 1 when the ELF file at `path` has type ET_DYN, 0 otherwise
 * (including when it cannot be read as ELF).
 */
int bcc_elf_is_shared_obj(const char *path) {
  const int elf_type = bcc_elf_get_type(path);

  return elf_type == ET_DYN;
}
816
/*
 * Return 1 when `name` is exactly the module name "[vdso]", 0 otherwise.
 */
int bcc_elf_is_vdso(const char *name) {
  return !strcmp(name, "[vdso]");
}
820
821 // -2: Failed
822 // -1: Not initialized
823 // >0: Initialized
824 static int vdso_image_fd = -1;
825
find_vdso(struct mod_info * info,int enter_ns,void * payload)826 static int find_vdso(struct mod_info *info, int enter_ns, void *payload) {
827 int fd;
828 char tmpfile[128];
829 if (!bcc_elf_is_vdso(info->name))
830 return 0;
831
832 uint64_t sz = info->end_addr - info->start_addr;
833 void *image = malloc(sz);
834 if (!image)
835 goto on_error;
836 memcpy(image, (void *)info->start_addr, sz);
837
838 snprintf(tmpfile, sizeof(tmpfile), "/tmp/bcc_%d_vdso_image_XXXXXX", getpid());
839 fd = mkostemp(tmpfile, O_CLOEXEC);
840 if (fd < 0) {
841 fprintf(stderr, "Unable to create temp file: %s\n", strerror(errno));
842 goto on_error;
843 }
844 // Unlink the file to avoid leaking
845 if (unlink(tmpfile) == -1)
846 fprintf(stderr, "Unlink %s failed: %s\n", tmpfile, strerror(errno));
847
848 if (write(fd, image, sz) == -1) {
849 fprintf(stderr, "Failed to write to vDSO image: %s\n", strerror(errno));
850 close(fd);
851 goto on_error;
852 }
853 vdso_image_fd = fd;
854
855 on_error:
856 if (image)
857 free(image);
858 // Always stop the iteration
859 return -1;
860 }
861
/*
 * Call `callback` for every STT_FUNC / STT_GNU_IFUNC symbol of this process's
 * vDSO.
 *
 * The first call copies the vDSO image into a temporary file (see
 * find_vdso()); the descriptor is cached in vdso_image_fd and reused by later
 * calls.  A failed first attempt is remembered and not retried.
 *
 * Returns -1 when no vDSO image is available or it cannot be parsed,
 * otherwise the result of listsymbols().
 */
int bcc_elf_foreach_vdso_sym(bcc_elf_symcb callback, void *payload) {
  Elf *elf;
  int res;
  static struct bcc_symbol_option default_option = {
    .use_debug_file = 0,
    .check_debug_file_crc = 0,
    .use_symbol_type = (1 << STT_FUNC) | (1 << STT_GNU_IFUNC)
  };

  if (vdso_image_fd == -1) {
    // Assume failure until find_vdso() stores a real descriptor.
    vdso_image_fd = -2;
    bcc_procutils_each_module(getpid(), &find_vdso, NULL);
  }
  if (vdso_image_fd == -2)
    return -1;

  if (openelf_fd(vdso_image_fd, &elf) == -1)
    return -1;

  res = listsymbols(elf, callback, NULL, payload, &default_option, 0);

  // A new handle is created on every call, so release it every time; only
  // the descriptor is meant to be cached.
  elf_end(elf);
  return res;
}
882
883 // return value: 0 : success
884 // < 0 : error and no bcc lib found
885 // > 0 : error and bcc lib found
bcc_free_memory_with_file(const char * path)886 static int bcc_free_memory_with_file(const char *path) {
887 unsigned long sym_addr = 0, sym_shndx;
888 Elf_Scn *section = NULL;
889 int fd = -1, err;
890 GElf_Shdr header;
891 Elf *e = NULL;
892
893 if ((err = openelf(path, &e, &fd)) < 0)
894 goto exit;
895
896 // get symbol address of "bcc_free_memory", which
897 // will be used to calculate runtime .text address
898 // range, esp. for shared libraries.
899 err = -1;
900 while ((section = elf_nextscn(e, section)) != 0) {
901 Elf_Data *data = NULL;
902 size_t symsize;
903
904 if (!gelf_getshdr(section, &header))
905 continue;
906
907 if (header.sh_type != SHT_SYMTAB && header.sh_type != SHT_DYNSYM)
908 continue;
909
910 /* iterate all symbols */
911 symsize = header.sh_entsize;
912 while ((data = elf_getdata(section, data)) != 0) {
913 size_t i, symcount = data->d_size / symsize;
914
915 for (i = 0; i < symcount; ++i) {
916 GElf_Sym sym;
917
918 if (!gelf_getsym(data, (int)i, &sym))
919 continue;
920
921 if (GELF_ST_TYPE(sym.st_info) != STT_FUNC)
922 continue;
923
924 const char *name;
925 if ((name = elf_strptr(e, header.sh_link, sym.st_name)) == NULL)
926 continue;
927
928 if (strcmp(name, "bcc_free_memory") == 0) {
929 sym_addr = sym.st_value;
930 sym_shndx = sym.st_shndx;
931 break;
932 }
933 }
934 }
935 }
936
937 // Didn't find bcc_free_memory in the ELF file.
938 if (sym_addr == 0)
939 goto exit;
940
941 int sh_idx = 0;
942 section = NULL;
943 err = 1;
944 while ((section = elf_nextscn(e, section)) != 0) {
945 sh_idx++;
946 if (!gelf_getshdr(section, &header))
947 continue;
948
949 if (sh_idx == sym_shndx) {
950 unsigned long saddr, saddr_n, eaddr;
951 long page_size = sysconf(_SC_PAGESIZE);
952
953 saddr = (unsigned long)bcc_free_memory - sym_addr + header.sh_addr;
954 eaddr = saddr + header.sh_size;
955
956 // adjust saddr and eaddr, start addr needs to be page aligned
957 saddr_n = (saddr + page_size - 1) & ~(page_size - 1);
958 eaddr -= saddr_n - saddr;
959
960 if (madvise((void *)saddr_n, eaddr - saddr_n, MADV_DONTNEED)) {
961 fprintf(stderr, "madvise failed, saddr %lx, eaddr %lx\n", saddr, eaddr);
962 goto exit;
963 }
964
965 err = 0;
966 break;
967 }
968 }
969
970 exit:
971 if (e)
972 elf_end(e);
973 if (fd >= 0)
974 close(fd);
975 return err;
976 }
977
// Free bcc memory
//
// The main purpose of this function is to free llvm/clang text memory
// through madvise MADV_DONTNEED.
//
// bcc could be linked statically or dynamically into the application.
// If it is static linking, there is no easy way to know which region
// inside .text section belongs to llvm/clang, so the whole .text section
// is freed. Otherwise, the process map is searched to find libbcc.so
// library and the whole .text section for that shared library is
// freed.
//
// Note that the text memory used by bcc (mainly llvm/clang) is reclaimable
// in the kernel as it is file backed. But the reclaim process
// may take some time if there is no memory pressure. So this API is mostly
// used by applications that need to lower their RssFile metric immediately
// after loading a BPF program.
//
// Returns 0 on success and a negative value on failure.
int bcc_free_memory() {
  int err;

  // First try whether bcc is statically linked or not
  err = bcc_free_memory_with_file("/proc/self/exe");
  if (err >= 0)
    return -err;

  // Not statically linked, let us find the libbcc.so
  FILE *maps = fopen("/proc/self/maps", "r");
  if (!maps)
    return -1;

  char *line = NULL;
  size_t size = 0;
  while (getline(&line, &size, maps) > 0) {
    char *libbcc = strstr(line, "libbcc.so");
    if (!libbcc)
      continue;

    // Parse the line and get the full libbcc.so path
    unsigned long addr_start, addr_end, offset, inode;
    int path_start = 0, path_end = 0;
    unsigned int devmajor, devminor;
    char perms[8];
    if (sscanf(line, "%lx-%lx %7s %lx %x:%x %lu %n%*[^\n]%n",
               &addr_start, &addr_end, perms, &offset,
               &devmajor, &devminor, &inode,
               &path_start, &path_end) < 7)
      break;

    // Free the text in the bcc dynamic library.
    char libbcc_path[4096];
    // path_end stays 0 when the line carries no path, and a path may be
    // longer than the buffer; skip such lines instead of copying a bogus
    // length.
    if (path_end <= path_start ||
        (size_t)(path_end - path_start) >= sizeof(libbcc_path))
      continue;

    memcpy(libbcc_path, line + path_start, path_end - path_start);
    libbcc_path[path_end - path_start] = '\0';
    err = bcc_free_memory_with_file(libbcc_path);
    // Fold the "error, but library found" case into a negative result.
    err = (err <= 0) ? err : -err;
  }

  fclose(maps);
  free(line);
  return err;
}
1038
bcc_elf_get_buildid(const char * path,char * buildid)1039 int bcc_elf_get_buildid(const char *path, char *buildid)
1040 {
1041 Elf *e;
1042 int fd;
1043
1044 if (openelf(path, &e, &fd) < 0)
1045 return -1;
1046
1047 if (!find_buildid(e, buildid))
1048 return -1;
1049
1050 return 0;
1051 }
1052
bcc_elf_symbol_str(const char * path,size_t section_idx,size_t str_table_idx,char * out,size_t len,int debugfile)1053 int bcc_elf_symbol_str(const char *path, size_t section_idx,
1054 size_t str_table_idx, char *out, size_t len,
1055 int debugfile)
1056 {
1057 Elf *e = NULL, *d = NULL;
1058 int fd = -1, dfd = -1, err = 0;
1059 const char *name;
1060 char *debug_file = NULL;
1061
1062 if (!out)
1063 return -1;
1064
1065 if (openelf(path, &e, &fd) < 0)
1066 return -1;
1067
1068 if (debugfile) {
1069 debug_file = find_debug_file(e, path, 0);
1070 if (!debug_file) {
1071 err = -1;
1072 goto exit;
1073 }
1074
1075 if (openelf(debug_file, &d, &dfd) < 0) {
1076 err = -1;
1077 goto exit;
1078 }
1079
1080 if ((name = elf_strptr(d, section_idx, str_table_idx)) == NULL) {
1081 err = -1;
1082 goto exit;
1083 }
1084 } else {
1085 if ((name = elf_strptr(e, section_idx, str_table_idx)) == NULL) {
1086 err = -1;
1087 goto exit;
1088 }
1089 }
1090
1091 strncpy(out, name, len);
1092
1093 exit:
1094 if (debug_file)
1095 free(debug_file);
1096 if (e)
1097 elf_end(e);
1098 if (d)
1099 elf_end(d);
1100 if (fd >= 0)
1101 close(fd);
1102 if (dfd >= 0)
1103 close(dfd);
1104 return err;
1105 }
1106
#if 0
// Disabled manual test driver: prints the address of one function symbol.
// Usage: <prog> <elf-file> <symbol>
// NOTE(review): bcc_elf_findsym() is not defined in the visible part of this
// file -- confirm it still exists before enabling this block.
#include <stdio.h>

int main(int argc, char *argv[])
{
  uint64_t addr;

  // Both the file and the symbol name are required.
  if (argc < 3) {
    fprintf(stderr, "usage: %s <elf-file> <symbol>\n", argv[0]);
    return 1;
  }

  if (bcc_elf_findsym(argv[1], argv[2], -1, STT_FUNC, &addr) < 0)
    return -1;

  printf("%s: %p\n", argv[2], (void *)addr);
  return 0;
}
#endif
1120