1 /*
2 * Copyright (c) 2016 GitHub, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include <sys/types.h>
17 #include <sys/stat.h>
18 #include <sys/mman.h>
19 #include <errno.h>
20 #include <fcntl.h>
21 #include <unistd.h>
22 #include <string.h>
23 #include <libgen.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <limits.h>
27
28 #include <gelf.h>
29 #include "bcc_elf.h"
30 #include "bcc_proc.h"
31 #include "bcc_syms.h"
32
/* Note type used by SystemTap SDT probe descriptors. */
#define NT_STAPSDT 3
/*
 * Low four bits of a symbol's st_info field, i.e. the symbol type.
 * The argument is parenthesized so the cast applies to the whole expression
 * passed in, not just to its first operand.
 */
#define ELF_ST_TYPE(x) (((uint32_t)(x)) & 0xf)
35
/*
 * Create a read-only libelf handle for an already-open file descriptor.
 *
 * Stores whatever elf_begin() returned in *elf_out.  Returns 0 when a handle
 * was obtained, -1 when libelf rejects the requested API version (in which
 * case *elf_out is left untouched) or when elf_begin() yields NULL.
 */
static int openelf_fd(int fd, Elf **elf_out) {
  Elf *handle;

  if (elf_version(EV_CURRENT) == EV_NONE)
    return -1;

  handle = elf_begin(fd, ELF_C_READ, 0);
  *elf_out = handle;

  return handle ? 0 : -1;
}
46
/*
 * Open `path` read-only and attach a libelf handle to it.
 *
 * On success returns 0 with the descriptor in *fd_out and the handle in
 * *elf_out; the caller releases them with elf_end() and close().
 *
 * On failure returns -1 with nothing left open, and the outputs are reset to
 * NULL / -1.  Resetting *fd_out matters: several callers in this file run
 * `if (fd >= 0) close(fd);` on their error path, and leaving the already
 * closed descriptor number in *fd_out would make them close it twice.
 */
static int openelf(const char *path, Elf **elf_out, int *fd_out) {
  int fd;

  *elf_out = NULL;
  *fd_out = -1;

  fd = open(path, O_RDONLY);
  if (fd < 0)
    return -1;

  if (openelf_fd(fd, elf_out) == -1) {
    close(fd);
    *elf_out = NULL;
    return -1;
  }

  *fd_out = fd;
  return 0;
}
59
parse_stapsdt_note(struct bcc_elf_usdt * probe,const char * desc,int elf_class)60 static const char *parse_stapsdt_note(struct bcc_elf_usdt *probe,
61 const char *desc, int elf_class) {
62 if (elf_class == ELFCLASS32) {
63 probe->pc = *((uint32_t *)(desc));
64 probe->base_addr = *((uint32_t *)(desc + 4));
65 probe->semaphore = *((uint32_t *)(desc + 8));
66 desc = desc + 12;
67 } else {
68 probe->pc = *((uint64_t *)(desc));
69 probe->base_addr = *((uint64_t *)(desc + 8));
70 probe->semaphore = *((uint64_t *)(desc + 16));
71 desc = desc + 24;
72 }
73
74 probe->provider = desc;
75 desc += strlen(desc) + 1;
76
77 probe->name = desc;
78 desc += strlen(desc) + 1;
79
80 probe->arg_fmt = desc;
81 desc += strlen(desc) + 1;
82
83 return desc;
84 }
85
do_note_segment(Elf_Scn * section,int elf_class,bcc_elf_probecb callback,const char * binpath,uint64_t first_inst_offset,void * payload)86 static int do_note_segment(Elf_Scn *section, int elf_class,
87 bcc_elf_probecb callback, const char *binpath,
88 uint64_t first_inst_offset, void *payload) {
89 Elf_Data *data = NULL;
90
91 while ((data = elf_getdata(section, data)) != 0) {
92 size_t offset = 0;
93 GElf_Nhdr hdr;
94 size_t name_off, desc_off;
95
96 while ((offset = gelf_getnote(data, offset, &hdr, &name_off, &desc_off)) !=
97 0) {
98 const char *desc, *desc_end;
99 struct bcc_elf_usdt probe;
100
101 if (hdr.n_type != NT_STAPSDT)
102 continue;
103
104 if (hdr.n_namesz != 8)
105 continue;
106
107 if (memcmp((const char *)data->d_buf + name_off, "stapsdt", 8) != 0)
108 continue;
109
110 desc = (const char *)data->d_buf + desc_off;
111 desc_end = desc + hdr.n_descsz;
112
113 if (parse_stapsdt_note(&probe, desc, elf_class) == desc_end) {
114 if (probe.pc < first_inst_offset)
115 fprintf(stderr,
116 "WARNING: invalid address 0x%lx for probe (%s,%s) in binary %s\n",
117 probe.pc, probe.provider, probe.name, binpath);
118 else
119 callback(binpath, &probe, payload);
120 }
121 }
122 }
123 return 0;
124 }
125
/*
 * Report every USDT probe recorded in the ".note.stapsdt" note sections of
 * `e` through `callback`.
 *
 * The address of the first section flagged SHF_EXECINSTR is used as the
 * lowest plausible probe address and passed down to do_note_segment(); it
 * stays 0 when no such section exists.
 *
 * Returns 0 on success, -1 when the section-name string table cannot be
 * located or when do_note_segment() reports an error.
 */
static int listprobes(Elf *e, bcc_elf_probecb callback, const char *binpath,
                      void *payload) {
  Elf_Scn *section = NULL;
  size_t stridx;
  int elf_class = gelf_getclass(e);
  uint64_t first_inst_offset = 0;

  if (elf_getshdrstrndx(e, &stridx) != 0)
    return -1;

  // Get the offset to the first instruction
  while ((section = elf_nextscn(e, section)) != 0) {
    GElf_Shdr header;

    if (!gelf_getshdr(section, &header))
      continue;

    // The elf file section layout is based on increasing virtual address,
    // getting the first section with SHF_EXECINSTR is enough.
    if (header.sh_flags & SHF_EXECINSTR) {
      first_inst_offset = header.sh_addr;
      break;
    }
  }

  // Restart the walk from the first section.  Without this reset the loop
  // below would resume after the executable section found above and never
  // look at note sections that precede it.
  section = NULL;
  while ((section = elf_nextscn(e, section)) != 0) {
    GElf_Shdr header;
    char *name;

    if (!gelf_getshdr(section, &header))
      continue;

    if (header.sh_type != SHT_NOTE)
      continue;

    name = elf_strptr(e, stridx, header.sh_name);
    if (name && !strcmp(name, ".note.stapsdt")) {
      if (do_note_segment(section, elf_class, callback, binpath,
                          first_inst_offset, payload) < 0)
        return -1;
    }
  }

  return 0;
}
171
/*
 * Invoke `callback` for each USDT probe found in the ELF file at `path`.
 *
 * Returns -1 when the file cannot be opened as ELF, otherwise the result of
 * listprobes().  The file and its libelf handle are released before
 * returning.
 */
int bcc_elf_foreach_usdt(const char *path, bcc_elf_probecb callback,
                         void *payload) {
  Elf *elf;
  int fd;

  if (openelf(path, &elf, &fd) < 0)
    return -1;

  int status = listprobes(elf, callback, path, payload);

  elf_end(elf);
  close(fd);
  return status;
}
186
get_section(Elf * e,const char * section_name,GElf_Shdr * section_hdr,size_t * section_idx)187 static Elf_Scn * get_section(Elf *e, const char *section_name,
188 GElf_Shdr *section_hdr, size_t *section_idx) {
189 Elf_Scn *section = NULL;
190 GElf_Shdr header;
191 char *name;
192
193 size_t stridx;
194 if (elf_getshdrstrndx(e, &stridx) != 0)
195 return NULL;
196
197 size_t index;
198 for (index = 1; (section = elf_nextscn(e, section)) != 0; index++) {
199 if (!gelf_getshdr(section, &header))
200 continue;
201
202 name = elf_strptr(e, stridx, header.sh_name);
203 if (name && !strcmp(name, section_name)) {
204 if (section_hdr)
205 *section_hdr = header;
206 if (section_idx)
207 *section_idx = index;
208 return section;
209 }
210 }
211
212 return NULL;
213 }
214
/*
 * Report the symbols of one symbol-table section through `callback`.
 *
 * `stridx` is the index of the string table holding the symbol names and
 * `symsize` the size of one symbol entry.  Symbols are skipped when they have
 * no name, an empty name, a zero value, or a type whose bit is not set in
 * option->use_symbol_type.  On 64-bit PowerPC the symbol value is adjusted
 * before it is reported (function descriptor lookup on big endian, local
 * entry point offset on little endian).
 *
 * Returns 0 when the whole section was processed, 1 when `callback` returned
 * a negative value to stop the iteration, and -1 when the section is
 * malformed (zero entry size, or data size not a multiple of the entry size).
 */
static int list_in_scn(Elf *e, Elf_Scn *section, size_t stridx, size_t symsize,
                       struct bcc_symbol_option *option,
                       bcc_elf_symcb callback, void *payload) {
  Elf_Data *data = NULL;

  // The entry size is read from the file by the caller; a zero value would
  // make the division below trap.
  if (symsize == 0)
    return -1;

#if defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  size_t opdidx = 0;
  Elf_Scn *opdsec = NULL;
  GElf_Shdr opdshdr = {};
  Elf_Data *opddata = NULL;

  opdsec = get_section(e, ".opd", &opdshdr, &opdidx);
  if (opdsec && opdshdr.sh_type == SHT_PROGBITS)
    opddata = elf_getdata(opdsec, NULL);
#endif

  while ((data = elf_getdata(section, data)) != 0) {
    size_t i, symcount = data->d_size / symsize;

    if (data->d_size % symsize)
      return -1;

    for (i = 0; i < symcount; ++i) {
      GElf_Sym sym;
      const char *name;

      if (!gelf_getsym(data, (int)i, &sym))
        continue;

      if ((name = elf_strptr(e, stridx, sym.st_name)) == NULL)
        continue;
      if (name[0] == 0)
        continue;

      if (sym.st_value == 0)
        continue;

      uint32_t st_type = ELF_ST_TYPE(sym.st_info);
      if (!(option->use_symbol_type & (1 << st_type)))
        continue;

#ifdef __powerpc64__
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
      if (opddata && sym.st_shndx == opdidx) {
        size_t offset = sym.st_value - opdshdr.sh_addr;
        /* Find the function descriptor */
        uint64_t *descr = opddata->d_buf + offset;
        /* Read the actual entry point address from the descriptor */
        sym.st_value = *descr;
      }
#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
      if (option->use_symbol_type & (1 << STT_PPC64LE_SYM_LEP)) {
        /*
         * The PowerPC 64-bit ELF v2 ABI says that the 3 most significant bits
         * in the st_other field of the symbol table specifies the number of
         * instructions between a function's Global Entry Point (GEP) and Local
         * Entry Point (LEP).
         */
        switch (sym.st_other >> 5) {
          /* GEP and LEP are the same for 0 or 1, usage is reserved for 7 */
          /* If 2, LEP is 1 instruction past the GEP */
          case 2: sym.st_value += 4; break;
          /* If 3, LEP is 2 instructions past the GEP */
          case 3: sym.st_value += 8; break;
          /* If 4, LEP is 4 instructions past the GEP */
          case 4: sym.st_value += 16; break;
          /* If 5, LEP is 8 instructions past the GEP */
          case 5: sym.st_value += 32; break;
          /* If 6, LEP is 16 instructions past the GEP */
          case 6: sym.st_value += 64; break;
        }
      }
#endif
#endif

      if (callback(name, sym.st_value, sym.st_size, payload) < 0)
        return 1;      // signal termination to caller
    }
  }

  return 0;
}
297
/*
 * Report the symbols of every SHT_SYMTAB and SHT_DYNSYM section of `e`
 * through `callback`, honouring the filters in `option`.
 *
 * Returns 0 when all symbol tables were processed or when the callback asked
 * to stop early, and the negative value from list_in_scn() when a table is
 * malformed.
 */
static int listsymbols(Elf *e, bcc_elf_symcb callback, void *payload,
                       struct bcc_symbol_option *option) {
  Elf_Scn *scn = NULL;

  while ((scn = elf_nextscn(e, scn)) != NULL) {
    GElf_Shdr shdr;
    int is_symbol_table;
    int status;

    if (!gelf_getshdr(scn, &shdr))
      continue;

    is_symbol_table =
        shdr.sh_type == SHT_SYMTAB || shdr.sh_type == SHT_DYNSYM;
    if (!is_symbol_table)
      continue;

    status = list_in_scn(e, scn, shdr.sh_link, shdr.sh_entsize,
                         option, callback, payload);

    // 1 means the callback requested termination, which is not an error.
    if (status == 1)
      return 0;

    if (status < 0)
      return status;
  }

  return 0;
}
322
get_section_elf_data(Elf * e,const char * section_name)323 static Elf_Data * get_section_elf_data(Elf *e, const char *section_name) {
324 Elf_Scn *section = get_section(e, section_name, NULL, NULL);
325 if (section)
326 return elf_getdata(section, NULL);
327 return NULL;
328 }
329
/*
 * Read the ".gnu_debuglink" section of `e`.
 *
 * The section is interpreted as a NUL-terminated file name followed, in its
 * last four bytes, by a CRC of the debug file.  On success returns 1, points
 * *debug_file at the name inside the section data (not a copy) and stores the
 * CRC in *crc.  Returns 0, with *debug_file == NULL and *crc == 0, when the
 * section is missing, too small, has no data buffer, or does not hold a
 * non-empty name terminated before the CRC.
 */
static int find_debuglink(Elf *e, char **debug_file, unsigned int *crc) {
  Elf_Data *data = NULL;
  char *name;
  size_t name_room;
  uint32_t stored_crc;

  *debug_file = NULL;
  *crc = 0;

  data = get_section_elf_data(e, ".gnu_debuglink");
  if (!data || !data->d_buf || data->d_size <= 5)
    return 0;

  name = (char *)data->d_buf;
  name_room = data->d_size - 4;

  // Callers treat the name as a C string, so it has to be terminated inside
  // the section and before the trailing CRC.
  if (name[0] == '\0' || memchr(name, '\0', name_room) == NULL)
    return 0;

  // Copy the CRC out instead of dereferencing a casted pointer, which would
  // require the address to be suitably aligned.
  memcpy(&stored_crc, name + name_room, sizeof(stored_crc));

  *debug_file = name;
  *crc = stored_crc;
  return 1;
}
345
/*
 * Write the GNU build-id of `e` into `buildid` as a lowercase hexadecimal,
 * NUL-terminated string.
 *
 * The ".note.gnu.build-id" section data is expected to carry the owner name
 * "GNU" at byte offset 12 and the id bytes from offset 16 to the end.
 *
 * `buildid` has no explicit size; the caller in this file passes a 128-byte
 * array.  Ids longer than 63 bytes (126 hex digits plus the terminator) are
 * therefore rejected instead of being written past the end of that buffer.
 *
 * Returns 1 when an id was written, 0 otherwise.
 */
static int find_buildid(Elf *e, char *buildid) {
  enum { BUILDID_MAX_BYTES = 63 };

  Elf_Data *data = get_section_elf_data(e, ".note.gnu.build-id");
  if (!data || !data->d_buf || data->d_size <= 16 ||
      strcmp((char *)data->d_buf + 12, "GNU"))
    return 0;

  const unsigned char *raw = (const unsigned char *)data->d_buf + 16;
  size_t length = data->d_size - 16;
  size_t i = 0;

  if (length > BUILDID_MAX_BYTES)
    return 0;

  for (i = 0; i < length; ++i) {
    // Each call writes two digits and a NUL; the next one overwrites the NUL.
    sprintf(buildid + (i * 2), "%02hhx", raw[i]);
  }

  return 1;
}
360
// CRC used by the GNU debuglink mechanism. Algorithm and table come from:
// https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html
//
// Folds the `len` bytes at `buf` into the running checksum `crc` and returns
// the updated value. Pass 0 as `crc` for the first chunk and the previous
// return value for each following chunk.
static unsigned int gnu_debuglink_crc32(unsigned int crc,
                                        char *buf, size_t len) {
  static const unsigned int lookup[256] =
  {
    0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419,
    0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4,
    0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07,
    0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
    0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856,
    0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
    0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4,
    0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
    0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3,
    0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a,
    0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599,
    0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
    0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190,
    0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f,
    0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e,
    0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
    0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed,
    0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
    0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3,
    0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
    0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a,
    0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5,
    0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010,
    0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
    0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17,
    0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6,
    0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615,
    0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
    0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344,
    0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
    0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a,
    0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
    0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1,
    0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c,
    0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef,
    0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
    0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe,
    0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31,
    0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c,
    0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
    0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b,
    0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
    0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1,
    0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
    0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278,
    0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7,
    0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66,
    0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
    0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605,
    0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8,
    0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b,
    0x2d02ef8d
  };
  size_t pos;

  // The checksum is kept inverted while bytes are folded in.
  crc = ~crc & 0xffffffff;
  for (pos = 0; pos < len; ++pos) {
    unsigned int slot = (crc ^ buf[pos]) & 0xff;

    crc = lookup[slot] ^ (crc >> 8);
  }
  return ~crc & 0xffffffff;
}
427
/*
 * Check that the contents of `file` hash to `crc` under the GNU debuglink
 * CRC.
 *
 * Returns 1 on a match and 0 on a mismatch or when the file cannot be opened,
 * inspected or mapped (which includes an empty file, since a zero-length
 * mapping is refused by mmap).
 */
static int verify_checksum(const char *file, unsigned int crc) {
  struct stat st;
  int fd;
  void *buf;
  unsigned int actual;

  fd = open(file, O_RDONLY);
  if (fd < 0)
    return 0;

  if (fstat(fd, &st) < 0) {
    close(fd);
    return 0;
  }

  buf = mmap(NULL, (size_t)st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
  // mmap reports failure with MAP_FAILED, not with a NULL pointer.
  if (buf == MAP_FAILED) {
    close(fd);
    return 0;
  }

  actual = gnu_debuglink_crc32(0, buf, (size_t)st.st_size);

  munmap(buf, (size_t)st.st_size);
  close(fd);
  return actual == crc;
}
455
/*
 * Locate the separate debuginfo file named by the ".gnu_debuglink" section of
 * `e`, the ELF file found at `binpath`.
 *
 * Three locations are probed in order: the directory of the binary, its
 * ".debug" subdirectory, and the same directory below /usr/lib/debug.  A
 * candidate whose path does not fit in PATH_MAX is skipped rather than probed
 * in truncated form.  When `check_crc` is non-zero the first existing
 * candidate must also match the CRC recorded in the section.
 *
 * Returns a malloc()ed path that the caller must free(), or NULL when no
 * acceptable file exists or memory cannot be allocated.
 */
static char *find_debug_via_debuglink(Elf *e, const char *binpath,
                                      int check_crc) {
  char fullpath[PATH_MAX];
  char *tmppath;
  char *bindir = NULL;
  char *res = NULL;
  unsigned int crc;
  char *name;  // the name of the debuginfo file
  int n;

  if (!find_debuglink(e, &name, &crc))
    return NULL;

  // dirname() may modify its argument, so work on a private copy.
  tmppath = strdup(binpath);
  if (!tmppath)
    return NULL;
  bindir = dirname(tmppath);

  // Search for the file in 'binpath', but ignore the file we find if it
  // matches the binary itself: the binary will always be probed later on,
  // and it might contain poorer symbols (e.g. stripped or partial symbols)
  // than the external debuginfo that might be available elsewhere.
  n = snprintf(fullpath, sizeof(fullpath), "%s/%s", bindir, name);
  if (n >= 0 && (size_t)n < sizeof(fullpath) &&
      strcmp(fullpath, binpath) != 0 && access(fullpath, F_OK) != -1) {
    res = strdup(fullpath);
    goto DONE;
  }

  // Search for the file in 'binpath'/.debug
  n = snprintf(fullpath, sizeof(fullpath), "%s/.debug/%s", bindir, name);
  if (n >= 0 && (size_t)n < sizeof(fullpath) &&
      access(fullpath, F_OK) != -1) {
    res = strdup(fullpath);
    goto DONE;
  }

  // Search for the file in the global debug directory /usr/lib/debug/'binpath'
  n = snprintf(fullpath, sizeof(fullpath), "/usr/lib/debug%s/%s", bindir, name);
  if (n >= 0 && (size_t)n < sizeof(fullpath) &&
      access(fullpath, F_OK) != -1) {
    res = strdup(fullpath);
    goto DONE;
  }

DONE:
  free(tmppath);
  if (res && check_crc && !verify_checksum(res, crc)) {
    free(res);
    return NULL;
  }
  return res;
}
503
/*
 * Locate the separate debuginfo file of `e` through its GNU build-id.
 *
 * The file is looked up in the global debug directory under
 *   /usr/lib/debug/.build-id/mm/nnnn...nnnn.debug
 * where "mm" are the first two hex digits of the build-id and "nnnn...nnnn"
 * the remaining ones.
 *
 * Returns a malloc()ed path that the caller must free(), or NULL when the
 * binary has no build-id or the file does not exist.
 */
static char *find_debug_via_buildid(Elf *e) {
  char hex_id[128];  // currently 40 seems to be default, let's be safe
  char candidate[PATH_MAX];

  if (!find_buildid(e, hex_id))
    return NULL;

  snprintf(candidate, sizeof(candidate), "/usr/lib/debug/.build-id/%c%c/%s.debug",
           hex_id[0], hex_id[1], hex_id + 2);

  return access(candidate, F_OK) != -1 ? strdup(candidate) : NULL;
}
523
/*
 * Report the symbols of the ELF file at `path` through `callback`.
 *
 * When option->use_debug_file is set and `path` is not itself a debuginfo
 * file, a separate debuginfo file is looked up first, by build-id and then by
 * debuglink, which is the order GDB follows (see
 * https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html).
 * Its symbols are reported before those of `path`, and the result of that
 * pass is ignored.
 *
 * `is_debug_file` is non-zero for that nested pass so that debuginfo files
 * are not resolved again for debuginfo files, and so on without end.
 *
 * Returns -1 when `option` is NULL or `path` cannot be opened as ELF,
 * otherwise the result of listsymbols() for `path`.
 */
static int foreach_sym_core(const char *path, bcc_elf_symcb callback,
                            struct bcc_symbol_option *option, void *payload,
                            int is_debug_file) {
  Elf *elf;
  int fd;

  if (option == NULL)
    return -1;

  if (openelf(path, &elf, &fd) < 0)
    return -1;

  if (option->use_debug_file && !is_debug_file) {
    char *debug_path = find_debug_via_buildid(elf);

    if (debug_path == NULL)
      debug_path = find_debug_via_debuglink(elf, path,
                                            option->check_debug_file_crc);

    if (debug_path != NULL) {
      foreach_sym_core(debug_path, callback, option, payload, 1);
      free(debug_path);
    }
  }

  int status = listsymbols(elf, callback, payload, option);

  elf_end(elf);
  close(fd);
  return status;
}
559
/*
 * Public entry point for symbol iteration over the ELF file at `path`.
 * `option` is interpreted as a struct bcc_symbol_option pointer; the call is
 * forwarded to foreach_sym_core() with `path` marked as not being a debuginfo
 * file.
 */
int bcc_elf_foreach_sym(const char *path, bcc_elf_symcb callback,
                        void *option, void *payload) {
  struct bcc_symbol_option *sym_option = (struct bcc_symbol_option*)option;

  return foreach_sym_core(path, callback, sym_option, payload, 0);
}
565
bcc_elf_get_text_scn_info(const char * path,uint64_t * addr,uint64_t * offset)566 int bcc_elf_get_text_scn_info(const char *path, uint64_t *addr,
567 uint64_t *offset) {
568 Elf *e = NULL;
569 int fd = -1, err;
570 Elf_Scn *section = NULL;
571 GElf_Shdr header;
572 size_t stridx;
573 char *name;
574
575 if ((err = openelf(path, &e, &fd)) < 0 ||
576 (err = elf_getshdrstrndx(e, &stridx)) < 0)
577 goto exit;
578
579 err = -1;
580 while ((section = elf_nextscn(e, section)) != 0) {
581 if (!gelf_getshdr(section, &header))
582 continue;
583
584 name = elf_strptr(e, stridx, header.sh_name);
585 if (name && !strcmp(name, ".text")) {
586 *addr = (uint64_t)header.sh_addr;
587 *offset = (uint64_t)header.sh_offset;
588 err = 0;
589 break;
590 }
591 }
592
593 exit:
594 if (e)
595 elf_end(e);
596 if (fd >= 0)
597 close(fd);
598 return err;
599 }
600
/*
 * Invoke `callback` for every executable PT_LOAD program header of the ELF
 * file at `path`, passing its virtual address, memory size, file offset and
 * `payload`.
 *
 * Returns 0 when all headers were visited, 1 when `callback` returned a
 * negative value (which stops the iteration), and -1 when the file cannot be
 * opened as ELF or its program header count cannot be read.
 */
int bcc_elf_foreach_load_section(const char *path,
                                 bcc_elf_load_sectioncb callback,
                                 void *payload) {
  Elf *e = NULL;
  int fd = -1, err = -1, res;
  size_t nhdrs, i;
  GElf_Phdr header;

  // openelf() closes whatever it opened when it fails, so return directly:
  // going through the cleanup below could close the descriptor a second time.
  if (openelf(path, &e, &fd) < 0)
    return -1;

  if (elf_getphdrnum(e, &nhdrs) != 0)
    goto exit;

  for (i = 0; i < nhdrs; i++) {
    if (!gelf_getphdr(e, (int)i, &header))
      continue;
    if (header.p_type != PT_LOAD || !(header.p_flags & PF_X))
      continue;
    res = callback(header.p_vaddr, header.p_memsz, header.p_offset, payload);
    if (res < 0) {
      err = 1;
      goto exit;
    }
  }
  err = 0;

exit:
  if (e)
    elf_end(e);
  if (fd >= 0)
    close(fd);
  return err;
}
635
bcc_elf_get_type(const char * path)636 int bcc_elf_get_type(const char *path) {
637 Elf *e;
638 GElf_Ehdr hdr;
639 int fd;
640 void* res = NULL;
641
642 if (openelf(path, &e, &fd) < 0)
643 return -1;
644
645 res = (void*)gelf_getehdr(e, &hdr);
646 elf_end(e);
647 close(fd);
648
649 if (!res)
650 return -1;
651 else
652 return hdr.e_type;
653 }
654
/*
 * Returns 1 when `path` is an ELF file whose type can be read and which the
 * calling process may execute, 0 otherwise.
 */
int bcc_elf_is_exe(const char *path) {
  if (bcc_elf_get_type(path) == -1)
    return 0;

  return access(path, X_OK) == 0;
}
658
bcc_elf_is_shared_obj(const char * path)659 int bcc_elf_is_shared_obj(const char *path) {
660 return bcc_elf_get_type(path) == ET_DYN;
661 }
662
/* Returns 1 when `name` is exactly the module name "[vdso]", 0 otherwise. */
int bcc_elf_is_vdso(const char *name) {
  return !strcmp(name, "[vdso]");
}
666
667 // -2: Failed
668 // -1: Not initialized
669 // >0: Initialized
670 static int vdso_image_fd = -1;
671
find_vdso(const char * name,uint64_t st,uint64_t en,uint64_t offset,bool enter_ns,void * payload)672 static int find_vdso(const char *name, uint64_t st, uint64_t en,
673 uint64_t offset, bool enter_ns, void *payload) {
674 int fd;
675 char tmpfile[128];
676 if (!bcc_elf_is_vdso(name))
677 return 0;
678
679 void *image = malloc(en - st);
680 if (!image)
681 goto on_error;
682 memcpy(image, (void *)st, en - st);
683
684 snprintf(tmpfile, sizeof(tmpfile), "/tmp/bcc_%d_vdso_image_XXXXXX", getpid());
685 fd = mkostemp(tmpfile, O_CLOEXEC);
686 if (fd < 0) {
687 fprintf(stderr, "Unable to create temp file: %s\n", strerror(errno));
688 goto on_error;
689 }
690 // Unlink the file to avoid leaking
691 if (unlink(tmpfile) == -1)
692 fprintf(stderr, "Unlink %s failed: %s\n", tmpfile, strerror(errno));
693
694 if (write(fd, image, en - st) == -1) {
695 fprintf(stderr, "Failed to write to vDSO image: %s\n", strerror(errno));
696 close(fd);
697 goto on_error;
698 }
699 vdso_image_fd = fd;
700
701 on_error:
702 if (image)
703 free(image);
704 // Always stop the iteration
705 return -1;
706 }
707
/*
 * Report the function symbols (STT_FUNC and STT_GNU_IFUNC) of this process's
 * vDSO through `callback`.
 *
 * The first call copies the vDSO into a temporary file via find_vdso(); the
 * descriptor is kept in vdso_image_fd and reused by later calls.  A failed
 * attempt is remembered and not retried.
 *
 * Returns -1 when no vDSO image is available or it cannot be opened as ELF,
 * otherwise the result of listsymbols().
 */
int bcc_elf_foreach_vdso_sym(bcc_elf_symcb callback, void *payload) {
  Elf *elf;
  int res;
  static struct bcc_symbol_option default_option = {
    .use_debug_file = 0,
    .check_debug_file_crc = 0,
    .use_symbol_type = (1 << STT_FUNC) | (1 << STT_GNU_IFUNC)
  };

  if (vdso_image_fd == -1) {
    // Mark as failed up front; find_vdso() overwrites this on success.
    vdso_image_fd = -2;
    bcc_procutils_each_module(getpid(), &find_vdso, NULL);
  }
  if (vdso_image_fd == -2)
    return -1;

  if (openelf_fd(vdso_image_fd, &elf) == -1)
    return -1;

  res = listsymbols(elf, callback, payload, &default_option);

  // Release the libelf handle created for this call.  The descriptor itself
  // is deliberately left open so the image can be reused.
  elf_end(elf);
  return res;
}
728
729 // return value: 0 : success
730 // < 0 : error and no bcc lib found
731 // > 0 : error and bcc lib found
bcc_free_memory_with_file(const char * path)732 static int bcc_free_memory_with_file(const char *path) {
733 unsigned long sym_addr = 0, sym_shndx;
734 Elf_Scn *section = NULL;
735 int fd = -1, err;
736 GElf_Shdr header;
737 Elf *e = NULL;
738
739 if ((err = openelf(path, &e, &fd)) < 0)
740 goto exit;
741
742 // get symbol address of "bcc_free_memory", which
743 // will be used to calculate runtime .text address
744 // range, esp. for shared libraries.
745 err = -1;
746 while ((section = elf_nextscn(e, section)) != 0) {
747 Elf_Data *data = NULL;
748 size_t symsize;
749
750 if (!gelf_getshdr(section, &header))
751 continue;
752
753 if (header.sh_type != SHT_SYMTAB && header.sh_type != SHT_DYNSYM)
754 continue;
755
756 /* iterate all symbols */
757 symsize = header.sh_entsize;
758 while ((data = elf_getdata(section, data)) != 0) {
759 size_t i, symcount = data->d_size / symsize;
760
761 for (i = 0; i < symcount; ++i) {
762 GElf_Sym sym;
763
764 if (!gelf_getsym(data, (int)i, &sym))
765 continue;
766
767 if (GELF_ST_TYPE(sym.st_info) != STT_FUNC)
768 continue;
769
770 const char *name;
771 if ((name = elf_strptr(e, header.sh_link, sym.st_name)) == NULL)
772 continue;
773
774 if (strcmp(name, "bcc_free_memory") == 0) {
775 sym_addr = sym.st_value;
776 sym_shndx = sym.st_shndx;
777 break;
778 }
779 }
780 }
781 }
782
783 // Didn't find bcc_free_memory in the ELF file.
784 if (sym_addr == 0)
785 goto exit;
786
787 int sh_idx = 0;
788 section = NULL;
789 err = 1;
790 while ((section = elf_nextscn(e, section)) != 0) {
791 sh_idx++;
792 if (!gelf_getshdr(section, &header))
793 continue;
794
795 if (sh_idx == sym_shndx) {
796 unsigned long saddr, saddr_n, eaddr;
797 long page_size = sysconf(_SC_PAGESIZE);
798
799 saddr = (unsigned long)bcc_free_memory - sym_addr + header.sh_addr;
800 eaddr = saddr + header.sh_size;
801
802 // adjust saddr and eaddr, start addr needs to be page aligned
803 saddr_n = (saddr + page_size - 1) & ~(page_size - 1);
804 eaddr -= saddr_n - saddr;
805
806 if (madvise((void *)saddr_n, eaddr - saddr_n, MADV_DONTNEED)) {
807 fprintf(stderr, "madvise failed, saddr %lx, eaddr %lx\n", saddr, eaddr);
808 goto exit;
809 }
810
811 err = 0;
812 break;
813 }
814 }
815
816 exit:
817 if (e)
818 elf_end(e);
819 if (fd >= 0)
820 close(fd);
821 return err;
822 }
823
// Free bcc memory
//
// The main purpose of this function is to free llvm/clang text memory
// through madvise MADV_DONTNEED.
//
// bcc could be linked statically or dynamically into the application.
// If it is static linking, there is no easy way to know which region
// inside .text section belongs to llvm/clang, so the whole .text section
// is freed. Otherwise, the process map is searched to find libbcc.so
// library and the whole .text section for that shared library is
// freed.
//
// Note that the text memory used by bcc (mainly llvm/clang) is reclaimable
// in the kernel as it is file backed. But the reclaim process
// may take some time if there is no memory pressure. So this API is mostly
// meant for applications that need to lower their RssFile metric
// immediately after loading a BPF program.
//
// Returns 0 on success and a negative value on failure.
int bcc_free_memory() {
  int err;

  // First try whether bcc is statically linked or not
  err = bcc_free_memory_with_file("/proc/self/exe");
  if (err >= 0)
    return -err;

  // Not statically linked, let us find the libbcc.so
  FILE *maps = fopen("/proc/self/maps", "r");
  if (!maps)
    return -1;

  char *line = NULL;
  size_t size = 0;
  while (getline(&line, &size, maps) > 0) {
    char *libbcc = strstr(line, "libbcc.so");
    if (!libbcc)
      continue;

    // Parse the line and get the full libbcc.so path
    unsigned long addr_start, addr_end, offset, inode;
    int path_start = 0, path_end = 0;
    unsigned int devmajor, devminor;
    char perms[8];
    if (sscanf(line, "%lx-%lx %7s %lx %u:%u %lu %n%*[^\n]%n",
               &addr_start, &addr_end, perms, &offset,
               &devmajor, &devminor, &inode,
               &path_start, &path_end) < 7)
      break;

    // path_end is only set when a path was actually matched; without this
    // check the length below could be zero or negative, and a long path
    // could overrun the buffer.
    char libbcc_path[4096];
    if (path_end <= path_start ||
        (size_t)(path_end - path_start) >= sizeof(libbcc_path))
      continue;

    // Free the text in the bcc dynamic library.
    memcpy(libbcc_path, line + path_start, path_end - path_start);
    libbcc_path[path_end - path_start] = '\0';
    err = bcc_free_memory_with_file(libbcc_path);
    err = (err <= 0) ? err : -err;
  }

  fclose(maps);
  free(line);
  return err;
}
884
#if 0
/*
 * Disabled manual test driver: resolves the symbol argv[2] in the ELF file
 * argv[1] and prints its address.  Compiled out by the surrounding #if 0.
 */
#include <stdio.h>

int main(int argc, char *argv[])
{
  uint64_t addr;
  int rc = bcc_elf_findsym(argv[1], argv[2], -1, STT_FUNC, &addr);

  if (rc < 0)
    return -1;

  printf("%s: %p\n", argv[2], (void *)addr);
  return 0;
}
#endif
898