• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2016 GitHub, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include <sys/types.h>
17 #include <sys/stat.h>
18 #include <sys/mman.h>
19 #include <errno.h>
20 #include <fcntl.h>
21 #include <unistd.h>
22 #include <string.h>
23 #include <libgen.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <limits.h>
27 #ifdef HAVE_LIBDEBUGINFOD
28 #include <elfutils/debuginfod.h>
29 #endif
30 
31 #include <gelf.h>
32 #include "bcc_elf.h"
33 #include "bcc_proc.h"
34 #include "bcc_syms.h"
35 
36 #define NT_STAPSDT 3
37 #define ELF_ST_TYPE(x) (((uint32_t) x) & 0xf)
38 
// Wrap an already-open file descriptor in a read-only libelf handle.
//
// Returns 0 and stores the handle in *elf_out on success. Returns -1 when
// libelf rejects the requested API version (in which case *elf_out is not
// written) or when elf_begin() fails (in which case *elf_out is NULL).
static int openelf_fd(int fd, Elf **elf_out) {
  Elf *handle;

  if (elf_version(EV_CURRENT) == EV_NONE)
    return -1;

  handle = elf_begin(fd, ELF_C_READ, 0);
  *elf_out = handle;

  return handle != NULL ? 0 : -1;
}
49 
// Open `path` read-only and create a libelf handle for it.
//
// On success returns 0 with *elf_out and *fd_out set; the caller releases
// them with elf_end() and close().
//
// On failure returns -1 and leaves *elf_out == NULL and *fd_out == -1.
// Several callers run a shared cleanup path that closes the descriptor when
// `fd >= 0`; resetting the outputs here keeps such a path from closing a
// descriptor this function has already closed.
static int openelf(const char *path, Elf **elf_out, int *fd_out) {
  *elf_out = NULL;

  *fd_out = open(path, O_RDONLY);
  if (*fd_out < 0)
    return -1;

  if (openelf_fd(*fd_out, elf_out) == -1) {
    close(*fd_out);
    *fd_out = -1;
    *elf_out = NULL;
    return -1;
  }

  return 0;
}
62 
// Decode the descriptor of one "stapsdt" note into *probe.
//
// The descriptor begins with three address-sized fields (pc, base address,
// semaphore address): 4 bytes each when elf_class is ELFCLASS32, 8 bytes
// each otherwise. They are followed by three consecutive NUL-terminated
// strings: provider, probe name and argument format. The string members of
// *probe point into `desc`; nothing is copied.
//
// probes_shdr may be NULL. When it is non-NULL and the probe has a
// semaphore, the semaphore address is converted to an offset from the start
// of the file using that section's sh_addr/sh_offset; otherwise
// semaphore_offset is 0.
//
// Returns the address just past the third string, which the caller compares
// against the end of the descriptor.
static const char *parse_stapsdt_note(struct bcc_elf_usdt *probe,
                                      GElf_Shdr *probes_shdr,
                                      const char *desc, int elf_class) {
  // `desc` is a byte pointer into the note data; nothing here guarantees it
  // is aligned for uint32_t/uint64_t, so copy the fields out with memcpy
  // rather than dereferencing a casted pointer.
  if (elf_class == ELFCLASS32) {
    uint32_t fields[3];

    memcpy(fields, desc, sizeof(fields));
    probe->pc = fields[0];
    probe->base_addr = fields[1];
    probe->semaphore = fields[2];
    desc += sizeof(fields);
  } else {
    uint64_t fields[3];

    memcpy(fields, desc, sizeof(fields));
    probe->pc = fields[0];
    probe->base_addr = fields[1];
    probe->semaphore = fields[2];
    desc += sizeof(fields);
  }

  // Offset from start of file
  if (probe->semaphore && probes_shdr)
    probe->semaphore_offset =
      probe->semaphore - probes_shdr->sh_addr + probes_shdr->sh_offset;
  else
    probe->semaphore_offset = 0;

  probe->provider = desc;
  desc += strlen(desc) + 1;

  probe->name = desc;
  desc += strlen(desc) + 1;

  probe->arg_fmt = desc;
  desc += strlen(desc) + 1;

  return desc;
}
96 
// Walk every note in `section` and report each well-formed "stapsdt" USDT
// probe to `callback`.
//
// A note is considered only if its type is NT_STAPSDT and its name is the
// 8-byte string "stapsdt" (including the terminating NUL). The descriptor is
// then parsed by parse_stapsdt_note(); notes whose parsed length does not
// match n_descsz exactly are silently skipped. Probes whose pc lies below
// first_inst_offset are reported on stderr instead of being passed to the
// callback.
//
// probes_shdr may be NULL (forwarded to parse_stapsdt_note()).
// Always returns 0.
static int do_note_segment(Elf_Scn *section, GElf_Shdr *probes_shdr, int elf_class,
                           bcc_elf_probecb callback, const char *binpath,
                           uint64_t first_inst_offset, void *payload) {
  Elf_Data *data = NULL;

  while ((data = elf_getdata(section, data)) != 0) {
    size_t offset = 0;
    GElf_Nhdr hdr;
    size_t name_off, desc_off;

    while ((offset = gelf_getnote(data, offset, &hdr, &name_off, &desc_off)) !=
           0) {
      const char *desc, *desc_end;
      struct bcc_elf_usdt probe;

      if (hdr.n_type != NT_STAPSDT)
        continue;

      if (hdr.n_namesz != 8)
        continue;

      if (memcmp((const char *)data->d_buf + name_off, "stapsdt", 8) != 0)
        continue;

      desc = (const char *)data->d_buf + desc_off;
      desc_end = desc + hdr.n_descsz;

      if (parse_stapsdt_note(&probe, probes_shdr, desc, elf_class) == desc_end) {
        if (probe.pc < first_inst_offset)
          // pc is filled from 64-bit values; widen explicitly so the
          // argument matches the conversion on both 32- and 64-bit hosts.
          fprintf(stderr,
                  "WARNING: invalid address 0x%llx for probe (%s,%s) in binary %s\n",
                  (unsigned long long)probe.pc, probe.provider, probe.name,
                  binpath);
        else
          callback(binpath, &probe, payload);
      }
    }
  }
  return 0;
}
136 
// Enumerate the USDT probes recorded in the ".note.stapsdt" sections of `e`
// and pass each one to `callback` (via do_note_segment()).
//
// Three independent scans over the section table are performed:
//   1. find the address of the first section flagged SHF_EXECINSTR, used to
//      reject probes whose pc lies below it;
//   2. find the ".probes" section header, used to translate semaphore
//      addresses into file offsets (optional);
//   3. process every SHT_NOTE section named ".note.stapsdt".
//
// Each scan restarts from the first section. Carrying the iterator over from
// the previous scan would make the result depend on the relative order of
// the executable, ".probes" and ".note.stapsdt" sections, and note sections
// located before ".probes" would be skipped.
//
// Returns 0 on success, -1 if the section-name string table cannot be
// located or do_note_segment() reports an error.
static int listprobes(Elf *e, bcc_elf_probecb callback, const char *binpath,
                      void *payload) {
  Elf_Scn *section;
  bool found_probes_shdr = false;
  size_t stridx;
  int elf_class = gelf_getclass(e);
  uint64_t first_inst_offset = 0;
  GElf_Shdr probes_shdr = {0};

  if (elf_getshdrstrndx(e, &stridx) != 0)
    return -1;

  // Get the offset to the first instruction
  section = NULL;
  while ((section = elf_nextscn(e, section)) != 0) {
    GElf_Shdr header;

    if (!gelf_getshdr(section, &header))
      continue;

    // The elf file section layout is based on increasing virtual address,
    // getting the first section with SHF_EXECINSTR is enough.
    if (header.sh_flags & SHF_EXECINSTR) {
      first_inst_offset = header.sh_addr;
      break;
    }
  }

  // Locate the ".probes" section header, if the binary has one.
  section = NULL;
  while ((section = elf_nextscn(e, section)) != 0) {
    if (!gelf_getshdr(section, &probes_shdr))
      continue;

    char *name = elf_strptr(e, stridx, probes_shdr.sh_name);
    if (name && !strcmp(name, ".probes")) {
      found_probes_shdr = true;
      break;
    }
  }

  // Process every ".note.stapsdt" note section.
  section = NULL;
  while ((section = elf_nextscn(e, section)) != 0) {
    GElf_Shdr header;
    char *name;

    if (!gelf_getshdr(section, &header))
      continue;

    if (header.sh_type != SHT_NOTE)
      continue;

    name = elf_strptr(e, stridx, header.sh_name);
    if (name && !strcmp(name, ".note.stapsdt")) {
      // probes_shdr only holds meaningful data when ".probes" was found.
      GElf_Shdr *shdr_ptr = found_probes_shdr ? &probes_shdr : NULL;
      if (do_note_segment(section, shdr_ptr, elf_class, callback, binpath,
                          first_inst_offset, payload) < 0)
        return -1;
    }
  }

  return 0;
}
197 
// Invoke `callback` for every USDT probe found in the ELF file at `path`.
//
// `payload` is handed through to the callback unchanged. Returns -1 if the
// file cannot be opened as ELF, otherwise whatever listprobes() returns.
int bcc_elf_foreach_usdt(const char *path, bcc_elf_probecb callback,
                         void *payload) {
  Elf *elf;
  int elf_fd;
  int status;

  if (openelf(path, &elf, &elf_fd) < 0)
    return -1;

  status = listprobes(elf, callback, path, payload);

  // Release the handle before the descriptor it was created from.
  elf_end(elf);
  close(elf_fd);

  return status;
}
212 
get_section(Elf * e,const char * section_name,GElf_Shdr * section_hdr,size_t * section_idx)213 static Elf_Scn * get_section(Elf *e, const char *section_name,
214                              GElf_Shdr *section_hdr, size_t *section_idx) {
215   Elf_Scn *section = NULL;
216   GElf_Shdr header;
217   char *name;
218 
219   size_t stridx;
220   if (elf_getshdrstrndx(e, &stridx) != 0)
221     return NULL;
222 
223   size_t index;
224   for (index = 1; (section = elf_nextscn(e, section)) != 0; index++) {
225     if (!gelf_getshdr(section, &header))
226       continue;
227 
228     name = elf_strptr(e, stridx, header.sh_name);
229     if (name && !strcmp(name, section_name)) {
230       if (section_hdr)
231         *section_hdr = header;
232       if (section_idx)
233         *section_idx = index;
234       return section;
235     }
236   }
237 
238   return NULL;
239 }
240 
// Report every usable symbol of one symbol-table section to the caller.
//
// `stridx` is the index of the string table holding the symbol names and
// `symsize` the size of one symbol entry (the section's sh_entsize).
// Symbols are skipped when they cannot be read, have no name, have a zero
// value, or have a type not selected in option->use_symbol_type. Depending
// on option->lazy_symbolize either `callback_lazy` or `callback` is invoked.
//
// On powerpc64 the symbol value is adjusted first: with the ELFv2 ABI to the
// local entry point (when STT_PPC64_ELFV2_SYM_LEP is selected), otherwise
// through the ".opd" function descriptor on big-endian builds.
//
// Returns 0 when the whole section was processed, 1 when a callback returned
// a negative value (stop requested), and -1 when the section data is not a
// whole number of `symsize`-byte entries.
static int list_in_scn(Elf *e, Elf_Scn *section, size_t stridx, size_t symsize,
                       struct bcc_symbol_option *option,
                       bcc_elf_symcb callback, bcc_elf_symcb_lazy callback_lazy,
                       void *payload, bool debugfile) {
  Elf_Data *data = NULL;

#if defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  size_t opdidx = 0;
  Elf_Scn *opdsec = NULL;
  GElf_Shdr opdshdr = {};
  Elf_Data *opddata = NULL;

  opdsec = get_section(e, ".opd", &opdshdr, &opdidx);
  if (opdsec && opdshdr.sh_type == SHT_PROGBITS)
    opddata = elf_getdata(opdsec, NULL);
#endif

  while ((data = elf_getdata(section, data)) != 0) {
    size_t i, symcount;

    // A malformed file can carry sh_entsize == 0; reject it here instead of
    // dividing by zero below.
    if (symsize == 0 || data->d_size % symsize)
      return -1;

    symcount = data->d_size / symsize;

    for (i = 0; i < symcount; ++i) {
      GElf_Sym sym;
      const char *name;
      size_t name_len;

      if (!gelf_getsym(data, (int)i, &sym))
        continue;

      if ((name = elf_strptr(e, stridx, sym.st_name)) == NULL)
        continue;
      if (name[0] == 0)
        continue;
      name_len = strlen(name);

      if (sym.st_value == 0)
        continue;

      uint32_t st_type = ELF_ST_TYPE(sym.st_info);
      if (!(option->use_symbol_type & (1 << st_type)))
        continue;

#ifdef __powerpc64__
#if defined(_CALL_ELF) && _CALL_ELF == 2
      if (option->use_symbol_type & (1 << STT_PPC64_ELFV2_SYM_LEP)) {
        /*
         * The PowerPC 64-bit ELF v2 ABI says that the 3 most significant bits
         * in the st_other field of the symbol table specifies the number of
         * instructions between a function's Global Entry Point (GEP) and Local
         * Entry Point (LEP).
         */
        switch (sym.st_other >> 5) {
          /* GEP and LEP are the same for 0 or 1, usage is reserved for 7 */
          /* If 2, LEP is 1 instruction past the GEP */
          case 2: sym.st_value += 4; break;
          /* If 3, LEP is 2 instructions past the GEP */
          case 3: sym.st_value += 8; break;
          /* If 4, LEP is 4 instructions past the GEP */
          case 4: sym.st_value += 16; break;
          /* If 5, LEP is 8 instructions past the GEP */
          case 5: sym.st_value += 32; break;
          /* If 6, LEP is 16 instructions past the GEP */
          case 6: sym.st_value += 64; break;
        }
      }
#else
      if (opddata && sym.st_shndx == opdidx) {
        size_t offset = sym.st_value - opdshdr.sh_addr;
        /* Find the function descriptor */
        uint64_t *descr = opddata->d_buf + offset;
        /* Read the actual entry point address from the descriptor */
        sym.st_value = *descr;
      }
#endif
#endif

      int ret;
      if (option->lazy_symbolize)
        ret = callback_lazy(stridx, sym.st_name, name_len, sym.st_value,
                            sym.st_size, debugfile, payload);
      else
        ret = callback(name, sym.st_value, sym.st_size, payload);
      if (ret < 0)
        return 1;      // signal termination to caller
    }
  }

  return 0;
}
332 
// Run list_in_scn() over every SHT_SYMTAB and SHT_DYNSYM section of `e`.
//
// Iteration stops early, still returning 0, once a callback asks for
// termination (list_in_scn() returned 1). A negative result from
// list_in_scn() is returned to the caller as-is.
static int listsymbols(Elf *e, bcc_elf_symcb callback,
                       bcc_elf_symcb_lazy callback_lazy, void *payload,
                       struct bcc_symbol_option *option, bool debugfile) {
  Elf_Scn *scn;

  for (scn = elf_nextscn(e, NULL); scn != NULL; scn = elf_nextscn(e, scn)) {
    GElf_Shdr shdr;
    int rc;

    if (!gelf_getshdr(scn, &shdr))
      continue;

    // Only symbol tables are of interest.
    if (!(shdr.sh_type == SHT_SYMTAB || shdr.sh_type == SHT_DYNSYM))
      continue;

    // sh_link names the string table, sh_entsize the size of one symbol.
    rc = list_in_scn(e, scn, shdr.sh_link, shdr.sh_entsize, option, callback,
                     callback_lazy, payload, debugfile);
    if (rc == 1)
      break;    // callback signaled termination

    if (rc < 0)
      return rc;
  }

  return 0;
}
358 
get_section_elf_data(Elf * e,const char * section_name)359 static Elf_Data * get_section_elf_data(Elf *e, const char *section_name) {
360   Elf_Scn *section = get_section(e, section_name, NULL, NULL);
361   if (section)
362     return elf_getdata(section, NULL);
363   return NULL;
364 }
365 
// Read the ".gnu_debuglink" section of `e`.
//
// The code treats the section as a file name starting at the first byte and
// a 4-byte CRC stored in the last four bytes. On success returns 1 with
// *debug_file pointing at the name inside the section data (not copied) and
// *crc holding the stored checksum. Returns 0, with *debug_file == NULL and
// *crc == 0, when the section is missing, has no data, is too small, or the
// name is not NUL-terminated before the CRC field.
static int find_debuglink(Elf *e, char **debug_file, unsigned int *crc) {
  Elf_Data *data = NULL;
  char *contents;
  size_t name_area;
  uint32_t stored_crc;

  *debug_file = NULL;
  *crc = 0;

  data = get_section_elf_data(e, ".gnu_debuglink");
  // Check d_buf before touching it; it is dereferenced below.
  if (!data || !data->d_buf || data->d_size <= 5)
    return 0;

  contents = (char *)data->d_buf;
  name_area = data->d_size - 4;

  // Callers format the name with "%s", so it must terminate inside the
  // section rather than running into (or past) the CRC field.
  if (!memchr(contents, '\0', name_area))
    return 0;

  // Copy the checksum out byte-wise; the section size is not required to
  // leave the last four bytes suitably aligned for a direct load.
  memcpy(&stored_crc, contents + name_area, sizeof(stored_crc));

  *debug_file = contents;
  *crc = stored_crc;

  return 1;
}
381 
// Write the build id of `e` into `buildid` as a NUL-terminated lowercase hex
// string and return 1; return 0 when no usable ".note.gnu.build-id" section
// is present.
//
// The section is read with fixed offsets: the string at byte 12 must be
// "GNU" and everything from byte 16 to the end of the section data is taken
// as the id.
//
// `buildid` carries no length. Every caller in this file passes a 128-byte
// array, so at most 63 id bytes (126 hex digits plus the terminator) are
// emitted; a larger section is truncated rather than overflowing the buffer.
static int find_buildid(Elf *e, char *buildid) {
  enum { BUILDID_BUF_SIZE = 128, BUILDID_MAX_BYTES = (BUILDID_BUF_SIZE - 1) / 2 };

  Elf_Data *data = get_section_elf_data(e, ".note.gnu.build-id");
  if (!data || !data->d_buf || data->d_size <= 16 ||
      strcmp((char *)data->d_buf + 12, "GNU"))
    return 0;

  const unsigned char *id = (const unsigned char *)data->d_buf + 16;
  size_t length = data->d_size - 16;
  size_t i = 0;

  if (length > BUILDID_MAX_BYTES)
    length = BUILDID_MAX_BYTES;

  for (i = 0; i < length; ++i) {
    // Two hex digits plus the terminator; the next iteration overwrites the
    // terminator, the last one leaves the string terminated.
    snprintf(buildid + (i * 2), 3, "%02x", id[i]);
  }

  return 1;
}
396 
// Table-driven CRC-32 as used for the GNU debuglink checksum. Algorithm and
// table taken from:
//    https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html
//
// `crc` is the running checksum (pass 0 to start); the return value is the
// checksum after folding in the `len` bytes at `buf`, so a buffer may be
// processed in several consecutive calls.
static unsigned int gnu_debuglink_crc32(unsigned int crc,
                                        char *buf, size_t len) {
  static const unsigned int crc32_table[256] =
  {
    0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419,
    0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4,
    0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07,
    0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
    0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856,
    0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
    0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4,
    0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
    0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3,
    0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a,
    0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599,
    0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
    0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190,
    0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f,
    0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e,
    0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
    0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed,
    0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
    0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3,
    0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
    0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a,
    0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5,
    0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010,
    0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
    0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17,
    0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6,
    0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615,
    0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
    0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344,
    0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
    0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a,
    0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
    0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1,
    0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c,
    0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef,
    0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
    0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe,
    0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31,
    0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c,
    0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
    0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b,
    0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
    0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1,
    0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
    0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278,
    0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7,
    0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66,
    0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
    0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605,
    0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8,
    0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b,
    0x2d02ef8d
  };
  size_t pos;

  // The checksum is kept inverted while bytes are folded in, and inverted
  // back on return; the masks keep it to 32 bits.
  crc = ~crc & 0xffffffff;
  for (pos = 0; pos < len; pos++) {
    // Only the low 8 bits of the XOR select the table entry.
    unsigned int slot = (crc ^ buf[pos]) & 0xff;

    crc = crc32_table[slot] ^ (crc >> 8);
  }
  return ~crc & 0xffffffff;
}
463 
// Check whether the GNU debuglink CRC of the file at `file` equals `crc`.
//
// Returns 1 on a match and 0 on a mismatch or when the file cannot be
// opened, stat'ed or mapped.
static int verify_checksum(const char *file, unsigned int crc) {
  struct stat st;
  int fd;
  void *buf;
  unsigned int actual;

  fd = open(file, O_RDONLY);
  if (fd < 0)
    return 0;

  if (fstat(fd, &st) < 0) {
    close(fd);
    return 0;
  }

  // A zero-length mapping is rejected by mmap(). gnu_debuglink_crc32() over
  // zero bytes yields 0, so compare against that directly.
  if (st.st_size == 0) {
    close(fd);
    return crc == 0;
  }

  buf = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
  // mmap() reports failure with MAP_FAILED, not NULL.
  if (buf == MAP_FAILED) {
    close(fd);
    return 0;
  }

  actual = gnu_debuglink_crc32(0, buf, st.st_size);

  munmap(buf, st.st_size);
  close(fd);
  return actual == crc;
}
491 
// Report whether paths `a` and `b` resolve to the same file, i.e. share the
// same device and inode (this also covers hard and soft links). Returns
// false when either path cannot be stat'ed.
static bool same_file(char *a, const char *b)
{
	struct stat info_a, info_b;

	if (stat(a, &info_a) != 0)
		return false;
	if (stat(b, &info_b) != 0)
		return false;

	return info_a.st_dev == info_b.st_dev &&
	       info_a.st_ino == info_b.st_ino;
}
506 
// Locate the separate debuginfo file named by the ".gnu_debuglink" section
// of `e`, the ELF file found at `binpath`.
//
// Candidates are tried in this order, and the first one that exists wins:
//   <dir of binpath>/<name>          (skipped if it is the binary itself)
//   <dir of binpath>/.debug/<name>
//   /usr/lib/debug<dir of binpath>/<name>
// A candidate whose path does not fit in PATH_MAX is skipped rather than
// probed in truncated form.
//
// When `check_crc` is non-zero the chosen file must also match the CRC
// stored in the debuglink section.
//
// Returns a malloc'ed path the caller must free(), or NULL when nothing
// suitable was found or memory could not be allocated.
static char *find_debug_via_debuglink(Elf *e, const char *binpath,
                                      int check_crc) {
  char fullpath[PATH_MAX];
  char *tmppath;
  char *bindir = NULL;
  char *res = NULL;
  unsigned int crc;
  char *name;  // the name of the debuginfo file
  int n;

  if (!find_debuglink(e, &name, &crc))
    return NULL;

  // dirname() may modify its argument, so work on a copy. Without the NULL
  // check a failed strdup() would make dirname() return "." and the search
  // would silently run relative to the current directory.
  tmppath = strdup(binpath);
  if (!tmppath)
    return NULL;
  bindir = dirname(tmppath);

  // Search for the file in 'binpath', but ignore the file we find if it
  // matches the binary itself: the binary will always be probed later on,
  // and it might contain poorer symbols (e.g. stripped or partial symbols)
  // than the external debuginfo that might be available elsewhere.
  n = snprintf(fullpath, sizeof(fullpath),"%s/%s", bindir, name);
  if (n >= 0 && (size_t)n < sizeof(fullpath) &&
      same_file(fullpath, binpath) != true && access(fullpath, F_OK) != -1) {
    res = strdup(fullpath);
    goto DONE;
  }

  // Search for the file in 'binpath'/.debug
  n = snprintf(fullpath, sizeof(fullpath), "%s/.debug/%s", bindir, name);
  if (n >= 0 && (size_t)n < sizeof(fullpath) &&
      access(fullpath, F_OK) != -1) {
    res = strdup(fullpath);
    goto DONE;
  }

  // Search for the file in the global debug directory /usr/lib/debug/'binpath'
  n = snprintf(fullpath, sizeof(fullpath), "/usr/lib/debug%s/%s", bindir, name);
  if (n >= 0 && (size_t)n < sizeof(fullpath) &&
      access(fullpath, F_OK) != -1) {
    res = strdup(fullpath);
    goto DONE;
  }

DONE:
  free(tmppath);
  if (res && check_crc && !verify_checksum(res, crc)) {
    free(res);
    return NULL;
  }
  return res;
}
554 
// Look for the debuginfo file of `e` under the global build-id directory.
//
// The path probed is /usr/lib/debug/.build-id/mm/nnnn...nnnn.debug, where
// "mm" is the first two characters of the hex build id and "nnnn..." the
// remainder. Returns a malloc'ed copy of that path when it exists, NULL
// otherwise (including when `e` has no build id).
static char *find_debug_via_buildid(Elf *e) {
  char id_hex[128];  // currently 40 seems to be default, let's be safe
  char candidate[PATH_MAX];

  if (!find_buildid(e, id_hex))
    return NULL;

  snprintf(candidate, sizeof(candidate),
           "/usr/lib/debug/.build-id/%c%c/%s.debug",
           id_hex[0], id_hex[1], id_hex + 2);

  if (access(candidate, F_OK) == -1)
    return NULL;

  return strdup(candidate);
}
574 
find_debug_via_symfs(Elf * e,const char * path)575 static char *find_debug_via_symfs(Elf *e, const char* path) {
576   char fullpath[PATH_MAX];
577   char buildid[128];
578   char symfs_buildid[128];
579   int check_build_id;
580   char *symfs;
581   Elf *symfs_e = NULL;
582   int symfs_fd = -1;
583   char *result = NULL;
584 
585   symfs = getenv("BCC_SYMFS");
586   if (!symfs || !*symfs)
587     goto out;
588 
589   check_build_id = find_buildid(e, buildid);
590 
591   int ns_prefix_length = 0;
592   sscanf(path, "/proc/%*u/root/%n", &ns_prefix_length);
593   path += ns_prefix_length;
594 
595   snprintf(fullpath, sizeof(fullpath), "%s/%s", symfs, path);
596   if (access(fullpath, F_OK) == -1)
597     goto out;
598 
599   if (openelf(fullpath, &symfs_e, &symfs_fd) < 0) {
600     symfs_e = NULL;
601     symfs_fd = -1;
602     goto out;
603   }
604 
605   if (check_build_id) {
606     if (!find_buildid(symfs_e, symfs_buildid))
607       goto out;
608 
609     if (strncmp(buildid, symfs_buildid, sizeof(buildid)))
610       goto out;
611   }
612 
613   result = strdup(fullpath);
614 
615 out:
616   if (symfs_e) {
617     elf_end(symfs_e);
618   }
619 
620   if (symfs_fd != -1) {
621     close(symfs_fd);
622   }
623 
624   return result;
625 }
626 
#ifdef HAVE_LIBDEBUGINFOD
// Ask debuginfod for the debuginfo file matching the build id of `e`.
//
// Returns the path stored by debuginfod_find_debuginfo(), or NULL when `e`
// has no build id, no client could be created, or the lookup failed.
static char *find_debug_via_debuginfod(Elf *e){
  char id_hex[128];
  char *local_path = NULL;
  debuginfod_client *dbg_client;
  int found_fd;

  if (!find_buildid(e, id_hex))
    return NULL;

  dbg_client = debuginfod_begin();
  if (dbg_client == NULL)
    return NULL;

  // The build id is passed as a hex string with a length argument of 0.
  // In case of an error, the function returns a negative error code and
  // local_path stays NULL.
  found_fd = debuginfod_find_debuginfo(dbg_client,
                                       (const unsigned char *) id_hex, 0,
                                       &local_path);
  // Only the path is needed; drop the descriptor returned alongside it.
  if (found_fd >= 0)
    close(found_fd);

  debuginfod_end(dbg_client);
  return local_path;
}
#endif
651 
// Locate a separate debuginfo file for `e` (the ELF file at `path`).
//
// The lookups are tried in order and the first hit wins: symfs, then the
// build-id directory, then the debuglink section, and finally debuginfod
// when built with HAVE_LIBDEBUGINFOD. These rules are what perf and gdb
// follow. See:
// - https://github.com/torvalds/linux/blob/v5.2/tools/perf/Documentation/perf-report.txt#L325
// - https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html
//
// Returns the path produced by the successful lookup, or NULL.
static char *find_debug_file(Elf* e, const char* path, int check_crc) {
  char *found;

  found = find_debug_via_symfs(e, path);
  if (found != NULL)
    return found;

  found = find_debug_via_buildid(e);
  if (found != NULL)
    return found;

  found = find_debug_via_debuglink(e, path, check_crc);
#ifdef HAVE_LIBDEBUGINFOD
  if (found == NULL)
    found = find_debug_via_debuginfod(e);
#endif

  return found;
}
673 
674 
// Shared implementation of the symbol iteration entry points.
//
// Opens the ELF file at `path`. When option->use_debug_file is set and this
// call is not already handling a debuginfo file, the symbols of the separate
// debuginfo file (if one is found) are listed first, through a recursive
// call with is_debug_file = 1; the result of that call is ignored. The
// symbols of `path` itself are listed afterwards.
//
// Returns -1 when `option` is NULL or the file cannot be opened, otherwise
// the result of listsymbols() for `path`.
static int foreach_sym_core(const char *path, bcc_elf_symcb callback,
                            bcc_elf_symcb_lazy callback_lazy,
                            struct bcc_symbol_option *option, void *payload,
                            int is_debug_file) {
  Elf *elf;
  int elf_fd;
  int status;

  if (option == NULL)
    return -1;

  if (openelf(path, &elf, &elf_fd) < 0)
    return -1;

  // The is_debug_file argument helps avoid infinitely resolving debuginfo
  // files for debuginfo files and so on.
  if (!is_debug_file && option->use_debug_file) {
    char *companion = find_debug_file(elf, path, option->check_debug_file_crc);

    if (companion != NULL) {
      foreach_sym_core(companion, callback, callback_lazy, option, payload, 1);
      free(companion);
    }
  }

  status = listsymbols(elf, callback, callback_lazy, payload, option,
                       is_debug_file);

  elf_end(elf);
  close(elf_fd);
  return status;
}
705 
// Call `callback` for every symbol of the ELF file at `path` (eager mode:
// the callback receives the symbol name).
//
// `option` must point to a struct bcc_symbol_option; its lazy_symbolize
// field is cleared by this call. Returns -1 when `option` is NULL, otherwise
// the result of foreach_sym_core().
int bcc_elf_foreach_sym(const char *path, bcc_elf_symcb callback,
                        void *option, void *payload) {
  struct bcc_symbol_option *o = option;

  // foreach_sym_core() rejects a NULL option, but it must not be
  // dereferenced here first.
  if (!o)
    return -1;

  o->lazy_symbolize = 0;
  return foreach_sym_core(path, callback, NULL, o, payload, 0);
}
712 
// Call `callback` for every symbol of the ELF file at `path` (lazy mode:
// the callback receives the string-table index and name offset instead of
// the name itself).
//
// `option` must point to a struct bcc_symbol_option; its lazy_symbolize
// field is set by this call. Returns -1 when `option` is NULL, otherwise
// the result of foreach_sym_core().
int bcc_elf_foreach_sym_lazy(const char *path, bcc_elf_symcb_lazy callback,
                        void *option, void *payload) {
  struct bcc_symbol_option *o = option;

  // foreach_sym_core() rejects a NULL option, but it must not be
  // dereferenced here first.
  if (!o)
    return -1;

  o->lazy_symbolize = 1;
  return foreach_sym_core(path, NULL, callback, o, payload, 0);
}
719 
bcc_elf_get_text_scn_info(const char * path,uint64_t * addr,uint64_t * offset)720 int bcc_elf_get_text_scn_info(const char *path, uint64_t *addr,
721 				   uint64_t *offset) {
722   Elf *e = NULL;
723   int fd = -1, err;
724   Elf_Scn *section = NULL;
725   GElf_Shdr header;
726   size_t stridx;
727   char *name;
728 
729   if ((err = openelf(path, &e, &fd)) < 0 ||
730       (err = elf_getshdrstrndx(e, &stridx)) < 0)
731     goto exit;
732 
733   err = -1;
734   while ((section = elf_nextscn(e, section)) != 0) {
735     if (!gelf_getshdr(section, &header))
736       continue;
737 
738     name = elf_strptr(e, stridx, header.sh_name);
739     if (name && !strcmp(name, ".text")) {
740       *addr = (uint64_t)header.sh_addr;
741       *offset = (uint64_t)header.sh_offset;
742       err = 0;
743       break;
744     }
745   }
746 
747 exit:
748   if (e)
749     elf_end(e);
750   if (fd >= 0)
751     close(fd);
752   return err;
753 }
754 
// Call `callback` with (p_vaddr, p_memsz, p_offset, payload) for every
// executable PT_LOAD program header of the ELF file at `path`.
//
// Returns 0 when all matching headers were visited, 1 when the callback
// returned a negative value (iteration stops there), and -1 when the file
// cannot be opened as ELF or the program header count cannot be read.
int bcc_elf_foreach_load_section(const char *path,
                                 bcc_elf_load_sectioncb callback,
                                 void *payload) {
  Elf *e = NULL;
  int fd = -1, err = -1, res;
  size_t nhdrs, i;

  // When openelf() fails there is nothing to release: it has already closed
  // the descriptor it opened, so going through `exit` could close it twice.
  if (openelf(path, &e, &fd) < 0)
    return -1;

  if (elf_getphdrnum(e, &nhdrs) != 0)
    goto exit;

  GElf_Phdr header;
  for (i = 0; i < nhdrs; i++) {
    if (!gelf_getphdr(e, (int)i, &header))
      continue;
    // Only loadable segments that are mapped executable are reported.
    if (header.p_type != PT_LOAD || !(header.p_flags & PF_X))
      continue;
    res = callback(header.p_vaddr, header.p_memsz, header.p_offset, payload);
    if (res < 0) {
      err = 1;
      goto exit;
    }
  }
  err = 0;

exit:
  elf_end(e);
  close(fd);
  return err;
}
789 
bcc_elf_get_type(const char * path)790 int bcc_elf_get_type(const char *path) {
791   Elf *e;
792   GElf_Ehdr hdr;
793   int fd;
794   void* res = NULL;
795 
796   if (openelf(path, &e, &fd) < 0)
797     return -1;
798 
799   res = (void*)gelf_getehdr(e, &hdr);
800   elf_end(e);
801   close(fd);
802 
803   if (!res)
804     return -1;
805   else
806     return hdr.e_type;
807 }
808 
// Return 1 when `path` is a readable ELF file that the caller has execute
// permission on, 0 otherwise.
int bcc_elf_is_exe(const char *path) {
  if (bcc_elf_get_type(path) == -1)
    return 0;

  return access(path, X_OK) == 0;
}
812 
bcc_elf_is_shared_obj(const char * path)813 int bcc_elf_is_shared_obj(const char *path) {
814   return bcc_elf_get_type(path) == ET_DYN;
815 }
816 
// Return 1 when `name` is exactly the module name "[vdso]", 0 otherwise.
int bcc_elf_is_vdso(const char *name) {
  return !strcmp(name, "[vdso]");
}
820 
821 // -2: Failed
822 // -1: Not initialized
823 // >0: Initialized
824 static int vdso_image_fd = -1;
825 
find_vdso(struct mod_info * info,int enter_ns,void * payload)826 static int find_vdso(struct mod_info *info, int enter_ns, void *payload) {
827   int fd;
828   char tmpfile[128];
829   if (!bcc_elf_is_vdso(info->name))
830     return 0;
831 
832   uint64_t sz = info->end_addr - info->start_addr;
833   void *image = malloc(sz);
834   if (!image)
835     goto on_error;
836   memcpy(image, (void *)info->start_addr, sz);
837 
838   snprintf(tmpfile, sizeof(tmpfile), "/tmp/bcc_%d_vdso_image_XXXXXX", getpid());
839   fd = mkostemp(tmpfile, O_CLOEXEC);
840   if (fd < 0) {
841     fprintf(stderr, "Unable to create temp file: %s\n", strerror(errno));
842     goto on_error;
843   }
844   // Unlink the file to avoid leaking
845   if (unlink(tmpfile) == -1)
846     fprintf(stderr, "Unlink %s failed: %s\n", tmpfile, strerror(errno));
847 
848   if (write(fd, image, sz) == -1) {
849     fprintf(stderr, "Failed to write to vDSO image: %s\n", strerror(errno));
850     close(fd);
851     goto on_error;
852   }
853   vdso_image_fd = fd;
854 
855 on_error:
856   if (image)
857     free(image);
858   // Always stop the iteration
859   return -1;
860 }
861 
// Call `callback` for every STT_FUNC / STT_GNU_IFUNC symbol of the vDSO of
// the current process.
//
// The first call captures the vDSO image into a temporary file (see
// vdso_image_fd); later calls reuse that descriptor. Returns -1 when the
// image could not be captured or parsed as ELF, otherwise the result of
// listsymbols().
int bcc_elf_foreach_vdso_sym(bcc_elf_symcb callback, void *payload) {
  Elf *elf;
  int res;
  static struct bcc_symbol_option default_option = {
    .use_debug_file = 0,
    .check_debug_file_crc = 0,
    .use_symbol_type = (1 << STT_FUNC) | (1 << STT_GNU_IFUNC)
  };

  if (vdso_image_fd == -1) {
    // Mark as failed up front; find_vdso() overwrites this on success.
    vdso_image_fd = -2;
    bcc_procutils_each_module(getpid(), &find_vdso, NULL);
  }
  if (vdso_image_fd == -2)
    return -1;

  if (openelf_fd(vdso_image_fd, &elf) == -1)
    return -1;

  res = listsymbols(elf, callback, NULL, payload, &default_option, 0);

  // Release the per-call handle; the cached descriptor stays open for
  // subsequent calls.
  elf_end(elf);
  return res;
}
882 
883 // return value: 0   : success
884 //               < 0 : error and no bcc lib found
885 //               > 0 : error and bcc lib found
bcc_free_memory_with_file(const char * path)886 static int bcc_free_memory_with_file(const char *path) {
887   unsigned long sym_addr = 0, sym_shndx;
888   Elf_Scn *section = NULL;
889   int fd = -1, err;
890   GElf_Shdr header;
891   Elf *e = NULL;
892 
893   if ((err = openelf(path, &e, &fd)) < 0)
894     goto exit;
895 
896   // get symbol address of "bcc_free_memory", which
897   // will be used to calculate runtime .text address
898   // range, esp. for shared libraries.
899   err = -1;
900   while ((section = elf_nextscn(e, section)) != 0) {
901     Elf_Data *data = NULL;
902     size_t symsize;
903 
904     if (!gelf_getshdr(section, &header))
905       continue;
906 
907     if (header.sh_type != SHT_SYMTAB && header.sh_type != SHT_DYNSYM)
908       continue;
909 
910     /* iterate all symbols */
911     symsize = header.sh_entsize;
912     while ((data = elf_getdata(section, data)) != 0) {
913       size_t i, symcount = data->d_size / symsize;
914 
915       for (i = 0; i < symcount; ++i) {
916         GElf_Sym sym;
917 
918         if (!gelf_getsym(data, (int)i, &sym))
919           continue;
920 
921         if (GELF_ST_TYPE(sym.st_info) != STT_FUNC)
922           continue;
923 
924         const char *name;
925         if ((name = elf_strptr(e, header.sh_link, sym.st_name)) == NULL)
926           continue;
927 
928         if (strcmp(name, "bcc_free_memory") == 0) {
929           sym_addr = sym.st_value;
930           sym_shndx = sym.st_shndx;
931           break;
932         }
933       }
934     }
935   }
936 
937   // Didn't find bcc_free_memory in the ELF file.
938   if (sym_addr == 0)
939     goto exit;
940 
941   int sh_idx = 0;
942   section = NULL;
943   err = 1;
944   while ((section = elf_nextscn(e, section)) != 0) {
945     sh_idx++;
946     if (!gelf_getshdr(section, &header))
947       continue;
948 
949     if (sh_idx == sym_shndx) {
950       unsigned long saddr, saddr_n, eaddr;
951       long page_size = sysconf(_SC_PAGESIZE);
952 
953       saddr = (unsigned long)bcc_free_memory - sym_addr + header.sh_addr;
954       eaddr = saddr + header.sh_size;
955 
956       // adjust saddr and eaddr, start addr needs to be page aligned
957       saddr_n = (saddr + page_size - 1) & ~(page_size - 1);
958       eaddr -= saddr_n - saddr;
959 
960       if (madvise((void *)saddr_n, eaddr - saddr_n, MADV_DONTNEED)) {
961         fprintf(stderr, "madvise failed, saddr %lx, eaddr %lx\n", saddr, eaddr);
962         goto exit;
963       }
964 
965       err = 0;
966       break;
967     }
968   }
969 
970 exit:
971   if (e)
972     elf_end(e);
973   if (fd >= 0)
974     close(fd);
975   return err;
976 }
977 
// Free bcc memory
//
// The main purpose of this function is to free llvm/clang text memory
// through madvise MADV_DONTNEED.
//
// bcc could be linked statically or dynamically into the application.
// If it is statically linked, there is no easy way to know which region
// inside the .text section belongs to llvm/clang, so the whole .text
// section is freed. Otherwise, the process map is searched to find the
// libbcc.so library and the whole .text section of that shared library is
// freed.
//
// Note that the text memory used by bcc (mainly llvm/clang) is reclaimable
// by the kernel as it is file backed. But the reclaim process may take
// some time if there is no memory pressure. So this API is mostly useful
// for applications that need to lower their RssFile metric immediately
// after loading a BPF program.
//
// Returns 0 on success and a negative value on failure.
int bcc_free_memory() {
  int err;

  // First try whether bcc is statically linked or not
  err = bcc_free_memory_with_file("/proc/self/exe");
  if (err >= 0)
    return -err;

  // Not statically linked, let us find the libbcc.so
  FILE *maps = fopen("/proc/self/maps", "r");
  if (!maps)
    return -1;

  char *line = NULL;
  size_t size = 0;
  while (getline(&line, &size, maps) > 0) {
    if (!strstr(line, "libbcc.so"))
      continue;

    // Parse the line and get the full libbcc.so path
    unsigned long addr_start, addr_end, offset, inode;
    int path_start = 0, path_end = 0;
    unsigned int devmajor, devminor;
    char perms[8];
    if (sscanf(line, "%lx-%lx %7s %lx %x:%x %lu %n%*[^\n]%n",
               &addr_start, &addr_end, perms, &offset,
               &devmajor, &devminor, &inode,
               &path_start, &path_end) < 7)
       break;

    // The %n conversions are not counted in sscanf's return value. When no
    // pathname follows the inode, the second %n is never reached and
    // path_end keeps its initial 0, so the span must be validated before
    // it is used as a copy length.
    if (path_end <= path_start)
      continue;

    // Free the text in the bcc dynamic library.
    char libbcc_path[4096];
    size_t path_len = (size_t)(path_end - path_start);
    // Leave room for the terminating NUL.
    if (path_len >= sizeof(libbcc_path))
      continue;

    memcpy(libbcc_path, line + path_start, path_len);
    libbcc_path[path_len] = '\0';
    err = bcc_free_memory_with_file(libbcc_path);
    // Fold the "bcc found but failed" positive code into a negative error.
    err = (err <= 0) ? err : -err;
  }

  fclose(maps);
  free(line);
  return err;
}
1038 
bcc_elf_get_buildid(const char * path,char * buildid)1039 int bcc_elf_get_buildid(const char *path, char *buildid)
1040 {
1041   Elf *e;
1042   int fd;
1043 
1044   if (openelf(path, &e, &fd) < 0)
1045     return -1;
1046 
1047   if (!find_buildid(e, buildid))
1048     return -1;
1049 
1050   return 0;
1051 }
1052 
bcc_elf_symbol_str(const char * path,size_t section_idx,size_t str_table_idx,char * out,size_t len,int debugfile)1053 int bcc_elf_symbol_str(const char *path, size_t section_idx,
1054                        size_t str_table_idx, char *out, size_t len,
1055                        int debugfile)
1056 {
1057   Elf *e = NULL, *d = NULL;
1058   int fd = -1, dfd = -1, err = 0;
1059   const char *name;
1060   char *debug_file = NULL;
1061 
1062   if (!out)
1063     return -1;
1064 
1065   if (openelf(path, &e, &fd) < 0)
1066     return -1;
1067 
1068   if (debugfile) {
1069     debug_file = find_debug_file(e, path, 0);
1070     if (!debug_file) {
1071       err = -1;
1072       goto exit;
1073     }
1074 
1075     if (openelf(debug_file, &d, &dfd) < 0) {
1076       err = -1;
1077       goto exit;
1078     }
1079 
1080     if ((name = elf_strptr(d, section_idx, str_table_idx)) == NULL) {
1081       err = -1;
1082       goto exit;
1083     }
1084   } else {
1085     if ((name = elf_strptr(e, section_idx, str_table_idx)) == NULL) {
1086       err = -1;
1087       goto exit;
1088     }
1089   }
1090 
1091   strncpy(out, name, len);
1092 
1093 exit:
1094   if (debug_file)
1095     free(debug_file);
1096   if (e)
1097     elf_end(e);
1098   if (d)
1099     elf_end(d);
1100   if (fd >= 0)
1101     close(fd);
1102   if (dfd >= 0)
1103     close(dfd);
1104   return err;
1105 }
1106 
#if 0
// Manual test driver, compiled out by default.
// Passes argv[1] and argv[2] to bcc_elf_findsym() and prints the address it
// stores on success.
#include <stdio.h>

int main(int argc, char *argv[])
{
  uint64_t addr;

  // Both arguments are dereferenced below, so refuse to run without them.
  if (argc < 3) {
    fprintf(stderr, "usage: %s <path> <symbol>\n", argc > 0 ? argv[0] : "prog");
    return -1;
  }

  if (bcc_elf_findsym(argv[1], argv[2], -1, STT_FUNC, &addr) < 0)
    return -1;

  printf("%s: %p\n", argv[2], (void *)(uintptr_t)addr);
  return 0;
}
#endif
1120