• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Standard libdwfl callbacks for debugging the running Linux kernel.
2    Copyright (C) 2005-2011, 2013, 2014, 2015 Red Hat, Inc.
3    This file is part of elfutils.
4 
5    This file is free software; you can redistribute it and/or modify
6    it under the terms of either
7 
8      * the GNU Lesser General Public License as published by the Free
9        Software Foundation; either version 3 of the License, or (at
10        your option) any later version
11 
12    or
13 
14      * the GNU General Public License as published by the Free
15        Software Foundation; either version 2 of the License, or (at
16        your option) any later version
17 
18    or both in parallel, as here.
19 
20    elfutils is distributed in the hope that it will be useful, but
21    WITHOUT ANY WARRANTY; without even the implied warranty of
22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23    General Public License for more details.
24 
25    You should have received copies of the GNU General Public License and
26    the GNU Lesser General Public License along with this program.  If
27    not, see <http://www.gnu.org/licenses/>.  */
28 
29 /* In case we have a bad fts we include this before config.h because it
30    can't handle _FILE_OFFSET_BITS.
31    Everything we need here is fine if its declarations just come first.
32    Also, include sys/types.h before fts. On some systems fts.h is not self
33    contained. */
34 #ifdef BAD_FTS
35   #include <sys/types.h>
36   #include <fts.h>
37 #endif
38 
39 #include <config.h>
40 #include <system.h>
41 
42 #include "libelfP.h"
43 #include "libdwflP.h"
44 #include <inttypes.h>
45 #include <errno.h>
46 #include <stdio.h>
47 #include <stdio_ext.h>
48 #include <string.h>
49 #include <stdlib.h>
50 #include <sys/utsname.h>
51 #include <fcntl.h>
52 #include <unistd.h>
53 
54 /* If fts.h is included before config.h, its indirect inclusions may not
55    give us the right LFS aliases of these functions, so map them manually.  */
56 #ifdef BAD_FTS
57   #ifdef _FILE_OFFSET_BITS
58     #define open open64
59     #define fopen fopen64
60   #endif
61 #else
62   #include <sys/types.h>
63   #include <fts.h>
64 #endif
65 
66 
/* Name under which the kernel image itself is reported as a module.  */
#define KERNEL_MODNAME	"kernel"

/* Directory searched for the files of kernel release %s.  */
#define MODULEDIRFMT	"/lib/modules/%s"

/* Files and directories read to learn about the running kernel and
   its loaded modules.  The %s in the formats is a module name.  */
#define KNOTESFILE	"/sys/kernel/notes"
#define	MODNOTESFMT	"/sys/module/%s/notes"
#define KSYMSFILE	"/proc/kallsyms"
#define MODULELIST	"/proc/modules"
#define	SECADDRDIRFMT	"/sys/module/%s/sections/"
#define MODULE_SECT_NAME_LEN 32	/* Minimum any linux/module.h has had.  */


/* Suffixes that try_kernel_name appends to a kernel image name when
   the plain name cannot be opened.  The table is never modified, so
   both the array and the strings it points to are read-only.  */
static const char *const vmlinux_suffixes[] =
  {
    ".gz",
#ifdef USE_BZLIB
    ".bz2",
#endif
#ifdef USE_LZMA
    ".xz",
#endif
  };
89 
90 /* Try to open the given file as it is or under the debuginfo directory.  */
91 static int
try_kernel_name(Dwfl * dwfl,char ** fname,bool try_debug)92 try_kernel_name (Dwfl *dwfl, char **fname, bool try_debug)
93 {
94   if (*fname == NULL)
95     return -1;
96 
97   /* Don't bother trying *FNAME itself here if the path will cause it to be
98      tried because we give its own basename as DEBUGLINK_FILE.  */
99   int fd = ((((dwfl->callbacks->debuginfo_path
100 	       ? *dwfl->callbacks->debuginfo_path : NULL)
101 	      ?: DEFAULT_DEBUGINFO_PATH)[0] == ':') ? -1
102 	    : TEMP_FAILURE_RETRY (open (*fname, O_RDONLY)));
103 
104   if (fd < 0)
105     {
106       Dwfl_Module fakemod = { .dwfl = dwfl };
107 
108       if (try_debug)
109 	/* Passing NULL for DEBUGLINK_FILE searches for both the basenamer
110 	   "vmlinux" and the default of basename + ".debug", to look for
111 	   "vmlinux.debug" files.  */
112 	fd = INTUSE(dwfl_standard_find_debuginfo) (&fakemod, NULL, NULL, 0,
113 						   *fname, NULL, 0,
114 						   &fakemod.debug.name);
115       else
116 	/* Try the file's unadorned basename as DEBUGLINK_FILE,
117 	   to look only for "vmlinux" files.  */
118 	fd = INTUSE(dwfl_standard_find_debuginfo) (&fakemod, NULL, NULL, 0,
119 						   *fname, basename (*fname),
120 						   0, &fakemod.debug.name);
121 
122       if (fakemod.debug.name != NULL)
123 	{
124 	  free (*fname);
125 	  *fname = fakemod.debug.name;
126 	}
127     }
128 
129   if (fd < 0)
130     for (size_t i = 0;
131 	 i < sizeof vmlinux_suffixes / sizeof vmlinux_suffixes[0] && fd < 0;
132 	 ++i)
133       {
134 	char *zname;
135 	if (asprintf (&zname, "%s%s", *fname, vmlinux_suffixes[i]) > 0)
136 	  {
137 	    fd = TEMP_FAILURE_RETRY (open (zname, O_RDONLY));
138 	    if (fd < 0)
139 	      free (zname);
140 	    else
141 	      {
142 		free (*fname);
143 		*fname = zname;
144 	      }
145 	  }
146       }
147 
148   if (fd < 0)
149     {
150       free (*fname);
151       *fname = NULL;
152     }
153 
154   return fd;
155 }
156 
/* Return the release string of the running kernel (as `uname -r`
   prints it), or NULL with errno set if it cannot be determined.
   The string lives in static storage and is fetched only once.  */
static inline const char *
kernel_release (void)
{
#ifdef __linux__
  /* A non-empty release field means uname already succeeded.  */
  static struct utsname cached;
  if (cached.release[0] != '\0')
    return cached.release;

  return uname (&cached) == 0 ? cached.release : NULL;
#else
  /* Used for finding the running linux kernel, which isn't supported
     on non-linux kernel systems.  */
  errno = ENOTSUP;
  return NULL;
#endif
}
173 
174 static int
find_kernel_elf(Dwfl * dwfl,const char * release,char ** fname)175 find_kernel_elf (Dwfl *dwfl, const char *release, char **fname)
176 {
177   /* First try to find an uncompressed vmlinux image.  Possibly
178      including debuginfo.  */
179   if (release == NULL
180       || ((release[0] == '/'
181 	   ? asprintf (fname, "%s/vmlinux", release)
182 	   : asprintf (fname, "/boot/vmlinux-%s", release)) < 0))
183     return -1;
184 
185   int fd = try_kernel_name (dwfl, fname, true);
186   if (fd < 0 && release[0] != '/')
187     {
188       free (*fname);
189       if (asprintf (fname, MODULEDIRFMT "/vmlinux", release) < 0)
190 	return -1;
191       fd = try_kernel_name (dwfl, fname, true);
192     }
193 
194   /* There might be a compressed vmlinuz image.  Probably without
195      debuginfo, but try to find it under the debug path also, just in
196      case.  */
197   if (fd < 0)
198     {
199       free (*fname);
200       if ((release[0] == '/'
201            ? asprintf (fname, "%s/vmlinuz", release)
202            : asprintf (fname, "/boot/vmlinuz-%s", release)) < 0)
203         return -1;
204 
205       fd = try_kernel_name (dwfl, fname, true);
206       if (fd < 0 && release[0] != '/')
207 	{
208 	  free (*fname);
209 	  if (asprintf (fname, MODULEDIRFMT "/vmlinuz", release) < 0)
210 	    return -1;
211 	  fd = try_kernel_name (dwfl, fname, true);
212 	}
213     }
214 
215   return fd;
216 }
217 
218 static int
get_release(Dwfl * dwfl,const char ** release)219 get_release (Dwfl *dwfl, const char **release)
220 {
221   if (dwfl == NULL)
222     return -1;
223 
224   const char *release_string = release == NULL ? NULL : *release;
225   if (release_string == NULL)
226     {
227       release_string = kernel_release ();
228       if (release_string == NULL)
229 	return errno;
230       if (release != NULL)
231 	*release = release_string;
232     }
233 
234   return 0;
235 }
236 
237 static int
report_kernel(Dwfl * dwfl,const char ** release,int (* predicate)(const char * module,const char * file))238 report_kernel (Dwfl *dwfl, const char **release,
239 	       int (*predicate) (const char *module, const char *file))
240 {
241   int result = get_release (dwfl, release);
242   if (unlikely (result != 0))
243     return result;
244 
245   if (release == NULL || *release == NULL)
246     return EINVAL;
247 
248   char *fname;
249   int fd = find_kernel_elf (dwfl, *release, &fname);
250 
251   if (fd < 0)
252     result = ((predicate != NULL && !(*predicate) (KERNEL_MODNAME, NULL))
253 	      ? 0 : errno ?: ENOENT);
254   else
255     {
256       bool report = true;
257 
258       if (predicate != NULL)
259 	{
260 	  /* Let the predicate decide whether to use this one.  */
261 	  int want = (*predicate) (KERNEL_MODNAME, fname);
262 	  if (want < 0)
263 	    result = errno;
264 	  report = want > 0;
265 	}
266 
267       if (report)
268 	{
269 	  /* Note that on some architectures (e.g. x86_64) the vmlinux
270 	     is ET_EXEC, while on others (e.g. ppc64) it is ET_DYN.
271 	     In both cases the phdr p_vaddr load address will be non-zero.
272 	     We want the image to be placed as if it was ET_DYN, so
273 	     pass true for add_p_vaddr which will do the right thing
274 	     (in combination with a zero base) in either case.  */
275 	  Dwfl_Module *mod = INTUSE(dwfl_report_elf) (dwfl, KERNEL_MODNAME,
276 						      fname, fd, 0, true);
277 	  if (mod == NULL)
278 	    result = -1;
279 	  else
280 	    /* The kernel is ET_EXEC, but always treat it as relocatable.  */
281 	    mod->e_type = ET_DYN;
282 	}
283 
284       free (fname);
285 
286       if (!report || result < 0)
287 	close (fd);
288     }
289 
290   return result;
291 }
292 
293 /* Look for a kernel debug archive.  If we find one, report all its modules.
294    If not, return ENOENT.  */
295 static int
report_kernel_archive(Dwfl * dwfl,const char ** release,int (* predicate)(const char * module,const char * file))296 report_kernel_archive (Dwfl *dwfl, const char **release,
297 		       int (*predicate) (const char *module, const char *file))
298 {
299   int result = get_release (dwfl, release);
300   if (unlikely (result != 0))
301     return result;
302 
303   if (release == NULL || *release == NULL)
304     return EINVAL;
305 
306   char *archive;
307   int res = (((*release)[0] == '/')
308 	     ? asprintf (&archive, "%s/debug.a", *release)
309 	     : asprintf (&archive, MODULEDIRFMT "/debug.a", *release));
310   if (unlikely (res < 0))
311     return ENOMEM;
312 
313   int fd = try_kernel_name (dwfl, &archive, false);
314   if (fd < 0)
315     result = errno ?: ENOENT;
316   else
317     {
318       /* We have the archive file open!  */
319       Dwfl_Module *last = __libdwfl_report_offline (dwfl, NULL, archive, fd,
320 						    true, predicate);
321       if (unlikely (last == NULL))
322 	result = -1;
323       else
324 	{
325 	  /* Find the kernel and move it to the head of the list.  */
326 	  Dwfl_Module **tailp = &dwfl->modulelist, **prevp = tailp;
327 	  for (Dwfl_Module *m = *prevp; m != NULL; m = *(prevp = &m->next))
328 	    if (!m->gc && m->e_type != ET_REL && !strcmp (m->name, "kernel"))
329 	      {
330 		*prevp = m->next;
331 		m->next = *tailp;
332 		*tailp = m;
333 		break;
334 	      }
335 	}
336     }
337 
338   free (archive);
339   return result;
340 }
341 
/* Check whether the file name of F ends in one of the recognized
   kernel module suffixes (".ko", optionally followed by a compression
   suffix).  If NAMELEN is nonzero the name must be exactly NAMELEN
   characters plus the suffix; if it is zero, any name with at least
   one character before the suffix matches.

   Returns the length of the matching suffix, or 0 if none matches.  */
static size_t
check_suffix (const FTSENT *f, size_t namelen)
{
  static const char *const suffixes[] =
    {
      ".ko",
      ".ko.gz",
#if USE_BZLIB
      ".ko.bz2",
#endif
#if USE_LZMA
      ".ko.xz",
#endif
#if USE_ZSTD
      ".ko.zst",
#endif
    };

  for (size_t idx = 0; idx < sizeof suffixes / sizeof suffixes[0]; ++idx)
    {
      const char *sfx = suffixes[idx];
      const size_t sfxlen = strlen (sfx);

      bool length_ok;
      if (namelen != 0)
	length_ok = f->fts_namelen == namelen + sfxlen;
      else
	length_ok = f->fts_namelen >= sfxlen + 1;

      /* The comparison includes the terminating NUL, so the suffix
	 must be the very end of the name.  */
      if (length_ok
	  && memcmp (f->fts_name + f->fts_namelen - sfxlen,
		     sfx, sfxlen + 1) == 0)
	return sfxlen;
    }

  return 0;
}
368 
369 /* Report a kernel and all its modules found on disk, for offline use.
370    If RELEASE starts with '/', it names a directory to look in;
371    if not, it names a directory to find under /lib/modules/;
372    if null, /lib/modules/`uname -r` is used.
373    Returns zero on success, -1 if dwfl_report_module failed,
374    or an errno code if finding the files on disk failed.  */
375 int
dwfl_linux_kernel_report_offline(Dwfl * dwfl,const char * release,int (* predicate)(const char * module,const char * file))376 dwfl_linux_kernel_report_offline (Dwfl *dwfl, const char *release,
377 				  int (*predicate) (const char *module,
378 						    const char *file))
379 {
380   int result = report_kernel_archive (dwfl, &release, predicate);
381   if (result != ENOENT)
382     return result;
383 
384   /* First report the kernel.  */
385   result = report_kernel (dwfl, &release, predicate);
386   if (result == 0)
387     {
388       /* Do "find /lib/modules/RELEASE -name *.ko".  */
389 
390       char *modulesdir[] = { NULL, NULL };
391       if (release[0] == '/')
392 	modulesdir[0] = (char *) release;
393       else
394 	{
395 	  if (asprintf (&modulesdir[0], MODULEDIRFMT, release) < 0)
396 	    return errno;
397 	}
398 
399       FTS *fts = fts_open (modulesdir, FTS_NOSTAT | FTS_LOGICAL, NULL);
400       if (modulesdir[0] == (char *) release)
401 	modulesdir[0] = NULL;
402       if (fts == NULL)
403 	{
404 	  free (modulesdir[0]);
405 	  return errno;
406 	}
407 
408       FTSENT *f;
409       while ((f = fts_read (fts)) != NULL)
410 	{
411 	  /* Skip a "source" subtree, which tends to be large.
412 	     This insane hard-coding of names is what depmod does too.  */
413 	  if (f->fts_namelen == sizeof "source" - 1
414 	      && !strcmp (f->fts_name, "source"))
415 	    {
416 	      fts_set (fts, f, FTS_SKIP);
417 	      continue;
418 	    }
419 
420 	  switch (f->fts_info)
421 	    {
422 	    case FTS_F:
423 	    case FTS_SL:
424 	    case FTS_NSOK:;
425 	      /* See if this file name matches "*.ko".  */
426 	      const size_t suffix = check_suffix (f, 0);
427 	      if (suffix)
428 		{
429 		  /* We have a .ko file to report.  Following the algorithm
430 		     by which the kernel makefiles set KBUILD_MODNAME, we
431 		     replace all ',' or '-' with '_' in the file name and
432 		     call that the module name.  Modules could well be
433 		     built using different embedded names than their file
434 		     names.  To handle that, we would have to look at the
435 		     __this_module.name contents in the module's text.  */
436 
437 		  char *name = strndup (f->fts_name, f->fts_namelen - suffix);
438 		  if (unlikely (name == NULL))
439 		    {
440 		      __libdwfl_seterrno (DWFL_E_NOMEM);
441 		      result = -1;
442 		      break;
443 		    }
444 		  for (size_t i = 0; i < f->fts_namelen - suffix; ++i)
445 		    if (name[i] == '-' || name[i] == ',')
446 		      name[i] = '_';
447 
448 		  if (predicate != NULL)
449 		    {
450 		      /* Let the predicate decide whether to use this one.  */
451 		      int want = (*predicate) (name, f->fts_path);
452 		      if (want < 0)
453 			{
454 			  result = -1;
455 			  free (name);
456 			  break;
457 			}
458 		      if (!want)
459 			{
460 			  free (name);
461 			  continue;
462 			}
463 		    }
464 
465 		  if (dwfl_report_offline (dwfl, name, f->fts_path, -1) == NULL)
466 		    {
467 		      free (name);
468 		      result = -1;
469 		      break;
470 		    }
471 		  free (name);
472 		}
473 	      continue;
474 
475 	    case FTS_ERR:
476 	    case FTS_DNR:
477 	    case FTS_NS:
478 	      result = f->fts_errno;
479 	      break;
480 
481 	    case FTS_SLNONE:
482 	    default:
483 	      continue;
484 	    }
485 
486 	  /* We only get here in error cases.  */
487 	  break;
488 	}
489       fts_close (fts);
490       free (modulesdir[0]);
491     }
492 
493   return result;
494 }
495 INTDEF (dwfl_linux_kernel_report_offline)
496 
497 
498 /* State of read_address used by intuit_kernel_bounds. */
499 struct read_address_state {
500   FILE *f;
501   char *line;
502   size_t linesz;
503   size_t n;
504   char *p;
505   const char *type;
506 };
507 
508 static inline bool
read_address(struct read_address_state * state,Dwarf_Addr * addr)509 read_address (struct read_address_state *state, Dwarf_Addr *addr)
510 {
511   if ((state->n = getline (&state->line, &state->linesz, state->f)) < 1 ||
512       state->line[state->n - 2] == ']')
513     return false;
514   *addr = strtoull (state->line, &state->p, 16);
515   state->p += strspn (state->p, " \t");
516   state->type = strsep (&state->p, " \t\n");
517   if (state->type == NULL)
518     return false;
519   return state->p != NULL && state->p != state->line;
520 }
521 
522 
523 /* Grovel around to guess the bounds of the runtime kernel image.  */
524 static int
intuit_kernel_bounds(Dwarf_Addr * start,Dwarf_Addr * end,Dwarf_Addr * notes)525 intuit_kernel_bounds (Dwarf_Addr *start, Dwarf_Addr *end, Dwarf_Addr *notes)
526 {
527   struct read_address_state state = { NULL, NULL, 0, 0, NULL, NULL };
528 
529   *notes = 0;
530 
531   state.f = fopen (KSYMSFILE, "r");
532   if (state.f == NULL)
533     return errno;
534 
535   (void) __fsetlocking (state.f, FSETLOCKING_BYCALLER);
536 
537   int result;
538   do
539     result = read_address (&state, start) ? 0 : -1;
540   while (result == 0 && strchr ("TtRr", *state.type) == NULL);
541 
542   if (result == 0)
543     {
544       Dwarf_Addr addr;
545       *end = *start;
546       while (read_address (&state, &addr) && addr >= *end)
547 	{
548 	  *end = addr;
549 	  if (*notes == 0 && !strcmp (state.p, "__start_notes\n"))
550 	    *notes = *end;
551 	}
552 
553       Dwarf_Addr round_kernel = sysconf (_SC_PAGESIZE);
554       *start &= -(Dwarf_Addr) round_kernel;
555       *end += round_kernel - 1;
556       *end &= -(Dwarf_Addr) round_kernel;
557       if (*start >= *end || *end - *start < round_kernel)
558 	result = -1;
559     }
560   free (state.line);
561 
562   if (result == -1)
563     result = ferror_unlocked (state.f) ? errno : ENOEXEC;
564 
565   fclose (state.f);
566 
567   return result;
568 }
569 
570 
571 /* Look for a build ID note in NOTESFILE and associate the ID with MOD.  */
572 static int
check_notes(Dwfl_Module * mod,const char * notesfile,Dwarf_Addr vaddr,const char * secname)573 check_notes (Dwfl_Module *mod, const char *notesfile,
574 	     Dwarf_Addr vaddr, const char *secname)
575 {
576   int fd = open (notesfile, O_RDONLY);
577   if (fd < 0)
578     return 1;
579 
580   assert (sizeof (Elf32_Nhdr) == sizeof (GElf_Nhdr));
581   assert (sizeof (Elf64_Nhdr) == sizeof (GElf_Nhdr));
582   union
583   {
584     GElf_Nhdr nhdr;
585     unsigned char data[8192];
586   } buf;
587 
588   ssize_t n = read (fd, buf.data, sizeof buf);
589   close (fd);
590 
591   if (n <= 0)
592     return 1;
593 
594   unsigned char *p = buf.data;
595   size_t len = 0;
596   while (p < &buf.data[n])
597     {
598       /* No translation required since we are reading the native kernel.  */
599       GElf_Nhdr *nhdr = (void *) p;
600       len += sizeof *nhdr;
601       p += len;
602       unsigned char *name = p;
603       unsigned char *bits;
604       /* This is somewhat ugly, GNU Property notes use different padding,
605 	 but all we have is the file content, so we have to actually check
606 	 the name and type.  */
607       if (nhdr->n_type == NT_GNU_PROPERTY_TYPE_0
608           && nhdr->n_namesz == sizeof "GNU"
609           && name + nhdr->n_namesz < &buf.data[n]
610           && !memcmp (name, "GNU", sizeof "GNU"))
611 	{
612 	  len += nhdr->n_namesz;
613 	  len = NOTE_ALIGN8 (len);
614 	  p = buf.data + len;
615 	  bits = p;
616 	  len += nhdr->n_descsz;
617 	  len = NOTE_ALIGN8 (len);
618 	  p = buf.data + len;
619 	}
620       else
621 	{
622 	  len += nhdr->n_namesz;
623 	  len = NOTE_ALIGN4 (len);
624 	  p = buf.data + len;
625 	  bits = p;
626 	  len += nhdr->n_descsz;
627 	  len = NOTE_ALIGN4 (len);
628 	  p = buf.data + len;
629 	}
630 
631       if (p <= &buf.data[n]
632 	  && nhdr->n_type == NT_GNU_BUILD_ID
633 	  && nhdr->n_namesz == sizeof "GNU"
634 	  && !memcmp (name, "GNU", sizeof "GNU"))
635 	{
636 	  /* Found it.  For a module we must figure out its VADDR now.  */
637 
638 	  if (secname != NULL
639 	      && (INTUSE(dwfl_linux_kernel_module_section_address)
640 		  (mod, NULL, mod->name, 0, secname, 0, NULL, &vaddr) != 0
641 		  || vaddr == (GElf_Addr) -1l))
642 	    vaddr = 0;
643 
644 	  if (vaddr != 0)
645 	    vaddr += bits - buf.data;
646 	  return INTUSE(dwfl_module_report_build_id) (mod, bits,
647 						      nhdr->n_descsz, vaddr);
648 	}
649     }
650 
651   return 0;
652 }
653 
654 /* Look for a build ID for the kernel.  */
655 static int
check_kernel_notes(Dwfl_Module * kernelmod,GElf_Addr vaddr)656 check_kernel_notes (Dwfl_Module *kernelmod, GElf_Addr vaddr)
657 {
658   return check_notes (kernelmod, KNOTESFILE, vaddr, NULL) < 0 ? -1 : 0;
659 }
660 
661 /* Look for a build ID for a loaded kernel module.  */
662 static int
check_module_notes(Dwfl_Module * mod)663 check_module_notes (Dwfl_Module *mod)
664 {
665   char *dirs[2] = { NULL, NULL };
666   if (asprintf (&dirs[0], MODNOTESFMT, mod->name) < 0)
667     return ENOMEM;
668 
669   FTS *fts = fts_open (dirs, FTS_NOSTAT | FTS_LOGICAL, NULL);
670   if (fts == NULL)
671     {
672       free (dirs[0]);
673       return 0;
674     }
675 
676   int result = 0;
677   FTSENT *f;
678   while ((f = fts_read (fts)) != NULL)
679     {
680       switch (f->fts_info)
681 	{
682 	case FTS_F:
683 	case FTS_SL:
684 	case FTS_NSOK:
685 	  result = check_notes (mod, f->fts_accpath, 0, f->fts_name);
686 	  if (result > 0)	/* Nothing found.  */
687 	    {
688 	      result = 0;
689 	      continue;
690 	    }
691 	  break;
692 
693 	case FTS_ERR:
694 	case FTS_DNR:
695 	  result = f->fts_errno;
696 	  break;
697 
698 	case FTS_NS:
699 	case FTS_SLNONE:
700 	default:
701 	  continue;
702 	}
703 
704       /* We only get here when finished or in error cases.  */
705       break;
706     }
707   fts_close (fts);
708   free (dirs[0]);
709 
710   return result;
711 }
712 
713 int
dwfl_linux_kernel_report_kernel(Dwfl * dwfl)714 dwfl_linux_kernel_report_kernel (Dwfl *dwfl)
715 {
716   Dwarf_Addr start = 0;
717   Dwarf_Addr end = 0;
718 
719   #define report() \
720     (INTUSE(dwfl_report_module) (dwfl, KERNEL_MODNAME, start, end))
721 
722   /* This is a bit of a kludge.  If we already reported the kernel,
723      don't bother figuring it out again--it never changes.  */
724   for (Dwfl_Module *m = dwfl->modulelist; m != NULL; m = m->next)
725     if (!strcmp (m->name, KERNEL_MODNAME))
726       {
727 	start = m->low_addr;
728 	end = m->high_addr;
729 	return report () == NULL ? -1 : 0;
730       }
731 
732   /* Try to figure out the bounds of the kernel image without
733      looking for any vmlinux file.  */
734   Dwarf_Addr notes;
735   int result = intuit_kernel_bounds (&start, &end, &notes);
736   if (result == 0)
737     {
738       Dwfl_Module *mod = report ();
739       return unlikely (mod == NULL) ? -1 : check_kernel_notes (mod, notes);
740     }
741   if (result != ENOENT)
742     return result;
743 
744   /* Find the ELF file for the running kernel and dwfl_report_elf it.  */
745   return report_kernel (dwfl, NULL, NULL);
746 }
INTDEF(dwfl_linux_kernel_report_kernel)747 INTDEF (dwfl_linux_kernel_report_kernel)
748 
749 
/* Copy the NAMELEN characters of MODULE_NAME, plus the byte that
   follows them, into ALTERNATE_NAME with every FROM character
   replaced by TO.  ALTERNATE_NAME must have room for NAMELEN + 1
   bytes.

   Returns false, leaving ALTERNATE_NAME untouched, if FROM does not
   occur in the first NAMELEN characters; true otherwise.  */
static inline bool
subst_name (char from, char to,
            const char * const module_name,
            char * const alternate_name,
            const size_t namelen)
{
  const char *hit = memchr (module_name, from, namelen);
  if (hit == NULL)
    return false;

  const char *const src_end = module_name + namelen;
  const char *src = module_name;
  char *dst = alternate_name;

  /* Copy each run of unchanged characters, then emit the replacement
     for the FROM character that ended the run.  */
  do
    {
      const size_t run = hit - src;
      memcpy (dst, src, run);
      dst += run;
      *dst++ = to;
      src = hit + 1;
      hit = memchr (src, from, src_end - src);
    }
  while (hit != NULL);

  /* The tail, including the byte just past the name.  */
  memcpy (dst, src, (src_end - src) + 1);
  return true;
}
772 
773 /* Dwfl_Callbacks.find_elf for the running Linux kernel and its modules.  */
774 
775 int
dwfl_linux_kernel_find_elf(Dwfl_Module * mod,void ** userdata,const char * module_name,Dwarf_Addr base,char ** file_name,Elf ** elfp)776 dwfl_linux_kernel_find_elf (Dwfl_Module *mod,
777 			    void **userdata __attribute__ ((unused)),
778 			    const char *module_name,
779 			    Dwarf_Addr base __attribute__ ((unused)),
780 			    char **file_name, Elf **elfp)
781 {
782   if (mod->build_id_len > 0)
783     {
784       int fd = INTUSE(dwfl_build_id_find_elf) (mod, NULL, NULL, 0,
785 					       file_name, elfp);
786       if (fd >= 0 || mod->main.elf != NULL || errno != 0)
787 	return fd;
788     }
789 
790   const char *release = kernel_release ();
791   if (release == NULL)
792     return errno;
793 
794   if (!strcmp (module_name, KERNEL_MODNAME))
795     return find_kernel_elf (mod->dwfl, release, file_name);
796 
797   /* Do "find /lib/modules/`uname -r` -name MODULE_NAME.ko".  */
798 
799   char *modulesdir[] = { NULL, NULL };
800   if (asprintf (&modulesdir[0], MODULEDIRFMT, release) < 0)
801     return -1;
802 
803   FTS *fts = fts_open (modulesdir, FTS_NOSTAT | FTS_LOGICAL, NULL);
804   if (fts == NULL)
805     {
806       free (modulesdir[0]);
807       return -1;
808     }
809 
810   size_t namelen = strlen (module_name);
811 
812   /* This is a kludge.  There is no actual necessary relationship between
813      the name of the .ko file installed and the module name the kernel
814      knows it by when it's loaded.  The kernel's only idea of the module
815      name comes from the name embedded in the object's magic
816      .gnu.linkonce.this_module section.
817 
818      In practice, these module names match the .ko file names except for
819      some using '_' and some using '-'.  So our cheap kludge is to look for
820      two files when either a '_' or '-' appears in a module name, one using
821      only '_' and one only using '-'.  */
822 
823   char *alternate_name = malloc (namelen + 1);
824   if (unlikely (alternate_name == NULL))
825     {
826       free (modulesdir[0]);
827       return ENOMEM;
828     }
829   if (!subst_name ('-', '_', module_name, alternate_name, namelen) &&
830       !subst_name ('_', '-', module_name, alternate_name, namelen))
831     alternate_name[0] = '\0';
832 
833   FTSENT *f;
834   int error = ENOENT;
835   while ((f = fts_read (fts)) != NULL)
836     {
837       /* Skip a "source" subtree, which tends to be large.
838 	 This insane hard-coding of names is what depmod does too.  */
839       if (f->fts_namelen == sizeof "source" - 1
840 	  && !strcmp (f->fts_name, "source"))
841 	{
842 	  fts_set (fts, f, FTS_SKIP);
843 	  continue;
844 	}
845 
846       error = ENOENT;
847       switch (f->fts_info)
848 	{
849 	case FTS_F:
850 	case FTS_SL:
851 	case FTS_NSOK:
852 	  /* See if this file name is "MODULE_NAME.ko".  */
853 	  if (check_suffix (f, namelen)
854 	      && (!memcmp (f->fts_name, module_name, namelen)
855 		  || !memcmp (f->fts_name, alternate_name, namelen)))
856 	    {
857 	      int fd = open (f->fts_accpath, O_RDONLY);
858 	      *file_name = strdup (f->fts_path);
859 	      fts_close (fts);
860 	      free (modulesdir[0]);
861 	      free (alternate_name);
862 	      if (fd < 0)
863 		free (*file_name);
864 	      else if (*file_name == NULL)
865 		{
866 		  close (fd);
867 		  fd = -1;
868 		}
869 	      return fd;
870 	    }
871 	  break;
872 
873 	case FTS_ERR:
874 	case FTS_DNR:
875 	case FTS_NS:
876 	  error = f->fts_errno;
877 	  break;
878 
879 	case FTS_SLNONE:
880 	default:
881 	  break;
882 	}
883     }
884 
885   fts_close (fts);
886   free (modulesdir[0]);
887   free (alternate_name);
888   errno = error;
889   return -1;
890 }
INTDEF(dwfl_linux_kernel_find_elf)891 INTDEF (dwfl_linux_kernel_find_elf)
892 
893 
894 /* Dwfl_Callbacks.section_address for kernel modules in the running Linux.
895    We read the information from /sys/module directly.  */
896 
897 int
898 dwfl_linux_kernel_module_section_address
899 (Dwfl_Module *mod __attribute__ ((unused)),
900  void **userdata __attribute__ ((unused)),
901  const char *modname, Dwarf_Addr base __attribute__ ((unused)),
902  const char *secname, Elf32_Word shndx __attribute__ ((unused)),
903  const GElf_Shdr *shdr __attribute__ ((unused)),
904  Dwarf_Addr *addr)
905 {
906   char *sysfile;
907   if (asprintf (&sysfile, SECADDRDIRFMT "%s", modname, secname) < 0)
908     return DWARF_CB_ABORT;
909 
910   FILE *f = fopen (sysfile, "r");
911   free (sysfile);
912 
913   if (f == NULL)
914     {
915       if (errno == ENOENT)
916 	{
917 	  /* The .modinfo and .data.percpu sections are never kept
918 	     loaded in the kernel.  If the kernel was compiled without
919 	     CONFIG_MODULE_UNLOAD, the .exit.* sections are not
920 	     actually loaded at all.
921 
922 	     Setting *ADDR to -1 tells the caller this section is
923 	     actually absent from memory.  */
924 
925 	  if (!strcmp (secname, ".modinfo")
926 	      || !strcmp (secname, ".data.percpu")
927 	      || !strncmp (secname, ".exit", 5))
928 	    {
929 	      *addr = (Dwarf_Addr) -1l;
930 	      return DWARF_CB_OK;
931 	    }
932 
933 	  /* The goofy PPC64 module_frob_arch_sections function tweaks
934 	     the section names as a way to control other kernel code's
935 	     behavior, and this cruft leaks out into the /sys information.
936 	     The file name for ".init*" may actually look like "_init*".  */
937 
938 	  const bool is_init = !strncmp (secname, ".init", 5);
939 	  if (is_init)
940 	    {
941 	      if (asprintf (&sysfile, SECADDRDIRFMT "_%s",
942 			    modname, &secname[1]) < 0)
943 		return ENOMEM;
944 	      f = fopen (sysfile, "r");
945 	      free (sysfile);
946 	      if (f != NULL)
947 		goto ok;
948 	    }
949 
950 	  /* The kernel truncates section names to MODULE_SECT_NAME_LEN - 1.
951 	     In case that size increases in the future, look for longer
952 	     truncated names first.  */
953 	  size_t namelen = strlen (secname);
954 	  if (namelen >= MODULE_SECT_NAME_LEN)
955 	    {
956 	      int len = asprintf (&sysfile, SECADDRDIRFMT "%s",
957 				  modname, secname);
958 	      if (len < 0)
959 		return DWARF_CB_ABORT;
960 	      char *end = sysfile + len;
961 	      do
962 		{
963 		  *--end = '\0';
964 		  f = fopen (sysfile, "r");
965 		  if (is_init && f == NULL && errno == ENOENT)
966 		    {
967 		      sysfile[len - namelen] = '_';
968 		      f = fopen (sysfile, "r");
969 		      sysfile[len - namelen] = '.';
970 		    }
971 		}
972 	      while (f == NULL && errno == ENOENT
973 		     && end - &sysfile[len - namelen] >= MODULE_SECT_NAME_LEN);
974 	      free (sysfile);
975 
976 	      if (f != NULL)
977 		goto ok;
978 	    }
979 	}
980 
981       return DWARF_CB_ABORT;
982     }
983 
984  ok:
985   (void) __fsetlocking (f, FSETLOCKING_BYCALLER);
986 
987   int result = (fscanf (f, "%" PRIx64 "\n", addr) == 1 ? 0
988 		: ferror_unlocked (f) ? errno : ENOEXEC);
989   fclose (f);
990 
991   if (result == 0)
992     return DWARF_CB_OK;
993 
994   errno = result;
995   return DWARF_CB_ABORT;
996 }
INTDEF(dwfl_linux_kernel_module_section_address)997 INTDEF (dwfl_linux_kernel_module_section_address)
998 
999 int
1000 dwfl_linux_kernel_report_modules (Dwfl *dwfl)
1001 {
1002   FILE *f = fopen (MODULELIST, "r");
1003   if (f == NULL)
1004     return errno;
1005 
1006   (void) __fsetlocking (f, FSETLOCKING_BYCALLER);
1007 
1008   int result = 0;
1009   Dwarf_Addr modaddr;
1010   unsigned long int modsz;
1011   char modname[128];
1012   char *line = NULL;
1013   size_t linesz = 0;
1014   /* We can't just use fscanf here because it's not easy to distinguish \n
1015      from other whitespace so as to take the optional word following the
1016      address but always stop at the end of the line.  */
1017   while (getline (&line, &linesz, f) > 0
1018 	 && sscanf (line, "%128s %lu %*s %*s %*s %" PRIx64 " %*s\n",
1019 		    modname, &modsz, &modaddr) == 3)
1020     {
1021       Dwfl_Module *mod = INTUSE(dwfl_report_module) (dwfl, modname,
1022 						     modaddr, modaddr + modsz);
1023       if (mod == NULL)
1024 	{
1025 	  result = -1;
1026 	  break;
1027 	}
1028 
1029       result = check_module_notes (mod);
1030     }
1031   free (line);
1032 
1033   if (result == 0)
1034     result = ferror_unlocked (f) ? errno : feof_unlocked (f) ? 0 : ENOEXEC;
1035 
1036   fclose (f);
1037 
1038   return result;
1039 }
1040 INTDEF (dwfl_linux_kernel_report_modules)
1041