• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/* Standard libdwfl callbacks for debugging the running Linux kernel.
   Copyright (C) 2005-2011, 2013, 2014, 2015 Red Hat, Inc.
   This file is part of elfutils.

   This file is free software; you can redistribute it and/or modify
   it under the terms of either

     * the GNU Lesser General Public License as published by the Free
       Software Foundation; either version 3 of the License, or (at
       your option) any later version

   or

     * the GNU General Public License as published by the Free
       Software Foundation; either version 2 of the License, or (at
       your option) any later version

   or both in parallel, as here.

   elfutils is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received copies of the GNU General Public License and
   the GNU Lesser General Public License along with this program.  If
   not, see <http://www.gnu.org/licenses/>.  */
28 
/* We include <fts.h> before config.h because it can't handle
   _FILE_OFFSET_BITS.  Everything we need here is fine if its declarations
   just come first.  */
/* Some makefiles, e.g. HOST_linux-x86.mk, predefine _FILE_OFFSET_BITS.  */
32 #undef _FILE_OFFSET_BITS
33 #include <fts.h>
34 
35 #include <config.h>
36 
37 #include "libdwflP.h"
38 #include <inttypes.h>
39 #include <errno.h>
40 #include <stdio.h>
41 #include <stdio_ext.h>
42 #include <string.h>
43 #include <stdlib.h>
44 #include <sys/utsname.h>
45 #include <fcntl.h>
46 #include <unistd.h>
47 
/* Since fts.h is included before config.h, its indirect inclusions may not
   give us the right LFS aliases of these functions, so map them manually.  */
50 #ifdef _FILE_OFFSET_BITS
51 #define open open64
52 #define fopen fopen64
53 #endif
54 
55 
56 #define KERNEL_MODNAME	"kernel"
57 
58 #define MODULEDIRFMT	"/lib/modules/%s"
59 
60 #define KNOTESFILE	"/sys/kernel/notes"
61 #define	MODNOTESFMT	"/sys/module/%s/notes"
62 #define KSYMSFILE	"/proc/kallsyms"
63 #define MODULELIST	"/proc/modules"
64 #define	SECADDRDIRFMT	"/sys/module/%s/sections/"
65 #define MODULE_SECT_NAME_LEN 32	/* Minimum any linux/module.h has had.  */
66 
67 
/* Compression suffixes that are appended, in this order, to a kernel
   image name when looking for a compressed copy of the file.  */
static const char *vmlinux_suffixes[] = {
  ".gz",
#ifdef USE_BZLIB
  ".bz2",
#endif
#ifdef USE_LZMA
  ".xz",
#endif
};
78 
79 /* Try to open the given file as it is or under the debuginfo directory.  */
80 static int
try_kernel_name(Dwfl * dwfl,char ** fname,bool try_debug)81 try_kernel_name (Dwfl *dwfl, char **fname, bool try_debug)
82 {
83   if (*fname == NULL)
84     return -1;
85 
86   /* Don't bother trying *FNAME itself here if the path will cause it to be
87      tried because we give its own basename as DEBUGLINK_FILE.  */
88   int fd = ((((dwfl->callbacks->debuginfo_path
89 	       ? *dwfl->callbacks->debuginfo_path : NULL)
90 	      ?: DEFAULT_DEBUGINFO_PATH)[0] == ':') ? -1
91 	    : TEMP_FAILURE_RETRY (open (*fname, O_RDONLY)));
92 
93   if (fd < 0)
94     {
95       Dwfl_Module fakemod = { .dwfl = dwfl };
96       /* First try the file's unadorned basename as DEBUGLINK_FILE,
97 	 to look for "vmlinux" files.  */
98       fd = INTUSE(dwfl_standard_find_debuginfo) (&fakemod, NULL, NULL, 0,
99 						 *fname, basename (*fname), 0,
100 						 &fakemod.debug.name);
101       if (fd < 0 && try_debug)
102 	/* Next, let the call use the default of basename + ".debug",
103 	   to look for "vmlinux.debug" files.  */
104 	fd = INTUSE(dwfl_standard_find_debuginfo) (&fakemod, NULL, NULL, 0,
105 						   *fname, NULL, 0,
106 						   &fakemod.debug.name);
107       if (fakemod.debug.name != NULL)
108 	{
109 	  free (*fname);
110 	  *fname = fakemod.debug.name;
111 	}
112     }
113 
114   if (fd < 0)
115     for (size_t i = 0;
116 	 i < sizeof vmlinux_suffixes / sizeof vmlinux_suffixes[0];
117 	 ++i)
118       {
119 	char *zname;
120 	if (asprintf (&zname, "%s%s", *fname, vmlinux_suffixes[i]) > 0)
121 	  {
122 	    fd = TEMP_FAILURE_RETRY (open (zname, O_RDONLY));
123 	    if (fd < 0)
124 	      free (zname);
125 	    else
126 	      {
127 		free (*fname);
128 		*fname = zname;
129 	      }
130 	  }
131       }
132 
133   if (fd < 0)
134     {
135       free (*fname);
136       *fname = NULL;
137     }
138 
139   return fd;
140 }
141 
/* Return the `uname -r` string of the running kernel, or NULL if uname
   fails.  The result lives in a function-local static buffer that is
   filled on the first successful call and reused afterwards.  */
static inline const char *
kernel_release (void)
{
  static struct utsname cached;

  /* A non-empty release field means an earlier call already succeeded.  */
  if (cached.release[0] != '\0')
    return cached.release;

  return uname (&cached) == 0 ? cached.release : NULL;
}
151 
152 static int
find_kernel_elf(Dwfl * dwfl,const char * release,char ** fname)153 find_kernel_elf (Dwfl *dwfl, const char *release, char **fname)
154 {
155   if ((release[0] == '/'
156        ? asprintf (fname, "%s/vmlinux", release)
157        : asprintf (fname, "/boot/vmlinux-%s", release)) < 0)
158     return -1;
159 
160   int fd = try_kernel_name (dwfl, fname, true);
161   if (fd < 0 && release[0] != '/')
162     {
163       free (*fname);
164       if (asprintf (fname, MODULEDIRFMT "/vmlinux", release) < 0)
165 	return -1;
166       fd = try_kernel_name (dwfl, fname, true);
167     }
168 
169   return fd;
170 }
171 
172 static int
get_release(Dwfl * dwfl,const char ** release)173 get_release (Dwfl *dwfl, const char **release)
174 {
175   if (dwfl == NULL)
176     return -1;
177 
178   const char *release_string = release == NULL ? NULL : *release;
179   if (release_string == NULL)
180     {
181       release_string = kernel_release ();
182       if (release_string == NULL)
183 	return errno;
184       if (release != NULL)
185 	*release = release_string;
186     }
187 
188   return 0;
189 }
190 
191 static int
report_kernel(Dwfl * dwfl,const char ** release,int (* predicate)(const char * module,const char * file))192 report_kernel (Dwfl *dwfl, const char **release,
193 	       int (*predicate) (const char *module, const char *file))
194 {
195   int result = get_release (dwfl, release);
196   if (unlikely (result != 0))
197     return result;
198 
199   char *fname;
200   int fd = find_kernel_elf (dwfl, *release, &fname);
201 
202   if (fd < 0)
203     result = ((predicate != NULL && !(*predicate) (KERNEL_MODNAME, NULL))
204 	      ? 0 : errno ?: ENOENT);
205   else
206     {
207       bool report = true;
208 
209       if (predicate != NULL)
210 	{
211 	  /* Let the predicate decide whether to use this one.  */
212 	  int want = (*predicate) (KERNEL_MODNAME, fname);
213 	  if (want < 0)
214 	    result = errno;
215 	  report = want > 0;
216 	}
217 
218       if (report)
219 	{
220 	  /* Note that on some architectures (e.g. x86_64) the vmlinux
221 	     is ET_EXEC, while on others (e.g. ppc64) it is ET_DYN.
222 	     In both cases the phdr p_vaddr load address will be non-zero.
223 	     We want the image to be placed as if it was ET_DYN, so
224 	     pass true for add_p_vaddr which will do the right thing
225 	     (in combination with a zero base) in either case.  */
226 	  Dwfl_Module *mod = INTUSE(dwfl_report_elf) (dwfl, KERNEL_MODNAME,
227 						      fname, fd, 0, true);
228 	  if (mod == NULL)
229 	    result = -1;
230 	  else
231 	    /* The kernel is ET_EXEC, but always treat it as relocatable.  */
232 	    mod->e_type = ET_DYN;
233 	}
234 
235       free (fname);
236 
237       if (!report || result < 0)
238 	close (fd);
239     }
240 
241   return result;
242 }
243 
244 /* Look for a kernel debug archive.  If we find one, report all its modules.
245    If not, return ENOENT.  */
246 static int
report_kernel_archive(Dwfl * dwfl,const char ** release,int (* predicate)(const char * module,const char * file))247 report_kernel_archive (Dwfl *dwfl, const char **release,
248 		       int (*predicate) (const char *module, const char *file))
249 {
250   int result = get_release (dwfl, release);
251   if (unlikely (result != 0))
252     return result;
253 
254   char *archive;
255   int res = (((*release)[0] == '/')
256 	     ? asprintf (&archive, "%s/debug.a", *release)
257 	     : asprintf (&archive, MODULEDIRFMT "/debug.a", *release));
258   if (unlikely (res < 0))
259     return ENOMEM;
260 
261   int fd = try_kernel_name (dwfl, &archive, false);
262   if (fd < 0)
263     result = errno ?: ENOENT;
264   else
265     {
266       /* We have the archive file open!  */
267       Dwfl_Module *last = __libdwfl_report_offline (dwfl, NULL, archive, fd,
268 						    true, predicate);
269       if (unlikely (last == NULL))
270 	result = -1;
271       else
272 	{
273 	  /* Find the kernel and move it to the head of the list.  */
274 	  Dwfl_Module **tailp = &dwfl->modulelist, **prevp = tailp;
275 	  for (Dwfl_Module *m = *prevp; m != NULL; m = *(prevp = &m->next))
276 	    if (!m->gc && m->e_type != ET_REL && !strcmp (m->name, "kernel"))
277 	      {
278 		*prevp = m->next;
279 		m->next = *tailp;
280 		*tailp = m;
281 		break;
282 	      }
283 	}
284     }
285 
286   free (archive);
287   return result;
288 }
289 
/* Check whether the name of F ends in a known kernel-module suffix.

   If NAMELEN is nonzero the name must be exactly NAMELEN bytes followed
   by the suffix; if it is zero, at least one byte must precede the
   suffix.  Returns the length of the matching suffix, or 0 if none
   matches.  */
static size_t
check_suffix (const FTSENT *f, size_t namelen)
{
  static const char *const module_suffixes[] = {
    ".ko",
    ".ko.gz",
#if USE_BZLIB
    ".ko.bz2",
#endif
#if USE_LZMA
    ".ko.xz",
#endif
  };

  for (size_t i = 0;
       i < sizeof module_suffixes / sizeof module_suffixes[0];
       ++i)
    {
      const char *sfx = module_suffixes[i];
      const size_t sfxlen = strlen (sfx);

      /* Reject names whose length cannot fit this suffix.  */
      if (namelen != 0
          ? f->fts_namelen != namelen + sfxlen
          : f->fts_namelen <= sfxlen)
        continue;

      /* Compare the tail of the name, terminating NUL included.  */
      if (memcmp (f->fts_name + f->fts_namelen - sfxlen, sfx, sfxlen + 1) == 0)
        return sfxlen;
    }

  return 0;
}
313 
314 /* Report a kernel and all its modules found on disk, for offline use.
315    If RELEASE starts with '/', it names a directory to look in;
316    if not, it names a directory to find under /lib/modules/;
317    if null, /lib/modules/`uname -r` is used.
318    Returns zero on success, -1 if dwfl_report_module failed,
319    or an errno code if finding the files on disk failed.  */
320 int
dwfl_linux_kernel_report_offline(Dwfl * dwfl,const char * release,int (* predicate)(const char * module,const char * file))321 dwfl_linux_kernel_report_offline (Dwfl *dwfl, const char *release,
322 				  int (*predicate) (const char *module,
323 						    const char *file))
324 {
325   int result = report_kernel_archive (dwfl, &release, predicate);
326   if (result != ENOENT)
327     return result;
328 
329   /* First report the kernel.  */
330   result = report_kernel (dwfl, &release, predicate);
331   if (result == 0)
332     {
333       /* Do "find /lib/modules/RELEASE -name *.ko".  */
334 
335       char *modulesdir[] = { NULL, NULL };
336       if (release[0] == '/')
337 	modulesdir[0] = (char *) release;
338       else
339 	{
340 	  if (asprintf (&modulesdir[0], MODULEDIRFMT, release) < 0)
341 	    return errno;
342 	}
343 
344       FTS *fts = fts_open (modulesdir, FTS_NOSTAT | FTS_LOGICAL, NULL);
345       if (modulesdir[0] == (char *) release)
346 	modulesdir[0] = NULL;
347       if (fts == NULL)
348 	{
349 	  free (modulesdir[0]);
350 	  return errno;
351 	}
352 
353       FTSENT *f;
354       while ((f = fts_read (fts)) != NULL)
355 	{
356 	  /* Skip a "source" subtree, which tends to be large.
357 	     This insane hard-coding of names is what depmod does too.  */
358 	  if (f->fts_namelen == sizeof "source" - 1
359 	      && !strcmp (f->fts_name, "source"))
360 	    {
361 	      fts_set (fts, f, FTS_SKIP);
362 	      continue;
363 	    }
364 
365 	  switch (f->fts_info)
366 	    {
367 	    case FTS_F:
368 	    case FTS_SL:
369 	    case FTS_NSOK:;
370 	      /* See if this file name matches "*.ko".  */
371 	      const size_t suffix = check_suffix (f, 0);
372 	      if (suffix)
373 		{
374 		  /* We have a .ko file to report.  Following the algorithm
375 		     by which the kernel makefiles set KBUILD_MODNAME, we
376 		     replace all ',' or '-' with '_' in the file name and
377 		     call that the module name.  Modules could well be
378 		     built using different embedded names than their file
379 		     names.  To handle that, we would have to look at the
380 		     __this_module.name contents in the module's text.  */
381 
382 		  char *name = strndup (f->fts_name, f->fts_namelen - suffix);
383 		  if (unlikely (name == NULL))
384 		    {
385 		      __libdwfl_seterrno (DWFL_E_NOMEM);
386 		      result = -1;
387 		      break;
388 		    }
389 		  for (size_t i = 0; i < f->fts_namelen - suffix; ++i)
390 		    if (name[i] == '-' || name[i] == ',')
391 		      name[i] = '_';
392 
393 		  if (predicate != NULL)
394 		    {
395 		      /* Let the predicate decide whether to use this one.  */
396 		      int want = (*predicate) (name, f->fts_path);
397 		      if (want < 0)
398 			{
399 			  result = -1;
400 			  free (name);
401 			  break;
402 			}
403 		      if (!want)
404 			{
405 			  free (name);
406 			  continue;
407 			}
408 		    }
409 
410 		  if (dwfl_report_offline (dwfl, name, f->fts_path, -1) == NULL)
411 		    {
412 		      free (name);
413 		      result = -1;
414 		      break;
415 		    }
416 		  free (name);
417 		}
418 	      continue;
419 
420 	    case FTS_ERR:
421 	    case FTS_DNR:
422 	    case FTS_NS:
423 	      result = f->fts_errno;
424 	      break;
425 
426 	    case FTS_SLNONE:
427 	    default:
428 	      continue;
429 	    }
430 
431 	  /* We only get here in error cases.  */
432 	  break;
433 	}
434       fts_close (fts);
435       free (modulesdir[0]);
436     }
437 
438   return result;
439 }
440 INTDEF (dwfl_linux_kernel_report_offline)
441 
442 
443 /* State of read_address used by intuit_kernel_bounds. */
444 struct read_address_state {
445   FILE *f;
446   char *line;
447   size_t linesz;
448   size_t n;
449   char *p;
450   const char *type;
451 };
452 
453 static inline bool
read_address(struct read_address_state * state,Dwarf_Addr * addr)454 read_address (struct read_address_state *state, Dwarf_Addr *addr)
455 {
456   if ((state->n = getline (&state->line, &state->linesz, state->f)) < 1 ||
457       state->line[state->n - 2] == ']')
458     return false;
459   *addr = strtoull (state->line, &state->p, 16);
460   state->p += strspn (state->p, " \t");
461   state->type = strsep (&state->p, " \t\n");
462   if (state->type == NULL)
463     return false;
464   return state->p != NULL && state->p != state->line;
465 }
466 
467 
468 /* Grovel around to guess the bounds of the runtime kernel image.  */
469 static int
intuit_kernel_bounds(Dwarf_Addr * start,Dwarf_Addr * end,Dwarf_Addr * notes)470 intuit_kernel_bounds (Dwarf_Addr *start, Dwarf_Addr *end, Dwarf_Addr *notes)
471 {
472   struct read_address_state state = { NULL, NULL, 0, 0, NULL, NULL };
473 
474   state.f = fopen (KSYMSFILE, "r");
475   if (state.f == NULL)
476     return errno;
477 
478   (void) __fsetlocking (state.f, FSETLOCKING_BYCALLER);
479 
480   *notes = 0;
481 
482   int result;
483   do
484     result = read_address (&state, start) ? 0 : -1;
485   while (result == 0 && strchr ("TtRr", *state.type) == NULL);
486 
487   if (result == 0)
488     {
489       *end = *start;
490       while (read_address (&state, end))
491 	if (*notes == 0 && !strcmp (state.p, "__start_notes\n"))
492 	  *notes = *end;
493 
494       Dwarf_Addr round_kernel = sysconf (_SC_PAGE_SIZE);
495       *start &= -(Dwarf_Addr) round_kernel;
496       *end += round_kernel - 1;
497       *end &= -(Dwarf_Addr) round_kernel;
498       if (*start >= *end || *end - *start < round_kernel)
499 	result = -1;
500     }
501   free (state.line);
502 
503   if (result == -1)
504     result = ferror_unlocked (state.f) ? errno : ENOEXEC;
505 
506   fclose (state.f);
507 
508   return result;
509 }
510 
511 
512 /* Look for a build ID note in NOTESFILE and associate the ID with MOD.  */
513 static int
check_notes(Dwfl_Module * mod,const char * notesfile,Dwarf_Addr vaddr,const char * secname)514 check_notes (Dwfl_Module *mod, const char *notesfile,
515 	     Dwarf_Addr vaddr, const char *secname)
516 {
517   int fd = open (notesfile, O_RDONLY);
518   if (fd < 0)
519     return 1;
520 
521   assert (sizeof (Elf32_Nhdr) == sizeof (GElf_Nhdr));
522   assert (sizeof (Elf64_Nhdr) == sizeof (GElf_Nhdr));
523   union
524   {
525     GElf_Nhdr nhdr;
526     unsigned char data[8192];
527   } buf;
528 
529   ssize_t n = read (fd, buf.data, sizeof buf);
530   close (fd);
531 
532   if (n <= 0)
533     return 1;
534 
535   unsigned char *p = buf.data;
536   while (p < &buf.data[n])
537     {
538       /* No translation required since we are reading the native kernel.  */
539       GElf_Nhdr *nhdr = (void *) p;
540       p += sizeof *nhdr;
541       unsigned char *name = p;
542       p += (nhdr->n_namesz + 3) & -4U;
543       unsigned char *bits = p;
544       p += (nhdr->n_descsz + 3) & -4U;
545 
546       if (p <= &buf.data[n]
547 	  && nhdr->n_type == NT_GNU_BUILD_ID
548 	  && nhdr->n_namesz == sizeof "GNU"
549 	  && !memcmp (name, "GNU", sizeof "GNU"))
550 	{
551 	  /* Found it.  For a module we must figure out its VADDR now.  */
552 
553 	  if (secname != NULL
554 	      && (INTUSE(dwfl_linux_kernel_module_section_address)
555 		  (mod, NULL, mod->name, 0, secname, 0, NULL, &vaddr) != 0
556 		  || vaddr == (GElf_Addr) -1l))
557 	    vaddr = 0;
558 
559 	  if (vaddr != 0)
560 	    vaddr += bits - buf.data;
561 	  return INTUSE(dwfl_module_report_build_id) (mod, bits,
562 						      nhdr->n_descsz, vaddr);
563 	}
564     }
565 
566   return 0;
567 }
568 
569 /* Look for a build ID for the kernel.  */
570 static int
check_kernel_notes(Dwfl_Module * kernelmod,GElf_Addr vaddr)571 check_kernel_notes (Dwfl_Module *kernelmod, GElf_Addr vaddr)
572 {
573   return check_notes (kernelmod, KNOTESFILE, vaddr, NULL) < 0 ? -1 : 0;
574 }
575 
576 /* Look for a build ID for a loaded kernel module.  */
577 static int
check_module_notes(Dwfl_Module * mod)578 check_module_notes (Dwfl_Module *mod)
579 {
580   char *dirs[2] = { NULL, NULL };
581   if (asprintf (&dirs[0], MODNOTESFMT, mod->name) < 0)
582     return ENOMEM;
583 
584   FTS *fts = fts_open (dirs, FTS_NOSTAT | FTS_LOGICAL, NULL);
585   if (fts == NULL)
586     {
587       free (dirs[0]);
588       return 0;
589     }
590 
591   int result = 0;
592   FTSENT *f;
593   while ((f = fts_read (fts)) != NULL)
594     {
595       switch (f->fts_info)
596 	{
597 	case FTS_F:
598 	case FTS_SL:
599 	case FTS_NSOK:
600 	  result = check_notes (mod, f->fts_accpath, 0, f->fts_name);
601 	  if (result > 0)	/* Nothing found.  */
602 	    {
603 	      result = 0;
604 	      continue;
605 	    }
606 	  break;
607 
608 	case FTS_ERR:
609 	case FTS_DNR:
610 	  result = f->fts_errno;
611 	  break;
612 
613 	case FTS_NS:
614 	case FTS_SLNONE:
615 	default:
616 	  continue;
617 	}
618 
619       /* We only get here when finished or in error cases.  */
620       break;
621     }
622   fts_close (fts);
623   free (dirs[0]);
624 
625   return result;
626 }
627 
628 int
dwfl_linux_kernel_report_kernel(Dwfl * dwfl)629 dwfl_linux_kernel_report_kernel (Dwfl *dwfl)
630 {
631   Dwarf_Addr start = 0;
632   Dwarf_Addr end = 0;
633 
634   #define report() \
635     (INTUSE(dwfl_report_module) (dwfl, KERNEL_MODNAME, start, end))
636 
637   /* This is a bit of a kludge.  If we already reported the kernel,
638      don't bother figuring it out again--it never changes.  */
639   for (Dwfl_Module *m = dwfl->modulelist; m != NULL; m = m->next)
640     if (!strcmp (m->name, KERNEL_MODNAME))
641       {
642 	start = m->low_addr;
643 	end = m->high_addr;
644 	return report () == NULL ? -1 : 0;
645       }
646 
647   /* Try to figure out the bounds of the kernel image without
648      looking for any vmlinux file.  */
649   Dwarf_Addr notes;
650   /* The compiler cannot deduce that if intuit_kernel_bounds returns
651      zero NOTES will be initialized.  Fake the initialization.  */
652   asm ("" : "=m" (notes));
653   int result = intuit_kernel_bounds (&start, &end, &notes);
654   if (result == 0)
655     {
656       Dwfl_Module *mod = report ();
657       return unlikely (mod == NULL) ? -1 : check_kernel_notes (mod, notes);
658     }
659   if (result != ENOENT)
660     return result;
661 
662   /* Find the ELF file for the running kernel and dwfl_report_elf it.  */
663   return report_kernel (dwfl, NULL, NULL);
664 }
INTDEF(dwfl_linux_kernel_report_kernel)665 INTDEF (dwfl_linux_kernel_report_kernel)
666 
667 
/* Copy the first NAMELEN bytes of MODULE_NAME, plus the byte that follows
   them, into ALTERNATE_NAME, replacing every FROM among the first NAMELEN
   bytes with TO.  Returns false, leaving ALTERNATE_NAME untouched, if
   FROM does not occur in those bytes.  ALTERNATE_NAME must have room for
   NAMELEN + 1 bytes.  */
static inline bool
subst_name (char from, char to,
            const char * const module_name,
            char * const alternate_name,
            const size_t namelen)
{
  if (memchr (module_name, from, namelen) == NULL)
    return false;

  for (size_t i = 0; i < namelen; ++i)
    {
      const char c = module_name[i];
      alternate_name[i] = c == from ? to : c;
    }

  /* The byte after the name (its terminator) is copied verbatim.  */
  alternate_name[namelen] = module_name[namelen];
  return true;
}
690 
691 /* Dwfl_Callbacks.find_elf for the running Linux kernel and its modules.  */
692 
693 int
dwfl_linux_kernel_find_elf(Dwfl_Module * mod,void ** userdata,const char * module_name,Dwarf_Addr base,char ** file_name,Elf ** elfp)694 dwfl_linux_kernel_find_elf (Dwfl_Module *mod,
695 			    void **userdata __attribute__ ((unused)),
696 			    const char *module_name,
697 			    Dwarf_Addr base __attribute__ ((unused)),
698 			    char **file_name, Elf **elfp)
699 {
700   if (mod->build_id_len > 0)
701     {
702       int fd = INTUSE(dwfl_build_id_find_elf) (mod, NULL, NULL, 0,
703 					       file_name, elfp);
704       if (fd >= 0 || mod->main.elf != NULL || errno != 0)
705 	return fd;
706     }
707 
708   const char *release = kernel_release ();
709   if (release == NULL)
710     return errno;
711 
712   if (!strcmp (module_name, KERNEL_MODNAME))
713     return find_kernel_elf (mod->dwfl, release, file_name);
714 
715   /* Do "find /lib/modules/`uname -r` -name MODULE_NAME.ko".  */
716 
717   char *modulesdir[] = { NULL, NULL };
718   if (asprintf (&modulesdir[0], MODULEDIRFMT, release) < 0)
719     return -1;
720 
721   FTS *fts = fts_open (modulesdir, FTS_NOSTAT | FTS_LOGICAL, NULL);
722   if (fts == NULL)
723     {
724       free (modulesdir[0]);
725       return -1;
726     }
727 
728   size_t namelen = strlen (module_name);
729 
730   /* This is a kludge.  There is no actual necessary relationship between
731      the name of the .ko file installed and the module name the kernel
732      knows it by when it's loaded.  The kernel's only idea of the module
733      name comes from the name embedded in the object's magic
734      .gnu.linkonce.this_module section.
735 
736      In practice, these module names match the .ko file names except for
737      some using '_' and some using '-'.  So our cheap kludge is to look for
738      two files when either a '_' or '-' appears in a module name, one using
739      only '_' and one only using '-'.  */
740 
741   char *alternate_name = malloc (namelen + 1);
742   if (unlikely (alternate_name == NULL))
743     {
744       free (modulesdir[0]);
745       return ENOMEM;
746     }
747   if (!subst_name ('-', '_', module_name, alternate_name, namelen) &&
748       !subst_name ('_', '-', module_name, alternate_name, namelen))
749     alternate_name[0] = '\0';
750 
751   FTSENT *f;
752   int error = ENOENT;
753   while ((f = fts_read (fts)) != NULL)
754     {
755       /* Skip a "source" subtree, which tends to be large.
756 	 This insane hard-coding of names is what depmod does too.  */
757       if (f->fts_namelen == sizeof "source" - 1
758 	  && !strcmp (f->fts_name, "source"))
759 	{
760 	  fts_set (fts, f, FTS_SKIP);
761 	  continue;
762 	}
763 
764       error = ENOENT;
765       switch (f->fts_info)
766 	{
767 	case FTS_F:
768 	case FTS_SL:
769 	case FTS_NSOK:
770 	  /* See if this file name is "MODULE_NAME.ko".  */
771 	  if (check_suffix (f, namelen)
772 	      && (!memcmp (f->fts_name, module_name, namelen)
773 		  || !memcmp (f->fts_name, alternate_name, namelen)))
774 	    {
775 	      int fd = open (f->fts_accpath, O_RDONLY);
776 	      *file_name = strdup (f->fts_path);
777 	      fts_close (fts);
778 	      free (modulesdir[0]);
779 	      free (alternate_name);
780 	      if (fd < 0)
781 		free (*file_name);
782 	      else if (*file_name == NULL)
783 		{
784 		  close (fd);
785 		  fd = -1;
786 		}
787 	      return fd;
788 	    }
789 	  break;
790 
791 	case FTS_ERR:
792 	case FTS_DNR:
793 	case FTS_NS:
794 	  error = f->fts_errno;
795 	  break;
796 
797 	case FTS_SLNONE:
798 	default:
799 	  break;
800 	}
801     }
802 
803   fts_close (fts);
804   free (modulesdir[0]);
805   free (alternate_name);
806   errno = error;
807   return -1;
808 }
INTDEF(dwfl_linux_kernel_find_elf)809 INTDEF (dwfl_linux_kernel_find_elf)
810 
811 
812 /* Dwfl_Callbacks.section_address for kernel modules in the running Linux.
813    We read the information from /sys/module directly.  */
814 
815 int
816 dwfl_linux_kernel_module_section_address
817 (Dwfl_Module *mod __attribute__ ((unused)),
818  void **userdata __attribute__ ((unused)),
819  const char *modname, Dwarf_Addr base __attribute__ ((unused)),
820  const char *secname, Elf32_Word shndx __attribute__ ((unused)),
821  const GElf_Shdr *shdr __attribute__ ((unused)),
822  Dwarf_Addr *addr)
823 {
824   char *sysfile;
825   if (asprintf (&sysfile, SECADDRDIRFMT "%s", modname, secname) < 0)
826     return DWARF_CB_ABORT;
827 
828   FILE *f = fopen (sysfile, "r");
829   free (sysfile);
830 
831   if (f == NULL)
832     {
833       if (errno == ENOENT)
834 	{
835 	  /* The .modinfo and .data.percpu sections are never kept
836 	     loaded in the kernel.  If the kernel was compiled without
837 	     CONFIG_MODULE_UNLOAD, the .exit.* sections are not
838 	     actually loaded at all.
839 
840 	     Setting *ADDR to -1 tells the caller this section is
841 	     actually absent from memory.  */
842 
843 	  if (!strcmp (secname, ".modinfo")
844 	      || !strcmp (secname, ".data.percpu")
845 	      || !strncmp (secname, ".exit", 5))
846 	    {
847 	      *addr = (Dwarf_Addr) -1l;
848 	      return DWARF_CB_OK;
849 	    }
850 
851 	  /* The goofy PPC64 module_frob_arch_sections function tweaks
852 	     the section names as a way to control other kernel code's
853 	     behavior, and this cruft leaks out into the /sys information.
854 	     The file name for ".init*" may actually look like "_init*".  */
855 
856 	  const bool is_init = !strncmp (secname, ".init", 5);
857 	  if (is_init)
858 	    {
859 	      if (asprintf (&sysfile, SECADDRDIRFMT "_%s",
860 			    modname, &secname[1]) < 0)
861 		return ENOMEM;
862 	      f = fopen (sysfile, "r");
863 	      free (sysfile);
864 	      if (f != NULL)
865 		goto ok;
866 	    }
867 
868 	  /* The kernel truncates section names to MODULE_SECT_NAME_LEN - 1.
869 	     In case that size increases in the future, look for longer
870 	     truncated names first.  */
871 	  size_t namelen = strlen (secname);
872 	  if (namelen >= MODULE_SECT_NAME_LEN)
873 	    {
874 	      int len = asprintf (&sysfile, SECADDRDIRFMT "%s",
875 				  modname, secname);
876 	      if (len < 0)
877 		return DWARF_CB_ABORT;
878 	      char *end = sysfile + len;
879 	      do
880 		{
881 		  *--end = '\0';
882 		  f = fopen (sysfile, "r");
883 		  if (is_init && f == NULL && errno == ENOENT)
884 		    {
885 		      sysfile[len - namelen] = '_';
886 		      f = fopen (sysfile, "r");
887 		      sysfile[len - namelen] = '.';
888 		    }
889 		}
890 	      while (f == NULL && errno == ENOENT
891 		     && end - &sysfile[len - namelen] >= MODULE_SECT_NAME_LEN);
892 	      free (sysfile);
893 
894 	      if (f != NULL)
895 		goto ok;
896 	    }
897 	}
898 
899       return DWARF_CB_ABORT;
900     }
901 
902  ok:
903   (void) __fsetlocking (f, FSETLOCKING_BYCALLER);
904 
905   int result = (fscanf (f, "%" PRIx64 "\n", addr) == 1 ? 0
906 		: ferror_unlocked (f) ? errno : ENOEXEC);
907   fclose (f);
908 
909   if (result == 0)
910     return DWARF_CB_OK;
911 
912   errno = result;
913   return DWARF_CB_ABORT;
914 }
INTDEF(dwfl_linux_kernel_module_section_address)915 INTDEF (dwfl_linux_kernel_module_section_address)
916 
917 int
918 dwfl_linux_kernel_report_modules (Dwfl *dwfl)
919 {
920   FILE *f = fopen (MODULELIST, "r");
921   if (f == NULL)
922     return errno;
923 
924   (void) __fsetlocking (f, FSETLOCKING_BYCALLER);
925 
926   int result = 0;
927   Dwarf_Addr modaddr;
928   unsigned long int modsz;
929   char modname[128];
930   char *line = NULL;
931   size_t linesz = 0;
932   /* We can't just use fscanf here because it's not easy to distinguish \n
933      from other whitespace so as to take the optional word following the
934      address but always stop at the end of the line.  */
935   while (getline (&line, &linesz, f) > 0
936 	 && sscanf (line, "%128s %lu %*s %*s %*s %" PRIx64 " %*s\n",
937 		    modname, &modsz, &modaddr) == 3)
938     {
939       Dwfl_Module *mod = INTUSE(dwfl_report_module) (dwfl, modname,
940 						     modaddr, modaddr + modsz);
941       if (mod == NULL)
942 	{
943 	  result = -1;
944 	  break;
945 	}
946 
947       result = check_module_notes (mod);
948     }
949   free (line);
950 
951   if (result == 0)
952     result = ferror_unlocked (f) ? errno : feof_unlocked (f) ? 0 : ENOEXEC;
953 
954   fclose (f);
955 
956   return result;
957 }
958 INTDEF (dwfl_linux_kernel_report_modules)
959