1 //===-- llvm-nm.cpp - Symbol table dumping utility for llvm ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This program is a utility that works like traditional Unix "nm", that is, it
10 // prints out the names of symbols in a bitcode or object file, along with some
11 // information about each symbol.
12 //
13 // This "nm" supports many of the features of GNU "nm", including its different
14 // output formats.
15 //
16 //===----------------------------------------------------------------------===//
17
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/BinaryFormat/COFF.h"
20 #include "llvm/Demangle/Demangle.h"
21 #include "llvm/IR/Function.h"
22 #include "llvm/IR/LLVMContext.h"
23 #include "llvm/Object/Archive.h"
24 #include "llvm/Object/COFF.h"
25 #include "llvm/Object/COFFImportFile.h"
26 #include "llvm/Object/ELFObjectFile.h"
27 #include "llvm/Object/IRObjectFile.h"
28 #include "llvm/Object/MachO.h"
29 #include "llvm/Object/MachOUniversal.h"
30 #include "llvm/Object/ObjectFile.h"
31 #include "llvm/Object/TapiFile.h"
32 #include "llvm/Object/TapiUniversal.h"
33 #include "llvm/Object/Wasm.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/FileSystem.h"
36 #include "llvm/Support/Format.h"
37 #include "llvm/Support/InitLLVM.h"
38 #include "llvm/Support/MemoryBuffer.h"
39 #include "llvm/Support/Program.h"
40 #include "llvm/Support/Signals.h"
41 #include "llvm/Support/TargetSelect.h"
42 #include "llvm/Support/WithColor.h"
43 #include "llvm/Support/raw_ostream.h"
44 #include <vector>
45
46 using namespace llvm;
47 using namespace object;
48
49 namespace {
50 enum OutputFormatTy { bsd, sysv, posix, darwin };
51
52 cl::OptionCategory NMCat("llvm-nm Options");
53
54 cl::opt<OutputFormatTy> OutputFormat(
55 "format", cl::desc("Specify output format"),
56 cl::values(clEnumVal(bsd, "BSD format"), clEnumVal(sysv, "System V format"),
57 clEnumVal(posix, "POSIX.2 format"),
58 clEnumVal(darwin, "Darwin -m format")),
59 cl::init(bsd), cl::cat(NMCat));
60 cl::alias OutputFormat2("f", cl::desc("Alias for --format"),
61 cl::aliasopt(OutputFormat));
62
63 cl::list<std::string> InputFilenames(cl::Positional, cl::desc("<input files>"),
64 cl::ZeroOrMore);
65
66 cl::opt<bool> UndefinedOnly("undefined-only",
67 cl::desc("Show only undefined symbols"),
68 cl::cat(NMCat));
69 cl::alias UndefinedOnly2("u", cl::desc("Alias for --undefined-only"),
70 cl::aliasopt(UndefinedOnly), cl::Grouping);
71
72 cl::opt<bool> DynamicSyms("dynamic",
73 cl::desc("Display the dynamic symbols instead "
74 "of normal symbols."),
75 cl::cat(NMCat));
76 cl::alias DynamicSyms2("D", cl::desc("Alias for --dynamic"),
77 cl::aliasopt(DynamicSyms), cl::Grouping);
78
79 cl::opt<bool> DefinedOnly("defined-only", cl::desc("Show only defined symbols"),
80 cl::cat(NMCat));
81 cl::alias DefinedOnly2("U", cl::desc("Alias for --defined-only"),
82 cl::aliasopt(DefinedOnly), cl::Grouping);
83
84 cl::opt<bool> ExternalOnly("extern-only",
85 cl::desc("Show only external symbols"),
86 cl::ZeroOrMore, cl::cat(NMCat));
87 cl::alias ExternalOnly2("g", cl::desc("Alias for --extern-only"),
88 cl::aliasopt(ExternalOnly), cl::Grouping,
89 cl::ZeroOrMore);
90
91 cl::opt<bool> NoWeakSymbols("no-weak", cl::desc("Show only non-weak symbols"),
92 cl::cat(NMCat));
93 cl::alias NoWeakSymbols2("W", cl::desc("Alias for --no-weak"),
94 cl::aliasopt(NoWeakSymbols), cl::Grouping);
95
96 cl::opt<bool> BSDFormat("B", cl::desc("Alias for --format=bsd"), cl::Grouping,
97 cl::cat(NMCat));
98 cl::opt<bool> POSIXFormat("P", cl::desc("Alias for --format=posix"),
99 cl::Grouping, cl::cat(NMCat));
100 cl::alias Portability("portability", cl::desc("Alias for --format=posix"),
101 cl::aliasopt(POSIXFormat), cl::NotHidden);
102 cl::opt<bool> DarwinFormat("m", cl::desc("Alias for --format=darwin"),
103 cl::Grouping, cl::cat(NMCat));
104
105 static cl::list<std::string>
106 ArchFlags("arch", cl::desc("architecture(s) from a Mach-O file to dump"),
107 cl::ZeroOrMore, cl::cat(NMCat));
108 bool ArchAll = false;
109
110 cl::opt<bool> PrintFileName(
111 "print-file-name",
112 cl::desc("Precede each symbol with the object file it came from"),
113 cl::cat(NMCat));
114
115 cl::alias PrintFileNameA("A", cl::desc("Alias for --print-file-name"),
116 cl::aliasopt(PrintFileName), cl::Grouping);
117 cl::alias PrintFileNameo("o", cl::desc("Alias for --print-file-name"),
118 cl::aliasopt(PrintFileName), cl::Grouping);
119
120 cl::opt<bool> DebugSyms("debug-syms",
121 cl::desc("Show all symbols, even debugger only"),
122 cl::cat(NMCat));
123 cl::alias DebugSymsa("a", cl::desc("Alias for --debug-syms"),
124 cl::aliasopt(DebugSyms), cl::Grouping);
125
126 cl::opt<bool> NumericSort("numeric-sort", cl::desc("Sort symbols by address"),
127 cl::cat(NMCat));
128 cl::alias NumericSortn("n", cl::desc("Alias for --numeric-sort"),
129 cl::aliasopt(NumericSort), cl::Grouping);
130 cl::alias NumericSortv("v", cl::desc("Alias for --numeric-sort"),
131 cl::aliasopt(NumericSort), cl::Grouping);
132
133 cl::opt<bool> NoSort("no-sort", cl::desc("Show symbols in order encountered"),
134 cl::cat(NMCat));
135 cl::alias NoSortp("p", cl::desc("Alias for --no-sort"), cl::aliasopt(NoSort),
136 cl::Grouping);
137
138 cl::opt<bool> Demangle("demangle", cl::ZeroOrMore,
139 cl::desc("Demangle C++ symbol names"), cl::cat(NMCat));
140 cl::alias DemangleC("C", cl::desc("Alias for --demangle"),
141 cl::aliasopt(Demangle), cl::Grouping);
142 cl::opt<bool> NoDemangle("no-demangle", cl::init(false), cl::ZeroOrMore,
143 cl::desc("Don't demangle symbol names"),
144 cl::cat(NMCat));
145
146 cl::opt<bool> ReverseSort("reverse-sort", cl::desc("Sort in reverse order"),
147 cl::cat(NMCat));
148 cl::alias ReverseSortr("r", cl::desc("Alias for --reverse-sort"),
149 cl::aliasopt(ReverseSort), cl::Grouping);
150
151 cl::opt<bool> PrintSize("print-size",
152 cl::desc("Show symbol size as well as address"),
153 cl::cat(NMCat));
154 cl::alias PrintSizeS("S", cl::desc("Alias for --print-size"),
155 cl::aliasopt(PrintSize), cl::Grouping);
156 bool MachOPrintSizeWarning = false;
157
158 cl::opt<bool> SizeSort("size-sort", cl::desc("Sort symbols by size"),
159 cl::cat(NMCat));
160
161 cl::opt<bool> WithoutAliases("without-aliases", cl::Hidden,
162 cl::desc("Exclude aliases from output"),
163 cl::cat(NMCat));
164
165 cl::opt<bool> ArchiveMap("print-armap", cl::desc("Print the archive map"),
166 cl::cat(NMCat));
167 cl::alias ArchiveMaps("M", cl::desc("Alias for --print-armap"),
168 cl::aliasopt(ArchiveMap), cl::Grouping);
169
170 enum Radix { d, o, x };
171 cl::opt<Radix>
172 AddressRadix("radix", cl::desc("Radix (o/d/x) for printing symbol Values"),
173 cl::values(clEnumVal(d, "decimal"), clEnumVal(o, "octal"),
174 clEnumVal(x, "hexadecimal")),
175 cl::init(x), cl::cat(NMCat));
176 cl::alias RadixAlias("t", cl::desc("Alias for --radix"),
177 cl::aliasopt(AddressRadix));
178
179 cl::opt<bool> JustSymbolName("just-symbol-name",
180 cl::desc("Print just the symbol's name"),
181 cl::cat(NMCat));
182 cl::alias JustSymbolNames("j", cl::desc("Alias for --just-symbol-name"),
183 cl::aliasopt(JustSymbolName), cl::Grouping);
184
185 cl::opt<bool>
186 SpecialSyms("special-syms",
187 cl::desc("Do not filter special symbols from the output"),
188 cl::cat(NMCat));
189
190 cl::list<std::string> SegSect("s", cl::multi_val(2), cl::ZeroOrMore,
191 cl::value_desc("segment section"), cl::Hidden,
192 cl::desc("Dump only symbols from this segment "
193 "and section name, Mach-O only"),
194 cl::cat(NMCat));
195
196 cl::opt<bool> FormatMachOasHex("x",
197 cl::desc("Print symbol entry in hex, "
198 "Mach-O only"),
199 cl::Grouping, cl::cat(NMCat));
200 cl::opt<bool> AddDyldInfo("add-dyldinfo",
201 cl::desc("Add symbols from the dyldinfo not already "
202 "in the symbol table, Mach-O only"),
203 cl::cat(NMCat));
204 cl::opt<bool> NoDyldInfo("no-dyldinfo",
205 cl::desc("Don't add any symbols from the dyldinfo, "
206 "Mach-O only"),
207 cl::cat(NMCat));
208 cl::opt<bool> DyldInfoOnly("dyldinfo-only",
209 cl::desc("Show only symbols from the dyldinfo, "
210 "Mach-O only"),
211 cl::cat(NMCat));
212
213 cl::opt<bool> NoLLVMBitcode("no-llvm-bc",
214 cl::desc("Disable LLVM bitcode reader"),
215 cl::cat(NMCat));
216
217 cl::opt<bool> AddInlinedInfo("add-inlinedinfo",
218 cl::desc("Add symbols from the inlined libraries, "
219 "TBD(Mach-O) only"),
220 cl::cat(NMCat));
221
222 cl::extrahelp HelpResponse("\nPass @FILE as argument to read options from FILE.\n");
223
224 bool PrintAddress = true;
225
226 bool MultipleFiles = false;
227
228 bool HadError = false;
229
230 std::string ToolName;
231 } // anonymous namespace
232
error(Twine Message,Twine Path=Twine ())233 static void error(Twine Message, Twine Path = Twine()) {
234 HadError = true;
235 WithColor::error(errs(), ToolName) << Path << ": " << Message << ".\n";
236 }
237
error(std::error_code EC,Twine Path=Twine ())238 static bool error(std::error_code EC, Twine Path = Twine()) {
239 if (EC) {
240 error(EC.message(), Path);
241 return true;
242 }
243 return false;
244 }
245
246 // This version of error() prints the archive name and member name, for example:
247 // "libx.a(foo.o)" after the ToolName before the error message. It sets
248 // HadError but returns allowing the code to move on to other archive members.
error(llvm::Error E,StringRef FileName,const Archive::Child & C,StringRef ArchitectureName=StringRef ())249 static void error(llvm::Error E, StringRef FileName, const Archive::Child &C,
250 StringRef ArchitectureName = StringRef()) {
251 HadError = true;
252 WithColor::error(errs(), ToolName) << FileName;
253
254 Expected<StringRef> NameOrErr = C.getName();
255 // TODO: if we have a error getting the name then it would be nice to print
256 // the index of which archive member this is and or its offset in the
257 // archive instead of "???" as the name.
258 if (!NameOrErr) {
259 consumeError(NameOrErr.takeError());
260 errs() << "(" << "???" << ")";
261 } else
262 errs() << "(" << NameOrErr.get() << ")";
263
264 if (!ArchitectureName.empty())
265 errs() << " (for architecture " << ArchitectureName << ") ";
266
267 std::string Buf;
268 raw_string_ostream OS(Buf);
269 logAllUnhandledErrors(std::move(E), OS);
270 OS.flush();
271 errs() << " " << Buf << "\n";
272 }
273
274 // This version of error() prints the file name and which architecture slice it
275 // is from, for example: "foo.o (for architecture i386)" after the ToolName
276 // before the error message. It sets HadError but returns allowing the code to
277 // move on to other architecture slices.
error(llvm::Error E,StringRef FileName,StringRef ArchitectureName=StringRef ())278 static void error(llvm::Error E, StringRef FileName,
279 StringRef ArchitectureName = StringRef()) {
280 HadError = true;
281 WithColor::error(errs(), ToolName) << FileName;
282
283 if (!ArchitectureName.empty())
284 errs() << " (for architecture " << ArchitectureName << ") ";
285
286 std::string Buf;
287 raw_string_ostream OS(Buf);
288 logAllUnhandledErrors(std::move(E), OS);
289 OS.flush();
290 errs() << " " << Buf << "\n";
291 }
292
293 namespace {
294 struct NMSymbol {
295 uint64_t Address;
296 uint64_t Size;
297 char TypeChar;
298 StringRef Name;
299 StringRef SectionName;
300 StringRef TypeName;
301 BasicSymbolRef Sym;
302 // The Sym field above points to the native symbol in the object file,
303 // for Mach-O when we are creating symbols from the dyld info the above
304 // pointer is null as there is no native symbol. In these cases the fields
305 // below are filled in to represent what would have been a Mach-O nlist
306 // native symbol.
307 uint32_t SymFlags;
308 SectionRef Section;
309 uint8_t NType;
310 uint8_t NSect;
311 uint16_t NDesc;
312 StringRef IndirectName;
313 };
314 } // anonymous namespace
315
compareSymbolAddress(const NMSymbol & A,const NMSymbol & B)316 static bool compareSymbolAddress(const NMSymbol &A, const NMSymbol &B) {
317 bool ADefined;
318 // Symbol flags have been checked in the caller.
319 if (A.Sym.getRawDataRefImpl().p) {
320 uint32_t AFlags = cantFail(A.Sym.getFlags());
321 ADefined = !(AFlags & SymbolRef::SF_Undefined);
322 } else {
323 ADefined = A.TypeChar != 'U';
324 }
325 bool BDefined;
326 // Symbol flags have been checked in the caller.
327 if (B.Sym.getRawDataRefImpl().p) {
328 uint32_t BFlags = cantFail(B.Sym.getFlags());
329 BDefined = !(BFlags & SymbolRef::SF_Undefined);
330 } else {
331 BDefined = B.TypeChar != 'U';
332 }
333 return std::make_tuple(ADefined, A.Address, A.Name, A.Size) <
334 std::make_tuple(BDefined, B.Address, B.Name, B.Size);
335 }
336
compareSymbolSize(const NMSymbol & A,const NMSymbol & B)337 static bool compareSymbolSize(const NMSymbol &A, const NMSymbol &B) {
338 return std::make_tuple(A.Size, A.Name, A.Address) <
339 std::make_tuple(B.Size, B.Name, B.Address);
340 }
341
compareSymbolName(const NMSymbol & A,const NMSymbol & B)342 static bool compareSymbolName(const NMSymbol &A, const NMSymbol &B) {
343 return std::make_tuple(A.Name, A.Size, A.Address) <
344 std::make_tuple(B.Name, B.Size, B.Address);
345 }
346
isSymbolList64Bit(SymbolicFile & Obj)347 static char isSymbolList64Bit(SymbolicFile &Obj) {
348 if (auto *IRObj = dyn_cast<IRObjectFile>(&Obj))
349 return Triple(IRObj->getTargetTriple()).isArch64Bit();
350 if (isa<COFFObjectFile>(Obj) || isa<COFFImportFile>(Obj))
351 return false;
352 if (isa<WasmObjectFile>(Obj))
353 return false;
354 if (TapiFile *Tapi = dyn_cast<TapiFile>(&Obj))
355 return Tapi->is64Bit();
356 if (MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(&Obj))
357 return MachO->is64Bit();
358 return cast<ELFObjectFileBase>(Obj).getBytesInAddress() == 8;
359 }
360
// Name printed for the input currently being processed; writeFileName() and
// the per-file headers in sortAndPrintSymbolList() read it.
361 static StringRef CurrentFilename;
// Symbols collected for the current input. sortAndPrintSymbolList() sorts,
// prints and then clears this list.
362 static std::vector<NMSymbol> SymbolList;
363
// Forward declaration of the IR flavour of getSymbolNMTypeChar(); presumably
// defined further down in the file, past the part visible here.
364 static char getSymbolNMTypeChar(IRObjectFile &Obj, basic_symbol_iterator I);
365
366 // darwinPrintSymbol() is used to print a symbol from a Mach-O file when the
367 // the OutputFormat is darwin or we are printing Mach-O symbols in hex. For
368 // the darwin format it produces the same output as darwin's nm(1) -m output
369 // and when printing Mach-O symbols in hex it produces the same output as
370 // darwin's nm(1) -x format.
darwinPrintSymbol(SymbolicFile & Obj,const NMSymbol & S,char * SymbolAddrStr,const char * printBlanks,const char * printDashes,const char * printFormat)371 static void darwinPrintSymbol(SymbolicFile &Obj, const NMSymbol &S,
372 char *SymbolAddrStr, const char *printBlanks,
373 const char *printDashes,
374 const char *printFormat) {
375 MachO::mach_header H;
376 MachO::mach_header_64 H_64;
377 uint32_t Filetype = MachO::MH_OBJECT;
378 uint32_t Flags = 0;
379 uint8_t NType = 0;
380 uint8_t NSect = 0;
381 uint16_t NDesc = 0;
382 uint32_t NStrx = 0;
383 uint64_t NValue = 0;
384 MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(&Obj);
385 if (Obj.isIR()) {
386 uint32_t SymFlags = cantFail(S.Sym.getFlags());
387 if (SymFlags & SymbolRef::SF_Global)
388 NType |= MachO::N_EXT;
389 if (SymFlags & SymbolRef::SF_Hidden)
390 NType |= MachO::N_PEXT;
391 if (SymFlags & SymbolRef::SF_Undefined)
392 NType |= MachO::N_EXT | MachO::N_UNDF;
393 else {
394 // Here we have a symbol definition. So to fake out a section name we
395 // use 1, 2 and 3 for section numbers. See below where they are used to
396 // print out fake section names.
397 NType |= MachO::N_SECT;
398 if (SymFlags & SymbolRef::SF_Const)
399 NSect = 3;
400 else if (SymFlags & SymbolRef::SF_Executable)
401 NSect = 1;
402 else
403 NSect = 2;
404 }
405 if (SymFlags & SymbolRef::SF_Weak)
406 NDesc |= MachO::N_WEAK_DEF;
407 } else {
408 DataRefImpl SymDRI = S.Sym.getRawDataRefImpl();
409 if (MachO->is64Bit()) {
410 H_64 = MachO->MachOObjectFile::getHeader64();
411 Filetype = H_64.filetype;
412 Flags = H_64.flags;
413 if (SymDRI.p){
414 MachO::nlist_64 STE_64 = MachO->getSymbol64TableEntry(SymDRI);
415 NType = STE_64.n_type;
416 NSect = STE_64.n_sect;
417 NDesc = STE_64.n_desc;
418 NStrx = STE_64.n_strx;
419 NValue = STE_64.n_value;
420 } else {
421 NType = S.NType;
422 NSect = S.NSect;
423 NDesc = S.NDesc;
424 NStrx = 0;
425 NValue = S.Address;
426 }
427 } else {
428 H = MachO->MachOObjectFile::getHeader();
429 Filetype = H.filetype;
430 Flags = H.flags;
431 if (SymDRI.p){
432 MachO::nlist STE = MachO->getSymbolTableEntry(SymDRI);
433 NType = STE.n_type;
434 NSect = STE.n_sect;
435 NDesc = STE.n_desc;
436 NStrx = STE.n_strx;
437 NValue = STE.n_value;
438 } else {
439 NType = S.NType;
440 NSect = S.NSect;
441 NDesc = S.NDesc;
442 NStrx = 0;
443 NValue = S.Address;
444 }
445 }
446 }
447
448 // If we are printing Mach-O symbols in hex do that and return.
449 if (FormatMachOasHex) {
450 outs() << format(printFormat, NValue) << ' '
451 << format("%02x %02x %04x %08x", NType, NSect, NDesc, NStrx) << ' '
452 << S.Name;
453 if ((NType & MachO::N_TYPE) == MachO::N_INDR) {
454 outs() << " (indirect for ";
455 outs() << format(printFormat, NValue) << ' ';
456 StringRef IndirectName;
457 if (S.Sym.getRawDataRefImpl().p) {
458 if (MachO->getIndirectName(S.Sym.getRawDataRefImpl(), IndirectName))
459 outs() << "?)";
460 else
461 outs() << IndirectName << ")";
462 } else
463 outs() << S.IndirectName << ")";
464 }
465 outs() << "\n";
466 return;
467 }
468
469 if (PrintAddress) {
470 if ((NType & MachO::N_TYPE) == MachO::N_INDR)
471 strcpy(SymbolAddrStr, printBlanks);
472 if (Obj.isIR() && (NType & MachO::N_TYPE) == MachO::N_TYPE)
473 strcpy(SymbolAddrStr, printDashes);
474 outs() << SymbolAddrStr << ' ';
475 }
476
477 switch (NType & MachO::N_TYPE) {
478 case MachO::N_UNDF:
479 if (NValue != 0) {
480 outs() << "(common) ";
481 if (MachO::GET_COMM_ALIGN(NDesc) != 0)
482 outs() << "(alignment 2^" << (int)MachO::GET_COMM_ALIGN(NDesc) << ") ";
483 } else {
484 if ((NType & MachO::N_TYPE) == MachO::N_PBUD)
485 outs() << "(prebound ";
486 else
487 outs() << "(";
488 if ((NDesc & MachO::REFERENCE_TYPE) ==
489 MachO::REFERENCE_FLAG_UNDEFINED_LAZY)
490 outs() << "undefined [lazy bound]) ";
491 else if ((NDesc & MachO::REFERENCE_TYPE) ==
492 MachO::REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY)
493 outs() << "undefined [private lazy bound]) ";
494 else if ((NDesc & MachO::REFERENCE_TYPE) ==
495 MachO::REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY)
496 outs() << "undefined [private]) ";
497 else
498 outs() << "undefined) ";
499 }
500 break;
501 case MachO::N_ABS:
502 outs() << "(absolute) ";
503 break;
504 case MachO::N_INDR:
505 outs() << "(indirect) ";
506 break;
507 case MachO::N_SECT: {
508 if (Obj.isIR()) {
509 // For llvm bitcode files print out a fake section name using the values
510 // use 1, 2 and 3 for section numbers as set above.
511 if (NSect == 1)
512 outs() << "(LTO,CODE) ";
513 else if (NSect == 2)
514 outs() << "(LTO,DATA) ";
515 else if (NSect == 3)
516 outs() << "(LTO,RODATA) ";
517 else
518 outs() << "(?,?) ";
519 break;
520 }
521 section_iterator Sec = SectionRef();
522 if (S.Sym.getRawDataRefImpl().p) {
523 Expected<section_iterator> SecOrErr =
524 MachO->getSymbolSection(S.Sym.getRawDataRefImpl());
525 if (!SecOrErr) {
526 consumeError(SecOrErr.takeError());
527 outs() << "(?,?) ";
528 break;
529 }
530 Sec = *SecOrErr;
531 if (Sec == MachO->section_end()) {
532 outs() << "(?,?) ";
533 break;
534 }
535 } else {
536 Sec = S.Section;
537 }
538 DataRefImpl Ref = Sec->getRawDataRefImpl();
539 StringRef SectionName;
540 if (Expected<StringRef> NameOrErr = MachO->getSectionName(Ref))
541 SectionName = *NameOrErr;
542 StringRef SegmentName = MachO->getSectionFinalSegmentName(Ref);
543 outs() << "(" << SegmentName << "," << SectionName << ") ";
544 break;
545 }
546 default:
547 outs() << "(?) ";
548 break;
549 }
550
551 if (NType & MachO::N_EXT) {
552 if (NDesc & MachO::REFERENCED_DYNAMICALLY)
553 outs() << "[referenced dynamically] ";
554 if (NType & MachO::N_PEXT) {
555 if ((NDesc & MachO::N_WEAK_DEF) == MachO::N_WEAK_DEF)
556 outs() << "weak private external ";
557 else
558 outs() << "private external ";
559 } else {
560 if ((NDesc & MachO::N_WEAK_REF) == MachO::N_WEAK_REF ||
561 (NDesc & MachO::N_WEAK_DEF) == MachO::N_WEAK_DEF) {
562 if ((NDesc & (MachO::N_WEAK_REF | MachO::N_WEAK_DEF)) ==
563 (MachO::N_WEAK_REF | MachO::N_WEAK_DEF))
564 outs() << "weak external automatically hidden ";
565 else
566 outs() << "weak external ";
567 } else
568 outs() << "external ";
569 }
570 } else {
571 if (NType & MachO::N_PEXT)
572 outs() << "non-external (was a private external) ";
573 else
574 outs() << "non-external ";
575 }
576
577 if (Filetype == MachO::MH_OBJECT) {
578 if (NDesc & MachO::N_NO_DEAD_STRIP)
579 outs() << "[no dead strip] ";
580 if ((NType & MachO::N_TYPE) != MachO::N_UNDF &&
581 NDesc & MachO::N_SYMBOL_RESOLVER)
582 outs() << "[symbol resolver] ";
583 if ((NType & MachO::N_TYPE) != MachO::N_UNDF && NDesc & MachO::N_ALT_ENTRY)
584 outs() << "[alt entry] ";
585 if ((NType & MachO::N_TYPE) != MachO::N_UNDF && NDesc & MachO::N_COLD_FUNC)
586 outs() << "[cold func] ";
587 }
588
589 if ((NDesc & MachO::N_ARM_THUMB_DEF) == MachO::N_ARM_THUMB_DEF)
590 outs() << "[Thumb] ";
591
592 if ((NType & MachO::N_TYPE) == MachO::N_INDR) {
593 outs() << S.Name << " (for ";
594 StringRef IndirectName;
595 if (MachO) {
596 if (S.Sym.getRawDataRefImpl().p) {
597 if (MachO->getIndirectName(S.Sym.getRawDataRefImpl(), IndirectName))
598 outs() << "?)";
599 else
600 outs() << IndirectName << ")";
601 } else
602 outs() << S.IndirectName << ")";
603 } else
604 outs() << "?)";
605 } else
606 outs() << S.Name;
607
608 if ((Flags & MachO::MH_TWOLEVEL) == MachO::MH_TWOLEVEL &&
609 (((NType & MachO::N_TYPE) == MachO::N_UNDF && NValue == 0) ||
610 (NType & MachO::N_TYPE) == MachO::N_PBUD)) {
611 uint32_t LibraryOrdinal = MachO::GET_LIBRARY_ORDINAL(NDesc);
612 if (LibraryOrdinal != 0) {
613 if (LibraryOrdinal == MachO::EXECUTABLE_ORDINAL)
614 outs() << " (from executable)";
615 else if (LibraryOrdinal == MachO::DYNAMIC_LOOKUP_ORDINAL)
616 outs() << " (dynamically looked up)";
617 else {
618 StringRef LibraryName;
619 if (!MachO ||
620 MachO->getLibraryShortNameByIndex(LibraryOrdinal - 1, LibraryName))
621 outs() << " (from bad library ordinal " << LibraryOrdinal << ")";
622 else
623 outs() << " (from " << LibraryName << ")";
624 }
625 }
626 }
627
628 outs() << "\n";
629 }
630
631 // Table that maps Darwin's Mach-O stab constants to strings to allow printing.
632 struct DarwinStabName {
633 uint8_t NType;
634 const char *Name;
635 };
636 static const struct DarwinStabName DarwinStabNames[] = {
637 {MachO::N_GSYM, "GSYM"},
638 {MachO::N_FNAME, "FNAME"},
639 {MachO::N_FUN, "FUN"},
640 {MachO::N_STSYM, "STSYM"},
641 {MachO::N_LCSYM, "LCSYM"},
642 {MachO::N_BNSYM, "BNSYM"},
643 {MachO::N_PC, "PC"},
644 {MachO::N_AST, "AST"},
645 {MachO::N_OPT, "OPT"},
646 {MachO::N_RSYM, "RSYM"},
647 {MachO::N_SLINE, "SLINE"},
648 {MachO::N_ENSYM, "ENSYM"},
649 {MachO::N_SSYM, "SSYM"},
650 {MachO::N_SO, "SO"},
651 {MachO::N_OSO, "OSO"},
652 {MachO::N_LSYM, "LSYM"},
653 {MachO::N_BINCL, "BINCL"},
654 {MachO::N_SOL, "SOL"},
655 {MachO::N_PARAMS, "PARAM"},
656 {MachO::N_VERSION, "VERS"},
657 {MachO::N_OLEVEL, "OLEV"},
658 {MachO::N_PSYM, "PSYM"},
659 {MachO::N_EINCL, "EINCL"},
660 {MachO::N_ENTRY, "ENTRY"},
661 {MachO::N_LBRAC, "LBRAC"},
662 {MachO::N_EXCL, "EXCL"},
663 {MachO::N_RBRAC, "RBRAC"},
664 {MachO::N_BCOMM, "BCOMM"},
665 {MachO::N_ECOMM, "ECOMM"},
666 {MachO::N_ECOML, "ECOML"},
667 {MachO::N_LENG, "LENG"},
668 };
669
getDarwinStabString(uint8_t NType)670 static const char *getDarwinStabString(uint8_t NType) {
671 for (auto I : makeArrayRef(DarwinStabNames))
672 if (I.NType == NType)
673 return I.Name;
674 return nullptr;
675 }
676
677 // darwinPrintStab() prints the n_sect, n_desc along with a symbolic name of
678 // a stab n_type value in a Mach-O file.
darwinPrintStab(MachOObjectFile * MachO,const NMSymbol & S)679 static void darwinPrintStab(MachOObjectFile *MachO, const NMSymbol &S) {
680 MachO::nlist_64 STE_64;
681 MachO::nlist STE;
682 uint8_t NType;
683 uint8_t NSect;
684 uint16_t NDesc;
685 DataRefImpl SymDRI = S.Sym.getRawDataRefImpl();
686 if (MachO->is64Bit()) {
687 STE_64 = MachO->getSymbol64TableEntry(SymDRI);
688 NType = STE_64.n_type;
689 NSect = STE_64.n_sect;
690 NDesc = STE_64.n_desc;
691 } else {
692 STE = MachO->getSymbolTableEntry(SymDRI);
693 NType = STE.n_type;
694 NSect = STE.n_sect;
695 NDesc = STE.n_desc;
696 }
697
698 outs() << format(" %02x %04x ", NSect, NDesc);
699 if (const char *stabString = getDarwinStabString(NType))
700 outs() << format("%5.5s", stabString);
701 else
702 outs() << format(" %02x", NType);
703 }
704
demangle(StringRef Name,bool StripUnderscore)705 static Optional<std::string> demangle(StringRef Name, bool StripUnderscore) {
706 if (StripUnderscore && !Name.empty() && Name[0] == '_')
707 Name = Name.substr(1);
708
709 if (!Name.startswith("_Z"))
710 return None;
711
712 int Status;
713 char *Undecorated =
714 itaniumDemangle(Name.str().c_str(), nullptr, nullptr, &Status);
715 if (Status != 0)
716 return None;
717
718 std::string S(Undecorated);
719 free(Undecorated);
720 return S;
721 }
722
symbolIsDefined(const NMSymbol & Sym)723 static bool symbolIsDefined(const NMSymbol &Sym) {
724 return Sym.TypeChar != 'U' && Sym.TypeChar != 'w' && Sym.TypeChar != 'v';
725 }
726
writeFileName(raw_ostream & S,StringRef ArchiveName,StringRef ArchitectureName)727 static void writeFileName(raw_ostream &S, StringRef ArchiveName,
728 StringRef ArchitectureName) {
729 if (!ArchitectureName.empty())
730 S << "(for architecture " << ArchitectureName << "):";
731 if (OutputFormat == posix && !ArchiveName.empty())
732 S << ArchiveName << "[" << CurrentFilename << "]: ";
733 else {
734 if (!ArchiveName.empty())
735 S << ArchiveName << ":";
736 S << CurrentFilename << ": ";
737 }
738 }
739
isSpecialSym(SymbolicFile & Obj,StringRef Name)740 static bool isSpecialSym(SymbolicFile &Obj, StringRef Name) {
741 auto *ELFObj = dyn_cast<ELFObjectFileBase>(&Obj);
742 if (!ELFObj)
743 return false;
744 uint16_t EMachine = ELFObj->getEMachine();
745 if (EMachine != ELF::EM_ARM && EMachine != ELF::EM_AARCH64)
746 return false;
747 return !Name.empty() && Name[0] == '$';
748 }
749
sortAndPrintSymbolList(SymbolicFile & Obj,bool printName,StringRef ArchiveName,StringRef ArchitectureName)750 static void sortAndPrintSymbolList(SymbolicFile &Obj, bool printName,
751 StringRef ArchiveName,
752 StringRef ArchitectureName) {
753 if (!NoSort) {
754 using Comparator = bool (*)(const NMSymbol &, const NMSymbol &);
755 Comparator Cmp;
756 if (NumericSort)
757 Cmp = &compareSymbolAddress;
758 else if (SizeSort)
759 Cmp = &compareSymbolSize;
760 else
761 Cmp = &compareSymbolName;
762
763 if (ReverseSort)
764 llvm::sort(SymbolList, [=](const NMSymbol &A, const NMSymbol &B) -> bool {
765 return Cmp(B, A);
766 });
767 else
768 llvm::sort(SymbolList, Cmp);
769 }
770
771 if (!PrintFileName) {
772 if (OutputFormat == posix && MultipleFiles && printName) {
773 outs() << '\n' << CurrentFilename << ":\n";
774 } else if (OutputFormat == bsd && MultipleFiles && printName) {
775 outs() << "\n" << CurrentFilename << ":\n";
776 } else if (OutputFormat == sysv) {
777 outs() << "\n\nSymbols from " << CurrentFilename << ":\n\n";
778 if (isSymbolList64Bit(Obj))
779 outs() << "Name Value Class Type"
780 << " Size Line Section\n";
781 else
782 outs() << "Name Value Class Type"
783 << " Size Line Section\n";
784 }
785 }
786
787 const char *printBlanks, *printDashes, *printFormat;
788 if (isSymbolList64Bit(Obj)) {
789 printBlanks = " ";
790 printDashes = "----------------";
791 switch (AddressRadix) {
792 case Radix::o:
793 printFormat = OutputFormat == posix ? "%" PRIo64 : "%016" PRIo64;
794 break;
795 case Radix::x:
796 printFormat = OutputFormat == posix ? "%" PRIx64 : "%016" PRIx64;
797 break;
798 default:
799 printFormat = OutputFormat == posix ? "%" PRId64 : "%016" PRId64;
800 }
801 } else {
802 printBlanks = " ";
803 printDashes = "--------";
804 switch (AddressRadix) {
805 case Radix::o:
806 printFormat = OutputFormat == posix ? "%" PRIo64 : "%08" PRIo64;
807 break;
808 case Radix::x:
809 printFormat = OutputFormat == posix ? "%" PRIx64 : "%08" PRIx64;
810 break;
811 default:
812 printFormat = OutputFormat == posix ? "%" PRId64 : "%08" PRId64;
813 }
814 }
815
816 for (const NMSymbol &S : SymbolList) {
817 uint32_t SymFlags;
818 std::string Name = S.Name.str();
819 MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(&Obj);
820 if (Demangle) {
821 if (Optional<std::string> Opt = demangle(S.Name, MachO))
822 Name = *Opt;
823 }
824 if (S.Sym.getRawDataRefImpl().p) {
825 Expected<uint32_t> SymFlagsOrErr = S.Sym.getFlags();
826 if (!SymFlagsOrErr) {
827 // TODO: Test this error.
828 error(SymFlagsOrErr.takeError(), Obj.getFileName());
829 return;
830 }
831 SymFlags = *SymFlagsOrErr;
832 } else
833 SymFlags = S.SymFlags;
834
835 bool Undefined = SymFlags & SymbolRef::SF_Undefined;
836 bool Global = SymFlags & SymbolRef::SF_Global;
837 bool Weak = SymFlags & SymbolRef::SF_Weak;
838 if ((!Undefined && UndefinedOnly) || (Undefined && DefinedOnly) ||
839 (!Global && ExternalOnly) || (Weak && NoWeakSymbols) ||
840 (!SpecialSyms && isSpecialSym(Obj, Name)))
841 continue;
842 if (PrintFileName)
843 writeFileName(outs(), ArchiveName, ArchitectureName);
844 if ((JustSymbolName ||
845 (UndefinedOnly && MachO && OutputFormat != darwin)) &&
846 OutputFormat != posix) {
847 outs() << Name << "\n";
848 continue;
849 }
850
851 char SymbolAddrStr[23], SymbolSizeStr[23];
852
853 // If the format is SysV or the symbol isn't defined, then print spaces.
854 if (OutputFormat == sysv || !symbolIsDefined(S)) {
855 if (OutputFormat == posix) {
856 format(printFormat, S.Address)
857 .print(SymbolAddrStr, sizeof(SymbolAddrStr));
858 format(printFormat, S.Size).print(SymbolSizeStr, sizeof(SymbolSizeStr));
859 } else {
860 strcpy(SymbolAddrStr, printBlanks);
861 strcpy(SymbolSizeStr, printBlanks);
862 }
863 }
864
865 if (symbolIsDefined(S)) {
866 // Otherwise, print the symbol address and size.
867 if (Obj.isIR())
868 strcpy(SymbolAddrStr, printDashes);
869 else if (MachO && S.TypeChar == 'I')
870 strcpy(SymbolAddrStr, printBlanks);
871 else
872 format(printFormat, S.Address)
873 .print(SymbolAddrStr, sizeof(SymbolAddrStr));
874 format(printFormat, S.Size).print(SymbolSizeStr, sizeof(SymbolSizeStr));
875 }
876
877 // If OutputFormat is darwin or we are printing Mach-O symbols in hex and
878 // we have a MachOObjectFile, call darwinPrintSymbol to print as darwin's
879 // nm(1) -m output or hex, else if OutputFormat is darwin or we are
880 // printing Mach-O symbols in hex and not a Mach-O object fall back to
881 // OutputFormat bsd (see below).
882 if ((OutputFormat == darwin || FormatMachOasHex) && (MachO || Obj.isIR())) {
883 darwinPrintSymbol(Obj, S, SymbolAddrStr, printBlanks, printDashes,
884 printFormat);
885 } else if (OutputFormat == posix) {
886 outs() << Name << " " << S.TypeChar << " " << SymbolAddrStr << " "
887 << (MachO ? "0" : SymbolSizeStr) << "\n";
888 } else if (OutputFormat == bsd || (OutputFormat == darwin && !MachO)) {
889 if (PrintAddress)
890 outs() << SymbolAddrStr << ' ';
891 if (PrintSize)
892 outs() << SymbolSizeStr << ' ';
893 outs() << S.TypeChar;
894 if (S.TypeChar == '-' && MachO)
895 darwinPrintStab(MachO, S);
896 outs() << " " << Name;
897 if (S.TypeChar == 'I' && MachO) {
898 outs() << " (indirect for ";
899 if (S.Sym.getRawDataRefImpl().p) {
900 StringRef IndirectName;
901 if (MachO->getIndirectName(S.Sym.getRawDataRefImpl(), IndirectName))
902 outs() << "?)";
903 else
904 outs() << IndirectName << ")";
905 } else
906 outs() << S.IndirectName << ")";
907 }
908 outs() << "\n";
909 } else if (OutputFormat == sysv) {
910 outs() << left_justify(Name, 20) << "|" << SymbolAddrStr << "| "
911 << S.TypeChar << " |" << right_justify(S.TypeName, 18) << "|"
912 << SymbolSizeStr << "| |" << S.SectionName << "\n";
913 }
914 }
915
916 SymbolList.clear();
917 }
918
getSymbolNMTypeChar(ELFObjectFileBase & Obj,basic_symbol_iterator I)919 static char getSymbolNMTypeChar(ELFObjectFileBase &Obj,
920 basic_symbol_iterator I) {
921 // OK, this is ELF
922 elf_symbol_iterator SymI(I);
923
924 Expected<elf_section_iterator> SecIOrErr = SymI->getSection();
925 if (!SecIOrErr) {
926 consumeError(SecIOrErr.takeError());
927 return '?';
928 }
929
930 uint8_t Binding = SymI->getBinding();
931 if (Binding == ELF::STB_GNU_UNIQUE)
932 return 'u';
933
934 assert(Binding != ELF::STB_WEAK && "STB_WEAK not tested in calling function");
935 if (Binding != ELF::STB_GLOBAL && Binding != ELF::STB_LOCAL)
936 return '?';
937
938 elf_section_iterator SecI = *SecIOrErr;
939 if (SecI != Obj.section_end()) {
940 uint32_t Type = SecI->getType();
941 uint64_t Flags = SecI->getFlags();
942 if (Flags & ELF::SHF_EXECINSTR)
943 return 't';
944 if (Type == ELF::SHT_NOBITS)
945 return 'b';
946 if (Flags & ELF::SHF_ALLOC)
947 return Flags & ELF::SHF_WRITE ? 'd' : 'r';
948
949 auto NameOrErr = SecI->getName();
950 if (!NameOrErr) {
951 consumeError(NameOrErr.takeError());
952 return '?';
953 }
954 if ((*NameOrErr).startswith(".debug"))
955 return 'N';
956 if (!(Flags & ELF::SHF_WRITE))
957 return 'n';
958 }
959
960 return '?';
961 }
962
getSymbolNMTypeChar(COFFObjectFile & Obj,symbol_iterator I)963 static char getSymbolNMTypeChar(COFFObjectFile &Obj, symbol_iterator I) {
964 COFFSymbolRef Symb = Obj.getCOFFSymbol(*I);
965 // OK, this is COFF.
966 symbol_iterator SymI(I);
967
968 Expected<StringRef> Name = SymI->getName();
969 if (!Name) {
970 consumeError(Name.takeError());
971 return '?';
972 }
973
974 char Ret = StringSwitch<char>(*Name)
975 .StartsWith(".debug", 'N')
976 .StartsWith(".sxdata", 'N')
977 .Default('?');
978
979 if (Ret != '?')
980 return Ret;
981
982 uint32_t Characteristics = 0;
983 if (!COFF::isReservedSectionNumber(Symb.getSectionNumber())) {
984 Expected<section_iterator> SecIOrErr = SymI->getSection();
985 if (!SecIOrErr) {
986 consumeError(SecIOrErr.takeError());
987 return '?';
988 }
989 section_iterator SecI = *SecIOrErr;
990 const coff_section *Section = Obj.getCOFFSection(*SecI);
991 Characteristics = Section->Characteristics;
992 if (Expected<StringRef> NameOrErr = Obj.getSectionName(Section))
993 if (NameOrErr->startswith(".idata"))
994 return 'i';
995 }
996
997 switch (Symb.getSectionNumber()) {
998 case COFF::IMAGE_SYM_DEBUG:
999 return 'n';
1000 default:
1001 // Check section type.
1002 if (Characteristics & COFF::IMAGE_SCN_CNT_CODE)
1003 return 't';
1004 if (Characteristics & COFF::IMAGE_SCN_CNT_INITIALIZED_DATA)
1005 return Characteristics & COFF::IMAGE_SCN_MEM_WRITE ? 'd' : 'r';
1006 if (Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA)
1007 return 'b';
1008 if (Characteristics & COFF::IMAGE_SCN_LNK_INFO)
1009 return 'i';
1010 // Check for section symbol.
1011 if (Symb.isSectionDefinition())
1012 return 's';
1013 }
1014
1015 return '?';
1016 }
1017
getSymbolNMTypeChar(COFFImportFile & Obj)1018 static char getSymbolNMTypeChar(COFFImportFile &Obj) {
1019 switch (Obj.getCOFFImportHeader()->getType()) {
1020 case COFF::IMPORT_CODE:
1021 return 't';
1022 case COFF::IMPORT_DATA:
1023 return 'd';
1024 case COFF::IMPORT_CONST:
1025 return 'r';
1026 }
1027 return '?';
1028 }
1029
getSymbolNMTypeChar(MachOObjectFile & Obj,basic_symbol_iterator I)1030 static char getSymbolNMTypeChar(MachOObjectFile &Obj, basic_symbol_iterator I) {
1031 DataRefImpl Symb = I->getRawDataRefImpl();
1032 uint8_t NType = Obj.is64Bit() ? Obj.getSymbol64TableEntry(Symb).n_type
1033 : Obj.getSymbolTableEntry(Symb).n_type;
1034
1035 if (NType & MachO::N_STAB)
1036 return '-';
1037
1038 switch (NType & MachO::N_TYPE) {
1039 case MachO::N_ABS:
1040 return 's';
1041 case MachO::N_INDR:
1042 return 'i';
1043 case MachO::N_SECT: {
1044 Expected<section_iterator> SecOrErr = Obj.getSymbolSection(Symb);
1045 if (!SecOrErr) {
1046 consumeError(SecOrErr.takeError());
1047 return 's';
1048 }
1049 section_iterator Sec = *SecOrErr;
1050 if (Sec == Obj.section_end())
1051 return 's';
1052 DataRefImpl Ref = Sec->getRawDataRefImpl();
1053 StringRef SectionName;
1054 if (Expected<StringRef> NameOrErr = Obj.getSectionName(Ref))
1055 SectionName = *NameOrErr;
1056 StringRef SegmentName = Obj.getSectionFinalSegmentName(Ref);
1057 if (Obj.is64Bit() && Obj.getHeader64().filetype == MachO::MH_KEXT_BUNDLE &&
1058 SegmentName == "__TEXT_EXEC" && SectionName == "__text")
1059 return 't';
1060 if (SegmentName == "__TEXT" && SectionName == "__text")
1061 return 't';
1062 if (SegmentName == "__DATA" && SectionName == "__data")
1063 return 'd';
1064 if (SegmentName == "__DATA" && SectionName == "__bss")
1065 return 'b';
1066 return 's';
1067 }
1068 }
1069
1070 return '?';
1071 }
1072
getSymbolNMTypeChar(TapiFile & Obj,basic_symbol_iterator I)1073 static char getSymbolNMTypeChar(TapiFile &Obj, basic_symbol_iterator I) {
1074 return 's';
1075 }
1076
getSymbolNMTypeChar(WasmObjectFile & Obj,basic_symbol_iterator I)1077 static char getSymbolNMTypeChar(WasmObjectFile &Obj, basic_symbol_iterator I) {
1078 uint32_t Flags = cantFail(I->getFlags());
1079 if (Flags & SymbolRef::SF_Executable)
1080 return 't';
1081 return 'd';
1082 }
1083
getSymbolNMTypeChar(IRObjectFile & Obj,basic_symbol_iterator I)1084 static char getSymbolNMTypeChar(IRObjectFile &Obj, basic_symbol_iterator I) {
1085 uint32_t Flags = cantFail(I->getFlags());
1086 // FIXME: should we print 'b'? At the IR level we cannot be sure if this
1087 // will be in bss or not, but we could approximate.
1088 if (Flags & SymbolRef::SF_Executable)
1089 return 't';
1090 else if (Triple(Obj.getTargetTriple()).isOSDarwin() &&
1091 (Flags & SymbolRef::SF_Const))
1092 return 's';
1093 else
1094 return 'd';
1095 }
1096
isObject(SymbolicFile & Obj,basic_symbol_iterator I)1097 static bool isObject(SymbolicFile &Obj, basic_symbol_iterator I) {
1098 return !dyn_cast<ELFObjectFileBase>(&Obj)
1099 ? false
1100 : elf_symbol_iterator(I)->getELFType() == ELF::STT_OBJECT;
1101 }
1102
1103 // For ELF object files, Set TypeName to the symbol typename, to be printed
1104 // in the 'Type' column of the SYSV format output.
getNMTypeName(SymbolicFile & Obj,basic_symbol_iterator I)1105 static StringRef getNMTypeName(SymbolicFile &Obj, basic_symbol_iterator I) {
1106 if (isa<ELFObjectFileBase>(&Obj)) {
1107 elf_symbol_iterator SymI(I);
1108 return SymI->getELFTypeName();
1109 }
1110 return "";
1111 }
1112
1113 // Return Posix nm class type tag (single letter), but also set SecName and
1114 // section and name, to be used in format=sysv output.
getNMSectionTagAndName(SymbolicFile & Obj,basic_symbol_iterator I,StringRef & SecName)1115 static char getNMSectionTagAndName(SymbolicFile &Obj, basic_symbol_iterator I,
1116 StringRef &SecName) {
1117 // Symbol Flags have been checked in the caller.
1118 uint32_t Symflags = cantFail(I->getFlags());
1119 if (ELFObjectFileBase *ELFObj = dyn_cast<ELFObjectFileBase>(&Obj)) {
1120 if (Symflags & object::SymbolRef::SF_Absolute)
1121 SecName = "*ABS*";
1122 else if (Symflags & object::SymbolRef::SF_Common)
1123 SecName = "*COM*";
1124 else if (Symflags & object::SymbolRef::SF_Undefined)
1125 SecName = "*UND*";
1126 else {
1127 elf_symbol_iterator SymI(I);
1128 Expected<elf_section_iterator> SecIOrErr = SymI->getSection();
1129 if (!SecIOrErr) {
1130 consumeError(SecIOrErr.takeError());
1131 return '?';
1132 }
1133
1134 if (*SecIOrErr == ELFObj->section_end())
1135 return '?';
1136
1137 Expected<StringRef> NameOrErr = (*SecIOrErr)->getName();
1138 if (!NameOrErr) {
1139 consumeError(NameOrErr.takeError());
1140 return '?';
1141 }
1142 SecName = *NameOrErr;
1143 }
1144 }
1145
1146 if ((Symflags & object::SymbolRef::SF_Weak) && !isa<MachOObjectFile>(Obj)) {
1147 char Ret = isObject(Obj, I) ? 'v' : 'w';
1148 return (!(Symflags & object::SymbolRef::SF_Undefined)) ? toupper(Ret) : Ret;
1149 }
1150
1151 if (Symflags & object::SymbolRef::SF_Undefined)
1152 return 'U';
1153
1154 if (Symflags & object::SymbolRef::SF_Common)
1155 return 'C';
1156
1157 char Ret = '?';
1158 if (Symflags & object::SymbolRef::SF_Absolute)
1159 Ret = 'a';
1160 else if (IRObjectFile *IR = dyn_cast<IRObjectFile>(&Obj))
1161 Ret = getSymbolNMTypeChar(*IR, I);
1162 else if (COFFObjectFile *COFF = dyn_cast<COFFObjectFile>(&Obj))
1163 Ret = getSymbolNMTypeChar(*COFF, I);
1164 else if (COFFImportFile *COFFImport = dyn_cast<COFFImportFile>(&Obj))
1165 Ret = getSymbolNMTypeChar(*COFFImport);
1166 else if (MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(&Obj))
1167 Ret = getSymbolNMTypeChar(*MachO, I);
1168 else if (WasmObjectFile *Wasm = dyn_cast<WasmObjectFile>(&Obj))
1169 Ret = getSymbolNMTypeChar(*Wasm, I);
1170 else if (TapiFile *Tapi = dyn_cast<TapiFile>(&Obj))
1171 Ret = getSymbolNMTypeChar(*Tapi, I);
1172 else if (ELFObjectFileBase *ELF = dyn_cast<ELFObjectFileBase>(&Obj)) {
1173 if (ELFSymbolRef(*I).getELFType() == ELF::STT_GNU_IFUNC)
1174 return 'i';
1175 Ret = getSymbolNMTypeChar(*ELF, I);
1176 if (ELFSymbolRef(*I).getBinding() == ELF::STB_GNU_UNIQUE)
1177 return Ret;
1178 } else
1179 llvm_unreachable("unknown binary format");
1180
1181 if (!(Symflags & object::SymbolRef::SF_Global))
1182 return Ret;
1183
1184 return toupper(Ret);
1185 }
1186
1187 // getNsectForSegSect() is used to implement the Mach-O "-s segname sectname"
1188 // option to dump only those symbols from that section in a Mach-O file.
1189 // It is called once for each Mach-O file from dumpSymbolNamesFromObject()
1190 // to get the section number for that named section from the command line
1191 // arguments. It returns the section number for that section in the Mach-O
1192 // file or zero it is not present.
getNsectForSegSect(MachOObjectFile * Obj)1193 static unsigned getNsectForSegSect(MachOObjectFile *Obj) {
1194 unsigned Nsect = 1;
1195 for (auto &S : Obj->sections()) {
1196 DataRefImpl Ref = S.getRawDataRefImpl();
1197 StringRef SectionName;
1198 if (Expected<StringRef> NameOrErr = Obj->getSectionName(Ref))
1199 SectionName = *NameOrErr;
1200 StringRef SegmentName = Obj->getSectionFinalSegmentName(Ref);
1201 if (SegmentName == SegSect[0] && SectionName == SegSect[1])
1202 return Nsect;
1203 Nsect++;
1204 }
1205 return 0;
1206 }
1207
1208 // getNsectInMachO() is used to implement the Mach-O "-s segname sectname"
1209 // option to dump only those symbols from that section in a Mach-O file.
1210 // It is called once for each symbol in a Mach-O file from
1211 // dumpSymbolNamesFromObject() and returns the section number for that symbol
1212 // if it is in a section, else it returns 0.
getNsectInMachO(MachOObjectFile & Obj,BasicSymbolRef Sym)1213 static unsigned getNsectInMachO(MachOObjectFile &Obj, BasicSymbolRef Sym) {
1214 DataRefImpl Symb = Sym.getRawDataRefImpl();
1215 if (Obj.is64Bit()) {
1216 MachO::nlist_64 STE = Obj.getSymbol64TableEntry(Symb);
1217 return (STE.n_type & MachO::N_TYPE) == MachO::N_SECT ? STE.n_sect : 0;
1218 }
1219 MachO::nlist STE = Obj.getSymbolTableEntry(Symb);
1220 return (STE.n_type & MachO::N_TYPE) == MachO::N_SECT ? STE.n_sect : 0;
1221 }
1222
dumpSymbolNamesFromObject(SymbolicFile & Obj,bool printName,StringRef ArchiveName={},StringRef ArchitectureName={})1223 static void dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
1224 StringRef ArchiveName = {},
1225 StringRef ArchitectureName = {}) {
1226 auto Symbols = Obj.symbols();
1227 if (DynamicSyms) {
1228 const auto *E = dyn_cast<ELFObjectFileBase>(&Obj);
1229 if (!E) {
1230 error("File format has no dynamic symbol table", Obj.getFileName());
1231 return;
1232 }
1233 Symbols = E->getDynamicSymbolIterators();
1234 }
1235 std::string NameBuffer;
1236 raw_string_ostream OS(NameBuffer);
1237 // If a "-s segname sectname" option was specified and this is a Mach-O
1238 // file get the section number for that section in this object file.
1239 unsigned int Nsect = 0;
1240 MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(&Obj);
1241 if (!SegSect.empty() && MachO) {
1242 Nsect = getNsectForSegSect(MachO);
1243 // If this section is not in the object file no symbols are printed.
1244 if (Nsect == 0)
1245 return;
1246 }
1247 if (!(MachO && DyldInfoOnly)) {
1248 for (BasicSymbolRef Sym : Symbols) {
1249 Expected<uint32_t> SymFlagsOrErr = Sym.getFlags();
1250 if (!SymFlagsOrErr) {
1251 error(SymFlagsOrErr.takeError(), Obj.getFileName());
1252 return;
1253 }
1254 if (!DebugSyms && (*SymFlagsOrErr & SymbolRef::SF_FormatSpecific))
1255 continue;
1256 if (WithoutAliases && (*SymFlagsOrErr & SymbolRef::SF_Indirect))
1257 continue;
1258 // If a "-s segname sectname" option was specified and this is a Mach-O
1259 // file and this section appears in this file, Nsect will be non-zero then
1260 // see if this symbol is a symbol from that section and if not skip it.
1261 if (Nsect && Nsect != getNsectInMachO(*MachO, Sym))
1262 continue;
1263 NMSymbol S = {};
1264 S.Size = 0;
1265 S.Address = 0;
1266 if (isa<ELFObjectFileBase>(&Obj))
1267 S.Size = ELFSymbolRef(Sym).getSize();
1268 if (PrintAddress && isa<ObjectFile>(Obj)) {
1269 SymbolRef SymRef(Sym);
1270 Expected<uint64_t> AddressOrErr = SymRef.getAddress();
1271 if (!AddressOrErr) {
1272 consumeError(AddressOrErr.takeError());
1273 break;
1274 }
1275 S.Address = *AddressOrErr;
1276 }
1277 S.TypeName = getNMTypeName(Obj, Sym);
1278 S.TypeChar = getNMSectionTagAndName(Obj, Sym, S.SectionName);
1279 if (Error E = Sym.printName(OS)) {
1280 if (MachO) {
1281 OS << "bad string index";
1282 consumeError(std::move(E));
1283 } else
1284 error(std::move(E), Obj.getFileName());
1285 }
1286 OS << '\0';
1287 S.Sym = Sym;
1288 SymbolList.push_back(S);
1289 }
1290 }
1291
1292 OS.flush();
1293 const char *P = NameBuffer.c_str();
1294 unsigned I;
1295 for (I = 0; I < SymbolList.size(); ++I) {
1296 SymbolList[I].Name = P;
1297 P += strlen(P) + 1;
1298 }
1299
1300 // If this is a Mach-O file where the nlist symbol table is out of sync
1301 // with the dyld export trie then look through exports and fake up symbols
1302 // for the ones that are missing (also done with the -add-dyldinfo flag).
1303 // This is needed if strip(1) -T is run on a binary containing swift
1304 // language symbols for example. The option -only-dyldinfo will fake up
1305 // all symbols from the dyld export trie as well as the bind info.
1306 std::string ExportsNameBuffer;
1307 raw_string_ostream EOS(ExportsNameBuffer);
1308 std::string BindsNameBuffer;
1309 raw_string_ostream BOS(BindsNameBuffer);
1310 std::string LazysNameBuffer;
1311 raw_string_ostream LOS(LazysNameBuffer);
1312 std::string WeaksNameBuffer;
1313 raw_string_ostream WOS(WeaksNameBuffer);
1314 std::string FunctionStartsNameBuffer;
1315 raw_string_ostream FOS(FunctionStartsNameBuffer);
1316 if (MachO && !NoDyldInfo) {
1317 MachO::mach_header H;
1318 MachO::mach_header_64 H_64;
1319 uint32_t HFlags = 0;
1320 if (MachO->is64Bit()) {
1321 H_64 = MachO->MachOObjectFile::getHeader64();
1322 HFlags = H_64.flags;
1323 } else {
1324 H = MachO->MachOObjectFile::getHeader();
1325 HFlags = H.flags;
1326 }
1327 uint64_t BaseSegmentAddress = 0;
1328 for (const auto &Command : MachO->load_commands()) {
1329 if (Command.C.cmd == MachO::LC_SEGMENT) {
1330 MachO::segment_command Seg = MachO->getSegmentLoadCommand(Command);
1331 if (Seg.fileoff == 0 && Seg.filesize != 0) {
1332 BaseSegmentAddress = Seg.vmaddr;
1333 break;
1334 }
1335 } else if (Command.C.cmd == MachO::LC_SEGMENT_64) {
1336 MachO::segment_command_64 Seg = MachO->getSegment64LoadCommand(Command);
1337 if (Seg.fileoff == 0 && Seg.filesize != 0) {
1338 BaseSegmentAddress = Seg.vmaddr;
1339 break;
1340 }
1341 }
1342 }
1343 if (DyldInfoOnly || AddDyldInfo ||
1344 HFlags & MachO::MH_NLIST_OUTOFSYNC_WITH_DYLDINFO) {
1345 unsigned ExportsAdded = 0;
1346 Error Err = Error::success();
1347 for (const llvm::object::ExportEntry &Entry : MachO->exports(Err)) {
1348 bool found = false;
1349 bool ReExport = false;
1350 if (!DyldInfoOnly) {
1351 for (const NMSymbol &S : SymbolList)
1352 if (S.Address == Entry.address() + BaseSegmentAddress &&
1353 S.Name == Entry.name()) {
1354 found = true;
1355 break;
1356 }
1357 }
1358 if (!found) {
1359 NMSymbol S = {};
1360 S.Address = Entry.address() + BaseSegmentAddress;
1361 S.Size = 0;
1362 S.TypeChar = '\0';
1363 S.Name = Entry.name();
1364 // There is no symbol in the nlist symbol table for this so we set
1365 // Sym effectivly to null and the rest of code in here must test for
1366 // it and not do things like Sym.getFlags() for it.
1367 S.Sym = BasicSymbolRef();
1368 S.SymFlags = SymbolRef::SF_Global;
1369 S.Section = SectionRef();
1370 S.NType = 0;
1371 S.NSect = 0;
1372 S.NDesc = 0;
1373 S.IndirectName = StringRef();
1374
1375 uint64_t EFlags = Entry.flags();
1376 bool Abs = ((EFlags & MachO::EXPORT_SYMBOL_FLAGS_KIND_MASK) ==
1377 MachO::EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE);
1378 bool Resolver = (EFlags &
1379 MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER);
1380 ReExport = (EFlags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT);
1381 bool WeakDef = (EFlags & MachO::EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION);
1382 if (WeakDef)
1383 S.NDesc |= MachO::N_WEAK_DEF;
1384 if (Abs) {
1385 S.NType = MachO::N_EXT | MachO::N_ABS;
1386 S.TypeChar = 'A';
1387 } else if (ReExport) {
1388 S.NType = MachO::N_EXT | MachO::N_INDR;
1389 S.TypeChar = 'I';
1390 } else {
1391 S.NType = MachO::N_EXT | MachO::N_SECT;
1392 if (Resolver) {
1393 S.Address = Entry.other() + BaseSegmentAddress;
1394 if ((S.Address & 1) != 0 &&
1395 !MachO->is64Bit() && H.cputype == MachO::CPU_TYPE_ARM){
1396 S.Address &= ~1LL;
1397 S.NDesc |= MachO::N_ARM_THUMB_DEF;
1398 }
1399 } else {
1400 S.Address = Entry.address() + BaseSegmentAddress;
1401 }
1402 StringRef SegmentName = StringRef();
1403 StringRef SectionName = StringRef();
1404 for (const SectionRef &Section : MachO->sections()) {
1405 S.NSect++;
1406
1407 if (Expected<StringRef> NameOrErr = Section.getName())
1408 SectionName = *NameOrErr;
1409 else
1410 consumeError(NameOrErr.takeError());
1411
1412 SegmentName = MachO->getSectionFinalSegmentName(
1413 Section.getRawDataRefImpl());
1414 if (S.Address >= Section.getAddress() &&
1415 S.Address < Section.getAddress() + Section.getSize()) {
1416 S.Section = Section;
1417 break;
1418 } else if (Entry.name() == "__mh_execute_header" &&
1419 SegmentName == "__TEXT" && SectionName == "__text") {
1420 S.Section = Section;
1421 S.NDesc |= MachO::REFERENCED_DYNAMICALLY;
1422 break;
1423 }
1424 }
1425 if (SegmentName == "__TEXT" && SectionName == "__text")
1426 S.TypeChar = 'T';
1427 else if (SegmentName == "__DATA" && SectionName == "__data")
1428 S.TypeChar = 'D';
1429 else if (SegmentName == "__DATA" && SectionName == "__bss")
1430 S.TypeChar = 'B';
1431 else
1432 S.TypeChar = 'S';
1433 }
1434 SymbolList.push_back(S);
1435
1436 EOS << Entry.name();
1437 EOS << '\0';
1438 ExportsAdded++;
1439
1440 // For ReExports there are a two more things to do, first add the
1441 // indirect name and second create the undefined symbol using the
1442 // referened dynamic library.
1443 if (ReExport) {
1444
1445 // Add the indirect name.
1446 if (Entry.otherName().empty())
1447 EOS << Entry.name();
1448 else
1449 EOS << Entry.otherName();
1450 EOS << '\0';
1451
1452 // Now create the undefined symbol using the referened dynamic
1453 // library.
1454 NMSymbol U = {};
1455 U.Address = 0;
1456 U.Size = 0;
1457 U.TypeChar = 'U';
1458 if (Entry.otherName().empty())
1459 U.Name = Entry.name();
1460 else
1461 U.Name = Entry.otherName();
1462 // Again there is no symbol in the nlist symbol table for this so
1463 // we set Sym effectivly to null and the rest of code in here must
1464 // test for it and not do things like Sym.getFlags() for it.
1465 U.Sym = BasicSymbolRef();
1466 U.SymFlags = SymbolRef::SF_Global | SymbolRef::SF_Undefined;
1467 U.Section = SectionRef();
1468 U.NType = MachO::N_EXT | MachO::N_UNDF;
1469 U.NSect = 0;
1470 U.NDesc = 0;
1471 // The library ordinal for this undefined symbol is in the export
1472 // trie Entry.other().
1473 MachO::SET_LIBRARY_ORDINAL(U.NDesc, Entry.other());
1474 U.IndirectName = StringRef();
1475 SymbolList.push_back(U);
1476
1477 // Finally add the undefined symbol's name.
1478 if (Entry.otherName().empty())
1479 EOS << Entry.name();
1480 else
1481 EOS << Entry.otherName();
1482 EOS << '\0';
1483 ExportsAdded++;
1484 }
1485 }
1486 }
1487 if (Err)
1488 error(std::move(Err), MachO->getFileName());
1489 // Set the symbol names and indirect names for the added symbols.
1490 if (ExportsAdded) {
1491 EOS.flush();
1492 const char *Q = ExportsNameBuffer.c_str();
1493 for (unsigned K = 0; K < ExportsAdded; K++) {
1494 SymbolList[I].Name = Q;
1495 Q += strlen(Q) + 1;
1496 if (SymbolList[I].TypeChar == 'I') {
1497 SymbolList[I].IndirectName = Q;
1498 Q += strlen(Q) + 1;
1499 }
1500 I++;
1501 }
1502 }
1503
1504 // Add the undefined symbols from the bind entries.
1505 unsigned BindsAdded = 0;
1506 Error BErr = Error::success();
1507 StringRef LastSymbolName = StringRef();
1508 for (const llvm::object::MachOBindEntry &Entry : MachO->bindTable(BErr)) {
1509 bool found = false;
1510 if (LastSymbolName == Entry.symbolName())
1511 found = true;
1512 else if(!DyldInfoOnly) {
1513 for (unsigned J = 0; J < SymbolList.size() && !found; ++J) {
1514 if (SymbolList[J].Name == Entry.symbolName())
1515 found = true;
1516 }
1517 }
1518 if (!found) {
1519 LastSymbolName = Entry.symbolName();
1520 NMSymbol B = {};
1521 B.Address = 0;
1522 B.Size = 0;
1523 B.TypeChar = 'U';
1524 // There is no symbol in the nlist symbol table for this so we set
1525 // Sym effectivly to null and the rest of code in here must test for
1526 // it and not do things like Sym.getFlags() for it.
1527 B.Sym = BasicSymbolRef();
1528 B.SymFlags = SymbolRef::SF_Global | SymbolRef::SF_Undefined;
1529 B.NType = MachO::N_EXT | MachO::N_UNDF;
1530 B.NSect = 0;
1531 B.NDesc = 0;
1532 MachO::SET_LIBRARY_ORDINAL(B.NDesc, Entry.ordinal());
1533 B.IndirectName = StringRef();
1534 B.Name = Entry.symbolName();
1535 SymbolList.push_back(B);
1536 BOS << Entry.symbolName();
1537 BOS << '\0';
1538 BindsAdded++;
1539 }
1540 }
1541 if (BErr)
1542 error(std::move(BErr), MachO->getFileName());
1543 // Set the symbol names and indirect names for the added symbols.
1544 if (BindsAdded) {
1545 BOS.flush();
1546 const char *Q = BindsNameBuffer.c_str();
1547 for (unsigned K = 0; K < BindsAdded; K++) {
1548 SymbolList[I].Name = Q;
1549 Q += strlen(Q) + 1;
1550 if (SymbolList[I].TypeChar == 'I') {
1551 SymbolList[I].IndirectName = Q;
1552 Q += strlen(Q) + 1;
1553 }
1554 I++;
1555 }
1556 }
1557
1558 // Add the undefined symbols from the lazy bind entries.
1559 unsigned LazysAdded = 0;
1560 Error LErr = Error::success();
1561 LastSymbolName = StringRef();
1562 for (const llvm::object::MachOBindEntry &Entry :
1563 MachO->lazyBindTable(LErr)) {
1564 bool found = false;
1565 if (LastSymbolName == Entry.symbolName())
1566 found = true;
1567 else {
1568 // Here we must check to see it this symbol is already in the
1569 // SymbolList as it might have already have been added above via a
1570 // non-lazy (bind) entry.
1571 for (unsigned J = 0; J < SymbolList.size() && !found; ++J) {
1572 if (SymbolList[J].Name == Entry.symbolName())
1573 found = true;
1574 }
1575 }
1576 if (!found) {
1577 LastSymbolName = Entry.symbolName();
1578 NMSymbol L = {};
1579 L.Name = Entry.symbolName();
1580 L.Address = 0;
1581 L.Size = 0;
1582 L.TypeChar = 'U';
1583 // There is no symbol in the nlist symbol table for this so we set
1584 // Sym effectivly to null and the rest of code in here must test for
1585 // it and not do things like Sym.getFlags() for it.
1586 L.Sym = BasicSymbolRef();
1587 L.SymFlags = SymbolRef::SF_Global | SymbolRef::SF_Undefined;
1588 L.NType = MachO::N_EXT | MachO::N_UNDF;
1589 L.NSect = 0;
1590 // The REFERENCE_FLAG_UNDEFINED_LAZY is no longer used but here it
1591 // makes sence since we are creating this from a lazy bind entry.
1592 L.NDesc = MachO::REFERENCE_FLAG_UNDEFINED_LAZY;
1593 MachO::SET_LIBRARY_ORDINAL(L.NDesc, Entry.ordinal());
1594 L.IndirectName = StringRef();
1595 SymbolList.push_back(L);
1596 LOS << Entry.symbolName();
1597 LOS << '\0';
1598 LazysAdded++;
1599 }
1600 }
1601 if (LErr)
1602 error(std::move(LErr), MachO->getFileName());
1603 // Set the symbol names and indirect names for the added symbols.
1604 if (LazysAdded) {
1605 LOS.flush();
1606 const char *Q = LazysNameBuffer.c_str();
1607 for (unsigned K = 0; K < LazysAdded; K++) {
1608 SymbolList[I].Name = Q;
1609 Q += strlen(Q) + 1;
1610 if (SymbolList[I].TypeChar == 'I') {
1611 SymbolList[I].IndirectName = Q;
1612 Q += strlen(Q) + 1;
1613 }
1614 I++;
1615 }
1616 }
1617
1618 // Add the undefineds symbol from the weak bind entries which are not
1619 // strong symbols.
1620 unsigned WeaksAdded = 0;
1621 Error WErr = Error::success();
1622 LastSymbolName = StringRef();
1623 for (const llvm::object::MachOBindEntry &Entry :
1624 MachO->weakBindTable(WErr)) {
1625 bool found = false;
1626 unsigned J = 0;
1627 if (LastSymbolName == Entry.symbolName() ||
1628 Entry.flags() & MachO::BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION) {
1629 found = true;
1630 } else {
1631 for (J = 0; J < SymbolList.size() && !found; ++J) {
1632 if (SymbolList[J].Name == Entry.symbolName()) {
1633 found = true;
1634 break;
1635 }
1636 }
1637 }
1638 if (!found) {
1639 LastSymbolName = Entry.symbolName();
1640 NMSymbol W = {};
1641 W.Name = Entry.symbolName();
1642 W.Address = 0;
1643 W.Size = 0;
1644 W.TypeChar = 'U';
1645 // There is no symbol in the nlist symbol table for this so we set
1646 // Sym effectivly to null and the rest of code in here must test for
1647 // it and not do things like Sym.getFlags() for it.
1648 W.Sym = BasicSymbolRef();
1649 W.SymFlags = SymbolRef::SF_Global | SymbolRef::SF_Undefined;
1650 W.NType = MachO::N_EXT | MachO::N_UNDF;
1651 W.NSect = 0;
1652 // Odd that we are using N_WEAK_DEF on an undefined symbol but that is
1653 // what is created in this case by the linker when there are real
1654 // symbols in the nlist structs.
1655 W.NDesc = MachO::N_WEAK_DEF;
1656 W.IndirectName = StringRef();
1657 SymbolList.push_back(W);
1658 WOS << Entry.symbolName();
1659 WOS << '\0';
1660 WeaksAdded++;
1661 } else {
1662 // This is the case the symbol was previously been found and it could
1663 // have been added from a bind or lazy bind symbol. If so and not
1664 // a definition also mark it as weak.
1665 if (SymbolList[J].TypeChar == 'U')
1666 // See comment above about N_WEAK_DEF.
1667 SymbolList[J].NDesc |= MachO::N_WEAK_DEF;
1668 }
1669 }
1670 if (WErr)
1671 error(std::move(WErr), MachO->getFileName());
1672 // Set the symbol names and indirect names for the added symbols.
1673 if (WeaksAdded) {
1674 WOS.flush();
1675 const char *Q = WeaksNameBuffer.c_str();
1676 for (unsigned K = 0; K < WeaksAdded; K++) {
1677 SymbolList[I].Name = Q;
1678 Q += strlen(Q) + 1;
1679 if (SymbolList[I].TypeChar == 'I') {
1680 SymbolList[I].IndirectName = Q;
1681 Q += strlen(Q) + 1;
1682 }
1683 I++;
1684 }
1685 }
1686
1687 // Trying adding symbol from the function starts table and LC_MAIN entry
1688 // point.
1689 SmallVector<uint64_t, 8> FoundFns;
1690 uint64_t lc_main_offset = UINT64_MAX;
1691 for (const auto &Command : MachO->load_commands()) {
1692 if (Command.C.cmd == MachO::LC_FUNCTION_STARTS) {
1693 // We found a function starts segment, parse the addresses for
1694 // consumption.
1695 MachO::linkedit_data_command LLC =
1696 MachO->getLinkeditDataLoadCommand(Command);
1697
1698 MachO->ReadULEB128s(LLC.dataoff, FoundFns);
1699 } else if (Command.C.cmd == MachO::LC_MAIN) {
1700 MachO::entry_point_command LCmain =
1701 MachO->getEntryPointCommand(Command);
1702 lc_main_offset = LCmain.entryoff;
1703 }
1704 }
1705 // See if these addresses are already in the symbol table.
1706 unsigned FunctionStartsAdded = 0;
1707 for (uint64_t f = 0; f < FoundFns.size(); f++) {
1708 bool found = false;
1709 for (unsigned J = 0; J < SymbolList.size() && !found; ++J) {
1710 if (SymbolList[J].Address == FoundFns[f] + BaseSegmentAddress)
1711 found = true;
1712 }
1713 // See this address is not already in the symbol table fake up an
1714 // nlist for it.
1715 if (!found) {
1716 NMSymbol F = {};
1717 F.Name = "<redacted function X>";
1718 F.Address = FoundFns[f] + BaseSegmentAddress;
1719 F.Size = 0;
1720 // There is no symbol in the nlist symbol table for this so we set
1721 // Sym effectivly to null and the rest of code in here must test for
1722 // it and not do things like Sym.getFlags() for it.
1723 F.Sym = BasicSymbolRef();
1724 F.SymFlags = 0;
1725 F.NType = MachO::N_SECT;
1726 F.NSect = 0;
1727 StringRef SegmentName = StringRef();
1728 StringRef SectionName = StringRef();
1729 for (const SectionRef &Section : MachO->sections()) {
1730 if (Expected<StringRef> NameOrErr = Section.getName())
1731 SectionName = *NameOrErr;
1732 else
1733 consumeError(NameOrErr.takeError());
1734
1735 SegmentName = MachO->getSectionFinalSegmentName(
1736 Section.getRawDataRefImpl());
1737 F.NSect++;
1738 if (F.Address >= Section.getAddress() &&
1739 F.Address < Section.getAddress() + Section.getSize()) {
1740 F.Section = Section;
1741 break;
1742 }
1743 }
1744 if (SegmentName == "__TEXT" && SectionName == "__text")
1745 F.TypeChar = 't';
1746 else if (SegmentName == "__DATA" && SectionName == "__data")
1747 F.TypeChar = 'd';
1748 else if (SegmentName == "__DATA" && SectionName == "__bss")
1749 F.TypeChar = 'b';
1750 else
1751 F.TypeChar = 's';
1752 F.NDesc = 0;
1753 F.IndirectName = StringRef();
1754 SymbolList.push_back(F);
1755 if (FoundFns[f] == lc_main_offset)
1756 FOS << "<redacted LC_MAIN>";
1757 else
1758 FOS << "<redacted function " << f << ">";
1759 FOS << '\0';
1760 FunctionStartsAdded++;
1761 }
1762 }
1763 if (FunctionStartsAdded) {
1764 FOS.flush();
1765 const char *Q = FunctionStartsNameBuffer.c_str();
1766 for (unsigned K = 0; K < FunctionStartsAdded; K++) {
1767 SymbolList[I].Name = Q;
1768 Q += strlen(Q) + 1;
1769 if (SymbolList[I].TypeChar == 'I') {
1770 SymbolList[I].IndirectName = Q;
1771 Q += strlen(Q) + 1;
1772 }
1773 I++;
1774 }
1775 }
1776 }
1777 }
1778
1779 CurrentFilename = Obj.getFileName();
1780
1781 if (Symbols.empty() && SymbolList.empty()) {
1782 writeFileName(errs(), ArchiveName, ArchitectureName);
1783 errs() << "no symbols\n";
1784 }
1785
1786 sortAndPrintSymbolList(Obj, printName, ArchiveName, ArchitectureName);
1787 }
1788
1789 // checkMachOAndArchFlags() checks to see if the SymbolicFile is a Mach-O file
1790 // and if it is and there is a list of architecture flags is specified then
1791 // check to make sure this Mach-O file is one of those architectures or all
1792 // architectures was specificed. If not then an error is generated and this
1793 // routine returns false. Else it returns true.
checkMachOAndArchFlags(SymbolicFile * O,std::string & Filename)1794 static bool checkMachOAndArchFlags(SymbolicFile *O, std::string &Filename) {
1795 auto *MachO = dyn_cast<MachOObjectFile>(O);
1796
1797 if (!MachO || ArchAll || ArchFlags.empty())
1798 return true;
1799
1800 MachO::mach_header H;
1801 MachO::mach_header_64 H_64;
1802 Triple T;
1803 const char *McpuDefault, *ArchFlag;
1804 if (MachO->is64Bit()) {
1805 H_64 = MachO->MachOObjectFile::getHeader64();
1806 T = MachOObjectFile::getArchTriple(H_64.cputype, H_64.cpusubtype,
1807 &McpuDefault, &ArchFlag);
1808 } else {
1809 H = MachO->MachOObjectFile::getHeader();
1810 T = MachOObjectFile::getArchTriple(H.cputype, H.cpusubtype,
1811 &McpuDefault, &ArchFlag);
1812 }
1813 const std::string ArchFlagName(ArchFlag);
1814 if (none_of(ArchFlags, [&](const std::string &Name) {
1815 return Name == ArchFlagName;
1816 })) {
1817 error("No architecture specified", Filename);
1818 return false;
1819 }
1820 return true;
1821 }
1822
dumpSymbolNamesFromFile(std::string & Filename)1823 static void dumpSymbolNamesFromFile(std::string &Filename) {
1824 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
1825 MemoryBuffer::getFileOrSTDIN(Filename);
1826 if (error(BufferOrErr.getError(), Filename))
1827 return;
1828
1829 LLVMContext Context;
1830 LLVMContext *ContextPtr = NoLLVMBitcode ? nullptr : &Context;
1831 Expected<std::unique_ptr<Binary>> BinaryOrErr =
1832 createBinary(BufferOrErr.get()->getMemBufferRef(), ContextPtr);
1833 if (!BinaryOrErr) {
1834 error(BinaryOrErr.takeError(), Filename);
1835 return;
1836 }
1837 Binary &Bin = *BinaryOrErr.get();
1838
1839 if (Archive *A = dyn_cast<Archive>(&Bin)) {
1840 if (ArchiveMap) {
1841 Archive::symbol_iterator I = A->symbol_begin();
1842 Archive::symbol_iterator E = A->symbol_end();
1843 if (I != E) {
1844 outs() << "Archive map\n";
1845 for (; I != E; ++I) {
1846 Expected<Archive::Child> C = I->getMember();
1847 if (!C) {
1848 error(C.takeError(), Filename);
1849 break;
1850 }
1851 Expected<StringRef> FileNameOrErr = C->getName();
1852 if (!FileNameOrErr) {
1853 error(FileNameOrErr.takeError(), Filename);
1854 break;
1855 }
1856 StringRef SymName = I->getName();
1857 outs() << SymName << " in " << FileNameOrErr.get() << "\n";
1858 }
1859 outs() << "\n";
1860 }
1861 }
1862
1863 {
1864 Error Err = Error::success();
1865 for (auto &C : A->children(Err)) {
1866 Expected<std::unique_ptr<Binary>> ChildOrErr =
1867 C.getAsBinary(ContextPtr);
1868 if (!ChildOrErr) {
1869 if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
1870 error(std::move(E), Filename, C);
1871 continue;
1872 }
1873 if (SymbolicFile *O = dyn_cast<SymbolicFile>(&*ChildOrErr.get())) {
1874 if (!MachOPrintSizeWarning && PrintSize && isa<MachOObjectFile>(O)) {
1875 WithColor::warning(errs(), ToolName)
1876 << "sizes with -print-size for Mach-O files are always zero.\n";
1877 MachOPrintSizeWarning = true;
1878 }
1879 if (!checkMachOAndArchFlags(O, Filename))
1880 return;
1881 if (!PrintFileName) {
1882 outs() << "\n";
1883 if (isa<MachOObjectFile>(O)) {
1884 outs() << Filename << "(" << O->getFileName() << ")";
1885 } else
1886 outs() << O->getFileName();
1887 outs() << ":\n";
1888 }
1889 dumpSymbolNamesFromObject(*O, false, Filename);
1890 }
1891 }
1892 if (Err)
1893 error(std::move(Err), A->getFileName());
1894 }
1895 return;
1896 }
1897 if (MachOUniversalBinary *UB = dyn_cast<MachOUniversalBinary>(&Bin)) {
1898 // If we have a list of architecture flags specified dump only those.
1899 if (!ArchAll && !ArchFlags.empty()) {
1900 // Look for a slice in the universal binary that matches each ArchFlag.
1901 bool ArchFound;
1902 for (unsigned i = 0; i < ArchFlags.size(); ++i) {
1903 ArchFound = false;
1904 for (MachOUniversalBinary::object_iterator I = UB->begin_objects(),
1905 E = UB->end_objects();
1906 I != E; ++I) {
1907 if (ArchFlags[i] == I->getArchFlagName()) {
1908 ArchFound = true;
1909 Expected<std::unique_ptr<ObjectFile>> ObjOrErr =
1910 I->getAsObjectFile();
1911 std::string ArchiveName;
1912 std::string ArchitectureName;
1913 ArchiveName.clear();
1914 ArchitectureName.clear();
1915 if (ObjOrErr) {
1916 ObjectFile &Obj = *ObjOrErr.get();
1917 if (ArchFlags.size() > 1) {
1918 if (PrintFileName)
1919 ArchitectureName = I->getArchFlagName();
1920 else
1921 outs() << "\n" << Obj.getFileName() << " (for architecture "
1922 << I->getArchFlagName() << ")"
1923 << ":\n";
1924 }
1925 dumpSymbolNamesFromObject(Obj, false, ArchiveName,
1926 ArchitectureName);
1927 } else if (auto E = isNotObjectErrorInvalidFileType(
1928 ObjOrErr.takeError())) {
1929 error(std::move(E), Filename, ArchFlags.size() > 1 ?
1930 StringRef(I->getArchFlagName()) : StringRef());
1931 continue;
1932 } else if (Expected<std::unique_ptr<Archive>> AOrErr =
1933 I->getAsArchive()) {
1934 std::unique_ptr<Archive> &A = *AOrErr;
1935 Error Err = Error::success();
1936 for (auto &C : A->children(Err)) {
1937 Expected<std::unique_ptr<Binary>> ChildOrErr =
1938 C.getAsBinary(ContextPtr);
1939 if (!ChildOrErr) {
1940 if (auto E = isNotObjectErrorInvalidFileType(
1941 ChildOrErr.takeError())) {
1942 error(std::move(E), Filename, C, ArchFlags.size() > 1 ?
1943 StringRef(I->getArchFlagName()) : StringRef());
1944 }
1945 continue;
1946 }
1947 if (SymbolicFile *O =
1948 dyn_cast<SymbolicFile>(&*ChildOrErr.get())) {
1949 if (PrintFileName) {
1950 ArchiveName = std::string(A->getFileName());
1951 if (ArchFlags.size() > 1)
1952 ArchitectureName = I->getArchFlagName();
1953 } else {
1954 outs() << "\n" << A->getFileName();
1955 outs() << "(" << O->getFileName() << ")";
1956 if (ArchFlags.size() > 1) {
1957 outs() << " (for architecture " << I->getArchFlagName()
1958 << ")";
1959 }
1960 outs() << ":\n";
1961 }
1962 dumpSymbolNamesFromObject(*O, false, ArchiveName,
1963 ArchitectureName);
1964 }
1965 }
1966 if (Err)
1967 error(std::move(Err), A->getFileName());
1968 } else {
1969 consumeError(AOrErr.takeError());
1970 error(Filename + " for architecture " +
1971 StringRef(I->getArchFlagName()) +
1972 " is not a Mach-O file or an archive file",
1973 "Mach-O universal file");
1974 }
1975 }
1976 }
1977 if (!ArchFound) {
1978 error(ArchFlags[i],
1979 "file: " + Filename + " does not contain architecture");
1980 return;
1981 }
1982 }
1983 return;
1984 }
1985 // No architecture flags were specified so if this contains a slice that
1986 // matches the host architecture dump only that.
1987 if (!ArchAll) {
1988 Triple HostTriple = MachOObjectFile::getHostArch();
1989 StringRef HostArchName = HostTriple.getArchName();
1990 for (MachOUniversalBinary::object_iterator I = UB->begin_objects(),
1991 E = UB->end_objects();
1992 I != E; ++I) {
1993 if (HostArchName == I->getArchFlagName()) {
1994 Expected<std::unique_ptr<ObjectFile>> ObjOrErr = I->getAsObjectFile();
1995 std::string ArchiveName;
1996 if (ObjOrErr) {
1997 ObjectFile &Obj = *ObjOrErr.get();
1998 dumpSymbolNamesFromObject(Obj, false);
1999 } else if (auto E = isNotObjectErrorInvalidFileType(
2000 ObjOrErr.takeError())) {
2001 error(std::move(E), Filename);
2002 return;
2003 } else if (Expected<std::unique_ptr<Archive>> AOrErr =
2004 I->getAsArchive()) {
2005 std::unique_ptr<Archive> &A = *AOrErr;
2006 Error Err = Error::success();
2007 for (auto &C : A->children(Err)) {
2008 Expected<std::unique_ptr<Binary>> ChildOrErr =
2009 C.getAsBinary(ContextPtr);
2010 if (!ChildOrErr) {
2011 if (auto E = isNotObjectErrorInvalidFileType(
2012 ChildOrErr.takeError()))
2013 error(std::move(E), Filename, C);
2014 continue;
2015 }
2016 if (SymbolicFile *O =
2017 dyn_cast<SymbolicFile>(&*ChildOrErr.get())) {
2018 if (PrintFileName)
2019 ArchiveName = std::string(A->getFileName());
2020 else
2021 outs() << "\n" << A->getFileName() << "(" << O->getFileName()
2022 << ")"
2023 << ":\n";
2024 dumpSymbolNamesFromObject(*O, false, ArchiveName);
2025 }
2026 }
2027 if (Err)
2028 error(std::move(Err), A->getFileName());
2029 } else {
2030 consumeError(AOrErr.takeError());
2031 error(Filename + " for architecture " +
2032 StringRef(I->getArchFlagName()) +
2033 " is not a Mach-O file or an archive file",
2034 "Mach-O universal file");
2035 }
2036 return;
2037 }
2038 }
2039 }
2040 // Either all architectures have been specified or none have been specified
2041 // and this does not contain the host architecture so dump all the slices.
2042 bool moreThanOneArch = UB->getNumberOfObjects() > 1;
2043 for (const MachOUniversalBinary::ObjectForArch &O : UB->objects()) {
2044 Expected<std::unique_ptr<ObjectFile>> ObjOrErr = O.getAsObjectFile();
2045 std::string ArchiveName;
2046 std::string ArchitectureName;
2047 ArchiveName.clear();
2048 ArchitectureName.clear();
2049 if (ObjOrErr) {
2050 ObjectFile &Obj = *ObjOrErr.get();
2051 if (PrintFileName) {
2052 if (isa<MachOObjectFile>(Obj) && moreThanOneArch)
2053 ArchitectureName = O.getArchFlagName();
2054 } else {
2055 if (moreThanOneArch)
2056 outs() << "\n";
2057 outs() << Obj.getFileName();
2058 if (isa<MachOObjectFile>(Obj) && moreThanOneArch)
2059 outs() << " (for architecture " << O.getArchFlagName() << ")";
2060 outs() << ":\n";
2061 }
2062 dumpSymbolNamesFromObject(Obj, false, ArchiveName, ArchitectureName);
2063 } else if (auto E = isNotObjectErrorInvalidFileType(
2064 ObjOrErr.takeError())) {
2065 error(std::move(E), Filename, moreThanOneArch ?
2066 StringRef(O.getArchFlagName()) : StringRef());
2067 continue;
2068 } else if (Expected<std::unique_ptr<Archive>> AOrErr =
2069 O.getAsArchive()) {
2070 std::unique_ptr<Archive> &A = *AOrErr;
2071 Error Err = Error::success();
2072 for (auto &C : A->children(Err)) {
2073 Expected<std::unique_ptr<Binary>> ChildOrErr =
2074 C.getAsBinary(ContextPtr);
2075 if (!ChildOrErr) {
2076 if (auto E = isNotObjectErrorInvalidFileType(
2077 ChildOrErr.takeError()))
2078 error(std::move(E), Filename, C, moreThanOneArch ?
2079 StringRef(ArchitectureName) : StringRef());
2080 continue;
2081 }
2082 if (SymbolicFile *F = dyn_cast<SymbolicFile>(&*ChildOrErr.get())) {
2083 if (PrintFileName) {
2084 ArchiveName = std::string(A->getFileName());
2085 if (isa<MachOObjectFile>(F) && moreThanOneArch)
2086 ArchitectureName = O.getArchFlagName();
2087 } else {
2088 outs() << "\n" << A->getFileName();
2089 if (isa<MachOObjectFile>(F)) {
2090 outs() << "(" << F->getFileName() << ")";
2091 if (moreThanOneArch)
2092 outs() << " (for architecture " << O.getArchFlagName()
2093 << ")";
2094 } else
2095 outs() << ":" << F->getFileName();
2096 outs() << ":\n";
2097 }
2098 dumpSymbolNamesFromObject(*F, false, ArchiveName, ArchitectureName);
2099 }
2100 }
2101 if (Err)
2102 error(std::move(Err), A->getFileName());
2103 } else {
2104 consumeError(AOrErr.takeError());
2105 error(Filename + " for architecture " +
2106 StringRef(O.getArchFlagName()) +
2107 " is not a Mach-O file or an archive file",
2108 "Mach-O universal file");
2109 }
2110 }
2111 return;
2112 }
2113
2114 if (TapiUniversal *TU = dyn_cast<TapiUniversal>(&Bin)) {
2115 for (const TapiUniversal::ObjectForArch &I : TU->objects()) {
2116 StringRef ArchName = I.getArchFlagName();
2117 const bool ShowArch =
2118 ArchFlags.empty() ||
2119 any_of(ArchFlags, [&](StringRef Name) { return Name == ArchName; });
2120 if (!ShowArch)
2121 continue;
2122 if (!AddInlinedInfo && !I.isTopLevelLib())
2123 continue;
2124 if (auto ObjOrErr = I.getAsObjectFile()) {
2125 outs() << "\n"
2126 << I.getInstallName() << " (for architecture " << ArchName << ")"
2127 << ":\n";
2128 dumpSymbolNamesFromObject(*ObjOrErr.get(), false, {}, ArchName);
2129 } else if (Error E =
2130 isNotObjectErrorInvalidFileType(ObjOrErr.takeError())) {
2131 error(std::move(E), Filename, ArchName);
2132 }
2133 }
2134
2135 return;
2136 }
2137
2138 if (SymbolicFile *O = dyn_cast<SymbolicFile>(&Bin)) {
2139 if (!MachOPrintSizeWarning && PrintSize && isa<MachOObjectFile>(O)) {
2140 WithColor::warning(errs(), ToolName)
2141 << "sizes with --print-size for Mach-O files are always zero.\n";
2142 MachOPrintSizeWarning = true;
2143 }
2144 if (!checkMachOAndArchFlags(O, Filename))
2145 return;
2146 dumpSymbolNamesFromObject(*O, true);
2147 }
2148 }
2149
main(int argc,char ** argv)2150 int main(int argc, char **argv) {
2151 InitLLVM X(argc, argv);
2152 cl::HideUnrelatedOptions(NMCat);
2153 cl::ParseCommandLineOptions(argc, argv, "llvm symbol table dumper\n");
2154
2155 // llvm-nm only reads binary files.
2156 if (error(sys::ChangeStdinToBinary()))
2157 return 1;
2158
2159 // These calls are needed so that we can read bitcode correctly.
2160 llvm::InitializeAllTargetInfos();
2161 llvm::InitializeAllTargetMCs();
2162 llvm::InitializeAllAsmParsers();
2163
2164 ToolName = argv[0];
2165 if (BSDFormat)
2166 OutputFormat = bsd;
2167 if (POSIXFormat)
2168 OutputFormat = posix;
2169 if (DarwinFormat)
2170 OutputFormat = darwin;
2171
2172 // The relative order of these is important. If you pass --size-sort it should
2173 // only print out the size. However, if you pass -S --size-sort, it should
2174 // print out both the size and address.
2175 if (SizeSort && !PrintSize)
2176 PrintAddress = false;
2177 if (OutputFormat == sysv || SizeSort)
2178 PrintSize = true;
2179 if (InputFilenames.empty())
2180 InputFilenames.push_back("a.out");
2181 if (InputFilenames.size() > 1)
2182 MultipleFiles = true;
2183
2184 // If both --demangle and --no-demangle are specified then pick the last one.
2185 if (NoDemangle.getPosition() > Demangle.getPosition())
2186 Demangle = !NoDemangle;
2187
2188 for (unsigned i = 0; i < ArchFlags.size(); ++i) {
2189 if (ArchFlags[i] == "all") {
2190 ArchAll = true;
2191 } else {
2192 if (!MachOObjectFile::isValidArch(ArchFlags[i]))
2193 error("Unknown architecture named '" + ArchFlags[i] + "'",
2194 "for the --arch option");
2195 }
2196 }
2197
2198 if (!SegSect.empty() && SegSect.size() != 2)
2199 error("bad number of arguments (must be two arguments)",
2200 "for the -s option");
2201
2202 if (NoDyldInfo && (AddDyldInfo || DyldInfoOnly))
2203 error("--no-dyldinfo can't be used with --add-dyldinfo or --dyldinfo-only");
2204
2205 llvm::for_each(InputFilenames, dumpSymbolNamesFromFile);
2206
2207 if (HadError)
2208 return 1;
2209 }
2210