• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===-- llvm-nm.cpp - Symbol table dumping utility for llvm ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This program is a utility that works like traditional Unix "nm", that is, it
10 // prints out the names of symbols in a bitcode or object file, along with some
11 // information about each symbol.
12 //
13 // This "nm" supports many of the features of GNU "nm", including its different
14 // output formats.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/BinaryFormat/COFF.h"
20 #include "llvm/Demangle/Demangle.h"
21 #include "llvm/IR/Function.h"
22 #include "llvm/IR/LLVMContext.h"
23 #include "llvm/Object/Archive.h"
24 #include "llvm/Object/COFF.h"
25 #include "llvm/Object/COFFImportFile.h"
26 #include "llvm/Object/ELFObjectFile.h"
27 #include "llvm/Object/IRObjectFile.h"
28 #include "llvm/Object/MachO.h"
29 #include "llvm/Object/MachOUniversal.h"
30 #include "llvm/Object/ObjectFile.h"
31 #include "llvm/Object/TapiFile.h"
32 #include "llvm/Object/TapiUniversal.h"
33 #include "llvm/Object/Wasm.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/FileSystem.h"
36 #include "llvm/Support/Format.h"
37 #include "llvm/Support/InitLLVM.h"
38 #include "llvm/Support/MemoryBuffer.h"
39 #include "llvm/Support/Program.h"
40 #include "llvm/Support/Signals.h"
41 #include "llvm/Support/TargetSelect.h"
42 #include "llvm/Support/WithColor.h"
43 #include "llvm/Support/raw_ostream.h"
44 #include <vector>
45 
46 using namespace llvm;
47 using namespace object;
48 
// Command-line interface and tool-wide state for llvm-nm.
//
// Every option below is registered in the NMCat category.  Single-letter
// spellings are provided as cl::alias entries that forward to the long option
// and are marked cl::Grouping so they can be combined on the command line.
namespace {
// Output layouts selectable with --format.
enum OutputFormatTy { bsd, sysv, posix, darwin };

cl::OptionCategory NMCat("llvm-nm Options");

cl::opt<OutputFormatTy> OutputFormat(
    "format", cl::desc("Specify output format"),
    cl::values(clEnumVal(bsd, "BSD format"), clEnumVal(sysv, "System V format"),
               clEnumVal(posix, "POSIX.2 format"),
               clEnumVal(darwin, "Darwin -m format")),
    cl::init(bsd), cl::cat(NMCat));
cl::alias OutputFormat2("f", cl::desc("Alias for --format"),
                        cl::aliasopt(OutputFormat));

cl::list<std::string> InputFilenames(cl::Positional, cl::desc("<input files>"),
                                     cl::ZeroOrMore);

// Symbol filters.
cl::opt<bool> UndefinedOnly("undefined-only",
                            cl::desc("Show only undefined symbols"),
                            cl::cat(NMCat));
cl::alias UndefinedOnly2("u", cl::desc("Alias for --undefined-only"),
                         cl::aliasopt(UndefinedOnly), cl::Grouping);

cl::opt<bool> DynamicSyms("dynamic",
                          cl::desc("Display the dynamic symbols instead "
                                   "of normal symbols."),
                          cl::cat(NMCat));
cl::alias DynamicSyms2("D", cl::desc("Alias for --dynamic"),
                       cl::aliasopt(DynamicSyms), cl::Grouping);

cl::opt<bool> DefinedOnly("defined-only", cl::desc("Show only defined symbols"),
                          cl::cat(NMCat));
cl::alias DefinedOnly2("U", cl::desc("Alias for --defined-only"),
                       cl::aliasopt(DefinedOnly), cl::Grouping);

cl::opt<bool> ExternalOnly("extern-only",
                           cl::desc("Show only external symbols"),
                           cl::ZeroOrMore, cl::cat(NMCat));
cl::alias ExternalOnly2("g", cl::desc("Alias for --extern-only"),
                        cl::aliasopt(ExternalOnly), cl::Grouping,
                        cl::ZeroOrMore);

cl::opt<bool> NoWeakSymbols("no-weak", cl::desc("Show only non-weak symbols"),
                            cl::cat(NMCat));
cl::alias NoWeakSymbols2("W", cl::desc("Alias for --no-weak"),
                         cl::aliasopt(NoWeakSymbols), cl::Grouping);

// -B, -P and -m are separate boolean flags rather than cl::alias entries for
// --format.  NOTE(review): presumably they are folded into OutputFormat after
// option parsing; that code is not in this part of the file.
cl::opt<bool> BSDFormat("B", cl::desc("Alias for --format=bsd"), cl::Grouping,
                        cl::cat(NMCat));
cl::opt<bool> POSIXFormat("P", cl::desc("Alias for --format=posix"),
                          cl::Grouping, cl::cat(NMCat));
cl::alias Portability("portability", cl::desc("Alias for --format=posix"),
                      cl::aliasopt(POSIXFormat), cl::NotHidden);
cl::opt<bool> DarwinFormat("m", cl::desc("Alias for --format=darwin"),
                           cl::Grouping, cl::cat(NMCat));

static cl::list<std::string>
    ArchFlags("arch", cl::desc("architecture(s) from a Mach-O file to dump"),
              cl::ZeroOrMore, cl::cat(NMCat));
// NOTE(review): not written anywhere in the visible code; presumably set when
// the user asks for every architecture slice -- confirm against the -arch
// handling.
bool ArchAll = false;

cl::opt<bool> PrintFileName(
    "print-file-name",
    cl::desc("Precede each symbol with the object file it came from"),
    cl::cat(NMCat));

cl::alias PrintFileNameA("A", cl::desc("Alias for --print-file-name"),
                         cl::aliasopt(PrintFileName), cl::Grouping);
cl::alias PrintFileNameo("o", cl::desc("Alias for --print-file-name"),
                         cl::aliasopt(PrintFileName), cl::Grouping);

cl::opt<bool> DebugSyms("debug-syms",
                        cl::desc("Show all symbols, even debugger only"),
                        cl::cat(NMCat));
cl::alias DebugSymsa("a", cl::desc("Alias for --debug-syms"),
                     cl::aliasopt(DebugSyms), cl::Grouping);

// Sort order selection.
cl::opt<bool> NumericSort("numeric-sort", cl::desc("Sort symbols by address"),
                          cl::cat(NMCat));
cl::alias NumericSortn("n", cl::desc("Alias for --numeric-sort"),
                       cl::aliasopt(NumericSort), cl::Grouping);
cl::alias NumericSortv("v", cl::desc("Alias for --numeric-sort"),
                       cl::aliasopt(NumericSort), cl::Grouping);

cl::opt<bool> NoSort("no-sort", cl::desc("Show symbols in order encountered"),
                     cl::cat(NMCat));
cl::alias NoSortp("p", cl::desc("Alias for --no-sort"), cl::aliasopt(NoSort),
                  cl::Grouping);

cl::opt<bool> Demangle("demangle", cl::ZeroOrMore,
                       cl::desc("Demangle C++ symbol names"), cl::cat(NMCat));
cl::alias DemangleC("C", cl::desc("Alias for --demangle"),
                    cl::aliasopt(Demangle), cl::Grouping);
cl::opt<bool> NoDemangle("no-demangle", cl::init(false), cl::ZeroOrMore,
                         cl::desc("Don't demangle symbol names"),
                         cl::cat(NMCat));

cl::opt<bool> ReverseSort("reverse-sort", cl::desc("Sort in reverse order"),
                          cl::cat(NMCat));
cl::alias ReverseSortr("r", cl::desc("Alias for --reverse-sort"),
                       cl::aliasopt(ReverseSort), cl::Grouping);

cl::opt<bool> PrintSize("print-size",
                        cl::desc("Show symbol size as well as address"),
                        cl::cat(NMCat));
cl::alias PrintSizeS("S", cl::desc("Alias for --print-size"),
                     cl::aliasopt(PrintSize), cl::Grouping);
// NOTE(review): not used in the visible code; the name suggests it records
// that a warning about --print-size on Mach-O input was already issued --
// confirm at its use site.
bool MachOPrintSizeWarning = false;

cl::opt<bool> SizeSort("size-sort", cl::desc("Sort symbols by size"),
                       cl::cat(NMCat));

cl::opt<bool> WithoutAliases("without-aliases", cl::Hidden,
                             cl::desc("Exclude aliases from output"),
                             cl::cat(NMCat));

cl::opt<bool> ArchiveMap("print-armap", cl::desc("Print the archive map"),
                         cl::cat(NMCat));
cl::alias ArchiveMaps("M", cl::desc("Alias for --print-armap"),
                      cl::aliasopt(ArchiveMap), cl::Grouping);

// Number base for printed symbol values; the enumerator names double as the
// accepted option values (d, o, x).
enum Radix { d, o, x };
cl::opt<Radix>
    AddressRadix("radix", cl::desc("Radix (o/d/x) for printing symbol Values"),
                 cl::values(clEnumVal(d, "decimal"), clEnumVal(o, "octal"),
                            clEnumVal(x, "hexadecimal")),
                 cl::init(x), cl::cat(NMCat));
cl::alias RadixAlias("t", cl::desc("Alias for --radix"),
                     cl::aliasopt(AddressRadix));

cl::opt<bool> JustSymbolName("just-symbol-name",
                             cl::desc("Print just the symbol's name"),
                             cl::cat(NMCat));
cl::alias JustSymbolNames("j", cl::desc("Alias for --just-symbol-name"),
                          cl::aliasopt(JustSymbolName), cl::Grouping);

cl::opt<bool>
    SpecialSyms("special-syms",
                cl::desc("Do not filter special symbols from the output"),
                cl::cat(NMCat));

// Mach-O specific options.
// -s consumes exactly two values per occurrence (segment, then section).
cl::list<std::string> SegSect("s", cl::multi_val(2), cl::ZeroOrMore,
                              cl::value_desc("segment section"), cl::Hidden,
                              cl::desc("Dump only symbols from this segment "
                                       "and section name, Mach-O only"),
                              cl::cat(NMCat));

cl::opt<bool> FormatMachOasHex("x",
                               cl::desc("Print symbol entry in hex, "
                                        "Mach-O only"),
                               cl::Grouping, cl::cat(NMCat));
cl::opt<bool> AddDyldInfo("add-dyldinfo",
                          cl::desc("Add symbols from the dyldinfo not already "
                                   "in the symbol table, Mach-O only"),
                          cl::cat(NMCat));
cl::opt<bool> NoDyldInfo("no-dyldinfo",
                         cl::desc("Don't add any symbols from the dyldinfo, "
                                  "Mach-O only"),
                         cl::cat(NMCat));
cl::opt<bool> DyldInfoOnly("dyldinfo-only",
                           cl::desc("Show only symbols from the dyldinfo, "
                                    "Mach-O only"),
                           cl::cat(NMCat));

cl::opt<bool> NoLLVMBitcode("no-llvm-bc",
                            cl::desc("Disable LLVM bitcode reader"),
                            cl::cat(NMCat));

cl::opt<bool> AddInlinedInfo("add-inlinedinfo",
                             cl::desc("Add symbols from the inlined libraries, "
                                      "TBD(Mach-O) only"),
                             cl::cat(NMCat));

cl::extrahelp HelpResponse("\nPass @FILE as argument to read options from FILE.\n");

// Tool-wide state shared by the printing and error-reporting helpers.

// Read by darwinPrintSymbol() to decide whether the address column is printed.
bool PrintAddress = true;

// Read by sortAndPrintSymbolList() when deciding whether to print a per-file
// header in the bsd and posix formats.
bool MultipleFiles = false;

// Set to true by every error() overload.
bool HadError = false;

// Passed as the prefix to WithColor::error() by the error() overloads.
std::string ToolName;
} // anonymous namespace
232 
error(Twine Message,Twine Path=Twine ())233 static void error(Twine Message, Twine Path = Twine()) {
234   HadError = true;
235   WithColor::error(errs(), ToolName) << Path << ": " << Message << ".\n";
236 }
237 
error(std::error_code EC,Twine Path=Twine ())238 static bool error(std::error_code EC, Twine Path = Twine()) {
239   if (EC) {
240     error(EC.message(), Path);
241     return true;
242   }
243   return false;
244 }
245 
246 // This version of error() prints the archive name and member name, for example:
247 // "libx.a(foo.o)" after the ToolName before the error message.  It sets
248 // HadError but returns allowing the code to move on to other archive members.
error(llvm::Error E,StringRef FileName,const Archive::Child & C,StringRef ArchitectureName=StringRef ())249 static void error(llvm::Error E, StringRef FileName, const Archive::Child &C,
250                   StringRef ArchitectureName = StringRef()) {
251   HadError = true;
252   WithColor::error(errs(), ToolName) << FileName;
253 
254   Expected<StringRef> NameOrErr = C.getName();
255   // TODO: if we have a error getting the name then it would be nice to print
256   // the index of which archive member this is and or its offset in the
257   // archive instead of "???" as the name.
258   if (!NameOrErr) {
259     consumeError(NameOrErr.takeError());
260     errs() << "(" << "???" << ")";
261   } else
262     errs() << "(" << NameOrErr.get() << ")";
263 
264   if (!ArchitectureName.empty())
265     errs() << " (for architecture " << ArchitectureName << ") ";
266 
267   std::string Buf;
268   raw_string_ostream OS(Buf);
269   logAllUnhandledErrors(std::move(E), OS);
270   OS.flush();
271   errs() << " " << Buf << "\n";
272 }
273 
274 // This version of error() prints the file name and which architecture slice it
275 // is from, for example: "foo.o (for architecture i386)" after the ToolName
276 // before the error message.  It sets HadError but returns allowing the code to
277 // move on to other architecture slices.
error(llvm::Error E,StringRef FileName,StringRef ArchitectureName=StringRef ())278 static void error(llvm::Error E, StringRef FileName,
279                   StringRef ArchitectureName = StringRef()) {
280   HadError = true;
281   WithColor::error(errs(), ToolName) << FileName;
282 
283   if (!ArchitectureName.empty())
284     errs() << " (for architecture " << ArchitectureName << ") ";
285 
286   std::string Buf;
287   raw_string_ostream OS(Buf);
288   logAllUnhandledErrors(std::move(E), OS);
289   OS.flush();
290   errs() << " " << Buf << "\n";
291 }
292 
namespace {
// One entry of the symbol list that llvm-nm sorts and prints.
struct NMSymbol {
  // Address and Size are sort keys for the comparison helpers in this file.
  uint64_t Address;
  uint64_t Size;
  // nm-style type letter; 'U', 'w' and 'v' are treated as "not defined" by
  // symbolIsDefined().
  char TypeChar;
  StringRef Name;
  // NOTE(review): SectionName and TypeName are not read in the visible part of
  // the file; presumably they feed the sysv output columns -- confirm.
  StringRef SectionName;
  StringRef TypeName;
  BasicSymbolRef Sym;
  // The Sym field above points to the native symbol in the object file,
  // for Mach-O when we are creating symbols from the dyld info the above
  // pointer is null as there is no native symbol.  In these cases the fields
  // below are filled in to represent what would have been a Mach-O nlist
  // native symbol.
  uint32_t SymFlags;
  SectionRef Section;
  uint8_t NType;
  uint8_t NSect;
  uint16_t NDesc;
  // Printed by darwinPrintSymbol() as the target of an indirect (N_INDR)
  // symbol when there is no native symbol to query.
  StringRef IndirectName;
};
} // anonymous namespace
315 
compareSymbolAddress(const NMSymbol & A,const NMSymbol & B)316 static bool compareSymbolAddress(const NMSymbol &A, const NMSymbol &B) {
317   bool ADefined;
318   // Symbol flags have been checked in the caller.
319   if (A.Sym.getRawDataRefImpl().p) {
320     uint32_t AFlags = cantFail(A.Sym.getFlags());
321     ADefined = !(AFlags & SymbolRef::SF_Undefined);
322   } else {
323     ADefined = A.TypeChar != 'U';
324   }
325   bool BDefined;
326   // Symbol flags have been checked in the caller.
327   if (B.Sym.getRawDataRefImpl().p) {
328     uint32_t BFlags = cantFail(B.Sym.getFlags());
329     BDefined = !(BFlags & SymbolRef::SF_Undefined);
330   } else {
331     BDefined = B.TypeChar != 'U';
332   }
333   return std::make_tuple(ADefined, A.Address, A.Name, A.Size) <
334          std::make_tuple(BDefined, B.Address, B.Name, B.Size);
335 }
336 
compareSymbolSize(const NMSymbol & A,const NMSymbol & B)337 static bool compareSymbolSize(const NMSymbol &A, const NMSymbol &B) {
338   return std::make_tuple(A.Size, A.Name, A.Address) <
339          std::make_tuple(B.Size, B.Name, B.Address);
340 }
341 
compareSymbolName(const NMSymbol & A,const NMSymbol & B)342 static bool compareSymbolName(const NMSymbol &A, const NMSymbol &B) {
343   return std::make_tuple(A.Name, A.Size, A.Address) <
344          std::make_tuple(B.Name, B.Size, B.Address);
345 }
346 
isSymbolList64Bit(SymbolicFile & Obj)347 static char isSymbolList64Bit(SymbolicFile &Obj) {
348   if (auto *IRObj = dyn_cast<IRObjectFile>(&Obj))
349     return Triple(IRObj->getTargetTriple()).isArch64Bit();
350   if (isa<COFFObjectFile>(Obj) || isa<COFFImportFile>(Obj))
351     return false;
352   if (isa<WasmObjectFile>(Obj))
353     return false;
354   if (TapiFile *Tapi = dyn_cast<TapiFile>(&Obj))
355     return Tapi->is64Bit();
356   if (MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(&Obj))
357     return MachO->is64Bit();
358   return cast<ELFObjectFileBase>(Obj).getBytesInAddress() == 8;
359 }
360 
// Name of the file being reported on; printed by writeFileName() and in the
// per-file headers emitted by sortAndPrintSymbolList().
static StringRef CurrentFilename;
// Symbols collected for the current file; sorted and printed by
// sortAndPrintSymbolList().
static std::vector<NMSymbol> SymbolList;

// Forward declaration; the definition is not in this part of the file.
static char getSymbolNMTypeChar(IRObjectFile &Obj, basic_symbol_iterator I);
365 
366 // darwinPrintSymbol() is used to print a symbol from a Mach-O file when the
367 // the OutputFormat is darwin or we are printing Mach-O symbols in hex.  For
368 // the darwin format it produces the same output as darwin's nm(1) -m output
369 // and when printing Mach-O symbols in hex it produces the same output as
370 // darwin's nm(1) -x format.
darwinPrintSymbol(SymbolicFile & Obj,const NMSymbol & S,char * SymbolAddrStr,const char * printBlanks,const char * printDashes,const char * printFormat)371 static void darwinPrintSymbol(SymbolicFile &Obj, const NMSymbol &S,
372                               char *SymbolAddrStr, const char *printBlanks,
373                               const char *printDashes,
374                               const char *printFormat) {
375   MachO::mach_header H;
376   MachO::mach_header_64 H_64;
377   uint32_t Filetype = MachO::MH_OBJECT;
378   uint32_t Flags = 0;
379   uint8_t NType = 0;
380   uint8_t NSect = 0;
381   uint16_t NDesc = 0;
382   uint32_t NStrx = 0;
383   uint64_t NValue = 0;
384   MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(&Obj);
385   if (Obj.isIR()) {
386     uint32_t SymFlags = cantFail(S.Sym.getFlags());
387     if (SymFlags & SymbolRef::SF_Global)
388       NType |= MachO::N_EXT;
389     if (SymFlags & SymbolRef::SF_Hidden)
390       NType |= MachO::N_PEXT;
391     if (SymFlags & SymbolRef::SF_Undefined)
392       NType |= MachO::N_EXT | MachO::N_UNDF;
393     else {
394       // Here we have a symbol definition.  So to fake out a section name we
395       // use 1, 2 and 3 for section numbers.  See below where they are used to
396       // print out fake section names.
397       NType |= MachO::N_SECT;
398       if (SymFlags & SymbolRef::SF_Const)
399         NSect = 3;
400       else if (SymFlags & SymbolRef::SF_Executable)
401         NSect = 1;
402       else
403         NSect = 2;
404     }
405     if (SymFlags & SymbolRef::SF_Weak)
406       NDesc |= MachO::N_WEAK_DEF;
407   } else {
408     DataRefImpl SymDRI = S.Sym.getRawDataRefImpl();
409     if (MachO->is64Bit()) {
410       H_64 = MachO->MachOObjectFile::getHeader64();
411       Filetype = H_64.filetype;
412       Flags = H_64.flags;
413       if (SymDRI.p){
414         MachO::nlist_64 STE_64 = MachO->getSymbol64TableEntry(SymDRI);
415         NType = STE_64.n_type;
416         NSect = STE_64.n_sect;
417         NDesc = STE_64.n_desc;
418         NStrx = STE_64.n_strx;
419         NValue = STE_64.n_value;
420       } else {
421         NType = S.NType;
422         NSect = S.NSect;
423         NDesc = S.NDesc;
424         NStrx = 0;
425         NValue = S.Address;
426       }
427     } else {
428       H = MachO->MachOObjectFile::getHeader();
429       Filetype = H.filetype;
430       Flags = H.flags;
431       if (SymDRI.p){
432         MachO::nlist STE = MachO->getSymbolTableEntry(SymDRI);
433         NType = STE.n_type;
434         NSect = STE.n_sect;
435         NDesc = STE.n_desc;
436         NStrx = STE.n_strx;
437         NValue = STE.n_value;
438       } else {
439         NType = S.NType;
440         NSect = S.NSect;
441         NDesc = S.NDesc;
442         NStrx = 0;
443         NValue = S.Address;
444       }
445     }
446   }
447 
448   // If we are printing Mach-O symbols in hex do that and return.
449   if (FormatMachOasHex) {
450     outs() << format(printFormat, NValue) << ' '
451            << format("%02x %02x %04x %08x", NType, NSect, NDesc, NStrx) << ' '
452            << S.Name;
453     if ((NType & MachO::N_TYPE) == MachO::N_INDR) {
454       outs() << " (indirect for ";
455       outs() << format(printFormat, NValue) << ' ';
456       StringRef IndirectName;
457       if (S.Sym.getRawDataRefImpl().p) {
458         if (MachO->getIndirectName(S.Sym.getRawDataRefImpl(), IndirectName))
459           outs() << "?)";
460         else
461           outs() << IndirectName << ")";
462       } else
463         outs() << S.IndirectName << ")";
464     }
465     outs() << "\n";
466     return;
467   }
468 
469   if (PrintAddress) {
470     if ((NType & MachO::N_TYPE) == MachO::N_INDR)
471       strcpy(SymbolAddrStr, printBlanks);
472     if (Obj.isIR() && (NType & MachO::N_TYPE) == MachO::N_TYPE)
473       strcpy(SymbolAddrStr, printDashes);
474     outs() << SymbolAddrStr << ' ';
475   }
476 
477   switch (NType & MachO::N_TYPE) {
478   case MachO::N_UNDF:
479     if (NValue != 0) {
480       outs() << "(common) ";
481       if (MachO::GET_COMM_ALIGN(NDesc) != 0)
482         outs() << "(alignment 2^" << (int)MachO::GET_COMM_ALIGN(NDesc) << ") ";
483     } else {
484       if ((NType & MachO::N_TYPE) == MachO::N_PBUD)
485         outs() << "(prebound ";
486       else
487         outs() << "(";
488       if ((NDesc & MachO::REFERENCE_TYPE) ==
489           MachO::REFERENCE_FLAG_UNDEFINED_LAZY)
490         outs() << "undefined [lazy bound]) ";
491       else if ((NDesc & MachO::REFERENCE_TYPE) ==
492                MachO::REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY)
493         outs() << "undefined [private lazy bound]) ";
494       else if ((NDesc & MachO::REFERENCE_TYPE) ==
495                MachO::REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY)
496         outs() << "undefined [private]) ";
497       else
498         outs() << "undefined) ";
499     }
500     break;
501   case MachO::N_ABS:
502     outs() << "(absolute) ";
503     break;
504   case MachO::N_INDR:
505     outs() << "(indirect) ";
506     break;
507   case MachO::N_SECT: {
508     if (Obj.isIR()) {
509       // For llvm bitcode files print out a fake section name using the values
510       // use 1, 2 and 3 for section numbers as set above.
511       if (NSect == 1)
512         outs() << "(LTO,CODE) ";
513       else if (NSect == 2)
514         outs() << "(LTO,DATA) ";
515       else if (NSect == 3)
516         outs() << "(LTO,RODATA) ";
517       else
518         outs() << "(?,?) ";
519       break;
520     }
521     section_iterator Sec = SectionRef();
522     if (S.Sym.getRawDataRefImpl().p) {
523       Expected<section_iterator> SecOrErr =
524           MachO->getSymbolSection(S.Sym.getRawDataRefImpl());
525       if (!SecOrErr) {
526         consumeError(SecOrErr.takeError());
527         outs() << "(?,?) ";
528         break;
529       }
530       Sec = *SecOrErr;
531       if (Sec == MachO->section_end()) {
532         outs() << "(?,?) ";
533         break;
534       }
535     } else {
536       Sec = S.Section;
537     }
538     DataRefImpl Ref = Sec->getRawDataRefImpl();
539     StringRef SectionName;
540     if (Expected<StringRef> NameOrErr = MachO->getSectionName(Ref))
541       SectionName = *NameOrErr;
542     StringRef SegmentName = MachO->getSectionFinalSegmentName(Ref);
543     outs() << "(" << SegmentName << "," << SectionName << ") ";
544     break;
545   }
546   default:
547     outs() << "(?) ";
548     break;
549   }
550 
551   if (NType & MachO::N_EXT) {
552     if (NDesc & MachO::REFERENCED_DYNAMICALLY)
553       outs() << "[referenced dynamically] ";
554     if (NType & MachO::N_PEXT) {
555       if ((NDesc & MachO::N_WEAK_DEF) == MachO::N_WEAK_DEF)
556         outs() << "weak private external ";
557       else
558         outs() << "private external ";
559     } else {
560       if ((NDesc & MachO::N_WEAK_REF) == MachO::N_WEAK_REF ||
561           (NDesc & MachO::N_WEAK_DEF) == MachO::N_WEAK_DEF) {
562         if ((NDesc & (MachO::N_WEAK_REF | MachO::N_WEAK_DEF)) ==
563             (MachO::N_WEAK_REF | MachO::N_WEAK_DEF))
564           outs() << "weak external automatically hidden ";
565         else
566           outs() << "weak external ";
567       } else
568         outs() << "external ";
569     }
570   } else {
571     if (NType & MachO::N_PEXT)
572       outs() << "non-external (was a private external) ";
573     else
574       outs() << "non-external ";
575   }
576 
577   if (Filetype == MachO::MH_OBJECT) {
578     if (NDesc & MachO::N_NO_DEAD_STRIP)
579       outs() << "[no dead strip] ";
580     if ((NType & MachO::N_TYPE) != MachO::N_UNDF &&
581         NDesc & MachO::N_SYMBOL_RESOLVER)
582       outs() << "[symbol resolver] ";
583     if ((NType & MachO::N_TYPE) != MachO::N_UNDF && NDesc & MachO::N_ALT_ENTRY)
584       outs() << "[alt entry] ";
585     if ((NType & MachO::N_TYPE) != MachO::N_UNDF && NDesc & MachO::N_COLD_FUNC)
586       outs() << "[cold func] ";
587   }
588 
589   if ((NDesc & MachO::N_ARM_THUMB_DEF) == MachO::N_ARM_THUMB_DEF)
590     outs() << "[Thumb] ";
591 
592   if ((NType & MachO::N_TYPE) == MachO::N_INDR) {
593     outs() << S.Name << " (for ";
594     StringRef IndirectName;
595     if (MachO) {
596       if (S.Sym.getRawDataRefImpl().p) {
597         if (MachO->getIndirectName(S.Sym.getRawDataRefImpl(), IndirectName))
598           outs() << "?)";
599         else
600           outs() << IndirectName << ")";
601       } else
602         outs() << S.IndirectName << ")";
603     } else
604       outs() << "?)";
605   } else
606     outs() << S.Name;
607 
608   if ((Flags & MachO::MH_TWOLEVEL) == MachO::MH_TWOLEVEL &&
609       (((NType & MachO::N_TYPE) == MachO::N_UNDF && NValue == 0) ||
610        (NType & MachO::N_TYPE) == MachO::N_PBUD)) {
611     uint32_t LibraryOrdinal = MachO::GET_LIBRARY_ORDINAL(NDesc);
612     if (LibraryOrdinal != 0) {
613       if (LibraryOrdinal == MachO::EXECUTABLE_ORDINAL)
614         outs() << " (from executable)";
615       else if (LibraryOrdinal == MachO::DYNAMIC_LOOKUP_ORDINAL)
616         outs() << " (dynamically looked up)";
617       else {
618         StringRef LibraryName;
619         if (!MachO ||
620             MachO->getLibraryShortNameByIndex(LibraryOrdinal - 1, LibraryName))
621           outs() << " (from bad library ordinal " << LibraryOrdinal << ")";
622         else
623           outs() << " (from " << LibraryName << ")";
624       }
625     }
626   }
627 
628   outs() << "\n";
629 }
630 
// Table that maps Darwin's Mach-O stab constants to strings to allow printing.
// Each entry pairs an n_type value with the label darwinPrintStab() prints for
// it; getDarwinStabString() searches the table linearly.
struct DarwinStabName {
  uint8_t NType;    // Stab n_type value.
  const char *Name; // Label printed for that value.
};
static const struct DarwinStabName DarwinStabNames[] = {
    {MachO::N_GSYM, "GSYM"},
    {MachO::N_FNAME, "FNAME"},
    {MachO::N_FUN, "FUN"},
    {MachO::N_STSYM, "STSYM"},
    {MachO::N_LCSYM, "LCSYM"},
    {MachO::N_BNSYM, "BNSYM"},
    {MachO::N_PC, "PC"},
    {MachO::N_AST, "AST"},
    {MachO::N_OPT, "OPT"},
    {MachO::N_RSYM, "RSYM"},
    {MachO::N_SLINE, "SLINE"},
    {MachO::N_ENSYM, "ENSYM"},
    {MachO::N_SSYM, "SSYM"},
    {MachO::N_SO, "SO"},
    {MachO::N_OSO, "OSO"},
    {MachO::N_LSYM, "LSYM"},
    {MachO::N_BINCL, "BINCL"},
    {MachO::N_SOL, "SOL"},
    {MachO::N_PARAMS, "PARAM"},
    {MachO::N_VERSION, "VERS"},
    {MachO::N_OLEVEL, "OLEV"},
    {MachO::N_PSYM, "PSYM"},
    {MachO::N_EINCL, "EINCL"},
    {MachO::N_ENTRY, "ENTRY"},
    {MachO::N_LBRAC, "LBRAC"},
    {MachO::N_EXCL, "EXCL"},
    {MachO::N_RBRAC, "RBRAC"},
    {MachO::N_BCOMM, "BCOMM"},
    {MachO::N_ECOMM, "ECOMM"},
    {MachO::N_ECOML, "ECOML"},
    {MachO::N_LENG, "LENG"},
};
669 
getDarwinStabString(uint8_t NType)670 static const char *getDarwinStabString(uint8_t NType) {
671   for (auto I : makeArrayRef(DarwinStabNames))
672     if (I.NType == NType)
673       return I.Name;
674   return nullptr;
675 }
676 
677 // darwinPrintStab() prints the n_sect, n_desc along with a symbolic name of
678 // a stab n_type value in a Mach-O file.
darwinPrintStab(MachOObjectFile * MachO,const NMSymbol & S)679 static void darwinPrintStab(MachOObjectFile *MachO, const NMSymbol &S) {
680   MachO::nlist_64 STE_64;
681   MachO::nlist STE;
682   uint8_t NType;
683   uint8_t NSect;
684   uint16_t NDesc;
685   DataRefImpl SymDRI = S.Sym.getRawDataRefImpl();
686   if (MachO->is64Bit()) {
687     STE_64 = MachO->getSymbol64TableEntry(SymDRI);
688     NType = STE_64.n_type;
689     NSect = STE_64.n_sect;
690     NDesc = STE_64.n_desc;
691   } else {
692     STE = MachO->getSymbolTableEntry(SymDRI);
693     NType = STE.n_type;
694     NSect = STE.n_sect;
695     NDesc = STE.n_desc;
696   }
697 
698   outs() << format(" %02x %04x ", NSect, NDesc);
699   if (const char *stabString = getDarwinStabString(NType))
700     outs() << format("%5.5s", stabString);
701   else
702     outs() << format("   %02x", NType);
703 }
704 
demangle(StringRef Name,bool StripUnderscore)705 static Optional<std::string> demangle(StringRef Name, bool StripUnderscore) {
706   if (StripUnderscore && !Name.empty() && Name[0] == '_')
707     Name = Name.substr(1);
708 
709   if (!Name.startswith("_Z"))
710     return None;
711 
712   int Status;
713   char *Undecorated =
714       itaniumDemangle(Name.str().c_str(), nullptr, nullptr, &Status);
715   if (Status != 0)
716     return None;
717 
718   std::string S(Undecorated);
719   free(Undecorated);
720   return S;
721 }
722 
symbolIsDefined(const NMSymbol & Sym)723 static bool symbolIsDefined(const NMSymbol &Sym) {
724   return Sym.TypeChar != 'U' && Sym.TypeChar != 'w' && Sym.TypeChar != 'v';
725 }
726 
writeFileName(raw_ostream & S,StringRef ArchiveName,StringRef ArchitectureName)727 static void writeFileName(raw_ostream &S, StringRef ArchiveName,
728                           StringRef ArchitectureName) {
729   if (!ArchitectureName.empty())
730     S << "(for architecture " << ArchitectureName << "):";
731   if (OutputFormat == posix && !ArchiveName.empty())
732     S << ArchiveName << "[" << CurrentFilename << "]: ";
733   else {
734     if (!ArchiveName.empty())
735       S << ArchiveName << ":";
736     S << CurrentFilename << ": ";
737   }
738 }
739 
isSpecialSym(SymbolicFile & Obj,StringRef Name)740 static bool isSpecialSym(SymbolicFile &Obj, StringRef Name) {
741   auto *ELFObj = dyn_cast<ELFObjectFileBase>(&Obj);
742   if (!ELFObj)
743     return false;
744   uint16_t EMachine = ELFObj->getEMachine();
745   if (EMachine != ELF::EM_ARM && EMachine != ELF::EM_AARCH64)
746     return false;
747   return !Name.empty() && Name[0] == '$';
748 }
749 
sortAndPrintSymbolList(SymbolicFile & Obj,bool printName,StringRef ArchiveName,StringRef ArchitectureName)750 static void sortAndPrintSymbolList(SymbolicFile &Obj, bool printName,
751                                    StringRef ArchiveName,
752                                    StringRef ArchitectureName) {
753   if (!NoSort) {
754     using Comparator = bool (*)(const NMSymbol &, const NMSymbol &);
755     Comparator Cmp;
756     if (NumericSort)
757       Cmp = &compareSymbolAddress;
758     else if (SizeSort)
759       Cmp = &compareSymbolSize;
760     else
761       Cmp = &compareSymbolName;
762 
763     if (ReverseSort)
764       llvm::sort(SymbolList, [=](const NMSymbol &A, const NMSymbol &B) -> bool {
765         return Cmp(B, A);
766       });
767     else
768       llvm::sort(SymbolList, Cmp);
769   }
770 
771   if (!PrintFileName) {
772     if (OutputFormat == posix && MultipleFiles && printName) {
773       outs() << '\n' << CurrentFilename << ":\n";
774     } else if (OutputFormat == bsd && MultipleFiles && printName) {
775       outs() << "\n" << CurrentFilename << ":\n";
776     } else if (OutputFormat == sysv) {
777       outs() << "\n\nSymbols from " << CurrentFilename << ":\n\n";
778       if (isSymbolList64Bit(Obj))
779         outs() << "Name                  Value           Class        Type"
780                << "         Size             Line  Section\n";
781       else
782         outs() << "Name                  Value   Class        Type"
783                << "         Size     Line  Section\n";
784     }
785   }
786 
787   const char *printBlanks, *printDashes, *printFormat;
788   if (isSymbolList64Bit(Obj)) {
789     printBlanks = "                ";
790     printDashes = "----------------";
791     switch (AddressRadix) {
792     case Radix::o:
793       printFormat = OutputFormat == posix ? "%" PRIo64 : "%016" PRIo64;
794       break;
795     case Radix::x:
796       printFormat = OutputFormat == posix ? "%" PRIx64 : "%016" PRIx64;
797       break;
798     default:
799       printFormat = OutputFormat == posix ? "%" PRId64 : "%016" PRId64;
800     }
801   } else {
802     printBlanks = "        ";
803     printDashes = "--------";
804     switch (AddressRadix) {
805     case Radix::o:
806       printFormat = OutputFormat == posix ? "%" PRIo64 : "%08" PRIo64;
807       break;
808     case Radix::x:
809       printFormat = OutputFormat == posix ? "%" PRIx64 : "%08" PRIx64;
810       break;
811     default:
812       printFormat = OutputFormat == posix ? "%" PRId64 : "%08" PRId64;
813     }
814   }
815 
816   for (const NMSymbol &S : SymbolList) {
817     uint32_t SymFlags;
818     std::string Name = S.Name.str();
819     MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(&Obj);
820     if (Demangle) {
821       if (Optional<std::string> Opt = demangle(S.Name, MachO))
822         Name = *Opt;
823     }
824     if (S.Sym.getRawDataRefImpl().p) {
825       Expected<uint32_t> SymFlagsOrErr = S.Sym.getFlags();
826       if (!SymFlagsOrErr) {
827         // TODO: Test this error.
828         error(SymFlagsOrErr.takeError(), Obj.getFileName());
829         return;
830       }
831       SymFlags = *SymFlagsOrErr;
832     } else
833       SymFlags = S.SymFlags;
834 
835     bool Undefined = SymFlags & SymbolRef::SF_Undefined;
836     bool Global = SymFlags & SymbolRef::SF_Global;
837     bool Weak = SymFlags & SymbolRef::SF_Weak;
838     if ((!Undefined && UndefinedOnly) || (Undefined && DefinedOnly) ||
839         (!Global && ExternalOnly) || (Weak && NoWeakSymbols) ||
840         (!SpecialSyms && isSpecialSym(Obj, Name)))
841       continue;
842     if (PrintFileName)
843       writeFileName(outs(), ArchiveName, ArchitectureName);
844     if ((JustSymbolName ||
845          (UndefinedOnly && MachO && OutputFormat != darwin)) &&
846         OutputFormat != posix) {
847       outs() << Name << "\n";
848       continue;
849     }
850 
851     char SymbolAddrStr[23], SymbolSizeStr[23];
852 
853     // If the format is SysV or the symbol isn't defined, then print spaces.
854     if (OutputFormat == sysv || !symbolIsDefined(S)) {
855       if (OutputFormat == posix) {
856         format(printFormat, S.Address)
857             .print(SymbolAddrStr, sizeof(SymbolAddrStr));
858         format(printFormat, S.Size).print(SymbolSizeStr, sizeof(SymbolSizeStr));
859       } else {
860         strcpy(SymbolAddrStr, printBlanks);
861         strcpy(SymbolSizeStr, printBlanks);
862       }
863     }
864 
865     if (symbolIsDefined(S)) {
866       // Otherwise, print the symbol address and size.
867       if (Obj.isIR())
868         strcpy(SymbolAddrStr, printDashes);
869       else if (MachO && S.TypeChar == 'I')
870         strcpy(SymbolAddrStr, printBlanks);
871       else
872         format(printFormat, S.Address)
873             .print(SymbolAddrStr, sizeof(SymbolAddrStr));
874       format(printFormat, S.Size).print(SymbolSizeStr, sizeof(SymbolSizeStr));
875     }
876 
877     // If OutputFormat is darwin or we are printing Mach-O symbols in hex and
878     // we have a MachOObjectFile, call darwinPrintSymbol to print as darwin's
879     // nm(1) -m output or hex, else if OutputFormat is darwin or we are
880     // printing Mach-O symbols in hex and not a Mach-O object fall back to
881     // OutputFormat bsd (see below).
882     if ((OutputFormat == darwin || FormatMachOasHex) && (MachO || Obj.isIR())) {
883       darwinPrintSymbol(Obj, S, SymbolAddrStr, printBlanks, printDashes,
884                         printFormat);
885     } else if (OutputFormat == posix) {
886       outs() << Name << " " << S.TypeChar << " " << SymbolAddrStr << " "
887              << (MachO ? "0" : SymbolSizeStr) << "\n";
888     } else if (OutputFormat == bsd || (OutputFormat == darwin && !MachO)) {
889       if (PrintAddress)
890         outs() << SymbolAddrStr << ' ';
891       if (PrintSize)
892         outs() << SymbolSizeStr << ' ';
893       outs() << S.TypeChar;
894       if (S.TypeChar == '-' && MachO)
895         darwinPrintStab(MachO, S);
896       outs() << " " << Name;
897       if (S.TypeChar == 'I' && MachO) {
898         outs() << " (indirect for ";
899         if (S.Sym.getRawDataRefImpl().p) {
900           StringRef IndirectName;
901           if (MachO->getIndirectName(S.Sym.getRawDataRefImpl(), IndirectName))
902             outs() << "?)";
903           else
904             outs() << IndirectName << ")";
905         } else
906           outs() << S.IndirectName << ")";
907       }
908       outs() << "\n";
909     } else if (OutputFormat == sysv) {
910       outs() << left_justify(Name, 20) << "|" << SymbolAddrStr << "|   "
911              << S.TypeChar << "  |" << right_justify(S.TypeName, 18) << "|"
912              << SymbolSizeStr << "|     |" << S.SectionName << "\n";
913     }
914   }
915 
916   SymbolList.clear();
917 }
918 
getSymbolNMTypeChar(ELFObjectFileBase & Obj,basic_symbol_iterator I)919 static char getSymbolNMTypeChar(ELFObjectFileBase &Obj,
920                                 basic_symbol_iterator I) {
921   // OK, this is ELF
922   elf_symbol_iterator SymI(I);
923 
924   Expected<elf_section_iterator> SecIOrErr = SymI->getSection();
925   if (!SecIOrErr) {
926     consumeError(SecIOrErr.takeError());
927     return '?';
928   }
929 
930   uint8_t Binding = SymI->getBinding();
931   if (Binding == ELF::STB_GNU_UNIQUE)
932     return 'u';
933 
934   assert(Binding != ELF::STB_WEAK && "STB_WEAK not tested in calling function");
935   if (Binding != ELF::STB_GLOBAL && Binding != ELF::STB_LOCAL)
936     return '?';
937 
938   elf_section_iterator SecI = *SecIOrErr;
939   if (SecI != Obj.section_end()) {
940     uint32_t Type = SecI->getType();
941     uint64_t Flags = SecI->getFlags();
942     if (Flags & ELF::SHF_EXECINSTR)
943       return 't';
944     if (Type == ELF::SHT_NOBITS)
945       return 'b';
946     if (Flags & ELF::SHF_ALLOC)
947       return Flags & ELF::SHF_WRITE ? 'd' : 'r';
948 
949     auto NameOrErr = SecI->getName();
950     if (!NameOrErr) {
951       consumeError(NameOrErr.takeError());
952       return '?';
953     }
954     if ((*NameOrErr).startswith(".debug"))
955       return 'N';
956     if (!(Flags & ELF::SHF_WRITE))
957       return 'n';
958   }
959 
960   return '?';
961 }
962 
getSymbolNMTypeChar(COFFObjectFile & Obj,symbol_iterator I)963 static char getSymbolNMTypeChar(COFFObjectFile &Obj, symbol_iterator I) {
964   COFFSymbolRef Symb = Obj.getCOFFSymbol(*I);
965   // OK, this is COFF.
966   symbol_iterator SymI(I);
967 
968   Expected<StringRef> Name = SymI->getName();
969   if (!Name) {
970     consumeError(Name.takeError());
971     return '?';
972   }
973 
974   char Ret = StringSwitch<char>(*Name)
975                  .StartsWith(".debug", 'N')
976                  .StartsWith(".sxdata", 'N')
977                  .Default('?');
978 
979   if (Ret != '?')
980     return Ret;
981 
982   uint32_t Characteristics = 0;
983   if (!COFF::isReservedSectionNumber(Symb.getSectionNumber())) {
984     Expected<section_iterator> SecIOrErr = SymI->getSection();
985     if (!SecIOrErr) {
986       consumeError(SecIOrErr.takeError());
987       return '?';
988     }
989     section_iterator SecI = *SecIOrErr;
990     const coff_section *Section = Obj.getCOFFSection(*SecI);
991     Characteristics = Section->Characteristics;
992     if (Expected<StringRef> NameOrErr = Obj.getSectionName(Section))
993       if (NameOrErr->startswith(".idata"))
994         return 'i';
995   }
996 
997   switch (Symb.getSectionNumber()) {
998   case COFF::IMAGE_SYM_DEBUG:
999     return 'n';
1000   default:
1001     // Check section type.
1002     if (Characteristics & COFF::IMAGE_SCN_CNT_CODE)
1003       return 't';
1004     if (Characteristics & COFF::IMAGE_SCN_CNT_INITIALIZED_DATA)
1005       return Characteristics & COFF::IMAGE_SCN_MEM_WRITE ? 'd' : 'r';
1006     if (Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA)
1007       return 'b';
1008     if (Characteristics & COFF::IMAGE_SCN_LNK_INFO)
1009       return 'i';
1010     // Check for section symbol.
1011     if (Symb.isSectionDefinition())
1012       return 's';
1013   }
1014 
1015   return '?';
1016 }
1017 
getSymbolNMTypeChar(COFFImportFile & Obj)1018 static char getSymbolNMTypeChar(COFFImportFile &Obj) {
1019   switch (Obj.getCOFFImportHeader()->getType()) {
1020   case COFF::IMPORT_CODE:
1021     return 't';
1022   case COFF::IMPORT_DATA:
1023     return 'd';
1024   case COFF::IMPORT_CONST:
1025     return 'r';
1026   }
1027   return '?';
1028 }
1029 
getSymbolNMTypeChar(MachOObjectFile & Obj,basic_symbol_iterator I)1030 static char getSymbolNMTypeChar(MachOObjectFile &Obj, basic_symbol_iterator I) {
1031   DataRefImpl Symb = I->getRawDataRefImpl();
1032   uint8_t NType = Obj.is64Bit() ? Obj.getSymbol64TableEntry(Symb).n_type
1033                                 : Obj.getSymbolTableEntry(Symb).n_type;
1034 
1035   if (NType & MachO::N_STAB)
1036     return '-';
1037 
1038   switch (NType & MachO::N_TYPE) {
1039   case MachO::N_ABS:
1040     return 's';
1041   case MachO::N_INDR:
1042     return 'i';
1043   case MachO::N_SECT: {
1044     Expected<section_iterator> SecOrErr = Obj.getSymbolSection(Symb);
1045     if (!SecOrErr) {
1046       consumeError(SecOrErr.takeError());
1047       return 's';
1048     }
1049     section_iterator Sec = *SecOrErr;
1050     if (Sec == Obj.section_end())
1051       return 's';
1052     DataRefImpl Ref = Sec->getRawDataRefImpl();
1053     StringRef SectionName;
1054     if (Expected<StringRef> NameOrErr = Obj.getSectionName(Ref))
1055       SectionName = *NameOrErr;
1056     StringRef SegmentName = Obj.getSectionFinalSegmentName(Ref);
1057     if (Obj.is64Bit() && Obj.getHeader64().filetype == MachO::MH_KEXT_BUNDLE &&
1058         SegmentName == "__TEXT_EXEC" && SectionName == "__text")
1059       return 't';
1060     if (SegmentName == "__TEXT" && SectionName == "__text")
1061       return 't';
1062     if (SegmentName == "__DATA" && SectionName == "__data")
1063       return 'd';
1064     if (SegmentName == "__DATA" && SectionName == "__bss")
1065       return 'b';
1066     return 's';
1067   }
1068   }
1069 
1070   return '?';
1071 }
1072 
getSymbolNMTypeChar(TapiFile & Obj,basic_symbol_iterator I)1073 static char getSymbolNMTypeChar(TapiFile &Obj, basic_symbol_iterator I) {
1074   return 's';
1075 }
1076 
getSymbolNMTypeChar(WasmObjectFile & Obj,basic_symbol_iterator I)1077 static char getSymbolNMTypeChar(WasmObjectFile &Obj, basic_symbol_iterator I) {
1078   uint32_t Flags = cantFail(I->getFlags());
1079   if (Flags & SymbolRef::SF_Executable)
1080     return 't';
1081   return 'd';
1082 }
1083 
getSymbolNMTypeChar(IRObjectFile & Obj,basic_symbol_iterator I)1084 static char getSymbolNMTypeChar(IRObjectFile &Obj, basic_symbol_iterator I) {
1085   uint32_t Flags = cantFail(I->getFlags());
1086   // FIXME: should we print 'b'? At the IR level we cannot be sure if this
1087   // will be in bss or not, but we could approximate.
1088   if (Flags & SymbolRef::SF_Executable)
1089     return 't';
1090   else if (Triple(Obj.getTargetTriple()).isOSDarwin() &&
1091            (Flags & SymbolRef::SF_Const))
1092     return 's';
1093   else
1094     return 'd';
1095 }
1096 
isObject(SymbolicFile & Obj,basic_symbol_iterator I)1097 static bool isObject(SymbolicFile &Obj, basic_symbol_iterator I) {
1098   return !dyn_cast<ELFObjectFileBase>(&Obj)
1099              ? false
1100              : elf_symbol_iterator(I)->getELFType() == ELF::STT_OBJECT;
1101 }
1102 
1103 // For ELF object files, Set TypeName to the symbol typename, to be printed
1104 // in the 'Type' column of the SYSV format output.
getNMTypeName(SymbolicFile & Obj,basic_symbol_iterator I)1105 static StringRef getNMTypeName(SymbolicFile &Obj, basic_symbol_iterator I) {
1106   if (isa<ELFObjectFileBase>(&Obj)) {
1107     elf_symbol_iterator SymI(I);
1108     return SymI->getELFTypeName();
1109   }
1110   return "";
1111 }
1112 
1113 // Return Posix nm class type tag (single letter), but also set SecName and
1114 // section and name, to be used in format=sysv output.
getNMSectionTagAndName(SymbolicFile & Obj,basic_symbol_iterator I,StringRef & SecName)1115 static char getNMSectionTagAndName(SymbolicFile &Obj, basic_symbol_iterator I,
1116                                    StringRef &SecName) {
1117   // Symbol Flags have been checked in the caller.
1118   uint32_t Symflags = cantFail(I->getFlags());
1119   if (ELFObjectFileBase *ELFObj = dyn_cast<ELFObjectFileBase>(&Obj)) {
1120     if (Symflags & object::SymbolRef::SF_Absolute)
1121       SecName = "*ABS*";
1122     else if (Symflags & object::SymbolRef::SF_Common)
1123       SecName = "*COM*";
1124     else if (Symflags & object::SymbolRef::SF_Undefined)
1125       SecName = "*UND*";
1126     else {
1127       elf_symbol_iterator SymI(I);
1128       Expected<elf_section_iterator> SecIOrErr = SymI->getSection();
1129       if (!SecIOrErr) {
1130         consumeError(SecIOrErr.takeError());
1131         return '?';
1132       }
1133 
1134       if (*SecIOrErr == ELFObj->section_end())
1135         return '?';
1136 
1137       Expected<StringRef> NameOrErr = (*SecIOrErr)->getName();
1138       if (!NameOrErr) {
1139         consumeError(NameOrErr.takeError());
1140         return '?';
1141       }
1142       SecName = *NameOrErr;
1143     }
1144   }
1145 
1146   if ((Symflags & object::SymbolRef::SF_Weak) && !isa<MachOObjectFile>(Obj)) {
1147     char Ret = isObject(Obj, I) ? 'v' : 'w';
1148     return (!(Symflags & object::SymbolRef::SF_Undefined)) ? toupper(Ret) : Ret;
1149   }
1150 
1151   if (Symflags & object::SymbolRef::SF_Undefined)
1152     return 'U';
1153 
1154   if (Symflags & object::SymbolRef::SF_Common)
1155     return 'C';
1156 
1157   char Ret = '?';
1158   if (Symflags & object::SymbolRef::SF_Absolute)
1159     Ret = 'a';
1160   else if (IRObjectFile *IR = dyn_cast<IRObjectFile>(&Obj))
1161     Ret = getSymbolNMTypeChar(*IR, I);
1162   else if (COFFObjectFile *COFF = dyn_cast<COFFObjectFile>(&Obj))
1163     Ret = getSymbolNMTypeChar(*COFF, I);
1164   else if (COFFImportFile *COFFImport = dyn_cast<COFFImportFile>(&Obj))
1165     Ret = getSymbolNMTypeChar(*COFFImport);
1166   else if (MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(&Obj))
1167     Ret = getSymbolNMTypeChar(*MachO, I);
1168   else if (WasmObjectFile *Wasm = dyn_cast<WasmObjectFile>(&Obj))
1169     Ret = getSymbolNMTypeChar(*Wasm, I);
1170   else if (TapiFile *Tapi = dyn_cast<TapiFile>(&Obj))
1171     Ret = getSymbolNMTypeChar(*Tapi, I);
1172   else if (ELFObjectFileBase *ELF = dyn_cast<ELFObjectFileBase>(&Obj)) {
1173     if (ELFSymbolRef(*I).getELFType() == ELF::STT_GNU_IFUNC)
1174       return 'i';
1175     Ret = getSymbolNMTypeChar(*ELF, I);
1176     if (ELFSymbolRef(*I).getBinding() == ELF::STB_GNU_UNIQUE)
1177       return Ret;
1178   } else
1179     llvm_unreachable("unknown binary format");
1180 
1181   if (!(Symflags & object::SymbolRef::SF_Global))
1182     return Ret;
1183 
1184   return toupper(Ret);
1185 }
1186 
1187 // getNsectForSegSect() is used to implement the Mach-O "-s segname sectname"
1188 // option to dump only those symbols from that section in a Mach-O file.
1189 // It is called once for each Mach-O file from dumpSymbolNamesFromObject()
1190 // to get the section number for that named section from the command line
1191 // arguments. It returns the section number for that section in the Mach-O
1192 // file or zero it is not present.
getNsectForSegSect(MachOObjectFile * Obj)1193 static unsigned getNsectForSegSect(MachOObjectFile *Obj) {
1194   unsigned Nsect = 1;
1195   for (auto &S : Obj->sections()) {
1196     DataRefImpl Ref = S.getRawDataRefImpl();
1197     StringRef SectionName;
1198     if (Expected<StringRef> NameOrErr = Obj->getSectionName(Ref))
1199       SectionName = *NameOrErr;
1200     StringRef SegmentName = Obj->getSectionFinalSegmentName(Ref);
1201     if (SegmentName == SegSect[0] && SectionName == SegSect[1])
1202       return Nsect;
1203     Nsect++;
1204   }
1205   return 0;
1206 }
1207 
1208 // getNsectInMachO() is used to implement the Mach-O "-s segname sectname"
1209 // option to dump only those symbols from that section in a Mach-O file.
1210 // It is called once for each symbol in a Mach-O file from
1211 // dumpSymbolNamesFromObject() and returns the section number for that symbol
1212 // if it is in a section, else it returns 0.
getNsectInMachO(MachOObjectFile & Obj,BasicSymbolRef Sym)1213 static unsigned getNsectInMachO(MachOObjectFile &Obj, BasicSymbolRef Sym) {
1214   DataRefImpl Symb = Sym.getRawDataRefImpl();
1215   if (Obj.is64Bit()) {
1216     MachO::nlist_64 STE = Obj.getSymbol64TableEntry(Symb);
1217     return (STE.n_type & MachO::N_TYPE) == MachO::N_SECT ? STE.n_sect : 0;
1218   }
1219   MachO::nlist STE = Obj.getSymbolTableEntry(Symb);
1220   return (STE.n_type & MachO::N_TYPE) == MachO::N_SECT ? STE.n_sect : 0;
1221 }
1222 
dumpSymbolNamesFromObject(SymbolicFile & Obj,bool printName,StringRef ArchiveName={},StringRef ArchitectureName={})1223 static void dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
1224                                       StringRef ArchiveName = {},
1225                                       StringRef ArchitectureName = {}) {
1226   auto Symbols = Obj.symbols();
1227   if (DynamicSyms) {
1228     const auto *E = dyn_cast<ELFObjectFileBase>(&Obj);
1229     if (!E) {
1230       error("File format has no dynamic symbol table", Obj.getFileName());
1231       return;
1232     }
1233     Symbols = E->getDynamicSymbolIterators();
1234   }
1235   std::string NameBuffer;
1236   raw_string_ostream OS(NameBuffer);
1237   // If a "-s segname sectname" option was specified and this is a Mach-O
1238   // file get the section number for that section in this object file.
1239   unsigned int Nsect = 0;
1240   MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(&Obj);
1241   if (!SegSect.empty() && MachO) {
1242     Nsect = getNsectForSegSect(MachO);
1243     // If this section is not in the object file no symbols are printed.
1244     if (Nsect == 0)
1245       return;
1246   }
1247   if (!(MachO && DyldInfoOnly)) {
1248     for (BasicSymbolRef Sym : Symbols) {
1249       Expected<uint32_t> SymFlagsOrErr = Sym.getFlags();
1250       if (!SymFlagsOrErr) {
1251         error(SymFlagsOrErr.takeError(), Obj.getFileName());
1252         return;
1253       }
1254       if (!DebugSyms && (*SymFlagsOrErr & SymbolRef::SF_FormatSpecific))
1255         continue;
1256       if (WithoutAliases && (*SymFlagsOrErr & SymbolRef::SF_Indirect))
1257         continue;
1258       // If a "-s segname sectname" option was specified and this is a Mach-O
1259       // file and this section appears in this file, Nsect will be non-zero then
1260       // see if this symbol is a symbol from that section and if not skip it.
1261       if (Nsect && Nsect != getNsectInMachO(*MachO, Sym))
1262         continue;
1263       NMSymbol S = {};
1264       S.Size = 0;
1265       S.Address = 0;
1266       if (isa<ELFObjectFileBase>(&Obj))
1267         S.Size = ELFSymbolRef(Sym).getSize();
1268       if (PrintAddress && isa<ObjectFile>(Obj)) {
1269         SymbolRef SymRef(Sym);
1270         Expected<uint64_t> AddressOrErr = SymRef.getAddress();
1271         if (!AddressOrErr) {
1272           consumeError(AddressOrErr.takeError());
1273           break;
1274         }
1275         S.Address = *AddressOrErr;
1276       }
1277       S.TypeName = getNMTypeName(Obj, Sym);
1278       S.TypeChar = getNMSectionTagAndName(Obj, Sym, S.SectionName);
1279       if (Error E = Sym.printName(OS)) {
1280         if (MachO) {
1281           OS << "bad string index";
1282           consumeError(std::move(E));
1283         } else
1284           error(std::move(E), Obj.getFileName());
1285       }
1286       OS << '\0';
1287       S.Sym = Sym;
1288       SymbolList.push_back(S);
1289     }
1290   }
1291 
1292   OS.flush();
1293   const char *P = NameBuffer.c_str();
1294   unsigned I;
1295   for (I = 0; I < SymbolList.size(); ++I) {
1296     SymbolList[I].Name = P;
1297     P += strlen(P) + 1;
1298   }
1299 
1300   // If this is a Mach-O file where the nlist symbol table is out of sync
1301   // with the dyld export trie then look through exports and fake up symbols
1302   // for the ones that are missing (also done with the -add-dyldinfo flag).
1303   // This is needed if strip(1) -T is run on a binary containing swift
1304   // language symbols for example.  The option -only-dyldinfo will fake up
1305   // all symbols from the dyld export trie as well as the bind info.
1306   std::string ExportsNameBuffer;
1307   raw_string_ostream EOS(ExportsNameBuffer);
1308   std::string BindsNameBuffer;
1309   raw_string_ostream BOS(BindsNameBuffer);
1310   std::string LazysNameBuffer;
1311   raw_string_ostream LOS(LazysNameBuffer);
1312   std::string WeaksNameBuffer;
1313   raw_string_ostream WOS(WeaksNameBuffer);
1314   std::string FunctionStartsNameBuffer;
1315   raw_string_ostream FOS(FunctionStartsNameBuffer);
1316   if (MachO && !NoDyldInfo) {
1317     MachO::mach_header H;
1318     MachO::mach_header_64 H_64;
1319     uint32_t HFlags = 0;
1320     if (MachO->is64Bit()) {
1321       H_64 = MachO->MachOObjectFile::getHeader64();
1322       HFlags = H_64.flags;
1323     } else {
1324       H = MachO->MachOObjectFile::getHeader();
1325       HFlags = H.flags;
1326     }
1327     uint64_t BaseSegmentAddress = 0;
1328     for (const auto &Command : MachO->load_commands()) {
1329       if (Command.C.cmd == MachO::LC_SEGMENT) {
1330         MachO::segment_command Seg = MachO->getSegmentLoadCommand(Command);
1331         if (Seg.fileoff == 0 && Seg.filesize != 0) {
1332           BaseSegmentAddress = Seg.vmaddr;
1333           break;
1334         }
1335       } else if (Command.C.cmd == MachO::LC_SEGMENT_64) {
1336         MachO::segment_command_64 Seg = MachO->getSegment64LoadCommand(Command);
1337         if (Seg.fileoff == 0 && Seg.filesize != 0) {
1338           BaseSegmentAddress = Seg.vmaddr;
1339           break;
1340         }
1341       }
1342     }
1343     if (DyldInfoOnly || AddDyldInfo ||
1344         HFlags & MachO::MH_NLIST_OUTOFSYNC_WITH_DYLDINFO) {
1345       unsigned ExportsAdded = 0;
1346       Error Err = Error::success();
1347       for (const llvm::object::ExportEntry &Entry : MachO->exports(Err)) {
1348         bool found = false;
1349         bool ReExport = false;
1350         if (!DyldInfoOnly) {
1351           for (const NMSymbol &S : SymbolList)
1352             if (S.Address == Entry.address() + BaseSegmentAddress &&
1353                 S.Name == Entry.name()) {
1354               found = true;
1355               break;
1356             }
1357         }
1358         if (!found) {
1359           NMSymbol S = {};
1360           S.Address = Entry.address() + BaseSegmentAddress;
1361           S.Size = 0;
1362           S.TypeChar = '\0';
1363           S.Name = Entry.name();
1364           // There is no symbol in the nlist symbol table for this so we set
1365           // Sym effectivly to null and the rest of code in here must test for
1366           // it and not do things like Sym.getFlags() for it.
1367           S.Sym = BasicSymbolRef();
1368           S.SymFlags = SymbolRef::SF_Global;
1369           S.Section = SectionRef();
1370           S.NType = 0;
1371           S.NSect = 0;
1372           S.NDesc = 0;
1373           S.IndirectName = StringRef();
1374 
1375           uint64_t EFlags = Entry.flags();
1376           bool Abs = ((EFlags & MachO::EXPORT_SYMBOL_FLAGS_KIND_MASK) ==
1377                       MachO::EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE);
1378           bool Resolver = (EFlags &
1379                            MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER);
1380           ReExport = (EFlags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT);
1381           bool WeakDef = (EFlags & MachO::EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION);
1382           if (WeakDef)
1383             S.NDesc |= MachO::N_WEAK_DEF;
1384           if (Abs) {
1385             S.NType = MachO::N_EXT | MachO::N_ABS;
1386             S.TypeChar = 'A';
1387           } else if (ReExport) {
1388             S.NType = MachO::N_EXT | MachO::N_INDR;
1389             S.TypeChar = 'I';
1390           } else {
1391             S.NType = MachO::N_EXT | MachO::N_SECT;
1392             if (Resolver) {
1393               S.Address = Entry.other() + BaseSegmentAddress;
1394               if ((S.Address & 1) != 0 &&
1395                   !MachO->is64Bit() && H.cputype == MachO::CPU_TYPE_ARM){
1396                 S.Address &= ~1LL;
1397                 S.NDesc |= MachO::N_ARM_THUMB_DEF;
1398               }
1399             } else {
1400               S.Address = Entry.address() + BaseSegmentAddress;
1401             }
1402             StringRef SegmentName = StringRef();
1403             StringRef SectionName = StringRef();
1404             for (const SectionRef &Section : MachO->sections()) {
1405               S.NSect++;
1406 
1407               if (Expected<StringRef> NameOrErr = Section.getName())
1408                 SectionName = *NameOrErr;
1409               else
1410                 consumeError(NameOrErr.takeError());
1411 
1412               SegmentName = MachO->getSectionFinalSegmentName(
1413                                                   Section.getRawDataRefImpl());
1414               if (S.Address >= Section.getAddress() &&
1415                   S.Address < Section.getAddress() + Section.getSize()) {
1416                 S.Section = Section;
1417                 break;
1418               } else if (Entry.name() == "__mh_execute_header" &&
1419                          SegmentName == "__TEXT" && SectionName == "__text") {
1420                 S.Section = Section;
1421                 S.NDesc |= MachO::REFERENCED_DYNAMICALLY;
1422                 break;
1423               }
1424             }
1425             if (SegmentName == "__TEXT" && SectionName == "__text")
1426               S.TypeChar = 'T';
1427             else if (SegmentName == "__DATA" && SectionName == "__data")
1428               S.TypeChar = 'D';
1429             else if (SegmentName == "__DATA" && SectionName == "__bss")
1430               S.TypeChar = 'B';
1431             else
1432               S.TypeChar = 'S';
1433           }
1434           SymbolList.push_back(S);
1435 
1436           EOS << Entry.name();
1437           EOS << '\0';
1438           ExportsAdded++;
1439 
1440           // For ReExports there are a two more things to do, first add the
1441           // indirect name and second create the undefined symbol using the
1442           // referened dynamic library.
1443           if (ReExport) {
1444 
1445             // Add the indirect name.
1446             if (Entry.otherName().empty())
1447               EOS << Entry.name();
1448             else
1449               EOS << Entry.otherName();
1450             EOS << '\0';
1451 
1452             // Now create the undefined symbol using the referened dynamic
1453             // library.
1454             NMSymbol U = {};
1455             U.Address = 0;
1456             U.Size = 0;
1457             U.TypeChar = 'U';
1458             if (Entry.otherName().empty())
1459               U.Name = Entry.name();
1460             else
1461               U.Name = Entry.otherName();
1462             // Again there is no symbol in the nlist symbol table for this so
1463             // we set Sym effectivly to null and the rest of code in here must
1464             // test for it and not do things like Sym.getFlags() for it.
1465             U.Sym = BasicSymbolRef();
1466             U.SymFlags = SymbolRef::SF_Global | SymbolRef::SF_Undefined;
1467             U.Section = SectionRef();
1468             U.NType = MachO::N_EXT | MachO::N_UNDF;
1469             U.NSect = 0;
1470             U.NDesc = 0;
1471             // The library ordinal for this undefined symbol is in the export
1472             // trie Entry.other().
1473             MachO::SET_LIBRARY_ORDINAL(U.NDesc, Entry.other());
1474             U.IndirectName = StringRef();
1475             SymbolList.push_back(U);
1476 
1477             // Finally add the undefined symbol's name.
1478             if (Entry.otherName().empty())
1479               EOS << Entry.name();
1480             else
1481               EOS << Entry.otherName();
1482             EOS << '\0';
1483             ExportsAdded++;
1484           }
1485         }
1486       }
1487       if (Err)
1488         error(std::move(Err), MachO->getFileName());
1489       // Set the symbol names and indirect names for the added symbols.
1490       if (ExportsAdded) {
1491         EOS.flush();
1492         const char *Q = ExportsNameBuffer.c_str();
1493         for (unsigned K = 0; K < ExportsAdded; K++) {
1494           SymbolList[I].Name = Q;
1495           Q += strlen(Q) + 1;
1496           if (SymbolList[I].TypeChar == 'I') {
1497             SymbolList[I].IndirectName = Q;
1498             Q += strlen(Q) + 1;
1499           }
1500           I++;
1501         }
1502       }
1503 
1504       // Add the undefined symbols from the bind entries.
1505       unsigned BindsAdded = 0;
1506       Error BErr = Error::success();
1507       StringRef LastSymbolName = StringRef();
1508       for (const llvm::object::MachOBindEntry &Entry : MachO->bindTable(BErr)) {
1509         bool found = false;
1510         if (LastSymbolName == Entry.symbolName())
1511           found = true;
1512         else if(!DyldInfoOnly) {
1513           for (unsigned J = 0; J < SymbolList.size() && !found; ++J) {
1514             if (SymbolList[J].Name == Entry.symbolName())
1515               found = true;
1516           }
1517         }
1518         if (!found) {
1519           LastSymbolName = Entry.symbolName();
1520           NMSymbol B = {};
1521           B.Address = 0;
1522           B.Size = 0;
1523           B.TypeChar = 'U';
1524           // There is no symbol in the nlist symbol table for this so we set
1525           // Sym effectivly to null and the rest of code in here must test for
1526           // it and not do things like Sym.getFlags() for it.
1527           B.Sym = BasicSymbolRef();
1528           B.SymFlags = SymbolRef::SF_Global | SymbolRef::SF_Undefined;
1529           B.NType = MachO::N_EXT | MachO::N_UNDF;
1530           B.NSect = 0;
1531           B.NDesc = 0;
1532           MachO::SET_LIBRARY_ORDINAL(B.NDesc, Entry.ordinal());
1533           B.IndirectName = StringRef();
1534           B.Name = Entry.symbolName();
1535           SymbolList.push_back(B);
1536           BOS << Entry.symbolName();
1537           BOS << '\0';
1538           BindsAdded++;
1539         }
1540       }
1541       if (BErr)
1542         error(std::move(BErr), MachO->getFileName());
1543       // Set the symbol names and indirect names for the added symbols.
1544       if (BindsAdded) {
1545         BOS.flush();
1546         const char *Q = BindsNameBuffer.c_str();
1547         for (unsigned K = 0; K < BindsAdded; K++) {
1548           SymbolList[I].Name = Q;
1549           Q += strlen(Q) + 1;
1550           if (SymbolList[I].TypeChar == 'I') {
1551             SymbolList[I].IndirectName = Q;
1552             Q += strlen(Q) + 1;
1553           }
1554           I++;
1555         }
1556       }
1557 
1558       // Add the undefined symbols from the lazy bind entries.
1559       unsigned LazysAdded = 0;
1560       Error LErr = Error::success();
1561       LastSymbolName = StringRef();
1562       for (const llvm::object::MachOBindEntry &Entry :
1563            MachO->lazyBindTable(LErr)) {
1564         bool found = false;
1565         if (LastSymbolName == Entry.symbolName())
1566           found = true;
1567         else {
1568           // Here we must check to see it this symbol is already in the
1569           // SymbolList as it might have already have been added above via a
1570           // non-lazy (bind) entry.
1571           for (unsigned J = 0; J < SymbolList.size() && !found; ++J) {
1572             if (SymbolList[J].Name == Entry.symbolName())
1573               found = true;
1574           }
1575         }
1576         if (!found) {
1577           LastSymbolName = Entry.symbolName();
1578           NMSymbol L = {};
1579           L.Name = Entry.symbolName();
1580           L.Address = 0;
1581           L.Size = 0;
1582           L.TypeChar = 'U';
1583           // There is no symbol in the nlist symbol table for this so we set
1584           // Sym effectivly to null and the rest of code in here must test for
1585           // it and not do things like Sym.getFlags() for it.
1586           L.Sym = BasicSymbolRef();
1587           L.SymFlags = SymbolRef::SF_Global | SymbolRef::SF_Undefined;
1588           L.NType = MachO::N_EXT | MachO::N_UNDF;
1589           L.NSect = 0;
1590           // The REFERENCE_FLAG_UNDEFINED_LAZY is no longer used but here it
1591           // makes sence since we are creating this from a lazy bind entry.
1592           L.NDesc = MachO::REFERENCE_FLAG_UNDEFINED_LAZY;
1593           MachO::SET_LIBRARY_ORDINAL(L.NDesc, Entry.ordinal());
1594           L.IndirectName = StringRef();
1595           SymbolList.push_back(L);
1596           LOS << Entry.symbolName();
1597           LOS << '\0';
1598           LazysAdded++;
1599         }
1600       }
1601       if (LErr)
1602         error(std::move(LErr), MachO->getFileName());
1603       // Set the symbol names and indirect names for the added symbols.
1604       if (LazysAdded) {
1605         LOS.flush();
1606         const char *Q = LazysNameBuffer.c_str();
1607         for (unsigned K = 0; K < LazysAdded; K++) {
1608           SymbolList[I].Name = Q;
1609           Q += strlen(Q) + 1;
1610           if (SymbolList[I].TypeChar == 'I') {
1611             SymbolList[I].IndirectName = Q;
1612             Q += strlen(Q) + 1;
1613           }
1614           I++;
1615         }
1616       }
1617 
1618       // Add the undefineds symbol from the weak bind entries which are not
1619       // strong symbols.
1620       unsigned WeaksAdded = 0;
1621       Error WErr = Error::success();
1622       LastSymbolName = StringRef();
1623       for (const llvm::object::MachOBindEntry &Entry :
1624            MachO->weakBindTable(WErr)) {
1625         bool found = false;
1626         unsigned J = 0;
1627         if (LastSymbolName == Entry.symbolName() ||
1628             Entry.flags() & MachO::BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION) {
1629           found = true;
1630         } else {
1631           for (J = 0; J < SymbolList.size() && !found; ++J) {
1632             if (SymbolList[J].Name == Entry.symbolName()) {
1633                found = true;
1634                break;
1635             }
1636           }
1637         }
1638         if (!found) {
1639           LastSymbolName = Entry.symbolName();
1640           NMSymbol W = {};
1641           W.Name = Entry.symbolName();
1642           W.Address = 0;
1643           W.Size = 0;
1644           W.TypeChar = 'U';
1645           // There is no symbol in the nlist symbol table for this so we set
1646           // Sym effectivly to null and the rest of code in here must test for
1647           // it and not do things like Sym.getFlags() for it.
1648           W.Sym = BasicSymbolRef();
1649           W.SymFlags = SymbolRef::SF_Global | SymbolRef::SF_Undefined;
1650           W.NType = MachO::N_EXT | MachO::N_UNDF;
1651           W.NSect = 0;
1652           // Odd that we are using N_WEAK_DEF on an undefined symbol but that is
1653           // what is created in this case by the linker when there are real
1654           // symbols in the nlist structs.
1655           W.NDesc = MachO::N_WEAK_DEF;
1656           W.IndirectName = StringRef();
1657           SymbolList.push_back(W);
1658           WOS << Entry.symbolName();
1659           WOS << '\0';
1660           WeaksAdded++;
1661         } else {
1662           // This is the case the symbol was previously been found and it could
1663           // have been added from a bind or lazy bind symbol.  If so and not
1664           // a definition also mark it as weak.
1665           if (SymbolList[J].TypeChar == 'U')
1666             // See comment above about N_WEAK_DEF.
1667             SymbolList[J].NDesc |= MachO::N_WEAK_DEF;
1668         }
1669       }
1670       if (WErr)
1671         error(std::move(WErr), MachO->getFileName());
1672       // Set the symbol names and indirect names for the added symbols.
1673       if (WeaksAdded) {
1674         WOS.flush();
1675         const char *Q = WeaksNameBuffer.c_str();
1676         for (unsigned K = 0; K < WeaksAdded; K++) {
1677           SymbolList[I].Name = Q;
1678           Q += strlen(Q) + 1;
1679           if (SymbolList[I].TypeChar == 'I') {
1680             SymbolList[I].IndirectName = Q;
1681             Q += strlen(Q) + 1;
1682           }
1683           I++;
1684         }
1685       }
1686 
1687       // Trying adding symbol from the function starts table and LC_MAIN entry
1688       // point.
1689       SmallVector<uint64_t, 8> FoundFns;
1690       uint64_t lc_main_offset = UINT64_MAX;
1691       for (const auto &Command : MachO->load_commands()) {
1692         if (Command.C.cmd == MachO::LC_FUNCTION_STARTS) {
1693           // We found a function starts segment, parse the addresses for
1694           // consumption.
1695           MachO::linkedit_data_command LLC =
1696             MachO->getLinkeditDataLoadCommand(Command);
1697 
1698           MachO->ReadULEB128s(LLC.dataoff, FoundFns);
1699         } else if (Command.C.cmd == MachO::LC_MAIN) {
1700           MachO::entry_point_command LCmain =
1701             MachO->getEntryPointCommand(Command);
1702           lc_main_offset = LCmain.entryoff;
1703         }
1704       }
1705       // See if these addresses are already in the symbol table.
1706       unsigned FunctionStartsAdded = 0;
1707       for (uint64_t f = 0; f < FoundFns.size(); f++) {
1708         bool found = false;
1709         for (unsigned J = 0; J < SymbolList.size() && !found; ++J) {
1710           if (SymbolList[J].Address == FoundFns[f] + BaseSegmentAddress)
1711             found = true;
1712         }
1713         // See this address is not already in the symbol table fake up an
1714         // nlist for it.
1715         if (!found) {
1716           NMSymbol F = {};
1717           F.Name = "<redacted function X>";
1718           F.Address = FoundFns[f] + BaseSegmentAddress;
1719           F.Size = 0;
1720           // There is no symbol in the nlist symbol table for this so we set
1721           // Sym effectivly to null and the rest of code in here must test for
1722           // it and not do things like Sym.getFlags() for it.
1723           F.Sym = BasicSymbolRef();
1724           F.SymFlags = 0;
1725           F.NType = MachO::N_SECT;
1726           F.NSect = 0;
1727           StringRef SegmentName = StringRef();
1728           StringRef SectionName = StringRef();
1729           for (const SectionRef &Section : MachO->sections()) {
1730             if (Expected<StringRef> NameOrErr = Section.getName())
1731               SectionName = *NameOrErr;
1732             else
1733               consumeError(NameOrErr.takeError());
1734 
1735             SegmentName = MachO->getSectionFinalSegmentName(
1736                                                 Section.getRawDataRefImpl());
1737             F.NSect++;
1738             if (F.Address >= Section.getAddress() &&
1739                 F.Address < Section.getAddress() + Section.getSize()) {
1740               F.Section = Section;
1741               break;
1742             }
1743           }
1744           if (SegmentName == "__TEXT" && SectionName == "__text")
1745             F.TypeChar = 't';
1746           else if (SegmentName == "__DATA" && SectionName == "__data")
1747             F.TypeChar = 'd';
1748           else if (SegmentName == "__DATA" && SectionName == "__bss")
1749             F.TypeChar = 'b';
1750           else
1751             F.TypeChar = 's';
1752           F.NDesc = 0;
1753           F.IndirectName = StringRef();
1754           SymbolList.push_back(F);
1755           if (FoundFns[f] == lc_main_offset)
1756             FOS << "<redacted LC_MAIN>";
1757           else
1758             FOS << "<redacted function " << f << ">";
1759           FOS << '\0';
1760           FunctionStartsAdded++;
1761         }
1762       }
1763       if (FunctionStartsAdded) {
1764         FOS.flush();
1765         const char *Q = FunctionStartsNameBuffer.c_str();
1766         for (unsigned K = 0; K < FunctionStartsAdded; K++) {
1767           SymbolList[I].Name = Q;
1768           Q += strlen(Q) + 1;
1769           if (SymbolList[I].TypeChar == 'I') {
1770             SymbolList[I].IndirectName = Q;
1771             Q += strlen(Q) + 1;
1772           }
1773           I++;
1774         }
1775       }
1776     }
1777   }
1778 
1779   CurrentFilename = Obj.getFileName();
1780 
1781   if (Symbols.empty() && SymbolList.empty()) {
1782     writeFileName(errs(), ArchiveName, ArchitectureName);
1783     errs() << "no symbols\n";
1784   }
1785 
1786   sortAndPrintSymbolList(Obj, printName, ArchiveName, ArchitectureName);
1787 }
1788 
1789 // checkMachOAndArchFlags() checks to see if the SymbolicFile is a Mach-O file
1790 // and if it is and there is a list of architecture flags is specified then
1791 // check to make sure this Mach-O file is one of those architectures or all
1792 // architectures was specificed.  If not then an error is generated and this
1793 // routine returns false.  Else it returns true.
checkMachOAndArchFlags(SymbolicFile * O,std::string & Filename)1794 static bool checkMachOAndArchFlags(SymbolicFile *O, std::string &Filename) {
1795   auto *MachO = dyn_cast<MachOObjectFile>(O);
1796 
1797   if (!MachO || ArchAll || ArchFlags.empty())
1798     return true;
1799 
1800   MachO::mach_header H;
1801   MachO::mach_header_64 H_64;
1802   Triple T;
1803   const char *McpuDefault, *ArchFlag;
1804   if (MachO->is64Bit()) {
1805     H_64 = MachO->MachOObjectFile::getHeader64();
1806     T = MachOObjectFile::getArchTriple(H_64.cputype, H_64.cpusubtype,
1807                                        &McpuDefault, &ArchFlag);
1808   } else {
1809     H = MachO->MachOObjectFile::getHeader();
1810     T = MachOObjectFile::getArchTriple(H.cputype, H.cpusubtype,
1811                                        &McpuDefault, &ArchFlag);
1812   }
1813   const std::string ArchFlagName(ArchFlag);
1814   if (none_of(ArchFlags, [&](const std::string &Name) {
1815         return Name == ArchFlagName;
1816       })) {
1817     error("No architecture specified", Filename);
1818     return false;
1819   }
1820   return true;
1821 }
1822 
dumpSymbolNamesFromFile(std::string & Filename)1823 static void dumpSymbolNamesFromFile(std::string &Filename) {
1824   ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
1825       MemoryBuffer::getFileOrSTDIN(Filename);
1826   if (error(BufferOrErr.getError(), Filename))
1827     return;
1828 
1829   LLVMContext Context;
1830   LLVMContext *ContextPtr = NoLLVMBitcode ? nullptr : &Context;
1831   Expected<std::unique_ptr<Binary>> BinaryOrErr =
1832       createBinary(BufferOrErr.get()->getMemBufferRef(), ContextPtr);
1833   if (!BinaryOrErr) {
1834     error(BinaryOrErr.takeError(), Filename);
1835     return;
1836   }
1837   Binary &Bin = *BinaryOrErr.get();
1838 
1839   if (Archive *A = dyn_cast<Archive>(&Bin)) {
1840     if (ArchiveMap) {
1841       Archive::symbol_iterator I = A->symbol_begin();
1842       Archive::symbol_iterator E = A->symbol_end();
1843       if (I != E) {
1844         outs() << "Archive map\n";
1845         for (; I != E; ++I) {
1846           Expected<Archive::Child> C = I->getMember();
1847           if (!C) {
1848             error(C.takeError(), Filename);
1849             break;
1850           }
1851           Expected<StringRef> FileNameOrErr = C->getName();
1852           if (!FileNameOrErr) {
1853             error(FileNameOrErr.takeError(), Filename);
1854             break;
1855           }
1856           StringRef SymName = I->getName();
1857           outs() << SymName << " in " << FileNameOrErr.get() << "\n";
1858         }
1859         outs() << "\n";
1860       }
1861     }
1862 
1863     {
1864       Error Err = Error::success();
1865       for (auto &C : A->children(Err)) {
1866         Expected<std::unique_ptr<Binary>> ChildOrErr =
1867             C.getAsBinary(ContextPtr);
1868         if (!ChildOrErr) {
1869           if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError()))
1870             error(std::move(E), Filename, C);
1871           continue;
1872         }
1873         if (SymbolicFile *O = dyn_cast<SymbolicFile>(&*ChildOrErr.get())) {
1874           if (!MachOPrintSizeWarning && PrintSize &&  isa<MachOObjectFile>(O)) {
1875             WithColor::warning(errs(), ToolName)
1876                 << "sizes with -print-size for Mach-O files are always zero.\n";
1877             MachOPrintSizeWarning = true;
1878           }
1879           if (!checkMachOAndArchFlags(O, Filename))
1880             return;
1881           if (!PrintFileName) {
1882             outs() << "\n";
1883             if (isa<MachOObjectFile>(O)) {
1884               outs() << Filename << "(" << O->getFileName() << ")";
1885             } else
1886               outs() << O->getFileName();
1887             outs() << ":\n";
1888           }
1889           dumpSymbolNamesFromObject(*O, false, Filename);
1890         }
1891       }
1892       if (Err)
1893         error(std::move(Err), A->getFileName());
1894     }
1895     return;
1896   }
1897   if (MachOUniversalBinary *UB = dyn_cast<MachOUniversalBinary>(&Bin)) {
1898     // If we have a list of architecture flags specified dump only those.
1899     if (!ArchAll && !ArchFlags.empty()) {
1900       // Look for a slice in the universal binary that matches each ArchFlag.
1901       bool ArchFound;
1902       for (unsigned i = 0; i < ArchFlags.size(); ++i) {
1903         ArchFound = false;
1904         for (MachOUniversalBinary::object_iterator I = UB->begin_objects(),
1905                                                    E = UB->end_objects();
1906              I != E; ++I) {
1907           if (ArchFlags[i] == I->getArchFlagName()) {
1908             ArchFound = true;
1909             Expected<std::unique_ptr<ObjectFile>> ObjOrErr =
1910                 I->getAsObjectFile();
1911             std::string ArchiveName;
1912             std::string ArchitectureName;
1913             ArchiveName.clear();
1914             ArchitectureName.clear();
1915             if (ObjOrErr) {
1916               ObjectFile &Obj = *ObjOrErr.get();
1917               if (ArchFlags.size() > 1) {
1918                 if (PrintFileName)
1919                   ArchitectureName = I->getArchFlagName();
1920                 else
1921                   outs() << "\n" << Obj.getFileName() << " (for architecture "
1922                          << I->getArchFlagName() << ")"
1923                          << ":\n";
1924               }
1925               dumpSymbolNamesFromObject(Obj, false, ArchiveName,
1926                                         ArchitectureName);
1927             } else if (auto E = isNotObjectErrorInvalidFileType(
1928                        ObjOrErr.takeError())) {
1929               error(std::move(E), Filename, ArchFlags.size() > 1 ?
1930                     StringRef(I->getArchFlagName()) : StringRef());
1931               continue;
1932             } else if (Expected<std::unique_ptr<Archive>> AOrErr =
1933                            I->getAsArchive()) {
1934               std::unique_ptr<Archive> &A = *AOrErr;
1935               Error Err = Error::success();
1936               for (auto &C : A->children(Err)) {
1937                 Expected<std::unique_ptr<Binary>> ChildOrErr =
1938                     C.getAsBinary(ContextPtr);
1939                 if (!ChildOrErr) {
1940                   if (auto E = isNotObjectErrorInvalidFileType(
1941                                        ChildOrErr.takeError())) {
1942                     error(std::move(E), Filename, C, ArchFlags.size() > 1 ?
1943                           StringRef(I->getArchFlagName()) : StringRef());
1944                   }
1945                   continue;
1946                 }
1947                 if (SymbolicFile *O =
1948                         dyn_cast<SymbolicFile>(&*ChildOrErr.get())) {
1949                   if (PrintFileName) {
1950                     ArchiveName = std::string(A->getFileName());
1951                     if (ArchFlags.size() > 1)
1952                       ArchitectureName = I->getArchFlagName();
1953                   } else {
1954                     outs() << "\n" << A->getFileName();
1955                     outs() << "(" << O->getFileName() << ")";
1956                     if (ArchFlags.size() > 1) {
1957                       outs() << " (for architecture " << I->getArchFlagName()
1958                              << ")";
1959                     }
1960                     outs() << ":\n";
1961                   }
1962                   dumpSymbolNamesFromObject(*O, false, ArchiveName,
1963                                             ArchitectureName);
1964                 }
1965               }
1966               if (Err)
1967                 error(std::move(Err), A->getFileName());
1968             } else {
1969               consumeError(AOrErr.takeError());
1970               error(Filename + " for architecture " +
1971                     StringRef(I->getArchFlagName()) +
1972                     " is not a Mach-O file or an archive file",
1973                     "Mach-O universal file");
1974             }
1975           }
1976         }
1977         if (!ArchFound) {
1978           error(ArchFlags[i],
1979                 "file: " + Filename + " does not contain architecture");
1980           return;
1981         }
1982       }
1983       return;
1984     }
1985     // No architecture flags were specified so if this contains a slice that
1986     // matches the host architecture dump only that.
1987     if (!ArchAll) {
1988       Triple HostTriple = MachOObjectFile::getHostArch();
1989       StringRef HostArchName = HostTriple.getArchName();
1990       for (MachOUniversalBinary::object_iterator I = UB->begin_objects(),
1991                                                  E = UB->end_objects();
1992            I != E; ++I) {
1993         if (HostArchName == I->getArchFlagName()) {
1994           Expected<std::unique_ptr<ObjectFile>> ObjOrErr = I->getAsObjectFile();
1995           std::string ArchiveName;
1996           if (ObjOrErr) {
1997             ObjectFile &Obj = *ObjOrErr.get();
1998             dumpSymbolNamesFromObject(Obj, false);
1999           } else if (auto E = isNotObjectErrorInvalidFileType(
2000                      ObjOrErr.takeError())) {
2001             error(std::move(E), Filename);
2002             return;
2003           } else if (Expected<std::unique_ptr<Archive>> AOrErr =
2004                          I->getAsArchive()) {
2005             std::unique_ptr<Archive> &A = *AOrErr;
2006             Error Err = Error::success();
2007             for (auto &C : A->children(Err)) {
2008               Expected<std::unique_ptr<Binary>> ChildOrErr =
2009                   C.getAsBinary(ContextPtr);
2010               if (!ChildOrErr) {
2011                 if (auto E = isNotObjectErrorInvalidFileType(
2012                                      ChildOrErr.takeError()))
2013                   error(std::move(E), Filename, C);
2014                 continue;
2015               }
2016               if (SymbolicFile *O =
2017                       dyn_cast<SymbolicFile>(&*ChildOrErr.get())) {
2018                 if (PrintFileName)
2019                   ArchiveName = std::string(A->getFileName());
2020                 else
2021                   outs() << "\n" << A->getFileName() << "(" << O->getFileName()
2022                          << ")"
2023                          << ":\n";
2024                 dumpSymbolNamesFromObject(*O, false, ArchiveName);
2025               }
2026             }
2027             if (Err)
2028               error(std::move(Err), A->getFileName());
2029           } else {
2030             consumeError(AOrErr.takeError());
2031             error(Filename + " for architecture " +
2032                   StringRef(I->getArchFlagName()) +
2033                   " is not a Mach-O file or an archive file",
2034                   "Mach-O universal file");
2035           }
2036           return;
2037         }
2038       }
2039     }
2040     // Either all architectures have been specified or none have been specified
2041     // and this does not contain the host architecture so dump all the slices.
2042     bool moreThanOneArch = UB->getNumberOfObjects() > 1;
2043     for (const MachOUniversalBinary::ObjectForArch &O : UB->objects()) {
2044       Expected<std::unique_ptr<ObjectFile>> ObjOrErr = O.getAsObjectFile();
2045       std::string ArchiveName;
2046       std::string ArchitectureName;
2047       ArchiveName.clear();
2048       ArchitectureName.clear();
2049       if (ObjOrErr) {
2050         ObjectFile &Obj = *ObjOrErr.get();
2051         if (PrintFileName) {
2052           if (isa<MachOObjectFile>(Obj) && moreThanOneArch)
2053             ArchitectureName = O.getArchFlagName();
2054         } else {
2055           if (moreThanOneArch)
2056             outs() << "\n";
2057           outs() << Obj.getFileName();
2058           if (isa<MachOObjectFile>(Obj) && moreThanOneArch)
2059             outs() << " (for architecture " << O.getArchFlagName() << ")";
2060           outs() << ":\n";
2061         }
2062         dumpSymbolNamesFromObject(Obj, false, ArchiveName, ArchitectureName);
2063       } else if (auto E = isNotObjectErrorInvalidFileType(
2064                  ObjOrErr.takeError())) {
2065         error(std::move(E), Filename, moreThanOneArch ?
2066               StringRef(O.getArchFlagName()) : StringRef());
2067         continue;
2068       } else if (Expected<std::unique_ptr<Archive>> AOrErr =
2069                   O.getAsArchive()) {
2070         std::unique_ptr<Archive> &A = *AOrErr;
2071         Error Err = Error::success();
2072         for (auto &C : A->children(Err)) {
2073           Expected<std::unique_ptr<Binary>> ChildOrErr =
2074             C.getAsBinary(ContextPtr);
2075           if (!ChildOrErr) {
2076             if (auto E = isNotObjectErrorInvalidFileType(
2077                                  ChildOrErr.takeError()))
2078               error(std::move(E), Filename, C, moreThanOneArch ?
2079                     StringRef(ArchitectureName) : StringRef());
2080             continue;
2081           }
2082           if (SymbolicFile *F = dyn_cast<SymbolicFile>(&*ChildOrErr.get())) {
2083             if (PrintFileName) {
2084               ArchiveName = std::string(A->getFileName());
2085               if (isa<MachOObjectFile>(F) && moreThanOneArch)
2086                 ArchitectureName = O.getArchFlagName();
2087             } else {
2088               outs() << "\n" << A->getFileName();
2089               if (isa<MachOObjectFile>(F)) {
2090                 outs() << "(" << F->getFileName() << ")";
2091                 if (moreThanOneArch)
2092                   outs() << " (for architecture " << O.getArchFlagName()
2093                          << ")";
2094               } else
2095                 outs() << ":" << F->getFileName();
2096               outs() << ":\n";
2097             }
2098             dumpSymbolNamesFromObject(*F, false, ArchiveName, ArchitectureName);
2099           }
2100         }
2101         if (Err)
2102           error(std::move(Err), A->getFileName());
2103       } else {
2104         consumeError(AOrErr.takeError());
2105         error(Filename + " for architecture " +
2106               StringRef(O.getArchFlagName()) +
2107               " is not a Mach-O file or an archive file",
2108               "Mach-O universal file");
2109       }
2110     }
2111     return;
2112   }
2113 
2114   if (TapiUniversal *TU = dyn_cast<TapiUniversal>(&Bin)) {
2115     for (const TapiUniversal::ObjectForArch &I : TU->objects()) {
2116       StringRef ArchName = I.getArchFlagName();
2117       const bool ShowArch =
2118           ArchFlags.empty() ||
2119           any_of(ArchFlags, [&](StringRef Name) { return Name == ArchName; });
2120       if (!ShowArch)
2121         continue;
2122       if (!AddInlinedInfo && !I.isTopLevelLib())
2123         continue;
2124       if (auto ObjOrErr = I.getAsObjectFile()) {
2125         outs() << "\n"
2126                << I.getInstallName() << " (for architecture " << ArchName << ")"
2127                << ":\n";
2128         dumpSymbolNamesFromObject(*ObjOrErr.get(), false, {}, ArchName);
2129       } else if (Error E =
2130                      isNotObjectErrorInvalidFileType(ObjOrErr.takeError())) {
2131         error(std::move(E), Filename, ArchName);
2132       }
2133     }
2134 
2135     return;
2136   }
2137 
2138   if (SymbolicFile *O = dyn_cast<SymbolicFile>(&Bin)) {
2139     if (!MachOPrintSizeWarning && PrintSize &&  isa<MachOObjectFile>(O)) {
2140       WithColor::warning(errs(), ToolName)
2141           << "sizes with --print-size for Mach-O files are always zero.\n";
2142       MachOPrintSizeWarning = true;
2143     }
2144     if (!checkMachOAndArchFlags(O, Filename))
2145       return;
2146     dumpSymbolNamesFromObject(*O, true);
2147   }
2148 }
2149 
main(int argc,char ** argv)2150 int main(int argc, char **argv) {
2151   InitLLVM X(argc, argv);
2152   cl::HideUnrelatedOptions(NMCat);
2153   cl::ParseCommandLineOptions(argc, argv, "llvm symbol table dumper\n");
2154 
2155   // llvm-nm only reads binary files.
2156   if (error(sys::ChangeStdinToBinary()))
2157     return 1;
2158 
2159   // These calls are needed so that we can read bitcode correctly.
2160   llvm::InitializeAllTargetInfos();
2161   llvm::InitializeAllTargetMCs();
2162   llvm::InitializeAllAsmParsers();
2163 
2164   ToolName = argv[0];
2165   if (BSDFormat)
2166     OutputFormat = bsd;
2167   if (POSIXFormat)
2168     OutputFormat = posix;
2169   if (DarwinFormat)
2170     OutputFormat = darwin;
2171 
2172   // The relative order of these is important. If you pass --size-sort it should
2173   // only print out the size. However, if you pass -S --size-sort, it should
2174   // print out both the size and address.
2175   if (SizeSort && !PrintSize)
2176     PrintAddress = false;
2177   if (OutputFormat == sysv || SizeSort)
2178     PrintSize = true;
2179   if (InputFilenames.empty())
2180     InputFilenames.push_back("a.out");
2181   if (InputFilenames.size() > 1)
2182     MultipleFiles = true;
2183 
2184   // If both --demangle and --no-demangle are specified then pick the last one.
2185   if (NoDemangle.getPosition() > Demangle.getPosition())
2186     Demangle = !NoDemangle;
2187 
2188   for (unsigned i = 0; i < ArchFlags.size(); ++i) {
2189     if (ArchFlags[i] == "all") {
2190       ArchAll = true;
2191     } else {
2192       if (!MachOObjectFile::isValidArch(ArchFlags[i]))
2193         error("Unknown architecture named '" + ArchFlags[i] + "'",
2194               "for the --arch option");
2195     }
2196   }
2197 
2198   if (!SegSect.empty() && SegSect.size() != 2)
2199     error("bad number of arguments (must be two arguments)",
2200           "for the -s option");
2201 
2202   if (NoDyldInfo && (AddDyldInfo || DyldInfoOnly))
2203     error("--no-dyldinfo can't be used with --add-dyldinfo or --dyldinfo-only");
2204 
2205   llvm::for_each(InputFilenames, dumpSymbolNamesFromFile);
2206 
2207   if (HadError)
2208     return 1;
2209 }
2210