1 //===- InputFiles.h ---------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #ifndef LLD_COFF_INPUT_FILES_H
10 #define LLD_COFF_INPUT_FILES_H
11
12 #include "Config.h"
13 #include "lld/Common/LLVM.h"
14 #include "llvm/ADT/ArrayRef.h"
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/DenseSet.h"
17 #include "llvm/BinaryFormat/Magic.h"
18 #include "llvm/Object/Archive.h"
19 #include "llvm/Object/COFF.h"
20 #include "llvm/Support/StringSaver.h"
21 #include <memory>
22 #include <set>
23 #include <vector>
24
25 namespace llvm {
26 struct DILineInfo;
27 namespace pdb {
28 class DbiModuleDescriptorBuilder;
29 class NativeSession;
30 }
31 namespace lto {
32 class InputFile;
33 }
34 }
35
36 namespace lld {
37 class DWARFCache;
38
39 namespace coff {
40
41 std::vector<MemoryBufferRef> getArchiveMembers(llvm::object::Archive *file);
42
43 using llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN;
44 using llvm::COFF::MachineTypes;
45 using llvm::object::Archive;
46 using llvm::object::COFFObjectFile;
47 using llvm::object::COFFSymbolRef;
48 using llvm::object::coff_import_header;
49 using llvm::object::coff_section;
50
51 class Chunk;
52 class Defined;
53 class DefinedImportData;
54 class DefinedImportThunk;
55 class DefinedRegular;
56 class SectionChunk;
57 class Symbol;
58 class Undefined;
59 class TpiSource;
60
61 // The root class of input files.
62 class InputFile {
63 public:
64 enum Kind {
65 ArchiveKind,
66 ObjectKind,
67 LazyObjectKind,
68 PDBKind,
69 ImportKind,
70 BitcodeKind
71 };
kind()72 Kind kind() const { return fileKind; }
~InputFile()73 virtual ~InputFile() {}
74
75 // Returns the filename.
getName()76 StringRef getName() const { return mb.getBufferIdentifier(); }
77
78 // Reads a file (the constructor doesn't do that).
79 virtual void parse() = 0;
80
81 // Returns the CPU type this file was compiled to.
getMachineType()82 virtual MachineTypes getMachineType() { return IMAGE_FILE_MACHINE_UNKNOWN; }
83
84 MemoryBufferRef mb;
85
86 // An archive file name if this file is created from an archive.
87 StringRef parentName;
88
89 // Returns .drectve section contents if exist.
getDirectives()90 StringRef getDirectives() { return directives; }
91
92 protected:
InputFile(Kind k,MemoryBufferRef m)93 InputFile(Kind k, MemoryBufferRef m) : mb(m), fileKind(k) {}
94
95 StringRef directives;
96
97 private:
98 const Kind fileKind;
99 };
100
101 // .lib or .a file.
102 class ArchiveFile : public InputFile {
103 public:
104 explicit ArchiveFile(MemoryBufferRef m);
classof(const InputFile * f)105 static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
106 void parse() override;
107
108 // Enqueues an archive member load for the given symbol. If we've already
109 // enqueued a load for the same archive member, this function does nothing,
110 // which ensures that we don't load the same member more than once.
111 void addMember(const Archive::Symbol &sym);
112
113 private:
114 std::unique_ptr<Archive> file;
115 llvm::DenseSet<uint64_t> seen;
116 };
117
118 // .obj or .o file between -start-lib and -end-lib.
119 class LazyObjFile : public InputFile {
120 public:
LazyObjFile(MemoryBufferRef m)121 explicit LazyObjFile(MemoryBufferRef m) : InputFile(LazyObjectKind, m) {}
classof(const InputFile * f)122 static bool classof(const InputFile *f) {
123 return f->kind() == LazyObjectKind;
124 }
125 // Makes this object file part of the link.
126 void fetch();
127 // Adds the symbols in this file to the symbol table as LazyObject symbols.
128 void parse() override;
129
130 private:
131 std::vector<Symbol *> symbols;
132 };
133
134 // .obj or .o file. This may be a member of an archive file.
135 class ObjFile : public InputFile {
136 public:
ObjFile(MemoryBufferRef m)137 explicit ObjFile(MemoryBufferRef m) : InputFile(ObjectKind, m) {}
ObjFile(MemoryBufferRef m,std::vector<Symbol * > && symbols)138 explicit ObjFile(MemoryBufferRef m, std::vector<Symbol *> &&symbols)
139 : InputFile(ObjectKind, m), symbols(std::move(symbols)) {}
classof(const InputFile * f)140 static bool classof(const InputFile *f) { return f->kind() == ObjectKind; }
141 void parse() override;
142 MachineTypes getMachineType() override;
getChunks()143 ArrayRef<Chunk *> getChunks() { return chunks; }
getDebugChunks()144 ArrayRef<SectionChunk *> getDebugChunks() { return debugChunks; }
getSXDataChunks()145 ArrayRef<SectionChunk *> getSXDataChunks() { return sxDataChunks; }
getGuardFidChunks()146 ArrayRef<SectionChunk *> getGuardFidChunks() { return guardFidChunks; }
getGuardIATChunks()147 ArrayRef<SectionChunk *> getGuardIATChunks() { return guardIATChunks; }
getGuardLJmpChunks()148 ArrayRef<SectionChunk *> getGuardLJmpChunks() { return guardLJmpChunks; }
getSymbols()149 ArrayRef<Symbol *> getSymbols() { return symbols; }
150
getMutableSymbols()151 MutableArrayRef<Symbol *> getMutableSymbols() { return symbols; }
152
153 ArrayRef<uint8_t> getDebugSection(StringRef secName);
154
155 // Returns a Symbol object for the symbolIndex'th symbol in the
156 // underlying object file.
getSymbol(uint32_t symbolIndex)157 Symbol *getSymbol(uint32_t symbolIndex) {
158 return symbols[symbolIndex];
159 }
160
161 // Returns the underlying COFF file.
getCOFFObj()162 COFFObjectFile *getCOFFObj() { return coffObj.get(); }
163
164 // Add a symbol for a range extension thunk. Return the new symbol table
165 // index. This index can be used to modify a relocation.
addRangeThunkSymbol(Symbol * thunk)166 uint32_t addRangeThunkSymbol(Symbol *thunk) {
167 symbols.push_back(thunk);
168 return symbols.size() - 1;
169 }
170
171 void includeResourceChunks();
172
isResourceObjFile()173 bool isResourceObjFile() const { return !resourceChunks.empty(); }
174
175 static std::vector<ObjFile *> instances;
176
177 // Flags in the absolute @feat.00 symbol if it is present. These usually
178 // indicate if an object was compiled with certain security features enabled
179 // like stack guard, safeseh, /guard:cf, or other things.
180 uint32_t feat00Flags = 0;
181
182 // True if this object file is compatible with SEH. COFF-specific and
183 // x86-only. COFF spec 5.10.1. The .sxdata section.
hasSafeSEH()184 bool hasSafeSEH() { return feat00Flags & 0x1; }
185
186 // True if this file was compiled with /guard:cf.
hasGuardCF()187 bool hasGuardCF() { return feat00Flags & 0x800; }
188
189 // Pointer to the PDB module descriptor builder. Various debug info records
190 // will reference object files by "module index", which is here. Things like
191 // source files and section contributions are also recorded here. Will be null
192 // if we are not producing a PDB.
193 llvm::pdb::DbiModuleDescriptorBuilder *moduleDBI = nullptr;
194
195 const coff_section *addrsigSec = nullptr;
196
197 const coff_section *callgraphSec = nullptr;
198
199 // When using Microsoft precompiled headers, this is the PCH's key.
200 // The same key is used by both the precompiled object, and objects using the
201 // precompiled object. Any difference indicates out-of-date objects.
202 llvm::Optional<uint32_t> pchSignature;
203
204 // Whether this file was compiled with /hotpatch.
205 bool hotPatchable = false;
206
207 // Whether the object was already merged into the final PDB.
208 bool mergedIntoPDB = false;
209
210 // If the OBJ has a .debug$T stream, this tells how it will be handled.
211 TpiSource *debugTypesObj = nullptr;
212
213 // The .debug$P or .debug$T section data if present. Empty otherwise.
214 ArrayRef<uint8_t> debugTypes;
215
216 llvm::Optional<std::pair<StringRef, uint32_t>>
217 getVariableLocation(StringRef var);
218
219 llvm::Optional<llvm::DILineInfo> getDILineInfo(uint32_t offset,
220 uint32_t sectionIndex);
221
222 private:
223 const coff_section* getSection(uint32_t i);
getSection(COFFSymbolRef sym)224 const coff_section *getSection(COFFSymbolRef sym) {
225 return getSection(sym.getSectionNumber());
226 }
227
228 void initializeChunks();
229 void initializeSymbols();
230 void initializeFlags();
231 void initializeDependencies();
232
233 SectionChunk *
234 readSection(uint32_t sectionNumber,
235 const llvm::object::coff_aux_section_definition *def,
236 StringRef leaderName);
237
238 void readAssociativeDefinition(
239 COFFSymbolRef coffSym,
240 const llvm::object::coff_aux_section_definition *def);
241
242 void readAssociativeDefinition(
243 COFFSymbolRef coffSym,
244 const llvm::object::coff_aux_section_definition *def,
245 uint32_t parentSection);
246
247 void recordPrevailingSymbolForMingw(
248 COFFSymbolRef coffSym,
249 llvm::DenseMap<StringRef, uint32_t> &prevailingSectionMap);
250
251 void maybeAssociateSEHForMingw(
252 COFFSymbolRef sym, const llvm::object::coff_aux_section_definition *def,
253 const llvm::DenseMap<StringRef, uint32_t> &prevailingSectionMap);
254
255 // Given a new symbol Sym with comdat selection Selection, if the new
256 // symbol is not (yet) Prevailing and the existing comdat leader set to
257 // Leader, emits a diagnostic if the new symbol and its selection doesn't
258 // match the existing symbol and its selection. If either old or new
259 // symbol have selection IMAGE_COMDAT_SELECT_LARGEST, Sym might replace
260 // the existing leader. In that case, Prevailing is set to true.
261 void
262 handleComdatSelection(COFFSymbolRef sym, llvm::COFF::COMDATType &selection,
263 bool &prevailing, DefinedRegular *leader,
264 const llvm::object::coff_aux_section_definition *def);
265
266 llvm::Optional<Symbol *>
267 createDefined(COFFSymbolRef sym,
268 std::vector<const llvm::object::coff_aux_section_definition *>
269 &comdatDefs,
270 bool &prevailingComdat);
271 Symbol *createRegular(COFFSymbolRef sym);
272 Symbol *createUndefined(COFFSymbolRef sym);
273
274 std::unique_ptr<COFFObjectFile> coffObj;
275
276 // List of all chunks defined by this file. This includes both section
277 // chunks and non-section chunks for common symbols.
278 std::vector<Chunk *> chunks;
279
280 std::vector<SectionChunk *> resourceChunks;
281
282 // CodeView debug info sections.
283 std::vector<SectionChunk *> debugChunks;
284
285 // Chunks containing symbol table indices of exception handlers. Only used for
286 // 32-bit x86.
287 std::vector<SectionChunk *> sxDataChunks;
288
289 // Chunks containing symbol table indices of address taken symbols, address
290 // taken IAT entries, and longjmp targets. These are not linked into the
291 // final binary when /guard:cf is set.
292 std::vector<SectionChunk *> guardFidChunks;
293 std::vector<SectionChunk *> guardIATChunks;
294 std::vector<SectionChunk *> guardLJmpChunks;
295
296 // This vector contains a list of all symbols defined or referenced by this
297 // file. They are indexed such that you can get a Symbol by symbol
298 // index. Nonexistent indices (which are occupied by auxiliary
299 // symbols in the real symbol table) are filled with null pointers.
300 std::vector<Symbol *> symbols;
301
302 // This vector contains the same chunks as Chunks, but they are
303 // indexed such that you can get a SectionChunk by section index.
304 // Nonexistent section indices are filled with null pointers.
305 // (Because section number is 1-based, the first slot is always a
306 // null pointer.) This vector is only valid during initialization.
307 std::vector<SectionChunk *> sparseChunks;
308
309 DWARFCache *dwarf = nullptr;
310 };
311
312 // This is a PDB type server dependency, that is not a input file per se, but
313 // needs to be treated like one. Such files are discovered from the debug type
314 // stream.
315 class PDBInputFile : public InputFile {
316 public:
317 explicit PDBInputFile(MemoryBufferRef m);
318 ~PDBInputFile();
classof(const InputFile * f)319 static bool classof(const InputFile *f) { return f->kind() == PDBKind; }
320 void parse() override;
321
322 static void enqueue(StringRef path, ObjFile *fromFile);
323
324 static PDBInputFile *findFromRecordPath(StringRef path, ObjFile *fromFile);
325
326 static std::map<std::string, PDBInputFile *> instances;
327
328 // Record possible errors while opening the PDB file
329 llvm::Optional<Error> loadErr;
330
331 // This is the actual interface to the PDB (if it was opened successfully)
332 std::unique_ptr<llvm::pdb::NativeSession> session;
333
334 // If the PDB has a .debug$T stream, this tells how it will be handled.
335 TpiSource *debugTypesObj = nullptr;
336 };
337
338 // This type represents import library members that contain DLL names
339 // and symbols exported from the DLLs. See Microsoft PE/COFF spec. 7
340 // for details about the format.
341 class ImportFile : public InputFile {
342 public:
ImportFile(MemoryBufferRef m)343 explicit ImportFile(MemoryBufferRef m) : InputFile(ImportKind, m) {}
344
classof(const InputFile * f)345 static bool classof(const InputFile *f) { return f->kind() == ImportKind; }
346
347 static std::vector<ImportFile *> instances;
348
349 Symbol *impSym = nullptr;
350 Symbol *thunkSym = nullptr;
351 std::string dllName;
352
353 private:
354 void parse() override;
355
356 public:
357 StringRef externalName;
358 const coff_import_header *hdr;
359 Chunk *location = nullptr;
360
361 // We want to eliminate dllimported symbols if no one actually refers to them.
362 // These "Live" bits are used to keep track of which import library members
363 // are actually in use.
364 //
365 // If the Live bit is turned off by MarkLive, Writer will ignore dllimported
366 // symbols provided by this import library member. We also track whether the
367 // imported symbol is used separately from whether the thunk is used in order
368 // to avoid creating unnecessary thunks.
369 bool live = !config->doGC;
370 bool thunkLive = !config->doGC;
371 };
372
373 // Used for LTO.
374 class BitcodeFile : public InputFile {
375 public:
376 BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
377 uint64_t offsetInArchive);
378 explicit BitcodeFile(MemoryBufferRef m, StringRef archiveName,
379 uint64_t offsetInArchive,
380 std::vector<Symbol *> &&symbols);
381 ~BitcodeFile();
classof(const InputFile * f)382 static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
getSymbols()383 ArrayRef<Symbol *> getSymbols() { return symbols; }
384 MachineTypes getMachineType() override;
385 static std::vector<BitcodeFile *> instances;
386 std::unique_ptr<llvm::lto::InputFile> obj;
387
388 private:
389 void parse() override;
390
391 std::vector<Symbol *> symbols;
392 };
393
isBitcode(MemoryBufferRef mb)394 inline bool isBitcode(MemoryBufferRef mb) {
395 return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode;
396 }
397
398 std::string replaceThinLTOSuffix(StringRef path);
399 } // namespace coff
400
401 std::string toString(const coff::InputFile *file);
402 } // namespace lld
403
404 #endif
405