1 //===- Symbols.h ------------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #ifndef LLD_WASM_SYMBOLS_H
10 #define LLD_WASM_SYMBOLS_H
11
12 #include "Config.h"
13 #include "lld/Common/LLVM.h"
14 #include "llvm/ADT/Optional.h"
15 #include "llvm/Object/Archive.h"
16 #include "llvm/Object/Wasm.h"
17
18 namespace lld {
19 namespace wasm {
20
// Shared string constants

// The default module name to use for symbol imports.
extern const char *defaultModule;

// The name under which to import or export the wasm table.
extern const char *functionTableName;

using llvm::wasm::WasmSymbolType;

// Forward declarations. The symbol classes in this header only refer to these
// types through pointers, so their full definitions are not needed here.
class InputFile;
class InputChunk;
class InputSegment;
class InputFunction;
class InputGlobal;
class InputEvent;
class InputSection;
class OutputSection;

// Sentinel value for an index (output symbol, GOT, table, function, global or
// event index) that has not been assigned.
#define INVALID_INDEX UINT32_MAX
41
42 // The base class for real symbol classes.
43 class Symbol {
44 public:
45 enum Kind : uint8_t {
46 DefinedFunctionKind,
47 DefinedDataKind,
48 DefinedGlobalKind,
49 DefinedEventKind,
50 SectionKind,
51 OutputSectionKind,
52 UndefinedFunctionKind,
53 UndefinedDataKind,
54 UndefinedGlobalKind,
55 LazyKind,
56 };
57
kind()58 Kind kind() const { return symbolKind; }
59
isDefined()60 bool isDefined() const { return !isLazy() && !isUndefined(); }
61
isUndefined()62 bool isUndefined() const {
63 return symbolKind == UndefinedFunctionKind ||
64 symbolKind == UndefinedDataKind || symbolKind == UndefinedGlobalKind;
65 }
66
isLazy()67 bool isLazy() const { return symbolKind == LazyKind; }
68
69 bool isLocal() const;
70 bool isWeak() const;
71 bool isHidden() const;
72
73 // Returns true if this symbol exists in a discarded (due to COMDAT) section
74 bool isDiscarded() const;
75
76 // True if this is an undefined weak symbol. This only works once
77 // all input files have been added.
isUndefWeak()78 bool isUndefWeak() const {
79 // See comment on lazy symbols for details.
80 return isWeak() && (isUndefined() || isLazy());
81 }
82
83 // Returns the symbol name.
getName()84 StringRef getName() const { return name; }
85
86 // Returns the file from which this symbol was created.
getFile()87 InputFile *getFile() const { return file; }
88
89 InputChunk *getChunk() const;
90
91 // Indicates that the section or import for this symbol will be included in
92 // the final image.
93 bool isLive() const;
94
95 // Marks the symbol's InputChunk as Live, so that it will be included in the
96 // final image.
97 void markLive();
98
99 void setHidden(bool isHidden);
100
101 // Get/set the index in the output symbol table. This is only used for
102 // relocatable output.
103 uint32_t getOutputSymbolIndex() const;
104 void setOutputSymbolIndex(uint32_t index);
105
106 WasmSymbolType getWasmType() const;
107 bool isExported() const;
108
109 // Indicates that the symbol is used in an __attribute__((used)) directive
110 // or similar.
111 bool isNoStrip() const;
112
113 const WasmSignature* getSignature() const;
114
getGOTIndex()115 uint32_t getGOTIndex() const {
116 assert(gotIndex != INVALID_INDEX);
117 return gotIndex;
118 }
119
120 void setGOTIndex(uint32_t index);
hasGOTIndex()121 bool hasGOTIndex() const { return gotIndex != INVALID_INDEX; }
122
123 protected:
Symbol(StringRef name,Kind k,uint32_t flags,InputFile * f)124 Symbol(StringRef name, Kind k, uint32_t flags, InputFile *f)
125 : name(name), file(f), symbolKind(k), referenced(!config->gcSections),
126 requiresGOT(false), isUsedInRegularObj(false), forceExport(false),
127 canInline(false), traced(false), isStub(false), flags(flags) {}
128
129 StringRef name;
130 InputFile *file;
131 uint32_t outputSymbolIndex = INVALID_INDEX;
132 uint32_t gotIndex = INVALID_INDEX;
133 Kind symbolKind;
134
135 public:
136 bool referenced : 1;
137
138 // True for data symbols that needs a dummy GOT entry. Used for static
139 // linking of GOT accesses.
140 bool requiresGOT : 1;
141
142 // True if the symbol was used for linking and thus need to be added to the
143 // output file's symbol table. This is true for all symbols except for
144 // unreferenced DSO symbols, lazy (archive) symbols, and bitcode symbols that
145 // are unreferenced except by other bitcode objects.
146 bool isUsedInRegularObj : 1;
147
148 // True if ths symbol is explicitly marked for export (i.e. via the
149 // -e/--export command line flag)
150 bool forceExport : 1;
151
152 // False if LTO shouldn't inline whatever this symbol points to. If a symbol
153 // is overwritten after LTO, LTO shouldn't inline the symbol because it
154 // doesn't know the final contents of the symbol.
155 bool canInline : 1;
156
157 // True if this symbol is specified by --trace-symbol option.
158 bool traced : 1;
159
160 // True if this symbol is a linker-synthesized stub function (traps when
161 // called) and should otherwise be treated as missing/undefined. See
162 // SymbolTable::replaceWithUndefined.
163 // These stubs never appear in the table and any table index relocations
164 // against them will produce address 0 (The table index representing
165 // the null function pointer).
166 bool isStub : 1;
167
168 uint32_t flags;
169 };
170
171 class FunctionSymbol : public Symbol {
172 public:
classof(const Symbol * s)173 static bool classof(const Symbol *s) {
174 return s->kind() == DefinedFunctionKind ||
175 s->kind() == UndefinedFunctionKind;
176 }
177
178 // Get/set the table index
179 void setTableIndex(uint32_t index);
180 uint32_t getTableIndex() const;
181 bool hasTableIndex() const;
182
183 // Get/set the function index
184 uint32_t getFunctionIndex() const;
185 void setFunctionIndex(uint32_t index);
186 bool hasFunctionIndex() const;
187
188 const WasmSignature *signature;
189
190 protected:
FunctionSymbol(StringRef name,Kind k,uint32_t flags,InputFile * f,const WasmSignature * sig)191 FunctionSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f,
192 const WasmSignature *sig)
193 : Symbol(name, k, flags, f), signature(sig) {}
194
195 uint32_t tableIndex = INVALID_INDEX;
196 uint32_t functionIndex = INVALID_INDEX;
197 };
198
199 class DefinedFunction : public FunctionSymbol {
200 public:
201 DefinedFunction(StringRef name, uint32_t flags, InputFile *f,
202 InputFunction *function);
203
classof(const Symbol * s)204 static bool classof(const Symbol *s) {
205 return s->kind() == DefinedFunctionKind;
206 }
207
208 InputFunction *function;
209 };
210
211 class UndefinedFunction : public FunctionSymbol {
212 public:
213 UndefinedFunction(StringRef name, llvm::Optional<StringRef> importName,
214 llvm::Optional<StringRef> importModule, uint32_t flags,
215 InputFile *file = nullptr,
216 const WasmSignature *type = nullptr,
217 bool isCalledDirectly = true)
FunctionSymbol(name,UndefinedFunctionKind,flags,file,type)218 : FunctionSymbol(name, UndefinedFunctionKind, flags, file, type),
219 importName(importName), importModule(importModule),
220 isCalledDirectly(isCalledDirectly) {}
221
classof(const Symbol * s)222 static bool classof(const Symbol *s) {
223 return s->kind() == UndefinedFunctionKind;
224 }
225
226 llvm::Optional<StringRef> importName;
227 llvm::Optional<StringRef> importModule;
228 DefinedFunction *stubFunction = nullptr;
229 bool isCalledDirectly;
230 };
231
232 // Section symbols for output sections are different from those for input
233 // section. These are generated by the linker and point the OutputSection
234 // rather than an InputSection.
235 class OutputSectionSymbol : public Symbol {
236 public:
OutputSectionSymbol(const OutputSection * s)237 OutputSectionSymbol(const OutputSection *s)
238 : Symbol("", OutputSectionKind, llvm::wasm::WASM_SYMBOL_BINDING_LOCAL,
239 nullptr),
240 section(s) {}
241
classof(const Symbol * s)242 static bool classof(const Symbol *s) {
243 return s->kind() == OutputSectionKind;
244 }
245
246 const OutputSection *section;
247 };
248
249 class SectionSymbol : public Symbol {
250 public:
251 SectionSymbol(uint32_t flags, const InputSection *s, InputFile *f = nullptr)
252 : Symbol("", SectionKind, flags, f), section(s) {}
253
classof(const Symbol * s)254 static bool classof(const Symbol *s) { return s->kind() == SectionKind; }
255
256 const OutputSectionSymbol *getOutputSectionSymbol() const;
257
258 const InputSection *section;
259 };
260
261 class DataSymbol : public Symbol {
262 public:
classof(const Symbol * s)263 static bool classof(const Symbol *s) {
264 return s->kind() == DefinedDataKind || s->kind() == UndefinedDataKind;
265 }
266
267 protected:
DataSymbol(StringRef name,Kind k,uint32_t flags,InputFile * f)268 DataSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f)
269 : Symbol(name, k, flags, f) {}
270 };
271
272 class DefinedData : public DataSymbol {
273 public:
274 // Constructor for regular data symbols originating from input files.
DefinedData(StringRef name,uint32_t flags,InputFile * f,InputSegment * segment,uint64_t offset,uint64_t size)275 DefinedData(StringRef name, uint32_t flags, InputFile *f,
276 InputSegment *segment, uint64_t offset, uint64_t size)
277 : DataSymbol(name, DefinedDataKind, flags, f), segment(segment),
278 offset(offset), size(size) {}
279
280 // Constructor for linker synthetic data symbols.
DefinedData(StringRef name,uint32_t flags)281 DefinedData(StringRef name, uint32_t flags)
282 : DataSymbol(name, DefinedDataKind, flags, nullptr) {}
283
classof(const Symbol * s)284 static bool classof(const Symbol *s) { return s->kind() == DefinedDataKind; }
285
286 // Returns the output virtual address of a defined data symbol.
287 uint64_t getVirtualAddress() const;
288 void setVirtualAddress(uint64_t va);
289
290 // Returns the offset of a defined data symbol within its OutputSegment.
291 uint64_t getOutputSegmentOffset() const;
292 uint64_t getOutputSegmentIndex() const;
getSize()293 uint64_t getSize() const { return size; }
294
295 InputSegment *segment = nullptr;
296 uint32_t offset = 0;
297
298 protected:
299 uint64_t size = 0;
300 };
301
302 class UndefinedData : public DataSymbol {
303 public:
304 UndefinedData(StringRef name, uint32_t flags, InputFile *file = nullptr)
DataSymbol(name,UndefinedDataKind,flags,file)305 : DataSymbol(name, UndefinedDataKind, flags, file) {}
classof(const Symbol * s)306 static bool classof(const Symbol *s) {
307 return s->kind() == UndefinedDataKind;
308 }
309 };
310
311 class GlobalSymbol : public Symbol {
312 public:
classof(const Symbol * s)313 static bool classof(const Symbol *s) {
314 return s->kind() == DefinedGlobalKind || s->kind() == UndefinedGlobalKind;
315 }
316
getGlobalType()317 const WasmGlobalType *getGlobalType() const { return globalType; }
318
319 // Get/set the global index
320 uint32_t getGlobalIndex() const;
321 void setGlobalIndex(uint32_t index);
322 bool hasGlobalIndex() const;
323
324 protected:
GlobalSymbol(StringRef name,Kind k,uint32_t flags,InputFile * f,const WasmGlobalType * globalType)325 GlobalSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f,
326 const WasmGlobalType *globalType)
327 : Symbol(name, k, flags, f), globalType(globalType) {}
328
329 const WasmGlobalType *globalType;
330 uint32_t globalIndex = INVALID_INDEX;
331 };
332
333 class DefinedGlobal : public GlobalSymbol {
334 public:
335 DefinedGlobal(StringRef name, uint32_t flags, InputFile *file,
336 InputGlobal *global);
337
classof(const Symbol * s)338 static bool classof(const Symbol *s) {
339 return s->kind() == DefinedGlobalKind;
340 }
341
342 InputGlobal *global;
343 };
344
345 class UndefinedGlobal : public GlobalSymbol {
346 public:
347 UndefinedGlobal(StringRef name, llvm::Optional<StringRef> importName,
348 llvm::Optional<StringRef> importModule, uint32_t flags,
349 InputFile *file = nullptr,
350 const WasmGlobalType *type = nullptr)
GlobalSymbol(name,UndefinedGlobalKind,flags,file,type)351 : GlobalSymbol(name, UndefinedGlobalKind, flags, file, type),
352 importName(importName), importModule(importModule) {}
353
classof(const Symbol * s)354 static bool classof(const Symbol *s) {
355 return s->kind() == UndefinedGlobalKind;
356 }
357
358 llvm::Optional<StringRef> importName;
359 llvm::Optional<StringRef> importModule;
360 };
361
362 // Wasm events are features that suspend the current execution and transfer the
363 // control flow to a corresponding handler. Currently the only supported event
364 // kind is exceptions.
365 //
366 // Event tags are values to distinguish different events. For exceptions, they
367 // can be used to distinguish different language's exceptions, i.e., all C++
368 // exceptions have the same tag. Wasm can generate code capable of doing
369 // different handling actions based on the tag of caught exceptions.
370 //
371 // A single EventSymbol object represents a single tag. C++ exception event
372 // symbol is a weak symbol generated in every object file in which exceptions
373 // are used, and has name '__cpp_exception' for linking.
374 class EventSymbol : public Symbol {
375 public:
classof(const Symbol * s)376 static bool classof(const Symbol *s) { return s->kind() == DefinedEventKind; }
377
getEventType()378 const WasmEventType *getEventType() const { return eventType; }
379
380 // Get/set the event index
381 uint32_t getEventIndex() const;
382 void setEventIndex(uint32_t index);
383 bool hasEventIndex() const;
384
385 const WasmSignature *signature;
386
387 protected:
EventSymbol(StringRef name,Kind k,uint32_t flags,InputFile * f,const WasmEventType * eventType,const WasmSignature * sig)388 EventSymbol(StringRef name, Kind k, uint32_t flags, InputFile *f,
389 const WasmEventType *eventType, const WasmSignature *sig)
390 : Symbol(name, k, flags, f), signature(sig), eventType(eventType) {}
391
392 const WasmEventType *eventType;
393 uint32_t eventIndex = INVALID_INDEX;
394 };
395
396 class DefinedEvent : public EventSymbol {
397 public:
398 DefinedEvent(StringRef name, uint32_t flags, InputFile *file,
399 InputEvent *event);
400
classof(const Symbol * s)401 static bool classof(const Symbol *s) { return s->kind() == DefinedEventKind; }
402
403 InputEvent *event;
404 };
405
406 // LazySymbol represents a symbol that is not yet in the link, but we know where
407 // to find it if needed. If the resolver finds both Undefined and Lazy for the
408 // same name, it will ask the Lazy to load a file.
409 //
410 // A special complication is the handling of weak undefined symbols. They should
411 // not load a file, but we have to remember we have seen both the weak undefined
412 // and the lazy. We represent that with a lazy symbol with a weak binding. This
413 // means that code looking for undefined symbols normally also has to take lazy
414 // symbols into consideration.
415 class LazySymbol : public Symbol {
416 public:
LazySymbol(StringRef name,uint32_t flags,InputFile * file,const llvm::object::Archive::Symbol & sym)417 LazySymbol(StringRef name, uint32_t flags, InputFile *file,
418 const llvm::object::Archive::Symbol &sym)
419 : Symbol(name, LazyKind, flags, file), archiveSymbol(sym) {}
420
classof(const Symbol * s)421 static bool classof(const Symbol *s) { return s->kind() == LazyKind; }
422 void fetch();
423 void setWeak();
424 MemoryBufferRef getMemberBuffer();
425
426 // Lazy symbols can have a signature because they can replace an
427 // UndefinedFunction which which case we need to be able to preserve the
428 // signature.
429 // TODO(sbc): This repetition of the signature field is inelegant. Revisit
430 // the use of class hierarchy to represent symbol taxonomy.
431 const WasmSignature *signature = nullptr;
432
433 private:
434 llvm::object::Archive::Symbol archiveSymbol;
435 };
436
// Linker-generated symbols. Each static member points at one well-known
// symbol; the comment above each member gives the symbol's name.
struct WasmSym {
  // __global_base
  // Symbol marking the start of the global section.
  static DefinedData *globalBase;

  // __stack_pointer
  // Global that holds the address of the top of the explicit value stack in
  // linear memory.
  static GlobalSymbol *stackPointer;

  // __tls_base
  // Global that holds the address of the base of the current thread's
  // TLS block.
  static GlobalSymbol *tlsBase;

  // __tls_size
  // Symbol whose value is the size of the TLS block.
  static GlobalSymbol *tlsSize;

  // __tls_align
  // Symbol whose value is the alignment of the TLS block.
  static GlobalSymbol *tlsAlign;

  // __data_end
  // Symbol marking the end of the data and bss.
  static DefinedData *dataEnd;

  // __heap_base
  // Symbol marking the end of the data, bss and explicit stack. Any linear
  // memory following this address is not used by the linked code and can
  // therefore be used as a backing store for brk()/malloc() implementations.
  static DefinedData *heapBase;

  // __wasm_init_memory_flag
  // Symbol whose contents are nonzero iff memory has already been initialized.
  static DefinedData *initMemoryFlag;

  // __wasm_init_memory
  // Function that initializes passive data segments during instantiation.
  static DefinedFunction *initMemory;

  // __wasm_call_ctors
  // Function that directly calls all ctors in priority order.
  static DefinedFunction *callCtors;

  // __wasm_call_dtors
  // Function that calls the libc/etc. cleanup function.
  static DefinedFunction *callDtors;

  // __wasm_apply_relocs
  // Function that applies relocations to data segment post-instantiation.
  static DefinedFunction *applyRelocs;

  // __wasm_init_tls
  // Function that allocates thread-local storage and initializes it.
  static DefinedFunction *initTLS;

  // __dso_handle
  // Symbol used in calls to __cxa_atexit to determine current DLL
  static DefinedData *dsoHandle;

  // __table_base
  // Used in PIC code for offset of indirect function table
  static UndefinedGlobal *tableBase;
  static DefinedData *definedTableBase;

  // __memory_base
  // Used in PIC code for offset of global data
  static UndefinedGlobal *memoryBase;
  static DefinedData *definedMemoryBase;
};
509
// A buffer class that is large enough to hold any Symbol-derived
// object. We allocate memory using this class and instantiate a symbol
// using the placement new.
// Each member is a char array with the size and alignment of one concrete
// symbol class, so the union takes the largest size and the strictest
// alignment among them.
union SymbolUnion {
  alignas(DefinedFunction) char a[sizeof(DefinedFunction)];
  alignas(DefinedData) char b[sizeof(DefinedData)];
  alignas(DefinedGlobal) char c[sizeof(DefinedGlobal)];
  alignas(DefinedEvent) char d[sizeof(DefinedEvent)];
  alignas(LazySymbol) char e[sizeof(LazySymbol)];
  alignas(UndefinedFunction) char f[sizeof(UndefinedFunction)];
  alignas(UndefinedData) char g[sizeof(UndefinedData)];
  alignas(UndefinedGlobal) char h[sizeof(UndefinedGlobal)];
  alignas(SectionSymbol) char i[sizeof(SectionSymbol)];
};

// It is important to keep the size of SymbolUnion small for performance and
// memory usage reasons. 96 bytes is a soft limit based on the size of
// UndefinedFunction on a 64-bit system. Note that the assertion below only
// enforces a ceiling of 120 bytes, not the 96-byte soft limit.
static_assert(sizeof(SymbolUnion) <= 120, "SymbolUnion too large");
529
// Tracing helpers, defined out of line. replaceSymbol() calls
// printTraceSymbol for a symbol whose `traced` flag is set, i.e. one named by
// the --trace-symbol option.
void printTraceSymbol(Symbol *sym);
void printTraceSymbolUndefined(StringRef name, const InputFile* file);
532
533 template <typename T, typename... ArgT>
replaceSymbol(Symbol * s,ArgT &&...arg)534 T *replaceSymbol(Symbol *s, ArgT &&... arg) {
535 static_assert(std::is_trivially_destructible<T>(),
536 "Symbol types must be trivially destructible");
537 static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small");
538 static_assert(alignof(T) <= alignof(SymbolUnion),
539 "SymbolUnion not aligned enough");
540 assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr &&
541 "Not a Symbol");
542
543 Symbol symCopy = *s;
544
545 T *s2 = new (s) T(std::forward<ArgT>(arg)...);
546 s2->isUsedInRegularObj = symCopy.isUsedInRegularObj;
547 s2->forceExport = symCopy.forceExport;
548 s2->canInline = symCopy.canInline;
549 s2->traced = symCopy.traced;
550
551 // Print out a log message if --trace-symbol was specified.
552 // This is for debugging.
553 if (s2->traced)
554 printTraceSymbol(s2);
555
556 return s2;
557 }
558
559 } // namespace wasm
560
// Returns a symbol name for an error message.
std::string toString(const wasm::Symbol &sym);
// Returns a string for a symbol kind.
std::string toString(wasm::Symbol::Kind kind);
// NOTE(review): presumably returns the demangled form of `name` when
// demangling applies and `name` unchanged otherwise; confirm against the
// definition.
std::string maybeDemangleSymbol(StringRef name);
565
566 } // namespace lld
567
568 #endif
569