1 //===- InputChunks.h --------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // An InputChunks represents an indivisible opaque region of a input wasm file. 10 // i.e. a single wasm data segment or a single wasm function. 11 // 12 // They are written directly to the mmap'd output file after which relocations 13 // are applied. Because each Chunk is independent they can be written in 14 // parallel. 15 // 16 // Chunks are also unit on which garbage collection (--gc-sections) operates. 17 // 18 //===----------------------------------------------------------------------===// 19 20 #ifndef LLD_WASM_INPUT_CHUNKS_H 21 #define LLD_WASM_INPUT_CHUNKS_H 22 23 #include "Config.h" 24 #include "InputFiles.h" 25 #include "lld/Common/ErrorHandler.h" 26 #include "lld/Common/LLVM.h" 27 #include "llvm/Object/Wasm.h" 28 29 namespace lld { 30 namespace wasm { 31 32 class ObjFile; 33 class OutputSegment; 34 class OutputSection; 35 36 class InputChunk { 37 public: 38 enum Kind { DataSegment, Function, SyntheticFunction, Section }; 39 kind()40 Kind kind() const { return sectionKind; } 41 getSize()42 virtual uint32_t getSize() const { return data().size(); } getInputSize()43 virtual uint32_t getInputSize() const { return getSize(); }; 44 45 virtual void writeTo(uint8_t *sectionStart) const; 46 getRelocations()47 ArrayRef<WasmRelocation> getRelocations() const { return relocations; } setRelocations(ArrayRef<WasmRelocation> rs)48 void setRelocations(ArrayRef<WasmRelocation> rs) { relocations = rs; } 49 50 virtual StringRef getName() const = 0; 51 virtual StringRef getDebugName() const = 0; 52 virtual uint32_t getComdat() const = 0; 53 StringRef getComdatName() const; 54 virtual uint32_t getInputSectionOffset() const = 0; 55 getNumRelocations()56 size_t getNumRelocations() const { return relocations.size(); } 57 void writeRelocations(llvm::raw_ostream &os) const; 58 59 ObjFile *file; 60 OutputSection *outputSec = nullptr; 61 // Offset withing the output section 62 int32_t outputOffset = 0; 63 64 // Signals that the section is part of the output. The garbage collector, 65 // and COMDAT handling can set a sections' Live bit. 66 // If GC is disabled, all sections start out as live by default. 67 unsigned live : 1; 68 69 // Signals the chunk was discarded by COMDAT handling. 70 unsigned discarded : 1; 71 72 protected: InputChunk(ObjFile * f,Kind k)73 InputChunk(ObjFile *f, Kind k) 74 : file(f), live(!config->gcSections), discarded(false), sectionKind(k) {} 75 virtual ~InputChunk() = default; 76 virtual ArrayRef<uint8_t> data() const = 0; getTombstone()77 virtual uint64_t getTombstone() const { return 0; } 78 79 // Verifies the existing data at relocation targets matches our expectations. 80 // This is performed only debug builds as an extra sanity check. 81 void verifyRelocTargets() const; 82 83 ArrayRef<WasmRelocation> relocations; 84 Kind sectionKind; 85 }; 86 87 // Represents a WebAssembly data segment which can be included as part of 88 // an output data segments. Note that in WebAssembly, unlike ELF and other 89 // formats, used the term "data segment" to refer to the continuous regions of 90 // memory that make on the data section. See: 91 // https://webassembly.github.io/spec/syntax/modules.html#syntax-data 92 // 93 // For example, by default, clang will produce a separate data section for 94 // each global variable. 95 class InputSegment : public InputChunk { 96 public: InputSegment(const WasmSegment & seg,ObjFile * f)97 InputSegment(const WasmSegment &seg, ObjFile *f) 98 : InputChunk(f, InputChunk::DataSegment), segment(seg) {} 99 classof(const InputChunk * c)100 static bool classof(const InputChunk *c) { return c->kind() == DataSegment; } 101 102 void generateRelocationCode(raw_ostream &os) const; 103 getAlignment()104 uint32_t getAlignment() const { return segment.Data.Alignment; } getName()105 StringRef getName() const override { return segment.Data.Name; } getDebugName()106 StringRef getDebugName() const override { return StringRef(); } getComdat()107 uint32_t getComdat() const override { return segment.Data.Comdat; } getInputSectionOffset()108 uint32_t getInputSectionOffset() const override { 109 return segment.SectionOffset; 110 } 111 112 const OutputSegment *outputSeg = nullptr; 113 int32_t outputSegmentOffset = 0; 114 115 protected: data()116 ArrayRef<uint8_t> data() const override { return segment.Data.Content; } 117 118 const WasmSegment &segment; 119 }; 120 121 // Represents a single wasm function within and input file. These are 122 // combined to create the final output CODE section. 123 class InputFunction : public InputChunk { 124 public: InputFunction(const WasmSignature & s,const WasmFunction * func,ObjFile * f)125 InputFunction(const WasmSignature &s, const WasmFunction *func, ObjFile *f) 126 : InputChunk(f, InputChunk::Function), signature(s), function(func), 127 exportName(func && func->ExportName.hasValue() 128 ? (*func->ExportName).str() 129 : llvm::Optional<std::string>()) {} 130 classof(const InputChunk * c)131 static bool classof(const InputChunk *c) { 132 return c->kind() == InputChunk::Function || 133 c->kind() == InputChunk::SyntheticFunction; 134 } 135 136 void writeTo(uint8_t *sectionStart) const override; getName()137 StringRef getName() const override { return function->SymbolName; } getDebugName()138 StringRef getDebugName() const override { return function->DebugName; } getExportName()139 llvm::Optional<StringRef> getExportName() const { 140 return exportName.hasValue() ? llvm::Optional<StringRef>(*exportName) 141 : llvm::Optional<StringRef>(); 142 } setExportName(std::string exportName)143 void setExportName(std::string exportName) { this->exportName = exportName; } getComdat()144 uint32_t getComdat() const override { return function->Comdat; } getFunctionInputOffset()145 uint32_t getFunctionInputOffset() const { return getInputSectionOffset(); } getFunctionCodeOffset()146 uint32_t getFunctionCodeOffset() const { return function->CodeOffset; } getSize()147 uint32_t getSize() const override { 148 if (config->compressRelocations && file) { 149 assert(compressedSize); 150 return compressedSize; 151 } 152 return data().size(); 153 } getInputSize()154 uint32_t getInputSize() const override { return function->Size; } getFunctionIndex()155 uint32_t getFunctionIndex() const { return functionIndex.getValue(); } hasFunctionIndex()156 bool hasFunctionIndex() const { return functionIndex.hasValue(); } 157 void setFunctionIndex(uint32_t index); getInputSectionOffset()158 uint32_t getInputSectionOffset() const override { 159 return function->CodeSectionOffset; 160 } getTableIndex()161 uint32_t getTableIndex() const { return tableIndex.getValue(); } hasTableIndex()162 bool hasTableIndex() const { return tableIndex.hasValue(); } 163 void setTableIndex(uint32_t index); 164 165 // The size of a given input function can depend on the values of the 166 // LEB relocations within it. This finalizeContents method is called after 167 // all the symbol values have be calculated but before getSize() is ever 168 // called. 169 void calculateSize(); 170 171 const WasmSignature &signature; 172 173 protected: data()174 ArrayRef<uint8_t> data() const override { 175 assert(!config->compressRelocations); 176 return file->codeSection->Content.slice(getInputSectionOffset(), 177 function->Size); 178 } 179 180 const WasmFunction *function; 181 llvm::Optional<std::string> exportName; 182 llvm::Optional<uint32_t> functionIndex; 183 llvm::Optional<uint32_t> tableIndex; 184 uint32_t compressedFuncSize = 0; 185 uint32_t compressedSize = 0; 186 }; 187 188 class SyntheticFunction : public InputFunction { 189 public: 190 SyntheticFunction(const WasmSignature &s, StringRef name, 191 StringRef debugName = {}) InputFunction(s,nullptr,nullptr)192 : InputFunction(s, nullptr, nullptr), name(name), debugName(debugName) { 193 sectionKind = InputChunk::SyntheticFunction; 194 } 195 classof(const InputChunk * c)196 static bool classof(const InputChunk *c) { 197 return c->kind() == InputChunk::SyntheticFunction; 198 } 199 getName()200 StringRef getName() const override { return name; } getDebugName()201 StringRef getDebugName() const override { return debugName; } getComdat()202 uint32_t getComdat() const override { return UINT32_MAX; } 203 setBody(ArrayRef<uint8_t> body_)204 void setBody(ArrayRef<uint8_t> body_) { body = body_; } 205 206 protected: data()207 ArrayRef<uint8_t> data() const override { return body; } 208 209 StringRef name; 210 StringRef debugName; 211 ArrayRef<uint8_t> body; 212 }; 213 214 // Represents a single Wasm Section within an input file. 215 class InputSection : public InputChunk { 216 public: InputSection(const WasmSection & s,ObjFile * f)217 InputSection(const WasmSection &s, ObjFile *f) 218 : InputChunk(f, InputChunk::Section), section(s), tombstoneValue(getTombstoneForSection(s.Name)) { 219 assert(section.Type == llvm::wasm::WASM_SEC_CUSTOM); 220 } 221 getName()222 StringRef getName() const override { return section.Name; } getDebugName()223 StringRef getDebugName() const override { return StringRef(); } getComdat()224 uint32_t getComdat() const override { return UINT32_MAX; } 225 226 protected: data()227 ArrayRef<uint8_t> data() const override { return section.Content; } 228 229 // Offset within the input section. This is only zero since this chunk 230 // type represents an entire input section, not part of one. getInputSectionOffset()231 uint32_t getInputSectionOffset() const override { return 0; } getTombstone()232 uint64_t getTombstone() const override { return tombstoneValue; } 233 static uint64_t getTombstoneForSection(StringRef name); 234 235 const WasmSection §ion; 236 const uint64_t tombstoneValue; 237 }; 238 239 } // namespace wasm 240 241 std::string toString(const wasm::InputChunk *); 242 StringRef relocTypeToString(uint8_t relocType); 243 244 } // namespace lld 245 246 #endif // LLD_WASM_INPUT_CHUNKS_H 247