1 //===- lib/ReaderWriter/MachO/MachONormalizedFile.h -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 ///
10 /// \file These data structures comprise the "normalized" view of
11 /// mach-o object files. The normalized view is an in-memory only data structure
12 /// which is always in native endianness and pointer size.
13 ///
14 /// The normalized view easily converts to and from YAML using YAML I/O.
15 ///
16 /// The normalized view converts to and from binary mach-o object files using
17 /// the writeBinary() and readBinary() functions.
18 ///
/// The normalized view converts to and from lld::Atoms using the
/// normalizedToAtoms() and normalizedFromAtoms() functions.
21 ///
22 /// Overall, the conversion paths available look like:
23 ///
///                 +---------------+
///                 | binary mach-o |
///                 +---------------+
///                        ^
///                        |
///                        v
///                  +------------+         +------+
///                  | normalized |   <->   | yaml |
///                  +------------+         +------+
///                        ^
///                        |
///                        v
///                    +-------+
///                    | Atoms |
///                    +-------+
39 ///
40
41 #ifndef LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H
42 #define LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H
43
44 #include "DebugInfo.h"
45 #include "lld/Common/LLVM.h"
46 #include "lld/Core/Error.h"
47 #include "lld/ReaderWriter/MachOLinkingContext.h"
48 #include "llvm/ADT/SmallString.h"
49 #include "llvm/ADT/StringRef.h"
50 #include "llvm/BinaryFormat/MachO.h"
51 #include "llvm/Support/Allocator.h"
52 #include "llvm/Support/Debug.h"
53 #include "llvm/Support/ErrorOr.h"
54 #include "llvm/Support/YAMLTraits.h"
55
56 using llvm::BumpPtrAllocator;
57 using llvm::yaml::Hex64;
58 using llvm::yaml::Hex32;
59 using llvm::yaml::Hex16;
60 using llvm::yaml::Hex8;
61 using llvm::yaml::SequenceTraits;
62 using llvm::MachO::HeaderFileType;
63 using llvm::MachO::BindType;
64 using llvm::MachO::RebaseType;
65 using llvm::MachO::NListType;
66 using llvm::MachO::RelocationInfoType;
67 using llvm::MachO::SectionType;
68 using llvm::MachO::LoadCommandType;
69 using llvm::MachO::ExportSymbolKind;
70 using llvm::MachO::DataRegionType;
71
72 namespace lld {
73 namespace mach_o {
74 namespace normalized {
75
76
77 /// The real mach-o relocation record is 8-bytes on disk and is
78 /// encoded in one of two different bit-field patterns. This
79 /// normalized form has the union of all possible fields.
80 struct Relocation {
RelocationRelocation81 Relocation() : offset(0), scattered(false),
82 type(llvm::MachO::GENERIC_RELOC_VANILLA),
83 length(0), pcRel(false), isExtern(false), value(0),
84 symbol(0) { }
85
86 Hex32 offset;
87 bool scattered;
88 RelocationInfoType type;
89 uint8_t length;
90 bool pcRel;
91 bool isExtern;
92 Hex32 value;
93 uint32_t symbol;
94 };
95
96 /// A typedef so that YAML I/O can treat this vector as a sequence.
97 typedef std::vector<Relocation> Relocations;
98
99 /// A typedef so that YAML I/O can process the raw bytes in a section.
100 typedef std::vector<Hex8> ContentBytes;
101
102 /// A typedef so that YAML I/O can treat indirect symbols as a flow sequence.
103 typedef std::vector<uint32_t> IndirectSymbols;
104
105 /// A typedef so that YAML I/O can encode/decode section attributes.
106 LLVM_YAML_STRONG_TYPEDEF(uint32_t, SectionAttr)
107
108 /// A typedef so that YAML I/O can encode/decode section alignment.
109 LLVM_YAML_STRONG_TYPEDEF(uint16_t, SectionAlignment)
110
111 /// Mach-O has a 32-bit and 64-bit section record. This normalized form
112 /// can support either kind.
113 struct Section {
SectionSection114 Section() : type(llvm::MachO::S_REGULAR),
115 attributes(0), alignment(1), address(0) { }
116
117 StringRef segmentName;
118 StringRef sectionName;
119 SectionType type;
120 SectionAttr attributes;
121 SectionAlignment alignment;
122 Hex64 address;
123 ArrayRef<uint8_t> content;
124 Relocations relocations;
125 IndirectSymbols indirectSymbols;
126 };
127
128
129 /// A typedef so that YAML I/O can encode/decode the scope bits of an nlist.
130 LLVM_YAML_STRONG_TYPEDEF(uint8_t, SymbolScope)
131
132 /// A typedef so that YAML I/O can encode/decode the desc bits of an nlist.
133 LLVM_YAML_STRONG_TYPEDEF(uint16_t, SymbolDesc)
134
135 /// Mach-O has a 32-bit and 64-bit symbol table entry (nlist), and the symbol
136 /// type and scope and mixed in the same n_type field. This normalized form
137 /// works for any pointer size and separates out the type and scope.
138 struct Symbol {
SymbolSymbol139 Symbol() : type(llvm::MachO::N_UNDF), scope(0), sect(0), desc(0), value(0) { }
140
141 StringRef name;
142 NListType type;
143 SymbolScope scope;
144 uint8_t sect;
145 SymbolDesc desc;
146 Hex64 value;
147 };
148
149 /// Check whether the given section type indicates a zero-filled section.
150 // FIXME: Utility functions of this kind should probably be moved into
151 // llvm/Support.
isZeroFillSection(SectionType T)152 inline bool isZeroFillSection(SectionType T) {
153 return (T == llvm::MachO::S_ZEROFILL ||
154 T == llvm::MachO::S_THREAD_LOCAL_ZEROFILL);
155 }
156
157 /// A typedef so that YAML I/O can (de/en)code the protection bits of a segment.
158 LLVM_YAML_STRONG_TYPEDEF(uint32_t, VMProtect)
159
160 /// A typedef to hold verions X.Y.X packed into 32-bit xxxx.yy.zz
161 LLVM_YAML_STRONG_TYPEDEF(uint32_t, PackedVersion)
162
163 /// Segments are only used in normalized final linked images (not in relocatable
164 /// object files). They specify how a range of the file is loaded.
165 struct Segment {
166 StringRef name;
167 Hex64 address;
168 Hex64 size;
169 VMProtect init_access;
170 VMProtect max_access;
171 };
172
173 /// Only used in normalized final linked images to specify on which dylibs
174 /// it depends.
175 struct DependentDylib {
176 StringRef path;
177 LoadCommandType kind;
178 PackedVersion compatVersion;
179 PackedVersion currentVersion;
180 };
181
182 /// A normalized rebasing entry. Only used in normalized final linked images.
183 struct RebaseLocation {
184 Hex32 segOffset;
185 uint8_t segIndex;
186 RebaseType kind;
187 };
188
189 /// A normalized binding entry. Only used in normalized final linked images.
190 struct BindLocation {
191 Hex32 segOffset;
192 uint8_t segIndex;
193 BindType kind;
194 bool canBeNull;
195 int ordinal;
196 StringRef symbolName;
197 Hex64 addend;
198 };
199
200 /// A typedef so that YAML I/O can encode/decode export flags.
201 LLVM_YAML_STRONG_TYPEDEF(uint32_t, ExportFlags)
202
203 /// A normalized export entry. Only used in normalized final linked images.
204 struct Export {
205 StringRef name;
206 Hex64 offset;
207 ExportSymbolKind kind;
208 ExportFlags flags;
209 Hex32 otherOffset;
210 StringRef otherName;
211 };
212
213 /// A normalized data-in-code entry.
214 struct DataInCode {
215 Hex32 offset;
216 Hex16 length;
217 DataRegionType kind;
218 };
219
220 /// A typedef so that YAML I/O can encode/decode mach_header.flags.
221 LLVM_YAML_STRONG_TYPEDEF(uint32_t, FileFlags)
222
223 ///
224 struct NormalizedFile {
225 MachOLinkingContext::Arch arch = MachOLinkingContext::arch_unknown;
226 HeaderFileType fileType = llvm::MachO::MH_OBJECT;
227 FileFlags flags = 0;
228 std::vector<Segment> segments; // Not used in object files.
229 std::vector<Section> sections;
230
231 // Symbols sorted by kind.
232 std::vector<Symbol> localSymbols;
233 std::vector<Symbol> globalSymbols;
234 std::vector<Symbol> undefinedSymbols;
235 std::vector<Symbol> stabsSymbols;
236
237 // Maps to load commands with no LINKEDIT content (final linked images only).
238 std::vector<DependentDylib> dependentDylibs;
239 StringRef installName; // dylibs only
240 PackedVersion compatVersion = 0; // dylibs only
241 PackedVersion currentVersion = 0; // dylibs only
242 bool hasUUID = false;
243 bool hasMinVersionLoadCommand = false;
244 bool generateDataInCodeLoadCommand = false;
245 std::vector<StringRef> rpaths;
246 Hex64 entryAddress = 0;
247 Hex64 stackSize = 0;
248 MachOLinkingContext::OS os = MachOLinkingContext::OS::unknown;
249 Hex64 sourceVersion = 0;
250 PackedVersion minOSverson = 0;
251 PackedVersion sdkVersion = 0;
252 LoadCommandType minOSVersionKind = (LoadCommandType)0;
253
254 // Maps to load commands with LINKEDIT content (final linked images only).
255 Hex32 pageSize = 0;
256 std::vector<RebaseLocation> rebasingInfo;
257 std::vector<BindLocation> bindingInfo;
258 std::vector<BindLocation> weakBindingInfo;
259 std::vector<BindLocation> lazyBindingInfo;
260 std::vector<Export> exportInfo;
261 std::vector<uint8_t> functionStarts;
262 std::vector<DataInCode> dataInCode;
263
264 // TODO:
265 // code-signature
266 // split-seg-info
267 // function-starts
268
269 // For any allocations in this struct which need to be owned by this struct.
270 BumpPtrAllocator ownedAllocations;
271 };
272
273 /// Tests if a file is a non-fat mach-o object file.
274 bool isThinObjectFile(StringRef path, MachOLinkingContext::Arch &arch);
275
276 /// If the buffer is a fat file with the request arch, then this function
277 /// returns true with 'offset' and 'size' set to location of the arch slice
278 /// within the buffer. Otherwise returns false;
279 bool sliceFromFatFile(MemoryBufferRef mb, MachOLinkingContext::Arch arch,
280 uint32_t &offset, uint32_t &size);
281
282 /// Reads a mach-o file and produces an in-memory normalized view.
283 llvm::Expected<std::unique_ptr<NormalizedFile>>
284 readBinary(std::unique_ptr<MemoryBuffer> &mb,
285 const MachOLinkingContext::Arch arch);
286
287 /// Takes in-memory normalized view and writes a mach-o object file.
288 llvm::Error writeBinary(const NormalizedFile &file, StringRef path);
289
290 size_t headerAndLoadCommandsSize(const NormalizedFile &file,
291 bool includeFunctionStarts);
292
293
294 /// Parses a yaml encoded mach-o file to produce an in-memory normalized view.
295 llvm::Expected<std::unique_ptr<NormalizedFile>>
296 readYaml(std::unique_ptr<MemoryBuffer> &mb);
297
298 /// Writes a yaml encoded mach-o files given an in-memory normalized view.
299 std::error_code writeYaml(const NormalizedFile &file, raw_ostream &out);
300
301 llvm::Error
302 normalizedObjectToAtoms(MachOFile *file,
303 const NormalizedFile &normalizedFile,
304 bool copyRefs);
305
306 llvm::Error
307 normalizedDylibToAtoms(MachODylibFile *file,
308 const NormalizedFile &normalizedFile,
309 bool copyRefs);
310
311 /// Takes in-memory normalized dylib or object and parses it into lld::File
312 llvm::Expected<std::unique_ptr<lld::File>>
313 normalizedToAtoms(const NormalizedFile &normalizedFile, StringRef path,
314 bool copyRefs);
315
316 /// Takes atoms and generates a normalized macho-o view.
317 llvm::Expected<std::unique_ptr<NormalizedFile>>
318 normalizedFromAtoms(const lld::File &atomFile, const MachOLinkingContext &ctxt);
319
320
321 } // namespace normalized
322
323 /// Class for interfacing mach-o yaml files into generic yaml parsing
324 class MachOYamlIOTaggedDocumentHandler : public YamlIOTaggedDocumentHandler {
325 public:
MachOYamlIOTaggedDocumentHandler(MachOLinkingContext::Arch arch)326 MachOYamlIOTaggedDocumentHandler(MachOLinkingContext::Arch arch)
327 : _arch(arch) { }
328 bool handledDocTag(llvm::yaml::IO &io, const lld::File *&file) const override;
329 private:
330 const MachOLinkingContext::Arch _arch;
331 };
332
333 } // namespace mach_o
334 } // namespace lld
335
336 #endif // LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H
337