1 //===- MachOLayoutBuilder.cpp -----------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "MachOLayoutBuilder.h"
10 #include "llvm/Support/Alignment.h"
11 #include "llvm/Support/Errc.h"
12 #include "llvm/Support/ErrorHandling.h"
13
14 namespace llvm {
15 namespace objcopy {
16 namespace macho {
17
18 StringTableBuilder::Kind
getStringTableBuilderKind(const Object & O,bool Is64Bit)19 MachOLayoutBuilder::getStringTableBuilderKind(const Object &O, bool Is64Bit) {
20 if (O.Header.FileType == MachO::HeaderFileType::MH_OBJECT)
21 return Is64Bit ? StringTableBuilder::MachO64 : StringTableBuilder::MachO;
22 return Is64Bit ? StringTableBuilder::MachO64Linked
23 : StringTableBuilder::MachOLinked;
24 }
25
computeSizeOfCmds() const26 uint32_t MachOLayoutBuilder::computeSizeOfCmds() const {
27 uint32_t Size = 0;
28 for (const LoadCommand &LC : O.LoadCommands) {
29 const MachO::macho_load_command &MLC = LC.MachOLoadCommand;
30 auto cmd = MLC.load_command_data.cmd;
31 switch (cmd) {
32 case MachO::LC_SEGMENT:
33 Size += sizeof(MachO::segment_command) +
34 sizeof(MachO::section) * LC.Sections.size();
35 continue;
36 case MachO::LC_SEGMENT_64:
37 Size += sizeof(MachO::segment_command_64) +
38 sizeof(MachO::section_64) * LC.Sections.size();
39 continue;
40 }
41
42 switch (cmd) {
43 #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \
44 case MachO::LCName: \
45 Size += sizeof(MachO::LCStruct) + LC.Payload.size(); \
46 break;
47 #include "llvm/BinaryFormat/MachO.def"
48 #undef HANDLE_LOAD_COMMAND
49 }
50 }
51
52 return Size;
53 }
54
constructStringTable()55 void MachOLayoutBuilder::constructStringTable() {
56 for (std::unique_ptr<SymbolEntry> &Sym : O.SymTable.Symbols)
57 StrTableBuilder.add(Sym->Name);
58 StrTableBuilder.finalize();
59 }
60
updateSymbolIndexes()61 void MachOLayoutBuilder::updateSymbolIndexes() {
62 uint32_t Index = 0;
63 for (auto &Symbol : O.SymTable.Symbols)
64 Symbol->Index = Index++;
65 }
66
67 // Updates the index and the number of local/external/undefined symbols.
updateDySymTab(MachO::macho_load_command & MLC)68 void MachOLayoutBuilder::updateDySymTab(MachO::macho_load_command &MLC) {
69 assert(MLC.load_command_data.cmd == MachO::LC_DYSYMTAB);
70 // Make sure that nlist entries in the symbol table are sorted by the those
71 // types. The order is: local < defined external < undefined external.
72 assert(llvm::is_sorted(O.SymTable.Symbols,
73 [](const std::unique_ptr<SymbolEntry> &A,
74 const std::unique_ptr<SymbolEntry> &B) {
75 bool AL = A->isLocalSymbol(),
76 BL = B->isLocalSymbol();
77 if (AL != BL)
78 return AL;
79 return !AL && !A->isUndefinedSymbol() &&
80 B->isUndefinedSymbol();
81 }) &&
82 "Symbols are not sorted by their types.");
83
84 uint32_t NumLocalSymbols = 0;
85 auto Iter = O.SymTable.Symbols.begin();
86 auto End = O.SymTable.Symbols.end();
87 for (; Iter != End; ++Iter) {
88 if ((*Iter)->isExternalSymbol())
89 break;
90
91 ++NumLocalSymbols;
92 }
93
94 uint32_t NumExtDefSymbols = 0;
95 for (; Iter != End; ++Iter) {
96 if ((*Iter)->isUndefinedSymbol())
97 break;
98
99 ++NumExtDefSymbols;
100 }
101
102 MLC.dysymtab_command_data.ilocalsym = 0;
103 MLC.dysymtab_command_data.nlocalsym = NumLocalSymbols;
104 MLC.dysymtab_command_data.iextdefsym = NumLocalSymbols;
105 MLC.dysymtab_command_data.nextdefsym = NumExtDefSymbols;
106 MLC.dysymtab_command_data.iundefsym = NumLocalSymbols + NumExtDefSymbols;
107 MLC.dysymtab_command_data.nundefsym =
108 O.SymTable.Symbols.size() - (NumLocalSymbols + NumExtDefSymbols);
109 }
110
111 // Recomputes and updates offset and size fields in load commands and sections
112 // since they could be modified.
layoutSegments()113 uint64_t MachOLayoutBuilder::layoutSegments() {
114 auto HeaderSize =
115 Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
116 const bool IsObjectFile =
117 O.Header.FileType == MachO::HeaderFileType::MH_OBJECT;
118 uint64_t Offset = IsObjectFile ? (HeaderSize + O.Header.SizeOfCmds) : 0;
119 for (LoadCommand &LC : O.LoadCommands) {
120 auto &MLC = LC.MachOLoadCommand;
121 StringRef Segname;
122 uint64_t SegmentVmAddr;
123 uint64_t SegmentVmSize;
124 switch (MLC.load_command_data.cmd) {
125 case MachO::LC_SEGMENT:
126 SegmentVmAddr = MLC.segment_command_data.vmaddr;
127 SegmentVmSize = MLC.segment_command_data.vmsize;
128 Segname = StringRef(MLC.segment_command_data.segname,
129 strnlen(MLC.segment_command_data.segname,
130 sizeof(MLC.segment_command_data.segname)));
131 break;
132 case MachO::LC_SEGMENT_64:
133 SegmentVmAddr = MLC.segment_command_64_data.vmaddr;
134 SegmentVmSize = MLC.segment_command_64_data.vmsize;
135 Segname = StringRef(MLC.segment_command_64_data.segname,
136 strnlen(MLC.segment_command_64_data.segname,
137 sizeof(MLC.segment_command_64_data.segname)));
138 break;
139 default:
140 continue;
141 }
142
143 if (Segname == "__LINKEDIT") {
144 // We update the __LINKEDIT segment later (in layoutTail).
145 assert(LC.Sections.empty() && "__LINKEDIT segment has sections");
146 LinkEditLoadCommand = &MLC;
147 continue;
148 }
149
150 // Update file offsets and sizes of sections.
151 uint64_t SegOffset = Offset;
152 uint64_t SegFileSize = 0;
153 uint64_t VMSize = 0;
154 for (std::unique_ptr<Section> &Sec : LC.Sections) {
155 assert(SegmentVmAddr <= Sec->Addr &&
156 "Section's address cannot be smaller than Segment's one");
157 uint32_t SectOffset = Sec->Addr - SegmentVmAddr;
158 if (IsObjectFile) {
159 if (!Sec->hasValidOffset()) {
160 Sec->Offset = 0;
161 } else {
162 uint64_t PaddingSize =
163 offsetToAlignment(SegFileSize, Align(1ull << Sec->Align));
164 Sec->Offset = SegOffset + SegFileSize + PaddingSize;
165 Sec->Size = Sec->Content.size();
166 SegFileSize += PaddingSize + Sec->Size;
167 }
168 } else {
169 if (!Sec->hasValidOffset()) {
170 Sec->Offset = 0;
171 } else {
172 Sec->Offset = SegOffset + SectOffset;
173 Sec->Size = Sec->Content.size();
174 SegFileSize = std::max(SegFileSize, SectOffset + Sec->Size);
175 }
176 }
177 VMSize = std::max(VMSize, SectOffset + Sec->Size);
178 }
179
180 if (IsObjectFile) {
181 Offset += SegFileSize;
182 } else {
183 Offset = alignTo(Offset + SegFileSize, PageSize);
184 SegFileSize = alignTo(SegFileSize, PageSize);
185 // Use the original vmsize if the segment is __PAGEZERO.
186 VMSize =
187 Segname == "__PAGEZERO" ? SegmentVmSize : alignTo(VMSize, PageSize);
188 }
189
190 switch (MLC.load_command_data.cmd) {
191 case MachO::LC_SEGMENT:
192 MLC.segment_command_data.cmdsize =
193 sizeof(MachO::segment_command) +
194 sizeof(MachO::section) * LC.Sections.size();
195 MLC.segment_command_data.nsects = LC.Sections.size();
196 MLC.segment_command_data.fileoff = SegOffset;
197 MLC.segment_command_data.vmsize = VMSize;
198 MLC.segment_command_data.filesize = SegFileSize;
199 break;
200 case MachO::LC_SEGMENT_64:
201 MLC.segment_command_64_data.cmdsize =
202 sizeof(MachO::segment_command_64) +
203 sizeof(MachO::section_64) * LC.Sections.size();
204 MLC.segment_command_64_data.nsects = LC.Sections.size();
205 MLC.segment_command_64_data.fileoff = SegOffset;
206 MLC.segment_command_64_data.vmsize = VMSize;
207 MLC.segment_command_64_data.filesize = SegFileSize;
208 break;
209 }
210 }
211
212 return Offset;
213 }
214
layoutRelocations(uint64_t Offset)215 uint64_t MachOLayoutBuilder::layoutRelocations(uint64_t Offset) {
216 for (LoadCommand &LC : O.LoadCommands)
217 for (std::unique_ptr<Section> &Sec : LC.Sections) {
218 Sec->RelOff = Sec->Relocations.empty() ? 0 : Offset;
219 Sec->NReloc = Sec->Relocations.size();
220 Offset += sizeof(MachO::any_relocation_info) * Sec->NReloc;
221 }
222
223 return Offset;
224 }
225
layoutTail(uint64_t Offset)226 Error MachOLayoutBuilder::layoutTail(uint64_t Offset) {
227 // If we are building the layout of an executable or dynamic library
228 // which does not have any segments other than __LINKEDIT,
229 // the Offset can be equal to zero by this time. It happens because of the
230 // convention that in such cases the file offsets specified by LC_SEGMENT
231 // start with zero (unlike the case of a relocatable object file).
232 const uint64_t HeaderSize =
233 Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
234 assert((!(O.Header.FileType == MachO::HeaderFileType::MH_OBJECT) ||
235 Offset >= HeaderSize + O.Header.SizeOfCmds) &&
236 "Incorrect tail offset");
237 Offset = std::max(Offset, HeaderSize + O.Header.SizeOfCmds);
238
239 // The order of LINKEDIT elements is as follows:
240 // rebase info, binding info, weak binding info, lazy binding info, export
241 // trie, data-in-code, symbol table, indirect symbol table, symbol table
242 // strings, code signature.
243 uint64_t NListSize = Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist);
244 uint64_t StartOfLinkEdit = Offset;
245 uint64_t StartOfRebaseInfo = StartOfLinkEdit;
246 uint64_t StartOfBindingInfo = StartOfRebaseInfo + O.Rebases.Opcodes.size();
247 uint64_t StartOfWeakBindingInfo = StartOfBindingInfo + O.Binds.Opcodes.size();
248 uint64_t StartOfLazyBindingInfo =
249 StartOfWeakBindingInfo + O.WeakBinds.Opcodes.size();
250 uint64_t StartOfExportTrie =
251 StartOfLazyBindingInfo + O.LazyBinds.Opcodes.size();
252 uint64_t StartOfFunctionStarts = StartOfExportTrie + O.Exports.Trie.size();
253 uint64_t StartOfDataInCode =
254 StartOfFunctionStarts + O.FunctionStarts.Data.size();
255 uint64_t StartOfSymbols = StartOfDataInCode + O.DataInCode.Data.size();
256 uint64_t StartOfIndirectSymbols =
257 StartOfSymbols + NListSize * O.SymTable.Symbols.size();
258 uint64_t StartOfSymbolStrings =
259 StartOfIndirectSymbols +
260 sizeof(uint32_t) * O.IndirectSymTable.Symbols.size();
261 uint64_t StartOfCodeSignature =
262 StartOfSymbolStrings + StrTableBuilder.getSize();
263 if (O.CodeSignatureCommandIndex)
264 StartOfCodeSignature = alignTo(StartOfCodeSignature, 16);
265 uint64_t LinkEditSize =
266 (StartOfCodeSignature + O.CodeSignature.Data.size()) - StartOfLinkEdit;
267
268 // Now we have determined the layout of the contents of the __LINKEDIT
269 // segment. Update its load command.
270 if (LinkEditLoadCommand) {
271 MachO::macho_load_command *MLC = LinkEditLoadCommand;
272 switch (LinkEditLoadCommand->load_command_data.cmd) {
273 case MachO::LC_SEGMENT:
274 MLC->segment_command_data.cmdsize = sizeof(MachO::segment_command);
275 MLC->segment_command_data.fileoff = StartOfLinkEdit;
276 MLC->segment_command_data.vmsize = alignTo(LinkEditSize, PageSize);
277 MLC->segment_command_data.filesize = LinkEditSize;
278 break;
279 case MachO::LC_SEGMENT_64:
280 MLC->segment_command_64_data.cmdsize = sizeof(MachO::segment_command_64);
281 MLC->segment_command_64_data.fileoff = StartOfLinkEdit;
282 MLC->segment_command_64_data.vmsize = alignTo(LinkEditSize, PageSize);
283 MLC->segment_command_64_data.filesize = LinkEditSize;
284 break;
285 }
286 }
287
288 for (LoadCommand &LC : O.LoadCommands) {
289 auto &MLC = LC.MachOLoadCommand;
290 auto cmd = MLC.load_command_data.cmd;
291 switch (cmd) {
292 case MachO::LC_CODE_SIGNATURE:
293 MLC.linkedit_data_command_data.dataoff = StartOfCodeSignature;
294 MLC.linkedit_data_command_data.datasize = O.CodeSignature.Data.size();
295 break;
296 case MachO::LC_SYMTAB:
297 MLC.symtab_command_data.symoff = StartOfSymbols;
298 MLC.symtab_command_data.nsyms = O.SymTable.Symbols.size();
299 MLC.symtab_command_data.stroff = StartOfSymbolStrings;
300 MLC.symtab_command_data.strsize = StrTableBuilder.getSize();
301 break;
302 case MachO::LC_DYSYMTAB: {
303 if (MLC.dysymtab_command_data.ntoc != 0 ||
304 MLC.dysymtab_command_data.nmodtab != 0 ||
305 MLC.dysymtab_command_data.nextrefsyms != 0 ||
306 MLC.dysymtab_command_data.nlocrel != 0 ||
307 MLC.dysymtab_command_data.nextrel != 0)
308 return createStringError(llvm::errc::not_supported,
309 "shared library is not yet supported");
310
311 if (!O.IndirectSymTable.Symbols.empty()) {
312 MLC.dysymtab_command_data.indirectsymoff = StartOfIndirectSymbols;
313 MLC.dysymtab_command_data.nindirectsyms =
314 O.IndirectSymTable.Symbols.size();
315 }
316
317 updateDySymTab(MLC);
318 break;
319 }
320 case MachO::LC_DATA_IN_CODE:
321 MLC.linkedit_data_command_data.dataoff = StartOfDataInCode;
322 MLC.linkedit_data_command_data.datasize = O.DataInCode.Data.size();
323 break;
324 case MachO::LC_FUNCTION_STARTS:
325 MLC.linkedit_data_command_data.dataoff = StartOfFunctionStarts;
326 MLC.linkedit_data_command_data.datasize = O.FunctionStarts.Data.size();
327 break;
328 case MachO::LC_DYLD_INFO:
329 case MachO::LC_DYLD_INFO_ONLY:
330 MLC.dyld_info_command_data.rebase_off =
331 O.Rebases.Opcodes.empty() ? 0 : StartOfRebaseInfo;
332 MLC.dyld_info_command_data.rebase_size = O.Rebases.Opcodes.size();
333 MLC.dyld_info_command_data.bind_off =
334 O.Binds.Opcodes.empty() ? 0 : StartOfBindingInfo;
335 MLC.dyld_info_command_data.bind_size = O.Binds.Opcodes.size();
336 MLC.dyld_info_command_data.weak_bind_off =
337 O.WeakBinds.Opcodes.empty() ? 0 : StartOfWeakBindingInfo;
338 MLC.dyld_info_command_data.weak_bind_size = O.WeakBinds.Opcodes.size();
339 MLC.dyld_info_command_data.lazy_bind_off =
340 O.LazyBinds.Opcodes.empty() ? 0 : StartOfLazyBindingInfo;
341 MLC.dyld_info_command_data.lazy_bind_size = O.LazyBinds.Opcodes.size();
342 MLC.dyld_info_command_data.export_off =
343 O.Exports.Trie.empty() ? 0 : StartOfExportTrie;
344 MLC.dyld_info_command_data.export_size = O.Exports.Trie.size();
345 break;
346 // Note that LC_ENCRYPTION_INFO.cryptoff despite its name and the comment in
347 // <mach-o/loader.h> is not an offset in the binary file, instead, it is a
348 // relative virtual address. At the moment modification of the __TEXT
349 // segment of executables isn't supported anyway (e.g. data in code entries
350 // are not recalculated). Moreover, in general
351 // LC_ENCRYPT_INFO/LC_ENCRYPTION_INFO_64 are nontrivial to update because
352 // without making additional assumptions (e.g. that the entire __TEXT
353 // segment should be encrypted) we do not know how to recalculate the
354 // boundaries of the encrypted part. For now just copy over these load
355 // commands until we encounter a real world usecase where
356 // LC_ENCRYPT_INFO/LC_ENCRYPTION_INFO_64 need to be adjusted.
357 case MachO::LC_ENCRYPTION_INFO:
358 case MachO::LC_ENCRYPTION_INFO_64:
359 case MachO::LC_LOAD_DYLINKER:
360 case MachO::LC_MAIN:
361 case MachO::LC_RPATH:
362 case MachO::LC_SEGMENT:
363 case MachO::LC_SEGMENT_64:
364 case MachO::LC_VERSION_MIN_MACOSX:
365 case MachO::LC_VERSION_MIN_IPHONEOS:
366 case MachO::LC_VERSION_MIN_TVOS:
367 case MachO::LC_VERSION_MIN_WATCHOS:
368 case MachO::LC_BUILD_VERSION:
369 case MachO::LC_ID_DYLIB:
370 case MachO::LC_LOAD_DYLIB:
371 case MachO::LC_LOAD_WEAK_DYLIB:
372 case MachO::LC_UUID:
373 case MachO::LC_SOURCE_VERSION:
374 // Nothing to update.
375 break;
376 default:
377 // Abort if it's unsupported in order to prevent corrupting the object.
378 return createStringError(llvm::errc::not_supported,
379 "unsupported load command (cmd=0x%x)", cmd);
380 }
381 }
382
383 return Error::success();
384 }
385
layout()386 Error MachOLayoutBuilder::layout() {
387 O.Header.NCmds = O.LoadCommands.size();
388 O.Header.SizeOfCmds = computeSizeOfCmds();
389 constructStringTable();
390 updateSymbolIndexes();
391 uint64_t Offset = layoutSegments();
392 Offset = layoutRelocations(Offset);
393 return layoutTail(Offset);
394 }
395
396 } // end namespace macho
397 } // end namespace objcopy
398 } // end namespace llvm
399