1 //===- lib/MC/MachObjectWriter.cpp - Mach-O File Writer -------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "llvm/ADT/DenseMap.h"
10 #include "llvm/ADT/Twine.h"
11 #include "llvm/ADT/iterator_range.h"
12 #include "llvm/BinaryFormat/MachO.h"
13 #include "llvm/MC/MCAsmBackend.h"
14 #include "llvm/MC/MCAsmLayout.h"
15 #include "llvm/MC/MCAssembler.h"
16 #include "llvm/MC/MCContext.h"
17 #include "llvm/MC/MCDirectives.h"
18 #include "llvm/MC/MCExpr.h"
19 #include "llvm/MC/MCFixupKindInfo.h"
20 #include "llvm/MC/MCFragment.h"
21 #include "llvm/MC/MCMachObjectWriter.h"
22 #include "llvm/MC/MCObjectWriter.h"
23 #include "llvm/MC/MCSection.h"
24 #include "llvm/MC/MCSectionMachO.h"
25 #include "llvm/MC/MCSymbol.h"
26 #include "llvm/MC/MCSymbolMachO.h"
27 #include "llvm/MC/MCValue.h"
28 #include "llvm/Support/Alignment.h"
29 #include "llvm/Support/Casting.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/ErrorHandling.h"
32 #include "llvm/Support/MathExtras.h"
33 #include "llvm/Support/raw_ostream.h"
34 #include <algorithm>
35 #include <cassert>
36 #include <cstdint>
37 #include <string>
38 #include <utility>
39 #include <vector>
40
41 using namespace llvm;
42
43 #define DEBUG_TYPE "mc"
44
reset()45 void MachObjectWriter::reset() {
46 Relocations.clear();
47 IndirectSymBase.clear();
48 StringTable.clear();
49 LocalSymbolData.clear();
50 ExternalSymbolData.clear();
51 UndefinedSymbolData.clear();
52 MCObjectWriter::reset();
53 }
54
doesSymbolRequireExternRelocation(const MCSymbol & S)55 bool MachObjectWriter::doesSymbolRequireExternRelocation(const MCSymbol &S) {
56 // Undefined symbols are always extern.
57 if (S.isUndefined())
58 return true;
59
60 // References to weak definitions require external relocation entries; the
61 // definition may not always be the one in the same object file.
62 if (cast<MCSymbolMachO>(S).isWeakDefinition())
63 return true;
64
65 // Otherwise, we can use an internal relocation.
66 return false;
67 }
68
69 bool MachObjectWriter::
operator <(const MachSymbolData & RHS) const70 MachSymbolData::operator<(const MachSymbolData &RHS) const {
71 return Symbol->getName() < RHS.Symbol->getName();
72 }
73
isFixupKindPCRel(const MCAssembler & Asm,unsigned Kind)74 bool MachObjectWriter::isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) {
75 const MCFixupKindInfo &FKI = Asm.getBackend().getFixupKindInfo(
76 (MCFixupKind) Kind);
77
78 return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel;
79 }
80
getFragmentAddress(const MCFragment * Fragment,const MCAsmLayout & Layout) const81 uint64_t MachObjectWriter::getFragmentAddress(const MCFragment *Fragment,
82 const MCAsmLayout &Layout) const {
83 return getSectionAddress(Fragment->getParent()) +
84 Layout.getFragmentOffset(Fragment);
85 }
86
getSymbolAddress(const MCSymbol & S,const MCAsmLayout & Layout) const87 uint64_t MachObjectWriter::getSymbolAddress(const MCSymbol &S,
88 const MCAsmLayout &Layout) const {
89 // If this is a variable, then recursively evaluate now.
90 if (S.isVariable()) {
91 if (const MCConstantExpr *C =
92 dyn_cast<const MCConstantExpr>(S.getVariableValue()))
93 return C->getValue();
94
95 MCValue Target;
96 if (!S.getVariableValue()->evaluateAsRelocatable(Target, &Layout, nullptr))
97 report_fatal_error("unable to evaluate offset for variable '" +
98 S.getName() + "'");
99
100 // Verify that any used symbols are defined.
101 if (Target.getSymA() && Target.getSymA()->getSymbol().isUndefined())
102 report_fatal_error("unable to evaluate offset to undefined symbol '" +
103 Target.getSymA()->getSymbol().getName() + "'");
104 if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined())
105 report_fatal_error("unable to evaluate offset to undefined symbol '" +
106 Target.getSymB()->getSymbol().getName() + "'");
107
108 uint64_t Address = Target.getConstant();
109 if (Target.getSymA())
110 Address += getSymbolAddress(Target.getSymA()->getSymbol(), Layout);
111 if (Target.getSymB())
112 Address += getSymbolAddress(Target.getSymB()->getSymbol(), Layout);
113 return Address;
114 }
115
116 return getSectionAddress(S.getFragment()->getParent()) +
117 Layout.getSymbolOffset(S);
118 }
119
getPaddingSize(const MCSection * Sec,const MCAsmLayout & Layout) const120 uint64_t MachObjectWriter::getPaddingSize(const MCSection *Sec,
121 const MCAsmLayout &Layout) const {
122 uint64_t EndAddr = getSectionAddress(Sec) + Layout.getSectionAddressSize(Sec);
123 unsigned Next = Sec->getLayoutOrder() + 1;
124 if (Next >= Layout.getSectionOrder().size())
125 return 0;
126
127 const MCSection &NextSec = *Layout.getSectionOrder()[Next];
128 if (NextSec.isVirtualSection())
129 return 0;
130 return offsetToAlignment(EndAddr, Align(NextSec.getAlignment()));
131 }
132
writeHeader(MachO::HeaderFileType Type,unsigned NumLoadCommands,unsigned LoadCommandsSize,bool SubsectionsViaSymbols)133 void MachObjectWriter::writeHeader(MachO::HeaderFileType Type,
134 unsigned NumLoadCommands,
135 unsigned LoadCommandsSize,
136 bool SubsectionsViaSymbols) {
137 uint32_t Flags = 0;
138
139 if (SubsectionsViaSymbols)
140 Flags |= MachO::MH_SUBSECTIONS_VIA_SYMBOLS;
141
142 // struct mach_header (28 bytes) or
143 // struct mach_header_64 (32 bytes)
144
145 uint64_t Start = W.OS.tell();
146 (void) Start;
147
148 W.write<uint32_t>(is64Bit() ? MachO::MH_MAGIC_64 : MachO::MH_MAGIC);
149
150 W.write<uint32_t>(TargetObjectWriter->getCPUType());
151 W.write<uint32_t>(TargetObjectWriter->getCPUSubtype());
152
153 W.write<uint32_t>(Type);
154 W.write<uint32_t>(NumLoadCommands);
155 W.write<uint32_t>(LoadCommandsSize);
156 W.write<uint32_t>(Flags);
157 if (is64Bit())
158 W.write<uint32_t>(0); // reserved
159
160 assert(W.OS.tell() - Start == (is64Bit() ? sizeof(MachO::mach_header_64)
161 : sizeof(MachO::mach_header)));
162 }
163
writeWithPadding(StringRef Str,uint64_t Size)164 void MachObjectWriter::writeWithPadding(StringRef Str, uint64_t Size) {
165 assert(Size >= Str.size());
166 W.OS << Str;
167 W.OS.write_zeros(Size - Str.size());
168 }
169
170 /// writeSegmentLoadCommand - Write a segment load command.
171 ///
172 /// \param NumSections The number of sections in this segment.
173 /// \param SectionDataSize The total size of the sections.
writeSegmentLoadCommand(StringRef Name,unsigned NumSections,uint64_t VMAddr,uint64_t VMSize,uint64_t SectionDataStartOffset,uint64_t SectionDataSize,uint32_t MaxProt,uint32_t InitProt)174 void MachObjectWriter::writeSegmentLoadCommand(
175 StringRef Name, unsigned NumSections, uint64_t VMAddr, uint64_t VMSize,
176 uint64_t SectionDataStartOffset, uint64_t SectionDataSize, uint32_t MaxProt,
177 uint32_t InitProt) {
178 // struct segment_command (56 bytes) or
179 // struct segment_command_64 (72 bytes)
180
181 uint64_t Start = W.OS.tell();
182 (void) Start;
183
184 unsigned SegmentLoadCommandSize =
185 is64Bit() ? sizeof(MachO::segment_command_64):
186 sizeof(MachO::segment_command);
187 W.write<uint32_t>(is64Bit() ? MachO::LC_SEGMENT_64 : MachO::LC_SEGMENT);
188 W.write<uint32_t>(SegmentLoadCommandSize +
189 NumSections * (is64Bit() ? sizeof(MachO::section_64) :
190 sizeof(MachO::section)));
191
192 writeWithPadding(Name, 16);
193 if (is64Bit()) {
194 W.write<uint64_t>(VMAddr); // vmaddr
195 W.write<uint64_t>(VMSize); // vmsize
196 W.write<uint64_t>(SectionDataStartOffset); // file offset
197 W.write<uint64_t>(SectionDataSize); // file size
198 } else {
199 W.write<uint32_t>(VMAddr); // vmaddr
200 W.write<uint32_t>(VMSize); // vmsize
201 W.write<uint32_t>(SectionDataStartOffset); // file offset
202 W.write<uint32_t>(SectionDataSize); // file size
203 }
204 // maxprot
205 W.write<uint32_t>(MaxProt);
206 // initprot
207 W.write<uint32_t>(InitProt);
208 W.write<uint32_t>(NumSections);
209 W.write<uint32_t>(0); // flags
210
211 assert(W.OS.tell() - Start == SegmentLoadCommandSize);
212 }
213
writeSection(const MCAsmLayout & Layout,const MCSection & Sec,uint64_t VMAddr,uint64_t FileOffset,unsigned Flags,uint64_t RelocationsStart,unsigned NumRelocations)214 void MachObjectWriter::writeSection(const MCAsmLayout &Layout,
215 const MCSection &Sec, uint64_t VMAddr,
216 uint64_t FileOffset, unsigned Flags,
217 uint64_t RelocationsStart,
218 unsigned NumRelocations) {
219 uint64_t SectionSize = Layout.getSectionAddressSize(&Sec);
220 const MCSectionMachO &Section = cast<MCSectionMachO>(Sec);
221
222 // The offset is unused for virtual sections.
223 if (Section.isVirtualSection()) {
224 assert(Layout.getSectionFileSize(&Sec) == 0 && "Invalid file size!");
225 FileOffset = 0;
226 }
227
228 // struct section (68 bytes) or
229 // struct section_64 (80 bytes)
230
231 uint64_t Start = W.OS.tell();
232 (void) Start;
233
234 writeWithPadding(Section.getSectionName(), 16);
235 writeWithPadding(Section.getSegmentName(), 16);
236 if (is64Bit()) {
237 W.write<uint64_t>(VMAddr); // address
238 W.write<uint64_t>(SectionSize); // size
239 } else {
240 W.write<uint32_t>(VMAddr); // address
241 W.write<uint32_t>(SectionSize); // size
242 }
243 W.write<uint32_t>(FileOffset);
244
245 assert(isPowerOf2_32(Section.getAlignment()) && "Invalid alignment!");
246 W.write<uint32_t>(Log2_32(Section.getAlignment()));
247 W.write<uint32_t>(NumRelocations ? RelocationsStart : 0);
248 W.write<uint32_t>(NumRelocations);
249 W.write<uint32_t>(Flags);
250 W.write<uint32_t>(IndirectSymBase.lookup(&Sec)); // reserved1
251 W.write<uint32_t>(Section.getStubSize()); // reserved2
252 if (is64Bit())
253 W.write<uint32_t>(0); // reserved3
254
255 assert(W.OS.tell() - Start ==
256 (is64Bit() ? sizeof(MachO::section_64) : sizeof(MachO::section)));
257 }
258
writeSymtabLoadCommand(uint32_t SymbolOffset,uint32_t NumSymbols,uint32_t StringTableOffset,uint32_t StringTableSize)259 void MachObjectWriter::writeSymtabLoadCommand(uint32_t SymbolOffset,
260 uint32_t NumSymbols,
261 uint32_t StringTableOffset,
262 uint32_t StringTableSize) {
263 // struct symtab_command (24 bytes)
264
265 uint64_t Start = W.OS.tell();
266 (void) Start;
267
268 W.write<uint32_t>(MachO::LC_SYMTAB);
269 W.write<uint32_t>(sizeof(MachO::symtab_command));
270 W.write<uint32_t>(SymbolOffset);
271 W.write<uint32_t>(NumSymbols);
272 W.write<uint32_t>(StringTableOffset);
273 W.write<uint32_t>(StringTableSize);
274
275 assert(W.OS.tell() - Start == sizeof(MachO::symtab_command));
276 }
277
writeDysymtabLoadCommand(uint32_t FirstLocalSymbol,uint32_t NumLocalSymbols,uint32_t FirstExternalSymbol,uint32_t NumExternalSymbols,uint32_t FirstUndefinedSymbol,uint32_t NumUndefinedSymbols,uint32_t IndirectSymbolOffset,uint32_t NumIndirectSymbols)278 void MachObjectWriter::writeDysymtabLoadCommand(uint32_t FirstLocalSymbol,
279 uint32_t NumLocalSymbols,
280 uint32_t FirstExternalSymbol,
281 uint32_t NumExternalSymbols,
282 uint32_t FirstUndefinedSymbol,
283 uint32_t NumUndefinedSymbols,
284 uint32_t IndirectSymbolOffset,
285 uint32_t NumIndirectSymbols) {
286 // struct dysymtab_command (80 bytes)
287
288 uint64_t Start = W.OS.tell();
289 (void) Start;
290
291 W.write<uint32_t>(MachO::LC_DYSYMTAB);
292 W.write<uint32_t>(sizeof(MachO::dysymtab_command));
293 W.write<uint32_t>(FirstLocalSymbol);
294 W.write<uint32_t>(NumLocalSymbols);
295 W.write<uint32_t>(FirstExternalSymbol);
296 W.write<uint32_t>(NumExternalSymbols);
297 W.write<uint32_t>(FirstUndefinedSymbol);
298 W.write<uint32_t>(NumUndefinedSymbols);
299 W.write<uint32_t>(0); // tocoff
300 W.write<uint32_t>(0); // ntoc
301 W.write<uint32_t>(0); // modtaboff
302 W.write<uint32_t>(0); // nmodtab
303 W.write<uint32_t>(0); // extrefsymoff
304 W.write<uint32_t>(0); // nextrefsyms
305 W.write<uint32_t>(IndirectSymbolOffset);
306 W.write<uint32_t>(NumIndirectSymbols);
307 W.write<uint32_t>(0); // extreloff
308 W.write<uint32_t>(0); // nextrel
309 W.write<uint32_t>(0); // locreloff
310 W.write<uint32_t>(0); // nlocrel
311
312 assert(W.OS.tell() - Start == sizeof(MachO::dysymtab_command));
313 }
314
315 MachObjectWriter::MachSymbolData *
findSymbolData(const MCSymbol & Sym)316 MachObjectWriter::findSymbolData(const MCSymbol &Sym) {
317 for (auto *SymbolData :
318 {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData})
319 for (MachSymbolData &Entry : *SymbolData)
320 if (Entry.Symbol == &Sym)
321 return &Entry;
322
323 return nullptr;
324 }
325
/// Follow a chain of variable symbols whose values are plain symbol
/// references and return the symbol at the end of the chain.
///
/// The walk stops at the first symbol that is not a variable, or whose
/// variable value is anything other than an MCSymbolRefExpr; that symbol is
/// returned. If \p Sym is not such an alias, \p Sym itself is returned.
const MCSymbol &MachObjectWriter::findAliasedSymbol(const MCSymbol &Sym) const {
  const MCSymbol *Current = &Sym;
  while (Current->isVariable()) {
    const auto *Ref = dyn_cast<MCSymbolRefExpr>(Current->getVariableValue());
    if (!Ref)
      break;
    Current = &Ref->getSymbol();
  }
  return *Current;
}
337
writeNlist(MachSymbolData & MSD,const MCAsmLayout & Layout)338 void MachObjectWriter::writeNlist(MachSymbolData &MSD,
339 const MCAsmLayout &Layout) {
340 const MCSymbol *Symbol = MSD.Symbol;
341 const MCSymbol &Data = *Symbol;
342 const MCSymbol *AliasedSymbol = &findAliasedSymbol(*Symbol);
343 uint8_t SectionIndex = MSD.SectionIndex;
344 uint8_t Type = 0;
345 uint64_t Address = 0;
346 bool IsAlias = Symbol != AliasedSymbol;
347
348 const MCSymbol &OrigSymbol = *Symbol;
349 MachSymbolData *AliaseeInfo;
350 if (IsAlias) {
351 AliaseeInfo = findSymbolData(*AliasedSymbol);
352 if (AliaseeInfo)
353 SectionIndex = AliaseeInfo->SectionIndex;
354 Symbol = AliasedSymbol;
355 // FIXME: Should this update Data as well?
356 }
357
358 // Set the N_TYPE bits. See <mach-o/nlist.h>.
359 //
360 // FIXME: Are the prebound or indirect fields possible here?
361 if (IsAlias && Symbol->isUndefined())
362 Type = MachO::N_INDR;
363 else if (Symbol->isUndefined())
364 Type = MachO::N_UNDF;
365 else if (Symbol->isAbsolute())
366 Type = MachO::N_ABS;
367 else
368 Type = MachO::N_SECT;
369
370 // FIXME: Set STAB bits.
371
372 if (Data.isPrivateExtern())
373 Type |= MachO::N_PEXT;
374
375 // Set external bit.
376 if (Data.isExternal() || (!IsAlias && Symbol->isUndefined()))
377 Type |= MachO::N_EXT;
378
379 // Compute the symbol address.
380 if (IsAlias && Symbol->isUndefined())
381 Address = AliaseeInfo->StringIndex;
382 else if (Symbol->isDefined())
383 Address = getSymbolAddress(OrigSymbol, Layout);
384 else if (Symbol->isCommon()) {
385 // Common symbols are encoded with the size in the address
386 // field, and their alignment in the flags.
387 Address = Symbol->getCommonSize();
388 }
389
390 // struct nlist (12 bytes)
391
392 W.write<uint32_t>(MSD.StringIndex);
393 W.OS << char(Type);
394 W.OS << char(SectionIndex);
395
396 // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc'
397 // value.
398 bool EncodeAsAltEntry =
399 IsAlias && cast<MCSymbolMachO>(OrigSymbol).isAltEntry();
400 W.write<uint16_t>(cast<MCSymbolMachO>(Symbol)->getEncodedFlags(EncodeAsAltEntry));
401 if (is64Bit())
402 W.write<uint64_t>(Address);
403 else
404 W.write<uint32_t>(Address);
405 }
406
writeLinkeditLoadCommand(uint32_t Type,uint32_t DataOffset,uint32_t DataSize)407 void MachObjectWriter::writeLinkeditLoadCommand(uint32_t Type,
408 uint32_t DataOffset,
409 uint32_t DataSize) {
410 uint64_t Start = W.OS.tell();
411 (void) Start;
412
413 W.write<uint32_t>(Type);
414 W.write<uint32_t>(sizeof(MachO::linkedit_data_command));
415 W.write<uint32_t>(DataOffset);
416 W.write<uint32_t>(DataSize);
417
418 assert(W.OS.tell() - Start == sizeof(MachO::linkedit_data_command));
419 }
420
ComputeLinkerOptionsLoadCommandSize(const std::vector<std::string> & Options,bool is64Bit)421 static unsigned ComputeLinkerOptionsLoadCommandSize(
422 const std::vector<std::string> &Options, bool is64Bit)
423 {
424 unsigned Size = sizeof(MachO::linker_option_command);
425 for (const std::string &Option : Options)
426 Size += Option.size() + 1;
427 return alignTo(Size, is64Bit ? 8 : 4);
428 }
429
writeLinkerOptionsLoadCommand(const std::vector<std::string> & Options)430 void MachObjectWriter::writeLinkerOptionsLoadCommand(
431 const std::vector<std::string> &Options)
432 {
433 unsigned Size = ComputeLinkerOptionsLoadCommandSize(Options, is64Bit());
434 uint64_t Start = W.OS.tell();
435 (void) Start;
436
437 W.write<uint32_t>(MachO::LC_LINKER_OPTION);
438 W.write<uint32_t>(Size);
439 W.write<uint32_t>(Options.size());
440 uint64_t BytesWritten = sizeof(MachO::linker_option_command);
441 for (const std::string &Option : Options) {
442 // Write each string, including the null byte.
443 W.OS << Option << '\0';
444 BytesWritten += Option.size() + 1;
445 }
446
447 // Pad to a multiple of the pointer size.
448 W.OS.write_zeros(
449 offsetToAlignment(BytesWritten, is64Bit() ? Align(8) : Align(4)));
450
451 assert(W.OS.tell() - Start == Size);
452 }
453
isFixupTargetValid(const MCValue & Target)454 static bool isFixupTargetValid(const MCValue &Target) {
455 // Target is (LHS - RHS + cst).
456 // We don't support the form where LHS is null: -RHS + cst
457 if (!Target.getSymA() && Target.getSymB())
458 return false;
459 return true;
460 }
461
recordRelocation(MCAssembler & Asm,const MCAsmLayout & Layout,const MCFragment * Fragment,const MCFixup & Fixup,MCValue Target,uint64_t & FixedValue)462 void MachObjectWriter::recordRelocation(MCAssembler &Asm,
463 const MCAsmLayout &Layout,
464 const MCFragment *Fragment,
465 const MCFixup &Fixup, MCValue Target,
466 uint64_t &FixedValue) {
467 if (!isFixupTargetValid(Target)) {
468 Asm.getContext().reportError(Fixup.getLoc(),
469 "unsupported relocation expression");
470 return;
471 }
472
473 TargetObjectWriter->recordRelocation(this, Asm, Layout, Fragment, Fixup,
474 Target, FixedValue);
475 }
476
bindIndirectSymbols(MCAssembler & Asm)477 void MachObjectWriter::bindIndirectSymbols(MCAssembler &Asm) {
478 // This is the point where 'as' creates actual symbols for indirect symbols
479 // (in the following two passes). It would be easier for us to do this sooner
480 // when we see the attribute, but that makes getting the order in the symbol
481 // table much more complicated than it is worth.
482 //
483 // FIXME: Revisit this when the dust settles.
484
485 // Report errors for use of .indirect_symbol not in a symbol pointer section
486 // or stub section.
487 for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
488 ie = Asm.indirect_symbol_end(); it != ie; ++it) {
489 const MCSectionMachO &Section = cast<MCSectionMachO>(*it->Section);
490
491 if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS &&
492 Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS &&
493 Section.getType() != MachO::S_THREAD_LOCAL_VARIABLE_POINTERS &&
494 Section.getType() != MachO::S_SYMBOL_STUBS) {
495 MCSymbol &Symbol = *it->Symbol;
496 report_fatal_error("indirect symbol '" + Symbol.getName() +
497 "' not in a symbol pointer or stub section");
498 }
499 }
500
501 // Bind non-lazy symbol pointers first.
502 unsigned IndirectIndex = 0;
503 for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
504 ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
505 const MCSectionMachO &Section = cast<MCSectionMachO>(*it->Section);
506
507 if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS &&
508 Section.getType() != MachO::S_THREAD_LOCAL_VARIABLE_POINTERS)
509 continue;
510
511 // Initialize the section indirect symbol base, if necessary.
512 IndirectSymBase.insert(std::make_pair(it->Section, IndirectIndex));
513
514 Asm.registerSymbol(*it->Symbol);
515 }
516
517 // Then lazy symbol pointers and symbol stubs.
518 IndirectIndex = 0;
519 for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
520 ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
521 const MCSectionMachO &Section = cast<MCSectionMachO>(*it->Section);
522
523 if (Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS &&
524 Section.getType() != MachO::S_SYMBOL_STUBS)
525 continue;
526
527 // Initialize the section indirect symbol base, if necessary.
528 IndirectSymBase.insert(std::make_pair(it->Section, IndirectIndex));
529
530 // Set the symbol type to undefined lazy, but only on construction.
531 //
532 // FIXME: Do not hardcode.
533 bool Created;
534 Asm.registerSymbol(*it->Symbol, &Created);
535 if (Created)
536 cast<MCSymbolMachO>(it->Symbol)->setReferenceTypeUndefinedLazy(true);
537 }
538 }
539
540 /// computeSymbolTable - Compute the symbol table data
computeSymbolTable(MCAssembler & Asm,std::vector<MachSymbolData> & LocalSymbolData,std::vector<MachSymbolData> & ExternalSymbolData,std::vector<MachSymbolData> & UndefinedSymbolData)541 void MachObjectWriter::computeSymbolTable(
542 MCAssembler &Asm, std::vector<MachSymbolData> &LocalSymbolData,
543 std::vector<MachSymbolData> &ExternalSymbolData,
544 std::vector<MachSymbolData> &UndefinedSymbolData) {
545 // Build section lookup table.
546 DenseMap<const MCSection*, uint8_t> SectionIndexMap;
547 unsigned Index = 1;
548 for (MCAssembler::iterator it = Asm.begin(),
549 ie = Asm.end(); it != ie; ++it, ++Index)
550 SectionIndexMap[&*it] = Index;
551 assert(Index <= 256 && "Too many sections!");
552
553 // Build the string table.
554 for (const MCSymbol &Symbol : Asm.symbols()) {
555 if (!Asm.isSymbolLinkerVisible(Symbol))
556 continue;
557
558 StringTable.add(Symbol.getName());
559 }
560 StringTable.finalize();
561
562 // Build the symbol arrays but only for non-local symbols.
563 //
564 // The particular order that we collect and then sort the symbols is chosen to
565 // match 'as'. Even though it doesn't matter for correctness, this is
566 // important for letting us diff .o files.
567 for (const MCSymbol &Symbol : Asm.symbols()) {
568 // Ignore non-linker visible symbols.
569 if (!Asm.isSymbolLinkerVisible(Symbol))
570 continue;
571
572 if (!Symbol.isExternal() && !Symbol.isUndefined())
573 continue;
574
575 MachSymbolData MSD;
576 MSD.Symbol = &Symbol;
577 MSD.StringIndex = StringTable.getOffset(Symbol.getName());
578
579 if (Symbol.isUndefined()) {
580 MSD.SectionIndex = 0;
581 UndefinedSymbolData.push_back(MSD);
582 } else if (Symbol.isAbsolute()) {
583 MSD.SectionIndex = 0;
584 ExternalSymbolData.push_back(MSD);
585 } else {
586 MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
587 assert(MSD.SectionIndex && "Invalid section index!");
588 ExternalSymbolData.push_back(MSD);
589 }
590 }
591
592 // Now add the data for local symbols.
593 for (const MCSymbol &Symbol : Asm.symbols()) {
594 // Ignore non-linker visible symbols.
595 if (!Asm.isSymbolLinkerVisible(Symbol))
596 continue;
597
598 if (Symbol.isExternal() || Symbol.isUndefined())
599 continue;
600
601 MachSymbolData MSD;
602 MSD.Symbol = &Symbol;
603 MSD.StringIndex = StringTable.getOffset(Symbol.getName());
604
605 if (Symbol.isAbsolute()) {
606 MSD.SectionIndex = 0;
607 LocalSymbolData.push_back(MSD);
608 } else {
609 MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
610 assert(MSD.SectionIndex && "Invalid section index!");
611 LocalSymbolData.push_back(MSD);
612 }
613 }
614
615 // External and undefined symbols are required to be in lexicographic order.
616 llvm::sort(ExternalSymbolData);
617 llvm::sort(UndefinedSymbolData);
618
619 // Set the symbol indices.
620 Index = 0;
621 for (auto *SymbolData :
622 {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData})
623 for (MachSymbolData &Entry : *SymbolData)
624 Entry.Symbol->setIndex(Index++);
625
626 for (const MCSection &Section : Asm) {
627 for (RelAndSymbol &Rel : Relocations[&Section]) {
628 if (!Rel.Sym)
629 continue;
630
631 // Set the Index and the IsExtern bit.
632 unsigned Index = Rel.Sym->getIndex();
633 assert(isInt<24>(Index));
634 if (W.Endian == support::little)
635 Rel.MRE.r_word1 = (Rel.MRE.r_word1 & (~0U << 24)) | Index | (1 << 27);
636 else
637 Rel.MRE.r_word1 = (Rel.MRE.r_word1 & 0xff) | Index << 8 | (1 << 4);
638 }
639 }
640 }
641
computeSectionAddresses(const MCAssembler & Asm,const MCAsmLayout & Layout)642 void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm,
643 const MCAsmLayout &Layout) {
644 uint64_t StartAddress = 0;
645 for (const MCSection *Sec : Layout.getSectionOrder()) {
646 StartAddress = alignTo(StartAddress, Sec->getAlignment());
647 SectionAddress[Sec] = StartAddress;
648 StartAddress += Layout.getSectionAddressSize(Sec);
649
650 // Explicitly pad the section to match the alignment requirements of the
651 // following one. This is for 'gas' compatibility, it shouldn't
652 /// strictly be necessary.
653 StartAddress += getPaddingSize(Sec, Layout);
654 }
655 }
656
executePostLayoutBinding(MCAssembler & Asm,const MCAsmLayout & Layout)657 void MachObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
658 const MCAsmLayout &Layout) {
659 computeSectionAddresses(Asm, Layout);
660
661 // Create symbol data for any indirect symbols.
662 bindIndirectSymbols(Asm);
663 }
664
isSymbolRefDifferenceFullyResolvedImpl(const MCAssembler & Asm,const MCSymbol & A,const MCSymbol & B,bool InSet) const665 bool MachObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
666 const MCAssembler &Asm, const MCSymbol &A, const MCSymbol &B,
667 bool InSet) const {
668 // FIXME: We don't handle things like
669 // foo = .
670 // creating atoms.
671 if (A.isVariable() || B.isVariable())
672 return false;
673 return MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(Asm, A, B,
674 InSet);
675 }
676
isSymbolRefDifferenceFullyResolvedImpl(const MCAssembler & Asm,const MCSymbol & SymA,const MCFragment & FB,bool InSet,bool IsPCRel) const677 bool MachObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
678 const MCAssembler &Asm, const MCSymbol &SymA, const MCFragment &FB,
679 bool InSet, bool IsPCRel) const {
680 if (InSet)
681 return true;
682
683 // The effective address is
684 // addr(atom(A)) + offset(A)
685 // - addr(atom(B)) - offset(B)
686 // and the offsets are not relocatable, so the fixup is fully resolved when
687 // addr(atom(A)) - addr(atom(B)) == 0.
688 const MCSymbol &SA = findAliasedSymbol(SymA);
689 const MCSection &SecA = SA.getSection();
690 const MCSection &SecB = *FB.getParent();
691
692 if (IsPCRel) {
693 // The simple (Darwin, except on x86_64) way of dealing with this was to
694 // assume that any reference to a temporary symbol *must* be a temporary
695 // symbol in the same atom, unless the sections differ. Therefore, any PCrel
696 // relocation to a temporary symbol (in the same section) is fully
697 // resolved. This also works in conjunction with absolutized .set, which
698 // requires the compiler to use .set to absolutize the differences between
699 // symbols which the compiler knows to be assembly time constants, so we
700 // don't need to worry about considering symbol differences fully resolved.
701 //
702 // If the file isn't using sub-sections-via-symbols, we can make the
703 // same assumptions about any symbol that we normally make about
704 // assembler locals.
705
706 bool hasReliableSymbolDifference = isX86_64();
707 if (!hasReliableSymbolDifference) {
708 if (!SA.isInSection() || &SecA != &SecB ||
709 (!SA.isTemporary() && FB.getAtom() != SA.getFragment()->getAtom() &&
710 Asm.getSubsectionsViaSymbols()))
711 return false;
712 return true;
713 }
714 // For Darwin x86_64, there is one special case when the reference IsPCRel.
715 // If the fragment with the reference does not have a base symbol but meets
716 // the simple way of dealing with this, in that it is a temporary symbol in
717 // the same atom then it is assumed to be fully resolved. This is needed so
718 // a relocation entry is not created and so the static linker does not
719 // mess up the reference later.
720 else if(!FB.getAtom() &&
721 SA.isTemporary() && SA.isInSection() && &SecA == &SecB){
722 return true;
723 }
724 }
725
726 // If they are not in the same section, we can't compute the diff.
727 if (&SecA != &SecB)
728 return false;
729
730 const MCFragment *FA = SA.getFragment();
731
732 // Bail if the symbol has no fragment.
733 if (!FA)
734 return false;
735
736 // If the atoms are the same, they are guaranteed to have the same address.
737 if (FA->getAtom() == FB.getAtom())
738 return true;
739
740 // Otherwise, we can't prove this is fully resolved.
741 return false;
742 }
743
getLCFromMCVM(MCVersionMinType Type)744 static MachO::LoadCommandType getLCFromMCVM(MCVersionMinType Type) {
745 switch (Type) {
746 case MCVM_OSXVersionMin: return MachO::LC_VERSION_MIN_MACOSX;
747 case MCVM_IOSVersionMin: return MachO::LC_VERSION_MIN_IPHONEOS;
748 case MCVM_TvOSVersionMin: return MachO::LC_VERSION_MIN_TVOS;
749 case MCVM_WatchOSVersionMin: return MachO::LC_VERSION_MIN_WATCHOS;
750 }
751 llvm_unreachable("Invalid mc version min type");
752 }
753
writeObject(MCAssembler & Asm,const MCAsmLayout & Layout)754 uint64_t MachObjectWriter::writeObject(MCAssembler &Asm,
755 const MCAsmLayout &Layout) {
756 uint64_t StartOffset = W.OS.tell();
757
758 // Compute symbol table information and bind symbol indices.
759 computeSymbolTable(Asm, LocalSymbolData, ExternalSymbolData,
760 UndefinedSymbolData);
761
762 unsigned NumSections = Asm.size();
763 const MCAssembler::VersionInfoType &VersionInfo =
764 Layout.getAssembler().getVersionInfo();
765
766 // The section data starts after the header, the segment load command (and
767 // section headers) and the symbol table.
768 unsigned NumLoadCommands = 1;
769 uint64_t LoadCommandsSize = is64Bit() ?
770 sizeof(MachO::segment_command_64) + NumSections * sizeof(MachO::section_64):
771 sizeof(MachO::segment_command) + NumSections * sizeof(MachO::section);
772
773 // Add the deployment target version info load command size, if used.
774 if (VersionInfo.Major != 0) {
775 ++NumLoadCommands;
776 if (VersionInfo.EmitBuildVersion)
777 LoadCommandsSize += sizeof(MachO::build_version_command);
778 else
779 LoadCommandsSize += sizeof(MachO::version_min_command);
780 }
781
782 // Add the data-in-code load command size, if used.
783 unsigned NumDataRegions = Asm.getDataRegions().size();
784 if (NumDataRegions) {
785 ++NumLoadCommands;
786 LoadCommandsSize += sizeof(MachO::linkedit_data_command);
787 }
788
789 // Add the loh load command size, if used.
790 uint64_t LOHRawSize = Asm.getLOHContainer().getEmitSize(*this, Layout);
791 uint64_t LOHSize = alignTo(LOHRawSize, is64Bit() ? 8 : 4);
792 if (LOHSize) {
793 ++NumLoadCommands;
794 LoadCommandsSize += sizeof(MachO::linkedit_data_command);
795 }
796
797 // Add the symbol table load command sizes, if used.
798 unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() +
799 UndefinedSymbolData.size();
800 if (NumSymbols) {
801 NumLoadCommands += 2;
802 LoadCommandsSize += (sizeof(MachO::symtab_command) +
803 sizeof(MachO::dysymtab_command));
804 }
805
806 // Add the linker option load commands sizes.
807 for (const auto &Option : Asm.getLinkerOptions()) {
808 ++NumLoadCommands;
809 LoadCommandsSize += ComputeLinkerOptionsLoadCommandSize(Option, is64Bit());
810 }
811
812 // Compute the total size of the section data, as well as its file size and vm
813 // size.
814 uint64_t SectionDataStart = (is64Bit() ? sizeof(MachO::mach_header_64) :
815 sizeof(MachO::mach_header)) + LoadCommandsSize;
816 uint64_t SectionDataSize = 0;
817 uint64_t SectionDataFileSize = 0;
818 uint64_t VMSize = 0;
819 for (const MCSection &Sec : Asm) {
820 uint64_t Address = getSectionAddress(&Sec);
821 uint64_t Size = Layout.getSectionAddressSize(&Sec);
822 uint64_t FileSize = Layout.getSectionFileSize(&Sec);
823 FileSize += getPaddingSize(&Sec, Layout);
824
825 VMSize = std::max(VMSize, Address + Size);
826
827 if (Sec.isVirtualSection())
828 continue;
829
830 SectionDataSize = std::max(SectionDataSize, Address + Size);
831 SectionDataFileSize = std::max(SectionDataFileSize, Address + FileSize);
832 }
833
834 // The section data is padded to 4 bytes.
835 //
836 // FIXME: Is this machine dependent?
837 unsigned SectionDataPadding =
838 offsetToAlignment(SectionDataFileSize, Align(4));
839 SectionDataFileSize += SectionDataPadding;
840
841 // Write the prolog, starting with the header and load command...
842 writeHeader(MachO::MH_OBJECT, NumLoadCommands, LoadCommandsSize,
843 Asm.getSubsectionsViaSymbols());
844 uint32_t Prot =
845 MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE;
846 writeSegmentLoadCommand("", NumSections, 0, VMSize, SectionDataStart,
847 SectionDataSize, Prot, Prot);
848
849 // ... and then the section headers.
850 uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize;
851 for (const MCSection &Section : Asm) {
852 const auto &Sec = cast<MCSectionMachO>(Section);
853 std::vector<RelAndSymbol> &Relocs = Relocations[&Sec];
854 unsigned NumRelocs = Relocs.size();
855 uint64_t SectionStart = SectionDataStart + getSectionAddress(&Sec);
856 unsigned Flags = Sec.getTypeAndAttributes();
857 if (Sec.hasInstructions())
858 Flags |= MachO::S_ATTR_SOME_INSTRUCTIONS;
859 writeSection(Layout, Sec, getSectionAddress(&Sec), SectionStart, Flags,
860 RelocTableEnd, NumRelocs);
861 RelocTableEnd += NumRelocs * sizeof(MachO::any_relocation_info);
862 }
863
864 // Write out the deployment target information, if it's available.
865 if (VersionInfo.Major != 0) {
866 auto EncodeVersion = [](VersionTuple V) -> uint32_t {
867 assert(!V.empty() && "empty version");
868 unsigned Update = V.getSubminor() ? *V.getSubminor() : 0;
869 unsigned Minor = V.getMinor() ? *V.getMinor() : 0;
870 assert(Update < 256 && "unencodable update target version");
871 assert(Minor < 256 && "unencodable minor target version");
872 assert(V.getMajor() < 65536 && "unencodable major target version");
873 return Update | (Minor << 8) | (V.getMajor() << 16);
874 };
875 uint32_t EncodedVersion = EncodeVersion(
876 VersionTuple(VersionInfo.Major, VersionInfo.Minor, VersionInfo.Update));
877 uint32_t SDKVersion = !VersionInfo.SDKVersion.empty()
878 ? EncodeVersion(VersionInfo.SDKVersion)
879 : 0;
880 if (VersionInfo.EmitBuildVersion) {
881 // FIXME: Currently empty tools. Add clang version in the future.
882 W.write<uint32_t>(MachO::LC_BUILD_VERSION);
883 W.write<uint32_t>(sizeof(MachO::build_version_command));
884 W.write<uint32_t>(VersionInfo.TypeOrPlatform.Platform);
885 W.write<uint32_t>(EncodedVersion);
886 W.write<uint32_t>(SDKVersion);
887 W.write<uint32_t>(0); // Empty tools list.
888 } else {
889 MachO::LoadCommandType LCType
890 = getLCFromMCVM(VersionInfo.TypeOrPlatform.Type);
891 W.write<uint32_t>(LCType);
892 W.write<uint32_t>(sizeof(MachO::version_min_command));
893 W.write<uint32_t>(EncodedVersion);
894 W.write<uint32_t>(SDKVersion);
895 }
896 }
897
898 // Write the data-in-code load command, if used.
899 uint64_t DataInCodeTableEnd = RelocTableEnd + NumDataRegions * 8;
900 if (NumDataRegions) {
901 uint64_t DataRegionsOffset = RelocTableEnd;
902 uint64_t DataRegionsSize = NumDataRegions * 8;
903 writeLinkeditLoadCommand(MachO::LC_DATA_IN_CODE, DataRegionsOffset,
904 DataRegionsSize);
905 }
906
907 // Write the loh load command, if used.
908 uint64_t LOHTableEnd = DataInCodeTableEnd + LOHSize;
909 if (LOHSize)
910 writeLinkeditLoadCommand(MachO::LC_LINKER_OPTIMIZATION_HINT,
911 DataInCodeTableEnd, LOHSize);
912
913 // Write the symbol table load command, if used.
914 if (NumSymbols) {
915 unsigned FirstLocalSymbol = 0;
916 unsigned NumLocalSymbols = LocalSymbolData.size();
917 unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols;
918 unsigned NumExternalSymbols = ExternalSymbolData.size();
919 unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols;
920 unsigned NumUndefinedSymbols = UndefinedSymbolData.size();
921 unsigned NumIndirectSymbols = Asm.indirect_symbol_size();
922 unsigned NumSymTabSymbols =
923 NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols;
924 uint64_t IndirectSymbolSize = NumIndirectSymbols * 4;
925 uint64_t IndirectSymbolOffset = 0;
926
927 // If used, the indirect symbols are written after the section data.
928 if (NumIndirectSymbols)
929 IndirectSymbolOffset = LOHTableEnd;
930
931 // The symbol table is written after the indirect symbol data.
932 uint64_t SymbolTableOffset = LOHTableEnd + IndirectSymbolSize;
933
934 // The string table is written after symbol table.
935 uint64_t StringTableOffset =
936 SymbolTableOffset + NumSymTabSymbols * (is64Bit() ?
937 sizeof(MachO::nlist_64) :
938 sizeof(MachO::nlist));
939 writeSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols,
940 StringTableOffset, StringTable.getSize());
941
942 writeDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols,
943 FirstExternalSymbol, NumExternalSymbols,
944 FirstUndefinedSymbol, NumUndefinedSymbols,
945 IndirectSymbolOffset, NumIndirectSymbols);
946 }
947
948 // Write the linker options load commands.
949 for (const auto &Option : Asm.getLinkerOptions())
950 writeLinkerOptionsLoadCommand(Option);
951
952 // Write the actual section data.
953 for (const MCSection &Sec : Asm) {
954 Asm.writeSectionData(W.OS, &Sec, Layout);
955
956 uint64_t Pad = getPaddingSize(&Sec, Layout);
957 W.OS.write_zeros(Pad);
958 }
959
960 // Write the extra padding.
961 W.OS.write_zeros(SectionDataPadding);
962
963 // Write the relocation entries.
964 for (const MCSection &Sec : Asm) {
965 // Write the section relocation entries, in reverse order to match 'as'
966 // (approximately, the exact algorithm is more complicated than this).
967 std::vector<RelAndSymbol> &Relocs = Relocations[&Sec];
968 for (const RelAndSymbol &Rel : make_range(Relocs.rbegin(), Relocs.rend())) {
969 W.write<uint32_t>(Rel.MRE.r_word0);
970 W.write<uint32_t>(Rel.MRE.r_word1);
971 }
972 }
973
974 // Write out the data-in-code region payload, if there is one.
975 for (MCAssembler::const_data_region_iterator
976 it = Asm.data_region_begin(), ie = Asm.data_region_end();
977 it != ie; ++it) {
978 const DataRegionData *Data = &(*it);
979 uint64_t Start = getSymbolAddress(*Data->Start, Layout);
980 uint64_t End;
981 if (Data->End)
982 End = getSymbolAddress(*Data->End, Layout);
983 else
984 report_fatal_error("Data region not terminated");
985
986 LLVM_DEBUG(dbgs() << "data in code region-- kind: " << Data->Kind
987 << " start: " << Start << "(" << Data->Start->getName()
988 << ")"
989 << " end: " << End << "(" << Data->End->getName() << ")"
990 << " size: " << End - Start << "\n");
991 W.write<uint32_t>(Start);
992 W.write<uint16_t>(End - Start);
993 W.write<uint16_t>(Data->Kind);
994 }
995
996 // Write out the loh commands, if there is one.
997 if (LOHSize) {
998 #ifndef NDEBUG
999 unsigned Start = W.OS.tell();
1000 #endif
1001 Asm.getLOHContainer().emit(*this, Layout);
1002 // Pad to a multiple of the pointer size.
1003 W.OS.write_zeros(
1004 offsetToAlignment(LOHRawSize, is64Bit() ? Align(8) : Align(4)));
1005 assert(W.OS.tell() - Start == LOHSize);
1006 }
1007
1008 // Write the symbol table data, if used.
1009 if (NumSymbols) {
1010 // Write the indirect symbol entries.
1011 for (MCAssembler::const_indirect_symbol_iterator
1012 it = Asm.indirect_symbol_begin(),
1013 ie = Asm.indirect_symbol_end(); it != ie; ++it) {
1014 // Indirect symbols in the non-lazy symbol pointer section have some
1015 // special handling.
1016 const MCSectionMachO &Section =
1017 static_cast<const MCSectionMachO &>(*it->Section);
1018 if (Section.getType() == MachO::S_NON_LAZY_SYMBOL_POINTERS) {
1019 // If this symbol is defined and internal, mark it as such.
1020 if (it->Symbol->isDefined() && !it->Symbol->isExternal()) {
1021 uint32_t Flags = MachO::INDIRECT_SYMBOL_LOCAL;
1022 if (it->Symbol->isAbsolute())
1023 Flags |= MachO::INDIRECT_SYMBOL_ABS;
1024 W.write<uint32_t>(Flags);
1025 continue;
1026 }
1027 }
1028
1029 W.write<uint32_t>(it->Symbol->getIndex());
1030 }
1031
1032 // FIXME: Check that offsets match computed ones.
1033
1034 // Write the symbol table entries.
1035 for (auto *SymbolData :
1036 {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData})
1037 for (MachSymbolData &Entry : *SymbolData)
1038 writeNlist(Entry, Layout);
1039
1040 // Write the string table.
1041 StringTable.write(W.OS);
1042 }
1043
1044 return W.OS.tell() - StartOffset;
1045 }
1046
1047 std::unique_ptr<MCObjectWriter>
createMachObjectWriter(std::unique_ptr<MCMachObjectTargetWriter> MOTW,raw_pwrite_stream & OS,bool IsLittleEndian)1048 llvm::createMachObjectWriter(std::unique_ptr<MCMachObjectTargetWriter> MOTW,
1049 raw_pwrite_stream &OS, bool IsLittleEndian) {
1050 return std::make_unique<MachObjectWriter>(std::move(MOTW), OS,
1051 IsLittleEndian);
1052 }
1053