1 //===- MachOObjcopy.cpp -----------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "MachOObjcopy.h"
10 #include "../CopyConfig.h"
11 #include "../llvm-objcopy.h"
12 #include "MachOReader.h"
13 #include "MachOWriter.h"
14 #include "llvm/ADT/DenseSet.h"
15 #include "llvm/Object/ArchiveWriter.h"
16 #include "llvm/Object/MachOUniversal.h"
17 #include "llvm/Object/MachOUniversalWriter.h"
18 #include "llvm/Support/Errc.h"
19 #include "llvm/Support/Error.h"
20
21 namespace llvm {
22 namespace objcopy {
23 namespace macho {
24
25 using namespace object;
26 using SectionPred = std::function<bool(const std::unique_ptr<Section> &Sec)>;
27 using LoadCommandPred = std::function<bool(const LoadCommand &LC)>;
28
29 #ifndef NDEBUG
isLoadCommandWithPayloadString(const LoadCommand & LC)30 static bool isLoadCommandWithPayloadString(const LoadCommand &LC) {
31 // TODO: Add support for LC_REEXPORT_DYLIB, LC_LOAD_UPWARD_DYLIB and
32 // LC_LAZY_LOAD_DYLIB
33 return LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH ||
34 LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_ID_DYLIB ||
35 LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_LOAD_DYLIB ||
36 LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_LOAD_WEAK_DYLIB;
37 }
38 #endif
39
getPayloadString(const LoadCommand & LC)40 static StringRef getPayloadString(const LoadCommand &LC) {
41 assert(isLoadCommandWithPayloadString(LC) &&
42 "unsupported load command encountered");
43
44 return StringRef(reinterpret_cast<const char *>(LC.Payload.data()),
45 LC.Payload.size())
46 .rtrim('\0');
47 }
48
removeSections(const CopyConfig & Config,Object & Obj)49 static Error removeSections(const CopyConfig &Config, Object &Obj) {
50 SectionPred RemovePred = [](const std::unique_ptr<Section> &) {
51 return false;
52 };
53
54 if (!Config.ToRemove.empty()) {
55 RemovePred = [&Config, RemovePred](const std::unique_ptr<Section> &Sec) {
56 return Config.ToRemove.matches(Sec->CanonicalName);
57 };
58 }
59
60 if (Config.StripAll || Config.StripDebug) {
61 // Remove all debug sections.
62 RemovePred = [RemovePred](const std::unique_ptr<Section> &Sec) {
63 if (Sec->Segname == "__DWARF")
64 return true;
65
66 return RemovePred(Sec);
67 };
68 }
69
70 if (!Config.OnlySection.empty()) {
71 // Overwrite RemovePred because --only-section takes priority.
72 RemovePred = [&Config](const std::unique_ptr<Section> &Sec) {
73 return !Config.OnlySection.matches(Sec->CanonicalName);
74 };
75 }
76
77 return Obj.removeSections(RemovePred);
78 }
79
markSymbols(const CopyConfig & Config,Object & Obj)80 static void markSymbols(const CopyConfig &Config, Object &Obj) {
81 // Symbols referenced from the indirect symbol table must not be removed.
82 for (IndirectSymbolEntry &ISE : Obj.IndirectSymTable.Symbols)
83 if (ISE.Symbol)
84 (*ISE.Symbol)->Referenced = true;
85 }
86
updateAndRemoveSymbols(const CopyConfig & Config,Object & Obj)87 static void updateAndRemoveSymbols(const CopyConfig &Config, Object &Obj) {
88 for (SymbolEntry &Sym : Obj.SymTable) {
89 auto I = Config.SymbolsToRename.find(Sym.Name);
90 if (I != Config.SymbolsToRename.end())
91 Sym.Name = std::string(I->getValue());
92 }
93
94 auto RemovePred = [Config, &Obj](const std::unique_ptr<SymbolEntry> &N) {
95 if (N->Referenced)
96 return false;
97 if (Config.StripAll)
98 return true;
99 if (Config.DiscardMode == DiscardType::All && !(N->n_type & MachO::N_EXT))
100 return true;
101 // This behavior is consistent with cctools' strip.
102 if (Config.StripSwiftSymbols && (Obj.Header.Flags & MachO::MH_DYLDLINK) &&
103 Obj.SwiftVersion && *Obj.SwiftVersion && N->isSwiftSymbol())
104 return true;
105 return false;
106 };
107
108 Obj.SymTable.removeSymbols(RemovePred);
109 }
110
111 template <typename LCType>
updateLoadCommandPayloadString(LoadCommand & LC,StringRef S)112 static void updateLoadCommandPayloadString(LoadCommand &LC, StringRef S) {
113 assert(isLoadCommandWithPayloadString(LC) &&
114 "unsupported load command encountered");
115
116 uint32_t NewCmdsize = alignTo(sizeof(LCType) + S.size() + 1, 8);
117
118 LC.MachOLoadCommand.load_command_data.cmdsize = NewCmdsize;
119 LC.Payload.assign(NewCmdsize - sizeof(LCType), 0);
120 std::copy(S.begin(), S.end(), LC.Payload.begin());
121 }
122
buildRPathLoadCommand(StringRef Path)123 static LoadCommand buildRPathLoadCommand(StringRef Path) {
124 LoadCommand LC;
125 MachO::rpath_command RPathLC;
126 RPathLC.cmd = MachO::LC_RPATH;
127 RPathLC.path = sizeof(MachO::rpath_command);
128 RPathLC.cmdsize = alignTo(sizeof(MachO::rpath_command) + Path.size() + 1, 8);
129 LC.MachOLoadCommand.rpath_command_data = RPathLC;
130 LC.Payload.assign(RPathLC.cmdsize - sizeof(MachO::rpath_command), 0);
131 std::copy(Path.begin(), Path.end(), LC.Payload.begin());
132 return LC;
133 }
134
processLoadCommands(const CopyConfig & Config,Object & Obj)135 static Error processLoadCommands(const CopyConfig &Config, Object &Obj) {
136 // Remove RPaths.
137 DenseSet<StringRef> RPathsToRemove(Config.RPathsToRemove.begin(),
138 Config.RPathsToRemove.end());
139
140 LoadCommandPred RemovePred = [&RPathsToRemove,
141 &Config](const LoadCommand &LC) {
142 if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH) {
143 // When removing all RPaths we don't need to care
144 // about what it contains
145 if (Config.RemoveAllRpaths)
146 return true;
147
148 StringRef RPath = getPayloadString(LC);
149 if (RPathsToRemove.count(RPath)) {
150 RPathsToRemove.erase(RPath);
151 return true;
152 }
153 }
154 return false;
155 };
156
157 if (Error E = Obj.removeLoadCommands(RemovePred))
158 return E;
159
160 // Emit an error if the Mach-O binary does not contain an rpath path name
161 // specified in -delete_rpath.
162 for (StringRef RPath : Config.RPathsToRemove) {
163 if (RPathsToRemove.count(RPath))
164 return createStringError(errc::invalid_argument,
165 "no LC_RPATH load command with path: %s",
166 RPath.str().c_str());
167 }
168
169 DenseSet<StringRef> RPaths;
170
171 // Get all existing RPaths.
172 for (LoadCommand &LC : Obj.LoadCommands) {
173 if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH)
174 RPaths.insert(getPayloadString(LC));
175 }
176
177 // Throw errors for invalid RPaths.
178 for (const auto &OldNew : Config.RPathsToUpdate) {
179 StringRef Old = OldNew.getFirst();
180 StringRef New = OldNew.getSecond();
181 if (RPaths.count(Old) == 0)
182 return createStringError(errc::invalid_argument,
183 "no LC_RPATH load command with path: " + Old);
184 if (RPaths.count(New) != 0)
185 return createStringError(errc::invalid_argument,
186 "rpath '" + New +
187 "' would create a duplicate load command");
188 }
189
190 // Update load commands.
191 for (LoadCommand &LC : Obj.LoadCommands) {
192 switch (LC.MachOLoadCommand.load_command_data.cmd) {
193 case MachO::LC_ID_DYLIB:
194 if (Config.SharedLibId)
195 updateLoadCommandPayloadString<MachO::dylib_command>(
196 LC, *Config.SharedLibId);
197 break;
198
199 case MachO::LC_RPATH: {
200 StringRef RPath = getPayloadString(LC);
201 StringRef NewRPath = Config.RPathsToUpdate.lookup(RPath);
202 if (!NewRPath.empty())
203 updateLoadCommandPayloadString<MachO::rpath_command>(LC, NewRPath);
204 break;
205 }
206
207 // TODO: Add LC_REEXPORT_DYLIB, LC_LAZY_LOAD_DYLIB, and LC_LOAD_UPWARD_DYLIB
208 // here once llvm-objcopy supports them.
209 case MachO::LC_LOAD_DYLIB:
210 case MachO::LC_LOAD_WEAK_DYLIB:
211 StringRef InstallName = getPayloadString(LC);
212 StringRef NewInstallName =
213 Config.InstallNamesToUpdate.lookup(InstallName);
214 if (!NewInstallName.empty())
215 updateLoadCommandPayloadString<MachO::dylib_command>(LC,
216 NewInstallName);
217 break;
218 }
219 }
220
221 // Add new RPaths.
222 for (StringRef RPath : Config.RPathToAdd) {
223 if (RPaths.count(RPath) != 0)
224 return createStringError(errc::invalid_argument,
225 "rpath '" + RPath +
226 "' would create a duplicate load command");
227 RPaths.insert(RPath);
228 Obj.LoadCommands.push_back(buildRPathLoadCommand(RPath));
229 }
230
231 for (StringRef RPath : Config.RPathToPrepend) {
232 if (RPaths.count(RPath) != 0)
233 return createStringError(errc::invalid_argument,
234 "rpath '" + RPath +
235 "' would create a duplicate load command");
236
237 RPaths.insert(RPath);
238 Obj.LoadCommands.insert(Obj.LoadCommands.begin(),
239 buildRPathLoadCommand(RPath));
240 }
241
242 // Unlike appending rpaths, the indexes of subsequent load commands must
243 // be recalculated after prepending one.
244 if (!Config.RPathToPrepend.empty())
245 Obj.updateLoadCommandIndexes();
246
247 return Error::success();
248 }
249
dumpSectionToFile(StringRef SecName,StringRef Filename,Object & Obj)250 static Error dumpSectionToFile(StringRef SecName, StringRef Filename,
251 Object &Obj) {
252 for (LoadCommand &LC : Obj.LoadCommands)
253 for (const std::unique_ptr<Section> &Sec : LC.Sections) {
254 if (Sec->CanonicalName == SecName) {
255 Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr =
256 FileOutputBuffer::create(Filename, Sec->Content.size());
257 if (!BufferOrErr)
258 return BufferOrErr.takeError();
259 std::unique_ptr<FileOutputBuffer> Buf = std::move(*BufferOrErr);
260 llvm::copy(Sec->Content, Buf->getBufferStart());
261
262 if (Error E = Buf->commit())
263 return E;
264 return Error::success();
265 }
266 }
267
268 return createStringError(object_error::parse_failed, "section '%s' not found",
269 SecName.str().c_str());
270 }
271
addSection(StringRef SecName,StringRef Filename,Object & Obj)272 static Error addSection(StringRef SecName, StringRef Filename, Object &Obj) {
273 ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
274 MemoryBuffer::getFile(Filename);
275 if (!BufOrErr)
276 return createFileError(Filename, errorCodeToError(BufOrErr.getError()));
277 std::unique_ptr<MemoryBuffer> Buf = std::move(*BufOrErr);
278
279 std::pair<StringRef, StringRef> Pair = SecName.split(',');
280 StringRef TargetSegName = Pair.first;
281 Section Sec(TargetSegName, Pair.second);
282 Sec.Content = Obj.NewSectionsContents.save(Buf->getBuffer());
283 Sec.Size = Sec.Content.size();
284
285 // Add the a section into an existing segment.
286 for (LoadCommand &LC : Obj.LoadCommands) {
287 Optional<StringRef> SegName = LC.getSegmentName();
288 if (SegName && SegName == TargetSegName) {
289 uint64_t Addr = *LC.getSegmentVMAddr();
290 for (const std::unique_ptr<Section> &S : LC.Sections)
291 Addr = std::max(Addr, S->Addr + S->Size);
292 LC.Sections.push_back(std::make_unique<Section>(Sec));
293 LC.Sections.back()->Addr = Addr;
294 return Error::success();
295 }
296 }
297
298 // There's no segment named TargetSegName. Create a new load command and
299 // Insert a new section into it.
300 LoadCommand &NewSegment =
301 Obj.addSegment(TargetSegName, alignTo(Sec.Size, 16384));
302 NewSegment.Sections.push_back(std::make_unique<Section>(Sec));
303 NewSegment.Sections.back()->Addr = *NewSegment.getSegmentVMAddr();
304 return Error::success();
305 }
306
307 // isValidMachOCannonicalName returns success if Name is a MachO cannonical name
308 // ("<segment>,<section>") and lengths of both segment and section names are
309 // valid.
isValidMachOCannonicalName(StringRef Name)310 static Error isValidMachOCannonicalName(StringRef Name) {
311 if (Name.count(',') != 1)
312 return createStringError(errc::invalid_argument,
313 "invalid section name '%s' (should be formatted "
314 "as '<segment name>,<section name>')",
315 Name.str().c_str());
316
317 std::pair<StringRef, StringRef> Pair = Name.split(',');
318 if (Pair.first.size() > 16)
319 return createStringError(errc::invalid_argument,
320 "too long segment name: '%s'",
321 Pair.first.str().c_str());
322 if (Pair.second.size() > 16)
323 return createStringError(errc::invalid_argument,
324 "too long section name: '%s'",
325 Pair.second.str().c_str());
326 return Error::success();
327 }
328
handleArgs(const CopyConfig & Config,Object & Obj)329 static Error handleArgs(const CopyConfig &Config, Object &Obj) {
330 if (Config.AllowBrokenLinks || !Config.BuildIdLinkDir.empty() ||
331 Config.BuildIdLinkInput || Config.BuildIdLinkOutput ||
332 !Config.SplitDWO.empty() || !Config.SymbolsPrefix.empty() ||
333 !Config.AllocSectionsPrefix.empty() || !Config.KeepSection.empty() ||
334 Config.NewSymbolVisibility || !Config.SymbolsToGlobalize.empty() ||
335 !Config.SymbolsToKeep.empty() || !Config.SymbolsToLocalize.empty() ||
336 !Config.SymbolsToWeaken.empty() || !Config.SymbolsToKeepGlobal.empty() ||
337 !Config.SectionsToRename.empty() ||
338 !Config.UnneededSymbolsToRemove.empty() ||
339 !Config.SetSectionAlignment.empty() || !Config.SetSectionFlags.empty() ||
340 Config.ExtractDWO || Config.LocalizeHidden || Config.PreserveDates ||
341 Config.StripAllGNU || Config.StripDWO || Config.StripNonAlloc ||
342 Config.StripSections || Config.Weaken || Config.DecompressDebugSections ||
343 Config.StripUnneeded || Config.DiscardMode == DiscardType::Locals ||
344 !Config.SymbolsToAdd.empty() || Config.EntryExpr) {
345 return createStringError(llvm::errc::invalid_argument,
346 "option not supported by llvm-objcopy for MachO");
347 }
348
349 // Dump sections before add/remove for compatibility with GNU objcopy.
350 for (StringRef Flag : Config.DumpSection) {
351 StringRef SectionName;
352 StringRef FileName;
353 std::tie(SectionName, FileName) = Flag.split('=');
354 if (Error E = dumpSectionToFile(SectionName, FileName, Obj))
355 return E;
356 }
357
358 if (Error E = removeSections(Config, Obj))
359 return E;
360
361 // Mark symbols to determine which symbols are still needed.
362 if (Config.StripAll)
363 markSymbols(Config, Obj);
364
365 updateAndRemoveSymbols(Config, Obj);
366
367 if (Config.StripAll)
368 for (LoadCommand &LC : Obj.LoadCommands)
369 for (std::unique_ptr<Section> &Sec : LC.Sections)
370 Sec->Relocations.clear();
371
372 for (const auto &Flag : Config.AddSection) {
373 std::pair<StringRef, StringRef> SecPair = Flag.split("=");
374 StringRef SecName = SecPair.first;
375 StringRef File = SecPair.second;
376 if (Error E = isValidMachOCannonicalName(SecName))
377 return E;
378 if (Error E = addSection(SecName, File, Obj))
379 return E;
380 }
381
382 if (Error E = processLoadCommands(Config, Obj))
383 return E;
384
385 return Error::success();
386 }
387
executeObjcopyOnBinary(const CopyConfig & Config,object::MachOObjectFile & In,Buffer & Out)388 Error executeObjcopyOnBinary(const CopyConfig &Config,
389 object::MachOObjectFile &In, Buffer &Out) {
390 MachOReader Reader(In);
391 Expected<std::unique_ptr<Object>> O = Reader.create();
392 if (!O)
393 return createFileError(Config.InputFilename, O.takeError());
394
395 if (Error E = handleArgs(Config, **O))
396 return createFileError(Config.InputFilename, std::move(E));
397
398 // Page size used for alignment of segment sizes in Mach-O executables and
399 // dynamic libraries.
400 uint64_t PageSize;
401 switch (In.getArch()) {
402 case Triple::ArchType::arm:
403 case Triple::ArchType::aarch64:
404 case Triple::ArchType::aarch64_32:
405 PageSize = 16384;
406 break;
407 default:
408 PageSize = 4096;
409 }
410
411 MachOWriter Writer(**O, In.is64Bit(), In.isLittleEndian(), PageSize, Out);
412 if (auto E = Writer.finalize())
413 return E;
414 return Writer.write();
415 }
416
executeObjcopyOnMachOUniversalBinary(CopyConfig & Config,const MachOUniversalBinary & In,Buffer & Out)417 Error executeObjcopyOnMachOUniversalBinary(CopyConfig &Config,
418 const MachOUniversalBinary &In,
419 Buffer &Out) {
420 SmallVector<OwningBinary<Binary>, 2> Binaries;
421 SmallVector<Slice, 2> Slices;
422 for (const auto &O : In.objects()) {
423 Expected<std::unique_ptr<Archive>> ArOrErr = O.getAsArchive();
424 if (ArOrErr) {
425 Expected<std::vector<NewArchiveMember>> NewArchiveMembersOrErr =
426 createNewArchiveMembers(Config, **ArOrErr);
427 if (!NewArchiveMembersOrErr)
428 return NewArchiveMembersOrErr.takeError();
429 Expected<std::unique_ptr<MemoryBuffer>> OutputBufferOrErr =
430 writeArchiveToBuffer(*NewArchiveMembersOrErr,
431 (*ArOrErr)->hasSymbolTable(), (*ArOrErr)->kind(),
432 Config.DeterministicArchives,
433 (*ArOrErr)->isThin());
434 if (!OutputBufferOrErr)
435 return OutputBufferOrErr.takeError();
436 Expected<std::unique_ptr<Binary>> BinaryOrErr =
437 object::createBinary(**OutputBufferOrErr);
438 if (!BinaryOrErr)
439 return BinaryOrErr.takeError();
440 Binaries.emplace_back(std::move(*BinaryOrErr),
441 std::move(*OutputBufferOrErr));
442 Slices.emplace_back(*cast<Archive>(Binaries.back().getBinary()),
443 O.getCPUType(), O.getCPUSubType(),
444 O.getArchFlagName(), O.getAlign());
445 continue;
446 }
447 // The methods getAsArchive, getAsObjectFile, getAsIRObject of the class
448 // ObjectForArch return an Error in case of the type mismatch. We need to
449 // check each in turn to see what kind of slice this is, so ignore errors
450 // produced along the way.
451 consumeError(ArOrErr.takeError());
452
453 Expected<std::unique_ptr<MachOObjectFile>> ObjOrErr = O.getAsObjectFile();
454 if (!ObjOrErr) {
455 consumeError(ObjOrErr.takeError());
456 return createStringError(std::errc::invalid_argument,
457 "slice for '%s' of the universal Mach-O binary "
458 "'%s' is not a Mach-O object or an archive",
459 O.getArchFlagName().c_str(),
460 Config.InputFilename.str().c_str());
461 }
462 std::string ArchFlagName = O.getArchFlagName();
463 MemBuffer MB(ArchFlagName);
464 if (Error E = executeObjcopyOnBinary(Config, **ObjOrErr, MB))
465 return E;
466 std::unique_ptr<WritableMemoryBuffer> OutputBuffer =
467 MB.releaseMemoryBuffer();
468 Expected<std::unique_ptr<Binary>> BinaryOrErr =
469 object::createBinary(*OutputBuffer);
470 if (!BinaryOrErr)
471 return BinaryOrErr.takeError();
472 Binaries.emplace_back(std::move(*BinaryOrErr), std::move(OutputBuffer));
473 Slices.emplace_back(*cast<MachOObjectFile>(Binaries.back().getBinary()),
474 O.getAlign());
475 }
476 Expected<std::unique_ptr<MemoryBuffer>> B =
477 writeUniversalBinaryToBuffer(Slices);
478 if (!B)
479 return B.takeError();
480 if (Error E = Out.allocate((*B)->getBufferSize()))
481 return E;
482 memcpy(Out.getBufferStart(), (*B)->getBufferStart(), (*B)->getBufferSize());
483 return Out.commit();
484 }
485
486 } // end namespace macho
487 } // end namespace objcopy
488 } // end namespace llvm
489