1 //===-ThinLTOCodeGenerator.cpp - LLVM Link Time Optimizer -----------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the Thin Link Time Optimization library. This library is
11 // intended to be used by linker to optimize code at link time.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "llvm/LTO/legacy/ThinLTOCodeGenerator.h"
16
17 #ifdef HAVE_LLVM_REVISION
18 #include "LLVMLTORevision.h"
19 #endif
20
21 #include "llvm/ADT/Statistic.h"
22 #include "llvm/ADT/StringExtras.h"
23 #include "llvm/Analysis/ModuleSummaryAnalysis.h"
24 #include "llvm/Analysis/TargetLibraryInfo.h"
25 #include "llvm/Analysis/TargetTransformInfo.h"
26 #include "llvm/Bitcode/BitcodeWriterPass.h"
27 #include "llvm/Bitcode/ReaderWriter.h"
28 #include "llvm/ExecutionEngine/ObjectMemoryBuffer.h"
29 #include "llvm/IR/DiagnosticPrinter.h"
30 #include "llvm/IR/LLVMContext.h"
31 #include "llvm/IR/LegacyPassManager.h"
32 #include "llvm/IR/Mangler.h"
33 #include "llvm/IRReader/IRReader.h"
34 #include "llvm/LTO/LTO.h"
35 #include "llvm/Linker/Linker.h"
36 #include "llvm/MC/SubtargetFeature.h"
37 #include "llvm/Object/IRObjectFile.h"
38 #include "llvm/Object/ModuleSummaryIndexObjectFile.h"
39 #include "llvm/Support/CachePruning.h"
40 #include "llvm/Support/Debug.h"
41 #include "llvm/Support/Path.h"
42 #include "llvm/Support/SHA1.h"
43 #include "llvm/Support/TargetRegistry.h"
44 #include "llvm/Support/ThreadPool.h"
45 #include "llvm/Target/TargetMachine.h"
46 #include "llvm/Transforms/IPO.h"
47 #include "llvm/Transforms/IPO/FunctionImport.h"
48 #include "llvm/Transforms/IPO/Internalize.h"
49 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
50 #include "llvm/Transforms/ObjCARC.h"
51 #include "llvm/Transforms/Utils/FunctionImportUtils.h"
52
53 #include <numeric>
54
55 using namespace llvm;
56
57 #define DEBUG_TYPE "thinlto"
58
59 namespace llvm {
60 // Flags -discard-value-names, defined in LTOCodeGenerator.cpp
61 extern cl::opt<bool> LTODiscardValueNames;
62 }
63
64 namespace {
65
66 static cl::opt<int> ThreadCount("threads",
67 cl::init(std::thread::hardware_concurrency()));
68
diagnosticHandler(const DiagnosticInfo & DI)69 static void diagnosticHandler(const DiagnosticInfo &DI) {
70 DiagnosticPrinterRawOStream DP(errs());
71 DI.print(DP);
72 errs() << '\n';
73 }
74
75 // Simple helper to save temporary files for debug.
saveTempBitcode(const Module & TheModule,StringRef TempDir,unsigned count,StringRef Suffix)76 static void saveTempBitcode(const Module &TheModule, StringRef TempDir,
77 unsigned count, StringRef Suffix) {
78 if (TempDir.empty())
79 return;
80 // User asked to save temps, let dump the bitcode file after import.
81 auto SaveTempPath = TempDir + llvm::utostr(count) + Suffix;
82 std::error_code EC;
83 raw_fd_ostream OS(SaveTempPath.str(), EC, sys::fs::F_None);
84 if (EC)
85 report_fatal_error(Twine("Failed to open ") + SaveTempPath +
86 " to save optimized bitcode\n");
87 WriteBitcodeToFile(&TheModule, OS, /* ShouldPreserveUseListOrder */ true);
88 }
89
90 static const GlobalValueSummary *
getFirstDefinitionForLinker(const GlobalValueSummaryList & GVSummaryList)91 getFirstDefinitionForLinker(const GlobalValueSummaryList &GVSummaryList) {
92 // If there is any strong definition anywhere, get it.
93 auto StrongDefForLinker = llvm::find_if(
94 GVSummaryList, [](const std::unique_ptr<GlobalValueSummary> &Summary) {
95 auto Linkage = Summary->linkage();
96 return !GlobalValue::isAvailableExternallyLinkage(Linkage) &&
97 !GlobalValue::isWeakForLinker(Linkage);
98 });
99 if (StrongDefForLinker != GVSummaryList.end())
100 return StrongDefForLinker->get();
101 // Get the first *linker visible* definition for this global in the summary
102 // list.
103 auto FirstDefForLinker = llvm::find_if(
104 GVSummaryList, [](const std::unique_ptr<GlobalValueSummary> &Summary) {
105 auto Linkage = Summary->linkage();
106 return !GlobalValue::isAvailableExternallyLinkage(Linkage);
107 });
108 // Extern templates can be emitted as available_externally.
109 if (FirstDefForLinker == GVSummaryList.end())
110 return nullptr;
111 return FirstDefForLinker->get();
112 }
113
114 // Populate map of GUID to the prevailing copy for any multiply defined
115 // symbols. Currently assume first copy is prevailing, or any strong
116 // definition. Can be refined with Linker information in the future.
computePrevailingCopies(const ModuleSummaryIndex & Index,DenseMap<GlobalValue::GUID,const GlobalValueSummary * > & PrevailingCopy)117 static void computePrevailingCopies(
118 const ModuleSummaryIndex &Index,
119 DenseMap<GlobalValue::GUID, const GlobalValueSummary *> &PrevailingCopy) {
120 auto HasMultipleCopies = [&](const GlobalValueSummaryList &GVSummaryList) {
121 return GVSummaryList.size() > 1;
122 };
123
124 for (auto &I : Index) {
125 if (HasMultipleCopies(I.second))
126 PrevailingCopy[I.first] = getFirstDefinitionForLinker(I.second);
127 }
128 }
129
130 static StringMap<MemoryBufferRef>
generateModuleMap(const std::vector<MemoryBufferRef> & Modules)131 generateModuleMap(const std::vector<MemoryBufferRef> &Modules) {
132 StringMap<MemoryBufferRef> ModuleMap;
133 for (auto &ModuleBuffer : Modules) {
134 assert(ModuleMap.find(ModuleBuffer.getBufferIdentifier()) ==
135 ModuleMap.end() &&
136 "Expect unique Buffer Identifier");
137 ModuleMap[ModuleBuffer.getBufferIdentifier()] = ModuleBuffer;
138 }
139 return ModuleMap;
140 }
141
promoteModule(Module & TheModule,const ModuleSummaryIndex & Index)142 static void promoteModule(Module &TheModule, const ModuleSummaryIndex &Index) {
143 if (renameModuleForThinLTO(TheModule, Index))
144 report_fatal_error("renameModuleForThinLTO failed");
145 }
146
147 static void
crossImportIntoModule(Module & TheModule,const ModuleSummaryIndex & Index,StringMap<MemoryBufferRef> & ModuleMap,const FunctionImporter::ImportMapTy & ImportList)148 crossImportIntoModule(Module &TheModule, const ModuleSummaryIndex &Index,
149 StringMap<MemoryBufferRef> &ModuleMap,
150 const FunctionImporter::ImportMapTy &ImportList) {
151 ModuleLoader Loader(TheModule.getContext(), ModuleMap);
152 FunctionImporter Importer(Index, Loader);
153 Importer.importFunctions(TheModule, ImportList);
154 }
155
optimizeModule(Module & TheModule,TargetMachine & TM)156 static void optimizeModule(Module &TheModule, TargetMachine &TM) {
157 // Populate the PassManager
158 PassManagerBuilder PMB;
159 PMB.LibraryInfo = new TargetLibraryInfoImpl(TM.getTargetTriple());
160 PMB.Inliner = createFunctionInliningPass();
161 // FIXME: should get it from the bitcode?
162 PMB.OptLevel = 3;
163 PMB.LoopVectorize = true;
164 PMB.SLPVectorize = true;
165 PMB.VerifyInput = true;
166 PMB.VerifyOutput = false;
167
168 legacy::PassManager PM;
169
170 // Add the TTI (required to inform the vectorizer about register size for
171 // instance)
172 PM.add(createTargetTransformInfoWrapperPass(TM.getTargetIRAnalysis()));
173
174 // Add optimizations
175 PMB.populateThinLTOPassManager(PM);
176
177 PM.run(TheModule);
178 }
179
180 // Convert the PreservedSymbols map from "Name" based to "GUID" based.
181 static DenseSet<GlobalValue::GUID>
computeGUIDPreservedSymbols(const StringSet<> & PreservedSymbols,const Triple & TheTriple)182 computeGUIDPreservedSymbols(const StringSet<> &PreservedSymbols,
183 const Triple &TheTriple) {
184 DenseSet<GlobalValue::GUID> GUIDPreservedSymbols(PreservedSymbols.size());
185 for (auto &Entry : PreservedSymbols) {
186 StringRef Name = Entry.first();
187 if (TheTriple.isOSBinFormatMachO() && Name.size() > 0 && Name[0] == '_')
188 Name = Name.drop_front();
189 GUIDPreservedSymbols.insert(GlobalValue::getGUID(Name));
190 }
191 return GUIDPreservedSymbols;
192 }
193
codegenModule(Module & TheModule,TargetMachine & TM)194 std::unique_ptr<MemoryBuffer> codegenModule(Module &TheModule,
195 TargetMachine &TM) {
196 SmallVector<char, 128> OutputBuffer;
197
198 // CodeGen
199 {
200 raw_svector_ostream OS(OutputBuffer);
201 legacy::PassManager PM;
202
203 // If the bitcode files contain ARC code and were compiled with optimization,
204 // the ObjCARCContractPass must be run, so do it unconditionally here.
205 PM.add(createObjCARCContractPass());
206
207 // Setup the codegen now.
208 if (TM.addPassesToEmitFile(PM, OS, TargetMachine::CGFT_ObjectFile,
209 /* DisableVerify */ true))
210 report_fatal_error("Failed to setup codegen");
211
212 // Run codegen now. resulting binary is in OutputBuffer.
213 PM.run(TheModule);
214 }
215 return make_unique<ObjectMemoryBuffer>(std::move(OutputBuffer));
216 }
217
218 /// Manage caching for a single Module.
219 class ModuleCacheEntry {
220 SmallString<128> EntryPath;
221
222 public:
223 // Create a cache entry. This compute a unique hash for the Module considering
224 // the current list of export/import, and offer an interface to query to
225 // access the content in the cache.
ModuleCacheEntry(StringRef CachePath,const ModuleSummaryIndex & Index,StringRef ModuleID,const FunctionImporter::ImportMapTy & ImportList,const FunctionImporter::ExportSetTy & ExportList,const std::map<GlobalValue::GUID,GlobalValue::LinkageTypes> & ResolvedODR,const GVSummaryMapTy & DefinedFunctions,const DenseSet<GlobalValue::GUID> & PreservedSymbols)226 ModuleCacheEntry(
227 StringRef CachePath, const ModuleSummaryIndex &Index, StringRef ModuleID,
228 const FunctionImporter::ImportMapTy &ImportList,
229 const FunctionImporter::ExportSetTy &ExportList,
230 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
231 const GVSummaryMapTy &DefinedFunctions,
232 const DenseSet<GlobalValue::GUID> &PreservedSymbols) {
233 if (CachePath.empty())
234 return;
235
236 // Compute the unique hash for this entry
237 // This is based on the current compiler version, the module itself, the
238 // export list, the hash for every single module in the import list, the
239 // list of ResolvedODR for the module, and the list of preserved symbols.
240
241 SHA1 Hasher;
242
243 // Start with the compiler revision
244 Hasher.update(LLVM_VERSION_STRING);
245 #ifdef HAVE_LLVM_REVISION
246 Hasher.update(LLVM_REVISION);
247 #endif
248
249 // Include the hash for the current module
250 auto ModHash = Index.getModuleHash(ModuleID);
251 Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
252 for (auto F : ExportList)
253 // The export list can impact the internalization, be conservative here
254 Hasher.update(ArrayRef<uint8_t>((uint8_t *)&F, sizeof(F)));
255
256 // Include the hash for every module we import functions from
257 for (auto &Entry : ImportList) {
258 auto ModHash = Index.getModuleHash(Entry.first());
259 Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
260 }
261
262 // Include the hash for the resolved ODR.
263 for (auto &Entry : ResolvedODR) {
264 Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&Entry.first,
265 sizeof(GlobalValue::GUID)));
266 Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&Entry.second,
267 sizeof(GlobalValue::LinkageTypes)));
268 }
269
270 // Include the hash for the preserved symbols.
271 for (auto &Entry : PreservedSymbols) {
272 if (DefinedFunctions.count(Entry))
273 Hasher.update(
274 ArrayRef<uint8_t>((const uint8_t *)&Entry, sizeof(GlobalValue::GUID)));
275 }
276
277 sys::path::append(EntryPath, CachePath, toHex(Hasher.result()));
278 }
279
280 // Access the path to this entry in the cache.
getEntryPath()281 StringRef getEntryPath() { return EntryPath; }
282
283 // Try loading the buffer for this cache entry.
tryLoadingBuffer()284 ErrorOr<std::unique_ptr<MemoryBuffer>> tryLoadingBuffer() {
285 if (EntryPath.empty())
286 return std::error_code();
287 return MemoryBuffer::getFile(EntryPath);
288 }
289
290 // Cache the Produced object file
291 std::unique_ptr<MemoryBuffer>
write(std::unique_ptr<MemoryBuffer> OutputBuffer)292 write(std::unique_ptr<MemoryBuffer> OutputBuffer) {
293 if (EntryPath.empty())
294 return OutputBuffer;
295
296 // Write to a temporary to avoid race condition
297 SmallString<128> TempFilename;
298 int TempFD;
299 std::error_code EC =
300 sys::fs::createTemporaryFile("Thin", "tmp.o", TempFD, TempFilename);
301 if (EC) {
302 errs() << "Error: " << EC.message() << "\n";
303 report_fatal_error("ThinLTO: Can't get a temporary file");
304 }
305 {
306 raw_fd_ostream OS(TempFD, /* ShouldClose */ true);
307 OS << OutputBuffer->getBuffer();
308 }
309 // Rename to final destination (hopefully race condition won't matter here)
310 EC = sys::fs::rename(TempFilename, EntryPath);
311 if (EC) {
312 sys::fs::remove(TempFilename);
313 raw_fd_ostream OS(EntryPath, EC, sys::fs::F_None);
314 if (EC)
315 report_fatal_error(Twine("Failed to open ") + EntryPath +
316 " to save cached entry\n");
317 OS << OutputBuffer->getBuffer();
318 }
319 auto ReloadedBufferOrErr = MemoryBuffer::getFile(EntryPath);
320 if (auto EC = ReloadedBufferOrErr.getError()) {
321 // FIXME diagnose
322 errs() << "error: can't reload cached file '" << EntryPath
323 << "': " << EC.message() << "\n";
324 return OutputBuffer;
325 }
326 return std::move(*ReloadedBufferOrErr);
327 }
328 };
329
330 static std::unique_ptr<MemoryBuffer>
ProcessThinLTOModule(Module & TheModule,ModuleSummaryIndex & Index,StringMap<MemoryBufferRef> & ModuleMap,TargetMachine & TM,const FunctionImporter::ImportMapTy & ImportList,const FunctionImporter::ExportSetTy & ExportList,const DenseSet<GlobalValue::GUID> & GUIDPreservedSymbols,const GVSummaryMapTy & DefinedGlobals,const ThinLTOCodeGenerator::CachingOptions & CacheOptions,bool DisableCodeGen,StringRef SaveTempsDir,unsigned count)331 ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index,
332 StringMap<MemoryBufferRef> &ModuleMap, TargetMachine &TM,
333 const FunctionImporter::ImportMapTy &ImportList,
334 const FunctionImporter::ExportSetTy &ExportList,
335 const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
336 const GVSummaryMapTy &DefinedGlobals,
337 const ThinLTOCodeGenerator::CachingOptions &CacheOptions,
338 bool DisableCodeGen, StringRef SaveTempsDir,
339 unsigned count) {
340
341 // "Benchmark"-like optimization: single-source case
342 bool SingleModule = (ModuleMap.size() == 1);
343
344 if (!SingleModule) {
345 promoteModule(TheModule, Index);
346
347 // Apply summary-based LinkOnce/Weak resolution decisions.
348 thinLTOResolveWeakForLinkerModule(TheModule, DefinedGlobals);
349
350 // Save temps: after promotion.
351 saveTempBitcode(TheModule, SaveTempsDir, count, ".1.promoted.bc");
352 }
353
354 // Be friendly and don't nuke totally the module when the client didn't
355 // supply anything to preserve.
356 if (!ExportList.empty() || !GUIDPreservedSymbols.empty()) {
357 // Apply summary-based internalization decisions.
358 thinLTOInternalizeModule(TheModule, DefinedGlobals);
359 }
360
361 // Save internalized bitcode
362 saveTempBitcode(TheModule, SaveTempsDir, count, ".2.internalized.bc");
363
364 if (!SingleModule) {
365 crossImportIntoModule(TheModule, Index, ModuleMap, ImportList);
366
367 // Save temps: after cross-module import.
368 saveTempBitcode(TheModule, SaveTempsDir, count, ".3.imported.bc");
369 }
370
371 optimizeModule(TheModule, TM);
372
373 saveTempBitcode(TheModule, SaveTempsDir, count, ".4.opt.bc");
374
375 if (DisableCodeGen) {
376 // Configured to stop before CodeGen, serialize the bitcode and return.
377 SmallVector<char, 128> OutputBuffer;
378 {
379 raw_svector_ostream OS(OutputBuffer);
380 ModuleSummaryIndexBuilder IndexBuilder(&TheModule);
381 WriteBitcodeToFile(&TheModule, OS, true, &IndexBuilder.getIndex());
382 }
383 return make_unique<ObjectMemoryBuffer>(std::move(OutputBuffer));
384 }
385
386 return codegenModule(TheModule, TM);
387 }
388
389 /// Resolve LinkOnce/Weak symbols. Record resolutions in the \p ResolvedODR map
390 /// for caching, and in the \p Index for application during the ThinLTO
391 /// backends. This is needed for correctness for exported symbols (ensure
392 /// at least one copy kept) and a compile-time optimization (to drop duplicate
393 /// copies when possible).
resolveWeakForLinkerInIndex(ModuleSummaryIndex & Index,StringMap<std::map<GlobalValue::GUID,GlobalValue::LinkageTypes>> & ResolvedODR)394 static void resolveWeakForLinkerInIndex(
395 ModuleSummaryIndex &Index,
396 StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>>
397 &ResolvedODR) {
398
399 DenseMap<GlobalValue::GUID, const GlobalValueSummary *> PrevailingCopy;
400 computePrevailingCopies(Index, PrevailingCopy);
401
402 auto isPrevailing = [&](GlobalValue::GUID GUID, const GlobalValueSummary *S) {
403 const auto &Prevailing = PrevailingCopy.find(GUID);
404 // Not in map means that there was only one copy, which must be prevailing.
405 if (Prevailing == PrevailingCopy.end())
406 return true;
407 return Prevailing->second == S;
408 };
409
410 auto recordNewLinkage = [&](StringRef ModuleIdentifier,
411 GlobalValue::GUID GUID,
412 GlobalValue::LinkageTypes NewLinkage) {
413 ResolvedODR[ModuleIdentifier][GUID] = NewLinkage;
414 };
415
416 thinLTOResolveWeakForLinkerInIndex(Index, isPrevailing, recordNewLinkage);
417 }
418
419 // Initialize the TargetMachine builder for a given Triple
initTMBuilder(TargetMachineBuilder & TMBuilder,const Triple & TheTriple)420 static void initTMBuilder(TargetMachineBuilder &TMBuilder,
421 const Triple &TheTriple) {
422 // Set a default CPU for Darwin triples (copied from LTOCodeGenerator).
423 // FIXME this looks pretty terrible...
424 if (TMBuilder.MCpu.empty() && TheTriple.isOSDarwin()) {
425 if (TheTriple.getArch() == llvm::Triple::x86_64)
426 TMBuilder.MCpu = "core2";
427 else if (TheTriple.getArch() == llvm::Triple::x86)
428 TMBuilder.MCpu = "yonah";
429 else if (TheTriple.getArch() == llvm::Triple::aarch64)
430 TMBuilder.MCpu = "cyclone";
431 }
432 TMBuilder.TheTriple = std::move(TheTriple);
433 }
434
435 } // end anonymous namespace
436
addModule(StringRef Identifier,StringRef Data)437 void ThinLTOCodeGenerator::addModule(StringRef Identifier, StringRef Data) {
438 MemoryBufferRef Buffer(Data, Identifier);
439 if (Modules.empty()) {
440 // First module added, so initialize the triple and some options
441 LLVMContext Context;
442 Triple TheTriple(getBitcodeTargetTriple(Buffer, Context));
443 initTMBuilder(TMBuilder, Triple(TheTriple));
444 }
445 #ifndef NDEBUG
446 else {
447 LLVMContext Context;
448 assert(TMBuilder.TheTriple.str() ==
449 getBitcodeTargetTriple(Buffer, Context) &&
450 "ThinLTO modules with different triple not supported");
451 }
452 #endif
453 Modules.push_back(Buffer);
454 }
455
preserveSymbol(StringRef Name)456 void ThinLTOCodeGenerator::preserveSymbol(StringRef Name) {
457 PreservedSymbols.insert(Name);
458 }
459
crossReferenceSymbol(StringRef Name)460 void ThinLTOCodeGenerator::crossReferenceSymbol(StringRef Name) {
461 // FIXME: At the moment, we don't take advantage of this extra information,
462 // we're conservatively considering cross-references as preserved.
463 // CrossReferencedSymbols.insert(Name);
464 PreservedSymbols.insert(Name);
465 }
466
467 // TargetMachine factory
create() const468 std::unique_ptr<TargetMachine> TargetMachineBuilder::create() const {
469 std::string ErrMsg;
470 const Target *TheTarget =
471 TargetRegistry::lookupTarget(TheTriple.str(), ErrMsg);
472 if (!TheTarget) {
473 report_fatal_error("Can't load target for this Triple: " + ErrMsg);
474 }
475
476 // Use MAttr as the default set of features.
477 SubtargetFeatures Features(MAttr);
478 Features.getDefaultSubtargetFeatures(TheTriple);
479 std::string FeatureStr = Features.getString();
480 return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
481 TheTriple.str(), MCpu, FeatureStr, Options, RelocModel,
482 CodeModel::Default, CGOptLevel));
483 }
484
485 /**
486 * Produce the combined summary index from all the bitcode files:
487 * "thin-link".
488 */
linkCombinedIndex()489 std::unique_ptr<ModuleSummaryIndex> ThinLTOCodeGenerator::linkCombinedIndex() {
490 std::unique_ptr<ModuleSummaryIndex> CombinedIndex;
491 uint64_t NextModuleId = 0;
492 for (auto &ModuleBuffer : Modules) {
493 ErrorOr<std::unique_ptr<object::ModuleSummaryIndexObjectFile>> ObjOrErr =
494 object::ModuleSummaryIndexObjectFile::create(ModuleBuffer,
495 diagnosticHandler);
496 if (std::error_code EC = ObjOrErr.getError()) {
497 // FIXME diagnose
498 errs() << "error: can't create ModuleSummaryIndexObjectFile for buffer: "
499 << EC.message() << "\n";
500 return nullptr;
501 }
502 auto Index = (*ObjOrErr)->takeIndex();
503 if (CombinedIndex) {
504 CombinedIndex->mergeFrom(std::move(Index), ++NextModuleId);
505 } else {
506 CombinedIndex = std::move(Index);
507 }
508 }
509 return CombinedIndex;
510 }
511
512 /**
513 * Perform promotion and renaming of exported internal functions.
514 * Index is updated to reflect linkage changes from weak resolution.
515 */
promote(Module & TheModule,ModuleSummaryIndex & Index)516 void ThinLTOCodeGenerator::promote(Module &TheModule,
517 ModuleSummaryIndex &Index) {
518 auto ModuleCount = Index.modulePaths().size();
519 auto ModuleIdentifier = TheModule.getModuleIdentifier();
520 // Collect for each module the list of function it defines (GUID -> Summary).
521 StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries;
522 Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
523
524 // Generate import/export list
525 StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
526 StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
527 ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
528 ExportLists);
529
530 // Resolve LinkOnce/Weak symbols.
531 StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
532 resolveWeakForLinkerInIndex(Index, ResolvedODR);
533
534 thinLTOResolveWeakForLinkerModule(
535 TheModule, ModuleToDefinedGVSummaries[ModuleIdentifier]);
536
537 promoteModule(TheModule, Index);
538 }
539
540 /**
541 * Perform cross-module importing for the module identified by ModuleIdentifier.
542 */
crossModuleImport(Module & TheModule,ModuleSummaryIndex & Index)543 void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule,
544 ModuleSummaryIndex &Index) {
545 auto ModuleMap = generateModuleMap(Modules);
546 auto ModuleCount = Index.modulePaths().size();
547
548 // Collect for each module the list of function it defines (GUID -> Summary).
549 StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
550 Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
551
552 // Generate import/export list
553 StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
554 StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
555 ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
556 ExportLists);
557 auto &ImportList = ImportLists[TheModule.getModuleIdentifier()];
558
559 crossImportIntoModule(TheModule, Index, ModuleMap, ImportList);
560 }
561
562 /**
563 * Compute the list of summaries needed for importing into module.
564 */
gatherImportedSummariesForModule(StringRef ModulePath,ModuleSummaryIndex & Index,std::map<std::string,GVSummaryMapTy> & ModuleToSummariesForIndex)565 void ThinLTOCodeGenerator::gatherImportedSummariesForModule(
566 StringRef ModulePath, ModuleSummaryIndex &Index,
567 std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex) {
568 auto ModuleCount = Index.modulePaths().size();
569
570 // Collect for each module the list of function it defines (GUID -> Summary).
571 StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
572 Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
573
574 // Generate import/export list
575 StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
576 StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
577 ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
578 ExportLists);
579
580 llvm::gatherImportedSummariesForModule(ModulePath, ModuleToDefinedGVSummaries,
581 ImportLists,
582 ModuleToSummariesForIndex);
583 }
584
585 /**
586 * Emit the list of files needed for importing into module.
587 */
emitImports(StringRef ModulePath,StringRef OutputName,ModuleSummaryIndex & Index)588 void ThinLTOCodeGenerator::emitImports(StringRef ModulePath,
589 StringRef OutputName,
590 ModuleSummaryIndex &Index) {
591 auto ModuleCount = Index.modulePaths().size();
592
593 // Collect for each module the list of function it defines (GUID -> Summary).
594 StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
595 Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
596
597 // Generate import/export list
598 StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
599 StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
600 ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
601 ExportLists);
602
603 std::error_code EC;
604 if ((EC = EmitImportsFiles(ModulePath, OutputName, ImportLists)))
605 report_fatal_error(Twine("Failed to open ") + OutputName +
606 " to save imports lists\n");
607 }
608
609 /**
610 * Perform internalization. Index is updated to reflect linkage changes.
611 */
internalize(Module & TheModule,ModuleSummaryIndex & Index)612 void ThinLTOCodeGenerator::internalize(Module &TheModule,
613 ModuleSummaryIndex &Index) {
614 initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
615 auto ModuleCount = Index.modulePaths().size();
616 auto ModuleIdentifier = TheModule.getModuleIdentifier();
617
618 // Convert the preserved symbols set from string to GUID
619 auto GUIDPreservedSymbols =
620 computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple);
621
622 // Collect for each module the list of function it defines (GUID -> Summary).
623 StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
624 Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
625
626 // Generate import/export list
627 StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
628 StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
629 ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
630 ExportLists);
631 auto &ExportList = ExportLists[ModuleIdentifier];
632
633 // Be friendly and don't nuke totally the module when the client didn't
634 // supply anything to preserve.
635 if (ExportList.empty() && GUIDPreservedSymbols.empty())
636 return;
637
638 // Internalization
639 auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) {
640 const auto &ExportList = ExportLists.find(ModuleIdentifier);
641 return (ExportList != ExportLists.end() &&
642 ExportList->second.count(GUID)) ||
643 GUIDPreservedSymbols.count(GUID);
644 };
645 thinLTOInternalizeAndPromoteInIndex(Index, isExported);
646 thinLTOInternalizeModule(TheModule,
647 ModuleToDefinedGVSummaries[ModuleIdentifier]);
648 }
649
650 /**
651 * Perform post-importing ThinLTO optimizations.
652 */
optimize(Module & TheModule)653 void ThinLTOCodeGenerator::optimize(Module &TheModule) {
654 initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
655
656 // Optimize now
657 optimizeModule(TheModule, *TMBuilder.create());
658 }
659
660 /**
661 * Perform ThinLTO CodeGen.
662 */
codegen(Module & TheModule)663 std::unique_ptr<MemoryBuffer> ThinLTOCodeGenerator::codegen(Module &TheModule) {
664 initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
665 return codegenModule(TheModule, *TMBuilder.create());
666 }
667
668 // Main entry point for the ThinLTO processing
run()669 void ThinLTOCodeGenerator::run() {
670 if (CodeGenOnly) {
671 // Perform only parallel codegen and return.
672 ThreadPool Pool;
673 assert(ProducedBinaries.empty() && "The generator should not be reused");
674 ProducedBinaries.resize(Modules.size());
675 int count = 0;
676 for (auto &ModuleBuffer : Modules) {
677 Pool.async([&](int count) {
678 LLVMContext Context;
679 Context.setDiscardValueNames(LTODiscardValueNames);
680
681 // Parse module now
682 auto TheModule = loadModuleFromBuffer(ModuleBuffer, Context, false);
683
684 // CodeGen
685 ProducedBinaries[count] = codegen(*TheModule);
686 }, count++);
687 }
688
689 return;
690 }
691
692 // Sequential linking phase
693 auto Index = linkCombinedIndex();
694
695 // Save temps: index.
696 if (!SaveTempsDir.empty()) {
697 auto SaveTempPath = SaveTempsDir + "index.bc";
698 std::error_code EC;
699 raw_fd_ostream OS(SaveTempPath, EC, sys::fs::F_None);
700 if (EC)
701 report_fatal_error(Twine("Failed to open ") + SaveTempPath +
702 " to save optimized bitcode\n");
703 WriteIndexToFile(*Index, OS);
704 }
705
706 // Prepare the resulting object vector
707 assert(ProducedBinaries.empty() && "The generator should not be reused");
708 ProducedBinaries.resize(Modules.size());
709
710 // Prepare the module map.
711 auto ModuleMap = generateModuleMap(Modules);
712 auto ModuleCount = Modules.size();
713
714 // Collect for each module the list of function it defines (GUID -> Summary).
715 StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
716 Index->collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
717
718 // Collect the import/export lists for all modules from the call-graph in the
719 // combined index.
720 StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
721 StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
722 ComputeCrossModuleImport(*Index, ModuleToDefinedGVSummaries, ImportLists,
723 ExportLists);
724
725 // Convert the preserved symbols set from string to GUID, this is needed for
726 // computing the caching hash and the internalization.
727 auto GUIDPreservedSymbols =
728 computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple);
729
730 // We use a std::map here to be able to have a defined ordering when
731 // producing a hash for the cache entry.
732 // FIXME: we should be able to compute the caching hash for the entry based
733 // on the index, and nuke this map.
734 StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
735
736 // Resolve LinkOnce/Weak symbols, this has to be computed early because it
737 // impacts the caching.
738 resolveWeakForLinkerInIndex(*Index, ResolvedODR);
739
740 auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) {
741 const auto &ExportList = ExportLists.find(ModuleIdentifier);
742 return (ExportList != ExportLists.end() &&
743 ExportList->second.count(GUID)) ||
744 GUIDPreservedSymbols.count(GUID);
745 };
746
747 // Use global summary-based analysis to identify symbols that can be
748 // internalized (because they aren't exported or preserved as per callback).
749 // Changes are made in the index, consumed in the ThinLTO backends.
750 thinLTOInternalizeAndPromoteInIndex(*Index, isExported);
751
752 // Make sure that every module has an entry in the ExportLists and
753 // ResolvedODR maps to enable threaded access to these maps below.
754 for (auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) {
755 ExportLists[DefinedGVSummaries.first()];
756 ResolvedODR[DefinedGVSummaries.first()];
757 }
758
759 // Compute the ordering we will process the inputs: the rough heuristic here
760 // is to sort them per size so that the largest module get schedule as soon as
761 // possible. This is purely a compile-time optimization.
762 std::vector<int> ModulesOrdering;
763 ModulesOrdering.resize(Modules.size());
764 std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0);
765 std::sort(ModulesOrdering.begin(), ModulesOrdering.end(),
766 [&](int LeftIndex, int RightIndex) {
767 auto LSize = Modules[LeftIndex].getBufferSize();
768 auto RSize = Modules[RightIndex].getBufferSize();
769 return LSize > RSize;
770 });
771
772 // Parallel optimizer + codegen
773 {
774 ThreadPool Pool(ThreadCount);
775 for (auto IndexCount : ModulesOrdering) {
776 auto &ModuleBuffer = Modules[IndexCount];
777 Pool.async([&](int count) {
778 auto ModuleIdentifier = ModuleBuffer.getBufferIdentifier();
779 auto &ExportList = ExportLists[ModuleIdentifier];
780
781 auto &DefinedFunctions = ModuleToDefinedGVSummaries[ModuleIdentifier];
782
783 // The module may be cached, this helps handling it.
784 ModuleCacheEntry CacheEntry(CacheOptions.Path, *Index, ModuleIdentifier,
785 ImportLists[ModuleIdentifier], ExportList,
786 ResolvedODR[ModuleIdentifier],
787 DefinedFunctions, GUIDPreservedSymbols);
788
789 {
790 auto ErrOrBuffer = CacheEntry.tryLoadingBuffer();
791 DEBUG(dbgs() << "Cache " << (ErrOrBuffer ? "hit" : "miss") << " '"
792 << CacheEntry.getEntryPath() << "' for buffer " << count
793 << " " << ModuleIdentifier << "\n");
794
795 if (ErrOrBuffer) {
796 // Cache Hit!
797 ProducedBinaries[count] = std::move(ErrOrBuffer.get());
798 return;
799 }
800 }
801
802 LLVMContext Context;
803 Context.setDiscardValueNames(LTODiscardValueNames);
804 Context.enableDebugTypeODRUniquing();
805
806 // Parse module now
807 auto TheModule = loadModuleFromBuffer(ModuleBuffer, Context, false);
808
809 // Save temps: original file.
810 saveTempBitcode(*TheModule, SaveTempsDir, count, ".0.original.bc");
811
812 auto &ImportList = ImportLists[ModuleIdentifier];
813 // Run the main process now, and generates a binary
814 auto OutputBuffer = ProcessThinLTOModule(
815 *TheModule, *Index, ModuleMap, *TMBuilder.create(), ImportList,
816 ExportList, GUIDPreservedSymbols,
817 ModuleToDefinedGVSummaries[ModuleIdentifier], CacheOptions,
818 DisableCodeGen, SaveTempsDir, count);
819
820 OutputBuffer = CacheEntry.write(std::move(OutputBuffer));
821 ProducedBinaries[count] = std::move(OutputBuffer);
822 }, IndexCount);
823 }
824 }
825
826 CachePruning(CacheOptions.Path)
827 .setPruningInterval(CacheOptions.PruningInterval)
828 .setEntryExpiration(CacheOptions.Expiration)
829 .setMaxSize(CacheOptions.MaxPercentageOfAvailableSpace)
830 .prune();
831
832 // If statistics were requested, print them out now.
833 if (llvm::AreStatisticsEnabled())
834 llvm::PrintStatistics();
835 }
836