• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===- SplitModule.cpp - Split a module into partitions -------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the function llvm::SplitModule, which splits a module
11 // into multiple linkable partitions. It can be used to implement parallel code
12 // generation for link-time optimization.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/Transforms/Utils/SplitModule.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/EquivalenceClasses.h"
19 #include "llvm/ADT/SmallPtrSet.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/IR/Comdat.h"
23 #include "llvm/IR/Constant.h"
24 #include "llvm/IR/Constants.h"
25 #include "llvm/IR/Function.h"
26 #include "llvm/IR/GlobalAlias.h"
27 #include "llvm/IR/GlobalObject.h"
28 #include "llvm/IR/GlobalIndirectSymbol.h"
29 #include "llvm/IR/GlobalValue.h"
30 #include "llvm/IR/GlobalVariable.h"
31 #include "llvm/IR/Instruction.h"
32 #include "llvm/IR/Module.h"
33 #include "llvm/IR/User.h"
34 #include "llvm/IR/Value.h"
35 #include "llvm/Support/Casting.h"
36 #include "llvm/Support/Debug.h"
37 #include "llvm/Support/ErrorHandling.h"
38 #include "llvm/Support/MD5.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include "llvm/Transforms/Utils/Cloning.h"
41 #include "llvm/Transforms/Utils/ValueMapper.h"
42 #include <algorithm>
43 #include <cassert>
44 #include <iterator>
45 #include <memory>
46 #include <queue>
47 #include <utility>
48 #include <vector>
49 
50 using namespace llvm;
51 
52 #define DEBUG_TYPE "split-module"
53 
54 namespace {
55 
56 using ClusterMapType = EquivalenceClasses<const GlobalValue *>;
57 using ComdatMembersType = DenseMap<const Comdat *, const GlobalValue *>;
58 using ClusterIDMapType = DenseMap<const GlobalValue *, unsigned>;
59 
60 } // end anonymous namespace
61 
addNonConstUser(ClusterMapType & GVtoClusterMap,const GlobalValue * GV,const User * U)62 static void addNonConstUser(ClusterMapType &GVtoClusterMap,
63                             const GlobalValue *GV, const User *U) {
64   assert((!isa<Constant>(U) || isa<GlobalValue>(U)) && "Bad user");
65 
66   if (const Instruction *I = dyn_cast<Instruction>(U)) {
67     const GlobalValue *F = I->getParent()->getParent();
68     GVtoClusterMap.unionSets(GV, F);
69   } else if (isa<GlobalIndirectSymbol>(U) || isa<Function>(U) ||
70              isa<GlobalVariable>(U)) {
71     GVtoClusterMap.unionSets(GV, cast<GlobalValue>(U));
72   } else {
73     llvm_unreachable("Underimplemented use case");
74   }
75 }
76 
77 // Adds all GlobalValue users of V to the same cluster as GV.
addAllGlobalValueUsers(ClusterMapType & GVtoClusterMap,const GlobalValue * GV,const Value * V)78 static void addAllGlobalValueUsers(ClusterMapType &GVtoClusterMap,
79                                    const GlobalValue *GV, const Value *V) {
80   for (auto *U : V->users()) {
81     SmallVector<const User *, 4> Worklist;
82     Worklist.push_back(U);
83     while (!Worklist.empty()) {
84       const User *UU = Worklist.pop_back_val();
85       // For each constant that is not a GV (a pure const) recurse.
86       if (isa<Constant>(UU) && !isa<GlobalValue>(UU)) {
87         Worklist.append(UU->user_begin(), UU->user_end());
88         continue;
89       }
90       addNonConstUser(GVtoClusterMap, GV, UU);
91     }
92   }
93 }
94 
95 // Find partitions for module in the way that no locals need to be
96 // globalized.
97 // Try to balance pack those partitions into N files since this roughly equals
98 // thread balancing for the backend codegen step.
findPartitions(Module * M,ClusterIDMapType & ClusterIDMap,unsigned N)99 static void findPartitions(Module *M, ClusterIDMapType &ClusterIDMap,
100                            unsigned N) {
101   // At this point module should have the proper mix of globals and locals.
102   // As we attempt to partition this module, we must not change any
103   // locals to globals.
104   LLVM_DEBUG(dbgs() << "Partition module with (" << M->size()
105                     << ")functions\n");
106   ClusterMapType GVtoClusterMap;
107   ComdatMembersType ComdatMembers;
108 
109   auto recordGVSet = [&GVtoClusterMap, &ComdatMembers](GlobalValue &GV) {
110     if (GV.isDeclaration())
111       return;
112 
113     if (!GV.hasName())
114       GV.setName("__llvmsplit_unnamed");
115 
116     // Comdat groups must not be partitioned. For comdat groups that contain
117     // locals, record all their members here so we can keep them together.
118     // Comdat groups that only contain external globals are already handled by
119     // the MD5-based partitioning.
120     if (const Comdat *C = GV.getComdat()) {
121       auto &Member = ComdatMembers[C];
122       if (Member)
123         GVtoClusterMap.unionSets(Member, &GV);
124       else
125         Member = &GV;
126     }
127 
128     // For aliases we should not separate them from their aliasees regardless
129     // of linkage.
130     if (auto *GIS = dyn_cast<GlobalIndirectSymbol>(&GV)) {
131       if (const GlobalObject *Base = GIS->getBaseObject())
132         GVtoClusterMap.unionSets(&GV, Base);
133     }
134 
135     if (const Function *F = dyn_cast<Function>(&GV)) {
136       for (const BasicBlock &BB : *F) {
137         BlockAddress *BA = BlockAddress::lookup(&BB);
138         if (!BA || !BA->isConstantUsed())
139           continue;
140         addAllGlobalValueUsers(GVtoClusterMap, F, BA);
141       }
142     }
143 
144     if (GV.hasLocalLinkage())
145       addAllGlobalValueUsers(GVtoClusterMap, &GV, &GV);
146   };
147 
148   llvm::for_each(M->functions(), recordGVSet);
149   llvm::for_each(M->globals(), recordGVSet);
150   llvm::for_each(M->aliases(), recordGVSet);
151 
152   // Assigned all GVs to merged clusters while balancing number of objects in
153   // each.
154   auto CompareClusters = [](const std::pair<unsigned, unsigned> &a,
155                             const std::pair<unsigned, unsigned> &b) {
156     if (a.second || b.second)
157       return a.second > b.second;
158     else
159       return a.first > b.first;
160   };
161 
162   std::priority_queue<std::pair<unsigned, unsigned>,
163                       std::vector<std::pair<unsigned, unsigned>>,
164                       decltype(CompareClusters)>
165       BalancinQueue(CompareClusters);
166   // Pre-populate priority queue with N slot blanks.
167   for (unsigned i = 0; i < N; ++i)
168     BalancinQueue.push(std::make_pair(i, 0));
169 
170   using SortType = std::pair<unsigned, ClusterMapType::iterator>;
171 
172   SmallVector<SortType, 64> Sets;
173   SmallPtrSet<const GlobalValue *, 32> Visited;
174 
175   // To guarantee determinism, we have to sort SCC according to size.
176   // When size is the same, use leader's name.
177   for (ClusterMapType::iterator I = GVtoClusterMap.begin(),
178                                 E = GVtoClusterMap.end(); I != E; ++I)
179     if (I->isLeader())
180       Sets.push_back(
181           std::make_pair(std::distance(GVtoClusterMap.member_begin(I),
182                                        GVtoClusterMap.member_end()), I));
183 
184   llvm::sort(Sets.begin(), Sets.end(),
185              [](const SortType &a, const SortType &b) {
186                if (a.first == b.first)
187                  return a.second->getData()->getName() >
188                         b.second->getData()->getName();
189                else
190                  return a.first > b.first;
191              });
192 
193   for (auto &I : Sets) {
194     unsigned CurrentClusterID = BalancinQueue.top().first;
195     unsigned CurrentClusterSize = BalancinQueue.top().second;
196     BalancinQueue.pop();
197 
198     LLVM_DEBUG(dbgs() << "Root[" << CurrentClusterID << "] cluster_size("
199                       << I.first << ") ----> " << I.second->getData()->getName()
200                       << "\n");
201 
202     for (ClusterMapType::member_iterator MI =
203              GVtoClusterMap.findLeader(I.second);
204          MI != GVtoClusterMap.member_end(); ++MI) {
205       if (!Visited.insert(*MI).second)
206         continue;
207       LLVM_DEBUG(dbgs() << "----> " << (*MI)->getName()
208                         << ((*MI)->hasLocalLinkage() ? " l " : " e ") << "\n");
209       Visited.insert(*MI);
210       ClusterIDMap[*MI] = CurrentClusterID;
211       CurrentClusterSize++;
212     }
213     // Add this set size to the number of entries in this cluster.
214     BalancinQueue.push(std::make_pair(CurrentClusterID, CurrentClusterSize));
215   }
216 }
217 
externalize(GlobalValue * GV)218 static void externalize(GlobalValue *GV) {
219   if (GV->hasLocalLinkage()) {
220     GV->setLinkage(GlobalValue::ExternalLinkage);
221     GV->setVisibility(GlobalValue::HiddenVisibility);
222   }
223 
224   // Unnamed entities must be named consistently between modules. setName will
225   // give a distinct name to each such entity.
226   if (!GV->hasName())
227     GV->setName("__llvmsplit_unnamed");
228 }
229 
230 // Returns whether GV should be in partition (0-based) I of N.
isInPartition(const GlobalValue * GV,unsigned I,unsigned N)231 static bool isInPartition(const GlobalValue *GV, unsigned I, unsigned N) {
232   if (auto *GIS = dyn_cast<GlobalIndirectSymbol>(GV))
233     if (const GlobalObject *Base = GIS->getBaseObject())
234       GV = Base;
235 
236   StringRef Name;
237   if (const Comdat *C = GV->getComdat())
238     Name = C->getName();
239   else
240     Name = GV->getName();
241 
242   // Partition by MD5 hash. We only need a few bits for evenness as the number
243   // of partitions will generally be in the 1-2 figure range; the low 16 bits
244   // are enough.
245   MD5 H;
246   MD5::MD5Result R;
247   H.update(Name);
248   H.final(R);
249   return (R[0] | (R[1] << 8)) % N == I;
250 }
251 
SplitModule(std::unique_ptr<Module> M,unsigned N,function_ref<void (std::unique_ptr<Module> MPart)> ModuleCallback,bool PreserveLocals)252 void llvm::SplitModule(
253     std::unique_ptr<Module> M, unsigned N,
254     function_ref<void(std::unique_ptr<Module> MPart)> ModuleCallback,
255     bool PreserveLocals) {
256   if (!PreserveLocals) {
257     for (Function &F : *M)
258       externalize(&F);
259     for (GlobalVariable &GV : M->globals())
260       externalize(&GV);
261     for (GlobalAlias &GA : M->aliases())
262       externalize(&GA);
263     for (GlobalIFunc &GIF : M->ifuncs())
264       externalize(&GIF);
265   }
266 
267   // This performs splitting without a need for externalization, which might not
268   // always be possible.
269   ClusterIDMapType ClusterIDMap;
270   findPartitions(M.get(), ClusterIDMap, N);
271 
272   // FIXME: We should be able to reuse M as the last partition instead of
273   // cloning it.
274   for (unsigned I = 0; I < N; ++I) {
275     ValueToValueMapTy VMap;
276     std::unique_ptr<Module> MPart(
277         CloneModule(*M, VMap, [&](const GlobalValue *GV) {
278           if (ClusterIDMap.count(GV))
279             return (ClusterIDMap[GV] == I);
280           else
281             return isInPartition(GV, I, N);
282         }));
283     if (I != 0)
284       MPart->setModuleInlineAsm("");
285     ModuleCallback(std::move(MPart));
286   }
287 }
288