//===- subzero/src/IceGlobalContext.cpp - Global context defs -------------===//
//
//                        The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Defines aspects of the compilation that persist across multiple
/// functions.
///
//===----------------------------------------------------------------------===//

#include "IceGlobalContext.h"

#include "IceCfg.h"
#include "IceCfgNode.h"
#include "IceClFlags.h"
#include "IceDefs.h"
#include "IceELFObjectWriter.h"
#include "IceGlobalInits.h"
#include "IceLiveness.h"
#include "IceOperand.h"
#include "IceRevision.h"
#include "IceTargetLowering.h"
#include "IceTimerTree.h"
#include "IceTypes.def"
#include "IceTypes.h"

#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunused-parameter"
#endif // __clang__

#include "llvm/Support/Timer.h"

#ifdef __clang__
#pragma clang diagnostic pop
#endif // __clang__

#include <algorithm> // max()

namespace std {
template <> struct hash<Ice::RelocatableTuple> {
  size_t operator()(const Ice::RelocatableTuple &Key) const {
    // Use the relocatable's name, plus the hash of a combination of the
    // number of OffsetExprs and the known, fixed offset for the reloc. We
    // left-shift the known, fixed offset by 5 to minimize the interaction
    // between the bits in OffsetExpr.size() and Key.Offset.
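    // For example (illustrative numbers only, not values from any real
    // input): with OffsetExpr.size() == 2 and Key.Offset == 3, the second
    // term hashes 2 + (3 << 5) == 98, so small offsets and small expression
    // counts occupy disjoint bit ranges.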
    return hash<Ice::SizeT>()(Key.Name.getID()) +
           hash<std::size_t>()(Key.OffsetExpr.size() + (Key.Offset << 5));
  }
};
} // end of namespace std

namespace Ice {

namespace {

// Define the key comparison function for the constant pool's unordered_map,
// but only for key types of interest: integer types, floating point types, and
// the special RelocatableTuple.
template <typename KeyType, class Enable = void> struct KeyCompare {};

template <typename KeyType>
struct KeyCompare<KeyType,
                  typename std::enable_if<
                      std::is_integral<KeyType>::value ||
                      std::is_same<KeyType, RelocatableTuple>::value>::type> {
  bool operator()(const KeyType &Value1, const KeyType &Value2) const {
    return Value1 == Value2;
  }
};
template <typename KeyType>
struct KeyCompare<KeyType, typename std::enable_if<
                               std::is_floating_point<KeyType>::value>::type> {
  bool operator()(const KeyType &Value1, const KeyType &Value2) const {
    return !memcmp(&Value1, &Value2, sizeof(KeyType));
  }
};
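
// A worked illustration (a sketch, not code from this file) of why the
// floating-point specialization uses memcmp rather than "==":
//   double N = std::nan("");
//   assert(!(N == N));    // A NaN key compared with "==" never matches
//                         // itself, so it could never be found again.
//   assert(+0.0 == -0.0); // The two zeros compare equal despite having
//                         // different bit patterns, so they would wrongly
//                         // share one pool entry.
// Bitwise equality treats each distinct bit pattern as its own key.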

// Define a key comparison function for sorting the constant pool's values
// after they are dumped to a vector. This covers integer types, floating point
// types, and ConstantRelocatable values.
template <typename ValueType, class Enable = void> struct KeyCompareLess {};

template <typename ValueType>
struct KeyCompareLess<ValueType,
                      typename std::enable_if<std::is_floating_point<
                          typename ValueType::PrimType>::value>::type> {
  bool operator()(const Constant *Const1, const Constant *Const2) const {
    using CompareType = uint64_t;
    static_assert(sizeof(typename ValueType::PrimType) <= sizeof(CompareType),
                  "Expected floating-point type of width 64-bit or less");
    typename ValueType::PrimType V1 = llvm::cast<ValueType>(Const1)->getValue();
    typename ValueType::PrimType V2 = llvm::cast<ValueType>(Const2)->getValue();
    // We avoid "V1<V2" because of NaN.
    // We avoid "memcmp(&V1,&V2,sizeof(V1))<0" which depends on the
    // endian-ness of the host system running Subzero.
    // Instead, compare the result of bit_cast to uint64_t.
    uint64_t I1 = 0, I2 = 0;
    memcpy(&I1, &V1, sizeof(V1));
    memcpy(&I2, &V2, sizeof(V2));
    return I1 < I2;
  }
};
template <typename ValueType>
struct KeyCompareLess<ValueType,
                      typename std::enable_if<std::is_integral<
                          typename ValueType::PrimType>::value>::type> {
  bool operator()(const Constant *Const1, const Constant *Const2) const {
    typename ValueType::PrimType V1 = llvm::cast<ValueType>(Const1)->getValue();
    typename ValueType::PrimType V2 = llvm::cast<ValueType>(Const2)->getValue();
    return V1 < V2;
  }
};
template <typename ValueType>
struct KeyCompareLess<
    ValueType, typename std::enable_if<
                   std::is_same<ValueType, ConstantRelocatable>::value>::type> {
  bool operator()(const Constant *Const1, const Constant *Const2) const {
    auto *V1 = llvm::cast<ValueType>(Const1);
    auto *V2 = llvm::cast<ValueType>(Const2);
    if (V1->getName() == V2->getName())
      return V1->getOffset() < V2->getOffset();
    return V1->getName() < V2->getName();
  }
};

// TypePool maps constants of type KeyType (e.g. float) to pointers to
// type ValueType (e.g. ConstantFloat).
template <Type Ty, typename KeyType, typename ValueType> class TypePool {
  TypePool(const TypePool &) = delete;
  TypePool &operator=(const TypePool &) = delete;

public:
  TypePool() = default;
  ValueType *getOrAdd(GlobalContext *Ctx, KeyType Key) {
    auto Iter = Pool.find(Key);
    if (Iter != Pool.end()) {
      Iter->second->updateLookupCount();
      return Iter->second;
    }
    auto *Result = ValueType::create(Ctx, Ty, Key);
    Pool[Key] = Result;
    Result->updateLookupCount();
    return Result;
  }
  ConstantList getConstantPool() const {
    ConstantList Constants;
    Constants.reserve(Pool.size());
    for (auto &I : Pool)
      Constants.push_back(I.second);
    // The sort (and its KeyCompareLess machinery) is not strictly necessary,
    // but is desirable for producing output that is deterministic across
    // unordered_map::iterator implementations.
    std::sort(Constants.begin(), Constants.end(), KeyCompareLess<ValueType>());
    return Constants;
  }
  size_t size() const { return Pool.size(); }

private:
  // Use the default hash function, and a custom key comparison function. The
  // key comparison function for floating point variables can't use the default
  // == based implementation because of special C++ semantics regarding +0.0,
  // -0.0, and NaN comparison. However, it's OK to use the default hash for
  // floating point values because KeyCompare is the final source of truth - in
  // the worst case a "false" collision must be resolved.
  using ContainerType =
      std::unordered_map<KeyType, ValueType *, std::hash<KeyType>,
                         KeyCompare<KeyType>>;
  ContainerType Pool;
};
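
// Usage sketch (hypothetical, for illustration only): interning the same key
// twice yields the same pooled object, and each lookup bumps its counter:
//   TypePool<IceType_f32, float, ConstantFloat> Floats;
//   ConstantFloat *A = Floats.getOrAdd(Ctx, 1.5f); // creates the constant
//   ConstantFloat *B = Floats.getOrAdd(Ctx, 1.5f); // returns the same one
//   assert(A == B);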

// UndefPool maps ICE types to the corresponding ConstantUndef values.
class UndefPool {
  UndefPool(const UndefPool &) = delete;
  UndefPool &operator=(const UndefPool &) = delete;

public:
  UndefPool() : Pool(IceType_NUM) {}

  ConstantUndef *getOrAdd(GlobalContext *Ctx, Type Ty) {
    if (Pool[Ty] == nullptr)
      Pool[Ty] = ConstantUndef::create(Ctx, Ty);
    return Pool[Ty];
  }

private:
  std::vector<ConstantUndef *> Pool;
};

} // end of anonymous namespace

// The global constant pool bundles individual pools of each type of
// interest.
class ConstantPool {
  ConstantPool(const ConstantPool &) = delete;
  ConstantPool &operator=(const ConstantPool &) = delete;

public:
  ConstantPool() = default;
  TypePool<IceType_f32, float, ConstantFloat> Floats;
  TypePool<IceType_f64, double, ConstantDouble> Doubles;
  TypePool<IceType_i1, int8_t, ConstantInteger32> Integers1;
  TypePool<IceType_i8, int8_t, ConstantInteger32> Integers8;
  TypePool<IceType_i16, int16_t, ConstantInteger32> Integers16;
  TypePool<IceType_i32, int32_t, ConstantInteger32> Integers32;
  TypePool<IceType_i64, int64_t, ConstantInteger64> Integers64;
  TypePool<IceType_i32, RelocatableTuple, ConstantRelocatable> Relocatables;
  TypePool<IceType_i32, RelocatableTuple, ConstantRelocatable>
      ExternRelocatables;
  UndefPool Undefs;
};

void GlobalContext::waitForWorkerThreads() {
  if (WaitForWorkerThreadsCalled.exchange(true))
    return;
  optQueueNotifyEnd();
  for (std::thread &Worker : TranslationThreads) {
    Worker.join();
  }
  TranslationThreads.clear();

  // Only notify the emit queue to end after all the translation threads have
  // ended.
  emitQueueNotifyEnd();
  for (std::thread &Worker : EmitterThreads) {
    Worker.join();
  }
  EmitterThreads.clear();

  if (BuildDefs::timers()) {
    auto Timers = getTimers();
    for (ThreadContext *TLS : AllThreadContexts)
      Timers->mergeFrom(TLS->Timers);
  }
  if (BuildDefs::dump()) {
    // Do a separate loop over AllThreadContexts to avoid holding two locks at
    // once.
    auto Stats = getStatsCumulative();
    for (ThreadContext *TLS : AllThreadContexts)
      Stats->add(TLS->StatsCumulative);
  }
}

void GlobalContext::CodeStats::dump(const Cfg *Func, GlobalContext *Ctx) {
  if (!BuildDefs::dump())
    return;
  OstreamLocker _(Ctx);
  Ostream &Str = Ctx->getStrDump();
  const std::string Name =
      (Func == nullptr ? "_FINAL_" : Func->getFunctionNameAndSize());
#define X(str, tag)                                                            \
  Str << "|" << Name << "|" str "|" << Stats[CS_##tag] << "\n";
  CODESTATS_TABLE
#undef X
  Str << "|" << Name << "|Spills+Fills|"
      << Stats[CS_NumSpills] + Stats[CS_NumFills] << "\n";
  Str << "|" << Name << "|Memory Usage |";
  if (const auto MemUsed = static_cast<size_t>(
          llvm::TimeRecord::getCurrentTime(false).getMemUsed())) {
    static constexpr size_t _1MB = 1024 * 1024;
    Str << (MemUsed / _1MB) << " MB";
  } else {
    Str << "(requires '-track-memory')";
  }
  Str << "\n";
  Str << "|" << Name << "|CPool Sizes ";
  {
    auto Pool = Ctx->getConstPool();
    Str << "|f32=" << Pool->Floats.size();
    Str << "|f64=" << Pool->Doubles.size();
    Str << "|i1=" << Pool->Integers1.size();
    Str << "|i8=" << Pool->Integers8.size();
    Str << "|i16=" << Pool->Integers16.size();
    Str << "|i32=" << Pool->Integers32.size();
    Str << "|i64=" << Pool->Integers64.size();
    Str << "|Rel=" << Pool->Relocatables.size();
    Str << "|ExtRel=" << Pool->ExternRelocatables.size();
  }
  Str << "\n";
  if (Func != nullptr) {
    Str << "|" << Name << "|Cfg Memory |" << Func->getTotalMemoryMB()
        << " MB\n";
    Str << "|" << Name << "|Liveness Memory |" << Func->getLivenessMemoryMB()
        << " MB\n";
  }
}

namespace {

// By default, wake up the main parser thread when the OptQ gets half empty.
static constexpr size_t DefaultOptQWakeupSize = GlobalContext::MaxOptQSize >> 1;
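// For instance (illustrative numbers, not the actual configured value): if
// MaxOptQSize were 1024, the parser would normally be woken once the queue
// drains to 512 entries; the std::max() in the GlobalContext constructor
// below only raises that threshold when even more translation threads are
// requested.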

} // end of anonymous namespace

GlobalContext::GlobalContext(Ostream *OsDump, Ostream *OsEmit, Ostream *OsError,
                             ELFStreamer *ELFStr)
    : Strings(new StringPool()), ConstPool(new ConstantPool()), ErrorStatus(),
      StrDump(OsDump), StrEmit(OsEmit), StrError(OsError), ObjectWriter(),
      OptQWakeupSize(std::max(DefaultOptQWakeupSize,
                              size_t(getFlags().getNumTranslationThreads()))),
      OptQ(/*Sequential=*/getFlags().isSequential(),
           /*MaxSize=*/
           getFlags().isParseParallel()
               ? MaxOptQSize
               : getFlags().getNumTranslationThreads()),
      // EmitQ is allowed unlimited size.
      EmitQ(/*Sequential=*/getFlags().isSequential()),
      DataLowering(TargetDataLowering::createLowering(this)) {
  assert(OsDump && "OsDump is not defined for GlobalContext");
  assert(OsEmit && "OsEmit is not defined for GlobalContext");
  assert(OsError && "OsError is not defined for GlobalContext");
  // Make sure thread_local fields are properly initialized before any
  // accesses are made. Do this here instead of at the start of
  // main() so that all clients (e.g. unit tests) can benefit for
  // free.
  GlobalContext::TlsInit();
  Cfg::TlsInit();
  Liveness::TlsInit();
  // Create a new ThreadContext for the current thread. No need to
  // lock AllThreadContexts at this point since no other threads have
  // access yet to this GlobalContext object.
  ThreadContext *MyTLS = new ThreadContext();
  AllThreadContexts.push_back(MyTLS);
  ICE_TLS_SET_FIELD(TLS, MyTLS);
  // Pre-register built-in stack names.
  if (BuildDefs::timers()) {
    // TODO(stichnot): There needs to be a strong relationship between
    // the newTimerStackID() return values and TSK_Default/TSK_Funcs.
    newTimerStackID("Total across all functions");
    newTimerStackID("Per-function summary");
  }
  Timers.initInto(MyTLS->Timers);
  switch (getFlags().getOutFileType()) {
  case FT_Elf:
    ObjectWriter.reset(new ELFObjectWriter(*this, *ELFStr));
    break;
  case FT_Asm:
  case FT_Iasm:
    break;
  }
// Cache up front common constants.
#define X(tag, sizeLog2, align, elts, elty, str, rcstr)                        \
  ConstZeroForType[IceType_##tag] = getConstantZeroInternal(IceType_##tag);
  ICETYPE_TABLE;
#undef X
  ConstantTrue = getConstantInt1Internal(1);
// Define runtime helper functions.
#define X(Tag, Name)                                                           \
  RuntimeHelperFunc[static_cast<size_t>(RuntimeHelper::H_##Tag)] =             \
      getConstantExternSym(getGlobalString(Name));
  RUNTIME_HELPER_FUNCTIONS_TABLE
#undef X

  TargetLowering::staticInit(this);

  if (getFlags().getEmitRevision()) {
    // Embed the Subzero revision into the compiled binary by creating a
    // special global variable initialized with the revision string.
    auto *Revision = VariableDeclaration::create(&Globals, true);
    Revision->setName(this, "__Sz_revision");
    Revision->setIsConstant(true);
    const char *RevisionString = getSubzeroRevision();
    Revision->addInitializer(VariableDeclaration::DataInitializer::create(
        &Globals, RevisionString, 1 + strlen(RevisionString)));
    Globals.push_back(Revision);
  }
}

void GlobalContext::translateFunctionsWrapper(ThreadContext *MyTLS) {
  ICE_TLS_SET_FIELD(TLS, MyTLS);
  translateFunctions();
}

void GlobalContext::translateFunctions() {
  TimerMarker Timer(TimerStack::TT_translateFunctions, this);
  while (std::unique_ptr<OptWorkItem> OptItem = optQueueBlockingPop()) {
    std::unique_ptr<EmitterWorkItem> Item;
    auto Func = OptItem->getParsedCfg();
    // Install Func in TLS for Cfg-specific container allocators.
    CfgLocalAllocatorScope _(Func.get());
    // Reset per-function stats being accumulated in TLS.
    resetStats();
    // Set verbose level to none if the current function does NOT match the
    // -verbose-focus command-line option.
    if (!getFlags().matchVerboseFocusOn(Func->getFunctionName(),
                                        Func->getSequenceNumber()))
      Func->setVerbose(IceV_None);
    // Disable translation if -notranslate is specified, or if the current
    // function does not match the -translate-only filter. If translation is
    // disabled, just dump the high-level IR and continue.
    if (getFlags().getDisableTranslation() ||
        !getFlags().matchTranslateOnly(Func->getFunctionName(),
                                       Func->getSequenceNumber())) {
      Func->dump();
      // Add a dummy work item as a placeholder. This maintains sequence
      // numbers so that the emitter thread will emit subsequent functions.
      Item = makeUnique<EmitterWorkItem>(Func->getSequenceNumber());
      emitQueueBlockingPush(std::move(Item));
      continue; // Func goes out of scope and gets deleted
    }

    Func->translate();
    if (Func->hasError()) {
      getErrorStatus()->assign(EC_Translation);
      OstreamLocker L(this);
      getStrError() << "ICE translation error: " << Func->getFunctionName()
                    << ": " << Func->getError() << ": "
                    << Func->getFunctionNameAndSize() << "\n";
      Item = makeUnique<EmitterWorkItem>(Func->getSequenceNumber());
    } else {
      Func->getAssembler<>()->setInternal(Func->getInternal());
      switch (getFlags().getOutFileType()) {
      case FT_Elf:
      case FT_Iasm: {
        Func->emitIAS();
        // The Cfg has already emitted into the assembly buffer, so
        // stats have been fully collected into this thread's TLS.
        // Dump them before TLS is reset for the next Cfg.
        if (BuildDefs::dump())
          dumpStats(Func.get());
        auto Asm = Func->releaseAssembler();
        // Copy relevant fields into Asm before Func is deleted.
        Asm->setFunctionName(Func->getFunctionName());
        Item = makeUnique<EmitterWorkItem>(Func->getSequenceNumber(),
                                           std::move(Asm));
        Item->setGlobalInits(Func->getGlobalInits());
      } break;
      case FT_Asm:
        // The Cfg has not been emitted yet, so stats are not ready
        // to be dumped.
        std::unique_ptr<VariableDeclarationList> GlobalInits =
            Func->getGlobalInits();
        Item = makeUnique<EmitterWorkItem>(Func->getSequenceNumber(),
                                           std::move(Func));
        Item->setGlobalInits(std::move(GlobalInits));
        break;
      }
    }
    assert(Item != nullptr);
    emitQueueBlockingPush(std::move(Item));
    // The Cfg now gets deleted as Func goes out of scope.
  }
}

namespace {

// Ensure Pending is large enough that Pending[Index] is valid.
void resizePending(std::vector<std::unique_ptr<EmitterWorkItem>> *Pending,
                   uint32_t Index) {
  if (Index >= Pending->size())
    Utils::reserveAndResize(*Pending, Index + 1);
}

} // end of anonymous namespace

// static
void GlobalContext::TlsInit() { ICE_TLS_INIT_FIELD(TLS); }

void GlobalContext::emitFileHeader() {
  TimerMarker T1(Ice::TimerStack::TT_emitAsm, this);
  if (getFlags().getOutFileType() == FT_Elf) {
    getObjectWriter()->writeInitialELFHeader();
  } else {
    if (!BuildDefs::dump()) {
      getStrError() << "emitFileHeader for non-ELF";
      getErrorStatus()->assign(EC_Translation);
    }
    TargetHeaderLowering::createLowering(this)->lower();
  }
}

void GlobalContext::lowerConstants() { DataLowering->lowerConstants(); }

void GlobalContext::lowerJumpTables() { DataLowering->lowerJumpTables(); }

void GlobalContext::emitTargetRODataSections() {
  DataLowering->emitTargetRODataSections();
}

void GlobalContext::lowerGlobals(const std::string &SectionSuffix) {
  TimerMarker T(TimerStack::TT_emitGlobalInitializers, this);
  const bool DumpGlobalVariables =
      BuildDefs::dump() && (getFlags().getVerbose() & IceV_GlobalInit) &&
      getFlags().matchVerboseFocusOn("", 0);
  if (DumpGlobalVariables) {
    OstreamLocker L(this);
    Ostream &Stream = getStrDump();
    for (const Ice::VariableDeclaration *Global : Globals) {
      Global->dump(Stream);
    }
  }
  if (getFlags().getDisableTranslation())
    return;

  if (!BuildDefs::minimal() && Instrumentor)
    Instrumentor->instrumentGlobals(Globals);

  DataLowering->lowerGlobals(Globals, SectionSuffix);
  if (DisposeGlobalVariablesAfterLowering) {
    Globals.clearAndPurge();
  } else {
    Globals.clear();
  }
}

void GlobalContext::emitterWrapper(ThreadContext *MyTLS) {
  ICE_TLS_SET_FIELD(TLS, MyTLS);
  emitItems();
}

void GlobalContext::emitItems() {
  const bool Threaded = !getFlags().isSequential();
  // Pending is a vector containing the reassembled, ordered list of
  // work items. When we're ready for the next item, we first check
  // whether it's in the Pending list. If not, we take an item from
  // the work queue, and if it's not the item we're waiting for, we
  // insert it into Pending and repeat. The work item is deleted
  // after it is processed.
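  // A hypothetical walk-through (assuming the first sequence number is 0):
  // if the queue yields items in the order 2, 0, 1, then item 2 is parked in
  // Pending[2]; item 0 is emitted as soon as it arrives; item 1 is emitted
  // next; and the parked item 2 is finally emitted without another queue pop.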
  std::vector<std::unique_ptr<EmitterWorkItem>> Pending;
  uint32_t DesiredSequenceNumber = getFirstSequenceNumber();
  uint32_t ShuffleStartIndex = DesiredSequenceNumber;
  uint32_t ShuffleEndIndex = DesiredSequenceNumber;
  bool EmitQueueEmpty = false;

  while (!EmitQueueEmpty) {
    resizePending(&Pending, DesiredSequenceNumber);
    // See if Pending contains DesiredSequenceNumber.
    if (Pending[DesiredSequenceNumber] == nullptr) {
      // We need to fetch an EmitterWorkItem from the queue.
      auto RawItem = emitQueueBlockingPop();
      if (RawItem == nullptr) {
        // This is the notifier for an empty queue.
        EmitQueueEmpty = true;
      } else {
        // We got an EmitterWorkItem; add it to Pending.
        uint32_t ItemSeq = RawItem->getSequenceNumber();
        if (Threaded && ItemSeq != DesiredSequenceNumber) {
          // Not the desired one; add it to Pending but do not increase
          // DesiredSequenceNumber. Continue the loop; do not emit the item.
          resizePending(&Pending, ItemSeq);
          Pending[ItemSeq] = std::move(RawItem);
          continue;
        }
        // Otherwise ItemSeq == DesiredSequenceNumber (or !Threaded, in which
        // case out-of-order arrival is acceptable), so store the item for
        // emission below.
        Pending[DesiredSequenceNumber] = std::move(RawItem);
      }
    }

    // We have the desired EmitterWorkItem or nullptr as the end notifier.
    // If the emitter queue is not empty, increase DesiredSequenceNumber and
    // ShuffleEndIndex.
    if (!EmitQueueEmpty) {
      DesiredSequenceNumber++;
      ShuffleEndIndex++;
    }

    // Emit the items from ShuffleStartIndex to ShuffleEndIndex.
    for (uint32_t I = ShuffleStartIndex; I < ShuffleEndIndex; I++) {
      std::unique_ptr<EmitterWorkItem> Item = std::move(Pending[I]);

      switch (Item->getKind()) {
      case EmitterWorkItem::WI_Nop:
        break;
      case EmitterWorkItem::WI_GlobalInits: {
        accumulateGlobals(Item->getGlobalInits());
      } break;
      case EmitterWorkItem::WI_Asm: {
        lowerGlobalsIfNoCodeHasBeenSeen();
        accumulateGlobals(Item->getGlobalInits());

        std::unique_ptr<Assembler> Asm = Item->getAsm();
        Asm->alignFunction();
        GlobalString Name = Asm->getFunctionName();
        switch (getFlags().getOutFileType()) {
        case FT_Elf:
          getObjectWriter()->writeFunctionCode(Name, Asm->getInternal(),
                                               Asm.get());
          break;
        case FT_Iasm: {
          OstreamLocker L(this);
          Cfg::emitTextHeader(Name, this, Asm.get());
          Asm->emitIASBytes(this);
        } break;
        case FT_Asm:
          llvm::report_fatal_error("Unexpected FT_Asm");
          break;
        }
      } break;
      case EmitterWorkItem::WI_Cfg: {
        if (!BuildDefs::dump())
          llvm::report_fatal_error("WI_Cfg work item created inappropriately");
        lowerGlobalsIfNoCodeHasBeenSeen();
        accumulateGlobals(Item->getGlobalInits());

        assert(getFlags().getOutFileType() == FT_Asm);
        std::unique_ptr<Cfg> Func = Item->getCfg();
        // Unfortunately, we have to temporarily install the Cfg in TLS
        // because Variable::asType() uses the allocator to create the
        // differently-typed copy.
        CfgLocalAllocatorScope _(Func.get());
        Func->emit();
        dumpStats(Func.get());
      } break;
      }
    }
    // Update the start index for the next round of emitting.
    ShuffleStartIndex = ShuffleEndIndex;
  }

  // In case there is no code to be generated, invoke the conditional
  // lowerGlobals again -- this is a no-op if code has been emitted.
  lowerGlobalsIfNoCodeHasBeenSeen();
}

GlobalContext::~GlobalContext() {
  llvm::DeleteContainerPointers(AllThreadContexts);
  LockedPtr<DestructorArray> Dtors = getDestructors();
  // Destructors are invoked in the opposite object construction order.
  for (const auto &Dtor : reverse_range(*Dtors))
    Dtor();
}

void GlobalContext::dumpStrings() {
  if (!getFlags().getDumpStrings())
    return;
  OstreamLocker _(this);
  Ostream &Str = getStrDump();
  Str << "GlobalContext strings:\n";
  getStrings()->dump(Str);
}

void GlobalContext::dumpConstantLookupCounts() {
  if (!BuildDefs::dump())
    return;
  const bool DumpCounts = (getFlags().getVerbose() & IceV_ConstPoolStats) &&
                          getFlags().matchVerboseFocusOn("", 0);
  if (!DumpCounts)
    return;

  OstreamLocker _(this);
  Ostream &Str = getStrDump();
  Str << "Constant pool use stats: count+value+type\n";
#define X(WhichPool)                                                           \
  for (auto *C : getConstPool()->WhichPool.getConstantPool()) {                \
    Str << C->getLookupCount() << " ";                                        \
    C->dump(Str);                                                              \
    Str << " " << C->getType() << "\n";                                       \
  }
  X(Integers1);
  X(Integers8);
  X(Integers16);
  X(Integers32);
  X(Integers64);
  X(Floats);
  X(Doubles);
  X(Relocatables);
  X(ExternRelocatables);
#undef X
}

// TODO(stichnot): Consider adding thread-local caches of constant pool entries
// to reduce contention.

// All locking is done by the getConstantInt[0-9]+() target function.
Constant *GlobalContext::getConstantInt(Type Ty, int64_t Value) {
  switch (Ty) {
  case IceType_i1:
    return getConstantInt1(Value);
  case IceType_i8:
    return getConstantInt8(Value);
  case IceType_i16:
    return getConstantInt16(Value);
  case IceType_i32:
    return getConstantInt32(Value);
  case IceType_i64:
    return getConstantInt64(Value);
  default:
    llvm_unreachable("Bad integer type for getConstant");
  }
  return nullptr;
}

Constant *GlobalContext::getConstantInt1Internal(int8_t ConstantInt1) {
  ConstantInt1 &= INT8_C(1);
  return getConstPool()->Integers1.getOrAdd(this, ConstantInt1);
}

Constant *GlobalContext::getConstantInt8Internal(int8_t ConstantInt8) {
  return getConstPool()->Integers8.getOrAdd(this, ConstantInt8);
}

Constant *GlobalContext::getConstantInt16Internal(int16_t ConstantInt16) {
  return getConstPool()->Integers16.getOrAdd(this, ConstantInt16);
}

Constant *GlobalContext::getConstantInt32Internal(int32_t ConstantInt32) {
  return getConstPool()->Integers32.getOrAdd(this, ConstantInt32);
}

Constant *GlobalContext::getConstantInt64Internal(int64_t ConstantInt64) {
  return getConstPool()->Integers64.getOrAdd(this, ConstantInt64);
}

Constant *GlobalContext::getConstantFloat(float ConstantFloat) {
  return getConstPool()->Floats.getOrAdd(this, ConstantFloat);
}

Constant *GlobalContext::getConstantDouble(double ConstantDouble) {
  return getConstPool()->Doubles.getOrAdd(this, ConstantDouble);
}

Constant *GlobalContext::getConstantSymWithEmitString(
    const RelocOffsetT Offset, const RelocOffsetArray &OffsetExpr,
    GlobalString Name, const std::string &EmitString) {
  return getConstPool()->Relocatables.getOrAdd(
      this, RelocatableTuple(Offset, OffsetExpr, Name, EmitString));
}

Constant *GlobalContext::getConstantSym(RelocOffsetT Offset,
                                        GlobalString Name) {
  constexpr char EmptyEmitString[] = "";
  return getConstantSymWithEmitString(Offset, {}, Name, EmptyEmitString);
}

Constant *GlobalContext::getConstantExternSym(GlobalString Name) {
  constexpr RelocOffsetT Offset = 0;
  return getConstPool()->ExternRelocatables.getOrAdd(
      this, RelocatableTuple(Offset, {}, Name));
}

Constant *GlobalContext::getConstantUndef(Type Ty) {
  return getConstPool()->Undefs.getOrAdd(this, Ty);
}

Constant *GlobalContext::getConstantZero(Type Ty) {
  Constant *Zero = ConstZeroForType[Ty];
  if (Zero == nullptr)
    llvm::report_fatal_error("Unsupported constant type: " + typeStdString(Ty));
  return Zero;
}

// All locking is done by the getConstant*() target function.
Constant *GlobalContext::getConstantZeroInternal(Type Ty) {
  switch (Ty) {
  case IceType_i1:
    return getConstantInt1Internal(0);
  case IceType_i8:
    return getConstantInt8Internal(0);
  case IceType_i16:
    return getConstantInt16Internal(0);
  case IceType_i32:
    return getConstantInt32Internal(0);
  case IceType_i64:
    return getConstantInt64Internal(0);
  case IceType_f32:
    return getConstantFloat(0);
  case IceType_f64:
    return getConstantDouble(0);
  default:
    return nullptr;
  }
}

ConstantList GlobalContext::getConstantPool(Type Ty) {
  switch (Ty) {
  case IceType_i1:
  case IceType_i8:
    return getConstPool()->Integers8.getConstantPool();
  case IceType_i16:
    return getConstPool()->Integers16.getConstantPool();
  case IceType_i32:
    return getConstPool()->Integers32.getConstantPool();
  case IceType_i64:
    return getConstPool()->Integers64.getConstantPool();
  case IceType_f32:
    return getConstPool()->Floats.getConstantPool();
  case IceType_f64:
    return getConstPool()->Doubles.getConstantPool();
  case IceType_v4i1:
  case IceType_v8i1:
  case IceType_v16i1:
  case IceType_v16i8:
  case IceType_v8i16:
  case IceType_v4i32:
  case IceType_v4f32:
    llvm::report_fatal_error("Unsupported constant type: " + typeStdString(Ty));
    break;
  case IceType_void:
  case IceType_NUM:
    break;
  }
  llvm_unreachable("Unknown type");
}

ConstantList GlobalContext::getConstantExternSyms() {
  return getConstPool()->ExternRelocatables.getConstantPool();
}

GlobalString GlobalContext::getGlobalString(const std::string &Name) {
  return GlobalString::createWithString(this, Name);
}

JumpTableDataList GlobalContext::getJumpTables() {
  JumpTableDataList JumpTables(*getJumpTableList());
  // Make the order deterministic by sorting by function name, and then by the
  // ID of the jump table within that function.
  std::sort(JumpTables.begin(), JumpTables.end(),
            [](const JumpTableData &A, const JumpTableData &B) {
              if (A.getFunctionName() != B.getFunctionName())
                return A.getFunctionName() < B.getFunctionName();
              return A.getId() < B.getId();
            });

  return JumpTables;
}

void GlobalContext::addJumpTableData(JumpTableData JumpTable) {
  getJumpTableList()->emplace_back(std::move(JumpTable));
}

TimerStackIdT GlobalContext::newTimerStackID(const std::string &Name) {
  if (!BuildDefs::timers())
    return 0;
  auto Timers = getTimers();
  TimerStackIdT NewID = Timers->size();
  Timers->push_back(TimerStack(Name));
  return NewID;
}

TimerIdT GlobalContext::getTimerID(TimerStackIdT StackID,
                                   const std::string &Name) {
  auto *Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
  assert(StackID < Timers->size());
  return Timers->at(StackID).getTimerID(Name);
}

void GlobalContext::pushTimer(TimerIdT ID, TimerStackIdT StackID) {
  auto *Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
  assert(StackID < Timers->size());
  Timers->at(StackID).push(ID);
}

void GlobalContext::popTimer(TimerIdT ID, TimerStackIdT StackID) {
  auto *Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
  assert(StackID < Timers->size());
  Timers->at(StackID).pop(ID);
}

void GlobalContext::resetTimer(TimerStackIdT StackID) {
  auto *Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
  assert(StackID < Timers->size());
  Timers->at(StackID).reset();
}

std::string GlobalContext::getTimerName(TimerStackIdT StackID) {
  auto *Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
  assert(StackID < Timers->size());
  return Timers->at(StackID).getName();
}

void GlobalContext::setTimerName(TimerStackIdT StackID,
                                 const std::string &NewName) {
  auto *Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
  assert(StackID < Timers->size());
  Timers->at(StackID).setName(NewName);
}

// Note: optQueueBlockingPush and optQueueBlockingPop use unique_ptr at the
// interface to take and transfer ownership, but they internally store the raw
// Cfg pointer in the work queue. This allows e.g. future queue optimizations
// such as the use of atomics to modify queue elements.
void GlobalContext::optQueueBlockingPush(std::unique_ptr<OptWorkItem> Item) {
  assert(Item);
  {
    TimerMarker _(TimerStack::TT_qTransPush, this);
    OptQ.blockingPush(std::move(Item));
  }
  if (getFlags().isSequential())
    translateFunctions();
}

std::unique_ptr<OptWorkItem> GlobalContext::optQueueBlockingPop() {
  TimerMarker _(TimerStack::TT_qTransPop, this);
  return OptQ.blockingPop(OptQWakeupSize);
}

void GlobalContext::emitQueueBlockingPush(
    std::unique_ptr<EmitterWorkItem> Item) {
  assert(Item);
  {
    TimerMarker _(TimerStack::TT_qEmitPush, this);
    EmitQ.blockingPush(std::move(Item));
  }
  if (getFlags().isSequential())
    emitItems();
}

std::unique_ptr<EmitterWorkItem> GlobalContext::emitQueueBlockingPop() {
  TimerMarker _(TimerStack::TT_qEmitPop, this);
  return EmitQ.blockingPop();
}

void GlobalContext::initParserThread() {
  ThreadContext *Tls = new ThreadContext();
  auto Timers = getTimers();
  Timers->initInto(Tls->Timers);
  AllThreadContexts.push_back(Tls);
  ICE_TLS_SET_FIELD(TLS, Tls);
}

void GlobalContext::startWorkerThreads() {
  size_t NumWorkers = getFlags().getNumTranslationThreads();
  auto Timers = getTimers();
  for (size_t i = 0; i < NumWorkers; ++i) {
    ThreadContext *WorkerTLS = new ThreadContext();
    Timers->initInto(WorkerTLS->Timers);
    AllThreadContexts.push_back(WorkerTLS);
    TranslationThreads.push_back(std::thread(
        &GlobalContext::translateFunctionsWrapper, this, WorkerTLS));
  }
  if (NumWorkers) {
    ThreadContext *WorkerTLS = new ThreadContext();
    Timers->initInto(WorkerTLS->Timers);
    AllThreadContexts.push_back(WorkerTLS);
    EmitterThreads.push_back(
        std::thread(&GlobalContext::emitterWrapper, this, WorkerTLS));
  }
}

void GlobalContext::resetStats() {
  if (BuildDefs::dump())
    ICE_TLS_GET_FIELD(TLS)->StatsFunction.reset();
}

void GlobalContext::dumpStats(const Cfg *Func) {
  if (!getFlags().getDumpStats())
    return;
  if (Func == nullptr) {
    getStatsCumulative()->dump(Func, this);
  } else {
    ICE_TLS_GET_FIELD(TLS)->StatsFunction.dump(Func, this);
  }
}

void GlobalContext::statsUpdateEmitted(uint32_t InstCount) {
  if (!getFlags().getDumpStats())
    return;
  ThreadContext *Tls = ICE_TLS_GET_FIELD(TLS);
  Tls->StatsFunction.update(CodeStats::CS_InstCount, InstCount);
  Tls->StatsCumulative.update(CodeStats::CS_InstCount, InstCount);
}

void GlobalContext::statsUpdateRegistersSaved(uint32_t Num) {
  if (!getFlags().getDumpStats())
    return;
  ThreadContext *Tls = ICE_TLS_GET_FIELD(TLS);
  Tls->StatsFunction.update(CodeStats::CS_RegsSaved, Num);
  Tls->StatsCumulative.update(CodeStats::CS_RegsSaved, Num);
}

void GlobalContext::statsUpdateFrameBytes(uint32_t Bytes) {
  if (!getFlags().getDumpStats())
    return;
  ThreadContext *Tls = ICE_TLS_GET_FIELD(TLS);
  Tls->StatsFunction.update(CodeStats::CS_FrameByte, Bytes);
  Tls->StatsCumulative.update(CodeStats::CS_FrameByte, Bytes);
}

void GlobalContext::statsUpdateSpills() {
  if (!getFlags().getDumpStats())
    return;
  ThreadContext *Tls = ICE_TLS_GET_FIELD(TLS);
  Tls->StatsFunction.update(CodeStats::CS_NumSpills);
  Tls->StatsCumulative.update(CodeStats::CS_NumSpills);
}

void GlobalContext::statsUpdateFills() {
  if (!getFlags().getDumpStats())
    return;
  ThreadContext *Tls = ICE_TLS_GET_FIELD(TLS);
  Tls->StatsFunction.update(CodeStats::CS_NumFills);
  Tls->StatsCumulative.update(CodeStats::CS_NumFills);
}

void GlobalContext::statsUpdateRPImms() {
  if (!getFlags().getDumpStats())
    return;
  ThreadContext *Tls = ICE_TLS_GET_FIELD(TLS);
  Tls->StatsFunction.update(CodeStats::CS_NumRPImms);
  Tls->StatsCumulative.update(CodeStats::CS_NumRPImms);
}

void GlobalContext::dumpTimers(TimerStackIdT StackID, bool DumpCumulative) {
  if (!BuildDefs::timers())
    return;
  auto Timers = getTimers();
  assert(Timers->size() > StackID);
  OstreamLocker L(this);
  Timers->at(StackID).dump(getStrDump(), DumpCumulative);
}

void GlobalContext::dumpLocalTimers(const std::string &TimerNameOverride,
                                    TimerStackIdT StackID,
                                    bool DumpCumulative) {
  if (!BuildDefs::timers())
    return;
  auto *Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
  assert(Timers->size() > StackID);
  // Temporarily override the thread-local timer name with the given name.
  // Don't do it permanently because the final timer merge at the end expects
  // the thread-local timer names to be the same as the global timer name.
  auto OrigName = getTimerName(StackID);
  setTimerName(StackID, TimerNameOverride);
  {
    OstreamLocker _(this);
    Timers->at(StackID).dump(getStrDump(), DumpCumulative);
  }
  setTimerName(StackID, OrigName);
}

LockedPtr<StringPool>
GlobalStringPoolTraits::getStrings(const GlobalContext *PoolOwner) {
  return PoolOwner->getStrings();
}

TimerIdT TimerMarker::getTimerIdFromFuncName(GlobalContext *Ctx,
                                             const std::string &FuncName) {
  if (!BuildDefs::timers())
    return 0;
  if (!getFlags().getTimeEachFunction())
    return 0;
  return Ctx->getTimerID(GlobalContext::TSK_Funcs, FuncName);
}

void TimerMarker::push() {
  switch (StackID) {
  case GlobalContext::TSK_Default:
    Active = getFlags().getSubzeroTimingEnabled() ||
             !getFlags().getTimingFocusOnString().empty();
    break;
  case GlobalContext::TSK_Funcs:
    Active = getFlags().getTimeEachFunction();
    break;
  default:
    break;
  }
  if (Active)
    Ctx->pushTimer(ID, StackID);
}

void TimerMarker::pushCfg(const Cfg *Func) {
  Ctx = Func->getContext();
  Active = Func->getFocusedTiming() || getFlags().getSubzeroTimingEnabled();
  if (Active)
    Ctx->pushTimer(ID, StackID);
}

ICE_TLS_DEFINE_FIELD(GlobalContext::ThreadContext *, GlobalContext, TLS);

} // end of namespace Ice