1 //===- CoroFrame.cpp - Builds and manipulates coroutine frame -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // This file contains classes used to discover if for a particular value
9 // there from sue to definition that crosses a suspend block.
10 //
11 // Using the information discovered we form a Coroutine Frame structure to
12 // contain those values. All uses of those values are replaced with appropriate
13 // GEP + load from the coroutine frame. At the point of the definition we spill
14 // the value into the coroutine frame.
15 //
16 // TODO: pack values tightly using liveness info.
17 //===----------------------------------------------------------------------===//
18
19 #include "CoroInternal.h"
20 #include "llvm/ADT/BitVector.h"
21 #include "llvm/Analysis/PtrUseVisitor.h"
22 #include "llvm/Transforms/Utils/Local.h"
23 #include "llvm/Config/llvm-config.h"
24 #include "llvm/IR/CFG.h"
25 #include "llvm/IR/Dominators.h"
26 #include "llvm/IR/IRBuilder.h"
27 #include "llvm/IR/InstIterator.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/MathExtras.h"
30 #include "llvm/Support/circular_raw_ostream.h"
31 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
32 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
33
34 using namespace llvm;
35
36 // The "coro-suspend-crossing" flag is very noisy. There is another debug type,
37 // "coro-frame", which results in leaner debug spew.
38 #define DEBUG_TYPE "coro-suspend-crossing"
39
40 enum { SmallVectorThreshold = 32 };
41
42 // Provides two way mapping between the blocks and numbers.
43 namespace {
44 class BlockToIndexMapping {
45 SmallVector<BasicBlock *, SmallVectorThreshold> V;
46
47 public:
size() const48 size_t size() const { return V.size(); }
49
BlockToIndexMapping(Function & F)50 BlockToIndexMapping(Function &F) {
51 for (BasicBlock &BB : F)
52 V.push_back(&BB);
53 llvm::sort(V);
54 }
55
blockToIndex(BasicBlock * BB) const56 size_t blockToIndex(BasicBlock *BB) const {
57 auto *I = llvm::lower_bound(V, BB);
58 assert(I != V.end() && *I == BB && "BasicBlockNumberng: Unknown block");
59 return I - V.begin();
60 }
61
indexToBlock(unsigned Index) const62 BasicBlock *indexToBlock(unsigned Index) const { return V[Index]; }
63 };
64 } // end anonymous namespace
65
66 // The SuspendCrossingInfo maintains data that allows to answer a question
67 // whether given two BasicBlocks A and B there is a path from A to B that
68 // passes through a suspend point.
69 //
70 // For every basic block 'i' it maintains a BlockData that consists of:
71 // Consumes: a bit vector which contains a set of indices of blocks that can
72 // reach block 'i'
73 // Kills: a bit vector which contains a set of indices of blocks that can
74 // reach block 'i', but one of the path will cross a suspend point
75 // Suspend: a boolean indicating whether block 'i' contains a suspend point.
76 // End: a boolean indicating whether block 'i' contains a coro.end intrinsic.
77 //
78 namespace {
79 struct SuspendCrossingInfo {
80 BlockToIndexMapping Mapping;
81
82 struct BlockData {
83 BitVector Consumes;
84 BitVector Kills;
85 bool Suspend = false;
86 bool End = false;
87 };
88 SmallVector<BlockData, SmallVectorThreshold> Block;
89
successors__anonb46c30f20311::SuspendCrossingInfo90 iterator_range<succ_iterator> successors(BlockData const &BD) const {
91 BasicBlock *BB = Mapping.indexToBlock(&BD - &Block[0]);
92 return llvm::successors(BB);
93 }
94
getBlockData__anonb46c30f20311::SuspendCrossingInfo95 BlockData &getBlockData(BasicBlock *BB) {
96 return Block[Mapping.blockToIndex(BB)];
97 }
98
99 void dump() const;
100 void dump(StringRef Label, BitVector const &BV) const;
101
102 SuspendCrossingInfo(Function &F, coro::Shape &Shape);
103
hasPathCrossingSuspendPoint__anonb46c30f20311::SuspendCrossingInfo104 bool hasPathCrossingSuspendPoint(BasicBlock *DefBB, BasicBlock *UseBB) const {
105 size_t const DefIndex = Mapping.blockToIndex(DefBB);
106 size_t const UseIndex = Mapping.blockToIndex(UseBB);
107
108 assert(Block[UseIndex].Consumes[DefIndex] && "use must consume def");
109 bool const Result = Block[UseIndex].Kills[DefIndex];
110 LLVM_DEBUG(dbgs() << UseBB->getName() << " => " << DefBB->getName()
111 << " answer is " << Result << "\n");
112 return Result;
113 }
114
isDefinitionAcrossSuspend__anonb46c30f20311::SuspendCrossingInfo115 bool isDefinitionAcrossSuspend(BasicBlock *DefBB, User *U) const {
116 auto *I = cast<Instruction>(U);
117
118 // We rewrote PHINodes, so that only the ones with exactly one incoming
119 // value need to be analyzed.
120 if (auto *PN = dyn_cast<PHINode>(I))
121 if (PN->getNumIncomingValues() > 1)
122 return false;
123
124 BasicBlock *UseBB = I->getParent();
125
126 // As a special case, treat uses by an llvm.coro.suspend.retcon
127 // as if they were uses in the suspend's single predecessor: the
128 // uses conceptually occur before the suspend.
129 if (isa<CoroSuspendRetconInst>(I)) {
130 UseBB = UseBB->getSinglePredecessor();
131 assert(UseBB && "should have split coro.suspend into its own block");
132 }
133
134 return hasPathCrossingSuspendPoint(DefBB, UseBB);
135 }
136
isDefinitionAcrossSuspend__anonb46c30f20311::SuspendCrossingInfo137 bool isDefinitionAcrossSuspend(Argument &A, User *U) const {
138 return isDefinitionAcrossSuspend(&A.getParent()->getEntryBlock(), U);
139 }
140
isDefinitionAcrossSuspend__anonb46c30f20311::SuspendCrossingInfo141 bool isDefinitionAcrossSuspend(Instruction &I, User *U) const {
142 auto *DefBB = I.getParent();
143
144 // As a special case, treat values produced by an llvm.coro.suspend.*
145 // as if they were defined in the single successor: the uses
146 // conceptually occur after the suspend.
147 if (isa<AnyCoroSuspendInst>(I)) {
148 DefBB = DefBB->getSingleSuccessor();
149 assert(DefBB && "should have split coro.suspend into its own block");
150 }
151
152 return isDefinitionAcrossSuspend(DefBB, U);
153 }
154 };
155 } // end anonymous namespace
156
157 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dump(StringRef Label,BitVector const & BV) const158 LLVM_DUMP_METHOD void SuspendCrossingInfo::dump(StringRef Label,
159 BitVector const &BV) const {
160 dbgs() << Label << ":";
161 for (size_t I = 0, N = BV.size(); I < N; ++I)
162 if (BV[I])
163 dbgs() << " " << Mapping.indexToBlock(I)->getName();
164 dbgs() << "\n";
165 }
166
dump() const167 LLVM_DUMP_METHOD void SuspendCrossingInfo::dump() const {
168 for (size_t I = 0, N = Block.size(); I < N; ++I) {
169 BasicBlock *const B = Mapping.indexToBlock(I);
170 dbgs() << B->getName() << ":\n";
171 dump(" Consumes", Block[I].Consumes);
172 dump(" Kills", Block[I].Kills);
173 }
174 dbgs() << "\n";
175 }
176 #endif
177
SuspendCrossingInfo(Function & F,coro::Shape & Shape)178 SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape)
179 : Mapping(F) {
180 const size_t N = Mapping.size();
181 Block.resize(N);
182
183 // Initialize every block so that it consumes itself
184 for (size_t I = 0; I < N; ++I) {
185 auto &B = Block[I];
186 B.Consumes.resize(N);
187 B.Kills.resize(N);
188 B.Consumes.set(I);
189 }
190
191 // Mark all CoroEnd Blocks. We do not propagate Kills beyond coro.ends as
192 // the code beyond coro.end is reachable during initial invocation of the
193 // coroutine.
194 for (auto *CE : Shape.CoroEnds)
195 getBlockData(CE->getParent()).End = true;
196
197 // Mark all suspend blocks and indicate that they kill everything they
198 // consume. Note, that crossing coro.save also requires a spill, as any code
199 // between coro.save and coro.suspend may resume the coroutine and all of the
200 // state needs to be saved by that time.
201 auto markSuspendBlock = [&](IntrinsicInst *BarrierInst) {
202 BasicBlock *SuspendBlock = BarrierInst->getParent();
203 auto &B = getBlockData(SuspendBlock);
204 B.Suspend = true;
205 B.Kills |= B.Consumes;
206 };
207 for (auto *CSI : Shape.CoroSuspends) {
208 markSuspendBlock(CSI);
209 if (auto *Save = CSI->getCoroSave())
210 markSuspendBlock(Save);
211 }
212
213 // Iterate propagating consumes and kills until they stop changing.
214 int Iteration = 0;
215 (void)Iteration;
216
217 bool Changed;
218 do {
219 LLVM_DEBUG(dbgs() << "iteration " << ++Iteration);
220 LLVM_DEBUG(dbgs() << "==============\n");
221
222 Changed = false;
223 for (size_t I = 0; I < N; ++I) {
224 auto &B = Block[I];
225 for (BasicBlock *SI : successors(B)) {
226
227 auto SuccNo = Mapping.blockToIndex(SI);
228
229 // Saved Consumes and Kills bitsets so that it is easy to see
230 // if anything changed after propagation.
231 auto &S = Block[SuccNo];
232 auto SavedConsumes = S.Consumes;
233 auto SavedKills = S.Kills;
234
235 // Propagate Kills and Consumes from block B into its successor S.
236 S.Consumes |= B.Consumes;
237 S.Kills |= B.Kills;
238
239 // If block B is a suspend block, it should propagate kills into the
240 // its successor for every block B consumes.
241 if (B.Suspend) {
242 S.Kills |= B.Consumes;
243 }
244 if (S.Suspend) {
245 // If block S is a suspend block, it should kill all of the blocks it
246 // consumes.
247 S.Kills |= S.Consumes;
248 } else if (S.End) {
249 // If block S is an end block, it should not propagate kills as the
250 // blocks following coro.end() are reached during initial invocation
251 // of the coroutine while all the data are still available on the
252 // stack or in the registers.
253 S.Kills.reset();
254 } else {
255 // This is reached when S block it not Suspend nor coro.end and it
256 // need to make sure that it is not in the kill set.
257 S.Kills.reset(SuccNo);
258 }
259
260 // See if anything changed.
261 Changed |= (S.Kills != SavedKills) || (S.Consumes != SavedConsumes);
262
263 if (S.Kills != SavedKills) {
264 LLVM_DEBUG(dbgs() << "\nblock " << I << " follower " << SI->getName()
265 << "\n");
266 LLVM_DEBUG(dump("S.Kills", S.Kills));
267 LLVM_DEBUG(dump("SavedKills", SavedKills));
268 }
269 if (S.Consumes != SavedConsumes) {
270 LLVM_DEBUG(dbgs() << "\nblock " << I << " follower " << SI << "\n");
271 LLVM_DEBUG(dump("S.Consume", S.Consumes));
272 LLVM_DEBUG(dump("SavedCons", SavedConsumes));
273 }
274 }
275 }
276 } while (Changed);
277 LLVM_DEBUG(dump());
278 }
279
280 #undef DEBUG_TYPE // "coro-suspend-crossing"
281 #define DEBUG_TYPE "coro-frame"
282
283 // We build up the list of spills for every case where a use is separated
284 // from the definition by a suspend point.
285
286 static const unsigned InvalidFieldIndex = ~0U;
287
288 namespace {
289 class Spill {
290 Value *Def = nullptr;
291 Instruction *User = nullptr;
292 unsigned FieldNo = InvalidFieldIndex;
293
294 public:
Spill(Value * Def,llvm::User * U)295 Spill(Value *Def, llvm::User *U) : Def(Def), User(cast<Instruction>(U)) {}
296
def() const297 Value *def() const { return Def; }
user() const298 Instruction *user() const { return User; }
userBlock() const299 BasicBlock *userBlock() const { return User->getParent(); }
300
301 // Note that field index is stored in the first SpillEntry for a particular
302 // definition. Subsequent mentions of a defintion do not have fieldNo
303 // assigned. This works out fine as the users of Spills capture the info about
304 // the definition the first time they encounter it. Consider refactoring
305 // SpillInfo into two arrays to normalize the spill representation.
fieldIndex() const306 unsigned fieldIndex() const {
307 assert(FieldNo != InvalidFieldIndex && "Accessing unassigned field");
308 return FieldNo;
309 }
setFieldIndex(unsigned FieldNumber)310 void setFieldIndex(unsigned FieldNumber) {
311 assert(FieldNo == InvalidFieldIndex && "Reassigning field number");
312 FieldNo = FieldNumber;
313 }
314 };
315 } // namespace
316
317 // Note that there may be more than one record with the same value of Def in
318 // the SpillInfo vector.
319 using SpillInfo = SmallVector<Spill, 8>;
320
321 #ifndef NDEBUG
dump(StringRef Title,SpillInfo const & Spills)322 static void dump(StringRef Title, SpillInfo const &Spills) {
323 dbgs() << "------------- " << Title << "--------------\n";
324 Value *CurrentValue = nullptr;
325 for (auto const &E : Spills) {
326 if (CurrentValue != E.def()) {
327 CurrentValue = E.def();
328 CurrentValue->dump();
329 }
330 dbgs() << " user: ";
331 E.user()->dump();
332 }
333 }
334 #endif
335
336 namespace {
337 // We cannot rely solely on natural alignment of a type when building a
338 // coroutine frame and if the alignment specified on the Alloca instruction
339 // differs from the natural alignment of the alloca type we will need to insert
340 // padding.
341 struct PaddingCalculator {
342 const DataLayout &DL;
343 LLVMContext &Context;
344 unsigned StructSize = 0;
345
PaddingCalculator__anonb46c30f20611::PaddingCalculator346 PaddingCalculator(LLVMContext &Context, DataLayout const &DL)
347 : DL(DL), Context(Context) {}
348
349 // Replicate the logic from IR/DataLayout.cpp to match field offset
350 // computation for LLVM structs.
addType__anonb46c30f20611::PaddingCalculator351 void addType(Type *Ty) {
352 unsigned TyAlign = DL.getABITypeAlignment(Ty);
353 if ((StructSize & (TyAlign - 1)) != 0)
354 StructSize = alignTo(StructSize, TyAlign);
355
356 StructSize += DL.getTypeAllocSize(Ty); // Consume space for this data item.
357 }
358
addTypes__anonb46c30f20611::PaddingCalculator359 void addTypes(SmallVectorImpl<Type *> const &Types) {
360 for (auto *Ty : Types)
361 addType(Ty);
362 }
363
computePadding__anonb46c30f20611::PaddingCalculator364 unsigned computePadding(Type *Ty, unsigned ForcedAlignment) {
365 unsigned TyAlign = DL.getABITypeAlignment(Ty);
366 auto Natural = alignTo(StructSize, TyAlign);
367 auto Forced = alignTo(StructSize, ForcedAlignment);
368
369 // Return how many bytes of padding we need to insert.
370 if (Natural != Forced)
371 return std::max(Natural, Forced) - StructSize;
372
373 // Rely on natural alignment.
374 return 0;
375 }
376
377 // If padding required, return the padding field type to insert.
getPaddingType__anonb46c30f20611::PaddingCalculator378 ArrayType *getPaddingType(Type *Ty, unsigned ForcedAlignment) {
379 if (auto Padding = computePadding(Ty, ForcedAlignment))
380 return ArrayType::get(Type::getInt8Ty(Context), Padding);
381
382 return nullptr;
383 }
384 };
385 } // namespace
386
387 // Build a struct that will keep state for an active coroutine.
388 // struct f.frame {
389 // ResumeFnTy ResumeFnAddr;
390 // ResumeFnTy DestroyFnAddr;
391 // int ResumeIndex;
392 // ... promise (if present) ...
393 // ... spills ...
394 // };
buildFrameType(Function & F,coro::Shape & Shape,SpillInfo & Spills)395 static StructType *buildFrameType(Function &F, coro::Shape &Shape,
396 SpillInfo &Spills) {
397 LLVMContext &C = F.getContext();
398 const DataLayout &DL = F.getParent()->getDataLayout();
399 PaddingCalculator Padder(C, DL);
400 SmallString<32> Name(F.getName());
401 Name.append(".Frame");
402 StructType *FrameTy = StructType::create(C, Name);
403 SmallVector<Type *, 8> Types;
404
405 AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
406
407 if (Shape.ABI == coro::ABI::Switch) {
408 auto *FramePtrTy = FrameTy->getPointerTo();
409 auto *FnTy = FunctionType::get(Type::getVoidTy(C), FramePtrTy,
410 /*IsVarArg=*/false);
411 auto *FnPtrTy = FnTy->getPointerTo();
412
413 // Figure out how wide should be an integer type storing the suspend index.
414 unsigned IndexBits = std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size()));
415 Type *PromiseType = PromiseAlloca
416 ? PromiseAlloca->getType()->getElementType()
417 : Type::getInt1Ty(C);
418 Type *IndexType = Type::getIntNTy(C, IndexBits);
419 Types.push_back(FnPtrTy);
420 Types.push_back(FnPtrTy);
421 Types.push_back(PromiseType);
422 Types.push_back(IndexType);
423 } else {
424 assert(PromiseAlloca == nullptr && "lowering doesn't support promises");
425 }
426
427 Value *CurrentDef = nullptr;
428
429 Padder.addTypes(Types);
430
431 // Create an entry for every spilled value.
432 for (auto &S : Spills) {
433 if (CurrentDef == S.def())
434 continue;
435
436 CurrentDef = S.def();
437 // PromiseAlloca was already added to Types array earlier.
438 if (CurrentDef == PromiseAlloca)
439 continue;
440
441 uint64_t Count = 1;
442 Type *Ty = nullptr;
443 if (auto *AI = dyn_cast<AllocaInst>(CurrentDef)) {
444 Ty = AI->getAllocatedType();
445 if (unsigned AllocaAlignment = AI->getAlignment()) {
446 // If alignment is specified in alloca, see if we need to insert extra
447 // padding.
448 if (auto PaddingTy = Padder.getPaddingType(Ty, AllocaAlignment)) {
449 Types.push_back(PaddingTy);
450 Padder.addType(PaddingTy);
451 }
452 }
453 if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize()))
454 Count = CI->getValue().getZExtValue();
455 else
456 report_fatal_error("Coroutines cannot handle non static allocas yet");
457 } else {
458 Ty = CurrentDef->getType();
459 }
460 S.setFieldIndex(Types.size());
461 if (Count == 1)
462 Types.push_back(Ty);
463 else
464 Types.push_back(ArrayType::get(Ty, Count));
465 Padder.addType(Ty);
466 }
467 FrameTy->setBody(Types);
468
469 switch (Shape.ABI) {
470 case coro::ABI::Switch:
471 break;
472
473 // Remember whether the frame is inline in the storage.
474 case coro::ABI::Retcon:
475 case coro::ABI::RetconOnce: {
476 auto &Layout = F.getParent()->getDataLayout();
477 auto Id = Shape.getRetconCoroId();
478 Shape.RetconLowering.IsFrameInlineInStorage
479 = (Layout.getTypeAllocSize(FrameTy) <= Id->getStorageSize() &&
480 Layout.getABITypeAlignment(FrameTy) <= Id->getStorageAlignment());
481 break;
482 }
483 }
484
485 return FrameTy;
486 }
487
488 // We use a pointer use visitor to discover if there are any writes into an
489 // alloca that dominates CoroBegin. If that is the case, insertSpills will copy
490 // the value from the alloca into the coroutine frame spill slot corresponding
491 // to that alloca.
492 namespace {
493 struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
494 using Base = PtrUseVisitor<AllocaUseVisitor>;
AllocaUseVisitor__anonb46c30f20711::AllocaUseVisitor495 AllocaUseVisitor(const DataLayout &DL, const DominatorTree &DT,
496 const CoroBeginInst &CB)
497 : PtrUseVisitor(DL), DT(DT), CoroBegin(CB) {}
498
499 // We are only interested in uses that dominate coro.begin.
visit__anonb46c30f20711::AllocaUseVisitor500 void visit(Instruction &I) {
501 if (DT.dominates(&I, &CoroBegin))
502 Base::visit(I);
503 }
504 // We need to provide this overload as PtrUseVisitor uses a pointer based
505 // visiting function.
visit__anonb46c30f20711::AllocaUseVisitor506 void visit(Instruction *I) { return visit(*I); }
507
visitLoadInst__anonb46c30f20711::AllocaUseVisitor508 void visitLoadInst(LoadInst &) {} // Good. Nothing to do.
509
510 // If the use is an operand, the pointer escaped and anything can write into
511 // that memory. If the use is the pointer, we are definitely writing into the
512 // alloca and therefore we need to copy.
visitStoreInst__anonb46c30f20711::AllocaUseVisitor513 void visitStoreInst(StoreInst &SI) { PI.setAborted(&SI); }
514
515 // Any other instruction that is not filtered out by PtrUseVisitor, will
516 // result in the copy.
visitInstruction__anonb46c30f20711::AllocaUseVisitor517 void visitInstruction(Instruction &I) { PI.setAborted(&I); }
518
519 private:
520 const DominatorTree &DT;
521 const CoroBeginInst &CoroBegin;
522 };
523 } // namespace
mightWriteIntoAllocaPtr(AllocaInst & A,const DominatorTree & DT,const CoroBeginInst & CB)524 static bool mightWriteIntoAllocaPtr(AllocaInst &A, const DominatorTree &DT,
525 const CoroBeginInst &CB) {
526 const DataLayout &DL = A.getModule()->getDataLayout();
527 AllocaUseVisitor Visitor(DL, DT, CB);
528 auto PtrI = Visitor.visitPtr(A);
529 if (PtrI.isEscaped() || PtrI.isAborted()) {
530 auto *PointerEscapingInstr = PtrI.getEscapingInst()
531 ? PtrI.getEscapingInst()
532 : PtrI.getAbortingInst();
533 if (PointerEscapingInstr) {
534 LLVM_DEBUG(
535 dbgs() << "AllocaInst copy was triggered by instruction: "
536 << *PointerEscapingInstr << "\n");
537 }
538 return true;
539 }
540 return false;
541 }
542
543 // We need to make room to insert a spill after initial PHIs, but before
544 // catchswitch instruction. Placing it before violates the requirement that
545 // catchswitch, like all other EHPads must be the first nonPHI in a block.
546 //
547 // Split away catchswitch into a separate block and insert in its place:
548 //
549 // cleanuppad <InsertPt> cleanupret.
550 //
551 // cleanupret instruction will act as an insert point for the spill.
splitBeforeCatchSwitch(CatchSwitchInst * CatchSwitch)552 static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) {
553 BasicBlock *CurrentBlock = CatchSwitch->getParent();
554 BasicBlock *NewBlock = CurrentBlock->splitBasicBlock(CatchSwitch);
555 CurrentBlock->getTerminator()->eraseFromParent();
556
557 auto *CleanupPad =
558 CleanupPadInst::Create(CatchSwitch->getParentPad(), {}, "", CurrentBlock);
559 auto *CleanupRet =
560 CleanupReturnInst::Create(CleanupPad, NewBlock, CurrentBlock);
561 return CleanupRet;
562 }
563
564 // Replace all alloca and SSA values that are accessed across suspend points
565 // with GetElementPointer from coroutine frame + loads and stores. Create an
566 // AllocaSpillBB that will become the new entry block for the resume parts of
567 // the coroutine:
568 //
569 // %hdl = coro.begin(...)
570 // whatever
571 //
572 // becomes:
573 //
574 // %hdl = coro.begin(...)
575 // %FramePtr = bitcast i8* hdl to %f.frame*
576 // br label %AllocaSpillBB
577 //
578 // AllocaSpillBB:
579 // ; geps corresponding to allocas that were moved to coroutine frame
580 // br label PostSpill
581 //
582 // PostSpill:
583 // whatever
584 //
585 //
insertSpills(const SpillInfo & Spills,coro::Shape & Shape)586 static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) {
587 auto *CB = Shape.CoroBegin;
588 LLVMContext &C = CB->getContext();
589 IRBuilder<> Builder(CB->getNextNode());
590 StructType *FrameTy = Shape.FrameTy;
591 PointerType *FramePtrTy = FrameTy->getPointerTo();
592 auto *FramePtr =
593 cast<Instruction>(Builder.CreateBitCast(CB, FramePtrTy, "FramePtr"));
594 DominatorTree DT(*CB->getFunction());
595
596 Value *CurrentValue = nullptr;
597 BasicBlock *CurrentBlock = nullptr;
598 Value *CurrentReload = nullptr;
599
600 // Proper field number will be read from field definition.
601 unsigned Index = InvalidFieldIndex;
602
603 // We need to keep track of any allocas that need "spilling"
604 // since they will live in the coroutine frame now, all access to them
605 // need to be changed, not just the access across suspend points
606 // we remember allocas and their indices to be handled once we processed
607 // all the spills.
608 SmallVector<std::pair<AllocaInst *, unsigned>, 4> Allocas;
609 // Promise alloca (if present) has a fixed field number.
610 if (auto *PromiseAlloca = Shape.getPromiseAlloca()) {
611 assert(Shape.ABI == coro::ABI::Switch);
612 Allocas.emplace_back(PromiseAlloca, coro::Shape::SwitchFieldIndex::Promise);
613 }
614
615 // Create a GEP with the given index into the coroutine frame for the original
616 // value Orig. Appends an extra 0 index for array-allocas, preserving the
617 // original type.
618 auto GetFramePointer = [&](uint32_t Index, Value *Orig) -> Value * {
619 SmallVector<Value *, 3> Indices = {
620 ConstantInt::get(Type::getInt32Ty(C), 0),
621 ConstantInt::get(Type::getInt32Ty(C), Index),
622 };
623
624 if (auto *AI = dyn_cast<AllocaInst>(Orig)) {
625 if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) {
626 auto Count = CI->getValue().getZExtValue();
627 if (Count > 1) {
628 Indices.push_back(ConstantInt::get(Type::getInt32Ty(C), 0));
629 }
630 } else {
631 report_fatal_error("Coroutines cannot handle non static allocas yet");
632 }
633 }
634
635 return Builder.CreateInBoundsGEP(FrameTy, FramePtr, Indices);
636 };
637
638 // Create a load instruction to reload the spilled value from the coroutine
639 // frame.
640 auto CreateReload = [&](Instruction *InsertBefore) {
641 assert(Index != InvalidFieldIndex && "accessing unassigned field number");
642 Builder.SetInsertPoint(InsertBefore);
643
644 auto *G = GetFramePointer(Index, CurrentValue);
645 G->setName(CurrentValue->getName() + Twine(".reload.addr"));
646
647 return isa<AllocaInst>(CurrentValue)
648 ? G
649 : Builder.CreateLoad(FrameTy->getElementType(Index), G,
650 CurrentValue->getName() + Twine(".reload"));
651 };
652
653 for (auto const &E : Spills) {
654 // If we have not seen the value, generate a spill.
655 if (CurrentValue != E.def()) {
656 CurrentValue = E.def();
657 CurrentBlock = nullptr;
658 CurrentReload = nullptr;
659
660 Index = E.fieldIndex();
661
662 if (auto *AI = dyn_cast<AllocaInst>(CurrentValue)) {
663 // Spilled AllocaInst will be replaced with GEP from the coroutine frame
664 // there is no spill required.
665 Allocas.emplace_back(AI, Index);
666 if (!AI->isStaticAlloca())
667 report_fatal_error("Coroutines cannot handle non static allocas yet");
668 } else {
669 // Otherwise, create a store instruction storing the value into the
670 // coroutine frame.
671
672 Instruction *InsertPt = nullptr;
673 if (auto Arg = dyn_cast<Argument>(CurrentValue)) {
674 // For arguments, we will place the store instruction right after
675 // the coroutine frame pointer instruction, i.e. bitcast of
676 // coro.begin from i8* to %f.frame*.
677 InsertPt = FramePtr->getNextNode();
678
679 // If we're spilling an Argument, make sure we clear 'nocapture'
680 // from the coroutine function.
681 Arg->getParent()->removeParamAttr(Arg->getArgNo(),
682 Attribute::NoCapture);
683
684 } else if (auto *II = dyn_cast<InvokeInst>(CurrentValue)) {
685 // If we are spilling the result of the invoke instruction, split the
686 // normal edge and insert the spill in the new block.
687 auto NewBB = SplitEdge(II->getParent(), II->getNormalDest());
688 InsertPt = NewBB->getTerminator();
689 } else if (isa<PHINode>(CurrentValue)) {
690 // Skip the PHINodes and EH pads instructions.
691 BasicBlock *DefBlock = cast<Instruction>(E.def())->getParent();
692 if (auto *CSI = dyn_cast<CatchSwitchInst>(DefBlock->getTerminator()))
693 InsertPt = splitBeforeCatchSwitch(CSI);
694 else
695 InsertPt = &*DefBlock->getFirstInsertionPt();
696 } else if (auto CSI = dyn_cast<AnyCoroSuspendInst>(CurrentValue)) {
697 // Don't spill immediately after a suspend; splitting assumes
698 // that the suspend will be followed by a branch.
699 InsertPt = CSI->getParent()->getSingleSuccessor()->getFirstNonPHI();
700 } else {
701 auto *I = cast<Instruction>(E.def());
702 assert(!I->isTerminator() && "unexpected terminator");
703 // For all other values, the spill is placed immediately after
704 // the definition.
705 if (DT.dominates(CB, I)) {
706 InsertPt = I->getNextNode();
707 } else {
708 // Unless, it is not dominated by CoroBegin, then it will be
709 // inserted immediately after CoroFrame is computed.
710 InsertPt = FramePtr->getNextNode();
711 }
712 }
713
714 Builder.SetInsertPoint(InsertPt);
715 auto *G = Builder.CreateConstInBoundsGEP2_32(
716 FrameTy, FramePtr, 0, Index,
717 CurrentValue->getName() + Twine(".spill.addr"));
718 Builder.CreateStore(CurrentValue, G);
719 }
720 }
721
722 // If we have not seen the use block, generate a reload in it.
723 if (CurrentBlock != E.userBlock()) {
724 CurrentBlock = E.userBlock();
725 CurrentReload = CreateReload(&*CurrentBlock->getFirstInsertionPt());
726 }
727
728 // If we have a single edge PHINode, remove it and replace it with a reload
729 // from the coroutine frame. (We already took care of multi edge PHINodes
730 // by rewriting them in the rewritePHIs function).
731 if (auto *PN = dyn_cast<PHINode>(E.user())) {
732 assert(PN->getNumIncomingValues() == 1 && "unexpected number of incoming "
733 "values in the PHINode");
734 PN->replaceAllUsesWith(CurrentReload);
735 PN->eraseFromParent();
736 continue;
737 }
738
739 // Replace all uses of CurrentValue in the current instruction with reload.
740 E.user()->replaceUsesOfWith(CurrentValue, CurrentReload);
741 }
742
743 BasicBlock *FramePtrBB = FramePtr->getParent();
744
745 auto SpillBlock =
746 FramePtrBB->splitBasicBlock(FramePtr->getNextNode(), "AllocaSpillBB");
747 SpillBlock->splitBasicBlock(&SpillBlock->front(), "PostSpill");
748 Shape.AllocaSpillBlock = SpillBlock;
749 // If we found any allocas, replace all of their remaining uses with Geps.
750 // Note: we cannot do it indiscriminately as some of the uses may not be
751 // dominated by CoroBegin.
752 bool MightNeedToCopy = false;
753 Builder.SetInsertPoint(&Shape.AllocaSpillBlock->front());
754 SmallVector<Instruction *, 4> UsersToUpdate;
755 for (auto &P : Allocas) {
756 AllocaInst *const A = P.first;
757 UsersToUpdate.clear();
758 for (User *U : A->users()) {
759 auto *I = cast<Instruction>(U);
760 if (DT.dominates(CB, I))
761 UsersToUpdate.push_back(I);
762 else
763 MightNeedToCopy = true;
764 }
765 if (!UsersToUpdate.empty()) {
766 auto *G = GetFramePointer(P.second, A);
767 G->takeName(A);
768 for (Instruction *I : UsersToUpdate)
769 I->replaceUsesOfWith(A, G);
770 }
771 }
772 // If we discovered such uses not dominated by CoroBegin, see if any of them
773 // preceed coro begin and have instructions that can modify the
774 // value of the alloca and therefore would require a copying the value into
775 // the spill slot in the coroutine frame.
776 if (MightNeedToCopy) {
777 Builder.SetInsertPoint(FramePtr->getNextNode());
778
779 for (auto &P : Allocas) {
780 AllocaInst *const A = P.first;
781 if (mightWriteIntoAllocaPtr(*A, DT, *CB)) {
782 if (A->isArrayAllocation())
783 report_fatal_error(
784 "Coroutines cannot handle copying of array allocas yet");
785
786 auto *G = GetFramePointer(P.second, A);
787 auto *Value = Builder.CreateLoad(A);
788 Builder.CreateStore(Value, G);
789 }
790 }
791 }
792 return FramePtr;
793 }
794
795 // Sets the unwind edge of an instruction to a particular successor.
setUnwindEdgeTo(Instruction * TI,BasicBlock * Succ)796 static void setUnwindEdgeTo(Instruction *TI, BasicBlock *Succ) {
797 if (auto *II = dyn_cast<InvokeInst>(TI))
798 II->setUnwindDest(Succ);
799 else if (auto *CS = dyn_cast<CatchSwitchInst>(TI))
800 CS->setUnwindDest(Succ);
801 else if (auto *CR = dyn_cast<CleanupReturnInst>(TI))
802 CR->setUnwindDest(Succ);
803 else
804 llvm_unreachable("unexpected terminator instruction");
805 }
806
807 // Replaces all uses of OldPred with the NewPred block in all PHINodes in a
808 // block.
updatePhiNodes(BasicBlock * DestBB,BasicBlock * OldPred,BasicBlock * NewPred,PHINode * LandingPadReplacement)809 static void updatePhiNodes(BasicBlock *DestBB, BasicBlock *OldPred,
810 BasicBlock *NewPred,
811 PHINode *LandingPadReplacement) {
812 unsigned BBIdx = 0;
813 for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) {
814 PHINode *PN = cast<PHINode>(I);
815
816 // We manually update the LandingPadReplacement PHINode and it is the last
817 // PHI Node. So, if we find it, we are done.
818 if (LandingPadReplacement == PN)
819 break;
820
821 // Reuse the previous value of BBIdx if it lines up. In cases where we
822 // have multiple phi nodes with *lots* of predecessors, this is a speed
823 // win because we don't have to scan the PHI looking for TIBB. This
824 // happens because the BB list of PHI nodes are usually in the same
825 // order.
826 if (PN->getIncomingBlock(BBIdx) != OldPred)
827 BBIdx = PN->getBasicBlockIndex(OldPred);
828
829 assert(BBIdx != (unsigned)-1 && "Invalid PHI Index!");
830 PN->setIncomingBlock(BBIdx, NewPred);
831 }
832 }
833
834 // Uses SplitEdge unless the successor block is an EHPad, in which case do EH
835 // specific handling.
ehAwareSplitEdge(BasicBlock * BB,BasicBlock * Succ,LandingPadInst * OriginalPad,PHINode * LandingPadReplacement)836 static BasicBlock *ehAwareSplitEdge(BasicBlock *BB, BasicBlock *Succ,
837 LandingPadInst *OriginalPad,
838 PHINode *LandingPadReplacement) {
839 auto *PadInst = Succ->getFirstNonPHI();
840 if (!LandingPadReplacement && !PadInst->isEHPad())
841 return SplitEdge(BB, Succ);
842
843 auto *NewBB = BasicBlock::Create(BB->getContext(), "", BB->getParent(), Succ);
844 setUnwindEdgeTo(BB->getTerminator(), NewBB);
845 updatePhiNodes(Succ, BB, NewBB, LandingPadReplacement);
846
847 if (LandingPadReplacement) {
848 auto *NewLP = OriginalPad->clone();
849 auto *Terminator = BranchInst::Create(Succ, NewBB);
850 NewLP->insertBefore(Terminator);
851 LandingPadReplacement->addIncoming(NewLP, NewBB);
852 return NewBB;
853 }
854 Value *ParentPad = nullptr;
855 if (auto *FuncletPad = dyn_cast<FuncletPadInst>(PadInst))
856 ParentPad = FuncletPad->getParentPad();
857 else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(PadInst))
858 ParentPad = CatchSwitch->getParentPad();
859 else
860 llvm_unreachable("handling for other EHPads not implemented yet");
861
862 auto *NewCleanupPad = CleanupPadInst::Create(ParentPad, {}, "", NewBB);
863 CleanupReturnInst::Create(NewCleanupPad, Succ, NewBB);
864 return NewBB;
865 }
866
rewritePHIs(BasicBlock & BB)867 static void rewritePHIs(BasicBlock &BB) {
868 // For every incoming edge we will create a block holding all
869 // incoming values in a single PHI nodes.
870 //
871 // loop:
872 // %n.val = phi i32[%n, %entry], [%inc, %loop]
873 //
874 // It will create:
875 //
876 // loop.from.entry:
877 // %n.loop.pre = phi i32 [%n, %entry]
878 // br %label loop
879 // loop.from.loop:
880 // %inc.loop.pre = phi i32 [%inc, %loop]
881 // br %label loop
882 //
883 // After this rewrite, further analysis will ignore any phi nodes with more
884 // than one incoming edge.
885
886 // TODO: Simplify PHINodes in the basic block to remove duplicate
887 // predecessors.
888
889 LandingPadInst *LandingPad = nullptr;
890 PHINode *ReplPHI = nullptr;
891 if ((LandingPad = dyn_cast_or_null<LandingPadInst>(BB.getFirstNonPHI()))) {
892 // ehAwareSplitEdge will clone the LandingPad in all the edge blocks.
893 // We replace the original landing pad with a PHINode that will collect the
894 // results from all of them.
895 ReplPHI = PHINode::Create(LandingPad->getType(), 1, "", LandingPad);
896 ReplPHI->takeName(LandingPad);
897 LandingPad->replaceAllUsesWith(ReplPHI);
898 // We will erase the original landing pad at the end of this function after
899 // ehAwareSplitEdge cloned it in the transition blocks.
900 }
901
902 SmallVector<BasicBlock *, 8> Preds(pred_begin(&BB), pred_end(&BB));
903 for (BasicBlock *Pred : Preds) {
904 auto *IncomingBB = ehAwareSplitEdge(Pred, &BB, LandingPad, ReplPHI);
905 IncomingBB->setName(BB.getName() + Twine(".from.") + Pred->getName());
906 auto *PN = cast<PHINode>(&BB.front());
907 do {
908 int Index = PN->getBasicBlockIndex(IncomingBB);
909 Value *V = PN->getIncomingValue(Index);
910 PHINode *InputV = PHINode::Create(
911 V->getType(), 1, V->getName() + Twine(".") + BB.getName(),
912 &IncomingBB->front());
913 InputV->addIncoming(V, Pred);
914 PN->setIncomingValue(Index, InputV);
915 PN = dyn_cast<PHINode>(PN->getNextNode());
916 } while (PN != ReplPHI); // ReplPHI is either null or the PHI that replaced
917 // the landing pad.
918 }
919
920 if (LandingPad) {
921 // Calls to ehAwareSplitEdge function cloned the original lading pad.
922 // No longer need it.
923 LandingPad->eraseFromParent();
924 }
925 }
926
rewritePHIs(Function & F)927 static void rewritePHIs(Function &F) {
928 SmallVector<BasicBlock *, 8> WorkList;
929
930 for (BasicBlock &BB : F)
931 if (auto *PN = dyn_cast<PHINode>(&BB.front()))
932 if (PN->getNumIncomingValues() > 1)
933 WorkList.push_back(&BB);
934
935 for (BasicBlock *BB : WorkList)
936 rewritePHIs(*BB);
937 }
938
939 // Check for instructions that we can recreate on resume as opposed to spill
940 // the result into a coroutine frame.
materializable(Instruction & V)941 static bool materializable(Instruction &V) {
942 return isa<CastInst>(&V) || isa<GetElementPtrInst>(&V) ||
943 isa<BinaryOperator>(&V) || isa<CmpInst>(&V) || isa<SelectInst>(&V);
944 }
945
946 // Check for structural coroutine intrinsics that should not be spilled into
947 // the coroutine frame.
isCoroutineStructureIntrinsic(Instruction & I)948 static bool isCoroutineStructureIntrinsic(Instruction &I) {
949 return isa<CoroIdInst>(&I) || isa<CoroSaveInst>(&I) ||
950 isa<CoroSuspendInst>(&I);
951 }
952
953 // For every use of the value that is across suspend point, recreate that value
954 // after a suspend point.
rewriteMaterializableInstructions(IRBuilder<> & IRB,SpillInfo const & Spills)955 static void rewriteMaterializableInstructions(IRBuilder<> &IRB,
956 SpillInfo const &Spills) {
957 BasicBlock *CurrentBlock = nullptr;
958 Instruction *CurrentMaterialization = nullptr;
959 Instruction *CurrentDef = nullptr;
960
961 for (auto const &E : Spills) {
962 // If it is a new definition, update CurrentXXX variables.
963 if (CurrentDef != E.def()) {
964 CurrentDef = cast<Instruction>(E.def());
965 CurrentBlock = nullptr;
966 CurrentMaterialization = nullptr;
967 }
968
969 // If we have not seen this block, materialize the value.
970 if (CurrentBlock != E.userBlock()) {
971 CurrentBlock = E.userBlock();
972 CurrentMaterialization = cast<Instruction>(CurrentDef)->clone();
973 CurrentMaterialization->setName(CurrentDef->getName());
974 CurrentMaterialization->insertBefore(
975 &*CurrentBlock->getFirstInsertionPt());
976 }
977
978 if (auto *PN = dyn_cast<PHINode>(E.user())) {
979 assert(PN->getNumIncomingValues() == 1 && "unexpected number of incoming "
980 "values in the PHINode");
981 PN->replaceAllUsesWith(CurrentMaterialization);
982 PN->eraseFromParent();
983 continue;
984 }
985
986 // Replace all uses of CurrentDef in the current instruction with the
987 // CurrentMaterialization for the block.
988 E.user()->replaceUsesOfWith(CurrentDef, CurrentMaterialization);
989 }
990 }
991
992 // Splits the block at a particular instruction unless it is the first
993 // instruction in the block with a single predecessor.
splitBlockIfNotFirst(Instruction * I,const Twine & Name)994 static BasicBlock *splitBlockIfNotFirst(Instruction *I, const Twine &Name) {
995 auto *BB = I->getParent();
996 if (&BB->front() == I) {
997 if (BB->getSinglePredecessor()) {
998 BB->setName(Name);
999 return BB;
1000 }
1001 }
1002 return BB->splitBasicBlock(I, Name);
1003 }
1004
1005 // Split above and below a particular instruction so that it
1006 // will be all alone by itself in a block.
splitAround(Instruction * I,const Twine & Name)1007 static void splitAround(Instruction *I, const Twine &Name) {
1008 splitBlockIfNotFirst(I, Name);
1009 splitBlockIfNotFirst(I->getNextNode(), "After" + Name);
1010 }
1011
isSuspendBlock(BasicBlock * BB)1012 static bool isSuspendBlock(BasicBlock *BB) {
1013 return isa<AnyCoroSuspendInst>(BB->front());
1014 }
1015
1016 typedef SmallPtrSet<BasicBlock*, 8> VisitedBlocksSet;
1017
1018 /// Does control flow starting at the given block ever reach a suspend
1019 /// instruction before reaching a block in VisitedOrFreeBBs?
isSuspendReachableFrom(BasicBlock * From,VisitedBlocksSet & VisitedOrFreeBBs)1020 static bool isSuspendReachableFrom(BasicBlock *From,
1021 VisitedBlocksSet &VisitedOrFreeBBs) {
1022 // Eagerly try to add this block to the visited set. If it's already
1023 // there, stop recursing; this path doesn't reach a suspend before
1024 // either looping or reaching a freeing block.
1025 if (!VisitedOrFreeBBs.insert(From).second)
1026 return false;
1027
1028 // We assume that we'll already have split suspends into their own blocks.
1029 if (isSuspendBlock(From))
1030 return true;
1031
1032 // Recurse on the successors.
1033 for (auto Succ : successors(From)) {
1034 if (isSuspendReachableFrom(Succ, VisitedOrFreeBBs))
1035 return true;
1036 }
1037
1038 return false;
1039 }
1040
1041 /// Is the given alloca "local", i.e. bounded in lifetime to not cross a
1042 /// suspend point?
isLocalAlloca(CoroAllocaAllocInst * AI)1043 static bool isLocalAlloca(CoroAllocaAllocInst *AI) {
1044 // Seed the visited set with all the basic blocks containing a free
1045 // so that we won't pass them up.
1046 VisitedBlocksSet VisitedOrFreeBBs;
1047 for (auto User : AI->users()) {
1048 if (auto FI = dyn_cast<CoroAllocaFreeInst>(User))
1049 VisitedOrFreeBBs.insert(FI->getParent());
1050 }
1051
1052 return !isSuspendReachableFrom(AI->getParent(), VisitedOrFreeBBs);
1053 }
1054
1055 /// After we split the coroutine, will the given basic block be along
1056 /// an obvious exit path for the resumption function?
willLeaveFunctionImmediatelyAfter(BasicBlock * BB,unsigned depth=3)1057 static bool willLeaveFunctionImmediatelyAfter(BasicBlock *BB,
1058 unsigned depth = 3) {
1059 // If we've bottomed out our depth count, stop searching and assume
1060 // that the path might loop back.
1061 if (depth == 0) return false;
1062
1063 // If this is a suspend block, we're about to exit the resumption function.
1064 if (isSuspendBlock(BB)) return true;
1065
1066 // Recurse into the successors.
1067 for (auto Succ : successors(BB)) {
1068 if (!willLeaveFunctionImmediatelyAfter(Succ, depth - 1))
1069 return false;
1070 }
1071
1072 // If none of the successors leads back in a loop, we're on an exit/abort.
1073 return true;
1074 }
1075
localAllocaNeedsStackSave(CoroAllocaAllocInst * AI)1076 static bool localAllocaNeedsStackSave(CoroAllocaAllocInst *AI) {
1077 // Look for a free that isn't sufficiently obviously followed by
1078 // either a suspend or a termination, i.e. something that will leave
1079 // the coro resumption frame.
1080 for (auto U : AI->users()) {
1081 auto FI = dyn_cast<CoroAllocaFreeInst>(U);
1082 if (!FI) continue;
1083
1084 if (!willLeaveFunctionImmediatelyAfter(FI->getParent()))
1085 return true;
1086 }
1087
1088 // If we never found one, we don't need a stack save.
1089 return false;
1090 }
1091
1092 /// Turn each of the given local allocas into a normal (dynamic) alloca
1093 /// instruction.
lowerLocalAllocas(ArrayRef<CoroAllocaAllocInst * > LocalAllocas,SmallVectorImpl<Instruction * > & DeadInsts)1094 static void lowerLocalAllocas(ArrayRef<CoroAllocaAllocInst*> LocalAllocas,
1095 SmallVectorImpl<Instruction*> &DeadInsts) {
1096 for (auto AI : LocalAllocas) {
1097 auto M = AI->getModule();
1098 IRBuilder<> Builder(AI);
1099
1100 // Save the stack depth. Try to avoid doing this if the stackrestore
1101 // is going to immediately precede a return or something.
1102 Value *StackSave = nullptr;
1103 if (localAllocaNeedsStackSave(AI))
1104 StackSave = Builder.CreateCall(
1105 Intrinsic::getDeclaration(M, Intrinsic::stacksave));
1106
1107 // Allocate memory.
1108 auto Alloca = Builder.CreateAlloca(Builder.getInt8Ty(), AI->getSize());
1109 Alloca->setAlignment(MaybeAlign(AI->getAlignment()));
1110
1111 for (auto U : AI->users()) {
1112 // Replace gets with the allocation.
1113 if (isa<CoroAllocaGetInst>(U)) {
1114 U->replaceAllUsesWith(Alloca);
1115
1116 // Replace frees with stackrestores. This is safe because
1117 // alloca.alloc is required to obey a stack discipline, although we
1118 // don't enforce that structurally.
1119 } else {
1120 auto FI = cast<CoroAllocaFreeInst>(U);
1121 if (StackSave) {
1122 Builder.SetInsertPoint(FI);
1123 Builder.CreateCall(
1124 Intrinsic::getDeclaration(M, Intrinsic::stackrestore),
1125 StackSave);
1126 }
1127 }
1128 DeadInsts.push_back(cast<Instruction>(U));
1129 }
1130
1131 DeadInsts.push_back(AI);
1132 }
1133 }
1134
1135 /// Turn the given coro.alloca.alloc call into a dynamic allocation.
1136 /// This happens during the all-instructions iteration, so it must not
1137 /// delete the call.
lowerNonLocalAlloca(CoroAllocaAllocInst * AI,coro::Shape & Shape,SmallVectorImpl<Instruction * > & DeadInsts)1138 static Instruction *lowerNonLocalAlloca(CoroAllocaAllocInst *AI,
1139 coro::Shape &Shape,
1140 SmallVectorImpl<Instruction*> &DeadInsts) {
1141 IRBuilder<> Builder(AI);
1142 auto Alloc = Shape.emitAlloc(Builder, AI->getSize(), nullptr);
1143
1144 for (User *U : AI->users()) {
1145 if (isa<CoroAllocaGetInst>(U)) {
1146 U->replaceAllUsesWith(Alloc);
1147 } else {
1148 auto FI = cast<CoroAllocaFreeInst>(U);
1149 Builder.SetInsertPoint(FI);
1150 Shape.emitDealloc(Builder, Alloc, nullptr);
1151 }
1152 DeadInsts.push_back(cast<Instruction>(U));
1153 }
1154
1155 // Push this on last so that it gets deleted after all the others.
1156 DeadInsts.push_back(AI);
1157
1158 // Return the new allocation value so that we can check for needed spills.
1159 return cast<Instruction>(Alloc);
1160 }
1161
1162 /// Get the current swifterror value.
emitGetSwiftErrorValue(IRBuilder<> & Builder,Type * ValueTy,coro::Shape & Shape)1163 static Value *emitGetSwiftErrorValue(IRBuilder<> &Builder, Type *ValueTy,
1164 coro::Shape &Shape) {
1165 // Make a fake function pointer as a sort of intrinsic.
1166 auto FnTy = FunctionType::get(ValueTy, {}, false);
1167 auto Fn = ConstantPointerNull::get(FnTy->getPointerTo());
1168
1169 auto Call = Builder.CreateCall(Fn, {});
1170 Shape.SwiftErrorOps.push_back(Call);
1171
1172 return Call;
1173 }
1174
1175 /// Set the given value as the current swifterror value.
1176 ///
1177 /// Returns a slot that can be used as a swifterror slot.
emitSetSwiftErrorValue(IRBuilder<> & Builder,Value * V,coro::Shape & Shape)1178 static Value *emitSetSwiftErrorValue(IRBuilder<> &Builder, Value *V,
1179 coro::Shape &Shape) {
1180 // Make a fake function pointer as a sort of intrinsic.
1181 auto FnTy = FunctionType::get(V->getType()->getPointerTo(),
1182 {V->getType()}, false);
1183 auto Fn = ConstantPointerNull::get(FnTy->getPointerTo());
1184
1185 auto Call = Builder.CreateCall(Fn, { V });
1186 Shape.SwiftErrorOps.push_back(Call);
1187
1188 return Call;
1189 }
1190
1191 /// Set the swifterror value from the given alloca before a call,
1192 /// then put in back in the alloca afterwards.
1193 ///
1194 /// Returns an address that will stand in for the swifterror slot
1195 /// until splitting.
emitSetAndGetSwiftErrorValueAround(Instruction * Call,AllocaInst * Alloca,coro::Shape & Shape)1196 static Value *emitSetAndGetSwiftErrorValueAround(Instruction *Call,
1197 AllocaInst *Alloca,
1198 coro::Shape &Shape) {
1199 auto ValueTy = Alloca->getAllocatedType();
1200 IRBuilder<> Builder(Call);
1201
1202 // Load the current value from the alloca and set it as the
1203 // swifterror value.
1204 auto ValueBeforeCall = Builder.CreateLoad(ValueTy, Alloca);
1205 auto Addr = emitSetSwiftErrorValue(Builder, ValueBeforeCall, Shape);
1206
1207 // Move to after the call. Since swifterror only has a guaranteed
1208 // value on normal exits, we can ignore implicit and explicit unwind
1209 // edges.
1210 if (isa<CallInst>(Call)) {
1211 Builder.SetInsertPoint(Call->getNextNode());
1212 } else {
1213 auto Invoke = cast<InvokeInst>(Call);
1214 Builder.SetInsertPoint(Invoke->getNormalDest()->getFirstNonPHIOrDbg());
1215 }
1216
1217 // Get the current swifterror value and store it to the alloca.
1218 auto ValueAfterCall = emitGetSwiftErrorValue(Builder, ValueTy, Shape);
1219 Builder.CreateStore(ValueAfterCall, Alloca);
1220
1221 return Addr;
1222 }
1223
1224 /// Eliminate a formerly-swifterror alloca by inserting the get/set
1225 /// intrinsics and attempting to MemToReg the alloca away.
eliminateSwiftErrorAlloca(Function & F,AllocaInst * Alloca,coro::Shape & Shape)1226 static void eliminateSwiftErrorAlloca(Function &F, AllocaInst *Alloca,
1227 coro::Shape &Shape) {
1228 for (auto UI = Alloca->use_begin(), UE = Alloca->use_end(); UI != UE; ) {
1229 // We're likely changing the use list, so use a mutation-safe
1230 // iteration pattern.
1231 auto &Use = *UI;
1232 ++UI;
1233
1234 // swifterror values can only be used in very specific ways.
1235 // We take advantage of that here.
1236 auto User = Use.getUser();
1237 if (isa<LoadInst>(User) || isa<StoreInst>(User))
1238 continue;
1239
1240 assert(isa<CallInst>(User) || isa<InvokeInst>(User));
1241 auto Call = cast<Instruction>(User);
1242
1243 auto Addr = emitSetAndGetSwiftErrorValueAround(Call, Alloca, Shape);
1244
1245 // Use the returned slot address as the call argument.
1246 Use.set(Addr);
1247 }
1248
1249 // All the uses should be loads and stores now.
1250 assert(isAllocaPromotable(Alloca));
1251 }
1252
1253 /// "Eliminate" a swifterror argument by reducing it to the alloca case
1254 /// and then loading and storing in the prologue and epilog.
1255 ///
1256 /// The argument keeps the swifterror flag.
eliminateSwiftErrorArgument(Function & F,Argument & Arg,coro::Shape & Shape,SmallVectorImpl<AllocaInst * > & AllocasToPromote)1257 static void eliminateSwiftErrorArgument(Function &F, Argument &Arg,
1258 coro::Shape &Shape,
1259 SmallVectorImpl<AllocaInst*> &AllocasToPromote) {
1260 IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg());
1261
1262 auto ArgTy = cast<PointerType>(Arg.getType());
1263 auto ValueTy = ArgTy->getElementType();
1264
1265 // Reduce to the alloca case:
1266
1267 // Create an alloca and replace all uses of the arg with it.
1268 auto Alloca = Builder.CreateAlloca(ValueTy, ArgTy->getAddressSpace());
1269 Arg.replaceAllUsesWith(Alloca);
1270
1271 // Set an initial value in the alloca. swifterror is always null on entry.
1272 auto InitialValue = Constant::getNullValue(ValueTy);
1273 Builder.CreateStore(InitialValue, Alloca);
1274
1275 // Find all the suspends in the function and save and restore around them.
1276 for (auto Suspend : Shape.CoroSuspends) {
1277 (void) emitSetAndGetSwiftErrorValueAround(Suspend, Alloca, Shape);
1278 }
1279
1280 // Find all the coro.ends in the function and restore the error value.
1281 for (auto End : Shape.CoroEnds) {
1282 Builder.SetInsertPoint(End);
1283 auto FinalValue = Builder.CreateLoad(ValueTy, Alloca);
1284 (void) emitSetSwiftErrorValue(Builder, FinalValue, Shape);
1285 }
1286
1287 // Now we can use the alloca logic.
1288 AllocasToPromote.push_back(Alloca);
1289 eliminateSwiftErrorAlloca(F, Alloca, Shape);
1290 }
1291
1292 /// Eliminate all problematic uses of swifterror arguments and allocas
1293 /// from the function. We'll fix them up later when splitting the function.
eliminateSwiftError(Function & F,coro::Shape & Shape)1294 static void eliminateSwiftError(Function &F, coro::Shape &Shape) {
1295 SmallVector<AllocaInst*, 4> AllocasToPromote;
1296
1297 // Look for a swifterror argument.
1298 for (auto &Arg : F.args()) {
1299 if (!Arg.hasSwiftErrorAttr()) continue;
1300
1301 eliminateSwiftErrorArgument(F, Arg, Shape, AllocasToPromote);
1302 break;
1303 }
1304
1305 // Look for swifterror allocas.
1306 for (auto &Inst : F.getEntryBlock()) {
1307 auto Alloca = dyn_cast<AllocaInst>(&Inst);
1308 if (!Alloca || !Alloca->isSwiftError()) continue;
1309
1310 // Clear the swifterror flag.
1311 Alloca->setSwiftError(false);
1312
1313 AllocasToPromote.push_back(Alloca);
1314 eliminateSwiftErrorAlloca(F, Alloca, Shape);
1315 }
1316
1317 // If we have any allocas to promote, compute a dominator tree and
1318 // promote them en masse.
1319 if (!AllocasToPromote.empty()) {
1320 DominatorTree DT(F);
1321 PromoteMemToReg(AllocasToPromote, DT);
1322 }
1323 }
1324
buildCoroutineFrame(Function & F,Shape & Shape)1325 void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
1326 // Lower coro.dbg.declare to coro.dbg.value, since we are going to rewrite
1327 // access to local variables.
1328 LowerDbgDeclare(F);
1329
1330 eliminateSwiftError(F, Shape);
1331
1332 if (Shape.ABI == coro::ABI::Switch &&
1333 Shape.SwitchLowering.PromiseAlloca) {
1334 Shape.getSwitchCoroId()->clearPromise();
1335 }
1336
1337 // Make sure that all coro.save, coro.suspend and the fallthrough coro.end
1338 // intrinsics are in their own blocks to simplify the logic of building up
1339 // SuspendCrossing data.
1340 for (auto *CSI : Shape.CoroSuspends) {
1341 if (auto *Save = CSI->getCoroSave())
1342 splitAround(Save, "CoroSave");
1343 splitAround(CSI, "CoroSuspend");
1344 }
1345
1346 // Put CoroEnds into their own blocks.
1347 for (CoroEndInst *CE : Shape.CoroEnds)
1348 splitAround(CE, "CoroEnd");
1349
1350 // Transforms multi-edge PHI Nodes, so that any value feeding into a PHI will
1351 // never has its definition separated from the PHI by the suspend point.
1352 rewritePHIs(F);
1353
1354 // Build suspend crossing info.
1355 SuspendCrossingInfo Checker(F, Shape);
1356
1357 IRBuilder<> Builder(F.getContext());
1358 SpillInfo Spills;
1359 SmallVector<CoroAllocaAllocInst*, 4> LocalAllocas;
1360 SmallVector<Instruction*, 4> DeadInstructions;
1361
1362 for (int Repeat = 0; Repeat < 4; ++Repeat) {
1363 // See if there are materializable instructions across suspend points.
1364 for (Instruction &I : instructions(F))
1365 if (materializable(I))
1366 for (User *U : I.users())
1367 if (Checker.isDefinitionAcrossSuspend(I, U))
1368 Spills.emplace_back(&I, U);
1369
1370 if (Spills.empty())
1371 break;
1372
1373 // Rewrite materializable instructions to be materialized at the use point.
1374 LLVM_DEBUG(dump("Materializations", Spills));
1375 rewriteMaterializableInstructions(Builder, Spills);
1376 Spills.clear();
1377 }
1378
1379 // Collect the spills for arguments and other not-materializable values.
1380 for (Argument &A : F.args())
1381 for (User *U : A.users())
1382 if (Checker.isDefinitionAcrossSuspend(A, U))
1383 Spills.emplace_back(&A, U);
1384
1385 for (Instruction &I : instructions(F)) {
1386 // Values returned from coroutine structure intrinsics should not be part
1387 // of the Coroutine Frame.
1388 if (isCoroutineStructureIntrinsic(I) || &I == Shape.CoroBegin)
1389 continue;
1390
1391 // The Coroutine Promise always included into coroutine frame, no need to
1392 // check for suspend crossing.
1393 if (Shape.ABI == coro::ABI::Switch &&
1394 Shape.SwitchLowering.PromiseAlloca == &I)
1395 continue;
1396
1397 // Handle alloca.alloc specially here.
1398 if (auto AI = dyn_cast<CoroAllocaAllocInst>(&I)) {
1399 // Check whether the alloca's lifetime is bounded by suspend points.
1400 if (isLocalAlloca(AI)) {
1401 LocalAllocas.push_back(AI);
1402 continue;
1403 }
1404
1405 // If not, do a quick rewrite of the alloca and then add spills of
1406 // the rewritten value. The rewrite doesn't invalidate anything in
1407 // Spills because the other alloca intrinsics have no other operands
1408 // besides AI, and it doesn't invalidate the iteration because we delay
1409 // erasing AI.
1410 auto Alloc = lowerNonLocalAlloca(AI, Shape, DeadInstructions);
1411
1412 for (User *U : Alloc->users()) {
1413 if (Checker.isDefinitionAcrossSuspend(*Alloc, U))
1414 Spills.emplace_back(Alloc, U);
1415 }
1416 continue;
1417 }
1418
1419 // Ignore alloca.get; we process this as part of coro.alloca.alloc.
1420 if (isa<CoroAllocaGetInst>(I)) {
1421 continue;
1422 }
1423
1424 for (User *U : I.users())
1425 if (Checker.isDefinitionAcrossSuspend(I, U)) {
1426 // We cannot spill a token.
1427 if (I.getType()->isTokenTy())
1428 report_fatal_error(
1429 "token definition is separated from the use by a suspend point");
1430 Spills.emplace_back(&I, U);
1431 }
1432 }
1433 LLVM_DEBUG(dump("Spills", Spills));
1434 Shape.FrameTy = buildFrameType(F, Shape, Spills);
1435 Shape.FramePtr = insertSpills(Spills, Shape);
1436 lowerLocalAllocas(LocalAllocas, DeadInstructions);
1437
1438 for (auto I : DeadInstructions)
1439 I->eraseFromParent();
1440 }
1441