1 //===-BlockGenerators.h - Helper to generate code for statements-*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the BlockGenerator and VectorBlockGenerator classes, which 10 // generate sequential code and vectorized code for a polyhedral statement, 11 // respectively. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef POLLY_BLOCK_GENERATORS_H 16 #define POLLY_BLOCK_GENERATORS_H 17 18 #include "polly/CodeGen/IRBuilder.h" 19 #include "polly/Support/ScopHelper.h" 20 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 21 #include "isl/isl-noexceptions.h" 22 23 namespace polly { 24 using namespace llvm; 25 class MemoryAccess; 26 class ScopArrayInfo; 27 class IslExprBuilder; 28 29 /// Generate a new basic block for a polyhedral statement. 30 class BlockGenerator { 31 public: 32 typedef llvm::SmallVector<ValueMapT, 8> VectorValueMapT; 33 34 /// Map types to resolve scalar dependences. 35 /// 36 ///@{ 37 using AllocaMapTy = DenseMap<const ScopArrayInfo *, AssertingVH<AllocaInst>>; 38 39 /// Simple vector of instructions to store escape users. 40 using EscapeUserVectorTy = SmallVector<Instruction *, 4>; 41 42 /// Map type to resolve escaping users for scalar instructions. 43 /// 44 /// @see The EscapeMap member. 45 using EscapeUsersAllocaMapTy = 46 MapVector<Instruction *, 47 std::pair<AssertingVH<Value>, EscapeUserVectorTy>>; 48 49 ///@} 50 51 /// Create a generator for basic blocks. 52 /// 53 /// @param Builder The LLVM-IR Builder used to generate the statement. The 54 /// code is generated at the location, the Builder points 55 /// to. 56 /// @param LI The loop info for the current function 57 /// @param SE The scalar evolution info for the current function 58 /// @param DT The dominator tree of this function. 59 /// @param ScalarMap Map from scalars to their demoted location. 60 /// @param EscapeMap Map from scalars to their escape users and locations. 61 /// @param GlobalMap A mapping from llvm::Values used in the original scop 62 /// region to a new set of llvm::Values. Each reference to 63 /// an original value appearing in this mapping is replaced 64 /// with the new value it is mapped to. 65 /// @param ExprBuilder An expression builder to generate new access functions. 66 /// @param StartBlock The first basic block after the RTC. 67 BlockGenerator(PollyIRBuilder &Builder, LoopInfo &LI, ScalarEvolution &SE, 68 DominatorTree &DT, AllocaMapTy &ScalarMap, 69 EscapeUsersAllocaMapTy &EscapeMap, ValueMapT &GlobalMap, 70 IslExprBuilder *ExprBuilder, BasicBlock *StartBlock); 71 72 /// Copy the basic block. 73 /// 74 /// This copies the entire basic block and updates references to old values 75 /// with references to new values, as defined by GlobalMap. 76 /// 77 /// @param Stmt The block statement to code generate. 78 /// @param LTS A map from old loops to new induction variables as 79 /// SCEVs. 80 /// @param NewAccesses A map from memory access ids to new ast expressions, 81 /// which may contain new access expressions for certain 82 /// memory accesses. 83 void copyStmt(ScopStmt &Stmt, LoopToScevMapT <S, 84 isl_id_to_ast_expr *NewAccesses); 85 86 /// Remove a ScopArrayInfo's allocation from the ScalarMap. 87 /// 88 /// This function allows to remove values from the ScalarMap. This is useful 89 /// if the corresponding alloca instruction will be deleted (or moved into 90 /// another module), as without removing these values the underlying 91 /// AssertingVH will trigger due to us still keeping reference to this 92 /// scalar. 93 /// 94 /// @param Array The array for which the alloca was generated. freeScalarAlloc(ScopArrayInfo * Array)95 void freeScalarAlloc(ScopArrayInfo *Array) { ScalarMap.erase(Array); } 96 97 /// Return the alloca for @p Access. 98 /// 99 /// If no alloca was mapped for @p Access a new one is created. 100 /// 101 /// @param Access The memory access for which to generate the alloca. 102 /// 103 /// @returns The alloca for @p Access or a replacement value taken from 104 /// GlobalMap. 105 Value *getOrCreateAlloca(const MemoryAccess &Access); 106 107 /// Return the alloca for @p Array. 108 /// 109 /// If no alloca was mapped for @p Array a new one is created. 110 /// 111 /// @param Array The array for which to generate the alloca. 112 /// 113 /// @returns The alloca for @p Array or a replacement value taken from 114 /// GlobalMap. 115 Value *getOrCreateAlloca(const ScopArrayInfo *Array); 116 117 /// Finalize the code generation for the SCoP @p S. 118 /// 119 /// This will initialize and finalize the scalar variables we demoted during 120 /// the code generation. 121 /// 122 /// @see createScalarInitialization(Scop &) 123 /// @see createScalarFinalization(Region &) 124 void finalizeSCoP(Scop &S); 125 126 /// An empty destructor ~BlockGenerator()127 virtual ~BlockGenerator() {} 128 129 BlockGenerator(const BlockGenerator &) = default; 130 131 protected: 132 PollyIRBuilder &Builder; 133 LoopInfo &LI; 134 ScalarEvolution &SE; 135 IslExprBuilder *ExprBuilder; 136 137 /// The dominator tree of this function. 138 DominatorTree &DT; 139 140 /// The entry block of the current function. 141 BasicBlock *EntryBB; 142 143 /// Map to resolve scalar dependences for PHI operands and scalars. 144 /// 145 /// When translating code that contains scalar dependences as they result from 146 /// inter-block scalar dependences (including the use of data carrying PHI 147 /// nodes), we do not directly regenerate in-register SSA code, but instead 148 /// allocate some stack memory through which these scalar values are passed. 149 /// Only a later pass of -mem2reg will then (re)introduce in-register 150 /// computations. 151 /// 152 /// To keep track of the memory location(s) used to store the data computed by 153 /// a given SSA instruction, we use the map 'ScalarMap'. ScalarMap maps a 154 /// given ScopArrayInfo to the junk of stack allocated memory, that is 155 /// used for code generation. 156 /// 157 /// Up to two different ScopArrayInfo objects are associated with each 158 /// llvm::Value: 159 /// 160 /// MemoryType::Value objects are used for normal scalar dependences that go 161 /// from a scalar definition to its use. Such dependences are lowered by 162 /// directly writing the value an instruction computes into the corresponding 163 /// chunk of memory and reading it back from this chunk of memory right before 164 /// every use of this original scalar value. The memory allocations for 165 /// MemoryType::Value objects end with '.s2a'. 166 /// 167 /// MemoryType::PHI (and MemoryType::ExitPHI) objects are used to model PHI 168 /// nodes. For each PHI nodes we introduce, besides the Array of type 169 /// MemoryType::Value, a second chunk of memory into which we write at the end 170 /// of each basic block preceding the PHI instruction the value passed 171 /// through this basic block. At the place where the PHI node is executed, we 172 /// replace the PHI node with a load from the corresponding MemoryType::PHI 173 /// memory location. The memory allocations for MemoryType::PHI end with 174 /// '.phiops'. 175 /// 176 /// Example: 177 /// 178 /// Input C Code 179 /// ============ 180 /// 181 /// S1: x1 = ... 182 /// for (i=0...N) { 183 /// S2: x2 = phi(x1, add) 184 /// S3: add = x2 + 42; 185 /// } 186 /// S4: print(x1) 187 /// print(x2) 188 /// print(add) 189 /// 190 /// 191 /// Unmodified IR IR After expansion 192 /// ============= ================== 193 /// 194 /// S1: x1 = ... S1: x1 = ... 195 /// x1.s2a = s1 196 /// x2.phiops = s1 197 /// | | 198 /// | <--<--<--<--< | <--<--<--<--< 199 /// | / \ | / \ . 200 /// V V \ V V \ . 201 /// S2: x2 = phi (x1, add) | S2: x2 = x2.phiops | 202 /// | x2.s2a = x2 | 203 /// | | 204 /// S3: add = x2 + 42 | S3: add = x2 + 42 | 205 /// | add.s2a = add | 206 /// | x2.phiops = add | 207 /// | \ / | \ / 208 /// | \ / | \ / 209 /// | >-->-->-->--> | >-->-->-->--> 210 /// V V 211 /// 212 /// S4: x1 = x1.s2a 213 /// S4: ... = x1 ... = x1 214 /// x2 = x2.s2a 215 /// ... = x2 ... = x2 216 /// add = add.s2a 217 /// ... = add ... = add 218 /// 219 /// ScalarMap = { x1:Value -> x1.s2a, x2:Value -> x2.s2a, 220 /// add:Value -> add.s2a, x2:PHI -> x2.phiops } 221 /// 222 /// ??? Why does a PHI-node require two memory chunks ??? 223 /// 224 /// One may wonder why a PHI node requires two memory chunks and not just 225 /// all data is stored in a single location. The following example tries 226 /// to store all data in .s2a and drops the .phiops location: 227 /// 228 /// S1: x1 = ... 229 /// x1.s2a = s1 230 /// x2.s2a = s1 // use .s2a instead of .phiops 231 /// | 232 /// | <--<--<--<--< 233 /// | / \ . 234 /// V V \ . 235 /// S2: x2 = x2.s2a | // value is same as above, but read 236 /// | // from .s2a 237 /// | 238 /// x2.s2a = x2 | // store into .s2a as normal 239 /// | 240 /// S3: add = x2 + 42 | 241 /// add.s2a = add | 242 /// x2.s2a = add | // use s2a instead of .phiops 243 /// | \ / // !!! This is wrong, as x2.s2a now 244 /// | >-->-->-->--> // contains add instead of x2. 245 /// V 246 /// 247 /// S4: x1 = x1.s2a 248 /// ... = x1 249 /// x2 = x2.s2a // !!! We now read 'add' instead of 250 /// ... = x2 // 'x2' 251 /// add = add.s2a 252 /// ... = add 253 /// 254 /// As visible in the example, the SSA value of the PHI node may still be 255 /// needed _after_ the basic block, which could conceptually branch to the 256 /// PHI node, has been run and has overwritten the PHI's old value. Hence, a 257 /// single memory location is not enough to code-generate a PHI node. 258 /// 259 /// Memory locations used for the special PHI node modeling. 260 AllocaMapTy &ScalarMap; 261 262 /// Map from instructions to their escape users as well as the alloca. 263 EscapeUsersAllocaMapTy &EscapeMap; 264 265 /// A map from llvm::Values referenced in the old code to a new set of 266 /// llvm::Values, which is used to replace these old values during 267 /// code generation. 268 ValueMapT &GlobalMap; 269 270 /// The first basic block after the RTC. 271 BasicBlock *StartBlock; 272 273 /// Split @p BB to create a new one we can use to clone @p BB in. 274 BasicBlock *splitBB(BasicBlock *BB); 275 276 /// Copy the given basic block. 277 /// 278 /// @param Stmt The statement to code generate. 279 /// @param BB The basic block to code generate. 280 /// @param BBMap A mapping from old values to their new values in this 281 /// block. 282 /// @param LTS A map from old loops to new induction variables as 283 /// SCEVs. 284 /// @param NewAccesses A map from memory access ids to new ast expressions, 285 /// which may contain new access expressions for certain 286 /// memory accesses. 287 /// 288 /// @returns The copy of the basic block. 289 BasicBlock *copyBB(ScopStmt &Stmt, BasicBlock *BB, ValueMapT &BBMap, 290 LoopToScevMapT <S, isl_id_to_ast_expr *NewAccesses); 291 292 /// Copy the given basic block. 293 /// 294 /// @param Stmt The statement to code generate. 295 /// @param BB The basic block to code generate. 296 /// @param BBCopy The new basic block to generate code in. 297 /// @param BBMap A mapping from old values to their new values in this 298 /// block. 299 /// @param LTS A map from old loops to new induction variables as 300 /// SCEVs. 301 /// @param NewAccesses A map from memory access ids to new ast expressions, 302 /// which may contain new access expressions for certain 303 /// memory accesses. 304 void copyBB(ScopStmt &Stmt, BasicBlock *BB, BasicBlock *BBCopy, 305 ValueMapT &BBMap, LoopToScevMapT <S, 306 isl_id_to_ast_expr *NewAccesses); 307 308 /// Generate reload of scalars demoted to memory and needed by @p Stmt. 309 /// 310 /// @param Stmt The statement we generate code for. 311 /// @param LTS A mapping from loops virtual canonical induction 312 /// variable to their new values. 313 /// @param BBMap A mapping from old values to their new values in this block. 314 /// @param NewAccesses A map from memory access ids to new ast expressions. 315 void generateScalarLoads(ScopStmt &Stmt, LoopToScevMapT <S, 316 ValueMapT &BBMap, 317 __isl_keep isl_id_to_ast_expr *NewAccesses); 318 319 /// When statement tracing is enabled, build the print instructions for 320 /// printing the current statement instance. 321 /// 322 /// The printed output looks like: 323 /// 324 /// Stmt1(0) 325 /// 326 /// If printing of scalars is enabled, it also appends the value of each 327 /// scalar to the line: 328 /// 329 /// Stmt1(0) %i=1 %sum=5 330 /// 331 /// @param Stmt The statement we generate code for. 332 /// @param LTS A mapping from loops virtual canonical induction 333 /// variable to their new values. 334 /// @param BBMap A mapping from old values to their new values in this block. 335 void generateBeginStmtTrace(ScopStmt &Stmt, LoopToScevMapT <S, 336 ValueMapT &BBMap); 337 338 /// Generate instructions that compute whether one instance of @p Set is 339 /// executed. 340 /// 341 /// @param Stmt The statement we generate code for. 342 /// @param Subdomain A set in the space of @p Stmt's domain. Elements not in 343 /// @p Stmt's domain are ignored. 344 /// 345 /// @return An expression of type i1, generated into the current builder 346 /// position, that evaluates to 1 if the executed instance is part of 347 /// @p Set. 348 Value *buildContainsCondition(ScopStmt &Stmt, const isl::set &Subdomain); 349 350 /// Generate code that executes in a subset of @p Stmt's domain. 351 /// 352 /// @param Stmt The statement we generate code for. 353 /// @param Subdomain The condition for some code to be executed. 354 /// @param Subject A name for the code that is executed 355 /// conditionally. Used to name new basic blocks and 356 /// instructions. 357 /// @param GenThenFunc Callback which generates the code to be executed 358 /// when the current executed instance is in @p Set. The 359 /// IRBuilder's position is moved to within the block that 360 /// executes conditionally for this callback. 361 void generateConditionalExecution(ScopStmt &Stmt, const isl::set &Subdomain, 362 StringRef Subject, 363 const std::function<void()> &GenThenFunc); 364 365 /// Generate the scalar stores for the given statement. 366 /// 367 /// After the statement @p Stmt was copied all inner-SCoP scalar dependences 368 /// starting in @p Stmt (hence all scalar write accesses in @p Stmt) need to 369 /// be demoted to memory. 370 /// 371 /// @param Stmt The statement we generate code for. 372 /// @param LTS A mapping from loops virtual canonical induction 373 /// variable to their new values 374 /// (for values recalculated in the new ScoP, but not 375 /// within this basic block) 376 /// @param BBMap A mapping from old values to their new values in this block. 377 /// @param NewAccesses A map from memory access ids to new ast expressions. 378 virtual void generateScalarStores(ScopStmt &Stmt, LoopToScevMapT <S, 379 ValueMapT &BBMap, 380 __isl_keep isl_id_to_ast_expr *NewAccesses); 381 382 /// Handle users of @p Array outside the SCoP. 383 /// 384 /// @param S The current SCoP. 385 /// @param Inst The ScopArrayInfo to handle. 386 void handleOutsideUsers(const Scop &S, ScopArrayInfo *Array); 387 388 /// Find scalar statements that have outside users. 389 /// 390 /// We register these scalar values to later update subsequent scalar uses of 391 /// these values to either use the newly computed value from within the scop 392 /// (if the scop was executed) or the unchanged original code (if the run-time 393 /// check failed). 394 /// 395 /// @param S The scop for which to find the outside users. 396 void findOutsideUsers(Scop &S); 397 398 /// Initialize the memory of demoted scalars. 399 /// 400 /// @param S The scop for which to generate the scalar initializers. 401 void createScalarInitialization(Scop &S); 402 403 /// Create exit PHI node merges for PHI nodes with more than two edges 404 /// from inside the scop. 405 /// 406 /// For scops which have a PHI node in the exit block that has more than two 407 /// incoming edges from inside the scop region, we require some special 408 /// handling to understand which of the possible values will be passed to the 409 /// PHI node from inside the optimized version of the scop. To do so ScopInfo 410 /// models the possible incoming values as write accesses of the ScopStmts. 411 /// 412 /// This function creates corresponding code to reload the computed outgoing 413 /// value from the stack slot it has been stored into and to pass it on to the 414 /// PHI node in the original exit block. 415 /// 416 /// @param S The scop for which to generate the exiting PHI nodes. 417 void createExitPHINodeMerges(Scop &S); 418 419 /// Promote the values of demoted scalars after the SCoP. 420 /// 421 /// If a scalar value was used outside the SCoP we need to promote the value 422 /// stored in the memory cell allocated for that scalar and combine it with 423 /// the original value in the non-optimized SCoP. 424 void createScalarFinalization(Scop &S); 425 426 /// Try to synthesize a new value 427 /// 428 /// Given an old value, we try to synthesize it in a new context from its 429 /// original SCEV expression. We start from the original SCEV expression, 430 /// then replace outdated parameter and loop references, and finally 431 /// expand it to code that computes this updated expression. 432 /// 433 /// @param Stmt The statement to code generate 434 /// @param Old The old Value 435 /// @param BBMap A mapping from old values to their new values 436 /// (for values recalculated within this basic block) 437 /// @param LTS A mapping from loops virtual canonical induction 438 /// variable to their new values 439 /// (for values recalculated in the new ScoP, but not 440 /// within this basic block) 441 /// @param L The loop that surrounded the instruction that referenced 442 /// this value in the original code. This loop is used to 443 /// evaluate the scalar evolution at the right scope. 444 /// 445 /// @returns o A newly synthesized value. 446 /// o NULL, if synthesizing the value failed. 447 Value *trySynthesizeNewValue(ScopStmt &Stmt, Value *Old, ValueMapT &BBMap, 448 LoopToScevMapT <S, Loop *L) const; 449 450 /// Get the new version of a value. 451 /// 452 /// Given an old value, we first check if a new version of this value is 453 /// available in the BBMap or GlobalMap. In case it is not and the value can 454 /// be recomputed using SCEV, we do so. If we can not recompute a value 455 /// using SCEV, but we understand that the value is constant within the scop, 456 /// we return the old value. If the value can still not be derived, this 457 /// function will assert. 458 /// 459 /// @param Stmt The statement to code generate. 460 /// @param Old The old Value. 461 /// @param BBMap A mapping from old values to their new values 462 /// (for values recalculated within this basic block). 463 /// @param LTS A mapping from loops virtual canonical induction 464 /// variable to their new values 465 /// (for values recalculated in the new ScoP, but not 466 /// within this basic block). 467 /// @param L The loop that surrounded the instruction that referenced 468 /// this value in the original code. This loop is used to 469 /// evaluate the scalar evolution at the right scope. 470 /// 471 /// @returns o The old value, if it is still valid. 472 /// o The new value, if available. 473 /// o NULL, if no value is found. 474 Value *getNewValue(ScopStmt &Stmt, Value *Old, ValueMapT &BBMap, 475 LoopToScevMapT <S, Loop *L) const; 476 477 void copyInstScalar(ScopStmt &Stmt, Instruction *Inst, ValueMapT &BBMap, 478 LoopToScevMapT <S); 479 480 /// Get the innermost loop that surrounds the statement @p Stmt. 481 Loop *getLoopForStmt(const ScopStmt &Stmt) const; 482 483 /// Generate the operand address 484 /// @param NewAccesses A map from memory access ids to new ast expressions, 485 /// which may contain new access expressions for certain 486 /// memory accesses. 487 Value *generateLocationAccessed(ScopStmt &Stmt, MemAccInst Inst, 488 ValueMapT &BBMap, LoopToScevMapT <S, 489 isl_id_to_ast_expr *NewAccesses); 490 491 /// Generate the operand address. 492 /// 493 /// @param Stmt The statement to generate code for. 494 /// @param L The innermost loop that surrounds the statement. 495 /// @param Pointer If the access expression is not changed (ie. not found 496 /// in @p LTS), use this Pointer from the original code 497 /// instead. 498 /// @param BBMap A mapping from old values to their new values. 499 /// @param LTS A mapping from loops virtual canonical induction 500 /// variable to their new values. 501 /// @param NewAccesses Ahead-of-time generated access expressions. 502 /// @param Id Identifier of the MemoryAccess to generate. 503 /// @param ExpectedType The type the returned value should have. 504 /// 505 /// @return The generated address. 506 Value *generateLocationAccessed(ScopStmt &Stmt, Loop *L, Value *Pointer, 507 ValueMapT &BBMap, LoopToScevMapT <S, 508 isl_id_to_ast_expr *NewAccesses, 509 __isl_take isl_id *Id, Type *ExpectedType); 510 511 /// Generate the pointer value that is accesses by @p Access. 512 /// 513 /// For write accesses, generate the target address. For read accesses, 514 /// generate the source address. 515 /// The access can be either an array access or a scalar access. In the first 516 /// case, the returned address will point to an element into that array. In 517 /// the scalar case, an alloca is used. 518 /// If a new AccessRelation is set for the MemoryAccess, the new relation will 519 /// be used. 520 /// 521 /// @param Access The access to generate a pointer for. 522 /// @param L The innermost loop that surrounds the statement. 523 /// @param LTS A mapping from loops virtual canonical induction 524 /// variable to their new values. 525 /// @param BBMap A mapping from old values to their new values. 526 /// @param NewAccesses A map from memory access ids to new ast expressions. 527 /// 528 /// @return The generated address. 529 Value *getImplicitAddress(MemoryAccess &Access, Loop *L, LoopToScevMapT <S, 530 ValueMapT &BBMap, 531 __isl_keep isl_id_to_ast_expr *NewAccesses); 532 533 /// @param NewAccesses A map from memory access ids to new ast expressions, 534 /// which may contain new access expressions for certain 535 /// memory accesses. 536 Value *generateArrayLoad(ScopStmt &Stmt, LoadInst *load, ValueMapT &BBMap, 537 LoopToScevMapT <S, 538 isl_id_to_ast_expr *NewAccesses); 539 540 /// @param NewAccesses A map from memory access ids to new ast expressions, 541 /// which may contain new access expressions for certain 542 /// memory accesses. 543 void generateArrayStore(ScopStmt &Stmt, StoreInst *store, ValueMapT &BBMap, 544 LoopToScevMapT <S, isl_id_to_ast_expr *NewAccesses); 545 546 /// Copy a single PHI instruction. 547 /// 548 /// The implementation in the BlockGenerator is trivial, however it allows 549 /// subclasses to handle PHIs different. copyPHIInstruction(ScopStmt &,PHINode *,ValueMapT &,LoopToScevMapT &)550 virtual void copyPHIInstruction(ScopStmt &, PHINode *, ValueMapT &, 551 LoopToScevMapT &) {} 552 553 /// Copy a single Instruction. 554 /// 555 /// This copies a single Instruction and updates references to old values 556 /// with references to new values, as defined by GlobalMap and BBMap. 557 /// 558 /// @param Stmt The statement to code generate. 559 /// @param Inst The instruction to copy. 560 /// @param BBMap A mapping from old values to their new values 561 /// (for values recalculated within this basic block). 562 /// @param GlobalMap A mapping from old values to their new values 563 /// (for values recalculated in the new ScoP, but not 564 /// within this basic block). 565 /// @param LTS A mapping from loops virtual canonical induction 566 /// variable to their new values 567 /// (for values recalculated in the new ScoP, but not 568 /// within this basic block). 569 /// @param NewAccesses A map from memory access ids to new ast expressions, 570 /// which may contain new access expressions for certain 571 /// memory accesses. 572 void copyInstruction(ScopStmt &Stmt, Instruction *Inst, ValueMapT &BBMap, 573 LoopToScevMapT <S, isl_id_to_ast_expr *NewAccesses); 574 575 /// Helper to determine if @p Inst can be synthesized in @p Stmt. 576 /// 577 /// @returns false, iff @p Inst can be synthesized in @p Stmt. 578 bool canSyntheziseInStmt(ScopStmt &Stmt, Instruction *Inst); 579 580 /// Remove dead instructions generated for BB 581 /// 582 /// @param BB The basic block code for which code has been generated. 583 /// @param BBMap A local map from old to new instructions. 584 void removeDeadInstructions(BasicBlock *BB, ValueMapT &BBMap); 585 586 /// Invalidate the scalar evolution expressions for a scop. 587 /// 588 /// This function invalidates the scalar evolution results for all 589 /// instructions that are part of a given scop, and the loops 590 /// surrounding the users of merge blocks. This is necessary to ensure that 591 /// later scops do not obtain scalar evolution expressions that reference 592 /// values that earlier dominated the later scop, but have been moved in the 593 /// conditional part of an earlier scop and consequently do not any more 594 /// dominate the later scop. 595 /// 596 /// @param S The scop to invalidate. 597 void invalidateScalarEvolution(Scop &S); 598 }; 599 600 /// Generate a new vector basic block for a polyhedral statement. 601 /// 602 /// The only public function exposed is generate(). 603 class VectorBlockGenerator : BlockGenerator { 604 public: 605 /// Generate a new vector basic block for a ScoPStmt. 606 /// 607 /// This code generation is similar to the normal, scalar code generation, 608 /// except that each instruction is code generated for several vector lanes 609 /// at a time. If possible instructions are issued as actual vector 610 /// instructions, but e.g. for address calculation instructions we currently 611 /// generate scalar instructions for each vector lane. 612 /// 613 /// @param BlockGen A block generator object used as parent. 614 /// @param Stmt The statement to code generate. 615 /// @param VLTS A mapping from loops virtual canonical induction 616 /// variable to their new values 617 /// (for values recalculated in the new ScoP, but not 618 /// within this basic block), one for each lane. 619 /// @param Schedule A map from the statement to a schedule where the 620 /// innermost dimension is the dimension of the innermost 621 /// loop containing the statement. 622 /// @param NewAccesses A map from memory access ids to new ast expressions, 623 /// which may contain new access expressions for certain 624 /// memory accesses. generate(BlockGenerator & BlockGen,ScopStmt & Stmt,std::vector<LoopToScevMapT> & VLTS,__isl_keep isl_map * Schedule,__isl_keep isl_id_to_ast_expr * NewAccesses)625 static void generate(BlockGenerator &BlockGen, ScopStmt &Stmt, 626 std::vector<LoopToScevMapT> &VLTS, 627 __isl_keep isl_map *Schedule, 628 __isl_keep isl_id_to_ast_expr *NewAccesses) { 629 VectorBlockGenerator Generator(BlockGen, VLTS, Schedule); 630 Generator.copyStmt(Stmt, NewAccesses); 631 } 632 633 private: 634 // This is a vector of loop->scev maps. The first map is used for the first 635 // vector lane, ... 636 // Each map, contains information about Instructions in the old ScoP, which 637 // are recalculated in the new SCoP. When copying the basic block, we replace 638 // all references to the old instructions with their recalculated values. 639 // 640 // For example, when the code generator produces this AST: 641 // 642 // for (int c1 = 0; c1 <= 1023; c1 += 1) 643 // for (int c2 = 0; c2 <= 1023; c2 += VF) 644 // for (int lane = 0; lane <= VF; lane += 1) 645 // Stmt(c2 + lane + 3, c1); 646 // 647 // VLTS[lane] contains a map: 648 // "outer loop in the old loop nest" -> SCEV("c2 + lane + 3"), 649 // "inner loop in the old loop nest" -> SCEV("c1"). 650 std::vector<LoopToScevMapT> &VLTS; 651 652 // A map from the statement to a schedule where the innermost dimension is the 653 // dimension of the innermost loop containing the statement. 654 isl_map *Schedule; 655 656 VectorBlockGenerator(BlockGenerator &BlockGen, 657 std::vector<LoopToScevMapT> &VLTS, 658 __isl_keep isl_map *Schedule); 659 660 int getVectorWidth(); 661 662 Value *getVectorValue(ScopStmt &Stmt, Value *Old, ValueMapT &VectorMap, 663 VectorValueMapT &ScalarMaps, Loop *L); 664 665 Type *getVectorPtrTy(const Value *V, int Width); 666 667 /// Load a vector from a set of adjacent scalars 668 /// 669 /// In case a set of scalars is known to be next to each other in memory, 670 /// create a vector load that loads those scalars 671 /// 672 /// %vector_ptr= bitcast double* %p to <4 x double>* 673 /// %vec_full = load <4 x double>* %vector_ptr 674 /// 675 /// @param Stmt The statement to code generate. 676 /// @param NegativeStride This is used to indicate a -1 stride. In such 677 /// a case we load the end of a base address and 678 /// shuffle the accesses in reverse order into the 679 /// vector. By default we would do only positive 680 /// strides. 681 /// 682 /// @param NewAccesses A map from memory access ids to new ast 683 /// expressions, which may contain new access 684 /// expressions for certain memory accesses. 685 Value *generateStrideOneLoad(ScopStmt &Stmt, LoadInst *Load, 686 VectorValueMapT &ScalarMaps, 687 __isl_keep isl_id_to_ast_expr *NewAccesses, 688 bool NegativeStride); 689 690 /// Load a vector initialized from a single scalar in memory 691 /// 692 /// In case all elements of a vector are initialized to the same 693 /// scalar value, this value is loaded and shuffled into all elements 694 /// of the vector. 695 /// 696 /// %splat_one = load <1 x double>* %p 697 /// %splat = shufflevector <1 x double> %splat_one, <1 x 698 /// double> %splat_one, <4 x i32> zeroinitializer 699 /// 700 /// @param NewAccesses A map from memory access ids to new ast expressions, 701 /// which may contain new access expressions for certain 702 /// memory accesses. 703 Value *generateStrideZeroLoad(ScopStmt &Stmt, LoadInst *Load, 704 ValueMapT &BBMap, 705 __isl_keep isl_id_to_ast_expr *NewAccesses); 706 707 /// Load a vector from scalars distributed in memory 708 /// 709 /// In case some scalars a distributed randomly in memory. Create a vector 710 /// by loading each scalar and by inserting one after the other into the 711 /// vector. 712 /// 713 /// %scalar_1= load double* %p_1 714 /// %vec_1 = insertelement <2 x double> undef, double %scalar_1, i32 0 715 /// %scalar 2 = load double* %p_2 716 /// %vec_2 = insertelement <2 x double> %vec_1, double %scalar_1, i32 1 717 /// 718 /// @param NewAccesses A map from memory access ids to new ast expressions, 719 /// which may contain new access expressions for certain 720 /// memory accesses. 721 Value *generateUnknownStrideLoad(ScopStmt &Stmt, LoadInst *Load, 722 VectorValueMapT &ScalarMaps, 723 __isl_keep isl_id_to_ast_expr *NewAccesses); 724 725 /// @param NewAccesses A map from memory access ids to new ast expressions, 726 /// which may contain new access expressions for certain 727 /// memory accesses. 728 void generateLoad(ScopStmt &Stmt, LoadInst *Load, ValueMapT &VectorMap, 729 VectorValueMapT &ScalarMaps, 730 __isl_keep isl_id_to_ast_expr *NewAccesses); 731 732 void copyUnaryInst(ScopStmt &Stmt, UnaryInstruction *Inst, 733 ValueMapT &VectorMap, VectorValueMapT &ScalarMaps); 734 735 void copyBinaryInst(ScopStmt &Stmt, BinaryOperator *Inst, 736 ValueMapT &VectorMap, VectorValueMapT &ScalarMaps); 737 738 /// @param NewAccesses A map from memory access ids to new ast expressions, 739 /// which may contain new access expressions for certain 740 /// memory accesses. 741 void copyStore(ScopStmt &Stmt, StoreInst *Store, ValueMapT &VectorMap, 742 VectorValueMapT &ScalarMaps, 743 __isl_keep isl_id_to_ast_expr *NewAccesses); 744 745 /// @param NewAccesses A map from memory access ids to new ast expressions, 746 /// which may contain new access expressions for certain 747 /// memory accesses. 748 void copyInstScalarized(ScopStmt &Stmt, Instruction *Inst, 749 ValueMapT &VectorMap, VectorValueMapT &ScalarMaps, 750 __isl_keep isl_id_to_ast_expr *NewAccesses); 751 752 bool extractScalarValues(const Instruction *Inst, ValueMapT &VectorMap, 753 VectorValueMapT &ScalarMaps); 754 755 bool hasVectorOperands(const Instruction *Inst, ValueMapT &VectorMap); 756 757 /// Generate vector loads for scalars. 758 /// 759 /// @param Stmt The scop statement for which to generate the loads. 760 /// @param VectorBlockMap A map that will be updated to relate the original 761 /// values with the newly generated vector loads. 762 void generateScalarVectorLoads(ScopStmt &Stmt, ValueMapT &VectorBlockMap); 763 764 /// Verify absence of scalar stores. 765 /// 766 /// @param Stmt The scop statement to check for scalar stores. 767 void verifyNoScalarStores(ScopStmt &Stmt); 768 769 /// @param NewAccesses A map from memory access ids to new ast expressions, 770 /// which may contain new access expressions for certain 771 /// memory accesses. 772 void copyInstruction(ScopStmt &Stmt, Instruction *Inst, ValueMapT &VectorMap, 773 VectorValueMapT &ScalarMaps, 774 __isl_keep isl_id_to_ast_expr *NewAccesses); 775 776 /// @param NewAccesses A map from memory access ids to new ast expressions, 777 /// which may contain new access expressions for certain 778 /// memory accesses. 779 void copyStmt(ScopStmt &Stmt, __isl_keep isl_id_to_ast_expr *NewAccesses); 780 }; 781 782 /// Generator for new versions of polyhedral region statements. 783 class RegionGenerator : public BlockGenerator { 784 public: 785 /// Create a generator for regions. 786 /// 787 /// @param BlockGen A generator for basic blocks. RegionGenerator(BlockGenerator & BlockGen)788 RegionGenerator(BlockGenerator &BlockGen) : BlockGenerator(BlockGen) {} 789 ~RegionGenerator()790 virtual ~RegionGenerator() {} 791 792 /// Copy the region statement @p Stmt. 793 /// 794 /// This copies the entire region represented by @p Stmt and updates 795 /// references to old values with references to new values, as defined by 796 /// GlobalMap. 797 /// 798 /// @param Stmt The statement to code generate. 799 /// @param LTS A map from old loops to new induction variables as SCEVs. 800 void copyStmt(ScopStmt &Stmt, LoopToScevMapT <S, 801 __isl_keep isl_id_to_ast_expr *IdToAstExp); 802 803 private: 804 /// A map from old to the first new block in the region, that was created to 805 /// model the old basic block. 806 DenseMap<BasicBlock *, BasicBlock *> StartBlockMap; 807 808 /// A map from old to the last new block in the region, that was created to 809 /// model the old basic block. 810 DenseMap<BasicBlock *, BasicBlock *> EndBlockMap; 811 812 /// The "BBMaps" for the whole region (one for each block). In case a basic 813 /// block is code generated to multiple basic blocks (e.g., for partial 814 /// writes), the StartBasic is used as index for the RegionMap. 815 DenseMap<BasicBlock *, ValueMapT> RegionMaps; 816 817 /// Mapping to remember PHI nodes that still need incoming values. 818 using PHINodePairTy = std::pair<PHINode *, PHINode *>; 819 DenseMap<BasicBlock *, SmallVector<PHINodePairTy, 4>> IncompletePHINodeMap; 820 821 /// Repair the dominance tree after we created a copy block for @p BB. 822 /// 823 /// @returns The immediate dominator in the DT for @p BBCopy if in the region. 824 BasicBlock *repairDominance(BasicBlock *BB, BasicBlock *BBCopy); 825 826 /// Add the new operand from the copy of @p IncomingBB to @p PHICopy. 827 /// 828 /// PHI nodes, which may have (multiple) edges that enter from outside the 829 /// non-affine subregion and even from outside the scop, are code generated as 830 /// follows: 831 /// 832 /// # Original 833 /// 834 /// Region: %A-> %exit 835 /// NonAffine Stmt: %nonaffB -> %D (includes %nonaffB, %nonaffC) 836 /// 837 /// pre: 838 /// %val = add i64 1, 1 839 /// 840 /// A: 841 /// br label %nonaff 842 /// 843 /// nonaffB: 844 /// %phi = phi i64 [%val, %A], [%valC, %nonAffC], [%valD, %D] 845 /// %cmp = <nonaff> 846 /// br i1 %cmp, label %C, label %nonaffC 847 /// 848 /// nonaffC: 849 /// %valC = add i64 1, 1 850 /// br i1 undef, label %D, label %nonaffB 851 /// 852 /// D: 853 /// %valD = ... 854 /// %exit_cond = <loopexit> 855 /// br i1 %exit_cond, label %nonaffB, label %exit 856 /// 857 /// exit: 858 /// ... 859 /// 860 /// - %start and %C enter from outside the non-affine region. 861 /// - %nonaffC enters from within the non-affine region. 862 /// 863 /// # New 864 /// 865 /// polly.A: 866 /// store i64 %val, i64* %phi.phiops 867 /// br label %polly.nonaffA.entry 868 /// 869 /// polly.nonaffB.entry: 870 /// %phi.phiops.reload = load i64, i64* %phi.phiops 871 /// br label %nonaffB 872 /// 873 /// polly.nonaffB: 874 /// %polly.phi = [%phi.phiops.reload, %nonaffB.entry], 875 /// [%p.valC, %polly.nonaffC] 876 /// 877 /// polly.nonaffC: 878 /// %p.valC = add i64 1, 1 879 /// br i1 undef, label %polly.D, label %polly.nonaffB 880 /// 881 /// polly.D: 882 /// %p.valD = ... 883 /// store i64 %p.valD, i64* %phi.phiops 884 /// %p.exit_cond = <loopexit> 885 /// br i1 %p.exit_cond, label %polly.nonaffB, label %exit 886 /// 887 /// Values that enter the PHI from outside the non-affine region are stored 888 /// into the stack slot %phi.phiops by statements %polly.A and %polly.D and 889 /// reloaded in %polly.nonaffB.entry, a basic block generated before the 890 /// actual non-affine region. 891 /// 892 /// When generating the PHI node of the non-affine region in %polly.nonaffB, 893 /// incoming edges from outside the region are combined into a single branch 894 /// from %polly.nonaffB.entry which has as incoming value the value reloaded 895 /// from the %phi.phiops stack slot. Incoming edges from within the region 896 /// refer to the copied instructions (%p.valC) and basic blocks 897 /// (%polly.nonaffC) of the non-affine region. 898 /// 899 /// @param Stmt The statement to code generate. 900 /// @param PHI The original PHI we copy. 901 /// @param PHICopy The copy of @p PHI. 902 /// @param IncomingBB An incoming block of @p PHI. 903 /// @param LTS A map from old loops to new induction variables as 904 /// SCEVs. 905 void addOperandToPHI(ScopStmt &Stmt, PHINode *PHI, PHINode *PHICopy, 906 BasicBlock *IncomingBB, LoopToScevMapT <S); 907 908 /// Create a PHI that combines the incoming values from all incoming blocks 909 /// that are in the subregion. 910 /// 911 /// PHIs in the subregion's exit block can have incoming edges from within and 912 /// outside the subregion. This function combines the incoming values from 913 /// within the subregion to appear as if there is only one incoming edge from 914 /// the subregion (an additional exit block is created by RegionGenerator). 915 /// This is to avoid that a value is written to the .phiops location without 916 /// leaving the subregion because the exiting block as an edge back into the 917 /// subregion. 918 /// 919 /// @param MA The WRITE of MemoryKind::PHI/MemoryKind::ExitPHI for a PHI in 920 /// the subregion's exit block. 921 /// @param LTS Virtual induction variable mapping. 922 /// @param BBMap A mapping from old values to their new values in this block. 923 /// @param L Loop surrounding this region statement. 924 /// 925 /// @returns The constructed PHI node. 926 PHINode *buildExitPHI(MemoryAccess *MA, LoopToScevMapT <S, ValueMapT &BBMap, 927 Loop *L); 928 929 /// @param Return the new value of a scalar write, creating a PHINode if 930 /// necessary. 931 /// 932 /// @param MA A scalar WRITE MemoryAccess. 933 /// @param LTS Virtual induction variable mapping. 934 /// @param BBMap A mapping from old values to their new values in this block. 935 /// 936 /// @returns The effective value of @p MA's written value when leaving the 937 /// subregion. 938 /// @see buildExitPHI 939 Value *getExitScalar(MemoryAccess *MA, LoopToScevMapT <S, ValueMapT &BBMap); 940 941 /// Generate the scalar stores for the given statement. 942 /// 943 /// After the statement @p Stmt was copied all inner-SCoP scalar dependences 944 /// starting in @p Stmt (hence all scalar write accesses in @p Stmt) need to 945 /// be demoted to memory. 946 /// 947 /// @param Stmt The statement we generate code for. 948 /// @param LTS A mapping from loops virtual canonical induction variable to 949 /// their new values (for values recalculated in the new ScoP, 950 /// but not within this basic block) 951 /// @param BBMap A mapping from old values to their new values in this block. 952 /// @param LTS A mapping from loops virtual canonical induction variable to 953 /// their new values. 954 virtual void 955 generateScalarStores(ScopStmt &Stmt, LoopToScevMapT <S, ValueMapT &BBMAp, 956 __isl_keep isl_id_to_ast_expr *NewAccesses) override; 957 958 /// Copy a single PHI instruction. 959 /// 960 /// This copies a single PHI instruction and updates references to old values 961 /// with references to new values, as defined by GlobalMap and BBMap. 962 /// 963 /// @param Stmt The statement to code generate. 964 /// @param PHI The PHI instruction to copy. 965 /// @param BBMap A mapping from old values to their new values 966 /// (for values recalculated within this basic block). 967 /// @param LTS A map from old loops to new induction variables as SCEVs. 968 virtual void copyPHIInstruction(ScopStmt &Stmt, PHINode *Inst, 969 ValueMapT &BBMap, 970 LoopToScevMapT <S) override; 971 }; 972 } // namespace polly 973 #endif 974