1 //===- InstCombiner.h - InstCombine implementation --------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// 10 /// This file provides the interface for the instcombine pass implementation. 11 /// The interface is used for generic transformations in this folder and 12 /// target specific combinations in the targets. 13 /// The visitor implementation is in \c InstCombinerImpl in 14 /// \c InstCombineInternal.h. 15 /// 16 //===----------------------------------------------------------------------===// 17 18 #ifndef LLVM_TRANSFORMS_INSTCOMBINE_INSTCOMBINER_H 19 #define LLVM_TRANSFORMS_INSTCOMBINE_INSTCOMBINER_H 20 21 #include "llvm/Analysis/InstructionSimplify.h" 22 #include "llvm/Analysis/TargetFolder.h" 23 #include "llvm/Analysis/ValueTracking.h" 24 #include "llvm/IR/IRBuilder.h" 25 #include "llvm/IR/PatternMatch.h" 26 #include "llvm/Support/Debug.h" 27 #include "llvm/Support/KnownBits.h" 28 #include "llvm/Transforms/InstCombine/InstCombineWorklist.h" 29 #include <cassert> 30 31 #define DEBUG_TYPE "instcombine" 32 33 namespace llvm { 34 35 class AAResults; 36 class AssumptionCache; 37 class ProfileSummaryInfo; 38 class TargetLibraryInfo; 39 class TargetTransformInfo; 40 41 /// The core instruction combiner logic. 42 /// 43 /// This class provides both the logic to recursively visit instructions and 44 /// combine them. 45 class LLVM_LIBRARY_VISIBILITY InstCombiner { 46 /// Only used to call target specific inst combining. 47 TargetTransformInfo &TTI; 48 49 public: 50 /// Maximum size of array considered when transforming. 51 uint64_t MaxArraySizeForCombine = 0; 52 53 /// An IRBuilder that automatically inserts new instructions into the 54 /// worklist. 55 using BuilderTy = IRBuilder<TargetFolder, IRBuilderCallbackInserter>; 56 BuilderTy &Builder; 57 58 protected: 59 /// A worklist of the instructions that need to be simplified. 60 InstCombineWorklist &Worklist; 61 62 // Mode in which we are running the combiner. 63 const bool MinimizeSize; 64 65 AAResults *AA; 66 67 // Required analyses. 68 AssumptionCache &AC; 69 TargetLibraryInfo &TLI; 70 DominatorTree &DT; 71 const DataLayout &DL; 72 const SimplifyQuery SQ; 73 OptimizationRemarkEmitter &ORE; 74 BlockFrequencyInfo *BFI; 75 ProfileSummaryInfo *PSI; 76 77 // Optional analyses. When non-null, these can both be used to do better 78 // combining and will be updated to reflect any changes. 79 LoopInfo *LI; 80 81 bool MadeIRChange = false; 82 83 public: InstCombiner(InstCombineWorklist & Worklist,BuilderTy & Builder,bool MinimizeSize,AAResults * AA,AssumptionCache & AC,TargetLibraryInfo & TLI,TargetTransformInfo & TTI,DominatorTree & DT,OptimizationRemarkEmitter & ORE,BlockFrequencyInfo * BFI,ProfileSummaryInfo * PSI,const DataLayout & DL,LoopInfo * LI)84 InstCombiner(InstCombineWorklist &Worklist, BuilderTy &Builder, 85 bool MinimizeSize, AAResults *AA, AssumptionCache &AC, 86 TargetLibraryInfo &TLI, TargetTransformInfo &TTI, 87 DominatorTree &DT, OptimizationRemarkEmitter &ORE, 88 BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, 89 const DataLayout &DL, LoopInfo *LI) 90 : TTI(TTI), Builder(Builder), Worklist(Worklist), 91 MinimizeSize(MinimizeSize), AA(AA), AC(AC), TLI(TLI), DT(DT), DL(DL), 92 SQ(DL, &TLI, &DT, &AC), ORE(ORE), BFI(BFI), PSI(PSI), LI(LI) {} 93 ~InstCombiner()94 virtual ~InstCombiner() {} 95 96 /// Return the source operand of a potentially bitcasted value while 97 /// optionally checking if it has one use. If there is no bitcast or the one 98 /// use check is not met, return the input value itself. 99 static Value *peekThroughBitcast(Value *V, bool OneUseOnly = false) { 100 if (auto *BitCast = dyn_cast<BitCastInst>(V)) 101 if (!OneUseOnly || BitCast->hasOneUse()) 102 return BitCast->getOperand(0); 103 104 // V is not a bitcast or V has more than one use and OneUseOnly is true. 105 return V; 106 } 107 108 /// Assign a complexity or rank value to LLVM Values. This is used to reduce 109 /// the amount of pattern matching needed for compares and commutative 110 /// instructions. For example, if we have: 111 /// icmp ugt X, Constant 112 /// or 113 /// xor (add X, Constant), cast Z 114 /// 115 /// We do not have to consider the commuted variants of these patterns because 116 /// canonicalization based on complexity guarantees the above ordering. 117 /// 118 /// This routine maps IR values to various complexity ranks: 119 /// 0 -> undef 120 /// 1 -> Constants 121 /// 2 -> Other non-instructions 122 /// 3 -> Arguments 123 /// 4 -> Cast and (f)neg/not instructions 124 /// 5 -> Other instructions getComplexity(Value * V)125 static unsigned getComplexity(Value *V) { 126 if (isa<Instruction>(V)) { 127 if (isa<CastInst>(V) || match(V, m_Neg(PatternMatch::m_Value())) || 128 match(V, m_Not(PatternMatch::m_Value())) || 129 match(V, m_FNeg(PatternMatch::m_Value()))) 130 return 4; 131 return 5; 132 } 133 if (isa<Argument>(V)) 134 return 3; 135 return isa<Constant>(V) ? (isa<UndefValue>(V) ? 0 : 1) : 2; 136 } 137 138 /// Predicate canonicalization reduces the number of patterns that need to be 139 /// matched by other transforms. For example, we may swap the operands of a 140 /// conditional branch or select to create a compare with a canonical 141 /// (inverted) predicate which is then more likely to be matched with other 142 /// values. isCanonicalPredicate(CmpInst::Predicate Pred)143 static bool isCanonicalPredicate(CmpInst::Predicate Pred) { 144 switch (Pred) { 145 case CmpInst::ICMP_NE: 146 case CmpInst::ICMP_ULE: 147 case CmpInst::ICMP_SLE: 148 case CmpInst::ICMP_UGE: 149 case CmpInst::ICMP_SGE: 150 // TODO: There are 16 FCMP predicates. Should others be (not) canonical? 151 case CmpInst::FCMP_ONE: 152 case CmpInst::FCMP_OLE: 153 case CmpInst::FCMP_OGE: 154 return false; 155 default: 156 return true; 157 } 158 } 159 160 /// Given an exploded icmp instruction, return true if the comparison only 161 /// checks the sign bit. If it only checks the sign bit, set TrueIfSigned if 162 /// the result of the comparison is true when the input value is signed. isSignBitCheck(ICmpInst::Predicate Pred,const APInt & RHS,bool & TrueIfSigned)163 static bool isSignBitCheck(ICmpInst::Predicate Pred, const APInt &RHS, 164 bool &TrueIfSigned) { 165 switch (Pred) { 166 case ICmpInst::ICMP_SLT: // True if LHS s< 0 167 TrueIfSigned = true; 168 return RHS.isNullValue(); 169 case ICmpInst::ICMP_SLE: // True if LHS s<= -1 170 TrueIfSigned = true; 171 return RHS.isAllOnesValue(); 172 case ICmpInst::ICMP_SGT: // True if LHS s> -1 173 TrueIfSigned = false; 174 return RHS.isAllOnesValue(); 175 case ICmpInst::ICMP_SGE: // True if LHS s>= 0 176 TrueIfSigned = false; 177 return RHS.isNullValue(); 178 case ICmpInst::ICMP_UGT: 179 // True if LHS u> RHS and RHS == sign-bit-mask - 1 180 TrueIfSigned = true; 181 return RHS.isMaxSignedValue(); 182 case ICmpInst::ICMP_UGE: 183 // True if LHS u>= RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc) 184 TrueIfSigned = true; 185 return RHS.isMinSignedValue(); 186 case ICmpInst::ICMP_ULT: 187 // True if LHS u< RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc) 188 TrueIfSigned = false; 189 return RHS.isMinSignedValue(); 190 case ICmpInst::ICMP_ULE: 191 // True if LHS u<= RHS and RHS == sign-bit-mask - 1 192 TrueIfSigned = false; 193 return RHS.isMaxSignedValue(); 194 default: 195 return false; 196 } 197 } 198 199 /// Add one to a Constant AddOne(Constant * C)200 static Constant *AddOne(Constant *C) { 201 return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1)); 202 } 203 204 /// Subtract one from a Constant SubOne(Constant * C)205 static Constant *SubOne(Constant *C) { 206 return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1)); 207 } 208 209 llvm::Optional<std::pair< 210 CmpInst::Predicate, 211 Constant *>> static getFlippedStrictnessPredicateAndConstant(CmpInst:: 212 Predicate 213 Pred, 214 Constant *C); 215 216 /// Return true if the specified value is free to invert (apply ~ to). 217 /// This happens in cases where the ~ can be eliminated. If WillInvertAllUses 218 /// is true, work under the assumption that the caller intends to remove all 219 /// uses of V and only keep uses of ~V. 220 /// 221 /// See also: canFreelyInvertAllUsersOf() isFreeToInvert(Value * V,bool WillInvertAllUses)222 static bool isFreeToInvert(Value *V, bool WillInvertAllUses) { 223 // ~(~(X)) -> X. 224 if (match(V, m_Not(PatternMatch::m_Value()))) 225 return true; 226 227 // Constants can be considered to be not'ed values. 228 if (match(V, PatternMatch::m_AnyIntegralConstant())) 229 return true; 230 231 // Compares can be inverted if all of their uses are being modified to use 232 // the ~V. 233 if (isa<CmpInst>(V)) 234 return WillInvertAllUses; 235 236 // If `V` is of the form `A + Constant` then `-1 - V` can be folded into 237 // `(-1 - Constant) - A` if we are willing to invert all of the uses. 238 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V)) 239 if (BO->getOpcode() == Instruction::Add || 240 BO->getOpcode() == Instruction::Sub) 241 if (isa<Constant>(BO->getOperand(0)) || 242 isa<Constant>(BO->getOperand(1))) 243 return WillInvertAllUses; 244 245 // Selects with invertible operands are freely invertible 246 if (match(V, 247 m_Select(PatternMatch::m_Value(), m_Not(PatternMatch::m_Value()), 248 m_Not(PatternMatch::m_Value())))) 249 return WillInvertAllUses; 250 251 return false; 252 } 253 254 /// Given i1 V, can every user of V be freely adapted if V is changed to !V ? 255 /// InstCombine's canonicalizeICmpPredicate() must be kept in sync with this 256 /// fn. 257 /// 258 /// See also: isFreeToInvert() canFreelyInvertAllUsersOf(Value * V,Value * IgnoredUser)259 static bool canFreelyInvertAllUsersOf(Value *V, Value *IgnoredUser) { 260 // Look at every user of V. 261 for (Use &U : V->uses()) { 262 if (U.getUser() == IgnoredUser) 263 continue; // Don't consider this user. 264 265 auto *I = cast<Instruction>(U.getUser()); 266 switch (I->getOpcode()) { 267 case Instruction::Select: 268 if (U.getOperandNo() != 0) // Only if the value is used as select cond. 269 return false; 270 break; 271 case Instruction::Br: 272 assert(U.getOperandNo() == 0 && "Must be branching on that value."); 273 break; // Free to invert by swapping true/false values/destinations. 274 case Instruction::Xor: // Can invert 'xor' if it's a 'not', by ignoring 275 // it. 276 if (!match(I, m_Not(PatternMatch::m_Value()))) 277 return false; // Not a 'not'. 278 break; 279 default: 280 return false; // Don't know, likely not freely invertible. 281 } 282 // So far all users were free to invert... 283 } 284 return true; // Can freely invert all users! 285 } 286 287 /// Some binary operators require special handling to avoid poison and 288 /// undefined behavior. If a constant vector has undef elements, replace those 289 /// undefs with identity constants if possible because those are always safe 290 /// to execute. If no identity constant exists, replace undef with some other 291 /// safe constant. 292 static Constant * getSafeVectorConstantForBinop(BinaryOperator::BinaryOps Opcode,Constant * In,bool IsRHSConstant)293 getSafeVectorConstantForBinop(BinaryOperator::BinaryOps Opcode, Constant *In, 294 bool IsRHSConstant) { 295 auto *InVTy = cast<FixedVectorType>(In->getType()); 296 297 Type *EltTy = InVTy->getElementType(); 298 auto *SafeC = ConstantExpr::getBinOpIdentity(Opcode, EltTy, IsRHSConstant); 299 if (!SafeC) { 300 // TODO: Should this be available as a constant utility function? It is 301 // similar to getBinOpAbsorber(). 302 if (IsRHSConstant) { 303 switch (Opcode) { 304 case Instruction::SRem: // X % 1 = 0 305 case Instruction::URem: // X %u 1 = 0 306 SafeC = ConstantInt::get(EltTy, 1); 307 break; 308 case Instruction::FRem: // X % 1.0 (doesn't simplify, but it is safe) 309 SafeC = ConstantFP::get(EltTy, 1.0); 310 break; 311 default: 312 llvm_unreachable( 313 "Only rem opcodes have no identity constant for RHS"); 314 } 315 } else { 316 switch (Opcode) { 317 case Instruction::Shl: // 0 << X = 0 318 case Instruction::LShr: // 0 >>u X = 0 319 case Instruction::AShr: // 0 >> X = 0 320 case Instruction::SDiv: // 0 / X = 0 321 case Instruction::UDiv: // 0 /u X = 0 322 case Instruction::SRem: // 0 % X = 0 323 case Instruction::URem: // 0 %u X = 0 324 case Instruction::Sub: // 0 - X (doesn't simplify, but it is safe) 325 case Instruction::FSub: // 0.0 - X (doesn't simplify, but it is safe) 326 case Instruction::FDiv: // 0.0 / X (doesn't simplify, but it is safe) 327 case Instruction::FRem: // 0.0 % X = 0 328 SafeC = Constant::getNullValue(EltTy); 329 break; 330 default: 331 llvm_unreachable("Expected to find identity constant for opcode"); 332 } 333 } 334 } 335 assert(SafeC && "Must have safe constant for binop"); 336 unsigned NumElts = InVTy->getNumElements(); 337 SmallVector<Constant *, 16> Out(NumElts); 338 for (unsigned i = 0; i != NumElts; ++i) { 339 Constant *C = In->getAggregateElement(i); 340 Out[i] = isa<UndefValue>(C) ? SafeC : C; 341 } 342 return ConstantVector::get(Out); 343 } 344 345 /// Create and insert the idiom we use to indicate a block is unreachable 346 /// without having to rewrite the CFG from within InstCombine. CreateNonTerminatorUnreachable(Instruction * InsertAt)347 static void CreateNonTerminatorUnreachable(Instruction *InsertAt) { 348 auto &Ctx = InsertAt->getContext(); 349 new StoreInst(ConstantInt::getTrue(Ctx), 350 UndefValue::get(Type::getInt1PtrTy(Ctx)), InsertAt); 351 } 352 addToWorklist(Instruction * I)353 void addToWorklist(Instruction *I) { Worklist.push(I); } 354 getAssumptionCache()355 AssumptionCache &getAssumptionCache() const { return AC; } getTargetLibraryInfo()356 TargetLibraryInfo &getTargetLibraryInfo() const { return TLI; } getDominatorTree()357 DominatorTree &getDominatorTree() const { return DT; } getDataLayout()358 const DataLayout &getDataLayout() const { return DL; } getSimplifyQuery()359 const SimplifyQuery &getSimplifyQuery() const { return SQ; } getOptimizationRemarkEmitter()360 OptimizationRemarkEmitter &getOptimizationRemarkEmitter() const { 361 return ORE; 362 } getBlockFrequencyInfo()363 BlockFrequencyInfo *getBlockFrequencyInfo() const { return BFI; } getProfileSummaryInfo()364 ProfileSummaryInfo *getProfileSummaryInfo() const { return PSI; } getLoopInfo()365 LoopInfo *getLoopInfo() const { return LI; } 366 367 // Call target specific combiners 368 Optional<Instruction *> targetInstCombineIntrinsic(IntrinsicInst &II); 369 Optional<Value *> 370 targetSimplifyDemandedUseBitsIntrinsic(IntrinsicInst &II, APInt DemandedMask, 371 KnownBits &Known, 372 bool &KnownBitsComputed); 373 Optional<Value *> targetSimplifyDemandedVectorEltsIntrinsic( 374 IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, 375 APInt &UndefElts2, APInt &UndefElts3, 376 std::function<void(Instruction *, unsigned, APInt, APInt &)> 377 SimplifyAndSetOp); 378 379 /// Inserts an instruction \p New before instruction \p Old 380 /// 381 /// Also adds the new instruction to the worklist and returns \p New so that 382 /// it is suitable for use as the return from the visitation patterns. InsertNewInstBefore(Instruction * New,Instruction & Old)383 Instruction *InsertNewInstBefore(Instruction *New, Instruction &Old) { 384 assert(New && !New->getParent() && 385 "New instruction already inserted into a basic block!"); 386 BasicBlock *BB = Old.getParent(); 387 BB->getInstList().insert(Old.getIterator(), New); // Insert inst 388 Worklist.push(New); 389 return New; 390 } 391 392 /// Same as InsertNewInstBefore, but also sets the debug loc. InsertNewInstWith(Instruction * New,Instruction & Old)393 Instruction *InsertNewInstWith(Instruction *New, Instruction &Old) { 394 New->setDebugLoc(Old.getDebugLoc()); 395 return InsertNewInstBefore(New, Old); 396 } 397 398 /// A combiner-aware RAUW-like routine. 399 /// 400 /// This method is to be used when an instruction is found to be dead, 401 /// replaceable with another preexisting expression. Here we add all uses of 402 /// I to the worklist, replace all uses of I with the new value, then return 403 /// I, so that the inst combiner will know that I was modified. replaceInstUsesWith(Instruction & I,Value * V)404 Instruction *replaceInstUsesWith(Instruction &I, Value *V) { 405 // If there are no uses to replace, then we return nullptr to indicate that 406 // no changes were made to the program. 407 if (I.use_empty()) 408 return nullptr; 409 410 Worklist.pushUsersToWorkList(I); // Add all modified instrs to worklist. 411 412 // If we are replacing the instruction with itself, this must be in a 413 // segment of unreachable code, so just clobber the instruction. 414 if (&I == V) 415 V = UndefValue::get(I.getType()); 416 417 LLVM_DEBUG(dbgs() << "IC: Replacing " << I << "\n" 418 << " with " << *V << '\n'); 419 420 I.replaceAllUsesWith(V); 421 return &I; 422 } 423 424 /// Replace operand of instruction and add old operand to the worklist. replaceOperand(Instruction & I,unsigned OpNum,Value * V)425 Instruction *replaceOperand(Instruction &I, unsigned OpNum, Value *V) { 426 Worklist.addValue(I.getOperand(OpNum)); 427 I.setOperand(OpNum, V); 428 return &I; 429 } 430 431 /// Replace use and add the previously used value to the worklist. replaceUse(Use & U,Value * NewValue)432 void replaceUse(Use &U, Value *NewValue) { 433 Worklist.addValue(U); 434 U = NewValue; 435 } 436 437 /// Combiner aware instruction erasure. 438 /// 439 /// When dealing with an instruction that has side effects or produces a void 440 /// value, we can't rely on DCE to delete the instruction. Instead, visit 441 /// methods should return the value returned by this function. 442 virtual Instruction *eraseInstFromFunction(Instruction &I) = 0; 443 computeKnownBits(const Value * V,KnownBits & Known,unsigned Depth,const Instruction * CxtI)444 void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, 445 const Instruction *CxtI) const { 446 llvm::computeKnownBits(V, Known, DL, Depth, &AC, CxtI, &DT); 447 } 448 computeKnownBits(const Value * V,unsigned Depth,const Instruction * CxtI)449 KnownBits computeKnownBits(const Value *V, unsigned Depth, 450 const Instruction *CxtI) const { 451 return llvm::computeKnownBits(V, DL, Depth, &AC, CxtI, &DT); 452 } 453 454 bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero = false, 455 unsigned Depth = 0, 456 const Instruction *CxtI = nullptr) { 457 return llvm::isKnownToBeAPowerOfTwo(V, DL, OrZero, Depth, &AC, CxtI, &DT); 458 } 459 460 bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth = 0, 461 const Instruction *CxtI = nullptr) const { 462 return llvm::MaskedValueIsZero(V, Mask, DL, Depth, &AC, CxtI, &DT); 463 } 464 465 unsigned ComputeNumSignBits(const Value *Op, unsigned Depth = 0, 466 const Instruction *CxtI = nullptr) const { 467 return llvm::ComputeNumSignBits(Op, DL, Depth, &AC, CxtI, &DT); 468 } 469 computeOverflowForUnsignedMul(const Value * LHS,const Value * RHS,const Instruction * CxtI)470 OverflowResult computeOverflowForUnsignedMul(const Value *LHS, 471 const Value *RHS, 472 const Instruction *CxtI) const { 473 return llvm::computeOverflowForUnsignedMul(LHS, RHS, DL, &AC, CxtI, &DT); 474 } 475 computeOverflowForSignedMul(const Value * LHS,const Value * RHS,const Instruction * CxtI)476 OverflowResult computeOverflowForSignedMul(const Value *LHS, const Value *RHS, 477 const Instruction *CxtI) const { 478 return llvm::computeOverflowForSignedMul(LHS, RHS, DL, &AC, CxtI, &DT); 479 } 480 computeOverflowForUnsignedAdd(const Value * LHS,const Value * RHS,const Instruction * CxtI)481 OverflowResult computeOverflowForUnsignedAdd(const Value *LHS, 482 const Value *RHS, 483 const Instruction *CxtI) const { 484 return llvm::computeOverflowForUnsignedAdd(LHS, RHS, DL, &AC, CxtI, &DT); 485 } 486 computeOverflowForSignedAdd(const Value * LHS,const Value * RHS,const Instruction * CxtI)487 OverflowResult computeOverflowForSignedAdd(const Value *LHS, const Value *RHS, 488 const Instruction *CxtI) const { 489 return llvm::computeOverflowForSignedAdd(LHS, RHS, DL, &AC, CxtI, &DT); 490 } 491 computeOverflowForUnsignedSub(const Value * LHS,const Value * RHS,const Instruction * CxtI)492 OverflowResult computeOverflowForUnsignedSub(const Value *LHS, 493 const Value *RHS, 494 const Instruction *CxtI) const { 495 return llvm::computeOverflowForUnsignedSub(LHS, RHS, DL, &AC, CxtI, &DT); 496 } 497 computeOverflowForSignedSub(const Value * LHS,const Value * RHS,const Instruction * CxtI)498 OverflowResult computeOverflowForSignedSub(const Value *LHS, const Value *RHS, 499 const Instruction *CxtI) const { 500 return llvm::computeOverflowForSignedSub(LHS, RHS, DL, &AC, CxtI, &DT); 501 } 502 503 virtual bool SimplifyDemandedBits(Instruction *I, unsigned OpNo, 504 const APInt &DemandedMask, KnownBits &Known, 505 unsigned Depth = 0) = 0; 506 virtual Value * 507 SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &UndefElts, 508 unsigned Depth = 0, 509 bool AllowMultipleUsers = false) = 0; 510 }; 511 512 } // namespace llvm 513 514 #undef DEBUG_TYPE 515 516 #endif 517