//===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file provides helpers for the implementation of
/// a TargetTransformInfo-conforming class.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H

#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"

namespace llvm {

/// Base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
class TargetTransformInfoImplBase {
protected:
  typedef TargetTransformInfo TTI;

  const DataLayout &DL;

  explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {}

public:
  // Provide value semantics. MSVC requires that we spell all of these out.
  TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg)
      : DL(Arg.DL) {}
  TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}

  const DataLayout &getDataLayout() const { return DL; }

  unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
    switch (Opcode) {
    default:
      // By default, just classify everything as 'basic'.
      return TTI::TCC_Basic;

    case Instruction::GetElementPtr:
      llvm_unreachable("Use getGEPCost for GEP operations!");

    case Instruction::BitCast:
      assert(OpTy && "Cast instructions must provide the operand type");
      if (Ty == OpTy || (Ty->isPointerTy() && OpTy->isPointerTy()))
        // Identity and pointer-to-pointer casts are free.
        return TTI::TCC_Free;

      // Otherwise, the default basic cost is used.
      return TTI::TCC_Basic;

    case Instruction::FDiv:
    case Instruction::FRem:
    case Instruction::SDiv:
    case Instruction::SRem:
    case Instruction::UDiv:
    case Instruction::URem:
      return TTI::TCC_Expensive;

    case Instruction::IntToPtr: {
      // An inttoptr cast is free so long as the input is a legal integer type
      // which doesn't contain values outside the range of a pointer.
      unsigned OpSize = OpTy->getScalarSizeInBits();
      if (DL.isLegalInteger(OpSize) &&
          OpSize <= DL.getPointerTypeSizeInBits(Ty))
        return TTI::TCC_Free;

      // Otherwise it's not a no-op.
      return TTI::TCC_Basic;
    }
    case Instruction::PtrToInt: {
      // A ptrtoint cast is free so long as the result is large enough to store
      // the pointer, and a legal integer type.
      unsigned DestSize = Ty->getScalarSizeInBits();
      if (DL.isLegalInteger(DestSize) &&
          DestSize >= DL.getPointerTypeSizeInBits(OpTy))
        return TTI::TCC_Free;

      // Otherwise it's not a no-op.
      return TTI::TCC_Basic;
    }
    case Instruction::Trunc:
      // trunc to a native type is free (assuming the target has compare and
      // shift-right of the same width).
      if (DL.isLegalInteger(DL.getTypeSizeInBits(Ty)))
        return TTI::TCC_Free;

      return TTI::TCC_Basic;
    }
  }
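
  // Worked example of the cast rules above, assuming a hypothetical
  // DataLayout along the lines of "e-p:64:64-n32:64" (64-bit pointers, legal
  // i32 and i64): a ptrtoint to i64 is TCC_Free because the destination is a
  // legal integer at least as wide as the pointer, while a ptrtoint to i32 is
  // TCC_Basic because the result cannot hold every pointer value. The
  // symmetric reasoning applies to inttoptr on the source operand.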

  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands) {
    // In the basic model, we just assume that all-constant GEPs will be folded
    // into their uses via addressing modes.
    for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx)
      if (!isa<Constant>(Operands[Idx]))
        return TTI::TCC_Basic;

    return TTI::TCC_Free;
  }
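
  // For example, a GEP whose indices are all constants is assumed to fold
  // into the addressing modes of its users and is modeled as TCC_Free,
  // whereas a single loop-variant index makes it TCC_Basic.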

  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) {
    (void)PSI;
    (void)BFI;
    JTSize = 0;
    return SI.getNumCases();
  }

  int getExtCost(const Instruction *I, const Value *Src) {
    return TTI::TCC_Basic;
  }

  unsigned getCallCost(FunctionType *FTy, int NumArgs, const User *U) {
    assert(FTy && "FunctionType must be provided to this routine.");

    // The target-independent implementation just measures the size of the
    // function by approximating that each argument will take on average one
    // instruction to prepare.

    if (NumArgs < 0)
      // Set the argument number to the number of explicit arguments in the
      // function.
      NumArgs = FTy->getNumParams();

    return TTI::TCC_Basic * (NumArgs + 1);
  }
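
  // For example, a call through a FunctionType with three parameters is
  // modeled as 4 * TTI::TCC_Basic: one unit per argument plus one for the
  // call itself.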

  unsigned getInliningThresholdMultiplier() { return 1; }

  int getInlinerVectorBonusPercent() { return 150; }

  unsigned getMemcpyCost(const Instruction *I) {
    return TTI::TCC_Expensive;
  }

  bool hasBranchDivergence() { return false; }

  bool isSourceOfDivergence(const Value *V) { return false; }

  bool isAlwaysUniform(const Value *V) { return false; }

  unsigned getFlatAddressSpace() {
    return -1;
  }

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const {
    return false;
  }

  bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
                                        Value *OldV, Value *NewV) const {
    return false;
  }

  bool isLoweredToCall(const Function *F) {
    assert(F && "A concrete function must be provided to this routine.");

    // FIXME: These should almost certainly not be handled here, and instead
    // handled with the help of TLI or the target itself. This was largely
    // ported from existing analysis heuristics here so that such refactorings
    // can take place in the future.

    if (F->isIntrinsic())
      return false;

    if (F->hasLocalLinkage() || !F->hasName())
      return true;

    StringRef Name = F->getName();

    // These will all likely lower to a single selection DAG node.
    if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
        Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
        Name == "fmin" || Name == "fminf" || Name == "fminl" ||
        Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
        Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
        Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
      return false;

    // These are all likely to be optimized into something smaller.
    if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
        Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
        Name == "floorf" || Name == "ceil" || Name == "round" ||
        Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" ||
        Name == "llabs")
      return false;

    return true;
  }

  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC,
                                TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) {
    return false;
  }

  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
                                   AssumptionCache &AC, TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   const LoopAccessInfo *LAI) const {
    return false;
  }

  void getUnrollingPreferences(Loop *, ScalarEvolution &,
                               TTI::UnrollingPreferences &) {}

  bool isLegalAddImmediate(int64_t Imm) { return false; }

  bool isLegalICmpImmediate(int64_t Imm) { return false; }

  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace, Instruction *I = nullptr) {
    // Guess that only reg and reg+reg addressing is allowed. This heuristic is
    // taken from the implementation of LSR.
    return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
  }

  bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) {
    return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
                    C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
           std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
                    C2.ScaleCost, C2.ImmCost, C2.SetupCost);
  }
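
  // The std::tie comparison above orders LSR candidates lexicographically: a
  // solution using fewer registers always wins, and later fields such as
  // AddRecCost or ImmCost only break ties among the earlier ones. For
  // example, a cost with NumRegs == 2 and ImmCost == 10 still compares less
  // than one with NumRegs == 3 and ImmCost == 0.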

  bool canMacroFuseCmp() { return false; }

  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
                  DominatorTree *DT, AssumptionCache *AC,
                  TargetLibraryInfo *LibInfo) {
    return false;
  }

  bool shouldFavorPostInc() const { return false; }

  bool shouldFavorBackedgeIndex(const Loop *L) const { return false; }

  bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) { return false; }

  bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) { return false; }

  bool isLegalNTStore(Type *DataType, Align Alignment) {
    // By default, assume nontemporal memory stores are available for stores
    // that are aligned and have a size that is a power of 2.
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }

  bool isLegalNTLoad(Type *DataType, Align Alignment) {
    // By default, assume nontemporal memory loads are available for loads that
    // are aligned and have a size that is a power of 2.
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }
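
  // As a worked example of the default rule above, a 16-byte access such as
  // a <4 x float> load or store with an alignment of 16 qualifies (16 is a
  // power of two and the alignment covers the whole access), while the same
  // access with an alignment of 8 does not.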

  bool isLegalMaskedScatter(Type *DataType, MaybeAlign Alignment) {
    return false;
  }

  bool isLegalMaskedGather(Type *DataType, MaybeAlign Alignment) {
    return false;
  }

  bool isLegalMaskedCompressStore(Type *DataType) { return false; }

  bool isLegalMaskedExpandLoad(Type *DataType) { return false; }

  bool hasDivRemOp(Type *DataType, bool IsSigned) { return false; }

  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) { return false; }

  bool prefersVectorizedAddressing() { return true; }

  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
    // Guess that all legal addressing modes are free.
    if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
                              Scale, AddrSpace))
      return 0;
    return -1;
  }

  bool LSRWithInstrQueries() { return false; }

  bool isTruncateFree(Type *Ty1, Type *Ty2) { return false; }

  bool isProfitableToHoist(Instruction *I) { return true; }

  bool useAA() { return false; }

  bool isTypeLegal(Type *Ty) { return false; }

  bool shouldBuildLookupTables() { return true; }
  bool shouldBuildLookupTablesForConstant(Constant *C) { return true; }

  bool useColdCCForColdCall(Function &F) { return false; }

  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
    return 0;
  }

  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                            unsigned VF) { return 0; }

  bool supportsEfficientVectorElementLoadStore() { return false; }

  bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; }

  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                    bool IsZeroCmp) const {
    return {};
  }

  bool enableInterleavedAccessVectorization() { return false; }

  bool enableMaskedInterleavedAccessVectorization() { return false; }

  bool isFPVectorizationPotentiallyUnsafe() { return false; }

  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                      unsigned BitWidth,
                                      unsigned AddressSpace,
                                      unsigned Alignment,
                                      bool *Fast) { return false; }

  TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) {
    return TTI::PSK_Software;
  }

  bool haveFastSqrt(Type *Ty) { return false; }

  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) { return true; }

  unsigned getFPOpCost(Type *Ty) { return TargetTransformInfo::TCC_Basic; }

  int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                            Type *Ty) {
    return 0;
  }

  unsigned getIntImmCost(const APInt &Imm, Type *Ty) { return TTI::TCC_Basic; }

  unsigned getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
                             Type *Ty) {
    return TTI::TCC_Free;
  }

  unsigned getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                               const APInt &Imm, Type *Ty) {
    return TTI::TCC_Free;
  }

  unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; }

  unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const {
    return Vector ? 1 : 0;
  }

  const char* getRegisterClassName(unsigned ClassID) const {
    switch (ClassID) {
      default:
        return "Generic::Unknown Register Class";
      case 0: return "Generic::ScalarRC";
      case 1: return "Generic::VectorRC";
    }
  }

  unsigned getRegisterBitWidth(bool Vector) const { return 32; }

  unsigned getMinVectorRegisterBitWidth() { return 128; }

  bool shouldMaximizeVectorBandwidth(bool OptSize) const { return false; }

  unsigned getMinimumVF(unsigned ElemWidth) const { return 0; }

  bool
  shouldConsiderAddressTypePromotion(const Instruction &I,
                                     bool &AllowPromotionWithoutCommonHeader) {
    AllowPromotionWithoutCommonHeader = false;
    return false;
  }

  unsigned getCacheLineSize() const { return 0; }

  llvm::Optional<unsigned> getCacheSize(TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      LLVM_FALLTHROUGH;
    case TargetTransformInfo::CacheLevel::L2D:
      return llvm::Optional<unsigned>();
    }
    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  llvm::Optional<unsigned> getCacheAssociativity(
    TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      LLVM_FALLTHROUGH;
    case TargetTransformInfo::CacheLevel::L2D:
      return llvm::Optional<unsigned>();
    }

    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  unsigned getPrefetchDistance() const { return 0; }
  unsigned getMinPrefetchStride() const { return 1; }
  unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; }

  unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }

  unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
                                  TTI::OperandValueKind Opd1Info,
                                  TTI::OperandValueKind Opd2Info,
                                  TTI::OperandValueProperties Opd1PropInfo,
                                  TTI::OperandValueProperties Opd2PropInfo,
                                  ArrayRef<const Value *> Args,
                                  const Instruction *CxtI = nullptr) {
    return 1;
  }

  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Ty, int Index,
                          Type *SubTp) {
    return 1;
  }

  unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                            const Instruction *I) { return 1; }

  unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                    VectorType *VecTy, unsigned Index) {
    return 1;
  }

  unsigned getCFInstrCost(unsigned Opcode) { return 1; }

  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                              const Instruction *I) {
    return 1;
  }

  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
    return 1;
  }

  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                           unsigned AddressSpace, const Instruction *I) {
    return 1;
  }

  unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                                 unsigned AddressSpace) {
    return 1;
  }

  unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
                                  bool VariableMask,
                                  unsigned Alignment) {
    return 1;
  }

  unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                      unsigned Factor,
                                      ArrayRef<unsigned> Indices,
                                      unsigned Alignment, unsigned AddressSpace,
                                      bool UseMaskForCond = false,
                                      bool UseMaskForGaps = false) {
    return 1;
  }

  unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                 ArrayRef<Type *> Tys, FastMathFlags FMF,
                                 unsigned ScalarizationCostPassed) {
    return 1;
  }
  unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
            ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) {
    return 1;
  }

  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
    return 1;
  }

  unsigned getNumberOfParts(Type *Tp) { return 0; }

  unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *,
                                     const SCEV *) {
    return 0;
  }

  unsigned getArithmeticReductionCost(unsigned, Type *, bool) { return 1; }

  unsigned getMinMaxReductionCost(Type *, Type *, bool, bool) { return 1; }

  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) { return 0; }

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) {
    return false;
  }

  unsigned getAtomicMemIntrinsicMaxElementSize() const {
    // Note for overrides: You must ensure for all element unordered-atomic
    // memory intrinsics that all power-of-2 element sizes up to, and
    // including, the return value of this method have a corresponding
    // runtime lib call. These runtime lib call definitions can be found
    // in RuntimeLibcalls.h
    return 0;
  }

  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) {
    return nullptr;
  }

  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                                  unsigned SrcAlign, unsigned DestAlign) const {
    return Type::getInt8Ty(Context);
  }

  void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
                                         LLVMContext &Context,
                                         unsigned RemainingBytes,
                                         unsigned SrcAlign,
                                         unsigned DestAlign) const {
    for (unsigned i = 0; i != RemainingBytes; ++i)
      OpsOut.push_back(Type::getInt8Ty(Context));
  }

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  bool areFunctionArgsABICompatible(const Function *Caller, const Function *Callee,
                                    SmallPtrSetImpl<Argument *> &Args) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty,
                          const DataLayout &DL) const {
    return false;
  }

  bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty,
                           const DataLayout &DL) const {
    return false;
  }

  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; }

  bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }

  bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }

  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const {
    return true;
  }

  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const {
    return true;
  }

  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const {
    return VF;
  }

  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const {
    return VF;
  }

  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const {
    return false;
  }

  bool shouldExpandReduction(const IntrinsicInst *II) const {
    return true;
  }

  unsigned getGISelRematGlobalCost() const {
    return 1;
  }

protected:
  // Obtain the minimum required size to hold the value (without the sign).
  // In case of a vector it returns the min required size for one element.
  unsigned minRequiredElementSize(const Value* Val, bool &isSigned) {
    if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
      const auto* VectorValue = cast<Constant>(Val);

      // In case of a vector we need to pick the max between the min
      // required size for each element.
      auto *VT = cast<VectorType>(Val->getType());

      // Assume unsigned elements.
      isSigned = false;

      // The max required size is the total vector width divided by num
      // of elements in the vector.
      unsigned MaxRequiredSize = VT->getBitWidth() / VT->getNumElements();

      unsigned MinRequiredSize = 0;
      for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
        if (auto* IntElement =
              dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
          bool signedElement = IntElement->getValue().isNegative();
          // Get the element min required size.
          unsigned ElementMinRequiredSize =
            IntElement->getValue().getMinSignedBits() - 1;
          // In case one element is signed then all the vector is signed.
          isSigned |= signedElement;
          // Save the max required bit size between all the elements.
          MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
        }
        else {
          // Not an int constant element.
          return MaxRequiredSize;
        }
      }
      return MinRequiredSize;
    }

    if (const auto* CI = dyn_cast<ConstantInt>(Val)) {
      isSigned = CI->getValue().isNegative();
      return CI->getValue().getMinSignedBits() - 1;
    }

    if (const auto* Cast = dyn_cast<SExtInst>(Val)) {
      isSigned = true;
      return Cast->getSrcTy()->getScalarSizeInBits() - 1;
    }

    if (const auto* Cast = dyn_cast<ZExtInst>(Val)) {
      isSigned = false;
      return Cast->getSrcTy()->getScalarSizeInBits();
    }

    isSigned = false;
    return Val->getType()->getScalarSizeInBits();
  }
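
  // Worked example: for the constant vector <4 x i32> <1, -2, 3, 4>, the
  // loop above computes per-element sizes of 1, 1, 2 and 3 bits
  // (getMinSignedBits() - 1 for each element), the negative element marks
  // the result as signed, and the function returns 3 with isSigned == true.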

  bool isStridedAccess(const SCEV *Ptr) {
    return Ptr && isa<SCEVAddRecExpr>(Ptr);
  }

  const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE,
                                            const SCEV *Ptr) {
    if (!isStridedAccess(Ptr))
      return nullptr;
    const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
    return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
  }
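
  // For example, given a strided pointer SCEV of the form {%base,+,8}, the
  // helper above returns the SCEVConstant step 8, and the check below then
  // reports the access as strided by less than a MergeDistance of 64
  // (8 < 64). Per the FIXME inside, a negative stride such as -8 currently
  // passes the same signed comparison.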

  bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr,
                                       int64_t MergeDistance) {
    const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
    if (!Step)
      return false;
    APInt StrideVal = Step->getAPInt();
    if (StrideVal.getBitWidth() > 64)
      return false;
    // FIXME: Need to take absolute value for negative stride case.
    return StrideVal.getSExtValue() < MergeDistance;
  }
};

/// CRTP base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
template <typename T>
class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
private:
  typedef TargetTransformInfoImplBase BaseT;

protected:
  explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {}

public:
  using BaseT::getCallCost;

  unsigned getCallCost(const Function *F, int NumArgs, const User *U) {
    assert(F && "A concrete function must be provided to this routine.");

    if (NumArgs < 0)
      // Set the argument number to the number of explicit arguments in the
      // function.
      NumArgs = F->arg_size();

    if (Intrinsic::ID IID = F->getIntrinsicID()) {
      FunctionType *FTy = F->getFunctionType();
      SmallVector<Type *, 8> ParamTys(FTy->param_begin(), FTy->param_end());
      return static_cast<T *>(this)
          ->getIntrinsicCost(IID, FTy->getReturnType(), ParamTys, U);
    }

    if (!static_cast<T *>(this)->isLoweredToCall(F))
      return TTI::TCC_Basic; // Give a basic cost if it will be lowered
                             // directly.

    return static_cast<T *>(this)->getCallCost(F->getFunctionType(), NumArgs, U);
  }

  unsigned getCallCost(const Function *F, ArrayRef<const Value *> Arguments,
                       const User *U) {
    // Simply delegate to generic handling of the call.
    // FIXME: We should use instsimplify or something else to catch calls which
    // will constant fold with these arguments.
    return static_cast<T *>(this)->getCallCost(F, Arguments.size(), U);
  }

  using BaseT::getGEPCost;

  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands) {
    assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
    // TODO: will remove this when pointers have an opaque type.
    assert(Ptr->getType()->getScalarType()->getPointerElementType() ==
               PointeeType &&
           "explicit pointee type doesn't match operand's pointee type");
    auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
    bool HasBaseReg = (BaseGV == nullptr);

    auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
    APInt BaseOffset(PtrSizeBits, 0);
    int64_t Scale = 0;

    auto GTI = gep_type_begin(PointeeType, Operands);
    Type *TargetType = nullptr;

    // Handle the case where the GEP instruction has a single operand,
    // the basis, therefore TargetType is a nullptr.
    if (Operands.empty())
      return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;

    for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
      TargetType = GTI.getIndexedType();
      // We assume that the cost of Scalar GEP with constant index and the
      // cost of Vector GEP with splat constant index are the same.
      const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
      if (!ConstIdx)
        if (auto Splat = getSplatValue(*I))
          ConstIdx = dyn_cast<ConstantInt>(Splat);
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        // For structures the index is always splat or scalar constant
        assert(ConstIdx && "Unexpected GEP index");
        uint64_t Field = ConstIdx->getZExtValue();
        BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
      } else {
        int64_t ElementSize = DL.getTypeAllocSize(GTI.getIndexedType());
        if (ConstIdx) {
          BaseOffset +=
              ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
        } else {
          // Needs scale register.
          if (Scale != 0)
            // No addressing mode takes two scale registers.
            return TTI::TCC_Basic;
          Scale = ElementSize;
        }
      }
    }

    if (static_cast<T *>(this)->isLegalAddressingMode(
            TargetType, const_cast<GlobalValue *>(BaseGV),
            BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale,
            Ptr->getType()->getPointerAddressSpace()))
      return TTI::TCC_Free;
    return TTI::TCC_Basic;
  }

  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                            ArrayRef<Type *> ParamTys, const User *U) {
    switch (IID) {
    default:
      // Intrinsics rarely (if ever) have normal argument setup constraints.
      // Model them as having a basic instruction cost.
      return TTI::TCC_Basic;

    // TODO: other libc intrinsics.
    case Intrinsic::memcpy:
      return static_cast<T *>(this)->getMemcpyCost(dyn_cast<Instruction>(U));

    case Intrinsic::annotation:
    case Intrinsic::assume:
    case Intrinsic::sideeffect:
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
    case Intrinsic::dbg_label:
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::launder_invariant_group:
    case Intrinsic::strip_invariant_group:
    case Intrinsic::is_constant:
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::objectsize:
    case Intrinsic::ptr_annotation:
    case Intrinsic::var_annotation:
    case Intrinsic::experimental_gc_result:
    case Intrinsic::experimental_gc_relocate:
    case Intrinsic::coro_alloc:
    case Intrinsic::coro_begin:
    case Intrinsic::coro_free:
    case Intrinsic::coro_end:
    case Intrinsic::coro_frame:
    case Intrinsic::coro_size:
    case Intrinsic::coro_suspend:
    case Intrinsic::coro_param:
    case Intrinsic::coro_subfn_addr:
      // These intrinsics don't actually represent code after lowering.
      return TTI::TCC_Free;
    }
  }

  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                            ArrayRef<const Value *> Arguments, const User *U) {
    // Delegate to the generic intrinsic handling code. This mostly provides an
    // opportunity for targets to (for example) special case the cost of
    // certain intrinsics based on constants used as arguments.
    SmallVector<Type *, 8> ParamTys;
    ParamTys.reserve(Arguments.size());
    for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx)
      ParamTys.push_back(Arguments[Idx]->getType());
    return static_cast<T *>(this)->getIntrinsicCost(IID, RetTy, ParamTys, U);
  }

  unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands) {
    if (isa<PHINode>(U))
      return TTI::TCC_Free; // Model all PHI nodes as free.

    if (isa<ExtractValueInst>(U))
      return TTI::TCC_Free; // Model all ExtractValue nodes as free.

    // Static alloca doesn't generate target instructions.
    if (auto *A = dyn_cast<AllocaInst>(U))
      if (A->isStaticAlloca())
        return TTI::TCC_Free;

    if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
      return static_cast<T *>(this)->getGEPCost(GEP->getSourceElementType(),
                                                GEP->getPointerOperand(),
                                                Operands.drop_front());
    }

    if (auto CS = ImmutableCallSite(U)) {
      const Function *F = CS.getCalledFunction();
      if (!F) {
        // Just use the called value type.
        Type *FTy = CS.getCalledValue()->getType()->getPointerElementType();
        return static_cast<T *>(this)
            ->getCallCost(cast<FunctionType>(FTy), CS.arg_size(), U);
      }

      SmallVector<const Value *, 8> Arguments(CS.arg_begin(), CS.arg_end());
      return static_cast<T *>(this)->getCallCost(F, Arguments, U);
    }

    if (isa<SExtInst>(U) || isa<ZExtInst>(U) || isa<FPExtInst>(U))
      // The old behaviour of generally treating extensions of icmp to be free
      // has been removed. A target that needs it should override getUserCost().
      return static_cast<T *>(this)->getExtCost(cast<Instruction>(U),
                                                Operands.back());

    return static_cast<T *>(this)->getOperationCost(
        Operator::getOpcode(U), U->getType(),
        U->getNumOperands() == 1 ? U->getOperand(0)->getType() : nullptr);
  }

  int getInstructionLatency(const Instruction *I) {
    SmallVector<const Value *, 4> Operands(I->value_op_begin(),
                                           I->value_op_end());
    if (getUserCost(I, Operands) == TTI::TCC_Free)
      return 0;

    if (isa<LoadInst>(I))
      return 4;

    Type *DstTy = I->getType();

    // Usually an intrinsic is a simple instruction.
    // A real function call is much slower.
    if (auto *CI = dyn_cast<CallInst>(I)) {
      const Function *F = CI->getCalledFunction();
      if (!F || static_cast<T *>(this)->isLoweredToCall(F))
        return 40;
      // Some intrinsics return a value and a flag, we use the value type
      // to decide its latency.
      if (StructType* StructTy = dyn_cast<StructType>(DstTy))
        DstTy = StructTy->getElementType(0);
      // Fall through to simple instructions.
    }

    if (VectorType *VectorTy = dyn_cast<VectorType>(DstTy))
      DstTy = VectorTy->getElementType();
    if (DstTy->isFloatingPointTy())
      return 3;

    return 1;
  }
};
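
// Usage sketch (hypothetical; the class name and overridden values below are
// invented for illustration). A target's TTI implementation mixes in the CRTP
// base, either directly as shown or via a richer helper layered on top of it,
// and shadows only the hooks where it differs from the conservative defaults:
//
//   class MyTargetTTIImpl
//       : public TargetTransformInfoImplCRTPBase<MyTargetTTIImpl> {
//     typedef TargetTransformInfoImplCRTPBase<MyTargetTTIImpl> BaseT;
//
//   public:
//     explicit MyTargetTTIImpl(const DataLayout &DL) : BaseT(DL) {}
//
//     // Non-virtual "overrides" simply shadow the defaults above.
//     unsigned getNumberOfRegisters(unsigned ClassID) const { return 32; }
//     bool enableInterleavedAccessVectorization() { return true; }
//   };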
}

#endif