//===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This pass exposes codegen information to IR-level passes. Every
/// transformation that uses codegen information is broken into three parts:
/// 1. The IR-level analysis pass.
/// 2. The IR-level transformation interface which provides the needed
///    information.
/// 3. Codegen-level implementation which uses target-specific hooks.
///
/// This file defines #2, which is the interface that IR-level transformations
/// use for querying the codegen.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H

#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/DataTypes.h"
#include <functional>

namespace llvm {

namespace Intrinsic {
typedef unsigned ID;
}

class AssumptionCache;
class BlockFrequencyInfo;
class BranchInst;
class Function;
class GlobalValue;
class IntrinsicInst;
class LoadInst;
class LoopAccessInfo;
class Loop;
class ProfileSummaryInfo;
class SCEV;
class ScalarEvolution;
class StoreInst;
class SwitchInst;
class TargetLibraryInfo;
class Type;
class User;
class Value;

/// Information about a load/store intrinsic defined by the target.
struct MemIntrinsicInfo {
  /// This is the pointer that the intrinsic is loading from or storing to.
  /// If this is non-null, then analysis/optimization passes can assume that
  /// this intrinsic is functionally equivalent to a load/store from this
  /// pointer.
  Value *PtrVal = nullptr;

  // Ordering for atomic operations.
  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;

  // Same Id is set by the target for corresponding load/store intrinsics.
  unsigned short MatchingId = 0;

  bool ReadMem = false;
  bool WriteMem = false;
  bool IsVolatile = false;

  bool isUnordered() const {
    return (Ordering == AtomicOrdering::NotAtomic ||
            Ordering == AtomicOrdering::Unordered) && !IsVolatile;
  }
};
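
// Illustrative example (a sketch, not part of the interface): a CSE-style
// pass holding a TargetTransformInfo reference can use getTgtMemIntrinsic
// (declared further down in this file) to treat a target-specific load/store
// intrinsic like an ordinary memory access. 'TTI' (a TargetTransformInfo)
// and 'II' (an IntrinsicInst *) are assumed to be in scope:
//
//   MemIntrinsicInfo Info;
//   if (TTI.getTgtMemIntrinsic(II, Info) && Info.PtrVal && Info.isUnordered())
//     ; // Safe to reason about II as a load/store of Info.PtrVal.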

/// Attributes of a target dependent hardware loop.
struct HardwareLoopInfo {
  HardwareLoopInfo() = delete;
  HardwareLoopInfo(Loop *L) : L(L) {}
  Loop *L = nullptr;
  BasicBlock *ExitBlock = nullptr;
  BranchInst *ExitBranch = nullptr;
  const SCEV *ExitCount = nullptr;
  IntegerType *CountType = nullptr;
  Value *LoopDecrement = nullptr; // Decrement the loop counter by this
                                  // value in every iteration.
  bool IsNestingLegal = false;    // Can a hardware loop be a parent to
                                  // another hardware loop?
  bool CounterInReg = false;      // Should loop counter be updated in
                                  // the loop via a phi?
  bool PerformEntryTest = false;  // Generate the intrinsic which also performs
                                  // icmp ne zero on the loop counter value and
                                  // produces an i1 to guard the loop entry.
  bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI,
                               DominatorTree &DT, bool ForceNestedLoop = false,
                               bool ForceHardwareLoopPHI = false);
  bool canAnalyze(LoopInfo &LI);
};
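
// Illustrative example (a sketch under assumed context): a loop pass that
// considers hardware-loop formation might combine this struct with the
// isHardwareLoopProfitable query declared below. 'TTI', 'L', 'SE', 'LI',
// 'DT', 'AC', and 'LibInfo' are all assumed to be in scope:
//
//   HardwareLoopInfo HWLoopInfo(L);
//   if (HWLoopInfo.canAnalyze(LI) &&
//       TTI.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo) &&
//       HWLoopInfo.isHardwareLoopCandidate(SE, LI, DT))
//     ; // Rewrite the loop using the target's hardware-loop intrinsics.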

/// This pass provides access to the codegen interfaces that are needed
/// for IR-level transformations.
class TargetTransformInfo {
public:
  /// Construct a TTI object using a type implementing the \c Concept
  /// API below.
  ///
  /// This is used by targets to construct a TTI wrapping their target-specific
  /// implementation that encodes appropriate costs for their target.
  template <typename T> TargetTransformInfo(T Impl);

  /// Construct a baseline TTI object using a minimal implementation of
  /// the \c Concept API below.
  ///
  /// The TTI implementation will reflect the information in the DataLayout
  /// provided if non-null.
  explicit TargetTransformInfo(const DataLayout &DL);

  // Provide move semantics.
  TargetTransformInfo(TargetTransformInfo &&Arg);
  TargetTransformInfo &operator=(TargetTransformInfo &&RHS);

  // We need to define the destructor out-of-line to define our sub-classes
  // out-of-line.
  ~TargetTransformInfo();

  /// Handle the invalidation of this information.
  ///
  /// When used as a result of \c TargetIRAnalysis this method will be called
  /// when the function this was computed for changes. When it returns false,
  /// the information is preserved across those changes.
  bool invalidate(Function &, const PreservedAnalyses &,
                  FunctionAnalysisManager::Invalidator &) {
    // FIXME: We should probably in some way ensure that the subtarget
    // information for a function hasn't changed.
    return false;
  }

  /// \name Generic Target Information
  /// @{

  /// The kind of cost model.
  ///
  /// There are several different cost models that can be customized by the
  /// target. The normalization of each cost model may be target specific.
  enum TargetCostKind {
    TCK_RecipThroughput, ///< Reciprocal throughput.
    TCK_Latency,         ///< The latency of the instruction.
    TCK_CodeSize         ///< Instruction code size.
  };

  /// Query the cost of a specified instruction.
  ///
  /// Clients should use this interface to query the cost of an existing
  /// instruction. The instruction must have a valid parent (basic block).
  ///
  /// Note, this method does not cache the cost calculation and it
  /// can be expensive in some cases.
  int getInstructionCost(const Instruction *I, enum TargetCostKind kind) const {
    switch (kind) {
    case TCK_RecipThroughput:
      return getInstructionThroughput(I);

    case TCK_Latency:
      return getInstructionLatency(I);

    case TCK_CodeSize:
      return getUserCost(I);
    }
    llvm_unreachable("Unknown instruction cost kind");
  }
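
  // Illustrative example (a sketch, not part of the interface): summing the
  // reciprocal-throughput cost of every instruction in a block. 'TTI' and
  // 'BB' (a BasicBlock *) are assumed to be in scope:
  //
  //   int BlockCost = 0;
  //   for (const Instruction &I : *BB)
  //     BlockCost += TTI.getInstructionCost(
  //         &I, TargetTransformInfo::TCK_RecipThroughput);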

  /// Underlying constants for 'cost' values in this interface.
  ///
  /// Many APIs in this interface return a cost. This enum defines the
  /// fundamental values that should be used to interpret (and produce) those
  /// costs. The costs are returned as an int rather than a member of this
  /// enumeration because it is expected that the cost of one IR instruction
  /// may have a multiplicative factor to it or otherwise won't fit directly
  /// into the enum. Moreover, it is common to sum or average costs which works
  /// better as simple integral values. Thus this enum only provides constants.
  /// Also note that the returned costs are signed integers to make it natural
  /// to add, subtract, and test with zero (a common boundary condition). It is
  /// not expected that 2^32 is a realistic cost to be modeling at any point.
  ///
  /// Note that these costs should usually reflect the intersection of code-size
  /// cost and execution cost. A free instruction is typically one that folds
  /// into another instruction. For example, reg-to-reg moves can often be
  /// skipped by renaming the registers in the CPU, but they still are encoded
  /// and thus wouldn't be considered 'free' here.
  enum TargetCostConstants {
    TCC_Free = 0,     ///< Expected to fold away in lowering.
    TCC_Basic = 1,    ///< The cost of a typical 'add' instruction.
    TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
  };
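
  // Illustrative example (a sketch under assumed context): because costs are
  // plain signed integers, clients typically accumulate them and compare the
  // sum against a budget expressed with these constants. 'TTI' and 'Insts'
  // (some range of const User *) are hypothetical names:
  //
  //   int TotalCost = 0;
  //   for (const User *U : Insts)
  //     TotalCost += TTI.getUserCost(U);
  //   bool CheapEnough = TotalCost <= 10 * TargetTransformInfo::TCC_Basic;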

  /// Estimate the cost of a specific operation when lowered.
  ///
  /// Note that this is designed to work on an arbitrary synthetic opcode, and
  /// thus work for hypothetical queries before an instruction has even been
  /// formed. However, this does *not* work for GEPs, and must not be called
  /// for a GEP instruction. Instead, use the dedicated getGEPCost interface as
  /// analyzing a GEP's cost requires more information.
  ///
  /// Typically only the result type is required, and the operand type can be
  /// omitted. However, if the opcode is one of the cast instructions, the
  /// operand type is required.
  ///
  /// The returned cost is defined in terms of \c TargetCostConstants, see its
  /// comments for a detailed explanation of the cost values.
  int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy = nullptr) const;

  /// Estimate the cost of a GEP operation when lowered.
  ///
  /// The contract for this function is the same as \c getOperationCost except
  /// that it supports an interface that provides extra information specific to
  /// the GEP operation.
  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands) const;

  /// Estimate the cost of an EXT operation when lowered.
  ///
  /// The contract for this function is the same as \c getOperationCost except
  /// that it supports an interface that provides extra information specific to
  /// the EXT operation.
  int getExtCost(const Instruction *I, const Value *Src) const;

  /// Estimate the cost of a function call when lowered.
  ///
  /// The contract for this is the same as \c getOperationCost except that it
  /// supports an interface that provides extra information specific to call
  /// instructions.
  ///
  /// This is the most basic query for estimating call cost: it only knows the
  /// function type and (potentially) the number of arguments at the call site.
  /// The latter is only interesting for varargs function types.
  int getCallCost(FunctionType *FTy, int NumArgs = -1,
                  const User *U = nullptr) const;

  /// Estimate the cost of calling a specific function when lowered.
  ///
  /// This overload adds the ability to reason about the particular function
  /// being called in the event it is a library call with special lowering.
  int getCallCost(const Function *F, int NumArgs = -1,
                  const User *U = nullptr) const;

  /// Estimate the cost of calling a specific function when lowered.
  ///
  /// This overload allows specifying a set of candidate argument values.
  int getCallCost(const Function *F, ArrayRef<const Value *> Arguments,
                  const User *U = nullptr) const;

  /// \returns A value by which our inlining threshold should be multiplied.
  /// This is primarily used to bump up the inlining threshold wholesale on
  /// targets where calls are unusually expensive.
  ///
  /// TODO: This is a rather blunt instrument.  Perhaps altering the costs of
  /// individual classes of instructions would be better.
  unsigned getInliningThresholdMultiplier() const;

  /// \returns Vector bonus in percent.
  ///
  /// Vector bonuses: We want to more aggressively inline vector-dense kernels
  /// and apply this bonus based on the percentage of vector instructions. A
  /// bonus is applied if the vector instructions exceed 50% and half that amount
  /// is applied if it exceeds 10%. Note that these bonuses are somewhat
  /// arbitrary and evolved over time by accident as much as because they are
  /// principled bonuses.
  /// FIXME: It would be nice to base the bonus values on something more
  /// scientific. A target may have no bonus on vector instructions.
  int getInlinerVectorBonusPercent() const;

  /// Estimate the cost of an intrinsic when lowered.
  ///
  /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                       ArrayRef<Type *> ParamTys,
                       const User *U = nullptr) const;

  /// Estimate the cost of an intrinsic when lowered.
  ///
  /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                       ArrayRef<const Value *> Arguments,
                       const User *U = nullptr) const;

  /// \return the expected cost of a memcpy, which could e.g. depend on the
  /// source/destination type and alignment and the number of bytes copied.
  int getMemcpyCost(const Instruction *I) const;

  /// \return The estimated number of case clusters when lowering \p 'SI'.
  /// \p JTSize is set to the jump table size only when \p SI is suitable for
  /// a jump table.
  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) const;

  /// Estimate the cost of a given IR user when lowered.
  ///
  /// This can estimate the cost of either a ConstantExpr or Instruction when
  /// lowered. It has two primary advantages over the \c getOperationCost and
  /// \c getGEPCost above, and one significant disadvantage: it can only be
  /// used when the IR construct has already been formed.
  ///
  /// The advantages are that it can inspect the SSA use graph to reason more
  /// accurately about the cost. For example, all-constant-GEPs can often be
  /// folded into a load or other instruction, but if they are used in some
  /// other context they may not be folded. This routine can distinguish such
  /// cases.
  ///
  /// \p Operands is a list of operands which can be a result of transformations
  /// of the current operands. The number of operands on the list must equal
  /// the number of current operands the IR user has, and their order on the
  /// list must match the order of the current operands.
  ///
  /// The returned cost is defined in terms of \c TargetCostConstants, see its
  /// comments for a detailed explanation of the cost values.
  int getUserCost(const User *U, ArrayRef<const Value *> Operands) const;

  /// This is a helper function which calls the two-argument getUserCost
  /// with \p Operands which are the current operands U has.
  int getUserCost(const User *U) const {
    SmallVector<const Value *, 4> Operands(U->value_op_begin(),
                                           U->value_op_end());
    return getUserCost(U, Operands);
  }

  /// Return true if branch divergence exists.
  ///
  /// Branch divergence has a significantly negative impact on GPU performance
  /// when threads in the same wavefront take different paths due to conditional
  /// branches.
  bool hasBranchDivergence() const;

  /// Returns whether V is a source of divergence.
  ///
  /// This function provides the target-dependent information for the
  /// target-independent LegacyDivergenceAnalysis, which first builds the
  /// dependency graph and then runs the reachability algorithm starting with
  /// the sources of divergence.
  bool isSourceOfDivergence(const Value *V) const;

  /// Returns true for the target-specific set of operations which produce a
  /// uniform result even when taking non-uniform arguments.
  bool isAlwaysUniform(const Value *V) const;

  /// Returns the address space ID for a target's 'flat' address space. Note
  /// this is not necessarily the same as addrspace(0), which LLVM sometimes
  /// refers to as the generic address space. The flat address space is a
  /// generic address space that can be used to access multiple segments of
  /// memory with different address spaces. Access of a memory location through
  /// a pointer with this address space is expected to be legal but slower
  /// compared to the same memory location accessed through a pointer with a
  /// different address space.
  ///
  /// This is for targets with different pointer representations which can
  /// be converted with the addrspacecast instruction. If a pointer is converted
  /// to this address space, optimizations should attempt to replace the access
  /// with the source address space.
  ///
  /// \returns ~0u if the target does not have such a flat address space to
  /// optimize away.
  unsigned getFlatAddressSpace() const;

  /// Return any intrinsic address operand indexes which may be rewritten if
  /// they use a flat address space pointer.
  ///
  /// \returns true if the intrinsic was handled.
  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const;

  /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
  /// NewV, which has a different address space. This should happen for every
  /// operand index that collectFlatAddressOperands returned for the intrinsic.
  /// \returns true if the intrinsic was handled.
  bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
                                        Value *OldV, Value *NewV) const;
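
  // Illustrative example (a sketch under assumed context): an address-space
  // inference pass might combine the three queries above. 'TTI' and 'II' are
  // assumed in scope; 'OldPtr' and 'NewPtr' are hypothetical names for the
  // flat pointer operand and its specific-address-space replacement:
  //
  //   SmallVector<int, 4> OpIndexes;
  //   if (TTI.collectFlatAddressOperands(OpIndexes, II->getIntrinsicID()))
  //     TTI.rewriteIntrinsicWithAddressSpace(II, OldPtr, NewPtr);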

  /// Test whether calls to a function lower to actual program function
  /// calls.
  ///
  /// The idea is to test whether the program is likely to require a 'call'
  /// instruction or equivalent in order to call the given function.
  ///
  /// FIXME: It's not clear that this is a good or useful query API. Clients
  /// should probably move to simpler cost metrics using the above.
  /// Alternatively, we could split the cost interface into distinct code-size
  /// and execution-speed costs. This would allow modelling the core of this
  /// query more accurately as a call is a single small instruction, but
  /// incurs significant execution cost.
  bool isLoweredToCall(const Function *F) const;

  struct LSRCost {
    /// TODO: Some of these could be merged. Also, a lexical ordering
    /// isn't always optimal.
    unsigned Insns;
    unsigned NumRegs;
    unsigned AddRecCost;
    unsigned NumIVMuls;
    unsigned NumBaseAdds;
    unsigned ImmCost;
    unsigned SetupCost;
    unsigned ScaleCost;
  };

  /// Parameters that control the generic loop unrolling transformation.
  struct UnrollingPreferences {
    /// The cost threshold for the unrolled loop. Should be relative to the
    /// getUserCost values returned by this API, and the expectation is that
    /// the unrolled loop's instructions when run through that interface should
    /// not exceed this cost. However, this is only an estimate. Also, specific
    /// loops may be unrolled even with a cost above this threshold if deemed
    /// profitable. Set this to UINT_MAX to disable the loop body cost
    /// restriction.
    unsigned Threshold;
    /// If complete unrolling will reduce the cost of the loop, we will boost
    /// the Threshold by a certain percent to allow more aggressive complete
    /// unrolling. This value provides the maximum boost percentage that we
    /// can apply to Threshold (the value should be no less than 100).
    /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
    ///                                    MaxPercentThresholdBoost / 100)
    /// E.g. if complete unrolling reduces the loop execution time by 50%
    /// then we boost the threshold by the factor of 2x. If unrolling is not
    /// expected to reduce the running time, then we do not increase the
    /// threshold.
    unsigned MaxPercentThresholdBoost;
    /// The cost threshold for the unrolled loop when optimizing for size (set
    /// to UINT_MAX to disable).
    unsigned OptSizeThreshold;
    /// The cost threshold for the unrolled loop, like Threshold, but used
    /// for partial/runtime unrolling (set to UINT_MAX to disable).
    unsigned PartialThreshold;
    /// The cost threshold for the unrolled loop when optimizing for size, like
    /// OptSizeThreshold, but used for partial/runtime unrolling (set to
    /// UINT_MAX to disable).
    unsigned PartialOptSizeThreshold;
    /// A forced unrolling factor (the number of concatenated bodies of the
    /// original loop in the unrolled loop body). When set to 0, the unrolling
    /// transformation will select an unrolling factor based on the current cost
    /// threshold and other factors.
    unsigned Count;
    /// A forced peeling factor (the number of bodies of the original loop
    /// that should be peeled off before the loop body). When set to 0, the
    /// unrolling transformation will select a peeling factor based on profile
    /// information and other factors.
    unsigned PeelCount;
    /// Default unroll count for loops with run-time trip count.
    unsigned DefaultUnrollRuntimeCount;
    /// Set the maximum unrolling factor. The unrolling factor may be selected
    /// using the appropriate cost threshold, but may not exceed this number
    /// (set to UINT_MAX to disable). This does not apply in cases where the
    /// loop is being fully unrolled.
    unsigned MaxCount;
    /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
    /// applies even if full unrolling is selected. This allows a target to fall
    /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
    unsigned FullUnrollMaxCount;
    /// Represents the number of instructions optimized when the "back edge"
    /// becomes a "fall through" in the unrolled loop.
    /// For now we count a conditional branch on a backedge and a comparison
    /// feeding it.
    unsigned BEInsns;
    /// Allow partial unrolling (unrolling of loops to expand the size of the
    /// loop body, not only to eliminate small constant-trip-count loops).
    bool Partial;
    /// Allow runtime unrolling (unrolling of loops to expand the size of the
    /// loop body even when the number of loop iterations is not known at
    /// compile time).
    bool Runtime;
    /// Allow generation of a loop remainder (extra iterations after unroll).
    bool AllowRemainder;
    /// Allow emitting expensive instructions (such as divisions) when computing
    /// the trip count of a loop for runtime unrolling.
    bool AllowExpensiveTripCount;
    /// Apply loop unroll on any kind of loop
    /// (mainly to loops that fail runtime unrolling).
    bool Force;
    /// Allow using trip count upper bound to unroll loops.
    bool UpperBound;
    /// Allow peeling off loop iterations.
    bool AllowPeeling;
    /// Allow unrolling of all the iterations of the runtime loop remainder.
    bool UnrollRemainder;
    /// Allow unroll and jam. Used to enable unroll and jam for the target.
    bool UnrollAndJam;
    /// Allow peeling based on profile. Used to enable peeling off all
    /// iterations based on the provided profile.
    /// If the value is true, the peeling cost model can decide to peel only
    /// some iterations, and in this case it will set this to false.
    bool PeelProfiledIterations;
    /// Threshold for unroll and jam, for inner loop size. The 'Threshold'
    /// value above is used during unroll and jam for the outer loop size.
    /// This value is used in the same manner to limit the size of the inner
    /// loop.
    unsigned UnrollAndJamInnerLoopThreshold;
  };

  /// Get target-customized preferences for the generic loop unrolling
  /// transformation. The caller will initialize UP with the current
  /// target-independent defaults.
  void getUnrollingPreferences(Loop *L, ScalarEvolution &,
                               UnrollingPreferences &UP) const;
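
  // Illustrative example (a sketch, not the actual unroller): the caller
  // seeds the struct with generic defaults and then lets the target adjust
  // any of the fields. 'TTI', 'L', and 'SE' are assumed to be in scope, and
  // the default values shown are hypothetical:
  //
  //   TargetTransformInfo::UnrollingPreferences UP;
  //   UP.Threshold = 150;              // Generic default (illustrative).
  //   UP.Partial = UP.Runtime = false;
  //   TTI.getUnrollingPreferences(L, SE, UP); // Target may override fields.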

  /// Query the target whether it would be profitable to convert the given loop
  /// into a hardware loop.
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC,
                                TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) const;

  /// Query the target whether it would be preferred to create a predicated
  /// vector loop, which can avoid the need to emit a scalar epilogue loop.
  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
                                   AssumptionCache &AC, TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   const LoopAccessInfo *LAI) const;

  /// @}

  /// \name Scalar Target Information
  /// @{

  /// Flags indicating the kind of support for population count.
  ///
  /// Compared to the SW implementation, HW support is supposed to
  /// significantly boost the performance when the population is dense, and it
  /// may or may not degrade performance if the population is sparse. A HW
  /// support is considered as "Fast" if it can outperform, or is on a par
  /// with, SW implementation when the population is sparse; otherwise, it is
  /// considered as "Slow".
  enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };

  /// Return true if the specified immediate is a legal add immediate, that
  /// is, the target has add instructions which can add a register with the
  /// immediate without having to materialize the immediate into a register.
  bool isLegalAddImmediate(int64_t Imm) const;

  /// Return true if the specified immediate is a legal icmp immediate,
  /// that is, the target has icmp instructions which can compare a register
  /// against the immediate without having to materialize the immediate into a
  /// register.
  bool isLegalICmpImmediate(int64_t Imm) const;

  /// Return true if the addressing mode represented by AM is legal for
  /// this target, for a load/store of the specified type.
  /// The type may be VoidTy, in which case only return true if the addressing
  /// mode is legal for a load/store of any legal type.
  /// If the target returns true in LSRWithInstrQueries(), I may be valid.
  /// TODO: Handle pre/postinc as well.
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace = 0,
                             Instruction *I = nullptr) const;

  /// Return true if the LSR cost of C1 is lower than that of C2.
  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                     TargetTransformInfo::LSRCost &C2) const;
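
  // Illustrative example (a sketch under assumed context): LSR can rank two
  // candidate solutions by asking the target which cost it prefers. 'TTI',
  // 'A', and 'B' (both LSRCost values, already populated) are assumed to be
  // in scope:
  //
  //   if (TTI.isLSRCostLess(A, B))
  //     ; // Keep candidate A; the target considers it cheaper than B.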

  /// Return true if the target can fuse a compare and branch.
  /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
  /// calculation for the instructions in a loop.
  bool canMacroFuseCmp() const;

  /// Return true if the target can save a compare for loop count, for
  /// example, a hardware loop saves a compare.
  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
                  DominatorTree *DT, AssumptionCache *AC,
                  TargetLibraryInfo *LibInfo) const;

  /// \return True if LSR should make efforts to create/preserve post-inc
  /// addressing mode expressions.
  bool shouldFavorPostInc() const;

  /// Return true if LSR should make efforts to generate indexed addressing
  /// modes that operate across loop iterations.
  bool shouldFavorBackedgeIndex(const Loop *L) const;

  /// Return true if the target supports masked store.
  bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) const;
  /// Return true if the target supports masked load.
  bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) const;

  /// Return true if the target supports nontemporal store.
  bool isLegalNTStore(Type *DataType, Align Alignment) const;
  /// Return true if the target supports nontemporal load.
  bool isLegalNTLoad(Type *DataType, Align Alignment) const;

  /// Return true if the target supports masked scatter.
  bool isLegalMaskedScatter(Type *DataType, MaybeAlign Alignment) const;
  /// Return true if the target supports masked gather.
  bool isLegalMaskedGather(Type *DataType, MaybeAlign Alignment) const;

  /// Return true if the target supports masked compress store.
  bool isLegalMaskedCompressStore(Type *DataType) const;
  /// Return true if the target supports masked expand load.
  bool isLegalMaskedExpandLoad(Type *DataType) const;

  /// Return true if the target has a unified operation to calculate division
  /// and remainder. If so, the additional implicit multiplication and
  /// subtraction required to calculate a remainder from division are free. This
  /// can enable more aggressive transformations for division and remainder than
  /// would typically be allowed using throughput or size cost models.
  bool hasDivRemOp(Type *DataType, bool IsSigned) const;

  /// Return true if the given instruction (assumed to be a memory access
  /// instruction) has a volatile variant. If that's the case then we can avoid
  /// addrspacecast to generic AS for volatile loads/stores. Default
  /// implementation returns false, which prevents address space inference for
  /// volatile loads/stores.
  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;

  /// Return true if the target doesn't mind addresses in vectors.
  bool prefersVectorizedAddressing() const;

  /// Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
  /// TODO: Handle pre/postinc as well.
  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale,
                           unsigned AddrSpace = 0) const;

  /// Return true if the loop strength reduce pass should make
  /// Instruction* based TTI queries to isLegalAddressingMode(). This is
  /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
  /// immediate offset and no index register.
  bool LSRWithInstrQueries() const;

  /// Return true if it's free to truncate a value of type Ty1 to type
  /// Ty2. e.g. On x86 it's free to truncate an i32 value in register EAX to
  /// i16 by referencing its sub-register AX.
  bool isTruncateFree(Type *Ty1, Type *Ty2) const;

  /// Return true if it is profitable to hoist instructions in the
  /// then/else blocks to before the if.
  bool isProfitableToHoist(Instruction *I) const;

  bool useAA() const;

  /// Return true if this type is legal.
  bool isTypeLegal(Type *Ty) const;

  /// Return true if switches should be turned into lookup tables for the
  /// target.
  bool shouldBuildLookupTables() const;

  /// Return true if switches should be turned into lookup tables
  /// containing this constant value for the target.
  bool shouldBuildLookupTablesForConstant(Constant *C) const;

  /// Return true if the input function, which is cold at all call sites,
  /// should use the coldcc calling convention.
  bool useColdCCForColdCall(Function &F) const;

  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;

  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                            unsigned VF) const;

  /// If the target has efficient vector element load/store instructions, it
  /// can return true here so that insertion/extraction costs are not added to
  /// the scalarization cost of a load/store.
  bool supportsEfficientVectorElementLoadStore() const;

  /// Don't restrict interleaved unrolling to small loops.
  bool enableAggressiveInterleaving(bool LoopHasReductions) const;

  /// Returns options for expansion of memcmp. IsZeroCmp is
  /// true if this is the expansion of memcmp(p1, p2, s) == 0.
  struct MemCmpExpansionOptions {
    // Return true if memcmp expansion is enabled.
    operator bool() const { return MaxNumLoads > 0; }

    // Maximum number of load operations.
    unsigned MaxNumLoads = 0;

    // The list of available load sizes (in bytes), sorted in decreasing order.
    SmallVector<unsigned, 8> LoadSizes;

    // For memcmp expansion when the memcmp result is only compared equal or
    // not-equal to 0, allow up to this number of load pairs per block. As an
    // example, this may allow 'memcmp(a, b, 3) == 0' in a single block:
    //   a0 = load2bytes &a[0]
    //   b0 = load2bytes &b[0]
    //   a2 = load1byte  &a[2]
    //   b2 = load1byte  &b[2]
    //   r  = cmp eq (a0 ^ b0 | a2 ^ b2), 0
    unsigned NumLoadsPerBlock = 1;

    // Set to true to allow overlapping loads. For example, 7-byte compares can
    // be done with two 4-byte compares instead of 4+2+1-byte compares. This
    // requires all loads in LoadSizes to be doable in an unaligned way.
    bool AllowOverlappingLoads = false;
  };
  MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                               bool IsZeroCmp) const;
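
  // Illustrative example (a sketch under assumed context): a memcmp-expansion
  // pass might query the target's options before rewriting a call. 'TTI' is
  // assumed in scope; 'IsSizeOpt' is a hypothetical flag:
  //
  //   auto Options = TTI.enableMemCmpExpansion(IsSizeOpt, /*IsZeroCmp=*/true);
  //   if (Options)
  //     ; // Expand, using at most Options.MaxNumLoads loads drawn from the
  //       // sizes listed (largest first) in Options.LoadSizes.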

  /// Enable matching of interleaved access groups.
  bool enableInterleavedAccessVectorization() const;

  /// Enable matching of interleaved access groups that contain predicated
  /// accesses or gaps and are therefore vectorized using masked
  /// vector loads/stores.
  bool enableMaskedInterleavedAccessVectorization() const;

  /// Indicate that it is potentially unsafe to automatically vectorize
  /// floating-point operations because the semantics of vector and scalar
  /// floating-point operations may differ. For example, ARM NEON v7 SIMD math
  /// does not support IEEE-754 denormal numbers, while depending on the
  /// platform, scalar floating-point math does.
  /// This applies to floating-point math operations and calls, not memory
  /// operations, shuffles, or casts.
  bool isFPVectorizationPotentiallyUnsafe() const;

  /// Determine if the target supports unaligned memory accesses.
  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                      unsigned BitWidth, unsigned AddressSpace = 0,
                                      unsigned Alignment = 1,
                                      bool *Fast = nullptr) const;

  /// Return hardware support for population count.
  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
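
  // Illustrative example (a sketch, not part of the interface): deciding
  // whether to emit a popcount-based sequence for a 64-bit integer. 'TTI' is
  // assumed to be in scope:
  //
  //   if (TTI.getPopcntSupport(64) == TargetTransformInfo::PSK_FastHardware)
  //     ; // Prefer the ctpop-based lowering over the software fallback.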

  /// Return true if the hardware has a fast square-root instruction.
  bool haveFastSqrt(Type *Ty) const;

  /// Return true if it is faster to check if a floating-point value is NaN
  /// (or not-NaN) versus a comparison against a constant FP zero value.
  /// Targets should override this if materializing a 0.0 for comparison is
  /// generally as cheap as checking for ordered/unordered.
  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;

  /// Return the expected cost of supporting the floating point operation
  /// of the specified type.
  int getFPOpCost(Type *Ty) const;

  /// Return the expected cost of materializing the given integer
  /// immediate of the specified type.
  int getIntImmCost(const APInt &Imm, Type *Ty) const;

  /// Return the expected cost of materialization for the given integer
  /// immediate of the specified type for a given instruction. The cost can be
  /// zero if the immediate can be folded into the specified instruction.
  int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm,
                        Type *Ty) const;
  int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                          Type *Ty) const;

  /// Return the expected cost for the given integer when optimising
  /// for size. This is different than the other integer immediate cost
  /// functions in that it is subtarget agnostic. This is useful when you e.g.
  /// target one ISA such as AArch32 but smaller encodings could be possible
  /// with another such as Thumb. This return value is used as a penalty when
  /// the total cost for a constant is calculated (the bigger the cost, the
  /// more beneficial constant hoisting is).
  int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                            Type *Ty) const;
  /// @}

  /// \name Vector Target Information
  /// @{

  /// The various kinds of shuffle patterns for vector queries.
  enum ShuffleKind {
    SK_Broadcast,       ///< Broadcast element 0 to all other elements.
    SK_Reverse,         ///< Reverse the order of the vector.
    SK_Select,          ///< Selects elements from the corresponding lane of
                        ///< either source operand. This is equivalent to a
                        ///< vector select with a constant condition operand.
    SK_Transpose,       ///< Transpose two vectors.
    SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
    SK_ExtractSubvector, ///< ExtractSubvector. Index indicates start offset.
    SK_PermuteTwoSrc,   ///< Merge elements from two source vectors into one
                        ///< with any shuffle mask.
    SK_PermuteSingleSrc ///< Shuffle elements of single source vector with any
                        ///< shuffle mask.
  };

  /// Additional information about an operand's possible values.
  enum OperandValueKind {
    OK_AnyValue,               // Operand can have any value.
    OK_UniformValue,           // Operand is uniform (splat of a value).
    OK_UniformConstantValue,   // Operand is uniform constant.
    OK_NonUniformConstantValue // Operand is a non uniform constant value.
  };

  /// Additional properties of an operand's values.
  enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };

  /// \return the number of registers in the target-provided register class.
  unsigned getNumberOfRegisters(unsigned ClassID) const;

  /// \return the target-provided register class ID for the provided type,
  /// accounting for type promotion and other type-legalization techniques
  /// that the target might apply. However, it specifically does not account
  /// for the scalarization or splitting of vector types. Should a vector type
  /// require scalarization or splitting into multiple underlying vector
  /// registers, that type should be mapped to a register class containing no
  /// registers. Specifically, this is designed to provide a simple, high-level
  /// view of the register allocation later performed by the backend. These
  /// register classes don't necessarily map onto the register classes used by
  /// the backend.
  /// FIXME: It's not currently possible to determine how many registers
  /// are used by the provided type.
  unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const;

  /// \return the target-provided register class name
  const char* getRegisterClassName(unsigned ClassID) const;

  /// \return The width of the largest scalar or vector register type.
  unsigned getRegisterBitWidth(bool Vector) const;

  /// \return The width of the smallest vector register type.
  unsigned getMinVectorRegisterBitWidth() const;

  /// \return True if the vectorization factor should be chosen to
  /// make the vector of the smallest element type match the size of a
  /// vector register. For wider element types, this could result in
  /// creating vectors that span multiple vector registers.
  /// If false, the vectorization factor will be chosen based on the
  /// size of the widest element type.
  bool shouldMaximizeVectorBandwidth(bool OptSize) const;

  /// \return The minimum vectorization factor for types of given element
  /// bit width, or 0 if there is no minimum VF. The returned value only
  /// applies when shouldMaximizeVectorBandwidth returns true.
  unsigned getMinimumVF(unsigned ElemWidth) const;

  /// \return True if it should be considered for address type promotion.
  /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
  /// profitable without finding other extensions fed by the same input.
  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;

  /// \return The size of a cache line in bytes.
  unsigned getCacheLineSize() const;

  /// The possible cache levels
  enum class CacheLevel {
    L1D,   // The L1 data cache
    L2D,   // The L2 data cache

    // We currently do not model L3 caches, as their sizes differ widely between
    // microarchitectures. Also, we currently do not have a use for L3 cache
    // size modeling yet.
  };

  /// \return The size of the cache level in bytes, if available.
  llvm::Optional<unsigned> getCacheSize(CacheLevel Level) const;

  /// \return The associativity of the cache level, if available.
  llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const;

  /// \return How much before a load we should place the prefetch
  /// instruction.  This is currently measured in number of
  /// instructions.
  unsigned getPrefetchDistance() const;

  /// \return Some HW prefetchers can handle accesses up to a certain
  /// constant stride.  This is the minimum stride in bytes where it
  /// makes sense to start adding SW prefetches.  The default is 1,
  /// i.e. prefetch with any stride.
  unsigned getMinPrefetchStride() const;

  /// \return The maximum number of iterations to prefetch ahead.  If
  /// the required number of iterations is more than this number, no
  /// prefetching is performed.
  unsigned getMaxPrefetchIterationsAhead() const;

  /// \return The maximum interleave factor that any transform should try to
  /// perform for this target. This number depends on the level of parallelism
  /// and the number of execution units in the CPU.
  unsigned getMaxInterleaveFactor(unsigned VF) const;

  /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
  static OperandValueKind getOperandInfo(Value *V,
                                         OperandValueProperties &OpProps);
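
  // Illustrative example (a sketch under assumed context): classifying a
  // binary operator's second operand before a cost query. 'BinOp' is assumed
  // to be an Instruction * in scope:
  //
  //   TargetTransformInfo::OperandValueProperties Props;
  //   TargetTransformInfo::OperandValueKind Kind =
  //       TargetTransformInfo::getOperandInfo(BinOp->getOperand(1), Props);
  //   bool IsPow2Const =
  //       Kind == TargetTransformInfo::OK_UniformConstantValue &&
  //       Props == TargetTransformInfo::OP_PowerOf2;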

  /// This is an approximation of reciprocal throughput of a math/logic op.
  /// A higher cost indicates less expected throughput.
  /// From Agner Fog's guides, reciprocal throughput is "the average number of
  /// clock cycles per instruction when the instructions are not part of a
  /// limiting dependency chain."
  /// Therefore, costs should be scaled to account for multiple execution units
  /// on the target that can process this type of instruction. For example, if
  /// there are 5 scalar integer units and 2 vector integer units that can
  /// calculate an 'add' in a single cycle, this model should indicate that the
  /// cost of the vector add instruction is 2.5 times the cost of the scalar
  /// add instruction.
  /// \p Args is an optional argument which holds the instruction operands
  /// values so the TTI can analyze those values searching for special
  /// cases or optimizations based on those values.
  /// \p CxtI is the optional original context instruction, if one exists, to
  /// provide even more information.
  int getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
      OperandValueKind Opd2Info = OK_AnyValue,
      OperandValueProperties Opd1PropInfo = OP_None,
      OperandValueProperties Opd2PropInfo = OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr) const;
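
  // Illustrative example (a sketch, not the actual vectorizer): comparing the
  // throughput cost of a scalar add against a 4-wide vector add. 'TTI' is
  // assumed in scope; 'ScalarTy' and 'VecTy' are hypothetical Type *s:
  //
  //   int ScalarCost = TTI.getArithmeticInstrCost(Instruction::Add, ScalarTy);
  //   int VectorCost = TTI.getArithmeticInstrCost(Instruction::Add, VecTy);
  //   bool VectorizeProfitable = VectorCost <= 4 * ScalarCost;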

  /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
  /// The index and subtype parameters are used by the subvector insertion and
  /// extraction shuffle kinds to show the insert/extract point and the type of
  /// the subvector being inserted/extracted.
  /// NOTE: For subvector extractions Tp represents the source type.
  int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0,
                     Type *SubTp = nullptr) const;

  /// \return The expected cost of cast instructions, such as bitcast, trunc,
  /// zext, etc. If there is an existing instruction that holds Opcode, it
  /// may be passed in the 'I' parameter.
  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                       const Instruction *I = nullptr) const;

  /// \return The expected cost of a sign- or zero-extended vector extract. Use
  /// -1 to indicate that there is no information about the index value.
  int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
                               unsigned Index = -1) const;

  /// \return The expected cost of control-flow related instructions such as
  /// Phi, Ret, Br.
  int getCFInstrCost(unsigned Opcode) const;

  /// \returns The expected cost of compare and select instructions. If there
  /// is an existing instruction that holds Opcode, it may be passed in the
  /// 'I' parameter.
  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy = nullptr,
                         const Instruction *I = nullptr) const;

  /// \return The expected cost of vector Insert and Extract.
  /// Use -1 to indicate that there is no information on the index value.
  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const;

  /// \return The cost of Load and Store instructions.
  int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                      unsigned AddressSpace,
                      const Instruction *I = nullptr) const;

  /// \return The cost of masked Load and Store instructions.
  int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                            unsigned AddressSpace) const;

  /// \return The cost of Gather or Scatter operation
  /// \p Opcode - is a type of memory access Load or Store
  /// \p DataTy - a vector type of the data to be loaded or stored
  /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
  /// \p VariableMask - true when the memory access is predicated with a mask
  ///                   that is not a compile-time constant
  /// \p Alignment - alignment of single element
  int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
                             bool VariableMask, unsigned Alignment) const;

  /// \return The cost of the interleaved memory operation.
  /// \p Opcode is the memory operation code
  /// \p VecTy is the vector type of the interleaved access.
  /// \p Factor is the interleave factor
  /// \p Indices is the indices for interleaved load members (as interleaved
  ///    load allows gaps)
  /// \p Alignment is the alignment of the memory operation
  /// \p AddressSpace is address space of the pointer.
  /// \p UseMaskForCond indicates if the memory access is predicated.
  /// \p UseMaskForGaps indicates if gaps should be masked.
  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
                                 ArrayRef<unsigned> Indices, unsigned Alignment,
                                 unsigned AddressSpace,
                                 bool UseMaskForCond = false,
                                 bool UseMaskForGaps = false) const;

  /// Calculate the cost of performing a vector reduction.
  ///
  /// This is the cost of reducing the vector value of type \p Ty to a scalar
  /// value using the operation denoted by \p Opcode. The form of the reduction
  /// can either be a pairwise reduction or a reduction that splits the vector
  /// at every reduction level.
  ///
  /// Pairwise:
  ///  (v0, v1, v2, v3)
  ///  ((v0+v1), (v2+v3), undef, undef)
  /// Split:
  ///  (v0, v1, v2, v3)
  ///  ((v0+v2), (v1+v3), undef, undef)
  int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
                                 bool IsPairwiseForm) const;
  int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm,
                             bool IsUnsigned) const;
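
  // Illustrative example (a sketch under assumed context): choosing the
  // cheaper of the two reduction forms described above for an integer add.
  // 'TTI' is assumed in scope and 'VecTy' is a hypothetical vector type:
  //
  //   int Pairwise = TTI.getArithmeticReductionCost(Instruction::Add, VecTy,
  //                                                 /*IsPairwiseForm=*/true);
  //   int Split = TTI.getArithmeticReductionCost(Instruction::Add, VecTy,
  //                                              /*IsPairwiseForm=*/false);
  //   bool UsePairwise = Pairwise < Split;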
999 
1000   /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
1001   /// Three cases are handled: 1. scalar instruction 2. vector instruction
1002   /// 3. scalar instruction which is to be vectorized with VF.
1003   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
1004                             ArrayRef<Value *> Args, FastMathFlags FMF,
1005                             unsigned VF = 1) const;
1006 
1007   /// \returns The cost of Intrinsic instructions. Types analysis only.
1008   /// If ScalarizationCostPassed is UINT_MAX, the cost of scalarizing the
1009   /// arguments and the return value will be computed based on types.
1010   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
1011                             ArrayRef<Type *> Tys, FastMathFlags FMF,
1012                             unsigned ScalarizationCostPassed = UINT_MAX) const;
1013 
1014   /// \returns The cost of Call instructions.
1015   int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const;
1016 
1017   /// \returns The number of pieces into which the provided type must be
1018   /// split during legalization. Zero is returned when the answer is unknown.
1019   unsigned getNumberOfParts(Type *Tp) const;
1020 
1021   /// \returns The cost of the address computation. For most targets this can be
1022   /// merged into the instruction indexing mode. Some targets might want to
1023   /// distinguish between address computation for memory operations on vector
1024   /// types and scalar types. Such targets should override this function.
1025   /// The 'SE' parameter holds pointer for the scalar evolution object which
1026   /// is used in order to get the Ptr step value in case of constant stride.
1027   /// The 'Ptr' parameter holds SCEV of the access pointer.
1028   int getAddressComputationCost(Type *Ty, ScalarEvolution *SE = nullptr,
1029                                 const SCEV *Ptr = nullptr) const;
1030 
1031   /// \returns The cost, if any, of keeping values of the given types alive
1032   /// over a callsite.
1033   ///
1034   /// Some types may require the use of register classes that do not have
1035   /// any callee-saved registers, so would require a spill and fill.
1036   unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;
1037 
1038   /// \returns True if the intrinsic is a supported memory intrinsic.  Info
1039   /// will contain additional information - whether the intrinsic may write
1040   /// or read to memory, volatility and the pointer.  Info is undefined
1041   /// if false is returned.
1042   bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
1043 
1044   /// \returns The maximum element size, in bytes, for an element
1045   /// unordered-atomic memory intrinsic.
1046   unsigned getAtomicMemIntrinsicMaxElementSize() const;
1047 
1048   /// \returns A value which is the result of the given memory intrinsic.  New
1049   /// instructions may be created to extract the result from the given intrinsic
1050   /// memory operation.  Returns nullptr if the target cannot create a result
1051   /// from the given intrinsic.
1052   Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1053                                            Type *ExpectedType) const;
1054 
1055   /// \returns The type to use in a loop expansion of a memcpy call.
1056   Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
1057                                   unsigned SrcAlign, unsigned DestAlign) const;
1058 
1059   /// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
1060   /// \param RemainingBytes The number of bytes to copy.
1061   ///
1062   /// Calculates the operand types to use when copying \p RemainingBytes of
1063   /// memory, where source and destination alignments are \p SrcAlign and
1064   /// \p DestAlign respectively.
1065   void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
1066                                          LLVMContext &Context,
1067                                          unsigned RemainingBytes,
1068                                          unsigned SrcAlign,
1069                                          unsigned DestAlign) const;
1070 
1071   /// \returns True if the two functions have compatible attributes for inlining
1072   /// purposes.
1073   bool areInlineCompatible(const Function *Caller,
1074                            const Function *Callee) const;
1075 
1076   /// \returns True if the caller and callee agree on how \p Args will be passed
1077   /// to the callee.
1078   /// \param[out] Args The list of compatible arguments.  The implementation may
1079   /// filter out any incompatible args from this list.
1080   bool areFunctionArgsABICompatible(const Function *Caller,
1081                                     const Function *Callee,
1082                                     SmallPtrSetImpl<Argument *> &Args) const;
1083 
1084   /// The type of load/store indexing.
1085   enum MemIndexedMode {
1086     MIM_Unindexed,  ///< No indexing.
1087     MIM_PreInc,     ///< Pre-incrementing.
1088     MIM_PreDec,     ///< Pre-decrementing.
1089     MIM_PostInc,    ///< Post-incrementing.
1090     MIM_PostDec     ///< Post-decrementing.
1091   };

  /// \returns True if the specified indexed load for the given type is legal.
  bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;

  /// \returns True if the specified indexed store for the given type is legal.
  bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;

  /// \returns The bitwidth of the largest vector type that should be used to
  /// load/store in the given address space.
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;

  /// \returns True if the load instruction is legal to vectorize.
  bool isLegalToVectorizeLoad(LoadInst *LI) const;

  /// \returns True if the store instruction is legal to vectorize.
  bool isLegalToVectorizeStore(StoreInst *SI) const;

  /// \returns True if it is legal to vectorize the given load chain.
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const;

  /// \returns True if it is legal to vectorize the given store chain.
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const;

  /// \returns The new vector factor value if the target doesn't support
  /// \p ChainSizeInBytes loads or has a better vector factor.
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const;

  /// \returns The new vector factor value if the target doesn't support
  /// \p ChainSizeInBytes stores or has a better vector factor.
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const;

  /// Flags describing the kind of vector reduction.
  struct ReductionFlags {
    ReductionFlags() : IsMaxOp(false), IsSigned(false), NoNaN(false) {}
    bool IsMaxOp;  ///< If the op is a min/max kind, true if it's a max operation.
    bool IsSigned; ///< Whether the operation is a signed int reduction.
    bool NoNaN;    ///< If op is an fp min/max, whether NaNs may be present.
  };

  /// \returns True if the target wants to handle the given reduction idiom in
  /// the intrinsics form instead of the shuffle form.
  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                             ReductionFlags Flags) const;

  /// \returns True if the target wants to expand the given reduction intrinsic
  /// into a shuffle sequence.
  bool shouldExpandReduction(const IntrinsicInst *II) const;
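
  // For reference, the two forms of an integer add reduction over <4 x i32>
  // look roughly like this in IR (sketch only):
  //
  // \code
  //   ; intrinsics form
  //   %r = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %v)
  //
  //   ; shuffle form: log2(VF) rounds of shuffle + op, then extract lane 0
  //   %s1 = shufflevector <4 x i32> %v, <4 x i32> undef,
  //                       <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  //   %a1 = add <4 x i32> %v, %s1
  //   ...                     ; one more shuffle+add round for VF = 4
  //   %r  = extractelement <4 x i32> %a2, i32 0
  // \endcode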

  /// \returns the size cost of rematerializing a GlobalValue address relative
  /// to a stack reload.
  unsigned getGISelRematGlobalCost() const;

  /// @}

private:
  /// Estimate the latency of the specified instruction.
  /// Returns 1 as the default value.
  int getInstructionLatency(const Instruction *I) const;

  /// Returns the expected throughput cost of the instruction.
  /// Returns -1 if the cost is unknown.
  int getInstructionThroughput(const Instruction *I) const;

  /// The abstract base class used to type erase specific TTI
  /// implementations.
  class Concept;

  /// The template model for the base class which wraps a concrete
  /// implementation in a type erased interface.
  template <typename T> class Model;
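
  // Concept/Model is the classic external-polymorphism (type-erasure) idiom:
  // any implementation type with the required members can be stored behind a
  // single Concept pointer, with no common base class imposed on it. A
  // stripped-down sketch of the pattern (hypothetical names), mirroring the
  // full definitions below:
  //
  // \code
  //   struct Concept {
  //     virtual ~Concept() = default;
  //     virtual int query() = 0;
  //   };
  //   template <typename T> struct Model final : Concept {
  //     T Impl;
  //     Model(T Impl) : Impl(std::move(Impl)) {}
  //     int query() override { return Impl.query(); }
  //   };
  // \endcode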

  std::unique_ptr<Concept> TTIImpl;
};

class TargetTransformInfo::Concept {
public:
  virtual ~Concept() = 0;
  virtual const DataLayout &getDataLayout() const = 0;
  virtual int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0;
  virtual int getGEPCost(Type *PointeeType, const Value *Ptr,
                         ArrayRef<const Value *> Operands) = 0;
  virtual int getExtCost(const Instruction *I, const Value *Src) = 0;
  virtual int getCallCost(FunctionType *FTy, int NumArgs, const User *U) = 0;
  virtual int getCallCost(const Function *F, int NumArgs, const User *U) = 0;
  virtual int getCallCost(const Function *F,
                          ArrayRef<const Value *> Arguments, const User *U) = 0;
  virtual unsigned getInliningThresholdMultiplier() = 0;
  virtual int getInlinerVectorBonusPercent() = 0;
  virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                               ArrayRef<Type *> ParamTys, const User *U) = 0;
  virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                               ArrayRef<const Value *> Arguments,
                               const User *U) = 0;
  virtual int getMemcpyCost(const Instruction *I) = 0;
  virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                                    unsigned &JTSize,
                                                    ProfileSummaryInfo *PSI,
                                                    BlockFrequencyInfo *BFI) = 0;
  virtual int
  getUserCost(const User *U, ArrayRef<const Value *> Operands) = 0;
  virtual bool hasBranchDivergence() = 0;
  virtual bool isSourceOfDivergence(const Value *V) = 0;
  virtual bool isAlwaysUniform(const Value *V) = 0;
  virtual unsigned getFlatAddressSpace() = 0;
  virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                          Intrinsic::ID IID) const = 0;
  virtual bool rewriteIntrinsicWithAddressSpace(
    IntrinsicInst *II, Value *OldV, Value *NewV) const = 0;
  virtual bool isLoweredToCall(const Function *F) = 0;
  virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
                                       UnrollingPreferences &UP) = 0;
  virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                        AssumptionCache &AC,
                                        TargetLibraryInfo *LibInfo,
                                        HardwareLoopInfo &HWLoopInfo) = 0;
  virtual bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
                                           ScalarEvolution &SE,
                                           AssumptionCache &AC,
                                           TargetLibraryInfo *TLI,
                                           DominatorTree *DT,
                                           const LoopAccessInfo *LAI) = 0;
  virtual bool isLegalAddImmediate(int64_t Imm) = 0;
  virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
  virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                     int64_t BaseOffset, bool HasBaseReg,
                                     int64_t Scale,
                                     unsigned AddrSpace,
                                     Instruction *I) = 0;
  virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                             TargetTransformInfo::LSRCost &C2) = 0;
  virtual bool canMacroFuseCmp() = 0;
  virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
                          LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
                          TargetLibraryInfo *LibInfo) = 0;
  virtual bool shouldFavorPostInc() const = 0;
  virtual bool shouldFavorBackedgeIndex(const Loop *L) const = 0;
  virtual bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) = 0;
  virtual bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) = 0;
  virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;
  virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0;
  virtual bool isLegalMaskedScatter(Type *DataType, MaybeAlign Alignment) = 0;
  virtual bool isLegalMaskedGather(Type *DataType, MaybeAlign Alignment) = 0;
  virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
  virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
  virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
  virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
  virtual bool prefersVectorizedAddressing() = 0;
  virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                   int64_t BaseOffset, bool HasBaseReg,
                                   int64_t Scale, unsigned AddrSpace) = 0;
  virtual bool LSRWithInstrQueries() = 0;
  virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
  virtual bool isProfitableToHoist(Instruction *I) = 0;
  virtual bool useAA() = 0;
  virtual bool isTypeLegal(Type *Ty) = 0;
  virtual bool shouldBuildLookupTables() = 0;
  virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
  virtual bool useColdCCForColdCall(Function &F) = 0;
  virtual unsigned
  getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) = 0;
  virtual unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                                    unsigned VF) = 0;
  virtual bool supportsEfficientVectorElementLoadStore() = 0;
  virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
  virtual MemCmpExpansionOptions
  enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;
  virtual bool enableInterleavedAccessVectorization() = 0;
  virtual bool enableMaskedInterleavedAccessVectorization() = 0;
  virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
  virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                              unsigned BitWidth,
                                              unsigned AddressSpace,
                                              unsigned Alignment,
                                              bool *Fast) = 0;
  virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
  virtual bool haveFastSqrt(Type *Ty) = 0;
  virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
  virtual int getFPOpCost(Type *Ty) = 0;
  virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                                    Type *Ty) = 0;
  virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0;
  virtual int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm,
                                Type *Ty) = 0;
  virtual int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                  const APInt &Imm, Type *Ty) = 0;
  virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0;
  virtual unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const = 0;
  virtual const char* getRegisterClassName(unsigned ClassID) const = 0;
  virtual unsigned getRegisterBitWidth(bool Vector) const = 0;
  virtual unsigned getMinVectorRegisterBitWidth() = 0;
  virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0;
  virtual unsigned getMinimumVF(unsigned ElemWidth) const = 0;
  virtual bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
  virtual unsigned getCacheLineSize() const = 0;
  virtual llvm::Optional<unsigned> getCacheSize(CacheLevel Level) const = 0;
  virtual llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const = 0;

  /// \return How much before a load we should place the prefetch
  /// instruction.  This is currently measured in number of
  /// instructions.
  virtual unsigned getPrefetchDistance() const = 0;

  /// \return Some HW prefetchers can handle accesses up to a certain
  /// constant stride.  This is the minimum stride in bytes where it
  /// makes sense to start adding SW prefetches.  The default is 1,
  /// i.e. prefetch with any stride.
  virtual unsigned getMinPrefetchStride() const = 0;

  /// \return The maximum number of iterations to prefetch ahead.  If
  /// the required number of iterations is more than this number, no
  /// prefetching is performed.
  virtual unsigned getMaxPrefetchIterationsAhead() const = 0;
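
  // A sketch of how a software-prefetching client might combine these three
  // hooks (Stride and LoopInstCount are hypothetical locals; LLVM's
  // LoopDataPrefetch pass contains the real heuristics):
  //
  // \code
  //   if (Stride < TTI.getMinPrefetchStride())
  //     return; // The HW prefetcher is expected to cover this access.
  //   unsigned ItersAhead =
  //       std::max(1u, TTI.getPrefetchDistance() / LoopInstCount);
  //   if (ItersAhead > TTI.getMaxPrefetchIterationsAhead())
  //     return; // Too far ahead to be worthwhile.
  // \endcode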

  virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
  virtual unsigned getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
      OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
      OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
      const Instruction *CxtI = nullptr) = 0;
  virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
                             Type *SubTp) = 0;
  virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                               const Instruction *I) = 0;
  virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                       VectorType *VecTy, unsigned Index) = 0;
  virtual int getCFInstrCost(unsigned Opcode) = 0;
  virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                 Type *CondTy, const Instruction *I) = 0;
  virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
                                 unsigned Index) = 0;
  virtual int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                              unsigned AddressSpace, const Instruction *I) = 0;
  virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                    unsigned Alignment,
                                    unsigned AddressSpace) = 0;
  virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                     Value *Ptr, bool VariableMask,
                                     unsigned Alignment) = 0;
  virtual int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                         unsigned Factor,
                                         ArrayRef<unsigned> Indices,
                                         unsigned Alignment,
                                         unsigned AddressSpace,
                                         bool UseMaskForCond = false,
                                         bool UseMaskForGaps = false) = 0;
  virtual int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
                                         bool IsPairwiseForm) = 0;
  virtual int getMinMaxReductionCost(Type *Ty, Type *CondTy,
                                     bool IsPairwiseForm, bool IsUnsigned) = 0;
  virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                      ArrayRef<Type *> Tys, FastMathFlags FMF,
                      unsigned ScalarizationCostPassed) = 0;
  virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
         ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) = 0;
  virtual int getCallInstrCost(Function *F, Type *RetTy,
                               ArrayRef<Type *> Tys) = 0;
  virtual unsigned getNumberOfParts(Type *Tp) = 0;
  virtual int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                        const SCEV *Ptr) = 0;
  virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
  virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                                  MemIntrinsicInfo &Info) = 0;
  virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
  virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                                   Type *ExpectedType) = 0;
  virtual Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                                          unsigned SrcAlign,
                                          unsigned DestAlign) const = 0;
  virtual void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const = 0;
  virtual bool areInlineCompatible(const Function *Caller,
                                   const Function *Callee) const = 0;
  virtual bool
  areFunctionArgsABICompatible(const Function *Caller, const Function *Callee,
                               SmallPtrSetImpl<Argument *> &Args) const = 0;
  virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
  virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;
  virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
  virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
  virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
  virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                           unsigned Alignment,
                                           unsigned AddrSpace) const = 0;
  virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                            unsigned Alignment,
                                            unsigned AddrSpace) const = 0;
  virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                                       unsigned ChainSizeInBytes,
                                       VectorType *VecTy) const = 0;
  virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                        unsigned ChainSizeInBytes,
                                        VectorType *VecTy) const = 0;
  virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                                     ReductionFlags) const = 0;
  virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
  virtual unsigned getGISelRematGlobalCost() const = 0;
  virtual int getInstructionLatency(const Instruction *I) = 0;
};

template <typename T>
class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
  T Impl;

public:
  Model(T Impl) : Impl(std::move(Impl)) {}
  ~Model() override {}

  const DataLayout &getDataLayout() const override {
    return Impl.getDataLayout();
  }

  int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override {
    return Impl.getOperationCost(Opcode, Ty, OpTy);
  }
  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands) override {
    return Impl.getGEPCost(PointeeType, Ptr, Operands);
  }
  int getExtCost(const Instruction *I, const Value *Src) override {
    return Impl.getExtCost(I, Src);
  }
  int getCallCost(FunctionType *FTy, int NumArgs, const User *U) override {
    return Impl.getCallCost(FTy, NumArgs, U);
  }
  int getCallCost(const Function *F, int NumArgs, const User *U) override {
    return Impl.getCallCost(F, NumArgs, U);
  }
  int getCallCost(const Function *F,
                  ArrayRef<const Value *> Arguments, const User *U) override {
    return Impl.getCallCost(F, Arguments, U);
  }
  unsigned getInliningThresholdMultiplier() override {
    return Impl.getInliningThresholdMultiplier();
  }
  int getInlinerVectorBonusPercent() override {
    return Impl.getInlinerVectorBonusPercent();
  }
  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                       ArrayRef<Type *> ParamTys, const User *U = nullptr) override {
    return Impl.getIntrinsicCost(IID, RetTy, ParamTys, U);
  }
  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                       ArrayRef<const Value *> Arguments,
                       const User *U = nullptr) override {
    return Impl.getIntrinsicCost(IID, RetTy, Arguments, U);
  }
  int getMemcpyCost(const Instruction *I) override {
    return Impl.getMemcpyCost(I);
  }
  int getUserCost(const User *U, ArrayRef<const Value *> Operands) override {
    return Impl.getUserCost(U, Operands);
  }
  bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
  bool isSourceOfDivergence(const Value *V) override {
    return Impl.isSourceOfDivergence(V);
  }

  bool isAlwaysUniform(const Value *V) override {
    return Impl.isAlwaysUniform(V);
  }

  unsigned getFlatAddressSpace() override {
    return Impl.getFlatAddressSpace();
  }

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const override {
    return Impl.collectFlatAddressOperands(OpIndexes, IID);
  }

  bool rewriteIntrinsicWithAddressSpace(
    IntrinsicInst *II, Value *OldV, Value *NewV) const override {
    return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
  }

  bool isLoweredToCall(const Function *F) override {
    return Impl.isLoweredToCall(F);
  }
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               UnrollingPreferences &UP) override {
    return Impl.getUnrollingPreferences(L, SE, UP);
  }
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC,
                                TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) override {
    return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
  }
  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
                                   AssumptionCache &AC, TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   const LoopAccessInfo *LAI) override {
    return Impl.preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
  }
  bool isLegalAddImmediate(int64_t Imm) override {
    return Impl.isLegalAddImmediate(Imm);
  }
  bool isLegalICmpImmediate(int64_t Imm) override {
    return Impl.isLegalICmpImmediate(Imm);
  }
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace,
                             Instruction *I) override {
    return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
                                      Scale, AddrSpace, I);
  }
  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                     TargetTransformInfo::LSRCost &C2) override {
    return Impl.isLSRCostLess(C1, C2);
  }
  bool canMacroFuseCmp() override {
    return Impl.canMacroFuseCmp();
  }
  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
                  LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
                  TargetLibraryInfo *LibInfo) override {
    return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
  }
  bool shouldFavorPostInc() const override {
    return Impl.shouldFavorPostInc();
  }
  bool shouldFavorBackedgeIndex(const Loop *L) const override {
    return Impl.shouldFavorBackedgeIndex(L);
  }
  bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) override {
    return Impl.isLegalMaskedStore(DataType, Alignment);
  }
  bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) override {
    return Impl.isLegalMaskedLoad(DataType, Alignment);
  }
  bool isLegalNTStore(Type *DataType, Align Alignment) override {
    return Impl.isLegalNTStore(DataType, Alignment);
  }
  bool isLegalNTLoad(Type *DataType, Align Alignment) override {
    return Impl.isLegalNTLoad(DataType, Alignment);
  }
  bool isLegalMaskedScatter(Type *DataType, MaybeAlign Alignment) override {
    return Impl.isLegalMaskedScatter(DataType, Alignment);
  }
  bool isLegalMaskedGather(Type *DataType, MaybeAlign Alignment) override {
    return Impl.isLegalMaskedGather(DataType, Alignment);
  }
  bool isLegalMaskedCompressStore(Type *DataType) override {
    return Impl.isLegalMaskedCompressStore(DataType);
  }
  bool isLegalMaskedExpandLoad(Type *DataType) override {
    return Impl.isLegalMaskedExpandLoad(DataType);
  }
  bool hasDivRemOp(Type *DataType, bool IsSigned) override {
    return Impl.hasDivRemOp(DataType, IsSigned);
  }
  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
    return Impl.hasVolatileVariant(I, AddrSpace);
  }
  bool prefersVectorizedAddressing() override {
    return Impl.prefersVectorizedAddressing();
  }
  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale,
                           unsigned AddrSpace) override {
    return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
                                     Scale, AddrSpace);
  }
  bool LSRWithInstrQueries() override {
    return Impl.LSRWithInstrQueries();
  }
  bool isTruncateFree(Type *Ty1, Type *Ty2) override {
    return Impl.isTruncateFree(Ty1, Ty2);
  }
  bool isProfitableToHoist(Instruction *I) override {
    return Impl.isProfitableToHoist(I);
  }
  bool useAA() override { return Impl.useAA(); }
  bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
  bool shouldBuildLookupTables() override {
    return Impl.shouldBuildLookupTables();
  }
  bool shouldBuildLookupTablesForConstant(Constant *C) override {
    return Impl.shouldBuildLookupTablesForConstant(C);
  }
  bool useColdCCForColdCall(Function &F) override {
    return Impl.useColdCCForColdCall(F);
  }

  unsigned getScalarizationOverhead(Type *Ty, bool Insert,
                                    bool Extract) override {
    return Impl.getScalarizationOverhead(Ty, Insert, Extract);
  }
  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                            unsigned VF) override {
    return Impl.getOperandsScalarizationOverhead(Args, VF);
  }

  bool supportsEfficientVectorElementLoadStore() override {
    return Impl.supportsEfficientVectorElementLoadStore();
  }

  bool enableAggressiveInterleaving(bool LoopHasReductions) override {
    return Impl.enableAggressiveInterleaving(LoopHasReductions);
  }
  MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                               bool IsZeroCmp) const override {
    return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
  }
  bool enableInterleavedAccessVectorization() override {
    return Impl.enableInterleavedAccessVectorization();
  }
  bool enableMaskedInterleavedAccessVectorization() override {
    return Impl.enableMaskedInterleavedAccessVectorization();
  }
  bool isFPVectorizationPotentiallyUnsafe() override {
    return Impl.isFPVectorizationPotentiallyUnsafe();
  }
  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                      unsigned BitWidth, unsigned AddressSpace,
                                      unsigned Alignment, bool *Fast) override {
    return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
                                               Alignment, Fast);
  }
  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
    return Impl.getPopcntSupport(IntTyWidthInBit);
  }
  bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }

  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
    return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
  }

  int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }

  int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                            Type *Ty) override {
    return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
  }
  int getIntImmCost(const APInt &Imm, Type *Ty) override {
    return Impl.getIntImmCost(Imm, Ty);
  }
  int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm,
                        Type *Ty) override {
    return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty);
  }
  int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                          Type *Ty) override {
    return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty);
  }
  unsigned getNumberOfRegisters(unsigned ClassID) const override {
    return Impl.getNumberOfRegisters(ClassID);
  }
  unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const override {
    return Impl.getRegisterClassForType(Vector, Ty);
  }
  const char* getRegisterClassName(unsigned ClassID) const override {
    return Impl.getRegisterClassName(ClassID);
  }
  unsigned getRegisterBitWidth(bool Vector) const override {
    return Impl.getRegisterBitWidth(Vector);
  }
  unsigned getMinVectorRegisterBitWidth() override {
    return Impl.getMinVectorRegisterBitWidth();
  }
  bool shouldMaximizeVectorBandwidth(bool OptSize) const override {
    return Impl.shouldMaximizeVectorBandwidth(OptSize);
  }
  unsigned getMinimumVF(unsigned ElemWidth) const override {
    return Impl.getMinimumVF(ElemWidth);
  }
  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
    return Impl.shouldConsiderAddressTypePromotion(
        I, AllowPromotionWithoutCommonHeader);
  }
  unsigned getCacheLineSize() const override {
    return Impl.getCacheLineSize();
  }
  llvm::Optional<unsigned> getCacheSize(CacheLevel Level) const override {
    return Impl.getCacheSize(Level);
  }
  llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const override {
    return Impl.getCacheAssociativity(Level);
  }

  /// Return the preferred prefetch distance in terms of instructions.
  ///
  unsigned getPrefetchDistance() const override {
    return Impl.getPrefetchDistance();
  }

  /// Return the minimum stride necessary to trigger software
  /// prefetching.
  ///
  unsigned getMinPrefetchStride() const override {
    return Impl.getMinPrefetchStride();
  }

  /// Return the maximum prefetch distance in terms of loop
  /// iterations.
  ///
  unsigned getMaxPrefetchIterationsAhead() const override {
    return Impl.getMaxPrefetchIterationsAhead();
  }

  unsigned getMaxInterleaveFactor(unsigned VF) override {
    return Impl.getMaxInterleaveFactor(VF);
  }
  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) override {
    return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
  }
  unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
                                  OperandValueKind Opd1Info,
                                  OperandValueKind Opd2Info,
                                  OperandValueProperties Opd1PropInfo,
                                  OperandValueProperties Opd2PropInfo,
                                  ArrayRef<const Value *> Args,
                                  const Instruction *CxtI = nullptr) override {
    return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
                                       Opd1PropInfo, Opd2PropInfo, Args, CxtI);
  }
  int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
                     Type *SubTp) override {
    return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
  }
  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                       const Instruction *I) override {
    return Impl.getCastInstrCost(Opcode, Dst, Src, I);
  }
  int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
                               unsigned Index) override {
    return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
  }
  int getCFInstrCost(unsigned Opcode) override {
    return Impl.getCFInstrCost(Opcode);
  }
  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                         const Instruction *I) override {
    return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
  }
  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
    return Impl.getVectorInstrCost(Opcode, Val, Index);
  }
  int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                      unsigned AddressSpace, const Instruction *I) override {
    return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
  }
  int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                            unsigned AddressSpace) override {
    return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
  }
  int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                             Value *Ptr, bool VariableMask,
                             unsigned Alignment) override {
    return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                       Alignment);
  }
  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
                                 ArrayRef<unsigned> Indices, unsigned Alignment,
                                 unsigned AddressSpace, bool UseMaskForCond,
                                 bool UseMaskForGaps) override {
    return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                           Alignment, AddressSpace,
                                           UseMaskForCond, UseMaskForGaps);
  }
  int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
                                 bool IsPairwiseForm) override {
    return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
  }
  int getMinMaxReductionCost(Type *Ty, Type *CondTy,
                             bool IsPairwiseForm, bool IsUnsigned) override {
    return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
  }
  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
               FastMathFlags FMF, unsigned ScalarizationCostPassed) override {
    return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
                                      ScalarizationCostPassed);
  }
  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
       ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) override {
    return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
  }
  int getCallInstrCost(Function *F, Type *RetTy,
                       ArrayRef<Type *> Tys) override {
    return Impl.getCallInstrCost(F, RetTy, Tys);
  }
  unsigned getNumberOfParts(Type *Tp) override {
    return Impl.getNumberOfParts(Tp);
  }
  int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                const SCEV *Ptr) override {
    return Impl.getAddressComputationCost(Ty, SE, Ptr);
  }
  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
    return Impl.getCostOfKeepingLiveOverCall(Tys);
  }
  bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                          MemIntrinsicInfo &Info) override {
    return Impl.getTgtMemIntrinsic(Inst, Info);
  }
  unsigned getAtomicMemIntrinsicMaxElementSize() const override {
    return Impl.getAtomicMemIntrinsicMaxElementSize();
  }
  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) override {
    return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
  }
  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                                  unsigned SrcAlign,
                                  unsigned DestAlign) const override {
    return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAlign, DestAlign);
  }
  void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
                                         LLVMContext &Context,
                                         unsigned RemainingBytes,
                                         unsigned SrcAlign,
                                         unsigned DestAlign) const override {
    Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
                                           SrcAlign, DestAlign);
  }
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const override {
    return Impl.areInlineCompatible(Caller, Callee);
  }
  bool areFunctionArgsABICompatible(
      const Function *Caller, const Function *Callee,
      SmallPtrSetImpl<Argument *> &Args) const override {
    return Impl.areFunctionArgsABICompatible(Caller, Callee, Args);
  }
  bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
    return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
  }
  bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
    return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
  }
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
    return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
  }
  bool isLegalToVectorizeLoad(LoadInst *LI) const override {
    return Impl.isLegalToVectorizeLoad(LI);
  }
  bool isLegalToVectorizeStore(StoreInst *SI) const override {
    return Impl.isLegalToVectorizeStore(SI);
  }
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const override {
    return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
                                            AddrSpace);
  }
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const override {
    return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
                                             AddrSpace);
  }
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const override {
    return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
  }
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const override {
    return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
  }
  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                             ReductionFlags Flags) const override {
    return Impl.useReductionIntrinsic(Opcode, Ty, Flags);
  }
  bool shouldExpandReduction(const IntrinsicInst *II) const override {
    return Impl.shouldExpandReduction(II);
  }

  unsigned getGISelRematGlobalCost() const override {
    return Impl.getGISelRematGlobalCost();
  }

  int getInstructionLatency(const Instruction *I) override {
    return Impl.getInstructionLatency(I);
  }
};

template <typename T>
TargetTransformInfo::TargetTransformInfo(T Impl)
    : TTIImpl(new Model<T>(Impl)) {}

/// Analysis pass providing the \c TargetTransformInfo.
///
/// The core idea of the TargetIRAnalysis is to expose an interface through
/// which LLVM targets can analyze and provide information about the middle
/// end's target-independent IR. This supports use cases such as target-aware
/// cost modeling of IR constructs.
///
/// This is a function analysis because much of the cost modeling for targets
/// is done in a subtarget specific way and LLVM supports compiling different
/// functions targeting different subtargets in order to support runtime
/// dispatch according to the observed subtarget.
class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
public:
  typedef TargetTransformInfo Result;

  /// Default construct a target IR analysis.
  ///
  /// This will use the module's datalayout to construct a baseline
  /// conservative TTI result.
  TargetIRAnalysis();

  /// Construct an IR analysis pass around a target-provided callback.
  ///
  /// The callback will be called with a particular function for which the TTI
  /// is needed and must return a TTI object for that function.
  TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);
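
  // A usage sketch, assuming a TargetMachine `TM` is in scope: backends
  // typically wire the analysis up with a callback that consults their
  // TargetMachine, though the exact call is the target's choice:
  //
  // \code
  //   TargetIRAnalysis TIRA([&](const Function &F) {
  //     return TM.getTargetTransformInfo(F);
  //   });
  // \endcode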

  // Value semantics. We spell out the constructors for MSVC.
  TargetIRAnalysis(const TargetIRAnalysis &Arg)
      : TTICallback(Arg.TTICallback) {}
  TargetIRAnalysis(TargetIRAnalysis &&Arg)
      : TTICallback(std::move(Arg.TTICallback)) {}
  TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
    TTICallback = RHS.TTICallback;
    return *this;
  }
  TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
    TTICallback = std::move(RHS.TTICallback);
    return *this;
  }

  Result run(const Function &F, FunctionAnalysisManager &);

private:
  friend AnalysisInfoMixin<TargetIRAnalysis>;
  static AnalysisKey Key;

  /// The callback used to produce a result.
  ///
  /// We use a completely opaque callback so that targets can provide whatever
  /// mechanism they desire for constructing the TTI for a given function.
  ///
  /// FIXME: Should we really use std::function? It's relatively inefficient.
  /// It might be possible to arrange for even stateful callbacks to outlive
  /// the analysis and thus use a function_ref which would be lighter weight.
  /// This may also be less error prone as the callback is likely to reference
  /// the external TargetMachine, and that reference needs to never dangle.
  std::function<Result(const Function &)> TTICallback;

  /// Helper function used as the callback in the default constructor.
  static Result getDefaultTTI(const Function &F);
};

/// Wrapper pass for TargetTransformInfo.
///
/// This pass can be constructed from a TTI object which it stores internally
/// and is queried by passes.
class TargetTransformInfoWrapperPass : public ImmutablePass {
  TargetIRAnalysis TIRA;
  Optional<TargetTransformInfo> TTI;

  virtual void anchor();

public:
  static char ID;

  /// We must provide a default constructor for the pass but it should
  /// never be used.
  ///
  /// Use the constructor below or call one of the creation routines.
  TargetTransformInfoWrapperPass();

  explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);

  TargetTransformInfo &getTTI(const Function &F);
};
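
// A usage sketch for a legacy-PM pass (assuming a Function &F in scope):
// declare the dependency in getAnalysisUsage, then fetch the per-function
// TTI when the pass runs:
//
// \code
//   AU.addRequired<TargetTransformInfoWrapperPass>();
//   ...
//   TargetTransformInfo &TTI =
//       getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
// \endcode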

/// Create an analysis pass wrapper around a TTI object.
///
/// This analysis pass just holds the TTI instance and makes it available to
/// clients.
ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);

} // End llvm namespace

#endif