1 //===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This pass exposes codegen information to IR-level passes. Every
10 /// transformation that uses codegen information is broken into three parts:
11 /// 1. The IR-level analysis pass.
12 /// 2. The IR-level transformation interface which provides the needed
13 /// information.
14 /// 3. Codegen-level implementation which uses target-specific hooks.
15 ///
16 /// This file defines #2, which is the interface that IR-level transformations
17 /// use for querying the codegen.
18 ///
19 //===----------------------------------------------------------------------===//
20
21 #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
22 #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
23
24 #include "llvm/ADT/Optional.h"
25 #include "llvm/IR/Operator.h"
26 #include "llvm/IR/PassManager.h"
27 #include "llvm/Pass.h"
28 #include "llvm/Support/AtomicOrdering.h"
29 #include "llvm/Support/DataTypes.h"
30 #include "llvm/Analysis/LoopInfo.h"
31 #include "llvm/Analysis/ScalarEvolution.h"
32 #include "llvm/IR/Dominators.h"
33 #include "llvm/Analysis/AssumptionCache.h"
34 #include <functional>
35
36 namespace llvm {
37
38 namespace Intrinsic {
39 typedef unsigned ID;
40 }
41
42 class AssumptionCache;
43 class BlockFrequencyInfo;
44 class BranchInst;
45 class Function;
46 class GlobalValue;
47 class IntrinsicInst;
48 class LoadInst;
49 class LoopAccessInfo;
50 class Loop;
51 class ProfileSummaryInfo;
52 class SCEV;
53 class ScalarEvolution;
54 class StoreInst;
55 class SwitchInst;
56 class TargetLibraryInfo;
57 class Type;
58 class User;
59 class Value;
60
61 /// Information about a load/store intrinsic defined by the target.
62 struct MemIntrinsicInfo {
63 /// This is the pointer that the intrinsic is loading from or storing to.
64 /// If this is non-null, then analysis/optimization passes can assume that
65 /// this intrinsic is functionally equivalent to a load/store from this
66 /// pointer.
67 Value *PtrVal = nullptr;
68
69 // Ordering for atomic operations.
70 AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
71
72 // Same Id is set by the target for corresponding load/store intrinsics.
73 unsigned short MatchingId = 0;
74
75 bool ReadMem = false;
76 bool WriteMem = false;
77 bool IsVolatile = false;
78
bool isUnordered() const {
80 return (Ordering == AtomicOrdering::NotAtomic ||
81 Ordering == AtomicOrdering::Unordered) && !IsVolatile;
82 }
83 };
84
85 /// Attributes of a target dependent hardware loop.
86 struct HardwareLoopInfo {
87 HardwareLoopInfo() = delete;
HardwareLoopInfo(Loop *L) : L(L) {}
89 Loop *L = nullptr;
90 BasicBlock *ExitBlock = nullptr;
91 BranchInst *ExitBranch = nullptr;
92 const SCEV *ExitCount = nullptr;
93 IntegerType *CountType = nullptr;
94 Value *LoopDecrement = nullptr; // Decrement the loop counter by this
95 // value in every iteration.
96 bool IsNestingLegal = false; // Can a hardware loop be a parent to
97 // another hardware loop?
98 bool CounterInReg = false; // Should loop counter be updated in
99 // the loop via a phi?
100 bool PerformEntryTest = false; // Generate the intrinsic which also performs
101 // icmp ne zero on the loop counter value and
102 // produces an i1 to guard the loop entry.
103 bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI,
104 DominatorTree &DT, bool ForceNestedLoop = false,
105 bool ForceHardwareLoopPHI = false);
106 bool canAnalyze(LoopInfo &LI);
107 };
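// Illustrative sketch (not part of the interface): a pass that wants to form
// hardware loops would typically populate and query HardwareLoopInfo roughly
// as follows, assuming L, SE, LI, DT, AC, LibInfo and TTI are in scope:
//
//   HardwareLoopInfo HWLoopInfo(L);
//   if (HWLoopInfo.canAnalyze(LI) &&
//       HWLoopInfo.isHardwareLoopCandidate(SE, LI, DT) &&
//       TTI.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo)) {
//     // Lower the loop using HWLoopInfo.ExitBranch, CountType, etc.
//   }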
108
109 /// This pass provides access to the codegen interfaces that are needed
110 /// for IR-level transformations.
111 class TargetTransformInfo {
112 public:
113 /// Construct a TTI object using a type implementing the \c Concept
114 /// API below.
115 ///
116 /// This is used by targets to construct a TTI wrapping their target-specific
117 /// implementation that encodes appropriate costs for their target.
118 template <typename T> TargetTransformInfo(T Impl);
119
120 /// Construct a baseline TTI object using a minimal implementation of
121 /// the \c Concept API below.
122 ///
123 /// The TTI implementation will reflect the information in the DataLayout
124 /// provided if non-null.
125 explicit TargetTransformInfo(const DataLayout &DL);
126
127 // Provide move semantics.
128 TargetTransformInfo(TargetTransformInfo &&Arg);
129 TargetTransformInfo &operator=(TargetTransformInfo &&RHS);
130
131 // We need to define the destructor out-of-line to define our sub-classes
132 // out-of-line.
133 ~TargetTransformInfo();
134
135 /// Handle the invalidation of this information.
136 ///
137 /// When used as a result of \c TargetIRAnalysis this method will be called
138 /// when the function this was computed for changes. When it returns false,
139 /// the information is preserved across those changes.
bool invalidate(Function &, const PreservedAnalyses &,
141 FunctionAnalysisManager::Invalidator &) {
142 // FIXME: We should probably in some way ensure that the subtarget
143 // information for a function hasn't changed.
144 return false;
145 }
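// Example (illustrative only): obtaining a TargetTransformInfo for a function.
// With the new pass manager this is the result of TargetIRAnalysis; with the
// legacy pass manager it comes from TargetTransformInfoWrapperPass.
//
//   // New pass manager, assuming a FunctionAnalysisManager FAM:
//   TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
//   // Legacy pass manager, inside a FunctionPass::runOnFunction:
//   TargetTransformInfo &TTI =
//       getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);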
146
147 /// \name Generic Target Information
148 /// @{
149
150 /// The kind of cost model.
151 ///
152 /// There are several different cost models that can be customized by the
153 /// target. The normalization of each cost model may be target specific.
154 enum TargetCostKind {
155 TCK_RecipThroughput, ///< Reciprocal throughput.
TCK_Latency, ///< The latency of the instruction.
157 TCK_CodeSize ///< Instruction code size.
158 };
159
160 /// Query the cost of a specified instruction.
161 ///
162 /// Clients should use this interface to query the cost of an existing
163 /// instruction. The instruction must have a valid parent (basic block).
164 ///
165 /// Note, this method does not cache the cost calculation and it
166 /// can be expensive in some cases.
int getInstructionCost(const Instruction *I, enum TargetCostKind kind) const {
switch (kind) {
169 case TCK_RecipThroughput:
170 return getInstructionThroughput(I);
171
172 case TCK_Latency:
173 return getInstructionLatency(I);
174
175 case TCK_CodeSize:
176 return getUserCost(I);
177 }
178 llvm_unreachable("Unknown instruction cost kind");
179 }
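// For example (illustrative only), a client holding an instruction I with a
// valid parent block can compare the different cost kinds like so:
//
//   int RecipThroughput = TTI.getInstructionCost(&I, TCK_RecipThroughput);
//   int Latency = TTI.getInstructionCost(&I, TCK_Latency);
//   int CodeSize = TTI.getInstructionCost(&I, TCK_CodeSize);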
180
181 /// Underlying constants for 'cost' values in this interface.
182 ///
183 /// Many APIs in this interface return a cost. This enum defines the
184 /// fundamental values that should be used to interpret (and produce) those
185 /// costs. The costs are returned as an int rather than a member of this
186 /// enumeration because it is expected that the cost of one IR instruction
187 /// may have a multiplicative factor to it or otherwise won't fit directly
188 /// into the enum. Moreover, it is common to sum or average costs which works
189 /// better as simple integral values. Thus this enum only provides constants.
190 /// Also note that the returned costs are signed integers to make it natural
191 /// to add, subtract, and test with zero (a common boundary condition). It is
192 /// not expected that 2^32 is a realistic cost to be modeling at any point.
193 ///
194 /// Note that these costs should usually reflect the intersection of code-size
195 /// cost and execution cost. A free instruction is typically one that folds
196 /// into another instruction. For example, reg-to-reg moves can often be
197 /// skipped by renaming the registers in the CPU, but they still are encoded
198 /// and thus wouldn't be considered 'free' here.
199 enum TargetCostConstants {
200 TCC_Free = 0, ///< Expected to fold away in lowering.
201 TCC_Basic = 1, ///< The cost of a typical 'add' instruction.
202 TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
203 };
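// A common pattern (sketch only) is to compare an estimated cost against
// these constants rather than against ad-hoc numbers, e.g.:
//
//   if (TTI.getUserCost(&I) >= TargetTransformInfo::TCC_Expensive)
//     return false; // Too costly to speculate or duplicate this instruction.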
204
205 /// Estimate the cost of a specific operation when lowered.
206 ///
207 /// Note that this is designed to work on an arbitrary synthetic opcode, and
208 /// thus work for hypothetical queries before an instruction has even been
209 /// formed. However, this does *not* work for GEPs, and must not be called
210 /// for a GEP instruction. Instead, use the dedicated getGEPCost interface as
/// analyzing a GEP's cost requires more information.
212 ///
213 /// Typically only the result type is required, and the operand type can be
214 /// omitted. However, if the opcode is one of the cast instructions, the
215 /// operand type is required.
216 ///
217 /// The returned cost is defined in terms of \c TargetCostConstants, see its
218 /// comments for a detailed explanation of the cost values.
219 int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy = nullptr) const;
220
221 /// Estimate the cost of a GEP operation when lowered.
222 ///
223 /// The contract for this function is the same as \c getOperationCost except
224 /// that it supports an interface that provides extra information specific to
225 /// the GEP operation.
226 int getGEPCost(Type *PointeeType, const Value *Ptr,
227 ArrayRef<const Value *> Operands) const;
228
/// Estimate the cost of an EXT operation when lowered.
230 ///
231 /// The contract for this function is the same as \c getOperationCost except
232 /// that it supports an interface that provides extra information specific to
233 /// the EXT operation.
234 int getExtCost(const Instruction *I, const Value *Src) const;
235
236 /// Estimate the cost of a function call when lowered.
237 ///
238 /// The contract for this is the same as \c getOperationCost except that it
239 /// supports an interface that provides extra information specific to call
240 /// instructions.
241 ///
242 /// This is the most basic query for estimating call cost: it only knows the
243 /// function type and (potentially) the number of arguments at the call site.
244 /// The latter is only interesting for varargs function types.
245 int getCallCost(FunctionType *FTy, int NumArgs = -1,
246 const User *U = nullptr) const;
247
248 /// Estimate the cost of calling a specific function when lowered.
249 ///
250 /// This overload adds the ability to reason about the particular function
251 /// being called in the event it is a library call with special lowering.
252 int getCallCost(const Function *F, int NumArgs = -1,
253 const User *U = nullptr) const;
254
255 /// Estimate the cost of calling a specific function when lowered.
256 ///
257 /// This overload allows specifying a set of candidate argument values.
258 int getCallCost(const Function *F, ArrayRef<const Value *> Arguments,
259 const User *U = nullptr) const;
260
261 /// \returns A value by which our inlining threshold should be multiplied.
262 /// This is primarily used to bump up the inlining threshold wholesale on
263 /// targets where calls are unusually expensive.
264 ///
265 /// TODO: This is a rather blunt instrument. Perhaps altering the costs of
266 /// individual classes of instructions would be better.
267 unsigned getInliningThresholdMultiplier() const;
268
269 /// \returns Vector bonus in percent.
270 ///
271 /// Vector bonuses: We want to more aggressively inline vector-dense kernels
272 /// and apply this bonus based on the percentage of vector instructions. A
273 /// bonus is applied if the vector instructions exceed 50% and half that amount
/// is applied if it exceeds 10%. Note that these bonuses are somewhat
275 /// arbitrary and evolved over time by accident as much as because they are
276 /// principled bonuses.
277 /// FIXME: It would be nice to base the bonus values on something more
/// scientific. A target may have no bonus on vector instructions.
279 int getInlinerVectorBonusPercent() const;
280
281 /// Estimate the cost of an intrinsic when lowered.
282 ///
283 /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
284 int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
285 ArrayRef<Type *> ParamTys,
286 const User *U = nullptr) const;
287
288 /// Estimate the cost of an intrinsic when lowered.
289 ///
290 /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
291 int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
292 ArrayRef<const Value *> Arguments,
293 const User *U = nullptr) const;
294
295 /// \return the expected cost of a memcpy, which could e.g. depend on the
296 /// source/destination type and alignment and the number of bytes copied.
297 int getMemcpyCost(const Instruction *I) const;
298
/// \return The estimated number of case clusters when lowering \p 'SI'.
/// \p JTSize is set to the jump table size only when \p SI is suitable for a
/// jump table.
302 unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
303 unsigned &JTSize,
304 ProfileSummaryInfo *PSI,
305 BlockFrequencyInfo *BFI) const;
306
307 /// Estimate the cost of a given IR user when lowered.
308 ///
309 /// This can estimate the cost of either a ConstantExpr or Instruction when
310 /// lowered. It has two primary advantages over the \c getOperationCost and
311 /// \c getGEPCost above, and one significant disadvantage: it can only be
312 /// used when the IR construct has already been formed.
313 ///
314 /// The advantages are that it can inspect the SSA use graph to reason more
315 /// accurately about the cost. For example, all-constant-GEPs can often be
316 /// folded into a load or other instruction, but if they are used in some
317 /// other context they may not be folded. This routine can distinguish such
318 /// cases.
319 ///
/// \p Operands is a list of operands that can be the result of
/// transformations of the current operands. The list must have the same
/// number of operands as the IR user currently has, and the operands must be
/// in the same order as the user's current operands.
325 ///
326 /// The returned cost is defined in terms of \c TargetCostConstants, see its
327 /// comments for a detailed explanation of the cost values.
328 int getUserCost(const User *U, ArrayRef<const Value *> Operands) const;
329
330 /// This is a helper function which calls the two-argument getUserCost
331 /// with \p Operands which are the current operands U has.
int getUserCost(const User *U) const {
333 SmallVector<const Value *, 4> Operands(U->value_op_begin(),
334 U->value_op_end());
335 return getUserCost(U, Operands);
336 }
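// Sketch of a typical consumer (illustrative, not normative): summing
// getUserCost over a basic block BB to obtain a rough, target-informed size
// estimate of that block:
//
//   int BlockCost = 0;
//   for (const Instruction &I : BB)
//     BlockCost += TTI.getUserCost(&I);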
337
338 /// Return true if branch divergence exists.
339 ///
340 /// Branch divergence has a significantly negative impact on GPU performance
341 /// when threads in the same wavefront take different paths due to conditional
342 /// branches.
343 bool hasBranchDivergence() const;
344
345 /// Returns whether V is a source of divergence.
346 ///
/// This function provides the target-dependent information for
/// the target-independent LegacyDivergenceAnalysis. LegacyDivergenceAnalysis
/// first builds the dependency graph, and then runs the reachability
/// algorithm starting with the sources of divergence.
351 bool isSourceOfDivergence(const Value *V) const;
352
/// Returns true for the target-specific set of operations that produce a
/// uniform result even when given non-uniform arguments.
356 bool isAlwaysUniform(const Value *V) const;
357
358 /// Returns the address space ID for a target's 'flat' address space. Note
359 /// this is not necessarily the same as addrspace(0), which LLVM sometimes
360 /// refers to as the generic address space. The flat address space is a
/// generic address space that can be used to access multiple segments of memory
362 /// with different address spaces. Access of a memory location through a
363 /// pointer with this address space is expected to be legal but slower
364 /// compared to the same memory location accessed through a pointer with a
365 /// different address space.
///
367 /// This is for targets with different pointer representations which can
368 /// be converted with the addrspacecast instruction. If a pointer is converted
369 /// to this address space, optimizations should attempt to replace the access
370 /// with the source address space.
371 ///
372 /// \returns ~0u if the target does not have such a flat address space to
373 /// optimize away.
374 unsigned getFlatAddressSpace() const;
375
376 /// Return any intrinsic address operand indexes which may be rewritten if
377 /// they use a flat address space pointer.
378 ///
379 /// \returns true if the intrinsic was handled.
380 bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
381 Intrinsic::ID IID) const;
382
383 /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
384 /// NewV, which has a different address space. This should happen for every
385 /// operand index that collectFlatAddressOperands returned for the intrinsic.
/// \returns true if the intrinsic was handled.
387 bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
388 Value *OldV, Value *NewV) const;
389
390 /// Test whether calls to a function lower to actual program function
391 /// calls.
392 ///
393 /// The idea is to test whether the program is likely to require a 'call'
394 /// instruction or equivalent in order to call the given function.
395 ///
/// FIXME: It's not clear that this is a good or useful query API. Clients
397 /// should probably move to simpler cost metrics using the above.
398 /// Alternatively, we could split the cost interface into distinct code-size
399 /// and execution-speed costs. This would allow modelling the core of this
400 /// query more accurately as a call is a single small instruction, but
401 /// incurs significant execution cost.
402 bool isLoweredToCall(const Function *F) const;
403
404 struct LSRCost {
405 /// TODO: Some of these could be merged. Also, a lexical ordering
406 /// isn't always optimal.
407 unsigned Insns;
408 unsigned NumRegs;
409 unsigned AddRecCost;
410 unsigned NumIVMuls;
411 unsigned NumBaseAdds;
412 unsigned ImmCost;
413 unsigned SetupCost;
414 unsigned ScaleCost;
415 };
416
417 /// Parameters that control the generic loop unrolling transformation.
418 struct UnrollingPreferences {
419 /// The cost threshold for the unrolled loop. Should be relative to the
420 /// getUserCost values returned by this API, and the expectation is that
421 /// the unrolled loop's instructions when run through that interface should
422 /// not exceed this cost. However, this is only an estimate. Also, specific
423 /// loops may be unrolled even with a cost above this threshold if deemed
424 /// profitable. Set this to UINT_MAX to disable the loop body cost
425 /// restriction.
426 unsigned Threshold;
427 /// If complete unrolling will reduce the cost of the loop, we will boost
428 /// the Threshold by a certain percent to allow more aggressive complete
429 /// unrolling. This value provides the maximum boost percentage that we
430 /// can apply to Threshold (The value should be no less than 100).
431 /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
432 /// MaxPercentThresholdBoost / 100)
433 /// E.g. if complete unrolling reduces the loop execution time by 50%
434 /// then we boost the threshold by the factor of 2x. If unrolling is not
435 /// expected to reduce the running time, then we do not increase the
436 /// threshold.
437 unsigned MaxPercentThresholdBoost;
438 /// The cost threshold for the unrolled loop when optimizing for size (set
439 /// to UINT_MAX to disable).
440 unsigned OptSizeThreshold;
441 /// The cost threshold for the unrolled loop, like Threshold, but used
442 /// for partial/runtime unrolling (set to UINT_MAX to disable).
443 unsigned PartialThreshold;
444 /// The cost threshold for the unrolled loop when optimizing for size, like
445 /// OptSizeThreshold, but used for partial/runtime unrolling (set to
446 /// UINT_MAX to disable).
447 unsigned PartialOptSizeThreshold;
448 /// A forced unrolling factor (the number of concatenated bodies of the
449 /// original loop in the unrolled loop body). When set to 0, the unrolling
450 /// transformation will select an unrolling factor based on the current cost
451 /// threshold and other factors.
452 unsigned Count;
/// A forced peeling factor (the number of bodies of the original loop
454 /// that should be peeled off before the loop body). When set to 0, the
455 /// unrolling transformation will select a peeling factor based on profile
456 /// information and other factors.
457 unsigned PeelCount;
458 /// Default unroll count for loops with run-time trip count.
459 unsigned DefaultUnrollRuntimeCount;
460 // Set the maximum unrolling factor. The unrolling factor may be selected
461 // using the appropriate cost threshold, but may not exceed this number
462 // (set to UINT_MAX to disable). This does not apply in cases where the
463 // loop is being fully unrolled.
464 unsigned MaxCount;
465 /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
466 /// applies even if full unrolling is selected. This allows a target to fall
467 /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
468 unsigned FullUnrollMaxCount;
469 // Represents number of instructions optimized when "back edge"
470 // becomes "fall through" in unrolled loop.
471 // For now we count a conditional branch on a backedge and a comparison
472 // feeding it.
473 unsigned BEInsns;
474 /// Allow partial unrolling (unrolling of loops to expand the size of the
475 /// loop body, not only to eliminate small constant-trip-count loops).
476 bool Partial;
477 /// Allow runtime unrolling (unrolling of loops to expand the size of the
478 /// loop body even when the number of loop iterations is not known at
479 /// compile time).
480 bool Runtime;
481 /// Allow generation of a loop remainder (extra iterations after unroll).
482 bool AllowRemainder;
483 /// Allow emitting expensive instructions (such as divisions) when computing
484 /// the trip count of a loop for runtime unrolling.
485 bool AllowExpensiveTripCount;
486 /// Apply loop unroll on any kind of loop
487 /// (mainly to loops that fail runtime unrolling).
488 bool Force;
489 /// Allow using trip count upper bound to unroll loops.
490 bool UpperBound;
491 /// Allow peeling off loop iterations.
492 bool AllowPeeling;
493 /// Allow unrolling of all the iterations of the runtime loop remainder.
494 bool UnrollRemainder;
495 /// Allow unroll and jam. Used to enable unroll and jam for the target.
496 bool UnrollAndJam;
/// Allow peeling based on profile. Used to enable peeling off all
/// iterations based on the provided profile.
/// If the value is true, the peeling cost model can decide to peel only
/// some iterations and in this case it will set this to false.
501 bool PeelProfiledIterations;
502 /// Threshold for unroll and jam, for inner loop size. The 'Threshold'
503 /// value above is used during unroll and jam for the outer loop size.
504 /// This value is used in the same manner to limit the size of the inner
505 /// loop.
506 unsigned UnrollAndJamInnerLoopThreshold;
507 };
508
509 /// Get target-customized preferences for the generic loop unrolling
510 /// transformation. The caller will initialize UP with the current
511 /// target-independent defaults.
512 void getUnrollingPreferences(Loop *L, ScalarEvolution &,
513 UnrollingPreferences &UP) const;
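// Illustrative use only; the caller is expected to have filled UP with the
// target-independent defaults first, and LoopCost is an assumed, caller-side
// estimate of the unrolled loop body:
//
//   TargetTransformInfo::UnrollingPreferences UP;
//   // ... initialize UP with the target-independent defaults ...
//   TTI.getUnrollingPreferences(L, SE, UP);
//   if (LoopCost > UP.Threshold)
//     return; // The unrolled body would be too large for this target.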
514
515 /// Query the target whether it would be profitable to convert the given loop
516 /// into a hardware loop.
517 bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
518 AssumptionCache &AC,
519 TargetLibraryInfo *LibInfo,
520 HardwareLoopInfo &HWLoopInfo) const;
521
/// Query the target whether it would be preferred to create a predicated
/// vector loop, which can avoid the need to emit a scalar epilogue loop.
524 bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
525 AssumptionCache &AC, TargetLibraryInfo *TLI,
526 DominatorTree *DT,
527 const LoopAccessInfo *LAI) const;
528
529 /// @}
530
531 /// \name Scalar Target Information
532 /// @{
533
534 /// Flags indicating the kind of support for population count.
535 ///
536 /// Compared to the SW implementation, HW support is supposed to
537 /// significantly boost the performance when the population is dense, and it
/// may or may not degrade performance if the population is sparse. HW
/// support is considered "Fast" if it can outperform, or is on a par
/// with, the SW implementation when the population is sparse; otherwise, it
/// is considered "Slow".
542 enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };
543
/// Return true if the specified immediate is a legal add immediate, that
/// is, the target has add instructions which can add a register with the
/// immediate without having to materialize the immediate into a register.
547 bool isLegalAddImmediate(int64_t Imm) const;
548
/// Return true if the specified immediate is a legal icmp immediate,
/// that is, the target has icmp instructions which can compare a register
/// against the immediate without having to materialize the immediate into a
/// register.
553 bool isLegalICmpImmediate(int64_t Imm) const;
554
555 /// Return true if the addressing mode represented by AM is legal for
556 /// this target, for a load/store of the specified type.
557 /// The type may be VoidTy, in which case only return true if the addressing
558 /// mode is legal for a load/store of any legal type.
559 /// If target returns true in LSRWithInstrQueries(), I may be valid.
560 /// TODO: Handle pre/postinc as well.
561 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
562 bool HasBaseReg, int64_t Scale,
563 unsigned AddrSpace = 0,
564 Instruction *I = nullptr) const;
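// The queried addressing mode has the general form
//   BaseGV + BaseOffset + (HasBaseReg ? BaseReg : 0) + Scale * IndexReg
// so, as an illustrative sketch (GV and EltTy assumed in scope), an access
// resembling "GV + 16 + 4*i" would be checked with:
//
//   bool Legal = TTI.isLegalAddressingMode(EltTy, GV, /*BaseOffset=*/16,
//                                          /*HasBaseReg=*/false, /*Scale=*/4);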
565
/// Return true if the LSR cost of C1 is lower than the cost of C2.
567 bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
568 TargetTransformInfo::LSRCost &C2) const;
569
570 /// Return true if the target can fuse a compare and branch.
571 /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
572 /// calculation for the instructions in a loop.
573 bool canMacroFuseCmp() const;
574
/// Return true if the target can save a compare for loop count, for example
/// when a hardware loop is used.
577 bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
578 DominatorTree *DT, AssumptionCache *AC,
579 TargetLibraryInfo *LibInfo) const;
580
/// \return True if LSR should make efforts to create/preserve post-inc
582 /// addressing mode expressions.
583 bool shouldFavorPostInc() const;
584
585 /// Return true if LSR should make efforts to generate indexed addressing
586 /// modes that operate across loop iterations.
587 bool shouldFavorBackedgeIndex(const Loop *L) const;
588
589 /// Return true if the target supports masked store.
590 bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) const;
591 /// Return true if the target supports masked load.
592 bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) const;
593
594 /// Return true if the target supports nontemporal store.
595 bool isLegalNTStore(Type *DataType, Align Alignment) const;
596 /// Return true if the target supports nontemporal load.
597 bool isLegalNTLoad(Type *DataType, Align Alignment) const;
598
599 /// Return true if the target supports masked scatter.
600 bool isLegalMaskedScatter(Type *DataType, MaybeAlign Alignment) const;
601 /// Return true if the target supports masked gather.
602 bool isLegalMaskedGather(Type *DataType, MaybeAlign Alignment) const;
603
604 /// Return true if the target supports masked compress store.
605 bool isLegalMaskedCompressStore(Type *DataType) const;
606 /// Return true if the target supports masked expand load.
607 bool isLegalMaskedExpandLoad(Type *DataType) const;
608
609 /// Return true if the target has a unified operation to calculate division
610 /// and remainder. If so, the additional implicit multiplication and
611 /// subtraction required to calculate a remainder from division are free. This
612 /// can enable more aggressive transformations for division and remainder than
613 /// would typically be allowed using throughput or size cost models.
614 bool hasDivRemOp(Type *DataType, bool IsSigned) const;
615
616 /// Return true if the given instruction (assumed to be a memory access
617 /// instruction) has a volatile variant. If that's the case then we can avoid
618 /// addrspacecast to generic AS for volatile loads/stores. Default
619 /// implementation returns false, which prevents address space inference for
620 /// volatile loads/stores.
621 bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;
622
623 /// Return true if target doesn't mind addresses in vectors.
624 bool prefersVectorizedAddressing() const;
625
626 /// Return the cost of the scaling factor used in the addressing
627 /// mode represented by AM for this target, for a load/store
628 /// of the specified type.
629 /// If the AM is supported, the return value must be >= 0.
630 /// If the AM is not supported, it returns a negative value.
631 /// TODO: Handle pre/postinc as well.
632 int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
633 bool HasBaseReg, int64_t Scale,
634 unsigned AddrSpace = 0) const;
635
636 /// Return true if the loop strength reduce pass should make
637 /// Instruction* based TTI queries to isLegalAddressingMode(). This is
638 /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
639 /// immediate offset and no index register.
640 bool LSRWithInstrQueries() const;
641
642 /// Return true if it's free to truncate a value of type Ty1 to type
643 /// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
644 /// by referencing its sub-register AX.
645 bool isTruncateFree(Type *Ty1, Type *Ty2) const;
646
647 /// Return true if it is profitable to hoist instruction in the
648 /// then/else to before if.
649 bool isProfitableToHoist(Instruction *I) const;
650
651 bool useAA() const;
652
653 /// Return true if this type is legal.
654 bool isTypeLegal(Type *Ty) const;
655
656 /// Return true if switches should be turned into lookup tables for the
657 /// target.
658 bool shouldBuildLookupTables() const;
659
660 /// Return true if switches should be turned into lookup tables
661 /// containing this constant value for the target.
662 bool shouldBuildLookupTablesForConstant(Constant *C) const;
663
/// Return true if the input function, which is cold at all call sites,
/// should use the coldcc calling convention.
666 bool useColdCCForColdCall(Function &F) const;
667
668 unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
669
670 unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
671 unsigned VF) const;
672
673 /// If target has efficient vector element load/store instructions, it can
674 /// return true here so that insertion/extraction costs are not added to
675 /// the scalarization cost of a load/store.
676 bool supportsEfficientVectorElementLoadStore() const;
677
678 /// Don't restrict interleaved unrolling to small loops.
679 bool enableAggressiveInterleaving(bool LoopHasReductions) const;
680
681 /// Returns options for expansion of memcmp. IsZeroCmp is
/// true if this is the expansion of memcmp(p1, p2, s) == 0.
683 struct MemCmpExpansionOptions {
684 // Return true if memcmp expansion is enabled.
685 operator bool() const { return MaxNumLoads > 0; }
686
687 // Maximum number of load operations.
688 unsigned MaxNumLoads = 0;
689
690 // The list of available load sizes (in bytes), sorted in decreasing order.
691 SmallVector<unsigned, 8> LoadSizes;
692
693 // For memcmp expansion when the memcmp result is only compared equal or
694 // not-equal to 0, allow up to this number of load pairs per block. As an
695 // example, this may allow 'memcmp(a, b, 3) == 0' in a single block:
696 // a0 = load2bytes &a[0]
697 // b0 = load2bytes &b[0]
698 // a2 = load1byte &a[2]
699 // b2 = load1byte &b[2]
700 // r = cmp eq (a0 ^ b0 | a2 ^ b2), 0
701 unsigned NumLoadsPerBlock = 1;
702
703 // Set to true to allow overlapping loads. For example, 7-byte compares can
704 // be done with two 4-byte compares instead of 4+2+1-byte compares. This
705 // requires all loads in LoadSizes to be doable in an unaligned way.
706 bool AllowOverlappingLoads = false;
707 };
708 MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
709 bool IsZeroCmp) const;
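// A hypothetical target implementation (sketch only; MyTTIImpl is not an
// in-tree class) might enable expansion with two loads per block like so:
//
//   TargetTransformInfo::MemCmpExpansionOptions
//   MyTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
//     TargetTransformInfo::MemCmpExpansionOptions Options;
//     Options.MaxNumLoads = OptSize ? 2 : 8;
//     Options.NumLoadsPerBlock = IsZeroCmp ? 2 : 1;
//     Options.LoadSizes.push_back(4);
//     Options.LoadSizes.push_back(2);
//     Options.LoadSizes.push_back(1);
//     return Options;
//   }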
710
711 /// Enable matching of interleaved access groups.
712 bool enableInterleavedAccessVectorization() const;
713
714 /// Enable matching of interleaved access groups that contain predicated
/// accesses or gaps and are therefore vectorized using masked
716 /// vector loads/stores.
717 bool enableMaskedInterleavedAccessVectorization() const;
718
719 /// Indicate that it is potentially unsafe to automatically vectorize
/// floating-point operations because vector and scalar floating-point
/// semantics may differ. For example, ARM NEON v7 SIMD math
722 /// does not support IEEE-754 denormal numbers, while depending on the
723 /// platform, scalar floating-point math does.
724 /// This applies to floating-point math operations and calls, not memory
725 /// operations, shuffles, or casts.
726 bool isFPVectorizationPotentiallyUnsafe() const;
727
728 /// Determine if the target supports unaligned memory accesses.
729 bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
730 unsigned BitWidth, unsigned AddressSpace = 0,
731 unsigned Alignment = 1,
732 bool *Fast = nullptr) const;
733
734 /// Return hardware support for population count.
735 PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
736
737 /// Return true if the hardware has a fast square-root instruction.
738 bool haveFastSqrt(Type *Ty) const;
739
740 /// Return true if it is faster to check if a floating-point value is NaN
741 /// (or not-NaN) versus a comparison against a constant FP zero value.
742 /// Targets should override this if materializing a 0.0 for comparison is
743 /// generally as cheap as checking for ordered/unordered.
744 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;
745
746 /// Return the expected cost of supporting the floating point operation
747 /// of the specified type.
748 int getFPOpCost(Type *Ty) const;
749
750 /// Return the expected cost of materializing for the given integer
751 /// immediate of the specified type.
752 int getIntImmCost(const APInt &Imm, Type *Ty) const;
753
754 /// Return the expected cost of materialization for the given integer
755 /// immediate of the specified type for a given instruction. The cost can be
756 /// zero if the immediate can be folded into the specified instruction.
757 int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm,
758 Type *Ty) const;
759 int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
760 Type *Ty) const;
761
762 /// Return the expected cost for the given integer when optimising
763 /// for size. This is different than the other integer immediate cost
764 /// functions in that it is subtarget agnostic. This is useful when you e.g.
/// target one ISA such as AArch32 but smaller encodings could be possible
766 /// with another such as Thumb. This return value is used as a penalty when
/// the total cost for a constant is calculated (the bigger the cost, the
768 /// more beneficial constant hoisting is).
769 int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
770 Type *Ty) const;
771 /// @}
772
773 /// \name Vector Target Information
774 /// @{
775
776 /// The various kinds of shuffle patterns for vector queries.
777 enum ShuffleKind {
778 SK_Broadcast, ///< Broadcast element 0 to all other elements.
779 SK_Reverse, ///< Reverse the order of the vector.
780 SK_Select, ///< Selects elements from the corresponding lane of
781 ///< either source operand. This is equivalent to a
782 ///< vector select with a constant condition operand.
783 SK_Transpose, ///< Transpose two vectors.
784 SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
SK_ExtractSubvector, ///< ExtractSubvector. Index indicates start offset.
786 SK_PermuteTwoSrc, ///< Merge elements from two source vectors into one
787 ///< with any shuffle mask.
788 SK_PermuteSingleSrc ///< Shuffle elements of single source vector with any
789 ///< shuffle mask.
790 };
791
792 /// Additional information about an operand's possible values.
793 enum OperandValueKind {
794 OK_AnyValue, // Operand can have any value.
795 OK_UniformValue, // Operand is uniform (splat of a value).
796 OK_UniformConstantValue, // Operand is uniform constant.
797 OK_NonUniformConstantValue // Operand is a non uniform constant value.
798 };
799
800 /// Additional properties of an operand's values.
801 enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };
802
803 /// \return the number of registers in the target-provided register class.
804 unsigned getNumberOfRegisters(unsigned ClassID) const;
805
/// \return the target-provided register class ID for the provided type,
/// accounting for type promotion and other type-legalization techniques that
/// the target might apply. However, it specifically does not account for the
/// scalarization or splitting of vector types. Should a vector type require
/// scalarization or splitting into multiple underlying vector registers, that
/// type should be mapped to a register class containing no registers.
/// Specifically, this is designed to provide a simple, high-level view of the
/// register allocation later performed by the backend. These register classes
/// don't necessarily map onto the register classes used by the backend.
814 /// FIXME: It's not currently possible to determine how many registers
815 /// are used by the provided type.
816 unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const;
817
818 /// \return the target-provided register class name
819 const char* getRegisterClassName(unsigned ClassID) const;
820
821 /// \return The width of the largest scalar or vector register type.
822 unsigned getRegisterBitWidth(bool Vector) const;
823
824 /// \return The width of the smallest vector register type.
825 unsigned getMinVectorRegisterBitWidth() const;
826
827 /// \return True if the vectorization factor should be chosen to
828 /// make the vector of the smallest element type match the size of a
829 /// vector register. For wider element types, this could result in
830 /// creating vectors that span multiple vector registers.
831 /// If false, the vectorization factor will be chosen based on the
832 /// size of the widest element type.
833 bool shouldMaximizeVectorBandwidth(bool OptSize) const;
834
835 /// \return The minimum vectorization factor for types of given element
836 /// bit width, or 0 if there is no minimum VF. The returned value only
837 /// applies when shouldMaximizeVectorBandwidth returns true.
838 unsigned getMinimumVF(unsigned ElemWidth) const;
839
840 /// \return True if it should be considered for address type promotion.
841 /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
842 /// profitable without finding other extensions fed by the same input.
843 bool shouldConsiderAddressTypePromotion(
844 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
845
846 /// \return The size of a cache line in bytes.
847 unsigned getCacheLineSize() const;
848
849 /// The possible cache levels
850 enum class CacheLevel {
851 L1D, // The L1 data cache
852 L2D, // The L2 data cache
853
// We currently do not model L3 caches, as their sizes differ widely
// between microarchitectures. Also, we do not yet have a use for L3 cache
// size modeling.
857 };
858
859 /// \return The size of the cache level in bytes, if available.
860 llvm::Optional<unsigned> getCacheSize(CacheLevel Level) const;
861
862 /// \return The associativity of the cache level, if available.
863 llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const;
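// Example (sketch only): the cache queries return llvm::Optional values, so
// consumers must handle the "unknown" case; Footprint is an assumed variable:
//
//   if (llvm::Optional<unsigned> L1Size =
//           TTI.getCacheSize(TargetTransformInfo::CacheLevel::L1D))
//     Footprint = std::min(Footprint, *L1Size);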
864
865 /// \return How much before a load we should place the prefetch
866 /// instruction. This is currently measured in number of
867 /// instructions.
868 unsigned getPrefetchDistance() const;
869
870 /// \return Some HW prefetchers can handle accesses up to a certain
871 /// constant stride. This is the minimum stride in bytes where it
872 /// makes sense to start adding SW prefetches. The default is 1,
873 /// i.e. prefetch with any stride.
874 unsigned getMinPrefetchStride() const;
875
876 /// \return The maximum number of iterations to prefetch ahead. If
877 /// the required number of iterations is more than this number, no
878 /// prefetching is performed.
879 unsigned getMaxPrefetchIterationsAhead() const;
880
881 /// \return The maximum interleave factor that any transform should try to
882 /// perform for this target. This number depends on the level of parallelism
883 /// and the number of execution units in the CPU.
884 unsigned getMaxInterleaveFactor(unsigned VF) const;
885
886 /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
887 static OperandValueKind getOperandInfo(Value *V,
888 OperandValueProperties &OpProps);
889
890 /// This is an approximation of reciprocal throughput of a math/logic op.
891 /// A higher cost indicates less expected throughput.
892 /// From Agner Fog's guides, reciprocal throughput is "the average number of
893 /// clock cycles per instruction when the instructions are not part of a
894 /// limiting dependency chain."
895 /// Therefore, costs should be scaled to account for multiple execution units
896 /// on the target that can process this type of instruction. For example, if
897 /// there are 5 scalar integer units and 2 vector integer units that can
898 /// calculate an 'add' in a single cycle, this model should indicate that the
899 /// cost of the vector add instruction is 2.5 times the cost of the scalar
900 /// add instruction.
901 /// \p Args is an optional argument which holds the instruction operands
902 /// values so the TTI can analyze those values searching for special
903 /// cases or optimizations based on those values.
904 /// \p CxtI is the optional original context instruction, if one exists, to
905 /// provide even more information.
906 int getArithmeticInstrCost(
907 unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
908 OperandValueKind Opd2Info = OK_AnyValue,
909 OperandValueProperties Opd1PropInfo = OP_None,
910 OperandValueProperties Opd2PropInfo = OP_None,
911 ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
912 const Instruction *CxtI = nullptr) const;
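// For example (illustrative only; VecTy assumed in scope), the cost of a
// vector multiply by a uniform constant power of two could be queried as:
//
//   int Cost = TTI.getArithmeticInstrCost(
//       Instruction::Mul, VecTy, TargetTransformInfo::OK_AnyValue,
//       TargetTransformInfo::OK_UniformConstantValue,
//       TargetTransformInfo::OP_None, TargetTransformInfo::OP_PowerOf2);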
913
914 /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
915 /// The index and subtype parameters are used by the subvector insertion and
916 /// extraction shuffle kinds to show the insert/extract point and the type of
917 /// the subvector being inserted/extracted.
918 /// NOTE: For subvector extractions Tp represents the source type.
919 int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0,
920 Type *SubTp = nullptr) const;
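// For example (sketch only; SrcVecTy and SubVecTy assumed in scope), the cost
// of extracting a subvector starting at lane 4 of a wider source vector:
//
//   int Cost = TTI.getShuffleCost(TargetTransformInfo::SK_ExtractSubvector,
//                                 SrcVecTy, /*Index=*/4, SubVecTy);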
921
922 /// \return The expected cost of cast instructions, such as bitcast, trunc,
923 /// zext, etc. If there is an existing instruction that holds Opcode, it
924 /// may be passed in the 'I' parameter.
925 int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
926 const Instruction *I = nullptr) const;
927
928 /// \return The expected cost of a sign- or zero-extended vector extract. Use
929 /// -1 to indicate that there is no information about the index value.
930 int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
931 unsigned Index = -1) const;
932
933 /// \return The expected cost of control-flow related instructions such as
934 /// Phi, Ret, Br.
935 int getCFInstrCost(unsigned Opcode) const;
936
937 /// \returns The expected cost of compare and select instructions. If there
938 /// is an existing instruction that holds Opcode, it may be passed in the
939 /// 'I' parameter.
940 int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
941 Type *CondTy = nullptr, const Instruction *I = nullptr) const;
942
943 /// \return The expected cost of vector Insert and Extract.
944 /// Use -1 to indicate that there is no information on the index value.
945 int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const;
946
947 /// \return The cost of Load and Store instructions.
948 int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
949 unsigned AddressSpace,
950 const Instruction *I = nullptr) const;
951
952 /// \return The cost of masked Load and Store instructions.
953 int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
954 unsigned AddressSpace) const;
955
956 /// \return The cost of Gather or Scatter operation
957 /// \p Opcode - is a type of memory access Load or Store
958 /// \p DataTy - a vector type of the data to be loaded or stored
959 /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
960 /// \p VariableMask - true when the memory access is predicated with a mask
961 /// that is not a compile-time constant
962 /// \p Alignment - alignment of single element
963 int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
964 bool VariableMask, unsigned Alignment) const;
965
966 /// \return The cost of the interleaved memory operation.
967 /// \p Opcode is the memory operation code
968 /// \p VecTy is the vector type of the interleaved access.
969 /// \p Factor is the interleave factor
970 /// \p Indices is the indices for interleaved load members (as interleaved
971 /// load allows gaps)
972 /// \p Alignment is the alignment of the memory operation
973 /// \p AddressSpace is address space of the pointer.
974 /// \p UseMaskForCond indicates if the memory access is predicated.
975 /// \p UseMaskForGaps indicates if gaps should be masked.
976 int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
977 ArrayRef<unsigned> Indices, unsigned Alignment,
978 unsigned AddressSpace,
979 bool UseMaskForCond = false,
980 bool UseMaskForGaps = false) const;
981
982 /// Calculate the cost of performing a vector reduction.
983 ///
984 /// This is the cost of reducing the vector value of type \p Ty to a scalar
985 /// value using the operation denoted by \p Opcode. The form of the reduction
986 /// can either be a pairwise reduction or a reduction that splits the vector
987 /// at every reduction level.
988 ///
989 /// Pairwise:
990 /// (v0, v1, v2, v3)
991 /// ((v0+v1), (v2+v3), undef, undef)
992 /// Split:
993 /// (v0, v1, v2, v3)
994 /// ((v0+v2), (v1+v3), undef, undef)
995 int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
996 bool IsPairwiseForm) const;
997 int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm,
998 bool IsUnsigned) const;
999
1000 /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
1001 /// Three cases are handled: 1. scalar instruction 2. vector instruction
1002 /// 3. scalar instruction which is to be vectorized with VF.
1003 int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
1004 ArrayRef<Value *> Args, FastMathFlags FMF,
1005 unsigned VF = 1) const;
1006
1007 /// \returns The cost of Intrinsic instructions. Types analysis only.
1008 /// If ScalarizationCostPassed is UINT_MAX, the cost of scalarizing the
1009 /// arguments and the return value will be computed based on types.
1010 int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
1011 ArrayRef<Type *> Tys, FastMathFlags FMF,
1012 unsigned ScalarizationCostPassed = UINT_MAX) const;
1013
1014 /// \returns The cost of Call instructions.
1015 int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const;
1016
1017 /// \returns The number of pieces into which the provided type must be
1018 /// split during legalization. Zero is returned when the answer is unknown.
1019 unsigned getNumberOfParts(Type *Tp) const;
1020
1021 /// \returns The cost of the address computation. For most targets this can be
1022 /// merged into the instruction indexing mode. Some targets might want to
1023 /// distinguish between address computation for memory operations on vector
1024 /// types and scalar types. Such targets should override this function.
/// The 'SE' parameter holds a pointer to the scalar evolution object which
/// is used to get the Ptr step value in the case of a constant stride.
1027 /// The 'Ptr' parameter holds SCEV of the access pointer.
1028 int getAddressComputationCost(Type *Ty, ScalarEvolution *SE = nullptr,
1029 const SCEV *Ptr = nullptr) const;
1030
1031 /// \returns The cost, if any, of keeping values of the given types alive
1032 /// over a callsite.
1033 ///
1034 /// Some types may require the use of register classes that do not have
1035 /// any callee-saved registers, so would require a spill and fill.
1036 unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;
1037
/// \returns True if the intrinsic is a supported memory intrinsic. Info
/// will contain additional information - whether the intrinsic may read
/// or write memory, its volatility, and the pointer. Info is undefined
1041 /// if false is returned.
1042 bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
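// Sketch of a typical consumer (illustrative only): treating a recognized
// target memory intrinsic II like an ordinary unordered load/store:
//
//   MemIntrinsicInfo Info;
//   if (TTI.getTgtMemIntrinsic(II, Info) && Info.PtrVal && Info.isUnordered()) {
//     // Reason about II as a memory access of Info.PtrVal.
//   }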
1043
1044 /// \returns The maximum element size, in bytes, for an element
1045 /// unordered-atomic memory intrinsic.
1046 unsigned getAtomicMemIntrinsicMaxElementSize() const;
1047
1048 /// \returns A value which is the result of the given memory intrinsic. New
1049 /// instructions may be created to extract the result from the given intrinsic
1050 /// memory operation. Returns nullptr if the target cannot create a result
1051 /// from the given intrinsic.
1052 Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1053 Type *ExpectedType) const;
1054
1055 /// \returns The type to use in a loop expansion of a memcpy call.
1056 Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
1057 unsigned SrcAlign, unsigned DestAlign) const;
1058
1059 /// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
1060 /// \param RemainingBytes The number of bytes to copy.
1061 ///
1062 /// Calculates the operand types to use when copying \p RemainingBytes of
1063 /// memory, where source and destination alignments are \p SrcAlign and
1064 /// \p DestAlign respectively.
1065 void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
1066 LLVMContext &Context,
1067 unsigned RemainingBytes,
1068 unsigned SrcAlign,
1069 unsigned DestAlign) const;
1070
1071 /// \returns True if the two functions have compatible attributes for inlining
1072 /// purposes.
1073 bool areInlineCompatible(const Function *Caller,
1074 const Function *Callee) const;
1075
1076 /// \returns True if the caller and callee agree on how \p Args will be passed
1077 /// to the callee.
1078 /// \param[out] Args The list of compatible arguments. The implementation may
1079 /// filter out any incompatible args from this list.
1080 bool areFunctionArgsABICompatible(const Function *Caller,
1081 const Function *Callee,
1082 SmallPtrSetImpl<Argument *> &Args) const;
1083
1084 /// The type of load/store indexing.
1085 enum MemIndexedMode {
1086 MIM_Unindexed, ///< No indexing.
1087 MIM_PreInc, ///< Pre-incrementing.
1088 MIM_PreDec, ///< Pre-decrementing.
1089 MIM_PostInc, ///< Post-incrementing.
1090 MIM_PostDec ///< Post-decrementing.
1091 };
1092
1093 /// \returns True if the specified indexed load for the given type is legal.
1094 bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;
1095
1096 /// \returns True if the specified indexed store for the given type is legal.
1097 bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;
1098
1099 /// \returns The bitwidth of the largest vector type that should be used to
1100 /// load/store in the given address space.
1101 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
1102
1103 /// \returns True if the load instruction is legal to vectorize.
1104 bool isLegalToVectorizeLoad(LoadInst *LI) const;
1105
1106 /// \returns True if the store instruction is legal to vectorize.
1107 bool isLegalToVectorizeStore(StoreInst *SI) const;
1108
1109 /// \returns True if it is legal to vectorize the given load chain.
1110 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
1111 unsigned Alignment,
1112 unsigned AddrSpace) const;
1113
1114 /// \returns True if it is legal to vectorize the given store chain.
1115 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
1116 unsigned Alignment,
1117 unsigned AddrSpace) const;
1118
1119 /// \returns The new vector factor value if the target doesn't support \p
1120 /// SizeInBytes loads or has a better vector factor.
1121 unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1122 unsigned ChainSizeInBytes,
1123 VectorType *VecTy) const;
1124
1125 /// \returns The new vector factor value if the target doesn't support \p
1126 /// SizeInBytes stores or has a better vector factor.
1127 unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1128 unsigned ChainSizeInBytes,
1129 VectorType *VecTy) const;
1130
1131 /// Flags describing the kind of vector reduction.
1132 struct ReductionFlags {
ReductionFlags() : IsMaxOp(false), IsSigned(false), NoNaN(false) {}
bool IsMaxOp; ///< If the op is a min/max kind, true if it's a max operation.
1135 bool IsSigned; ///< Whether the operation is a signed int reduction.
1136 bool NoNaN; ///< If op is an fp min/max, whether NaNs may be present.
1137 };
1138
1139 /// \returns True if the target wants to handle the given reduction idiom in
1140 /// the intrinsics form instead of the shuffle form.
1141 bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
1142 ReductionFlags Flags) const;
1143
1144 /// \returns True if the target wants to expand the given reduction intrinsic
1145 /// into a shuffle sequence.
1146 bool shouldExpandReduction(const IntrinsicInst *II) const;
1147
1148 /// \returns the size cost of rematerializing a GlobalValue address relative
1149 /// to a stack reload.
1150 unsigned getGISelRematGlobalCost() const;
1151
1152 /// @}
1153
1154 private:
1155 /// Estimate the latency of specified instruction.
1156 /// Returns 1 as the default value.
1157 int getInstructionLatency(const Instruction *I) const;
1158
1159 /// Returns the expected throughput cost of the instruction.
1160 /// Returns -1 if the cost is unknown.
1161 int getInstructionThroughput(const Instruction *I) const;
1162
1163 /// The abstract base class used to type erase specific TTI
1164 /// implementations.
1165 class Concept;
1166
1167 /// The template model for the base class which wraps a concrete
1168 /// implementation in a type erased interface.
1169 template <typename T> class Model;
1170
1171 std::unique_ptr<Concept> TTIImpl;
1172 };
1173
class TargetTransformInfo::Concept {
public:
  virtual ~Concept() = 0;
  virtual const DataLayout &getDataLayout() const = 0;
  virtual int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0;
  virtual int getGEPCost(Type *PointeeType, const Value *Ptr,
                         ArrayRef<const Value *> Operands) = 0;
  virtual int getExtCost(const Instruction *I, const Value *Src) = 0;
  virtual int getCallCost(FunctionType *FTy, int NumArgs, const User *U) = 0;
  virtual int getCallCost(const Function *F, int NumArgs, const User *U) = 0;
  virtual int getCallCost(const Function *F,
                          ArrayRef<const Value *> Arguments, const User *U) = 0;
  virtual unsigned getInliningThresholdMultiplier() = 0;
  virtual int getInlinerVectorBonusPercent() = 0;
  virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                               ArrayRef<Type *> ParamTys, const User *U) = 0;
  virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                               ArrayRef<const Value *> Arguments,
                               const User *U) = 0;
  virtual int getMemcpyCost(const Instruction *I) = 0;
  virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                                    unsigned &JTSize,
                                                    ProfileSummaryInfo *PSI,
                                                    BlockFrequencyInfo *BFI) = 0;
  virtual int
  getUserCost(const User *U, ArrayRef<const Value *> Operands) = 0;
  virtual bool hasBranchDivergence() = 0;
  virtual bool isSourceOfDivergence(const Value *V) = 0;
  virtual bool isAlwaysUniform(const Value *V) = 0;
  virtual unsigned getFlatAddressSpace() = 0;
  virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                          Intrinsic::ID IID) const = 0;
  virtual bool rewriteIntrinsicWithAddressSpace(
      IntrinsicInst *II, Value *OldV, Value *NewV) const = 0;
  virtual bool isLoweredToCall(const Function *F) = 0;
  virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
                                       UnrollingPreferences &UP) = 0;
  virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                        AssumptionCache &AC,
                                        TargetLibraryInfo *LibInfo,
                                        HardwareLoopInfo &HWLoopInfo) = 0;
  virtual bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
                                           ScalarEvolution &SE,
                                           AssumptionCache &AC,
                                           TargetLibraryInfo *TLI,
                                           DominatorTree *DT,
                                           const LoopAccessInfo *LAI) = 0;
  virtual bool isLegalAddImmediate(int64_t Imm) = 0;
  virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
  virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                     int64_t BaseOffset, bool HasBaseReg,
                                     int64_t Scale,
                                     unsigned AddrSpace,
                                     Instruction *I) = 0;
  virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                             TargetTransformInfo::LSRCost &C2) = 0;
  virtual bool canMacroFuseCmp() = 0;
  virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
                          LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
                          TargetLibraryInfo *LibInfo) = 0;
  virtual bool shouldFavorPostInc() const = 0;
  virtual bool shouldFavorBackedgeIndex(const Loop *L) const = 0;
  virtual bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) = 0;
  virtual bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) = 0;
  virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;
  virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0;
  virtual bool isLegalMaskedScatter(Type *DataType, MaybeAlign Alignment) = 0;
  virtual bool isLegalMaskedGather(Type *DataType, MaybeAlign Alignment) = 0;
  virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
  virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
  virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
  virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
  virtual bool prefersVectorizedAddressing() = 0;
  virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                   int64_t BaseOffset, bool HasBaseReg,
                                   int64_t Scale, unsigned AddrSpace) = 0;
  virtual bool LSRWithInstrQueries() = 0;
  virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
  virtual bool isProfitableToHoist(Instruction *I) = 0;
  virtual bool useAA() = 0;
  virtual bool isTypeLegal(Type *Ty) = 0;
  virtual bool shouldBuildLookupTables() = 0;
  virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
  virtual bool useColdCCForColdCall(Function &F) = 0;
  virtual unsigned
  getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) = 0;
  virtual unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                                    unsigned VF) = 0;
  virtual bool supportsEfficientVectorElementLoadStore() = 0;
  virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
  virtual MemCmpExpansionOptions
  enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;
  virtual bool enableInterleavedAccessVectorization() = 0;
  virtual bool enableMaskedInterleavedAccessVectorization() = 0;
  virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
  virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                              unsigned BitWidth,
                                              unsigned AddressSpace,
                                              unsigned Alignment,
                                              bool *Fast) = 0;
  virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
  virtual bool haveFastSqrt(Type *Ty) = 0;
  virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
  virtual int getFPOpCost(Type *Ty) = 0;
  virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                                    Type *Ty) = 0;
  virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0;
  virtual int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm,
                                Type *Ty) = 0;
  virtual int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                  const APInt &Imm, Type *Ty) = 0;
  virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0;
  virtual unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const = 0;
  virtual const char* getRegisterClassName(unsigned ClassID) const = 0;
  virtual unsigned getRegisterBitWidth(bool Vector) const = 0;
  virtual unsigned getMinVectorRegisterBitWidth() = 0;
  virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0;
  virtual unsigned getMinimumVF(unsigned ElemWidth) const = 0;
  virtual bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
  virtual unsigned getCacheLineSize() const = 0;
  virtual llvm::Optional<unsigned> getCacheSize(CacheLevel Level) const = 0;
  virtual llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const = 0;

  /// \return How much before a load we should place the prefetch
  /// instruction. This is currently measured in number of
  /// instructions.
  virtual unsigned getPrefetchDistance() const = 0;

  /// \return Some HW prefetchers can handle accesses up to a certain
  /// constant stride. This is the minimum stride in bytes where it
  /// makes sense to start adding SW prefetches. The default is 1,
  /// i.e. prefetch with any stride.
  virtual unsigned getMinPrefetchStride() const = 0;

  /// \return The maximum number of iterations to prefetch ahead. If
  /// the required number of iterations is more than this number, no
  /// prefetching is performed.
  virtual unsigned getMaxPrefetchIterationsAhead() const = 0;

  virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
  virtual unsigned getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
      OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
      OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
      const Instruction *CxtI = nullptr) = 0;
  virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
                             Type *SubTp) = 0;
  virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                               const Instruction *I) = 0;
  virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                       VectorType *VecTy, unsigned Index) = 0;
  virtual int getCFInstrCost(unsigned Opcode) = 0;
  virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                 Type *CondTy, const Instruction *I) = 0;
  virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
                                 unsigned Index) = 0;
  virtual int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                              unsigned AddressSpace, const Instruction *I) = 0;
  virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                    unsigned Alignment,
                                    unsigned AddressSpace) = 0;
  virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                     Value *Ptr, bool VariableMask,
                                     unsigned Alignment) = 0;
  virtual int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                         unsigned Factor,
                                         ArrayRef<unsigned> Indices,
                                         unsigned Alignment,
                                         unsigned AddressSpace,
                                         bool UseMaskForCond = false,
                                         bool UseMaskForGaps = false) = 0;
  virtual int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
                                         bool IsPairwiseForm) = 0;
  virtual int getMinMaxReductionCost(Type *Ty, Type *CondTy,
                                     bool IsPairwiseForm, bool IsUnsigned) = 0;
  virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                    ArrayRef<Type *> Tys, FastMathFlags FMF,
                                    unsigned ScalarizationCostPassed) = 0;
  virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                    ArrayRef<Value *> Args, FastMathFlags FMF,
                                    unsigned VF) = 0;
  virtual int getCallInstrCost(Function *F, Type *RetTy,
                               ArrayRef<Type *> Tys) = 0;
  virtual unsigned getNumberOfParts(Type *Tp) = 0;
  virtual int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                        const SCEV *Ptr) = 0;
  virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
  virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                                  MemIntrinsicInfo &Info) = 0;
  virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
  virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                                   Type *ExpectedType) = 0;
  virtual Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                                          unsigned SrcAlign,
                                          unsigned DestAlign) const = 0;
  virtual void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const = 0;
  virtual bool areInlineCompatible(const Function *Caller,
                                   const Function *Callee) const = 0;
  virtual bool
  areFunctionArgsABICompatible(const Function *Caller, const Function *Callee,
                               SmallPtrSetImpl<Argument *> &Args) const = 0;
  virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
  virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;
  virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
  virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
  virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
  virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                           unsigned Alignment,
                                           unsigned AddrSpace) const = 0;
  virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                            unsigned Alignment,
                                            unsigned AddrSpace) const = 0;
  virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                                       unsigned ChainSizeInBytes,
                                       VectorType *VecTy) const = 0;
  virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                        unsigned ChainSizeInBytes,
                                        VectorType *VecTy) const = 0;
  virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                                     ReductionFlags) const = 0;
  virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
  virtual unsigned getGISelRematGlobalCost() const = 0;
  virtual int getInstructionLatency(const Instruction *I) = 0;
};

template <typename T>
class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
  T Impl;

public:
  Model(T Impl) : Impl(std::move(Impl)) {}
  ~Model() override {}

  const DataLayout &getDataLayout() const override {
    return Impl.getDataLayout();
  }

  int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override {
    return Impl.getOperationCost(Opcode, Ty, OpTy);
  }
  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands) override {
    return Impl.getGEPCost(PointeeType, Ptr, Operands);
  }
  int getExtCost(const Instruction *I, const Value *Src) override {
    return Impl.getExtCost(I, Src);
  }
  int getCallCost(FunctionType *FTy, int NumArgs, const User *U) override {
    return Impl.getCallCost(FTy, NumArgs, U);
  }
  int getCallCost(const Function *F, int NumArgs, const User *U) override {
    return Impl.getCallCost(F, NumArgs, U);
  }
  int getCallCost(const Function *F,
                  ArrayRef<const Value *> Arguments, const User *U) override {
    return Impl.getCallCost(F, Arguments, U);
  }
  unsigned getInliningThresholdMultiplier() override {
    return Impl.getInliningThresholdMultiplier();
  }
  int getInlinerVectorBonusPercent() override {
    return Impl.getInlinerVectorBonusPercent();
  }
  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                       ArrayRef<Type *> ParamTys,
                       const User *U = nullptr) override {
    return Impl.getIntrinsicCost(IID, RetTy, ParamTys, U);
  }
  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                       ArrayRef<const Value *> Arguments,
                       const User *U = nullptr) override {
    return Impl.getIntrinsicCost(IID, RetTy, Arguments, U);
  }
  int getMemcpyCost(const Instruction *I) override {
    return Impl.getMemcpyCost(I);
  }
  int getUserCost(const User *U, ArrayRef<const Value *> Operands) override {
    return Impl.getUserCost(U, Operands);
  }
  bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
  bool isSourceOfDivergence(const Value *V) override {
    return Impl.isSourceOfDivergence(V);
  }

  bool isAlwaysUniform(const Value *V) override {
    return Impl.isAlwaysUniform(V);
  }

  unsigned getFlatAddressSpace() override {
    return Impl.getFlatAddressSpace();
  }

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const override {
    return Impl.collectFlatAddressOperands(OpIndexes, IID);
  }

  bool rewriteIntrinsicWithAddressSpace(
      IntrinsicInst *II, Value *OldV, Value *NewV) const override {
    return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
  }

  bool isLoweredToCall(const Function *F) override {
    return Impl.isLoweredToCall(F);
  }
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               UnrollingPreferences &UP) override {
    return Impl.getUnrollingPreferences(L, SE, UP);
  }
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC,
                                TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) override {
    return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
  }
  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
                                   AssumptionCache &AC, TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   const LoopAccessInfo *LAI) override {
    return Impl.preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
  }
  bool isLegalAddImmediate(int64_t Imm) override {
    return Impl.isLegalAddImmediate(Imm);
  }
  bool isLegalICmpImmediate(int64_t Imm) override {
    return Impl.isLegalICmpImmediate(Imm);
  }
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace,
                             Instruction *I) override {
    return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
                                      Scale, AddrSpace, I);
  }
  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                     TargetTransformInfo::LSRCost &C2) override {
    return Impl.isLSRCostLess(C1, C2);
  }
  bool canMacroFuseCmp() override {
    return Impl.canMacroFuseCmp();
  }
  bool canSaveCmp(Loop *L, BranchInst **BI,
                  ScalarEvolution *SE,
                  LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
                  TargetLibraryInfo *LibInfo) override {
    return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
  }
  bool shouldFavorPostInc() const override {
    return Impl.shouldFavorPostInc();
  }
  bool shouldFavorBackedgeIndex(const Loop *L) const override {
    return Impl.shouldFavorBackedgeIndex(L);
  }
  bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) override {
    return Impl.isLegalMaskedStore(DataType, Alignment);
  }
  bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) override {
    return Impl.isLegalMaskedLoad(DataType, Alignment);
  }
  bool isLegalNTStore(Type *DataType, Align Alignment) override {
    return Impl.isLegalNTStore(DataType, Alignment);
  }
  bool isLegalNTLoad(Type *DataType, Align Alignment) override {
    return Impl.isLegalNTLoad(DataType, Alignment);
  }
  bool isLegalMaskedScatter(Type *DataType, MaybeAlign Alignment) override {
    return Impl.isLegalMaskedScatter(DataType, Alignment);
  }
  bool isLegalMaskedGather(Type *DataType, MaybeAlign Alignment) override {
    return Impl.isLegalMaskedGather(DataType, Alignment);
  }
  bool isLegalMaskedCompressStore(Type *DataType) override {
    return Impl.isLegalMaskedCompressStore(DataType);
  }
  bool isLegalMaskedExpandLoad(Type *DataType) override {
    return Impl.isLegalMaskedExpandLoad(DataType);
  }
  bool hasDivRemOp(Type *DataType, bool IsSigned) override {
    return Impl.hasDivRemOp(DataType, IsSigned);
  }
  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
    return Impl.hasVolatileVariant(I, AddrSpace);
  }
  bool prefersVectorizedAddressing() override {
    return Impl.prefersVectorizedAddressing();
  }
  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale,
                           unsigned AddrSpace) override {
    return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
                                     Scale, AddrSpace);
  }
  bool LSRWithInstrQueries() override {
    return Impl.LSRWithInstrQueries();
  }
  bool isTruncateFree(Type *Ty1, Type *Ty2) override {
    return Impl.isTruncateFree(Ty1, Ty2);
  }
  bool isProfitableToHoist(Instruction *I) override {
    return Impl.isProfitableToHoist(I);
  }
  bool useAA() override { return Impl.useAA(); }
  bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
  bool shouldBuildLookupTables() override {
    return Impl.shouldBuildLookupTables();
  }
  bool shouldBuildLookupTablesForConstant(Constant *C) override {
    return Impl.shouldBuildLookupTablesForConstant(C);
  }
  bool useColdCCForColdCall(Function &F) override {
    return Impl.useColdCCForColdCall(F);
  }

  unsigned getScalarizationOverhead(Type *Ty, bool Insert,
                                    bool Extract) override {
    return Impl.getScalarizationOverhead(Ty, Insert, Extract);
  }
  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                            unsigned VF) override {
    return Impl.getOperandsScalarizationOverhead(Args, VF);
  }

  bool supportsEfficientVectorElementLoadStore() override {
    return Impl.supportsEfficientVectorElementLoadStore();
  }

  bool enableAggressiveInterleaving(bool LoopHasReductions) override {
    return Impl.enableAggressiveInterleaving(LoopHasReductions);
  }
  MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                               bool IsZeroCmp) const override {
    return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
  }
  bool enableInterleavedAccessVectorization() override {
    return Impl.enableInterleavedAccessVectorization();
  }
  bool enableMaskedInterleavedAccessVectorization() override {
    return Impl.enableMaskedInterleavedAccessVectorization();
  }
  bool isFPVectorizationPotentiallyUnsafe() override {
    return Impl.isFPVectorizationPotentiallyUnsafe();
  }
  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                      unsigned BitWidth, unsigned AddressSpace,
                                      unsigned Alignment, bool *Fast) override {
    return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
                                               Alignment, Fast);
  }
  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
    return Impl.getPopcntSupport(IntTyWidthInBit);
  }
  bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }

  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
    return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
  }

  int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }

  int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                            Type *Ty) override {
    return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
  }
  int getIntImmCost(const APInt &Imm, Type *Ty) override {
    return Impl.getIntImmCost(Imm, Ty);
  }
  int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm,
                        Type *Ty) override {
    return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty);
  }
  int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                          Type *Ty) override {
    return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty);
  }
  unsigned getNumberOfRegisters(unsigned ClassID) const override {
    return Impl.getNumberOfRegisters(ClassID);
  }
  unsigned getRegisterClassForType(bool Vector,
                                   Type *Ty = nullptr) const override {
    return Impl.getRegisterClassForType(Vector, Ty);
  }
  const char* getRegisterClassName(unsigned ClassID) const override {
    return Impl.getRegisterClassName(ClassID);
  }
  unsigned getRegisterBitWidth(bool Vector) const override {
    return Impl.getRegisterBitWidth(Vector);
  }
  unsigned getMinVectorRegisterBitWidth() override {
    return Impl.getMinVectorRegisterBitWidth();
  }
  bool shouldMaximizeVectorBandwidth(bool OptSize) const override {
    return Impl.shouldMaximizeVectorBandwidth(OptSize);
  }
  unsigned getMinimumVF(unsigned ElemWidth) const override {
    return Impl.getMinimumVF(ElemWidth);
  }
  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
    return Impl.shouldConsiderAddressTypePromotion(
        I, AllowPromotionWithoutCommonHeader);
  }
  unsigned getCacheLineSize() const override {
    return Impl.getCacheLineSize();
  }
  llvm::Optional<unsigned> getCacheSize(CacheLevel Level) const override {
    return Impl.getCacheSize(Level);
  }
  llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const override {
    return Impl.getCacheAssociativity(Level);
  }

  /// Return the preferred prefetch distance in terms of instructions.
  ///
  unsigned getPrefetchDistance() const override {
    return Impl.getPrefetchDistance();
  }

  /// Return the minimum stride necessary to trigger software
  /// prefetching.
  ///
  unsigned getMinPrefetchStride() const override {
    return Impl.getMinPrefetchStride();
  }

  /// Return the maximum prefetch distance in terms of loop
  /// iterations.
  ///
  unsigned getMaxPrefetchIterationsAhead() const override {
    return Impl.getMaxPrefetchIterationsAhead();
  }

  unsigned getMaxInterleaveFactor(unsigned VF) override {
    return Impl.getMaxInterleaveFactor(VF);
  }
  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) override {
    return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
  }
  unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
                                  OperandValueKind Opd1Info,
                                  OperandValueKind Opd2Info,
                                  OperandValueProperties Opd1PropInfo,
                                  OperandValueProperties Opd2PropInfo,
                                  ArrayRef<const Value *> Args,
                                  const Instruction *CxtI = nullptr) override {
    return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
                                       Opd1PropInfo, Opd2PropInfo, Args, CxtI);
  }
  int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
                     Type *SubTp) override {
    return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
  }
  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                       const Instruction *I) override {
    return Impl.getCastInstrCost(Opcode, Dst, Src, I);
  }
  int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
                               unsigned Index) override {
    return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
  }
  int getCFInstrCost(unsigned Opcode) override {
    return Impl.getCFInstrCost(Opcode);
  }
  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                         const Instruction *I) override {
    return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
  }
  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
    return Impl.getVectorInstrCost(Opcode, Val, Index);
  }
  int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                      unsigned AddressSpace, const Instruction *I) override {
    return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
  }
  int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                            unsigned AddressSpace) override {
    return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
  }
  int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                             Value *Ptr, bool VariableMask,
                             unsigned Alignment) override {
    return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                       Alignment);
  }
  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
                                 ArrayRef<unsigned> Indices, unsigned Alignment,
                                 unsigned AddressSpace, bool UseMaskForCond,
                                 bool UseMaskForGaps) override {
    return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                           Alignment, AddressSpace,
                                           UseMaskForCond, UseMaskForGaps);
  }
  int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
                                 bool IsPairwiseForm) override {
    return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
  }
  int getMinMaxReductionCost(Type *Ty, Type *CondTy,
                             bool IsPairwiseForm, bool IsUnsigned) override {
    return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
  }
  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
                            FastMathFlags FMF,
                            unsigned ScalarizationCostPassed) override {
    return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
                                      ScalarizationCostPassed);
  }
  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                            ArrayRef<Value *> Args, FastMathFlags FMF,
                            unsigned VF) override {
    return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
  }
  int getCallInstrCost(Function *F, Type *RetTy,
                       ArrayRef<Type *> Tys) override {
    return Impl.getCallInstrCost(F, RetTy, Tys);
  }
  unsigned getNumberOfParts(Type *Tp) override {
    return Impl.getNumberOfParts(Tp);
  }
  int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                const SCEV *Ptr) override {
    return Impl.getAddressComputationCost(Ty, SE, Ptr);
  }
  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
    return Impl.getCostOfKeepingLiveOverCall(Tys);
  }
  bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                          MemIntrinsicInfo &Info) override {
    return Impl.getTgtMemIntrinsic(Inst, Info);
  }
  unsigned getAtomicMemIntrinsicMaxElementSize() const override {
    return Impl.getAtomicMemIntrinsicMaxElementSize();
  }
  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) override {
    return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
  }
  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                                  unsigned SrcAlign,
                                  unsigned DestAlign) const override {
    return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAlign, DestAlign);
  }
  void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
                                         LLVMContext &Context,
                                         unsigned RemainingBytes,
                                         unsigned SrcAlign,
                                         unsigned DestAlign) const override {
    Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
                                           SrcAlign, DestAlign);
  }
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const override {
    return Impl.areInlineCompatible(Caller, Callee);
  }
  bool areFunctionArgsABICompatible(
      const Function *Caller, const Function *Callee,
      SmallPtrSetImpl<Argument *> &Args) const override {
    return Impl.areFunctionArgsABICompatible(Caller, Callee, Args);
  }
  bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
    return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
  }
  bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
    return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
  }
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
    return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
  }
  bool isLegalToVectorizeLoad(LoadInst *LI) const override {
    return Impl.isLegalToVectorizeLoad(LI);
  }
  bool isLegalToVectorizeStore(StoreInst *SI) const override {
    return Impl.isLegalToVectorizeStore(SI);
  }
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const override {
    return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
                                            AddrSpace);
  }
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const override {
    return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
                                             AddrSpace);
  }
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const override {
    return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
  }
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const override {
    return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
  }
  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                             ReductionFlags Flags) const override {
    return Impl.useReductionIntrinsic(Opcode, Ty, Flags);
  }
  bool shouldExpandReduction(const IntrinsicInst *II) const override {
    return Impl.shouldExpandReduction(II);
  }

  unsigned getGISelRematGlobalCost() const override {
    return Impl.getGISelRematGlobalCost();
  }

  int getInstructionLatency(const Instruction *I) override {
    return Impl.getInstructionLatency(I);
  }
};

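// The constructor below type-erases a concrete TTI implementation behind the
// Concept/Model pair above: any object providing the expected member
// functions can be handed to TargetTransformInfo and is stored in a Model<T>.
// A rough usage sketch (SomeTargetTTIImpl is a hypothetical concrete
// implementation, named here only for illustration):
//
//   SomeTargetTTIImpl Impl(TM, F);
//   TargetTransformInfo TTI(std::move(Impl));
//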
template <typename T>
TargetTransformInfo::TargetTransformInfo(T Impl)
    : TTIImpl(new Model<T>(Impl)) {}

/// Analysis pass providing the \c TargetTransformInfo.
///
/// The core idea of the TargetIRAnalysis is to expose an interface through
/// which LLVM targets can analyze and provide information about the middle
/// end's target-independent IR. This supports use cases such as target-aware
/// cost modeling of IR constructs.
///
/// This is a function analysis because much of the cost modeling for targets
/// is done in a subtarget specific way and LLVM supports compiling different
/// functions targeting different subtargets in order to support runtime
/// dispatch according to the observed subtarget.
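///
/// A minimal sketch of querying this analysis through the new pass manager
/// (assuming a FunctionAnalysisManager named FAM with the usual analyses
/// registered; the default-constructed analysis yields the conservative
/// baseline TTI):
///
/// \code
///   FunctionAnalysisManager FAM;
///   FAM.registerPass([] { return TargetIRAnalysis(); });
///   // ... later, for some Function &F:
///   TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
///   unsigned CacheLine = TTI.getCacheLineSize();
/// \endcode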
class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
public:
  typedef TargetTransformInfo Result;

  /// Default construct a target IR analysis.
  ///
  /// This will use the module's datalayout to construct a baseline
  /// conservative TTI result.
  TargetIRAnalysis();

  /// Construct an IR analysis pass around a target-provided callback.
  ///
  /// The callback will be called with a particular function for which the TTI
  /// is needed and must return a TTI object for that function.
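  ///
  /// For example, a target would typically hand in a callback that builds its
  /// own TTI implementation (sketch only; getTargetTransformInfo stands in
  /// for whatever hook the target actually uses):
  ///
  /// \code
  ///   TargetIRAnalysis TIRA(
  ///       [&](const Function &F) { return TM.getTargetTransformInfo(F); });
  /// \endcode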
  TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);

  // Value semantics. We spell out the constructors for MSVC.
  TargetIRAnalysis(const TargetIRAnalysis &Arg)
      : TTICallback(Arg.TTICallback) {}
  TargetIRAnalysis(TargetIRAnalysis &&Arg)
      : TTICallback(std::move(Arg.TTICallback)) {}
  TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
    TTICallback = RHS.TTICallback;
    return *this;
  }
  TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
    TTICallback = std::move(RHS.TTICallback);
    return *this;
  }

  Result run(const Function &F, FunctionAnalysisManager &);

private:
  friend AnalysisInfoMixin<TargetIRAnalysis>;
  static AnalysisKey Key;

  /// The callback used to produce a result.
  ///
  /// We use a completely opaque callback so that targets can provide whatever
  /// mechanism they desire for constructing the TTI for a given function.
  ///
  /// FIXME: Should we really use std::function? It's relatively inefficient.
  /// It might be possible to arrange for even stateful callbacks to outlive
  /// the analysis and thus use a function_ref which would be lighter weight.
  /// This may also be less error prone as the callback is likely to reference
  /// the external TargetMachine, and that reference needs to never dangle.
  std::function<Result(const Function &)> TTICallback;

  /// Helper function used as the callback in the default constructor.
  static Result getDefaultTTI(const Function &F);
};

/// Wrapper pass for TargetTransformInfo.
///
/// This pass can be constructed from a TTI object which it stores internally
/// and is queried by passes.
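///
/// Under the legacy pass manager a client pass would typically declare the
/// dependency and then query the TTI per function, e.g. (sketch):
///
/// \code
///   void getAnalysisUsage(AnalysisUsage &AU) const override {
///     AU.addRequired<TargetTransformInfoWrapperPass>();
///   }
///   bool runOnFunction(Function &F) override {
///     auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
///     // ... use TTI's cost queries ...
///     return false;
///   }
/// \endcode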
class TargetTransformInfoWrapperPass : public ImmutablePass {
  TargetIRAnalysis TIRA;
  Optional<TargetTransformInfo> TTI;

  virtual void anchor();

public:

  static char ID;

  /// We must provide a default constructor for the pass but it should
  /// never be used.
  ///
  /// Use the constructor below or call one of the creation routines.
  TargetTransformInfoWrapperPass();

  explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);

  TargetTransformInfo &getTTI(const Function &F);
};

/// Create an analysis pass wrapper around a TTI object.
///
/// This analysis pass just holds the TTI instance and makes it available to
/// clients.
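///
/// Typical use when populating a legacy pass pipeline (sketch; assumes a
/// TargetMachine pointer TM from which the analysis is obtained):
///
/// \code
///   legacy::PassManager PM;
///   PM.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
/// \endcode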
ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);

} // End llvm namespace

#endif