//===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This pass exposes codegen information to IR-level passes. Every
/// transformation that uses codegen information is broken into three parts:
/// 1. The IR-level analysis pass.
/// 2. The IR-level transformation interface which provides the needed
///    information.
/// 3. Codegen-level implementation which uses target-specific hooks.
///
/// This file defines #2, which is the interface that IR-level transformations
/// use for querying the codegen.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H

#include "llvm/ADT/Optional.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/DataTypes.h"
#include <functional>

namespace llvm {

class Function;
class GlobalValue;
class Loop;
class Type;
class User;
class Value;

/// \brief Information about a load/store intrinsic defined by the target.
struct MemIntrinsicInfo {
  MemIntrinsicInfo()
      : ReadMem(false), WriteMem(false), IsSimple(false), MatchingId(0),
        NumMemRefs(0), PtrVal(nullptr) {}
  bool ReadMem;
  bool WriteMem;
  /// True only if this memory operation is non-volatile, non-atomic, and
  /// unordered. (See LoadInst/StoreInst for details on each.)
  bool IsSimple;
  /// Same Id is set by the target for corresponding load/store intrinsics.
  unsigned short MatchingId;
  int NumMemRefs;
  Value *PtrVal;
};
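
// A minimal sketch of how a target's TTI implementation might populate this
// struct. The intrinsic ID and operand index below are hypothetical, for
// illustration only:
//
//   bool MyTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
//                                      MemIntrinsicInfo &Info) {
//     if (Inst->getIntrinsicID() == Intrinsic::my_target_load) {
//       Info.ReadMem = true;
//       Info.IsSimple = true;
//       Info.NumMemRefs = 1;
//       Info.PtrVal = Inst->getArgOperand(0);
//       Info.MatchingId = 1; // Pairs with the corresponding store intrinsic.
//       return true;
//     }
//     return false;
//   }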

/// \brief This pass provides access to the codegen interfaces that are needed
/// for IR-level transformations.
class TargetTransformInfo {
public:
  /// \brief Construct a TTI object using a type implementing the \c Concept
  /// API below.
  ///
  /// This is used by targets to construct a TTI wrapping their target-specific
  /// implementation that encodes appropriate costs for their target.
  template <typename T> TargetTransformInfo(T Impl);

  /// \brief Construct a baseline TTI object using a minimal implementation of
  /// the \c Concept API below.
  ///
  /// The TTI implementation will reflect the information in the DataLayout
  /// provided if non-null.
  explicit TargetTransformInfo(const DataLayout &DL);

  // Provide move semantics.
  TargetTransformInfo(TargetTransformInfo &&Arg);
  TargetTransformInfo &operator=(TargetTransformInfo &&RHS);

  // We need to define the destructor out-of-line to define our sub-classes
  // out-of-line.
  ~TargetTransformInfo();

  /// \brief Handle the invalidation of this information.
  ///
  /// When used as a result of \c TargetIRAnalysis this method will be called
  /// when the function this was computed for changes. When it returns false,
  /// the information is preserved across those changes.
  bool invalidate(Function &, const PreservedAnalyses &) {
    // FIXME: We should probably in some way ensure that the subtarget
    // information for a function hasn't changed.
    return false;
  }

  /// \name Generic Target Information
  /// @{

  /// \brief Underlying constants for 'cost' values in this interface.
  ///
  /// Many APIs in this interface return a cost. This enum defines the
  /// fundamental values that should be used to interpret (and produce) those
  /// costs. The costs are returned as an int rather than a member of this
  /// enumeration because it is expected that the cost of one IR instruction
  /// may have a multiplicative factor to it or otherwise won't fit directly
  /// into the enum. Moreover, it is common to sum or average costs which works
  /// better as simple integral values. Thus this enum only provides constants.
  /// Also note that the returned costs are signed integers to make it natural
  /// to add, subtract, and test with zero (a common boundary condition). It is
  /// not expected that 2^32 is a realistic cost to be modeling at any point.
  ///
  /// Note that these costs should usually reflect the intersection of code-size
  /// cost and execution cost. A free instruction is typically one that folds
  /// into another instruction. For example, reg-to-reg moves can often be
  /// skipped by renaming the registers in the CPU, but they still are encoded
  /// and thus wouldn't be considered 'free' here.
  enum TargetCostConstants {
    TCC_Free = 0,     ///< Expected to fold away in lowering.
    TCC_Basic = 1,    ///< The cost of a typical 'add' instruction.
    TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
  };
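
  // Costs compose as plain integers. A client summing the cost of a candidate
  // sequence might do something like this (an illustrative sketch; 'TTI' is
  // assumed to be a TargetTransformInfo obtained from the analysis):
  //
  //   int Cost = 0;
  //   for (Instruction &I : BB)
  //     Cost += TTI.getUserCost(&I);
  //   bool Cheap = Cost <= 2 * TargetTransformInfo::TCC_Basic;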

  /// \brief Estimate the cost of a specific operation when lowered.
  ///
  /// Note that this is designed to work on an arbitrary synthetic opcode, and
  /// thus work for hypothetical queries before an instruction has even been
  /// formed. However, this does *not* work for GEPs, and must not be called
  /// for a GEP instruction. Instead, use the dedicated getGEPCost interface as
  /// analyzing a GEP's cost requires more information.
  ///
  /// Typically only the result type is required, and the operand type can be
  /// omitted. However, if the opcode is one of the cast instructions, the
  /// operand type is required.
  ///
  /// The returned cost is defined in terms of \c TargetCostConstants, see its
  /// comments for a detailed explanation of the cost values.
  int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy = nullptr) const;
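
  // For example, a pass deciding whether a truncate it is about to synthesize
  // would be cheap might query (a sketch, not a prescribed usage):
  //
  //   int C = TTI.getOperationCost(Instruction::Trunc, Int16Ty, Int32Ty);
  //   if (C == TargetTransformInfo::TCC_Free) { /* fold eagerly */ }
  //
  // Note the operand type is supplied because Trunc is a cast.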

  /// \brief Estimate the cost of a GEP operation when lowered.
  ///
  /// The contract for this function is the same as \c getOperationCost except
  /// that it supports an interface that provides extra information specific to
  /// the GEP operation.
  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands) const;

  /// \brief Estimate the cost of a function call when lowered.
  ///
  /// The contract for this is the same as \c getOperationCost except that it
  /// supports an interface that provides extra information specific to call
  /// instructions.
  ///
  /// This is the most basic query for estimating call cost: it only knows the
  /// function type and (potentially) the number of arguments at the call site.
  /// The latter is only interesting for varargs function types.
  int getCallCost(FunctionType *FTy, int NumArgs = -1) const;

  /// \brief Estimate the cost of calling a specific function when lowered.
  ///
  /// This overload adds the ability to reason about the particular function
  /// being called in the event it is a library call with special lowering.
  int getCallCost(const Function *F, int NumArgs = -1) const;

  /// \brief Estimate the cost of calling a specific function when lowered.
  ///
  /// This overload allows specifying a set of candidate argument values.
  int getCallCost(const Function *F, ArrayRef<const Value *> Arguments) const;

  /// \returns A value by which our inlining threshold should be multiplied.
  /// This is primarily used to bump up the inlining threshold wholesale on
  /// targets where calls are unusually expensive.
  ///
  /// TODO: This is a rather blunt instrument. Perhaps altering the costs of
  /// individual classes of instructions would be better.
  unsigned getInliningThresholdMultiplier() const;

  /// \brief Estimate the cost of an intrinsic when lowered.
  ///
  /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                       ArrayRef<Type *> ParamTys) const;

  /// \brief Estimate the cost of an intrinsic when lowered.
  ///
  /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                       ArrayRef<const Value *> Arguments) const;

  /// \brief Estimate the cost of a given IR user when lowered.
  ///
  /// This can estimate the cost of either a ConstantExpr or Instruction when
  /// lowered. It has two primary advantages over the \c getOperationCost and
  /// \c getGEPCost above, and one significant disadvantage: it can only be
  /// used when the IR construct has already been formed.
  ///
  /// The advantages are that it can inspect the SSA use graph to reason more
  /// accurately about the cost. For example, all-constant-GEPs can often be
  /// folded into a load or other instruction, but if they are used in some
  /// other context they may not be folded. This routine can distinguish such
  /// cases.
  ///
  /// The returned cost is defined in terms of \c TargetCostConstants, see its
  /// comments for a detailed explanation of the cost values.
  int getUserCost(const User *U) const;
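
  // A typical client walks existing IR and lets this routine see the use
  // graph, e.g. (illustrative):
  //
  //   if (TTI.getUserCost(GEP) == TargetTransformInfo::TCC_Free) {
  //     // The GEP is expected to fold into its users' addressing modes.
  //   }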

  /// \brief Return true if branch divergence exists.
  ///
  /// Branch divergence has a significantly negative impact on GPU performance
  /// when threads in the same wavefront take different paths due to conditional
  /// branches.
  bool hasBranchDivergence() const;

  /// \brief Returns whether V is a source of divergence.
  ///
  /// This function provides the target-dependent information for
  /// the target-independent DivergenceAnalysis. DivergenceAnalysis first
  /// builds the dependency graph, and then runs the reachability algorithm
  /// starting with the sources of divergence.
  bool isSourceOfDivergence(const Value *V) const;

  /// \brief Test whether calls to a function lower to actual program function
  /// calls.
  ///
  /// The idea is to test whether the program is likely to require a 'call'
  /// instruction or equivalent in order to call the given function.
  ///
  /// FIXME: It's not clear that this is a good or useful query API. Clients
  /// should probably move to simpler cost metrics using the above.
  /// Alternatively, we could split the cost interface into distinct code-size
  /// and execution-speed costs. This would allow modelling the core of this
  /// query more accurately as a call is a single small instruction, but
  /// incurs significant execution cost.
  bool isLoweredToCall(const Function *F) const;

  /// Parameters that control the generic loop unrolling transformation.
  struct UnrollingPreferences {
    /// The cost threshold for the unrolled loop. Should be relative to the
    /// getUserCost values returned by this API, and the expectation is that
    /// the unrolled loop's instructions when run through that interface should
    /// not exceed this cost. However, this is only an estimate. Also, specific
    /// loops may be unrolled even with a cost above this threshold if deemed
    /// profitable. Set this to UINT_MAX to disable the loop body cost
    /// restriction.
    unsigned Threshold;
    /// If complete unrolling will reduce the cost of the loop below its
    /// expected dynamic cost while rolled by this percentage, apply a discount
    /// (below) to its unrolled cost.
    unsigned PercentDynamicCostSavedThreshold;
    /// The discount applied to the unrolled cost when the *dynamic* cost
    /// savings of unrolling exceed the \c PercentDynamicCostSavedThreshold.
    unsigned DynamicCostSavingsDiscount;
    /// The cost threshold for the unrolled loop when optimizing for size (set
    /// to UINT_MAX to disable).
    unsigned OptSizeThreshold;
    /// The cost threshold for the unrolled loop, like Threshold, but used
    /// for partial/runtime unrolling (set to UINT_MAX to disable).
    unsigned PartialThreshold;
    /// The cost threshold for the unrolled loop when optimizing for size, like
    /// OptSizeThreshold, but used for partial/runtime unrolling (set to
    /// UINT_MAX to disable).
    unsigned PartialOptSizeThreshold;
    /// A forced unrolling factor (the number of concatenated bodies of the
    /// original loop in the unrolled loop body). When set to 0, the unrolling
    /// transformation will select an unrolling factor based on the current cost
    /// threshold and other factors.
    unsigned Count;
    /// Set the maximum unrolling factor. The unrolling factor may be selected
    /// using the appropriate cost threshold, but may not exceed this number
    /// (set to UINT_MAX to disable). This does not apply in cases where the
    /// loop is being fully unrolled.
    unsigned MaxCount;
    /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
    /// applies even if full unrolling is selected. This allows a target to fall
    /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
    unsigned FullUnrollMaxCount;
    /// Allow partial unrolling (unrolling of loops to expand the size of the
    /// loop body, not only to eliminate small constant-trip-count loops).
    bool Partial;
    /// Allow runtime unrolling (unrolling of loops to expand the size of the
    /// loop body even when the number of loop iterations is not known at
    /// compile time).
    bool Runtime;
    /// Allow generation of a loop remainder (extra iterations after unroll).
    bool AllowRemainder;
    /// Allow emitting expensive instructions (such as divisions) when computing
    /// the trip count of a loop for runtime unrolling.
    bool AllowExpensiveTripCount;
    /// Apply loop unroll on any kind of loop
    /// (mainly to loops that fail runtime unrolling).
    bool Force;
  };

  /// \brief Get target-customized preferences for the generic loop unrolling
  /// transformation. The caller will initialize UP with the current
  /// target-independent defaults.
  void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const;
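
  // In a target's implementation this typically just adjusts the defaults the
  // caller filled in, e.g. (a sketch of a hypothetical target hook):
  //
  //   void MyTTIImpl::getUnrollingPreferences(Loop *L,
  //                                           UnrollingPreferences &UP) {
  //     UP.Partial = true;  // Partial unrolling is cheap on this core.
  //     UP.MaxCount = 4;    // But never concatenate more than 4 bodies.
  //   }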

  /// @}

  /// \name Scalar Target Information
  /// @{

  /// \brief Flags indicating the kind of support for population count.
  ///
  /// Compared to the SW implementation, HW support is supposed to
  /// significantly boost the performance when the population is dense, and it
  /// may or may not degrade performance if the population is sparse. HW
  /// support is considered "Fast" if it can outperform, or is on a par with,
  /// the SW implementation when the population is sparse; otherwise, it is
  /// considered "Slow".
  enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };

  /// \brief Return true if the specified immediate is a legal add immediate,
  /// that is, the target has add instructions which can add a register with
  /// the immediate without having to materialize the immediate into a
  /// register.
  bool isLegalAddImmediate(int64_t Imm) const;

  /// \brief Return true if the specified immediate is a legal icmp immediate,
  /// that is, the target has icmp instructions which can compare a register
  /// against the immediate without having to materialize the immediate into a
  /// register.
  bool isLegalICmpImmediate(int64_t Imm) const;

  /// \brief Return true if the addressing mode represented by AM is legal for
  /// this target, for a load/store of the specified type.
  /// The type may be VoidTy, in which case only return true if the addressing
  /// mode is legal for a load/store of any legal type.
  /// TODO: Handle pre/postinc as well.
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace = 0) const;
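
  // The queried addressing mode denotes an address of the form
  //
  //   BaseGV + BaseOffset + BaseReg + Scale * ScaleReg
  //
  // where BaseReg is present only if HasBaseReg is true. For instance, a
  // client testing whether reg + 4*reg' addressing is legal for i32 loads
  // might ask (illustrative):
  //
  //   TTI.isLegalAddressingMode(Int32Ty, /*BaseGV=*/nullptr,
  //                             /*BaseOffset=*/0, /*HasBaseReg=*/true,
  //                             /*Scale=*/4);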

  /// \brief Return true if the target supports masked load/store.
  /// AVX2 and AVX-512 targets allow masks for consecutive load and store.
  bool isLegalMaskedStore(Type *DataType) const;
  bool isLegalMaskedLoad(Type *DataType) const;

  /// \brief Return true if the target supports masked gather/scatter.
  /// AVX-512 fully supports gather and scatter for vectors with 32- and
  /// 64-bit scalar types.
  bool isLegalMaskedScatter(Type *DataType) const;
  bool isLegalMaskedGather(Type *DataType) const;

  /// \brief Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
  /// TODO: Handle pre/postinc as well.
  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale,
                           unsigned AddrSpace = 0) const;

  /// \brief Return true if it's free to truncate a value of type Ty1 to type
  /// Ty2. e.g. On x86 it's free to truncate an i32 value in register EAX to
  /// i16 by referencing its sub-register AX.
  bool isTruncateFree(Type *Ty1, Type *Ty2) const;

  /// \brief Return true if it is profitable to hoist instructions in the
  /// then/else blocks to before the if.
  bool isProfitableToHoist(Instruction *I) const;

  /// \brief Return true if this type is legal.
  bool isTypeLegal(Type *Ty) const;

  /// \brief Returns the target's jmp_buf alignment in bytes.
  unsigned getJumpBufAlignment() const;

  /// \brief Returns the target's jmp_buf size in bytes.
  unsigned getJumpBufSize() const;

  /// \brief Return true if switches should be turned into lookup tables for
  /// the target.
  bool shouldBuildLookupTables() const;

  /// \brief Don't restrict interleaved unrolling to small loops.
  bool enableAggressiveInterleaving(bool LoopHasReductions) const;

  /// \brief Enable matching of interleaved access groups.
  bool enableInterleavedAccessVectorization() const;

  /// \brief Indicate that it is potentially unsafe to automatically vectorize
  /// floating-point operations because the semantics of vector and scalar
  /// floating-point operations may differ. For example, ARMv7 NEON SIMD math
  /// does not support IEEE-754 denormal numbers, while, depending on the
  /// platform, scalar floating-point math does.
  /// This applies to floating-point math operations and calls, not memory
  /// operations, shuffles, or casts.
  bool isFPVectorizationPotentiallyUnsafe() const;

  /// \brief Determine if the target supports unaligned memory accesses.
  bool allowsMisalignedMemoryAccesses(unsigned BitWidth,
                                      unsigned AddressSpace = 0,
                                      unsigned Alignment = 1,
                                      bool *Fast = nullptr) const;
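
  // The optional 'Fast' out-parameter distinguishes "merely legal" from
  // "fast". A vectorizer might use it like this (illustrative):
  //
  //   bool Fast = false;
  //   if (TTI.allowsMisalignedMemoryAccesses(128, /*AddressSpace=*/0,
  //                                          /*Alignment=*/1, &Fast) &&
  //       Fast) {
  //     // Emit an unaligned 128-bit vector load directly.
  //   }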

  /// \brief Return hardware support for population count.
  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;

  /// \brief Return true if the hardware has a fast square-root instruction.
  bool haveFastSqrt(Type *Ty) const;

  /// \brief Return the expected cost of supporting the floating point
  /// operation of the specified type.
  int getFPOpCost(Type *Ty) const;

  /// \brief Return the expected cost of materializing the given integer
  /// immediate of the specified type.
  int getIntImmCost(const APInt &Imm, Type *Ty) const;

  /// \brief Return the expected cost of materializing the given integer
  /// immediate of the specified type for a given instruction. The cost can be
  /// zero if the immediate can be folded into the specified instruction.
  int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                    Type *Ty) const;
  int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                    Type *Ty) const;

  /// \brief Return the expected cost for the given integer immediate when
  /// optimizing for size. This is different from the other integer immediate
  /// cost functions in that it is subtarget agnostic. This is useful when you
  /// e.g. target one ISA such as AArch32 but smaller encodings could be
  /// possible with another such as Thumb. This return value is used as a
  /// penalty when the total cost for a constant is calculated (the bigger the
  /// cost, the more beneficial constant hoisting is).
  int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                            Type *Ty) const;
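
  // The (Opc, Idx) overloads let clients such as constant hoisting ask about
  // an immediate in a specific operand position, e.g. (illustrative):
  //
  //   // Cost of the constant as the second operand of an add.
  //   int C = TTI.getIntImmCost(Instruction::Add, /*Idx=*/1, Imm, Int64Ty);
  //   if (C > TargetTransformInfo::TCC_Basic) {
  //     // Worth hoisting and reusing the materialized constant.
  //   }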
  /// @}

  /// \name Vector Target Information
  /// @{

  /// \brief The various kinds of shuffle patterns for vector queries.
  enum ShuffleKind {
    SK_Broadcast,       ///< Broadcast element 0 to all other elements.
    SK_Reverse,         ///< Reverse the order of the vector.
    SK_Alternate,       ///< Choose alternate elements from vector.
    SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
    SK_ExtractSubvector ///< ExtractSubvector. Index indicates start offset.
  };
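
  // In IR terms these kinds correspond to shufflevector masks. For a
  // <4 x i32> vector (illustrative):
  //
  //   SK_Broadcast: shufflevector %v, undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  //   SK_Reverse:   shufflevector %v, undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>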

  /// \brief Additional information about an operand's possible values.
  enum OperandValueKind {
    OK_AnyValue,               // Operand can have any value.
    OK_UniformValue,           // Operand is uniform (splat of a value).
    OK_UniformConstantValue,   // Operand is a uniform constant.
    OK_NonUniformConstantValue // Operand is a non-uniform constant value.
  };

  /// \brief Additional properties of an operand's values.
  enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };

  /// \return The number of scalar or vector registers that the target has.
  /// If 'Vectors' is true, it returns the number of vector registers. If it is
  /// set to false, it returns the number of scalar registers.
  unsigned getNumberOfRegisters(bool Vector) const;

  /// \return The width of the largest scalar or vector register type.
  unsigned getRegisterBitWidth(bool Vector) const;

  /// \return The bitwidth of the largest vector type that should be used to
  /// load/store in the given address space.
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;

  /// \return The size of a cache line in bytes.
  unsigned getCacheLineSize() const;

  /// \return How far before a load we should place the prefetch instruction.
  /// This is currently measured in number of instructions.
  unsigned getPrefetchDistance() const;

  /// \return Some HW prefetchers can handle accesses up to a certain constant
  /// stride. This is the minimum stride in bytes where it makes sense to start
  /// adding SW prefetches. The default is 1, i.e. prefetch with any stride.
  unsigned getMinPrefetchStride() const;

  /// \return The maximum number of iterations to prefetch ahead. If the
  /// required number of iterations is more than this number, no prefetching is
  /// performed.
  unsigned getMaxPrefetchIterationsAhead() const;

  /// \return The maximum interleave factor that any transform should try to
  /// perform for this target. This number depends on the level of parallelism
  /// and the number of execution units in the CPU.
  unsigned getMaxInterleaveFactor(unsigned VF) const;

  /// \return The expected cost of arithmetic ops, such as mul, xor, fsub, etc.
  int getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
      OperandValueKind Opd2Info = OK_AnyValue,
      OperandValueProperties Opd1PropInfo = OP_None,
      OperandValueProperties Opd2PropInfo = OP_None) const;

  /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
  /// The index and subtype parameters are used by the subvector insertion and
  /// extraction shuffle kinds.
  int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0,
                     Type *SubTp = nullptr) const;

  /// \return The expected cost of cast instructions, such as bitcast, trunc,
  /// zext, etc.
  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const;

  /// \return The expected cost of a sign- or zero-extended vector extract. Use
  /// -1 to indicate that there is no information about the index value.
  int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
                               unsigned Index = -1) const;

  /// \return The expected cost of control-flow related instructions such as
  /// Phi, Ret, Br.
  int getCFInstrCost(unsigned Opcode) const;

  /// \returns The expected cost of compare and select instructions.
  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                         Type *CondTy = nullptr) const;

  /// \return The expected cost of vector Insert and Extract.
  /// Use -1 to indicate that there is no information on the index value.
  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const;

  /// \return The cost of Load and Store instructions.
  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                      unsigned AddressSpace) const;

  /// \return The cost of masked Load and Store instructions.
  int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                            unsigned AddressSpace) const;

  /// \return The cost of a Gather or Scatter operation.
  /// \p Opcode is the kind of memory access, Load or Store.
  /// \p DataTy is the vector type of the data to be loaded or stored.
  /// \p Ptr is the pointer (or vector of pointers) - the address(es) in
  /// memory.
  /// \p VariableMask is true when the memory access is predicated with a mask
  /// that is not a compile-time constant.
  /// \p Alignment is the alignment of a single element.
  int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
                             bool VariableMask, unsigned Alignment) const;

  /// \return The cost of the interleaved memory operation.
  /// \p Opcode is the memory operation code.
  /// \p VecTy is the vector type of the interleaved access.
  /// \p Factor is the interleave factor.
  /// \p Indices is the indices for interleaved load members (as an interleaved
  /// load allows gaps).
  /// \p Alignment is the alignment of the memory operation.
  /// \p AddressSpace is the address space of the pointer.
  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
                                 ArrayRef<unsigned> Indices, unsigned Alignment,
                                 unsigned AddressSpace) const;

  /// \brief Calculate the cost of performing a vector reduction.
  ///
  /// This is the cost of reducing the vector value of type \p Ty to a scalar
  /// value using the operation denoted by \p Opcode. The form of the reduction
  /// can either be a pairwise reduction or a reduction that splits the vector
  /// at every reduction level.
  ///
  /// Pairwise:
  ///  (v0, v1, v2, v3)
  ///  ((v0+v1), (v2+v3), undef, undef)
  /// Split:
  ///  (v0, v1, v2, v3)
  ///  ((v0+v2), (v1+v3), undef, undef)
  int getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm) const;

  /// \returns The cost of Intrinsic instructions. Analyses types only.
  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                            ArrayRef<Type *> Tys, FastMathFlags FMF) const;

  /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                            ArrayRef<Value *> Args, FastMathFlags FMF) const;

  /// \returns The cost of Call instructions.
  int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const;

  /// \returns The number of pieces into which the provided type must be
  /// split during legalization. Zero is returned when the answer is unknown.
  unsigned getNumberOfParts(Type *Tp) const;

  /// \returns The cost of the address computation. For most targets this can
  /// be merged into the instruction indexing mode. Some targets might want to
  /// distinguish between address computation for memory operations on vector
  /// types and scalar types. Such targets should override this function.
  /// The 'IsComplex' parameter is a hint that the address computation is
  /// likely to involve multiple instructions and as such unlikely to be
  /// merged into the address indexing mode.
  int getAddressComputationCost(Type *Ty, bool IsComplex = false) const;

  /// \returns The cost, if any, of keeping values of the given types alive
  /// over a callsite.
  ///
  /// Some types may require the use of register classes that do not have
  /// any callee-saved registers, so would require a spill and fill.
  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;

  /// \returns True if the intrinsic is a supported memory intrinsic. Info
  /// will contain additional information - whether the intrinsic may read
  /// or write memory, its volatility, and the pointer. Info is undefined
  /// if false is returned.
  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
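
  // A client such as a CSE pass might use the query like this (illustrative):
  //
  //   MemIntrinsicInfo Info;
  //   if (TTI.getTgtMemIntrinsic(II, Info) && Info.ReadMem && Info.IsSimple) {
  //     // Treat II like a simple load from Info.PtrVal for the purposes of
  //     // redundancy elimination.
  //   }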

  /// \returns A value which is the result of the given memory intrinsic. New
  /// instructions may be created to extract the result from the given
  /// intrinsic memory operation. Returns nullptr if the target cannot create
  /// a result from the given intrinsic.
  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) const;

  /// \returns True if the two functions have compatible attributes for
  /// inlining purposes.
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  /// @}

private:
  /// \brief The abstract base class used to type erase specific TTI
  /// implementations.
  class Concept;

  /// \brief The template model for the base class which wraps a concrete
  /// implementation in a type erased interface.
  template <typename T> class Model;

  std::unique_ptr<Concept> TTIImpl;
};

class TargetTransformInfo::Concept {
public:
  virtual ~Concept() = 0;
  virtual const DataLayout &getDataLayout() const = 0;
  virtual int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0;
  virtual int getGEPCost(Type *PointeeType, const Value *Ptr,
                         ArrayRef<const Value *> Operands) = 0;
  virtual int getCallCost(FunctionType *FTy, int NumArgs) = 0;
  virtual int getCallCost(const Function *F, int NumArgs) = 0;
  virtual int getCallCost(const Function *F,
                          ArrayRef<const Value *> Arguments) = 0;
  virtual unsigned getInliningThresholdMultiplier() = 0;
  virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                               ArrayRef<Type *> ParamTys) = 0;
  virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                               ArrayRef<const Value *> Arguments) = 0;
  virtual int getUserCost(const User *U) = 0;
  virtual bool hasBranchDivergence() = 0;
  virtual bool isSourceOfDivergence(const Value *V) = 0;
  virtual bool isLoweredToCall(const Function *F) = 0;
  virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) = 0;
  virtual bool isLegalAddImmediate(int64_t Imm) = 0;
  virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
  virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                     int64_t BaseOffset, bool HasBaseReg,
                                     int64_t Scale, unsigned AddrSpace) = 0;
  virtual bool isLegalMaskedStore(Type *DataType) = 0;
  virtual bool isLegalMaskedLoad(Type *DataType) = 0;
  virtual bool isLegalMaskedScatter(Type *DataType) = 0;
  virtual bool isLegalMaskedGather(Type *DataType) = 0;
  virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                   int64_t BaseOffset, bool HasBaseReg,
                                   int64_t Scale, unsigned AddrSpace) = 0;
  virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
  virtual bool isProfitableToHoist(Instruction *I) = 0;
  virtual bool isTypeLegal(Type *Ty) = 0;
  virtual unsigned getJumpBufAlignment() = 0;
  virtual unsigned getJumpBufSize() = 0;
  virtual bool shouldBuildLookupTables() = 0;
  virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
  virtual bool enableInterleavedAccessVectorization() = 0;
  virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
  virtual bool allowsMisalignedMemoryAccesses(unsigned BitWidth,
                                              unsigned AddressSpace,
                                              unsigned Alignment,
                                              bool *Fast) = 0;
  virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
  virtual bool haveFastSqrt(Type *Ty) = 0;
  virtual int getFPOpCost(Type *Ty) = 0;
  virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
                                    const APInt &Imm, Type *Ty) = 0;
  virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0;
  virtual int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                            Type *Ty) = 0;
  virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                            Type *Ty) = 0;
  virtual unsigned getNumberOfRegisters(bool Vector) = 0;
  virtual unsigned getRegisterBitWidth(bool Vector) = 0;
  virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) = 0;
  virtual unsigned getCacheLineSize() = 0;
  virtual unsigned getPrefetchDistance() = 0;
  virtual unsigned getMinPrefetchStride() = 0;
  virtual unsigned getMaxPrefetchIterationsAhead() = 0;
  virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
  virtual unsigned
  getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
                         OperandValueKind Opd2Info,
                         OperandValueProperties Opd1PropInfo,
                         OperandValueProperties Opd2PropInfo) = 0;
  virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
                             Type *SubTp) = 0;
  virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) = 0;
  virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                       VectorType *VecTy, unsigned Index) = 0;
  virtual int getCFInstrCost(unsigned Opcode) = 0;
  virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                 Type *CondTy) = 0;
  virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
                                 unsigned Index) = 0;
  virtual int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                              unsigned AddressSpace) = 0;
  virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                    unsigned Alignment,
                                    unsigned AddressSpace) = 0;
  virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                     Value *Ptr, bool VariableMask,
                                     unsigned Alignment) = 0;
  virtual int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                         unsigned Factor,
                                         ArrayRef<unsigned> Indices,
                                         unsigned Alignment,
                                         unsigned AddressSpace) = 0;
  virtual int getReductionCost(unsigned Opcode, Type *Ty,
                               bool IsPairwiseForm) = 0;
  virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                    ArrayRef<Type *> Tys,
                                    FastMathFlags FMF) = 0;
  virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                    ArrayRef<Value *> Args,
                                    FastMathFlags FMF) = 0;
  virtual int getCallInstrCost(Function *F, Type *RetTy,
                               ArrayRef<Type *> Tys) = 0;
  virtual unsigned getNumberOfParts(Type *Tp) = 0;
  virtual int getAddressComputationCost(Type *Ty, bool IsComplex) = 0;
  virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
  virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                                  MemIntrinsicInfo &Info) = 0;
  virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                                   Type *ExpectedType) = 0;
  virtual bool areInlineCompatible(const Function *Caller,
                                   const Function *Callee) const = 0;
};

template <typename T>
class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
  T Impl;

public:
  Model(T Impl) : Impl(std::move(Impl)) {}
  ~Model() override {}

  const DataLayout &getDataLayout() const override {
    return Impl.getDataLayout();
  }

  int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override {
    return Impl.getOperationCost(Opcode, Ty, OpTy);
  }
  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands) override {
    return Impl.getGEPCost(PointeeType, Ptr, Operands);
  }
  int getCallCost(FunctionType *FTy, int NumArgs) override {
    return Impl.getCallCost(FTy, NumArgs);
  }
  int getCallCost(const Function *F, int NumArgs) override {
    return Impl.getCallCost(F, NumArgs);
  }
  int getCallCost(const Function *F,
                  ArrayRef<const Value *> Arguments) override {
    return Impl.getCallCost(F, Arguments);
  }
  unsigned getInliningThresholdMultiplier() override {
    return Impl.getInliningThresholdMultiplier();
  }
  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                       ArrayRef<Type *> ParamTys) override {
    return Impl.getIntrinsicCost(IID, RetTy, ParamTys);
  }
  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                       ArrayRef<const Value *> Arguments) override {
    return Impl.getIntrinsicCost(IID, RetTy, Arguments);
  }
  int getUserCost(const User *U) override { return Impl.getUserCost(U); }
  bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
  bool isSourceOfDivergence(const Value *V) override {
    return Impl.isSourceOfDivergence(V);
  }
  bool isLoweredToCall(const Function *F) override {
    return Impl.isLoweredToCall(F);
  }
  void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) override {
    return Impl.getUnrollingPreferences(L, UP);
  }
  bool isLegalAddImmediate(int64_t Imm) override {
    return Impl.isLegalAddImmediate(Imm);
  }
  bool isLegalICmpImmediate(int64_t Imm) override {
    return Impl.isLegalICmpImmediate(Imm);
  }
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace) override {
    return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
                                      Scale, AddrSpace);
  }
  bool isLegalMaskedStore(Type *DataType) override {
    return Impl.isLegalMaskedStore(DataType);
  }
  bool isLegalMaskedLoad(Type *DataType) override {
    return Impl.isLegalMaskedLoad(DataType);
  }
  bool isLegalMaskedScatter(Type *DataType) override {
    return Impl.isLegalMaskedScatter(DataType);
  }
  bool isLegalMaskedGather(Type *DataType) override {
    return Impl.isLegalMaskedGather(DataType);
  }
  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale,
                           unsigned AddrSpace) override {
    return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
                                     Scale, AddrSpace);
  }
  bool isTruncateFree(Type *Ty1, Type *Ty2) override {
    return Impl.isTruncateFree(Ty1, Ty2);
  }
  bool isProfitableToHoist(Instruction *I) override {
    return Impl.isProfitableToHoist(I);
  }
  bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
  unsigned getJumpBufAlignment() override { return Impl.getJumpBufAlignment(); }
  unsigned getJumpBufSize() override { return Impl.getJumpBufSize(); }
  bool shouldBuildLookupTables() override {
    return Impl.shouldBuildLookupTables();
  }
  bool enableAggressiveInterleaving(bool LoopHasReductions) override {
    return Impl.enableAggressiveInterleaving(LoopHasReductions);
  }
  bool enableInterleavedAccessVectorization() override {
    return Impl.enableInterleavedAccessVectorization();
  }
  bool isFPVectorizationPotentiallyUnsafe() override {
    return Impl.isFPVectorizationPotentiallyUnsafe();
  }
  bool allowsMisalignedMemoryAccesses(unsigned BitWidth, unsigned AddressSpace,
                                      unsigned Alignment, bool *Fast) override {
    return Impl.allowsMisalignedMemoryAccesses(BitWidth, AddressSpace,
                                               Alignment, Fast);
  }
  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
    return Impl.getPopcntSupport(IntTyWidthInBit);
  }
  bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }

  int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }

  int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                            Type *Ty) override {
    return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
  }
  int getIntImmCost(const APInt &Imm, Type *Ty) override {
    return Impl.getIntImmCost(Imm, Ty);
  }
  int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                    Type *Ty) override {
    return Impl.getIntImmCost(Opc, Idx, Imm, Ty);
  }
  int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                    Type *Ty) override {
    return Impl.getIntImmCost(IID, Idx, Imm, Ty);
  }
  unsigned getNumberOfRegisters(bool Vector) override {
    return Impl.getNumberOfRegisters(Vector);
  }
  unsigned getRegisterBitWidth(bool Vector) override {
    return Impl.getRegisterBitWidth(Vector);
  }

  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) override {
    return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
  }

  unsigned getCacheLineSize() override {
    return Impl.getCacheLineSize();
  }
  unsigned getPrefetchDistance() override { return Impl.getPrefetchDistance(); }
  unsigned getMinPrefetchStride() override {
    return Impl.getMinPrefetchStride();
  }
  unsigned getMaxPrefetchIterationsAhead() override {
    return Impl.getMaxPrefetchIterationsAhead();
  }
  unsigned getMaxInterleaveFactor(unsigned VF) override {
    return Impl.getMaxInterleaveFactor(VF);
  }
  unsigned
  getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
                         OperandValueKind Opd2Info,
                         OperandValueProperties Opd1PropInfo,
                         OperandValueProperties Opd2PropInfo) override {
    return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
                                       Opd1PropInfo, Opd2PropInfo);
  }
  int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
                     Type *SubTp) override {
    return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
  }
  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) override {
    return Impl.getCastInstrCost(Opcode, Dst, Src);
  }
  int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
                               unsigned Index) override {
    return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
  }
  int getCFInstrCost(unsigned Opcode) override {
    return Impl.getCFInstrCost(Opcode);
  }
  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) override {
    return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy);
  }
  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
    return Impl.getVectorInstrCost(Opcode, Val, Index);
  }
  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                      unsigned AddressSpace) override {
    return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
  }
  int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                            unsigned AddressSpace) override {
    return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
  }
  int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                             Value *Ptr, bool VariableMask,
                             unsigned Alignment) override {
    return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                       Alignment);
  }
  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
                                 ArrayRef<unsigned> Indices, unsigned Alignment,
                                 unsigned AddressSpace) override {
    return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                           Alignment, AddressSpace);
  }
  int getReductionCost(unsigned Opcode, Type *Ty,
                       bool IsPairwiseForm) override {
    return Impl.getReductionCost(Opcode, Ty, IsPairwiseForm);
  }
  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
                            FastMathFlags FMF) override {
    return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF);
  }
  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                            ArrayRef<Value *> Args,
                            FastMathFlags FMF) override {
    return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF);
  }
  int getCallInstrCost(Function *F, Type *RetTy,
                       ArrayRef<Type *> Tys) override {
    return Impl.getCallInstrCost(F, RetTy, Tys);
  }
  unsigned getNumberOfParts(Type *Tp) override {
    return Impl.getNumberOfParts(Tp);
  }
  int getAddressComputationCost(Type *Ty, bool IsComplex) override {
    return Impl.getAddressComputationCost(Ty, IsComplex);
  }
  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
    return Impl.getCostOfKeepingLiveOverCall(Tys);
  }
  bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                          MemIntrinsicInfo &Info) override {
    return Impl.getTgtMemIntrinsic(Inst, Info);
  }
  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) override {
    return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
  }
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const override {
    return Impl.areInlineCompatible(Caller, Callee);
  }
};

template <typename T>
TargetTransformInfo::TargetTransformInfo(T Impl)
    : TTIImpl(new Model<T>(Impl)) {}

/// \brief Analysis pass providing the \c TargetTransformInfo.
///
/// The core idea of the TargetIRAnalysis is to expose an interface through
/// which LLVM targets can analyze and provide information about the middle
/// end's target-independent IR. This supports use cases such as target-aware
/// cost modeling of IR constructs.
///
/// This is a function analysis because much of the cost modeling for targets
/// is done in a subtarget specific way and LLVM supports compiling different
/// functions targeting different subtargets in order to support runtime
/// dispatch according to the observed subtarget.
class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
public:
  typedef TargetTransformInfo Result;

  /// \brief Default construct a target IR analysis.
  ///
  /// This will use the module's datalayout to construct a baseline
  /// conservative TTI result.
  TargetIRAnalysis();

  /// \brief Construct an IR analysis pass around a target-provided callback.
  ///
  /// The callback will be called with a particular function for which the TTI
  /// is needed and must return a TTI object for that function.
  TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);

  // Value semantics. We spell out the constructors for MSVC.
  TargetIRAnalysis(const TargetIRAnalysis &Arg)
      : TTICallback(Arg.TTICallback) {}
  TargetIRAnalysis(TargetIRAnalysis &&Arg)
      : TTICallback(std::move(Arg.TTICallback)) {}
  TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
    TTICallback = RHS.TTICallback;
    return *this;
  }
  TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
    TTICallback = std::move(RHS.TTICallback);
    return *this;
  }

  Result run(const Function &F, AnalysisManager<Function> &);

private:
  friend AnalysisInfoMixin<TargetIRAnalysis>;
  static char PassID;

  /// \brief The callback used to produce a result.
  ///
  /// We use a completely opaque callback so that targets can provide whatever
  /// mechanism they desire for constructing the TTI for a given function.
  ///
  /// FIXME: Should we really use std::function? It's relatively inefficient.
  /// It might be possible to arrange for even stateful callbacks to outlive
  /// the analysis and thus use a function_ref which would be lighter weight.
  /// This may also be less error prone as the callback is likely to reference
  /// the external TargetMachine, and that reference needs to never dangle.
  std::function<Result(const Function &)> TTICallback;

  /// \brief Helper function used as the callback in the default constructor.
  static Result getDefaultTTI(const Function &F);
};
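
// Under the new pass manager, a pass obtains the result in the usual way,
// e.g. (a sketch; assumes the analysis has been registered with the
// function analysis manager):
//
//   PreservedAnalyses MyPass::run(Function &F, AnalysisManager<Function> &AM) {
//     TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
//     ...
//   }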

/// \brief Wrapper pass for TargetTransformInfo.
///
/// This pass can be constructed from a TTI object which it stores internally
/// and is queried by passes.
class TargetTransformInfoWrapperPass : public ImmutablePass {
  TargetIRAnalysis TIRA;
  Optional<TargetTransformInfo> TTI;

  virtual void anchor();

public:
  static char ID;

  /// \brief We must provide a default constructor for the pass but it should
  /// never be used.
  ///
  /// Use the constructor below or call one of the creation routines.
  TargetTransformInfoWrapperPass();

  explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);

  TargetTransformInfo &getTTI(const Function &F);
};
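
// Under the legacy pass manager, a pass declares a dependency on the wrapper
// and queries it per-function, e.g. (illustrative):
//
//   void MyLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
//     AU.addRequired<TargetTransformInfoWrapperPass>();
//   }
//   bool MyLegacyPass::runOnFunction(Function &F) {
//     TargetTransformInfo &TTI =
//         getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
//     ...
//   }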

/// \brief Create an analysis pass wrapper around a TTI object.
///
/// This analysis pass just holds the TTI instance and makes it available to
/// clients.
ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);

} // End llvm namespace

#endif