//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that AArch64 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H

#include "AArch64.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Instruction.h"

namespace llvm {

namespace AArch64ISD {

// For predicated nodes where the result is a vector, the operation is
// controlled by a governing predicate and the inactive lanes are explicitly
// defined with a value, please stick to the following naming convention:
//
//    _MERGE_OP<n>        The result value is a vector with inactive lanes equal
//                        to source operand OP<n>.
//
//    _MERGE_ZERO         The result value is a vector with inactive lanes
//                        actively zeroed.
//
//    _MERGE_PASSTHRU     The result value is a vector with inactive lanes equal
//                        to the last source operand, whose only purpose is to
//                        act as a passthru value.
//
// For other cases where no explicit action is needed to set the inactive lanes,
// or when the result is not a vector and it is needed or helpful to
// distinguish a node from similar unpredicated nodes, use:
//
//    _PRED
//
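// For example (an illustrative sketch; the operand order shown is an
// assumption made for documentation and is not mandated by the convention):
//
//    FADD_PRED            pg, zn, zm        inactive lanes undefined
//    FNEG_MERGE_PASSTHRU  pg, zn, passthru  inactive lanes = passthru
//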
enum NodeType : unsigned {
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
  CALL,         // Function call.

  // Produces the full sequence of instructions for getting the thread pointer
  // offset of a variable into X0, using the TLSDesc model.
  TLSDESC_CALLSEQ,
  ADRP,     // Page address of a TargetGlobalAddress operand.
  ADR,      // ADR
  ADDlow,   // Add the low 12 bits of a TargetGlobalAddress operand.
  LOADgot,  // Load from automatically generated descriptor (e.g. Global
            // Offset Table, TLS record).
  RET_FLAG, // Return with a flag operand. Operand 0 is the chain operand.
  BRCOND,   // Conditional branch instruction; "b.cond".
  CSEL,
  FCSEL, // Conditional move instruction.
  CSINV, // Conditional select invert.
  CSNEG, // Conditional select negate.
  CSINC, // Conditional select increment.

  // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
  // ELF.
  THREAD_POINTER,
  ADC,
  SBC, // adc, sbc instructions

  // Predicated instructions where inactive lanes produce undefined results.
  ADD_PRED,
  FADD_PRED,
  FDIV_PRED,
  FMA_PRED,
  FMAXNM_PRED,
  FMINNM_PRED,
  FMUL_PRED,
  FSUB_PRED,
  MUL_PRED,
  SDIV_PRED,
  SHL_PRED,
  SMAX_PRED,
  SMIN_PRED,
  SRA_PRED,
  SRL_PRED,
  SUB_PRED,
  UDIV_PRED,
  UMAX_PRED,
  UMIN_PRED,

  // Predicated instructions with the result of inactive lanes provided by the
  // last operand.
  FABS_MERGE_PASSTHRU,
  FCEIL_MERGE_PASSTHRU,
  FFLOOR_MERGE_PASSTHRU,
  FNEARBYINT_MERGE_PASSTHRU,
  FNEG_MERGE_PASSTHRU,
  FRECPX_MERGE_PASSTHRU,
  FRINT_MERGE_PASSTHRU,
  FROUND_MERGE_PASSTHRU,
  FROUNDEVEN_MERGE_PASSTHRU,
  FSQRT_MERGE_PASSTHRU,
  FTRUNC_MERGE_PASSTHRU,
  FP_ROUND_MERGE_PASSTHRU,
  FP_EXTEND_MERGE_PASSTHRU,
  UINT_TO_FP_MERGE_PASSTHRU,
  SINT_TO_FP_MERGE_PASSTHRU,
  FCVTZU_MERGE_PASSTHRU,
  FCVTZS_MERGE_PASSTHRU,
  SIGN_EXTEND_INREG_MERGE_PASSTHRU,
  ZERO_EXTEND_INREG_MERGE_PASSTHRU,

  SETCC_MERGE_ZERO,

  // Arithmetic instructions which write flags.
  ADDS,
  SUBS,
  ADCS,
  SBCS,
  ANDS,

  // Conditional compares. Operands: left,right,falsecc,cc,flags
  CCMP,
  CCMN,
  FCCMP,

  // Floating point comparison
  FCMP,

  // Scalar extract
  EXTR,

  // Scalar-to-vector duplication
  DUP,
  DUPLANE8,
  DUPLANE16,
  DUPLANE32,
  DUPLANE64,

  // Vector immediate moves
  MOVI,
  MOVIshift,
  MOVIedit,
  MOVImsl,
  FMOV,
  MVNIshift,
  MVNImsl,

  // Vector immediate ops
  BICi,
  ORRi,

  // Vector bitwise select: similar to ISD::VSELECT but not all bits within an
  // element must be identical.
  BSP,

  // Vector arithmetic negation
  NEG,

  // Vector shuffles
  ZIP1,
  ZIP2,
  UZP1,
  UZP2,
  TRN1,
  TRN2,
  REV16,
  REV32,
  REV64,
  EXT,

  // Vector shift by scalar
  VSHL,
  VLSHR,
  VASHR,

  // Vector shift by scalar (again)
  SQSHL_I,
  UQSHL_I,
  SQSHLU_I,
  SRSHR_I,
  URSHR_I,

  // Vector shift by constant and insert
  VSLI,
  VSRI,

  // Vector comparisons
  CMEQ,
  CMGE,
  CMGT,
  CMHI,
  CMHS,
  FCMEQ,
  FCMGE,
  FCMGT,

  // Vector zero comparisons
  CMEQz,
  CMGEz,
  CMGTz,
  CMLEz,
  CMLTz,
  FCMEQz,
  FCMGEz,
  FCMGTz,
  FCMLEz,
  FCMLTz,

  // Vector across-lanes addition
  // Only the lower result lane is defined.
  SADDV,
  UADDV,

  // Vector halving addition
  SHADD,
  UHADD,

  // Vector rounding halving addition
  SRHADD,
  URHADD,

  // Absolute difference
  UABD,
  SABD,

  // Vector across-lanes min/max
  // Only the lower result lane is defined.
  SMINV,
  UMINV,
  SMAXV,
  UMAXV,

  SADDV_PRED,
  UADDV_PRED,
  SMAXV_PRED,
  UMAXV_PRED,
  SMINV_PRED,
  UMINV_PRED,
  ORV_PRED,
  EORV_PRED,
  ANDV_PRED,

  // Vector bitwise insertion
  BIT,

  // Compare-and-branch
  CBZ,
  CBNZ,
  TBZ,
  TBNZ,

  // Tail calls
  TC_RETURN,

  // Custom prefetch handling
  PREFETCH,

  // {s|u}int to FP within a FP register.
  SITOF,
  UITOF,

  /// Natural vector cast. ISD::BITCAST is not natural in the big-endian
  /// world w.r.t. vectors, which causes additional REV instructions to be
  /// generated to compensate for the byte-swapping. But sometimes we do
  /// need to re-interpret the data in SIMD vector registers in big-endian
  /// mode without emitting such REV instructions.
  NVCAST,

  SMULL,
  UMULL,

  // Reciprocal estimates and steps.
  FRECPE,
  FRECPS,
  FRSQRTE,
  FRSQRTS,

  SUNPKHI,
  SUNPKLO,
  UUNPKHI,
  UUNPKLO,

  CLASTA_N,
  CLASTB_N,
  LASTA,
  LASTB,
  REV,
  TBL,

  // Floating-point reductions.
  FADDA_PRED,
  FADDV_PRED,
  FMAXV_PRED,
  FMAXNMV_PRED,
  FMINV_PRED,
  FMINNMV_PRED,

  INSR,
  PTEST,
  PTRUE,

  DUP_MERGE_PASSTHRU,
  INDEX_VECTOR,

  REINTERPRET_CAST,

  LD1_MERGE_ZERO,
  LD1S_MERGE_ZERO,
  LDNF1_MERGE_ZERO,
  LDNF1S_MERGE_ZERO,
  LDFF1_MERGE_ZERO,
  LDFF1S_MERGE_ZERO,
  LD1RQ_MERGE_ZERO,
  LD1RO_MERGE_ZERO,

  // Structured loads.
  SVE_LD2_MERGE_ZERO,
  SVE_LD3_MERGE_ZERO,
  SVE_LD4_MERGE_ZERO,

  // Unsigned gather loads.
  GLD1_MERGE_ZERO,
  GLD1_SCALED_MERGE_ZERO,
  GLD1_UXTW_MERGE_ZERO,
  GLD1_SXTW_MERGE_ZERO,
  GLD1_UXTW_SCALED_MERGE_ZERO,
  GLD1_SXTW_SCALED_MERGE_ZERO,
  GLD1_IMM_MERGE_ZERO,

  // Signed gather loads.
  GLD1S_MERGE_ZERO,
  GLD1S_SCALED_MERGE_ZERO,
  GLD1S_UXTW_MERGE_ZERO,
  GLD1S_SXTW_MERGE_ZERO,
  GLD1S_UXTW_SCALED_MERGE_ZERO,
  GLD1S_SXTW_SCALED_MERGE_ZERO,
  GLD1S_IMM_MERGE_ZERO,

  // Unsigned first-faulting gather loads.
  GLDFF1_MERGE_ZERO,
  GLDFF1_SCALED_MERGE_ZERO,
  GLDFF1_UXTW_MERGE_ZERO,
  GLDFF1_SXTW_MERGE_ZERO,
  GLDFF1_UXTW_SCALED_MERGE_ZERO,
  GLDFF1_SXTW_SCALED_MERGE_ZERO,
  GLDFF1_IMM_MERGE_ZERO,

  // Signed first-faulting gather loads.
  GLDFF1S_MERGE_ZERO,
  GLDFF1S_SCALED_MERGE_ZERO,
  GLDFF1S_UXTW_MERGE_ZERO,
  GLDFF1S_SXTW_MERGE_ZERO,
  GLDFF1S_UXTW_SCALED_MERGE_ZERO,
  GLDFF1S_SXTW_SCALED_MERGE_ZERO,
  GLDFF1S_IMM_MERGE_ZERO,

  // Non-temporal gather loads
  GLDNT1_MERGE_ZERO,
  GLDNT1_INDEX_MERGE_ZERO,
  GLDNT1S_MERGE_ZERO,

  // Contiguous masked store.
  ST1_PRED,

  // Scatter store
  SST1_PRED,
  SST1_SCALED_PRED,
  SST1_UXTW_PRED,
  SST1_SXTW_PRED,
  SST1_UXTW_SCALED_PRED,
  SST1_SXTW_SCALED_PRED,
  SST1_IMM_PRED,

  // Non-temporal scatter store
  SSTNT1_PRED,
  SSTNT1_INDEX_PRED,

  // Strict (exception-raising) floating point comparison
  STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
  STRICT_FCMPE,

  // NEON Load/Store with post-increment base updates
  LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
  LD3post,
  LD4post,
  ST2post,
  ST3post,
  ST4post,
  LD1x2post,
  LD1x3post,
  LD1x4post,
  ST1x2post,
  ST1x3post,
  ST1x4post,
  LD1DUPpost,
  LD2DUPpost,
  LD3DUPpost,
  LD4DUPpost,
  LD1LANEpost,
  LD2LANEpost,
  LD3LANEpost,
  LD4LANEpost,
  ST2LANEpost,
  ST3LANEpost,
  ST4LANEpost,

  STG,
  STZG,
  ST2G,
  STZ2G,

  LDP,
  STP,
  STNP
};

} // end namespace AArch64ISD

namespace {

// Any instruction that defines a 32-bit result zeros out the high half of the
// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
// be copying from a truncate. But any other 32-bit operation will zero-extend
// up to 64 bits. AssertSext/AssertZext aren't saying anything about the upper
// 32 bits, they're probably just qualifying a CopyFromReg.
// FIXME: X86 also checks for CMOV here. Do we need something similar?
static inline bool isDef32(const SDNode &N) {
  unsigned Opc = N.getOpcode();
  return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
         Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
         Opc != ISD::AssertZext;
}
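
// A minimal sketch of how this predicate is typically consumed elsewhere in
// the backend (assuming a TableGen PatLeaf named "def32"; the exact names and
// pattern shown here are illustrative and not defined in this header):
//
//   def def32 : PatLeaf<(i32 GPR32:$src), [{ return isDef32(*N); }]>;
//   def : Pat<(i64 (zext def32:$src)),
//             (SUBREG_TO_REG (i64 0), GPR32:$src, sub_32)>;
//
// i.e. a 32-bit definition that implicitly zeroes bits [63:32] needs no
// explicit zero-extension when widened to 64 bits.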

} // end anonymous namespace

class AArch64Subtarget;
class AArch64TargetMachine;

class AArch64TargetLowering : public TargetLowering {
public:
  explicit AArch64TargetLowering(const TargetMachine &TM,
                                 const AArch64Subtarget &STI);

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;

  /// Determine which of the bits specified in Mask are known to be either zero
  /// or one and return them in the KnownZero/KnownOne bitsets.
  void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
                                     const APInt &DemandedElts,
                                     const SelectionDAG &DAG,
                                     unsigned Depth = 0) const override;

  MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
    // Returning i64 unconditionally here (i.e. even for ILP32) means that the
    // *DAG* representation of pointers will always be 64-bits. They will be
    // truncated and extended when transferred to memory, but the 64-bit DAG
    // allows us to use AArch64's addressing modes much more easily.
    return MVT::getIntegerVT(64);
  }

  bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                    const APInt &DemandedElts,
                                    TargetLoweringOpt &TLO) const override;

  MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;

  /// Returns true if the target allows unaligned memory accesses of the
  /// specified type.
  bool allowsMisalignedMemoryAccesses(
      EVT VT, unsigned AddrSpace = 0, unsigned Align = 1,
      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
      bool *Fast = nullptr) const override;
  /// LLT variant.
  bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,
                                      Align Alignment,
                                      MachineMemOperand::Flags Flags,
                                      bool *Fast = nullptr) const override;

  /// Provide custom lowering hooks for some operations.
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

  const char *getTargetNodeName(unsigned Opcode) const override;

  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

  /// This method returns a target specific FastISel object, or null if the
  /// target does not support "fast" ISel.
  FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                           const TargetLibraryInfo *libInfo) const override;

  bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;

  bool isFPImmLegal(const APFloat &Imm, EVT VT,
                    bool ForCodeSize) const override;

  /// Return true if the given shuffle mask can be codegen'd directly, or if it
  /// should be stack expanded.
  bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;

  /// Return the ISD::SETCC ValueType.
  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                         EVT VT) const override;

  SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;

  MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
                                  MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                         MachineBasicBlock *BB) const;

  MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI,
                              MachineBasicBlock *MBB) const override;

  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                          MachineFunction &MF,
                          unsigned Intrinsic) const override;

  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
                             EVT NewVT) const override;

  bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
  bool isTruncateFree(EVT VT1, EVT VT2) const override;

  bool isProfitableToHoist(Instruction *I) const override;

  bool isZExtFree(Type *Ty1, Type *Ty2) const override;
  bool isZExtFree(EVT VT1, EVT VT2) const override;
  bool isZExtFree(SDValue Val, EVT VT2) const override;

  bool shouldSinkOperands(Instruction *I,
                          SmallVectorImpl<Use *> &Ops) const override;

  bool hasPairedLoad(EVT LoadedType, Align &RequiredAlignment) const override;

  unsigned getMaxSupportedInterleaveFactor() const override { return 4; }

  bool lowerInterleavedLoad(LoadInst *LI,
                            ArrayRef<ShuffleVectorInst *> Shuffles,
                            ArrayRef<unsigned> Indices,
                            unsigned Factor) const override;
  bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                             unsigned Factor) const override;

  bool isLegalAddImmediate(int64_t) const override;
  bool isLegalICmpImmediate(int64_t) const override;

  bool shouldConsiderGEPOffsetSplit() const override;

  EVT getOptimalMemOpType(const MemOp &Op,
                          const AttributeList &FuncAttributes) const override;

  LLT getOptimalMemOpLLT(const MemOp &Op,
                         const AttributeList &FuncAttributes) const override;

  /// Return true if the addressing mode represented by AM is legal for this
  /// target, for a load/store of the specified type.
  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                             unsigned AS,
                             Instruction *I = nullptr) const override;

  /// Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
  int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                           unsigned AS) const override;

  /// Return true if an FMA operation is faster than a pair of fmul and fadd
  /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
  /// returns true, otherwise fmuladd is expanded to fmul + fadd.
  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                  EVT VT) const override;
  bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;

  const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;

  /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
  bool isDesirableToCommuteWithShift(const SDNode *N,
                                     CombineLevel Level) const override;

  /// Returns true if it is beneficial to convert a load of a constant
  /// to just the constant itself.
  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                         Type *Ty) const override;

  /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
  /// with this index.
  bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                               unsigned Index) const override;

  bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                            bool MathUsed) const override {
    // Using overflow ops for overflow checks only should be beneficial on
    // AArch64.
    return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
  }
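
  // Illustrative sketch (an assumption for documentation only, not part of
  // this interface): with the hook above enabled, an overflow check such as
  //   bool Ovf = __builtin_sadd_overflow(A, B, &R);
  // is expected to select to flag-setting arithmetic plus a conditional set,
  // roughly:
  //   adds w8, w0, w1
  //   cset w9, vs
  // rather than materialising the result and comparing it separately.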

  Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
                        AtomicOrdering Ord) const override;
  Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
                              Value *Addr, AtomicOrdering Ord) const override;

  void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
  bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;

  bool useLoadStackGuardNode() const override;
  TargetLoweringBase::LegalizeTypeAction
  getPreferredVectorAction(MVT VT) const override;

  /// If the target has a standard location for the stack protector cookie,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getIRStackGuard(IRBuilder<> &IRB) const override;

  void insertSSPDeclarations(Module &M) const override;
  Value *getSDagStackGuard(const Module &M) const override;
  Function *getSSPStackGuardCheck(const Module &M) const override;

  /// If the target has a standard location for the unsafe stack pointer,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;

  /// If a physical register, this returns the register that receives the
  /// exception address on entry to an EH pad.
  Register
  getExceptionPointerRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X0;
  }

  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  Register
  getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X1;
  }

  bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

  bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                        const SelectionDAG &DAG) const override {
    // Do not merge to float value size (128 bits) if no implicit
    // float attribute is set.

    bool NoFloat = DAG.getMachineFunction().getFunction().hasFnAttribute(
        Attribute::NoImplicitFloat);

    if (NoFloat)
      return (MemVT.getSizeInBits() <= 64);
    return true;
  }

  bool isCheapToSpeculateCttz() const override {
    return true;
  }

  bool isCheapToSpeculateCtlz() const override {
    return true;
  }

  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

  bool hasAndNotCompare(SDValue V) const override {
    // We can use bics for any scalar.
    return V.getValueType().isScalarInteger();
  }

  bool hasAndNot(SDValue Y) const override {
    EVT VT = Y.getValueType();

    if (!VT.isVector())
      return hasAndNotCompare(Y);

    return VT.getSizeInBits() >= 64; // vector 'bic'
  }
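
  // Illustrative sketch (not part of this interface; the instruction choice is
  // the usual AArch64 mapping, stated here as an assumption): scalar and-not
  // maps onto BIC/BICS, e.g.
  //   bic  w0, w0, w1    // w0 &= ~w1
  //   bics wzr, w0, w1   // flag-setting form, useful for compares
  // and the vector BIC form covers the 64/128-bit vector cases accepted above.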

  bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
      SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
      unsigned OldShiftOpcode, unsigned NewShiftOpcode,
      SelectionDAG &DAG) const override;

  bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;

  bool shouldTransformSignedTruncationCheck(EVT XVT,
                                            unsigned KeptBits) const override {
    // For vectors, we don't have a preference.
    if (XVT.isVector())
      return false;

    auto VTIsOk = [](EVT VT) -> bool {
      return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
             VT == MVT::i64;
    };

    // We are ok with KeptBitsVT being byte/word/dword, which is what SXT
    // supports. XVT will be larger than KeptBitsVT.
    MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
    return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
  }

  bool preferIncOfAddToSubOfNot(EVT VT) const override;

  bool hasBitPreservingFPLogic(EVT VT) const override {
    // FIXME: Is this always true? It should be true for vectors at least.
    return VT == MVT::f32 || VT == MVT::f64;
  }

  bool supportSplitCSR(MachineFunction *MF) const override {
    return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
  }
  void initializeSplitCSR(MachineBasicBlock *Entry) const override;
  void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

  bool supportSwiftError() const override {
    return true;
  }

  /// Enable aggressive FMA fusion on targets that want it.
  bool enableAggressiveFMAFusion(EVT VT) const override;

  /// Returns the size of the platform's va_list object.
  unsigned getVaListSizeInBits(const DataLayout &DL) const override;

  /// Returns true if \p VecTy is a legal interleaved access type. This
  /// function checks the vector element type and the overall width of the
  /// vector.
  bool isLegalInterleavedAccessType(VectorType *VecTy,
                                    const DataLayout &DL) const;

  /// Returns the number of interleaved accesses that will be generated when
  /// lowering accesses of the given type.
  unsigned getNumInterleavedAccesses(VectorType *VecTy,
                                     const DataLayout &DL) const;

  MachineMemOperand::Flags getTargetMMOFlags(
      const Instruction &I) const override;

  bool functionArgumentNeedsConsecutiveRegisters(Type *Ty,
                                                 CallingConv::ID CallConv,
                                                 bool isVarArg) const override;
  /// Used for exception handling on Win64.
  bool needsFixedCatchObjects() const override;

  bool fallBackToDAGISel(const Instruction &Inst) const override;

  /// SVE code generation for fixed length vectors does not custom lower
  /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to
  /// merge. However, merging them creates a BUILD_VECTOR that is just as
  /// illegal as the original, thus leading to an infinite legalisation loop.
  /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
  /// vector types this override can be removed.
  bool mergeStoresAfterLegalization(EVT VT) const override;

private:
  /// Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

  bool isExtFreeImpl(const Instruction *Ext) const override;

  void addTypeForNEON(MVT VT, MVT PromotedBitwiseVT);
  void addTypeForFixedLengthSVE(MVT VT);
  void addDRTypeForNEON(MVT VT);
  void addQRTypeForNEON(MVT VT);

  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               const SDLoc &DL, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerCall(CallLoweringInfo & /*CLI*/,
                    SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                          CallingConv::ID CallConv, bool isVarArg,
                          const SmallVectorImpl<ISD::InputArg> &Ins,
                          const SDLoc &DL, SelectionDAG &DAG,
                          SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
                          SDValue ThisVal) const;

  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;

  bool isEligibleForTailCallOptimization(
      SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
      const SmallVectorImpl<ISD::OutputArg> &Outs,
      const SmallVectorImpl<SDValue> &OutVals,
      const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;

  /// Finds the incoming stack arguments which overlap the given fixed stack
  /// object and incorporates their load into the current chain. This prevents
  /// an upcoming store from clobbering the stack argument before it's used.
  SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
                              MachineFrameInfo &MFI, int ClobberedFI) const;

  bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;

  void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
                           SDValue &Chain) const;

  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                      bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      LLVMContext &Context) const override;

  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
                      SelectionDAG &DAG) const override;

  SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  template <class NodeTy>
  SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase,
                               const SDLoc &DL, SelectionDAG &DAG) const;
  SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
                                 SelectionDAG &DAG) const;
  SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
                         SDValue TVal, SDValue FVal, const SDLoc &dl,
                         SelectionDAG &DAG) const;
  SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, unsigned NewOp,
                              bool OverrideNEON = false) const;
  SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerF128Call(SDValue Op, SelectionDAG &DAG,
                        RTLIB::Libcall Call) const;
  SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain,
                                         SDValue &Size,
                                         SelectionDAG &DAG) const;
  SDValue LowerSVEStructLoad(unsigned Intrinsic, ArrayRef<SDValue> LoadOps,
                             EVT VT, SelectionDAG &DAG, const SDLoc &DL) const;

  SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
                                              SelectionDAG &DAG) const;

  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;
  SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                          int &ExtraSteps, bool &UseOneConst,
                          bool Reciprocal) const override;
  SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                           int &ExtraSteps) const override;
  unsigned combineRepeatedFPDivisors() const override;

  ConstraintType getConstraintType(StringRef Constraint) const override;
  Register getRegisterByName(const char *RegName, LLT VT,
                             const MachineFunction &MF) const override;

  /// Examine constraint string and operand type and determine a weight value.
  /// The operand object must already have been set up with the operand type.
  ConstraintWeight
  getSingleConstraintMatchWeight(AsmOperandInfo &info,
                                 const char *constraint) const override;

  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const override;

  const char *LowerXConstraint(EVT ConstraintVT) const override;

  void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;

  unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
    if (ConstraintCode == "Q")
      return InlineAsm::Constraint_Q;
    // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
    //        followed by llvm_unreachable so we'll leave them unimplemented in
    //        the backend for now.
    return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
  }
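
  // Illustrative sketch of user-level usage (an assumption for documentation,
  // not something defined by this file): the "Q" constraint describes a memory
  // operand addressed by a single base register with no offset, e.g.
  //   int v;
  //   asm volatile("ldxr %w0, %1" : "=r"(v) : "Q"(*ptr));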

  bool shouldRemoveExtendFromGSIndex(EVT VT) const override;
  bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
  bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
  bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
  bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
                              ISD::MemIndexedMode &AM, bool &IsInc,
                              SelectionDAG &DAG) const;
  bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
                                 ISD::MemIndexedMode &AM,
                                 SelectionDAG &DAG) const override;
  bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                                  SDValue &Offset, ISD::MemIndexedMode &AM,
                                  SelectionDAG &DAG) const override;

  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;
  void ReplaceExtractSubVectorResults(SDNode *N,
                                      SmallVectorImpl<SDValue> &Results,
                                      SelectionDAG &DAG) const;

  bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;

  void finalizeLowering(MachineFunction &MF) const override;

  bool shouldLocalize(const MachineInstr &MI,
                      const TargetTransformInfo *TTI) const override;

  // Normally SVE is only used for vectors that do not fit within a NEON
  // vector. This changes when OverrideNEON is true, allowing SVE to be used
  // for 64-bit and 128-bit vectors as well.
  bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const;
};

namespace AArch64 {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                         const TargetLibraryInfo *libInfo);
} // end namespace AArch64

} // end namespace llvm

#endif