//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that AArch64 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H

#include "AArch64.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Instruction.h"

namespace llvm {

namespace AArch64ISD {

// For predicated nodes where the result is a vector, the operation is
// controlled by a governing predicate and the inactive lanes are explicitly
// defined with a value, please stick to the following naming convention:
//
//    _MERGE_OP<n>        The result value is a vector with inactive lanes equal
//                        to source operand OP<n>.
//
//    _MERGE_ZERO         The result value is a vector with inactive lanes
//                        actively zeroed.
//
//    _MERGE_PASSTHRU     The result value is a vector with inactive lanes equal
//                        to the last source operand, whose only purpose is to
//                        provide a passthru value.
//
// For other cases where no explicit action is needed to set the inactive lanes,
// or when the result is not a vector and it is needed or helpful to
// distinguish a node from similar unpredicated nodes, use:
//
//    _PRED
//
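// As an illustrative sketch of the convention (the operand order shown here
// is only for exposition; the lowering code that creates these nodes is
// authoritative), an SVE floating-point negate whose inactive lanes come from
// an explicit passthru operand would look like:
//
//    FNEG_MERGE_PASSTHRU GoverningPredicate, SourceVector, PassthruVector
//
// whereas the corresponding _PRED form would take only the governing
// predicate and the source operand(s), leaving the inactive lanes undefined.
//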
enum NodeType : unsigned {
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
  CALL,         // Function call.

  // Produces the full sequence of instructions for getting the thread pointer
  // offset of a variable into X0, using the TLSDesc model.
  TLSDESC_CALLSEQ,
  ADRP,     // Page address of a TargetGlobalAddress operand.
  ADR,      // ADR
  ADDlow,   // Add the low 12 bits of a TargetGlobalAddress operand.
  LOADgot,  // Load from automatically generated descriptor (e.g. Global
            // Offset Table, TLS record).
  RET_FLAG, // Return with a flag operand. Operand 0 is the chain operand.
  BRCOND,   // Conditional branch instruction; "b.cond".
  CSEL,
  FCSEL, // Conditional move instruction.
  CSINV, // Conditional select invert.
  CSNEG, // Conditional select negate.
  CSINC, // Conditional select increment.

  // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
  // ELF.
  THREAD_POINTER,
  ADC,
  SBC, // adc, sbc instructions

  // Predicated instructions where inactive lanes produce undefined results.
  ADD_PRED,
  FADD_PRED,
  FDIV_PRED,
  FMA_PRED,
  FMAXNM_PRED,
  FMINNM_PRED,
  FMUL_PRED,
  FSUB_PRED,
  MUL_PRED,
  SDIV_PRED,
  SHL_PRED,
  SMAX_PRED,
  SMIN_PRED,
  SRA_PRED,
  SRL_PRED,
  SUB_PRED,
  UDIV_PRED,
  UMAX_PRED,
  UMIN_PRED,

  // Predicated instructions with the result of inactive lanes provided by the
  // last operand.
  FABS_MERGE_PASSTHRU,
  FCEIL_MERGE_PASSTHRU,
  FFLOOR_MERGE_PASSTHRU,
  FNEARBYINT_MERGE_PASSTHRU,
  FNEG_MERGE_PASSTHRU,
  FRECPX_MERGE_PASSTHRU,
  FRINT_MERGE_PASSTHRU,
  FROUND_MERGE_PASSTHRU,
  FROUNDEVEN_MERGE_PASSTHRU,
  FSQRT_MERGE_PASSTHRU,
  FTRUNC_MERGE_PASSTHRU,
  FP_ROUND_MERGE_PASSTHRU,
  FP_EXTEND_MERGE_PASSTHRU,
  UINT_TO_FP_MERGE_PASSTHRU,
  SINT_TO_FP_MERGE_PASSTHRU,
  FCVTZU_MERGE_PASSTHRU,
  FCVTZS_MERGE_PASSTHRU,
  SIGN_EXTEND_INREG_MERGE_PASSTHRU,
  ZERO_EXTEND_INREG_MERGE_PASSTHRU,

  SETCC_MERGE_ZERO,

  // Arithmetic instructions which write flags.
  ADDS,
  SUBS,
  ADCS,
  SBCS,
  ANDS,

  // Conditional compares. Operands: left,right,falsecc,cc,flags
  CCMP,
  CCMN,
  FCCMP,

  // Floating point comparison
  FCMP,

  // Scalar extract
  EXTR,

  // Scalar-to-vector duplication
  DUP,
  DUPLANE8,
  DUPLANE16,
  DUPLANE32,
  DUPLANE64,

  // Vector immediate moves
  MOVI,
  MOVIshift,
  MOVIedit,
  MOVImsl,
  FMOV,
  MVNIshift,
  MVNImsl,

  // Vector immediate ops
  BICi,
  ORRi,

  // Vector bitwise select: similar to ISD::VSELECT but not all bits within an
  // element must be identical.
  BSP,

  // Vector arithmetic negation
  NEG,

  // Vector shuffles
  ZIP1,
  ZIP2,
  UZP1,
  UZP2,
  TRN1,
  TRN2,
  REV16,
  REV32,
  REV64,
  EXT,

  // Vector shift by scalar
  VSHL,
  VLSHR,
  VASHR,

  // Vector shift by scalar (again)
  SQSHL_I,
  UQSHL_I,
  SQSHLU_I,
  SRSHR_I,
  URSHR_I,

  // Vector shift by constant and insert
  VSLI,
  VSRI,

  // Vector comparisons
  CMEQ,
  CMGE,
  CMGT,
  CMHI,
  CMHS,
  FCMEQ,
  FCMGE,
  FCMGT,

  // Vector zero comparisons
  CMEQz,
  CMGEz,
  CMGTz,
  CMLEz,
  CMLTz,
  FCMEQz,
  FCMGEz,
  FCMGTz,
  FCMLEz,
  FCMLTz,

  // Vector across-lanes addition
  // Only the lower result lane is defined.
  SADDV,
  UADDV,

  // Vector halving addition
  SHADD,
  UHADD,

  // Vector rounding halving addition
  SRHADD,
  URHADD,

  // Absolute difference
  UABD,
  SABD,

  // Vector across-lanes min/max
  // Only the lower result lane is defined.
  SMINV,
  UMINV,
  SMAXV,
  UMAXV,

  SADDV_PRED,
  UADDV_PRED,
  SMAXV_PRED,
  UMAXV_PRED,
  SMINV_PRED,
  UMINV_PRED,
  ORV_PRED,
  EORV_PRED,
  ANDV_PRED,

  // Vector bitwise insertion
  BIT,

  // Compare-and-branch
  CBZ,
  CBNZ,
  TBZ,
  TBNZ,

  // Tail calls
  TC_RETURN,

  // Custom prefetch handling
  PREFETCH,

  // {s|u}int to FP within a FP register.
  SITOF,
  UITOF,

  /// Natural vector cast. ISD::BITCAST is not natural in the big-endian
  /// world w.r.t. vectors, which causes additional REV instructions to be
  /// generated to compensate for the byte-swapping. But sometimes we do
  /// need to re-interpret the data in SIMD vector registers in big-endian
  /// mode without emitting such REV instructions.
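  /// As an illustrative example (DAG notation only, not taken verbatim from
  /// the lowering code): reinterpreting a register produced as v2i64 as v4i32
  /// can be written as
  ///
  ///   (v4i32 (NVCAST (v2i64 V)))
  ///
  /// which leaves the register contents untouched, whereas a plain BITCAST in
  /// big-endian mode would imply REVs to preserve the in-memory lane order.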
  NVCAST,

  SMULL,
  UMULL,

  // Reciprocal estimates and steps.
  FRECPE,
  FRECPS,
  FRSQRTE,
  FRSQRTS,

  SUNPKHI,
  SUNPKLO,
  UUNPKHI,
  UUNPKLO,

  CLASTA_N,
  CLASTB_N,
  LASTA,
  LASTB,
  REV,
  TBL,

  // Floating-point reductions.
  FADDA_PRED,
  FADDV_PRED,
  FMAXV_PRED,
  FMAXNMV_PRED,
  FMINV_PRED,
  FMINNMV_PRED,

  INSR,
  PTEST,
  PTRUE,

  DUP_MERGE_PASSTHRU,
  INDEX_VECTOR,

  REINTERPRET_CAST,

  LD1_MERGE_ZERO,
  LD1S_MERGE_ZERO,
  LDNF1_MERGE_ZERO,
  LDNF1S_MERGE_ZERO,
  LDFF1_MERGE_ZERO,
  LDFF1S_MERGE_ZERO,
  LD1RQ_MERGE_ZERO,
  LD1RO_MERGE_ZERO,

  // Structured loads.
  SVE_LD2_MERGE_ZERO,
  SVE_LD3_MERGE_ZERO,
  SVE_LD4_MERGE_ZERO,

  // Unsigned gather loads.
  GLD1_MERGE_ZERO,
  GLD1_SCALED_MERGE_ZERO,
  GLD1_UXTW_MERGE_ZERO,
  GLD1_SXTW_MERGE_ZERO,
  GLD1_UXTW_SCALED_MERGE_ZERO,
  GLD1_SXTW_SCALED_MERGE_ZERO,
  GLD1_IMM_MERGE_ZERO,

  // Signed gather loads
  GLD1S_MERGE_ZERO,
  GLD1S_SCALED_MERGE_ZERO,
  GLD1S_UXTW_MERGE_ZERO,
  GLD1S_SXTW_MERGE_ZERO,
  GLD1S_UXTW_SCALED_MERGE_ZERO,
  GLD1S_SXTW_SCALED_MERGE_ZERO,
  GLD1S_IMM_MERGE_ZERO,

  // Unsigned gather loads.
  GLDFF1_MERGE_ZERO,
  GLDFF1_SCALED_MERGE_ZERO,
  GLDFF1_UXTW_MERGE_ZERO,
  GLDFF1_SXTW_MERGE_ZERO,
  GLDFF1_UXTW_SCALED_MERGE_ZERO,
  GLDFF1_SXTW_SCALED_MERGE_ZERO,
  GLDFF1_IMM_MERGE_ZERO,

  // Signed gather loads.
  GLDFF1S_MERGE_ZERO,
  GLDFF1S_SCALED_MERGE_ZERO,
  GLDFF1S_UXTW_MERGE_ZERO,
  GLDFF1S_SXTW_MERGE_ZERO,
  GLDFF1S_UXTW_SCALED_MERGE_ZERO,
  GLDFF1S_SXTW_SCALED_MERGE_ZERO,
  GLDFF1S_IMM_MERGE_ZERO,

  // Non-temporal gather loads
  GLDNT1_MERGE_ZERO,
  GLDNT1_INDEX_MERGE_ZERO,
  GLDNT1S_MERGE_ZERO,

  // Contiguous masked store.
  ST1_PRED,

  // Scatter store
  SST1_PRED,
  SST1_SCALED_PRED,
  SST1_UXTW_PRED,
  SST1_SXTW_PRED,
  SST1_UXTW_SCALED_PRED,
  SST1_SXTW_SCALED_PRED,
  SST1_IMM_PRED,

  // Non-temporal scatter store
  SSTNT1_PRED,
  SSTNT1_INDEX_PRED,

  // Strict (exception-raising) floating point comparison
  STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
  STRICT_FCMPE,

  // NEON Load/Store with post-increment base updates
  LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
  LD3post,
  LD4post,
  ST2post,
  ST3post,
  ST4post,
  LD1x2post,
  LD1x3post,
  LD1x4post,
  ST1x2post,
  ST1x3post,
  ST1x4post,
  LD1DUPpost,
  LD2DUPpost,
  LD3DUPpost,
  LD4DUPpost,
  LD1LANEpost,
  LD2LANEpost,
  LD3LANEpost,
  LD4LANEpost,
  ST2LANEpost,
  ST3LANEpost,
  ST4LANEpost,

  STG,
  STZG,
  ST2G,
  STZ2G,

  LDP,
  STP,
  STNP
};

} // end namespace AArch64ISD

namespace {

// Any instruction that defines a 32-bit result zeros out the high half of the
// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
// be copying from a truncate. But any other 32-bit operation will zero-extend
// up to 64 bits. AssertSext/AssertZext aren't saying anything about the upper
// 32 bits, they're probably just qualifying a CopyFromReg.
// FIXME: X86 also checks for CMOV here. Do we need something similar?
static inline bool isDef32(const SDNode &N) {
  unsigned Opc = N.getOpcode();
  return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
         Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
         Opc != ISD::AssertZext;
}

} // end anonymous namespace

class AArch64Subtarget;
class AArch64TargetMachine;

class AArch64TargetLowering : public TargetLowering {
public:
  explicit AArch64TargetLowering(const TargetMachine &TM,
                                 const AArch64Subtarget &STI);

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;

  /// Determine which of the bits specified in Mask are known to be either zero
  /// or one and return them in the KnownZero/KnownOne bitsets.
  void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
                                     const APInt &DemandedElts,
                                     const SelectionDAG &DAG,
                                     unsigned Depth = 0) const override;

  MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
    // Returning i64 unconditionally here (i.e. even for ILP32) means that the
    // *DAG* representation of pointers will always be 64-bits. They will be
    // truncated and extended when transferred to memory, but the 64-bit DAG
    // allows us to use AArch64's addressing modes much more easily.
    return MVT::getIntegerVT(64);
  }

  bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                    const APInt &DemandedElts,
                                    TargetLoweringOpt &TLO) const override;

  MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;

  /// Returns true if the target allows unaligned memory accesses of the
  /// specified type.
  bool allowsMisalignedMemoryAccesses(
      EVT VT, unsigned AddrSpace = 0, unsigned Align = 1,
      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
      bool *Fast = nullptr) const override;
  /// LLT variant.
  bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,
                                      Align Alignment,
                                      MachineMemOperand::Flags Flags,
                                      bool *Fast = nullptr) const override;

  /// Provide custom lowering hooks for some operations.
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

  const char *getTargetNodeName(unsigned Opcode) const override;

  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

  /// This method returns a target specific FastISel object, or null if the
  /// target does not support "fast" ISel.
  FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                           const TargetLibraryInfo *libInfo) const override;

  bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;

  bool isFPImmLegal(const APFloat &Imm, EVT VT,
                    bool ForCodeSize) const override;

  /// Return true if the given shuffle mask can be codegen'd directly, or if it
  /// should be stack expanded.
  bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;

  /// Return the ISD::SETCC ValueType.
  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                         EVT VT) const override;

  SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;

  MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
                                  MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                         MachineBasicBlock *BB) const;

  MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI,
                              MachineBasicBlock *MBB) const override;

  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                          MachineFunction &MF,
                          unsigned Intrinsic) const override;

  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
                             EVT NewVT) const override;

  bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
  bool isTruncateFree(EVT VT1, EVT VT2) const override;

  bool isProfitableToHoist(Instruction *I) const override;

  bool isZExtFree(Type *Ty1, Type *Ty2) const override;
  bool isZExtFree(EVT VT1, EVT VT2) const override;
  bool isZExtFree(SDValue Val, EVT VT2) const override;

  bool shouldSinkOperands(Instruction *I,
                          SmallVectorImpl<Use *> &Ops) const override;

  bool hasPairedLoad(EVT LoadedType, Align &RequiredAligment) const override;

  unsigned getMaxSupportedInterleaveFactor() const override { return 4; }

  bool lowerInterleavedLoad(LoadInst *LI,
                            ArrayRef<ShuffleVectorInst *> Shuffles,
                            ArrayRef<unsigned> Indices,
                            unsigned Factor) const override;
  bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                             unsigned Factor) const override;

  bool isLegalAddImmediate(int64_t) const override;
  bool isLegalICmpImmediate(int64_t) const override;

  bool shouldConsiderGEPOffsetSplit() const override;

  EVT getOptimalMemOpType(const MemOp &Op,
                          const AttributeList &FuncAttributes) const override;

  LLT getOptimalMemOpLLT(const MemOp &Op,
                         const AttributeList &FuncAttributes) const override;

  /// Return true if the addressing mode represented by AM is legal for this
  /// target, for a load/store of the specified type.
  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                             unsigned AS,
                             Instruction *I = nullptr) const override;

  /// Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
  int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                           unsigned AS) const override;

  /// Return true if an FMA operation is faster than a pair of fmul and fadd
  /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
  /// returns true, otherwise fmuladd is expanded to fmul + fadd.
  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                  EVT VT) const override;
  bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;

  const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;

  /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
  bool isDesirableToCommuteWithShift(const SDNode *N,
                                     CombineLevel Level) const override;

  /// Returns true if it is beneficial to convert a load of a constant
  /// to just the constant itself.
  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                         Type *Ty) const override;

  /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
  /// with this index.
  bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                               unsigned Index) const override;

  bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                            bool MathUsed) const override {
    // Using overflow ops for overflow checks only should be beneficial on
    // AArch64.
    return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
  }

  Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
                        AtomicOrdering Ord) const override;
  Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
                              Value *Addr, AtomicOrdering Ord) const override;

  void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
  bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;

  bool useLoadStackGuardNode() const override;
  TargetLoweringBase::LegalizeTypeAction
  getPreferredVectorAction(MVT VT) const override;

  /// If the target has a standard location for the stack protector cookie,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getIRStackGuard(IRBuilder<> &IRB) const override;

  void insertSSPDeclarations(Module &M) const override;
  Value *getSDagStackGuard(const Module &M) const override;
  Function *getSSPStackGuardCheck(const Module &M) const override;

  /// If the target has a standard location for the unsafe stack pointer,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;

  /// If a physical register, this returns the register that receives the
  /// exception address on entry to an EH pad.
  Register
  getExceptionPointerRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X0;
  }

  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  Register
  getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X1;
  }

  bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

  bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                        const SelectionDAG &DAG) const override {
    // Do not merge to float value size (128 bits) if the NoImplicitFloat
    // attribute is set.

    bool NoFloat = DAG.getMachineFunction().getFunction().hasFnAttribute(
        Attribute::NoImplicitFloat);

    if (NoFloat)
      return (MemVT.getSizeInBits() <= 64);
    return true;
  }

  bool isCheapToSpeculateCttz() const override {
    return true;
  }

  bool isCheapToSpeculateCtlz() const override {
    return true;
  }

  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

  bool hasAndNotCompare(SDValue V) const override {
    // We can use bics for any scalar.
    return V.getValueType().isScalarInteger();
  }

  bool hasAndNot(SDValue Y) const override {
    EVT VT = Y.getValueType();

    if (!VT.isVector())
      return hasAndNotCompare(Y);

    return VT.getSizeInBits() >= 64; // vector 'bic'
  }

  bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
      SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
      unsigned OldShiftOpcode, unsigned NewShiftOpcode,
      SelectionDAG &DAG) const override;

  bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;

  bool shouldTransformSignedTruncationCheck(EVT XVT,
                                            unsigned KeptBits) const override {
    // For vectors, we don't have a preference.
    if (XVT.isVector())
      return false;

    auto VTIsOk = [](EVT VT) -> bool {
      return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
             VT == MVT::i64;
    };

    // We are ok with KeptBitsVT being byte/word/dword, which is what SXT
    // supports. XVT will be larger than KeptBitsVT.
    MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
    return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
  }

  bool preferIncOfAddToSubOfNot(EVT VT) const override;

  bool hasBitPreservingFPLogic(EVT VT) const override {
    // FIXME: Is this always true? It should be true for vectors at least.
    return VT == MVT::f32 || VT == MVT::f64;
  }

  bool supportSplitCSR(MachineFunction *MF) const override {
    return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
  }
  void initializeSplitCSR(MachineBasicBlock *Entry) const override;
  void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

  bool supportSwiftError() const override {
    return true;
  }

  /// Enable aggressive FMA fusion on targets that want it.
  bool enableAggressiveFMAFusion(EVT VT) const override;

  /// Returns the size of the platform's va_list object.
  unsigned getVaListSizeInBits(const DataLayout &DL) const override;

  /// Returns true if \p VecTy is a legal interleaved access type. This
  /// function checks the vector element type and the overall width of the
  /// vector.
  bool isLegalInterleavedAccessType(VectorType *VecTy,
                                    const DataLayout &DL) const;

  /// Returns the number of interleaved accesses that will be generated when
  /// lowering accesses of the given type.
  unsigned getNumInterleavedAccesses(VectorType *VecTy,
                                     const DataLayout &DL) const;

  MachineMemOperand::Flags getTargetMMOFlags(
    const Instruction &I) const override;

  bool functionArgumentNeedsConsecutiveRegisters(Type *Ty,
                                                 CallingConv::ID CallConv,
                                                 bool isVarArg) const override;
  /// Used for exception handling on Win64.
  bool needsFixedCatchObjects() const override;

  bool fallBackToDAGISel(const Instruction &Inst) const override;

  /// SVE code generation for fixed length vectors does not custom lower
  /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to
  /// merge. However, merging them creates a BUILD_VECTOR that is just as
  /// illegal as the original, thus leading to an infinite legalisation loop.
  /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
  /// vector types this override can be removed.
  bool mergeStoresAfterLegalization(EVT VT) const override;

private:
  /// Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

  bool isExtFreeImpl(const Instruction *Ext) const override;

  void addTypeForNEON(MVT VT, MVT PromotedBitwiseVT);
  void addTypeForFixedLengthSVE(MVT VT);
  void addDRTypeForNEON(MVT VT);
  void addQRTypeForNEON(MVT VT);

  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               const SDLoc &DL, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerCall(CallLoweringInfo & /*CLI*/,
                    SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                          CallingConv::ID CallConv, bool isVarArg,
                          const SmallVectorImpl<ISD::InputArg> &Ins,
                          const SDLoc &DL, SelectionDAG &DAG,
                          SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
                          SDValue ThisVal) const;

  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;

  bool isEligibleForTailCallOptimization(
      SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
      const SmallVectorImpl<ISD::OutputArg> &Outs,
      const SmallVectorImpl<SDValue> &OutVals,
      const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;

  /// Finds the incoming stack arguments which overlap the given fixed stack
  /// object and incorporates their load into the current chain. This prevents
  /// an upcoming store from clobbering the stack argument before it's used.
  SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
                              MachineFrameInfo &MFI, int ClobberedFI) const;

  bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;

  void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
                           SDValue &Chain) const;

  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                      bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      LLVMContext &Context) const override;

  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
                      SelectionDAG &DAG) const override;

  SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  template <class NodeTy>
  SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase,
                               const SDLoc &DL, SelectionDAG &DAG) const;
  SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
                                 SelectionDAG &DAG) const;
  SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
                         SDValue TVal, SDValue FVal, const SDLoc &dl,
                         SelectionDAG &DAG) const;
  SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, unsigned NewOp,
                              bool OverrideNEON = false) const;
  SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerF128Call(SDValue Op, SelectionDAG &DAG,
                        RTLIB::Libcall Call) const;
  SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain,
                                         SDValue &Size,
                                         SelectionDAG &DAG) const;
  SDValue LowerSVEStructLoad(unsigned Intrinsic, ArrayRef<SDValue> LoadOps,
                             EVT VT, SelectionDAG &DAG, const SDLoc &DL) const;

  SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
                                              SelectionDAG &DAG) const;

  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;
  SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                          int &ExtraSteps, bool &UseOneConst,
                          bool Reciprocal) const override;
  SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                           int &ExtraSteps) const override;
  unsigned combineRepeatedFPDivisors() const override;

  ConstraintType getConstraintType(StringRef Constraint) const override;
  Register getRegisterByName(const char* RegName, LLT VT,
                             const MachineFunction &MF) const override;

  /// Examine constraint string and operand type and determine a weight value.
  /// The operand object must already have been set up with the operand type.
  ConstraintWeight
  getSingleConstraintMatchWeight(AsmOperandInfo &info,
                                 const char *constraint) const override;

  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const override;

  const char *LowerXConstraint(EVT ConstraintVT) const override;

  void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;

  unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
    if (ConstraintCode == "Q")
      return InlineAsm::Constraint_Q;
    // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
    //        followed by llvm_unreachable so we'll leave them unimplemented in
    //        the backend for now.
    return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
  }

  bool shouldRemoveExtendFromGSIndex(EVT VT) const override;
  bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
  bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
  bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
  bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
                              ISD::MemIndexedMode &AM, bool &IsInc,
                              SelectionDAG &DAG) const;
  bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
                                 ISD::MemIndexedMode &AM,
                                 SelectionDAG &DAG) const override;
  bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                                  SDValue &Offset, ISD::MemIndexedMode &AM,
                                  SelectionDAG &DAG) const override;

  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;
  void ReplaceExtractSubVectorResults(SDNode *N,
                                      SmallVectorImpl<SDValue> &Results,
                                      SelectionDAG &DAG) const;

  bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;

  void finalizeLowering(MachineFunction &MF) const override;

  bool shouldLocalize(const MachineInstr &MI,
                      const TargetTransformInfo *TTI) const override;

  // Normally SVE is only used for vectors that do not fit within a NEON
  // vector. This changes when OverrideNEON is true, allowing SVE to be used
  // for 64-bit and 128-bit vectors as well.
  bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const;
};

namespace AArch64 {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                         const TargetLibraryInfo *libInfo);
} // end namespace AArch64

} // end namespace llvm

#endif