• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines an instruction selector for the ARM target.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "ARM.h"
15 #include "ARMBaseInstrInfo.h"
16 #include "ARMTargetMachine.h"
17 #include "MCTargetDesc/ARMAddressingModes.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/CodeGen/MachineFrameInfo.h"
20 #include "llvm/CodeGen/MachineFunction.h"
21 #include "llvm/CodeGen/MachineInstrBuilder.h"
22 #include "llvm/CodeGen/MachineRegisterInfo.h"
23 #include "llvm/CodeGen/SelectionDAG.h"
24 #include "llvm/CodeGen/SelectionDAGISel.h"
25 #include "llvm/IR/CallingConv.h"
26 #include "llvm/IR/Constants.h"
27 #include "llvm/IR/DerivedTypes.h"
28 #include "llvm/IR/Function.h"
29 #include "llvm/IR/Intrinsics.h"
30 #include "llvm/IR/LLVMContext.h"
31 #include "llvm/Support/CommandLine.h"
32 #include "llvm/Support/Debug.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/Target/TargetLowering.h"
35 #include "llvm/Target/TargetOptions.h"
36 
37 using namespace llvm;
38 
39 #define DEBUG_TYPE "arm-isel"
40 
41 static cl::opt<bool>
42 DisableShifterOp("disable-shifter-op", cl::Hidden,
43   cl::desc("Disable isel of shifter-op"),
44   cl::init(false));
45 
46 //===--------------------------------------------------------------------===//
47 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
48 /// instructions for SelectionDAG operations.
49 ///
50 namespace {
51 
/// Classification returned by SelectAddrMode2Worker, telling its wrappers
/// which flavour of addressing mode 2 was matched.
enum AddrMode2Type {
  /// Simple AM2 (+-imm12).
  AM2_BASE,
  /// Shifter-op AM2.
  AM2_SHOP
};
56 
57 class ARMDAGToDAGISel : public SelectionDAGISel {
58   /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
59   /// make the right decision when generating code for different targets.
60   const ARMSubtarget *Subtarget;
61 
62 public:
ARMDAGToDAGISel(ARMBaseTargetMachine & tm,CodeGenOpt::Level OptLevel)63   explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
64       : SelectionDAGISel(tm, OptLevel) {}
65 
runOnMachineFunction(MachineFunction & MF)66   bool runOnMachineFunction(MachineFunction &MF) override {
67     // Reset the subtarget each time through.
68     Subtarget = &MF.getSubtarget<ARMSubtarget>();
69     SelectionDAGISel::runOnMachineFunction(MF);
70     return true;
71   }
72 
getPassName() const73   const char *getPassName() const override {
74     return "ARM Instruction Selection";
75   }
76 
77   void PreprocessISelDAG() override;
78 
79   /// getI32Imm - Return a target constant of type i32 with the specified
80   /// value.
getI32Imm(unsigned Imm,const SDLoc & dl)81   inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
82     return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
83   }
84 
85   void Select(SDNode *N) override;
86 
87   bool hasNoVMLxHazardUse(SDNode *N) const;
88   bool isShifterOpProfitable(const SDValue &Shift,
89                              ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
90   bool SelectRegShifterOperand(SDValue N, SDValue &A,
91                                SDValue &B, SDValue &C,
92                                bool CheckProfitability = true);
93   bool SelectImmShifterOperand(SDValue N, SDValue &A,
94                                SDValue &B, bool CheckProfitability = true);
SelectShiftRegShifterOperand(SDValue N,SDValue & A,SDValue & B,SDValue & C)95   bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
96                                     SDValue &B, SDValue &C) {
97     // Don't apply the profitability check
98     return SelectRegShifterOperand(N, A, B, C, false);
99   }
SelectShiftImmShifterOperand(SDValue N,SDValue & A,SDValue & B)100   bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
101                                     SDValue &B) {
102     // Don't apply the profitability check
103     return SelectImmShifterOperand(N, A, B, false);
104   }
105 
106   bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
107   bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
108 
109   AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base,
110                                       SDValue &Offset, SDValue &Opc);
SelectAddrMode2Base(SDValue N,SDValue & Base,SDValue & Offset,SDValue & Opc)111   bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset,
112                            SDValue &Opc) {
113     return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE;
114   }
115 
SelectAddrMode2ShOp(SDValue N,SDValue & Base,SDValue & Offset,SDValue & Opc)116   bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset,
117                            SDValue &Opc) {
118     return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP;
119   }
120 
SelectAddrMode2(SDValue N,SDValue & Base,SDValue & Offset,SDValue & Opc)121   bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset,
122                        SDValue &Opc) {
123     SelectAddrMode2Worker(N, Base, Offset, Opc);
124 //    return SelectAddrMode2ShOp(N, Base, Offset, Opc);
125     // This always matches one way or another.
126     return true;
127   }
128 
SelectCMOVPred(SDValue N,SDValue & Pred,SDValue & Reg)129   bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
130     const ConstantSDNode *CN = cast<ConstantSDNode>(N);
131     Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
132     Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
133     return true;
134   }
135 
136   bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
137                              SDValue &Offset, SDValue &Opc);
138   bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
139                              SDValue &Offset, SDValue &Opc);
140   bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
141                              SDValue &Offset, SDValue &Opc);
142   bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
143   bool SelectAddrMode3(SDValue N, SDValue &Base,
144                        SDValue &Offset, SDValue &Opc);
145   bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
146                              SDValue &Offset, SDValue &Opc);
147   bool SelectAddrMode5(SDValue N, SDValue &Base,
148                        SDValue &Offset);
149   bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
150   bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
151 
152   bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
153 
154   // Thumb Addressing Modes:
155   bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
156   bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
157                                 SDValue &OffImm);
158   bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
159                                  SDValue &OffImm);
160   bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
161                                  SDValue &OffImm);
162   bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
163                                  SDValue &OffImm);
164   bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
165 
166   // Thumb 2 Addressing Modes:
167   bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
168   bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
169                             SDValue &OffImm);
170   bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
171                                  SDValue &OffImm);
172   bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
173                              SDValue &OffReg, SDValue &ShImm);
174   bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
175 
is_so_imm(unsigned Imm) const176   inline bool is_so_imm(unsigned Imm) const {
177     return ARM_AM::getSOImmVal(Imm) != -1;
178   }
179 
is_so_imm_not(unsigned Imm) const180   inline bool is_so_imm_not(unsigned Imm) const {
181     return ARM_AM::getSOImmVal(~Imm) != -1;
182   }
183 
is_t2_so_imm(unsigned Imm) const184   inline bool is_t2_so_imm(unsigned Imm) const {
185     return ARM_AM::getT2SOImmVal(Imm) != -1;
186   }
187 
is_t2_so_imm_not(unsigned Imm) const188   inline bool is_t2_so_imm_not(unsigned Imm) const {
189     return ARM_AM::getT2SOImmVal(~Imm) != -1;
190   }
191 
192   // Include the pieces autogenerated from the target description.
193 #include "ARMGenDAGISel.inc"
194 
195 private:
196   /// Indexed (pre/post inc/dec) load matching code for ARM.
197   bool tryARMIndexedLoad(SDNode *N);
198   bool tryT2IndexedLoad(SDNode *N);
199 
200   /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
201   /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
202   /// loads of D registers and even subregs and odd subregs of Q registers.
203   /// For NumVecs <= 2, QOpcodes1 is not used.
204   void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
205                  const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
206                  const uint16_t *QOpcodes1);
207 
208   /// SelectVST - Select NEON store intrinsics.  NumVecs should
209   /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
210   /// stores of D registers and even subregs and odd subregs of Q registers.
211   /// For NumVecs <= 2, QOpcodes1 is not used.
212   void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
213                  const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
214                  const uint16_t *QOpcodes1);
215 
216   /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
217   /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
218   /// load/store of D registers and Q registers.
219   void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
220                        unsigned NumVecs, const uint16_t *DOpcodes,
221                        const uint16_t *QOpcodes);
222 
223   /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
224   /// should be 2, 3 or 4.  The opcode array specifies the instructions used
225   /// for loading D registers.  (Q registers are not supported.)
226   void SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
227                     const uint16_t *Opcodes);
228 
229   /// SelectVTBL - Select NEON VTBL and VTBX intrinsics.  NumVecs should be 2,
230   /// 3 or 4.  These are custom-selected so that a REG_SEQUENCE can be
231   /// generated to force the table registers to be consecutive.
232   void SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc);
233 
234   /// Try to select SBFX/UBFX instructions for ARM.
235   bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
236 
237   // Select special operations if node forms integer ABS pattern
238   bool tryABSOp(SDNode *N);
239 
240   bool tryReadRegister(SDNode *N);
241   bool tryWriteRegister(SDNode *N);
242 
243   bool tryInlineAsm(SDNode *N);
244 
245   void SelectConcatVector(SDNode *N);
246 
247   bool trySMLAWSMULW(SDNode *N);
248 
249   void SelectCMP_SWAP(SDNode *N);
250 
251   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
252   /// inline asm expressions.
253   bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
254                                     std::vector<SDValue> &OutOps) override;
255 
256   // Form pairs of consecutive R, S, D, or Q registers.
257   SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
258   SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
259   SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
260   SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
261 
262   // Form sequences of 4 consecutive S, D, or Q registers.
263   SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
264   SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
265   SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
266 
267   // Get the alignment operand for a NEON VLD or VST instruction.
268   SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
269                         bool is64BitVector);
270 
271   /// Returns the number of instructions required to materialize the given
272   /// constant in a register, or 3 if a literal pool load is needed.
273   unsigned ConstantMaterializationCost(unsigned Val) const;
274 
275   /// Checks if N is a multiplication by a constant where we can extract out a
276   /// power of two from the constant so that it can be used in a shift, but only
277   /// if it simplifies the materialization of the constant. Returns true if it
278   /// is, and assigns to PowerOfTwo the power of two that should be extracted
279   /// out and to NewMulConst the new constant to be multiplied by.
280   bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
281                               unsigned &PowerOfTwo, SDValue &NewMulConst) const;
282 
283   /// Replace N with M in CurDAG, in a way that also ensures that M gets
284   /// selected when N would have been selected.
285   void replaceDAGValue(const SDValue &N, SDValue M);
286 };
287 }
288 
289 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
290 /// operand. If so Imm will receive the 32-bit value.
isInt32Immediate(SDNode * N,unsigned & Imm)291 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
292   if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
293     Imm = cast<ConstantSDNode>(N)->getZExtValue();
294     return true;
295   }
296   return false;
297 }
298 
299 // isInt32Immediate - This method tests to see if a constant operand.
300 // If so Imm will receive the 32 bit value.
isInt32Immediate(SDValue N,unsigned & Imm)301 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
302   return isInt32Immediate(N.getNode(), Imm);
303 }
304 
305 // isOpcWithIntImmediate - This method tests to see if the node is a specific
306 // opcode and that it has a immediate integer right operand.
307 // If so Imm will receive the 32 bit value.
isOpcWithIntImmediate(SDNode * N,unsigned Opc,unsigned & Imm)308 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
309   return N->getOpcode() == Opc &&
310          isInt32Immediate(N->getOperand(1).getNode(), Imm);
311 }
312 
313 /// \brief Check whether a particular node is a constant value representable as
314 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
315 ///
316 /// \param ScaledConstant [out] - On success, the pre-scaled constant value.
isScaledConstantInRange(SDValue Node,int Scale,int RangeMin,int RangeMax,int & ScaledConstant)317 static bool isScaledConstantInRange(SDValue Node, int Scale,
318                                     int RangeMin, int RangeMax,
319                                     int &ScaledConstant) {
320   assert(Scale > 0 && "Invalid scale!");
321 
322   // Check that this is a constant.
323   const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
324   if (!C)
325     return false;
326 
327   ScaledConstant = (int) C->getZExtValue();
328   if ((ScaledConstant % Scale) != 0)
329     return false;
330 
331   ScaledConstant /= Scale;
332   return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
333 }
334 
PreprocessISelDAG()335 void ARMDAGToDAGISel::PreprocessISelDAG() {
336   if (!Subtarget->hasV6T2Ops())
337     return;
338 
339   bool isThumb2 = Subtarget->isThumb();
340   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
341        E = CurDAG->allnodes_end(); I != E; ) {
342     SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
343 
344     if (N->getOpcode() != ISD::ADD)
345       continue;
346 
347     // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
348     // leading zeros, followed by consecutive set bits, followed by 1 or 2
349     // trailing zeros, e.g. 1020.
350     // Transform the expression to
351     // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
352     // of trailing zeros of c2. The left shift would be folded as an shifter
353     // operand of 'add' and the 'and' and 'srl' would become a bits extraction
354     // node (UBFX).
355 
356     SDValue N0 = N->getOperand(0);
357     SDValue N1 = N->getOperand(1);
358     unsigned And_imm = 0;
359     if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
360       if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
361         std::swap(N0, N1);
362     }
363     if (!And_imm)
364       continue;
365 
366     // Check if the AND mask is an immediate of the form: 000.....1111111100
367     unsigned TZ = countTrailingZeros(And_imm);
368     if (TZ != 1 && TZ != 2)
369       // Be conservative here. Shifter operands aren't always free. e.g. On
370       // Swift, left shifter operand of 1 / 2 for free but others are not.
371       // e.g.
372       //  ubfx   r3, r1, #16, #8
373       //  ldr.w  r3, [r0, r3, lsl #2]
374       // vs.
375       //  mov.w  r9, #1020
376       //  and.w  r2, r9, r1, lsr #14
377       //  ldr    r2, [r0, r2]
378       continue;
379     And_imm >>= TZ;
380     if (And_imm & (And_imm + 1))
381       continue;
382 
383     // Look for (and (srl X, c1), c2).
384     SDValue Srl = N1.getOperand(0);
385     unsigned Srl_imm = 0;
386     if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
387         (Srl_imm <= 2))
388       continue;
389 
390     // Make sure first operand is not a shifter operand which would prevent
391     // folding of the left shift.
392     SDValue CPTmp0;
393     SDValue CPTmp1;
394     SDValue CPTmp2;
395     if (isThumb2) {
396       if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
397         continue;
398     } else {
399       if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
400           SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
401         continue;
402     }
403 
404     // Now make the transformation.
405     Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
406                           Srl.getOperand(0),
407                           CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
408                                               MVT::i32));
409     N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
410                          Srl,
411                          CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
412     N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
413                          N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
414     CurDAG->UpdateNodeOperands(N, N0, N1);
415   }
416 }
417 
418 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
419 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
420 /// least on current ARM implementations) which should be avoidded.
hasNoVMLxHazardUse(SDNode * N) const421 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
422   if (OptLevel == CodeGenOpt::None)
423     return true;
424 
425   if (!Subtarget->hasVMLxHazards())
426     return true;
427 
428   if (!N->hasOneUse())
429     return false;
430 
431   SDNode *Use = *N->use_begin();
432   if (Use->getOpcode() == ISD::CopyToReg)
433     return true;
434   if (Use->isMachineOpcode()) {
435     const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
436         CurDAG->getSubtarget().getInstrInfo());
437 
438     const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
439     if (MCID.mayStore())
440       return true;
441     unsigned Opcode = MCID.getOpcode();
442     if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
443       return true;
444     // vmlx feeding into another vmlx. We actually want to unfold
445     // the use later in the MLxExpansion pass. e.g.
446     // vmla
447     // vmla (stall 8 cycles)
448     //
449     // vmul (5 cycles)
450     // vadd (5 cycles)
451     // vmla
452     // This adds up to about 18 - 19 cycles.
453     //
454     // vmla
455     // vmul (stall 4 cycles)
456     // vadd adds up to about 14 cycles.
457     return TII->isFpMLxInstruction(Opcode);
458   }
459 
460   return false;
461 }
462 
isShifterOpProfitable(const SDValue & Shift,ARM_AM::ShiftOpc ShOpcVal,unsigned ShAmt)463 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
464                                             ARM_AM::ShiftOpc ShOpcVal,
465                                             unsigned ShAmt) {
466   if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
467     return true;
468   if (Shift.hasOneUse())
469     return true;
470   // R << 2 is free.
471   return ShOpcVal == ARM_AM::lsl &&
472          (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
473 }
474 
ConstantMaterializationCost(unsigned Val) const475 unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
476   if (Subtarget->isThumb()) {
477     if (Val <= 255) return 1;                               // MOV
478     if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
479     if (Val <= 510) return 2;                               // MOV + ADDi8
480     if (~Val <= 255) return 2;                              // MOV + MVN
481     if (ARM_AM::isThumbImmShiftedVal(Val)) return 2;        // MOV + LSL
482   } else {
483     if (ARM_AM::getSOImmVal(Val) != -1) return 1;           // MOV
484     if (ARM_AM::getSOImmVal(~Val) != -1) return 1;          // MVN
485     if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
486     if (ARM_AM::isSOImmTwoPartVal(Val)) return 2;           // two instrs
487   }
488   if (Subtarget->useMovt(*MF)) return 2; // MOVW + MOVT
489   return 3; // Literal pool load
490 }
491 
canExtractShiftFromMul(const SDValue & N,unsigned MaxShift,unsigned & PowerOfTwo,SDValue & NewMulConst) const492 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
493                                              unsigned MaxShift,
494                                              unsigned &PowerOfTwo,
495                                              SDValue &NewMulConst) const {
496   assert(N.getOpcode() == ISD::MUL);
497   assert(MaxShift > 0);
498 
499   // If the multiply is used in more than one place then changing the constant
500   // will make other uses incorrect, so don't.
501   if (!N.hasOneUse()) return false;
502   // Check if the multiply is by a constant
503   ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
504   if (!MulConst) return false;
505   // If the constant is used in more than one place then modifying it will mean
506   // we need to materialize two constants instead of one, which is a bad idea.
507   if (!MulConst->hasOneUse()) return false;
508   unsigned MulConstVal = MulConst->getZExtValue();
509   if (MulConstVal == 0) return false;
510 
511   // Find the largest power of 2 that MulConstVal is a multiple of
512   PowerOfTwo = MaxShift;
513   while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
514     --PowerOfTwo;
515     if (PowerOfTwo == 0) return false;
516   }
517 
518   // Only optimise if the new cost is better
519   unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
520   NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
521   unsigned OldCost = ConstantMaterializationCost(MulConstVal);
522   unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
523   return NewCost < OldCost;
524 }
525 
replaceDAGValue(const SDValue & N,SDValue M)526 void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
527   CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
528   CurDAG->ReplaceAllUsesWith(N, M);
529 }
530 
SelectImmShifterOperand(SDValue N,SDValue & BaseReg,SDValue & Opc,bool CheckProfitability)531 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
532                                               SDValue &BaseReg,
533                                               SDValue &Opc,
534                                               bool CheckProfitability) {
535   if (DisableShifterOp)
536     return false;
537 
538   // If N is a multiply-by-constant and it's profitable to extract a shift and
539   // use it in a shifted operand do so.
540   if (N.getOpcode() == ISD::MUL) {
541     unsigned PowerOfTwo = 0;
542     SDValue NewMulConst;
543     if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
544       HandleSDNode Handle(N);
545       replaceDAGValue(N.getOperand(1), NewMulConst);
546       BaseReg = Handle.getValue();
547       Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ARM_AM::lsl,
548                                                           PowerOfTwo),
549                                       SDLoc(N), MVT::i32);
550       return true;
551     }
552   }
553 
554   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
555 
556   // Don't match base register only case. That is matched to a separate
557   // lower complexity pattern with explicit register operand.
558   if (ShOpcVal == ARM_AM::no_shift) return false;
559 
560   BaseReg = N.getOperand(0);
561   unsigned ShImmVal = 0;
562   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
563   if (!RHS) return false;
564   ShImmVal = RHS->getZExtValue() & 31;
565   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
566                                   SDLoc(N), MVT::i32);
567   return true;
568 }
569 
SelectRegShifterOperand(SDValue N,SDValue & BaseReg,SDValue & ShReg,SDValue & Opc,bool CheckProfitability)570 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
571                                               SDValue &BaseReg,
572                                               SDValue &ShReg,
573                                               SDValue &Opc,
574                                               bool CheckProfitability) {
575   if (DisableShifterOp)
576     return false;
577 
578   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
579 
580   // Don't match base register only case. That is matched to a separate
581   // lower complexity pattern with explicit register operand.
582   if (ShOpcVal == ARM_AM::no_shift) return false;
583 
584   BaseReg = N.getOperand(0);
585   unsigned ShImmVal = 0;
586   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
587   if (RHS) return false;
588 
589   ShReg = N.getOperand(1);
590   if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
591     return false;
592   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
593                                   SDLoc(N), MVT::i32);
594   return true;
595 }
596 
597 
SelectAddrModeImm12(SDValue N,SDValue & Base,SDValue & OffImm)598 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
599                                           SDValue &Base,
600                                           SDValue &OffImm) {
601   // Match simple R + imm12 operands.
602 
603   // Base only.
604   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
605       !CurDAG->isBaseWithConstantOffset(N)) {
606     if (N.getOpcode() == ISD::FrameIndex) {
607       // Match frame index.
608       int FI = cast<FrameIndexSDNode>(N)->getIndex();
609       Base = CurDAG->getTargetFrameIndex(
610           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
611       OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
612       return true;
613     }
614 
615     if (N.getOpcode() == ARMISD::Wrapper &&
616         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
617         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
618         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
619       Base = N.getOperand(0);
620     } else
621       Base = N;
622     OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
623     return true;
624   }
625 
626   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
627     int RHSC = (int)RHS->getSExtValue();
628     if (N.getOpcode() == ISD::SUB)
629       RHSC = -RHSC;
630 
631     if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
632       Base   = N.getOperand(0);
633       if (Base.getOpcode() == ISD::FrameIndex) {
634         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
635         Base = CurDAG->getTargetFrameIndex(
636             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
637       }
638       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
639       return true;
640     }
641   }
642 
643   // Base only.
644   Base = N;
645   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
646   return true;
647 }
648 
649 
650 
SelectLdStSOReg(SDValue N,SDValue & Base,SDValue & Offset,SDValue & Opc)651 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
652                                       SDValue &Opc) {
653   if (N.getOpcode() == ISD::MUL &&
654       ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
655     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
656       // X * [3,5,9] -> X + X * [2,4,8] etc.
657       int RHSC = (int)RHS->getZExtValue();
658       if (RHSC & 1) {
659         RHSC = RHSC & ~1;
660         ARM_AM::AddrOpc AddSub = ARM_AM::add;
661         if (RHSC < 0) {
662           AddSub = ARM_AM::sub;
663           RHSC = - RHSC;
664         }
665         if (isPowerOf2_32(RHSC)) {
666           unsigned ShAmt = Log2_32(RHSC);
667           Base = Offset = N.getOperand(0);
668           Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
669                                                             ARM_AM::lsl),
670                                           SDLoc(N), MVT::i32);
671           return true;
672         }
673       }
674     }
675   }
676 
677   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
678       // ISD::OR that is equivalent to an ISD::ADD.
679       !CurDAG->isBaseWithConstantOffset(N))
680     return false;
681 
682   // Leave simple R +/- imm12 operands for LDRi12
683   if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
684     int RHSC;
685     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
686                                 -0x1000+1, 0x1000, RHSC)) // 12 bits.
687       return false;
688   }
689 
690   // Otherwise this is R +/- [possibly shifted] R.
691   ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
692   ARM_AM::ShiftOpc ShOpcVal =
693     ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
694   unsigned ShAmt = 0;
695 
696   Base   = N.getOperand(0);
697   Offset = N.getOperand(1);
698 
699   if (ShOpcVal != ARM_AM::no_shift) {
700     // Check to see if the RHS of the shift is a constant, if not, we can't fold
701     // it.
702     if (ConstantSDNode *Sh =
703            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
704       ShAmt = Sh->getZExtValue();
705       if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
706         Offset = N.getOperand(1).getOperand(0);
707       else {
708         ShAmt = 0;
709         ShOpcVal = ARM_AM::no_shift;
710       }
711     } else {
712       ShOpcVal = ARM_AM::no_shift;
713     }
714   }
715 
716   // Try matching (R shl C) + (R).
717   if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
718       !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
719         N.getOperand(0).hasOneUse())) {
720     ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
721     if (ShOpcVal != ARM_AM::no_shift) {
722       // Check to see if the RHS of the shift is a constant, if not, we can't
723       // fold it.
724       if (ConstantSDNode *Sh =
725           dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
726         ShAmt = Sh->getZExtValue();
727         if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
728           Offset = N.getOperand(0).getOperand(0);
729           Base = N.getOperand(1);
730         } else {
731           ShAmt = 0;
732           ShOpcVal = ARM_AM::no_shift;
733         }
734       } else {
735         ShOpcVal = ARM_AM::no_shift;
736       }
737     }
738   }
739 
740   // If Offset is a multiply-by-constant and it's profitable to extract a shift
741   // and use it in a shifted operand do so.
742   if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
743     unsigned PowerOfTwo = 0;
744     SDValue NewMulConst;
745     if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
746       replaceDAGValue(Offset.getOperand(1), NewMulConst);
747       ShAmt = PowerOfTwo;
748       ShOpcVal = ARM_AM::lsl;
749     }
750   }
751 
752   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
753                                   SDLoc(N), MVT::i32);
754   return true;
755 }
756 
757 
758 //-----
759 
SelectAddrMode2Worker(SDValue N,SDValue & Base,SDValue & Offset,SDValue & Opc)760 AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
761                                                      SDValue &Base,
762                                                      SDValue &Offset,
763                                                      SDValue &Opc) {
764   if (N.getOpcode() == ISD::MUL &&
765       (!(Subtarget->isLikeA9() || Subtarget->isSwift()) || N.hasOneUse())) {
766     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
767       // X * [3,5,9] -> X + X * [2,4,8] etc.
768       int RHSC = (int)RHS->getZExtValue();
769       if (RHSC & 1) {
770         RHSC = RHSC & ~1;
771         ARM_AM::AddrOpc AddSub = ARM_AM::add;
772         if (RHSC < 0) {
773           AddSub = ARM_AM::sub;
774           RHSC = - RHSC;
775         }
776         if (isPowerOf2_32(RHSC)) {
777           unsigned ShAmt = Log2_32(RHSC);
778           Base = Offset = N.getOperand(0);
779           Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
780                                                             ARM_AM::lsl),
781                                           SDLoc(N), MVT::i32);
782           return AM2_SHOP;
783         }
784       }
785     }
786   }
787 
788   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
789       // ISD::OR that is equivalent to an ADD.
790       !CurDAG->isBaseWithConstantOffset(N)) {
791     Base = N;
792     if (N.getOpcode() == ISD::FrameIndex) {
793       int FI = cast<FrameIndexSDNode>(N)->getIndex();
794       Base = CurDAG->getTargetFrameIndex(
795           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
796     } else if (N.getOpcode() == ARMISD::Wrapper &&
797                N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
798                N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
799                N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
800       Base = N.getOperand(0);
801     }
802     Offset = CurDAG->getRegister(0, MVT::i32);
803     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
804                                                       ARM_AM::no_shift),
805                                     SDLoc(N), MVT::i32);
806     return AM2_BASE;
807   }
808 
809   // Match simple R +/- imm12 operands.
810   if (N.getOpcode() != ISD::SUB) {
811     int RHSC;
812     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
813                                 -0x1000+1, 0x1000, RHSC)) { // 12 bits.
814       Base = N.getOperand(0);
815       if (Base.getOpcode() == ISD::FrameIndex) {
816         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
817         Base = CurDAG->getTargetFrameIndex(
818             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
819       }
820       Offset = CurDAG->getRegister(0, MVT::i32);
821 
822       ARM_AM::AddrOpc AddSub = ARM_AM::add;
823       if (RHSC < 0) {
824         AddSub = ARM_AM::sub;
825         RHSC = - RHSC;
826       }
827       Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
828                                                         ARM_AM::no_shift),
829                                       SDLoc(N), MVT::i32);
830       return AM2_BASE;
831     }
832   }
833 
834   if ((Subtarget->isLikeA9() || Subtarget->isSwift()) && !N.hasOneUse()) {
835     // Compute R +/- (R << N) and reuse it.
836     Base = N;
837     Offset = CurDAG->getRegister(0, MVT::i32);
838     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
839                                                       ARM_AM::no_shift),
840                                     SDLoc(N), MVT::i32);
841     return AM2_BASE;
842   }
843 
844   // Otherwise this is R +/- [possibly shifted] R.
845   ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub;
846   ARM_AM::ShiftOpc ShOpcVal =
847     ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
848   unsigned ShAmt = 0;
849 
850   Base   = N.getOperand(0);
851   Offset = N.getOperand(1);
852 
853   if (ShOpcVal != ARM_AM::no_shift) {
854     // Check to see if the RHS of the shift is a constant, if not, we can't fold
855     // it.
856     if (ConstantSDNode *Sh =
857            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
858       ShAmt = Sh->getZExtValue();
859       if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
860         Offset = N.getOperand(1).getOperand(0);
861       else {
862         ShAmt = 0;
863         ShOpcVal = ARM_AM::no_shift;
864       }
865     } else {
866       ShOpcVal = ARM_AM::no_shift;
867     }
868   }
869 
870   // Try matching (R shl C) + (R).
871   if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
872       !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
873         N.getOperand(0).hasOneUse())) {
874     ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
875     if (ShOpcVal != ARM_AM::no_shift) {
876       // Check to see if the RHS of the shift is a constant, if not, we can't
877       // fold it.
878       if (ConstantSDNode *Sh =
879           dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
880         ShAmt = Sh->getZExtValue();
881         if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
882           Offset = N.getOperand(0).getOperand(0);
883           Base = N.getOperand(1);
884         } else {
885           ShAmt = 0;
886           ShOpcVal = ARM_AM::no_shift;
887         }
888       } else {
889         ShOpcVal = ARM_AM::no_shift;
890       }
891     }
892   }
893 
894   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
895                                   SDLoc(N), MVT::i32);
896   return AM2_SHOP;
897 }
898 
SelectAddrMode2OffsetReg(SDNode * Op,SDValue N,SDValue & Offset,SDValue & Opc)899 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
900                                             SDValue &Offset, SDValue &Opc) {
901   unsigned Opcode = Op->getOpcode();
902   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
903     ? cast<LoadSDNode>(Op)->getAddressingMode()
904     : cast<StoreSDNode>(Op)->getAddressingMode();
905   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
906     ? ARM_AM::add : ARM_AM::sub;
907   int Val;
908   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
909     return false;
910 
911   Offset = N;
912   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
913   unsigned ShAmt = 0;
914   if (ShOpcVal != ARM_AM::no_shift) {
915     // Check to see if the RHS of the shift is a constant, if not, we can't fold
916     // it.
917     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
918       ShAmt = Sh->getZExtValue();
919       if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
920         Offset = N.getOperand(0);
921       else {
922         ShAmt = 0;
923         ShOpcVal = ARM_AM::no_shift;
924       }
925     } else {
926       ShOpcVal = ARM_AM::no_shift;
927     }
928   }
929 
930   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
931                                   SDLoc(N), MVT::i32);
932   return true;
933 }
934 
SelectAddrMode2OffsetImmPre(SDNode * Op,SDValue N,SDValue & Offset,SDValue & Opc)935 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
936                                             SDValue &Offset, SDValue &Opc) {
937   unsigned Opcode = Op->getOpcode();
938   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
939     ? cast<LoadSDNode>(Op)->getAddressingMode()
940     : cast<StoreSDNode>(Op)->getAddressingMode();
941   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
942     ? ARM_AM::add : ARM_AM::sub;
943   int Val;
944   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
945     if (AddSub == ARM_AM::sub) Val *= -1;
946     Offset = CurDAG->getRegister(0, MVT::i32);
947     Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
948     return true;
949   }
950 
951   return false;
952 }
953 
954 
SelectAddrMode2OffsetImm(SDNode * Op,SDValue N,SDValue & Offset,SDValue & Opc)955 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
956                                             SDValue &Offset, SDValue &Opc) {
957   unsigned Opcode = Op->getOpcode();
958   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
959     ? cast<LoadSDNode>(Op)->getAddressingMode()
960     : cast<StoreSDNode>(Op)->getAddressingMode();
961   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
962     ? ARM_AM::add : ARM_AM::sub;
963   int Val;
964   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
965     Offset = CurDAG->getRegister(0, MVT::i32);
966     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
967                                                       ARM_AM::no_shift),
968                                     SDLoc(Op), MVT::i32);
969     return true;
970   }
971 
972   return false;
973 }
974 
SelectAddrOffsetNone(SDValue N,SDValue & Base)975 bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
976   Base = N;
977   return true;
978 }
979 
SelectAddrMode3(SDValue N,SDValue & Base,SDValue & Offset,SDValue & Opc)980 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
981                                       SDValue &Base, SDValue &Offset,
982                                       SDValue &Opc) {
983   if (N.getOpcode() == ISD::SUB) {
984     // X - C  is canonicalize to X + -C, no need to handle it here.
985     Base = N.getOperand(0);
986     Offset = N.getOperand(1);
987     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
988                                     MVT::i32);
989     return true;
990   }
991 
992   if (!CurDAG->isBaseWithConstantOffset(N)) {
993     Base = N;
994     if (N.getOpcode() == ISD::FrameIndex) {
995       int FI = cast<FrameIndexSDNode>(N)->getIndex();
996       Base = CurDAG->getTargetFrameIndex(
997           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
998     }
999     Offset = CurDAG->getRegister(0, MVT::i32);
1000     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
1001                                     MVT::i32);
1002     return true;
1003   }
1004 
1005   // If the RHS is +/- imm8, fold into addr mode.
1006   int RHSC;
1007   if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
1008                               -256 + 1, 256, RHSC)) { // 8 bits.
1009     Base = N.getOperand(0);
1010     if (Base.getOpcode() == ISD::FrameIndex) {
1011       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1012       Base = CurDAG->getTargetFrameIndex(
1013           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1014     }
1015     Offset = CurDAG->getRegister(0, MVT::i32);
1016 
1017     ARM_AM::AddrOpc AddSub = ARM_AM::add;
1018     if (RHSC < 0) {
1019       AddSub = ARM_AM::sub;
1020       RHSC = -RHSC;
1021     }
1022     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
1023                                     MVT::i32);
1024     return true;
1025   }
1026 
1027   Base = N.getOperand(0);
1028   Offset = N.getOperand(1);
1029   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
1030                                   MVT::i32);
1031   return true;
1032 }
1033 
SelectAddrMode3Offset(SDNode * Op,SDValue N,SDValue & Offset,SDValue & Opc)1034 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
1035                                             SDValue &Offset, SDValue &Opc) {
1036   unsigned Opcode = Op->getOpcode();
1037   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1038     ? cast<LoadSDNode>(Op)->getAddressingMode()
1039     : cast<StoreSDNode>(Op)->getAddressingMode();
1040   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
1041     ? ARM_AM::add : ARM_AM::sub;
1042   int Val;
1043   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
1044     Offset = CurDAG->getRegister(0, MVT::i32);
1045     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
1046                                     MVT::i32);
1047     return true;
1048   }
1049 
1050   Offset = N;
1051   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
1052                                   MVT::i32);
1053   return true;
1054 }
1055 
SelectAddrMode5(SDValue N,SDValue & Base,SDValue & Offset)1056 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
1057                                       SDValue &Base, SDValue &Offset) {
1058   if (!CurDAG->isBaseWithConstantOffset(N)) {
1059     Base = N;
1060     if (N.getOpcode() == ISD::FrameIndex) {
1061       int FI = cast<FrameIndexSDNode>(N)->getIndex();
1062       Base = CurDAG->getTargetFrameIndex(
1063           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1064     } else if (N.getOpcode() == ARMISD::Wrapper &&
1065                N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1066                N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1067                N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1068       Base = N.getOperand(0);
1069     }
1070     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
1071                                        SDLoc(N), MVT::i32);
1072     return true;
1073   }
1074 
1075   // If the RHS is +/- imm8, fold into addr mode.
1076   int RHSC;
1077   if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4,
1078                               -256 + 1, 256, RHSC)) {
1079     Base = N.getOperand(0);
1080     if (Base.getOpcode() == ISD::FrameIndex) {
1081       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1082       Base = CurDAG->getTargetFrameIndex(
1083           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1084     }
1085 
1086     ARM_AM::AddrOpc AddSub = ARM_AM::add;
1087     if (RHSC < 0) {
1088       AddSub = ARM_AM::sub;
1089       RHSC = -RHSC;
1090     }
1091     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
1092                                        SDLoc(N), MVT::i32);
1093     return true;
1094   }
1095 
1096   Base = N;
1097   Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
1098                                      SDLoc(N), MVT::i32);
1099   return true;
1100 }
1101 
SelectAddrMode6(SDNode * Parent,SDValue N,SDValue & Addr,SDValue & Align)1102 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
1103                                       SDValue &Align) {
1104   Addr = N;
1105 
1106   unsigned Alignment = 0;
1107 
1108   MemSDNode *MemN = cast<MemSDNode>(Parent);
1109 
1110   if (isa<LSBaseSDNode>(MemN) ||
1111       ((MemN->getOpcode() == ARMISD::VST1_UPD ||
1112         MemN->getOpcode() == ARMISD::VLD1_UPD) &&
1113        MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
1114     // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
1115     // The maximum alignment is equal to the memory size being referenced.
1116     unsigned MMOAlign = MemN->getAlignment();
1117     unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
1118     if (MMOAlign >= MemSize && MemSize > 1)
1119       Alignment = MemSize;
1120   } else {
1121     // All other uses of addrmode6 are for intrinsics.  For now just record
1122     // the raw alignment value; it will be refined later based on the legal
1123     // alignment operands for the intrinsic.
1124     Alignment = MemN->getAlignment();
1125   }
1126 
1127   Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
1128   return true;
1129 }
1130 
SelectAddrMode6Offset(SDNode * Op,SDValue N,SDValue & Offset)1131 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1132                                             SDValue &Offset) {
1133   LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
1134   ISD::MemIndexedMode AM = LdSt->getAddressingMode();
1135   if (AM != ISD::POST_INC)
1136     return false;
1137   Offset = N;
1138   if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
1139     if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1140       Offset = CurDAG->getRegister(0, MVT::i32);
1141   }
1142   return true;
1143 }
1144 
SelectAddrModePC(SDValue N,SDValue & Offset,SDValue & Label)1145 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1146                                        SDValue &Offset, SDValue &Label) {
1147   if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1148     Offset = N.getOperand(0);
1149     SDValue N1 = N.getOperand(1);
1150     Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
1151                                       SDLoc(N), MVT::i32);
1152     return true;
1153   }
1154 
1155   return false;
1156 }
1157 
1158 
1159 //===----------------------------------------------------------------------===//
1160 //                         Thumb Addressing Modes
1161 //===----------------------------------------------------------------------===//
1162 
SelectThumbAddrModeRR(SDValue N,SDValue & Base,SDValue & Offset)1163 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
1164                                             SDValue &Base, SDValue &Offset){
1165   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1166     ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
1167     if (!NC || !NC->isNullValue())
1168       return false;
1169 
1170     Base = Offset = N;
1171     return true;
1172   }
1173 
1174   Base = N.getOperand(0);
1175   Offset = N.getOperand(1);
1176   return true;
1177 }
1178 
1179 bool
SelectThumbAddrModeImm5S(SDValue N,unsigned Scale,SDValue & Base,SDValue & OffImm)1180 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
1181                                           SDValue &Base, SDValue &OffImm) {
1182   if (!CurDAG->isBaseWithConstantOffset(N)) {
1183     if (N.getOpcode() == ISD::ADD) {
1184       return false; // We want to select register offset instead
1185     } else if (N.getOpcode() == ARMISD::Wrapper &&
1186         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1187         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1188         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1189       Base = N.getOperand(0);
1190     } else {
1191       Base = N;
1192     }
1193 
1194     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1195     return true;
1196   }
1197 
1198   // If the RHS is + imm5 * scale, fold into addr mode.
1199   int RHSC;
1200   if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
1201     Base = N.getOperand(0);
1202     OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1203     return true;
1204   }
1205 
1206   // Offset is too large, so use register offset instead.
1207   return false;
1208 }
1209 
1210 bool
SelectThumbAddrModeImm5S4(SDValue N,SDValue & Base,SDValue & OffImm)1211 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
1212                                            SDValue &OffImm) {
1213   return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
1214 }
1215 
1216 bool
SelectThumbAddrModeImm5S2(SDValue N,SDValue & Base,SDValue & OffImm)1217 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
1218                                            SDValue &OffImm) {
1219   return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
1220 }
1221 
1222 bool
SelectThumbAddrModeImm5S1(SDValue N,SDValue & Base,SDValue & OffImm)1223 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
1224                                            SDValue &OffImm) {
1225   return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
1226 }
1227 
SelectThumbAddrModeSP(SDValue N,SDValue & Base,SDValue & OffImm)1228 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
1229                                             SDValue &Base, SDValue &OffImm) {
1230   if (N.getOpcode() == ISD::FrameIndex) {
1231     int FI = cast<FrameIndexSDNode>(N)->getIndex();
1232     // Only multiples of 4 are allowed for the offset, so the frame object
1233     // alignment must be at least 4.
1234     MachineFrameInfo *MFI = MF->getFrameInfo();
1235     if (MFI->getObjectAlignment(FI) < 4)
1236       MFI->setObjectAlignment(FI, 4);
1237     Base = CurDAG->getTargetFrameIndex(
1238         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1239     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1240     return true;
1241   }
1242 
1243   if (!CurDAG->isBaseWithConstantOffset(N))
1244     return false;
1245 
1246   RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
1247   if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
1248       (LHSR && LHSR->getReg() == ARM::SP)) {
1249     // If the RHS is + imm8 * scale, fold into addr mode.
1250     int RHSC;
1251     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
1252       Base = N.getOperand(0);
1253       if (Base.getOpcode() == ISD::FrameIndex) {
1254         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1255         // For LHS+RHS to result in an offset that's a multiple of 4 the object
1256         // indexed by the LHS must be 4-byte aligned.
1257         MachineFrameInfo *MFI = MF->getFrameInfo();
1258         if (MFI->getObjectAlignment(FI) < 4)
1259           MFI->setObjectAlignment(FI, 4);
1260         Base = CurDAG->getTargetFrameIndex(
1261             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1262       }
1263       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1264       return true;
1265     }
1266   }
1267 
1268   return false;
1269 }
1270 
1271 
1272 //===----------------------------------------------------------------------===//
1273 //                        Thumb 2 Addressing Modes
1274 //===----------------------------------------------------------------------===//
1275 
1276 
SelectT2AddrModeImm12(SDValue N,SDValue & Base,SDValue & OffImm)1277 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
1278                                             SDValue &Base, SDValue &OffImm) {
1279   // Match simple R + imm12 operands.
1280 
1281   // Base only.
1282   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1283       !CurDAG->isBaseWithConstantOffset(N)) {
1284     if (N.getOpcode() == ISD::FrameIndex) {
1285       // Match frame index.
1286       int FI = cast<FrameIndexSDNode>(N)->getIndex();
1287       Base = CurDAG->getTargetFrameIndex(
1288           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1289       OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1290       return true;
1291     }
1292 
1293     if (N.getOpcode() == ARMISD::Wrapper &&
1294         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1295         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1296         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1297       Base = N.getOperand(0);
1298       if (Base.getOpcode() == ISD::TargetConstantPool)
1299         return false;  // We want to select t2LDRpci instead.
1300     } else
1301       Base = N;
1302     OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1303     return true;
1304   }
1305 
1306   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1307     if (SelectT2AddrModeImm8(N, Base, OffImm))
1308       // Let t2LDRi8 handle (R - imm8).
1309       return false;
1310 
1311     int RHSC = (int)RHS->getZExtValue();
1312     if (N.getOpcode() == ISD::SUB)
1313       RHSC = -RHSC;
1314 
1315     if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
1316       Base   = N.getOperand(0);
1317       if (Base.getOpcode() == ISD::FrameIndex) {
1318         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1319         Base = CurDAG->getTargetFrameIndex(
1320             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1321       }
1322       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1323       return true;
1324     }
1325   }
1326 
1327   // Base only.
1328   Base = N;
1329   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1330   return true;
1331 }
1332 
SelectT2AddrModeImm8(SDValue N,SDValue & Base,SDValue & OffImm)1333 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1334                                            SDValue &Base, SDValue &OffImm) {
1335   // Match simple R - imm8 operands.
1336   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1337       !CurDAG->isBaseWithConstantOffset(N))
1338     return false;
1339 
1340   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1341     int RHSC = (int)RHS->getSExtValue();
1342     if (N.getOpcode() == ISD::SUB)
1343       RHSC = -RHSC;
1344 
1345     if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
1346       Base = N.getOperand(0);
1347       if (Base.getOpcode() == ISD::FrameIndex) {
1348         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1349         Base = CurDAG->getTargetFrameIndex(
1350             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1351       }
1352       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1353       return true;
1354     }
1355   }
1356 
1357   return false;
1358 }
1359 
SelectT2AddrModeImm8Offset(SDNode * Op,SDValue N,SDValue & OffImm)1360 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1361                                                  SDValue &OffImm){
1362   unsigned Opcode = Op->getOpcode();
1363   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1364     ? cast<LoadSDNode>(Op)->getAddressingMode()
1365     : cast<StoreSDNode>(Op)->getAddressingMode();
1366   int RHSC;
1367   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1368     OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1369       ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
1370       : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1371     return true;
1372   }
1373 
1374   return false;
1375 }
1376 
SelectT2AddrModeSoReg(SDValue N,SDValue & Base,SDValue & OffReg,SDValue & ShImm)1377 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
1378                                             SDValue &Base,
1379                                             SDValue &OffReg, SDValue &ShImm) {
1380   // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
1381   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
1382     return false;
1383 
1384   // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
1385   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1386     int RHSC = (int)RHS->getZExtValue();
1387     if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
1388       return false;
1389     else if (RHSC < 0 && RHSC >= -255) // 8 bits
1390       return false;
1391   }
1392 
1393   // Look for (R + R) or (R + (R << [1,2,3])).
1394   unsigned ShAmt = 0;
1395   Base   = N.getOperand(0);
1396   OffReg = N.getOperand(1);
1397 
1398   // Swap if it is ((R << c) + R).
1399   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
1400   if (ShOpcVal != ARM_AM::lsl) {
1401     ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
1402     if (ShOpcVal == ARM_AM::lsl)
1403       std::swap(Base, OffReg);
1404   }
1405 
1406   if (ShOpcVal == ARM_AM::lsl) {
1407     // Check to see if the RHS of the shift is a constant, if not, we can't fold
1408     // it.
1409     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
1410       ShAmt = Sh->getZExtValue();
1411       if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
1412         OffReg = OffReg.getOperand(0);
1413       else {
1414         ShAmt = 0;
1415       }
1416     }
1417   }
1418 
1419   // If OffReg is a multiply-by-constant and it's profitable to extract a shift
1420   // and use it in a shifted operand do so.
1421   if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
1422     unsigned PowerOfTwo = 0;
1423     SDValue NewMulConst;
1424     if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
1425       replaceDAGValue(OffReg.getOperand(1), NewMulConst);
1426       ShAmt = PowerOfTwo;
1427     }
1428   }
1429 
1430   ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);
1431 
1432   return true;
1433 }
1434 
SelectT2AddrModeExclusive(SDValue N,SDValue & Base,SDValue & OffImm)1435 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1436                                                 SDValue &OffImm) {
1437   // This *must* succeed since it's used for the irreplaceable ldrex and strex
1438   // instructions.
1439   Base = N;
1440   OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1441 
1442   if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
1443     return true;
1444 
1445   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1446   if (!RHS)
1447     return true;
1448 
1449   uint32_t RHSC = (int)RHS->getZExtValue();
1450   if (RHSC > 1020 || RHSC % 4 != 0)
1451     return true;
1452 
1453   Base = N.getOperand(0);
1454   if (Base.getOpcode() == ISD::FrameIndex) {
1455     int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1456     Base = CurDAG->getTargetFrameIndex(
1457         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1458   }
1459 
1460   OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
1461   return true;
1462 }
1463 
1464 //===--------------------------------------------------------------------===//
1465 
1466 /// getAL - Returns a ARMCC::AL immediate node.
getAL(SelectionDAG * CurDAG,const SDLoc & dl)1467 static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
1468   return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
1469 }
1470 
tryARMIndexedLoad(SDNode * N)1471 bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
1472   LoadSDNode *LD = cast<LoadSDNode>(N);
1473   ISD::MemIndexedMode AM = LD->getAddressingMode();
1474   if (AM == ISD::UNINDEXED)
1475     return false;
1476 
1477   EVT LoadedVT = LD->getMemoryVT();
1478   SDValue Offset, AMOpc;
1479   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1480   unsigned Opcode = 0;
1481   bool Match = false;
1482   if (LoadedVT == MVT::i32 && isPre &&
1483       SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1484     Opcode = ARM::LDR_PRE_IMM;
1485     Match = true;
1486   } else if (LoadedVT == MVT::i32 && !isPre &&
1487       SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1488     Opcode = ARM::LDR_POST_IMM;
1489     Match = true;
1490   } else if (LoadedVT == MVT::i32 &&
1491       SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1492     Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
1493     Match = true;
1494 
1495   } else if (LoadedVT == MVT::i16 &&
1496              SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1497     Match = true;
1498     Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
1499       ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
1500       : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
1501   } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
1502     if (LD->getExtensionType() == ISD::SEXTLOAD) {
1503       if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1504         Match = true;
1505         Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
1506       }
1507     } else {
1508       if (isPre &&
1509           SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1510         Match = true;
1511         Opcode = ARM::LDRB_PRE_IMM;
1512       } else if (!isPre &&
1513                   SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1514         Match = true;
1515         Opcode = ARM::LDRB_POST_IMM;
1516       } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1517         Match = true;
1518         Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
1519       }
1520     }
1521   }
1522 
1523   if (Match) {
1524     if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
1525       SDValue Chain = LD->getChain();
1526       SDValue Base = LD->getBasePtr();
1527       SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
1528                        CurDAG->getRegister(0, MVT::i32), Chain };
1529       ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
1530                                             MVT::i32, MVT::Other, Ops));
1531       return true;
1532     } else {
1533       SDValue Chain = LD->getChain();
1534       SDValue Base = LD->getBasePtr();
1535       SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
1536                        CurDAG->getRegister(0, MVT::i32), Chain };
1537       ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
1538                                             MVT::i32, MVT::Other, Ops));
1539       return true;
1540     }
1541   }
1542 
1543   return false;
1544 }
1545 
tryT2IndexedLoad(SDNode * N)1546 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
1547   LoadSDNode *LD = cast<LoadSDNode>(N);
1548   ISD::MemIndexedMode AM = LD->getAddressingMode();
1549   if (AM == ISD::UNINDEXED)
1550     return false;
1551 
1552   EVT LoadedVT = LD->getMemoryVT();
1553   bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1554   SDValue Offset;
1555   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1556   unsigned Opcode = 0;
1557   bool Match = false;
1558   if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1559     switch (LoadedVT.getSimpleVT().SimpleTy) {
1560     case MVT::i32:
1561       Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1562       break;
1563     case MVT::i16:
1564       if (isSExtLd)
1565         Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1566       else
1567         Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1568       break;
1569     case MVT::i8:
1570     case MVT::i1:
1571       if (isSExtLd)
1572         Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1573       else
1574         Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1575       break;
1576     default:
1577       return false;
1578     }
1579     Match = true;
1580   }
1581 
1582   if (Match) {
1583     SDValue Chain = LD->getChain();
1584     SDValue Base = LD->getBasePtr();
1585     SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1586                      CurDAG->getRegister(0, MVT::i32), Chain };
1587     ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1588                                           MVT::Other, Ops));
1589     return true;
1590   }
1591 
1592   return false;
1593 }
1594 
1595 /// \brief Form a GPRPair pseudo register from a pair of GPR regs.
createGPRPairNode(EVT VT,SDValue V0,SDValue V1)1596 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1597   SDLoc dl(V0.getNode());
1598   SDValue RegClass =
1599     CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1600   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1601   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1602   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1603   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1604 }
1605 
1606 /// \brief Form a D register from a pair of S registers.
createSRegPairNode(EVT VT,SDValue V0,SDValue V1)1607 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1608   SDLoc dl(V0.getNode());
1609   SDValue RegClass =
1610     CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1611   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1612   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1613   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1614   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1615 }
1616 
1617 /// \brief Form a quad register from a pair of D registers.
createDRegPairNode(EVT VT,SDValue V0,SDValue V1)1618 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1619   SDLoc dl(V0.getNode());
1620   SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1621                                                MVT::i32);
1622   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1623   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1624   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1625   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1626 }
1627 
1628 /// \brief Form 4 consecutive D registers from a pair of Q registers.
createQRegPairNode(EVT VT,SDValue V0,SDValue V1)1629 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1630   SDLoc dl(V0.getNode());
1631   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1632                                                MVT::i32);
1633   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1634   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1635   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1636   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1637 }
1638 
1639 /// \brief Form 4 consecutive S registers.
createQuadSRegsNode(EVT VT,SDValue V0,SDValue V1,SDValue V2,SDValue V3)1640 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1641                                    SDValue V2, SDValue V3) {
1642   SDLoc dl(V0.getNode());
1643   SDValue RegClass =
1644     CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1645   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1646   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1647   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1648   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1649   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1650                                     V2, SubReg2, V3, SubReg3 };
1651   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1652 }
1653 
1654 /// \brief Form 4 consecutive D registers.
createQuadDRegsNode(EVT VT,SDValue V0,SDValue V1,SDValue V2,SDValue V3)1655 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1656                                    SDValue V2, SDValue V3) {
1657   SDLoc dl(V0.getNode());
1658   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1659                                                MVT::i32);
1660   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1661   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1662   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1663   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1664   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1665                                     V2, SubReg2, V3, SubReg3 };
1666   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1667 }
1668 
1669 /// \brief Form 4 consecutive Q registers.
createQuadQRegsNode(EVT VT,SDValue V0,SDValue V1,SDValue V2,SDValue V3)1670 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1671                                    SDValue V2, SDValue V3) {
1672   SDLoc dl(V0.getNode());
1673   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1674                                                MVT::i32);
1675   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1676   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1677   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1678   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1679   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1680                                     V2, SubReg2, V3, SubReg3 };
1681   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1682 }
1683 
1684 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1685 /// of a NEON VLD or VST instruction.  The supported values depend on the
1686 /// number of registers being loaded.
GetVLDSTAlign(SDValue Align,const SDLoc & dl,unsigned NumVecs,bool is64BitVector)1687 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1688                                        unsigned NumVecs, bool is64BitVector) {
1689   unsigned NumRegs = NumVecs;
1690   if (!is64BitVector && NumVecs < 3)
1691     NumRegs *= 2;
1692 
1693   unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1694   if (Alignment >= 32 && NumRegs == 4)
1695     Alignment = 32;
1696   else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1697     Alignment = 16;
1698   else if (Alignment >= 8)
1699     Alignment = 8;
1700   else
1701     Alignment = 0;
1702 
1703   return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1704 }
1705 
isVLDfixed(unsigned Opc)1706 static bool isVLDfixed(unsigned Opc)
1707 {
1708   switch (Opc) {
1709   default: return false;
1710   case ARM::VLD1d8wb_fixed : return true;
1711   case ARM::VLD1d16wb_fixed : return true;
1712   case ARM::VLD1d64Qwb_fixed : return true;
1713   case ARM::VLD1d32wb_fixed : return true;
1714   case ARM::VLD1d64wb_fixed : return true;
1715   case ARM::VLD1d64TPseudoWB_fixed : return true;
1716   case ARM::VLD1d64QPseudoWB_fixed : return true;
1717   case ARM::VLD1q8wb_fixed : return true;
1718   case ARM::VLD1q16wb_fixed : return true;
1719   case ARM::VLD1q32wb_fixed : return true;
1720   case ARM::VLD1q64wb_fixed : return true;
1721   case ARM::VLD2d8wb_fixed : return true;
1722   case ARM::VLD2d16wb_fixed : return true;
1723   case ARM::VLD2d32wb_fixed : return true;
1724   case ARM::VLD2q8PseudoWB_fixed : return true;
1725   case ARM::VLD2q16PseudoWB_fixed : return true;
1726   case ARM::VLD2q32PseudoWB_fixed : return true;
1727   case ARM::VLD2DUPd8wb_fixed : return true;
1728   case ARM::VLD2DUPd16wb_fixed : return true;
1729   case ARM::VLD2DUPd32wb_fixed : return true;
1730   }
1731 }
1732 
isVSTfixed(unsigned Opc)1733 static bool isVSTfixed(unsigned Opc)
1734 {
1735   switch (Opc) {
1736   default: return false;
1737   case ARM::VST1d8wb_fixed : return true;
1738   case ARM::VST1d16wb_fixed : return true;
1739   case ARM::VST1d32wb_fixed : return true;
1740   case ARM::VST1d64wb_fixed : return true;
1741   case ARM::VST1q8wb_fixed : return true;
1742   case ARM::VST1q16wb_fixed : return true;
1743   case ARM::VST1q32wb_fixed : return true;
1744   case ARM::VST1q64wb_fixed : return true;
1745   case ARM::VST1d64TPseudoWB_fixed : return true;
1746   case ARM::VST1d64QPseudoWB_fixed : return true;
1747   case ARM::VST2d8wb_fixed : return true;
1748   case ARM::VST2d16wb_fixed : return true;
1749   case ARM::VST2d32wb_fixed : return true;
1750   case ARM::VST2q8PseudoWB_fixed : return true;
1751   case ARM::VST2q16PseudoWB_fixed : return true;
1752   case ARM::VST2q32PseudoWB_fixed : return true;
1753   }
1754 }
1755 
1756 // Get the register stride update opcode of a VLD/VST instruction that
1757 // is otherwise equivalent to the given fixed stride updating instruction.
getVLDSTRegisterUpdateOpcode(unsigned Opc)1758 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
1759   assert((isVLDfixed(Opc) || isVSTfixed(Opc))
1760     && "Incorrect fixed stride updating instruction.");
1761   switch (Opc) {
1762   default: break;
1763   case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
1764   case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
1765   case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
1766   case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
1767   case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
1768   case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
1769   case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
1770   case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
1771   case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
1772   case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
1773   case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
1774   case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
1775 
1776   case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
1777   case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
1778   case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
1779   case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
1780   case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
1781   case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
1782   case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
1783   case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
1784   case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
1785   case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
1786 
1787   case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
1788   case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
1789   case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
1790   case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
1791   case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
1792   case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
1793 
1794   case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
1795   case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
1796   case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
1797   case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
1798   case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
1799   case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
1800 
1801   case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
1802   case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
1803   case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
1804   }
1805   return Opc; // If not one we handle, return it unchanged.
1806 }
1807 
SelectVLD(SDNode * N,bool isUpdating,unsigned NumVecs,const uint16_t * DOpcodes,const uint16_t * QOpcodes0,const uint16_t * QOpcodes1)1808 void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
1809                                 const uint16_t *DOpcodes,
1810                                 const uint16_t *QOpcodes0,
1811                                 const uint16_t *QOpcodes1) {
1812   assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
1813   SDLoc dl(N);
1814 
1815   SDValue MemAddr, Align;
1816   unsigned AddrOpIdx = isUpdating ? 1 : 2;
1817   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1818     return;
1819 
1820   SDValue Chain = N->getOperand(0);
1821   EVT VT = N->getValueType(0);
1822   bool is64BitVector = VT.is64BitVector();
1823   Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
1824 
1825   unsigned OpcodeIndex;
1826   switch (VT.getSimpleVT().SimpleTy) {
1827   default: llvm_unreachable("unhandled vld type");
1828     // Double-register operations:
1829   case MVT::v8i8:  OpcodeIndex = 0; break;
1830   case MVT::v4i16: OpcodeIndex = 1; break;
1831   case MVT::v2f32:
1832   case MVT::v2i32: OpcodeIndex = 2; break;
1833   case MVT::v1i64: OpcodeIndex = 3; break;
1834     // Quad-register operations:
1835   case MVT::v16i8: OpcodeIndex = 0; break;
1836   case MVT::v8i16: OpcodeIndex = 1; break;
1837   case MVT::v4f32:
1838   case MVT::v4i32: OpcodeIndex = 2; break;
1839   case MVT::v2f64:
1840   case MVT::v2i64: OpcodeIndex = 3;
1841     assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
1842     break;
1843   }
1844 
1845   EVT ResTy;
1846   if (NumVecs == 1)
1847     ResTy = VT;
1848   else {
1849     unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
1850     if (!is64BitVector)
1851       ResTyElts *= 2;
1852     ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
1853   }
1854   std::vector<EVT> ResTys;
1855   ResTys.push_back(ResTy);
1856   if (isUpdating)
1857     ResTys.push_back(MVT::i32);
1858   ResTys.push_back(MVT::Other);
1859 
1860   SDValue Pred = getAL(CurDAG, dl);
1861   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1862   SDNode *VLd;
1863   SmallVector<SDValue, 7> Ops;
1864 
1865   // Double registers and VLD1/VLD2 quad registers are directly supported.
1866   if (is64BitVector || NumVecs <= 2) {
1867     unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
1868                     QOpcodes0[OpcodeIndex]);
1869     Ops.push_back(MemAddr);
1870     Ops.push_back(Align);
1871     if (isUpdating) {
1872       SDValue Inc = N->getOperand(AddrOpIdx + 1);
1873       // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
1874       // case entirely when the rest are updated to that form, too.
1875       if ((NumVecs <= 2) && !isa<ConstantSDNode>(Inc.getNode()))
1876         Opc = getVLDSTRegisterUpdateOpcode(Opc);
1877       // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
1878       // check for that explicitly too. Horribly hacky, but temporary.
1879       if ((NumVecs > 2 && !isVLDfixed(Opc)) ||
1880           !isa<ConstantSDNode>(Inc.getNode()))
1881         Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
1882     }
1883     Ops.push_back(Pred);
1884     Ops.push_back(Reg0);
1885     Ops.push_back(Chain);
1886     VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1887 
1888   } else {
1889     // Otherwise, quad registers are loaded with two separate instructions,
1890     // where one loads the even registers and the other loads the odd registers.
1891     EVT AddrTy = MemAddr.getValueType();
1892 
1893     // Load the even subregs.  This is always an updating load, so that it
1894     // provides the address to the second load for the odd subregs.
1895     SDValue ImplDef =
1896       SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
1897     const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
1898     SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
1899                                           ResTy, AddrTy, MVT::Other, OpsA);
1900     Chain = SDValue(VLdA, 2);
1901 
1902     // Load the odd subregs.
1903     Ops.push_back(SDValue(VLdA, 1));
1904     Ops.push_back(Align);
1905     if (isUpdating) {
1906       SDValue Inc = N->getOperand(AddrOpIdx + 1);
1907       assert(isa<ConstantSDNode>(Inc.getNode()) &&
1908              "only constant post-increment update allowed for VLD3/4");
1909       (void)Inc;
1910       Ops.push_back(Reg0);
1911     }
1912     Ops.push_back(SDValue(VLdA, 0));
1913     Ops.push_back(Pred);
1914     Ops.push_back(Reg0);
1915     Ops.push_back(Chain);
1916     VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
1917   }
1918 
1919   // Transfer memoperands.
1920   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1921   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1922   cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
1923 
1924   if (NumVecs == 1) {
1925     ReplaceNode(N, VLd);
1926     return;
1927   }
1928 
1929   // Extract out the subregisters.
1930   SDValue SuperReg = SDValue(VLd, 0);
1931   static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
1932                     ARM::qsub_3 == ARM::qsub_0 + 3,
1933                 "Unexpected subreg numbering");
1934   unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
1935   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
1936     ReplaceUses(SDValue(N, Vec),
1937                 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
1938   ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
1939   if (isUpdating)
1940     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
1941   CurDAG->RemoveDeadNode(N);
1942 }
1943 
SelectVST(SDNode * N,bool isUpdating,unsigned NumVecs,const uint16_t * DOpcodes,const uint16_t * QOpcodes0,const uint16_t * QOpcodes1)1944 void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
1945                                 const uint16_t *DOpcodes,
1946                                 const uint16_t *QOpcodes0,
1947                                 const uint16_t *QOpcodes1) {
1948   assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
1949   SDLoc dl(N);
1950 
1951   SDValue MemAddr, Align;
1952   unsigned AddrOpIdx = isUpdating ? 1 : 2;
1953   unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
1954   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
1955     return;
1956 
1957   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
1958   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1959 
1960   SDValue Chain = N->getOperand(0);
1961   EVT VT = N->getOperand(Vec0Idx).getValueType();
1962   bool is64BitVector = VT.is64BitVector();
1963   Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
1964 
1965   unsigned OpcodeIndex;
1966   switch (VT.getSimpleVT().SimpleTy) {
1967   default: llvm_unreachable("unhandled vst type");
1968     // Double-register operations:
1969   case MVT::v8i8:  OpcodeIndex = 0; break;
1970   case MVT::v4i16: OpcodeIndex = 1; break;
1971   case MVT::v2f32:
1972   case MVT::v2i32: OpcodeIndex = 2; break;
1973   case MVT::v1i64: OpcodeIndex = 3; break;
1974     // Quad-register operations:
1975   case MVT::v16i8: OpcodeIndex = 0; break;
1976   case MVT::v8i16: OpcodeIndex = 1; break;
1977   case MVT::v4f32:
1978   case MVT::v4i32: OpcodeIndex = 2; break;
1979   case MVT::v2f64:
1980   case MVT::v2i64: OpcodeIndex = 3;
1981     assert(NumVecs == 1 && "v2i64 type only supported for VST1");
1982     break;
1983   }
1984 
1985   std::vector<EVT> ResTys;
1986   if (isUpdating)
1987     ResTys.push_back(MVT::i32);
1988   ResTys.push_back(MVT::Other);
1989 
1990   SDValue Pred = getAL(CurDAG, dl);
1991   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
1992   SmallVector<SDValue, 7> Ops;
1993 
1994   // Double registers and VST1/VST2 quad registers are directly supported.
1995   if (is64BitVector || NumVecs <= 2) {
1996     SDValue SrcReg;
1997     if (NumVecs == 1) {
1998       SrcReg = N->getOperand(Vec0Idx);
1999     } else if (is64BitVector) {
2000       // Form a REG_SEQUENCE to force register allocation.
2001       SDValue V0 = N->getOperand(Vec0Idx + 0);
2002       SDValue V1 = N->getOperand(Vec0Idx + 1);
2003       if (NumVecs == 2)
2004         SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2005       else {
2006         SDValue V2 = N->getOperand(Vec0Idx + 2);
2007         // If it's a vst3, form a quad D-register and leave the last part as
2008         // an undef.
2009         SDValue V3 = (NumVecs == 3)
2010           ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
2011           : N->getOperand(Vec0Idx + 3);
2012         SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2013       }
2014     } else {
2015       // Form a QQ register.
2016       SDValue Q0 = N->getOperand(Vec0Idx);
2017       SDValue Q1 = N->getOperand(Vec0Idx + 1);
2018       SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
2019     }
2020 
2021     unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2022                     QOpcodes0[OpcodeIndex]);
2023     Ops.push_back(MemAddr);
2024     Ops.push_back(Align);
2025     if (isUpdating) {
2026       SDValue Inc = N->getOperand(AddrOpIdx + 1);
2027       // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
2028       // case entirely when the rest are updated to that form, too.
2029       if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
2030         Opc = getVLDSTRegisterUpdateOpcode(Opc);
2031       // FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
2032       // check for that explicitly too. Horribly hacky, but temporary.
2033       if  (!isa<ConstantSDNode>(Inc.getNode()))
2034         Ops.push_back(Inc);
2035       else if (NumVecs > 2 && !isVSTfixed(Opc))
2036         Ops.push_back(Reg0);
2037     }
2038     Ops.push_back(SrcReg);
2039     Ops.push_back(Pred);
2040     Ops.push_back(Reg0);
2041     Ops.push_back(Chain);
2042     SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2043 
2044     // Transfer memoperands.
2045     cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
2046 
2047     ReplaceNode(N, VSt);
2048     return;
2049   }
2050 
2051   // Otherwise, quad registers are stored with two separate instructions,
2052   // where one stores the even registers and the other stores the odd registers.
2053 
2054   // Form the QQQQ REG_SEQUENCE.
2055   SDValue V0 = N->getOperand(Vec0Idx + 0);
2056   SDValue V1 = N->getOperand(Vec0Idx + 1);
2057   SDValue V2 = N->getOperand(Vec0Idx + 2);
2058   SDValue V3 = (NumVecs == 3)
2059     ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2060     : N->getOperand(Vec0Idx + 3);
2061   SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2062 
2063   // Store the even D registers.  This is always an updating store, so that it
2064   // provides the address to the second store for the odd subregs.
2065   const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
2066   SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2067                                         MemAddr.getValueType(),
2068                                         MVT::Other, OpsA);
2069   cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
2070   Chain = SDValue(VStA, 1);
2071 
2072   // Store the odd D registers.
2073   Ops.push_back(SDValue(VStA, 0));
2074   Ops.push_back(Align);
2075   if (isUpdating) {
2076     SDValue Inc = N->getOperand(AddrOpIdx + 1);
2077     assert(isa<ConstantSDNode>(Inc.getNode()) &&
2078            "only constant post-increment update allowed for VST3/4");
2079     (void)Inc;
2080     Ops.push_back(Reg0);
2081   }
2082   Ops.push_back(RegSeq);
2083   Ops.push_back(Pred);
2084   Ops.push_back(Reg0);
2085   Ops.push_back(Chain);
2086   SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
2087                                         Ops);
2088   cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
2089   ReplaceNode(N, VStB);
2090 }
2091 
SelectVLDSTLane(SDNode * N,bool IsLoad,bool isUpdating,unsigned NumVecs,const uint16_t * DOpcodes,const uint16_t * QOpcodes)2092 void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
2093                                       unsigned NumVecs,
2094                                       const uint16_t *DOpcodes,
2095                                       const uint16_t *QOpcodes) {
2096   assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
2097   SDLoc dl(N);
2098 
2099   SDValue MemAddr, Align;
2100   unsigned AddrOpIdx = isUpdating ? 1 : 2;
2101   unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2102   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2103     return;
2104 
2105   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2106   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2107 
2108   SDValue Chain = N->getOperand(0);
2109   unsigned Lane =
2110     cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
2111   EVT VT = N->getOperand(Vec0Idx).getValueType();
2112   bool is64BitVector = VT.is64BitVector();
2113 
2114   unsigned Alignment = 0;
2115   if (NumVecs != 3) {
2116     Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2117     unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
2118     if (Alignment > NumBytes)
2119       Alignment = NumBytes;
2120     if (Alignment < 8 && Alignment < NumBytes)
2121       Alignment = 0;
2122     // Alignment must be a power of two; make sure of that.
2123     Alignment = (Alignment & -Alignment);
2124     if (Alignment == 1)
2125       Alignment = 0;
2126   }
2127   Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2128 
2129   unsigned OpcodeIndex;
2130   switch (VT.getSimpleVT().SimpleTy) {
2131   default: llvm_unreachable("unhandled vld/vst lane type");
2132     // Double-register operations:
2133   case MVT::v8i8:  OpcodeIndex = 0; break;
2134   case MVT::v4i16: OpcodeIndex = 1; break;
2135   case MVT::v2f32:
2136   case MVT::v2i32: OpcodeIndex = 2; break;
2137     // Quad-register operations:
2138   case MVT::v8i16: OpcodeIndex = 0; break;
2139   case MVT::v4f32:
2140   case MVT::v4i32: OpcodeIndex = 1; break;
2141   }
2142 
2143   std::vector<EVT> ResTys;
2144   if (IsLoad) {
2145     unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2146     if (!is64BitVector)
2147       ResTyElts *= 2;
2148     ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
2149                                       MVT::i64, ResTyElts));
2150   }
2151   if (isUpdating)
2152     ResTys.push_back(MVT::i32);
2153   ResTys.push_back(MVT::Other);
2154 
2155   SDValue Pred = getAL(CurDAG, dl);
2156   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2157 
2158   SmallVector<SDValue, 8> Ops;
2159   Ops.push_back(MemAddr);
2160   Ops.push_back(Align);
2161   if (isUpdating) {
2162     SDValue Inc = N->getOperand(AddrOpIdx + 1);
2163     Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
2164   }
2165 
2166   SDValue SuperReg;
2167   SDValue V0 = N->getOperand(Vec0Idx + 0);
2168   SDValue V1 = N->getOperand(Vec0Idx + 1);
2169   if (NumVecs == 2) {
2170     if (is64BitVector)
2171       SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2172     else
2173       SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
2174   } else {
2175     SDValue V2 = N->getOperand(Vec0Idx + 2);
2176     SDValue V3 = (NumVecs == 3)
2177       ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2178       : N->getOperand(Vec0Idx + 3);
2179     if (is64BitVector)
2180       SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2181     else
2182       SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2183   }
2184   Ops.push_back(SuperReg);
2185   Ops.push_back(getI32Imm(Lane, dl));
2186   Ops.push_back(Pred);
2187   Ops.push_back(Reg0);
2188   Ops.push_back(Chain);
2189 
2190   unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2191                                   QOpcodes[OpcodeIndex]);
2192   SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2193   cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
2194   if (!IsLoad) {
2195     ReplaceNode(N, VLdLn);
2196     return;
2197   }
2198 
2199   // Extract the subregisters.
2200   SuperReg = SDValue(VLdLn, 0);
2201   static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2202                     ARM::qsub_3 == ARM::qsub_0 + 3,
2203                 "Unexpected subreg numbering");
2204   unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2205   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2206     ReplaceUses(SDValue(N, Vec),
2207                 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2208   ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
2209   if (isUpdating)
2210     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
2211   CurDAG->RemoveDeadNode(N);
2212 }
2213 
SelectVLDDup(SDNode * N,bool isUpdating,unsigned NumVecs,const uint16_t * Opcodes)2214 void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
2215                                    const uint16_t *Opcodes) {
2216   assert(NumVecs >=2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2217   SDLoc dl(N);
2218 
2219   SDValue MemAddr, Align;
2220   if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
2221     return;
2222 
2223   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2224   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2225 
2226   SDValue Chain = N->getOperand(0);
2227   EVT VT = N->getValueType(0);
2228 
2229   unsigned Alignment = 0;
2230   if (NumVecs != 3) {
2231     Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2232     unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
2233     if (Alignment > NumBytes)
2234       Alignment = NumBytes;
2235     if (Alignment < 8 && Alignment < NumBytes)
2236       Alignment = 0;
2237     // Alignment must be a power of two; make sure of that.
2238     Alignment = (Alignment & -Alignment);
2239     if (Alignment == 1)
2240       Alignment = 0;
2241   }
2242   Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2243 
2244   unsigned OpcodeIndex;
2245   switch (VT.getSimpleVT().SimpleTy) {
2246   default: llvm_unreachable("unhandled vld-dup type");
2247   case MVT::v8i8:  OpcodeIndex = 0; break;
2248   case MVT::v4i16: OpcodeIndex = 1; break;
2249   case MVT::v2f32:
2250   case MVT::v2i32: OpcodeIndex = 2; break;
2251   }
2252 
2253   SDValue Pred = getAL(CurDAG, dl);
2254   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2255   SDValue SuperReg;
2256   unsigned Opc = Opcodes[OpcodeIndex];
2257   SmallVector<SDValue, 6> Ops;
2258   Ops.push_back(MemAddr);
2259   Ops.push_back(Align);
2260   if (isUpdating) {
2261     // fixed-stride update instructions don't have an explicit writeback
2262     // operand. It's implicit in the opcode itself.
2263     SDValue Inc = N->getOperand(2);
2264     if (!isa<ConstantSDNode>(Inc.getNode()))
2265       Ops.push_back(Inc);
2266     // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
2267     else if (NumVecs > 2)
2268       Ops.push_back(Reg0);
2269   }
2270   Ops.push_back(Pred);
2271   Ops.push_back(Reg0);
2272   Ops.push_back(Chain);
2273 
2274   unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2275   std::vector<EVT> ResTys;
2276   ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts));
2277   if (isUpdating)
2278     ResTys.push_back(MVT::i32);
2279   ResTys.push_back(MVT::Other);
2280   SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2281   cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
2282   SuperReg = SDValue(VLdDup, 0);
2283 
2284   // Extract the subregisters.
2285   static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
2286   unsigned SubIdx = ARM::dsub_0;
2287   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2288     ReplaceUses(SDValue(N, Vec),
2289                 CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
2290   ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
2291   if (isUpdating)
2292     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
2293   CurDAG->RemoveDeadNode(N);
2294 }
2295 
SelectVTBL(SDNode * N,bool IsExt,unsigned NumVecs,unsigned Opc)2296 void ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
2297                                  unsigned Opc) {
2298   assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
2299   SDLoc dl(N);
2300   EVT VT = N->getValueType(0);
2301   unsigned FirstTblReg = IsExt ? 2 : 1;
2302 
2303   // Form a REG_SEQUENCE to force register allocation.
2304   SDValue RegSeq;
2305   SDValue V0 = N->getOperand(FirstTblReg + 0);
2306   SDValue V1 = N->getOperand(FirstTblReg + 1);
2307   if (NumVecs == 2)
2308     RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
2309   else {
2310     SDValue V2 = N->getOperand(FirstTblReg + 2);
2311     // If it's a vtbl3, form a quad D-register and leave the last part as
2312     // an undef.
2313     SDValue V3 = (NumVecs == 3)
2314       ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2315       : N->getOperand(FirstTblReg + 3);
2316     RegSeq = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2317   }
2318 
2319   SmallVector<SDValue, 6> Ops;
2320   if (IsExt)
2321     Ops.push_back(N->getOperand(1));
2322   Ops.push_back(RegSeq);
2323   Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
2324   Ops.push_back(getAL(CurDAG, dl)); // predicate
2325   Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
2326   ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
2327 }
2328 
tryV6T2BitfieldExtractOp(SDNode * N,bool isSigned)2329 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
2330   if (!Subtarget->hasV6T2Ops())
2331     return false;
2332 
2333   unsigned Opc = isSigned
2334     ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
2335     : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
2336   SDLoc dl(N);
2337 
2338   // For unsigned extracts, check for a shift right and mask
2339   unsigned And_imm = 0;
2340   if (N->getOpcode() == ISD::AND) {
2341     if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
2342 
2343       // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2344       if (And_imm & (And_imm + 1))
2345         return false;
2346 
2347       unsigned Srl_imm = 0;
2348       if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
2349                                 Srl_imm)) {
2350         assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2351 
2352         // Note: The width operand is encoded as width-1.
2353         unsigned Width = countTrailingOnes(And_imm) - 1;
2354         unsigned LSB = Srl_imm;
2355 
2356         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2357 
2358         if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
2359           // It's cheaper to use a right shift to extract the top bits.
2360           if (Subtarget->isThumb()) {
2361             Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
2362             SDValue Ops[] = { N->getOperand(0).getOperand(0),
2363                               CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2364                               getAL(CurDAG, dl), Reg0, Reg0 };
2365             CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2366             return true;
2367           }
2368 
2369           // ARM models shift instructions as MOVsi with shifter operand.
2370           ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
2371           SDValue ShOpc =
2372             CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
2373                                       MVT::i32);
2374           SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
2375                             getAL(CurDAG, dl), Reg0, Reg0 };
2376           CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
2377           return true;
2378         }
2379 
2380         SDValue Ops[] = { N->getOperand(0).getOperand(0),
2381                           CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2382                           CurDAG->getTargetConstant(Width, dl, MVT::i32),
2383                           getAL(CurDAG, dl), Reg0 };
2384         CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2385         return true;
2386       }
2387     }
2388     return false;
2389   }
2390 
2391   // Otherwise, we're looking for a shift of a shift
2392   unsigned Shl_imm = 0;
2393   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
2394     assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
2395     unsigned Srl_imm = 0;
2396     if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
2397       assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2398       // Note: The width operand is encoded as width-1.
2399       unsigned Width = 32 - Srl_imm - 1;
2400       int LSB = Srl_imm - Shl_imm;
2401       if (LSB < 0)
2402         return false;
2403       SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2404       SDValue Ops[] = { N->getOperand(0).getOperand(0),
2405                         CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2406                         CurDAG->getTargetConstant(Width, dl, MVT::i32),
2407                         getAL(CurDAG, dl), Reg0 };
2408       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2409       return true;
2410     }
2411   }
2412 
2413   // Or we are looking for a shift of an and, with a mask operand
2414   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
2415       isShiftedMask_32(And_imm)) {
2416     unsigned Srl_imm = 0;
2417     unsigned LSB = countTrailingZeros(And_imm);
2418     // Shift must be the same as the ands lsb
2419     if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
2420       assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2421       unsigned MSB = 31 - countLeadingZeros(And_imm);
2422       // Note: The width operand is encoded as width-1.
2423       unsigned Width = MSB - LSB;
2424       SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2425       SDValue Ops[] = { N->getOperand(0).getOperand(0),
2426                         CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
2427                         CurDAG->getTargetConstant(Width, dl, MVT::i32),
2428                         getAL(CurDAG, dl), Reg0 };
2429       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2430       return true;
2431     }
2432   }
2433 
2434   if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
2435     unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2436     unsigned LSB = 0;
2437     if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
2438         !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
2439       return false;
2440 
2441     if (LSB + Width > 32)
2442       return false;
2443 
2444     SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2445     SDValue Ops[] = { N->getOperand(0).getOperand(0),
2446                       CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2447                       CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
2448                       getAL(CurDAG, dl), Reg0 };
2449     CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2450     return true;
2451   }
2452 
2453   return false;
2454 }
2455 
2456 /// Target-specific DAG combining for ISD::XOR.
2457 /// Target-independent combining lowers SELECT_CC nodes of the form
2458 /// select_cc setg[ge] X,  0,  X, -X
2459 /// select_cc setgt    X, -1,  X, -X
2460 /// select_cc setl[te] X,  0, -X,  X
2461 /// select_cc setlt    X,  1, -X,  X
2462 /// which represent Integer ABS into:
2463 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2464 /// ARM instruction selection detects the latter and matches it to
2465 /// ARM::ABS or ARM::t2ABS machine node.
tryABSOp(SDNode * N)2466 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
2467   SDValue XORSrc0 = N->getOperand(0);
2468   SDValue XORSrc1 = N->getOperand(1);
2469   EVT VT = N->getValueType(0);
2470 
2471   if (Subtarget->isThumb1Only())
2472     return false;
2473 
2474   if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
2475     return false;
2476 
2477   SDValue ADDSrc0 = XORSrc0.getOperand(0);
2478   SDValue ADDSrc1 = XORSrc0.getOperand(1);
2479   SDValue SRASrc0 = XORSrc1.getOperand(0);
2480   SDValue SRASrc1 = XORSrc1.getOperand(1);
2481   ConstantSDNode *SRAConstant =  dyn_cast<ConstantSDNode>(SRASrc1);
2482   EVT XType = SRASrc0.getValueType();
2483   unsigned Size = XType.getSizeInBits() - 1;
2484 
2485   if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
2486       XType.isInteger() && SRAConstant != nullptr &&
2487       Size == SRAConstant->getZExtValue()) {
2488     unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
2489     CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
2490     return true;
2491   }
2492 
2493   return false;
2494 }
2495 
SearchSignedMulShort(SDValue SignExt,unsigned * Opc,SDValue & Src1,bool Accumulate)2496 static bool SearchSignedMulShort(SDValue SignExt, unsigned *Opc, SDValue &Src1,
2497                                  bool Accumulate) {
2498   // For SM*WB, we need to some form of sext.
2499   // For SM*WT, we need to search for (sra X, 16)
2500   // Src1 then gets set to X.
2501   if ((SignExt.getOpcode() == ISD::SIGN_EXTEND ||
2502        SignExt.getOpcode() == ISD::SIGN_EXTEND_INREG ||
2503        SignExt.getOpcode() == ISD::AssertSext) &&
2504        SignExt.getValueType() == MVT::i32) {
2505 
2506     *Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB;
2507     Src1 = SignExt.getOperand(0);
2508     return true;
2509   }
2510 
2511   if (SignExt.getOpcode() != ISD::SRA)
2512     return false;
2513 
2514   ConstantSDNode *SRASrc1 = dyn_cast<ConstantSDNode>(SignExt.getOperand(1));
2515   if (!SRASrc1 || SRASrc1->getZExtValue() != 16)
2516     return false;
2517 
2518   SDValue Op0 = SignExt.getOperand(0);
2519 
2520   // The sign extend operand for SM*WB could be generated by a shl and ashr.
2521   if (Op0.getOpcode() == ISD::SHL) {
2522     SDValue SHL = Op0;
2523     ConstantSDNode *SHLSrc1 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
2524     if (!SHLSrc1 || SHLSrc1->getZExtValue() != 16)
2525       return false;
2526 
2527     *Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB;
2528     Src1 = Op0.getOperand(0);
2529     return true;
2530   }
2531   *Opc = Accumulate ? ARM::SMLAWT : ARM::SMULWT;
2532   Src1 = SignExt.getOperand(0);
2533   return true;
2534 }
2535 
SearchSignedMulLong(SDValue OR,unsigned * Opc,SDValue & Src0,SDValue & Src1,bool Accumulate)2536 static bool SearchSignedMulLong(SDValue OR, unsigned *Opc, SDValue &Src0,
2537                                 SDValue &Src1, bool Accumulate) {
2538   // First we look for:
2539   // (add (or (srl ?, 16), (shl ?, 16)))
2540   if (OR.getOpcode() != ISD::OR)
2541     return false;
2542 
2543   SDValue SRL = OR.getOperand(0);
2544   SDValue SHL = OR.getOperand(1);
2545 
2546   if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) {
2547     SRL = OR.getOperand(1);
2548     SHL = OR.getOperand(0);
2549     if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL)
2550       return false;
2551   }
2552 
2553   ConstantSDNode *SRLSrc1 = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
2554   ConstantSDNode *SHLSrc1 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
2555   if (!SRLSrc1 || !SHLSrc1 || SRLSrc1->getZExtValue() != 16 ||
2556       SHLSrc1->getZExtValue() != 16)
2557     return false;
2558 
2559   // The first operands to the shifts need to be the two results from the
2560   // same smul_lohi node.
2561   if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||
2562        SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)
2563     return false;
2564 
2565   SDNode *SMULLOHI = SRL.getOperand(0).getNode();
2566   if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) ||
2567       SHL.getOperand(0) != SDValue(SMULLOHI, 1))
2568     return false;
2569 
2570   // Now we have:
2571   // (add (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16)))
2572   // For SMLAW[B|T] smul_lohi will take a 32-bit and a 16-bit arguments.
2573   // For SMLAWB the 16-bit value will signed extended somehow.
2574   // For SMLAWT only the SRA is required.
2575 
2576   // Check both sides of SMUL_LOHI
2577   if (SearchSignedMulShort(SMULLOHI->getOperand(0), Opc, Src1, Accumulate)) {
2578     Src0 = SMULLOHI->getOperand(1);
2579   } else if (SearchSignedMulShort(SMULLOHI->getOperand(1), Opc, Src1,
2580                                   Accumulate)) {
2581     Src0 = SMULLOHI->getOperand(0);
2582   } else {
2583     return false;
2584   }
2585   return true;
2586 }
2587 
trySMLAWSMULW(SDNode * N)2588 bool ARMDAGToDAGISel::trySMLAWSMULW(SDNode *N) {
2589   SDLoc dl(N);
2590   SDValue Src0 = N->getOperand(0);
2591   SDValue Src1 = N->getOperand(1);
2592   SDValue A, B;
2593   unsigned Opc = 0;
2594 
2595   if (N->getOpcode() == ISD::ADD) {
2596     if (Src0.getOpcode() != ISD::OR && Src1.getOpcode() != ISD::OR)
2597       return false;
2598 
2599     SDValue Acc;
2600     if (SearchSignedMulLong(Src0, &Opc, A, B, true)) {
2601       Acc = Src1;
2602     } else if (SearchSignedMulLong(Src1, &Opc, A, B, true)) {
2603       Acc = Src0;
2604     } else {
2605       return false;
2606     }
2607     if (Opc == 0)
2608       return false;
2609 
2610     SDValue Ops[] = { A, B, Acc, getAL(CurDAG, dl),
2611                       CurDAG->getRegister(0, MVT::i32) };
2612     CurDAG->SelectNodeTo(N, Opc, MVT::i32, MVT::Other, Ops);
2613     return true;
2614   } else if (N->getOpcode() == ISD::OR &&
2615              SearchSignedMulLong(SDValue(N, 0), &Opc, A, B, false)) {
2616     if (Opc == 0)
2617       return false;
2618 
2619     SDValue Ops[] = { A, B, getAL(CurDAG, dl),
2620                       CurDAG->getRegister(0, MVT::i32)};
2621     CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2622     return true;
2623   }
2624   return false;
2625 }
2626 
2627 /// We've got special pseudo-instructions for these
SelectCMP_SWAP(SDNode * N)2628 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2629   unsigned Opcode;
2630   EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2631   if (MemTy == MVT::i8)
2632     Opcode = ARM::CMP_SWAP_8;
2633   else if (MemTy == MVT::i16)
2634     Opcode = ARM::CMP_SWAP_16;
2635   else if (MemTy == MVT::i32)
2636     Opcode = ARM::CMP_SWAP_32;
2637   else
2638     llvm_unreachable("Unknown AtomicCmpSwap type");
2639 
2640   SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2641                    N->getOperand(0)};
2642   SDNode *CmpSwap = CurDAG->getMachineNode(
2643       Opcode, SDLoc(N),
2644       CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
2645 
2646   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2647   MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
2648   cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
2649 
2650   ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2651   ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2652   CurDAG->RemoveDeadNode(N);
2653 }
2654 
SelectConcatVector(SDNode * N)2655 void ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
2656   // The only time a CONCAT_VECTORS operation can have legal types is when
2657   // two 64-bit vectors are concatenated to a 128-bit vector.
2658   EVT VT = N->getValueType(0);
2659   if (!VT.is128BitVector() || N->getNumOperands() != 2)
2660     llvm_unreachable("unexpected CONCAT_VECTORS");
2661   ReplaceNode(N, createDRegPairNode(VT, N->getOperand(0), N->getOperand(1)));
2662 }
2663 
Select(SDNode * N)2664 void ARMDAGToDAGISel::Select(SDNode *N) {
2665   SDLoc dl(N);
2666 
2667   if (N->isMachineOpcode()) {
2668     N->setNodeId(-1);
2669     return;   // Already selected.
2670   }
2671 
2672   switch (N->getOpcode()) {
2673   default: break;
2674   case ISD::ADD:
2675   case ISD::OR:
2676     if (trySMLAWSMULW(N))
2677       return;
2678     break;
2679   case ISD::WRITE_REGISTER:
2680     if (tryWriteRegister(N))
2681       return;
2682     break;
2683   case ISD::READ_REGISTER:
2684     if (tryReadRegister(N))
2685       return;
2686     break;
2687   case ISD::INLINEASM:
2688     if (tryInlineAsm(N))
2689       return;
2690     break;
2691   case ISD::XOR:
2692     // Select special operations if XOR node forms integer ABS pattern
2693     if (tryABSOp(N))
2694       return;
2695     // Other cases are autogenerated.
2696     break;
2697   case ISD::Constant: {
2698     unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
2699     // If we can't materialize the constant we need to use a literal pool
2700     if (ConstantMaterializationCost(Val) > 2) {
2701       SDValue CPIdx = CurDAG->getTargetConstantPool(
2702           ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
2703           TLI->getPointerTy(CurDAG->getDataLayout()));
2704 
2705       SDNode *ResNode;
2706       if (Subtarget->isThumb()) {
2707         SDValue Pred = getAL(CurDAG, dl);
2708         SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2709         SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
2710         ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
2711                                          Ops);
2712       } else {
2713         SDValue Ops[] = {
2714           CPIdx,
2715           CurDAG->getTargetConstant(0, dl, MVT::i32),
2716           getAL(CurDAG, dl),
2717           CurDAG->getRegister(0, MVT::i32),
2718           CurDAG->getEntryNode()
2719         };
2720         ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
2721                                          Ops);
2722       }
2723       ReplaceNode(N, ResNode);
2724       return;
2725     }
2726 
2727     // Other cases are autogenerated.
2728     break;
2729   }
2730   case ISD::FrameIndex: {
2731     // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2732     int FI = cast<FrameIndexSDNode>(N)->getIndex();
2733     SDValue TFI = CurDAG->getTargetFrameIndex(
2734         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2735     if (Subtarget->isThumb1Only()) {
2736       // Set the alignment of the frame object to 4, to avoid having to generate
2737       // more than one ADD
2738       MachineFrameInfo *MFI = MF->getFrameInfo();
2739       if (MFI->getObjectAlignment(FI) < 4)
2740         MFI->setObjectAlignment(FI, 4);
2741       CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
2742                            CurDAG->getTargetConstant(0, dl, MVT::i32));
2743       return;
2744     } else {
2745       unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
2746                       ARM::t2ADDri : ARM::ADDri);
2747       SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
2748                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2749                         CurDAG->getRegister(0, MVT::i32) };
2750       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2751       return;
2752     }
2753   }
2754   case ISD::SRL:
2755     if (tryV6T2BitfieldExtractOp(N, false))
2756       return;
2757     break;
2758   case ISD::SIGN_EXTEND_INREG:
2759   case ISD::SRA:
2760     if (tryV6T2BitfieldExtractOp(N, true))
2761       return;
2762     break;
2763   case ISD::MUL:
2764     if (Subtarget->isThumb1Only())
2765       break;
2766     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2767       unsigned RHSV = C->getZExtValue();
2768       if (!RHSV) break;
2769       if (isPowerOf2_32(RHSV-1)) {  // 2^n+1?
2770         unsigned ShImm = Log2_32(RHSV-1);
2771         if (ShImm >= 32)
2772           break;
2773         SDValue V = N->getOperand(0);
2774         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2775         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2776         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2777         if (Subtarget->isThumb()) {
2778           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2779           CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
2780           return;
2781         } else {
2782           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2783                             Reg0 };
2784           CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
2785           return;
2786         }
2787       }
2788       if (isPowerOf2_32(RHSV+1)) {  // 2^n-1?
2789         unsigned ShImm = Log2_32(RHSV+1);
2790         if (ShImm >= 32)
2791           break;
2792         SDValue V = N->getOperand(0);
2793         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2794         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2795         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2796         if (Subtarget->isThumb()) {
2797           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2798           CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
2799           return;
2800         } else {
2801           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2802                             Reg0 };
2803           CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
2804           return;
2805         }
2806       }
2807     }
2808     break;
2809   case ISD::AND: {
2810     // Check for unsigned bitfield extract
2811     if (tryV6T2BitfieldExtractOp(N, false))
2812       return;
2813 
2814     // If an immediate is used in an AND node, it is possible that the immediate
2815     // can be more optimally materialized when negated. If this is the case we
2816     // can negate the immediate and use a BIC instead.
2817     auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
2818     if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
2819       uint32_t Imm = (uint32_t) N1C->getZExtValue();
2820 
2821       // In Thumb2 mode, an AND can take a 12-bit immediate. If this
2822       // immediate can be negated and fit in the immediate operand of
2823       // a t2BIC, don't do any manual transform here as this can be
2824       // handled by the generic ISel machinery.
2825       bool PreferImmediateEncoding =
2826         Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
2827       if (!PreferImmediateEncoding &&
2828           ConstantMaterializationCost(Imm) >
2829               ConstantMaterializationCost(~Imm)) {
2830         // The current immediate costs more to materialize than a negated
2831         // immediate, so negate the immediate and use a BIC.
2832         SDValue NewImm =
2833           CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
2834         // If the new constant didn't exist before, reposition it in the topological
2835         // ordering so it is just before N. Otherwise, don't touch its location.
2836         if (NewImm->getNodeId() == -1)
2837           CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
2838 
2839         if (!Subtarget->hasThumb2()) {
2840           SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
2841                            N->getOperand(0), NewImm, getAL(CurDAG, dl),
2842                            CurDAG->getRegister(0, MVT::i32)};
2843           ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
2844           return;
2845         } else {
2846           SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
2847                            CurDAG->getRegister(0, MVT::i32),
2848                            CurDAG->getRegister(0, MVT::i32)};
2849           ReplaceNode(N,
2850                       CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
2851           return;
2852         }
2853       }
2854     }
2855 
2856     // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
2857     // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
2858     // are entirely contributed by c2 and lower 16-bits are entirely contributed
2859     // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
2860     // Select it to: "movt x, ((c1 & 0xffff) >> 16)
2861     EVT VT = N->getValueType(0);
2862     if (VT != MVT::i32)
2863       break;
2864     unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
2865       ? ARM::t2MOVTi16
2866       : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
2867     if (!Opc)
2868       break;
2869     SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2870     N1C = dyn_cast<ConstantSDNode>(N1);
2871     if (!N1C)
2872       break;
2873     if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
2874       SDValue N2 = N0.getOperand(1);
2875       ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
2876       if (!N2C)
2877         break;
2878       unsigned N1CVal = N1C->getZExtValue();
2879       unsigned N2CVal = N2C->getZExtValue();
2880       if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
2881           (N1CVal & 0xffffU) == 0xffffU &&
2882           (N2CVal & 0xffffU) == 0x0U) {
2883         SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
2884                                                   dl, MVT::i32);
2885         SDValue Ops[] = { N0.getOperand(0), Imm16,
2886                           getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2887         ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
2888         return;
2889       }
2890     }
2891     break;
2892   }
2893   case ARMISD::VMOVRRD:
2894     ReplaceNode(N, CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32,
2895                                           N->getOperand(0), getAL(CurDAG, dl),
2896                                           CurDAG->getRegister(0, MVT::i32)));
2897     return;
2898   case ISD::UMUL_LOHI: {
2899     if (Subtarget->isThumb1Only())
2900       break;
2901     if (Subtarget->isThumb()) {
2902       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2903                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2904       ReplaceNode(
2905           N, CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops));
2906       return;
2907     } else {
2908       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2909                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2910                         CurDAG->getRegister(0, MVT::i32) };
2911       ReplaceNode(N, CurDAG->getMachineNode(
2912                          Subtarget->hasV6Ops() ? ARM::UMULL : ARM::UMULLv5, dl,
2913                          MVT::i32, MVT::i32, Ops));
2914       return;
2915     }
2916   }
2917   case ISD::SMUL_LOHI: {
2918     if (Subtarget->isThumb1Only())
2919       break;
2920     if (Subtarget->isThumb()) {
2921       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2922                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2923       ReplaceNode(
2924           N, CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops));
2925       return;
2926     } else {
2927       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2928                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2929                         CurDAG->getRegister(0, MVT::i32) };
2930       ReplaceNode(N, CurDAG->getMachineNode(
2931                          Subtarget->hasV6Ops() ? ARM::SMULL : ARM::SMULLv5, dl,
2932                          MVT::i32, MVT::i32, Ops));
2933       return;
2934     }
2935   }
2936   case ARMISD::UMAAL: {
2937     unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
2938     SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2939                       N->getOperand(2), N->getOperand(3),
2940                       getAL(CurDAG, dl),
2941                       CurDAG->getRegister(0, MVT::i32) };
2942     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
2943     return;
2944   }
2945   case ARMISD::UMLAL:{
2946     // UMAAL is similar to UMLAL but it adds two 32-bit values to the
2947     // 64-bit multiplication result.
2948     if (Subtarget->hasV6Ops() && N->getOperand(2).getOpcode() == ARMISD::ADDC &&
2949         N->getOperand(3).getOpcode() == ARMISD::ADDE) {
2950 
2951       SDValue Addc = N->getOperand(2);
2952       SDValue Adde = N->getOperand(3);
2953 
2954       if (Adde.getOperand(2).getNode() == Addc.getNode()) {
2955 
2956         ConstantSDNode *Op0 = dyn_cast<ConstantSDNode>(Adde.getOperand(0));
2957         ConstantSDNode *Op1 = dyn_cast<ConstantSDNode>(Adde.getOperand(1));
2958 
2959         if (Op0 && Op1 && Op0->getZExtValue() == 0 && Op1->getZExtValue() == 0)
2960         {
2961           // Select UMAAL instead: UMAAL RdLo, RdHi, Rn, Rm
2962           // RdLo = one operand to be added, lower 32-bits of res
2963           // RdHi = other operand to be added, upper 32-bits of res
2964           // Rn = first multiply operand
2965           // Rm = second multiply operand
2966           SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2967                             Addc.getOperand(0), Addc.getOperand(1),
2968                             getAL(CurDAG, dl),
2969                             CurDAG->getRegister(0, MVT::i32) };
2970           unsigned opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
2971           CurDAG->SelectNodeTo(N, opc, MVT::i32, MVT::i32, Ops);
2972           return;
2973         }
2974       }
2975     }
2976 
2977     if (Subtarget->isThumb()) {
2978       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2979                         N->getOperand(3), getAL(CurDAG, dl),
2980                         CurDAG->getRegister(0, MVT::i32)};
2981       ReplaceNode(
2982           N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
2983       return;
2984     }else{
2985       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2986                         N->getOperand(3), getAL(CurDAG, dl),
2987                         CurDAG->getRegister(0, MVT::i32),
2988                         CurDAG->getRegister(0, MVT::i32) };
2989       ReplaceNode(N, CurDAG->getMachineNode(
2990                          Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
2991                          MVT::i32, MVT::i32, Ops));
2992       return;
2993     }
2994   }
2995   case ARMISD::SMLAL:{
2996     if (Subtarget->isThumb()) {
2997       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2998                         N->getOperand(3), getAL(CurDAG, dl),
2999                         CurDAG->getRegister(0, MVT::i32)};
3000       ReplaceNode(
3001           N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
3002       return;
3003     }else{
3004       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3005                         N->getOperand(3), getAL(CurDAG, dl),
3006                         CurDAG->getRegister(0, MVT::i32),
3007                         CurDAG->getRegister(0, MVT::i32) };
3008       ReplaceNode(N, CurDAG->getMachineNode(
3009                          Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
3010                          MVT::i32, MVT::i32, Ops));
3011       return;
3012     }
3013   }
3014   case ISD::LOAD: {
3015     if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
3016       if (tryT2IndexedLoad(N))
3017         return;
3018     } else if (tryARMIndexedLoad(N))
3019       return;
3020     // Other cases are autogenerated.
3021     break;
3022   }
3023   case ARMISD::BRCOND: {
3024     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3025     // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3026     // Pattern complexity = 6  cost = 1  size = 0
3027 
3028     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3029     // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
3030     // Pattern complexity = 6  cost = 1  size = 0
3031 
3032     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3033     // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3034     // Pattern complexity = 6  cost = 1  size = 0
3035 
3036     unsigned Opc = Subtarget->isThumb() ?
3037       ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
3038     SDValue Chain = N->getOperand(0);
3039     SDValue N1 = N->getOperand(1);
3040     SDValue N2 = N->getOperand(2);
3041     SDValue N3 = N->getOperand(3);
3042     SDValue InFlag = N->getOperand(4);
3043     assert(N1.getOpcode() == ISD::BasicBlock);
3044     assert(N2.getOpcode() == ISD::Constant);
3045     assert(N3.getOpcode() == ISD::Register);
3046 
3047     SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
3048                                cast<ConstantSDNode>(N2)->getZExtValue()), dl,
3049                                MVT::i32);
3050     SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
3051     SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
3052                                              MVT::Glue, Ops);
3053     Chain = SDValue(ResNode, 0);
3054     if (N->getNumValues() == 2) {
3055       InFlag = SDValue(ResNode, 1);
3056       ReplaceUses(SDValue(N, 1), InFlag);
3057     }
3058     ReplaceUses(SDValue(N, 0),
3059                 SDValue(Chain.getNode(), Chain.getResNo()));
3060     CurDAG->RemoveDeadNode(N);
3061     return;
3062   }
3063   case ARMISD::VZIP: {
3064     unsigned Opc = 0;
3065     EVT VT = N->getValueType(0);
3066     switch (VT.getSimpleVT().SimpleTy) {
3067     default: return;
3068     case MVT::v8i8:  Opc = ARM::VZIPd8; break;
3069     case MVT::v4i16: Opc = ARM::VZIPd16; break;
3070     case MVT::v2f32:
3071     // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3072     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3073     case MVT::v16i8: Opc = ARM::VZIPq8; break;
3074     case MVT::v8i16: Opc = ARM::VZIPq16; break;
3075     case MVT::v4f32:
3076     case MVT::v4i32: Opc = ARM::VZIPq32; break;
3077     }
3078     SDValue Pred = getAL(CurDAG, dl);
3079     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3080     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3081     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3082     return;
3083   }
3084   case ARMISD::VUZP: {
3085     unsigned Opc = 0;
3086     EVT VT = N->getValueType(0);
3087     switch (VT.getSimpleVT().SimpleTy) {
3088     default: return;
3089     case MVT::v8i8:  Opc = ARM::VUZPd8; break;
3090     case MVT::v4i16: Opc = ARM::VUZPd16; break;
3091     case MVT::v2f32:
3092     // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3093     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3094     case MVT::v16i8: Opc = ARM::VUZPq8; break;
3095     case MVT::v8i16: Opc = ARM::VUZPq16; break;
3096     case MVT::v4f32:
3097     case MVT::v4i32: Opc = ARM::VUZPq32; break;
3098     }
3099     SDValue Pred = getAL(CurDAG, dl);
3100     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3101     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3102     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3103     return;
3104   }
3105   case ARMISD::VTRN: {
3106     unsigned Opc = 0;
3107     EVT VT = N->getValueType(0);
3108     switch (VT.getSimpleVT().SimpleTy) {
3109     default: return;
3110     case MVT::v8i8:  Opc = ARM::VTRNd8; break;
3111     case MVT::v4i16: Opc = ARM::VTRNd16; break;
3112     case MVT::v2f32:
3113     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3114     case MVT::v16i8: Opc = ARM::VTRNq8; break;
3115     case MVT::v8i16: Opc = ARM::VTRNq16; break;
3116     case MVT::v4f32:
3117     case MVT::v4i32: Opc = ARM::VTRNq32; break;
3118     }
3119     SDValue Pred = getAL(CurDAG, dl);
3120     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3121     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3122     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3123     return;
3124   }
3125   case ARMISD::BUILD_VECTOR: {
3126     EVT VecVT = N->getValueType(0);
3127     EVT EltVT = VecVT.getVectorElementType();
3128     unsigned NumElts = VecVT.getVectorNumElements();
3129     if (EltVT == MVT::f64) {
3130       assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
3131       ReplaceNode(
3132           N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3133       return;
3134     }
3135     assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
3136     if (NumElts == 2) {
3137       ReplaceNode(
3138           N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3139       return;
3140     }
3141     assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
3142     ReplaceNode(N,
3143                 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
3144                                     N->getOperand(2), N->getOperand(3)));
3145     return;
3146   }
3147 
3148   case ARMISD::VLD2DUP: {
3149     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3150                                         ARM::VLD2DUPd32 };
3151     SelectVLDDup(N, false, 2, Opcodes);
3152     return;
3153   }
3154 
3155   case ARMISD::VLD3DUP: {
3156     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
3157                                         ARM::VLD3DUPd16Pseudo,
3158                                         ARM::VLD3DUPd32Pseudo };
3159     SelectVLDDup(N, false, 3, Opcodes);
3160     return;
3161   }
3162 
3163   case ARMISD::VLD4DUP: {
3164     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
3165                                         ARM::VLD4DUPd16Pseudo,
3166                                         ARM::VLD4DUPd32Pseudo };
3167     SelectVLDDup(N, false, 4, Opcodes);
3168     return;
3169   }
3170 
3171   case ARMISD::VLD2DUP_UPD: {
3172     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
3173                                         ARM::VLD2DUPd16wb_fixed,
3174                                         ARM::VLD2DUPd32wb_fixed };
3175     SelectVLDDup(N, true, 2, Opcodes);
3176     return;
3177   }
3178 
3179   case ARMISD::VLD3DUP_UPD: {
3180     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
3181                                         ARM::VLD3DUPd16Pseudo_UPD,
3182                                         ARM::VLD3DUPd32Pseudo_UPD };
3183     SelectVLDDup(N, true, 3, Opcodes);
3184     return;
3185   }
3186 
3187   case ARMISD::VLD4DUP_UPD: {
3188     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
3189                                         ARM::VLD4DUPd16Pseudo_UPD,
3190                                         ARM::VLD4DUPd32Pseudo_UPD };
3191     SelectVLDDup(N, true, 4, Opcodes);
3192     return;
3193   }
3194 
3195   case ARMISD::VLD1_UPD: {
3196     static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
3197                                          ARM::VLD1d16wb_fixed,
3198                                          ARM::VLD1d32wb_fixed,
3199                                          ARM::VLD1d64wb_fixed };
3200     static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
3201                                          ARM::VLD1q16wb_fixed,
3202                                          ARM::VLD1q32wb_fixed,
3203                                          ARM::VLD1q64wb_fixed };
3204     SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
3205     return;
3206   }
3207 
3208   case ARMISD::VLD2_UPD: {
3209     static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
3210                                          ARM::VLD2d16wb_fixed,
3211                                          ARM::VLD2d32wb_fixed,
3212                                          ARM::VLD1q64wb_fixed};
3213     static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
3214                                          ARM::VLD2q16PseudoWB_fixed,
3215                                          ARM::VLD2q32PseudoWB_fixed };
3216     SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
3217     return;
3218   }
3219 
3220   case ARMISD::VLD3_UPD: {
3221     static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
3222                                          ARM::VLD3d16Pseudo_UPD,
3223                                          ARM::VLD3d32Pseudo_UPD,
3224                                          ARM::VLD1d64TPseudoWB_fixed};
3225     static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3226                                           ARM::VLD3q16Pseudo_UPD,
3227                                           ARM::VLD3q32Pseudo_UPD };
3228     static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
3229                                           ARM::VLD3q16oddPseudo_UPD,
3230                                           ARM::VLD3q32oddPseudo_UPD };
3231     SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3232     return;
3233   }
3234 
3235   case ARMISD::VLD4_UPD: {
3236     static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
3237                                          ARM::VLD4d16Pseudo_UPD,
3238                                          ARM::VLD4d32Pseudo_UPD,
3239                                          ARM::VLD1d64QPseudoWB_fixed};
3240     static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3241                                           ARM::VLD4q16Pseudo_UPD,
3242                                           ARM::VLD4q32Pseudo_UPD };
3243     static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
3244                                           ARM::VLD4q16oddPseudo_UPD,
3245                                           ARM::VLD4q32oddPseudo_UPD };
3246     SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3247     return;
3248   }
3249 
3250   case ARMISD::VLD2LN_UPD: {
3251     static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
3252                                          ARM::VLD2LNd16Pseudo_UPD,
3253                                          ARM::VLD2LNd32Pseudo_UPD };
3254     static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
3255                                          ARM::VLD2LNq32Pseudo_UPD };
3256     SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
3257     return;
3258   }
3259 
3260   case ARMISD::VLD3LN_UPD: {
3261     static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
3262                                          ARM::VLD3LNd16Pseudo_UPD,
3263                                          ARM::VLD3LNd32Pseudo_UPD };
3264     static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
3265                                          ARM::VLD3LNq32Pseudo_UPD };
3266     SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
3267     return;
3268   }
3269 
3270   case ARMISD::VLD4LN_UPD: {
3271     static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
3272                                          ARM::VLD4LNd16Pseudo_UPD,
3273                                          ARM::VLD4LNd32Pseudo_UPD };
3274     static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
3275                                          ARM::VLD4LNq32Pseudo_UPD };
3276     SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
3277     return;
3278   }
3279 
3280   case ARMISD::VST1_UPD: {
3281     static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
3282                                          ARM::VST1d16wb_fixed,
3283                                          ARM::VST1d32wb_fixed,
3284                                          ARM::VST1d64wb_fixed };
3285     static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
3286                                          ARM::VST1q16wb_fixed,
3287                                          ARM::VST1q32wb_fixed,
3288                                          ARM::VST1q64wb_fixed };
3289     SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
3290     return;
3291   }
3292 
3293   case ARMISD::VST2_UPD: {
3294     static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
3295                                          ARM::VST2d16wb_fixed,
3296                                          ARM::VST2d32wb_fixed,
3297                                          ARM::VST1q64wb_fixed};
3298     static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
3299                                          ARM::VST2q16PseudoWB_fixed,
3300                                          ARM::VST2q32PseudoWB_fixed };
3301     SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
3302     return;
3303   }
3304 
3305   case ARMISD::VST3_UPD: {
3306     static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
3307                                          ARM::VST3d16Pseudo_UPD,
3308                                          ARM::VST3d32Pseudo_UPD,
3309                                          ARM::VST1d64TPseudoWB_fixed};
3310     static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3311                                           ARM::VST3q16Pseudo_UPD,
3312                                           ARM::VST3q32Pseudo_UPD };
3313     static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
3314                                           ARM::VST3q16oddPseudo_UPD,
3315                                           ARM::VST3q32oddPseudo_UPD };
3316     SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3317     return;
3318   }
3319 
3320   case ARMISD::VST4_UPD: {
3321     static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
3322                                          ARM::VST4d16Pseudo_UPD,
3323                                          ARM::VST4d32Pseudo_UPD,
3324                                          ARM::VST1d64QPseudoWB_fixed};
3325     static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3326                                           ARM::VST4q16Pseudo_UPD,
3327                                           ARM::VST4q32Pseudo_UPD };
3328     static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
3329                                           ARM::VST4q16oddPseudo_UPD,
3330                                           ARM::VST4q32oddPseudo_UPD };
3331     SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3332     return;
3333   }
3334 
3335   case ARMISD::VST2LN_UPD: {
3336     static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
3337                                          ARM::VST2LNd16Pseudo_UPD,
3338                                          ARM::VST2LNd32Pseudo_UPD };
3339     static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
3340                                          ARM::VST2LNq32Pseudo_UPD };
3341     SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
3342     return;
3343   }
3344 
3345   case ARMISD::VST3LN_UPD: {
3346     static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
3347                                          ARM::VST3LNd16Pseudo_UPD,
3348                                          ARM::VST3LNd32Pseudo_UPD };
3349     static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
3350                                          ARM::VST3LNq32Pseudo_UPD };
3351     SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
3352     return;
3353   }
3354 
3355   case ARMISD::VST4LN_UPD: {
3356     static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
3357                                          ARM::VST4LNd16Pseudo_UPD,
3358                                          ARM::VST4LNd32Pseudo_UPD };
3359     static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
3360                                          ARM::VST4LNq32Pseudo_UPD };
3361     SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
3362     return;
3363   }
3364 
3365   case ISD::INTRINSIC_VOID:
3366   case ISD::INTRINSIC_W_CHAIN: {
3367     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
3368     switch (IntNo) {
3369     default:
3370       break;
3371 
3372     case Intrinsic::arm_mrrc:
3373     case Intrinsic::arm_mrrc2: {
3374       SDLoc dl(N);
3375       SDValue Chain = N->getOperand(0);
3376       unsigned Opc;
3377 
3378       if (Subtarget->isThumb())
3379         Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
3380       else
3381         Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
3382 
3383       SmallVector<SDValue, 5> Ops;
3384       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
3385       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
3386       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */
3387 
3388       // The mrrc2 instruction in ARM doesn't allow predicates: the top 4 bits of the encoded
3389       // instruction are always '1111'. Assembly language does accept AL as a predicate on
3390       // mrrc2, but that makes no difference to the encoded instruction.
3391       if (Opc != ARM::MRRC2) {
3392         Ops.push_back(getAL(CurDAG, dl));
3393         Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3394       }
3395 
3396       Ops.push_back(Chain);
3397 
3398       // Writes to two registers.
3399       const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
3400 
3401       ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
3402       return;
3403     }
3404     case Intrinsic::arm_ldaexd:
3405     case Intrinsic::arm_ldrexd: {
3406       SDLoc dl(N);
3407       SDValue Chain = N->getOperand(0);
3408       SDValue MemAddr = N->getOperand(2);
3409       bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
3410 
3411       bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
3412       unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
3413                                 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
3414 
3415       // arm_ldrexd returns an i64 value in {i32, i32}
3416       std::vector<EVT> ResTys;
3417       if (isThumb) {
3418         ResTys.push_back(MVT::i32);
3419         ResTys.push_back(MVT::i32);
3420       } else
3421         ResTys.push_back(MVT::Untyped);
3422       ResTys.push_back(MVT::Other);
3423 
3424       // Place arguments in the right order.
3425       SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
3426                        CurDAG->getRegister(0, MVT::i32), Chain};
3427       SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3428       // Transfer memoperands.
3429       MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3430       MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3431       cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
3432 
3433       // Remap uses.
3434       SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
3435       if (!SDValue(N, 0).use_empty()) {
3436         SDValue Result;
3437         if (isThumb)
3438           Result = SDValue(Ld, 0);
3439         else {
3440           SDValue SubRegIdx =
3441             CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
3442           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3443               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3444           Result = SDValue(ResNode,0);
3445         }
3446         ReplaceUses(SDValue(N, 0), Result);
3447       }
3448       if (!SDValue(N, 1).use_empty()) {
3449         SDValue Result;
3450         if (isThumb)
3451           Result = SDValue(Ld, 1);
3452         else {
3453           SDValue SubRegIdx =
3454             CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
3455           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3456               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3457           Result = SDValue(ResNode,0);
3458         }
3459         ReplaceUses(SDValue(N, 1), Result);
3460       }
3461       ReplaceUses(SDValue(N, 2), OutChain);
3462       CurDAG->RemoveDeadNode(N);
3463       return;
3464     }
3465     case Intrinsic::arm_stlexd:
3466     case Intrinsic::arm_strexd: {
3467       SDLoc dl(N);
3468       SDValue Chain = N->getOperand(0);
3469       SDValue Val0 = N->getOperand(2);
3470       SDValue Val1 = N->getOperand(3);
3471       SDValue MemAddr = N->getOperand(4);
3472 
3473       // Store exclusive double returns an i32 value which is the return status
3474       // of the issued store.
3475       const EVT ResTys[] = {MVT::i32, MVT::Other};
3476 
3477       bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3478       // Place arguments in the right order.
3479       SmallVector<SDValue, 7> Ops;
3480       if (isThumb) {
3481         Ops.push_back(Val0);
3482         Ops.push_back(Val1);
3483       } else
3484         // arm_strexd uses GPRPair.
3485         Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
3486       Ops.push_back(MemAddr);
3487       Ops.push_back(getAL(CurDAG, dl));
3488       Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3489       Ops.push_back(Chain);
3490 
3491       bool IsRelease = IntNo == Intrinsic::arm_stlexd;
3492       unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
3493                                 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
3494 
3495       SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3496       // Transfer memoperands.
3497       MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3498       MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3499       cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
3500 
3501       ReplaceNode(N, St);
3502       return;
3503     }
3504 
3505     case Intrinsic::arm_neon_vld1: {
3506       static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
3507                                            ARM::VLD1d32, ARM::VLD1d64 };
3508       static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3509                                            ARM::VLD1q32, ARM::VLD1q64};
3510       SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
3511       return;
3512     }
3513 
3514     case Intrinsic::arm_neon_vld2: {
3515       static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
3516                                            ARM::VLD2d32, ARM::VLD1q64 };
3517       static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
3518                                            ARM::VLD2q32Pseudo };
3519       SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3520       return;
3521     }
3522 
3523     case Intrinsic::arm_neon_vld3: {
3524       static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
3525                                            ARM::VLD3d16Pseudo,
3526                                            ARM::VLD3d32Pseudo,
3527                                            ARM::VLD1d64TPseudo };
3528       static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3529                                             ARM::VLD3q16Pseudo_UPD,
3530                                             ARM::VLD3q32Pseudo_UPD };
3531       static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
3532                                             ARM::VLD3q16oddPseudo,
3533                                             ARM::VLD3q32oddPseudo };
3534       SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3535       return;
3536     }
3537 
3538     case Intrinsic::arm_neon_vld4: {
3539       static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
3540                                            ARM::VLD4d16Pseudo,
3541                                            ARM::VLD4d32Pseudo,
3542                                            ARM::VLD1d64QPseudo };
3543       static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3544                                             ARM::VLD4q16Pseudo_UPD,
3545                                             ARM::VLD4q32Pseudo_UPD };
3546       static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
3547                                             ARM::VLD4q16oddPseudo,
3548                                             ARM::VLD4q32oddPseudo };
3549       SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3550       return;
3551     }
3552 
3553     case Intrinsic::arm_neon_vld2lane: {
3554       static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
3555                                            ARM::VLD2LNd16Pseudo,
3556                                            ARM::VLD2LNd32Pseudo };
3557       static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
3558                                            ARM::VLD2LNq32Pseudo };
3559       SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
3560       return;
3561     }
3562 
3563     case Intrinsic::arm_neon_vld3lane: {
3564       static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
3565                                            ARM::VLD3LNd16Pseudo,
3566                                            ARM::VLD3LNd32Pseudo };
3567       static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
3568                                            ARM::VLD3LNq32Pseudo };
3569       SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
3570       return;
3571     }
3572 
3573     case Intrinsic::arm_neon_vld4lane: {
3574       static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
3575                                            ARM::VLD4LNd16Pseudo,
3576                                            ARM::VLD4LNd32Pseudo };
3577       static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
3578                                            ARM::VLD4LNq32Pseudo };
3579       SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
3580       return;
3581     }
3582 
3583     case Intrinsic::arm_neon_vst1: {
3584       static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
3585                                            ARM::VST1d32, ARM::VST1d64 };
3586       static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3587                                            ARM::VST1q32, ARM::VST1q64 };
3588       SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
3589       return;
3590     }
3591 
3592     case Intrinsic::arm_neon_vst2: {
3593       static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
3594                                            ARM::VST2d32, ARM::VST1q64 };
3595       static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
3596                                            ARM::VST2q32Pseudo };
3597       SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3598       return;
3599     }
3600 
3601     case Intrinsic::arm_neon_vst3: {
3602       static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
3603                                            ARM::VST3d16Pseudo,
3604                                            ARM::VST3d32Pseudo,
3605                                            ARM::VST1d64TPseudo };
3606       static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3607                                             ARM::VST3q16Pseudo_UPD,
3608                                             ARM::VST3q32Pseudo_UPD };
3609       static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
3610                                             ARM::VST3q16oddPseudo,
3611                                             ARM::VST3q32oddPseudo };
3612       SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3613       return;
3614     }
3615 
3616     case Intrinsic::arm_neon_vst4: {
3617       static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
3618                                            ARM::VST4d16Pseudo,
3619                                            ARM::VST4d32Pseudo,
3620                                            ARM::VST1d64QPseudo };
3621       static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3622                                             ARM::VST4q16Pseudo_UPD,
3623                                             ARM::VST4q32Pseudo_UPD };
3624       static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
3625                                             ARM::VST4q16oddPseudo,
3626                                             ARM::VST4q32oddPseudo };
3627       SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3628       return;
3629     }
3630 
3631     case Intrinsic::arm_neon_vst2lane: {
3632       static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
3633                                            ARM::VST2LNd16Pseudo,
3634                                            ARM::VST2LNd32Pseudo };
3635       static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
3636                                            ARM::VST2LNq32Pseudo };
3637       SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
3638       return;
3639     }
3640 
3641     case Intrinsic::arm_neon_vst3lane: {
3642       static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
3643                                            ARM::VST3LNd16Pseudo,
3644                                            ARM::VST3LNd32Pseudo };
3645       static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
3646                                            ARM::VST3LNq32Pseudo };
3647       SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
3648       return;
3649     }
3650 
3651     case Intrinsic::arm_neon_vst4lane: {
3652       static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
3653                                            ARM::VST4LNd16Pseudo,
3654                                            ARM::VST4LNd32Pseudo };
3655       static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
3656                                            ARM::VST4LNq32Pseudo };
3657       SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
3658       return;
3659     }
3660     }
3661     break;
3662   }
3663 
3664   case ISD::INTRINSIC_WO_CHAIN: {
3665     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
3666     switch (IntNo) {
3667     default:
3668       break;
3669 
3670     case Intrinsic::arm_neon_vtbl2:
3671       SelectVTBL(N, false, 2, ARM::VTBL2);
3672       return;
3673     case Intrinsic::arm_neon_vtbl3:
3674       SelectVTBL(N, false, 3, ARM::VTBL3Pseudo);
3675       return;
3676     case Intrinsic::arm_neon_vtbl4:
3677       SelectVTBL(N, false, 4, ARM::VTBL4Pseudo);
3678       return;
3679 
3680     case Intrinsic::arm_neon_vtbx2:
3681       SelectVTBL(N, true, 2, ARM::VTBX2);
3682       return;
3683     case Intrinsic::arm_neon_vtbx3:
3684       SelectVTBL(N, true, 3, ARM::VTBX3Pseudo);
3685       return;
3686     case Intrinsic::arm_neon_vtbx4:
3687       SelectVTBL(N, true, 4, ARM::VTBX4Pseudo);
3688       return;
3689     }
3690     break;
3691   }
3692 
3693   case ARMISD::VTBL1: {
3694     SDLoc dl(N);
3695     EVT VT = N->getValueType(0);
3696     SDValue Ops[] = {N->getOperand(0), N->getOperand(1),
3697                      getAL(CurDAG, dl),                 // Predicate
3698                      CurDAG->getRegister(0, MVT::i32)}; // Predicate Register
3699     ReplaceNode(N, CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops));
3700     return;
3701   }
3702   case ARMISD::VTBL2: {
3703     SDLoc dl(N);
3704     EVT VT = N->getValueType(0);
3705 
3706     // Form a REG_SEQUENCE to force register allocation.
3707     SDValue V0 = N->getOperand(0);
3708     SDValue V1 = N->getOperand(1);
3709     SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
3710 
3711     SDValue Ops[] = {RegSeq, N->getOperand(2), getAL(CurDAG, dl), // Predicate
3712                      CurDAG->getRegister(0, MVT::i32)}; // Predicate Register
3713     ReplaceNode(N, CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops));
3714     return;
3715   }
3716 
3717   case ISD::CONCAT_VECTORS:
3718     SelectConcatVector(N);
3719     return;
3720 
3721   case ISD::ATOMIC_CMP_SWAP:
3722     SelectCMP_SWAP(N);
3723     return;
3724   }
3725 
3726   SelectCode(N);
3727 }
3728 
3729 // Inspect a register string of the form
3730 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
3731 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
3732 // and obtain the integer operands from them, adding these operands to the
3733 // provided vector.
getIntOperandsFromRegisterString(StringRef RegString,SelectionDAG * CurDAG,const SDLoc & DL,std::vector<SDValue> & Ops)3734 static void getIntOperandsFromRegisterString(StringRef RegString,
3735                                              SelectionDAG *CurDAG,
3736                                              const SDLoc &DL,
3737                                              std::vector<SDValue> &Ops) {
3738   SmallVector<StringRef, 5> Fields;
3739   RegString.split(Fields, ':');
3740 
3741   if (Fields.size() > 1) {
3742     bool AllIntFields = true;
3743 
3744     for (StringRef Field : Fields) {
3745       // Need to trim out leading 'cp' characters and get the integer field.
3746       unsigned IntField;
3747       AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
3748       Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
3749     }
3750 
3751     assert(AllIntFields &&
3752             "Unexpected non-integer value in special register string.");
3753   }
3754 }
3755 
3756 // Maps a Banked Register string to its mask value. The mask value returned is
3757 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
3758 // mask operand, which expresses which register is to be used, e.g. r8, and in
3759 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string
3760 // was invalid.
getBankedRegisterMask(StringRef RegString)3761 static inline int getBankedRegisterMask(StringRef RegString) {
3762   return StringSwitch<int>(RegString.lower())
3763           .Case("r8_usr", 0x00)
3764           .Case("r9_usr", 0x01)
3765           .Case("r10_usr", 0x02)
3766           .Case("r11_usr", 0x03)
3767           .Case("r12_usr", 0x04)
3768           .Case("sp_usr", 0x05)
3769           .Case("lr_usr", 0x06)
3770           .Case("r8_fiq", 0x08)
3771           .Case("r9_fiq", 0x09)
3772           .Case("r10_fiq", 0x0a)
3773           .Case("r11_fiq", 0x0b)
3774           .Case("r12_fiq", 0x0c)
3775           .Case("sp_fiq", 0x0d)
3776           .Case("lr_fiq", 0x0e)
3777           .Case("lr_irq", 0x10)
3778           .Case("sp_irq", 0x11)
3779           .Case("lr_svc", 0x12)
3780           .Case("sp_svc", 0x13)
3781           .Case("lr_abt", 0x14)
3782           .Case("sp_abt", 0x15)
3783           .Case("lr_und", 0x16)
3784           .Case("sp_und", 0x17)
3785           .Case("lr_mon", 0x1c)
3786           .Case("sp_mon", 0x1d)
3787           .Case("elr_hyp", 0x1e)
3788           .Case("sp_hyp", 0x1f)
3789           .Case("spsr_fiq", 0x2e)
3790           .Case("spsr_irq", 0x30)
3791           .Case("spsr_svc", 0x32)
3792           .Case("spsr_abt", 0x34)
3793           .Case("spsr_und", 0x36)
3794           .Case("spsr_mon", 0x3c)
3795           .Case("spsr_hyp", 0x3e)
3796           .Default(-1);
3797 }
3798 
3799 // Maps a MClass special register string to its value for use in the
3800 // t2MRS_M / t2MSR_M instruction nodes as the SYSm value operand.
3801 // Returns -1 to signify that the string was invalid.
getMClassRegisterSYSmValueMask(StringRef RegString)3802 static inline int getMClassRegisterSYSmValueMask(StringRef RegString) {
3803   return StringSwitch<int>(RegString.lower())
3804           .Case("apsr", 0x0)
3805           .Case("iapsr", 0x1)
3806           .Case("eapsr", 0x2)
3807           .Case("xpsr", 0x3)
3808           .Case("ipsr", 0x5)
3809           .Case("epsr", 0x6)
3810           .Case("iepsr", 0x7)
3811           .Case("msp", 0x8)
3812           .Case("psp", 0x9)
3813           .Case("primask", 0x10)
3814           .Case("basepri", 0x11)
3815           .Case("basepri_max", 0x12)
3816           .Case("faultmask", 0x13)
3817           .Case("control", 0x14)
3818           .Case("msplim", 0x0a)
3819           .Case("psplim", 0x0b)
3820           .Case("sp", 0x18)
3821           .Default(-1);
3822 }
3823 
3824 // The flags here are common to those allowed for apsr in the A class cores and
3825 // those allowed for the special registers in the M class cores. Returns a
3826 // value representing which flags were present, -1 if invalid.
getMClassFlagsMask(StringRef Flags,bool hasDSP)3827 static inline int getMClassFlagsMask(StringRef Flags, bool hasDSP) {
3828   if (Flags.empty())
3829     return 0x2 | (int)hasDSP;
3830 
3831   return StringSwitch<int>(Flags)
3832           .Case("g", 0x1)
3833           .Case("nzcvq", 0x2)
3834           .Case("nzcvqg", 0x3)
3835           .Default(-1);
3836 }
3837 
getMClassRegisterMask(StringRef Reg,StringRef Flags,bool IsRead,const ARMSubtarget * Subtarget)3838 static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead,
3839                                  const ARMSubtarget *Subtarget) {
3840   // Ensure that the register (without flags) was a valid M Class special
3841   // register.
3842   int SYSmvalue = getMClassRegisterSYSmValueMask(Reg);
3843   if (SYSmvalue == -1)
3844     return -1;
3845 
3846   // basepri, basepri_max and faultmask are only valid for V7m.
3847   if (!Subtarget->hasV7Ops() && SYSmvalue >= 0x11 && SYSmvalue <= 0x13)
3848     return -1;
3849 
3850   if (Subtarget->has8MSecExt() && Flags.lower() == "ns") {
3851     Flags = "";
3852     SYSmvalue |= 0x80;
3853   }
3854 
3855   if (!Subtarget->has8MSecExt() &&
3856       (SYSmvalue == 0xa || SYSmvalue == 0xb || SYSmvalue > 0x14))
3857     return -1;
3858 
3859   if (!Subtarget->hasV8MMainlineOps() &&
3860       (SYSmvalue == 0x8a || SYSmvalue == 0x8b || SYSmvalue == 0x91 ||
3861        SYSmvalue == 0x93))
3862     return -1;
3863 
3864   // If it was a read then we won't be expecting flags and so at this point
3865   // we can return the mask.
3866   if (IsRead) {
3867     if (Flags.empty())
3868       return SYSmvalue;
3869     else
3870       return -1;
3871   }
3872 
3873   // We know we are now handling a write so need to get the mask for the flags.
3874   int Mask = getMClassFlagsMask(Flags, Subtarget->hasDSP());
3875 
3876   // Only apsr, iapsr, eapsr, xpsr can have flags. The other register values
3877   // shouldn't have flags present.
3878   if ((SYSmvalue < 0x4 && Mask == -1) || (SYSmvalue > 0x4 && !Flags.empty()))
3879     return -1;
3880 
3881   // The _g and _nzcvqg versions are only valid if the DSP extension is
3882   // available.
3883   if (!Subtarget->hasDSP() && (Mask & 0x1))
3884     return -1;
3885 
3886   // The register was valid so need to put the mask in the correct place
3887   // (the flags need to be in bits 11-10) and combine with the SYSmvalue to
3888   // construct the operand for the instruction node.
3889   if (SYSmvalue < 0x4)
3890     return SYSmvalue | Mask << 10;
3891 
3892   return SYSmvalue;
3893 }
3894 
getARClassRegisterMask(StringRef Reg,StringRef Flags)3895 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
3896   // The mask operand contains the special register (R Bit) in bit 4, whether
3897   // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
3898   // bits 3-0 contains the fields to be accessed in the special register, set by
3899   // the flags provided with the register.
3900   int Mask = 0;
3901   if (Reg == "apsr") {
3902     // The flags permitted for apsr are the same flags that are allowed in
3903     // M class registers. We get the flag value and then shift the flags into
3904     // the correct place to combine with the mask.
3905     Mask = getMClassFlagsMask(Flags, true);
3906     if (Mask == -1)
3907       return -1;
3908     return Mask << 2;
3909   }
3910 
3911   if (Reg != "cpsr" && Reg != "spsr") {
3912     return -1;
3913   }
3914 
3915   // This is the same as if the flags were "fc"
3916   if (Flags.empty() || Flags == "all")
3917     return Mask | 0x9;
3918 
3919   // Inspect the supplied flags string and set the bits in the mask for
3920   // the relevant and valid flags allowed for cpsr and spsr.
3921   for (char Flag : Flags) {
3922     int FlagVal;
3923     switch (Flag) {
3924       case 'c':
3925         FlagVal = 0x1;
3926         break;
3927       case 'x':
3928         FlagVal = 0x2;
3929         break;
3930       case 's':
3931         FlagVal = 0x4;
3932         break;
3933       case 'f':
3934         FlagVal = 0x8;
3935         break;
3936       default:
3937         FlagVal = 0;
3938     }
3939 
3940     // This avoids allowing strings where the same flag bit appears twice.
3941     if (!FlagVal || (Mask & FlagVal))
3942       return -1;
3943     Mask |= FlagVal;
3944   }
3945 
3946   // If the register is spsr then we need to set the R bit.
3947   if (Reg == "spsr")
3948     Mask |= 0x10;
3949 
3950   return Mask;
3951 }
3952 
3953 // Lower the read_register intrinsic to ARM specific DAG nodes
3954 // using the supplied metadata string to select the instruction node to use
3955 // and the registers/masks to construct as operands for the node.
tryReadRegister(SDNode * N)3956 bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
3957   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
3958   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
3959   bool IsThumb2 = Subtarget->isThumb2();
3960   SDLoc DL(N);
3961 
3962   std::vector<SDValue> Ops;
3963   getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
3964 
3965   if (!Ops.empty()) {
3966     // If the special register string was constructed of fields (as defined
3967     // in the ACLE) then need to lower to MRC node (32 bit) or
3968     // MRRC node(64 bit), we can make the distinction based on the number of
3969     // operands we have.
3970     unsigned Opcode;
3971     SmallVector<EVT, 3> ResTypes;
3972     if (Ops.size() == 5){
3973       Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
3974       ResTypes.append({ MVT::i32, MVT::Other });
3975     } else {
3976       assert(Ops.size() == 3 &&
3977               "Invalid number of fields in special register string.");
3978       Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
3979       ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
3980     }
3981 
3982     Ops.push_back(getAL(CurDAG, DL));
3983     Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3984     Ops.push_back(N->getOperand(0));
3985     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
3986     return true;
3987   }
3988 
3989   std::string SpecialReg = RegString->getString().lower();
3990 
3991   int BankedReg = getBankedRegisterMask(SpecialReg);
3992   if (BankedReg != -1) {
3993     Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
3994             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3995             N->getOperand(0) };
3996     ReplaceNode(
3997         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
3998                                   DL, MVT::i32, MVT::Other, Ops));
3999     return true;
4000   }
4001 
4002   // The VFP registers are read by creating SelectionDAG nodes with opcodes
4003   // corresponding to the register that is being read from. So we switch on the
4004   // string to find which opcode we need to use.
4005   unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
4006                     .Case("fpscr", ARM::VMRS)
4007                     .Case("fpexc", ARM::VMRS_FPEXC)
4008                     .Case("fpsid", ARM::VMRS_FPSID)
4009                     .Case("mvfr0", ARM::VMRS_MVFR0)
4010                     .Case("mvfr1", ARM::VMRS_MVFR1)
4011                     .Case("mvfr2", ARM::VMRS_MVFR2)
4012                     .Case("fpinst", ARM::VMRS_FPINST)
4013                     .Case("fpinst2", ARM::VMRS_FPINST2)
4014                     .Default(0);
4015 
4016   // If an opcode was found then we can lower the read to a VFP instruction.
4017   if (Opcode) {
4018     if (!Subtarget->hasVFP2())
4019       return false;
4020     if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8())
4021       return false;
4022 
4023     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4024             N->getOperand(0) };
4025     ReplaceNode(N,
4026                 CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
4027     return true;
4028   }
4029 
4030   // If the target is M Class then need to validate that the register string
4031   // is an acceptable value, so check that a mask can be constructed from the
4032   // string.
4033   if (Subtarget->isMClass()) {
4034     StringRef Flags = "", Reg = SpecialReg;
4035     if (Reg.endswith("_ns")) {
4036       Flags = "ns";
4037       Reg = Reg.drop_back(3);
4038     }
4039 
4040     int SYSmValue = getMClassRegisterMask(Reg, Flags, true, Subtarget);
4041     if (SYSmValue == -1)
4042       return false;
4043 
4044     SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4045                       getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4046                       N->getOperand(0) };
4047     ReplaceNode(
4048         N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
4049     return true;
4050   }
4051 
4052   // Here we know the target is not M Class so we need to check if it is one
4053   // of the remaining possible values which are apsr, cpsr or spsr.
4054   if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
4055     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4056             N->getOperand(0) };
4057     ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
4058                                           DL, MVT::i32, MVT::Other, Ops));
4059     return true;
4060   }
4061 
4062   if (SpecialReg == "spsr") {
4063     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4064             N->getOperand(0) };
4065     ReplaceNode(
4066         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
4067                                   MVT::i32, MVT::Other, Ops));
4068     return true;
4069   }
4070 
4071   return false;
4072 }
4073 
4074 // Lower the write_register intrinsic to ARM specific DAG nodes
4075 // using the supplied metadata string to select the instruction node to use
4076 // and the registers/masks to use in the nodes
tryWriteRegister(SDNode * N)4077 bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
4078   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
4079   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
4080   bool IsThumb2 = Subtarget->isThumb2();
4081   SDLoc DL(N);
4082 
4083   std::vector<SDValue> Ops;
4084   getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
4085 
4086   if (!Ops.empty()) {
4087     // If the special register string was constructed of fields (as defined
4088     // in the ACLE) then need to lower to MCR node (32 bit) or
4089     // MCRR node(64 bit), we can make the distinction based on the number of
4090     // operands we have.
4091     unsigned Opcode;
4092     if (Ops.size() == 5) {
4093       Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
4094       Ops.insert(Ops.begin()+2, N->getOperand(2));
4095     } else {
4096       assert(Ops.size() == 3 &&
4097               "Invalid number of fields in special register string.");
4098       Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
4099       SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
4100       Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
4101     }
4102 
4103     Ops.push_back(getAL(CurDAG, DL));
4104     Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4105     Ops.push_back(N->getOperand(0));
4106 
4107     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4108     return true;
4109   }
4110 
4111   std::string SpecialReg = RegString->getString().lower();
4112   int BankedReg = getBankedRegisterMask(SpecialReg);
4113   if (BankedReg != -1) {
4114     Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
4115             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4116             N->getOperand(0) };
4117     ReplaceNode(
4118         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
4119                                   DL, MVT::Other, Ops));
4120     return true;
4121   }
4122 
4123   // The VFP registers are written to by creating SelectionDAG nodes with
4124   // opcodes corresponding to the register that is being written. So we switch
4125   // on the string to find which opcode we need to use.
4126   unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
4127                     .Case("fpscr", ARM::VMSR)
4128                     .Case("fpexc", ARM::VMSR_FPEXC)
4129                     .Case("fpsid", ARM::VMSR_FPSID)
4130                     .Case("fpinst", ARM::VMSR_FPINST)
4131                     .Case("fpinst2", ARM::VMSR_FPINST2)
4132                     .Default(0);
4133 
4134   if (Opcode) {
4135     if (!Subtarget->hasVFP2())
4136       return false;
4137     Ops = { N->getOperand(2), getAL(CurDAG, DL),
4138             CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4139     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4140     return true;
4141   }
4142 
4143   std::pair<StringRef, StringRef> Fields;
4144   Fields = StringRef(SpecialReg).rsplit('_');
4145   std::string Reg = Fields.first.str();
4146   StringRef Flags = Fields.second;
4147 
4148   // If the target was M Class then need to validate the special register value
4149   // and retrieve the mask for use in the instruction node.
4150   if (Subtarget->isMClass()) {
4151     // basepri_max gets split so need to correct Reg and Flags.
4152     if (SpecialReg == "basepri_max") {
4153       Reg = SpecialReg;
4154       Flags = "";
4155     }
4156     int SYSmValue = getMClassRegisterMask(Reg, Flags, false, Subtarget);
4157     if (SYSmValue == -1)
4158       return false;
4159 
4160     SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4161                       N->getOperand(2), getAL(CurDAG, DL),
4162                       CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4163     ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
4164     return true;
4165   }
4166 
4167   // We then check to see if a valid mask can be constructed for one of the
4168   // register string values permitted for the A and R class cores. These values
4169   // are apsr, spsr and cpsr; these are also valid on older cores.
4170   int Mask = getARClassRegisterMask(Reg, Flags);
4171   if (Mask != -1) {
4172     Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
4173             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4174             N->getOperand(0) };
4175     ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
4176                                           DL, MVT::Other, Ops));
4177     return true;
4178   }
4179 
4180   return false;
4181 }
4182 
tryInlineAsm(SDNode * N)4183 bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
4184   std::vector<SDValue> AsmNodeOperands;
4185   unsigned Flag, Kind;
4186   bool Changed = false;
4187   unsigned NumOps = N->getNumOperands();
4188 
4189   // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint.
4190   // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require
4191   // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
4192   // respectively. Since there is no constraint to explicitly specify a
4193   // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
4194   // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
4195   // them into a GPRPair.
4196 
4197   SDLoc dl(N);
4198   SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
4199                                    : SDValue(nullptr,0);
4200 
4201   SmallVector<bool, 8> OpChanged;
4202   // Glue node will be appended late.
4203   for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
4204     SDValue op = N->getOperand(i);
4205     AsmNodeOperands.push_back(op);
4206 
4207     if (i < InlineAsm::Op_FirstOperand)
4208       continue;
4209 
4210     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
4211       Flag = C->getZExtValue();
4212       Kind = InlineAsm::getKind(Flag);
4213     }
4214     else
4215       continue;
4216 
4217     // Immediate operands to inline asm in the SelectionDAG are modeled with
4218     // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
4219     // the second is a constant with the value of the immediate. If we get here
4220     // and we have a Kind_Imm, skip the next operand, and continue.
4221     if (Kind == InlineAsm::Kind_Imm) {
4222       SDValue op = N->getOperand(++i);
4223       AsmNodeOperands.push_back(op);
4224       continue;
4225     }
4226 
4227     unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
4228     if (NumRegs)
4229       OpChanged.push_back(false);
4230 
4231     unsigned DefIdx = 0;
4232     bool IsTiedToChangedOp = false;
4233     // If it's a use that is tied with a previous def, it has no
4234     // reg class constraint.
4235     if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
4236       IsTiedToChangedOp = OpChanged[DefIdx];
4237 
4238     if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
4239         && Kind != InlineAsm::Kind_RegDefEarlyClobber)
4240       continue;
4241 
4242     unsigned RC;
4243     bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
4244     if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
4245         || NumRegs != 2)
4246       continue;
4247 
4248     assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
4249     SDValue V0 = N->getOperand(i+1);
4250     SDValue V1 = N->getOperand(i+2);
4251     unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
4252     unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
4253     SDValue PairedReg;
4254     MachineRegisterInfo &MRI = MF->getRegInfo();
4255 
4256     if (Kind == InlineAsm::Kind_RegDef ||
4257         Kind == InlineAsm::Kind_RegDefEarlyClobber) {
4258       // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
4259       // the original GPRs.
4260 
4261       unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
4262       PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
4263       SDValue Chain = SDValue(N,0);
4264 
4265       SDNode *GU = N->getGluedUser();
4266       SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
4267                                                Chain.getValue(1));
4268 
4269       // Extract values from a GPRPair reg and copy to the original GPR reg.
4270       SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
4271                                                     RegCopy);
4272       SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
4273                                                     RegCopy);
4274       SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
4275                                         RegCopy.getValue(1));
4276       SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
4277 
4278       // Update the original glue user.
4279       std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
4280       Ops.push_back(T1.getValue(1));
4281       CurDAG->UpdateNodeOperands(GU, Ops);
4282     }
4283     else {
4284       // For Kind  == InlineAsm::Kind_RegUse, we first copy two GPRs into a
4285       // GPRPair and then pass the GPRPair to the inline asm.
4286       SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
4287 
4288       // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
4289       SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
4290                                           Chain.getValue(1));
4291       SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
4292                                           T0.getValue(1));
4293       SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);
4294 
4295       // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
4296       // i32 VRs of inline asm with it.
4297       unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
4298       PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
4299       Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
4300 
4301       AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
4302       Glue = Chain.getValue(1);
4303     }
4304 
4305     Changed = true;
4306 
4307     if(PairedReg.getNode()) {
4308       OpChanged[OpChanged.size() -1 ] = true;
4309       Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
4310       if (IsTiedToChangedOp)
4311         Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
4312       else
4313         Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
4314       // Replace the current flag.
4315       AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
4316           Flag, dl, MVT::i32);
4317       // Add the new register node and skip the original two GPRs.
4318       AsmNodeOperands.push_back(PairedReg);
4319       // Skip the next two GPRs.
4320       i += 2;
4321     }
4322   }
4323 
4324   if (Glue.getNode())
4325     AsmNodeOperands.push_back(Glue);
4326   if (!Changed)
4327     return false;
4328 
4329   SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
4330       CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
4331   New->setNodeId(-1);
4332   ReplaceNode(N, New.getNode());
4333   return true;
4334 }
4335 
4336 
4337 bool ARMDAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDValue & Op,unsigned ConstraintID,std::vector<SDValue> & OutOps)4338 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
4339                              std::vector<SDValue> &OutOps) {
4340   switch(ConstraintID) {
4341   default:
4342     llvm_unreachable("Unexpected asm memory constraint");
4343   case InlineAsm::Constraint_i:
4344     // FIXME: It seems strange that 'i' is needed here since it's supposed to
4345     //        be an immediate and not a memory constraint.
4346     // Fallthrough.
4347   case InlineAsm::Constraint_m:
4348   case InlineAsm::Constraint_o:
4349   case InlineAsm::Constraint_Q:
4350   case InlineAsm::Constraint_Um:
4351   case InlineAsm::Constraint_Un:
4352   case InlineAsm::Constraint_Uq:
4353   case InlineAsm::Constraint_Us:
4354   case InlineAsm::Constraint_Ut:
4355   case InlineAsm::Constraint_Uv:
4356   case InlineAsm::Constraint_Uy:
4357     // Require the address to be in a register.  That is safe for all ARM
4358     // variants and it is hard to do anything much smarter without knowing
4359     // how the operand is used.
4360     OutOps.push_back(Op);
4361     return false;
4362   }
4363   return true;
4364 }
4365 
4366 /// createARMISelDag - This pass converts a legalized DAG into a
4367 /// ARM-specific DAG, ready for instruction scheduling.
4368 ///
createARMISelDag(ARMBaseTargetMachine & TM,CodeGenOpt::Level OptLevel)4369 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
4370                                      CodeGenOpt::Level OptLevel) {
4371   return new ARMDAGToDAGISel(TM, OptLevel);
4372 }
4373