1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines an instruction selector for the ARM target.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "ARM.h"
15 #include "ARMBaseInstrInfo.h"
16 #include "ARMTargetMachine.h"
17 #include "MCTargetDesc/ARMAddressingModes.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/CodeGen/MachineFrameInfo.h"
20 #include "llvm/CodeGen/MachineFunction.h"
21 #include "llvm/CodeGen/MachineInstrBuilder.h"
22 #include "llvm/CodeGen/MachineRegisterInfo.h"
23 #include "llvm/CodeGen/SelectionDAG.h"
24 #include "llvm/CodeGen/SelectionDAGISel.h"
25 #include "llvm/IR/CallingConv.h"
26 #include "llvm/IR/Constants.h"
27 #include "llvm/IR/DerivedTypes.h"
28 #include "llvm/IR/Function.h"
29 #include "llvm/IR/Intrinsics.h"
30 #include "llvm/IR/LLVMContext.h"
31 #include "llvm/Support/CommandLine.h"
32 #include "llvm/Support/Debug.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/Target/TargetLowering.h"
35 #include "llvm/Target/TargetOptions.h"
36
37 using namespace llvm;
38
39 #define DEBUG_TYPE "arm-isel"
40
41 static cl::opt<bool>
42 DisableShifterOp("disable-shifter-op", cl::Hidden,
43 cl::desc("Disable isel of shifter-op"),
44 cl::init(false));
45
46 //===--------------------------------------------------------------------===//
47 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
48 /// instructions for SelectionDAG operations.
49 ///
50 namespace {
51
52 enum AddrMode2Type {
53 AM2_BASE, // Simple AM2 (+-imm12)
54 AM2_SHOP // Shifter-op AM2
55 };
56
57 class ARMDAGToDAGISel : public SelectionDAGISel {
58 /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
59 /// make the right decision when generating code for different targets.
60 const ARMSubtarget *Subtarget;
61
62 public:
ARMDAGToDAGISel(ARMBaseTargetMachine & tm,CodeGenOpt::Level OptLevel)63 explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
64 : SelectionDAGISel(tm, OptLevel) {}
65
runOnMachineFunction(MachineFunction & MF)66 bool runOnMachineFunction(MachineFunction &MF) override {
67 // Reset the subtarget each time through.
68 Subtarget = &MF.getSubtarget<ARMSubtarget>();
69 SelectionDAGISel::runOnMachineFunction(MF);
70 return true;
71 }
72
getPassName() const73 const char *getPassName() const override {
74 return "ARM Instruction Selection";
75 }
76
77 void PreprocessISelDAG() override;
78
79 /// getI32Imm - Return a target constant of type i32 with the specified
80 /// value.
getI32Imm(unsigned Imm,const SDLoc & dl)81 inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
82 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
83 }
84
85 void Select(SDNode *N) override;
86
87 bool hasNoVMLxHazardUse(SDNode *N) const;
88 bool isShifterOpProfitable(const SDValue &Shift,
89 ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
90 bool SelectRegShifterOperand(SDValue N, SDValue &A,
91 SDValue &B, SDValue &C,
92 bool CheckProfitability = true);
93 bool SelectImmShifterOperand(SDValue N, SDValue &A,
94 SDValue &B, bool CheckProfitability = true);
SelectShiftRegShifterOperand(SDValue N,SDValue & A,SDValue & B,SDValue & C)95 bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
96 SDValue &B, SDValue &C) {
97 // Don't apply the profitability check
98 return SelectRegShifterOperand(N, A, B, C, false);
99 }
SelectShiftImmShifterOperand(SDValue N,SDValue & A,SDValue & B)100 bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
101 SDValue &B) {
102 // Don't apply the profitability check
103 return SelectImmShifterOperand(N, A, B, false);
104 }
105
106 bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
107 bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
108
109 AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base,
110 SDValue &Offset, SDValue &Opc);
SelectAddrMode2Base(SDValue N,SDValue & Base,SDValue & Offset,SDValue & Opc)111 bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset,
112 SDValue &Opc) {
113 return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE;
114 }
115
SelectAddrMode2ShOp(SDValue N,SDValue & Base,SDValue & Offset,SDValue & Opc)116 bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset,
117 SDValue &Opc) {
118 return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP;
119 }
120
SelectAddrMode2(SDValue N,SDValue & Base,SDValue & Offset,SDValue & Opc)121 bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset,
122 SDValue &Opc) {
123 SelectAddrMode2Worker(N, Base, Offset, Opc);
124 // return SelectAddrMode2ShOp(N, Base, Offset, Opc);
125 // This always matches one way or another.
126 return true;
127 }
128
SelectCMOVPred(SDValue N,SDValue & Pred,SDValue & Reg)129 bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
130 const ConstantSDNode *CN = cast<ConstantSDNode>(N);
131 Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
132 Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
133 return true;
134 }
135
136 bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
137 SDValue &Offset, SDValue &Opc);
138 bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
139 SDValue &Offset, SDValue &Opc);
140 bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
141 SDValue &Offset, SDValue &Opc);
142 bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
143 bool SelectAddrMode3(SDValue N, SDValue &Base,
144 SDValue &Offset, SDValue &Opc);
145 bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
146 SDValue &Offset, SDValue &Opc);
147 bool SelectAddrMode5(SDValue N, SDValue &Base,
148 SDValue &Offset);
149 bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
150 bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
151
152 bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
153
154 // Thumb Addressing Modes:
155 bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
156 bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
157 SDValue &OffImm);
158 bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
159 SDValue &OffImm);
160 bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
161 SDValue &OffImm);
162 bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
163 SDValue &OffImm);
164 bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
165
166 // Thumb 2 Addressing Modes:
167 bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
168 bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
169 SDValue &OffImm);
170 bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
171 SDValue &OffImm);
172 bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
173 SDValue &OffReg, SDValue &ShImm);
174 bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
175
is_so_imm(unsigned Imm) const176 inline bool is_so_imm(unsigned Imm) const {
177 return ARM_AM::getSOImmVal(Imm) != -1;
178 }
179
is_so_imm_not(unsigned Imm) const180 inline bool is_so_imm_not(unsigned Imm) const {
181 return ARM_AM::getSOImmVal(~Imm) != -1;
182 }
183
is_t2_so_imm(unsigned Imm) const184 inline bool is_t2_so_imm(unsigned Imm) const {
185 return ARM_AM::getT2SOImmVal(Imm) != -1;
186 }
187
is_t2_so_imm_not(unsigned Imm) const188 inline bool is_t2_so_imm_not(unsigned Imm) const {
189 return ARM_AM::getT2SOImmVal(~Imm) != -1;
190 }
191
192 // Include the pieces autogenerated from the target description.
193 #include "ARMGenDAGISel.inc"
194
195 private:
196 /// Indexed (pre/post inc/dec) load matching code for ARM.
197 bool tryARMIndexedLoad(SDNode *N);
198 bool tryT2IndexedLoad(SDNode *N);
199
200 /// SelectVLD - Select NEON load intrinsics. NumVecs should be
201 /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
202 /// loads of D registers and even subregs and odd subregs of Q registers.
203 /// For NumVecs <= 2, QOpcodes1 is not used.
204 void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
205 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
206 const uint16_t *QOpcodes1);
207
208 /// SelectVST - Select NEON store intrinsics. NumVecs should
209 /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
210 /// stores of D registers and even subregs and odd subregs of Q registers.
211 /// For NumVecs <= 2, QOpcodes1 is not used.
212 void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
213 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
214 const uint16_t *QOpcodes1);
215
216 /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
217 /// be 2, 3 or 4. The opcode arrays specify the instructions used for
218 /// load/store of D registers and Q registers.
219 void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
220 unsigned NumVecs, const uint16_t *DOpcodes,
221 const uint16_t *QOpcodes);
222
223 /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
224 /// should be 2, 3 or 4. The opcode array specifies the instructions used
225 /// for loading D registers. (Q registers are not supported.)
226 void SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
227 const uint16_t *Opcodes);
228
229 /// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2,
230 /// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be
231 /// generated to force the table registers to be consecutive.
232 void SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc);
233
234 /// Try to select SBFX/UBFX instructions for ARM.
235 bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
236
237 // Select special operations if node forms integer ABS pattern
238 bool tryABSOp(SDNode *N);
239
240 bool tryReadRegister(SDNode *N);
241 bool tryWriteRegister(SDNode *N);
242
243 bool tryInlineAsm(SDNode *N);
244
245 void SelectConcatVector(SDNode *N);
246
247 bool trySMLAWSMULW(SDNode *N);
248
249 void SelectCMP_SWAP(SDNode *N);
250
251 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
252 /// inline asm expressions.
253 bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
254 std::vector<SDValue> &OutOps) override;
255
256 // Form pairs of consecutive R, S, D, or Q registers.
257 SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
258 SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
259 SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
260 SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
261
262 // Form sequences of 4 consecutive S, D, or Q registers.
263 SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
264 SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
265 SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
266
267 // Get the alignment operand for a NEON VLD or VST instruction.
268 SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
269 bool is64BitVector);
270
271 /// Returns the number of instructions required to materialize the given
272 /// constant in a register, or 3 if a literal pool load is needed.
273 unsigned ConstantMaterializationCost(unsigned Val) const;
274
275 /// Checks if N is a multiplication by a constant where we can extract out a
276 /// power of two from the constant so that it can be used in a shift, but only
277 /// if it simplifies the materialization of the constant. Returns true if it
278 /// is, and assigns to PowerOfTwo the power of two that should be extracted
279 /// out and to NewMulConst the new constant to be multiplied by.
280 bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
281 unsigned &PowerOfTwo, SDValue &NewMulConst) const;
282
283 /// Replace N with M in CurDAG, in a way that also ensures that M gets
284 /// selected when N would have been selected.
285 void replaceDAGValue(const SDValue &N, SDValue M);
286 };
287 }
288
289 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
290 /// operand. If so Imm will receive the 32-bit value.
isInt32Immediate(SDNode * N,unsigned & Imm)291 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
292 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
293 Imm = cast<ConstantSDNode>(N)->getZExtValue();
294 return true;
295 }
296 return false;
297 }
298
299 // isInt32Immediate - This method tests to see if a constant operand.
300 // If so Imm will receive the 32 bit value.
isInt32Immediate(SDValue N,unsigned & Imm)301 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
302 return isInt32Immediate(N.getNode(), Imm);
303 }
304
305 // isOpcWithIntImmediate - This method tests to see if the node is a specific
306 // opcode and that it has a immediate integer right operand.
307 // If so Imm will receive the 32 bit value.
isOpcWithIntImmediate(SDNode * N,unsigned Opc,unsigned & Imm)308 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
309 return N->getOpcode() == Opc &&
310 isInt32Immediate(N->getOperand(1).getNode(), Imm);
311 }
312
313 /// \brief Check whether a particular node is a constant value representable as
314 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
315 ///
316 /// \param ScaledConstant [out] - On success, the pre-scaled constant value.
isScaledConstantInRange(SDValue Node,int Scale,int RangeMin,int RangeMax,int & ScaledConstant)317 static bool isScaledConstantInRange(SDValue Node, int Scale,
318 int RangeMin, int RangeMax,
319 int &ScaledConstant) {
320 assert(Scale > 0 && "Invalid scale!");
321
322 // Check that this is a constant.
323 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
324 if (!C)
325 return false;
326
327 ScaledConstant = (int) C->getZExtValue();
328 if ((ScaledConstant % Scale) != 0)
329 return false;
330
331 ScaledConstant /= Scale;
332 return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
333 }
334
PreprocessISelDAG()335 void ARMDAGToDAGISel::PreprocessISelDAG() {
336 if (!Subtarget->hasV6T2Ops())
337 return;
338
339 bool isThumb2 = Subtarget->isThumb();
340 for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
341 E = CurDAG->allnodes_end(); I != E; ) {
342 SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
343
344 if (N->getOpcode() != ISD::ADD)
345 continue;
346
347 // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
348 // leading zeros, followed by consecutive set bits, followed by 1 or 2
349 // trailing zeros, e.g. 1020.
350 // Transform the expression to
351 // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
352 // of trailing zeros of c2. The left shift would be folded as an shifter
353 // operand of 'add' and the 'and' and 'srl' would become a bits extraction
354 // node (UBFX).
355
356 SDValue N0 = N->getOperand(0);
357 SDValue N1 = N->getOperand(1);
358 unsigned And_imm = 0;
359 if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
360 if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
361 std::swap(N0, N1);
362 }
363 if (!And_imm)
364 continue;
365
366 // Check if the AND mask is an immediate of the form: 000.....1111111100
367 unsigned TZ = countTrailingZeros(And_imm);
368 if (TZ != 1 && TZ != 2)
369 // Be conservative here. Shifter operands aren't always free. e.g. On
370 // Swift, left shifter operand of 1 / 2 for free but others are not.
371 // e.g.
372 // ubfx r3, r1, #16, #8
373 // ldr.w r3, [r0, r3, lsl #2]
374 // vs.
375 // mov.w r9, #1020
376 // and.w r2, r9, r1, lsr #14
377 // ldr r2, [r0, r2]
378 continue;
379 And_imm >>= TZ;
380 if (And_imm & (And_imm + 1))
381 continue;
382
383 // Look for (and (srl X, c1), c2).
384 SDValue Srl = N1.getOperand(0);
385 unsigned Srl_imm = 0;
386 if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
387 (Srl_imm <= 2))
388 continue;
389
390 // Make sure first operand is not a shifter operand which would prevent
391 // folding of the left shift.
392 SDValue CPTmp0;
393 SDValue CPTmp1;
394 SDValue CPTmp2;
395 if (isThumb2) {
396 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
397 continue;
398 } else {
399 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
400 SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
401 continue;
402 }
403
404 // Now make the transformation.
405 Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
406 Srl.getOperand(0),
407 CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
408 MVT::i32));
409 N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
410 Srl,
411 CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
412 N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
413 N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
414 CurDAG->UpdateNodeOperands(N, N0, N1);
415 }
416 }
417
418 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
419 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
420 /// least on current ARM implementations) which should be avoidded.
hasNoVMLxHazardUse(SDNode * N) const421 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
422 if (OptLevel == CodeGenOpt::None)
423 return true;
424
425 if (!Subtarget->hasVMLxHazards())
426 return true;
427
428 if (!N->hasOneUse())
429 return false;
430
431 SDNode *Use = *N->use_begin();
432 if (Use->getOpcode() == ISD::CopyToReg)
433 return true;
434 if (Use->isMachineOpcode()) {
435 const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
436 CurDAG->getSubtarget().getInstrInfo());
437
438 const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
439 if (MCID.mayStore())
440 return true;
441 unsigned Opcode = MCID.getOpcode();
442 if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
443 return true;
444 // vmlx feeding into another vmlx. We actually want to unfold
445 // the use later in the MLxExpansion pass. e.g.
446 // vmla
447 // vmla (stall 8 cycles)
448 //
449 // vmul (5 cycles)
450 // vadd (5 cycles)
451 // vmla
452 // This adds up to about 18 - 19 cycles.
453 //
454 // vmla
455 // vmul (stall 4 cycles)
456 // vadd adds up to about 14 cycles.
457 return TII->isFpMLxInstruction(Opcode);
458 }
459
460 return false;
461 }
462
isShifterOpProfitable(const SDValue & Shift,ARM_AM::ShiftOpc ShOpcVal,unsigned ShAmt)463 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
464 ARM_AM::ShiftOpc ShOpcVal,
465 unsigned ShAmt) {
466 if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
467 return true;
468 if (Shift.hasOneUse())
469 return true;
470 // R << 2 is free.
471 return ShOpcVal == ARM_AM::lsl &&
472 (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
473 }
474
ConstantMaterializationCost(unsigned Val) const475 unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
476 if (Subtarget->isThumb()) {
477 if (Val <= 255) return 1; // MOV
478 if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
479 if (Val <= 510) return 2; // MOV + ADDi8
480 if (~Val <= 255) return 2; // MOV + MVN
481 if (ARM_AM::isThumbImmShiftedVal(Val)) return 2; // MOV + LSL
482 } else {
483 if (ARM_AM::getSOImmVal(Val) != -1) return 1; // MOV
484 if (ARM_AM::getSOImmVal(~Val) != -1) return 1; // MVN
485 if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
486 if (ARM_AM::isSOImmTwoPartVal(Val)) return 2; // two instrs
487 }
488 if (Subtarget->useMovt(*MF)) return 2; // MOVW + MOVT
489 return 3; // Literal pool load
490 }
491
canExtractShiftFromMul(const SDValue & N,unsigned MaxShift,unsigned & PowerOfTwo,SDValue & NewMulConst) const492 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
493 unsigned MaxShift,
494 unsigned &PowerOfTwo,
495 SDValue &NewMulConst) const {
496 assert(N.getOpcode() == ISD::MUL);
497 assert(MaxShift > 0);
498
499 // If the multiply is used in more than one place then changing the constant
500 // will make other uses incorrect, so don't.
501 if (!N.hasOneUse()) return false;
502 // Check if the multiply is by a constant
503 ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
504 if (!MulConst) return false;
505 // If the constant is used in more than one place then modifying it will mean
506 // we need to materialize two constants instead of one, which is a bad idea.
507 if (!MulConst->hasOneUse()) return false;
508 unsigned MulConstVal = MulConst->getZExtValue();
509 if (MulConstVal == 0) return false;
510
511 // Find the largest power of 2 that MulConstVal is a multiple of
512 PowerOfTwo = MaxShift;
513 while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
514 --PowerOfTwo;
515 if (PowerOfTwo == 0) return false;
516 }
517
518 // Only optimise if the new cost is better
519 unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
520 NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
521 unsigned OldCost = ConstantMaterializationCost(MulConstVal);
522 unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
523 return NewCost < OldCost;
524 }
525
replaceDAGValue(const SDValue & N,SDValue M)526 void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
527 CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
528 CurDAG->ReplaceAllUsesWith(N, M);
529 }
530
SelectImmShifterOperand(SDValue N,SDValue & BaseReg,SDValue & Opc,bool CheckProfitability)531 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
532 SDValue &BaseReg,
533 SDValue &Opc,
534 bool CheckProfitability) {
535 if (DisableShifterOp)
536 return false;
537
538 // If N is a multiply-by-constant and it's profitable to extract a shift and
539 // use it in a shifted operand do so.
540 if (N.getOpcode() == ISD::MUL) {
541 unsigned PowerOfTwo = 0;
542 SDValue NewMulConst;
543 if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
544 HandleSDNode Handle(N);
545 replaceDAGValue(N.getOperand(1), NewMulConst);
546 BaseReg = Handle.getValue();
547 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ARM_AM::lsl,
548 PowerOfTwo),
549 SDLoc(N), MVT::i32);
550 return true;
551 }
552 }
553
554 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
555
556 // Don't match base register only case. That is matched to a separate
557 // lower complexity pattern with explicit register operand.
558 if (ShOpcVal == ARM_AM::no_shift) return false;
559
560 BaseReg = N.getOperand(0);
561 unsigned ShImmVal = 0;
562 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
563 if (!RHS) return false;
564 ShImmVal = RHS->getZExtValue() & 31;
565 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
566 SDLoc(N), MVT::i32);
567 return true;
568 }
569
SelectRegShifterOperand(SDValue N,SDValue & BaseReg,SDValue & ShReg,SDValue & Opc,bool CheckProfitability)570 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
571 SDValue &BaseReg,
572 SDValue &ShReg,
573 SDValue &Opc,
574 bool CheckProfitability) {
575 if (DisableShifterOp)
576 return false;
577
578 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
579
580 // Don't match base register only case. That is matched to a separate
581 // lower complexity pattern with explicit register operand.
582 if (ShOpcVal == ARM_AM::no_shift) return false;
583
584 BaseReg = N.getOperand(0);
585 unsigned ShImmVal = 0;
586 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
587 if (RHS) return false;
588
589 ShReg = N.getOperand(1);
590 if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
591 return false;
592 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
593 SDLoc(N), MVT::i32);
594 return true;
595 }
596
597
SelectAddrModeImm12(SDValue N,SDValue & Base,SDValue & OffImm)598 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
599 SDValue &Base,
600 SDValue &OffImm) {
601 // Match simple R + imm12 operands.
602
603 // Base only.
604 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
605 !CurDAG->isBaseWithConstantOffset(N)) {
606 if (N.getOpcode() == ISD::FrameIndex) {
607 // Match frame index.
608 int FI = cast<FrameIndexSDNode>(N)->getIndex();
609 Base = CurDAG->getTargetFrameIndex(
610 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
611 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
612 return true;
613 }
614
615 if (N.getOpcode() == ARMISD::Wrapper &&
616 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
617 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
618 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
619 Base = N.getOperand(0);
620 } else
621 Base = N;
622 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
623 return true;
624 }
625
626 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
627 int RHSC = (int)RHS->getSExtValue();
628 if (N.getOpcode() == ISD::SUB)
629 RHSC = -RHSC;
630
631 if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
632 Base = N.getOperand(0);
633 if (Base.getOpcode() == ISD::FrameIndex) {
634 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
635 Base = CurDAG->getTargetFrameIndex(
636 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
637 }
638 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
639 return true;
640 }
641 }
642
643 // Base only.
644 Base = N;
645 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
646 return true;
647 }
648
649
650
SelectLdStSOReg(SDValue N,SDValue & Base,SDValue & Offset,SDValue & Opc)651 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
652 SDValue &Opc) {
653 if (N.getOpcode() == ISD::MUL &&
654 ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
655 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
656 // X * [3,5,9] -> X + X * [2,4,8] etc.
657 int RHSC = (int)RHS->getZExtValue();
658 if (RHSC & 1) {
659 RHSC = RHSC & ~1;
660 ARM_AM::AddrOpc AddSub = ARM_AM::add;
661 if (RHSC < 0) {
662 AddSub = ARM_AM::sub;
663 RHSC = - RHSC;
664 }
665 if (isPowerOf2_32(RHSC)) {
666 unsigned ShAmt = Log2_32(RHSC);
667 Base = Offset = N.getOperand(0);
668 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
669 ARM_AM::lsl),
670 SDLoc(N), MVT::i32);
671 return true;
672 }
673 }
674 }
675 }
676
677 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
678 // ISD::OR that is equivalent to an ISD::ADD.
679 !CurDAG->isBaseWithConstantOffset(N))
680 return false;
681
682 // Leave simple R +/- imm12 operands for LDRi12
683 if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
684 int RHSC;
685 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
686 -0x1000+1, 0x1000, RHSC)) // 12 bits.
687 return false;
688 }
689
690 // Otherwise this is R +/- [possibly shifted] R.
691 ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
692 ARM_AM::ShiftOpc ShOpcVal =
693 ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
694 unsigned ShAmt = 0;
695
696 Base = N.getOperand(0);
697 Offset = N.getOperand(1);
698
699 if (ShOpcVal != ARM_AM::no_shift) {
700 // Check to see if the RHS of the shift is a constant, if not, we can't fold
701 // it.
702 if (ConstantSDNode *Sh =
703 dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
704 ShAmt = Sh->getZExtValue();
705 if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
706 Offset = N.getOperand(1).getOperand(0);
707 else {
708 ShAmt = 0;
709 ShOpcVal = ARM_AM::no_shift;
710 }
711 } else {
712 ShOpcVal = ARM_AM::no_shift;
713 }
714 }
715
716 // Try matching (R shl C) + (R).
717 if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
718 !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
719 N.getOperand(0).hasOneUse())) {
720 ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
721 if (ShOpcVal != ARM_AM::no_shift) {
722 // Check to see if the RHS of the shift is a constant, if not, we can't
723 // fold it.
724 if (ConstantSDNode *Sh =
725 dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
726 ShAmt = Sh->getZExtValue();
727 if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
728 Offset = N.getOperand(0).getOperand(0);
729 Base = N.getOperand(1);
730 } else {
731 ShAmt = 0;
732 ShOpcVal = ARM_AM::no_shift;
733 }
734 } else {
735 ShOpcVal = ARM_AM::no_shift;
736 }
737 }
738 }
739
740 // If Offset is a multiply-by-constant and it's profitable to extract a shift
741 // and use it in a shifted operand do so.
742 if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
743 unsigned PowerOfTwo = 0;
744 SDValue NewMulConst;
745 if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
746 replaceDAGValue(Offset.getOperand(1), NewMulConst);
747 ShAmt = PowerOfTwo;
748 ShOpcVal = ARM_AM::lsl;
749 }
750 }
751
752 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
753 SDLoc(N), MVT::i32);
754 return true;
755 }
756
757
758 //-----
759
SelectAddrMode2Worker(SDValue N,SDValue & Base,SDValue & Offset,SDValue & Opc)760 AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
761 SDValue &Base,
762 SDValue &Offset,
763 SDValue &Opc) {
764 if (N.getOpcode() == ISD::MUL &&
765 (!(Subtarget->isLikeA9() || Subtarget->isSwift()) || N.hasOneUse())) {
766 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
767 // X * [3,5,9] -> X + X * [2,4,8] etc.
768 int RHSC = (int)RHS->getZExtValue();
769 if (RHSC & 1) {
770 RHSC = RHSC & ~1;
771 ARM_AM::AddrOpc AddSub = ARM_AM::add;
772 if (RHSC < 0) {
773 AddSub = ARM_AM::sub;
774 RHSC = - RHSC;
775 }
776 if (isPowerOf2_32(RHSC)) {
777 unsigned ShAmt = Log2_32(RHSC);
778 Base = Offset = N.getOperand(0);
779 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
780 ARM_AM::lsl),
781 SDLoc(N), MVT::i32);
782 return AM2_SHOP;
783 }
784 }
785 }
786 }
787
788 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
789 // ISD::OR that is equivalent to an ADD.
790 !CurDAG->isBaseWithConstantOffset(N)) {
791 Base = N;
792 if (N.getOpcode() == ISD::FrameIndex) {
793 int FI = cast<FrameIndexSDNode>(N)->getIndex();
794 Base = CurDAG->getTargetFrameIndex(
795 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
796 } else if (N.getOpcode() == ARMISD::Wrapper &&
797 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
798 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
799 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
800 Base = N.getOperand(0);
801 }
802 Offset = CurDAG->getRegister(0, MVT::i32);
803 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
804 ARM_AM::no_shift),
805 SDLoc(N), MVT::i32);
806 return AM2_BASE;
807 }
808
809 // Match simple R +/- imm12 operands.
810 if (N.getOpcode() != ISD::SUB) {
811 int RHSC;
812 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
813 -0x1000+1, 0x1000, RHSC)) { // 12 bits.
814 Base = N.getOperand(0);
815 if (Base.getOpcode() == ISD::FrameIndex) {
816 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
817 Base = CurDAG->getTargetFrameIndex(
818 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
819 }
820 Offset = CurDAG->getRegister(0, MVT::i32);
821
822 ARM_AM::AddrOpc AddSub = ARM_AM::add;
823 if (RHSC < 0) {
824 AddSub = ARM_AM::sub;
825 RHSC = - RHSC;
826 }
827 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
828 ARM_AM::no_shift),
829 SDLoc(N), MVT::i32);
830 return AM2_BASE;
831 }
832 }
833
834 if ((Subtarget->isLikeA9() || Subtarget->isSwift()) && !N.hasOneUse()) {
835 // Compute R +/- (R << N) and reuse it.
836 Base = N;
837 Offset = CurDAG->getRegister(0, MVT::i32);
838 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
839 ARM_AM::no_shift),
840 SDLoc(N), MVT::i32);
841 return AM2_BASE;
842 }
843
844 // Otherwise this is R +/- [possibly shifted] R.
845 ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub;
846 ARM_AM::ShiftOpc ShOpcVal =
847 ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
848 unsigned ShAmt = 0;
849
850 Base = N.getOperand(0);
851 Offset = N.getOperand(1);
852
853 if (ShOpcVal != ARM_AM::no_shift) {
854 // Check to see if the RHS of the shift is a constant, if not, we can't fold
855 // it.
856 if (ConstantSDNode *Sh =
857 dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
858 ShAmt = Sh->getZExtValue();
859 if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
860 Offset = N.getOperand(1).getOperand(0);
861 else {
862 ShAmt = 0;
863 ShOpcVal = ARM_AM::no_shift;
864 }
865 } else {
866 ShOpcVal = ARM_AM::no_shift;
867 }
868 }
869
870 // Try matching (R shl C) + (R).
871 if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
872 !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
873 N.getOperand(0).hasOneUse())) {
874 ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
875 if (ShOpcVal != ARM_AM::no_shift) {
876 // Check to see if the RHS of the shift is a constant, if not, we can't
877 // fold it.
878 if (ConstantSDNode *Sh =
879 dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
880 ShAmt = Sh->getZExtValue();
881 if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
882 Offset = N.getOperand(0).getOperand(0);
883 Base = N.getOperand(1);
884 } else {
885 ShAmt = 0;
886 ShOpcVal = ARM_AM::no_shift;
887 }
888 } else {
889 ShOpcVal = ARM_AM::no_shift;
890 }
891 }
892 }
893
894 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
895 SDLoc(N), MVT::i32);
896 return AM2_SHOP;
897 }
898
SelectAddrMode2OffsetReg(SDNode * Op,SDValue N,SDValue & Offset,SDValue & Opc)899 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
900 SDValue &Offset, SDValue &Opc) {
901 unsigned Opcode = Op->getOpcode();
902 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
903 ? cast<LoadSDNode>(Op)->getAddressingMode()
904 : cast<StoreSDNode>(Op)->getAddressingMode();
905 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
906 ? ARM_AM::add : ARM_AM::sub;
907 int Val;
908 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
909 return false;
910
911 Offset = N;
912 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
913 unsigned ShAmt = 0;
914 if (ShOpcVal != ARM_AM::no_shift) {
915 // Check to see if the RHS of the shift is a constant, if not, we can't fold
916 // it.
917 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
918 ShAmt = Sh->getZExtValue();
919 if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
920 Offset = N.getOperand(0);
921 else {
922 ShAmt = 0;
923 ShOpcVal = ARM_AM::no_shift;
924 }
925 } else {
926 ShOpcVal = ARM_AM::no_shift;
927 }
928 }
929
930 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
931 SDLoc(N), MVT::i32);
932 return true;
933 }
934
SelectAddrMode2OffsetImmPre(SDNode * Op,SDValue N,SDValue & Offset,SDValue & Opc)935 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
936 SDValue &Offset, SDValue &Opc) {
937 unsigned Opcode = Op->getOpcode();
938 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
939 ? cast<LoadSDNode>(Op)->getAddressingMode()
940 : cast<StoreSDNode>(Op)->getAddressingMode();
941 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
942 ? ARM_AM::add : ARM_AM::sub;
943 int Val;
944 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
945 if (AddSub == ARM_AM::sub) Val *= -1;
946 Offset = CurDAG->getRegister(0, MVT::i32);
947 Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
948 return true;
949 }
950
951 return false;
952 }
953
954
SelectAddrMode2OffsetImm(SDNode * Op,SDValue N,SDValue & Offset,SDValue & Opc)955 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
956 SDValue &Offset, SDValue &Opc) {
957 unsigned Opcode = Op->getOpcode();
958 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
959 ? cast<LoadSDNode>(Op)->getAddressingMode()
960 : cast<StoreSDNode>(Op)->getAddressingMode();
961 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
962 ? ARM_AM::add : ARM_AM::sub;
963 int Val;
964 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
965 Offset = CurDAG->getRegister(0, MVT::i32);
966 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
967 ARM_AM::no_shift),
968 SDLoc(Op), MVT::i32);
969 return true;
970 }
971
972 return false;
973 }
974
/// Addressing-mode selector that accepts any address: \p N is passed through
/// unchanged as \p Base and the match always succeeds.
bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}
979
SelectAddrMode3(SDValue N,SDValue & Base,SDValue & Offset,SDValue & Opc)980 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
981 SDValue &Base, SDValue &Offset,
982 SDValue &Opc) {
983 if (N.getOpcode() == ISD::SUB) {
984 // X - C is canonicalize to X + -C, no need to handle it here.
985 Base = N.getOperand(0);
986 Offset = N.getOperand(1);
987 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
988 MVT::i32);
989 return true;
990 }
991
992 if (!CurDAG->isBaseWithConstantOffset(N)) {
993 Base = N;
994 if (N.getOpcode() == ISD::FrameIndex) {
995 int FI = cast<FrameIndexSDNode>(N)->getIndex();
996 Base = CurDAG->getTargetFrameIndex(
997 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
998 }
999 Offset = CurDAG->getRegister(0, MVT::i32);
1000 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
1001 MVT::i32);
1002 return true;
1003 }
1004
1005 // If the RHS is +/- imm8, fold into addr mode.
1006 int RHSC;
1007 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
1008 -256 + 1, 256, RHSC)) { // 8 bits.
1009 Base = N.getOperand(0);
1010 if (Base.getOpcode() == ISD::FrameIndex) {
1011 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1012 Base = CurDAG->getTargetFrameIndex(
1013 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1014 }
1015 Offset = CurDAG->getRegister(0, MVT::i32);
1016
1017 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1018 if (RHSC < 0) {
1019 AddSub = ARM_AM::sub;
1020 RHSC = -RHSC;
1021 }
1022 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
1023 MVT::i32);
1024 return true;
1025 }
1026
1027 Base = N.getOperand(0);
1028 Offset = N.getOperand(1);
1029 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
1030 MVT::i32);
1031 return true;
1032 }
1033
SelectAddrMode3Offset(SDNode * Op,SDValue N,SDValue & Offset,SDValue & Opc)1034 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
1035 SDValue &Offset, SDValue &Opc) {
1036 unsigned Opcode = Op->getOpcode();
1037 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1038 ? cast<LoadSDNode>(Op)->getAddressingMode()
1039 : cast<StoreSDNode>(Op)->getAddressingMode();
1040 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
1041 ? ARM_AM::add : ARM_AM::sub;
1042 int Val;
1043 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
1044 Offset = CurDAG->getRegister(0, MVT::i32);
1045 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
1046 MVT::i32);
1047 return true;
1048 }
1049
1050 Offset = N;
1051 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
1052 MVT::i32);
1053 return true;
1054 }
1055
SelectAddrMode5(SDValue N,SDValue & Base,SDValue & Offset)1056 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
1057 SDValue &Base, SDValue &Offset) {
1058 if (!CurDAG->isBaseWithConstantOffset(N)) {
1059 Base = N;
1060 if (N.getOpcode() == ISD::FrameIndex) {
1061 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1062 Base = CurDAG->getTargetFrameIndex(
1063 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1064 } else if (N.getOpcode() == ARMISD::Wrapper &&
1065 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1066 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1067 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1068 Base = N.getOperand(0);
1069 }
1070 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
1071 SDLoc(N), MVT::i32);
1072 return true;
1073 }
1074
1075 // If the RHS is +/- imm8, fold into addr mode.
1076 int RHSC;
1077 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4,
1078 -256 + 1, 256, RHSC)) {
1079 Base = N.getOperand(0);
1080 if (Base.getOpcode() == ISD::FrameIndex) {
1081 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1082 Base = CurDAG->getTargetFrameIndex(
1083 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1084 }
1085
1086 ARM_AM::AddrOpc AddSub = ARM_AM::add;
1087 if (RHSC < 0) {
1088 AddSub = ARM_AM::sub;
1089 RHSC = -RHSC;
1090 }
1091 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
1092 SDLoc(N), MVT::i32);
1093 return true;
1094 }
1095
1096 Base = N;
1097 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
1098 SDLoc(N), MVT::i32);
1099 return true;
1100 }
1101
SelectAddrMode6(SDNode * Parent,SDValue N,SDValue & Addr,SDValue & Align)1102 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
1103 SDValue &Align) {
1104 Addr = N;
1105
1106 unsigned Alignment = 0;
1107
1108 MemSDNode *MemN = cast<MemSDNode>(Parent);
1109
1110 if (isa<LSBaseSDNode>(MemN) ||
1111 ((MemN->getOpcode() == ARMISD::VST1_UPD ||
1112 MemN->getOpcode() == ARMISD::VLD1_UPD) &&
1113 MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
1114 // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
1115 // The maximum alignment is equal to the memory size being referenced.
1116 unsigned MMOAlign = MemN->getAlignment();
1117 unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
1118 if (MMOAlign >= MemSize && MemSize > 1)
1119 Alignment = MemSize;
1120 } else {
1121 // All other uses of addrmode6 are for intrinsics. For now just record
1122 // the raw alignment value; it will be refined later based on the legal
1123 // alignment operands for the intrinsic.
1124 Alignment = MemN->getAlignment();
1125 }
1126
1127 Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
1128 return true;
1129 }
1130
SelectAddrMode6Offset(SDNode * Op,SDValue N,SDValue & Offset)1131 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1132 SDValue &Offset) {
1133 LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
1134 ISD::MemIndexedMode AM = LdSt->getAddressingMode();
1135 if (AM != ISD::POST_INC)
1136 return false;
1137 Offset = N;
1138 if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
1139 if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1140 Offset = CurDAG->getRegister(0, MVT::i32);
1141 }
1142 return true;
1143 }
1144
SelectAddrModePC(SDValue N,SDValue & Offset,SDValue & Label)1145 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1146 SDValue &Offset, SDValue &Label) {
1147 if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1148 Offset = N.getOperand(0);
1149 SDValue N1 = N.getOperand(1);
1150 Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
1151 SDLoc(N), MVT::i32);
1152 return true;
1153 }
1154
1155 return false;
1156 }
1157
1158
1159 //===----------------------------------------------------------------------===//
1160 // Thumb Addressing Modes
1161 //===----------------------------------------------------------------------===//
1162
SelectThumbAddrModeRR(SDValue N,SDValue & Base,SDValue & Offset)1163 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
1164 SDValue &Base, SDValue &Offset){
1165 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1166 ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
1167 if (!NC || !NC->isNullValue())
1168 return false;
1169
1170 Base = Offset = N;
1171 return true;
1172 }
1173
1174 Base = N.getOperand(0);
1175 Offset = N.getOperand(1);
1176 return true;
1177 }
1178
1179 bool
SelectThumbAddrModeImm5S(SDValue N,unsigned Scale,SDValue & Base,SDValue & OffImm)1180 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
1181 SDValue &Base, SDValue &OffImm) {
1182 if (!CurDAG->isBaseWithConstantOffset(N)) {
1183 if (N.getOpcode() == ISD::ADD) {
1184 return false; // We want to select register offset instead
1185 } else if (N.getOpcode() == ARMISD::Wrapper &&
1186 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1187 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1188 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1189 Base = N.getOperand(0);
1190 } else {
1191 Base = N;
1192 }
1193
1194 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1195 return true;
1196 }
1197
1198 // If the RHS is + imm5 * scale, fold into addr mode.
1199 int RHSC;
1200 if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
1201 Base = N.getOperand(0);
1202 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1203 return true;
1204 }
1205
1206 // Offset is too large, so use register offset instead.
1207 return false;
1208 }
1209
/// Thumb base + imm5 selector with a scale of 4; forwards to
/// SelectThumbAddrModeImm5S.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
}
1215
/// Thumb base + imm5 selector with a scale of 2; forwards to
/// SelectThumbAddrModeImm5S.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
}
1221
/// Thumb base + imm5 selector with a scale of 1; forwards to
/// SelectThumbAddrModeImm5S.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
}
1227
SelectThumbAddrModeSP(SDValue N,SDValue & Base,SDValue & OffImm)1228 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
1229 SDValue &Base, SDValue &OffImm) {
1230 if (N.getOpcode() == ISD::FrameIndex) {
1231 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1232 // Only multiples of 4 are allowed for the offset, so the frame object
1233 // alignment must be at least 4.
1234 MachineFrameInfo *MFI = MF->getFrameInfo();
1235 if (MFI->getObjectAlignment(FI) < 4)
1236 MFI->setObjectAlignment(FI, 4);
1237 Base = CurDAG->getTargetFrameIndex(
1238 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1239 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1240 return true;
1241 }
1242
1243 if (!CurDAG->isBaseWithConstantOffset(N))
1244 return false;
1245
1246 RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
1247 if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
1248 (LHSR && LHSR->getReg() == ARM::SP)) {
1249 // If the RHS is + imm8 * scale, fold into addr mode.
1250 int RHSC;
1251 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
1252 Base = N.getOperand(0);
1253 if (Base.getOpcode() == ISD::FrameIndex) {
1254 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1255 // For LHS+RHS to result in an offset that's a multiple of 4 the object
1256 // indexed by the LHS must be 4-byte aligned.
1257 MachineFrameInfo *MFI = MF->getFrameInfo();
1258 if (MFI->getObjectAlignment(FI) < 4)
1259 MFI->setObjectAlignment(FI, 4);
1260 Base = CurDAG->getTargetFrameIndex(
1261 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1262 }
1263 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1264 return true;
1265 }
1266 }
1267
1268 return false;
1269 }
1270
1271
1272 //===----------------------------------------------------------------------===//
1273 // Thumb 2 Addressing Modes
1274 //===----------------------------------------------------------------------===//
1275
1276
SelectT2AddrModeImm12(SDValue N,SDValue & Base,SDValue & OffImm)1277 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
1278 SDValue &Base, SDValue &OffImm) {
1279 // Match simple R + imm12 operands.
1280
1281 // Base only.
1282 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1283 !CurDAG->isBaseWithConstantOffset(N)) {
1284 if (N.getOpcode() == ISD::FrameIndex) {
1285 // Match frame index.
1286 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1287 Base = CurDAG->getTargetFrameIndex(
1288 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1289 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1290 return true;
1291 }
1292
1293 if (N.getOpcode() == ARMISD::Wrapper &&
1294 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1295 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1296 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1297 Base = N.getOperand(0);
1298 if (Base.getOpcode() == ISD::TargetConstantPool)
1299 return false; // We want to select t2LDRpci instead.
1300 } else
1301 Base = N;
1302 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1303 return true;
1304 }
1305
1306 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1307 if (SelectT2AddrModeImm8(N, Base, OffImm))
1308 // Let t2LDRi8 handle (R - imm8).
1309 return false;
1310
1311 int RHSC = (int)RHS->getZExtValue();
1312 if (N.getOpcode() == ISD::SUB)
1313 RHSC = -RHSC;
1314
1315 if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
1316 Base = N.getOperand(0);
1317 if (Base.getOpcode() == ISD::FrameIndex) {
1318 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1319 Base = CurDAG->getTargetFrameIndex(
1320 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1321 }
1322 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1323 return true;
1324 }
1325 }
1326
1327 // Base only.
1328 Base = N;
1329 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1330 return true;
1331 }
1332
SelectT2AddrModeImm8(SDValue N,SDValue & Base,SDValue & OffImm)1333 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1334 SDValue &Base, SDValue &OffImm) {
1335 // Match simple R - imm8 operands.
1336 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1337 !CurDAG->isBaseWithConstantOffset(N))
1338 return false;
1339
1340 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1341 int RHSC = (int)RHS->getSExtValue();
1342 if (N.getOpcode() == ISD::SUB)
1343 RHSC = -RHSC;
1344
1345 if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
1346 Base = N.getOperand(0);
1347 if (Base.getOpcode() == ISD::FrameIndex) {
1348 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1349 Base = CurDAG->getTargetFrameIndex(
1350 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1351 }
1352 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1353 return true;
1354 }
1355 }
1356
1357 return false;
1358 }
1359
SelectT2AddrModeImm8Offset(SDNode * Op,SDValue N,SDValue & OffImm)1360 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1361 SDValue &OffImm){
1362 unsigned Opcode = Op->getOpcode();
1363 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1364 ? cast<LoadSDNode>(Op)->getAddressingMode()
1365 : cast<StoreSDNode>(Op)->getAddressingMode();
1366 int RHSC;
1367 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1368 OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1369 ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
1370 : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1371 return true;
1372 }
1373
1374 return false;
1375 }
1376
/// Select a Thumb2 base + (optionally left-shifted) register address.
///
/// Fails, without touching the outputs, when \p N is neither an ADD nor a
/// base-with-constant-offset, or when its right-hand side is a constant that
/// the imm12 / imm8 selectors are expected to take. Otherwise \p Base and
/// \p OffReg receive the two operands (swapped if only the left one is an
/// LSL) and \p ShImm the shift amount folded into the addressing mode
/// (0 when nothing is folded).
///
/// NOTE: the multiply path below rewrites the DAG through replaceDAGValue,
/// so this selector is not side-effect free.
bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                            SDValue &Base,
                                            SDValue &OffReg, SDValue &ShImm) {
  // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    // The zero-extended value is truncated to int, so constants with the top
    // bit of the low 32 bits set are seen as negative here.
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
      return false;
    else if (RHSC < 0 && RHSC >= -255) // 8 bits
      return false;
  }

  // Look for (R + R) or (R + (R << [1,2,3])).
  unsigned ShAmt = 0;
  Base = N.getOperand(0);
  OffReg = N.getOperand(1);

  // Swap if it is ((R << c) + R).
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
  if (ShOpcVal != ARM_AM::lsl) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
    if (ShOpcVal == ARM_AM::lsl)
      std::swap(Base, OffReg);
  }

  if (ShOpcVal == ARM_AM::lsl) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      // Fold only shift amounts below 4 that are considered profitable;
      // otherwise keep the shift node itself as the offset register.
      if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
        OffReg = OffReg.getOperand(0);
      else {
        ShAmt = 0;
      }
    }
  }

  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    // NOTE(review): presumably the literal 3 is the largest shift amount that
    // may be extracted -- confirm against canExtractShiftFromMul.
    if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
      // Replace the multiply's constant operand with the reduced constant and
      // use the extracted power of two as the shift amount.
      replaceDAGValue(OffReg.getOperand(1), NewMulConst);
      ShAmt = PowerOfTwo;
    }
  }

  ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);

  return true;
}
1434
SelectT2AddrModeExclusive(SDValue N,SDValue & Base,SDValue & OffImm)1435 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1436 SDValue &OffImm) {
1437 // This *must* succeed since it's used for the irreplaceable ldrex and strex
1438 // instructions.
1439 Base = N;
1440 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1441
1442 if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
1443 return true;
1444
1445 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1446 if (!RHS)
1447 return true;
1448
1449 uint32_t RHSC = (int)RHS->getZExtValue();
1450 if (RHSC > 1020 || RHSC % 4 != 0)
1451 return true;
1452
1453 Base = N.getOperand(0);
1454 if (Base.getOpcode() == ISD::FrameIndex) {
1455 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1456 Base = CurDAG->getTargetFrameIndex(
1457 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1458 }
1459
1460 OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
1461 return true;
1462 }
1463
1464 //===--------------------------------------------------------------------===//
1465
/// getAL - Returns an ARMCC::AL immediate node: an i32 target constant
/// holding the ARMCC::AL condition code, created in \p CurDAG at location
/// \p dl.
static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
  return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
}
1470
/// Try to select a pre/post-indexed load \p N into an ARM-mode indexed load
/// instruction.
///
/// Returns false for unindexed loads and when no addressing-mode selector
/// matches the load's memory type and offset. On success \p N is replaced by
/// the machine node and true is returned.
bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  SDValue Offset, AMOpc;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  // The selectors are tried in a fixed order: the immediate forms first, then
  // the register form. i32 and non-sign-extending i8/i1 loads use the
  // addrmode2 selectors; i16 and sign-extending i8/i1 loads use addrmode3.
  if (LoadedVT == MVT::i32 && isPre &&
      SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_PRE_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 && !isPre &&
      SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_POST_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 &&
      SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
    Match = true;

  } else if (LoadedVT == MVT::i16 &&
             SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
    Match = true;
    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
      ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
      : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
    if (LD->getExtensionType() == ISD::SEXTLOAD) {
      if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
      }
    } else {
      if (isPre &&
          SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_PRE_IMM;
      } else if (!isPre &&
                  SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_POST_IMM;
      } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
      }
    }
  }

  if (Match) {
    if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
      // The pre-indexed immediate forms are built without the Offset operand;
      // the immediate travels in AMOpc alone.
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                            MVT::i32, MVT::Other, Ops));
      return true;
    } else {
      // Every other form is built with both Offset and AMOpc operands.
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                            MVT::i32, MVT::Other, Ops));
      return true;
    }
  }

  return false;
}
1545
tryT2IndexedLoad(SDNode * N)1546 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
1547 LoadSDNode *LD = cast<LoadSDNode>(N);
1548 ISD::MemIndexedMode AM = LD->getAddressingMode();
1549 if (AM == ISD::UNINDEXED)
1550 return false;
1551
1552 EVT LoadedVT = LD->getMemoryVT();
1553 bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1554 SDValue Offset;
1555 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1556 unsigned Opcode = 0;
1557 bool Match = false;
1558 if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1559 switch (LoadedVT.getSimpleVT().SimpleTy) {
1560 case MVT::i32:
1561 Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1562 break;
1563 case MVT::i16:
1564 if (isSExtLd)
1565 Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1566 else
1567 Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1568 break;
1569 case MVT::i8:
1570 case MVT::i1:
1571 if (isSExtLd)
1572 Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1573 else
1574 Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1575 break;
1576 default:
1577 return false;
1578 }
1579 Match = true;
1580 }
1581
1582 if (Match) {
1583 SDValue Chain = LD->getChain();
1584 SDValue Base = LD->getBasePtr();
1585 SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1586 CurDAG->getRegister(0, MVT::i32), Chain };
1587 ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1588 MVT::Other, Ops));
1589 return true;
1590 }
1591
1592 return false;
1593 }
1594
1595 /// \brief Form a GPRPair pseudo register from a pair of GPR regs.
createGPRPairNode(EVT VT,SDValue V0,SDValue V1)1596 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1597 SDLoc dl(V0.getNode());
1598 SDValue RegClass =
1599 CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1600 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1601 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1602 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1603 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1604 }
1605
1606 /// \brief Form a D register from a pair of S registers.
createSRegPairNode(EVT VT,SDValue V0,SDValue V1)1607 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1608 SDLoc dl(V0.getNode());
1609 SDValue RegClass =
1610 CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1611 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1612 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1613 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1614 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1615 }
1616
1617 /// \brief Form a quad register from a pair of D registers.
createDRegPairNode(EVT VT,SDValue V0,SDValue V1)1618 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1619 SDLoc dl(V0.getNode());
1620 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1621 MVT::i32);
1622 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1623 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1624 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1625 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1626 }
1627
1628 /// \brief Form 4 consecutive D registers from a pair of Q registers.
createQRegPairNode(EVT VT,SDValue V0,SDValue V1)1629 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1630 SDLoc dl(V0.getNode());
1631 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1632 MVT::i32);
1633 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1634 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1635 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1636 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1637 }
1638
1639 /// \brief Form 4 consecutive S registers.
createQuadSRegsNode(EVT VT,SDValue V0,SDValue V1,SDValue V2,SDValue V3)1640 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1641 SDValue V2, SDValue V3) {
1642 SDLoc dl(V0.getNode());
1643 SDValue RegClass =
1644 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1645 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1646 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1647 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1648 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1649 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1650 V2, SubReg2, V3, SubReg3 };
1651 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1652 }
1653
1654 /// \brief Form 4 consecutive D registers.
createQuadDRegsNode(EVT VT,SDValue V0,SDValue V1,SDValue V2,SDValue V3)1655 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1656 SDValue V2, SDValue V3) {
1657 SDLoc dl(V0.getNode());
1658 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1659 MVT::i32);
1660 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1661 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1662 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1663 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1664 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1665 V2, SubReg2, V3, SubReg3 };
1666 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1667 }
1668
1669 /// \brief Form 4 consecutive Q registers.
createQuadQRegsNode(EVT VT,SDValue V0,SDValue V1,SDValue V2,SDValue V3)1670 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1671 SDValue V2, SDValue V3) {
1672 SDLoc dl(V0.getNode());
1673 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1674 MVT::i32);
1675 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1676 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1677 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1678 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1679 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1680 V2, SubReg2, V3, SubReg3 };
1681 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1682 }
1683
1684 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1685 /// of a NEON VLD or VST instruction. The supported values depend on the
1686 /// number of registers being loaded.
GetVLDSTAlign(SDValue Align,const SDLoc & dl,unsigned NumVecs,bool is64BitVector)1687 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1688 unsigned NumVecs, bool is64BitVector) {
1689 unsigned NumRegs = NumVecs;
1690 if (!is64BitVector && NumVecs < 3)
1691 NumRegs *= 2;
1692
1693 unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1694 if (Alignment >= 32 && NumRegs == 4)
1695 Alignment = 32;
1696 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1697 Alignment = 16;
1698 else if (Alignment >= 8)
1699 Alignment = 8;
1700 else
1701 Alignment = 0;
1702
1703 return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1704 }
1705
isVLDfixed(unsigned Opc)1706 static bool isVLDfixed(unsigned Opc)
1707 {
1708 switch (Opc) {
1709 default: return false;
1710 case ARM::VLD1d8wb_fixed : return true;
1711 case ARM::VLD1d16wb_fixed : return true;
1712 case ARM::VLD1d64Qwb_fixed : return true;
1713 case ARM::VLD1d32wb_fixed : return true;
1714 case ARM::VLD1d64wb_fixed : return true;
1715 case ARM::VLD1d64TPseudoWB_fixed : return true;
1716 case ARM::VLD1d64QPseudoWB_fixed : return true;
1717 case ARM::VLD1q8wb_fixed : return true;
1718 case ARM::VLD1q16wb_fixed : return true;
1719 case ARM::VLD1q32wb_fixed : return true;
1720 case ARM::VLD1q64wb_fixed : return true;
1721 case ARM::VLD2d8wb_fixed : return true;
1722 case ARM::VLD2d16wb_fixed : return true;
1723 case ARM::VLD2d32wb_fixed : return true;
1724 case ARM::VLD2q8PseudoWB_fixed : return true;
1725 case ARM::VLD2q16PseudoWB_fixed : return true;
1726 case ARM::VLD2q32PseudoWB_fixed : return true;
1727 case ARM::VLD2DUPd8wb_fixed : return true;
1728 case ARM::VLD2DUPd16wb_fixed : return true;
1729 case ARM::VLD2DUPd32wb_fixed : return true;
1730 }
1731 }
1732
isVSTfixed(unsigned Opc)1733 static bool isVSTfixed(unsigned Opc)
1734 {
1735 switch (Opc) {
1736 default: return false;
1737 case ARM::VST1d8wb_fixed : return true;
1738 case ARM::VST1d16wb_fixed : return true;
1739 case ARM::VST1d32wb_fixed : return true;
1740 case ARM::VST1d64wb_fixed : return true;
1741 case ARM::VST1q8wb_fixed : return true;
1742 case ARM::VST1q16wb_fixed : return true;
1743 case ARM::VST1q32wb_fixed : return true;
1744 case ARM::VST1q64wb_fixed : return true;
1745 case ARM::VST1d64TPseudoWB_fixed : return true;
1746 case ARM::VST1d64QPseudoWB_fixed : return true;
1747 case ARM::VST2d8wb_fixed : return true;
1748 case ARM::VST2d16wb_fixed : return true;
1749 case ARM::VST2d32wb_fixed : return true;
1750 case ARM::VST2q8PseudoWB_fixed : return true;
1751 case ARM::VST2q16PseudoWB_fixed : return true;
1752 case ARM::VST2q32PseudoWB_fixed : return true;
1753 }
1754 }
1755
1756 // Get the register stride update opcode of a VLD/VST instruction that
1757 // is otherwise equivalent to the given fixed stride updating instruction.
getVLDSTRegisterUpdateOpcode(unsigned Opc)1758 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
1759 assert((isVLDfixed(Opc) || isVSTfixed(Opc))
1760 && "Incorrect fixed stride updating instruction.");
1761 switch (Opc) {
1762 default: break;
1763 case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
1764 case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
1765 case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
1766 case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
1767 case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
1768 case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
1769 case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
1770 case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
1771 case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
1772 case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
1773 case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
1774 case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
1775
1776 case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
1777 case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
1778 case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
1779 case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
1780 case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
1781 case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
1782 case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
1783 case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
1784 case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
1785 case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
1786
1787 case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
1788 case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
1789 case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
1790 case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
1791 case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
1792 case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
1793
1794 case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
1795 case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
1796 case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
1797 case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
1798 case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
1799 case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
1800
1801 case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
1802 case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
1803 case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
1804 }
1805 return Opc; // If not one we handle, return it unchanged.
1806 }
1807
/// Select machine node(s) for the NEON multi-vector load \p N.
///
/// \param N          The load node. Operand 0 is the chain; the address is
///                   operand 1 when \p isUpdating and operand 2 otherwise.
/// \param isUpdating True if \p N also produces a written-back address. The
///                   increment is then the operand following the address.
/// \param NumVecs    Number of vectors loaded (asserted to be 1-4).
/// \param DOpcodes   Opcode table used when the result is a 64-bit vector.
/// \param QOpcodes0  Opcode table used for other vectors. For NumVecs > 2 it
///                   provides the first of the two loads that are emitted.
/// \param QOpcodes1  Opcode table for the second of those two loads.
///
/// All three tables are indexed by the OpcodeIndex computed from the vector
/// type below. If SelectAddrMode6 fails the function returns early and \p N
/// is left unselected.
void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  unsigned AddrOpIdx = isUpdating ? 1 : 2;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();
  // Replace the raw alignment with the value the instruction can encode.
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  // Index into the opcode tables: 0, 1, 2, 3 for 8-, 16-, 32- and 64-bit
  // elements respectively.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld type");
    // Double-register operations:
  case MVT::v8i8: OpcodeIndex = 0; break;
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3;
    assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
    break;
  }

  // A single vector is returned as-is. Multiple vectors are returned as one
  // wide vector of i64 elements (three vectors are rounded up to four) from
  // which the individual results are extracted as sub-registers below.
  EVT ResTy;
  if (NumVecs == 1)
    ResTy = VT;
  else {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
  }
  // Result list: loaded value, optional written-back address, chain.
  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SDNode *VLd;
  SmallVector<SDValue, 7> Ops;

  // Double registers and VLD1/VLD2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
      // case entirely when the rest are updated to that form, too.
      if ((NumVecs <= 2) && !isa<ConstantSDNode>(Inc.getNode()))
        Opc = getVLDSTRegisterUpdateOpcode(Opc);
      // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
      // check for that explicitly too. Horribly hacky, but temporary.
      // An increment operand is pushed for a non-constant increment (the
      // increment itself) or for a constant increment on a VLD3/VLD4 opcode
      // that isVLDfixed does not recognise (Reg0).
      if ((NumVecs > 2 && !isVLDfixed(Opc)) ||
          !isa<ConstantSDNode>(Inc.getNode()))
        Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
    }
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  } else {
    // Otherwise, quad registers are loaded with two separate instructions,
    // where one loads the even registers and the other loads the odd registers.
    EVT AddrTy = MemAddr.getValueType();

    // Load the even subregs.  This is always an updating load, so that it
    // provides the address to the second load for the odd subregs.
    SDValue ImplDef =
      SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                          ResTy, AddrTy, MVT::Other, OpsA);
    // Results of VLdA: 0 = partially loaded value, 1 = updated address,
    // 2 = chain.
    Chain = SDValue(VLdA, 2);

    // Load the odd subregs.
    Ops.push_back(SDValue(VLdA, 1));
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      assert(isa<ConstantSDNode>(Inc.getNode()) &&
             "only constant post-increment update allowed for VLD3/4");
      (void)Inc;
      Ops.push_back(Reg0);
    }
    // The second load takes the first load's value as an input operand.
    Ops.push_back(SDValue(VLdA, 0));
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
  }

  // Transfer memoperands.
  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);

  // For a single vector the new node's results are used directly.
  if (NumVecs == 1) {
    ReplaceNode(N, VLd);
    return;
  }

  // Extract out the subregisters.
  SDValue SuperReg = SDValue(VLd, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                    ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  // Rewire the result that follows the vectors, plus one more when updating.
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
  CurDAG->RemoveDeadNode(N);
}
1943
/// Select machine node(s) for the NEON multi-vector store \p N.
///
/// \param N          The store node. Operand 0 is the chain; the address is
///                   operand 1 when \p isUpdating and operand 2 otherwise;
///                   the vectors to store start at operand 3.
/// \param isUpdating True if \p N also produces a written-back address. The
///                   increment is then the operand following the address.
/// \param NumVecs    Number of vectors stored (asserted to be 1-4).
/// \param DOpcodes   Opcode table used when the stored vectors are 64-bit.
/// \param QOpcodes0  Opcode table used for other vectors. For NumVecs > 2 it
///                   provides the first of the two stores that are emitted.
/// \param QOpcodes1  Opcode table for the second of those two stores.
///
/// All three tables are indexed by the OpcodeIndex computed from the vector
/// type below. If SelectAddrMode6 fails the function returns early and \p N
/// is left unselected.
void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  unsigned AddrOpIdx = isUpdating ? 1 : 2;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  // The same memory operand is attached to every store node created below.
  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();
  // Replace the raw alignment with the value the instruction can encode.
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  // Index into the opcode tables: 0, 1, 2, 3 for 8-, 16-, 32- and 64-bit
  // elements respectively.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vst type");
    // Double-register operations:
  case MVT::v8i8: OpcodeIndex = 0; break;
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3;
    assert(NumVecs == 1 && "v2i64 type only supported for VST1");
    break;
  }

  // Result list: optional written-back address, then the chain.
  std::vector<EVT> ResTys;
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SmallVector<SDValue, 7> Ops;

  // Double registers and VST1/VST2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    // Build the single source operand: the vector itself for one vector,
    // otherwise a REG_SEQUENCE combining all of them.
    SDValue SrcReg;
    if (NumVecs == 1) {
      SrcReg = N->getOperand(Vec0Idx);
    } else if (is64BitVector) {
      // Form a REG_SEQUENCE to force register allocation.
      SDValue V0 = N->getOperand(Vec0Idx + 0);
      SDValue V1 = N->getOperand(Vec0Idx + 1);
      if (NumVecs == 2)
        SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
      else {
        SDValue V2 = N->getOperand(Vec0Idx + 2);
        // If it's a vst3, form a quad D-register and leave the last part as
        // an undef.
        SDValue V3 = (NumVecs == 3)
          ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
          : N->getOperand(Vec0Idx + 3);
        SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
      }
    } else {
      // Form a QQ register.
      SDValue Q0 = N->getOperand(Vec0Idx);
      SDValue Q1 = N->getOperand(Vec0Idx + 1);
      SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
    }

    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
      // case entirely when the rest are updated to that form, too.
      if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
        Opc = getVLDSTRegisterUpdateOpcode(Opc);
      // FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
      // check for that explicitly too. Horribly hacky, but temporary.
      // A non-constant increment is passed as an operand. A constant one
      // adds Reg0 only for VST3/VST4 opcodes isVSTfixed does not recognise.
      if  (!isa<ConstantSDNode>(Inc.getNode()))
        Ops.push_back(Inc);
      else if (NumVecs > 2 && !isVSTfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(SrcReg);
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

    // Transfer memoperands.
    cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);

    ReplaceNode(N, VSt);
    return;
  }

  // Otherwise, quad registers are stored with two separate instructions,
  // where one stores the even registers and the other stores the odd registers.

  // Form the QQQQ REG_SEQUENCE.
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  SDValue V2 = N->getOperand(Vec0Idx + 2);
  // For three vectors the fourth slot is filled with an IMPLICIT_DEF.
  SDValue V3 = (NumVecs == 3)
    ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
    : N->getOperand(Vec0Idx + 3);
  SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);

  // Store the even D registers.  This is always an updating store, so that it
  // provides the address to the second store for the odd subregs.
  const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
  SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                        MemAddr.getValueType(),
                                        MVT::Other, OpsA);
  cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
  // Results of VStA: 0 = updated address, 1 = chain.
  Chain = SDValue(VStA, 1);

  // Store the odd D registers.
  Ops.push_back(SDValue(VStA, 0));
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    assert(isa<ConstantSDNode>(Inc.getNode()) &&
           "only constant post-increment update allowed for VST3/4");
    (void)Inc;
    Ops.push_back(Reg0);
  }
  Ops.push_back(RegSeq);
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);
  SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
                                        Ops);
  cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
  ReplaceNode(N, VStB);
}
2091
/// Select a machine node for the NEON single-lane load or store \p N.
///
/// \param N          The node. Operand 0 is the chain; the address is operand
///                   1 when \p isUpdating and operand 2 otherwise; the
///                   vectors start at operand 3 and are followed by the
///                   constant lane number.
/// \param IsLoad     True for a lane load, false for a lane store.
/// \param isUpdating True if \p N also produces a written-back address. The
///                   increment is then the operand following the address.
/// \param NumVecs    Number of vectors involved (asserted to be 2-4).
/// \param DOpcodes   Opcode table used when the vectors are 64-bit.
/// \param QOpcodes   Opcode table used otherwise.
///
/// Both tables are indexed by the OpcodeIndex computed from the vector type
/// below. If SelectAddrMode6 fails the function returns early and \p N is
/// left unselected.
void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                                      unsigned NumVecs,
                                      const uint16_t *DOpcodes,
                                      const uint16_t *QOpcodes) {
  assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  unsigned AddrOpIdx = isUpdating ? 1 : 2;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  // The lane index is the operand right after the NumVecs vector operands.
  unsigned Lane =
    cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();

  // Compute the alignment operand. For three vectors it is always 0.
  // Otherwise it is capped at the number of bytes accessed (one element per
  // vector), dropped to 0 when it is below both 8 and that byte count, and
  // reduced to its lowest set bit. A result of 1 is also turned into 0.
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  // Index into the opcode tables. The D table is indexed 0/1/2 for 8-, 16-
  // and 32-bit elements; the Q table is indexed 0/1 for 16- and 32-bit
  // elements.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld/vst lane type");
    // Double-register operations:
  case MVT::v8i8: OpcodeIndex = 0; break;
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
    // Quad-register operations:
  case MVT::v8i16: OpcodeIndex = 0; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 1; break;
  }

  // Result list: for loads, one wide vector of i64 elements (three vectors
  // are rounded up to four); then the optional written-back address; then
  // the chain.
  std::vector<EVT> ResTys;
  if (IsLoad) {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
                                      MVT::i64, ResTyElts));
  }
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    // A constant increment is passed as Reg0, any other as the value itself.
    Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
  }

  // Combine the input vectors into a single super-register operand. This is
  // done for loads as well as stores.
  SDValue SuperReg;
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  if (NumVecs == 2) {
    if (is64BitVector)
      SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
    else
      SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
  } else {
    SDValue V2 = N->getOperand(Vec0Idx + 2);
    // For three vectors the fourth slot is filled with an IMPLICIT_DEF.
    SDValue V3 = (NumVecs == 3)
      ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
      : N->getOperand(Vec0Idx + 3);
    if (is64BitVector)
      SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
    else
      SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
  }
  Ops.push_back(SuperReg);
  Ops.push_back(getI32Imm(Lane, dl));
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                                  QOpcodes[OpcodeIndex]);
  // Despite its name, VLdLn holds the new node for stores as well as loads.
  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
  if (!IsLoad) {
    ReplaceNode(N, VLdLn);
    return;
  }

  // Extract the subregisters.
  SuperReg = SDValue(VLdLn, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                    ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  // Rewire the result that follows the vectors, plus one more when updating.
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
  CurDAG->RemoveDeadNode(N);
}
2213
/// Select a machine node for the NEON load-and-duplicate node \p N.
///
/// \param N          The load node. Operand 0 is the chain, operand 1 the
///                   address and, when \p isUpdating, operand 2 the
///                   increment.
/// \param isUpdating True if \p N also produces a written-back address.
/// \param NumVecs    Number of vectors produced (asserted to be 2-4).
/// \param Opcodes    Opcode table indexed 0/1/2 for 8-, 16- and 32-bit
///                   elements. Only 64-bit vector types are handled.
///
/// If SelectAddrMode6 fails the function returns early and \p N is left
/// unselected.
void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
                                   const uint16_t *Opcodes) {
  assert(NumVecs >=2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
    return;

  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Compute the alignment operand. For three vectors it is always 0.
  // Otherwise it is capped at the number of bytes accessed (one element per
  // vector), dropped to 0 when it is below both 8 and that byte count, and
  // reduced to its lowest set bit. A result of 1 is also turned into 0.
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld-dup type");
  case MVT::v8i8: OpcodeIndex = 0; break;
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  }

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SDValue SuperReg;
  unsigned Opc = Opcodes[OpcodeIndex];
  SmallVector<SDValue, 6> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  if (isUpdating) {
    // fixed-stride update instructions don't have an explicit writeback
    // operand. It's implicit in the opcode itself.
    SDValue Inc = N->getOperand(2);
    if (!isa<ConstantSDNode>(Inc.getNode()))
      Ops.push_back(Inc);
    // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
    else if (NumVecs > 2)
      Ops.push_back(Reg0);
  }
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  // Result list: one wide vector of i64 elements (three vectors are rounded
  // up to four), the optional written-back address, then the chain.
  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
  std::vector<EVT> ResTys;
  ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts));
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);
  SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
  SuperReg = SDValue(VLdDup, 0);

  // Extract the subregisters.
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
  unsigned SubIdx = ARM::dsub_0;
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
  // Rewire the result that follows the vectors, plus one more when updating.
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
  CurDAG->RemoveDeadNode(N);
}
2295
SelectVTBL(SDNode * N,bool IsExt,unsigned NumVecs,unsigned Opc)2296 void ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
2297 unsigned Opc) {
2298 assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
2299 SDLoc dl(N);
2300 EVT VT = N->getValueType(0);
2301 unsigned FirstTblReg = IsExt ? 2 : 1;
2302
2303 // Form a REG_SEQUENCE to force register allocation.
2304 SDValue RegSeq;
2305 SDValue V0 = N->getOperand(FirstTblReg + 0);
2306 SDValue V1 = N->getOperand(FirstTblReg + 1);
2307 if (NumVecs == 2)
2308 RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
2309 else {
2310 SDValue V2 = N->getOperand(FirstTblReg + 2);
2311 // If it's a vtbl3, form a quad D-register and leave the last part as
2312 // an undef.
2313 SDValue V3 = (NumVecs == 3)
2314 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2315 : N->getOperand(FirstTblReg + 3);
2316 RegSeq = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2317 }
2318
2319 SmallVector<SDValue, 6> Ops;
2320 if (IsExt)
2321 Ops.push_back(N->getOperand(1));
2322 Ops.push_back(RegSeq);
2323 Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
2324 Ops.push_back(getAL(CurDAG, dl)); // predicate
2325 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
2326 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
2327 }
2328
tryV6T2BitfieldExtractOp(SDNode * N,bool isSigned)2329 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
2330 if (!Subtarget->hasV6T2Ops())
2331 return false;
2332
2333 unsigned Opc = isSigned
2334 ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
2335 : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
2336 SDLoc dl(N);
2337
2338 // For unsigned extracts, check for a shift right and mask
2339 unsigned And_imm = 0;
2340 if (N->getOpcode() == ISD::AND) {
2341 if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
2342
2343 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2344 if (And_imm & (And_imm + 1))
2345 return false;
2346
2347 unsigned Srl_imm = 0;
2348 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
2349 Srl_imm)) {
2350 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2351
2352 // Note: The width operand is encoded as width-1.
2353 unsigned Width = countTrailingOnes(And_imm) - 1;
2354 unsigned LSB = Srl_imm;
2355
2356 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2357
2358 if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
2359 // It's cheaper to use a right shift to extract the top bits.
2360 if (Subtarget->isThumb()) {
2361 Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
2362 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2363 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2364 getAL(CurDAG, dl), Reg0, Reg0 };
2365 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2366 return true;
2367 }
2368
2369 // ARM models shift instructions as MOVsi with shifter operand.
2370 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
2371 SDValue ShOpc =
2372 CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
2373 MVT::i32);
2374 SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
2375 getAL(CurDAG, dl), Reg0, Reg0 };
2376 CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
2377 return true;
2378 }
2379
2380 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2381 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2382 CurDAG->getTargetConstant(Width, dl, MVT::i32),
2383 getAL(CurDAG, dl), Reg0 };
2384 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2385 return true;
2386 }
2387 }
2388 return false;
2389 }
2390
2391 // Otherwise, we're looking for a shift of a shift
2392 unsigned Shl_imm = 0;
2393 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
2394 assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
2395 unsigned Srl_imm = 0;
2396 if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
2397 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2398 // Note: The width operand is encoded as width-1.
2399 unsigned Width = 32 - Srl_imm - 1;
2400 int LSB = Srl_imm - Shl_imm;
2401 if (LSB < 0)
2402 return false;
2403 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2404 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2405 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2406 CurDAG->getTargetConstant(Width, dl, MVT::i32),
2407 getAL(CurDAG, dl), Reg0 };
2408 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2409 return true;
2410 }
2411 }
2412
2413 // Or we are looking for a shift of an and, with a mask operand
2414 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
2415 isShiftedMask_32(And_imm)) {
2416 unsigned Srl_imm = 0;
2417 unsigned LSB = countTrailingZeros(And_imm);
2418 // Shift must be the same as the ands lsb
2419 if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
2420 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
2421 unsigned MSB = 31 - countLeadingZeros(And_imm);
2422 // Note: The width operand is encoded as width-1.
2423 unsigned Width = MSB - LSB;
2424 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2425 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2426 CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
2427 CurDAG->getTargetConstant(Width, dl, MVT::i32),
2428 getAL(CurDAG, dl), Reg0 };
2429 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2430 return true;
2431 }
2432 }
2433
2434 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
2435 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2436 unsigned LSB = 0;
2437 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
2438 !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
2439 return false;
2440
2441 if (LSB + Width > 32)
2442 return false;
2443
2444 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2445 SDValue Ops[] = { N->getOperand(0).getOperand(0),
2446 CurDAG->getTargetConstant(LSB, dl, MVT::i32),
2447 CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
2448 getAL(CurDAG, dl), Reg0 };
2449 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2450 return true;
2451 }
2452
2453 return false;
2454 }
2455
2456 /// Target-specific DAG combining for ISD::XOR.
2457 /// Target-independent combining lowers SELECT_CC nodes of the form
2458 /// select_cc setg[ge] X, 0, X, -X
2459 /// select_cc setgt X, -1, X, -X
2460 /// select_cc setl[te] X, 0, -X, X
2461 /// select_cc setlt X, 1, -X, X
2462 /// which represent Integer ABS into:
2463 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2464 /// ARM instruction selection detects the latter and matches it to
2465 /// ARM::ABS or ARM::t2ABS machine node.
tryABSOp(SDNode * N)2466 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
2467 SDValue XORSrc0 = N->getOperand(0);
2468 SDValue XORSrc1 = N->getOperand(1);
2469 EVT VT = N->getValueType(0);
2470
2471 if (Subtarget->isThumb1Only())
2472 return false;
2473
2474 if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
2475 return false;
2476
2477 SDValue ADDSrc0 = XORSrc0.getOperand(0);
2478 SDValue ADDSrc1 = XORSrc0.getOperand(1);
2479 SDValue SRASrc0 = XORSrc1.getOperand(0);
2480 SDValue SRASrc1 = XORSrc1.getOperand(1);
2481 ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
2482 EVT XType = SRASrc0.getValueType();
2483 unsigned Size = XType.getSizeInBits() - 1;
2484
2485 if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
2486 XType.isInteger() && SRAConstant != nullptr &&
2487 Size == SRAConstant->getZExtValue()) {
2488 unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
2489 CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
2490 return true;
2491 }
2492
2493 return false;
2494 }
2495
SearchSignedMulShort(SDValue SignExt,unsigned * Opc,SDValue & Src1,bool Accumulate)2496 static bool SearchSignedMulShort(SDValue SignExt, unsigned *Opc, SDValue &Src1,
2497 bool Accumulate) {
2498 // For SM*WB, we need to some form of sext.
2499 // For SM*WT, we need to search for (sra X, 16)
2500 // Src1 then gets set to X.
2501 if ((SignExt.getOpcode() == ISD::SIGN_EXTEND ||
2502 SignExt.getOpcode() == ISD::SIGN_EXTEND_INREG ||
2503 SignExt.getOpcode() == ISD::AssertSext) &&
2504 SignExt.getValueType() == MVT::i32) {
2505
2506 *Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB;
2507 Src1 = SignExt.getOperand(0);
2508 return true;
2509 }
2510
2511 if (SignExt.getOpcode() != ISD::SRA)
2512 return false;
2513
2514 ConstantSDNode *SRASrc1 = dyn_cast<ConstantSDNode>(SignExt.getOperand(1));
2515 if (!SRASrc1 || SRASrc1->getZExtValue() != 16)
2516 return false;
2517
2518 SDValue Op0 = SignExt.getOperand(0);
2519
2520 // The sign extend operand for SM*WB could be generated by a shl and ashr.
2521 if (Op0.getOpcode() == ISD::SHL) {
2522 SDValue SHL = Op0;
2523 ConstantSDNode *SHLSrc1 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
2524 if (!SHLSrc1 || SHLSrc1->getZExtValue() != 16)
2525 return false;
2526
2527 *Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB;
2528 Src1 = Op0.getOperand(0);
2529 return true;
2530 }
2531 *Opc = Accumulate ? ARM::SMLAWT : ARM::SMULWT;
2532 Src1 = SignExt.getOperand(0);
2533 return true;
2534 }
2535
SearchSignedMulLong(SDValue OR,unsigned * Opc,SDValue & Src0,SDValue & Src1,bool Accumulate)2536 static bool SearchSignedMulLong(SDValue OR, unsigned *Opc, SDValue &Src0,
2537 SDValue &Src1, bool Accumulate) {
2538 // First we look for:
2539 // (add (or (srl ?, 16), (shl ?, 16)))
2540 if (OR.getOpcode() != ISD::OR)
2541 return false;
2542
2543 SDValue SRL = OR.getOperand(0);
2544 SDValue SHL = OR.getOperand(1);
2545
2546 if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) {
2547 SRL = OR.getOperand(1);
2548 SHL = OR.getOperand(0);
2549 if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL)
2550 return false;
2551 }
2552
2553 ConstantSDNode *SRLSrc1 = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
2554 ConstantSDNode *SHLSrc1 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
2555 if (!SRLSrc1 || !SHLSrc1 || SRLSrc1->getZExtValue() != 16 ||
2556 SHLSrc1->getZExtValue() != 16)
2557 return false;
2558
2559 // The first operands to the shifts need to be the two results from the
2560 // same smul_lohi node.
2561 if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||
2562 SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)
2563 return false;
2564
2565 SDNode *SMULLOHI = SRL.getOperand(0).getNode();
2566 if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) ||
2567 SHL.getOperand(0) != SDValue(SMULLOHI, 1))
2568 return false;
2569
2570 // Now we have:
2571 // (add (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16)))
2572 // For SMLAW[B|T] smul_lohi will take a 32-bit and a 16-bit arguments.
2573 // For SMLAWB the 16-bit value will signed extended somehow.
2574 // For SMLAWT only the SRA is required.
2575
2576 // Check both sides of SMUL_LOHI
2577 if (SearchSignedMulShort(SMULLOHI->getOperand(0), Opc, Src1, Accumulate)) {
2578 Src0 = SMULLOHI->getOperand(1);
2579 } else if (SearchSignedMulShort(SMULLOHI->getOperand(1), Opc, Src1,
2580 Accumulate)) {
2581 Src0 = SMULLOHI->getOperand(0);
2582 } else {
2583 return false;
2584 }
2585 return true;
2586 }
2587
trySMLAWSMULW(SDNode * N)2588 bool ARMDAGToDAGISel::trySMLAWSMULW(SDNode *N) {
2589 SDLoc dl(N);
2590 SDValue Src0 = N->getOperand(0);
2591 SDValue Src1 = N->getOperand(1);
2592 SDValue A, B;
2593 unsigned Opc = 0;
2594
2595 if (N->getOpcode() == ISD::ADD) {
2596 if (Src0.getOpcode() != ISD::OR && Src1.getOpcode() != ISD::OR)
2597 return false;
2598
2599 SDValue Acc;
2600 if (SearchSignedMulLong(Src0, &Opc, A, B, true)) {
2601 Acc = Src1;
2602 } else if (SearchSignedMulLong(Src1, &Opc, A, B, true)) {
2603 Acc = Src0;
2604 } else {
2605 return false;
2606 }
2607 if (Opc == 0)
2608 return false;
2609
2610 SDValue Ops[] = { A, B, Acc, getAL(CurDAG, dl),
2611 CurDAG->getRegister(0, MVT::i32) };
2612 CurDAG->SelectNodeTo(N, Opc, MVT::i32, MVT::Other, Ops);
2613 return true;
2614 } else if (N->getOpcode() == ISD::OR &&
2615 SearchSignedMulLong(SDValue(N, 0), &Opc, A, B, false)) {
2616 if (Opc == 0)
2617 return false;
2618
2619 SDValue Ops[] = { A, B, getAL(CurDAG, dl),
2620 CurDAG->getRegister(0, MVT::i32)};
2621 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2622 return true;
2623 }
2624 return false;
2625 }
2626
2627 /// We've got special pseudo-instructions for these
SelectCMP_SWAP(SDNode * N)2628 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2629 unsigned Opcode;
2630 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2631 if (MemTy == MVT::i8)
2632 Opcode = ARM::CMP_SWAP_8;
2633 else if (MemTy == MVT::i16)
2634 Opcode = ARM::CMP_SWAP_16;
2635 else if (MemTy == MVT::i32)
2636 Opcode = ARM::CMP_SWAP_32;
2637 else
2638 llvm_unreachable("Unknown AtomicCmpSwap type");
2639
2640 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2641 N->getOperand(0)};
2642 SDNode *CmpSwap = CurDAG->getMachineNode(
2643 Opcode, SDLoc(N),
2644 CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
2645
2646 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2647 MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
2648 cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
2649
2650 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2651 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2652 CurDAG->RemoveDeadNode(N);
2653 }
2654
SelectConcatVector(SDNode * N)2655 void ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
2656 // The only time a CONCAT_VECTORS operation can have legal types is when
2657 // two 64-bit vectors are concatenated to a 128-bit vector.
2658 EVT VT = N->getValueType(0);
2659 if (!VT.is128BitVector() || N->getNumOperands() != 2)
2660 llvm_unreachable("unexpected CONCAT_VECTORS");
2661 ReplaceNode(N, createDRegPairNode(VT, N->getOperand(0), N->getOperand(1)));
2662 }
2663
Select(SDNode * N)2664 void ARMDAGToDAGISel::Select(SDNode *N) {
2665 SDLoc dl(N);
2666
2667 if (N->isMachineOpcode()) {
2668 N->setNodeId(-1);
2669 return; // Already selected.
2670 }
2671
2672 switch (N->getOpcode()) {
2673 default: break;
2674 case ISD::ADD:
2675 case ISD::OR:
2676 if (trySMLAWSMULW(N))
2677 return;
2678 break;
2679 case ISD::WRITE_REGISTER:
2680 if (tryWriteRegister(N))
2681 return;
2682 break;
2683 case ISD::READ_REGISTER:
2684 if (tryReadRegister(N))
2685 return;
2686 break;
2687 case ISD::INLINEASM:
2688 if (tryInlineAsm(N))
2689 return;
2690 break;
2691 case ISD::XOR:
2692 // Select special operations if XOR node forms integer ABS pattern
2693 if (tryABSOp(N))
2694 return;
2695 // Other cases are autogenerated.
2696 break;
2697 case ISD::Constant: {
2698 unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
2699 // If we can't materialize the constant we need to use a literal pool
2700 if (ConstantMaterializationCost(Val) > 2) {
2701 SDValue CPIdx = CurDAG->getTargetConstantPool(
2702 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
2703 TLI->getPointerTy(CurDAG->getDataLayout()));
2704
2705 SDNode *ResNode;
2706 if (Subtarget->isThumb()) {
2707 SDValue Pred = getAL(CurDAG, dl);
2708 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2709 SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
2710 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
2711 Ops);
2712 } else {
2713 SDValue Ops[] = {
2714 CPIdx,
2715 CurDAG->getTargetConstant(0, dl, MVT::i32),
2716 getAL(CurDAG, dl),
2717 CurDAG->getRegister(0, MVT::i32),
2718 CurDAG->getEntryNode()
2719 };
2720 ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
2721 Ops);
2722 }
2723 ReplaceNode(N, ResNode);
2724 return;
2725 }
2726
2727 // Other cases are autogenerated.
2728 break;
2729 }
2730 case ISD::FrameIndex: {
2731 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2732 int FI = cast<FrameIndexSDNode>(N)->getIndex();
2733 SDValue TFI = CurDAG->getTargetFrameIndex(
2734 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2735 if (Subtarget->isThumb1Only()) {
2736 // Set the alignment of the frame object to 4, to avoid having to generate
2737 // more than one ADD
2738 MachineFrameInfo *MFI = MF->getFrameInfo();
2739 if (MFI->getObjectAlignment(FI) < 4)
2740 MFI->setObjectAlignment(FI, 4);
2741 CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
2742 CurDAG->getTargetConstant(0, dl, MVT::i32));
2743 return;
2744 } else {
2745 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
2746 ARM::t2ADDri : ARM::ADDri);
2747 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
2748 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2749 CurDAG->getRegister(0, MVT::i32) };
2750 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2751 return;
2752 }
2753 }
2754 case ISD::SRL:
2755 if (tryV6T2BitfieldExtractOp(N, false))
2756 return;
2757 break;
2758 case ISD::SIGN_EXTEND_INREG:
2759 case ISD::SRA:
2760 if (tryV6T2BitfieldExtractOp(N, true))
2761 return;
2762 break;
2763 case ISD::MUL:
2764 if (Subtarget->isThumb1Only())
2765 break;
2766 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2767 unsigned RHSV = C->getZExtValue();
2768 if (!RHSV) break;
2769 if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
2770 unsigned ShImm = Log2_32(RHSV-1);
2771 if (ShImm >= 32)
2772 break;
2773 SDValue V = N->getOperand(0);
2774 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2775 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2776 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2777 if (Subtarget->isThumb()) {
2778 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2779 CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
2780 return;
2781 } else {
2782 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2783 Reg0 };
2784 CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
2785 return;
2786 }
2787 }
2788 if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
2789 unsigned ShImm = Log2_32(RHSV+1);
2790 if (ShImm >= 32)
2791 break;
2792 SDValue V = N->getOperand(0);
2793 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2794 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2795 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2796 if (Subtarget->isThumb()) {
2797 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2798 CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
2799 return;
2800 } else {
2801 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2802 Reg0 };
2803 CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
2804 return;
2805 }
2806 }
2807 }
2808 break;
2809 case ISD::AND: {
2810 // Check for unsigned bitfield extract
2811 if (tryV6T2BitfieldExtractOp(N, false))
2812 return;
2813
2814 // If an immediate is used in an AND node, it is possible that the immediate
2815 // can be more optimally materialized when negated. If this is the case we
2816 // can negate the immediate and use a BIC instead.
2817 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
2818 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
2819 uint32_t Imm = (uint32_t) N1C->getZExtValue();
2820
2821 // In Thumb2 mode, an AND can take a 12-bit immediate. If this
2822 // immediate can be negated and fit in the immediate operand of
2823 // a t2BIC, don't do any manual transform here as this can be
2824 // handled by the generic ISel machinery.
2825 bool PreferImmediateEncoding =
2826 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
2827 if (!PreferImmediateEncoding &&
2828 ConstantMaterializationCost(Imm) >
2829 ConstantMaterializationCost(~Imm)) {
2830 // The current immediate costs more to materialize than a negated
2831 // immediate, so negate the immediate and use a BIC.
2832 SDValue NewImm =
2833 CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
2834 // If the new constant didn't exist before, reposition it in the topological
2835 // ordering so it is just before N. Otherwise, don't touch its location.
2836 if (NewImm->getNodeId() == -1)
2837 CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
2838
2839 if (!Subtarget->hasThumb2()) {
2840 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
2841 N->getOperand(0), NewImm, getAL(CurDAG, dl),
2842 CurDAG->getRegister(0, MVT::i32)};
2843 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
2844 return;
2845 } else {
2846 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
2847 CurDAG->getRegister(0, MVT::i32),
2848 CurDAG->getRegister(0, MVT::i32)};
2849 ReplaceNode(N,
2850 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
2851 return;
2852 }
2853 }
2854 }
2855
2856 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
2857 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
2858 // are entirely contributed by c2 and lower 16-bits are entirely contributed
2859 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
2860 // Select it to: "movt x, ((c1 & 0xffff) >> 16)
2861 EVT VT = N->getValueType(0);
2862 if (VT != MVT::i32)
2863 break;
2864 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
2865 ? ARM::t2MOVTi16
2866 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
2867 if (!Opc)
2868 break;
2869 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2870 N1C = dyn_cast<ConstantSDNode>(N1);
2871 if (!N1C)
2872 break;
2873 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
2874 SDValue N2 = N0.getOperand(1);
2875 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
2876 if (!N2C)
2877 break;
2878 unsigned N1CVal = N1C->getZExtValue();
2879 unsigned N2CVal = N2C->getZExtValue();
2880 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
2881 (N1CVal & 0xffffU) == 0xffffU &&
2882 (N2CVal & 0xffffU) == 0x0U) {
2883 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
2884 dl, MVT::i32);
2885 SDValue Ops[] = { N0.getOperand(0), Imm16,
2886 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2887 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
2888 return;
2889 }
2890 }
2891 break;
2892 }
2893 case ARMISD::VMOVRRD:
2894 ReplaceNode(N, CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32,
2895 N->getOperand(0), getAL(CurDAG, dl),
2896 CurDAG->getRegister(0, MVT::i32)));
2897 return;
2898 case ISD::UMUL_LOHI: {
2899 if (Subtarget->isThumb1Only())
2900 break;
2901 if (Subtarget->isThumb()) {
2902 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2903 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2904 ReplaceNode(
2905 N, CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops));
2906 return;
2907 } else {
2908 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2909 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2910 CurDAG->getRegister(0, MVT::i32) };
2911 ReplaceNode(N, CurDAG->getMachineNode(
2912 Subtarget->hasV6Ops() ? ARM::UMULL : ARM::UMULLv5, dl,
2913 MVT::i32, MVT::i32, Ops));
2914 return;
2915 }
2916 }
2917 case ISD::SMUL_LOHI: {
2918 if (Subtarget->isThumb1Only())
2919 break;
2920 if (Subtarget->isThumb()) {
2921 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2922 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2923 ReplaceNode(
2924 N, CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops));
2925 return;
2926 } else {
2927 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2928 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2929 CurDAG->getRegister(0, MVT::i32) };
2930 ReplaceNode(N, CurDAG->getMachineNode(
2931 Subtarget->hasV6Ops() ? ARM::SMULL : ARM::SMULLv5, dl,
2932 MVT::i32, MVT::i32, Ops));
2933 return;
2934 }
2935 }
2936 case ARMISD::UMAAL: {
2937 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
2938 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2939 N->getOperand(2), N->getOperand(3),
2940 getAL(CurDAG, dl),
2941 CurDAG->getRegister(0, MVT::i32) };
2942 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
2943 return;
2944 }
2945 case ARMISD::UMLAL:{
2946 // UMAAL is similar to UMLAL but it adds two 32-bit values to the
2947 // 64-bit multiplication result.
2948 if (Subtarget->hasV6Ops() && N->getOperand(2).getOpcode() == ARMISD::ADDC &&
2949 N->getOperand(3).getOpcode() == ARMISD::ADDE) {
2950
2951 SDValue Addc = N->getOperand(2);
2952 SDValue Adde = N->getOperand(3);
2953
2954 if (Adde.getOperand(2).getNode() == Addc.getNode()) {
2955
2956 ConstantSDNode *Op0 = dyn_cast<ConstantSDNode>(Adde.getOperand(0));
2957 ConstantSDNode *Op1 = dyn_cast<ConstantSDNode>(Adde.getOperand(1));
2958
2959 if (Op0 && Op1 && Op0->getZExtValue() == 0 && Op1->getZExtValue() == 0)
2960 {
2961 // Select UMAAL instead: UMAAL RdLo, RdHi, Rn, Rm
2962 // RdLo = one operand to be added, lower 32-bits of res
2963 // RdHi = other operand to be added, upper 32-bits of res
2964 // Rn = first multiply operand
2965 // Rm = second multiply operand
2966 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2967 Addc.getOperand(0), Addc.getOperand(1),
2968 getAL(CurDAG, dl),
2969 CurDAG->getRegister(0, MVT::i32) };
2970 unsigned opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
2971 CurDAG->SelectNodeTo(N, opc, MVT::i32, MVT::i32, Ops);
2972 return;
2973 }
2974 }
2975 }
2976
2977 if (Subtarget->isThumb()) {
2978 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2979 N->getOperand(3), getAL(CurDAG, dl),
2980 CurDAG->getRegister(0, MVT::i32)};
2981 ReplaceNode(
2982 N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
2983 return;
2984 }else{
2985 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2986 N->getOperand(3), getAL(CurDAG, dl),
2987 CurDAG->getRegister(0, MVT::i32),
2988 CurDAG->getRegister(0, MVT::i32) };
2989 ReplaceNode(N, CurDAG->getMachineNode(
2990 Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
2991 MVT::i32, MVT::i32, Ops));
2992 return;
2993 }
2994 }
2995 case ARMISD::SMLAL:{
2996 if (Subtarget->isThumb()) {
2997 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2998 N->getOperand(3), getAL(CurDAG, dl),
2999 CurDAG->getRegister(0, MVT::i32)};
3000 ReplaceNode(
3001 N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
3002 return;
3003 }else{
3004 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3005 N->getOperand(3), getAL(CurDAG, dl),
3006 CurDAG->getRegister(0, MVT::i32),
3007 CurDAG->getRegister(0, MVT::i32) };
3008 ReplaceNode(N, CurDAG->getMachineNode(
3009 Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
3010 MVT::i32, MVT::i32, Ops));
3011 return;
3012 }
3013 }
3014 case ISD::LOAD: {
3015 if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
3016 if (tryT2IndexedLoad(N))
3017 return;
3018 } else if (tryARMIndexedLoad(N))
3019 return;
3020 // Other cases are autogenerated.
3021 break;
3022 }
3023 case ARMISD::BRCOND: {
3024 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3025 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3026 // Pattern complexity = 6 cost = 1 size = 0
3027
3028 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3029 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
3030 // Pattern complexity = 6 cost = 1 size = 0
3031
3032 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3033 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3034 // Pattern complexity = 6 cost = 1 size = 0
3035
3036 unsigned Opc = Subtarget->isThumb() ?
3037 ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
3038 SDValue Chain = N->getOperand(0);
3039 SDValue N1 = N->getOperand(1);
3040 SDValue N2 = N->getOperand(2);
3041 SDValue N3 = N->getOperand(3);
3042 SDValue InFlag = N->getOperand(4);
3043 assert(N1.getOpcode() == ISD::BasicBlock);
3044 assert(N2.getOpcode() == ISD::Constant);
3045 assert(N3.getOpcode() == ISD::Register);
3046
3047 SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
3048 cast<ConstantSDNode>(N2)->getZExtValue()), dl,
3049 MVT::i32);
3050 SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
3051 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
3052 MVT::Glue, Ops);
3053 Chain = SDValue(ResNode, 0);
3054 if (N->getNumValues() == 2) {
3055 InFlag = SDValue(ResNode, 1);
3056 ReplaceUses(SDValue(N, 1), InFlag);
3057 }
3058 ReplaceUses(SDValue(N, 0),
3059 SDValue(Chain.getNode(), Chain.getResNo()));
3060 CurDAG->RemoveDeadNode(N);
3061 return;
3062 }
3063 case ARMISD::VZIP: {
3064 unsigned Opc = 0;
3065 EVT VT = N->getValueType(0);
3066 switch (VT.getSimpleVT().SimpleTy) {
3067 default: return;
3068 case MVT::v8i8: Opc = ARM::VZIPd8; break;
3069 case MVT::v4i16: Opc = ARM::VZIPd16; break;
3070 case MVT::v2f32:
3071 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3072 case MVT::v2i32: Opc = ARM::VTRNd32; break;
3073 case MVT::v16i8: Opc = ARM::VZIPq8; break;
3074 case MVT::v8i16: Opc = ARM::VZIPq16; break;
3075 case MVT::v4f32:
3076 case MVT::v4i32: Opc = ARM::VZIPq32; break;
3077 }
3078 SDValue Pred = getAL(CurDAG, dl);
3079 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3080 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3081 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3082 return;
3083 }
3084 case ARMISD::VUZP: {
3085 unsigned Opc = 0;
3086 EVT VT = N->getValueType(0);
3087 switch (VT.getSimpleVT().SimpleTy) {
3088 default: return;
3089 case MVT::v8i8: Opc = ARM::VUZPd8; break;
3090 case MVT::v4i16: Opc = ARM::VUZPd16; break;
3091 case MVT::v2f32:
3092 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3093 case MVT::v2i32: Opc = ARM::VTRNd32; break;
3094 case MVT::v16i8: Opc = ARM::VUZPq8; break;
3095 case MVT::v8i16: Opc = ARM::VUZPq16; break;
3096 case MVT::v4f32:
3097 case MVT::v4i32: Opc = ARM::VUZPq32; break;
3098 }
3099 SDValue Pred = getAL(CurDAG, dl);
3100 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3101 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3102 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3103 return;
3104 }
3105 case ARMISD::VTRN: {
3106 unsigned Opc = 0;
3107 EVT VT = N->getValueType(0);
3108 switch (VT.getSimpleVT().SimpleTy) {
3109 default: return;
3110 case MVT::v8i8: Opc = ARM::VTRNd8; break;
3111 case MVT::v4i16: Opc = ARM::VTRNd16; break;
3112 case MVT::v2f32:
3113 case MVT::v2i32: Opc = ARM::VTRNd32; break;
3114 case MVT::v16i8: Opc = ARM::VTRNq8; break;
3115 case MVT::v8i16: Opc = ARM::VTRNq16; break;
3116 case MVT::v4f32:
3117 case MVT::v4i32: Opc = ARM::VTRNq32; break;
3118 }
3119 SDValue Pred = getAL(CurDAG, dl);
3120 SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3121 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3122 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3123 return;
3124 }
3125 case ARMISD::BUILD_VECTOR: {
3126 EVT VecVT = N->getValueType(0);
3127 EVT EltVT = VecVT.getVectorElementType();
3128 unsigned NumElts = VecVT.getVectorNumElements();
3129 if (EltVT == MVT::f64) {
3130 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
3131 ReplaceNode(
3132 N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3133 return;
3134 }
3135 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
3136 if (NumElts == 2) {
3137 ReplaceNode(
3138 N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3139 return;
3140 }
3141 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
3142 ReplaceNode(N,
3143 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
3144 N->getOperand(2), N->getOperand(3)));
3145 return;
3146 }
3147
3148 case ARMISD::VLD2DUP: {
3149 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3150 ARM::VLD2DUPd32 };
3151 SelectVLDDup(N, false, 2, Opcodes);
3152 return;
3153 }
3154
3155 case ARMISD::VLD3DUP: {
3156 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
3157 ARM::VLD3DUPd16Pseudo,
3158 ARM::VLD3DUPd32Pseudo };
3159 SelectVLDDup(N, false, 3, Opcodes);
3160 return;
3161 }
3162
3163 case ARMISD::VLD4DUP: {
3164 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
3165 ARM::VLD4DUPd16Pseudo,
3166 ARM::VLD4DUPd32Pseudo };
3167 SelectVLDDup(N, false, 4, Opcodes);
3168 return;
3169 }
3170
3171 case ARMISD::VLD2DUP_UPD: {
3172 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
3173 ARM::VLD2DUPd16wb_fixed,
3174 ARM::VLD2DUPd32wb_fixed };
3175 SelectVLDDup(N, true, 2, Opcodes);
3176 return;
3177 }
3178
3179 case ARMISD::VLD3DUP_UPD: {
3180 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
3181 ARM::VLD3DUPd16Pseudo_UPD,
3182 ARM::VLD3DUPd32Pseudo_UPD };
3183 SelectVLDDup(N, true, 3, Opcodes);
3184 return;
3185 }
3186
3187 case ARMISD::VLD4DUP_UPD: {
3188 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
3189 ARM::VLD4DUPd16Pseudo_UPD,
3190 ARM::VLD4DUPd32Pseudo_UPD };
3191 SelectVLDDup(N, true, 4, Opcodes);
3192 return;
3193 }
3194
3195 case ARMISD::VLD1_UPD: {
3196 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
3197 ARM::VLD1d16wb_fixed,
3198 ARM::VLD1d32wb_fixed,
3199 ARM::VLD1d64wb_fixed };
3200 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
3201 ARM::VLD1q16wb_fixed,
3202 ARM::VLD1q32wb_fixed,
3203 ARM::VLD1q64wb_fixed };
3204 SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
3205 return;
3206 }
3207
3208 case ARMISD::VLD2_UPD: {
3209 static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
3210 ARM::VLD2d16wb_fixed,
3211 ARM::VLD2d32wb_fixed,
3212 ARM::VLD1q64wb_fixed};
3213 static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
3214 ARM::VLD2q16PseudoWB_fixed,
3215 ARM::VLD2q32PseudoWB_fixed };
3216 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
3217 return;
3218 }
3219
3220 case ARMISD::VLD3_UPD: {
3221 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
3222 ARM::VLD3d16Pseudo_UPD,
3223 ARM::VLD3d32Pseudo_UPD,
3224 ARM::VLD1d64TPseudoWB_fixed};
3225 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3226 ARM::VLD3q16Pseudo_UPD,
3227 ARM::VLD3q32Pseudo_UPD };
3228 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
3229 ARM::VLD3q16oddPseudo_UPD,
3230 ARM::VLD3q32oddPseudo_UPD };
3231 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3232 return;
3233 }
3234
3235 case ARMISD::VLD4_UPD: {
3236 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
3237 ARM::VLD4d16Pseudo_UPD,
3238 ARM::VLD4d32Pseudo_UPD,
3239 ARM::VLD1d64QPseudoWB_fixed};
3240 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3241 ARM::VLD4q16Pseudo_UPD,
3242 ARM::VLD4q32Pseudo_UPD };
3243 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
3244 ARM::VLD4q16oddPseudo_UPD,
3245 ARM::VLD4q32oddPseudo_UPD };
3246 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3247 return;
3248 }
3249
3250 case ARMISD::VLD2LN_UPD: {
3251 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
3252 ARM::VLD2LNd16Pseudo_UPD,
3253 ARM::VLD2LNd32Pseudo_UPD };
3254 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
3255 ARM::VLD2LNq32Pseudo_UPD };
3256 SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
3257 return;
3258 }
3259
3260 case ARMISD::VLD3LN_UPD: {
3261 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
3262 ARM::VLD3LNd16Pseudo_UPD,
3263 ARM::VLD3LNd32Pseudo_UPD };
3264 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
3265 ARM::VLD3LNq32Pseudo_UPD };
3266 SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
3267 return;
3268 }
3269
3270 case ARMISD::VLD4LN_UPD: {
3271 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
3272 ARM::VLD4LNd16Pseudo_UPD,
3273 ARM::VLD4LNd32Pseudo_UPD };
3274 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
3275 ARM::VLD4LNq32Pseudo_UPD };
3276 SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
3277 return;
3278 }
3279
3280 case ARMISD::VST1_UPD: {
3281 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
3282 ARM::VST1d16wb_fixed,
3283 ARM::VST1d32wb_fixed,
3284 ARM::VST1d64wb_fixed };
3285 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
3286 ARM::VST1q16wb_fixed,
3287 ARM::VST1q32wb_fixed,
3288 ARM::VST1q64wb_fixed };
3289 SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
3290 return;
3291 }
3292
3293 case ARMISD::VST2_UPD: {
3294 static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
3295 ARM::VST2d16wb_fixed,
3296 ARM::VST2d32wb_fixed,
3297 ARM::VST1q64wb_fixed};
3298 static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
3299 ARM::VST2q16PseudoWB_fixed,
3300 ARM::VST2q32PseudoWB_fixed };
3301 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
3302 return;
3303 }
3304
3305 case ARMISD::VST3_UPD: {
3306 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
3307 ARM::VST3d16Pseudo_UPD,
3308 ARM::VST3d32Pseudo_UPD,
3309 ARM::VST1d64TPseudoWB_fixed};
3310 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3311 ARM::VST3q16Pseudo_UPD,
3312 ARM::VST3q32Pseudo_UPD };
3313 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
3314 ARM::VST3q16oddPseudo_UPD,
3315 ARM::VST3q32oddPseudo_UPD };
3316 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3317 return;
3318 }
3319
3320 case ARMISD::VST4_UPD: {
3321 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
3322 ARM::VST4d16Pseudo_UPD,
3323 ARM::VST4d32Pseudo_UPD,
3324 ARM::VST1d64QPseudoWB_fixed};
3325 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3326 ARM::VST4q16Pseudo_UPD,
3327 ARM::VST4q32Pseudo_UPD };
3328 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
3329 ARM::VST4q16oddPseudo_UPD,
3330 ARM::VST4q32oddPseudo_UPD };
3331 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3332 return;
3333 }
3334
3335 case ARMISD::VST2LN_UPD: {
3336 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
3337 ARM::VST2LNd16Pseudo_UPD,
3338 ARM::VST2LNd32Pseudo_UPD };
3339 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
3340 ARM::VST2LNq32Pseudo_UPD };
3341 SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
3342 return;
3343 }
3344
3345 case ARMISD::VST3LN_UPD: {
3346 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
3347 ARM::VST3LNd16Pseudo_UPD,
3348 ARM::VST3LNd32Pseudo_UPD };
3349 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
3350 ARM::VST3LNq32Pseudo_UPD };
3351 SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
3352 return;
3353 }
3354
3355 case ARMISD::VST4LN_UPD: {
3356 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
3357 ARM::VST4LNd16Pseudo_UPD,
3358 ARM::VST4LNd32Pseudo_UPD };
3359 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
3360 ARM::VST4LNq32Pseudo_UPD };
3361 SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
3362 return;
3363 }
3364
3365 case ISD::INTRINSIC_VOID:
3366 case ISD::INTRINSIC_W_CHAIN: {
3367 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
3368 switch (IntNo) {
3369 default:
3370 break;
3371
3372 case Intrinsic::arm_mrrc:
3373 case Intrinsic::arm_mrrc2: {
3374 SDLoc dl(N);
3375 SDValue Chain = N->getOperand(0);
3376 unsigned Opc;
3377
3378 if (Subtarget->isThumb())
3379 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
3380 else
3381 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
3382
3383 SmallVector<SDValue, 5> Ops;
3384 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
3385 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
3386 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */
3387
3388 // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded
3389 // instruction will always be '1111' but it is possible in assembly language to specify
3390 // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction.
3391 if (Opc != ARM::MRRC2) {
3392 Ops.push_back(getAL(CurDAG, dl));
3393 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3394 }
3395
3396 Ops.push_back(Chain);
3397
3398 // Writes to two registers.
3399 const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
3400
3401 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
3402 return;
3403 }
3404 case Intrinsic::arm_ldaexd:
3405 case Intrinsic::arm_ldrexd: {
3406 SDLoc dl(N);
3407 SDValue Chain = N->getOperand(0);
3408 SDValue MemAddr = N->getOperand(2);
3409 bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
3410
3411 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
3412 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
3413 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
3414
3415 // arm_ldrexd returns a i64 value in {i32, i32}
3416 std::vector<EVT> ResTys;
3417 if (isThumb) {
3418 ResTys.push_back(MVT::i32);
3419 ResTys.push_back(MVT::i32);
3420 } else
3421 ResTys.push_back(MVT::Untyped);
3422 ResTys.push_back(MVT::Other);
3423
3424 // Place arguments in the right order.
3425 SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
3426 CurDAG->getRegister(0, MVT::i32), Chain};
3427 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3428 // Transfer memoperands.
3429 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3430 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3431 cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
3432
3433 // Remap uses.
3434 SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
3435 if (!SDValue(N, 0).use_empty()) {
3436 SDValue Result;
3437 if (isThumb)
3438 Result = SDValue(Ld, 0);
3439 else {
3440 SDValue SubRegIdx =
3441 CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
3442 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3443 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3444 Result = SDValue(ResNode,0);
3445 }
3446 ReplaceUses(SDValue(N, 0), Result);
3447 }
3448 if (!SDValue(N, 1).use_empty()) {
3449 SDValue Result;
3450 if (isThumb)
3451 Result = SDValue(Ld, 1);
3452 else {
3453 SDValue SubRegIdx =
3454 CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
3455 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3456 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3457 Result = SDValue(ResNode,0);
3458 }
3459 ReplaceUses(SDValue(N, 1), Result);
3460 }
3461 ReplaceUses(SDValue(N, 2), OutChain);
3462 CurDAG->RemoveDeadNode(N);
3463 return;
3464 }
3465 case Intrinsic::arm_stlexd:
3466 case Intrinsic::arm_strexd: {
3467 SDLoc dl(N);
3468 SDValue Chain = N->getOperand(0);
3469 SDValue Val0 = N->getOperand(2);
3470 SDValue Val1 = N->getOperand(3);
3471 SDValue MemAddr = N->getOperand(4);
3472
3473 // Store exclusive double return a i32 value which is the return status
3474 // of the issued store.
3475 const EVT ResTys[] = {MVT::i32, MVT::Other};
3476
3477 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3478 // Place arguments in the right order.
3479 SmallVector<SDValue, 7> Ops;
3480 if (isThumb) {
3481 Ops.push_back(Val0);
3482 Ops.push_back(Val1);
3483 } else
3484 // arm_strexd uses GPRPair.
3485 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
3486 Ops.push_back(MemAddr);
3487 Ops.push_back(getAL(CurDAG, dl));
3488 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3489 Ops.push_back(Chain);
3490
3491 bool IsRelease = IntNo == Intrinsic::arm_stlexd;
3492 unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
3493 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
3494
3495 SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3496 // Transfer memoperands.
3497 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3498 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3499 cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
3500
3501 ReplaceNode(N, St);
3502 return;
3503 }
3504
3505 case Intrinsic::arm_neon_vld1: {
3506 static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
3507 ARM::VLD1d32, ARM::VLD1d64 };
3508 static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3509 ARM::VLD1q32, ARM::VLD1q64};
3510 SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
3511 return;
3512 }
3513
3514 case Intrinsic::arm_neon_vld2: {
3515 static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
3516 ARM::VLD2d32, ARM::VLD1q64 };
3517 static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
3518 ARM::VLD2q32Pseudo };
3519 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3520 return;
3521 }
3522
3523 case Intrinsic::arm_neon_vld3: {
3524 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
3525 ARM::VLD3d16Pseudo,
3526 ARM::VLD3d32Pseudo,
3527 ARM::VLD1d64TPseudo };
3528 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3529 ARM::VLD3q16Pseudo_UPD,
3530 ARM::VLD3q32Pseudo_UPD };
3531 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
3532 ARM::VLD3q16oddPseudo,
3533 ARM::VLD3q32oddPseudo };
3534 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3535 return;
3536 }
3537
3538 case Intrinsic::arm_neon_vld4: {
3539 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
3540 ARM::VLD4d16Pseudo,
3541 ARM::VLD4d32Pseudo,
3542 ARM::VLD1d64QPseudo };
3543 static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3544 ARM::VLD4q16Pseudo_UPD,
3545 ARM::VLD4q32Pseudo_UPD };
3546 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
3547 ARM::VLD4q16oddPseudo,
3548 ARM::VLD4q32oddPseudo };
3549 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3550 return;
3551 }
3552
3553 case Intrinsic::arm_neon_vld2lane: {
3554 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
3555 ARM::VLD2LNd16Pseudo,
3556 ARM::VLD2LNd32Pseudo };
3557 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
3558 ARM::VLD2LNq32Pseudo };
3559 SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
3560 return;
3561 }
3562
3563 case Intrinsic::arm_neon_vld3lane: {
3564 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
3565 ARM::VLD3LNd16Pseudo,
3566 ARM::VLD3LNd32Pseudo };
3567 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
3568 ARM::VLD3LNq32Pseudo };
3569 SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
3570 return;
3571 }
3572
3573 case Intrinsic::arm_neon_vld4lane: {
3574 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
3575 ARM::VLD4LNd16Pseudo,
3576 ARM::VLD4LNd32Pseudo };
3577 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
3578 ARM::VLD4LNq32Pseudo };
3579 SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
3580 return;
3581 }
3582
3583 case Intrinsic::arm_neon_vst1: {
3584 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
3585 ARM::VST1d32, ARM::VST1d64 };
3586 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3587 ARM::VST1q32, ARM::VST1q64 };
3588 SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
3589 return;
3590 }
3591
3592 case Intrinsic::arm_neon_vst2: {
3593 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
3594 ARM::VST2d32, ARM::VST1q64 };
3595 static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
3596 ARM::VST2q32Pseudo };
3597 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3598 return;
3599 }
3600
3601 case Intrinsic::arm_neon_vst3: {
3602 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
3603 ARM::VST3d16Pseudo,
3604 ARM::VST3d32Pseudo,
3605 ARM::VST1d64TPseudo };
3606 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3607 ARM::VST3q16Pseudo_UPD,
3608 ARM::VST3q32Pseudo_UPD };
3609 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
3610 ARM::VST3q16oddPseudo,
3611 ARM::VST3q32oddPseudo };
3612 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3613 return;
3614 }
3615
3616 case Intrinsic::arm_neon_vst4: {
3617 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
3618 ARM::VST4d16Pseudo,
3619 ARM::VST4d32Pseudo,
3620 ARM::VST1d64QPseudo };
3621 static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3622 ARM::VST4q16Pseudo_UPD,
3623 ARM::VST4q32Pseudo_UPD };
3624 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
3625 ARM::VST4q16oddPseudo,
3626 ARM::VST4q32oddPseudo };
3627 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3628 return;
3629 }
3630
3631 case Intrinsic::arm_neon_vst2lane: {
3632 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
3633 ARM::VST2LNd16Pseudo,
3634 ARM::VST2LNd32Pseudo };
3635 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
3636 ARM::VST2LNq32Pseudo };
3637 SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
3638 return;
3639 }
3640
3641 case Intrinsic::arm_neon_vst3lane: {
3642 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
3643 ARM::VST3LNd16Pseudo,
3644 ARM::VST3LNd32Pseudo };
3645 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
3646 ARM::VST3LNq32Pseudo };
3647 SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
3648 return;
3649 }
3650
3651 case Intrinsic::arm_neon_vst4lane: {
3652 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
3653 ARM::VST4LNd16Pseudo,
3654 ARM::VST4LNd32Pseudo };
3655 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
3656 ARM::VST4LNq32Pseudo };
3657 SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
3658 return;
3659 }
3660 }
3661 break;
3662 }
3663
3664 case ISD::INTRINSIC_WO_CHAIN: {
3665 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
3666 switch (IntNo) {
3667 default:
3668 break;
3669
3670 case Intrinsic::arm_neon_vtbl2:
3671 SelectVTBL(N, false, 2, ARM::VTBL2);
3672 return;
3673 case Intrinsic::arm_neon_vtbl3:
3674 SelectVTBL(N, false, 3, ARM::VTBL3Pseudo);
3675 return;
3676 case Intrinsic::arm_neon_vtbl4:
3677 SelectVTBL(N, false, 4, ARM::VTBL4Pseudo);
3678 return;
3679
3680 case Intrinsic::arm_neon_vtbx2:
3681 SelectVTBL(N, true, 2, ARM::VTBX2);
3682 return;
3683 case Intrinsic::arm_neon_vtbx3:
3684 SelectVTBL(N, true, 3, ARM::VTBX3Pseudo);
3685 return;
3686 case Intrinsic::arm_neon_vtbx4:
3687 SelectVTBL(N, true, 4, ARM::VTBX4Pseudo);
3688 return;
3689 }
3690 break;
3691 }
3692
3693 case ARMISD::VTBL1: {
3694 SDLoc dl(N);
3695 EVT VT = N->getValueType(0);
3696 SDValue Ops[] = {N->getOperand(0), N->getOperand(1),
3697 getAL(CurDAG, dl), // Predicate
3698 CurDAG->getRegister(0, MVT::i32)}; // Predicate Register
3699 ReplaceNode(N, CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops));
3700 return;
3701 }
3702 case ARMISD::VTBL2: {
3703 SDLoc dl(N);
3704 EVT VT = N->getValueType(0);
3705
3706 // Form a REG_SEQUENCE to force register allocation.
3707 SDValue V0 = N->getOperand(0);
3708 SDValue V1 = N->getOperand(1);
3709 SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
3710
3711 SDValue Ops[] = {RegSeq, N->getOperand(2), getAL(CurDAG, dl), // Predicate
3712 CurDAG->getRegister(0, MVT::i32)}; // Predicate Register
3713 ReplaceNode(N, CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops));
3714 return;
3715 }
3716
3717 case ISD::CONCAT_VECTORS:
3718 SelectConcatVector(N);
3719 return;
3720
3721 case ISD::ATOMIC_CMP_SWAP:
3722 SelectCMP_SWAP(N);
3723 return;
3724 }
3725
3726 SelectCode(N);
3727 }
3728
3729 // Inspect a register string of the form
3730 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
3731 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
3732 // and obtain the integer operands from them, adding these operands to the
3733 // provided vector.
getIntOperandsFromRegisterString(StringRef RegString,SelectionDAG * CurDAG,const SDLoc & DL,std::vector<SDValue> & Ops)3734 static void getIntOperandsFromRegisterString(StringRef RegString,
3735 SelectionDAG *CurDAG,
3736 const SDLoc &DL,
3737 std::vector<SDValue> &Ops) {
3738 SmallVector<StringRef, 5> Fields;
3739 RegString.split(Fields, ':');
3740
3741 if (Fields.size() > 1) {
3742 bool AllIntFields = true;
3743
3744 for (StringRef Field : Fields) {
3745 // Need to trim out leading 'cp' characters and get the integer field.
3746 unsigned IntField;
3747 AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
3748 Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
3749 }
3750
3751 assert(AllIntFields &&
3752 "Unexpected non-integer value in special register string.");
3753 }
3754 }
3755
3756 // Maps a Banked Register string to its mask value. The mask value returned is
3757 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
3758 // mask operand, which expresses which register is to be used, e.g. r8, and in
3759 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string
3760 // was invalid.
getBankedRegisterMask(StringRef RegString)3761 static inline int getBankedRegisterMask(StringRef RegString) {
3762 return StringSwitch<int>(RegString.lower())
3763 .Case("r8_usr", 0x00)
3764 .Case("r9_usr", 0x01)
3765 .Case("r10_usr", 0x02)
3766 .Case("r11_usr", 0x03)
3767 .Case("r12_usr", 0x04)
3768 .Case("sp_usr", 0x05)
3769 .Case("lr_usr", 0x06)
3770 .Case("r8_fiq", 0x08)
3771 .Case("r9_fiq", 0x09)
3772 .Case("r10_fiq", 0x0a)
3773 .Case("r11_fiq", 0x0b)
3774 .Case("r12_fiq", 0x0c)
3775 .Case("sp_fiq", 0x0d)
3776 .Case("lr_fiq", 0x0e)
3777 .Case("lr_irq", 0x10)
3778 .Case("sp_irq", 0x11)
3779 .Case("lr_svc", 0x12)
3780 .Case("sp_svc", 0x13)
3781 .Case("lr_abt", 0x14)
3782 .Case("sp_abt", 0x15)
3783 .Case("lr_und", 0x16)
3784 .Case("sp_und", 0x17)
3785 .Case("lr_mon", 0x1c)
3786 .Case("sp_mon", 0x1d)
3787 .Case("elr_hyp", 0x1e)
3788 .Case("sp_hyp", 0x1f)
3789 .Case("spsr_fiq", 0x2e)
3790 .Case("spsr_irq", 0x30)
3791 .Case("spsr_svc", 0x32)
3792 .Case("spsr_abt", 0x34)
3793 .Case("spsr_und", 0x36)
3794 .Case("spsr_mon", 0x3c)
3795 .Case("spsr_hyp", 0x3e)
3796 .Default(-1);
3797 }
3798
3799 // Maps a MClass special register string to its value for use in the
3800 // t2MRS_M / t2MSR_M instruction nodes as the SYSm value operand.
3801 // Returns -1 to signify that the string was invalid.
getMClassRegisterSYSmValueMask(StringRef RegString)3802 static inline int getMClassRegisterSYSmValueMask(StringRef RegString) {
3803 return StringSwitch<int>(RegString.lower())
3804 .Case("apsr", 0x0)
3805 .Case("iapsr", 0x1)
3806 .Case("eapsr", 0x2)
3807 .Case("xpsr", 0x3)
3808 .Case("ipsr", 0x5)
3809 .Case("epsr", 0x6)
3810 .Case("iepsr", 0x7)
3811 .Case("msp", 0x8)
3812 .Case("psp", 0x9)
3813 .Case("primask", 0x10)
3814 .Case("basepri", 0x11)
3815 .Case("basepri_max", 0x12)
3816 .Case("faultmask", 0x13)
3817 .Case("control", 0x14)
3818 .Case("msplim", 0x0a)
3819 .Case("psplim", 0x0b)
3820 .Case("sp", 0x18)
3821 .Default(-1);
3822 }
3823
3824 // The flags here are common to those allowed for apsr in the A class cores and
3825 // those allowed for the special registers in the M class cores. Returns a
3826 // value representing which flags were present, -1 if invalid.
getMClassFlagsMask(StringRef Flags,bool hasDSP)3827 static inline int getMClassFlagsMask(StringRef Flags, bool hasDSP) {
3828 if (Flags.empty())
3829 return 0x2 | (int)hasDSP;
3830
3831 return StringSwitch<int>(Flags)
3832 .Case("g", 0x1)
3833 .Case("nzcvq", 0x2)
3834 .Case("nzcvqg", 0x3)
3835 .Default(-1);
3836 }
3837
getMClassRegisterMask(StringRef Reg,StringRef Flags,bool IsRead,const ARMSubtarget * Subtarget)3838 static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead,
3839 const ARMSubtarget *Subtarget) {
3840 // Ensure that the register (without flags) was a valid M Class special
3841 // register.
3842 int SYSmvalue = getMClassRegisterSYSmValueMask(Reg);
3843 if (SYSmvalue == -1)
3844 return -1;
3845
3846 // basepri, basepri_max and faultmask are only valid for V7m.
3847 if (!Subtarget->hasV7Ops() && SYSmvalue >= 0x11 && SYSmvalue <= 0x13)
3848 return -1;
3849
3850 if (Subtarget->has8MSecExt() && Flags.lower() == "ns") {
3851 Flags = "";
3852 SYSmvalue |= 0x80;
3853 }
3854
3855 if (!Subtarget->has8MSecExt() &&
3856 (SYSmvalue == 0xa || SYSmvalue == 0xb || SYSmvalue > 0x14))
3857 return -1;
3858
3859 if (!Subtarget->hasV8MMainlineOps() &&
3860 (SYSmvalue == 0x8a || SYSmvalue == 0x8b || SYSmvalue == 0x91 ||
3861 SYSmvalue == 0x93))
3862 return -1;
3863
3864 // If it was a read then we won't be expecting flags and so at this point
3865 // we can return the mask.
3866 if (IsRead) {
3867 if (Flags.empty())
3868 return SYSmvalue;
3869 else
3870 return -1;
3871 }
3872
3873 // We know we are now handling a write so need to get the mask for the flags.
3874 int Mask = getMClassFlagsMask(Flags, Subtarget->hasDSP());
3875
3876 // Only apsr, iapsr, eapsr, xpsr can have flags. The other register values
3877 // shouldn't have flags present.
3878 if ((SYSmvalue < 0x4 && Mask == -1) || (SYSmvalue > 0x4 && !Flags.empty()))
3879 return -1;
3880
3881 // The _g and _nzcvqg versions are only valid if the DSP extension is
3882 // available.
3883 if (!Subtarget->hasDSP() && (Mask & 0x1))
3884 return -1;
3885
3886 // The register was valid so need to put the mask in the correct place
3887 // (the flags need to be in bits 11-10) and combine with the SYSmvalue to
3888 // construct the operand for the instruction node.
3889 if (SYSmvalue < 0x4)
3890 return SYSmvalue | Mask << 10;
3891
3892 return SYSmvalue;
3893 }
3894
getARClassRegisterMask(StringRef Reg,StringRef Flags)3895 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
3896 // The mask operand contains the special register (R Bit) in bit 4, whether
3897 // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
3898 // bits 3-0 contains the fields to be accessed in the special register, set by
3899 // the flags provided with the register.
3900 int Mask = 0;
3901 if (Reg == "apsr") {
3902 // The flags permitted for apsr are the same flags that are allowed in
3903 // M class registers. We get the flag value and then shift the flags into
3904 // the correct place to combine with the mask.
3905 Mask = getMClassFlagsMask(Flags, true);
3906 if (Mask == -1)
3907 return -1;
3908 return Mask << 2;
3909 }
3910
3911 if (Reg != "cpsr" && Reg != "spsr") {
3912 return -1;
3913 }
3914
3915 // This is the same as if the flags were "fc"
3916 if (Flags.empty() || Flags == "all")
3917 return Mask | 0x9;
3918
3919 // Inspect the supplied flags string and set the bits in the mask for
3920 // the relevant and valid flags allowed for cpsr and spsr.
3921 for (char Flag : Flags) {
3922 int FlagVal;
3923 switch (Flag) {
3924 case 'c':
3925 FlagVal = 0x1;
3926 break;
3927 case 'x':
3928 FlagVal = 0x2;
3929 break;
3930 case 's':
3931 FlagVal = 0x4;
3932 break;
3933 case 'f':
3934 FlagVal = 0x8;
3935 break;
3936 default:
3937 FlagVal = 0;
3938 }
3939
3940 // This avoids allowing strings where the same flag bit appears twice.
3941 if (!FlagVal || (Mask & FlagVal))
3942 return -1;
3943 Mask |= FlagVal;
3944 }
3945
3946 // If the register is spsr then we need to set the R bit.
3947 if (Reg == "spsr")
3948 Mask |= 0x10;
3949
3950 return Mask;
3951 }
3952
3953 // Lower the read_register intrinsic to ARM specific DAG nodes
3954 // using the supplied metadata string to select the instruction node to use
3955 // and the registers/masks to construct as operands for the node.
tryReadRegister(SDNode * N)3956 bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
3957 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
3958 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
3959 bool IsThumb2 = Subtarget->isThumb2();
3960 SDLoc DL(N);
3961
3962 std::vector<SDValue> Ops;
3963 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
3964
3965 if (!Ops.empty()) {
3966 // If the special register string was constructed of fields (as defined
3967 // in the ACLE) then need to lower to MRC node (32 bit) or
3968 // MRRC node(64 bit), we can make the distinction based on the number of
3969 // operands we have.
3970 unsigned Opcode;
3971 SmallVector<EVT, 3> ResTypes;
3972 if (Ops.size() == 5){
3973 Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
3974 ResTypes.append({ MVT::i32, MVT::Other });
3975 } else {
3976 assert(Ops.size() == 3 &&
3977 "Invalid number of fields in special register string.");
3978 Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
3979 ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
3980 }
3981
3982 Ops.push_back(getAL(CurDAG, DL));
3983 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3984 Ops.push_back(N->getOperand(0));
3985 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
3986 return true;
3987 }
3988
3989 std::string SpecialReg = RegString->getString().lower();
3990
3991 int BankedReg = getBankedRegisterMask(SpecialReg);
3992 if (BankedReg != -1) {
3993 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
3994 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
3995 N->getOperand(0) };
3996 ReplaceNode(
3997 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
3998 DL, MVT::i32, MVT::Other, Ops));
3999 return true;
4000 }
4001
4002 // The VFP registers are read by creating SelectionDAG nodes with opcodes
4003 // corresponding to the register that is being read from. So we switch on the
4004 // string to find which opcode we need to use.
4005 unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
4006 .Case("fpscr", ARM::VMRS)
4007 .Case("fpexc", ARM::VMRS_FPEXC)
4008 .Case("fpsid", ARM::VMRS_FPSID)
4009 .Case("mvfr0", ARM::VMRS_MVFR0)
4010 .Case("mvfr1", ARM::VMRS_MVFR1)
4011 .Case("mvfr2", ARM::VMRS_MVFR2)
4012 .Case("fpinst", ARM::VMRS_FPINST)
4013 .Case("fpinst2", ARM::VMRS_FPINST2)
4014 .Default(0);
4015
4016 // If an opcode was found then we can lower the read to a VFP instruction.
4017 if (Opcode) {
4018 if (!Subtarget->hasVFP2())
4019 return false;
4020 if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8())
4021 return false;
4022
4023 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4024 N->getOperand(0) };
4025 ReplaceNode(N,
4026 CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
4027 return true;
4028 }
4029
4030 // If the target is M Class then need to validate that the register string
4031 // is an acceptable value, so check that a mask can be constructed from the
4032 // string.
4033 if (Subtarget->isMClass()) {
4034 StringRef Flags = "", Reg = SpecialReg;
4035 if (Reg.endswith("_ns")) {
4036 Flags = "ns";
4037 Reg = Reg.drop_back(3);
4038 }
4039
4040 int SYSmValue = getMClassRegisterMask(Reg, Flags, true, Subtarget);
4041 if (SYSmValue == -1)
4042 return false;
4043
4044 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4045 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4046 N->getOperand(0) };
4047 ReplaceNode(
4048 N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
4049 return true;
4050 }
4051
4052 // Here we know the target is not M Class so we need to check if it is one
4053 // of the remaining possible values which are apsr, cpsr or spsr.
4054 if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
4055 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4056 N->getOperand(0) };
4057 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
4058 DL, MVT::i32, MVT::Other, Ops));
4059 return true;
4060 }
4061
4062 if (SpecialReg == "spsr") {
4063 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4064 N->getOperand(0) };
4065 ReplaceNode(
4066 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
4067 MVT::i32, MVT::Other, Ops));
4068 return true;
4069 }
4070
4071 return false;
4072 }
4073
4074 // Lower the write_register intrinsic to ARM specific DAG nodes
4075 // using the supplied metadata string to select the instruction node to use
4076 // and the registers/masks to use in the nodes
tryWriteRegister(SDNode * N)4077 bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
4078 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
4079 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
4080 bool IsThumb2 = Subtarget->isThumb2();
4081 SDLoc DL(N);
4082
4083 std::vector<SDValue> Ops;
4084 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
4085
4086 if (!Ops.empty()) {
4087 // If the special register string was constructed of fields (as defined
4088 // in the ACLE) then need to lower to MCR node (32 bit) or
4089 // MCRR node(64 bit), we can make the distinction based on the number of
4090 // operands we have.
4091 unsigned Opcode;
4092 if (Ops.size() == 5) {
4093 Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
4094 Ops.insert(Ops.begin()+2, N->getOperand(2));
4095 } else {
4096 assert(Ops.size() == 3 &&
4097 "Invalid number of fields in special register string.");
4098 Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
4099 SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
4100 Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
4101 }
4102
4103 Ops.push_back(getAL(CurDAG, DL));
4104 Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4105 Ops.push_back(N->getOperand(0));
4106
4107 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4108 return true;
4109 }
4110
4111 std::string SpecialReg = RegString->getString().lower();
4112 int BankedReg = getBankedRegisterMask(SpecialReg);
4113 if (BankedReg != -1) {
4114 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
4115 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4116 N->getOperand(0) };
4117 ReplaceNode(
4118 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
4119 DL, MVT::Other, Ops));
4120 return true;
4121 }
4122
4123 // The VFP registers are written to by creating SelectionDAG nodes with
4124 // opcodes corresponding to the register that is being written. So we switch
4125 // on the string to find which opcode we need to use.
4126 unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
4127 .Case("fpscr", ARM::VMSR)
4128 .Case("fpexc", ARM::VMSR_FPEXC)
4129 .Case("fpsid", ARM::VMSR_FPSID)
4130 .Case("fpinst", ARM::VMSR_FPINST)
4131 .Case("fpinst2", ARM::VMSR_FPINST2)
4132 .Default(0);
4133
4134 if (Opcode) {
4135 if (!Subtarget->hasVFP2())
4136 return false;
4137 Ops = { N->getOperand(2), getAL(CurDAG, DL),
4138 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4139 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
4140 return true;
4141 }
4142
4143 std::pair<StringRef, StringRef> Fields;
4144 Fields = StringRef(SpecialReg).rsplit('_');
4145 std::string Reg = Fields.first.str();
4146 StringRef Flags = Fields.second;
4147
4148 // If the target was M Class then need to validate the special register value
4149 // and retrieve the mask for use in the instruction node.
4150 if (Subtarget->isMClass()) {
4151 // basepri_max gets split so need to correct Reg and Flags.
4152 if (SpecialReg == "basepri_max") {
4153 Reg = SpecialReg;
4154 Flags = "";
4155 }
4156 int SYSmValue = getMClassRegisterMask(Reg, Flags, false, Subtarget);
4157 if (SYSmValue == -1)
4158 return false;
4159
4160 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
4161 N->getOperand(2), getAL(CurDAG, DL),
4162 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
4163 ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
4164 return true;
4165 }
4166
4167 // We then check to see if a valid mask can be constructed for one of the
4168 // register string values permitted for the A and R class cores. These values
4169 // are apsr, spsr and cpsr; these are also valid on older cores.
4170 int Mask = getARClassRegisterMask(Reg, Flags);
4171 if (Mask != -1) {
4172 Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
4173 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
4174 N->getOperand(0) };
4175 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
4176 DL, MVT::Other, Ops));
4177 return true;
4178 }
4179
4180 return false;
4181 }
4182
// Rewrite the operands of an INLINEASM node so that every register operand
// group consisting of exactly two GPR-class registers (or a use tied to a
// def that was already rewritten) is replaced by a single GPRPair virtual
// register. Returns false and leaves N alone if no operand group needed
// rewriting; otherwise builds a new INLINEASM node from the adjusted operand
// list, replaces N with it and returns true.
bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
  std::vector<SDValue> AsmNodeOperands;
  unsigned Flag, Kind;
  bool Changed = false;
  unsigned NumOps = N->getNumOperands();

  // Normally, i64 data is bound to two arbitrary GPRs for "%r" constraint.
  // However, some instructions (e.g. ldrexd/strexd in ARM mode) require
  // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
  // respectively. Since there is no constraint to explicitly specify a
  // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
  // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
  // them into a GPRPair.

  SDLoc dl(N);
  // If N has a glued node, its glue is taken from the last operand.
  SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
                                   : SDValue(nullptr,0);

  // One entry per operand group that has registers, in operand order; set to
  // true once that group has been replaced by a GPRPair.
  SmallVector<bool, 8> OpChanged;
  // Glue node will be appended late.
  for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
    SDValue op = N->getOperand(i);
    AsmNodeOperands.push_back(op);

    // Operands before the first asm operand are copied through unchanged.
    if (i < InlineAsm::Op_FirstOperand)
      continue;

    // Only constant operands are decoded as flag words; anything else is
    // copied through unchanged.
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
      Flag = C->getZExtValue();
      Kind = InlineAsm::getKind(Flag);
    }
    else
      continue;

    // Immediate operands to inline asm in the SelectionDAG are modeled with
    // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
    // the second is a constant with the value of the immediate. If we get here
    // and we have a Kind_Imm, skip the next operand, and continue.
    if (Kind == InlineAsm::Kind_Imm) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
    if (NumRegs)
      OpChanged.push_back(false);

    unsigned DefIdx = 0;
    bool IsTiedToChangedOp = false;
    // If it's a use that is tied with a previous def, it has no
    // reg class constraint.
    if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
      IsTiedToChangedOp = OpChanged[DefIdx];

    // Only register uses and (early-clobber) defs are candidates.
    if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
        && Kind != InlineAsm::Kind_RegDefEarlyClobber)
      continue;

    // Rewrite only groups of exactly two registers that are either
    // constrained to the GPR class or tied to an already rewritten def.
    unsigned RC;
    bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
    if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
        || NumRegs != 2)
      continue;

    assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
    // The two registers of the group follow the flag word.
    SDValue V0 = N->getOperand(i+1);
    SDValue V1 = N->getOperand(i+2);
    unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
    unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
    SDValue PairedReg;
    MachineRegisterInfo &MRI = MF->getRegInfo();

    if (Kind == InlineAsm::Kind_RegDef ||
        Kind == InlineAsm::Kind_RegDefEarlyClobber) {
      // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
      // the original GPRs.

      unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      SDValue Chain = SDValue(N,0);

      SDNode *GU = N->getGluedUser();
      SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
                                               Chain.getValue(1));

      // Extract values from a GPRPair reg and copy to the original GPR reg.
      SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
                                                    RegCopy);
      SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
                                                    RegCopy);
      SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
                                        RegCopy.getValue(1));
      SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));

      // Update the original glue user: keep all of its operands except the
      // last one, which is replaced by the glue result of the final copy.
      std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
      Ops.push_back(T1.getValue(1));
      CurDAG->UpdateNodeOperands(GU, Ops);
    }
    else {
      // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
      // GPRPair and then pass the GPRPair to the inline asm.
      SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];

      // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
      SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
                                          Chain.getValue(1));
      SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
                                          T0.getValue(1));
      SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);

      // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
      // i32 VRs of inline asm with it.
      unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));

      // The rebuilt asm node takes its input chain and glue from the copy.
      AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
      Glue = Chain.getValue(1);
    }

    Changed = true;

    if(PairedReg.getNode()) {
      // Mark the current operand group (the last entry pushed) as rewritten.
      OpChanged[OpChanged.size() -1 ] = true;
      // Build a flag word of the same kind that describes a single register.
      Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
      if (IsTiedToChangedOp)
        Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
      else
        Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
      // Replace the current flag.
      AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
          Flag, dl, MVT::i32);
      // Add the new register node and skip the original two GPRs.
      AsmNodeOperands.push_back(PairedReg);
      // Skip the next two GPRs.
      i += 2;
    }
  }

  // Append the glue last (it may have been replaced by a use copy above).
  if (Glue.getNode())
    AsmNodeOperands.push_back(Glue);
  if (!Changed)
    return false;

  SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
      CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
  // NOTE(review): presumably resets the node id so the new node is treated as
  // not yet selected -- confirm against the SelectionDAGISel conventions.
  New->setNodeId(-1);
  ReplaceNode(N, New.getNode());
  return true;
}
4335
4336
4337 bool ARMDAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDValue & Op,unsigned ConstraintID,std::vector<SDValue> & OutOps)4338 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
4339 std::vector<SDValue> &OutOps) {
4340 switch(ConstraintID) {
4341 default:
4342 llvm_unreachable("Unexpected asm memory constraint");
4343 case InlineAsm::Constraint_i:
4344 // FIXME: It seems strange that 'i' is needed here since it's supposed to
4345 // be an immediate and not a memory constraint.
4346 // Fallthrough.
4347 case InlineAsm::Constraint_m:
4348 case InlineAsm::Constraint_o:
4349 case InlineAsm::Constraint_Q:
4350 case InlineAsm::Constraint_Um:
4351 case InlineAsm::Constraint_Un:
4352 case InlineAsm::Constraint_Uq:
4353 case InlineAsm::Constraint_Us:
4354 case InlineAsm::Constraint_Ut:
4355 case InlineAsm::Constraint_Uv:
4356 case InlineAsm::Constraint_Uy:
4357 // Require the address to be in a register. That is safe for all ARM
4358 // variants and it is hard to do anything much smarter without knowing
4359 // how the operand is used.
4360 OutOps.push_back(Op);
4361 return false;
4362 }
4363 return true;
4364 }
4365
4366 /// createARMISelDag - This pass converts a legalized DAG into a
4367 /// ARM-specific DAG, ready for instruction scheduling.
4368 ///
createARMISelDag(ARMBaseTargetMachine & TM,CodeGenOpt::Level OptLevel)4369 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
4370 CodeGenOpt::Level OptLevel) {
4371 return new ARMDAGToDAGISel(TM, OptLevel);
4372 }
4373