//==-- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions --*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling and other late optimizations. This
// pass should be run after register allocation but before the post-regalloc
// scheduling pass.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/AArch64AddressingModes.h"
#include "AArch64InstrInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm;

namespace {
class AArch64ExpandPseudo : public MachineFunctionPass {
public:
  static char ID;
  AArch64ExpandPseudo() : MachineFunctionPass(ID) {}

  const AArch64InstrInfo *TII;

  bool runOnMachineFunction(MachineFunction &Fn) override;

  const char *getPassName() const override {
    return "AArch64 pseudo instruction expansion pass";
  }

private:
  bool expandMBB(MachineBasicBlock &MBB);
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
  bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                    unsigned BitSize);
};
char AArch64ExpandPseudo::ID = 0;
}

/// \brief Transfer implicit operands on the pseudo instruction to the
/// instructions created from the expansion.
static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
                           MachineInstrBuilder &DefMI) {
  const MCInstrDesc &Desc = OldMI.getDesc();
  for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands(); i != e;
       ++i) {
    const MachineOperand &MO = OldMI.getOperand(i);
    assert(MO.isReg() && MO.getReg());
    if (MO.isUse())
      UseMI.addOperand(MO);
    else
      DefMI.addOperand(MO);
  }
}

/// \brief Helper function which extracts the specified 16-bit chunk from a
/// 64-bit value.
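/// E.g. (illustrative) getChunk(0x123456789ABCDEF0, 2) == 0x5678.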
static uint64_t getChunk(uint64_t Imm, unsigned ChunkIdx) {
  assert(ChunkIdx < 4 && "Out of range chunk index specified!");

  return (Imm >> (ChunkIdx * 16)) & 0xFFFF;
}

/// \brief Helper function which replicates a 16-bit chunk within a 64-bit
/// value. Indices correspond to element numbers in a v4i16.
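/// E.g. (illustrative) replicateChunk(0x00AA00BB00CC00DD, 0, 2) copies chunk 0
/// (0x00DD) into chunk 2, giving 0x00AA00DD00CC00DD.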
static uint64_t replicateChunk(uint64_t Imm, unsigned FromIdx, unsigned ToIdx) {
  assert((FromIdx < 4) && (ToIdx < 4) && "Out of range chunk index specified!");
  const unsigned ShiftAmt = ToIdx * 16;

  // Replicate the source chunk to the destination position.
  const uint64_t Chunk = getChunk(Imm, FromIdx) << ShiftAmt;
  // Clear the destination chunk.
  Imm &= ~(0xFFFFLL << ShiftAmt);
  // Insert the replicated chunk.
  return Imm | Chunk;
}

/// \brief Helper function which tries to materialize a 64-bit value with an
/// ORR + MOVK instruction sequence.
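/// On success the pseudo is replaced by (sketch):
///   ORR  Xd, XZR, #OrrImm
///   MOVK Xd, #getChunk(UImm, ChunkIdx), LSL #(ChunkIdx * 16)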
static bool tryOrrMovk(uint64_t UImm, uint64_t OrrImm, MachineInstr &MI,
                       MachineBasicBlock &MBB,
                       MachineBasicBlock::iterator &MBBI,
                       const AArch64InstrInfo *TII, unsigned ChunkIdx) {
  assert(ChunkIdx < 4 && "Out of range chunk index specified!");
  const unsigned ShiftAmt = ChunkIdx * 16;

  uint64_t Encoding;
  if (AArch64_AM::processLogicalImmediate(OrrImm, 64, Encoding)) {
    // Create the ORR-immediate instruction.
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
            .addOperand(MI.getOperand(0))
            .addReg(AArch64::XZR)
            .addImm(Encoding);

    // Create the MOVK instruction.
    const unsigned Imm16 = getChunk(UImm, ChunkIdx);
    const unsigned DstReg = MI.getOperand(0).getReg();
    const bool DstIsDead = MI.getOperand(0).isDead();
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
            .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
            .addReg(DstReg)
            .addImm(Imm16)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt));

    transferImpOps(MI, MIB, MIB1);
    MI.eraseFromParent();
    return true;
  }

  return false;
}

/// \brief Check whether the given 16-bit chunk replicated to full 64-bit width
/// can be materialized with an ORR instruction.
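/// E.g. (illustrative) the chunk 0x5555 replicates to 0x5555555555555555,
/// which is a valid logical immediate and thus encodable by ORR.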
static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding) {
  Chunk = (Chunk << 48) | (Chunk << 32) | (Chunk << 16) | Chunk;

  return AArch64_AM::processLogicalImmediate(Chunk, 64, Encoding);
}

/// \brief Check for identical 16-bit chunks within the constant and if so
/// materialize them with a single ORR instruction. The remaining one or two
/// 16-bit chunks will be materialized with MOVK instructions.
///
/// This allows us to materialize constants like |A|B|A|A| or |A|B|C|A| (order
/// of the chunks doesn't matter), assuming |A|A|A|A| can be materialized with
/// an ORR instruction.
///
static bool tryToreplicateChunks(uint64_t UImm, MachineInstr &MI,
                                 MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator &MBBI,
                                 const AArch64InstrInfo *TII) {
  typedef DenseMap<uint64_t, unsigned> CountMap;
  CountMap Counts;

  // Scan the constant and count how often every chunk occurs.
  for (unsigned Idx = 0; Idx < 4; ++Idx)
    ++Counts[getChunk(UImm, Idx)];

  // Traverse the chunks to find one which occurs more than once.
  for (CountMap::const_iterator Chunk = Counts.begin(), End = Counts.end();
       Chunk != End; ++Chunk) {
    const uint64_t ChunkVal = Chunk->first;
    const unsigned Count = Chunk->second;

    uint64_t Encoding = 0;

    // We are looking for chunks which have two or three instances and can be
    // materialized with an ORR instruction.
    if ((Count != 2 && Count != 3) || !canUseOrr(ChunkVal, Encoding))
      continue;

    const bool CountThree = Count == 3;
    // Create the ORR-immediate instruction.
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
            .addOperand(MI.getOperand(0))
            .addReg(AArch64::XZR)
            .addImm(Encoding);

    const unsigned DstReg = MI.getOperand(0).getReg();
    const bool DstIsDead = MI.getOperand(0).isDead();

    unsigned ShiftAmt = 0;
    uint64_t Imm16 = 0;
    // Find the first chunk not materialized with the ORR instruction.
    for (; ShiftAmt < 64; ShiftAmt += 16) {
      Imm16 = (UImm >> ShiftAmt) & 0xFFFF;

      if (Imm16 != ChunkVal)
        break;
    }

    // Create the first MOVK instruction.
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
            .addReg(DstReg,
                    RegState::Define | getDeadRegState(DstIsDead && CountThree))
            .addReg(DstReg)
            .addImm(Imm16)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt));

    // If the chunk occurs three times, the whole constant is now materialized
    // and we can exit.
    if (CountThree) {
      transferImpOps(MI, MIB, MIB1);
      MI.eraseFromParent();
      return true;
    }

    // Find the remaining chunk which needs to be materialized.
    for (ShiftAmt += 16; ShiftAmt < 64; ShiftAmt += 16) {
      Imm16 = (UImm >> ShiftAmt) & 0xFFFF;

      if (Imm16 != ChunkVal)
        break;
    }

    // Create the second MOVK instruction.
    MachineInstrBuilder MIB2 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
            .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
            .addReg(DstReg)
            .addImm(Imm16)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt));

    transferImpOps(MI, MIB, MIB2);
    MI.eraseFromParent();
    return true;
  }

  return false;
}

/// \brief Check whether this chunk matches the pattern '1...0...'. This pattern
/// starts a contiguous sequence of ones if we look at the bits from the LSB
/// towards the MSB.
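/// E.g. (illustrative) the sign-extended chunk 0xFFFFFFFFFFFFFF00 has 56
/// leading ones and 8 trailing zeros, so it is a start chunk.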
static bool isStartChunk(uint64_t Chunk) {
  if (Chunk == 0 || Chunk == UINT64_MAX)
    return false;

  return (CountLeadingOnes_64(Chunk) + countTrailingZeros(Chunk)) == 64;
}

/// \brief Check whether this chunk matches the pattern '0...1...'. This pattern
/// ends a contiguous sequence of ones if we look at the bits from the LSB
/// towards the MSB.
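/// E.g. (illustrative) the chunk 0x0000000000000FFF has 52 leading zeros and
/// 12 trailing ones, so it is an end chunk.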
static bool isEndChunk(uint64_t Chunk) {
  if (Chunk == 0 || Chunk == UINT64_MAX)
    return false;

  return (countLeadingZeros(Chunk) + CountTrailingOnes_64(Chunk)) == 64;
}

/// \brief Clear or set all bits in the chunk at the given index.
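/// E.g. (illustrative) updateImm(0x1234, 1, /*Clear=*/false) == 0xFFFF1234.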
static uint64_t updateImm(uint64_t Imm, unsigned Idx, bool Clear) {
  const uint64_t Mask = 0xFFFF;

  if (Clear)
    // Clear chunk in the immediate.
    Imm &= ~(Mask << (Idx * 16));
  else
    // Set all bits in the immediate for the particular chunk.
    Imm |= Mask << (Idx * 16);

  return Imm;
}

/// \brief Check whether the constant contains a sequence of contiguous ones,
/// which might be interrupted by one or two chunks. If so, materialize the
/// sequence of contiguous ones with an ORR instruction.
/// Materialize the chunks which are either interrupting the sequence or outside
/// of the sequence with a MOVK instruction.
///
/// Assume S is a chunk which starts the sequence (1...0...) and E is a chunk
/// which ends the sequence (0...1...). We are then looking for constants which
/// contain at least one S and one E chunk.
/// E.g. |E|A|B|S|, |A|E|B|S| or |A|B|E|S|.
///
/// We are also looking for constants like |S|A|B|E| where the contiguous
/// sequence of ones wraps around the MSB into the LSB.
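///
/// E.g. (illustrative) for UImm = 0x00FF1234FFFFF000 the ORR materializes the
/// contiguous run 0x00FFFFFFFFFFF000 and a single MOVK patches chunk 2:
///   ORR  Xd, XZR, #0x00FFFFFFFFFFF000
///   MOVK Xd, #0x1234, LSL #32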
///
static bool trySequenceOfOnes(uint64_t UImm, MachineInstr &MI,
                              MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator &MBBI,
                              const AArch64InstrInfo *TII) {
  const int NotSet = -1;
  const uint64_t Mask = 0xFFFF;

  int StartIdx = NotSet;
  int EndIdx = NotSet;
  // Try to find the chunks which start/end a contiguous sequence of ones.
  for (int Idx = 0; Idx < 4; ++Idx) {
    int64_t Chunk = getChunk(UImm, Idx);
    // Sign extend the 16-bit chunk to 64-bit.
    Chunk = (Chunk << 48) >> 48;

    if (isStartChunk(Chunk))
      StartIdx = Idx;
    else if (isEndChunk(Chunk))
      EndIdx = Idx;
  }

  // Early exit in case we can't find a start/end chunk.
  if (StartIdx == NotSet || EndIdx == NotSet)
    return false;

  // Outside of the contiguous sequence of ones everything needs to be zero.
  uint64_t Outside = 0;
  // Chunks between the start and end chunk need to have all their bits set.
  uint64_t Inside = Mask;

  // If our contiguous sequence of ones wraps around from the MSB into the LSB,
  // just swap indices and pretend we are materializing a contiguous sequence
  // of zeros surrounded by a contiguous sequence of ones.
  if (StartIdx > EndIdx) {
    std::swap(StartIdx, EndIdx);
    std::swap(Outside, Inside);
  }

  uint64_t OrrImm = UImm;
  int FirstMovkIdx = NotSet;
  int SecondMovkIdx = NotSet;

  // Find out which chunks we need to patch up to obtain a contiguous sequence
  // of ones.
  for (int Idx = 0; Idx < 4; ++Idx) {
    const uint64_t Chunk = getChunk(UImm, Idx);

    // Check whether we are looking at a chunk which is not part of the
    // contiguous sequence of ones.
    if ((Idx < StartIdx || EndIdx < Idx) && Chunk != Outside) {
      OrrImm = updateImm(OrrImm, Idx, Outside == 0);

      // Remember the index we need to patch.
      if (FirstMovkIdx == NotSet)
        FirstMovkIdx = Idx;
      else
        SecondMovkIdx = Idx;

      // Check whether we are looking at a chunk which is part of the
      // contiguous sequence of ones.
    } else if (Idx > StartIdx && Idx < EndIdx && Chunk != Inside) {
      OrrImm = updateImm(OrrImm, Idx, Inside != Mask);

      // Remember the index we need to patch.
      if (FirstMovkIdx == NotSet)
        FirstMovkIdx = Idx;
      else
        SecondMovkIdx = Idx;
    }
  }
  assert(FirstMovkIdx != NotSet && "Constant materializable with single ORR!");

  // Create the ORR-immediate instruction.
  uint64_t Encoding = 0;
  AArch64_AM::processLogicalImmediate(OrrImm, 64, Encoding);
  MachineInstrBuilder MIB =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
          .addOperand(MI.getOperand(0))
          .addReg(AArch64::XZR)
          .addImm(Encoding);

  const unsigned DstReg = MI.getOperand(0).getReg();
  const bool DstIsDead = MI.getOperand(0).isDead();

  const bool SingleMovk = SecondMovkIdx == NotSet;
  // Create the first MOVK instruction.
  MachineInstrBuilder MIB1 =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
          .addReg(DstReg,
                  RegState::Define | getDeadRegState(DstIsDead && SingleMovk))
          .addReg(DstReg)
          .addImm(getChunk(UImm, FirstMovkIdx))
          .addImm(
              AArch64_AM::getShifterImm(AArch64_AM::LSL, FirstMovkIdx * 16));

  // Early exit in case we only need to emit a single MOVK instruction.
  if (SingleMovk) {
    transferImpOps(MI, MIB, MIB1);
    MI.eraseFromParent();
    return true;
  }

  // Create the second MOVK instruction.
  MachineInstrBuilder MIB2 =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
          .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
          .addReg(DstReg)
          .addImm(getChunk(UImm, SecondMovkIdx))
          .addImm(
              AArch64_AM::getShifterImm(AArch64_AM::LSL, SecondMovkIdx * 16));

  transferImpOps(MI, MIB, MIB2);
  MI.eraseFromParent();
  return true;
}

/// \brief Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
/// real move-immediate instructions to synthesize the immediate.
bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       unsigned BitSize) {
  MachineInstr &MI = *MBBI;
  uint64_t Imm = MI.getOperand(1).getImm();
  const unsigned Mask = 0xFFFF;

  // Try a MOVI instruction (aka ORR-immediate with the zero register).
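  // Zero-extend the low BitSize bits first so the logical-immediate check
  // only sees the relevant bits of a 32-bit immediate.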
  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
  uint64_t Encoding;
  if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
    unsigned Opc = (BitSize == 32 ? AArch64::ORRWri : AArch64::ORRXri);
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
            .addOperand(MI.getOperand(0))
            .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
            .addImm(Encoding);
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }

  // Scan the immediate and count the number of 16-bit chunks which are either
  // all ones or all zeros.
  unsigned OneChunks = 0;
  unsigned ZeroChunks = 0;
  for (unsigned Shift = 0; Shift < BitSize; Shift += 16) {
    const unsigned Chunk = (Imm >> Shift) & Mask;
    if (Chunk == Mask)
      OneChunks++;
    else if (Chunk == 0)
      ZeroChunks++;
  }

  // Since we can't materialize the constant with a single ORR instruction,
  // let's see whether we can materialize 3/4 of the constant with an ORR
  // instruction and use an additional MOVK instruction to materialize the
  // remaining 1/4.
  //
  // We are looking for constants with a pattern like: |A|X|B|X| or |X|A|X|B|.
  //
  // E.g. assuming |A|X|A|X| is a pattern which can be materialized with ORR,
  // we would create the following instruction sequence:
  //
  // ORR x0, xzr, |A|X|A|X|
  // MOVK x0, |B|, LSL #16
  //
  // Only look at 64-bit constants which can't be materialized with a single
  // instruction, i.e. which have fewer than three all-zero or all-one
  // chunks.
  //
  // Ignore 32-bit constants here; they can always be materialized with a
  // MOVZ/MOVN + MOVK pair. Since a 32-bit constant that reaches this point
  // can't be materialized with a single ORR, the best sequence we can achieve
  // is an ORR + MOVK pair. Thus we fall back to the default code below, which
  // in the best case creates a single MOVZ/MOVN instruction (in case one
  // chunk is all zero or all one).
  //
  if (BitSize == 64 && OneChunks < 3 && ZeroChunks < 3) {
    // If we interpret the 64-bit constant as a v4i16, are elements 0 and 2
    // identical?
    if (getChunk(UImm, 0) == getChunk(UImm, 2)) {
      // See if we can come up with a constant which can be materialized with
      // ORR-immediate by replicating element 3 into element 1.
      uint64_t OrrImm = replicateChunk(UImm, 3, 1);
      if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 1))
        return true;

      // See if we can come up with a constant which can be materialized with
      // ORR-immediate by replicating element 1 into element 3.
      OrrImm = replicateChunk(UImm, 1, 3);
      if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 3))
        return true;

      // If we interpret the 64-bit constant as a v4i16, are elements 1 and 3
      // identical?
    } else if (getChunk(UImm, 1) == getChunk(UImm, 3)) {
      // See if we can come up with a constant which can be materialized with
      // ORR-immediate by replicating element 2 into element 0.
      uint64_t OrrImm = replicateChunk(UImm, 2, 0);
      if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 0))
        return true;

      // See if we can come up with a constant which can be materialized with
      // ORR-immediate by replicating element 0 into element 2.
      OrrImm = replicateChunk(UImm, 0, 2);
      if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 2))
        return true;
    }
  }

  // Check for identical 16-bit chunks within the constant and if so materialize
  // them with a single ORR instruction. The remaining one or two 16-bit chunks
  // will be materialized with MOVK instructions.
  if (BitSize == 64 && tryToreplicateChunks(UImm, MI, MBB, MBBI, TII))
    return true;

  // Check whether the constant contains a sequence of contiguous ones, which
  // might be interrupted by one or two chunks. If so, materialize the sequence
  // of contiguous ones with an ORR instruction. Materialize the chunks which
  // are either interrupting the sequence or outside of the sequence with a
  // MOVK instruction.
  if (BitSize == 64 && trySequenceOfOnes(UImm, MI, MBB, MBBI, TII))
    return true;

  // Use a MOVZ or MOVN instruction to set the high bits, followed by one or
  // more MOVK instructions to insert additional 16-bit portions into the
  // lower bits.
  bool isNeg = false;

  // Use MOVN to materialize the high bits if we have more all one chunks
  // than all zero chunks.
  if (OneChunks > ZeroChunks) {
    isNeg = true;
    Imm = ~Imm;
  }

  unsigned FirstOpc;
  if (BitSize == 32) {
    Imm &= (1LL << 32) - 1;
    FirstOpc = (isNeg ? AArch64::MOVNWi : AArch64::MOVZWi);
  } else {
    FirstOpc = (isNeg ? AArch64::MOVNXi : AArch64::MOVZXi);
  }
  unsigned Shift = 0;     // LSL amount for high bits with MOVZ/MOVN
  unsigned LastShift = 0; // LSL amount for last MOVK
  if (Imm != 0) {
    unsigned LZ = countLeadingZeros(Imm);
    unsigned TZ = countTrailingZeros(Imm);
    Shift = ((63 - LZ) / 16) * 16;
    LastShift = (TZ / 16) * 16;
  }
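  // E.g. (illustrative) for Imm = 0x0000123400005678: LZ = 19 and TZ = 3, so
  // Shift = 32 and LastShift = 0, giving MOVZ Xd, #0x1234, LSL #32 followed
  // by MOVK Xd, #0x5678 (the all-zero middle chunk is skipped below).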
  unsigned Imm16 = (Imm >> Shift) & Mask;
  unsigned DstReg = MI.getOperand(0).getReg();
  bool DstIsDead = MI.getOperand(0).isDead();
  MachineInstrBuilder MIB1 =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(FirstOpc))
          .addReg(DstReg, RegState::Define |
                              getDeadRegState(DstIsDead && Shift == LastShift))
          .addImm(Imm16)
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift));

  // If a MOVN was used for the high bits of a negative value, flip the rest
  // of the bits back for use with MOVK.
  if (isNeg)
    Imm = ~Imm;

  if (Shift == LastShift) {
    transferImpOps(MI, MIB1, MIB1);
    MI.eraseFromParent();
    return true;
  }

  MachineInstrBuilder MIB2;
  unsigned Opc = (BitSize == 32 ? AArch64::MOVKWi : AArch64::MOVKXi);
  while (Shift != LastShift) {
    Shift -= 16;
    Imm16 = (Imm >> Shift) & Mask;
    if (Imm16 == (isNeg ? Mask : 0))
      continue; // This 16-bit portion is already set correctly.
    MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
               .addReg(DstReg,
                       RegState::Define |
                           getDeadRegState(DstIsDead && Shift == LastShift))
               .addReg(DstReg)
               .addImm(Imm16)
               .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift));
  }

  transferImpOps(MI, MIB1, MIB2);
  MI.eraseFromParent();
  return true;
}

/// \brief If MBBI references a pseudo instruction that should be expanded here,
/// do the expansion and return true. Otherwise return false.
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  default:
    break;

  case AArch64::ADDWrr:
  case AArch64::SUBWrr:
  case AArch64::ADDXrr:
  case AArch64::SUBXrr:
  case AArch64::ADDSWrr:
  case AArch64::SUBSWrr:
  case AArch64::ADDSXrr:
  case AArch64::SUBSXrr:
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::ANDSWrr:
  case AArch64::ANDSXrr:
  case AArch64::BICSWrr:
  case AArch64::BICSXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr: {
    unsigned Opcode;
    switch (MI.getOpcode()) {
    default:
      return false;
    case AArch64::ADDWrr: Opcode = AArch64::ADDWrs; break;
    case AArch64::SUBWrr: Opcode = AArch64::SUBWrs; break;
    case AArch64::ADDXrr: Opcode = AArch64::ADDXrs; break;
    case AArch64::SUBXrr: Opcode = AArch64::SUBXrs; break;
    case AArch64::ADDSWrr: Opcode = AArch64::ADDSWrs; break;
    case AArch64::SUBSWrr: Opcode = AArch64::SUBSWrs; break;
    case AArch64::ADDSXrr: Opcode = AArch64::ADDSXrs; break;
    case AArch64::SUBSXrr: Opcode = AArch64::SUBSXrs; break;
    case AArch64::ANDWrr: Opcode = AArch64::ANDWrs; break;
    case AArch64::ANDXrr: Opcode = AArch64::ANDXrs; break;
    case AArch64::BICWrr: Opcode = AArch64::BICWrs; break;
    case AArch64::BICXrr: Opcode = AArch64::BICXrs; break;
    case AArch64::ANDSWrr: Opcode = AArch64::ANDSWrs; break;
    case AArch64::ANDSXrr: Opcode = AArch64::ANDSXrs; break;
    case AArch64::BICSWrr: Opcode = AArch64::BICSWrs; break;
    case AArch64::BICSXrr: Opcode = AArch64::BICSXrs; break;
    case AArch64::EONWrr: Opcode = AArch64::EONWrs; break;
    case AArch64::EONXrr: Opcode = AArch64::EONXrs; break;
    case AArch64::EORWrr: Opcode = AArch64::EORWrs; break;
    case AArch64::EORXrr: Opcode = AArch64::EORXrs; break;
    case AArch64::ORNWrr: Opcode = AArch64::ORNWrs; break;
    case AArch64::ORNXrr: Opcode = AArch64::ORNXrs; break;
    case AArch64::ORRWrr: Opcode = AArch64::ORRWrs; break;
    case AArch64::ORRXrr: Opcode = AArch64::ORRXrs; break;
    }
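    // Rewrite the register-register form as the equivalent shifted-register
    // form with a zero shift, e.g. ADDWrr a, b, c -> ADDWrs a, b, c, lsl #0.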
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode),
                MI.getOperand(0).getReg())
            .addOperand(MI.getOperand(1))
            .addOperand(MI.getOperand(2))
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    transferImpOps(MI, MIB1, MIB1);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::FCVTSHpseudo: {
    MachineOperand Src = MI.getOperand(1);
    Src.setImplicit();
    unsigned SrcH =
        TII->getRegisterInfo().getSubReg(Src.getReg(), AArch64::hsub);
    auto MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::FCVTSHr))
                   .addOperand(MI.getOperand(0))
                   .addReg(SrcH, RegState::Undef)
                   .addOperand(Src);
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::LOADgot: {
    // Expand into ADRP + LDR.
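    // The ADRP computes the 4KB-aligned page of the symbol and the LDR loads
    // from the page offset, e.g. (illustrative, GOT access):
    //   adrp x0, :got:sym
    //   ldr  x0, [x0, :got_lo12:sym]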
    unsigned DstReg = MI.getOperand(0).getReg();
    const MachineOperand &MO1 = MI.getOperand(1);
    unsigned Flags = MO1.getTargetFlags();
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);
    MachineInstrBuilder MIB2 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRXui))
            .addOperand(MI.getOperand(0))
            .addReg(DstReg);

    if (MO1.isGlobal()) {
      MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
      MIB2.addGlobalAddress(MO1.getGlobal(), 0,
                            Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    } else if (MO1.isSymbol()) {
      MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
      MIB2.addExternalSymbol(MO1.getSymbolName(),
                             Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    } else {
      assert(MO1.isCPI() &&
             "Only expect globals, externalsymbols, or constant pools");
      MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                Flags | AArch64II::MO_PAGE);
      MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                Flags | AArch64II::MO_PAGEOFF |
                                    AArch64II::MO_NC);
    }

    transferImpOps(MI, MIB1, MIB2);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::MOVaddr:
  case AArch64::MOVaddrJT:
  case AArch64::MOVaddrCP:
  case AArch64::MOVaddrBA:
  case AArch64::MOVaddrTLS:
  case AArch64::MOVaddrEXT: {
    // Expand into ADRP + ADD.
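    // E.g. (illustrative):
    //   adrp x0, sym
    //   add  x0, x0, :lo12:sym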
    unsigned DstReg = MI.getOperand(0).getReg();
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
            .addOperand(MI.getOperand(1));

    MachineInstrBuilder MIB2 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
            .addOperand(MI.getOperand(0))
            .addReg(DstReg)
            .addOperand(MI.getOperand(2))
            .addImm(0);

    transferImpOps(MI, MIB1, MIB2);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::MOVi32imm:
    return expandMOVImm(MBB, MBBI, 32);
  case AArch64::MOVi64imm:
    return expandMOVImm(MBB, MBBI, 64);
  case AArch64::RET_ReallyLR:
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
        .addReg(AArch64::LR);
    MI.eraseFromParent();
    return true;
  }
  return false;
}

/// \brief Iterate over the instructions in basic block MBB and expand any
/// pseudo instructions. Return true if anything was modified.
bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= expandMI(MBB, MBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  TII = static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo());

  bool Modified = false;
  for (auto &MBB : MF)
    Modified |= expandMBB(MBB);
  return Modified;
}

/// \brief Returns an instance of the pseudo instruction expansion pass.
FunctionPass *llvm::createAArch64ExpandPseudoPass() {
  return new AArch64ExpandPseudo();
}