1 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This file implements the LegalizerHelper class to legalize
10 /// individual instructions and the LegalizeMachineIR wrapper pass for the
11 /// primary legalization.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
16 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
17 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
18 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
19 #include "llvm/CodeGen/MachineRegisterInfo.h"
20 #include "llvm/CodeGen/TargetFrameLowering.h"
21 #include "llvm/CodeGen/TargetInstrInfo.h"
22 #include "llvm/CodeGen/TargetLowering.h"
23 #include "llvm/CodeGen/TargetSubtargetInfo.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/MathExtras.h"
26 #include "llvm/Support/raw_ostream.h"
27
28 #define DEBUG_TYPE "legalizer"
29
30 using namespace llvm;
31 using namespace LegalizeActions;
32
33 /// Try to break down \p OrigTy into \p NarrowTy sized pieces.
34 ///
35 /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
36 /// with any leftover piece as type \p LeftoverTy
37 ///
38 /// Returns -1 in the first element of the pair if the breakdown is not
39 /// satisfiable.
40 static std::pair<int, int>
getNarrowTypeBreakDown(LLT OrigTy,LLT NarrowTy,LLT & LeftoverTy)41 getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
42 assert(!LeftoverTy.isValid() && "this is an out argument");
43
44 unsigned Size = OrigTy.getSizeInBits();
45 unsigned NarrowSize = NarrowTy.getSizeInBits();
46 unsigned NumParts = Size / NarrowSize;
47 unsigned LeftoverSize = Size - NumParts * NarrowSize;
48 assert(Size > NarrowSize);
49
50 if (LeftoverSize == 0)
51 return {NumParts, 0};
52
53 if (NarrowTy.isVector()) {
54 unsigned EltSize = OrigTy.getScalarSizeInBits();
55 if (LeftoverSize % EltSize != 0)
56 return {-1, -1};
57 LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
58 } else {
59 LeftoverTy = LLT::scalar(LeftoverSize);
60 }
61
62 int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
63 return std::make_pair(NumParts, NumLeftover);
64 }
65
/// Construct a LegalizerHelper that queries the function's subtarget for its
/// LegalizerInfo. The builder is pointed at \p MF and wired to \p Observer so
/// every instruction it creates is reported.
LegalizerHelper::LegalizerHelper(MachineFunction &MF,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &Builder)
    : MIRBuilder(Builder), MRI(MF.getRegInfo()),
      LI(*MF.getSubtarget().getLegalizerInfo()), Observer(Observer) {
  MIRBuilder.setMF(MF);
  MIRBuilder.setChangeObserver(Observer);
}
74
/// Construct a LegalizerHelper with an explicitly supplied LegalizerInfo,
/// bypassing the subtarget lookup done by the other constructor.
LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &B)
    : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) {
  MIRBuilder.setMF(MF);
  MIRBuilder.setChangeObserver(Observer);
}
/// Apply a single legalization step to \p MI, as chosen by the target's
/// LegalizerInfo, and report whether the instruction is now legal, was
/// rewritten, or cannot be handled.
LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
  LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));

  // Intrinsics bypass the generic action table and go straight to the
  // target hook.
  if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
      MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
    return LI.legalizeIntrinsic(MI, MRI, MIRBuilder) ? Legalized
                                                     : UnableToLegalize;
  // Dispatch on the action the target requested for this instruction.
  auto Step = LI.getAction(MI, MRI);
  switch (Step.Action) {
  case Legal:
    LLVM_DEBUG(dbgs() << ".. Already legal\n");
    return AlreadyLegal;
  case Libcall:
    LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
    return libcall(MI);
  case NarrowScalar:
    LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
    return narrowScalar(MI, Step.TypeIdx, Step.NewType);
  case WidenScalar:
    LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
    return widenScalar(MI, Step.TypeIdx, Step.NewType);
  case Lower:
    LLVM_DEBUG(dbgs() << ".. Lower\n");
    return lower(MI, Step.TypeIdx, Step.NewType);
  case FewerElements:
    LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
    return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
  case MoreElements:
    LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
    return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
  case Custom:
    LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
    return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized
                                                            : UnableToLegalize;
  default:
    LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
    return UnableToLegalize;
  }
}
122
extractParts(Register Reg,LLT Ty,int NumParts,SmallVectorImpl<Register> & VRegs)123 void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
124 SmallVectorImpl<Register> &VRegs) {
125 for (int i = 0; i < NumParts; ++i)
126 VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
127 MIRBuilder.buildUnmerge(VRegs, Reg);
128 }
129
extractParts(Register Reg,LLT RegTy,LLT MainTy,LLT & LeftoverTy,SmallVectorImpl<Register> & VRegs,SmallVectorImpl<Register> & LeftoverRegs)130 bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
131 LLT MainTy, LLT &LeftoverTy,
132 SmallVectorImpl<Register> &VRegs,
133 SmallVectorImpl<Register> &LeftoverRegs) {
134 assert(!LeftoverTy.isValid() && "this is an out argument");
135
136 unsigned RegSize = RegTy.getSizeInBits();
137 unsigned MainSize = MainTy.getSizeInBits();
138 unsigned NumParts = RegSize / MainSize;
139 unsigned LeftoverSize = RegSize - NumParts * MainSize;
140
141 // Use an unmerge when possible.
142 if (LeftoverSize == 0) {
143 for (unsigned I = 0; I < NumParts; ++I)
144 VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
145 MIRBuilder.buildUnmerge(VRegs, Reg);
146 return true;
147 }
148
149 if (MainTy.isVector()) {
150 unsigned EltSize = MainTy.getScalarSizeInBits();
151 if (LeftoverSize % EltSize != 0)
152 return false;
153 LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
154 } else {
155 LeftoverTy = LLT::scalar(LeftoverSize);
156 }
157
158 // For irregular sizes, extract the individual parts.
159 for (unsigned I = 0; I != NumParts; ++I) {
160 Register NewReg = MRI.createGenericVirtualRegister(MainTy);
161 VRegs.push_back(NewReg);
162 MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
163 }
164
165 for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
166 Offset += LeftoverSize) {
167 Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
168 LeftoverRegs.push_back(NewReg);
169 MIRBuilder.buildExtract(NewReg, Reg, Offset);
170 }
171
172 return true;
173 }
174
getGCDType(LLT OrigTy,LLT TargetTy)175 static LLT getGCDType(LLT OrigTy, LLT TargetTy) {
176 if (OrigTy.isVector() && TargetTy.isVector()) {
177 assert(OrigTy.getElementType() == TargetTy.getElementType());
178 int GCD = greatestCommonDivisor(OrigTy.getNumElements(),
179 TargetTy.getNumElements());
180 return LLT::scalarOrVector(GCD, OrigTy.getElementType());
181 }
182
183 if (OrigTy.isVector() && !TargetTy.isVector()) {
184 assert(OrigTy.getElementType() == TargetTy);
185 return TargetTy;
186 }
187
188 assert(!OrigTy.isVector() && !TargetTy.isVector());
189
190 int GCD = greatestCommonDivisor(OrigTy.getSizeInBits(),
191 TargetTy.getSizeInBits());
192 return LLT::scalar(GCD);
193 }
194
insertParts(Register DstReg,LLT ResultTy,LLT PartTy,ArrayRef<Register> PartRegs,LLT LeftoverTy,ArrayRef<Register> LeftoverRegs)195 void LegalizerHelper::insertParts(Register DstReg,
196 LLT ResultTy, LLT PartTy,
197 ArrayRef<Register> PartRegs,
198 LLT LeftoverTy,
199 ArrayRef<Register> LeftoverRegs) {
200 if (!LeftoverTy.isValid()) {
201 assert(LeftoverRegs.empty());
202
203 if (!ResultTy.isVector()) {
204 MIRBuilder.buildMerge(DstReg, PartRegs);
205 return;
206 }
207
208 if (PartTy.isVector())
209 MIRBuilder.buildConcatVectors(DstReg, PartRegs);
210 else
211 MIRBuilder.buildBuildVector(DstReg, PartRegs);
212 return;
213 }
214
215 unsigned PartSize = PartTy.getSizeInBits();
216 unsigned LeftoverPartSize = LeftoverTy.getSizeInBits();
217
218 Register CurResultReg = MRI.createGenericVirtualRegister(ResultTy);
219 MIRBuilder.buildUndef(CurResultReg);
220
221 unsigned Offset = 0;
222 for (Register PartReg : PartRegs) {
223 Register NewResultReg = MRI.createGenericVirtualRegister(ResultTy);
224 MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset);
225 CurResultReg = NewResultReg;
226 Offset += PartSize;
227 }
228
229 for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) {
230 // Use the original output register for the final insert to avoid a copy.
231 Register NewResultReg = (I + 1 == E) ?
232 DstReg : MRI.createGenericVirtualRegister(ResultTy);
233
234 MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset);
235 CurResultReg = NewResultReg;
236 Offset += LeftoverPartSize;
237 }
238 }
239
getRTLibDesc(unsigned Opcode,unsigned Size)240 static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
241 switch (Opcode) {
242 case TargetOpcode::G_SDIV:
243 assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
244 switch (Size) {
245 case 32:
246 return RTLIB::SDIV_I32;
247 case 64:
248 return RTLIB::SDIV_I64;
249 case 128:
250 return RTLIB::SDIV_I128;
251 default:
252 llvm_unreachable("unexpected size");
253 }
254 case TargetOpcode::G_UDIV:
255 assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
256 switch (Size) {
257 case 32:
258 return RTLIB::UDIV_I32;
259 case 64:
260 return RTLIB::UDIV_I64;
261 case 128:
262 return RTLIB::UDIV_I128;
263 default:
264 llvm_unreachable("unexpected size");
265 }
266 case TargetOpcode::G_SREM:
267 assert((Size == 32 || Size == 64) && "Unsupported size");
268 return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32;
269 case TargetOpcode::G_UREM:
270 assert((Size == 32 || Size == 64) && "Unsupported size");
271 return Size == 64 ? RTLIB::UREM_I64 : RTLIB::UREM_I32;
272 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
273 assert(Size == 32 && "Unsupported size");
274 return RTLIB::CTLZ_I32;
275 case TargetOpcode::G_FADD:
276 assert((Size == 32 || Size == 64) && "Unsupported size");
277 return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32;
278 case TargetOpcode::G_FSUB:
279 assert((Size == 32 || Size == 64) && "Unsupported size");
280 return Size == 64 ? RTLIB::SUB_F64 : RTLIB::SUB_F32;
281 case TargetOpcode::G_FMUL:
282 assert((Size == 32 || Size == 64) && "Unsupported size");
283 return Size == 64 ? RTLIB::MUL_F64 : RTLIB::MUL_F32;
284 case TargetOpcode::G_FDIV:
285 assert((Size == 32 || Size == 64) && "Unsupported size");
286 return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32;
287 case TargetOpcode::G_FEXP:
288 assert((Size == 32 || Size == 64) && "Unsupported size");
289 return Size == 64 ? RTLIB::EXP_F64 : RTLIB::EXP_F32;
290 case TargetOpcode::G_FEXP2:
291 assert((Size == 32 || Size == 64) && "Unsupported size");
292 return Size == 64 ? RTLIB::EXP2_F64 : RTLIB::EXP2_F32;
293 case TargetOpcode::G_FREM:
294 return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32;
295 case TargetOpcode::G_FPOW:
296 return Size == 64 ? RTLIB::POW_F64 : RTLIB::POW_F32;
297 case TargetOpcode::G_FMA:
298 assert((Size == 32 || Size == 64) && "Unsupported size");
299 return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32;
300 case TargetOpcode::G_FSIN:
301 assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
302 return Size == 128 ? RTLIB::SIN_F128
303 : Size == 64 ? RTLIB::SIN_F64 : RTLIB::SIN_F32;
304 case TargetOpcode::G_FCOS:
305 assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
306 return Size == 128 ? RTLIB::COS_F128
307 : Size == 64 ? RTLIB::COS_F64 : RTLIB::COS_F32;
308 case TargetOpcode::G_FLOG10:
309 assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
310 return Size == 128 ? RTLIB::LOG10_F128
311 : Size == 64 ? RTLIB::LOG10_F64 : RTLIB::LOG10_F32;
312 case TargetOpcode::G_FLOG:
313 assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
314 return Size == 128 ? RTLIB::LOG_F128
315 : Size == 64 ? RTLIB::LOG_F64 : RTLIB::LOG_F32;
316 case TargetOpcode::G_FLOG2:
317 assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
318 return Size == 128 ? RTLIB::LOG2_F128
319 : Size == 64 ? RTLIB::LOG2_F64 : RTLIB::LOG2_F32;
320 case TargetOpcode::G_FCEIL:
321 assert((Size == 32 || Size == 64) && "Unsupported size");
322 return Size == 64 ? RTLIB::CEIL_F64 : RTLIB::CEIL_F32;
323 case TargetOpcode::G_FFLOOR:
324 assert((Size == 32 || Size == 64) && "Unsupported size");
325 return Size == 64 ? RTLIB::FLOOR_F64 : RTLIB::FLOOR_F32;
326 }
327 llvm_unreachable("Unknown libcall function");
328 }
329
/// True if an instruction is in tail position in its caller. Intended for
/// legalizing libcalls as tail calls when possible.
static bool isLibCallInTailPosition(MachineInstr &MI) {
  const Function &F = MI.getParent()->getParent()->getFunction();

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore NoAlias and NonNull because they don't affect the
  // call sequence.
  AttributeList CallerAttrs = F.getAttributes();
  if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
          .removeAttribute(Attribute::NoAlias)
          .removeAttribute(Attribute::NonNull)
          .hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
      CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
    return false;

  // Only tail call if the following instruction is a standard return. A
  // following tail call disqualifies MI: it is not the final call itself.
  auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
  MachineInstr *Next = MI.getNextNode();
  if (!Next || TII.isTailCall(*Next) || !Next->isReturn())
    return false;

  return true;
}
358
359 LegalizerHelper::LegalizeResult
createLibcall(MachineIRBuilder & MIRBuilder,RTLIB::Libcall Libcall,const CallLowering::ArgInfo & Result,ArrayRef<CallLowering::ArgInfo> Args)360 llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
361 const CallLowering::ArgInfo &Result,
362 ArrayRef<CallLowering::ArgInfo> Args) {
363 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
364 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
365 const char *Name = TLI.getLibcallName(Libcall);
366
367 CallLowering::CallLoweringInfo Info;
368 Info.CallConv = TLI.getLibcallCallingConv(Libcall);
369 Info.Callee = MachineOperand::CreateES(Name);
370 Info.OrigRet = Result;
371 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
372 if (!CLI.lowerCall(MIRBuilder, Info))
373 return LegalizerHelper::UnableToLegalize;
374
375 return LegalizerHelper::Legalized;
376 }
377
378 // Useful for libcalls where all operands have the same type.
379 static LegalizerHelper::LegalizeResult
simpleLibcall(MachineInstr & MI,MachineIRBuilder & MIRBuilder,unsigned Size,Type * OpType)380 simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
381 Type *OpType) {
382 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
383
384 SmallVector<CallLowering::ArgInfo, 3> Args;
385 for (unsigned i = 1; i < MI.getNumOperands(); i++)
386 Args.push_back({MI.getOperand(i).getReg(), OpType});
387 return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
388 Args);
389 }
390
/// Lower a G_INTRINSIC_W_SIDE_EFFECTS memcpy/memmove/memset instruction to a
/// call to the corresponding runtime library function, emitting it as a tail
/// call when the instruction's trailing 'tail' immediate is set and the call
/// is actually in tail position.
LegalizerHelper::LegalizeResult
llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                       MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  SmallVector<CallLowering::ArgInfo, 3> Args;
  // Add all the args, except for the last which is an imm denoting 'tail'.
  // Operand 0 is the intrinsic ID, so the real arguments start at index 1.
  for (unsigned i = 1; i < MI.getNumOperands() - 1; i++) {
    Register Reg = MI.getOperand(i).getReg();

    // Need to derive an IR type for call lowering: pointers become i8* in
    // their address space, everything else an integer of matching width.
    LLT OpLLT = MRI.getType(Reg);
    Type *OpTy = nullptr;
    if (OpLLT.isPointer())
      OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
    else
      OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
    Args.push_back({Reg, OpTy});
  }

  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
  Intrinsic::ID ID = MI.getOperand(0).getIntrinsicID();
  RTLIB::Libcall RTLibcall;
  switch (ID) {
  case Intrinsic::memcpy:
    RTLibcall = RTLIB::MEMCPY;
    break;
  case Intrinsic::memset:
    RTLibcall = RTLIB::MEMSET;
    break;
  case Intrinsic::memmove:
    RTLibcall = RTLIB::MEMMOVE;
    break;
  default:
    return LegalizerHelper::UnableToLegalize;
  }
  const char *Name = TLIB.getLibcallName(RTLibcall);

  MIRBuilder.setInstr(MI);

  CallLowering::CallLoweringInfo Info;
  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
  Info.Callee = MachineOperand::CreateES(Name);
  // The mem* libcalls' return values are ignored here, so lower as void.
  Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx));
  // Only tail call if the source marked the intrinsic as a tail call AND the
  // surrounding code actually permits one.
  Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() == 1 &&
                    isLibCallInTailPosition(MI);

  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;

  if (Info.LoweredTailCall) {
    assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
    // We must have a return following the call to get past
    // isLibCallInTailPosition.
    assert(MI.getNextNode() && MI.getNextNode()->isReturn() &&
           "Expected instr following MI to be a return?");

    // We lowered a tail call, so the call is now the return from the block.
    // Delete the old return.
    MI.getNextNode()->eraseFromParent();
  }

  return LegalizerHelper::Legalized;
}
458
getConvRTLibDesc(unsigned Opcode,Type * ToType,Type * FromType)459 static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
460 Type *FromType) {
461 auto ToMVT = MVT::getVT(ToType);
462 auto FromMVT = MVT::getVT(FromType);
463
464 switch (Opcode) {
465 case TargetOpcode::G_FPEXT:
466 return RTLIB::getFPEXT(FromMVT, ToMVT);
467 case TargetOpcode::G_FPTRUNC:
468 return RTLIB::getFPROUND(FromMVT, ToMVT);
469 case TargetOpcode::G_FPTOSI:
470 return RTLIB::getFPTOSINT(FromMVT, ToMVT);
471 case TargetOpcode::G_FPTOUI:
472 return RTLIB::getFPTOUINT(FromMVT, ToMVT);
473 case TargetOpcode::G_SITOFP:
474 return RTLIB::getSINTTOFP(FromMVT, ToMVT);
475 case TargetOpcode::G_UITOFP:
476 return RTLIB::getUINTTOFP(FromMVT, ToMVT);
477 }
478 llvm_unreachable("Unsupported libcall function");
479 }
480
481 static LegalizerHelper::LegalizeResult
conversionLibcall(MachineInstr & MI,MachineIRBuilder & MIRBuilder,Type * ToType,Type * FromType)482 conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
483 Type *FromType) {
484 RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
485 return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
486 {{MI.getOperand(1).getReg(), FromType}});
487 }
488
/// Replace \p MI with a call to the runtime library function implementing it.
/// On success the original instruction is erased; on failure it is left in
/// place and UnableToLegalize is returned.
LegalizerHelper::LegalizeResult
LegalizerHelper::libcall(MachineInstr &MI) {
  LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
  unsigned Size = LLTy.getSizeInBits();
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  MIRBuilder.setInstr(MI);

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  // Integer ops: all operands share an integer type of the result width.
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    Type *HLTy = IntegerType::get(Ctx, Size);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  // FP ops: only f32 and f64 are handled here.
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR: {
    if (Size > 64) {
      LLVM_DEBUG(dbgs() << "Size " << Size << " too large to legalize.\n");
      return UnableToLegalize;
    }
    Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPEXT: {
    // FIXME: Support other floating point types (half, fp128 etc)
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (ToSize != 64 || FromSize != 32)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder, Type::getDoubleTy(Ctx), Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPTRUNC: {
    // FIXME: Support other floating point types (half, fp128 etc)
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (ToSize != 32 || FromSize != 64)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder, Type::getFloatTy(Ctx), Type::getDoubleTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
        FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
        FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  }

  // Every successful case falls through to here; the original instruction
  // has been fully replaced by the emitted call sequence.
  MI.eraseFromParent();
  return Legalized;
}
596
narrowScalar(MachineInstr & MI,unsigned TypeIdx,LLT NarrowTy)597 LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
598 unsigned TypeIdx,
599 LLT NarrowTy) {
600 MIRBuilder.setInstr(MI);
601
602 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
603 uint64_t NarrowSize = NarrowTy.getSizeInBits();
604
605 switch (MI.getOpcode()) {
606 default:
607 return UnableToLegalize;
608 case TargetOpcode::G_IMPLICIT_DEF: {
609 // FIXME: add support for when SizeOp0 isn't an exact multiple of
610 // NarrowSize.
611 if (SizeOp0 % NarrowSize != 0)
612 return UnableToLegalize;
613 int NumParts = SizeOp0 / NarrowSize;
614
615 SmallVector<Register, 2> DstRegs;
616 for (int i = 0; i < NumParts; ++i)
617 DstRegs.push_back(
618 MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg());
619
620 Register DstReg = MI.getOperand(0).getReg();
621 if(MRI.getType(DstReg).isVector())
622 MIRBuilder.buildBuildVector(DstReg, DstRegs);
623 else
624 MIRBuilder.buildMerge(DstReg, DstRegs);
625 MI.eraseFromParent();
626 return Legalized;
627 }
628 case TargetOpcode::G_CONSTANT: {
629 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
630 const APInt &Val = MI.getOperand(1).getCImm()->getValue();
631 unsigned TotalSize = Ty.getSizeInBits();
632 unsigned NarrowSize = NarrowTy.getSizeInBits();
633 int NumParts = TotalSize / NarrowSize;
634
635 SmallVector<Register, 4> PartRegs;
636 for (int I = 0; I != NumParts; ++I) {
637 unsigned Offset = I * NarrowSize;
638 auto K = MIRBuilder.buildConstant(NarrowTy,
639 Val.lshr(Offset).trunc(NarrowSize));
640 PartRegs.push_back(K.getReg(0));
641 }
642
643 LLT LeftoverTy;
644 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
645 SmallVector<Register, 1> LeftoverRegs;
646 if (LeftoverBits != 0) {
647 LeftoverTy = LLT::scalar(LeftoverBits);
648 auto K = MIRBuilder.buildConstant(
649 LeftoverTy,
650 Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
651 LeftoverRegs.push_back(K.getReg(0));
652 }
653
654 insertParts(MI.getOperand(0).getReg(),
655 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
656
657 MI.eraseFromParent();
658 return Legalized;
659 }
660 case TargetOpcode::G_SEXT: {
661 if (TypeIdx != 0)
662 return UnableToLegalize;
663
664 Register SrcReg = MI.getOperand(1).getReg();
665 LLT SrcTy = MRI.getType(SrcReg);
666
667 // FIXME: support the general case where the requested NarrowTy may not be
668 // the same as the source type. E.g. s128 = sext(s32)
669 if ((SrcTy.getSizeInBits() != SizeOp0 / 2) ||
670 SrcTy.getSizeInBits() != NarrowTy.getSizeInBits()) {
671 LLVM_DEBUG(dbgs() << "Can't narrow sext to type " << NarrowTy << "\n");
672 return UnableToLegalize;
673 }
674
675 // Shift the sign bit of the low register through the high register.
676 auto ShiftAmt =
677 MIRBuilder.buildConstant(LLT::scalar(64), NarrowTy.getSizeInBits() - 1);
678 auto Shift = MIRBuilder.buildAShr(NarrowTy, SrcReg, ShiftAmt);
679 MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {SrcReg, Shift.getReg(0)});
680 MI.eraseFromParent();
681 return Legalized;
682 }
683 case TargetOpcode::G_ZEXT: {
684 if (TypeIdx != 0)
685 return UnableToLegalize;
686
687 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
688 uint64_t SizeOp1 = SrcTy.getSizeInBits();
689 if (SizeOp0 % SizeOp1 != 0)
690 return UnableToLegalize;
691
692 // Generate a merge where the bottom bits are taken from the source, and
693 // zero everything else.
694 Register ZeroReg = MIRBuilder.buildConstant(SrcTy, 0).getReg(0);
695 unsigned NumParts = SizeOp0 / SizeOp1;
696 SmallVector<Register, 4> Srcs = {MI.getOperand(1).getReg()};
697 for (unsigned Part = 1; Part < NumParts; ++Part)
698 Srcs.push_back(ZeroReg);
699 MIRBuilder.buildMerge(MI.getOperand(0).getReg(), Srcs);
700 MI.eraseFromParent();
701 return Legalized;
702 }
703 case TargetOpcode::G_TRUNC: {
704 if (TypeIdx != 1)
705 return UnableToLegalize;
706
707 uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
708 if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
709 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
710 return UnableToLegalize;
711 }
712
713 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
714 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Unmerge.getReg(0));
715 MI.eraseFromParent();
716 return Legalized;
717 }
718
719 case TargetOpcode::G_ADD: {
720 // FIXME: add support for when SizeOp0 isn't an exact multiple of
721 // NarrowSize.
722 if (SizeOp0 % NarrowSize != 0)
723 return UnableToLegalize;
724 // Expand in terms of carry-setting/consuming G_ADDE instructions.
725 int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
726
727 SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
728 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
729 extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
730
731 Register CarryIn;
732 for (int i = 0; i < NumParts; ++i) {
733 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
734 Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
735
736 if (i == 0)
737 MIRBuilder.buildUAddo(DstReg, CarryOut, Src1Regs[i], Src2Regs[i]);
738 else {
739 MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
740 Src2Regs[i], CarryIn);
741 }
742
743 DstRegs.push_back(DstReg);
744 CarryIn = CarryOut;
745 }
746 Register DstReg = MI.getOperand(0).getReg();
747 if(MRI.getType(DstReg).isVector())
748 MIRBuilder.buildBuildVector(DstReg, DstRegs);
749 else
750 MIRBuilder.buildMerge(DstReg, DstRegs);
751 MI.eraseFromParent();
752 return Legalized;
753 }
754 case TargetOpcode::G_SUB: {
755 // FIXME: add support for when SizeOp0 isn't an exact multiple of
756 // NarrowSize.
757 if (SizeOp0 % NarrowSize != 0)
758 return UnableToLegalize;
759
760 int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
761
762 SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
763 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
764 extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
765
766 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
767 Register BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
768 MIRBuilder.buildInstr(TargetOpcode::G_USUBO, {DstReg, BorrowOut},
769 {Src1Regs[0], Src2Regs[0]});
770 DstRegs.push_back(DstReg);
771 Register BorrowIn = BorrowOut;
772 for (int i = 1; i < NumParts; ++i) {
773 DstReg = MRI.createGenericVirtualRegister(NarrowTy);
774 BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
775
776 MIRBuilder.buildInstr(TargetOpcode::G_USUBE, {DstReg, BorrowOut},
777 {Src1Regs[i], Src2Regs[i], BorrowIn});
778
779 DstRegs.push_back(DstReg);
780 BorrowIn = BorrowOut;
781 }
782 MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
783 MI.eraseFromParent();
784 return Legalized;
785 }
786 case TargetOpcode::G_MUL:
787 case TargetOpcode::G_UMULH:
788 return narrowScalarMul(MI, NarrowTy);
789 case TargetOpcode::G_EXTRACT:
790 return narrowScalarExtract(MI, TypeIdx, NarrowTy);
791 case TargetOpcode::G_INSERT:
792 return narrowScalarInsert(MI, TypeIdx, NarrowTy);
793 case TargetOpcode::G_LOAD: {
794 const auto &MMO = **MI.memoperands_begin();
795 Register DstReg = MI.getOperand(0).getReg();
796 LLT DstTy = MRI.getType(DstReg);
797 if (DstTy.isVector())
798 return UnableToLegalize;
799
800 if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
801 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
802 auto &MMO = **MI.memoperands_begin();
803 MIRBuilder.buildLoad(TmpReg, MI.getOperand(1).getReg(), MMO);
804 MIRBuilder.buildAnyExt(DstReg, TmpReg);
805 MI.eraseFromParent();
806 return Legalized;
807 }
808
809 return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
810 }
811 case TargetOpcode::G_ZEXTLOAD:
812 case TargetOpcode::G_SEXTLOAD: {
813 bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
814 Register DstReg = MI.getOperand(0).getReg();
815 Register PtrReg = MI.getOperand(1).getReg();
816
817 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
818 auto &MMO = **MI.memoperands_begin();
819 if (MMO.getSizeInBits() == NarrowSize) {
820 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
821 } else {
822 unsigned ExtLoad = ZExt ? TargetOpcode::G_ZEXTLOAD
823 : TargetOpcode::G_SEXTLOAD;
824 MIRBuilder.buildInstr(ExtLoad)
825 .addDef(TmpReg)
826 .addUse(PtrReg)
827 .addMemOperand(&MMO);
828 }
829
830 if (ZExt)
831 MIRBuilder.buildZExt(DstReg, TmpReg);
832 else
833 MIRBuilder.buildSExt(DstReg, TmpReg);
834
835 MI.eraseFromParent();
836 return Legalized;
837 }
838 case TargetOpcode::G_STORE: {
839 const auto &MMO = **MI.memoperands_begin();
840
841 Register SrcReg = MI.getOperand(0).getReg();
842 LLT SrcTy = MRI.getType(SrcReg);
843 if (SrcTy.isVector())
844 return UnableToLegalize;
845
846 int NumParts = SizeOp0 / NarrowSize;
847 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
848 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
849 if (SrcTy.isVector() && LeftoverBits != 0)
850 return UnableToLegalize;
851
852 if (8 * MMO.getSize() != SrcTy.getSizeInBits()) {
853 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
854 auto &MMO = **MI.memoperands_begin();
855 MIRBuilder.buildTrunc(TmpReg, SrcReg);
856 MIRBuilder.buildStore(TmpReg, MI.getOperand(1).getReg(), MMO);
857 MI.eraseFromParent();
858 return Legalized;
859 }
860
861 return reduceLoadStoreWidth(MI, 0, NarrowTy);
862 }
863 case TargetOpcode::G_SELECT:
864 return narrowScalarSelect(MI, TypeIdx, NarrowTy);
865 case TargetOpcode::G_AND:
866 case TargetOpcode::G_OR:
867 case TargetOpcode::G_XOR: {
868 // Legalize bitwise operation:
869 // A = BinOp<Ty> B, C
870 // into:
871 // B1, ..., BN = G_UNMERGE_VALUES B
872 // C1, ..., CN = G_UNMERGE_VALUES C
873 // A1 = BinOp<Ty/N> B1, C2
874 // ...
875 // AN = BinOp<Ty/N> BN, CN
876 // A = G_MERGE_VALUES A1, ..., AN
877 return narrowScalarBasic(MI, TypeIdx, NarrowTy);
878 }
879 case TargetOpcode::G_SHL:
880 case TargetOpcode::G_LSHR:
881 case TargetOpcode::G_ASHR:
882 return narrowScalarShift(MI, TypeIdx, NarrowTy);
883 case TargetOpcode::G_CTLZ:
884 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
885 case TargetOpcode::G_CTTZ:
886 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
887 case TargetOpcode::G_CTPOP:
888 if (TypeIdx != 0)
889 return UnableToLegalize; // TODO
890
891 Observer.changingInstr(MI);
892 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
893 Observer.changedInstr(MI);
894 return Legalized;
895 case TargetOpcode::G_INTTOPTR:
896 if (TypeIdx != 1)
897 return UnableToLegalize;
898
899 Observer.changingInstr(MI);
900 narrowScalarSrc(MI, NarrowTy, 1);
901 Observer.changedInstr(MI);
902 return Legalized;
903 case TargetOpcode::G_PTRTOINT:
904 if (TypeIdx != 0)
905 return UnableToLegalize;
906
907 Observer.changingInstr(MI);
908 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
909 Observer.changedInstr(MI);
910 return Legalized;
911 case TargetOpcode::G_PHI: {
912 unsigned NumParts = SizeOp0 / NarrowSize;
913 SmallVector<Register, 2> DstRegs;
914 SmallVector<SmallVector<Register, 2>, 2> SrcRegs;
915 DstRegs.resize(NumParts);
916 SrcRegs.resize(MI.getNumOperands() / 2);
917 Observer.changingInstr(MI);
918 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
919 MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
920 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
921 extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
922 SrcRegs[i / 2]);
923 }
924 MachineBasicBlock &MBB = *MI.getParent();
925 MIRBuilder.setInsertPt(MBB, MI);
926 for (unsigned i = 0; i < NumParts; ++i) {
927 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
928 MachineInstrBuilder MIB =
929 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
930 for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
931 MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
932 }
933 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
934 MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
935 Observer.changedInstr(MI);
936 MI.eraseFromParent();
937 return Legalized;
938 }
939 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
940 case TargetOpcode::G_INSERT_VECTOR_ELT: {
941 if (TypeIdx != 2)
942 return UnableToLegalize;
943
944 int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
945 Observer.changingInstr(MI);
946 narrowScalarSrc(MI, NarrowTy, OpIdx);
947 Observer.changedInstr(MI);
948 return Legalized;
949 }
950 case TargetOpcode::G_ICMP: {
951 uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
952 if (NarrowSize * 2 != SrcSize)
953 return UnableToLegalize;
954
955 Observer.changingInstr(MI);
956 Register LHSL = MRI.createGenericVirtualRegister(NarrowTy);
957 Register LHSH = MRI.createGenericVirtualRegister(NarrowTy);
958 MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2).getReg());
959
960 Register RHSL = MRI.createGenericVirtualRegister(NarrowTy);
961 Register RHSH = MRI.createGenericVirtualRegister(NarrowTy);
962 MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3).getReg());
963
964 CmpInst::Predicate Pred =
965 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
966 LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
967
968 if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
969 MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL);
970 MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH);
971 MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH);
972 MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0);
973 MIRBuilder.buildICmp(Pred, MI.getOperand(0).getReg(), Or, Zero);
974 } else {
975 MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
976 MachineInstrBuilder CmpHEQ =
977 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
978 MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
979 ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
980 MIRBuilder.buildSelect(MI.getOperand(0).getReg(), CmpHEQ, CmpLU, CmpH);
981 }
982 Observer.changedInstr(MI);
983 MI.eraseFromParent();
984 return Legalized;
985 }
986 case TargetOpcode::G_SEXT_INREG: {
987 if (TypeIdx != 0)
988 return UnableToLegalize;
989
990 if (!MI.getOperand(2).isImm())
991 return UnableToLegalize;
992 int64_t SizeInBits = MI.getOperand(2).getImm();
993
994 // So long as the new type has more bits than the bits we're extending we
995 // don't need to break it apart.
996 if (NarrowTy.getScalarSizeInBits() >= SizeInBits) {
997 Observer.changingInstr(MI);
998 // We don't lose any non-extension bits by truncating the src and
999 // sign-extending the dst.
1000 MachineOperand &MO1 = MI.getOperand(1);
1001 auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1.getReg());
1002 MO1.setReg(TruncMIB->getOperand(0).getReg());
1003
1004 MachineOperand &MO2 = MI.getOperand(0);
1005 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1006 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1007 MIRBuilder.buildInstr(TargetOpcode::G_SEXT, {MO2.getReg()}, {DstExt});
1008 MO2.setReg(DstExt);
1009 Observer.changedInstr(MI);
1010 return Legalized;
1011 }
1012
1013 // Break it apart. Components below the extension point are unmodified. The
1014 // component containing the extension point becomes a narrower SEXT_INREG.
1015 // Components above it are ashr'd from the component containing the
1016 // extension point.
1017 if (SizeOp0 % NarrowSize != 0)
1018 return UnableToLegalize;
1019 int NumParts = SizeOp0 / NarrowSize;
1020
1021 // List the registers where the destination will be scattered.
1022 SmallVector<Register, 2> DstRegs;
1023 // List the registers where the source will be split.
1024 SmallVector<Register, 2> SrcRegs;
1025
1026 // Create all the temporary registers.
1027 for (int i = 0; i < NumParts; ++i) {
1028 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
1029
1030 SrcRegs.push_back(SrcReg);
1031 }
1032
1033 // Explode the big arguments into smaller chunks.
1034 MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1).getReg());
1035
1036 Register AshrCstReg =
1037 MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
1038 ->getOperand(0)
1039 .getReg();
1040 Register FullExtensionReg = 0;
1041 Register PartialExtensionReg = 0;
1042
1043 // Do the operation on each small part.
1044 for (int i = 0; i < NumParts; ++i) {
1045 if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits)
1046 DstRegs.push_back(SrcRegs[i]);
1047 else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) {
1048 assert(PartialExtensionReg &&
1049 "Expected to visit partial extension before full");
1050 if (FullExtensionReg) {
1051 DstRegs.push_back(FullExtensionReg);
1052 continue;
1053 }
1054 DstRegs.push_back(MIRBuilder
1055 .buildInstr(TargetOpcode::G_ASHR, {NarrowTy},
1056 {PartialExtensionReg, AshrCstReg})
1057 ->getOperand(0)
1058 .getReg());
1059 FullExtensionReg = DstRegs.back();
1060 } else {
1061 DstRegs.push_back(
1062 MIRBuilder
1063 .buildInstr(
1064 TargetOpcode::G_SEXT_INREG, {NarrowTy},
1065 {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
1066 ->getOperand(0)
1067 .getReg());
1068 PartialExtensionReg = DstRegs.back();
1069 }
1070 }
1071
1072 // Gather the destination registers into the final destination.
1073 Register DstReg = MI.getOperand(0).getReg();
1074 MIRBuilder.buildMerge(DstReg, DstRegs);
1075 MI.eraseFromParent();
1076 return Legalized;
1077 }
1078 case TargetOpcode::G_BSWAP:
1079 case TargetOpcode::G_BITREVERSE: {
1080 if (SizeOp0 % NarrowSize != 0)
1081 return UnableToLegalize;
1082
1083 Observer.changingInstr(MI);
1084 SmallVector<Register, 2> SrcRegs, DstRegs;
1085 unsigned NumParts = SizeOp0 / NarrowSize;
1086 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
1087
1088 for (unsigned i = 0; i < NumParts; ++i) {
1089 auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
1090 {SrcRegs[NumParts - 1 - i]});
1091 DstRegs.push_back(DstPart.getReg(0));
1092 }
1093
1094 MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
1095
1096 Observer.changedInstr(MI);
1097 MI.eraseFromParent();
1098 return Legalized;
1099 }
1100 }
1101 }
1102
widenScalarSrc(MachineInstr & MI,LLT WideTy,unsigned OpIdx,unsigned ExtOpcode)1103 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
1104 unsigned OpIdx, unsigned ExtOpcode) {
1105 MachineOperand &MO = MI.getOperand(OpIdx);
1106 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO.getReg()});
1107 MO.setReg(ExtB->getOperand(0).getReg());
1108 }
1109
narrowScalarSrc(MachineInstr & MI,LLT NarrowTy,unsigned OpIdx)1110 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
1111 unsigned OpIdx) {
1112 MachineOperand &MO = MI.getOperand(OpIdx);
1113 auto ExtB = MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, {NarrowTy},
1114 {MO.getReg()});
1115 MO.setReg(ExtB->getOperand(0).getReg());
1116 }
1117
widenScalarDst(MachineInstr & MI,LLT WideTy,unsigned OpIdx,unsigned TruncOpcode)1118 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
1119 unsigned OpIdx, unsigned TruncOpcode) {
1120 MachineOperand &MO = MI.getOperand(OpIdx);
1121 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1122 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1123 MIRBuilder.buildInstr(TruncOpcode, {MO.getReg()}, {DstExt});
1124 MO.setReg(DstExt);
1125 }
1126
narrowScalarDst(MachineInstr & MI,LLT NarrowTy,unsigned OpIdx,unsigned ExtOpcode)1127 void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
1128 unsigned OpIdx, unsigned ExtOpcode) {
1129 MachineOperand &MO = MI.getOperand(OpIdx);
1130 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
1131 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1132 MIRBuilder.buildInstr(ExtOpcode, {MO.getReg()}, {DstTrunc});
1133 MO.setReg(DstTrunc);
1134 }
1135
moreElementsVectorDst(MachineInstr & MI,LLT WideTy,unsigned OpIdx)1136 void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
1137 unsigned OpIdx) {
1138 MachineOperand &MO = MI.getOperand(OpIdx);
1139 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1140 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1141 MIRBuilder.buildExtract(MO.getReg(), DstExt, 0);
1142 MO.setReg(DstExt);
1143 }
1144
moreElementsVectorSrc(MachineInstr & MI,LLT MoreTy,unsigned OpIdx)1145 void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
1146 unsigned OpIdx) {
1147 MachineOperand &MO = MI.getOperand(OpIdx);
1148
1149 LLT OldTy = MRI.getType(MO.getReg());
1150 unsigned OldElts = OldTy.getNumElements();
1151 unsigned NewElts = MoreTy.getNumElements();
1152
1153 unsigned NumParts = NewElts / OldElts;
1154
1155 // Use concat_vectors if the result is a multiple of the number of elements.
1156 if (NumParts * OldElts == NewElts) {
1157 SmallVector<Register, 8> Parts;
1158 Parts.push_back(MO.getReg());
1159
1160 Register ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0);
1161 for (unsigned I = 1; I != NumParts; ++I)
1162 Parts.push_back(ImpDef);
1163
1164 auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts);
1165 MO.setReg(Concat.getReg(0));
1166 return;
1167 }
1168
1169 Register MoreReg = MRI.createGenericVirtualRegister(MoreTy);
1170 Register ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0);
1171 MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0);
1172 MO.setReg(MoreReg);
1173 }
1174
1175 LegalizerHelper::LegalizeResult
widenScalarMergeValues(MachineInstr & MI,unsigned TypeIdx,LLT WideTy)1176 LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
1177 LLT WideTy) {
1178 if (TypeIdx != 1)
1179 return UnableToLegalize;
1180
1181 Register DstReg = MI.getOperand(0).getReg();
1182 LLT DstTy = MRI.getType(DstReg);
1183 if (DstTy.isVector())
1184 return UnableToLegalize;
1185
1186 Register Src1 = MI.getOperand(1).getReg();
1187 LLT SrcTy = MRI.getType(Src1);
1188 const int DstSize = DstTy.getSizeInBits();
1189 const int SrcSize = SrcTy.getSizeInBits();
1190 const int WideSize = WideTy.getSizeInBits();
1191 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
1192
1193 unsigned NumOps = MI.getNumOperands();
1194 unsigned NumSrc = MI.getNumOperands() - 1;
1195 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
1196
1197 if (WideSize >= DstSize) {
1198 // Directly pack the bits in the target type.
1199 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);
1200
1201 for (unsigned I = 2; I != NumOps; ++I) {
1202 const unsigned Offset = (I - 1) * PartSize;
1203
1204 Register SrcReg = MI.getOperand(I).getReg();
1205 assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
1206
1207 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
1208
1209 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
1210 MRI.createGenericVirtualRegister(WideTy);
1211
1212 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
1213 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
1214 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
1215 ResultReg = NextResult;
1216 }
1217
1218 if (WideSize > DstSize)
1219 MIRBuilder.buildTrunc(DstReg, ResultReg);
1220 else if (DstTy.isPointer())
1221 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
1222
1223 MI.eraseFromParent();
1224 return Legalized;
1225 }
1226
1227 // Unmerge the original values to the GCD type, and recombine to the next
1228 // multiple greater than the original type.
1229 //
1230 // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
1231 // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
1232 // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
1233 // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
1234 // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
1235 // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
1236 // %12:_(s12) = G_MERGE_VALUES %10, %11
1237 //
1238 // Padding with undef if necessary:
1239 //
1240 // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
1241 // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
1242 // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
1243 // %7:_(s2) = G_IMPLICIT_DEF
1244 // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
1245 // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
1246 // %10:_(s12) = G_MERGE_VALUES %8, %9
1247
1248 const int GCD = greatestCommonDivisor(SrcSize, WideSize);
1249 LLT GCDTy = LLT::scalar(GCD);
1250
1251 SmallVector<Register, 8> Parts;
1252 SmallVector<Register, 8> NewMergeRegs;
1253 SmallVector<Register, 8> Unmerges;
1254 LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
1255
1256 // Decompose the original operands if they don't evenly divide.
1257 for (int I = 1, E = MI.getNumOperands(); I != E; ++I) {
1258 Register SrcReg = MI.getOperand(I).getReg();
1259 if (GCD == SrcSize) {
1260 Unmerges.push_back(SrcReg);
1261 } else {
1262 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
1263 for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
1264 Unmerges.push_back(Unmerge.getReg(J));
1265 }
1266 }
1267
1268 // Pad with undef to the next size that is a multiple of the requested size.
1269 if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
1270 Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
1271 for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
1272 Unmerges.push_back(UndefReg);
1273 }
1274
1275 const int PartsPerGCD = WideSize / GCD;
1276
1277 // Build merges of each piece.
1278 ArrayRef<Register> Slicer(Unmerges);
1279 for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
1280 auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
1281 NewMergeRegs.push_back(Merge.getReg(0));
1282 }
1283
1284 // A truncate may be necessary if the requested type doesn't evenly divide the
1285 // original result type.
1286 if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
1287 MIRBuilder.buildMerge(DstReg, NewMergeRegs);
1288 } else {
1289 auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
1290 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
1291 }
1292
1293 MI.eraseFromParent();
1294 return Legalized;
1295 }
1296
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
                                          LLT WideTy) {
  // Only widening the destination pieces is handled here.
  if (TypeIdx != 0)
    return UnableToLegalize;

  // For G_UNMERGE_VALUES the single source is the last operand; every operand
  // before it is a def.
  unsigned NumDst = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(NumDst).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  if (!SrcTy.isScalar())
    return UnableToLegalize;

  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst0Reg);
  if (!DstTy.isScalar())
    return UnableToLegalize;

  // Build a wider source so that each result can be read from its own
  // WideTy-sized slice of it.
  unsigned NewSrcSize = NumDst * WideTy.getSizeInBits();
  LLT NewSrcTy = LLT::scalar(NewSrcSize);
  // Extra bits each widened piece gains over the original piece width.
  unsigned SizeDiff = WideTy.getSizeInBits() - DstTy.getSizeInBits();

  auto WideSrc = MIRBuilder.buildZExt(NewSrcTy, SrcReg);

  // Spread the pieces apart by repeatedly shifting the accumulated value and
  // or'ing it back in, so piece I lands at bit I * WideSize instead of
  // I * DstSize. NOTE(review): the cumulative shift/or also deposits shifted
  // copies at other offsets; this appears to rely on each widened def being
  // truncated back to DstTy (via widenScalarDst below) so only the low
  // DstSize bits of each slice are observed — confirm for NumDst > 2.
  for (unsigned I = 1; I != NumDst; ++I) {
    auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, SizeDiff * I);
    auto Shl = MIRBuilder.buildShl(NewSrcTy, WideSrc, ShiftAmt);
    WideSrc = MIRBuilder.buildOr(NewSrcTy, WideSrc, Shl);
  }

  Observer.changingInstr(MI);

  // Swap in the packed source and widen every def; widenScalarDst emits a
  // trunc after MI to recover each original narrow value.
  MI.getOperand(NumDst).setReg(WideSrc->getOperand(0).getReg());
  for (unsigned I = 0; I != NumDst; ++I)
    widenScalarDst(MI, WideTy, I);

  Observer.changedInstr(MI);

  return Legalized;
}
1336
1337 LegalizerHelper::LegalizeResult
widenScalarExtract(MachineInstr & MI,unsigned TypeIdx,LLT WideTy)1338 LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
1339 LLT WideTy) {
1340 Register DstReg = MI.getOperand(0).getReg();
1341 Register SrcReg = MI.getOperand(1).getReg();
1342 LLT SrcTy = MRI.getType(SrcReg);
1343
1344 LLT DstTy = MRI.getType(DstReg);
1345 unsigned Offset = MI.getOperand(2).getImm();
1346
1347 if (TypeIdx == 0) {
1348 if (SrcTy.isVector() || DstTy.isVector())
1349 return UnableToLegalize;
1350
1351 SrcOp Src(SrcReg);
1352 if (SrcTy.isPointer()) {
1353 // Extracts from pointers can be handled only if they are really just
1354 // simple integers.
1355 const DataLayout &DL = MIRBuilder.getDataLayout();
1356 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
1357 return UnableToLegalize;
1358
1359 LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
1360 Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
1361 SrcTy = SrcAsIntTy;
1362 }
1363
1364 if (DstTy.isPointer())
1365 return UnableToLegalize;
1366
1367 if (Offset == 0) {
1368 // Avoid a shift in the degenerate case.
1369 MIRBuilder.buildTrunc(DstReg,
1370 MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
1371 MI.eraseFromParent();
1372 return Legalized;
1373 }
1374
1375 // Do a shift in the source type.
1376 LLT ShiftTy = SrcTy;
1377 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
1378 Src = MIRBuilder.buildAnyExt(WideTy, Src);
1379 ShiftTy = WideTy;
1380 } else if (WideTy.getSizeInBits() > SrcTy.getSizeInBits())
1381 return UnableToLegalize;
1382
1383 auto LShr = MIRBuilder.buildLShr(
1384 ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
1385 MIRBuilder.buildTrunc(DstReg, LShr);
1386 MI.eraseFromParent();
1387 return Legalized;
1388 }
1389
1390 if (SrcTy.isScalar()) {
1391 Observer.changingInstr(MI);
1392 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1393 Observer.changedInstr(MI);
1394 return Legalized;
1395 }
1396
1397 if (!SrcTy.isVector())
1398 return UnableToLegalize;
1399
1400 if (DstTy != SrcTy.getElementType())
1401 return UnableToLegalize;
1402
1403 if (Offset % SrcTy.getScalarSizeInBits() != 0)
1404 return UnableToLegalize;
1405
1406 Observer.changingInstr(MI);
1407 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1408
1409 MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
1410 Offset);
1411 widenScalarDst(MI, WideTy.getScalarType(), 0);
1412 Observer.changedInstr(MI);
1413 return Legalized;
1414 }
1415
1416 LegalizerHelper::LegalizeResult
widenScalarInsert(MachineInstr & MI,unsigned TypeIdx,LLT WideTy)1417 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
1418 LLT WideTy) {
1419 if (TypeIdx != 0)
1420 return UnableToLegalize;
1421 Observer.changingInstr(MI);
1422 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1423 widenScalarDst(MI, WideTy);
1424 Observer.changedInstr(MI);
1425 return Legalized;
1426 }
1427
1428 LegalizerHelper::LegalizeResult
widenScalar(MachineInstr & MI,unsigned TypeIdx,LLT WideTy)1429 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
1430 MIRBuilder.setInstr(MI);
1431
1432 switch (MI.getOpcode()) {
1433 default:
1434 return UnableToLegalize;
1435 case TargetOpcode::G_EXTRACT:
1436 return widenScalarExtract(MI, TypeIdx, WideTy);
1437 case TargetOpcode::G_INSERT:
1438 return widenScalarInsert(MI, TypeIdx, WideTy);
1439 case TargetOpcode::G_MERGE_VALUES:
1440 return widenScalarMergeValues(MI, TypeIdx, WideTy);
1441 case TargetOpcode::G_UNMERGE_VALUES:
1442 return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
1443 case TargetOpcode::G_UADDO:
1444 case TargetOpcode::G_USUBO: {
1445 if (TypeIdx == 1)
1446 return UnableToLegalize; // TODO
1447 auto LHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
1448 {MI.getOperand(2).getReg()});
1449 auto RHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
1450 {MI.getOperand(3).getReg()});
1451 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
1452 ? TargetOpcode::G_ADD
1453 : TargetOpcode::G_SUB;
1454 // Do the arithmetic in the larger type.
1455 auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});
1456 LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
1457 APInt Mask = APInt::getAllOnesValue(OrigTy.getSizeInBits());
1458 auto AndOp = MIRBuilder.buildInstr(
1459 TargetOpcode::G_AND, {WideTy},
1460 {NewOp, MIRBuilder.buildConstant(WideTy, Mask.getZExtValue())});
1461 // There is no overflow if the AndOp is the same as NewOp.
1462 MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1).getReg(), NewOp,
1463 AndOp);
1464 // Now trunc the NewOp to the original result.
1465 MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), NewOp);
1466 MI.eraseFromParent();
1467 return Legalized;
1468 }
1469 case TargetOpcode::G_CTTZ:
1470 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1471 case TargetOpcode::G_CTLZ:
1472 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1473 case TargetOpcode::G_CTPOP: {
1474 if (TypeIdx == 0) {
1475 Observer.changingInstr(MI);
1476 widenScalarDst(MI, WideTy, 0);
1477 Observer.changedInstr(MI);
1478 return Legalized;
1479 }
1480
1481 Register SrcReg = MI.getOperand(1).getReg();
1482
1483 // First ZEXT the input.
1484 auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
1485 LLT CurTy = MRI.getType(SrcReg);
1486 if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
1487 // The count is the same in the larger type except if the original
1488 // value was zero. This can be handled by setting the bit just off
1489 // the top of the original type.
1490 auto TopBit =
1491 APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
1492 MIBSrc = MIRBuilder.buildOr(
1493 WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
1494 }
1495
1496 // Perform the operation at the larger size.
1497 auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
1498 // This is already the correct result for CTPOP and CTTZs
1499 if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
1500 MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
1501 // The correct result is NewOp - (Difference in widety and current ty).
1502 unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
1503 MIBNewOp = MIRBuilder.buildInstr(
1504 TargetOpcode::G_SUB, {WideTy},
1505 {MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)});
1506 }
1507
1508 MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
1509 MI.eraseFromParent();
1510 return Legalized;
1511 }
1512 case TargetOpcode::G_BSWAP: {
1513 Observer.changingInstr(MI);
1514 Register DstReg = MI.getOperand(0).getReg();
1515
1516 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
1517 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1518 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
1519 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1520
1521 MI.getOperand(0).setReg(DstExt);
1522
1523 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1524
1525 LLT Ty = MRI.getType(DstReg);
1526 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
1527 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
1528 MIRBuilder.buildInstr(TargetOpcode::G_LSHR)
1529 .addDef(ShrReg)
1530 .addUse(DstExt)
1531 .addUse(ShiftAmtReg);
1532
1533 MIRBuilder.buildTrunc(DstReg, ShrReg);
1534 Observer.changedInstr(MI);
1535 return Legalized;
1536 }
1537 case TargetOpcode::G_BITREVERSE: {
1538 Observer.changingInstr(MI);
1539
1540 Register DstReg = MI.getOperand(0).getReg();
1541 LLT Ty = MRI.getType(DstReg);
1542 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
1543
1544 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1545 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1546 MI.getOperand(0).setReg(DstExt);
1547 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1548
1549 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
1550 auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
1551 MIRBuilder.buildTrunc(DstReg, Shift);
1552 Observer.changedInstr(MI);
1553 return Legalized;
1554 }
1555 case TargetOpcode::G_ADD:
1556 case TargetOpcode::G_AND:
1557 case TargetOpcode::G_MUL:
1558 case TargetOpcode::G_OR:
1559 case TargetOpcode::G_XOR:
1560 case TargetOpcode::G_SUB:
1561 // Perform operation at larger width (any extension is fine here, high bits
1562 // don't affect the result) and then truncate the result back to the
1563 // original type.
1564 Observer.changingInstr(MI);
1565 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1566 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
1567 widenScalarDst(MI, WideTy);
1568 Observer.changedInstr(MI);
1569 return Legalized;
1570
1571 case TargetOpcode::G_SHL:
1572 Observer.changingInstr(MI);
1573
1574 if (TypeIdx == 0) {
1575 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1576 widenScalarDst(MI, WideTy);
1577 } else {
1578 assert(TypeIdx == 1);
1579 // The "number of bits to shift" operand must preserve its value as an
1580 // unsigned integer:
1581 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1582 }
1583
1584 Observer.changedInstr(MI);
1585 return Legalized;
1586
1587 case TargetOpcode::G_SDIV:
1588 case TargetOpcode::G_SREM:
1589 case TargetOpcode::G_SMIN:
1590 case TargetOpcode::G_SMAX:
1591 Observer.changingInstr(MI);
1592 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
1593 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1594 widenScalarDst(MI, WideTy);
1595 Observer.changedInstr(MI);
1596 return Legalized;
1597
1598 case TargetOpcode::G_ASHR:
1599 case TargetOpcode::G_LSHR:
1600 Observer.changingInstr(MI);
1601
1602 if (TypeIdx == 0) {
1603 unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
1604 TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
1605
1606 widenScalarSrc(MI, WideTy, 1, CvtOp);
1607 widenScalarDst(MI, WideTy);
1608 } else {
1609 assert(TypeIdx == 1);
1610 // The "number of bits to shift" operand must preserve its value as an
1611 // unsigned integer:
1612 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1613 }
1614
1615 Observer.changedInstr(MI);
1616 return Legalized;
1617 case TargetOpcode::G_UDIV:
1618 case TargetOpcode::G_UREM:
1619 case TargetOpcode::G_UMIN:
1620 case TargetOpcode::G_UMAX:
1621 Observer.changingInstr(MI);
1622 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
1623 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1624 widenScalarDst(MI, WideTy);
1625 Observer.changedInstr(MI);
1626 return Legalized;
1627
1628 case TargetOpcode::G_SELECT:
1629 Observer.changingInstr(MI);
1630 if (TypeIdx == 0) {
1631 // Perform operation at larger width (any extension is fine here, high
1632 // bits don't affect the result) and then truncate the result back to the
1633 // original type.
1634 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
1635 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
1636 widenScalarDst(MI, WideTy);
1637 } else {
1638 bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
1639 // Explicit extension is required here since high bits affect the result.
1640 widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
1641 }
1642 Observer.changedInstr(MI);
1643 return Legalized;
1644
1645 case TargetOpcode::G_FPTOSI:
1646 case TargetOpcode::G_FPTOUI:
1647 Observer.changingInstr(MI);
1648
1649 if (TypeIdx == 0)
1650 widenScalarDst(MI, WideTy);
1651 else
1652 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
1653
1654 Observer.changedInstr(MI);
1655 return Legalized;
1656 case TargetOpcode::G_SITOFP:
1657 if (TypeIdx != 1)
1658 return UnableToLegalize;
1659 Observer.changingInstr(MI);
1660 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
1661 Observer.changedInstr(MI);
1662 return Legalized;
1663
1664 case TargetOpcode::G_UITOFP:
1665 if (TypeIdx != 1)
1666 return UnableToLegalize;
1667 Observer.changingInstr(MI);
1668 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
1669 Observer.changedInstr(MI);
1670 return Legalized;
1671
1672 case TargetOpcode::G_LOAD:
1673 case TargetOpcode::G_SEXTLOAD:
1674 case TargetOpcode::G_ZEXTLOAD:
1675 Observer.changingInstr(MI);
1676 widenScalarDst(MI, WideTy);
1677 Observer.changedInstr(MI);
1678 return Legalized;
1679
1680 case TargetOpcode::G_STORE: {
1681 if (TypeIdx != 0)
1682 return UnableToLegalize;
1683
1684 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1685 if (!isPowerOf2_32(Ty.getSizeInBits()))
1686 return UnableToLegalize;
1687
1688 Observer.changingInstr(MI);
1689
1690 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
1691 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
1692 widenScalarSrc(MI, WideTy, 0, ExtType);
1693
1694 Observer.changedInstr(MI);
1695 return Legalized;
1696 }
1697 case TargetOpcode::G_CONSTANT: {
1698 MachineOperand &SrcMO = MI.getOperand(1);
1699 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
1700 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
1701 MRI.getType(MI.getOperand(0).getReg()));
1702 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
1703 ExtOpc == TargetOpcode::G_ANYEXT) &&
1704 "Illegal Extend");
1705 const APInt &SrcVal = SrcMO.getCImm()->getValue();
1706 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
1707 ? SrcVal.sext(WideTy.getSizeInBits())
1708 : SrcVal.zext(WideTy.getSizeInBits());
1709 Observer.changingInstr(MI);
1710 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
1711
1712 widenScalarDst(MI, WideTy);
1713 Observer.changedInstr(MI);
1714 return Legalized;
1715 }
1716 case TargetOpcode::G_FCONSTANT: {
1717 MachineOperand &SrcMO = MI.getOperand(1);
1718 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
1719 APFloat Val = SrcMO.getFPImm()->getValueAPF();
1720 bool LosesInfo;
1721 switch (WideTy.getSizeInBits()) {
1722 case 32:
1723 Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
1724 &LosesInfo);
1725 break;
1726 case 64:
1727 Val.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
1728 &LosesInfo);
1729 break;
1730 default:
1731 return UnableToLegalize;
1732 }
1733
1734 assert(!LosesInfo && "extend should always be lossless");
1735
1736 Observer.changingInstr(MI);
1737 SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
1738
1739 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
1740 Observer.changedInstr(MI);
1741 return Legalized;
1742 }
1743 case TargetOpcode::G_IMPLICIT_DEF: {
1744 Observer.changingInstr(MI);
1745 widenScalarDst(MI, WideTy);
1746 Observer.changedInstr(MI);
1747 return Legalized;
1748 }
1749 case TargetOpcode::G_BRCOND:
1750 Observer.changingInstr(MI);
1751 widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
1752 Observer.changedInstr(MI);
1753 return Legalized;
1754
1755 case TargetOpcode::G_FCMP:
1756 Observer.changingInstr(MI);
1757 if (TypeIdx == 0)
1758 widenScalarDst(MI, WideTy);
1759 else {
1760 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
1761 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
1762 }
1763 Observer.changedInstr(MI);
1764 return Legalized;
1765
1766 case TargetOpcode::G_ICMP:
1767 Observer.changingInstr(MI);
1768 if (TypeIdx == 0)
1769 widenScalarDst(MI, WideTy);
1770 else {
1771 unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
1772 MI.getOperand(1).getPredicate()))
1773 ? TargetOpcode::G_SEXT
1774 : TargetOpcode::G_ZEXT;
1775 widenScalarSrc(MI, WideTy, 2, ExtOpcode);
1776 widenScalarSrc(MI, WideTy, 3, ExtOpcode);
1777 }
1778 Observer.changedInstr(MI);
1779 return Legalized;
1780
1781 case TargetOpcode::G_PTR_ADD:
1782 assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
1783 Observer.changingInstr(MI);
1784 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1785 Observer.changedInstr(MI);
1786 return Legalized;
1787
1788 case TargetOpcode::G_PHI: {
1789 assert(TypeIdx == 0 && "Expecting only Idx 0");
1790
1791 Observer.changingInstr(MI);
1792 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
1793 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
1794 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
1795 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
1796 }
1797
1798 MachineBasicBlock &MBB = *MI.getParent();
1799 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
1800 widenScalarDst(MI, WideTy);
1801 Observer.changedInstr(MI);
1802 return Legalized;
1803 }
1804 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
1805 if (TypeIdx == 0) {
1806 Register VecReg = MI.getOperand(1).getReg();
1807 LLT VecTy = MRI.getType(VecReg);
1808 Observer.changingInstr(MI);
1809
1810 widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(),
1811 WideTy.getSizeInBits()),
1812 1, TargetOpcode::G_SEXT);
1813
1814 widenScalarDst(MI, WideTy, 0);
1815 Observer.changedInstr(MI);
1816 return Legalized;
1817 }
1818
1819 if (TypeIdx != 2)
1820 return UnableToLegalize;
1821 Observer.changingInstr(MI);
1822 // TODO: Probably should be zext
1823 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1824 Observer.changedInstr(MI);
1825 return Legalized;
1826 }
1827 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1828 if (TypeIdx == 1) {
1829 Observer.changingInstr(MI);
1830
1831 Register VecReg = MI.getOperand(1).getReg();
1832 LLT VecTy = MRI.getType(VecReg);
1833 LLT WideVecTy = LLT::vector(VecTy.getNumElements(), WideTy);
1834
1835 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
1836 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
1837 widenScalarDst(MI, WideVecTy, 0);
1838 Observer.changedInstr(MI);
1839 return Legalized;
1840 }
1841
1842 if (TypeIdx == 2) {
1843 Observer.changingInstr(MI);
1844 // TODO: Probably should be zext
1845 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
1846 Observer.changedInstr(MI);
1847 }
1848
1849 return Legalized;
1850 }
1851 case TargetOpcode::G_FADD:
1852 case TargetOpcode::G_FMUL:
1853 case TargetOpcode::G_FSUB:
1854 case TargetOpcode::G_FMA:
1855 case TargetOpcode::G_FMAD:
1856 case TargetOpcode::G_FNEG:
1857 case TargetOpcode::G_FABS:
1858 case TargetOpcode::G_FCANONICALIZE:
1859 case TargetOpcode::G_FMINNUM:
1860 case TargetOpcode::G_FMAXNUM:
1861 case TargetOpcode::G_FMINNUM_IEEE:
1862 case TargetOpcode::G_FMAXNUM_IEEE:
1863 case TargetOpcode::G_FMINIMUM:
1864 case TargetOpcode::G_FMAXIMUM:
1865 case TargetOpcode::G_FDIV:
1866 case TargetOpcode::G_FREM:
1867 case TargetOpcode::G_FCEIL:
1868 case TargetOpcode::G_FFLOOR:
1869 case TargetOpcode::G_FCOS:
1870 case TargetOpcode::G_FSIN:
1871 case TargetOpcode::G_FLOG10:
1872 case TargetOpcode::G_FLOG:
1873 case TargetOpcode::G_FLOG2:
1874 case TargetOpcode::G_FRINT:
1875 case TargetOpcode::G_FNEARBYINT:
1876 case TargetOpcode::G_FSQRT:
1877 case TargetOpcode::G_FEXP:
1878 case TargetOpcode::G_FEXP2:
1879 case TargetOpcode::G_FPOW:
1880 case TargetOpcode::G_INTRINSIC_TRUNC:
1881 case TargetOpcode::G_INTRINSIC_ROUND:
1882 assert(TypeIdx == 0);
1883 Observer.changingInstr(MI);
1884
1885 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
1886 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
1887
1888 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
1889 Observer.changedInstr(MI);
1890 return Legalized;
1891 case TargetOpcode::G_INTTOPTR:
1892 if (TypeIdx != 1)
1893 return UnableToLegalize;
1894
1895 Observer.changingInstr(MI);
1896 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
1897 Observer.changedInstr(MI);
1898 return Legalized;
1899 case TargetOpcode::G_PTRTOINT:
1900 if (TypeIdx != 0)
1901 return UnableToLegalize;
1902
1903 Observer.changingInstr(MI);
1904 widenScalarDst(MI, WideTy, 0);
1905 Observer.changedInstr(MI);
1906 return Legalized;
1907 case TargetOpcode::G_BUILD_VECTOR: {
1908 Observer.changingInstr(MI);
1909
1910 const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
1911 for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
1912 widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
1913
1914 // Avoid changing the result vector type if the source element type was
1915 // requested.
1916 if (TypeIdx == 1) {
1917 auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
1918 MI.setDesc(TII.get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
1919 } else {
1920 widenScalarDst(MI, WideTy, 0);
1921 }
1922
1923 Observer.changedInstr(MI);
1924 return Legalized;
1925 }
1926 case TargetOpcode::G_SEXT_INREG:
1927 if (TypeIdx != 0)
1928 return UnableToLegalize;
1929
1930 Observer.changingInstr(MI);
1931 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1932 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
1933 Observer.changedInstr(MI);
1934 return Legalized;
1935 }
1936 }
1937
1938 LegalizerHelper::LegalizeResult
lower(MachineInstr & MI,unsigned TypeIdx,LLT Ty)1939 LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
1940 using namespace TargetOpcode;
1941 MIRBuilder.setInstr(MI);
1942
1943 switch(MI.getOpcode()) {
1944 default:
1945 return UnableToLegalize;
1946 case TargetOpcode::G_SREM:
1947 case TargetOpcode::G_UREM: {
1948 Register QuotReg = MRI.createGenericVirtualRegister(Ty);
1949 MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV)
1950 .addDef(QuotReg)
1951 .addUse(MI.getOperand(1).getReg())
1952 .addUse(MI.getOperand(2).getReg());
1953
1954 Register ProdReg = MRI.createGenericVirtualRegister(Ty);
1955 MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg());
1956 MIRBuilder.buildSub(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
1957 ProdReg);
1958 MI.eraseFromParent();
1959 return Legalized;
1960 }
1961 case TargetOpcode::G_SADDO:
1962 case TargetOpcode::G_SSUBO:
1963 return lowerSADDO_SSUBO(MI);
1964 case TargetOpcode::G_SMULO:
1965 case TargetOpcode::G_UMULO: {
1966 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
1967 // result.
1968 Register Res = MI.getOperand(0).getReg();
1969 Register Overflow = MI.getOperand(1).getReg();
1970 Register LHS = MI.getOperand(2).getReg();
1971 Register RHS = MI.getOperand(3).getReg();
1972
1973 MIRBuilder.buildMul(Res, LHS, RHS);
1974
1975 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
1976 ? TargetOpcode::G_SMULH
1977 : TargetOpcode::G_UMULH;
1978
1979 Register HiPart = MRI.createGenericVirtualRegister(Ty);
1980 MIRBuilder.buildInstr(Opcode)
1981 .addDef(HiPart)
1982 .addUse(LHS)
1983 .addUse(RHS);
1984
1985 Register Zero = MRI.createGenericVirtualRegister(Ty);
1986 MIRBuilder.buildConstant(Zero, 0);
1987
1988 // For *signed* multiply, overflow is detected by checking:
1989 // (hi != (lo >> bitwidth-1))
1990 if (Opcode == TargetOpcode::G_SMULH) {
1991 Register Shifted = MRI.createGenericVirtualRegister(Ty);
1992 Register ShiftAmt = MRI.createGenericVirtualRegister(Ty);
1993 MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1);
1994 MIRBuilder.buildInstr(TargetOpcode::G_ASHR)
1995 .addDef(Shifted)
1996 .addUse(Res)
1997 .addUse(ShiftAmt);
1998 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
1999 } else {
2000 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
2001 }
2002 MI.eraseFromParent();
2003 return Legalized;
2004 }
2005 case TargetOpcode::G_FNEG: {
2006 // TODO: Handle vector types once we are able to
2007 // represent them.
2008 if (Ty.isVector())
2009 return UnableToLegalize;
2010 Register Res = MI.getOperand(0).getReg();
2011 Type *ZeroTy;
2012 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
2013 switch (Ty.getSizeInBits()) {
2014 case 16:
2015 ZeroTy = Type::getHalfTy(Ctx);
2016 break;
2017 case 32:
2018 ZeroTy = Type::getFloatTy(Ctx);
2019 break;
2020 case 64:
2021 ZeroTy = Type::getDoubleTy(Ctx);
2022 break;
2023 case 128:
2024 ZeroTy = Type::getFP128Ty(Ctx);
2025 break;
2026 default:
2027 llvm_unreachable("unexpected floating-point type");
2028 }
2029 ConstantFP &ZeroForNegation =
2030 *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
2031 auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
2032 Register SubByReg = MI.getOperand(1).getReg();
2033 Register ZeroReg = Zero->getOperand(0).getReg();
2034 MIRBuilder.buildInstr(TargetOpcode::G_FSUB, {Res}, {ZeroReg, SubByReg},
2035 MI.getFlags());
2036 MI.eraseFromParent();
2037 return Legalized;
2038 }
2039 case TargetOpcode::G_FSUB: {
2040 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
2041 // First, check if G_FNEG is marked as Lower. If so, we may
2042 // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
2043 if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
2044 return UnableToLegalize;
2045 Register Res = MI.getOperand(0).getReg();
2046 Register LHS = MI.getOperand(1).getReg();
2047 Register RHS = MI.getOperand(2).getReg();
2048 Register Neg = MRI.createGenericVirtualRegister(Ty);
2049 MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS);
2050 MIRBuilder.buildInstr(TargetOpcode::G_FADD, {Res}, {LHS, Neg}, MI.getFlags());
2051 MI.eraseFromParent();
2052 return Legalized;
2053 }
2054 case TargetOpcode::G_FMAD:
2055 return lowerFMad(MI);
2056 case TargetOpcode::G_INTRINSIC_ROUND:
2057 return lowerIntrinsicRound(MI);
2058 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
2059 Register OldValRes = MI.getOperand(0).getReg();
2060 Register SuccessRes = MI.getOperand(1).getReg();
2061 Register Addr = MI.getOperand(2).getReg();
2062 Register CmpVal = MI.getOperand(3).getReg();
2063 Register NewVal = MI.getOperand(4).getReg();
2064 MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
2065 **MI.memoperands_begin());
2066 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
2067 MI.eraseFromParent();
2068 return Legalized;
2069 }
2070 case TargetOpcode::G_LOAD:
2071 case TargetOpcode::G_SEXTLOAD:
2072 case TargetOpcode::G_ZEXTLOAD: {
2073 // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
2074 Register DstReg = MI.getOperand(0).getReg();
2075 Register PtrReg = MI.getOperand(1).getReg();
2076 LLT DstTy = MRI.getType(DstReg);
2077 auto &MMO = **MI.memoperands_begin();
2078
2079 if (DstTy.getSizeInBits() == MMO.getSizeInBits()) {
2080 if (MI.getOpcode() == TargetOpcode::G_LOAD) {
2081 // This load needs splitting into power of 2 sized loads.
2082 if (DstTy.isVector())
2083 return UnableToLegalize;
2084 if (isPowerOf2_32(DstTy.getSizeInBits()))
2085 return UnableToLegalize; // Don't know what we're being asked to do.
2086
2087 // Our strategy here is to generate anyextending loads for the smaller
2088 // types up to next power-2 result type, and then combine the two larger
2089 // result values together, before truncating back down to the non-pow-2
2090 // type.
2091 // E.g. v1 = i24 load =>
2092 // v2 = i32 load (2 byte)
2093 // v3 = i32 load (1 byte)
2094 // v4 = i32 shl v3, 16
2095 // v5 = i32 or v4, v2
2096 // v1 = i24 trunc v5
2097 // By doing this we generate the correct truncate which should get
2098 // combined away as an artifact with a matching extend.
2099 uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits());
2100 uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize;
2101
2102 MachineFunction &MF = MIRBuilder.getMF();
2103 MachineMemOperand *LargeMMO =
2104 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
2105 MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
2106 &MMO, LargeSplitSize / 8, SmallSplitSize / 8);
2107
2108 LLT PtrTy = MRI.getType(PtrReg);
2109 unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits());
2110 LLT AnyExtTy = LLT::scalar(AnyExtSize);
2111 Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
2112 Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
2113 auto LargeLoad =
2114 MIRBuilder.buildLoad(LargeLdReg, PtrReg, *LargeMMO);
2115
2116 auto OffsetCst =
2117 MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8);
2118 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
2119 auto SmallPtr =
2120 MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
2121 auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0),
2122 *SmallMMO);
2123
2124 auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
2125 auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
2126 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
2127 MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)});
2128 MI.eraseFromParent();
2129 return Legalized;
2130 }
2131 MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
2132 MI.eraseFromParent();
2133 return Legalized;
2134 }
2135
2136 if (DstTy.isScalar()) {
2137 Register TmpReg =
2138 MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits()));
2139 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
2140 switch (MI.getOpcode()) {
2141 default:
2142 llvm_unreachable("Unexpected opcode");
2143 case TargetOpcode::G_LOAD:
2144 MIRBuilder.buildExtOrTrunc(TargetOpcode::G_ANYEXT, DstReg, TmpReg);
2145 break;
2146 case TargetOpcode::G_SEXTLOAD:
2147 MIRBuilder.buildSExt(DstReg, TmpReg);
2148 break;
2149 case TargetOpcode::G_ZEXTLOAD:
2150 MIRBuilder.buildZExt(DstReg, TmpReg);
2151 break;
2152 }
2153 MI.eraseFromParent();
2154 return Legalized;
2155 }
2156
2157 return UnableToLegalize;
2158 }
2159 case TargetOpcode::G_STORE: {
2160 // Lower a non-power of 2 store into multiple pow-2 stores.
2161 // E.g. split an i24 store into an i16 store + i8 store.
2162 // We do this by first extending the stored value to the next largest power
2163 // of 2 type, and then using truncating stores to store the components.
2164 // By doing this, likewise with G_LOAD, generate an extend that can be
2165 // artifact-combined away instead of leaving behind extracts.
2166 Register SrcReg = MI.getOperand(0).getReg();
2167 Register PtrReg = MI.getOperand(1).getReg();
2168 LLT SrcTy = MRI.getType(SrcReg);
2169 MachineMemOperand &MMO = **MI.memoperands_begin();
2170 if (SrcTy.getSizeInBits() != MMO.getSizeInBits())
2171 return UnableToLegalize;
2172 if (SrcTy.isVector())
2173 return UnableToLegalize;
2174 if (isPowerOf2_32(SrcTy.getSizeInBits()))
2175 return UnableToLegalize; // Don't know what we're being asked to do.
2176
2177 // Extend to the next pow-2.
2178 const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits()));
2179 auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg);
2180
2181 // Obtain the smaller value by shifting away the larger value.
2182 uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits());
2183 uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize;
2184 auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
2185 auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);
2186
2187 // Generate the PtrAdd and truncating stores.
2188 LLT PtrTy = MRI.getType(PtrReg);
2189 auto OffsetCst =
2190 MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8);
2191 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
2192 auto SmallPtr =
2193 MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
2194
2195 MachineFunction &MF = MIRBuilder.getMF();
2196 MachineMemOperand *LargeMMO =
2197 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
2198 MachineMemOperand *SmallMMO =
2199 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
2200 MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO);
2201 MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO);
2202 MI.eraseFromParent();
2203 return Legalized;
2204 }
2205 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2206 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2207 case TargetOpcode::G_CTLZ:
2208 case TargetOpcode::G_CTTZ:
2209 case TargetOpcode::G_CTPOP:
2210 return lowerBitCount(MI, TypeIdx, Ty);
2211 case G_UADDO: {
2212 Register Res = MI.getOperand(0).getReg();
2213 Register CarryOut = MI.getOperand(1).getReg();
2214 Register LHS = MI.getOperand(2).getReg();
2215 Register RHS = MI.getOperand(3).getReg();
2216
2217 MIRBuilder.buildAdd(Res, LHS, RHS);
2218 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
2219
2220 MI.eraseFromParent();
2221 return Legalized;
2222 }
2223 case G_UADDE: {
2224 Register Res = MI.getOperand(0).getReg();
2225 Register CarryOut = MI.getOperand(1).getReg();
2226 Register LHS = MI.getOperand(2).getReg();
2227 Register RHS = MI.getOperand(3).getReg();
2228 Register CarryIn = MI.getOperand(4).getReg();
2229
2230 Register TmpRes = MRI.createGenericVirtualRegister(Ty);
2231 Register ZExtCarryIn = MRI.createGenericVirtualRegister(Ty);
2232
2233 MIRBuilder.buildAdd(TmpRes, LHS, RHS);
2234 MIRBuilder.buildZExt(ZExtCarryIn, CarryIn);
2235 MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
2236 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
2237
2238 MI.eraseFromParent();
2239 return Legalized;
2240 }
2241 case G_USUBO: {
2242 Register Res = MI.getOperand(0).getReg();
2243 Register BorrowOut = MI.getOperand(1).getReg();
2244 Register LHS = MI.getOperand(2).getReg();
2245 Register RHS = MI.getOperand(3).getReg();
2246
2247 MIRBuilder.buildSub(Res, LHS, RHS);
2248 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
2249
2250 MI.eraseFromParent();
2251 return Legalized;
2252 }
2253 case G_USUBE: {
2254 Register Res = MI.getOperand(0).getReg();
2255 Register BorrowOut = MI.getOperand(1).getReg();
2256 Register LHS = MI.getOperand(2).getReg();
2257 Register RHS = MI.getOperand(3).getReg();
2258 Register BorrowIn = MI.getOperand(4).getReg();
2259
2260 Register TmpRes = MRI.createGenericVirtualRegister(Ty);
2261 Register ZExtBorrowIn = MRI.createGenericVirtualRegister(Ty);
2262 Register LHS_EQ_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
2263 Register LHS_ULT_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
2264
2265 MIRBuilder.buildSub(TmpRes, LHS, RHS);
2266 MIRBuilder.buildZExt(ZExtBorrowIn, BorrowIn);
2267 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
2268 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LHS_EQ_RHS, LHS, RHS);
2269 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, LHS_ULT_RHS, LHS, RHS);
2270 MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);
2271
2272 MI.eraseFromParent();
2273 return Legalized;
2274 }
2275 case G_UITOFP:
2276 return lowerUITOFP(MI, TypeIdx, Ty);
2277 case G_SITOFP:
2278 return lowerSITOFP(MI, TypeIdx, Ty);
2279 case G_FPTOUI:
2280 return lowerFPTOUI(MI, TypeIdx, Ty);
2281 case G_SMIN:
2282 case G_SMAX:
2283 case G_UMIN:
2284 case G_UMAX:
2285 return lowerMinMax(MI, TypeIdx, Ty);
2286 case G_FCOPYSIGN:
2287 return lowerFCopySign(MI, TypeIdx, Ty);
2288 case G_FMINNUM:
2289 case G_FMAXNUM:
2290 return lowerFMinNumMaxNum(MI);
2291 case G_UNMERGE_VALUES:
2292 return lowerUnmergeValues(MI);
2293 case TargetOpcode::G_SEXT_INREG: {
2294 assert(MI.getOperand(2).isImm() && "Expected immediate");
2295 int64_t SizeInBits = MI.getOperand(2).getImm();
2296
2297 Register DstReg = MI.getOperand(0).getReg();
2298 Register SrcReg = MI.getOperand(1).getReg();
2299 LLT DstTy = MRI.getType(DstReg);
2300 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
2301
2302 auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
2303 MIRBuilder.buildInstr(TargetOpcode::G_SHL, {TmpRes}, {SrcReg, MIBSz->getOperand(0).getReg()});
2304 MIRBuilder.buildInstr(TargetOpcode::G_ASHR, {DstReg}, {TmpRes, MIBSz->getOperand(0).getReg()});
2305 MI.eraseFromParent();
2306 return Legalized;
2307 }
2308 case G_SHUFFLE_VECTOR:
2309 return lowerShuffleVector(MI);
2310 case G_DYN_STACKALLOC:
2311 return lowerDynStackAlloc(MI);
2312 case G_EXTRACT:
2313 return lowerExtract(MI);
2314 case G_INSERT:
2315 return lowerInsert(MI);
2316 case G_BSWAP:
2317 return lowerBswap(MI);
2318 case G_BITREVERSE:
2319 return lowerBitreverse(MI);
2320 case G_READ_REGISTER:
2321 return lowerReadRegister(MI);
2322 }
2323 }
2324
fewerElementsVectorImplicitDef(MachineInstr & MI,unsigned TypeIdx,LLT NarrowTy)2325 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
2326 MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
2327 SmallVector<Register, 2> DstRegs;
2328
2329 unsigned NarrowSize = NarrowTy.getSizeInBits();
2330 Register DstReg = MI.getOperand(0).getReg();
2331 unsigned Size = MRI.getType(DstReg).getSizeInBits();
2332 int NumParts = Size / NarrowSize;
2333 // FIXME: Don't know how to handle the situation where the small vectors
2334 // aren't all the same size yet.
2335 if (Size % NarrowSize != 0)
2336 return UnableToLegalize;
2337
2338 for (int i = 0; i < NumParts; ++i) {
2339 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
2340 MIRBuilder.buildUndef(TmpReg);
2341 DstRegs.push_back(TmpReg);
2342 }
2343
2344 if (NarrowTy.isVector())
2345 MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2346 else
2347 MIRBuilder.buildBuildVector(DstReg, DstRegs);
2348
2349 MI.eraseFromParent();
2350 return Legalized;
2351 }
2352
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx,
                                          LLT NarrowTy) {
  // Split a 1-3 source operand operation into NarrowTy-sized pieces. When
  // NarrowTy doesn't evenly divide the result, a single leftover element is
  // handled via an extract/op/insert sequence instead.
  const unsigned Opc = MI.getOpcode();
  const unsigned NumOps = MI.getNumOperands() - 1;
  const unsigned NarrowSize = NarrowTy.getSizeInBits();
  const Register DstReg = MI.getOperand(0).getReg();
  const unsigned Flags = MI.getFlags();
  const LLT DstTy = MRI.getType(DstReg);
  const unsigned Size = DstTy.getSizeInBits();
  const int NumParts = Size / NarrowSize;
  const LLT EltTy = DstTy.getElementType();
  const unsigned EltSize = EltTy.getSizeInBits();
  const unsigned BitsForNumParts = NarrowSize * NumParts;

  // Check if we have any leftovers. If we do, then only handle the case where
  // the leftover is one element.
  if (BitsForNumParts != Size && BitsForNumParts + EltSize != Size)
    return UnableToLegalize;

  if (BitsForNumParts != Size) {
    // Uneven breakdown: build the result incrementally, starting from undef.
    Register AccumDstReg = MRI.createGenericVirtualRegister(DstTy);
    MIRBuilder.buildUndef(AccumDstReg);

    // Handle the pieces which evenly divide into the requested type with
    // extract/op/insert sequence.
    for (unsigned Offset = 0; Offset < BitsForNumParts; Offset += NarrowSize) {
      SmallVector<SrcOp, 4> SrcOps;
      // Extract the piece at this bit offset from each source operand.
      for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
        Register PartOpReg = MRI.createGenericVirtualRegister(NarrowTy);
        MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(), Offset);
        SrcOps.push_back(PartOpReg);
      }

      Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);

      // Insert this piece into the accumulated result.
      Register PartInsertReg = MRI.createGenericVirtualRegister(DstTy);
      MIRBuilder.buildInsert(PartInsertReg, AccumDstReg, PartDstReg, Offset);
      AccumDstReg = PartInsertReg;
    }

    // Handle the remaining element sized leftover piece.
    SmallVector<SrcOp, 4> SrcOps;
    for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
      Register PartOpReg = MRI.createGenericVirtualRegister(EltTy);
      MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(),
                              BitsForNumParts);
      SrcOps.push_back(PartOpReg);
    }

    // The last insert writes directly to the final destination register.
    Register PartDstReg = MRI.createGenericVirtualRegister(EltTy);
    MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
    MIRBuilder.buildInsert(DstReg, AccumDstReg, PartDstReg, BitsForNumParts);
    MI.eraseFromParent();

    return Legalized;
  }

  // Even breakdown: split each source into NumParts registers up front.
  SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;

  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src0Regs);

  if (NumOps >= 2)
    extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src1Regs);

  if (NumOps >= 3)
    extractParts(MI.getOperand(3).getReg(), NarrowTy, NumParts, Src2Regs);

  // Emit one narrow instruction per part, preserving the original MI flags.
  for (int i = 0; i < NumParts; ++i) {
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);

    if (NumOps == 1)
      MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i]}, Flags);
    else if (NumOps == 2) {
      MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i], Src1Regs[i]}, Flags);
    } else if (NumOps == 3) {
      MIRBuilder.buildInstr(Opc, {DstReg},
                            {Src0Regs[i], Src1Regs[i], Src2Regs[i]}, Flags);
    }

    DstRegs.push_back(DstReg);
  }

  // Reassemble the narrow results into the original wide destination.
  if (NarrowTy.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}
2445
2446 // Handle splitting vector operations which need to have the same number of
2447 // elements in each type index, but each type index may have a different element
2448 // type.
2449 //
2450 // e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
2451 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
2452 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
2453 //
2454 // Also handles some irregular breakdown cases, e.g.
2455 // e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
2456 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
2457 // s64 = G_SHL s64, s32
LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorMultiEltType(
    MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
  // Split an operation so that every type index keeps the same number of
  // elements per piece, even though each index may use a different element
  // type (see the G_SHL examples in the comment above this function).
  if (TypeIdx != 0)
    return UnableToLegalize;

  const LLT NarrowTy0 = NarrowTyArg;
  const unsigned NewNumElts =
      NarrowTy0.isVector() ? NarrowTy0.getNumElements() : 1;

  const Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT LeftoverTy0;

  // All of the operands need to have the same number of elements, so if we can
  // determine a type breakdown for the result type, we can for all of the
  // source types.
  int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first;
  if (NumParts < 0)
    return UnableToLegalize;

  // New instructions are built but not inserted until all operands have been
  // split, so operand pieces can be appended instruction by instruction.
  SmallVector<MachineInstrBuilder, 4> NewInsts;

  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
  SmallVector<Register, 4> PartRegs, LeftoverRegs;

  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
    LLT LeftoverTy;
    Register SrcReg = MI.getOperand(I).getReg();
    LLT SrcTyI = MRI.getType(SrcReg);
    // Same element count as the result pieces, but this operand's own
    // element type.
    LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType());
    LLT LeftoverTyI;

    // Split this operand into the requested typed registers, and any leftover
    // required to reproduce the original type.
    if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
                      LeftoverRegs))
      return UnableToLegalize;

    if (I == 1) {
      // For the first operand, create an instruction for each part and setup
      // the result.
      for (Register PartReg : PartRegs) {
        Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
        NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
                               .addDef(PartDstReg)
                               .addUse(PartReg));
        DstRegs.push_back(PartDstReg);
      }

      for (Register LeftoverReg : LeftoverRegs) {
        Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0);
        NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
                               .addDef(PartDstReg)
                               .addUse(LeftoverReg));
        LeftoverDstRegs.push_back(PartDstReg);
      }
    } else {
      assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());

      // Add the newly created operand splits to the existing instructions. The
      // odd-sized pieces are ordered after the requested NarrowTyArg sized
      // pieces.
      unsigned InstCount = 0;
      for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J)
        NewInsts[InstCount++].addUse(PartRegs[J]);
      for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J)
        NewInsts[InstCount++].addUse(LeftoverRegs[J]);
    }

    // Reuse the scratch vectors for the next operand.
    PartRegs.clear();
    LeftoverRegs.clear();
  }

  // Insert the newly built operations and rebuild the result register.
  for (auto &MIB : NewInsts)
    MIRBuilder.insertInstr(MIB);

  insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);

  MI.eraseFromParent();
  return Legalized;
}
2541
// Break a vector cast/conversion (ext/trunc, fp<->int, ptr<->int, addrspace
// cast) into NarrowTy-sized pieces and reassemble the result.
//
// Only the result type (TypeIdx 0) may be split here; the source piece type
// is derived from the result breakdown.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
                                          LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);

  // NarrowTy0: piece of the result. NarrowTy1: matching piece of the source.
  LLT NarrowTy0 = NarrowTy;
  LLT NarrowTy1;
  unsigned NumParts;

  if (NarrowTy.isVector()) {
    // Uneven breakdown not handled.
    NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
    if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
      return UnableToLegalize;

    // NOTE(review): the element count used here is NumParts, which equals
    // NarrowTy.getNumElements() only when the vector is split exactly in
    // half — confirm this is intended for deeper breakdowns.
    NarrowTy1 = LLT::vector(NumParts, SrcTy.getElementType().getSizeInBits());
  } else {
    // Scalar NarrowTy: fully scalarize, one cast per element.
    NumParts = DstTy.getNumElements();
    NarrowTy1 = SrcTy.getElementType();
  }

  SmallVector<Register, 4> SrcRegs, DstRegs;
  extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);

  for (unsigned I = 0; I < NumParts; ++I) {
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    MachineInstr *NewInst = MIRBuilder.buildInstr(MI.getOpcode())
      .addDef(DstReg)
      .addUse(SrcRegs[I]);

    // Preserve the original instruction's flags on each split piece.
    NewInst->setFlags(MI.getFlags());
    DstRegs.push_back(DstReg);
  }

  // Reassemble: concat sub-vectors, or build a vector from scalar pieces.
  if (NarrowTy.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}
2590
// Split a vector G_ICMP/G_FCMP into NumParts narrower compares and recombine
// the boolean results.
//
// TypeIdx 0 requests splitting on the result (boolean) type, TypeIdx 1 on the
// compared-operand type; in each case the other side's piece type is derived
// so element counts line up.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
                                        LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register Src0Reg = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(Src0Reg);

  unsigned NumParts;
  LLT NarrowTy0, NarrowTy1;  // Result piece type / source-operand piece type.

  if (TypeIdx == 0) {
    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
    unsigned OldElts = DstTy.getNumElements();

    NarrowTy0 = NarrowTy;
    // Scalar NarrowTy means full scalarization: one compare per element.
    NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements();
    // Source pieces keep the source element size with the narrow count.
    NarrowTy1 = NarrowTy.isVector() ?
      LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) :
      SrcTy.getElementType();

  } else {
    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
    unsigned OldElts = SrcTy.getNumElements();

    // NOTE(review): the scalar-NarrowTy arm calls getNumElements() on a
    // non-vector type — presumably unreachable for TypeIdx 1; confirm.
    NumParts = NarrowTy.isVector() ?  (OldElts / NewElts) :
      NarrowTy.getNumElements();
    NarrowTy0 = LLT::vector(NarrowTy.getNumElements(),
                            DstTy.getScalarSizeInBits());
    NarrowTy1 = NarrowTy;
  }

  // FIXME: Don't know how to handle the situation where the small vectors
  // aren't all the same size yet.
  if (NarrowTy1.isVector() &&
      NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
    return UnableToLegalize;

  CmpInst::Predicate Pred
    = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());

  SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
  extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
  extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);

  for (unsigned I = 0; I < NumParts; ++I) {
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    DstRegs.push_back(DstReg);

    if (MI.getOpcode() == TargetOpcode::G_ICMP)
      MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
    else {
      MachineInstr *NewCmp
        = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
      // Preserve flags (e.g. fast-math) on each split FP compare.
      NewCmp->setFlags(MI.getFlags());
    }
  }

  // Recombine into the original result register.
  if (NarrowTy1.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}
2657
// Split a G_SELECT with a vector result (and possibly a vector condition)
// into NumParts narrower selects and recombine.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
                                           LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register CondReg = MI.getOperand(1).getReg();

  unsigned NumParts = 0;
  LLT NarrowTy0, NarrowTy1;  // Result piece type / condition piece type.

  LLT DstTy = MRI.getType(DstReg);
  LLT CondTy = MRI.getType(CondReg);
  unsigned Size = DstTy.getSizeInBits();

  // Splitting on the condition (TypeIdx 1) only makes sense for a vector
  // condition; a scalar condition is simply shared by every piece.
  assert(TypeIdx == 0 || CondTy.isVector());

  if (TypeIdx == 0) {
    NarrowTy0 = NarrowTy;
    NarrowTy1 = CondTy;

    unsigned NarrowSize = NarrowTy0.getSizeInBits();
    // FIXME: Don't know how to handle the situation where the small vectors
    // aren't all the same size yet.
    if (Size % NarrowSize != 0)
      return UnableToLegalize;

    NumParts = Size / NarrowSize;

    // Need to break down the condition type
    if (CondTy.isVector()) {
      if (CondTy.getNumElements() == NumParts)
        NarrowTy1 = CondTy.getElementType();
      else
        NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts,
                                CondTy.getScalarSizeInBits());
    }
  } else {
    NumParts = CondTy.getNumElements();
    if (NarrowTy.isVector()) {
      // TODO: Handle uneven breakdown.
      if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements())
        return UnableToLegalize;

      // Vector condition pieces are not implemented: we bail unconditionally
      // here, so the divisibility check above is currently redundant.
      return UnableToLegalize;
    } else {
      // Scalarize: one select per condition element.
      NarrowTy0 = DstTy.getElementType();
      NarrowTy1 = NarrowTy;
    }
  }

  SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
  // Only a vector condition is split; a scalar condition is reused whole.
  if (CondTy.isVector())
    extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);

  extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
  extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);

  for (unsigned i = 0; i < NumParts; ++i) {
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
                           Src1Regs[i], Src2Regs[i]);
    DstRegs.push_back(DstReg);
  }

  // Recombine into the original result register.
  if (NarrowTy0.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}
2729
// Split a wide G_PHI into several NarrowTy-typed PHIs (plus leftover-typed
// ones for an uneven breakdown), extracting the incoming pieces in each
// predecessor and recombining the results after the PHI group.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
                                        LLT NarrowTy) {
  const Register DstReg = MI.getOperand(0).getReg();
  LLT PhiTy = MRI.getType(DstReg);
  LLT LeftoverTy;

  // All of the operands need to have the same number of elements, so if we can
  // determine a type breakdown for the result type, we can for all of the
  // source types.
  int NumParts, NumLeftover;
  std::tie(NumParts, NumLeftover)
    = getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy);
  if (NumParts < 0)
    return UnableToLegalize;

  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
  SmallVector<MachineInstrBuilder, 4> NewInsts;

  const int TotalNumParts = NumParts + NumLeftover;

  // Insert the new phis in the result block first. Incoming operands are
  // appended later, once the per-predecessor pieces exist.
  for (int I = 0; I != TotalNumParts; ++I) {
    LLT Ty = I < NumParts ? NarrowTy : LeftoverTy;
    Register PartDstReg = MRI.createGenericVirtualRegister(Ty);
    NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI)
                       .addDef(PartDstReg));
    if (I < NumParts)
      DstRegs.push_back(PartDstReg);
    else
      LeftoverDstRegs.push_back(PartDstReg);
  }

  // Recombine the pieces into the original wide value after the PHI group;
  // the merge instructions must not be interleaved with the PHIs.
  MachineBasicBlock *MBB = MI.getParent();
  MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI());
  insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs);

  SmallVector<Register, 4> PartRegs, LeftoverRegs;

  // Insert code to extract the incoming values in each predecessor block.
  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
    PartRegs.clear();
    LeftoverRegs.clear();

    Register SrcReg = MI.getOperand(I).getReg();
    MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
    // Place the extracts before the predecessor's terminator so the pieces
    // are available on the edge into the PHI block.
    MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());

    LLT Unused;  // The breakdown was already validated on the result type.
    if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs,
                      LeftoverRegs))
      return UnableToLegalize;

    // Add the newly created operand splits to the existing instructions. The
    // odd-sized pieces are ordered after the requested NarrowTyArg sized
    // pieces.
    for (int J = 0; J != TotalNumParts; ++J) {
      MachineInstrBuilder MIB = NewInsts[J];
      MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]);
      MIB.addMBB(&OpMBB);
    }
  }

  MI.eraseFromParent();
  return Legalized;
}
2796
2797 LegalizerHelper::LegalizeResult
fewerElementsVectorUnmergeValues(MachineInstr & MI,unsigned TypeIdx,LLT NarrowTy)2798 LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
2799 unsigned TypeIdx,
2800 LLT NarrowTy) {
2801 if (TypeIdx != 1)
2802 return UnableToLegalize;
2803
2804 const int NumDst = MI.getNumOperands() - 1;
2805 const Register SrcReg = MI.getOperand(NumDst).getReg();
2806 LLT SrcTy = MRI.getType(SrcReg);
2807
2808 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2809
2810 // TODO: Create sequence of extracts.
2811 if (DstTy == NarrowTy)
2812 return UnableToLegalize;
2813
2814 LLT GCDTy = getGCDType(SrcTy, NarrowTy);
2815 if (DstTy == GCDTy) {
2816 // This would just be a copy of the same unmerge.
2817 // TODO: Create extracts, pad with undef and create intermediate merges.
2818 return UnableToLegalize;
2819 }
2820
2821 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
2822 const int NumUnmerge = Unmerge->getNumOperands() - 1;
2823 const int PartsPerUnmerge = NumDst / NumUnmerge;
2824
2825 for (int I = 0; I != NumUnmerge; ++I) {
2826 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
2827
2828 for (int J = 0; J != PartsPerUnmerge; ++J)
2829 MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
2830 MIB.addUse(Unmerge.getReg(I));
2831 }
2832
2833 MI.eraseFromParent();
2834 return Legalized;
2835 }
2836
2837 LegalizerHelper::LegalizeResult
fewerElementsVectorBuildVector(MachineInstr & MI,unsigned TypeIdx,LLT NarrowTy)2838 LegalizerHelper::fewerElementsVectorBuildVector(MachineInstr &MI,
2839 unsigned TypeIdx,
2840 LLT NarrowTy) {
2841 assert(TypeIdx == 0 && "not a vector type index");
2842 Register DstReg = MI.getOperand(0).getReg();
2843 LLT DstTy = MRI.getType(DstReg);
2844 LLT SrcTy = DstTy.getElementType();
2845
2846 int DstNumElts = DstTy.getNumElements();
2847 int NarrowNumElts = NarrowTy.getNumElements();
2848 int NumConcat = (DstNumElts + NarrowNumElts - 1) / NarrowNumElts;
2849 LLT WidenedDstTy = LLT::vector(NarrowNumElts * NumConcat, SrcTy);
2850
2851 SmallVector<Register, 8> ConcatOps;
2852 SmallVector<Register, 8> SubBuildVector;
2853
2854 Register UndefReg;
2855 if (WidenedDstTy != DstTy)
2856 UndefReg = MIRBuilder.buildUndef(SrcTy).getReg(0);
2857
2858 // Create a G_CONCAT_VECTORS of NarrowTy pieces, padding with undef as
2859 // necessary.
2860 //
2861 // %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2
2862 // -> <2 x s16>
2863 //
2864 // %4:_(s16) = G_IMPLICIT_DEF
2865 // %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1
2866 // %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4
2867 // %7:_(<4 x s16>) = G_CONCAT_VECTORS %5, %6
2868 // %3:_(<3 x s16>) = G_EXTRACT %7, 0
2869 for (int I = 0; I != NumConcat; ++I) {
2870 for (int J = 0; J != NarrowNumElts; ++J) {
2871 int SrcIdx = NarrowNumElts * I + J;
2872
2873 if (SrcIdx < DstNumElts) {
2874 Register SrcReg = MI.getOperand(SrcIdx + 1).getReg();
2875 SubBuildVector.push_back(SrcReg);
2876 } else
2877 SubBuildVector.push_back(UndefReg);
2878 }
2879
2880 auto BuildVec = MIRBuilder.buildBuildVector(NarrowTy, SubBuildVector);
2881 ConcatOps.push_back(BuildVec.getReg(0));
2882 SubBuildVector.clear();
2883 }
2884
2885 if (DstTy == WidenedDstTy)
2886 MIRBuilder.buildConcatVectors(DstReg, ConcatOps);
2887 else {
2888 auto Concat = MIRBuilder.buildConcatVectors(WidenedDstTy, ConcatOps);
2889 MIRBuilder.buildExtract(DstReg, Concat, 0);
2890 }
2891
2892 MI.eraseFromParent();
2893 return Legalized;
2894 }
2895
// Narrow a non-atomic G_LOAD/G_STORE of a wide type into a sequence of
// NarrowTy-sized memory operations, plus leftover-typed operations when the
// width does not divide evenly.
LegalizerHelper::LegalizeResult
LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
                                      LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  MachineMemOperand *MMO = *MI.memoperands_begin();

  // This implementation doesn't work for atomics. Give up instead of doing
  // something invalid.
  if (MMO->getOrdering() != AtomicOrdering::NotAtomic ||
      MMO->getFailureOrdering() != AtomicOrdering::NotAtomic)
    return UnableToLegalize;

  bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
  Register ValReg = MI.getOperand(0).getReg();
  Register AddrReg = MI.getOperand(1).getReg();
  LLT ValTy = MRI.getType(ValReg);

  int NumParts = -1;  // Stays -1 if the type breakdown fails.
  int NumLeftover = -1;
  LLT LeftoverTy;
  SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
  if (IsLoad) {
    // A load only needs the part counts; destination registers are created
    // as each piece is loaded.
    std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
  } else {
    // A store must split the stored value up front.
    if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
                     NarrowLeftoverRegs)) {
      NumParts = NarrowRegs.size();
      NumLeftover = NarrowLeftoverRegs.size();
    }
  }

  if (NumParts == -1)
    return UnableToLegalize;

  // Address offsets are computed in the pointer's index width.
  const LLT OffsetTy = LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());

  unsigned TotalSize = ValTy.getSizeInBits();

  // Split the load/store into PartTy sized pieces starting at Offset. If this
  // is a load, return the new registers in ValRegs. For a store, each elements
  // of ValRegs should be PartTy. Returns the next offset that needs to be
  // handled.
  // NOTE(review): bit offsets/sizes are divided by 8 below — assumes
  // byte-sized, byte-aligned parts; confirm callers guarantee this.
  auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
                             unsigned Offset) -> unsigned {
    MachineFunction &MF = MIRBuilder.getMF();
    unsigned PartSize = PartTy.getSizeInBits();
    for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
         Offset += PartSize, ++Idx) {
      unsigned ByteSize = PartSize / 8;
      unsigned ByteOffset = Offset / 8;
      Register NewAddrReg;

      MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);

      // Derive a memory operand describing the subrange of the original
      // access.
      MachineMemOperand *NewMMO =
        MF.getMachineMemOperand(MMO, ByteOffset, ByteSize);

      if (IsLoad) {
        Register Dst = MRI.createGenericVirtualRegister(PartTy);
        ValRegs.push_back(Dst);
        MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
      } else {
        MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
      }
    }

    return Offset;
  };

  unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);

  // Handle the rest of the register if this isn't an even type breakdown.
  if (LeftoverTy.isValid())
    splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);

  // For loads, stitch the loaded pieces back into the original wide value.
  if (IsLoad) {
    insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
                LeftoverTy, NarrowLeftoverRegs);
  }

  MI.eraseFromParent();
  return Legalized;
}
2982
// Top-level dispatcher for the FewerElements (vector splitting) legalize
// action: routes each opcode to the handler that knows which operands to
// split and how to recombine the results.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  using namespace TargetOpcode;

  MIRBuilder.setInstr(MI);
  switch (MI.getOpcode()) {
  case G_IMPLICIT_DEF:
    return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
  // Ops where every operand and the result share one type: split all
  // operands uniformly.
  case G_AND:
  case G_OR:
  case G_XOR:
  case G_ADD:
  case G_SUB:
  case G_MUL:
  case G_SMULH:
  case G_UMULH:
  case G_FADD:
  case G_FMUL:
  case G_FSUB:
  case G_FNEG:
  case G_FABS:
  case G_FCANONICALIZE:
  case G_FDIV:
  case G_FREM:
  case G_FMA:
  case G_FMAD:
  case G_FPOW:
  case G_FEXP:
  case G_FEXP2:
  case G_FLOG:
  case G_FLOG2:
  case G_FLOG10:
  case G_FNEARBYINT:
  case G_FCEIL:
  case G_FFLOOR:
  case G_FRINT:
  case G_INTRINSIC_ROUND:
  case G_INTRINSIC_TRUNC:
  case G_FCOS:
  case G_FSIN:
  case G_FSQRT:
  case G_BSWAP:
  case G_BITREVERSE:
  case G_SDIV:
  case G_UDIV:
  case G_SREM:
  case G_UREM:
  case G_SMIN:
  case G_SMAX:
  case G_UMIN:
  case G_UMAX:
  case G_FMINNUM:
  case G_FMAXNUM:
  case G_FMINNUM_IEEE:
  case G_FMAXNUM_IEEE:
  case G_FMINIMUM:
  case G_FMAXIMUM:
    return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy);
  // Ops whose operands can have distinct types (shift amount, count result,
  // copysign magnitude/sign): each operand is split per its own type.
  case G_SHL:
  case G_LSHR:
  case G_ASHR:
  case G_CTLZ:
  case G_CTLZ_ZERO_UNDEF:
  case G_CTTZ:
  case G_CTTZ_ZERO_UNDEF:
  case G_CTPOP:
  case G_FCOPYSIGN:
    return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);
  // Conversions: result and source element types differ.
  case G_ZEXT:
  case G_SEXT:
  case G_ANYEXT:
  case G_FPEXT:
  case G_FPTRUNC:
  case G_SITOFP:
  case G_UITOFP:
  case G_FPTOSI:
  case G_FPTOUI:
  case G_INTTOPTR:
  case G_PTRTOINT:
  case G_ADDRSPACE_CAST:
    return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
  case G_ICMP:
  case G_FCMP:
    return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
  case G_SELECT:
    return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
  case G_PHI:
    return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);
  case G_UNMERGE_VALUES:
    return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
  case G_BUILD_VECTOR:
    return fewerElementsVectorBuildVector(MI, TypeIdx, NarrowTy);
  case G_LOAD:
  case G_STORE:
    return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
  default:
    return UnableToLegalize;
  }
}
3083
// Expand a shift of a 2*N-bit scalar into operations on its N-bit halves when
// the shift amount \p Amt is a known constant. Mirrors the case analysis of
// SelectionDAG's expand-shift-by-constant: out-of-range, cross-half,
// exactly-one-half, and general in-half shifts.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
                                             const LLT HalfTy, const LLT AmtTy) {

  Register InL = MRI.createGenericVirtualRegister(HalfTy);
  Register InH = MRI.createGenericVirtualRegister(HalfTy);
  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());

  // Shift by zero: just reassemble the input halves.
  if (Amt.isNullValue()) {
    MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {InL, InH});
    MI.eraseFromParent();
    return Legalized;
  }

  LLT NVT = HalfTy;
  unsigned NVTBits = HalfTy.getSizeInBits();
  unsigned VTBits = 2 * NVTBits;  // Width of the original value.

  SrcOp Lo(Register(0)), Hi(Register(0));
  if (MI.getOpcode() == TargetOpcode::G_SHL) {
    if (Amt.ugt(VTBits)) {
      // Shift amount exceeds the full width (poison anyway); produce zero.
      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt.ugt(NVTBits)) {
      // Crosses the half boundary: low half is cleared, high half comes from
      // the low half shifted by the remaining amount.
      Lo = MIRBuilder.buildConstant(NVT, 0);
      Hi = MIRBuilder.buildShl(NVT, InL,
                               MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
    } else if (Amt == NVTBits) {
      // Exactly one half: the low half moves wholesale into the high half.
      Lo = MIRBuilder.buildConstant(NVT, 0);
      Hi = InL;
    } else {
      // In-half shift: high half combines its own shifted bits with the bits
      // carried out of the low half.
      Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
      auto OrLHS =
        MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
      auto OrRHS = MIRBuilder.buildLShr(
        NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
      Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
    }
  } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
    if (Amt.ugt(VTBits)) {
      // Out-of-range logical shift: zero.
      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt.ugt(NVTBits)) {
      // Crosses the half boundary: low half comes from the high half.
      Lo = MIRBuilder.buildLShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
      Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt == NVTBits) {
      // Exactly one half: the high half moves wholesale into the low half.
      Lo = InH;
      Hi = MIRBuilder.buildConstant(NVT, 0);
    } else {
      // In-half shift: low half combines its own shifted bits with the bits
      // carried in from the high half.
      auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);

      auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
      auto OrRHS = MIRBuilder.buildShl(
        NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));

      Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
      Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
    }
  } else {
    // G_ASHR: like G_LSHR, but the high half fills with the sign bit.
    if (Amt.ugt(VTBits)) {
      Hi = Lo = MIRBuilder.buildAShr(
        NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else if (Amt.ugt(NVTBits)) {
      Lo = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
      Hi = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else if (Amt == NVTBits) {
      Lo = InH;
      Hi = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else {
      auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);

      auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
      auto OrRHS = MIRBuilder.buildShl(
        NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));

      Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
      Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
    }
  }

  MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {Lo.getReg(), Hi.getReg()});
  MI.eraseFromParent();

  return Legalized;
}
3171
// TODO: Optimize if constant shift amount.
//
// Narrow a scalar shift to half the destination width. A constant amount
// takes the cheap fixed expansion; otherwise the result halves are chosen at
// runtime with compares and selects.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
                                   LLT RequestedTy) {
  // Narrowing only the shift-amount operand (TypeIdx 1) is a simple in-place
  // source truncation.
  if (TypeIdx == 1) {
    Observer.changingInstr(MI);
    narrowScalarSrc(MI, RequestedTy, 2);
    Observer.changedInstr(MI);
    return Legalized;
  }

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  if (DstTy.isVector())
    return UnableToLegalize;

  Register Amt = MI.getOperand(2).getReg();
  LLT ShiftAmtTy = MRI.getType(Amt);
  const unsigned DstEltSize = DstTy.getScalarSizeInBits();
  if (DstEltSize % 2 != 0)
    return UnableToLegalize;

  // Ignore the input type. We can only go to exactly half the size of the
  // input. If that isn't small enough, the resulting pieces will be further
  // legalized.
  const unsigned NewBitSize = DstEltSize / 2;
  const LLT HalfTy = LLT::scalar(NewBitSize);
  const LLT CondTy = LLT::scalar(1);

  // A known-constant amount admits a much simpler fixed expansion.
  if (const MachineInstr *KShiftAmt =
      getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) {
    return narrowScalarShiftByConstant(
      MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy);
  }

  // TODO: Expand with known bits.

  // Handle the fully general expansion by an unknown amount.
  auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);

  Register InL = MRI.createGenericVirtualRegister(HalfTy);
  Register InH = MRI.createGenericVirtualRegister(HalfTy);
  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());

  // AmtExcess = Amt - NewBitSize: residual amount when the shift crosses the
  // half boundary. AmtLack = NewBitSize - Amt: bits carried between halves
  // for an in-half shift.
  auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
  auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);

  auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
  // IsShort: shift stays within a half. IsZero: no shift at all (guards the
  // combined terms, which misbehave for a zero amount).
  auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
  auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);

  Register ResultRegs[2];
  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL: {
    // Short: ShAmt < NewBitSize
    auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);

    auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
    auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
    auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);

    // Long: ShAmt >= NewBitSize
    auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
    auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.

    auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
    auto Hi = MIRBuilder.buildSelect(
      HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));

    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);
    break;
  }
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR: {
    // Short: ShAmt < NewBitSize
    auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});

    auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
    auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
    auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);

    // Long: ShAmt >= NewBitSize
    MachineInstrBuilder HiL;
    if (MI.getOpcode() == TargetOpcode::G_LSHR) {
      HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
    } else {
      auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
      HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
    }
    auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
                                     {InH, AmtExcess}); // Lo from Hi part.

    auto Lo = MIRBuilder.buildSelect(
      HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));

    auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);

    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);
    break;
  }
  default:
    llvm_unreachable("not a shift");
  }

  MIRBuilder.buildMerge(DstReg, ResultRegs);
  MI.eraseFromParent();
  return Legalized;
}
3282
3283 LegalizerHelper::LegalizeResult
moreElementsVectorPhi(MachineInstr & MI,unsigned TypeIdx,LLT MoreTy)3284 LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
3285 LLT MoreTy) {
3286 assert(TypeIdx == 0 && "Expecting only Idx 0");
3287
3288 Observer.changingInstr(MI);
3289 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
3290 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
3291 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
3292 moreElementsVectorSrc(MI, MoreTy, I);
3293 }
3294
3295 MachineBasicBlock &MBB = *MI.getParent();
3296 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
3297 moreElementsVectorDst(MI, MoreTy, 0);
3298 Observer.changedInstr(MI);
3299 return Legalized;
3300 }
3301
// Top-level dispatcher for the MoreElements (vector widening/padding)
// legalize action: widens the indicated operands/results of each supported
// opcode to MoreTy.
LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                    LLT MoreTy) {
  MIRBuilder.setInstr(MI);
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_IMPLICIT_DEF:
  case TargetOpcode::G_LOAD: {
    // Only the result type is widened.
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_STORE:
    // Only the stored value (operand 0) is widened.
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXNUM_IEEE:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXIMUM: {
    // Binary ops with a single shared type: widen both sources and the
    // result together.
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT:
    // Only the big source (TypeIdx 1) can be widened.
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_INSERT:
    // Only the big container type (TypeIdx 0) can be widened; the container
    // source and result change together.
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_SELECT:
    if (TypeIdx != 0)
      return UnableToLegalize;
    // Vector conditions not handled; a vector condition would need widening
    // in lockstep with the values.
    if (MRI.getType(MI.getOperand(1).getReg()).isVector())
      return UnableToLegalize;

    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorSrc(MI, MoreTy, 3);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_UNMERGE_VALUES: {
    if (TypeIdx != 1)
      return UnableToLegalize;

    // Widen the source, then rebuild the unmerge with extra dead results to
    // cover the padding.
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    int NumDst = MI.getNumOperands() - 1;
    moreElementsVectorSrc(MI, MoreTy, NumDst);

    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
    for (int I = 0; I != NumDst; ++I)
      MIB.addDef(MI.getOperand(I).getReg());

    // Extra results receive the padded portion of the widened source.
    int NewNumDst = MoreTy.getSizeInBits() / DstTy.getSizeInBits();
    for (int I = NumDst; I != NewNumDst; ++I)
      MIB.addDef(MRI.createGenericVirtualRegister(DstTy));

    MIB.addUse(MI.getOperand(NumDst).getReg());
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_PHI:
    return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
  default:
    return UnableToLegalize;
  }
}
3397
multiplyRegisters(SmallVectorImpl<Register> & DstRegs,ArrayRef<Register> Src1Regs,ArrayRef<Register> Src2Regs,LLT NarrowTy)3398 void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
3399 ArrayRef<Register> Src1Regs,
3400 ArrayRef<Register> Src2Regs,
3401 LLT NarrowTy) {
3402 MachineIRBuilder &B = MIRBuilder;
3403 unsigned SrcParts = Src1Regs.size();
3404 unsigned DstParts = DstRegs.size();
3405
3406 unsigned DstIdx = 0; // Low bits of the result.
3407 Register FactorSum =
3408 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
3409 DstRegs[DstIdx] = FactorSum;
3410
3411 unsigned CarrySumPrevDstIdx;
3412 SmallVector<Register, 4> Factors;
3413
3414 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
3415 // Collect low parts of muls for DstIdx.
3416 for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
3417 i <= std::min(DstIdx, SrcParts - 1); ++i) {
3418 MachineInstrBuilder Mul =
3419 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
3420 Factors.push_back(Mul.getReg(0));
3421 }
3422 // Collect high parts of muls from previous DstIdx.
3423 for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
3424 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
3425 MachineInstrBuilder Umulh =
3426 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
3427 Factors.push_back(Umulh.getReg(0));
3428 }
3429 // Add CarrySum from additions calculated for previous DstIdx.
3430 if (DstIdx != 1) {
3431 Factors.push_back(CarrySumPrevDstIdx);
3432 }
3433
3434 Register CarrySum;
3435 // Add all factors and accumulate all carries into CarrySum.
3436 if (DstIdx != DstParts - 1) {
3437 MachineInstrBuilder Uaddo =
3438 B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
3439 FactorSum = Uaddo.getReg(0);
3440 CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
3441 for (unsigned i = 2; i < Factors.size(); ++i) {
3442 MachineInstrBuilder Uaddo =
3443 B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
3444 FactorSum = Uaddo.getReg(0);
3445 MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
3446 CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
3447 }
3448 } else {
3449 // Since value for the next index is not calculated, neither is CarrySum.
3450 FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
3451 for (unsigned i = 2; i < Factors.size(); ++i)
3452 FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
3453 }
3454
3455 CarrySumPrevDstIdx = CarrySum;
3456 DstRegs[DstIdx] = FactorSum;
3457 Factors.clear();
3458 }
3459 }
3460
3461 LegalizerHelper::LegalizeResult
narrowScalarMul(MachineInstr & MI,LLT NarrowTy)3462 LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
3463 Register DstReg = MI.getOperand(0).getReg();
3464 Register Src1 = MI.getOperand(1).getReg();
3465 Register Src2 = MI.getOperand(2).getReg();
3466
3467 LLT Ty = MRI.getType(DstReg);
3468 if (Ty.isVector())
3469 return UnableToLegalize;
3470
3471 unsigned SrcSize = MRI.getType(Src1).getSizeInBits();
3472 unsigned DstSize = Ty.getSizeInBits();
3473 unsigned NarrowSize = NarrowTy.getSizeInBits();
3474 if (DstSize % NarrowSize != 0 || SrcSize % NarrowSize != 0)
3475 return UnableToLegalize;
3476
3477 unsigned NumDstParts = DstSize / NarrowSize;
3478 unsigned NumSrcParts = SrcSize / NarrowSize;
3479 bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
3480 unsigned DstTmpParts = NumDstParts * (IsMulHigh ? 2 : 1);
3481
3482 SmallVector<Register, 2> Src1Parts, Src2Parts, DstTmpRegs;
3483 extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts);
3484 extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts);
3485 DstTmpRegs.resize(DstTmpParts);
3486 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
3487
3488 // Take only high half of registers if this is high mul.
3489 ArrayRef<Register> DstRegs(
3490 IsMulHigh ? &DstTmpRegs[DstTmpParts / 2] : &DstTmpRegs[0], NumDstParts);
3491 MIRBuilder.buildMerge(DstReg, DstRegs);
3492 MI.eraseFromParent();
3493 return Legalized;
3494 }
3495
// Narrow G_EXTRACT by splitting the source register (operand 1) into
// NarrowTy-sized pieces and re-extracting the requested bit range from the
// pieces that overlap it. Only TypeIdx 1 (the source type) is handled.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
  // FIXME: add support for when SizeOp1 isn't an exact multiple of
  // NarrowSize.
  if (SizeOp1 % NarrowSize != 0)
    return UnableToLegalize;
  int NumParts = SizeOp1 / NarrowSize;

  SmallVector<Register, 2> SrcRegs, DstRegs;
  SmallVector<uint64_t, 2> Indexes;
  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

  // OpReg is the extract's result; [OpStart, OpStart + OpSize) is the bit
  // range being pulled out of the source.
  Register OpReg = MI.getOperand(0).getReg();
  uint64_t OpStart = MI.getOperand(2).getImm();
  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
  for (int i = 0; i < NumParts; ++i) {
    // This piece covers source bits [SrcStart, SrcStart + NarrowSize).
    unsigned SrcStart = i * NarrowSize;

    if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
      // No part of the extract uses this subregister, ignore it.
      continue;
    } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
      // The entire subregister is extracted, forward the value.
      DstRegs.push_back(SrcRegs[i]);
      continue;
    }

    // OpSegStart is where this destination segment would start in OpReg if it
    // extended infinitely in both directions.
    int64_t ExtractOffset;
    uint64_t SegSize;
    if (OpStart < SrcStart) {
      // The piece starts inside the requested range: take its low bits, up to
      // the end of the range.
      ExtractOffset = 0;
      SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
    } else {
      // The requested range starts inside this piece: skip to its start.
      ExtractOffset = OpStart - SrcStart;
      SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
    }

    Register SegReg = SrcRegs[i];
    if (ExtractOffset != 0 || SegSize != NarrowSize) {
      // A genuine extract is needed.
      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
      MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
    }

    DstRegs.push_back(SegReg);
  }

  // Reassemble the collected segments into the full-width result.
  Register DstReg = MI.getOperand(0).getReg();
  if(MRI.getType(DstReg).isVector())
    MIRBuilder.buildBuildVector(DstReg, DstRegs);
  else
    MIRBuilder.buildMerge(DstReg, DstRegs);
  MI.eraseFromParent();
  return Legalized;
}
3560
// Narrow G_INSERT by splitting the container value (operand 1) into
// NarrowTy-sized pieces, rewriting only the pieces that the inserted value
// (operand 2) overlaps, and recombining. Only TypeIdx 0 is handled.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
                                    LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  // FIXME: add support for when SizeOp0 isn't an exact multiple of
  // NarrowSize.
  if (SizeOp0 % NarrowSize != 0)
    return UnableToLegalize;

  int NumParts = SizeOp0 / NarrowSize;

  SmallVector<Register, 2> SrcRegs, DstRegs;
  SmallVector<uint64_t, 2> Indexes;
  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

  // OpReg is the value being inserted; it overwrites destination bits
  // [OpStart, OpStart + OpSize).
  Register OpReg = MI.getOperand(2).getReg();
  uint64_t OpStart = MI.getOperand(3).getImm();
  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
  for (int i = 0; i < NumParts; ++i) {
    // This piece covers destination bits [DstStart, DstStart + NarrowSize).
    unsigned DstStart = i * NarrowSize;

    if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
      // No part of the insert affects this subregister, forward the original.
      DstRegs.push_back(SrcRegs[i]);
      continue;
    } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
      // The entire subregister is defined by this insert, forward the new
      // value.
      DstRegs.push_back(OpReg);
      continue;
    }

    // OpSegStart is where this destination segment would start in OpReg if it
    // extended infinitely in both directions.
    int64_t ExtractOffset, InsertOffset;
    uint64_t SegSize;
    if (OpStart < DstStart) {
      // The piece begins inside the inserted range: the overlapping bits come
      // from the middle of OpReg and land at the piece's low end.
      InsertOffset = 0;
      ExtractOffset = DstStart - OpStart;
      SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
    } else {
      // The inserted range begins inside this piece: the overlapping bits
      // come from the start of OpReg and land at offset InsertOffset.
      InsertOffset = OpStart - DstStart;
      ExtractOffset = 0;
      SegSize =
          std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
    }

    Register SegReg = OpReg;
    if (ExtractOffset != 0 || SegSize != OpSize) {
      // A genuine extract is needed.
      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
      MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
    }

    // Splice the segment into this piece of the original value.
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
    MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
    DstRegs.push_back(DstReg);
  }

  assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
  Register DstReg = MI.getOperand(0).getReg();
  if(MRI.getType(DstReg).isVector())
    MIRBuilder.buildBuildVector(DstReg, DstRegs);
  else
    MIRBuilder.buildMerge(DstReg, DstRegs);
  MI.eraseFromParent();
  return Legalized;
}
3635
3636 LegalizerHelper::LegalizeResult
narrowScalarBasic(MachineInstr & MI,unsigned TypeIdx,LLT NarrowTy)3637 LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
3638 LLT NarrowTy) {
3639 Register DstReg = MI.getOperand(0).getReg();
3640 LLT DstTy = MRI.getType(DstReg);
3641
3642 assert(MI.getNumOperands() == 3 && TypeIdx == 0);
3643
3644 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
3645 SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
3646 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
3647 LLT LeftoverTy;
3648 if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
3649 Src0Regs, Src0LeftoverRegs))
3650 return UnableToLegalize;
3651
3652 LLT Unused;
3653 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
3654 Src1Regs, Src1LeftoverRegs))
3655 llvm_unreachable("inconsistent extractParts result");
3656
3657 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
3658 auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
3659 {Src0Regs[I], Src1Regs[I]});
3660 DstRegs.push_back(Inst->getOperand(0).getReg());
3661 }
3662
3663 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
3664 auto Inst = MIRBuilder.buildInstr(
3665 MI.getOpcode(),
3666 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
3667 DstLeftoverRegs.push_back(Inst->getOperand(0).getReg());
3668 }
3669
3670 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
3671 LeftoverTy, DstLeftoverRegs);
3672
3673 MI.eraseFromParent();
3674 return Legalized;
3675 }
3676
// Narrow the value operands of a scalar-condition G_SELECT: the condition is
// reused unchanged for every NarrowTy-sized (and leftover) piece of the two
// value inputs. Only TypeIdx 0 (the result type) is handled.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
                                    LLT NarrowTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register CondReg = MI.getOperand(1).getReg();
  LLT CondTy = MRI.getType(CondReg);
  if (CondTy.isVector()) // TODO: Handle vselect
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);

  SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
  SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
  SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
  LLT LeftoverTy;
  if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
                    Src1Regs, Src1LeftoverRegs))
    return UnableToLegalize;

  // The second value operand must decompose identically to the first.
  LLT Unused;
  if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
                    Src2Regs, Src2LeftoverRegs))
    llvm_unreachable("inconsistent extractParts result");

  // Select each NarrowTy-sized pair of pieces under the shared condition...
  for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
    auto Select = MIRBuilder.buildSelect(NarrowTy,
                                         CondReg, Src1Regs[I], Src2Regs[I]);
    DstRegs.push_back(Select->getOperand(0).getReg());
  }

  // ... and the leftover pieces, if any.
  for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
    auto Select = MIRBuilder.buildSelect(
      LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
    DstLeftoverRegs.push_back(Select->getOperand(0).getReg());
  }

  // Stitch the narrow selects back into the full-width result.
  insertParts(DstReg, DstTy, NarrowTy, DstRegs,
              LeftoverTy, DstLeftoverRegs);

  MI.eraseFromParent();
  return Legalized;
}
3722
// Expand the bit-counting operations G_CTLZ/G_CTTZ (and their ZERO_UNDEF
// variants) in terms of operations the target does support.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  unsigned Opc = MI.getOpcode();
  auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
  // Treat Legal/Libcall/Custom as "the target can handle this operation", so
  // expanding in terms of it is worthwhile.
  auto isSupported = [this](const LegalityQuery &Q) {
    auto QAction = LI.getAction(Q).Action;
    return QAction == Legal || QAction == Libcall || QAction == Custom;
  };
  switch (Opc) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    // This trivially expands to CTLZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTLZ: {
    Register SrcReg = MI.getOperand(1).getReg();
    unsigned Len = Ty.getSizeInBits();
    if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty, Ty}})) {
      // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
      auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF,
                                             {Ty}, {SrcReg});
      auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
      auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
      auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                          SrcReg, MIBZero);
      MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
                             MIBCtlzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we do this:
    // NewLen = NextPowerOf2(Len);
    // x = x | (x >> 1);
    // x = x | (x >> 2);
    // ...
    // x = x | (x >>16);
    // x = x | (x >>32); // for 64-bit input
    // Upto NewLen/2
    // return Len - popcount(x);
    //
    // Ref: "Hacker's Delight" by Henry Warren
    Register Op = SrcReg;
    unsigned NewLen = PowerOf2Ceil(Len);
    // Smear the highest set bit into every position below it, so popcount of
    // the result counts all bits at or below the leading one.
    for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
      auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i);
      auto MIBOp = MIRBuilder.buildInstr(
          TargetOpcode::G_OR, {Ty},
          {Op, MIRBuilder.buildInstr(TargetOpcode::G_LSHR, {Ty},
                                     {Op, MIBShiftAmt})});
      Op = MIBOp->getOperand(0).getReg();
    }
    auto MIBPop = MIRBuilder.buildInstr(TargetOpcode::G_CTPOP, {Ty}, {Op});
    MIRBuilder.buildInstr(TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
                          {MIRBuilder.buildConstant(Ty, Len), MIBPop});
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
    // This trivially expands to CTTZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTTZ: {
    Register SrcReg = MI.getOperand(1).getReg();
    unsigned Len = Ty.getSizeInBits();
    if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty, Ty}})) {
      // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
      // zero.
      auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF,
                                             {Ty}, {SrcReg});
      auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
      auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
      auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                          SrcReg, MIBZero);
      MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
                             MIBCttzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we use: { return popcount(~x & (x - 1)); }
    // unless the target has ctlz but not ctpop, in which case we use:
    // { return 32 - nlz(~x & (x-1)); }
    // Ref: "Hacker's Delight" by Henry Warren
    auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1);
    auto MIBNot =
        MIRBuilder.buildInstr(TargetOpcode::G_XOR, {Ty}, {SrcReg, MIBCstNeg1});
    // ~x & (x - 1) sets exactly one bit for every trailing zero of x.
    auto MIBTmp = MIRBuilder.buildInstr(
        TargetOpcode::G_AND, {Ty},
        {MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty},
                                       {SrcReg, MIBCstNeg1})});
    if (!isSupported({TargetOpcode::G_CTPOP, {Ty, Ty}}) &&
        isSupported({TargetOpcode::G_CTLZ, {Ty, Ty}})) {
      auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
      MIRBuilder.buildInstr(
          TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
          {MIBCstLen,
           MIRBuilder.buildInstr(TargetOpcode::G_CTLZ, {Ty}, {MIBTmp})});
      MI.eraseFromParent();
      return Legalized;
    }
    // Otherwise reuse MI in place as a G_CTPOP of the mask computed above.
    MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
    MI.getOperand(1).setReg(MIBTmp->getOperand(0).getReg());
    return Legalized;
  }
  }
}
3835
3836 // Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
3837 // representation.
3838 LegalizerHelper::LegalizeResult
lowerU64ToF32BitOps(MachineInstr & MI)3839 LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
3840 Register Dst = MI.getOperand(0).getReg();
3841 Register Src = MI.getOperand(1).getReg();
3842 const LLT S64 = LLT::scalar(64);
3843 const LLT S32 = LLT::scalar(32);
3844 const LLT S1 = LLT::scalar(1);
3845
3846 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
3847
3848 // unsigned cul2f(ulong u) {
3849 // uint lz = clz(u);
3850 // uint e = (u != 0) ? 127U + 63U - lz : 0;
3851 // u = (u << lz) & 0x7fffffffffffffffUL;
3852 // ulong t = u & 0xffffffffffUL;
3853 // uint v = (e << 23) | (uint)(u >> 40);
3854 // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
3855 // return as_float(v + r);
3856 // }
3857
3858 auto Zero32 = MIRBuilder.buildConstant(S32, 0);
3859 auto Zero64 = MIRBuilder.buildConstant(S64, 0);
3860
3861 auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
3862
3863 auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
3864 auto Sub = MIRBuilder.buildSub(S32, K, LZ);
3865
3866 auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
3867 auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
3868
3869 auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
3870 auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
3871
3872 auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
3873
3874 auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
3875 auto T = MIRBuilder.buildAnd(S64, U, Mask1);
3876
3877 auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
3878 auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
3879 auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
3880
3881 auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
3882 auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
3883 auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
3884 auto One = MIRBuilder.buildConstant(S32, 1);
3885
3886 auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
3887 auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
3888 auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
3889 MIRBuilder.buildAdd(Dst, V, R);
3890
3891 return Legalized;
3892 }
3893
3894 LegalizerHelper::LegalizeResult
lowerUITOFP(MachineInstr & MI,unsigned TypeIdx,LLT Ty)3895 LegalizerHelper::lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
3896 Register Dst = MI.getOperand(0).getReg();
3897 Register Src = MI.getOperand(1).getReg();
3898 LLT DstTy = MRI.getType(Dst);
3899 LLT SrcTy = MRI.getType(Src);
3900
3901 if (SrcTy == LLT::scalar(1)) {
3902 auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
3903 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
3904 MIRBuilder.buildSelect(Dst, Src, True, False);
3905 MI.eraseFromParent();
3906 return Legalized;
3907 }
3908
3909 if (SrcTy != LLT::scalar(64))
3910 return UnableToLegalize;
3911
3912 if (DstTy == LLT::scalar(32)) {
3913 // TODO: SelectionDAG has several alternative expansions to port which may
3914 // be more reasonble depending on the available instructions. If a target
3915 // has sitofp, does not have CTLZ, or can efficiently use f64 as an
3916 // intermediate type, this is probably worse.
3917 return lowerU64ToF32BitOps(MI);
3918 }
3919
3920 return UnableToLegalize;
3921 }
3922
3923 LegalizerHelper::LegalizeResult
lowerSITOFP(MachineInstr & MI,unsigned TypeIdx,LLT Ty)3924 LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
3925 Register Dst = MI.getOperand(0).getReg();
3926 Register Src = MI.getOperand(1).getReg();
3927 LLT DstTy = MRI.getType(Dst);
3928 LLT SrcTy = MRI.getType(Src);
3929
3930 const LLT S64 = LLT::scalar(64);
3931 const LLT S32 = LLT::scalar(32);
3932 const LLT S1 = LLT::scalar(1);
3933
3934 if (SrcTy == S1) {
3935 auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
3936 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
3937 MIRBuilder.buildSelect(Dst, Src, True, False);
3938 MI.eraseFromParent();
3939 return Legalized;
3940 }
3941
3942 if (SrcTy != S64)
3943 return UnableToLegalize;
3944
3945 if (DstTy == S32) {
3946 // signed cl2f(long l) {
3947 // long s = l >> 63;
3948 // float r = cul2f((l + s) ^ s);
3949 // return s ? -r : r;
3950 // }
3951 Register L = Src;
3952 auto SignBit = MIRBuilder.buildConstant(S64, 63);
3953 auto S = MIRBuilder.buildAShr(S64, L, SignBit);
3954
3955 auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
3956 auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
3957 auto R = MIRBuilder.buildUITOFP(S32, Xor);
3958
3959 auto RNeg = MIRBuilder.buildFNeg(S32, R);
3960 auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
3961 MIRBuilder.buildConstant(S64, 0));
3962 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
3963 return Legalized;
3964 }
3965
3966 return UnableToLegalize;
3967 }
3968
// Lower G_FPTOUI in terms of G_FPTOSI for f32/f64 sources and s32/s64
// results.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTOUI(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);

  if (SrcTy != S64 && SrcTy != S32)
    return UnableToLegalize;
  if (DstTy != S32 && DstTy != S64)
    return UnableToLegalize;

  // FPTOSI gives same result as FPTOUI for positive signed integers.
  // FPTOUI needs to deal with fp values that convert to unsigned integers
  // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp.

  // 2^Exp as an integer is just the sign-bit position of the result type...
  APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
  // ... and here it is converted to a floating-point constant of the source
  // type to use as the comparison threshold.
  APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
                                                : APFloat::IEEEdouble(),
                    APInt::getNullValue(SrcTy.getSizeInBits()));
  TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);

  MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);

  MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
  // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on
  // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
  MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
  MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
  MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
  MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);

  const LLT S1 = LLT::scalar(1);

  // Pick the plain FPTOSI result when Src < 2^Exp (FCMP_ULT is also true for
  // NaN), otherwise the adjusted high-range result.
  MachineInstrBuilder FCMP =
      MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
  MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);

  MI.eraseFromParent();
  return Legalized;
}
4012
minMaxToCompare(unsigned Opc)4013 static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
4014 switch (Opc) {
4015 case TargetOpcode::G_SMIN:
4016 return CmpInst::ICMP_SLT;
4017 case TargetOpcode::G_SMAX:
4018 return CmpInst::ICMP_SGT;
4019 case TargetOpcode::G_UMIN:
4020 return CmpInst::ICMP_ULT;
4021 case TargetOpcode::G_UMAX:
4022 return CmpInst::ICMP_UGT;
4023 default:
4024 llvm_unreachable("not in integer min/max");
4025 }
4026 }
4027
4028 LegalizerHelper::LegalizeResult
lowerMinMax(MachineInstr & MI,unsigned TypeIdx,LLT Ty)4029 LegalizerHelper::lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
4030 Register Dst = MI.getOperand(0).getReg();
4031 Register Src0 = MI.getOperand(1).getReg();
4032 Register Src1 = MI.getOperand(2).getReg();
4033
4034 const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
4035 LLT CmpType = MRI.getType(Dst).changeElementSize(1);
4036
4037 auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
4038 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
4039
4040 MI.eraseFromParent();
4041 return Legalized;
4042 }
4043
4044 LegalizerHelper::LegalizeResult
lowerFCopySign(MachineInstr & MI,unsigned TypeIdx,LLT Ty)4045 LegalizerHelper::lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
4046 Register Dst = MI.getOperand(0).getReg();
4047 Register Src0 = MI.getOperand(1).getReg();
4048 Register Src1 = MI.getOperand(2).getReg();
4049
4050 const LLT Src0Ty = MRI.getType(Src0);
4051 const LLT Src1Ty = MRI.getType(Src1);
4052
4053 const int Src0Size = Src0Ty.getScalarSizeInBits();
4054 const int Src1Size = Src1Ty.getScalarSizeInBits();
4055
4056 auto SignBitMask = MIRBuilder.buildConstant(
4057 Src0Ty, APInt::getSignMask(Src0Size));
4058
4059 auto NotSignBitMask = MIRBuilder.buildConstant(
4060 Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
4061
4062 auto And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask);
4063 MachineInstr *Or;
4064
4065 if (Src0Ty == Src1Ty) {
4066 auto And1 = MIRBuilder.buildAnd(Src1Ty, Src0, SignBitMask);
4067 Or = MIRBuilder.buildOr(Dst, And0, And1);
4068 } else if (Src0Size > Src1Size) {
4069 auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
4070 auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
4071 auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
4072 auto And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask);
4073 Or = MIRBuilder.buildOr(Dst, And0, And1);
4074 } else {
4075 auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
4076 auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
4077 auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
4078 auto And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask);
4079 Or = MIRBuilder.buildOr(Dst, And0, And1);
4080 }
4081
4082 // Be careful about setting nsz/nnan/ninf on every instruction, since the
4083 // constants are a nan and -0.0, but the final result should preserve
4084 // everything.
4085 if (unsigned Flags = MI.getFlags())
4086 Or->setFlags(Flags);
4087
4088 MI.eraseFromParent();
4089 return Legalized;
4090 }
4091
// Lower G_FMINNUM/G_FMAXNUM to the IEEE variants (G_FMINNUM_IEEE /
// G_FMAXNUM_IEEE), quieting operands that may be signaling NaNs so the two
// semantics agree.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
  unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
    TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;

  Register Dst = MI.getOperand(0).getReg();
  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(Dst);

  if (!MI.getFlag(MachineInstr::FmNoNans)) {
    // Insert canonicalizes if it's possible we need to quiet to get correct
    // sNaN behavior.

    // Note this must be done here, and not as an optimization combine in the
    // absence of a dedicate quiet-snan instruction as we're using an
    // omni-purpose G_FCANONICALIZE.
    if (!isKnownNeverSNaN(Src0, MRI))
      Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);

    if (!isKnownNeverSNaN(Src1, MRI))
      Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
  }

  // If there are no nans, it's safe to simply replace this with the non-IEEE
  // version.
  MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
  MI.eraseFromParent();
  return Legalized;
}
4122
lowerFMad(MachineInstr & MI)4123 LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
4124 // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
4125 Register DstReg = MI.getOperand(0).getReg();
4126 LLT Ty = MRI.getType(DstReg);
4127 unsigned Flags = MI.getFlags();
4128
4129 auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
4130 Flags);
4131 MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
4132 MI.eraseFromParent();
4133 return Legalized;
4134 }
4135
4136 LegalizerHelper::LegalizeResult
lowerIntrinsicRound(MachineInstr & MI)4137 LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
4138 Register DstReg = MI.getOperand(0).getReg();
4139 Register SrcReg = MI.getOperand(1).getReg();
4140 unsigned Flags = MI.getFlags();
4141 LLT Ty = MRI.getType(DstReg);
4142 const LLT CondTy = Ty.changeElementSize(1);
4143
4144 // result = trunc(src);
4145 // if (src < 0.0 && src != result)
4146 // result += -1.0.
4147
4148 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
4149 auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
4150
4151 auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
4152 SrcReg, Zero, Flags);
4153 auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
4154 SrcReg, Trunc, Flags);
4155 auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
4156 auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
4157
4158 MIRBuilder.buildFAdd(DstReg, Trunc, AddVal);
4159 MI.eraseFromParent();
4160 return Legalized;
4161 }
4162
// Lower a scalarizing G_UNMERGE_VALUES of a vector by bitcasting the source
// to a single wide integer and producing each destination with a shift and
// truncate.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
  // The last operand is the source; all preceding operands are defs.
  const unsigned NumDst = MI.getNumOperands() - 1;
  const Register SrcReg = MI.getOperand(NumDst).getReg();
  LLT SrcTy = MRI.getType(SrcReg);

  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst0Reg);


  // Expand scalarizing unmerge as bitcast to integer and shift.
  if (!DstTy.isVector() && SrcTy.isVector() &&
      SrcTy.getElementType() == DstTy) {
    LLT IntTy = LLT::scalar(SrcTy.getSizeInBits());
    Register Cast = MIRBuilder.buildBitcast(IntTy, SrcReg).getReg(0);

    // Element 0 needs no shift, only a truncate.
    MIRBuilder.buildTrunc(Dst0Reg, Cast);

    // Each subsequent element sits DstSize bits higher in the integer.
    const unsigned DstSize = DstTy.getSizeInBits();
    unsigned Offset = DstSize;
    for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
      auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
      auto Shift = MIRBuilder.buildLShr(IntTy, Cast, ShiftAmt);
      MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}
4195
// Lower G_SHUFFLE_VECTOR by scalarizing: a scalar result becomes a plain
// copy of the selected input; a vector result is rebuilt lane by lane from
// extracted elements.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register Src0Reg = MI.getOperand(1).getReg();
  Register Src1Reg = MI.getOperand(2).getReg();
  LLT Src0Ty = MRI.getType(Src0Reg);
  LLT DstTy = MRI.getType(DstReg);
  LLT IdxTy = LLT::scalar(32);

  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();

  if (DstTy.isScalar()) {
    if (Src0Ty.isVector())
      return UnableToLegalize;

    // This is just a SELECT.
    assert(Mask.size() == 1 && "Expected a single mask element");
    Register Val;
    if (Mask[0] < 0 || Mask[0] > 1)
      // Out-of-range (undef) mask element: the result is undefined.
      Val = MIRBuilder.buildUndef(DstTy).getReg(0);
    else
      Val = Mask[0] == 0 ? Src0Reg : Src1Reg;
    MIRBuilder.buildCopy(DstReg, Val);
    MI.eraseFromParent();
    return Legalized;
  }

  // Vector result: materialize each lane and feed them to a build_vector.
  Register Undef;
  SmallVector<Register, 32> BuildVec;
  LLT EltTy = DstTy.getElementType();

  for (int Idx : Mask) {
    if (Idx < 0) {
      // Undef lane; a single G_IMPLICIT_DEF is created lazily and reused for
      // all undef lanes.
      if (!Undef.isValid())
        Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
      BuildVec.push_back(Undef);
      continue;
    }

    if (Src0Ty.isScalar()) {
      BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
    } else {
      // Mask indices >= NumElts refer to lanes of the second source vector.
      int NumElts = Src0Ty.getNumElements();
      Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
      int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
      auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
      auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
      BuildVec.push_back(Extract.getReg(0));
    }
  }

  MIRBuilder.buildBuildVector(DstReg, BuildVec);
  MI.eraseFromParent();
  return Legalized;
}
4251
// Lower G_DYN_STACKALLOC by adjusting the stack pointer directly:
// SP = align_down(SP - AllocSize), then copy the new SP out as the result
// pointer (the stack is assumed to grow down).
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register AllocSize = MI.getOperand(1).getReg();
  unsigned Align = MI.getOperand(2).getImm();

  const auto &MF = *MI.getMF();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();

  LLT PtrTy = MRI.getType(Dst);
  // Arithmetic is done in an integer of pointer width.
  LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());

  Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
  auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
  SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);

  // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
  // have to generate an extra instruction to negate the alloc and then use
  // G_PTR_ADD to add the negative offset.
  auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
  if (Align) {
    // Round the new SP down to the requested alignment by masking with
    // -Align.
    APInt AlignMask(IntPtrTy.getSizeInBits(), Align, true);
    AlignMask.negate();
    auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
    Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
  }

  // Write the adjusted SP back, and also return it as the allocation.
  SPTmp = MIRBuilder.buildCast(PtrTy, Alloc);
  MIRBuilder.buildCopy(SPReg, SPTmp);
  MIRBuilder.buildCopy(Dst, SPTmp);

  MI.eraseFromParent();
  return Legalized;
}
4286
// Lower G_EXTRACT of a scalar from a scalar (or of one element-typed chunk of
// a vector) to a logical right shift of the integer representation followed
// by a truncate.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerExtract(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  unsigned Offset = MI.getOperand(2).getImm();

  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);

  if (DstTy.isScalar() &&
      (SrcTy.isScalar() ||
       (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
    LLT SrcIntTy = SrcTy;
    // View a vector source as one wide integer.
    if (!SrcTy.isScalar()) {
      SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
      Src = MIRBuilder.buildBitcast(SrcIntTy, Src).getReg(0);
    }

    if (Offset == 0)
      MIRBuilder.buildTrunc(Dst, Src);
    else {
      // Shift the requested bits down to bit 0, then truncate to DstTy.
      auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
      auto Shr = MIRBuilder.buildLShr(SrcIntTy, Src, ShiftAmt);
      MIRBuilder.buildTrunc(Dst, Shr);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}
4319
lowerInsert(MachineInstr & MI)4320 LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
4321 Register Dst = MI.getOperand(0).getReg();
4322 Register Src = MI.getOperand(1).getReg();
4323 Register InsertSrc = MI.getOperand(2).getReg();
4324 uint64_t Offset = MI.getOperand(3).getImm();
4325
4326 LLT DstTy = MRI.getType(Src);
4327 LLT InsertTy = MRI.getType(InsertSrc);
4328
4329 if (InsertTy.isScalar() &&
4330 (DstTy.isScalar() ||
4331 (DstTy.isVector() && DstTy.getElementType() == InsertTy))) {
4332 LLT IntDstTy = DstTy;
4333 if (!DstTy.isScalar()) {
4334 IntDstTy = LLT::scalar(DstTy.getSizeInBits());
4335 Src = MIRBuilder.buildBitcast(IntDstTy, Src).getReg(0);
4336 }
4337
4338 Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
4339 if (Offset != 0) {
4340 auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
4341 ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
4342 }
4343
4344 APInt MaskVal = ~APInt::getBitsSet(DstTy.getSizeInBits(), Offset,
4345 InsertTy.getSizeInBits());
4346
4347 auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
4348 auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
4349 auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
4350
4351 MIRBuilder.buildBitcast(Dst, Or);
4352 MI.eraseFromParent();
4353 return Legalized;
4354 }
4355
4356 return UnableToLegalize;
4357 }
4358
4359 LegalizerHelper::LegalizeResult
lowerSADDO_SSUBO(MachineInstr & MI)4360 LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
4361 Register Dst0 = MI.getOperand(0).getReg();
4362 Register Dst1 = MI.getOperand(1).getReg();
4363 Register LHS = MI.getOperand(2).getReg();
4364 Register RHS = MI.getOperand(3).getReg();
4365 const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
4366
4367 LLT Ty = MRI.getType(Dst0);
4368 LLT BoolTy = MRI.getType(Dst1);
4369
4370 if (IsAdd)
4371 MIRBuilder.buildAdd(Dst0, LHS, RHS);
4372 else
4373 MIRBuilder.buildSub(Dst0, LHS, RHS);
4374
4375 // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
4376
4377 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4378
4379 // For an addition, the result should be less than one of the operands (LHS)
4380 // if and only if the other operand (RHS) is negative, otherwise there will
4381 // be overflow.
4382 // For a subtraction, the result should be less than one of the operands
4383 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
4384 // otherwise there will be overflow.
4385 auto ResultLowerThanLHS =
4386 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, Dst0, LHS);
4387 auto ConditionRHS = MIRBuilder.buildICmp(
4388 IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
4389
4390 MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
4391 MI.eraseFromParent();
4392 return Legalized;
4393 }
4394
4395 LegalizerHelper::LegalizeResult
lowerBswap(MachineInstr & MI)4396 LegalizerHelper::lowerBswap(MachineInstr &MI) {
4397 Register Dst = MI.getOperand(0).getReg();
4398 Register Src = MI.getOperand(1).getReg();
4399 const LLT Ty = MRI.getType(Src);
4400 unsigned SizeInBytes = Ty.getSizeInBytes();
4401 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
4402
4403 // Swap most and least significant byte, set remaining bytes in Res to zero.
4404 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
4405 auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
4406 auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
4407 auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
4408
4409 // Set i-th high/low byte in Res to i-th low/high byte from Src.
4410 for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
4411 // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
4412 APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
4413 auto Mask = MIRBuilder.buildConstant(Ty, APMask);
4414 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
4415 // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
4416 auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
4417 auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
4418 Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
4419 // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
4420 auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
4421 auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
4422 Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
4423 }
4424 Res.getInstr()->getOperand(0).setReg(Dst);
4425
4426 MI.eraseFromParent();
4427 return Legalized;
4428 }
4429
4430 //{ (Src & Mask) >> N } | { (Src << N) & Mask }
SwapN(unsigned N,DstOp Dst,MachineIRBuilder & B,MachineInstrBuilder Src,APInt Mask)4431 static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
4432 MachineInstrBuilder Src, APInt Mask) {
4433 const LLT Ty = Dst.getLLTTy(*B.getMRI());
4434 MachineInstrBuilder C_N = B.buildConstant(Ty, N);
4435 MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
4436 auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
4437 auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
4438 return B.buildOr(Dst, LHS, RHS);
4439 }
4440
4441 LegalizerHelper::LegalizeResult
lowerBitreverse(MachineInstr & MI)4442 LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
4443 Register Dst = MI.getOperand(0).getReg();
4444 Register Src = MI.getOperand(1).getReg();
4445 const LLT Ty = MRI.getType(Src);
4446 unsigned Size = Ty.getSizeInBits();
4447
4448 MachineInstrBuilder BSWAP =
4449 MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});
4450
4451 // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
4452 // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
4453 // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
4454 MachineInstrBuilder Swap4 =
4455 SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));
4456
4457 // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
4458 // [(val & 0xCCCCCCCC) >> 2] & [(val & 0x33333333) << 2]
4459 // -> [(val & 0xCCCCCCCC) >> 2] & [(val << 2) & 0xCCCCCCCC]
4460 MachineInstrBuilder Swap2 =
4461 SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));
4462
4463 // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5 6|7
4464 // [(val & 0xAAAAAAAA) >> 1] & [(val & 0x55555555) << 1]
4465 // -> [(val & 0xAAAAAAAA) >> 1] & [(val << 1) & 0xAAAAAAAA]
4466 SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
4467
4468 MI.eraseFromParent();
4469 return Legalized;
4470 }
4471
4472 LegalizerHelper::LegalizeResult
lowerReadRegister(MachineInstr & MI)4473 LegalizerHelper::lowerReadRegister(MachineInstr &MI) {
4474 Register Dst = MI.getOperand(0).getReg();
4475 const LLT Ty = MRI.getType(Dst);
4476 const MDString *RegStr = cast<MDString>(
4477 cast<MDNode>(MI.getOperand(1).getMetadata())->getOperand(0));
4478
4479 MachineFunction &MF = MIRBuilder.getMF();
4480 const TargetSubtargetInfo &STI = MF.getSubtarget();
4481 const TargetLowering *TLI = STI.getTargetLowering();
4482 Register Reg = TLI->getRegisterByName(RegStr->getString().data(), Ty, MF);
4483 if (!Reg.isValid())
4484 return UnableToLegalize;
4485
4486 MIRBuilder.buildCopy(Dst, Reg);
4487 MI.eraseFromParent();
4488 return Legalized;
4489 }
4490