//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Implements the TargetLoweringMIPS32 class, which consists almost
/// entirely of the lowering sequence for each high-level instruction.
///
//===----------------------------------------------------------------------===//

#include "IceTargetLoweringMIPS32.h"

#include "IceCfg.h"
#include "IceCfgNode.h"
#include "IceClFlags.h"
#include "IceDefs.h"
#include "IceELFObjectWriter.h"
#include "IceGlobalInits.h"
#include "IceInstMIPS32.h"
#include "IceInstVarIter.h"
#include "IceLiveness.h"
#include "IceOperand.h"
#include "IcePhiLoweringImpl.h"
#include "IceRegistersMIPS32.h"
#include "IceTargetLoweringMIPS32.def"
#include "IceUtils.h"
#include "llvm/Support/MathExtras.h"

namespace MIPS32 {
std::unique_ptr<::Ice::TargetLowering> createTargetLowering(::Ice::Cfg *Func) {
  return ::Ice::MIPS32::TargetMIPS32::create(Func);
}

std::unique_ptr<::Ice::TargetDataLowering>
createTargetDataLowering(::Ice::GlobalContext *Ctx) {
  return ::Ice::MIPS32::TargetDataMIPS32::create(Ctx);
}

std::unique_ptr<::Ice::TargetHeaderLowering>
createTargetHeaderLowering(::Ice::GlobalContext *Ctx) {
  return ::Ice::MIPS32::TargetHeaderMIPS32::create(Ctx);
}

void staticInit(::Ice::GlobalContext *Ctx) {
  ::Ice::MIPS32::TargetMIPS32::staticInit(Ctx);
}

bool shouldBePooled(const ::Ice::Constant *C) {
  return ::Ice::MIPS32::TargetMIPS32::shouldBePooled(C);
}

::Ice::Type getPointerType() {
  return ::Ice::MIPS32::TargetMIPS32::getPointerType();
}

} // end of namespace MIPS32

namespace Ice {
namespace MIPS32 {

using llvm::isInt;

namespace {

// The maximum number of arguments to pass in GPR registers.
constexpr uint32_t MIPS32_MAX_GPR_ARG = 4;

std::array<RegNumT, MIPS32_MAX_GPR_ARG> GPRArgInitializer;
std::array<RegNumT, MIPS32_MAX_GPR_ARG / 2> I64ArgInitializer;

constexpr uint32_t MIPS32_MAX_FP_ARG = 2;

std::array<RegNumT, MIPS32_MAX_FP_ARG> FP32ArgInitializer;
std::array<RegNumT, MIPS32_MAX_FP_ARG> FP64ArgInitializer;

const char *getRegClassName(RegClass C) {
  auto ClassNum = static_cast<RegClassMIPS32>(C);
  assert(ClassNum < RCMIPS32_NUM);
  switch (ClassNum) {
  default:
    assert(C < RC_Target);
    return regClassString(C);
    // Add handling of new register classes below.
  }
}

// Stack alignment
constexpr uint32_t MIPS32_STACK_ALIGNMENT_BYTES = 16;

// Value is in bytes. Return Value adjusted to the next highest multiple of the
// stack alignment required for the given type.
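// For example, applyStackAlignmentTy(6, IceType_i64) returns 8, since an i64
// is 8 bytes wide and 6 rounds up to the next multiple of 8.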
uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) {
  size_t typeAlignInBytes = typeWidthInBytes(Ty);
  // Vectors are stored on the stack with the same alignment as that of an
  // i64.
  if (isVectorType(Ty))
    typeAlignInBytes = typeWidthInBytes(IceType_i64);
  return Utils::applyAlignment(Value, typeAlignInBytes);
}

// Value is in bytes. Return Value adjusted to the next highest multiple of the
// stack alignment.
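// For example, applyStackAlignment(20) returns 32, the next multiple of the
// 16-byte stack alignment.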
uint32_t applyStackAlignment(uint32_t Value) {
  return Utils::applyAlignment(Value, MIPS32_STACK_ALIGNMENT_BYTES);
}

} // end of anonymous namespace

TargetMIPS32::TargetMIPS32(Cfg *Func) : TargetLowering(Func) {}

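// Assigns a stack slot to each spilled variable. Unless the function contains
// a returns-twice call, slots are coalesced: multi-block variables receive
// unique offsets in the globals area, while single-block variables share the
// local area of their node.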
void TargetMIPS32::assignVarStackSlots(VarList &SortedSpilledVariables,
                                       size_t SpillAreaPaddingBytes,
                                       size_t SpillAreaSizeBytes,
                                       size_t GlobalsAndSubsequentPaddingSize) {
  const VariablesMetadata *VMetadata = Func->getVMetadata();
  size_t GlobalsSpaceUsed = SpillAreaPaddingBytes;
  size_t NextStackOffset = SpillAreaPaddingBytes;
  CfgVector<size_t> LocalsSize(Func->getNumNodes());
  const bool SimpleCoalescing = !callsReturnsTwice();
  for (Variable *Var : SortedSpilledVariables) {
    size_t Increment = typeWidthInBytesOnStack(Var->getType());
    if (SimpleCoalescing && VMetadata->isTracked(Var)) {
      if (VMetadata->isMultiBlock(Var)) {
        GlobalsSpaceUsed += Increment;
        NextStackOffset = GlobalsSpaceUsed;
      } else {
        SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex();
        LocalsSize[NodeIndex] += Increment;
        NextStackOffset = SpillAreaPaddingBytes +
                          GlobalsAndSubsequentPaddingSize +
                          LocalsSize[NodeIndex];
      }
    } else {
      NextStackOffset += Increment;
    }
    Var->setStackOffset(SpillAreaSizeBytes - NextStackOffset);
  }
}

void TargetMIPS32::staticInit(GlobalContext *Ctx) {
  (void)Ctx;
  RegNumT::setLimit(RegMIPS32::Reg_NUM);
  SmallBitVector IntegerRegisters(RegMIPS32::Reg_NUM);
  SmallBitVector I64PairRegisters(RegMIPS32::Reg_NUM);
  SmallBitVector Float32Registers(RegMIPS32::Reg_NUM);
  SmallBitVector Float64Registers(RegMIPS32::Reg_NUM);
  SmallBitVector VectorRegisters(RegMIPS32::Reg_NUM);
  SmallBitVector InvalidRegisters(RegMIPS32::Reg_NUM);
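// Each expansion of X() below classifies one register from REGMIPS32_TABLE
// into the bit vectors above and records its alias set.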
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
          isI64Pair, isFP32, isFP64, isVec128, alias_init)                     \
  IntegerRegisters[RegMIPS32::val] = isInt;                                    \
  I64PairRegisters[RegMIPS32::val] = isI64Pair;                                \
  Float32Registers[RegMIPS32::val] = isFP32;                                   \
  Float64Registers[RegMIPS32::val] = isFP64;                                   \
  VectorRegisters[RegMIPS32::val] = isVec128;                                  \
  RegisterAliases[RegMIPS32::val].resize(RegMIPS32::Reg_NUM);                  \
  for (SizeT RegAlias : alias_init) {                                          \
    assert(!RegisterAliases[RegMIPS32::val][RegAlias] &&                       \
           "Duplicate alias for " #val);                                       \
    RegisterAliases[RegMIPS32::val].set(RegAlias);                             \
  }                                                                            \
  RegisterAliases[RegMIPS32::val].resize(RegMIPS32::Reg_NUM);                  \
  assert(RegisterAliases[RegMIPS32::val][RegMIPS32::val]);
  REGMIPS32_TABLE;
#undef X

  // TODO(mohit.bhakkad): Change these inits once we provide argument related
  // field in register tables
  for (size_t i = 0; i < MIPS32_MAX_GPR_ARG; i++)
    GPRArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_A0 + i);

  for (size_t i = 0; i < MIPS32_MAX_GPR_ARG / 2; i++)
    I64ArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_A0A1 + i);

  for (size_t i = 0; i < MIPS32_MAX_FP_ARG; i++) {
    FP32ArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_F12 + i * 2);
    FP64ArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_F12F13 + i);
  }

  TypeToRegisterSet[IceType_void] = InvalidRegisters;
  TypeToRegisterSet[IceType_i1] = IntegerRegisters;
  TypeToRegisterSet[IceType_i8] = IntegerRegisters;
  TypeToRegisterSet[IceType_i16] = IntegerRegisters;
  TypeToRegisterSet[IceType_i32] = IntegerRegisters;
  TypeToRegisterSet[IceType_i64] = IntegerRegisters;
  TypeToRegisterSet[IceType_f32] = Float32Registers;
  TypeToRegisterSet[IceType_f64] = Float64Registers;
  TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
  TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
  TypeToRegisterSet[IceType_v4f32] = VectorRegisters;

  for (size_t i = 0; i < llvm::array_lengthof(TypeToRegisterSet); ++i)
    TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i];

  filterTypeToRegisterSet(Ctx, RegMIPS32::Reg_NUM, TypeToRegisterSet,
                          llvm::array_lengthof(TypeToRegisterSet),
                          RegMIPS32::getRegName, getRegClassName);
}

void TargetMIPS32::unsetIfNonLeafFunc() {
  for (CfgNode *Node : Func->getNodes()) {
    for (Inst &Instr : Node->getInsts()) {
      if (llvm::isa<InstCall>(&Instr)) {
        // Unset MaybeLeafFunc if call instruction exists.
        MaybeLeafFunc = false;
        return;
      }
    }
  }
}

uint32_t TargetMIPS32::getStackAlignment() const {
  return MIPS32_STACK_ALIGNMENT_BYTES;
}

uint32_t TargetMIPS32::getCallStackArgumentsSizeBytes(const InstCall *Call) {
  TargetMIPS32::CallingConv CC;
  size_t OutArgsSizeBytes = 0;
  Variable *Dest = Call->getDest();
  bool PartialOnStack = false;
  if (Dest != nullptr && isVectorFloatingType(Dest->getType())) {
    CC.discardReg(RegMIPS32::Reg_A0);
    // Next vector is partially on stack
    PartialOnStack = true;
  }
  for (SizeT i = 0, NumArgs = Call->getNumArgs(); i < NumArgs; ++i) {
    Operand *Arg = legalizeUndef(Call->getArg(i));
    const Type Ty = Arg->getType();
    RegNumT RegNum;
    if (CC.argInReg(Ty, i, &RegNum)) {
      // If PartialOnStack is true and this is a vector type, then the last
      // two elements are passed on the stack.
      if (PartialOnStack && isVectorType(Ty)) {
        OutArgsSizeBytes = applyStackAlignmentTy(OutArgsSizeBytes, IceType_i64);
        OutArgsSizeBytes += typeWidthInBytesOnStack(IceType_i32) * 2;
      }
      continue;
    }
    OutArgsSizeBytes = applyStackAlignmentTy(OutArgsSizeBytes, Ty);
    OutArgsSizeBytes += typeWidthInBytesOnStack(Ty);
  }
  // Add size of argument save area
  constexpr int BytesPerStackArg = 4;
  OutArgsSizeBytes += MIPS32_MAX_GPR_ARG * BytesPerStackArg;
  return applyStackAlignment(OutArgsSizeBytes);
}

namespace {
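// Returns the memory order as a compile-time constant, or MemoryOrderInvalid
// if the operand is not a ConstantInteger32.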
inline uint64_t getConstantMemoryOrder(Operand *Opnd) {
  if (auto *Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
    return Integer->getValue();
  return Intrinsics::MemoryOrderInvalid;
}
} // namespace

void TargetMIPS32::genTargetHelperCallFor(Inst *Instr) {
  constexpr bool NoTailCall = false;
  constexpr bool IsTargetHelperCall = true;
  Variable *Dest = Instr->getDest();
  const Type DestTy = Dest ? Dest->getType() : IceType_void;

  switch (Instr->getKind()) {
  default:
    return;
  case Inst::Select: {
    if (isVectorType(DestTy)) {
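      // Scalarize the vector select: extract each lane of the condition and
      // of the two sources, select per lane, and insert the results back into
      // a vector. The same per-lane pattern is used below for vector fcmp,
      // icmp, and cast.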
      Operand *SrcT = llvm::cast<InstSelect>(Instr)->getTrueOperand();
      Operand *SrcF = llvm::cast<InstSelect>(Instr)->getFalseOperand();
      Operand *Cond = llvm::cast<InstSelect>(Instr)->getCondition();
      Variable *T = Func->makeVariable(DestTy);
      auto *Undef = ConstantUndef::create(Ctx, DestTy);
      Context.insert<InstAssign>(T, Undef);
      auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
      VarVecOn32->initVecElement(Func);
      for (SizeT I = 0; I < typeNumElements(DestTy); ++I) {
        auto *Index = Ctx->getConstantInt32(I);
        auto *OpC = Func->makeVariable(typeElementType(Cond->getType()));
        Context.insert<InstExtractElement>(OpC, Cond, Index);
        auto *OpT = Func->makeVariable(typeElementType(DestTy));
        Context.insert<InstExtractElement>(OpT, SrcT, Index);
        auto *OpF = Func->makeVariable(typeElementType(DestTy));
        Context.insert<InstExtractElement>(OpF, SrcF, Index);
        auto *Dst = Func->makeVariable(typeElementType(DestTy));
        Variable *DestT = Func->makeVariable(DestTy);
        Context.insert<InstSelect>(Dst, OpC, OpT, OpF);
        Context.insert<InstInsertElement>(DestT, T, Dst, Index);
        T = DestT;
      }
      Context.insert<InstAssign>(Dest, T);
      Instr->setDeleted();
    }
    return;
  }
  case Inst::Fcmp: {
    if (isVectorType(DestTy)) {
      InstFcmp::FCond Cond = llvm::cast<InstFcmp>(Instr)->getCondition();
      Operand *Src0 = Instr->getSrc(0);
      Operand *Src1 = Instr->getSrc(1);
      Variable *T = Func->makeVariable(IceType_v4f32);
      auto *Undef = ConstantUndef::create(Ctx, IceType_v4f32);
      Context.insert<InstAssign>(T, Undef);
      auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
      VarVecOn32->initVecElement(Func);
      for (SizeT I = 0; I < typeNumElements(IceType_v4f32); ++I) {
        auto *Index = Ctx->getConstantInt32(I);
        auto *Op0 = Func->makeVariable(IceType_f32);
        Context.insert<InstExtractElement>(Op0, Src0, Index);
        auto *Op1 = Func->makeVariable(IceType_f32);
        Context.insert<InstExtractElement>(Op1, Src1, Index);
        auto *Dst = Func->makeVariable(IceType_f32);
        Variable *DestT = Func->makeVariable(IceType_v4f32);
        Context.insert<InstFcmp>(Cond, Dst, Op0, Op1);
        Context.insert<InstInsertElement>(DestT, T, Dst, Index);
        T = DestT;
      }
      Context.insert<InstAssign>(Dest, T);
      Instr->setDeleted();
    }
    return;
  }
  case Inst::Icmp: {
    if (isVectorType(DestTy)) {
      InstIcmp::ICond Cond = llvm::cast<InstIcmp>(Instr)->getCondition();
      Operand *Src0 = Instr->getSrc(0);
      Operand *Src1 = Instr->getSrc(1);
      const Type SrcType = Src0->getType();
      Variable *T = Func->makeVariable(DestTy);
      auto *Undef = ConstantUndef::create(Ctx, DestTy);
      Context.insert<InstAssign>(T, Undef);
      auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
      VarVecOn32->initVecElement(Func);
      for (SizeT I = 0; I < typeNumElements(SrcType); ++I) {
        auto *Index = Ctx->getConstantInt32(I);
        auto *Op0 = Func->makeVariable(typeElementType(SrcType));
        Context.insert<InstExtractElement>(Op0, Src0, Index);
        auto *Op1 = Func->makeVariable(typeElementType(SrcType));
        Context.insert<InstExtractElement>(Op1, Src1, Index);
        auto *Dst = Func->makeVariable(typeElementType(DestTy));
        Variable *DestT = Func->makeVariable(DestTy);
        Context.insert<InstIcmp>(Cond, Dst, Op0, Op1);
        Context.insert<InstInsertElement>(DestT, T, Dst, Index);
        T = DestT;
      }
      Context.insert<InstAssign>(Dest, T);
      Instr->setDeleted();
    }
    return;
  }
  case Inst::Arithmetic: {
    const InstArithmetic::OpKind Op =
        llvm::cast<InstArithmetic>(Instr)->getOp();
    if (isVectorType(DestTy)) {
      scalarizeArithmetic(Op, Dest, Instr->getSrc(0), Instr->getSrc(1));
      Instr->setDeleted();
      return;
    }
    switch (DestTy) {
    default:
      return;
    case IceType_i64: {
      RuntimeHelper HelperID = RuntimeHelper::H_Num;
      switch (Op) {
      default:
        return;
      case InstArithmetic::Udiv:
        HelperID = RuntimeHelper::H_udiv_i64;
        break;
      case InstArithmetic::Sdiv:
        HelperID = RuntimeHelper::H_sdiv_i64;
        break;
      case InstArithmetic::Urem:
        HelperID = RuntimeHelper::H_urem_i64;
        break;
      case InstArithmetic::Srem:
        HelperID = RuntimeHelper::H_srem_i64;
        break;
      }

      if (HelperID == RuntimeHelper::H_Num) {
        return;
      }

      Operand *TargetHelper = Ctx->getRuntimeHelperFunc(HelperID);
      constexpr SizeT MaxArgs = 2;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Instr->getSrc(0));
      Call->addArg(Instr->getSrc(1));
      Instr->setDeleted();
      return;
    }
    case IceType_f32:
    case IceType_f64: {
      if (Op != InstArithmetic::Frem) {
        return;
      }
      constexpr SizeT MaxArgs = 2;
      Operand *TargetHelper = Ctx->getRuntimeHelperFunc(
          DestTy == IceType_f32 ? RuntimeHelper::H_frem_f32
                                : RuntimeHelper::H_frem_f64);
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Instr->getSrc(0));
      Call->addArg(Instr->getSrc(1));
      Instr->setDeleted();
      return;
    }
    }
    llvm::report_fatal_error("Control flow should never have reached here.");
  }
  case Inst::Cast: {
    Operand *Src0 = Instr->getSrc(0);
    const Type SrcTy = Src0->getType();
    auto *CastInstr = llvm::cast<InstCast>(Instr);
    const InstCast::OpKind CastKind = CastInstr->getCastKind();

    if (isVectorType(DestTy)) {
      Variable *T = Func->makeVariable(DestTy);
      auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
      VarVecOn32->initVecElement(Func);
      auto *Undef = ConstantUndef::create(Ctx, DestTy);
      Context.insert<InstAssign>(T, Undef);
      for (SizeT I = 0; I < typeNumElements(DestTy); ++I) {
        auto *Index = Ctx->getConstantInt32(I);
        auto *Op = Func->makeVariable(typeElementType(SrcTy));
        Context.insert<InstExtractElement>(Op, Src0, Index);
        auto *Dst = Func->makeVariable(typeElementType(DestTy));
        Variable *DestT = Func->makeVariable(DestTy);
        Context.insert<InstCast>(CastKind, Dst, Op);
        Context.insert<InstInsertElement>(DestT, T, Dst, Index);
        T = DestT;
      }
      Context.insert<InstAssign>(Dest, T);
      Instr->setDeleted();
      return;
    }

    switch (CastKind) {
    default:
      return;
    case InstCast::Fptosi:
    case InstCast::Fptoui: {
      if ((DestTy != IceType_i32) && (DestTy != IceType_i64)) {
        return;
      }
      const bool DestIs32 = DestTy == IceType_i32;
      const bool DestIsSigned = CastKind == InstCast::Fptosi;
      const bool Src0IsF32 = isFloat32Asserting32Or64(SrcTy);
      RuntimeHelper RTHFunc = RuntimeHelper::H_Num;
      if (DestIsSigned) {
        if (DestIs32) {
          return;
        }
        RTHFunc = Src0IsF32 ? RuntimeHelper::H_fptosi_f32_i64
                            : RuntimeHelper::H_fptosi_f64_i64;
      } else {
        RTHFunc = Src0IsF32 ? (DestIs32 ? RuntimeHelper::H_fptoui_f32_i32
                                        : RuntimeHelper::H_fptoui_f32_i64)
                            : (DestIs32 ? RuntimeHelper::H_fptoui_f64_i32
                                        : RuntimeHelper::H_fptoui_f64_i64);
      }
      Operand *TargetHelper = Ctx->getRuntimeHelperFunc(RTHFunc);
      static constexpr SizeT MaxArgs = 1;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Src0);
      Instr->setDeleted();
      return;
    }
    case InstCast::Sitofp:
    case InstCast::Uitofp: {
      if ((SrcTy != IceType_i32) && (SrcTy != IceType_i64)) {
        return;
      }
      const bool SourceIs32 = SrcTy == IceType_i32;
      const bool SourceIsSigned = CastKind == InstCast::Sitofp;
      const bool DestIsF32 = isFloat32Asserting32Or64(DestTy);
      RuntimeHelper RTHFunc = RuntimeHelper::H_Num;
      if (SourceIsSigned) {
        if (SourceIs32) {
          return;
        }
        RTHFunc = DestIsF32 ? RuntimeHelper::H_sitofp_i64_f32
                            : RuntimeHelper::H_sitofp_i64_f64;
      } else {
        RTHFunc = DestIsF32 ? (SourceIs32 ? RuntimeHelper::H_uitofp_i32_f32
                                          : RuntimeHelper::H_uitofp_i64_f32)
                            : (SourceIs32 ? RuntimeHelper::H_uitofp_i32_f64
                                          : RuntimeHelper::H_uitofp_i64_f64);
      }
      Operand *TargetHelper = Ctx->getRuntimeHelperFunc(RTHFunc);
      static constexpr SizeT MaxArgs = 1;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Src0);
      Instr->setDeleted();
      return;
    }
    case InstCast::Bitcast: {
      if (DestTy == SrcTy) {
        return;
      }
      Variable *CallDest = Dest;
      RuntimeHelper HelperID = RuntimeHelper::H_Num;
      switch (DestTy) {
      default:
        return;
      case IceType_i8:
        assert(SrcTy == IceType_v8i1);
        HelperID = RuntimeHelper::H_bitcast_8xi1_i8;
        CallDest = Func->makeVariable(IceType_i32);
        break;
      case IceType_i16:
        assert(SrcTy == IceType_v16i1);
        HelperID = RuntimeHelper::H_bitcast_16xi1_i16;
        CallDest = Func->makeVariable(IceType_i32);
        break;
      case IceType_v8i1: {
        assert(SrcTy == IceType_i8);
        HelperID = RuntimeHelper::H_bitcast_i8_8xi1;
        Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
        // Arguments to functions are required to be at least 32 bits wide.
        Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
        Src0 = Src0AsI32;
      } break;
      case IceType_v16i1: {
        assert(SrcTy == IceType_i16);
        HelperID = RuntimeHelper::H_bitcast_i16_16xi1;
        Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
        // Arguments to functions are required to be at least 32 bits wide.
        Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
        Src0 = Src0AsI32;
      } break;
      }
      constexpr SizeT MaxSrcs = 1;
      InstCall *Call = makeHelperCall(HelperID, CallDest, MaxSrcs);
      Call->addArg(Src0);
      Context.insert(Call);
      // The PNaCl ABI disallows i8/i16 return types, so truncate the helper
      // call result to the appropriate type as necessary.
      if (CallDest->getType() != DestTy)
        Context.insert<InstCast>(InstCast::Trunc, Dest, CallDest);
      Instr->setDeleted();
      return;
    }
    case InstCast::Trunc: {
      if (DestTy == SrcTy) {
        return;
      }
      if (!isVectorType(SrcTy)) {
        return;
      }
      assert(typeNumElements(DestTy) == typeNumElements(SrcTy));
      assert(typeElementType(DestTy) == IceType_i1);
      assert(isVectorIntegerType(SrcTy));
      return;
    }
    case InstCast::Sext:
    case InstCast::Zext: {
      if (DestTy == SrcTy) {
        return;
      }
      if (!isVectorType(DestTy)) {
        return;
      }
      assert(typeNumElements(DestTy) == typeNumElements(SrcTy));
      assert(typeElementType(SrcTy) == IceType_i1);
      assert(isVectorIntegerType(DestTy));
      return;
    }
    }
    llvm::report_fatal_error("Control flow should never have reached here.");
  }
  case Inst::Intrinsic: {
    auto *Intrinsic = llvm::cast<InstIntrinsic>(Instr);
    Intrinsics::IntrinsicID ID = Intrinsic->getIntrinsicID();
    if (isVectorType(DestTy) && ID == Intrinsics::Fabs) {
      Operand *Src0 = Intrinsic->getArg(0);
      Intrinsics::IntrinsicInfo Info = Intrinsic->getIntrinsicInfo();

      Variable *T = Func->makeVariable(IceType_v4f32);
      auto *Undef = ConstantUndef::create(Ctx, IceType_v4f32);
      Context.insert<InstAssign>(T, Undef);
      auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
      VarVecOn32->initVecElement(Func);

      for (SizeT i = 0; i < typeNumElements(IceType_v4f32); ++i) {
        auto *Index = Ctx->getConstantInt32(i);
        auto *Op = Func->makeVariable(IceType_f32);
        Context.insert<InstExtractElement>(Op, Src0, Index);
        auto *Res = Func->makeVariable(IceType_f32);
        Variable *DestT = Func->makeVariable(IceType_v4f32);
        auto *Intrinsic = Context.insert<InstIntrinsic>(1, Res, Info);
        Intrinsic->addArg(Op);
        Context.insert<InstInsertElement>(DestT, T, Res, Index);
        T = DestT;
      }

      Context.insert<InstAssign>(Dest, T);

      Instr->setDeleted();
      return;
    }
    switch (ID) {
    default:
      return;
    case Intrinsics::AtomicLoad: {
      if (DestTy != IceType_i64)
        return;
      if (!Intrinsics::isMemoryOrderValid(
              ID, getConstantMemoryOrder(Intrinsic->getArg(1)))) {
        Func->setError("Unexpected memory ordering for AtomicLoad");
        return;
      }
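      // Lower the 64-bit atomic load to __sync_val_compare_and_swap_8(Addr,
      // 0, 0), which returns the current value while leaving memory
      // effectively unchanged, followed by a sync barrier.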
      Operand *Addr = Intrinsic->getArg(0);
      Operand *TargetHelper = Ctx->getConstantExternSym(
          Ctx->getGlobalString("__sync_val_compare_and_swap_8"));
      static constexpr SizeT MaxArgs = 3;
      auto *_0 = Ctx->getConstantZero(IceType_i64);
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Addr);
      Call->addArg(_0);
      Call->addArg(_0);
      Context.insert<InstMIPS32Sync>();
      Instr->setDeleted();
      return;
    }
    case Intrinsics::AtomicStore: {
      Operand *Val = Intrinsic->getArg(0);
      if (Val->getType() != IceType_i64)
        return;
      if (!Intrinsics::isMemoryOrderValid(
              ID, getConstantMemoryOrder(Intrinsic->getArg(2)))) {
        Func->setError("Unexpected memory ordering for AtomicStore");
        return;
      }
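      // Lower the 64-bit atomic store to __sync_lock_test_and_set_8, which
      // stores Val and returns the previous value; the result is discarded
      // (NoDest). Sync barriers before and after the call provide the
      // ordering.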
      Operand *Addr = Intrinsic->getArg(1);
      Variable *NoDest = nullptr;
      Operand *TargetHelper = Ctx->getConstantExternSym(
          Ctx->getGlobalString("__sync_lock_test_and_set_8"));
      Context.insert<InstMIPS32Sync>();
      static constexpr SizeT MaxArgs = 2;
      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Addr);
      Call->addArg(Val);
      Context.insert<InstMIPS32Sync>();
      Instr->setDeleted();
      return;
    }
    case Intrinsics::AtomicCmpxchg: {
      if (DestTy != IceType_i64)
        return;
      if (!Intrinsics::isMemoryOrderValid(
              ID, getConstantMemoryOrder(Intrinsic->getArg(3)),
              getConstantMemoryOrder(Intrinsic->getArg(4)))) {
        Func->setError("Unexpected memory ordering for AtomicCmpxchg");
        return;
      }
      Operand *Addr = Intrinsic->getArg(0);
      Operand *Oldval = Intrinsic->getArg(1);
      Operand *Newval = Intrinsic->getArg(2);
      Operand *TargetHelper = Ctx->getConstantExternSym(
          Ctx->getGlobalString("__sync_val_compare_and_swap_8"));
      Context.insert<InstMIPS32Sync>();
      static constexpr SizeT MaxArgs = 3;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Addr);
      Call->addArg(Oldval);
      Call->addArg(Newval);
      Context.insert<InstMIPS32Sync>();
      Instr->setDeleted();
      return;
    }
    case Intrinsics::AtomicRMW: {
      if (DestTy != IceType_i64)
        return;
      if (!Intrinsics::isMemoryOrderValid(
              ID, getConstantMemoryOrder(Intrinsic->getArg(3)))) {
        Func->setError("Unexpected memory ordering for AtomicRMW");
        return;
      }
      auto Operation = static_cast<Intrinsics::AtomicRMWOperation>(
          llvm::cast<ConstantInteger32>(Intrinsic->getArg(0))->getValue());
      auto *Addr = Intrinsic->getArg(1);
      auto *Newval = Intrinsic->getArg(2);
      Operand *TargetHelper;
      switch (Operation) {
      case Intrinsics::AtomicAdd:
        TargetHelper = Ctx->getConstantExternSym(
            Ctx->getGlobalString("__sync_fetch_and_add_8"));
        break;
      case Intrinsics::AtomicSub:
        TargetHelper = Ctx->getConstantExternSym(
            Ctx->getGlobalString("__sync_fetch_and_sub_8"));
        break;
      case Intrinsics::AtomicOr:
        TargetHelper = Ctx->getConstantExternSym(
            Ctx->getGlobalString("__sync_fetch_and_or_8"));
        break;
      case Intrinsics::AtomicAnd:
        TargetHelper = Ctx->getConstantExternSym(
            Ctx->getGlobalString("__sync_fetch_and_and_8"));
        break;
      case Intrinsics::AtomicXor:
        TargetHelper = Ctx->getConstantExternSym(
            Ctx->getGlobalString("__sync_fetch_and_xor_8"));
        break;
      case Intrinsics::AtomicExchange:
        TargetHelper = Ctx->getConstantExternSym(
            Ctx->getGlobalString("__sync_lock_test_and_set_8"));
        break;
      default:
        llvm::report_fatal_error("Unknown AtomicRMW operation");
        return;
      }
      Context.insert<InstMIPS32Sync>();
      static constexpr SizeT MaxArgs = 2;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Addr);
      Call->addArg(Newval);
      Context.insert<InstMIPS32Sync>();
      Instr->setDeleted();
      return;
    }
    case Intrinsics::Ctpop: {
      Operand *Src0 = Intrinsic->getArg(0);
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(isInt32Asserting32Or64(Src0->getType())
                                        ? RuntimeHelper::H_call_ctpop_i32
                                        : RuntimeHelper::H_call_ctpop_i64);
      static constexpr SizeT MaxArgs = 1;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Src0);
      Instr->setDeleted();
      return;
    }
    case Intrinsics::Longjmp: {
      static constexpr SizeT MaxArgs = 2;
      static constexpr Variable *NoDest = nullptr;
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_longjmp);
      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Intrinsic->getArg(0));
      Call->addArg(Intrinsic->getArg(1));
      Instr->setDeleted();
      return;
    }
    case Intrinsics::Memcpy: {
      static constexpr SizeT MaxArgs = 3;
      static constexpr Variable *NoDest = nullptr;
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memcpy);
      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Intrinsic->getArg(0));
      Call->addArg(Intrinsic->getArg(1));
      Call->addArg(Intrinsic->getArg(2));
      Instr->setDeleted();
      return;
    }
    case Intrinsics::Memmove: {
      static constexpr SizeT MaxArgs = 3;
      static constexpr Variable *NoDest = nullptr;
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memmove);
      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Intrinsic->getArg(0));
      Call->addArg(Intrinsic->getArg(1));
      Call->addArg(Intrinsic->getArg(2));
      Instr->setDeleted();
      return;
    }
    case Intrinsics::Memset: {
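      // memset's value operand is i8, but call arguments must be at least 32
      // bits wide, so zero-extend it to the stack slot type (i32) first.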
      Operand *ValOp = Intrinsic->getArg(1);
      assert(ValOp->getType() == IceType_i8);
      Variable *ValExt = Func->makeVariable(stackSlotType());
      Context.insert<InstCast>(InstCast::Zext, ValExt, ValOp);

      static constexpr SizeT MaxArgs = 3;
      static constexpr Variable *NoDest = nullptr;
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memset);
      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Intrinsic->getArg(0));
      Call->addArg(ValExt);
      Call->addArg(Intrinsic->getArg(2));
      Instr->setDeleted();
      return;
    }
    case Intrinsics::Setjmp: {
      static constexpr SizeT MaxArgs = 1;
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_setjmp);
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Intrinsic->getArg(0));
      Instr->setDeleted();
      return;
    }
    }
    llvm::report_fatal_error("Control flow should never have reached here.");
  }
  }
}

void TargetMIPS32::findMaxStackOutArgsSize() {
  // MinNeededOutArgsBytes should be updated if the Target ever creates a
  // high-level InstCall that requires more stack bytes.
  size_t MinNeededOutArgsBytes = 0;
  if (!MaybeLeafFunc)
    MinNeededOutArgsBytes = MIPS32_MAX_GPR_ARG * 4;
  MaxOutArgsSizeBytes = MinNeededOutArgsBytes;
  for (CfgNode *Node : Func->getNodes()) {
    Context.init(Node);
    while (!Context.atEnd()) {
      PostIncrLoweringContext PostIncrement(Context);
      Inst *CurInstr = iteratorToInst(Context.getCur());
      if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) {
        SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call);
        MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes);
      }
    }
  }
  CurrentAllocaOffset = MaxOutArgsSizeBytes;
}

void TargetMIPS32::translateO2() {
  TimerMarker T(TimerStack::TT_O2, Func);

  // TODO(stichnot): share passes with X86?
  // https://code.google.com/p/nativeclient/issues/detail?id=4094
  genTargetHelperCalls();

  unsetIfNonLeafFunc();

  findMaxStackOutArgsSize();

  // Merge Alloca instructions, and lay out the stack.
  static constexpr bool SortAndCombineAllocas = true;
  Func->processAllocas(SortAndCombineAllocas);
  Func->dump("After Alloca processing");

  if (!getFlags().getEnablePhiEdgeSplit()) {
    // Lower Phi instructions.
    Func->placePhiLoads();
    if (Func->hasError())
      return;
    Func->placePhiStores();
    if (Func->hasError())
      return;
    Func->deletePhis();
    if (Func->hasError())
      return;
    Func->dump("After Phi lowering");
  }

  // Address mode optimization.
  Func->getVMetadata()->init(VMK_SingleDefs);
  Func->doAddressOpt();

  // Argument lowering
  Func->doArgLowering();

  // Target lowering. This requires liveness analysis for some parts of the
  // lowering decisions, such as compare/branch fusing. If non-lightweight
  // liveness analysis is used, the instructions need to be renumbered first.
  // TODO: This renumbering should only be necessary if we're actually
  // calculating live intervals, which we only do for register allocation.
  Func->renumberInstructions();
  if (Func->hasError())
    return;

  // TODO: It should be sufficient to use the fastest liveness calculation,
  // i.e. livenessLightweight(). However, for some reason that slows down the
  // rest of the translation. Investigate.
  Func->liveness(Liveness_Basic);
  if (Func->hasError())
    return;
  Func->dump("After MIPS32 address mode opt");

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After MIPS32 codegen");

  // Register allocation. This requires instruction renumbering and full
  // liveness analysis.
  Func->renumberInstructions();
  if (Func->hasError())
    return;
  Func->liveness(Liveness_Intervals);
  if (Func->hasError())
    return;
  // The post-codegen dump is done here, after liveness analysis and associated
  // cleanup, to make the dump cleaner and more useful.
  Func->dump("After initial MIPS32 codegen");
  // Validate the live range computations. The expensive validation call is
  // deliberately only made when assertions are enabled.
  assert(Func->validateLiveness());
  Func->getVMetadata()->init(VMK_All);
  regAlloc(RAK_Global);
  if (Func->hasError())
    return;
  Func->dump("After linear scan regalloc");

  if (getFlags().getEnablePhiEdgeSplit()) {
    Func->advancedPhiLowering();
    Func->dump("After advanced Phi lowering");
  }

  // Stack frame mapping.
  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  postLowerLegalization();
  if (Func->hasError())
    return;
  Func->dump("After postLowerLegalization");

  Func->contractEmptyNodes();
  Func->reorderNodes();

  // Branch optimization. This needs to be done just before code emission. In
  // particular, no transformations that insert or reorder CfgNodes should be
  // done after branch optimization. We go ahead and do it before nop insertion
  // to reduce the amount of work needed for searching for opportunities.
  Func->doBranchOpt();
  Func->dump("After branch optimization");
}

void TargetMIPS32::translateOm1() {
  TimerMarker T(TimerStack::TT_Om1, Func);

  // TODO: share passes with X86?
  genTargetHelperCalls();

  unsetIfNonLeafFunc();

  findMaxStackOutArgsSize();

  // Do not merge Alloca instructions, and lay out the stack.
  static constexpr bool SortAndCombineAllocas = false;
  Func->processAllocas(SortAndCombineAllocas);
  Func->dump("After Alloca processing");

  Func->placePhiLoads();
  if (Func->hasError())
    return;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  Func->deletePhis();
  if (Func->hasError())
    return;
  Func->dump("After Phi lowering");

  Func->doArgLowering();

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After initial MIPS32 codegen");

  regAlloc(RAK_InfOnly);
  if (Func->hasError())
    return;
  Func->dump("After regalloc of infinite-weight variables");

  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  postLowerLegalization();
  if (Func->hasError())
    return;
  Func->dump("After postLowerLegalization");
}

bool TargetMIPS32::doBranchOpt(Inst *Instr, const CfgNode *NextNode) {
  if (auto *Br = llvm::dyn_cast<InstMIPS32Br>(Instr)) {
    return Br->optimizeBranch(NextNode);
  }
  return false;
}

namespace {

const char *RegNames[RegMIPS32::Reg_NUM] = {
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
          isI64Pair, isFP32, isFP64, isVec128, alias_init)                     \
  name,
    REGMIPS32_TABLE
#undef X
};

} // end of anonymous namespace

const char *RegMIPS32::getRegName(RegNumT RegNum) {
  RegNum.assertIsValid();
  return RegNames[RegNum];
}

const char *TargetMIPS32::getRegName(RegNumT RegNum, Type Ty) const {
  (void)Ty;
  return RegMIPS32::getRegName(RegNum);
}

Variable *TargetMIPS32::getPhysicalRegister(RegNumT RegNum, Type Ty) {
  if (Ty == IceType_void)
    Ty = IceType_i32;
  if (PhysicalRegisters[Ty].empty())
    PhysicalRegisters[Ty].resize(RegMIPS32::Reg_NUM);
  RegNum.assertIsValid();
  Variable *Reg = PhysicalRegisters[Ty][RegNum];
  if (Reg == nullptr) {
    Reg = Func->makeVariable(Ty);
    Reg->setRegNum(RegNum);
    PhysicalRegisters[Ty][RegNum] = Reg;
    // Specially mark a named physical register as an "argument" so that it is
    // considered live upon function entry. Otherwise it's possible to get
    // liveness validation errors for saving callee-save registers.
    Func->addImplicitArg(Reg);
    // Don't bother tracking the live range of a named physical register.
    Reg->setIgnoreLiveness();
  }
  return Reg;
}

void TargetMIPS32::emitJumpTable(const Cfg *Func,
                                 const InstJumpTable *JumpTable) const {
  (void)Func;
  (void)JumpTable;
  UnimplementedError(getFlags());
}

/// Provide a trivial wrapper to legalize() for this common usage.
Variable *TargetMIPS32::legalizeToReg(Operand *From, RegNumT RegNum) {
  return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
}

/// Legalize undef values to concrete values.
Operand *TargetMIPS32::legalizeUndef(Operand *From, RegNumT RegNum) {
  (void)RegNum;
  Type Ty = From->getType();
  if (llvm::isa<ConstantUndef>(From)) {
    // Lower undefs to zero. Another option is to lower undefs to an
    // uninitialized register; however, using an uninitialized register
    // results in less predictable code.
    //
    // If in the future the implementation is changed to lower undef
    // values to uninitialized registers, a FakeDef will be needed:
    // Context.insert(InstFakeDef::create(Func, Reg));
    // This is in order to ensure that the live range of Reg is not
    // overestimated. If the constant being lowered is a 64 bit value,
    // then the result should be split and the lo and hi components will
    // need to go in uninitialized registers.
    if (isVectorType(Ty)) {
      Variable *Var = makeReg(Ty, RegNum);
      auto *Reg = llvm::cast<VariableVecOn32>(Var);
      Reg->initVecElement(Func);
      auto *Zero = getZero();
      for (Variable *Var : Reg->getContainers()) {
        _mov(Var, Zero);
      }
      return Reg;
    }
    return Ctx->getConstantZero(Ty);
  }
  return From;
}

Variable *TargetMIPS32::makeReg(Type Type, RegNumT RegNum) {
  // There aren't any 64-bit integer registers for Mips32.
  assert(Type != IceType_i64);
  Variable *Reg = Func->makeVariable(Type);
  if (RegNum.hasValue())
    Reg->setRegNum(RegNum);
  else
    Reg->setMustHaveReg();
  return Reg;
}

OperandMIPS32Mem *TargetMIPS32::formMemoryOperand(Operand *Operand, Type Ty) {
  // It may be the case that address mode optimization already creates an
  // OperandMIPS32Mem, so in that case it wouldn't need another level of
  // transformation.
  if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) {
    return llvm::cast<OperandMIPS32Mem>(legalize(Mem));
  }

  // If we didn't do address mode optimization, then we only have a base/offset
  // to work with. MIPS always requires a base register, so just use that to
  // hold the operand.
  auto *Base = llvm::cast<Variable>(
      legalize(Operand, Legal_Reg | Legal_Rematerializable));
  const int32_t Offset = Base->hasStackOffset() ? Base->getStackOffset() : 0;
  return OperandMIPS32Mem::create(
      Func, Ty, Base,
      llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(Offset)));
}

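// Emits the assembly form of a variable: "$reg" when it has a register,
// otherwise "Offset($reg)" relative to the frame or stack register; e.g. a
// spilled i32 at offset 16 is printed as "16($sp)".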
void TargetMIPS32::emitVariable(const Variable *Var) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  const Type FrameSPTy = IceType_i32;
  if (Var->hasReg()) {
    Str << '$' << getRegName(Var->getRegNum(), Var->getType());
    return;
  }
  if (Var->mustHaveReg()) {
    llvm::report_fatal_error("Infinite-weight Variable (" + Var->getName() +
                             ") has no register assigned - function " +
                             Func->getFunctionName());
  }
  const int32_t Offset = Var->getStackOffset();
  Str << Offset;
  Str << "($" << getRegName(getFrameOrStackReg(), FrameSPTy);
  Str << ")";
}

TargetMIPS32::CallingConv::CallingConv()
    : GPRegsUsed(RegMIPS32::Reg_NUM),
      GPRArgs(GPRArgInitializer.rbegin(), GPRArgInitializer.rend()),
      I64Args(I64ArgInitializer.rbegin(), I64ArgInitializer.rend()),
      VFPRegsUsed(RegMIPS32::Reg_NUM),
      FP32Args(FP32ArgInitializer.rbegin(), FP32ArgInitializer.rend()),
      FP64Args(FP64ArgInitializer.rbegin(), FP64ArgInitializer.rend()) {}

// In the MIPS O32 ABI, FP argument registers can be used only if the first
// argument is of type float/double. The UseFPRegs flag takes care of that.
// FP argument registers can also be used only for the first two arguments,
// so we require the argument number to make register allocation decisions.
bool TargetMIPS32::CallingConv::argInReg(Type Ty, uint32_t ArgNo,
                                         RegNumT *Reg) {
  if (isScalarIntegerType(Ty) || isVectorType(Ty))
    return argInGPR(Ty, Reg);
  if (isScalarFloatingType(Ty)) {
    if (ArgNo == 0) {
      UseFPRegs = true;
      return argInVFP(Ty, Reg);
    }
    if (UseFPRegs && ArgNo == 1) {
      UseFPRegs = false;
      return argInVFP(Ty, Reg);
    }
    return argInGPR(Ty, Reg);
  }
  llvm::report_fatal_error("argInReg: Invalid type.");
  return false;
}

bool TargetMIPS32::CallingConv::argInGPR(Type Ty, RegNumT *Reg) {
  CfgVector<RegNumT> *Source;

  switch (Ty) {
  default: {
    llvm::report_fatal_error("argInGPR: Invalid type.");
    return false;
  } break;
  case IceType_v4i1:
  case IceType_v8i1:
  case IceType_v16i1:
  case IceType_v16i8:
  case IceType_v8i16:
  case IceType_v4i32:
  case IceType_v4f32:
  case IceType_i32:
  case IceType_f32: {
    Source = &GPRArgs;
  } break;
  case IceType_i64:
  case IceType_f64: {
    Source = &I64Args;
  } break;
  }

  discardUnavailableGPRsAndTheirAliases(Source);

  // If $4 is used for any scalar type (or for returning v4f32), then the next
  // vector argument is passed in $6:$7:stack:stack.
  if (isVectorType(Ty)) {
    alignGPR(Source);
  }

  if (Source->empty()) {
    GPRegsUsed.set();
    return false;
  }

  *Reg = Source->back();
  // Note that we don't Source->pop_back() here. This is intentional. Notice
  // how we mark all of Reg's aliases as Used. So, for the next argument,
  // Source->back() is marked as unavailable, and it is thus implicitly popped
  // from the stack.
  GPRegsUsed |= RegisterAliases[*Reg];

  // All vector arguments, irrespective of their base type, are passed in GP
  // registers. The first vector argument is passed in $4:$5:$6:$7 and the
  // second in $6:$7:stack:stack. If this is the first argument, discard
  // $4:$5:$6:$7; otherwise discard $6:$7 only.
  if (isVectorType(Ty)) {
    if (((unsigned)*Reg) == RegMIPS32::Reg_A0) {
      GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A1];
      GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A2];
      GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A3];
    } else {
      GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A3];
    }
  }

  return true;
}

inline void TargetMIPS32::CallingConv::discardNextGPRAndItsAliases(
    CfgVector<RegNumT> *Regs) {
  GPRegsUsed |= RegisterAliases[Regs->back()];
  Regs->pop_back();
}

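// Aligns the GPR argument pool to an even register: i64/f64 pairs (and
// vectors) must begin at an even register (a0 or a2), so a1 or a3 is
// discarded when it is next in line.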
inline void TargetMIPS32::CallingConv::alignGPR(CfgVector<RegNumT> *Regs) {
  if (Regs->back() == RegMIPS32::Reg_A1 || Regs->back() == RegMIPS32::Reg_A3)
    discardNextGPRAndItsAliases(Regs);
}

// GPRs are not packed when passing parameters. Thus, a function foo(i32, i64,
// i32) will have the first argument in a0, the second in a2-a3, and the third
// on the stack. To model this behavior, whenever we pop a register from Regs,
// we remove all of its aliases from the pool of available GPRs. This has the
// effect of computing the "closure" on the GPR registers.
void TargetMIPS32::CallingConv::discardUnavailableGPRsAndTheirAliases(
    CfgVector<RegNumT> *Regs) {
  while (!Regs->empty() && GPRegsUsed[Regs->back()]) {
    discardNextGPRAndItsAliases(Regs);
  }
}

bool TargetMIPS32::CallingConv::argInVFP(Type Ty, RegNumT *Reg) {
  CfgVector<RegNumT> *Source;

  switch (Ty) {
  default: {
    llvm::report_fatal_error("argInVFP: Invalid type.");
    return false;
  } break;
  case IceType_f32: {
    Source = &FP32Args;
  } break;
  case IceType_f64: {
    Source = &FP64Args;
  } break;
  }

  discardUnavailableVFPRegsAndTheirAliases(Source);

  if (Source->empty()) {
    VFPRegsUsed.set();
    return false;
  }

  *Reg = Source->back();
  VFPRegsUsed |= RegisterAliases[*Reg];

  // In the MIPS O32 ABI, if the function arguments are (f32, i32), one cannot
  // use reg_a0 for the second argument even though it is free: the f32 arg
  // goes in reg_f12, and the i32 arg goes in reg_a1. Similarly, if the
  // arguments are (f64, i32), the second argument goes in reg_a3, and a0, a1
  // are not used.
  Source = &GPRArgs;
  // Discard one GPR reg for f32 (4 bytes), two for f64 (4 + 4 bytes).
  if (Ty == IceType_f64) {
    // In the MIPS O32 ABI, when GPR argument pairs hold f64 values, the pair
    // must be aligned at an even register. Similarly, when we discard GPR
    // registers because some arguments from the first 16 bytes go in FPRs, we
    // must take care of alignment. For example, if the function arguments are
    // (f32, f64, f32), for the first f32 we discard a0; then for the f64
    // argument, which will go in F14F15, we must first align the GPR vector
    // to an even register by discarding a1, then discard the two GPRs a2 and
    // a3. The last f32 argument then goes on the stack.
    alignGPR(Source);
    discardNextGPRAndItsAliases(Source);
  }
  discardNextGPRAndItsAliases(Source);
  return true;
}

void TargetMIPS32::CallingConv::discardUnavailableVFPRegsAndTheirAliases(
    CfgVector<RegNumT> *Regs) {
  while (!Regs->empty() && VFPRegsUsed[Regs->back()]) {
    Regs->pop_back();
  }
}

void TargetMIPS32::lowerArguments() {
  VarList &Args = Func->getArgs();
  TargetMIPS32::CallingConv CC;

  // For each register argument, replace Arg in the argument list with the home
  // register. Then generate an instruction in the prolog to copy the home
  // register to the assigned location of Arg.
  Context.init(Func->getEntryNode());
  Context.setInsertPoint(Context.getCur());

  // v4f32 is returned through the stack. $4 is set up by the caller and passed
  // as the first argument implicitly. The callee then copies the return vector
  // to $4.
  Variable *ImplicitRetVec = nullptr;
  if (isVectorFloatingType(Func->getReturnType())) {
    ImplicitRetVec = Func->makeVariable(IceType_i32);
    ImplicitRetVec->setName(Func, "ImplicitRet_v4f32");
    ImplicitRetVec->setIsArg();
    Args.insert(Args.begin(), ImplicitRetVec);
    setImplicitRet(ImplicitRetVec);
  }

  for (SizeT i = 0, E = Args.size(); i < E; ++i) {
    Variable *Arg = Args[i];
    Type Ty = Arg->getType();
    RegNumT RegNum;
    if (!CC.argInReg(Ty, i, &RegNum)) {
      continue;
    }
    Variable *RegisterArg = Func->makeVariable(Ty);
    if (BuildDefs::dump()) {
      RegisterArg->setName(Func, "home_reg:" + Arg->getName());
    }
    RegisterArg->setIsArg();
    Arg->setIsArg(false);
    Args[i] = RegisterArg;

    if (isVectorType(Ty)) {
      auto *RegisterArgVec = llvm::cast<VariableVecOn32>(RegisterArg);
      RegisterArgVec->initVecElement(Func);
      RegisterArgVec->getContainers()[0]->setRegNum(
          RegNumT::fixme((unsigned)RegNum + 0));
      RegisterArgVec->getContainers()[1]->setRegNum(
          RegNumT::fixme((unsigned)RegNum + 1));
      // The first two elements of a second vector argument are passed in
      // $6:$7 and the remaining two on the stack. Do not assign registers to
      // the last two elements if this is the second vector argument.
      if (i == 0) {
        RegisterArgVec->getContainers()[2]->setRegNum(
            RegNumT::fixme((unsigned)RegNum + 2));
        RegisterArgVec->getContainers()[3]->setRegNum(
            RegNumT::fixme((unsigned)RegNum + 3));
      } else {
        RegisterArgVec->getContainers()[2]->setRegNum(
            RegNumT::fixme(RegNumT()));
        RegisterArgVec->getContainers()[3]->setRegNum(
            RegNumT::fixme(RegNumT()));
      }
    } else {
      switch (Ty) {
      default: {
        RegisterArg->setRegNum(RegNum);
      } break;
      case IceType_i64: {
        auto *RegisterArg64 = llvm::cast<Variable64On32>(RegisterArg);
        RegisterArg64->initHiLo(Func);
        RegisterArg64->getLo()->setRegNum(
            RegNumT::fixme(RegMIPS32::get64PairFirstRegNum(RegNum)));
        RegisterArg64->getHi()->setRegNum(
            RegNumT::fixme(RegMIPS32::get64PairSecondRegNum(RegNum)));
      } break;
      }
    }
    Context.insert<InstAssign>(Arg, RegisterArg);
  }

  // Insert fake use of ImplicitRet_v4f32 to keep it live
  if (ImplicitRetVec) {
    for (CfgNode *Node : Func->getNodes()) {
      for (Inst &Instr : Node->getInsts()) {
        if (llvm::isa<InstRet>(&Instr)) {
          Context.setInsertPoint(instToIterator(&Instr));
          Context.insert<InstFakeUse>(ImplicitRetVec);
          break;
        }
      }
    }
  }
}

Type TargetMIPS32::stackSlotType() { return IceType_i32; }

// Helper function for addProlog().
//
// This assumes Arg is an argument passed on the stack. This sets the frame
// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an
// I64 arg that has been split into Lo and Hi components, it calls itself
// recursively on the components, taking care to handle Lo first because of the
// little-endian architecture. Lastly, this function generates an instruction
// to copy Arg into its assigned register if applicable.
void TargetMIPS32::finishArgumentLowering(Variable *Arg, bool PartialOnStack,
                                          Variable *FramePtr,
                                          size_t BasicFrameOffset,
                                          size_t *InArgsSizeBytes) {
  const Type Ty = Arg->getType();
  *InArgsSizeBytes = applyStackAlignmentTy(*InArgsSizeBytes, Ty);

  // If $4 is used for any scalar type (or for returning v4f32), then the next
  // vector argument is passed in $6:$7:stack:stack. Load the 3rd and 4th
  // elements from the argument stack.
  if (auto *ArgVecOn32 = llvm::dyn_cast<VariableVecOn32>(Arg)) {
    if (PartialOnStack == false) {
      auto *Elem0 = ArgVecOn32->getContainers()[0];
      auto *Elem1 = ArgVecOn32->getContainers()[1];
      finishArgumentLowering(Elem0, PartialOnStack, FramePtr, BasicFrameOffset,
                             InArgsSizeBytes);
      finishArgumentLowering(Elem1, PartialOnStack, FramePtr, BasicFrameOffset,
                             InArgsSizeBytes);
    }
    auto *Elem2 = ArgVecOn32->getContainers()[2];
    auto *Elem3 = ArgVecOn32->getContainers()[3];
    finishArgumentLowering(Elem2, PartialOnStack, FramePtr, BasicFrameOffset,
                           InArgsSizeBytes);
    finishArgumentLowering(Elem3, PartialOnStack, FramePtr, BasicFrameOffset,
                           InArgsSizeBytes);
    return;
  }

  if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) {
    Variable *const Lo = Arg64On32->getLo();
    Variable *const Hi = Arg64On32->getHi();
    finishArgumentLowering(Lo, PartialOnStack, FramePtr, BasicFrameOffset,
                           InArgsSizeBytes);
    finishArgumentLowering(Hi, PartialOnStack, FramePtr, BasicFrameOffset,
                           InArgsSizeBytes);
    return;
  }

  assert(Ty != IceType_i64);
  assert(!isVectorType(Ty));

  const int32_t ArgStackOffset = BasicFrameOffset + *InArgsSizeBytes;
  *InArgsSizeBytes += typeWidthInBytesOnStack(Ty);

  if (!Arg->hasReg()) {
    Arg->setStackOffset(ArgStackOffset);
    return;
  }

  // If the argument variable has been assigned a register, we need to copy the
  // value from the stack slot.
  Variable *Parameter = Func->makeVariable(Ty);
  Parameter->setMustNotHaveReg();
  Parameter->setStackOffset(ArgStackOffset);
  _mov(Arg, Parameter);
}

void TargetMIPS32::addProlog(CfgNode *Node) {
  // Stack frame layout:
  //
  // +------------------------+
  // | 1. preserved registers |
  // +------------------------+
  // | 2. padding             |
  // +------------------------+
  // | 3. global spill area   |
  // +------------------------+
  // | 4. padding             |
  // +------------------------+
  // | 5. local spill area    |
  // +------------------------+
  // | 6. padding             |
  // +------------------------+
  // | 7. allocas             |
  // +------------------------+
  // | 8. padding             |
  // +------------------------+
  // | 9. out args            |
  // +------------------------+ <--- StackPointer
  //
  // The following variables record the size in bytes of the given areas:
  //  * PreservedRegsSizeBytes: area 1
  //  * SpillAreaPaddingBytes: area 2
  //  * GlobalsSize: area 3
  //  * GlobalsAndSubsequentPaddingSize: areas 3 - 4
  //  * LocalsSpillAreaSize: area 5
  //  * SpillAreaSizeBytes: areas 2 - 9
  //  * maxOutArgsSizeBytes(): area 9

  Context.init(Node);
  Context.setInsertPoint(Context.getCur());

  SmallBitVector CalleeSaves = getRegisterSet(RegSet_CalleeSave, RegSet_None);
  RegsUsed = SmallBitVector(CalleeSaves.size());

  VarList SortedSpilledVariables;

  size_t GlobalsSize = 0;
  // If there is a separate locals area, this represents that area. Otherwise
  // it counts any variable not counted by GlobalsSize.
  SpillAreaSizeBytes = 0;
  // If there is a separate locals area, this specifies the alignment for it.
  uint32_t LocalsSlotsAlignmentBytes = 0;
  // The entire spill locations area gets aligned to the largest natural
  // alignment of the variables that have a spill slot.
1514 uint32_t SpillAreaAlignmentBytes = 0;
1515 // For now, we don't have target-specific variables that need special
1516 // treatment (no stack-slot-linked SpillVariable type).
1517 std::function<bool(Variable *)> TargetVarHook = [](Variable *Var) {
1518 static constexpr bool AssignStackSlot = false;
1519 static constexpr bool DontAssignStackSlot = !AssignStackSlot;
1520 if (llvm::isa<Variable64On32>(Var)) {
1521 return DontAssignStackSlot;
1522 }
1523 return AssignStackSlot;
1524 };
1525
1526 // Compute the list of spilled variables and bounds for GlobalsSize, etc.
1527 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
1528 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
1529 &LocalsSlotsAlignmentBytes, TargetVarHook);
1530 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
1531 SpillAreaSizeBytes += GlobalsSize;
1532
1533 PreservedGPRs.reserve(CalleeSaves.size());
1534
1535 // Consider FP and RA as callee-save / used as needed.
1536 if (UsesFramePointer) {
1537 if (RegsUsed[RegMIPS32::Reg_FP]) {
1538 llvm::report_fatal_error("Frame pointer has been used.");
1539 }
1540 CalleeSaves[RegMIPS32::Reg_FP] = true;
1541 RegsUsed[RegMIPS32::Reg_FP] = true;
1542 }
1543 if (!MaybeLeafFunc) {
1544 CalleeSaves[RegMIPS32::Reg_RA] = true;
1545 RegsUsed[RegMIPS32::Reg_RA] = true;
1546 }
1547
1548 // Make two passes over the used registers. The first pass records all the
1549 // used registers -- and their aliases. Then, we figure out which GPR
1550 // registers should be saved.
1551 SmallBitVector ToPreserve(RegMIPS32::Reg_NUM);
1552 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
1553 if (CalleeSaves[i] && RegsUsed[i]) {
1554 ToPreserve |= RegisterAliases[i];
1555 }
1556 }
1557
1558 uint32_t NumCallee = 0;
1559
1560 // RegClasses is a tuple of
1561 //
1562 // <First Register in Class, Last Register in Class, Vector of Save Registers>
1563 //
1564 // We use this tuple to figure out which registers we should save/restore
1565 // during prolog/epilog.
1567 using RegClassType = std::tuple<uint32_t, uint32_t, VarList *>;
1568 const RegClassType RegClass = RegClassType(
1569 RegMIPS32::Reg_GPR_First, RegMIPS32::Reg_FPR_Last, &PreservedGPRs);
1570 const uint32_t FirstRegInClass = std::get<0>(RegClass);
1571 const uint32_t LastRegInClass = std::get<1>(RegClass);
1572 VarList *const PreservedRegsInClass = std::get<2>(RegClass);
1573 for (uint32_t Reg = LastRegInClass; Reg > FirstRegInClass; Reg--) {
1574 if (!ToPreserve[Reg]) {
1575 continue;
1576 }
1577 ++NumCallee;
1578 Variable *PhysicalRegister = getPhysicalRegister(RegNumT::fromInt(Reg));
1579 PreservedRegsSizeBytes +=
1580 typeWidthInBytesOnStack(PhysicalRegister->getType());
1581 PreservedRegsInClass->push_back(PhysicalRegister);
1582 }
1583
1584 Ctx->statsUpdateRegistersSaved(NumCallee);
1585
1586 // Align the variables area. SpillAreaPaddingBytes is the size of the region
1587 // after the preserved registers and before the spill areas.
1588 // LocalsSlotsPaddingBytes is the amount of padding between the globals and
1589 // locals area if they are separate.
1590 assert(SpillAreaAlignmentBytes <= MIPS32_STACK_ALIGNMENT_BYTES);
1591 (void)MIPS32_STACK_ALIGNMENT_BYTES;
1592 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
1593 uint32_t SpillAreaPaddingBytes = 0;
1594 uint32_t LocalsSlotsPaddingBytes = 0;
1595 alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
1596 GlobalsSize, LocalsSlotsAlignmentBytes,
1597 &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
1598 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
1599 uint32_t GlobalsAndSubsequentPaddingSize =
1600 GlobalsSize + LocalsSlotsPaddingBytes;
1601
1602 // Add the out-args space to the stack, and align SP if necessary.
1603 if (!NeedsStackAlignment) {
1604 SpillAreaSizeBytes += MaxOutArgsSizeBytes * (VariableAllocaUsed ? 0 : 1);
1605 } else {
1606 SpillAreaSizeBytes = applyStackAlignment(
1607 SpillAreaSizeBytes +
1608 (VariableAllocaUsed ? VariableAllocaAlignBytes : MaxOutArgsSizeBytes));
1609 }
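 // Illustrative, assuming no variable-sized allocas: with
 // SpillAreaSizeBytes = 36 and MaxOutArgsSizeBytes = 16, the aligned path
 // above rounds 36 + 16 = 52 up to 64 (16-byte stack alignment).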
1610
1611 // Combine fixed alloca with SpillAreaSize.
1612 SpillAreaSizeBytes += FixedAllocaSizeBytes;
1613
1614 TotalStackSizeBytes =
1615 applyStackAlignment(PreservedRegsSizeBytes + SpillAreaSizeBytes);
1616
1617 // Generate "addiu sp, sp, -TotalStackSizeBytes"
1618 if (TotalStackSizeBytes) {
1619 // Use the scratch register if needed to legalize the immediate.
1620 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
1621 _addiu(SP, SP, -TotalStackSizeBytes);
1622 }
1623
1624 Ctx->statsUpdateFrameBytes(TotalStackSizeBytes);
1625
1626 if (!PreservedGPRs.empty()) {
1627 uint32_t StackOffset = TotalStackSizeBytes;
1628 for (Variable *Var : *PreservedRegsInClass) {
1629 Type RegType;
1630 if (RegMIPS32::isFPRReg(Var->getRegNum()))
1631 RegType = IceType_f32;
1632 else
1633 RegType = IceType_i32;
1634 auto *PhysicalRegister = makeReg(RegType, Var->getRegNum());
1635 StackOffset -= typeWidthInBytesOnStack(RegType);
1636 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
1637 OperandMIPS32Mem *MemoryLocation = OperandMIPS32Mem::create(
1638 Func, RegType, SP,
1639 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackOffset)));
1640 _sw(PhysicalRegister, MemoryLocation);
1641 }
1642 }
1643
1644 Variable *FP = getPhysicalRegister(RegMIPS32::Reg_FP);
1645
1646 // Generate "mov FP, SP" if needed.
1647 if (UsesFramePointer) {
1648 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
1649 _mov(FP, SP);
1650 // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode).
1651 Context.insert<InstFakeUse>(FP);
1652 }
1653
1654 // Fill in stack offsets for stack args, and copy args into registers for
1655 // those that were register-allocated. Args are pushed right to left, so
1656 // Arg[0] is closest to the stack/frame pointer.
1657 const VarList &Args = Func->getArgs();
1658 size_t InArgsSizeBytes = MIPS32_MAX_GPR_ARG * 4;
1659 TargetMIPS32::CallingConv CC;
1660 uint32_t ArgNo = 0;
1661
1662 for (Variable *Arg : Args) {
1663 RegNumT DummyReg;
1664 const Type Ty = Arg->getType();
1665 bool PartialOnStack;
1666 // Skip arguments passed in registers.
1667 if (CC.argInReg(Ty, ArgNo, &DummyReg)) {
1668 // Load the argument from the stack:
1669 // 1. If this is the first vector argument and the return type is v4f32.
1670 //    In this case $4 is used to pass the stack address implicitly, and
1671 //    the 3rd and 4th elements of the vector argument are passed on the stack.
1672 // 2. If this is the second vector argument.
1673 if (ArgNo != 0 && isVectorType(Ty)) {
1674 PartialOnStack = true;
1675 finishArgumentLowering(Arg, PartialOnStack, FP, TotalStackSizeBytes,
1676 &InArgsSizeBytes);
1677 }
1678 } else {
1679 PartialOnStack = false;
1680 finishArgumentLowering(Arg, PartialOnStack, FP, TotalStackSizeBytes,
1681 &InArgsSizeBytes);
1682 }
1683 ++ArgNo;
1684 }
1685
1686 // Fill in stack offsets for locals.
1687 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
1688 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize);
1689 this->HasComputedFrame = true;
1690
1691 if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
1692 OstreamLocker _(Func->getContext());
1693 Ostream &Str = Func->getContext()->getStrDump();
1694
1695 Str << "Stack layout:\n";
1696 uint32_t SPAdjustmentPaddingSize =
1697 SpillAreaSizeBytes - LocalsSpillAreaSize -
1698 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes -
1699 MaxOutArgsSizeBytes;
1700 Str << " in-args = " << InArgsSizeBytes << " bytes\n"
1701 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
1702 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
1703 << " globals spill area = " << GlobalsSize << " bytes\n"
1704 << " globals-locals spill areas intermediate padding = "
1705 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
1706 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
1707 << " SP alignment padding = " << SPAdjustmentPaddingSize << " bytes\n";
1708
1709 Str << "Stack details:\n"
1710 << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n"
1711 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
1712 << " outgoing args size = " << MaxOutArgsSizeBytes << " bytes\n"
1713 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
1714 << " bytes\n"
1715 << " is FP based = " << 1 << "\n";
1716 }
1718 }
1719
1720 void TargetMIPS32::addEpilog(CfgNode *Node) {
1721 InstList &Insts = Node->getInsts();
1722 InstList::reverse_iterator RI, E;
1723 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
1724 if (llvm::isa<InstMIPS32Ret>(*RI))
1725 break;
1726 }
1727 if (RI == E)
1728 return;
1729
1730 // Convert the reverse_iterator position into its corresponding (forward)
1731 // iterator position.
1732 InstList::iterator InsertPoint = reverseToForwardIterator(RI);
1733 --InsertPoint;
1734 Context.init(Node);
1735 Context.setInsertPoint(InsertPoint);
1736
1737 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
1738 if (UsesFramePointer) {
1739 Variable *FP = getPhysicalRegister(RegMIPS32::Reg_FP);
1740 // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake
1741 // use of SP before the assignment of SP=FP keeps previous SP adjustments
1742 // from being dead-code eliminated.
1743 Context.insert<InstFakeUse>(SP);
1744 _mov(SP, FP);
1745 }
1746
1747 VarList::reverse_iterator RIter, END;
1748
1749 if (!PreservedGPRs.empty()) {
1750 uint32_t StackOffset = TotalStackSizeBytes - PreservedRegsSizeBytes;
1751 for (RIter = PreservedGPRs.rbegin(), END = PreservedGPRs.rend();
1752 RIter != END; ++RIter) {
1753 Type RegType;
1754 if (RegMIPS32::isFPRReg((*RIter)->getRegNum()))
1755 RegType = IceType_f32;
1756 else
1757 RegType = IceType_i32;
1758 auto *PhysicalRegister = makeReg(RegType, (*RIter)->getRegNum());
1759 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
1760 OperandMIPS32Mem *MemoryLocation = OperandMIPS32Mem::create(
1761 Func, RegType, SP,
1762 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackOffset)));
1763 _lw(PhysicalRegister, MemoryLocation);
1764 StackOffset += typeWidthInBytesOnStack(PhysicalRegister->getType());
1765 }
1766 }
1767
1768 if (TotalStackSizeBytes) {
1769 _addiu(SP, SP, TotalStackSizeBytes);
1770 }
1771 }
1772
1773 Variable *TargetMIPS32::PostLoweringLegalizer::newBaseRegister(
1774 Variable *Base, int32_t Offset, RegNumT ScratchRegNum) {
1775 // Legalizing the offset will likely need a lui/ori combination, but if the
1776 // top 16 bits of the negated offset are all zero, a single addi suffices.
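 // Illustrative (hypothetical offset): legalizing Offset = 0x54321 takes the
 // fallback path and emits "lui scratch, 0x5; ori scratch, scratch, 0x4321;
 // addu scratch, scratch, base", while a 16-bit offset folds into one addiu.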
1777 const bool ShouldSub = Offset != 0 && (-Offset & 0xFFFF0000) == 0;
1778 Variable *ScratchReg = Target->makeReg(IceType_i32, ScratchRegNum);
1779 if (ShouldSub) {
1780 Target->_addi(ScratchReg, Base, -Offset);
1781 } else {
1782 constexpr bool SignExt = true;
1783 if (!OperandMIPS32Mem::canHoldOffset(Base->getType(), SignExt, Offset)) {
1784 const uint32_t UpperBits = (Offset >> 16) & 0xFFFF;
1785 const uint32_t LowerBits = Offset & 0xFFFF;
1786 Target->_lui(ScratchReg, Target->Ctx->getConstantInt32(UpperBits));
1787 if (LowerBits)
1788 Target->_ori(ScratchReg, ScratchReg, LowerBits);
1789 Target->_addu(ScratchReg, ScratchReg, Base);
1790 } else {
1791 Target->_addiu(ScratchReg, Base, Offset);
1792 }
1793 }
1794
1795 return ScratchReg;
1796 }
1797
1798 void TargetMIPS32::PostLoweringLegalizer::legalizeMovFp(
1799 InstMIPS32MovFP64ToI64 *MovInstr) {
1800 Variable *Dest = MovInstr->getDest();
1801 Operand *Src = MovInstr->getSrc(0);
1802 const Type SrcTy = Src->getType();
1803
1804 if (Dest != nullptr && SrcTy == IceType_f64) {
1805 int32_t Offset = Dest->getStackOffset();
1806 auto *Base = Target->getPhysicalRegister(Target->getFrameOrStackReg());
1807 OperandMIPS32Mem *TAddr = OperandMIPS32Mem::create(
1808 Target->Func, IceType_f32, Base,
1809 llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset)));
1810 OperandMIPS32Mem *Addr = legalizeMemOperand(TAddr);
1811 auto *SrcV = llvm::cast<Variable>(Src);
1812 Variable *SrcR;
1813 if (MovInstr->getInt64Part() == Int64_Lo) {
1814 SrcR = Target->makeReg(
1815 IceType_f32, RegMIPS32::get64PairFirstRegNum(SrcV->getRegNum()));
1816 } else {
1817 SrcR = Target->makeReg(
1818 IceType_f32, RegMIPS32::get64PairSecondRegNum(SrcV->getRegNum()));
1819 }
1820 Target->_sw(SrcR, Addr);
1821 if (MovInstr->isDestRedefined()) {
1822 Target->_set_dest_redefined();
1823 }
1824 MovInstr->setDeleted();
1825 return;
1826 }
1827
1828 llvm::report_fatal_error("legalizeMovFp: Invalid operands");
1829 }
1830
1831 void TargetMIPS32::PostLoweringLegalizer::legalizeMov(InstMIPS32Mov *MovInstr) {
1832 Variable *Dest = MovInstr->getDest();
1833 assert(Dest != nullptr);
1834 const Type DestTy = Dest->getType();
1835 assert(DestTy != IceType_i64);
1836
1837 Operand *Src = MovInstr->getSrc(0);
1838 const Type SrcTy = Src->getType();
1839 (void)SrcTy;
1840 assert(SrcTy != IceType_i64);
1841
1842 bool Legalized = false;
1843 auto *SrcR = llvm::cast<Variable>(Src);
1844 if (Dest->hasReg() && SrcR->hasReg()) {
1845 // This might be a GP to/from FP move generated due to argument passing.
1846 // Use mtc1/mfc1 instead of mov.[s/d] if src and dst registers are of
1847 // different types.
1848 const bool IsDstGPR = RegMIPS32::isGPRReg(Dest->getRegNum());
1849 const bool IsSrcGPR = RegMIPS32::isGPRReg(SrcR->getRegNum());
1850 const RegNumT SRegNum = SrcR->getRegNum();
1851 const RegNumT DRegNum = Dest->getRegNum();
1852 if (IsDstGPR != IsSrcGPR) {
1853 if (IsDstGPR) {
1854 // Dest is GPR and SrcR is FPR. Use mfc1.
1855 int32_t TypeWidth = typeWidthInBytes(DestTy);
1856 if (MovInstr->getDestHi() != nullptr)
1857 TypeWidth += typeWidthInBytes(MovInstr->getDestHi()->getType());
1858 if (TypeWidth == 8) {
1859 // Split it into two mfc1 instructions; the source halves live in an
1860 // FPR pair and the destination halves in GPRs.
1861 Variable *SrcFPRHi = Target->makeReg(
1862 IceType_f32, RegMIPS32::get64PairFirstRegNum(SRegNum));
1863 Variable *SrcFPRLo = Target->makeReg(
1864 IceType_f32, RegMIPS32::get64PairSecondRegNum(SRegNum));
1865 Variable *DstGPRHi, *DstGPRLo;
1866 if (MovInstr->getDestHi() != nullptr) {
1867 DstGPRHi = Target->makeReg(IceType_i32,
1868 MovInstr->getDestHi()->getRegNum());
1869 DstGPRLo = Target->makeReg(IceType_i32, Dest->getRegNum());
1870 } else {
1871 DstGPRHi = Target->makeReg(
1872 IceType_i32, RegMIPS32::get64PairFirstRegNum(DRegNum));
1873 DstGPRLo = Target->makeReg(
1874 IceType_i32, RegMIPS32::get64PairSecondRegNum(DRegNum));
1875 }
1876 Target->_mov(DstGPRHi, SrcFPRHi);
1877 Target->_mov(DstGPRLo, SrcFPRLo);
1878 Legalized = true;
1879 } else {
1880 Variable *SrcFPR = Target->makeReg(IceType_f32, SRegNum);
1881 Variable *DstGPR = Target->makeReg(IceType_i32, DRegNum);
1882 Target->_mov(DstGPR, SrcFPR);
1883 Legalized = true;
1884 }
1884 } else {
1885 // Dest is FPR and SrcR is GPR. Use mtc1.
1886 if (typeWidthInBytes(Dest->getType()) == 8) {
1887 Variable *SrcGPRHi, *SrcGPRLo;
1888 // SrcR could be $zero which is i32
1889 if (SRegNum == RegMIPS32::Reg_ZERO) {
1890 SrcGPRHi = Target->makeReg(IceType_i32, SRegNum);
1891 SrcGPRLo = SrcGPRHi;
1892 } else {
1893 // Split it into two mtc1 instructions
1894 if (MovInstr->getSrcSize() == 2) {
1895 const auto FirstReg =
1896 (llvm::cast<Variable>(MovInstr->getSrc(0)))->getRegNum();
1897 const auto SecondReg =
1898 (llvm::cast<Variable>(MovInstr->getSrc(1)))->getRegNum();
1899 SrcGPRHi = Target->makeReg(IceType_i32, FirstReg);
1900 SrcGPRLo = Target->makeReg(IceType_i32, SecondReg);
1901 } else {
1902 SrcGPRLo = Target->makeReg(
1903 IceType_i32, RegMIPS32::get64PairFirstRegNum(SRegNum));
1904 SrcGPRHi = Target->makeReg(
1905 IceType_i32, RegMIPS32::get64PairSecondRegNum(SRegNum));
1906 }
1907 }
1908 Variable *DstFPRHi = Target->makeReg(
1909 IceType_f32, RegMIPS32::get64PairFirstRegNum(DRegNum));
1910 Variable *DstFPRLo = Target->makeReg(
1911 IceType_f32, RegMIPS32::get64PairSecondRegNum(DRegNum));
1912 Target->_mov(DstFPRHi, SrcGPRLo);
1913 Target->_mov(DstFPRLo, SrcGPRHi);
1914 Legalized = true;
1915 } else {
1916 Variable *SrcGPR = Target->makeReg(IceType_i32, SRegNum);
1917 Variable *DstFPR = Target->makeReg(IceType_f32, DRegNum);
1918 Target->_mov(DstFPR, SrcGPR);
1919 Legalized = true;
1920 }
1921 }
1922 }
1923 if (Legalized) {
1924 if (MovInstr->isDestRedefined()) {
1925 Target->_set_dest_redefined();
1926 }
1927 MovInstr->setDeleted();
1928 return;
1929 }
1930 }
1931
1932 if (!Dest->hasReg()) {
1933 auto *SrcR = llvm::cast<Variable>(Src);
1934 assert(SrcR->hasReg());
1935 assert(!SrcR->isRematerializable());
1936 int32_t Offset = Dest->getStackOffset();
1937
1938 // This is a _mov(Mem(), Variable), i.e., a store.
1939 auto *Base = Target->getPhysicalRegister(Target->getFrameOrStackReg());
1940
1941 OperandMIPS32Mem *TAddr = OperandMIPS32Mem::create(
1942 Target->Func, DestTy, Base,
1943 llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset)));
1944 OperandMIPS32Mem *TAddrHi = OperandMIPS32Mem::create(
1945 Target->Func, DestTy, Base,
1946 llvm::cast<ConstantInteger32>(
1947 Target->Ctx->getConstantInt32(Offset + 4)));
1948 OperandMIPS32Mem *Addr = legalizeMemOperand(TAddr);
1949
1950 // FP arguments are passed in GP registers if the first argument is in a GP
1951 // register. In that case the type of SrcR is still FP, so we must
1952 // explicitly generate sw instead of swc1.
1953 const RegNumT RegNum = SrcR->getRegNum();
1954 const bool IsSrcGPReg = RegMIPS32::isGPRReg(SrcR->getRegNum());
1955 if (SrcTy == IceType_f32 && IsSrcGPReg) {
1956 Variable *SrcGPR = Target->makeReg(IceType_i32, RegNum);
1957 Target->_sw(SrcGPR, Addr);
1958 } else if (SrcTy == IceType_f64 && IsSrcGPReg) {
1959 Variable *SrcGPRHi =
1960 Target->makeReg(IceType_i32, RegMIPS32::get64PairFirstRegNum(RegNum));
1961 Variable *SrcGPRLo = Target->makeReg(
1962 IceType_i32, RegMIPS32::get64PairSecondRegNum(RegNum));
1963 Target->_sw(SrcGPRHi, Addr);
1964 OperandMIPS32Mem *AddrHi = legalizeMemOperand(TAddrHi);
1965 Target->_sw(SrcGPRLo, AddrHi);
1966 } else if (DestTy == IceType_f64 && IsSrcGPReg) {
1967 const auto FirstReg =
1968 (llvm::cast<Variable>(MovInstr->getSrc(0)))->getRegNum();
1969 const auto SecondReg =
1970 (llvm::cast<Variable>(MovInstr->getSrc(1)))->getRegNum();
1971 Variable *SrcGPRHi = Target->makeReg(IceType_i32, FirstReg);
1972 Variable *SrcGPRLo = Target->makeReg(IceType_i32, SecondReg);
1973 Target->_sw(SrcGPRLo, Addr);
1974 OperandMIPS32Mem *AddrHi = legalizeMemOperand(TAddrHi);
1975 Target->_sw(SrcGPRHi, AddrHi);
1976 } else {
1977 Target->_sw(SrcR, Addr);
1978 }
1979
1980 Target->Context.insert<InstFakeDef>(Dest);
1981 Legalized = true;
1982 } else if (auto *Var = llvm::dyn_cast<Variable>(Src)) {
1983 if (Var->isRematerializable()) {
1984 // This is equivalent to an x86 _lea(RematOffset(%esp/%ebp), Variable).
1985
1986 // ExtraOffset is only needed for stack-pointer based frames as we have
1987 // to account for spill storage.
1988 const int32_t ExtraOffset =
1989 (Var->getRegNum() == Target->getFrameOrStackReg())
1990 ? Target->getFrameFixedAllocaOffset()
1991 : 0;
1992
1993 const int32_t Offset = Var->getStackOffset() + ExtraOffset;
1994 Variable *Base = Target->getPhysicalRegister(Var->getRegNum());
1995 Variable *T = newBaseRegister(Base, Offset, Dest->getRegNum());
1996 Target->_mov(Dest, T);
1997 Legalized = true;
1998 } else {
1999 if (!Var->hasReg()) {
2000 // This is a _mov(Variable, Mem()), i.e., a load.
2001 const int32_t Offset = Var->getStackOffset();
2002 auto *Base = Target->getPhysicalRegister(Target->getFrameOrStackReg());
2003 const RegNumT RegNum = Dest->getRegNum();
2004 const bool IsDstGPReg = RegMIPS32::isGPRReg(Dest->getRegNum());
2005 // If we are moving an i64 into a double through the stack, the address may
2006 // not be aligned to an 8-byte boundary, as we split the i64 into Hi/Lo
2007 // parts and store them individually with 4-byte alignment. Load the Hi/Lo
2008 // parts into a temporary register and move them to the dest using mtc1.
2009 if (DestTy == IceType_f64 && !Utils::IsAligned(Offset, 8) &&
2010 !IsDstGPReg) {
2011 auto *Reg = Target->makeReg(IceType_i32, Target->getReservedTmpReg());
2013 Variable *DestLo = Target->makeReg(
2014 IceType_f32, RegMIPS32::get64PairFirstRegNum(RegNum));
2015 Variable *DestHi = Target->makeReg(
2016 IceType_f32, RegMIPS32::get64PairSecondRegNum(RegNum));
2017 OperandMIPS32Mem *AddrLo = OperandMIPS32Mem::create(
2018 Target->Func, IceType_i32, Base,
2019 llvm::cast<ConstantInteger32>(
2020 Target->Ctx->getConstantInt32(Offset)));
2021 OperandMIPS32Mem *AddrHi = OperandMIPS32Mem::create(
2022 Target->Func, IceType_i32, Base,
2023 llvm::cast<ConstantInteger32>(
2024 Target->Ctx->getConstantInt32(Offset + 4)));
2025 Target->_lw(Reg, AddrLo);
2026 Target->_mov(DestLo, Reg);
2027 Target->_lw(Reg, AddrHi);
2028 Target->_mov(DestHi, Reg);
2029 } else {
2030 OperandMIPS32Mem *TAddr = OperandMIPS32Mem::create(
2031 Target->Func, DestTy, Base,
2032 llvm::cast<ConstantInteger32>(
2033 Target->Ctx->getConstantInt32(Offset)));
2034 OperandMIPS32Mem *Addr = legalizeMemOperand(TAddr);
2035 OperandMIPS32Mem *TAddrHi = OperandMIPS32Mem::create(
2036 Target->Func, DestTy, Base,
2037 llvm::cast<ConstantInteger32>(
2038 Target->Ctx->getConstantInt32(Offset + 4)));
2039 // FP arguments are passed in GP registers if the first argument is in a
2040 // GP register. In that case the type of Dest is still FP, so we must
2041 // explicitly generate lw instead of lwc1.
2042 if (DestTy == IceType_f32 && IsDstGPReg) {
2043 Variable *DstGPR = Target->makeReg(IceType_i32, RegNum);
2044 Target->_lw(DstGPR, Addr);
2045 } else if (DestTy == IceType_f64 && IsDstGPReg) {
2046 Variable *DstGPRHi = Target->makeReg(
2047 IceType_i32, RegMIPS32::get64PairFirstRegNum(RegNum));
2048 Variable *DstGPRLo = Target->makeReg(
2049 IceType_i32, RegMIPS32::get64PairSecondRegNum(RegNum));
2050 Target->_lw(DstGPRHi, Addr);
2051 OperandMIPS32Mem *AddrHi = legalizeMemOperand(TAddrHi);
2052 Target->_lw(DstGPRLo, AddrHi);
2063 } else {
2064 Target->_lw(Dest, Addr);
2065 }
2066 }
2067 Legalized = true;
2068 }
2069 }
2070 }
2071
2072 if (Legalized) {
2073 if (MovInstr->isDestRedefined()) {
2074 Target->_set_dest_redefined();
2075 }
2076 MovInstr->setDeleted();
2077 }
2078 }
2079
2080 OperandMIPS32Mem *
2081 TargetMIPS32::PostLoweringLegalizer::legalizeMemOperand(OperandMIPS32Mem *Mem) {
2082 if (llvm::isa<ConstantRelocatable>(Mem->getOffset())) {
2083 return nullptr;
2084 }
2085 Variable *Base = Mem->getBase();
2086 auto *Ci32 = llvm::cast<ConstantInteger32>(Mem->getOffset());
2087 int32_t Offset = Ci32->getValue();
2088
2089 if (Base->isRematerializable()) {
2090 const int32_t ExtraOffset =
2091 (Base->getRegNum() == Target->getFrameOrStackReg())
2092 ? Target->getFrameFixedAllocaOffset()
2093 : 0;
2094 Offset += Base->getStackOffset() + ExtraOffset;
2095 Base = Target->getPhysicalRegister(Base->getRegNum());
2096 }
2097
2098 constexpr bool SignExt = true;
2099 if (!OperandMIPS32Mem::canHoldOffset(Mem->getType(), SignExt, Offset)) {
2100 Base = newBaseRegister(Base, Offset, Target->getReservedTmpReg());
2101 Offset = 0;
2102 }
2103
2104 return OperandMIPS32Mem::create(
2105 Target->Func, Mem->getType(), Base,
2106 llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset)));
2107 }
2108
2109 Variable *TargetMIPS32::PostLoweringLegalizer::legalizeImmediate(int32_t Imm) {
2110 Variable *Reg = nullptr;
2111 if (!((std::numeric_limits<int16_t>::min() <= Imm) &&
2112 (Imm <= std::numeric_limits<int16_t>::max()))) {
2113 const uint32_t UpperBits = (Imm >> 16) & 0xFFFF;
2114 const uint32_t LowerBits = Imm & 0xFFFF;
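 // Illustrative: Imm = 0x54321 does not fit in a signed 16-bit immediate,
 // so this emits "lui TReg, 0x5" followed by "ori Reg, TReg, 0x4321".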
2115 Variable *TReg = Target->makeReg(IceType_i32, Target->getReservedTmpReg());
2116 Reg = Target->makeReg(IceType_i32, Target->getReservedTmpReg());
2117 if (LowerBits) {
2118 Target->_lui(TReg, Target->Ctx->getConstantInt32(UpperBits));
2119 Target->_ori(Reg, TReg, LowerBits);
2120 } else {
2121 Target->_lui(Reg, Target->Ctx->getConstantInt32(UpperBits));
2122 }
2123 }
2124 return Reg;
2125 }
2126
2127 void TargetMIPS32::postLowerLegalization() {
2128 Func->dump("Before postLowerLegalization");
2129 assert(hasComputedFrame());
2130 for (CfgNode *Node : Func->getNodes()) {
2131 Context.init(Node);
2132 PostLoweringLegalizer Legalizer(this);
2133 while (!Context.atEnd()) {
2134 PostIncrLoweringContext PostIncrement(Context);
2135 Inst *CurInstr = iteratorToInst(Context.getCur());
2136 const SizeT NumSrcs = CurInstr->getSrcSize();
2137 Operand *Src0 = NumSrcs < 1 ? nullptr : CurInstr->getSrc(0);
2138 Operand *Src1 = NumSrcs < 2 ? nullptr : CurInstr->getSrc(1);
2139 auto *Src0V = llvm::dyn_cast_or_null<Variable>(Src0);
2140 auto *Src0M = llvm::dyn_cast_or_null<OperandMIPS32Mem>(Src0);
2141 auto *Src1M = llvm::dyn_cast_or_null<OperandMIPS32Mem>(Src1);
2142 Variable *Dst = CurInstr->getDest();
2143 if (auto *MovInstr = llvm::dyn_cast<InstMIPS32Mov>(CurInstr)) {
2144 Legalizer.legalizeMov(MovInstr);
2145 continue;
2146 }
2147 if (auto *MovInstr = llvm::dyn_cast<InstMIPS32MovFP64ToI64>(CurInstr)) {
2148 Legalizer.legalizeMovFp(MovInstr);
2149 continue;
2150 }
2151 if (llvm::isa<InstMIPS32Sw>(CurInstr)) {
2152 if (auto *LegalMem = Legalizer.legalizeMemOperand(Src1M)) {
2153 _sw(Src0V, LegalMem);
2154 CurInstr->setDeleted();
2155 }
2156 continue;
2157 }
2158 if (llvm::isa<InstMIPS32Swc1>(CurInstr)) {
2159 if (auto *LegalMem = Legalizer.legalizeMemOperand(Src1M)) {
2160 _swc1(Src0V, LegalMem);
2161 CurInstr->setDeleted();
2162 }
2163 continue;
2164 }
2165 if (llvm::isa<InstMIPS32Sdc1>(CurInstr)) {
2166 if (auto *LegalMem = Legalizer.legalizeMemOperand(Src1M)) {
2167 _sdc1(Src0V, LegalMem);
2168 CurInstr->setDeleted();
2169 }
2170 continue;
2171 }
2172 if (llvm::isa<InstMIPS32Lw>(CurInstr)) {
2173 if (auto *LegalMem = Legalizer.legalizeMemOperand(Src0M)) {
2174 _lw(Dst, LegalMem);
2175 CurInstr->setDeleted();
2176 }
2177 continue;
2178 }
2179 if (llvm::isa<InstMIPS32Lwc1>(CurInstr)) {
2180 if (auto *LegalMem = Legalizer.legalizeMemOperand(Src0M)) {
2181 _lwc1(Dst, LegalMem);
2182 CurInstr->setDeleted();
2183 }
2184 continue;
2185 }
2186 if (llvm::isa<InstMIPS32Ldc1>(CurInstr)) {
2187 if (auto *LegalMem = Legalizer.legalizeMemOperand(Src0M)) {
2188 _ldc1(Dst, LegalMem);
2189 CurInstr->setDeleted();
2190 }
2191 continue;
2192 }
2193 if (auto *AddiuInstr = llvm::dyn_cast<InstMIPS32Addiu>(CurInstr)) {
2194 if (auto *LegalImm = Legalizer.legalizeImmediate(
2195 static_cast<int32_t>(AddiuInstr->getImmediateValue()))) {
2196 _addu(Dst, Src0V, LegalImm);
2197 CurInstr->setDeleted();
2198 }
2199 continue;
2200 }
2201 }
2202 }
2203 }
2204
2205 Operand *TargetMIPS32::loOperand(Operand *Operand) {
2206 assert(Operand->getType() == IceType_i64);
2207 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
2208 return Var64On32->getLo();
2209 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
2210 return Ctx->getConstantInt32(static_cast<uint32_t>(Const->getValue()));
2211 }
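 // E.g. for the i64 constant 0x1122334455667788 this returns the low word
 // 0x55667788; hiOperand(), defined below, returns the high word 0x11223344.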
2212 if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) {
2213 // Conservatively disallow memory operands with side-effects (pre/post
2214 // increment) in case of duplication.
2215 assert(Mem->getAddrMode() == OperandMIPS32Mem::Offset);
2216 return OperandMIPS32Mem::create(Func, IceType_i32, Mem->getBase(),
2217 Mem->getOffset(), Mem->getAddrMode());
2218 }
2219 llvm_unreachable("Unsupported operand type");
2220 return nullptr;
2221 }
2222
2223 Operand *TargetMIPS32::getOperandAtIndex(Operand *Operand, Type BaseType,
2224 uint32_t Index) {
2225 if (!isVectorType(Operand->getType())) {
2226 llvm::report_fatal_error("getOperandAtIndex: Operand is not vector");
2227 return nullptr;
2228 }
2229
2230 if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) {
2231 assert(Mem->getAddrMode() == OperandMIPS32Mem::Offset);
2232 Variable *Base = Mem->getBase();
2233 auto *Offset = llvm::cast<ConstantInteger32>(Mem->getOffset());
2234 assert(!Utils::WouldOverflowAdd(Offset->getValue(), Index * typeWidthInBytes(BaseType)));
2235 int32_t NextOffsetVal =
2236 Offset->getValue() + (Index * typeWidthInBytes(BaseType));
2237 constexpr bool NoSignExt = false;
2238 if (!OperandMIPS32Mem::canHoldOffset(BaseType, NoSignExt, NextOffsetVal)) {
2239 Constant *ElemOffset = Ctx->getConstantInt32(Index * typeWidthInBytes(BaseType));
2240 Variable *NewBase = Func->makeVariable(Base->getType());
2241 lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase,
2242 Base, ElemOffset));
2243 Base = NewBase;
2244 } else {
2245 Offset =
2246 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal));
2247 }
2248 return OperandMIPS32Mem::create(Func, BaseType, Base, Offset,
2249 Mem->getAddrMode());
2250 }
2251
2252 if (auto *VarVecOn32 = llvm::dyn_cast<VariableVecOn32>(Operand))
2253 return VarVecOn32->getContainers()[Index];
2254
2255 llvm_unreachable("Unsupported operand type");
2256 return nullptr;
2257 }
2258
2259 Operand *TargetMIPS32::hiOperand(Operand *Operand) {
2260 assert(Operand->getType() == IceType_i64);
2261 if (Operand->getType() != IceType_i64)
2262 return Operand;
2263 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
2264 return Var64On32->getHi();
2265 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
2266 return Ctx->getConstantInt32(
2267 static_cast<uint32_t>(Const->getValue() >> 32));
2268 }
2269 if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) {
2270 // Conservatively disallow memory operands with side-effects
2271 // in case of duplication.
2272 assert(Mem->getAddrMode() == OperandMIPS32Mem::Offset);
2273 const Type SplitType = IceType_i32;
2274 Variable *Base = Mem->getBase();
2275 auto *Offset = llvm::cast<ConstantInteger32>(Mem->getOffset());
2276 assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4));
2277 int32_t NextOffsetVal = Offset->getValue() + 4;
2278 constexpr bool NoSignExt = false;
2279 if (!OperandMIPS32Mem::canHoldOffset(SplitType, NoSignExt, NextOffsetVal)) {
2280 // We have to make a temp variable and add 4 to either Base or Offset.
2281 // If we add 4 to Offset, this will convert a non-RegReg addressing
2282 // mode into a RegReg addressing mode. Since NaCl sandboxing disallows
2283 // RegReg addressing modes, prefer adding to base and replacing instead.
2284 // Thus we leave the old offset alone.
2285 Constant *Four = Ctx->getConstantInt32(4);
2286 Variable *NewBase = Func->makeVariable(Base->getType());
2287 lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase,
2288 Base, Four));
2289 Base = NewBase;
2290 } else {
2291 Offset =
2292 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal));
2293 }
2294 return OperandMIPS32Mem::create(Func, SplitType, Base, Offset,
2295 Mem->getAddrMode());
2296 }
2297 llvm_unreachable("Unsupported operand type");
2298 return nullptr;
2299 }
2300
2301 SmallBitVector TargetMIPS32::getRegisterSet(RegSetMask Include,
2302 RegSetMask Exclude) const {
2303 SmallBitVector Registers(RegMIPS32::Reg_NUM);
2304
2305 #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \
2306 isI64Pair, isFP32, isFP64, isVec128, alias_init) \
2307 if (scratch && (Include & RegSet_CallerSave)) \
2308 Registers[RegMIPS32::val] = true; \
2309 if (preserved && (Include & RegSet_CalleeSave)) \
2310 Registers[RegMIPS32::val] = true; \
2311 if (stackptr && (Include & RegSet_StackPointer)) \
2312 Registers[RegMIPS32::val] = true; \
2313 if (frameptr && (Include & RegSet_FramePointer)) \
2314 Registers[RegMIPS32::val] = true; \
2315 if (scratch && (Exclude & RegSet_CallerSave)) \
2316 Registers[RegMIPS32::val] = false; \
2317 if (preserved && (Exclude & RegSet_CalleeSave)) \
2318 Registers[RegMIPS32::val] = false; \
2319 if (stackptr && (Exclude & RegSet_StackPointer)) \
2320 Registers[RegMIPS32::val] = false; \
2321 if (frameptr && (Exclude & RegSet_FramePointer)) \
2322 Registers[RegMIPS32::val] = false;
2323
2324 REGMIPS32_TABLE
2325
2326 #undef X
2327
2328 return Registers;
2329 }
2330
2331 void TargetMIPS32::lowerAlloca(const InstAlloca *Instr) {
2332 // Conservatively require the stack to be aligned. Some stack adjustment
2333 // operations implemented below assume that the stack is aligned before the
2334 // alloca. All the alloca code ensures that the stack alignment is preserved
2335 // after the alloca. The stack alignment restriction can be relaxed in some
2336 // cases.
2337 NeedsStackAlignment = true;
2338
2339 // For default align=0, set it to the real value 1, to avoid any
2340 // bit-manipulation problems below.
2341 const uint32_t AlignmentParam = std::max(1u, Instr->getAlignInBytes());
2342
2343 // LLVM enforces power of 2 alignment.
2344 assert(llvm::isPowerOf2_32(AlignmentParam));
2345 assert(llvm::isPowerOf2_32(MIPS32_STACK_ALIGNMENT_BYTES));
2346
2347 const uint32_t Alignment =
2348 std::max(AlignmentParam, MIPS32_STACK_ALIGNMENT_BYTES);
2349 const bool OverAligned = Alignment > MIPS32_STACK_ALIGNMENT_BYTES;
2350 const bool OptM1 = Func->getOptLevel() == Opt_m1;
2351 const bool AllocaWithKnownOffset = Instr->getKnownFrameOffset();
2352 const bool UseFramePointer =
2353 hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1;
2354
2355 if (UseFramePointer)
2356 setHasFramePointer();
2357
2358 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
2359
2360 Variable *Dest = Instr->getDest();
2361 Operand *TotalSize = Instr->getSizeInBytes();
2362
2363 if (const auto *ConstantTotalSize =
2364 llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
2365 const uint32_t Value =
2366 Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment);
2367 FixedAllocaSizeBytes += Value;
2368 // Constant size alloca.
2369 if (!UseFramePointer) {
2370 // If we don't need a frame pointer, this alloca has a known offset from the
2371 // stack pointer. We don't need to adjust the stack pointer, nor assign any
2372 // value to Dest, as Dest is rematerializable.
2373 assert(Dest->isRematerializable());
2374 Context.insert<InstFakeDef>(Dest);
2375 return;
2376 }
2377
2378 if (Alignment > MIPS32_STACK_ALIGNMENT_BYTES) {
2379 CurrentAllocaOffset =
2380 Utils::applyAlignment(CurrentAllocaOffset, Alignment);
2381 }
2382 auto *T = I32Reg();
2383 _addiu(T, SP, CurrentAllocaOffset);
2384 _mov(Dest, T);
2385 CurrentAllocaOffset += Value;
2386 return;
2387
2388 } else {
2389 // Non-constant sizes need to be adjusted to the next highest multiple of
2390 // the required alignment at runtime.
2391 VariableAllocaUsed = true;
2392 VariableAllocaAlignBytes = AlignmentParam;
2393 Variable *AlignAmount;
2394 auto *TotalSizeR = legalizeToReg(TotalSize, Legal_Reg);
2395 auto *T1 = I32Reg();
2396 auto *T2 = I32Reg();
2397 auto *T3 = I32Reg();
2398 auto *T4 = I32Reg();
2399 auto *T5 = I32Reg();
2400 _addiu(T1, TotalSizeR, MIPS32_STACK_ALIGNMENT_BYTES - 1);
2401 _addiu(T2, getZero(), -MIPS32_STACK_ALIGNMENT_BYTES);
2402 _and(T3, T1, T2);
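 // T3 = (TotalSize + 15) & ~15 rounds the requested size up to the 16-byte
 // MIPS32_STACK_ALIGNMENT_BYTES; e.g. a 20-byte request reserves 32 bytes.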
2403 _subu(T4, SP, T3);
2404 if (Instr->getAlignInBytes()) {
2405 AlignAmount =
2406 legalizeToReg(Ctx->getConstantInt32(-AlignmentParam), Legal_Reg);
2407 _and(T5, T4, AlignAmount);
2408 _mov(Dest, T5);
2409 } else {
2410 _mov(Dest, T4);
2411 }
2412 _mov(SP, Dest);
2413 return;
2414 }
2415 }
2416
2417 void TargetMIPS32::lowerInt64Arithmetic(const InstArithmetic *Instr,
2418 Variable *Dest, Operand *Src0,
2419 Operand *Src1) {
2420 InstArithmetic::OpKind Op = Instr->getOp();
2421 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
2422 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2423 Variable *Src0LoR = nullptr;
2424 Variable *Src1LoR = nullptr;
2425 Variable *Src0HiR = nullptr;
2426 Variable *Src1HiR = nullptr;
2427
2428 switch (Op) {
2429 case InstArithmetic::_num:
2430 llvm::report_fatal_error("Unknown arithmetic operator");
2431 return;
2432 case InstArithmetic::Add: {
2433 Src0LoR = legalizeToReg(loOperand(Src0));
2434 Src1LoR = legalizeToReg(loOperand(Src1));
2435 Src0HiR = legalizeToReg(hiOperand(Src0));
2436 Src1HiR = legalizeToReg(hiOperand(Src1));
2437 auto *T_Carry = I32Reg(), *T_Lo = I32Reg(), *T_Hi = I32Reg(),
2438 *T_Hi2 = I32Reg();
2439 _addu(T_Lo, Src0LoR, Src1LoR);
2440 _mov(DestLo, T_Lo);
2441 _sltu(T_Carry, T_Lo, Src0LoR);
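 // The unsigned low-word sum wraps iff it is smaller than an operand,
 // e.g. 0xFFFFFFFF + 0x1 = 0x0 < 0xFFFFFFFF, so sltu recovers the carry.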
2442 _addu(T_Hi, T_Carry, Src0HiR);
2443 _addu(T_Hi2, Src1HiR, T_Hi);
2444 _mov(DestHi, T_Hi2);
2445 return;
2446 }
2447 case InstArithmetic::And: {
2448 Src0LoR = legalizeToReg(loOperand(Src0));
2449 Src1LoR = legalizeToReg(loOperand(Src1));
2450 Src0HiR = legalizeToReg(hiOperand(Src0));
2451 Src1HiR = legalizeToReg(hiOperand(Src1));
2452 auto *T_Lo = I32Reg(), *T_Hi = I32Reg();
2453 _and(T_Lo, Src0LoR, Src1LoR);
2454 _mov(DestLo, T_Lo);
2455 _and(T_Hi, Src0HiR, Src1HiR);
2456 _mov(DestHi, T_Hi);
2457 return;
2458 }
2459 case InstArithmetic::Sub: {
2460 Src0LoR = legalizeToReg(loOperand(Src0));
2461 Src1LoR = legalizeToReg(loOperand(Src1));
2462 Src0HiR = legalizeToReg(hiOperand(Src0));
2463 Src1HiR = legalizeToReg(hiOperand(Src1));
2464 auto *T_Borrow = I32Reg(), *T_Lo = I32Reg(), *T_Hi = I32Reg(),
2465 *T_Hi2 = I32Reg();
2466 _subu(T_Lo, Src0LoR, Src1LoR);
2467 _mov(DestLo, T_Lo);
2468 _sltu(T_Borrow, Src0LoR, Src1LoR);
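 // Unsigned subtraction borrows iff Src0Lo < Src1Lo, so sltu yields the
 // borrow that is then folded into the high-word subtraction below.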
2469 _addu(T_Hi, T_Borrow, Src1HiR);
2470 _subu(T_Hi2, Src0HiR, T_Hi);
2471 _mov(DestHi, T_Hi2);
2472 return;
2473 }
2474 case InstArithmetic::Or: {
2475 Src0LoR = legalizeToReg(loOperand(Src0));
2476 Src1LoR = legalizeToReg(loOperand(Src1));
2477 Src0HiR = legalizeToReg(hiOperand(Src0));
2478 Src1HiR = legalizeToReg(hiOperand(Src1));
2479 auto *T_Lo = I32Reg(), *T_Hi = I32Reg();
2480 _or(T_Lo, Src0LoR, Src1LoR);
2481 _mov(DestLo, T_Lo);
2482 _or(T_Hi, Src0HiR, Src1HiR);
2483 _mov(DestHi, T_Hi);
2484 return;
2485 }
2486 case InstArithmetic::Xor: {
2487 Src0LoR = legalizeToReg(loOperand(Src0));
2488 Src1LoR = legalizeToReg(loOperand(Src1));
2489 Src0HiR = legalizeToReg(hiOperand(Src0));
2490 Src1HiR = legalizeToReg(hiOperand(Src1));
2491 auto *T_Lo = I32Reg(), *T_Hi = I32Reg();
2492 _xor(T_Lo, Src0LoR, Src1LoR);
2493 _mov(DestLo, T_Lo);
2494 _xor(T_Hi, Src0HiR, Src1HiR);
2495 _mov(DestHi, T_Hi);
2496 return;
2497 }
2498 case InstArithmetic::Mul: {
2499 // TODO(rkotler): Make sure that mul has the side effect of clobbering
2500 // LO, HI. Check for any other LO, HI quirkiness in this section.
2501 Src0LoR = legalizeToReg(loOperand(Src0));
2502 Src1LoR = legalizeToReg(loOperand(Src1));
2503 Src0HiR = legalizeToReg(hiOperand(Src0));
2504 Src1HiR = legalizeToReg(hiOperand(Src1));
2505 auto *T_Lo = I32Reg(RegMIPS32::Reg_LO), *T_Hi = I32Reg(RegMIPS32::Reg_HI);
2506 auto *T1 = I32Reg(), *T2 = I32Reg();
2507 auto *TM1 = I32Reg(), *TM2 = I32Reg(), *TM3 = I32Reg(), *TM4 = I32Reg();
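 // The 64-bit product mod 2^64 decomposes as
 //   a * b = aLo*bLo + ((aHi*bLo + aLo*bHi) << 32),
 // so multu supplies the full 64-bit aLo*bLo (high word via HI), while the
 // two cross terms only affect the high word.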
2508 _multu(T_Lo, Src0LoR, Src1LoR);
2509 Context.insert<InstFakeDef>(T_Hi, T_Lo);
2510 _mflo(T1, T_Lo);
2511 _mfhi(T2, T_Hi);
2512 _mov(DestLo, T1);
2513 _mul(TM1, Src0HiR, Src1LoR);
2514 _mul(TM2, Src0LoR, Src1HiR);
2515 _addu(TM3, TM1, T2);
2516 _addu(TM4, TM3, TM2);
2517 _mov(DestHi, TM4);
2518 return;
2519 }
2520 case InstArithmetic::Shl: {
2521 auto *T_Lo = I32Reg();
2522 auto *T_Hi = I32Reg();
2523 auto *T1_Lo = I32Reg();
2524 auto *T1_Hi = I32Reg();
2525 auto *T1 = I32Reg();
2526 auto *T2 = I32Reg();
2527 auto *T3 = I32Reg();
2528 auto *T4 = I32Reg();
2529 auto *T5 = I32Reg();
2530
2531 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Src1)) {
2532 Src0LoR = legalizeToReg(loOperand(Src0));
2533 int64_t ShiftAmount = Const->getValue();
2534 if (ShiftAmount == 1) {
2535 Src0HiR = legalizeToReg(hiOperand(Src0));
2536 _addu(T_Lo, Src0LoR, Src0LoR);
2537 _sltu(T1, T_Lo, Src0LoR);
2538 _addu(T2, T1, Src0HiR);
2539 _addu(T_Hi, Src0HiR, T2);
2540 } else if (ShiftAmount < INT32_BITS) {
2541 Src0HiR = legalizeToReg(hiOperand(Src0));
2542 _srl(T1, Src0LoR, INT32_BITS - ShiftAmount);
2543 _sll(T2, Src0HiR, ShiftAmount);
2544 _or(T_Hi, T1, T2);
2545 _sll(T_Lo, Src0LoR, ShiftAmount);
2546 } else if (ShiftAmount == INT32_BITS) {
2547 _addiu(T_Lo, getZero(), 0);
2548 _mov(T_Hi, Src0LoR);
2549 } else if (ShiftAmount > INT32_BITS && ShiftAmount < 64) {
2550 _sll(T_Hi, Src0LoR, ShiftAmount - INT32_BITS);
2551 _addiu(T_Lo, getZero(), 0);
2552 }
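 // Illustrative: a constant shift by 40 takes the last branch above,
 // leaving Lo = 0 and Hi = (low word << 8).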
2553 _mov(DestLo, T_Lo);
2554 _mov(DestHi, T_Hi);
2555 return;
2556 }
2557
2558 Src0LoR = legalizeToReg(loOperand(Src0));
2559 Src1LoR = legalizeToReg(loOperand(Src1));
2560 Src0HiR = legalizeToReg(hiOperand(Src0));
2561
2562 _sllv(T1, Src0HiR, Src1LoR);
2563 _not(T2, Src1LoR);
2564 _srl(T3, Src0LoR, 1);
2565 _srlv(T4, T3, T2);
2566 _or(T_Hi, T1, T4);
2567 _sllv(T_Lo, Src0LoR, Src1LoR);
2568
2569 _mov(T1_Hi, T_Hi);
2570 _mov(T1_Lo, T_Lo);
2571 _andi(T5, Src1LoR, INT32_BITS);
2572 _movn(T1_Hi, T_Lo, T5);
2573 _movn(T1_Lo, getZero(), T5);
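 // T5 = amount & 32 is non-zero iff the runtime shift is >= 32; in that
 // case movn promotes the shifted low word into Hi and clears Lo.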
2574 _mov(DestHi, T1_Hi);
2575 _mov(DestLo, T1_Lo);
2576 return;
2577 }
2578 case InstArithmetic::Lshr: {
2579
2580 auto *T_Lo = I32Reg();
2581 auto *T_Hi = I32Reg();
2582 auto *T1_Lo = I32Reg();
2583 auto *T1_Hi = I32Reg();
2584 auto *T1 = I32Reg();
2585 auto *T2 = I32Reg();
2586 auto *T3 = I32Reg();
2587 auto *T4 = I32Reg();
2588 auto *T5 = I32Reg();
2589
2590 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Src1)) {
2591 Src0HiR = legalizeToReg(hiOperand(Src0));
2592 int64_t ShiftAmount = Const->getValue();
2593 if (ShiftAmount < INT32_BITS) {
2594 Src0LoR = legalizeToReg(loOperand(Src0));
2595 _sll(T1, Src0HiR, INT32_BITS - ShiftAmount);
2596 _srl(T2, Src0LoR, ShiftAmount);
2597 _or(T_Lo, T1, T2);
2598 _srl(T_Hi, Src0HiR, ShiftAmount);
2599 } else if (ShiftAmount == INT32_BITS) {
2600 _mov(T_Lo, Src0HiR);
2601 _addiu(T_Hi, getZero(), 0);
2602 } else if (ShiftAmount > INT32_BITS && ShiftAmount < 64) {
2603 _srl(T_Lo, Src0HiR, ShiftAmount - INT32_BITS);
2604 _addiu(T_Hi, getZero(), 0);
2605 }
2606 _mov(DestLo, T_Lo);
2607 _mov(DestHi, T_Hi);
2608 return;
2609 }
2610
2611 Src0LoR = legalizeToReg(loOperand(Src0));
2612 Src1LoR = legalizeToReg(loOperand(Src1));
2613 Src0HiR = legalizeToReg(hiOperand(Src0));
2614
2615 _srlv(T1, Src0LoR, Src1LoR);
2616 _not(T2, Src1LoR);
2617 _sll(T3, Src0HiR, 1);
2618 _sllv(T4, T3, T2);
2619 _or(T_Lo, T1, T4);
2620 _srlv(T_Hi, Src0HiR, Src1LoR);
2621
2622 _mov(T1_Hi, T_Hi);
2623 _mov(T1_Lo, T_Lo);
2624 _andi(T5, Src1LoR, INT32_BITS);
2625 _movn(T1_Lo, T_Hi, T5);
2626 _movn(T1_Hi, getZero(), T5);
2627 _mov(DestHi, T1_Hi);
2628 _mov(DestLo, T1_Lo);
2629 return;
2630 }
2631 case InstArithmetic::Ashr: {
2632
2633 auto *T_Lo = I32Reg();
2634 auto *T_Hi = I32Reg();
2635 auto *T1_Lo = I32Reg();
2636 auto *T1_Hi = I32Reg();
2637 auto *T1 = I32Reg();
2638 auto *T2 = I32Reg();
2639 auto *T3 = I32Reg();
2640 auto *T4 = I32Reg();
2641 auto *T5 = I32Reg();
2642 auto *T6 = I32Reg();
2643
2644 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Src1)) {
2645 Src0HiR = legalizeToReg(hiOperand(Src0));
2646 int64_t ShiftAmount = Const->getValue();
2647 if (ShiftAmount < INT32_BITS) {
2648 Src0LoR = legalizeToReg(loOperand(Src0));
2649 _sll(T1, Src0HiR, INT32_BITS - ShiftAmount);
2650 _srl(T2, Src0LoR, ShiftAmount);
2651 _or(T_Lo, T1, T2);
2652 _sra(T_Hi, Src0HiR, ShiftAmount);
2653 } else if (ShiftAmount == INT32_BITS) {
2654 _sra(T_Hi, Src0HiR, INT32_BITS - 1);
2655 _mov(T_Lo, Src0HiR);
2656 } else if (ShiftAmount > INT32_BITS && ShiftAmount < 64) {
2657 _sra(T_Lo, Src0HiR, ShiftAmount - INT32_BITS);
2658 _sra(T_Hi, Src0HiR, INT32_BITS - 1);
2659 }
2660 _mov(DestLo, T_Lo);
2661 _mov(DestHi, T_Hi);
2662 return;
2663 }
2664
2665 Src0LoR = legalizeToReg(loOperand(Src0));
2666 Src1LoR = legalizeToReg(loOperand(Src1));
2667 Src0HiR = legalizeToReg(hiOperand(Src0));
2668
2669 _srlv(T1, Src0LoR, Src1LoR);
2670 _not(T2, Src1LoR);
2671 _sll(T3, Src0HiR, 1);
2672 _sllv(T4, T3, T2);
2673 _or(T_Lo, T1, T4);
2674 _srav(T_Hi, Src0HiR, Src1LoR);
2675
2676 _mov(T1_Hi, T_Hi);
2677 _mov(T1_Lo, T_Lo);
2678 _andi(T5, Src1LoR, INT32_BITS);
2679 _movn(T1_Lo, T_Hi, T5);
2680 _sra(T6, Src0HiR, INT32_BITS - 1);
2681 _movn(T1_Hi, T6, T5);
2682 _mov(DestHi, T1_Hi);
2683 _mov(DestLo, T1_Lo);
2684 return;
2685 }
2686 case InstArithmetic::Fadd:
2687 case InstArithmetic::Fsub:
2688 case InstArithmetic::Fmul:
2689 case InstArithmetic::Fdiv:
2690 case InstArithmetic::Frem:
2691 llvm::report_fatal_error("FP instruction with i64 type");
2692 return;
2693 case InstArithmetic::Udiv:
2694 case InstArithmetic::Sdiv:
2695 case InstArithmetic::Urem:
2696 case InstArithmetic::Srem:
2697 llvm::report_fatal_error("64-bit div and rem should have been prelowered");
2698 return;
2699 }
2700 }
2701
2702 void TargetMIPS32::lowerArithmetic(const InstArithmetic *Instr) {
2703 Variable *Dest = Instr->getDest();
2704
2705 if (Dest->isRematerializable()) {
2706 Context.insert<InstFakeDef>(Dest);
2707 return;
2708 }
2709
2710 // We need to signal all the UnimplementedLoweringError errors before any
2711 // legalization into new variables, otherwise Om1 register allocation may fail
2712 // when it sees variables that are defined but not used.
2713 Type DestTy = Dest->getType();
2714 Operand *Src0 = legalizeUndef(Instr->getSrc(0));
2715 Operand *Src1 = legalizeUndef(Instr->getSrc(1));
2716 if (DestTy == IceType_i64) {
2717 lowerInt64Arithmetic(Instr, Instr->getDest(), Src0, Src1);
2718 return;
2719 }
2720 if (isVectorType(Dest->getType())) {
2721 llvm::report_fatal_error("Arithmetic: Destination type is vector");
2722 return;
2723 }
2724
2725 Variable *T = makeReg(Dest->getType());
2726 Variable *Src0R = legalizeToReg(Src0);
2727 Variable *Src1R = nullptr;
2728 uint32_t Value = 0;
2729 bool IsSrc1Imm16 = false;
2730
2731 switch (Instr->getOp()) {
2732 case InstArithmetic::Add:
2733 case InstArithmetic::Sub: {
2734 auto *Const32 = llvm::dyn_cast<ConstantInteger32>(Src1);
2735 if (Const32 != nullptr && isInt<16>(int32_t(Const32->getValue()))) {
2736 IsSrc1Imm16 = true;
2737 Value = Const32->getValue();
2738 } else {
2739 Src1R = legalizeToReg(Src1);
2740 }
2741 break;
2742 }
2743 case InstArithmetic::And:
2744 case InstArithmetic::Or:
2745 case InstArithmetic::Xor:
2746 case InstArithmetic::Shl:
2747 case InstArithmetic::Lshr:
2748 case InstArithmetic::Ashr: {
2749 auto *Const32 = llvm::dyn_cast<ConstantInteger32>(Src1);
2750 if (Const32 != nullptr && llvm::isUInt<16>(uint32_t(Const32->getValue()))) {
2751 IsSrc1Imm16 = true;
2752 Value = Const32->getValue();
2753 } else {
2754 Src1R = legalizeToReg(Src1);
2755 }
2756 break;
2757 }
2758 default:
2759 Src1R = legalizeToReg(Src1);
2760 break;
2761 }
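 // Note: addiu sign-extends its 16-bit immediate, while andi/ori/xori
 // zero-extend theirs -- hence the signed range check for Add/Sub above
 // and the unsigned check for the other immediate forms.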
2762 constexpr uint32_t DivideByZeroTrapCode = 7;
2763
2764 switch (Instr->getOp()) {
2765 case InstArithmetic::_num:
2766 break;
2767 case InstArithmetic::Add: {
2768 auto *T0R = Src0R;
2769 auto *T1R = Src1R;
2770 if (Dest->getType() != IceType_i32) {
2771 T0R = makeReg(IceType_i32);
2772 lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
2773 if (!IsSrc1Imm16) {
2774 T1R = makeReg(IceType_i32);
2775 lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
2776 }
2777 }
2778 if (IsSrc1Imm16) {
2779 _addiu(T, T0R, Value);
2780 } else {
2781 _addu(T, T0R, T1R);
2782 }
2783 _mov(Dest, T);
2784 return;
2785 }
2786 case InstArithmetic::And:
2787 if (IsSrc1Imm16) {
2788 _andi(T, Src0R, Value);
2789 } else {
2790 _and(T, Src0R, Src1R);
2791 }
2792 _mov(Dest, T);
2793 return;
2794 case InstArithmetic::Or:
2795 if (IsSrc1Imm16) {
2796 _ori(T, Src0R, Value);
2797 } else {
2798 _or(T, Src0R, Src1R);
2799 }
2800 _mov(Dest, T);
2801 return;
2802 case InstArithmetic::Xor:
2803 if (IsSrc1Imm16) {
2804 _xori(T, Src0R, Value);
2805 } else {
2806 _xor(T, Src0R, Src1R);
2807 }
2808 _mov(Dest, T);
2809 return;
2810 case InstArithmetic::Sub: {
2811 auto *T0R = Src0R;
2812 auto *T1R = Src1R;
2813 if (Dest->getType() != IceType_i32) {
2814 T0R = makeReg(IceType_i32);
2815 lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
2816 if (!IsSrc1Imm16) {
2817 T1R = makeReg(IceType_i32);
2818 lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
2819 }
2820 }
2821 if (IsSrc1Imm16) {
2822 _addiu(T, T0R, -Value);
2823 } else {
2824 _subu(T, T0R, T1R);
2825 }
2826 _mov(Dest, T);
2827 return;
2828 }
2829 case InstArithmetic::Mul: {
2830 _mul(T, Src0R, Src1R);
2831 _mov(Dest, T);
2832 return;
2833 }
2834 case InstArithmetic::Shl: {
2835 if (IsSrc1Imm16) {
2836 _sll(T, Src0R, Value);
2837 } else {
2838 _sllv(T, Src0R, Src1R);
2839 }
2840 _mov(Dest, T);
2841 return;
2842 }
2843 case InstArithmetic::Lshr: {
2844 auto *T0R = Src0R;
2845 auto *T1R = Src1R;
2846 if (Dest->getType() != IceType_i32) {
2847 T0R = makeReg(IceType_i32);
2848 lowerCast(InstCast::create(Func, InstCast::Zext, T0R, Src0R));
2849 if (!IsSrc1Imm16) {
2850 T1R = makeReg(IceType_i32);
2851 lowerCast(InstCast::create(Func, InstCast::Zext, T1R, Src1R));
2852 }
2853 }
2854 if (IsSrc1Imm16) {
2855 _srl(T, T0R, Value);
2856 } else {
2857 _srlv(T, T0R, T1R);
2858 }
2859 _mov(Dest, T);
2860 return;
2861 }
2862 case InstArithmetic::Ashr: {
2863 auto *T0R = Src0R;
2864 auto *T1R = Src1R;
2865 if (Dest->getType() != IceType_i32) {
2866 T0R = makeReg(IceType_i32);
2867 lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
2868 if (!IsSrc1Imm16) {
2869 T1R = makeReg(IceType_i32);
2870 lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
2871 }
2872 }
2873 if (IsSrc1Imm16) {
2874 _sra(T, T0R, Value);
2875 } else {
2876 _srav(T, T0R, T1R);
2877 }
2878 _mov(Dest, T);
2879 return;
2880 }
2881 case InstArithmetic::Udiv: {
2882 auto *T_Zero = I32Reg(RegMIPS32::Reg_ZERO);
2883 auto *T0R = Src0R;
2884 auto *T1R = Src1R;
2885 if (Dest->getType() != IceType_i32) {
2886 T0R = makeReg(IceType_i32);
2887 lowerCast(InstCast::create(Func, InstCast::Zext, T0R, Src0R));
2888 T1R = makeReg(IceType_i32);
2889 lowerCast(InstCast::create(Func, InstCast::Zext, T1R, Src1R));
2890 }
2891 _divu(T_Zero, T0R, T1R);
2892 _teq(T1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero
2893 _mflo(T, T_Zero);
2894 _mov(Dest, T);
2895 return;
2896 }
2897 case InstArithmetic::Sdiv: {
2898 auto *T_Zero = I32Reg(RegMIPS32::Reg_ZERO);
2899 auto *T0R = Src0R;
2900 auto *T1R = Src1R;
2901 if (Dest->getType() != IceType_i32) {
2902 T0R = makeReg(IceType_i32);
2903 lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
2904 T1R = makeReg(IceType_i32);
2905 lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
2906 }
2907 _div(T_Zero, T0R, T1R);
2908 _teq(T1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero
2909 _mflo(T, T_Zero);
2910 _mov(Dest, T);
2911 return;
2912 }
2913 case InstArithmetic::Urem: {
2914 auto *T_Zero = I32Reg(RegMIPS32::Reg_ZERO);
2915 auto *T0R = Src0R;
2916 auto *T1R = Src1R;
2917 if (Dest->getType() != IceType_i32) {
2918 T0R = makeReg(IceType_i32);
2919 lowerCast(InstCast::create(Func, InstCast::Zext, T0R, Src0R));
2920 T1R = makeReg(IceType_i32);
2921 lowerCast(InstCast::create(Func, InstCast::Zext, T1R, Src1R));
2922 }
2923 _divu(T_Zero, T0R, T1R);
2924 _teq(T1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero
2925 _mfhi(T, T_Zero);
2926 _mov(Dest, T);
2927 return;
2928 }
2929 case InstArithmetic::Srem: {
2930 auto *T_Zero = I32Reg(RegMIPS32::Reg_ZERO);
2931 auto *T0R = Src0R;
2932 auto *T1R = Src1R;
2933 if (Dest->getType() != IceType_i32) {
2934 T0R = makeReg(IceType_i32);
2935 lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
2936 T1R = makeReg(IceType_i32);
2937 lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
2938 }
2939 _div(T_Zero, T0R, T1R);
2940 _teq(T1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero
2941 _mfhi(T, T_Zero);
2942 _mov(Dest, T);
2943 return;
2944 }
2945 case InstArithmetic::Fadd: {
2946 if (DestTy == IceType_f32) {
2947 _add_s(T, Src0R, Src1R);
2948 _mov(Dest, T);
2949 return;
2950 }
2951 if (DestTy == IceType_f64) {
2952 _add_d(T, Src0R, Src1R);
2953 _mov(Dest, T);
2954 return;
2955 }
2956 break;
2957 }
2958 case InstArithmetic::Fsub:
2959 if (DestTy == IceType_f32) {
2960 _sub_s(T, Src0R, Src1R);
2961 _mov(Dest, T);
2962 return;
2963 }
2964 if (DestTy == IceType_f64) {
2965 _sub_d(T, Src0R, Src1R);
2966 _mov(Dest, T);
2967 return;
2968 }
2969 break;
2970 case InstArithmetic::Fmul:
2971 if (DestTy == IceType_f32) {
2972 _mul_s(T, Src0R, Src1R);
2973 _mov(Dest, T);
2974 return;
2975 }
2976 if (DestTy == IceType_f64) {
2977 _mul_d(T, Src0R, Src1R);
2978 _mov(Dest, T);
2979 return;
2980 }
2981 break;
2982 case InstArithmetic::Fdiv:
2983 if (DestTy == IceType_f32) {
2984 _div_s(T, Src0R, Src1R);
2985 _mov(Dest, T);
2986 return;
2987 }
2988 if (DestTy == IceType_f64) {
2989 _div_d(T, Src0R, Src1R);
2990 _mov(Dest, T);
2991 return;
2992 }
2993 break;
2994 case InstArithmetic::Frem:
2995 llvm::report_fatal_error("frem should have been prelowered.");
2996 break;
2997 }
2998 llvm::report_fatal_error("Unknown arithmetic operator");
2999 }
3000
3001 void TargetMIPS32::lowerAssign(const InstAssign *Instr) {
3002 Variable *Dest = Instr->getDest();
3003
3004 if (Dest->isRematerializable()) {
3005 Context.insert<InstFakeDef>(Dest);
3006 return;
3007 }
3008
3009 // The source type may not be the same as the destination type.
3010 if (isVectorType(Dest->getType())) {
3011 Operand *Src0 = legalizeUndef(Instr->getSrc(0));
3012 auto *DstVec = llvm::dyn_cast<VariableVecOn32>(Dest);
3013 for (SizeT i = 0; i < DstVec->ContainersPerVector; ++i) {
3014 auto *DCont = DstVec->getContainers()[i];
3015 auto *SCont =
3016 legalize(getOperandAtIndex(Src0, IceType_i32, i), Legal_Reg);
3017 auto *TReg = makeReg(IceType_i32);
3018 _mov(TReg, SCont);
3019 _mov(DCont, TReg);
3020 }
3021 return;
3022 }
3023 Operand *Src0 = Instr->getSrc(0);
3024 assert(Dest->getType() == Src0->getType());
3025 if (Dest->getType() == IceType_i64) {
3026 Src0 = legalizeUndef(Src0);
3027 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg);
3028 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg);
3029 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3030 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3031 auto *T_Lo = I32Reg(), *T_Hi = I32Reg();
3032 _mov(T_Lo, Src0Lo);
3033 _mov(DestLo, T_Lo);
3034 _mov(T_Hi, Src0Hi);
3035 _mov(DestHi, T_Hi);
3036 return;
3037 }
3038 Operand *SrcR;
3039 if (Dest->hasReg()) {
3040 // If Dest already has a physical register, then legalize the Src operand
3041 // into a Variable with the same register assignment.
3043 SrcR = legalize(Src0, Legal_Reg, Dest->getRegNum());
3044 } else {
3045 // Dest could be a stack operand. Since we could potentially need
3046 // to do a Store (and store can only have Register operands),
3047 // legalize this to a register.
3048 SrcR = legalize(Src0, Legal_Reg);
3049 }
3050 _mov(Dest, SrcR);
3051 }
3052
3053 void TargetMIPS32::lowerBr(const InstBr *Instr) {
3054 if (Instr->isUnconditional()) {
3055 _br(Instr->getTargetUnconditional());
3056 return;
3057 }
3058 CfgNode *TargetTrue = Instr->getTargetTrue();
3059 CfgNode *TargetFalse = Instr->getTargetFalse();
3060 Operand *Boolean = Instr->getCondition();
3061 const Inst *Producer = Computations.getProducerOf(Boolean);
3062 if (Producer == nullptr) {
3063 // Since we don't know the producer of this boolean, assume it is kept in
3064 // positive logic and just emit beqz with the Boolean as an operand.
3066 auto *BooleanR = legalizeToReg(Boolean);
3067 _br(TargetTrue, TargetFalse, BooleanR, CondMIPS32::Cond::EQZ);
3068 return;
3069 }
3070 if (Producer->getKind() == Inst::Icmp) {
3071 const InstIcmp *CompareInst = llvm::cast<InstIcmp>(Producer);
3072 Operand *Src0 = CompareInst->getSrc(0);
3073 Operand *Src1 = CompareInst->getSrc(1);
3074 const Type Src0Ty = Src0->getType();
3075 assert(Src0Ty == Src1->getType());
3076
3077 Variable *Src0R = nullptr;
3078 Variable *Src1R = nullptr;
3079 Variable *Src0HiR = nullptr;
3080 Variable *Src1HiR = nullptr;
3081 if (Src0Ty == IceType_i64) {
3082 Src0R = legalizeToReg(loOperand(Src0));
3083 Src1R = legalizeToReg(loOperand(Src1));
3084 Src0HiR = legalizeToReg(hiOperand(Src0));
3085 Src1HiR = legalizeToReg(hiOperand(Src1));
3086 } else {
3087 auto *Src0RT = legalizeToReg(Src0);
3088 auto *Src1RT = legalizeToReg(Src1);
3089 // Sign/Zero extend the source operands
3090 if (Src0Ty != IceType_i32) {
3091 InstCast::OpKind CastKind;
3092 switch (CompareInst->getCondition()) {
3093 case InstIcmp::Eq:
3094 case InstIcmp::Ne:
3095 case InstIcmp::Sgt:
3096 case InstIcmp::Sge:
3097 case InstIcmp::Slt:
3098 case InstIcmp::Sle:
3099 CastKind = InstCast::Sext;
3100 break;
3101 default:
3102 CastKind = InstCast::Zext;
3103 break;
3104 }
3105 Src0R = makeReg(IceType_i32);
3106 Src1R = makeReg(IceType_i32);
3107 lowerCast(InstCast::create(Func, CastKind, Src0R, Src0RT));
3108 lowerCast(InstCast::create(Func, CastKind, Src1R, Src1RT));
3109 } else {
3110 Src0R = Src0RT;
3111 Src1R = Src1RT;
3112 }
3113 }
3114 auto *DestT = makeReg(IceType_i32);
3115
3116 switch (CompareInst->getCondition()) {
3117 default:
3118 llvm_unreachable("unexpected condition");
3119 return;
3120 case InstIcmp::Eq: {
3121 if (Src0Ty == IceType_i64) {
3122 auto *T1 = I32Reg();
3123 auto *T2 = I32Reg();
3124 auto *T3 = I32Reg();
3125 _xor(T1, Src0HiR, Src1HiR);
3126 _xor(T2, Src0R, Src1R);
3127 _or(T3, T1, T2);
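 // T1 | T2 is zero iff both the high and the low words are equal, so a
 // single NEZ/EQZ test of the combined value decides 64-bit (in)equality.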
3128 _mov(DestT, T3);
3129 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3130 } else {
3131 _br(TargetTrue, TargetFalse, Src0R, Src1R, CondMIPS32::Cond::NE);
3132 }
3133 return;
3134 }
3135 case InstIcmp::Ne: {
3136 if (Src0Ty == IceType_i64) {
3137 auto *T1 = I32Reg();
3138 auto *T2 = I32Reg();
3139 auto *T3 = I32Reg();
3140 _xor(T1, Src0HiR, Src1HiR);
3141 _xor(T2, Src0R, Src1R);
3142 _or(T3, T1, T2);
3143 _mov(DestT, T3);
3144 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
3145 } else {
3146 _br(TargetTrue, TargetFalse, Src0R, Src1R, CondMIPS32::Cond::EQ);
3147 }
3148 return;
3149 }
3150 case InstIcmp::Ugt: {
3151 if (Src0Ty == IceType_i64) {
3152 auto *T1 = I32Reg();
3153 auto *T2 = I32Reg();
3154 auto *T3 = I32Reg();
3155 auto *T4 = I32Reg();
3156 auto *T5 = I32Reg();
3157 _xor(T1, Src0HiR, Src1HiR);
3158 _sltu(T2, Src1HiR, Src0HiR);
3159 _xori(T3, T2, 1);
3160 _sltu(T4, Src1R, Src0R);
3161 _xori(T5, T4, 1);
3162 _movz(T3, T5, T1);
3163 _mov(DestT, T3);
3164 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3165 } else {
3166 _sltu(DestT, Src1R, Src0R);
3167 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
3168 }
3169 return;
3170 }
3171 case InstIcmp::Uge: {
3172 if (Src0Ty == IceType_i64) {
3173 auto *T1 = I32Reg();
3174 auto *T2 = I32Reg();
3175 auto *T3 = I32Reg();
3176 _xor(T1, Src0HiR, Src1HiR);
3177 _sltu(T2, Src0HiR, Src1HiR);
3178 _sltu(T3, Src0R, Src1R);
3179 _movz(T2, T3, T1);
3180 _mov(DestT, T2);
3181 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3182 } else {
3183 _sltu(DestT, Src0R, Src1R);
3184 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3185 }
3186 return;
3187 }
3188 case InstIcmp::Ult: {
3189 if (Src0Ty == IceType_i64) {
3190 auto *T1 = I32Reg();
3191 auto *T2 = I32Reg();
3192 auto *T3 = I32Reg();
3193 auto *T4 = I32Reg();
3194 auto *T5 = I32Reg();
3195 _xor(T1, Src0HiR, Src1HiR);
3196 _sltu(T2, Src0HiR, Src1HiR);
3197 _xori(T3, T2, 1);
3198 _sltu(T4, Src0R, Src1R);
3199 _xori(T5, T4, 1);
3200 _movz(T3, T5, T1);
3201 _mov(DestT, T3);
3202 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3203 } else {
3204 _sltu(DestT, Src0R, Src1R);
3205 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
3206 }
3207 return;
3208 }
3209 case InstIcmp::Ule: {
3210 if (Src0Ty == IceType_i64) {
3211 auto *T1 = I32Reg();
3212 auto *T2 = I32Reg();
3213 auto *T3 = I32Reg();
3214 _xor(T1, Src0HiR, Src1HiR);
3215 _sltu(T2, Src1HiR, Src0HiR);
3216 _sltu(T3, Src1R, Src0R);
3217 _movz(T2, T3, T1);
3218 _mov(DestT, T2);
3219 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3220 } else {
3221 _sltu(DestT, Src1R, Src0R);
3222 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3223 }
3224 return;
3225 }
3226 case InstIcmp::Sgt: {
3227 if (Src0Ty == IceType_i64) {
3228 auto *T1 = I32Reg();
3229 auto *T2 = I32Reg();
3230 auto *T3 = I32Reg();
3231 auto *T4 = I32Reg();
3232 auto *T5 = I32Reg();
3233 _xor(T1, Src0HiR, Src1HiR);
3234 _slt(T2, Src1HiR, Src0HiR);
3235 _xori(T3, T2, 1);
3236 _sltu(T4, Src1R, Src0R);
3237 _xori(T5, T4, 1);
3238 _movz(T3, T5, T1);
3239 _mov(DestT, T3);
3240 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3241 } else {
3242 _slt(DestT, Src1R, Src0R);
3243 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
3244 }
3245 return;
3246 }
3247 case InstIcmp::Sge: {
3248 if (Src0Ty == IceType_i64) {
3249 auto *T1 = I32Reg();
3250 auto *T2 = I32Reg();
3251 auto *T3 = I32Reg();
3252 _xor(T1, Src0HiR, Src1HiR);
3253 _slt(T2, Src0HiR, Src1HiR);
3254 _sltu(T3, Src0R, Src1R);
3255 _movz(T2, T3, T1);
3256 _mov(DestT, T2);
3257 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3258 } else {
3259 _slt(DestT, Src0R, Src1R);
3260 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3261 }
3262 return;
3263 }
3264 case InstIcmp::Slt: {
3265 if (Src0Ty == IceType_i64) {
3266 auto *T1 = I32Reg();
3267 auto *T2 = I32Reg();
3268 auto *T3 = I32Reg();
3269 auto *T4 = I32Reg();
3270 auto *T5 = I32Reg();
3271 _xor(T1, Src0HiR, Src1HiR);
3272 _slt(T2, Src0HiR, Src1HiR);
3273 _xori(T3, T2, 1);
3274 _sltu(T4, Src0R, Src1R);
3275 _xori(T5, T4, 1);
3276 _movz(T3, T5, T1);
3277 _mov(DestT, T3);
3278 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3279 } else {
3280 _slt(DestT, Src0R, Src1R);
3281 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
3282 }
3283 return;
3284 }
3285 case InstIcmp::Sle: {
3286 if (Src0Ty == IceType_i64) {
3287 auto *T1 = I32Reg();
3288 auto *T2 = I32Reg();
3289 auto *T3 = I32Reg();
3290 _xor(T1, Src0HiR, Src1HiR);
3291 _slt(T2, Src1HiR, Src0HiR);
3292 _sltu(T3, Src1R, Src0R);
3293 _movz(T2, T3, T1);
3294 _mov(DestT, T2);
3295 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3296 } else {
3297 _slt(DestT, Src1R, Src0R);
3298 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3299 }
3300 return;
3301 }
3302 }
3303 }
3304 }
3305
3306 void TargetMIPS32::lowerCall(const InstCall *Instr) {
3307 CfgVector<Variable *> RegArgs;
3308 NeedsStackAlignment = true;
3309
3310   // Assign arguments to registers and stack. Also reserve stack space.
3311 TargetMIPS32::CallingConv CC;
3312
3313   // Pairs of Arg Operand -> GPR number assignments.
3314 llvm::SmallVector<std::pair<Operand *, RegNumT>, MIPS32_MAX_GPR_ARG> GPRArgs;
3315 llvm::SmallVector<std::pair<Operand *, RegNumT>, MIPS32_MAX_FP_ARG> FPArgs;
3316   // Pairs of Arg Operand -> stack offset.
3317 llvm::SmallVector<std::pair<Operand *, int32_t>, 8> StackArgs;
3318 size_t ParameterAreaSizeBytes = 16;
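  // The O32 calling convention always reserves a 16-byte argument build area
  // on the stack, even when all arguments fit in $4..$7, so the stack-passed
  // parameter area starts at offset 16.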
3319
3320 // Classify each argument operand according to the location where the
3321 // argument is passed.
3322
3323   // v4f32 is returned through the stack. The caller sets up $4 and implicitly
3324   // passes it as the first argument; the callee writes the return vector there.
3325 SizeT ArgNum = 0;
3326 Variable *Dest = Instr->getDest();
3327 Variable *RetVecFloat = nullptr;
3328 if (Dest && isVectorFloatingType(Dest->getType())) {
3329 ArgNum = 1;
3330 CC.discardReg(RegMIPS32::Reg_A0);
3331 RetVecFloat = Func->makeVariable(IceType_i32);
3332 auto *ByteCount = ConstantInteger32::create(Ctx, IceType_i32, 16);
3333 constexpr SizeT Alignment = 4;
3334 lowerAlloca(InstAlloca::create(Func, RetVecFloat, ByteCount, Alignment));
3335 RegArgs.emplace_back(
3336 legalizeToReg(RetVecFloat, RegNumT::fixme(RegMIPS32::Reg_A0)));
3337 }
3338
3339 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
3340 Operand *Arg = legalizeUndef(Instr->getArg(i));
3341 const Type Ty = Arg->getType();
3342 bool InReg = false;
3343 RegNumT Reg;
3344
3345 InReg = CC.argInReg(Ty, i, &Reg);
3346
3347 if (!InReg) {
3348 if (isVectorType(Ty)) {
3349 auto *ArgVec = llvm::cast<VariableVecOn32>(Arg);
3350 ParameterAreaSizeBytes =
3351 applyStackAlignmentTy(ParameterAreaSizeBytes, IceType_i64);
3352 for (Variable *Elem : ArgVec->getContainers()) {
3353 StackArgs.push_back(std::make_pair(Elem, ParameterAreaSizeBytes));
3354 ParameterAreaSizeBytes += typeWidthInBytesOnStack(IceType_i32);
3355 }
3356 } else {
3357 ParameterAreaSizeBytes =
3358 applyStackAlignmentTy(ParameterAreaSizeBytes, Ty);
3359 StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes));
3360 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Ty);
3361 }
3362 ++ArgNum;
3363 continue;
3364 }
3365
3366 if (isVectorType(Ty)) {
3367 auto *ArgVec = llvm::cast<VariableVecOn32>(Arg);
3368 Operand *Elem0 = ArgVec->getContainers()[0];
3369 Operand *Elem1 = ArgVec->getContainers()[1];
3370 GPRArgs.push_back(
3371 std::make_pair(Elem0, RegNumT::fixme((unsigned)Reg + 0)));
3372 GPRArgs.push_back(
3373 std::make_pair(Elem1, RegNumT::fixme((unsigned)Reg + 1)));
3374 Operand *Elem2 = ArgVec->getContainers()[2];
3375 Operand *Elem3 = ArgVec->getContainers()[3];
3376       // The first vector argument is passed in $4:$5:$6:$7; the second and
3377       // subsequent ones are passed in $6:$7:stack:stack.
3378 if (ArgNum == 0) {
3379 GPRArgs.push_back(
3380 std::make_pair(Elem2, RegNumT::fixme((unsigned)Reg + 2)));
3381 GPRArgs.push_back(
3382 std::make_pair(Elem3, RegNumT::fixme((unsigned)Reg + 3)));
3383 } else {
3384 ParameterAreaSizeBytes =
3385 applyStackAlignmentTy(ParameterAreaSizeBytes, IceType_i64);
3386 StackArgs.push_back(std::make_pair(Elem2, ParameterAreaSizeBytes));
3387 ParameterAreaSizeBytes += typeWidthInBytesOnStack(IceType_i32);
3388 StackArgs.push_back(std::make_pair(Elem3, ParameterAreaSizeBytes));
3389 ParameterAreaSizeBytes += typeWidthInBytesOnStack(IceType_i32);
3390 }
3391 } else if (Ty == IceType_i64) {
3392 Operand *Lo = loOperand(Arg);
3393 Operand *Hi = hiOperand(Arg);
3394 GPRArgs.push_back(
3395 std::make_pair(Lo, RegMIPS32::get64PairFirstRegNum(Reg)));
3396 GPRArgs.push_back(
3397 std::make_pair(Hi, RegMIPS32::get64PairSecondRegNum(Reg)));
3398 } else if (isScalarIntegerType(Ty)) {
3399 GPRArgs.push_back(std::make_pair(Arg, Reg));
3400 } else {
3401 FPArgs.push_back(std::make_pair(Arg, Reg));
3402 }
3403 ++ArgNum;
3404 }
3405
3406 // Adjust the parameter area so that the stack is aligned. It is assumed that
3407 // the stack is already aligned at the start of the calling sequence.
3408 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
3409
3410 // Copy arguments that are passed on the stack to the appropriate stack
3411 // locations.
3412 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
3413 for (auto &StackArg : StackArgs) {
3414 ConstantInteger32 *Loc =
3415 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second));
3416 Type Ty = StackArg.first->getType();
3417 OperandMIPS32Mem *Addr;
3418 constexpr bool SignExt = false;
3419 if (OperandMIPS32Mem::canHoldOffset(Ty, SignExt, StackArg.second)) {
3420 Addr = OperandMIPS32Mem::create(Func, Ty, SP, Loc);
3421 } else {
3422 Variable *NewBase = Func->makeVariable(SP->getType());
3423 lowerArithmetic(
3424 InstArithmetic::create(Func, InstArithmetic::Add, NewBase, SP, Loc));
3425 Addr = formMemoryOperand(NewBase, Ty);
3426 }
3427 lowerStore(InstStore::create(Func, StackArg.first, Addr));
3428 }
3429
3430 // Generate the call instruction. Assign its result to a temporary with high
3431 // register allocation weight.
3432
3433 // ReturnReg doubles as ReturnRegLo as necessary.
3434 Variable *ReturnReg = nullptr;
3435 Variable *ReturnRegHi = nullptr;
3436 if (Dest) {
3437 switch (Dest->getType()) {
3438 case IceType_NUM:
3439 llvm_unreachable("Invalid Call dest type");
3440 return;
3441 case IceType_void:
3442 break;
3443 case IceType_i1:
3444 case IceType_i8:
3445 case IceType_i16:
3446 case IceType_i32:
3447 ReturnReg = makeReg(Dest->getType(), RegMIPS32::Reg_V0);
3448 break;
3449 case IceType_i64:
3450 ReturnReg = I32Reg(RegMIPS32::Reg_V0);
3451 ReturnRegHi = I32Reg(RegMIPS32::Reg_V1);
3452 break;
3453 case IceType_f32:
3454 ReturnReg = makeReg(Dest->getType(), RegMIPS32::Reg_F0);
3455 break;
3456 case IceType_f64:
3457 ReturnReg = makeReg(IceType_f64, RegMIPS32::Reg_F0);
3458 break;
3459 case IceType_v4i1:
3460 case IceType_v8i1:
3461 case IceType_v16i1:
3462 case IceType_v16i8:
3463 case IceType_v8i16:
3464 case IceType_v4i32: {
3465 ReturnReg = makeReg(Dest->getType(), RegMIPS32::Reg_V0);
3466 auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg);
3467 RetVec->initVecElement(Func);
3468 for (SizeT i = 0; i < RetVec->ContainersPerVector; ++i) {
3469 auto *Var = RetVec->getContainers()[i];
3470 Var->setRegNum(RegNumT::fixme(RegMIPS32::Reg_V0 + i));
3471 }
3472 break;
3473 }
3474 case IceType_v4f32:
3475 ReturnReg = makeReg(IceType_i32, RegMIPS32::Reg_V0);
3476 break;
3477 }
3478 }
3479 Operand *CallTarget = Instr->getCallTarget();
3480 // Allow ConstantRelocatable to be left alone as a direct call,
3481 // but force other constants like ConstantInteger32 to be in
3482 // a register and make it an indirect call.
3483 if (!llvm::isa<ConstantRelocatable>(CallTarget)) {
3484 CallTarget = legalize(CallTarget, Legal_Reg);
3485 }
3486
3487 // Copy arguments to be passed in registers to the appropriate registers.
3488 for (auto &FPArg : FPArgs) {
3489 RegArgs.emplace_back(legalizeToReg(FPArg.first, FPArg.second));
3490 }
3491 for (auto &GPRArg : GPRArgs) {
3492 RegArgs.emplace_back(legalizeToReg(GPRArg.first, GPRArg.second));
3493 }
3494
3495 // Generate a FakeUse of register arguments so that they do not get dead code
3496 // eliminated as a result of the FakeKill of scratch registers after the call.
3497   // These fake-uses need to be placed here to keep the argument registers from
3498   // being used during the legalizeToReg() calls above.
3499 for (auto *RegArg : RegArgs) {
3500 Context.insert<InstFakeUse>(RegArg);
3501 }
3502
3503   // If a variable-sized alloca is used, the extra 16 bytes for the argument
3504   // build area are allocated on the stack before the call.
3505 if (VariableAllocaUsed)
3506 _addiu(SP, SP, -MaxOutArgsSizeBytes);
3507
3508 Inst *NewCall;
3509
3510   // We don't need to define the return register if it is a vector; fake defs
3511   // of its containers are inserted just after the call.
3512 if (ReturnReg && isVectorIntegerType(ReturnReg->getType())) {
3513 Variable *RetReg = nullptr;
3514 NewCall = InstMIPS32Call::create(Func, RetReg, CallTarget);
3515 Context.insert(NewCall);
3516 } else {
3517 NewCall = Context.insert<InstMIPS32Call>(ReturnReg, CallTarget);
3518 }
3519
3520 if (VariableAllocaUsed)
3521 _addiu(SP, SP, MaxOutArgsSizeBytes);
3522
3523   // Insert a fake use of the stack pointer to prevent dead-code elimination of
3524   // the addiu instructions.
3525 Context.insert<InstFakeUse>(SP);
3526
3527 if (ReturnRegHi)
3528 Context.insert(InstFakeDef::create(Func, ReturnRegHi));
3529
3530 if (ReturnReg) {
3531 if (auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg)) {
3532 for (Variable *Var : RetVec->getContainers()) {
3533 Context.insert(InstFakeDef::create(Func, Var));
3534 }
3535 }
3536 }
3537
3538 // Insert a register-kill pseudo instruction.
3539 Context.insert(InstFakeKill::create(Func, NewCall));
3540
3541 // Generate a FakeUse to keep the call live if necessary.
3542 if (Instr->hasSideEffects() && ReturnReg) {
3543 if (auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg)) {
3544 for (Variable *Var : RetVec->getContainers()) {
3545 Context.insert<InstFakeUse>(Var);
3546 }
3547 } else {
3548 Context.insert<InstFakeUse>(ReturnReg);
3549 }
3550 }
3551
3552 if (Dest == nullptr)
3553 return;
3554
3555 // Assign the result of the call to Dest.
3556 if (ReturnReg) {
3557 if (RetVecFloat) {
3558 auto *DestVecOn32 = llvm::cast<VariableVecOn32>(Dest);
3559 auto *TBase = legalizeToReg(RetVecFloat);
3560 for (SizeT i = 0; i < DestVecOn32->ContainersPerVector; ++i) {
3561 auto *Var = DestVecOn32->getContainers()[i];
3562 auto *TVar = makeReg(IceType_i32);
3563 OperandMIPS32Mem *Mem = OperandMIPS32Mem::create(
3564 Func, IceType_i32, TBase,
3565 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(i * 4)));
3566 _lw(TVar, Mem);
3567 _mov(Var, TVar);
3568 }
3569 } else if (auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg)) {
3570 auto *DestVecOn32 = llvm::cast<VariableVecOn32>(Dest);
3571 for (SizeT i = 0; i < DestVecOn32->ContainersPerVector; ++i) {
3572 _mov(DestVecOn32->getContainers()[i], RetVec->getContainers()[i]);
3573 }
3574 } else if (ReturnRegHi) {
3575 assert(Dest->getType() == IceType_i64);
3576 auto *Dest64On32 = llvm::cast<Variable64On32>(Dest);
3577 Variable *DestLo = Dest64On32->getLo();
3578 Variable *DestHi = Dest64On32->getHi();
3579 _mov(DestLo, ReturnReg);
3580 _mov(DestHi, ReturnRegHi);
3581 } else {
3582 assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
3583 Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
3584 isScalarFloatingType(Dest->getType()) ||
3585 isVectorType(Dest->getType()));
3586 _mov(Dest, ReturnReg);
3587 }
3588 }
3589 }
3590
3591 void TargetMIPS32::lowerCast(const InstCast *Instr) {
3592 InstCast::OpKind CastKind = Instr->getCastKind();
3593 Variable *Dest = Instr->getDest();
3594 Operand *Src0 = legalizeUndef(Instr->getSrc(0));
3595 const Type DestTy = Dest->getType();
3596 const Type Src0Ty = Src0->getType();
3597 const uint32_t ShiftAmount =
3598 (Src0Ty == IceType_i1
3599 ? INT32_BITS - 1
3600 : INT32_BITS - (CHAR_BITS * typeWidthInBytes(Src0Ty)));
3601 const uint32_t Mask =
3602 (Src0Ty == IceType_i1
3603 ? 1
3604 : (1 << (CHAR_BITS * typeWidthInBytes(Src0Ty))) - 1);
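  // For example, for an i8 source ShiftAmount is 24 and Mask is 0xff: an
  // sll/sra pair by 24 sign-extends bit 7 across the word (Sext), while andi
  // with 0xff zero-extends (Zext).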
3605
3606 if (isVectorType(DestTy)) {
3607 llvm::report_fatal_error("Cast: Destination type is vector");
3608 return;
3609 }
3610 switch (CastKind) {
3611 default:
3612 Func->setError("Cast type not supported");
3613 return;
3614 case InstCast::Sext: {
3615 if (DestTy == IceType_i64) {
3616 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3617 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3618 Variable *Src0R = legalizeToReg(Src0);
3619 Variable *T1_Lo = I32Reg();
3620 Variable *T2_Lo = I32Reg();
3621 Variable *T_Hi = I32Reg();
3622 if (Src0Ty == IceType_i1) {
3623 _sll(T1_Lo, Src0R, INT32_BITS - 1);
3624 _sra(T2_Lo, T1_Lo, INT32_BITS - 1);
3625 _mov(DestHi, T2_Lo);
3626 _mov(DestLo, T2_Lo);
3627 } else if (Src0Ty == IceType_i8 || Src0Ty == IceType_i16) {
3628 _sll(T1_Lo, Src0R, ShiftAmount);
3629 _sra(T2_Lo, T1_Lo, ShiftAmount);
3630 _sra(T_Hi, T2_Lo, INT32_BITS - 1);
3631 _mov(DestHi, T_Hi);
3632 _mov(DestLo, T2_Lo);
3633 } else if (Src0Ty == IceType_i32) {
3634 _mov(T1_Lo, Src0R);
3635 _sra(T_Hi, T1_Lo, INT32_BITS - 1);
3636 _mov(DestHi, T_Hi);
3637 _mov(DestLo, T1_Lo);
3638 }
3639 } else {
3640 Variable *Src0R = legalizeToReg(Src0);
3641 Variable *T1 = makeReg(DestTy);
3642 Variable *T2 = makeReg(DestTy);
3643 if (Src0Ty == IceType_i1 || Src0Ty == IceType_i8 ||
3644 Src0Ty == IceType_i16) {
3645 _sll(T1, Src0R, ShiftAmount);
3646 _sra(T2, T1, ShiftAmount);
3647 _mov(Dest, T2);
3648 }
3649 }
3650 break;
3651 }
3652 case InstCast::Zext: {
3653 if (DestTy == IceType_i64) {
3654 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3655 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3656 Variable *Src0R = legalizeToReg(Src0);
3657 Variable *T_Lo = I32Reg();
3658 Variable *T_Hi = I32Reg();
3659
3660 if (Src0Ty == IceType_i1 || Src0Ty == IceType_i8 || Src0Ty == IceType_i16)
3661 _andi(T_Lo, Src0R, Mask);
3662 else if (Src0Ty == IceType_i32)
3663 _mov(T_Lo, Src0R);
3664 else
3665 assert(Src0Ty != IceType_i64);
3666 _mov(DestLo, T_Lo);
3667
3668 auto *Zero = getZero();
3669 _addiu(T_Hi, Zero, 0);
3670 _mov(DestHi, T_Hi);
3671 } else {
3672 Variable *Src0R = legalizeToReg(Src0);
3673 Variable *T = makeReg(DestTy);
3674 if (Src0Ty == IceType_i1 || Src0Ty == IceType_i8 ||
3675 Src0Ty == IceType_i16) {
3676 _andi(T, Src0R, Mask);
3677 _mov(Dest, T);
3678 }
3679 }
3680 break;
3681 }
3682 case InstCast::Trunc: {
3683 if (Src0Ty == IceType_i64)
3684 Src0 = loOperand(Src0);
3685 Variable *Src0R = legalizeToReg(Src0);
3686 Variable *T = makeReg(DestTy);
3687 switch (DestTy) {
3688 case IceType_i1:
3689 _andi(T, Src0R, 0x1);
3690 break;
3691 case IceType_i8:
3692 _andi(T, Src0R, 0xff);
3693 break;
3694 case IceType_i16:
3695 _andi(T, Src0R, 0xffff);
3696 break;
3697 default:
3698 _mov(T, Src0R);
3699 break;
3700 }
3701 _mov(Dest, T);
3702 break;
3703 }
3704 case InstCast::Fptrunc: {
3705 assert(Dest->getType() == IceType_f32);
3706 assert(Src0->getType() == IceType_f64);
3707 auto *DestR = legalizeToReg(Dest);
3708 auto *Src0R = legalizeToReg(Src0);
3709 _cvt_s_d(DestR, Src0R);
3710 _mov(Dest, DestR);
3711 break;
3712 }
3713 case InstCast::Fpext: {
3714 assert(Dest->getType() == IceType_f64);
3715 assert(Src0->getType() == IceType_f32);
3716 auto *DestR = legalizeToReg(Dest);
3717 auto *Src0R = legalizeToReg(Src0);
3718 _cvt_d_s(DestR, Src0R);
3719 _mov(Dest, DestR);
3720 break;
3721 }
3722 case InstCast::Fptosi:
3723 case InstCast::Fptoui: {
3724 if (llvm::isa<Variable64On32>(Dest)) {
3725 llvm::report_fatal_error("fp-to-i64 should have been prelowered.");
3726 return;
3727 }
3728 if (DestTy != IceType_i64) {
3729 if (Src0Ty == IceType_f32 && isScalarIntegerType(DestTy)) {
3730 Variable *Src0R = legalizeToReg(Src0);
3731 Variable *FTmp = makeReg(IceType_f32);
3732 _trunc_w_s(FTmp, Src0R);
3733 _mov(Dest, FTmp);
3734 return;
3735 }
3736 if (Src0Ty == IceType_f64 && isScalarIntegerType(DestTy)) {
3737 Variable *Src0R = legalizeToReg(Src0);
3738 Variable *FTmp = makeReg(IceType_f64);
3739 _trunc_w_d(FTmp, Src0R);
3740 _mov(Dest, FTmp);
3741 return;
3742 }
3743 }
3744 llvm::report_fatal_error("Destination is i64 in fp-to-i32");
3745 break;
3746 }
3747 case InstCast::Sitofp:
3748 case InstCast::Uitofp: {
3749 if (llvm::isa<Variable64On32>(Dest)) {
3750 llvm::report_fatal_error("i64-to-fp should have been prelowered.");
3751 return;
3752 }
3753 if (Src0Ty != IceType_i64) {
3754 Variable *Src0R = legalizeToReg(Src0);
3755 auto *T0R = Src0R;
3756 if (Src0Ty != IceType_i32) {
3757 T0R = makeReg(IceType_i32);
3758 if (CastKind == InstCast::Uitofp)
3759 lowerCast(InstCast::create(Func, InstCast::Zext, T0R, Src0R));
3760 else
3761 lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
3762 }
3763 if (isScalarIntegerType(Src0Ty) && DestTy == IceType_f32) {
3764 Variable *FTmp1 = makeReg(IceType_f32);
3765 Variable *FTmp2 = makeReg(IceType_f32);
3766 _mtc1(FTmp1, T0R);
3767 _cvt_s_w(FTmp2, FTmp1);
3768 _mov(Dest, FTmp2);
3769 return;
3770 }
3771 if (isScalarIntegerType(Src0Ty) && DestTy == IceType_f64) {
3772 Variable *FTmp1 = makeReg(IceType_f64);
3773 Variable *FTmp2 = makeReg(IceType_f64);
3774 _mtc1(FTmp1, T0R);
3775 _cvt_d_w(FTmp2, FTmp1);
3776 _mov(Dest, FTmp2);
3777 return;
3778 }
3779 }
3780 llvm::report_fatal_error("Source is i64 in i32-to-fp");
3781 break;
3782 }
3783 case InstCast::Bitcast: {
3784 Operand *Src0 = Instr->getSrc(0);
3785 if (DestTy == Src0->getType()) {
3786 auto *Assign = InstAssign::create(Func, Dest, Src0);
3787 lowerAssign(Assign);
3788 return;
3789 }
3790 if (isVectorType(DestTy) || isVectorType(Src0->getType())) {
3791 llvm::report_fatal_error(
3792 "Bitcast: vector type should have been prelowered.");
3793 return;
3794 }
3795 switch (DestTy) {
3796 case IceType_NUM:
3797 case IceType_void:
3798 llvm::report_fatal_error("Unexpected bitcast.");
3799 case IceType_i1:
3800 UnimplementedLoweringError(this, Instr);
3801 break;
3802 case IceType_i8:
3803 assert(Src0->getType() == IceType_v8i1);
3804 llvm::report_fatal_error(
3805 "i8 to v8i1 conversion should have been prelowered.");
3806 break;
3807 case IceType_i16:
3808 assert(Src0->getType() == IceType_v16i1);
3809 llvm::report_fatal_error(
3810 "i16 to v16i1 conversion should have been prelowered.");
3811 break;
3812 case IceType_i32:
3813 case IceType_f32: {
3814 Variable *Src0R = legalizeToReg(Src0);
3815 _mov(Dest, Src0R);
3816 break;
3817 }
3818 case IceType_i64: {
3819 assert(Src0->getType() == IceType_f64);
3820 Variable *Src0R = legalizeToReg(Src0);
3821 auto *T = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
3822 T->initHiLo(Func);
3823 T->getHi()->setMustNotHaveReg();
3824 T->getLo()->setMustNotHaveReg();
3825 Context.insert<InstFakeDef>(T->getHi());
3826 Context.insert<InstFakeDef>(T->getLo());
3827 _mov_fp64_to_i64(T->getHi(), Src0R, Int64_Hi);
3828 _mov_fp64_to_i64(T->getLo(), Src0R, Int64_Lo);
3829 lowerAssign(InstAssign::create(Func, Dest, T));
3830 break;
3831 }
3832 case IceType_f64: {
3833 assert(Src0->getType() == IceType_i64);
3834 const uint32_t Mask = 0xFFFFFFFF;
3835 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src0)) {
3836 Variable *RegHi, *RegLo;
3837 const uint64_t Value = C64->getValue();
3838 uint64_t Upper32Bits = (Value >> INT32_BITS) & Mask;
3839 uint64_t Lower32Bits = Value & Mask;
3840 RegLo = legalizeToReg(Ctx->getConstantInt32(Lower32Bits));
3841 RegHi = legalizeToReg(Ctx->getConstantInt32(Upper32Bits));
3842 _mov(Dest, RegHi, RegLo);
3843 } else {
3844 auto *Var64On32 = llvm::cast<Variable64On32>(Src0);
3845 auto *RegLo = legalizeToReg(loOperand(Var64On32));
3846 auto *RegHi = legalizeToReg(hiOperand(Var64On32));
3847 _mov(Dest, RegHi, RegLo);
3848 }
3849 break;
3850 }
3851 default:
3852 llvm::report_fatal_error("Unexpected bitcast.");
3853 }
3854 break;
3855 }
3856 }
3857 }
3858
3859 void TargetMIPS32::lowerExtractElement(const InstExtractElement *Instr) {
3860 Variable *Dest = Instr->getDest();
3861 const Type DestTy = Dest->getType();
3862 Operand *Src1 = Instr->getSrc(1);
3863 if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src1)) {
3864 const uint32_t Index = Imm->getValue();
3865 Variable *TDest = makeReg(DestTy);
3866 Variable *TReg = makeReg(DestTy);
3867 auto *Src0 = legalizeUndef(Instr->getSrc(0));
3868 auto *Src0R = llvm::dyn_cast<VariableVecOn32>(Src0);
3869 // Number of elements in each container
3870 uint32_t ElemPerCont =
3871 typeNumElements(Src0->getType()) / Src0R->ContainersPerVector;
3872 auto *Src = Src0R->getContainers()[Index / ElemPerCont];
3873 auto *SrcE = legalizeToReg(Src);
3874 // Position of the element in the container
3875 uint32_t PosInCont = Index % ElemPerCont;
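    // Each 32-bit container packs ElemPerCont elements, so the element is
    // recovered by shifting it down to bit 0 and/or masking off the higher
    // bits; e.g. element 5 of a v16i8 is byte 1 of container 1.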
3876 if (ElemPerCont == 1) {
3877 _mov(TDest, SrcE);
3878 } else if (ElemPerCont == 2) {
3879 switch (PosInCont) {
3880 case 0:
3881 _andi(TDest, SrcE, 0xffff);
3882 break;
3883 case 1:
3884 _srl(TDest, SrcE, 16);
3885 break;
3886 default:
3887 llvm::report_fatal_error("ExtractElement: Invalid PosInCont");
3888 break;
3889 }
3890 } else if (ElemPerCont == 4) {
3891 switch (PosInCont) {
3892 case 0:
3893 _andi(TDest, SrcE, 0xff);
3894 break;
3895 case 1:
3896 _srl(TReg, SrcE, 8);
3897 _andi(TDest, TReg, 0xff);
3898 break;
3899 case 2:
3900 _srl(TReg, SrcE, 16);
3901 _andi(TDest, TReg, 0xff);
3902 break;
3903 case 3:
3904 _srl(TDest, SrcE, 24);
3905 break;
3906 default:
3907 llvm::report_fatal_error("ExtractElement: Invalid PosInCont");
3908 break;
3909 }
3910 }
3911 if (typeElementType(Src0R->getType()) == IceType_i1) {
3912 Variable *TReg1 = makeReg(DestTy);
3913 _andi(TReg1, TDest, 0x1);
3914 _mov(Dest, TReg1);
3915 } else {
3916 _mov(Dest, TDest);
3917 }
3918 return;
3919 }
3920 llvm::report_fatal_error("ExtractElement requires a constant index");
3921 }
3922
3923 void TargetMIPS32::lowerFcmp(const InstFcmp *Instr) {
3924 Variable *Dest = Instr->getDest();
3925 if (isVectorType(Dest->getType())) {
3926 llvm::report_fatal_error("Fcmp: Destination type is vector");
3927 return;
3928 }
3929
3930 auto *Src0 = Instr->getSrc(0);
3931 auto *Src1 = Instr->getSrc(1);
3932 auto *Zero = getZero();
3933
3934 InstFcmp::FCond Cond = Instr->getCondition();
3935 auto *DestR = makeReg(IceType_i32);
3936 auto *Src0R = legalizeToReg(Src0);
3937 auto *Src1R = legalizeToReg(Src1);
3938 const Type Src0Ty = Src0->getType();
3939
3940 Operand *FCC0 = OperandMIPS32FCC::create(getFunc(), OperandMIPS32FCC::FCC0);
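  // Each case below emits a MIPS c.<cond>.{s,d} compare that sets FP condition
  // flag 0, materializes 1 in DestR, and then conditionally resets DestR to 0
  // with movf/movt, depending on whether FCC0 holds the desired condition or
  // its negation (e.g. Ogt is computed as "not ule").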
3941
3942 switch (Cond) {
3943 default: {
3944 llvm::report_fatal_error("Unhandled fp comparison.");
3945 return;
3946 }
3947 case InstFcmp::False: {
3948 Context.insert<InstFakeUse>(Src0R);
3949 Context.insert<InstFakeUse>(Src1R);
3950 _addiu(DestR, Zero, 0);
3951 _mov(Dest, DestR);
3952 break;
3953 }
3954 case InstFcmp::Oeq: {
3955 if (Src0Ty == IceType_f32) {
3956 _c_eq_s(Src0R, Src1R);
3957 } else {
3958 _c_eq_d(Src0R, Src1R);
3959 }
3960 _addiu(DestR, Zero, 1);
3961 _movf(DestR, Zero, FCC0);
3962 _mov(Dest, DestR);
3963 break;
3964 }
3965 case InstFcmp::Ogt: {
3966 if (Src0Ty == IceType_f32) {
3967 _c_ule_s(Src0R, Src1R);
3968 } else {
3969 _c_ule_d(Src0R, Src1R);
3970 }
3971 _addiu(DestR, Zero, 1);
3972 _movt(DestR, Zero, FCC0);
3973 _mov(Dest, DestR);
3974 break;
3975 }
3976 case InstFcmp::Oge: {
3977 if (Src0Ty == IceType_f32) {
3978 _c_ult_s(Src0R, Src1R);
3979 } else {
3980 _c_ult_d(Src0R, Src1R);
3981 }
3982 _addiu(DestR, Zero, 1);
3983 _movt(DestR, Zero, FCC0);
3984 _mov(Dest, DestR);
3985 break;
3986 }
3987 case InstFcmp::Olt: {
3988 if (Src0Ty == IceType_f32) {
3989 _c_olt_s(Src0R, Src1R);
3990 } else {
3991 _c_olt_d(Src0R, Src1R);
3992 }
3993 _addiu(DestR, Zero, 1);
3994 _movf(DestR, Zero, FCC0);
3995 _mov(Dest, DestR);
3996 break;
3997 }
3998 case InstFcmp::Ole: {
3999 if (Src0Ty == IceType_f32) {
4000 _c_ole_s(Src0R, Src1R);
4001 } else {
4002 _c_ole_d(Src0R, Src1R);
4003 }
4004 _addiu(DestR, Zero, 1);
4005 _movf(DestR, Zero, FCC0);
4006 _mov(Dest, DestR);
4007 break;
4008 }
4009 case InstFcmp::One: {
4010 if (Src0Ty == IceType_f32) {
4011 _c_ueq_s(Src0R, Src1R);
4012 } else {
4013 _c_ueq_d(Src0R, Src1R);
4014 }
4015 _addiu(DestR, Zero, 1);
4016 _movt(DestR, Zero, FCC0);
4017 _mov(Dest, DestR);
4018 break;
4019 }
4020 case InstFcmp::Ord: {
4021 if (Src0Ty == IceType_f32) {
4022 _c_un_s(Src0R, Src1R);
4023 } else {
4024 _c_un_d(Src0R, Src1R);
4025 }
4026 _addiu(DestR, Zero, 1);
4027 _movt(DestR, Zero, FCC0);
4028 _mov(Dest, DestR);
4029 break;
4030 }
4031 case InstFcmp::Ueq: {
4032 if (Src0Ty == IceType_f32) {
4033 _c_ueq_s(Src0R, Src1R);
4034 } else {
4035 _c_ueq_d(Src0R, Src1R);
4036 }
4037 _addiu(DestR, Zero, 1);
4038 _movf(DestR, Zero, FCC0);
4039 _mov(Dest, DestR);
4040 break;
4041 }
4042 case InstFcmp::Ugt: {
4043 if (Src0Ty == IceType_f32) {
4044 _c_ole_s(Src0R, Src1R);
4045 } else {
4046 _c_ole_d(Src0R, Src1R);
4047 }
4048 _addiu(DestR, Zero, 1);
4049 _movt(DestR, Zero, FCC0);
4050 _mov(Dest, DestR);
4051 break;
4052 }
4053 case InstFcmp::Uge: {
4054 if (Src0Ty == IceType_f32) {
4055 _c_olt_s(Src0R, Src1R);
4056 } else {
4057 _c_olt_d(Src0R, Src1R);
4058 }
4059 _addiu(DestR, Zero, 1);
4060 _movt(DestR, Zero, FCC0);
4061 _mov(Dest, DestR);
4062 break;
4063 }
4064 case InstFcmp::Ult: {
4065 if (Src0Ty == IceType_f32) {
4066 _c_ult_s(Src0R, Src1R);
4067 } else {
4068 _c_ult_d(Src0R, Src1R);
4069 }
4070 _addiu(DestR, Zero, 1);
4071 _movf(DestR, Zero, FCC0);
4072 _mov(Dest, DestR);
4073 break;
4074 }
4075 case InstFcmp::Ule: {
4076 if (Src0Ty == IceType_f32) {
4077 _c_ule_s(Src0R, Src1R);
4078 } else {
4079 _c_ule_d(Src0R, Src1R);
4080 }
4081 _addiu(DestR, Zero, 1);
4082 _movf(DestR, Zero, FCC0);
4083 _mov(Dest, DestR);
4084 break;
4085 }
4086 case InstFcmp::Une: {
4087 if (Src0Ty == IceType_f32) {
4088 _c_eq_s(Src0R, Src1R);
4089 } else {
4090 _c_eq_d(Src0R, Src1R);
4091 }
4092 _addiu(DestR, Zero, 1);
4093 _movt(DestR, Zero, FCC0);
4094 _mov(Dest, DestR);
4095 break;
4096 }
4097 case InstFcmp::Uno: {
4098 if (Src0Ty == IceType_f32) {
4099 _c_un_s(Src0R, Src1R);
4100 } else {
4101 _c_un_d(Src0R, Src1R);
4102 }
4103 _addiu(DestR, Zero, 1);
4104 _movf(DestR, Zero, FCC0);
4105 _mov(Dest, DestR);
4106 break;
4107 }
4108 case InstFcmp::True: {
4109 Context.insert<InstFakeUse>(Src0R);
4110 Context.insert<InstFakeUse>(Src1R);
4111 _addiu(DestR, Zero, 1);
4112 _mov(Dest, DestR);
4113 break;
4114 }
4115 }
4116 }
4117
4118 void TargetMIPS32::lower64Icmp(const InstIcmp *Instr) {
4119 Operand *Src0 = legalize(Instr->getSrc(0));
4120 Operand *Src1 = legalize(Instr->getSrc(1));
4121 Variable *Dest = Instr->getDest();
4122 InstIcmp::ICond Condition = Instr->getCondition();
4123
4124 Variable *Src0LoR = legalizeToReg(loOperand(Src0));
4125 Variable *Src0HiR = legalizeToReg(hiOperand(Src0));
4126 Variable *Src1LoR = legalizeToReg(loOperand(Src1));
4127 Variable *Src1HiR = legalizeToReg(hiOperand(Src1));
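  // The same idioms as the folded i64 compares in lowerBr apply here: the
  // xor/or pattern tests equality of the 64-bit pair, and movz keyed on
  // xor(hi0, hi1) picks the low-word comparison when the high words match.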
4128
4129 switch (Condition) {
4130 default:
4131 llvm_unreachable("unexpected condition");
4132 return;
4133 case InstIcmp::Eq: {
4134 auto *T1 = I32Reg();
4135 auto *T2 = I32Reg();
4136 auto *T3 = I32Reg();
4137 auto *T4 = I32Reg();
4138 _xor(T1, Src0HiR, Src1HiR);
4139 _xor(T2, Src0LoR, Src1LoR);
4140 _or(T3, T1, T2);
4141 _sltiu(T4, T3, 1);
4142 _mov(Dest, T4);
4143 return;
4144 }
4145 case InstIcmp::Ne: {
4146 auto *T1 = I32Reg();
4147 auto *T2 = I32Reg();
4148 auto *T3 = I32Reg();
4149 auto *T4 = I32Reg();
4150 _xor(T1, Src0HiR, Src1HiR);
4151 _xor(T2, Src0LoR, Src1LoR);
4152 _or(T3, T1, T2);
4153 _sltu(T4, getZero(), T3);
4154 _mov(Dest, T4);
4155 return;
4156 }
4157 case InstIcmp::Sgt: {
4158 auto *T1 = I32Reg();
4159 auto *T2 = I32Reg();
4160 auto *T3 = I32Reg();
4161 _xor(T1, Src0HiR, Src1HiR);
4162 _slt(T2, Src1HiR, Src0HiR);
4163 _sltu(T3, Src1LoR, Src0LoR);
4164 _movz(T2, T3, T1);
4165 _mov(Dest, T2);
4166 return;
4167 }
4168 case InstIcmp::Ugt: {
4169 auto *T1 = I32Reg();
4170 auto *T2 = I32Reg();
4171 auto *T3 = I32Reg();
4172 _xor(T1, Src0HiR, Src1HiR);
4173 _sltu(T2, Src1HiR, Src0HiR);
4174 _sltu(T3, Src1LoR, Src0LoR);
4175 _movz(T2, T3, T1);
4176 _mov(Dest, T2);
4177 return;
4178 }
4179 case InstIcmp::Sge: {
4180 auto *T1 = I32Reg();
4181 auto *T2 = I32Reg();
4182 auto *T3 = I32Reg();
4183 auto *T4 = I32Reg();
4184 auto *T5 = I32Reg();
4185 _xor(T1, Src0HiR, Src1HiR);
4186 _slt(T2, Src0HiR, Src1HiR);
4187 _xori(T3, T2, 1);
4188 _sltu(T4, Src0LoR, Src1LoR);
4189 _xori(T5, T4, 1);
4190 _movz(T3, T5, T1);
4191 _mov(Dest, T3);
4192 return;
4193 }
4194 case InstIcmp::Uge: {
4195 auto *T1 = I32Reg();
4196 auto *T2 = I32Reg();
4197 auto *T3 = I32Reg();
4198 auto *T4 = I32Reg();
4199 auto *T5 = I32Reg();
4200 _xor(T1, Src0HiR, Src1HiR);
4201 _sltu(T2, Src0HiR, Src1HiR);
4202 _xori(T3, T2, 1);
4203 _sltu(T4, Src0LoR, Src1LoR);
4204 _xori(T5, T4, 1);
4205 _movz(T3, T5, T1);
4206 _mov(Dest, T3);
4207 return;
4208 }
4209 case InstIcmp::Slt: {
4210 auto *T1 = I32Reg();
4211 auto *T2 = I32Reg();
4212 auto *T3 = I32Reg();
4213 _xor(T1, Src0HiR, Src1HiR);
4214 _slt(T2, Src0HiR, Src1HiR);
4215 _sltu(T3, Src0LoR, Src1LoR);
4216 _movz(T2, T3, T1);
4217 _mov(Dest, T2);
4218 return;
4219 }
4220 case InstIcmp::Ult: {
4221 auto *T1 = I32Reg();
4222 auto *T2 = I32Reg();
4223 auto *T3 = I32Reg();
4224 _xor(T1, Src0HiR, Src1HiR);
4225 _sltu(T2, Src0HiR, Src1HiR);
4226 _sltu(T3, Src0LoR, Src1LoR);
4227 _movz(T2, T3, T1);
4228 _mov(Dest, T2);
4229 return;
4230 }
4231 case InstIcmp::Sle: {
4232 auto *T1 = I32Reg();
4233 auto *T2 = I32Reg();
4234 auto *T3 = I32Reg();
4235 auto *T4 = I32Reg();
4236 auto *T5 = I32Reg();
4237 _xor(T1, Src0HiR, Src1HiR);
4238 _slt(T2, Src1HiR, Src0HiR);
4239 _xori(T3, T2, 1);
4240 _sltu(T4, Src1LoR, Src0LoR);
4241 _xori(T5, T4, 1);
4242 _movz(T3, T5, T1);
4243 _mov(Dest, T3);
4244 return;
4245 }
4246 case InstIcmp::Ule: {
4247 auto *T1 = I32Reg();
4248 auto *T2 = I32Reg();
4249 auto *T3 = I32Reg();
4250 auto *T4 = I32Reg();
4251 auto *T5 = I32Reg();
4252 _xor(T1, Src0HiR, Src1HiR);
4253 _sltu(T2, Src1HiR, Src0HiR);
4254 _xori(T3, T2, 1);
4255 _sltu(T4, Src1LoR, Src0LoR);
4256 _xori(T5, T4, 1);
4257 _movz(T3, T5, T1);
4258 _mov(Dest, T3);
4259 return;
4260 }
4261 }
4262 }
4263
4264 void TargetMIPS32::lowerIcmp(const InstIcmp *Instr) {
4265 auto *Src0 = Instr->getSrc(0);
4266 auto *Src1 = Instr->getSrc(1);
4267 if (Src0->getType() == IceType_i64) {
4268 lower64Icmp(Instr);
4269 return;
4270 }
4271 Variable *Dest = Instr->getDest();
4272 if (isVectorType(Dest->getType())) {
4273 llvm::report_fatal_error("Icmp: Destination type is vector");
4274 return;
4275 }
4276 InstIcmp::ICond Cond = Instr->getCondition();
4277 auto *Src0R = legalizeToReg(Src0);
4278 auto *Src1R = legalizeToReg(Src1);
4279 const Type Src0Ty = Src0R->getType();
4280 const uint32_t ShAmt = INT32_BITS - getScalarIntBitWidth(Src0->getType());
4281 Variable *Src0RT = I32Reg();
4282 Variable *Src1RT = I32Reg();
4283
4284 if (Src0Ty != IceType_i32) {
4285 _sll(Src0RT, Src0R, ShAmt);
4286 _sll(Src1RT, Src1R, ShAmt);
4287 } else {
4288 _mov(Src0RT, Src0R);
4289 _mov(Src1RT, Src1R);
4290 }
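  // Shifting both operands into the most-significant bits discards any stale
  // upper bits of the narrow type and preserves both signed and unsigned
  // ordering, since both values are scaled by the same power of two.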
4291
4292 switch (Cond) {
4293 case InstIcmp::Eq: {
4294 auto *DestT = I32Reg();
4295 auto *T = I32Reg();
4296 _xor(T, Src0RT, Src1RT);
4297 _sltiu(DestT, T, 1);
4298 _mov(Dest, DestT);
4299 return;
4300 }
4301 case InstIcmp::Ne: {
4302 auto *DestT = I32Reg();
4303 auto *T = I32Reg();
4304 auto *Zero = getZero();
4305 _xor(T, Src0RT, Src1RT);
4306 _sltu(DestT, Zero, T);
4307 _mov(Dest, DestT);
4308 return;
4309 }
4310 case InstIcmp::Ugt: {
4311 auto *DestT = I32Reg();
4312 _sltu(DestT, Src1RT, Src0RT);
4313 _mov(Dest, DestT);
4314 return;
4315 }
4316 case InstIcmp::Uge: {
4317 auto *DestT = I32Reg();
4318 auto *T = I32Reg();
4319 _sltu(T, Src0RT, Src1RT);
4320 _xori(DestT, T, 1);
4321 _mov(Dest, DestT);
4322 return;
4323 }
4324 case InstIcmp::Ult: {
4325 auto *DestT = I32Reg();
4326 _sltu(DestT, Src0RT, Src1RT);
4327 _mov(Dest, DestT);
4328 return;
4329 }
4330 case InstIcmp::Ule: {
4331 auto *DestT = I32Reg();
4332 auto *T = I32Reg();
4333 _sltu(T, Src1RT, Src0RT);
4334 _xori(DestT, T, 1);
4335 _mov(Dest, DestT);
4336 return;
4337 }
4338 case InstIcmp::Sgt: {
4339 auto *DestT = I32Reg();
4340 _slt(DestT, Src1RT, Src0RT);
4341 _mov(Dest, DestT);
4342 return;
4343 }
4344 case InstIcmp::Sge: {
4345 auto *DestT = I32Reg();
4346 auto *T = I32Reg();
4347 _slt(T, Src0RT, Src1RT);
4348 _xori(DestT, T, 1);
4349 _mov(Dest, DestT);
4350 return;
4351 }
4352 case InstIcmp::Slt: {
4353 auto *DestT = I32Reg();
4354 _slt(DestT, Src0RT, Src1RT);
4355 _mov(Dest, DestT);
4356 return;
4357 }
4358 case InstIcmp::Sle: {
4359 auto *DestT = I32Reg();
4360 auto *T = I32Reg();
4361 _slt(T, Src1RT, Src0RT);
4362 _xori(DestT, T, 1);
4363 _mov(Dest, DestT);
4364 return;
4365 }
4366 default:
4367 llvm_unreachable("Invalid ICmp operator");
4368 return;
4369 }
4370 }
4371
4372 void TargetMIPS32::lowerInsertElement(const InstInsertElement *Instr) {
4373 Variable *Dest = Instr->getDest();
4374 const Type DestTy = Dest->getType();
4375 Operand *Src2 = Instr->getSrc(2);
4376 if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src2)) {
4377 const uint32_t Index = Imm->getValue();
4378     // Vector to insert into
4379 auto *Src0 = legalizeUndef(Instr->getSrc(0));
4380 auto *Src0R = llvm::dyn_cast<VariableVecOn32>(Src0);
4381 // Number of elements in each container
4382 uint32_t ElemPerCont =
4383 typeNumElements(Src0->getType()) / Src0R->ContainersPerVector;
4384 // Source Element
4385 auto *Src = Src0R->getContainers()[Index / ElemPerCont];
4386 auto *SrcE = Src;
4387 if (ElemPerCont > 1)
4388 SrcE = legalizeToReg(Src);
4389 // Dest is a vector
4390 auto *VDest = llvm::dyn_cast<VariableVecOn32>(Dest);
4391 VDest->initVecElement(Func);
4392 // Temp vector variable
4393 auto *TDest = makeReg(DestTy);
4394 auto *TVDest = llvm::dyn_cast<VariableVecOn32>(TDest);
4395 TVDest->initVecElement(Func);
4396 // Destination element
4397 auto *DstE = TVDest->getContainers()[Index / ElemPerCont];
4398 // Element to insert
4399 auto *Src1R = legalizeToReg(Instr->getSrc(1));
4400 auto *TReg1 = makeReg(IceType_i32);
4401 auto *TReg2 = makeReg(IceType_i32);
4402 auto *TReg3 = makeReg(IceType_i32);
4403 auto *TReg4 = makeReg(IceType_i32);
4404 auto *TReg5 = makeReg(IceType_i32);
4405 auto *TDReg = makeReg(IceType_i32);
4406 // Position of the element in the container
4407 uint32_t PosInCont = Index % ElemPerCont;
4408     // Copy the source vector into a temporary vector
4409 for (SizeT i = 0; i < TVDest->ContainersPerVector; ++i) {
4410 auto *DCont = TVDest->getContainers()[i];
4411 // Do not define DstE as we are going to redefine it
4412 if (DCont == DstE)
4413 continue;
4414 auto *SCont = Src0R->getContainers()[i];
4415 auto *TReg = makeReg(IceType_i32);
4416 _mov(TReg, SCont);
4417 _mov(DCont, TReg);
4418 }
4419 // Insert the element
4420 if (ElemPerCont == 1) {
4421 _mov(DstE, Src1R);
4422 } else if (ElemPerCont == 2) {
4423 switch (PosInCont) {
4424 case 0:
4425 _andi(TReg1, Src1R, 0xffff); // Clear upper 16-bits of source
4426 _srl(TReg2, SrcE, 16);
4427 _sll(TReg3, TReg2, 16); // Clear lower 16-bits of element
4428 _or(TDReg, TReg1, TReg3);
4429 _mov(DstE, TDReg);
4430 break;
4431 case 1:
4432 _sll(TReg1, Src1R, 16); // Clear lower 16-bits of source
4433 _sll(TReg2, SrcE, 16);
4434 _srl(TReg3, TReg2, 16); // Clear upper 16-bits of element
4435 _or(TDReg, TReg1, TReg3);
4436 _mov(DstE, TDReg);
4437 break;
4438 default:
4439 llvm::report_fatal_error("InsertElement: Invalid PosInCont");
4440 break;
4441 }
4442 } else if (ElemPerCont == 4) {
4443 switch (PosInCont) {
4444 case 0:
4445 _andi(TReg1, Src1R, 0xff); // Clear bits[31:8] of source
4446 _srl(TReg2, SrcE, 8);
4447 _sll(TReg3, TReg2, 8); // Clear bits[7:0] of element
4448 _or(TDReg, TReg1, TReg3);
4449 _mov(DstE, TDReg);
4450 break;
4451 case 1:
4452 _andi(TReg1, Src1R, 0xff); // Clear bits[31:8] of source
4453 _sll(TReg5, TReg1, 8); // Position in the destination
4454 _lui(TReg2, Ctx->getConstantInt32(0xffff));
4455 _ori(TReg3, TReg2, 0x00ff);
4456 _and(TReg4, SrcE, TReg3); // Clear bits[15:8] of element
4457 _or(TDReg, TReg5, TReg4);
4458 _mov(DstE, TDReg);
4459 break;
4460 case 2:
4461 _andi(TReg1, Src1R, 0xff); // Clear bits[31:8] of source
4462 _sll(TReg5, TReg1, 16); // Position in the destination
4463 _lui(TReg2, Ctx->getConstantInt32(0xff00));
4464 _ori(TReg3, TReg2, 0xffff);
4465         _and(TReg4, SrcE, TReg3); // Clear bits[23:16] of element
4466 _or(TDReg, TReg5, TReg4);
4467 _mov(DstE, TDReg);
4468 break;
4469 case 3:
4470 _sll(TReg1, Src1R, 24); // Position in the destination
4471 _sll(TReg2, SrcE, 8);
4472 _srl(TReg3, TReg2, 8); // Clear bits[31:24] of element
4473 _or(TDReg, TReg1, TReg3);
4474 _mov(DstE, TDReg);
4475 break;
4476 default:
4477 llvm::report_fatal_error("InsertElement: Invalid PosInCont");
4478 break;
4479 }
4480 }
4481 // Write back temporary vector to the destination
4482 auto *Assign = InstAssign::create(Func, Dest, TDest);
4483 lowerAssign(Assign);
4484 return;
4485 }
4486 llvm::report_fatal_error("InsertElement requires a constant index");
4487 }
4488
4489 void TargetMIPS32::createArithInst(Intrinsics::AtomicRMWOperation Operation,
4490 Variable *Dest, Variable *Src0,
4491 Variable *Src1) {
4492 switch (Operation) {
4493 default:
4494 llvm::report_fatal_error("Unknown AtomicRMW operation");
4495 case Intrinsics::AtomicExchange:
4496 llvm::report_fatal_error("Can't handle Atomic xchg operation");
4497 case Intrinsics::AtomicAdd:
4498 _addu(Dest, Src0, Src1);
4499 break;
4500 case Intrinsics::AtomicAnd:
4501 _and(Dest, Src0, Src1);
4502 break;
4503 case Intrinsics::AtomicSub:
4504 _subu(Dest, Src0, Src1);
4505 break;
4506 case Intrinsics::AtomicOr:
4507 _or(Dest, Src0, Src1);
4508 break;
4509 case Intrinsics::AtomicXor:
4510 _xor(Dest, Src0, Src1);
4511 break;
4512 }
4513 }
4514
4515 void TargetMIPS32::lowerIntrinsic(const InstIntrinsic *Instr) {
4516 Variable *Dest = Instr->getDest();
4517 Type DestTy = (Dest == nullptr) ? IceType_void : Dest->getType();
4518
4519 Intrinsics::IntrinsicID ID = Instr->getIntrinsicID();
4520 switch (ID) {
4521 case Intrinsics::AtomicLoad: {
4522 assert(isScalarIntegerType(DestTy));
4523     // We require the memory address to be naturally aligned. Given that,
4524     // normal loads are atomic.
4525 if (!Intrinsics::isMemoryOrderValid(
4526 ID, getConstantMemoryOrder(Instr->getArg(1)))) {
4527 Func->setError("Unexpected memory ordering for AtomicLoad");
4528 return;
4529 }
4530 if (DestTy == IceType_i64) {
4531 llvm::report_fatal_error("AtomicLoad.i64 should have been prelowered.");
4532 return;
4533 } else if (DestTy == IceType_i32) {
4534 auto *T1 = makeReg(DestTy);
4535 auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4536 auto *Base = legalizeToReg(Instr->getArg(0));
4537 auto *Addr = formMemoryOperand(Base, DestTy);
4538 InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4539 InstMIPS32Label *Exit = InstMIPS32Label::create(Func, this);
4540 constexpr CfgNode *NoTarget = nullptr;
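      // Load the word with ll; if it is nonzero, branch straight to Exit,
      // otherwise complete the ll/sc pair by writing zero back, retrying
      // until the sc succeeds.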
4541 _sync();
4542 Context.insert(Retry);
4543 _ll(T1, Addr);
4544 _br(NoTarget, NoTarget, T1, getZero(), Exit, CondMIPS32::Cond::NE);
4545       _addiu(RegAt, getZero(), 0); // Loaded value is zero here; write back zero
4546 _sc(RegAt, Addr);
4547 _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4548 Context.insert(Exit);
4549 _sync();
4550 _mov(Dest, T1);
4551 Context.insert<InstFakeUse>(T1);
4552 } else {
4553 const uint32_t Mask = (1 << (CHAR_BITS * typeWidthInBytes(DestTy))) - 1;
4554 auto *Base = legalizeToReg(Instr->getArg(0));
4555 auto *T1 = makeReg(IceType_i32);
4556 auto *T2 = makeReg(IceType_i32);
4557 auto *T3 = makeReg(IceType_i32);
4558 auto *T4 = makeReg(IceType_i32);
4559 auto *T5 = makeReg(IceType_i32);
4560 auto *T6 = makeReg(IceType_i32);
4561 auto *SrcMask = makeReg(IceType_i32);
4562 auto *Tdest = makeReg(IceType_i32);
4563 auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4564 InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4565 InstMIPS32Label *Exit = InstMIPS32Label::create(Func, this);
4566 constexpr CfgNode *NoTarget = nullptr;
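      // Sub-word atomic load: ll/sc only operate on aligned words, so load
      // the containing word, then shift the requested byte/halfword down and
      // mask it off. T4 is the bit offset of the value within the word
      // (byte offset * 8), and SrcMask selects its bits.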
4567 _sync();
4568 _addiu(T1, getZero(), -4); // Address mask 0xFFFFFFFC
4569 _andi(T2, Base, 3); // Last two bits of the address
4570 _and(T3, Base, T1); // Align the address
4571 _sll(T4, T2, 3);
4572 _ori(T5, getZero(), Mask);
4573 _sllv(SrcMask, T5, T4); // Source mask
4574 auto *Addr = formMemoryOperand(T3, IceType_i32);
4575 Context.insert(Retry);
4576 _ll(T6, Addr);
4577 _and(Tdest, T6, SrcMask);
4578 _br(NoTarget, NoTarget, T6, getZero(), Exit, CondMIPS32::Cond::NE);
4579       _addiu(RegAt, getZero(), 0); // Loaded value is zero here; write back zero
4580 _sc(RegAt, Addr);
4581 _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4582 Context.insert(Exit);
4583 auto *T7 = makeReg(IceType_i32);
4584 auto *T8 = makeReg(IceType_i32);
4585 _srlv(T7, Tdest, T4);
4586 _andi(T8, T7, Mask);
4587 _sync();
4588 _mov(Dest, T8);
4589 Context.insert<InstFakeUse>(T6);
4590 Context.insert<InstFakeUse>(SrcMask);
4591 }
4592 return;
4593 }
4594 case Intrinsics::AtomicStore: {
4595     // We require the memory address to be naturally aligned. Given that,
4596     // normal stores are atomic.
4597 if (!Intrinsics::isMemoryOrderValid(
4598 ID, getConstantMemoryOrder(Instr->getArg(2)))) {
4599 Func->setError("Unexpected memory ordering for AtomicStore");
4600 return;
4601 }
4602 auto *Val = Instr->getArg(0);
4603 auto Ty = Val->getType();
4604 if (Ty == IceType_i64) {
4605 llvm::report_fatal_error("AtomicStore.i64 should have been prelowered.");
4606 return;
4607 } else if (Ty == IceType_i32) {
4608 auto *Val = legalizeToReg(Instr->getArg(0));
4609 auto *Base = legalizeToReg(Instr->getArg(1));
4610 auto *Addr = formMemoryOperand(Base, Ty);
4611 InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4612 constexpr CfgNode *NoTarget = nullptr;
4613 auto *T1 = makeReg(IceType_i32);
4614 auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4615 _sync();
4616 Context.insert(Retry);
4617 _ll(T1, Addr);
4618 _mov(RegAt, Val);
4619 _sc(RegAt, Addr);
4620 _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4621 Context.insert<InstFakeUse>(T1); // To keep LL alive
4622 _sync();
4623 } else {
4624 auto *Val = legalizeToReg(Instr->getArg(0));
4625 auto *Base = legalizeToReg(Instr->getArg(1));
4626 InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4627 constexpr CfgNode *NoTarget = nullptr;
4628 auto *T1 = makeReg(IceType_i32);
4629 auto *T2 = makeReg(IceType_i32);
4630 auto *T3 = makeReg(IceType_i32);
4631 auto *T4 = makeReg(IceType_i32);
4632 auto *T5 = makeReg(IceType_i32);
4633 auto *T6 = makeReg(IceType_i32);
4634 auto *T7 = makeReg(IceType_i32);
4635 auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4636 auto *SrcMask = makeReg(IceType_i32);
4637 auto *DstMask = makeReg(IceType_i32);
4638 const uint32_t Mask = (1 << (CHAR_BITS * typeWidthInBytes(Ty))) - 1;
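      // Sub-word atomic store: read-modify-write the containing aligned word
      // in an ll/sc loop, clearing the target byte/halfword with the inverted
      // mask (SrcMask) and merging in the shifted new value (DstMask).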
4639 _sync();
4640 _addiu(T1, getZero(), -4);
4641 _and(T7, Base, T1);
4642 auto *Addr = formMemoryOperand(T7, Ty);
4643 _andi(T2, Base, 3);
4644 _sll(T3, T2, 3);
4645 _ori(T4, getZero(), Mask);
4646 _sllv(T5, T4, T3);
4647 _sllv(T6, Val, T3);
4648 _nor(SrcMask, getZero(), T5);
4649 _and(DstMask, T6, T5);
4650 Context.insert(Retry);
4651 _ll(RegAt, Addr);
4652 _and(RegAt, RegAt, SrcMask);
4653 _or(RegAt, RegAt, DstMask);
4654 _sc(RegAt, Addr);
4655 _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4656 Context.insert<InstFakeUse>(SrcMask);
4657 Context.insert<InstFakeUse>(DstMask);
4658 _sync();
4659 }
4660 return;
4661 }
4662 case Intrinsics::AtomicCmpxchg: {
4663 assert(isScalarIntegerType(DestTy));
4664     // We require the memory address to be naturally aligned. Given that,
4665     // normal loads are atomic.
4666 if (!Intrinsics::isMemoryOrderValid(
4667 ID, getConstantMemoryOrder(Instr->getArg(3)),
4668 getConstantMemoryOrder(Instr->getArg(4)))) {
4669 Func->setError("Unexpected memory ordering for AtomicCmpxchg");
4670 return;
4671 }
4672
4673 InstMIPS32Label *Exit = InstMIPS32Label::create(Func, this);
4674 InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4675 constexpr CfgNode *NoTarget = nullptr;
4676 auto *New = Instr->getArg(2);
4677 auto *Expected = Instr->getArg(1);
4678 auto *ActualAddress = Instr->getArg(0);
4679
4680 if (DestTy == IceType_i64) {
4681 llvm::report_fatal_error(
4682 "AtomicCmpxchg.i64 should have been prelowered.");
4683 return;
4684 } else if (DestTy == IceType_i8 || DestTy == IceType_i16) {
4685 auto *NewR = legalizeToReg(New);
4686 auto *ExpectedR = legalizeToReg(Expected);
4687 auto *ActualAddressR = legalizeToReg(ActualAddress);
4688 const uint32_t ShiftAmount =
4689 (INT32_BITS - CHAR_BITS * typeWidthInBytes(DestTy));
4690 const uint32_t Mask = (1 << (CHAR_BITS * typeWidthInBytes(DestTy))) - 1;
4691 auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4692 auto *T1 = I32Reg();
4693 auto *T2 = I32Reg();
4694 auto *T3 = I32Reg();
4695 auto *T4 = I32Reg();
4696 auto *T5 = I32Reg();
4697 auto *T6 = I32Reg();
4698 auto *T7 = I32Reg();
4699 auto *T8 = I32Reg();
4700 auto *T9 = I32Reg();
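      // Sub-word compare-and-swap on the containing aligned word: T3 selects
      // the target byte/halfword, T5 and T6 hold the shifted expected and new
      // values, and the ll/sc loop retries until either the loaded field
      // differs from T5 or the sc succeeds.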
4701 _addiu(RegAt, getZero(), -4);
4702 _and(T1, ActualAddressR, RegAt);
4703 auto *Addr = formMemoryOperand(T1, DestTy);
4704 _andi(RegAt, ActualAddressR, 3);
4705 _sll(T2, RegAt, 3);
4706 _ori(RegAt, getZero(), Mask);
4707 _sllv(T3, RegAt, T2);
4708 _nor(T4, getZero(), T3);
4709 _andi(RegAt, ExpectedR, Mask);
4710 _sllv(T5, RegAt, T2);
4711 _andi(RegAt, NewR, Mask);
4712 _sllv(T6, RegAt, T2);
4713 _sync();
4714 Context.insert(Retry);
4715 _ll(T7, Addr);
4716 _and(T8, T7, T3);
4717 _br(NoTarget, NoTarget, T8, T5, Exit, CondMIPS32::Cond::NE);
4718 _and(RegAt, T7, T4);
4719 _or(T9, RegAt, T6);
4720 _sc(T9, Addr);
4721 _br(NoTarget, NoTarget, getZero(), T9, Retry, CondMIPS32::Cond::EQ);
4722 Context.insert<InstFakeUse>(getZero());
4723 Context.insert(Exit);
4724 _srlv(RegAt, T8, T2);
4725 _sll(RegAt, RegAt, ShiftAmount);
4726 _sra(RegAt, RegAt, ShiftAmount);
4727 _mov(Dest, RegAt);
4728 _sync();
4729 Context.insert<InstFakeUse>(T3);
4730 Context.insert<InstFakeUse>(T4);
4731 Context.insert<InstFakeUse>(T5);
4732 Context.insert<InstFakeUse>(T6);
4733 Context.insert<InstFakeUse>(T8);
4734 Context.insert<InstFakeUse>(ExpectedR);
4735 Context.insert<InstFakeUse>(NewR);
4736 } else {
4737 auto *T1 = I32Reg();
4738 auto *T2 = I32Reg();
4739 auto *NewR = legalizeToReg(New);
4740 auto *ExpectedR = legalizeToReg(Expected);
4741 auto *ActualAddressR = legalizeToReg(ActualAddress);
4742 _sync();
4743 Context.insert(Retry);
4744 _ll(T1, formMemoryOperand(ActualAddressR, DestTy));
4745 _br(NoTarget, NoTarget, T1, ExpectedR, Exit, CondMIPS32::Cond::NE);
4746 _mov(T2, NewR);
4747 _sc(T2, formMemoryOperand(ActualAddressR, DestTy));
4748 _br(NoTarget, NoTarget, T2, getZero(), Retry, CondMIPS32::Cond::EQ);
4749 Context.insert<InstFakeUse>(getZero());
4750 Context.insert(Exit);
4751 _mov(Dest, T1);
4752 _sync();
4753 Context.insert<InstFakeUse>(ExpectedR);
4754 Context.insert<InstFakeUse>(NewR);
4755 }
4756 return;
4757 }
4758 case Intrinsics::AtomicRMW: {
4759 assert(isScalarIntegerType(DestTy));
4760     // We require the memory address to be naturally aligned. Given that,
4761     // normal loads are atomic.
4762 if (!Intrinsics::isMemoryOrderValid(
4763 ID, getConstantMemoryOrder(Instr->getArg(3)))) {
4764 Func->setError("Unexpected memory ordering for AtomicRMW");
4765 return;
4766 }
4767
4768 constexpr CfgNode *NoTarget = nullptr;
4769 InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4770 auto Operation = static_cast<Intrinsics::AtomicRMWOperation>(
4771 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue());
4772 auto *New = Instr->getArg(2);
4773 auto *ActualAddress = Instr->getArg(1);
4774
4775 if (DestTy == IceType_i64) {
4776 llvm::report_fatal_error("AtomicRMW.i64 should have been prelowered.");
4777 return;
4778 } else if (DestTy == IceType_i8 || DestTy == IceType_i16) {
4779 const uint32_t ShiftAmount =
4780 INT32_BITS - (CHAR_BITS * typeWidthInBytes(DestTy));
4781 const uint32_t Mask = (1 << (CHAR_BITS * typeWidthInBytes(DestTy))) - 1;
4782 auto *NewR = legalizeToReg(New);
4783 auto *ActualAddressR = legalizeToReg(ActualAddress);
4784 auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4785 auto *T1 = I32Reg();
4786 auto *T2 = I32Reg();
4787 auto *T3 = I32Reg();
4788 auto *T4 = I32Reg();
4789 auto *T5 = I32Reg();
4790 auto *T6 = I32Reg();
4791 auto *T7 = I32Reg();
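      // Sub-word atomic RMW uses the same word-aligned ll/sc pattern: apply
      // the operation to the shifted operand, splice the result back into the
      // loaded word, and sign-extend the old field into Dest afterwards.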
4792 _sync();
4793 _addiu(RegAt, getZero(), -4);
4794 _and(T1, ActualAddressR, RegAt);
4795 _andi(RegAt, ActualAddressR, 3);
4796 _sll(T2, RegAt, 3);
4797 _ori(RegAt, getZero(), Mask);
4798 _sllv(T3, RegAt, T2);
4799 _nor(T4, getZero(), T3);
4800 _sllv(T5, NewR, T2);
4801 Context.insert(Retry);
4802 _ll(T6, formMemoryOperand(T1, DestTy));
4803 if (Operation != Intrinsics::AtomicExchange) {
4804 createArithInst(Operation, RegAt, T6, T5);
4805 _and(RegAt, RegAt, T3);
4806 }
4807 _and(T7, T6, T4);
4808 if (Operation == Intrinsics::AtomicExchange) {
4809 _or(RegAt, T7, T5);
4810 } else {
4811 _or(RegAt, T7, RegAt);
4812 }
4813 _sc(RegAt, formMemoryOperand(T1, DestTy));
4814 _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4815 Context.insert<InstFakeUse>(getZero());
4816 _and(RegAt, T6, T3);
4817 _srlv(RegAt, RegAt, T2);
4818 _sll(RegAt, RegAt, ShiftAmount);
4819 _sra(RegAt, RegAt, ShiftAmount);
4820 _mov(Dest, RegAt);
4821 _sync();
4822 Context.insert<InstFakeUse>(NewR);
4823 Context.insert<InstFakeUse>(Dest);
4824 } else {
4825 auto *T1 = I32Reg();
4826 auto *T2 = I32Reg();
4827 auto *NewR = legalizeToReg(New);
4828 auto *ActualAddressR = legalizeToReg(ActualAddress);
4829 _sync();
4830 Context.insert(Retry);
4831 _ll(T1, formMemoryOperand(ActualAddressR, DestTy));
4832 if (Operation == Intrinsics::AtomicExchange) {
4833 _mov(T2, NewR);
4834 } else {
4835 createArithInst(Operation, T2, T1, NewR);
4836 }
4837 _sc(T2, formMemoryOperand(ActualAddressR, DestTy));
4838 _br(NoTarget, NoTarget, T2, getZero(), Retry, CondMIPS32::Cond::EQ);
4839 Context.insert<InstFakeUse>(getZero());
4840 _mov(Dest, T1);
4841 _sync();
4842 Context.insert<InstFakeUse>(NewR);
4843 Context.insert<InstFakeUse>(Dest);
4844 }
4845 return;
4846 }
4847 case Intrinsics::AtomicFence:
4848 case Intrinsics::AtomicFenceAll:
4849 assert(Dest == nullptr);
4850 _sync();
4851 return;
4852 case Intrinsics::AtomicIsLockFree: {
4853 Operand *ByteSize = Instr->getArg(0);
4854 auto *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize);
4855 auto *T = I32Reg();
4856 if (CI == nullptr) {
4857 // The PNaCl ABI requires the byte size to be a compile-time constant.
4858 Func->setError("AtomicIsLockFree byte size should be compile-time const");
4859 return;
4860 }
4861 static constexpr int32_t NotLockFree = 0;
4862 static constexpr int32_t LockFree = 1;
4863 int32_t Result = NotLockFree;
4864 switch (CI->getValue()) {
4865 case 1:
4866 case 2:
4867 case 4:
4868 Result = LockFree;
4869 break;
4870 }
4871 _addiu(T, getZero(), Result);
4872 _mov(Dest, T);
4873 return;
4874 }
4875 case Intrinsics::Bswap: {
4876 auto *Src = Instr->getArg(0);
4877 const Type SrcTy = Src->getType();
4878 assert(SrcTy == IceType_i16 || SrcTy == IceType_i32 ||
4879 SrcTy == IceType_i64);
4880 switch (SrcTy) {
4881 case IceType_i16: {
4882 auto *T1 = I32Reg();
4883 auto *T2 = I32Reg();
4884 auto *T3 = I32Reg();
4885 auto *T4 = I32Reg();
4886 auto *SrcR = legalizeToReg(Src);
4887 _sll(T1, SrcR, 8);
4888 _lui(T2, Ctx->getConstantInt32(255));
4889 _and(T1, T1, T2);
4890 _sll(T3, SrcR, 24);
4891 _or(T1, T3, T1);
4892 _srl(T4, T1, 16);
4893 _mov(Dest, T4);
4894 return;
4895 }
4896 case IceType_i32: {
4897 auto *T1 = I32Reg();
4898 auto *T2 = I32Reg();
4899 auto *T3 = I32Reg();
4900 auto *T4 = I32Reg();
4901 auto *T5 = I32Reg();
4902 auto *SrcR = legalizeToReg(Src);
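      // Byte-swap with shifts and masks: T1 gathers the two high source bytes
      // into the low half, T4 gathers the two low source bytes into the high
      // half, and the final or combines them.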
      _srl(T1, SrcR, 24);
      _srl(T2, SrcR, 8);
      _andi(T2, T2, 0xFF00);
      _or(T1, T2, T1);
      _sll(T4, SrcR, 8);
      _lui(T3, Ctx->getConstantInt32(255));
      _and(T4, T4, T3);
      _sll(T5, SrcR, 24);
      _or(T4, T5, T4);
      _or(T4, T4, T1);
      _mov(Dest, T4);
      return;
    }
    case IceType_i64: {
      auto *T1 = I32Reg();
      auto *T2 = I32Reg();
      auto *T3 = I32Reg();
      auto *T4 = I32Reg();
      auto *T5 = I32Reg();
      auto *T6 = I32Reg();
      auto *T7 = I32Reg();
      auto *T8 = I32Reg();
      auto *T9 = I32Reg();
      auto *T10 = I32Reg();
      auto *T11 = I32Reg();
      auto *T12 = I32Reg();
      auto *T13 = I32Reg();
      auto *T14 = I32Reg();
      auto *T15 = I32Reg();
      auto *T16 = I32Reg();
      auto *T17 = I32Reg();
      auto *T18 = I32Reg();
      auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
      auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Src = legalizeUndef(Src);
      auto *SrcLoR = legalizeToReg(loOperand(Src));
      auto *SrcHiR = legalizeToReg(hiOperand(Src));
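      // A 64-bit byteswap is the 32-bit recipe applied to each half with the
      // halves exchanged: T12 is bswap(SrcHi) and becomes DestLo, while T18
      // is bswap(SrcLo) and becomes DestHi.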
      _sll(T1, SrcHiR, 8);
      _srl(T2, SrcHiR, 24);
      _srl(T3, SrcHiR, 8);
      _andi(T3, T3, 0xFF00);
      _lui(T4, Ctx->getConstantInt32(255));
      _or(T5, T3, T2);
      _and(T6, T1, T4);
      _sll(T7, SrcHiR, 24);
      _or(T8, T7, T6);
      _srl(T9, SrcLoR, 24);
      _srl(T10, SrcLoR, 8);
      _andi(T11, T10, 0xFF00);
      _or(T12, T8, T5);
      _or(T13, T11, T9);
      _sll(T14, SrcLoR, 8);
      _and(T15, T14, T4);
      _sll(T16, SrcLoR, 24);
      _or(T17, T16, T15);
      _or(T18, T17, T13);
      _mov(DestLo, T12);
      _mov(DestHi, T18);
      return;
    }
    default:
      llvm::report_fatal_error("Control flow should never have reached here.");
    }
    return;
  }
  case Intrinsics::Ctpop: {
    llvm::report_fatal_error("Ctpop should have been prelowered.");
    return;
  }
  case Intrinsics::Ctlz: {
    auto *Src = Instr->getArg(0);
    const Type SrcTy = Src->getType();
    assert(SrcTy == IceType_i32 || SrcTy == IceType_i64);
    switch (SrcTy) {
    case IceType_i32: {
      auto *T = I32Reg();
      auto *SrcR = legalizeToReg(Src);
      _clz(T, SrcR);
      _mov(Dest, T);
      break;
    }
    case IceType_i64: {
      auto *T1 = I32Reg();
      auto *T2 = I32Reg();
      auto *T3 = I32Reg();
      auto *T4 = I32Reg();
      auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
      auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *SrcHiR = legalizeToReg(hiOperand(Src));
      Variable *SrcLoR = legalizeToReg(loOperand(Src));
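      // clz64(x) == clz(hi) when the high word is non-zero, otherwise
      // 32 + clz(lo). Compute both candidates and let movn pick clz(hi)
      // whenever SrcHiR != 0; the high result word is always zero.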
      _clz(T1, SrcHiR);
      _clz(T2, SrcLoR);
      _addiu(T3, T2, 32);
      _movn(T3, T1, SrcHiR);
      _addiu(T4, getZero(), 0);
      _mov(DestHi, T4);
      _mov(DestLo, T3);
      break;
    }
    default:
      llvm::report_fatal_error("Control flow should never have reached here.");
    }
    break;
  }
  case Intrinsics::Cttz: {
    auto *Src = Instr->getArg(0);
    const Type SrcTy = Src->getType();
    assert(SrcTy == IceType_i32 || SrcTy == IceType_i64);
    switch (SrcTy) {
    case IceType_i32: {
      auto *T1 = I32Reg();
      auto *T2 = I32Reg();
      auto *T3 = I32Reg();
      auto *T4 = I32Reg();
      auto *T5 = I32Reg();
      auto *T6 = I32Reg();
      auto *SrcR = legalizeToReg(Src);
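      // MIPS32 has no trailing-zero count, so use the identity
      // cttz(x) == 32 - clz(~x & (x - 1)): the mask ~x & (x - 1) keeps
      // exactly the zeros below the lowest set bit.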
      _addiu(T1, SrcR, -1);
      _not(T2, SrcR);
      _and(T3, T2, T1);
      _clz(T4, T3);
      _addiu(T5, getZero(), 32);
      _subu(T6, T5, T4);
      _mov(Dest, T6);
      break;
    }
    case IceType_i64: {
      auto *THi1 = I32Reg();
      auto *THi2 = I32Reg();
      auto *THi3 = I32Reg();
      auto *THi4 = I32Reg();
      auto *THi5 = I32Reg();
      auto *THi6 = I32Reg();
      auto *TLo1 = I32Reg();
      auto *TLo2 = I32Reg();
      auto *TLo3 = I32Reg();
      auto *TLo4 = I32Reg();
      auto *TLo5 = I32Reg();
      auto *TLo6 = I32Reg();
      auto *TResHi = I32Reg();
      auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
      auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *SrcHiR = legalizeToReg(hiOperand(Src));
      Variable *SrcLoR = legalizeToReg(loOperand(Src));
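      // Apply the same 32 - clz(~x & (x - 1)) trick per word: 64 - clz(...)
      // on the high word yields 32 + cttz(hi), and movn selects the low-word
      // result instead whenever SrcLoR != 0. The high result word is zero.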
      _addiu(THi1, SrcHiR, -1);
      _not(THi2, SrcHiR);
      _and(THi3, THi2, THi1);
      _clz(THi4, THi3);
      _addiu(THi5, getZero(), 64);
      _subu(THi6, THi5, THi4);
      _addiu(TLo1, SrcLoR, -1);
      _not(TLo2, SrcLoR);
      _and(TLo3, TLo2, TLo1);
      _clz(TLo4, TLo3);
      _addiu(TLo5, getZero(), 32);
      _subu(TLo6, TLo5, TLo4);
      _movn(THi6, TLo6, SrcLoR);
      _addiu(TResHi, getZero(), 0);
      _mov(DestHi, TResHi);
      _mov(DestLo, THi6);
      break;
    }
    default:
      llvm::report_fatal_error("Control flow should never have reached here.");
    }
    return;
  }
  case Intrinsics::Fabs: {
    if (isScalarFloatingType(DestTy)) {
      Variable *T = makeReg(DestTy);
      if (DestTy == IceType_f32) {
        _abs_s(T, legalizeToReg(Instr->getArg(0)));
      } else {
        _abs_d(T, legalizeToReg(Instr->getArg(0)));
      }
      _mov(Dest, T);
    }
    return;
  }
  case Intrinsics::Longjmp: {
    llvm::report_fatal_error("longjmp should have been prelowered.");
    return;
  }
  case Intrinsics::Memcpy: {
    llvm::report_fatal_error("memcpy should have been prelowered.");
    return;
  }
  case Intrinsics::Memmove: {
    llvm::report_fatal_error("memmove should have been prelowered.");
    return;
  }
  case Intrinsics::Memset: {
    llvm::report_fatal_error("memset should have been prelowered.");
    return;
  }
  case Intrinsics::Setjmp: {
    llvm::report_fatal_error("setjmp should have been prelowered.");
    return;
  }
  case Intrinsics::Sqrt: {
    if (isScalarFloatingType(DestTy)) {
      Variable *T = makeReg(DestTy);
      if (DestTy == IceType_f32) {
        _sqrt_s(T, legalizeToReg(Instr->getArg(0)));
      } else {
        _sqrt_d(T, legalizeToReg(Instr->getArg(0)));
      }
      _mov(Dest, T);
    } else {
      UnimplementedLoweringError(this, Instr);
    }
    return;
  }
  case Intrinsics::Stacksave: {
    Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
    _mov(Dest, SP);
    return;
  }
  case Intrinsics::Stackrestore: {
    Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
    Variable *Val = legalizeToReg(Instr->getArg(0));
    _mov(SP, Val);
    return;
  }
  case Intrinsics::Trap: {
    const uint32_t TrapCodeZero = 0;
    _teq(getZero(), getZero(), TrapCodeZero);
    return;
  }
  case Intrinsics::LoadSubVector: {
    UnimplementedLoweringError(this, Instr);
    return;
  }
  case Intrinsics::StoreSubVector: {
    UnimplementedLoweringError(this, Instr);
    return;
  }
  default: // UnknownIntrinsic
    Func->setError("Unexpected intrinsic");
    return;
  }
  return;
}

void TargetMIPS32::lowerLoad(const InstLoad *Instr) {
  // A Load instruction can be treated the same as an Assign instruction, after
  // the source operand is transformed into an OperandMIPS32Mem operand.
  Type Ty = Instr->getDest()->getType();
  Operand *Src0 = formMemoryOperand(Instr->getLoadAddress(), Ty);
  Variable *DestLoad = Instr->getDest();
  auto *Assign = InstAssign::create(Func, DestLoad, Src0);
  lowerAssign(Assign);
}

namespace {
void dumpAddressOpt(const Cfg *Func, const Variable *Base, int32_t Offset,
                    const Inst *Reason) {
  if (!BuildDefs::dump())
    return;
  if (!Func->isVerbose(IceV_AddrOpt))
    return;
  OstreamLocker _(Func->getContext());
  Ostream &Str = Func->getContext()->getStrDump();
  Str << "Instruction: ";
  Reason->dumpDecorated(Func);
  Str << " results in Base=";
  if (Base)
    Base->dump(Func);
  else
    Str << "<null>";
  Str << ", Offset=" << Offset << "\n";
}

bool matchAssign(const VariablesMetadata *VMetadata, Variable **Var,
                 int32_t *Offset, const Inst **Reason) {
  // Var originates from Var=SrcVar ==> set Var:=SrcVar
  if (*Var == nullptr)
    return false;
  const Inst *VarAssign = VMetadata->getSingleDefinition(*Var);
  if (!VarAssign)
    return false;
  assert(!VMetadata->isMultiDef(*Var));
  if (!llvm::isa<InstAssign>(VarAssign))
    return false;

  Operand *SrcOp = VarAssign->getSrc(0);
  bool Optimized = false;
  if (auto *SrcVar = llvm::dyn_cast<Variable>(SrcOp)) {
    if (!VMetadata->isMultiDef(SrcVar) ||
        // TODO: ensure SrcVar stays single-BB
        false) {
      Optimized = true;
      *Var = SrcVar;
    }
  } else if (auto *Const = llvm::dyn_cast<ConstantInteger32>(SrcOp)) {
    // Var originates from Var=Const ==> drop the base and fold the constant
    // into the offset.
    int32_t MoreOffset = Const->getValue();
    if (Utils::WouldOverflowAdd(*Offset, MoreOffset))
      return false;
    *Var = nullptr;
    *Offset += MoreOffset;
    Optimized = true;
  }

  if (Optimized) {
    *Reason = VarAssign;
  }

  return Optimized;
}

bool isAddOrSub(const Inst *Instr, InstArithmetic::OpKind *Kind) {
  if (const auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) {
    switch (Arith->getOp()) {
    default:
      return false;
    case InstArithmetic::Add:
    case InstArithmetic::Sub:
      *Kind = Arith->getOp();
      return true;
    }
  }
  return false;
}

bool matchOffsetBase(const VariablesMetadata *VMetadata, Variable **Base,
                     int32_t *Offset, const Inst **Reason) {
  // Base is Base=Var+Const || Base is Base=Const+Var ==>
  //   set Base=Var, Offset+=Const
  // Base is Base=Var-Const ==>
  //   set Base=Var, Offset-=Const
  if (*Base == nullptr)
    return false;
  const Inst *BaseInst = VMetadata->getSingleDefinition(*Base);
  if (BaseInst == nullptr) {
    return false;
  }
  assert(!VMetadata->isMultiDef(*Base));

  auto *ArithInst = llvm::dyn_cast<const InstArithmetic>(BaseInst);
  if (ArithInst == nullptr)
    return false;
  InstArithmetic::OpKind Kind;
  if (!isAddOrSub(ArithInst, &Kind))
    return false;
  bool IsAdd = Kind == InstArithmetic::Add;
  Operand *Src0 = ArithInst->getSrc(0);
  Operand *Src1 = ArithInst->getSrc(1);
  auto *Var0 = llvm::dyn_cast<Variable>(Src0);
  auto *Var1 = llvm::dyn_cast<Variable>(Src1);
  auto *Const0 = llvm::dyn_cast<ConstantInteger32>(Src0);
  auto *Const1 = llvm::dyn_cast<ConstantInteger32>(Src1);
  Variable *NewBase = nullptr;
  int32_t NewOffset = *Offset;

  if (Var0 == nullptr && Const0 == nullptr) {
    assert(llvm::isa<ConstantRelocatable>(Src0));
    return false;
  }

  if (Var1 == nullptr && Const1 == nullptr) {
    assert(llvm::isa<ConstantRelocatable>(Src1));
    return false;
  }

  if (Var0 && Var1)
    // TODO(jpp): merge base/index splitting into here.
    return false;
  if (!IsAdd && Var1)
    return false;
  if (Var0)
    NewBase = Var0;
  else if (Var1)
    NewBase = Var1;
  // Compute the updated constant offset.
  if (Const0) {
    int32_t MoreOffset = IsAdd ? Const0->getValue() : -Const0->getValue();
    if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
      return false;
    NewOffset += MoreOffset;
  }
  if (Const1) {
    int32_t MoreOffset = IsAdd ? Const1->getValue() : -Const1->getValue();
    if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
      return false;
    NewOffset += MoreOffset;
  }

  // Update the computed address parameters once we are sure the optimization
  // is valid.
  *Base = NewBase;
  *Offset = NewOffset;
  *Reason = BaseInst;
  return true;
}
} // end of anonymous namespace

OperandMIPS32Mem *TargetMIPS32::formAddressingMode(Type Ty, Cfg *Func,
                                                   const Inst *LdSt,
                                                   Operand *Base) {
  assert(Base != nullptr);
  int32_t OffsetImm = 0;

  Func->resetCurrentNode();
  if (Func->isVerbose(IceV_AddrOpt)) {
    OstreamLocker _(Func->getContext());
    Ostream &Str = Func->getContext()->getStrDump();
    Str << "\nAddress mode formation:\t";
    LdSt->dumpDecorated(Func);
  }

  if (isVectorType(Ty)) {
    return nullptr;
  }

  auto *BaseVar = llvm::dyn_cast<Variable>(Base);
  if (BaseVar == nullptr)
    return nullptr;

  const VariablesMetadata *VMetadata = Func->getVMetadata();
  const Inst *Reason = nullptr;

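  // Walk the definition chain, folding plain assignments and
  // add/sub-of-constant definitions into (BaseVar, OffsetImm). For a
  // hypothetical sequence
  //   t1 = t0 + 12
  //   t2 = t1
  //   load t2
  // the loop below leaves BaseVar = t0 and OffsetImm = 12, i.e. load 12(t0).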
  do {
    if (Reason != nullptr) {
      dumpAddressOpt(Func, BaseVar, OffsetImm, Reason);
      Reason = nullptr;
    }

    if (matchAssign(VMetadata, &BaseVar, &OffsetImm, &Reason)) {
      continue;
    }

    if (matchOffsetBase(VMetadata, &BaseVar, &OffsetImm, &Reason)) {
      continue;
    }
  } while (Reason);

  if (BaseVar == nullptr) {
    // We need a base register rather than just an offset. Move OffsetImm into
    // BaseVar and form 0(BaseVar) addressing.
    const Type PointerType = getPointerType();
    BaseVar = makeReg(PointerType);
    Context.insert<InstAssign>(BaseVar, Ctx->getConstantInt32(OffsetImm));
    OffsetImm = 0;
  } else if (OffsetImm != 0) {
    // If OffsetImm does not fit in a signed 16-bit immediate, fold it into
    // BaseVar and form 0(BaseVar) addressing.
    const int32_t PositiveOffset = OffsetImm > 0 ? OffsetImm : -OffsetImm;
    const InstArithmetic::OpKind Op =
        OffsetImm > 0 ? InstArithmetic::Add : InstArithmetic::Sub;
    constexpr bool ZeroExt = false;
    if (!OperandMIPS32Mem::canHoldOffset(Ty, ZeroExt, OffsetImm)) {
      const Type PointerType = getPointerType();
      Variable *T = makeReg(PointerType);
      Context.insert<InstArithmetic>(Op, T, BaseVar,
                                     Ctx->getConstantInt32(PositiveOffset));
      BaseVar = T;
      OffsetImm = 0;
    }
  }

  assert(BaseVar != nullptr);
  assert(OffsetImm < 0 ? (-OffsetImm & 0x0000ffff) == -OffsetImm
                       : (OffsetImm & 0x0000ffff) == OffsetImm);

  return OperandMIPS32Mem::create(
      Func, Ty, BaseVar,
      llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(OffsetImm)));
}

void TargetMIPS32::doAddressOptLoad() {
  Inst *Instr = iteratorToInst(Context.getCur());
  assert(llvm::isa<InstLoad>(Instr));
  Variable *Dest = Instr->getDest();
  Operand *Addr = Instr->getSrc(0);
  if (OperandMIPS32Mem *Mem =
          formAddressingMode(Dest->getType(), Func, Instr, Addr)) {
    Instr->setDeleted();
    Context.insert<InstLoad>(Dest, Mem);
  }
}

void TargetMIPS32::lowerPhi(const InstPhi * /*Instr*/) {
  Func->setError("Phi found in regular instruction list");
}

void TargetMIPS32::lowerRet(const InstRet *Instr) {
  Variable *Reg = nullptr;
  if (Instr->hasRetValue()) {
    Operand *Src0 = Instr->getRetValue();
    switch (Src0->getType()) {
    case IceType_f32: {
      Operand *Src0F = legalizeToReg(Src0);
      Reg = makeReg(Src0F->getType(), RegMIPS32::Reg_F0);
      _mov(Reg, Src0F);
      break;
    }
    case IceType_f64: {
      Operand *Src0F = legalizeToReg(Src0);
      Reg = makeReg(Src0F->getType(), RegMIPS32::Reg_F0F1);
      _mov(Reg, Src0F);
      break;
    }
    case IceType_i1:
    case IceType_i8:
    case IceType_i16:
    case IceType_i32: {
      Operand *Src0F = legalizeToReg(Src0);
      Reg = makeReg(Src0F->getType(), RegMIPS32::Reg_V0);
      _mov(Reg, Src0F);
      break;
    }
    case IceType_i64: {
      Src0 = legalizeUndef(Src0);
      Variable *R0 = legalizeToReg(loOperand(Src0), RegMIPS32::Reg_V0);
      Variable *R1 = legalizeToReg(hiOperand(Src0), RegMIPS32::Reg_V1);
      Reg = R0;
      Context.insert<InstFakeUse>(R1);
      break;
    }
    case IceType_v4i1:
    case IceType_v8i1:
    case IceType_v16i1:
    case IceType_v16i8:
    case IceType_v8i16:
    case IceType_v4i32: {
      auto *SrcVec = llvm::dyn_cast<VariableVecOn32>(legalizeUndef(Src0));
      Variable *V0 =
          legalizeToReg(SrcVec->getContainers()[0], RegMIPS32::Reg_V0);
      Variable *V1 =
          legalizeToReg(SrcVec->getContainers()[1], RegMIPS32::Reg_V1);
      Variable *A0 =
          legalizeToReg(SrcVec->getContainers()[2], RegMIPS32::Reg_A0);
      Variable *A1 =
          legalizeToReg(SrcVec->getContainers()[3], RegMIPS32::Reg_A1);
      Reg = V0;
      Context.insert<InstFakeUse>(V1);
      Context.insert<InstFakeUse>(A0);
      Context.insert<InstFakeUse>(A1);
      break;
    }
    case IceType_v4f32: {
      auto *SrcVec = llvm::dyn_cast<VariableVecOn32>(legalizeUndef(Src0));
      Reg = getImplicitRet();
      auto *RegT = legalizeToReg(Reg);
      // Return the vector through the return buffer, whose address arrives in
      // the implicit argument a0.
      for (SizeT i = 0; i < SrcVec->ContainersPerVector; ++i) {
        OperandMIPS32Mem *Mem = OperandMIPS32Mem::create(
            Func, IceType_f32, RegT,
            llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(i * 4)));
        Variable *Var = legalizeToReg(SrcVec->getContainers()[i]);
        _sw(Var, Mem);
      }
      Variable *V0 = makeReg(IceType_i32, RegMIPS32::Reg_V0);
      _mov(V0, Reg); // move v0,a0
      Context.insert<InstFakeUse>(Reg);
      Context.insert<InstFakeUse>(V0);
      break;
    }
    default:
      llvm::report_fatal_error("Ret: Invalid type.");
      break;
    }
  }
  _ret(getPhysicalRegister(RegMIPS32::Reg_RA), Reg);
}

void TargetMIPS32::lowerSelect(const InstSelect *Instr) {
  Variable *Dest = Instr->getDest();
  const Type DestTy = Dest->getType();

  if (isVectorType(DestTy)) {
    llvm::report_fatal_error("Select: Destination type is vector");
    return;
  }

  Variable *DestR = nullptr;
  Variable *DestHiR = nullptr;
  Variable *SrcTR = nullptr;
  Variable *SrcTHiR = nullptr;
  Variable *SrcFR = nullptr;
  Variable *SrcFHiR = nullptr;

  if (DestTy == IceType_i64) {
    DestR = llvm::cast<Variable>(loOperand(Dest));
    DestHiR = llvm::cast<Variable>(hiOperand(Dest));
    SrcTR = legalizeToReg(loOperand(legalizeUndef(Instr->getTrueOperand())));
    SrcTHiR = legalizeToReg(hiOperand(legalizeUndef(Instr->getTrueOperand())));
    SrcFR = legalizeToReg(loOperand(legalizeUndef(Instr->getFalseOperand())));
    SrcFHiR = legalizeToReg(hiOperand(legalizeUndef(Instr->getFalseOperand())));
  } else {
    SrcTR = legalizeToReg(legalizeUndef(Instr->getTrueOperand()));
    SrcFR = legalizeToReg(legalizeUndef(Instr->getFalseOperand()));
  }

  Variable *ConditionR = legalizeToReg(Instr->getCondition());

  assert(Instr->getCondition()->getType() == IceType_i1);

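  // movn dst, src, cond copies src into dst only when cond is non-zero, so
  // each lowering below starts with the false operand in the result register
  // and conditionally overwrites it with the true operand.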
  switch (DestTy) {
  case IceType_i1:
  case IceType_i8:
  case IceType_i16:
  case IceType_i32:
    _movn(SrcFR, SrcTR, ConditionR);
    _mov(Dest, SrcFR);
    break;
  case IceType_i64:
    _movn(SrcFR, SrcTR, ConditionR);
    _movn(SrcFHiR, SrcTHiR, ConditionR);
    _mov(DestR, SrcFR);
    _mov(DestHiR, SrcFHiR);
    break;
  case IceType_f32:
    _movn_s(SrcFR, SrcTR, ConditionR);
    _mov(Dest, SrcFR);
    break;
  case IceType_f64:
    _movn_d(SrcFR, SrcTR, ConditionR);
    _mov(Dest, SrcFR);
    break;
  default:
    llvm::report_fatal_error("Select: Invalid type.");
  }
}

void TargetMIPS32::lowerShuffleVector(const InstShuffleVector *Instr) {
  UnimplementedLoweringError(this, Instr);
}

void TargetMIPS32::lowerStore(const InstStore *Instr) {
  Operand *Value = Instr->getData();
  Operand *Addr = Instr->getStoreAddress();
  OperandMIPS32Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
  Type Ty = NewAddr->getType();

  if (Ty == IceType_i64) {
    Value = legalizeUndef(Value);
    Variable *ValueHi = legalizeToReg(hiOperand(Value));
    Variable *ValueLo = legalizeToReg(loOperand(Value));
    _sw(ValueHi, llvm::cast<OperandMIPS32Mem>(hiOperand(NewAddr)));
    _sw(ValueLo, llvm::cast<OperandMIPS32Mem>(loOperand(NewAddr)));
  } else if (isVectorType(Value->getType())) {
    auto *DataVec = llvm::dyn_cast<VariableVecOn32>(Value);
    for (SizeT i = 0; i < DataVec->ContainersPerVector; ++i) {
      auto *DCont = legalizeToReg(DataVec->getContainers()[i]);
      auto *MCont = llvm::cast<OperandMIPS32Mem>(
          getOperandAtIndex(NewAddr, IceType_i32, i));
      _sw(DCont, MCont);
    }
  } else {
    Variable *ValueR = legalizeToReg(Value);
    _sw(ValueR, NewAddr);
  }
}

void TargetMIPS32::doAddressOptStore() {
  Inst *Instr = iteratorToInst(Context.getCur());
  assert(llvm::isa<InstStore>(Instr));
  Operand *Src = Instr->getSrc(0);
  Operand *Addr = Instr->getSrc(1);
  if (OperandMIPS32Mem *Mem =
          formAddressingMode(Src->getType(), Func, Instr, Addr)) {
    Instr->setDeleted();
    Context.insert<InstStore>(Src, Mem);
  }
}

void TargetMIPS32::lowerSwitch(const InstSwitch *Instr) {
  Operand *Src = Instr->getComparison();
  SizeT NumCases = Instr->getNumCases();
  if (Src->getType() == IceType_i64) {
    Src = legalizeUndef(Src);
    Variable *Src0Lo = legalizeToReg(loOperand(Src));
    Variable *Src0Hi = legalizeToReg(hiOperand(Src));
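    // A 64-bit case needs two compares: skip to IntraLabel (i.e. past this
    // case) when the high words differ, and branch to the case target only
    // when the low words also match.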
    for (SizeT I = 0; I < NumCases; ++I) {
      Operand *ValueLo = Ctx->getConstantInt32(Instr->getValue(I));
      Operand *ValueHi = Ctx->getConstantInt32(Instr->getValue(I) >> 32);
      CfgNode *TargetTrue = Instr->getLabel(I);
      constexpr CfgNode *NoTarget = nullptr;
      ValueHi = legalizeToReg(ValueHi);
      InstMIPS32Label *IntraLabel = InstMIPS32Label::create(Func, this);
      _br(NoTarget, NoTarget, Src0Hi, ValueHi, IntraLabel,
          CondMIPS32::Cond::NE);
      ValueLo = legalizeToReg(ValueLo);
      _br(NoTarget, TargetTrue, Src0Lo, ValueLo, CondMIPS32::Cond::EQ);
      Context.insert(IntraLabel);
    }
    _br(Instr->getLabelDefault());
    return;
  }
  Variable *SrcVar = legalizeToReg(Src);
  assert(SrcVar->mustHaveReg());
  for (SizeT I = 0; I < NumCases; ++I) {
    Operand *Value = Ctx->getConstantInt32(Instr->getValue(I));
    CfgNode *TargetTrue = Instr->getLabel(I);
    constexpr CfgNode *NoTargetFalse = nullptr;
    Value = legalizeToReg(Value);
    _br(NoTargetFalse, TargetTrue, SrcVar, Value, CondMIPS32::Cond::EQ);
  }
  _br(Instr->getLabelDefault());
}

void TargetMIPS32::lowerBreakpoint(const InstBreakpoint *Instr) {
  UnimplementedLoweringError(this, Instr);
}

void TargetMIPS32::lowerUnreachable(const InstUnreachable *) {
  const uint32_t TrapCodeZero = 0;
  _teq(getZero(), getZero(), TrapCodeZero);
}

void TargetMIPS32::lowerOther(const Inst *Instr) {
  if (llvm::isa<InstMIPS32Sync>(Instr)) {
    _sync();
  } else {
    TargetLowering::lowerOther(Instr);
  }
}

// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to preserve
// integrity of liveness analysis. Undef values are also turned into zeroes,
// since loOperand() and hiOperand() don't expect Undef input.
void TargetMIPS32::prelowerPhis() {
  PhiLowering::prelowerPhis32Bit<TargetMIPS32>(this, Context.getNode(), Func);
}

void TargetMIPS32::postLower() {
  if (Func->getOptLevel() == Opt_m1)
    return;
  markRedefinitions();
  Context.availabilityUpdate();
}

/* TODO(jvoung): avoid duplicate symbols with multiple targets.
void ConstantUndef::emitWithoutDollar(GlobalContext *) const {
  llvm_unreachable("Not expecting to emitWithoutDollar undef");
}

void ConstantUndef::emit(GlobalContext *) const {
  llvm_unreachable("undef value encountered by emitter.");
}
*/

TargetDataMIPS32::TargetDataMIPS32(GlobalContext *Ctx)
    : TargetDataLowering(Ctx) {}

// Generate the .MIPS.abiflags section. This section contains a versioned data
// structure with essential information required by the loader to determine
// the requirements of the application.
void TargetDataMIPS32::emitTargetRODataSections() {
  struct MipsABIFlagsSection Flags;
  ELFObjectWriter *Writer = Ctx->getObjectWriter();
  const std::string Name = ".MIPS.abiflags";
  const llvm::ELF::Elf64_Word ShType = llvm::ELF::SHT_MIPS_ABIFLAGS;
  const llvm::ELF::Elf64_Xword ShFlags = llvm::ELF::SHF_ALLOC;
  const llvm::ELF::Elf64_Xword ShAddralign = 8;
  const llvm::ELF::Elf64_Xword ShEntsize = sizeof(Flags);
  Writer->writeTargetRODataSection(
      Name, ShType, ShFlags, ShAddralign, ShEntsize,
      llvm::StringRef(reinterpret_cast<const char *>(&Flags), sizeof(Flags)));
}

void TargetDataMIPS32::lowerGlobals(const VariableDeclarationList &Vars,
                                    const std::string &SectionSuffix) {
  const bool IsPIC = false;
  switch (getFlags().getOutFileType()) {
  case FT_Elf: {
    ELFObjectWriter *Writer = Ctx->getObjectWriter();
    Writer->writeDataSection(Vars, llvm::ELF::R_MIPS_32, SectionSuffix, IsPIC);
  } break;
  case FT_Asm:
  case FT_Iasm: {
    OstreamLocker L(Ctx);
    for (const VariableDeclaration *Var : Vars) {
      if (getFlags().matchTranslateOnly(Var->getName(), 0)) {
        emitGlobal(*Var, SectionSuffix);
      }
    }
  } break;
  }
}

namespace {
template <typename T> struct ConstantPoolEmitterTraits;

static_assert(sizeof(uint64_t) == 8,
              "uint64_t is supposed to be 8 bytes wide.");

// TODO(jaydeep.patil): implement the following when implementing constant
// randomization:
// * template <> struct ConstantPoolEmitterTraits<uint8_t>
// * template <> struct ConstantPoolEmitterTraits<uint16_t>
// * template <> struct ConstantPoolEmitterTraits<uint32_t>
template <> struct ConstantPoolEmitterTraits<float> {
  using ConstantType = ConstantFloat;
  static constexpr Type IceType = IceType_f32;
  // AsmTag and TypeName can't be constexpr because llvm::StringRef is unhappy
  // about them being constexpr.
  static const char AsmTag[];
  static const char TypeName[];
  static uint64_t bitcastToUint64(float Value) {
    static_assert(sizeof(Value) == sizeof(uint32_t),
                  "Float should be 4 bytes.");
    const uint32_t IntValue = Utils::bitCopy<uint32_t>(Value);
    return static_cast<uint64_t>(IntValue);
  }
};
const char ConstantPoolEmitterTraits<float>::AsmTag[] = ".word";
const char ConstantPoolEmitterTraits<float>::TypeName[] = "f32";

template <> struct ConstantPoolEmitterTraits<double> {
  using ConstantType = ConstantDouble;
  static constexpr Type IceType = IceType_f64;
  static const char AsmTag[];
  static const char TypeName[];
  static uint64_t bitcastToUint64(double Value) {
    static_assert(sizeof(double) == sizeof(uint64_t),
                  "Double should be 8 bytes.");
    return Utils::bitCopy<uint64_t>(Value);
  }
};
const char ConstantPoolEmitterTraits<double>::AsmTag[] = ".quad";
const char ConstantPoolEmitterTraits<double>::TypeName[] = "f64";

template <typename T>
void emitConstant(
    Ostream &Str,
    const typename ConstantPoolEmitterTraits<T>::ConstantType *Const) {
  if (!BuildDefs::dump())
    return;
  using Traits = ConstantPoolEmitterTraits<T>;
  Str << Const->getLabelName();
  T Value = Const->getValue();
  Str << ":\n\t" << Traits::AsmTag << "\t0x";
  Str.write_hex(Traits::bitcastToUint64(Value));
  Str << "\t/* " << Traits::TypeName << " " << Value << " */\n";
}
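
// For reference, emitConstant<float> writes assembly of the form
//   <label>:
//           .word   0x3f800000      /* f32 <value> */
// where <label> comes from Const->getLabelName() and the tag/type strings
// come from the traits above (.quad and f64 for doubles).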

template <typename T> void emitConstantPool(GlobalContext *Ctx) {
  if (!BuildDefs::dump())
    return;
  using Traits = ConstantPoolEmitterTraits<T>;
  static constexpr size_t MinimumAlignment = 4;
  SizeT Align = std::max(MinimumAlignment, typeAlignInBytes(Traits::IceType));
  assert((Align % 4) == 0 && "Constants should be aligned");
  Ostream &Str = Ctx->getStrEmit();
  ConstantList Pool = Ctx->getConstantPool(Traits::IceType);
  Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",%progbits," << Align
      << "\n"
      << "\t.align\t" << (Align == 4 ? 2 : 3) << "\n";
  for (Constant *C : Pool) {
    if (!C->getShouldBePooled()) {
      continue;
    }
    emitConstant<T>(Str, llvm::dyn_cast<typename Traits::ConstantType>(C));
  }
}
} // end of anonymous namespace

void TargetDataMIPS32::lowerConstants() {
  if (getFlags().getDisableTranslation())
    return;
  switch (getFlags().getOutFileType()) {
  case FT_Elf: {
    ELFObjectWriter *Writer = Ctx->getObjectWriter();
    Writer->writeConstantPool<ConstantFloat>(IceType_f32);
    Writer->writeConstantPool<ConstantDouble>(IceType_f64);
  } break;
  case FT_Asm:
  case FT_Iasm: {
    OstreamLocker _(Ctx);
    emitConstantPool<float>(Ctx);
    emitConstantPool<double>(Ctx);
    break;
  }
  }
}

void TargetDataMIPS32::lowerJumpTables() {
  if (getFlags().getDisableTranslation())
    return;
}

// Helper for legalize() to emit the right code to lower an operand to a
// register of the appropriate type.
Variable *TargetMIPS32::copyToReg(Operand *Src, RegNumT RegNum) {
  Type Ty = Src->getType();
  Variable *Reg = makeReg(Ty, RegNum);
  if (isVectorType(Ty)) {
    llvm::report_fatal_error("Invalid copy from vector type.");
  } else {
    if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Src)) {
      _lw(Reg, Mem);
    } else {
      _mov(Reg, Src);
    }
  }
  return Reg;
}

Operand *TargetMIPS32::legalize(Operand *From, LegalMask Allowed,
                                RegNumT RegNum) {
  Type Ty = From->getType();
  // Assert that a physical register is allowed. To date, all calls to
  // legalize() allow a physical register.
  assert(Allowed & Legal_Reg);

  if (RegNum.hasNoValue()) {
    if (Variable *Subst = getContext().availabilityGet(From)) {
      // At this point we know there is a potential substitution available.
      if (!Subst->isRematerializable() && Subst->mustHaveReg() &&
          !Subst->hasReg()) {
        // At this point we know the substitution will have a register.
        if (From->getType() == Subst->getType()) {
          // At this point we know the substitution's register is compatible.
          return Subst;
        }
      }
    }
  }

  // Go through the various types of operands: OperandMIPS32Mem, Constant, and
  // Variable. Given the above assertion, if the operand's type is not legal
  // (e.g., OperandMIPS32Mem and !Legal_Mem), we can always copy it to a
  // register.
  if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(From)) {
    // Base must be in a physical register.
    Variable *Base = Mem->getBase();
    ConstantInteger32 *Offset = llvm::cast<ConstantInteger32>(Mem->getOffset());
    Variable *RegBase = nullptr;
    assert(Base);

    RegBase = llvm::cast<Variable>(
        legalize(Base, Legal_Reg | Legal_Rematerializable));

    if (Offset != nullptr && Offset->getValue() != 0) {
      static constexpr bool ZeroExt = false;
      if (!OperandMIPS32Mem::canHoldOffset(Ty, ZeroExt, Offset->getValue())) {
        llvm::report_fatal_error("Invalid memory offset.");
      }
    }

    // Create a new operand if there was a change.
    if (Base != RegBase) {
      Mem = OperandMIPS32Mem::create(Func, Ty, RegBase, Offset,
                                     Mem->getAddrMode());
    }

    if (Allowed & Legal_Mem) {
      From = Mem;
    } else {
      Variable *Reg = makeReg(Ty, RegNum);
      _lw(Reg, Mem);
      From = Reg;
    }
    return From;
  }

  if (llvm::isa<Constant>(From)) {
    if (llvm::isa<ConstantUndef>(From)) {
      From = legalizeUndef(From, RegNum);
      if (isVectorType(Ty))
        return From;
    }
    if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) {
      Variable *Reg = makeReg(Ty, RegNum);
      Variable *TReg = makeReg(Ty, RegNum);
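      // Materialize the symbol address as a %hi/%lo pair:
      //   lui   treg, %hi(sym)
      //   addiu reg, treg, %lo(sym)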
      _lui(TReg, C, RO_Hi);
      _addiu(Reg, TReg, C, RO_Lo);
      return Reg;
    } else if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) {
      const uint32_t Value = C32->getValue();
      // Use addiu if the immediate fits in a signed 16-bit field; otherwise
      // load it with a lui/ori pair.
      Variable *Reg = makeReg(Ty, RegNum);
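      // For example, 0x00001234 fits and becomes addiu reg, zero, 0x1234,
      // while 0x12345678 becomes lui treg, 0x1234; ori reg, treg, 0x5678.
      // When the low half is zero the lui alone suffices.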
      if (isInt<16>(int32_t(Value))) {
        Variable *Zero = makeReg(Ty, RegMIPS32::Reg_ZERO);
        Context.insert<InstFakeDef>(Zero);
        _addiu(Reg, Zero, Value);
      } else {
        uint32_t UpperBits = (Value >> 16) & 0xFFFF;
        uint32_t LowerBits = Value & 0xFFFF;
        if (LowerBits) {
          Variable *TReg = makeReg(Ty, RegNum);
          _lui(TReg, Ctx->getConstantInt32(UpperBits));
          _ori(Reg, TReg, LowerBits);
        } else {
          _lui(Reg, Ctx->getConstantInt32(UpperBits));
        }
      }
      return Reg;
    } else if (isScalarFloatingType(Ty)) {
      auto *CFrom = llvm::cast<Constant>(From);
      Variable *TReg = makeReg(Ty);
      if (!CFrom->getShouldBePooled()) {
        // Float/Double constant 0 is not pooled.
        Context.insert<InstFakeDef>(TReg);
        _mov(TReg, getZero());
      } else {
        // Load floats/doubles from literal pool.
        Constant *Offset = Ctx->getConstantSym(0, CFrom->getLabelName());
        Variable *TReg1 = makeReg(getPointerType());
        _lui(TReg1, Offset, RO_Hi);
        OperandMIPS32Mem *Addr =
            OperandMIPS32Mem::create(Func, Ty, TReg1, Offset);
        if (Ty == IceType_f32)
          _lwc1(TReg, Addr, RO_Lo);
        else
          _ldc1(TReg, Addr, RO_Lo);
      }
      return copyToReg(TReg, RegNum);
    }
  }

  if (auto *Var = llvm::dyn_cast<Variable>(From)) {
    if (Var->isRematerializable()) {
      if (Allowed & Legal_Rematerializable) {
        return From;
      }

      Variable *T = makeReg(Var->getType(), RegNum);
      _mov(T, Var);
      return T;
    }
    // Check if the variable is guaranteed a physical register. This can
    // happen either when the variable is pre-colored or when it is assigned
    // infinite weight.
    bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());
    // We need a new physical register for the operand if:
    //   Mem is not allowed and Var isn't guaranteed a physical register, or
    //   RegNum is required and Var->getRegNum() doesn't match.
    if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
        (RegNum.hasValue() && RegNum != Var->getRegNum())) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }
  return From;
}

namespace BoolFolding {
// TODO(sagar.thakur): Add remaining instruction kinds to shouldTrackProducer()
// and isValidConsumer()
bool shouldTrackProducer(const Inst &Instr) {
  return Instr.getKind() == Inst::Icmp;
}

bool isValidConsumer(const Inst &Instr) { return Instr.getKind() == Inst::Br; }
} // end of namespace BoolFolding
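
// Given the producer/consumer checks above, the only fold currently performed
// is a single-use i1 icmp whose lone consumer is a br in the same block, e.g.
//   %c = icmp eq i32 %a, %b
//   br i1 %c, label %t, label %f
// which can then lower to one compare-and-branch without materializing %c.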

void TargetMIPS32::ComputationTracker::recordProducers(CfgNode *Node) {
  for (Inst &Instr : Node->getInsts()) {
    if (Instr.isDeleted())
      continue;
    // Check whether Instr is a valid producer.
    Variable *Dest = Instr.getDest();
    if (Dest // only consider instructions with an actual dest var; and
        && Dest->getType() == IceType_i1 // only bool-type dest vars; and
        && BoolFolding::shouldTrackProducer(Instr)) { // white-listed instr.
      KnownComputations.emplace(Dest->getIndex(),
                                ComputationEntry(&Instr, IceType_i1));
    }
    // Check each src variable against the map.
    FOREACH_VAR_IN_INST(Var, Instr) {
      SizeT VarNum = Var->getIndex();
      auto ComputationIter = KnownComputations.find(VarNum);
      if (ComputationIter == KnownComputations.end()) {
        continue;
      }

      ++ComputationIter->second.NumUses;
      switch (ComputationIter->second.ComputationType) {
      default:
        KnownComputations.erase(VarNum);
        continue;
      case IceType_i1:
        if (!BoolFolding::isValidConsumer(Instr)) {
          KnownComputations.erase(VarNum);
          continue;
        }
        break;
      }

      if (Instr.isLastUse(Var)) {
        ComputationIter->second.IsLiveOut = false;
      }
    }
  }

  for (auto Iter = KnownComputations.begin(), End = KnownComputations.end();
       Iter != End;) {
    // Disable the folding if its dest may be live beyond this block.
    if (Iter->second.IsLiveOut || Iter->second.NumUses > 1) {
      Iter = KnownComputations.erase(Iter);
      continue;
    }

    // Mark as "dead" rather than outright deleting. This is so that other
    // peephole style optimizations during or before lowering have access to
    // this instruction in undeleted form. See for example
    // tryOptimizedCmpxchgCmpBr().
    Iter->second.Instr->setDead();
    ++Iter;
  }
}

TargetHeaderMIPS32::TargetHeaderMIPS32(GlobalContext *Ctx)
    : TargetHeaderLowering(Ctx) {}

void TargetHeaderMIPS32::lower() {
  if (!BuildDefs::dump())
    return;
  OstreamLocker L(Ctx);
  Ostream &Str = Ctx->getStrEmit();
  Str << "\t.set\t"
      << "nomicromips\n";
  Str << "\t.set\t"
      << "nomips16\n";
  Str << "\t.set\t"
      << "noat\n";
}

SmallBitVector TargetMIPS32::TypeToRegisterSet[RCMIPS32_NUM];
SmallBitVector TargetMIPS32::TypeToRegisterSetUnfiltered[RCMIPS32_NUM];
SmallBitVector TargetMIPS32::RegisterAliases[RegMIPS32::Reg_NUM];

} // end of namespace MIPS32
} // end of namespace Ice