//
//                        The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Implements the TargetLoweringMIPS32 class, which consists almost
/// entirely of the lowering sequence for each high-level instruction.
///
//===----------------------------------------------------------------------===//

#include "IceTargetLoweringMIPS32.h"

#include "IceCfg.h"
#include "IceCfgNode.h"
#include "IceClFlags.h"
#include "IceDefs.h"
#include "IceELFObjectWriter.h"
#include "IceGlobalInits.h"
#include "IceInstMIPS32.h"
#include "IceInstVarIter.h"
#include "IceLiveness.h"
#include "IceOperand.h"
#include "IcePhiLoweringImpl.h"
#include "IceRegistersMIPS32.h"
#include "IceTargetLoweringMIPS32.def"
#include "IceUtils.h"
#include "llvm/Support/MathExtras.h"

namespace MIPS32 {
std::unique_ptr<::Ice::TargetLowering> createTargetLowering(::Ice::Cfg *Func) {
  return ::Ice::MIPS32::TargetMIPS32::create(Func);
}

std::unique_ptr<::Ice::TargetDataLowering>
createTargetDataLowering(::Ice::GlobalContext *Ctx) {
  return ::Ice::MIPS32::TargetDataMIPS32::create(Ctx);
}

std::unique_ptr<::Ice::TargetHeaderLowering>
createTargetHeaderLowering(::Ice::GlobalContext *Ctx) {
  return ::Ice::MIPS32::TargetHeaderMIPS32::create(Ctx);
}

void staticInit(::Ice::GlobalContext *Ctx) {
  ::Ice::MIPS32::TargetMIPS32::staticInit(Ctx);
}

bool shouldBePooled(const ::Ice::Constant *C) {
  return ::Ice::MIPS32::TargetMIPS32::shouldBePooled(C);
}

::Ice::Type getPointerType() {
  return ::Ice::MIPS32::TargetMIPS32::getPointerType();
}

} // end of namespace MIPS32

namespace Ice {
namespace MIPS32 {

using llvm::isInt;

namespace {

// The maximum number of arguments to pass in GPR registers.
constexpr uint32_t MIPS32_MAX_GPR_ARG = 4;

std::array<RegNumT, MIPS32_MAX_GPR_ARG> GPRArgInitializer;
std::array<RegNumT, MIPS32_MAX_GPR_ARG / 2> I64ArgInitializer;

constexpr uint32_t MIPS32_MAX_FP_ARG = 2;

std::array<RegNumT, MIPS32_MAX_FP_ARG> FP32ArgInitializer;
std::array<RegNumT, MIPS32_MAX_FP_ARG> FP64ArgInitializer;

const char *getRegClassName(RegClass C) {
  auto ClassNum = static_cast<RegClassMIPS32>(C);
  assert(ClassNum < RCMIPS32_NUM);
  switch (ClassNum) {
  default:
    assert(C < RC_Target);
    return regClassString(C);
    // Add handling of new register classes below.
  }
}

// Stack alignment
constexpr uint32_t MIPS32_STACK_ALIGNMENT_BYTES = 16;

// Value is in bytes. Return Value adjusted to the next highest multiple of the
// stack alignment required for the given type.
uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) {
  size_t typeAlignInBytes = typeWidthInBytes(Ty);
  // Vectors are stored on the stack with the same alignment as the i64 type.
  if (isVectorType(Ty))
    typeAlignInBytes = typeWidthInBytes(IceType_i64);
  return Utils::applyAlignment(Value, typeAlignInBytes);
}

// Value is in bytes. Return Value adjusted to the next highest multiple of the
// stack alignment.
uint32_t applyStackAlignment(uint32_t Value) {
  return Utils::applyAlignment(Value, MIPS32_STACK_ALIGNMENT_BYTES);
}
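
// As a sketch of the two helpers above: applyStackAlignment(52) rounds up to
// the 16-byte stack alignment and yields 64, while applyStackAlignmentTy(6,
// IceType_i32) rounds up to the 4-byte width of i32 and yields 8. Vector
// values align to 8 bytes (the width of i64) rather than their full 16-byte
// width.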

} // end of anonymous namespace

TargetMIPS32::TargetMIPS32(Cfg *Func) : TargetLowering(Func) {}

void TargetMIPS32::assignVarStackSlots(VarList &SortedSpilledVariables,
                                       size_t SpillAreaPaddingBytes,
                                       size_t SpillAreaSizeBytes,
                                       size_t GlobalsAndSubsequentPaddingSize) {
  const VariablesMetadata *VMetadata = Func->getVMetadata();
  size_t GlobalsSpaceUsed = SpillAreaPaddingBytes;
  size_t NextStackOffset = SpillAreaPaddingBytes;
  CfgVector<size_t> LocalsSize(Func->getNumNodes());
  const bool SimpleCoalescing = !callsReturnsTwice();
  for (Variable *Var : SortedSpilledVariables) {
    size_t Increment = typeWidthInBytesOnStack(Var->getType());
    if (SimpleCoalescing && VMetadata->isTracked(Var)) {
      if (VMetadata->isMultiBlock(Var)) {
        GlobalsSpaceUsed += Increment;
        NextStackOffset = GlobalsSpaceUsed;
      } else {
        SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex();
        LocalsSize[NodeIndex] += Increment;
        NextStackOffset = SpillAreaPaddingBytes +
                          GlobalsAndSubsequentPaddingSize +
                          LocalsSize[NodeIndex];
      }
    } else {
      NextStackOffset += Increment;
    }
    Var->setStackOffset(SpillAreaSizeBytes - NextStackOffset);
  }
}
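
// For illustration, assuming SpillAreaPaddingBytes == 0, a 32-byte spill
// area, and a 4-byte GlobalsAndSubsequentPaddingSize: a multi-block i32 gets
// offset 28 (bytes [28,32) at the top of the globals area), while two
// single-block i32 variables living in different nodes both get offset 24,
// sharing bytes [24,28) because the per-node locals areas may overlap.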

void TargetMIPS32::staticInit(GlobalContext *Ctx) {
  (void)Ctx;
  RegNumT::setLimit(RegMIPS32::Reg_NUM);
  SmallBitVector IntegerRegisters(RegMIPS32::Reg_NUM);
  SmallBitVector I64PairRegisters(RegMIPS32::Reg_NUM);
  SmallBitVector Float32Registers(RegMIPS32::Reg_NUM);
  SmallBitVector Float64Registers(RegMIPS32::Reg_NUM);
  SmallBitVector VectorRegisters(RegMIPS32::Reg_NUM);
  SmallBitVector InvalidRegisters(RegMIPS32::Reg_NUM);
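  // Each X(...) row of REGMIPS32_TABLE expands to the block below, so a
  // single pass over the table populates every class-membership bit and the
  // alias set of each register; the final assert checks that each register
  // aliases at least itself.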
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
          isI64Pair, isFP32, isFP64, isVec128, alias_init)                     \
  IntegerRegisters[RegMIPS32::val] = isInt;                                    \
  I64PairRegisters[RegMIPS32::val] = isI64Pair;                                \
  Float32Registers[RegMIPS32::val] = isFP32;                                   \
  Float64Registers[RegMIPS32::val] = isFP64;                                   \
  VectorRegisters[RegMIPS32::val] = isVec128;                                  \
  RegisterAliases[RegMIPS32::val].resize(RegMIPS32::Reg_NUM);                  \
  for (SizeT RegAlias : alias_init) {                                          \
    assert(!RegisterAliases[RegMIPS32::val][RegAlias] &&                       \
           "Duplicate alias for " #val);                                       \
    RegisterAliases[RegMIPS32::val].set(RegAlias);                             \
  }                                                                            \
  RegisterAliases[RegMIPS32::val].resize(RegMIPS32::Reg_NUM);                  \
  assert(RegisterAliases[RegMIPS32::val][RegMIPS32::val]);
  REGMIPS32_TABLE;
#undef X

  // TODO(mohit.bhakkad): Change these inits once we provide argument-related
  // fields in the register tables.
  for (size_t i = 0; i < MIPS32_MAX_GPR_ARG; i++)
    GPRArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_A0 + i);

  for (size_t i = 0; i < MIPS32_MAX_GPR_ARG / 2; i++)
    I64ArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_A0A1 + i);

  for (size_t i = 0; i < MIPS32_MAX_FP_ARG; i++) {
    FP32ArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_F12 + i * 2);
    FP64ArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_F12F13 + i);
  }

  TypeToRegisterSet[IceType_void] = InvalidRegisters;
  TypeToRegisterSet[IceType_i1] = IntegerRegisters;
  TypeToRegisterSet[IceType_i8] = IntegerRegisters;
  TypeToRegisterSet[IceType_i16] = IntegerRegisters;
  TypeToRegisterSet[IceType_i32] = IntegerRegisters;
  TypeToRegisterSet[IceType_i64] = IntegerRegisters;
  TypeToRegisterSet[IceType_f32] = Float32Registers;
  TypeToRegisterSet[IceType_f64] = Float64Registers;
  TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
  TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
  TypeToRegisterSet[IceType_v4f32] = VectorRegisters;

  for (size_t i = 0; i < llvm::array_lengthof(TypeToRegisterSet); ++i)
    TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i];

  filterTypeToRegisterSet(Ctx, RegMIPS32::Reg_NUM, TypeToRegisterSet,
                          llvm::array_lengthof(TypeToRegisterSet),
                          RegMIPS32::getRegName, getRegClassName);
}

void TargetMIPS32::unsetIfNonLeafFunc() {
  for (CfgNode *Node : Func->getNodes()) {
    for (Inst &Instr : Node->getInsts()) {
      if (llvm::isa<InstCall>(&Instr)) {
        // Unset MaybeLeafFunc if a call instruction exists.
        MaybeLeafFunc = false;
        return;
      }
    }
  }
}

uint32_t TargetMIPS32::getStackAlignment() const {
  return MIPS32_STACK_ALIGNMENT_BYTES;
}

uint32_t TargetMIPS32::getCallStackArgumentsSizeBytes(const InstCall *Call) {
  TargetMIPS32::CallingConv CC;
  size_t OutArgsSizeBytes = 0;
  Variable *Dest = Call->getDest();
  bool PartialOnStack = false;
  if (Dest != nullptr && isVectorFloatingType(Dest->getType())) {
    CC.discardReg(RegMIPS32::Reg_A0);
    // The next vector argument is partially on the stack.
    PartialOnStack = true;
  }
  for (SizeT i = 0, NumArgs = Call->getNumArgs(); i < NumArgs; ++i) {
    Operand *Arg = legalizeUndef(Call->getArg(i));
    const Type Ty = Arg->getType();
    RegNumT RegNum;
    if (CC.argInReg(Ty, i, &RegNum)) {
      // If PartialOnStack is true and this is a vector type, then the last
      // two elements are passed on the stack.
      if (PartialOnStack && isVectorType(Ty)) {
        OutArgsSizeBytes = applyStackAlignmentTy(OutArgsSizeBytes, IceType_i64);
        OutArgsSizeBytes += typeWidthInBytesOnStack(IceType_i32) * 2;
      }
      continue;
    }
    OutArgsSizeBytes = applyStackAlignmentTy(OutArgsSizeBytes, Ty);
    OutArgsSizeBytes += typeWidthInBytesOnStack(Ty);
  }
  // Add the size of the argument save area.
  constexpr int BytesPerStackArg = 4;
  OutArgsSizeBytes += MIPS32_MAX_GPR_ARG * BytesPerStackArg;
  return applyStackAlignment(OutArgsSizeBytes);
}
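
// A worked example, assuming a hypothetical call foo(i32, i32, i32, i32,
// i32): the first four arguments travel in $a0-$a3 and contribute nothing
// above, the fifth takes 4 bytes of stack, the O32 argument save area adds
// 16 more, and the final alignment rounds 20 up to 32 bytes of out-args
// space.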

namespace {
inline uint64_t getConstantMemoryOrder(Operand *Opnd) {
  if (auto *Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
    return Integer->getValue();
  return Intrinsics::MemoryOrderInvalid;
}
} // namespace

void TargetMIPS32::genTargetHelperCallFor(Inst *Instr) {
  constexpr bool NoTailCall = false;
  constexpr bool IsTargetHelperCall = true;
  Variable *Dest = Instr->getDest();
  const Type DestTy = Dest ? Dest->getType() : IceType_void;

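  // A sketch of the strategy below: vector operations with no native MIPS32
  // lowering are scalarized into extract-element / scalar-op / insert-element
  // chains, while i64 division and remainder, floating-point remainder, most
  // fp<->i64 casts, and 64-bit atomics are rewritten as calls to runtime
  // helpers before target lowering proper runs.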
  switch (Instr->getKind()) {
  default:
    return;
  case Inst::Select: {
    if (isVectorType(DestTy)) {
      Operand *SrcT = llvm::cast<InstSelect>(Instr)->getTrueOperand();
      Operand *SrcF = llvm::cast<InstSelect>(Instr)->getFalseOperand();
      Operand *Cond = llvm::cast<InstSelect>(Instr)->getCondition();
      Variable *T = Func->makeVariable(DestTy);
      auto *Undef = ConstantUndef::create(Ctx, DestTy);
      Context.insert<InstAssign>(T, Undef);
      auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
      VarVecOn32->initVecElement(Func);
      for (SizeT I = 0; I < typeNumElements(DestTy); ++I) {
        auto *Index = Ctx->getConstantInt32(I);
        auto *OpC = Func->makeVariable(typeElementType(Cond->getType()));
        Context.insert<InstExtractElement>(OpC, Cond, Index);
        auto *OpT = Func->makeVariable(typeElementType(DestTy));
        Context.insert<InstExtractElement>(OpT, SrcT, Index);
        auto *OpF = Func->makeVariable(typeElementType(DestTy));
        Context.insert<InstExtractElement>(OpF, SrcF, Index);
        auto *Dst = Func->makeVariable(typeElementType(DestTy));
        Variable *DestT = Func->makeVariable(DestTy);
        Context.insert<InstSelect>(Dst, OpC, OpT, OpF);
        Context.insert<InstInsertElement>(DestT, T, Dst, Index);
        T = DestT;
      }
      Context.insert<InstAssign>(Dest, T);
      Instr->setDeleted();
    }
    return;
  }
  case Inst::Fcmp: {
    if (isVectorType(DestTy)) {
      InstFcmp::FCond Cond = llvm::cast<InstFcmp>(Instr)->getCondition();
      Operand *Src0 = Instr->getSrc(0);
      Operand *Src1 = Instr->getSrc(1);
      Variable *T = Func->makeVariable(IceType_v4f32);
      auto *Undef = ConstantUndef::create(Ctx, IceType_v4f32);
      Context.insert<InstAssign>(T, Undef);
      auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
      VarVecOn32->initVecElement(Func);
      for (SizeT I = 0; I < typeNumElements(IceType_v4f32); ++I) {
        auto *Index = Ctx->getConstantInt32(I);
        auto *Op0 = Func->makeVariable(IceType_f32);
        Context.insert<InstExtractElement>(Op0, Src0, Index);
        auto *Op1 = Func->makeVariable(IceType_f32);
        Context.insert<InstExtractElement>(Op1, Src1, Index);
        auto *Dst = Func->makeVariable(IceType_f32);
        Variable *DestT = Func->makeVariable(IceType_v4f32);
        Context.insert<InstFcmp>(Cond, Dst, Op0, Op1);
        Context.insert<InstInsertElement>(DestT, T, Dst, Index);
        T = DestT;
      }
      Context.insert<InstAssign>(Dest, T);
      Instr->setDeleted();
    }
    return;
  }
  case Inst::Icmp: {
    if (isVectorType(DestTy)) {
      InstIcmp::ICond Cond = llvm::cast<InstIcmp>(Instr)->getCondition();
      Operand *Src0 = Instr->getSrc(0);
      Operand *Src1 = Instr->getSrc(1);
      const Type SrcType = Src0->getType();
      Variable *T = Func->makeVariable(DestTy);
      auto *Undef = ConstantUndef::create(Ctx, DestTy);
      Context.insert<InstAssign>(T, Undef);
      auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
      VarVecOn32->initVecElement(Func);
      for (SizeT I = 0; I < typeNumElements(SrcType); ++I) {
        auto *Index = Ctx->getConstantInt32(I);
        auto *Op0 = Func->makeVariable(typeElementType(SrcType));
        Context.insert<InstExtractElement>(Op0, Src0, Index);
        auto *Op1 = Func->makeVariable(typeElementType(SrcType));
        Context.insert<InstExtractElement>(Op1, Src1, Index);
        auto *Dst = Func->makeVariable(typeElementType(DestTy));
        Variable *DestT = Func->makeVariable(DestTy);
        Context.insert<InstIcmp>(Cond, Dst, Op0, Op1);
        Context.insert<InstInsertElement>(DestT, T, Dst, Index);
        T = DestT;
      }
      Context.insert<InstAssign>(Dest, T);
      Instr->setDeleted();
    }
    return;
  }
  case Inst::Arithmetic: {
    const InstArithmetic::OpKind Op =
        llvm::cast<InstArithmetic>(Instr)->getOp();
    if (isVectorType(DestTy)) {
      scalarizeArithmetic(Op, Dest, Instr->getSrc(0), Instr->getSrc(1));
      Instr->setDeleted();
      return;
    }
    switch (DestTy) {
    default:
      return;
    case IceType_i64: {
      RuntimeHelper HelperID = RuntimeHelper::H_Num;
      switch (Op) {
      default:
        return;
      case InstArithmetic::Udiv:
        HelperID = RuntimeHelper::H_udiv_i64;
        break;
      case InstArithmetic::Sdiv:
        HelperID = RuntimeHelper::H_sdiv_i64;
        break;
      case InstArithmetic::Urem:
        HelperID = RuntimeHelper::H_urem_i64;
        break;
      case InstArithmetic::Srem:
        HelperID = RuntimeHelper::H_srem_i64;
        break;
      }

      if (HelperID == RuntimeHelper::H_Num) {
        return;
      }

      Operand *TargetHelper = Ctx->getRuntimeHelperFunc(HelperID);
      constexpr SizeT MaxArgs = 2;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Instr->getSrc(0));
      Call->addArg(Instr->getSrc(1));
      Instr->setDeleted();
      return;
    }
    case IceType_f32:
    case IceType_f64: {
      if (Op != InstArithmetic::Frem) {
        return;
      }
      constexpr SizeT MaxArgs = 2;
      Operand *TargetHelper = Ctx->getRuntimeHelperFunc(
          DestTy == IceType_f32 ? RuntimeHelper::H_frem_f32
                                : RuntimeHelper::H_frem_f64);
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Instr->getSrc(0));
      Call->addArg(Instr->getSrc(1));
      Instr->setDeleted();
      return;
    }
    }
    llvm::report_fatal_error("Control flow should never have reached here.");
  }
  case Inst::Cast: {
    Operand *Src0 = Instr->getSrc(0);
    const Type SrcTy = Src0->getType();
    auto *CastInstr = llvm::cast<InstCast>(Instr);
    const InstCast::OpKind CastKind = CastInstr->getCastKind();

    if (isVectorType(DestTy)) {
      Variable *T = Func->makeVariable(DestTy);
      auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
      VarVecOn32->initVecElement(Func);
      auto *Undef = ConstantUndef::create(Ctx, DestTy);
      Context.insert<InstAssign>(T, Undef);
      for (SizeT I = 0; I < typeNumElements(DestTy); ++I) {
        auto *Index = Ctx->getConstantInt32(I);
        auto *Op = Func->makeVariable(typeElementType(SrcTy));
        Context.insert<InstExtractElement>(Op, Src0, Index);
        auto *Dst = Func->makeVariable(typeElementType(DestTy));
        Variable *DestT = Func->makeVariable(DestTy);
        Context.insert<InstCast>(CastKind, Dst, Op);
        Context.insert<InstInsertElement>(DestT, T, Dst, Index);
        T = DestT;
      }
      Context.insert<InstAssign>(Dest, T);
      Instr->setDeleted();
      return;
    }

    switch (CastKind) {
    default:
      return;
    case InstCast::Fptosi:
    case InstCast::Fptoui: {
      if ((DestTy != IceType_i32) && (DestTy != IceType_i64)) {
        return;
      }
      const bool DestIs32 = DestTy == IceType_i32;
      const bool DestIsSigned = CastKind == InstCast::Fptosi;
      const bool Src0IsF32 = isFloat32Asserting32Or64(SrcTy);
      RuntimeHelper RTHFunc = RuntimeHelper::H_Num;
      if (DestIsSigned) {
        if (DestIs32) {
          return;
        }
        RTHFunc = Src0IsF32 ? RuntimeHelper::H_fptosi_f32_i64
                            : RuntimeHelper::H_fptosi_f64_i64;
      } else {
        RTHFunc = Src0IsF32 ? (DestIs32 ? RuntimeHelper::H_fptoui_f32_i32
                                        : RuntimeHelper::H_fptoui_f32_i64)
                            : (DestIs32 ? RuntimeHelper::H_fptoui_f64_i32
                                        : RuntimeHelper::H_fptoui_f64_i64);
      }
      Operand *TargetHelper = Ctx->getRuntimeHelperFunc(RTHFunc);
      static constexpr SizeT MaxArgs = 1;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Src0);
      Instr->setDeleted();
      return;
    }
    case InstCast::Sitofp:
    case InstCast::Uitofp: {
      if ((SrcTy != IceType_i32) && (SrcTy != IceType_i64)) {
        return;
      }
      const bool SourceIs32 = SrcTy == IceType_i32;
      const bool SourceIsSigned = CastKind == InstCast::Sitofp;
      const bool DestIsF32 = isFloat32Asserting32Or64(DestTy);
      RuntimeHelper RTHFunc = RuntimeHelper::H_Num;
      if (SourceIsSigned) {
        if (SourceIs32) {
          return;
        }
        RTHFunc = DestIsF32 ? RuntimeHelper::H_sitofp_i64_f32
                            : RuntimeHelper::H_sitofp_i64_f64;
      } else {
        RTHFunc = DestIsF32 ? (SourceIs32 ? RuntimeHelper::H_uitofp_i32_f32
                                          : RuntimeHelper::H_uitofp_i64_f32)
                            : (SourceIs32 ? RuntimeHelper::H_uitofp_i32_f64
                                          : RuntimeHelper::H_uitofp_i64_f64);
      }
      Operand *TargetHelper = Ctx->getRuntimeHelperFunc(RTHFunc);
      static constexpr SizeT MaxArgs = 1;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Src0);
      Instr->setDeleted();
      return;
    }
    case InstCast::Bitcast: {
      if (DestTy == SrcTy) {
        return;
      }
      Variable *CallDest = Dest;
      RuntimeHelper HelperID = RuntimeHelper::H_Num;
      switch (DestTy) {
      default:
        return;
      case IceType_i8:
        assert(SrcTy == IceType_v8i1);
        HelperID = RuntimeHelper::H_bitcast_8xi1_i8;
        CallDest = Func->makeVariable(IceType_i32);
        break;
      case IceType_i16:
        assert(SrcTy == IceType_v16i1);
        HelperID = RuntimeHelper::H_bitcast_16xi1_i16;
        CallDest = Func->makeVariable(IceType_i32);
        break;
      case IceType_v8i1: {
        assert(SrcTy == IceType_i8);
        HelperID = RuntimeHelper::H_bitcast_i8_8xi1;
        Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
        // Arguments to functions are required to be at least 32 bits wide.
        Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
        Src0 = Src0AsI32;
      } break;
      case IceType_v16i1: {
        assert(SrcTy == IceType_i16);
        HelperID = RuntimeHelper::H_bitcast_i16_16xi1;
        Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
        // Arguments to functions are required to be at least 32 bits wide.
        Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
        Src0 = Src0AsI32;
      } break;
      }
      constexpr SizeT MaxSrcs = 1;
      InstCall *Call = makeHelperCall(HelperID, CallDest, MaxSrcs);
      Call->addArg(Src0);
      Context.insert(Call);
      // The PNaCl ABI disallows i8/i16 return types, so truncate the helper
      // call result to the appropriate type as necessary.
      if (CallDest->getType() != DestTy)
        Context.insert<InstCast>(InstCast::Trunc, Dest, CallDest);
      Instr->setDeleted();
      return;
    }
    case InstCast::Trunc: {
      if (DestTy == SrcTy) {
        return;
      }
      if (!isVectorType(SrcTy)) {
        return;
      }
      assert(typeNumElements(DestTy) == typeNumElements(SrcTy));
      assert(typeElementType(DestTy) == IceType_i1);
      assert(isVectorIntegerType(SrcTy));
      return;
    }
    case InstCast::Sext:
    case InstCast::Zext: {
      if (DestTy == SrcTy) {
        return;
      }
      if (!isVectorType(DestTy)) {
        return;
      }
      assert(typeNumElements(DestTy) == typeNumElements(SrcTy));
      assert(typeElementType(SrcTy) == IceType_i1);
      assert(isVectorIntegerType(DestTy));
      return;
    }
    }
    llvm::report_fatal_error("Control flow should never have reached here.");
  }
  case Inst::Intrinsic: {
    auto *Intrinsic = llvm::cast<InstIntrinsic>(Instr);
    Intrinsics::IntrinsicID ID = Intrinsic->getIntrinsicID();
    if (isVectorType(DestTy) && ID == Intrinsics::Fabs) {
      Operand *Src0 = Intrinsic->getArg(0);
      Intrinsics::IntrinsicInfo Info = Intrinsic->getIntrinsicInfo();

      Variable *T = Func->makeVariable(IceType_v4f32);
      auto *Undef = ConstantUndef::create(Ctx, IceType_v4f32);
      Context.insert<InstAssign>(T, Undef);
      auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
      VarVecOn32->initVecElement(Func);

      for (SizeT i = 0; i < typeNumElements(IceType_v4f32); ++i) {
        auto *Index = Ctx->getConstantInt32(i);
        auto *Op = Func->makeVariable(IceType_f32);
        Context.insert<InstExtractElement>(Op, Src0, Index);
        auto *Res = Func->makeVariable(IceType_f32);
        Variable *DestT = Func->makeVariable(IceType_v4f32);
        auto *Intrinsic = Context.insert<InstIntrinsic>(1, Res, Info);
        Intrinsic->addArg(Op);
        Context.insert<InstInsertElement>(DestT, T, Res, Index);
        T = DestT;
      }

      Context.insert<InstAssign>(Dest, T);

      Instr->setDeleted();
      return;
    }
    switch (ID) {
    default:
      return;
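    // MIPS32 has no 64-bit ll/sc pair, so the 64-bit atomic cases below are
    // lowered to calls to the __sync_* helpers; an atomic 64-bit load, for
    // instance, is emulated as compare-and-swap(Addr, 0, 0), which returns
    // the current value without changing memory.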
    case Intrinsics::AtomicLoad: {
      if (DestTy != IceType_i64)
        return;
      if (!Intrinsics::isMemoryOrderValid(
              ID, getConstantMemoryOrder(Intrinsic->getArg(1)))) {
        Func->setError("Unexpected memory ordering for AtomicLoad");
        return;
      }
      Operand *Addr = Intrinsic->getArg(0);
      Operand *TargetHelper = Ctx->getConstantExternSym(
          Ctx->getGlobalString("__sync_val_compare_and_swap_8"));
      static constexpr SizeT MaxArgs = 3;
      auto *_0 = Ctx->getConstantZero(IceType_i64);
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Addr);
      Call->addArg(_0);
      Call->addArg(_0);
      Context.insert<InstMIPS32Sync>();
      Instr->setDeleted();
      return;
    }
    case Intrinsics::AtomicStore: {
      Operand *Val = Intrinsic->getArg(0);
      if (Val->getType() != IceType_i64)
        return;
      if (!Intrinsics::isMemoryOrderValid(
              ID, getConstantMemoryOrder(Intrinsic->getArg(2)))) {
        Func->setError("Unexpected memory ordering for AtomicStore");
        return;
      }
      Operand *Addr = Intrinsic->getArg(1);
      Variable *NoDest = nullptr;
      Operand *TargetHelper = Ctx->getConstantExternSym(
          Ctx->getGlobalString("__sync_lock_test_and_set_8"));
      Context.insert<InstMIPS32Sync>();
      static constexpr SizeT MaxArgs = 2;
      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Addr);
      Call->addArg(Val);
      Context.insert<InstMIPS32Sync>();
      Instr->setDeleted();
      return;
    }
    case Intrinsics::AtomicCmpxchg: {
      if (DestTy != IceType_i64)
        return;
      if (!Intrinsics::isMemoryOrderValid(
              ID, getConstantMemoryOrder(Intrinsic->getArg(3)),
              getConstantMemoryOrder(Intrinsic->getArg(4)))) {
        Func->setError("Unexpected memory ordering for AtomicCmpxchg");
        return;
      }
      Operand *Addr = Intrinsic->getArg(0);
      Operand *Oldval = Intrinsic->getArg(1);
      Operand *Newval = Intrinsic->getArg(2);
      Operand *TargetHelper = Ctx->getConstantExternSym(
          Ctx->getGlobalString("__sync_val_compare_and_swap_8"));
      Context.insert<InstMIPS32Sync>();
      static constexpr SizeT MaxArgs = 3;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Addr);
      Call->addArg(Oldval);
      Call->addArg(Newval);
      Context.insert<InstMIPS32Sync>();
      Instr->setDeleted();
      return;
    }
    case Intrinsics::AtomicRMW: {
      if (DestTy != IceType_i64)
        return;
      if (!Intrinsics::isMemoryOrderValid(
              ID, getConstantMemoryOrder(Intrinsic->getArg(3)))) {
        Func->setError("Unexpected memory ordering for AtomicRMW");
        return;
      }
      auto Operation = static_cast<Intrinsics::AtomicRMWOperation>(
          llvm::cast<ConstantInteger32>(Intrinsic->getArg(0))->getValue());
      auto *Addr = Intrinsic->getArg(1);
      auto *Newval = Intrinsic->getArg(2);
      Operand *TargetHelper;
      switch (Operation) {
      case Intrinsics::AtomicAdd:
        TargetHelper = Ctx->getConstantExternSym(
            Ctx->getGlobalString("__sync_fetch_and_add_8"));
        break;
      case Intrinsics::AtomicSub:
        TargetHelper = Ctx->getConstantExternSym(
            Ctx->getGlobalString("__sync_fetch_and_sub_8"));
        break;
      case Intrinsics::AtomicOr:
        TargetHelper = Ctx->getConstantExternSym(
            Ctx->getGlobalString("__sync_fetch_and_or_8"));
        break;
      case Intrinsics::AtomicAnd:
        TargetHelper = Ctx->getConstantExternSym(
            Ctx->getGlobalString("__sync_fetch_and_and_8"));
        break;
      case Intrinsics::AtomicXor:
        TargetHelper = Ctx->getConstantExternSym(
            Ctx->getGlobalString("__sync_fetch_and_xor_8"));
        break;
      case Intrinsics::AtomicExchange:
        TargetHelper = Ctx->getConstantExternSym(
            Ctx->getGlobalString("__sync_lock_test_and_set_8"));
        break;
      default:
        llvm::report_fatal_error("Unknown AtomicRMW operation");
        return;
      }
      Context.insert<InstMIPS32Sync>();
      static constexpr SizeT MaxArgs = 2;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Addr);
      Call->addArg(Newval);
      Context.insert<InstMIPS32Sync>();
      Instr->setDeleted();
      return;
    }
    case Intrinsics::Ctpop: {
      Operand *Src0 = Intrinsic->getArg(0);
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(isInt32Asserting32Or64(Src0->getType())
                                        ? RuntimeHelper::H_call_ctpop_i32
                                        : RuntimeHelper::H_call_ctpop_i64);
      static constexpr SizeT MaxArgs = 1;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Src0);
      Instr->setDeleted();
      return;
    }
    case Intrinsics::Longjmp: {
      static constexpr SizeT MaxArgs = 2;
      static constexpr Variable *NoDest = nullptr;
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_longjmp);
      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Intrinsic->getArg(0));
      Call->addArg(Intrinsic->getArg(1));
      Instr->setDeleted();
      return;
    }
    case Intrinsics::Memcpy: {
      static constexpr SizeT MaxArgs = 3;
      static constexpr Variable *NoDest = nullptr;
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memcpy);
      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Intrinsic->getArg(0));
      Call->addArg(Intrinsic->getArg(1));
      Call->addArg(Intrinsic->getArg(2));
      Instr->setDeleted();
      return;
    }
    case Intrinsics::Memmove: {
      static constexpr SizeT MaxArgs = 3;
      static constexpr Variable *NoDest = nullptr;
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memmove);
      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Intrinsic->getArg(0));
      Call->addArg(Intrinsic->getArg(1));
      Call->addArg(Intrinsic->getArg(2));
      Instr->setDeleted();
      return;
    }
    case Intrinsics::Memset: {
      Operand *ValOp = Intrinsic->getArg(1);
      assert(ValOp->getType() == IceType_i8);
      Variable *ValExt = Func->makeVariable(stackSlotType());
      Context.insert<InstCast>(InstCast::Zext, ValExt, ValOp);

      static constexpr SizeT MaxArgs = 3;
      static constexpr Variable *NoDest = nullptr;
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memset);
      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Intrinsic->getArg(0));
      Call->addArg(ValExt);
      Call->addArg(Intrinsic->getArg(2));
      Instr->setDeleted();
      return;
    }
    case Intrinsics::Setjmp: {
      static constexpr SizeT MaxArgs = 1;
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_setjmp);
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Intrinsic->getArg(0));
      Instr->setDeleted();
      return;
    }
    }
    llvm::report_fatal_error("Control flow should never have reached here.");
  }
  }
}

void TargetMIPS32::findMaxStackOutArgsSize() {
  // MinNeededOutArgsBytes should be updated if the Target ever creates a
  // high-level InstCall that requires more stack bytes.
  size_t MinNeededOutArgsBytes = 0;
  if (!MaybeLeafFunc)
    MinNeededOutArgsBytes = MIPS32_MAX_GPR_ARG * 4;
  MaxOutArgsSizeBytes = MinNeededOutArgsBytes;
  for (CfgNode *Node : Func->getNodes()) {
    Context.init(Node);
    while (!Context.atEnd()) {
      PostIncrLoweringContext PostIncrement(Context);
      Inst *CurInstr = iteratorToInst(Context.getCur());
      if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) {
        SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call);
        MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes);
      }
    }
  }
  CurrentAllocaOffset = MaxOutArgsSizeBytes;
}

void TargetMIPS32::translateO2() {
  TimerMarker T(TimerStack::TT_O2, Func);

  // TODO(stichnot): share passes with X86?
  // https://code.google.com/p/nativeclient/issues/detail?id=4094
  genTargetHelperCalls();

  unsetIfNonLeafFunc();

  findMaxStackOutArgsSize();

  // Merge Alloca instructions, and lay out the stack.
  static constexpr bool SortAndCombineAllocas = true;
  Func->processAllocas(SortAndCombineAllocas);
  Func->dump("After Alloca processing");

  if (!getFlags().getEnablePhiEdgeSplit()) {
    // Lower Phi instructions.
    Func->placePhiLoads();
    if (Func->hasError())
      return;
    Func->placePhiStores();
    if (Func->hasError())
      return;
    Func->deletePhis();
    if (Func->hasError())
      return;
    Func->dump("After Phi lowering");
  }

  // Address mode optimization.
  Func->getVMetadata()->init(VMK_SingleDefs);
  Func->doAddressOpt();

  // Argument lowering
  Func->doArgLowering();

  // Target lowering. This requires liveness analysis for some parts of the
  // lowering decisions, such as compare/branch fusing. If non-lightweight
  // liveness analysis is used, the instructions need to be renumbered first.
  // TODO: This renumbering should only be necessary if we're actually
  // calculating live intervals, which we only do for register allocation.
  Func->renumberInstructions();
  if (Func->hasError())
    return;

  // TODO: It should be sufficient to use the fastest liveness calculation,
  // i.e. livenessLightweight(). However, for some reason that slows down the
  // rest of the translation. Investigate.
  Func->liveness(Liveness_Basic);
  if (Func->hasError())
    return;
  Func->dump("After MIPS32 address mode opt");

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After MIPS32 codegen");

  // Register allocation. This requires instruction renumbering and full
  // liveness analysis.
  Func->renumberInstructions();
  if (Func->hasError())
    return;
  Func->liveness(Liveness_Intervals);
  if (Func->hasError())
    return;
  // The post-codegen dump is done here, after liveness analysis and associated
  // cleanup, to make the dump cleaner and more useful.
  Func->dump("After initial MIPS32 codegen");
  // Validate the live range computations. The expensive validation call is
  // deliberately only made when assertions are enabled.
  assert(Func->validateLiveness());
  Func->getVMetadata()->init(VMK_All);
  regAlloc(RAK_Global);
  if (Func->hasError())
    return;
  Func->dump("After linear scan regalloc");

  if (getFlags().getEnablePhiEdgeSplit()) {
    Func->advancedPhiLowering();
    Func->dump("After advanced Phi lowering");
  }

  // Stack frame mapping.
  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  postLowerLegalization();
  if (Func->hasError())
    return;
  Func->dump("After postLowerLegalization");

  Func->contractEmptyNodes();
  Func->reorderNodes();

  // Branch optimization. This needs to be done just before code emission. In
  // particular, no transformations that insert or reorder CfgNodes should be
  // done after branch optimization. We go ahead and do it before nop insertion
  // to reduce the amount of work needed for searching for opportunities.
  Func->doBranchOpt();
  Func->dump("After branch optimization");
}

void TargetMIPS32::translateOm1() {
  TimerMarker T(TimerStack::TT_Om1, Func);

  // TODO: share passes with X86?
  genTargetHelperCalls();

  unsetIfNonLeafFunc();

  findMaxStackOutArgsSize();

  // Do not merge Alloca instructions, and lay out the stack.
  static constexpr bool SortAndCombineAllocas = false;
  Func->processAllocas(SortAndCombineAllocas);
  Func->dump("After Alloca processing");

  Func->placePhiLoads();
  if (Func->hasError())
    return;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  Func->deletePhis();
  if (Func->hasError())
    return;
  Func->dump("After Phi lowering");

  Func->doArgLowering();

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After initial MIPS32 codegen");

  regAlloc(RAK_InfOnly);
  if (Func->hasError())
    return;
  Func->dump("After regalloc of infinite-weight variables");

  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  postLowerLegalization();
  if (Func->hasError())
    return;
  Func->dump("After postLowerLegalization");
}

bool TargetMIPS32::doBranchOpt(Inst *Instr, const CfgNode *NextNode) {
  if (auto *Br = llvm::dyn_cast<InstMIPS32Br>(Instr)) {
    return Br->optimizeBranch(NextNode);
  }
  return false;
}

namespace {

const char *RegNames[RegMIPS32::Reg_NUM] = {
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
          isI64Pair, isFP32, isFP64, isVec128, alias_init)                     \
  name,
    REGMIPS32_TABLE
#undef X
};

} // end of anonymous namespace

const char *RegMIPS32::getRegName(RegNumT RegNum) {
  RegNum.assertIsValid();
  return RegNames[RegNum];
}

const char *TargetMIPS32::getRegName(RegNumT RegNum, Type Ty) const {
  (void)Ty;
  return RegMIPS32::getRegName(RegNum);
}

Variable *TargetMIPS32::getPhysicalRegister(RegNumT RegNum, Type Ty) {
  if (Ty == IceType_void)
    Ty = IceType_i32;
  if (PhysicalRegisters[Ty].empty())
    PhysicalRegisters[Ty].resize(RegMIPS32::Reg_NUM);
  RegNum.assertIsValid();
  Variable *Reg = PhysicalRegisters[Ty][RegNum];
  if (Reg == nullptr) {
    Reg = Func->makeVariable(Ty);
    Reg->setRegNum(RegNum);
    PhysicalRegisters[Ty][RegNum] = Reg;
    // Specially mark a named physical register as an "argument" so that it is
    // considered live upon function entry. Otherwise it's possible to get
    // liveness validation errors for saving callee-save registers.
    Func->addImplicitArg(Reg);
    // Don't bother tracking the live range of a named physical register.
    Reg->setIgnoreLiveness();
  }
  return Reg;
}

void TargetMIPS32::emitJumpTable(const Cfg *Func,
                                 const InstJumpTable *JumpTable) const {
  (void)Func;
  (void)JumpTable;
  UnimplementedError(getFlags());
}

/// Provide a trivial wrapper to legalize() for this common usage.
Variable *TargetMIPS32::legalizeToReg(Operand *From, RegNumT RegNum) {
  return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
}

/// Legalize undef values to concrete values.
Operand *TargetMIPS32::legalizeUndef(Operand *From, RegNumT RegNum) {
  (void)RegNum;
  Type Ty = From->getType();
  if (llvm::isa<ConstantUndef>(From)) {
    // Lower undefs to zero. Another option is to lower undefs to an
    // uninitialized register; however, using an uninitialized register
    // results in less predictable code.
    //
    // If in the future the implementation is changed to lower undef
    // values to uninitialized registers, a FakeDef will be needed:
    //     Context.insert(InstFakeDef::create(Func, Reg));
    // This is in order to ensure that the live range of Reg is not
    // overestimated. If the constant being lowered is a 64 bit value,
    // then the result should be split and the lo and hi components will
    // need to go in uninitialized registers.
    if (isVectorType(Ty)) {
      Variable *Var = makeReg(Ty, RegNum);
      auto *Reg = llvm::cast<VariableVecOn32>(Var);
      Reg->initVecElement(Func);
      auto *Zero = getZero();
      for (Variable *Var : Reg->getContainers()) {
        _mov(Var, Zero);
      }
      return Reg;
    }
    return Ctx->getConstantZero(Ty);
  }
  return From;
}

Variable *TargetMIPS32::makeReg(Type Type, RegNumT RegNum) {
  // There aren't any 64-bit integer registers for MIPS32.
  assert(Type != IceType_i64);
  Variable *Reg = Func->makeVariable(Type);
  if (RegNum.hasValue())
    Reg->setRegNum(RegNum);
  else
    Reg->setMustHaveReg();
  return Reg;
}

OperandMIPS32Mem *TargetMIPS32::formMemoryOperand(Operand *Operand, Type Ty) {
  // It may be the case that address mode optimization already creates an
  // OperandMIPS32Mem, so in that case it wouldn't need another level of
  // transformation.
  if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) {
    return llvm::cast<OperandMIPS32Mem>(legalize(Mem));
  }

  // If we didn't do address mode optimization, then we only have a base/offset
  // to work with. MIPS always requires a base register, so just use that to
  // hold the operand.
  auto *Base = llvm::cast<Variable>(
      legalize(Operand, Legal_Reg | Legal_Rematerializable));
  const int32_t Offset = Base->hasStackOffset() ? Base->getStackOffset() : 0;
  return OperandMIPS32Mem::create(
      Func, Ty, Base,
      llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(Offset)));
}

void TargetMIPS32::emitVariable(const Variable *Var) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  const Type FrameSPTy = IceType_i32;
  if (Var->hasReg()) {
    Str << '$' << getRegName(Var->getRegNum(), Var->getType());
    return;
  }
  if (Var->mustHaveReg()) {
    llvm::report_fatal_error("Infinite-weight Variable (" + Var->getName() +
                             ") has no register assigned - function " +
                             Func->getFunctionName());
  }
  const int32_t Offset = Var->getStackOffset();
  Str << Offset;
  Str << "($" << getRegName(getFrameOrStackReg(), FrameSPTy);
  Str << ")";
}

TargetMIPS32::CallingConv::CallingConv()
    : GPRegsUsed(RegMIPS32::Reg_NUM),
      GPRArgs(GPRArgInitializer.rbegin(), GPRArgInitializer.rend()),
      I64Args(I64ArgInitializer.rbegin(), I64ArgInitializer.rend()),
      VFPRegsUsed(RegMIPS32::Reg_NUM),
      FP32Args(FP32ArgInitializer.rbegin(), FP32ArgInitializer.rend()),
      FP64Args(FP64ArgInitializer.rbegin(), FP64ArgInitializer.rend()) {}
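
// Note that the argument-register lists above are built from the
// initializers in reverse, so back() always names the next register to hand
// out and registers are consumed in ascending order ($a0 before $a1, $f12
// before $f14, and so on).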

// In the MIPS O32 ABI, FP argument registers can be used only if the first
// argument is of type float/double. The UseFPRegs flag is used to take care
// of that. Also, FP argument registers can be used only for the first 2
// arguments, so we require the argument number to make register allocation
// decisions.
bool TargetMIPS32::CallingConv::argInReg(Type Ty, uint32_t ArgNo,
                                         RegNumT *Reg) {
  if (isScalarIntegerType(Ty) || isVectorType(Ty))
    return argInGPR(Ty, Reg);
  if (isScalarFloatingType(Ty)) {
    if (ArgNo == 0) {
      UseFPRegs = true;
      return argInVFP(Ty, Reg);
    }
    if (UseFPRegs && ArgNo == 1) {
      UseFPRegs = false;
      return argInVFP(Ty, Reg);
    }
    return argInGPR(Ty, Reg);
  }
  llvm::report_fatal_error("argInReg: Invalid type.");
  return false;
}
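
// For example, under these rules a signature (f32, f32) takes $f12 and $f14,
// (f32, i32) takes $f12 and $a1, and (i32, f32) takes $a0 and $a1: once the
// first argument lands in a GPR, the FP argument registers are not used at
// all.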

bool TargetMIPS32::CallingConv::argInGPR(Type Ty, RegNumT *Reg) {
  CfgVector<RegNumT> *Source;

  switch (Ty) {
  default: {
    llvm::report_fatal_error("argInGPR: Invalid type.");
    return false;
  } break;
  case IceType_v4i1:
  case IceType_v8i1:
  case IceType_v16i1:
  case IceType_v16i8:
  case IceType_v8i16:
  case IceType_v4i32:
  case IceType_v4f32:
  case IceType_i32:
  case IceType_f32: {
    Source = &GPRArgs;
  } break;
  case IceType_i64:
  case IceType_f64: {
    Source = &I64Args;
  } break;
  }

  discardUnavailableGPRsAndTheirAliases(Source);

  // If $4 is used for any scalar type (or for returning v4f32), then the
  // next vector type is passed in $6:$7:stack:stack.
  if (isVectorType(Ty)) {
    alignGPR(Source);
  }

  if (Source->empty()) {
    GPRegsUsed.set();
    return false;
  }

  *Reg = Source->back();
  // Note that we don't Source->pop_back() here. This is intentional. Notice
  // how we mark all of Reg's aliases as used. So, for the next argument,
  // Source->back() is marked as unavailable, and it is thus implicitly popped
  // from the stack.
  GPRegsUsed |= RegisterAliases[*Reg];

  // All vector arguments, irrespective of their base type, are passed in GP
  // registers. The first vector argument is passed in $4:$5:$6:$7 and the
  // second in $6:$7:stack:stack. If this is the first argument, discard
  // $4:$5:$6:$7; otherwise discard $6:$7 only.
  if (isVectorType(Ty)) {
    if (((unsigned)*Reg) == RegMIPS32::Reg_A0) {
      GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A1];
      GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A2];
      GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A3];
    } else {
      GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A3];
    }
  }

  return true;
}

inline void TargetMIPS32::CallingConv::discardNextGPRAndItsAliases(
    CfgVector<RegNumT> *Regs) {
  GPRegsUsed |= RegisterAliases[Regs->back()];
  Regs->pop_back();
}

inline void TargetMIPS32::CallingConv::alignGPR(CfgVector<RegNumT> *Regs) {
  if (Regs->back() == RegMIPS32::Reg_A1 || Regs->back() == RegMIPS32::Reg_A3)
    discardNextGPRAndItsAliases(Regs);
}

// GPRs are not packed when passing parameters. Thus, a function foo(i32, i64,
// i32) will have the first argument in a0, the second in a2-a3, and the third
// on the stack. To model this behavior, whenever we pop a register from Regs,
// we remove all of its aliases from the pool of available GPRs. This has the
// effect of computing the "closure" of the GPR registers.
void TargetMIPS32::CallingConv::discardUnavailableGPRsAndTheirAliases(
    CfgVector<RegNumT> *Regs) {
  while (!Regs->empty() && GPRegsUsed[Regs->back()]) {
    discardNextGPRAndItsAliases(Regs);
  }
}

bool TargetMIPS32::CallingConv::argInVFP(Type Ty, RegNumT *Reg) {
  CfgVector<RegNumT> *Source;

  switch (Ty) {
  default: {
    llvm::report_fatal_error("argInVFP: Invalid type.");
    return false;
  } break;
  case IceType_f32: {
    Source = &FP32Args;
  } break;
  case IceType_f64: {
    Source = &FP64Args;
  } break;
  }

  discardUnavailableVFPRegsAndTheirAliases(Source);

  if (Source->empty()) {
    VFPRegsUsed.set();
    return false;
  }

  *Reg = Source->back();
  VFPRegsUsed |= RegisterAliases[*Reg];

  // In the MIPS O32 ABI, if the function arguments are (f32, i32), then one
  // cannot use reg_a0 for the second argument even though it is free. The f32
  // argument goes in reg_f12 and the i32 argument goes in reg_a1. Similarly,
  // if the arguments are (f64, i32), the second argument goes in reg_a3 and
  // a0, a1 are not used.
  Source = &GPRArgs;
  // Discard one GPR register for f32 (4 bytes), two for f64 (4 + 4 bytes).
  if (Ty == IceType_f64) {
    // In the MIPS O32 ABI, when we use a GPR pair to hold an F64 value, the
    // pair must be aligned at an even register. Similarly, when we discard
    // GPR registers because some of the arguments from the first 16 bytes go
    // in FPRs, we must take care of alignment. For example, if the function
    // arguments are (f32, f64, f32), for the first f32 we discard a0; now for
    // the f64 argument, which will go in F14F15, we must first align the GPR
    // vector to an even register by discarding a1, then discard two GPRs a2
    // and a3. The last f32 argument will then go on the stack.
    alignGPR(Source);
    discardNextGPRAndItsAliases(Source);
  }
  discardNextGPRAndItsAliases(Source);
  return true;
}

void TargetMIPS32::CallingConv::discardUnavailableVFPRegsAndTheirAliases(
    CfgVector<RegNumT> *Regs) {
  while (!Regs->empty() && VFPRegsUsed[Regs->back()]) {
    Regs->pop_back();
  }
}

void TargetMIPS32::lowerArguments() {
  VarList &Args = Func->getArgs();
  TargetMIPS32::CallingConv CC;

  // For each register argument, replace Arg in the argument list with the
  // home register. Then generate an instruction in the prolog to copy the
  // home register to the assigned location of Arg.
  Context.init(Func->getEntryNode());
  Context.setInsertPoint(Context.getCur());

  // v4f32 is returned through the stack. $4 is set up by the caller and
  // passed as the first argument implicitly. The callee then copies the
  // return vector to $4.
  Variable *ImplicitRetVec = nullptr;
  if (isVectorFloatingType(Func->getReturnType())) {
    ImplicitRetVec = Func->makeVariable(IceType_i32);
    ImplicitRetVec->setName(Func, "ImplicitRet_v4f32");
    ImplicitRetVec->setIsArg();
    Args.insert(Args.begin(), ImplicitRetVec);
    setImplicitRet(ImplicitRetVec);
  }

  for (SizeT i = 0, E = Args.size(); i < E; ++i) {
    Variable *Arg = Args[i];
    Type Ty = Arg->getType();
    RegNumT RegNum;
    if (!CC.argInReg(Ty, i, &RegNum)) {
      continue;
    }
    Variable *RegisterArg = Func->makeVariable(Ty);
    if (BuildDefs::dump()) {
      RegisterArg->setName(Func, "home_reg:" + Arg->getName());
    }
    RegisterArg->setIsArg();
    Arg->setIsArg(false);
    Args[i] = RegisterArg;

    if (isVectorType(Ty)) {
      auto *RegisterArgVec = llvm::cast<VariableVecOn32>(RegisterArg);
      RegisterArgVec->initVecElement(Func);
      RegisterArgVec->getContainers()[0]->setRegNum(
          RegNumT::fixme((unsigned)RegNum + 0));
      RegisterArgVec->getContainers()[1]->setRegNum(
          RegNumT::fixme((unsigned)RegNum + 1));
      // The first two elements of the second vector argument are passed in
      // $6:$7 and the remaining two on the stack. Do not assign registers to
      // them if this is the second vector argument.
      if (i == 0) {
        RegisterArgVec->getContainers()[2]->setRegNum(
            RegNumT::fixme((unsigned)RegNum + 2));
        RegisterArgVec->getContainers()[3]->setRegNum(
            RegNumT::fixme((unsigned)RegNum + 3));
      } else {
        RegisterArgVec->getContainers()[2]->setRegNum(
            RegNumT::fixme(RegNumT()));
        RegisterArgVec->getContainers()[3]->setRegNum(
            RegNumT::fixme(RegNumT()));
      }
    } else {
      switch (Ty) {
      default: {
        RegisterArg->setRegNum(RegNum);
      } break;
      case IceType_i64: {
        auto *RegisterArg64 = llvm::cast<Variable64On32>(RegisterArg);
        RegisterArg64->initHiLo(Func);
        RegisterArg64->getLo()->setRegNum(
            RegNumT::fixme(RegMIPS32::get64PairFirstRegNum(RegNum)));
        RegisterArg64->getHi()->setRegNum(
            RegNumT::fixme(RegMIPS32::get64PairSecondRegNum(RegNum)));
      } break;
      }
    }
    Context.insert<InstAssign>(Arg, RegisterArg);
  }

  // Insert a fake use of ImplicitRet_v4f32 to keep it live.
  if (ImplicitRetVec) {
    for (CfgNode *Node : Func->getNodes()) {
      for (Inst &Instr : Node->getInsts()) {
        if (llvm::isa<InstRet>(&Instr)) {
          Context.setInsertPoint(instToIterator(&Instr));
          Context.insert<InstFakeUse>(ImplicitRetVec);
          break;
        }
      }
    }
  }
}

Type TargetMIPS32::stackSlotType() { return IceType_i32; }

// Helper function for addProlog().
//
// This assumes Arg is an argument passed on the stack. This sets the frame
// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an
// I64 arg that has been split into Lo and Hi components, it calls itself
// recursively on the components, taking care to handle Lo first because of the
// little-endian architecture. Lastly, this function generates an instruction
// to copy Arg into its assigned register if applicable.
void TargetMIPS32::finishArgumentLowering(Variable *Arg, bool PartialOnStack,
                                          Variable *FramePtr,
                                          size_t BasicFrameOffset,
                                          size_t *InArgsSizeBytes) {
  const Type Ty = Arg->getType();
  *InArgsSizeBytes = applyStackAlignmentTy(*InArgsSizeBytes, Ty);

  // If $4 is used for any scalar type (or for returning v4f32), then the next
  // vector argument is passed in $6:$7:stack:stack. Load the 3rd and 4th
  // elements from the argument stack.
  if (auto *ArgVecOn32 = llvm::dyn_cast<VariableVecOn32>(Arg)) {
    if (!PartialOnStack) {
      auto *Elem0 = ArgVecOn32->getContainers()[0];
      auto *Elem1 = ArgVecOn32->getContainers()[1];
      finishArgumentLowering(Elem0, PartialOnStack, FramePtr, BasicFrameOffset,
                             InArgsSizeBytes);
      finishArgumentLowering(Elem1, PartialOnStack, FramePtr, BasicFrameOffset,
                             InArgsSizeBytes);
    }
    auto *Elem2 = ArgVecOn32->getContainers()[2];
    auto *Elem3 = ArgVecOn32->getContainers()[3];
    finishArgumentLowering(Elem2, PartialOnStack, FramePtr, BasicFrameOffset,
                           InArgsSizeBytes);
    finishArgumentLowering(Elem3, PartialOnStack, FramePtr, BasicFrameOffset,
                           InArgsSizeBytes);
    return;
  }

  if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) {
    Variable *const Lo = Arg64On32->getLo();
    Variable *const Hi = Arg64On32->getHi();
    finishArgumentLowering(Lo, PartialOnStack, FramePtr, BasicFrameOffset,
                           InArgsSizeBytes);
    finishArgumentLowering(Hi, PartialOnStack, FramePtr, BasicFrameOffset,
                           InArgsSizeBytes);
    return;
  }

  assert(Ty != IceType_i64);
  assert(!isVectorType(Ty));

  const int32_t ArgStackOffset = BasicFrameOffset + *InArgsSizeBytes;
  *InArgsSizeBytes += typeWidthInBytesOnStack(Ty);

  if (!Arg->hasReg()) {
    Arg->setStackOffset(ArgStackOffset);
    return;
  }

  // If the argument variable has been assigned a register, we need to copy the
  // value from the stack slot.
  Variable *Parameter = Func->makeVariable(Ty);
  Parameter->setMustNotHaveReg();
  Parameter->setStackOffset(ArgStackOffset);
  _mov(Arg, Parameter);
}
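
// Illustrative sketch only: finishArgumentLowering() walks the incoming
// argument area with an align-then-advance pattern. 'nextInArgOffset' is a
// hypothetical helper showing that pattern in isolation; 'Align' stands in
// for the type's stack alignment and must be a power of two.
inline size_t nextInArgOffset(size_t Offset, size_t Align, size_t Width,
                              size_t *NextFreeBytes) {
  Offset = (Offset + Align - 1) & ~(Align - 1); // align this argument's slot
  *NextFreeBytes = Offset + Width;              // advance past the argument
  return Offset;
}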

void TargetMIPS32::addProlog(CfgNode *Node) {
  // Stack frame layout:
  //
  // +------------------------+
  // | 1. preserved registers |
  // +------------------------+
  // | 2. padding             |
  // +------------------------+
  // | 3. global spill area   |
  // +------------------------+
  // | 4. padding             |
  // +------------------------+
  // | 5. local spill area    |
  // +------------------------+
  // | 6. padding             |
  // +------------------------+
  // | 7. allocas             |
  // +------------------------+
  // | 8. padding             |
  // +------------------------+
  // | 9. out args            |
  // +------------------------+ <--- StackPointer
  //
  // The following variables record the size in bytes of the given areas:
  //  * PreservedRegsSizeBytes: area 1
  //  * SpillAreaPaddingBytes:  area 2
  //  * GlobalsSize:            area 3
  //  * GlobalsAndSubsequentPaddingSize: areas 3 - 4
  //  * LocalsSpillAreaSize:    area 5
  //  * SpillAreaSizeBytes:     areas 2 - 9
  //  * maxOutArgsSizeBytes():  area 9

  Context.init(Node);
  Context.setInsertPoint(Context.getCur());

  SmallBitVector CalleeSaves = getRegisterSet(RegSet_CalleeSave, RegSet_None);
  RegsUsed = SmallBitVector(CalleeSaves.size());

  VarList SortedSpilledVariables;

  size_t GlobalsSize = 0;
  // If there is a separate locals area, this represents that area. Otherwise
  // it counts any variable not counted by GlobalsSize.
  SpillAreaSizeBytes = 0;
  // If there is a separate locals area, this specifies the alignment for it.
  uint32_t LocalsSlotsAlignmentBytes = 0;
  // The entire spill locations area gets aligned to the largest natural
  // alignment of the variables that have a spill slot.
  uint32_t SpillAreaAlignmentBytes = 0;
  // For now, we don't have target-specific variables that need special
  // treatment (no stack-slot-linked SpillVariable type).
  std::function<bool(Variable *)> TargetVarHook = [](Variable *Var) {
    static constexpr bool AssignStackSlot = false;
    static constexpr bool DontAssignStackSlot = !AssignStackSlot;
    if (llvm::isa<Variable64On32>(Var)) {
      return DontAssignStackSlot;
    }
    return AssignStackSlot;
  };

  // Compute the list of spilled variables and bounds for GlobalsSize, etc.
  getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
                        &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
                        &LocalsSlotsAlignmentBytes, TargetVarHook);
  uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
  SpillAreaSizeBytes += GlobalsSize;

  PreservedGPRs.reserve(CalleeSaves.size());

  // Consider FP and RA as callee-save / used as needed.
  if (UsesFramePointer) {
    if (RegsUsed[RegMIPS32::Reg_FP]) {
      llvm::report_fatal_error("Frame pointer has been used.");
    }
    CalleeSaves[RegMIPS32::Reg_FP] = true;
    RegsUsed[RegMIPS32::Reg_FP] = true;
  }
  if (!MaybeLeafFunc) {
    CalleeSaves[RegMIPS32::Reg_RA] = true;
    RegsUsed[RegMIPS32::Reg_RA] = true;
  }

  // Make two passes over the used registers. The first pass records all the
  // used registers -- and their aliases. Then, we figure out which GPR
  // registers should be saved.
  SmallBitVector ToPreserve(RegMIPS32::Reg_NUM);
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    if (CalleeSaves[i] && RegsUsed[i]) {
      ToPreserve |= RegisterAliases[i];
    }
  }

  uint32_t NumCallee = 0;

  // RegClass is a tuple of
  //
  // <First Register in Class, Last Register in Class, Vector of Save Registers>
  //
  // used to figure out which registers should be saved/restored during the
  // prolog/epilog.
  using RegClassType = std::tuple<uint32_t, uint32_t, VarList *>;
  const RegClassType RegClass = RegClassType(
      RegMIPS32::Reg_GPR_First, RegMIPS32::Reg_FPR_Last, &PreservedGPRs);
  const uint32_t FirstRegInClass = std::get<0>(RegClass);
  const uint32_t LastRegInClass = std::get<1>(RegClass);
  VarList *const PreservedRegsInClass = std::get<2>(RegClass);
  for (uint32_t Reg = LastRegInClass; Reg > FirstRegInClass; Reg--) {
    if (!ToPreserve[Reg]) {
      continue;
    }
    ++NumCallee;
    Variable *PhysicalRegister = getPhysicalRegister(RegNumT::fromInt(Reg));
    PreservedRegsSizeBytes +=
        typeWidthInBytesOnStack(PhysicalRegister->getType());
    PreservedRegsInClass->push_back(PhysicalRegister);
  }

  Ctx->statsUpdateRegistersSaved(NumCallee);

  // Align the variables area. SpillAreaPaddingBytes is the size of the region
  // after the preserved registers and before the spill areas.
  // LocalsSlotsPaddingBytes is the amount of padding between the globals and
  // locals area if they are separate.
  assert(SpillAreaAlignmentBytes <= MIPS32_STACK_ALIGNMENT_BYTES);
  (void)MIPS32_STACK_ALIGNMENT_BYTES;
  assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
  uint32_t SpillAreaPaddingBytes = 0;
  uint32_t LocalsSlotsPaddingBytes = 0;
  alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
                       GlobalsSize, LocalsSlotsAlignmentBytes,
                       &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
  SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
  uint32_t GlobalsAndSubsequentPaddingSize =
      GlobalsSize + LocalsSlotsPaddingBytes;

  // Add the out-args space to the stack, and align SP if necessary.
  if (!NeedsStackAlignment) {
    SpillAreaSizeBytes += MaxOutArgsSizeBytes * (VariableAllocaUsed ? 0 : 1);
  } else {
    SpillAreaSizeBytes = applyStackAlignment(
        SpillAreaSizeBytes +
        (VariableAllocaUsed ? VariableAllocaAlignBytes : MaxOutArgsSizeBytes));
  }

  // Combine the fixed allocas with SpillAreaSize.
  SpillAreaSizeBytes += FixedAllocaSizeBytes;

  TotalStackSizeBytes =
      applyStackAlignment(PreservedRegsSizeBytes + SpillAreaSizeBytes);

  // Generate "addiu sp, sp, -TotalStackSizeBytes".
  if (TotalStackSizeBytes) {
    // Use the scratch register if needed to legalize the immediate.
    Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
    _addiu(SP, SP, -TotalStackSizeBytes);
  }

  Ctx->statsUpdateFrameBytes(TotalStackSizeBytes);

  if (!PreservedGPRs.empty()) {
    uint32_t StackOffset = TotalStackSizeBytes;
    for (Variable *Var : *PreservedRegsInClass) {
      Type RegType;
      if (RegMIPS32::isFPRReg(Var->getRegNum()))
        RegType = IceType_f32;
      else
        RegType = IceType_i32;
      auto *PhysicalRegister = makeReg(RegType, Var->getRegNum());
      StackOffset -= typeWidthInBytesOnStack(RegType);
      Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
      OperandMIPS32Mem *MemoryLocation = OperandMIPS32Mem::create(
          Func, RegType, SP,
          llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackOffset)));
      _sw(PhysicalRegister, MemoryLocation);
    }
  }

  Variable *FP = getPhysicalRegister(RegMIPS32::Reg_FP);

  // Generate "mov FP, SP" if needed.
  if (UsesFramePointer) {
    Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
    _mov(FP, SP);
    // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode).
    Context.insert<InstFakeUse>(FP);
  }

  // Fill in stack offsets for stack args, and copy args into registers for
  // those that were register-allocated. Args are pushed right to left, so
  // Arg[0] is closest to the stack/frame pointer.
  const VarList &Args = Func->getArgs();
  size_t InArgsSizeBytes = MIPS32_MAX_GPR_ARG * 4;
  TargetMIPS32::CallingConv CC;
  uint32_t ArgNo = 0;

  for (Variable *Arg : Args) {
    RegNumT DummyReg;
    const Type Ty = Arg->getType();
    bool PartialOnStack;
    // Skip arguments passed in registers.
    if (CC.argInReg(Ty, ArgNo, &DummyReg)) {
      // Load the argument from the stack:
      // 1. If this is the first vector argument and the return type is v4f32.
      //    In this case $4 is used to pass the stack address implicitly, and
      //    the 3rd and 4th elements of the vector argument are passed on the
      //    stack.
      // 2. If this is the second vector argument.
      if (ArgNo != 0 && isVectorType(Ty)) {
        PartialOnStack = true;
        finishArgumentLowering(Arg, PartialOnStack, FP, TotalStackSizeBytes,
                               &InArgsSizeBytes);
      }
    } else {
      PartialOnStack = false;
      finishArgumentLowering(Arg, PartialOnStack, FP, TotalStackSizeBytes,
                             &InArgsSizeBytes);
    }
    ++ArgNo;
  }

  // Fill in stack offsets for locals.
  assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
                      SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize);
  this->HasComputedFrame = true;

  if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
    OstreamLocker _(Func->getContext());
    Ostream &Str = Func->getContext()->getStrDump();

    Str << "Stack layout:\n";
    uint32_t SPAdjustmentPaddingSize =
        SpillAreaSizeBytes - LocalsSpillAreaSize -
        GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes -
        MaxOutArgsSizeBytes;
    Str << " in-args = " << InArgsSizeBytes << " bytes\n"
        << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
        << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
        << " globals spill area = " << GlobalsSize << " bytes\n"
        << " globals-locals spill areas intermediate padding = "
        << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
        << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
        << " SP alignment padding = " << SPAdjustmentPaddingSize << " bytes\n";

    Str << "Stack details:\n"
        << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n"
        << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
        << " outgoing args size = " << MaxOutArgsSizeBytes << " bytes\n"
        << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
        << " bytes\n"
        << " is FP based = " << 1 << "\n";
  }
  return;
}
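
// A minimal sketch (hypothetical helper, simplified from the conditional
// logic above) of how addProlog() arrives at the final frame size: the
// spill-related areas and out-args are padded to the 16-byte MIPS32 stack
// alignment, fixed allocas are folded in, and the preserved-register area is
// added before a final re-alignment.
inline uint32_t totalFrameBytesSketch(uint32_t PreservedRegsBytes,
                                      uint32_t SpillAreaBytes,
                                      uint32_t OutArgsBytes,
                                      uint32_t FixedAllocaBytes) {
  constexpr uint32_t Align = MIPS32_STACK_ALIGNMENT_BYTES;
  uint32_t Size = SpillAreaBytes + OutArgsBytes;
  Size = (Size + Align - 1) & ~(Align - 1); // applyStackAlignment
  Size += FixedAllocaBytes;
  return (PreservedRegsBytes + Size + Align - 1) & ~(Align - 1);
}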

void TargetMIPS32::addEpilog(CfgNode *Node) {
  InstList &Insts = Node->getInsts();
  InstList::reverse_iterator RI, E;
  for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
    if (llvm::isa<InstMIPS32Ret>(*RI))
      break;
  }
  if (RI == E)
    return;

  // Convert the reverse_iterator position into its corresponding (forward)
  // iterator position.
  InstList::iterator InsertPoint = reverseToForwardIterator(RI);
  --InsertPoint;
  Context.init(Node);
  Context.setInsertPoint(InsertPoint);

  Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
  if (UsesFramePointer) {
    Variable *FP = getPhysicalRegister(RegMIPS32::Reg_FP);
    // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake
    // use of SP before the assignment of SP=FP keeps previous SP adjustments
    // from being dead-code eliminated.
    Context.insert<InstFakeUse>(SP);
    _mov(SP, FP);
  }

  VarList::reverse_iterator RIter, END;

  if (!PreservedGPRs.empty()) {
    uint32_t StackOffset = TotalStackSizeBytes - PreservedRegsSizeBytes;
    for (RIter = PreservedGPRs.rbegin(), END = PreservedGPRs.rend();
         RIter != END; ++RIter) {
      Type RegType;
      if (RegMIPS32::isFPRReg((*RIter)->getRegNum()))
        RegType = IceType_f32;
      else
        RegType = IceType_i32;
      auto *PhysicalRegister = makeReg(RegType, (*RIter)->getRegNum());
      Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
      OperandMIPS32Mem *MemoryLocation = OperandMIPS32Mem::create(
          Func, RegType, SP,
          llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackOffset)));
      _lw(PhysicalRegister, MemoryLocation);
      StackOffset += typeWidthInBytesOnStack(PhysicalRegister->getType());
    }
  }

  if (TotalStackSizeBytes) {
    _addiu(SP, SP, TotalStackSizeBytes);
  }
}

Variable *TargetMIPS32::PostLoweringLegalizer::newBaseRegister(
    Variable *Base, int32_t Offset, RegNumT ScratchRegNum) {
  // Legalizing the offset will likely need a lui/ori combination, but if the
  // top 16 bits of the negated offset are all zero, we can subtract it with a
  // single addi instead.
  const bool ShouldSub = Offset != 0 && (-Offset & 0xFFFF0000) == 0;
  Variable *ScratchReg = Target->makeReg(IceType_i32, ScratchRegNum);
  if (ShouldSub) {
    Target->_addi(ScratchReg, Base, -Offset);
  } else {
    constexpr bool SignExt = true;
    if (!OperandMIPS32Mem::canHoldOffset(Base->getType(), SignExt, Offset)) {
      const uint32_t UpperBits = (Offset >> 16) & 0xFFFF;
      const uint32_t LowerBits = Offset & 0xFFFF;
      Target->_lui(ScratchReg, Target->Ctx->getConstantInt32(UpperBits));
      if (LowerBits)
        Target->_ori(ScratchReg, ScratchReg, LowerBits);
      Target->_addu(ScratchReg, ScratchReg, Base);
    } else {
      Target->_addiu(ScratchReg, Base, Offset);
    }
  }

  return ScratchReg;
}
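
// Illustrative sketch only: the lui/ori pair above reconstructs any 32-bit
// offset exactly from its two 16-bit halves, because ori zero-extends its
// immediate. 'materializeLuiOri' is a hypothetical helper mirroring that
// arithmetic.
inline uint32_t materializeLuiOri(int32_t Offset) {
  const uint32_t Upper = (static_cast<uint32_t>(Offset) >> 16) & 0xFFFF;
  const uint32_t Lower = static_cast<uint32_t>(Offset) & 0xFFFF;
  uint32_t Reg = Upper << 16; // lui: load upper half, clear lower half
  Reg |= Lower;               // ori: zero-extended bitwise-or of lower half
  return Reg;                 // equal to static_cast<uint32_t>(Offset)
}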

void TargetMIPS32::PostLoweringLegalizer::legalizeMovFp(
    InstMIPS32MovFP64ToI64 *MovInstr) {
  Variable *Dest = MovInstr->getDest();
  Operand *Src = MovInstr->getSrc(0);
  const Type SrcTy = Src->getType();

  if (Dest != nullptr && SrcTy == IceType_f64) {
    int32_t Offset = Dest->getStackOffset();
    auto *Base = Target->getPhysicalRegister(Target->getFrameOrStackReg());
    OperandMIPS32Mem *TAddr = OperandMIPS32Mem::create(
        Target->Func, IceType_f32, Base,
        llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset)));
    OperandMIPS32Mem *Addr = legalizeMemOperand(TAddr);
    auto *SrcV = llvm::cast<Variable>(Src);
    Variable *SrcR;
    if (MovInstr->getInt64Part() == Int64_Lo) {
      SrcR = Target->makeReg(
          IceType_f32, RegMIPS32::get64PairFirstRegNum(SrcV->getRegNum()));
    } else {
      SrcR = Target->makeReg(
          IceType_f32, RegMIPS32::get64PairSecondRegNum(SrcV->getRegNum()));
    }
    Target->_sw(SrcR, Addr);
    if (MovInstr->isDestRedefined()) {
      Target->_set_dest_redefined();
    }
    MovInstr->setDeleted();
    return;
  }

  llvm::report_fatal_error("legalizeMovFp: Invalid operands");
}

void TargetMIPS32::PostLoweringLegalizer::legalizeMov(InstMIPS32Mov *MovInstr) {
  Variable *Dest = MovInstr->getDest();
  assert(Dest != nullptr);
  const Type DestTy = Dest->getType();
  assert(DestTy != IceType_i64);

  Operand *Src = MovInstr->getSrc(0);
  const Type SrcTy = Src->getType();
  (void)SrcTy;
  assert(SrcTy != IceType_i64);

  bool Legalized = false;
  auto *SrcR = llvm::cast<Variable>(Src);
  if (Dest->hasReg() && SrcR->hasReg()) {
    // This might be a GPR-to/from-FPR move generated due to argument passing.
    // Use mtc1/mfc1 instead of mov.[s/d] if the source and destination
    // registers are of different classes.
    const bool IsDstGPR = RegMIPS32::isGPRReg(Dest->getRegNum());
    const bool IsSrcGPR = RegMIPS32::isGPRReg(SrcR->getRegNum());
    const RegNumT SRegNum = SrcR->getRegNum();
    const RegNumT DRegNum = Dest->getRegNum();
    if (IsDstGPR != IsSrcGPR) {
      if (IsDstGPR) {
        // Dest is a GPR and SrcR is an FPR. Use mfc1.
        int32_t TypeWidth = typeWidthInBytes(DestTy);
        if (MovInstr->getDestHi() != nullptr)
          TypeWidth += typeWidthInBytes(MovInstr->getDestHi()->getType());
        if (TypeWidth == 8) {
          // Split it into two mfc1 instructions.
          Variable *SrcGPRHi = Target->makeReg(
              IceType_f32, RegMIPS32::get64PairFirstRegNum(SRegNum));
          Variable *SrcGPRLo = Target->makeReg(
              IceType_f32, RegMIPS32::get64PairSecondRegNum(SRegNum));
          Variable *DstFPRHi, *DstFPRLo;
          if (MovInstr->getDestHi() != nullptr && Dest != nullptr) {
            DstFPRHi = Target->makeReg(IceType_i32,
                                       MovInstr->getDestHi()->getRegNum());
            DstFPRLo = Target->makeReg(IceType_i32, Dest->getRegNum());
          } else {
            DstFPRHi = Target->makeReg(
                IceType_i32, RegMIPS32::get64PairFirstRegNum(DRegNum));
            DstFPRLo = Target->makeReg(
                IceType_i32, RegMIPS32::get64PairSecondRegNum(DRegNum));
          }
          Target->_mov(DstFPRHi, SrcGPRHi);
          Target->_mov(DstFPRLo, SrcGPRLo);
          Legalized = true;
        } else {
          Variable *SrcGPR = Target->makeReg(IceType_f32, SRegNum);
          Variable *DstFPR = Target->makeReg(IceType_i32, DRegNum);
          Target->_mov(DstFPR, SrcGPR);
          Legalized = true;
        }
      } else {
        // Dest is an FPR and SrcR is a GPR. Use mtc1.
        if (typeWidthInBytes(Dest->getType()) == 8) {
          Variable *SrcGPRHi, *SrcGPRLo;
          // SrcR could be $zero, which is i32.
          if (SRegNum == RegMIPS32::Reg_ZERO) {
            SrcGPRHi = Target->makeReg(IceType_i32, SRegNum);
            SrcGPRLo = SrcGPRHi;
          } else {
            // Split it into two mtc1 instructions.
            if (MovInstr->getSrcSize() == 2) {
              const auto FirstReg =
                  (llvm::cast<Variable>(MovInstr->getSrc(0)))->getRegNum();
              const auto SecondReg =
                  (llvm::cast<Variable>(MovInstr->getSrc(1)))->getRegNum();
              SrcGPRHi = Target->makeReg(IceType_i32, FirstReg);
              SrcGPRLo = Target->makeReg(IceType_i32, SecondReg);
            } else {
              SrcGPRLo = Target->makeReg(
                  IceType_i32, RegMIPS32::get64PairFirstRegNum(SRegNum));
              SrcGPRHi = Target->makeReg(
                  IceType_i32, RegMIPS32::get64PairSecondRegNum(SRegNum));
            }
          }
          Variable *DstFPRHi = Target->makeReg(
              IceType_f32, RegMIPS32::get64PairFirstRegNum(DRegNum));
          Variable *DstFPRLo = Target->makeReg(
              IceType_f32, RegMIPS32::get64PairSecondRegNum(DRegNum));
          Target->_mov(DstFPRHi, SrcGPRLo);
          Target->_mov(DstFPRLo, SrcGPRHi);
          Legalized = true;
        } else {
          Variable *SrcGPR = Target->makeReg(IceType_i32, SRegNum);
          Variable *DstFPR = Target->makeReg(IceType_f32, DRegNum);
          Target->_mov(DstFPR, SrcGPR);
          Legalized = true;
        }
      }
    }
    if (Legalized) {
      if (MovInstr->isDestRedefined()) {
        Target->_set_dest_redefined();
      }
      MovInstr->setDeleted();
      return;
    }
  }

  if (!Dest->hasReg()) {
    auto *SrcR = llvm::cast<Variable>(Src);
    assert(SrcR->hasReg());
    assert(!SrcR->isRematerializable());
    int32_t Offset = Dest->getStackOffset();

    // This is a _mov(Mem(), Variable), i.e., a store.
    auto *Base = Target->getPhysicalRegister(Target->getFrameOrStackReg());

    OperandMIPS32Mem *TAddr = OperandMIPS32Mem::create(
        Target->Func, DestTy, Base,
        llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset)));
    OperandMIPS32Mem *TAddrHi = OperandMIPS32Mem::create(
        Target->Func, DestTy, Base,
        llvm::cast<ConstantInteger32>(
            Target->Ctx->getConstantInt32(Offset + 4)));
    OperandMIPS32Mem *Addr = legalizeMemOperand(TAddr);

    // FP arguments are passed in GPRs if the first argument is in a GPR. In
    // that case the type of SrcR is still FP, so we must explicitly generate
    // sw instead of swc1.
    const RegNumT RegNum = SrcR->getRegNum();
    const bool IsSrcGPReg = RegMIPS32::isGPRReg(SrcR->getRegNum());
    if (SrcTy == IceType_f32 && IsSrcGPReg) {
      Variable *SrcGPR = Target->makeReg(IceType_i32, RegNum);
      Target->_sw(SrcGPR, Addr);
    } else if (SrcTy == IceType_f64 && IsSrcGPReg) {
      Variable *SrcGPRHi =
          Target->makeReg(IceType_i32, RegMIPS32::get64PairFirstRegNum(RegNum));
      Variable *SrcGPRLo = Target->makeReg(
          IceType_i32, RegMIPS32::get64PairSecondRegNum(RegNum));
      Target->_sw(SrcGPRHi, Addr);
      OperandMIPS32Mem *AddrHi = legalizeMemOperand(TAddrHi);
      Target->_sw(SrcGPRLo, AddrHi);
    } else if (DestTy == IceType_f64 && IsSrcGPReg) {
      const auto FirstReg =
          (llvm::cast<Variable>(MovInstr->getSrc(0)))->getRegNum();
      const auto SecondReg =
          (llvm::cast<Variable>(MovInstr->getSrc(1)))->getRegNum();
      Variable *SrcGPRHi = Target->makeReg(IceType_i32, FirstReg);
      Variable *SrcGPRLo = Target->makeReg(IceType_i32, SecondReg);
      Target->_sw(SrcGPRLo, Addr);
      OperandMIPS32Mem *AddrHi = legalizeMemOperand(TAddrHi);
      Target->_sw(SrcGPRHi, AddrHi);
    } else {
      Target->_sw(SrcR, Addr);
    }

    Target->Context.insert<InstFakeDef>(Dest);
    Legalized = true;
  } else if (auto *Var = llvm::dyn_cast<Variable>(Src)) {
    if (Var->isRematerializable()) {
      // This is equivalent to an x86 _lea(RematOffset(%esp/%ebp), Variable).

      // ExtraOffset is only needed for stack-pointer based frames as we have
      // to account for spill storage.
      const int32_t ExtraOffset =
          (Var->getRegNum() == Target->getFrameOrStackReg())
              ? Target->getFrameFixedAllocaOffset()
              : 0;

      const int32_t Offset = Var->getStackOffset() + ExtraOffset;
      Variable *Base = Target->getPhysicalRegister(Var->getRegNum());
      Variable *T = newBaseRegister(Base, Offset, Dest->getRegNum());
      Target->_mov(Dest, T);
      Legalized = true;
    } else {
      if (!Var->hasReg()) {
        // This is a _mov(Variable, Mem()), i.e., a load.
        const int32_t Offset = Var->getStackOffset();
        auto *Base = Target->getPhysicalRegister(Target->getFrameOrStackReg());
        const RegNumT RegNum = Dest->getRegNum();
        const bool IsDstGPReg = RegMIPS32::isGPRReg(Dest->getRegNum());
        // If we are moving an i64 to a double through the stack, the address
        // may not be aligned to an 8-byte boundary, since we split the i64
        // into Hi/Lo parts and store them individually with 4-byte alignment.
        // Load the Hi/Lo parts into TmpReg and move them to the dest using
        // mtc1.
        if (DestTy == IceType_f64 && !Utils::IsAligned(Offset, 8) &&
            !IsDstGPReg) {
          auto *Reg = Target->makeReg(IceType_i32, Target->getReservedTmpReg());
          const RegNumT RegNum = Dest->getRegNum();
          Variable *DestLo = Target->makeReg(
              IceType_f32, RegMIPS32::get64PairFirstRegNum(RegNum));
          Variable *DestHi = Target->makeReg(
              IceType_f32, RegMIPS32::get64PairSecondRegNum(RegNum));
          OperandMIPS32Mem *AddrLo = OperandMIPS32Mem::create(
              Target->Func, IceType_i32, Base,
              llvm::cast<ConstantInteger32>(
                  Target->Ctx->getConstantInt32(Offset)));
          OperandMIPS32Mem *AddrHi = OperandMIPS32Mem::create(
              Target->Func, IceType_i32, Base,
              llvm::cast<ConstantInteger32>(
                  Target->Ctx->getConstantInt32(Offset + 4)));
          Target->_lw(Reg, AddrLo);
          Target->_mov(DestLo, Reg);
          Target->_lw(Reg, AddrHi);
          Target->_mov(DestHi, Reg);
        } else {
          OperandMIPS32Mem *TAddr = OperandMIPS32Mem::create(
              Target->Func, DestTy, Base,
              llvm::cast<ConstantInteger32>(
                  Target->Ctx->getConstantInt32(Offset)));
          OperandMIPS32Mem *Addr = legalizeMemOperand(TAddr);
          OperandMIPS32Mem *TAddrHi = OperandMIPS32Mem::create(
              Target->Func, DestTy, Base,
              llvm::cast<ConstantInteger32>(
                  Target->Ctx->getConstantInt32(Offset + 4)));
          // FP arguments are passed in GPRs if the first argument is in a
          // GPR. In that case the type of Dest is still FP, so we must
          // explicitly generate lw instead of lwc1.
          if (DestTy == IceType_f32 && IsDstGPReg) {
            Variable *DstGPR = Target->makeReg(IceType_i32, RegNum);
            Target->_lw(DstGPR, Addr);
          } else if (DestTy == IceType_f64 && IsDstGPReg) {
            Variable *DstGPRHi = Target->makeReg(
                IceType_i32, RegMIPS32::get64PairFirstRegNum(RegNum));
            Variable *DstGPRLo = Target->makeReg(
                IceType_i32, RegMIPS32::get64PairSecondRegNum(RegNum));
            Target->_lw(DstGPRHi, Addr);
            OperandMIPS32Mem *AddrHi = legalizeMemOperand(TAddrHi);
            Target->_lw(DstGPRLo, AddrHi);
          } else if (DestTy == IceType_f64 && IsDstGPReg) {
            const auto FirstReg =
                (llvm::cast<Variable>(MovInstr->getSrc(0)))->getRegNum();
            const auto SecondReg =
                (llvm::cast<Variable>(MovInstr->getSrc(1)))->getRegNum();
            Variable *DstGPRHi = Target->makeReg(IceType_i32, FirstReg);
            Variable *DstGPRLo = Target->makeReg(IceType_i32, SecondReg);
            Target->_lw(DstGPRLo, Addr);
            OperandMIPS32Mem *AddrHi = legalizeMemOperand(TAddrHi);
            Target->_lw(DstGPRHi, AddrHi);
          } else {
            Target->_lw(Dest, Addr);
          }
        }
        Legalized = true;
      }
    }
  }

  if (Legalized) {
    if (MovInstr->isDestRedefined()) {
      Target->_set_dest_redefined();
    }
    MovInstr->setDeleted();
  }
}

OperandMIPS32Mem *
TargetMIPS32::PostLoweringLegalizer::legalizeMemOperand(OperandMIPS32Mem *Mem) {
  if (llvm::isa<ConstantRelocatable>(Mem->getOffset())) {
    return nullptr;
  }
  Variable *Base = Mem->getBase();
  auto *Ci32 = llvm::cast<ConstantInteger32>(Mem->getOffset());
  int32_t Offset = Ci32->getValue();

  if (Base->isRematerializable()) {
    const int32_t ExtraOffset =
        (Base->getRegNum() == Target->getFrameOrStackReg())
            ? Target->getFrameFixedAllocaOffset()
            : 0;
    Offset += Base->getStackOffset() + ExtraOffset;
    Base = Target->getPhysicalRegister(Base->getRegNum());
  }

  constexpr bool SignExt = true;
  if (!OperandMIPS32Mem::canHoldOffset(Mem->getType(), SignExt, Offset)) {
    Base = newBaseRegister(Base, Offset, Target->getReservedTmpReg());
    Offset = 0;
  }

  return OperandMIPS32Mem::create(
      Target->Func, Mem->getType(), Base,
      llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset)));
}

Variable *TargetMIPS32::PostLoweringLegalizer::legalizeImmediate(int32_t Imm) {
  Variable *Reg = nullptr;
  if (!((std::numeric_limits<int16_t>::min() <= Imm) &&
        (Imm <= std::numeric_limits<int16_t>::max()))) {
    const uint32_t UpperBits = (Imm >> 16) & 0xFFFF;
    const uint32_t LowerBits = Imm & 0xFFFF;
    Variable *TReg = Target->makeReg(IceType_i32, Target->getReservedTmpReg());
    Reg = Target->makeReg(IceType_i32, Target->getReservedTmpReg());
    if (LowerBits) {
      Target->_lui(TReg, Target->Ctx->getConstantInt32(UpperBits));
      Target->_ori(Reg, TReg, LowerBits);
    } else {
      Target->_lui(Reg, Target->Ctx->getConstantInt32(UpperBits));
    }
  }
  return Reg;
}
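
// Illustrative sketch only: addiu takes a signed 16-bit immediate, which is
// exactly the range check legalizeImmediate() performs above before falling
// back to a lui/ori sequence. 'fitsAddiuImmediate' is a hypothetical helper.
inline bool fitsAddiuImmediate(int32_t Imm) {
  return Imm >= std::numeric_limits<int16_t>::min() &&
         Imm <= std::numeric_limits<int16_t>::max();
}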

void TargetMIPS32::postLowerLegalization() {
  Func->dump("Before postLowerLegalization");
  assert(hasComputedFrame());
  for (CfgNode *Node : Func->getNodes()) {
    Context.init(Node);
    PostLoweringLegalizer Legalizer(this);
    while (!Context.atEnd()) {
      PostIncrLoweringContext PostIncrement(Context);
      Inst *CurInstr = iteratorToInst(Context.getCur());
      const SizeT NumSrcs = CurInstr->getSrcSize();
      Operand *Src0 = NumSrcs < 1 ? nullptr : CurInstr->getSrc(0);
      Operand *Src1 = NumSrcs < 2 ? nullptr : CurInstr->getSrc(1);
      auto *Src0V = llvm::dyn_cast_or_null<Variable>(Src0);
      auto *Src0M = llvm::dyn_cast_or_null<OperandMIPS32Mem>(Src0);
      auto *Src1M = llvm::dyn_cast_or_null<OperandMIPS32Mem>(Src1);
      Variable *Dst = CurInstr->getDest();
      if (auto *MovInstr = llvm::dyn_cast<InstMIPS32Mov>(CurInstr)) {
        Legalizer.legalizeMov(MovInstr);
        continue;
      }
      if (auto *MovInstr = llvm::dyn_cast<InstMIPS32MovFP64ToI64>(CurInstr)) {
        Legalizer.legalizeMovFp(MovInstr);
        continue;
      }
      if (llvm::isa<InstMIPS32Sw>(CurInstr)) {
        if (auto *LegalMem = Legalizer.legalizeMemOperand(Src1M)) {
          _sw(Src0V, LegalMem);
          CurInstr->setDeleted();
        }
        continue;
      }
      if (llvm::isa<InstMIPS32Swc1>(CurInstr)) {
        if (auto *LegalMem = Legalizer.legalizeMemOperand(Src1M)) {
          _swc1(Src0V, LegalMem);
          CurInstr->setDeleted();
        }
        continue;
      }
      if (llvm::isa<InstMIPS32Sdc1>(CurInstr)) {
        if (auto *LegalMem = Legalizer.legalizeMemOperand(Src1M)) {
          _sdc1(Src0V, LegalMem);
          CurInstr->setDeleted();
        }
        continue;
      }
      if (llvm::isa<InstMIPS32Lw>(CurInstr)) {
        if (auto *LegalMem = Legalizer.legalizeMemOperand(Src0M)) {
          _lw(Dst, LegalMem);
          CurInstr->setDeleted();
        }
        continue;
      }
      if (llvm::isa<InstMIPS32Lwc1>(CurInstr)) {
        if (auto *LegalMem = Legalizer.legalizeMemOperand(Src0M)) {
          _lwc1(Dst, LegalMem);
          CurInstr->setDeleted();
        }
        continue;
      }
      if (llvm::isa<InstMIPS32Ldc1>(CurInstr)) {
        if (auto *LegalMem = Legalizer.legalizeMemOperand(Src0M)) {
          _ldc1(Dst, LegalMem);
          CurInstr->setDeleted();
        }
        continue;
      }
      if (auto *AddiuInstr = llvm::dyn_cast<InstMIPS32Addiu>(CurInstr)) {
        if (auto *LegalImm = Legalizer.legalizeImmediate(
                static_cast<int32_t>(AddiuInstr->getImmediateValue()))) {
          _addu(Dst, Src0V, LegalImm);
          CurInstr->setDeleted();
        }
        continue;
      }
    }
  }
}

Operand *TargetMIPS32::loOperand(Operand *Operand) {
  assert(Operand->getType() == IceType_i64);
  if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
    return Var64On32->getLo();
  if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
    return Ctx->getConstantInt32(static_cast<uint32_t>(Const->getValue()));
  }
  if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) {
    // Conservatively disallow memory operands with side-effects (pre/post
    // increment) in case of duplication.
    assert(Mem->getAddrMode() == OperandMIPS32Mem::Offset);
    return OperandMIPS32Mem::create(Func, IceType_i32, Mem->getBase(),
                                    Mem->getOffset(), Mem->getAddrMode());
  }
  llvm_unreachable("Unsupported operand type");
  return nullptr;
}

Operand *TargetMIPS32::getOperandAtIndex(Operand *Operand, Type BaseType,
                                         uint32_t Index) {
  if (!isVectorType(Operand->getType())) {
    llvm::report_fatal_error("getOperandAtIndex: Operand is not vector");
    return nullptr;
  }

  if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) {
    assert(Mem->getAddrMode() == OperandMIPS32Mem::Offset);
    Variable *Base = Mem->getBase();
    auto *Offset = llvm::cast<ConstantInteger32>(Mem->getOffset());
    assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4));
    int32_t NextOffsetVal =
        Offset->getValue() + (Index * typeWidthInBytes(BaseType));
    constexpr bool NoSignExt = false;
    if (!OperandMIPS32Mem::canHoldOffset(BaseType, NoSignExt, NextOffsetVal)) {
      Constant *_4 = Ctx->getConstantInt32(4);
      Variable *NewBase = Func->makeVariable(Base->getType());
      lowerArithmetic(
          InstArithmetic::create(Func, InstArithmetic::Add, NewBase, Base, _4));
      Base = NewBase;
    } else {
      Offset =
          llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal));
    }
    return OperandMIPS32Mem::create(Func, BaseType, Base, Offset,
                                    Mem->getAddrMode());
  }

  if (auto *VarVecOn32 = llvm::dyn_cast<VariableVecOn32>(Operand))
    return VarVecOn32->getContainers()[Index];

  llvm_unreachable("Unsupported operand type");
  return nullptr;
}

Operand *TargetMIPS32::hiOperand(Operand *Operand) {
  assert(Operand->getType() == IceType_i64);
  if (Operand->getType() != IceType_i64)
    return Operand;
  if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
    return Var64On32->getHi();
  if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
    return Ctx->getConstantInt32(
        static_cast<uint32_t>(Const->getValue() >> 32));
  }
  if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) {
    // Conservatively disallow memory operands with side-effects in case of
    // duplication.
    assert(Mem->getAddrMode() == OperandMIPS32Mem::Offset);
    const Type SplitType = IceType_i32;
    Variable *Base = Mem->getBase();
    auto *Offset = llvm::cast<ConstantInteger32>(Mem->getOffset());
    assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4));
    int32_t NextOffsetVal = Offset->getValue() + 4;
    constexpr bool SignExt = false;
    if (!OperandMIPS32Mem::canHoldOffset(SplitType, SignExt, NextOffsetVal)) {
      // We have to make a temp variable and add 4 to either Base or Offset.
      // If we add 4 to Offset, this would convert a non-RegReg addressing
      // mode into a RegReg addressing mode. Since NaCl sandboxing disallows
      // RegReg addressing modes, prefer adding to Base and replacing it
      // instead, leaving the old offset alone.
      Constant *Four = Ctx->getConstantInt32(4);
      Variable *NewBase = Func->makeVariable(Base->getType());
      lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase,
                                             Base, Four));
      Base = NewBase;
    } else {
      Offset =
          llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal));
    }
    return OperandMIPS32Mem::create(Func, SplitType, Base, Offset,
                                    Mem->getAddrMode());
  }
  llvm_unreachable("Unsupported operand type");
  return nullptr;
}
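
// Illustrative sketch only: the lo/hi split that loOperand()/hiOperand()
// apply to 64-bit integer constants, shown on a raw uint64_t. 'LoHi' and
// 'splitI64' are hypothetical helpers.
struct LoHi {
  uint32_t Lo, Hi;
};
inline LoHi splitI64(uint64_t V) {
  return {static_cast<uint32_t>(V),        // low word, as in loOperand()
          static_cast<uint32_t>(V >> 32)}; // high word, as in hiOperand()
}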

SmallBitVector TargetMIPS32::getRegisterSet(RegSetMask Include,
                                            RegSetMask Exclude) const {
  SmallBitVector Registers(RegMIPS32::Reg_NUM);

#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
          isI64Pair, isFP32, isFP64, isVec128, alias_init)                     \
  if (scratch && (Include & RegSet_CallerSave))                                \
    Registers[RegMIPS32::val] = true;                                          \
  if (preserved && (Include & RegSet_CalleeSave))                              \
    Registers[RegMIPS32::val] = true;                                          \
  if (stackptr && (Include & RegSet_StackPointer))                             \
    Registers[RegMIPS32::val] = true;                                          \
  if (frameptr && (Include & RegSet_FramePointer))                             \
    Registers[RegMIPS32::val] = true;                                          \
  if (scratch && (Exclude & RegSet_CallerSave))                                \
    Registers[RegMIPS32::val] = false;                                         \
  if (preserved && (Exclude & RegSet_CalleeSave))                              \
    Registers[RegMIPS32::val] = false;                                         \
  if (stackptr && (Exclude & RegSet_StackPointer))                             \
    Registers[RegMIPS32::val] = false;                                         \
  if (frameptr && (Exclude & RegSet_FramePointer))                             \
    Registers[RegMIPS32::val] = false;

  REGMIPS32_TABLE

#undef X

  return Registers;
}

void TargetMIPS32::lowerAlloca(const InstAlloca *Instr) {
  // Conservatively require the stack to be aligned. Some stack adjustment
  // operations implemented below assume that the stack is aligned before the
  // alloca. All the alloca code ensures that the stack alignment is preserved
  // after the alloca. The stack alignment restriction can be relaxed in some
  // cases.
  NeedsStackAlignment = true;

  // For the default align=0, set it to the real value 1, to avoid any
  // bit-manipulation problems below.
  const uint32_t AlignmentParam = std::max(1u, Instr->getAlignInBytes());

  // LLVM enforces power-of-2 alignment.
  assert(llvm::isPowerOf2_32(AlignmentParam));
  assert(llvm::isPowerOf2_32(MIPS32_STACK_ALIGNMENT_BYTES));

  const uint32_t Alignment =
      std::max(AlignmentParam, MIPS32_STACK_ALIGNMENT_BYTES);
  const bool OverAligned = Alignment > MIPS32_STACK_ALIGNMENT_BYTES;
  const bool OptM1 = Func->getOptLevel() == Opt_m1;
  const bool AllocaWithKnownOffset = Instr->getKnownFrameOffset();
  const bool UseFramePointer =
      hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1;

  if (UseFramePointer)
    setHasFramePointer();

  Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);

  Variable *Dest = Instr->getDest();
  Operand *TotalSize = Instr->getSizeInBytes();

  if (const auto *ConstantTotalSize =
          llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
    const uint32_t Value =
        Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment);
    FixedAllocaSizeBytes += Value;
    // Constant-size alloca.
    if (!UseFramePointer) {
      // If we don't need a frame pointer, this alloca has a known offset to
      // the stack pointer. We don't need to adjust the stack pointer, nor
      // assign any value to Dest, since Dest is rematerializable.
      assert(Dest->isRematerializable());
      Context.insert<InstFakeDef>(Dest);
      return;
    }

    if (Alignment > MIPS32_STACK_ALIGNMENT_BYTES) {
      CurrentAllocaOffset =
          Utils::applyAlignment(CurrentAllocaOffset, Alignment);
    }
    auto *T = I32Reg();
    _addiu(T, SP, CurrentAllocaOffset);
    _mov(Dest, T);
    CurrentAllocaOffset += Value;
    return;
  } else {
    // Non-constant sizes need to be adjusted to the next highest multiple of
    // the required alignment at runtime.
    VariableAllocaUsed = true;
    VariableAllocaAlignBytes = AlignmentParam;
    Variable *AlignAmount;
    auto *TotalSizeR = legalizeToReg(TotalSize, Legal_Reg);
    auto *T1 = I32Reg();
    auto *T2 = I32Reg();
    auto *T3 = I32Reg();
    auto *T4 = I32Reg();
    auto *T5 = I32Reg();
    _addiu(T1, TotalSizeR, MIPS32_STACK_ALIGNMENT_BYTES - 1);
    _addiu(T2, getZero(), -MIPS32_STACK_ALIGNMENT_BYTES);
    _and(T3, T1, T2);
    _subu(T4, SP, T3);
    if (Instr->getAlignInBytes()) {
      AlignAmount =
          legalizeToReg(Ctx->getConstantInt32(-AlignmentParam), Legal_Reg);
      _and(T5, T4, AlignAmount);
      _mov(Dest, T5);
    } else {
      _mov(Dest, T4);
    }
    _mov(SP, Dest);
    return;
  }
}
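
// Illustrative sketch only: the addiu/and pair emitted above for
// variable-sized allocas performs this rounding at runtime; note that
// -A == ~(A - 1) for a power-of-two A. 'roundUpToStackAlign' is a
// hypothetical helper.
inline uint32_t roundUpToStackAlign(uint32_t SizeBytes) {
  constexpr uint32_t A = MIPS32_STACK_ALIGNMENT_BYTES;
  return (SizeBytes + A - 1) & ~(A - 1);
}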

void TargetMIPS32::lowerInt64Arithmetic(const InstArithmetic *Instr,
                                        Variable *Dest, Operand *Src0,
                                        Operand *Src1) {
  InstArithmetic::OpKind Op = Instr->getOp();
  auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
  auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
  Variable *Src0LoR = nullptr;
  Variable *Src1LoR = nullptr;
  Variable *Src0HiR = nullptr;
  Variable *Src1HiR = nullptr;

  switch (Op) {
  case InstArithmetic::_num:
    llvm::report_fatal_error("Unknown arithmetic operator");
    return;
  case InstArithmetic::Add: {
    Src0LoR = legalizeToReg(loOperand(Src0));
    Src1LoR = legalizeToReg(loOperand(Src1));
    Src0HiR = legalizeToReg(hiOperand(Src0));
    Src1HiR = legalizeToReg(hiOperand(Src1));
    auto *T_Carry = I32Reg(), *T_Lo = I32Reg(), *T_Hi = I32Reg(),
         *T_Hi2 = I32Reg();
    _addu(T_Lo, Src0LoR, Src1LoR);
    _mov(DestLo, T_Lo);
    _sltu(T_Carry, T_Lo, Src0LoR);
    _addu(T_Hi, T_Carry, Src0HiR);
    _addu(T_Hi2, Src1HiR, T_Hi);
    _mov(DestHi, T_Hi2);
    return;
  }
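  // The sequence above is the standard 32-bit add-with-carry expansion:
  //   Lo    = Src0Lo + Src1Lo
  //   Carry = (Lo < Src0Lo)            // unsigned compare, done by sltu
  //   Hi    = Src0Hi + Src1Hi + Carry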
  case InstArithmetic::And: {
    Src0LoR = legalizeToReg(loOperand(Src0));
    Src1LoR = legalizeToReg(loOperand(Src1));
    Src0HiR = legalizeToReg(hiOperand(Src0));
    Src1HiR = legalizeToReg(hiOperand(Src1));
    auto *T_Lo = I32Reg(), *T_Hi = I32Reg();
    _and(T_Lo, Src0LoR, Src1LoR);
    _mov(DestLo, T_Lo);
    _and(T_Hi, Src0HiR, Src1HiR);
    _mov(DestHi, T_Hi);
    return;
  }
  case InstArithmetic::Sub: {
    Src0LoR = legalizeToReg(loOperand(Src0));
    Src1LoR = legalizeToReg(loOperand(Src1));
    Src0HiR = legalizeToReg(hiOperand(Src0));
    Src1HiR = legalizeToReg(hiOperand(Src1));
    auto *T_Borrow = I32Reg(), *T_Lo = I32Reg(), *T_Hi = I32Reg(),
         *T_Hi2 = I32Reg();
    _subu(T_Lo, Src0LoR, Src1LoR);
    _mov(DestLo, T_Lo);
    _sltu(T_Borrow, Src0LoR, Src1LoR);
    _addu(T_Hi, T_Borrow, Src1HiR);
    _subu(T_Hi2, Src0HiR, T_Hi);
    _mov(DestHi, T_Hi2);
    return;
  }
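  // Likewise, the subtraction above uses sltu to compute the borrow out of
  // the low word:
  //   Borrow = (Src0Lo < Src1Lo)       // unsigned compare
  //   Hi     = Src0Hi - (Src1Hi + Borrow)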
  case InstArithmetic::Or: {
    Src0LoR = legalizeToReg(loOperand(Src0));
    Src1LoR = legalizeToReg(loOperand(Src1));
    Src0HiR = legalizeToReg(hiOperand(Src0));
    Src1HiR = legalizeToReg(hiOperand(Src1));
    auto *T_Lo = I32Reg(), *T_Hi = I32Reg();
    _or(T_Lo, Src0LoR, Src1LoR);
    _mov(DestLo, T_Lo);
    _or(T_Hi, Src0HiR, Src1HiR);
    _mov(DestHi, T_Hi);
    return;
  }
  case InstArithmetic::Xor: {
    Src0LoR = legalizeToReg(loOperand(Src0));
    Src1LoR = legalizeToReg(loOperand(Src1));
    Src0HiR = legalizeToReg(hiOperand(Src0));
    Src1HiR = legalizeToReg(hiOperand(Src1));
    auto *T_Lo = I32Reg(), *T_Hi = I32Reg();
    _xor(T_Lo, Src0LoR, Src1LoR);
    _mov(DestLo, T_Lo);
    _xor(T_Hi, Src0HiR, Src1HiR);
    _mov(DestHi, T_Hi);
    return;
  }
  case InstArithmetic::Mul: {
    // TODO(rkotler): Make sure that mul has the side effect of clobbering
    // LO, HI. Check for any other LO, HI quirkiness in this section.
    Src0LoR = legalizeToReg(loOperand(Src0));
    Src1LoR = legalizeToReg(loOperand(Src1));
    Src0HiR = legalizeToReg(hiOperand(Src0));
    Src1HiR = legalizeToReg(hiOperand(Src1));
    auto *T_Lo = I32Reg(RegMIPS32::Reg_LO), *T_Hi = I32Reg(RegMIPS32::Reg_HI);
    auto *T1 = I32Reg(), *T2 = I32Reg();
    auto *TM1 = I32Reg(), *TM2 = I32Reg(), *TM3 = I32Reg(), *TM4 = I32Reg();
    _multu(T_Lo, Src0LoR, Src1LoR);
    Context.insert<InstFakeDef>(T_Hi, T_Lo);
    _mflo(T1, T_Lo);
    _mfhi(T2, T_Hi);
    _mov(DestLo, T1);
    _mul(TM1, Src0HiR, Src1LoR);
    _mul(TM2, Src0LoR, Src1HiR);
    _addu(TM3, TM1, T2);
    _addu(TM4, TM3, TM2);
    _mov(DestHi, TM4);
    return;
  }
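  // The multiply above uses the schoolbook decomposition: with
  // Src0 = 2^32*H0 + L0 and Src1 = 2^32*H1 + L1,
  //   DestLo = lo32(L0*L1)                  // LO register after multu
  //   DestHi = hi32(L0*L1) + H0*L1 + L0*H1  // HI register plus two mul terms
  // The H0*H1 term only contributes above bit 63 and is dropped.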
  case InstArithmetic::Shl: {
    auto *T_Lo = I32Reg();
    auto *T_Hi = I32Reg();
    auto *T1_Lo = I32Reg();
    auto *T1_Hi = I32Reg();
    auto *T1 = I32Reg();
    auto *T2 = I32Reg();
    auto *T3 = I32Reg();
    auto *T4 = I32Reg();
    auto *T5 = I32Reg();

    if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Src1)) {
      Src0LoR = legalizeToReg(loOperand(Src0));
      int64_t ShiftAmount = Const->getValue();
      if (ShiftAmount == 1) {
        Src0HiR = legalizeToReg(hiOperand(Src0));
        _addu(T_Lo, Src0LoR, Src0LoR);
        _sltu(T1, T_Lo, Src0LoR);
        _addu(T2, T1, Src0HiR);
        _addu(T_Hi, Src0HiR, T2);
      } else if (ShiftAmount < INT32_BITS) {
        Src0HiR = legalizeToReg(hiOperand(Src0));
        _srl(T1, Src0LoR, INT32_BITS - ShiftAmount);
        _sll(T2, Src0HiR, ShiftAmount);
        _or(T_Hi, T1, T2);
        _sll(T_Lo, Src0LoR, ShiftAmount);
      } else if (ShiftAmount == INT32_BITS) {
        _addiu(T_Lo, getZero(), 0);
        _mov(T_Hi, Src0LoR);
      } else if (ShiftAmount > INT32_BITS && ShiftAmount < 64) {
        _sll(T_Hi, Src0LoR, ShiftAmount - INT32_BITS);
        _addiu(T_Lo, getZero(), 0);
      }
      _mov(DestLo, T_Lo);
      _mov(DestHi, T_Hi);
      return;
    }

    Src0LoR = legalizeToReg(loOperand(Src0));
    Src1LoR = legalizeToReg(loOperand(Src1));
    Src0HiR = legalizeToReg(hiOperand(Src0));

    _sllv(T1, Src0HiR, Src1LoR);
    _not(T2, Src1LoR);
    _srl(T3, Src0LoR, 1);
    _srlv(T4, T3, T2);
    _or(T_Hi, T1, T4);
    _sllv(T_Lo, Src0LoR, Src1LoR);

    _mov(T1_Hi, T_Hi);
    _mov(T1_Lo, T_Lo);
    _andi(T5, Src1LoR, INT32_BITS);
    _movn(T1_Hi, T_Lo, T5);
    _movn(T1_Lo, getZero(), T5);
    _mov(DestHi, T1_Hi);
    _mov(DestLo, T1_Lo);
    return;
  }
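  // In the variable-amount sequence above, MIPS shifts interpret the amount
  // mod 32, so hi = (Src0Hi << s) | (Src0Lo >> (32 - s)) is computed safely
  // as (Src0Lo >> 1) >> (~s & 31), i.e. (Src0Lo >> 1) >> (31 - s), which is
  // well-defined even for s == 0. The andi/movn pair then selects the
  // s >= 32 result (hi = lo << (s - 32), lo = 0) when bit 5 of s is set.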
2578   case InstArithmetic::Lshr: {
2579 
2580     auto *T_Lo = I32Reg();
2581     auto *T_Hi = I32Reg();
2582     auto *T1_Lo = I32Reg();
2583     auto *T1_Hi = I32Reg();
2584     auto *T1 = I32Reg();
2585     auto *T2 = I32Reg();
2586     auto *T3 = I32Reg();
2587     auto *T4 = I32Reg();
2588     auto *T5 = I32Reg();
2589 
2590     if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Src1)) {
2591       Src0HiR = legalizeToReg(hiOperand(Src0));
2592       int64_t ShiftAmount = Const->getValue();
2593       if (ShiftAmount < INT32_BITS) {
2594         Src0LoR = legalizeToReg(loOperand(Src0));
2595         _sll(T1, Src0HiR, INT32_BITS - ShiftAmount);
2596         _srl(T2, Src0LoR, ShiftAmount);
2597         _or(T_Lo, T1, T2);
2598         _srl(T_Hi, Src0HiR, ShiftAmount);
2599       } else if (ShiftAmount == INT32_BITS) {
2600         _mov(T_Lo, Src0HiR);
2601         _addiu(T_Hi, getZero(), 0);
2602       } else if (ShiftAmount > INT32_BITS && ShiftAmount < 64) {
2603         _srl(T_Lo, Src0HiR, ShiftAmount - INT32_BITS);
2604         _addiu(T_Hi, getZero(), 0);
2605       }
2606       _mov(DestLo, T_Lo);
2607       _mov(DestHi, T_Hi);
2608       return;
2609     }
2610 
2611     Src0LoR = legalizeToReg(loOperand(Src0));
2612     Src1LoR = legalizeToReg(loOperand(Src1));
2613     Src0HiR = legalizeToReg(hiOperand(Src0));
2614 
2615     _srlv(T1, Src0LoR, Src1LoR);
2616     _not(T2, Src1LoR);
2617     _sll(T3, Src0HiR, 1);
2618     _sllv(T4, T3, T2);
2619     _or(T_Lo, T1, T4);
2620     _srlv(T_Hi, Src0HiR, Src1LoR);
2621 
2622     _mov(T1_Hi, T_Hi);
2623     _mov(T1_Lo, T_Lo);
2624     _andi(T5, Src1LoR, INT32_BITS);
2625     _movn(T1_Lo, T_Hi, T5);
2626     _movn(T1_Hi, getZero(), T5);
2627     _mov(DestHi, T1_Hi);
2628     _mov(DestLo, T1_Lo);
2629     return;
2630   }
2631   case InstArithmetic::Ashr: {
2632 
2633     auto *T_Lo = I32Reg();
2634     auto *T_Hi = I32Reg();
2635     auto *T1_Lo = I32Reg();
2636     auto *T1_Hi = I32Reg();
2637     auto *T1 = I32Reg();
2638     auto *T2 = I32Reg();
2639     auto *T3 = I32Reg();
2640     auto *T4 = I32Reg();
2641     auto *T5 = I32Reg();
2642     auto *T6 = I32Reg();
2643 
2644     if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Src1)) {
2645       Src0HiR = legalizeToReg(hiOperand(Src0));
2646       int64_t ShiftAmount = Const->getValue();
2647       if (ShiftAmount < INT32_BITS) {
2648         Src0LoR = legalizeToReg(loOperand(Src0));
2649         _sll(T1, Src0HiR, INT32_BITS - ShiftAmount);
2650         _srl(T2, Src0LoR, ShiftAmount);
2651         _or(T_Lo, T1, T2);
2652         _sra(T_Hi, Src0HiR, ShiftAmount);
2653       } else if (ShiftAmount == INT32_BITS) {
2654         _sra(T_Hi, Src0HiR, INT32_BITS - 1);
2655         _mov(T_Lo, Src0HiR);
2656       } else if (ShiftAmount > INT32_BITS && ShiftAmount < 64) {
2657         _sra(T_Lo, Src0HiR, ShiftAmount - INT32_BITS);
2658         _sra(T_Hi, Src0HiR, INT32_BITS - 1);
2659       }
2660       _mov(DestLo, T_Lo);
2661       _mov(DestHi, T_Hi);
2662       return;
2663     }
2664 
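    // Same funnel idea as Lshr, but the high word uses an arithmetic shift,
    // and for amounts >= 32 the high result must be the sign word
    // (Src0Hi >> 31), hence the extra sra into T6 before the final movn.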
2665     Src0LoR = legalizeToReg(loOperand(Src0));
2666     Src1LoR = legalizeToReg(loOperand(Src1));
2667     Src0HiR = legalizeToReg(hiOperand(Src0));
2668 
2669     _srlv(T1, Src0LoR, Src1LoR);
2670     _not(T2, Src1LoR);
2671     _sll(T3, Src0HiR, 1);
2672     _sllv(T4, T3, T2);
2673     _or(T_Lo, T1, T4);
2674     _srav(T_Hi, Src0HiR, Src1LoR);
2675 
2676     _mov(T1_Hi, T_Hi);
2677     _mov(T1_Lo, T_Lo);
2678     _andi(T5, Src1LoR, INT32_BITS);
2679     _movn(T1_Lo, T_Hi, T5);
2680     _sra(T6, Src0HiR, INT32_BITS - 1);
2681     _movn(T1_Hi, T6, T5);
2682     _mov(DestHi, T1_Hi);
2683     _mov(DestLo, T1_Lo);
2684     return;
2685   }
2686   case InstArithmetic::Fadd:
2687   case InstArithmetic::Fsub:
2688   case InstArithmetic::Fmul:
2689   case InstArithmetic::Fdiv:
2690   case InstArithmetic::Frem:
2691     llvm::report_fatal_error("FP instruction with i64 type");
2692     return;
2693   case InstArithmetic::Udiv:
2694   case InstArithmetic::Sdiv:
2695   case InstArithmetic::Urem:
2696   case InstArithmetic::Srem:
2697     llvm::report_fatal_error("64-bit div and rem should have been prelowered");
2698     return;
2699   }
2700 }
2701 
2702 void TargetMIPS32::lowerArithmetic(const InstArithmetic *Instr) {
2703   Variable *Dest = Instr->getDest();
2704 
2705   if (Dest->isRematerializable()) {
2706     Context.insert<InstFakeDef>(Dest);
2707     return;
2708   }
2709 
2710   // Signal any UnimplementedLoweringError before legalizing operands into new
2711   // variables; otherwise Om1 register allocation may fail when it sees
2712   // variables that are defined but never used.
2713   Type DestTy = Dest->getType();
2714   Operand *Src0 = legalizeUndef(Instr->getSrc(0));
2715   Operand *Src1 = legalizeUndef(Instr->getSrc(1));
2716   if (DestTy == IceType_i64) {
2717     lowerInt64Arithmetic(Instr, Instr->getDest(), Src0, Src1);
2718     return;
2719   }
2720   if (isVectorType(Dest->getType())) {
2721     llvm::report_fatal_error("Arithmetic: Destination type is vector");
2722     return;
2723   }
2724 
2725   Variable *T = makeReg(Dest->getType());
2726   Variable *Src0R = legalizeToReg(Src0);
2727   Variable *Src1R = nullptr;
2728   uint32_t Value = 0;
2729   bool IsSrc1Imm16 = false;
2730 
2731   switch (Instr->getOp()) {
2732   case InstArithmetic::Add:
2733   case InstArithmetic::Sub: {
2734     auto *Const32 = llvm::dyn_cast<ConstantInteger32>(Src1);
2735     if (Const32 != nullptr && isInt<16>(int32_t(Const32->getValue()))) {
2736       IsSrc1Imm16 = true;
2737       Value = Const32->getValue();
2738     } else {
2739       Src1R = legalizeToReg(Src1);
2740     }
2741     break;
2742   }
2743   case InstArithmetic::And:
2744   case InstArithmetic::Or:
2745   case InstArithmetic::Xor:
2746   case InstArithmetic::Shl:
2747   case InstArithmetic::Lshr:
2748   case InstArithmetic::Ashr: {
2749     auto *Const32 = llvm::dyn_cast<ConstantInteger32>(Src1);
2750     if (Const32 != nullptr && llvm::isUInt<16>(uint32_t(Const32->getValue()))) {
2751       IsSrc1Imm16 = true;
2752       Value = Const32->getValue();
2753     } else {
2754       Src1R = legalizeToReg(Src1);
2755     }
2756     break;
2757   }
2758   default:
2759     Src1R = legalizeToReg(Src1);
2760     break;
2761   }
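  // Trap code 7 is the conventional MIPS divide-by-zero break/trap code
  // (BRK_DIVZERO on Linux), which the kernel turns into SIGFPE.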
2762   constexpr uint32_t DivideByZeroTrapCode = 7;
2763 
2764   switch (Instr->getOp()) {
2765   case InstArithmetic::_num:
2766     break;
2767   case InstArithmetic::Add: {
2768     auto *T0R = Src0R;
2769     auto *T1R = Src1R;
2770     if (Dest->getType() != IceType_i32) {
2771       T0R = makeReg(IceType_i32);
2772       lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
2773       if (!IsSrc1Imm16) {
2774         T1R = makeReg(IceType_i32);
2775         lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
2776       }
2777     }
2778     if (IsSrc1Imm16) {
2779       _addiu(T, T0R, Value);
2780     } else {
2781       _addu(T, T0R, T1R);
2782     }
2783     _mov(Dest, T);
2784     return;
2785   }
2786   case InstArithmetic::And:
2787     if (IsSrc1Imm16) {
2788       _andi(T, Src0R, Value);
2789     } else {
2790       _and(T, Src0R, Src1R);
2791     }
2792     _mov(Dest, T);
2793     return;
2794   case InstArithmetic::Or:
2795     if (IsSrc1Imm16) {
2796       _ori(T, Src0R, Value);
2797     } else {
2798       _or(T, Src0R, Src1R);
2799     }
2800     _mov(Dest, T);
2801     return;
2802   case InstArithmetic::Xor:
2803     if (IsSrc1Imm16) {
2804       _xori(T, Src0R, Value);
2805     } else {
2806       _xor(T, Src0R, Src1R);
2807     }
2808     _mov(Dest, T);
2809     return;
2810   case InstArithmetic::Sub: {
2811     auto *T0R = Src0R;
2812     auto *T1R = Src1R;
2813     if (Dest->getType() != IceType_i32) {
2814       T0R = makeReg(IceType_i32);
2815       lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
2816       if (!IsSrc1Imm16) {
2817         T1R = makeReg(IceType_i32);
2818         lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
2819       }
2820     }
2821     if (IsSrc1Imm16) {
2822       _addiu(T, T0R, -Value);
2823     } else {
2824       _subu(T, T0R, T1R);
2825     }
2826     _mov(Dest, T);
2827     return;
2828   }
2829   case InstArithmetic::Mul: {
2830     _mul(T, Src0R, Src1R);
2831     _mov(Dest, T);
2832     return;
2833   }
2834   case InstArithmetic::Shl: {
2835     if (IsSrc1Imm16) {
2836       _sll(T, Src0R, Value);
2837     } else {
2838       _sllv(T, Src0R, Src1R);
2839     }
2840     _mov(Dest, T);
2841     return;
2842   }
2843   case InstArithmetic::Lshr: {
2844     auto *T0R = Src0R;
2845     auto *T1R = Src1R;
2846     if (Dest->getType() != IceType_i32) {
2847       T0R = makeReg(IceType_i32);
2848       lowerCast(InstCast::create(Func, InstCast::Zext, T0R, Src0R));
2849       if (!IsSrc1Imm16) {
2850         T1R = makeReg(IceType_i32);
2851         lowerCast(InstCast::create(Func, InstCast::Zext, T1R, Src1R));
2852       }
2853     }
2854     if (IsSrc1Imm16) {
2855       _srl(T, T0R, Value);
2856     } else {
2857       _srlv(T, T0R, T1R);
2858     }
2859     _mov(Dest, T);
2860     return;
2861   }
2862   case InstArithmetic::Ashr: {
2863     auto *T0R = Src0R;
2864     auto *T1R = Src1R;
2865     if (Dest->getType() != IceType_i32) {
2866       T0R = makeReg(IceType_i32);
2867       lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
2868       if (!IsSrc1Imm16) {
2869         T1R = makeReg(IceType_i32);
2870         lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
2871       }
2872     }
2873     if (IsSrc1Imm16) {
2874       _sra(T, T0R, Value);
2875     } else {
2876       _srav(T, T0R, T1R);
2877     }
2878     _mov(Dest, T);
2879     return;
2880   }
2881   case InstArithmetic::Udiv: {
2882     auto *T_Zero = I32Reg(RegMIPS32::Reg_ZERO);
2883     auto *T0R = Src0R;
2884     auto *T1R = Src1R;
2885     if (Dest->getType() != IceType_i32) {
2886       T0R = makeReg(IceType_i32);
2887       lowerCast(InstCast::create(Func, InstCast::Zext, T0R, Src0R));
2888       T1R = makeReg(IceType_i32);
2889       lowerCast(InstCast::create(Func, InstCast::Zext, T1R, Src1R));
2890     }
2891     _divu(T_Zero, T0R, T1R);
2892     _teq(T1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero
2893     _mflo(T, T_Zero);
2894     _mov(Dest, T);
2895     return;
2896   }
2897   case InstArithmetic::Sdiv: {
2898     auto *T_Zero = I32Reg(RegMIPS32::Reg_ZERO);
2899     auto *T0R = Src0R;
2900     auto *T1R = Src1R;
2901     if (Dest->getType() != IceType_i32) {
2902       T0R = makeReg(IceType_i32);
2903       lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
2904       T1R = makeReg(IceType_i32);
2905       lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
2906     }
2907     _div(T_Zero, T0R, T1R);
2908     _teq(T1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero
2909     _mflo(T, T_Zero);
2910     _mov(Dest, T);
2911     return;
2912   }
2913   case InstArithmetic::Urem: {
2914     auto *T_Zero = I32Reg(RegMIPS32::Reg_ZERO);
2915     auto *T0R = Src0R;
2916     auto *T1R = Src1R;
2917     if (Dest->getType() != IceType_i32) {
2918       T0R = makeReg(IceType_i32);
2919       lowerCast(InstCast::create(Func, InstCast::Zext, T0R, Src0R));
2920       T1R = makeReg(IceType_i32);
2921       lowerCast(InstCast::create(Func, InstCast::Zext, T1R, Src1R));
2922     }
2923     _divu(T_Zero, T0R, T1R);
2924     _teq(T1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero
2925     _mfhi(T, T_Zero);
2926     _mov(Dest, T);
2927     return;
2928   }
2929   case InstArithmetic::Srem: {
2930     auto *T_Zero = I32Reg(RegMIPS32::Reg_ZERO);
2931     auto *T0R = Src0R;
2932     auto *T1R = Src1R;
2933     if (Dest->getType() != IceType_i32) {
2934       T0R = makeReg(IceType_i32);
2935       lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
2936       T1R = makeReg(IceType_i32);
2937       lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
2938     }
2939     _div(T_Zero, T0R, T1R);
2940     _teq(T1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero
2941     _mfhi(T, T_Zero);
2942     _mov(Dest, T);
2943     return;
2944   }
2945   case InstArithmetic::Fadd: {
2946     if (DestTy == IceType_f32) {
2947       _add_s(T, Src0R, Src1R);
2948       _mov(Dest, T);
2949       return;
2950     }
2951     if (DestTy == IceType_f64) {
2952       _add_d(T, Src0R, Src1R);
2953       _mov(Dest, T);
2954       return;
2955     }
2956     break;
2957   }
2958   case InstArithmetic::Fsub:
2959     if (DestTy == IceType_f32) {
2960       _sub_s(T, Src0R, Src1R);
2961       _mov(Dest, T);
2962       return;
2963     }
2964     if (DestTy == IceType_f64) {
2965       _sub_d(T, Src0R, Src1R);
2966       _mov(Dest, T);
2967       return;
2968     }
2969     break;
2970   case InstArithmetic::Fmul:
2971     if (DestTy == IceType_f32) {
2972       _mul_s(T, Src0R, Src1R);
2973       _mov(Dest, T);
2974       return;
2975     }
2976     if (DestTy == IceType_f64) {
2977       _mul_d(T, Src0R, Src1R);
2978       _mov(Dest, T);
2979       return;
2980     }
2981     break;
2982   case InstArithmetic::Fdiv:
2983     if (DestTy == IceType_f32) {
2984       _div_s(T, Src0R, Src1R);
2985       _mov(Dest, T);
2986       return;
2987     }
2988     if (DestTy == IceType_f64) {
2989       _div_d(T, Src0R, Src1R);
2990       _mov(Dest, T);
2991       return;
2992     }
2993     break;
2994   case InstArithmetic::Frem:
2995     llvm::report_fatal_error("frem should have been prelowered.");
2996     break;
2997   }
2998   llvm::report_fatal_error("Unknown arithmetic operator");
2999 }
3000 
3001 void TargetMIPS32::lowerAssign(const InstAssign *Instr) {
3002   Variable *Dest = Instr->getDest();
3003 
3004   if (Dest->isRematerializable()) {
3005     Context.insert<InstFakeDef>(Dest);
3006     return;
3007   }
3008 
3009   // The source type may not be the same as the destination type.
3010   if (isVectorType(Dest->getType())) {
3011     Operand *Src0 = legalizeUndef(Instr->getSrc(0));
3012     auto *DstVec = llvm::dyn_cast<VariableVecOn32>(Dest);
3013     for (SizeT i = 0; i < DstVec->ContainersPerVector; ++i) {
3014       auto *DCont = DstVec->getContainers()[i];
3015       auto *SCont =
3016           legalize(getOperandAtIndex(Src0, IceType_i32, i), Legal_Reg);
3017       auto *TReg = makeReg(IceType_i32);
3018       _mov(TReg, SCont);
3019       _mov(DCont, TReg);
3020     }
3021     return;
3022   }
3023   Operand *Src0 = Instr->getSrc(0);
3024   assert(Dest->getType() == Src0->getType());
3025   if (Dest->getType() == IceType_i64) {
3026     Src0 = legalizeUndef(Src0);
3027     Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg);
3028     Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg);
3029     auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3030     auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3031     auto *T_Lo = I32Reg(), *T_Hi = I32Reg();
3032     _mov(T_Lo, Src0Lo);
3033     _mov(DestLo, T_Lo);
3034     _mov(T_Hi, Src0Hi);
3035     _mov(DestHi, T_Hi);
3036     return;
3037   }
3038   Operand *SrcR;
3039   if (Dest->hasReg()) {
3040     // If Dest already has a physical register, then legalize the Src operand
3041     // into a Variable with the same register assignment.  This especially
3042     // helps allow the use of Flex operands.
3043     SrcR = legalize(Src0, Legal_Reg, Dest->getRegNum());
3044   } else {
3045     // Dest could be a stack operand. Since we could potentially need
3046     // to do a Store (and store can only have Register operands),
3047     // legalize this to a register.
3048     SrcR = legalize(Src0, Legal_Reg);
3049   }
3050   _mov(Dest, SrcR);
3051 }
3052 
3053 void TargetMIPS32::lowerBr(const InstBr *Instr) {
3054   if (Instr->isUnconditional()) {
3055     _br(Instr->getTargetUnconditional());
3056     return;
3057   }
3058   CfgNode *TargetTrue = Instr->getTargetTrue();
3059   CfgNode *TargetFalse = Instr->getTargetFalse();
3060   Operand *Boolean = Instr->getCondition();
3061   const Inst *Producer = Computations.getProducerOf(Boolean);
3062   if (Producer == nullptr) {
3063     // Since we don't know the producer of this boolean, assume it is kept in
3064     // positive logic and simply emit a branch-if-zero (EQZ) with the boolean
3065     // as its operand.
3066     auto *BooleanR = legalizeToReg(Boolean);
3067     _br(TargetTrue, TargetFalse, BooleanR, CondMIPS32::Cond::EQZ);
3068     return;
3069   }
3070   if (Producer->getKind() == Inst::Icmp) {
3071     const InstIcmp *CompareInst = llvm::cast<InstIcmp>(Producer);
3072     Operand *Src0 = CompareInst->getSrc(0);
3073     Operand *Src1 = CompareInst->getSrc(1);
3074     const Type Src0Ty = Src0->getType();
3075     assert(Src0Ty == Src1->getType());
3076 
3077     Variable *Src0R = nullptr;
3078     Variable *Src1R = nullptr;
3079     Variable *Src0HiR = nullptr;
3080     Variable *Src1HiR = nullptr;
3081     if (Src0Ty == IceType_i64) {
3082       Src0R = legalizeToReg(loOperand(Src0));
3083       Src1R = legalizeToReg(loOperand(Src1));
3084       Src0HiR = legalizeToReg(hiOperand(Src0));
3085       Src1HiR = legalizeToReg(hiOperand(Src1));
3086     } else {
3087       auto *Src0RT = legalizeToReg(Src0);
3088       auto *Src1RT = legalizeToReg(Src1);
3089       // Sign/Zero extend the source operands
3090       if (Src0Ty != IceType_i32) {
3091         InstCast::OpKind CastKind;
3092         switch (CompareInst->getCondition()) {
3093         case InstIcmp::Eq:
3094         case InstIcmp::Ne:
3095         case InstIcmp::Sgt:
3096         case InstIcmp::Sge:
3097         case InstIcmp::Slt:
3098         case InstIcmp::Sle:
3099           CastKind = InstCast::Sext;
3100           break;
3101         default:
3102           CastKind = InstCast::Zext;
3103           break;
3104         }
3105         Src0R = makeReg(IceType_i32);
3106         Src1R = makeReg(IceType_i32);
3107         lowerCast(InstCast::create(Func, CastKind, Src0R, Src0RT));
3108         lowerCast(InstCast::create(Func, CastKind, Src1R, Src1RT));
3109       } else {
3110         Src0R = Src0RT;
3111         Src1R = Src1RT;
3112       }
3113     }
3114     auto *DestT = makeReg(IceType_i32);
3115 
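    // Recurring i64 idiom below: xor the high words to learn whether they
    // differ, compute both a high-word and a low-word comparison, then movz
    // replaces the high-word result with the low-word one when the high words
    // are equal (i.e. the xor is zero).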
3116     switch (CompareInst->getCondition()) {
3117     default:
3118       llvm_unreachable("unexpected condition");
3119       return;
3120     case InstIcmp::Eq: {
3121       if (Src0Ty == IceType_i64) {
3122         auto *T1 = I32Reg();
3123         auto *T2 = I32Reg();
3124         auto *T3 = I32Reg();
3125         _xor(T1, Src0HiR, Src1HiR);
3126         _xor(T2, Src0R, Src1R);
3127         _or(T3, T1, T2);
3128         _mov(DestT, T3);
3129         _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3130       } else {
3131         _br(TargetTrue, TargetFalse, Src0R, Src1R, CondMIPS32::Cond::NE);
3132       }
3133       return;
3134     }
3135     case InstIcmp::Ne: {
3136       if (Src0Ty == IceType_i64) {
3137         auto *T1 = I32Reg();
3138         auto *T2 = I32Reg();
3139         auto *T3 = I32Reg();
3140         _xor(T1, Src0HiR, Src1HiR);
3141         _xor(T2, Src0R, Src1R);
3142         _or(T3, T1, T2);
3143         _mov(DestT, T3);
3144         _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
3145       } else {
3146         _br(TargetTrue, TargetFalse, Src0R, Src1R, CondMIPS32::Cond::EQ);
3147       }
3148       return;
3149     }
3150     case InstIcmp::Ugt: {
3151       if (Src0Ty == IceType_i64) {
3152         auto *T1 = I32Reg();
3153         auto *T2 = I32Reg();
3154         auto *T3 = I32Reg();
3155         auto *T4 = I32Reg();
3156         auto *T5 = I32Reg();
3157         _xor(T1, Src0HiR, Src1HiR);
3158         _sltu(T2, Src1HiR, Src0HiR);
3159         _xori(T3, T2, 1);
3160         _sltu(T4, Src1R, Src0R);
3161         _xori(T5, T4, 1);
3162         _movz(T3, T5, T1);
3163         _mov(DestT, T3);
3164         _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3165       } else {
3166         _sltu(DestT, Src1R, Src0R);
3167         _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
3168       }
3169       return;
3170     }
3171     case InstIcmp::Uge: {
3172       if (Src0Ty == IceType_i64) {
3173         auto *T1 = I32Reg();
3174         auto *T2 = I32Reg();
3175         auto *T3 = I32Reg();
3176         _xor(T1, Src0HiR, Src1HiR);
3177         _sltu(T2, Src0HiR, Src1HiR);
3178         _sltu(T3, Src0R, Src1R);
3179         _movz(T2, T3, T1);
3180         _mov(DestT, T2);
3181         _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3182       } else {
3183         _sltu(DestT, Src0R, Src1R);
3184         _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3185       }
3186       return;
3187     }
3188     case InstIcmp::Ult: {
3189       if (Src0Ty == IceType_i64) {
3190         auto *T1 = I32Reg();
3191         auto *T2 = I32Reg();
3192         auto *T3 = I32Reg();
3193         auto *T4 = I32Reg();
3194         auto *T5 = I32Reg();
3195         _xor(T1, Src0HiR, Src1HiR);
3196         _sltu(T2, Src0HiR, Src1HiR);
3197         _xori(T3, T2, 1);
3198         _sltu(T4, Src0R, Src1R);
3199         _xori(T5, T4, 1);
3200         _movz(T3, T5, T1);
3201         _mov(DestT, T3);
3202         _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3203       } else {
3204         _sltu(DestT, Src0R, Src1R);
3205         _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
3206       }
3207       return;
3208     }
3209     case InstIcmp::Ule: {
3210       if (Src0Ty == IceType_i64) {
3211         auto *T1 = I32Reg();
3212         auto *T2 = I32Reg();
3213         auto *T3 = I32Reg();
3214         _xor(T1, Src0HiR, Src1HiR);
3215         _sltu(T2, Src1HiR, Src0HiR);
3216         _sltu(T3, Src1R, Src0R);
3217         _movz(T2, T3, T1);
3218         _mov(DestT, T2);
3219         _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3220       } else {
3221         _sltu(DestT, Src1R, Src0R);
3222         _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3223       }
3224       return;
3225     }
3226     case InstIcmp::Sgt: {
3227       if (Src0Ty == IceType_i64) {
3228         auto *T1 = I32Reg();
3229         auto *T2 = I32Reg();
3230         auto *T3 = I32Reg();
3231         auto *T4 = I32Reg();
3232         auto *T5 = I32Reg();
3233         _xor(T1, Src0HiR, Src1HiR);
3234         _slt(T2, Src1HiR, Src0HiR);
3235         _xori(T3, T2, 1);
3236         _sltu(T4, Src1R, Src0R);
3237         _xori(T5, T4, 1);
3238         _movz(T3, T5, T1);
3239         _mov(DestT, T3);
3240         _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3241       } else {
3242         _slt(DestT, Src1R, Src0R);
3243         _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
3244       }
3245       return;
3246     }
3247     case InstIcmp::Sge: {
3248       if (Src0Ty == IceType_i64) {
3249         auto *T1 = I32Reg();
3250         auto *T2 = I32Reg();
3251         auto *T3 = I32Reg();
3252         _xor(T1, Src0HiR, Src1HiR);
3253         _slt(T2, Src0HiR, Src1HiR);
3254         _sltu(T3, Src0R, Src1R);
3255         _movz(T2, T3, T1);
3256         _mov(DestT, T2);
3257         _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3258       } else {
3259         _slt(DestT, Src0R, Src1R);
3260         _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3261       }
3262       return;
3263     }
3264     case InstIcmp::Slt: {
3265       if (Src0Ty == IceType_i64) {
3266         auto *T1 = I32Reg();
3267         auto *T2 = I32Reg();
3268         auto *T3 = I32Reg();
3269         auto *T4 = I32Reg();
3270         auto *T5 = I32Reg();
3271         _xor(T1, Src0HiR, Src1HiR);
3272         _slt(T2, Src0HiR, Src1HiR);
3273         _xori(T3, T2, 1);
3274         _sltu(T4, Src0R, Src1R);
3275         _xori(T5, T4, 1);
3276         _movz(T3, T5, T1);
3277         _mov(DestT, T3);
3278         _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3279       } else {
3280         _slt(DestT, Src0R, Src1R);
3281         _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
3282       }
3283       return;
3284     }
3285     case InstIcmp::Sle: {
3286       if (Src0Ty == IceType_i64) {
3287         auto *T1 = I32Reg();
3288         auto *T2 = I32Reg();
3289         auto *T3 = I32Reg();
3290         _xor(T1, Src0HiR, Src1HiR);
3291         _slt(T2, Src1HiR, Src0HiR);
3292         _sltu(T3, Src1R, Src0R);
3293         _movz(T2, T3, T1);
3294         _mov(DestT, T2);
3295         _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3296       } else {
3297         _slt(DestT, Src1R, Src0R);
3298         _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3299       }
3300       return;
3301     }
3302     }
3303   }
3304 }
3305 
3306 void TargetMIPS32::lowerCall(const InstCall *Instr) {
3307   CfgVector<Variable *> RegArgs;
3308   NeedsStackAlignment = true;
3309 
3310   // Assign arguments to registers and stack slots, and reserve stack space.
3311   TargetMIPS32::CallingConv CC;
3312 
3313   // Pair of Arg Operand -> GPR number assignments.
3314   llvm::SmallVector<std::pair<Operand *, RegNumT>, MIPS32_MAX_GPR_ARG> GPRArgs;
3315   llvm::SmallVector<std::pair<Operand *, RegNumT>, MIPS32_MAX_FP_ARG> FPArgs;
3316   // Pair of Arg Operand -> stack offset.
3317   llvm::SmallVector<std::pair<Operand *, int32_t>, 8> StackArgs;
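  // The first 16 bytes are the O32 argument home area, which the caller must
  // reserve even when the first arguments are passed in $4..$7; stack-passed
  // arguments therefore start at offset 16.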
3318   size_t ParameterAreaSizeBytes = 16;
3319 
3320   // Classify each argument operand according to the location where the
3321   // argument is passed.
3322 
3323   // v4f32 is returned through the stack: the caller sets up $4 and implicitly
3324   // passes it as the first argument; the callee copies the return vector there.
3325   SizeT ArgNum = 0;
3326   Variable *Dest = Instr->getDest();
3327   Variable *RetVecFloat = nullptr;
3328   if (Dest && isVectorFloatingType(Dest->getType())) {
3329     ArgNum = 1;
3330     CC.discardReg(RegMIPS32::Reg_A0);
3331     RetVecFloat = Func->makeVariable(IceType_i32);
3332     auto *ByteCount = ConstantInteger32::create(Ctx, IceType_i32, 16);
3333     constexpr SizeT Alignment = 4;
3334     lowerAlloca(InstAlloca::create(Func, RetVecFloat, ByteCount, Alignment));
3335     RegArgs.emplace_back(
3336         legalizeToReg(RetVecFloat, RegNumT::fixme(RegMIPS32::Reg_A0)));
3337   }
3338 
3339   for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
3340     Operand *Arg = legalizeUndef(Instr->getArg(i));
3341     const Type Ty = Arg->getType();
3342     bool InReg = false;
3343     RegNumT Reg;
3344 
3345     InReg = CC.argInReg(Ty, i, &Reg);
3346 
3347     if (!InReg) {
3348       if (isVectorType(Ty)) {
3349         auto *ArgVec = llvm::cast<VariableVecOn32>(Arg);
3350         ParameterAreaSizeBytes =
3351             applyStackAlignmentTy(ParameterAreaSizeBytes, IceType_i64);
3352         for (Variable *Elem : ArgVec->getContainers()) {
3353           StackArgs.push_back(std::make_pair(Elem, ParameterAreaSizeBytes));
3354           ParameterAreaSizeBytes += typeWidthInBytesOnStack(IceType_i32);
3355         }
3356       } else {
3357         ParameterAreaSizeBytes =
3358             applyStackAlignmentTy(ParameterAreaSizeBytes, Ty);
3359         StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes));
3360         ParameterAreaSizeBytes += typeWidthInBytesOnStack(Ty);
3361       }
3362       ++ArgNum;
3363       continue;
3364     }
3365 
3366     if (isVectorType(Ty)) {
3367       auto *ArgVec = llvm::cast<VariableVecOn32>(Arg);
3368       Operand *Elem0 = ArgVec->getContainers()[0];
3369       Operand *Elem1 = ArgVec->getContainers()[1];
3370       GPRArgs.push_back(
3371           std::make_pair(Elem0, RegNumT::fixme((unsigned)Reg + 0)));
3372       GPRArgs.push_back(
3373           std::make_pair(Elem1, RegNumT::fixme((unsigned)Reg + 1)));
3374       Operand *Elem2 = ArgVec->getContainers()[2];
3375       Operand *Elem3 = ArgVec->getContainers()[3];
3376       // A first vector argument is passed in $4:$5:$6:$7; later vector args
3377       // get $6:$7 for their first two containers and the stack for the rest.
3378       if (ArgNum == 0) {
3379         GPRArgs.push_back(
3380             std::make_pair(Elem2, RegNumT::fixme((unsigned)Reg + 2)));
3381         GPRArgs.push_back(
3382             std::make_pair(Elem3, RegNumT::fixme((unsigned)Reg + 3)));
3383       } else {
3384         ParameterAreaSizeBytes =
3385             applyStackAlignmentTy(ParameterAreaSizeBytes, IceType_i64);
3386         StackArgs.push_back(std::make_pair(Elem2, ParameterAreaSizeBytes));
3387         ParameterAreaSizeBytes += typeWidthInBytesOnStack(IceType_i32);
3388         StackArgs.push_back(std::make_pair(Elem3, ParameterAreaSizeBytes));
3389         ParameterAreaSizeBytes += typeWidthInBytesOnStack(IceType_i32);
3390       }
3391     } else if (Ty == IceType_i64) {
3392       Operand *Lo = loOperand(Arg);
3393       Operand *Hi = hiOperand(Arg);
3394       GPRArgs.push_back(
3395           std::make_pair(Lo, RegMIPS32::get64PairFirstRegNum(Reg)));
3396       GPRArgs.push_back(
3397           std::make_pair(Hi, RegMIPS32::get64PairSecondRegNum(Reg)));
3398     } else if (isScalarIntegerType(Ty)) {
3399       GPRArgs.push_back(std::make_pair(Arg, Reg));
3400     } else {
3401       FPArgs.push_back(std::make_pair(Arg, Reg));
3402     }
3403     ++ArgNum;
3404   }
3405 
3406   // Adjust the parameter area so that the stack is aligned. It is assumed that
3407   // the stack is already aligned at the start of the calling sequence.
3408   ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
3409 
3410   // Copy arguments that are passed on the stack to the appropriate stack
3411   // locations.
3412   Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
3413   for (auto &StackArg : StackArgs) {
3414     ConstantInteger32 *Loc =
3415         llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second));
3416     Type Ty = StackArg.first->getType();
3417     OperandMIPS32Mem *Addr;
3418     constexpr bool SignExt = false;
3419     if (OperandMIPS32Mem::canHoldOffset(Ty, SignExt, StackArg.second)) {
3420       Addr = OperandMIPS32Mem::create(Func, Ty, SP, Loc);
3421     } else {
3422       Variable *NewBase = Func->makeVariable(SP->getType());
3423       lowerArithmetic(
3424           InstArithmetic::create(Func, InstArithmetic::Add, NewBase, SP, Loc));
3425       Addr = formMemoryOperand(NewBase, Ty);
3426     }
3427     lowerStore(InstStore::create(Func, StackArg.first, Addr));
3428   }
3429 
3430   // Generate the call instruction.  Assign its result to a temporary with high
3431   // register allocation weight.
3432 
3433   // ReturnReg doubles as ReturnRegLo as necessary.
3434   Variable *ReturnReg = nullptr;
3435   Variable *ReturnRegHi = nullptr;
3436   if (Dest) {
3437     switch (Dest->getType()) {
3438     case IceType_NUM:
3439       llvm_unreachable("Invalid Call dest type");
3440       return;
3441     case IceType_void:
3442       break;
3443     case IceType_i1:
3444     case IceType_i8:
3445     case IceType_i16:
3446     case IceType_i32:
3447       ReturnReg = makeReg(Dest->getType(), RegMIPS32::Reg_V0);
3448       break;
3449     case IceType_i64:
3450       ReturnReg = I32Reg(RegMIPS32::Reg_V0);
3451       ReturnRegHi = I32Reg(RegMIPS32::Reg_V1);
3452       break;
3453     case IceType_f32:
3454       ReturnReg = makeReg(Dest->getType(), RegMIPS32::Reg_F0);
3455       break;
3456     case IceType_f64:
3457       ReturnReg = makeReg(IceType_f64, RegMIPS32::Reg_F0);
3458       break;
3459     case IceType_v4i1:
3460     case IceType_v8i1:
3461     case IceType_v16i1:
3462     case IceType_v16i8:
3463     case IceType_v8i16:
3464     case IceType_v4i32: {
3465       ReturnReg = makeReg(Dest->getType(), RegMIPS32::Reg_V0);
3466       auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg);
3467       RetVec->initVecElement(Func);
3468       for (SizeT i = 0; i < RetVec->ContainersPerVector; ++i) {
3469         auto *Var = RetVec->getContainers()[i];
3470         Var->setRegNum(RegNumT::fixme(RegMIPS32::Reg_V0 + i));
3471       }
3472       break;
3473     }
3474     case IceType_v4f32:
3475       ReturnReg = makeReg(IceType_i32, RegMIPS32::Reg_V0);
3476       break;
3477     }
3478   }
3479   Operand *CallTarget = Instr->getCallTarget();
3480   // Allow ConstantRelocatable to be left alone as a direct call,
3481   // but force other constants like ConstantInteger32 to be in
3482   // a register and make it an indirect call.
3483   if (!llvm::isa<ConstantRelocatable>(CallTarget)) {
3484     CallTarget = legalize(CallTarget, Legal_Reg);
3485   }
3486 
3487   // Copy arguments to be passed in registers to the appropriate registers.
3488   for (auto &FPArg : FPArgs) {
3489     RegArgs.emplace_back(legalizeToReg(FPArg.first, FPArg.second));
3490   }
3491   for (auto &GPRArg : GPRArgs) {
3492     RegArgs.emplace_back(legalizeToReg(GPRArg.first, GPRArg.second));
3493   }
3494 
3495   // Generate a FakeUse of each register argument so that it does not get
3496   // dead-code eliminated as a result of the FakeKill of scratch registers
3497   // after the call. These fake-uses must be placed here to keep the argument
3498   // registers from being reused during the legalizeToReg() calls above.
3499   for (auto *RegArg : RegArgs) {
3500     Context.insert<InstFakeUse>(RegArg);
3501   }
3502 
3503   // If a variable-sized alloca is used, the outgoing-argument build area
3504   // (MaxOutArgsSizeBytes) is carved out of the stack around the call.
3505   if (VariableAllocaUsed)
3506     _addiu(SP, SP, -MaxOutArgsSizeBytes);
3507 
3508   Inst *NewCall;
3509 
3510   // The call itself need not define the return register if it is a vector;
3511   // fake defs of its containers are inserted just after the call instead.
3512   if (ReturnReg && isVectorIntegerType(ReturnReg->getType())) {
3513     Variable *RetReg = nullptr;
3514     NewCall = InstMIPS32Call::create(Func, RetReg, CallTarget);
3515     Context.insert(NewCall);
3516   } else {
3517     NewCall = Context.insert<InstMIPS32Call>(ReturnReg, CallTarget);
3518   }
3519 
3520   if (VariableAllocaUsed)
3521     _addiu(SP, SP, MaxOutArgsSizeBytes);
3522 
3523   // Insert a fake use of the stack pointer so that the addiu adjustments
3524   // above are not dead-code eliminated.
3525   Context.insert<InstFakeUse>(SP);
3526 
3527   if (ReturnRegHi)
3528     Context.insert(InstFakeDef::create(Func, ReturnRegHi));
3529 
3530   if (ReturnReg) {
3531     if (auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg)) {
3532       for (Variable *Var : RetVec->getContainers()) {
3533         Context.insert(InstFakeDef::create(Func, Var));
3534       }
3535     }
3536   }
3537 
3538   // Insert a register-kill pseudo instruction.
3539   Context.insert(InstFakeKill::create(Func, NewCall));
3540 
3541   // Generate a FakeUse to keep the call live if necessary.
3542   if (Instr->hasSideEffects() && ReturnReg) {
3543     if (auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg)) {
3544       for (Variable *Var : RetVec->getContainers()) {
3545         Context.insert<InstFakeUse>(Var);
3546       }
3547     } else {
3548       Context.insert<InstFakeUse>(ReturnReg);
3549     }
3550   }
3551 
3552   if (Dest == nullptr)
3553     return;
3554 
3555   // Assign the result of the call to Dest.
3556   if (ReturnReg) {
3557     if (RetVecFloat) {
3558       auto *DestVecOn32 = llvm::cast<VariableVecOn32>(Dest);
3559       auto *TBase = legalizeToReg(RetVecFloat);
3560       for (SizeT i = 0; i < DestVecOn32->ContainersPerVector; ++i) {
3561         auto *Var = DestVecOn32->getContainers()[i];
3562         auto *TVar = makeReg(IceType_i32);
3563         OperandMIPS32Mem *Mem = OperandMIPS32Mem::create(
3564             Func, IceType_i32, TBase,
3565             llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(i * 4)));
3566         _lw(TVar, Mem);
3567         _mov(Var, TVar);
3568       }
3569     } else if (auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg)) {
3570       auto *DestVecOn32 = llvm::cast<VariableVecOn32>(Dest);
3571       for (SizeT i = 0; i < DestVecOn32->ContainersPerVector; ++i) {
3572         _mov(DestVecOn32->getContainers()[i], RetVec->getContainers()[i]);
3573       }
3574     } else if (ReturnRegHi) {
3575       assert(Dest->getType() == IceType_i64);
3576       auto *Dest64On32 = llvm::cast<Variable64On32>(Dest);
3577       Variable *DestLo = Dest64On32->getLo();
3578       Variable *DestHi = Dest64On32->getHi();
3579       _mov(DestLo, ReturnReg);
3580       _mov(DestHi, ReturnRegHi);
3581     } else {
3582       assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
3583              Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
3584              isScalarFloatingType(Dest->getType()) ||
3585              isVectorType(Dest->getType()));
3586       _mov(Dest, ReturnReg);
3587     }
3588   }
3589 }
3590 
3591 void TargetMIPS32::lowerCast(const InstCast *Instr) {
3592   InstCast::OpKind CastKind = Instr->getCastKind();
3593   Variable *Dest = Instr->getDest();
3594   Operand *Src0 = legalizeUndef(Instr->getSrc(0));
3595   const Type DestTy = Dest->getType();
3596   const Type Src0Ty = Src0->getType();
3597   const uint32_t ShiftAmount =
3598       (Src0Ty == IceType_i1
3599            ? INT32_BITS - 1
3600            : INT32_BITS - (CHAR_BITS * typeWidthInBytes(Src0Ty)));
3601   const uint32_t Mask =
3602       (Src0Ty == IceType_i1
3603            ? 1
3604            : (1 << (CHAR_BITS * typeWidthInBytes(Src0Ty))) - 1);
3605 
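  // ShiftAmount and Mask drive the sub-word extensions below: sll followed by
  // sra by (32 - width) sign-extends a narrow value held in a 32-bit register,
  // while andi with the width mask zero-extends it; i1 is special-cased to a
  // single bit.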
3606   if (isVectorType(DestTy)) {
3607     llvm::report_fatal_error("Cast: Destination type is vector");
3608     return;
3609   }
3610   switch (CastKind) {
3611   default:
3612     Func->setError("Cast type not supported");
3613     return;
3614   case InstCast::Sext: {
3615     if (DestTy == IceType_i64) {
3616       auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3617       auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3618       Variable *Src0R = legalizeToReg(Src0);
3619       Variable *T1_Lo = I32Reg();
3620       Variable *T2_Lo = I32Reg();
3621       Variable *T_Hi = I32Reg();
3622       if (Src0Ty == IceType_i1) {
3623         _sll(T1_Lo, Src0R, INT32_BITS - 1);
3624         _sra(T2_Lo, T1_Lo, INT32_BITS - 1);
3625         _mov(DestHi, T2_Lo);
3626         _mov(DestLo, T2_Lo);
3627       } else if (Src0Ty == IceType_i8 || Src0Ty == IceType_i16) {
3628         _sll(T1_Lo, Src0R, ShiftAmount);
3629         _sra(T2_Lo, T1_Lo, ShiftAmount);
3630         _sra(T_Hi, T2_Lo, INT32_BITS - 1);
3631         _mov(DestHi, T_Hi);
3632         _mov(DestLo, T2_Lo);
3633       } else if (Src0Ty == IceType_i32) {
3634         _mov(T1_Lo, Src0R);
3635         _sra(T_Hi, T1_Lo, INT32_BITS - 1);
3636         _mov(DestHi, T_Hi);
3637         _mov(DestLo, T1_Lo);
3638       }
3639     } else {
3640       Variable *Src0R = legalizeToReg(Src0);
3641       Variable *T1 = makeReg(DestTy);
3642       Variable *T2 = makeReg(DestTy);
3643       if (Src0Ty == IceType_i1 || Src0Ty == IceType_i8 ||
3644           Src0Ty == IceType_i16) {
3645         _sll(T1, Src0R, ShiftAmount);
3646         _sra(T2, T1, ShiftAmount);
3647         _mov(Dest, T2);
3648       }
3649     }
3650     break;
3651   }
3652   case InstCast::Zext: {
3653     if (DestTy == IceType_i64) {
3654       auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3655       auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3656       Variable *Src0R = legalizeToReg(Src0);
3657       Variable *T_Lo = I32Reg();
3658       Variable *T_Hi = I32Reg();
3659 
3660       if (Src0Ty == IceType_i1 || Src0Ty == IceType_i8 || Src0Ty == IceType_i16)
3661         _andi(T_Lo, Src0R, Mask);
3662       else if (Src0Ty == IceType_i32)
3663         _mov(T_Lo, Src0R);
3664       else
3665         assert(Src0Ty != IceType_i64);
3666       _mov(DestLo, T_Lo);
3667 
3668       auto *Zero = getZero();
3669       _addiu(T_Hi, Zero, 0);
3670       _mov(DestHi, T_Hi);
3671     } else {
3672       Variable *Src0R = legalizeToReg(Src0);
3673       Variable *T = makeReg(DestTy);
3674       if (Src0Ty == IceType_i1 || Src0Ty == IceType_i8 ||
3675           Src0Ty == IceType_i16) {
3676         _andi(T, Src0R, Mask);
3677         _mov(Dest, T);
3678       }
3679     }
3680     break;
3681   }
3682   case InstCast::Trunc: {
3683     if (Src0Ty == IceType_i64)
3684       Src0 = loOperand(Src0);
3685     Variable *Src0R = legalizeToReg(Src0);
3686     Variable *T = makeReg(DestTy);
3687     switch (DestTy) {
3688     case IceType_i1:
3689       _andi(T, Src0R, 0x1);
3690       break;
3691     case IceType_i8:
3692       _andi(T, Src0R, 0xff);
3693       break;
3694     case IceType_i16:
3695       _andi(T, Src0R, 0xffff);
3696       break;
3697     default:
3698       _mov(T, Src0R);
3699       break;
3700     }
3701     _mov(Dest, T);
3702     break;
3703   }
3704   case InstCast::Fptrunc: {
3705     assert(Dest->getType() == IceType_f32);
3706     assert(Src0->getType() == IceType_f64);
3707     auto *DestR = legalizeToReg(Dest);
3708     auto *Src0R = legalizeToReg(Src0);
3709     _cvt_s_d(DestR, Src0R);
3710     _mov(Dest, DestR);
3711     break;
3712   }
3713   case InstCast::Fpext: {
3714     assert(Dest->getType() == IceType_f64);
3715     assert(Src0->getType() == IceType_f32);
3716     auto *DestR = legalizeToReg(Dest);
3717     auto *Src0R = legalizeToReg(Src0);
3718     _cvt_d_s(DestR, Src0R);
3719     _mov(Dest, DestR);
3720     break;
3721   }
3722   case InstCast::Fptosi:
3723   case InstCast::Fptoui: {
3724     if (llvm::isa<Variable64On32>(Dest)) {
3725       llvm::report_fatal_error("fp-to-i64 should have been prelowered.");
3726       return;
3727     }
3728     if (DestTy != IceType_i64) {
3729       if (Src0Ty == IceType_f32 && isScalarIntegerType(DestTy)) {
3730         Variable *Src0R = legalizeToReg(Src0);
3731         Variable *FTmp = makeReg(IceType_f32);
3732         _trunc_w_s(FTmp, Src0R);
3733         _mov(Dest, FTmp);
3734         return;
3735       }
3736       if (Src0Ty == IceType_f64 && isScalarIntegerType(DestTy)) {
3737         Variable *Src0R = legalizeToReg(Src0);
3738         Variable *FTmp = makeReg(IceType_f64);
3739         _trunc_w_d(FTmp, Src0R);
3740         _mov(Dest, FTmp);
3741         return;
3742       }
3743     }
3744     llvm::report_fatal_error("Destination is i64 in fp-to-i32");
3745     break;
3746   }
3747   case InstCast::Sitofp:
3748   case InstCast::Uitofp: {
3749     if (llvm::isa<Variable64On32>(Dest)) {
3750       llvm::report_fatal_error("i64-to-fp should have been prelowered.");
3751       return;
3752     }
3753     if (Src0Ty != IceType_i64) {
3754       Variable *Src0R = legalizeToReg(Src0);
3755       auto *T0R = Src0R;
3756       if (Src0Ty != IceType_i32) {
3757         T0R = makeReg(IceType_i32);
3758         if (CastKind == InstCast::Uitofp)
3759           lowerCast(InstCast::create(Func, InstCast::Zext, T0R, Src0R));
3760         else
3761           lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
3762       }
3763       if (isScalarIntegerType(Src0Ty) && DestTy == IceType_f32) {
3764         Variable *FTmp1 = makeReg(IceType_f32);
3765         Variable *FTmp2 = makeReg(IceType_f32);
3766         _mtc1(FTmp1, T0R);
3767         _cvt_s_w(FTmp2, FTmp1);
3768         _mov(Dest, FTmp2);
3769         return;
3770       }
3771       if (isScalarIntegerType(Src0Ty) && DestTy == IceType_f64) {
3772         Variable *FTmp1 = makeReg(IceType_f64);
3773         Variable *FTmp2 = makeReg(IceType_f64);
3774         _mtc1(FTmp1, T0R);
3775         _cvt_d_w(FTmp2, FTmp1);
3776         _mov(Dest, FTmp2);
3777         return;
3778       }
3779     }
3780     llvm::report_fatal_error("Source is i64 in i32-to-fp");
3781     break;
3782   }
3783   case InstCast::Bitcast: {
3784     Operand *Src0 = Instr->getSrc(0);
3785     if (DestTy == Src0->getType()) {
3786       auto *Assign = InstAssign::create(Func, Dest, Src0);
3787       lowerAssign(Assign);
3788       return;
3789     }
3790     if (isVectorType(DestTy) || isVectorType(Src0->getType())) {
3791       llvm::report_fatal_error(
3792           "Bitcast: vector type should have been prelowered.");
3793       return;
3794     }
3795     switch (DestTy) {
3796     case IceType_NUM:
3797     case IceType_void:
3798       llvm::report_fatal_error("Unexpected bitcast.");
3799     case IceType_i1:
3800       UnimplementedLoweringError(this, Instr);
3801       break;
3802     case IceType_i8:
3803       assert(Src0->getType() == IceType_v8i1);
3804       llvm::report_fatal_error(
3805           "i8 to v8i1 conversion should have been prelowered.");
3806       break;
3807     case IceType_i16:
3808       assert(Src0->getType() == IceType_v16i1);
3809       llvm::report_fatal_error(
3810           "i16 to v16i1 conversion should have been prelowered.");
3811       break;
3812     case IceType_i32:
3813     case IceType_f32: {
3814       Variable *Src0R = legalizeToReg(Src0);
3815       _mov(Dest, Src0R);
3816       break;
3817     }
3818     case IceType_i64: {
3819       assert(Src0->getType() == IceType_f64);
3820       Variable *Src0R = legalizeToReg(Src0);
3821       auto *T = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
3822       T->initHiLo(Func);
3823       T->getHi()->setMustNotHaveReg();
3824       T->getLo()->setMustNotHaveReg();
3825       Context.insert<InstFakeDef>(T->getHi());
3826       Context.insert<InstFakeDef>(T->getLo());
3827       _mov_fp64_to_i64(T->getHi(), Src0R, Int64_Hi);
3828       _mov_fp64_to_i64(T->getLo(), Src0R, Int64_Lo);
3829       lowerAssign(InstAssign::create(Func, Dest, T));
3830       break;
3831     }
3832     case IceType_f64: {
3833       assert(Src0->getType() == IceType_i64);
3834       const uint32_t Mask = 0xFFFFFFFF;
3835       if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src0)) {
3836         Variable *RegHi, *RegLo;
3837         const uint64_t Value = C64->getValue();
3838         uint64_t Upper32Bits = (Value >> INT32_BITS) & Mask;
3839         uint64_t Lower32Bits = Value & Mask;
3840         RegLo = legalizeToReg(Ctx->getConstantInt32(Lower32Bits));
3841         RegHi = legalizeToReg(Ctx->getConstantInt32(Upper32Bits));
3842         _mov(Dest, RegHi, RegLo);
3843       } else {
3844         auto *Var64On32 = llvm::cast<Variable64On32>(Src0);
3845         auto *RegLo = legalizeToReg(loOperand(Var64On32));
3846         auto *RegHi = legalizeToReg(hiOperand(Var64On32));
3847         _mov(Dest, RegHi, RegLo);
3848       }
3849       break;
3850     }
3851     default:
3852       llvm::report_fatal_error("Unexpected bitcast.");
3853     }
3854     break;
3855   }
3856   }
3857 }
3858 
3859 void TargetMIPS32::lowerExtractElement(const InstExtractElement *Instr) {
3860   Variable *Dest = Instr->getDest();
3861   const Type DestTy = Dest->getType();
3862   Operand *Src1 = Instr->getSrc(1);
3863   if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src1)) {
3864     const uint32_t Index = Imm->getValue();
3865     Variable *TDest = makeReg(DestTy);
3866     Variable *TReg = makeReg(DestTy);
3867     auto *Src0 = legalizeUndef(Instr->getSrc(0));
3868     auto *Src0R = llvm::dyn_cast<VariableVecOn32>(Src0);
3869     // Number of elements in each container
3870     uint32_t ElemPerCont =
3871         typeNumElements(Src0->getType()) / Src0R->ContainersPerVector;
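    // E.g. a v16i8 occupies four i32 containers, so ElemPerCont == 4 and
    // index 13 selects byte 13 % 4 == 1 of container 13 / 4 == 3.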
3872     auto *Src = Src0R->getContainers()[Index / ElemPerCont];
3873     auto *SrcE = legalizeToReg(Src);
3874     // Position of the element in the container
3875     uint32_t PosInCont = Index % ElemPerCont;
3876     if (ElemPerCont == 1) {
3877       _mov(TDest, SrcE);
3878     } else if (ElemPerCont == 2) {
3879       switch (PosInCont) {
3880       case 0:
3881         _andi(TDest, SrcE, 0xffff);
3882         break;
3883       case 1:
3884         _srl(TDest, SrcE, 16);
3885         break;
3886       default:
3887         llvm::report_fatal_error("ExtractElement: Invalid PosInCont");
3888         break;
3889       }
3890     } else if (ElemPerCont == 4) {
3891       switch (PosInCont) {
3892       case 0:
3893         _andi(TDest, SrcE, 0xff);
3894         break;
3895       case 1:
3896         _srl(TReg, SrcE, 8);
3897         _andi(TDest, TReg, 0xff);
3898         break;
3899       case 2:
3900         _srl(TReg, SrcE, 16);
3901         _andi(TDest, TReg, 0xff);
3902         break;
3903       case 3:
3904         _srl(TDest, SrcE, 24);
3905         break;
3906       default:
3907         llvm::report_fatal_error("ExtractElement: Invalid PosInCont");
3908         break;
3909       }
3910     }
3911     if (typeElementType(Src0R->getType()) == IceType_i1) {
3912       Variable *TReg1 = makeReg(DestTy);
3913       _andi(TReg1, TDest, 0x1);
3914       _mov(Dest, TReg1);
3915     } else {
3916       _mov(Dest, TDest);
3917     }
3918     return;
3919   }
3920   llvm::report_fatal_error("ExtractElement requires a constant index");
3921 }
3922 
3923 void TargetMIPS32::lowerFcmp(const InstFcmp *Instr) {
3924   Variable *Dest = Instr->getDest();
3925   if (isVectorType(Dest->getType())) {
3926     llvm::report_fatal_error("Fcmp: Destination type is vector");
3927     return;
3928   }
3929 
3930   auto *Src0 = Instr->getSrc(0);
3931   auto *Src1 = Instr->getSrc(1);
3932   auto *Zero = getZero();
3933 
3934   InstFcmp::FCond Cond = Instr->getCondition();
3935   auto *DestR = makeReg(IceType_i32);
3936   auto *Src0R = legalizeToReg(Src0);
3937   auto *Src1R = legalizeToReg(Src1);
3938   const Type Src0Ty = Src0->getType();
3939 
3940   Operand *FCC0 = OperandMIPS32FCC::create(getFunc(), OperandMIPS32FCC::FCC0);
3941 
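  // Pattern for each predicate below: issue the c.cond.fmt compare that
  // matches (or complements) the predicate, preload DestR with 1, then movf or
  // movt copies $zero into DestR when the FP condition code contradicts the
  // predicate. Unordered predicates reuse the complementary ordered compare
  // with the opposite conditional move.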
3942   switch (Cond) {
3943   default: {
3944     llvm::report_fatal_error("Unhandled fp comparison.");
3945     return;
3946   }
3947   case InstFcmp::False: {
3948     Context.insert<InstFakeUse>(Src0R);
3949     Context.insert<InstFakeUse>(Src1R);
3950     _addiu(DestR, Zero, 0);
3951     _mov(Dest, DestR);
3952     break;
3953   }
3954   case InstFcmp::Oeq: {
3955     if (Src0Ty == IceType_f32) {
3956       _c_eq_s(Src0R, Src1R);
3957     } else {
3958       _c_eq_d(Src0R, Src1R);
3959     }
3960     _addiu(DestR, Zero, 1);
3961     _movf(DestR, Zero, FCC0);
3962     _mov(Dest, DestR);
3963     break;
3964   }
3965   case InstFcmp::Ogt: {
3966     if (Src0Ty == IceType_f32) {
3967       _c_ule_s(Src0R, Src1R);
3968     } else {
3969       _c_ule_d(Src0R, Src1R);
3970     }
3971     _addiu(DestR, Zero, 1);
3972     _movt(DestR, Zero, FCC0);
3973     _mov(Dest, DestR);
3974     break;
3975   }
3976   case InstFcmp::Oge: {
3977     if (Src0Ty == IceType_f32) {
3978       _c_ult_s(Src0R, Src1R);
3979     } else {
3980       _c_ult_d(Src0R, Src1R);
3981     }
3982     _addiu(DestR, Zero, 1);
3983     _movt(DestR, Zero, FCC0);
3984     _mov(Dest, DestR);
3985     break;
3986   }
3987   case InstFcmp::Olt: {
3988     if (Src0Ty == IceType_f32) {
3989       _c_olt_s(Src0R, Src1R);
3990     } else {
3991       _c_olt_d(Src0R, Src1R);
3992     }
3993     _addiu(DestR, Zero, 1);
3994     _movf(DestR, Zero, FCC0);
3995     _mov(Dest, DestR);
3996     break;
3997   }
3998   case InstFcmp::Ole: {
3999     if (Src0Ty == IceType_f32) {
4000       _c_ole_s(Src0R, Src1R);
4001     } else {
4002       _c_ole_d(Src0R, Src1R);
4003     }
4004     _addiu(DestR, Zero, 1);
4005     _movf(DestR, Zero, FCC0);
4006     _mov(Dest, DestR);
4007     break;
4008   }
4009   case InstFcmp::One: {
4010     if (Src0Ty == IceType_f32) {
4011       _c_ueq_s(Src0R, Src1R);
4012     } else {
4013       _c_ueq_d(Src0R, Src1R);
4014     }
4015     _addiu(DestR, Zero, 1);
4016     _movt(DestR, Zero, FCC0);
4017     _mov(Dest, DestR);
4018     break;
4019   }
4020   case InstFcmp::Ord: {
4021     if (Src0Ty == IceType_f32) {
4022       _c_un_s(Src0R, Src1R);
4023     } else {
4024       _c_un_d(Src0R, Src1R);
4025     }
4026     _addiu(DestR, Zero, 1);
4027     _movt(DestR, Zero, FCC0);
4028     _mov(Dest, DestR);
4029     break;
4030   }
4031   case InstFcmp::Ueq: {
4032     if (Src0Ty == IceType_f32) {
4033       _c_ueq_s(Src0R, Src1R);
4034     } else {
4035       _c_ueq_d(Src0R, Src1R);
4036     }
4037     _addiu(DestR, Zero, 1);
4038     _movf(DestR, Zero, FCC0);
4039     _mov(Dest, DestR);
4040     break;
4041   }
4042   case InstFcmp::Ugt: {
4043     if (Src0Ty == IceType_f32) {
4044       _c_ole_s(Src0R, Src1R);
4045     } else {
4046       _c_ole_d(Src0R, Src1R);
4047     }
4048     _addiu(DestR, Zero, 1);
4049     _movt(DestR, Zero, FCC0);
4050     _mov(Dest, DestR);
4051     break;
4052   }
4053   case InstFcmp::Uge: {
4054     if (Src0Ty == IceType_f32) {
4055       _c_olt_s(Src0R, Src1R);
4056     } else {
4057       _c_olt_d(Src0R, Src1R);
4058     }
4059     _addiu(DestR, Zero, 1);
4060     _movt(DestR, Zero, FCC0);
4061     _mov(Dest, DestR);
4062     break;
4063   }
4064   case InstFcmp::Ult: {
4065     if (Src0Ty == IceType_f32) {
4066       _c_ult_s(Src0R, Src1R);
4067     } else {
4068       _c_ult_d(Src0R, Src1R);
4069     }
4070     _addiu(DestR, Zero, 1);
4071     _movf(DestR, Zero, FCC0);
4072     _mov(Dest, DestR);
4073     break;
4074   }
4075   case InstFcmp::Ule: {
4076     if (Src0Ty == IceType_f32) {
4077       _c_ule_s(Src0R, Src1R);
4078     } else {
4079       _c_ule_d(Src0R, Src1R);
4080     }
4081     _addiu(DestR, Zero, 1);
4082     _movf(DestR, Zero, FCC0);
4083     _mov(Dest, DestR);
4084     break;
4085   }
4086   case InstFcmp::Une: {
4087     if (Src0Ty == IceType_f32) {
4088       _c_eq_s(Src0R, Src1R);
4089     } else {
4090       _c_eq_d(Src0R, Src1R);
4091     }
4092     _addiu(DestR, Zero, 1);
4093     _movt(DestR, Zero, FCC0);
4094     _mov(Dest, DestR);
4095     break;
4096   }
4097   case InstFcmp::Uno: {
4098     if (Src0Ty == IceType_f32) {
4099       _c_un_s(Src0R, Src1R);
4100     } else {
4101       _c_un_d(Src0R, Src1R);
4102     }
4103     _addiu(DestR, Zero, 1);
4104     _movf(DestR, Zero, FCC0);
4105     _mov(Dest, DestR);
4106     break;
4107   }
4108   case InstFcmp::True: {
4109     Context.insert<InstFakeUse>(Src0R);
4110     Context.insert<InstFakeUse>(Src1R);
4111     _addiu(DestR, Zero, 1);
4112     _mov(Dest, DestR);
4113     break;
4114   }
4115   }
4116 }
4117 
4118 void TargetMIPS32::lower64Icmp(const InstIcmp *Instr) {
4119   Operand *Src0 = legalize(Instr->getSrc(0));
4120   Operand *Src1 = legalize(Instr->getSrc(1));
4121   Variable *Dest = Instr->getDest();
4122   InstIcmp::ICond Condition = Instr->getCondition();
4123 
4124   Variable *Src0LoR = legalizeToReg(loOperand(Src0));
4125   Variable *Src0HiR = legalizeToReg(hiOperand(Src0));
4126   Variable *Src1LoR = legalizeToReg(loOperand(Src1));
4127   Variable *Src1HiR = legalizeToReg(hiOperand(Src1));
4128 
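  // Same high/low-word idiom as the branch lowering: xor detects differing
  // high words, slt/sltu produce the per-word results, and movz falls back to
  // the low-word comparison when the high words match. The Sge/Uge/Sle/Ule
  // cases compute the strict comparison and invert it with xori 1.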
4129   switch (Condition) {
4130   default:
4131     llvm_unreachable("unexpected condition");
4132     return;
4133   case InstIcmp::Eq: {
4134     auto *T1 = I32Reg();
4135     auto *T2 = I32Reg();
4136     auto *T3 = I32Reg();
4137     auto *T4 = I32Reg();
4138     _xor(T1, Src0HiR, Src1HiR);
4139     _xor(T2, Src0LoR, Src1LoR);
4140     _or(T3, T1, T2);
4141     _sltiu(T4, T3, 1);
4142     _mov(Dest, T4);
4143     return;
4144   }
4145   case InstIcmp::Ne: {
4146     auto *T1 = I32Reg();
4147     auto *T2 = I32Reg();
4148     auto *T3 = I32Reg();
4149     auto *T4 = I32Reg();
4150     _xor(T1, Src0HiR, Src1HiR);
4151     _xor(T2, Src0LoR, Src1LoR);
4152     _or(T3, T1, T2);
4153     _sltu(T4, getZero(), T3);
4154     _mov(Dest, T4);
4155     return;
4156   }
4157   case InstIcmp::Sgt: {
4158     auto *T1 = I32Reg();
4159     auto *T2 = I32Reg();
4160     auto *T3 = I32Reg();
4161     _xor(T1, Src0HiR, Src1HiR);
4162     _slt(T2, Src1HiR, Src0HiR);
4163     _sltu(T3, Src1LoR, Src0LoR);
4164     _movz(T2, T3, T1);
4165     _mov(Dest, T2);
4166     return;
4167   }
4168   case InstIcmp::Ugt: {
4169     auto *T1 = I32Reg();
4170     auto *T2 = I32Reg();
4171     auto *T3 = I32Reg();
4172     _xor(T1, Src0HiR, Src1HiR);
4173     _sltu(T2, Src1HiR, Src0HiR);
4174     _sltu(T3, Src1LoR, Src0LoR);
4175     _movz(T2, T3, T1);
4176     _mov(Dest, T2);
4177     return;
4178   }
4179   case InstIcmp::Sge: {
4180     auto *T1 = I32Reg();
4181     auto *T2 = I32Reg();
4182     auto *T3 = I32Reg();
4183     auto *T4 = I32Reg();
4184     auto *T5 = I32Reg();
4185     _xor(T1, Src0HiR, Src1HiR);
4186     _slt(T2, Src0HiR, Src1HiR);
4187     _xori(T3, T2, 1);
4188     _sltu(T4, Src0LoR, Src1LoR);
4189     _xori(T5, T4, 1);
4190     _movz(T3, T5, T1);
4191     _mov(Dest, T3);
4192     return;
4193   }
4194   case InstIcmp::Uge: {
4195     auto *T1 = I32Reg();
4196     auto *T2 = I32Reg();
4197     auto *T3 = I32Reg();
4198     auto *T4 = I32Reg();
4199     auto *T5 = I32Reg();
4200     _xor(T1, Src0HiR, Src1HiR);
4201     _sltu(T2, Src0HiR, Src1HiR);
4202     _xori(T3, T2, 1);
4203     _sltu(T4, Src0LoR, Src1LoR);
4204     _xori(T5, T4, 1);
4205     _movz(T3, T5, T1);
4206     _mov(Dest, T3);
4207     return;
4208   }
4209   case InstIcmp::Slt: {
4210     auto *T1 = I32Reg();
4211     auto *T2 = I32Reg();
4212     auto *T3 = I32Reg();
4213     _xor(T1, Src0HiR, Src1HiR);
4214     _slt(T2, Src0HiR, Src1HiR);
4215     _sltu(T3, Src0LoR, Src1LoR);
4216     _movz(T2, T3, T1);
4217     _mov(Dest, T2);
4218     return;
4219   }
4220   case InstIcmp::Ult: {
4221     auto *T1 = I32Reg();
4222     auto *T2 = I32Reg();
4223     auto *T3 = I32Reg();
4224     _xor(T1, Src0HiR, Src1HiR);
4225     _sltu(T2, Src0HiR, Src1HiR);
4226     _sltu(T3, Src0LoR, Src1LoR);
4227     _movz(T2, T3, T1);
4228     _mov(Dest, T2);
4229     return;
4230   }
4231   case InstIcmp::Sle: {
4232     auto *T1 = I32Reg();
4233     auto *T2 = I32Reg();
4234     auto *T3 = I32Reg();
4235     auto *T4 = I32Reg();
4236     auto *T5 = I32Reg();
4237     _xor(T1, Src0HiR, Src1HiR);
4238     _slt(T2, Src1HiR, Src0HiR);
4239     _xori(T3, T2, 1);
4240     _sltu(T4, Src1LoR, Src0LoR);
4241     _xori(T5, T4, 1);
4242     _movz(T3, T5, T1);
4243     _mov(Dest, T3);
4244     return;
4245   }
4246   case InstIcmp::Ule: {
4247     auto *T1 = I32Reg();
4248     auto *T2 = I32Reg();
4249     auto *T3 = I32Reg();
4250     auto *T4 = I32Reg();
4251     auto *T5 = I32Reg();
4252     _xor(T1, Src0HiR, Src1HiR);
4253     _sltu(T2, Src1HiR, Src0HiR);
4254     _xori(T3, T2, 1);
4255     _sltu(T4, Src1LoR, Src0LoR);
4256     _xori(T5, T4, 1);
4257     _movz(T3, T5, T1);
4258     _mov(Dest, T3);
4259     return;
4260   }
4261   }
4262 }
4263 
4264 void TargetMIPS32::lowerIcmp(const InstIcmp *Instr) {
4265   auto *Src0 = Instr->getSrc(0);
4266   auto *Src1 = Instr->getSrc(1);
4267   if (Src0->getType() == IceType_i64) {
4268     lower64Icmp(Instr);
4269     return;
4270   }
4271   Variable *Dest = Instr->getDest();
4272   if (isVectorType(Dest->getType())) {
4273     llvm::report_fatal_error("Icmp: Destination type is vector");
4274     return;
4275   }
4276   InstIcmp::ICond Cond = Instr->getCondition();
4277   auto *Src0R = legalizeToReg(Src0);
4278   auto *Src1R = legalizeToReg(Src1);
4279   const Type Src0Ty = Src0R->getType();
4280   const uint32_t ShAmt = INT32_BITS - getScalarIntBitWidth(Src0->getType());
4281   Variable *Src0RT = I32Reg();
4282   Variable *Src1RT = I32Reg();
4283 
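  // Narrow operands are shifted left so the value occupies the top of a 32-bit
  // register: the sub-word sign bit lands in bit 31, and since both operands
  // shift by the same amount, signed and unsigned comparisons on the shifted
  // values give the correct result.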
4284   if (Src0Ty != IceType_i32) {
4285     _sll(Src0RT, Src0R, ShAmt);
4286     _sll(Src1RT, Src1R, ShAmt);
4287   } else {
4288     _mov(Src0RT, Src0R);
4289     _mov(Src1RT, Src1R);
4290   }
4291 
4292   switch (Cond) {
4293   case InstIcmp::Eq: {
4294     auto *DestT = I32Reg();
4295     auto *T = I32Reg();
4296     _xor(T, Src0RT, Src1RT);
4297     _sltiu(DestT, T, 1);
4298     _mov(Dest, DestT);
4299     return;
4300   }
4301   case InstIcmp::Ne: {
4302     auto *DestT = I32Reg();
4303     auto *T = I32Reg();
4304     auto *Zero = getZero();
4305     _xor(T, Src0RT, Src1RT);
4306     _sltu(DestT, Zero, T);
4307     _mov(Dest, DestT);
4308     return;
4309   }
4310   case InstIcmp::Ugt: {
4311     auto *DestT = I32Reg();
4312     _sltu(DestT, Src1RT, Src0RT);
4313     _mov(Dest, DestT);
4314     return;
4315   }
4316   case InstIcmp::Uge: {
4317     auto *DestT = I32Reg();
4318     auto *T = I32Reg();
4319     _sltu(T, Src0RT, Src1RT);
4320     _xori(DestT, T, 1);
4321     _mov(Dest, DestT);
4322     return;
4323   }
4324   case InstIcmp::Ult: {
4325     auto *DestT = I32Reg();
4326     _sltu(DestT, Src0RT, Src1RT);
4327     _mov(Dest, DestT);
4328     return;
4329   }
4330   case InstIcmp::Ule: {
4331     auto *DestT = I32Reg();
4332     auto *T = I32Reg();
4333     _sltu(T, Src1RT, Src0RT);
4334     _xori(DestT, T, 1);
4335     _mov(Dest, DestT);
4336     return;
4337   }
4338   case InstIcmp::Sgt: {
4339     auto *DestT = I32Reg();
4340     _slt(DestT, Src1RT, Src0RT);
4341     _mov(Dest, DestT);
4342     return;
4343   }
4344   case InstIcmp::Sge: {
4345     auto *DestT = I32Reg();
4346     auto *T = I32Reg();
4347     _slt(T, Src0RT, Src1RT);
4348     _xori(DestT, T, 1);
4349     _mov(Dest, DestT);
4350     return;
4351   }
4352   case InstIcmp::Slt: {
4353     auto *DestT = I32Reg();
4354     _slt(DestT, Src0RT, Src1RT);
4355     _mov(Dest, DestT);
4356     return;
4357   }
4358   case InstIcmp::Sle: {
4359     auto *DestT = I32Reg();
4360     auto *T = I32Reg();
4361     _slt(T, Src1RT, Src0RT);
4362     _xori(DestT, T, 1);
4363     _mov(Dest, DestT);
4364     return;
4365   }
4366   default:
4367     llvm_unreachable("Invalid ICmp operator");
4368     return;
4369   }
4370 }
4371 
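// For sub-word operands, lowerIcmp above normalizes both sources by shifting
// them left by (INT32_BITS - width) bits, so the narrow value's sign bit
// lands in bit 31 and a single full-word slt/sltu is correct for both signed
// and unsigned predicates. A hedged C sketch of the i8 signed case:
//
//   static int sltI8(uint8_t A, uint8_t B) {         // i8 bit patterns
//     const uint32_t ShAmt = 32 - 8;                 // INT32_BITS - width
//     int32_t A32 = (int32_t)((uint32_t)A << ShAmt); // _sll(Src0RT, Src0R, ShAmt)
//     int32_t B32 = (int32_t)((uint32_t)B << ShAmt); // _sll(Src1RT, Src1R, ShAmt)
//     return A32 < B32;                              // _slt(DestT, Src0RT, Src1RT)
//   }
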
4372 void TargetMIPS32::lowerInsertElement(const InstInsertElement *Instr) {
4373   Variable *Dest = Instr->getDest();
4374   const Type DestTy = Dest->getType();
4375   Operand *Src2 = Instr->getSrc(2);
4376   if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src2)) {
4377     const uint32_t Index = Imm->getValue();
4378     // Vector to insert into
4379     auto *Src0 = legalizeUndef(Instr->getSrc(0));
4380     auto *Src0R = llvm::dyn_cast<VariableVecOn32>(Src0);
4381     // Number of elements in each container
4382     uint32_t ElemPerCont =
4383         typeNumElements(Src0->getType()) / Src0R->ContainersPerVector;
4384     // Source element
4385     auto *Src = Src0R->getContainers()[Index / ElemPerCont];
4386     auto *SrcE = Src;
4387     if (ElemPerCont > 1)
4388       SrcE = legalizeToReg(Src);
4389     // Dest is a vector
4390     auto *VDest = llvm::dyn_cast<VariableVecOn32>(Dest);
4391     VDest->initVecElement(Func);
4392     // Temp vector variable
4393     auto *TDest = makeReg(DestTy);
4394     auto *TVDest = llvm::dyn_cast<VariableVecOn32>(TDest);
4395     TVDest->initVecElement(Func);
4396     // Destination element
4397     auto *DstE = TVDest->getContainers()[Index / ElemPerCont];
4398     // Element to insert
4399     auto *Src1R = legalizeToReg(Instr->getSrc(1));
4400     auto *TReg1 = makeReg(IceType_i32);
4401     auto *TReg2 = makeReg(IceType_i32);
4402     auto *TReg3 = makeReg(IceType_i32);
4403     auto *TReg4 = makeReg(IceType_i32);
4404     auto *TReg5 = makeReg(IceType_i32);
4405     auto *TDReg = makeReg(IceType_i32);
4406     // Position of the element in the container
4407     uint32_t PosInCont = Index % ElemPerCont;
4408     // Load source vector in a temporary vector
4409     for (SizeT i = 0; i < TVDest->ContainersPerVector; ++i) {
4410       auto *DCont = TVDest->getContainers()[i];
4411       // Do not define DstE as we are going to redefine it
4412       if (DCont == DstE)
4413         continue;
4414       auto *SCont = Src0R->getContainers()[i];
4415       auto *TReg = makeReg(IceType_i32);
4416       _mov(TReg, SCont);
4417       _mov(DCont, TReg);
4418     }
4419     // Insert the element
4420     if (ElemPerCont == 1) {
4421       _mov(DstE, Src1R);
4422     } else if (ElemPerCont == 2) {
4423       switch (PosInCont) {
4424       case 0:
4425         _andi(TReg1, Src1R, 0xffff); // Clear upper 16-bits of source
4426         _srl(TReg2, SrcE, 16);
4427         _sll(TReg3, TReg2, 16); // Clear lower 16-bits of element
4428         _or(TDReg, TReg1, TReg3);
4429         _mov(DstE, TDReg);
4430         break;
4431       case 1:
4432         _sll(TReg1, Src1R, 16); // Clear lower 16-bits of source
4433         _sll(TReg2, SrcE, 16);
4434         _srl(TReg3, TReg2, 16); // Clear upper 16-bits of element
4435         _or(TDReg, TReg1, TReg3);
4436         _mov(DstE, TDReg);
4437         break;
4438       default:
4439         llvm::report_fatal_error("InsertElement: Invalid PosInCont");
4440         break;
4441       }
4442     } else if (ElemPerCont == 4) {
4443       switch (PosInCont) {
4444       case 0:
4445         _andi(TReg1, Src1R, 0xff); // Clear bits[31:8] of source
4446         _srl(TReg2, SrcE, 8);
4447         _sll(TReg3, TReg2, 8); // Clear bits[7:0] of element
4448         _or(TDReg, TReg1, TReg3);
4449         _mov(DstE, TDReg);
4450         break;
4451       case 1:
4452         _andi(TReg1, Src1R, 0xff); // Clear bits[31:8] of source
4453         _sll(TReg5, TReg1, 8);     // Position in the destination
4454         _lui(TReg2, Ctx->getConstantInt32(0xffff));
4455         _ori(TReg3, TReg2, 0x00ff);
4456         _and(TReg4, SrcE, TReg3); // Clear bits[15:8] of element
4457         _or(TDReg, TReg5, TReg4);
4458         _mov(DstE, TDReg);
4459         break;
4460       case 2:
4461         _andi(TReg1, Src1R, 0xff); // Clear bits[31:8] of source
4462         _sll(TReg5, TReg1, 16);    // Position in the destination
4463         _lui(TReg2, Ctx->getConstantInt32(0xff00));
4464         _ori(TReg3, TReg2, 0xffff);
4465         _and(TReg4, SrcE, TReg3); // Clear bits[23:16] of element
4466         _or(TDReg, TReg5, TReg4);
4467         _mov(DstE, TDReg);
4468         break;
4469       case 3:
4470         _sll(TReg1, Src1R, 24); // Position in the destination
4471         _sll(TReg2, SrcE, 8);
4472         _srl(TReg3, TReg2, 8); // Clear bits[31:24] of element
4473         _or(TDReg, TReg1, TReg3);
4474         _mov(DstE, TDReg);
4475         break;
4476       default:
4477         llvm::report_fatal_error("InsertElement: Invalid PosInCont");
4478         break;
4479       }
4480     }
4481     // Write back temporary vector to the destination
4482     auto *Assign = InstAssign::create(Func, Dest, TDest);
4483     lowerAssign(Assign);
4484     return;
4485   }
4486   llvm::report_fatal_error("InsertElement requires a constant index");
4487 }
4488 
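// Vectors are modeled here as groups of i32 containers, so inserting an
// element is a read-modify-write of a single 32-bit container: clear the
// target lane, then OR in the shifted replacement. A standalone sketch of
// the byte case (hypothetical helper, not part of the lowering):
//
//   static uint32_t insertByte(uint32_t Cont, uint32_t Elem, unsigned Pos) {
//     const uint32_t Mask = 0xffu << (Pos * 8);  // lane to replace
//     return (Cont & ~Mask) | ((Elem & 0xffu) << (Pos * 8));
//   }
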
4489 void TargetMIPS32::createArithInst(Intrinsics::AtomicRMWOperation Operation,
4490                                    Variable *Dest, Variable *Src0,
4491                                    Variable *Src1) {
4492   switch (Operation) {
4493   default:
4494     llvm::report_fatal_error("Unknown AtomicRMW operation");
4495   case Intrinsics::AtomicExchange:
4496     llvm::report_fatal_error("Can't handle Atomic xchg operation");
4497   case Intrinsics::AtomicAdd:
4498     _addu(Dest, Src0, Src1);
4499     break;
4500   case Intrinsics::AtomicAnd:
4501     _and(Dest, Src0, Src1);
4502     break;
4503   case Intrinsics::AtomicSub:
4504     _subu(Dest, Src0, Src1);
4505     break;
4506   case Intrinsics::AtomicOr:
4507     _or(Dest, Src0, Src1);
4508     break;
4509   case Intrinsics::AtomicXor:
4510     _xor(Dest, Src0, Src1);
4511     break;
4512   }
4513 }
4514 
4515 void TargetMIPS32::lowerIntrinsic(const InstIntrinsic *Instr) {
4516   Variable *Dest = Instr->getDest();
4517   Type DestTy = (Dest == nullptr) ? IceType_void : Dest->getType();
4518 
4519   Intrinsics::IntrinsicID ID = Instr->getIntrinsicID();
4520   switch (ID) {
4521   case Intrinsics::AtomicLoad: {
4522     assert(isScalarIntegerType(DestTy));
4523     // We require the memory address to be naturally aligned. Given that is
4524     // the case, normal loads are atomic.
4525     if (!Intrinsics::isMemoryOrderValid(
4526             ID, getConstantMemoryOrder(Instr->getArg(1)))) {
4527       Func->setError("Unexpected memory ordering for AtomicLoad");
4528       return;
4529     }
4530     if (DestTy == IceType_i64) {
4531       llvm::report_fatal_error("AtomicLoad.i64 should have been prelowered.");
4532       return;
4533     } else if (DestTy == IceType_i32) {
4534       auto *T1 = makeReg(DestTy);
4535       auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4536       auto *Base = legalizeToReg(Instr->getArg(0));
4537       auto *Addr = formMemoryOperand(Base, DestTy);
4538       InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4539       InstMIPS32Label *Exit = InstMIPS32Label::create(Func, this);
4540       constexpr CfgNode *NoTarget = nullptr;
4541       _sync();
4542       Context.insert(Retry);
4543       _ll(T1, Addr);
4544       _br(NoTarget, NoTarget, T1, getZero(), Exit, CondMIPS32::Cond::NE);
4545       _addiu(RegAt, getZero(), 0); // Loaded value is zero here, writeback zero
4546       _sc(RegAt, Addr);
4547       _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4548       Context.insert(Exit);
4549       _sync();
4550       _mov(Dest, T1);
4551       Context.insert<InstFakeUse>(T1);
4552     } else {
4553       const uint32_t Mask = (1 << (CHAR_BITS * typeWidthInBytes(DestTy))) - 1;
4554       auto *Base = legalizeToReg(Instr->getArg(0));
4555       auto *T1 = makeReg(IceType_i32);
4556       auto *T2 = makeReg(IceType_i32);
4557       auto *T3 = makeReg(IceType_i32);
4558       auto *T4 = makeReg(IceType_i32);
4559       auto *T5 = makeReg(IceType_i32);
4560       auto *T6 = makeReg(IceType_i32);
4561       auto *SrcMask = makeReg(IceType_i32);
4562       auto *Tdest = makeReg(IceType_i32);
4563       auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4564       InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4565       InstMIPS32Label *Exit = InstMIPS32Label::create(Func, this);
4566       constexpr CfgNode *NoTarget = nullptr;
4567       _sync();
4568       _addiu(T1, getZero(), -4); // Address mask 0xFFFFFFFC
4569       _andi(T2, Base, 3);        // Last two bits of the address
4570       _and(T3, Base, T1);        // Align the address
4571       _sll(T4, T2, 3);
4572       _ori(T5, getZero(), Mask);
4573       _sllv(SrcMask, T5, T4); // Source mask
4574       auto *Addr = formMemoryOperand(T3, IceType_i32);
4575       Context.insert(Retry);
4576       _ll(T6, Addr);
4577       _and(Tdest, T6, SrcMask);
4578       _br(NoTarget, NoTarget, T6, getZero(), Exit, CondMIPS32::Cond::NE);
4579       _addiu(RegAt, getZero(), 0); // Loaded value is zero here, writeback zero
4580       _sc(RegAt, Addr);
4581       _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4582       Context.insert(Exit);
4583       auto *T7 = makeReg(IceType_i32);
4584       auto *T8 = makeReg(IceType_i32);
4585       _srlv(T7, Tdest, T4);
4586       _andi(T8, T7, Mask);
4587       _sync();
4588       _mov(Dest, T8);
4589       Context.insert<InstFakeUse>(T6);
4590       Context.insert<InstFakeUse>(SrcMask);
4591     }
4592     return;
4593   }
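  // Editorial sketch of the sub-word AtomicLoad path above, assuming
  // little-endian MIPS32 (the LL/SC retry loop makes the read atomic):
  //
  //   uintptr_t WordAddr = Addr & ~(uintptr_t)3; // _addiu(T1, zero, -4); _and
  //   uint32_t Shift = (Addr & 3) * 8;           // _andi(T2, ...); _sll(T4, T2, 3)
  //   uint32_t Word = *(uint32_t *)WordAddr;     // _ll(T6, Addr)
  //   uint32_t Val = (Word >> Shift) & Mask;     // _srlv(T7, ...); _andi(T8, ...)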
4594   case Intrinsics::AtomicStore: {
4595     // We require the memory address to be naturally aligned. Given that is
4596     // the case, normal stores are atomic.
4597     if (!Intrinsics::isMemoryOrderValid(
4598             ID, getConstantMemoryOrder(Instr->getArg(2)))) {
4599       Func->setError("Unexpected memory ordering for AtomicStore");
4600       return;
4601     }
4602     auto *Val = Instr->getArg(0);
4603     auto Ty = Val->getType();
4604     if (Ty == IceType_i64) {
4605       llvm::report_fatal_error("AtomicStore.i64 should have been prelowered.");
4606       return;
4607     } else if (Ty == IceType_i32) {
4608       auto *Val = legalizeToReg(Instr->getArg(0));
4609       auto *Base = legalizeToReg(Instr->getArg(1));
4610       auto *Addr = formMemoryOperand(Base, Ty);
4611       InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4612       constexpr CfgNode *NoTarget = nullptr;
4613       auto *T1 = makeReg(IceType_i32);
4614       auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4615       _sync();
4616       Context.insert(Retry);
4617       _ll(T1, Addr);
4618       _mov(RegAt, Val);
4619       _sc(RegAt, Addr);
4620       _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4621       Context.insert<InstFakeUse>(T1); // To keep LL alive
4622       _sync();
4623     } else {
4624       auto *Val = legalizeToReg(Instr->getArg(0));
4625       auto *Base = legalizeToReg(Instr->getArg(1));
4626       InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4627       constexpr CfgNode *NoTarget = nullptr;
4628       auto *T1 = makeReg(IceType_i32);
4629       auto *T2 = makeReg(IceType_i32);
4630       auto *T3 = makeReg(IceType_i32);
4631       auto *T4 = makeReg(IceType_i32);
4632       auto *T5 = makeReg(IceType_i32);
4633       auto *T6 = makeReg(IceType_i32);
4634       auto *T7 = makeReg(IceType_i32);
4635       auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4636       auto *SrcMask = makeReg(IceType_i32);
4637       auto *DstMask = makeReg(IceType_i32);
4638       const uint32_t Mask = (1 << (CHAR_BITS * typeWidthInBytes(Ty))) - 1;
4639       _sync();
4640       _addiu(T1, getZero(), -4);
4641       _and(T7, Base, T1);
4642       auto *Addr = formMemoryOperand(T7, Ty);
4643       _andi(T2, Base, 3);
4644       _sll(T3, T2, 3);
4645       _ori(T4, getZero(), Mask);
4646       _sllv(T5, T4, T3);
4647       _sllv(T6, Val, T3);
4648       _nor(SrcMask, getZero(), T5);
4649       _and(DstMask, T6, T5);
4650       Context.insert(Retry);
4651       _ll(RegAt, Addr);
4652       _and(RegAt, RegAt, SrcMask);
4653       _or(RegAt, RegAt, DstMask);
4654       _sc(RegAt, Addr);
4655       _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4656       Context.insert<InstFakeUse>(SrcMask);
4657       Context.insert<InstFakeUse>(DstMask);
4658       _sync();
4659     }
4660     return;
4661   }
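  // The sub-word AtomicStore path above rewrites only its lanes of the
  // containing aligned word inside an LL/SC loop. In outline (editorial,
  // little-endian layout):
  //
  //   uint32_t Keep = ~(Mask << Shift);                // _sllv(T5, ...); _nor(SrcMask, ...)
  //   uint32_t Ins = (Val << Shift) & (Mask << Shift); // _sllv(T6, ...); _and(DstMask, ...)
  //   do {
  //     uint32_t Word = ll(WordAddr);                  // _ll(RegAt, Addr)
  //     Word = (Word & Keep) | Ins;                    // _and; _or
  //   } while (!sc(WordAddr, Word));                   // _sc; _br ... Retry on failure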
4662   case Intrinsics::AtomicCmpxchg: {
4663     assert(isScalarIntegerType(DestTy));
4664     // We require the memory address to be naturally aligned. Given that is
4665     // the case, normal loads are atomic.
4666     if (!Intrinsics::isMemoryOrderValid(
4667             ID, getConstantMemoryOrder(Instr->getArg(3)),
4668             getConstantMemoryOrder(Instr->getArg(4)))) {
4669       Func->setError("Unexpected memory ordering for AtomicCmpxchg");
4670       return;
4671     }
4672 
4673     InstMIPS32Label *Exit = InstMIPS32Label::create(Func, this);
4674     InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4675     constexpr CfgNode *NoTarget = nullptr;
4676     auto *New = Instr->getArg(2);
4677     auto *Expected = Instr->getArg(1);
4678     auto *ActualAddress = Instr->getArg(0);
4679 
4680     if (DestTy == IceType_i64) {
4681       llvm::report_fatal_error(
4682           "AtomicCmpxchg.i64 should have been prelowered.");
4683       return;
4684     } else if (DestTy == IceType_i8 || DestTy == IceType_i16) {
4685       auto *NewR = legalizeToReg(New);
4686       auto *ExpectedR = legalizeToReg(Expected);
4687       auto *ActualAddressR = legalizeToReg(ActualAddress);
4688       const uint32_t ShiftAmount =
4689           (INT32_BITS - CHAR_BITS * typeWidthInBytes(DestTy));
4690       const uint32_t Mask = (1 << (CHAR_BITS * typeWidthInBytes(DestTy))) - 1;
4691       auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4692       auto *T1 = I32Reg();
4693       auto *T2 = I32Reg();
4694       auto *T3 = I32Reg();
4695       auto *T4 = I32Reg();
4696       auto *T5 = I32Reg();
4697       auto *T6 = I32Reg();
4698       auto *T7 = I32Reg();
4699       auto *T8 = I32Reg();
4700       auto *T9 = I32Reg();
4701       _addiu(RegAt, getZero(), -4);
4702       _and(T1, ActualAddressR, RegAt);
4703       auto *Addr = formMemoryOperand(T1, DestTy);
4704       _andi(RegAt, ActualAddressR, 3);
4705       _sll(T2, RegAt, 3);
4706       _ori(RegAt, getZero(), Mask);
4707       _sllv(T3, RegAt, T2);
4708       _nor(T4, getZero(), T3);
4709       _andi(RegAt, ExpectedR, Mask);
4710       _sllv(T5, RegAt, T2);
4711       _andi(RegAt, NewR, Mask);
4712       _sllv(T6, RegAt, T2);
4713       _sync();
4714       Context.insert(Retry);
4715       _ll(T7, Addr);
4716       _and(T8, T7, T3);
4717       _br(NoTarget, NoTarget, T8, T5, Exit, CondMIPS32::Cond::NE);
4718       _and(RegAt, T7, T4);
4719       _or(T9, RegAt, T6);
4720       _sc(T9, Addr);
4721       _br(NoTarget, NoTarget, getZero(), T9, Retry, CondMIPS32::Cond::EQ);
4722       Context.insert<InstFakeUse>(getZero());
4723       Context.insert(Exit);
4724       _srlv(RegAt, T8, T2);
4725       _sll(RegAt, RegAt, ShiftAmount);
4726       _sra(RegAt, RegAt, ShiftAmount);
4727       _mov(Dest, RegAt);
4728       _sync();
4729       Context.insert<InstFakeUse>(T3);
4730       Context.insert<InstFakeUse>(T4);
4731       Context.insert<InstFakeUse>(T5);
4732       Context.insert<InstFakeUse>(T6);
4733       Context.insert<InstFakeUse>(T8);
4734       Context.insert<InstFakeUse>(ExpectedR);
4735       Context.insert<InstFakeUse>(NewR);
4736     } else {
4737       auto *T1 = I32Reg();
4738       auto *T2 = I32Reg();
4739       auto *NewR = legalizeToReg(New);
4740       auto *ExpectedR = legalizeToReg(Expected);
4741       auto *ActualAddressR = legalizeToReg(ActualAddress);
4742       _sync();
4743       Context.insert(Retry);
4744       _ll(T1, formMemoryOperand(ActualAddressR, DestTy));
4745       _br(NoTarget, NoTarget, T1, ExpectedR, Exit, CondMIPS32::Cond::NE);
4746       _mov(T2, NewR);
4747       _sc(T2, formMemoryOperand(ActualAddressR, DestTy));
4748       _br(NoTarget, NoTarget, T2, getZero(), Retry, CondMIPS32::Cond::EQ);
4749       Context.insert<InstFakeUse>(getZero());
4750       Context.insert(Exit);
4751       _mov(Dest, T1);
4752       _sync();
4753       Context.insert<InstFakeUse>(ExpectedR);
4754       Context.insert<InstFakeUse>(NewR);
4755     }
4756     return;
4757   }
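  // Both AtomicCmpxchg paths above are the classic MIPS LL/SC compare-and-
  // swap loop; a C-like outline of the word-sized case (ll/sc stand for the
  // load-linked/store-conditional instructions):
  //
  //   sync();
  //   uint32_t Old;
  //   do {
  //     Old = ll(Addr);                 // _ll(T1, ...)
  //     if (Old != Expected) break;     // _br ... Exit, NE
  //   } while (!sc(Addr, New));         // _sc(T2, ...); _br ... Retry, EQ
  //   sync();
  //   return Old;                       // _mov(Dest, T1)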
4758   case Intrinsics::AtomicRMW: {
4759     assert(isScalarIntegerType(DestTy));
4760     // We require the memory address to be naturally aligned. Given that is
4761     // the case, normal loads are atomic.
4762     if (!Intrinsics::isMemoryOrderValid(
4763             ID, getConstantMemoryOrder(Instr->getArg(3)))) {
4764       Func->setError("Unexpected memory ordering for AtomicRMW");
4765       return;
4766     }
4767 
4768     constexpr CfgNode *NoTarget = nullptr;
4769     InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4770     auto Operation = static_cast<Intrinsics::AtomicRMWOperation>(
4771         llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue());
4772     auto *New = Instr->getArg(2);
4773     auto *ActualAddress = Instr->getArg(1);
4774 
4775     if (DestTy == IceType_i64) {
4776       llvm::report_fatal_error("AtomicRMW.i64 should have been prelowered.");
4777       return;
4778     } else if (DestTy == IceType_i8 || DestTy == IceType_i16) {
4779       const uint32_t ShiftAmount =
4780           INT32_BITS - (CHAR_BITS * typeWidthInBytes(DestTy));
4781       const uint32_t Mask = (1 << (CHAR_BITS * typeWidthInBytes(DestTy))) - 1;
4782       auto *NewR = legalizeToReg(New);
4783       auto *ActualAddressR = legalizeToReg(ActualAddress);
4784       auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4785       auto *T1 = I32Reg();
4786       auto *T2 = I32Reg();
4787       auto *T3 = I32Reg();
4788       auto *T4 = I32Reg();
4789       auto *T5 = I32Reg();
4790       auto *T6 = I32Reg();
4791       auto *T7 = I32Reg();
4792       _sync();
4793       _addiu(RegAt, getZero(), -4);
4794       _and(T1, ActualAddressR, RegAt);
4795       _andi(RegAt, ActualAddressR, 3);
4796       _sll(T2, RegAt, 3);
4797       _ori(RegAt, getZero(), Mask);
4798       _sllv(T3, RegAt, T2);
4799       _nor(T4, getZero(), T3);
4800       _sllv(T5, NewR, T2);
4801       Context.insert(Retry);
4802       _ll(T6, formMemoryOperand(T1, DestTy));
4803       if (Operation != Intrinsics::AtomicExchange) {
4804         createArithInst(Operation, RegAt, T6, T5);
4805         _and(RegAt, RegAt, T3);
4806       }
4807       _and(T7, T6, T4);
4808       if (Operation == Intrinsics::AtomicExchange) {
4809         _or(RegAt, T7, T5);
4810       } else {
4811         _or(RegAt, T7, RegAt);
4812       }
4813       _sc(RegAt, formMemoryOperand(T1, DestTy));
4814       _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4815       Context.insert<InstFakeUse>(getZero());
4816       _and(RegAt, T6, T3);
4817       _srlv(RegAt, RegAt, T2);
4818       _sll(RegAt, RegAt, ShiftAmount);
4819       _sra(RegAt, RegAt, ShiftAmount);
4820       _mov(Dest, RegAt);
4821       _sync();
4822       Context.insert<InstFakeUse>(NewR);
4823       Context.insert<InstFakeUse>(Dest);
4824     } else {
4825       auto *T1 = I32Reg();
4826       auto *T2 = I32Reg();
4827       auto *NewR = legalizeToReg(New);
4828       auto *ActualAddressR = legalizeToReg(ActualAddress);
4829       _sync();
4830       Context.insert(Retry);
4831       _ll(T1, formMemoryOperand(ActualAddressR, DestTy));
4832       if (Operation == Intrinsics::AtomicExchange) {
4833         _mov(T2, NewR);
4834       } else {
4835         createArithInst(Operation, T2, T1, NewR);
4836       }
4837       _sc(T2, formMemoryOperand(ActualAddressR, DestTy));
4838       _br(NoTarget, NoTarget, T2, getZero(), Retry, CondMIPS32::Cond::EQ);
4839       Context.insert<InstFakeUse>(getZero());
4840       _mov(Dest, T1);
4841       _sync();
4842       Context.insert<InstFakeUse>(NewR);
4843       Context.insert<InstFakeUse>(Dest);
4844     }
4845     return;
4846   }
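  // The word-sized AtomicRMW loop above reduces to the following outline
  // (editorial; op is the operation dispatched by createArithInst):
  //
  //   sync();
  //   uint32_t Old, New;
  //   do {
  //     Old = ll(Addr);                 // _ll(T1, ...)
  //     New = op(Old, Arg);             // createArithInst / _mov for xchg
  //   } while (!sc(Addr, New));         // _sc(T2, ...); retry while sc == 0
  //   sync();
  //   return Old;                       // _mov(Dest, T1)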
4847   case Intrinsics::AtomicFence:
4848   case Intrinsics::AtomicFenceAll:
4849     assert(Dest == nullptr);
4850     _sync();
4851     return;
4852   case Intrinsics::AtomicIsLockFree: {
4853     Operand *ByteSize = Instr->getArg(0);
4854     auto *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize);
4855     auto *T = I32Reg();
4856     if (CI == nullptr) {
4857       // The PNaCl ABI requires the byte size to be a compile-time constant.
4858       Func->setError("AtomicIsLockFree byte size should be compile-time const");
4859       return;
4860     }
4861     static constexpr int32_t NotLockFree = 0;
4862     static constexpr int32_t LockFree = 1;
4863     int32_t Result = NotLockFree;
4864     switch (CI->getValue()) {
4865     case 1:
4866     case 2:
4867     case 4:
4868       Result = LockFree;
4869       break;
4870     }
4871     _addiu(T, getZero(), Result);
4872     _mov(Dest, T);
4873     return;
4874   }
4875   case Intrinsics::Bswap: {
4876     auto *Src = Instr->getArg(0);
4877     const Type SrcTy = Src->getType();
4878     assert(SrcTy == IceType_i16 || SrcTy == IceType_i32 ||
4879            SrcTy == IceType_i64);
4880     switch (SrcTy) {
4881     case IceType_i16: {
4882       auto *T1 = I32Reg();
4883       auto *T2 = I32Reg();
4884       auto *T3 = I32Reg();
4885       auto *T4 = I32Reg();
4886       auto *SrcR = legalizeToReg(Src);
4887       _sll(T1, SrcR, 8);
4888       _lui(T2, Ctx->getConstantInt32(255));
4889       _and(T1, T1, T2);
4890       _sll(T3, SrcR, 24);
4891       _or(T1, T3, T1);
4892       _srl(T4, T1, 16);
4893       _mov(Dest, T4);
4894       return;
4895     }
4896     case IceType_i32: {
4897       auto *T1 = I32Reg();
4898       auto *T2 = I32Reg();
4899       auto *T3 = I32Reg();
4900       auto *T4 = I32Reg();
4901       auto *T5 = I32Reg();
4902       auto *SrcR = legalizeToReg(Src);
4903       _srl(T1, SrcR, 24);
4904       _srl(T2, SrcR, 8);
4905       _andi(T2, T2, 0xFF00);
4906       _or(T1, T2, T1);
4907       _sll(T4, SrcR, 8);
4908       _lui(T3, Ctx->getConstantInt32(255));
4909       _and(T4, T4, T3);
4910       _sll(T5, SrcR, 24);
4911       _or(T4, T5, T4);
4912       _or(T4, T4, T1);
4913       _mov(Dest, T4);
4914       return;
4915     }
4916     case IceType_i64: {
4917       auto *T1 = I32Reg();
4918       auto *T2 = I32Reg();
4919       auto *T3 = I32Reg();
4920       auto *T4 = I32Reg();
4921       auto *T5 = I32Reg();
4922       auto *T6 = I32Reg();
4923       auto *T7 = I32Reg();
4924       auto *T8 = I32Reg();
4925       auto *T9 = I32Reg();
4926       auto *T10 = I32Reg();
4927       auto *T11 = I32Reg();
4928       auto *T12 = I32Reg();
4929       auto *T13 = I32Reg();
4930       auto *T14 = I32Reg();
4931       auto *T15 = I32Reg();
4932       auto *T16 = I32Reg();
4933       auto *T17 = I32Reg();
4934       auto *T18 = I32Reg();
4935       auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
4936       auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
4937       Src = legalizeUndef(Src);
4938       auto *SrcLoR = legalizeToReg(loOperand(Src));
4939       auto *SrcHiR = legalizeToReg(hiOperand(Src));
4940       _sll(T1, SrcHiR, 8);
4941       _srl(T2, SrcHiR, 24);
4942       _srl(T3, SrcHiR, 8);
4943       _andi(T3, T3, 0xFF00);
4944       _lui(T4, Ctx->getConstantInt32(255));
4945       _or(T5, T3, T2);
4946       _and(T6, T1, T4);
4947       _sll(T7, SrcHiR, 24);
4948       _or(T8, T7, T6);
4949       _srl(T9, SrcLoR, 24);
4950       _srl(T10, SrcLoR, 8);
4951       _andi(T11, T10, 0xFF00);
4952       _or(T12, T8, T5);
4953       _or(T13, T11, T9);
4954       _sll(T14, SrcLoR, 8);
4955       _and(T15, T14, T4);
4956       _sll(T16, SrcLoR, 24);
4957       _or(T17, T16, T15);
4958       _or(T18, T17, T13);
4959       _mov(DestLo, T12);
4960       _mov(DestHi, T18);
4961       return;
4962     }
4963     default:
4964       llvm::report_fatal_error("Control flow should never have reached here.");
4965     }
4966     return;
4967   }
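  // The i32 Bswap sequence above is the usual four-term byte swap; as
  // portable C (editorial sketch):
  //
  //   static uint32_t bswap32(uint32_t X) {
  //     return (X >> 24) |               // _srl(T1, SrcR, 24)
  //            ((X >> 8) & 0xff00u) |    // _srl(T2, SrcR, 8); _andi
  //            ((X << 8) & 0xff0000u) |  // _sll(T4, SrcR, 8); _and (mask via _lui 255)
  //            (X << 24);                // _sll(T5, SrcR, 24)
  //   }
  //
  // The i64 variant swaps each half the same way and exchanges the halves:
  // DestLo receives the swapped high word and DestHi the swapped low word.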
4968   case Intrinsics::Ctpop: {
4969     llvm::report_fatal_error("Ctpop should have been prelowered.");
4970     return;
4971   }
4972   case Intrinsics::Ctlz: {
4973     auto *Src = Instr->getArg(0);
4974     const Type SrcTy = Src->getType();
4975     assert(SrcTy == IceType_i32 || SrcTy == IceType_i64);
4976     switch (SrcTy) {
4977     case IceType_i32: {
4978       auto *T = I32Reg();
4979       auto *SrcR = legalizeToReg(Src);
4980       _clz(T, SrcR);
4981       _mov(Dest, T);
4982       break;
4983     }
4984     case IceType_i64: {
4985       auto *T1 = I32Reg();
4986       auto *T2 = I32Reg();
4987       auto *T3 = I32Reg();
4988       auto *T4 = I32Reg();
4989       auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
4990       auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
4991       Variable *SrcHiR = legalizeToReg(hiOperand(Src));
4992       Variable *SrcLoR = legalizeToReg(loOperand(Src));
4993       _clz(T1, SrcHiR);
4994       _clz(T2, SrcLoR);
4995       _addiu(T3, T2, 32);
4996       _movn(T3, T1, SrcHiR);
4997       _addiu(T4, getZero(), 0);
4998       _mov(DestHi, T4);
4999       _mov(DestLo, T3);
5000       break;
5001     }
5002     default:
5003       llvm::report_fatal_error("Control flow should never have reached here.");
5004     }
5005     break;
5006   }
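  // The i64 Ctlz above selects between the halves with movn (editorial):
  //
  //   int clz64(uint32_t Hi, uint32_t Lo) {
  //     return (Hi != 0) ? clz32(Hi)       // _movn(T3, T1, SrcHiR)
  //                      : 32 + clz32(Lo); // _addiu(T3, T2, 32)
  //   }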
5007   case Intrinsics::Cttz: {
5008     auto *Src = Instr->getArg(0);
5009     const Type SrcTy = Src->getType();
5010     assert(SrcTy == IceType_i32 || SrcTy == IceType_i64);
5011     switch (SrcTy) {
5012     case IceType_i32: {
5013       auto *T1 = I32Reg();
5014       auto *T2 = I32Reg();
5015       auto *T3 = I32Reg();
5016       auto *T4 = I32Reg();
5017       auto *T5 = I32Reg();
5018       auto *T6 = I32Reg();
5019       auto *SrcR = legalizeToReg(Src);
5020       _addiu(T1, SrcR, -1);
5021       _not(T2, SrcR);
5022       _and(T3, T2, T1);
5023       _clz(T4, T3);
5024       _addiu(T5, getZero(), 32);
5025       _subu(T6, T5, T4);
5026       _mov(Dest, T6);
5027       break;
5028     }
5029     case IceType_i64: {
5030       auto *THi1 = I32Reg();
5031       auto *THi2 = I32Reg();
5032       auto *THi3 = I32Reg();
5033       auto *THi4 = I32Reg();
5034       auto *THi5 = I32Reg();
5035       auto *THi6 = I32Reg();
5036       auto *TLo1 = I32Reg();
5037       auto *TLo2 = I32Reg();
5038       auto *TLo3 = I32Reg();
5039       auto *TLo4 = I32Reg();
5040       auto *TLo5 = I32Reg();
5041       auto *TLo6 = I32Reg();
5042       auto *TResHi = I32Reg();
5043       auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
5044       auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
5045       Variable *SrcHiR = legalizeToReg(hiOperand(Src));
5046       Variable *SrcLoR = legalizeToReg(loOperand(Src));
5047       _addiu(THi1, SrcHiR, -1);
5048       _not(THi2, SrcHiR);
5049       _and(THi3, THi2, THi1);
5050       _clz(THi4, THi3);
5051       _addiu(THi5, getZero(), 64);
5052       _subu(THi6, THi5, THi4);
5053       _addiu(TLo1, SrcLoR, -1);
5054       _not(TLo2, SrcLoR);
5055       _and(TLo3, TLo2, TLo1);
5056       _clz(TLo4, TLo3);
5057       _addiu(TLo5, getZero(), 32);
5058       _subu(TLo6, TLo5, TLo4);
5059       _movn(THi6, TLo6, SrcLoR);
5060       _addiu(TResHi, getZero(), 0);
5061       _mov(DestHi, TResHi);
5062       _mov(DestLo, THi6);
5063       break;
5064     }
5065     default:
5066       llvm::report_fatal_error("Control flow should never have reached here.");
5067     }
5068     return;
5069   }
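  // Cttz is derived from clz via the identity cttz(x) == 32 - clz(~x & (x - 1)):
  // ~x & (x - 1) keeps exactly the bits below the lowest set bit of x. As a
  // standalone sketch (hypothetical helper):
  //
  //   static int cttz32(uint32_t X) {
  //     uint32_t Below = ~X & (X - 1);  // _not; _addiu(T1, SrcR, -1); _and
  //     return 32 - clz32(Below);       // _clz(T4, T3); _subu(T6, T5, T4)
  //   }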
5070   case Intrinsics::Fabs: {
5071     if (isScalarFloatingType(DestTy)) {
5072       Variable *T = makeReg(DestTy);
5073       if (DestTy == IceType_f32) {
5074         _abs_s(T, legalizeToReg(Instr->getArg(0)));
5075       } else {
5076         _abs_d(T, legalizeToReg(Instr->getArg(0)));
5077       }
5078       _mov(Dest, T);
5079     }
5080     return;
5081   }
5082   case Intrinsics::Longjmp: {
5083     llvm::report_fatal_error("longjmp should have been prelowered.");
5084     return;
5085   }
5086   case Intrinsics::Memcpy: {
5087     llvm::report_fatal_error("memcpy should have been prelowered.");
5088     return;
5089   }
5090   case Intrinsics::Memmove: {
5091     llvm::report_fatal_error("memmove should have been prelowered.");
5092     return;
5093   }
5094   case Intrinsics::Memset: {
5095     llvm::report_fatal_error("memset should have been prelowered.");
5096     return;
5097   }
5098   case Intrinsics::Setjmp: {
5099     llvm::report_fatal_error("setjmp should have been prelowered.");
5100     return;
5101   }
5102   case Intrinsics::Sqrt: {
5103     if (isScalarFloatingType(DestTy)) {
5104       Variable *T = makeReg(DestTy);
5105       if (DestTy == IceType_f32) {
5106         _sqrt_s(T, legalizeToReg(Instr->getArg(0)));
5107       } else {
5108         _sqrt_d(T, legalizeToReg(Instr->getArg(0)));
5109       }
5110       _mov(Dest, T);
5111     } else {
5112       UnimplementedLoweringError(this, Instr);
5113     }
5114     return;
5115   }
5116   case Intrinsics::Stacksave: {
5117     Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
5118     _mov(Dest, SP);
5119     return;
5120   }
5121   case Intrinsics::Stackrestore: {
5122     Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
5123     Variable *Val = legalizeToReg(Instr->getArg(0));
5124     _mov(SP, Val);
5125     return;
5126   }
5127   case Intrinsics::Trap: {
5128     const uint32_t TrapCodeZero = 0;
5129     _teq(getZero(), getZero(), TrapCodeZero);
5130     return;
5131   }
5132   case Intrinsics::LoadSubVector: {
5133     UnimplementedLoweringError(this, Instr);
5134     return;
5135   }
5136   case Intrinsics::StoreSubVector: {
5137     UnimplementedLoweringError(this, Instr);
5138     return;
5139   }
5140   default: // UnknownIntrinsic
5141     Func->setError("Unexpected intrinsic");
5142     return;
5143   }
5144   return;
5145 }
5146 
5147 void TargetMIPS32::lowerLoad(const InstLoad *Instr) {
5148   // A Load instruction can be treated the same as an Assign instruction, after
5149   // the source operand is transformed into an OperandMIPS32Mem operand.
5150   Type Ty = Instr->getDest()->getType();
5151   Operand *Src0 = formMemoryOperand(Instr->getLoadAddress(), Ty);
5152   Variable *DestLoad = Instr->getDest();
5153   auto *Assign = InstAssign::create(Func, DestLoad, Src0);
5154   lowerAssign(Assign);
5155 }
5156 
5157 namespace {
5158 void dumpAddressOpt(const Cfg *Func, const Variable *Base, int32_t Offset,
5159                     const Inst *Reason) {
5160   if (!BuildDefs::dump())
5161     return;
5162   if (!Func->isVerbose(IceV_AddrOpt))
5163     return;
5164   OstreamLocker _(Func->getContext());
5165   Ostream &Str = Func->getContext()->getStrDump();
5166   Str << "Instruction: ";
5167   Reason->dumpDecorated(Func);
5168   Str << "  results in Base=";
5169   if (Base)
5170     Base->dump(Func);
5171   else
5172     Str << "<null>";
5173   Str << ", Offset=" << Offset << "\n";
5174 }
5175 
5176 bool matchAssign(const VariablesMetadata *VMetadata, Variable **Var,
5177                  int32_t *Offset, const Inst **Reason) {
5178   // Var originates from Var=SrcVar ==> set Var:=SrcVar
5179   if (*Var == nullptr)
5180     return false;
5181   const Inst *VarAssign = VMetadata->getSingleDefinition(*Var);
5182   if (!VarAssign)
5183     return false;
5184   assert(!VMetadata->isMultiDef(*Var));
5185   if (!llvm::isa<InstAssign>(VarAssign))
5186     return false;
5187 
5188   Operand *SrcOp = VarAssign->getSrc(0);
5189   bool Optimized = false;
5190   if (auto *SrcVar = llvm::dyn_cast<Variable>(SrcOp)) {
5191     if (!VMetadata->isMultiDef(SrcVar) ||
5192         // TODO: ensure SrcVar stays single-BB
5193         false) {
5194       Optimized = true;
5195       *Var = SrcVar;
5196     }
5197   } else if (auto *Const = llvm::dyn_cast<ConstantInteger32>(SrcOp)) {
5198     // Fold the constant into the running offset, guarding against overflow.
5199     int32_t MoreOffset = Const->getValue();
5200     if (Utils::WouldOverflowAdd(*Offset, MoreOffset))
5201       return false;
5202     *Var = nullptr;
5203     *Offset += MoreOffset;
5204     Optimized = true;
5205   }
5206 
5207   if (Optimized) {
5208     *Reason = VarAssign;
5209   }
5210 
5211   return Optimized;
5212 }
5213 
5214 bool isAddOrSub(const Inst *Instr, InstArithmetic::OpKind *Kind) {
5215   if (const auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) {
5216     switch (Arith->getOp()) {
5217     default:
5218       return false;
5219     case InstArithmetic::Add:
5220     case InstArithmetic::Sub:
5221       *Kind = Arith->getOp();
5222       return true;
5223     }
5224   }
5225   return false;
5226 }
5227 
5228 bool matchOffsetBase(const VariablesMetadata *VMetadata, Variable **Base,
5229                      int32_t *Offset, const Inst **Reason) {
5230   // Base is Base=Var+Const || Base is Base=Const+Var ==>
5231   //   set Base=Var, Offset+=Const
5232   // Base is Base=Var-Const ==>
5233   //   set Base=Var, Offset-=Const
5234   if (*Base == nullptr)
5235     return false;
5236   const Inst *BaseInst = VMetadata->getSingleDefinition(*Base);
5237   if (BaseInst == nullptr) {
5238     return false;
5239   }
5240   assert(!VMetadata->isMultiDef(*Base));
5241 
5242   auto *ArithInst = llvm::dyn_cast<const InstArithmetic>(BaseInst);
5243   if (ArithInst == nullptr)
5244     return false;
5245   InstArithmetic::OpKind Kind;
5246   if (!isAddOrSub(ArithInst, &Kind))
5247     return false;
5248   bool IsAdd = Kind == InstArithmetic::Add;
5249   Operand *Src0 = ArithInst->getSrc(0);
5250   Operand *Src1 = ArithInst->getSrc(1);
5251   auto *Var0 = llvm::dyn_cast<Variable>(Src0);
5252   auto *Var1 = llvm::dyn_cast<Variable>(Src1);
5253   auto *Const0 = llvm::dyn_cast<ConstantInteger32>(Src0);
5254   auto *Const1 = llvm::dyn_cast<ConstantInteger32>(Src1);
5255   Variable *NewBase = nullptr;
5256   int32_t NewOffset = *Offset;
5257 
5258   if (Var0 == nullptr && Const0 == nullptr) {
5259     assert(llvm::isa<ConstantRelocatable>(Src0));
5260     return false;
5261   }
5262 
5263   if (Var1 == nullptr && Const1 == nullptr) {
5264     assert(llvm::isa<ConstantRelocatable>(Src1));
5265     return false;
5266   }
5267 
5268   if (Var0 && Var1)
5269     // TODO(jpp): merge base/index splitting into here.
5270     return false;
5271   if (!IsAdd && Var1)
5272     return false;
5273   if (Var0)
5274     NewBase = Var0;
5275   else if (Var1)
5276     NewBase = Var1;
5277   // Compute the updated constant offset.
5278   if (Const0) {
5279     int32_t MoreOffset = IsAdd ? Const0->getValue() : -Const0->getValue();
5280     if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
5281       return false;
5282     NewOffset += MoreOffset;
5283   }
5284   if (Const1) {
5285     int32_t MoreOffset = IsAdd ? Const1->getValue() : -Const1->getValue();
5286     if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
5287       return false;
5288     NewOffset += MoreOffset;
5289   }
5290 
5291   // Update the computed address parameters once we are sure optimization
5292   // is valid.
5293   *Base = NewBase;
5294   *Offset = NewOffset;
5295   *Reason = BaseInst;
5296   return true;
5297 }
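
// Taken together, matchAssign and matchOffsetBase let formAddressingMode
// below walk the def chain of a load/store address and fold the address
// arithmetic into an immediate offset, e.g. (editorial example):
//
//   t = p + 8;  q = t + 4;  load q   ==>   load 12(p)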
5298 } // end of anonymous namespace
5299 
5300 OperandMIPS32Mem *TargetMIPS32::formAddressingMode(Type Ty, Cfg *Func,
5301                                                    const Inst *LdSt,
5302                                                    Operand *Base) {
5303   assert(Base != nullptr);
5304   int32_t OffsetImm = 0;
5305 
5306   Func->resetCurrentNode();
5307   if (Func->isVerbose(IceV_AddrOpt)) {
5308     OstreamLocker _(Func->getContext());
5309     Ostream &Str = Func->getContext()->getStrDump();
5310     Str << "\nAddress mode formation:\t";
5311     LdSt->dumpDecorated(Func);
5312   }
5313 
5314   if (isVectorType(Ty)) {
5315     return nullptr;
5316   }
5317 
5318   auto *BaseVar = llvm::dyn_cast<Variable>(Base);
5319   if (BaseVar == nullptr)
5320     return nullptr;
5321 
5322   const VariablesMetadata *VMetadata = Func->getVMetadata();
5323   const Inst *Reason = nullptr;
5324 
5325   do {
5326     if (Reason != nullptr) {
5327       dumpAddressOpt(Func, BaseVar, OffsetImm, Reason);
5328       Reason = nullptr;
5329     }
5330 
5331     if (matchAssign(VMetadata, &BaseVar, &OffsetImm, &Reason)) {
5332       continue;
5333     }
5334 
5335     if (matchOffsetBase(VMetadata, &BaseVar, &OffsetImm, &Reason)) {
5336       continue;
5337     }
5338   } while (Reason);
5339 
5340   if (BaseVar == nullptr) {
5341     // We need a base register rather than a bare OffsetImm. Move OffsetImm
5342     // into BaseVar and form 0(BaseVar) addressing.
5343     const Type PointerType = getPointerType();
5344     BaseVar = makeReg(PointerType);
5345     Context.insert<InstAssign>(BaseVar, Ctx->getConstantInt32(OffsetImm));
5346     OffsetImm = 0;
5347   } else if (OffsetImm != 0) {
5348     // If OffsetImm does not fit in a signed 16-bit immediate, fold it into
5349     // BaseVar and form 0(BaseVar) addressing.
5350     const int32_t PositiveOffset = OffsetImm > 0 ? OffsetImm : -OffsetImm;
5351     const InstArithmetic::OpKind Op =
5352         OffsetImm > 0 ? InstArithmetic::Add : InstArithmetic::Sub;
5353     constexpr bool ZeroExt = false;
5354     if (!OperandMIPS32Mem::canHoldOffset(Ty, ZeroExt, OffsetImm)) {
5355       const Type PointerType = getPointerType();
5356       Variable *T = makeReg(PointerType);
5357       Context.insert<InstArithmetic>(Op, T, BaseVar,
5358                                      Ctx->getConstantInt32(PositiveOffset));
5359       BaseVar = T;
5360       OffsetImm = 0;
5361     }
5362   }
5363 
5364   assert(BaseVar != nullptr);
5365   assert(OffsetImm < 0 ? (-OffsetImm & 0x0000ffff) == -OffsetImm
5366                        : (OffsetImm & 0x0000ffff) == OffsetImm);
5367 
5368   return OperandMIPS32Mem::create(
5369       Func, Ty, BaseVar,
5370       llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(OffsetImm)));
5371 }
5372 
5373 void TargetMIPS32::doAddressOptLoad() {
5374   Inst *Instr = iteratorToInst(Context.getCur());
5375   assert(llvm::isa<InstLoad>(Instr));
5376   Variable *Dest = Instr->getDest();
5377   Operand *Addr = Instr->getSrc(0);
5378   if (OperandMIPS32Mem *Mem =
5379           formAddressingMode(Dest->getType(), Func, Instr, Addr)) {
5380     Instr->setDeleted();
5381     Context.insert<InstLoad>(Dest, Mem);
5382   }
5383 }
5384 
5385 void TargetMIPS32::lowerPhi(const InstPhi * /*Instr*/) {
5386   Func->setError("Phi found in regular instruction list");
5387 }
5388 
5389 void TargetMIPS32::lowerRet(const InstRet *Instr) {
5390   Variable *Reg = nullptr;
5391   if (Instr->hasRetValue()) {
5392     Operand *Src0 = Instr->getRetValue();
5393     switch (Src0->getType()) {
5394     case IceType_f32: {
5395       Operand *Src0F = legalizeToReg(Src0);
5396       Reg = makeReg(Src0F->getType(), RegMIPS32::Reg_F0);
5397       _mov(Reg, Src0F);
5398       break;
5399     }
5400     case IceType_f64: {
5401       Operand *Src0F = legalizeToReg(Src0);
5402       Reg = makeReg(Src0F->getType(), RegMIPS32::Reg_F0F1);
5403       _mov(Reg, Src0F);
5404       break;
5405     }
5406     case IceType_i1:
5407     case IceType_i8:
5408     case IceType_i16:
5409     case IceType_i32: {
5410       Operand *Src0F = legalizeToReg(Src0);
5411       Reg = makeReg(Src0F->getType(), RegMIPS32::Reg_V0);
5412       _mov(Reg, Src0F);
5413       break;
5414     }
5415     case IceType_i64: {
5416       Src0 = legalizeUndef(Src0);
5417       Variable *R0 = legalizeToReg(loOperand(Src0), RegMIPS32::Reg_V0);
5418       Variable *R1 = legalizeToReg(hiOperand(Src0), RegMIPS32::Reg_V1);
5419       Reg = R0;
5420       Context.insert<InstFakeUse>(R1);
5421       break;
5422     }
5423     case IceType_v4i1:
5424     case IceType_v8i1:
5425     case IceType_v16i1:
5426     case IceType_v16i8:
5427     case IceType_v8i16:
5428     case IceType_v4i32: {
5429       auto *SrcVec = llvm::dyn_cast<VariableVecOn32>(legalizeUndef(Src0));
5430       Variable *V0 =
5431           legalizeToReg(SrcVec->getContainers()[0], RegMIPS32::Reg_V0);
5432       Variable *V1 =
5433           legalizeToReg(SrcVec->getContainers()[1], RegMIPS32::Reg_V1);
5434       Variable *A0 =
5435           legalizeToReg(SrcVec->getContainers()[2], RegMIPS32::Reg_A0);
5436       Variable *A1 =
5437           legalizeToReg(SrcVec->getContainers()[3], RegMIPS32::Reg_A1);
5438       Reg = V0;
5439       Context.insert<InstFakeUse>(V1);
5440       Context.insert<InstFakeUse>(A0);
5441       Context.insert<InstFakeUse>(A1);
5442       break;
5443     }
5444     case IceType_v4f32: {
5445       auto *SrcVec = llvm::dyn_cast<VariableVecOn32>(legalizeUndef(Src0));
5446       Reg = getImplicitRet();
5447       auto *RegT = legalizeToReg(Reg);
5448       // Return the vector through the buffer in the implicit argument a0
5449       for (SizeT i = 0; i < SrcVec->ContainersPerVector; ++i) {
5450         OperandMIPS32Mem *Mem = OperandMIPS32Mem::create(
5451             Func, IceType_f32, RegT,
5452             llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(i * 4)));
5453         Variable *Var = legalizeToReg(SrcVec->getContainers()[i]);
5454         _sw(Var, Mem);
5455       }
5456       Variable *V0 = makeReg(IceType_i32, RegMIPS32::Reg_V0);
5457       _mov(V0, Reg); // move v0,a0
5458       Context.insert<InstFakeUse>(Reg);
5459       Context.insert<InstFakeUse>(V0);
5460       break;
5461     }
5462     default:
5463       llvm::report_fatal_error("Ret: Invalid type.");
5464       break;
5465     }
5466   }
5467   _ret(getPhysicalRegister(RegMIPS32::Reg_RA), Reg);
5468 }
5469 
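// For v4f32 returns above, the convention used is a hidden-buffer return:
// the caller passes a buffer address as an implicit argument (getImplicitRet),
// the callee stores the four f32 containers into it, and the buffer address
// is echoed back in v0. Roughly (editorial):
//
//   // caller                     // callee (code above)
//   float Buf[4];                 //   sw Var_i, (i * 4)(a0)
//   callee(Buf, ...);             //   move v0, a0
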
5470 void TargetMIPS32::lowerSelect(const InstSelect *Instr) {
5471   Variable *Dest = Instr->getDest();
5472   const Type DestTy = Dest->getType();
5473 
5474   if (isVectorType(DestTy)) {
5475     llvm::report_fatal_error("Select: Destination type is vector");
5476     return;
5477   }
5478 
5479   Variable *DestR = nullptr;
5480   Variable *DestHiR = nullptr;
5481   Variable *SrcTR = nullptr;
5482   Variable *SrcTHiR = nullptr;
5483   Variable *SrcFR = nullptr;
5484   Variable *SrcFHiR = nullptr;
5485 
5486   if (DestTy == IceType_i64) {
5487     DestR = llvm::cast<Variable>(loOperand(Dest));
5488     DestHiR = llvm::cast<Variable>(hiOperand(Dest));
5489     SrcTR = legalizeToReg(loOperand(legalizeUndef(Instr->getTrueOperand())));
5490     SrcTHiR = legalizeToReg(hiOperand(legalizeUndef(Instr->getTrueOperand())));
5491     SrcFR = legalizeToReg(loOperand(legalizeUndef(Instr->getFalseOperand())));
5492     SrcFHiR = legalizeToReg(hiOperand(legalizeUndef(Instr->getFalseOperand())));
5493   } else {
5494     SrcTR = legalizeToReg(legalizeUndef(Instr->getTrueOperand()));
5495     SrcFR = legalizeToReg(legalizeUndef(Instr->getFalseOperand()));
5496   }
5497 
5498   Variable *ConditionR = legalizeToReg(Instr->getCondition());
5499 
5500   assert(Instr->getCondition()->getType() == IceType_i1);
5501 
5502   switch (DestTy) {
5503   case IceType_i1:
5504   case IceType_i8:
5505   case IceType_i16:
5506   case IceType_i32:
5507     _movn(SrcFR, SrcTR, ConditionR);
5508     _mov(Dest, SrcFR);
5509     break;
5510   case IceType_i64:
5511     _movn(SrcFR, SrcTR, ConditionR);
5512     _movn(SrcFHiR, SrcTHiR, ConditionR);
5513     _mov(DestR, SrcFR);
5514     _mov(DestHiR, SrcFHiR);
5515     break;
5516   case IceType_f32:
5517     _movn_s(SrcFR, SrcTR, ConditionR);
5518     _mov(Dest, SrcFR);
5519     break;
5520   case IceType_f64:
5521     _movn_d(SrcFR, SrcTR, ConditionR);
5522     _mov(Dest, SrcFR);
5523     break;
5524   default:
5525     llvm::report_fatal_error("Select: Invalid type.");
5526   }
5527 }
5528 
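// Select is lowered branch-free: movn conditionally overwrites the "false"
// value with the "true" value. In effect (editorial):
//
//   Result = SrcF;
//   if (Cond != 0)
//     Result = SrcT;    // _movn(SrcFR, SrcTR, ConditionR)
//   Dest = Result;      // _mov(Dest, SrcFR)
//
// The i64 case applies this once per 32-bit half; f32/f64 use the FPU forms
// _movn_s/_movn_d.
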
5529 void TargetMIPS32::lowerShuffleVector(const InstShuffleVector *Instr) {
5530   UnimplementedLoweringError(this, Instr);
5531 }
5532 
5533 void TargetMIPS32::lowerStore(const InstStore *Instr) {
5534   Operand *Value = Instr->getData();
5535   Operand *Addr = Instr->getStoreAddress();
5536   OperandMIPS32Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
5537   Type Ty = NewAddr->getType();
5538 
5539   if (Ty == IceType_i64) {
5540     Value = legalizeUndef(Value);
5541     Variable *ValueHi = legalizeToReg(hiOperand(Value));
5542     Variable *ValueLo = legalizeToReg(loOperand(Value));
5543     _sw(ValueHi, llvm::cast<OperandMIPS32Mem>(hiOperand(NewAddr)));
5544     _sw(ValueLo, llvm::cast<OperandMIPS32Mem>(loOperand(NewAddr)));
5545   } else if (isVectorType(Value->getType())) {
5546     auto *DataVec = llvm::dyn_cast<VariableVecOn32>(Value);
5547     for (SizeT i = 0; i < DataVec->ContainersPerVector; ++i) {
5548       auto *DCont = legalizeToReg(DataVec->getContainers()[i]);
5549       auto *MCont = llvm::cast<OperandMIPS32Mem>(
5550           getOperandAtIndex(NewAddr, IceType_i32, i));
5551       _sw(DCont, MCont);
5552     }
5553   } else {
5554     Variable *ValueR = legalizeToReg(Value);
5555     _sw(ValueR, NewAddr);
5556   }
5557 }
5558 
5559 void TargetMIPS32::doAddressOptStore() {
5560   Inst *Instr = iteratorToInst(Context.getCur());
5561   assert(llvm::isa<InstStore>(Instr));
5562   Operand *Src = Instr->getSrc(0);
5563   Operand *Addr = Instr->getSrc(1);
5564   if (OperandMIPS32Mem *Mem =
5565           formAddressingMode(Src->getType(), Func, Instr, Addr)) {
5566     Instr->setDeleted();
5567     Context.insert<InstStore>(Src, Mem);
5568   }
5569 }
5570 
5571 void TargetMIPS32::lowerSwitch(const InstSwitch *Instr) {
5572   Operand *Src = Instr->getComparison();
5573   SizeT NumCases = Instr->getNumCases();
5574   if (Src->getType() == IceType_i64) {
5575     Src = legalizeUndef(Src);
5576     Variable *Src0Lo = legalizeToReg(loOperand(Src));
5577     Variable *Src0Hi = legalizeToReg(hiOperand(Src));
5578     for (SizeT I = 0; I < NumCases; ++I) {
5579       Operand *ValueLo = Ctx->getConstantInt32(Instr->getValue(I));
5580       Operand *ValueHi = Ctx->getConstantInt32(Instr->getValue(I) >> 32);
5581       CfgNode *TargetTrue = Instr->getLabel(I);
5582       constexpr CfgNode *NoTarget = nullptr;
5583       ValueHi = legalizeToReg(ValueHi);
5584       InstMIPS32Label *IntraLabel = InstMIPS32Label::create(Func, this);
5585       _br(NoTarget, NoTarget, Src0Hi, ValueHi, IntraLabel,
5586           CondMIPS32::Cond::NE);
5587       ValueLo = legalizeToReg(ValueLo);
5588       _br(NoTarget, TargetTrue, Src0Lo, ValueLo, CondMIPS32::Cond::EQ);
5589       Context.insert(IntraLabel);
5590     }
5591     _br(Instr->getLabelDefault());
5592     return;
5593   }
5594   Variable *SrcVar = legalizeToReg(Src);
5595   assert(SrcVar->mustHaveReg());
5596   for (SizeT I = 0; I < NumCases; ++I) {
5597     Operand *Value = Ctx->getConstantInt32(Instr->getValue(I));
5598     CfgNode *TargetTrue = Instr->getLabel(I);
5599     constexpr CfgNode *NoTargetFalse = nullptr;
5600     Value = legalizeToReg(Value);
5601     _br(NoTargetFalse, TargetTrue, SrcVar, Value, CondMIPS32::Cond::EQ);
5602   }
5603   _br(Instr->getLabelDefault());
5604 }
5605 
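// Each i64 switch case above is matched with a branch pair: if the high
// words differ, skip to the next case via IntraLabel; otherwise branch to
// the case target when the low words are equal. In effect (editorial):
//
//   if (SrcHi == ValueHi && SrcLo == ValueLo)
//     goto CaseTarget;
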
5606 void TargetMIPS32::lowerBreakpoint(const InstBreakpoint *Instr) {
5607   UnimplementedLoweringError(this, Instr);
5608 }
5609 
5610 void TargetMIPS32::lowerUnreachable(const InstUnreachable *) {
5611   const uint32_t TrapCodeZero = 0;
5612   _teq(getZero(), getZero(), TrapCodeZero);
5613 }
5614 
5615 void TargetMIPS32::lowerOther(const Inst *Instr) {
5616   if (llvm::isa<InstMIPS32Sync>(Instr)) {
5617     _sync();
5618   } else {
5619     TargetLowering::lowerOther(Instr);
5620   }
5621 }
5622 
5623 // Turn an i64 Phi instruction into a pair of i32 Phi instructions, to preserve
5624 // integrity of liveness analysis. Undef values are also turned into zeroes,
5625 // since loOperand() and hiOperand() don't expect Undef input.
5626 void TargetMIPS32::prelowerPhis() {
5627   PhiLowering::prelowerPhis32Bit<TargetMIPS32>(this, Context.getNode(), Func);
5628 }
5629 
5630 void TargetMIPS32::postLower() {
5631   if (Func->getOptLevel() == Opt_m1)
5632     return;
5633   markRedefinitions();
5634   Context.availabilityUpdate();
5635 }
5636 
5637 /* TODO(jvoung): avoid duplicate symbols with multiple targets.
5638 void ConstantUndef::emitWithoutDollar(GlobalContext *) const {
5639   llvm_unreachable("Not expecting to emitWithoutDollar undef");
5640 }
5641 
5642 void ConstantUndef::emit(GlobalContext *) const {
5643   llvm_unreachable("undef value encountered by emitter.");
5644 }
5645 */
5646 
5647 TargetDataMIPS32::TargetDataMIPS32(GlobalContext *Ctx)
5648     : TargetDataLowering(Ctx) {}
5649 
5650 // Generate .MIPS.abiflags section. This section contains a versioned data
5651 // structure with essential information required for the loader to determine
5652 // the requirements of the application.
5653 void TargetDataMIPS32::emitTargetRODataSections() {
5654   struct MipsABIFlagsSection Flags;
5655   ELFObjectWriter *Writer = Ctx->getObjectWriter();
5656   const std::string Name = ".MIPS.abiflags";
5657   const llvm::ELF::Elf64_Word ShType = llvm::ELF::SHT_MIPS_ABIFLAGS;
5658   const llvm::ELF::Elf64_Xword ShFlags = llvm::ELF::SHF_ALLOC;
5659   const llvm::ELF::Elf64_Xword ShAddralign = 8;
5660   const llvm::ELF::Elf64_Xword ShEntsize = sizeof(Flags);
5661   Writer->writeTargetRODataSection(
5662       Name, ShType, ShFlags, ShAddralign, ShEntsize,
5663       llvm::StringRef(reinterpret_cast<const char *>(&Flags), sizeof(Flags)));
5664 }
5665 
5666 void TargetDataMIPS32::lowerGlobals(const VariableDeclarationList &Vars,
5667                                     const std::string &SectionSuffix) {
5668   const bool IsPIC = false;
5669   switch (getFlags().getOutFileType()) {
5670   case FT_Elf: {
5671     ELFObjectWriter *Writer = Ctx->getObjectWriter();
5672     Writer->writeDataSection(Vars, llvm::ELF::R_MIPS_32, SectionSuffix, IsPIC);
5673   } break;
5674   case FT_Asm:
5675   case FT_Iasm: {
5676     OstreamLocker L(Ctx);
5677     for (const VariableDeclaration *Var : Vars) {
5678       if (getFlags().matchTranslateOnly(Var->getName(), 0)) {
5679         emitGlobal(*Var, SectionSuffix);
5680       }
5681     }
5682   } break;
5683   }
5684 }
5685 
5686 namespace {
5687 template <typename T> struct ConstantPoolEmitterTraits;
5688 
5689 static_assert(sizeof(uint64_t) == 8,
5690               "uint64_t is supposed to be 8 bytes wide.");
5691 
5692 // TODO(jaydeep.patil): implement the following when implementing constant
5693 // randomization:
5694 //  * template <> struct ConstantPoolEmitterTraits<uint8_t>
5695 //  * template <> struct ConstantPoolEmitterTraits<uint16_t>
5696 //  * template <> struct ConstantPoolEmitterTraits<uint32_t>
5697 template <> struct ConstantPoolEmitterTraits<float> {
5698   using ConstantType = ConstantFloat;
5699   static constexpr Type IceType = IceType_f32;
5700   // AsmTag and TypeName can't be constexpr because llvm::StringRef is unhappy
5701   // about them being constexpr.
5702   static const char AsmTag[];
5703   static const char TypeName[];
5704   static uint64_t bitcastToUint64(float Value) {
5705     static_assert(sizeof(Value) == sizeof(uint32_t),
5706                   "Float should be 4 bytes.");
5707     const uint32_t IntValue = Utils::bitCopy<uint32_t>(Value);
5708     return static_cast<uint64_t>(IntValue);
5709   }
5710 };
5711 const char ConstantPoolEmitterTraits<float>::AsmTag[] = ".word";
5712 const char ConstantPoolEmitterTraits<float>::TypeName[] = "f32";
5713 
5714 template <> struct ConstantPoolEmitterTraits<double> {
5715   using ConstantType = ConstantDouble;
5716   static constexpr Type IceType = IceType_f64;
5717   static const char AsmTag[];
5718   static const char TypeName[];
5719   static uint64_t bitcastToUint64(double Value) {
5720     static_assert(sizeof(double) == sizeof(uint64_t),
5721                   "Double should be 8 bytes.");
5722     return Utils::bitCopy<uint64_t>(Value);
5723   }
5724 };
5725 const char ConstantPoolEmitterTraits<double>::AsmTag[] = ".quad";
5726 const char ConstantPoolEmitterTraits<double>::TypeName[] = "f64";
5727 
5728 template <typename T>
5729 void emitConstant(
5730     Ostream &Str,
5731     const typename ConstantPoolEmitterTraits<T>::ConstantType *Const) {
5732   if (!BuildDefs::dump())
5733     return;
5734   using Traits = ConstantPoolEmitterTraits<T>;
5735   Str << Const->getLabelName();
5736   T Value = Const->getValue();
5737   Str << ":\n\t" << Traits::AsmTag << "\t0x";
5738   Str.write_hex(Traits::bitcastToUint64(Value));
5739   Str << "\t/* " << Traits::TypeName << " " << Value << " */\n";
5740 }
5741 
5742 template <typename T> void emitConstantPool(GlobalContext *Ctx) {
5743   if (!BuildDefs::dump())
5744     return;
5745   using Traits = ConstantPoolEmitterTraits<T>;
5746   static constexpr size_t MinimumAlignment = 4;
5747   SizeT Align = std::max(MinimumAlignment, typeAlignInBytes(Traits::IceType));
5748   assert((Align % 4) == 0 && "Constants should be aligned");
5749   Ostream &Str = Ctx->getStrEmit();
5750   ConstantList Pool = Ctx->getConstantPool(Traits::IceType);
5751   Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",%progbits," << Align
5752       << "\n"
5753       << "\t.align\t" << (Align == 4 ? 2 : 3) << "\n";
5754   for (Constant *C : Pool) {
5755     if (!C->getShouldBePooled()) {
5756       continue;
5757     }
5758     emitConstant<T>(Str, llvm::dyn_cast<typename Traits::ConstantType>(C));
5759   }
5760 }
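// A worked example of the directives above. The f32 pool has Align == 4 and
// the f64 pool Align == 8; the .align operand is the log2 of the byte
// alignment, hence 2 and 3:
//   .section  .rodata.cst4,"aM",%progbits,4
//   .align    2
//   .section  .rodata.cst8,"aM",%progbits,8
//   .align    3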
} // end of anonymous namespace

void TargetDataMIPS32::lowerConstants() {
  if (getFlags().getDisableTranslation())
    return;
  switch (getFlags().getOutFileType()) {
  case FT_Elf: {
    ELFObjectWriter *Writer = Ctx->getObjectWriter();
    Writer->writeConstantPool<ConstantFloat>(IceType_f32);
    Writer->writeConstantPool<ConstantDouble>(IceType_f64);
  } break;
  case FT_Asm:
  case FT_Iasm: {
    OstreamLocker _(Ctx);
    emitConstantPool<float>(Ctx);
    emitConstantPool<double>(Ctx);
    break;
  }
  }
}

void TargetDataMIPS32::lowerJumpTables() {
  if (getFlags().getDisableTranslation())
    return;
}

// Helper for legalize() to emit the right code to lower an operand to a
// register of the appropriate type.
Variable *TargetMIPS32::copyToReg(Operand *Src, RegNumT RegNum) {
  Type Ty = Src->getType();
  Variable *Reg = makeReg(Ty, RegNum);
  if (isVectorType(Ty)) {
    llvm::report_fatal_error("Invalid copy from vector type.");
  } else {
    if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Src)) {
      _lw(Reg, Mem);
    } else {
      _mov(Reg, Src);
    }
  }
  return Reg;
}

Operand *TargetMIPS32::legalize(Operand *From, LegalMask Allowed,
                                RegNumT RegNum) {
  Type Ty = From->getType();
  // Assert that a physical register is allowed. To date, all calls to
  // legalize() allow a physical register. Legal_Flex converts registers to
  // the right OperandMIPS32FlexReg type as needed.
  assert(Allowed & Legal_Reg);

  if (RegNum.hasNoValue()) {
    if (Variable *Subst = getContext().availabilityGet(From)) {
      // At this point we know there is a potential substitution available.
      if (!Subst->isRematerializable() && Subst->mustHaveReg() &&
          !Subst->hasReg()) {
        // At this point we know the substitution will have a register.
        if (From->getType() == Subst->getType()) {
          // At this point we know the substitution's register is compatible.
          return Subst;
        }
      }
    }
  }

  // Go through the various types of operands: OperandMIPS32Mem, Constant, and
  // Variable. Given the above assertion, if the operand's type is not legal
  // (e.g., an OperandMIPS32Mem when !Legal_Mem), we can always copy it to a
  // register.
  if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(From)) {
    // The base must be in a physical register.
    Variable *Base = Mem->getBase();
    ConstantInteger32 *Offset = llvm::cast<ConstantInteger32>(Mem->getOffset());
    Variable *RegBase = nullptr;
    assert(Base);

    RegBase = llvm::cast<Variable>(
        legalize(Base, Legal_Reg | Legal_Rematerializable));

    if (Offset != nullptr && Offset->getValue() != 0) {
      static constexpr bool ZeroExt = false;
      if (!OperandMIPS32Mem::canHoldOffset(Ty, ZeroExt, Offset->getValue())) {
        llvm::report_fatal_error("Invalid memory offset.");
      }
    }
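    // MIPS32 base+offset addressing encodes a signed 16-bit immediate, which
    // is presumably what canHoldOffset() verifies here (ZeroExt == false,
    // i.e. sign-extended), rejecting offsets outside [-32768, 32767].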

    // Create a new operand if there was a change.
    if (Base != RegBase) {
      Mem = OperandMIPS32Mem::create(Func, Ty, RegBase, Offset,
                                     Mem->getAddrMode());
    }

    if (Allowed & Legal_Mem) {
      From = Mem;
    } else {
      Variable *Reg = makeReg(Ty, RegNum);
      _lw(Reg, Mem);
      From = Reg;
    }
    return From;
  }

  if (llvm::isa<Constant>(From)) {
    if (llvm::isa<ConstantUndef>(From)) {
      From = legalizeUndef(From, RegNum);
      if (isVectorType(Ty))
        return From;
    }
    if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) {
      Variable *Reg = makeReg(Ty, RegNum);
      Variable *TReg = makeReg(Ty, RegNum);
      _lui(TReg, C, RO_Hi);
      _addiu(Reg, TReg, C, RO_Lo);
      return Reg;
    } else if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) {
      const uint32_t Value = C32->getValue();
      // Use addiu if the immediate is a 16-bit value. Otherwise load it
      // using a lui/ori instruction pair.
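      // For example (illustrative values, not from the original source):
      //   0x00001234 fits in 16 bits:  addiu $reg, $zero, 0x1234
      //   0x12345678 does not:         lui   $reg, 0x1234
      //                                ori   $reg, $reg, 0x5678
      //   0x12340000 (low half zero):  lui   $reg, 0x1234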
      Variable *Reg = makeReg(Ty, RegNum);
      if (isInt<16>(int32_t(Value))) {
        Variable *Zero = makeReg(Ty, RegMIPS32::Reg_ZERO);
        Context.insert<InstFakeDef>(Zero);
        _addiu(Reg, Zero, Value);
      } else {
        uint32_t UpperBits = (Value >> 16) & 0xFFFF;
        uint32_t LowerBits = Value & 0xFFFF;
        if (LowerBits) {
          Variable *TReg = makeReg(Ty, RegNum);
          _lui(TReg, Ctx->getConstantInt32(UpperBits));
          _ori(Reg, TReg, LowerBits);
        } else {
          _lui(Reg, Ctx->getConstantInt32(UpperBits));
        }
      }
      return Reg;
    } else if (isScalarFloatingType(Ty)) {
      auto *CFrom = llvm::cast<Constant>(From);
      Variable *TReg = makeReg(Ty);
      if (!CFrom->getShouldBePooled()) {
        // Float/Double constant 0 is not pooled.
        Context.insert<InstFakeDef>(TReg);
        _mov(TReg, getZero());
      } else {
        // Load floats/doubles from literal pool.
        Constant *Offset = Ctx->getConstantSym(0, CFrom->getLabelName());
        Variable *TReg1 = makeReg(getPointerType());
        _lui(TReg1, Offset, RO_Hi);
        OperandMIPS32Mem *Addr =
            OperandMIPS32Mem::create(Func, Ty, TReg1, Offset);
        if (Ty == IceType_f32)
          _lwc1(TReg, Addr, RO_Lo);
        else
          _ldc1(TReg, Addr, RO_Lo);
      }
      return copyToReg(TReg, RegNum);
    }
  }
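  // The pooled-constant path above expands to a %hi/%lo pair against the
  // pool label, e.g. (sketch, register names illustrative):
  //   lui   $t0, %hi(<label>)
  //   lwc1  $f0, %lo(<label>)($t0)    # or ldc1 for f64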

  if (auto *Var = llvm::dyn_cast<Variable>(From)) {
    if (Var->isRematerializable()) {
      if (Allowed & Legal_Rematerializable) {
        return From;
      }

      Variable *T = makeReg(Var->getType(), RegNum);
      _mov(T, Var);
      return T;
    }
    // Check if the variable is guaranteed a physical register. This can
    // happen either when the variable is pre-colored or when it is assigned
    // infinite weight.
    bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());
    // We need a new physical register for the operand if:
    //   Mem is not allowed and Var isn't guaranteed a physical register, or
    //   RegNum is required and Var->getRegNum() doesn't match.
    if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
        (RegNum.hasValue() && RegNum != Var->getRegNum())) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }
  return From;
}

namespace BoolFolding {
// TODO(sagar.thakur): Add remaining instruction kinds to shouldTrackProducer()
// and isValidConsumer().
bool shouldTrackProducer(const Inst &Instr) {
  return Instr.getKind() == Inst::Icmp;
}

bool isValidConsumer(const Inst &Instr) { return Instr.getKind() == Inst::Br; }
} // end of namespace BoolFolding
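// The only pattern folded so far is an icmp whose single i1 result feeds a
// br in the same block, e.g. (Subzero IR sketch):
//   %cond = icmp eq i32 %a, %b
//   br i1 %cond, label %then, label %else
// recordProducers() below finds such producers and marks them dead so the
// branch lowering can consume the comparison directly.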

void TargetMIPS32::ComputationTracker::recordProducers(CfgNode *Node) {
  for (Inst &Instr : Node->getInsts()) {
    if (Instr.isDeleted())
      continue;
    // Check whether Instr is a valid producer.
    Variable *Dest = Instr.getDest();
    if (Dest // only consider instructions with an actual dest var; and
        && Dest->getType() == IceType_i1 // only bool-type dest vars; and
        && BoolFolding::shouldTrackProducer(Instr)) { // white-listed instr.
      KnownComputations.emplace(Dest->getIndex(),
                                ComputationEntry(&Instr, IceType_i1));
    }
    // Check each src variable against the map.
    FOREACH_VAR_IN_INST(Var, Instr) {
      SizeT VarNum = Var->getIndex();
      auto ComputationIter = KnownComputations.find(VarNum);
      if (ComputationIter == KnownComputations.end()) {
        continue;
      }

      ++ComputationIter->second.NumUses;
      switch (ComputationIter->second.ComputationType) {
      default:
        KnownComputations.erase(VarNum);
        continue;
      case IceType_i1:
        if (!BoolFolding::isValidConsumer(Instr)) {
          KnownComputations.erase(VarNum);
          continue;
        }
        break;
      }

      if (Instr.isLastUse(Var)) {
        ComputationIter->second.IsLiveOut = false;
      }
    }
  }

  for (auto Iter = KnownComputations.begin(), End = KnownComputations.end();
       Iter != End;) {
    // Disable the folding if its dest may be live beyond this block.
    if (Iter->second.IsLiveOut || Iter->second.NumUses > 1) {
      Iter = KnownComputations.erase(Iter);
      continue;
    }

    // Mark as "dead" rather than outright deleting. This is so that other
    // peephole style optimizations during or before lowering have access to
    // this instruction in undeleted form. See for example
    // tryOptimizedCmpxchgCmpBr().
    Iter->second.Instr->setDead();
    ++Iter;
  }
}

TargetHeaderMIPS32::TargetHeaderMIPS32(GlobalContext *Ctx)
    : TargetHeaderLowering(Ctx) {}

void TargetHeaderMIPS32::lower() {
  if (!BuildDefs::dump())
    return;
  OstreamLocker L(Ctx);
  Ostream &Str = Ctx->getStrEmit();
  Str << "\t.set\t"
      << "nomicromips\n";
  Str << "\t.set\t"
      << "nomips16\n";
  Str << "\t.set\t"
      << "noat\n";
}
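// The emitted header directives tell the assembler to use the full MIPS32
// ISA: "nomicromips" and "nomips16" disable the compressed microMIPS and
// MIPS16e encodings, and "noat" stops the assembler from implicitly using
// the $at (assembler temporary) register in expanded macros.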

SmallBitVector TargetMIPS32::TypeToRegisterSet[RCMIPS32_NUM];
SmallBitVector TargetMIPS32::TypeToRegisterSetUnfiltered[RCMIPS32_NUM];
SmallBitVector TargetMIPS32::RegisterAliases[RegMIPS32::Reg_NUM];

} // end of namespace MIPS32
} // end of namespace Ice