//
//                        The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Implements the TargetLoweringMIPS32 class, which consists almost
/// entirely of the lowering sequence for each high-level instruction.
///
//===----------------------------------------------------------------------===//
14 
15 #include "IceTargetLoweringMIPS32.h"
16 
17 #include "IceCfg.h"
18 #include "IceCfgNode.h"
19 #include "IceClFlags.h"
20 #include "IceDefs.h"
21 #include "IceELFObjectWriter.h"
22 #include "IceGlobalInits.h"
23 #include "IceInstMIPS32.h"
24 #include "IceInstVarIter.h"
25 #include "IceLiveness.h"
26 #include "IceOperand.h"
27 #include "IcePhiLoweringImpl.h"
28 #include "IceRegistersMIPS32.h"
29 #include "IceTargetLoweringMIPS32.def"
30 #include "IceUtils.h"
31 #include "llvm/Support/MathExtras.h"
32 
33 namespace MIPS32 {
createTargetLowering(::Ice::Cfg * Func)34 std::unique_ptr<::Ice::TargetLowering> createTargetLowering(::Ice::Cfg *Func) {
35   return ::Ice::MIPS32::TargetMIPS32::create(Func);
36 }
37 
38 std::unique_ptr<::Ice::TargetDataLowering>
createTargetDataLowering(::Ice::GlobalContext * Ctx)39 createTargetDataLowering(::Ice::GlobalContext *Ctx) {
40   return ::Ice::MIPS32::TargetDataMIPS32::create(Ctx);
41 }
42 
43 std::unique_ptr<::Ice::TargetHeaderLowering>
createTargetHeaderLowering(::Ice::GlobalContext * Ctx)44 createTargetHeaderLowering(::Ice::GlobalContext *Ctx) {
45   return ::Ice::MIPS32::TargetHeaderMIPS32::create(Ctx);
46 }
47 
staticInit(::Ice::GlobalContext * Ctx)48 void staticInit(::Ice::GlobalContext *Ctx) {
49   ::Ice::MIPS32::TargetMIPS32::staticInit(Ctx);
50 }
51 
shouldBePooled(const::Ice::Constant * C)52 bool shouldBePooled(const ::Ice::Constant *C) {
53   return ::Ice::MIPS32::TargetMIPS32::shouldBePooled(C);
54 }
55 
getPointerType()56 ::Ice::Type getPointerType() {
57   return ::Ice::MIPS32::TargetMIPS32::getPointerType();
58 }
59 
60 } // end of namespace MIPS32
61 
62 namespace Ice {
63 namespace MIPS32 {
64 
65 using llvm::isInt;
66 
67 namespace {
68 
69 // The maximum number of arguments to pass in GPR registers.
70 constexpr uint32_t MIPS32_MAX_GPR_ARG = 4;
71 
72 std::array<RegNumT, MIPS32_MAX_GPR_ARG> GPRArgInitializer;
73 std::array<RegNumT, MIPS32_MAX_GPR_ARG / 2> I64ArgInitializer;
74 
75 constexpr uint32_t MIPS32_MAX_FP_ARG = 2;
76 
77 std::array<RegNumT, MIPS32_MAX_FP_ARG> FP32ArgInitializer;
78 std::array<RegNumT, MIPS32_MAX_FP_ARG> FP64ArgInitializer;
79 
getRegClassName(RegClass C)80 const char *getRegClassName(RegClass C) {
81   auto ClassNum = static_cast<RegClassMIPS32>(C);
82   assert(ClassNum < RCMIPS32_NUM);
83   switch (ClassNum) {
84   default:
85     assert(C < RC_Target);
86     return regClassString(C);
87     // Add handling of new register classes below.
88   }
89 }
90 
91 // Stack alignment
92 constexpr uint32_t MIPS32_STACK_ALIGNMENT_BYTES = 16;
93 
94 // Value is in bytes. Return Value adjusted to the next highest multiple of the
95 // stack alignment required for the given type.
applyStackAlignmentTy(uint32_t Value,Type Ty)96 uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) {
97   size_t typeAlignInBytes = typeWidthInBytes(Ty);
98   // Vectors are stored on stack with the same alignment as that of int type
99   if (isVectorType(Ty))
100     typeAlignInBytes = typeWidthInBytes(IceType_i64);
101   return Utils::applyAlignment(Value, typeAlignInBytes);
102 }
103 
104 // Value is in bytes. Return Value adjusted to the next highest multiple of the
105 // stack alignment.
applyStackAlignment(uint32_t Value)106 uint32_t applyStackAlignment(uint32_t Value) {
107   return Utils::applyAlignment(Value, MIPS32_STACK_ALIGNMENT_BYTES);
108 }
109 
110 } // end of anonymous namespace
111 
TargetMIPS32(Cfg * Func)112 TargetMIPS32::TargetMIPS32(Cfg *Func)
113     : TargetLowering(Func), NeedSandboxing(SandboxingType == ST_NaCl) {}
114 
assignVarStackSlots(VarList & SortedSpilledVariables,size_t SpillAreaPaddingBytes,size_t SpillAreaSizeBytes,size_t GlobalsAndSubsequentPaddingSize)115 void TargetMIPS32::assignVarStackSlots(VarList &SortedSpilledVariables,
116                                        size_t SpillAreaPaddingBytes,
117                                        size_t SpillAreaSizeBytes,
118                                        size_t GlobalsAndSubsequentPaddingSize) {
119   const VariablesMetadata *VMetadata = Func->getVMetadata();
120   size_t GlobalsSpaceUsed = SpillAreaPaddingBytes;
121   size_t NextStackOffset = SpillAreaPaddingBytes;
122   CfgVector<size_t> LocalsSize(Func->getNumNodes());
123   const bool SimpleCoalescing = !callsReturnsTwice();
124   for (Variable *Var : SortedSpilledVariables) {
125     size_t Increment = typeWidthInBytesOnStack(Var->getType());
126     if (SimpleCoalescing && VMetadata->isTracked(Var)) {
127       if (VMetadata->isMultiBlock(Var)) {
128         GlobalsSpaceUsed += Increment;
129         NextStackOffset = GlobalsSpaceUsed;
130       } else {
131         SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex();
132         LocalsSize[NodeIndex] += Increment;
133         NextStackOffset = SpillAreaPaddingBytes +
134                           GlobalsAndSubsequentPaddingSize +
135                           LocalsSize[NodeIndex];
136       }
137     } else {
138       NextStackOffset += Increment;
139     }
140     Var->setStackOffset(SpillAreaSizeBytes - NextStackOffset);
141   }
142 }
143 
staticInit(GlobalContext * Ctx)144 void TargetMIPS32::staticInit(GlobalContext *Ctx) {
145   (void)Ctx;
146   RegNumT::setLimit(RegMIPS32::Reg_NUM);
147   SmallBitVector IntegerRegisters(RegMIPS32::Reg_NUM);
148   SmallBitVector I64PairRegisters(RegMIPS32::Reg_NUM);
149   SmallBitVector Float32Registers(RegMIPS32::Reg_NUM);
150   SmallBitVector Float64Registers(RegMIPS32::Reg_NUM);
151   SmallBitVector VectorRegisters(RegMIPS32::Reg_NUM);
152   SmallBitVector InvalidRegisters(RegMIPS32::Reg_NUM);
153 #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
154           isI64Pair, isFP32, isFP64, isVec128, alias_init)                     \
155   IntegerRegisters[RegMIPS32::val] = isInt;                                    \
156   I64PairRegisters[RegMIPS32::val] = isI64Pair;                                \
157   Float32Registers[RegMIPS32::val] = isFP32;                                   \
158   Float64Registers[RegMIPS32::val] = isFP64;                                   \
159   VectorRegisters[RegMIPS32::val] = isVec128;                                  \
160   RegisterAliases[RegMIPS32::val].resize(RegMIPS32::Reg_NUM);                  \
161   for (SizeT RegAlias : alias_init) {                                          \
162     assert(!RegisterAliases[RegMIPS32::val][RegAlias] &&                       \
163            "Duplicate alias for " #val);                                       \
164     RegisterAliases[RegMIPS32::val].set(RegAlias);                             \
165   }                                                                            \
166   RegisterAliases[RegMIPS32::val].resize(RegMIPS32::Reg_NUM);                  \
167   assert(RegisterAliases[RegMIPS32::val][RegMIPS32::val]);
168   REGMIPS32_TABLE;
169 #undef X
170 
171   // TODO(mohit.bhakkad): Change these inits once we provide argument related
172   // field in register tables
173   for (size_t i = 0; i < MIPS32_MAX_GPR_ARG; i++)
174     GPRArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_A0 + i);
175 
176   for (size_t i = 0; i < MIPS32_MAX_GPR_ARG / 2; i++)
177     I64ArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_A0A1 + i);
178 
179   for (size_t i = 0; i < MIPS32_MAX_FP_ARG; i++) {
180     FP32ArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_F12 + i * 2);
181     FP64ArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_F12F13 + i);
182   }
183 
184   TypeToRegisterSet[IceType_void] = InvalidRegisters;
185   TypeToRegisterSet[IceType_i1] = IntegerRegisters;
186   TypeToRegisterSet[IceType_i8] = IntegerRegisters;
187   TypeToRegisterSet[IceType_i16] = IntegerRegisters;
188   TypeToRegisterSet[IceType_i32] = IntegerRegisters;
189   TypeToRegisterSet[IceType_i64] = IntegerRegisters;
190   TypeToRegisterSet[IceType_f32] = Float32Registers;
191   TypeToRegisterSet[IceType_f64] = Float64Registers;
192   TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
193   TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
194   TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
195   TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
196   TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
197   TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
198   TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
199 
200   for (size_t i = 0; i < llvm::array_lengthof(TypeToRegisterSet); ++i)
201     TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i];
202 
203   filterTypeToRegisterSet(Ctx, RegMIPS32::Reg_NUM, TypeToRegisterSet,
204                           llvm::array_lengthof(TypeToRegisterSet),
205                           RegMIPS32::getRegName, getRegClassName);
206 }
207 
unsetIfNonLeafFunc()208 void TargetMIPS32::unsetIfNonLeafFunc() {
209   for (CfgNode *Node : Func->getNodes()) {
210     for (Inst &Instr : Node->getInsts()) {
211       if (llvm::isa<InstCall>(&Instr)) {
212         // Unset MaybeLeafFunc if call instruction exists.
213         MaybeLeafFunc = false;
214         return;
215       }
216     }
217   }
218 }
219 
getStackAlignment() const220 uint32_t TargetMIPS32::getStackAlignment() const {
221   return MIPS32_STACK_ALIGNMENT_BYTES;
222 }
223 
getCallStackArgumentsSizeBytes(const InstCall * Call)224 uint32_t TargetMIPS32::getCallStackArgumentsSizeBytes(const InstCall *Call) {
225   TargetMIPS32::CallingConv CC;
226   RegNumT DummyReg;
227   size_t OutArgsSizeBytes = 0;
228   Variable *Dest = Call->getDest();
229   bool PartialOnStack = false;
230   if (Dest != nullptr && isVectorFloatingType(Dest->getType())) {
231     CC.discardReg(RegMIPS32::Reg_A0);
232     // Next vector is partially on stack
233     PartialOnStack = true;
234   }
235   for (SizeT i = 0, NumArgs = Call->getNumArgs(); i < NumArgs; ++i) {
236     Operand *Arg = legalizeUndef(Call->getArg(i));
237     const Type Ty = Arg->getType();
238     RegNumT RegNum;
239     if (CC.argInReg(Ty, i, &RegNum)) {
240       // If PartialOnStack is true and if this is a vector type then last two
241       // elements are on stack
242       if (PartialOnStack && isVectorType(Ty)) {
243         OutArgsSizeBytes = applyStackAlignmentTy(OutArgsSizeBytes, IceType_i64);
244         OutArgsSizeBytes += typeWidthInBytesOnStack(IceType_i32) * 2;
245       }
246       continue;
247     }
248     OutArgsSizeBytes = applyStackAlignmentTy(OutArgsSizeBytes, Ty);
249     OutArgsSizeBytes += typeWidthInBytesOnStack(Ty);
250   }
251   // Add size of argument save area
252   constexpr int BytesPerStackArg = 4;
253   OutArgsSizeBytes += MIPS32_MAX_GPR_ARG * BytesPerStackArg;
254   return applyStackAlignment(OutArgsSizeBytes);
255 }
256 
257 namespace {
getConstantMemoryOrder(Operand * Opnd)258 inline uint64_t getConstantMemoryOrder(Operand *Opnd) {
259   if (auto *Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
260     return Integer->getValue();
261   return Intrinsics::MemoryOrderInvalid;
262 }
263 }
264 
genTargetHelperCallFor(Inst * Instr)265 void TargetMIPS32::genTargetHelperCallFor(Inst *Instr) {
266   constexpr bool NoTailCall = false;
267   constexpr bool IsTargetHelperCall = true;
268   Variable *Dest = Instr->getDest();
269   const Type DestTy = Dest ? Dest->getType() : IceType_void;
270 
271   switch (Instr->getKind()) {
272   default:
273     return;
274   case Inst::Select: {
275     if (isVectorType(DestTy)) {
276       Operand *SrcT = llvm::cast<InstSelect>(Instr)->getTrueOperand();
277       Operand *SrcF = llvm::cast<InstSelect>(Instr)->getFalseOperand();
278       Operand *Cond = llvm::cast<InstSelect>(Instr)->getCondition();
279       Variable *T = Func->makeVariable(DestTy);
280       auto *Undef = ConstantUndef::create(Ctx, DestTy);
281       Context.insert<InstAssign>(T, Undef);
282       auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
283       VarVecOn32->initVecElement(Func);
284       for (SizeT I = 0; I < typeNumElements(DestTy); ++I) {
285         auto *Index = Ctx->getConstantInt32(I);
286         auto *OpC = Func->makeVariable(typeElementType(Cond->getType()));
287         Context.insert<InstExtractElement>(OpC, Cond, Index);
288         auto *OpT = Func->makeVariable(typeElementType(DestTy));
289         Context.insert<InstExtractElement>(OpT, SrcT, Index);
290         auto *OpF = Func->makeVariable(typeElementType(DestTy));
291         Context.insert<InstExtractElement>(OpF, SrcF, Index);
292         auto *Dst = Func->makeVariable(typeElementType(DestTy));
293         Variable *DestT = Func->makeVariable(DestTy);
294         Context.insert<InstSelect>(Dst, OpC, OpT, OpF);
295         Context.insert<InstInsertElement>(DestT, T, Dst, Index);
296         T = DestT;
297       }
298       Context.insert<InstAssign>(Dest, T);
299       Instr->setDeleted();
300     }
301     return;
302   }
303   case Inst::Fcmp: {
304     if (isVectorType(DestTy)) {
305       InstFcmp::FCond Cond = llvm::cast<InstFcmp>(Instr)->getCondition();
306       Operand *Src0 = Instr->getSrc(0);
307       Operand *Src1 = Instr->getSrc(1);
308       Variable *T = Func->makeVariable(IceType_v4f32);
309       auto *Undef = ConstantUndef::create(Ctx, IceType_v4f32);
310       Context.insert<InstAssign>(T, Undef);
311       auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
312       VarVecOn32->initVecElement(Func);
313       for (SizeT I = 0; I < typeNumElements(IceType_v4f32); ++I) {
314         auto *Index = Ctx->getConstantInt32(I);
315         auto *Op0 = Func->makeVariable(IceType_f32);
316         Context.insert<InstExtractElement>(Op0, Src0, Index);
317         auto *Op1 = Func->makeVariable(IceType_f32);
318         Context.insert<InstExtractElement>(Op1, Src1, Index);
319         auto *Dst = Func->makeVariable(IceType_f32);
320         Variable *DestT = Func->makeVariable(IceType_v4f32);
321         Context.insert<InstFcmp>(Cond, Dst, Op0, Op1);
322         Context.insert<InstInsertElement>(DestT, T, Dst, Index);
323         T = DestT;
324       }
325       Context.insert<InstAssign>(Dest, T);
326       Instr->setDeleted();
327     }
328     return;
329   }
330   case Inst::Icmp: {
331     if (isVectorType(DestTy)) {
332       InstIcmp::ICond Cond = llvm::cast<InstIcmp>(Instr)->getCondition();
333       Operand *Src0 = Instr->getSrc(0);
334       Operand *Src1 = Instr->getSrc(1);
335       const Type SrcType = Src0->getType();
336       Variable *T = Func->makeVariable(DestTy);
337       auto *Undef = ConstantUndef::create(Ctx, DestTy);
338       Context.insert<InstAssign>(T, Undef);
339       auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
340       VarVecOn32->initVecElement(Func);
341       for (SizeT I = 0; I < typeNumElements(SrcType); ++I) {
342         auto *Index = Ctx->getConstantInt32(I);
343         auto *Op0 = Func->makeVariable(typeElementType(SrcType));
344         Context.insert<InstExtractElement>(Op0, Src0, Index);
345         auto *Op1 = Func->makeVariable(typeElementType(SrcType));
346         Context.insert<InstExtractElement>(Op1, Src1, Index);
347         auto *Dst = Func->makeVariable(typeElementType(DestTy));
348         Variable *DestT = Func->makeVariable(DestTy);
349         Context.insert<InstIcmp>(Cond, Dst, Op0, Op1);
350         Context.insert<InstInsertElement>(DestT, T, Dst, Index);
351         T = DestT;
352       }
353       Context.insert<InstAssign>(Dest, T);
354       Instr->setDeleted();
355     }
356     return;
357   }
358   case Inst::Arithmetic: {
359     const InstArithmetic::OpKind Op =
360         llvm::cast<InstArithmetic>(Instr)->getOp();
361     if (isVectorType(DestTy)) {
362       scalarizeArithmetic(Op, Dest, Instr->getSrc(0), Instr->getSrc(1));
363       Instr->setDeleted();
364       return;
365     }
366     switch (DestTy) {
367     default:
368       return;
369     case IceType_i64: {
370       RuntimeHelper HelperID = RuntimeHelper::H_Num;
371       switch (Op) {
372       default:
373         return;
374       case InstArithmetic::Udiv:
375         HelperID = RuntimeHelper::H_udiv_i64;
376         break;
377       case InstArithmetic::Sdiv:
378         HelperID = RuntimeHelper::H_sdiv_i64;
379         break;
380       case InstArithmetic::Urem:
381         HelperID = RuntimeHelper::H_urem_i64;
382         break;
383       case InstArithmetic::Srem:
384         HelperID = RuntimeHelper::H_srem_i64;
385         break;
386       }
387 
388       if (HelperID == RuntimeHelper::H_Num) {
389         return;
390       }
391 
392       Operand *TargetHelper = Ctx->getRuntimeHelperFunc(HelperID);
393       constexpr SizeT MaxArgs = 2;
394       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
395                                             NoTailCall, IsTargetHelperCall);
396       Call->addArg(Instr->getSrc(0));
397       Call->addArg(Instr->getSrc(1));
398       Instr->setDeleted();
399       return;
400     }
401     case IceType_f32:
402     case IceType_f64: {
403       if (Op != InstArithmetic::Frem) {
404         return;
405       }
406       constexpr SizeT MaxArgs = 2;
407       Operand *TargetHelper = Ctx->getRuntimeHelperFunc(
408           DestTy == IceType_f32 ? RuntimeHelper::H_frem_f32
409                                 : RuntimeHelper::H_frem_f64);
410       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
411                                             NoTailCall, IsTargetHelperCall);
412       Call->addArg(Instr->getSrc(0));
413       Call->addArg(Instr->getSrc(1));
414       Instr->setDeleted();
415       return;
416     }
417     }
418     llvm::report_fatal_error("Control flow should never have reached here.");
419   }
420   case Inst::Cast: {
421     Operand *Src0 = Instr->getSrc(0);
422     const Type SrcTy = Src0->getType();
423     auto *CastInstr = llvm::cast<InstCast>(Instr);
424     const InstCast::OpKind CastKind = CastInstr->getCastKind();
425 
426     if (isVectorType(DestTy)) {
427       Variable *T = Func->makeVariable(DestTy);
428       auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
429       VarVecOn32->initVecElement(Func);
430       auto *Undef = ConstantUndef::create(Ctx, DestTy);
431       Context.insert<InstAssign>(T, Undef);
432       for (SizeT I = 0; I < typeNumElements(DestTy); ++I) {
433         auto *Index = Ctx->getConstantInt32(I);
434         auto *Op = Func->makeVariable(typeElementType(SrcTy));
435         Context.insert<InstExtractElement>(Op, Src0, Index);
436         auto *Dst = Func->makeVariable(typeElementType(DestTy));
437         Variable *DestT = Func->makeVariable(DestTy);
438         Context.insert<InstCast>(CastKind, Dst, Op);
439         Context.insert<InstInsertElement>(DestT, T, Dst, Index);
440         T = DestT;
441       }
442       Context.insert<InstAssign>(Dest, T);
443       Instr->setDeleted();
444       return;
445     }
446 
447     switch (CastKind) {
448     default:
449       return;
450     case InstCast::Fptosi:
451     case InstCast::Fptoui: {
452       if ((DestTy != IceType_i32) && (DestTy != IceType_i64)) {
453         return;
454       }
455       const bool DestIs32 = DestTy == IceType_i32;
456       const bool DestIsSigned = CastKind == InstCast::Fptosi;
457       const bool Src0IsF32 = isFloat32Asserting32Or64(SrcTy);
458       RuntimeHelper RTHFunc = RuntimeHelper::H_Num;
459       if (DestIsSigned) {
460         if (DestIs32) {
461           return;
462         }
463         RTHFunc = Src0IsF32 ? RuntimeHelper::H_fptosi_f32_i64
464                             : RuntimeHelper::H_fptosi_f64_i64;
465       } else {
466         RTHFunc = Src0IsF32 ? (DestIs32 ? RuntimeHelper::H_fptoui_f32_i32
467                                         : RuntimeHelper::H_fptoui_f32_i64)
468                             : (DestIs32 ? RuntimeHelper::H_fptoui_f64_i32
469                                         : RuntimeHelper::H_fptoui_f64_i64);
470       }
471       Operand *TargetHelper = Ctx->getRuntimeHelperFunc(RTHFunc);
472       static constexpr SizeT MaxArgs = 1;
473       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
474                                             NoTailCall, IsTargetHelperCall);
475       Call->addArg(Src0);
476       Instr->setDeleted();
477       return;
478     }
479     case InstCast::Sitofp:
480     case InstCast::Uitofp: {
481       if ((SrcTy != IceType_i32) && (SrcTy != IceType_i64)) {
482         return;
483       }
484       const bool SourceIs32 = SrcTy == IceType_i32;
485       const bool SourceIsSigned = CastKind == InstCast::Sitofp;
486       const bool DestIsF32 = isFloat32Asserting32Or64(DestTy);
487       RuntimeHelper RTHFunc = RuntimeHelper::H_Num;
488       if (SourceIsSigned) {
489         if (SourceIs32) {
490           return;
491         }
492         RTHFunc = DestIsF32 ? RuntimeHelper::H_sitofp_i64_f32
493                             : RuntimeHelper::H_sitofp_i64_f64;
494       } else {
495         RTHFunc = DestIsF32 ? (SourceIs32 ? RuntimeHelper::H_uitofp_i32_f32
496                                           : RuntimeHelper::H_uitofp_i64_f32)
497                             : (SourceIs32 ? RuntimeHelper::H_uitofp_i32_f64
498                                           : RuntimeHelper::H_uitofp_i64_f64);
499       }
500       Operand *TargetHelper = Ctx->getRuntimeHelperFunc(RTHFunc);
501       static constexpr SizeT MaxArgs = 1;
502       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
503                                             NoTailCall, IsTargetHelperCall);
504       Call->addArg(Src0);
505       Instr->setDeleted();
506       return;
507     }
508     case InstCast::Bitcast: {
509       if (DestTy == SrcTy) {
510         return;
511       }
512       Variable *CallDest = Dest;
513       RuntimeHelper HelperID = RuntimeHelper::H_Num;
514       switch (DestTy) {
515       default:
516         return;
517       case IceType_i8:
518         assert(SrcTy == IceType_v8i1);
519         HelperID = RuntimeHelper::H_bitcast_8xi1_i8;
520         CallDest = Func->makeVariable(IceType_i32);
521         break;
522       case IceType_i16:
523         assert(SrcTy == IceType_v16i1);
524         HelperID = RuntimeHelper::H_bitcast_16xi1_i16;
525         CallDest = Func->makeVariable(IceType_i32);
526         break;
527       case IceType_v8i1: {
528         assert(SrcTy == IceType_i8);
529         HelperID = RuntimeHelper::H_bitcast_i8_8xi1;
530         Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
531         // Arguments to functions are required to be at least 32 bits wide.
532         Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
533         Src0 = Src0AsI32;
534       } break;
535       case IceType_v16i1: {
536         assert(SrcTy == IceType_i16);
537         HelperID = RuntimeHelper::H_bitcast_i16_16xi1;
538         Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
539         // Arguments to functions are required to be at least 32 bits wide.
540         Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
541         Src0 = Src0AsI32;
542       } break;
543       }
544       constexpr SizeT MaxSrcs = 1;
545       InstCall *Call = makeHelperCall(HelperID, CallDest, MaxSrcs);
546       Call->addArg(Src0);
547       Context.insert(Call);
548       // The PNaCl ABI disallows i8/i16 return types, so truncate the helper
549       // call result to the appropriate type as necessary.
550       if (CallDest->getType() != DestTy)
551         Context.insert<InstCast>(InstCast::Trunc, Dest, CallDest);
552       Instr->setDeleted();
553       return;
554     }
555     case InstCast::Trunc: {
556       if (DestTy == SrcTy) {
557         return;
558       }
559       if (!isVectorType(SrcTy)) {
560         return;
561       }
562       assert(typeNumElements(DestTy) == typeNumElements(SrcTy));
563       assert(typeElementType(DestTy) == IceType_i1);
564       assert(isVectorIntegerType(SrcTy));
565       return;
566     }
567     case InstCast::Sext:
568     case InstCast::Zext: {
569       if (DestTy == SrcTy) {
570         return;
571       }
572       if (!isVectorType(DestTy)) {
573         return;
574       }
575       assert(typeNumElements(DestTy) == typeNumElements(SrcTy));
576       assert(typeElementType(SrcTy) == IceType_i1);
577       assert(isVectorIntegerType(DestTy));
578       return;
579     }
580     }
581     llvm::report_fatal_error("Control flow should never have reached here.");
582   }
583   case Inst::IntrinsicCall: {
584     auto *IntrinsicCall = llvm::cast<InstIntrinsicCall>(Instr);
585     Intrinsics::IntrinsicID ID = IntrinsicCall->getIntrinsicInfo().ID;
586     if (isVectorType(DestTy) && ID == Intrinsics::Fabs) {
587       Operand *Src0 = IntrinsicCall->getArg(0);
588       GlobalString FabsFloat = Ctx->getGlobalString("llvm.fabs.f32");
589       Operand *CallTarget = Ctx->getConstantExternSym(FabsFloat);
590       GlobalString FabsVec = Ctx->getGlobalString("llvm.fabs.v4f32");
591       bool BadIntrinsic = false;
592       const Intrinsics::FullIntrinsicInfo *FullInfo =
593           Ctx->getIntrinsicsInfo().find(FabsVec, BadIntrinsic);
594       Intrinsics::IntrinsicInfo Info = FullInfo->Info;
595 
596       Variable *T = Func->makeVariable(IceType_v4f32);
597       auto *Undef = ConstantUndef::create(Ctx, IceType_v4f32);
598       Context.insert<InstAssign>(T, Undef);
599       auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
600       VarVecOn32->initVecElement(Func);
601 
602       for (SizeT i = 0; i < typeNumElements(IceType_v4f32); ++i) {
603         auto *Index = Ctx->getConstantInt32(i);
604         auto *Op = Func->makeVariable(IceType_f32);
605         Context.insert<InstExtractElement>(Op, Src0, Index);
606         auto *Res = Func->makeVariable(IceType_f32);
607         Variable *DestT = Func->makeVariable(IceType_v4f32);
608         auto *Call =
609             Context.insert<InstIntrinsicCall>(1, Res, CallTarget, Info);
610         Call->addArg(Op);
611         Context.insert<InstInsertElement>(DestT, T, Res, Index);
612         T = DestT;
613       }
614 
615       Context.insert<InstAssign>(Dest, T);
616 
617       Instr->setDeleted();
618       return;
619     }
620     switch (ID) {
621     default:
622       return;
623     case Intrinsics::AtomicLoad: {
624       if (DestTy != IceType_i64)
625         return;
626       if (!Intrinsics::isMemoryOrderValid(
627               ID, getConstantMemoryOrder(IntrinsicCall->getArg(1)))) {
628         Func->setError("Unexpected memory ordering for AtomicLoad");
629         return;
630       }
631       Operand *Addr = IntrinsicCall->getArg(0);
632       Operand *TargetHelper = Ctx->getConstantExternSym(
633           Ctx->getGlobalString("__sync_val_compare_and_swap_8"));
634       static constexpr SizeT MaxArgs = 3;
635       auto *_0 = Ctx->getConstantZero(IceType_i64);
636       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
637                                             NoTailCall, IsTargetHelperCall);
638       Call->addArg(Addr);
639       Call->addArg(_0);
640       Call->addArg(_0);
641       Context.insert<InstMIPS32Sync>();
642       Instr->setDeleted();
643       return;
644     }
645     case Intrinsics::AtomicStore: {
646       Operand *Val = IntrinsicCall->getArg(0);
647       if (Val->getType() != IceType_i64)
648         return;
649       if (!Intrinsics::isMemoryOrderValid(
650               ID, getConstantMemoryOrder(IntrinsicCall->getArg(2)))) {
651         Func->setError("Unexpected memory ordering for AtomicStore");
652         return;
653       }
654       Operand *Addr = IntrinsicCall->getArg(1);
655       Variable *NoDest = nullptr;
656       Operand *TargetHelper = Ctx->getConstantExternSym(
657           Ctx->getGlobalString("__sync_lock_test_and_set_8"));
658       Context.insert<InstMIPS32Sync>();
659       static constexpr SizeT MaxArgs = 2;
660       auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
661                                             NoTailCall, IsTargetHelperCall);
662       Call->addArg(Addr);
663       Call->addArg(Val);
664       Context.insert<InstMIPS32Sync>();
665       Instr->setDeleted();
666       return;
667     }
668     case Intrinsics::AtomicCmpxchg: {
669       if (DestTy != IceType_i64)
670         return;
671       if (!Intrinsics::isMemoryOrderValid(
672               ID, getConstantMemoryOrder(IntrinsicCall->getArg(3)),
673               getConstantMemoryOrder(IntrinsicCall->getArg(4)))) {
674         Func->setError("Unexpected memory ordering for AtomicCmpxchg");
675         return;
676       }
677       Operand *Addr = IntrinsicCall->getArg(0);
678       Operand *Oldval = IntrinsicCall->getArg(1);
679       Operand *Newval = IntrinsicCall->getArg(2);
680       Operand *TargetHelper = Ctx->getConstantExternSym(
681           Ctx->getGlobalString("__sync_val_compare_and_swap_8"));
682       Context.insert<InstMIPS32Sync>();
683       static constexpr SizeT MaxArgs = 3;
684       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
685                                             NoTailCall, IsTargetHelperCall);
686       Call->addArg(Addr);
687       Call->addArg(Oldval);
688       Call->addArg(Newval);
689       Context.insert<InstMIPS32Sync>();
690       Instr->setDeleted();
691       return;
692     }
693     case Intrinsics::AtomicRMW: {
694       if (DestTy != IceType_i64)
695         return;
696       if (!Intrinsics::isMemoryOrderValid(
697               ID, getConstantMemoryOrder(IntrinsicCall->getArg(3)))) {
698         Func->setError("Unexpected memory ordering for AtomicRMW");
699         return;
700       }
701       auto Operation = static_cast<Intrinsics::AtomicRMWOperation>(
702           llvm::cast<ConstantInteger32>(IntrinsicCall->getArg(0))->getValue());
703       auto *Addr = IntrinsicCall->getArg(1);
704       auto *Newval = IntrinsicCall->getArg(2);
705       Operand *TargetHelper;
706       switch (Operation) {
707       case Intrinsics::AtomicAdd:
708         TargetHelper = Ctx->getConstantExternSym(
709             Ctx->getGlobalString("__sync_fetch_and_add_8"));
710         break;
711       case Intrinsics::AtomicSub:
712         TargetHelper = Ctx->getConstantExternSym(
713             Ctx->getGlobalString("__sync_fetch_and_sub_8"));
714         break;
715       case Intrinsics::AtomicOr:
716         TargetHelper = Ctx->getConstantExternSym(
717             Ctx->getGlobalString("__sync_fetch_and_or_8"));
718         break;
719       case Intrinsics::AtomicAnd:
720         TargetHelper = Ctx->getConstantExternSym(
721             Ctx->getGlobalString("__sync_fetch_and_and_8"));
722         break;
723       case Intrinsics::AtomicXor:
724         TargetHelper = Ctx->getConstantExternSym(
725             Ctx->getGlobalString("__sync_fetch_and_xor_8"));
726         break;
727       case Intrinsics::AtomicExchange:
728         TargetHelper = Ctx->getConstantExternSym(
729             Ctx->getGlobalString("__sync_lock_test_and_set_8"));
730         break;
731       default:
732         llvm::report_fatal_error("Unknown AtomicRMW operation");
733         return;
734       }
735       Context.insert<InstMIPS32Sync>();
736       static constexpr SizeT MaxArgs = 2;
737       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
738                                             NoTailCall, IsTargetHelperCall);
739       Call->addArg(Addr);
740       Call->addArg(Newval);
741       Context.insert<InstMIPS32Sync>();
742       Instr->setDeleted();
743       return;
744     }
745     case Intrinsics::Ctpop: {
746       Operand *Src0 = IntrinsicCall->getArg(0);
747       Operand *TargetHelper =
748           Ctx->getRuntimeHelperFunc(isInt32Asserting32Or64(Src0->getType())
749                                         ? RuntimeHelper::H_call_ctpop_i32
750                                         : RuntimeHelper::H_call_ctpop_i64);
751       static constexpr SizeT MaxArgs = 1;
752       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
753                                             NoTailCall, IsTargetHelperCall);
754       Call->addArg(Src0);
755       Instr->setDeleted();
756       return;
757     }
758     case Intrinsics::Longjmp: {
759       static constexpr SizeT MaxArgs = 2;
760       static constexpr Variable *NoDest = nullptr;
761       Operand *TargetHelper =
762           Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_longjmp);
763       auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
764                                             NoTailCall, IsTargetHelperCall);
765       Call->addArg(IntrinsicCall->getArg(0));
766       Call->addArg(IntrinsicCall->getArg(1));
767       Instr->setDeleted();
768       return;
769     }
770     case Intrinsics::Memcpy: {
771       static constexpr SizeT MaxArgs = 3;
772       static constexpr Variable *NoDest = nullptr;
773       Operand *TargetHelper =
774           Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memcpy);
775       auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
776                                             NoTailCall, IsTargetHelperCall);
777       Call->addArg(IntrinsicCall->getArg(0));
778       Call->addArg(IntrinsicCall->getArg(1));
779       Call->addArg(IntrinsicCall->getArg(2));
780       Instr->setDeleted();
781       return;
782     }
783     case Intrinsics::Memmove: {
784       static constexpr SizeT MaxArgs = 3;
785       static constexpr Variable *NoDest = nullptr;
786       Operand *TargetHelper =
787           Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memmove);
788       auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
789                                             NoTailCall, IsTargetHelperCall);
790       Call->addArg(IntrinsicCall->getArg(0));
791       Call->addArg(IntrinsicCall->getArg(1));
792       Call->addArg(IntrinsicCall->getArg(2));
793       Instr->setDeleted();
794       return;
795     }
796     case Intrinsics::Memset: {
797       Operand *ValOp = IntrinsicCall->getArg(1);
798       assert(ValOp->getType() == IceType_i8);
799       Variable *ValExt = Func->makeVariable(stackSlotType());
800       Context.insert<InstCast>(InstCast::Zext, ValExt, ValOp);
801 
802       static constexpr SizeT MaxArgs = 3;
803       static constexpr Variable *NoDest = nullptr;
804       Operand *TargetHelper =
805           Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memset);
806       auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
807                                             NoTailCall, IsTargetHelperCall);
808       Call->addArg(IntrinsicCall->getArg(0));
809       Call->addArg(ValExt);
810       Call->addArg(IntrinsicCall->getArg(2));
811       Instr->setDeleted();
812       return;
813     }
814     case Intrinsics::NaClReadTP: {
815       if (SandboxingType == ST_NaCl) {
816         return;
817       }
818       static constexpr SizeT MaxArgs = 0;
819       assert(SandboxingType != ST_Nonsfi);
820       Operand *TargetHelper =
821           Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_read_tp);
822       Context.insert<InstCall>(MaxArgs, Dest, TargetHelper, NoTailCall,
823                                IsTargetHelperCall);
824       Instr->setDeleted();
825       return;
826     }
827     case Intrinsics::Setjmp: {
828       static constexpr SizeT MaxArgs = 1;
829       Operand *TargetHelper =
830           Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_setjmp);
831       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
832                                             NoTailCall, IsTargetHelperCall);
833       Call->addArg(IntrinsicCall->getArg(0));
834       Instr->setDeleted();
835       return;
836     }
837     }
838     llvm::report_fatal_error("Control flow should never have reached here.");
839   }
840   }
841 }
842 
findMaxStackOutArgsSize()843 void TargetMIPS32::findMaxStackOutArgsSize() {
844   // MinNeededOutArgsBytes should be updated if the Target ever creates a
845   // high-level InstCall that requires more stack bytes.
846   size_t MinNeededOutArgsBytes = 0;
847   if (!MaybeLeafFunc)
848     MinNeededOutArgsBytes = MIPS32_MAX_GPR_ARG * 4;
849   MaxOutArgsSizeBytes = MinNeededOutArgsBytes;
850   for (CfgNode *Node : Func->getNodes()) {
851     Context.init(Node);
852     while (!Context.atEnd()) {
853       PostIncrLoweringContext PostIncrement(Context);
854       Inst *CurInstr = iteratorToInst(Context.getCur());
855       if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) {
856         SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call);
857         MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes);
858       }
859     }
860   }
861   CurrentAllocaOffset = MaxOutArgsSizeBytes;
862 }
863 
translateO2()864 void TargetMIPS32::translateO2() {
865   TimerMarker T(TimerStack::TT_O2, Func);
866 
867   // TODO(stichnot): share passes with X86?
868   // https://code.google.com/p/nativeclient/issues/detail?id=4094
869   genTargetHelperCalls();
870 
871   unsetIfNonLeafFunc();
872 
873   findMaxStackOutArgsSize();
874 
875   // Merge Alloca instructions, and lay out the stack.
876   static constexpr bool SortAndCombineAllocas = true;
877   Func->processAllocas(SortAndCombineAllocas);
878   Func->dump("After Alloca processing");
879 
880   if (!getFlags().getEnablePhiEdgeSplit()) {
881     // Lower Phi instructions.
882     Func->placePhiLoads();
883     if (Func->hasError())
884       return;
885     Func->placePhiStores();
886     if (Func->hasError())
887       return;
888     Func->deletePhis();
889     if (Func->hasError())
890       return;
891     Func->dump("After Phi lowering");
892   }
893 
894   // Address mode optimization.
895   Func->getVMetadata()->init(VMK_SingleDefs);
896   Func->doAddressOpt();
897 
898   // Argument lowering
899   Func->doArgLowering();
900 
901   // Target lowering. This requires liveness analysis for some parts of the
902   // lowering decisions, such as compare/branch fusing. If non-lightweight
903   // liveness analysis is used, the instructions need to be renumbered first.
904   // TODO: This renumbering should only be necessary if we're actually
905   // calculating live intervals, which we only do for register allocation.
906   Func->renumberInstructions();
907   if (Func->hasError())
908     return;
909 
910   // TODO: It should be sufficient to use the fastest liveness calculation,
911   // i.e. livenessLightweight(). However, for some reason that slows down the
912   // rest of the translation. Investigate.
913   Func->liveness(Liveness_Basic);
914   if (Func->hasError())
915     return;
916   Func->dump("After MIPS32 address mode opt");
917 
918   Func->genCode();
919   if (Func->hasError())
920     return;
921   Func->dump("After MIPS32 codegen");
922 
923   // Register allocation. This requires instruction renumbering and full
924   // liveness analysis.
925   Func->renumberInstructions();
926   if (Func->hasError())
927     return;
928   Func->liveness(Liveness_Intervals);
929   if (Func->hasError())
930     return;
931   // The post-codegen dump is done here, after liveness analysis and associated
932   // cleanup, to make the dump cleaner and more useful.
933   Func->dump("After initial MIPS32 codegen");
934   // Validate the live range computations. The expensive validation call is
935   // deliberately only made when assertions are enabled.
936   assert(Func->validateLiveness());
937   Func->getVMetadata()->init(VMK_All);
938   regAlloc(RAK_Global);
939   if (Func->hasError())
940     return;
941   Func->dump("After linear scan regalloc");
942 
943   if (getFlags().getEnablePhiEdgeSplit()) {
944     Func->advancedPhiLowering();
945     Func->dump("After advanced Phi lowering");
946   }
947 
948   // Stack frame mapping.
949   Func->genFrame();
950   if (Func->hasError())
951     return;
952   Func->dump("After stack frame mapping");
953 
954   postLowerLegalization();
955   if (Func->hasError())
956     return;
957   Func->dump("After postLowerLegalization");
958 
959   Func->contractEmptyNodes();
960   Func->reorderNodes();
961 
962   // Branch optimization. This needs to be done just before code emission. In
963   // particular, no transformations that insert or reorder CfgNodes should be
964   // done after branch optimization. We go ahead and do it before nop insertion
965   // to reduce the amount of work needed for searching for opportunities.
966   Func->doBranchOpt();
967   Func->dump("After branch optimization");
968 
969   // Nop insertion
970   if (getFlags().getShouldDoNopInsertion()) {
971     Func->doNopInsertion();
972   }
973 }
974 
translateOm1()975 void TargetMIPS32::translateOm1() {
976   TimerMarker T(TimerStack::TT_Om1, Func);
977 
978   // TODO: share passes with X86?
979   genTargetHelperCalls();
980 
981   unsetIfNonLeafFunc();
982 
983   findMaxStackOutArgsSize();
984 
985   // Do not merge Alloca instructions, and lay out the stack.
986   static constexpr bool SortAndCombineAllocas = false;
987   Func->processAllocas(SortAndCombineAllocas);
988   Func->dump("After Alloca processing");
989 
990   Func->placePhiLoads();
991   if (Func->hasError())
992     return;
993   Func->placePhiStores();
994   if (Func->hasError())
995     return;
996   Func->deletePhis();
997   if (Func->hasError())
998     return;
999   Func->dump("After Phi lowering");
1000 
1001   Func->doArgLowering();
1002 
1003   Func->genCode();
1004   if (Func->hasError())
1005     return;
1006   Func->dump("After initial MIPS32 codegen");
1007 
1008   regAlloc(RAK_InfOnly);
1009   if (Func->hasError())
1010     return;
1011   Func->dump("After regalloc of infinite-weight variables");
1012 
1013   Func->genFrame();
1014   if (Func->hasError())
1015     return;
1016   Func->dump("After stack frame mapping");
1017 
1018   postLowerLegalization();
1019   if (Func->hasError())
1020     return;
1021   Func->dump("After postLowerLegalization");
1022 
1023   // Nop insertion
1024   if (getFlags().getShouldDoNopInsertion()) {
1025     Func->doNopInsertion();
1026   }
1027 }
1028 
doBranchOpt(Inst * Instr,const CfgNode * NextNode)1029 bool TargetMIPS32::doBranchOpt(Inst *Instr, const CfgNode *NextNode) {
1030   if (auto *Br = llvm::dyn_cast<InstMIPS32Br>(Instr)) {
1031     return Br->optimizeBranch(NextNode);
1032   }
1033   return false;
1034 }
1035 
1036 namespace {
1037 
1038 const char *RegNames[RegMIPS32::Reg_NUM] = {
1039 #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
1040           isI64Pair, isFP32, isFP64, isVec128, alias_init)                     \
1041   name,
1042     REGMIPS32_TABLE
1043 #undef X
1044 };
1045 
1046 } // end of anonymous namespace
1047 
getRegName(RegNumT RegNum)1048 const char *RegMIPS32::getRegName(RegNumT RegNum) {
1049   RegNum.assertIsValid();
1050   return RegNames[RegNum];
1051 }
1052 
getRegName(RegNumT RegNum,Type Ty) const1053 const char *TargetMIPS32::getRegName(RegNumT RegNum, Type Ty) const {
1054   (void)Ty;
1055   return RegMIPS32::getRegName(RegNum);
1056 }
1057 
getPhysicalRegister(RegNumT RegNum,Type Ty)1058 Variable *TargetMIPS32::getPhysicalRegister(RegNumT RegNum, Type Ty) {
1059   if (Ty == IceType_void)
1060     Ty = IceType_i32;
1061   if (PhysicalRegisters[Ty].empty())
1062     PhysicalRegisters[Ty].resize(RegMIPS32::Reg_NUM);
1063   RegNum.assertIsValid();
1064   Variable *Reg = PhysicalRegisters[Ty][RegNum];
1065   if (Reg == nullptr) {
1066     Reg = Func->makeVariable(Ty);
1067     Reg->setRegNum(RegNum);
1068     PhysicalRegisters[Ty][RegNum] = Reg;
1069     // Specially mark a named physical register as an "argument" so that it is
1070     // considered live upon function entry.  Otherwise it's possible to get
1071     // liveness validation errors for saving callee-save registers.
1072     Func->addImplicitArg(Reg);
1073     // Don't bother tracking the live range of a named physical register.
1074     Reg->setIgnoreLiveness();
1075   }
1076   return Reg;
1077 }
1078 
emitJumpTable(const Cfg * Func,const InstJumpTable * JumpTable) const1079 void TargetMIPS32::emitJumpTable(const Cfg *Func,
1080                                  const InstJumpTable *JumpTable) const {
1081   (void)Func;
1082   (void)JumpTable;
1083   UnimplementedError(getFlags());
1084 }
1085 
1086 /// Provide a trivial wrapper to legalize() for this common usage.
legalizeToReg(Operand * From,RegNumT RegNum)1087 Variable *TargetMIPS32::legalizeToReg(Operand *From, RegNumT RegNum) {
1088   return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
1089 }
1090 
1091 /// Legalize undef values to concrete values.
legalizeUndef(Operand * From,RegNumT RegNum)1092 Operand *TargetMIPS32::legalizeUndef(Operand *From, RegNumT RegNum) {
1093   (void)RegNum;
1094   Type Ty = From->getType();
1095   if (llvm::isa<ConstantUndef>(From)) {
1096     // Lower undefs to zero.  Another option is to lower undefs to an
1097     // uninitialized register; however, using an uninitialized register
1098     // results in less predictable code.
1099     //
1100     // If in the future the implementation is changed to lower undef
1101     // values to uninitialized registers, a FakeDef will be needed:
1102     //     Context.insert(InstFakeDef::create(Func, Reg));
1103     // This is in order to ensure that the live range of Reg is not
1104     // overestimated.  If the constant being lowered is a 64 bit value,
1105     // then the result should be split and the lo and hi components will
1106     // need to go in uninitialized registers.
1107     if (isVectorType(Ty)) {
1108       Variable *Var = makeReg(Ty, RegNum);
1109       auto *Reg = llvm::cast<VariableVecOn32>(Var);
1110       Reg->initVecElement(Func);
1111       auto *Zero = getZero();
1112       for (Variable *Var : Reg->getContainers()) {
1113         _mov(Var, Zero);
1114       }
1115       return Reg;
1116     }
1117     return Ctx->getConstantZero(Ty);
1118   }
1119   return From;
1120 }
1121 
makeReg(Type Type,RegNumT RegNum)1122 Variable *TargetMIPS32::makeReg(Type Type, RegNumT RegNum) {
1123   // There aren't any 64-bit integer registers for Mips32.
1124   assert(Type != IceType_i64);
1125   Variable *Reg = Func->makeVariable(Type);
1126   if (RegNum.hasValue())
1127     Reg->setRegNum(RegNum);
1128   else
1129     Reg->setMustHaveReg();
1130   return Reg;
1131 }
1132 
formMemoryOperand(Operand * Operand,Type Ty)1133 OperandMIPS32Mem *TargetMIPS32::formMemoryOperand(Operand *Operand, Type Ty) {
1134   // It may be the case that address mode optimization already creates an
1135   // OperandMIPS32Mem, so in that case it wouldn't need another level of
1136   // transformation.
1137   if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) {
1138     return llvm::cast<OperandMIPS32Mem>(legalize(Mem));
1139   }
1140 
1141   // If we didn't do address mode optimization, then we only have a base/offset
1142   // to work with. MIPS always requires a base register, so just use that to
1143   // hold the operand.
1144   auto *Base = llvm::cast<Variable>(
1145       legalize(Operand, Legal_Reg | Legal_Rematerializable));
1146   const int32_t Offset = Base->hasStackOffset() ? Base->getStackOffset() : 0;
1147   return OperandMIPS32Mem::create(
1148       Func, Ty, Base,
1149       llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(Offset)));
1150 }
1151 
emitVariable(const Variable * Var) const1152 void TargetMIPS32::emitVariable(const Variable *Var) const {
1153   if (!BuildDefs::dump())
1154     return;
1155   Ostream &Str = Ctx->getStrEmit();
1156   const Type FrameSPTy = IceType_i32;
1157   if (Var->hasReg()) {
1158     Str << '$' << getRegName(Var->getRegNum(), Var->getType());
1159     return;
1160   }
1161   if (Var->mustHaveReg()) {
1162     llvm::report_fatal_error("Infinite-weight Variable (" + Var->getName() +
1163                              ") has no register assigned - function " +
1164                              Func->getFunctionName());
1165   }
1166   const int32_t Offset = Var->getStackOffset();
1167   Str << Offset;
1168   Str << "($" << getRegName(getFrameOrStackReg(), FrameSPTy);
1169   Str << ")";
1170 }
1171 
CallingConv()1172 TargetMIPS32::CallingConv::CallingConv()
1173     : GPRegsUsed(RegMIPS32::Reg_NUM),
1174       GPRArgs(GPRArgInitializer.rbegin(), GPRArgInitializer.rend()),
1175       I64Args(I64ArgInitializer.rbegin(), I64ArgInitializer.rend()),
1176       VFPRegsUsed(RegMIPS32::Reg_NUM),
1177       FP32Args(FP32ArgInitializer.rbegin(), FP32ArgInitializer.rend()),
1178       FP64Args(FP64ArgInitializer.rbegin(), FP64ArgInitializer.rend()) {}
1179 
1180 // In MIPS O32 abi FP argument registers can be used only if first argument is
1181 // of type float/double. UseFPRegs flag is used to care of that. Also FP arg
1182 // registers can be used only for first 2 arguments, so we require argument
1183 // number to make register allocation decisions.
argInReg(Type Ty,uint32_t ArgNo,RegNumT * Reg)1184 bool TargetMIPS32::CallingConv::argInReg(Type Ty, uint32_t ArgNo,
1185                                          RegNumT *Reg) {
1186   if (isScalarIntegerType(Ty) || isVectorType(Ty))
1187     return argInGPR(Ty, Reg);
1188   if (isScalarFloatingType(Ty)) {
1189     if (ArgNo == 0) {
1190       UseFPRegs = true;
1191       return argInVFP(Ty, Reg);
1192     }
1193     if (UseFPRegs && ArgNo == 1) {
1194       UseFPRegs = false;
1195       return argInVFP(Ty, Reg);
1196     }
1197     return argInGPR(Ty, Reg);
1198   }
1199   llvm::report_fatal_error("argInReg: Invalid type.");
1200   return false;
1201 }
1202 
argInGPR(Type Ty,RegNumT * Reg)1203 bool TargetMIPS32::CallingConv::argInGPR(Type Ty, RegNumT *Reg) {
1204   CfgVector<RegNumT> *Source;
1205 
1206   switch (Ty) {
1207   default: {
1208     llvm::report_fatal_error("argInGPR: Invalid type.");
1209     return false;
1210   } break;
1211   case IceType_v4i1:
1212   case IceType_v8i1:
1213   case IceType_v16i1:
1214   case IceType_v16i8:
1215   case IceType_v8i16:
1216   case IceType_v4i32:
1217   case IceType_v4f32:
1218   case IceType_i32:
1219   case IceType_f32: {
1220     Source = &GPRArgs;
1221   } break;
1222   case IceType_i64:
1223   case IceType_f64: {
1224     Source = &I64Args;
1225   } break;
1226   }
1227 
1228   discardUnavailableGPRsAndTheirAliases(Source);
1229 
1230   // If $4 is used for any scalar type (or returining v4f32) then the next
1231   // vector type if passed in $6:$7:stack:stack
1232   if (isVectorType(Ty)) {
1233     alignGPR(Source);
1234   }
1235 
1236   if (Source->empty()) {
1237     GPRegsUsed.set();
1238     return false;
1239   }
1240 
1241   *Reg = Source->back();
1242   // Note that we don't Source->pop_back() here. This is intentional. Notice how
1243   // we mark all of Reg's aliases as Used. So, for the next argument,
1244   // Source->back() is marked as unavailable, and it is thus implicitly popped
1245   // from the stack.
1246   GPRegsUsed |= RegisterAliases[*Reg];
1247 
1248   // All vector arguments irrespective of their base type are passed in GP
1249   // registers. First vector argument is passed in $4:$5:$6:$7 and 2nd
1250   // is passed in $6:$7:stack:stack. If it is 1st argument then discard
1251   // $4:$5:$6:$7 otherwise discard $6:$7 only.
1252   if (isVectorType(Ty)) {
1253     if (((unsigned)*Reg) == RegMIPS32::Reg_A0) {
1254       GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A1];
1255       GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A2];
1256       GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A3];
1257     } else {
1258       GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A3];
1259     }
1260   }
1261 
1262   return true;
1263 }
1264 
discardNextGPRAndItsAliases(CfgVector<RegNumT> * Regs)1265 inline void TargetMIPS32::CallingConv::discardNextGPRAndItsAliases(
1266     CfgVector<RegNumT> *Regs) {
1267   GPRegsUsed |= RegisterAliases[Regs->back()];
1268   Regs->pop_back();
1269 }
1270 
alignGPR(CfgVector<RegNumT> * Regs)1271 inline void TargetMIPS32::CallingConv::alignGPR(CfgVector<RegNumT> *Regs) {
1272   if (Regs->back() == RegMIPS32::Reg_A1 || Regs->back() == RegMIPS32::Reg_A3)
1273     discardNextGPRAndItsAliases(Regs);
1274 }
1275 
1276 // GPR are not packed when passing parameters. Thus, a function foo(i32, i64,
1277 // i32) will have the first argument in a0, the second in a2-a3, and the third
1278 // on the stack. To model this behavior, whenever we pop a register from Regs,
1279 // we remove all of its aliases from the pool of available GPRs. This has the
1280 // effect of computing the "closure" on the GPR registers.
discardUnavailableGPRsAndTheirAliases(CfgVector<RegNumT> * Regs)1281 void TargetMIPS32::CallingConv::discardUnavailableGPRsAndTheirAliases(
1282     CfgVector<RegNumT> *Regs) {
1283   while (!Regs->empty() && GPRegsUsed[Regs->back()]) {
1284     discardNextGPRAndItsAliases(Regs);
1285   }
1286 }
1287 
argInVFP(Type Ty,RegNumT * Reg)1288 bool TargetMIPS32::CallingConv::argInVFP(Type Ty, RegNumT *Reg) {
1289   CfgVector<RegNumT> *Source;
1290 
1291   switch (Ty) {
1292   default: {
1293     llvm::report_fatal_error("argInVFP: Invalid type.");
1294     return false;
1295   } break;
1296   case IceType_f32: {
1297     Source = &FP32Args;
1298   } break;
1299   case IceType_f64: {
1300     Source = &FP64Args;
1301   } break;
1302   }
1303 
1304   discardUnavailableVFPRegsAndTheirAliases(Source);
1305 
1306   if (Source->empty()) {
1307     VFPRegsUsed.set();
1308     return false;
1309   }
1310 
1311   *Reg = Source->back();
1312   VFPRegsUsed |= RegisterAliases[*Reg];
1313 
1314   // In MIPS O32 abi if fun arguments are (f32, i32) then one can not use reg_a0
1315   // for second argument even though it's free. f32 arg goes in reg_f12, i32 arg
1316   // goes in reg_a1. Similarly if arguments are (f64, i32) second argument goes
1317   // in reg_a3 and a0, a1 are not used.
1318   Source = &GPRArgs;
1319   // Discard one GPR reg for f32(4 bytes), two for f64(4 + 4 bytes)
1320   if (Ty == IceType_f64) {
1321     // In MIPS o32 abi, when we use GPR argument pairs to store F64 values, pair
1322     // must be aligned at even register. Similarly when we discard GPR registers
1323     // when some arguments from starting 16 bytes goes in FPR, we must take care
1324     // of alignment. For example if fun args are (f32, f64, f32), for first f32
1325     // we discard a0, now for f64 argument, which will go in F14F15, we must
1326     // first align GPR vector to even register by discarding a1, then discard
1327     // two GPRs a2 and a3. Now last f32 argument will go on stack.
1328     alignGPR(Source);
1329     discardNextGPRAndItsAliases(Source);
1330   }
1331   discardNextGPRAndItsAliases(Source);
1332   return true;
1333 }
1334 
discardUnavailableVFPRegsAndTheirAliases(CfgVector<RegNumT> * Regs)1335 void TargetMIPS32::CallingConv::discardUnavailableVFPRegsAndTheirAliases(
1336     CfgVector<RegNumT> *Regs) {
1337   while (!Regs->empty() && VFPRegsUsed[Regs->back()]) {
1338     Regs->pop_back();
1339   }
1340 }
1341 
lowerArguments()1342 void TargetMIPS32::lowerArguments() {
1343   VarList &Args = Func->getArgs();
1344   TargetMIPS32::CallingConv CC;
1345 
1346   // For each register argument, replace Arg in the argument list with the home
1347   // register. Then generate an instruction in the prolog to copy the home
1348   // register to the assigned location of Arg.
1349   Context.init(Func->getEntryNode());
1350   Context.setInsertPoint(Context.getCur());
1351 
1352   // v4f32 is returned through stack. $4 is setup by the caller and passed as
1353   // first argument implicitly. Callee then copies the return vector at $4.
1354   Variable *ImplicitRetVec = nullptr;
1355   if (isVectorFloatingType(Func->getReturnType())) {
1356     ImplicitRetVec = Func->makeVariable(IceType_i32);
1357     ImplicitRetVec->setName(Func, "ImplicitRet_v4f32");
1358     ImplicitRetVec->setIsArg();
1359     Args.insert(Args.begin(), ImplicitRetVec);
1360     setImplicitRet(ImplicitRetVec);
1361   }
1362 
1363   for (SizeT i = 0, E = Args.size(); i < E; ++i) {
1364     Variable *Arg = Args[i];
1365     Type Ty = Arg->getType();
1366     RegNumT RegNum;
1367     if (!CC.argInReg(Ty, i, &RegNum)) {
1368       continue;
1369     }
1370     Variable *RegisterArg = Func->makeVariable(Ty);
1371     if (BuildDefs::dump()) {
1372       RegisterArg->setName(Func, "home_reg:" + Arg->getName());
1373     }
1374     RegisterArg->setIsArg();
1375     Arg->setIsArg(false);
1376     Args[i] = RegisterArg;
1377 
1378     if (isVectorType(Ty)) {
1379       auto *RegisterArgVec = llvm::cast<VariableVecOn32>(RegisterArg);
1380       RegisterArgVec->initVecElement(Func);
1381       RegisterArgVec->getContainers()[0]->setRegNum(
1382           RegNumT::fixme((unsigned)RegNum + 0));
1383       RegisterArgVec->getContainers()[1]->setRegNum(
1384           RegNumT::fixme((unsigned)RegNum + 1));
1385       // First two elements of second vector argument are passed
1386       // in $6:$7 and remaining two on stack. Do not assign register
1387       // to this is second vector argument.
1388       if (i == 0) {
1389         RegisterArgVec->getContainers()[2]->setRegNum(
1390             RegNumT::fixme((unsigned)RegNum + 2));
1391         RegisterArgVec->getContainers()[3]->setRegNum(
1392             RegNumT::fixme((unsigned)RegNum + 3));
1393       } else {
1394         RegisterArgVec->getContainers()[2]->setRegNum(
1395             RegNumT::fixme(RegNumT()));
1396         RegisterArgVec->getContainers()[3]->setRegNum(
1397             RegNumT::fixme(RegNumT()));
1398       }
1399     } else {
1400       switch (Ty) {
1401       default: { RegisterArg->setRegNum(RegNum); } break;
1402       case IceType_i64: {
1403         auto *RegisterArg64 = llvm::cast<Variable64On32>(RegisterArg);
1404         RegisterArg64->initHiLo(Func);
1405         RegisterArg64->getLo()->setRegNum(
1406             RegNumT::fixme(RegMIPS32::get64PairFirstRegNum(RegNum)));
1407         RegisterArg64->getHi()->setRegNum(
1408             RegNumT::fixme(RegMIPS32::get64PairSecondRegNum(RegNum)));
1409       } break;
1410       }
1411     }
1412     Context.insert<InstAssign>(Arg, RegisterArg);
1413   }
1414 
1415   // Insert fake use of ImplicitRet_v4f32 to keep it live
1416   if (ImplicitRetVec) {
1417     for (CfgNode *Node : Func->getNodes()) {
1418       for (Inst &Instr : Node->getInsts()) {
1419         if (llvm::isa<InstRet>(&Instr)) {
1420           Context.setInsertPoint(instToIterator(&Instr));
1421           Context.insert<InstFakeUse>(ImplicitRetVec);
1422           break;
1423         }
1424       }
1425     }
1426   }
1427 }
1428 
stackSlotType()1429 Type TargetMIPS32::stackSlotType() { return IceType_i32; }
1430 
1431 // Helper function for addProlog().
1432 //
1433 // This assumes Arg is an argument passed on the stack. This sets the frame
1434 // offset for Arg and updates InArgsSizeBytes according to Arg's width. For an
1435 // I64 arg that has been split into Lo and Hi components, it calls itself
1436 // recursively on the components, taking care to handle Lo first because of the
1437 // little-endian architecture. Lastly, this function generates an instruction
1438 // to copy Arg into its assigned register if applicable.
finishArgumentLowering(Variable * Arg,bool PartialOnStack,Variable * FramePtr,size_t BasicFrameOffset,size_t * InArgsSizeBytes)1439 void TargetMIPS32::finishArgumentLowering(Variable *Arg, bool PartialOnStack,
1440                                           Variable *FramePtr,
1441                                           size_t BasicFrameOffset,
1442                                           size_t *InArgsSizeBytes) {
1443   const Type Ty = Arg->getType();
1444   *InArgsSizeBytes = applyStackAlignmentTy(*InArgsSizeBytes, Ty);
1445 
1446   // If $4 is used for any scalar type (or returining v4f32) then the next
1447   // vector type if passed in $6:$7:stack:stack. Load 3nd and 4th element
1448   // from agument stack.
1449   if (auto *ArgVecOn32 = llvm::dyn_cast<VariableVecOn32>(Arg)) {
1450     if (PartialOnStack == false) {
1451       auto *Elem0 = ArgVecOn32->getContainers()[0];
1452       auto *Elem1 = ArgVecOn32->getContainers()[1];
1453       finishArgumentLowering(Elem0, PartialOnStack, FramePtr, BasicFrameOffset,
1454                              InArgsSizeBytes);
1455       finishArgumentLowering(Elem1, PartialOnStack, FramePtr, BasicFrameOffset,
1456                              InArgsSizeBytes);
1457     }
1458     auto *Elem2 = ArgVecOn32->getContainers()[2];
1459     auto *Elem3 = ArgVecOn32->getContainers()[3];
1460     finishArgumentLowering(Elem2, PartialOnStack, FramePtr, BasicFrameOffset,
1461                            InArgsSizeBytes);
1462     finishArgumentLowering(Elem3, PartialOnStack, FramePtr, BasicFrameOffset,
1463                            InArgsSizeBytes);
1464     return;
1465   }
1466 
1467   if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) {
1468     Variable *const Lo = Arg64On32->getLo();
1469     Variable *const Hi = Arg64On32->getHi();
1470     finishArgumentLowering(Lo, PartialOnStack, FramePtr, BasicFrameOffset,
1471                            InArgsSizeBytes);
1472     finishArgumentLowering(Hi, PartialOnStack, FramePtr, BasicFrameOffset,
1473                            InArgsSizeBytes);
1474     return;
1475   }
1476 
1477   assert(Ty != IceType_i64);
1478   assert(!isVectorType(Ty));
1479 
1480   const int32_t ArgStackOffset = BasicFrameOffset + *InArgsSizeBytes;
1481   *InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
1482 
1483   if (!Arg->hasReg()) {
1484     Arg->setStackOffset(ArgStackOffset);
1485     return;
1486   }
1487 
1488   // If the argument variable has been assigned a register, we need to copy the
1489   // value from the stack slot.
1490   Variable *Parameter = Func->makeVariable(Ty);
1491   Parameter->setMustNotHaveReg();
1492   Parameter->setStackOffset(ArgStackOffset);
1493   _mov(Arg, Parameter);
1494 }
1495 
addProlog(CfgNode * Node)1496 void TargetMIPS32::addProlog(CfgNode *Node) {
1497   // Stack frame layout:
1498   //
1499   // +------------------------+
1500   // | 1. preserved registers |
1501   // +------------------------+
1502   // | 2. padding             |
1503   // +------------------------+
1504   // | 3. global spill area   |
1505   // +------------------------+
1506   // | 4. padding             |
1507   // +------------------------+
1508   // | 5. local spill area    |
1509   // +------------------------+
1510   // | 6. padding             |
1511   // +------------------------+
1512   // | 7. allocas             |
1513   // +------------------------+
1514   // | 8. padding             |
1515   // +------------------------+
1516   // | 9. out args            |
1517   // +------------------------+ <--- StackPointer
1518   //
1519   // The following variables record the size in bytes of the given areas:
1520   //  * PreservedRegsSizeBytes: area 1
1521   //  * SpillAreaPaddingBytes:  area 2
1522   //  * GlobalsSize:            area 3
1523   //  * GlobalsAndSubsequentPaddingSize: areas 3 - 4
1524   //  * LocalsSpillAreaSize:    area 5
1525   //  * SpillAreaSizeBytes:     areas 2 - 9
1526   //  * maxOutArgsSizeBytes():  area 9
1527 
1528   Context.init(Node);
1529   Context.setInsertPoint(Context.getCur());
1530 
1531   SmallBitVector CalleeSaves = getRegisterSet(RegSet_CalleeSave, RegSet_None);
1532   RegsUsed = SmallBitVector(CalleeSaves.size());
1533 
1534   VarList SortedSpilledVariables;
1535 
1536   size_t GlobalsSize = 0;
1537   // If there is a separate locals area, this represents that area. Otherwise
1538   // it counts any variable not counted by GlobalsSize.
1539   SpillAreaSizeBytes = 0;
1540   // If there is a separate locals area, this specifies the alignment for it.
1541   uint32_t LocalsSlotsAlignmentBytes = 0;
1542   // The entire spill locations area gets aligned to largest natural alignment
1543   // of the variables that have a spill slot.
1544   uint32_t SpillAreaAlignmentBytes = 0;
1545   // For now, we don't have target-specific variables that need special
1546   // treatment (no stack-slot-linked SpillVariable type).
1547   std::function<bool(Variable *)> TargetVarHook = [](Variable *Var) {
1548     static constexpr bool AssignStackSlot = false;
1549     static constexpr bool DontAssignStackSlot = !AssignStackSlot;
1550     if (llvm::isa<Variable64On32>(Var)) {
1551       return DontAssignStackSlot;
1552     }
1553     return AssignStackSlot;
1554   };
1555 
1556   // Compute the list of spilled variables and bounds for GlobalsSize, etc.
1557   getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
1558                         &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
1559                         &LocalsSlotsAlignmentBytes, TargetVarHook);
1560   uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
1561   SpillAreaSizeBytes += GlobalsSize;
1562 
1563   PreservedGPRs.reserve(CalleeSaves.size());
1564 
1565   // Consider FP and RA as callee-save / used as needed.
1566   if (UsesFramePointer) {
1567     if (RegsUsed[RegMIPS32::Reg_FP]) {
1568       llvm::report_fatal_error("Frame pointer has been used.");
1569     }
1570     CalleeSaves[RegMIPS32::Reg_FP] = true;
1571     RegsUsed[RegMIPS32::Reg_FP] = true;
1572   }
1573   if (!MaybeLeafFunc) {
1574     CalleeSaves[RegMIPS32::Reg_RA] = true;
1575     RegsUsed[RegMIPS32::Reg_RA] = true;
1576   }
1577 
1578   // Make two passes over the used registers. The first pass records all the
1579   // used registers -- and their aliases. Then, we figure out which GPR
1580   // registers should be saved.
1581   SmallBitVector ToPreserve(RegMIPS32::Reg_NUM);
1582   for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
1583     if (CalleeSaves[i] && RegsUsed[i]) {
1584       ToPreserve |= RegisterAliases[i];
1585     }
1586   }
1587 
1588   uint32_t NumCallee = 0;
1589 
1590   // RegClasses is a tuple of
1591   //
1592   // <First Register in Class, Last Register in Class, Vector of Save Registers>
1593   //
1594   // We use this tuple to figure out which register we should save/restore
1595   // during
1596   // prolog/epilog.
1597   using RegClassType = std::tuple<uint32_t, uint32_t, VarList *>;
1598   const RegClassType RegClass = RegClassType(
1599       RegMIPS32::Reg_GPR_First, RegMIPS32::Reg_FPR_Last, &PreservedGPRs);
1600   const uint32_t FirstRegInClass = std::get<0>(RegClass);
1601   const uint32_t LastRegInClass = std::get<1>(RegClass);
1602   VarList *const PreservedRegsInClass = std::get<2>(RegClass);
1603   for (uint32_t Reg = LastRegInClass; Reg > FirstRegInClass; Reg--) {
1604     if (!ToPreserve[Reg]) {
1605       continue;
1606     }
1607     ++NumCallee;
1608     Variable *PhysicalRegister = getPhysicalRegister(RegNumT::fromInt(Reg));
1609     PreservedRegsSizeBytes +=
1610         typeWidthInBytesOnStack(PhysicalRegister->getType());
1611     PreservedRegsInClass->push_back(PhysicalRegister);
1612   }
1613 
1614   Ctx->statsUpdateRegistersSaved(NumCallee);
1615 
1616   // Align the variables area. SpillAreaPaddingBytes is the size of the region
1617   // after the preserved registers and before the spill areas.
1618   // LocalsSlotsPaddingBytes is the amount of padding between the globals and
1619   // locals area if they are separate.
1620   assert(SpillAreaAlignmentBytes <= MIPS32_STACK_ALIGNMENT_BYTES);
1621   (void)MIPS32_STACK_ALIGNMENT_BYTES;
1622   assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
1623   uint32_t SpillAreaPaddingBytes = 0;
1624   uint32_t LocalsSlotsPaddingBytes = 0;
1625   alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
1626                        GlobalsSize, LocalsSlotsAlignmentBytes,
1627                        &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
1628   SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
1629   uint32_t GlobalsAndSubsequentPaddingSize =
1630       GlobalsSize + LocalsSlotsPaddingBytes;
1631 
1632   // Adds the out args space to the stack, and align SP if necessary.
1633   if (!NeedsStackAlignment) {
1634     SpillAreaSizeBytes += MaxOutArgsSizeBytes * (VariableAllocaUsed ? 0 : 1);
1635   } else {
1636     SpillAreaSizeBytes = applyStackAlignment(
1637         SpillAreaSizeBytes +
1638         (VariableAllocaUsed ? VariableAllocaAlignBytes : MaxOutArgsSizeBytes));
1639   }
1640 
1641   // Combine fixed alloca with SpillAreaSize.
1642   SpillAreaSizeBytes += FixedAllocaSizeBytes;
1643 
1644   TotalStackSizeBytes =
1645       applyStackAlignment(PreservedRegsSizeBytes + SpillAreaSizeBytes);
1646 
1647   // Generate "addiu sp, sp, -TotalStackSizeBytes"
1648   if (TotalStackSizeBytes) {
1649     // Use the scratch register if needed to legalize the immediate.
1650     Sandboxer(this).addiu_sp(-TotalStackSizeBytes);
1651   }
1652 
1653   Ctx->statsUpdateFrameBytes(TotalStackSizeBytes);
1654 
1655   if (!PreservedGPRs.empty()) {
1656     uint32_t StackOffset = TotalStackSizeBytes;
1657     for (Variable *Var : *PreservedRegsInClass) {
1658       Type RegType;
1659       if (RegMIPS32::isFPRReg(Var->getRegNum()))
1660         RegType = IceType_f32;
1661       else
1662         RegType = IceType_i32;
1663       auto *PhysicalRegister = makeReg(RegType, Var->getRegNum());
1664       StackOffset -= typeWidthInBytesOnStack(RegType);
1665       Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
1666       OperandMIPS32Mem *MemoryLocation = OperandMIPS32Mem::create(
1667           Func, RegType, SP,
1668           llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackOffset)));
1669       Sandboxer(this).sw(PhysicalRegister, MemoryLocation);
1670     }
1671   }
1672 
1673   Variable *FP = getPhysicalRegister(RegMIPS32::Reg_FP);
1674 
1675   // Generate "mov FP, SP" if needed.
1676   if (UsesFramePointer) {
1677     Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
1678     _mov(FP, SP);
1679     // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode).
1680     Context.insert<InstFakeUse>(FP);
1681   }
1682 
1683   // Fill in stack offsets for stack args, and copy args into registers for
1684   // those that were register-allocated. Args are pushed right to left, so
1685   // Arg[0] is closest to the stack/frame pointer.
1686   const VarList &Args = Func->getArgs();
1687   size_t InArgsSizeBytes = MIPS32_MAX_GPR_ARG * 4;
1688   TargetMIPS32::CallingConv CC;
1689   uint32_t ArgNo = 0;
1690 
1691   for (Variable *Arg : Args) {
1692     RegNumT DummyReg;
1693     const Type Ty = Arg->getType();
1694     bool PartialOnStack;
1695     // Skip arguments passed in registers.
1696     if (CC.argInReg(Ty, ArgNo, &DummyReg)) {
1697       // Load argument from stack:
1698       // 1. If this is first vector argument and return type is v4f32.
1699       //    In this case $4 is used to pass stack address implicitly.
1700       //    3rd and 4th element of vector argument is passed through stack.
1701       // 2. If this is second vector argument.
1702       if (ArgNo != 0 && isVectorType(Ty)) {
1703         PartialOnStack = true;
1704         finishArgumentLowering(Arg, PartialOnStack, FP, TotalStackSizeBytes,
1705                                &InArgsSizeBytes);
1706       }
1707     } else {
1708       PartialOnStack = false;
1709       finishArgumentLowering(Arg, PartialOnStack, FP, TotalStackSizeBytes,
1710                              &InArgsSizeBytes);
1711     }
1712     ++ArgNo;
1713   }
1714 
1715   // Fill in stack offsets for locals.
1716   assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
1717                       SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize);
1718   this->HasComputedFrame = true;
1719 
1720   if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
1721     OstreamLocker _(Func->getContext());
1722     Ostream &Str = Func->getContext()->getStrDump();
1723 
1724     Str << "Stack layout:\n";
1725     uint32_t SPAdjustmentPaddingSize =
1726         SpillAreaSizeBytes - LocalsSpillAreaSize -
1727         GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes -
1728         MaxOutArgsSizeBytes;
1729     Str << " in-args = " << InArgsSizeBytes << " bytes\n"
1730         << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
1731         << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
1732         << " globals spill area = " << GlobalsSize << " bytes\n"
1733         << " globals-locals spill areas intermediate padding = "
1734         << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
1735         << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
1736         << " SP alignment padding = " << SPAdjustmentPaddingSize << " bytes\n";
1737 
1738     Str << "Stack details:\n"
1739         << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n"
1740         << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
1741         << " outgoing args size = " << MaxOutArgsSizeBytes << " bytes\n"
1742         << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
1743         << " bytes\n"
1744         << " is FP based = " << 1 << "\n";
1745   }
1746   return;
1747 }
1748 
addEpilog(CfgNode * Node)1749 void TargetMIPS32::addEpilog(CfgNode *Node) {
1750   InstList &Insts = Node->getInsts();
1751   InstList::reverse_iterator RI, E;
1752   for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
1753     if (llvm::isa<InstMIPS32Ret>(*RI))
1754       break;
1755   }
1756   if (RI == E)
1757     return;
1758 
1759   // Convert the reverse_iterator position into its corresponding (forward)
1760   // iterator position.
1761   InstList::iterator InsertPoint = reverseToForwardIterator(RI);
1762   --InsertPoint;
1763   Context.init(Node);
1764   Context.setInsertPoint(InsertPoint);
1765 
1766   Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
1767   if (UsesFramePointer) {
1768     Variable *FP = getPhysicalRegister(RegMIPS32::Reg_FP);
1769     // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake
1770     // use of SP before the assignment of SP=FP keeps previous SP adjustments
1771     // from being dead-code eliminated.
1772     Context.insert<InstFakeUse>(SP);
1773     Sandboxer(this).reset_sp(FP);
1774   }
1775 
1776   VarList::reverse_iterator RIter, END;
1777 
1778   if (!PreservedGPRs.empty()) {
1779     uint32_t StackOffset = TotalStackSizeBytes - PreservedRegsSizeBytes;
1780     for (RIter = PreservedGPRs.rbegin(), END = PreservedGPRs.rend();
1781          RIter != END; ++RIter) {
1782       Type RegType;
1783       if (RegMIPS32::isFPRReg((*RIter)->getRegNum()))
1784         RegType = IceType_f32;
1785       else
1786         RegType = IceType_i32;
1787       auto *PhysicalRegister = makeReg(RegType, (*RIter)->getRegNum());
1788       Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
1789       OperandMIPS32Mem *MemoryLocation = OperandMIPS32Mem::create(
1790           Func, RegType, SP,
1791           llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackOffset)));
1792       _lw(PhysicalRegister, MemoryLocation);
1793       StackOffset += typeWidthInBytesOnStack(PhysicalRegister->getType());
1794     }
1795   }
1796 
1797   if (TotalStackSizeBytes) {
1798     Sandboxer(this).addiu_sp(TotalStackSizeBytes);
1799   }
1800   if (!getFlags().getUseSandboxing())
1801     return;
1802 
1803   Variable *RA = getPhysicalRegister(RegMIPS32::Reg_RA);
1804   Variable *RetValue = nullptr;
1805   if (RI->getSrcSize())
1806     RetValue = llvm::cast<Variable>(RI->getSrc(0));
1807 
1808   Sandboxer(this).ret(RA, RetValue);
1809 
1810   RI->setDeleted();
1811 }
1812 
newBaseRegister(Variable * Base,int32_t Offset,RegNumT ScratchRegNum)1813 Variable *TargetMIPS32::PostLoweringLegalizer::newBaseRegister(
1814     Variable *Base, int32_t Offset, RegNumT ScratchRegNum) {
1815   // Legalize will likely need a lui/ori combination, but if the top bits are
1816   // all 0 from negating the offset and subtracting, we could use that instead.
1817   const bool ShouldSub = Offset != 0 && (-Offset & 0xFFFF0000) == 0;
1818   Variable *ScratchReg = Target->makeReg(IceType_i32, ScratchRegNum);
1819   if (ShouldSub) {
1820     Target->_addi(ScratchReg, Base, -Offset);
1821   } else {
1822     constexpr bool SignExt = true;
1823     if (!OperandMIPS32Mem::canHoldOffset(Base->getType(), SignExt, Offset)) {
1824       const uint32_t UpperBits = (Offset >> 16) & 0xFFFF;
1825       const uint32_t LowerBits = Offset & 0xFFFF;
1826       Target->_lui(ScratchReg, Target->Ctx->getConstantInt32(UpperBits));
1827       if (LowerBits)
1828         Target->_ori(ScratchReg, ScratchReg, LowerBits);
1829       Target->_addu(ScratchReg, ScratchReg, Base);
1830     } else {
1831       Target->_addiu(ScratchReg, Base, Offset);
1832     }
1833   }
1834 
1835   return ScratchReg;
1836 }
1837 
legalizeMovFp(InstMIPS32MovFP64ToI64 * MovInstr)1838 void TargetMIPS32::PostLoweringLegalizer::legalizeMovFp(
1839     InstMIPS32MovFP64ToI64 *MovInstr) {
1840   Variable *Dest = MovInstr->getDest();
1841   Operand *Src = MovInstr->getSrc(0);
1842   const Type SrcTy = Src->getType();
1843 
1844   if (Dest != nullptr && SrcTy == IceType_f64) {
1845     int32_t Offset = Dest->getStackOffset();
1846     auto *Base = Target->getPhysicalRegister(Target->getFrameOrStackReg());
1847     OperandMIPS32Mem *TAddr = OperandMIPS32Mem::create(
1848         Target->Func, IceType_f32, Base,
1849         llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset)));
1850     OperandMIPS32Mem *Addr = legalizeMemOperand(TAddr);
1851     auto *SrcV = llvm::cast<Variable>(Src);
1852     Variable *SrcR;
1853     if (MovInstr->getInt64Part() == Int64_Lo) {
1854       SrcR = Target->makeReg(
1855           IceType_f32, RegMIPS32::get64PairFirstRegNum(SrcV->getRegNum()));
1856     } else {
1857       SrcR = Target->makeReg(
1858           IceType_f32, RegMIPS32::get64PairSecondRegNum(SrcV->getRegNum()));
1859     }
1860     Sandboxer(Target).sw(SrcR, Addr);
1861     if (MovInstr->isDestRedefined()) {
1862       Target->_set_dest_redefined();
1863     }
1864     MovInstr->setDeleted();
1865     return;
1866   }
1867 
1868   llvm::report_fatal_error("legalizeMovFp: Invalid operands");
1869 }
1870 
legalizeMov(InstMIPS32Mov * MovInstr)1871 void TargetMIPS32::PostLoweringLegalizer::legalizeMov(InstMIPS32Mov *MovInstr) {
1872   Variable *Dest = MovInstr->getDest();
1873   assert(Dest != nullptr);
1874   const Type DestTy = Dest->getType();
1875   assert(DestTy != IceType_i64);
1876 
1877   Operand *Src = MovInstr->getSrc(0);
1878   const Type SrcTy = Src->getType();
1879   (void)SrcTy;
1880   assert(SrcTy != IceType_i64);
1881 
1882   bool Legalized = false;
1883   auto *SrcR = llvm::cast<Variable>(Src);
1884   if (Dest->hasReg() && SrcR->hasReg()) {
1885     // This might be a GP to/from FP move generated due to argument passing.
1886     // Use mtc1/mfc1 instead of mov.[s/d] if src and dst registers are of
1887     // different types.
1888     const bool IsDstGPR = RegMIPS32::isGPRReg(Dest->getRegNum());
1889     const bool IsSrcGPR = RegMIPS32::isGPRReg(SrcR->getRegNum());
1890     const RegNumT SRegNum = SrcR->getRegNum();
1891     const RegNumT DRegNum = Dest->getRegNum();
1892     if (IsDstGPR != IsSrcGPR) {
1893       if (IsDstGPR) {
1894         // Dest is GPR and SrcR is FPR. Use mfc1.
1895         int32_t TypeWidth = typeWidthInBytes(DestTy);
1896         if (MovInstr->getDestHi() != nullptr)
1897           TypeWidth += typeWidthInBytes(MovInstr->getDestHi()->getType());
1898         if (TypeWidth == 8) {
1899           // Split it into two mfc1 instructions
1900           Variable *SrcGPRHi = Target->makeReg(
1901               IceType_f32, RegMIPS32::get64PairFirstRegNum(SRegNum));
1902           Variable *SrcGPRLo = Target->makeReg(
1903               IceType_f32, RegMIPS32::get64PairSecondRegNum(SRegNum));
1904           Variable *DstFPRHi, *DstFPRLo;
1905           if (MovInstr->getDestHi() != nullptr && Dest != nullptr) {
1906             DstFPRHi = Target->makeReg(IceType_i32,
1907                                        MovInstr->getDestHi()->getRegNum());
1908             DstFPRLo = Target->makeReg(IceType_i32, Dest->getRegNum());
1909           } else {
1910             DstFPRHi = Target->makeReg(
1911                 IceType_i32, RegMIPS32::get64PairFirstRegNum(DRegNum));
1912             DstFPRLo = Target->makeReg(
1913                 IceType_i32, RegMIPS32::get64PairSecondRegNum(DRegNum));
1914           }
1915           Target->_mov(DstFPRHi, SrcGPRHi);
1916           Target->_mov(DstFPRLo, SrcGPRLo);
1917           Legalized = true;
1918         } else {
1919           Variable *SrcGPR = Target->makeReg(IceType_f32, SRegNum);
1920           Variable *DstFPR = Target->makeReg(IceType_i32, DRegNum);
1921           Target->_mov(DstFPR, SrcGPR);
1922           Legalized = true;
1923         }
1924       } else {
1925         // Dest is FPR and SrcR is GPR. Use mtc1.
1926         if (typeWidthInBytes(Dest->getType()) == 8) {
1927           Variable *SrcGPRHi, *SrcGPRLo;
1928           // SrcR could be $zero which is i32
1929           if (SRegNum == RegMIPS32::Reg_ZERO) {
1930             SrcGPRHi = Target->makeReg(IceType_i32, SRegNum);
1931             SrcGPRLo = SrcGPRHi;
1932           } else {
1933             // Split it into two mtc1 instructions
1934             if (MovInstr->getSrcSize() == 2) {
1935               const auto FirstReg =
1936                   (llvm::cast<Variable>(MovInstr->getSrc(0)))->getRegNum();
1937               const auto SecondReg =
1938                   (llvm::cast<Variable>(MovInstr->getSrc(1)))->getRegNum();
1939               SrcGPRHi = Target->makeReg(IceType_i32, FirstReg);
1940               SrcGPRLo = Target->makeReg(IceType_i32, SecondReg);
1941             } else {
1942               SrcGPRLo = Target->makeReg(
1943                   IceType_i32, RegMIPS32::get64PairFirstRegNum(SRegNum));
1944               SrcGPRHi = Target->makeReg(
1945                   IceType_i32, RegMIPS32::get64PairSecondRegNum(SRegNum));
1946             }
1947           }
1948           Variable *DstFPRHi = Target->makeReg(
1949               IceType_f32, RegMIPS32::get64PairFirstRegNum(DRegNum));
1950           Variable *DstFPRLo = Target->makeReg(
1951               IceType_f32, RegMIPS32::get64PairSecondRegNum(DRegNum));
1952           Target->_mov(DstFPRHi, SrcGPRLo);
1953           Target->_mov(DstFPRLo, SrcGPRHi);
1954           Legalized = true;
1955         } else {
1956           Variable *SrcGPR = Target->makeReg(IceType_i32, SRegNum);
1957           Variable *DstFPR = Target->makeReg(IceType_f32, DRegNum);
1958           Target->_mov(DstFPR, SrcGPR);
1959           Legalized = true;
1960         }
1961       }
1962     }
1963     if (Legalized) {
1964       if (MovInstr->isDestRedefined()) {
1965         Target->_set_dest_redefined();
1966       }
1967       MovInstr->setDeleted();
1968       return;
1969     }
1970   }
1971 
1972   if (!Dest->hasReg()) {
1973     auto *SrcR = llvm::cast<Variable>(Src);
1974     assert(SrcR->hasReg());
1975     assert(!SrcR->isRematerializable());
1976     int32_t Offset = Dest->getStackOffset();
1977 
1978     // This is a _mov(Mem(), Variable), i.e., a store.
1979     auto *Base = Target->getPhysicalRegister(Target->getFrameOrStackReg());
1980 
1981     OperandMIPS32Mem *TAddr = OperandMIPS32Mem::create(
1982         Target->Func, DestTy, Base,
1983         llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset)));
1984     OperandMIPS32Mem *TAddrHi = OperandMIPS32Mem::create(
1985         Target->Func, DestTy, Base,
1986         llvm::cast<ConstantInteger32>(
1987             Target->Ctx->getConstantInt32(Offset + 4)));
1988     OperandMIPS32Mem *Addr = legalizeMemOperand(TAddr);
1989 
1990     // FP arguments are passed in GP reg if first argument is in GP. In this
1991     // case type of the SrcR is still FP thus we need to explicitly generate sw
1992     // instead of swc1.
1993     const RegNumT RegNum = SrcR->getRegNum();
1994     const bool IsSrcGPReg = RegMIPS32::isGPRReg(SrcR->getRegNum());
1995     if (SrcTy == IceType_f32 && IsSrcGPReg) {
1996       Variable *SrcGPR = Target->makeReg(IceType_i32, RegNum);
1997       Sandboxer(Target).sw(SrcGPR, Addr);
1998     } else if (SrcTy == IceType_f64 && IsSrcGPReg) {
1999       Variable *SrcGPRHi =
2000           Target->makeReg(IceType_i32, RegMIPS32::get64PairFirstRegNum(RegNum));
2001       Variable *SrcGPRLo = Target->makeReg(
2002           IceType_i32, RegMIPS32::get64PairSecondRegNum(RegNum));
2003       Sandboxer(Target).sw(SrcGPRHi, Addr);
2004       OperandMIPS32Mem *AddrHi = legalizeMemOperand(TAddrHi);
2005       Sandboxer(Target).sw(SrcGPRLo, AddrHi);
2006     } else if (DestTy == IceType_f64 && IsSrcGPReg) {
2007       const auto FirstReg =
2008           (llvm::cast<Variable>(MovInstr->getSrc(0)))->getRegNum();
2009       const auto SecondReg =
2010           (llvm::cast<Variable>(MovInstr->getSrc(1)))->getRegNum();
2011       Variable *SrcGPRHi = Target->makeReg(IceType_i32, FirstReg);
2012       Variable *SrcGPRLo = Target->makeReg(IceType_i32, SecondReg);
2013       Sandboxer(Target).sw(SrcGPRLo, Addr);
2014       OperandMIPS32Mem *AddrHi = legalizeMemOperand(TAddrHi);
2015       Sandboxer(Target).sw(SrcGPRHi, AddrHi);
2016     } else {
2017       Sandboxer(Target).sw(SrcR, Addr);
2018     }
2019 
2020     Target->Context.insert<InstFakeDef>(Dest);
2021     Legalized = true;
2022   } else if (auto *Var = llvm::dyn_cast<Variable>(Src)) {
2023     if (Var->isRematerializable()) {
2024       // This is equivalent to an x86 _lea(RematOffset(%esp/%ebp), Variable).
2025 
2026       // ExtraOffset is only needed for stack-pointer based frames as we have
2027       // to account for spill storage.
2028       const int32_t ExtraOffset =
2029           (Var->getRegNum() == Target->getFrameOrStackReg())
2030               ? Target->getFrameFixedAllocaOffset()
2031               : 0;
2032 
2033       const int32_t Offset = Var->getStackOffset() + ExtraOffset;
2034       Variable *Base = Target->getPhysicalRegister(Var->getRegNum());
2035       Variable *T = newBaseRegister(Base, Offset, Dest->getRegNum());
2036       Target->_mov(Dest, T);
2037       Legalized = true;
2038     } else {
2039       if (!Var->hasReg()) {
2040         // This is a _mov(Variable, Mem()), i.e., a load.
2041         const int32_t Offset = Var->getStackOffset();
2042         auto *Base = Target->getPhysicalRegister(Target->getFrameOrStackReg());
2043         const RegNumT RegNum = Dest->getRegNum();
2044         const bool IsDstGPReg = RegMIPS32::isGPRReg(Dest->getRegNum());
2045         // If we are moving i64 to a double using stack then the address may
2046         // not be aligned to 8-byte boundary as we split i64 into Hi-Lo parts
2047         // and store them individually with 4-byte alignment. Load the Hi-Lo
2048         // parts in TmpReg and move them to the dest using mtc1.
2049         if (DestTy == IceType_f64 && !Utils::IsAligned(Offset, 8) &&
2050             !IsDstGPReg) {
2051           auto *Reg = Target->makeReg(IceType_i32, Target->getReservedTmpReg());
2052           const RegNumT RegNum = Dest->getRegNum();
2053           Variable *DestLo = Target->makeReg(
2054               IceType_f32, RegMIPS32::get64PairFirstRegNum(RegNum));
2055           Variable *DestHi = Target->makeReg(
2056               IceType_f32, RegMIPS32::get64PairSecondRegNum(RegNum));
2057           OperandMIPS32Mem *AddrLo = OperandMIPS32Mem::create(
2058               Target->Func, IceType_i32, Base,
2059               llvm::cast<ConstantInteger32>(
2060                   Target->Ctx->getConstantInt32(Offset)));
2061           OperandMIPS32Mem *AddrHi = OperandMIPS32Mem::create(
2062               Target->Func, IceType_i32, Base,
2063               llvm::cast<ConstantInteger32>(
2064                   Target->Ctx->getConstantInt32(Offset + 4)));
2065           Sandboxer(Target).lw(Reg, AddrLo);
2066           Target->_mov(DestLo, Reg);
2067           Sandboxer(Target).lw(Reg, AddrHi);
2068           Target->_mov(DestHi, Reg);
2069         } else {
2070           OperandMIPS32Mem *TAddr = OperandMIPS32Mem::create(
2071               Target->Func, DestTy, Base,
2072               llvm::cast<ConstantInteger32>(
2073                   Target->Ctx->getConstantInt32(Offset)));
2074           OperandMIPS32Mem *Addr = legalizeMemOperand(TAddr);
2075           OperandMIPS32Mem *TAddrHi = OperandMIPS32Mem::create(
2076               Target->Func, DestTy, Base,
2077               llvm::cast<ConstantInteger32>(
2078                   Target->Ctx->getConstantInt32(Offset + 4)));
2079           // FP arguments are passed in GP reg if first argument is in GP.
2080           // In this case type of the Dest is still FP thus we need to
2081           // explicitly generate lw instead of lwc1.
2082           if (DestTy == IceType_f32 && IsDstGPReg) {
2083             Variable *DstGPR = Target->makeReg(IceType_i32, RegNum);
2084             Sandboxer(Target).lw(DstGPR, Addr);
2085           } else if (DestTy == IceType_f64 && IsDstGPReg) {
2086             Variable *DstGPRHi = Target->makeReg(
2087                 IceType_i32, RegMIPS32::get64PairFirstRegNum(RegNum));
2088             Variable *DstGPRLo = Target->makeReg(
2089                 IceType_i32, RegMIPS32::get64PairSecondRegNum(RegNum));
2090             Sandboxer(Target).lw(DstGPRHi, Addr);
2091             OperandMIPS32Mem *AddrHi = legalizeMemOperand(TAddrHi);
2092             Sandboxer(Target).lw(DstGPRLo, AddrHi);
2093           } else if (DestTy == IceType_f64 && IsDstGPReg) {
2094             const auto FirstReg =
2095                 (llvm::cast<Variable>(MovInstr->getSrc(0)))->getRegNum();
2096             const auto SecondReg =
2097                 (llvm::cast<Variable>(MovInstr->getSrc(1)))->getRegNum();
2098             Variable *DstGPRHi = Target->makeReg(IceType_i32, FirstReg);
2099             Variable *DstGPRLo = Target->makeReg(IceType_i32, SecondReg);
2100             Sandboxer(Target).lw(DstGPRLo, Addr);
2101             OperandMIPS32Mem *AddrHi = legalizeMemOperand(TAddrHi);
2102             Sandboxer(Target).lw(DstGPRHi, AddrHi);
2103           } else {
2104             Sandboxer(Target).lw(Dest, Addr);
2105           }
2106         }
2107         Legalized = true;
2108       }
2109     }
2110   }
2111 
2112   if (Legalized) {
2113     if (MovInstr->isDestRedefined()) {
2114       Target->_set_dest_redefined();
2115     }
2116     MovInstr->setDeleted();
2117   }
2118 }
2119 
2120 OperandMIPS32Mem *
legalizeMemOperand(OperandMIPS32Mem * Mem)2121 TargetMIPS32::PostLoweringLegalizer::legalizeMemOperand(OperandMIPS32Mem *Mem) {
2122   if (llvm::isa<ConstantRelocatable>(Mem->getOffset())) {
2123     return nullptr;
2124   }
2125   Variable *Base = Mem->getBase();
2126   auto *Ci32 = llvm::cast<ConstantInteger32>(Mem->getOffset());
2127   int32_t Offset = Ci32->getValue();
2128 
2129   if (Base->isRematerializable()) {
2130     const int32_t ExtraOffset =
2131         (Base->getRegNum() == Target->getFrameOrStackReg())
2132             ? Target->getFrameFixedAllocaOffset()
2133             : 0;
2134     Offset += Base->getStackOffset() + ExtraOffset;
2135     Base = Target->getPhysicalRegister(Base->getRegNum());
2136   }
2137 
2138   constexpr bool SignExt = true;
2139   if (!OperandMIPS32Mem::canHoldOffset(Mem->getType(), SignExt, Offset)) {
2140     Base = newBaseRegister(Base, Offset, Target->getReservedTmpReg());
2141     Offset = 0;
2142   }
2143 
2144   return OperandMIPS32Mem::create(
2145       Target->Func, Mem->getType(), Base,
2146       llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset)));
2147 }
2148 
legalizeImmediate(int32_t Imm)2149 Variable *TargetMIPS32::PostLoweringLegalizer::legalizeImmediate(int32_t Imm) {
2150   Variable *Reg = nullptr;
2151   if (!((std::numeric_limits<int16_t>::min() <= Imm) &&
2152         (Imm <= std::numeric_limits<int16_t>::max()))) {
2153     const uint32_t UpperBits = (Imm >> 16) & 0xFFFF;
2154     const uint32_t LowerBits = Imm & 0xFFFF;
2155     Variable *TReg = Target->makeReg(IceType_i32, Target->getReservedTmpReg());
2156     Reg = Target->makeReg(IceType_i32, Target->getReservedTmpReg());
2157     if (LowerBits) {
2158       Target->_lui(TReg, Target->Ctx->getConstantInt32(UpperBits));
2159       Target->_ori(Reg, TReg, LowerBits);
2160     } else {
2161       Target->_lui(Reg, Target->Ctx->getConstantInt32(UpperBits));
2162     }
2163   }
2164   return Reg;
2165 }
2166 
postLowerLegalization()2167 void TargetMIPS32::postLowerLegalization() {
2168   Func->dump("Before postLowerLegalization");
2169   assert(hasComputedFrame());
2170   for (CfgNode *Node : Func->getNodes()) {
2171     Context.init(Node);
2172     PostLoweringLegalizer Legalizer(this);
2173     while (!Context.atEnd()) {
2174       PostIncrLoweringContext PostIncrement(Context);
2175       Inst *CurInstr = iteratorToInst(Context.getCur());
2176       const SizeT NumSrcs = CurInstr->getSrcSize();
2177       Operand *Src0 = NumSrcs < 1 ? nullptr : CurInstr->getSrc(0);
2178       Operand *Src1 = NumSrcs < 2 ? nullptr : CurInstr->getSrc(1);
2179       auto *Src0V = llvm::dyn_cast_or_null<Variable>(Src0);
2180       auto *Src0M = llvm::dyn_cast_or_null<OperandMIPS32Mem>(Src0);
2181       auto *Src1M = llvm::dyn_cast_or_null<OperandMIPS32Mem>(Src1);
2182       Variable *Dst = CurInstr->getDest();
2183       if (auto *MovInstr = llvm::dyn_cast<InstMIPS32Mov>(CurInstr)) {
2184         Legalizer.legalizeMov(MovInstr);
2185         continue;
2186       }
2187       if (auto *MovInstr = llvm::dyn_cast<InstMIPS32MovFP64ToI64>(CurInstr)) {
2188         Legalizer.legalizeMovFp(MovInstr);
2189         continue;
2190       }
2191       if (llvm::isa<InstMIPS32Sw>(CurInstr)) {
2192         if (auto *LegalMem = Legalizer.legalizeMemOperand(Src1M)) {
2193           Sandboxer(this).sw(Src0V, LegalMem);
2194           CurInstr->setDeleted();
2195         }
2196         continue;
2197       }
2198       if (llvm::isa<InstMIPS32Swc1>(CurInstr)) {
2199         if (auto *LegalMem = Legalizer.legalizeMemOperand(Src1M)) {
2200           _swc1(Src0V, LegalMem);
2201           CurInstr->setDeleted();
2202         }
2203         continue;
2204       }
2205       if (llvm::isa<InstMIPS32Sdc1>(CurInstr)) {
2206         if (auto *LegalMem = Legalizer.legalizeMemOperand(Src1M)) {
2207           _sdc1(Src0V, LegalMem);
2208           CurInstr->setDeleted();
2209         }
2210         continue;
2211       }
2212       if (llvm::isa<InstMIPS32Lw>(CurInstr)) {
2213         if (auto *LegalMem = Legalizer.legalizeMemOperand(Src0M)) {
2214           Sandboxer(this).lw(Dst, LegalMem);
2215           CurInstr->setDeleted();
2216         }
2217         continue;
2218       }
2219       if (llvm::isa<InstMIPS32Lwc1>(CurInstr)) {
2220         if (auto *LegalMem = Legalizer.legalizeMemOperand(Src0M)) {
2221           _lwc1(Dst, LegalMem);
2222           CurInstr->setDeleted();
2223         }
2224         continue;
2225       }
2226       if (llvm::isa<InstMIPS32Ldc1>(CurInstr)) {
2227         if (auto *LegalMem = Legalizer.legalizeMemOperand(Src0M)) {
2228           _ldc1(Dst, LegalMem);
2229           CurInstr->setDeleted();
2230         }
2231         continue;
2232       }
2233       if (auto *AddiuInstr = llvm::dyn_cast<InstMIPS32Addiu>(CurInstr)) {
2234         if (auto *LegalImm = Legalizer.legalizeImmediate(
2235                 static_cast<int32_t>(AddiuInstr->getImmediateValue()))) {
2236           _addu(Dst, Src0V, LegalImm);
2237           CurInstr->setDeleted();
2238         }
2239         continue;
2240       }
2241     }
2242   }
2243 }
2244 
loOperand(Operand * Operand)2245 Operand *TargetMIPS32::loOperand(Operand *Operand) {
2246   assert(Operand->getType() == IceType_i64);
2247   if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
2248     return Var64On32->getLo();
2249   if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
2250     return Ctx->getConstantInt32(static_cast<uint32_t>(Const->getValue()));
2251   }
2252   if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) {
2253     // Conservatively disallow memory operands with side-effects (pre/post
2254     // increment) in case of duplication.
2255     assert(Mem->getAddrMode() == OperandMIPS32Mem::Offset);
2256     return OperandMIPS32Mem::create(Func, IceType_i32, Mem->getBase(),
2257                                     Mem->getOffset(), Mem->getAddrMode());
2258   }
2259   llvm_unreachable("Unsupported operand type");
2260   return nullptr;
2261 }
2262 
getOperandAtIndex(Operand * Operand,Type BaseType,uint32_t Index)2263 Operand *TargetMIPS32::getOperandAtIndex(Operand *Operand, Type BaseType,
2264                                          uint32_t Index) {
2265   if (!isVectorType(Operand->getType())) {
2266     llvm::report_fatal_error("getOperandAtIndex: Operand is not vector");
2267     return nullptr;
2268   }
2269 
2270   if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) {
2271     assert(Mem->getAddrMode() == OperandMIPS32Mem::Offset);
2272     Variable *Base = Mem->getBase();
2273     auto *Offset = llvm::cast<ConstantInteger32>(Mem->getOffset());
2274     assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4));
2275     int32_t NextOffsetVal =
2276         Offset->getValue() + (Index * typeWidthInBytes(BaseType));
2277     constexpr bool NoSignExt = false;
2278     if (!OperandMIPS32Mem::canHoldOffset(BaseType, NoSignExt, NextOffsetVal)) {
2279       Constant *_4 = Ctx->getConstantInt32(4);
2280       Variable *NewBase = Func->makeVariable(Base->getType());
2281       lowerArithmetic(
2282           InstArithmetic::create(Func, InstArithmetic::Add, NewBase, Base, _4));
2283       Base = NewBase;
2284     } else {
2285       Offset =
2286           llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal));
2287     }
2288     return OperandMIPS32Mem::create(Func, BaseType, Base, Offset,
2289                                     Mem->getAddrMode());
2290   }
2291 
2292   if (auto *VarVecOn32 = llvm::dyn_cast<VariableVecOn32>(Operand))
2293     return VarVecOn32->getContainers()[Index];
2294 
2295   llvm_unreachable("Unsupported operand type");
2296   return nullptr;
2297 }
2298 
hiOperand(Operand * Operand)2299 Operand *TargetMIPS32::hiOperand(Operand *Operand) {
2300   assert(Operand->getType() == IceType_i64);
2301   if (Operand->getType() != IceType_i64)
2302     return Operand;
2303   if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
2304     return Var64On32->getHi();
2305   if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
2306     return Ctx->getConstantInt32(
2307         static_cast<uint32_t>(Const->getValue() >> 32));
2308   }
2309   if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) {
2310     // Conservatively disallow memory operands with side-effects
2311     // in case of duplication.
2312     assert(Mem->getAddrMode() == OperandMIPS32Mem::Offset);
2313     const Type SplitType = IceType_i32;
2314     Variable *Base = Mem->getBase();
2315     auto *Offset = llvm::cast<ConstantInteger32>(Mem->getOffset());
2316     assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4));
2317     int32_t NextOffsetVal = Offset->getValue() + 4;
2318     constexpr bool SignExt = false;
2319     if (!OperandMIPS32Mem::canHoldOffset(SplitType, SignExt, NextOffsetVal)) {
2320       // We have to make a temp variable and add 4 to either Base or Offset.
2321       // If we add 4 to Offset, this will convert a non-RegReg addressing
2322       // mode into a RegReg addressing mode. Since NaCl sandboxing disallows
2323       // RegReg addressing modes, prefer adding to base and replacing instead.
2324       // Thus we leave the old offset alone.
2325       Constant *Four = Ctx->getConstantInt32(4);
2326       Variable *NewBase = Func->makeVariable(Base->getType());
2327       lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase,
2328                                              Base, Four));
2329       Base = NewBase;
2330     } else {
2331       Offset =
2332           llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal));
2333     }
2334     return OperandMIPS32Mem::create(Func, SplitType, Base, Offset,
2335                                     Mem->getAddrMode());
2336   }
2337   llvm_unreachable("Unsupported operand type");
2338   return nullptr;
2339 }
2340 
getRegisterSet(RegSetMask Include,RegSetMask Exclude) const2341 SmallBitVector TargetMIPS32::getRegisterSet(RegSetMask Include,
2342                                             RegSetMask Exclude) const {
2343   SmallBitVector Registers(RegMIPS32::Reg_NUM);
2344 
2345 #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
2346           isI64Pair, isFP32, isFP64, isVec128, alias_init)                     \
2347   if (scratch && (Include & RegSet_CallerSave))                                \
2348     Registers[RegMIPS32::val] = true;                                          \
2349   if (preserved && (Include & RegSet_CalleeSave))                              \
2350     Registers[RegMIPS32::val] = true;                                          \
2351   if (stackptr && (Include & RegSet_StackPointer))                             \
2352     Registers[RegMIPS32::val] = true;                                          \
2353   if (frameptr && (Include & RegSet_FramePointer))                             \
2354     Registers[RegMIPS32::val] = true;                                          \
2355   if (scratch && (Exclude & RegSet_CallerSave))                                \
2356     Registers[RegMIPS32::val] = false;                                         \
2357   if (preserved && (Exclude & RegSet_CalleeSave))                              \
2358     Registers[RegMIPS32::val] = false;                                         \
2359   if (stackptr && (Exclude & RegSet_StackPointer))                             \
2360     Registers[RegMIPS32::val] = false;                                         \
2361   if (frameptr && (Exclude & RegSet_FramePointer))                             \
2362     Registers[RegMIPS32::val] = false;
2363 
2364   REGMIPS32_TABLE
2365 
2366 #undef X
2367 
2368   if (NeedSandboxing) {
2369     Registers[RegMIPS32::Reg_T6] = false;
2370     Registers[RegMIPS32::Reg_T7] = false;
2371     Registers[RegMIPS32::Reg_T8] = false;
2372   }
2373   return Registers;
2374 }
2375 
lowerAlloca(const InstAlloca * Instr)2376 void TargetMIPS32::lowerAlloca(const InstAlloca *Instr) {
2377   // Conservatively require the stack to be aligned. Some stack adjustment
2378   // operations implemented below assume that the stack is aligned before the
2379   // alloca. All the alloca code ensures that the stack alignment is preserved
2380   // after the alloca. The stack alignment restriction can be relaxed in some
2381   // cases.
2382   NeedsStackAlignment = true;
2383 
2384   // For default align=0, set it to the real value 1, to avoid any
2385   // bit-manipulation problems below.
2386   const uint32_t AlignmentParam = std::max(1u, Instr->getAlignInBytes());
2387 
2388   // LLVM enforces power of 2 alignment.
2389   assert(llvm::isPowerOf2_32(AlignmentParam));
2390   assert(llvm::isPowerOf2_32(MIPS32_STACK_ALIGNMENT_BYTES));
2391 
2392   const uint32_t Alignment =
2393       std::max(AlignmentParam, MIPS32_STACK_ALIGNMENT_BYTES);
2394   const bool OverAligned = Alignment > MIPS32_STACK_ALIGNMENT_BYTES;
2395   const bool OptM1 = Func->getOptLevel() == Opt_m1;
2396   const bool AllocaWithKnownOffset = Instr->getKnownFrameOffset();
2397   const bool UseFramePointer =
2398       hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1;
2399 
2400   if (UseFramePointer)
2401     setHasFramePointer();
2402 
2403   Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
2404 
2405   Variable *Dest = Instr->getDest();
2406   Operand *TotalSize = Instr->getSizeInBytes();
2407 
2408   if (const auto *ConstantTotalSize =
2409           llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
2410     const uint32_t Value =
2411         Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment);
2412     FixedAllocaSizeBytes += Value;
2413     // Constant size alloca.
2414     if (!UseFramePointer) {
2415       // If we don't need a Frame Pointer, this alloca has a known offset to the
2416       // stack pointer. We don't need adjust the stack pointer, nor assign any
2417       // value to Dest, as Dest is rematerializable.
2418       assert(Dest->isRematerializable());
2419       Context.insert<InstFakeDef>(Dest);
2420       return;
2421     }
2422 
2423     if (Alignment > MIPS32_STACK_ALIGNMENT_BYTES) {
2424       CurrentAllocaOffset =
2425           Utils::applyAlignment(CurrentAllocaOffset, Alignment);
2426     }
2427     auto *T = I32Reg();
2428     _addiu(T, SP, CurrentAllocaOffset);
2429     _mov(Dest, T);
2430     CurrentAllocaOffset += Value;
2431     return;
2432 
2433   } else {
2434     // Non-constant sizes need to be adjusted to the next highest multiple of
2435     // the required alignment at runtime.
2436     VariableAllocaUsed = true;
2437     VariableAllocaAlignBytes = AlignmentParam;
2438     Variable *AlignAmount;
2439     auto *TotalSizeR = legalizeToReg(TotalSize, Legal_Reg);
2440     auto *T1 = I32Reg();
2441     auto *T2 = I32Reg();
2442     auto *T3 = I32Reg();
2443     auto *T4 = I32Reg();
2444     auto *T5 = I32Reg();
2445     _addiu(T1, TotalSizeR, MIPS32_STACK_ALIGNMENT_BYTES - 1);
2446     _addiu(T2, getZero(), -MIPS32_STACK_ALIGNMENT_BYTES);
2447     _and(T3, T1, T2);
2448     _subu(T4, SP, T3);
2449     if (Instr->getAlignInBytes()) {
2450       AlignAmount =
2451           legalizeToReg(Ctx->getConstantInt32(-AlignmentParam), Legal_Reg);
2452       _and(T5, T4, AlignAmount);
2453       _mov(Dest, T5);
2454     } else {
2455       _mov(Dest, T4);
2456     }
2457     if (OptM1)
2458       _mov(SP, Dest);
2459     else
2460       Sandboxer(this).reset_sp(Dest);
2461     return;
2462   }
2463 }
2464 
lowerInt64Arithmetic(const InstArithmetic * Instr,Variable * Dest,Operand * Src0,Operand * Src1)2465 void TargetMIPS32::lowerInt64Arithmetic(const InstArithmetic *Instr,
2466                                         Variable *Dest, Operand *Src0,
2467                                         Operand *Src1) {
2468   InstArithmetic::OpKind Op = Instr->getOp();
2469   auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
2470   auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2471   Variable *Src0LoR = nullptr;
2472   Variable *Src1LoR = nullptr;
2473   Variable *Src0HiR = nullptr;
2474   Variable *Src1HiR = nullptr;
2475 
2476   switch (Op) {
2477   case InstArithmetic::_num:
2478     llvm::report_fatal_error("Unknown arithmetic operator");
2479     return;
2480   case InstArithmetic::Add: {
2481     Src0LoR = legalizeToReg(loOperand(Src0));
2482     Src1LoR = legalizeToReg(loOperand(Src1));
2483     Src0HiR = legalizeToReg(hiOperand(Src0));
2484     Src1HiR = legalizeToReg(hiOperand(Src1));
2485     auto *T_Carry = I32Reg(), *T_Lo = I32Reg(), *T_Hi = I32Reg(),
2486          *T_Hi2 = I32Reg();
2487     _addu(T_Lo, Src0LoR, Src1LoR);
2488     _mov(DestLo, T_Lo);
2489     _sltu(T_Carry, T_Lo, Src0LoR);
2490     _addu(T_Hi, T_Carry, Src0HiR);
2491     _addu(T_Hi2, Src1HiR, T_Hi);
2492     _mov(DestHi, T_Hi2);
2493     return;
2494   }
2495   case InstArithmetic::And: {
2496     Src0LoR = legalizeToReg(loOperand(Src0));
2497     Src1LoR = legalizeToReg(loOperand(Src1));
2498     Src0HiR = legalizeToReg(hiOperand(Src0));
2499     Src1HiR = legalizeToReg(hiOperand(Src1));
2500     auto *T_Lo = I32Reg(), *T_Hi = I32Reg();
2501     _and(T_Lo, Src0LoR, Src1LoR);
2502     _mov(DestLo, T_Lo);
2503     _and(T_Hi, Src0HiR, Src1HiR);
2504     _mov(DestHi, T_Hi);
2505     return;
2506   }
2507   case InstArithmetic::Sub: {
2508     Src0LoR = legalizeToReg(loOperand(Src0));
2509     Src1LoR = legalizeToReg(loOperand(Src1));
2510     Src0HiR = legalizeToReg(hiOperand(Src0));
2511     Src1HiR = legalizeToReg(hiOperand(Src1));
2512     auto *T_Borrow = I32Reg(), *T_Lo = I32Reg(), *T_Hi = I32Reg(),
2513          *T_Hi2 = I32Reg();
2514     _subu(T_Lo, Src0LoR, Src1LoR);
2515     _mov(DestLo, T_Lo);
2516     _sltu(T_Borrow, Src0LoR, Src1LoR);
2517     _addu(T_Hi, T_Borrow, Src1HiR);
2518     _subu(T_Hi2, Src0HiR, T_Hi);
2519     _mov(DestHi, T_Hi2);
2520     return;
2521   }
2522   case InstArithmetic::Or: {
2523     Src0LoR = legalizeToReg(loOperand(Src0));
2524     Src1LoR = legalizeToReg(loOperand(Src1));
2525     Src0HiR = legalizeToReg(hiOperand(Src0));
2526     Src1HiR = legalizeToReg(hiOperand(Src1));
2527     auto *T_Lo = I32Reg(), *T_Hi = I32Reg();
2528     _or(T_Lo, Src0LoR, Src1LoR);
2529     _mov(DestLo, T_Lo);
2530     _or(T_Hi, Src0HiR, Src1HiR);
2531     _mov(DestHi, T_Hi);
2532     return;
2533   }
2534   case InstArithmetic::Xor: {
2535     Src0LoR = legalizeToReg(loOperand(Src0));
2536     Src1LoR = legalizeToReg(loOperand(Src1));
2537     Src0HiR = legalizeToReg(hiOperand(Src0));
2538     Src1HiR = legalizeToReg(hiOperand(Src1));
2539     auto *T_Lo = I32Reg(), *T_Hi = I32Reg();
2540     _xor(T_Lo, Src0LoR, Src1LoR);
2541     _mov(DestLo, T_Lo);
2542     _xor(T_Hi, Src0HiR, Src1HiR);
2543     _mov(DestHi, T_Hi);
2544     return;
2545   }
2546   case InstArithmetic::Mul: {
2547     // TODO(rkotler): Make sure that mul has the side effect of clobbering
2548     // LO, HI. Check for any other LO, HI quirkiness in this section.
2549     Src0LoR = legalizeToReg(loOperand(Src0));
2550     Src1LoR = legalizeToReg(loOperand(Src1));
2551     Src0HiR = legalizeToReg(hiOperand(Src0));
2552     Src1HiR = legalizeToReg(hiOperand(Src1));
2553     auto *T_Lo = I32Reg(RegMIPS32::Reg_LO), *T_Hi = I32Reg(RegMIPS32::Reg_HI);
2554     auto *T1 = I32Reg(), *T2 = I32Reg();
2555     auto *TM1 = I32Reg(), *TM2 = I32Reg(), *TM3 = I32Reg(), *TM4 = I32Reg();
2556     _multu(T_Lo, Src0LoR, Src1LoR);
2557     Context.insert<InstFakeDef>(T_Hi, T_Lo);
2558     _mflo(T1, T_Lo);
2559     _mfhi(T2, T_Hi);
2560     _mov(DestLo, T1);
2561     _mul(TM1, Src0HiR, Src1LoR);
2562     _mul(TM2, Src0LoR, Src1HiR);
2563     _addu(TM3, TM1, T2);
2564     _addu(TM4, TM3, TM2);
2565     _mov(DestHi, TM4);
2566     return;
2567   }
2568   case InstArithmetic::Shl: {
2569     auto *T_Lo = I32Reg();
2570     auto *T_Hi = I32Reg();
2571     auto *T1_Lo = I32Reg();
2572     auto *T1_Hi = I32Reg();
2573     auto *T1 = I32Reg();
2574     auto *T2 = I32Reg();
2575     auto *T3 = I32Reg();
2576     auto *T4 = I32Reg();
2577     auto *T5 = I32Reg();
2578 
2579     if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Src1)) {
2580       Src0LoR = legalizeToReg(loOperand(Src0));
2581       int64_t ShiftAmount = Const->getValue();
2582       if (ShiftAmount == 1) {
2583         Src0HiR = legalizeToReg(hiOperand(Src0));
2584         _addu(T_Lo, Src0LoR, Src0LoR);
2585         _sltu(T1, T_Lo, Src0LoR);
2586         _addu(T2, T1, Src0HiR);
2587         _addu(T_Hi, Src0HiR, T2);
2588       } else if (ShiftAmount < INT32_BITS) {
2589         Src0HiR = legalizeToReg(hiOperand(Src0));
2590         _srl(T1, Src0LoR, INT32_BITS - ShiftAmount);
2591         _sll(T2, Src0HiR, ShiftAmount);
2592         _or(T_Hi, T1, T2);
2593         _sll(T_Lo, Src0LoR, ShiftAmount);
2594       } else if (ShiftAmount == INT32_BITS) {
2595         _addiu(T_Lo, getZero(), 0);
2596         _mov(T_Hi, Src0LoR);
2597       } else if (ShiftAmount > INT32_BITS && ShiftAmount < 64) {
2598         _sll(T_Hi, Src0LoR, ShiftAmount - INT32_BITS);
2599         _addiu(T_Lo, getZero(), 0);
2600       }
2601       _mov(DestLo, T_Lo);
2602       _mov(DestHi, T_Hi);
2603       return;
2604     }
2605 
2606     Src0LoR = legalizeToReg(loOperand(Src0));
2607     Src1LoR = legalizeToReg(loOperand(Src1));
2608     Src0HiR = legalizeToReg(hiOperand(Src0));
2609 
2610     _sllv(T1, Src0HiR, Src1LoR);
2611     _not(T2, Src1LoR);
2612     _srl(T3, Src0LoR, 1);
2613     _srlv(T4, T3, T2);
2614     _or(T_Hi, T1, T4);
2615     _sllv(T_Lo, Src0LoR, Src1LoR);
2616 
2617     _mov(T1_Hi, T_Hi);
2618     _mov(T1_Lo, T_Lo);
2619     _andi(T5, Src1LoR, INT32_BITS);
2620     _movn(T1_Hi, T_Lo, T5);
2621     _movn(T1_Lo, getZero(), T5);
2622     _mov(DestHi, T1_Hi);
2623     _mov(DestLo, T1_Lo);
2624     return;
2625   }
2626   case InstArithmetic::Lshr: {
2627 
2628     auto *T_Lo = I32Reg();
2629     auto *T_Hi = I32Reg();
2630     auto *T1_Lo = I32Reg();
2631     auto *T1_Hi = I32Reg();
2632     auto *T1 = I32Reg();
2633     auto *T2 = I32Reg();
2634     auto *T3 = I32Reg();
2635     auto *T4 = I32Reg();
2636     auto *T5 = I32Reg();
2637 
2638     if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Src1)) {
2639       Src0HiR = legalizeToReg(hiOperand(Src0));
2640       int64_t ShiftAmount = Const->getValue();
2641       if (ShiftAmount < INT32_BITS) {
2642         Src0LoR = legalizeToReg(loOperand(Src0));
2643         _sll(T1, Src0HiR, INT32_BITS - ShiftAmount);
2644         _srl(T2, Src0LoR, ShiftAmount);
2645         _or(T_Lo, T1, T2);
2646         _srl(T_Hi, Src0HiR, ShiftAmount);
2647       } else if (ShiftAmount == INT32_BITS) {
2648         _mov(T_Lo, Src0HiR);
2649         _addiu(T_Hi, getZero(), 0);
2650       } else if (ShiftAmount > INT32_BITS && ShiftAmount < 64) {
2651         _srl(T_Lo, Src0HiR, ShiftAmount - INT32_BITS);
2652         _addiu(T_Hi, getZero(), 0);
2653       }
2654       _mov(DestLo, T_Lo);
2655       _mov(DestHi, T_Hi);
2656       return;
2657     }
2658 
2659     Src0LoR = legalizeToReg(loOperand(Src0));
2660     Src1LoR = legalizeToReg(loOperand(Src1));
2661     Src0HiR = legalizeToReg(hiOperand(Src0));
2662 
2663     _srlv(T1, Src0LoR, Src1LoR);
2664     _not(T2, Src1LoR);
2665     _sll(T3, Src0HiR, 1);
2666     _sllv(T4, T3, T2);
2667     _or(T_Lo, T1, T4);
2668     _srlv(T_Hi, Src0HiR, Src1LoR);
2669 
2670     _mov(T1_Hi, T_Hi);
2671     _mov(T1_Lo, T_Lo);
2672     _andi(T5, Src1LoR, INT32_BITS);
2673     _movn(T1_Lo, T_Hi, T5);
2674     _movn(T1_Hi, getZero(), T5);
2675     _mov(DestHi, T1_Hi);
2676     _mov(DestLo, T1_Lo);
2677     return;
2678   }
2679   case InstArithmetic::Ashr: {
2680 
2681     auto *T_Lo = I32Reg();
2682     auto *T_Hi = I32Reg();
2683     auto *T1_Lo = I32Reg();
2684     auto *T1_Hi = I32Reg();
2685     auto *T1 = I32Reg();
2686     auto *T2 = I32Reg();
2687     auto *T3 = I32Reg();
2688     auto *T4 = I32Reg();
2689     auto *T5 = I32Reg();
2690     auto *T6 = I32Reg();
2691 
2692     if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Src1)) {
2693       Src0HiR = legalizeToReg(hiOperand(Src0));
2694       int64_t ShiftAmount = Const->getValue();
2695       if (ShiftAmount < INT32_BITS) {
2696         Src0LoR = legalizeToReg(loOperand(Src0));
2697         _sll(T1, Src0HiR, INT32_BITS - ShiftAmount);
2698         _srl(T2, Src0LoR, ShiftAmount);
2699         _or(T_Lo, T1, T2);
2700         _sra(T_Hi, Src0HiR, ShiftAmount);
2701       } else if (ShiftAmount == INT32_BITS) {
2702         _sra(T_Hi, Src0HiR, INT32_BITS - 1);
2703         _mov(T_Lo, Src0HiR);
2704       } else if (ShiftAmount > INT32_BITS && ShiftAmount < 64) {
2705         _sra(T_Lo, Src0HiR, ShiftAmount - INT32_BITS);
2706         _sra(T_Hi, Src0HiR, INT32_BITS - 1);
2707       }
2708       _mov(DestLo, T_Lo);
2709       _mov(DestHi, T_Hi);
2710       return;
2711     }
2712 
2713     Src0LoR = legalizeToReg(loOperand(Src0));
2714     Src1LoR = legalizeToReg(loOperand(Src1));
2715     Src0HiR = legalizeToReg(hiOperand(Src0));
2716 
2717     _srlv(T1, Src0LoR, Src1LoR);
2718     _not(T2, Src1LoR);
2719     _sll(T3, Src0HiR, 1);
2720     _sllv(T4, T3, T2);
2721     _or(T_Lo, T1, T4);
2722     _srav(T_Hi, Src0HiR, Src1LoR);
2723 
2724     _mov(T1_Hi, T_Hi);
2725     _mov(T1_Lo, T_Lo);
2726     _andi(T5, Src1LoR, INT32_BITS);
2727     _movn(T1_Lo, T_Hi, T5);
2728     _sra(T6, Src0HiR, INT32_BITS - 1);
2729     _movn(T1_Hi, T6, T5);
2730     _mov(DestHi, T1_Hi);
2731     _mov(DestLo, T1_Lo);
2732     return;
2733   }
2734   case InstArithmetic::Fadd:
2735   case InstArithmetic::Fsub:
2736   case InstArithmetic::Fmul:
2737   case InstArithmetic::Fdiv:
2738   case InstArithmetic::Frem:
2739     llvm::report_fatal_error("FP instruction with i64 type");
2740     return;
2741   case InstArithmetic::Udiv:
2742   case InstArithmetic::Sdiv:
2743   case InstArithmetic::Urem:
2744   case InstArithmetic::Srem:
2745     llvm::report_fatal_error("64-bit div and rem should have been prelowered");
2746     return;
2747   }
2748 }
2749 
lowerArithmetic(const InstArithmetic * Instr)2750 void TargetMIPS32::lowerArithmetic(const InstArithmetic *Instr) {
2751   Variable *Dest = Instr->getDest();
2752 
2753   if (Dest->isRematerializable()) {
2754     Context.insert<InstFakeDef>(Dest);
2755     return;
2756   }
2757 
2758   // We need to signal all the UnimplementedLoweringError errors before any
2759   // legalization into new variables, otherwise Om1 register allocation may fail
2760   // when it sees variables that are defined but not used.
2761   Type DestTy = Dest->getType();
2762   Operand *Src0 = legalizeUndef(Instr->getSrc(0));
2763   Operand *Src1 = legalizeUndef(Instr->getSrc(1));
2764   if (DestTy == IceType_i64) {
2765     lowerInt64Arithmetic(Instr, Instr->getDest(), Src0, Src1);
2766     return;
2767   }
2768   if (isVectorType(Dest->getType())) {
2769     llvm::report_fatal_error("Arithmetic: Destination type is vector");
2770     return;
2771   }
2772 
2773   Variable *T = makeReg(Dest->getType());
2774   Variable *Src0R = legalizeToReg(Src0);
2775   Variable *Src1R = nullptr;
2776   uint32_t Value = 0;
2777   bool IsSrc1Imm16 = false;
2778 
2779   switch (Instr->getOp()) {
2780   case InstArithmetic::Add:
2781   case InstArithmetic::Sub: {
2782     auto *Const32 = llvm::dyn_cast<ConstantInteger32>(Src1);
2783     if (Const32 != nullptr && isInt<16>(int32_t(Const32->getValue()))) {
2784       IsSrc1Imm16 = true;
2785       Value = Const32->getValue();
2786     } else {
2787       Src1R = legalizeToReg(Src1);
2788     }
2789     break;
2790   }
2791   case InstArithmetic::And:
2792   case InstArithmetic::Or:
2793   case InstArithmetic::Xor:
2794   case InstArithmetic::Shl:
2795   case InstArithmetic::Lshr:
2796   case InstArithmetic::Ashr: {
2797     auto *Const32 = llvm::dyn_cast<ConstantInteger32>(Src1);
2798     if (Const32 != nullptr && llvm::isUInt<16>(uint32_t(Const32->getValue()))) {
2799       IsSrc1Imm16 = true;
2800       Value = Const32->getValue();
2801     } else {
2802       Src1R = legalizeToReg(Src1);
2803     }
2804     break;
2805   }
2806   default:
2807     Src1R = legalizeToReg(Src1);
2808     break;
2809   }
2810   constexpr uint32_t DivideByZeroTrapCode = 7;
2811 
2812   switch (Instr->getOp()) {
2813   case InstArithmetic::_num:
2814     break;
2815   case InstArithmetic::Add: {
2816     auto *T0R = Src0R;
2817     auto *T1R = Src1R;
2818     if (Dest->getType() != IceType_i32) {
2819       T0R = makeReg(IceType_i32);
2820       lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
2821       if (!IsSrc1Imm16) {
2822         T1R = makeReg(IceType_i32);
2823         lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
2824       }
2825     }
2826     if (IsSrc1Imm16) {
2827       _addiu(T, T0R, Value);
2828     } else {
2829       _addu(T, T0R, T1R);
2830     }
2831     _mov(Dest, T);
2832     return;
2833   }
2834   case InstArithmetic::And:
2835     if (IsSrc1Imm16) {
2836       _andi(T, Src0R, Value);
2837     } else {
2838       _and(T, Src0R, Src1R);
2839     }
2840     _mov(Dest, T);
2841     return;
2842   case InstArithmetic::Or:
2843     if (IsSrc1Imm16) {
2844       _ori(T, Src0R, Value);
2845     } else {
2846       _or(T, Src0R, Src1R);
2847     }
2848     _mov(Dest, T);
2849     return;
2850   case InstArithmetic::Xor:
2851     if (IsSrc1Imm16) {
2852       _xori(T, Src0R, Value);
2853     } else {
2854       _xor(T, Src0R, Src1R);
2855     }
2856     _mov(Dest, T);
2857     return;
2858   case InstArithmetic::Sub: {
2859     auto *T0R = Src0R;
2860     auto *T1R = Src1R;
2861     if (Dest->getType() != IceType_i32) {
2862       T0R = makeReg(IceType_i32);
2863       lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
2864       if (!IsSrc1Imm16) {
2865         T1R = makeReg(IceType_i32);
2866         lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
2867       }
2868     }
2869     if (IsSrc1Imm16) {
2870       _addiu(T, T0R, -Value);
2871     } else {
2872       _subu(T, T0R, T1R);
2873     }
2874     _mov(Dest, T);
2875     return;
2876   }
2877   case InstArithmetic::Mul: {
2878     _mul(T, Src0R, Src1R);
2879     _mov(Dest, T);
2880     return;
2881   }
2882   case InstArithmetic::Shl: {
2883     if (IsSrc1Imm16) {
2884       _sll(T, Src0R, Value);
2885     } else {
2886       _sllv(T, Src0R, Src1R);
2887     }
2888     _mov(Dest, T);
2889     return;
2890   }
2891   case InstArithmetic::Lshr: {
2892     auto *T0R = Src0R;
2893     auto *T1R = Src1R;
2894     if (Dest->getType() != IceType_i32) {
2895       T0R = makeReg(IceType_i32);
2896       lowerCast(InstCast::create(Func, InstCast::Zext, T0R, Src0R));
2897       if (!IsSrc1Imm16) {
2898         T1R = makeReg(IceType_i32);
2899         lowerCast(InstCast::create(Func, InstCast::Zext, T1R, Src1R));
2900       }
2901     }
2902     if (IsSrc1Imm16) {
2903       _srl(T, T0R, Value);
2904     } else {
2905       _srlv(T, T0R, T1R);
2906     }
2907     _mov(Dest, T);
2908     return;
2909   }
2910   case InstArithmetic::Ashr: {
2911     auto *T0R = Src0R;
2912     auto *T1R = Src1R;
2913     if (Dest->getType() != IceType_i32) {
2914       T0R = makeReg(IceType_i32);
2915       lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
2916       if (!IsSrc1Imm16) {
2917         T1R = makeReg(IceType_i32);
2918         lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
2919       }
2920     }
2921     if (IsSrc1Imm16) {
2922       _sra(T, T0R, Value);
2923     } else {
2924       _srav(T, T0R, T1R);
2925     }
2926     _mov(Dest, T);
2927     return;
2928   }
2929   case InstArithmetic::Udiv: {
2930     auto *T_Zero = I32Reg(RegMIPS32::Reg_ZERO);
2931     auto *T0R = Src0R;
2932     auto *T1R = Src1R;
2933     if (Dest->getType() != IceType_i32) {
2934       T0R = makeReg(IceType_i32);
2935       lowerCast(InstCast::create(Func, InstCast::Zext, T0R, Src0R));
2936       T1R = makeReg(IceType_i32);
2937       lowerCast(InstCast::create(Func, InstCast::Zext, T1R, Src1R));
2938     }
2939     _divu(T_Zero, T0R, T1R);
2940     _teq(T1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero
2941     _mflo(T, T_Zero);
2942     _mov(Dest, T);
2943     return;
2944   }
2945   case InstArithmetic::Sdiv: {
2946     auto *T_Zero = I32Reg(RegMIPS32::Reg_ZERO);
2947     auto *T0R = Src0R;
2948     auto *T1R = Src1R;
2949     if (Dest->getType() != IceType_i32) {
2950       T0R = makeReg(IceType_i32);
2951       lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
2952       T1R = makeReg(IceType_i32);
2953       lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
2954     }
2955     _div(T_Zero, T0R, T1R);
2956     _teq(T1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero
2957     _mflo(T, T_Zero);
2958     _mov(Dest, T);
2959     return;
2960   }
2961   case InstArithmetic::Urem: {
2962     auto *T_Zero = I32Reg(RegMIPS32::Reg_ZERO);
2963     auto *T0R = Src0R;
2964     auto *T1R = Src1R;
2965     if (Dest->getType() != IceType_i32) {
2966       T0R = makeReg(IceType_i32);
2967       lowerCast(InstCast::create(Func, InstCast::Zext, T0R, Src0R));
2968       T1R = makeReg(IceType_i32);
2969       lowerCast(InstCast::create(Func, InstCast::Zext, T1R, Src1R));
2970     }
2971     _divu(T_Zero, T0R, T1R);
2972     _teq(T1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero
2973     _mfhi(T, T_Zero);
2974     _mov(Dest, T);
2975     return;
2976   }
2977   case InstArithmetic::Srem: {
2978     auto *T_Zero = I32Reg(RegMIPS32::Reg_ZERO);
2979     auto *T0R = Src0R;
2980     auto *T1R = Src1R;
2981     if (Dest->getType() != IceType_i32) {
2982       T0R = makeReg(IceType_i32);
2983       lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
2984       T1R = makeReg(IceType_i32);
2985       lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
2986     }
2987     _div(T_Zero, T0R, T1R);
2988     _teq(T1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero
2989     _mfhi(T, T_Zero);
2990     _mov(Dest, T);
2991     return;
2992   }
2993   case InstArithmetic::Fadd: {
2994     if (DestTy == IceType_f32) {
2995       _add_s(T, Src0R, Src1R);
2996       _mov(Dest, T);
2997       return;
2998     }
2999     if (DestTy == IceType_f64) {
3000       _add_d(T, Src0R, Src1R);
3001       _mov(Dest, T);
3002       return;
3003     }
3004     break;
3005   }
3006   case InstArithmetic::Fsub:
3007     if (DestTy == IceType_f32) {
3008       _sub_s(T, Src0R, Src1R);
3009       _mov(Dest, T);
3010       return;
3011     }
3012     if (DestTy == IceType_f64) {
3013       _sub_d(T, Src0R, Src1R);
3014       _mov(Dest, T);
3015       return;
3016     }
3017     break;
3018   case InstArithmetic::Fmul:
3019     if (DestTy == IceType_f32) {
3020       _mul_s(T, Src0R, Src1R);
3021       _mov(Dest, T);
3022       return;
3023     }
3024     if (DestTy == IceType_f64) {
3025       _mul_d(T, Src0R, Src1R);
3026       _mov(Dest, T);
3027       return;
3028     }
3029     break;
3030   case InstArithmetic::Fdiv:
3031     if (DestTy == IceType_f32) {
3032       _div_s(T, Src0R, Src1R);
3033       _mov(Dest, T);
3034       return;
3035     }
3036     if (DestTy == IceType_f64) {
3037       _div_d(T, Src0R, Src1R);
3038       _mov(Dest, T);
3039       return;
3040     }
3041     break;
3042   case InstArithmetic::Frem:
3043     llvm::report_fatal_error("frem should have been prelowered.");
3044     break;
3045   }
3046   llvm::report_fatal_error("Unknown arithmetic operator");
3047 }
3048 
lowerAssign(const InstAssign * Instr)3049 void TargetMIPS32::lowerAssign(const InstAssign *Instr) {
3050   Variable *Dest = Instr->getDest();
3051 
3052   if (Dest->isRematerializable()) {
3053     Context.insert<InstFakeDef>(Dest);
3054     return;
3055   }
3056 
3057   // Source type may not be same as destination
3058   if (isVectorType(Dest->getType())) {
3059     Operand *Src0 = legalizeUndef(Instr->getSrc(0));
3060     auto *DstVec = llvm::dyn_cast<VariableVecOn32>(Dest);
3061     for (SizeT i = 0; i < DstVec->ContainersPerVector; ++i) {
3062       auto *DCont = DstVec->getContainers()[i];
3063       auto *SCont =
3064           legalize(getOperandAtIndex(Src0, IceType_i32, i), Legal_Reg);
3065       auto *TReg = makeReg(IceType_i32);
3066       _mov(TReg, SCont);
3067       _mov(DCont, TReg);
3068     }
3069     return;
3070   }
3071   Operand *Src0 = Instr->getSrc(0);
3072   assert(Dest->getType() == Src0->getType());
3073   if (Dest->getType() == IceType_i64) {
3074     Src0 = legalizeUndef(Src0);
3075     Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg);
3076     Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg);
3077     auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3078     auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3079     auto *T_Lo = I32Reg(), *T_Hi = I32Reg();
3080     _mov(T_Lo, Src0Lo);
3081     _mov(DestLo, T_Lo);
3082     _mov(T_Hi, Src0Hi);
3083     _mov(DestHi, T_Hi);
3084     return;
3085   }
3086   Operand *SrcR;
3087   if (Dest->hasReg()) {
3088     // If Dest already has a physical register, then legalize the Src operand
3089     // into a Variable with the same register assignment.  This especially
3090     // helps allow the use of Flex operands.
3091     SrcR = legalize(Src0, Legal_Reg, Dest->getRegNum());
3092   } else {
3093     // Dest could be a stack operand. Since we could potentially need
3094     // to do a Store (and store can only have Register operands),
3095     // legalize this to a register.
3096     SrcR = legalize(Src0, Legal_Reg);
3097   }
3098   _mov(Dest, SrcR);
3099 }
3100 
lowerBr(const InstBr * Instr)3101 void TargetMIPS32::lowerBr(const InstBr *Instr) {
3102   if (Instr->isUnconditional()) {
3103     _br(Instr->getTargetUnconditional());
3104     return;
3105   }
3106   CfgNode *TargetTrue = Instr->getTargetTrue();
3107   CfgNode *TargetFalse = Instr->getTargetFalse();
3108   Operand *Boolean = Instr->getCondition();
3109   const Inst *Producer = Computations.getProducerOf(Boolean);
3110   if (Producer == nullptr) {
3111     // Since we don't know the producer of this boolean we will assume its
3112     // producer will keep it in positive logic and just emit beqz with this
3113     // Boolean as an operand.
3114     auto *BooleanR = legalizeToReg(Boolean);
3115     _br(TargetTrue, TargetFalse, BooleanR, CondMIPS32::Cond::EQZ);
3116     return;
3117   }
3118   if (Producer->getKind() == Inst::Icmp) {
3119     const InstIcmp *CompareInst = llvm::cast<InstIcmp>(Producer);
3120     Operand *Src0 = CompareInst->getSrc(0);
3121     Operand *Src1 = CompareInst->getSrc(1);
3122     const Type Src0Ty = Src0->getType();
3123     assert(Src0Ty == Src1->getType());
3124 
3125     Variable *Src0R = nullptr;
3126     Variable *Src1R = nullptr;
3127     Variable *Src0HiR = nullptr;
3128     Variable *Src1HiR = nullptr;
3129     if (Src0Ty == IceType_i64) {
3130       Src0R = legalizeToReg(loOperand(Src0));
3131       Src1R = legalizeToReg(loOperand(Src1));
3132       Src0HiR = legalizeToReg(hiOperand(Src0));
3133       Src1HiR = legalizeToReg(hiOperand(Src1));
3134     } else {
3135       auto *Src0RT = legalizeToReg(Src0);
3136       auto *Src1RT = legalizeToReg(Src1);
3137       // Sign/Zero extend the source operands
3138       if (Src0Ty != IceType_i32) {
3139         InstCast::OpKind CastKind;
3140         switch (CompareInst->getCondition()) {
3141         case InstIcmp::Eq:
3142         case InstIcmp::Ne:
3143         case InstIcmp::Sgt:
3144         case InstIcmp::Sge:
3145         case InstIcmp::Slt:
3146         case InstIcmp::Sle:
3147           CastKind = InstCast::Sext;
3148           break;
3149         default:
3150           CastKind = InstCast::Zext;
3151           break;
3152         }
3153         Src0R = makeReg(IceType_i32);
3154         Src1R = makeReg(IceType_i32);
3155         lowerCast(InstCast::create(Func, CastKind, Src0R, Src0RT));
3156         lowerCast(InstCast::create(Func, CastKind, Src1R, Src1RT));
3157       } else {
3158         Src0R = Src0RT;
3159         Src1R = Src1RT;
3160       }
3161     }
3162     auto *DestT = makeReg(IceType_i32);
3163 
3164     switch (CompareInst->getCondition()) {
3165     default:
3166       llvm_unreachable("unexpected condition");
3167       return;
3168     case InstIcmp::Eq: {
3169       if (Src0Ty == IceType_i64) {
3170         auto *T1 = I32Reg();
3171         auto *T2 = I32Reg();
3172         auto *T3 = I32Reg();
3173         _xor(T1, Src0HiR, Src1HiR);
3174         _xor(T2, Src0R, Src1R);
3175         _or(T3, T1, T2);
3176         _mov(DestT, T3);
3177         _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3178       } else {
3179         _br(TargetTrue, TargetFalse, Src0R, Src1R, CondMIPS32::Cond::NE);
3180       }
3181       return;
3182     }
3183     case InstIcmp::Ne: {
3184       if (Src0Ty == IceType_i64) {
3185         auto *T1 = I32Reg();
3186         auto *T2 = I32Reg();
3187         auto *T3 = I32Reg();
3188         _xor(T1, Src0HiR, Src1HiR);
3189         _xor(T2, Src0R, Src1R);
3190         _or(T3, T1, T2);
3191         _mov(DestT, T3);
3192         _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
3193       } else {
3194         _br(TargetTrue, TargetFalse, Src0R, Src1R, CondMIPS32::Cond::EQ);
3195       }
3196       return;
3197     }
3198     case InstIcmp::Ugt: {
3199       if (Src0Ty == IceType_i64) {
3200         auto *T1 = I32Reg();
3201         auto *T2 = I32Reg();
3202         auto *T3 = I32Reg();
3203         auto *T4 = I32Reg();
3204         auto *T5 = I32Reg();
3205         _xor(T1, Src0HiR, Src1HiR);
3206         _sltu(T2, Src1HiR, Src0HiR);
3207         _xori(T3, T2, 1);
3208         _sltu(T4, Src1R, Src0R);
3209         _xori(T5, T4, 1);
3210         _movz(T3, T5, T1);
3211         _mov(DestT, T3);
3212         _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3213       } else {
3214         _sltu(DestT, Src1R, Src0R);
3215         _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
3216       }
3217       return;
3218     }
3219     case InstIcmp::Uge: {
3220       if (Src0Ty == IceType_i64) {
3221         auto *T1 = I32Reg();
3222         auto *T2 = I32Reg();
3223         auto *T3 = I32Reg();
3224         _xor(T1, Src0HiR, Src1HiR);
3225         _sltu(T2, Src0HiR, Src1HiR);
3226         _sltu(T3, Src0R, Src1R);
3227         _movz(T2, T3, T1);
3228         _mov(DestT, T2);
3229         _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3230       } else {
3231         _sltu(DestT, Src0R, Src1R);
3232         _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3233       }
3234       return;
3235     }
3236     case InstIcmp::Ult: {
3237       if (Src0Ty == IceType_i64) {
3238         auto *T1 = I32Reg();
3239         auto *T2 = I32Reg();
3240         auto *T3 = I32Reg();
3241         auto *T4 = I32Reg();
3242         auto *T5 = I32Reg();
3243         _xor(T1, Src0HiR, Src1HiR);
3244         _sltu(T2, Src0HiR, Src1HiR);
3245         _xori(T3, T2, 1);
3246         _sltu(T4, Src0R, Src1R);
3247         _xori(T5, T4, 1);
3248         _movz(T3, T5, T1);
3249         _mov(DestT, T3);
3250         _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3251       } else {
3252         _sltu(DestT, Src0R, Src1R);
3253         _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
3254       }
3255       return;
3256     }
3257     case InstIcmp::Ule: {
3258       if (Src0Ty == IceType_i64) {
3259         auto *T1 = I32Reg();
3260         auto *T2 = I32Reg();
3261         auto *T3 = I32Reg();
3262         _xor(T1, Src0HiR, Src1HiR);
3263         _sltu(T2, Src1HiR, Src0HiR);
3264         _sltu(T3, Src1R, Src0R);
3265         _movz(T2, T3, T1);
3266         _mov(DestT, T2);
3267         _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3268       } else {
3269         _sltu(DestT, Src1R, Src0R);
3270         _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3271       }
3272       return;
3273     }
3274     case InstIcmp::Sgt: {
3275       if (Src0Ty == IceType_i64) {
3276         auto *T1 = I32Reg();
3277         auto *T2 = I32Reg();
3278         auto *T3 = I32Reg();
3279         auto *T4 = I32Reg();
3280         auto *T5 = I32Reg();
3281         _xor(T1, Src0HiR, Src1HiR);
3282         _slt(T2, Src1HiR, Src0HiR);
3283         _xori(T3, T2, 1);
3284         _sltu(T4, Src1R, Src0R);
3285         _xori(T5, T4, 1);
3286         _movz(T3, T5, T1);
3287         _mov(DestT, T3);
3288         _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3289       } else {
3290         _slt(DestT, Src1R, Src0R);
3291         _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
3292       }
3293       return;
3294     }
3295     case InstIcmp::Sge: {
3296       if (Src0Ty == IceType_i64) {
3297         auto *T1 = I32Reg();
3298         auto *T2 = I32Reg();
3299         auto *T3 = I32Reg();
3300         _xor(T1, Src0HiR, Src1HiR);
3301         _slt(T2, Src0HiR, Src1HiR);
3302         _sltu(T3, Src0R, Src1R);
3303         _movz(T2, T3, T1);
3304         _mov(DestT, T2);
3305         _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3306       } else {
3307         _slt(DestT, Src0R, Src1R);
3308         _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3309       }
3310       return;
3311     }
3312     case InstIcmp::Slt: {
3313       if (Src0Ty == IceType_i64) {
3314         auto *T1 = I32Reg();
3315         auto *T2 = I32Reg();
3316         auto *T3 = I32Reg();
3317         auto *T4 = I32Reg();
3318         auto *T5 = I32Reg();
3319         _xor(T1, Src0HiR, Src1HiR);
3320         _slt(T2, Src0HiR, Src1HiR);
3321         _xori(T3, T2, 1);
3322         _sltu(T4, Src0R, Src1R);
3323         _xori(T5, T4, 1);
3324         _movz(T3, T5, T1);
3325         _mov(DestT, T3);
3326         _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3327       } else {
3328         _slt(DestT, Src0R, Src1R);
3329         _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
3330       }
3331       return;
3332     }
3333     case InstIcmp::Sle: {
3334       if (Src0Ty == IceType_i64) {
3335         auto *T1 = I32Reg();
3336         auto *T2 = I32Reg();
3337         auto *T3 = I32Reg();
3338         _xor(T1, Src0HiR, Src1HiR);
3339         _slt(T2, Src1HiR, Src0HiR);
3340         _sltu(T3, Src1R, Src0R);
3341         _movz(T2, T3, T1);
3342         _mov(DestT, T2);
3343         _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3344       } else {
3345         _slt(DestT, Src1R, Src0R);
3346         _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3347       }
3348       return;
3349     }
3350     }
3351   }
3352 }
3353 
lowerCall(const InstCall * Instr)3354 void TargetMIPS32::lowerCall(const InstCall *Instr) {
3355   CfgVector<Variable *> RegArgs;
3356   NeedsStackAlignment = true;
3357 
3358   //  Assign arguments to registers and stack. Also reserve stack.
3359   TargetMIPS32::CallingConv CC;
3360 
3361   // Pair of Arg Operand -> GPR number assignments.
3362   llvm::SmallVector<std::pair<Operand *, RegNumT>, MIPS32_MAX_GPR_ARG> GPRArgs;
3363   llvm::SmallVector<std::pair<Operand *, RegNumT>, MIPS32_MAX_FP_ARG> FPArgs;
3364   // Pair of Arg Operand -> stack offset.
3365   llvm::SmallVector<std::pair<Operand *, int32_t>, 8> StackArgs;
3366   size_t ParameterAreaSizeBytes = 16;
3367 
3368   // Classify each argument operand according to the location where the
3369   // argument is passed.
3370 
3371   // v4f32 is returned through stack. $4 is setup by the caller and passed as
3372   // first argument implicitly. Callee then copies the return vector at $4.
3373   SizeT ArgNum = 0;
3374   Variable *Dest = Instr->getDest();
3375   Variable *RetVecFloat = nullptr;
3376   if (Dest && isVectorFloatingType(Dest->getType())) {
3377     ArgNum = 1;
3378     CC.discardReg(RegMIPS32::Reg_A0);
3379     RetVecFloat = Func->makeVariable(IceType_i32);
3380     auto *ByteCount = ConstantInteger32::create(Ctx, IceType_i32, 16);
3381     constexpr SizeT Alignment = 4;
3382     lowerAlloca(InstAlloca::create(Func, RetVecFloat, ByteCount, Alignment));
3383     RegArgs.emplace_back(
3384         legalizeToReg(RetVecFloat, RegNumT::fixme(RegMIPS32::Reg_A0)));
3385   }
3386 
3387   for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
3388     Operand *Arg = legalizeUndef(Instr->getArg(i));
3389     const Type Ty = Arg->getType();
3390     bool InReg = false;
3391     RegNumT Reg;
3392 
3393     InReg = CC.argInReg(Ty, i, &Reg);
3394 
3395     if (!InReg) {
3396       if (isVectorType(Ty)) {
3397         auto *ArgVec = llvm::cast<VariableVecOn32>(Arg);
3398         ParameterAreaSizeBytes =
3399             applyStackAlignmentTy(ParameterAreaSizeBytes, IceType_i64);
3400         for (Variable *Elem : ArgVec->getContainers()) {
3401           StackArgs.push_back(std::make_pair(Elem, ParameterAreaSizeBytes));
3402           ParameterAreaSizeBytes += typeWidthInBytesOnStack(IceType_i32);
3403         }
3404       } else {
3405         ParameterAreaSizeBytes =
3406             applyStackAlignmentTy(ParameterAreaSizeBytes, Ty);
3407         StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes));
3408         ParameterAreaSizeBytes += typeWidthInBytesOnStack(Ty);
3409       }
3410       ++ArgNum;
3411       continue;
3412     }
3413 
3414     if (isVectorType(Ty)) {
3415       auto *ArgVec = llvm::cast<VariableVecOn32>(Arg);
3416       Operand *Elem0 = ArgVec->getContainers()[0];
3417       Operand *Elem1 = ArgVec->getContainers()[1];
3418       GPRArgs.push_back(
3419           std::make_pair(Elem0, RegNumT::fixme((unsigned)Reg + 0)));
3420       GPRArgs.push_back(
3421           std::make_pair(Elem1, RegNumT::fixme((unsigned)Reg + 1)));
3422       Operand *Elem2 = ArgVec->getContainers()[2];
3423       Operand *Elem3 = ArgVec->getContainers()[3];
3424       // First argument is passed in $4:$5:$6:$7
3425       // Second and rest arguments are passed in $6:$7:stack:stack
3426       if (ArgNum == 0) {
3427         GPRArgs.push_back(
3428             std::make_pair(Elem2, RegNumT::fixme((unsigned)Reg + 2)));
3429         GPRArgs.push_back(
3430             std::make_pair(Elem3, RegNumT::fixme((unsigned)Reg + 3)));
3431       } else {
3432         ParameterAreaSizeBytes =
3433             applyStackAlignmentTy(ParameterAreaSizeBytes, IceType_i64);
3434         StackArgs.push_back(std::make_pair(Elem2, ParameterAreaSizeBytes));
3435         ParameterAreaSizeBytes += typeWidthInBytesOnStack(IceType_i32);
3436         StackArgs.push_back(std::make_pair(Elem3, ParameterAreaSizeBytes));
3437         ParameterAreaSizeBytes += typeWidthInBytesOnStack(IceType_i32);
3438       }
3439     } else if (Ty == IceType_i64) {
3440       Operand *Lo = loOperand(Arg);
3441       Operand *Hi = hiOperand(Arg);
3442       GPRArgs.push_back(
3443           std::make_pair(Lo, RegMIPS32::get64PairFirstRegNum(Reg)));
3444       GPRArgs.push_back(
3445           std::make_pair(Hi, RegMIPS32::get64PairSecondRegNum(Reg)));
3446     } else if (isScalarIntegerType(Ty)) {
3447       GPRArgs.push_back(std::make_pair(Arg, Reg));
3448     } else {
3449       FPArgs.push_back(std::make_pair(Arg, Reg));
3450     }
3451     ++ArgNum;
3452   }
3453 
3454   // Adjust the parameter area so that the stack is aligned. It is assumed that
3455   // the stack is already aligned at the start of the calling sequence.
3456   ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
3457 
3458   // Copy arguments that are passed on the stack to the appropriate stack
3459   // locations.
3460   Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
3461   for (auto &StackArg : StackArgs) {
3462     ConstantInteger32 *Loc =
3463         llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second));
3464     Type Ty = StackArg.first->getType();
3465     OperandMIPS32Mem *Addr;
3466     constexpr bool SignExt = false;
3467     if (OperandMIPS32Mem::canHoldOffset(Ty, SignExt, StackArg.second)) {
3468       Addr = OperandMIPS32Mem::create(Func, Ty, SP, Loc);
3469     } else {
3470       Variable *NewBase = Func->makeVariable(SP->getType());
3471       lowerArithmetic(
3472           InstArithmetic::create(Func, InstArithmetic::Add, NewBase, SP, Loc));
3473       Addr = formMemoryOperand(NewBase, Ty);
3474     }
3475     lowerStore(InstStore::create(Func, StackArg.first, Addr));
3476   }
3477 
3478   // Generate the call instruction.  Assign its result to a temporary with high
3479   // register allocation weight.
3480 
3481   // ReturnReg doubles as ReturnRegLo as necessary.
3482   Variable *ReturnReg = nullptr;
3483   Variable *ReturnRegHi = nullptr;
3484   if (Dest) {
3485     switch (Dest->getType()) {
3486     case IceType_NUM:
3487       llvm_unreachable("Invalid Call dest type");
3488       return;
3489     case IceType_void:
3490       break;
3491     case IceType_i1:
3492     case IceType_i8:
3493     case IceType_i16:
3494     case IceType_i32:
3495       ReturnReg = makeReg(Dest->getType(), RegMIPS32::Reg_V0);
3496       break;
3497     case IceType_i64:
3498       ReturnReg = I32Reg(RegMIPS32::Reg_V0);
3499       ReturnRegHi = I32Reg(RegMIPS32::Reg_V1);
3500       break;
3501     case IceType_f32:
3502       ReturnReg = makeReg(Dest->getType(), RegMIPS32::Reg_F0);
3503       break;
3504     case IceType_f64:
3505       ReturnReg = makeReg(IceType_f64, RegMIPS32::Reg_F0);
3506       break;
3507     case IceType_v4i1:
3508     case IceType_v8i1:
3509     case IceType_v16i1:
3510     case IceType_v16i8:
3511     case IceType_v8i16:
3512     case IceType_v4i32: {
3513       ReturnReg = makeReg(Dest->getType(), RegMIPS32::Reg_V0);
3514       auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg);
3515       RetVec->initVecElement(Func);
3516       for (SizeT i = 0; i < RetVec->ContainersPerVector; ++i) {
3517         auto *Var = RetVec->getContainers()[i];
3518         Var->setRegNum(RegNumT::fixme(RegMIPS32::Reg_V0 + i));
3519       }
3520       break;
3521     }
3522     case IceType_v4f32:
3523       ReturnReg = makeReg(IceType_i32, RegMIPS32::Reg_V0);
3524       break;
3525     }
3526   }
3527   Operand *CallTarget = Instr->getCallTarget();
3528   // Allow ConstantRelocatable to be left alone as a direct call,
3529   // but force other constants like ConstantInteger32 to be in
3530   // a register and make it an indirect call.
3531   if (!llvm::isa<ConstantRelocatable>(CallTarget)) {
3532     CallTarget = legalize(CallTarget, Legal_Reg);
3533   }
3534 
3535   // Copy arguments to be passed in registers to the appropriate registers.
3536   for (auto &FPArg : FPArgs) {
3537     RegArgs.emplace_back(legalizeToReg(FPArg.first, FPArg.second));
3538   }
3539   for (auto &GPRArg : GPRArgs) {
3540     RegArgs.emplace_back(legalizeToReg(GPRArg.first, GPRArg.second));
3541   }
3542 
3543   // Generate a FakeUse of register arguments so that they do not get dead code
3544   // eliminated as a result of the FakeKill of scratch registers after the call.
3545   // These fake-uses need to be placed here to avoid argument registers from
3546   // being used during the legalizeToReg() calls above.
3547   for (auto *RegArg : RegArgs) {
3548     Context.insert<InstFakeUse>(RegArg);
3549   }
3550 
3551   // If variable alloca is used the extra 16 bytes for argument build area
3552   // will be allocated on stack before a call.
3553   if (VariableAllocaUsed)
3554     Sandboxer(this).addiu_sp(-MaxOutArgsSizeBytes);
3555 
3556   Inst *NewCall;
3557 
3558   // We don't need to define the return register if it is a vector.
3559   // We have inserted fake defs of it just after the call.
3560   if (ReturnReg && isVectorIntegerType(ReturnReg->getType())) {
3561     Variable *RetReg = nullptr;
3562     NewCall = InstMIPS32Call::create(Func, RetReg, CallTarget);
3563     Context.insert(NewCall);
3564   } else {
3565     NewCall = Sandboxer(this, InstBundleLock::Opt_AlignToEnd)
3566                   .jal(ReturnReg, CallTarget);
3567   }
3568 
3569   if (VariableAllocaUsed)
3570     Sandboxer(this).addiu_sp(MaxOutArgsSizeBytes);
3571 
3572   // Insert a fake use of stack pointer to avoid dead code elimination of addiu
3573   // instruction.
3574   Context.insert<InstFakeUse>(SP);
3575 
3576   if (ReturnRegHi)
3577     Context.insert(InstFakeDef::create(Func, ReturnRegHi));
3578 
3579   if (ReturnReg) {
3580     if (auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg)) {
3581       for (Variable *Var : RetVec->getContainers()) {
3582         Context.insert(InstFakeDef::create(Func, Var));
3583       }
3584     }
3585   }
3586 
3587   // Insert a register-kill pseudo instruction.
3588   Context.insert(InstFakeKill::create(Func, NewCall));
3589 
3590   // Generate a FakeUse to keep the call live if necessary.
3591   if (Instr->hasSideEffects() && ReturnReg) {
3592     if (auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg)) {
3593       for (Variable *Var : RetVec->getContainers()) {
3594         Context.insert<InstFakeUse>(Var);
3595       }
3596     } else {
3597       Context.insert<InstFakeUse>(ReturnReg);
3598     }
3599   }
3600 
3601   if (Dest == nullptr)
3602     return;
3603 
3604   // Assign the result of the call to Dest.
3605   if (ReturnReg) {
3606     if (RetVecFloat) {
3607       auto *DestVecOn32 = llvm::cast<VariableVecOn32>(Dest);
3608       auto *TBase = legalizeToReg(RetVecFloat);
3609       for (SizeT i = 0; i < DestVecOn32->ContainersPerVector; ++i) {
3610         auto *Var = DestVecOn32->getContainers()[i];
3611         auto *TVar = makeReg(IceType_i32);
3612         OperandMIPS32Mem *Mem = OperandMIPS32Mem::create(
3613             Func, IceType_i32, TBase,
3614             llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(i * 4)));
3615         _lw(TVar, Mem);
3616         _mov(Var, TVar);
3617       }
3618     } else if (auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg)) {
3619       auto *DestVecOn32 = llvm::cast<VariableVecOn32>(Dest);
3620       for (SizeT i = 0; i < DestVecOn32->ContainersPerVector; ++i) {
3621         _mov(DestVecOn32->getContainers()[i], RetVec->getContainers()[i]);
3622       }
3623     } else if (ReturnRegHi) {
3624       assert(Dest->getType() == IceType_i64);
3625       auto *Dest64On32 = llvm::cast<Variable64On32>(Dest);
3626       Variable *DestLo = Dest64On32->getLo();
3627       Variable *DestHi = Dest64On32->getHi();
3628       _mov(DestLo, ReturnReg);
3629       _mov(DestHi, ReturnRegHi);
3630     } else {
3631       assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
3632              Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
3633              isScalarFloatingType(Dest->getType()) ||
3634              isVectorType(Dest->getType()));
3635       _mov(Dest, ReturnReg);
3636     }
3637   }
3638 }
3639 
lowerCast(const InstCast * Instr)3640 void TargetMIPS32::lowerCast(const InstCast *Instr) {
3641   InstCast::OpKind CastKind = Instr->getCastKind();
3642   Variable *Dest = Instr->getDest();
3643   Operand *Src0 = legalizeUndef(Instr->getSrc(0));
3644   const Type DestTy = Dest->getType();
3645   const Type Src0Ty = Src0->getType();
3646   const uint32_t ShiftAmount =
3647       (Src0Ty == IceType_i1
3648            ? INT32_BITS - 1
3649            : INT32_BITS - (CHAR_BITS * typeWidthInBytes(Src0Ty)));
3650   const uint32_t Mask =
3651       (Src0Ty == IceType_i1
3652            ? 1
3653            : (1 << (CHAR_BITS * typeWidthInBytes(Src0Ty))) - 1);
3654 
3655   if (isVectorType(DestTy)) {
3656     llvm::report_fatal_error("Cast: Destination type is vector");
3657     return;
3658   }
3659   switch (CastKind) {
3660   default:
3661     Func->setError("Cast type not supported");
3662     return;
3663   case InstCast::Sext: {
3664     if (DestTy == IceType_i64) {
3665       auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3666       auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3667       Variable *Src0R = legalizeToReg(Src0);
3668       Variable *T1_Lo = I32Reg();
3669       Variable *T2_Lo = I32Reg();
3670       Variable *T_Hi = I32Reg();
3671       if (Src0Ty == IceType_i1) {
3672         _sll(T1_Lo, Src0R, INT32_BITS - 1);
3673         _sra(T2_Lo, T1_Lo, INT32_BITS - 1);
3674         _mov(DestHi, T2_Lo);
3675         _mov(DestLo, T2_Lo);
3676       } else if (Src0Ty == IceType_i8 || Src0Ty == IceType_i16) {
3677         _sll(T1_Lo, Src0R, ShiftAmount);
3678         _sra(T2_Lo, T1_Lo, ShiftAmount);
3679         _sra(T_Hi, T2_Lo, INT32_BITS - 1);
3680         _mov(DestHi, T_Hi);
3681         _mov(DestLo, T2_Lo);
3682       } else if (Src0Ty == IceType_i32) {
3683         _mov(T1_Lo, Src0R);
3684         _sra(T_Hi, T1_Lo, INT32_BITS - 1);
3685         _mov(DestHi, T_Hi);
3686         _mov(DestLo, T1_Lo);
3687       }
3688     } else {
3689       Variable *Src0R = legalizeToReg(Src0);
3690       Variable *T1 = makeReg(DestTy);
3691       Variable *T2 = makeReg(DestTy);
3692       if (Src0Ty == IceType_i1 || Src0Ty == IceType_i8 ||
3693           Src0Ty == IceType_i16) {
3694         _sll(T1, Src0R, ShiftAmount);
3695         _sra(T2, T1, ShiftAmount);
3696         _mov(Dest, T2);
3697       }
3698     }
3699     break;
3700   }
3701   case InstCast::Zext: {
3702     if (DestTy == IceType_i64) {
3703       auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3704       auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3705       Variable *Src0R = legalizeToReg(Src0);
3706       Variable *T_Lo = I32Reg();
3707       Variable *T_Hi = I32Reg();
3708 
3709       if (Src0Ty == IceType_i1 || Src0Ty == IceType_i8 || Src0Ty == IceType_i16)
3710         _andi(T_Lo, Src0R, Mask);
3711       else if (Src0Ty == IceType_i32)
3712         _mov(T_Lo, Src0R);
3713       else
3714         assert(Src0Ty != IceType_i64);
3715       _mov(DestLo, T_Lo);
3716 
3717       auto *Zero = getZero();
3718       _addiu(T_Hi, Zero, 0);
3719       _mov(DestHi, T_Hi);
3720     } else {
3721       Variable *Src0R = legalizeToReg(Src0);
3722       Variable *T = makeReg(DestTy);
3723       if (Src0Ty == IceType_i1 || Src0Ty == IceType_i8 ||
3724           Src0Ty == IceType_i16) {
3725         _andi(T, Src0R, Mask);
3726         _mov(Dest, T);
3727       }
3728     }
3729     break;
3730   }
3731   case InstCast::Trunc: {
3732     if (Src0Ty == IceType_i64)
3733       Src0 = loOperand(Src0);
3734     Variable *Src0R = legalizeToReg(Src0);
3735     Variable *T = makeReg(DestTy);
3736     switch (DestTy) {
3737     case IceType_i1:
3738       _andi(T, Src0R, 0x1);
3739       break;
3740     case IceType_i8:
3741       _andi(T, Src0R, 0xff);
3742       break;
3743     case IceType_i16:
3744       _andi(T, Src0R, 0xffff);
3745       break;
3746     default:
3747       _mov(T, Src0R);
3748       break;
3749     }
3750     _mov(Dest, T);
3751     break;
3752   }
3753   case InstCast::Fptrunc: {
3754     assert(Dest->getType() == IceType_f32);
3755     assert(Src0->getType() == IceType_f64);
3756     auto *DestR = legalizeToReg(Dest);
3757     auto *Src0R = legalizeToReg(Src0);
3758     _cvt_s_d(DestR, Src0R);
3759     _mov(Dest, DestR);
3760     break;
3761   }
3762   case InstCast::Fpext: {
3763     assert(Dest->getType() == IceType_f64);
3764     assert(Src0->getType() == IceType_f32);
3765     auto *DestR = legalizeToReg(Dest);
3766     auto *Src0R = legalizeToReg(Src0);
3767     _cvt_d_s(DestR, Src0R);
3768     _mov(Dest, DestR);
3769     break;
3770   }
3771   case InstCast::Fptosi:
3772   case InstCast::Fptoui: {
3773     if (llvm::isa<Variable64On32>(Dest)) {
3774       llvm::report_fatal_error("fp-to-i64 should have been prelowered.");
3775       return;
3776     }
3777     if (DestTy != IceType_i64) {
3778       if (Src0Ty == IceType_f32 && isScalarIntegerType(DestTy)) {
3779         Variable *Src0R = legalizeToReg(Src0);
3780         Variable *FTmp = makeReg(IceType_f32);
3781         _trunc_w_s(FTmp, Src0R);
3782         _mov(Dest, FTmp);
3783         return;
3784       }
3785       if (Src0Ty == IceType_f64 && isScalarIntegerType(DestTy)) {
3786         Variable *Src0R = legalizeToReg(Src0);
3787         Variable *FTmp = makeReg(IceType_f64);
3788         _trunc_w_d(FTmp, Src0R);
3789         _mov(Dest, FTmp);
3790         return;
3791       }
3792     }
3793     llvm::report_fatal_error("Destination is i64 in fp-to-i32");
3794     break;
3795   }
3796   case InstCast::Sitofp:
3797   case InstCast::Uitofp: {
3798     if (llvm::isa<Variable64On32>(Dest)) {
3799       llvm::report_fatal_error("i64-to-fp should have been prelowered.");
3800       return;
3801     }
3802     if (Src0Ty != IceType_i64) {
3803       Variable *Src0R = legalizeToReg(Src0);
3804       auto *T0R = Src0R;
3805       if (Src0Ty != IceType_i32) {
3806         T0R = makeReg(IceType_i32);
3807         if (CastKind == InstCast::Uitofp)
3808           lowerCast(InstCast::create(Func, InstCast::Zext, T0R, Src0R));
3809         else
3810           lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
3811       }
3812       if (isScalarIntegerType(Src0Ty) && DestTy == IceType_f32) {
3813         Variable *FTmp1 = makeReg(IceType_f32);
3814         Variable *FTmp2 = makeReg(IceType_f32);
3815         _mtc1(FTmp1, T0R);
3816         _cvt_s_w(FTmp2, FTmp1);
3817         _mov(Dest, FTmp2);
3818         return;
3819       }
3820       if (isScalarIntegerType(Src0Ty) && DestTy == IceType_f64) {
3821         Variable *FTmp1 = makeReg(IceType_f64);
3822         Variable *FTmp2 = makeReg(IceType_f64);
3823         _mtc1(FTmp1, T0R);
3824         _cvt_d_w(FTmp2, FTmp1);
3825         _mov(Dest, FTmp2);
3826         return;
3827       }
3828     }
3829     llvm::report_fatal_error("Source is i64 in i32-to-fp");
3830     break;
3831   }
3832   case InstCast::Bitcast: {
3833     Operand *Src0 = Instr->getSrc(0);
3834     if (DestTy == Src0->getType()) {
3835       auto *Assign = InstAssign::create(Func, Dest, Src0);
3836       lowerAssign(Assign);
3837       return;
3838     }
3839     if (isVectorType(DestTy) || isVectorType(Src0->getType())) {
3840       llvm::report_fatal_error(
3841           "Bitcast: vector type should have been prelowered.");
3842       return;
3843     }
3844     switch (DestTy) {
3845     case IceType_NUM:
3846     case IceType_void:
3847       llvm::report_fatal_error("Unexpected bitcast.");
3848     case IceType_i1:
3849       UnimplementedLoweringError(this, Instr);
3850       break;
3851     case IceType_i8:
3852       assert(Src0->getType() == IceType_v8i1);
3853       llvm::report_fatal_error(
3854           "i8 to v8i1 conversion should have been prelowered.");
3855       break;
3856     case IceType_i16:
3857       assert(Src0->getType() == IceType_v16i1);
3858       llvm::report_fatal_error(
3859           "i16 to v16i1 conversion should have been prelowered.");
3860       break;
3861     case IceType_i32:
3862     case IceType_f32: {
3863       Variable *Src0R = legalizeToReg(Src0);
3864       _mov(Dest, Src0R);
3865       break;
3866     }
3867     case IceType_i64: {
3868       assert(Src0->getType() == IceType_f64);
3869       Variable *Src0R = legalizeToReg(Src0);
3870       auto *T = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
3871       T->initHiLo(Func);
3872       T->getHi()->setMustNotHaveReg();
3873       T->getLo()->setMustNotHaveReg();
3874       Context.insert<InstFakeDef>(T->getHi());
3875       Context.insert<InstFakeDef>(T->getLo());
3876       _mov_fp64_to_i64(T->getHi(), Src0R, Int64_Hi);
3877       _mov_fp64_to_i64(T->getLo(), Src0R, Int64_Lo);
3878       lowerAssign(InstAssign::create(Func, Dest, T));
3879       break;
3880     }
3881     case IceType_f64: {
3882       assert(Src0->getType() == IceType_i64);
3883       const uint32_t Mask = 0xFFFFFFFF;
3884       if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src0)) {
3885         Variable *RegHi, *RegLo;
3886         const uint64_t Value = C64->getValue();
3887         uint64_t Upper32Bits = (Value >> INT32_BITS) & Mask;
3888         uint64_t Lower32Bits = Value & Mask;
3889         RegLo = legalizeToReg(Ctx->getConstantInt32(Lower32Bits));
3890         RegHi = legalizeToReg(Ctx->getConstantInt32(Upper32Bits));
3891         _mov(Dest, RegHi, RegLo);
3892       } else {
3893         auto *Var64On32 = llvm::cast<Variable64On32>(Src0);
3894         auto *RegLo = legalizeToReg(loOperand(Var64On32));
3895         auto *RegHi = legalizeToReg(hiOperand(Var64On32));
3896         _mov(Dest, RegHi, RegLo);
3897       }
3898       break;
3899     }
3900     default:
3901       llvm::report_fatal_error("Unexpected bitcast.");
3902     }
3903     break;
3904   }
3905   }
3906 }
3907 
lowerExtractElement(const InstExtractElement * Instr)3908 void TargetMIPS32::lowerExtractElement(const InstExtractElement *Instr) {
3909   Variable *Dest = Instr->getDest();
3910   const Type DestTy = Dest->getType();
3911   Operand *Src1 = Instr->getSrc(1);
3912   if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src1)) {
3913     const uint32_t Index = Imm->getValue();
3914     Variable *TDest = makeReg(DestTy);
3915     Variable *TReg = makeReg(DestTy);
3916     auto *Src0 = legalizeUndef(Instr->getSrc(0));
3917     auto *Src0R = llvm::dyn_cast<VariableVecOn32>(Src0);
3918     // Number of elements in each container
3919     uint32_t ElemPerCont =
3920         typeNumElements(Src0->getType()) / Src0R->ContainersPerVector;
3921     auto *Src = Src0R->getContainers()[Index / ElemPerCont];
3922     auto *SrcE = legalizeToReg(Src);
3923     // Position of the element in the container
3924     uint32_t PosInCont = Index % ElemPerCont;
3925     if (ElemPerCont == 1) {
3926       _mov(TDest, SrcE);
3927     } else if (ElemPerCont == 2) {
3928       switch (PosInCont) {
3929       case 0:
3930         _andi(TDest, SrcE, 0xffff);
3931         break;
3932       case 1:
3933         _srl(TDest, SrcE, 16);
3934         break;
3935       default:
3936         llvm::report_fatal_error("ExtractElement: Invalid PosInCont");
3937         break;
3938       }
3939     } else if (ElemPerCont == 4) {
3940       switch (PosInCont) {
3941       case 0:
3942         _andi(TDest, SrcE, 0xff);
3943         break;
3944       case 1:
3945         _srl(TReg, SrcE, 8);
3946         _andi(TDest, TReg, 0xff);
3947         break;
3948       case 2:
3949         _srl(TReg, SrcE, 16);
3950         _andi(TDest, TReg, 0xff);
3951         break;
3952       case 3:
3953         _srl(TDest, SrcE, 24);
3954         break;
3955       default:
3956         llvm::report_fatal_error("ExtractElement: Invalid PosInCont");
3957         break;
3958       }
3959     }
3960     if (typeElementType(Src0R->getType()) == IceType_i1) {
3961       Variable *TReg1 = makeReg(DestTy);
3962       _andi(TReg1, TDest, 0x1);
3963       _mov(Dest, TReg1);
3964     } else {
3965       _mov(Dest, TDest);
3966     }
3967     return;
3968   }
3969   llvm::report_fatal_error("ExtractElement requires a constant index");
3970 }
3971 
lowerFcmp(const InstFcmp * Instr)3972 void TargetMIPS32::lowerFcmp(const InstFcmp *Instr) {
3973   Variable *Dest = Instr->getDest();
3974   if (isVectorType(Dest->getType())) {
3975     llvm::report_fatal_error("Fcmp: Destination type is vector");
3976     return;
3977   }
3978 
3979   auto *Src0 = Instr->getSrc(0);
3980   auto *Src1 = Instr->getSrc(1);
3981   auto *Zero = getZero();
3982 
3983   InstFcmp::FCond Cond = Instr->getCondition();
3984   auto *DestR = makeReg(IceType_i32);
3985   auto *Src0R = legalizeToReg(Src0);
3986   auto *Src1R = legalizeToReg(Src1);
3987   const Type Src0Ty = Src0->getType();
3988 
3989   Operand *FCC0 = OperandMIPS32FCC::create(getFunc(), OperandMIPS32FCC::FCC0);
3990 
3991   switch (Cond) {
3992   default: {
3993     llvm::report_fatal_error("Unhandled fp comparison.");
3994     return;
3995   }
3996   case InstFcmp::False: {
3997     Context.insert<InstFakeUse>(Src0R);
3998     Context.insert<InstFakeUse>(Src1R);
3999     _addiu(DestR, Zero, 0);
4000     _mov(Dest, DestR);
4001     break;
4002   }
4003   case InstFcmp::Oeq: {
4004     if (Src0Ty == IceType_f32) {
4005       _c_eq_s(Src0R, Src1R);
4006     } else {
4007       _c_eq_d(Src0R, Src1R);
4008     }
4009     _addiu(DestR, Zero, 1);
4010     _movf(DestR, Zero, FCC0);
4011     _mov(Dest, DestR);
4012     break;
4013   }
4014   case InstFcmp::Ogt: {
4015     if (Src0Ty == IceType_f32) {
4016       _c_ule_s(Src0R, Src1R);
4017     } else {
4018       _c_ule_d(Src0R, Src1R);
4019     }
4020     _addiu(DestR, Zero, 1);
4021     _movt(DestR, Zero, FCC0);
4022     _mov(Dest, DestR);
4023     break;
4024   }
4025   case InstFcmp::Oge: {
4026     if (Src0Ty == IceType_f32) {
4027       _c_ult_s(Src0R, Src1R);
4028     } else {
4029       _c_ult_d(Src0R, Src1R);
4030     }
4031     _addiu(DestR, Zero, 1);
4032     _movt(DestR, Zero, FCC0);
4033     _mov(Dest, DestR);
4034     break;
4035   }
4036   case InstFcmp::Olt: {
4037     if (Src0Ty == IceType_f32) {
4038       _c_olt_s(Src0R, Src1R);
4039     } else {
4040       _c_olt_d(Src0R, Src1R);
4041     }
4042     _addiu(DestR, Zero, 1);
4043     _movf(DestR, Zero, FCC0);
4044     _mov(Dest, DestR);
4045     break;
4046   }
4047   case InstFcmp::Ole: {
4048     if (Src0Ty == IceType_f32) {
4049       _c_ole_s(Src0R, Src1R);
4050     } else {
4051       _c_ole_d(Src0R, Src1R);
4052     }
4053     _addiu(DestR, Zero, 1);
4054     _movf(DestR, Zero, FCC0);
4055     _mov(Dest, DestR);
4056     break;
4057   }
4058   case InstFcmp::One: {
4059     if (Src0Ty == IceType_f32) {
4060       _c_ueq_s(Src0R, Src1R);
4061     } else {
4062       _c_ueq_d(Src0R, Src1R);
4063     }
4064     _addiu(DestR, Zero, 1);
4065     _movt(DestR, Zero, FCC0);
4066     _mov(Dest, DestR);
4067     break;
4068   }
4069   case InstFcmp::Ord: {
4070     if (Src0Ty == IceType_f32) {
4071       _c_un_s(Src0R, Src1R);
4072     } else {
4073       _c_un_d(Src0R, Src1R);
4074     }
4075     _addiu(DestR, Zero, 1);
4076     _movt(DestR, Zero, FCC0);
4077     _mov(Dest, DestR);
4078     break;
4079   }
4080   case InstFcmp::Ueq: {
4081     if (Src0Ty == IceType_f32) {
4082       _c_ueq_s(Src0R, Src1R);
4083     } else {
4084       _c_ueq_d(Src0R, Src1R);
4085     }
4086     _addiu(DestR, Zero, 1);
4087     _movf(DestR, Zero, FCC0);
4088     _mov(Dest, DestR);
4089     break;
4090   }
4091   case InstFcmp::Ugt: {
4092     if (Src0Ty == IceType_f32) {
4093       _c_ole_s(Src0R, Src1R);
4094     } else {
4095       _c_ole_d(Src0R, Src1R);
4096     }
4097     _addiu(DestR, Zero, 1);
4098     _movt(DestR, Zero, FCC0);
4099     _mov(Dest, DestR);
4100     break;
4101   }
4102   case InstFcmp::Uge: {
4103     if (Src0Ty == IceType_f32) {
4104       _c_olt_s(Src0R, Src1R);
4105     } else {
4106       _c_olt_d(Src0R, Src1R);
4107     }
4108     _addiu(DestR, Zero, 1);
4109     _movt(DestR, Zero, FCC0);
4110     _mov(Dest, DestR);
4111     break;
4112   }
4113   case InstFcmp::Ult: {
4114     if (Src0Ty == IceType_f32) {
4115       _c_ult_s(Src0R, Src1R);
4116     } else {
4117       _c_ult_d(Src0R, Src1R);
4118     }
4119     _addiu(DestR, Zero, 1);
4120     _movf(DestR, Zero, FCC0);
4121     _mov(Dest, DestR);
4122     break;
4123   }
4124   case InstFcmp::Ule: {
4125     if (Src0Ty == IceType_f32) {
4126       _c_ule_s(Src0R, Src1R);
4127     } else {
4128       _c_ule_d(Src0R, Src1R);
4129     }
4130     _addiu(DestR, Zero, 1);
4131     _movf(DestR, Zero, FCC0);
4132     _mov(Dest, DestR);
4133     break;
4134   }
4135   case InstFcmp::Une: {
4136     if (Src0Ty == IceType_f32) {
4137       _c_eq_s(Src0R, Src1R);
4138     } else {
4139       _c_eq_d(Src0R, Src1R);
4140     }
4141     _addiu(DestR, Zero, 1);
4142     _movt(DestR, Zero, FCC0);
4143     _mov(Dest, DestR);
4144     break;
4145   }
4146   case InstFcmp::Uno: {
4147     if (Src0Ty == IceType_f32) {
4148       _c_un_s(Src0R, Src1R);
4149     } else {
4150       _c_un_d(Src0R, Src1R);
4151     }
4152     _addiu(DestR, Zero, 1);
4153     _movf(DestR, Zero, FCC0);
4154     _mov(Dest, DestR);
4155     break;
4156   }
4157   case InstFcmp::True: {
4158     Context.insert<InstFakeUse>(Src0R);
4159     Context.insert<InstFakeUse>(Src1R);
4160     _addiu(DestR, Zero, 1);
4161     _mov(Dest, DestR);
4162     break;
4163   }
4164   }
4165 }
4166 
lower64Icmp(const InstIcmp * Instr)4167 void TargetMIPS32::lower64Icmp(const InstIcmp *Instr) {
4168   Operand *Src0 = legalize(Instr->getSrc(0));
4169   Operand *Src1 = legalize(Instr->getSrc(1));
4170   Variable *Dest = Instr->getDest();
4171   InstIcmp::ICond Condition = Instr->getCondition();
4172 
4173   Variable *Src0LoR = legalizeToReg(loOperand(Src0));
4174   Variable *Src0HiR = legalizeToReg(hiOperand(Src0));
4175   Variable *Src1LoR = legalizeToReg(loOperand(Src1));
4176   Variable *Src1HiR = legalizeToReg(hiOperand(Src1));
4177 
4178   switch (Condition) {
4179   default:
4180     llvm_unreachable("unexpected condition");
4181     return;
4182   case InstIcmp::Eq: {
4183     auto *T1 = I32Reg();
4184     auto *T2 = I32Reg();
4185     auto *T3 = I32Reg();
4186     auto *T4 = I32Reg();
4187     _xor(T1, Src0HiR, Src1HiR);
4188     _xor(T2, Src0LoR, Src1LoR);
4189     _or(T3, T1, T2);
4190     _sltiu(T4, T3, 1);
4191     _mov(Dest, T4);
4192     return;
4193   }
4194   case InstIcmp::Ne: {
4195     auto *T1 = I32Reg();
4196     auto *T2 = I32Reg();
4197     auto *T3 = I32Reg();
4198     auto *T4 = I32Reg();
4199     _xor(T1, Src0HiR, Src1HiR);
4200     _xor(T2, Src0LoR, Src1LoR);
4201     _or(T3, T1, T2);
4202     _sltu(T4, getZero(), T3);
4203     _mov(Dest, T4);
4204     return;
4205   }
4206   case InstIcmp::Sgt: {
4207     auto *T1 = I32Reg();
4208     auto *T2 = I32Reg();
4209     auto *T3 = I32Reg();
4210     _xor(T1, Src0HiR, Src1HiR);
4211     _slt(T2, Src1HiR, Src0HiR);
4212     _sltu(T3, Src1LoR, Src0LoR);
4213     _movz(T2, T3, T1);
4214     _mov(Dest, T2);
4215     return;
4216   }
4217   case InstIcmp::Ugt: {
4218     auto *T1 = I32Reg();
4219     auto *T2 = I32Reg();
4220     auto *T3 = I32Reg();
4221     _xor(T1, Src0HiR, Src1HiR);
4222     _sltu(T2, Src1HiR, Src0HiR);
4223     _sltu(T3, Src1LoR, Src0LoR);
4224     _movz(T2, T3, T1);
4225     _mov(Dest, T2);
4226     return;
4227   }
4228   case InstIcmp::Sge: {
4229     auto *T1 = I32Reg();
4230     auto *T2 = I32Reg();
4231     auto *T3 = I32Reg();
4232     auto *T4 = I32Reg();
4233     auto *T5 = I32Reg();
4234     _xor(T1, Src0HiR, Src1HiR);
4235     _slt(T2, Src0HiR, Src1HiR);
4236     _xori(T3, T2, 1);
4237     _sltu(T4, Src0LoR, Src1LoR);
4238     _xori(T5, T4, 1);
4239     _movz(T3, T5, T1);
4240     _mov(Dest, T3);
4241     return;
4242   }
4243   case InstIcmp::Uge: {
4244     auto *T1 = I32Reg();
4245     auto *T2 = I32Reg();
4246     auto *T3 = I32Reg();
4247     auto *T4 = I32Reg();
4248     auto *T5 = I32Reg();
4249     _xor(T1, Src0HiR, Src1HiR);
4250     _sltu(T2, Src0HiR, Src1HiR);
4251     _xori(T3, T2, 1);
4252     _sltu(T4, Src0LoR, Src1LoR);
4253     _xori(T5, T4, 1);
4254     _movz(T3, T5, T1);
4255     _mov(Dest, T3);
4256     return;
4257   }
4258   case InstIcmp::Slt: {
4259     auto *T1 = I32Reg();
4260     auto *T2 = I32Reg();
4261     auto *T3 = I32Reg();
4262     _xor(T1, Src0HiR, Src1HiR);
4263     _slt(T2, Src0HiR, Src1HiR);
4264     _sltu(T3, Src0LoR, Src1LoR);
4265     _movz(T2, T3, T1);
4266     _mov(Dest, T2);
4267     return;
4268   }
4269   case InstIcmp::Ult: {
4270     auto *T1 = I32Reg();
4271     auto *T2 = I32Reg();
4272     auto *T3 = I32Reg();
4273     _xor(T1, Src0HiR, Src1HiR);
4274     _sltu(T2, Src0HiR, Src1HiR);
4275     _sltu(T3, Src0LoR, Src1LoR);
4276     _movz(T2, T3, T1);
4277     _mov(Dest, T2);
4278     return;
4279   }
4280   case InstIcmp::Sle: {
4281     auto *T1 = I32Reg();
4282     auto *T2 = I32Reg();
4283     auto *T3 = I32Reg();
4284     auto *T4 = I32Reg();
4285     auto *T5 = I32Reg();
4286     _xor(T1, Src0HiR, Src1HiR);
4287     _slt(T2, Src1HiR, Src0HiR);
4288     _xori(T3, T2, 1);
4289     _sltu(T4, Src1LoR, Src0LoR);
4290     _xori(T5, T4, 1);
4291     _movz(T3, T5, T1);
4292     _mov(Dest, T3);
4293     return;
4294   }
4295   case InstIcmp::Ule: {
4296     auto *T1 = I32Reg();
4297     auto *T2 = I32Reg();
4298     auto *T3 = I32Reg();
4299     auto *T4 = I32Reg();
4300     auto *T5 = I32Reg();
4301     _xor(T1, Src0HiR, Src1HiR);
4302     _sltu(T2, Src1HiR, Src0HiR);
4303     _xori(T3, T2, 1);
4304     _sltu(T4, Src1LoR, Src0LoR);
4305     _xori(T5, T4, 1);
4306     _movz(T3, T5, T1);
4307     _mov(Dest, T3);
4308     return;
4309   }
4310   }
4311 }
4312 
lowerIcmp(const InstIcmp * Instr)4313 void TargetMIPS32::lowerIcmp(const InstIcmp *Instr) {
4314   auto *Src0 = Instr->getSrc(0);
4315   auto *Src1 = Instr->getSrc(1);
4316   if (Src0->getType() == IceType_i64) {
4317     lower64Icmp(Instr);
4318     return;
4319   }
4320   Variable *Dest = Instr->getDest();
4321   if (isVectorType(Dest->getType())) {
4322     llvm::report_fatal_error("Icmp: Destination type is vector");
4323     return;
4324   }
4325   InstIcmp::ICond Cond = Instr->getCondition();
4326   auto *Src0R = legalizeToReg(Src0);
4327   auto *Src1R = legalizeToReg(Src1);
4328   const Type Src0Ty = Src0R->getType();
4329   const uint32_t ShAmt = INT32_BITS - getScalarIntBitWidth(Src0->getType());
4330   Variable *Src0RT = I32Reg();
4331   Variable *Src1RT = I32Reg();
4332 
4333   if (Src0Ty != IceType_i32) {
4334     _sll(Src0RT, Src0R, ShAmt);
4335     _sll(Src1RT, Src1R, ShAmt);
4336   } else {
4337     _mov(Src0RT, Src0R);
4338     _mov(Src1RT, Src1R);
4339   }
4340 
4341   switch (Cond) {
4342   case InstIcmp::Eq: {
4343     auto *DestT = I32Reg();
4344     auto *T = I32Reg();
4345     _xor(T, Src0RT, Src1RT);
4346     _sltiu(DestT, T, 1);
4347     _mov(Dest, DestT);
4348     return;
4349   }
4350   case InstIcmp::Ne: {
4351     auto *DestT = I32Reg();
4352     auto *T = I32Reg();
4353     auto *Zero = getZero();
4354     _xor(T, Src0RT, Src1RT);
4355     _sltu(DestT, Zero, T);
4356     _mov(Dest, DestT);
4357     return;
4358   }
4359   case InstIcmp::Ugt: {
4360     auto *DestT = I32Reg();
4361     _sltu(DestT, Src1RT, Src0RT);
4362     _mov(Dest, DestT);
4363     return;
4364   }
4365   case InstIcmp::Uge: {
4366     auto *DestT = I32Reg();
4367     auto *T = I32Reg();
4368     _sltu(T, Src0RT, Src1RT);
4369     _xori(DestT, T, 1);
4370     _mov(Dest, DestT);
4371     return;
4372   }
4373   case InstIcmp::Ult: {
4374     auto *DestT = I32Reg();
4375     _sltu(DestT, Src0RT, Src1RT);
4376     _mov(Dest, DestT);
4377     return;
4378   }
4379   case InstIcmp::Ule: {
4380     auto *DestT = I32Reg();
4381     auto *T = I32Reg();
4382     _sltu(T, Src1RT, Src0RT);
4383     _xori(DestT, T, 1);
4384     _mov(Dest, DestT);
4385     return;
4386   }
4387   case InstIcmp::Sgt: {
4388     auto *DestT = I32Reg();
4389     _slt(DestT, Src1RT, Src0RT);
4390     _mov(Dest, DestT);
4391     return;
4392   }
4393   case InstIcmp::Sge: {
4394     auto *DestT = I32Reg();
4395     auto *T = I32Reg();
4396     _slt(T, Src0RT, Src1RT);
4397     _xori(DestT, T, 1);
4398     _mov(Dest, DestT);
4399     return;
4400   }
4401   case InstIcmp::Slt: {
4402     auto *DestT = I32Reg();
4403     _slt(DestT, Src0RT, Src1RT);
4404     _mov(Dest, DestT);
4405     return;
4406   }
4407   case InstIcmp::Sle: {
4408     auto *DestT = I32Reg();
4409     auto *T = I32Reg();
4410     _slt(T, Src1RT, Src0RT);
4411     _xori(DestT, T, 1);
4412     _mov(Dest, DestT);
4413     return;
4414   }
4415   default:
4416     llvm_unreachable("Invalid ICmp operator");
4417     return;
4418   }
4419 }
4420 
lowerInsertElement(const InstInsertElement * Instr)4421 void TargetMIPS32::lowerInsertElement(const InstInsertElement *Instr) {
4422   Variable *Dest = Instr->getDest();
4423   const Type DestTy = Dest->getType();
4424   Operand *Src2 = Instr->getSrc(2);
4425   if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src2)) {
4426     const uint32_t Index = Imm->getValue();
4427     // Vector to insert in
4428     auto *Src0 = legalizeUndef(Instr->getSrc(0));
4429     auto *Src0R = llvm::dyn_cast<VariableVecOn32>(Src0);
4430     // Number of elements in each container
4431     uint32_t ElemPerCont =
4432         typeNumElements(Src0->getType()) / Src0R->ContainersPerVector;
4433     // Source Element
4434     auto *Src = Src0R->getContainers()[Index / ElemPerCont];
4435     auto *SrcE = Src;
4436     if (ElemPerCont > 1)
4437       SrcE = legalizeToReg(Src);
4438     // Dest is a vector
4439     auto *VDest = llvm::dyn_cast<VariableVecOn32>(Dest);
4440     VDest->initVecElement(Func);
4441     // Temp vector variable
4442     auto *TDest = makeReg(DestTy);
4443     auto *TVDest = llvm::dyn_cast<VariableVecOn32>(TDest);
4444     TVDest->initVecElement(Func);
4445     // Destination element
4446     auto *DstE = TVDest->getContainers()[Index / ElemPerCont];
4447     // Element to insert
4448     auto *Src1R = legalizeToReg(Instr->getSrc(1));
4449     auto *TReg1 = makeReg(IceType_i32);
4450     auto *TReg2 = makeReg(IceType_i32);
4451     auto *TReg3 = makeReg(IceType_i32);
4452     auto *TReg4 = makeReg(IceType_i32);
4453     auto *TReg5 = makeReg(IceType_i32);
4454     auto *TDReg = makeReg(IceType_i32);
4455     // Position of the element in the container
4456     uint32_t PosInCont = Index % ElemPerCont;
4457     // Load source vector in a temporary vector
4458     for (SizeT i = 0; i < TVDest->ContainersPerVector; ++i) {
4459       auto *DCont = TVDest->getContainers()[i];
4460       // Do not define DstE as we are going to redefine it
4461       if (DCont == DstE)
4462         continue;
4463       auto *SCont = Src0R->getContainers()[i];
4464       auto *TReg = makeReg(IceType_i32);
4465       _mov(TReg, SCont);
4466       _mov(DCont, TReg);
4467     }
4468     // Insert the element
4469     if (ElemPerCont == 1) {
4470       _mov(DstE, Src1R);
4471     } else if (ElemPerCont == 2) {
4472       switch (PosInCont) {
4473       case 0:
4474         _andi(TReg1, Src1R, 0xffff); // Clear upper 16-bits of source
4475         _srl(TReg2, SrcE, 16);
4476         _sll(TReg3, TReg2, 16); // Clear lower 16-bits of element
4477         _or(TDReg, TReg1, TReg3);
4478         _mov(DstE, TDReg);
4479         break;
4480       case 1:
4481         _sll(TReg1, Src1R, 16); // Clear lower 16-bits  of source
4482         _sll(TReg2, SrcE, 16);
4483         _srl(TReg3, TReg2, 16); // Clear upper 16-bits of element
4484         _or(TDReg, TReg1, TReg3);
4485         _mov(DstE, TDReg);
4486         break;
4487       default:
4488         llvm::report_fatal_error("InsertElement: Invalid PosInCont");
4489         break;
4490       }
4491     } else if (ElemPerCont == 4) {
4492       switch (PosInCont) {
4493       case 0:
4494         _andi(TReg1, Src1R, 0xff); // Clear bits[31:8] of source
4495         _srl(TReg2, SrcE, 8);
4496         _sll(TReg3, TReg2, 8); // Clear bits[7:0] of element
4497         _or(TDReg, TReg1, TReg3);
4498         _mov(DstE, TDReg);
4499         break;
4500       case 1:
4501         _andi(TReg1, Src1R, 0xff); // Clear bits[31:8] of source
4502         _sll(TReg5, TReg1, 8);     // Position in the destination
4503         _lui(TReg2, Ctx->getConstantInt32(0xffff));
4504         _ori(TReg3, TReg2, 0x00ff);
4505         _and(TReg4, SrcE, TReg3); // Clear bits[15:8] of element
4506         _or(TDReg, TReg5, TReg4);
4507         _mov(DstE, TDReg);
4508         break;
4509       case 2:
4510         _andi(TReg1, Src1R, 0xff); // Clear bits[31:8] of source
4511         _sll(TReg5, TReg1, 16);    // Position in the destination
4512         _lui(TReg2, Ctx->getConstantInt32(0xff00));
4513         _ori(TReg3, TReg2, 0xffff);
4514         _and(TReg4, SrcE, TReg3); // Clear bits[15:8] of element
4515         _or(TDReg, TReg5, TReg4);
4516         _mov(DstE, TDReg);
4517         break;
4518       case 3:
4519         _sll(TReg1, Src1R, 24); // Position in the destination
4520         _sll(TReg2, SrcE, 8);
4521         _srl(TReg3, TReg2, 8); // Clear bits[31:24] of element
4522         _or(TDReg, TReg1, TReg3);
4523         _mov(DstE, TDReg);
4524         break;
4525       default:
4526         llvm::report_fatal_error("InsertElement: Invalid PosInCont");
4527         break;
4528       }
4529     }
4530     // Write back temporary vector to the destination
4531     auto *Assign = InstAssign::create(Func, Dest, TDest);
4532     lowerAssign(Assign);
4533     return;
4534   }
4535   llvm::report_fatal_error("InsertElement requires a constant index");
4536 }
4537 
createArithInst(Intrinsics::AtomicRMWOperation Operation,Variable * Dest,Variable * Src0,Variable * Src1)4538 void TargetMIPS32::createArithInst(Intrinsics::AtomicRMWOperation Operation,
4539                                    Variable *Dest, Variable *Src0,
4540                                    Variable *Src1) {
4541   switch (Operation) {
4542   default:
4543     llvm::report_fatal_error("Unknown AtomicRMW operation");
4544   case Intrinsics::AtomicExchange:
4545     llvm::report_fatal_error("Can't handle Atomic xchg operation");
4546   case Intrinsics::AtomicAdd:
4547     _addu(Dest, Src0, Src1);
4548     break;
4549   case Intrinsics::AtomicAnd:
4550     _and(Dest, Src0, Src1);
4551     break;
4552   case Intrinsics::AtomicSub:
4553     _subu(Dest, Src0, Src1);
4554     break;
4555   case Intrinsics::AtomicOr:
4556     _or(Dest, Src0, Src1);
4557     break;
4558   case Intrinsics::AtomicXor:
4559     _xor(Dest, Src0, Src1);
4560     break;
4561   }
4562 }
4563 
lowerIntrinsicCall(const InstIntrinsicCall * Instr)4564 void TargetMIPS32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
4565   Variable *Dest = Instr->getDest();
4566   Type DestTy = (Dest == nullptr) ? IceType_void : Dest->getType();
4567 
4568   Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID;
4569   switch (ID) {
4570   case Intrinsics::AtomicLoad: {
4571     assert(isScalarIntegerType(DestTy));
4572     // We require the memory address to be naturally aligned. Given that is the
4573     // case, then normal loads are atomic.
4574     if (!Intrinsics::isMemoryOrderValid(
4575             ID, getConstantMemoryOrder(Instr->getArg(1)))) {
4576       Func->setError("Unexpected memory ordering for AtomicLoad");
4577       return;
4578     }
4579     if (DestTy == IceType_i64) {
4580       llvm::report_fatal_error("AtomicLoad.i64 should have been prelowered.");
4581       return;
4582     } else if (DestTy == IceType_i32) {
4583       auto *T1 = makeReg(DestTy);
4584       auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4585       auto *Base = legalizeToReg(Instr->getArg(0));
4586       auto *Addr = formMemoryOperand(Base, DestTy);
4587       InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4588       InstMIPS32Label *Exit = InstMIPS32Label::create(Func, this);
4589       constexpr CfgNode *NoTarget = nullptr;
4590       _sync();
4591       Context.insert(Retry);
4592       Sandboxer(this).ll(T1, Addr);
4593       _br(NoTarget, NoTarget, T1, getZero(), Exit, CondMIPS32::Cond::NE);
4594       _addiu(RegAt, getZero(), 0); // Loaded value is zero here, writeback zero
4595       Sandboxer(this).sc(RegAt, Addr);
4596       _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4597       Context.insert(Exit);
4598       _sync();
4599       _mov(Dest, T1);
4600       Context.insert<InstFakeUse>(T1);
4601     } else {
4602       const uint32_t Mask = (1 << (CHAR_BITS * typeWidthInBytes(DestTy))) - 1;
4603       auto *Base = legalizeToReg(Instr->getArg(0));
4604       auto *T1 = makeReg(IceType_i32);
4605       auto *T2 = makeReg(IceType_i32);
4606       auto *T3 = makeReg(IceType_i32);
4607       auto *T4 = makeReg(IceType_i32);
4608       auto *T5 = makeReg(IceType_i32);
4609       auto *T6 = makeReg(IceType_i32);
4610       auto *SrcMask = makeReg(IceType_i32);
4611       auto *Tdest = makeReg(IceType_i32);
4612       auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4613       InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4614       InstMIPS32Label *Exit = InstMIPS32Label::create(Func, this);
4615       constexpr CfgNode *NoTarget = nullptr;
4616       _sync();
4617       _addiu(T1, getZero(), -4); // Address mask 0xFFFFFFFC
4618       _andi(T2, Base, 3);        // Last two bits of the address
4619       _and(T3, Base, T1);        // Align the address
4620       _sll(T4, T2, 3);
4621       _ori(T5, getZero(), Mask);
4622       _sllv(SrcMask, T5, T4); // Source mask
4623       auto *Addr = formMemoryOperand(T3, IceType_i32);
4624       Context.insert(Retry);
4625       Sandboxer(this).ll(T6, Addr);
4626       _and(Tdest, T6, SrcMask);
4627       _br(NoTarget, NoTarget, T6, getZero(), Exit, CondMIPS32::Cond::NE);
4628       _addiu(RegAt, getZero(), 0); // Loaded value is zero here, writeback zero
4629       Sandboxer(this).sc(RegAt, Addr);
4630       _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4631       Context.insert(Exit);
4632       auto *T7 = makeReg(IceType_i32);
4633       auto *T8 = makeReg(IceType_i32);
4634       _srlv(T7, Tdest, T4);
4635       _andi(T8, T7, Mask);
4636       _sync();
4637       _mov(Dest, T8);
4638       Context.insert<InstFakeUse>(T6);
4639       Context.insert<InstFakeUse>(SrcMask);
4640     }
4641     return;
4642   }
4643   case Intrinsics::AtomicStore: {
4644     // We require the memory address to be naturally aligned. Given that is the
4645     // case, then normal stores are atomic.
4646     if (!Intrinsics::isMemoryOrderValid(
4647             ID, getConstantMemoryOrder(Instr->getArg(2)))) {
4648       Func->setError("Unexpected memory ordering for AtomicStore");
4649       return;
4650     }
4651     auto *Val = Instr->getArg(0);
4652     auto Ty = Val->getType();
4653     if (Ty == IceType_i64) {
4654       llvm::report_fatal_error("AtomicStore.i64 should have been prelowered.");
4655       return;
4656     } else if (Ty == IceType_i32) {
4657       auto *Val = legalizeToReg(Instr->getArg(0));
4658       auto *Base = legalizeToReg(Instr->getArg(1));
4659       auto *Addr = formMemoryOperand(Base, Ty);
4660       InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4661       constexpr CfgNode *NoTarget = nullptr;
4662       auto *T1 = makeReg(IceType_i32);
4663       auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4664       _sync();
4665       Context.insert(Retry);
4666       Sandboxer(this).ll(T1, Addr);
4667       _mov(RegAt, Val);
4668       Sandboxer(this).sc(RegAt, Addr);
4669       _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4670       Context.insert<InstFakeUse>(T1); // To keep LL alive
4671       _sync();
4672     } else {
4673       auto *Val = legalizeToReg(Instr->getArg(0));
4674       auto *Base = legalizeToReg(Instr->getArg(1));
4675       InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4676       constexpr CfgNode *NoTarget = nullptr;
4677       auto *T1 = makeReg(IceType_i32);
4678       auto *T2 = makeReg(IceType_i32);
4679       auto *T3 = makeReg(IceType_i32);
4680       auto *T4 = makeReg(IceType_i32);
4681       auto *T5 = makeReg(IceType_i32);
4682       auto *T6 = makeReg(IceType_i32);
4683       auto *T7 = makeReg(IceType_i32);
4684       auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4685       auto *SrcMask = makeReg(IceType_i32);
4686       auto *DstMask = makeReg(IceType_i32);
4687       const uint32_t Mask = (1 << (CHAR_BITS * typeWidthInBytes(Ty))) - 1;
4688       _sync();
4689       _addiu(T1, getZero(), -4);
4690       _and(T7, Base, T1);
4691       auto *Addr = formMemoryOperand(T7, Ty);
4692       _andi(T2, Base, 3);
4693       _sll(T3, T2, 3);
4694       _ori(T4, getZero(), Mask);
4695       _sllv(T5, T4, T3);
4696       _sllv(T6, Val, T3);
4697       _nor(SrcMask, getZero(), T5);
4698       _and(DstMask, T6, T5);
4699       Context.insert(Retry);
4700       Sandboxer(this).ll(RegAt, Addr);
4701       _and(RegAt, RegAt, SrcMask);
4702       _or(RegAt, RegAt, DstMask);
4703       Sandboxer(this).sc(RegAt, Addr);
4704       _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4705       Context.insert<InstFakeUse>(SrcMask);
4706       Context.insert<InstFakeUse>(DstMask);
4707       _sync();
4708     }
4709     return;
4710   }
4711   case Intrinsics::AtomicCmpxchg: {
4712     assert(isScalarIntegerType(DestTy));
4713     // We require the memory address to be naturally aligned. Given that is the
4714     // case, then normal loads are atomic.
4715     if (!Intrinsics::isMemoryOrderValid(
4716             ID, getConstantMemoryOrder(Instr->getArg(3)),
4717             getConstantMemoryOrder(Instr->getArg(4)))) {
4718       Func->setError("Unexpected memory ordering for AtomicCmpxchg");
4719       return;
4720     }
4721 
4722     InstMIPS32Label *Exit = InstMIPS32Label::create(Func, this);
4723     InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4724     constexpr CfgNode *NoTarget = nullptr;
4725     auto *New = Instr->getArg(2);
4726     auto *Expected = Instr->getArg(1);
4727     auto *ActualAddress = Instr->getArg(0);
4728 
4729     if (DestTy == IceType_i64) {
4730       llvm::report_fatal_error(
4731           "AtomicCmpxchg.i64 should have been prelowered.");
4732       return;
4733     } else if (DestTy == IceType_i8 || DestTy == IceType_i16) {
4734       auto *NewR = legalizeToReg(New);
4735       auto *ExpectedR = legalizeToReg(Expected);
4736       auto *ActualAddressR = legalizeToReg(ActualAddress);
4737       const uint32_t ShiftAmount =
4738           (INT32_BITS - CHAR_BITS * typeWidthInBytes(DestTy));
4739       const uint32_t Mask = (1 << (CHAR_BITS * typeWidthInBytes(DestTy))) - 1;
4740       auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4741       auto *T1 = I32Reg();
4742       auto *T2 = I32Reg();
4743       auto *T3 = I32Reg();
4744       auto *T4 = I32Reg();
4745       auto *T5 = I32Reg();
4746       auto *T6 = I32Reg();
4747       auto *T7 = I32Reg();
4748       auto *T8 = I32Reg();
4749       auto *T9 = I32Reg();
4750       _addiu(RegAt, getZero(), -4);
4751       _and(T1, ActualAddressR, RegAt);
4752       auto *Addr = formMemoryOperand(T1, DestTy);
4753       _andi(RegAt, ActualAddressR, 3);
4754       _sll(T2, RegAt, 3);
4755       _ori(RegAt, getZero(), Mask);
4756       _sllv(T3, RegAt, T2);
4757       _nor(T4, getZero(), T3);
4758       _andi(RegAt, ExpectedR, Mask);
4759       _sllv(T5, RegAt, T2);
4760       _andi(RegAt, NewR, Mask);
4761       _sllv(T6, RegAt, T2);
4762       _sync();
4763       Context.insert(Retry);
4764       Sandboxer(this).ll(T7, Addr);
4765       _and(T8, T7, T3);
4766       _br(NoTarget, NoTarget, T8, T5, Exit, CondMIPS32::Cond::NE);
4767       _and(RegAt, T7, T4);
4768       _or(T9, RegAt, T6);
4769       Sandboxer(this).sc(T9, Addr);
4770       _br(NoTarget, NoTarget, getZero(), T9, Retry, CondMIPS32::Cond::EQ);
4771       Context.insert<InstFakeUse>(getZero());
4772       Context.insert(Exit);
4773       _srlv(RegAt, T8, T2);
4774       _sll(RegAt, RegAt, ShiftAmount);
4775       _sra(RegAt, RegAt, ShiftAmount);
4776       _mov(Dest, RegAt);
4777       _sync();
4778       Context.insert<InstFakeUse>(T3);
4779       Context.insert<InstFakeUse>(T4);
4780       Context.insert<InstFakeUse>(T5);
4781       Context.insert<InstFakeUse>(T6);
4782       Context.insert<InstFakeUse>(T8);
4783       Context.insert<InstFakeUse>(ExpectedR);
4784       Context.insert<InstFakeUse>(NewR);
4785     } else {
4786       auto *T1 = I32Reg();
4787       auto *T2 = I32Reg();
4788       auto *NewR = legalizeToReg(New);
4789       auto *ExpectedR = legalizeToReg(Expected);
4790       auto *ActualAddressR = legalizeToReg(ActualAddress);
4791       _sync();
4792       Context.insert(Retry);
4793       Sandboxer(this).ll(T1, formMemoryOperand(ActualAddressR, DestTy));
4794       _br(NoTarget, NoTarget, T1, ExpectedR, Exit, CondMIPS32::Cond::NE);
4795       _mov(T2, NewR);
4796       Sandboxer(this).sc(T2, formMemoryOperand(ActualAddressR, DestTy));
4797       _br(NoTarget, NoTarget, T2, getZero(), Retry, CondMIPS32::Cond::EQ);
4798       Context.insert<InstFakeUse>(getZero());
4799       Context.insert(Exit);
4800       _mov(Dest, T1);
4801       _sync();
4802       Context.insert<InstFakeUse>(ExpectedR);
4803       Context.insert<InstFakeUse>(NewR);
4804     }
4805     return;
4806   }
4807   case Intrinsics::AtomicRMW: {
4808     assert(isScalarIntegerType(DestTy));
4809     // We require the memory address to be naturally aligned. Given that is the
4810     // case, then normal loads are atomic.
4811     if (!Intrinsics::isMemoryOrderValid(
4812             ID, getConstantMemoryOrder(Instr->getArg(3)))) {
4813       Func->setError("Unexpected memory ordering for AtomicRMW");
4814       return;
4815     }
4816 
4817     constexpr CfgNode *NoTarget = nullptr;
4818     InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4819     auto Operation = static_cast<Intrinsics::AtomicRMWOperation>(
4820         llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue());
4821     auto *New = Instr->getArg(2);
4822     auto *ActualAddress = Instr->getArg(1);
4823 
4824     if (DestTy == IceType_i64) {
4825       llvm::report_fatal_error("AtomicRMW.i64 should have been prelowered.");
4826       return;
4827     } else if (DestTy == IceType_i8 || DestTy == IceType_i16) {
4828       const uint32_t ShiftAmount =
4829           INT32_BITS - (CHAR_BITS * typeWidthInBytes(DestTy));
4830       const uint32_t Mask = (1 << (CHAR_BITS * typeWidthInBytes(DestTy))) - 1;
4831       auto *NewR = legalizeToReg(New);
4832       auto *ActualAddressR = legalizeToReg(ActualAddress);
4833       auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4834       auto *T1 = I32Reg();
4835       auto *T2 = I32Reg();
4836       auto *T3 = I32Reg();
4837       auto *T4 = I32Reg();
4838       auto *T5 = I32Reg();
4839       auto *T6 = I32Reg();
4840       auto *T7 = I32Reg();
4841       _sync();
4842       _addiu(RegAt, getZero(), -4);
4843       _and(T1, ActualAddressR, RegAt);
4844       _andi(RegAt, ActualAddressR, 3);
4845       _sll(T2, RegAt, 3);
4846       _ori(RegAt, getZero(), Mask);
4847       _sllv(T3, RegAt, T2);
4848       _nor(T4, getZero(), T3);
4849       _sllv(T5, NewR, T2);
4850       Context.insert(Retry);
4851       Sandboxer(this).ll(T6, formMemoryOperand(T1, DestTy));
4852       if (Operation != Intrinsics::AtomicExchange) {
4853         createArithInst(Operation, RegAt, T6, T5);
4854         _and(RegAt, RegAt, T3);
4855       }
4856       _and(T7, T6, T4);
4857       if (Operation == Intrinsics::AtomicExchange) {
4858         _or(RegAt, T7, T5);
4859       } else {
4860         _or(RegAt, T7, RegAt);
4861       }
4862       Sandboxer(this).sc(RegAt, formMemoryOperand(T1, DestTy));
4863       _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4864       Context.insert<InstFakeUse>(getZero());
4865       _and(RegAt, T6, T3);
4866       _srlv(RegAt, RegAt, T2);
4867       _sll(RegAt, RegAt, ShiftAmount);
4868       _sra(RegAt, RegAt, ShiftAmount);
4869       _mov(Dest, RegAt);
4870       _sync();
4871       Context.insert<InstFakeUse>(NewR);
4872       Context.insert<InstFakeUse>(Dest);
4873     } else {
4874       auto *T1 = I32Reg();
4875       auto *T2 = I32Reg();
4876       auto *NewR = legalizeToReg(New);
4877       auto *ActualAddressR = legalizeToReg(ActualAddress);
4878       _sync();
4879       Context.insert(Retry);
4880       Sandboxer(this).ll(T1, formMemoryOperand(ActualAddressR, DestTy));
4881       if (Operation == Intrinsics::AtomicExchange) {
4882         _mov(T2, NewR);
4883       } else {
4884         createArithInst(Operation, T2, T1, NewR);
4885       }
4886       Sandboxer(this).sc(T2, formMemoryOperand(ActualAddressR, DestTy));
4887       _br(NoTarget, NoTarget, T2, getZero(), Retry, CondMIPS32::Cond::EQ);
4888       Context.insert<InstFakeUse>(getZero());
4889       _mov(Dest, T1);
4890       _sync();
4891       Context.insert<InstFakeUse>(NewR);
4892       Context.insert<InstFakeUse>(Dest);
4893     }
4894     return;
4895   }
4896   case Intrinsics::AtomicFence:
4897   case Intrinsics::AtomicFenceAll:
4898     assert(Dest == nullptr);
4899     _sync();
4900     return;
4901   case Intrinsics::AtomicIsLockFree: {
4902     Operand *ByteSize = Instr->getArg(0);
4903     auto *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize);
4904     auto *T = I32Reg();
4905     if (CI == nullptr) {
4906       // The PNaCl ABI requires the byte size to be a compile-time constant.
4907       Func->setError("AtomicIsLockFree byte size should be compile-time const");
4908       return;
4909     }
4910     static constexpr int32_t NotLockFree = 0;
4911     static constexpr int32_t LockFree = 1;
4912     int32_t Result = NotLockFree;
4913     switch (CI->getValue()) {
4914     case 1:
4915     case 2:
4916     case 4:
4917       Result = LockFree;
4918       break;
4919     }
4920     _addiu(T, getZero(), Result);
4921     _mov(Dest, T);
4922     return;
4923   }
4924   case Intrinsics::Bswap: {
4925     auto *Src = Instr->getArg(0);
4926     const Type SrcTy = Src->getType();
4927     assert(SrcTy == IceType_i16 || SrcTy == IceType_i32 ||
4928            SrcTy == IceType_i64);
4929     switch (SrcTy) {
4930     case IceType_i16: {
4931       auto *T1 = I32Reg();
4932       auto *T2 = I32Reg();
4933       auto *T3 = I32Reg();
4934       auto *T4 = I32Reg();
4935       auto *SrcR = legalizeToReg(Src);
4936       _sll(T1, SrcR, 8);
4937       _lui(T2, Ctx->getConstantInt32(255));
4938       _and(T1, T1, T2);
4939       _sll(T3, SrcR, 24);
4940       _or(T1, T3, T1);
4941       _srl(T4, T1, 16);
4942       _mov(Dest, T4);
4943       return;
4944     }
4945     case IceType_i32: {
4946       auto *T1 = I32Reg();
4947       auto *T2 = I32Reg();
4948       auto *T3 = I32Reg();
4949       auto *T4 = I32Reg();
4950       auto *T5 = I32Reg();
4951       auto *SrcR = legalizeToReg(Src);
4952       _srl(T1, SrcR, 24);
4953       _srl(T2, SrcR, 8);
4954       _andi(T2, T2, 0xFF00);
4955       _or(T1, T2, T1);
4956       _sll(T4, SrcR, 8);
4957       _lui(T3, Ctx->getConstantInt32(255));
4958       _and(T4, T4, T3);
4959       _sll(T5, SrcR, 24);
4960       _or(T4, T5, T4);
4961       _or(T4, T4, T1);
4962       _mov(Dest, T4);
4963       return;
4964     }
4965     case IceType_i64: {
4966       auto *T1 = I32Reg();
4967       auto *T2 = I32Reg();
4968       auto *T3 = I32Reg();
4969       auto *T4 = I32Reg();
4970       auto *T5 = I32Reg();
4971       auto *T6 = I32Reg();
4972       auto *T7 = I32Reg();
4973       auto *T8 = I32Reg();
4974       auto *T9 = I32Reg();
4975       auto *T10 = I32Reg();
4976       auto *T11 = I32Reg();
4977       auto *T12 = I32Reg();
4978       auto *T13 = I32Reg();
4979       auto *T14 = I32Reg();
4980       auto *T15 = I32Reg();
4981       auto *T16 = I32Reg();
4982       auto *T17 = I32Reg();
4983       auto *T18 = I32Reg();
4984       auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
4985       auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
4986       Src = legalizeUndef(Src);
4987       auto *SrcLoR = legalizeToReg(loOperand(Src));
4988       auto *SrcHiR = legalizeToReg(hiOperand(Src));
4989       _sll(T1, SrcHiR, 8);
4990       _srl(T2, SrcHiR, 24);
4991       _srl(T3, SrcHiR, 8);
4992       _andi(T3, T3, 0xFF00);
4993       _lui(T4, Ctx->getConstantInt32(255));
4994       _or(T5, T3, T2);
4995       _and(T6, T1, T4);
4996       _sll(T7, SrcHiR, 24);
4997       _or(T8, T7, T6);
4998       _srl(T9, SrcLoR, 24);
4999       _srl(T10, SrcLoR, 8);
5000       _andi(T11, T10, 0xFF00);
5001       _or(T12, T8, T5);
5002       _or(T13, T11, T9);
5003       _sll(T14, SrcLoR, 8);
5004       _and(T15, T14, T4);
5005       _sll(T16, SrcLoR, 24);
5006       _or(T17, T16, T15);
5007       _or(T18, T17, T13);
5008       _mov(DestLo, T12);
5009       _mov(DestHi, T18);
5010       return;
5011     }
5012     default:
5013       llvm::report_fatal_error("Control flow should never have reached here.");
5014     }
5015     return;
5016   }
5017   case Intrinsics::Ctpop: {
5018     llvm::report_fatal_error("Ctpop should have been prelowered.");
5019     return;
5020   }
5021   case Intrinsics::Ctlz: {
5022     auto *Src = Instr->getArg(0);
5023     const Type SrcTy = Src->getType();
5024     assert(SrcTy == IceType_i32 || SrcTy == IceType_i64);
5025     switch (SrcTy) {
5026     case IceType_i32: {
5027       auto *T = I32Reg();
5028       auto *SrcR = legalizeToReg(Src);
5029       _clz(T, SrcR);
5030       _mov(Dest, T);
5031       break;
5032     }
5033     case IceType_i64: {
5034       auto *T1 = I32Reg();
5035       auto *T2 = I32Reg();
5036       auto *T3 = I32Reg();
5037       auto *T4 = I32Reg();
5038       auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
5039       auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
5040       Variable *SrcHiR = legalizeToReg(hiOperand(Src));
5041       Variable *SrcLoR = legalizeToReg(loOperand(Src));
5042       _clz(T1, SrcHiR);
5043       _clz(T2, SrcLoR);
5044       _addiu(T3, T2, 32);
5045       _movn(T3, T1, SrcHiR);
5046       _addiu(T4, getZero(), 0);
5047       _mov(DestHi, T4);
5048       _mov(DestLo, T3);
5049       break;
5050     }
5051     default:
5052       llvm::report_fatal_error("Control flow should never have reached here.");
5053     }
5054     break;
5055   }
5056   case Intrinsics::Cttz: {
5057     auto *Src = Instr->getArg(0);
5058     const Type SrcTy = Src->getType();
5059     assert(SrcTy == IceType_i32 || SrcTy == IceType_i64);
5060     switch (SrcTy) {
5061     case IceType_i32: {
5062       auto *T1 = I32Reg();
5063       auto *T2 = I32Reg();
5064       auto *T3 = I32Reg();
5065       auto *T4 = I32Reg();
5066       auto *T5 = I32Reg();
5067       auto *T6 = I32Reg();
5068       auto *SrcR = legalizeToReg(Src);
5069       _addiu(T1, SrcR, -1);
5070       _not(T2, SrcR);
5071       _and(T3, T2, T1);
5072       _clz(T4, T3);
5073       _addiu(T5, getZero(), 32);
5074       _subu(T6, T5, T4);
5075       _mov(Dest, T6);
5076       break;
5077     }
5078     case IceType_i64: {
5079       auto *THi1 = I32Reg();
5080       auto *THi2 = I32Reg();
5081       auto *THi3 = I32Reg();
5082       auto *THi4 = I32Reg();
5083       auto *THi5 = I32Reg();
5084       auto *THi6 = I32Reg();
5085       auto *TLo1 = I32Reg();
5086       auto *TLo2 = I32Reg();
5087       auto *TLo3 = I32Reg();
5088       auto *TLo4 = I32Reg();
5089       auto *TLo5 = I32Reg();
5090       auto *TLo6 = I32Reg();
5091       auto *TResHi = I32Reg();
5092       auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
5093       auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
5094       Variable *SrcHiR = legalizeToReg(hiOperand(Src));
5095       Variable *SrcLoR = legalizeToReg(loOperand(Src));
5096       _addiu(THi1, SrcHiR, -1);
5097       _not(THi2, SrcHiR);
5098       _and(THi3, THi2, THi1);
5099       _clz(THi4, THi3);
5100       _addiu(THi5, getZero(), 64);
5101       _subu(THi6, THi5, THi4);
5102       _addiu(TLo1, SrcLoR, -1);
5103       _not(TLo2, SrcLoR);
5104       _and(TLo3, TLo2, TLo1);
5105       _clz(TLo4, TLo3);
5106       _addiu(TLo5, getZero(), 32);
5107       _subu(TLo6, TLo5, TLo4);
5108       _movn(THi6, TLo6, SrcLoR);
5109       _addiu(TResHi, getZero(), 0);
5110       _mov(DestHi, TResHi);
5111       _mov(DestLo, THi6);
5112       break;
5113     }
5114     default:
5115       llvm::report_fatal_error("Control flow should never have reached here.");
5116     }
5117     return;
5118   }
5119   case Intrinsics::Fabs: {
5120     if (isScalarFloatingType(DestTy)) {
5121       Variable *T = makeReg(DestTy);
5122       if (DestTy == IceType_f32) {
5123         _abs_s(T, legalizeToReg(Instr->getArg(0)));
5124       } else {
5125         _abs_d(T, legalizeToReg(Instr->getArg(0)));
5126       }
5127       _mov(Dest, T);
5128     }
5129     return;
5130   }
5131   case Intrinsics::Longjmp: {
5132     llvm::report_fatal_error("longjmp should have been prelowered.");
5133     return;
5134   }
5135   case Intrinsics::Memcpy: {
5136     llvm::report_fatal_error("memcpy should have been prelowered.");
5137     return;
5138   }
5139   case Intrinsics::Memmove: {
5140     llvm::report_fatal_error("memmove should have been prelowered.");
5141     return;
5142   }
5143   case Intrinsics::Memset: {
5144     llvm::report_fatal_error("memset should have been prelowered.");
5145     return;
5146   }
5147   case Intrinsics::NaClReadTP: {
5148     if (SandboxingType != ST_NaCl)
5149       llvm::report_fatal_error("nacl-read-tp should have been prelowered.");
5150     else {
5151       auto *T8 = makeReg(IceType_i32, RegMIPS32::Reg_T8);
5152       Context.insert<InstFakeDef>(T8);
5153       Variable *TP = legalizeToReg(OperandMIPS32Mem::create(
5154           Func, getPointerType(), T8,
5155           llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32))));
5156       _mov(Dest, TP);
5157     }
5158     return;
5159   }
5160   case Intrinsics::Setjmp: {
5161     llvm::report_fatal_error("setjmp should have been prelowered.");
5162     return;
5163   }
5164   case Intrinsics::Sqrt: {
5165     if (isScalarFloatingType(DestTy)) {
5166       Variable *T = makeReg(DestTy);
5167       if (DestTy == IceType_f32) {
5168         _sqrt_s(T, legalizeToReg(Instr->getArg(0)));
5169       } else {
5170         _sqrt_d(T, legalizeToReg(Instr->getArg(0)));
5171       }
5172       _mov(Dest, T);
5173     } else {
5174       assert(getFlags().getApplicationBinaryInterface() != ::Ice::ABI_PNaCl);
5175       UnimplementedLoweringError(this, Instr); // Not required for PNaCl
5176     }
5177     return;
5178   }
5179   case Intrinsics::Stacksave: {
5180     Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
5181     _mov(Dest, SP);
5182     return;
5183   }
5184   case Intrinsics::Stackrestore: {
5185     Variable *Val = legalizeToReg(Instr->getArg(0));
5186     Sandboxer(this).reset_sp(Val);
5187     return;
5188   }
5189   case Intrinsics::Trap: {
5190     const uint32_t TrapCodeZero = 0;
5191     _teq(getZero(), getZero(), TrapCodeZero);
5192     return;
5193   }
5194   case Intrinsics::LoadSubVector: {
5195     UnimplementedLoweringError(this, Instr); // Not required for PNaCl
5196     return;
5197   }
5198   case Intrinsics::StoreSubVector: {
5199     UnimplementedLoweringError(this, Instr); // Not required for PNaCl
5200     return;
5201   }
5202   default: // UnknownIntrinsic
5203     Func->setError("Unexpected intrinsic");
5204     return;
5205   }
5206   return;
5207 }
5208 
lowerLoad(const InstLoad * Instr)5209 void TargetMIPS32::lowerLoad(const InstLoad *Instr) {
5210   // A Load instruction can be treated the same as an Assign instruction, after
5211   // the source operand is transformed into an OperandMIPS32Mem operand.
5212   Type Ty = Instr->getDest()->getType();
5213   Operand *Src0 = formMemoryOperand(Instr->getSourceAddress(), Ty);
5214   Variable *DestLoad = Instr->getDest();
5215   auto *Assign = InstAssign::create(Func, DestLoad, Src0);
5216   lowerAssign(Assign);
5217 }
5218 
5219 namespace {
dumpAddressOpt(const Cfg * Func,const Variable * Base,int32_t Offset,const Inst * Reason)5220 void dumpAddressOpt(const Cfg *Func, const Variable *Base, int32_t Offset,
5221                     const Inst *Reason) {
5222   if (!BuildDefs::dump())
5223     return;
5224   if (!Func->isVerbose(IceV_AddrOpt))
5225     return;
5226   OstreamLocker _(Func->getContext());
5227   Ostream &Str = Func->getContext()->getStrDump();
5228   Str << "Instruction: ";
5229   Reason->dumpDecorated(Func);
5230   Str << "  results in Base=";
5231   if (Base)
5232     Base->dump(Func);
5233   else
5234     Str << "<null>";
5235   Str << ", Offset=" << Offset << "\n";
5236 }
5237 
matchAssign(const VariablesMetadata * VMetadata,Variable ** Var,int32_t * Offset,const Inst ** Reason)5238 bool matchAssign(const VariablesMetadata *VMetadata, Variable **Var,
5239                  int32_t *Offset, const Inst **Reason) {
5240   // Var originates from Var=SrcVar ==> set Var:=SrcVar
5241   if (*Var == nullptr)
5242     return false;
5243   const Inst *VarAssign = VMetadata->getSingleDefinition(*Var);
5244   if (!VarAssign)
5245     return false;
5246   assert(!VMetadata->isMultiDef(*Var));
5247   if (!llvm::isa<InstAssign>(VarAssign))
5248     return false;
5249 
5250   Operand *SrcOp = VarAssign->getSrc(0);
5251   bool Optimized = false;
5252   if (auto *SrcVar = llvm::dyn_cast<Variable>(SrcOp)) {
5253     if (!VMetadata->isMultiDef(SrcVar) ||
5254         // TODO: ensure SrcVar stays single-BB
5255         false) {
5256       Optimized = true;
5257       *Var = SrcVar;
5258     } else if (auto *Const = llvm::dyn_cast<ConstantInteger32>(SrcOp)) {
5259       int32_t MoreOffset = Const->getValue();
5260       int32_t NewOffset = MoreOffset + *Offset;
5261       if (Utils::WouldOverflowAdd(*Offset, MoreOffset))
5262         return false;
5263       *Var = nullptr;
5264       *Offset += NewOffset;
5265       Optimized = true;
5266     }
5267   }
5268 
5269   if (Optimized) {
5270     *Reason = VarAssign;
5271   }
5272 
5273   return Optimized;
5274 }
5275 
isAddOrSub(const Inst * Instr,InstArithmetic::OpKind * Kind)5276 bool isAddOrSub(const Inst *Instr, InstArithmetic::OpKind *Kind) {
5277   if (const auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) {
5278     switch (Arith->getOp()) {
5279     default:
5280       return false;
5281     case InstArithmetic::Add:
5282     case InstArithmetic::Sub:
5283       *Kind = Arith->getOp();
5284       return true;
5285     }
5286   }
5287   return false;
5288 }
5289 
matchOffsetBase(const VariablesMetadata * VMetadata,Variable ** Base,int32_t * Offset,const Inst ** Reason)5290 bool matchOffsetBase(const VariablesMetadata *VMetadata, Variable **Base,
5291                      int32_t *Offset, const Inst **Reason) {
5292   // Base is Base=Var+Const || Base is Base=Const+Var ==>
5293   //   set Base=Var, Offset+=Const
5294   // Base is Base=Var-Const ==>
5295   //   set Base=Var, Offset-=Const
5296   if (*Base == nullptr)
5297     return false;
5298   const Inst *BaseInst = VMetadata->getSingleDefinition(*Base);
5299   if (BaseInst == nullptr) {
5300     return false;
5301   }
5302   assert(!VMetadata->isMultiDef(*Base));
5303 
5304   auto *ArithInst = llvm::dyn_cast<const InstArithmetic>(BaseInst);
5305   if (ArithInst == nullptr)
5306     return false;
5307   InstArithmetic::OpKind Kind;
5308   if (!isAddOrSub(ArithInst, &Kind))
5309     return false;
5310   bool IsAdd = Kind == InstArithmetic::Add;
5311   Operand *Src0 = ArithInst->getSrc(0);
5312   Operand *Src1 = ArithInst->getSrc(1);
5313   auto *Var0 = llvm::dyn_cast<Variable>(Src0);
5314   auto *Var1 = llvm::dyn_cast<Variable>(Src1);
5315   auto *Const0 = llvm::dyn_cast<ConstantInteger32>(Src0);
5316   auto *Const1 = llvm::dyn_cast<ConstantInteger32>(Src1);
5317   Variable *NewBase = nullptr;
5318   int32_t NewOffset = *Offset;
5319 
5320   if (Var0 == nullptr && Const0 == nullptr) {
5321     assert(llvm::isa<ConstantRelocatable>(Src0));
5322     return false;
5323   }
5324 
5325   if (Var1 == nullptr && Const1 == nullptr) {
5326     assert(llvm::isa<ConstantRelocatable>(Src1));
5327     return false;
5328   }
5329 
5330   if (Var0 && Var1)
5331     // TODO(jpp): merge base/index splitting into here.
5332     return false;
5333   if (!IsAdd && Var1)
5334     return false;
5335   if (Var0)
5336     NewBase = Var0;
5337   else if (Var1)
5338     NewBase = Var1;
5339   // Compute the updated constant offset.
5340   if (Const0) {
5341     int32_t MoreOffset = IsAdd ? Const0->getValue() : -Const0->getValue();
5342     if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
5343       return false;
5344     NewOffset += MoreOffset;
5345   }
5346   if (Const1) {
5347     int32_t MoreOffset = IsAdd ? Const1->getValue() : -Const1->getValue();
5348     if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
5349       return false;
5350     NewOffset += MoreOffset;
5351   }
5352 
5353   // Update the computed address parameters once we are sure optimization
5354   // is valid.
5355   *Base = NewBase;
5356   *Offset = NewOffset;
5357   *Reason = BaseInst;
5358   return true;
5359 }
5360 } // end of anonymous namespace
5361 
formAddressingMode(Type Ty,Cfg * Func,const Inst * LdSt,Operand * Base)5362 OperandMIPS32Mem *TargetMIPS32::formAddressingMode(Type Ty, Cfg *Func,
5363                                                    const Inst *LdSt,
5364                                                    Operand *Base) {
5365   assert(Base != nullptr);
5366   int32_t OffsetImm = 0;
5367 
5368   Func->resetCurrentNode();
5369   if (Func->isVerbose(IceV_AddrOpt)) {
5370     OstreamLocker _(Func->getContext());
5371     Ostream &Str = Func->getContext()->getStrDump();
5372     Str << "\nAddress mode formation:\t";
5373     LdSt->dumpDecorated(Func);
5374   }
5375 
5376   if (isVectorType(Ty)) {
5377     return nullptr;
5378   }
5379 
5380   auto *BaseVar = llvm::dyn_cast<Variable>(Base);
5381   if (BaseVar == nullptr)
5382     return nullptr;
5383 
5384   const VariablesMetadata *VMetadata = Func->getVMetadata();
5385   const Inst *Reason = nullptr;
5386 
5387   do {
5388     if (Reason != nullptr) {
5389       dumpAddressOpt(Func, BaseVar, OffsetImm, Reason);
5390       Reason = nullptr;
5391     }
5392 
5393     if (matchAssign(VMetadata, &BaseVar, &OffsetImm, &Reason)) {
5394       continue;
5395     }
5396 
5397     if (matchOffsetBase(VMetadata, &BaseVar, &OffsetImm, &Reason)) {
5398       continue;
5399     }
5400   } while (Reason);
5401 
5402   if (BaseVar == nullptr) {
5403     // We need base register rather than just OffsetImm. Move the OffsetImm to
5404     // BaseVar and form 0(BaseVar) addressing.
5405     const Type PointerType = getPointerType();
5406     BaseVar = makeReg(PointerType);
5407     Context.insert<InstAssign>(BaseVar, Ctx->getConstantInt32(OffsetImm));
5408     OffsetImm = 0;
5409   } else if (OffsetImm != 0) {
5410     // If the OffsetImm is more than signed 16-bit value then add it in the
5411     // BaseVar and form 0(BaseVar) addressing.
5412     const int32_t PositiveOffset = OffsetImm > 0 ? OffsetImm : -OffsetImm;
5413     const InstArithmetic::OpKind Op =
5414         OffsetImm > 0 ? InstArithmetic::Add : InstArithmetic::Sub;
5415     constexpr bool ZeroExt = false;
5416     if (!OperandMIPS32Mem::canHoldOffset(Ty, ZeroExt, OffsetImm)) {
5417       const Type PointerType = getPointerType();
5418       Variable *T = makeReg(PointerType);
5419       Context.insert<InstArithmetic>(Op, T, BaseVar,
5420                                      Ctx->getConstantInt32(PositiveOffset));
5421       BaseVar = T;
5422       OffsetImm = 0;
5423     }
5424   }
5425 
5426   assert(BaseVar != nullptr);
5427   assert(OffsetImm < 0 ? (-OffsetImm & 0x0000ffff) == -OffsetImm
5428                        : (OffsetImm & 0x0000ffff) == OffsetImm);
5429 
5430   return OperandMIPS32Mem::create(
5431       Func, Ty, BaseVar,
5432       llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(OffsetImm)));
5433 }
5434 
doAddressOptLoad()5435 void TargetMIPS32::doAddressOptLoad() {
5436   Inst *Instr = iteratorToInst(Context.getCur());
5437   assert(llvm::isa<InstLoad>(Instr));
5438   Variable *Dest = Instr->getDest();
5439   Operand *Addr = Instr->getSrc(0);
5440   if (OperandMIPS32Mem *Mem =
5441           formAddressingMode(Dest->getType(), Func, Instr, Addr)) {
5442     Instr->setDeleted();
5443     Context.insert<InstLoad>(Dest, Mem);
5444   }
5445 }
5446 
randomlyInsertNop(float Probability,RandomNumberGenerator & RNG)5447 void TargetMIPS32::randomlyInsertNop(float Probability,
5448                                      RandomNumberGenerator &RNG) {
5449   RandomNumberGeneratorWrapper RNGW(RNG);
5450   if (RNGW.getTrueWithProbability(Probability)) {
5451     _nop();
5452   }
5453 }
5454 
lowerPhi(const InstPhi *)5455 void TargetMIPS32::lowerPhi(const InstPhi * /*Instr*/) {
5456   Func->setError("Phi found in regular instruction list");
5457 }
5458 
lowerRet(const InstRet * Instr)5459 void TargetMIPS32::lowerRet(const InstRet *Instr) {
5460   Variable *Reg = nullptr;
5461   if (Instr->hasRetValue()) {
5462     Operand *Src0 = Instr->getRetValue();
5463     switch (Src0->getType()) {
5464     case IceType_f32: {
5465       Operand *Src0F = legalizeToReg(Src0);
5466       Reg = makeReg(Src0F->getType(), RegMIPS32::Reg_F0);
5467       _mov(Reg, Src0F);
5468       break;
5469     }
5470     case IceType_f64: {
5471       Operand *Src0F = legalizeToReg(Src0);
5472       Reg = makeReg(Src0F->getType(), RegMIPS32::Reg_F0F1);
5473       _mov(Reg, Src0F);
5474       break;
5475     }
5476     case IceType_i1:
5477     case IceType_i8:
5478     case IceType_i16:
5479     case IceType_i32: {
5480       Operand *Src0F = legalizeToReg(Src0);
5481       Reg = makeReg(Src0F->getType(), RegMIPS32::Reg_V0);
5482       _mov(Reg, Src0F);
5483       break;
5484     }
5485     case IceType_i64: {
5486       Src0 = legalizeUndef(Src0);
5487       Variable *R0 = legalizeToReg(loOperand(Src0), RegMIPS32::Reg_V0);
5488       Variable *R1 = legalizeToReg(hiOperand(Src0), RegMIPS32::Reg_V1);
5489       Reg = R0;
5490       Context.insert<InstFakeUse>(R1);
5491       break;
5492     }
5493     case IceType_v4i1:
5494     case IceType_v8i1:
5495     case IceType_v16i1:
5496     case IceType_v16i8:
5497     case IceType_v8i16:
5498     case IceType_v4i32: {
5499       auto *SrcVec = llvm::dyn_cast<VariableVecOn32>(legalizeUndef(Src0));
5500       Variable *V0 =
5501           legalizeToReg(SrcVec->getContainers()[0], RegMIPS32::Reg_V0);
5502       Variable *V1 =
5503           legalizeToReg(SrcVec->getContainers()[1], RegMIPS32::Reg_V1);
5504       Variable *A0 =
5505           legalizeToReg(SrcVec->getContainers()[2], RegMIPS32::Reg_A0);
5506       Variable *A1 =
5507           legalizeToReg(SrcVec->getContainers()[3], RegMIPS32::Reg_A1);
5508       Reg = V0;
5509       Context.insert<InstFakeUse>(V1);
5510       Context.insert<InstFakeUse>(A0);
5511       Context.insert<InstFakeUse>(A1);
5512       break;
5513     }
5514     case IceType_v4f32: {
5515       auto *SrcVec = llvm::dyn_cast<VariableVecOn32>(legalizeUndef(Src0));
5516       Reg = getImplicitRet();
5517       auto *RegT = legalizeToReg(Reg);
5518       // Return the vector through buffer in implicit argument a0
5519       for (SizeT i = 0; i < SrcVec->ContainersPerVector; ++i) {
5520         OperandMIPS32Mem *Mem = OperandMIPS32Mem::create(
5521             Func, IceType_f32, RegT,
5522             llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(i * 4)));
5523         Variable *Var = legalizeToReg(SrcVec->getContainers()[i]);
5524         _sw(Var, Mem);
5525       }
5526       Variable *V0 = makeReg(IceType_i32, RegMIPS32::Reg_V0);
5527       _mov(V0, Reg); // move v0,a0
5528       Context.insert<InstFakeUse>(Reg);
5529       Context.insert<InstFakeUse>(V0);
5530       break;
5531     }
5532     default:
5533       llvm::report_fatal_error("Ret: Invalid type.");
5534       break;
5535     }
5536   }
5537   _ret(getPhysicalRegister(RegMIPS32::Reg_RA), Reg);
5538 }
5539 
lowerSelect(const InstSelect * Instr)5540 void TargetMIPS32::lowerSelect(const InstSelect *Instr) {
5541   Variable *Dest = Instr->getDest();
5542   const Type DestTy = Dest->getType();
5543 
5544   if (isVectorType(DestTy)) {
5545     llvm::report_fatal_error("Select: Destination type is vector");
5546     return;
5547   }
5548 
5549   Variable *DestR = nullptr;
5550   Variable *DestHiR = nullptr;
5551   Variable *SrcTR = nullptr;
5552   Variable *SrcTHiR = nullptr;
5553   Variable *SrcFR = nullptr;
5554   Variable *SrcFHiR = nullptr;
5555 
5556   if (DestTy == IceType_i64) {
5557     DestR = llvm::cast<Variable>(loOperand(Dest));
5558     DestHiR = llvm::cast<Variable>(hiOperand(Dest));
5559     SrcTR = legalizeToReg(loOperand(legalizeUndef(Instr->getTrueOperand())));
5560     SrcTHiR = legalizeToReg(hiOperand(legalizeUndef(Instr->getTrueOperand())));
5561     SrcFR = legalizeToReg(loOperand(legalizeUndef(Instr->getFalseOperand())));
5562     SrcFHiR = legalizeToReg(hiOperand(legalizeUndef(Instr->getFalseOperand())));
5563   } else {
5564     SrcTR = legalizeToReg(legalizeUndef(Instr->getTrueOperand()));
5565     SrcFR = legalizeToReg(legalizeUndef(Instr->getFalseOperand()));
5566   }
5567 
5568   Variable *ConditionR = legalizeToReg(Instr->getCondition());
5569 
5570   assert(Instr->getCondition()->getType() == IceType_i1);
5571 
5572   switch (DestTy) {
5573   case IceType_i1:
5574   case IceType_i8:
5575   case IceType_i16:
5576   case IceType_i32:
5577     _movn(SrcFR, SrcTR, ConditionR);
5578     _mov(Dest, SrcFR);
5579     break;
5580   case IceType_i64:
5581     _movn(SrcFR, SrcTR, ConditionR);
5582     _movn(SrcFHiR, SrcTHiR, ConditionR);
5583     _mov(DestR, SrcFR);
5584     _mov(DestHiR, SrcFHiR);
5585     break;
5586   case IceType_f32:
5587     _movn_s(SrcFR, SrcTR, ConditionR);
5588     _mov(Dest, SrcFR);
5589     break;
5590   case IceType_f64:
5591     _movn_d(SrcFR, SrcTR, ConditionR);
5592     _mov(Dest, SrcFR);
5593     break;
5594   default:
5595     llvm::report_fatal_error("Select: Invalid type.");
5596   }
5597 }
5598 
lowerShuffleVector(const InstShuffleVector * Instr)5599 void TargetMIPS32::lowerShuffleVector(const InstShuffleVector *Instr) {
5600   UnimplementedLoweringError(this, Instr);
5601 }
5602 
lowerStore(const InstStore * Instr)5603 void TargetMIPS32::lowerStore(const InstStore *Instr) {
5604   Operand *Value = Instr->getData();
5605   Operand *Addr = Instr->getAddr();
5606   OperandMIPS32Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
5607   Type Ty = NewAddr->getType();
5608 
5609   if (Ty == IceType_i64) {
5610     Value = legalizeUndef(Value);
5611     Variable *ValueHi = legalizeToReg(hiOperand(Value));
5612     Variable *ValueLo = legalizeToReg(loOperand(Value));
5613     _sw(ValueHi, llvm::cast<OperandMIPS32Mem>(hiOperand(NewAddr)));
5614     _sw(ValueLo, llvm::cast<OperandMIPS32Mem>(loOperand(NewAddr)));
5615   } else if (isVectorType(Value->getType())) {
5616     auto *DataVec = llvm::dyn_cast<VariableVecOn32>(Value);
5617     for (SizeT i = 0; i < DataVec->ContainersPerVector; ++i) {
5618       auto *DCont = legalizeToReg(DataVec->getContainers()[i]);
5619       auto *MCont = llvm::cast<OperandMIPS32Mem>(
5620           getOperandAtIndex(NewAddr, IceType_i32, i));
5621       _sw(DCont, MCont);
5622     }
5623   } else {
5624     Variable *ValueR = legalizeToReg(Value);
5625     _sw(ValueR, NewAddr);
5626   }
5627 }
5628 
doAddressOptStore()5629 void TargetMIPS32::doAddressOptStore() {
5630   Inst *Instr = iteratorToInst(Context.getCur());
5631   assert(llvm::isa<InstStore>(Instr));
5632   Operand *Src = Instr->getSrc(0);
5633   Operand *Addr = Instr->getSrc(1);
5634   if (OperandMIPS32Mem *Mem =
5635           formAddressingMode(Src->getType(), Func, Instr, Addr)) {
5636     Instr->setDeleted();
5637     Context.insert<InstStore>(Src, Mem);
5638   }
5639 }
5640 
lowerSwitch(const InstSwitch * Instr)5641 void TargetMIPS32::lowerSwitch(const InstSwitch *Instr) {
5642   Operand *Src = Instr->getComparison();
5643   SizeT NumCases = Instr->getNumCases();
5644   if (Src->getType() == IceType_i64) {
5645     Src = legalizeUndef(Src);
5646     Variable *Src0Lo = legalizeToReg(loOperand(Src));
5647     Variable *Src0Hi = legalizeToReg(hiOperand(Src));
5648     for (SizeT I = 0; I < NumCases; ++I) {
5649       Operand *ValueLo = Ctx->getConstantInt32(Instr->getValue(I));
5650       Operand *ValueHi = Ctx->getConstantInt32(Instr->getValue(I) >> 32);
5651       CfgNode *TargetTrue = Instr->getLabel(I);
5652       constexpr CfgNode *NoTarget = nullptr;
5653       ValueHi = legalizeToReg(ValueHi);
5654       InstMIPS32Label *IntraLabel = InstMIPS32Label::create(Func, this);
5655       _br(NoTarget, NoTarget, Src0Hi, ValueHi, IntraLabel,
5656           CondMIPS32::Cond::NE);
5657       ValueLo = legalizeToReg(ValueLo);
5658       _br(NoTarget, TargetTrue, Src0Lo, ValueLo, CondMIPS32::Cond::EQ);
5659       Context.insert(IntraLabel);
5660     }
5661     _br(Instr->getLabelDefault());
5662     return;
5663   }
5664   Variable *SrcVar = legalizeToReg(Src);
5665   assert(SrcVar->mustHaveReg());
5666   for (SizeT I = 0; I < NumCases; ++I) {
5667     Operand *Value = Ctx->getConstantInt32(Instr->getValue(I));
5668     CfgNode *TargetTrue = Instr->getLabel(I);
5669     constexpr CfgNode *NoTargetFalse = nullptr;
5670     Value = legalizeToReg(Value);
5671     _br(NoTargetFalse, TargetTrue, SrcVar, Value, CondMIPS32::Cond::EQ);
5672   }
5673   _br(Instr->getLabelDefault());
5674 }
5675 
// Breakpoint lowering is not implemented for MIPS32; the instruction is
// handed to UnimplementedLoweringError().
void TargetMIPS32::lowerBreakpoint(const InstBreakpoint *Instr) {
  UnimplementedLoweringError(this, Instr);
}
5679 
lowerUnreachable(const InstUnreachable *)5680 void TargetMIPS32::lowerUnreachable(const InstUnreachable *) {
5681   const uint32_t TrapCodeZero = 0;
5682   _teq(getZero(), getZero(), TrapCodeZero);
5683 }
5684 
lowerOther(const Inst * Instr)5685 void TargetMIPS32::lowerOther(const Inst *Instr) {
5686   if (llvm::isa<InstMIPS32Sync>(Instr)) {
5687     _sync();
5688   } else {
5689     TargetLowering::lowerOther(Instr);
5690   }
5691 }
5692 
// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to preserve
// integrity of liveness analysis. Undef values are also turned into zeroes,
// since loOperand() and hiOperand() don't expect Undef input.
//
// The work is delegated to the shared 32-bit helper
// PhiLowering::prelowerPhis32Bit(), applied to the node currently held by the
// lowering context.
void TargetMIPS32::prelowerPhis() {
  PhiLowering::prelowerPhis32Bit<TargetMIPS32>(this, Context.getNode(), Func);
}
5699 
postLower()5700 void TargetMIPS32::postLower() {
5701   if (Func->getOptLevel() == Opt_m1)
5702     return;
5703   markRedefinitions();
5704   Context.availabilityUpdate();
5705 }
5706 
// Register randomization is not implemented for MIPS32. All arguments are
// ignored (the void casts only silence unused-parameter warnings) and the call
// is reported through UnimplementedError().
void TargetMIPS32::makeRandomRegisterPermutation(
    llvm::SmallVectorImpl<RegNumT> &Permutation,
    const SmallBitVector &ExcludeRegisters, uint64_t Salt) const {
  (void)Permutation;
  (void)ExcludeRegisters;
  (void)Salt;
  UnimplementedError(getFlags());
}
5715 
5716 /* TODO(jvoung): avoid duplicate symbols with multiple targets.
5717 void ConstantUndef::emitWithoutDollar(GlobalContext *) const {
5718   llvm_unreachable("Not expecting to emitWithoutDollar undef");
5719 }
5720 
5721 void ConstantUndef::emit(GlobalContext *) const {
5722   llvm_unreachable("undef value encountered by emitter.");
5723 }
5724 */
5725 
// Constructs the MIPS32 data lowering object; all state is held by the
// TargetDataLowering base, which receives the global context.
TargetDataMIPS32::TargetDataMIPS32(GlobalContext *Ctx)
    : TargetDataLowering(Ctx) {}
5728 
5729 // Generate .MIPS.abiflags section. This section contains a versioned data
5730 // structure with essential information required for loader to determine the
5731 // requirements of the application.
emitTargetRODataSections()5732 void TargetDataMIPS32::emitTargetRODataSections() {
5733   struct MipsABIFlagsSection Flags;
5734   ELFObjectWriter *Writer = Ctx->getObjectWriter();
5735   const std::string Name = ".MIPS.abiflags";
5736   const llvm::ELF::Elf64_Word ShType = llvm::ELF::SHT_MIPS_ABIFLAGS;
5737   const llvm::ELF::Elf64_Xword ShFlags = llvm::ELF::SHF_ALLOC;
5738   const llvm::ELF::Elf64_Xword ShAddralign = 8;
5739   const llvm::ELF::Elf64_Xword ShEntsize = sizeof(Flags);
5740   Writer->writeTargetRODataSection(
5741       Name, ShType, ShFlags, ShAddralign, ShEntsize,
5742       llvm::StringRef(reinterpret_cast<const char *>(&Flags), sizeof(Flags)));
5743 }
5744 
lowerGlobals(const VariableDeclarationList & Vars,const std::string & SectionSuffix)5745 void TargetDataMIPS32::lowerGlobals(const VariableDeclarationList &Vars,
5746                                     const std::string &SectionSuffix) {
5747   const bool IsPIC = getFlags().getUseNonsfi();
5748   switch (getFlags().getOutFileType()) {
5749   case FT_Elf: {
5750     ELFObjectWriter *Writer = Ctx->getObjectWriter();
5751     Writer->writeDataSection(Vars, llvm::ELF::R_MIPS_32, SectionSuffix, IsPIC);
5752   } break;
5753   case FT_Asm:
5754   case FT_Iasm: {
5755     OstreamLocker L(Ctx);
5756     for (const VariableDeclaration *Var : Vars) {
5757       if (getFlags().matchTranslateOnly(Var->getName(), 0)) {
5758         emitGlobal(*Var, SectionSuffix);
5759       }
5760     }
5761   } break;
5762   }
5763 }
5764 
5765 namespace {
5766 template <typename T> struct ConstantPoolEmitterTraits;
5767 
5768 static_assert(sizeof(uint64_t) == 8,
5769               "uint64_t is supposed to be 8 bytes wide.");
5770 
5771 // TODO(jaydeep.patil): implement the following when implementing constant
5772 // randomization:
5773 //  * template <> struct ConstantPoolEmitterTraits<uint8_t>
5774 //  * template <> struct ConstantPoolEmitterTraits<uint16_t>
5775 //  * template <> struct ConstantPoolEmitterTraits<uint32_t>
5776 template <> struct ConstantPoolEmitterTraits<float> {
5777   using ConstantType = ConstantFloat;
5778   static constexpr Type IceType = IceType_f32;
5779   // AsmTag and TypeName can't be constexpr because llvm::StringRef is unhappy
5780   // about them being constexpr.
5781   static const char AsmTag[];
5782   static const char TypeName[];
bitcastToUint64Ice::MIPS32::__anonc1a930e40611::ConstantPoolEmitterTraits5783   static uint64_t bitcastToUint64(float Value) {
5784     static_assert(sizeof(Value) == sizeof(uint32_t),
5785                   "Float should be 4 bytes.");
5786     const uint32_t IntValue = Utils::bitCopy<uint32_t>(Value);
5787     return static_cast<uint64_t>(IntValue);
5788   }
5789 };
5790 const char ConstantPoolEmitterTraits<float>::AsmTag[] = ".word";
5791 const char ConstantPoolEmitterTraits<float>::TypeName[] = "f32";
5792 
5793 template <> struct ConstantPoolEmitterTraits<double> {
5794   using ConstantType = ConstantDouble;
5795   static constexpr Type IceType = IceType_f64;
5796   static const char AsmTag[];
5797   static const char TypeName[];
bitcastToUint64Ice::MIPS32::__anonc1a930e40611::ConstantPoolEmitterTraits5798   static uint64_t bitcastToUint64(double Value) {
5799     static_assert(sizeof(double) == sizeof(uint64_t),
5800                   "Double should be 8 bytes.");
5801     return Utils::bitCopy<uint64_t>(Value);
5802   }
5803 };
5804 const char ConstantPoolEmitterTraits<double>::AsmTag[] = ".quad";
5805 const char ConstantPoolEmitterTraits<double>::TypeName[] = "f64";
5806 
5807 template <typename T>
emitConstant(Ostream & Str,const typename ConstantPoolEmitterTraits<T>::ConstantType * Const)5808 void emitConstant(
5809     Ostream &Str,
5810     const typename ConstantPoolEmitterTraits<T>::ConstantType *Const) {
5811   if (!BuildDefs::dump())
5812     return;
5813   using Traits = ConstantPoolEmitterTraits<T>;
5814   Str << Const->getLabelName();
5815   T Value = Const->getValue();
5816   Str << ":\n\t" << Traits::AsmTag << "\t0x";
5817   Str.write_hex(Traits::bitcastToUint64(Value));
5818   Str << "\t/* " << Traits::TypeName << " " << Value << " */\n";
5819 }
5820 
emitConstantPool(GlobalContext * Ctx)5821 template <typename T> void emitConstantPool(GlobalContext *Ctx) {
5822   if (!BuildDefs::dump())
5823     return;
5824   using Traits = ConstantPoolEmitterTraits<T>;
5825   static constexpr size_t MinimumAlignment = 4;
5826   SizeT Align = std::max(MinimumAlignment, typeAlignInBytes(Traits::IceType));
5827   assert((Align % 4) == 0 && "Constants should be aligned");
5828   Ostream &Str = Ctx->getStrEmit();
5829   ConstantList Pool = Ctx->getConstantPool(Traits::IceType);
5830   Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",%progbits," << Align
5831       << "\n"
5832       << "\t.align\t" << (Align == 4 ? 2 : 3) << "\n";
5833   if (getFlags().getReorderPooledConstants()) {
5834     // TODO(jaydeep.patil): add constant pooling.
5835     UnimplementedError(getFlags());
5836   }
5837   for (Constant *C : Pool) {
5838     if (!C->getShouldBePooled()) {
5839       continue;
5840     }
5841     emitConstant<T>(Str, llvm::dyn_cast<typename Traits::ConstantType>(C));
5842   }
5843 }
5844 } // end of anonymous namespace
5845 
lowerConstants()5846 void TargetDataMIPS32::lowerConstants() {
5847   if (getFlags().getDisableTranslation())
5848     return;
5849   switch (getFlags().getOutFileType()) {
5850   case FT_Elf: {
5851     ELFObjectWriter *Writer = Ctx->getObjectWriter();
5852     Writer->writeConstantPool<ConstantFloat>(IceType_f32);
5853     Writer->writeConstantPool<ConstantDouble>(IceType_f64);
5854   } break;
5855   case FT_Asm:
5856   case FT_Iasm: {
5857     OstreamLocker _(Ctx);
5858     emitConstantPool<float>(Ctx);
5859     emitConstantPool<double>(Ctx);
5860     break;
5861   }
5862   }
5863 }
5864 
// Jump-table emission hook. No jump-table data is emitted here: the function
// returns on both paths, whether or not translation is disabled.
void TargetDataMIPS32::lowerJumpTables() {
  if (getFlags().getDisableTranslation())
    return;
}
5869 
5870 // Helper for legalize() to emit the right code to lower an operand to a
5871 // register of the appropriate type.
copyToReg(Operand * Src,RegNumT RegNum)5872 Variable *TargetMIPS32::copyToReg(Operand *Src, RegNumT RegNum) {
5873   Type Ty = Src->getType();
5874   Variable *Reg = makeReg(Ty, RegNum);
5875   if (isVectorType(Ty)) {
5876     llvm::report_fatal_error("Invalid copy from vector type.");
5877   } else {
5878     if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Src)) {
5879       _lw(Reg, Mem);
5880     } else {
5881       _mov(Reg, Src);
5882     }
5883   }
5884   return Reg;
5885 }
5886 
// Converts From into an operand form permitted by Allowed, emitting any
// instructions needed to do so, and returns the resulting operand.
//
//   From    - the operand to legalize.
//   Allowed - bitmask of acceptable operand forms; must include Legal_Reg.
//   RegNum  - when it has a value, registers created for the result are
//             requested with that register number.
//
// Handling by operand kind:
//   * OperandMIPS32Mem: the base is legalized to a register, a non-zero
//     offset that canHoldOffset() rejects is a fatal error, and the operand is
//     either returned (Legal_Mem) or loaded into a register with lw.
//   * Constant: undef is replaced through legalizeUndef(); relocatables,
//     32-bit integers and scalar floating-point values are materialized into
//     a register. Any other constant falls through and is returned as is.
//   * Variable: rematerializable variables are returned or copied with mov;
//     other variables are copied to a new register only when required.
Operand *TargetMIPS32::legalize(Operand *From, LegalMask Allowed,
                                RegNumT RegNum) {
  Type Ty = From->getType();
  // Assert that a physical register is allowed.  To date, all calls
  // to legalize() allow a physical register. Legal_Flex converts
  // registers to the right type OperandMIPS32FlexReg as needed.
  assert(Allowed & Legal_Reg);

  // Only look for an already-available substitute when the caller did not
  // request a specific register.
  if (RegNum.hasNoValue()) {
    if (Variable *Subst = getContext().availabilityGet(From)) {
      // At this point we know there is a potential substitution available.
      if (!Subst->isRematerializable() && Subst->mustHaveReg() &&
          !Subst->hasReg()) {
        // At this point we know the substitution will have a register.
        if (From->getType() == Subst->getType()) {
          // At this point we know the substitution's register is compatible.
          return Subst;
        }
      }
    }
  }

  // Go through the various types of operands:
  // OperandMIPS32Mem, Constant, and Variable.
  // Given the above assertion, if type of operand is not legal
  // (e.g., OperandMIPS32Mem and !Legal_Mem), we can always copy
  // to a register.
  if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(From)) {
    // Base must be in a physical register.
    Variable *Base = Mem->getBase();
    ConstantInteger32 *Offset = llvm::cast<ConstantInteger32>(Mem->getOffset());
    Variable *RegBase = nullptr;
    assert(Base);

    RegBase = llvm::cast<Variable>(
        legalize(Base, Legal_Reg | Legal_Rematerializable));

    // A non-zero offset must be encodable for this type; otherwise abort.
    if (Offset != nullptr && Offset->getValue() != 0) {
      static constexpr bool ZeroExt = false;
      if (!OperandMIPS32Mem::canHoldOffset(Ty, ZeroExt, Offset->getValue())) {
        llvm::report_fatal_error("Invalid memory offset.");
      }
    }

    // Create a new operand if there was a change.
    if (Base != RegBase) {
      Mem = OperandMIPS32Mem::create(Func, Ty, RegBase, Offset,
                                     Mem->getAddrMode());
    }

    if (Allowed & Legal_Mem) {
      From = Mem;
    } else {
      // Memory operands are not acceptable here: load the value instead.
      Variable *Reg = makeReg(Ty, RegNum);
      _lw(Reg, Mem);
      From = Reg;
    }
    return From;
  }

  if (llvm::isa<Constant>(From)) {
    if (llvm::isa<ConstantUndef>(From)) {
      From = legalizeUndef(From, RegNum);
      // For vector types the result of legalizeUndef() is returned directly.
      if (isVectorType(Ty))
        return From;
    }
    if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) {
      // Materialize the symbol address with a lui (RO_Hi) / addiu (RO_Lo)
      // pair.
      Variable *Reg = makeReg(Ty, RegNum);
      Variable *TReg = makeReg(Ty, RegNum);
      _lui(TReg, C, RO_Hi);
      _addiu(Reg, TReg, C, RO_Lo);
      return Reg;
    } else if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) {
      const uint32_t Value = C32->getValue();
      // Use addiu if the immediate is a 16bit value. Otherwise load it
      // using a lui-ori instructions.
      Variable *Reg = makeReg(Ty, RegNum);
      if (isInt<16>(int32_t(Value))) {
        Variable *Zero = makeReg(Ty, RegMIPS32::Reg_ZERO);
        Context.insert<InstFakeDef>(Zero);
        _addiu(Reg, Zero, Value);
      } else {
        uint32_t UpperBits = (Value >> 16) & 0xFFFF;
        uint32_t LowerBits = Value & 0xFFFF;
        if (LowerBits) {
          Variable *TReg = makeReg(Ty, RegNum);
          _lui(TReg, Ctx->getConstantInt32(UpperBits));
          _ori(Reg, TReg, LowerBits);
        } else {
          // Low half is zero, so the lui alone produces the full value.
          _lui(Reg, Ctx->getConstantInt32(UpperBits));
        }
      }
      return Reg;
    } else if (isScalarFloatingType(Ty)) {
      auto *CFrom = llvm::cast<Constant>(From);
      Variable *TReg = makeReg(Ty);
      if (!CFrom->getShouldBePooled()) {
        // Float/Double constant 0 is not pooled.
        Context.insert<InstFakeDef>(TReg);
        _mov(TReg, getZero());
      } else {
        // Load floats/doubles from literal pool.
        Constant *Offset = Ctx->getConstantSym(0, CFrom->getLabelName());
        Variable *TReg1 = makeReg(getPointerType());
        _lui(TReg1, Offset, RO_Hi);
        OperandMIPS32Mem *Addr =
            OperandMIPS32Mem::create(Func, Ty, TReg1, Offset);
        if (Ty == IceType_f32)
          Sandboxer(this).lwc1(TReg, Addr, RO_Lo);
        else
          Sandboxer(this).ldc1(TReg, Addr, RO_Lo);
      }
      return copyToReg(TReg, RegNum);
    }
  }

  if (auto *Var = llvm::dyn_cast<Variable>(From)) {
    if (Var->isRematerializable()) {
      if (Allowed & Legal_Rematerializable) {
        return From;
      }

      // Rematerializable operands are not acceptable here: copy the value
      // into an ordinary register.
      Variable *T = makeReg(Var->getType(), RegNum);
      _mov(T, Var);
      return T;
    }
    // Check if the variable is guaranteed a physical register.  This
    // can happen either when the variable is pre-colored or when it is
    // assigned infinite weight.
    bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());
    // We need a new physical register for the operand if:
    //   Mem is not allowed and Var isn't guaranteed a physical
    //   register, or
    //   RegNum is required and Var->getRegNum() doesn't match.
    if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
        (RegNum.hasValue() && RegNum != Var->getRegNum())) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }
  // Anything not handled above is returned unchanged.
  return From;
}
6029 
namespace BoolFolding {
// TODO(sagar.thakur): Add remaining instruction kinds to shouldTrackProducer()
// and isValidConsumer()

// Returns true if Instr is a kind of instruction whose result may be tracked
// as a foldable producer. Currently only Icmp qualifies.
bool shouldTrackProducer(const Inst &Instr) {
  return Instr.getKind() == Inst::Icmp;
}

// Returns true if Instr is a kind of instruction into which a tracked
// producer may be folded. Currently only Br qualifies.
bool isValidConsumer(const Inst &Instr) { return Instr.getKind() == Inst::Br; }
} // end of namespace BoolFolding
6039 
// Scans the instructions of Node and records in KnownComputations every i1
// producer that is a folding candidate, keyed by the index of its dest
// variable.
//
// A candidate is dropped as soon as one of its uses is not a valid consumer.
// After the scan, candidates that may still be live out of the block or that
// were used more than once are dropped as well; the producer instruction of
// each surviving candidate is marked dead.
void TargetMIPS32::ComputationTracker::recordProducers(CfgNode *Node) {
  for (Inst &Instr : Node->getInsts()) {
    if (Instr.isDeleted())
      continue;
    // Check whether Instr is a valid producer.
    Variable *Dest = Instr.getDest();
    if (Dest // only consider instructions with an actual dest var; and
        && Dest->getType() == IceType_i1 // only bool-type dest vars; and
        && BoolFolding::shouldTrackProducer(Instr)) { // white-listed instr.
      KnownComputations.emplace(Dest->getIndex(),
                                ComputationEntry(&Instr, IceType_i1));
    }
    // Check each src variable against the map.
    FOREACH_VAR_IN_INST(Var, Instr) {
      SizeT VarNum = Var->getIndex();
      auto ComputationIter = KnownComputations.find(VarNum);
      if (ComputationIter == KnownComputations.end()) {
        continue;
      }

      // Every occurrence of a tracked variable as a source counts as a use.
      ++ComputationIter->second.NumUses;
      switch (ComputationIter->second.ComputationType) {
      default:
        // Only i1 computations are foldable; stop tracking anything else.
        KnownComputations.erase(VarNum);
        continue;
      case IceType_i1:
        if (!BoolFolding::isValidConsumer(Instr)) {
          KnownComputations.erase(VarNum);
          continue;
        }
        break;
      }

      // Once the last use has been seen, the value is no longer considered
      // live out of this block.
      if (Instr.isLastUse(Var)) {
        ComputationIter->second.IsLiveOut = false;
      }
    }
  }

  for (auto Iter = KnownComputations.begin(), End = KnownComputations.end();
       Iter != End;) {
    // Disable the folding if its dest may be live beyond this block.
    if (Iter->second.IsLiveOut || Iter->second.NumUses > 1) {
      Iter = KnownComputations.erase(Iter);
      continue;
    }

    // Mark as "dead" rather than outright deleting. This is so that other
    // peephole style optimizations during or before lowering have access to
    // this instruction in undeleted form. See for example
    // tryOptimizedCmpxchgCmpBr().
    Iter->second.Instr->setDead();
    ++Iter;
  }
}
6095 
// Constructs the MIPS32 header lowering object; all state is held by the
// TargetHeaderLowering base, which receives the global context.
TargetHeaderMIPS32::TargetHeaderMIPS32(GlobalContext *Ctx)
    : TargetHeaderLowering(Ctx) {}
6098 
lower()6099 void TargetHeaderMIPS32::lower() {
6100   if (!BuildDefs::dump())
6101     return;
6102   OstreamLocker L(Ctx);
6103   Ostream &Str = Ctx->getStrEmit();
6104   Str << "\t.set\t"
6105       << "nomicromips\n";
6106   Str << "\t.set\t"
6107       << "nomips16\n";
6108   Str << "\t.set\t"
6109       << "noat\n";
6110   if (getFlags().getUseSandboxing())
6111     Str << "\t.bundle_align_mode 4\n";
6112 }
6113 
// Out-of-line definitions of TargetMIPS32's static register tables: two
// register sets per register class (RCMIPS32_NUM entries each) and one alias
// set per register (RegMIPS32::Reg_NUM entries).
SmallBitVector TargetMIPS32::TypeToRegisterSet[RCMIPS32_NUM];
SmallBitVector TargetMIPS32::TypeToRegisterSetUnfiltered[RCMIPS32_NUM];
SmallBitVector TargetMIPS32::RegisterAliases[RegMIPS32::Reg_NUM];
6117 
// Creates a sandboxing helper that emits instructions through Target.
// BundleOption is stored and later passed to the AutoBundle created by
// createAutoBundle().
TargetMIPS32::Sandboxer::Sandboxer(TargetMIPS32 *Target,
                                   InstBundleLock::Option BundleOption)
    : Target(Target), BundleOption(BundleOption) {}
6121 
~Sandboxer()6122 TargetMIPS32::Sandboxer::~Sandboxer() {}
6123 
// Creates the AutoBundle for this Sandboxer, using the target and bundle
// option given at construction, and stores it in Bundler.
void TargetMIPS32::Sandboxer::createAutoBundle() {
  Bundler = makeUnique<AutoBundle>(Target, BundleOption);
}
6127 
addiu_sp(uint32_t StackOffset)6128 void TargetMIPS32::Sandboxer::addiu_sp(uint32_t StackOffset) {
6129   Variable *SP = Target->getPhysicalRegister(RegMIPS32::Reg_SP);
6130   if (!Target->NeedSandboxing) {
6131     Target->_addiu(SP, SP, StackOffset);
6132     return;
6133   }
6134   auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6135   Target->Context.insert<InstFakeDef>(T7);
6136   createAutoBundle();
6137   Target->_addiu(SP, SP, StackOffset);
6138   Target->_and(SP, SP, T7);
6139 }
6140 
lw(Variable * Dest,OperandMIPS32Mem * Mem)6141 void TargetMIPS32::Sandboxer::lw(Variable *Dest, OperandMIPS32Mem *Mem) {
6142   Variable *Base = Mem->getBase();
6143   if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum()) &&
6144       (RegMIPS32::Reg_T8 != Base->getRegNum())) {
6145     auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6146     Target->Context.insert<InstFakeDef>(T7);
6147     createAutoBundle();
6148     Target->_and(Base, Base, T7);
6149   }
6150   Target->_lw(Dest, Mem);
6151   if (Target->NeedSandboxing && (Dest->getRegNum() == Target->getStackReg())) {
6152     auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6153     Target->Context.insert<InstFakeDef>(T7);
6154     Target->_and(Dest, Dest, T7);
6155   }
6156 }
6157 
ll(Variable * Dest,OperandMIPS32Mem * Mem)6158 void TargetMIPS32::Sandboxer::ll(Variable *Dest, OperandMIPS32Mem *Mem) {
6159   Variable *Base = Mem->getBase();
6160   if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
6161     auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6162     Target->Context.insert<InstFakeDef>(T7);
6163     createAutoBundle();
6164     Target->_and(Base, Base, T7);
6165   }
6166   Target->_ll(Dest, Mem);
6167   if (Target->NeedSandboxing && (Dest->getRegNum() == Target->getStackReg())) {
6168     auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6169     Target->Context.insert<InstFakeDef>(T7);
6170     Target->_and(Dest, Dest, T7);
6171   }
6172 }
6173 
sc(Variable * Dest,OperandMIPS32Mem * Mem)6174 void TargetMIPS32::Sandboxer::sc(Variable *Dest, OperandMIPS32Mem *Mem) {
6175   Variable *Base = Mem->getBase();
6176   if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
6177     auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6178     Target->Context.insert<InstFakeDef>(T7);
6179     createAutoBundle();
6180     Target->_and(Base, Base, T7);
6181   }
6182   Target->_sc(Dest, Mem);
6183 }
6184 
sw(Variable * Dest,OperandMIPS32Mem * Mem)6185 void TargetMIPS32::Sandboxer::sw(Variable *Dest, OperandMIPS32Mem *Mem) {
6186   Variable *Base = Mem->getBase();
6187   if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
6188     auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6189     Target->Context.insert<InstFakeDef>(T7);
6190     createAutoBundle();
6191     Target->_and(Base, Base, T7);
6192   }
6193   Target->_sw(Dest, Mem);
6194 }
6195 
lwc1(Variable * Dest,OperandMIPS32Mem * Mem,RelocOp Reloc)6196 void TargetMIPS32::Sandboxer::lwc1(Variable *Dest, OperandMIPS32Mem *Mem,
6197                                    RelocOp Reloc) {
6198   Variable *Base = Mem->getBase();
6199   if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
6200     auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6201     Target->Context.insert<InstFakeDef>(T7);
6202     createAutoBundle();
6203     Target->_and(Base, Base, T7);
6204   }
6205   Target->_lwc1(Dest, Mem, Reloc);
6206   if (Target->NeedSandboxing && (Dest->getRegNum() == Target->getStackReg())) {
6207     auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6208     Target->Context.insert<InstFakeDef>(T7);
6209     Target->_and(Dest, Dest, T7);
6210   }
6211 }
6212 
ldc1(Variable * Dest,OperandMIPS32Mem * Mem,RelocOp Reloc)6213 void TargetMIPS32::Sandboxer::ldc1(Variable *Dest, OperandMIPS32Mem *Mem,
6214                                    RelocOp Reloc) {
6215   Variable *Base = Mem->getBase();
6216   if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
6217     auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6218     Target->Context.insert<InstFakeDef>(T7);
6219     createAutoBundle();
6220     Target->_and(Base, Base, T7);
6221   }
6222   Target->_ldc1(Dest, Mem, Reloc);
6223   if (Target->NeedSandboxing && (Dest->getRegNum() == Target->getStackReg())) {
6224     auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6225     Target->Context.insert<InstFakeDef>(T7);
6226     Target->_and(Dest, Dest, T7);
6227   }
6228 }
6229 
ret(Variable * RetAddr,Variable * RetValue)6230 void TargetMIPS32::Sandboxer::ret(Variable *RetAddr, Variable *RetValue) {
6231   if (!Target->NeedSandboxing) {
6232     Target->_ret(RetAddr, RetValue);
6233   }
6234   auto *T6 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T6);
6235   Target->Context.insert<InstFakeDef>(T6);
6236   createAutoBundle();
6237   Target->_and(RetAddr, RetAddr, T6);
6238   Target->_ret(RetAddr, RetValue);
6239 }
6240 
reset_sp(Variable * Src)6241 void TargetMIPS32::Sandboxer::reset_sp(Variable *Src) {
6242   Variable *SP = Target->getPhysicalRegister(RegMIPS32::Reg_SP);
6243   if (!Target->NeedSandboxing) {
6244     Target->_mov(SP, Src);
6245     return;
6246   }
6247   auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6248   Target->Context.insert<InstFakeDef>(T7);
6249   createAutoBundle();
6250   Target->_mov(SP, Src);
6251   Target->_and(SP, SP, T7);
6252   Target->getContext().insert<InstFakeUse>(SP);
6253 }
6254 
jal(Variable * ReturnReg,Operand * CallTarget)6255 InstMIPS32Call *TargetMIPS32::Sandboxer::jal(Variable *ReturnReg,
6256                                              Operand *CallTarget) {
6257   if (Target->NeedSandboxing) {
6258     createAutoBundle();
6259     if (auto *CallTargetR = llvm::dyn_cast<Variable>(CallTarget)) {
6260       auto *T6 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T6);
6261       Target->Context.insert<InstFakeDef>(T6);
6262       Target->_and(CallTargetR, CallTargetR, T6);
6263     }
6264   }
6265   return Target->Context.insert<InstMIPS32Call>(ReturnReg, CallTarget);
6266 }
6267 
6268 } // end of namespace MIPS32
6269 } // end of namespace Ice
6270