1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
2 //
3 //                        The Subzero Code Generator
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief Implements the TargetLoweringARM32 class, which consists almost
12 /// entirely of the lowering sequence for each high-level instruction.
13 ///
14 //===----------------------------------------------------------------------===//
15 #include "IceTargetLoweringARM32.h"
16 
17 #include "IceCfg.h"
18 #include "IceCfgNode.h"
19 #include "IceClFlags.h"
20 #include "IceDefs.h"
21 #include "IceELFObjectWriter.h"
22 #include "IceGlobalInits.h"
23 #include "IceInstARM32.def"
24 #include "IceInstARM32.h"
25 #include "IceInstVarIter.h"
26 #include "IceLiveness.h"
27 #include "IceOperand.h"
28 #include "IcePhiLoweringImpl.h"
29 #include "IceRegistersARM32.h"
30 #include "IceTargetLoweringARM32.def"
31 #include "IceUtils.h"
32 #include "llvm/Support/MathExtras.h"
33 
34 #include <algorithm>
35 #include <array>
36 #include <utility>
37 
38 namespace ARM32 {
39 std::unique_ptr<::Ice::TargetLowering> createTargetLowering(::Ice::Cfg *Func) {
40   return ::Ice::ARM32::TargetARM32::create(Func);
41 }
42 
43 std::unique_ptr<::Ice::TargetDataLowering>
44 createTargetDataLowering(::Ice::GlobalContext *Ctx) {
45   return ::Ice::ARM32::TargetDataARM32::create(Ctx);
46 }
47 
48 std::unique_ptr<::Ice::TargetHeaderLowering>
49 createTargetHeaderLowering(::Ice::GlobalContext *Ctx) {
50   return ::Ice::ARM32::TargetHeaderARM32::create(Ctx);
51 }
52 
53 void staticInit(::Ice::GlobalContext *Ctx) {
54   ::Ice::ARM32::TargetARM32::staticInit(Ctx);
55   if (Ice::getFlags().getUseNonsfi()) {
56     // In nonsfi, we need to reference the _GLOBAL_OFFSET_TABLE_ for accessing
57     // globals. The GOT is an external symbol (i.e., it is not defined in the
58     // pexe) so we need to register it as such so that ELF emission won't barf
59     // on an "unknown" symbol. The GOT is added to the External symbols list
60     // here because staticInit() is invoked in a single-thread context.
61     Ctx->getConstantExternSym(Ctx->getGlobalString(::Ice::GlobalOffsetTable));
62   }
63 }
64 
65 bool shouldBePooled(const ::Ice::Constant *C) {
66   return ::Ice::ARM32::TargetARM32::shouldBePooled(C);
67 }
68 
69 ::Ice::Type getPointerType() {
70   return ::Ice::ARM32::TargetARM32::getPointerType();
71 }
72 
73 } // end of namespace ARM32
74 
75 namespace Ice {
76 namespace ARM32 {
77 
78 namespace {
79 
80 /// SizeOf is used to obtain the size of an initializer list as a constexpr
81 /// expression. This is only needed until our C++ library is updated to
82 /// C++14, which adds constexpr members to std::initializer_list.
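///
/// For example, SizeOf(1, 2, 3).size() evaluates to 3 at compile time; the
/// register table below uses this to measure each alias_init initializer list.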
83 class SizeOf {
84   SizeOf(const SizeOf &) = delete;
85   SizeOf &operator=(const SizeOf &) = delete;
86 
87 public:
88   constexpr SizeOf() : Size(0) {}
89   template <typename... T>
90   explicit constexpr SizeOf(T...) : Size(__length<T...>::value) {}
91   constexpr SizeT size() const { return Size; }
92 
93 private:
94   template <typename T, typename... U> struct __length {
95     static constexpr std::size_t value = 1 + __length<U...>::value;
96   };
97 
98   template <typename T> struct __length<T> {
99     static constexpr std::size_t value = 1;
100   };
101 
102   const std::size_t Size;
103 };
104 
105 } // end of anonymous namespace
106 
107 // Defines the RegARM32::Table table with register information.
108 RegARM32::RegTableType RegARM32::RegTable[RegARM32::Reg_NUM] = {
109 #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr,   \
110           isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)       \
111   {name,      encode,                                                          \
112    cc_arg,    scratch,                                                         \
113    preserved, stackptr,                                                        \
114    frameptr,  isGPR,                                                           \
115    isInt,     isI64Pair,                                                       \
116    isFP32,    isFP64,                                                          \
117    isVec128,  (SizeOf alias_init).size(),                                      \
118    alias_init},
119     REGARM32_TABLE
120 #undef X
121 };
122 
123 namespace {
124 
125 // The following table summarizes the logic for lowering the icmp instruction
126 // for i32 and narrower types. Each icmp condition has a clear mapping to an
127 // ARM32 conditional move instruction.
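//
// For example (sketch only; the exact sequence is produced by the icmp
// lowering code), an i32 "icmp eq a, b" becomes roughly
//   cmp   a, b
//   mov   dest, #0
//   moveq dest, #1
// where "eq" is the CondARM32::Cond value supplied by this table.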
128 
129 const struct TableIcmp32_ {
130   CondARM32::Cond Mapping;
131 } TableIcmp32[] = {
132 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V)    \
133   {CondARM32::C_32},
134     ICMPARM32_TABLE
135 #undef X
136 };
137 
138 // The following table summarizes the logic for lowering the icmp instruction
139 // for the i64 type. Two conditional moves are needed for setting to 1 or 0.
140 // The operands may need to be swapped, and there is a slight difference for
141 // signed vs unsigned (comparing hi vs lo first, and using cmp vs sbc).
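//
// Simplified sketch: an unsigned i64 compare can test the high words first and
// fall back to the low words on equality (cmp hi, then cmpeq lo), while a
// signed compare subtracts with borrow (cmp on the low words, then sbc on the
// high words) and tests the resulting flags; hence the two condition codes
// C1/C2 and the possible operand swap recorded in each entry.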
142 const struct TableIcmp64_ {
143   bool IsSigned;
144   bool Swapped;
145   CondARM32::Cond C1, C2;
146 } TableIcmp64[] = {
147 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V)    \
148   {is_signed, swapped64, CondARM32::C1_64, CondARM32::C2_64},
149     ICMPARM32_TABLE
150 #undef X
151 };
152 
153 CondARM32::Cond getIcmp32Mapping(InstIcmp::ICond Cond) {
154   assert(Cond < llvm::array_lengthof(TableIcmp32));
155   return TableIcmp32[Cond].Mapping;
156 }
157 
158 // In some cases, there are x-macros tables for both high-level and low-level
159 // instructions/operands that use the same enum key value. The tables are kept
160 // separate to maintain a proper separation between abstraction layers. There
161 // is a risk that the tables could get out of sync if enum values are reordered
162 // or if entries are added or deleted. The following anonymous namespaces use
163 // static_asserts to ensure everything is kept in sync.
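//
// For example, if a new condition were inserted in the middle of
// ICEINSTICMP_TABLE without updating ICMPARM32_TABLE, the _icmp_hl_* and
// _icmp_ll_* values defined below would diverge and the static_asserts would
// fail at compile time instead of silently mis-mapping conditions.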
164 
165 // Validate the enum values in ICMPARM32_TABLE.
166 namespace {
167 // Define a temporary set of enum values based on low-level table entries.
168 enum _icmp_ll_enum {
169 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V)    \
170   _icmp_ll_##val,
171   ICMPARM32_TABLE
172 #undef X
173       _num
174 };
175 // Define a set of constants based on high-level table entries.
176 #define X(tag, reverse, str)                                                   \
177   static constexpr int _icmp_hl_##tag = InstIcmp::tag;
178 ICEINSTICMP_TABLE
179 #undef X
180 // Define a set of constants based on low-level table entries, and ensure the
181 // table entry keys are consistent.
182 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V)    \
183   static_assert(                                                               \
184       _icmp_ll_##val == _icmp_hl_##val,                                        \
185       "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE: " #val);
186 ICMPARM32_TABLE
187 #undef X
188 // Repeat the static asserts with respect to the high-level table entries in
189 // case the high-level table has extra entries.
190 #define X(tag, reverse, str)                                                   \
191   static_assert(                                                               \
192       _icmp_hl_##tag == _icmp_ll_##tag,                                        \
193       "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE: " #tag);
194 ICEINSTICMP_TABLE
195 #undef X
196 } // end of anonymous namespace
197 
198 // Stack alignment
199 const uint32_t ARM32_STACK_ALIGNMENT_BYTES = 16;
200 
201 // Value is in bytes. Return Value adjusted to the next highest multiple of the
202 // stack alignment.
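// For example, with ARM32_STACK_ALIGNMENT_BYTES == 16, a Value of 20 is
// rounded up to 32, while a Value of 32 is returned unchanged.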
203 uint32_t applyStackAlignment(uint32_t Value) {
204   return Utils::applyAlignment(Value, ARM32_STACK_ALIGNMENT_BYTES);
205 }
206 
207 // Value is in bytes. Return Value adjusted to the next highest multiple of the
208 // stack alignment required for the given type.
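// For example, an i64 (8-byte natural alignment) at Value == 4 is bumped to 8;
// a 16-byte vector at Value == 4 is likewise only bumped to 8, per the 8-byte
// vector alignment applied below.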
209 uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) {
210   // Use natural alignment, except that normally (non-NaCl) ARM only aligns
211   // vectors to 8 bytes.
212   // TODO(jvoung): Check this ...
213   size_t typeAlignInBytes = typeWidthInBytes(Ty);
214   if (isVectorType(Ty))
215     typeAlignInBytes = 8;
216   return Utils::applyAlignment(Value, typeAlignInBytes);
217 }
218 
219 // Conservatively check if at compile time we know that the operand is
220 // definitely a non-zero integer.
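// For example, a ConstantInteger32 holding 7 is known to be non-zero, whereas
// a variable, a relocatable constant, or a null operand conservatively yields
// false.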
221 bool isGuaranteedNonzeroInt(const Operand *Op) {
222   if (auto *Const = llvm::dyn_cast_or_null<ConstantInteger32>(Op)) {
223     return Const->getValue() != 0;
224   }
225   return false;
226 }
227 
228 } // end of anonymous namespace
229 
230 TargetARM32Features::TargetARM32Features(const ClFlags &Flags) {
231   static_assert(
232       (ARM32InstructionSet::End - ARM32InstructionSet::Begin) ==
233           (TargetInstructionSet::ARM32InstructionSet_End -
234            TargetInstructionSet::ARM32InstructionSet_Begin),
235       "ARM32InstructionSet range different from TargetInstructionSet");
236   if (Flags.getTargetInstructionSet() !=
237       TargetInstructionSet::BaseInstructionSet) {
238     InstructionSet = static_cast<ARM32InstructionSet>(
239         (Flags.getTargetInstructionSet() -
240          TargetInstructionSet::ARM32InstructionSet_Begin) +
241         ARM32InstructionSet::Begin);
242   }
243 }
244 
245 namespace {
246 constexpr SizeT NumGPRArgs =
247 #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr,   \
248           isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)       \
249   +(((cc_arg) > 0) ? 1 : 0)
250     REGARM32_GPR_TABLE
251 #undef X
252     ;
253 std::array<RegNumT, NumGPRArgs> GPRArgInitializer;
254 
255 constexpr SizeT NumI64Args =
256 #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr,   \
257           isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)       \
258   +(((cc_arg) > 0) ? 1 : 0)
259     REGARM32_I64PAIR_TABLE
260 #undef X
261     ;
262 std::array<RegNumT, NumI64Args> I64ArgInitializer;
263 
264 constexpr SizeT NumFP32Args =
265 #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr,   \
266           isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)       \
267   +(((cc_arg) > 0) ? 1 : 0)
268     REGARM32_FP32_TABLE
269 #undef X
270     ;
271 std::array<RegNumT, NumFP32Args> FP32ArgInitializer;
272 
273 constexpr SizeT NumFP64Args =
274 #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr,   \
275           isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)       \
276   +(((cc_arg) > 0) ? 1 : 0)
277     REGARM32_FP64_TABLE
278 #undef X
279     ;
280 std::array<RegNumT, NumFP64Args> FP64ArgInitializer;
281 
282 constexpr SizeT NumVec128Args =
283 #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr,   \
284           isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)       \
285   +(((cc_arg) > 0) ? 1 : 0)
286     REGARM32_VEC128_TABLE
287 #undef X
288     ;
289 std::array<RegNumT, NumVec128Args> Vec128ArgInitializer;
290 
291 const char *getRegClassName(RegClass C) {
292   auto ClassNum = static_cast<RegARM32::RegClassARM32>(C);
293   assert(ClassNum < RegARM32::RCARM32_NUM);
294   switch (ClassNum) {
295   default:
296     assert(C < RC_Target);
297     return regClassString(C);
298   // Add handling of new register classes below.
299   case RegARM32::RCARM32_QtoS:
300     return "QtoS";
301   }
302 }
303 
304 } // end of anonymous namespace
305 
306 TargetARM32::TargetARM32(Cfg *Func)
307     : TargetLowering(Func), NeedSandboxing(SandboxingType == ST_NaCl),
308       CPUFeatures(getFlags()) {}
309 
310 void TargetARM32::staticInit(GlobalContext *Ctx) {
311   RegNumT::setLimit(RegARM32::Reg_NUM);
312   // Limit this size (or do all bitsets need to be the same width)???
313   SmallBitVector IntegerRegisters(RegARM32::Reg_NUM);
314   SmallBitVector I64PairRegisters(RegARM32::Reg_NUM);
315   SmallBitVector Float32Registers(RegARM32::Reg_NUM);
316   SmallBitVector Float64Registers(RegARM32::Reg_NUM);
317   SmallBitVector VectorRegisters(RegARM32::Reg_NUM);
318   SmallBitVector QtoSRegisters(RegARM32::Reg_NUM);
319   SmallBitVector InvalidRegisters(RegARM32::Reg_NUM);
320   const unsigned EncodedReg_q8 = RegARM32::RegTable[RegARM32::Reg_q8].Encoding;
321   for (int i = 0; i < RegARM32::Reg_NUM; ++i) {
322     const auto &Entry = RegARM32::RegTable[i];
323     IntegerRegisters[i] = Entry.IsInt;
324     I64PairRegisters[i] = Entry.IsI64Pair;
325     Float32Registers[i] = Entry.IsFP32;
326     Float64Registers[i] = Entry.IsFP64;
327     VectorRegisters[i] = Entry.IsVec128;
328     RegisterAliases[i].resize(RegARM32::Reg_NUM);
329     // TODO(eholk): It would be better to store a QtoS flag in the
330     // IceRegistersARM32 table than to compare their encodings here.
331     QtoSRegisters[i] = Entry.IsVec128 && Entry.Encoding < EncodedReg_q8;
332     for (int j = 0; j < Entry.NumAliases; ++j) {
333       assert(i == j || !RegisterAliases[i][Entry.Aliases[j]]);
334       RegisterAliases[i].set(Entry.Aliases[j]);
335     }
336     assert(RegisterAliases[i][i]);
337     if (Entry.CCArg <= 0) {
338       continue;
339     }
340     const auto RegNum = RegNumT::fromInt(i);
341     if (Entry.IsGPR) {
342       GPRArgInitializer[Entry.CCArg - 1] = RegNum;
343     } else if (Entry.IsI64Pair) {
344       I64ArgInitializer[Entry.CCArg - 1] = RegNum;
345     } else if (Entry.IsFP32) {
346       FP32ArgInitializer[Entry.CCArg - 1] = RegNum;
347     } else if (Entry.IsFP64) {
348       FP64ArgInitializer[Entry.CCArg - 1] = RegNum;
349     } else if (Entry.IsVec128) {
350       Vec128ArgInitializer[Entry.CCArg - 1] = RegNum;
351     }
352   }
353   TypeToRegisterSet[IceType_void] = InvalidRegisters;
354   TypeToRegisterSet[IceType_i1] = IntegerRegisters;
355   TypeToRegisterSet[IceType_i8] = IntegerRegisters;
356   TypeToRegisterSet[IceType_i16] = IntegerRegisters;
357   TypeToRegisterSet[IceType_i32] = IntegerRegisters;
358   TypeToRegisterSet[IceType_i64] = I64PairRegisters;
359   TypeToRegisterSet[IceType_f32] = Float32Registers;
360   TypeToRegisterSet[IceType_f64] = Float64Registers;
361   TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
362   TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
363   TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
364   TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
365   TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
366   TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
367   TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
368   TypeToRegisterSet[RegARM32::RCARM32_QtoS] = QtoSRegisters;
369 
370   for (size_t i = 0; i < llvm::array_lengthof(TypeToRegisterSet); ++i)
371     TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i];
372 
373   filterTypeToRegisterSet(
374       Ctx, RegARM32::Reg_NUM, TypeToRegisterSet,
375       llvm::array_lengthof(TypeToRegisterSet),
376       [](RegNumT RegNum) -> std::string {
377         // This function simply removes ", " from the
378         // register name.
379         std::string Name = RegARM32::getRegName(RegNum);
380         constexpr const char RegSeparator[] = ", ";
381         constexpr size_t RegSeparatorWidth =
382             llvm::array_lengthof(RegSeparator) - 1;
383         for (size_t Pos = Name.find(RegSeparator); Pos != std::string::npos;
384              Pos = Name.find(RegSeparator)) {
385           Name.replace(Pos, RegSeparatorWidth, "");
386         }
387         return Name;
388       },
389       getRegClassName);
390 }
391 
392 namespace {
393 void copyRegAllocFromInfWeightVariable64On32(const VarList &Vars) {
394   for (Variable *Var : Vars) {
395     auto *Var64 = llvm::dyn_cast<Variable64On32>(Var);
396     if (!Var64) {
397       // This is not the variable we are looking for.
398       continue;
399     }
400     // Only allow infinite-weight i64 temporaries to be register allocated.
401     assert(!Var64->hasReg() || Var64->mustHaveReg());
402     if (!Var64->hasReg()) {
403       continue;
404     }
405     const auto FirstReg =
406         RegNumT::fixme(RegARM32::getI64PairFirstGPRNum(Var->getRegNum()));
407     // This assumes little endian.
408     Variable *Lo = Var64->getLo();
409     Variable *Hi = Var64->getHi();
410     assert(Lo->hasReg() == Hi->hasReg());
411     if (Lo->hasReg()) {
412       continue;
413     }
414     Lo->setRegNum(FirstReg);
415     Lo->setMustHaveReg();
416     Hi->setRegNum(RegNumT::fixme(FirstReg + 1));
417     Hi->setMustHaveReg();
418   }
419 }
420 } // end of anonymous namespace
421 
422 uint32_t TargetARM32::getCallStackArgumentsSizeBytes(const InstCall *Call) {
423   TargetARM32::CallingConv CC;
424   RegNumT DummyReg;
425   size_t OutArgsSizeBytes = 0;
426   for (SizeT i = 0, NumArgs = Call->getNumArgs(); i < NumArgs; ++i) {
427     Operand *Arg = legalizeUndef(Call->getArg(i));
428     const Type Ty = Arg->getType();
429     if (isScalarIntegerType(Ty)) {
430       if (CC.argInGPR(Ty, &DummyReg)) {
431         continue;
432       }
433     } else {
434       if (CC.argInVFP(Ty, &DummyReg)) {
435         continue;
436       }
437     }
438 
439     OutArgsSizeBytes = applyStackAlignmentTy(OutArgsSizeBytes, Ty);
440     OutArgsSizeBytes += typeWidthInBytesOnStack(Ty);
441   }
442 
443   return applyStackAlignment(OutArgsSizeBytes);
444 }
445 
446 void TargetARM32::genTargetHelperCallFor(Inst *Instr) {
447   constexpr bool NoTailCall = false;
448   constexpr bool IsTargetHelperCall = true;
449 
450   switch (Instr->getKind()) {
451   default:
452     return;
453   case Inst::Arithmetic: {
454     Variable *Dest = Instr->getDest();
455     const Type DestTy = Dest->getType();
456     const InstArithmetic::OpKind Op =
457         llvm::cast<InstArithmetic>(Instr)->getOp();
458     if (isVectorType(DestTy)) {
459       switch (Op) {
460       default:
461         break;
462       case InstArithmetic::Fdiv:
463       case InstArithmetic::Frem:
464       case InstArithmetic::Sdiv:
465       case InstArithmetic::Srem:
466       case InstArithmetic::Udiv:
467       case InstArithmetic::Urem:
468         scalarizeArithmetic(Op, Dest, Instr->getSrc(0), Instr->getSrc(1));
469         Instr->setDeleted();
470         return;
471       }
472     }
473     switch (DestTy) {
474     default:
475       return;
476     case IceType_i64: {
477       // Technically, ARM has its own aeabi routines, but we can use the
478       // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses
479       // the more standard __moddi3 for rem.
480       RuntimeHelper HelperID = RuntimeHelper::H_Num;
481       switch (Op) {
482       default:
483         return;
484       case InstArithmetic::Udiv:
485         HelperID = RuntimeHelper::H_udiv_i64;
486         break;
487       case InstArithmetic::Sdiv:
488         HelperID = RuntimeHelper::H_sdiv_i64;
489         break;
490       case InstArithmetic::Urem:
491         HelperID = RuntimeHelper::H_urem_i64;
492         break;
493       case InstArithmetic::Srem:
494         HelperID = RuntimeHelper::H_srem_i64;
495         break;
496       }
497       Operand *TargetHelper = Ctx->getRuntimeHelperFunc(HelperID);
498       ARM32HelpersPreamble[TargetHelper] = &TargetARM32::preambleDivRem;
499       constexpr SizeT MaxArgs = 2;
500       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
501                                             NoTailCall, IsTargetHelperCall);
502       Call->addArg(Instr->getSrc(0));
503       Call->addArg(Instr->getSrc(1));
504       Instr->setDeleted();
505       return;
506     }
507     case IceType_i32:
508     case IceType_i16:
509     case IceType_i8: {
510       const bool HasHWDiv = hasCPUFeature(TargetARM32Features::HWDivArm);
511       InstCast::OpKind CastKind;
512       RuntimeHelper HelperID = RuntimeHelper::H_Num;
513       switch (Op) {
514       default:
515         return;
516       case InstArithmetic::Udiv:
517         HelperID = HasHWDiv ? RuntimeHelper::H_Num : RuntimeHelper::H_udiv_i32;
518         CastKind = InstCast::Zext;
519         break;
520       case InstArithmetic::Sdiv:
521         HelperID = HasHWDiv ? RuntimeHelper::H_Num : RuntimeHelper::H_sdiv_i32;
522         CastKind = InstCast::Sext;
523         break;
524       case InstArithmetic::Urem:
525         HelperID = HasHWDiv ? RuntimeHelper::H_Num : RuntimeHelper::H_urem_i32;
526         CastKind = InstCast::Zext;
527         break;
528       case InstArithmetic::Srem:
529         HelperID = HasHWDiv ? RuntimeHelper::H_Num : RuntimeHelper::H_srem_i32;
530         CastKind = InstCast::Sext;
531         break;
532       }
533       if (HelperID == RuntimeHelper::H_Num) {
534         // HelperID should only ever be undefined when the processor does not
535         // have a hardware divider. If any other helpers are ever introduced,
536         // the following assert will have to be modified.
537         assert(HasHWDiv);
538         return;
539       }
540       Operand *Src0 = Instr->getSrc(0);
541       Operand *Src1 = Instr->getSrc(1);
542       if (DestTy != IceType_i32) {
543         // Src0 and Src1 have to be zero- or sign-extended to i32. For Src0,
544         // we just insert an InstCast right before the call to the helper.
545         Variable *Src0_32 = Func->makeVariable(IceType_i32);
546         Context.insert<InstCast>(CastKind, Src0_32, Src0);
547         Src0 = Src0_32;
548 
549         // For extending Src1, we will just insert an InstCast if Src1 is not a
550         // Constant. If it is, then we extend it here, and not during program
551         // runtime. This allows preambleDivRem to optimize out the div-by-0
552         // check.
553         if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
554           const int32_t ShAmt = (DestTy == IceType_i16) ? 16 : 24;
555           int32_t NewC = C->getValue();
556           if (CastKind == InstCast::Zext) {
557             NewC &= ~(0x80000000l >> ShAmt);
558           } else {
559             NewC = (NewC << ShAmt) >> ShAmt;
560           }
561           Src1 = Ctx->getConstantInt32(NewC);
562         } else {
563           Variable *Src1_32 = Func->makeVariable(IceType_i32);
564           Context.insert<InstCast>(CastKind, Src1_32, Src1);
565           Src1 = Src1_32;
566         }
567       }
568       Operand *TargetHelper = Ctx->getRuntimeHelperFunc(HelperID);
569       ARM32HelpersPreamble[TargetHelper] = &TargetARM32::preambleDivRem;
570       constexpr SizeT MaxArgs = 2;
571       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
572                                             NoTailCall, IsTargetHelperCall);
573       assert(Src0->getType() == IceType_i32);
574       Call->addArg(Src0);
575       assert(Src1->getType() == IceType_i32);
576       Call->addArg(Src1);
577       Instr->setDeleted();
578       return;
579     }
580     case IceType_f64:
581     case IceType_f32: {
582       if (Op != InstArithmetic::Frem) {
583         return;
584       }
585       constexpr SizeT MaxArgs = 2;
586       Operand *TargetHelper = Ctx->getRuntimeHelperFunc(
587           DestTy == IceType_f32 ? RuntimeHelper::H_frem_f32
588                                 : RuntimeHelper::H_frem_f64);
589       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
590                                             NoTailCall, IsTargetHelperCall);
591       Call->addArg(Instr->getSrc(0));
592       Call->addArg(Instr->getSrc(1));
593       Instr->setDeleted();
594       return;
595     }
596     }
597     llvm::report_fatal_error("Control flow should never have reached here.");
598   }
599   case Inst::Cast: {
600     Variable *Dest = Instr->getDest();
601     Operand *Src0 = Instr->getSrc(0);
602     const Type DestTy = Dest->getType();
603     const Type SrcTy = Src0->getType();
604     auto *CastInstr = llvm::cast<InstCast>(Instr);
605     const InstCast::OpKind CastKind = CastInstr->getCastKind();
606 
607     switch (CastKind) {
608     default:
609       return;
610     case InstCast::Fptosi:
611     case InstCast::Fptoui: {
612       if (DestTy != IceType_i64) {
613         return;
614       }
615       const bool DestIsSigned = CastKind == InstCast::Fptosi;
616       const bool Src0IsF32 = isFloat32Asserting32Or64(SrcTy);
617       Operand *TargetHelper = Ctx->getRuntimeHelperFunc(
618           Src0IsF32 ? (DestIsSigned ? RuntimeHelper::H_fptosi_f32_i64
619                                     : RuntimeHelper::H_fptoui_f32_i64)
620                     : (DestIsSigned ? RuntimeHelper::H_fptosi_f64_i64
621                                     : RuntimeHelper::H_fptoui_f64_i64));
622       static constexpr SizeT MaxArgs = 1;
623       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
624                                             NoTailCall, IsTargetHelperCall);
625       Call->addArg(Src0);
626       Instr->setDeleted();
627       return;
628     }
629     case InstCast::Sitofp:
630     case InstCast::Uitofp: {
631       if (SrcTy != IceType_i64) {
632         return;
633       }
634       const bool SourceIsSigned = CastKind == InstCast::Sitofp;
635       const bool DestIsF32 = isFloat32Asserting32Or64(Dest->getType());
636       Operand *TargetHelper = Ctx->getRuntimeHelperFunc(
637           DestIsF32 ? (SourceIsSigned ? RuntimeHelper::H_sitofp_i64_f32
638                                       : RuntimeHelper::H_uitofp_i64_f32)
639                     : (SourceIsSigned ? RuntimeHelper::H_sitofp_i64_f64
640                                       : RuntimeHelper::H_uitofp_i64_f64));
641       static constexpr SizeT MaxArgs = 1;
642       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
643                                             NoTailCall, IsTargetHelperCall);
644       Call->addArg(Src0);
645       Instr->setDeleted();
646       return;
647     }
648     case InstCast::Bitcast: {
649       if (DestTy == SrcTy) {
650         return;
651       }
652       Variable *CallDest = Dest;
653       RuntimeHelper HelperID = RuntimeHelper::H_Num;
654       switch (DestTy) {
655       default:
656         return;
657       case IceType_i8:
658         assert(SrcTy == IceType_v8i1);
659         HelperID = RuntimeHelper::H_bitcast_8xi1_i8;
660         CallDest = Func->makeVariable(IceType_i32);
661         break;
662       case IceType_i16:
663         assert(SrcTy == IceType_v16i1);
664         HelperID = RuntimeHelper::H_bitcast_16xi1_i16;
665         CallDest = Func->makeVariable(IceType_i32);
666         break;
667       case IceType_v8i1: {
668         assert(SrcTy == IceType_i8);
669         HelperID = RuntimeHelper::H_bitcast_i8_8xi1;
670         Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
671         // Arguments to functions are required to be at least 32 bits wide.
672         Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
673         Src0 = Src0AsI32;
674       } break;
675       case IceType_v16i1: {
676         assert(SrcTy == IceType_i16);
677         HelperID = RuntimeHelper::H_bitcast_i16_16xi1;
678         Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
679         // Arguments to functions are required to be at least 32 bits wide.
680         Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
681         Src0 = Src0AsI32;
682       } break;
683       }
684       constexpr SizeT MaxSrcs = 1;
685       InstCall *Call = makeHelperCall(HelperID, CallDest, MaxSrcs);
686       Call->addArg(Src0);
687       Context.insert(Call);
688       // The PNaCl ABI disallows i8/i16 return types, so truncate the helper
689       // call result to the appropriate type as necessary.
690       if (CallDest->getType() != Dest->getType())
691         Context.insert<InstCast>(InstCast::Trunc, Dest, CallDest);
692       Instr->setDeleted();
693       return;
694     }
695     case InstCast::Trunc: {
696       if (DestTy == SrcTy) {
697         return;
698       }
699       if (!isVectorType(SrcTy)) {
700         return;
701       }
702       assert(typeNumElements(DestTy) == typeNumElements(SrcTy));
703       assert(typeElementType(DestTy) == IceType_i1);
704       assert(isVectorIntegerType(SrcTy));
705       return;
706     }
707     case InstCast::Sext:
708     case InstCast::Zext: {
709       if (DestTy == SrcTy) {
710         return;
711       }
712       if (!isVectorType(DestTy)) {
713         return;
714       }
715       assert(typeNumElements(DestTy) == typeNumElements(SrcTy));
716       assert(typeElementType(SrcTy) == IceType_i1);
717       assert(isVectorIntegerType(DestTy));
718       return;
719     }
720     }
721     llvm::report_fatal_error("Control flow should never have reached here.");
722   }
723   case Inst::Intrinsic: {
724     Variable *Dest = Instr->getDest();
725     auto *Intrinsic = llvm::cast<InstIntrinsic>(Instr);
726     Intrinsics::IntrinsicID ID = Intrinsic->getIntrinsicID();
727     switch (ID) {
728     default:
729       return;
730     case Intrinsics::Ctpop: {
731       Operand *Src0 = Intrinsic->getArg(0);
732       Operand *TargetHelper =
733           Ctx->getRuntimeHelperFunc(isInt32Asserting32Or64(Src0->getType())
734                                         ? RuntimeHelper::H_call_ctpop_i32
735                                         : RuntimeHelper::H_call_ctpop_i64);
736       static constexpr SizeT MaxArgs = 1;
737       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
738                                             NoTailCall, IsTargetHelperCall);
739       Call->addArg(Src0);
740       Instr->setDeleted();
741       if (Src0->getType() == IceType_i64) {
742         ARM32HelpersPostamble[TargetHelper] = &TargetARM32::postambleCtpop64;
743       }
744       return;
745     }
746     case Intrinsics::Longjmp: {
747       static constexpr SizeT MaxArgs = 2;
748       static constexpr Variable *NoDest = nullptr;
749       Operand *TargetHelper =
750           Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_longjmp);
751       auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
752                                             NoTailCall, IsTargetHelperCall);
753       Call->addArg(Intrinsic->getArg(0));
754       Call->addArg(Intrinsic->getArg(1));
755       Instr->setDeleted();
756       return;
757     }
758     case Intrinsics::Memcpy: {
759       // In the future, we could potentially emit an inline memcpy/memset, etc.
760       // for intrinsic calls w/ a known length.
761       static constexpr SizeT MaxArgs = 3;
762       static constexpr Variable *NoDest = nullptr;
763       Operand *TargetHelper =
764           Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memcpy);
765       auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
766                                             NoTailCall, IsTargetHelperCall);
767       Call->addArg(Intrinsic->getArg(0));
768       Call->addArg(Intrinsic->getArg(1));
769       Call->addArg(Intrinsic->getArg(2));
770       Instr->setDeleted();
771       return;
772     }
773     case Intrinsics::Memmove: {
774       static constexpr SizeT MaxArgs = 3;
775       static constexpr Variable *NoDest = nullptr;
776       Operand *TargetHelper =
777           Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memmove);
778       auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
779                                             NoTailCall, IsTargetHelperCall);
780       Call->addArg(Intrinsic->getArg(0));
781       Call->addArg(Intrinsic->getArg(1));
782       Call->addArg(Intrinsic->getArg(2));
783       Instr->setDeleted();
784       return;
785     }
786     case Intrinsics::Memset: {
787       // The value operand needs to be extended to a stack slot size because the
788       // PNaCl ABI requires arguments to be at least 32 bits wide.
789       Operand *ValOp = Intrinsic->getArg(1);
790       assert(ValOp->getType() == IceType_i8);
791       Variable *ValExt = Func->makeVariable(stackSlotType());
792       Context.insert<InstCast>(InstCast::Zext, ValExt, ValOp);
793 
794       // Technically, ARM has its own __aeabi_memset, but we can use plain
795       // memset too. The value and size arguments need to be flipped if we ever
796       // decide to use __aeabi_memset.
797       static constexpr SizeT MaxArgs = 3;
798       static constexpr Variable *NoDest = nullptr;
799       Operand *TargetHelper =
800           Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memset);
801       auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
802                                             NoTailCall, IsTargetHelperCall);
803       Call->addArg(Intrinsic->getArg(0));
804       Call->addArg(ValExt);
805       Call->addArg(Intrinsic->getArg(2));
806       Instr->setDeleted();
807       return;
808     }
809     case Intrinsics::NaClReadTP: {
810       if (SandboxingType == ST_NaCl) {
811         return;
812       }
813       static constexpr SizeT MaxArgs = 0;
814       Operand *TargetHelper =
815           SandboxingType == ST_Nonsfi
816               ? Ctx->getConstantExternSym(
817                     Ctx->getGlobalString("__aeabi_read_tp"))
818               : Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_read_tp);
819       Context.insert<InstCall>(MaxArgs, Dest, TargetHelper, NoTailCall,
820                                IsTargetHelperCall);
821       Instr->setDeleted();
822       return;
823     }
824     case Intrinsics::Setjmp: {
825       static constexpr SizeT MaxArgs = 1;
826       Operand *TargetHelper =
827           Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_setjmp);
828       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
829                                             NoTailCall, IsTargetHelperCall);
830       Call->addArg(Intrinsic->getArg(0));
831       Instr->setDeleted();
832       return;
833     }
834     }
835     llvm::report_fatal_error("Control flow should never have reached here.");
836   }
837   }
838 }
839 
840 void TargetARM32::findMaxStackOutArgsSize() {
841   // MinNeededOutArgsBytes should be updated if the Target ever creates a
842   // high-level InstCall that requires more stack bytes.
843   constexpr size_t MinNeededOutArgsBytes = 0;
844   MaxOutArgsSizeBytes = MinNeededOutArgsBytes;
845   for (CfgNode *Node : Func->getNodes()) {
846     Context.init(Node);
847     while (!Context.atEnd()) {
848       PostIncrLoweringContext PostIncrement(Context);
849       Inst *CurInstr = iteratorToInst(Context.getCur());
850       if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) {
851         SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call);
852         MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes);
853       }
854     }
855   }
856 }
857 
858 void TargetARM32::createGotPtr() {
859   if (SandboxingType != ST_Nonsfi) {
860     return;
861   }
862   GotPtr = Func->makeVariable(IceType_i32);
863 }
864 
865 void TargetARM32::insertGotPtrInitPlaceholder() {
866   if (SandboxingType != ST_Nonsfi) {
867     return;
868   }
869   assert(GotPtr != nullptr);
870   // We add the two placeholder instructions here. The first fakedefs T, an
871   // infinite-weight temporary, while the second fakedefs the GotPtr "using" T.
872   // This is needed because the GotPtr initialization, if needed, will require
873   // a register:
874   //
875   //   movw     reg, _GLOBAL_OFFSET_TABLE_ - 16 - .
876   //   movt     reg, _GLOBAL_OFFSET_TABLE_ - 12 - .
877   //   add      reg, pc, reg
878   //   mov      GotPtr, reg
879   //
880   // If GotPtr is not used, then both these pseudo-instructions are dce'd.
881   Variable *T = makeReg(IceType_i32);
882   Context.insert<InstFakeDef>(T);
883   Context.insert<InstFakeDef>(GotPtr, T);
884 }
885 
886 GlobalString
887 TargetARM32::createGotoffRelocation(const ConstantRelocatable *CR) {
888   GlobalString CRName = CR->getName();
889   GlobalString CRGotoffName =
890       Ctx->getGlobalString("GOTOFF$" + Func->getFunctionName() + "$" + CRName);
891   if (KnownGotoffs.count(CRGotoffName) == 0) {
892     constexpr bool SuppressMangling = true;
893     auto *Global =
894         VariableDeclaration::create(Func->getGlobalPool(), SuppressMangling);
895     Global->setIsConstant(true);
896     Global->setName(CRName);
897     Func->getGlobalPool()->willNotBeEmitted(Global);
898 
899     auto *Gotoff =
900         VariableDeclaration::create(Func->getGlobalPool(), SuppressMangling);
901     constexpr auto GotFixup = R_ARM_GOTOFF32;
902     Gotoff->setIsConstant(true);
903     Gotoff->addInitializer(VariableDeclaration::RelocInitializer::create(
904         Func->getGlobalPool(), Global, {RelocOffset::create(Ctx, 0)},
905         GotFixup));
906     Gotoff->setName(CRGotoffName);
907     Func->addGlobal(Gotoff);
908     KnownGotoffs.emplace(CRGotoffName);
909   }
910   return CRGotoffName;
911 }
912 
913 void TargetARM32::materializeGotAddr(CfgNode *Node) {
914   if (SandboxingType != ST_Nonsfi) {
915     return;
916   }
917 
918   // At first, we try to find the
919   //    GotPtr = def T
920   // pseudo-instruction that we placed for defining the got ptr. That
921   // instruction is not just a place-holder for defining the GotPtr (thus
922   // keeping liveness consistent), but it is also located at a point where it is
923   // safe to materialize the got addr -- i.e., before loading parameters to
924   // registers, but after moving register parameters from their home location.
925   InstFakeDef *DefGotPtr = nullptr;
926   for (auto &Inst : Node->getInsts()) {
927     auto *FakeDef = llvm::dyn_cast<InstFakeDef>(&Inst);
928     if (FakeDef != nullptr && FakeDef->getDest() == GotPtr) {
929       DefGotPtr = FakeDef;
930       break;
931     }
932   }
933 
934   if (DefGotPtr == nullptr || DefGotPtr->isDeleted()) {
935     return;
936   }
937 
938   // The got addr needs to be materialized at the same point where DefGotPtr
939   // lives.
940   Context.setInsertPoint(instToIterator(DefGotPtr));
941   assert(DefGotPtr->getSrcSize() == 1);
942   auto *T = llvm::cast<Variable>(DefGotPtr->getSrc(0));
943   loadNamedConstantRelocatablePIC(Ctx->getGlobalString(GlobalOffsetTable), T,
944                                   [this, T](Variable *PC) { _add(T, PC, T); });
945   _mov(GotPtr, T);
946   DefGotPtr->setDeleted();
947 }
948 
949 void TargetARM32::loadNamedConstantRelocatablePIC(
950     GlobalString Name, Variable *Register,
951     std::function<void(Variable *PC)> Finish) {
952   assert(SandboxingType == ST_Nonsfi);
953   // We makeReg() here instead of getPhysicalRegister() because the latter ends
954   // up creating multi-block temporaries that liveness fails to validate.
955   auto *PC = makeReg(IceType_i32, RegARM32::Reg_pc);
956 
957   auto *AddPcReloc = RelocOffset::create(Ctx);
958   AddPcReloc->setSubtract(true);
959   auto *AddPcLabel = InstARM32Label::create(Func, this);
960   AddPcLabel->setRelocOffset(AddPcReloc);
961 
962   auto *MovwReloc = RelocOffset::create(Ctx);
963   auto *MovwLabel = InstARM32Label::create(Func, this);
964   MovwLabel->setRelocOffset(MovwReloc);
965 
966   auto *MovtReloc = RelocOffset::create(Ctx);
967   auto *MovtLabel = InstARM32Label::create(Func, this);
968   MovtLabel->setRelocOffset(MovtReloc);
969 
970   // The EmitStrings for these constant relocatables have hardcoded offsets
971   // attached to them. This could be dangerous if, e.g., we ever implemented
972   // instruction scheduling, but llvm-mc currently does not support
973   //
974   //   movw reg, #:lower16:(Symbol - Label - Number)
975   //   movt reg, #:upper16:(Symbol - Label - Number)
976   //
977   // relocations.
978   static constexpr RelocOffsetT PcOffset = -8;
979   auto *CRLower = Ctx->getConstantSymWithEmitString(
980       PcOffset, {MovwReloc, AddPcReloc}, Name, Name + " -16");
981   auto *CRUpper = Ctx->getConstantSymWithEmitString(
982       PcOffset, {MovtReloc, AddPcReloc}, Name, Name + " -12");
983 
984   Context.insert(MovwLabel);
985   _movw(Register, CRLower);
986   Context.insert(MovtLabel);
987   _movt(Register, CRUpper);
988   // PC = fake-def to keep liveness consistent.
989   Context.insert<InstFakeDef>(PC);
990   Context.insert(AddPcLabel);
991   Finish(PC);
992 }
993 
994 void TargetARM32::translateO2() {
995   TimerMarker T(TimerStack::TT_O2, Func);
996 
997   // TODO(stichnot): share passes with other targets?
998   // https://code.google.com/p/nativeclient/issues/detail?id=4094
999   if (SandboxingType == ST_Nonsfi) {
1000     createGotPtr();
1001   }
1002   genTargetHelperCalls();
1003   findMaxStackOutArgsSize();
1004 
1005   // Sort and combine Alloca instructions, and lay out the stack.
1006   static constexpr bool SortAndCombineAllocas = true;
1007   Func->processAllocas(SortAndCombineAllocas);
1008   Func->dump("After Alloca processing");
1009 
1010   if (!getFlags().getEnablePhiEdgeSplit()) {
1011     // Lower Phi instructions.
1012     Func->placePhiLoads();
1013     if (Func->hasError())
1014       return;
1015     Func->placePhiStores();
1016     if (Func->hasError())
1017       return;
1018     Func->deletePhis();
1019     if (Func->hasError())
1020       return;
1021     Func->dump("After Phi lowering");
1022   }
1023 
1024   // Address mode optimization.
1025   Func->getVMetadata()->init(VMK_SingleDefs);
1026   Func->doAddressOpt();
1027   Func->materializeVectorShuffles();
1028 
1029   // Argument lowering
1030   Func->doArgLowering();
1031 
1032   // Target lowering. This requires liveness analysis for some parts of the
1033   // lowering decisions, such as compare/branch fusing. If non-lightweight
1034   // liveness analysis is used, the instructions need to be renumbered first.
1035   // TODO: This renumbering should only be necessary if we're actually
1036   // calculating live intervals, which we only do for register allocation.
1037   Func->renumberInstructions();
1038   if (Func->hasError())
1039     return;
1040 
1041   // TODO: It should be sufficient to use the fastest liveness calculation,
1042   // i.e. livenessLightweight(). However, for some reason that slows down the
1043   // rest of the translation. Investigate.
1044   Func->liveness(Liveness_Basic);
1045   if (Func->hasError())
1046     return;
1047   Func->dump("After ARM32 address mode opt");
1048 
1049   if (SandboxingType == ST_Nonsfi) {
1050     insertGotPtrInitPlaceholder();
1051   }
1052   Func->genCode();
1053   if (Func->hasError())
1054     return;
1055   Func->dump("After ARM32 codegen");
1056 
1057   // Register allocation. This requires instruction renumbering and full
1058   // liveness analysis.
1059   Func->renumberInstructions();
1060   if (Func->hasError())
1061     return;
1062   Func->liveness(Liveness_Intervals);
1063   if (Func->hasError())
1064     return;
1065   // The post-codegen dump is done here, after liveness analysis and associated
1066   // cleanup, to make the dump cleaner and more useful.
1067   Func->dump("After initial ARM32 codegen");
1068   // Validate the live range computations. The expensive validation call is
1069   // deliberately only made when assertions are enabled.
1070   assert(Func->validateLiveness());
1071   Func->getVMetadata()->init(VMK_All);
1072   regAlloc(RAK_Global);
1073   if (Func->hasError())
1074     return;
1075 
1076   copyRegAllocFromInfWeightVariable64On32(Func->getVariables());
1077   Func->dump("After linear scan regalloc");
1078 
1079   if (getFlags().getEnablePhiEdgeSplit()) {
1080     Func->advancedPhiLowering();
1081     Func->dump("After advanced Phi lowering");
1082   }
1083 
1084   ForbidTemporaryWithoutReg _(this);
1085 
1086   // Stack frame mapping.
1087   Func->genFrame();
1088   if (Func->hasError())
1089     return;
1090   Func->dump("After stack frame mapping");
1091 
1092   postLowerLegalization();
1093   if (Func->hasError())
1094     return;
1095   Func->dump("After postLowerLegalization");
1096 
1097   Func->contractEmptyNodes();
1098   Func->reorderNodes();
1099 
1100   // Branch optimization. This needs to be done just before code emission. In
1101   // particular, no transformations that insert or reorder CfgNodes should be
1102   // done after branch optimization. We go ahead and do it before nop insertion
1103   // to reduce the amount of work needed for searching for opportunities.
1104   Func->doBranchOpt();
1105   Func->dump("After branch optimization");
1106 }
1107 
1108 void TargetARM32::translateOm1() {
1109   TimerMarker T(TimerStack::TT_Om1, Func);
1110 
1111   // TODO(stichnot): share passes with other targets?
1112   if (SandboxingType == ST_Nonsfi) {
1113     createGotPtr();
1114   }
1115 
1116   genTargetHelperCalls();
1117   findMaxStackOutArgsSize();
1118 
1119   // Do not merge Alloca instructions, and lay out the stack.
1120   static constexpr bool DontSortAndCombineAllocas = false;
1121   Func->processAllocas(DontSortAndCombineAllocas);
1122   Func->dump("After Alloca processing");
1123 
1124   Func->placePhiLoads();
1125   if (Func->hasError())
1126     return;
1127   Func->placePhiStores();
1128   if (Func->hasError())
1129     return;
1130   Func->deletePhis();
1131   if (Func->hasError())
1132     return;
1133   Func->dump("After Phi lowering");
1134 
1135   Func->doArgLowering();
1136 
1137   if (SandboxingType == ST_Nonsfi) {
1138     insertGotPtrInitPlaceholder();
1139   }
1140   Func->genCode();
1141   if (Func->hasError())
1142     return;
1143   Func->dump("After initial ARM32 codegen");
1144 
1145   regAlloc(RAK_InfOnly);
1146   if (Func->hasError())
1147     return;
1148 
1149   copyRegAllocFromInfWeightVariable64On32(Func->getVariables());
1150   Func->dump("After regalloc of infinite-weight variables");
1151 
1152   ForbidTemporaryWithoutReg _(this);
1153 
1154   Func->genFrame();
1155   if (Func->hasError())
1156     return;
1157   Func->dump("After stack frame mapping");
1158 
1159   postLowerLegalization();
1160   if (Func->hasError())
1161     return;
1162   Func->dump("After postLowerLegalization");
1163 }
1164 
1165 uint32_t TargetARM32::getStackAlignment() const {
1166   return ARM32_STACK_ALIGNMENT_BYTES;
1167 }
1168 
1169 bool TargetARM32::doBranchOpt(Inst *I, const CfgNode *NextNode) {
1170   if (auto *Br = llvm::dyn_cast<InstARM32Br>(I)) {
1171     return Br->optimizeBranch(NextNode);
1172   }
1173   return false;
1174 }
1175 
1176 const char *TargetARM32::getRegName(RegNumT RegNum, Type Ty) const {
1177   (void)Ty;
1178   return RegARM32::getRegName(RegNum);
1179 }
1180 
1181 Variable *TargetARM32::getPhysicalRegister(RegNumT RegNum, Type Ty) {
1182   static const Type DefaultType[] = {
1183 #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr,   \
1184           isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)       \
1185   (isFP32)                                                                     \
1186       ? IceType_f32                                                            \
1187       : ((isFP64) ? IceType_f64 : ((isVec128 ? IceType_v4i32 : IceType_i32))),
1188       REGARM32_TABLE
1189 #undef X
1190   };
1191 
1192   if (Ty == IceType_void) {
1193     assert(unsigned(RegNum) < llvm::array_lengthof(DefaultType));
1194     Ty = DefaultType[RegNum];
1195   }
1196   if (PhysicalRegisters[Ty].empty())
1197     PhysicalRegisters[Ty].resize(RegARM32::Reg_NUM);
1198   assert(unsigned(RegNum) < PhysicalRegisters[Ty].size());
1199   Variable *Reg = PhysicalRegisters[Ty][RegNum];
1200   if (Reg == nullptr) {
1201     Reg = Func->makeVariable(Ty);
1202     Reg->setRegNum(RegNum);
1203     PhysicalRegisters[Ty][RegNum] = Reg;
1204     // Specially mark a named physical register as an "argument" so that it is
1205     // considered live upon function entry.  Otherwise it's possible to get
1206     // liveness validation errors for saving callee-save registers.
1207     Func->addImplicitArg(Reg);
1208     // Don't bother tracking the live range of a named physical register.
1209     Reg->setIgnoreLiveness();
1210   }
1211   return Reg;
1212 }
1213 
1214 void TargetARM32::emitJumpTable(const Cfg *Func,
1215                                 const InstJumpTable *JumpTable) const {
1216   (void)Func;
1217   (void)JumpTable;
1218   UnimplementedError(getFlags());
1219 }
1220 
1221 void TargetARM32::emitVariable(const Variable *Var) const {
1222   if (!BuildDefs::dump())
1223     return;
1224   Ostream &Str = Ctx->getStrEmit();
1225   if (Var->hasReg()) {
1226     Str << getRegName(Var->getRegNum(), Var->getType());
1227     return;
1228   }
1229   if (Var->mustHaveReg()) {
1230     llvm::report_fatal_error("Infinite-weight Variable (" + Var->getName() +
1231                              ") has no register assigned - function " +
1232                              Func->getFunctionName());
1233   }
1234   assert(!Var->isRematerializable());
1235   int32_t Offset = Var->getStackOffset();
1236   auto BaseRegNum = Var->getBaseRegNum();
1237   if (BaseRegNum.hasNoValue()) {
1238     BaseRegNum = getFrameOrStackReg();
1239   }
1240   const Type VarTy = Var->getType();
1241   Str << "[" << getRegName(BaseRegNum, VarTy);
1242   if (Offset != 0) {
1243     Str << ", #" << Offset;
1244   }
1245   Str << "]";
1246 }
1247 
1248 TargetARM32::CallingConv::CallingConv()
1249     : GPRegsUsed(RegARM32::Reg_NUM),
1250       GPRArgs(GPRArgInitializer.rbegin(), GPRArgInitializer.rend()),
1251       I64Args(I64ArgInitializer.rbegin(), I64ArgInitializer.rend()),
1252       VFPRegsUsed(RegARM32::Reg_NUM),
1253       FP32Args(FP32ArgInitializer.rbegin(), FP32ArgInitializer.rend()),
1254       FP64Args(FP64ArgInitializer.rbegin(), FP64ArgInitializer.rend()),
1255       Vec128Args(Vec128ArgInitializer.rbegin(), Vec128ArgInitializer.rend()) {}
1256 
1257 bool TargetARM32::CallingConv::argInGPR(Type Ty, RegNumT *Reg) {
1258   CfgVector<RegNumT> *Source;
1259 
1260   switch (Ty) {
1261   default: {
1262     assert(isScalarIntegerType(Ty));
1263     Source = &GPRArgs;
1264   } break;
1265   case IceType_i64: {
1266     Source = &I64Args;
1267   } break;
1268   }
1269 
1270   discardUnavailableGPRsAndTheirAliases(Source);
1271 
1272   if (Source->empty()) {
1273     GPRegsUsed.set();
1274     return false;
1275   }
1276 
1277   *Reg = Source->back();
1278   // Note that we don't Source->pop_back() here. This is intentional. Notice how
1279   // we mark all of Reg's aliases as Used. So, for the next argument,
1280   // Source->back() is marked as unavailable, and it is thus implicitly popped
1281   // from the stack.
1282   GPRegsUsed |= RegisterAliases[*Reg];
1283   return true;
1284 }
1285 
1286 // GPRs are not packed when passing parameters. Thus, a function foo(i32, i64,
1287 // i32) will have the first argument in r0, the second in r1-r2, and the third
1288 // on the stack. To model this behavior, whenever we pop a register from Regs,
1289 // we remove all of its aliases from the pool of available GPRs. This has the
1290 // effect of computing the "closure" on the GPR registers.
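//
// Sketch of the mechanism: once an i32 argument takes r0, popping the first
// available i64 pair also marks that pair's component GPRs as used via their
// aliases, so a later scalar argument can never be "packed" into a register
// that overlaps an already-consumed pair and instead falls to the stack.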
1291 void TargetARM32::CallingConv::discardUnavailableGPRsAndTheirAliases(
1292     CfgVector<RegNumT> *Regs) {
1293   while (!Regs->empty() && GPRegsUsed[Regs->back()]) {
1294     GPRegsUsed |= RegisterAliases[Regs->back()];
1295     Regs->pop_back();
1296   }
1297 }
1298 
1299 bool TargetARM32::CallingConv::argInVFP(Type Ty, RegNumT *Reg) {
1300   CfgVector<RegNumT> *Source;
1301 
1302   switch (Ty) {
1303   default: {
1304     assert(isVectorType(Ty));
1305     Source = &Vec128Args;
1306   } break;
1307   case IceType_f32: {
1308     Source = &FP32Args;
1309   } break;
1310   case IceType_f64: {
1311     Source = &FP64Args;
1312   } break;
1313   }
1314 
1315   discardUnavailableVFPRegs(Source);
1316 
1317   if (Source->empty()) {
1318     VFPRegsUsed.set();
1319     return false;
1320   }
1321 
1322   *Reg = Source->back();
1323   VFPRegsUsed |= RegisterAliases[*Reg];
1324   return true;
1325 }
1326 
1327 // Arguments in VFP registers are not packed, so we don't mark the popped
1328 // registers' aliases as unavailable.
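//
// For example (assuming the standard AAPCS VFP back-filling this models): in
// foo(f32, f64, f32), the first f32 takes s0, the f64 skips d0 (whose alias s0
// is already used) and takes d1, and the last f32 can still back-fill into s1.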
1329 void TargetARM32::CallingConv::discardUnavailableVFPRegs(
1330     CfgVector<RegNumT> *Regs) {
1331   while (!Regs->empty() && VFPRegsUsed[Regs->back()]) {
1332     Regs->pop_back();
1333   }
1334 }
1335 
1336 void TargetARM32::lowerArguments() {
1337   VarList &Args = Func->getArgs();
1338   TargetARM32::CallingConv CC;
1339 
1340   // For each register argument, replace Arg in the argument list with the home
1341   // register. Then generate an instruction in the prolog to copy the home
1342   // register to the assigned location of Arg.
1343   Context.init(Func->getEntryNode());
1344   Context.setInsertPoint(Context.getCur());
1345 
1346   for (SizeT I = 0, E = Args.size(); I < E; ++I) {
1347     Variable *Arg = Args[I];
1348     Type Ty = Arg->getType();
1349     RegNumT RegNum;
1350     if (isScalarIntegerType(Ty)) {
1351       if (!CC.argInGPR(Ty, &RegNum)) {
1352         continue;
1353       }
1354     } else {
1355       if (!CC.argInVFP(Ty, &RegNum)) {
1356         continue;
1357       }
1358     }
1359 
1360     Variable *RegisterArg = Func->makeVariable(Ty);
1361     if (BuildDefs::dump()) {
1362       RegisterArg->setName(Func, "home_reg:" + Arg->getName());
1363     }
1364     RegisterArg->setIsArg();
1365     Arg->setIsArg(false);
1366     Args[I] = RegisterArg;
1367     switch (Ty) {
1368     default: {
1369       RegisterArg->setRegNum(RegNum);
1370     } break;
1371     case IceType_i64: {
1372       auto *RegisterArg64 = llvm::cast<Variable64On32>(RegisterArg);
1373       RegisterArg64->initHiLo(Func);
1374       RegisterArg64->getLo()->setRegNum(
1375           RegNumT::fixme(RegARM32::getI64PairFirstGPRNum(RegNum)));
1376       RegisterArg64->getHi()->setRegNum(
1377           RegNumT::fixme(RegARM32::getI64PairSecondGPRNum(RegNum)));
1378     } break;
1379     }
1380     Context.insert<InstAssign>(Arg, RegisterArg);
1381   }
1382 }
1383 
1384 // Helper function for addProlog().
1385 //
1386 // This assumes Arg is an argument passed on the stack. This sets the frame
1387 // offset for Arg and updates InArgsSizeBytes according to Arg's width. For an
1388 // I64 arg that has been split into Lo and Hi components, it calls itself
1389 // recursively on the components, taking care to handle Lo first because of the
1390 // little-endian architecture. Lastly, this function generates an instruction
1391 // to copy Arg into its assigned register if applicable.
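//
// Worked example (hypothetical values): for an i64 stack argument with
// *InArgsSizeBytes == 4, the offset is first aligned up to 8; the Lo half then
// gets frame offset BasicFrameOffset + 8, the Hi half BasicFrameOffset + 12,
// and *InArgsSizeBytes ends up at 16.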
1392 void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
1393                                          size_t BasicFrameOffset,
1394                                          size_t *InArgsSizeBytes) {
1395   const Type Ty = Arg->getType();
1396   *InArgsSizeBytes = applyStackAlignmentTy(*InArgsSizeBytes, Ty);
1397 
1398   if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) {
1399     Variable *const Lo = Arg64On32->getLo();
1400     Variable *const Hi = Arg64On32->getHi();
1401     finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
1402     finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
1403     return;
1404   }
1405   assert(Ty != IceType_i64);
1406 
1407   const int32_t ArgStackOffset = BasicFrameOffset + *InArgsSizeBytes;
1408   *InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
1409 
1410   if (!Arg->hasReg()) {
1411     Arg->setStackOffset(ArgStackOffset);
1412     return;
1413   }
1414 
1415   // If the argument variable has been assigned a register, we need to copy the
1416   // value from the stack slot.
1417   Variable *Parameter = Func->makeVariable(Ty);
1418   Parameter->setMustNotHaveReg();
1419   Parameter->setStackOffset(ArgStackOffset);
1420   _mov(Arg, Parameter);
1421 }
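// Illustrative example (hypothetical numbers): with BasicFrameOffset = 8 and
// an i64 as the first stack argument, the Lo half is given offset 8 and the
// Hi half offset 12, and InArgsSizeBytes advances from 0 to 8. Had either
// half been register-allocated, the stack-only Parameter copy above would
// instead produce a load into the assigned register.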
1422 
1423 Type TargetARM32::stackSlotType() { return IceType_i32; }
1424 
1425 void TargetARM32::addProlog(CfgNode *Node) {
1426   // Stack frame layout:
1427   //
1428   // +------------------------+
1429   // | 1. preserved registers |
1430   // +------------------------+
1431   // | 2. padding             |
1432   // +------------------------+ <--- FramePointer (if used)
1433   // | 3. global spill area   |
1434   // +------------------------+
1435   // | 4. padding             |
1436   // +------------------------+
1437   // | 5. local spill area    |
1438   // +------------------------+
1439   // | 6. padding             |
1440   // +------------------------+
1441   // | 7. allocas (variable)  |
1442   // +------------------------+
1443   // | 8. padding             |
1444   // +------------------------+
1445   // | 9. out args            |
1446   // +------------------------+ <--- StackPointer
1447   //
1448   // The following variables record the size in bytes of the given areas:
1449   //  * PreservedRegsSizeBytes: area 1
1450   //  * SpillAreaPaddingBytes:  area 2
1451   //  * GlobalsSize:            area 3
1452   //  * GlobalsAndSubsequentPaddingSize: areas 3 - 4
1453   //  * LocalsSpillAreaSize:    area 5
1454   //  * SpillAreaSizeBytes:     areas 2 - 6, and 9
1455   //  * MaxOutArgsSizeBytes:    area 9
1456   //
1457   // Determine stack frame offsets for each Variable without a register
1458   // assignment.  This can be done as one variable per stack slot.  Or, do
1459   // coalescing by running the register allocator again with an infinite set of
1460   // registers (as a side effect, this gives variables a second chance at
1461   // physical register assignment).
1462   //
1463   // A middle ground approach is to leverage sparsity and allocate one block of
1464   // space on the frame for globals (variables with multi-block lifetime), and
1465   // one block to share for locals (single-block lifetime).
1466 
1467   Context.init(Node);
1468   Context.setInsertPoint(Context.getCur());
1469 
1470   SmallBitVector CalleeSaves = getRegisterSet(RegSet_CalleeSave, RegSet_None);
1471   RegsUsed = SmallBitVector(CalleeSaves.size());
1472   VarList SortedSpilledVariables;
1473   size_t GlobalsSize = 0;
1474   // If there is a separate locals area, this represents that area. Otherwise
1475   // it counts any variable not counted by GlobalsSize.
1476   SpillAreaSizeBytes = 0;
1477   // If there is a separate locals area, this specifies the alignment for it.
1478   uint32_t LocalsSlotsAlignmentBytes = 0;
1479   // The entire spill locations area gets aligned to largest natural alignment
1480   // of the variables that have a spill slot.
1481   uint32_t SpillAreaAlignmentBytes = 0;
1482   // For now, we don't have target-specific variables that need special
1483   // treatment (no stack-slot-linked SpillVariable type).
1484   std::function<bool(Variable *)> TargetVarHook = [](Variable *Var) {
1485     static constexpr bool AssignStackSlot = false;
1486     static constexpr bool DontAssignStackSlot = !AssignStackSlot;
1487     if (llvm::isa<Variable64On32>(Var)) {
1488       return DontAssignStackSlot;
1489     }
1490     return AssignStackSlot;
1491   };
1492 
1493   // Compute the list of spilled variables and bounds for GlobalsSize, etc.
1494   getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
1495                         &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
1496                         &LocalsSlotsAlignmentBytes, TargetVarHook);
1497   uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
1498   SpillAreaSizeBytes += GlobalsSize;
1499 
1500   // Add push instructions for preserved registers. On ARM, "push" can push a
1501   // whole list of GPRs via a bitmask (0-15). Unlike x86, ARM also has
1502   // callee-saved float/vector registers.
1503   //
1504   // The "vpush" instruction can handle a whole list of float/vector registers,
1505   // but it only handles contiguous sequences of registers by specifying the
1506   // start and the length.
1507   PreservedGPRs.reserve(CalleeSaves.size());
1508   PreservedSRegs.reserve(CalleeSaves.size());
1509 
1510   // Consider FP and LR as callee-save / used as needed.
1511   if (UsesFramePointer) {
1512     if (RegsUsed[RegARM32::Reg_fp]) {
1513       llvm::report_fatal_error("Frame pointer has been used.");
1514     }
1515     CalleeSaves[RegARM32::Reg_fp] = true;
1516     RegsUsed[RegARM32::Reg_fp] = true;
1517   }
1518   if (!MaybeLeafFunc) {
1519     CalleeSaves[RegARM32::Reg_lr] = true;
1520     RegsUsed[RegARM32::Reg_lr] = true;
1521   }
1522 
1523   // Make two passes over the used registers. The first pass records all the
1524   // used registers -- and their aliases. Then, we figure out which GPRs and
1525   // VFP S registers should be saved. We don't bother saving D/Q registers
1526   // because their uses are recorded as S regs uses.
1527   SmallBitVector ToPreserve(RegARM32::Reg_NUM);
1528   for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
1529     if (NeedSandboxing && i == RegARM32::Reg_r9) {
1530       // r9 is never updated in sandboxed code.
1531       continue;
1532     }
1533     if (CalleeSaves[i] && RegsUsed[i]) {
1534       ToPreserve |= RegisterAliases[i];
1535     }
1536   }
1537 
1538   uint32_t NumCallee = 0;
1539   size_t PreservedRegsSizeBytes = 0;
1540 
1541   // Each entry in RegClasses is a tuple of
1542   //
1543   // <First Register in Class, Last Register in Class, Vector of Saved Registers>
1544   //
1545   // We use these tuples to figure out which registers we should push/pop
1546   // during the prolog/epilog.
1547   using RegClassType = std::tuple<uint32_t, uint32_t, VarList *>;
1548   const RegClassType RegClasses[] = {
1549       RegClassType(RegARM32::Reg_GPR_First, RegARM32::Reg_GPR_Last,
1550                    &PreservedGPRs),
1551       RegClassType(RegARM32::Reg_SREG_First, RegARM32::Reg_SREG_Last,
1552                    &PreservedSRegs)};
1553   for (const auto &RegClass : RegClasses) {
1554     const uint32_t FirstRegInClass = std::get<0>(RegClass);
1555     const uint32_t LastRegInClass = std::get<1>(RegClass);
1556     VarList *const PreservedRegsInClass = std::get<2>(RegClass);
1557     for (uint32_t Reg = FirstRegInClass; Reg <= LastRegInClass; ++Reg) {
1558       if (!ToPreserve[Reg]) {
1559         continue;
1560       }
1561       ++NumCallee;
1562       Variable *PhysicalRegister = getPhysicalRegister(RegNumT::fromInt(Reg));
1563       PreservedRegsSizeBytes +=
1564           typeWidthInBytesOnStack(PhysicalRegister->getType());
1565       PreservedRegsInClass->push_back(PhysicalRegister);
1566     }
1567   }
1568 
1569   Ctx->statsUpdateRegistersSaved(NumCallee);
1570   if (!PreservedSRegs.empty())
1571     _push(PreservedSRegs);
1572   if (!PreservedGPRs.empty())
1573     _push(PreservedGPRs);
1574 
1575   // Generate "mov FP, SP" if needed.
1576   if (UsesFramePointer) {
1577     Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
1578     Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
1579     _mov(FP, SP);
1580     // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode).
1581     Context.insert<InstFakeUse>(FP);
1582   }
1583 
1584   // Align the variables area. SpillAreaPaddingBytes is the size of the region
1585   // after the preserved registers and before the spill areas.
1586   // LocalsSlotsPaddingBytes is the amount of padding between the globals and
1587   // locals area if they are separate.
1588   assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES);
1589   assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
1590   uint32_t SpillAreaPaddingBytes = 0;
1591   uint32_t LocalsSlotsPaddingBytes = 0;
1592   alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
1593                        GlobalsSize, LocalsSlotsAlignmentBytes,
1594                        &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
1595   SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
1596   uint32_t GlobalsAndSubsequentPaddingSize =
1597       GlobalsSize + LocalsSlotsPaddingBytes;
1598 
1599   // Add the out args space to the stack, and align SP if necessary.
1600   if (!NeedsStackAlignment) {
1601     SpillAreaSizeBytes += MaxOutArgsSizeBytes;
1602   } else {
1603     uint32_t StackOffset = PreservedRegsSizeBytes;
1604     uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
1605     StackSize = applyStackAlignment(StackSize + MaxOutArgsSizeBytes);
1606     SpillAreaSizeBytes = StackSize - StackOffset;
1607   }
1608 
1609   // Combine fixed alloca with SpillAreaSize.
1610   SpillAreaSizeBytes += FixedAllocaSizeBytes;
1611 
1612   // Generate "sub sp, SpillAreaSizeBytes"
1613   if (SpillAreaSizeBytes) {
1614     // Use the scratch register if needed to legalize the immediate.
1615     Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
1616                                   Legal_Reg | Legal_Flex, getReservedTmpReg());
1617     Sandboxer(this).sub_sp(SubAmount);
1618     if (FixedAllocaAlignBytes > ARM32_STACK_ALIGNMENT_BYTES) {
1619       Sandboxer(this).align_sp(FixedAllocaAlignBytes);
1620     }
1621   }
1622 
1623   Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
1624 
1625   // Fill in stack offsets for stack args, and copy args into registers for
1626   // those that were register-allocated. Args are pushed right to left, so
1627   // Arg[0] is closest to the stack/frame pointer.
1628   Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
1629   size_t BasicFrameOffset = PreservedRegsSizeBytes;
1630   if (!UsesFramePointer)
1631     BasicFrameOffset += SpillAreaSizeBytes;
1632 
1633   materializeGotAddr(Node);
1634 
1635   const VarList &Args = Func->getArgs();
1636   size_t InArgsSizeBytes = 0;
1637   TargetARM32::CallingConv CC;
1638   for (Variable *Arg : Args) {
1639     RegNumT DummyReg;
1640     const Type Ty = Arg->getType();
1641 
1642     // Skip arguments passed in registers.
1643     if (isScalarIntegerType(Ty)) {
1644       if (CC.argInGPR(Ty, &DummyReg)) {
1645         continue;
1646       }
1647     } else {
1648       if (CC.argInVFP(Ty, &DummyReg)) {
1649         continue;
1650       }
1651     }
1652     finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, &InArgsSizeBytes);
1653   }
1654 
1655   // Fill in stack offsets for locals.
1656   assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
1657                       SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
1658                       UsesFramePointer);
1659   this->HasComputedFrame = true;
1660 
1661   if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
1662     OstreamLocker _(Func->getContext());
1663     Ostream &Str = Func->getContext()->getStrDump();
1664 
1665     Str << "Stack layout:\n";
1666     uint32_t SPAdjustmentPaddingSize =
1667         SpillAreaSizeBytes - LocalsSpillAreaSize -
1668         GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes -
1669         MaxOutArgsSizeBytes;
1670     Str << " in-args = " << InArgsSizeBytes << " bytes\n"
1671         << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
1672         << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
1673         << " globals spill area = " << GlobalsSize << " bytes\n"
1674         << " globals-locals spill areas intermediate padding = "
1675         << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
1676         << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
1677         << " SP alignment padding = " << SPAdjustmentPaddingSize << " bytes\n";
1678 
1679     Str << "Stack details:\n"
1680         << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n"
1681         << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
1682         << " outgoing args size = " << MaxOutArgsSizeBytes << " bytes\n"
1683         << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
1684         << " bytes\n"
1685         << " is FP based = " << UsesFramePointer << "\n";
1686   }
1687 }
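// Illustrative prolog (not emitted verbatim) for a small frame-pointer-based
// function that preserves r4 and lr and needs 24 bytes of spill/out-args
// space; the exact register list and immediate depend on the function:
//   push {r4, fp, lr}
//   mov  fp, sp
//   sub  sp, sp, #24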
1688 
1689 void TargetARM32::addEpilog(CfgNode *Node) {
1690   InstList &Insts = Node->getInsts();
1691   InstList::reverse_iterator RI, E;
1692   for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
1693     if (llvm::isa<InstARM32Ret>(*RI))
1694       break;
1695   }
1696   if (RI == E)
1697     return;
1698 
1699   // Convert the reverse_iterator position into its corresponding (forward)
1700   // iterator position.
1701   InstList::iterator InsertPoint = reverseToForwardIterator(RI);
1702   --InsertPoint;
1703   Context.init(Node);
1704   Context.setInsertPoint(InsertPoint);
1705 
1706   Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
1707   if (UsesFramePointer) {
1708     Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
1709     // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake
1710     // use of SP before the assignment of SP=FP keeps previous SP adjustments
1711     // from being dead-code eliminated.
1712     Context.insert<InstFakeUse>(SP);
1713     Sandboxer(this).reset_sp(FP);
1714   } else {
1715     // add SP, SpillAreaSizeBytes
1716     if (SpillAreaSizeBytes) {
1717       // Use the scratch register if needed to legalize the immediate.
1718       Operand *AddAmount =
1719           legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
1720                    Legal_Reg | Legal_Flex, getReservedTmpReg());
1721       Sandboxer(this).add_sp(AddAmount);
1722     }
1723   }
1724 
1725   if (!PreservedGPRs.empty())
1726     _pop(PreservedGPRs);
1727   if (!PreservedSRegs.empty())
1728     _pop(PreservedSRegs);
1729 
1730   if (!getFlags().getUseSandboxing())
1731     return;
1732 
1733   // Change the original ret instruction into a sandboxed return sequence.
1734   //
1735   // bundle_lock
1736   // bic lr, #0xc000000f
1737   // bx lr
1738   // bundle_unlock
1739   //
1740   // This isn't just about aligning to getBundleAlignLog2Bytes(); the return
1741   // address also needs to be restricted to the lower 1GB.
1742   Variable *LR = getPhysicalRegister(RegARM32::Reg_lr);
1743   Variable *RetValue = nullptr;
1744   if (RI->getSrcSize())
1745     RetValue = llvm::cast<Variable>(RI->getSrc(0));
1746 
1747   Sandboxer(this).ret(LR, RetValue);
1748 
1749   RI->setDeleted();
1750 }
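// Illustrative non-sandboxed epilog matching the prolog sketch above; under
// sandboxing the final return is instead rewritten into the bundle-locked
// bic/bx sequence shown in the comment above:
//   mov sp, fp
//   pop {r4, fp, lr}
//   bx  lr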
1751 
1752 bool TargetARM32::isLegalMemOffset(Type Ty, int32_t Offset) const {
1753   constexpr bool ZeroExt = false;
1754   return OperandARM32Mem::canHoldOffset(Ty, ZeroExt, Offset);
1755 }
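// Rough guide to the per-type offset ranges checked by canHoldOffset()
// (assuming the usual ARM encodings captured in ICETYPEARM32_TABLE): 32-bit
// and unsigned byte loads/stores take a 12-bit immediate (|offset| <= 4095),
// halfword and signed-byte accesses an 8-bit immediate (|offset| <= 255),
// VFP f32/f64 accesses an 8-bit immediate scaled by 4 (|offset| <= 1020),
// and vector accesses allow no immediate offset at all.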
1756 
1757 Variable *TargetARM32::PostLoweringLegalizer::newBaseRegister(
1758     Variable *Base, int32_t Offset, RegNumT ScratchRegNum) {
1759   // Legalizing the offset will likely need a movw/movt combination. However, if
1760   // the negated offset fits in 16 bits, we can subtract it instead and avoid the movt.
1761   const bool ShouldSub = Offset != 0 && (-Offset & 0xFFFF0000) == 0;
1762   Variable *ScratchReg = Target->makeReg(IceType_i32, ScratchRegNum);
1763   if (ShouldSub) {
1764     Operand *OffsetVal =
1765         Target->legalize(Target->Ctx->getConstantInt32(-Offset),
1766                          Legal_Reg | Legal_Flex, ScratchRegNum);
1767     Target->_sub(ScratchReg, Base, OffsetVal);
1768   } else {
1769     Operand *OffsetVal =
1770         Target->legalize(Target->Ctx->getConstantInt32(Offset),
1771                          Legal_Reg | Legal_Flex, ScratchRegNum);
1772     Target->_add(ScratchReg, Base, OffsetVal);
1773   }
1774 
1775   if (ScratchRegNum == Target->getReservedTmpReg()) {
1776     const bool BaseIsStackOrFramePtr =
1777         Base->getRegNum() == Target->getFrameOrStackReg();
1778     // There is currently no code path that would trigger this assertion, so we
1779     // leave this assertion here in case it is ever violated. This is not a
1780     // fatal error (thus the use of assert() and not llvm::report_fatal_error)
1781     // as the program compiled by subzero will still work correctly.
1782     assert(BaseIsStackOrFramePtr);
1783     // Side-effect: updates TempBase to reflect the new Temporary.
1784     if (BaseIsStackOrFramePtr) {
1785       TempBaseReg = ScratchReg;
1786       TempBaseOffset = Offset;
1787     } else {
1788       TempBaseReg = nullptr;
1789       TempBaseOffset = 0;
1790     }
1791   }
1792 
1793   return ScratchReg;
1794 }
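// Illustrative output of newBaseRegister() (ip = the reserved temporary;
// instruction selection is approximate): a small negative offset such as
// -4100 takes the subtract path,
//   movw ip, #4100
//   sub  ip, sp, ip
// while a large positive offset such as 0x12345 takes the add path,
//   movw ip, #0x2345
//   movt ip, #0x1
//   add  ip, sp, ip
// Offsets that fit a flex immediate fold directly into the add/sub.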
1795 
1796 OperandARM32Mem *TargetARM32::PostLoweringLegalizer::createMemOperand(
1797     Type Ty, Variable *Base, int32_t Offset, bool AllowOffsets) {
1798   assert(!Base->isRematerializable());
1799   if (Offset == 0 || (AllowOffsets && Target->isLegalMemOffset(Ty, Offset))) {
1800     return OperandARM32Mem::create(
1801         Target->Func, Ty, Base,
1802         llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset)),
1803         OperandARM32Mem::Offset);
1804   }
1805 
1806   if (!AllowOffsets || TempBaseReg == nullptr) {
1807     newBaseRegister(Base, Offset, Target->getReservedTmpReg());
1808   }
1809 
1810   int32_t OffsetDiff = Offset - TempBaseOffset;
1811   assert(AllowOffsets || OffsetDiff == 0);
1812 
1813   if (!Target->isLegalMemOffset(Ty, OffsetDiff)) {
1814     newBaseRegister(Base, Offset, Target->getReservedTmpReg());
1815     OffsetDiff = 0;
1816   }
1817 
1818   assert(!TempBaseReg->isRematerializable());
1819   return OperandARM32Mem::create(
1820       Target->Func, Ty, TempBaseReg,
1821       llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(OffsetDiff)),
1822       OperandARM32Mem::Offset);
1823 }
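// Illustrative example: an i32 access at sp+8192 exceeds the 12-bit ldr/str
// offset, so a temporary base is set up and then reused for nearby slots
// (instruction choice is approximate; 8192 happens to be flex-encodable):
//   add ip, sp, #8192        ; TempBaseReg = ip, TempBaseOffset = 8192
//   ldr r0, [ip, #0]         ; access at sp+8192
//   ldr r1, [ip, #4]         ; later access at sp+8196, OffsetDiff = 4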
1824 
1825 void TargetARM32::PostLoweringLegalizer::resetTempBaseIfClobberedBy(
1826     const Inst *Instr) {
1827   bool ClobbersTempBase = false;
1828   if (TempBaseReg != nullptr) {
1829     Variable *Dest = Instr->getDest();
1830     if (llvm::isa<InstARM32Call>(Instr)) {
1831       // The following assertion is an invariant, so we remove it from the if
1832       // test. If the invariant is ever broken/invalidated/changed, remember
1833       // to add it back to the if condition.
1834       assert(TempBaseReg->getRegNum() == Target->getReservedTmpReg());
1835       // The linker may need to clobber IP if the call is too far from PC. Thus,
1836       // we assume IP will be overwritten.
1837       ClobbersTempBase = true;
1838     } else if (Dest != nullptr &&
1839                Dest->getRegNum() == TempBaseReg->getRegNum()) {
1840       // Register redefinition.
1841       ClobbersTempBase = true;
1842     }
1843   }
1844 
1845   if (ClobbersTempBase) {
1846     TempBaseReg = nullptr;
1847     TempBaseOffset = 0;
1848   }
1849 }
1850 
1851 void TargetARM32::PostLoweringLegalizer::legalizeMov(InstARM32Mov *MovInstr) {
1852   Variable *Dest = MovInstr->getDest();
1853   assert(Dest != nullptr);
1854   Type DestTy = Dest->getType();
1855   assert(DestTy != IceType_i64);
1856 
1857   Operand *Src = MovInstr->getSrc(0);
1858   Type SrcTy = Src->getType();
1859   (void)SrcTy;
1860   assert(SrcTy != IceType_i64);
1861 
1862   if (MovInstr->isMultiDest() || MovInstr->isMultiSource())
1863     return;
1864 
1865   bool Legalized = false;
1866   if (!Dest->hasReg()) {
1867     auto *SrcR = llvm::cast<Variable>(Src);
1868     assert(SrcR->hasReg());
1869     assert(!SrcR->isRematerializable());
1870     const int32_t Offset = Dest->getStackOffset();
1871     // This is a _mov(Mem(), Variable), i.e., a store.
1872     TargetARM32::Sandboxer(Target).str(
1873         SrcR, createMemOperand(DestTy, StackOrFrameReg, Offset),
1874         MovInstr->getPredicate());
1875     // _str() does not have a Dest, so we add a fake-def(Dest).
1876     Target->Context.insert<InstFakeDef>(Dest);
1877     Legalized = true;
1878   } else if (auto *Var = llvm::dyn_cast<Variable>(Src)) {
1879     if (Var->isRematerializable()) {
1880       // This is equivalent to an x86 _lea(RematOffset(%esp/%ebp), Variable).
1881 
1882       // ExtraOffset is only needed for frame-pointer based frames as we have
1883       // to account for spill storage.
1884       const int32_t ExtraOffset = (Var->getRegNum() == Target->getFrameReg())
1885                                       ? Target->getFrameFixedAllocaOffset()
1886                                       : 0;
1887 
1888       const int32_t Offset = Var->getStackOffset() + ExtraOffset;
1889       Variable *Base = Target->getPhysicalRegister(Var->getRegNum());
1890       Variable *T = newBaseRegister(Base, Offset, Dest->getRegNum());
1891       Target->_mov(Dest, T);
1892       Legalized = true;
1893     } else {
1894       if (!Var->hasReg()) {
1895         // This is a _mov(Variable, Mem()), i.e., a load.
1896         const int32_t Offset = Var->getStackOffset();
1897         TargetARM32::Sandboxer(Target).ldr(
1898             Dest, createMemOperand(DestTy, StackOrFrameReg, Offset),
1899             MovInstr->getPredicate());
1900         Legalized = true;
1901       }
1902     }
1903   }
1904 
1905   if (Legalized) {
1906     if (MovInstr->isDestRedefined()) {
1907       Target->_set_dest_redefined();
1908     }
1909     MovInstr->setDeleted();
1910   }
1911 }
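// Roughly, the three legalized cases above become (registers/offsets are
// illustrative, and the base may be fp rather than sp):
//   mov <stack slot>, rX   ==>  str rX, [sp, #off]   ; plus a fake-def of Dest
//   mov rX, <stack slot>   ==>  ldr rX, [sp, #off]
//   mov rX, <remat var>    ==>  add rX, sp, #off     ; lea-like address move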
1912 
1913 // ARM32 address modes:
1914 //  ld/st i[8|16|32]: [reg], [reg +/- imm12], [pc +/- imm12],
1915 //                    [reg +/- reg << shamt5]
1916 //  ld/st f[32|64]  : [reg], [reg +/- imm8] , [pc +/- imm8]
1917 //  ld/st vectors   : [reg]
1918 //
1919 // For now, we don't handle address modes with Relocatables.
1920 namespace {
1921 // MemTraits contains per-type valid address mode information.
1922 #define X(tag, elementty, int_width, fp_width, uvec_width, svec_width, sbits,  \
1923           ubits, rraddr, shaddr)                                               \
1924   static_assert(!(shaddr) || rraddr, "Check ICETYPEARM32_TABLE::" #tag);
1925 ICETYPEARM32_TABLE
1926 #undef X
1927 
1928 static const struct {
1929   int32_t ValidImmMask;
1930   bool CanHaveImm;
1931   bool CanHaveIndex;
1932   bool CanHaveShiftedIndex;
1933 } MemTraits[] = {
1934 #define X(tag, elementty, int_width, fp_width, uvec_width, svec_width, sbits,  \
1935           ubits, rraddr, shaddr)                                               \
1936   {                                                                            \
1937       (1 << ubits) - 1,                                                        \
1938       (ubits) > 0,                                                             \
1939       rraddr,                                                                  \
1940       shaddr,                                                                  \
1941   },
1942     ICETYPEARM32_TABLE
1943 #undef X
1944 };
1945 static constexpr SizeT MemTraitsSize = llvm::array_lengthof(MemTraits);
1946 } // end of anonymous namespace
1947 
1948 OperandARM32Mem *
1949 TargetARM32::PostLoweringLegalizer::legalizeMemOperand(OperandARM32Mem *Mem,
1950                                                        bool AllowOffsets) {
1951   assert(!Mem->isRegReg() || !Mem->getIndex()->isRematerializable());
1952   assert(Mem->isRegReg() || Target->isLegalMemOffset(
1953                                 Mem->getType(), Mem->getOffset()->getValue()));
1954 
1955   bool Legalized = false;
1956   Variable *Base = Mem->getBase();
1957   int32_t Offset = Mem->isRegReg() ? 0 : Mem->getOffset()->getValue();
1958   if (Base->isRematerializable()) {
1959     const int32_t ExtraOffset = (Base->getRegNum() == Target->getFrameReg())
1960                                     ? Target->getFrameFixedAllocaOffset()
1961                                     : 0;
1962     Offset += Base->getStackOffset() + ExtraOffset;
1963     Base = Target->getPhysicalRegister(Base->getRegNum());
1964     assert(!Base->isRematerializable());
1965     Legalized = true;
1966   }
1967 
1968   if (!Legalized && !Target->NeedSandboxing) {
1969     return nullptr;
1970   }
1971 
1972   if (!Mem->isRegReg()) {
1973     return createMemOperand(Mem->getType(), Base, Offset, AllowOffsets);
1974   }
1975 
1976   if (Target->NeedSandboxing) {
1977     llvm::report_fatal_error("Reg-Reg address mode is not allowed.");
1978   }
1979 
1980   assert(MemTraits[Mem->getType()].CanHaveIndex);
1981 
1982   if (Offset != 0) {
1983     if (TempBaseReg == nullptr) {
1984       Base = newBaseRegister(Base, Offset, Target->getReservedTmpReg());
1985     } else {
1986       uint32_t Imm8, Rotate;
1987       const int32_t OffsetDiff = Offset - TempBaseOffset;
1988       if (OffsetDiff == 0) {
1989         Base = TempBaseReg;
1990       } else if (OperandARM32FlexImm::canHoldImm(OffsetDiff, &Rotate, &Imm8)) {
1991         auto *OffsetDiffF = OperandARM32FlexImm::create(
1992             Target->Func, IceType_i32, Imm8, Rotate);
1993         Target->_add(TempBaseReg, TempBaseReg, OffsetDiffF);
1994         TempBaseOffset += OffsetDiff;
1995         Base = TempBaseReg;
1996       } else if (OperandARM32FlexImm::canHoldImm(-OffsetDiff, &Rotate, &Imm8)) {
1997         auto *OffsetDiffF = OperandARM32FlexImm::create(
1998             Target->Func, IceType_i32, Imm8, Rotate);
1999         Target->_sub(TempBaseReg, TempBaseReg, OffsetDiffF);
2000         TempBaseOffset += OffsetDiff;
2001         Base = TempBaseReg;
2002       } else {
2003         Base = newBaseRegister(Base, Offset, Target->getReservedTmpReg());
2004       }
2005     }
2006   }
2007 
2008   return OperandARM32Mem::create(Target->Func, Mem->getType(), Base,
2009                                  Mem->getIndex(), Mem->getShiftOp(),
2010                                  Mem->getShiftAmt(), Mem->getAddrMode());
2011 }
2012 
2013 void TargetARM32::postLowerLegalization() {
2014   // If a stack variable's frame offset doesn't fit, convert from:
2015   //   ldr X, OFF[SP]
2016   // to:
2017   //   movw/movt TMP, OFF_PART
2018   //   add TMP, TMP, SP
2019   //   ldr X, OFF_MORE[TMP]
2020   //
2021   // This is safe because we have reserved TMP, and add for ARM does not
2022   // clobber the flags register.
2023   Func->dump("Before postLowerLegalization");
2024   assert(hasComputedFrame());
2025   // Do a fairly naive greedy clustering for now. Pick the first stack slot
2026   // that's out of bounds and make a new base reg using the architecture's temp
2027   // register. If that works for the next slot, then great. Otherwise, create a
2028   // new base register, clobbering the previous base register. Never share a
2029   // base reg across different basic blocks. This isn't ideal if local and
2030   // multi-block variables are far apart and their references are interspersed.
2031   // It may help to be more coordinated about assigning stack slot numbers and may
2032   // help to assign smaller offsets to higher-weight variables so that they
2033   // don't depend on this legalization.
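  // Illustrative example of the clustering: given spill slots at sp+4100,
  // sp+4104 and sp+8300, the first out-of-range access builds ip = sp + 4100
  // and is encoded as [ip, #0], the second reuses ip as [ip, #4], and the
  // third is too far from the current base, so ip is rebuilt as sp + 8300.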
2034   for (CfgNode *Node : Func->getNodes()) {
2035     Context.init(Node);
2036     // One legalizer per basic block, otherwise we would share the Temporary
2037     // Base Register between basic blocks.
2038     PostLoweringLegalizer Legalizer(this);
2039     while (!Context.atEnd()) {
2040       PostIncrLoweringContext PostIncrement(Context);
2041       Inst *CurInstr = iteratorToInst(Context.getCur());
2042 
2043       // Check if the previous TempBaseReg is clobbered, and reset if needed.
2044       Legalizer.resetTempBaseIfClobberedBy(CurInstr);
2045 
2046       if (auto *MovInstr = llvm::dyn_cast<InstARM32Mov>(CurInstr)) {
2047         Legalizer.legalizeMov(MovInstr);
2048       } else if (auto *LdrInstr = llvm::dyn_cast<InstARM32Ldr>(CurInstr)) {
2049         if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
2050                 llvm::cast<OperandARM32Mem>(LdrInstr->getSrc(0)))) {
2051           Sandboxer(this).ldr(CurInstr->getDest(), LegalMem,
2052                               LdrInstr->getPredicate());
2053           CurInstr->setDeleted();
2054         }
2055       } else if (auto *LdrexInstr = llvm::dyn_cast<InstARM32Ldrex>(CurInstr)) {
2056         constexpr bool DisallowOffsetsBecauseLdrex = false;
2057         if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
2058                 llvm::cast<OperandARM32Mem>(LdrexInstr->getSrc(0)),
2059                 DisallowOffsetsBecauseLdrex)) {
2060           Sandboxer(this).ldrex(CurInstr->getDest(), LegalMem,
2061                                 LdrexInstr->getPredicate());
2062           CurInstr->setDeleted();
2063         }
2064       } else if (auto *StrInstr = llvm::dyn_cast<InstARM32Str>(CurInstr)) {
2065         if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
2066                 llvm::cast<OperandARM32Mem>(StrInstr->getSrc(1)))) {
2067           Sandboxer(this).str(llvm::cast<Variable>(CurInstr->getSrc(0)),
2068                               LegalMem, StrInstr->getPredicate());
2069           CurInstr->setDeleted();
2070         }
2071       } else if (auto *StrexInstr = llvm::dyn_cast<InstARM32Strex>(CurInstr)) {
2072         constexpr bool DisallowOffsetsBecauseStrex = false;
2073         if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
2074                 llvm::cast<OperandARM32Mem>(StrexInstr->getSrc(1)),
2075                 DisallowOffsetsBecauseStrex)) {
2076           Sandboxer(this).strex(CurInstr->getDest(),
2077                                 llvm::cast<Variable>(CurInstr->getSrc(0)),
2078                                 LegalMem, StrexInstr->getPredicate());
2079           CurInstr->setDeleted();
2080         }
2081       }
2082 
2083       // Sanity-check: the Legalizer will either have no Temp, or it will be
2084       // bound to IP.
2085       Legalizer.assertNoTempOrAssignedToIP();
2086     }
2087   }
2088 }
2089 
2090 Operand *TargetARM32::loOperand(Operand *Operand) {
2091   assert(Operand->getType() == IceType_i64);
2092   if (Operand->getType() != IceType_i64)
2093     return Operand;
2094   if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
2095     return Var64On32->getLo();
2096   if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand))
2097     return Ctx->getConstantInt32(static_cast<uint32_t>(Const->getValue()));
2098   if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
2099     // Conservatively disallow memory operands with side-effects (pre/post
2100     // increment) in case of duplication.
2101     assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
2102            Mem->getAddrMode() == OperandARM32Mem::NegOffset);
2103     if (Mem->isRegReg()) {
2104       Variable *IndexR = legalizeToReg(Mem->getIndex());
2105       return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(), IndexR,
2106                                      Mem->getShiftOp(), Mem->getShiftAmt(),
2107                                      Mem->getAddrMode());
2108     } else {
2109       return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(),
2110                                      Mem->getOffset(), Mem->getAddrMode());
2111     }
2112   }
2113   llvm::report_fatal_error("Unsupported operand type");
2114   return nullptr;
2115 }
2116 
2117 Operand *TargetARM32::hiOperand(Operand *Operand) {
2118   assert(Operand->getType() == IceType_i64);
2119   if (Operand->getType() != IceType_i64)
2120     return Operand;
2121   if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
2122     return Var64On32->getHi();
2123   if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
2124     return Ctx->getConstantInt32(
2125         static_cast<uint32_t>(Const->getValue() >> 32));
2126   }
2127   if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
2128     // Conservatively disallow memory operands with side-effects in case of
2129     // duplication.
2130     assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
2131            Mem->getAddrMode() == OperandARM32Mem::NegOffset);
2132     const Type SplitType = IceType_i32;
2133     if (Mem->isRegReg()) {
2134       // We have to make a temp variable T, and add 4 to either Base or Index.
2135       // The Index may be shifted, so adding 4 can mean something else. Thus,
2136       // prefer T := Base + 4, and use T as the new Base.
2137       Variable *Base = Mem->getBase();
2138       Constant *Four = Ctx->getConstantInt32(4);
2139       Variable *NewBase = Func->makeVariable(Base->getType());
2140       lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase,
2141                                              Base, Four));
2142       Variable *BaseR = legalizeToReg(NewBase);
2143       Variable *IndexR = legalizeToReg(Mem->getIndex());
2144       return OperandARM32Mem::create(Func, SplitType, BaseR, IndexR,
2145                                      Mem->getShiftOp(), Mem->getShiftAmt(),
2146                                      Mem->getAddrMode());
2147     } else {
2148       Variable *Base = Mem->getBase();
2149       ConstantInteger32 *Offset = Mem->getOffset();
2150       assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4));
2151       int32_t NextOffsetVal = Offset->getValue() + 4;
2152       constexpr bool ZeroExt = false;
2153       if (!OperandARM32Mem::canHoldOffset(SplitType, ZeroExt, NextOffsetVal)) {
2154         // We have to make a temp variable and add 4 to either Base or Offset.
2155         // If we add 4 to Offset, this will convert a non-RegReg addressing
2156         // mode into a RegReg addressing mode. Since NaCl sandboxing disallows
2157         // RegReg addressing modes, prefer adding 4 to Base and replacing it
2158         // instead, leaving the old offset alone.
2159         Constant *_4 = Ctx->getConstantInt32(4);
2160         Variable *NewBase = Func->makeVariable(Base->getType());
2161         lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add,
2162                                                NewBase, Base, _4));
2163         Base = NewBase;
2164       } else {
2165         Offset =
2166             llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal));
2167       }
2168       Variable *BaseR = legalizeToReg(Base);
2169       return OperandARM32Mem::create(Func, SplitType, BaseR, Offset,
2170                                      Mem->getAddrMode());
2171     }
2172   }
2173   llvm::report_fatal_error("Unsupported operand type");
2174   return nullptr;
2175 }
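// Illustrative splits: for the i64 constant 0x0000000100000002, loOperand()
// yields the i32 constant 2 and hiOperand() yields 1. For an i64 memory
// operand [r4, #8], the halves are [r4, #8] and [r4, #12]; when the +4 offset
// no longer encodes, a fresh base register holding r4 + 4 is created instead.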
2176 
2177 SmallBitVector TargetARM32::getRegisterSet(RegSetMask Include,
2178                                            RegSetMask Exclude) const {
2179   SmallBitVector Registers(RegARM32::Reg_NUM);
2180 
2181   for (uint32_t i = 0; i < RegARM32::Reg_NUM; ++i) {
2182     const auto &Entry = RegARM32::RegTable[i];
2183     if (Entry.Scratch && (Include & RegSet_CallerSave))
2184       Registers[i] = true;
2185     if (Entry.Preserved && (Include & RegSet_CalleeSave))
2186       Registers[i] = true;
2187     if (Entry.StackPtr && (Include & RegSet_StackPointer))
2188       Registers[i] = true;
2189     if (Entry.FramePtr && (Include & RegSet_FramePointer))
2190       Registers[i] = true;
2191     if (Entry.Scratch && (Exclude & RegSet_CallerSave))
2192       Registers[i] = false;
2193     if (Entry.Preserved && (Exclude & RegSet_CalleeSave))
2194       Registers[i] = false;
2195     if (Entry.StackPtr && (Exclude & RegSet_StackPointer))
2196       Registers[i] = false;
2197     if (Entry.FramePtr && (Exclude & RegSet_FramePointer))
2198       Registers[i] = false;
2199   }
2200 
2201   return Registers;
2202 }
2203 
2204 void TargetARM32::lowerAlloca(const InstAlloca *Instr) {
2205   // Conservatively require the stack to be aligned. Some stack adjustment
2206   // operations implemented below assume that the stack is aligned before the
2207   // alloca. All the alloca code ensures that the stack alignment is preserved
2208   // after the alloca. The stack alignment restriction can be relaxed in some
2209   // cases.
2210   NeedsStackAlignment = true;
2211 
2212   // For default align=0, set it to the real value 1, to avoid any
2213   // bit-manipulation problems below.
2214   const uint32_t AlignmentParam = std::max(1u, Instr->getAlignInBytes());
2215 
2216   // LLVM enforces power of 2 alignment.
2217   assert(llvm::isPowerOf2_32(AlignmentParam));
2218   assert(llvm::isPowerOf2_32(ARM32_STACK_ALIGNMENT_BYTES));
2219 
2220   const uint32_t Alignment =
2221       std::max(AlignmentParam, ARM32_STACK_ALIGNMENT_BYTES);
2222   const bool OverAligned = Alignment > ARM32_STACK_ALIGNMENT_BYTES;
2223   const bool OptM1 = Func->getOptLevel() == Opt_m1;
2224   const bool AllocaWithKnownOffset = Instr->getKnownFrameOffset();
2225   const bool UseFramePointer =
2226       hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1;
2227 
2228   if (UseFramePointer)
2229     setHasFramePointer();
2230 
2231   Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
2232   if (OverAligned) {
2233     Sandboxer(this).align_sp(Alignment);
2234   }
2235 
2236   Variable *Dest = Instr->getDest();
2237   Operand *TotalSize = Instr->getSizeInBytes();
2238 
2239   if (const auto *ConstantTotalSize =
2240           llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
2241     const uint32_t Value =
2242         Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment);
2243     // Constant size alloca.
2244     if (!UseFramePointer) {
2245       // If we don't need a Frame Pointer, this alloca has a known offset to the
2246       // stack pointer. We don't need to adjust the stack pointer, nor assign any
2247       // value to Dest, as Dest is rematerializable.
2248       assert(Dest->isRematerializable());
2249       FixedAllocaSizeBytes += Value;
2250       Context.insert<InstFakeDef>(Dest);
2251       return;
2252     }
2253 
2254     // If a frame pointer is required, then we need to store the alloca'd result
2255     // in Dest.
2256     Operand *SubAmountRF =
2257         legalize(Ctx->getConstantInt32(Value), Legal_Reg | Legal_Flex);
2258     Sandboxer(this).sub_sp(SubAmountRF);
2259   } else {
2260     // Non-constant sizes need to be adjusted to the next highest multiple of
2261     // the required alignment at runtime.
2262     TotalSize = legalize(TotalSize, Legal_Reg | Legal_Flex);
2263     Variable *T = makeReg(IceType_i32);
2264     _mov(T, TotalSize);
2265     Operand *AddAmount = legalize(Ctx->getConstantInt32(Alignment - 1));
2266     _add(T, T, AddAmount);
2267     alignRegisterPow2(T, Alignment);
2268     Sandboxer(this).sub_sp(T);
2269   }
2270 
2271   // Add the out args size back to SP when computing Dest, so the allocation sits above the out args area.
2272   Variable *T = SP;
2273   if (MaxOutArgsSizeBytes != 0) {
2274     T = makeReg(getPointerType());
2275     Operand *OutArgsSizeRF = legalize(
2276         Ctx->getConstantInt32(MaxOutArgsSizeBytes), Legal_Reg | Legal_Flex);
2277     _add(T, SP, OutArgsSizeRF);
2278   }
2279 
2280   _mov(Dest, T);
2281 }
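// Rough sketch of the variable-size, over-aligned case above (assuming a
// 32-byte requested alignment larger than the stack alignment, and that
// align_sp/alignRegisterPow2 mask with the alignment; registers are
// illustrative):
//   bic sp, sp, #31            ; align_sp(32)
//   add t, n, #31
//   bic t, t, #31              ; round the size up to a multiple of 32
//   sub sp, sp, t
//   add dest, sp, #MaxOutArgsSizeBytes   ; keep the out args area below dest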
2282 
2283 void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) {
2284   if (isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi))
2285     return;
2286   Variable *SrcLoReg = legalizeToReg(SrcLo);
2287   switch (Ty) {
2288   default:
2289     llvm_unreachable(
2290         ("Unexpected type in div0Check: " + typeStdString(Ty)).c_str());
2291   case IceType_i8:
2292   case IceType_i16: {
2293     Operand *ShAmtImm = shAmtImm(32 - getScalarIntBitWidth(Ty));
2294     Variable *T = makeReg(IceType_i32);
2295     _lsls(T, SrcLoReg, ShAmtImm);
2296     Context.insert<InstFakeUse>(T);
2297   } break;
2298   case IceType_i32: {
2299     _tst(SrcLoReg, SrcLoReg);
2300     break;
2301   }
2302   case IceType_i64: {
2303     Variable *T = makeReg(IceType_i32);
2304     _orrs(T, SrcLoReg, legalize(SrcHi, Legal_Reg | Legal_Flex));
2305     // T isn't going to be used, but we need the side-effect of setting flags
2306     // from this operation.
2307     Context.insert<InstFakeUse>(T);
2308   }
2309   }
2310   auto *Label = InstARM32Label::create(Func, this);
2311   _br(Label, CondARM32::NE);
2312   _trap();
2313   Context.insert(Label);
2314 }
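// Illustrative division-by-zero checks emitted above (rLo/rHi = the divisor
// halves, "trap" = the target's trap instruction):
//   i32:     tst  rLo, rLo
//   i8/i16:  lsls t, rLo, #24 / #16   ; only the low 8/16 bits matter
//   i64:     orrs t, rLo, rHi         ; zero only if both halves are zero
// each followed by "bne skip ; trap ; skip:".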
2315 
2316 void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R,
2317                                Operand *Src1, ExtInstr ExtFunc,
2318                                DivInstr DivFunc, bool IsRemainder) {
2319   div0Check(Dest->getType(), Src1, nullptr);
2320   Variable *Src1R = legalizeToReg(Src1);
2321   Variable *T0R = Src0R;
2322   Variable *T1R = Src1R;
2323   if (Dest->getType() != IceType_i32) {
2324     T0R = makeReg(IceType_i32);
2325     (this->*ExtFunc)(T0R, Src0R, CondARM32::AL);
2326     T1R = makeReg(IceType_i32);
2327     (this->*ExtFunc)(T1R, Src1R, CondARM32::AL);
2328   }
2329   if (hasCPUFeature(TargetARM32Features::HWDivArm)) {
2330     (this->*DivFunc)(T, T0R, T1R, CondARM32::AL);
2331     if (IsRemainder) {
2332       Variable *T2 = makeReg(IceType_i32);
2333       _mls(T2, T, T1R, T0R);
2334       T = T2;
2335     }
2336     _mov(Dest, T);
2337   } else {
2338     llvm::report_fatal_error("div should have already been turned into a call");
2339   }
2340 }
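// Illustrative sequence for an i16 signed remainder when HWDivArm is
// available (ExtFunc being a sign extension such as sxth, DivFunc = sdiv):
//   sxth t0, src0
//   sxth t1, src1
//   sdiv q,  t0, t1
//   mls  r,  q,  t1, t0      ; r = t0 - q * t1
//   mov  dest, r
// Without hardware divide the operation must already have been lowered to a
// runtime call, hence the fatal error above.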
2341 
2342 TargetARM32::SafeBoolChain
2343 TargetARM32::lowerInt1Arithmetic(const InstArithmetic *Instr) {
2344   Variable *Dest = Instr->getDest();
2345   assert(Dest->getType() == IceType_i1);
2346 
2347   // So folding didn't work for Instr. Not a problem: We just need to
2348   // materialize the Sources, and perform the operation. We create regular
2349   // Variables (and not infinite-weight ones) because this call might recurse a
2350   // lot, and we might end up with tons of infinite weight temporaries.
2351   assert(Instr->getSrcSize() == 2);
2352   Variable *Src0 = Func->makeVariable(IceType_i1);
2353   SafeBoolChain Src0Safe = lowerInt1(Src0, Instr->getSrc(0));
2354 
2355   Operand *Src1 = Instr->getSrc(1);
2356   SafeBoolChain Src1Safe = SBC_Yes;
2357 
2358   if (!llvm::isa<Constant>(Src1)) {
2359     Variable *Src1V = Func->makeVariable(IceType_i1);
2360     Src1Safe = lowerInt1(Src1V, Src1);
2361     Src1 = Src1V;
2362   }
2363 
2364   Variable *T = makeReg(IceType_i1);
2365   Src0 = legalizeToReg(Src0);
2366   Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
2367   switch (Instr->getOp()) {
2368   default:
2369     // If this Unreachable is ever executed, add the offending operation to
2370     // the list of valid consumers.
2371     llvm::report_fatal_error("Unhandled i1 Op");
2372   case InstArithmetic::And:
2373     _and(T, Src0, Src1RF);
2374     break;
2375   case InstArithmetic::Or:
2376     _orr(T, Src0, Src1RF);
2377     break;
2378   case InstArithmetic::Xor:
2379     _eor(T, Src0, Src1RF);
2380     break;
2381   }
2382   _mov(Dest, T);
2383   return Src0Safe == SBC_Yes && Src1Safe == SBC_Yes ? SBC_Yes : SBC_No;
2384 }
2385 
2386 namespace {
2387 // NumericOperands is used during arithmetic/icmp lowering for constant folding.
2388 // It holds the two source operands, and maintains some state as to whether one
2389 // of them is a constant. If one of the operands is a constant, then it will be
2390 // stored as the operation's second source, with a bit indicating whether the
2391 // operands were swapped.
2392 //
2393 // The class is split into a base class with operand type-independent methods,
2394 // and a derived, templated class, for each type of operand we want to fold
2395 // constants for:
2396 //
2397 // NumericOperandsBase --> NumericOperands<ConstantFloat>
2398 //                     --> NumericOperands<ConstantDouble>
2399 //                     --> NumericOperands<ConstantInt32>
2400 //
2401 // NumericOperands<ConstantInt32> also exposes helper methods for emitting
2402 // inverted/negated immediates.
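// For example, lowering "a = 100 + b" builds NumericOperandsBase(100, b):
// Src0 becomes b, Src1 becomes the constant 100, and Swapped is true, so the
// caller either uses a reversed instruction form or recovers the original
// order through unswappedSrc0R()/unswappedSrc1R() when the order matters.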
2403 class NumericOperandsBase {
2404   NumericOperandsBase() = delete;
2405   NumericOperandsBase(const NumericOperandsBase &) = delete;
2406   NumericOperandsBase &operator=(const NumericOperandsBase &) = delete;
2407 
2408 public:
2409   NumericOperandsBase(Operand *S0, Operand *S1)
2410       : Src0(NonConstOperand(S0, S1)), Src1(ConstOperand(S0, S1)),
2411         Swapped(Src0 == S1 && S0 != S1) {
2412     assert(Src0 != nullptr);
2413     assert(Src1 != nullptr);
2414     assert(Src0 != Src1 || S0 == S1);
2415   }
2416 
2417   bool hasConstOperand() const {
2418     return llvm::isa<Constant>(Src1) && !llvm::isa<ConstantRelocatable>(Src1);
2419   }
2420 
2421   bool swappedOperands() const { return Swapped; }
2422 
2423   Variable *src0R(TargetARM32 *Target) const {
2424     return legalizeToReg(Target, Src0);
2425   }
2426 
2427   Variable *unswappedSrc0R(TargetARM32 *Target) const {
2428     return legalizeToReg(Target, Swapped ? Src1 : Src0);
2429   }
2430 
2431   Operand *src1RF(TargetARM32 *Target) const {
2432     return legalizeToRegOrFlex(Target, Src1);
2433   }
2434 
2435   Variable *unswappedSrc1R(TargetARM32 *Target) const {
2436     return legalizeToReg(Target, Swapped ? Src0 : Src1);
2437   }
2438 
2439   Operand *src1() const { return Src1; }
2440 
2441 protected:
2442   Operand *const Src0;
2443   Operand *const Src1;
2444   const bool Swapped;
2445 
2446   static Variable *legalizeToReg(TargetARM32 *Target, Operand *Src) {
2447     return Target->legalizeToReg(Src);
2448   }
2449 
2450   static Operand *legalizeToRegOrFlex(TargetARM32 *Target, Operand *Src) {
2451     return Target->legalize(Src,
2452                             TargetARM32::Legal_Reg | TargetARM32::Legal_Flex);
2453   }
2454 
2455 private:
2456   static Operand *NonConstOperand(Operand *S0, Operand *S1) {
2457     if (!llvm::isa<Constant>(S0))
2458       return S0;
2459     if (!llvm::isa<Constant>(S1))
2460       return S1;
2461     if (llvm::isa<ConstantRelocatable>(S1) &&
2462         !llvm::isa<ConstantRelocatable>(S0))
2463       return S1;
2464     return S0;
2465   }
2466 
2467   static Operand *ConstOperand(Operand *S0, Operand *S1) {
2468     if (!llvm::isa<Constant>(S0))
2469       return S1;
2470     if (!llvm::isa<Constant>(S1))
2471       return S0;
2472     if (llvm::isa<ConstantRelocatable>(S1) &&
2473         !llvm::isa<ConstantRelocatable>(S0))
2474       return S0;
2475     return S1;
2476   }
2477 };
2478 
2479 template <typename C> class NumericOperands : public NumericOperandsBase {
2480   NumericOperands() = delete;
2481   NumericOperands(const NumericOperands &) = delete;
2482   NumericOperands &operator=(const NumericOperands &) = delete;
2483 
2484 public:
2485   NumericOperands(Operand *S0, Operand *S1) : NumericOperandsBase(S0, S1) {
2486     assert(!hasConstOperand() || llvm::isa<C>(this->Src1));
2487   }
2488 
2489   typename C::PrimType getConstantValue() const {
2490     return llvm::cast<C>(Src1)->getValue();
2491   }
2492 };
2493 
2494 using FloatOperands = NumericOperands<ConstantFloat>;
2495 using DoubleOperands = NumericOperands<ConstantDouble>;
2496 
2497 class Int32Operands : public NumericOperands<ConstantInteger32> {
2498   Int32Operands() = delete;
2499   Int32Operands(const Int32Operands &) = delete;
2500   Int32Operands &operator=(const Int32Operands &) = delete;
2501 
2502 public:
2503   Int32Operands(Operand *S0, Operand *S1) : NumericOperands(S0, S1) {}
2504 
2505   Operand *unswappedSrc1RShAmtImm(TargetARM32 *Target) const {
2506     if (!swappedOperands() && hasConstOperand()) {
2507       return Target->shAmtImm(getConstantValue() & 0x1F);
2508     }
2509     return legalizeToReg(Target, Swapped ? Src0 : Src1);
2510   }
2511 
2512   bool isSrc1ImmediateZero() const {
2513     if (!swappedOperands() && hasConstOperand()) {
2514       return getConstantValue() == 0;
2515     }
2516     return false;
2517   }
2518 
2519   bool immediateIsFlexEncodable() const {
2520     uint32_t Rotate, Imm8;
2521     return OperandARM32FlexImm::canHoldImm(getConstantValue(), &Rotate, &Imm8);
2522   }
2523 
2524   bool negatedImmediateIsFlexEncodable() const {
2525     uint32_t Rotate, Imm8;
2526     return OperandARM32FlexImm::canHoldImm(
2527         -static_cast<int32_t>(getConstantValue()), &Rotate, &Imm8);
2528   }
2529 
2530   Operand *negatedSrc1F(TargetARM32 *Target) const {
2531     return legalizeToRegOrFlex(Target,
2532                                Target->getCtx()->getConstantInt32(
2533                                    -static_cast<int32_t>(getConstantValue())));
2534   }
2535 
2536   bool invertedImmediateIsFlexEncodable() const {
2537     uint32_t Rotate, Imm8;
2538     return OperandARM32FlexImm::canHoldImm(
2539         ~static_cast<uint32_t>(getConstantValue()), &Rotate, &Imm8);
2540   }
2541 
2542   Operand *invertedSrc1F(TargetARM32 *Target) const {
2543     return legalizeToRegOrFlex(Target,
2544                                Target->getCtx()->getConstantInt32(
2545                                    ~static_cast<uint32_t>(getConstantValue())));
2546   }
2547 };
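// For example, an add whose constant is 0xFFFFFFFF cannot encode the
// immediate as a flex operand, but negatedImmediateIsFlexEncodable() holds
// (the negation is 1), so a caller can emit "sub dest, src, #1" via
// negatedSrc1F() instead of first materializing the constant in a register.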
2548 } // end of anonymous namespace
2549 
2550 void TargetARM32::preambleDivRem(const InstCall *Instr) {
2551   Operand *Src1 = Instr->getArg(1);
2552 
2553   switch (Src1->getType()) {
2554   default:
2555     llvm::report_fatal_error("Invalid type for idiv.");
2556   case IceType_i64: {
2557     if (auto *C = llvm::dyn_cast<ConstantInteger64>(Src1)) {
2558       if (C->getValue() == 0) {
2559         _trap();
2560         return;
2561       }
2562     }
2563     div0Check(IceType_i64, loOperand(Src1), hiOperand(Src1));
2564     return;
2565   }
2566   case IceType_i32: {
2567     // Src0 and Src1 have already been appropriately extended to an i32, so we
2568     // don't check for i8 and i16.
2569     if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
2570       if (C->getValue() == 0) {
2571         _trap();
2572         return;
2573       }
2574     }
2575     div0Check(IceType_i32, Src1, nullptr);
2576     return;
2577   }
2578   }
2579 }
2580 
2581 void TargetARM32::lowerInt64Arithmetic(InstArithmetic::OpKind Op,
2582                                        Variable *Dest, Operand *Src0,
2583                                        Operand *Src1) {
2584   Int32Operands SrcsLo(loOperand(Src0), loOperand(Src1));
2585   Int32Operands SrcsHi(hiOperand(Src0), hiOperand(Src1));
2586   assert(SrcsLo.swappedOperands() == SrcsHi.swappedOperands());
2587   assert(SrcsLo.hasConstOperand() == SrcsHi.hasConstOperand());
2588 
2589   auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
2590   auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2591   Variable *T_Lo = makeReg(DestLo->getType());
2592   Variable *T_Hi = makeReg(DestHi->getType());
2593 
2594   switch (Op) {
2595   case InstArithmetic::_num:
2596     llvm::report_fatal_error("Unknown arithmetic operator");
2597     return;
2598   case InstArithmetic::Add: {
2599     Variable *Src0LoR = SrcsLo.src0R(this);
2600     Operand *Src1LoRF = SrcsLo.src1RF(this);
2601     Variable *Src0HiR = SrcsHi.src0R(this);
2602     Operand *Src1HiRF = SrcsHi.src1RF(this);
2603     _adds(T_Lo, Src0LoR, Src1LoRF);
2604     _mov(DestLo, T_Lo);
2605     _adc(T_Hi, Src0HiR, Src1HiRF);
2606     _mov(DestHi, T_Hi);
2607     return;
2608   }
2609   case InstArithmetic::And: {
2610     Variable *Src0LoR = SrcsLo.src0R(this);
2611     Operand *Src1LoRF = SrcsLo.src1RF(this);
2612     Variable *Src0HiR = SrcsHi.src0R(this);
2613     Operand *Src1HiRF = SrcsHi.src1RF(this);
2614     _and(T_Lo, Src0LoR, Src1LoRF);
2615     _mov(DestLo, T_Lo);
2616     _and(T_Hi, Src0HiR, Src1HiRF);
2617     _mov(DestHi, T_Hi);
2618     return;
2619   }
2620   case InstArithmetic::Or: {
2621     Variable *Src0LoR = SrcsLo.src0R(this);
2622     Operand *Src1LoRF = SrcsLo.src1RF(this);
2623     Variable *Src0HiR = SrcsHi.src0R(this);
2624     Operand *Src1HiRF = SrcsHi.src1RF(this);
2625     _orr(T_Lo, Src0LoR, Src1LoRF);
2626     _mov(DestLo, T_Lo);
2627     _orr(T_Hi, Src0HiR, Src1HiRF);
2628     _mov(DestHi, T_Hi);
2629     return;
2630   }
2631   case InstArithmetic::Xor: {
2632     Variable *Src0LoR = SrcsLo.src0R(this);
2633     Operand *Src1LoRF = SrcsLo.src1RF(this);
2634     Variable *Src0HiR = SrcsHi.src0R(this);
2635     Operand *Src1HiRF = SrcsHi.src1RF(this);
2636     _eor(T_Lo, Src0LoR, Src1LoRF);
2637     _mov(DestLo, T_Lo);
2638     _eor(T_Hi, Src0HiR, Src1HiRF);
2639     _mov(DestHi, T_Hi);
2640     return;
2641   }
2642   case InstArithmetic::Sub: {
2643     Variable *Src0LoR = SrcsLo.src0R(this);
2644     Operand *Src1LoRF = SrcsLo.src1RF(this);
2645     Variable *Src0HiR = SrcsHi.src0R(this);
2646     Operand *Src1HiRF = SrcsHi.src1RF(this);
2647     if (SrcsLo.swappedOperands()) {
2648       _rsbs(T_Lo, Src0LoR, Src1LoRF);
2649       _mov(DestLo, T_Lo);
2650       _rsc(T_Hi, Src0HiR, Src1HiRF);
2651       _mov(DestHi, T_Hi);
2652     } else {
2653       _subs(T_Lo, Src0LoR, Src1LoRF);
2654       _mov(DestLo, T_Lo);
2655       _sbc(T_Hi, Src0HiR, Src1HiRF);
2656       _mov(DestHi, T_Hi);
2657     }
2658     return;
2659   }
2660   case InstArithmetic::Mul: {
2661     // GCC 4.8 does:
2662     // a=b*c ==>
2663     //   t_acc =(mul) (b.lo * c.hi)
2664     //   t_acc =(mla) (c.lo * b.hi) + t_acc
2665     //   t.hi,t.lo =(umull) b.lo * c.lo
2666     //   t.hi += t_acc
2667     //   a.lo = t.lo
2668     //   a.hi = t.hi
2669     //
2670     // LLVM does:
2671     //   t.hi,t.lo =(umull) b.lo * c.lo
2672     //   t.hi =(mla) (b.lo * c.hi) + t.hi
2673     //   t.hi =(mla) (b.hi * c.lo) + t.hi
2674     //   a.lo = t.lo
2675     //   a.hi = t.hi
2676     //
2677     // LLVM's lowering has fewer instructions, but more register pressure:
2678     // t.lo is live from beginning to end, while GCC delays the two-dest
2679     // instruction till the end, and kills c.hi immediately.
2680     Variable *T_Acc = makeReg(IceType_i32);
2681     Variable *T_Acc1 = makeReg(IceType_i32);
2682     Variable *T_Hi1 = makeReg(IceType_i32);
2683     Variable *Src0RLo = SrcsLo.unswappedSrc0R(this);
2684     Variable *Src0RHi = SrcsHi.unswappedSrc0R(this);
2685     Variable *Src1RLo = SrcsLo.unswappedSrc1R(this);
2686     Variable *Src1RHi = SrcsHi.unswappedSrc1R(this);
2687     _mul(T_Acc, Src0RLo, Src1RHi);
2688     _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc);
2689     _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo);
2690     _add(T_Hi, T_Hi1, T_Acc1);
2691     _mov(DestLo, T_Lo);
2692     _mov(DestHi, T_Hi);
2693     return;
2694   }
2695   case InstArithmetic::Shl: {
2696     if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) {
2697       Variable *Src0RLo = SrcsLo.src0R(this);
2698       // Truncating the ShAmt to [0, 63] because that's what ARM does anyway.
2699       const int32_t ShAmtImm = SrcsLo.getConstantValue() & 0x3F;
2700       if (ShAmtImm == 0) {
2701         _mov(DestLo, Src0RLo);
2702         _mov(DestHi, SrcsHi.src0R(this));
2703         return;
2704       }
2705 
2706       if (ShAmtImm >= 32) {
2707         if (ShAmtImm == 32) {
2708           _mov(DestHi, Src0RLo);
2709         } else {
2710           Operand *ShAmtOp = shAmtImm(ShAmtImm - 32);
2711           _lsl(T_Hi, Src0RLo, ShAmtOp);
2712           _mov(DestHi, T_Hi);
2713         }
2714 
2715         Operand *_0 =
2716             legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
2717         _mov(T_Lo, _0);
2718         _mov(DestLo, T_Lo);
2719         return;
2720       }
2721 
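      // For 0 < ShAmtImm < 32, the code below computes:
      //   DestHi = (Src0Hi << ShAmtImm) | (Src0Lo >> (32 - ShAmtImm))
      //   DestLo = Src0Lo << ShAmtImm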
2722       Variable *Src0RHi = SrcsHi.src0R(this);
2723       Operand *ShAmtOp = shAmtImm(ShAmtImm);
2724       Operand *ComplShAmtOp = shAmtImm(32 - ShAmtImm);
2725       _lsl(T_Hi, Src0RHi, ShAmtOp);
2726       _orr(T_Hi, T_Hi,
2727            OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
2728                                        OperandARM32::LSR, ComplShAmtOp));
2729       _mov(DestHi, T_Hi);
2730 
2731       _lsl(T_Lo, Src0RLo, ShAmtOp);
2732       _mov(DestLo, T_Lo);
2733       return;
2734     }
2735 
2736     // a=b<<c ==>
2737     // pnacl-llc does:
2738     // mov     t_b.lo, b.lo
2739     // mov     t_b.hi, b.hi
2740     // mov     t_c.lo, c.lo
2741     // rsb     T0, t_c.lo, #32
2742     // lsr     T1, t_b.lo, T0
2743     // orr     t_a.hi, T1, t_b.hi, lsl t_c.lo
2744     // sub     T2, t_c.lo, #32
2745     // cmp     T2, #0
2746     // lslge   t_a.hi, t_b.lo, T2
2747     // lsl     t_a.lo, t_b.lo, t_c.lo
2748     // mov     a.lo, t_a.lo
2749     // mov     a.hi, t_a.hi
2750     //
2751     // GCC 4.8 does:
2752     // sub t_c1, c.lo, #32
2753     // lsl t_hi, b.hi, c.lo
2754     // orr t_hi, t_hi, b.lo, lsl t_c1
2755     // rsb t_c2, c.lo, #32
2756     // orr t_hi, t_hi, b.lo, lsr t_c2
2757     // lsl t_lo, b.lo, c.lo
2758     // a.lo = t_lo
2759     // a.hi = t_hi
2760     //
2761     // These are incompatible, therefore we mimic pnacl-llc.
2762     // Can be strength-reduced for constant-shifts, but we don't do that for
2763     // now.
2764     // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On
2765     // ARM, shifts only take the lower 8 bits of the shift register, and
2766     // saturate to the range 0-32, so the negative value will saturate to 32.
2767     Operand *_32 = legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
2768     Operand *_0 =
2769         legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
2770     Variable *T0 = makeReg(IceType_i32);
2771     Variable *T1 = makeReg(IceType_i32);
2772     Variable *T2 = makeReg(IceType_i32);
2773     Variable *TA_Hi = makeReg(IceType_i32);
2774     Variable *TA_Lo = makeReg(IceType_i32);
2775     Variable *Src0RLo = SrcsLo.unswappedSrc0R(this);
2776     Variable *Src0RHi = SrcsHi.unswappedSrc0R(this);
2777     Variable *Src1RLo = SrcsLo.unswappedSrc1R(this);
2778     _rsb(T0, Src1RLo, _32);
2779     _lsr(T1, Src0RLo, T0);
2780     _orr(TA_Hi, T1,
2781          OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
2782                                      OperandARM32::LSL, Src1RLo));
2783     _sub(T2, Src1RLo, _32);
2784     _cmp(T2, _0);
2785     _lsl(TA_Hi, Src0RLo, T2, CondARM32::GE);
2786     _set_dest_redefined();
2787     _lsl(TA_Lo, Src0RLo, Src1RLo);
2788     _mov(DestLo, TA_Lo);
2789     _mov(DestHi, TA_Hi);
2790     return;
2791   }
2792   case InstArithmetic::Lshr:
2793   case InstArithmetic::Ashr: {
2794     const bool ASR = Op == InstArithmetic::Ashr;
2795     if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) {
2796       Variable *Src0RHi = SrcsHi.src0R(this);
2797       // Truncating the ShAmt to [0, 63] because that's what ARM does anyway.
2798       const int32_t ShAmt = SrcsLo.getConstantValue() & 0x3F;
2799       if (ShAmt == 0) {
2800         _mov(DestHi, Src0RHi);
2801         _mov(DestLo, SrcsLo.src0R(this));
2802         return;
2803       }
2804 
2805       if (ShAmt >= 32) {
2806         if (ShAmt == 32) {
2807           _mov(DestLo, Src0RHi);
2808         } else {
2809           Operand *ShAmtImm = shAmtImm(ShAmt - 32);
2810           if (ASR) {
2811             _asr(T_Lo, Src0RHi, ShAmtImm);
2812           } else {
2813             _lsr(T_Lo, Src0RHi, ShAmtImm);
2814           }
2815           _mov(DestLo, T_Lo);
2816         }
2817 
2818         if (ASR) {
2819           Operand *_31 = shAmtImm(31);
2820           _asr(T_Hi, Src0RHi, _31);
2821         } else {
2822           Operand *_0 = legalize(Ctx->getConstantZero(IceType_i32),
2823                                  Legal_Reg | Legal_Flex);
2824           _mov(T_Hi, _0);
2825         }
2826         _mov(DestHi, T_Hi);
2827         return;
2828       }
2829 
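      // For 0 < ShAmt < 32, the code below computes:
      //   DestLo = (Src0Lo >> ShAmt) | (Src0Hi << (32 - ShAmt))
      //   DestHi = Src0Hi >> ShAmt   (asr for Ashr, lsr for Lshr)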
2830       Variable *Src0RLo = SrcsLo.src0R(this);
2831       Operand *ShAmtImm = shAmtImm(ShAmt);
2832       Operand *ComplShAmtImm = shAmtImm(32 - ShAmt);
2833       _lsr(T_Lo, Src0RLo, ShAmtImm);
2834       _orr(T_Lo, T_Lo,
2835            OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
2836                                        OperandARM32::LSL, ComplShAmtImm));
2837       _mov(DestLo, T_Lo);
2838 
2839       if (ASR) {
2840         _asr(T_Hi, Src0RHi, ShAmtImm);
2841       } else {
2842         _lsr(T_Hi, Src0RHi, ShAmtImm);
2843       }
2844       _mov(DestHi, T_Hi);
2845       return;
2846     }
2847 
2848     // a=b>>c
2849     // pnacl-llc does:
2850     // mov        t_b.lo, b.lo
2851     // mov        t_b.hi, b.hi
2852     // mov        t_c.lo, c.lo
2853     // lsr        T0, t_b.lo, t_c.lo
2854     // rsb        T1, t_c.lo, #32
2855     // orr        t_a.lo, T0, t_b.hi, lsl T1
2856     // sub        T2, t_c.lo, #32
2857     // cmp        T2, #0
2858     // [al]srge   t_a.lo, t_b.hi, T2
2859     // [al]sr     t_a.hi, t_b.hi, t_c.lo
2860     // mov        a.lo, t_a.lo
2861     // mov        a.hi, t_a.hi
2862     //
2863     // GCC 4.8 does (lsr):
2864     // rsb        t_c1, c.lo, #32
2865     // lsr        t_lo, b.lo, c.lo
2866     // orr        t_lo, t_lo, b.hi, lsl t_c1
2867     // sub        t_c2, c.lo, #32
2868     // orr        t_lo, t_lo, b.hi, lsr t_c2
2869     // lsr        t_hi, b.hi, c.lo
2870     // mov        a.lo, t_lo
2871     // mov        a.hi, t_hi
2872     //
2873     // These are incompatible, therefore we mimic pnacl-llc.
2874     Operand *_32 = legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
2875     Operand *_0 =
2876         legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
2877     Variable *T0 = makeReg(IceType_i32);
2878     Variable *T1 = makeReg(IceType_i32);
2879     Variable *T2 = makeReg(IceType_i32);
2880     Variable *TA_Lo = makeReg(IceType_i32);
2881     Variable *TA_Hi = makeReg(IceType_i32);
2882     Variable *Src0RLo = SrcsLo.unswappedSrc0R(this);
2883     Variable *Src0RHi = SrcsHi.unswappedSrc0R(this);
2884     Variable *Src1RLo = SrcsLo.unswappedSrc1R(this);
2885     _lsr(T0, Src0RLo, Src1RLo);
2886     _rsb(T1, Src1RLo, _32);
2887     _orr(TA_Lo, T0,
2888          OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
2889                                      OperandARM32::LSL, T1));
2890     _sub(T2, Src1RLo, _32);
2891     _cmp(T2, _0);
2892     if (ASR) {
2893       _asr(TA_Lo, Src0RHi, T2, CondARM32::GE);
2894       _set_dest_redefined();
2895       _asr(TA_Hi, Src0RHi, Src1RLo);
2896     } else {
2897       _lsr(TA_Lo, Src0RHi, T2, CondARM32::GE);
2898       _set_dest_redefined();
2899       _lsr(TA_Hi, Src0RHi, Src1RLo);
2900     }
2901     _mov(DestLo, TA_Lo);
2902     _mov(DestHi, TA_Hi);
2903     return;
2904   }
2905   case InstArithmetic::Fadd:
2906   case InstArithmetic::Fsub:
2907   case InstArithmetic::Fmul:
2908   case InstArithmetic::Fdiv:
2909   case InstArithmetic::Frem:
2910     llvm::report_fatal_error("FP instruction with i64 type");
2911     return;
2912   case InstArithmetic::Udiv:
2913   case InstArithmetic::Sdiv:
2914   case InstArithmetic::Urem:
2915   case InstArithmetic::Srem:
2916     llvm::report_fatal_error("Call-helper-involved instruction for i64 type "
2917                              "should have already been handled before");
2918     return;
2919   }
2920 }
2921 
2922 namespace {
2923 // StrengthReduction is a namespace with the strength reduction machinery. The
2924 // entry point is the StrengthReduction::tryToOptimize method. It returns true
2925 // if the optimization can be performed, and false otherwise.
2926 //
2927 // If the optimization can be performed, tryToOptimize sets its NumOperations
2928 // parameter to the number of shifts that are needed to perform the
2929 // multiplication; and it sets the Operations parameter with <ShAmt, AddOrSub>
2930 // tuples that describe how to materialize the multiplication.
2931 //
2932 // The algorithm finds contiguous 1s in the Multiplication source, and uses one
2933 // or two shifts to materialize it. A sequence of 1s, e.g.,
2934 //
2935 //                  M           N
2936 //   ...00000000000011111...111110000000...
2937 //
2938 // is materializable with (1 << (M + 1)) - (1 << N):
2939 //
2940 //   ...00000000000100000...000000000000...      [1 << (M + 1)]
2941 //   ...00000000000000000...000010000000... (-)  [1 << N]
2942 //   --------------------------------------
2943 //   ...00000000000011111...111110000000...
2944 //
2945 // A single bit set is materialized with just a left shift.
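//
// For example, 7 (0b111) can be materialized as (x << 3) - x, and 10 (0b1010)
// as (x << 3) + (x << 1).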
2946 namespace StrengthReduction {
2947 enum AggregationOperation {
2948   AO_Invalid,
2949   AO_Add,
2950   AO_Sub,
2951 };
2952 
2953 // AggregationElement is a glorified <ShAmt, AddOrSub> tuple.
2954 class AggregationElement {
2955   AggregationElement(const AggregationElement &) = delete;
2956 
2957 public:
2958   AggregationElement() = default;
2959   AggregationElement &operator=(const AggregationElement &) = default;
2960   AggregationElement(AggregationOperation Op, uint32_t ShAmt)
2961       : Op(Op), ShAmt(ShAmt) {}
2962 
2963   Operand *createShiftedOperand(Cfg *Func, Variable *OpR) const {
2964     assert(OpR->mustHaveReg());
2965     if (ShAmt == 0) {
2966       return OpR;
2967     }
2968     return OperandARM32FlexReg::create(
2969         Func, IceType_i32, OpR, OperandARM32::LSL,
2970         OperandARM32ShAmtImm::create(
2971             Func, llvm::cast<ConstantInteger32>(
2972                       Func->getContext()->getConstantInt32(ShAmt))));
2973   }
2974 
2975   bool aggregateWithAdd() const {
2976     switch (Op) {
2977     case AO_Invalid:
2978       llvm::report_fatal_error("Invalid Strength Reduction Operations.");
2979     case AO_Add:
2980       return true;
2981     case AO_Sub:
2982       return false;
2983     }
2984     llvm_unreachable("(silence g++ warning)");
2985   }
2986 
2987   uint32_t shAmt() const { return ShAmt; }
2988 
2989 private:
2990   AggregationOperation Op = AO_Invalid;
2991   uint32_t ShAmt;
2992 };
2993 
2994 // [RangeStart, RangeEnd] is a range of 1s in Src.
2995 template <std::size_t N>
2996 bool addOperations(uint32_t RangeStart, uint32_t RangeEnd, SizeT *NumOperations,
2997                    std::array<AggregationElement, N> *Operations) {
2998   assert(*NumOperations < N);
2999   if (RangeStart == RangeEnd) {
3000     // Single bit set:
3001     // Src           : 0...00010...
3002     // RangeStart    :        ^
3003     // RangeEnd      :        ^
3004     // NegSrc        : 0...00001...
3005     (*Operations)[*NumOperations] = AggregationElement(AO_Add, RangeStart);
3006     ++(*NumOperations);
3007     return true;
3008   }
3009 
3010   // Sequence of 1s (two operations required):
3011   // Src           : 0...00011...110...
3012   // RangeStart    :        ^
3013   // RangeEnd      :              ^
3014   // NegSrc        : 0...00000...001...
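  // The run of 1s spanning bits RangeEnd through RangeStart equals
  // (1 << (RangeStart + 1)) - (1 << RangeEnd), so it is recorded below as an
  // Add with shift RangeStart + 1 followed by a Sub with shift RangeEnd.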
3015   if (*NumOperations + 1 >= N) {
3016     return false;
3017   }
3018   (*Operations)[*NumOperations] = AggregationElement(AO_Add, RangeStart + 1);
3019   ++(*NumOperations);
3020   (*Operations)[*NumOperations] = AggregationElement(AO_Sub, RangeEnd);
3021   ++(*NumOperations);
3022   return true;
3023 }
3024 
3025 // tryToOptimize scans Src looking for sequences of 1s (including the unitary
3026 // bit 1 surrounded by zeroes).
3027 template <std::size_t N>
3028 bool tryToOptimize(uint32_t Src, SizeT *NumOperations,
3029                    std::array<AggregationElement, N> *Operations) {
3030   constexpr uint32_t SrcSizeBits = sizeof(Src) * CHAR_BIT;
3031   uint32_t NegSrc = ~Src;
3032 
3033   *NumOperations = 0;
3034   while (Src != 0 && *NumOperations < N) {
3035     // Each step of the algorithm:
3036     //   * finds L, the last bit set in Src;
3037     //   * clears all the upper bits in NegSrc up to bit L;
3038     //   * finds nL, the last bit set in NegSrc;
3039     //   * clears all the upper bits in Src up to bit nL;
3040     //
3041     // if L == nL + 1, then a unitary 1 was found in Src. Otherwise, a sequence
3042     // of 1s starting at L, and ending at nL + 1, was found.
3043     const uint32_t SrcLastBitSet = llvm::findLastSet(Src);
3044     const uint32_t NegSrcClearMask =
3045         (SrcLastBitSet == 0) ? 0
3046                              : (0xFFFFFFFFu) >> (SrcSizeBits - SrcLastBitSet);
3047     NegSrc &= NegSrcClearMask;
3048     if (NegSrc == 0) {
3049       if (addOperations(SrcLastBitSet, 0, NumOperations, Operations)) {
3050         return true;
3051       }
3052       return false;
3053     }
3054     const uint32_t NegSrcLastBitSet = llvm::findLastSet(NegSrc);
3055     assert(NegSrcLastBitSet < SrcLastBitSet);
3056     const uint32_t SrcClearMask =
3057         (NegSrcLastBitSet == 0)
3058             ? 0
3059             : (0xFFFFFFFFu) >> (SrcSizeBits - NegSrcLastBitSet);
3060     Src &= SrcClearMask;
3061     if (!addOperations(SrcLastBitSet, NegSrcLastBitSet + 1, NumOperations,
3062                        Operations)) {
3063       return false;
3064     }
3065   }
3066 
3067   return Src == 0;
3068 }
3069 } // end of namespace StrengthReduction
3070 } // end of anonymous namespace
3071 
3072 void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) {
3073   Variable *Dest = Instr->getDest();
3074 
3075   if (Dest->isRematerializable()) {
3076     Context.insert<InstFakeDef>(Dest);
3077     return;
3078   }
3079 
3080   Type DestTy = Dest->getType();
3081   if (DestTy == IceType_i1) {
3082     lowerInt1Arithmetic(Instr);
3083     return;
3084   }
3085 
3086   Operand *Src0 = legalizeUndef(Instr->getSrc(0));
3087   Operand *Src1 = legalizeUndef(Instr->getSrc(1));
3088   if (DestTy == IceType_i64) {
3089     lowerInt64Arithmetic(Instr->getOp(), Instr->getDest(), Src0, Src1);
3090     return;
3091   }
3092 
3093   if (isVectorType(DestTy)) {
3094     switch (Instr->getOp()) {
3095     default:
3096       UnimplementedLoweringError(this, Instr);
3097       return;
3098     // Explicitly allow vector instructions we have implemented/enabled.
3099     case InstArithmetic::Add:
3100     case InstArithmetic::And:
3101     case InstArithmetic::Ashr:
3102     case InstArithmetic::Fadd:
3103     case InstArithmetic::Fmul:
3104     case InstArithmetic::Fsub:
3105     case InstArithmetic::Lshr:
3106     case InstArithmetic::Mul:
3107     case InstArithmetic::Or:
3108     case InstArithmetic::Shl:
3109     case InstArithmetic::Sub:
3110     case InstArithmetic::Xor:
3111       break;
3112     }
3113   }
3114 
3115   Variable *T = makeReg(DestTy);
3116 
3117   // * Handle div/rem separately. They require a non-legalized Src1 to inspect
3118   // whether or not Src1 is a non-zero constant. Once legalized it is more
3119   // difficult to determine (constant may be moved to a register).
3120   // * Handle floating point arithmetic separately: they require Src1 to be
3121   // legalized to a register.
3122   switch (Instr->getOp()) {
3123   default:
3124     break;
3125   case InstArithmetic::Udiv: {
3126     constexpr bool NotRemainder = false;
3127     Variable *Src0R = legalizeToReg(Src0);
3128     lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
3129                  NotRemainder);
3130     return;
3131   }
3132   case InstArithmetic::Sdiv: {
3133     constexpr bool NotRemainder = false;
3134     Variable *Src0R = legalizeToReg(Src0);
3135     lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv,
3136                  NotRemainder);
3137     return;
3138   }
3139   case InstArithmetic::Urem: {
3140     constexpr bool IsRemainder = true;
3141     Variable *Src0R = legalizeToReg(Src0);
3142     lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
3143                  IsRemainder);
3144     return;
3145   }
3146   case InstArithmetic::Srem: {
3147     constexpr bool IsRemainder = true;
3148     Variable *Src0R = legalizeToReg(Src0);
3149     lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv,
3150                  IsRemainder);
3151     return;
3152   }
3153   case InstArithmetic::Frem: {
3154     if (!isScalarFloatingType(DestTy)) {
3155       llvm::report_fatal_error("Unexpected type when lowering frem.");
3156     }
3157     llvm::report_fatal_error("Frem should have already been lowered.");
3158   }
3159   case InstArithmetic::Fadd: {
3160     Variable *Src0R = legalizeToReg(Src0);
3161     if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
3162       Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
3163       Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
3164       _vmla(Src0R, Src1R, Src2R);
3165       _mov(Dest, Src0R);
3166       return;
3167     }
3168 
3169     Variable *Src1R = legalizeToReg(Src1);
3170     _vadd(T, Src0R, Src1R);
3171     _mov(Dest, T);
3172     return;
3173   }
3174   case InstArithmetic::Fsub: {
3175     Variable *Src0R = legalizeToReg(Src0);
3176     if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
3177       Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
3178       Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
3179       _vmls(Src0R, Src1R, Src2R);
3180       _mov(Dest, Src0R);
3181       return;
3182     }
3183     Variable *Src1R = legalizeToReg(Src1);
3184     _vsub(T, Src0R, Src1R);
3185     _mov(Dest, T);
3186     return;
3187   }
3188   case InstArithmetic::Fmul: {
3189     Variable *Src0R = legalizeToReg(Src0);
3190     Variable *Src1R = legalizeToReg(Src1);
3191     _vmul(T, Src0R, Src1R);
3192     _mov(Dest, T);
3193     return;
3194   }
3195   case InstArithmetic::Fdiv: {
3196     Variable *Src0R = legalizeToReg(Src0);
3197     Variable *Src1R = legalizeToReg(Src1);
3198     _vdiv(T, Src0R, Src1R);
3199     _mov(Dest, T);
3200     return;
3201   }
3202   }
3203 
3204   // Handle everything else here.
3205   Int32Operands Srcs(Src0, Src1);
3206   switch (Instr->getOp()) {
3207   case InstArithmetic::_num:
3208     llvm::report_fatal_error("Unknown arithmetic operator");
3209     return;
3210   case InstArithmetic::Add: {
3211     if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
3212       assert(!isVectorType(DestTy));
3213       Variable *Src0R = legalizeToReg(Src0);
3214       Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
3215       Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
3216       _mla(T, Src1R, Src2R, Src0R);
3217       _mov(Dest, T);
3218       return;
3219     }
3220 
3221     if (Srcs.hasConstOperand()) {
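      // If the immediate is not flex-encodable but its negation is, lower the
      // add through the negated immediate with a subtraction (sub, or rsb when
      // the operands were swapped).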
3222       if (!Srcs.immediateIsFlexEncodable() &&
3223           Srcs.negatedImmediateIsFlexEncodable()) {
3224         assert(!isVectorType(DestTy));
3225         Variable *Src0R = Srcs.src0R(this);
3226         Operand *Src1F = Srcs.negatedSrc1F(this);
3227         if (!Srcs.swappedOperands()) {
3228           _sub(T, Src0R, Src1F);
3229         } else {
3230           _rsb(T, Src0R, Src1F);
3231         }
3232         _mov(Dest, T);
3233         return;
3234       }
3235     }
3236     Variable *Src0R = Srcs.src0R(this);
3237     if (isVectorType(DestTy)) {
3238       Variable *Src1R = legalizeToReg(Src1);
3239       _vadd(T, Src0R, Src1R);
3240     } else {
3241       Operand *Src1RF = Srcs.src1RF(this);
3242       _add(T, Src0R, Src1RF);
3243     }
3244     _mov(Dest, T);
3245     return;
3246   }
3247   case InstArithmetic::And: {
3248     if (Srcs.hasConstOperand()) {
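      // When ~imm (but not imm itself) is flex-encodable, a = b & imm can be
      // lowered as bic(b, ~imm), since b & imm == b & ~(~imm).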
3249       if (!Srcs.immediateIsFlexEncodable() &&
3250           Srcs.invertedImmediateIsFlexEncodable()) {
3251         Variable *Src0R = Srcs.src0R(this);
3252         Operand *Src1F = Srcs.invertedSrc1F(this);
3253         _bic(T, Src0R, Src1F);
3254         _mov(Dest, T);
3255         return;
3256       }
3257     }
3258     assert(isIntegerType(DestTy));
3259     Variable *Src0R = Srcs.src0R(this);
3260     if (isVectorType(DestTy)) {
3261       Variable *Src1R = legalizeToReg(Src1);
3262       _vand(T, Src0R, Src1R);
3263     } else {
3264       Operand *Src1RF = Srcs.src1RF(this);
3265       _and(T, Src0R, Src1RF);
3266     }
3267     _mov(Dest, T);
3268     return;
3269   }
3270   case InstArithmetic::Or: {
3271     Variable *Src0R = Srcs.src0R(this);
3272     assert(isIntegerType(DestTy));
3273     if (isVectorType(DestTy)) {
3274       Variable *Src1R = legalizeToReg(Src1);
3275       _vorr(T, Src0R, Src1R);
3276     } else {
3277       Operand *Src1RF = Srcs.src1RF(this);
3278       _orr(T, Src0R, Src1RF);
3279     }
3280     _mov(Dest, T);
3281     return;
3282   }
3283   case InstArithmetic::Xor: {
3284     Variable *Src0R = Srcs.src0R(this);
3285     assert(isIntegerType(DestTy));
3286     if (isVectorType(DestTy)) {
3287       Variable *Src1R = legalizeToReg(Src1);
3288       _veor(T, Src0R, Src1R);
3289     } else {
3290       Operand *Src1RF = Srcs.src1RF(this);
3291       _eor(T, Src0R, Src1RF);
3292     }
3293     _mov(Dest, T);
3294     return;
3295   }
3296   case InstArithmetic::Sub: {
3297     if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
3298       assert(!isVectorType(DestTy));
3299       Variable *Src0R = legalizeToReg(Src0);
3300       Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
3301       Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
3302       _mls(T, Src1R, Src2R, Src0R);
3303       _mov(Dest, T);
3304       return;
3305     }
3306 
3307     if (Srcs.hasConstOperand()) {
3308       assert(!isVectorType(DestTy));
3309       if (Srcs.immediateIsFlexEncodable()) {
3310         Variable *Src0R = Srcs.src0R(this);
3311         Operand *Src1RF = Srcs.src1RF(this);
3312         if (Srcs.swappedOperands()) {
3313           _rsb(T, Src0R, Src1RF);
3314         } else {
3315           _sub(T, Src0R, Src1RF);
3316         }
3317         _mov(Dest, T);
3318         return;
3319       }
3320       if (!Srcs.swappedOperands() && Srcs.negatedImmediateIsFlexEncodable()) {
3321         Variable *Src0R = Srcs.src0R(this);
3322         Operand *Src1F = Srcs.negatedSrc1F(this);
3323         _add(T, Src0R, Src1F);
3324         _mov(Dest, T);
3325         return;
3326       }
3327     }
3328     Variable *Src0R = Srcs.unswappedSrc0R(this);
3329     Variable *Src1R = Srcs.unswappedSrc1R(this);
3330     if (isVectorType(DestTy)) {
3331       _vsub(T, Src0R, Src1R);
3332     } else {
3333       _sub(T, Src0R, Src1R);
3334     }
3335     _mov(Dest, T);
3336     return;
3337   }
3338   case InstArithmetic::Mul: {
3339     const bool OptM1 = Func->getOptLevel() == Opt_m1;
3340     if (!OptM1 && Srcs.hasConstOperand()) {
3341       constexpr std::size_t MaxShifts = 4;
3342       std::array<StrengthReduction::AggregationElement, MaxShifts> Shifts;
3343       SizeT NumOperations;
3344       int32_t Const = Srcs.getConstantValue();
3345       const bool Invert = Const < 0;
3346       const bool MultiplyByZero = Const == 0;
3347       Operand *_0 =
3348           legalize(Ctx->getConstantZero(DestTy), Legal_Reg | Legal_Flex);
3349 
3350       if (MultiplyByZero) {
3351         _mov(T, _0);
3352         _mov(Dest, T);
3353         return;
3354       }
3355 
3356       if (Invert) {
3357         Const = -Const;
3358       }
3359 
3360       if (StrengthReduction::tryToOptimize(Const, &NumOperations, &Shifts)) {
3361         assert(NumOperations >= 1);
3362         Variable *Src0R = Srcs.src0R(this);
3363         int32_t Start;
3364         int32_t End;
3365         if (NumOperations == 1 || Shifts[NumOperations - 1].shAmt() != 0) {
3366           // Multiplication by a power of 2 (NumOperations == 1); or
3367           // multiplication by an even number that is not a power of 2.
3368           Start = 1;
3369           End = NumOperations;
3370           assert(Shifts[0].aggregateWithAdd());
3371           _lsl(T, Src0R, shAmtImm(Shifts[0].shAmt()));
3372         } else {
3373           // Multiplication by an odd number. Put the free barrel shifter
3374           // to good use.
3375           Start = 0;
3376           End = NumOperations - 2;
3377           const StrengthReduction::AggregationElement &Last =
3378               Shifts[NumOperations - 1];
3379           const StrengthReduction::AggregationElement &SecondToLast =
3380               Shifts[NumOperations - 2];
3381           if (!Last.aggregateWithAdd()) {
3382             assert(SecondToLast.aggregateWithAdd());
3383             _rsb(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R));
3384           } else if (!SecondToLast.aggregateWithAdd()) {
3385             assert(Last.aggregateWithAdd());
3386             _sub(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R));
3387           } else {
3388             _add(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R));
3389           }
3390         }
3391 
3392         // Odd numbers :   S                                 E   I   I
3393         //               +---+---+---+---+---+---+ ... +---+---+---+---+
3394         //     Shifts  = |   |   |   |   |   |   | ... |   |   |   |   |
3395         //               +---+---+---+---+---+---+ ... +---+---+---+---+
3396         // Even numbers:   I   S                                     E
3397         //
3398         // S: Start; E: End; I: Init
3399         for (int32_t I = Start; I < End; ++I) {
3400           const StrengthReduction::AggregationElement &Current = Shifts[I];
3401           Operand *SrcF = Current.createShiftedOperand(Func, Src0R);
3402           if (Current.aggregateWithAdd()) {
3403             _add(T, T, SrcF);
3404           } else {
3405             _sub(T, T, SrcF);
3406           }
3407         }
3408 
3409         if (Invert) {
3410           // T = 0 - T.
3411           _rsb(T, T, _0);
3412         }
3413 
3414         _mov(Dest, T);
3415         return;
3416       }
3417     }
3418     Variable *Src0R = Srcs.unswappedSrc0R(this);
3419     Variable *Src1R = Srcs.unswappedSrc1R(this);
3420     if (isVectorType(DestTy)) {
3421       _vmul(T, Src0R, Src1R);
3422     } else {
3423       _mul(T, Src0R, Src1R);
3424     }
3425     _mov(Dest, T);
3426     return;
3427   }
3428   case InstArithmetic::Shl: {
3429     Variable *Src0R = Srcs.unswappedSrc0R(this);
3430     if (!isVectorType(T->getType())) {
3431       if (Srcs.isSrc1ImmediateZero()) {
3432         _mov(T, Src0R);
3433       } else {
3434         Operand *Src1R = Srcs.unswappedSrc1RShAmtImm(this);
3435         _lsl(T, Src0R, Src1R);
3436       }
3437     } else {
3438       if (Srcs.hasConstOperand()) {
3439         ConstantInteger32 *ShAmt = llvm::cast<ConstantInteger32>(Srcs.src1());
3440         _vshl(T, Src0R, ShAmt);
3441       } else {
3442         auto *Src1R = Srcs.unswappedSrc1R(this);
3443         _vshl(T, Src0R, Src1R)->setSignType(InstARM32::FS_Unsigned);
3444       }
3445     }
3446     _mov(Dest, T);
3447     return;
3448   }
3449   case InstArithmetic::Lshr: {
3450     Variable *Src0R = Srcs.unswappedSrc0R(this);
3451     if (!isVectorType(T->getType())) {
3452       if (DestTy != IceType_i32) {
3453         _uxt(Src0R, Src0R);
3454       }
3455       if (Srcs.isSrc1ImmediateZero()) {
3456         _mov(T, Src0R);
3457       } else {
3458         Operand *Src1R = Srcs.unswappedSrc1RShAmtImm(this);
3459         _lsr(T, Src0R, Src1R);
3460       }
3461     } else {
3462       if (Srcs.hasConstOperand()) {
3463         ConstantInteger32 *ShAmt = llvm::cast<ConstantInteger32>(Srcs.src1());
3464         _vshr(T, Src0R, ShAmt)->setSignType(InstARM32::FS_Unsigned);
3465       } else {
3466         auto *Src1R = Srcs.unswappedSrc1R(this);
3467         auto *Src1RNeg = makeReg(Src1R->getType());
3468         _vneg(Src1RNeg, Src1R);
3469         _vshl(T, Src0R, Src1RNeg)->setSignType(InstARM32::FS_Unsigned);
3470       }
3471     }
3472     _mov(Dest, T);
3473     return;
3474   }
3475   case InstArithmetic::Ashr: {
3476     Variable *Src0R = Srcs.unswappedSrc0R(this);
3477     if (!isVectorType(T->getType())) {
3478       if (DestTy != IceType_i32) {
3479         _sxt(Src0R, Src0R);
3480       }
3481       if (Srcs.isSrc1ImmediateZero()) {
3482         _mov(T, Src0R);
3483       } else {
3484         _asr(T, Src0R, Srcs.unswappedSrc1RShAmtImm(this));
3485       }
3486     } else {
3487       if (Srcs.hasConstOperand()) {
3488         ConstantInteger32 *ShAmt = llvm::cast<ConstantInteger32>(Srcs.src1());
3489         _vshr(T, Src0R, ShAmt)->setSignType(InstARM32::FS_Signed);
3490       } else {
3491         auto *Src1R = Srcs.unswappedSrc1R(this);
3492         auto *Src1RNeg = makeReg(Src1R->getType());
3493         _vneg(Src1RNeg, Src1R);
3494         _vshl(T, Src0R, Src1RNeg)->setSignType(InstARM32::FS_Signed);
3495       }
3496     }
3497     _mov(Dest, T);
3498     return;
3499   }
3500   case InstArithmetic::Udiv:
3501   case InstArithmetic::Sdiv:
3502   case InstArithmetic::Urem:
3503   case InstArithmetic::Srem:
3504     llvm::report_fatal_error(
3505         "Integer div/rem should have been handled earlier.");
3506     return;
3507   case InstArithmetic::Fadd:
3508   case InstArithmetic::Fsub:
3509   case InstArithmetic::Fmul:
3510   case InstArithmetic::Fdiv:
3511   case InstArithmetic::Frem:
3512     llvm::report_fatal_error(
3513         "Floating point arith should have been handled earlier.");
3514     return;
3515   }
3516 }
3517 
3518 void TargetARM32::lowerAssign(const InstAssign *Instr) {
3519   Variable *Dest = Instr->getDest();
3520 
3521   if (Dest->isRematerializable()) {
3522     Context.insert<InstFakeDef>(Dest);
3523     return;
3524   }
3525 
3526   Operand *Src0 = Instr->getSrc(0);
3527   assert(Dest->getType() == Src0->getType());
3528   if (Dest->getType() == IceType_i64) {
3529     Src0 = legalizeUndef(Src0);
3530 
3531     Variable *T_Lo = makeReg(IceType_i32);
3532     auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3533     Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
3534     _mov(T_Lo, Src0Lo);
3535     _mov(DestLo, T_Lo);
3536 
3537     Variable *T_Hi = makeReg(IceType_i32);
3538     auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3539     Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
3540     _mov(T_Hi, Src0Hi);
3541     _mov(DestHi, T_Hi);
3542 
3543     return;
3544   }
3545 
3546   Operand *NewSrc;
3547   if (Dest->hasReg()) {
3548     // If Dest already has a physical register, then legalize the Src operand
3549     // into a Variable with the same register assignment. This in particular
3550     // allows the use of Flex operands.
3551     NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum());
3552   } else {
3553     // Dest could be a stack operand. Since we could potentially need to do a
3554     // Store (and store can only have Register operands), legalize this to a
3555     // register.
3556     NewSrc = legalize(Src0, Legal_Reg);
3557   }
3558 
3559   if (isVectorType(Dest->getType()) || isScalarFloatingType(Dest->getType())) {
3560     NewSrc = legalize(NewSrc, Legal_Reg | Legal_Mem);
3561   }
3562   _mov(Dest, NewSrc);
3563 }
3564 
3565 TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch(
3566     Operand *Boolean, const LowerInt1BranchTarget &TargetTrue,
3567     const LowerInt1BranchTarget &TargetFalse, uint32_t ShortCircuitable) {
3568   InstARM32Label *NewShortCircuitLabel = nullptr;
3569   Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
3570 
3571   const Inst *Producer = Computations.getProducerOf(Boolean);
3572 
3573   if (Producer == nullptr) {
3574     // No producer, no problem: just emit code to perform (Boolean & 1) and
3575     // set the flags register. The branch should be taken if the resulting flags
3576     // indicate a non-zero result.
3577     _tst(legalizeToReg(Boolean), _1);
3578     return ShortCircuitCondAndLabel(CondWhenTrue(CondARM32::NE));
3579   }
3580 
3581   switch (Producer->getKind()) {
3582   default:
3583     llvm::report_fatal_error("Unexpected producer.");
3584   case Inst::Icmp: {
3585     return ShortCircuitCondAndLabel(
3586         lowerIcmpCond(llvm::cast<InstIcmp>(Producer)));
3587   } break;
3588   case Inst::Fcmp: {
3589     return ShortCircuitCondAndLabel(
3590         lowerFcmpCond(llvm::cast<InstFcmp>(Producer)));
3591   } break;
3592   case Inst::Cast: {
3593     const auto *CastProducer = llvm::cast<InstCast>(Producer);
3594     assert(CastProducer->getCastKind() == InstCast::Trunc);
3595     Operand *Src = CastProducer->getSrc(0);
3596     if (Src->getType() == IceType_i64)
3597       Src = loOperand(Src);
3598     _tst(legalizeToReg(Src), _1);
3599     return ShortCircuitCondAndLabel(CondWhenTrue(CondARM32::NE));
3600   } break;
3601   case Inst::Arithmetic: {
3602     const auto *ArithProducer = llvm::cast<InstArithmetic>(Producer);
3603     switch (ArithProducer->getOp()) {
3604     default:
3605       llvm::report_fatal_error("Unhandled Arithmetic Producer.");
3606     case InstArithmetic::And: {
3607       if (!(ShortCircuitable & SC_And)) {
3608         NewShortCircuitLabel = InstARM32Label::create(Func, this);
3609       }
3610 
3611       LowerInt1BranchTarget NewTarget =
3612           TargetFalse.createForLabelOrDuplicate(NewShortCircuitLabel);
3613 
3614       ShortCircuitCondAndLabel CondAndLabel = lowerInt1ForBranch(
3615           Producer->getSrc(0), TargetTrue, NewTarget, SC_And);
3616       const CondWhenTrue &Cond = CondAndLabel.Cond;
3617 
3618       _br_short_circuit(NewTarget, Cond.invert());
3619 
3620       InstARM32Label *const ShortCircuitLabel = CondAndLabel.ShortCircuitTarget;
3621       if (ShortCircuitLabel != nullptr)
3622         Context.insert(ShortCircuitLabel);
3623 
3624       return ShortCircuitCondAndLabel(
3625           lowerInt1ForBranch(Producer->getSrc(1), TargetTrue, NewTarget, SC_All)
3626               .assertNoLabelAndReturnCond(),
3627           NewShortCircuitLabel);
3628     } break;
3629     case InstArithmetic::Or: {
3630       if (!(ShortCircuitable & SC_Or)) {
3631         NewShortCircuitLabel = InstARM32Label::create(Func, this);
3632       }
3633 
3634       LowerInt1BranchTarget NewTarget =
3635           TargetTrue.createForLabelOrDuplicate(NewShortCircuitLabel);
3636 
3637       ShortCircuitCondAndLabel CondAndLabel = lowerInt1ForBranch(
3638           Producer->getSrc(0), NewTarget, TargetFalse, SC_Or);
3639       const CondWhenTrue &Cond = CondAndLabel.Cond;
3640 
3641       _br_short_circuit(NewTarget, Cond);
3642 
3643       InstARM32Label *const ShortCircuitLabel = CondAndLabel.ShortCircuitTarget;
3644       if (ShortCircuitLabel != nullptr)
3645         Context.insert(ShortCircuitLabel);
3646 
3647       return ShortCircuitCondAndLabel(lowerInt1ForBranch(Producer->getSrc(1),
3648                                                          NewTarget, TargetFalse,
3649                                                          SC_All)
3650                                           .assertNoLabelAndReturnCond(),
3651                                       NewShortCircuitLabel);
3652     } break;
3653     }
3654   }
3655   }
3656 }
3657 
3658 void TargetARM32::lowerBr(const InstBr *Instr) {
3659   if (Instr->isUnconditional()) {
3660     _br(Instr->getTargetUnconditional());
3661     return;
3662   }
3663 
3664   CfgNode *TargetTrue = Instr->getTargetTrue();
3665   CfgNode *TargetFalse = Instr->getTargetFalse();
3666   ShortCircuitCondAndLabel CondAndLabel = lowerInt1ForBranch(
3667       Instr->getCondition(), LowerInt1BranchTarget(TargetTrue),
3668       LowerInt1BranchTarget(TargetFalse), SC_All);
3669   assert(CondAndLabel.ShortCircuitTarget == nullptr);
3670 
3671   const CondWhenTrue &Cond = CondAndLabel.Cond;
3672   if (Cond.WhenTrue1 != CondARM32::kNone) {
3673     assert(Cond.WhenTrue0 != CondARM32::AL);
3674     _br(TargetTrue, Cond.WhenTrue1);
3675   }
3676 
3677   switch (Cond.WhenTrue0) {
3678   default:
3679     _br(TargetTrue, TargetFalse, Cond.WhenTrue0);
3680     break;
3681   case CondARM32::kNone:
3682     _br(TargetFalse);
3683     break;
3684   case CondARM32::AL:
3685     _br(TargetTrue);
3686     break;
3687   }
3688 }
3689 
3690 void TargetARM32::lowerCall(const InstCall *Instr) {
3691   Operand *CallTarget = Instr->getCallTarget();
3692   if (Instr->isTargetHelperCall()) {
3693     auto TargetHelperPreamble = ARM32HelpersPreamble.find(CallTarget);
3694     if (TargetHelperPreamble != ARM32HelpersPreamble.end()) {
3695       (this->*TargetHelperPreamble->second)(Instr);
3696     }
3697   }
3698   MaybeLeafFunc = false;
3699   NeedsStackAlignment = true;
3700 
3701   // Assign arguments to registers and stack. Also reserve stack.
3702   TargetARM32::CallingConv CC;
3703   // Pair of Arg Operand -> GPR number assignments.
3704   llvm::SmallVector<std::pair<Operand *, RegNumT>, NumGPRArgs> GPRArgs;
3705   llvm::SmallVector<std::pair<Operand *, RegNumT>, NumFP32Args> FPArgs;
3706   // Pair of Arg Operand -> stack offset.
3707   llvm::SmallVector<std::pair<Operand *, int32_t>, 8> StackArgs;
3708   size_t ParameterAreaSizeBytes = 0;
3709 
3710   // Classify each argument operand according to the location where the
3711   // argument is passed.
3712   for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
3713     Operand *Arg = legalizeUndef(Instr->getArg(i));
3714     const Type Ty = Arg->getType();
3715     bool InReg = false;
3716     RegNumT Reg;
3717     if (isScalarIntegerType(Ty)) {
3718       InReg = CC.argInGPR(Ty, &Reg);
3719     } else {
3720       InReg = CC.argInVFP(Ty, &Reg);
3721     }
3722 
3723     if (!InReg) {
3724       ParameterAreaSizeBytes =
3725           applyStackAlignmentTy(ParameterAreaSizeBytes, Ty);
3726       StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes));
3727       ParameterAreaSizeBytes += typeWidthInBytesOnStack(Ty);
3728       continue;
3729     }
3730 
3731     if (Ty == IceType_i64) {
3732       Operand *Lo = loOperand(Arg);
3733       Operand *Hi = hiOperand(Arg);
3734       GPRArgs.push_back(std::make_pair(
3735           Lo, RegNumT::fixme(RegARM32::getI64PairFirstGPRNum(Reg))));
3736       GPRArgs.push_back(std::make_pair(
3737           Hi, RegNumT::fixme(RegARM32::getI64PairSecondGPRNum(Reg))));
3738     } else if (isScalarIntegerType(Ty)) {
3739       GPRArgs.push_back(std::make_pair(Arg, Reg));
3740     } else {
3741       FPArgs.push_back(std::make_pair(Arg, Reg));
3742     }
3743   }
3744 
3745   // Adjust the parameter area so that the stack is aligned. It is assumed that
3746   // the stack is already aligned at the start of the calling sequence.
3747   ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
3748 
3749   if (ParameterAreaSizeBytes > MaxOutArgsSizeBytes) {
3750     llvm::report_fatal_error("MaxOutArgsSizeBytes is not really a max.");
3751   }
3752 
3753   // Copy arguments that are passed on the stack to the appropriate stack
3754   // locations.
3755   Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
3756   for (auto &StackArg : StackArgs) {
3757     ConstantInteger32 *Loc =
3758         llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second));
3759     Type Ty = StackArg.first->getType();
3760     OperandARM32Mem *Addr;
3761     constexpr bool SignExt = false;
3762     if (OperandARM32Mem::canHoldOffset(Ty, SignExt, StackArg.second)) {
3763       Addr = OperandARM32Mem::create(Func, Ty, SP, Loc);
3764     } else {
3765       Variable *NewBase = Func->makeVariable(SP->getType());
3766       lowerArithmetic(
3767           InstArithmetic::create(Func, InstArithmetic::Add, NewBase, SP, Loc));
3768       Addr = formMemoryOperand(NewBase, Ty);
3769     }
3770     lowerStore(InstStore::create(Func, StackArg.first, Addr));
3771   }
3772 
3773   // Generate the call instruction. Assign its result to a temporary with high
3774   // register allocation weight.
3775   Variable *Dest = Instr->getDest();
3776   // ReturnReg doubles as ReturnRegLo as necessary.
3777   Variable *ReturnReg = nullptr;
3778   Variable *ReturnRegHi = nullptr;
3779   if (Dest) {
3780     switch (Dest->getType()) {
3781     case IceType_NUM:
3782       llvm::report_fatal_error("Invalid Call dest type");
3783       break;
3784     case IceType_void:
3785       break;
3786     case IceType_i1:
3787       assert(Computations.getProducerOf(Dest) == nullptr);
3788     // Fall-through intended.
3789     case IceType_i8:
3790     case IceType_i16:
3791     case IceType_i32:
3792       ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_r0);
3793       break;
3794     case IceType_i64:
3795       ReturnReg = makeReg(IceType_i32, RegARM32::Reg_r0);
3796       ReturnRegHi = makeReg(IceType_i32, RegARM32::Reg_r1);
3797       break;
3798     case IceType_f32:
3799       ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_s0);
3800       break;
3801     case IceType_f64:
3802       ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_d0);
3803       break;
3804     case IceType_v4i1:
3805     case IceType_v8i1:
3806     case IceType_v16i1:
3807     case IceType_v16i8:
3808     case IceType_v8i16:
3809     case IceType_v4i32:
3810     case IceType_v4f32:
3811       ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_q0);
3812       break;
3813     }
3814   }
3815 
3816   // Allow ConstantRelocatable to be left alone as a direct call, but force
3817   // other constants like ConstantInteger32 to be in a register and make it an
3818   // indirect call.
3819   if (!llvm::isa<ConstantRelocatable>(CallTarget)) {
3820     CallTarget = legalize(CallTarget, Legal_Reg);
3821   }
3822 
3823   // Copy arguments to be passed in registers to the appropriate registers.
3824   CfgVector<Variable *> RegArgs;
3825   for (auto &FPArg : FPArgs) {
3826     RegArgs.emplace_back(legalizeToReg(FPArg.first, FPArg.second));
3827   }
3828   for (auto &GPRArg : GPRArgs) {
3829     RegArgs.emplace_back(legalizeToReg(GPRArg.first, GPRArg.second));
3830   }
3831 
3832   // Generate a FakeUse of register arguments so that they do not get dead code
3833   // eliminated as a result of the FakeKill of scratch registers after the call.
3834   // These fake-uses need to be placed here to prevent argument registers from
3835   // being used during the legalizeToReg() calls above.
3836   for (auto *RegArg : RegArgs) {
3837     Context.insert<InstFakeUse>(RegArg);
3838   }
3839 
3840   InstARM32Call *NewCall =
3841       Sandboxer(this, InstBundleLock::Opt_AlignToEnd).bl(ReturnReg, CallTarget);
3842 
3843   if (ReturnRegHi)
3844     Context.insert<InstFakeDef>(ReturnRegHi);
3845 
3846   // Insert a register-kill pseudo instruction.
3847   Context.insert<InstFakeKill>(NewCall);
3848 
3849   // Generate a FakeUse to keep the call live if necessary.
3850   if (Instr->hasSideEffects() && ReturnReg) {
3851     Context.insert<InstFakeUse>(ReturnReg);
3852   }
3853 
3854   if (Dest != nullptr) {
3855     // Assign the result of the call to Dest.
3856     if (ReturnReg != nullptr) {
3857       if (ReturnRegHi) {
3858         auto *Dest64On32 = llvm::cast<Variable64On32>(Dest);
3859         Variable *DestLo = Dest64On32->getLo();
3860         Variable *DestHi = Dest64On32->getHi();
3861         _mov(DestLo, ReturnReg);
3862         _mov(DestHi, ReturnRegHi);
3863       } else {
3864         if (isFloatingType(Dest->getType()) || isVectorType(Dest->getType())) {
3865           _mov(Dest, ReturnReg);
3866         } else {
3867           assert(isIntegerType(Dest->getType()) &&
3868                  typeWidthInBytes(Dest->getType()) <= 4);
3869           _mov(Dest, ReturnReg);
3870         }
3871       }
3872     }
3873   }
3874 
3875   if (Instr->isTargetHelperCall()) {
3876     auto TargetHelpersPostamble = ARM32HelpersPostamble.find(CallTarget);
3877     if (TargetHelpersPostamble != ARM32HelpersPostamble.end()) {
3878       (this->*TargetHelpersPostamble->second)(Instr);
3879     }
3880   }
3881 }
3882 
3883 namespace {
3884 void configureBitcastTemporary(Variable64On32 *Var) {
3885   Var->setMustNotHaveReg();
3886   Var->getHi()->setMustHaveReg();
3887   Var->getLo()->setMustHaveReg();
3888 }
3889 } // end of anonymous namespace
3890 
3891 void TargetARM32::lowerCast(const InstCast *Instr) {
3892   InstCast::OpKind CastKind = Instr->getCastKind();
3893   Variable *Dest = Instr->getDest();
3894   const Type DestTy = Dest->getType();
3895   Operand *Src0 = legalizeUndef(Instr->getSrc(0));
3896   switch (CastKind) {
3897   default:
3898     Func->setError("Cast type not supported");
3899     return;
3900   case InstCast::Sext: {
3901     if (isVectorType(DestTy)) {
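      // Shift each element left and then arithmetic-shift it right by
      // (element width - 1) to replicate bit 0 across the whole element.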
3902       Variable *T0 = makeReg(DestTy);
3903       Variable *T1 = makeReg(DestTy);
3904       ConstantInteger32 *ShAmt = nullptr;
3905       switch (DestTy) {
3906       default:
3907         llvm::report_fatal_error("Unexpected type in vector sext.");
3908       case IceType_v16i8:
3909         ShAmt = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(7));
3910         break;
3911       case IceType_v8i16:
3912         ShAmt = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(15));
3913         break;
3914       case IceType_v4i32:
3915         ShAmt = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(31));
3916         break;
3917       }
3918       auto *Src0R = legalizeToReg(Src0);
3919       _vshl(T0, Src0R, ShAmt);
3920       _vshr(T1, T0, ShAmt)->setSignType(InstARM32::FS_Signed);
3921       _mov(Dest, T1);
3922     } else if (DestTy == IceType_i64) {
3923       // t1=sxtb src; t2= mov t1 asr #31; dst.lo=t1; dst.hi=t2
3924       Constant *ShiftAmt = Ctx->getConstantInt32(31);
3925       auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3926       auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3927       Variable *T_Lo = makeReg(DestLo->getType());
3928       if (Src0->getType() == IceType_i32) {
3929         Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
3930         _mov(T_Lo, Src0RF);
3931       } else if (Src0->getType() != IceType_i1) {
3932         Variable *Src0R = legalizeToReg(Src0);
3933         _sxt(T_Lo, Src0R);
3934       } else {
3935         Operand *_0 = Ctx->getConstantZero(IceType_i32);
3936         Operand *_m1 = Ctx->getConstantInt32(-1);
3937         lowerInt1ForSelect(T_Lo, Src0, _m1, _0);
3938       }
3939       _mov(DestLo, T_Lo);
3940       Variable *T_Hi = makeReg(DestHi->getType());
3941       if (Src0->getType() != IceType_i1) {
3942         _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, T_Lo,
3943                                                OperandARM32::ASR, ShiftAmt));
3944       } else {
3945         // For i1, the asr instruction is already done above.
3946         _mov(T_Hi, T_Lo);
3947       }
3948       _mov(DestHi, T_Hi);
3949     } else if (Src0->getType() != IceType_i1) {
3950       // t1 = sxt src; dst = t1
3951       Variable *Src0R = legalizeToReg(Src0);
3952       Variable *T = makeReg(DestTy);
3953       _sxt(T, Src0R);
3954       _mov(Dest, T);
3955     } else {
3956       Constant *_0 = Ctx->getConstantZero(IceType_i32);
3957       Operand *_m1 = Ctx->getConstantInt(DestTy, -1);
3958       Variable *T = makeReg(DestTy);
3959       lowerInt1ForSelect(T, Src0, _m1, _0);
3960       _mov(Dest, T);
3961     }
3962     break;
3963   }
3964   case InstCast::Zext: {
3965     if (isVectorType(DestTy)) {
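      // Zero-extending an i1 vector only requires clearing all but bit 0 of
      // each element, so mask every element with 1.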
3966       auto *Mask = makeReg(DestTy);
3967       auto *_1 = Ctx->getConstantInt32(1);
3968       auto *T = makeReg(DestTy);
3969       auto *Src0R = legalizeToReg(Src0);
3970       _mov(Mask, _1);
3971       _vand(T, Src0R, Mask);
3972       _mov(Dest, T);
3973     } else if (DestTy == IceType_i64) {
3974       // t1=uxtb src; dst.lo=t1; dst.hi=0
3975       Operand *_0 =
3976           legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
3977       auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3978       auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3979       Variable *T_Lo = makeReg(DestLo->getType());
3980 
3981       switch (Src0->getType()) {
3982       default: {
3983         assert(Src0->getType() != IceType_i64);
3984         _uxt(T_Lo, legalizeToReg(Src0));
3985       } break;
3986       case IceType_i32: {
3987         _mov(T_Lo, legalize(Src0, Legal_Reg | Legal_Flex));
3988       } break;
3989       case IceType_i1: {
3990         SafeBoolChain Safe = lowerInt1(T_Lo, Src0);
3991         if (Safe == SBC_No) {
3992           Operand *_1 =
3993               legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
3994           _and(T_Lo, T_Lo, _1);
3995         }
3996       } break;
3997       }
3998 
3999       _mov(DestLo, T_Lo);
4000 
4001       Variable *T_Hi = makeReg(DestLo->getType());
4002       _mov(T_Hi, _0);
4003       _mov(DestHi, T_Hi);
4004     } else if (Src0->getType() == IceType_i1) {
4005       Variable *T = makeReg(DestTy);
4006 
4007       SafeBoolChain Safe = lowerInt1(T, Src0);
4008       if (Safe == SBC_No) {
4009         Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
4010         _and(T, T, _1);
4011       }
4012 
4013       _mov(Dest, T);
4014     } else {
4015       // t1 = uxt src; dst = t1
4016       Variable *Src0R = legalizeToReg(Src0);
4017       Variable *T = makeReg(DestTy);
4018       _uxt(T, Src0R);
4019       _mov(Dest, T);
4020     }
4021     break;
4022   }
4023   case InstCast::Trunc: {
4024     if (isVectorType(DestTy)) {
4025       auto *T = makeReg(DestTy);
4026       auto *Src0R = legalizeToReg(Src0);
4027       _mov(T, Src0R);
4028       _mov(Dest, T);
4029     } else {
4030       if (Src0->getType() == IceType_i64)
4031         Src0 = loOperand(Src0);
4032       Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
4033       // t1 = trunc Src0RF; Dest = t1
4034       Variable *T = makeReg(DestTy);
4035       _mov(T, Src0RF);
4036       if (DestTy == IceType_i1)
4037         _and(T, T, Ctx->getConstantInt1(1));
4038       _mov(Dest, T);
4039     }
4040     break;
4041   }
4042   case InstCast::Fptrunc:
4043   case InstCast::Fpext: {
4044     // fptrunc: dest.f32 = fptrunc src0.fp64
4045     // fpext: dest.f64 = fpext src0.fp32
4046     const bool IsTrunc = CastKind == InstCast::Fptrunc;
4047     assert(!isVectorType(DestTy));
4048     assert(DestTy == (IsTrunc ? IceType_f32 : IceType_f64));
4049     assert(Src0->getType() == (IsTrunc ? IceType_f64 : IceType_f32));
4050     Variable *Src0R = legalizeToReg(Src0);
4051     Variable *T = makeReg(DestTy);
4052     _vcvt(T, Src0R, IsTrunc ? InstARM32Vcvt::D2s : InstARM32Vcvt::S2d);
4053     _mov(Dest, T);
4054     break;
4055   }
4056   case InstCast::Fptosi:
4057   case InstCast::Fptoui: {
4058     const bool DestIsSigned = CastKind == InstCast::Fptosi;
4059     Variable *Src0R = legalizeToReg(Src0);
4060 
4061     if (isVectorType(DestTy)) {
4062       assert(typeElementType(Src0->getType()) == IceType_f32);
4063       auto *T = makeReg(DestTy);
4064       _vcvt(T, Src0R,
4065             DestIsSigned ? InstARM32Vcvt::Vs2si : InstARM32Vcvt::Vs2ui);
4066       _mov(Dest, T);
4067       break;
4068     }
4069 
4070     const bool Src0IsF32 = isFloat32Asserting32Or64(Src0->getType());
4071     if (llvm::isa<Variable64On32>(Dest)) {
4072       llvm::report_fatal_error("fp-to-i64 should have been pre-lowered.");
4073     }
4074     // fptosi:
4075     //     t1.fp = vcvt src0.fp
4076     //     t2.i32 = vmov t1.fp
4077     //     dest.int = conv t2.i32     @ Truncates the result if needed.
4078     // fptoui:
4079     //     t1.fp = vcvt src0.fp
4080     //     t2.u32 = vmov t1.fp
4081     //     dest.uint = conv t2.u32    @ Truncates the result if needed.
4082     Variable *T_fp = makeReg(IceType_f32);
4083     const InstARM32Vcvt::VcvtVariant Conversion =
4084         Src0IsF32 ? (DestIsSigned ? InstARM32Vcvt::S2si : InstARM32Vcvt::S2ui)
4085                   : (DestIsSigned ? InstARM32Vcvt::D2si : InstARM32Vcvt::D2ui);
4086     _vcvt(T_fp, Src0R, Conversion);
4087     Variable *T = makeReg(IceType_i32);
4088     _mov(T, T_fp);
4089     if (DestTy != IceType_i32) {
4090       Variable *T_1 = makeReg(DestTy);
4091       lowerCast(InstCast::create(Func, InstCast::Trunc, T_1, T));
4092       T = T_1;
4093     }
4094     _mov(Dest, T);
4095     break;
4096   }
4097   case InstCast::Sitofp:
4098   case InstCast::Uitofp: {
4099     const bool SourceIsSigned = CastKind == InstCast::Sitofp;
4100 
4101     if (isVectorType(DestTy)) {
4102       assert(typeElementType(DestTy) == IceType_f32);
4103       auto *T = makeReg(DestTy);
4104       Variable *Src0R = legalizeToReg(Src0);
4105       _vcvt(T, Src0R,
4106             SourceIsSigned ? InstARM32Vcvt::Vsi2s : InstARM32Vcvt::Vui2s);
4107       _mov(Dest, T);
4108       break;
4109     }
4110 
4111     const bool DestIsF32 = isFloat32Asserting32Or64(DestTy);
4112     if (Src0->getType() == IceType_i64) {
4113       llvm::report_fatal_error("i64-to-fp should have been pre-lowered.");
4114     }
4115     // sitofp:
4116     //     t1.i32 = sext src.int    @ sign-extends src0 if needed.
4117     //     t2.fp32 = vmov t1.i32
4118     //     t3.fp = vcvt.{fp}.s32    @ fp is either f32 or f64
4119     // uitofp:
4120     //     t1.i32 = zext src.int    @ zero-extends src0 if needed.
4121     //     t2.fp32 = vmov t1.i32
4122     //     t3.fp = vcvt.{fp}.u32    @ fp is either f32 or f64
4123     if (Src0->getType() != IceType_i32) {
4124       Variable *Src0R_32 = makeReg(IceType_i32);
4125       lowerCast(InstCast::create(
4126           Func, SourceIsSigned ? InstCast::Sext : InstCast::Zext, Src0R_32,
4127           Src0));
4128       Src0 = Src0R_32;
4129     }
4130     Variable *Src0R = legalizeToReg(Src0);
4131     Variable *Src0R_f32 = makeReg(IceType_f32);
4132     _mov(Src0R_f32, Src0R);
4133     Src0R = Src0R_f32;
4134     Variable *T = makeReg(DestTy);
4135     const InstARM32Vcvt::VcvtVariant Conversion =
4136         DestIsF32
4137             ? (SourceIsSigned ? InstARM32Vcvt::Si2s : InstARM32Vcvt::Ui2s)
4138             : (SourceIsSigned ? InstARM32Vcvt::Si2d : InstARM32Vcvt::Ui2d);
4139     _vcvt(T, Src0R, Conversion);
4140     _mov(Dest, T);
4141     break;
4142   }
4143   case InstCast::Bitcast: {
4144     Operand *Src0 = Instr->getSrc(0);
4145     if (DestTy == Src0->getType()) {
4146       auto *Assign = InstAssign::create(Func, Dest, Src0);
4147       lowerAssign(Assign);
4148       return;
4149     }
4150     switch (DestTy) {
4151     case IceType_NUM:
4152     case IceType_void:
4153       llvm::report_fatal_error("Unexpected bitcast.");
4154     case IceType_i1:
4155       UnimplementedLoweringError(this, Instr);
4156       break;
4157     case IceType_i8:
4158       assert(Src0->getType() == IceType_v8i1);
4159       llvm::report_fatal_error(
4160           "i8 to v8i1 conversion should have been prelowered.");
4161       break;
4162     case IceType_i16:
4163       assert(Src0->getType() == IceType_v16i1);
4164       llvm::report_fatal_error(
4165           "i16 to v16i1 conversion should have been prelowered.");
4166       break;
4167     case IceType_i32:
4168     case IceType_f32: {
4169       Variable *Src0R = legalizeToReg(Src0);
4170       Variable *T = makeReg(DestTy);
4171       _mov(T, Src0R);
4172       lowerAssign(InstAssign::create(Func, Dest, T));
4173       break;
4174     }
4175     case IceType_i64: {
4176       // t0, t1 <- src0
4177       // dest[31..0]  = t0
4178       // dest[63..32] = t1
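      // Roughly a single core<->VFP transfer, e.g. "vmov r0, r1, d0"
      // (hypothetical registers), with r0/r1 becoming the lo/hi halves of
      // the i64 destination.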
4179       assert(Src0->getType() == IceType_f64);
4180       auto *T = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
4181       T->initHiLo(Func);
4182       configureBitcastTemporary(T);
4183       Variable *Src0R = legalizeToReg(Src0);
4184       _mov(T, Src0R);
4185       Context.insert<InstFakeUse>(T->getHi());
4186       Context.insert<InstFakeUse>(T->getLo());
4187       lowerAssign(InstAssign::create(Func, Dest, T));
4188       break;
4189     }
4190     case IceType_f64: {
4191       // T0 <- lo(src)
4192       // T1 <- hi(src)
4193       // vmov T2, T0, T1
4194       // Dest <- T2
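      // The inverse of the i64 case above: roughly "vmov d0, r0, r1"
      // (hypothetical registers), packing the lo/hi halves of the i64 source
      // into one double-precision register.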
4195       assert(Src0->getType() == IceType_i64);
4196       Variable *T = makeReg(DestTy);
4197       auto *Src64 = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
4198       Src64->initHiLo(Func);
4199       configureBitcastTemporary(Src64);
4200       lowerAssign(InstAssign::create(Func, Src64, Src0));
4201       _mov(T, Src64);
4202       lowerAssign(InstAssign::create(Func, Dest, T));
4203       break;
4204     }
4205     case IceType_v8i1:
4206       assert(Src0->getType() == IceType_i8);
4207       llvm::report_fatal_error(
4208           "i8 to v8i1 conversion should have been prelowered.");
4209       break;
4210     case IceType_v16i1:
4211       assert(Src0->getType() == IceType_i16);
4212       llvm::report_fatal_error(
4213           "i16 to v16i1 conversion should have been prelowered.");
4214       break;
4215     case IceType_v4i1:
4216     case IceType_v8i16:
4217     case IceType_v16i8:
4218     case IceType_v4f32:
4219     case IceType_v4i32: {
4220       assert(typeWidthInBytes(DestTy) == typeWidthInBytes(Src0->getType()));
4221       assert(isVectorType(DestTy) == isVectorType(Src0->getType()));
4222       Variable *T = makeReg(DestTy);
4223       _mov(T, Src0);
4224       _mov(Dest, T);
4225       break;
4226     }
4227     }
4228     break;
4229   }
4230   }
4231 }
4232 
4233 void TargetARM32::lowerExtractElement(const InstExtractElement *Instr) {
4234   Variable *Dest = Instr->getDest();
4235   Type DestTy = Dest->getType();
4236 
4237   Variable *Src0 = legalizeToReg(Instr->getSrc(0));
4238   Operand *Src1 = Instr->getSrc(1);
4239 
4240   if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src1)) {
4241     const uint32_t Index = Imm->getValue();
4242     Variable *T = makeReg(DestTy);
4243     Variable *TSrc0 = makeReg(Src0->getType());
4244 
4245     if (isFloatingType(DestTy)) {
4246       // We need to make sure the source is in a suitable register.
4247       TSrc0->setRegClass(RegARM32::RCARM32_QtoS);
4248     }
4249 
4250     _mov(TSrc0, Src0);
4251     _extractelement(T, TSrc0, Index);
4252     _mov(Dest, T);
4253     return;
4254   }
4255   assert(false && "extractelement requires a constant index");
4256 }
4257 
4258 namespace {
4259 // Validates FCMPARM32_TABLE's declaration w.r.t. InstFcmp::FCondition ordering
4260 // (and naming).
4261 enum {
4262 #define X(val, CC0, CC1, CC0_V, CC1_V, INV_V, NEG_V) _fcmp_ll_##val,
4263   FCMPARM32_TABLE
4264 #undef X
4265       _fcmp_ll_NUM
4266 };
4267 
4268 enum {
4269 #define X(tag, str) _fcmp_hl_##tag = InstFcmp::tag,
4270   ICEINSTFCMP_TABLE
4271 #undef X
4272       _fcmp_hl_NUM
4273 };
4274 
4275 static_assert((uint32_t)_fcmp_hl_NUM == (uint32_t)_fcmp_ll_NUM,
4276               "Inconsistency between high-level and low-level fcmp tags.");
4277 #define X(tag, str)                                                            \
4278   static_assert(                                                               \
4279       (uint32_t)_fcmp_hl_##tag == (uint32_t)_fcmp_ll_##tag,                    \
4280       "Inconsistency between high-level and low-level fcmp tag " #tag);
4281 ICEINSTFCMP_TABLE
4282 #undef X
4283 
4284 struct {
4285   CondARM32::Cond CC0;
4286   CondARM32::Cond CC1;
4287 } TableFcmp[] = {
4288 #define X(val, CC0, CC1, CC0_V, CC1_V, INV_V, NEG_V)                           \
4289   {CondARM32::CC0, CondARM32::CC1},
4290     FCMPARM32_TABLE
4291 #undef X
4292 };
4293 
4294 bool isFloatingPointZero(const Operand *Src) {
4295   if (const auto *F32 = llvm::dyn_cast<const ConstantFloat>(Src)) {
4296     return Utils::isPositiveZero(F32->getValue());
4297   }
4298 
4299   if (const auto *F64 = llvm::dyn_cast<const ConstantDouble>(Src)) {
4300     return Utils::isPositiveZero(F64->getValue());
4301   }
4302 
4303   return false;
4304 }
4305 } // end of anonymous namespace
4306 
4307 TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) {
4308   InstFcmp::FCond Condition = Instr->getCondition();
4309   switch (Condition) {
4310   case InstFcmp::False:
4311     return CondWhenTrue(CondARM32::kNone);
4312   case InstFcmp::True:
4313     return CondWhenTrue(CondARM32::AL);
4314     break;
4315   default: {
4316     Variable *Src0R = legalizeToReg(Instr->getSrc(0));
4317     Operand *Src1 = Instr->getSrc(1);
4318     if (isFloatingPointZero(Src1)) {
4319       _vcmp(Src0R, OperandARM32FlexFpZero::create(Func, Src0R->getType()));
4320     } else {
4321       _vcmp(Src0R, legalizeToReg(Src1));
4322     }
4323     _vmrs();
4324     assert(Condition < llvm::array_lengthof(TableFcmp));
4325     return CondWhenTrue(TableFcmp[Condition].CC0, TableFcmp[Condition].CC1);
4326   }
4327   }
4328 }
4329 
4330 void TargetARM32::lowerFcmp(const InstFcmp *Instr) {
4331   Variable *Dest = Instr->getDest();
4332   const Type DestTy = Dest->getType();
4333 
4334   if (isVectorType(DestTy)) {
4335     if (Instr->getCondition() == InstFcmp::False) {
4336       constexpr Type SafeTypeForMovingConstant = IceType_v4i32;
4337       auto *T = makeReg(SafeTypeForMovingConstant);
4338       _mov(T, llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(0)));
4339       _mov(Dest, T);
4340       return;
4341     }
4342 
4343     if (Instr->getCondition() == InstFcmp::True) {
4344       constexpr Type SafeTypeForMovingConstant = IceType_v4i32;
4345       auto *T = makeReg(SafeTypeForMovingConstant);
4346       _mov(T, llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(1)));
4347       _mov(Dest, T);
4348       return;
4349     }
4350 
4351     Variable *T0;
4352     Variable *T1;
4353     bool Negate = false;
4354     auto *Src0 = legalizeToReg(Instr->getSrc(0));
4355     auto *Src1 = legalizeToReg(Instr->getSrc(1));
4356 
4357     switch (Instr->getCondition()) {
4358     default:
4359       llvm::report_fatal_error("Unhandled fp comparison.");
4360 #define _Vcnone(Tptr, S0, S1)                                                  \
4361   do {                                                                         \
4362     *(Tptr) = nullptr;                                                         \
4363   } while (0)
4364 #define _Vceq(Tptr, S0, S1)                                                    \
4365   do {                                                                         \
4366     *(Tptr) = makeReg(DestTy);                                                 \
4367     _vceq(*(Tptr), S0, S1);                                                    \
4368   } while (0)
4369 #define _Vcge(Tptr, S0, S1)                                                    \
4370   do {                                                                         \
4371     *(Tptr) = makeReg(DestTy);                                                 \
4372     _vcge(*(Tptr), S0, S1)->setSignType(InstARM32::FS_Signed);                 \
4373   } while (0)
4374 #define _Vcgt(Tptr, S0, S1)                                                    \
4375   do {                                                                         \
4376     *(Tptr) = makeReg(DestTy);                                                 \
4377     _vcgt(*(Tptr), S0, S1)->setSignType(InstARM32::FS_Signed);                 \
4378   } while (0)
4379 #define X(val, CC0, CC1, CC0_V, CC1_V, INV_V, NEG_V)                           \
4380   case InstFcmp::val: {                                                        \
4381     _Vc##CC0_V(&T0, (INV_V) ? Src1 : Src0, (INV_V) ? Src0 : Src1);             \
4382     _Vc##CC1_V(&T1, (INV_V) ? Src0 : Src1, (INV_V) ? Src1 : Src0);             \
4383     Negate = NEG_V;                                                            \
4384   } break;
4385       FCMPARM32_TABLE
4386 #undef X
4387 #undef _Vcgt
4388 #undef _Vcge
4389 #undef _Vceq
4390 #undef _Vcnone
4391     }
4392     assert(T0 != nullptr);
4393     Variable *T = T0;
4394     if (T1 != nullptr) {
4395       T = makeReg(DestTy);
4396       _vorr(T, T0, T1);
4397     }
4398 
4399     if (Negate) {
4400       auto *TNeg = makeReg(DestTy);
4401       _vmvn(TNeg, T);
4402       T = TNeg;
4403     }
4404 
4405     _mov(Dest, T);
4406     return;
4407   }
4408 
4409   Variable *T = makeReg(IceType_i1);
4410   Operand *_1 = legalize(Ctx->getConstantInt32(1), Legal_Reg | Legal_Flex);
4411   Operand *_0 =
4412       legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
4413 
4414   CondWhenTrue Cond = lowerFcmpCond(Instr);
4415 
4416   bool RedefineT = false;
4417   if (Cond.WhenTrue0 != CondARM32::AL) {
4418     _mov(T, _0);
4419     RedefineT = true;
4420   }
4421 
4422   if (Cond.WhenTrue0 == CondARM32::kNone) {
4423     _mov(Dest, T);
4424     return;
4425   }
4426 
4427   if (RedefineT) {
4428     _mov_redefined(T, _1, Cond.WhenTrue0);
4429   } else {
4430     _mov(T, _1, Cond.WhenTrue0);
4431   }
4432 
4433   if (Cond.WhenTrue1 != CondARM32::kNone) {
4434     _mov_redefined(T, _1, Cond.WhenTrue1);
4435   }
4436 
4437   _mov(Dest, T);
4438 }
4439 
4440 TargetARM32::CondWhenTrue
4441 TargetARM32::lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
4442                                 Operand *Src1) {
4443   assert(Condition < llvm::array_lengthof(TableIcmp64));
4444 
4445   Int32Operands SrcsLo(loOperand(Src0), loOperand(Src1));
4446   Int32Operands SrcsHi(hiOperand(Src0), hiOperand(Src1));
4447   assert(SrcsLo.hasConstOperand() == SrcsHi.hasConstOperand());
4448   assert(SrcsLo.swappedOperands() == SrcsHi.swappedOperands());
4449 
4450   if (SrcsLo.hasConstOperand()) {
4451     const uint32_t ValueLo = SrcsLo.getConstantValue();
4452     const uint32_t ValueHi = SrcsHi.getConstantValue();
4453     const uint64_t Value = (static_cast<uint64_t>(ValueHi) << 32) | ValueLo;
4454     if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) &&
4455         Value == 0) {
4456       Variable *T = makeReg(IceType_i32);
4457       Variable *Src0LoR = SrcsLo.src0R(this);
4458       Variable *Src0HiR = SrcsHi.src0R(this);
4459       _orrs(T, Src0LoR, Src0HiR);
4460       Context.insert<InstFakeUse>(T);
4461       return CondWhenTrue(TableIcmp64[Condition].C1);
4462     }
4463 
4464     Variable *Src0RLo = SrcsLo.src0R(this);
4465     Variable *Src0RHi = SrcsHi.src0R(this);
4466     Operand *Src1RFLo = SrcsLo.src1RF(this);
4467     Operand *Src1RFHi = ValueLo == ValueHi ? Src1RFLo : SrcsHi.src1RF(this);
4468 
4469     const bool UseRsb =
4470         TableIcmp64[Condition].Swapped != SrcsLo.swappedOperands();
4471 
4472     if (UseRsb) {
4473       if (TableIcmp64[Condition].IsSigned) {
4474         Variable *T = makeReg(IceType_i32);
4475         _rsbs(T, Src0RLo, Src1RFLo);
4476         Context.insert<InstFakeUse>(T);
4477 
4478         T = makeReg(IceType_i32);
4479         _rscs(T, Src0RHi, Src1RFHi);
4480         // We need to add a FakeUse here because liveness gets mad at us (Def
4481         // without Use.) Note that flag-setting instructions are considered to
4482         // have side effects and, therefore, are not DCE'ed.
4483         Context.insert<InstFakeUse>(T);
4484       } else {
4485         Variable *T = makeReg(IceType_i32);
4486         _rsbs(T, Src0RHi, Src1RFHi);
4487         Context.insert<InstFakeUse>(T);
4488 
4489         T = makeReg(IceType_i32);
4490         _rsbs(T, Src0RLo, Src1RFLo, CondARM32::EQ);
4491         Context.insert<InstFakeUse>(T);
4492       }
4493     } else {
4494       if (TableIcmp64[Condition].IsSigned) {
4495         _cmp(Src0RLo, Src1RFLo);
4496         Variable *T = makeReg(IceType_i32);
4497         _sbcs(T, Src0RHi, Src1RFHi);
4498         Context.insert<InstFakeUse>(T);
4499       } else {
4500         _cmp(Src0RHi, Src1RFHi);
4501         _cmp(Src0RLo, Src1RFLo, CondARM32::EQ);
4502       }
4503     }
4504 
4505     return CondWhenTrue(TableIcmp64[Condition].C1);
4506   }
4507 
4508   Variable *Src0RLo, *Src0RHi;
4509   Operand *Src1RFLo, *Src1RFHi;
4510   if (TableIcmp64[Condition].Swapped) {
4511     Src0RLo = legalizeToReg(loOperand(Src1));
4512     Src0RHi = legalizeToReg(hiOperand(Src1));
4513     Src1RFLo = legalizeToReg(loOperand(Src0));
4514     Src1RFHi = legalizeToReg(hiOperand(Src0));
4515   } else {
4516     Src0RLo = legalizeToReg(loOperand(Src0));
4517     Src0RHi = legalizeToReg(hiOperand(Src0));
4518     Src1RFLo = legalizeToReg(loOperand(Src1));
4519     Src1RFHi = legalizeToReg(hiOperand(Src1));
4520   }
4521 
4522   // a=icmp cond, b, c ==>
4523   // GCC does:
4524   //   cmp      b.hi, c.hi     or  cmp      b.lo, c.lo
4525   //   cmp.eq   b.lo, c.lo         sbcs t1, b.hi, c.hi
4526   //   mov.<C1> t, #1              mov.<C1> t, #1
4527   //   mov.<C2> t, #0              mov.<C2> t, #0
4528   //   mov      a, t               mov      a, t
4529   // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi"
4530   // is used for signed compares. In some cases, b and c need to be swapped as
4531   // well.
4532   //
4533   // LLVM does:
4534   // for EQ and NE:
4535   //   eor  t1, b.hi, c.hi
4536   //   eor  t2, b.lo, c.lo
4537   //   orrs t, t1, t2
4538   //   mov.<C> t, #1
4539   //   mov  a, t
4540   //
4541   // that's nice in that it's just as short but has fewer dependencies for
4542   // better ILP at the cost of more registers.
4543   //
4544   // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two
4545   // unconditional mov #0, two cmps, two conditional mov #1, and one
4546   // conditional reg mov. That has few dependencies for good ILP, but is a
4547   // longer sequence.
4548   //
4549   // So, we are going with the GCC version since it's usually better (except
4550   // perhaps for eq/ne). We could revisit special-casing eq/ne later.
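  // A rough illustration (hypothetical registers): for an unsigned
  // "a = icmp ult i64 b, c" this path sets the flags with
  //     cmp   b.hi, c.hi
  //     cmpeq b.lo, c.lo       @ low words only decide when the highs are equal
  // and lowerIcmp() below then materializes the result, roughly
  //     mov   t, #0
  //     movlo t, #1
  //     mov   a, t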
4551   if (TableIcmp64[Condition].IsSigned) {
4552     Variable *ScratchReg = makeReg(IceType_i32);
4553     _cmp(Src0RLo, Src1RFLo);
4554     _sbcs(ScratchReg, Src0RHi, Src1RFHi);
4555     // ScratchReg isn't going to be used, but we need the side-effect of
4556     // setting flags from this operation.
4557     Context.insert<InstFakeUse>(ScratchReg);
4558   } else {
4559     _cmp(Src0RHi, Src1RFHi);
4560     _cmp(Src0RLo, Src1RFLo, CondARM32::EQ);
4561   }
4562   return CondWhenTrue(TableIcmp64[Condition].C1);
4563 }
4564 
4565 TargetARM32::CondWhenTrue
4566 TargetARM32::lowerInt32IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
4567                                 Operand *Src1) {
4568   Int32Operands Srcs(Src0, Src1);
4569   if (!Srcs.hasConstOperand()) {
4570 
4571     Variable *Src0R = Srcs.src0R(this);
4572     Operand *Src1RF = Srcs.src1RF(this);
4573     _cmp(Src0R, Src1RF);
4574     return CondWhenTrue(getIcmp32Mapping(Condition));
4575   }
4576 
4577   Variable *Src0R = Srcs.src0R(this);
4578   const int32_t Value = Srcs.getConstantValue();
4579   if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && Value == 0) {
4580     _tst(Src0R, Src0R);
4581     return CondWhenTrue(getIcmp32Mapping(Condition));
4582   }
4583 
4584   if (!Srcs.swappedOperands() && !Srcs.immediateIsFlexEncodable() &&
4585       Srcs.negatedImmediateIsFlexEncodable()) {
4586     Operand *Src1F = Srcs.negatedSrc1F(this);
4587     _cmn(Src0R, Src1F);
4588     return CondWhenTrue(getIcmp32Mapping(Condition));
4589   }
4590 
4591   Operand *Src1RF = Srcs.src1RF(this);
4592   if (!Srcs.swappedOperands()) {
4593     _cmp(Src0R, Src1RF);
4594   } else {
4595     Variable *T = makeReg(IceType_i32);
4596     _rsbs(T, Src0R, Src1RF);
4597     Context.insert<InstFakeUse>(T);
4598   }
4599   return CondWhenTrue(getIcmp32Mapping(Condition));
4600 }
4601 
4602 TargetARM32::CondWhenTrue
4603 TargetARM32::lowerInt8AndInt16IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
4604                                        Operand *Src1) {
4605   Int32Operands Srcs(Src0, Src1);
4606   const int32_t ShAmt = 32 - getScalarIntBitWidth(Src0->getType());
4607   assert(ShAmt >= 0);
4608 
4609   if (!Srcs.hasConstOperand()) {
4610     Variable *Src0R = makeReg(IceType_i32);
4611     Operand *ShAmtImm = shAmtImm(ShAmt);
4612     _lsl(Src0R, legalizeToReg(Src0), ShAmtImm);
4613 
4614     Variable *Src1R = legalizeToReg(Src1);
4615     auto *Src1F = OperandARM32FlexReg::create(Func, IceType_i32, Src1R,
4616                                               OperandARM32::LSL, ShAmtImm);
4617     _cmp(Src0R, Src1F);
4618     return CondWhenTrue(getIcmp32Mapping(Condition));
4619   }
4620 
4621   const int32_t Value = Srcs.getConstantValue();
4622   if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && Value == 0) {
4623     Operand *ShAmtImm = shAmtImm(ShAmt);
4624     Variable *T = makeReg(IceType_i32);
4625     _lsls(T, Srcs.src0R(this), ShAmtImm);
4626     Context.insert<InstFakeUse>(T);
4627     return CondWhenTrue(getIcmp32Mapping(Condition));
4628   }
4629 
4630   Variable *ConstR = makeReg(IceType_i32);
4631   _mov(ConstR,
4632        legalize(Ctx->getConstantInt32(Value << ShAmt), Legal_Reg | Legal_Flex));
4633   Operand *NonConstF = OperandARM32FlexReg::create(
4634       Func, IceType_i32, Srcs.src0R(this), OperandARM32::LSL,
4635       Ctx->getConstantInt32(ShAmt));
4636 
4637   if (Srcs.swappedOperands()) {
4638     _cmp(ConstR, NonConstF);
4639   } else {
4640     Variable *T = makeReg(IceType_i32);
4641     _rsbs(T, ConstR, NonConstF);
4642     Context.insert<InstFakeUse>(T);
4643   }
4644   return CondWhenTrue(getIcmp32Mapping(Condition));
4645 }
4646 
4647 TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Instr) {
4648   return lowerIcmpCond(Instr->getCondition(), Instr->getSrc(0),
4649                        Instr->getSrc(1));
4650 }
4651 
4652 TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(InstIcmp::ICond Condition,
4653                                                      Operand *Src0,
4654                                                      Operand *Src1) {
4655   Src0 = legalizeUndef(Src0);
4656   Src1 = legalizeUndef(Src1);
4657 
4658   // a=icmp cond b, c ==>
4659   // GCC does:
4660   //   <u/s>xtb tb, b
4661   //   <u/s>xtb tc, c
4662   //   cmp      tb, tc
4663   //   mov.C1   t, #0
4664   //   mov.C2   t, #1
4665   //   mov      a, t
4666   // where the unsigned/sign extension is not needed for 32-bit. They also have
4667   // special cases for EQ and NE. E.g., for NE:
4668   //   <extend to tb, tc>
4669   //   subs     t, tb, tc
4670   //   movne    t, #1
4671   //   mov      a, t
4672   //
4673   // LLVM does:
4674   //   lsl     tb, b, #<N>
4675   //   mov     t, #0
4676   //   cmp     tb, c, lsl #<N>
4677   //   mov.<C> t, #1
4678   //   mov     a, t
4679   //
4680   // the left shift is by 0, 16, or 24, which allows the comparison to focus on
4681   // the digits that actually matter (for 16-bit or 8-bit signed/unsigned). For
4682   // the unsigned case, for some reason it does something similar to GCC and
4683   // does a uxtb first. It's not clear to me why that special-casing is needed.
4684   //
4685   // We'll go with the LLVM way for now, since it's shorter and has just as few
4686   // dependencies.
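  // A rough illustration (hypothetical registers): for "a = icmp ult i8 b, c"
  // the LLVM-style sequence above comes out roughly as
  //     lsl   tb, b, #24         @ move the 8 interesting bits to the top
  //     cmp   tb, c, lsl #24
  //     movlo t, #1              @ t was set to #0 beforehand
  //     mov   a, t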
4687   switch (Src0->getType()) {
4688   default:
4689     llvm::report_fatal_error("Unhandled type in lowerIcmpCond");
4690   case IceType_i1:
4691   case IceType_i8:
4692   case IceType_i16:
4693     return lowerInt8AndInt16IcmpCond(Condition, Src0, Src1);
4694   case IceType_i32:
4695     return lowerInt32IcmpCond(Condition, Src0, Src1);
4696   case IceType_i64:
4697     return lowerInt64IcmpCond(Condition, Src0, Src1);
4698   }
4699 }
4700 
4701 void TargetARM32::lowerIcmp(const InstIcmp *Instr) {
4702   Variable *Dest = Instr->getDest();
4703   const Type DestTy = Dest->getType();
4704 
4705   if (isVectorType(DestTy)) {
4706     auto *T = makeReg(DestTy);
4707     auto *Src0 = legalizeToReg(Instr->getSrc(0));
4708     auto *Src1 = legalizeToReg(Instr->getSrc(1));
4709     const Type SrcTy = Src0->getType();
4710 
4711     bool NeedsShl = false;
4712     Type NewTypeAfterShl;
4713     SizeT ShAmt;
4714     switch (SrcTy) {
4715     default:
4716       break;
4717     case IceType_v16i1:
4718       NeedsShl = true;
4719       NewTypeAfterShl = IceType_v16i8;
4720       ShAmt = 7;
4721       break;
4722     case IceType_v8i1:
4723       NeedsShl = true;
4724       NewTypeAfterShl = IceType_v8i16;
4725       ShAmt = 15;
4726       break;
4727     case IceType_v4i1:
4728       NeedsShl = true;
4729       NewTypeAfterShl = IceType_v4i32;
4730       ShAmt = 31;
4731       break;
4732     }
4733 
4734     if (NeedsShl) {
4735       auto *Imm = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(ShAmt));
4736       auto *Src0T = makeReg(NewTypeAfterShl);
4737       auto *Src0Shl = makeReg(NewTypeAfterShl);
4738       _mov(Src0T, Src0);
4739       _vshl(Src0Shl, Src0T, Imm);
4740       Src0 = Src0Shl;
4741 
4742       auto *Src1T = makeReg(NewTypeAfterShl);
4743       auto *Src1Shl = makeReg(NewTypeAfterShl);
4744       _mov(Src1T, Src1);
4745       _vshl(Src1Shl, Src1T, Imm);
4746       Src1 = Src1Shl;
4747     }
4748 
4749     switch (Instr->getCondition()) {
4750     default:
4751       llvm::report_fatal_error("Unhandled integer comparison.");
4752 #define _Vceq(T, S0, S1, Signed) _vceq(T, S0, S1)
4753 #define _Vcge(T, S0, S1, Signed)                                               \
4754   _vcge(T, S0, S1)->setSignType(Signed ? InstARM32::FS_Signed                  \
4755                                        : InstARM32::FS_Unsigned)
4756 #define _Vcgt(T, S0, S1, Signed)                                               \
4757   _vcgt(T, S0, S1)->setSignType(Signed ? InstARM32::FS_Signed                  \
4758                                        : InstARM32::FS_Unsigned)
4759 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V)    \
4760   case InstIcmp::val: {                                                        \
4761     _Vc##C_V(T, (INV_V) ? Src1 : Src0, (INV_V) ? Src0 : Src1, is_signed);      \
4762     if (NEG_V) {                                                               \
4763       auto *TInv = makeReg(DestTy);                                            \
4764       _vmvn(TInv, T);                                                          \
4765       T = TInv;                                                                \
4766     }                                                                          \
4767   } break;
4768       ICMPARM32_TABLE
4769 #undef X
4770 #undef _Vcgt
4771 #undef _Vcge
4772 #undef _Vceq
4773     }
4774     _mov(Dest, T);
4775     return;
4776   }
4777 
4778   Operand *_0 =
4779       legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
4780   Operand *_1 = legalize(Ctx->getConstantInt32(1), Legal_Reg | Legal_Flex);
4781   Variable *T = makeReg(IceType_i1);
4782 
4783   _mov(T, _0);
4784   CondWhenTrue Cond = lowerIcmpCond(Instr);
4785   _mov_redefined(T, _1, Cond.WhenTrue0);
4786   _mov(Dest, T);
4787 
4788   assert(Cond.WhenTrue1 == CondARM32::kNone);
4789 
4790   return;
4791 }
4792 
4793 void TargetARM32::lowerInsertElement(const InstInsertElement *Instr) {
4794   Variable *Dest = Instr->getDest();
4795   Type DestTy = Dest->getType();
4796 
4797   Variable *Src0 = legalizeToReg(Instr->getSrc(0));
4798   Variable *Src1 = legalizeToReg(Instr->getSrc(1));
4799   Operand *Src2 = Instr->getSrc(2);
4800 
4801   if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src2)) {
4802     const uint32_t Index = Imm->getValue();
4803     Variable *T = makeReg(DestTy);
4804 
4805     if (isFloatingType(DestTy)) {
4806       T->setRegClass(RegARM32::RCARM32_QtoS);
4807     }
4808 
4809     _mov(T, Src0);
4810     _insertelement(T, Src1, Index);
4811     _set_dest_redefined();
4812     _mov(Dest, T);
4813     return;
4814   }
4815   assert(false && "insertelement requires a constant index");
4816 }
4817 
4818 namespace {
4819 inline uint64_t getConstantMemoryOrder(Operand *Opnd) {
4820   if (auto *Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
4821     return Integer->getValue();
4822   return Intrinsics::MemoryOrderInvalid;
4823 }
4824 } // end of anonymous namespace
4825 
4826 void TargetARM32::lowerLoadLinkedStoreExclusive(
4827     Type Ty, Operand *Addr, std::function<Variable *(Variable *)> Operation,
4828     CondARM32::Cond Cond) {
4829 
4830   auto *Retry = Context.insert<InstARM32Label>(this);
4831 
4832   { // scoping for loop highlighting.
4833     Variable *Success = makeReg(IceType_i32);
4834     Variable *Tmp = (Ty == IceType_i64) ? makeI64RegPair() : makeReg(Ty);
4835     auto *_0 = Ctx->getConstantZero(IceType_i32);
4836 
4837     Context.insert<InstFakeDef>(Tmp);
4838     Context.insert<InstFakeUse>(Tmp);
4839     Variable *AddrR = legalizeToReg(Addr);
4840     _ldrex(Tmp, formMemoryOperand(AddrR, Ty))->setDestRedefined();
4841     auto *StoreValue = Operation(Tmp);
4842     assert(StoreValue->mustHaveReg());
4843     // strex requires Dest to be a register other than Value or Addr. This
4844     // restriction is cleanly represented by adding an "early" definition of
4845     // Dest (or a later use of all the sources.)
4846     Context.insert<InstFakeDef>(Success);
4847     if (Cond != CondARM32::AL) {
4848       _mov_redefined(Success, legalize(_0, Legal_Reg | Legal_Flex),
4849                      InstARM32::getOppositeCondition(Cond));
4850     }
4851     _strex(Success, StoreValue, formMemoryOperand(AddrR, Ty), Cond)
4852         ->setDestRedefined();
4853     _cmp(Success, _0);
4854   }
4855 
4856   _br(Retry, CondARM32::NE);
4857 }
4858 
4859 namespace {
4860 InstArithmetic *createArithInst(Cfg *Func, uint32_t Operation, Variable *Dest,
4861                                 Variable *Src0, Operand *Src1) {
4862   InstArithmetic::OpKind Oper;
4863   switch (Operation) {
4864   default:
4865     llvm::report_fatal_error("Unknown AtomicRMW operation");
4866   case Intrinsics::AtomicExchange:
4867     llvm::report_fatal_error("Can't handle Atomic xchg operation");
4868   case Intrinsics::AtomicAdd:
4869     Oper = InstArithmetic::Add;
4870     break;
4871   case Intrinsics::AtomicAnd:
4872     Oper = InstArithmetic::And;
4873     break;
4874   case Intrinsics::AtomicSub:
4875     Oper = InstArithmetic::Sub;
4876     break;
4877   case Intrinsics::AtomicOr:
4878     Oper = InstArithmetic::Or;
4879     break;
4880   case Intrinsics::AtomicXor:
4881     Oper = InstArithmetic::Xor;
4882     break;
4883   }
4884   return InstArithmetic::create(Func, Oper, Dest, Src0, Src1);
4885 }
4886 } // end of anonymous namespace
4887 
4888 void TargetARM32::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
4889                                  Operand *Addr, Operand *Val) {
4890   // retry:
4891   //     ldrex tmp, [addr]
4892   //     mov contents, tmp
4893   //     op result, contents, Val
4894   //     strex success, result, [addr]
4895   //     cmp success, 0
4896   //     jne retry
4897   //     fake-use(addr, operand)  @ prevents undesirable clobbering.
4898   //     mov dest, contents
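  // A rough illustration (hypothetical registers): an i32 AtomicAdd becomes
  //     dmb
  // retry:
  //     ldrex r1, [r0]
  //     add   r2, r1, r3         @ r3 holds Val
  //     strex r4, r2, [r0]
  //     cmp   r4, #0
  //     bne   retry
  //     dmb
  //     mov   dest, r1           @ the old contents are the result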
4899   auto DestTy = Dest->getType();
4900 
4901   if (DestTy == IceType_i64) {
4902     lowerInt64AtomicRMW(Dest, Operation, Addr, Val);
4903     return;
4904   }
4905 
4906   Operand *ValRF = nullptr;
4907   if (llvm::isa<ConstantInteger32>(Val)) {
4908     ValRF = Val;
4909   } else {
4910     ValRF = legalizeToReg(Val);
4911   }
4912   auto *ContentsR = makeReg(DestTy);
4913   auto *ResultR = makeReg(DestTy);
4914 
4915   _dmb();
4916   lowerLoadLinkedStoreExclusive(
4917       DestTy, Addr,
4918       [this, Operation, ResultR, ContentsR, ValRF](Variable *Tmp) {
4919         lowerAssign(InstAssign::create(Func, ContentsR, Tmp));
4920         if (Operation == Intrinsics::AtomicExchange) {
4921           lowerAssign(InstAssign::create(Func, ResultR, ValRF));
4922         } else {
4923           lowerArithmetic(
4924               createArithInst(Func, Operation, ResultR, ContentsR, ValRF));
4925         }
4926         return ResultR;
4927       });
4928   _dmb();
4929   if (auto *ValR = llvm::dyn_cast<Variable>(ValRF)) {
4930     Context.insert<InstFakeUse>(ValR);
4931   }
4932   // Can't dce ContentsR.
4933   Context.insert<InstFakeUse>(ContentsR);
4934   lowerAssign(InstAssign::create(Func, Dest, ContentsR));
4935 }
4936 
4937 void TargetARM32::lowerInt64AtomicRMW(Variable *Dest, uint32_t Operation,
4938                                       Operand *Addr, Operand *Val) {
4939   assert(Dest->getType() == IceType_i64);
4940 
4941   auto *ResultR = makeI64RegPair();
4942 
4943   Context.insert<InstFakeDef>(ResultR);
4944 
4945   Operand *ValRF = nullptr;
4946   if (llvm::isa<ConstantInteger64>(Val)) {
4947     ValRF = Val;
4948   } else {
4949     auto *ValR64 = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
4950     ValR64->initHiLo(Func);
4951     ValR64->setMustNotHaveReg();
4952     ValR64->getLo()->setMustHaveReg();
4953     ValR64->getHi()->setMustHaveReg();
4954     lowerAssign(InstAssign::create(Func, ValR64, Val));
4955     ValRF = ValR64;
4956   }
4957 
4958   auto *ContentsR = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
4959   ContentsR->initHiLo(Func);
4960   ContentsR->setMustNotHaveReg();
4961   ContentsR->getLo()->setMustHaveReg();
4962   ContentsR->getHi()->setMustHaveReg();
4963 
4964   _dmb();
4965   lowerLoadLinkedStoreExclusive(
4966       IceType_i64, Addr,
4967       [this, Operation, ResultR, ContentsR, ValRF](Variable *Tmp) {
4968         lowerAssign(InstAssign::create(Func, ContentsR, Tmp));
4969         Context.insert<InstFakeUse>(Tmp);
4970         if (Operation == Intrinsics::AtomicExchange) {
4971           lowerAssign(InstAssign::create(Func, ResultR, ValRF));
4972         } else {
4973           lowerArithmetic(
4974               createArithInst(Func, Operation, ResultR, ContentsR, ValRF));
4975         }
4976         Context.insert<InstFakeUse>(ResultR->getHi());
4977         Context.insert<InstFakeDef>(ResultR, ResultR->getLo())
4978             ->setDestRedefined();
4979         return ResultR;
4980       });
4981   _dmb();
4982   if (auto *ValR64 = llvm::dyn_cast<Variable64On32>(ValRF)) {
4983     Context.insert<InstFakeUse>(ValR64->getLo());
4984     Context.insert<InstFakeUse>(ValR64->getHi());
4985   }
4986   lowerAssign(InstAssign::create(Func, Dest, ContentsR));
4987 }
4988 
4989 void TargetARM32::postambleCtpop64(const InstCall *Instr) {
4990   Operand *Arg0 = Instr->getArg(0);
4991   if (isInt32Asserting32Or64(Arg0->getType())) {
4992     return;
4993   }
4994   // The popcount helpers always return 32-bit values, while the intrinsic's
4995   // signature matches some 64-bit platforms' native instructions and expects to
4996   // fill a 64-bit reg. Thus, clear the upper bits of the dest just in case the
4997   // user doesn't do that in the IR or doesn't toss the bits via truncate.
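  // For example, a ctpop of an i64 argument calls the 32-bit-returning helper,
  // whose result ends up in the low half of the i64 destination pair; the code
  // below then writes 0 into the high half.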
4998   auto *DestHi = llvm::cast<Variable>(hiOperand(Instr->getDest()));
4999   Variable *T = makeReg(IceType_i32);
5000   Operand *_0 =
5001       legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
5002   _mov(T, _0);
5003   _mov(DestHi, T);
5004 }
5005 
5006 void TargetARM32::lowerIntrinsic(const InstIntrinsic *Instr) {
5007   Variable *Dest = Instr->getDest();
5008   Type DestTy = (Dest != nullptr) ? Dest->getType() : IceType_void;
5009   Intrinsics::IntrinsicID ID = Instr->getIntrinsicID();
5010   switch (ID) {
5011   case Intrinsics::AtomicFence:
5012   case Intrinsics::AtomicFenceAll:
5013     assert(Dest == nullptr);
5014     _dmb();
5015     return;
5016   case Intrinsics::AtomicIsLockFree: {
5017     Operand *ByteSize = Instr->getArg(0);
5018     auto *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize);
5019     if (CI == nullptr) {
5020       // The PNaCl ABI requires the byte size to be a compile-time constant.
5021       Func->setError("AtomicIsLockFree byte size should be compile-time const");
5022       return;
5023     }
5024     static constexpr int32_t NotLockFree = 0;
5025     static constexpr int32_t LockFree = 1;
5026     int32_t Result = NotLockFree;
5027     switch (CI->getValue()) {
5028     case 1:
5029     case 2:
5030     case 4:
5031     case 8:
5032       Result = LockFree;
5033       break;
5034     }
5035     _mov(Dest, legalizeToReg(Ctx->getConstantInt32(Result)));
5036     return;
5037   }
5038   case Intrinsics::AtomicLoad: {
5039     assert(isScalarIntegerType(DestTy));
5040     // We require the memory address to be naturally aligned. Given that is the
5041     // case, then normal loads are atomic.
5042     if (!Intrinsics::isMemoryOrderValid(
5043             ID, getConstantMemoryOrder(Instr->getArg(1)))) {
5044       Func->setError("Unexpected memory ordering for AtomicLoad");
5045       return;
5046     }
5047     Variable *T;
5048 
5049     if (DestTy == IceType_i64) {
5050       // ldrex is the only arm instruction that is guaranteed to load a 64-bit
5051       // integer atomically. Everything else works with a regular ldr.
5052       T = makeI64RegPair();
5053       _ldrex(T, formMemoryOperand(Instr->getArg(0), IceType_i64));
5054     } else {
5055       T = makeReg(DestTy);
5056       _ldr(T, formMemoryOperand(Instr->getArg(0), DestTy));
5057     }
5058     _dmb();
5059     lowerAssign(InstAssign::create(Func, Dest, T));
5060     // Add a fake-use of T to ensure the atomic load is not removed if Dest is
5061     // unused.
5062     Context.insert<InstFakeUse>(T);
5063     return;
5064   }
5065   case Intrinsics::AtomicStore: {
5066     // We require the memory address to be naturally aligned. Given that is the
5067     // case, then normal stores are atomic.
5068     if (!Intrinsics::isMemoryOrderValid(
5069             ID, getConstantMemoryOrder(Instr->getArg(2)))) {
5070       Func->setError("Unexpected memory ordering for AtomicStore");
5071       return;
5072     }
5073 
5074     auto *Value = Instr->getArg(0);
5075     if (Value->getType() == IceType_i64) {
5076       auto *ValueR = makeI64RegPair();
5077       Context.insert<InstFakeDef>(ValueR);
5078       lowerAssign(InstAssign::create(Func, ValueR, Value));
5079       _dmb();
5080       lowerLoadLinkedStoreExclusive(
5081           IceType_i64, Instr->getArg(1), [this, ValueR](Variable *Tmp) {
5082             // The following fake-use prevents the ldrex instruction from being
5083             // dead code eliminated.
5084             Context.insert<InstFakeUse>(llvm::cast<Variable>(loOperand(Tmp)));
5085             Context.insert<InstFakeUse>(llvm::cast<Variable>(hiOperand(Tmp)));
5086             Context.insert<InstFakeUse>(Tmp);
5087             return ValueR;
5088           });
5089       Context.insert<InstFakeUse>(ValueR);
5090       _dmb();
5091       return;
5092     }
5093 
5094     auto *ValueR = legalizeToReg(Instr->getArg(0));
5095     const auto ValueTy = ValueR->getType();
5096     assert(isScalarIntegerType(ValueTy));
5097     auto *Addr = legalizeToReg(Instr->getArg(1));
5098 
5099     // Non-64-bit stores are atomic as long as the address is aligned. This
5100     // is PNaCl, so addresses are aligned.
5101     _dmb();
5102     _str(ValueR, formMemoryOperand(Addr, ValueTy));
5103     _dmb();
5104     return;
5105   }
5106   case Intrinsics::AtomicCmpxchg: {
5107     // retry:
5108     //     ldrex tmp, [addr]
5109     //     cmp tmp, expected
5110     //     mov expected, tmp
5111     //     strexeq success, new, [addr]
5112     //     cmpeq success, #0
5113     //     bne retry
5114     //     mov dest, expected
5115     assert(isScalarIntegerType(DestTy));
5116     // We require the memory address to be naturally aligned. Given that is the
5117     // case, then normal loads are atomic.
5118     if (!Intrinsics::isMemoryOrderValid(
5119             ID, getConstantMemoryOrder(Instr->getArg(3)),
5120             getConstantMemoryOrder(Instr->getArg(4)))) {
5121       Func->setError("Unexpected memory ordering for AtomicCmpxchg");
5122       return;
5123     }
5124 
5125     if (DestTy == IceType_i64) {
5126       Variable *LoadedValue = nullptr;
5127 
5128       auto *New = makeI64RegPair();
5129       Context.insert<InstFakeDef>(New);
5130       lowerAssign(InstAssign::create(Func, New, Instr->getArg(2)));
5131 
5132       auto *Expected = makeI64RegPair();
5133       Context.insert<InstFakeDef>(Expected);
5134       lowerAssign(InstAssign::create(Func, Expected, Instr->getArg(1)));
5135 
5136       _dmb();
5137       lowerLoadLinkedStoreExclusive(
5138           DestTy, Instr->getArg(0),
5139           [this, Expected, New, &LoadedValue](Variable *Tmp) {
5140             auto *ExpectedLoR = llvm::cast<Variable>(loOperand(Expected));
5141             auto *ExpectedHiR = llvm::cast<Variable>(hiOperand(Expected));
5142             auto *TmpLoR = llvm::cast<Variable>(loOperand(Tmp));
5143             auto *TmpHiR = llvm::cast<Variable>(hiOperand(Tmp));
5144             _cmp(TmpLoR, ExpectedLoR);
5145             _cmp(TmpHiR, ExpectedHiR, CondARM32::EQ);
5146             LoadedValue = Tmp;
5147             return New;
5148           },
5149           CondARM32::EQ);
5150       _dmb();
5151 
5152       Context.insert<InstFakeUse>(LoadedValue);
5153       lowerAssign(InstAssign::create(Func, Dest, LoadedValue));
5154       // The fake-use Expected prevents the assignments to Expected (above)
5155       // from being removed if Dest is not used.
5156       Context.insert<InstFakeUse>(Expected);
5157       // New needs to be alive here, or its live range will end in the
5158       // strex instruction.
5159       Context.insert<InstFakeUse>(New);
5160       return;
5161     }
5162 
5163     auto *New = legalizeToReg(Instr->getArg(2));
5164     auto *Expected = legalizeToReg(Instr->getArg(1));
5165     Variable *LoadedValue = nullptr;
5166 
5167     _dmb();
5168     lowerLoadLinkedStoreExclusive(
5169         DestTy, Instr->getArg(0),
5170         [this, Expected, New, &LoadedValue](Variable *Tmp) {
5171           lowerIcmpCond(InstIcmp::Eq, Tmp, Expected);
5172           LoadedValue = Tmp;
5173           return New;
5174         },
5175         CondARM32::EQ);
5176     _dmb();
5177 
5178     lowerAssign(InstAssign::create(Func, Dest, LoadedValue));
5179     Context.insert<InstFakeUse>(Expected);
5180     Context.insert<InstFakeUse>(New);
5181     return;
5182   }
5183   case Intrinsics::AtomicRMW: {
5184     if (!Intrinsics::isMemoryOrderValid(
5185             ID, getConstantMemoryOrder(Instr->getArg(3)))) {
5186       Func->setError("Unexpected memory ordering for AtomicRMW");
5187       return;
5188     }
5189     lowerAtomicRMW(
5190         Dest,
5191         static_cast<uint32_t>(
5192             llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()),
5193         Instr->getArg(1), Instr->getArg(2));
5194     return;
5195   }
5196   case Intrinsics::Bswap: {
5197     Operand *Val = Instr->getArg(0);
5198     Type Ty = Val->getType();
5199     if (Ty == IceType_i64) {
5200       Val = legalizeUndef(Val);
5201       Variable *Val_Lo = legalizeToReg(loOperand(Val));
5202       Variable *Val_Hi = legalizeToReg(hiOperand(Val));
5203       Variable *T_Lo = makeReg(IceType_i32);
5204       Variable *T_Hi = makeReg(IceType_i32);
5205       auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
5206       auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
5207       _rev(T_Lo, Val_Lo);
5208       _rev(T_Hi, Val_Hi);
5209       _mov(DestLo, T_Hi);
5210       _mov(DestHi, T_Lo);
5211     } else {
5212       assert(Ty == IceType_i32 || Ty == IceType_i16);
5213       Variable *ValR = legalizeToReg(Val);
5214       Variable *T = makeReg(Ty);
5215       _rev(T, ValR);
5216       if (Val->getType() == IceType_i16) {
5217         Operand *_16 = shAmtImm(16);
5218         _lsr(T, T, _16);
5219       }
5220       _mov(Dest, T);
5221     }
5222     return;
5223   }
5224   case Intrinsics::Ctpop: {
5225     llvm::report_fatal_error("Ctpop should have been prelowered.");
5226   }
5227   case Intrinsics::Ctlz: {
5228     // The "is zero undef" parameter is ignored and we always return a
5229     // well-defined value.
5230     Operand *Val = Instr->getArg(0);
5231     Variable *ValLoR;
5232     Variable *ValHiR = nullptr;
5233     if (Val->getType() == IceType_i64) {
5234       Val = legalizeUndef(Val);
5235       ValLoR = legalizeToReg(loOperand(Val));
5236       ValHiR = legalizeToReg(hiOperand(Val));
5237     } else {
5238       ValLoR = legalizeToReg(Val);
5239     }
5240     lowerCLZ(Dest, ValLoR, ValHiR);
5241     return;
5242   }
5243   case Intrinsics::Cttz: {
5244     // Essentially like Clz, but reverse the bits first.
5245     Operand *Val = Instr->getArg(0);
5246     Variable *ValLoR;
5247     Variable *ValHiR = nullptr;
5248     if (Val->getType() == IceType_i64) {
5249       Val = legalizeUndef(Val);
5250       ValLoR = legalizeToReg(loOperand(Val));
5251       ValHiR = legalizeToReg(hiOperand(Val));
5252       Variable *TLo = makeReg(IceType_i32);
5253       Variable *THi = makeReg(IceType_i32);
5254       _rbit(TLo, ValLoR);
5255       _rbit(THi, ValHiR);
5256       ValLoR = THi;
5257       ValHiR = TLo;
5258     } else {
5259       ValLoR = legalizeToReg(Val);
5260       Variable *T = makeReg(IceType_i32);
5261       _rbit(T, ValLoR);
5262       ValLoR = T;
5263     }
5264     lowerCLZ(Dest, ValLoR, ValHiR);
5265     return;
5266   }
5267   case Intrinsics::Fabs: {
5268     Variable *T = makeReg(DestTy);
5269     _vabs(T, legalizeToReg(Instr->getArg(0)));
5270     _mov(Dest, T);
5271     return;
5272   }
5273   case Intrinsics::Longjmp: {
5274     llvm::report_fatal_error("longjmp should have been prelowered.");
5275   }
5276   case Intrinsics::Memcpy: {
5277     llvm::report_fatal_error("memcpy should have been prelowered.");
5278   }
5279   case Intrinsics::Memmove: {
5280     llvm::report_fatal_error("memmove should have been prelowered.");
5281   }
5282   case Intrinsics::Memset: {
5283     llvm::report_fatal_error("memmove should have been prelowered.");
5284   }
5285   case Intrinsics::NaClReadTP: {
5286     if (SandboxingType != ST_NaCl) {
5287       llvm::report_fatal_error("nacl-read-tp should have been prelowered.");
5288     }
5289     Variable *TP = legalizeToReg(OperandARM32Mem::create(
5290         Func, getPointerType(), getPhysicalRegister(RegARM32::Reg_r9),
5291         llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32))));
5292     _mov(Dest, TP);
5293     return;
5294   }
5295   case Intrinsics::Setjmp: {
5296     llvm::report_fatal_error("setjmp should have been prelowered.");
5297   }
5298   case Intrinsics::Sqrt: {
5299     assert(isScalarFloatingType(Dest->getType()) ||
5300            getFlags().getApplicationBinaryInterface() != ::Ice::ABI_PNaCl);
5301     Variable *Src = legalizeToReg(Instr->getArg(0));
5302     Variable *T = makeReg(DestTy);
5303     _vsqrt(T, Src);
5304     _mov(Dest, T);
5305     return;
5306   }
5307   case Intrinsics::Stacksave: {
5308     Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
5309     _mov(Dest, SP);
5310     return;
5311   }
5312   case Intrinsics::Stackrestore: {
5313     Variable *Val = legalizeToReg(Instr->getArg(0));
5314     Sandboxer(this).reset_sp(Val);
5315     return;
5316   }
5317   case Intrinsics::Trap:
5318     _trap();
5319     return;
5320   case Intrinsics::AddSaturateSigned:
5321   case Intrinsics::AddSaturateUnsigned: {
5322     bool Unsigned = (ID == Intrinsics::AddSaturateUnsigned);
5323     Variable *Src0 = legalizeToReg(Instr->getArg(0));
5324     Variable *Src1 = legalizeToReg(Instr->getArg(1));
5325     Variable *T = makeReg(DestTy);
5326     _vqadd(T, Src0, Src1, Unsigned);
5327     _mov(Dest, T);
5328     return;
5329   }
5330   case Intrinsics::LoadSubVector: {
5331     assert(llvm::isa<ConstantInteger32>(Instr->getArg(1)) &&
5332            "LoadSubVector second argument must be a constant");
5333     Variable *Dest = Instr->getDest();
5334     Type Ty = Dest->getType();
5335     auto *SubVectorSize = llvm::cast<ConstantInteger32>(Instr->getArg(1));
5336     Operand *Addr = Instr->getArg(0);
5337     OperandARM32Mem *Src = formMemoryOperand(Addr, Ty);
5338     doMockBoundsCheck(Src);
5339 
5340     if (Dest->isRematerializable()) {
5341       Context.insert<InstFakeDef>(Dest);
5342       return;
5343     }
5344 
5345     auto *T = makeReg(Ty);
5346     switch (SubVectorSize->getValue()) {
5347     case 4:
5348       _vldr1d(T, Src);
5349       break;
5350     case 8:
5351       _vldr1q(T, Src);
5352       break;
5353     default:
5354       Func->setError("Unexpected size for LoadSubVector");
5355       return;
5356     }
5357     _mov(Dest, T);
5358     return;
5359   }
5360   case Intrinsics::StoreSubVector: {
5361     assert(llvm::isa<ConstantInteger32>(Instr->getArg(2)) &&
5362            "StoreSubVector third argument must be a constant");
5363     auto *SubVectorSize = llvm::cast<ConstantInteger32>(Instr->getArg(2));
5364     Variable *Value = legalizeToReg(Instr->getArg(0));
5365     Operand *Addr = Instr->getArg(1);
5366     OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
5367     doMockBoundsCheck(NewAddr);
5368 
5369     Value = legalizeToReg(Value);
5370 
5371     switch (SubVectorSize->getValue()) {
5372     case 4:
5373       _vstr1d(Value, NewAddr);
5374       break;
5375     case 8:
5376       _vstr1q(Value, NewAddr);
5377       break;
5378     default:
5379       Func->setError("Unexpected size for StoreSubVector");
5380       return;
5381     }
5382     return;
5383   }
5384   case Intrinsics::MultiplyAddPairs: {
5385     Variable *Src0 = legalizeToReg(Instr->getArg(0));
5386     Variable *Src1 = legalizeToReg(Instr->getArg(1));
5387     Variable *T = makeReg(DestTy);
5388     _vmlap(T, Src0, Src1);
5389     _mov(Dest, T);
5390     return;
5391   }
5392   case Intrinsics::MultiplyHighSigned:
5393   case Intrinsics::MultiplyHighUnsigned: {
5394     bool Unsigned = (ID == Intrinsics::MultiplyHighUnsigned);
5395     Variable *Src0 = legalizeToReg(Instr->getArg(0));
5396     Variable *Src1 = legalizeToReg(Instr->getArg(1));
5397     Variable *T = makeReg(DestTy);
5398     _vmulh(T, Src0, Src1, Unsigned);
5399     _mov(Dest, T);
5400     return;
5401   }
5402   case Intrinsics::Nearbyint: {
5403     UnimplementedLoweringError(this, Instr);
5404     return;
5405   }
5406   case Intrinsics::Round: {
5407     UnimplementedLoweringError(this, Instr);
5408     return;
5409   }
5410   case Intrinsics::SignMask: {
5411     UnimplementedLoweringError(this, Instr);
5412     return;
5413   }
5414   case Intrinsics::SubtractSaturateSigned:
5415   case Intrinsics::SubtractSaturateUnsigned: {
5416     bool Unsigned = (ID == Intrinsics::SubtractSaturateUnsigned);
5417     Variable *Src0 = legalizeToReg(Instr->getArg(0));
5418     Variable *Src1 = legalizeToReg(Instr->getArg(1));
5419     Variable *T = makeReg(DestTy);
5420     _vqsub(T, Src0, Src1, Unsigned);
5421     _mov(Dest, T);
5422     return;
5423   }
5424   case Intrinsics::VectorPackSigned:
5425   case Intrinsics::VectorPackUnsigned: {
5426     bool Unsigned = (ID == Intrinsics::VectorPackUnsigned);
5427     bool Saturating = true;
5428     Variable *Src0 = legalizeToReg(Instr->getArg(0));
5429     Variable *Src1 = legalizeToReg(Instr->getArg(1));
5430     Variable *T = makeReg(DestTy);
5431     _vqmovn2(T, Src0, Src1, Unsigned, Saturating);
5432     _mov(Dest, T);
5433     return;
5434   }
5435   default: // UnknownIntrinsic
5436     Func->setError("Unexpected intrinsic");
5437     return;
5438   }
5439   return;
5440 }
5441 
5442 void TargetARM32::lowerCLZ(Variable *Dest, Variable *ValLoR, Variable *ValHiR) {
5443   Type Ty = Dest->getType();
5444   assert(Ty == IceType_i32 || Ty == IceType_i64);
5445   Variable *T = makeReg(IceType_i32);
5446   _clz(T, ValLoR);
5447   if (Ty == IceType_i64) {
5448     auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
5449     auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
5450     Operand *Zero =
5451         legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
5452     Operand *ThirtyTwo =
5453         legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
5454     _cmp(ValHiR, Zero);
5455     Variable *T2 = makeReg(IceType_i32);
5456     _add(T2, T, ThirtyTwo);
5457     _clz(T2, ValHiR, CondARM32::NE);
5458     // T2 is actually a source as well when the predicate is not AL (since it
5459     // may leave T2 alone). We use _set_dest_redefined to prolong the liveness
5460     // of T2 as if it was used as a source.
5461     _set_dest_redefined();
5462     _mov(DestLo, T2);
5463     Variable *T3 = makeReg(Zero->getType());
5464     _mov(T3, Zero);
5465     _mov(DestHi, T3);
5466     return;
5467   }
5468   _mov(Dest, T);
5469   return;
5470 }
5471 
5472 void TargetARM32::lowerLoad(const InstLoad *Load) {
5473   // A Load instruction can be treated the same as an Assign instruction, after
5474   // the source operand is transformed into an OperandARM32Mem operand.
5475   Type Ty = Load->getDest()->getType();
5476   Operand *Src0 = formMemoryOperand(Load->getLoadAddress(), Ty);
5477   Variable *DestLoad = Load->getDest();
5478 
5479   // TODO(jvoung): handle folding opportunities. Sign and zero extension can
5480   // be folded into a load.
5481   auto *Assign = InstAssign::create(Func, DestLoad, Src0);
5482   lowerAssign(Assign);
5483 }
5484 
5485 namespace {
5486 void dumpAddressOpt(const Cfg *Func, const Variable *Base, int32_t Offset,
5487                     const Variable *OffsetReg, int16_t OffsetRegShAmt,
5488                     const Inst *Reason) {
5489   if (!BuildDefs::dump())
5490     return;
5491   if (!Func->isVerbose(IceV_AddrOpt))
5492     return;
5493   OstreamLocker _(Func->getContext());
5494   Ostream &Str = Func->getContext()->getStrDump();
5495   Str << "Instruction: ";
5496   Reason->dumpDecorated(Func);
5497   Str << "  results in Base=";
5498   if (Base)
5499     Base->dump(Func);
5500   else
5501     Str << "<null>";
5502   Str << ", OffsetReg=";
5503   if (OffsetReg)
5504     OffsetReg->dump(Func);
5505   else
5506     Str << "<null>";
5507   Str << ", Shift=" << OffsetRegShAmt << ", Offset=" << Offset << "\n";
5508 }
5509 
5510 bool matchAssign(const VariablesMetadata *VMetadata, Variable **Var,
5511                  int32_t *Offset, const Inst **Reason) {
5512   // Var originates from Var=SrcVar ==> set Var:=SrcVar
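  // For example (hypothetical IR), if "base2 = base1" feeds an address, the
  // use is rewritten to refer to base1 directly; an assignment from a constant
  // is instead meant to fold entirely into the immediate offset.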
5513   if (*Var == nullptr)
5514     return false;
5515   const Inst *VarAssign = VMetadata->getSingleDefinition(*Var);
5516   if (!VarAssign)
5517     return false;
5518   assert(!VMetadata->isMultiDef(*Var));
5519   if (!llvm::isa<InstAssign>(VarAssign))
5520     return false;
5521 
5522   Operand *SrcOp = VarAssign->getSrc(0);
5523   bool Optimized = false;
5524   if (auto *SrcVar = llvm::dyn_cast<Variable>(SrcOp)) {
5525     if (!VMetadata->isMultiDef(SrcVar) ||
5526         // TODO: ensure SrcVar stays single-BB
5527         false) {
5528       Optimized = true;
5529       *Var = SrcVar;
5530     } else if (auto *Const = llvm::dyn_cast<ConstantInteger32>(SrcOp)) {
5531       int32_t MoreOffset = Const->getValue();
5532       int32_t NewOffset = MoreOffset + *Offset;
5533       if (Utils::WouldOverflowAdd(*Offset, MoreOffset))
5534         return false;
5535       *Var = nullptr;
5536       *Offset = NewOffset;
5537       Optimized = true;
5538     }
5539   }
5540 
5541   if (Optimized) {
5542     *Reason = VarAssign;
5543   }
5544 
5545   return Optimized;
5546 }
5547 
5548 bool isAddOrSub(const Inst *Instr, InstArithmetic::OpKind *Kind) {
5549   if (const auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) {
5550     switch (Arith->getOp()) {
5551     default:
5552       return false;
5553     case InstArithmetic::Add:
5554     case InstArithmetic::Sub:
5555       *Kind = Arith->getOp();
5556       return true;
5557     }
5558   }
5559   return false;
5560 }
5561 
5562 bool matchCombinedBaseIndex(const VariablesMetadata *VMetadata, Variable **Base,
5563                             Variable **OffsetReg, int32_t OffsetRegShamt,
5564                             const Inst **Reason) {
5565   // OffsetReg==nullptr && Base is Base=Var1+Var2 ==>
5566   //   set Base=Var1, OffsetReg=Var2, Shift=0
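  // For example (hypothetical IR), if the address came from "base = a + i",
  // the add is folded into the addressing mode so the final access can use a
  // register offset, roughly "ldr r0, [a, i]".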
5567   if (*Base == nullptr)
5568     return false;
5569   if (*OffsetReg != nullptr)
5570     return false;
5571   (void)OffsetRegShamt;
5572   assert(OffsetRegShamt == 0);
5573   const Inst *BaseInst = VMetadata->getSingleDefinition(*Base);
5574   if (BaseInst == nullptr)
5575     return false;
5576   assert(!VMetadata->isMultiDef(*Base));
5577   if (BaseInst->getSrcSize() < 2)
5578     return false;
5579   auto *Var1 = llvm::dyn_cast<Variable>(BaseInst->getSrc(0));
5580   if (!Var1)
5581     return false;
5582   if (VMetadata->isMultiDef(Var1))
5583     return false;
5584   auto *Var2 = llvm::dyn_cast<Variable>(BaseInst->getSrc(1));
5585   if (!Var2)
5586     return false;
5587   if (VMetadata->isMultiDef(Var2))
5588     return false;
5589   InstArithmetic::OpKind _;
5590   if (!isAddOrSub(BaseInst, &_) ||
5591       // TODO: ensure Var1 and Var2 stay single-BB
5592       false)
5593     return false;
5594   *Base = Var1;
5595   *OffsetReg = Var2;
5596   // OffsetRegShamt is already 0.
5597   *Reason = BaseInst;
5598   return true;
5599 }
5600 
5601 bool matchShiftedOffsetReg(const VariablesMetadata *VMetadata,
5602                            Variable **OffsetReg, OperandARM32::ShiftKind *Kind,
5603                            int32_t *OffsetRegShamt, const Inst **Reason) {
5604   // OffsetReg is OffsetReg=Var*Const && log2(Const)+Shift<=32 ==>
5605   //   OffsetReg=Var, Shift+=log2(Const)
5606   // OffsetReg is OffsetReg=Var<<Const && Const+Shift<=32 ==>
5607   //   OffsetReg=Var, Shift+=Const
5608   // OffsetReg is OffsetReg=Var>>Const && Const-Shift>=-32 ==>
5609   //   OffsetReg=Var, Shift-=Const
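  // For example (hypothetical IR), "idx = i * 4" folds to OffsetReg=i with an
  // LSL of 2, so the eventual access can use the shifted-register addressing
  // mode, roughly "ldr r0, [base, i, lsl #2]".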
5610   OperandARM32::ShiftKind NewShiftKind = OperandARM32::kNoShift;
5611   if (*OffsetReg == nullptr)
5612     return false;
5613   auto *IndexInst = VMetadata->getSingleDefinition(*OffsetReg);
5614   if (IndexInst == nullptr)
5615     return false;
5616   assert(!VMetadata->isMultiDef(*OffsetReg));
5617   if (IndexInst->getSrcSize() < 2)
5618     return false;
5619   auto *ArithInst = llvm::dyn_cast<InstArithmetic>(IndexInst);
5620   if (ArithInst == nullptr)
5621     return false;
5622   auto *Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(0));
5623   if (Var == nullptr)
5624     return false;
5625   auto *Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1));
5626   if (Const == nullptr) {
5627     assert(!llvm::isa<ConstantInteger32>(ArithInst->getSrc(0)));
5628     return false;
5629   }
5630   if (VMetadata->isMultiDef(Var) || Const->getType() != IceType_i32)
5631     return false;
5632 
5633   uint32_t NewShamt = -1;
5634   switch (ArithInst->getOp()) {
5635   default:
5636     return false;
5637   case InstArithmetic::Shl: {
5638     NewShiftKind = OperandARM32::LSL;
5639     NewShamt = Const->getValue();
5640     if (NewShamt > 31)
5641       return false;
5642   } break;
5643   case InstArithmetic::Lshr: {
5644     NewShiftKind = OperandARM32::LSR;
5645     NewShamt = Const->getValue();
5646     if (NewShamt > 31)
5647       return false;
5648   } break;
5649   case InstArithmetic::Ashr: {
5650     NewShiftKind = OperandARM32::ASR;
5651     NewShamt = Const->getValue();
5652     if (NewShamt > 31)
5653       return false;
5654   } break;
5655   case InstArithmetic::Udiv:
5656   case InstArithmetic::Mul: {
5657     const uint32_t UnsignedConst = Const->getValue();
5658     NewShamt = llvm::findFirstSet(UnsignedConst);
5659     if (NewShamt != llvm::findLastSet(UnsignedConst)) {
5660       // First bit set is not the same as the last bit set, so Const is not
5661       // a power of 2.
5662       return false;
5663     }
5664     NewShiftKind = ArithInst->getOp() == InstArithmetic::Udiv
5665                        ? OperandARM32::LSR
5666                        : OperandARM32::LSL;
5667   } break;
5668   }
5669   // Allowed "transitions":
5670   //   kNoShift -> * iff NewShamt <= 31
5671   //   LSL -> LSL    iff NewShamt + OffsetRegShamt <= 31
5672   //   LSR -> LSR    iff NewShamt + OffsetRegShamt <= 31
5673   //   ASR -> ASR    iff NewShamt + OffsetRegShamt <= 31
5674   if (*Kind != OperandARM32::kNoShift && *Kind != NewShiftKind) {
5675     return false;
5676   }
5677   const int32_t NewOffsetRegShamt = *OffsetRegShamt + NewShamt;
5678   if (NewOffsetRegShamt > 31)
5679     return false;
5680   *OffsetReg = Var;
5681   *OffsetRegShamt = NewOffsetRegShamt;
5682   *Kind = NewShiftKind;
5683   *Reason = IndexInst;
5684   return true;
5685 }
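// Illustrative sketch (assumed names and values): if OffsetReg has the single
// definition "t = x << 2", or equivalently "t = x * 4" (4 is a power of two,
// so findFirstSet(4) == findLastSet(4) == 2), the match above rewrites
//   OffsetReg = t, Shift = 0  ==>  OffsetReg = x, Kind = LSL, Shift = 2
// enabling a later [base, x, lsl #2] addressing mode. A udiv by 4 would take
// the same path but produce Kind = LSR.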
5686 
5687 bool matchOffsetBase(const VariablesMetadata *VMetadata, Variable **Base,
5688                      int32_t *Offset, const Inst **Reason) {
5689   // Base is Base=Var+Const || Base is Base=Const+Var ==>
5690   //   set Base=Var, Offset+=Const
5691   // Base is Base=Var-Const ==>
5692   //   set Base=Var, Offset-=Const
5693   if (*Base == nullptr)
5694     return false;
5695   const Inst *BaseInst = VMetadata->getSingleDefinition(*Base);
5696   if (BaseInst == nullptr) {
5697     return false;
5698   }
5699   assert(!VMetadata->isMultiDef(*Base));
5700 
5701   auto *ArithInst = llvm::dyn_cast<const InstArithmetic>(BaseInst);
5702   if (ArithInst == nullptr)
5703     return false;
5704   InstArithmetic::OpKind Kind;
5705   if (!isAddOrSub(ArithInst, &Kind))
5706     return false;
5707   bool IsAdd = Kind == InstArithmetic::Add;
5708   Operand *Src0 = ArithInst->getSrc(0);
5709   Operand *Src1 = ArithInst->getSrc(1);
5710   auto *Var0 = llvm::dyn_cast<Variable>(Src0);
5711   auto *Var1 = llvm::dyn_cast<Variable>(Src1);
5712   auto *Const0 = llvm::dyn_cast<ConstantInteger32>(Src0);
5713   auto *Const1 = llvm::dyn_cast<ConstantInteger32>(Src1);
5714   Variable *NewBase = nullptr;
5715   int32_t NewOffset = *Offset;
5716 
5717   if (Var0 == nullptr && Const0 == nullptr) {
5718     assert(llvm::isa<ConstantRelocatable>(Src0));
5719     return false;
5720   }
5721 
5722   if (Var1 == nullptr && Const1 == nullptr) {
5723     assert(llvm::isa<ConstantRelocatable>(Src1));
5724     return false;
5725   }
5726 
5727   if (Var0 && Var1)
5728     // TODO(jpp): merge base/index splitting into here.
5729     return false;
5730   if (!IsAdd && Var1)
5731     return false;
5732   if (Var0)
5733     NewBase = Var0;
5734   else if (Var1)
5735     NewBase = Var1;
5736   // Compute the updated constant offset.
5737   if (Const0) {
5738     int32_t MoreOffset = IsAdd ? Const0->getValue() : -Const0->getValue();
5739     if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
5740       return false;
5741     NewOffset += MoreOffset;
5742   }
5743   if (Const1) {
5744     int32_t MoreOffset = IsAdd ? Const1->getValue() : -Const1->getValue();
5745     if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
5746       return false;
5747     NewOffset += MoreOffset;
5748   }
5749 
5750   // Update the computed address parameters once we are sure optimization
5751   // is valid.
5752   *Base = NewBase;
5753   *Offset = NewOffset;
5754   *Reason = BaseInst;
5755   return true;
5756 }
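// Illustrative sketch (assumed names and values): if Base has the single
// definition "t = a + 16" while the running immediate is 4, the match above
// rewrites
//   Base = t, Offset = 4   ==>  Base = a, Offset = 20
// and "t = a - 16" would yield Offset = -12, subject to the overflow checks
// performed before the update.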
5757 } // end of anonymous namespace
5758 
5759 OperandARM32Mem *TargetARM32::formAddressingMode(Type Ty, Cfg *Func,
5760                                                  const Inst *LdSt,
5761                                                  Operand *Base) {
5762   assert(Base != nullptr);
5763   int32_t OffsetImm = 0;
5764   Variable *OffsetReg = nullptr;
5765   int32_t OffsetRegShamt = 0;
5766   OperandARM32::ShiftKind ShiftKind = OperandARM32::kNoShift;
5767 
5768   Func->resetCurrentNode();
5769   if (Func->isVerbose(IceV_AddrOpt)) {
5770     OstreamLocker _(Func->getContext());
5771     Ostream &Str = Func->getContext()->getStrDump();
5772     Str << "\nAddress mode formation:\t";
5773     LdSt->dumpDecorated(Func);
5774   }
5775 
5776   if (isVectorType(Ty))
5777     // Vector loads and stores do not allow immediate offsets and only support
5778     // the "[reg]" addressing mode (the other supported modes are write-back).
5779     return nullptr;
5780 
5781   auto *BaseVar = llvm::dyn_cast<Variable>(Base);
5782   if (BaseVar == nullptr)
5783     return nullptr;
5784 
5785   (void)MemTraitsSize;
5786   assert(Ty < MemTraitsSize);
5787   auto *TypeTraits = &MemTraits[Ty];
5788   const bool CanHaveIndex = !NeedSandboxing && TypeTraits->CanHaveIndex;
5789   const bool CanHaveShiftedIndex =
5790       !NeedSandboxing && TypeTraits->CanHaveShiftedIndex;
5791   const bool CanHaveImm = TypeTraits->CanHaveImm;
5792   const int32_t ValidImmMask = TypeTraits->ValidImmMask;
5793   (void)ValidImmMask;
5794   assert(!CanHaveImm || ValidImmMask >= 0);
5795 
5796   const VariablesMetadata *VMetadata = Func->getVMetadata();
5797   const Inst *Reason = nullptr;
5798 
5799   do {
5800     if (Reason != nullptr) {
5801       dumpAddressOpt(Func, BaseVar, OffsetImm, OffsetReg, OffsetRegShamt,
5802                      Reason);
5803       Reason = nullptr;
5804     }
5805 
5806     if (matchAssign(VMetadata, &BaseVar, &OffsetImm, &Reason)) {
5807       continue;
5808     }
5809 
5810     if (CanHaveIndex &&
5811         matchAssign(VMetadata, &OffsetReg, &OffsetImm, &Reason)) {
5812       continue;
5813     }
5814 
5815     if (CanHaveIndex && matchCombinedBaseIndex(VMetadata, &BaseVar, &OffsetReg,
5816                                                OffsetRegShamt, &Reason)) {
5817       continue;
5818     }
5819 
5820     if (CanHaveShiftedIndex) {
5821       if (matchShiftedOffsetReg(VMetadata, &OffsetReg, &ShiftKind,
5822                                 &OffsetRegShamt, &Reason)) {
5823         continue;
5824       }
5825 
5826       if ((OffsetRegShamt == 0) &&
5827           matchShiftedOffsetReg(VMetadata, &BaseVar, &ShiftKind,
5828                                 &OffsetRegShamt, &Reason)) {
5829         std::swap(BaseVar, OffsetReg);
5830         continue;
5831       }
5832     }
5833 
5834     if (matchOffsetBase(VMetadata, &BaseVar, &OffsetImm, &Reason)) {
5835       continue;
5836     }
5837   } while (Reason);
5838 
5839   if (BaseVar == nullptr) {
5840     // [OffsetReg{, LSL Shamt}{, #OffsetImm}] is not legal in ARM, so we have to
5841     // legalize the addressing mode to [BaseReg, OffsetReg{, LSL Shamt}].
5842     // Instead of a zeroed BaseReg, we initialize it with OffsetImm:
5843     //
5844     // [OffsetReg{, LSL Shamt}{, #OffsetImm}] ->
5845     //     mov BaseReg, #OffsetImm
5846     //     use of [BaseReg, OffsetReg{, LSL Shamt}]
5847     //
5848     const Type PointerType = getPointerType();
5849     BaseVar = makeReg(PointerType);
5850     Context.insert<InstAssign>(BaseVar, Ctx->getConstantInt32(OffsetImm));
5851     OffsetImm = 0;
5852   } else if (OffsetImm != 0) {
5853     // ARM ldr/str instructions only accept a limited range of immediate
5854     // offsets. The formation loop above materialized the immediate without
5855     // checking it, so ensure here that the generated offset is encodable.
5856     const int32_t PositiveOffset = OffsetImm > 0 ? OffsetImm : -OffsetImm;
5857     const InstArithmetic::OpKind Op =
5858         OffsetImm > 0 ? InstArithmetic::Add : InstArithmetic::Sub;
5859 
5860     if (!CanHaveImm || !isLegalMemOffset(Ty, OffsetImm) ||
5861         OffsetReg != nullptr) {
5862       if (OffsetReg == nullptr) {
5863         // We formed a [Base, #const] addressing mode which is not encodable
5864         // in ARM. There is little point in forming an address mode now if we
5865         // don't also have an offset register; we would end up with something like
5866         //
5867         // [Base, #const] -> add T, Base, #const
5868         //                   use of [T]
5869         //
5870         // Which is exactly what we already have. So we just bite the bullet
5871         // here and don't form any address mode.
5872         return nullptr;
5873       }
5874       // We formed [Base, Offset {, LSL Amnt}, #const]. Oops. Legalize it to
5875       //
5876       // [Base, Offset, {LSL amount}, #const] ->
5877       //      add T, Base, #const
5878       //      use of [T, Offset {, LSL amount}]
5879       const Type PointerType = getPointerType();
5880       Variable *T = makeReg(PointerType);
5881       Context.insert<InstArithmetic>(Op, T, BaseVar,
5882                                      Ctx->getConstantInt32(PositiveOffset));
5883       BaseVar = T;
5884       OffsetImm = 0;
5885     }
5886   }
5887 
5888   assert(BaseVar != nullptr);
5889   assert(OffsetImm == 0 || OffsetReg == nullptr);
5890   assert(OffsetReg == nullptr || CanHaveIndex);
5891   assert(OffsetImm < 0 ? (ValidImmMask & -OffsetImm) == -OffsetImm
5892                        : (ValidImmMask & OffsetImm) == OffsetImm);
5893 
5894   if (OffsetReg != nullptr) {
5895     Variable *OffsetR = makeReg(getPointerType());
5896     Context.insert<InstAssign>(OffsetR, OffsetReg);
5897     return OperandARM32Mem::create(Func, Ty, BaseVar, OffsetR, ShiftKind,
5898                                    OffsetRegShamt);
5899   }
5900 
5901   return OperandARM32Mem::create(
5902       Func, Ty, BaseVar,
5903       llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(OffsetImm)));
5904 }
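// End-to-end sketch of the matcher loop above (assumed IR, illustrative
// output): a load whose address is computed as
//   t1 = i << 2
//   t2 = a + t1
//   v  = load t2
// can be folded into a single memory operand, roughly
//   ldr v, [a, i, lsl #2]
// provided the single-definition, index, and shifted-index legality checks
// all succeed for the access type.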
5905 
5906 void TargetARM32::doAddressOptLoad() {
5907   Inst *Instr = iteratorToInst(Context.getCur());
5908   assert(llvm::isa<InstLoad>(Instr));
5909   Variable *Dest = Instr->getDest();
5910   Operand *Addr = Instr->getSrc(0);
5911   if (OperandARM32Mem *Mem =
5912           formAddressingMode(Dest->getType(), Func, Instr, Addr)) {
5913     Instr->setDeleted();
5914     Context.insert<InstLoad>(Dest, Mem);
5915   }
5916 }
5917 
5918 void TargetARM32::lowerPhi(const InstPhi * /*Instr*/) {
5919   Func->setError("Phi found in regular instruction list");
5920 }
5921 
5922 void TargetARM32::lowerRet(const InstRet *Instr) {
5923   Variable *Reg = nullptr;
5924   if (Instr->hasRetValue()) {
5925     Operand *Src0 = Instr->getRetValue();
5926     Type Ty = Src0->getType();
5927     if (Ty == IceType_i64) {
5928       Src0 = legalizeUndef(Src0);
5929       Variable *R0 = legalizeToReg(loOperand(Src0), RegARM32::Reg_r0);
5930       Variable *R1 = legalizeToReg(hiOperand(Src0), RegARM32::Reg_r1);
5931       Reg = R0;
5932       Context.insert<InstFakeUse>(R1);
5933     } else if (Ty == IceType_f32) {
5934       Variable *S0 = legalizeToReg(Src0, RegARM32::Reg_s0);
5935       Reg = S0;
5936     } else if (Ty == IceType_f64) {
5937       Variable *D0 = legalizeToReg(Src0, RegARM32::Reg_d0);
5938       Reg = D0;
5939     } else if (isVectorType(Src0->getType())) {
5940       Variable *Q0 = legalizeToReg(Src0, RegARM32::Reg_q0);
5941       Reg = Q0;
5942     } else {
5943       Operand *Src0F = legalize(Src0, Legal_Reg | Legal_Flex);
5944       Reg = makeReg(Src0F->getType(), RegARM32::Reg_r0);
5945       _mov(Reg, Src0F, CondARM32::AL);
5946     }
5947   }
5948   // Add a ret instruction even if sandboxing is enabled, because addEpilog
5949   // explicitly looks for a ret instruction as a marker for where to insert the
5950   // frame removal instructions. addEpilog is responsible for restoring the
5951   // "lr" register as needed prior to this ret instruction.
5952   _ret(getPhysicalRegister(RegARM32::Reg_lr), Reg);
5953 
5954   // Add a fake use of sp to make sure sp stays alive for the entire function.
5955   // Otherwise post-call sp adjustments get dead-code eliminated.
5956   // TODO: Are there more places where the fake use should be inserted? E.g.
5957   // "void f(int n){while(1) g(n);}" may not have a ret instruction.
5958   Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
5959   Context.insert<InstFakeUse>(SP);
5960 }
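// Return-value placement sketch (illustrative): "ret i64 %v" places the low
// word in r0 and the high word in r1 (with a fake use keeping r1 live up to
// the ret), f32 returns in s0, f64 in d0, and vector returns in q0, matching
// the register assignments made above.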
5961 
5962 void TargetARM32::lowerShuffleVector(const InstShuffleVector *Instr) {
5963   auto *Dest = Instr->getDest();
5964   const Type DestTy = Dest->getType();
5965 
5966   auto *T = makeReg(DestTy);
5967   auto *Src0 = Instr->getSrc(0);
5968   auto *Src1 = Instr->getSrc(1);
5969   const SizeT NumElements = typeNumElements(DestTy);
5970   const Type ElementType = typeElementType(DestTy);
5971 
5972   bool Replicate = true;
5973   for (SizeT I = 1; Replicate && I < Instr->getNumIndexes(); ++I) {
5974     if (Instr->getIndexValue(I) != Instr->getIndexValue(0)) {
5975       Replicate = false;
5976     }
5977   }
5978 
5979   if (Replicate) {
5980     Variable *Src0Var = legalizeToReg(Src0);
5981     _vdup(T, Src0Var, Instr->getIndexValue(0));
5982     _mov(Dest, T);
5983     return;
5984   }
5985 
5986   switch (DestTy) {
5987   case IceType_v8i1:
5988   case IceType_v8i16: {
5989     static constexpr SizeT ExpectedNumElements = 8;
5990     assert(ExpectedNumElements == Instr->getNumIndexes());
5991     (void)ExpectedNumElements;
5992 
5993     if (Instr->indexesAre(0, 0, 1, 1, 2, 2, 3, 3)) {
5994       Variable *Src0R = legalizeToReg(Src0);
5995       _vzip(T, Src0R, Src0R);
5996       _mov(Dest, T);
5997       return;
5998     }
5999 
6000     if (Instr->indexesAre(0, 8, 1, 9, 2, 10, 3, 11)) {
6001       Variable *Src0R = legalizeToReg(Src0);
6002       Variable *Src1R = legalizeToReg(Src1);
6003       _vzip(T, Src0R, Src1R);
6004       _mov(Dest, T);
6005       return;
6006     }
6007 
6008     if (Instr->indexesAre(0, 2, 4, 6, 0, 2, 4, 6)) {
6009       Variable *Src0R = legalizeToReg(Src0);
6010       _vqmovn2(T, Src0R, Src0R, false, false);
6011       _mov(Dest, T);
6012       return;
6013     }
6014   } break;
6015   case IceType_v16i1:
6016   case IceType_v16i8: {
6017     static constexpr SizeT ExpectedNumElements = 16;
6018     assert(ExpectedNumElements == Instr->getNumIndexes());
6019     (void)ExpectedNumElements;
6020 
6021     if (Instr->indexesAre(0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7)) {
6022       Variable *Src0R = legalizeToReg(Src0);
6023       _vzip(T, Src0R, Src0R);
6024       _mov(Dest, T);
6025       return;
6026     }
6027 
6028     if (Instr->indexesAre(0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7,
6029                           23)) {
6030       Variable *Src0R = legalizeToReg(Src0);
6031       Variable *Src1R = legalizeToReg(Src1);
6032       _vzip(T, Src0R, Src1R);
6033       _mov(Dest, T);
6034       return;
6035     }
6036   } break;
6037   case IceType_v4i1:
6038   case IceType_v4i32:
6039   case IceType_v4f32: {
6040     static constexpr SizeT ExpectedNumElements = 4;
6041     assert(ExpectedNumElements == Instr->getNumIndexes());
6042     (void)ExpectedNumElements;
6043 
6044     if (Instr->indexesAre(0, 0, 1, 1)) {
6045       Variable *Src0R = legalizeToReg(Src0);
6046       _vzip(T, Src0R, Src0R);
6047       _mov(Dest, T);
6048       return;
6049     }
6050 
6051     if (Instr->indexesAre(0, 4, 1, 5)) {
6052       Variable *Src0R = legalizeToReg(Src0);
6053       Variable *Src1R = legalizeToReg(Src1);
6054       _vzip(T, Src0R, Src1R);
6055       _mov(Dest, T);
6056       return;
6057     }
6058 
6059     if (Instr->indexesAre(0, 1, 4, 5)) {
6060       Variable *Src0R = legalizeToReg(Src0);
6061       Variable *Src1R = legalizeToReg(Src1);
6062       _vmovlh(T, Src0R, Src1R);
6063       _mov(Dest, T);
6064       return;
6065     }
6066 
6067     if (Instr->indexesAre(2, 3, 2, 3)) {
6068       Variable *Src0R = legalizeToReg(Src0);
6069       _vmovhl(T, Src0R, Src0R);
6070       _mov(Dest, T);
6071       return;
6072     }
6073 
6074     if (Instr->indexesAre(2, 3, 6, 7)) {
6075       Variable *Src0R = legalizeToReg(Src0);
6076       Variable *Src1R = legalizeToReg(Src1);
6077       _vmovhl(T, Src1R, Src0R);
6078       _mov(Dest, T);
6079       return;
6080     }
6081   } break;
6082   default:
6083     break;
6084     // TODO(jpp): figure out how to properly lower this without scalarization.
6085   }
6086 
6087   // Unoptimized shuffle. Perform a series of inserts and extracts.
6088   Context.insert<InstFakeDef>(T);
6089   for (SizeT I = 0; I < Instr->getNumIndexes(); ++I) {
6090     auto *Index = Instr->getIndex(I);
6091     const SizeT Elem = Index->getValue();
6092     auto *ExtElmt = makeReg(ElementType);
6093     if (Elem < NumElements) {
6094       lowerExtractElement(
6095           InstExtractElement::create(Func, ExtElmt, Src0, Index));
6096     } else {
6097       lowerExtractElement(InstExtractElement::create(
6098           Func, ExtElmt, Src1,
6099           Ctx->getConstantInt32(Index->getValue() - NumElements)));
6100     }
6101     auto *NewT = makeReg(DestTy);
6102     lowerInsertElement(InstInsertElement::create(Func, NewT, T, ExtElmt,
6103                                                  Ctx->getConstantInt32(I)));
6104     T = NewT;
6105   }
6106   _mov(Dest, T);
6107 }
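// Illustrative sketch (assumed IR, not from the source): a broadcast shuffle
// of a v4i32 value with indexes <2, 2, 2, 2> takes the Replicate path above
// and becomes a single vdup of lane 2; indexes <0, 4, 1, 5> match the vzip
// pattern and interleave the low halves of the two sources; any mask that
// matches no pattern falls through to the generic extract/insert loop.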
6108 
6109 void TargetARM32::lowerSelect(const InstSelect *Instr) {
6110   Variable *Dest = Instr->getDest();
6111   Type DestTy = Dest->getType();
6112   Operand *SrcT = Instr->getTrueOperand();
6113   Operand *SrcF = Instr->getFalseOperand();
6114   Operand *Condition = Instr->getCondition();
6115 
6116   if (!isVectorType(DestTy)) {
6117     lowerInt1ForSelect(Dest, Condition, legalizeUndef(SrcT),
6118                        legalizeUndef(SrcF));
6119     return;
6120   }
6121 
6122   Type TType = DestTy;
6123   switch (DestTy) {
6124   default:
6125     llvm::report_fatal_error("Unexpected type for vector select.");
6126   case IceType_v4i1:
6127     TType = IceType_v4i32;
6128     break;
6129   case IceType_v8i1:
6130     TType = IceType_v8i16;
6131     break;
6132   case IceType_v16i1:
6133     TType = IceType_v16i8;
6134     break;
6135   case IceType_v4f32:
6136     TType = IceType_v4i32;
6137     break;
6138   case IceType_v4i32:
6139   case IceType_v8i16:
6140   case IceType_v16i8:
6141     break;
6142   }
6143   auto *T = makeReg(TType);
6144   lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));
6145   auto *SrcTR = legalizeToReg(SrcT);
6146   auto *SrcFR = legalizeToReg(SrcF);
6147   _vbsl(T, SrcTR, SrcFR)->setDestRedefined();
6148   _mov(Dest, T);
6149 }
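// Vector select sketch (illustrative): the i1 condition vector is
// sign-extended so each lane becomes all-ones or all-zeros, and vbsl then
// selects, bit by bit, the true operand where the mask is set and the false
// operand elsewhere; setDestRedefined() reflects that vbsl both reads and
// writes its first operand (the mask register T).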
6150 
6151 void TargetARM32::lowerStore(const InstStore *Instr) {
6152   Operand *Value = Instr->getData();
6153   Operand *Addr = Instr->getStoreAddress();
6154   OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
6155   Type Ty = NewAddr->getType();
6156 
6157   if (Ty == IceType_i64) {
6158     Value = legalizeUndef(Value);
6159     Variable *ValueHi = legalizeToReg(hiOperand(Value));
6160     Variable *ValueLo = legalizeToReg(loOperand(Value));
6161     _str(ValueHi, llvm::cast<OperandARM32Mem>(hiOperand(NewAddr)));
6162     _str(ValueLo, llvm::cast<OperandARM32Mem>(loOperand(NewAddr)));
6163   } else {
6164     Variable *ValueR = legalizeToReg(Value);
6165     _str(ValueR, NewAddr);
6166   }
6167 }
6168 
6169 void TargetARM32::doAddressOptStore() {
6170   Inst *Instr = iteratorToInst(Context.getCur());
6171   assert(llvm::isa<InstStore>(Instr));
6172   Operand *Src = Instr->getSrc(0);
6173   Operand *Addr = Instr->getSrc(1);
6174   if (OperandARM32Mem *Mem =
6175           formAddressingMode(Src->getType(), Func, Instr, Addr)) {
6176     Instr->setDeleted();
6177     Context.insert<InstStore>(Src, Mem);
6178   }
6179 }
6180 
6181 void TargetARM32::lowerSwitch(const InstSwitch *Instr) {
6182   // This implements the most naive possible lowering:
6183   // cmp a,val[0]; beq label[0]; cmp a,val[1]; beq label[1]; ... b default
6184   Operand *Src0 = Instr->getComparison();
6185   SizeT NumCases = Instr->getNumCases();
6186   if (Src0->getType() == IceType_i64) {
6187     Src0 = legalizeUndef(Src0);
6188     Variable *Src0Lo = legalizeToReg(loOperand(Src0));
6189     Variable *Src0Hi = legalizeToReg(hiOperand(Src0));
6190     for (SizeT I = 0; I < NumCases; ++I) {
6191       Operand *ValueLo = Ctx->getConstantInt32(Instr->getValue(I));
6192       Operand *ValueHi = Ctx->getConstantInt32(Instr->getValue(I) >> 32);
6193       ValueLo = legalize(ValueLo, Legal_Reg | Legal_Flex);
6194       ValueHi = legalize(ValueHi, Legal_Reg | Legal_Flex);
6195       _cmp(Src0Lo, ValueLo);
6196       _cmp(Src0Hi, ValueHi, CondARM32::EQ);
6197       _br(Instr->getLabel(I), CondARM32::EQ);
6198     }
6199     _br(Instr->getLabelDefault());
6200     return;
6201   }
6202 
6203   Variable *Src0Var = legalizeToReg(Src0);
6204   // If Src0 is not an i32, we left shift it -- see the icmp lowering for the
6205   // reason.
6206   assert(Src0Var->mustHaveReg());
6207   const size_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType());
6208   assert(ShiftAmt < 32);
6209   if (ShiftAmt > 0) {
6210     Operand *ShAmtImm = shAmtImm(ShiftAmt);
6211     Variable *T = makeReg(IceType_i32);
6212     _lsl(T, Src0Var, ShAmtImm);
6213     Src0Var = T;
6214   }
6215 
6216   for (SizeT I = 0; I < NumCases; ++I) {
6217     Operand *Value = Ctx->getConstantInt32(Instr->getValue(I) << ShiftAmt);
6218     Value = legalize(Value, Legal_Reg | Legal_Flex);
6219     _cmp(Src0Var, Value);
6220     _br(Instr->getLabel(I), CondARM32::EQ);
6221   }
6222   _br(Instr->getLabelDefault());
6223 }
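// Worked example of the shift trick above (assumed i16 selector): ShiftAmt is
// 32 - 16 == 16, so the selector is moved into the upper half-word with
// "lsl t, src, #16" and every case constant is likewise compared as
// "getValue(I) << 16"; the comparison therefore ignores whatever happens to
// occupy the bits above the selector's width.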
6224 
6225 void TargetARM32::lowerBreakpoint(const InstBreakpoint *Instr) {
6226   UnimplementedLoweringError(this, Instr);
6227 }
6228 
6229 void TargetARM32::lowerUnreachable(const InstUnreachable * /*Instr*/) {
6230   _trap();
6231 }
6232 
6233 namespace {
6234 // Returns whether Opnd needs the GOT address. Currently, ConstantRelocatables
6235 // and fp constants that cannot be encoded as immediates need the GOT address.
6236 bool operandNeedsGot(const Operand *Opnd) {
6237   if (llvm::isa<ConstantRelocatable>(Opnd)) {
6238     return true;
6239   }
6240 
6241   if (llvm::isa<ConstantFloat>(Opnd)) {
6242     uint32_t _;
6243     return !OperandARM32FlexFpImm::canHoldImm(Opnd, &_);
6244   }
6245 
6246   const auto *F64 = llvm::dyn_cast<ConstantDouble>(Opnd);
6247   if (F64 != nullptr) {
6248     uint32_t _;
6249     return !OperandARM32FlexFpImm::canHoldImm(Opnd, &_) &&
6250            !isFloatingPointZero(F64);
6251   }
6252 
6253   return false;
6254 }
6255 
6256 // Returns whether Phi needs the GOT address (which it does if any of its
6257 // operands needs the GOT address.)
6258 bool phiNeedsGot(const InstPhi *Phi) {
6259   if (Phi->isDeleted()) {
6260     return false;
6261   }
6262 
6263   for (SizeT I = 0; I < Phi->getSrcSize(); ++I) {
6264     if (operandNeedsGot(Phi->getSrc(I))) {
6265       return true;
6266     }
6267   }
6268 
6269   return false;
6270 }
6271 
6272 // Returns whether **any** phi in Node needs the GOT address.
6273 bool anyPhiInNodeNeedsGot(CfgNode *Node) {
6274   for (auto &Inst : Node->getPhis()) {
6275     if (phiNeedsGot(llvm::cast<InstPhi>(&Inst))) {
6276       return true;
6277     }
6278   }
6279   return false;
6280 }
6281 
6282 } // end of anonymous namespace
6283 
6284 void TargetARM32::prelowerPhis() {
6285   CfgNode *Node = Context.getNode();
6286 
6287   if (SandboxingType == ST_Nonsfi) {
6288     assert(GotPtr != nullptr);
6289     if (anyPhiInNodeNeedsGot(Node)) {
6290       // If any phi instruction needs the GOT address, we place a
6291       //   fake-use GotPtr
6292       // in Node to prevent the GotPtr's initialization from being dead code
6293       // eliminated.
6294       Node->getInsts().push_front(InstFakeUse::create(Func, GotPtr));
6295     }
6296   }
6297 
6298   PhiLowering::prelowerPhis32Bit(this, Node, Func);
6299 }
6300 
6301 Variable *TargetARM32::makeVectorOfZeros(Type Ty, RegNumT RegNum) {
6302   Variable *Reg = makeReg(Ty, RegNum);
6303   Context.insert<InstFakeDef>(Reg);
6304   assert(isVectorType(Ty));
6305   _veor(Reg, Reg, Reg);
6306   return Reg;
6307 }
6308 
6309 // Helper for legalize() to emit the right code to lower an operand to a
6310 // register of the appropriate type.
6311 Variable *TargetARM32::copyToReg(Operand *Src, RegNumT RegNum) {
6312   Type Ty = Src->getType();
6313   Variable *Reg = makeReg(Ty, RegNum);
6314   if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Src)) {
6315     _ldr(Reg, Mem);
6316   } else {
6317     _mov(Reg, Src);
6318   }
6319   return Reg;
6320 }
6321 
6322 // TODO(jpp): remove unneeded else clauses in legalize.
6323 Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
6324                                RegNumT RegNum) {
6325   Type Ty = From->getType();
6326   // Assert that a physical register is allowed. To date, all calls to
6327   // legalize() allow a physical register. Legal_Flex converts registers to the
6328   // right type OperandARM32FlexReg as needed.
6329   assert(Allowed & Legal_Reg);
6330 
6331   // Copied verbatim from TargetX86Base<Machine>.
6332   if (RegNum.hasNoValue()) {
6333     if (Variable *Subst = getContext().availabilityGet(From)) {
6334       // At this point we know there is a potential substitution available.
6335       if (!Subst->isRematerializable() && Subst->mustHaveReg() &&
6336           !Subst->hasReg()) {
6337         // At this point we know the substitution will have a register.
6338         if (From->getType() == Subst->getType()) {
6339           // At this point we know the substitution's register is compatible.
6340           return Subst;
6341         }
6342       }
6343     }
6344   }
6345 
6346   // Go through the various types of operands: OperandARM32Mem,
6347   // OperandARM32Flex, Constant, and Variable. Given the above assertion, if
6348   // type of operand is not legal (e.g., OperandARM32Mem and !Legal_Mem), we
6349   // can always copy to a register.
6350   if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(From)) {
6351     // Before doing anything with a Mem operand, we need to ensure that the
6352     // Base and Index components are in physical registers.
6353     Variable *Base = Mem->getBase();
6354     Variable *Index = Mem->getIndex();
6355     ConstantInteger32 *Offset = Mem->getOffset();
6356     assert(Index == nullptr || Offset == nullptr);
6357     Variable *RegBase = nullptr;
6358     Variable *RegIndex = nullptr;
6359     assert(Base);
6360     RegBase = llvm::cast<Variable>(
6361         legalize(Base, Legal_Reg | Legal_Rematerializable));
6362     assert(Ty < MemTraitsSize);
6363     if (Index) {
6364       assert(Offset == nullptr);
6365       assert(MemTraits[Ty].CanHaveIndex);
6366       RegIndex = legalizeToReg(Index);
6367     }
6368     if (Offset && Offset->getValue() != 0) {
6369       assert(Index == nullptr);
6370       static constexpr bool ZeroExt = false;
6371       assert(MemTraits[Ty].CanHaveImm);
6372       if (!OperandARM32Mem::canHoldOffset(Ty, ZeroExt, Offset->getValue())) {
6373         llvm::report_fatal_error("Invalid memory offset.");
6374       }
6375     }
6376 
6377     // Create a new operand if there was a change.
6378     if (Base != RegBase || Index != RegIndex) {
6379       // There is only a reg +/- reg or reg + imm form.
6380       // Figure out which to re-create.
6381       if (RegIndex) {
6382         Mem = OperandARM32Mem::create(Func, Ty, RegBase, RegIndex,
6383                                       Mem->getShiftOp(), Mem->getShiftAmt(),
6384                                       Mem->getAddrMode());
6385       } else {
6386         Mem = OperandARM32Mem::create(Func, Ty, RegBase, Offset,
6387                                       Mem->getAddrMode());
6388       }
6389     }
6390     if (Allowed & Legal_Mem) {
6391       From = Mem;
6392     } else {
6393       Variable *Reg = makeReg(Ty, RegNum);
6394       _ldr(Reg, Mem);
6395       From = Reg;
6396     }
6397     return From;
6398   }
6399 
6400   if (auto *Flex = llvm::dyn_cast<OperandARM32Flex>(From)) {
6401     if (!(Allowed & Legal_Flex)) {
6402       if (auto *FlexReg = llvm::dyn_cast<OperandARM32FlexReg>(Flex)) {
6403         if (FlexReg->getShiftOp() == OperandARM32::kNoShift) {
6404           From = FlexReg->getReg();
6405           // Fall through and let From be checked as a Variable below, where it
6406           // may or may not need a register.
6407         } else {
6408           return copyToReg(Flex, RegNum);
6409         }
6410       } else {
6411         return copyToReg(Flex, RegNum);
6412       }
6413     } else {
6414       return From;
6415     }
6416   }
6417 
6418   if (llvm::isa<Constant>(From)) {
6419     if (llvm::isa<ConstantUndef>(From)) {
6420       From = legalizeUndef(From, RegNum);
6421       if (isVectorType(Ty))
6422         return From;
6423     }
6424     // There should be no constants of vector type (other than undef).
6425     assert(!isVectorType(Ty));
6426     if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) {
6427       uint32_t RotateAmt;
6428       uint32_t Immed_8;
6429       uint32_t Value = static_cast<uint32_t>(C32->getValue());
6430       if (OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) {
6431         // The immediate can be encoded as a Flex immediate. We may return the
6432         // Flex operand if the caller has Allow'ed it.
6433         auto *OpF = OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
6434         const bool CanBeFlex = Allowed & Legal_Flex;
6435         if (CanBeFlex)
6436           return OpF;
6437         return copyToReg(OpF, RegNum);
6438       } else if (OperandARM32FlexImm::canHoldImm(~Value, &RotateAmt,
6439                                                  &Immed_8)) {
6440         // Even though the immediate can't be encoded as a Flex operand, its
6441         // inverted bit pattern can, thus we use ARM's mvn to load the 32-bit
6442         // constant with a single instruction.
6443         auto *InvOpF =
6444             OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
6445         Variable *Reg = makeReg(Ty, RegNum);
6446         _mvn(Reg, InvOpF);
6447         return Reg;
6448       } else {
6449         // Do a movw/movt to a register.
6450         Variable *Reg = makeReg(Ty, RegNum);
6451         uint32_t UpperBits = (Value >> 16) & 0xFFFF;
6452         _movw(Reg,
6453               UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32);
6454         if (UpperBits != 0) {
6455           _movt(Reg, Ctx->getConstantInt32(UpperBits));
6456         }
6457         return Reg;
6458       }
6459     } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) {
6460       Variable *Reg = makeReg(Ty, RegNum);
6461       if (SandboxingType != ST_Nonsfi) {
6462         _movw(Reg, C);
6463         _movt(Reg, C);
6464       } else {
6465         auto *GotAddr = legalizeToReg(GotPtr);
6466         GlobalString CGotoffName = createGotoffRelocation(C);
6467         loadNamedConstantRelocatablePIC(
6468             CGotoffName, Reg, [this, Reg](Variable *PC) {
6469               _ldr(Reg, OperandARM32Mem::create(Func, IceType_i32, PC, Reg));
6470             });
6471         _add(Reg, GotAddr, Reg);
6472       }
6473       return Reg;
6474     } else {
6475       assert(isScalarFloatingType(Ty));
6476       uint32_t ModifiedImm;
6477       if (OperandARM32FlexFpImm::canHoldImm(From, &ModifiedImm)) {
6478         Variable *T = makeReg(Ty, RegNum);
6479         _mov(T,
6480              OperandARM32FlexFpImm::create(Func, From->getType(), ModifiedImm));
6481         return T;
6482       }
6483 
6484       if (Ty == IceType_f64 && isFloatingPointZero(From)) {
6485         // Use T = T ^ T to load a 64-bit fp zero. This does not work for f32
6486         // because ARM does not have a veor instruction with S registers.
6487         Variable *T = makeReg(IceType_f64, RegNum);
6488         Context.insert<InstFakeDef>(T);
6489         _veor(T, T, T);
6490         return T;
6491       }
6492 
6493       // Load floats/doubles from literal pool.
6494       auto *CFrom = llvm::cast<Constant>(From);
6495       assert(CFrom->getShouldBePooled());
6496       Constant *Offset = Ctx->getConstantSym(0, CFrom->getLabelName());
6497       Variable *BaseReg = nullptr;
6498       if (SandboxingType == ST_Nonsfi) {
6499         // vldr does not support the [base, index] addressing mode, so we need
6500         // to legalize Offset to a register. Otherwise, we could simply
6501         //   vldr dest, [got, reg(Offset)]
6502         BaseReg = legalizeToReg(Offset);
6503       } else {
6504         BaseReg = makeReg(getPointerType());
6505         _movw(BaseReg, Offset);
6506         _movt(BaseReg, Offset);
6507       }
6508       From = formMemoryOperand(BaseReg, Ty);
6509       return copyToReg(From, RegNum);
6510     }
6511   }
6512 
6513   if (auto *Var = llvm::dyn_cast<Variable>(From)) {
6514     if (Var->isRematerializable()) {
6515       if (Allowed & Legal_Rematerializable) {
6516         return From;
6517       }
6518 
6519       Variable *T = makeReg(Var->getType(), RegNum);
6520       _mov(T, Var);
6521       return T;
6522     }
6523     // Check if the variable is guaranteed a physical register. This can happen
6524     // either when the variable is pre-colored or when it is assigned infinite
6525     // weight.
6526     bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());
6527     // We need a new physical register for the operand if:
6528     //   Mem is not allowed and Var isn't guaranteed a physical
6529     //   register, or
6530     //   RegNum is required and Var->getRegNum() doesn't match.
6531     if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
6532         (RegNum.hasValue() && (RegNum != Var->getRegNum()))) {
6533       From = copyToReg(From, RegNum);
6534     }
6535     return From;
6536   }
6537   llvm::report_fatal_error("Unhandled operand kind in legalize()");
6538 
6539   return From;
6540 }
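// Constant-materialization sketch for the scalar integer cases above (assumed
// values, illustrative syntax): a flex immediate is an 8-bit value rotated
// right by an even amount, so
//   0x000000FF  -> usable directly as an operand:  add r0, r1, #255
//   0xFFFFFF00  -> only the inverted value fits:   mvn r0, #255
//   0x12345678  -> neither form fits, so emit:     movw r0, #0x5678
//                                                  movt r0, #0x1234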
6541 
6542 /// Provide a trivial wrapper to legalize() for this common usage.
6543 Variable *TargetARM32::legalizeToReg(Operand *From, RegNumT RegNum) {
6544   return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
6545 }
6546 
6547 /// Legalize undef values to concrete values.
6548 Operand *TargetARM32::legalizeUndef(Operand *From, RegNumT RegNum) {
6549   Type Ty = From->getType();
6550   if (llvm::isa<ConstantUndef>(From)) {
6551     // Lower undefs to zero. Another option is to lower undefs to an
6552     // uninitialized register; however, using an uninitialized register results
6553     // in less predictable code.
6554     //
6555     // If in the future the implementation is changed to lower undef values to
6556     // uninitialized registers, a FakeDef will be needed:
6557     // Context.insert(InstFakeDef::create(Func, Reg)); This is in order to
6558     // ensure that the live range of Reg is not overestimated. If the constant
6559     // being lowered is a 64 bit value, then the result should be split and the
6560     // lo and hi components will need to go in uninitialized registers.
6561     if (isVectorType(Ty))
6562       return makeVectorOfZeros(Ty, RegNum);
6563     return Ctx->getConstantZero(Ty);
6564   }
6565   return From;
6566 }
6567 
6568 OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) {
6569   auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand);
6570   // It may be the case that address mode optimization already creates an
6571   // OperandARM32Mem, so in that case it wouldn't need another level of
6572   // transformation.
6573   if (Mem) {
6574     return llvm::cast<OperandARM32Mem>(legalize(Mem));
6575   }
6576   // If we didn't do address mode optimization, then we only have a
6577   // base/offset to work with. ARM always requires a base register, so
6578   // just use that to hold the operand.
6579   auto *Base = llvm::cast<Variable>(
6580       legalize(Operand, Legal_Reg | Legal_Rematerializable));
6581   return OperandARM32Mem::create(
6582       Func, Ty, Base,
6583       llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)));
6584 }
6585 
6586 Variable64On32 *TargetARM32::makeI64RegPair() {
6587   Variable64On32 *Reg =
6588       llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
6589   Reg->setMustHaveReg();
6590   Reg->initHiLo(Func);
6591   Reg->getLo()->setMustNotHaveReg();
6592   Reg->getHi()->setMustNotHaveReg();
6593   return Reg;
6594 }
6595 
6596 Variable *TargetARM32::makeReg(Type Type, RegNumT RegNum) {
6597   // There aren't any 64-bit integer registers for ARM32.
6598   assert(Type != IceType_i64);
6599   assert(AllowTemporaryWithNoReg || RegNum.hasValue());
6600   Variable *Reg = Func->makeVariable(Type);
6601   if (RegNum.hasValue())
6602     Reg->setRegNum(RegNum);
6603   else
6604     Reg->setMustHaveReg();
6605   return Reg;
6606 }
6607 
6608 void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align,
6609                                     RegNumT TmpRegNum) {
6610   assert(llvm::isPowerOf2_32(Align));
6611   uint32_t RotateAmt;
6612   uint32_t Immed_8;
6613   Operand *Mask;
6614   // Use AND or BIC to mask off the bits, depending on which immediate fits (if
6615   // it fits at all). Assume Align is usually small, in which case BIC works
6616   // better. Thus, this rounds down to the alignment.
6617   if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) {
6618     Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex,
6619                     TmpRegNum);
6620     _bic(Reg, Reg, Mask);
6621   } else {
6622     Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex,
6623                     TmpRegNum);
6624     _and(Reg, Reg, Mask);
6625   }
6626 }
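// Worked example (assumed Align == 8): Align - 1 == 7 is a valid flex
// immediate, so the first branch emits "bic reg, reg, #7", rounding reg down
// to a multiple of 8 (e.g., 0x1005 becomes 0x1000). When Align - 1 is not
// encodable, the else branch legalizes -Align (possibly into a register) and
// uses "and" to perform the same rounding.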
6627 
6628 void TargetARM32::postLower() {
6629   if (Func->getOptLevel() == Opt_m1)
6630     return;
6631   markRedefinitions();
6632   Context.availabilityUpdate();
6633 }
6634 
6635 void TargetARM32::emit(const ConstantInteger32 *C) const {
6636   if (!BuildDefs::dump())
6637     return;
6638   Ostream &Str = Ctx->getStrEmit();
6639   Str << "#" << C->getValue();
6640 }
6641 
6642 void TargetARM32::emit(const ConstantInteger64 *) const {
6643   llvm::report_fatal_error("Not expecting to emit 64-bit integers");
6644 }
6645 
6646 void TargetARM32::emit(const ConstantFloat *C) const {
6647   (void)C;
6648   UnimplementedError(getFlags());
6649 }
6650 
6651 void TargetARM32::emit(const ConstantDouble *C) const {
6652   (void)C;
6653   UnimplementedError(getFlags());
6654 }
6655 
6656 void TargetARM32::emit(const ConstantUndef *) const {
6657   llvm::report_fatal_error("undef value encountered by emitter.");
6658 }
6659 
6660 void TargetARM32::emit(const ConstantRelocatable *C) const {
6661   if (!BuildDefs::dump())
6662     return;
6663   Ostream &Str = Ctx->getStrEmit();
6664   Str << "#";
6665   emitWithoutPrefix(C);
6666 }
6667 
6668 void TargetARM32::lowerInt1ForSelect(Variable *Dest, Operand *Boolean,
6669                                      Operand *TrueValue, Operand *FalseValue) {
6670   Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
6671 
6672   assert(Boolean->getType() == IceType_i1);
6673 
6674   bool NeedsAnd1 = false;
6675   if (TrueValue->getType() == IceType_i1) {
6676     assert(FalseValue->getType() == IceType_i1);
6677 
6678     Variable *TrueValueV = Func->makeVariable(IceType_i1);
6679     SafeBoolChain Src0Safe = lowerInt1(TrueValueV, TrueValue);
6680     TrueValue = TrueValueV;
6681 
6682     Variable *FalseValueV = Func->makeVariable(IceType_i1);
6683     SafeBoolChain Src1Safe = lowerInt1(FalseValueV, FalseValue);
6684     FalseValue = FalseValueV;
6685 
6686     NeedsAnd1 = Src0Safe == SBC_No || Src1Safe == SBC_No;
6687   }
6688 
6689   Variable *DestLo = (Dest->getType() == IceType_i64)
6690                          ? llvm::cast<Variable>(loOperand(Dest))
6691                          : Dest;
6692   Variable *DestHi = (Dest->getType() == IceType_i64)
6693                          ? llvm::cast<Variable>(hiOperand(Dest))
6694                          : nullptr;
6695   Operand *FalseValueLo = (FalseValue->getType() == IceType_i64)
6696                               ? loOperand(FalseValue)
6697                               : FalseValue;
6698   Operand *FalseValueHi =
6699       (FalseValue->getType() == IceType_i64) ? hiOperand(FalseValue) : nullptr;
6700 
6701   Operand *TrueValueLo =
6702       (TrueValue->getType() == IceType_i64) ? loOperand(TrueValue) : TrueValue;
6703   Operand *TrueValueHi =
6704       (TrueValue->getType() == IceType_i64) ? hiOperand(TrueValue) : nullptr;
6705 
6706   Variable *T_Lo = makeReg(DestLo->getType());
6707   Variable *T_Hi = (DestHi == nullptr) ? nullptr : makeReg(DestHi->getType());
6708 
6709   _mov(T_Lo, legalize(FalseValueLo, Legal_Reg | Legal_Flex));
6710   if (DestHi) {
6711     _mov(T_Hi, legalize(FalseValueHi, Legal_Reg | Legal_Flex));
6712   }
6713 
6714   CondWhenTrue Cond(CondARM32::kNone);
6715   // FlagsWereSet records whether Boolean was folded into a flag-setting
6716   // producer. If not, an explicit _tst instruction is added below.
6717   bool FlagsWereSet = false;
6718   if (const Inst *Producer = Computations.getProducerOf(Boolean)) {
6719     switch (Producer->getKind()) {
6720     default:
6721       llvm::report_fatal_error("Unexpected producer.");
6722     case Inst::Icmp: {
6723       Cond = lowerIcmpCond(llvm::cast<InstIcmp>(Producer));
6724       FlagsWereSet = true;
6725     } break;
6726     case Inst::Fcmp: {
6727       Cond = lowerFcmpCond(llvm::cast<InstFcmp>(Producer));
6728       FlagsWereSet = true;
6729     } break;
6730     case Inst::Cast: {
6731       const auto *CastProducer = llvm::cast<InstCast>(Producer);
6732       assert(CastProducer->getCastKind() == InstCast::Trunc);
6733       Boolean = CastProducer->getSrc(0);
6734       // No flags were set, so a _tst(Src, 1) will be emitted below. Don't
6735       // bother legalizing Src to a Reg because it will be legalized before
6736       // emitting the tst instruction.
6737       FlagsWereSet = false;
6738     } break;
6739     case Inst::Arithmetic: {
6740       // This is a special case: we eagerly assumed Producer could be folded,
6741       // but in reality, it can't. No reason to panic: we just lower it using
6742       // the regular lowerArithmetic helper.
6743       const auto *ArithProducer = llvm::cast<InstArithmetic>(Producer);
6744       lowerArithmetic(ArithProducer);
6745       Boolean = ArithProducer->getDest();
6746       // No flags were set, so a _tst(Dest, 1) will be emitted below. Don't
6747       // bother legalizing Dest to a Reg because it will be legalized before
6748       // emitting the tst instruction.
6749       FlagsWereSet = false;
6750     } break;
6751     }
6752   }
6753 
6754   if (!FlagsWereSet) {
6755     // No flags have been set, so emit a tst Boolean, 1.
6756     Variable *Src = legalizeToReg(Boolean);
6757     _tst(Src, _1);
6758     Cond = CondWhenTrue(CondARM32::NE); // i.e., CondARM32::NotZero.
6759   }
6760 
6761   if (Cond.WhenTrue0 == CondARM32::kNone) {
6762     assert(Cond.WhenTrue1 == CondARM32::kNone);
6763   } else {
6764     _mov_redefined(T_Lo, legalize(TrueValueLo, Legal_Reg | Legal_Flex),
6765                    Cond.WhenTrue0);
6766     if (DestHi) {
6767       _mov_redefined(T_Hi, legalize(TrueValueHi, Legal_Reg | Legal_Flex),
6768                      Cond.WhenTrue0);
6769     }
6770   }
6771 
6772   if (Cond.WhenTrue1 != CondARM32::kNone) {
6773     _mov_redefined(T_Lo, legalize(TrueValueLo, Legal_Reg | Legal_Flex),
6774                    Cond.WhenTrue1);
6775     if (DestHi) {
6776       _mov_redefined(T_Hi, legalize(TrueValueHi, Legal_Reg | Legal_Flex),
6777                      Cond.WhenTrue1);
6778     }
6779   }
6780 
6781   if (NeedsAnd1) {
6782     // We lowered something that is unsafe (i.e., can't provably be zero or
6783     // one). Truncate the result.
6784     _and(T_Lo, T_Lo, _1);
6785   }
6786 
6787   _mov(DestLo, T_Lo);
6788   if (DestHi) {
6789     _mov(DestHi, T_Hi);
6790   }
6791 }
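// Folding sketch (illustrative): for "d = select i1 c, t, f" where c is
// produced by a foldable icmp, the code above emits roughly
//   cmp   a, b        ; flags set by the icmp producer
//   mov   T, f        ; false value first
//   movCC T, t        ; conditionally overwritten with the true value
//   mov   d, T
// and only falls back to an explicit "tst c, #1" when no producer set the
// flags.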
6792 
6793 TargetARM32::SafeBoolChain TargetARM32::lowerInt1(Variable *Dest,
6794                                                   Operand *Boolean) {
6795   assert(Boolean->getType() == IceType_i1);
6796   Variable *T = makeReg(IceType_i1);
6797   Operand *_0 =
6798       legalize(Ctx->getConstantZero(IceType_i1), Legal_Reg | Legal_Flex);
6799   Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
6800 
6801   SafeBoolChain Safe = SBC_Yes;
6802   if (const Inst *Producer = Computations.getProducerOf(Boolean)) {
6803     switch (Producer->getKind()) {
6804     default:
6805       llvm::report_fatal_error("Unexpected producer.");
6806     case Inst::Icmp: {
6807       _mov(T, _0);
6808       CondWhenTrue Cond = lowerIcmpCond(llvm::cast<InstIcmp>(Producer));
6809       assert(Cond.WhenTrue0 != CondARM32::AL);
6810       assert(Cond.WhenTrue0 != CondARM32::kNone);
6811       assert(Cond.WhenTrue1 == CondARM32::kNone);
6812       _mov_redefined(T, _1, Cond.WhenTrue0);
6813     } break;
6814     case Inst::Fcmp: {
6815       _mov(T, _0);
6816       Inst *MovZero = Context.getLastInserted();
6817       CondWhenTrue Cond = lowerFcmpCond(llvm::cast<InstFcmp>(Producer));
6818       if (Cond.WhenTrue0 == CondARM32::AL) {
6819         assert(Cond.WhenTrue1 == CondARM32::kNone);
6820         MovZero->setDeleted();
6821         _mov(T, _1);
6822       } else if (Cond.WhenTrue0 != CondARM32::kNone) {
6823         _mov_redefined(T, _1, Cond.WhenTrue0);
6824       }
6825       if (Cond.WhenTrue1 != CondARM32::kNone) {
6826         assert(Cond.WhenTrue0 != CondARM32::kNone);
6827         assert(Cond.WhenTrue0 != CondARM32::AL);
6828         _mov_redefined(T, _1, Cond.WhenTrue1);
6829       }
6830     } break;
6831     case Inst::Cast: {
6832       const auto *CastProducer = llvm::cast<InstCast>(Producer);
6833       assert(CastProducer->getCastKind() == InstCast::Trunc);
6834       Operand *Src = CastProducer->getSrc(0);
6835       if (Src->getType() == IceType_i64)
6836         Src = loOperand(Src);
6837       _mov(T, legalize(Src, Legal_Reg | Legal_Flex));
6838       Safe = SBC_No;
6839     } break;
6840     case Inst::Arithmetic: {
6841       const auto *ArithProducer = llvm::cast<InstArithmetic>(Producer);
6842       Safe = lowerInt1Arithmetic(ArithProducer);
6843       _mov(T, ArithProducer->getDest());
6844     } break;
6845     }
6846   } else {
6847     _mov(T, legalize(Boolean, Legal_Reg | Legal_Flex));
6848   }
6849 
6850   _mov(Dest, T);
6851   return Safe;
6852 }
6853 
6854 namespace {
6855 namespace BoolFolding {
6856 bool shouldTrackProducer(const Inst &Instr) {
6857   switch (Instr.getKind()) {
6858   default:
6859     return false;
6860   case Inst::Icmp:
6861   case Inst::Fcmp:
6862     return true;
6863   case Inst::Cast: {
6864     switch (llvm::cast<InstCast>(&Instr)->getCastKind()) {
6865     default:
6866       return false;
6867     case InstCast::Trunc:
6868       return true;
6869     }
6870   }
6871   case Inst::Arithmetic: {
6872     switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6873     default:
6874       return false;
6875     case InstArithmetic::And:
6876     case InstArithmetic::Or:
6877       return true;
6878     }
6879   }
6880   }
6881 }
6882 
6883 bool isValidConsumer(const Inst &Instr) {
6884   switch (Instr.getKind()) {
6885   default:
6886     return false;
6887   case Inst::Br:
6888     return true;
6889   case Inst::Select:
6890     return !isVectorType(Instr.getDest()->getType());
6891   case Inst::Cast: {
6892     switch (llvm::cast<InstCast>(&Instr)->getCastKind()) {
6893     default:
6894       return false;
6895     case InstCast::Sext:
6896       return !isVectorType(Instr.getDest()->getType());
6897     case InstCast::Zext:
6898       return !isVectorType(Instr.getDest()->getType());
6899     }
6900   }
6901   case Inst::Arithmetic: {
6902     switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6903     default:
6904       return false;
6905     case InstArithmetic::And:
6906       return !isVectorType(Instr.getDest()->getType());
6907     case InstArithmetic::Or:
6908       return !isVectorType(Instr.getDest()->getType());
6909     }
6910   }
6911   }
6912 }
6913 } // end of namespace BoolFolding
6914 
6915 namespace FpFolding {
6916 bool shouldTrackProducer(const Inst &Instr) {
6917   switch (Instr.getKind()) {
6918   default:
6919     return false;
6920   case Inst::Arithmetic: {
6921     switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6922     default:
6923       return false;
6924     case InstArithmetic::Fmul:
6925       return true;
6926     }
6927   }
6928   }
6929 }
6930 
6931 bool isValidConsumer(const Inst &Instr) {
6932   switch (Instr.getKind()) {
6933   default:
6934     return false;
6935   case Inst::Arithmetic: {
6936     switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6937     default:
6938       return false;
6939     case InstArithmetic::Fadd:
6940     case InstArithmetic::Fsub:
6941       return true;
6942     }
6943   }
6944   }
6945 }
6946 } // end of namespace FpFolding
6947 
6948 namespace IntFolding {
6949 bool shouldTrackProducer(const Inst &Instr) {
6950   switch (Instr.getKind()) {
6951   default:
6952     return false;
6953   case Inst::Arithmetic: {
6954     switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6955     default:
6956       return false;
6957     case InstArithmetic::Mul:
6958       return true;
6959     }
6960   }
6961   }
6962 }
6963 
6964 bool isValidConsumer(const Inst &Instr) {
6965   switch (Instr.getKind()) {
6966   default:
6967     return false;
6968   case Inst::Arithmetic: {
6969     switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6970     default:
6971       return false;
6972     case InstArithmetic::Add:
6973     case InstArithmetic::Sub:
6974       return true;
6975     }
6976   }
6977   }
6978 }
6979 } // namespace IntFolding
6980 } // end of anonymous namespace
6981 
6982 void TargetARM32::ComputationTracker::recordProducers(CfgNode *Node) {
6983   for (Inst &Instr : Node->getInsts()) {
6984     // Check whether Instr is a valid producer.
6985     Variable *Dest = Instr.getDest();
6986     if (!Instr.isDeleted() // only consider non-deleted instructions; and
6987         && Dest            // only instructions with an actual dest var; and
6988         && Dest->getType() == IceType_i1 // only bool-type dest vars; and
6989         && BoolFolding::shouldTrackProducer(Instr)) { // white-listed instr.
6990       KnownComputations.emplace(Dest->getIndex(),
6991                                 ComputationEntry(&Instr, IceType_i1));
6992     }
6993     if (!Instr.isDeleted() // only consider non-deleted instructions; and
6994         && Dest            // only instructions with an actual dest var; and
6995         && isScalarFloatingType(Dest->getType()) // fp-type only dest vars; and
6996         && FpFolding::shouldTrackProducer(Instr)) { // white-listed instr.
6997       KnownComputations.emplace(Dest->getIndex(),
6998                                 ComputationEntry(&Instr, Dest->getType()));
6999     }
7000     if (!Instr.isDeleted() // only consider non-deleted instructions; and
7001         && Dest            // only instructions with an actual dest var; and
7002         && Dest->getType() == IceType_i32            // i32 only dest vars; and
7003         && IntFolding::shouldTrackProducer(Instr)) { // white-listed instr.
7004       KnownComputations.emplace(Dest->getIndex(),
7005                                 ComputationEntry(&Instr, IceType_i32));
7006     }
7007     // Check each src variable against the map.
7008     FOREACH_VAR_IN_INST(Var, Instr) {
7009       SizeT VarNum = Var->getIndex();
7010       auto ComputationIter = KnownComputations.find(VarNum);
7011       if (ComputationIter == KnownComputations.end()) {
7012         continue;
7013       }
7014 
7015       ++ComputationIter->second.NumUses;
7016       switch (ComputationIter->second.ComputationType) {
7017       default:
7018         KnownComputations.erase(VarNum);
7019         continue;
7020       case IceType_i1:
7021         if (!BoolFolding::isValidConsumer(Instr)) {
7022           KnownComputations.erase(VarNum);
7023           continue;
7024         }
7025         break;
7026       case IceType_i32:
7027         if (IndexOfVarInInst(Var) != 1 || !IntFolding::isValidConsumer(Instr)) {
7028           KnownComputations.erase(VarNum);
7029           continue;
7030         }
7031         break;
7032       case IceType_f32:
7033       case IceType_f64:
7034         if (IndexOfVarInInst(Var) != 1 || !FpFolding::isValidConsumer(Instr)) {
7035           KnownComputations.erase(VarNum);
7036           continue;
7037         }
7038         break;
7039       }
7040 
7041       if (Instr.isLastUse(Var)) {
7042         ComputationIter->second.IsLiveOut = false;
7043       }
7044     }
7045   }
7046 
7047   for (auto Iter = KnownComputations.begin(), End = KnownComputations.end();
7048        Iter != End;) {
7049     // Disable the folding if its dest may be live beyond this block.
7050     if (Iter->second.IsLiveOut || Iter->second.NumUses > 1) {
7051       Iter = KnownComputations.erase(Iter);
7052       continue;
7053     }
7054 
7055     // Mark as "dead" rather than outright deleting. This is so that other
7056     // peephole style optimizations during or before lowering have access to
7057     // this instruction in undeleted form. See for example
7058     // tryOptimizedCmpxchgCmpBr().
7059     Iter->second.Instr->setDead();
7060     ++Iter;
7061   }
7062 }
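// Folding-criteria sketch: an i1 defined by an icmp survives the pruning loop
// above only if it has a single use, that use is in the same block, and the
// consumer is valid (e.g., a br); the producer is then marked dead so the
// consumer can emit a compare-and-branch directly instead of materializing a
// 0/1 value.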
7063 
7064 TargetARM32::Sandboxer::Sandboxer(TargetARM32 *Target,
7065                                   InstBundleLock::Option BundleOption)
7066     : Target(Target), BundleOption(BundleOption) {}
7067 
7068 TargetARM32::Sandboxer::~Sandboxer() {}
7069 
7070 namespace {
7071 OperandARM32FlexImm *indirectBranchBicMask(Cfg *Func) {
7072   constexpr uint32_t Imm8 = 0xFC; // 0xC000000F
7073   constexpr uint32_t RotateAmt = 2;
7074   return OperandARM32FlexImm::create(Func, IceType_i32, Imm8, RotateAmt);
7075 }
7076 
memOpBicMask(Cfg * Func)7077 OperandARM32FlexImm *memOpBicMask(Cfg *Func) {
7078   constexpr uint32_t Imm8 = 0x0C; // 0xC0000000
7079   constexpr uint32_t RotateAmt = 2;
7080   return OperandARM32FlexImm::create(Func, IceType_i32, Imm8, RotateAmt);
7081 }
7082 
baseNeedsBic(Variable * Base)7083 static bool baseNeedsBic(Variable *Base) {
7084   return Base->getRegNum() != RegARM32::Reg_r9 &&
7085          Base->getRegNum() != RegARM32::Reg_sp;
7086 }
7087 } // end of anonymous namespace
7088 
createAutoBundle()7089 void TargetARM32::Sandboxer::createAutoBundle() {
7090   Bundler = makeUnique<AutoBundle>(Target, BundleOption);
7091 }
7092 
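// The SP-manipulating helpers below all follow the same shape: without
// sandboxing they emit just the underlying instruction; with sandboxing they
// open an instruction bundle, emit the instruction, and then mask SP with
// memOpBicMask() so the stack pointer stays inside the sandbox. For example,
// add_sp() conceptually expands to (directive spelling is illustrative only):
//
//   @ bundle_lock
//   add sp, sp, <AddAmount>
//   bic sp, sp, #0xc0000000
//   @ bundle_unlock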
void TargetARM32::Sandboxer::add_sp(Operand *AddAmount) {
  Variable *SP = Target->getPhysicalRegister(RegARM32::Reg_sp);
  if (!Target->NeedSandboxing) {
    Target->_add(SP, SP, AddAmount);
    return;
  }
  createAutoBundle();
  Target->_add(SP, SP, AddAmount);
  Target->_bic(SP, SP, memOpBicMask(Target->Func));
}

void TargetARM32::Sandboxer::align_sp(size_t Alignment) {
  Variable *SP = Target->getPhysicalRegister(RegARM32::Reg_sp);
  if (!Target->NeedSandboxing) {
    Target->alignRegisterPow2(SP, Alignment);
    return;
  }
  createAutoBundle();
  Target->alignRegisterPow2(SP, Alignment);
  Target->_bic(SP, SP, memOpBicMask(Target->Func));
}

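// Indirect calls mask the target register inside the same bundle as the
// branch, so the callee address is both bundle-aligned and within the
// sandbox. A sketch of the sandboxed expansion (mnemonics illustrative only):
//
//   @ bundle_lock
//   bic <target>, <target>, #0xc000000f
//   blx <target>
//   @ bundle_unlock
//
// Direct call targets are not registers, so they get no bic.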
InstARM32Call *TargetARM32::Sandboxer::bl(Variable *ReturnReg,
                                          Operand *CallTarget) {
  if (Target->NeedSandboxing) {
    createAutoBundle();
    if (auto *CallTargetR = llvm::dyn_cast<Variable>(CallTarget)) {
      Target->_bic(CallTargetR, CallTargetR,
                   indirectBranchBicMask(Target->Func));
    }
  }
  return Target->Context.insert<InstARM32Call>(ReturnReg, CallTarget);
}

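// Loads and stores (including the exclusive variants) only need their base
// register masked, and only when the base is not already sandbox-safe:
// baseNeedsBic() skips r9 (reserved for TLS under sandboxing) and sp, both of
// which are kept pointing inside the sandbox at all times.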
void TargetARM32::Sandboxer::ldr(Variable *Dest, OperandARM32Mem *Mem,
                                 CondARM32::Cond Pred) {
  Variable *MemBase = Mem->getBase();
  if (Target->NeedSandboxing && baseNeedsBic(MemBase)) {
    createAutoBundle();
    assert(!Mem->isRegReg());
    Target->_bic(MemBase, MemBase, memOpBicMask(Target->Func), Pred);
  }
  Target->_ldr(Dest, Mem, Pred);
}

void TargetARM32::Sandboxer::ldrex(Variable *Dest, OperandARM32Mem *Mem,
                                   CondARM32::Cond Pred) {
  Variable *MemBase = Mem->getBase();
  if (Target->NeedSandboxing && baseNeedsBic(MemBase)) {
    createAutoBundle();
    assert(!Mem->isRegReg());
    Target->_bic(MemBase, MemBase, memOpBicMask(Target->Func), Pred);
  }
  Target->_ldrex(Dest, Mem, Pred);
}

void TargetARM32::Sandboxer::reset_sp(Variable *Src) {
  Variable *SP = Target->getPhysicalRegister(RegARM32::Reg_sp);
  if (!Target->NeedSandboxing) {
    Target->_mov_redefined(SP, Src);
    return;
  }
  createAutoBundle();
  Target->_mov_redefined(SP, Src);
  Target->_bic(SP, SP, memOpBicMask(Target->Func));
}

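// Returns are sandboxed like indirect branches: the return address is masked
// with indirectBranchBicMask() in the same bundle as the branch back to the
// caller, so control cannot leave the sandbox through a corrupted link
// register.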
void TargetARM32::Sandboxer::ret(Variable *RetAddr, Variable *RetValue) {
  if (Target->NeedSandboxing) {
    createAutoBundle();
    Target->_bic(RetAddr, RetAddr, indirectBranchBicMask(Target->Func));
  }
  Target->_ret(RetAddr, RetValue);
}

void TargetARM32::Sandboxer::str(Variable *Src, OperandARM32Mem *Mem,
                                 CondARM32::Cond Pred) {
  Variable *MemBase = Mem->getBase();
  if (Target->NeedSandboxing && baseNeedsBic(MemBase)) {
    createAutoBundle();
    assert(!Mem->isRegReg());
    Target->_bic(MemBase, MemBase, memOpBicMask(Target->Func), Pred);
  }
  Target->_str(Src, Mem, Pred);
}

void TargetARM32::Sandboxer::strex(Variable *Dest, Variable *Src,
                                   OperandARM32Mem *Mem, CondARM32::Cond Pred) {
  Variable *MemBase = Mem->getBase();
  if (Target->NeedSandboxing && baseNeedsBic(MemBase)) {
    createAutoBundle();
    assert(!Mem->isRegReg());
    Target->_bic(MemBase, MemBase, memOpBicMask(Target->Func), Pred);
  }
  Target->_strex(Dest, Src, Mem, Pred);
}

void TargetARM32::Sandboxer::sub_sp(Operand *SubAmount) {
  Variable *SP = Target->getPhysicalRegister(RegARM32::Reg_sp);
  if (!Target->NeedSandboxing) {
    Target->_sub(SP, SP, SubAmount);
    return;
  }
  createAutoBundle();
  Target->_sub(SP, SP, SubAmount);
  Target->_bic(SP, SP, memOpBicMask(Target->Func));
}

TargetDataARM32::TargetDataARM32(GlobalContext *Ctx)
    : TargetDataLowering(Ctx) {}

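// lowerGlobals() either hands the entire variable list to the ELF object
// writer (object file output) or, for textual and integrated-assembler
// output, emits each global individually, honoring the matchTranslateOnly()
// filter.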
void TargetDataARM32::lowerGlobals(const VariableDeclarationList &Vars,
                                   const std::string &SectionSuffix) {
  const bool IsPIC = getFlags().getUseNonsfi();
  switch (getFlags().getOutFileType()) {
  case FT_Elf: {
    ELFObjectWriter *Writer = Ctx->getObjectWriter();
    Writer->writeDataSection(Vars, llvm::ELF::R_ARM_ABS32, SectionSuffix,
                             IsPIC);
  } break;
  case FT_Asm:
  case FT_Iasm: {
    OstreamLocker _(Ctx);
    for (const VariableDeclaration *Var : Vars) {
      if (getFlags().matchTranslateOnly(Var->getName(), 0)) {
        emitGlobal(*Var, SectionSuffix);
      }
    }
  } break;
  }
}

namespace {
template <typename T> struct ConstantPoolEmitterTraits;

static_assert(sizeof(uint64_t) == 8,
              "uint64_t is supposed to be 8 bytes wide.");

// TODO(jpp): implement the following when implementing constant randomization:
//  * template <> struct ConstantPoolEmitterTraits<uint8_t>
//  * template <> struct ConstantPoolEmitterTraits<uint16_t>
//  * template <> struct ConstantPoolEmitterTraits<uint32_t>
template <> struct ConstantPoolEmitterTraits<float> {
  using ConstantType = ConstantFloat;
  static constexpr Type IceType = IceType_f32;
  // AsmTag and TypeName can't be constexpr because llvm::StringRef is unhappy
  // about them being constexpr.
  static const char AsmTag[];
  static const char TypeName[];
  static uint64_t bitcastToUint64(float Value) {
    static_assert(sizeof(Value) == sizeof(uint32_t),
                  "Float should be 4 bytes.");
    const uint32_t IntValue = Utils::bitCopy<uint32_t>(Value);
    return static_cast<uint64_t>(IntValue);
  }
};
const char ConstantPoolEmitterTraits<float>::AsmTag[] = ".long";
const char ConstantPoolEmitterTraits<float>::TypeName[] = "f32";

template <> struct ConstantPoolEmitterTraits<double> {
  using ConstantType = ConstantDouble;
  static constexpr Type IceType = IceType_f64;
  static const char AsmTag[];
  static const char TypeName[];
  static uint64_t bitcastToUint64(double Value) {
    static_assert(sizeof(double) == sizeof(uint64_t),
                  "Double should be 8 bytes.");
    return Utils::bitCopy<uint64_t>(Value);
  }
};
const char ConstantPoolEmitterTraits<double>::AsmTag[] = ".quad";
const char ConstantPoolEmitterTraits<double>::TypeName[] = "f64";

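// emitConstant() writes one pool entry: the constant's label, the raw bit
// pattern under the type's AsmTag, and a trailing comment with the
// human-readable value. For an f32 with value 1.0 the output looks roughly
// like the following (the label name is made up for illustration):
//
//   .L$f32$0:
//           .long   0x3f800000      /* f32 1 */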
template <typename T>
void emitConstant(
    Ostream &Str,
    const typename ConstantPoolEmitterTraits<T>::ConstantType *Const) {
  using Traits = ConstantPoolEmitterTraits<T>;
  Str << Const->getLabelName();
  Str << ":\n\t" << Traits::AsmTag << "\t0x";
  T Value = Const->getValue();
  Str.write_hex(Traits::bitcastToUint64(Value));
  Str << "\t/* " << Traits::TypeName << " " << Value << " */\n";
}

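// emitConstantPool<T>() switches to a mergeable read-only data section whose
// name and alignment are derived from the constant type, then emits every
// pooled constant of that type. For f32 (4-byte alignment) the section header
// comes out as (whitespace approximate):
//
//   .section  .rodata.cst4,"aM",%progbits,4
//   .align    4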
template <typename T> void emitConstantPool(GlobalContext *Ctx) {
  if (!BuildDefs::dump()) {
    return;
  }

  using Traits = ConstantPoolEmitterTraits<T>;
  static constexpr size_t MinimumAlignment = 4;
  SizeT Align = std::max(MinimumAlignment, typeAlignInBytes(Traits::IceType));
  assert((Align % 4) == 0 && "Constants should be aligned");
  Ostream &Str = Ctx->getStrEmit();
  ConstantList Pool = Ctx->getConstantPool(Traits::IceType);

  Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",%progbits," << Align
      << "\n"
      << "\t.align\t" << Align << "\n";

  for (Constant *C : Pool) {
    if (!C->getShouldBePooled()) {
      continue;
    }

    emitConstant<T>(Str, llvm::dyn_cast<typename Traits::ConstantType>(C));
  }
}
} // end of anonymous namespace

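// lowerConstants() pools f32/f64 constants: for ELF output the object writer
// builds the pools directly; for assembly output the emitters above print
// them as .rodata sections.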
void TargetDataARM32::lowerConstants() {
  if (getFlags().getDisableTranslation())
    return;
  switch (getFlags().getOutFileType()) {
  case FT_Elf: {
    ELFObjectWriter *Writer = Ctx->getObjectWriter();
    Writer->writeConstantPool<ConstantFloat>(IceType_f32);
    Writer->writeConstantPool<ConstantDouble>(IceType_f64);
  } break;
  case FT_Asm:
  case FT_Iasm: {
    OstreamLocker _(Ctx);
    emitConstantPool<float>(Ctx);
    emitConstantPool<double>(Ctx);
    break;
  }
  }
}

void TargetDataARM32::lowerJumpTables() {
  if (getFlags().getDisableTranslation())
    return;
  switch (getFlags().getOutFileType()) {
  case FT_Elf:
    if (!Ctx->getJumpTables().empty()) {
      llvm::report_fatal_error("ARM32 does not support jump tables yet.");
    }
    break;
  case FT_Asm:
    // Already emitted from Cfg
    break;
  case FT_Iasm: {
    // TODO(kschimpf): Fill this in when we get more information.
    break;
  }
  }
}

TargetHeaderARM32::TargetHeaderARM32(GlobalContext *Ctx)
    : TargetHeaderLowering(Ctx), CPUFeatures(getFlags()) {}

void TargetHeaderARM32::lower() {
  OstreamLocker _(Ctx);
  Ostream &Str = Ctx->getStrEmit();
  Str << ".syntax unified\n";
  // Emit build attributes in format: .eabi_attribute TAG, VALUE. See Sec. 2 of
  // "Addenda to, and Errata in the ABI for the ARM architecture"
  // http://infocenter.arm.com
  //                  /help/topic/com.arm.doc.ihi0045d/IHI0045D_ABI_addenda.pdf
  //
  // Tag_conformance should be emitted first in a file-scope sub-subsection of
  // the first public subsection of the attributes.
  Str << ".eabi_attribute 67, \"2.09\"      @ Tag_conformance\n";
  // Chromebooks are at least A15, but use A9 for wider compatibility. For some
  // reason, the LLVM ARM asm parser has the .cpu directive override the mattr
  // specified on the command line. So to test hwdiv, we need to set the .cpu
  // directive higher (can't just rely on --mattr=...).
  if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
    Str << ".cpu    cortex-a15\n";
  } else {
    Str << ".cpu    cortex-a9\n";
  }
  Str << ".eabi_attribute 6, 10   @ Tag_CPU_arch: ARMv7\n"
      << ".eabi_attribute 7, 65   @ Tag_CPU_arch_profile: App profile\n";
  Str << ".eabi_attribute 8, 1    @ Tag_ARM_ISA_use: Yes\n"
      << ".eabi_attribute 9, 2    @ Tag_THUMB_ISA_use: Thumb-2\n";
  Str << ".fpu    neon\n"
      << ".eabi_attribute 17, 1   @ Tag_ABI_PCS_GOT_use: permit directly\n"
      << ".eabi_attribute 20, 1   @ Tag_ABI_FP_denormal\n"
      << ".eabi_attribute 21, 1   @ Tag_ABI_FP_exceptions\n"
      << ".eabi_attribute 23, 3   @ Tag_ABI_FP_number_model: IEEE 754\n"
      << ".eabi_attribute 34, 1   @ Tag_CPU_unaligned_access\n"
      << ".eabi_attribute 24, 1   @ Tag_ABI_align_needed: 8-byte\n"
      << ".eabi_attribute 25, 1   @ Tag_ABI_align_preserved: 8-byte\n"
      << ".eabi_attribute 28, 1   @ Tag_ABI_VFP_args\n"
      << ".eabi_attribute 36, 1   @ Tag_FP_HP_extension\n"
      << ".eabi_attribute 38, 1   @ Tag_ABI_FP_16bit_format\n"
      << ".eabi_attribute 42, 1   @ Tag_MPextension_use\n"
      << ".eabi_attribute 68, 1   @ Tag_Virtualization_use\n";
  if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
    Str << ".eabi_attribute 44, 2   @ Tag_DIV_use\n";
  }
  // Technically R9 is used for TLS with Sandboxing, and we reserve it.
  // However, for compatibility with current NaCl LLVM, don't claim that.
  Str << ".eabi_attribute 14, 3   @ Tag_ABI_PCS_R9_use: Not used\n";
}

SmallBitVector TargetARM32::TypeToRegisterSet[RegARM32::RCARM32_NUM];
SmallBitVector TargetARM32::TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM];
SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM];

} // end of namespace ARM32
} // end of namespace Ice