1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
2 //
3 // The Subzero Code Generator
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief Implements the TargetLoweringARM32 class, which consists almost
12 /// entirely of the lowering sequence for each high-level instruction.
13 ///
14 //===----------------------------------------------------------------------===//
15 #include "IceTargetLoweringARM32.h"
16
17 #include "IceCfg.h"
18 #include "IceCfgNode.h"
19 #include "IceClFlags.h"
20 #include "IceDefs.h"
21 #include "IceELFObjectWriter.h"
22 #include "IceGlobalInits.h"
23 #include "IceInstARM32.def"
24 #include "IceInstARM32.h"
25 #include "IceInstVarIter.h"
26 #include "IceLiveness.h"
27 #include "IceOperand.h"
28 #include "IcePhiLoweringImpl.h"
29 #include "IceRegistersARM32.h"
30 #include "IceTargetLoweringARM32.def"
31 #include "IceUtils.h"
32 #include "llvm/Support/MathExtras.h"
33
34 #include <algorithm>
35 #include <array>
36 #include <utility>
37
38 namespace ARM32 {
39 std::unique_ptr<::Ice::TargetLowering> createTargetLowering(::Ice::Cfg *Func) {
40 return ::Ice::ARM32::TargetARM32::create(Func);
41 }
42
43 std::unique_ptr<::Ice::TargetDataLowering>
44 createTargetDataLowering(::Ice::GlobalContext *Ctx) {
45 return ::Ice::ARM32::TargetDataARM32::create(Ctx);
46 }
47
48 std::unique_ptr<::Ice::TargetHeaderLowering>
49 createTargetHeaderLowering(::Ice::GlobalContext *Ctx) {
50 return ::Ice::ARM32::TargetHeaderARM32::create(Ctx);
51 }
52
53 void staticInit(::Ice::GlobalContext *Ctx) {
54 ::Ice::ARM32::TargetARM32::staticInit(Ctx);
55 if (Ice::getFlags().getUseNonsfi()) {
56 // In nonsfi, we need to reference the _GLOBAL_OFFSET_TABLE_ for accessing
57 // globals. The GOT is an external symbol (i.e., it is not defined in the
58 // pexe) so we need to register it as such so that ELF emission won't barf
59 // on an "unknown" symbol. The GOT is added to the External symbols list
60 // here because staticInit() is invoked in a single-thread context.
61 Ctx->getConstantExternSym(Ctx->getGlobalString(::Ice::GlobalOffsetTable));
62 }
63 }
64
65 bool shouldBePooled(const ::Ice::Constant *C) {
66 return ::Ice::ARM32::TargetARM32::shouldBePooled(C);
67 }
68
69 ::Ice::Type getPointerType() {
70 return ::Ice::ARM32::TargetARM32::getPointerType();
71 }
72
73 } // end of namespace ARM32
74
75 namespace Ice {
76 namespace ARM32 {
77
78 namespace {
79
80 /// SizeOf is used to obtain the size of an initializer list as a constexpr
81 /// expression. This is only needed until our C++ library is updated to
82 /// C++14, which adds constexpr members to std::initializer_list.
83 class SizeOf {
84 SizeOf(const SizeOf &) = delete;
85 SizeOf &operator=(const SizeOf &) = delete;
86
87 public:
88 constexpr SizeOf() : Size(0) {}
89 template <typename... T>
90 explicit constexpr SizeOf(T...)
91 : Size(__length<T...>::value) {}
92 constexpr SizeT size() const { return Size; }
93
94 private:
95 template <typename T, typename... U> struct __length {
96 static constexpr std::size_t value = 1 + __length<U...>::value;
97 };
98
99 template <typename T> struct __length<T> {
100 static constexpr std::size_t value = 1;
101 };
102
103 const std::size_t Size;
104 };
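// For example, SizeOf(1, 2, 3).size() evaluates to 3 at compile time; this is
// how (SizeOf alias_init).size() in the register table below counts the
// entries of each alias_init initializer list.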
105
106 } // end of anonymous namespace
107
108 // Defines the RegARM32::Table table with register information.
109 RegARM32::RegTableType RegARM32::RegTable[RegARM32::Reg_NUM] = {
110 #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr, \
111 isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init) \
112 { \
113 name, encode, cc_arg, scratch, preserved, stackptr, frameptr, isGPR, \
114 isInt, isI64Pair, isFP32, isFP64, isVec128, \
115 (SizeOf alias_init).size(), alias_init \
116 } \
117 ,
118 REGARM32_TABLE
119 #undef X
120 };
121
122 namespace {
123
124 // The following table summarizes the logic for lowering the icmp instruction
125 // for i32 and narrower types. Each icmp condition has a clear mapping to an
126 // ARM32 conditional move instruction.
127
128 const struct TableIcmp32_ {
129 CondARM32::Cond Mapping;
130 } TableIcmp32[] = {
131 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V) \
132 { CondARM32::C_32 } \
133 ,
134 ICMPARM32_TABLE
135 #undef X
136 };
137
138 // The following table summarizes the logic for lowering the icmp instruction
139 // for the i64 type. Two conditional moves are needed for setting to 1 or 0.
140 // The operands may need to be swapped, and there is a slight difference for
141 // signed vs unsigned (comparing hi vs lo first, and using cmp vs sbc).
142 const struct TableIcmp64_ {
143 bool IsSigned;
144 bool Swapped;
145 CondARM32::Cond C1, C2;
146 } TableIcmp64[] = {
147 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V) \
148 { is_signed, swapped64, CondARM32::C1_64, CondARM32::C2_64 } \
149 ,
150 ICMPARM32_TABLE
151 #undef X
152 };
153
154 CondARM32::Cond getIcmp32Mapping(InstIcmp::ICond Cond) {
155 assert(Cond < llvm::array_lengthof(TableIcmp32));
156 return TableIcmp32[Cond].Mapping;
157 }
158
159 // In some cases, there are x-macros tables for both high-level and low-level
160 // instructions/operands that use the same enum key value. The tables are kept
161 // separate to maintain a proper separation between abstraction layers. There
162 // is a risk that the tables could get out of sync if enum values are reordered
163 // or if entries are added or deleted. The following anonymous namespaces use
164 // static_asserts to ensure everything is kept in sync.
165
166 // Validate the enum values in ICMPARM32_TABLE.
167 namespace {
168 // Define a temporary set of enum values based on low-level table entries.
169 enum _icmp_ll_enum {
170 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V) \
171 _icmp_ll_##val,
172 ICMPARM32_TABLE
173 #undef X
174 _num
175 };
176 // Define a set of constants based on high-level table entries.
177 #define X(tag, reverse, str) \
178 static constexpr int _icmp_hl_##tag = InstIcmp::tag;
179 ICEINSTICMP_TABLE
180 #undef X
181 // Define a set of constants based on low-level table entries, and ensure the
182 // table entry keys are consistent.
183 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V) \
184 static_assert( \
185 _icmp_ll_##val == _icmp_hl_##val, \
186 "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE: " #val);
187 ICMPARM32_TABLE
188 #undef X
189 // Repeat the static asserts with respect to the high-level table entries in
190 // case the high-level table has extra entries.
191 #define X(tag, reverse, str) \
192 static_assert( \
193 _icmp_hl_##tag == _icmp_ll_##tag, \
194 "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE: " #tag);
195 ICEINSTICMP_TABLE
196 #undef X
197 } // end of anonymous namespace
198
199 // Stack alignment
200 const uint32_t ARM32_STACK_ALIGNMENT_BYTES = 16;
201
202 // Value is in bytes. Return Value adjusted to the next highest multiple of the
203 // stack alignment.
204 uint32_t applyStackAlignment(uint32_t Value) {
205 return Utils::applyAlignment(Value, ARM32_STACK_ALIGNMENT_BYTES);
206 }
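// E.g., with the 16-byte stack alignment above, applyStackAlignment(20)
// yields 32 and applyStackAlignment(16) yields 16.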
207
208 // Value is in bytes. Return Value adjusted to the next highest multiple of the
209 // stack alignment required for the given type.
210 uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) {
211 // Use natural alignment, except that normally (non-NaCl) ARM only aligns
212 // vectors to 8 bytes.
213 // TODO(jvoung): Check this ...
214 size_t typeAlignInBytes = typeWidthInBytes(Ty);
215 if (isVectorType(Ty))
216 typeAlignInBytes = 8;
217 return Utils::applyAlignment(Value, typeAlignInBytes);
218 }
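// E.g., applyStackAlignmentTy(4, IceType_f64) yields 8 (the natural 8-byte
// alignment of f64), and vector types are likewise aligned to 8 bytes here
// rather than 16.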
219
220 // Conservatively check if at compile time we know that the operand is
221 // definitely a non-zero integer.
222 bool isGuaranteedNonzeroInt(const Operand *Op) {
223 if (auto *Const = llvm::dyn_cast_or_null<ConstantInteger32>(Op)) {
224 return Const->getValue() != 0;
225 }
226 return false;
227 }
228
229 } // end of anonymous namespace
230
231 TargetARM32Features::TargetARM32Features(const ClFlags &Flags) {
232 static_assert(
233 (ARM32InstructionSet::End - ARM32InstructionSet::Begin) ==
234 (TargetInstructionSet::ARM32InstructionSet_End -
235 TargetInstructionSet::ARM32InstructionSet_Begin),
236 "ARM32InstructionSet range different from TargetInstructionSet");
237 if (Flags.getTargetInstructionSet() !=
238 TargetInstructionSet::BaseInstructionSet) {
239 InstructionSet = static_cast<ARM32InstructionSet>(
240 (Flags.getTargetInstructionSet() -
241 TargetInstructionSet::ARM32InstructionSet_Begin) +
242 ARM32InstructionSet::Begin);
243 }
244 }
245
246 namespace {
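// Each of the Num*Args constants below expands the corresponding register
// table X-macro into a sum of +1/+0 terms, counting the registers whose
// cc_arg value is positive, i.e. the registers used for argument passing.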
247 constexpr SizeT NumGPRArgs =
248 #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr, \
249 isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init) \
250 +(((cc_arg) > 0) ? 1 : 0)
251 REGARM32_GPR_TABLE
252 #undef X
253 ;
254 std::array<RegNumT, NumGPRArgs> GPRArgInitializer;
255
256 constexpr SizeT NumI64Args =
257 #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr, \
258 isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init) \
259 +(((cc_arg) > 0) ? 1 : 0)
260 REGARM32_I64PAIR_TABLE
261 #undef X
262 ;
263 std::array<RegNumT, NumI64Args> I64ArgInitializer;
264
265 constexpr SizeT NumFP32Args =
266 #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr, \
267 isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init) \
268 +(((cc_arg) > 0) ? 1 : 0)
269 REGARM32_FP32_TABLE
270 #undef X
271 ;
272 std::array<RegNumT, NumFP32Args> FP32ArgInitializer;
273
274 constexpr SizeT NumFP64Args =
275 #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr, \
276 isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init) \
277 +(((cc_arg) > 0) ? 1 : 0)
278 REGARM32_FP64_TABLE
279 #undef X
280 ;
281 std::array<RegNumT, NumFP64Args> FP64ArgInitializer;
282
283 constexpr SizeT NumVec128Args =
284 #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr, \
285 isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init) \
286 +(((cc_arg) > 0) ? 1 : 0)
287 REGARM32_VEC128_TABLE
288 #undef X
289 ;
290 std::array<RegNumT, NumVec128Args> Vec128ArgInitializer;
291
292 const char *getRegClassName(RegClass C) {
293 auto ClassNum = static_cast<RegARM32::RegClassARM32>(C);
294 assert(ClassNum < RegARM32::RCARM32_NUM);
295 switch (ClassNum) {
296 default:
297 assert(C < RC_Target);
298 return regClassString(C);
299 // Add handling of new register classes below.
300 case RegARM32::RCARM32_QtoS:
301 return "QtoS";
302 }
303 }
304
305 } // end of anonymous namespace
306
307 TargetARM32::TargetARM32(Cfg *Func)
308 : TargetLowering(Func), NeedSandboxing(SandboxingType == ST_NaCl),
309 CPUFeatures(getFlags()) {}
310
311 void TargetARM32::staticInit(GlobalContext *Ctx) {
312 RegNumT::setLimit(RegARM32::Reg_NUM);
313 // Limit this size (or do all bitsets need to be the same width)???
314 SmallBitVector IntegerRegisters(RegARM32::Reg_NUM);
315 SmallBitVector I64PairRegisters(RegARM32::Reg_NUM);
316 SmallBitVector Float32Registers(RegARM32::Reg_NUM);
317 SmallBitVector Float64Registers(RegARM32::Reg_NUM);
318 SmallBitVector VectorRegisters(RegARM32::Reg_NUM);
319 SmallBitVector QtoSRegisters(RegARM32::Reg_NUM);
320 SmallBitVector InvalidRegisters(RegARM32::Reg_NUM);
321 const unsigned EncodedReg_q8 = RegARM32::RegTable[RegARM32::Reg_q8].Encoding;
322 for (int i = 0; i < RegARM32::Reg_NUM; ++i) {
323 const auto &Entry = RegARM32::RegTable[i];
324 IntegerRegisters[i] = Entry.IsInt;
325 I64PairRegisters[i] = Entry.IsI64Pair;
326 Float32Registers[i] = Entry.IsFP32;
327 Float64Registers[i] = Entry.IsFP64;
328 VectorRegisters[i] = Entry.IsVec128;
329 RegisterAliases[i].resize(RegARM32::Reg_NUM);
330 // TODO(eholk): It would be better to store a QtoS flag in the
331 // IceRegistersARM32 table than to compare their encodings here.
332 QtoSRegisters[i] = Entry.IsVec128 && Entry.Encoding < EncodedReg_q8;
333 for (int j = 0; j < Entry.NumAliases; ++j) {
334 assert(i == j || !RegisterAliases[i][Entry.Aliases[j]]);
335 RegisterAliases[i].set(Entry.Aliases[j]);
336 }
337 assert(RegisterAliases[i][i]);
338 if (Entry.CCArg <= 0) {
339 continue;
340 }
341 const auto RegNum = RegNumT::fromInt(i);
342 if (Entry.IsGPR) {
343 GPRArgInitializer[Entry.CCArg - 1] = RegNum;
344 } else if (Entry.IsI64Pair) {
345 I64ArgInitializer[Entry.CCArg - 1] = RegNum;
346 } else if (Entry.IsFP32) {
347 FP32ArgInitializer[Entry.CCArg - 1] = RegNum;
348 } else if (Entry.IsFP64) {
349 FP64ArgInitializer[Entry.CCArg - 1] = RegNum;
350 } else if (Entry.IsVec128) {
351 Vec128ArgInitializer[Entry.CCArg - 1] = RegNum;
352 }
353 }
354 TypeToRegisterSet[IceType_void] = InvalidRegisters;
355 TypeToRegisterSet[IceType_i1] = IntegerRegisters;
356 TypeToRegisterSet[IceType_i8] = IntegerRegisters;
357 TypeToRegisterSet[IceType_i16] = IntegerRegisters;
358 TypeToRegisterSet[IceType_i32] = IntegerRegisters;
359 TypeToRegisterSet[IceType_i64] = I64PairRegisters;
360 TypeToRegisterSet[IceType_f32] = Float32Registers;
361 TypeToRegisterSet[IceType_f64] = Float64Registers;
362 TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
363 TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
364 TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
365 TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
366 TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
367 TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
368 TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
369 TypeToRegisterSet[RegARM32::RCARM32_QtoS] = QtoSRegisters;
370
371 for (size_t i = 0; i < llvm::array_lengthof(TypeToRegisterSet); ++i)
372 TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i];
373
374 filterTypeToRegisterSet(Ctx, RegARM32::Reg_NUM, TypeToRegisterSet,
375 llvm::array_lengthof(TypeToRegisterSet),
376 [](RegNumT RegNum) -> std::string {
377 // This function simply removes ", " from the
378 // register name.
379 std::string Name = RegARM32::getRegName(RegNum);
380 constexpr const char RegSeparator[] = ", ";
381 constexpr size_t RegSeparatorWidth =
382 llvm::array_lengthof(RegSeparator) - 1;
383 for (size_t Pos = Name.find(RegSeparator);
384 Pos != std::string::npos;
385 Pos = Name.find(RegSeparator)) {
386 Name.replace(Pos, RegSeparatorWidth, "");
387 }
388 return Name;
389 },
390 getRegClassName);
391 }
392
393 namespace {
394 void copyRegAllocFromInfWeightVariable64On32(const VarList &Vars) {
395 for (Variable *Var : Vars) {
396 auto *Var64 = llvm::dyn_cast<Variable64On32>(Var);
397 if (!Var64) {
398 // This is not the variable we are looking for.
399 continue;
400 }
401 // Only allow infinite-weight i64 temporaries to be register allocated.
402 assert(!Var64->hasReg() || Var64->mustHaveReg());
403 if (!Var64->hasReg()) {
404 continue;
405 }
406 const auto FirstReg =
407 RegNumT::fixme(RegARM32::getI64PairFirstGPRNum(Var->getRegNum()));
408 // This assumes little endian.
409 Variable *Lo = Var64->getLo();
410 Variable *Hi = Var64->getHi();
411 assert(Lo->hasReg() == Hi->hasReg());
412 if (Lo->hasReg()) {
413 continue;
414 }
415 Lo->setRegNum(FirstReg);
416 Lo->setMustHaveReg();
417 Hi->setRegNum(RegNumT::fixme(FirstReg + 1));
418 Hi->setMustHaveReg();
419 }
420 }
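// E.g., if an infinite-weight i64 temporary was assigned the r0r1 pair, its
// Lo half receives r0 and its Hi half receives r1.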
421 } // end of anonymous namespace
422
423 uint32_t TargetARM32::getCallStackArgumentsSizeBytes(const InstCall *Call) {
424 TargetARM32::CallingConv CC;
425 RegNumT DummyReg;
426 size_t OutArgsSizeBytes = 0;
427 for (SizeT i = 0, NumArgs = Call->getNumArgs(); i < NumArgs; ++i) {
428 Operand *Arg = legalizeUndef(Call->getArg(i));
429 const Type Ty = Arg->getType();
430 if (isScalarIntegerType(Ty)) {
431 if (CC.argInGPR(Ty, &DummyReg)) {
432 continue;
433 }
434 } else {
435 if (CC.argInVFP(Ty, &DummyReg)) {
436 continue;
437 }
438 }
439
440 OutArgsSizeBytes = applyStackAlignmentTy(OutArgsSizeBytes, Ty);
441 OutArgsSizeBytes += typeWidthInBytesOnStack(Ty);
442 }
443
444 return applyStackAlignment(OutArgsSizeBytes);
445 }
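// E.g., for a call foo(i32, i32, i32, i32, i64), the four i32 arguments take
// r0-r3, so no GPR pair remains for the i64; it needs 8 bytes of out-args
// stack, which the final applyStackAlignment() rounds up to 16.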
446
447 void TargetARM32::genTargetHelperCallFor(Inst *Instr) {
448 constexpr bool NoTailCall = false;
449 constexpr bool IsTargetHelperCall = true;
450
451 switch (Instr->getKind()) {
452 default:
453 return;
454 case Inst::Arithmetic: {
455 Variable *Dest = Instr->getDest();
456 const Type DestTy = Dest->getType();
457 const InstArithmetic::OpKind Op =
458 llvm::cast<InstArithmetic>(Instr)->getOp();
459 if (isVectorType(DestTy)) {
460 switch (Op) {
461 default:
462 break;
463 case InstArithmetic::Fdiv:
464 case InstArithmetic::Frem:
465 case InstArithmetic::Sdiv:
466 case InstArithmetic::Srem:
467 case InstArithmetic::Udiv:
468 case InstArithmetic::Urem:
469 scalarizeArithmetic(Op, Dest, Instr->getSrc(0), Instr->getSrc(1));
470 Instr->setDeleted();
471 return;
472 }
473 }
474 switch (DestTy) {
475 default:
476 return;
477 case IceType_i64: {
478 // Technically, ARM has its own aeabi routines, but we can use the
479 // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses
480 // the more standard __moddi3 for rem.
481 RuntimeHelper HelperID = RuntimeHelper::H_Num;
482 switch (Op) {
483 default:
484 return;
485 case InstArithmetic::Udiv:
486 HelperID = RuntimeHelper::H_udiv_i64;
487 break;
488 case InstArithmetic::Sdiv:
489 HelperID = RuntimeHelper::H_sdiv_i64;
490 break;
491 case InstArithmetic::Urem:
492 HelperID = RuntimeHelper::H_urem_i64;
493 break;
494 case InstArithmetic::Srem:
495 HelperID = RuntimeHelper::H_srem_i64;
496 break;
497 }
498 Operand *TargetHelper = Ctx->getRuntimeHelperFunc(HelperID);
499 ARM32HelpersPreamble[TargetHelper] = &TargetARM32::preambleDivRem;
500 constexpr SizeT MaxArgs = 2;
501 auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
502 NoTailCall, IsTargetHelperCall);
503 Call->addArg(Instr->getSrc(0));
504 Call->addArg(Instr->getSrc(1));
505 Instr->setDeleted();
506 return;
507 }
508 case IceType_i32:
509 case IceType_i16:
510 case IceType_i8: {
511 const bool HasHWDiv = hasCPUFeature(TargetARM32Features::HWDivArm);
512 InstCast::OpKind CastKind;
513 RuntimeHelper HelperID = RuntimeHelper::H_Num;
514 switch (Op) {
515 default:
516 return;
517 case InstArithmetic::Udiv:
518 HelperID = HasHWDiv ? RuntimeHelper::H_Num : RuntimeHelper::H_udiv_i32;
519 CastKind = InstCast::Zext;
520 break;
521 case InstArithmetic::Sdiv:
522 HelperID = HasHWDiv ? RuntimeHelper::H_Num : RuntimeHelper::H_sdiv_i32;
523 CastKind = InstCast::Sext;
524 break;
525 case InstArithmetic::Urem:
526 HelperID = HasHWDiv ? RuntimeHelper::H_Num : RuntimeHelper::H_urem_i32;
527 CastKind = InstCast::Zext;
528 break;
529 case InstArithmetic::Srem:
530 HelperID = HasHWDiv ? RuntimeHelper::H_Num : RuntimeHelper::H_srem_i32;
531 CastKind = InstCast::Sext;
532 break;
533 }
534 if (HelperID == RuntimeHelper::H_Num) {
535 // HelperID is only left undefined when the processor has a hardware
536 // divider, in which case no helper call is needed. If any other helpers are
537 // ever introduced, the following assert will have to be modified.
538 assert(HasHWDiv);
539 return;
540 }
541 Operand *Src0 = Instr->getSrc(0);
542 Operand *Src1 = Instr->getSrc(1);
543 if (DestTy != IceType_i32) {
544 // Src0 and Src1 have to be zero- or sign-extended to i32. For Src0, we
545 // just insert an InstCast right before the call to the helper.
546 Variable *Src0_32 = Func->makeVariable(IceType_i32);
547 Context.insert<InstCast>(CastKind, Src0_32, Src0);
548 Src0 = Src0_32;
549
550 // For extending Src1, we will just insert an InstCast if Src1 is not a
551 // Constant. If it is, then we extend it here, rather than at program
552 // runtime. This allows preambleDivRem to optimize out the div-by-0
553 // check.
554 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
555 const int32_t ShAmt = (DestTy == IceType_i16) ? 16 : 24;
556 int32_t NewC = C->getValue();
557 if (CastKind == InstCast::Zext) {
558 NewC &= ~(0x80000000l >> ShAmt);
559 } else {
560 NewC = (NewC << ShAmt) >> ShAmt;
561 }
562 Src1 = Ctx->getConstantInt32(NewC);
563 } else {
564 Variable *Src1_32 = Func->makeVariable(IceType_i32);
565 Context.insert<InstCast>(CastKind, Src1_32, Src1);
566 Src1 = Src1_32;
567 }
568 }
569 Operand *TargetHelper = Ctx->getRuntimeHelperFunc(HelperID);
570 ARM32HelpersPreamble[TargetHelper] = &TargetARM32::preambleDivRem;
571 constexpr SizeT MaxArgs = 2;
572 auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
573 NoTailCall, IsTargetHelperCall);
574 assert(Src0->getType() == IceType_i32);
575 Call->addArg(Src0);
576 assert(Src1->getType() == IceType_i32);
577 Call->addArg(Src1);
578 Instr->setDeleted();
579 return;
580 }
581 case IceType_f64:
582 case IceType_f32: {
583 if (Op != InstArithmetic::Frem) {
584 return;
585 }
586 constexpr SizeT MaxArgs = 2;
587 Operand *TargetHelper = Ctx->getRuntimeHelperFunc(
588 DestTy == IceType_f32 ? RuntimeHelper::H_frem_f32
589 : RuntimeHelper::H_frem_f64);
590 auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
591 NoTailCall, IsTargetHelperCall);
592 Call->addArg(Instr->getSrc(0));
593 Call->addArg(Instr->getSrc(1));
594 Instr->setDeleted();
595 return;
596 }
597 }
598 llvm::report_fatal_error("Control flow should never have reached here.");
599 }
600 case Inst::Cast: {
601 Variable *Dest = Instr->getDest();
602 Operand *Src0 = Instr->getSrc(0);
603 const Type DestTy = Dest->getType();
604 const Type SrcTy = Src0->getType();
605 auto *CastInstr = llvm::cast<InstCast>(Instr);
606 const InstCast::OpKind CastKind = CastInstr->getCastKind();
607
608 switch (CastKind) {
609 default:
610 return;
611 case InstCast::Fptosi:
612 case InstCast::Fptoui: {
613 if (DestTy != IceType_i64) {
614 return;
615 }
616 const bool DestIsSigned = CastKind == InstCast::Fptosi;
617 const bool Src0IsF32 = isFloat32Asserting32Or64(SrcTy);
618 Operand *TargetHelper = Ctx->getRuntimeHelperFunc(
619 Src0IsF32 ? (DestIsSigned ? RuntimeHelper::H_fptosi_f32_i64
620 : RuntimeHelper::H_fptoui_f32_i64)
621 : (DestIsSigned ? RuntimeHelper::H_fptosi_f64_i64
622 : RuntimeHelper::H_fptoui_f64_i64));
623 static constexpr SizeT MaxArgs = 1;
624 auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
625 NoTailCall, IsTargetHelperCall);
626 Call->addArg(Src0);
627 Instr->setDeleted();
628 return;
629 }
630 case InstCast::Sitofp:
631 case InstCast::Uitofp: {
632 if (SrcTy != IceType_i64) {
633 return;
634 }
635 const bool SourceIsSigned = CastKind == InstCast::Sitofp;
636 const bool DestIsF32 = isFloat32Asserting32Or64(Dest->getType());
637 Operand *TargetHelper = Ctx->getRuntimeHelperFunc(
638 DestIsF32 ? (SourceIsSigned ? RuntimeHelper::H_sitofp_i64_f32
639 : RuntimeHelper::H_uitofp_i64_f32)
640 : (SourceIsSigned ? RuntimeHelper::H_sitofp_i64_f64
641 : RuntimeHelper::H_uitofp_i64_f64));
642 static constexpr SizeT MaxArgs = 1;
643 auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
644 NoTailCall, IsTargetHelperCall);
645 Call->addArg(Src0);
646 Instr->setDeleted();
647 return;
648 }
649 case InstCast::Bitcast: {
650 if (DestTy == SrcTy) {
651 return;
652 }
653 Variable *CallDest = Dest;
654 RuntimeHelper HelperID = RuntimeHelper::H_Num;
655 switch (DestTy) {
656 default:
657 return;
658 case IceType_i8:
659 assert(SrcTy == IceType_v8i1);
660 HelperID = RuntimeHelper::H_bitcast_8xi1_i8;
661 CallDest = Func->makeVariable(IceType_i32);
662 break;
663 case IceType_i16:
664 assert(SrcTy == IceType_v16i1);
665 HelperID = RuntimeHelper::H_bitcast_16xi1_i16;
666 CallDest = Func->makeVariable(IceType_i32);
667 break;
668 case IceType_v8i1: {
669 assert(SrcTy == IceType_i8);
670 HelperID = RuntimeHelper::H_bitcast_i8_8xi1;
671 Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
672 // Arguments to functions are required to be at least 32 bits wide.
673 Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
674 Src0 = Src0AsI32;
675 } break;
676 case IceType_v16i1: {
677 assert(SrcTy == IceType_i16);
678 HelperID = RuntimeHelper::H_bitcast_i16_16xi1;
679 Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
680 // Arguments to functions are required to be at least 32 bits wide.
681 Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
682 Src0 = Src0AsI32;
683 } break;
684 }
685 constexpr SizeT MaxSrcs = 1;
686 InstCall *Call = makeHelperCall(HelperID, CallDest, MaxSrcs);
687 Call->addArg(Src0);
688 Context.insert(Call);
689 // The PNaCl ABI disallows i8/i16 return types, so truncate the helper
690 // call result to the appropriate type as necessary.
691 if (CallDest->getType() != Dest->getType())
692 Context.insert<InstCast>(InstCast::Trunc, Dest, CallDest);
693 Instr->setDeleted();
694 return;
695 }
696 case InstCast::Trunc: {
697 if (DestTy == SrcTy) {
698 return;
699 }
700 if (!isVectorType(SrcTy)) {
701 return;
702 }
703 assert(typeNumElements(DestTy) == typeNumElements(SrcTy));
704 assert(typeElementType(DestTy) == IceType_i1);
705 assert(isVectorIntegerType(SrcTy));
706 return;
707 }
708 case InstCast::Sext:
709 case InstCast::Zext: {
710 if (DestTy == SrcTy) {
711 return;
712 }
713 if (!isVectorType(DestTy)) {
714 return;
715 }
716 assert(typeNumElements(DestTy) == typeNumElements(SrcTy));
717 assert(typeElementType(SrcTy) == IceType_i1);
718 assert(isVectorIntegerType(DestTy));
719 return;
720 }
721 }
722 llvm::report_fatal_error("Control flow should never have reached here.");
723 }
724 case Inst::IntrinsicCall: {
725 Variable *Dest = Instr->getDest();
726 auto *IntrinsicCall = llvm::cast<InstIntrinsicCall>(Instr);
727 Intrinsics::IntrinsicID ID = IntrinsicCall->getIntrinsicInfo().ID;
728 switch (ID) {
729 default:
730 return;
731 case Intrinsics::Ctpop: {
732 Operand *Src0 = IntrinsicCall->getArg(0);
733 Operand *TargetHelper =
734 Ctx->getRuntimeHelperFunc(isInt32Asserting32Or64(Src0->getType())
735 ? RuntimeHelper::H_call_ctpop_i32
736 : RuntimeHelper::H_call_ctpop_i64);
737 static constexpr SizeT MaxArgs = 1;
738 auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
739 NoTailCall, IsTargetHelperCall);
740 Call->addArg(Src0);
741 Instr->setDeleted();
742 if (Src0->getType() == IceType_i64) {
743 ARM32HelpersPostamble[TargetHelper] = &TargetARM32::postambleCtpop64;
744 }
745 return;
746 }
747 case Intrinsics::Longjmp: {
748 static constexpr SizeT MaxArgs = 2;
749 static constexpr Variable *NoDest = nullptr;
750 Operand *TargetHelper =
751 Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_longjmp);
752 auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
753 NoTailCall, IsTargetHelperCall);
754 Call->addArg(IntrinsicCall->getArg(0));
755 Call->addArg(IntrinsicCall->getArg(1));
756 Instr->setDeleted();
757 return;
758 }
759 case Intrinsics::Memcpy: {
760 // In the future, we could potentially emit an inline memcpy/memset, etc.
761 // for intrinsic calls with a known length.
762 static constexpr SizeT MaxArgs = 3;
763 static constexpr Variable *NoDest = nullptr;
764 Operand *TargetHelper =
765 Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memcpy);
766 auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
767 NoTailCall, IsTargetHelperCall);
768 Call->addArg(IntrinsicCall->getArg(0));
769 Call->addArg(IntrinsicCall->getArg(1));
770 Call->addArg(IntrinsicCall->getArg(2));
771 Instr->setDeleted();
772 return;
773 }
774 case Intrinsics::Memmove: {
775 static constexpr SizeT MaxArgs = 3;
776 static constexpr Variable *NoDest = nullptr;
777 Operand *TargetHelper =
778 Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memmove);
779 auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
780 NoTailCall, IsTargetHelperCall);
781 Call->addArg(IntrinsicCall->getArg(0));
782 Call->addArg(IntrinsicCall->getArg(1));
783 Call->addArg(IntrinsicCall->getArg(2));
784 Instr->setDeleted();
785 return;
786 }
787 case Intrinsics::Memset: {
788 // The value operand needs to be extended to a stack slot size because the
789 // PNaCl ABI requires arguments to be at least 32 bits wide.
790 Operand *ValOp = IntrinsicCall->getArg(1);
791 assert(ValOp->getType() == IceType_i8);
792 Variable *ValExt = Func->makeVariable(stackSlotType());
793 Context.insert<InstCast>(InstCast::Zext, ValExt, ValOp);
794
795 // Technically, ARM has its own __aeabi_memset, but we can use plain
796 // memset too. The value and size arguments need to be flipped if we ever
797 // decide to use __aeabi_memset.
798 static constexpr SizeT MaxArgs = 3;
799 static constexpr Variable *NoDest = nullptr;
800 Operand *TargetHelper =
801 Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memset);
802 auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
803 NoTailCall, IsTargetHelperCall);
804 Call->addArg(IntrinsicCall->getArg(0));
805 Call->addArg(ValExt);
806 Call->addArg(IntrinsicCall->getArg(2));
807 Instr->setDeleted();
808 return;
809 }
810 case Intrinsics::NaClReadTP: {
811 if (SandboxingType == ST_NaCl) {
812 return;
813 }
814 static constexpr SizeT MaxArgs = 0;
815 Operand *TargetHelper =
816 SandboxingType == ST_Nonsfi
817 ? Ctx->getConstantExternSym(
818 Ctx->getGlobalString("__aeabi_read_tp"))
819 : Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_read_tp);
820 Context.insert<InstCall>(MaxArgs, Dest, TargetHelper, NoTailCall,
821 IsTargetHelperCall);
822 Instr->setDeleted();
823 return;
824 }
825 case Intrinsics::Setjmp: {
826 static constexpr SizeT MaxArgs = 1;
827 Operand *TargetHelper =
828 Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_setjmp);
829 auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
830 NoTailCall, IsTargetHelperCall);
831 Call->addArg(IntrinsicCall->getArg(0));
832 Instr->setDeleted();
833 return;
834 }
835 }
836 llvm::report_fatal_error("Control flow should never have reached here.");
837 }
838 }
839 }
840
841 void TargetARM32::findMaxStackOutArgsSize() {
842 // MinNeededOutArgsBytes should be updated if the Target ever creates a
843 // high-level InstCall that requires more stack bytes.
844 constexpr size_t MinNeededOutArgsBytes = 0;
845 MaxOutArgsSizeBytes = MinNeededOutArgsBytes;
846 for (CfgNode *Node : Func->getNodes()) {
847 Context.init(Node);
848 while (!Context.atEnd()) {
849 PostIncrLoweringContext PostIncrement(Context);
850 Inst *CurInstr = iteratorToInst(Context.getCur());
851 if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) {
852 SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call);
853 MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes);
854 }
855 }
856 }
857 }
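// The maximum is computed up front so that addProlog() can reserve the
// out-args area (area 9 in its stack frame layout) once, rather than
// adjusting SP around each call.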
858
859 void TargetARM32::createGotPtr() {
860 if (SandboxingType != ST_Nonsfi) {
861 return;
862 }
863 GotPtr = Func->makeVariable(IceType_i32);
864 }
865
866 void TargetARM32::insertGotPtrInitPlaceholder() {
867 if (SandboxingType != ST_Nonsfi) {
868 return;
869 }
870 assert(GotPtr != nullptr);
871 // We add the two placeholder instructions here. The first fakedefs T, an
872 // infinite-weight temporary, while the second fakedefs the GotPtr "using" T.
873 // This is needed because the GotPtr initialization, if needed, will require
874 // a register:
875 //
876 // movw reg, _GLOBAL_OFFSET_TABLE_ - 16 - .
877 // movt reg, _GLOBAL_OFFSET_TABLE_ - 12 - .
878 // add reg, pc, reg
879 // mov GotPtr, reg
880 //
881 // If GotPtr is not used, then both these pseudo-instructions are dce'd.
882 Variable *T = makeReg(IceType_i32);
883 Context.insert<InstFakeDef>(T);
884 Context.insert<InstFakeDef>(GotPtr, T);
885 }
886
887 GlobalString
888 TargetARM32::createGotoffRelocation(const ConstantRelocatable *CR) {
889 GlobalString CRName = CR->getName();
890 GlobalString CRGotoffName =
891 Ctx->getGlobalString("GOTOFF$" + Func->getFunctionName() + "$" + CRName);
892 if (KnownGotoffs.count(CRGotoffName) == 0) {
893 constexpr bool SuppressMangling = true;
894 auto *Global =
895 VariableDeclaration::create(Func->getGlobalPool(), SuppressMangling);
896 Global->setIsConstant(true);
897 Global->setName(CRName);
898 Func->getGlobalPool()->willNotBeEmitted(Global);
899
900 auto *Gotoff =
901 VariableDeclaration::create(Func->getGlobalPool(), SuppressMangling);
902 constexpr auto GotFixup = R_ARM_GOTOFF32;
903 Gotoff->setIsConstant(true);
904 Gotoff->addInitializer(VariableDeclaration::RelocInitializer::create(
905 Func->getGlobalPool(), Global, {RelocOffset::create(Ctx, 0)},
906 GotFixup));
907 Gotoff->setName(CRGotoffName);
908 Func->addGlobal(Gotoff);
909 KnownGotoffs.emplace(CRGotoffName);
910 }
911 return CRGotoffName;
912 }
913
914 void TargetARM32::materializeGotAddr(CfgNode *Node) {
915 if (SandboxingType != ST_Nonsfi) {
916 return;
917 }
918
919 // First, we try to find the
920 // GotPtr = def T
921 // pseudo-instruction that we placed for defining the got ptr. That
922 // instruction is not just a place-holder for defining the GotPtr (thus
923 // keeping liveness consistent), but it is also located at a point where it is
924 // safe to materialize the got addr -- i.e., before loading parameters to
925 // registers, but after moving register parameters from their home location.
926 InstFakeDef *DefGotPtr = nullptr;
927 for (auto &Inst : Node->getInsts()) {
928 auto *FakeDef = llvm::dyn_cast<InstFakeDef>(&Inst);
929 if (FakeDef != nullptr && FakeDef->getDest() == GotPtr) {
930 DefGotPtr = FakeDef;
931 break;
932 }
933 }
934
935 if (DefGotPtr == nullptr || DefGotPtr->isDeleted()) {
936 return;
937 }
938
939 // The got addr needs to be materialized at the same point where DefGotPtr
940 // lives.
941 Context.setInsertPoint(instToIterator(DefGotPtr));
942 assert(DefGotPtr->getSrcSize() == 1);
943 auto *T = llvm::cast<Variable>(DefGotPtr->getSrc(0));
944 loadNamedConstantRelocatablePIC(Ctx->getGlobalString(GlobalOffsetTable), T,
945 [this, T](Variable *PC) { _add(T, PC, T); });
946 _mov(GotPtr, T);
947 DefGotPtr->setDeleted();
948 }
949
950 void TargetARM32::loadNamedConstantRelocatablePIC(
951 GlobalString Name, Variable *Register,
952 std::function<void(Variable *PC)> Finish) {
953 assert(SandboxingType == ST_Nonsfi);
954 // We makeReg() here instead of getPhysicalRegister() because the latter ends
955 // up creating multi-block temporaries that liveness fails to validate.
956 auto *PC = makeReg(IceType_i32, RegARM32::Reg_pc);
957
958 auto *AddPcReloc = RelocOffset::create(Ctx);
959 AddPcReloc->setSubtract(true);
960 auto *AddPcLabel = InstARM32Label::create(Func, this);
961 AddPcLabel->setRelocOffset(AddPcReloc);
962
963 auto *MovwReloc = RelocOffset::create(Ctx);
964 auto *MovwLabel = InstARM32Label::create(Func, this);
965 MovwLabel->setRelocOffset(MovwReloc);
966
967 auto *MovtReloc = RelocOffset::create(Ctx);
968 auto *MovtLabel = InstARM32Label::create(Func, this);
969 MovtLabel->setRelocOffset(MovtReloc);
970
971 // The EmitStrings for these constant relocatables have hardcoded offsets
972 // attached to them. This could be dangerous if, e.g., we ever implemented
973 // instruction scheduling, but llvm-mc currently does not support
974 //
975 // movw reg, #:lower16:(Symbol - Label - Number)
976 // movt reg, #:upper16:(Symbol - Label - Number)
977 //
978 // relocations.
979 static constexpr RelocOffsetT PcOffset = -8;
980 auto *CRLower = Ctx->getConstantSymWithEmitString(
981 PcOffset, {MovwReloc, AddPcReloc}, Name, Name + " -16");
982 auto *CRUpper = Ctx->getConstantSymWithEmitString(
983 PcOffset, {MovtReloc, AddPcReloc}, Name, Name + " -12");
984
985 Context.insert(MovwLabel);
986 _movw(Register, CRLower);
987 Context.insert(MovtLabel);
988 _movt(Register, CRUpper);
989 // PC = fake-def to keep liveness consistent.
990 Context.insert<InstFakeDef>(PC);
991 Context.insert(AddPcLabel);
992 Finish(PC);
993 }
994
995 void TargetARM32::translateO2() {
996 TimerMarker T(TimerStack::TT_O2, Func);
997
998 // TODO(stichnot): share passes with other targets?
999 // https://code.google.com/p/nativeclient/issues/detail?id=4094
1000 if (SandboxingType == ST_Nonsfi) {
1001 createGotPtr();
1002 }
1003 genTargetHelperCalls();
1004 findMaxStackOutArgsSize();
1005
1006 // Do not merge Alloca instructions, and lay out the stack.
1007 static constexpr bool SortAndCombineAllocas = true;
1008 Func->processAllocas(SortAndCombineAllocas);
1009 Func->dump("After Alloca processing");
1010
1011 if (!getFlags().getEnablePhiEdgeSplit()) {
1012 // Lower Phi instructions.
1013 Func->placePhiLoads();
1014 if (Func->hasError())
1015 return;
1016 Func->placePhiStores();
1017 if (Func->hasError())
1018 return;
1019 Func->deletePhis();
1020 if (Func->hasError())
1021 return;
1022 Func->dump("After Phi lowering");
1023 }
1024
1025 // Address mode optimization.
1026 Func->getVMetadata()->init(VMK_SingleDefs);
1027 Func->doAddressOpt();
1028 Func->materializeVectorShuffles();
1029
1030 // Argument lowering
1031 Func->doArgLowering();
1032
1033 // Target lowering. This requires liveness analysis for some parts of the
1034 // lowering decisions, such as compare/branch fusing. If non-lightweight
1035 // liveness analysis is used, the instructions need to be renumbered first.
1036 // TODO: This renumbering should only be necessary if we're actually
1037 // calculating live intervals, which we only do for register allocation.
1038 Func->renumberInstructions();
1039 if (Func->hasError())
1040 return;
1041
1042 // TODO: It should be sufficient to use the fastest liveness calculation,
1043 // i.e. livenessLightweight(). However, for some reason that slows down the
1044 // rest of the translation. Investigate.
1045 Func->liveness(Liveness_Basic);
1046 if (Func->hasError())
1047 return;
1048 Func->dump("After ARM32 address mode opt");
1049
1050 if (SandboxingType == ST_Nonsfi) {
1051 insertGotPtrInitPlaceholder();
1052 }
1053 Func->genCode();
1054 if (Func->hasError())
1055 return;
1056 Func->dump("After ARM32 codegen");
1057
1058 // Register allocation. This requires instruction renumbering and full
1059 // liveness analysis.
1060 Func->renumberInstructions();
1061 if (Func->hasError())
1062 return;
1063 Func->liveness(Liveness_Intervals);
1064 if (Func->hasError())
1065 return;
1066 // The post-codegen dump is done here, after liveness analysis and associated
1067 // cleanup, to make the dump cleaner and more useful.
1068 Func->dump("After initial ARM32 codegen");
1069 // Validate the live range computations. The expensive validation call is
1070 // deliberately only made when assertions are enabled.
1071 assert(Func->validateLiveness());
1072 Func->getVMetadata()->init(VMK_All);
1073 regAlloc(RAK_Global);
1074 if (Func->hasError())
1075 return;
1076
1077 copyRegAllocFromInfWeightVariable64On32(Func->getVariables());
1078 Func->dump("After linear scan regalloc");
1079
1080 if (getFlags().getEnablePhiEdgeSplit()) {
1081 Func->advancedPhiLowering();
1082 Func->dump("After advanced Phi lowering");
1083 }
1084
1085 ForbidTemporaryWithoutReg _(this);
1086
1087 // Stack frame mapping.
1088 Func->genFrame();
1089 if (Func->hasError())
1090 return;
1091 Func->dump("After stack frame mapping");
1092
1093 postLowerLegalization();
1094 if (Func->hasError())
1095 return;
1096 Func->dump("After postLowerLegalization");
1097
1098 Func->contractEmptyNodes();
1099 Func->reorderNodes();
1100
1101 // Branch optimization. This needs to be done just before code emission. In
1102 // particular, no transformations that insert or reorder CfgNodes should be
1103 // done after branch optimization. We go ahead and do it before nop insertion
1104 // to reduce the amount of work needed for searching for opportunities.
1105 Func->doBranchOpt();
1106 Func->dump("After branch optimization");
1107
1108 // Nop insertion
1109 if (getFlags().getShouldDoNopInsertion()) {
1110 Func->doNopInsertion();
1111 }
1112 }
1113
1114 void TargetARM32::translateOm1() {
1115 TimerMarker T(TimerStack::TT_Om1, Func);
1116
1117 // TODO(stichnot): share passes with other targets?
1118 if (SandboxingType == ST_Nonsfi) {
1119 createGotPtr();
1120 }
1121
1122 genTargetHelperCalls();
1123 findMaxStackOutArgsSize();
1124
1125 // Do not merge Alloca instructions, and lay out the stack.
1126 static constexpr bool DontSortAndCombineAllocas = false;
1127 Func->processAllocas(DontSortAndCombineAllocas);
1128 Func->dump("After Alloca processing");
1129
1130 Func->placePhiLoads();
1131 if (Func->hasError())
1132 return;
1133 Func->placePhiStores();
1134 if (Func->hasError())
1135 return;
1136 Func->deletePhis();
1137 if (Func->hasError())
1138 return;
1139 Func->dump("After Phi lowering");
1140
1141 Func->doArgLowering();
1142
1143 if (SandboxingType == ST_Nonsfi) {
1144 insertGotPtrInitPlaceholder();
1145 }
1146 Func->genCode();
1147 if (Func->hasError())
1148 return;
1149 Func->dump("After initial ARM32 codegen");
1150
1151 regAlloc(RAK_InfOnly);
1152 if (Func->hasError())
1153 return;
1154
1155 copyRegAllocFromInfWeightVariable64On32(Func->getVariables());
1156 Func->dump("After regalloc of infinite-weight variables");
1157
1158 ForbidTemporaryWithoutReg _(this);
1159
1160 Func->genFrame();
1161 if (Func->hasError())
1162 return;
1163 Func->dump("After stack frame mapping");
1164
1165 postLowerLegalization();
1166 if (Func->hasError())
1167 return;
1168 Func->dump("After postLowerLegalization");
1169
1170 // Nop insertion
1171 if (getFlags().getShouldDoNopInsertion()) {
1172 Func->doNopInsertion();
1173 }
1174 }
1175
1176 uint32_t TargetARM32::getStackAlignment() const {
1177 return ARM32_STACK_ALIGNMENT_BYTES;
1178 }
1179
1180 bool TargetARM32::doBranchOpt(Inst *I, const CfgNode *NextNode) {
1181 if (auto *Br = llvm::dyn_cast<InstARM32Br>(I)) {
1182 return Br->optimizeBranch(NextNode);
1183 }
1184 return false;
1185 }
1186
1187 const char *TargetARM32::getRegName(RegNumT RegNum, Type Ty) const {
1188 (void)Ty;
1189 return RegARM32::getRegName(RegNum);
1190 }
1191
1192 Variable *TargetARM32::getPhysicalRegister(RegNumT RegNum, Type Ty) {
1193 static const Type DefaultType[] = {
1194 #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr, \
1195 isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init) \
1196 (isFP32) \
1197 ? IceType_f32 \
1198 : ((isFP64) ? IceType_f64 : ((isVec128 ? IceType_v4i32 : IceType_i32))),
1199 REGARM32_TABLE
1200 #undef X
1201 };
1202
1203 if (Ty == IceType_void) {
1204 assert(unsigned(RegNum) < llvm::array_lengthof(DefaultType));
1205 Ty = DefaultType[RegNum];
1206 }
1207 if (PhysicalRegisters[Ty].empty())
1208 PhysicalRegisters[Ty].resize(RegARM32::Reg_NUM);
1209 assert(unsigned(RegNum) < PhysicalRegisters[Ty].size());
1210 Variable *Reg = PhysicalRegisters[Ty][RegNum];
1211 if (Reg == nullptr) {
1212 Reg = Func->makeVariable(Ty);
1213 Reg->setRegNum(RegNum);
1214 PhysicalRegisters[Ty][RegNum] = Reg;
1215 // Specially mark a named physical register as an "argument" so that it is
1216 // considered live upon function entry. Otherwise it's possible to get
1217 // liveness validation errors for saving callee-save registers.
1218 Func->addImplicitArg(Reg);
1219 // Don't bother tracking the live range of a named physical register.
1220 Reg->setIgnoreLiveness();
1221 }
1222 return Reg;
1223 }
1224
1225 void TargetARM32::emitJumpTable(const Cfg *Func,
1226 const InstJumpTable *JumpTable) const {
1227 (void)Func;
1228 (void)JumpTable;
1229 UnimplementedError(getFlags());
1230 }
1231
1232 void TargetARM32::emitVariable(const Variable *Var) const {
1233 if (!BuildDefs::dump())
1234 return;
1235 Ostream &Str = Ctx->getStrEmit();
1236 if (Var->hasReg()) {
1237 Str << getRegName(Var->getRegNum(), Var->getType());
1238 return;
1239 }
1240 if (Var->mustHaveReg()) {
1241 llvm::report_fatal_error("Infinite-weight Variable (" + Var->getName() +
1242 ") has no register assigned - function " +
1243 Func->getFunctionName());
1244 }
1245 assert(!Var->isRematerializable());
1246 int32_t Offset = Var->getStackOffset();
1247 auto BaseRegNum = Var->getBaseRegNum();
1248 if (BaseRegNum.hasNoValue()) {
1249 BaseRegNum = getFrameOrStackReg();
1250 }
1251 const Type VarTy = Var->getType();
1252 Str << "[" << getRegName(BaseRegNum, VarTy);
1253 if (Offset != 0) {
1254 Str << ", #" << Offset;
1255 }
1256 Str << "]";
1257 }
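// E.g., a stack-allocated variable at offset 8 from the frame pointer is
// emitted as "[fp, #8]", and one at offset 0 as just "[fp]".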
1258
1259 TargetARM32::CallingConv::CallingConv()
1260 : GPRegsUsed(RegARM32::Reg_NUM),
1261 GPRArgs(GPRArgInitializer.rbegin(), GPRArgInitializer.rend()),
1262 I64Args(I64ArgInitializer.rbegin(), I64ArgInitializer.rend()),
1263 VFPRegsUsed(RegARM32::Reg_NUM),
1264 FP32Args(FP32ArgInitializer.rbegin(), FP32ArgInitializer.rend()),
1265 FP64Args(FP64ArgInitializer.rbegin(), FP64ArgInitializer.rend()),
1266 Vec128Args(Vec128ArgInitializer.rbegin(), Vec128ArgInitializer.rend()) {}
1267
1268 bool TargetARM32::CallingConv::argInGPR(Type Ty, RegNumT *Reg) {
1269 CfgVector<RegNumT> *Source;
1270
1271 switch (Ty) {
1272 default: {
1273 assert(isScalarIntegerType(Ty));
1274 Source = &GPRArgs;
1275 } break;
1276 case IceType_i64: {
1277 Source = &I64Args;
1278 } break;
1279 }
1280
1281 discardUnavailableGPRsAndTheirAliases(Source);
1282
1283 if (Source->empty()) {
1284 GPRegsUsed.set();
1285 return false;
1286 }
1287
1288 *Reg = Source->back();
1289 // Note that we don't Source->pop_back() here. This is intentional. Notice how
1290 // we mark all of Reg's aliases as Used. So, for the next argument,
1291 // Source->back() is marked as unavailable, and it is thus implicitly popped
1292 // from the stack.
1293 GPRegsUsed |= RegisterAliases[*Reg];
1294 return true;
1295 }
1296
1297 // GPRs are not packed when passing parameters. Thus, a function foo(i32, i64,
1298 // i32) will have the first argument in r0, the second in r1-r2, and the third
1299 // on the stack. To model this behavior, whenever we pop a register from Regs,
1300 // we remove all of its aliases from the pool of available GPRs. This has the
1301 // effect of computing the "closure" on the GPR registers.
1302 void TargetARM32::CallingConv::discardUnavailableGPRsAndTheirAliases(
1303 CfgVector<RegNumT> *Regs) {
1304 while (!Regs->empty() && GPRegsUsed[Regs->back()]) {
1305 GPRegsUsed |= RegisterAliases[Regs->back()];
1306 Regs->pop_back();
1307 }
1308 }
1309
1310 bool TargetARM32::CallingConv::argInVFP(Type Ty, RegNumT *Reg) {
1311 CfgVector<RegNumT> *Source;
1312
1313 switch (Ty) {
1314 default: {
1315 assert(isVectorType(Ty));
1316 Source = &Vec128Args;
1317 } break;
1318 case IceType_f32: {
1319 Source = &FP32Args;
1320 } break;
1321 case IceType_f64: {
1322 Source = &FP64Args;
1323 } break;
1324 }
1325
1326 discardUnavailableVFPRegs(Source);
1327
1328 if (Source->empty()) {
1329 VFPRegsUsed.set();
1330 return false;
1331 }
1332
1333 *Reg = Source->back();
1334 VFPRegsUsed |= RegisterAliases[*Reg];
1335 return true;
1336 }
1337
1338 // Arguments in VFP registers are not packed, so we don't mark the popped
1339 // registers' aliases as unavailable.
1340 void TargetARM32::CallingConv::discardUnavailableVFPRegs(
1341 CfgVector<RegNumT> *Regs) {
1342 while (!Regs->empty() && VFPRegsUsed[Regs->back()]) {
1343 Regs->pop_back();
1344 }
1345 }
1346
1347 void TargetARM32::lowerArguments() {
1348 VarList &Args = Func->getArgs();
1349 TargetARM32::CallingConv CC;
1350
1351 // For each register argument, replace Arg in the argument list with the home
1352 // register. Then generate an instruction in the prolog to copy the home
1353 // register to the assigned location of Arg.
1354 Context.init(Func->getEntryNode());
1355 Context.setInsertPoint(Context.getCur());
1356
1357 for (SizeT I = 0, E = Args.size(); I < E; ++I) {
1358 Variable *Arg = Args[I];
1359 Type Ty = Arg->getType();
1360 RegNumT RegNum;
1361 if (isScalarIntegerType(Ty)) {
1362 if (!CC.argInGPR(Ty, &RegNum)) {
1363 continue;
1364 }
1365 } else {
1366 if (!CC.argInVFP(Ty, &RegNum)) {
1367 continue;
1368 }
1369 }
1370
1371 Variable *RegisterArg = Func->makeVariable(Ty);
1372 if (BuildDefs::dump()) {
1373 RegisterArg->setName(Func, "home_reg:" + Arg->getName());
1374 }
1375 RegisterArg->setIsArg();
1376 Arg->setIsArg(false);
1377 Args[I] = RegisterArg;
1378 switch (Ty) {
1379 default: { RegisterArg->setRegNum(RegNum); } break;
1380 case IceType_i64: {
1381 auto *RegisterArg64 = llvm::cast<Variable64On32>(RegisterArg);
1382 RegisterArg64->initHiLo(Func);
1383 RegisterArg64->getLo()->setRegNum(
1384 RegNumT::fixme(RegARM32::getI64PairFirstGPRNum(RegNum)));
1385 RegisterArg64->getHi()->setRegNum(
1386 RegNumT::fixme(RegARM32::getI64PairSecondGPRNum(RegNum)));
1387 } break;
1388 }
1389 Context.insert<InstAssign>(Arg, RegisterArg);
1390 }
1391 }
1392
1393 // Helper function for addProlog().
1394 //
1395 // This assumes Arg is an argument passed on the stack. This sets the frame
1396 // offset for Arg and updates InArgsSizeBytes according to Arg's width. For an
1397 // I64 arg that has been split into Lo and Hi components, it calls itself
1398 // recursively on the components, taking care to handle Lo first because of the
1399 // little-endian architecture. Lastly, this function generates an instruction
1400 // to copy Arg into its assigned register if applicable.
1401 void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
1402 size_t BasicFrameOffset,
1403 size_t *InArgsSizeBytes) {
1404 const Type Ty = Arg->getType();
1405 *InArgsSizeBytes = applyStackAlignmentTy(*InArgsSizeBytes, Ty);
1406
1407 if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) {
1408 Variable *const Lo = Arg64On32->getLo();
1409 Variable *const Hi = Arg64On32->getHi();
1410 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
1411 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
1412 return;
1413 }
1414 assert(Ty != IceType_i64);
1415
1416 const int32_t ArgStackOffset = BasicFrameOffset + *InArgsSizeBytes;
1417 *InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
1418
1419 if (!Arg->hasReg()) {
1420 Arg->setStackOffset(ArgStackOffset);
1421 return;
1422 }
1423
1424 // If the argument variable has been assigned a register, we need to copy the
1425 // value from the stack slot.
1426 Variable *Parameter = Func->makeVariable(Ty);
1427 Parameter->setMustNotHaveReg();
1428 Parameter->setStackOffset(ArgStackOffset);
1429 _mov(Arg, Parameter);
1430 }
1431
1432 Type TargetARM32::stackSlotType() { return IceType_i32; }
1433
1434 void TargetARM32::addProlog(CfgNode *Node) {
1435 // Stack frame layout:
1436 //
1437 // +------------------------+
1438 // | 1. preserved registers |
1439 // +------------------------+
1440 // | 2. padding |
1441 // +------------------------+ <--- FramePointer (if used)
1442 // | 3. global spill area |
1443 // +------------------------+
1444 // | 4. padding |
1445 // +------------------------+
1446 // | 5. local spill area |
1447 // +------------------------+
1448 // | 6. padding |
1449 // +------------------------+
1450 // | 7. allocas (variable) |
1451 // +------------------------+
1452 // | 8. padding |
1453 // +------------------------+
1454 // | 9. out args |
1455 // +------------------------+ <--- StackPointer
1456 //
1457 // The following variables record the size in bytes of the given areas:
1458 // * PreservedRegsSizeBytes: area 1
1459 // * SpillAreaPaddingBytes: area 2
1460 // * GlobalsSize: area 3
1461 // * GlobalsAndSubsequentPaddingSize: areas 3 - 4
1462 // * LocalsSpillAreaSize: area 5
1463 // * SpillAreaSizeBytes: areas 2 - 6, and 9
1464 // * MaxOutArgsSizeBytes: area 9
1465 //
1466 // Determine stack frame offsets for each Variable without a register
1467 // assignment. This can be done as one variable per stack slot. Or, do
1468 // coalescing by running the register allocator again with an infinite set of
1469 // registers (as a side effect, this gives variables a second chance at
1470 // physical register assignment).
1471 //
1472 // A middle ground approach is to leverage sparsity and allocate one block of
1473 // space on the frame for globals (variables with multi-block lifetime), and
1474 // one block to share for locals (single-block lifetime).
1475
1476 Context.init(Node);
1477 Context.setInsertPoint(Context.getCur());
1478
1479 SmallBitVector CalleeSaves = getRegisterSet(RegSet_CalleeSave, RegSet_None);
1480 RegsUsed = SmallBitVector(CalleeSaves.size());
1481 VarList SortedSpilledVariables;
1482 size_t GlobalsSize = 0;
1483 // If there is a separate locals area, this represents that area. Otherwise
1484 // it counts any variable not counted by GlobalsSize.
1485 SpillAreaSizeBytes = 0;
1486 // If there is a separate locals area, this specifies the alignment for it.
1487 uint32_t LocalsSlotsAlignmentBytes = 0;
1488 // The entire spill locations area gets aligned to the largest natural
1489 // alignment of the variables that have a spill slot.
1490 uint32_t SpillAreaAlignmentBytes = 0;
1491 // For now, we don't have target-specific variables that need special
1492 // treatment (no stack-slot-linked SpillVariable type).
1493 std::function<bool(Variable *)> TargetVarHook = [](Variable *Var) {
1494 static constexpr bool AssignStackSlot = false;
1495 static constexpr bool DontAssignStackSlot = !AssignStackSlot;
1496 if (llvm::isa<Variable64On32>(Var)) {
1497 return DontAssignStackSlot;
1498 }
1499 return AssignStackSlot;
1500 };
1501
1502 // Compute the list of spilled variables and bounds for GlobalsSize, etc.
1503 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
1504 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
1505 &LocalsSlotsAlignmentBytes, TargetVarHook);
1506 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
1507 SpillAreaSizeBytes += GlobalsSize;
1508
1509 // Add push instructions for preserved registers. On ARM, "push" can push a
1510 // whole list of GPRs via a bitmask (0-15). Unlike x86, ARM also has
1511 // callee-saved float/vector registers.
1512 //
1513 // The "vpush" instruction can handle a whole list of float/vector registers,
1514 // but it only handles contiguous sequences of registers by specifying the
1515 // start and the length.
1516 PreservedGPRs.reserve(CalleeSaves.size());
1517 PreservedSRegs.reserve(CalleeSaves.size());
1518
1519 // Consider FP and LR as callee-save / used as needed.
1520 if (UsesFramePointer) {
1521 if (RegsUsed[RegARM32::Reg_fp]) {
1522 llvm::report_fatal_error("Frame pointer has been used.");
1523 }
1524 CalleeSaves[RegARM32::Reg_fp] = true;
1525 RegsUsed[RegARM32::Reg_fp] = true;
1526 }
1527 if (!MaybeLeafFunc) {
1528 CalleeSaves[RegARM32::Reg_lr] = true;
1529 RegsUsed[RegARM32::Reg_lr] = true;
1530 }
1531
1532 // Make two passes over the used registers. The first pass records all the
1533 // used registers -- and their aliases. Then, we figure out which GPRs and
1534 // VFP S registers should be saved. We don't bother saving D/Q registers
1535 // because their uses are recorded as S regs uses.
1536 SmallBitVector ToPreserve(RegARM32::Reg_NUM);
1537 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
1538 if (NeedSandboxing && i == RegARM32::Reg_r9) {
1539 // r9 is never updated in sandboxed code.
1540 continue;
1541 }
1542 if (CalleeSaves[i] && RegsUsed[i]) {
1543 ToPreserve |= RegisterAliases[i];
1544 }
1545 }
1546
1547 uint32_t NumCallee = 0;
1548 size_t PreservedRegsSizeBytes = 0;
1549
1550 // RegClasses is a tuple of
1551 //
1552 // <First Register in Class, Last Register in Class, Vector of Save Registers>
1553 //
1554 // We use these tuples to figure out which registers we should push/pop during
1555 // prolog/epilog.
1556 using RegClassType = std::tuple<uint32_t, uint32_t, VarList *>;
1557 const RegClassType RegClasses[] = {
1558 RegClassType(RegARM32::Reg_GPR_First, RegARM32::Reg_GPR_Last,
1559 &PreservedGPRs),
1560 RegClassType(RegARM32::Reg_SREG_First, RegARM32::Reg_SREG_Last,
1561 &PreservedSRegs)};
1562 for (const auto &RegClass : RegClasses) {
1563 const uint32_t FirstRegInClass = std::get<0>(RegClass);
1564 const uint32_t LastRegInClass = std::get<1>(RegClass);
1565 VarList *const PreservedRegsInClass = std::get<2>(RegClass);
1566 for (uint32_t Reg = FirstRegInClass; Reg <= LastRegInClass; ++Reg) {
1567 if (!ToPreserve[Reg]) {
1568 continue;
1569 }
1570 ++NumCallee;
1571 Variable *PhysicalRegister = getPhysicalRegister(RegNumT::fromInt(Reg));
1572 PreservedRegsSizeBytes +=
1573 typeWidthInBytesOnStack(PhysicalRegister->getType());
1574 PreservedRegsInClass->push_back(PhysicalRegister);
1575 }
1576 }
1577
1578 Ctx->statsUpdateRegistersSaved(NumCallee);
1579 if (!PreservedSRegs.empty())
1580 _push(PreservedSRegs);
1581 if (!PreservedGPRs.empty())
1582 _push(PreservedGPRs);
1583
1584 // Generate "mov FP, SP" if needed.
1585 if (UsesFramePointer) {
1586 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
1587 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
1588 _mov(FP, SP);
1589 // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode).
1590 Context.insert<InstFakeUse>(FP);
1591 }
1592
1593 // Align the variables area. SpillAreaPaddingBytes is the size of the region
1594 // after the preserved registers and before the spill areas.
1595 // LocalsSlotsPaddingBytes is the amount of padding between the globals and
1596 // locals area if they are separate.
1597 assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES);
1598 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
1599 uint32_t SpillAreaPaddingBytes = 0;
1600 uint32_t LocalsSlotsPaddingBytes = 0;
1601 alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
1602 GlobalsSize, LocalsSlotsAlignmentBytes,
1603 &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
1604 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
1605 uint32_t GlobalsAndSubsequentPaddingSize =
1606 GlobalsSize + LocalsSlotsPaddingBytes;
1607
1608   // Add the out args space to the stack, and align SP if necessary.
1609 if (!NeedsStackAlignment) {
1610 SpillAreaSizeBytes += MaxOutArgsSizeBytes;
1611 } else {
1612 uint32_t StackOffset = PreservedRegsSizeBytes;
1613 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
1614 StackSize = applyStackAlignment(StackSize + MaxOutArgsSizeBytes);
1615 SpillAreaSizeBytes = StackSize - StackOffset;
1616 }
1617
1618 // Combine fixed alloca with SpillAreaSize.
1619 SpillAreaSizeBytes += FixedAllocaSizeBytes;
1620
1621 // Generate "sub sp, SpillAreaSizeBytes"
1622 if (SpillAreaSizeBytes) {
1623 // Use the scratch register if needed to legalize the immediate.
1624 Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
1625 Legal_Reg | Legal_Flex, getReservedTmpReg());
1626 Sandboxer(this).sub_sp(SubAmount);
1627 if (FixedAllocaAlignBytes > ARM32_STACK_ALIGNMENT_BYTES) {
1628 Sandboxer(this).align_sp(FixedAllocaAlignBytes);
1629 }
1630 }
1631
1632 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
1633
1634 // Fill in stack offsets for stack args, and copy args into registers for
1635 // those that were register-allocated. Args are pushed right to left, so
1636 // Arg[0] is closest to the stack/frame pointer.
1637 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
1638 size_t BasicFrameOffset = PreservedRegsSizeBytes;
1639 if (!UsesFramePointer)
1640 BasicFrameOffset += SpillAreaSizeBytes;
1641
1642 materializeGotAddr(Node);
1643
1644 const VarList &Args = Func->getArgs();
1645 size_t InArgsSizeBytes = 0;
1646 TargetARM32::CallingConv CC;
1647 for (Variable *Arg : Args) {
1648 RegNumT DummyReg;
1649 const Type Ty = Arg->getType();
1650
1651 // Skip arguments passed in registers.
1652 if (isScalarIntegerType(Ty)) {
1653 if (CC.argInGPR(Ty, &DummyReg)) {
1654 continue;
1655 }
1656 } else {
1657 if (CC.argInVFP(Ty, &DummyReg)) {
1658 continue;
1659 }
1660 }
1661 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, &InArgsSizeBytes);
1662 }
1663
1664 // Fill in stack offsets for locals.
1665 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
1666 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
1667 UsesFramePointer);
1668 this->HasComputedFrame = true;
1669
1670 if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
1671 OstreamLocker _(Func->getContext());
1672 Ostream &Str = Func->getContext()->getStrDump();
1673
1674 Str << "Stack layout:\n";
1675 uint32_t SPAdjustmentPaddingSize =
1676 SpillAreaSizeBytes - LocalsSpillAreaSize -
1677 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes -
1678 MaxOutArgsSizeBytes;
1679 Str << " in-args = " << InArgsSizeBytes << " bytes\n"
1680 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
1681 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
1682 << " globals spill area = " << GlobalsSize << " bytes\n"
1683 << " globals-locals spill areas intermediate padding = "
1684 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
1685 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
1686 << " SP alignment padding = " << SPAdjustmentPaddingSize << " bytes\n";
1687
1688 Str << "Stack details:\n"
1689 << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n"
1690 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
1691 << " outgoing args size = " << MaxOutArgsSizeBytes << " bytes\n"
1692 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
1693 << " bytes\n"
1694 << " is FP based = " << UsesFramePointer << "\n";
1695 }
1696 }
1697
1698 void TargetARM32::addEpilog(CfgNode *Node) {
1699 InstList &Insts = Node->getInsts();
1700 InstList::reverse_iterator RI, E;
1701 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
1702 if (llvm::isa<InstARM32Ret>(*RI))
1703 break;
1704 }
1705 if (RI == E)
1706 return;
1707
1708 // Convert the reverse_iterator position into its corresponding (forward)
1709 // iterator position.
1710 InstList::iterator InsertPoint = reverseToForwardIterator(RI);
1711 --InsertPoint;
1712 Context.init(Node);
1713 Context.setInsertPoint(InsertPoint);
1714
1715 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
1716 if (UsesFramePointer) {
1717 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
1718 // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake
1719 // use of SP before the assignment of SP=FP keeps previous SP adjustments
1720 // from being dead-code eliminated.
1721 Context.insert<InstFakeUse>(SP);
1722 Sandboxer(this).reset_sp(FP);
1723 } else {
1724 // add SP, SpillAreaSizeBytes
1725 if (SpillAreaSizeBytes) {
1726 // Use the scratch register if needed to legalize the immediate.
1727 Operand *AddAmount =
1728 legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
1729 Legal_Reg | Legal_Flex, getReservedTmpReg());
1730 Sandboxer(this).add_sp(AddAmount);
1731 }
1732 }
1733
1734 if (!PreservedGPRs.empty())
1735 _pop(PreservedGPRs);
1736 if (!PreservedSRegs.empty())
1737 _pop(PreservedSRegs);
1738
1739 if (!getFlags().getUseSandboxing())
1740 return;
1741
1742 // Change the original ret instruction into a sandboxed return sequence.
1743 //
1744 // bundle_lock
1745 // bic lr, #0xc000000f
1746 // bx lr
1747 // bundle_unlock
1748 //
1749 // This isn't just aligning to the getBundleAlignLog2Bytes(). It needs to
1750 // restrict to the lower 1GB as well.
1751 Variable *LR = getPhysicalRegister(RegARM32::Reg_lr);
1752 Variable *RetValue = nullptr;
1753 if (RI->getSrcSize())
1754 RetValue = llvm::cast<Variable>(RI->getSrc(0));
1755
1756 Sandboxer(this).ret(LR, RetValue);
1757
1758 RI->setDeleted();
1759 }
1760
1761 bool TargetARM32::isLegalMemOffset(Type Ty, int32_t Offset) const {
1762 constexpr bool ZeroExt = false;
1763 return OperandARM32Mem::canHoldOffset(Ty, ZeroExt, Offset);
1764 }
1765
1766 Variable *TargetARM32::PostLoweringLegalizer::newBaseRegister(
1767 Variable *Base, int32_t Offset, RegNumT ScratchRegNum) {
1768   // Legalizing the offset will likely need a movw/movt combination, but if the
1769   // negated offset fits in 16 bits, we can materialize -Offset more cheaply and subtract it instead.
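  // Illustrative example (hypothetical values): for Offset = -0x9000, legalizing
  // Offset itself (0xFFFF7000) would need a movw/movt pair, but -Offset = 0x9000
  // is encodable as a rotated 8-bit immediate, so a single
  //   sub Scratch, Base, #0x9000
  // suffices.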
1770 const bool ShouldSub = Offset != 0 && (-Offset & 0xFFFF0000) == 0;
1771 Variable *ScratchReg = Target->makeReg(IceType_i32, ScratchRegNum);
1772 if (ShouldSub) {
1773 Operand *OffsetVal =
1774 Target->legalize(Target->Ctx->getConstantInt32(-Offset),
1775 Legal_Reg | Legal_Flex, ScratchRegNum);
1776 Target->_sub(ScratchReg, Base, OffsetVal);
1777 } else {
1778 Operand *OffsetVal =
1779 Target->legalize(Target->Ctx->getConstantInt32(Offset),
1780 Legal_Reg | Legal_Flex, ScratchRegNum);
1781 Target->_add(ScratchReg, Base, OffsetVal);
1782 }
1783
1784 if (ScratchRegNum == Target->getReservedTmpReg()) {
1785 const bool BaseIsStackOrFramePtr =
1786 Base->getRegNum() == Target->getFrameOrStackReg();
1787 // There is currently no code path that would trigger this assertion, so we
1788 // leave this assertion here in case it is ever violated. This is not a
1789 // fatal error (thus the use of assert() and not llvm::report_fatal_error)
1790 // as the program compiled by subzero will still work correctly.
1791 assert(BaseIsStackOrFramePtr);
1792 // Side-effect: updates TempBase to reflect the new Temporary.
1793 if (BaseIsStackOrFramePtr) {
1794 TempBaseReg = ScratchReg;
1795 TempBaseOffset = Offset;
1796 } else {
1797 TempBaseReg = nullptr;
1798 TempBaseOffset = 0;
1799 }
1800 }
1801
1802 return ScratchReg;
1803 }
1804
1805 OperandARM32Mem *TargetARM32::PostLoweringLegalizer::createMemOperand(
1806 Type Ty, Variable *Base, int32_t Offset, bool AllowOffsets) {
1807 assert(!Base->isRematerializable());
1808 if (Offset == 0 || (AllowOffsets && Target->isLegalMemOffset(Ty, Offset))) {
1809 return OperandARM32Mem::create(
1810 Target->Func, Ty, Base,
1811 llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset)),
1812 OperandARM32Mem::Offset);
1813 }
1814
1815 if (!AllowOffsets || TempBaseReg == nullptr) {
1816 newBaseRegister(Base, Offset, Target->getReservedTmpReg());
1817 }
1818
1819 int32_t OffsetDiff = Offset - TempBaseOffset;
1820 assert(AllowOffsets || OffsetDiff == 0);
1821
1822 if (!Target->isLegalMemOffset(Ty, OffsetDiff)) {
1823 newBaseRegister(Base, Offset, Target->getReservedTmpReg());
1824 OffsetDiff = 0;
1825 }
1826
1827 assert(!TempBaseReg->isRematerializable());
1828 return OperandARM32Mem::create(
1829 Target->Func, Ty, TempBaseReg,
1830 llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(OffsetDiff)),
1831 OperandARM32Mem::Offset);
1832 }
1833
1834 void TargetARM32::PostLoweringLegalizer::resetTempBaseIfClobberedBy(
1835 const Inst *Instr) {
1836 bool ClobbersTempBase = false;
1837 if (TempBaseReg != nullptr) {
1838 Variable *Dest = Instr->getDest();
1839 if (llvm::isa<InstARM32Call>(Instr)) {
1840 // The following assertion is an invariant, so we remove it from the if
1841 // test. If the invariant is ever broken/invalidated/changed, remember
1842 // to add it back to the if condition.
1843 assert(TempBaseReg->getRegNum() == Target->getReservedTmpReg());
1844 // The linker may need to clobber IP if the call is too far from PC. Thus,
1845 // we assume IP will be overwritten.
1846 ClobbersTempBase = true;
1847 } else if (Dest != nullptr &&
1848 Dest->getRegNum() == TempBaseReg->getRegNum()) {
1849 // Register redefinition.
1850 ClobbersTempBase = true;
1851 }
1852 }
1853
1854 if (ClobbersTempBase) {
1855 TempBaseReg = nullptr;
1856 TempBaseOffset = 0;
1857 }
1858 }
1859
1860 void TargetARM32::PostLoweringLegalizer::legalizeMov(InstARM32Mov *MovInstr) {
1861 Variable *Dest = MovInstr->getDest();
1862 assert(Dest != nullptr);
1863 Type DestTy = Dest->getType();
1864 assert(DestTy != IceType_i64);
1865
1866 Operand *Src = MovInstr->getSrc(0);
1867 Type SrcTy = Src->getType();
1868 (void)SrcTy;
1869 assert(SrcTy != IceType_i64);
1870
1871 if (MovInstr->isMultiDest() || MovInstr->isMultiSource())
1872 return;
1873
1874 bool Legalized = false;
1875 if (!Dest->hasReg()) {
1876 auto *SrcR = llvm::cast<Variable>(Src);
1877 assert(SrcR->hasReg());
1878 assert(!SrcR->isRematerializable());
1879 const int32_t Offset = Dest->getStackOffset();
1880 // This is a _mov(Mem(), Variable), i.e., a store.
1881 TargetARM32::Sandboxer(Target)
1882 .str(SrcR, createMemOperand(DestTy, StackOrFrameReg, Offset),
1883 MovInstr->getPredicate());
1884 // _str() does not have a Dest, so we add a fake-def(Dest).
1885 Target->Context.insert<InstFakeDef>(Dest);
1886 Legalized = true;
1887 } else if (auto *Var = llvm::dyn_cast<Variable>(Src)) {
1888 if (Var->isRematerializable()) {
1889 // This is equivalent to an x86 _lea(RematOffset(%esp/%ebp), Variable).
1890
1891 // ExtraOffset is only needed for frame-pointer based frames as we have
1892 // to account for spill storage.
1893 const int32_t ExtraOffset = (Var->getRegNum() == Target->getFrameReg())
1894 ? Target->getFrameFixedAllocaOffset()
1895 : 0;
1896
1897 const int32_t Offset = Var->getStackOffset() + ExtraOffset;
1898 Variable *Base = Target->getPhysicalRegister(Var->getRegNum());
1899 Variable *T = newBaseRegister(Base, Offset, Dest->getRegNum());
1900 Target->_mov(Dest, T);
1901 Legalized = true;
1902 } else {
1903 if (!Var->hasReg()) {
1904 // This is a _mov(Variable, Mem()), i.e., a load.
1905 const int32_t Offset = Var->getStackOffset();
1906 TargetARM32::Sandboxer(Target)
1907 .ldr(Dest, createMemOperand(DestTy, StackOrFrameReg, Offset),
1908 MovInstr->getPredicate());
1909 Legalized = true;
1910 }
1911 }
1912 }
1913
1914 if (Legalized) {
1915 if (MovInstr->isDestRedefined()) {
1916 Target->_set_dest_redefined();
1917 }
1918 MovInstr->setDeleted();
1919 }
1920 }
1921
1922 // ARM32 address modes:
1923 // ld/st i[8|16|32]: [reg], [reg +/- imm12], [pc +/- imm12],
1924 // [reg +/- reg << shamt5]
1925 // ld/st f[32|64] : [reg], [reg +/- imm8] , [pc +/- imm8]
1926 // ld/st vectors : [reg]
1927 //
1928 // For now, we don't handle address modes with Relocatables.
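// Illustrative encodings of the modes listed above (annotation only):
//   ldr  r0, [r1, #40]         @ i32 load, imm12 offset
//   ldr  r0, [r1, r2, lsl #2]  @ i32 load, shifted register index
//   vldr s0, [r1, #-8]         @ f32 load, small negative immediate offset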
1929 namespace {
1930 // MemTraits contains per-type valid address mode information.
1931 #define X(tag, elementty, int_width, fp_width, uvec_width, svec_width, sbits, \
1932 ubits, rraddr, shaddr) \
1933 static_assert(!(shaddr) || rraddr, "Check ICETYPEARM32_TABLE::" #tag);
1934 ICETYPEARM32_TABLE
1935 #undef X
1936
1937 static const struct {
1938 int32_t ValidImmMask;
1939 bool CanHaveImm;
1940 bool CanHaveIndex;
1941 bool CanHaveShiftedIndex;
1942 } MemTraits[] = {
1943 #define X(tag, elementty, int_width, fp_width, uvec_width, svec_width, sbits, \
1944 ubits, rraddr, shaddr) \
1945 { (1 << ubits) - 1, (ubits) > 0, rraddr, shaddr, } \
1946 ,
1947 ICETYPEARM32_TABLE
1948 #undef X
1949 };
1950 static constexpr SizeT MemTraitsSize = llvm::array_lengthof(MemTraits);
1951 } // end of anonymous namespace
1952
1953 OperandARM32Mem *
1954 TargetARM32::PostLoweringLegalizer::legalizeMemOperand(OperandARM32Mem *Mem,
1955 bool AllowOffsets) {
1956 assert(!Mem->isRegReg() || !Mem->getIndex()->isRematerializable());
1957 assert(
1958 Mem->isRegReg() ||
1959 Target->isLegalMemOffset(Mem->getType(), Mem->getOffset()->getValue()));
1960
1961 bool Legalized = false;
1962 Variable *Base = Mem->getBase();
1963 int32_t Offset = Mem->isRegReg() ? 0 : Mem->getOffset()->getValue();
1964 if (Base->isRematerializable()) {
1965 const int32_t ExtraOffset = (Base->getRegNum() == Target->getFrameReg())
1966 ? Target->getFrameFixedAllocaOffset()
1967 : 0;
1968 Offset += Base->getStackOffset() + ExtraOffset;
1969 Base = Target->getPhysicalRegister(Base->getRegNum());
1970 assert(!Base->isRematerializable());
1971 Legalized = true;
1972 }
1973
1974 if (!Legalized && !Target->NeedSandboxing) {
1975 return nullptr;
1976 }
1977
1978 if (!Mem->isRegReg()) {
1979 return createMemOperand(Mem->getType(), Base, Offset, AllowOffsets);
1980 }
1981
1982 if (Target->NeedSandboxing) {
1983 llvm::report_fatal_error("Reg-Reg address mode is not allowed.");
1984 }
1985
1986 assert(MemTraits[Mem->getType()].CanHaveIndex);
1987
1988 if (Offset != 0) {
1989 if (TempBaseReg == nullptr) {
1990 Base = newBaseRegister(Base, Offset, Target->getReservedTmpReg());
1991 } else {
1992 uint32_t Imm8, Rotate;
1993 const int32_t OffsetDiff = Offset - TempBaseOffset;
1994 if (OffsetDiff == 0) {
1995 Base = TempBaseReg;
1996 } else if (OperandARM32FlexImm::canHoldImm(OffsetDiff, &Rotate, &Imm8)) {
1997 auto *OffsetDiffF = OperandARM32FlexImm::create(
1998 Target->Func, IceType_i32, Imm8, Rotate);
1999 Target->_add(TempBaseReg, TempBaseReg, OffsetDiffF);
2000 TempBaseOffset += OffsetDiff;
2001 Base = TempBaseReg;
2002 } else if (OperandARM32FlexImm::canHoldImm(-OffsetDiff, &Rotate, &Imm8)) {
2003 auto *OffsetDiffF = OperandARM32FlexImm::create(
2004 Target->Func, IceType_i32, Imm8, Rotate);
2005 Target->_sub(TempBaseReg, TempBaseReg, OffsetDiffF);
2006 TempBaseOffset += OffsetDiff;
2007 Base = TempBaseReg;
2008 } else {
2009 Base = newBaseRegister(Base, Offset, Target->getReservedTmpReg());
2010 }
2011 }
2012 }
2013
2014 return OperandARM32Mem::create(Target->Func, Mem->getType(), Base,
2015 Mem->getIndex(), Mem->getShiftOp(),
2016 Mem->getShiftAmt(), Mem->getAddrMode());
2017 }
2018
2019 void TargetARM32::postLowerLegalization() {
2020 // If a stack variable's frame offset doesn't fit, convert from:
2021 // ldr X, OFF[SP]
2022 // to:
2023 // movw/movt TMP, OFF_PART
2024 // add TMP, TMP, SP
2025 // ldr X, OFF_MORE[TMP]
2026 //
2027 // This is safe because we have reserved TMP, and add for ARM does not
2028 // clobber the flags register.
2029 Func->dump("Before postLowerLegalization");
2030 assert(hasComputedFrame());
2031 // Do a fairly naive greedy clustering for now. Pick the first stack slot
2032 // that's out of bounds and make a new base reg using the architecture's temp
2033 // register. If that works for the next slot, then great. Otherwise, create a
2034 // new base register, clobbering the previous base register. Never share a
2035 // base reg across different basic blocks. This isn't ideal if local and
2036 // multi-block variables are far apart and their references are interspersed.
2037   // It may help to be more coordinated about assigning stack slot numbers and may
2038 // help to assign smaller offsets to higher-weight variables so that they
2039 // don't depend on this legalization.
2040 for (CfgNode *Node : Func->getNodes()) {
2041 Context.init(Node);
2042 // One legalizer per basic block, otherwise we would share the Temporary
2043 // Base Register between basic blocks.
2044 PostLoweringLegalizer Legalizer(this);
2045 while (!Context.atEnd()) {
2046 PostIncrLoweringContext PostIncrement(Context);
2047 Inst *CurInstr = iteratorToInst(Context.getCur());
2048
2049 // Check if the previous TempBaseReg is clobbered, and reset if needed.
2050 Legalizer.resetTempBaseIfClobberedBy(CurInstr);
2051
2052 if (auto *MovInstr = llvm::dyn_cast<InstARM32Mov>(CurInstr)) {
2053 Legalizer.legalizeMov(MovInstr);
2054 } else if (auto *LdrInstr = llvm::dyn_cast<InstARM32Ldr>(CurInstr)) {
2055 if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
2056 llvm::cast<OperandARM32Mem>(LdrInstr->getSrc(0)))) {
2057 Sandboxer(this)
2058 .ldr(CurInstr->getDest(), LegalMem, LdrInstr->getPredicate());
2059 CurInstr->setDeleted();
2060 }
2061 } else if (auto *LdrexInstr = llvm::dyn_cast<InstARM32Ldrex>(CurInstr)) {
2062 constexpr bool DisallowOffsetsBecauseLdrex = false;
2063 if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
2064 llvm::cast<OperandARM32Mem>(LdrexInstr->getSrc(0)),
2065 DisallowOffsetsBecauseLdrex)) {
2066 Sandboxer(this)
2067 .ldrex(CurInstr->getDest(), LegalMem, LdrexInstr->getPredicate());
2068 CurInstr->setDeleted();
2069 }
2070 } else if (auto *StrInstr = llvm::dyn_cast<InstARM32Str>(CurInstr)) {
2071 if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
2072 llvm::cast<OperandARM32Mem>(StrInstr->getSrc(1)))) {
2073 Sandboxer(this).str(llvm::cast<Variable>(CurInstr->getSrc(0)),
2074 LegalMem, StrInstr->getPredicate());
2075 CurInstr->setDeleted();
2076 }
2077 } else if (auto *StrexInstr = llvm::dyn_cast<InstARM32Strex>(CurInstr)) {
2078 constexpr bool DisallowOffsetsBecauseStrex = false;
2079 if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
2080 llvm::cast<OperandARM32Mem>(StrexInstr->getSrc(1)),
2081 DisallowOffsetsBecauseStrex)) {
2082 Sandboxer(this).strex(CurInstr->getDest(),
2083 llvm::cast<Variable>(CurInstr->getSrc(0)),
2084 LegalMem, StrexInstr->getPredicate());
2085 CurInstr->setDeleted();
2086 }
2087 }
2088
2089 // Sanity-check: the Legalizer will either have no Temp, or it will be
2090 // bound to IP.
2091 Legalizer.assertNoTempOrAssignedToIP();
2092 }
2093 }
2094 }
2095
2096 Operand *TargetARM32::loOperand(Operand *Operand) {
2097 assert(Operand->getType() == IceType_i64);
2098 if (Operand->getType() != IceType_i64)
2099 return Operand;
2100 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
2101 return Var64On32->getLo();
2102 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand))
2103 return Ctx->getConstantInt32(static_cast<uint32_t>(Const->getValue()));
2104 if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
2105 // Conservatively disallow memory operands with side-effects (pre/post
2106 // increment) in case of duplication.
2107 assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
2108 Mem->getAddrMode() == OperandARM32Mem::NegOffset);
2109 if (Mem->isRegReg()) {
2110 Variable *IndexR = legalizeToReg(Mem->getIndex());
2111 return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(), IndexR,
2112 Mem->getShiftOp(), Mem->getShiftAmt(),
2113 Mem->getAddrMode());
2114 } else {
2115 return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(),
2116 Mem->getOffset(), Mem->getAddrMode());
2117 }
2118 }
2119 llvm::report_fatal_error("Unsupported operand type");
2120 return nullptr;
2121 }
2122
2123 Operand *TargetARM32::hiOperand(Operand *Operand) {
2124 assert(Operand->getType() == IceType_i64);
2125 if (Operand->getType() != IceType_i64)
2126 return Operand;
2127 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
2128 return Var64On32->getHi();
2129 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
2130 return Ctx->getConstantInt32(
2131 static_cast<uint32_t>(Const->getValue() >> 32));
2132 }
2133 if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
2134 // Conservatively disallow memory operands with side-effects in case of
2135 // duplication.
2136 assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
2137 Mem->getAddrMode() == OperandARM32Mem::NegOffset);
2138 const Type SplitType = IceType_i32;
2139 if (Mem->isRegReg()) {
2140 // We have to make a temp variable T, and add 4 to either Base or Index.
2141 // The Index may be shifted, so adding 4 can mean something else. Thus,
2142 // prefer T := Base + 4, and use T as the new Base.
2143 Variable *Base = Mem->getBase();
2144 Constant *Four = Ctx->getConstantInt32(4);
2145 Variable *NewBase = Func->makeVariable(Base->getType());
2146 lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase,
2147 Base, Four));
2148 Variable *BaseR = legalizeToReg(NewBase);
2149 Variable *IndexR = legalizeToReg(Mem->getIndex());
2150 return OperandARM32Mem::create(Func, SplitType, BaseR, IndexR,
2151 Mem->getShiftOp(), Mem->getShiftAmt(),
2152 Mem->getAddrMode());
2153 } else {
2154 Variable *Base = Mem->getBase();
2155 ConstantInteger32 *Offset = Mem->getOffset();
2156 assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4));
2157 int32_t NextOffsetVal = Offset->getValue() + 4;
2158 constexpr bool ZeroExt = false;
2159 if (!OperandARM32Mem::canHoldOffset(SplitType, ZeroExt, NextOffsetVal)) {
2160 // We have to make a temp variable and add 4 to either Base or Offset.
2161 // If we add 4 to Offset, this will convert a non-RegReg addressing
2162 // mode into a RegReg addressing mode. Since NaCl sandboxing disallows
2163 // RegReg addressing modes, prefer adding to base and replacing
2164 // instead. Thus we leave the old offset alone.
2165 Constant *_4 = Ctx->getConstantInt32(4);
2166 Variable *NewBase = Func->makeVariable(Base->getType());
2167 lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add,
2168 NewBase, Base, _4));
2169 Base = NewBase;
2170 } else {
2171 Offset =
2172 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal));
2173 }
2174 Variable *BaseR = legalizeToReg(Base);
2175 return OperandARM32Mem::create(Func, SplitType, BaseR, Offset,
2176 Mem->getAddrMode());
2177 }
2178 }
2179 llvm::report_fatal_error("Unsupported operand type");
2180 return nullptr;
2181 }
2182
2183 SmallBitVector TargetARM32::getRegisterSet(RegSetMask Include,
2184 RegSetMask Exclude) const {
2185 SmallBitVector Registers(RegARM32::Reg_NUM);
2186
2187 for (uint32_t i = 0; i < RegARM32::Reg_NUM; ++i) {
2188 const auto &Entry = RegARM32::RegTable[i];
2189 if (Entry.Scratch && (Include & RegSet_CallerSave))
2190 Registers[i] = true;
2191 if (Entry.Preserved && (Include & RegSet_CalleeSave))
2192 Registers[i] = true;
2193 if (Entry.StackPtr && (Include & RegSet_StackPointer))
2194 Registers[i] = true;
2195 if (Entry.FramePtr && (Include & RegSet_FramePointer))
2196 Registers[i] = true;
2197 if (Entry.Scratch && (Exclude & RegSet_CallerSave))
2198 Registers[i] = false;
2199 if (Entry.Preserved && (Exclude & RegSet_CalleeSave))
2200 Registers[i] = false;
2201 if (Entry.StackPtr && (Exclude & RegSet_StackPointer))
2202 Registers[i] = false;
2203 if (Entry.FramePtr && (Exclude & RegSet_FramePointer))
2204 Registers[i] = false;
2205 }
2206
2207 return Registers;
2208 }
2209
2210 void TargetARM32::lowerAlloca(const InstAlloca *Instr) {
2211 // Conservatively require the stack to be aligned. Some stack adjustment
2212 // operations implemented below assume that the stack is aligned before the
2213 // alloca. All the alloca code ensures that the stack alignment is preserved
2214 // after the alloca. The stack alignment restriction can be relaxed in some
2215 // cases.
2216 NeedsStackAlignment = true;
2217
2218 // For default align=0, set it to the real value 1, to avoid any
2219 // bit-manipulation problems below.
2220 const uint32_t AlignmentParam = std::max(1u, Instr->getAlignInBytes());
2221
2222 // LLVM enforces power of 2 alignment.
2223 assert(llvm::isPowerOf2_32(AlignmentParam));
2224 assert(llvm::isPowerOf2_32(ARM32_STACK_ALIGNMENT_BYTES));
2225
2226 const uint32_t Alignment =
2227 std::max(AlignmentParam, ARM32_STACK_ALIGNMENT_BYTES);
2228 const bool OverAligned = Alignment > ARM32_STACK_ALIGNMENT_BYTES;
2229 const bool OptM1 = Func->getOptLevel() == Opt_m1;
2230 const bool AllocaWithKnownOffset = Instr->getKnownFrameOffset();
2231 const bool UseFramePointer =
2232 hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1;
2233
2234 if (UseFramePointer)
2235 setHasFramePointer();
2236
2237 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
2238 if (OverAligned) {
2239 Sandboxer(this).align_sp(Alignment);
2240 }
2241
2242 Variable *Dest = Instr->getDest();
2243 Operand *TotalSize = Instr->getSizeInBytes();
2244
2245 if (const auto *ConstantTotalSize =
2246 llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
2247 const uint32_t Value =
2248 Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment);
2249 // Constant size alloca.
2250 if (!UseFramePointer) {
2251 // If we don't need a Frame Pointer, this alloca has a known offset to the
2252     // stack pointer. We don't need to adjust the stack pointer, nor assign any
2253 // value to Dest, as Dest is rematerializable.
2254 assert(Dest->isRematerializable());
2255 FixedAllocaSizeBytes += Value;
2256 Context.insert<InstFakeDef>(Dest);
2257 return;
2258 }
2259
2260 // If a frame pointer is required, then we need to store the alloca'd result
2261 // in Dest.
2262 Operand *SubAmountRF =
2263 legalize(Ctx->getConstantInt32(Value), Legal_Reg | Legal_Flex);
2264 Sandboxer(this).sub_sp(SubAmountRF);
2265 } else {
2266 // Non-constant sizes need to be adjusted to the next highest multiple of
2267 // the required alignment at runtime.
2268 TotalSize = legalize(TotalSize, Legal_Reg | Legal_Flex);
2269 Variable *T = makeReg(IceType_i32);
2270 _mov(T, TotalSize);
2271 Operand *AddAmount = legalize(Ctx->getConstantInt32(Alignment - 1));
2272 _add(T, T, AddAmount);
2273 alignRegisterPow2(T, Alignment);
2274 Sandboxer(this).sub_sp(T);
2275 }
2276
2277   // Add back the out args area size (into a temporary) so Dest points above it.
2278 Variable *T = SP;
2279 if (MaxOutArgsSizeBytes != 0) {
2280 T = makeReg(getPointerType());
2281 Operand *OutArgsSizeRF = legalize(
2282 Ctx->getConstantInt32(MaxOutArgsSizeBytes), Legal_Reg | Legal_Flex);
2283 _add(T, SP, OutArgsSizeRF);
2284 }
2285
2286 _mov(Dest, T);
2287 }
2288
2289 void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) {
2290 if (isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi))
2291 return;
2292 Variable *SrcLoReg = legalizeToReg(SrcLo);
2293 switch (Ty) {
2294 default:
2295 llvm_unreachable(
2296 ("Unexpected type in div0Check: " + typeStdString(Ty)).c_str());
2297 case IceType_i8:
2298 case IceType_i16: {
2299 Operand *ShAmtImm = shAmtImm(32 - getScalarIntBitWidth(Ty));
2300 Variable *T = makeReg(IceType_i32);
2301 _lsls(T, SrcLoReg, ShAmtImm);
2302 Context.insert<InstFakeUse>(T);
2303 } break;
2304 case IceType_i32: {
2305 _tst(SrcLoReg, SrcLoReg);
2306 break;
2307 }
2308 case IceType_i64: {
2309 Variable *T = makeReg(IceType_i32);
2310 _orrs(T, SrcLoReg, legalize(SrcHi, Legal_Reg | Legal_Flex));
2311 // T isn't going to be used, but we need the side-effect of setting flags
2312 // from this operation.
2313 Context.insert<InstFakeUse>(T);
2314 }
2315 }
2316 auto *Label = InstARM32Label::create(Func, this);
2317 _br(Label, CondARM32::NE);
2318 _trap();
2319 Context.insert(Label);
2320 }
2321
2322 void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R,
2323 Operand *Src1, ExtInstr ExtFunc,
2324 DivInstr DivFunc, bool IsRemainder) {
2325 div0Check(Dest->getType(), Src1, nullptr);
2326 Variable *Src1R = legalizeToReg(Src1);
2327 Variable *T0R = Src0R;
2328 Variable *T1R = Src1R;
2329 if (Dest->getType() != IceType_i32) {
2330 T0R = makeReg(IceType_i32);
2331 (this->*ExtFunc)(T0R, Src0R, CondARM32::AL);
2332 T1R = makeReg(IceType_i32);
2333 (this->*ExtFunc)(T1R, Src1R, CondARM32::AL);
2334 }
2335 if (hasCPUFeature(TargetARM32Features::HWDivArm)) {
2336 (this->*DivFunc)(T, T0R, T1R, CondARM32::AL);
2337 if (IsRemainder) {
2338 Variable *T2 = makeReg(IceType_i32);
2339 _mls(T2, T, T1R, T0R);
2340 T = T2;
2341 }
2342 _mov(Dest, T);
2343 } else {
2344 llvm::report_fatal_error("div should have already been turned into a call");
2345 }
2346 }
2347
2348 TargetARM32::SafeBoolChain
2349 TargetARM32::lowerInt1Arithmetic(const InstArithmetic *Instr) {
2350 Variable *Dest = Instr->getDest();
2351 assert(Dest->getType() == IceType_i1);
2352
2353 // So folding didn't work for Instr. Not a problem: We just need to
2354 // materialize the Sources, and perform the operation. We create regular
2355 // Variables (and not infinite-weight ones) because this call might recurse a
2356 // lot, and we might end up with tons of infinite weight temporaries.
2357 assert(Instr->getSrcSize() == 2);
2358 Variable *Src0 = Func->makeVariable(IceType_i1);
2359 SafeBoolChain Src0Safe = lowerInt1(Src0, Instr->getSrc(0));
2360
2361 Operand *Src1 = Instr->getSrc(1);
2362 SafeBoolChain Src1Safe = SBC_Yes;
2363
2364 if (!llvm::isa<Constant>(Src1)) {
2365 Variable *Src1V = Func->makeVariable(IceType_i1);
2366 Src1Safe = lowerInt1(Src1V, Src1);
2367 Src1 = Src1V;
2368 }
2369
2370 Variable *T = makeReg(IceType_i1);
2371 Src0 = legalizeToReg(Src0);
2372 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
2373 switch (Instr->getOp()) {
2374 default:
2375 // If this Unreachable is ever executed, add the offending operation to
2376 // the list of valid consumers.
2377 llvm::report_fatal_error("Unhandled i1 Op");
2378 case InstArithmetic::And:
2379 _and(T, Src0, Src1RF);
2380 break;
2381 case InstArithmetic::Or:
2382 _orr(T, Src0, Src1RF);
2383 break;
2384 case InstArithmetic::Xor:
2385 _eor(T, Src0, Src1RF);
2386 break;
2387 }
2388 _mov(Dest, T);
2389 return Src0Safe == SBC_Yes && Src1Safe == SBC_Yes ? SBC_Yes : SBC_No;
2390 }
2391
2392 namespace {
2393 // NumericOperands is used during arithmetic/icmp lowering for constant folding.
2394 // It holds the two source operands, and maintains some state as to whether one
2395 // of them is a constant. If one of the operands is a constant, then it will be
2396 // stored as the operation's second source, with a bit indicating whether the
2397 // operands were swapped.
2398 //
2399 // The class is split into a base class with operand type-independent methods,
2400 // and a derived, templated class, for each type of operand we want to fold
2401 // constants for:
2402 //
2403 // NumericOperandsBase --> NumericOperands<ConstantFloat>
2404 // --> NumericOperands<ConstantDouble>
2405 // --> NumericOperands<ConstantInt32>
2406 //
2407 // NumericOperands<ConstantInt32> also exposes helper methods for emitting
2408 // inverted/negated immediates.
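// Illustrative usage (annotation, not part of the original comment): when
// lowering "a = 5 + b", Int32Operands Srcs(FiveConst, B) ends up with Src0 ==
// B and Src1 == the constant 5, and swappedOperands() returns true, so the
// constant can still be emitted as the flexible second operand.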
2409 class NumericOperandsBase {
2410 NumericOperandsBase() = delete;
2411 NumericOperandsBase(const NumericOperandsBase &) = delete;
2412 NumericOperandsBase &operator=(const NumericOperandsBase &) = delete;
2413
2414 public:
2415   NumericOperandsBase(Operand *S0, Operand *S1)
2416 : Src0(NonConstOperand(S0, S1)), Src1(ConstOperand(S0, S1)),
2417 Swapped(Src0 == S1 && S0 != S1) {
2418 assert(Src0 != nullptr);
2419 assert(Src1 != nullptr);
2420 assert(Src0 != Src1 || S0 == S1);
2421 }
2422
2423   bool hasConstOperand() const {
2424 return llvm::isa<Constant>(Src1) && !llvm::isa<ConstantRelocatable>(Src1);
2425 }
2426
2427   bool swappedOperands() const { return Swapped; }
2428
2429   Variable *src0R(TargetARM32 *Target) const {
2430 return legalizeToReg(Target, Src0);
2431 }
2432
2433   Variable *unswappedSrc0R(TargetARM32 *Target) const {
2434 return legalizeToReg(Target, Swapped ? Src1 : Src0);
2435 }
2436
2437   Operand *src1RF(TargetARM32 *Target) const {
2438 return legalizeToRegOrFlex(Target, Src1);
2439 }
2440
2441   Variable *unswappedSrc1R(TargetARM32 *Target) const {
2442 return legalizeToReg(Target, Swapped ? Src0 : Src1);
2443 }
2444
2445 protected:
2446 Operand *const Src0;
2447 Operand *const Src1;
2448 const bool Swapped;
2449
2450   static Variable *legalizeToReg(TargetARM32 *Target, Operand *Src) {
2451 return Target->legalizeToReg(Src);
2452 }
2453
2454   static Operand *legalizeToRegOrFlex(TargetARM32 *Target, Operand *Src) {
2455 return Target->legalize(Src,
2456 TargetARM32::Legal_Reg | TargetARM32::Legal_Flex);
2457 }
2458
2459 private:
2460   static Operand *NonConstOperand(Operand *S0, Operand *S1) {
2461 if (!llvm::isa<Constant>(S0))
2462 return S0;
2463 if (!llvm::isa<Constant>(S1))
2464 return S1;
2465 if (llvm::isa<ConstantRelocatable>(S1) &&
2466 !llvm::isa<ConstantRelocatable>(S0))
2467 return S1;
2468 return S0;
2469 }
2470
2471   static Operand *ConstOperand(Operand *S0, Operand *S1) {
2472 if (!llvm::isa<Constant>(S0))
2473 return S1;
2474 if (!llvm::isa<Constant>(S1))
2475 return S0;
2476 if (llvm::isa<ConstantRelocatable>(S1) &&
2477 !llvm::isa<ConstantRelocatable>(S0))
2478 return S0;
2479 return S1;
2480 }
2481 };
2482
2483 template <typename C> class NumericOperands : public NumericOperandsBase {
2484 NumericOperands() = delete;
2485 NumericOperands(const NumericOperands &) = delete;
2486 NumericOperands &operator=(const NumericOperands &) = delete;
2487
2488 public:
2489   NumericOperands(Operand *S0, Operand *S1) : NumericOperandsBase(S0, S1) {
2490 assert(!hasConstOperand() || llvm::isa<C>(this->Src1));
2491 }
2492
2493   typename C::PrimType getConstantValue() const {
2494 return llvm::cast<C>(Src1)->getValue();
2495 }
2496 };
2497
2498 using FloatOperands = NumericOperands<ConstantFloat>;
2499 using DoubleOperands = NumericOperands<ConstantDouble>;
2500
2501 class Int32Operands : public NumericOperands<ConstantInteger32> {
2502 Int32Operands() = delete;
2503 Int32Operands(const Int32Operands &) = delete;
2504 Int32Operands &operator=(const Int32Operands &) = delete;
2505
2506 public:
2507   Int32Operands(Operand *S0, Operand *S1) : NumericOperands(S0, S1) {}
2508
2509   Operand *unswappedSrc1RShAmtImm(TargetARM32 *Target) const {
2510 if (!swappedOperands() && hasConstOperand()) {
2511 return Target->shAmtImm(getConstantValue() & 0x1F);
2512 }
2513 return legalizeToReg(Target, Swapped ? Src0 : Src1);
2514 }
2515
2516   bool isSrc1ImmediateZero() const {
2517 if (!swappedOperands() && hasConstOperand()) {
2518 return getConstantValue() == 0;
2519 }
2520 return false;
2521 }
2522
2523   bool immediateIsFlexEncodable() const {
2524 uint32_t Rotate, Imm8;
2525 return OperandARM32FlexImm::canHoldImm(getConstantValue(), &Rotate, &Imm8);
2526 }
2527
2528   bool negatedImmediateIsFlexEncodable() const {
2529 uint32_t Rotate, Imm8;
2530 return OperandARM32FlexImm::canHoldImm(
2531 -static_cast<int32_t>(getConstantValue()), &Rotate, &Imm8);
2532 }
2533
2534   Operand *negatedSrc1F(TargetARM32 *Target) const {
2535 return legalizeToRegOrFlex(Target,
2536 Target->getCtx()->getConstantInt32(
2537 -static_cast<int32_t>(getConstantValue())));
2538 }
2539
2540   bool invertedImmediateIsFlexEncodable() const {
2541 uint32_t Rotate, Imm8;
2542 return OperandARM32FlexImm::canHoldImm(
2543 ~static_cast<uint32_t>(getConstantValue()), &Rotate, &Imm8);
2544 }
2545
2546   Operand *invertedSrc1F(TargetARM32 *Target) const {
2547 return legalizeToRegOrFlex(Target,
2548 Target->getCtx()->getConstantInt32(
2549 ~static_cast<uint32_t>(getConstantValue())));
2550 }
2551 };
2552 } // end of anonymous namespace
2553
2554 void TargetARM32::preambleDivRem(const InstCall *Instr) {
2555 Operand *Src1 = Instr->getArg(1);
2556
2557 switch (Src1->getType()) {
2558 default:
2559 llvm::report_fatal_error("Invalid type for idiv.");
2560 case IceType_i64: {
2561 if (auto *C = llvm::dyn_cast<ConstantInteger64>(Src1)) {
2562 if (C->getValue() == 0) {
2563 _trap();
2564 return;
2565 }
2566 }
2567 div0Check(IceType_i64, loOperand(Src1), hiOperand(Src1));
2568 return;
2569 }
2570 case IceType_i32: {
2571 // Src0 and Src1 have already been appropriately extended to an i32, so we
2572 // don't check for i8 and i16.
2573 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
2574 if (C->getValue() == 0) {
2575 _trap();
2576 return;
2577 }
2578 }
2579 div0Check(IceType_i32, Src1, nullptr);
2580 return;
2581 }
2582 }
2583 }
2584
2585 void TargetARM32::lowerInt64Arithmetic(InstArithmetic::OpKind Op,
2586 Variable *Dest, Operand *Src0,
2587 Operand *Src1) {
2588 Int32Operands SrcsLo(loOperand(Src0), loOperand(Src1));
2589 Int32Operands SrcsHi(hiOperand(Src0), hiOperand(Src1));
2590 assert(SrcsLo.swappedOperands() == SrcsHi.swappedOperands());
2591 assert(SrcsLo.hasConstOperand() == SrcsHi.hasConstOperand());
2592
2593 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
2594 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2595 Variable *T_Lo = makeReg(DestLo->getType());
2596 Variable *T_Hi = makeReg(DestHi->getType());
2597
2598 switch (Op) {
2599 case InstArithmetic::_num:
2600 llvm::report_fatal_error("Unknown arithmetic operator");
2601 return;
2602 case InstArithmetic::Add: {
2603 Variable *Src0LoR = SrcsLo.src0R(this);
2604 Operand *Src1LoRF = SrcsLo.src1RF(this);
2605 Variable *Src0HiR = SrcsHi.src0R(this);
2606 Operand *Src1HiRF = SrcsHi.src1RF(this);
2607 _adds(T_Lo, Src0LoR, Src1LoRF);
2608 _mov(DestLo, T_Lo);
2609 _adc(T_Hi, Src0HiR, Src1HiRF);
2610 _mov(DestHi, T_Hi);
2611 return;
2612 }
2613 case InstArithmetic::And: {
2614 Variable *Src0LoR = SrcsLo.src0R(this);
2615 Operand *Src1LoRF = SrcsLo.src1RF(this);
2616 Variable *Src0HiR = SrcsHi.src0R(this);
2617 Operand *Src1HiRF = SrcsHi.src1RF(this);
2618 _and(T_Lo, Src0LoR, Src1LoRF);
2619 _mov(DestLo, T_Lo);
2620 _and(T_Hi, Src0HiR, Src1HiRF);
2621 _mov(DestHi, T_Hi);
2622 return;
2623 }
2624 case InstArithmetic::Or: {
2625 Variable *Src0LoR = SrcsLo.src0R(this);
2626 Operand *Src1LoRF = SrcsLo.src1RF(this);
2627 Variable *Src0HiR = SrcsHi.src0R(this);
2628 Operand *Src1HiRF = SrcsHi.src1RF(this);
2629 _orr(T_Lo, Src0LoR, Src1LoRF);
2630 _mov(DestLo, T_Lo);
2631 _orr(T_Hi, Src0HiR, Src1HiRF);
2632 _mov(DestHi, T_Hi);
2633 return;
2634 }
2635 case InstArithmetic::Xor: {
2636 Variable *Src0LoR = SrcsLo.src0R(this);
2637 Operand *Src1LoRF = SrcsLo.src1RF(this);
2638 Variable *Src0HiR = SrcsHi.src0R(this);
2639 Operand *Src1HiRF = SrcsHi.src1RF(this);
2640 _eor(T_Lo, Src0LoR, Src1LoRF);
2641 _mov(DestLo, T_Lo);
2642 _eor(T_Hi, Src0HiR, Src1HiRF);
2643 _mov(DestHi, T_Hi);
2644 return;
2645 }
2646 case InstArithmetic::Sub: {
2647 Variable *Src0LoR = SrcsLo.src0R(this);
2648 Operand *Src1LoRF = SrcsLo.src1RF(this);
2649 Variable *Src0HiR = SrcsHi.src0R(this);
2650 Operand *Src1HiRF = SrcsHi.src1RF(this);
2651 if (SrcsLo.swappedOperands()) {
2652 _rsbs(T_Lo, Src0LoR, Src1LoRF);
2653 _mov(DestLo, T_Lo);
2654 _rsc(T_Hi, Src0HiR, Src1HiRF);
2655 _mov(DestHi, T_Hi);
2656 } else {
2657 _subs(T_Lo, Src0LoR, Src1LoRF);
2658 _mov(DestLo, T_Lo);
2659 _sbc(T_Hi, Src0HiR, Src1HiRF);
2660 _mov(DestHi, T_Hi);
2661 }
2662 return;
2663 }
2664 case InstArithmetic::Mul: {
2665 // GCC 4.8 does:
2666 // a=b*c ==>
2667 // t_acc =(mul) (b.lo * c.hi)
2668 // t_acc =(mla) (c.lo * b.hi) + t_acc
2669 // t.hi,t.lo =(umull) b.lo * c.lo
2670 // t.hi += t_acc
2671 // a.lo = t.lo
2672 // a.hi = t.hi
2673 //
2674 // LLVM does:
2675 // t.hi,t.lo =(umull) b.lo * c.lo
2676 // t.hi =(mla) (b.lo * c.hi) + t.hi
2677 // t.hi =(mla) (b.hi * c.lo) + t.hi
2678 // a.lo = t.lo
2679 // a.hi = t.hi
2680 //
2681 // LLVM's lowering has fewer instructions, but more register pressure:
2682 // t.lo is live from beginning to end, while GCC delays the two-dest
2683 // instruction till the end, and kills c.hi immediately.
2684 Variable *T_Acc = makeReg(IceType_i32);
2685 Variable *T_Acc1 = makeReg(IceType_i32);
2686 Variable *T_Hi1 = makeReg(IceType_i32);
2687 Variable *Src0RLo = SrcsLo.unswappedSrc0R(this);
2688 Variable *Src0RHi = SrcsHi.unswappedSrc0R(this);
2689 Variable *Src1RLo = SrcsLo.unswappedSrc1R(this);
2690 Variable *Src1RHi = SrcsHi.unswappedSrc1R(this);
2691 _mul(T_Acc, Src0RLo, Src1RHi);
2692 _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc);
2693 _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo);
2694 _add(T_Hi, T_Hi1, T_Acc1);
2695 _mov(DestLo, T_Lo);
2696 _mov(DestHi, T_Hi);
2697 return;
2698 }
2699 case InstArithmetic::Shl: {
2700 if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) {
2701 Variable *Src0RLo = SrcsLo.src0R(this);
2702 // Truncating the ShAmt to [0, 63] because that's what ARM does anyway.
2703 const int32_t ShAmtImm = SrcsLo.getConstantValue() & 0x3F;
2704 if (ShAmtImm == 0) {
2705 _mov(DestLo, Src0RLo);
2706 _mov(DestHi, SrcsHi.src0R(this));
2707 return;
2708 }
2709
2710 if (ShAmtImm >= 32) {
2711 if (ShAmtImm == 32) {
2712 _mov(DestHi, Src0RLo);
2713 } else {
2714 Operand *ShAmtOp = shAmtImm(ShAmtImm - 32);
2715 _lsl(T_Hi, Src0RLo, ShAmtOp);
2716 _mov(DestHi, T_Hi);
2717 }
2718
2719 Operand *_0 =
2720 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
2721 _mov(T_Lo, _0);
2722 _mov(DestLo, T_Lo);
2723 return;
2724 }
2725
2726 Variable *Src0RHi = SrcsHi.src0R(this);
2727 Operand *ShAmtOp = shAmtImm(ShAmtImm);
2728 Operand *ComplShAmtOp = shAmtImm(32 - ShAmtImm);
2729 _lsl(T_Hi, Src0RHi, ShAmtOp);
2730 _orr(T_Hi, T_Hi,
2731 OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
2732 OperandARM32::LSR, ComplShAmtOp));
2733 _mov(DestHi, T_Hi);
2734
2735 _lsl(T_Lo, Src0RLo, ShAmtOp);
2736 _mov(DestLo, T_Lo);
2737 return;
2738 }
2739
2740 // a=b<<c ==>
2741 // pnacl-llc does:
2742 // mov t_b.lo, b.lo
2743 // mov t_b.hi, b.hi
2744 // mov t_c.lo, c.lo
2745 // rsb T0, t_c.lo, #32
2746 // lsr T1, t_b.lo, T0
2747 // orr t_a.hi, T1, t_b.hi, lsl t_c.lo
2748 // sub T2, t_c.lo, #32
2749 // cmp T2, #0
2750 // lslge t_a.hi, t_b.lo, T2
2751 // lsl t_a.lo, t_b.lo, t_c.lo
2752 // mov a.lo, t_a.lo
2753 // mov a.hi, t_a.hi
2754 //
2755 // GCC 4.8 does:
2756 // sub t_c1, c.lo, #32
2757 // lsl t_hi, b.hi, c.lo
2758 // orr t_hi, t_hi, b.lo, lsl t_c1
2759 // rsb t_c2, c.lo, #32
2760 // orr t_hi, t_hi, b.lo, lsr t_c2
2761 // lsl t_lo, b.lo, c.lo
2762 // a.lo = t_lo
2763 // a.hi = t_hi
2764 //
2765 // These are incompatible, therefore we mimic pnacl-llc.
2766 // Can be strength-reduced for constant-shifts, but we don't do that for
2767 // now.
2768 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On
2769 // ARM, shifts only take the lower 8 bits of the shift register, and
2770 // saturate to the range 0-32, so the negative value will saturate to 32.
2771 Operand *_32 = legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
2772 Operand *_0 =
2773 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
2774 Variable *T0 = makeReg(IceType_i32);
2775 Variable *T1 = makeReg(IceType_i32);
2776 Variable *T2 = makeReg(IceType_i32);
2777 Variable *TA_Hi = makeReg(IceType_i32);
2778 Variable *TA_Lo = makeReg(IceType_i32);
2779 Variable *Src0RLo = SrcsLo.unswappedSrc0R(this);
2780 Variable *Src0RHi = SrcsHi.unswappedSrc0R(this);
2781 Variable *Src1RLo = SrcsLo.unswappedSrc1R(this);
2782 _rsb(T0, Src1RLo, _32);
2783 _lsr(T1, Src0RLo, T0);
2784 _orr(TA_Hi, T1, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
2785 OperandARM32::LSL, Src1RLo));
2786 _sub(T2, Src1RLo, _32);
2787 _cmp(T2, _0);
2788 _lsl(TA_Hi, Src0RLo, T2, CondARM32::GE);
2789 _set_dest_redefined();
2790 _lsl(TA_Lo, Src0RLo, Src1RLo);
2791 _mov(DestLo, TA_Lo);
2792 _mov(DestHi, TA_Hi);
2793 return;
2794 }
2795 case InstArithmetic::Lshr:
2796 case InstArithmetic::Ashr: {
2797 const bool ASR = Op == InstArithmetic::Ashr;
2798 if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) {
2799 Variable *Src0RHi = SrcsHi.src0R(this);
2800 // Truncating the ShAmt to [0, 63] because that's what ARM does anyway.
2801 const int32_t ShAmt = SrcsLo.getConstantValue() & 0x3F;
2802 if (ShAmt == 0) {
2803 _mov(DestHi, Src0RHi);
2804 _mov(DestLo, SrcsLo.src0R(this));
2805 return;
2806 }
2807
2808 if (ShAmt >= 32) {
2809 if (ShAmt == 32) {
2810 _mov(DestLo, Src0RHi);
2811 } else {
2812 Operand *ShAmtImm = shAmtImm(ShAmt - 32);
2813 if (ASR) {
2814 _asr(T_Lo, Src0RHi, ShAmtImm);
2815 } else {
2816 _lsr(T_Lo, Src0RHi, ShAmtImm);
2817 }
2818 _mov(DestLo, T_Lo);
2819 }
2820
2821 if (ASR) {
2822 Operand *_31 = shAmtImm(31);
2823 _asr(T_Hi, Src0RHi, _31);
2824 } else {
2825 Operand *_0 = legalize(Ctx->getConstantZero(IceType_i32),
2826 Legal_Reg | Legal_Flex);
2827 _mov(T_Hi, _0);
2828 }
2829 _mov(DestHi, T_Hi);
2830 return;
2831 }
2832
2833 Variable *Src0RLo = SrcsLo.src0R(this);
2834 Operand *ShAmtImm = shAmtImm(ShAmt);
2835 Operand *ComplShAmtImm = shAmtImm(32 - ShAmt);
2836 _lsr(T_Lo, Src0RLo, ShAmtImm);
2837 _orr(T_Lo, T_Lo,
2838 OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
2839 OperandARM32::LSL, ComplShAmtImm));
2840 _mov(DestLo, T_Lo);
2841
2842 if (ASR) {
2843 _asr(T_Hi, Src0RHi, ShAmtImm);
2844 } else {
2845 _lsr(T_Hi, Src0RHi, ShAmtImm);
2846 }
2847 _mov(DestHi, T_Hi);
2848 return;
2849 }
2850
2851 // a=b>>c
2852 // pnacl-llc does:
2853 // mov t_b.lo, b.lo
2854 // mov t_b.hi, b.hi
2855 // mov t_c.lo, c.lo
2856 // lsr T0, t_b.lo, t_c.lo
2857 // rsb T1, t_c.lo, #32
2858 // orr t_a.lo, T0, t_b.hi, lsl T1
2859 // sub T2, t_c.lo, #32
2860 // cmp T2, #0
2861 // [al]srge t_a.lo, t_b.hi, T2
2862 // [al]sr t_a.hi, t_b.hi, t_c.lo
2863 // mov a.lo, t_a.lo
2864 // mov a.hi, t_a.hi
2865 //
2866 // GCC 4.8 does (lsr):
2867 // rsb t_c1, c.lo, #32
2868 // lsr t_lo, b.lo, c.lo
2869 // orr t_lo, t_lo, b.hi, lsl t_c1
2870 // sub t_c2, c.lo, #32
2871 // orr t_lo, t_lo, b.hi, lsr t_c2
2872 // lsr t_hi, b.hi, c.lo
2873 // mov a.lo, t_lo
2874 // mov a.hi, t_hi
2875 //
2876 // These are incompatible, therefore we mimic pnacl-llc.
2877 Operand *_32 = legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
2878 Operand *_0 =
2879 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
2880 Variable *T0 = makeReg(IceType_i32);
2881 Variable *T1 = makeReg(IceType_i32);
2882 Variable *T2 = makeReg(IceType_i32);
2883 Variable *TA_Lo = makeReg(IceType_i32);
2884 Variable *TA_Hi = makeReg(IceType_i32);
2885 Variable *Src0RLo = SrcsLo.unswappedSrc0R(this);
2886 Variable *Src0RHi = SrcsHi.unswappedSrc0R(this);
2887 Variable *Src1RLo = SrcsLo.unswappedSrc1R(this);
2888 _lsr(T0, Src0RLo, Src1RLo);
2889 _rsb(T1, Src1RLo, _32);
2890 _orr(TA_Lo, T0, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
2891 OperandARM32::LSL, T1));
2892 _sub(T2, Src1RLo, _32);
2893 _cmp(T2, _0);
2894 if (ASR) {
2895 _asr(TA_Lo, Src0RHi, T2, CondARM32::GE);
2896 _set_dest_redefined();
2897 _asr(TA_Hi, Src0RHi, Src1RLo);
2898 } else {
2899 _lsr(TA_Lo, Src0RHi, T2, CondARM32::GE);
2900 _set_dest_redefined();
2901 _lsr(TA_Hi, Src0RHi, Src1RLo);
2902 }
2903 _mov(DestLo, TA_Lo);
2904 _mov(DestHi, TA_Hi);
2905 return;
2906 }
2907 case InstArithmetic::Fadd:
2908 case InstArithmetic::Fsub:
2909 case InstArithmetic::Fmul:
2910 case InstArithmetic::Fdiv:
2911 case InstArithmetic::Frem:
2912 llvm::report_fatal_error("FP instruction with i64 type");
2913 return;
2914 case InstArithmetic::Udiv:
2915 case InstArithmetic::Sdiv:
2916 case InstArithmetic::Urem:
2917 case InstArithmetic::Srem:
2918 llvm::report_fatal_error("Call-helper-involved instruction for i64 type "
2919 "should have already been handled before");
2920 return;
2921 }
2922 }
2923
2924 namespace {
2925 // StrengthReduction is a namespace with the strength reduction machinery. The
2926 // entry point is the StrengthReduction::tryToOptimize method. It returns true
2927 // if the optimization can be performed, and false otherwise.
2928 //
2929 // If the optimization can be performed, tryToOptimize sets its NumOperations
2930 // parameter to the number of shifts that are needed to perform the
2931 // multiplication; and it sets the Operations parameter with <ShAmt, AddOrSub>
2932 // tuples that describe how to materialize the multiplication.
2933 //
2934 // The algorithm finds contiguous 1s in the Multiplication source, and uses one
2935 // or two shifts to materialize it. A sequence of 1s, e.g.,
2936 //
2937 // M N
2938 // ...00000000000011111...111110000000...
2939 //
2940 // is materializable with (1 << (M + 1)) - (1 << N):
2941 //
2942 // ...00000000000100000...000000000000... [1 << (M + 1)]
2943 // ...00000000000000000...000010000000... (-) [1 << N]
2944 // --------------------------------------
2945 // ...00000000000011111...111110000000...
2946 //
2947 // And a single bit set, which is just a left shift.
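// Worked example (annotation): Src = 14 = 0b1110 is a run of 1s with M = 3 and
// N = 1, so x * 14 can be materialized as (x << 4) - (x << 1).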
2948 namespace StrengthReduction {
2949 enum AggregationOperation {
2950 AO_Invalid,
2951 AO_Add,
2952 AO_Sub,
2953 };
2954
2955 // AggregationElement is a glorified <ShAmt, AddOrSub> tuple.
2956 class AggregationElement {
2957 AggregationElement(const AggregationElement &) = delete;
2958
2959 public:
2960 AggregationElement() = default;
2961 AggregationElement &operator=(const AggregationElement &) = default;
2962   AggregationElement(AggregationOperation Op, uint32_t ShAmt)
2963 : Op(Op), ShAmt(ShAmt) {}
2964
2965   Operand *createShiftedOperand(Cfg *Func, Variable *OpR) const {
2966 assert(OpR->mustHaveReg());
2967 if (ShAmt == 0) {
2968 return OpR;
2969 }
2970 return OperandARM32FlexReg::create(
2971 Func, IceType_i32, OpR, OperandARM32::LSL,
2972 OperandARM32ShAmtImm::create(
2973 Func, llvm::cast<ConstantInteger32>(
2974 Func->getContext()->getConstantInt32(ShAmt))));
2975 }
2976
2977   bool aggregateWithAdd() const {
2978 switch (Op) {
2979 case AO_Invalid:
2980 llvm::report_fatal_error("Invalid Strength Reduction Operations.");
2981 case AO_Add:
2982 return true;
2983 case AO_Sub:
2984 return false;
2985 }
2986 llvm_unreachable("(silence g++ warning)");
2987 }
2988
2989   uint32_t shAmt() const { return ShAmt; }
2990
2991 private:
2992 AggregationOperation Op = AO_Invalid;
2993 uint32_t ShAmt;
2994 };
2995
2996 // [RangeStart, RangeEnd] is a range of 1s in Src.
2997 template <std::size_t N>
2998 bool addOperations(uint32_t RangeStart, uint32_t RangeEnd, SizeT *NumOperations,
2999 std::array<AggregationElement, N> *Operations) {
3000 assert(*NumOperations < N);
3001 if (RangeStart == RangeEnd) {
3002 // Single bit set:
3003 // Src : 0...00010...
3004 // RangeStart : ^
3005 // RangeEnd : ^
3006 // NegSrc : 0...00001...
3007 (*Operations)[*NumOperations] = AggregationElement(AO_Add, RangeStart);
3008 ++(*NumOperations);
3009 return true;
3010 }
3011
3012 // Sequence of 1s: (two operations required.)
3013 // Src : 0...00011...110...
3014 // RangeStart : ^
3015 // RangeEnd : ^
3016 // NegSrc : 0...00000...001...
3017 if (*NumOperations + 1 >= N) {
3018 return false;
3019 }
3020 (*Operations)[*NumOperations] = AggregationElement(AO_Add, RangeStart + 1);
3021 ++(*NumOperations);
3022 (*Operations)[*NumOperations] = AggregationElement(AO_Sub, RangeEnd);
3023 ++(*NumOperations);
3024 return true;
3025 }
3026
3027 // tryToOptimize scans Src looking for sequences of 1s (including the unitary
3028 // bit 1 surrounded by zeroes).
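// Worked example (annotation): for Src = 0x6C (0b01101100) the loop first
// finds the run at bits 6..5 and records (Add, 7), (Sub, 5), then the run at
// bits 3..2 and records (Add, 4), (Sub, 2); indeed
// (1 << 7) - (1 << 5) + (1 << 4) - (1 << 2) == 108 == 0x6C.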
3029 template <std::size_t N>
3030 bool tryToOptimize(uint32_t Src, SizeT *NumOperations,
3031 std::array<AggregationElement, N> *Operations) {
3032 constexpr uint32_t SrcSizeBits = sizeof(Src) * CHAR_BIT;
3033 uint32_t NegSrc = ~Src;
3034
3035 *NumOperations = 0;
3036 while (Src != 0 && *NumOperations < N) {
3037 // Each step of the algorithm:
3038 // * finds L, the last bit set in Src;
3039 // * clears all the upper bits in NegSrc up to bit L;
3040 // * finds nL, the last bit set in NegSrc;
3041 // * clears all the upper bits in Src up to bit nL;
3042 //
3043 // if L == nL + 1, then a unitary 1 was found in Src. Otherwise, a sequence
3044 // of 1s starting at L, and ending at nL + 1, was found.
3045 const uint32_t SrcLastBitSet = llvm::findLastSet(Src);
3046 const uint32_t NegSrcClearMask =
3047 (SrcLastBitSet == 0) ? 0
3048 : (0xFFFFFFFFu) >> (SrcSizeBits - SrcLastBitSet);
3049 NegSrc &= NegSrcClearMask;
3050 if (NegSrc == 0) {
3051 if (addOperations(SrcLastBitSet, 0, NumOperations, Operations)) {
3052 return true;
3053 }
3054 return false;
3055 }
3056 const uint32_t NegSrcLastBitSet = llvm::findLastSet(NegSrc);
3057 assert(NegSrcLastBitSet < SrcLastBitSet);
3058 const uint32_t SrcClearMask =
3059 (NegSrcLastBitSet == 0) ? 0 : (0xFFFFFFFFu) >>
3060 (SrcSizeBits - NegSrcLastBitSet);
3061 Src &= SrcClearMask;
3062 if (!addOperations(SrcLastBitSet, NegSrcLastBitSet + 1, NumOperations,
3063 Operations)) {
3064 return false;
3065 }
3066 }
3067
3068 return Src == 0;
3069 }
3070 } // end of namespace StrengthReduction
3071 } // end of anonymous namespace
3072
3073 void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) {
3074 Variable *Dest = Instr->getDest();
3075
3076 if (Dest->isRematerializable()) {
3077 Context.insert<InstFakeDef>(Dest);
3078 return;
3079 }
3080
3081 Type DestTy = Dest->getType();
3082 if (DestTy == IceType_i1) {
3083 lowerInt1Arithmetic(Instr);
3084 return;
3085 }
3086
3087 Operand *Src0 = legalizeUndef(Instr->getSrc(0));
3088 Operand *Src1 = legalizeUndef(Instr->getSrc(1));
3089 if (DestTy == IceType_i64) {
3090 lowerInt64Arithmetic(Instr->getOp(), Instr->getDest(), Src0, Src1);
3091 return;
3092 }
3093
3094 if (isVectorType(DestTy)) {
3095 switch (Instr->getOp()) {
3096 default:
3097 UnimplementedLoweringError(this, Instr);
3098 return;
3099 // Explicitly whitelist vector instructions we have implemented/enabled.
3100 case InstArithmetic::Add:
3101 case InstArithmetic::And:
3102 case InstArithmetic::Ashr:
3103 case InstArithmetic::Fadd:
3104 case InstArithmetic::Fmul:
3105 case InstArithmetic::Fsub:
3106 case InstArithmetic::Lshr:
3107 case InstArithmetic::Mul:
3108 case InstArithmetic::Or:
3109 case InstArithmetic::Shl:
3110 case InstArithmetic::Sub:
3111 case InstArithmetic::Xor:
3112 break;
3113 }
3114 }
3115
3116 Variable *T = makeReg(DestTy);
3117
3118 // * Handle div/rem separately. They require a non-legalized Src1 to inspect
3119 // whether or not Src1 is a non-zero constant. Once legalized it is more
3120 // difficult to determine (constant may be moved to a register).
3121 // * Handle floating point arithmetic separately: they require Src1 to be
3122 // legalized to a register.
3123 switch (Instr->getOp()) {
3124 default:
3125 break;
3126 case InstArithmetic::Udiv: {
3127 constexpr bool NotRemainder = false;
3128 Variable *Src0R = legalizeToReg(Src0);
3129 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
3130 NotRemainder);
3131 return;
3132 }
3133 case InstArithmetic::Sdiv: {
3134 constexpr bool NotRemainder = false;
3135 Variable *Src0R = legalizeToReg(Src0);
3136 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv,
3137 NotRemainder);
3138 return;
3139 }
3140 case InstArithmetic::Urem: {
3141 constexpr bool IsRemainder = true;
3142 Variable *Src0R = legalizeToReg(Src0);
3143 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
3144 IsRemainder);
3145 return;
3146 }
3147 case InstArithmetic::Srem: {
3148 constexpr bool IsRemainder = true;
3149 Variable *Src0R = legalizeToReg(Src0);
3150 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv,
3151 IsRemainder);
3152 return;
3153 }
3154 case InstArithmetic::Frem: {
3155 if (!isScalarFloatingType(DestTy)) {
3156 llvm::report_fatal_error("Unexpected type when lowering frem.");
3157 }
3158 llvm::report_fatal_error("Frem should have already been lowered.");
3159 }
3160 case InstArithmetic::Fadd: {
3161 Variable *Src0R = legalizeToReg(Src0);
3162 if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
3163 Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
3164 Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
3165 _vmla(Src0R, Src1R, Src2R);
3166 _mov(Dest, Src0R);
3167 return;
3168 }
3169
3170 Variable *Src1R = legalizeToReg(Src1);
3171 _vadd(T, Src0R, Src1R);
3172 _mov(Dest, T);
3173 return;
3174 }
3175 case InstArithmetic::Fsub: {
3176 Variable *Src0R = legalizeToReg(Src0);
3177 if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
3178 Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
3179 Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
3180 _vmls(Src0R, Src1R, Src2R);
3181 _mov(Dest, Src0R);
3182 return;
3183 }
3184 Variable *Src1R = legalizeToReg(Src1);
3185 _vsub(T, Src0R, Src1R);
3186 _mov(Dest, T);
3187 return;
3188 }
3189 case InstArithmetic::Fmul: {
3190 Variable *Src0R = legalizeToReg(Src0);
3191 Variable *Src1R = legalizeToReg(Src1);
3192 _vmul(T, Src0R, Src1R);
3193 _mov(Dest, T);
3194 return;
3195 }
3196 case InstArithmetic::Fdiv: {
3197 Variable *Src0R = legalizeToReg(Src0);
3198 Variable *Src1R = legalizeToReg(Src1);
3199 _vdiv(T, Src0R, Src1R);
3200 _mov(Dest, T);
3201 return;
3202 }
3203 }
3204
3205 // Handle everything else here.
3206 Int32Operands Srcs(Src0, Src1);
3207 switch (Instr->getOp()) {
3208 case InstArithmetic::_num:
3209 llvm::report_fatal_error("Unknown arithmetic operator");
3210 return;
3211 case InstArithmetic::Add: {
3212 if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
3213 assert(!isVectorType(DestTy));
3214 Variable *Src0R = legalizeToReg(Src0);
3215 Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
3216 Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
3217 _mla(T, Src1R, Src2R, Src0R);
3218 _mov(Dest, T);
3219 return;
3220 }
3221
3222 if (Srcs.hasConstOperand()) {
3223 if (!Srcs.immediateIsFlexEncodable() &&
3224 Srcs.negatedImmediateIsFlexEncodable()) {
3225 assert(!isVectorType(DestTy));
3226 Variable *Src0R = Srcs.src0R(this);
3227 Operand *Src1F = Srcs.negatedSrc1F(this);
3228 if (!Srcs.swappedOperands()) {
3229 _sub(T, Src0R, Src1F);
3230 } else {
3231 _rsb(T, Src0R, Src1F);
3232 }
3233 _mov(Dest, T);
3234 return;
3235 }
3236 }
3237 Variable *Src0R = Srcs.src0R(this);
3238 if (isVectorType(DestTy)) {
3239 Variable *Src1R = legalizeToReg(Src1);
3240 _vadd(T, Src0R, Src1R);
3241 } else {
3242 Operand *Src1RF = Srcs.src1RF(this);
3243 _add(T, Src0R, Src1RF);
3244 }
3245 _mov(Dest, T);
3246 return;
3247 }
3248 case InstArithmetic::And: {
3249 if (Srcs.hasConstOperand()) {
3250 if (!Srcs.immediateIsFlexEncodable() &&
3251 Srcs.invertedImmediateIsFlexEncodable()) {
3252 Variable *Src0R = Srcs.src0R(this);
3253 Operand *Src1F = Srcs.invertedSrc1F(this);
3254 _bic(T, Src0R, Src1F);
3255 _mov(Dest, T);
3256 return;
3257 }
3258 }
3259 assert(isIntegerType(DestTy));
3260 Variable *Src0R = Srcs.src0R(this);
3261 if (isVectorType(DestTy)) {
3262 Variable *Src1R = legalizeToReg(Src1);
3263 _vand(T, Src0R, Src1R);
3264 } else {
3265 Operand *Src1RF = Srcs.src1RF(this);
3266 _and(T, Src0R, Src1RF);
3267 }
3268 _mov(Dest, T);
3269 return;
3270 }
3271 case InstArithmetic::Or: {
3272 Variable *Src0R = Srcs.src0R(this);
3273 assert(isIntegerType(DestTy));
3274 if (isVectorType(DestTy)) {
3275 Variable *Src1R = legalizeToReg(Src1);
3276 _vorr(T, Src0R, Src1R);
3277 } else {
3278 Operand *Src1RF = Srcs.src1RF(this);
3279 _orr(T, Src0R, Src1RF);
3280 }
3281 _mov(Dest, T);
3282 return;
3283 }
3284 case InstArithmetic::Xor: {
3285 Variable *Src0R = Srcs.src0R(this);
3286 assert(isIntegerType(DestTy));
3287 if (isVectorType(DestTy)) {
3288 Variable *Src1R = legalizeToReg(Src1);
3289 _veor(T, Src0R, Src1R);
3290 } else {
3291 Operand *Src1RF = Srcs.src1RF(this);
3292 _eor(T, Src0R, Src1RF);
3293 }
3294 _mov(Dest, T);
3295 return;
3296 }
3297 case InstArithmetic::Sub: {
3298 if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
3299 assert(!isVectorType(DestTy));
3300 Variable *Src0R = legalizeToReg(Src0);
3301 Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
3302 Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
3303 _mls(T, Src1R, Src2R, Src0R);
3304 _mov(Dest, T);
3305 return;
3306 }
3307
3308 if (Srcs.hasConstOperand()) {
3309 assert(!isVectorType(DestTy));
3310 if (Srcs.immediateIsFlexEncodable()) {
3311 Variable *Src0R = Srcs.src0R(this);
3312 Operand *Src1RF = Srcs.src1RF(this);
3313 if (Srcs.swappedOperands()) {
3314 _rsb(T, Src0R, Src1RF);
3315 } else {
3316 _sub(T, Src0R, Src1RF);
3317 }
3318 _mov(Dest, T);
3319 return;
3320 }
3321 if (!Srcs.swappedOperands() && Srcs.negatedImmediateIsFlexEncodable()) {
3322 Variable *Src0R = Srcs.src0R(this);
3323 Operand *Src1F = Srcs.negatedSrc1F(this);
3324 _add(T, Src0R, Src1F);
3325 _mov(Dest, T);
3326 return;
3327 }
3328 }
3329 Variable *Src0R = Srcs.unswappedSrc0R(this);
3330 Variable *Src1R = Srcs.unswappedSrc1R(this);
3331 if (isVectorType(DestTy)) {
3332 _vsub(T, Src0R, Src1R);
3333 } else {
3334 _sub(T, Src0R, Src1R);
3335 }
3336 _mov(Dest, T);
3337 return;
3338 }
3339 case InstArithmetic::Mul: {
3340 const bool OptM1 = Func->getOptLevel() == Opt_m1;
3341 if (!OptM1 && Srcs.hasConstOperand()) {
3342 constexpr std::size_t MaxShifts = 4;
3343 std::array<StrengthReduction::AggregationElement, MaxShifts> Shifts;
3344 SizeT NumOperations;
3345 int32_t Const = Srcs.getConstantValue();
3346 const bool Invert = Const < 0;
3347 const bool MultiplyByZero = Const == 0;
3348 Operand *_0 =
3349 legalize(Ctx->getConstantZero(DestTy), Legal_Reg | Legal_Flex);
3350
3351 if (MultiplyByZero) {
3352 _mov(T, _0);
3353 _mov(Dest, T);
3354 return;
3355 }
3356
3357 if (Invert) {
3358 Const = -Const;
3359 }
3360
3361 if (StrengthReduction::tryToOptimize(Const, &NumOperations, &Shifts)) {
3362 assert(NumOperations >= 1);
3363 Variable *Src0R = Srcs.src0R(this);
3364 int32_t Start;
3365 int32_t End;
3366 if (NumOperations == 1 || Shifts[NumOperations - 1].shAmt() != 0) {
3367 // Multiplication by a power of 2 (NumOperations == 1); or
3368 // Multiplication by an even number that is not a power of 2.
3369 Start = 1;
3370 End = NumOperations;
3371 assert(Shifts[0].aggregateWithAdd());
3372 _lsl(T, Src0R, shAmtImm(Shifts[0].shAmt()));
3373 } else {
3374 // Multiplication by an odd number. Put the free barrel shifter to good
3375 // use.
3376 Start = 0;
3377 End = NumOperations - 2;
3378 const StrengthReduction::AggregationElement &Last =
3379 Shifts[NumOperations - 1];
3380 const StrengthReduction::AggregationElement &SecondToLast =
3381 Shifts[NumOperations - 2];
3382 if (!Last.aggregateWithAdd()) {
3383 assert(SecondToLast.aggregateWithAdd());
3384 _rsb(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R));
3385 } else if (!SecondToLast.aggregateWithAdd()) {
3386 assert(Last.aggregateWithAdd());
3387 _sub(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R));
3388 } else {
3389 _add(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R));
3390 }
3391 }
3392
3393 // Odd numbers : S E I I
3394 // +---+---+---+---+---+---+ ... +---+---+---+---+
3395 // Shifts = | | | | | | | ... | | | | |
3396 // +---+---+---+---+---+---+ ... +---+---+---+---+
3397 // Even numbers: I S E
3398 //
3399 // S: Start; E: End; I: Init
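 // E.g. (illustrative; actual registers depend on allocation): Const == 10
 // emits "lsl T, x, #3; add T, T, x, lsl #1", while Const == 7 emits
 // "rsb T, x, x, lsl #3".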
3400 for (int32_t I = Start; I < End; ++I) {
3401 const StrengthReduction::AggregationElement &Current = Shifts[I];
3402 Operand *SrcF = Current.createShiftedOperand(Func, Src0R);
3403 if (Current.aggregateWithAdd()) {
3404 _add(T, T, SrcF);
3405 } else {
3406 _sub(T, T, SrcF);
3407 }
3408 }
3409
3410 if (Invert) {
3411 // T = 0 - T.
3412 _rsb(T, T, _0);
3413 }
3414
3415 _mov(Dest, T);
3416 return;
3417 }
3418 }
3419 Variable *Src0R = Srcs.unswappedSrc0R(this);
3420 Variable *Src1R = Srcs.unswappedSrc1R(this);
3421 if (isVectorType(DestTy)) {
3422 _vmul(T, Src0R, Src1R);
3423 } else {
3424 _mul(T, Src0R, Src1R);
3425 }
3426 _mov(Dest, T);
3427 return;
3428 }
3429 case InstArithmetic::Shl: {
3430 Variable *Src0R = Srcs.unswappedSrc0R(this);
3431 if (!isVectorType(T->getType())) {
3432 if (Srcs.isSrc1ImmediateZero()) {
3433 _mov(T, Src0R);
3434 } else {
3435 Operand *Src1R = Srcs.unswappedSrc1RShAmtImm(this);
3436 _lsl(T, Src0R, Src1R);
3437 }
3438 } else {
3439 auto *Src1R = Srcs.unswappedSrc1R(this);
3440 _vshl(T, Src0R, Src1R)->setSignType(InstARM32::FS_Unsigned);
3441 }
3442 _mov(Dest, T);
3443 return;
3444 }
3445 case InstArithmetic::Lshr: {
3446 Variable *Src0R = Srcs.unswappedSrc0R(this);
3447 if (!isVectorType(T->getType())) {
3448 if (DestTy != IceType_i32) {
3449 _uxt(Src0R, Src0R);
3450 }
3451 if (Srcs.isSrc1ImmediateZero()) {
3452 _mov(T, Src0R);
3453 } else {
3454 Operand *Src1R = Srcs.unswappedSrc1RShAmtImm(this);
3455 _lsr(T, Src0R, Src1R);
3456 }
3457 } else {
3458 auto *Src1R = Srcs.unswappedSrc1R(this);
3459 auto *Src1RNeg = makeReg(Src1R->getType());
3460 _vneg(Src1RNeg, Src1R);
3461 _vshl(T, Src0R, Src1RNeg)->setSignType(InstARM32::FS_Unsigned);
3462 }
3463 _mov(Dest, T);
3464 return;
3465 }
3466 case InstArithmetic::Ashr: {
3467 Variable *Src0R = Srcs.unswappedSrc0R(this);
3468 if (!isVectorType(T->getType())) {
3469 if (DestTy != IceType_i32) {
3470 _sxt(Src0R, Src0R);
3471 }
3472 if (Srcs.isSrc1ImmediateZero()) {
3473 _mov(T, Src0R);
3474 } else {
3475 _asr(T, Src0R, Srcs.unswappedSrc1RShAmtImm(this));
3476 }
3477 } else {
3478 auto *Src1R = Srcs.unswappedSrc1R(this);
3479 auto *Src1RNeg = makeReg(Src1R->getType());
3480 _vneg(Src1RNeg, Src1R);
3481 _vshl(T, Src0R, Src1RNeg)->setSignType(InstARM32::FS_Signed);
3482 }
3483 _mov(Dest, T);
3484 return;
3485 }
3486 case InstArithmetic::Udiv:
3487 case InstArithmetic::Sdiv:
3488 case InstArithmetic::Urem:
3489 case InstArithmetic::Srem:
3490 llvm::report_fatal_error(
3491 "Integer div/rem should have been handled earlier.");
3492 return;
3493 case InstArithmetic::Fadd:
3494 case InstArithmetic::Fsub:
3495 case InstArithmetic::Fmul:
3496 case InstArithmetic::Fdiv:
3497 case InstArithmetic::Frem:
3498 llvm::report_fatal_error(
3499 "Floating point arith should have been handled earlier.");
3500 return;
3501 }
3502 }
3503
3504 void TargetARM32::lowerAssign(const InstAssign *Instr) {
3505 Variable *Dest = Instr->getDest();
3506
3507 if (Dest->isRematerializable()) {
3508 Context.insert<InstFakeDef>(Dest);
3509 return;
3510 }
3511
3512 Operand *Src0 = Instr->getSrc(0);
3513 assert(Dest->getType() == Src0->getType());
3514 if (Dest->getType() == IceType_i64) {
3515 Src0 = legalizeUndef(Src0);
3516
3517 Variable *T_Lo = makeReg(IceType_i32);
3518 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3519 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
3520 _mov(T_Lo, Src0Lo);
3521 _mov(DestLo, T_Lo);
3522
3523 Variable *T_Hi = makeReg(IceType_i32);
3524 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3525 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
3526 _mov(T_Hi, Src0Hi);
3527 _mov(DestHi, T_Hi);
3528
3529 return;
3530 }
3531
3532 Operand *NewSrc;
3533 if (Dest->hasReg()) {
3534 // If Dest already has a physical register, then legalize the Src operand
3535 // into a Variable with the same register assignment. This especially
3536 // helps allow the use of Flex operands.
3537 NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum());
3538 } else {
3539 // Dest could be a stack operand. Since we could potentially need to do a
3540 // Store (and store can only have Register operands), legalize this to a
3541 // register.
3542 NewSrc = legalize(Src0, Legal_Reg);
3543 }
3544
3545 if (isVectorType(Dest->getType()) || isScalarFloatingType(Dest->getType())) {
3546 NewSrc = legalize(NewSrc, Legal_Reg | Legal_Mem);
3547 }
3548 _mov(Dest, NewSrc);
3549 }
3550
3551 TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch(
3552 Operand *Boolean, const LowerInt1BranchTarget &TargetTrue,
3553 const LowerInt1BranchTarget &TargetFalse, uint32_t ShortCircuitable) {
3554 InstARM32Label *NewShortCircuitLabel = nullptr;
3555 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
3556
3557 const Inst *Producer = Computations.getProducerOf(Boolean);
3558
3559 if (Producer == nullptr) {
3560 // No producer, no problem: just emit code to perform (Boolean & 1) and
3561 // set the flags register. The branch should be taken if the resulting flags
3562 // indicate a non-zero result.
3563 _tst(legalizeToReg(Boolean), _1);
3564 return ShortCircuitCondAndLabel(CondWhenTrue(CondARM32::NE));
3565 }
3566
3567 switch (Producer->getKind()) {
3568 default:
3569 llvm::report_fatal_error("Unexpected producer.");
3570 case Inst::Icmp: {
3571 return ShortCircuitCondAndLabel(
3572 lowerIcmpCond(llvm::cast<InstIcmp>(Producer)));
3573 } break;
3574 case Inst::Fcmp: {
3575 return ShortCircuitCondAndLabel(
3576 lowerFcmpCond(llvm::cast<InstFcmp>(Producer)));
3577 } break;
3578 case Inst::Cast: {
3579 const auto *CastProducer = llvm::cast<InstCast>(Producer);
3580 assert(CastProducer->getCastKind() == InstCast::Trunc);
3581 Operand *Src = CastProducer->getSrc(0);
3582 if (Src->getType() == IceType_i64)
3583 Src = loOperand(Src);
3584 _tst(legalizeToReg(Src), _1);
3585 return ShortCircuitCondAndLabel(CondWhenTrue(CondARM32::NE));
3586 } break;
3587 case Inst::Arithmetic: {
3588 const auto *ArithProducer = llvm::cast<InstArithmetic>(Producer);
3589 switch (ArithProducer->getOp()) {
3590 default:
3591 llvm::report_fatal_error("Unhandled Arithmetic Producer.");
3592 case InstArithmetic::And: {
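 // Short-circuited (a && b): lower a and branch to the false target (or to a
 // fresh short-circuit label) when a is false, then fall through and lower b.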
3593 if (!(ShortCircuitable & SC_And)) {
3594 NewShortCircuitLabel = InstARM32Label::create(Func, this);
3595 }
3596
3597 LowerInt1BranchTarget NewTarget =
3598 TargetFalse.createForLabelOrDuplicate(NewShortCircuitLabel);
3599
3600 ShortCircuitCondAndLabel CondAndLabel = lowerInt1ForBranch(
3601 Producer->getSrc(0), TargetTrue, NewTarget, SC_And);
3602 const CondWhenTrue &Cond = CondAndLabel.Cond;
3603
3604 _br_short_circuit(NewTarget, Cond.invert());
3605
3606 InstARM32Label *const ShortCircuitLabel = CondAndLabel.ShortCircuitTarget;
3607 if (ShortCircuitLabel != nullptr)
3608 Context.insert(ShortCircuitLabel);
3609
3610 return ShortCircuitCondAndLabel(
3611 lowerInt1ForBranch(Producer->getSrc(1), TargetTrue, NewTarget, SC_All)
3612 .assertNoLabelAndReturnCond(),
3613 NewShortCircuitLabel);
3614 } break;
3615 case InstArithmetic::Or: {
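 // Short-circuited (a || b): lower a and branch to the true target (or to a
 // fresh short-circuit label) when a is true, then fall through and lower b.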
3616 if (!(ShortCircuitable & SC_Or)) {
3617 NewShortCircuitLabel = InstARM32Label::create(Func, this);
3618 }
3619
3620 LowerInt1BranchTarget NewTarget =
3621 TargetTrue.createForLabelOrDuplicate(NewShortCircuitLabel);
3622
3623 ShortCircuitCondAndLabel CondAndLabel = lowerInt1ForBranch(
3624 Producer->getSrc(0), NewTarget, TargetFalse, SC_Or);
3625 const CondWhenTrue &Cond = CondAndLabel.Cond;
3626
3627 _br_short_circuit(NewTarget, Cond);
3628
3629 InstARM32Label *const ShortCircuitLabel = CondAndLabel.ShortCircuitTarget;
3630 if (ShortCircuitLabel != nullptr)
3631 Context.insert(ShortCircuitLabel);
3632
3633 return ShortCircuitCondAndLabel(lowerInt1ForBranch(Producer->getSrc(1),
3634 NewTarget, TargetFalse,
3635 SC_All)
3636 .assertNoLabelAndReturnCond(),
3637 NewShortCircuitLabel);
3638 } break;
3639 }
3640 }
3641 }
3642 }
3643
3644 void TargetARM32::lowerBr(const InstBr *Instr) {
3645 if (Instr->isUnconditional()) {
3646 _br(Instr->getTargetUnconditional());
3647 return;
3648 }
3649
3650 CfgNode *TargetTrue = Instr->getTargetTrue();
3651 CfgNode *TargetFalse = Instr->getTargetFalse();
3652 ShortCircuitCondAndLabel CondAndLabel = lowerInt1ForBranch(
3653 Instr->getCondition(), LowerInt1BranchTarget(TargetTrue),
3654 LowerInt1BranchTarget(TargetFalse), SC_All);
3655 assert(CondAndLabel.ShortCircuitTarget == nullptr);
3656
3657 const CondWhenTrue &Cond = CondAndLabel.Cond;
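 // Some fcmp predicates map to two ARM condition codes (WhenTrue0 and
 // WhenTrue1). When a second condition is present, emit an extra conditional
 // branch to the true target before dispatching on WhenTrue0.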
3658 if (Cond.WhenTrue1 != CondARM32::kNone) {
3659 assert(Cond.WhenTrue0 != CondARM32::AL);
3660 _br(TargetTrue, Cond.WhenTrue1);
3661 }
3662
3663 switch (Cond.WhenTrue0) {
3664 default:
3665 _br(TargetTrue, TargetFalse, Cond.WhenTrue0);
3666 break;
3667 case CondARM32::kNone:
3668 _br(TargetFalse);
3669 break;
3670 case CondARM32::AL:
3671 _br(TargetTrue);
3672 break;
3673 }
3674 }
3675
3676 void TargetARM32::lowerCall(const InstCall *Instr) {
3677 Operand *CallTarget = Instr->getCallTarget();
3678 if (Instr->isTargetHelperCall()) {
3679 auto TargetHelperPreamble = ARM32HelpersPreamble.find(CallTarget);
3680 if (TargetHelperPreamble != ARM32HelpersPreamble.end()) {
3681 (this->*TargetHelperPreamble->second)(Instr);
3682 }
3683 }
3684 MaybeLeafFunc = false;
3685 NeedsStackAlignment = true;
3686
3687 // Assign arguments to registers and stack. Also reserve stack.
3688 TargetARM32::CallingConv CC;
3689 // Pair of Arg Operand -> GPR number assignments.
3690 llvm::SmallVector<std::pair<Operand *, RegNumT>, NumGPRArgs> GPRArgs;
3691 llvm::SmallVector<std::pair<Operand *, RegNumT>, NumFP32Args> FPArgs;
3692 // Pair of Arg Operand -> stack offset.
3693 llvm::SmallVector<std::pair<Operand *, int32_t>, 8> StackArgs;
3694 size_t ParameterAreaSizeBytes = 0;
3695
3696 // Classify each argument operand according to the location where the
3697 // argument is passed.
3698 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
3699 Operand *Arg = legalizeUndef(Instr->getArg(i));
3700 const Type Ty = Arg->getType();
3701 bool InReg = false;
3702 RegNumT Reg;
3703 if (isScalarIntegerType(Ty)) {
3704 InReg = CC.argInGPR(Ty, &Reg);
3705 } else {
3706 InReg = CC.argInVFP(Ty, &Reg);
3707 }
3708
3709 if (!InReg) {
3710 ParameterAreaSizeBytes =
3711 applyStackAlignmentTy(ParameterAreaSizeBytes, Ty);
3712 StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes));
3713 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Ty);
3714 continue;
3715 }
3716
3717 if (Ty == IceType_i64) {
3718 Operand *Lo = loOperand(Arg);
3719 Operand *Hi = hiOperand(Arg);
3720 GPRArgs.push_back(std::make_pair(
3721 Lo, RegNumT::fixme(RegARM32::getI64PairFirstGPRNum(Reg))));
3722 GPRArgs.push_back(std::make_pair(
3723 Hi, RegNumT::fixme(RegARM32::getI64PairSecondGPRNum(Reg))));
3724 } else if (isScalarIntegerType(Ty)) {
3725 GPRArgs.push_back(std::make_pair(Arg, Reg));
3726 } else {
3727 FPArgs.push_back(std::make_pair(Arg, Reg));
3728 }
3729 }
3730
3731 // Adjust the parameter area so that the stack is aligned. It is assumed that
3732 // the stack is already aligned at the start of the calling sequence.
3733 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
3734
3735 if (ParameterAreaSizeBytes > MaxOutArgsSizeBytes) {
3736 llvm::report_fatal_error("MaxOutArgsSizeBytes is not really a max.");
3737 }
3738
3739 // Copy arguments that are passed on the stack to the appropriate stack
3740 // locations.
3741 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
3742 for (auto &StackArg : StackArgs) {
3743 ConstantInteger32 *Loc =
3744 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second));
3745 Type Ty = StackArg.first->getType();
3746 OperandARM32Mem *Addr;
3747 constexpr bool SignExt = false;
3748 if (OperandARM32Mem::canHoldOffset(Ty, SignExt, StackArg.second)) {
3749 Addr = OperandARM32Mem::create(Func, Ty, SP, Loc);
3750 } else {
3751 Variable *NewBase = Func->makeVariable(SP->getType());
3752 lowerArithmetic(
3753 InstArithmetic::create(Func, InstArithmetic::Add, NewBase, SP, Loc));
3754 Addr = formMemoryOperand(NewBase, Ty);
3755 }
3756 lowerStore(InstStore::create(Func, StackArg.first, Addr));
3757 }
3758
3759 // Generate the call instruction. Assign its result to a temporary with high
3760 // register allocation weight.
3761 Variable *Dest = Instr->getDest();
3762 // ReturnReg doubles as ReturnRegLo as necessary.
3763 Variable *ReturnReg = nullptr;
3764 Variable *ReturnRegHi = nullptr;
3765 if (Dest) {
3766 switch (Dest->getType()) {
3767 case IceType_NUM:
3768 llvm::report_fatal_error("Invalid Call dest type");
3769 break;
3770 case IceType_void:
3771 break;
3772 case IceType_i1:
3773 assert(Computations.getProducerOf(Dest) == nullptr);
3774 // Fall-through intended.
3775 case IceType_i8:
3776 case IceType_i16:
3777 case IceType_i32:
3778 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_r0);
3779 break;
3780 case IceType_i64:
3781 ReturnReg = makeReg(IceType_i32, RegARM32::Reg_r0);
3782 ReturnRegHi = makeReg(IceType_i32, RegARM32::Reg_r1);
3783 break;
3784 case IceType_f32:
3785 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_s0);
3786 break;
3787 case IceType_f64:
3788 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_d0);
3789 break;
3790 case IceType_v4i1:
3791 case IceType_v8i1:
3792 case IceType_v16i1:
3793 case IceType_v16i8:
3794 case IceType_v8i16:
3795 case IceType_v4i32:
3796 case IceType_v4f32:
3797 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_q0);
3798 break;
3799 }
3800 }
3801
3802 // Allow ConstantRelocatable to be left alone as a direct call, but force
3803 // other constants like ConstantInteger32 to be in a register and make it an
3804 // indirect call.
3805 if (!llvm::isa<ConstantRelocatable>(CallTarget)) {
3806 CallTarget = legalize(CallTarget, Legal_Reg);
3807 }
3808
3809 // Copy arguments to be passed in registers to the appropriate registers.
3810 CfgVector<Variable *> RegArgs;
3811 for (auto &FPArg : FPArgs) {
3812 RegArgs.emplace_back(legalizeToReg(FPArg.first, FPArg.second));
3813 }
3814 for (auto &GPRArg : GPRArgs) {
3815 RegArgs.emplace_back(legalizeToReg(GPRArg.first, GPRArg.second));
3816 }
3817
3818 // Generate a FakeUse of register arguments so that they do not get dead code
3819 // eliminated as a result of the FakeKill of scratch registers after the call.
3820 // These fake-uses need to be placed here to keep the argument registers from
3821 // being used during the legalizeToReg() calls above.
3822 for (auto *RegArg : RegArgs) {
3823 Context.insert<InstFakeUse>(RegArg);
3824 }
3825
3826 InstARM32Call *NewCall =
3827 Sandboxer(this, InstBundleLock::Opt_AlignToEnd).bl(ReturnReg, CallTarget);
3828
3829 if (ReturnRegHi)
3830 Context.insert<InstFakeDef>(ReturnRegHi);
3831
3832 // Insert a register-kill pseudo instruction.
3833 Context.insert<InstFakeKill>(NewCall);
3834
3835 // Generate a FakeUse to keep the call live if necessary.
3836 if (Instr->hasSideEffects() && ReturnReg) {
3837 Context.insert<InstFakeUse>(ReturnReg);
3838 }
3839
3840 if (Dest != nullptr) {
3841 // Assign the result of the call to Dest.
3842 if (ReturnReg != nullptr) {
3843 if (ReturnRegHi) {
3844 auto *Dest64On32 = llvm::cast<Variable64On32>(Dest);
3845 Variable *DestLo = Dest64On32->getLo();
3846 Variable *DestHi = Dest64On32->getHi();
3847 _mov(DestLo, ReturnReg);
3848 _mov(DestHi, ReturnRegHi);
3849 } else {
3850 if (isFloatingType(Dest->getType()) || isVectorType(Dest->getType())) {
3851 _mov(Dest, ReturnReg);
3852 } else {
3853 assert(isIntegerType(Dest->getType()) &&
3854 typeWidthInBytes(Dest->getType()) <= 4);
3855 _mov(Dest, ReturnReg);
3856 }
3857 }
3858 }
3859 }
3860
3861 if (Instr->isTargetHelperCall()) {
3862 auto TargetHelpersPostamble = ARM32HelpersPostamble.find(CallTarget);
3863 if (TargetHelpersPostamble != ARM32HelpersPostamble.end()) {
3864 (this->*TargetHelpersPostamble->second)(Instr);
3865 }
3866 }
3867 }
3868
3869 namespace {
3870 void configureBitcastTemporary(Variable64On32 *Var) {
3871 Var->setMustNotHaveReg();
3872 Var->getHi()->setMustHaveReg();
3873 Var->getLo()->setMustHaveReg();
3874 }
3875 } // end of anonymous namespace
3876
3877 void TargetARM32::lowerCast(const InstCast *Instr) {
3878 InstCast::OpKind CastKind = Instr->getCastKind();
3879 Variable *Dest = Instr->getDest();
3880 const Type DestTy = Dest->getType();
3881 Operand *Src0 = legalizeUndef(Instr->getSrc(0));
3882 switch (CastKind) {
3883 default:
3884 Func->setError("Cast type not supported");
3885 return;
3886 case InstCast::Sext: {
3887 if (isVectorType(DestTy)) {
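 // Vector sext from i1: shift each element left so the boolean lands in the
 // element's sign bit, then arithmetic-shift right by the same amount
 // (element width - 1) to splat 0 or -1 across the element.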
3888 Variable *T0 = makeReg(DestTy);
3889 Variable *T1 = makeReg(DestTy);
3890 ConstantInteger32 *ShAmt = nullptr;
3891 switch (DestTy) {
3892 default:
3893 llvm::report_fatal_error("Unexpected type in vector sext.");
3894 case IceType_v16i8:
3895 ShAmt = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(7));
3896 break;
3897 case IceType_v8i16:
3898 ShAmt = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(15));
3899 break;
3900 case IceType_v4i32:
3901 ShAmt = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(31));
3902 break;
3903 }
3904 auto *Src0R = legalizeToReg(Src0);
3905 _vshl(T0, Src0R, ShAmt);
3906 _vshr(T1, T0, ShAmt)->setSignType(InstARM32::FS_Signed);
3907 _mov(Dest, T1);
3908 } else if (DestTy == IceType_i64) {
3909 // t1=sxtb src; t2= mov t1 asr #31; dst.lo=t1; dst.hi=t2
3910 Constant *ShiftAmt = Ctx->getConstantInt32(31);
3911 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3912 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3913 Variable *T_Lo = makeReg(DestLo->getType());
3914 if (Src0->getType() == IceType_i32) {
3915 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
3916 _mov(T_Lo, Src0RF);
3917 } else if (Src0->getType() != IceType_i1) {
3918 Variable *Src0R = legalizeToReg(Src0);
3919 _sxt(T_Lo, Src0R);
3920 } else {
3921 Operand *_0 = Ctx->getConstantZero(IceType_i32);
3922 Operand *_m1 = Ctx->getConstantInt32(-1);
3923 lowerInt1ForSelect(T_Lo, Src0, _m1, _0);
3924 }
3925 _mov(DestLo, T_Lo);
3926 Variable *T_Hi = makeReg(DestHi->getType());
3927 if (Src0->getType() != IceType_i1) {
3928 _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, T_Lo,
3929 OperandARM32::ASR, ShiftAmt));
3930 } else {
3931 // For i1, the asr instruction is already done above.
3932 _mov(T_Hi, T_Lo);
3933 }
3934 _mov(DestHi, T_Hi);
3935 } else if (Src0->getType() != IceType_i1) {
3936 // t1 = sxt src; dst = t1
3937 Variable *Src0R = legalizeToReg(Src0);
3938 Variable *T = makeReg(DestTy);
3939 _sxt(T, Src0R);
3940 _mov(Dest, T);
3941 } else {
3942 Constant *_0 = Ctx->getConstantZero(IceType_i32);
3943 Operand *_m1 = Ctx->getConstantInt(DestTy, -1);
3944 Variable *T = makeReg(DestTy);
3945 lowerInt1ForSelect(T, Src0, _m1, _0);
3946 _mov(Dest, T);
3947 }
3948 break;
3949 }
3950 case InstCast::Zext: {
3951 if (isVectorType(DestTy)) {
3952 auto *Mask = makeReg(DestTy);
3953 auto *_1 = Ctx->getConstantInt32(1);
3954 auto *T = makeReg(DestTy);
3955 auto *Src0R = legalizeToReg(Src0);
3956 _mov(Mask, _1);
3957 _vand(T, Src0R, Mask);
3958 _mov(Dest, T);
3959 } else if (DestTy == IceType_i64) {
3960 // t1=uxtb src; dst.lo=t1; dst.hi=0
3961 Operand *_0 =
3962 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
3963 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3964 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3965 Variable *T_Lo = makeReg(DestLo->getType());
3966
3967 switch (Src0->getType()) {
3968 default: {
3969 assert(Src0->getType() != IceType_i64);
3970 _uxt(T_Lo, legalizeToReg(Src0));
3971 } break;
3972 case IceType_i32: {
3973 _mov(T_Lo, legalize(Src0, Legal_Reg | Legal_Flex));
3974 } break;
3975 case IceType_i1: {
3976 SafeBoolChain Safe = lowerInt1(T_Lo, Src0);
3977 if (Safe == SBC_No) {
3978 Operand *_1 =
3979 legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
3980 _and(T_Lo, T_Lo, _1);
3981 }
3982 } break;
3983 }
3984
3985 _mov(DestLo, T_Lo);
3986
3987 Variable *T_Hi = makeReg(DestLo->getType());
3988 _mov(T_Hi, _0);
3989 _mov(DestHi, T_Hi);
3990 } else if (Src0->getType() == IceType_i1) {
3991 Variable *T = makeReg(DestTy);
3992
3993 SafeBoolChain Safe = lowerInt1(T, Src0);
3994 if (Safe == SBC_No) {
3995 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
3996 _and(T, T, _1);
3997 }
3998
3999 _mov(Dest, T);
4000 } else {
4001 // t1 = uxt src; dst = t1
4002 Variable *Src0R = legalizeToReg(Src0);
4003 Variable *T = makeReg(DestTy);
4004 _uxt(T, Src0R);
4005 _mov(Dest, T);
4006 }
4007 break;
4008 }
4009 case InstCast::Trunc: {
4010 if (isVectorType(DestTy)) {
4011 auto *T = makeReg(DestTy);
4012 auto *Src0R = legalizeToReg(Src0);
4013 _mov(T, Src0R);
4014 _mov(Dest, T);
4015 } else {
4016 if (Src0->getType() == IceType_i64)
4017 Src0 = loOperand(Src0);
4018 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
4019 // t1 = trunc Src0RF; Dest = t1
4020 Variable *T = makeReg(DestTy);
4021 _mov(T, Src0RF);
4022 if (DestTy == IceType_i1)
4023 _and(T, T, Ctx->getConstantInt1(1));
4024 _mov(Dest, T);
4025 }
4026 break;
4027 }
4028 case InstCast::Fptrunc:
4029 case InstCast::Fpext: {
4030 // fptrunc: dest.f32 = fptrunc src0.fp64
4031 // fpext: dest.f64 = fpext src0.fp32
4032 const bool IsTrunc = CastKind == InstCast::Fptrunc;
4033 assert(!isVectorType(DestTy));
4034 assert(DestTy == (IsTrunc ? IceType_f32 : IceType_f64));
4035 assert(Src0->getType() == (IsTrunc ? IceType_f64 : IceType_f32));
4036 Variable *Src0R = legalizeToReg(Src0);
4037 Variable *T = makeReg(DestTy);
4038 _vcvt(T, Src0R, IsTrunc ? InstARM32Vcvt::D2s : InstARM32Vcvt::S2d);
4039 _mov(Dest, T);
4040 break;
4041 }
4042 case InstCast::Fptosi:
4043 case InstCast::Fptoui: {
4044 const bool DestIsSigned = CastKind == InstCast::Fptosi;
4045 Variable *Src0R = legalizeToReg(Src0);
4046
4047 if (isVectorType(DestTy)) {
4048 assert(typeElementType(Src0->getType()) == IceType_f32);
4049 auto *T = makeReg(DestTy);
4050 _vcvt(T, Src0R,
4051 DestIsSigned ? InstARM32Vcvt::Vs2si : InstARM32Vcvt::Vs2ui);
4052 _mov(Dest, T);
4053 break;
4054 }
4055
4056 const bool Src0IsF32 = isFloat32Asserting32Or64(Src0->getType());
4057 if (llvm::isa<Variable64On32>(Dest)) {
4058 llvm::report_fatal_error("fp-to-i64 should have been pre-lowered.");
4059 }
4060 // fptosi:
4061 // t1.fp = vcvt src0.fp
4062 // t2.i32 = vmov t1.fp
4063 // dest.int = conv t2.i32 @ Truncates the result if needed.
4064 // fptoui:
4065 // t1.fp = vcvt src0.fp
4066 // t2.u32 = vmov t1.fp
4067 // dest.uint = conv t2.u32 @ Truncates the result if needed.
4068 Variable *T_fp = makeReg(IceType_f32);
4069 const InstARM32Vcvt::VcvtVariant Conversion =
4070 Src0IsF32 ? (DestIsSigned ? InstARM32Vcvt::S2si : InstARM32Vcvt::S2ui)
4071 : (DestIsSigned ? InstARM32Vcvt::D2si : InstARM32Vcvt::D2ui);
4072 _vcvt(T_fp, Src0R, Conversion);
4073 Variable *T = makeReg(IceType_i32);
4074 _mov(T, T_fp);
4075 if (DestTy != IceType_i32) {
4076 Variable *T_1 = makeReg(DestTy);
4077 lowerCast(InstCast::create(Func, InstCast::Trunc, T_1, T));
4078 T = T_1;
4079 }
4080 _mov(Dest, T);
4081 break;
4082 }
4083 case InstCast::Sitofp:
4084 case InstCast::Uitofp: {
4085 const bool SourceIsSigned = CastKind == InstCast::Sitofp;
4086
4087 if (isVectorType(DestTy)) {
4088 assert(typeElementType(DestTy) == IceType_f32);
4089 auto *T = makeReg(DestTy);
4090 Variable *Src0R = legalizeToReg(Src0);
4091 _vcvt(T, Src0R,
4092 SourceIsSigned ? InstARM32Vcvt::Vsi2s : InstARM32Vcvt::Vui2s);
4093 _mov(Dest, T);
4094 break;
4095 }
4096
4097 const bool DestIsF32 = isFloat32Asserting32Or64(DestTy);
4098 if (Src0->getType() == IceType_i64) {
4099 llvm::report_fatal_error("i64-to-fp should have been pre-lowered.");
4100 }
4101 // sitofp:
4102 // t1.i32 = sext src.int @ sign-extends src0 if needed.
4103 // t2.fp32 = vmov t1.i32
4104 // t3.fp = vcvt.{fp}.s32 @ fp is either f32 or f64
4105 // uitofp:
4106 // t1.i32 = zext src.int @ zero-extends src0 if needed.
4107 // t2.fp32 = vmov t1.i32
4108 // t3.fp = vcvt.{fp}.u32 @ fp is either f32 or f64
4109 if (Src0->getType() != IceType_i32) {
4110 Variable *Src0R_32 = makeReg(IceType_i32);
4111 lowerCast(InstCast::create(Func, SourceIsSigned ? InstCast::Sext
4112 : InstCast::Zext,
4113 Src0R_32, Src0));
4114 Src0 = Src0R_32;
4115 }
4116 Variable *Src0R = legalizeToReg(Src0);
4117 Variable *Src0R_f32 = makeReg(IceType_f32);
4118 _mov(Src0R_f32, Src0R);
4119 Src0R = Src0R_f32;
4120 Variable *T = makeReg(DestTy);
4121 const InstARM32Vcvt::VcvtVariant Conversion =
4122 DestIsF32
4123 ? (SourceIsSigned ? InstARM32Vcvt::Si2s : InstARM32Vcvt::Ui2s)
4124 : (SourceIsSigned ? InstARM32Vcvt::Si2d : InstARM32Vcvt::Ui2d);
4125 _vcvt(T, Src0R, Conversion);
4126 _mov(Dest, T);
4127 break;
4128 }
4129 case InstCast::Bitcast: {
4130 Operand *Src0 = Instr->getSrc(0);
4131 if (DestTy == Src0->getType()) {
4132 auto *Assign = InstAssign::create(Func, Dest, Src0);
4133 lowerAssign(Assign);
4134 return;
4135 }
4136 switch (DestTy) {
4137 case IceType_NUM:
4138 case IceType_void:
4139 llvm::report_fatal_error("Unexpected bitcast.");
4140 case IceType_i1:
4141 UnimplementedLoweringError(this, Instr);
4142 break;
4143 case IceType_i8:
4144 assert(Src0->getType() == IceType_v8i1);
4145 llvm::report_fatal_error(
4146 "i8 to v8i1 conversion should have been prelowered.");
4147 break;
4148 case IceType_i16:
4149 assert(Src0->getType() == IceType_v16i1);
4150 llvm::report_fatal_error(
4151 "i16 to v16i1 conversion should have been prelowered.");
4152 break;
4153 case IceType_i32:
4154 case IceType_f32: {
4155 Variable *Src0R = legalizeToReg(Src0);
4156 Variable *T = makeReg(DestTy);
4157 _mov(T, Src0R);
4158 lowerAssign(InstAssign::create(Func, Dest, T));
4159 break;
4160 }
4161 case IceType_i64: {
4162 // t0, t1 <- src0
4163 // dest[31..0] = t0
4164 // dest[63..32] = t1
4165 assert(Src0->getType() == IceType_f64);
4166 auto *T = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
4167 T->initHiLo(Func);
4168 configureBitcastTemporary(T);
4169 Variable *Src0R = legalizeToReg(Src0);
4170 _mov(T, Src0R);
4171 Context.insert<InstFakeUse>(T->getHi());
4172 Context.insert<InstFakeUse>(T->getLo());
4173 lowerAssign(InstAssign::create(Func, Dest, T));
4174 break;
4175 }
4176 case IceType_f64: {
4177 // T0 <- lo(src)
4178 // T1 <- hi(src)
4179 // vmov T2, T0, T1
4180 // Dest <- T2
4181 assert(Src0->getType() == IceType_i64);
4182 Variable *T = makeReg(DestTy);
4183 auto *Src64 = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
4184 Src64->initHiLo(Func);
4185 configureBitcastTemporary(Src64);
4186 lowerAssign(InstAssign::create(Func, Src64, Src0));
4187 _mov(T, Src64);
4188 lowerAssign(InstAssign::create(Func, Dest, T));
4189 break;
4190 }
4191 case IceType_v8i1:
4192 assert(Src0->getType() == IceType_i8);
4193 llvm::report_fatal_error(
4194 "v8i1 to i8 conversion should have been prelowered.");
4195 break;
4196 case IceType_v16i1:
4197 assert(Src0->getType() == IceType_i16);
4198 llvm::report_fatal_error(
4199 "v16i1 to i16 conversion should have been prelowered.");
4200 break;
4201 case IceType_v4i1:
4202 case IceType_v8i16:
4203 case IceType_v16i8:
4204 case IceType_v4f32:
4205 case IceType_v4i32: {
4206 assert(typeWidthInBytes(DestTy) == typeWidthInBytes(Src0->getType()));
4207 assert(isVectorType(DestTy) == isVectorType(Src0->getType()));
4208 Variable *T = makeReg(DestTy);
4209 _mov(T, Src0);
4210 _mov(Dest, T);
4211 break;
4212 }
4213 }
4214 break;
4215 }
4216 }
4217 }
4218
4219 void TargetARM32::lowerExtractElement(const InstExtractElement *Instr) {
4220 Variable *Dest = Instr->getDest();
4221 Type DestTy = Dest->getType();
4222
4223 Variable *Src0 = legalizeToReg(Instr->getSrc(0));
4224 Operand *Src1 = Instr->getSrc(1);
4225
4226 if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src1)) {
4227 const uint32_t Index = Imm->getValue();
4228 Variable *T = makeReg(DestTy);
4229 Variable *TSrc0 = makeReg(Src0->getType());
4230
4231 if (isFloatingType(DestTy)) {
4232 // We need to make sure the source is in a suitable register.
4233 TSrc0->setRegClass(RegARM32::RCARM32_QtoS);
4234 }
4235
4236 _mov(TSrc0, Src0);
4237 _extractelement(T, TSrc0, Index);
4238 _mov(Dest, T);
4239 return;
4240 }
4241 assert(false && "extractelement requires a constant index");
4242 }
4243
4244 namespace {
4245 // Validates FCMPARM32_TABLE's declaration w.r.t. InstFcmp::FCondition ordering
4246 // (and naming).
4247 enum {
4248 #define X(val, CC0, CC1, CC0_V, CC1_V, INV_V, NEG_V) _fcmp_ll_##val,
4249 FCMPARM32_TABLE
4250 #undef X
4251 _fcmp_ll_NUM
4252 };
4253
4254 enum {
4255 #define X(tag, str) _fcmp_hl_##tag = InstFcmp::tag,
4256 ICEINSTFCMP_TABLE
4257 #undef X
4258 _fcmp_hl_NUM
4259 };
4260
4261 static_assert((uint32_t)_fcmp_hl_NUM == (uint32_t)_fcmp_ll_NUM,
4262 "Inconsistency between high-level and low-level fcmp tags.");
4263 #define X(tag, str) \
4264 static_assert( \
4265 (uint32_t)_fcmp_hl_##tag == (uint32_t)_fcmp_ll_##tag, \
4266 "Inconsistency between high-level and low-level fcmp tag " #tag);
4267 ICEINSTFCMP_TABLE
4268 #undef X
4269
4270 struct {
4271 CondARM32::Cond CC0;
4272 CondARM32::Cond CC1;
4273 } TableFcmp[] = {
4274 #define X(val, CC0, CC1, CC0_V, CC1_V, INV_V, NEG_V) \
4275 { CondARM32::CC0, CondARM32::CC1 } \
4276 ,
4277 FCMPARM32_TABLE
4278 #undef X
4279 };
4280
4281 bool isFloatingPointZero(const Operand *Src) {
4282 if (const auto *F32 = llvm::dyn_cast<const ConstantFloat>(Src)) {
4283 return Utils::isPositiveZero(F32->getValue());
4284 }
4285
4286 if (const auto *F64 = llvm::dyn_cast<const ConstantDouble>(Src)) {
4287 return Utils::isPositiveZero(F64->getValue());
4288 }
4289
4290 return false;
4291 }
4292 } // end of anonymous namespace
4293
4294 TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) {
4295 InstFcmp::FCond Condition = Instr->getCondition();
4296 switch (Condition) {
4297 case InstFcmp::False:
4298 return CondWhenTrue(CondARM32::kNone);
4299 case InstFcmp::True:
4300 return CondWhenTrue(CondARM32::AL);
4301 break;
4302 default: {
4303 Variable *Src0R = legalizeToReg(Instr->getSrc(0));
4304 Operand *Src1 = Instr->getSrc(1);
4305 if (isFloatingPointZero(Src1)) {
4306 _vcmp(Src0R, OperandARM32FlexFpZero::create(Func, Src0R->getType()));
4307 } else {
4308 _vcmp(Src0R, legalizeToReg(Src1));
4309 }
4310 _vmrs();
4311 assert(Condition < llvm::array_lengthof(TableFcmp));
4312 return CondWhenTrue(TableFcmp[Condition].CC0, TableFcmp[Condition].CC1);
4313 }
4314 }
4315 }
4316
4317 void TargetARM32::lowerFcmp(const InstFcmp *Instr) {
4318 Variable *Dest = Instr->getDest();
4319 const Type DestTy = Dest->getType();
4320
4321 if (isVectorType(DestTy)) {
4322 if (Instr->getCondition() == InstFcmp::False) {
4323 constexpr Type SafeTypeForMovingConstant = IceType_v4i32;
4324 auto *T = makeReg(SafeTypeForMovingConstant);
4325 _mov(T, llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(0)));
4326 _mov(Dest, T);
4327 return;
4328 }
4329
4330 if (Instr->getCondition() == InstFcmp::True) {
4331 constexpr Type SafeTypeForMovingConstant = IceType_v4i32;
4332 auto *T = makeReg(SafeTypeForMovingConstant);
4333 _mov(T, llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(1)));
4334 _mov(Dest, T);
4335 return;
4336 }
4337
4338 Variable *T0;
4339 Variable *T1;
4340 bool Negate = false;
4341 auto *Src0 = legalizeToReg(Instr->getSrc(0));
4342 auto *Src1 = legalizeToReg(Instr->getSrc(1));
4343
4344 switch (Instr->getCondition()) {
4345 default:
4346 llvm::report_fatal_error("Unhandled fp comparison.");
4347 #define _Vcnone(Tptr, S0, S1) \
4348 do { \
4349 *(Tptr) = nullptr; \
4350 } while (0)
4351 #define _Vceq(Tptr, S0, S1) \
4352 do { \
4353 *(Tptr) = makeReg(DestTy); \
4354 _vceq(*(Tptr), S0, S1); \
4355 } while (0)
4356 #define _Vcge(Tptr, S0, S1) \
4357 do { \
4358 *(Tptr) = makeReg(DestTy); \
4359 _vcge(*(Tptr), S0, S1)->setSignType(InstARM32::FS_Signed); \
4360 } while (0)
4361 #define _Vcgt(Tptr, S0, S1) \
4362 do { \
4363 *(Tptr) = makeReg(DestTy); \
4364 _vcgt(*(Tptr), S0, S1)->setSignType(InstARM32::FS_Signed); \
4365 } while (0)
4366 #define X(val, CC0, CC1, CC0_V, CC1_V, INV_V, NEG_V) \
4367 case InstFcmp::val: { \
4368 _Vc##CC0_V(&T0, (INV_V) ? Src1 : Src0, (INV_V) ? Src0 : Src1); \
4369 _Vc##CC1_V(&T1, (INV_V) ? Src0 : Src1, (INV_V) ? Src1 : Src0); \
4370 Negate = NEG_V; \
4371 } break;
4372 FCMPARM32_TABLE
4373 #undef X
4374 #undef _Vcgt
4375 #undef _Vcge
4376 #undef _Vceq
4377 #undef _Vcnone
4378 }
4379 assert(T0 != nullptr);
4380 Variable *T = T0;
4381 if (T1 != nullptr) {
4382 T = makeReg(DestTy);
4383 _vorr(T, T0, T1);
4384 }
4385
4386 if (Negate) {
4387 auto *TNeg = makeReg(DestTy);
4388 _vmvn(TNeg, T);
4389 T = TNeg;
4390 }
4391
4392 _mov(Dest, T);
4393 return;
4394 }
4395
4396 Variable *T = makeReg(IceType_i1);
4397 Operand *_1 = legalize(Ctx->getConstantInt32(1), Legal_Reg | Legal_Flex);
4398 Operand *_0 =
4399 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
4400
4401 CondWhenTrue Cond = lowerFcmpCond(Instr);
4402
4403 bool RedefineT = false;
4404 if (Cond.WhenTrue0 != CondARM32::AL) {
4405 _mov(T, _0);
4406 RedefineT = true;
4407 }
4408
4409 if (Cond.WhenTrue0 == CondARM32::kNone) {
4410 _mov(Dest, T);
4411 return;
4412 }
4413
4414 if (RedefineT) {
4415 _mov_redefined(T, _1, Cond.WhenTrue0);
4416 } else {
4417 _mov(T, _1, Cond.WhenTrue0);
4418 }
4419
4420 if (Cond.WhenTrue1 != CondARM32::kNone) {
4421 _mov_redefined(T, _1, Cond.WhenTrue1);
4422 }
4423
4424 _mov(Dest, T);
4425 }
4426
4427 TargetARM32::CondWhenTrue
4428 TargetARM32::lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
4429 Operand *Src1) {
4430 assert(Condition < llvm::array_lengthof(TableIcmp64));
4431
4432 Int32Operands SrcsLo(loOperand(Src0), loOperand(Src1));
4433 Int32Operands SrcsHi(hiOperand(Src0), hiOperand(Src1));
4434 assert(SrcsLo.hasConstOperand() == SrcsHi.hasConstOperand());
4435 assert(SrcsLo.swappedOperands() == SrcsHi.swappedOperands());
4436
4437 if (SrcsLo.hasConstOperand()) {
4438 const uint32_t ValueLo = SrcsLo.getConstantValue();
4439 const uint32_t ValueHi = SrcsHi.getConstantValue();
4440 const uint64_t Value = (static_cast<uint64_t>(ValueHi) << 32) | ValueLo;
4441 if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) &&
4442 Value == 0) {
4443 Variable *T = makeReg(IceType_i32);
4444 Variable *Src0LoR = SrcsLo.src0R(this);
4445 Variable *Src0HiR = SrcsHi.src0R(this);
4446 _orrs(T, Src0LoR, Src0HiR);
4447 Context.insert<InstFakeUse>(T);
4448 return CondWhenTrue(TableIcmp64[Condition].C1);
4449 }
4450
4451 Variable *Src0RLo = SrcsLo.src0R(this);
4452 Variable *Src0RHi = SrcsHi.src0R(this);
4453 Operand *Src1RFLo = SrcsLo.src1RF(this);
4454 Operand *Src1RFHi = ValueLo == ValueHi ? Src1RFLo : SrcsHi.src1RF(this);
4455
4456 const bool UseRsb =
4457 TableIcmp64[Condition].Swapped != SrcsLo.swappedOperands();
4458
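 // When the comparison needs its operands swapped but the constant sits on
 // the flex-operand side, reverse subtract (rsbs/rscs) sets the flags as if
 // the operands had been swapped while keeping the constant as a flex operand.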
4459 if (UseRsb) {
4460 if (TableIcmp64[Condition].IsSigned) {
4461 Variable *T = makeReg(IceType_i32);
4462 _rsbs(T, Src0RLo, Src1RFLo);
4463 Context.insert<InstFakeUse>(T);
4464
4465 T = makeReg(IceType_i32);
4466 _rscs(T, Src0RHi, Src1RFHi);
4467 // We need to add a FakeUse here because liveness gets mad at us (Def
4468 // without Use.) Note that flag-setting instructions are considered to
4469 // have side effects and, therefore, are not DCE'ed.
4470 Context.insert<InstFakeUse>(T);
4471 } else {
4472 Variable *T = makeReg(IceType_i32);
4473 _rsbs(T, Src0RHi, Src1RFHi);
4474 Context.insert<InstFakeUse>(T);
4475
4476 T = makeReg(IceType_i32);
4477 _rsbs(T, Src0RLo, Src1RFLo, CondARM32::EQ);
4478 Context.insert<InstFakeUse>(T);
4479 }
4480 } else {
4481 if (TableIcmp64[Condition].IsSigned) {
4482 _cmp(Src0RLo, Src1RFLo);
4483 Variable *T = makeReg(IceType_i32);
4484 _sbcs(T, Src0RHi, Src1RFHi);
4485 Context.insert<InstFakeUse>(T);
4486 } else {
4487 _cmp(Src0RHi, Src1RFHi);
4488 _cmp(Src0RLo, Src1RFLo, CondARM32::EQ);
4489 }
4490 }
4491
4492 return CondWhenTrue(TableIcmp64[Condition].C1);
4493 }
4494
4495 Variable *Src0RLo, *Src0RHi;
4496 Operand *Src1RFLo, *Src1RFHi;
4497 if (TableIcmp64[Condition].Swapped) {
4498 Src0RLo = legalizeToReg(loOperand(Src1));
4499 Src0RHi = legalizeToReg(hiOperand(Src1));
4500 Src1RFLo = legalizeToReg(loOperand(Src0));
4501 Src1RFHi = legalizeToReg(hiOperand(Src0));
4502 } else {
4503 Src0RLo = legalizeToReg(loOperand(Src0));
4504 Src0RHi = legalizeToReg(hiOperand(Src0));
4505 Src1RFLo = legalizeToReg(loOperand(Src1));
4506 Src1RFHi = legalizeToReg(hiOperand(Src1));
4507 }
4508
4509 // a=icmp cond, b, c ==>
4510 // GCC does:
4511 // cmp b.hi, c.hi or cmp b.lo, c.lo
4512 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi
4513 // mov.<C1> t, #1 mov.<C1> t, #1
4514 // mov.<C2> t, #0 mov.<C2> t, #0
4515 // mov a, t mov a, t
4516 // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, b.hi, c.hi"
4517 // is used for signed compares. In some cases, b and c need to be swapped as
4518 // well.
4519 //
4520 // LLVM does:
4521 // for EQ and NE:
4522 // eor t1, b.hi, c.hi
4523 // eor t2, b.lo, c.lo
4524 // orrs t, t1, t2
4525 // mov.<C> t, #1
4526 // mov a, t
4527 //
4528 // that's nice in that it's just as short but has fewer dependencies for
4529 // better ILP at the cost of more registers.
4530 //
4531 // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two
4532 // unconditional mov #0, two cmps, two conditional mov #1, and one
4533 // conditional reg mov. That has few dependencies for good ILP, but is a
4534 // longer sequence.
4535 //
4536 // So, we are going with the GCC version since it's usually better (except
4537 // perhaps for eq/ne). We could revisit special-casing eq/ne later.
4538 if (TableIcmp64[Condition].IsSigned) {
4539 Variable *ScratchReg = makeReg(IceType_i32);
4540 _cmp(Src0RLo, Src1RFLo);
4541 _sbcs(ScratchReg, Src0RHi, Src1RFHi);
4542 // ScratchReg isn't going to be used, but we need the side-effect of
4543 // setting flags from this operation.
4544 Context.insert<InstFakeUse>(ScratchReg);
4545 } else {
4546 _cmp(Src0RHi, Src1RFHi);
4547 _cmp(Src0RLo, Src1RFLo, CondARM32::EQ);
4548 }
4549 return CondWhenTrue(TableIcmp64[Condition].C1);
4550 }
4551
4552 TargetARM32::CondWhenTrue
4553 TargetARM32::lowerInt32IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
4554 Operand *Src1) {
4555 Int32Operands Srcs(Src0, Src1);
4556 if (!Srcs.hasConstOperand()) {
4557
4558 Variable *Src0R = Srcs.src0R(this);
4559 Operand *Src1RF = Srcs.src1RF(this);
4560 _cmp(Src0R, Src1RF);
4561 return CondWhenTrue(getIcmp32Mapping(Condition));
4562 }
4563
4564 Variable *Src0R = Srcs.src0R(this);
4565 const int32_t Value = Srcs.getConstantValue();
4566 if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && Value == 0) {
4567 _tst(Src0R, Src0R);
4568 return CondWhenTrue(getIcmp32Mapping(Condition));
4569 }
4570
4571 if (!Srcs.swappedOperands() && !Srcs.immediateIsFlexEncodable() &&
4572 Srcs.negatedImmediateIsFlexEncodable()) {
4573 Operand *Src1F = Srcs.negatedSrc1F(this);
4574 _cmn(Src0R, Src1F);
4575 return CondWhenTrue(getIcmp32Mapping(Condition));
4576 }
4577
4578 Operand *Src1RF = Srcs.src1RF(this);
4579 if (!Srcs.swappedOperands()) {
4580 _cmp(Src0R, Src1RF);
4581 } else {
4582 Variable *T = makeReg(IceType_i32);
4583 _rsbs(T, Src0R, Src1RF);
4584 Context.insert<InstFakeUse>(T);
4585 }
4586 return CondWhenTrue(getIcmp32Mapping(Condition));
4587 }
4588
4589 TargetARM32::CondWhenTrue
4590 TargetARM32::lowerInt8AndInt16IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
4591 Operand *Src1) {
4592 Int32Operands Srcs(Src0, Src1);
4593 const int32_t ShAmt = 32 - getScalarIntBitWidth(Src0->getType());
4594 assert(ShAmt >= 0);
4595
4596 if (!Srcs.hasConstOperand()) {
4597 Variable *Src0R = makeReg(IceType_i32);
4598 Operand *ShAmtImm = shAmtImm(ShAmt);
4599 _lsl(Src0R, legalizeToReg(Src0), ShAmtImm);
4600
4601 Variable *Src1R = legalizeToReg(Src1);
4602 auto *Src1F = OperandARM32FlexReg::create(Func, IceType_i32, Src1R,
4603 OperandARM32::LSL, ShAmtImm);
4604 _cmp(Src0R, Src1F);
4605 return CondWhenTrue(getIcmp32Mapping(Condition));
4606 }
4607
4608 const int32_t Value = Srcs.getConstantValue();
4609 if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && Value == 0) {
4610 Operand *ShAmtImm = shAmtImm(ShAmt);
4611 Variable *T = makeReg(IceType_i32);
4612 _lsls(T, Srcs.src0R(this), ShAmtImm);
4613 Context.insert<InstFakeUse>(T);
4614 return CondWhenTrue(getIcmp32Mapping(Condition));
4615 }
4616
4617 Variable *ConstR = makeReg(IceType_i32);
4618 _mov(ConstR,
4619 legalize(Ctx->getConstantInt32(Value << ShAmt), Legal_Reg | Legal_Flex));
4620 Operand *NonConstF = OperandARM32FlexReg::create(
4621 Func, IceType_i32, Srcs.src0R(this), OperandARM32::LSL,
4622 Ctx->getConstantInt32(ShAmt));
4623
4624 if (Srcs.swappedOperands()) {
4625 _cmp(ConstR, NonConstF);
4626 } else {
4627 Variable *T = makeReg(IceType_i32);
4628 _rsbs(T, ConstR, NonConstF);
4629 Context.insert<InstFakeUse>(T);
4630 }
4631 return CondWhenTrue(getIcmp32Mapping(Condition));
4632 }
4633
4634 TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Instr) {
4635 return lowerIcmpCond(Instr->getCondition(), Instr->getSrc(0),
4636 Instr->getSrc(1));
4637 }
4638
4639 TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(InstIcmp::ICond Condition,
4640 Operand *Src0,
4641 Operand *Src1) {
4642 Src0 = legalizeUndef(Src0);
4643 Src1 = legalizeUndef(Src1);
4644
4645 // a=icmp cond b, c ==>
4646 // GCC does:
4647 // <u/s>xtb tb, b
4648 // <u/s>xtb tc, c
4649 // cmp tb, tc
4650 // mov.C1 t, #0
4651 // mov.C2 t, #1
4652 // mov a, t
4653 // where the unsigned/sign extension is not needed for 32-bit. They also have
4654 // special cases for EQ and NE. E.g., for NE:
4655 // <extend to tb, tc>
4656 // subs t, tb, tc
4657 // movne t, #1
4658 // mov a, t
4659 //
4660 // LLVM does:
4661 // lsl tb, b, #<N>
4662 // mov t, #0
4663 // cmp tb, c, lsl #<N>
4664 // mov.<C> t, #1
4665 // mov a, t
4666 //
4667 // the left shift is by 0, 16, or 24, which allows the comparison to focus on
4668 // the digits that actually matter (for 16-bit or 8-bit signed/unsigned). For
4669 // the unsigned case, for some reason it does something similar to GCC and does
4670 // a uxtb first. It's not clear to me why that special-casing is needed.
4671 //
4672 // We'll go with the LLVM way for now, since it's shorter and has just as few
4673 // dependencies.
4674 switch (Src0->getType()) {
4675 default:
4676 llvm::report_fatal_error("Unhandled type in lowerIcmpCond");
4677 case IceType_i1:
4678 case IceType_i8:
4679 case IceType_i16:
4680 return lowerInt8AndInt16IcmpCond(Condition, Src0, Src1);
4681 case IceType_i32:
4682 return lowerInt32IcmpCond(Condition, Src0, Src1);
4683 case IceType_i64:
4684 return lowerInt64IcmpCond(Condition, Src0, Src1);
4685 }
4686 }
4687
4688 void TargetARM32::lowerIcmp(const InstIcmp *Instr) {
4689 Variable *Dest = Instr->getDest();
4690 const Type DestTy = Dest->getType();
4691
4692 if (isVectorType(DestTy)) {
4693 auto *T = makeReg(DestTy);
4694 auto *Src0 = legalizeToReg(Instr->getSrc(0));
4695 auto *Src1 = legalizeToReg(Instr->getSrc(1));
4696 const Type SrcTy = Src0->getType();
4697
4698 bool NeedsShl = false;
4699 Type NewTypeAfterShl;
4700 SizeT ShAmt;
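 // Vectors of i1 are shifted so the boolean bit lands in the most significant
 // bit of each element; the comparison then only depends on the bit that
 // actually matters (cf. the scalar i8/i16 lowering above).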
4701 switch (SrcTy) {
4702 default:
4703 break;
4704 case IceType_v16i1:
4705 NeedsShl = true;
4706 NewTypeAfterShl = IceType_v16i8;
4707 ShAmt = 7;
4708 break;
4709 case IceType_v8i1:
4710 NeedsShl = true;
4711 NewTypeAfterShl = IceType_v8i16;
4712 ShAmt = 15;
4713 break;
4714 case IceType_v4i1:
4715 NeedsShl = true;
4716 NewTypeAfterShl = IceType_v4i32;
4717 ShAmt = 31;
4718 break;
4719 }
4720
4721 if (NeedsShl) {
4722 auto *Imm = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(ShAmt));
4723 auto *Src0T = makeReg(NewTypeAfterShl);
4724 auto *Src0Shl = makeReg(NewTypeAfterShl);
4725 _mov(Src0T, Src0);
4726 _vshl(Src0Shl, Src0T, Imm);
4727 Src0 = Src0Shl;
4728
4729 auto *Src1T = makeReg(NewTypeAfterShl);
4730 auto *Src1Shl = makeReg(NewTypeAfterShl);
4731 _mov(Src1T, Src1);
4732 _vshl(Src1Shl, Src1T, Imm);
4733 Src1 = Src1Shl;
4734 }
4735
4736 switch (Instr->getCondition()) {
4737 default:
4738 llvm::report_fatal_error("Unhandled integer comparison.");
4739 #define _Vceq(T, S0, S1, Signed) _vceq(T, S0, S1)
4740 #define _Vcge(T, S0, S1, Signed) \
4741 _vcge(T, S0, S1) \
4742 ->setSignType(Signed ? InstARM32::FS_Signed : InstARM32::FS_Unsigned)
4743 #define _Vcgt(T, S0, S1, Signed) \
4744 _vcgt(T, S0, S1) \
4745 ->setSignType(Signed ? InstARM32::FS_Signed : InstARM32::FS_Unsigned)
4746 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V) \
4747 case InstIcmp::val: { \
4748 _Vc##C_V(T, (INV_V) ? Src1 : Src0, (INV_V) ? Src0 : Src1, is_signed); \
4749 if (NEG_V) { \
4750 auto *TInv = makeReg(DestTy); \
4751 _vmvn(TInv, T); \
4752 T = TInv; \
4753 } \
4754 } break;
4755 ICMPARM32_TABLE
4756 #undef X
4757 #undef _Vcgt
4758 #undef _Vcge
4759 #undef _Vceq
4760 }
4761 _mov(Dest, T);
4762 return;
4763 }
4764
4765 Operand *_0 =
4766 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
4767 Operand *_1 = legalize(Ctx->getConstantInt32(1), Legal_Reg | Legal_Flex);
4768 Variable *T = makeReg(IceType_i1);
4769
4770 _mov(T, _0);
4771 CondWhenTrue Cond = lowerIcmpCond(Instr);
4772 _mov_redefined(T, _1, Cond.WhenTrue0);
4773 _mov(Dest, T);
4774
4775 assert(Cond.WhenTrue1 == CondARM32::kNone);
4776
4777 return;
4778 }
4779
4780 void TargetARM32::lowerInsertElement(const InstInsertElement *Instr) {
4781 Variable *Dest = Instr->getDest();
4782 Type DestTy = Dest->getType();
4783
4784 Variable *Src0 = legalizeToReg(Instr->getSrc(0));
4785 Variable *Src1 = legalizeToReg(Instr->getSrc(1));
4786 Operand *Src2 = Instr->getSrc(2);
4787
4788 if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src2)) {
4789 const uint32_t Index = Imm->getValue();
4790 Variable *T = makeReg(DestTy);
4791
4792 if (isFloatingType(DestTy)) {
4793 T->setRegClass(RegARM32::RCARM32_QtoS);
4794 }
4795
4796 _mov(T, Src0);
4797 _insertelement(T, Src1, Index);
4798 _set_dest_redefined();
4799 _mov(Dest, T);
4800 return;
4801 }
4802 assert(false && "insertelement requires a constant index");
4803 }
4804
4805 namespace {
getConstantMemoryOrder(Operand * Opnd)4806 inline uint64_t getConstantMemoryOrder(Operand *Opnd) {
4807 if (auto *Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
4808 return Integer->getValue();
4809 return Intrinsics::MemoryOrderInvalid;
4810 }
4811 } // end of anonymous namespace
4812
lowerLoadLinkedStoreExclusive(Type Ty,Operand * Addr,std::function<Variable * (Variable *)> Operation,CondARM32::Cond Cond)4813 void TargetARM32::lowerLoadLinkedStoreExclusive(
4814 Type Ty, Operand *Addr, std::function<Variable *(Variable *)> Operation,
4815 CondARM32::Cond Cond) {
4816
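  // Emits a load-linked/store-exclusive retry loop, applying Operation to the
  // loaded value to produce the value to store. Roughly (a sketch; register
  // names are illustrative, and the conditional-store case is omitted):
  //
  // retry:
  //   ldrex tmp, [addr]
  //   <Operation: tmp -> value>
  //   strex success, value, [addr]
  //   cmp success, #0
  //   bne retry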
4817 auto *Retry = Context.insert<InstARM32Label>(this);
4818
4819 { // scoping for loop highlighting.
4820 Variable *Success = makeReg(IceType_i32);
4821 Variable *Tmp = (Ty == IceType_i64) ? makeI64RegPair() : makeReg(Ty);
4822 auto *_0 = Ctx->getConstantZero(IceType_i32);
4823
4824 Context.insert<InstFakeDef>(Tmp);
4825 Context.insert<InstFakeUse>(Tmp);
4826 Variable *AddrR = legalizeToReg(Addr);
4827 _ldrex(Tmp, formMemoryOperand(AddrR, Ty))->setDestRedefined();
4828 auto *StoreValue = Operation(Tmp);
4829 assert(StoreValue->mustHaveReg());
4830 // strex requires Dest to be a register other than Value or Addr. This
4831 // restriction is cleanly represented by adding an "early" definition of
4832     // Dest (or a later use of all the sources.)
4833 Context.insert<InstFakeDef>(Success);
4834 if (Cond != CondARM32::AL) {
4835 _mov_redefined(Success, legalize(_0, Legal_Reg | Legal_Flex),
4836 InstARM32::getOppositeCondition(Cond));
4837 }
4838 _strex(Success, StoreValue, formMemoryOperand(AddrR, Ty), Cond)
4839 ->setDestRedefined();
4840 _cmp(Success, _0);
4841 }
4842
4843 _br(Retry, CondARM32::NE);
4844 }
4845
4846 namespace {
createArithInst(Cfg * Func,uint32_t Operation,Variable * Dest,Variable * Src0,Operand * Src1)4847 InstArithmetic *createArithInst(Cfg *Func, uint32_t Operation, Variable *Dest,
4848 Variable *Src0, Operand *Src1) {
4849 InstArithmetic::OpKind Oper;
4850 switch (Operation) {
4851 default:
4852 llvm::report_fatal_error("Unknown AtomicRMW operation");
4853 case Intrinsics::AtomicExchange:
4854 llvm::report_fatal_error("Can't handle Atomic xchg operation");
4855 case Intrinsics::AtomicAdd:
4856 Oper = InstArithmetic::Add;
4857 break;
4858 case Intrinsics::AtomicAnd:
4859 Oper = InstArithmetic::And;
4860 break;
4861 case Intrinsics::AtomicSub:
4862 Oper = InstArithmetic::Sub;
4863 break;
4864 case Intrinsics::AtomicOr:
4865 Oper = InstArithmetic::Or;
4866 break;
4867 case Intrinsics::AtomicXor:
4868 Oper = InstArithmetic::Xor;
4869 break;
4870 }
4871 return InstArithmetic::create(Func, Oper, Dest, Src0, Src1);
4872 }
4873 } // end of anonymous namespace
4874
lowerAtomicRMW(Variable * Dest,uint32_t Operation,Operand * Addr,Operand * Val)4875 void TargetARM32::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
4876 Operand *Addr, Operand *Val) {
4877 // retry:
4878 // ldrex tmp, [addr]
4879 // mov contents, tmp
4880 // op result, contents, Val
4881 // strex success, result, [addr]
4882 // cmp success, 0
4883 // jne retry
4884 // fake-use(addr, operand) @ prevents undesirable clobbering.
4885 // mov dest, contents
4886 auto DestTy = Dest->getType();
4887
4888 if (DestTy == IceType_i64) {
4889 lowerInt64AtomicRMW(Dest, Operation, Addr, Val);
4890 return;
4891 }
4892
4893 Operand *ValRF = nullptr;
4894 if (llvm::isa<ConstantInteger32>(Val)) {
4895 ValRF = Val;
4896 } else {
4897 ValRF = legalizeToReg(Val);
4898 }
4899 auto *ContentsR = makeReg(DestTy);
4900 auto *ResultR = makeReg(DestTy);
4901
4902 _dmb();
4903 lowerLoadLinkedStoreExclusive(
4904 DestTy, Addr,
4905 [this, Operation, ResultR, ContentsR, ValRF](Variable *Tmp) {
4906 lowerAssign(InstAssign::create(Func, ContentsR, Tmp));
4907 if (Operation == Intrinsics::AtomicExchange) {
4908 lowerAssign(InstAssign::create(Func, ResultR, ValRF));
4909 } else {
4910 lowerArithmetic(
4911 createArithInst(Func, Operation, ResultR, ContentsR, ValRF));
4912 }
4913 return ResultR;
4914 });
4915 _dmb();
4916 if (auto *ValR = llvm::dyn_cast<Variable>(ValRF)) {
4917 Context.insert<InstFakeUse>(ValR);
4918 }
4919 // Can't dce ContentsR.
4920 Context.insert<InstFakeUse>(ContentsR);
4921 lowerAssign(InstAssign::create(Func, Dest, ContentsR));
4922 }
4923
lowerInt64AtomicRMW(Variable * Dest,uint32_t Operation,Operand * Addr,Operand * Val)4924 void TargetARM32::lowerInt64AtomicRMW(Variable *Dest, uint32_t Operation,
4925 Operand *Addr, Operand *Val) {
4926 assert(Dest->getType() == IceType_i64);
4927
4928 auto *ResultR = makeI64RegPair();
4929
4930 Context.insert<InstFakeDef>(ResultR);
4931
4932 Operand *ValRF = nullptr;
4933 if (llvm::dyn_cast<ConstantInteger64>(Val)) {
4934 ValRF = Val;
4935 } else {
4936 auto *ValR64 = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
4937 ValR64->initHiLo(Func);
4938 ValR64->setMustNotHaveReg();
4939 ValR64->getLo()->setMustHaveReg();
4940 ValR64->getHi()->setMustHaveReg();
4941 lowerAssign(InstAssign::create(Func, ValR64, Val));
4942 ValRF = ValR64;
4943 }
4944
4945 auto *ContentsR = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
4946 ContentsR->initHiLo(Func);
4947 ContentsR->setMustNotHaveReg();
4948 ContentsR->getLo()->setMustHaveReg();
4949 ContentsR->getHi()->setMustHaveReg();
4950
4951 _dmb();
4952 lowerLoadLinkedStoreExclusive(
4953 IceType_i64, Addr,
4954 [this, Operation, ResultR, ContentsR, ValRF](Variable *Tmp) {
4955 lowerAssign(InstAssign::create(Func, ContentsR, Tmp));
4956 Context.insert<InstFakeUse>(Tmp);
4957 if (Operation == Intrinsics::AtomicExchange) {
4958 lowerAssign(InstAssign::create(Func, ResultR, ValRF));
4959 } else {
4960 lowerArithmetic(
4961 createArithInst(Func, Operation, ResultR, ContentsR, ValRF));
4962 }
4963 Context.insert<InstFakeUse>(ResultR->getHi());
4964 Context.insert<InstFakeDef>(ResultR, ResultR->getLo())
4965 ->setDestRedefined();
4966 return ResultR;
4967 });
4968 _dmb();
4969 if (auto *ValR64 = llvm::dyn_cast<Variable64On32>(ValRF)) {
4970 Context.insert<InstFakeUse>(ValR64->getLo());
4971 Context.insert<InstFakeUse>(ValR64->getHi());
4972 }
4973 lowerAssign(InstAssign::create(Func, Dest, ContentsR));
4974 }
4975
postambleCtpop64(const InstCall * Instr)4976 void TargetARM32::postambleCtpop64(const InstCall *Instr) {
4977 Operand *Arg0 = Instr->getArg(0);
4978 if (isInt32Asserting32Or64(Arg0->getType())) {
4979 return;
4980 }
4981 // The popcount helpers always return 32-bit values, while the intrinsic's
4982   // signature matches some 64-bit platforms' native instructions and expects to
4983 // fill a 64-bit reg. Thus, clear the upper bits of the dest just in case the
4984 // user doesn't do that in the IR or doesn't toss the bits via truncate.
4985 auto *DestHi = llvm::cast<Variable>(hiOperand(Instr->getDest()));
4986 Variable *T = makeReg(IceType_i32);
4987 Operand *_0 =
4988 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
4989 _mov(T, _0);
4990 _mov(DestHi, T);
4991 }
4992
lowerIntrinsicCall(const InstIntrinsicCall * Instr)4993 void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
4994 Variable *Dest = Instr->getDest();
4995 Type DestTy = (Dest != nullptr) ? Dest->getType() : IceType_void;
4996 Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID;
4997 switch (ID) {
4998 case Intrinsics::AtomicFence:
4999 case Intrinsics::AtomicFenceAll:
5000 assert(Dest == nullptr);
5001 _dmb();
5002 return;
5003 case Intrinsics::AtomicIsLockFree: {
5004 Operand *ByteSize = Instr->getArg(0);
5005 auto *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize);
5006 if (CI == nullptr) {
5007 // The PNaCl ABI requires the byte size to be a compile-time constant.
5008 Func->setError("AtomicIsLockFree byte size should be compile-time const");
5009 return;
5010 }
5011 static constexpr int32_t NotLockFree = 0;
5012 static constexpr int32_t LockFree = 1;
5013 int32_t Result = NotLockFree;
5014 switch (CI->getValue()) {
5015 case 1:
5016 case 2:
5017 case 4:
5018 case 8:
5019 Result = LockFree;
5020 break;
5021 }
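    // E.g. for ByteSize == 4 the switch above selects LockFree, so the
    // intrinsic lowers to materializing the constant 1 into Dest.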
5022 _mov(Dest, legalizeToReg(Ctx->getConstantInt32(Result)));
5023 return;
5024 }
5025 case Intrinsics::AtomicLoad: {
5026 assert(isScalarIntegerType(DestTy));
5027 // We require the memory address to be naturally aligned. Given that is the
5028 // case, then normal loads are atomic.
5029 if (!Intrinsics::isMemoryOrderValid(
5030 ID, getConstantMemoryOrder(Instr->getArg(1)))) {
5031 Func->setError("Unexpected memory ordering for AtomicLoad");
5032 return;
5033 }
5034 Variable *T;
5035
5036 if (DestTy == IceType_i64) {
5037       // ldrex is the only ARM instruction that is guaranteed to load a 64-bit
5038 // integer atomically. Everything else works with a regular ldr.
5039 T = makeI64RegPair();
5040 _ldrex(T, formMemoryOperand(Instr->getArg(0), IceType_i64));
5041 } else {
5042 T = makeReg(DestTy);
5043 _ldr(T, formMemoryOperand(Instr->getArg(0), DestTy));
5044 }
5045 _dmb();
5046 lowerAssign(InstAssign::create(Func, Dest, T));
5047     // Add a fake use of T to ensure the atomic load is not removed if Dest is
5048 // unused.
5049 Context.insert<InstFakeUse>(T);
5050 return;
5051 }
5052 case Intrinsics::AtomicStore: {
5053 // We require the memory address to be naturally aligned. Given that is the
5054     // case, then normal stores are atomic.
5055 if (!Intrinsics::isMemoryOrderValid(
5056 ID, getConstantMemoryOrder(Instr->getArg(2)))) {
5057 Func->setError("Unexpected memory ordering for AtomicStore");
5058 return;
5059 }
5060
5061 auto *Value = Instr->getArg(0);
5062 if (Value->getType() == IceType_i64) {
5063 auto *ValueR = makeI64RegPair();
5064 Context.insert<InstFakeDef>(ValueR);
5065 lowerAssign(InstAssign::create(Func, ValueR, Value));
5066 _dmb();
5067 lowerLoadLinkedStoreExclusive(
5068 IceType_i64, Instr->getArg(1), [this, ValueR](Variable *Tmp) {
5069 // The following fake-use prevents the ldrex instruction from being
5070 // dead code eliminated.
5071 Context.insert<InstFakeUse>(llvm::cast<Variable>(loOperand(Tmp)));
5072 Context.insert<InstFakeUse>(llvm::cast<Variable>(hiOperand(Tmp)));
5073 Context.insert<InstFakeUse>(Tmp);
5074 return ValueR;
5075 });
5076 Context.insert<InstFakeUse>(ValueR);
5077 _dmb();
5078 return;
5079 }
5080
5081 auto *ValueR = legalizeToReg(Instr->getArg(0));
5082 const auto ValueTy = ValueR->getType();
5083 assert(isScalarIntegerType(ValueTy));
5084 auto *Addr = legalizeToReg(Instr->getArg(1));
5085
5086     // Non-64-bit stores are atomic as long as the address is aligned. This
5087 // is PNaCl, so addresses are aligned.
5088 _dmb();
5089 _str(ValueR, formMemoryOperand(Addr, ValueTy));
5090 _dmb();
5091 return;
5092 }
5093 case Intrinsics::AtomicCmpxchg: {
5094 // retry:
5095 // ldrex tmp, [addr]
5096 // cmp tmp, expected
5097 // mov expected, tmp
5098 // strexeq success, new, [addr]
5099 // cmpeq success, #0
5100 // bne retry
5101 // mov dest, expected
5102 assert(isScalarIntegerType(DestTy));
5103 // We require the memory address to be naturally aligned. Given that is the
5104 // case, then normal loads are atomic.
5105 if (!Intrinsics::isMemoryOrderValid(
5106 ID, getConstantMemoryOrder(Instr->getArg(3)),
5107 getConstantMemoryOrder(Instr->getArg(4)))) {
5108 Func->setError("Unexpected memory ordering for AtomicCmpxchg");
5109 return;
5110 }
5111
5112 if (DestTy == IceType_i64) {
5113 Variable *LoadedValue = nullptr;
5114
5115 auto *New = makeI64RegPair();
5116 Context.insert<InstFakeDef>(New);
5117 lowerAssign(InstAssign::create(Func, New, Instr->getArg(2)));
5118
5119 auto *Expected = makeI64RegPair();
5120 Context.insert<InstFakeDef>(Expected);
5121 lowerAssign(InstAssign::create(Func, Expected, Instr->getArg(1)));
5122
5123 _dmb();
5124 lowerLoadLinkedStoreExclusive(
5125 DestTy, Instr->getArg(0),
5126 [this, Expected, New, Instr, DestTy, &LoadedValue](Variable *Tmp) {
5127 auto *ExpectedLoR = llvm::cast<Variable>(loOperand(Expected));
5128 auto *ExpectedHiR = llvm::cast<Variable>(hiOperand(Expected));
5129 auto *TmpLoR = llvm::cast<Variable>(loOperand(Tmp));
5130 auto *TmpHiR = llvm::cast<Variable>(hiOperand(Tmp));
5131 _cmp(TmpLoR, ExpectedLoR);
5132 _cmp(TmpHiR, ExpectedHiR, CondARM32::EQ);
5133 LoadedValue = Tmp;
5134 return New;
5135 },
5136 CondARM32::EQ);
5137 _dmb();
5138
5139 Context.insert<InstFakeUse>(LoadedValue);
5140 lowerAssign(InstAssign::create(Func, Dest, LoadedValue));
5141 // The fake-use Expected prevents the assignments to Expected (above)
5142 // from being removed if Dest is not used.
5143 Context.insert<InstFakeUse>(Expected);
5144 // New needs to be alive here, or its live range will end in the
5145 // strex instruction.
5146 Context.insert<InstFakeUse>(New);
5147 return;
5148 }
5149
5150 auto *New = legalizeToReg(Instr->getArg(2));
5151 auto *Expected = legalizeToReg(Instr->getArg(1));
5152 Variable *LoadedValue = nullptr;
5153
5154 _dmb();
5155 lowerLoadLinkedStoreExclusive(
5156 DestTy, Instr->getArg(0),
5157 [this, Expected, New, Instr, DestTy, &LoadedValue](Variable *Tmp) {
5158 lowerIcmpCond(InstIcmp::Eq, Tmp, Expected);
5159 LoadedValue = Tmp;
5160 return New;
5161 },
5162 CondARM32::EQ);
5163 _dmb();
5164
5165 lowerAssign(InstAssign::create(Func, Dest, LoadedValue));
5166 Context.insert<InstFakeUse>(Expected);
5167 Context.insert<InstFakeUse>(New);
5168 return;
5169 }
5170 case Intrinsics::AtomicRMW: {
5171 if (!Intrinsics::isMemoryOrderValid(
5172 ID, getConstantMemoryOrder(Instr->getArg(3)))) {
5173 Func->setError("Unexpected memory ordering for AtomicRMW");
5174 return;
5175 }
5176 lowerAtomicRMW(
5177 Dest, static_cast<uint32_t>(
5178 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()),
5179 Instr->getArg(1), Instr->getArg(2));
5180 return;
5181 }
5182 case Intrinsics::Bswap: {
5183 Operand *Val = Instr->getArg(0);
5184 Type Ty = Val->getType();
5185 if (Ty == IceType_i64) {
5186 Val = legalizeUndef(Val);
5187 Variable *Val_Lo = legalizeToReg(loOperand(Val));
5188 Variable *Val_Hi = legalizeToReg(hiOperand(Val));
5189 Variable *T_Lo = makeReg(IceType_i32);
5190 Variable *T_Hi = makeReg(IceType_i32);
5191 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
5192 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
5193 _rev(T_Lo, Val_Lo);
5194 _rev(T_Hi, Val_Hi);
5195 _mov(DestLo, T_Hi);
5196 _mov(DestHi, T_Lo);
5197 } else {
5198 assert(Ty == IceType_i32 || Ty == IceType_i16);
5199 Variable *ValR = legalizeToReg(Val);
5200 Variable *T = makeReg(Ty);
5201 _rev(T, ValR);
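      // For i16, rev operates on the full 32-bit register, leaving the swapped
      // halfword in the upper 16 bits, so shift it back down.
      // E.g. 0x1234 -> rev -> 0x34120000 -> lsr #16 -> 0x3412.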
5202 if (Val->getType() == IceType_i16) {
5203 Operand *_16 = shAmtImm(16);
5204 _lsr(T, T, _16);
5205 }
5206 _mov(Dest, T);
5207 }
5208 return;
5209 }
5210 case Intrinsics::Ctpop: {
5211 llvm::report_fatal_error("Ctpop should have been prelowered.");
5212 }
5213 case Intrinsics::Ctlz: {
5214 // The "is zero undef" parameter is ignored and we always return a
5215 // well-defined value.
5216 Operand *Val = Instr->getArg(0);
5217 Variable *ValLoR;
5218 Variable *ValHiR = nullptr;
5219 if (Val->getType() == IceType_i64) {
5220 Val = legalizeUndef(Val);
5221 ValLoR = legalizeToReg(loOperand(Val));
5222 ValHiR = legalizeToReg(hiOperand(Val));
5223 } else {
5224 ValLoR = legalizeToReg(Val);
5225 }
5226 lowerCLZ(Dest, ValLoR, ValHiR);
5227 return;
5228 }
5229 case Intrinsics::Cttz: {
5230 // Essentially like Clz, but reverse the bits first.
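    // cttz(x) == clz(rbit(x)); e.g. for x = 0x8, rbit gives 0x10000000 and clz
    // of that is 3.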
5231 Operand *Val = Instr->getArg(0);
5232 Variable *ValLoR;
5233 Variable *ValHiR = nullptr;
5234 if (Val->getType() == IceType_i64) {
5235 Val = legalizeUndef(Val);
5236 ValLoR = legalizeToReg(loOperand(Val));
5237 ValHiR = legalizeToReg(hiOperand(Val));
5238 Variable *TLo = makeReg(IceType_i32);
5239 Variable *THi = makeReg(IceType_i32);
5240 _rbit(TLo, ValLoR);
5241 _rbit(THi, ValHiR);
5242 ValLoR = THi;
5243 ValHiR = TLo;
5244 } else {
5245 ValLoR = legalizeToReg(Val);
5246 Variable *T = makeReg(IceType_i32);
5247 _rbit(T, ValLoR);
5248 ValLoR = T;
5249 }
5250 lowerCLZ(Dest, ValLoR, ValHiR);
5251 return;
5252 }
5253 case Intrinsics::Fabs: {
5254 Type DestTy = Dest->getType();
5255 Variable *T = makeReg(DestTy);
5256 _vabs(T, legalizeToReg(Instr->getArg(0)));
5257 _mov(Dest, T);
5258 return;
5259 }
5260 case Intrinsics::Longjmp: {
5261 llvm::report_fatal_error("longjmp should have been prelowered.");
5262 }
5263 case Intrinsics::Memcpy: {
5264 llvm::report_fatal_error("memcpy should have been prelowered.");
5265 }
5266 case Intrinsics::Memmove: {
5267 llvm::report_fatal_error("memmove should have been prelowered.");
5268 }
5269 case Intrinsics::Memset: {
5270     llvm::report_fatal_error("memset should have been prelowered.");
5271 }
5272 case Intrinsics::NaClReadTP: {
5273 if (SandboxingType != ST_NaCl) {
5274 llvm::report_fatal_error("nacl-read-tp should have been prelowered.");
5275 }
5276 Variable *TP = legalizeToReg(OperandARM32Mem::create(
5277 Func, getPointerType(), getPhysicalRegister(RegARM32::Reg_r9),
5278 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32))));
5279 _mov(Dest, TP);
5280 return;
5281 }
5282 case Intrinsics::Setjmp: {
5283 llvm::report_fatal_error("setjmp should have been prelowered.");
5284 }
5285 case Intrinsics::Sqrt: {
5286 assert(isScalarFloatingType(Dest->getType()) ||
5287 getFlags().getApplicationBinaryInterface() != ::Ice::ABI_PNaCl);
5288 Variable *Src = legalizeToReg(Instr->getArg(0));
5289 Variable *T = makeReg(Dest->getType());
5290 _vsqrt(T, Src);
5291 _mov(Dest, T);
5292 return;
5293 }
5294 case Intrinsics::Stacksave: {
5295 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
5296 _mov(Dest, SP);
5297 return;
5298 }
5299 case Intrinsics::Stackrestore: {
5300 Variable *Val = legalizeToReg(Instr->getArg(0));
5301 Sandboxer(this).reset_sp(Val);
5302 return;
5303 }
5304 case Intrinsics::Trap:
5305 _trap();
5306 return;
5307 case Intrinsics::LoadSubVector: {
5308 UnimplementedLoweringError(this, Instr);
5309 return;
5310 }
5311 case Intrinsics::StoreSubVector: {
5312 UnimplementedLoweringError(this, Instr);
5313 return;
5314 }
5315 default: // UnknownIntrinsic
5316 Func->setError("Unexpected intrinsic");
5317 return;
5318 }
5319 return;
5320 }
5321
lowerCLZ(Variable * Dest,Variable * ValLoR,Variable * ValHiR)5322 void TargetARM32::lowerCLZ(Variable *Dest, Variable *ValLoR, Variable *ValHiR) {
5323 Type Ty = Dest->getType();
5324 assert(Ty == IceType_i32 || Ty == IceType_i64);
5325 Variable *T = makeReg(IceType_i32);
5326 _clz(T, ValLoR);
5327 if (Ty == IceType_i64) {
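    // For a 64-bit value: if the high word is nonzero the answer is clz(hi),
    // otherwise it is clz(lo) + 32. E.g. for 0x0000000100000000 the result is
    // clz(1) == 31.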
5328 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
5329 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
5330 Operand *Zero =
5331 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
5332 Operand *ThirtyTwo =
5333 legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
5334 _cmp(ValHiR, Zero);
5335 Variable *T2 = makeReg(IceType_i32);
5336 _add(T2, T, ThirtyTwo);
5337 _clz(T2, ValHiR, CondARM32::NE);
5338 // T2 is actually a source as well when the predicate is not AL (since it
5339 // may leave T2 alone). We use _set_dest_redefined to prolong the liveness
5340 // of T2 as if it was used as a source.
5341 _set_dest_redefined();
5342 _mov(DestLo, T2);
5343 Variable *T3 = makeReg(Zero->getType());
5344 _mov(T3, Zero);
5345 _mov(DestHi, T3);
5346 return;
5347 }
5348 _mov(Dest, T);
5349 return;
5350 }
5351
lowerLoad(const InstLoad * Load)5352 void TargetARM32::lowerLoad(const InstLoad *Load) {
5353 // A Load instruction can be treated the same as an Assign instruction, after
5354 // the source operand is transformed into an OperandARM32Mem operand.
5355 Type Ty = Load->getDest()->getType();
5356 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
5357 Variable *DestLoad = Load->getDest();
5358
5359   // TODO(jvoung): handle folding opportunities. Sign and zero extension can
5360 // be folded into a load.
5361 auto *Assign = InstAssign::create(Func, DestLoad, Src0);
5362 lowerAssign(Assign);
5363 }
5364
5365 namespace {
dumpAddressOpt(const Cfg * Func,const Variable * Base,int32_t Offset,const Variable * OffsetReg,int16_t OffsetRegShAmt,const Inst * Reason)5366 void dumpAddressOpt(const Cfg *Func, const Variable *Base, int32_t Offset,
5367 const Variable *OffsetReg, int16_t OffsetRegShAmt,
5368 const Inst *Reason) {
5369 if (!BuildDefs::dump())
5370 return;
5371 if (!Func->isVerbose(IceV_AddrOpt))
5372 return;
5373 OstreamLocker _(Func->getContext());
5374 Ostream &Str = Func->getContext()->getStrDump();
5375 Str << "Instruction: ";
5376 Reason->dumpDecorated(Func);
5377 Str << " results in Base=";
5378 if (Base)
5379 Base->dump(Func);
5380 else
5381 Str << "<null>";
5382 Str << ", OffsetReg=";
5383 if (OffsetReg)
5384 OffsetReg->dump(Func);
5385 else
5386 Str << "<null>";
5387 Str << ", Shift=" << OffsetRegShAmt << ", Offset=" << Offset << "\n";
5388 }
5389
matchAssign(const VariablesMetadata * VMetadata,Variable ** Var,int32_t * Offset,const Inst ** Reason)5390 bool matchAssign(const VariablesMetadata *VMetadata, Variable **Var,
5391 int32_t *Offset, const Inst **Reason) {
5392 // Var originates from Var=SrcVar ==> set Var:=SrcVar
5393 if (*Var == nullptr)
5394 return false;
5395 const Inst *VarAssign = VMetadata->getSingleDefinition(*Var);
5396 if (!VarAssign)
5397 return false;
5398 assert(!VMetadata->isMultiDef(*Var));
5399 if (!llvm::isa<InstAssign>(VarAssign))
5400 return false;
5401
5402 Operand *SrcOp = VarAssign->getSrc(0);
5403 bool Optimized = false;
5404 if (auto *SrcVar = llvm::dyn_cast<Variable>(SrcOp)) {
5405 if (!VMetadata->isMultiDef(SrcVar) ||
5406 // TODO: ensure SrcVar stays single-BB
5407 false) {
5408 Optimized = true;
5409 *Var = SrcVar;
5410 } else if (auto *Const = llvm::dyn_cast<ConstantInteger32>(SrcOp)) {
5411 int32_t MoreOffset = Const->getValue();
5412 int32_t NewOffset = MoreOffset + *Offset;
5413 if (Utils::WouldOverflowAdd(*Offset, MoreOffset))
5414 return false;
5415 *Var = nullptr;
5416 *Offset += NewOffset;
5417 Optimized = true;
5418 }
5419 }
5420
5421 if (Optimized) {
5422 *Reason = VarAssign;
5423 }
5424
5425 return Optimized;
5426 }
5427
isAddOrSub(const Inst * Instr,InstArithmetic::OpKind * Kind)5428 bool isAddOrSub(const Inst *Instr, InstArithmetic::OpKind *Kind) {
5429 if (const auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) {
5430 switch (Arith->getOp()) {
5431 default:
5432 return false;
5433 case InstArithmetic::Add:
5434 case InstArithmetic::Sub:
5435 *Kind = Arith->getOp();
5436 return true;
5437 }
5438 }
5439 return false;
5440 }
5441
matchCombinedBaseIndex(const VariablesMetadata * VMetadata,Variable ** Base,Variable ** OffsetReg,int32_t OffsetRegShamt,const Inst ** Reason)5442 bool matchCombinedBaseIndex(const VariablesMetadata *VMetadata, Variable **Base,
5443 Variable **OffsetReg, int32_t OffsetRegShamt,
5444 const Inst **Reason) {
5445 // OffsetReg==nullptr && Base is Base=Var1+Var2 ==>
5446 // set Base=Var1, OffsetReg=Var2, Shift=0
5447 if (*Base == nullptr)
5448 return false;
5449 if (*OffsetReg != nullptr)
5450 return false;
5451 (void)OffsetRegShamt;
5452 assert(OffsetRegShamt == 0);
5453 const Inst *BaseInst = VMetadata->getSingleDefinition(*Base);
5454 if (BaseInst == nullptr)
5455 return false;
5456 assert(!VMetadata->isMultiDef(*Base));
5457 if (BaseInst->getSrcSize() < 2)
5458 return false;
5459 auto *Var1 = llvm::dyn_cast<Variable>(BaseInst->getSrc(0));
5460 if (!Var1)
5461 return false;
5462 if (VMetadata->isMultiDef(Var1))
5463 return false;
5464 auto *Var2 = llvm::dyn_cast<Variable>(BaseInst->getSrc(1));
5465 if (!Var2)
5466 return false;
5467 if (VMetadata->isMultiDef(Var2))
5468 return false;
5469 InstArithmetic::OpKind _;
5470 if (!isAddOrSub(BaseInst, &_) ||
5471 // TODO: ensure Var1 and Var2 stay single-BB
5472 false)
5473 return false;
5474 *Base = Var1;
5475 *OffsetReg = Var2;
5476 // OffsetRegShamt is already 0.
5477 *Reason = BaseInst;
5478 return true;
5479 }
5480
matchShiftedOffsetReg(const VariablesMetadata * VMetadata,Variable ** OffsetReg,OperandARM32::ShiftKind * Kind,int32_t * OffsetRegShamt,const Inst ** Reason)5481 bool matchShiftedOffsetReg(const VariablesMetadata *VMetadata,
5482 Variable **OffsetReg, OperandARM32::ShiftKind *Kind,
5483 int32_t *OffsetRegShamt, const Inst **Reason) {
5484 // OffsetReg is OffsetReg=Var*Const && log2(Const)+Shift<=32 ==>
5485 // OffsetReg=Var, Shift+=log2(Const)
5486 // OffsetReg is OffsetReg=Var<<Const && Const+Shift<=32 ==>
5487 // OffsetReg=Var, Shift+=Const
5488 // OffsetReg is OffsetReg=Var>>Const && Const-Shift>=-32 ==>
5489 // OffsetReg=Var, Shift-=Const
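  // E.g. OffsetReg = Var * 4 (a power of two) becomes OffsetReg = Var with
  // Shift += 2, i.e. an LSL #2 in the final addressing mode.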
5490 OperandARM32::ShiftKind NewShiftKind = OperandARM32::kNoShift;
5491 if (*OffsetReg == nullptr)
5492 return false;
5493 auto *IndexInst = VMetadata->getSingleDefinition(*OffsetReg);
5494 if (IndexInst == nullptr)
5495 return false;
5496 assert(!VMetadata->isMultiDef(*OffsetReg));
5497 if (IndexInst->getSrcSize() < 2)
5498 return false;
5499 auto *ArithInst = llvm::dyn_cast<InstArithmetic>(IndexInst);
5500 if (ArithInst == nullptr)
5501 return false;
5502 auto *Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(0));
5503 if (Var == nullptr)
5504 return false;
5505 auto *Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1));
5506 if (Const == nullptr) {
5507 assert(!llvm::isa<ConstantInteger32>(ArithInst->getSrc(0)));
5508 return false;
5509 }
5510 if (VMetadata->isMultiDef(Var) || Const->getType() != IceType_i32)
5511 return false;
5512
5513 uint32_t NewShamt = -1;
5514 switch (ArithInst->getOp()) {
5515 default:
5516 return false;
5517 case InstArithmetic::Shl: {
5518 NewShiftKind = OperandARM32::LSL;
5519 NewShamt = Const->getValue();
5520 if (NewShamt > 31)
5521 return false;
5522 } break;
5523 case InstArithmetic::Lshr: {
5524 NewShiftKind = OperandARM32::LSR;
5525 NewShamt = Const->getValue();
5526 if (NewShamt > 31)
5527 return false;
5528 } break;
5529 case InstArithmetic::Ashr: {
5530 NewShiftKind = OperandARM32::ASR;
5531 NewShamt = Const->getValue();
5532 if (NewShamt > 31)
5533 return false;
5534 } break;
5535 case InstArithmetic::Udiv:
5536 case InstArithmetic::Mul: {
5537 const uint32_t UnsignedConst = Const->getValue();
5538 NewShamt = llvm::findFirstSet(UnsignedConst);
5539 if (NewShamt != llvm::findLastSet(UnsignedConst)) {
5540 // First bit set is not the same as the last bit set, so Const is not
5541 // a power of 2.
5542 return false;
5543 }
5544 NewShiftKind = ArithInst->getOp() == InstArithmetic::Udiv
5545 ? OperandARM32::LSR
5546 : OperandARM32::LSL;
5547 } break;
5548 }
5549 // Allowed "transitions":
5550 // kNoShift -> * iff NewShamt < 31
5551 // LSL -> LSL iff NewShamt + OffsetRegShamt < 31
5552 // LSR -> LSR iff NewShamt + OffsetRegShamt < 31
5553 // ASR -> ASR iff NewShamt + OffsetRegShamt < 31
5554 if (*Kind != OperandARM32::kNoShift && *Kind != NewShiftKind) {
5555 return false;
5556 }
5557 const int32_t NewOffsetRegShamt = *OffsetRegShamt + NewShamt;
5558 if (NewOffsetRegShamt > 31)
5559 return false;
5560 *OffsetReg = Var;
5561 *OffsetRegShamt = NewOffsetRegShamt;
5562 *Kind = NewShiftKind;
5563 *Reason = IndexInst;
5564 return true;
5565 }
5566
matchOffsetBase(const VariablesMetadata * VMetadata,Variable ** Base,int32_t * Offset,const Inst ** Reason)5567 bool matchOffsetBase(const VariablesMetadata *VMetadata, Variable **Base,
5568 int32_t *Offset, const Inst **Reason) {
5569 // Base is Base=Var+Const || Base is Base=Const+Var ==>
5570 // set Base=Var, Offset+=Const
5571 // Base is Base=Var-Const ==>
5572 // set Base=Var, Offset-=Const
5573 if (*Base == nullptr)
5574 return false;
5575 const Inst *BaseInst = VMetadata->getSingleDefinition(*Base);
5576 if (BaseInst == nullptr) {
5577 return false;
5578 }
5579 assert(!VMetadata->isMultiDef(*Base));
5580
5581 auto *ArithInst = llvm::dyn_cast<const InstArithmetic>(BaseInst);
5582 if (ArithInst == nullptr)
5583 return false;
5584 InstArithmetic::OpKind Kind;
5585 if (!isAddOrSub(ArithInst, &Kind))
5586 return false;
5587 bool IsAdd = Kind == InstArithmetic::Add;
5588 Operand *Src0 = ArithInst->getSrc(0);
5589 Operand *Src1 = ArithInst->getSrc(1);
5590 auto *Var0 = llvm::dyn_cast<Variable>(Src0);
5591 auto *Var1 = llvm::dyn_cast<Variable>(Src1);
5592 auto *Const0 = llvm::dyn_cast<ConstantInteger32>(Src0);
5593 auto *Const1 = llvm::dyn_cast<ConstantInteger32>(Src1);
5594 Variable *NewBase = nullptr;
5595 int32_t NewOffset = *Offset;
5596
5597 if (Var0 == nullptr && Const0 == nullptr) {
5598 assert(llvm::isa<ConstantRelocatable>(Src0));
5599 return false;
5600 }
5601
5602 if (Var1 == nullptr && Const1 == nullptr) {
5603 assert(llvm::isa<ConstantRelocatable>(Src1));
5604 return false;
5605 }
5606
5607 if (Var0 && Var1)
5608 // TODO(jpp): merge base/index splitting into here.
5609 return false;
5610 if (!IsAdd && Var1)
5611 return false;
5612 if (Var0)
5613 NewBase = Var0;
5614 else if (Var1)
5615 NewBase = Var1;
5616 // Compute the updated constant offset.
5617 if (Const0) {
5618 int32_t MoreOffset = IsAdd ? Const0->getValue() : -Const0->getValue();
5619 if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
5620 return false;
5621 NewOffset += MoreOffset;
5622 }
5623 if (Const1) {
5624 int32_t MoreOffset = IsAdd ? Const1->getValue() : -Const1->getValue();
5625 if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
5626 return false;
5627 NewOffset += MoreOffset;
5628 }
5629
5630 // Update the computed address parameters once we are sure optimization
5631 // is valid.
5632 *Base = NewBase;
5633 *Offset = NewOffset;
5634 *Reason = BaseInst;
5635 return true;
5636 }
5637 } // end of anonymous namespace
5638
formAddressingMode(Type Ty,Cfg * Func,const Inst * LdSt,Operand * Base)5639 OperandARM32Mem *TargetARM32::formAddressingMode(Type Ty, Cfg *Func,
5640 const Inst *LdSt,
5641 Operand *Base) {
5642 assert(Base != nullptr);
5643 int32_t OffsetImm = 0;
5644 Variable *OffsetReg = nullptr;
5645 int32_t OffsetRegShamt = 0;
5646 OperandARM32::ShiftKind ShiftKind = OperandARM32::kNoShift;
5647
5648 Func->resetCurrentNode();
5649 if (Func->isVerbose(IceV_AddrOpt)) {
5650 OstreamLocker _(Func->getContext());
5651 Ostream &Str = Func->getContext()->getStrDump();
5652 Str << "\nAddress mode formation:\t";
5653 LdSt->dumpDecorated(Func);
5654 }
5655
5656 if (isVectorType(Ty))
5657 // vector loads and stores do not allow offsets, and only support the
5658 // "[reg]" addressing mode (the other supported modes are write back.)
5659 return nullptr;
5660
5661 auto *BaseVar = llvm::dyn_cast<Variable>(Base);
5662 if (BaseVar == nullptr)
5663 return nullptr;
5664
5665 (void)MemTraitsSize;
5666 assert(Ty < MemTraitsSize);
5667 auto *TypeTraits = &MemTraits[Ty];
5668 const bool CanHaveIndex = !NeedSandboxing && TypeTraits->CanHaveIndex;
5669 const bool CanHaveShiftedIndex =
5670 !NeedSandboxing && TypeTraits->CanHaveShiftedIndex;
5671 const bool CanHaveImm = TypeTraits->CanHaveImm;
5672 const int32_t ValidImmMask = TypeTraits->ValidImmMask;
5673 (void)ValidImmMask;
5674 assert(!CanHaveImm || ValidImmMask >= 0);
5675
5676 const VariablesMetadata *VMetadata = Func->getVMetadata();
5677 const Inst *Reason = nullptr;
5678
5679 do {
5680 if (Reason != nullptr) {
5681 dumpAddressOpt(Func, BaseVar, OffsetImm, OffsetReg, OffsetRegShamt,
5682 Reason);
5683 Reason = nullptr;
5684 }
5685
5686 if (matchAssign(VMetadata, &BaseVar, &OffsetImm, &Reason)) {
5687 continue;
5688 }
5689
5690 if (CanHaveIndex &&
5691 matchAssign(VMetadata, &OffsetReg, &OffsetImm, &Reason)) {
5692 continue;
5693 }
5694
5695 if (CanHaveIndex && matchCombinedBaseIndex(VMetadata, &BaseVar, &OffsetReg,
5696 OffsetRegShamt, &Reason)) {
5697 continue;
5698 }
5699
5700 if (CanHaveShiftedIndex) {
5701 if (matchShiftedOffsetReg(VMetadata, &OffsetReg, &ShiftKind,
5702 &OffsetRegShamt, &Reason)) {
5703 continue;
5704 }
5705
5706 if ((OffsetRegShamt == 0) &&
5707 matchShiftedOffsetReg(VMetadata, &BaseVar, &ShiftKind,
5708 &OffsetRegShamt, &Reason)) {
5709 std::swap(BaseVar, OffsetReg);
5710 continue;
5711 }
5712 }
5713
5714 if (matchOffsetBase(VMetadata, &BaseVar, &OffsetImm, &Reason)) {
5715 continue;
5716 }
5717 } while (Reason);
5718
5719 if (BaseVar == nullptr) {
5720 // [OffsetReg{, LSL Shamt}{, #OffsetImm}] is not legal in ARM, so we have to
5721 // legalize the addressing mode to [BaseReg, OffsetReg{, LSL Shamt}].
5722 // Instead of a zeroed BaseReg, we initialize it with OffsetImm:
5723 //
5724 // [OffsetReg{, LSL Shamt}{, #OffsetImm}] ->
5725 // mov BaseReg, #OffsetImm
5726 // use of [BaseReg, OffsetReg{, LSL Shamt}]
5727 //
5728 const Type PointerType = getPointerType();
5729 BaseVar = makeReg(PointerType);
5730 Context.insert<InstAssign>(BaseVar, Ctx->getConstantInt32(OffsetImm));
5731 OffsetImm = 0;
5732 } else if (OffsetImm != 0) {
5733 // ARM Ldr/Str instructions have limited range immediates. The formation
5734 // loop above materialized an Immediate carelessly, so we ensure the
5735 // generated offset is sane.
5736 const int32_t PositiveOffset = OffsetImm > 0 ? OffsetImm : -OffsetImm;
5737 const InstArithmetic::OpKind Op =
5738 OffsetImm > 0 ? InstArithmetic::Add : InstArithmetic::Sub;
5739
5740 if (!CanHaveImm || !isLegalMemOffset(Ty, OffsetImm) ||
5741 OffsetReg != nullptr) {
5742 if (OffsetReg == nullptr) {
5743 // We formed a [Base, #const] addressing mode which is not encodable in
5744 // ARM. There is little point in forming an address mode now if we don't
5745         // have an offset register. Effectively, we would end up with something like
5746 //
5747 // [Base, #const] -> add T, Base, #const
5748 // use of [T]
5749 //
5750 // Which is exactly what we already have. So we just bite the bullet
5751 // here and don't form any address mode.
5752 return nullptr;
5753 }
5754 // We formed [Base, Offset {, LSL Amnt}, #const]. Oops. Legalize it to
5755 //
5756 // [Base, Offset, {LSL amount}, #const] ->
5757 // add T, Base, #const
5758 // use of [T, Offset {, LSL amount}]
5759 const Type PointerType = getPointerType();
5760 Variable *T = makeReg(PointerType);
5761 Context.insert<InstArithmetic>(Op, T, BaseVar,
5762 Ctx->getConstantInt32(PositiveOffset));
5763 BaseVar = T;
5764 OffsetImm = 0;
5765 }
5766 }
5767
5768 assert(BaseVar != nullptr);
5769 assert(OffsetImm == 0 || OffsetReg == nullptr);
5770 assert(OffsetReg == nullptr || CanHaveIndex);
5771 assert(OffsetImm < 0 ? (ValidImmMask & -OffsetImm) == -OffsetImm
5772 : (ValidImmMask & OffsetImm) == OffsetImm);
5773
5774 if (OffsetReg != nullptr) {
5775 Variable *OffsetR = makeReg(getPointerType());
5776 Context.insert<InstAssign>(OffsetR, OffsetReg);
5777 return OperandARM32Mem::create(Func, Ty, BaseVar, OffsetR, ShiftKind,
5778 OffsetRegShamt);
5779 }
5780
5781 return OperandARM32Mem::create(
5782 Func, Ty, BaseVar,
5783 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(OffsetImm)));
5784 }
5785
doAddressOptLoad()5786 void TargetARM32::doAddressOptLoad() {
5787 Inst *Instr = iteratorToInst(Context.getCur());
5788 assert(llvm::isa<InstLoad>(Instr));
5789 Variable *Dest = Instr->getDest();
5790 Operand *Addr = Instr->getSrc(0);
5791 if (OperandARM32Mem *Mem =
5792 formAddressingMode(Dest->getType(), Func, Instr, Addr)) {
5793 Instr->setDeleted();
5794 Context.insert<InstLoad>(Dest, Mem);
5795 }
5796 }
5797
randomlyInsertNop(float Probability,RandomNumberGenerator & RNG)5798 void TargetARM32::randomlyInsertNop(float Probability,
5799 RandomNumberGenerator &RNG) {
5800 RandomNumberGeneratorWrapper RNGW(RNG);
5801 if (RNGW.getTrueWithProbability(Probability)) {
5802 _nop();
5803 }
5804 }
5805
lowerPhi(const InstPhi *)5806 void TargetARM32::lowerPhi(const InstPhi * /*Instr*/) {
5807 Func->setError("Phi found in regular instruction list");
5808 }
5809
lowerRet(const InstRet * Instr)5810 void TargetARM32::lowerRet(const InstRet *Instr) {
5811 Variable *Reg = nullptr;
5812 if (Instr->hasRetValue()) {
5813 Operand *Src0 = Instr->getRetValue();
5814 Type Ty = Src0->getType();
5815 if (Ty == IceType_i64) {
5816 Src0 = legalizeUndef(Src0);
5817 Variable *R0 = legalizeToReg(loOperand(Src0), RegARM32::Reg_r0);
5818 Variable *R1 = legalizeToReg(hiOperand(Src0), RegARM32::Reg_r1);
5819 Reg = R0;
5820 Context.insert<InstFakeUse>(R1);
5821 } else if (Ty == IceType_f32) {
5822 Variable *S0 = legalizeToReg(Src0, RegARM32::Reg_s0);
5823 Reg = S0;
5824 } else if (Ty == IceType_f64) {
5825 Variable *D0 = legalizeToReg(Src0, RegARM32::Reg_d0);
5826 Reg = D0;
5827 } else if (isVectorType(Src0->getType())) {
5828 Variable *Q0 = legalizeToReg(Src0, RegARM32::Reg_q0);
5829 Reg = Q0;
5830 } else {
5831 Operand *Src0F = legalize(Src0, Legal_Reg | Legal_Flex);
5832 Reg = makeReg(Src0F->getType(), RegARM32::Reg_r0);
5833 _mov(Reg, Src0F, CondARM32::AL);
5834 }
5835 }
5836 // Add a ret instruction even if sandboxing is enabled, because addEpilog
5837 // explicitly looks for a ret instruction as a marker for where to insert the
5838 // frame removal instructions. addEpilog is responsible for restoring the
5839 // "lr" register as needed prior to this ret instruction.
5840 _ret(getPhysicalRegister(RegARM32::Reg_lr), Reg);
5841
5842 // Add a fake use of sp to make sure sp stays alive for the entire function.
5843 // Otherwise post-call sp adjustments get dead-code eliminated.
5844 // TODO: Are there more places where the fake use should be inserted? E.g.
5845 // "void f(int n){while(1) g(n);}" may not have a ret instruction.
5846 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
5847 Context.insert<InstFakeUse>(SP);
5848 }
5849
lowerShuffleVector(const InstShuffleVector * Instr)5850 void TargetARM32::lowerShuffleVector(const InstShuffleVector *Instr) {
5851 auto *Dest = Instr->getDest();
5852 const Type DestTy = Dest->getType();
5853
5854 auto *T = makeReg(DestTy);
5855
5856 switch (DestTy) {
5857 default:
5858 break;
5859 // TODO(jpp): figure out how to properly lower this without scalarization.
5860 }
5861
5862 // Unoptimized shuffle. Perform a series of inserts and extracts.
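  // E.g. for a v4i32 shuffle with indexes <0, 5, 2, 7>, indexes 0 and 2 take
  // elements 0 and 2 of Src0, while 5 and 7 (>= NumElements) take elements 1
  // and 3 of Src1.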
5863 Context.insert<InstFakeDef>(T);
5864 auto *Src0 = Instr->getSrc(0);
5865 auto *Src1 = Instr->getSrc(1);
5866 const SizeT NumElements = typeNumElements(DestTy);
5867 const Type ElementType = typeElementType(DestTy);
5868 for (SizeT I = 0; I < Instr->getNumIndexes(); ++I) {
5869 auto *Index = Instr->getIndex(I);
5870 const SizeT Elem = Index->getValue();
5871 auto *ExtElmt = makeReg(ElementType);
5872 if (Elem < NumElements) {
5873 lowerExtractElement(
5874 InstExtractElement::create(Func, ExtElmt, Src0, Index));
5875 } else {
5876 lowerExtractElement(InstExtractElement::create(
5877 Func, ExtElmt, Src1,
5878 Ctx->getConstantInt32(Index->getValue() - NumElements)));
5879 }
5880 auto *NewT = makeReg(DestTy);
5881 lowerInsertElement(InstInsertElement::create(Func, NewT, T, ExtElmt,
5882 Ctx->getConstantInt32(I)));
5883 T = NewT;
5884 }
5885 _mov(Dest, T);
5886 }
5887
lowerSelect(const InstSelect * Instr)5888 void TargetARM32::lowerSelect(const InstSelect *Instr) {
5889 Variable *Dest = Instr->getDest();
5890 Type DestTy = Dest->getType();
5891 Operand *SrcT = Instr->getTrueOperand();
5892 Operand *SrcF = Instr->getFalseOperand();
5893 Operand *Condition = Instr->getCondition();
5894
5895 if (!isVectorType(DestTy)) {
5896 lowerInt1ForSelect(Dest, Condition, legalizeUndef(SrcT),
5897 legalizeUndef(SrcF));
5898 return;
5899 }
5900
5901 Type TType = DestTy;
5902 switch (DestTy) {
5903 default:
5904 llvm::report_fatal_error("Unexpected type for vector select.");
5905 case IceType_v4i1:
5906 TType = IceType_v4i32;
5907 break;
5908 case IceType_v8i1:
5909 TType = IceType_v8i16;
5910 break;
5911 case IceType_v16i1:
5912 TType = IceType_v16i8;
5913 break;
5914 case IceType_v4f32:
5915 TType = IceType_v4i32;
5916 break;
5917 case IceType_v4i32:
5918 case IceType_v8i16:
5919 case IceType_v16i8:
5920 break;
5921 }
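  // Sign-extending the i1 condition vector produces an all-ones or all-zeros
  // mask per lane, which vbsl then uses to pick bits from SrcT or SrcF. E.g.
  // (illustrative) a true lane becomes 0xFFFFFFFF and selects the
  // corresponding SrcT lane.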
5922 auto *T = makeReg(TType);
5923 lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));
5924 auto *SrcTR = legalizeToReg(SrcT);
5925 auto *SrcFR = legalizeToReg(SrcF);
5926 _vbsl(T, SrcTR, SrcFR)->setDestRedefined();
5927 _mov(Dest, T);
5928 }
5929
lowerStore(const InstStore * Instr)5930 void TargetARM32::lowerStore(const InstStore *Instr) {
5931 Operand *Value = Instr->getData();
5932 Operand *Addr = Instr->getAddr();
5933 OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
5934 Type Ty = NewAddr->getType();
5935
5936 if (Ty == IceType_i64) {
5937 Value = legalizeUndef(Value);
5938 Variable *ValueHi = legalizeToReg(hiOperand(Value));
5939 Variable *ValueLo = legalizeToReg(loOperand(Value));
5940 _str(ValueHi, llvm::cast<OperandARM32Mem>(hiOperand(NewAddr)));
5941 _str(ValueLo, llvm::cast<OperandARM32Mem>(loOperand(NewAddr)));
5942 } else {
5943 Variable *ValueR = legalizeToReg(Value);
5944 _str(ValueR, NewAddr);
5945 }
5946 }
5947
doAddressOptStore()5948 void TargetARM32::doAddressOptStore() {
5949 Inst *Instr = iteratorToInst(Context.getCur());
5950 assert(llvm::isa<InstStore>(Instr));
5951 Operand *Src = Instr->getSrc(0);
5952 Operand *Addr = Instr->getSrc(1);
5953 if (OperandARM32Mem *Mem =
5954 formAddressingMode(Src->getType(), Func, Instr, Addr)) {
5955 Instr->setDeleted();
5956 Context.insert<InstStore>(Src, Mem);
5957 }
5958 }
5959
lowerSwitch(const InstSwitch * Instr)5960 void TargetARM32::lowerSwitch(const InstSwitch *Instr) {
5961 // This implements the most naive possible lowering.
5962 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default
5963 Operand *Src0 = Instr->getComparison();
5964 SizeT NumCases = Instr->getNumCases();
5965 if (Src0->getType() == IceType_i64) {
5966 Src0 = legalizeUndef(Src0);
5967 Variable *Src0Lo = legalizeToReg(loOperand(Src0));
5968 Variable *Src0Hi = legalizeToReg(hiOperand(Src0));
5969 for (SizeT I = 0; I < NumCases; ++I) {
5970 Operand *ValueLo = Ctx->getConstantInt32(Instr->getValue(I));
5971 Operand *ValueHi = Ctx->getConstantInt32(Instr->getValue(I) >> 32);
5972 ValueLo = legalize(ValueLo, Legal_Reg | Legal_Flex);
5973 ValueHi = legalize(ValueHi, Legal_Reg | Legal_Flex);
5974 _cmp(Src0Lo, ValueLo);
5975 _cmp(Src0Hi, ValueHi, CondARM32::EQ);
5976 _br(Instr->getLabel(I), CondARM32::EQ);
5977 }
5978 _br(Instr->getLabelDefault());
5979 return;
5980 }
5981
5982 Variable *Src0Var = legalizeToReg(Src0);
5983 // If Src0 is not an i32, we left shift it -- see the icmp lowering for the
5984 // reason.
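  // E.g. for an i16 comparison both the switch value and each case constant
  // are shifted left by 16, discarding whatever garbage may sit in the upper
  // bits of the 32-bit register, so the cmp only compares the significant
  // halfword.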
5985 assert(Src0Var->mustHaveReg());
5986 const size_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType());
5987 assert(ShiftAmt < 32);
5988 if (ShiftAmt > 0) {
5989 Operand *ShAmtImm = shAmtImm(ShiftAmt);
5990 Variable *T = makeReg(IceType_i32);
5991 _lsl(T, Src0Var, ShAmtImm);
5992 Src0Var = T;
5993 }
5994
5995 for (SizeT I = 0; I < NumCases; ++I) {
5996 Operand *Value = Ctx->getConstantInt32(Instr->getValue(I) << ShiftAmt);
5997 Value = legalize(Value, Legal_Reg | Legal_Flex);
5998 _cmp(Src0Var, Value);
5999 _br(Instr->getLabel(I), CondARM32::EQ);
6000 }
6001 _br(Instr->getLabelDefault());
6002 }
6003
lowerBreakpoint(const InstBreakpoint * Instr)6004 void TargetARM32::lowerBreakpoint(const InstBreakpoint *Instr) {
6005 UnimplementedLoweringError(this, Instr);
6006 }
6007
lowerUnreachable(const InstUnreachable *)6008 void TargetARM32::lowerUnreachable(const InstUnreachable * /*Instr*/) {
6009 _trap();
6010 }
6011
6012 namespace {
6013 // Returns whether Opnd needs the GOT address. Currently, ConstantRelocatables
6014 // and fp constants need access to the GOT address.
operandNeedsGot(const Operand * Opnd)6015 bool operandNeedsGot(const Operand *Opnd) {
6016 if (llvm::isa<ConstantRelocatable>(Opnd)) {
6017 return true;
6018 }
6019
6020 if (llvm::isa<ConstantFloat>(Opnd)) {
6021 uint32_t _;
6022 return !OperandARM32FlexFpImm::canHoldImm(Opnd, &_);
6023 }
6024
6025 const auto *F64 = llvm::dyn_cast<ConstantDouble>(Opnd);
6026 if (F64 != nullptr) {
6027 uint32_t _;
6028 return !OperandARM32FlexFpImm::canHoldImm(Opnd, &_) &&
6029 !isFloatingPointZero(F64);
6030 }
6031
6032 return false;
6033 }
6034
6035 // Returns whether Phi needs the GOT address (which it does if any of its
6036 // operands needs the GOT address.)
phiNeedsGot(const InstPhi * Phi)6037 bool phiNeedsGot(const InstPhi *Phi) {
6038 if (Phi->isDeleted()) {
6039 return false;
6040 }
6041
6042 for (SizeT I = 0; I < Phi->getSrcSize(); ++I) {
6043 if (operandNeedsGot(Phi->getSrc(I))) {
6044 return true;
6045 }
6046 }
6047
6048 return false;
6049 }
6050
6051 // Returns whether **any** phi in Node needs the GOT address.
anyPhiInNodeNeedsGot(CfgNode * Node)6052 bool anyPhiInNodeNeedsGot(CfgNode *Node) {
6053 for (auto &Inst : Node->getPhis()) {
6054 if (phiNeedsGot(llvm::cast<InstPhi>(&Inst))) {
6055 return true;
6056 }
6057 }
6058 return false;
6059 }
6060
6061 } // end of anonymous namespace
6062
prelowerPhis()6063 void TargetARM32::prelowerPhis() {
6064 CfgNode *Node = Context.getNode();
6065
6066 if (SandboxingType == ST_Nonsfi) {
6067 assert(GotPtr != nullptr);
6068 if (anyPhiInNodeNeedsGot(Node)) {
6069       // If any phi instruction needs the GOT address, we place a fake use
6070       // of GotPtr in Node to prevent the GotPtr's initialization from
6071       // being dead-code eliminated.
6073 Node->getInsts().push_front(InstFakeUse::create(Func, GotPtr));
6074 }
6075 }
6076
6077 PhiLowering::prelowerPhis32Bit(this, Node, Func);
6078 }
6079
makeVectorOfZeros(Type Ty,RegNumT RegNum)6080 Variable *TargetARM32::makeVectorOfZeros(Type Ty, RegNumT RegNum) {
6081 Variable *Reg = makeReg(Ty, RegNum);
6082 Context.insert<InstFakeDef>(Reg);
6083 assert(isVectorType(Ty));
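  // XORing a register with itself yields zero in every lane, e.g.
  // (illustrative register choice) "veor q10, q10, q10".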
6084 _veor(Reg, Reg, Reg);
6085 return Reg;
6086 }
6087
6088 // Helper for legalize() to emit the right code to lower an operand to a
6089 // register of the appropriate type.
copyToReg(Operand * Src,RegNumT RegNum)6090 Variable *TargetARM32::copyToReg(Operand *Src, RegNumT RegNum) {
6091 Type Ty = Src->getType();
6092 Variable *Reg = makeReg(Ty, RegNum);
6093 if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Src)) {
6094 _ldr(Reg, Mem);
6095 } else {
6096 _mov(Reg, Src);
6097 }
6098 return Reg;
6099 }
6100
6101 // TODO(jpp): remove unneeded else clauses in legalize.
legalize(Operand * From,LegalMask Allowed,RegNumT RegNum)6102 Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
6103 RegNumT RegNum) {
6104 Type Ty = From->getType();
6105 // Assert that a physical register is allowed. To date, all calls to
6106 // legalize() allow a physical register. Legal_Flex converts registers to the
6107   // right type, OperandARM32FlexReg, as needed.
6108 assert(Allowed & Legal_Reg);
6109
6110   // Copied verbatim from TargetX86Base<Machine>.
6111 if (RegNum.hasNoValue()) {
6112 if (Variable *Subst = getContext().availabilityGet(From)) {
6113 // At this point we know there is a potential substitution available.
6114 if (!Subst->isRematerializable() && Subst->mustHaveReg() &&
6115 !Subst->hasReg()) {
6116 // At this point we know the substitution will have a register.
6117 if (From->getType() == Subst->getType()) {
6118 // At this point we know the substitution's register is compatible.
6119 return Subst;
6120 }
6121 }
6122 }
6123 }
6124
6125 // Go through the various types of operands: OperandARM32Mem,
6126 // OperandARM32Flex, Constant, and Variable. Given the above assertion, if
6127   // the type of an operand is not legal (e.g., OperandARM32Mem and !Legal_Mem), we
6128 // can always copy to a register.
6129 if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(From)) {
6130 // Before doing anything with a Mem operand, we need to ensure that the
6131 // Base and Index components are in physical registers.
6132 Variable *Base = Mem->getBase();
6133 Variable *Index = Mem->getIndex();
6134 ConstantInteger32 *Offset = Mem->getOffset();
6135 assert(Index == nullptr || Offset == nullptr);
6136 Variable *RegBase = nullptr;
6137 Variable *RegIndex = nullptr;
6138 assert(Base);
6139 RegBase = llvm::cast<Variable>(
6140 legalize(Base, Legal_Reg | Legal_Rematerializable));
6141 assert(Ty < MemTraitsSize);
6142 if (Index) {
6143 assert(Offset == nullptr);
6144 assert(MemTraits[Ty].CanHaveIndex);
6145 RegIndex = legalizeToReg(Index);
6146 }
6147 if (Offset && Offset->getValue() != 0) {
6148 assert(Index == nullptr);
6149 static constexpr bool ZeroExt = false;
6150 assert(MemTraits[Ty].CanHaveImm);
6151 if (!OperandARM32Mem::canHoldOffset(Ty, ZeroExt, Offset->getValue())) {
6152 llvm::report_fatal_error("Invalid memory offset.");
6153 }
6154 }
6155
6156 // Create a new operand if there was a change.
6157 if (Base != RegBase || Index != RegIndex) {
6158 // There is only a reg +/- reg or reg + imm form.
6159 // Figure out which to re-create.
6160 if (RegIndex) {
6161 Mem = OperandARM32Mem::create(Func, Ty, RegBase, RegIndex,
6162 Mem->getShiftOp(), Mem->getShiftAmt(),
6163 Mem->getAddrMode());
6164 } else {
6165 Mem = OperandARM32Mem::create(Func, Ty, RegBase, Offset,
6166 Mem->getAddrMode());
6167 }
6168 }
6169 if (Allowed & Legal_Mem) {
6170 From = Mem;
6171 } else {
6172 Variable *Reg = makeReg(Ty, RegNum);
6173 _ldr(Reg, Mem);
6174 From = Reg;
6175 }
6176 return From;
6177 }
6178
6179 if (auto *Flex = llvm::dyn_cast<OperandARM32Flex>(From)) {
6180 if (!(Allowed & Legal_Flex)) {
6181 if (auto *FlexReg = llvm::dyn_cast<OperandARM32FlexReg>(Flex)) {
6182 if (FlexReg->getShiftOp() == OperandARM32::kNoShift) {
6183 From = FlexReg->getReg();
6184 // Fall through and let From be checked as a Variable below, where it
6185 // may or may not need a register.
6186 } else {
6187 return copyToReg(Flex, RegNum);
6188 }
6189 } else {
6190 return copyToReg(Flex, RegNum);
6191 }
6192 } else {
6193 return From;
6194 }
6195 }
6196
6197 if (llvm::isa<Constant>(From)) {
6198 if (llvm::isa<ConstantUndef>(From)) {
6199 From = legalizeUndef(From, RegNum);
6200 if (isVectorType(Ty))
6201 return From;
6202 }
6203 // There should be no constants of vector type (other than undef).
6204 assert(!isVectorType(Ty));
6205 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) {
6206 uint32_t RotateAmt;
6207 uint32_t Immed_8;
6208 uint32_t Value = static_cast<uint32_t>(C32->getValue());
6209 if (OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) {
6210 // The immediate can be encoded as a Flex immediate. We may return the
6211 // Flex operand if the caller has Allow'ed it.
6212 auto *OpF = OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
6213 const bool CanBeFlex = Allowed & Legal_Flex;
6214 if (CanBeFlex)
6215 return OpF;
6216 return copyToReg(OpF, RegNum);
6217 } else if (OperandARM32FlexImm::canHoldImm(~Value, &RotateAmt,
6218 &Immed_8)) {
6219 // Even though the immediate can't be encoded as a Flex operand, its
6220 // inverted bit pattern can, thus we use ARM's mvn to load the 32-bit
6221 // constant with a single instruction.
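        // E.g. 0xFFFFFFFE is not encodable as a flex immediate, but its
        // complement 0x1 is, so this emits "mvn Reg, #1".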
6222 auto *InvOpF =
6223 OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
6224 Variable *Reg = makeReg(Ty, RegNum);
6225 _mvn(Reg, InvOpF);
6226 return Reg;
6227 } else {
6228 // Do a movw/movt to a register.
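        // E.g. 0x12345678 becomes "movw Reg, #0x5678" followed by
        // "movt Reg, #0x1234".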
6229 Variable *Reg = makeReg(Ty, RegNum);
6230 uint32_t UpperBits = (Value >> 16) & 0xFFFF;
6231 _movw(Reg,
6232 UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32);
6233 if (UpperBits != 0) {
6234 _movt(Reg, Ctx->getConstantInt32(UpperBits));
6235 }
6236 return Reg;
6237 }
6238 } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) {
6239 Variable *Reg = makeReg(Ty, RegNum);
6240 if (SandboxingType != ST_Nonsfi) {
6241 _movw(Reg, C);
6242 _movt(Reg, C);
6243 } else {
6244 auto *GotAddr = legalizeToReg(GotPtr);
6245 GlobalString CGotoffName = createGotoffRelocation(C);
6246 loadNamedConstantRelocatablePIC(
6247 CGotoffName, Reg, [this, Reg](Variable *PC) {
6248 _ldr(Reg, OperandARM32Mem::create(Func, IceType_i32, PC, Reg));
6249 });
6250 _add(Reg, GotAddr, Reg);
6251 }
6252 return Reg;
6253 } else {
6254 assert(isScalarFloatingType(Ty));
6255 uint32_t ModifiedImm;
6256 if (OperandARM32FlexFpImm::canHoldImm(From, &ModifiedImm)) {
6257 Variable *T = makeReg(Ty, RegNum);
6258 _mov(T,
6259 OperandARM32FlexFpImm::create(Func, From->getType(), ModifiedImm));
6260 return T;
6261 }
6262
6263 if (Ty == IceType_f64 && isFloatingPointZero(From)) {
6264 // Use T = T ^ T to load a 64-bit fp zero. This does not work for f32
6265 // because ARM does not have a veor instruction with S registers.
6266 Variable *T = makeReg(IceType_f64, RegNum);
6267 Context.insert<InstFakeDef>(T);
6268 _veor(T, T, T);
6269 return T;
6270 }
6271
6272 // Load floats/doubles from literal pool.
6273 auto *CFrom = llvm::cast<Constant>(From);
6274 assert(CFrom->getShouldBePooled());
6275 Constant *Offset = Ctx->getConstantSym(0, CFrom->getLabelName());
6276 Variable *BaseReg = nullptr;
6277 if (SandboxingType == ST_Nonsfi) {
6278 // vldr does not support the [base, index] addressing mode, so we need
6279 // to legalize Offset to a register. Otherwise, we could simply
6280 // vldr dest, [got, reg(Offset)]
6281 BaseReg = legalizeToReg(Offset);
6282 } else {
6283 BaseReg = makeReg(getPointerType());
6284 _movw(BaseReg, Offset);
6285 _movt(BaseReg, Offset);
6286 }
6287 From = formMemoryOperand(BaseReg, Ty);
6288 return copyToReg(From, RegNum);
6289 }
6290 }
6291
6292 if (auto *Var = llvm::dyn_cast<Variable>(From)) {
6293 if (Var->isRematerializable()) {
6294 if (Allowed & Legal_Rematerializable) {
6295 return From;
6296 }
6297
6298 Variable *T = makeReg(Var->getType(), RegNum);
6299 _mov(T, Var);
6300 return T;
6301 }
6302 // Check if the variable is guaranteed a physical register. This can happen
6303 // either when the variable is pre-colored or when it is assigned infinite
6304 // weight.
6305 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());
6306 // We need a new physical register for the operand if:
6307 // Mem is not allowed and Var isn't guaranteed a physical
6308 // register, or
6309 // RegNum is required and Var->getRegNum() doesn't match.
6310 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
6311 (RegNum.hasValue() && (RegNum != Var->getRegNum()))) {
6312 From = copyToReg(From, RegNum);
6313 }
6314 return From;
6315 }
6316 llvm::report_fatal_error("Unhandled operand kind in legalize()");
6317
6318 return From;
6319 }
6320
6321 /// Provide a trivial wrapper to legalize() for this common usage.
legalizeToReg(Operand * From,RegNumT RegNum)6322 Variable *TargetARM32::legalizeToReg(Operand *From, RegNumT RegNum) {
6323 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
6324 }
6325
6326 /// Legalize undef values to concrete values.
legalizeUndef(Operand * From,RegNumT RegNum)6327 Operand *TargetARM32::legalizeUndef(Operand *From, RegNumT RegNum) {
6328 Type Ty = From->getType();
6329 if (llvm::isa<ConstantUndef>(From)) {
6330 // Lower undefs to zero. Another option is to lower undefs to an
6331 // uninitialized register; however, using an uninitialized register results
6332 // in less predictable code.
6333 //
6334 // If in the future the implementation is changed to lower undef values to
6335 // uninitialized registers, a FakeDef will be needed:
6336 // Context.insert(InstFakeDef::create(Func, Reg)); This is in order to
6337 // ensure that the live range of Reg is not overestimated. If the constant
6338 // being lowered is a 64 bit value, then the result should be split and the
6339 // lo and hi components will need to go in uninitialized registers.
6340 if (isVectorType(Ty))
6341 return makeVectorOfZeros(Ty, RegNum);
6342 return Ctx->getConstantZero(Ty);
6343 }
6344 return From;
6345 }
6346
formMemoryOperand(Operand * Operand,Type Ty)6347 OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) {
6348 auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand);
6349 // It may be the case that address mode optimization already creates an
6350 // OperandARM32Mem, so in that case it wouldn't need another level of
6351 // transformation.
6352 if (Mem) {
6353 return llvm::cast<OperandARM32Mem>(legalize(Mem));
6354 }
6355 // If we didn't do address mode optimization, then we only have a
6356 // base/offset to work with. ARM always requires a base register, so
6357 // just use that to hold the operand.
6358 auto *Base = llvm::cast<Variable>(
6359 legalize(Operand, Legal_Reg | Legal_Rematerializable));
6360 return OperandARM32Mem::create(
6361 Func, Ty, Base,
6362 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)));
6363 }
6364
makeI64RegPair()6365 Variable64On32 *TargetARM32::makeI64RegPair() {
6366 Variable64On32 *Reg =
6367 llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
6368 Reg->setMustHaveReg();
6369 Reg->initHiLo(Func);
6370 Reg->getLo()->setMustNotHaveReg();
6371 Reg->getHi()->setMustNotHaveReg();
6372 return Reg;
6373 }
6374
makeReg(Type Type,RegNumT RegNum)6375 Variable *TargetARM32::makeReg(Type Type, RegNumT RegNum) {
6376 // There aren't any 64-bit integer registers for ARM32.
6377 assert(Type != IceType_i64);
6378 assert(AllowTemporaryWithNoReg || RegNum.hasValue());
6379 Variable *Reg = Func->makeVariable(Type);
6380 if (RegNum.hasValue())
6381 Reg->setRegNum(RegNum);
6382 else
6383 Reg->setMustHaveReg();
6384 return Reg;
6385 }
6386
alignRegisterPow2(Variable * Reg,uint32_t Align,RegNumT TmpRegNum)6387 void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align,
6388 RegNumT TmpRegNum) {
6389 assert(llvm::isPowerOf2_32(Align));
6390 uint32_t RotateAmt;
6391 uint32_t Immed_8;
6392 Operand *Mask;
6393 // Use AND or BIC to mask off the bits, depending on which immediate fits (if
6394 // it fits at all). Assume Align is usually small, in which case BIC works
6395 // better. Thus, this rounds down to the alignment.
6396 if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) {
6397 Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex,
6398 TmpRegNum);
6399 _bic(Reg, Reg, Mask);
6400 } else {
6401 Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex,
6402 TmpRegNum);
6403 _and(Reg, Reg, Mask);
6404 }
6405 }
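
// Illustrative sketch only (assumed values, not taken from any particular
// test): for Align == 8, Align - 1 == 7 is encodable as a flexible immediate,
// so the helper above emits roughly
//   bic reg, reg, #7            @ reg &= ~7, i.e. round reg down to 8 bytes
// For an alignment whose low mask does not encode, e.g. Align == 0x40000000,
// -Align == 0xC0000000 does encode, so the AND form is chosen instead:
//   and reg, reg, #0xC0000000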
6406
6407 void TargetARM32::postLower() {
6408 if (Func->getOptLevel() == Opt_m1)
6409 return;
6410 markRedefinitions();
6411 Context.availabilityUpdate();
6412 }
6413
6414 void TargetARM32::makeRandomRegisterPermutation(
6415 llvm::SmallVectorImpl<RegNumT> &Permutation,
6416 const SmallBitVector &ExcludeRegisters, uint64_t Salt) const {
6417 (void)Permutation;
6418 (void)ExcludeRegisters;
6419 (void)Salt;
6420 UnimplementedError(getFlags());
6421 }
6422
6423 void TargetARM32::emit(const ConstantInteger32 *C) const {
6424 if (!BuildDefs::dump())
6425 return;
6426 Ostream &Str = Ctx->getStrEmit();
6427 Str << "#" << C->getValue();
6428 }
6429
6430 void TargetARM32::emit(const ConstantInteger64 *) const {
6431 llvm::report_fatal_error("Not expecting to emit 64-bit integers");
6432 }
6433
6434 void TargetARM32::emit(const ConstantFloat *C) const {
6435 (void)C;
6436 UnimplementedError(getFlags());
6437 }
6438
6439 void TargetARM32::emit(const ConstantDouble *C) const {
6440 (void)C;
6441 UnimplementedError(getFlags());
6442 }
6443
6444 void TargetARM32::emit(const ConstantUndef *) const {
6445 llvm::report_fatal_error("undef value encountered by emitter.");
6446 }
6447
6448 void TargetARM32::emit(const ConstantRelocatable *C) const {
6449 if (!BuildDefs::dump())
6450 return;
6451 Ostream &Str = Ctx->getStrEmit();
6452 Str << "#";
6453 emitWithoutPrefix(C);
6454 }
6455
6456 void TargetARM32::lowerInt1ForSelect(Variable *Dest, Operand *Boolean,
6457 Operand *TrueValue, Operand *FalseValue) {
6458 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
6459
6460 assert(Boolean->getType() == IceType_i1);
6461
6462 bool NeedsAnd1 = false;
6463 if (TrueValue->getType() == IceType_i1) {
6464 assert(FalseValue->getType() == IceType_i1);
6465
6466 Variable *TrueValueV = Func->makeVariable(IceType_i1);
6467 SafeBoolChain Src0Safe = lowerInt1(TrueValueV, TrueValue);
6468 TrueValue = TrueValueV;
6469
6470 Variable *FalseValueV = Func->makeVariable(IceType_i1);
6471 SafeBoolChain Src1Safe = lowerInt1(FalseValueV, FalseValue);
6472 FalseValue = FalseValueV;
6473
6474 NeedsAnd1 = Src0Safe == SBC_No || Src1Safe == SBC_No;
6475 }
6476
6477 Variable *DestLo = (Dest->getType() == IceType_i64)
6478 ? llvm::cast<Variable>(loOperand(Dest))
6479 : Dest;
6480 Variable *DestHi = (Dest->getType() == IceType_i64)
6481 ? llvm::cast<Variable>(hiOperand(Dest))
6482 : nullptr;
6483 Operand *FalseValueLo = (FalseValue->getType() == IceType_i64)
6484 ? loOperand(FalseValue)
6485 : FalseValue;
6486 Operand *FalseValueHi =
6487 (FalseValue->getType() == IceType_i64) ? hiOperand(FalseValue) : nullptr;
6488
6489 Operand *TrueValueLo =
6490 (TrueValue->getType() == IceType_i64) ? loOperand(TrueValue) : TrueValue;
6491 Operand *TrueValueHi =
6492 (TrueValue->getType() == IceType_i64) ? hiOperand(TrueValue) : nullptr;
6493
6494 Variable *T_Lo = makeReg(DestLo->getType());
6495 Variable *T_Hi = (DestHi == nullptr) ? nullptr : makeReg(DestHi->getType());
6496
6497 _mov(T_Lo, legalize(FalseValueLo, Legal_Reg | Legal_Flex));
6498 if (DestHi) {
6499 _mov(T_Hi, legalize(FalseValueHi, Legal_Reg | Legal_Flex));
6500 }
6501
6502 CondWhenTrue Cond(CondARM32::kNone);
6503 // FlagsWereSet is used to determine whether Boolean was folded or not. If not,
6504 // add an explicit _tst instruction below.
6505 bool FlagsWereSet = false;
6506 if (const Inst *Producer = Computations.getProducerOf(Boolean)) {
6507 switch (Producer->getKind()) {
6508 default:
6509 llvm::report_fatal_error("Unexpected producer.");
6510 case Inst::Icmp: {
6511 Cond = lowerIcmpCond(llvm::cast<InstIcmp>(Producer));
6512 FlagsWereSet = true;
6513 } break;
6514 case Inst::Fcmp: {
6515 Cond = lowerFcmpCond(llvm::cast<InstFcmp>(Producer));
6516 FlagsWereSet = true;
6517 } break;
6518 case Inst::Cast: {
6519 const auto *CastProducer = llvm::cast<InstCast>(Producer);
6520 assert(CastProducer->getCastKind() == InstCast::Trunc);
6521 Boolean = CastProducer->getSrc(0);
6522 // No flags were set, so a _tst(Src, 1) will be emitted below. Don't
6523 // bother legalizing Src to a Reg because it will be legalized before
6524 // emitting the tst instruction.
6525 FlagsWereSet = false;
6526 } break;
6527 case Inst::Arithmetic: {
6528 // This is a special case: we eagerly assumed Producer could be folded,
6529 // but in reality, it can't. No reason to panic: we just lower it using
6530 // the regular lowerArithmetic helper.
6531 const auto *ArithProducer = llvm::cast<InstArithmetic>(Producer);
6532 lowerArithmetic(ArithProducer);
6533 Boolean = ArithProducer->getDest();
6534 // No flags were set, so a _tst(Dest, 1) will be emitted below. Don't
6535 // bother legalizing Dest to a Reg because it will be legalized before
6536 // emitting the tst instruction.
6537 FlagsWereSet = false;
6538 } break;
6539 }
6540 }
6541
6542 if (!FlagsWereSet) {
6543 // No flags have been set, so emit a tst Boolean, 1.
6544 Variable *Src = legalizeToReg(Boolean);
6545 _tst(Src, _1);
6546 Cond = CondWhenTrue(CondARM32::NE); // i.e., CondARM32::NotZero.
6547 }
6548
6549 if (Cond.WhenTrue0 == CondARM32::kNone) {
6550 assert(Cond.WhenTrue1 == CondARM32::kNone);
6551 } else {
6552 _mov_redefined(T_Lo, legalize(TrueValueLo, Legal_Reg | Legal_Flex),
6553 Cond.WhenTrue0);
6554 if (DestHi) {
6555 _mov_redefined(T_Hi, legalize(TrueValueHi, Legal_Reg | Legal_Flex),
6556 Cond.WhenTrue0);
6557 }
6558 }
6559
6560 if (Cond.WhenTrue1 != CondARM32::kNone) {
6561 _mov_redefined(T_Lo, legalize(TrueValueLo, Legal_Reg | Legal_Flex),
6562 Cond.WhenTrue1);
6563 if (DestHi) {
6564 _mov_redefined(T_Hi, legalize(TrueValueHi, Legal_Reg | Legal_Flex),
6565 Cond.WhenTrue1);
6566 }
6567 }
6568
6569 if (NeedsAnd1) {
6570 // We lowered something that is unsafe (i.e., can't provably be zero or
6571 // one). Truncate the result.
6572 _and(T_Lo, T_Lo, _1);
6573 }
6574
6575 _mov(DestLo, T_Lo);
6576 if (DestHi) {
6577 _mov(DestHi, T_Hi);
6578 }
6579 }
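
// Rough shape of the code produced above for a 32-bit select whose condition
// comes from a foldable icmp (operand names are placeholders, not actual
// register-allocation results):
//   mov    t, <FalseValue>       @ start with the false value
//   cmp    ...                   @ flags set by the folded icmp producer
//   mov<c> t, <TrueValue>        @ conditionally overwrite with the true value
//   mov    dest, t
// When the condition cannot be folded, a "tst src, #1" provides the flags and
// the conditional move uses NE instead.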
6580
6581 TargetARM32::SafeBoolChain TargetARM32::lowerInt1(Variable *Dest,
6582 Operand *Boolean) {
6583 assert(Boolean->getType() == IceType_i1);
6584 Variable *T = makeReg(IceType_i1);
6585 Operand *_0 =
6586 legalize(Ctx->getConstantZero(IceType_i1), Legal_Reg | Legal_Flex);
6587 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
6588
6589 SafeBoolChain Safe = SBC_Yes;
6590 if (const Inst *Producer = Computations.getProducerOf(Boolean)) {
6591 switch (Producer->getKind()) {
6592 default:
6593 llvm::report_fatal_error("Unexpected producer.");
6594 case Inst::Icmp: {
6595 _mov(T, _0);
6596 CondWhenTrue Cond = lowerIcmpCond(llvm::cast<InstIcmp>(Producer));
6597 assert(Cond.WhenTrue0 != CondARM32::AL);
6598 assert(Cond.WhenTrue0 != CondARM32::kNone);
6599 assert(Cond.WhenTrue1 == CondARM32::kNone);
6600 _mov_redefined(T, _1, Cond.WhenTrue0);
6601 } break;
6602 case Inst::Fcmp: {
6603 _mov(T, _0);
6604 Inst *MovZero = Context.getLastInserted();
6605 CondWhenTrue Cond = lowerFcmpCond(llvm::cast<InstFcmp>(Producer));
6606 if (Cond.WhenTrue0 == CondARM32::AL) {
6607 assert(Cond.WhenTrue1 == CondARM32::kNone);
6608 MovZero->setDeleted();
6609 _mov(T, _1);
6610 } else if (Cond.WhenTrue0 != CondARM32::kNone) {
6611 _mov_redefined(T, _1, Cond.WhenTrue0);
6612 }
6613 if (Cond.WhenTrue1 != CondARM32::kNone) {
6614 assert(Cond.WhenTrue0 != CondARM32::kNone);
6615 assert(Cond.WhenTrue0 != CondARM32::AL);
6616 _mov_redefined(T, _1, Cond.WhenTrue1);
6617 }
6618 } break;
6619 case Inst::Cast: {
6620 const auto *CastProducer = llvm::cast<InstCast>(Producer);
6621 assert(CastProducer->getCastKind() == InstCast::Trunc);
6622 Operand *Src = CastProducer->getSrc(0);
6623 if (Src->getType() == IceType_i64)
6624 Src = loOperand(Src);
6625 _mov(T, legalize(Src, Legal_Reg | Legal_Flex));
6626 Safe = SBC_No;
6627 } break;
6628 case Inst::Arithmetic: {
6629 const auto *ArithProducer = llvm::cast<InstArithmetic>(Producer);
6630 Safe = lowerInt1Arithmetic(ArithProducer);
6631 _mov(T, ArithProducer->getDest());
6632 } break;
6633 }
6634 } else {
6635 _mov(T, legalize(Boolean, Legal_Reg | Legal_Flex));
6636 }
6637
6638 _mov(Dest, T);
6639 return Safe;
6640 }
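
// Sketch of the sequence this typically produces when the producer is an
// icmp (placeholder operands, not a verbatim listing):
//   mov    t, #0
//   cmp    a, b                  @ flags from lowerIcmpCond()
//   mov<c> t, #1                 @ <c> is the condition reported by the producer
//   mov    dest, t
// A trunc producer instead moves the (possibly unsafe) source bits into t and
// reports SBC_No so callers know the result still needs masking with #1.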
6641
6642 namespace {
6643 namespace BoolFolding {
6644 bool shouldTrackProducer(const Inst &Instr) {
6645 switch (Instr.getKind()) {
6646 default:
6647 return false;
6648 case Inst::Icmp:
6649 case Inst::Fcmp:
6650 return true;
6651 case Inst::Cast: {
6652 switch (llvm::cast<InstCast>(&Instr)->getCastKind()) {
6653 default:
6654 return false;
6655 case InstCast::Trunc:
6656 return true;
6657 }
6658 }
6659 case Inst::Arithmetic: {
6660 switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6661 default:
6662 return false;
6663 case InstArithmetic::And:
6664 case InstArithmetic::Or:
6665 return true;
6666 }
6667 }
6668 }
6669 }
6670
6671 bool isValidConsumer(const Inst &Instr) {
6672 switch (Instr.getKind()) {
6673 default:
6674 return false;
6675 case Inst::Br:
6676 return true;
6677 case Inst::Select:
6678 return !isVectorType(Instr.getDest()->getType());
6679 case Inst::Cast: {
6680 switch (llvm::cast<InstCast>(&Instr)->getCastKind()) {
6681 default:
6682 return false;
6683 case InstCast::Sext:
6684 return !isVectorType(Instr.getDest()->getType());
6685 case InstCast::Zext:
6686 return !isVectorType(Instr.getDest()->getType());
6687 }
6688 }
6689 case Inst::Arithmetic: {
6690 switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6691 default:
6692 return false;
6693 case InstArithmetic::And:
6694 return !isVectorType(Instr.getDest()->getType());
6695 case InstArithmetic::Or:
6696 return !isVectorType(Instr.getDest()->getType());
6697 }
6698 }
6699 }
6700 }
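
// Example of the producer/consumer pairs BoolFolding is after (illustrative
// bitcode, not from a specific test): given
//   %c = icmp slt i32 %x, %y
//   br i1 %c, label %t, label %f
// the icmp is a tracked producer and the br is a valid consumer, so the
// compare can be folded into a flags-setting cmp followed by a conditional
// branch instead of materializing %c in a register.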
6701 } // end of namespace BoolFolding
6702
6703 namespace FpFolding {
6704 bool shouldTrackProducer(const Inst &Instr) {
6705 switch (Instr.getKind()) {
6706 default:
6707 return false;
6708 case Inst::Arithmetic: {
6709 switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6710 default:
6711 return false;
6712 case InstArithmetic::Fmul:
6713 return true;
6714 }
6715 }
6716 }
6717 }
6718
6719 bool isValidConsumer(const Inst &Instr) {
6720 switch (Instr.getKind()) {
6721 default:
6722 return false;
6723 case Inst::Arithmetic: {
6724 switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6725 default:
6726 return false;
6727 case InstArithmetic::Fadd:
6728 case InstArithmetic::Fsub:
6729 return true;
6730 }
6731 }
6732 }
6733 }
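
// FpFolding targets the multiply-accumulate pattern (illustrative only):
//   %p = fmul float %a, %b
//   %s = fadd float %acc, %p
// Tracking the fmul producer lets the fadd/fsub consumer be lowered as a
// single multiply-accumulate style instruction (e.g. vmla/vmls) rather than a
// separate vmul followed by vadd/vsub.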
6734 } // end of namespace FpFolding
6735
6736 namespace IntFolding {
6737 bool shouldTrackProducer(const Inst &Instr) {
6738 switch (Instr.getKind()) {
6739 default:
6740 return false;
6741 case Inst::Arithmetic: {
6742 switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6743 default:
6744 return false;
6745 case InstArithmetic::Mul:
6746 return true;
6747 }
6748 }
6749 }
6750 }
6751
6752 bool isValidConsumer(const Inst &Instr) {
6753 switch (Instr.getKind()) {
6754 default:
6755 return false;
6756 case Inst::Arithmetic: {
6757 switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6758 default:
6759 return false;
6760 case InstArithmetic::Add:
6761 case InstArithmetic::Sub:
6762 return true;
6763 }
6764 }
6765 }
6766 }
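
// IntFolding mirrors the FP case for 32-bit integers (illustrative only):
//   %p = mul i32 %a, %b
//   %s = add i32 %acc, %p
// so the add (or sub) consumer can be emitted as a multiply-accumulate such
// as mla/mls instead of a mul followed by an add/sub.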
6767 } // end of namespace IntFolding
6768 } // end of anonymous namespace
6769
6770 void TargetARM32::ComputationTracker::recordProducers(CfgNode *Node) {
6771 for (Inst &Instr : Node->getInsts()) {
6772 // Check whether Instr is a valid producer.
6773 Variable *Dest = Instr.getDest();
6774 if (!Instr.isDeleted() // only consider non-deleted instructions; and
6775 && Dest // only instructions with an actual dest var; and
6776 && Dest->getType() == IceType_i1 // only bool-type dest vars; and
6777 && BoolFolding::shouldTrackProducer(Instr)) { // white-listed instr.
6778 KnownComputations.emplace(Dest->getIndex(),
6779 ComputationEntry(&Instr, IceType_i1));
6780 }
6781 if (!Instr.isDeleted() // only consider non-deleted instructions; and
6782 && Dest // only instructions with an actual dest var; and
6783 && isScalarFloatingType(Dest->getType()) // fp-type only dest vars; and
6784 && FpFolding::shouldTrackProducer(Instr)) { // white-listed instr.
6785 KnownComputations.emplace(Dest->getIndex(),
6786 ComputationEntry(&Instr, Dest->getType()));
6787 }
6788 if (!Instr.isDeleted() // only consider non-deleted instructions; and
6789 && Dest // only instructions with an actual dest var; and
6790 && Dest->getType() == IceType_i32 // i32 only dest vars; and
6791 && IntFolding::shouldTrackProducer(Instr)) { // white-listed instr.
6792 KnownComputations.emplace(Dest->getIndex(),
6793 ComputationEntry(&Instr, IceType_i32));
6794 }
6795 // Check each src variable against the map.
6796 FOREACH_VAR_IN_INST(Var, Instr) {
6797 SizeT VarNum = Var->getIndex();
6798 auto ComputationIter = KnownComputations.find(VarNum);
6799 if (ComputationIter == KnownComputations.end()) {
6800 continue;
6801 }
6802
6803 ++ComputationIter->second.NumUses;
6804 switch (ComputationIter->second.ComputationType) {
6805 default:
6806 KnownComputations.erase(VarNum);
6807 continue;
6808 case IceType_i1:
6809 if (!BoolFolding::isValidConsumer(Instr)) {
6810 KnownComputations.erase(VarNum);
6811 continue;
6812 }
6813 break;
6814 case IceType_i32:
6815 if (IndexOfVarInInst(Var) != 1 || !IntFolding::isValidConsumer(Instr)) {
6816 KnownComputations.erase(VarNum);
6817 continue;
6818 }
6819 break;
6820 case IceType_f32:
6821 case IceType_f64:
6822 if (IndexOfVarInInst(Var) != 1 || !FpFolding::isValidConsumer(Instr)) {
6823 KnownComputations.erase(VarNum);
6824 continue;
6825 }
6826 break;
6827 }
6828
6829 if (Instr.isLastUse(Var)) {
6830 ComputationIter->second.IsLiveOut = false;
6831 }
6832 }
6833 }
6834
6835 for (auto Iter = KnownComputations.begin(), End = KnownComputations.end();
6836 Iter != End;) {
6837 // Disable the folding if its dest may be live beyond this block.
6838 if (Iter->second.IsLiveOut || Iter->second.NumUses > 1) {
6839 Iter = KnownComputations.erase(Iter);
6840 continue;
6841 }
6842
6843 // Mark as "dead" rather than outright deleting. This is so that other
6844 // peephole style optimizations during or before lowering have access to
6845 // this instruction in undeleted form. See for example
6846 // tryOptimizedCmpxchgCmpBr().
6847 Iter->second.Instr->setDead();
6848 ++Iter;
6849 }
6850 }
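
// A producer only survives the scan above if it is used exactly once, by a
// valid consumer, and does not live out of the block. Illustrative example of
// a disqualified candidate:
//   %c = icmp eq i32 %x, 0
//   br i1 %c, ...
//   ; %c also used elsewhere, or live-out to another block
// Here NumUses > 1 (or IsLiveOut), so %c is erased from KnownComputations and
// lowered normally instead of being folded into its consumer.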
6851
6852 TargetARM32::Sandboxer::Sandboxer(TargetARM32 *Target,
6853 InstBundleLock::Option BundleOption)
6854 : Target(Target), BundleOption(BundleOption) {}
6855
6856 TargetARM32::Sandboxer::~Sandboxer() {}
6857
6858 namespace {
6859 OperandARM32FlexImm *indirectBranchBicMask(Cfg *Func) {
6860 constexpr uint32_t Imm8 = 0xFC; // 0xC000000F
6861 constexpr uint32_t RotateAmt = 2;
6862 return OperandARM32FlexImm::create(Func, IceType_i32, Imm8, RotateAmt);
6863 }
6864
6865 OperandARM32FlexImm *memOpBicMask(Cfg *Func) {
6866 constexpr uint32_t Imm8 = 0x0C; // 0xC0000000
6867 constexpr uint32_t RotateAmt = 2;
6868 return OperandARM32FlexImm::create(Func, IceType_i32, Imm8, RotateAmt);
6869 }
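
// The masks above use the ARM rotated-immediate encoding, where the encoded
// value is Imm8 rotated right by twice the rotate field (that appears to be
// the convention OperandARM32FlexImm follows here). Worked out:
//   0xFC ror 4 == 0xC000000F   (clears the top 2 and bottom 4 bits of an
//                               indirect branch target: sandbox the address
//                               and bundle-align it)
//   0x0C ror 4 == 0xC0000000   (clears only the top 2 bits for data access)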
6870
6871 static bool baseNeedsBic(Variable *Base) {
6872 return Base->getRegNum() != RegARM32::Reg_r9 &&
6873 Base->getRegNum() != RegARM32::Reg_sp;
6874 }
6875 } // end of anonymous namespace
6876
6877 void TargetARM32::Sandboxer::createAutoBundle() {
6878 Bundler = makeUnique<AutoBundle>(Target, BundleOption);
6879 }
6880
6881 void TargetARM32::Sandboxer::add_sp(Operand *AddAmount) {
6882 Variable *SP = Target->getPhysicalRegister(RegARM32::Reg_sp);
6883 if (!Target->NeedSandboxing) {
6884 Target->_add(SP, SP, AddAmount);
6885 return;
6886 }
6887 createAutoBundle();
6888 Target->_add(SP, SP, AddAmount);
6889 Target->_bic(SP, SP, memOpBicMask(Target->Func));
6890 }
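
// When sandboxing is enabled, the adjustment and the mask must stay in one
// bundle so a bundle boundary cannot separate them. Roughly (placeholder
// immediate, bundle markers shown as assembler directives for illustration):
//   .bundle_lock
//   add sp, sp, #N
//   bic sp, sp, #0xC0000000    @ keep sp inside the untrusted address space
//   .bundle_unlock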
6891
6892 void TargetARM32::Sandboxer::align_sp(size_t Alignment) {
6893 Variable *SP = Target->getPhysicalRegister(RegARM32::Reg_sp);
6894 if (!Target->NeedSandboxing) {
6895 Target->alignRegisterPow2(SP, Alignment);
6896 return;
6897 }
6898 createAutoBundle();
6899 Target->alignRegisterPow2(SP, Alignment);
6900 Target->_bic(SP, SP, memOpBicMask(Target->Func));
6901 }
6902
6903 InstARM32Call *TargetARM32::Sandboxer::bl(Variable *ReturnReg,
6904 Operand *CallTarget) {
6905 if (Target->NeedSandboxing) {
6906 createAutoBundle();
6907 if (auto *CallTargetR = llvm::dyn_cast<Variable>(CallTarget)) {
6908 Target->_bic(CallTargetR, CallTargetR,
6909 indirectBranchBicMask(Target->Func));
6910 }
6911 }
6912 return Target->Context.insert<InstARM32Call>(ReturnReg, CallTarget);
6913 }
6914
6915 void TargetARM32::Sandboxer::ldr(Variable *Dest, OperandARM32Mem *Mem,
6916 CondARM32::Cond Pred) {
6917 Variable *MemBase = Mem->getBase();
6918 if (Target->NeedSandboxing && baseNeedsBic(MemBase)) {
6919 createAutoBundle();
6920 assert(!Mem->isRegReg());
6921 Target->_bic(MemBase, MemBase, memOpBicMask(Target->Func), Pred);
6922 }
6923 Target->_ldr(Dest, Mem, Pred);
6924 }
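
// For loads and stores it is the base register (rather than sp) that gets
// masked, again inside a single bundle, and only when the base is not one of
// the trusted registers (r9 or sp). Sketch with placeholder operands:
//   .bundle_lock
//   bic rBase, rBase, #0xC0000000
//   ldr rDest, [rBase, #offset]
//   .bundle_unlock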
6925
6926 void TargetARM32::Sandboxer::ldrex(Variable *Dest, OperandARM32Mem *Mem,
6927 CondARM32::Cond Pred) {
6928 Variable *MemBase = Mem->getBase();
6929 if (Target->NeedSandboxing && baseNeedsBic(MemBase)) {
6930 createAutoBundle();
6931 assert(!Mem->isRegReg());
6932 Target->_bic(MemBase, MemBase, memOpBicMask(Target->Func), Pred);
6933 }
6934 Target->_ldrex(Dest, Mem, Pred);
6935 }
6936
6937 void TargetARM32::Sandboxer::reset_sp(Variable *Src) {
6938 Variable *SP = Target->getPhysicalRegister(RegARM32::Reg_sp);
6939 if (!Target->NeedSandboxing) {
6940 Target->_mov_redefined(SP, Src);
6941 return;
6942 }
6943 createAutoBundle();
6944 Target->_mov_redefined(SP, Src);
6945 Target->_bic(SP, SP, memOpBicMask(Target->Func));
6946 }
6947
6948 void TargetARM32::Sandboxer::ret(Variable *RetAddr, Variable *RetValue) {
6949 if (Target->NeedSandboxing) {
6950 createAutoBundle();
6951 Target->_bic(RetAddr, RetAddr, indirectBranchBicMask(Target->Func));
6952 }
6953 Target->_ret(RetAddr, RetValue);
6954 }
6955
6956 void TargetARM32::Sandboxer::str(Variable *Src, OperandARM32Mem *Mem,
6957 CondARM32::Cond Pred) {
6958 Variable *MemBase = Mem->getBase();
6959 if (Target->NeedSandboxing && baseNeedsBic(MemBase)) {
6960 createAutoBundle();
6961 assert(!Mem->isRegReg());
6962 Target->_bic(MemBase, MemBase, memOpBicMask(Target->Func), Pred);
6963 }
6964 Target->_str(Src, Mem, Pred);
6965 }
6966
6967 void TargetARM32::Sandboxer::strex(Variable *Dest, Variable *Src,
6968 OperandARM32Mem *Mem, CondARM32::Cond Pred) {
6969 Variable *MemBase = Mem->getBase();
6970 if (Target->NeedSandboxing && baseNeedsBic(MemBase)) {
6971 createAutoBundle();
6972 assert(!Mem->isRegReg());
6973 Target->_bic(MemBase, MemBase, memOpBicMask(Target->Func), Pred);
6974 }
6975 Target->_strex(Dest, Src, Mem, Pred);
6976 }
6977
6978 void TargetARM32::Sandboxer::sub_sp(Operand *SubAmount) {
6979 Variable *SP = Target->getPhysicalRegister(RegARM32::Reg_sp);
6980 if (!Target->NeedSandboxing) {
6981 Target->_sub(SP, SP, SubAmount);
6982 return;
6983 }
6984 createAutoBundle();
6985 Target->_sub(SP, SP, SubAmount);
6986 Target->_bic(SP, SP, memOpBicMask(Target->Func));
6987 }
6988
6989 TargetDataARM32::TargetDataARM32(GlobalContext *Ctx)
6990 : TargetDataLowering(Ctx) {}
6991
6992 void TargetDataARM32::lowerGlobals(const VariableDeclarationList &Vars,
6993 const std::string &SectionSuffix) {
6994 const bool IsPIC = getFlags().getUseNonsfi();
6995 switch (getFlags().getOutFileType()) {
6996 case FT_Elf: {
6997 ELFObjectWriter *Writer = Ctx->getObjectWriter();
6998 Writer->writeDataSection(Vars, llvm::ELF::R_ARM_ABS32, SectionSuffix,
6999 IsPIC);
7000 } break;
7001 case FT_Asm:
7002 case FT_Iasm: {
7003 OstreamLocker _(Ctx);
7004 for (const VariableDeclaration *Var : Vars) {
7005 if (getFlags().matchTranslateOnly(Var->getName(), 0)) {
7006 emitGlobal(*Var, SectionSuffix);
7007 }
7008 }
7009 } break;
7010 }
7011 }
7012
7013 namespace {
7014 template <typename T> struct ConstantPoolEmitterTraits;
7015
7016 static_assert(sizeof(uint64_t) == 8,
7017 "uint64_t is supposed to be 8 bytes wide.");
7018
7019 // TODO(jpp): implement the following when implementing constant randomization:
7020 // * template <> struct ConstantPoolEmitterTraits<uint8_t>
7021 // * template <> struct ConstantPoolEmitterTraits<uint16_t>
7022 // * template <> struct ConstantPoolEmitterTraits<uint32_t>
7023 template <> struct ConstantPoolEmitterTraits<float> {
7024 using ConstantType = ConstantFloat;
7025 static constexpr Type IceType = IceType_f32;
7026 // AsmTag and TypeName can't be constexpr because llvm::StringRef is unhappy
7027 // about them being constexpr.
7028 static const char AsmTag[];
7029 static const char TypeName[];
7030   static uint64_t bitcastToUint64(float Value) {
7031 static_assert(sizeof(Value) == sizeof(uint32_t),
7032 "Float should be 4 bytes.");
7033 const uint32_t IntValue = Utils::bitCopy<uint32_t>(Value);
7034 return static_cast<uint64_t>(IntValue);
7035 }
7036 };
7037 const char ConstantPoolEmitterTraits<float>::AsmTag[] = ".long";
7038 const char ConstantPoolEmitterTraits<float>::TypeName[] = "f32";
7039
7040 template <> struct ConstantPoolEmitterTraits<double> {
7041 using ConstantType = ConstantDouble;
7042 static constexpr Type IceType = IceType_f64;
7043 static const char AsmTag[];
7044 static const char TypeName[];
7045   static uint64_t bitcastToUint64(double Value) {
7046 static_assert(sizeof(double) == sizeof(uint64_t),
7047 "Double should be 8 bytes.");
7048 return Utils::bitCopy<uint64_t>(Value);
7049 }
7050 };
7051 const char ConstantPoolEmitterTraits<double>::AsmTag[] = ".quad";
7052 const char ConstantPoolEmitterTraits<double>::TypeName[] = "f64";
7053
7054 template <typename T>
7055 void emitConstant(
7056 Ostream &Str,
7057 const typename ConstantPoolEmitterTraits<T>::ConstantType *Const) {
7058 using Traits = ConstantPoolEmitterTraits<T>;
7059 Str << Const->getLabelName();
7060 Str << ":\n\t" << Traits::AsmTag << "\t0x";
7061 T Value = Const->getValue();
7062 Str.write_hex(Traits::bitcastToUint64(Value));
7063 Str << "\t/* " << Traits::TypeName << " " << Value << " */\n";
7064 }
7065
7066 template <typename T> void emitConstantPool(GlobalContext *Ctx) {
7067 if (!BuildDefs::dump()) {
7068 return;
7069 }
7070
7071 using Traits = ConstantPoolEmitterTraits<T>;
7072 static constexpr size_t MinimumAlignment = 4;
7073 SizeT Align = std::max(MinimumAlignment, typeAlignInBytes(Traits::IceType));
7074 assert((Align % 4) == 0 && "Constants should be aligned");
7075 Ostream &Str = Ctx->getStrEmit();
7076 ConstantList Pool = Ctx->getConstantPool(Traits::IceType);
7077
7078 Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",%progbits," << Align
7079 << "\n"
7080 << "\t.align\t" << Align << "\n";
7081
7082 if (getFlags().getReorderPooledConstants()) {
7083 // TODO(jpp): add constant pooling.
7084 UnimplementedError(getFlags());
7085 }
7086
7087 for (Constant *C : Pool) {
7088 if (!C->getShouldBePooled()) {
7089 continue;
7090 }
7091
7092 emitConstant<T>(Str, llvm::dyn_cast<typename Traits::ConstantType>(C));
7093 }
7094 }
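
// For the f32 pool the routine above emits text along these lines (the label
// name and value are placeholders; the actual label format comes from
// getLabelName()):
//   .section .rodata.cst4,"aM",%progbits,4
//   .align 4
//   <label>:
//       .long 0x3f800000   /* f32 1 */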
7095 } // end of anonymous namespace
7096
7097 void TargetDataARM32::lowerConstants() {
7098 if (getFlags().getDisableTranslation())
7099 return;
7100 switch (getFlags().getOutFileType()) {
7101 case FT_Elf: {
7102 ELFObjectWriter *Writer = Ctx->getObjectWriter();
7103 Writer->writeConstantPool<ConstantFloat>(IceType_f32);
7104 Writer->writeConstantPool<ConstantDouble>(IceType_f64);
7105 } break;
7106 case FT_Asm:
7107 case FT_Iasm: {
7108 OstreamLocker _(Ctx);
7109 emitConstantPool<float>(Ctx);
7110 emitConstantPool<double>(Ctx);
7111 break;
7112 }
7113 }
7114 }
7115
7116 void TargetDataARM32::lowerJumpTables() {
7117 if (getFlags().getDisableTranslation())
7118 return;
7119 switch (getFlags().getOutFileType()) {
7120 case FT_Elf:
7121 if (!Ctx->getJumpTables().empty()) {
7122 llvm::report_fatal_error("ARM32 does not support jump tables yet.");
7123 }
7124 break;
7125 case FT_Asm:
7126 // Already emitted from Cfg
7127 break;
7128 case FT_Iasm: {
7129 // TODO(kschimpf): Fill this in when we get more information.
7130 break;
7131 }
7132 }
7133 }
7134
7135 TargetHeaderARM32::TargetHeaderARM32(GlobalContext *Ctx)
7136 : TargetHeaderLowering(Ctx), CPUFeatures(getFlags()) {}
7137
7138 void TargetHeaderARM32::lower() {
7139 OstreamLocker _(Ctx);
7140 Ostream &Str = Ctx->getStrEmit();
7141 Str << ".syntax unified\n";
7142 // Emit build attributes in format: .eabi_attribute TAG, VALUE. See Sec. 2 of
7143 // "Addenda to, and Errata in the ABI for the ARM architecture"
7144 // http://infocenter.arm.com
7145 // /help/topic/com.arm.doc.ihi0045d/IHI0045D_ABI_addenda.pdf
7146 //
7147 // Tag_conformance should be emitted first in a file-scope sub-subsection
7148 // of the first public subsection of the attributes.
7149 Str << ".eabi_attribute 67, \"2.09\" @ Tag_conformance\n";
7150 // Chromebooks are at least A15, but use A9 for broader compatibility. For some
7151 // reason, the LLVM ARM asm parser has the .cpu directive override the mattr
7152 // specified on the command line. So to test hwdiv, we need to set the .cpu
7153 // directive higher (can't just rely on --mattr=...).
7154 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
7155 Str << ".cpu cortex-a15\n";
7156 } else {
7157 Str << ".cpu cortex-a9\n";
7158 }
7159 Str << ".eabi_attribute 6, 10 @ Tag_CPU_arch: ARMv7\n"
7160 << ".eabi_attribute 7, 65 @ Tag_CPU_arch_profile: App profile\n";
7161 Str << ".eabi_attribute 8, 1 @ Tag_ARM_ISA_use: Yes\n"
7162 << ".eabi_attribute 9, 2 @ Tag_THUMB_ISA_use: Thumb-2\n";
7163 Str << ".fpu neon\n"
7164 << ".eabi_attribute 17, 1 @ Tag_ABI_PCS_GOT_use: permit directly\n"
7165 << ".eabi_attribute 20, 1 @ Tag_ABI_FP_denormal\n"
7166 << ".eabi_attribute 21, 1 @ Tag_ABI_FP_exceptions\n"
7167 << ".eabi_attribute 23, 3 @ Tag_ABI_FP_number_model: IEEE 754\n"
7168 << ".eabi_attribute 34, 1 @ Tag_CPU_unaligned_access\n"
7169 << ".eabi_attribute 24, 1 @ Tag_ABI_align_needed: 8-byte\n"
7170 << ".eabi_attribute 25, 1 @ Tag_ABI_align_preserved: 8-byte\n"
7171 << ".eabi_attribute 28, 1 @ Tag_ABI_VFP_args\n"
7172 << ".eabi_attribute 36, 1 @ Tag_FP_HP_extension\n"
7173 << ".eabi_attribute 38, 1 @ Tag_ABI_FP_16bit_format\n"
7174 << ".eabi_attribute 42, 1 @ Tag_MPextension_use\n"
7175 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n";
7176 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
7177 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n";
7178 }
7179 // Technically R9 is used for TLS with Sandboxing, and we reserve it.
7180 // However, for compatibility with current NaCl LLVM, don't claim that.
7181 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
7182 }
7183
7184 SmallBitVector TargetARM32::TypeToRegisterSet[RegARM32::RCARM32_NUM];
7185 SmallBitVector TargetARM32::TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM];
7186 SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM];
7187
7188 } // end of namespace ARM32
7189 } // end of namespace Ice
7190