1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
2 //
3 // The Subzero Code Generator
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief Implements the TargetLoweringARM32 class, which consists almost
12 /// entirely of the lowering sequence for each high-level instruction.
13 ///
14 //===----------------------------------------------------------------------===//
15 #include "IceTargetLoweringARM32.h"
16
17 #include "IceCfg.h"
18 #include "IceCfgNode.h"
19 #include "IceClFlags.h"
20 #include "IceDefs.h"
21 #include "IceELFObjectWriter.h"
22 #include "IceGlobalInits.h"
23 #include "IceInstARM32.def"
24 #include "IceInstARM32.h"
25 #include "IceInstVarIter.h"
26 #include "IceLiveness.h"
27 #include "IceOperand.h"
28 #include "IcePhiLoweringImpl.h"
29 #include "IceRegistersARM32.h"
30 #include "IceTargetLoweringARM32.def"
31 #include "IceUtils.h"
32 #include "llvm/Support/MathExtras.h"
33
34 #include <algorithm>
35 #include <array>
36 #include <utility>
37
38 namespace ARM32 {
39 std::unique_ptr<::Ice::TargetLowering> createTargetLowering(::Ice::Cfg *Func) {
40 return ::Ice::ARM32::TargetARM32::create(Func);
41 }
42
43 std::unique_ptr<::Ice::TargetDataLowering>
44 createTargetDataLowering(::Ice::GlobalContext *Ctx) {
45 return ::Ice::ARM32::TargetDataARM32::create(Ctx);
46 }
47
48 std::unique_ptr<::Ice::TargetHeaderLowering>
49 createTargetHeaderLowering(::Ice::GlobalContext *Ctx) {
50 return ::Ice::ARM32::TargetHeaderARM32::create(Ctx);
51 }
52
53 void staticInit(::Ice::GlobalContext *Ctx) {
54 ::Ice::ARM32::TargetARM32::staticInit(Ctx);
55 if (Ice::getFlags().getUseNonsfi()) {
56 // In nonsfi, we need to reference the _GLOBAL_OFFSET_TABLE_ for accessing
57 // globals. The GOT is an external symbol (i.e., it is not defined in the
58 // pexe) so we need to register it as such so that ELF emission won't barf
59 // on an "unknown" symbol. The GOT is added to the External symbols list
60 // here because staticInit() is invoked in a single-thread context.
61 Ctx->getConstantExternSym(Ctx->getGlobalString(::Ice::GlobalOffsetTable));
62 }
63 }
64
65 bool shouldBePooled(const ::Ice::Constant *C) {
66 return ::Ice::ARM32::TargetARM32::shouldBePooled(C);
67 }
68
69 ::Ice::Type getPointerType() {
70 return ::Ice::ARM32::TargetARM32::getPointerType();
71 }
72
73 } // end of namespace ARM32
74
75 namespace Ice {
76 namespace ARM32 {
77
78 namespace {
79
80 /// SizeOf is used to obtain the size of an initializer list as a constexpr
81 /// expression. This is only needed until our C++ library is updated to
82 /// C++ 14 -- which adds constexpr members to std::initializer_list.
83 class SizeOf {
84 SizeOf(const SizeOf &) = delete;
85 SizeOf &operator=(const SizeOf &) = delete;
86
87 public:
88 constexpr SizeOf() : Size(0) {}
89 template <typename... T>
90 explicit constexpr SizeOf(T...) : Size(__length<T...>::value) {}
91 constexpr SizeT size() const { return Size; }
92
93 private:
94 template <typename T, typename... U> struct __length {
95 static constexpr std::size_t value = 1 + __length<U...>::value;
96 };
97
98 template <typename T> struct __length<T> {
99 static constexpr std::size_t value = 1;
100 };
101
102 const std::size_t Size;
103 };
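// For example, (SizeOf(1, 2.0f, 'c')).size() is 3 at compile time; the
// register table below uses this to count the entries in each alias_init
// initializer list.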
104
105 } // end of anonymous namespace
106
107 // Defines the RegARM32::Table table with register information.
108 RegARM32::RegTableType RegARM32::RegTable[RegARM32::Reg_NUM] = {
109 #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr, \
110 isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init) \
111 {name, encode, \
112 cc_arg, scratch, \
113 preserved, stackptr, \
114 frameptr, isGPR, \
115 isInt, isI64Pair, \
116 isFP32, isFP64, \
117 isVec128, (SizeOf alias_init).size(), \
118 alias_init},
119 REGARM32_TABLE
120 #undef X
121 };
122
123 namespace {
124
125 // The following table summarizes the logic for lowering the icmp instruction
126 // for i32 and narrower types. Each icmp condition has a clear mapping to an
127 // ARM32 conditional move instruction.
128
129 const struct TableIcmp32_ {
130 CondARM32::Cond Mapping;
131 } TableIcmp32[] = {
132 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V) \
133 {CondARM32::C_32},
134 ICMPARM32_TABLE
135 #undef X
136 };
137
138 // The following table summarizes the logic for lowering the icmp instruction
139 // for the i64 type. Two conditional moves are needed for setting to 1 or 0.
140 // The operands may need to be swapped, and there is a slight difference for
141 // signed vs unsigned (comparing hi vs lo first, and using cmp vs sbc).
142 const struct TableIcmp64_ {
143 bool IsSigned;
144 bool Swapped;
145 CondARM32::Cond C1, C2;
146 } TableIcmp64[] = {
147 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V) \
148 {is_signed, swapped64, CondARM32::C1_64, CondARM32::C2_64},
149 ICMPARM32_TABLE
150 #undef X
151 };
152
153 CondARM32::Cond getIcmp32Mapping(InstIcmp::ICond Cond) {
154 assert(Cond < llvm::array_lengthof(TableIcmp32));
155 return TableIcmp32[Cond].Mapping;
156 }
157
158 // In some cases, there are x-macros tables for both high-level and low-level
159 // instructions/operands that use the same enum key value. The tables are kept
160 // separate to maintain a proper separation between abstraction layers. There
161 // is a risk that the tables could get out of sync if enum values are reordered
162 // or if entries are added or deleted. The following anonymous namespaces use
163 // static_asserts to ensure everything is kept in sync.
164
165 // Validate the enum values in ICMPARM32_TABLE.
166 namespace {
167 // Define a temporary set of enum values based on low-level table entries.
168 enum _icmp_ll_enum {
169 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V) \
170 _icmp_ll_##val,
171 ICMPARM32_TABLE
172 #undef X
173 _num
174 };
175 // Define a set of constants based on high-level table entries.
176 #define X(tag, reverse, str) \
177 static constexpr int _icmp_hl_##tag = InstIcmp::tag;
178 ICEINSTICMP_TABLE
179 #undef X
180 // Define a set of constants based on low-level table entries, and ensure the
181 // table entry keys are consistent.
182 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V) \
183 static_assert( \
184 _icmp_ll_##val == _icmp_hl_##val, \
185 "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE: " #val);
186 ICMPARM32_TABLE
187 #undef X
188 // Repeat the static asserts with respect to the high-level table entries in
189 // case the high-level table has extra entries.
190 #define X(tag, reverse, str) \
191 static_assert( \
192 _icmp_hl_##tag == _icmp_ll_##tag, \
193 "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE: " #tag);
194 ICEINSTICMP_TABLE
195 #undef X
196 } // end of anonymous namespace
197
198 // Stack alignment
199 const uint32_t ARM32_STACK_ALIGNMENT_BYTES = 16;
200
201 // Value is in bytes. Return Value adjusted to the next highest multiple of the
202 // stack alignment.
203 uint32_t applyStackAlignment(uint32_t Value) {
204 return Utils::applyAlignment(Value, ARM32_STACK_ALIGNMENT_BYTES);
205 }
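// E.g., with the 16-byte stack alignment above, applyStackAlignment(20)
// returns 32 and applyStackAlignment(32) returns 32.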
206
207 // Value is in bytes. Return Value adjusted to the next highest multiple of the
208 // stack alignment required for the given type.
209 uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) {
210 // Use natural alignment, except that normally (non-NaCl) ARM only aligns
211 // vectors to 8 bytes.
212 // TODO(jvoung): Check this ...
213 size_t typeAlignInBytes = typeWidthInBytes(Ty);
214 if (isVectorType(Ty))
215 typeAlignInBytes = 8;
216 return Utils::applyAlignment(Value, typeAlignInBytes);
217 }
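// E.g., applyStackAlignmentTy(6, IceType_i64) returns 8, and
// applyStackAlignmentTy(4, IceType_v4i32) also returns 8 because vector types
// are capped at 8-byte alignment here.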
218
219 // Conservatively check if at compile time we know that the operand is
220 // definitely a non-zero integer.
221 bool isGuaranteedNonzeroInt(const Operand *Op) {
222 if (auto *Const = llvm::dyn_cast_or_null<ConstantInteger32>(Op)) {
223 return Const->getValue() != 0;
224 }
225 return false;
226 }
227
228 } // end of anonymous namespace
229
230 TargetARM32Features::TargetARM32Features(const ClFlags &Flags) {
231 static_assert(
232 (ARM32InstructionSet::End - ARM32InstructionSet::Begin) ==
233 (TargetInstructionSet::ARM32InstructionSet_End -
234 TargetInstructionSet::ARM32InstructionSet_Begin),
235 "ARM32InstructionSet range different from TargetInstructionSet");
236 if (Flags.getTargetInstructionSet() !=
237 TargetInstructionSet::BaseInstructionSet) {
238 InstructionSet = static_cast<ARM32InstructionSet>(
239 (Flags.getTargetInstructionSet() -
240 TargetInstructionSet::ARM32InstructionSet_Begin) +
241 ARM32InstructionSet::Begin);
242 }
243 }
244
245 namespace {
246 constexpr SizeT NumGPRArgs =
247 #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr, \
248 isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init) \
249 +(((cc_arg) > 0) ? 1 : 0)
250 REGARM32_GPR_TABLE
251 #undef X
252 ;
253 std::array<RegNumT, NumGPRArgs> GPRArgInitializer;
254
255 constexpr SizeT NumI64Args =
256 #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr, \
257 isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init) \
258 +(((cc_arg) > 0) ? 1 : 0)
259 REGARM32_I64PAIR_TABLE
260 #undef X
261 ;
262 std::array<RegNumT, NumI64Args> I64ArgInitializer;
263
264 constexpr SizeT NumFP32Args =
265 #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr, \
266 isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init) \
267 +(((cc_arg) > 0) ? 1 : 0)
268 REGARM32_FP32_TABLE
269 #undef X
270 ;
271 std::array<RegNumT, NumFP32Args> FP32ArgInitializer;
272
273 constexpr SizeT NumFP64Args =
274 #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr, \
275 isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init) \
276 +(((cc_arg) > 0) ? 1 : 0)
277 REGARM32_FP64_TABLE
278 #undef X
279 ;
280 std::array<RegNumT, NumFP64Args> FP64ArgInitializer;
281
282 constexpr SizeT NumVec128Args =
283 #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr, \
284 isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init) \
285 +(((cc_arg) > 0) ? 1 : 0)
286 REGARM32_VEC128_TABLE
287 #undef X
288 ;
289 std::array<RegNumT, NumVec128Args> Vec128ArgInitializer;
290
291 const char *getRegClassName(RegClass C) {
292 auto ClassNum = static_cast<RegARM32::RegClassARM32>(C);
293 assert(ClassNum < RegARM32::RCARM32_NUM);
294 switch (ClassNum) {
295 default:
296 assert(C < RC_Target);
297 return regClassString(C);
298 // Add handling of new register classes below.
299 case RegARM32::RCARM32_QtoS:
300 return "QtoS";
301 }
302 }
303
304 } // end of anonymous namespace
305
306 TargetARM32::TargetARM32(Cfg *Func)
307 : TargetLowering(Func), NeedSandboxing(SandboxingType == ST_NaCl),
308 CPUFeatures(getFlags()) {}
309
310 void TargetARM32::staticInit(GlobalContext *Ctx) {
311 RegNumT::setLimit(RegARM32::Reg_NUM);
312 // Limit this size (or do all bitsets need to be the same width)???
313 SmallBitVector IntegerRegisters(RegARM32::Reg_NUM);
314 SmallBitVector I64PairRegisters(RegARM32::Reg_NUM);
315 SmallBitVector Float32Registers(RegARM32::Reg_NUM);
316 SmallBitVector Float64Registers(RegARM32::Reg_NUM);
317 SmallBitVector VectorRegisters(RegARM32::Reg_NUM);
318 SmallBitVector QtoSRegisters(RegARM32::Reg_NUM);
319 SmallBitVector InvalidRegisters(RegARM32::Reg_NUM);
320 const unsigned EncodedReg_q8 = RegARM32::RegTable[RegARM32::Reg_q8].Encoding;
321 for (int i = 0; i < RegARM32::Reg_NUM; ++i) {
322 const auto &Entry = RegARM32::RegTable[i];
323 IntegerRegisters[i] = Entry.IsInt;
324 I64PairRegisters[i] = Entry.IsI64Pair;
325 Float32Registers[i] = Entry.IsFP32;
326 Float64Registers[i] = Entry.IsFP64;
327 VectorRegisters[i] = Entry.IsVec128;
328 RegisterAliases[i].resize(RegARM32::Reg_NUM);
329 // TODO(eholk): It would be better to store a QtoS flag in the
330 // IceRegistersARM32 table than to compare their encodings here.
331 QtoSRegisters[i] = Entry.IsVec128 && Entry.Encoding < EncodedReg_q8;
332 for (int j = 0; j < Entry.NumAliases; ++j) {
333 assert(i == j || !RegisterAliases[i][Entry.Aliases[j]]);
334 RegisterAliases[i].set(Entry.Aliases[j]);
335 }
336 assert(RegisterAliases[i][i]);
337 if (Entry.CCArg <= 0) {
338 continue;
339 }
340 const auto RegNum = RegNumT::fromInt(i);
341 if (Entry.IsGPR) {
342 GPRArgInitializer[Entry.CCArg - 1] = RegNum;
343 } else if (Entry.IsI64Pair) {
344 I64ArgInitializer[Entry.CCArg - 1] = RegNum;
345 } else if (Entry.IsFP32) {
346 FP32ArgInitializer[Entry.CCArg - 1] = RegNum;
347 } else if (Entry.IsFP64) {
348 FP64ArgInitializer[Entry.CCArg - 1] = RegNum;
349 } else if (Entry.IsVec128) {
350 Vec128ArgInitializer[Entry.CCArg - 1] = RegNum;
351 }
352 }
353 TypeToRegisterSet[IceType_void] = InvalidRegisters;
354 TypeToRegisterSet[IceType_i1] = IntegerRegisters;
355 TypeToRegisterSet[IceType_i8] = IntegerRegisters;
356 TypeToRegisterSet[IceType_i16] = IntegerRegisters;
357 TypeToRegisterSet[IceType_i32] = IntegerRegisters;
358 TypeToRegisterSet[IceType_i64] = I64PairRegisters;
359 TypeToRegisterSet[IceType_f32] = Float32Registers;
360 TypeToRegisterSet[IceType_f64] = Float64Registers;
361 TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
362 TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
363 TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
364 TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
365 TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
366 TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
367 TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
368 TypeToRegisterSet[RegARM32::RCARM32_QtoS] = QtoSRegisters;
369
370 for (size_t i = 0; i < llvm::array_lengthof(TypeToRegisterSet); ++i)
371 TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i];
372
373 filterTypeToRegisterSet(
374 Ctx, RegARM32::Reg_NUM, TypeToRegisterSet,
375 llvm::array_lengthof(TypeToRegisterSet),
376 [](RegNumT RegNum) -> std::string {
377 // This function simply removes ", " from the
378 // register name.
379 std::string Name = RegARM32::getRegName(RegNum);
380 constexpr const char RegSeparator[] = ", ";
381 constexpr size_t RegSeparatorWidth =
382 llvm::array_lengthof(RegSeparator) - 1;
383 for (size_t Pos = Name.find(RegSeparator); Pos != std::string::npos;
384 Pos = Name.find(RegSeparator)) {
385 Name.replace(Pos, RegSeparatorWidth, "");
386 }
387 return Name;
388 },
389 getRegClassName);
390 }
391
392 namespace {
393 void copyRegAllocFromInfWeightVariable64On32(const VarList &Vars) {
394 for (Variable *Var : Vars) {
395 auto *Var64 = llvm::dyn_cast<Variable64On32>(Var);
396 if (!Var64) {
397 // This is not the variable we are looking for.
398 continue;
399 }
400 // Only allow infinite-weight i64 temporaries to be register allocated.
401 assert(!Var64->hasReg() || Var64->mustHaveReg());
402 if (!Var64->hasReg()) {
403 continue;
404 }
405 const auto FirstReg =
406 RegNumT::fixme(RegARM32::getI64PairFirstGPRNum(Var->getRegNum()));
407 // This assumes little endian.
408 Variable *Lo = Var64->getLo();
409 Variable *Hi = Var64->getHi();
410 assert(Lo->hasReg() == Hi->hasReg());
411 if (Lo->hasReg()) {
412 continue;
413 }
414 Lo->setRegNum(FirstReg);
415 Lo->setMustHaveReg();
416 Hi->setRegNum(RegNumT::fixme(FirstReg + 1));
417 Hi->setMustHaveReg();
418 }
419 }
420 } // end of anonymous namespace
421
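// For example, a call foo(i32, i32, i32, i32, i32) passes the first four
// arguments in r0-r3, so only the fifth contributes 4 bytes of out-args
// space, which the final applyStackAlignment() rounds up to 16.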
422 uint32_t TargetARM32::getCallStackArgumentsSizeBytes(const InstCall *Call) {
423 TargetARM32::CallingConv CC;
424 RegNumT DummyReg;
425 size_t OutArgsSizeBytes = 0;
426 for (SizeT i = 0, NumArgs = Call->getNumArgs(); i < NumArgs; ++i) {
427 Operand *Arg = legalizeUndef(Call->getArg(i));
428 const Type Ty = Arg->getType();
429 if (isScalarIntegerType(Ty)) {
430 if (CC.argInGPR(Ty, &DummyReg)) {
431 continue;
432 }
433 } else {
434 if (CC.argInVFP(Ty, &DummyReg)) {
435 continue;
436 }
437 }
438
439 OutArgsSizeBytes = applyStackAlignmentTy(OutArgsSizeBytes, Ty);
440 OutArgsSizeBytes += typeWidthInBytesOnStack(Ty);
441 }
442
443 return applyStackAlignment(OutArgsSizeBytes);
444 }
445
446 void TargetARM32::genTargetHelperCallFor(Inst *Instr) {
447 constexpr bool NoTailCall = false;
448 constexpr bool IsTargetHelperCall = true;
449
450 switch (Instr->getKind()) {
451 default:
452 return;
453 case Inst::Arithmetic: {
454 Variable *Dest = Instr->getDest();
455 const Type DestTy = Dest->getType();
456 const InstArithmetic::OpKind Op =
457 llvm::cast<InstArithmetic>(Instr)->getOp();
458 if (isVectorType(DestTy)) {
459 switch (Op) {
460 default:
461 break;
462 case InstArithmetic::Fdiv:
463 case InstArithmetic::Frem:
464 case InstArithmetic::Sdiv:
465 case InstArithmetic::Srem:
466 case InstArithmetic::Udiv:
467 case InstArithmetic::Urem:
468 scalarizeArithmetic(Op, Dest, Instr->getSrc(0), Instr->getSrc(1));
469 Instr->setDeleted();
470 return;
471 }
472 }
473 switch (DestTy) {
474 default:
475 return;
476 case IceType_i64: {
477 // Technically, ARM has its own aeabi routines, but we can use the
478 // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses
479 // the more standard __moddi3 for rem.
480 RuntimeHelper HelperID = RuntimeHelper::H_Num;
481 switch (Op) {
482 default:
483 return;
484 case InstArithmetic::Udiv:
485 HelperID = RuntimeHelper::H_udiv_i64;
486 break;
487 case InstArithmetic::Sdiv:
488 HelperID = RuntimeHelper::H_sdiv_i64;
489 break;
490 case InstArithmetic::Urem:
491 HelperID = RuntimeHelper::H_urem_i64;
492 break;
493 case InstArithmetic::Srem:
494 HelperID = RuntimeHelper::H_srem_i64;
495 break;
496 }
497 Operand *TargetHelper = Ctx->getRuntimeHelperFunc(HelperID);
498 ARM32HelpersPreamble[TargetHelper] = &TargetARM32::preambleDivRem;
499 constexpr SizeT MaxArgs = 2;
500 auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
501 NoTailCall, IsTargetHelperCall);
502 Call->addArg(Instr->getSrc(0));
503 Call->addArg(Instr->getSrc(1));
504 Instr->setDeleted();
505 return;
506 }
507 case IceType_i32:
508 case IceType_i16:
509 case IceType_i8: {
510 const bool HasHWDiv = hasCPUFeature(TargetARM32Features::HWDivArm);
511 InstCast::OpKind CastKind;
512 RuntimeHelper HelperID = RuntimeHelper::H_Num;
513 switch (Op) {
514 default:
515 return;
516 case InstArithmetic::Udiv:
517 HelperID = HasHWDiv ? RuntimeHelper::H_Num : RuntimeHelper::H_udiv_i32;
518 CastKind = InstCast::Zext;
519 break;
520 case InstArithmetic::Sdiv:
521 HelperID = HasHWDiv ? RuntimeHelper::H_Num : RuntimeHelper::H_sdiv_i32;
522 CastKind = InstCast::Sext;
523 break;
524 case InstArithmetic::Urem:
525 HelperID = HasHWDiv ? RuntimeHelper::H_Num : RuntimeHelper::H_urem_i32;
526 CastKind = InstCast::Zext;
527 break;
528 case InstArithmetic::Srem:
529 HelperID = HasHWDiv ? RuntimeHelper::H_Num : RuntimeHelper::H_srem_i32;
530 CastKind = InstCast::Sext;
531 break;
532 }
533 if (HelperID == RuntimeHelper::H_Num) {
534 // HelperID should only ever be undefined when the processor does not
535 // have a hardware divider. If any other helpers are ever introduced,
536 // the following assert will have to be modified.
537 assert(HasHWDiv);
538 return;
539 }
540 Operand *Src0 = Instr->getSrc(0);
541 Operand *Src1 = Instr->getSrc(1);
542 if (DestTy != IceType_i32) {
543 // Src0 and Src1 have to be zero-, or signed-extended to i32. For Src0,
544 // we just insert an InstCast right before the call to the helper.
545 Variable *Src0_32 = Func->makeVariable(IceType_i32);
546 Context.insert<InstCast>(CastKind, Src0_32, Src0);
547 Src0 = Src0_32;
548
549 // For extending Src1, we will just insert an InstCast if Src1 is not a
550 // Constant. If it is, then we extend it here, and not during program
551 // runtime. This allows preambleDivRem to optimize out the div-by-0
552 // check.
553 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
554 const int32_t ShAmt = (DestTy == IceType_i16) ? 16 : 24;
555 int32_t NewC = C->getValue();
556 if (CastKind == InstCast::Zext) {
557 NewC &= ~(0x80000000l >> ShAmt);
558 } else {
559 NewC = (NewC << ShAmt) >> ShAmt;
560 }
561 Src1 = Ctx->getConstantInt32(NewC);
562 } else {
563 Variable *Src1_32 = Func->makeVariable(IceType_i32);
564 Context.insert<InstCast>(CastKind, Src1_32, Src1);
565 Src1 = Src1_32;
566 }
567 }
568 Operand *TargetHelper = Ctx->getRuntimeHelperFunc(HelperID);
569 ARM32HelpersPreamble[TargetHelper] = &TargetARM32::preambleDivRem;
570 constexpr SizeT MaxArgs = 2;
571 auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
572 NoTailCall, IsTargetHelperCall);
573 assert(Src0->getType() == IceType_i32);
574 Call->addArg(Src0);
575 assert(Src1->getType() == IceType_i32);
576 Call->addArg(Src1);
577 Instr->setDeleted();
578 return;
579 }
580 case IceType_f64:
581 case IceType_f32: {
582 if (Op != InstArithmetic::Frem) {
583 return;
584 }
585 constexpr SizeT MaxArgs = 2;
586 Operand *TargetHelper = Ctx->getRuntimeHelperFunc(
587 DestTy == IceType_f32 ? RuntimeHelper::H_frem_f32
588 : RuntimeHelper::H_frem_f64);
589 auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
590 NoTailCall, IsTargetHelperCall);
591 Call->addArg(Instr->getSrc(0));
592 Call->addArg(Instr->getSrc(1));
593 Instr->setDeleted();
594 return;
595 }
596 }
597 llvm::report_fatal_error("Control flow should never have reached here.");
598 }
599 case Inst::Cast: {
600 Variable *Dest = Instr->getDest();
601 Operand *Src0 = Instr->getSrc(0);
602 const Type DestTy = Dest->getType();
603 const Type SrcTy = Src0->getType();
604 auto *CastInstr = llvm::cast<InstCast>(Instr);
605 const InstCast::OpKind CastKind = CastInstr->getCastKind();
606
607 switch (CastKind) {
608 default:
609 return;
610 case InstCast::Fptosi:
611 case InstCast::Fptoui: {
612 if (DestTy != IceType_i64) {
613 return;
614 }
615 const bool DestIsSigned = CastKind == InstCast::Fptosi;
616 const bool Src0IsF32 = isFloat32Asserting32Or64(SrcTy);
617 Operand *TargetHelper = Ctx->getRuntimeHelperFunc(
618 Src0IsF32 ? (DestIsSigned ? RuntimeHelper::H_fptosi_f32_i64
619 : RuntimeHelper::H_fptoui_f32_i64)
620 : (DestIsSigned ? RuntimeHelper::H_fptosi_f64_i64
621 : RuntimeHelper::H_fptoui_f64_i64));
622 static constexpr SizeT MaxArgs = 1;
623 auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
624 NoTailCall, IsTargetHelperCall);
625 Call->addArg(Src0);
626 Instr->setDeleted();
627 return;
628 }
629 case InstCast::Sitofp:
630 case InstCast::Uitofp: {
631 if (SrcTy != IceType_i64) {
632 return;
633 }
634 const bool SourceIsSigned = CastKind == InstCast::Sitofp;
635 const bool DestIsF32 = isFloat32Asserting32Or64(Dest->getType());
636 Operand *TargetHelper = Ctx->getRuntimeHelperFunc(
637 DestIsF32 ? (SourceIsSigned ? RuntimeHelper::H_sitofp_i64_f32
638 : RuntimeHelper::H_uitofp_i64_f32)
639 : (SourceIsSigned ? RuntimeHelper::H_sitofp_i64_f64
640 : RuntimeHelper::H_uitofp_i64_f64));
641 static constexpr SizeT MaxArgs = 1;
642 auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
643 NoTailCall, IsTargetHelperCall);
644 Call->addArg(Src0);
645 Instr->setDeleted();
646 return;
647 }
648 case InstCast::Bitcast: {
649 if (DestTy == SrcTy) {
650 return;
651 }
652 Variable *CallDest = Dest;
653 RuntimeHelper HelperID = RuntimeHelper::H_Num;
654 switch (DestTy) {
655 default:
656 return;
657 case IceType_i8:
658 assert(SrcTy == IceType_v8i1);
659 HelperID = RuntimeHelper::H_bitcast_8xi1_i8;
660 CallDest = Func->makeVariable(IceType_i32);
661 break;
662 case IceType_i16:
663 assert(SrcTy == IceType_v16i1);
664 HelperID = RuntimeHelper::H_bitcast_16xi1_i16;
665 CallDest = Func->makeVariable(IceType_i32);
666 break;
667 case IceType_v8i1: {
668 assert(SrcTy == IceType_i8);
669 HelperID = RuntimeHelper::H_bitcast_i8_8xi1;
670 Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
671 // Arguments to functions are required to be at least 32 bits wide.
672 Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
673 Src0 = Src0AsI32;
674 } break;
675 case IceType_v16i1: {
676 assert(SrcTy == IceType_i16);
677 HelperID = RuntimeHelper::H_bitcast_i16_16xi1;
678 Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
679 // Arguments to functions are required to be at least 32 bits wide.
680 Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
681 Src0 = Src0AsI32;
682 } break;
683 }
684 constexpr SizeT MaxSrcs = 1;
685 InstCall *Call = makeHelperCall(HelperID, CallDest, MaxSrcs);
686 Call->addArg(Src0);
687 Context.insert(Call);
688 // The PNaCl ABI disallows i8/i16 return types, so truncate the helper
689 // call result to the appropriate type as necessary.
690 if (CallDest->getType() != Dest->getType())
691 Context.insert<InstCast>(InstCast::Trunc, Dest, CallDest);
692 Instr->setDeleted();
693 return;
694 }
695 case InstCast::Trunc: {
696 if (DestTy == SrcTy) {
697 return;
698 }
699 if (!isVectorType(SrcTy)) {
700 return;
701 }
702 assert(typeNumElements(DestTy) == typeNumElements(SrcTy));
703 assert(typeElementType(DestTy) == IceType_i1);
704 assert(isVectorIntegerType(SrcTy));
705 return;
706 }
707 case InstCast::Sext:
708 case InstCast::Zext: {
709 if (DestTy == SrcTy) {
710 return;
711 }
712 if (!isVectorType(DestTy)) {
713 return;
714 }
715 assert(typeNumElements(DestTy) == typeNumElements(SrcTy));
716 assert(typeElementType(SrcTy) == IceType_i1);
717 assert(isVectorIntegerType(DestTy));
718 return;
719 }
720 }
721 llvm::report_fatal_error("Control flow should never have reached here.");
722 }
723 case Inst::Intrinsic: {
724 Variable *Dest = Instr->getDest();
725 auto *Intrinsic = llvm::cast<InstIntrinsic>(Instr);
726 Intrinsics::IntrinsicID ID = Intrinsic->getIntrinsicID();
727 switch (ID) {
728 default:
729 return;
730 case Intrinsics::Ctpop: {
731 Operand *Src0 = Intrinsic->getArg(0);
732 Operand *TargetHelper =
733 Ctx->getRuntimeHelperFunc(isInt32Asserting32Or64(Src0->getType())
734 ? RuntimeHelper::H_call_ctpop_i32
735 : RuntimeHelper::H_call_ctpop_i64);
736 static constexpr SizeT MaxArgs = 1;
737 auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
738 NoTailCall, IsTargetHelperCall);
739 Call->addArg(Src0);
740 Instr->setDeleted();
741 if (Src0->getType() == IceType_i64) {
742 ARM32HelpersPostamble[TargetHelper] = &TargetARM32::postambleCtpop64;
743 }
744 return;
745 }
746 case Intrinsics::Longjmp: {
747 static constexpr SizeT MaxArgs = 2;
748 static constexpr Variable *NoDest = nullptr;
749 Operand *TargetHelper =
750 Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_longjmp);
751 auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
752 NoTailCall, IsTargetHelperCall);
753 Call->addArg(Intrinsic->getArg(0));
754 Call->addArg(Intrinsic->getArg(1));
755 Instr->setDeleted();
756 return;
757 }
758 case Intrinsics::Memcpy: {
759 // In the future, we could potentially emit an inline memcpy/memset, etc.
760 // for intrinsic calls w/ a known length.
761 static constexpr SizeT MaxArgs = 3;
762 static constexpr Variable *NoDest = nullptr;
763 Operand *TargetHelper =
764 Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memcpy);
765 auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
766 NoTailCall, IsTargetHelperCall);
767 Call->addArg(Intrinsic->getArg(0));
768 Call->addArg(Intrinsic->getArg(1));
769 Call->addArg(Intrinsic->getArg(2));
770 Instr->setDeleted();
771 return;
772 }
773 case Intrinsics::Memmove: {
774 static constexpr SizeT MaxArgs = 3;
775 static constexpr Variable *NoDest = nullptr;
776 Operand *TargetHelper =
777 Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memmove);
778 auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
779 NoTailCall, IsTargetHelperCall);
780 Call->addArg(Intrinsic->getArg(0));
781 Call->addArg(Intrinsic->getArg(1));
782 Call->addArg(Intrinsic->getArg(2));
783 Instr->setDeleted();
784 return;
785 }
786 case Intrinsics::Memset: {
787 // The value operand needs to be extended to a stack slot size because the
788 // PNaCl ABI requires arguments to be at least 32 bits wide.
789 Operand *ValOp = Intrinsic->getArg(1);
790 assert(ValOp->getType() == IceType_i8);
791 Variable *ValExt = Func->makeVariable(stackSlotType());
792 Context.insert<InstCast>(InstCast::Zext, ValExt, ValOp);
793
794 // Technically, ARM has its own __aeabi_memset, but we can use plain
795 // memset too. The value and size argument need to be flipped if we ever
796 // decide to use __aeabi_memset.
797 static constexpr SizeT MaxArgs = 3;
798 static constexpr Variable *NoDest = nullptr;
799 Operand *TargetHelper =
800 Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memset);
801 auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
802 NoTailCall, IsTargetHelperCall);
803 Call->addArg(Intrinsic->getArg(0));
804 Call->addArg(ValExt);
805 Call->addArg(Intrinsic->getArg(2));
806 Instr->setDeleted();
807 return;
808 }
809 case Intrinsics::NaClReadTP: {
810 if (SandboxingType == ST_NaCl) {
811 return;
812 }
813 static constexpr SizeT MaxArgs = 0;
814 Operand *TargetHelper =
815 SandboxingType == ST_Nonsfi
816 ? Ctx->getConstantExternSym(
817 Ctx->getGlobalString("__aeabi_read_tp"))
818 : Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_read_tp);
819 Context.insert<InstCall>(MaxArgs, Dest, TargetHelper, NoTailCall,
820 IsTargetHelperCall);
821 Instr->setDeleted();
822 return;
823 }
824 case Intrinsics::Setjmp: {
825 static constexpr SizeT MaxArgs = 1;
826 Operand *TargetHelper =
827 Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_setjmp);
828 auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
829 NoTailCall, IsTargetHelperCall);
830 Call->addArg(Intrinsic->getArg(0));
831 Instr->setDeleted();
832 return;
833 }
834 }
835 llvm::report_fatal_error("Control flow should never have reached here.");
836 }
837 }
838 }
839
840 void TargetARM32::findMaxStackOutArgsSize() {
841 // MinNeededOutArgsBytes should be updated if the Target ever creates a
842 // high-level InstCall that requires more stack bytes.
843 constexpr size_t MinNeededOutArgsBytes = 0;
844 MaxOutArgsSizeBytes = MinNeededOutArgsBytes;
845 for (CfgNode *Node : Func->getNodes()) {
846 Context.init(Node);
847 while (!Context.atEnd()) {
848 PostIncrLoweringContext PostIncrement(Context);
849 Inst *CurInstr = iteratorToInst(Context.getCur());
850 if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) {
851 SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call);
852 MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes);
853 }
854 }
855 }
856 }
857
858 void TargetARM32::createGotPtr() {
859 if (SandboxingType != ST_Nonsfi) {
860 return;
861 }
862 GotPtr = Func->makeVariable(IceType_i32);
863 }
864
865 void TargetARM32::insertGotPtrInitPlaceholder() {
866 if (SandboxingType != ST_Nonsfi) {
867 return;
868 }
869 assert(GotPtr != nullptr);
870 // We add the two placeholder instructions here. The first fakedefs T, an
871 // infinite-weight temporary, while the second fakedefs the GotPtr "using" T.
872 // This is needed because the GotPtr initialization, if needed, will require
873 // a register:
874 //
875 // movw reg, _GLOBAL_OFFSET_TABLE_ - 16 - .
876 // movt reg, _GLOBAL_OFFSET_TABLE_ - 12 - .
877 // add reg, pc, reg
878 // mov GotPtr, reg
879 //
880 // If GotPtr is not used, then both these pseudo-instructions are dce'd.
881 Variable *T = makeReg(IceType_i32);
882 Context.insert<InstFakeDef>(T);
883 Context.insert<InstFakeDef>(GotPtr, T);
884 }
885
886 GlobalString
887 TargetARM32::createGotoffRelocation(const ConstantRelocatable *CR) {
888 GlobalString CRName = CR->getName();
889 GlobalString CRGotoffName =
890 Ctx->getGlobalString("GOTOFF$" + Func->getFunctionName() + "$" + CRName);
891 if (KnownGotoffs.count(CRGotoffName) == 0) {
892 constexpr bool SuppressMangling = true;
893 auto *Global =
894 VariableDeclaration::create(Func->getGlobalPool(), SuppressMangling);
895 Global->setIsConstant(true);
896 Global->setName(CRName);
897 Func->getGlobalPool()->willNotBeEmitted(Global);
898
899 auto *Gotoff =
900 VariableDeclaration::create(Func->getGlobalPool(), SuppressMangling);
901 constexpr auto GotFixup = R_ARM_GOTOFF32;
902 Gotoff->setIsConstant(true);
903 Gotoff->addInitializer(VariableDeclaration::RelocInitializer::create(
904 Func->getGlobalPool(), Global, {RelocOffset::create(Ctx, 0)},
905 GotFixup));
906 Gotoff->setName(CRGotoffName);
907 Func->addGlobal(Gotoff);
908 KnownGotoffs.emplace(CRGotoffName);
909 }
910 return CRGotoffName;
911 }
912
913 void TargetARM32::materializeGotAddr(CfgNode *Node) {
914 if (SandboxingType != ST_Nonsfi) {
915 return;
916 }
917
918 // At first, we try to find the
919 // GotPtr = def T
920 // pseudo-instruction that we placed for defining the got ptr. That
921 // instruction is not just a place-holder for defining the GotPtr (thus
922 // keeping liveness consistent), but it is also located at a point where it is
923 // safe to materialize the got addr -- i.e., before loading parameters to
924 // registers, but after moving register parameters from their home location.
925 InstFakeDef *DefGotPtr = nullptr;
926 for (auto &Inst : Node->getInsts()) {
927 auto *FakeDef = llvm::dyn_cast<InstFakeDef>(&Inst);
928 if (FakeDef != nullptr && FakeDef->getDest() == GotPtr) {
929 DefGotPtr = FakeDef;
930 break;
931 }
932 }
933
934 if (DefGotPtr == nullptr || DefGotPtr->isDeleted()) {
935 return;
936 }
937
938 // The got addr needs to be materialized at the same point where DefGotPtr
939 // lives.
940 Context.setInsertPoint(instToIterator(DefGotPtr));
941 assert(DefGotPtr->getSrcSize() == 1);
942 auto *T = llvm::cast<Variable>(DefGotPtr->getSrc(0));
943 loadNamedConstantRelocatablePIC(Ctx->getGlobalString(GlobalOffsetTable), T,
944 [this, T](Variable *PC) { _add(T, PC, T); });
945 _mov(GotPtr, T);
946 DefGotPtr->setDeleted();
947 }
948
949 void TargetARM32::loadNamedConstantRelocatablePIC(
950 GlobalString Name, Variable *Register,
951 std::function<void(Variable *PC)> Finish) {
952 assert(SandboxingType == ST_Nonsfi);
953 // We makeReg() here instead of getPhysicalRegister() because the latter ends
954 // up creating multi-block temporaries that liveness fails to validate.
955 auto *PC = makeReg(IceType_i32, RegARM32::Reg_pc);
956
957 auto *AddPcReloc = RelocOffset::create(Ctx);
958 AddPcReloc->setSubtract(true);
959 auto *AddPcLabel = InstARM32Label::create(Func, this);
960 AddPcLabel->setRelocOffset(AddPcReloc);
961
962 auto *MovwReloc = RelocOffset::create(Ctx);
963 auto *MovwLabel = InstARM32Label::create(Func, this);
964 MovwLabel->setRelocOffset(MovwReloc);
965
966 auto *MovtReloc = RelocOffset::create(Ctx);
967 auto *MovtLabel = InstARM32Label::create(Func, this);
968 MovtLabel->setRelocOffset(MovtReloc);
969
970 // The EmitStrings for these constant relocatables have hardcoded offsets
971 // attached to them. This could be dangerous if, e.g., we ever implemented
972 // instruction scheduling, but llvm-mc currently does not support
973 //
974 // movw reg, #:lower16:(Symbol - Label - Number)
975 // movt reg, #:upper16:(Symbol - Label - Number)
976 //
977 // relocations.
978 static constexpr RelocOffsetT PcOffset = -8;
979 auto *CRLower = Ctx->getConstantSymWithEmitString(
980 PcOffset, {MovwReloc, AddPcReloc}, Name, Name + " -16");
981 auto *CRUpper = Ctx->getConstantSymWithEmitString(
982 PcOffset, {MovtReloc, AddPcReloc}, Name, Name + " -12");
983
984 Context.insert(MovwLabel);
985 _movw(Register, CRLower);
986 Context.insert(MovtLabel);
987 _movt(Register, CRUpper);
988 // PC = fake-def to keep liveness consistent.
989 Context.insert<InstFakeDef>(PC);
990 Context.insert(AddPcLabel);
991 Finish(PC);
992 }
993
994 void TargetARM32::translateO2() {
995 TimerMarker T(TimerStack::TT_O2, Func);
996
997 // TODO(stichnot): share passes with other targets?
998 // https://code.google.com/p/nativeclient/issues/detail?id=4094
999 if (SandboxingType == ST_Nonsfi) {
1000 createGotPtr();
1001 }
1002 genTargetHelperCalls();
1003 findMaxStackOutArgsSize();
1004
1005 // Merge Alloca instructions, and lay out the stack.
1006 static constexpr bool SortAndCombineAllocas = true;
1007 Func->processAllocas(SortAndCombineAllocas);
1008 Func->dump("After Alloca processing");
1009
1010 if (!getFlags().getEnablePhiEdgeSplit()) {
1011 // Lower Phi instructions.
1012 Func->placePhiLoads();
1013 if (Func->hasError())
1014 return;
1015 Func->placePhiStores();
1016 if (Func->hasError())
1017 return;
1018 Func->deletePhis();
1019 if (Func->hasError())
1020 return;
1021 Func->dump("After Phi lowering");
1022 }
1023
1024 // Address mode optimization.
1025 Func->getVMetadata()->init(VMK_SingleDefs);
1026 Func->doAddressOpt();
1027 Func->materializeVectorShuffles();
1028
1029 // Argument lowering
1030 Func->doArgLowering();
1031
1032 // Target lowering. This requires liveness analysis for some parts of the
1033 // lowering decisions, such as compare/branch fusing. If non-lightweight
1034 // liveness analysis is used, the instructions need to be renumbered first.
1035 // TODO: This renumbering should only be necessary if we're actually
1036 // calculating live intervals, which we only do for register allocation.
1037 Func->renumberInstructions();
1038 if (Func->hasError())
1039 return;
1040
1041 // TODO: It should be sufficient to use the fastest liveness calculation,
1042 // i.e. livenessLightweight(). However, for some reason that slows down the
1043 // rest of the translation. Investigate.
1044 Func->liveness(Liveness_Basic);
1045 if (Func->hasError())
1046 return;
1047 Func->dump("After ARM32 address mode opt");
1048
1049 if (SandboxingType == ST_Nonsfi) {
1050 insertGotPtrInitPlaceholder();
1051 }
1052 Func->genCode();
1053 if (Func->hasError())
1054 return;
1055 Func->dump("After ARM32 codegen");
1056
1057 // Register allocation. This requires instruction renumbering and full
1058 // liveness analysis.
1059 Func->renumberInstructions();
1060 if (Func->hasError())
1061 return;
1062 Func->liveness(Liveness_Intervals);
1063 if (Func->hasError())
1064 return;
1065 // The post-codegen dump is done here, after liveness analysis and associated
1066 // cleanup, to make the dump cleaner and more useful.
1067 Func->dump("After initial ARM32 codegen");
1068 // Validate the live range computations. The expensive validation call is
1069 // deliberately only made when assertions are enabled.
1070 assert(Func->validateLiveness());
1071 Func->getVMetadata()->init(VMK_All);
1072 regAlloc(RAK_Global);
1073 if (Func->hasError())
1074 return;
1075
1076 copyRegAllocFromInfWeightVariable64On32(Func->getVariables());
1077 Func->dump("After linear scan regalloc");
1078
1079 if (getFlags().getEnablePhiEdgeSplit()) {
1080 Func->advancedPhiLowering();
1081 Func->dump("After advanced Phi lowering");
1082 }
1083
1084 ForbidTemporaryWithoutReg _(this);
1085
1086 // Stack frame mapping.
1087 Func->genFrame();
1088 if (Func->hasError())
1089 return;
1090 Func->dump("After stack frame mapping");
1091
1092 postLowerLegalization();
1093 if (Func->hasError())
1094 return;
1095 Func->dump("After postLowerLegalization");
1096
1097 Func->contractEmptyNodes();
1098 Func->reorderNodes();
1099
1100 // Branch optimization. This needs to be done just before code emission. In
1101 // particular, no transformations that insert or reorder CfgNodes should be
1102 // done after branch optimization. We go ahead and do it before nop insertion
1103 // to reduce the amount of work needed for searching for opportunities.
1104 Func->doBranchOpt();
1105 Func->dump("After branch optimization");
1106 }
1107
1108 void TargetARM32::translateOm1() {
1109 TimerMarker T(TimerStack::TT_Om1, Func);
1110
1111 // TODO(stichnot): share passes with other targets?
1112 if (SandboxingType == ST_Nonsfi) {
1113 createGotPtr();
1114 }
1115
1116 genTargetHelperCalls();
1117 findMaxStackOutArgsSize();
1118
1119 // Do not merge Alloca instructions, and lay out the stack.
1120 static constexpr bool DontSortAndCombineAllocas = false;
1121 Func->processAllocas(DontSortAndCombineAllocas);
1122 Func->dump("After Alloca processing");
1123
1124 Func->placePhiLoads();
1125 if (Func->hasError())
1126 return;
1127 Func->placePhiStores();
1128 if (Func->hasError())
1129 return;
1130 Func->deletePhis();
1131 if (Func->hasError())
1132 return;
1133 Func->dump("After Phi lowering");
1134
1135 Func->doArgLowering();
1136
1137 if (SandboxingType == ST_Nonsfi) {
1138 insertGotPtrInitPlaceholder();
1139 }
1140 Func->genCode();
1141 if (Func->hasError())
1142 return;
1143 Func->dump("After initial ARM32 codegen");
1144
1145 regAlloc(RAK_InfOnly);
1146 if (Func->hasError())
1147 return;
1148
1149 copyRegAllocFromInfWeightVariable64On32(Func->getVariables());
1150 Func->dump("After regalloc of infinite-weight variables");
1151
1152 ForbidTemporaryWithoutReg _(this);
1153
1154 Func->genFrame();
1155 if (Func->hasError())
1156 return;
1157 Func->dump("After stack frame mapping");
1158
1159 postLowerLegalization();
1160 if (Func->hasError())
1161 return;
1162 Func->dump("After postLowerLegalization");
1163 }
1164
1165 uint32_t TargetARM32::getStackAlignment() const {
1166 return ARM32_STACK_ALIGNMENT_BYTES;
1167 }
1168
1169 bool TargetARM32::doBranchOpt(Inst *I, const CfgNode *NextNode) {
1170 if (auto *Br = llvm::dyn_cast<InstARM32Br>(I)) {
1171 return Br->optimizeBranch(NextNode);
1172 }
1173 return false;
1174 }
1175
1176 const char *TargetARM32::getRegName(RegNumT RegNum, Type Ty) const {
1177 (void)Ty;
1178 return RegARM32::getRegName(RegNum);
1179 }
1180
1181 Variable *TargetARM32::getPhysicalRegister(RegNumT RegNum, Type Ty) {
1182 static const Type DefaultType[] = {
1183 #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr, \
1184 isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init) \
1185 (isFP32) \
1186 ? IceType_f32 \
1187 : ((isFP64) ? IceType_f64 : ((isVec128 ? IceType_v4i32 : IceType_i32))),
1188 REGARM32_TABLE
1189 #undef X
1190 };
1191
1192 if (Ty == IceType_void) {
1193 assert(unsigned(RegNum) < llvm::array_lengthof(DefaultType));
1194 Ty = DefaultType[RegNum];
1195 }
1196 if (PhysicalRegisters[Ty].empty())
1197 PhysicalRegisters[Ty].resize(RegARM32::Reg_NUM);
1198 assert(unsigned(RegNum) < PhysicalRegisters[Ty].size());
1199 Variable *Reg = PhysicalRegisters[Ty][RegNum];
1200 if (Reg == nullptr) {
1201 Reg = Func->makeVariable(Ty);
1202 Reg->setRegNum(RegNum);
1203 PhysicalRegisters[Ty][RegNum] = Reg;
1204 // Specially mark a named physical register as an "argument" so that it is
1205 // considered live upon function entry. Otherwise it's possible to get
1206 // liveness validation errors for saving callee-save registers.
1207 Func->addImplicitArg(Reg);
1208 // Don't bother tracking the live range of a named physical register.
1209 Reg->setIgnoreLiveness();
1210 }
1211 return Reg;
1212 }
1213
1214 void TargetARM32::emitJumpTable(const Cfg *Func,
1215 const InstJumpTable *JumpTable) const {
1216 (void)Func;
1217 (void)JumpTable;
1218 UnimplementedError(getFlags());
1219 }
1220
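// For a variable with no register, emitVariable() prints a memory operand
// such as "[fp, #-8]" (or "[sp, #12]" when no frame pointer is used).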
1221 void TargetARM32::emitVariable(const Variable *Var) const {
1222 if (!BuildDefs::dump())
1223 return;
1224 Ostream &Str = Ctx->getStrEmit();
1225 if (Var->hasReg()) {
1226 Str << getRegName(Var->getRegNum(), Var->getType());
1227 return;
1228 }
1229 if (Var->mustHaveReg()) {
1230 llvm::report_fatal_error("Infinite-weight Variable (" + Var->getName() +
1231 ") has no register assigned - function " +
1232 Func->getFunctionName());
1233 }
1234 assert(!Var->isRematerializable());
1235 int32_t Offset = Var->getStackOffset();
1236 auto BaseRegNum = Var->getBaseRegNum();
1237 if (BaseRegNum.hasNoValue()) {
1238 BaseRegNum = getFrameOrStackReg();
1239 }
1240 const Type VarTy = Var->getType();
1241 Str << "[" << getRegName(BaseRegNum, VarTy);
1242 if (Offset != 0) {
1243 Str << ", #" << Offset;
1244 }
1245 Str << "]";
1246 }
1247
1248 TargetARM32::CallingConv::CallingConv()
1249 : GPRegsUsed(RegARM32::Reg_NUM),
1250 GPRArgs(GPRArgInitializer.rbegin(), GPRArgInitializer.rend()),
1251 I64Args(I64ArgInitializer.rbegin(), I64ArgInitializer.rend()),
1252 VFPRegsUsed(RegARM32::Reg_NUM),
1253 FP32Args(FP32ArgInitializer.rbegin(), FP32ArgInitializer.rend()),
1254 FP64Args(FP64ArgInitializer.rbegin(), FP64ArgInitializer.rend()),
1255 Vec128Args(Vec128ArgInitializer.rbegin(), Vec128ArgInitializer.rend()) {}
1256
1257 bool TargetARM32::CallingConv::argInGPR(Type Ty, RegNumT *Reg) {
1258 CfgVector<RegNumT> *Source;
1259
1260 switch (Ty) {
1261 default: {
1262 assert(isScalarIntegerType(Ty));
1263 Source = &GPRArgs;
1264 } break;
1265 case IceType_i64: {
1266 Source = &I64Args;
1267 } break;
1268 }
1269
1270 discardUnavailableGPRsAndTheirAliases(Source);
1271
1272 if (Source->empty()) {
1273 GPRegsUsed.set();
1274 return false;
1275 }
1276
1277 *Reg = Source->back();
1278 // Note that we don't Source->pop_back() here. This is intentional. Notice how
1279 // we mark all of Reg's aliases as Used. So, for the next argument,
1280 // Source->back() is marked as unavailable, and it is thus implicitly popped
1281 // from the stack.
1282 GPRegsUsed |= RegisterAliases[*Reg];
1283 return true;
1284 }
1285
1286 // GPR are not packed when passing parameters. Thus, a function foo(i32, i64,
1287 // i32) will have the first argument in r0, the second in r1-r2, and the third
1288 // on the stack. To model this behavior, whenever we pop a register from Regs,
1289 // we remove all of its aliases from the pool of available GPRs. This has the
1290 // effect of computing the "closure" on the GPR registers.
1291 void TargetARM32::CallingConv::discardUnavailableGPRsAndTheirAliases(
1292 CfgVector<RegNumT> *Regs) {
1293 while (!Regs->empty() && GPRegsUsed[Regs->back()]) {
1294 GPRegsUsed |= RegisterAliases[Regs->back()];
1295 Regs->pop_back();
1296 }
1297 }
1298
1299 bool TargetARM32::CallingConv::argInVFP(Type Ty, RegNumT *Reg) {
1300 CfgVector<RegNumT> *Source;
1301
1302 switch (Ty) {
1303 default: {
1304 assert(isVectorType(Ty));
1305 Source = &Vec128Args;
1306 } break;
1307 case IceType_f32: {
1308 Source = &FP32Args;
1309 } break;
1310 case IceType_f64: {
1311 Source = &FP64Args;
1312 } break;
1313 }
1314
1315 discardUnavailableVFPRegs(Source);
1316
1317 if (Source->empty()) {
1318 VFPRegsUsed.set();
1319 return false;
1320 }
1321
1322 *Reg = Source->back();
1323 VFPRegsUsed |= RegisterAliases[*Reg];
1324 return true;
1325 }
1326
1327 // Arguments in VFP registers are not packed, so we don't mark the popped
1328 // registers' aliases as unavailable.
1329 void TargetARM32::CallingConv::discardUnavailableVFPRegs(
1330 CfgVector<RegNumT> *Regs) {
1331 while (!Regs->empty() && VFPRegsUsed[Regs->back()]) {
1332 Regs->pop_back();
1333 }
1334 }
1335
1336 void TargetARM32::lowerArguments() {
1337 VarList &Args = Func->getArgs();
1338 TargetARM32::CallingConv CC;
1339
1340 // For each register argument, replace Arg in the argument list with the home
1341 // register. Then generate an instruction in the prolog to copy the home
1342 // register to the assigned location of Arg.
1343 Context.init(Func->getEntryNode());
1344 Context.setInsertPoint(Context.getCur());
1345
1346 for (SizeT I = 0, E = Args.size(); I < E; ++I) {
1347 Variable *Arg = Args[I];
1348 Type Ty = Arg->getType();
1349 RegNumT RegNum;
1350 if (isScalarIntegerType(Ty)) {
1351 if (!CC.argInGPR(Ty, &RegNum)) {
1352 continue;
1353 }
1354 } else {
1355 if (!CC.argInVFP(Ty, &RegNum)) {
1356 continue;
1357 }
1358 }
1359
1360 Variable *RegisterArg = Func->makeVariable(Ty);
1361 if (BuildDefs::dump()) {
1362 RegisterArg->setName(Func, "home_reg:" + Arg->getName());
1363 }
1364 RegisterArg->setIsArg();
1365 Arg->setIsArg(false);
1366 Args[I] = RegisterArg;
1367 switch (Ty) {
1368 default: {
1369 RegisterArg->setRegNum(RegNum);
1370 } break;
1371 case IceType_i64: {
1372 auto *RegisterArg64 = llvm::cast<Variable64On32>(RegisterArg);
1373 RegisterArg64->initHiLo(Func);
1374 RegisterArg64->getLo()->setRegNum(
1375 RegNumT::fixme(RegARM32::getI64PairFirstGPRNum(RegNum)));
1376 RegisterArg64->getHi()->setRegNum(
1377 RegNumT::fixme(RegARM32::getI64PairSecondGPRNum(RegNum)));
1378 } break;
1379 }
1380 Context.insert<InstAssign>(Arg, RegisterArg);
1381 }
1382 }
1383
1384 // Helper function for addProlog().
1385 //
1386 // This assumes Arg is an argument passed on the stack. This sets the frame
1387 // offset for Arg and updates InArgsSizeBytes according to Arg's width. For an
1388 // I64 arg that has been split into Lo and Hi components, it calls itself
1389 // recursively on the components, taking care to handle Lo first because of the
1390 // little-endian architecture. Lastly, this function generates an instruction
1391 // to copy Arg into its assigned register if applicable.
1392 void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
1393 size_t BasicFrameOffset,
1394 size_t *InArgsSizeBytes) {
1395 const Type Ty = Arg->getType();
1396 *InArgsSizeBytes = applyStackAlignmentTy(*InArgsSizeBytes, Ty);
1397
1398 if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) {
1399 Variable *const Lo = Arg64On32->getLo();
1400 Variable *const Hi = Arg64On32->getHi();
1401 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
1402 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
1403 return;
1404 }
1405 assert(Ty != IceType_i64);
1406
1407 const int32_t ArgStackOffset = BasicFrameOffset + *InArgsSizeBytes;
1408 *InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
1409
1410 if (!Arg->hasReg()) {
1411 Arg->setStackOffset(ArgStackOffset);
1412 return;
1413 }
1414
1415 // If the argument variable has been assigned a register, we need to copy the
1416 // value from the stack slot.
1417 Variable *Parameter = Func->makeVariable(Ty);
1418 Parameter->setMustNotHaveReg();
1419 Parameter->setStackOffset(ArgStackOffset);
1420 _mov(Arg, Parameter);
1421 }
1422
1423 Type TargetARM32::stackSlotType() { return IceType_i32; }
1424
1425 void TargetARM32::addProlog(CfgNode *Node) {
1426 // Stack frame layout:
1427 //
1428 // +------------------------+
1429 // | 1. preserved registers |
1430 // +------------------------+
1431 // | 2. padding |
1432 // +------------------------+ <--- FramePointer (if used)
1433 // | 3. global spill area |
1434 // +------------------------+
1435 // | 4. padding |
1436 // +------------------------+
1437 // | 5. local spill area |
1438 // +------------------------+
1439 // | 6. padding |
1440 // +------------------------+
1441 // | 7. allocas (variable) |
1442 // +------------------------+
1443 // | 8. padding |
1444 // +------------------------+
1445 // | 9. out args |
1446 // +------------------------+ <--- StackPointer
1447 //
1448 // The following variables record the size in bytes of the given areas:
1449 // * PreservedRegsSizeBytes: area 1
1450 // * SpillAreaPaddingBytes: area 2
1451 // * GlobalsSize: area 3
1452 // * GlobalsAndSubsequentPaddingSize: areas 3 - 4
1453 // * LocalsSpillAreaSize: area 5
1454 // * SpillAreaSizeBytes: areas 2 - 6, and 9
1455 // * MaxOutArgsSizeBytes: area 9
1456 //
1457 // Determine stack frame offsets for each Variable without a register
1458 // assignment. This can be done as one variable per stack slot. Or, do
1459 // coalescing by running the register allocator again with an infinite set of
1460 // registers (as a side effect, this gives variables a second chance at
1461 // physical register assignment).
1462 //
1463 // A middle ground approach is to leverage sparsity and allocate one block of
1464 // space on the frame for globals (variables with multi-block lifetime), and
1465 // one block to share for locals (single-block lifetime).
1466
1467 Context.init(Node);
1468 Context.setInsertPoint(Context.getCur());
1469
1470 SmallBitVector CalleeSaves = getRegisterSet(RegSet_CalleeSave, RegSet_None);
1471 RegsUsed = SmallBitVector(CalleeSaves.size());
1472 VarList SortedSpilledVariables;
1473 size_t GlobalsSize = 0;
1474 // If there is a separate locals area, this represents that area. Otherwise
1475 // it counts any variable not counted by GlobalsSize.
1476 SpillAreaSizeBytes = 0;
1477 // If there is a separate locals area, this specifies the alignment for it.
1478 uint32_t LocalsSlotsAlignmentBytes = 0;
1479 // The entire spill locations area gets aligned to largest natural alignment
1480 // of the variables that have a spill slot.
1481 uint32_t SpillAreaAlignmentBytes = 0;
1482 // For now, we don't have target-specific variables that need special
1483 // treatment (no stack-slot-linked SpillVariable type).
1484 std::function<bool(Variable *)> TargetVarHook = [](Variable *Var) {
1485 static constexpr bool AssignStackSlot = false;
1486 static constexpr bool DontAssignStackSlot = !AssignStackSlot;
1487 if (llvm::isa<Variable64On32>(Var)) {
1488 return DontAssignStackSlot;
1489 }
1490 return AssignStackSlot;
1491 };
1492
1493 // Compute the list of spilled variables and bounds for GlobalsSize, etc.
1494 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
1495 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
1496 &LocalsSlotsAlignmentBytes, TargetVarHook);
1497 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
1498 SpillAreaSizeBytes += GlobalsSize;
1499
1500 // Add push instructions for preserved registers. On ARM, "push" can push a
1501 // whole list of GPRs via a bitmask (0-15). Unlike x86, ARM also has
1502 // callee-saved float/vector registers.
1503 //
1504 // The "vpush" instruction can handle a whole list of float/vector registers,
1505 // but it only handles contiguous sequences of registers by specifying the
1506 // start and the length.
1507 PreservedGPRs.reserve(CalleeSaves.size());
1508 PreservedSRegs.reserve(CalleeSaves.size());
1509
1510 // Consider FP and LR as callee-save / used as needed.
1511 if (UsesFramePointer) {
1512 if (RegsUsed[RegARM32::Reg_fp]) {
1513 llvm::report_fatal_error("Frame pointer has been used.");
1514 }
1515 CalleeSaves[RegARM32::Reg_fp] = true;
1516 RegsUsed[RegARM32::Reg_fp] = true;
1517 }
1518 if (!MaybeLeafFunc) {
1519 CalleeSaves[RegARM32::Reg_lr] = true;
1520 RegsUsed[RegARM32::Reg_lr] = true;
1521 }
1522
1523 // Make two passes over the used registers. The first pass records all the
1524 // used registers -- and their aliases. Then, we figure out which GPRs and
1525 // VFP S registers should be saved. We don't bother saving D/Q registers
1526 // because their uses are recorded as S regs uses.
1527 SmallBitVector ToPreserve(RegARM32::Reg_NUM);
1528 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
1529 if (NeedSandboxing && i == RegARM32::Reg_r9) {
1530 // r9 is never updated in sandboxed code.
1531 continue;
1532 }
1533 if (CalleeSaves[i] && RegsUsed[i]) {
1534 ToPreserve |= RegisterAliases[i];
1535 }
1536 }
1537
1538 uint32_t NumCallee = 0;
1539 size_t PreservedRegsSizeBytes = 0;
1540
1541 // RegClasses is a tuple of
1542 //
1543 // <First Register in Class, Last Register in Class, Vector of Save Registers>
1544 //
1545 // We use this tuple to figure out which register we should push/pop during
1546 // prolog/epilog.
1547 using RegClassType = std::tuple<uint32_t, uint32_t, VarList *>;
1548 const RegClassType RegClasses[] = {
1549 RegClassType(RegARM32::Reg_GPR_First, RegARM32::Reg_GPR_Last,
1550 &PreservedGPRs),
1551 RegClassType(RegARM32::Reg_SREG_First, RegARM32::Reg_SREG_Last,
1552 &PreservedSRegs)};
1553 for (const auto &RegClass : RegClasses) {
1554 const uint32_t FirstRegInClass = std::get<0>(RegClass);
1555 const uint32_t LastRegInClass = std::get<1>(RegClass);
1556 VarList *const PreservedRegsInClass = std::get<2>(RegClass);
1557 for (uint32_t Reg = FirstRegInClass; Reg <= LastRegInClass; ++Reg) {
1558 if (!ToPreserve[Reg]) {
1559 continue;
1560 }
1561 ++NumCallee;
1562 Variable *PhysicalRegister = getPhysicalRegister(RegNumT::fromInt(Reg));
1563 PreservedRegsSizeBytes +=
1564 typeWidthInBytesOnStack(PhysicalRegister->getType());
1565 PreservedRegsInClass->push_back(PhysicalRegister);
1566 }
1567 }
1568
1569 Ctx->statsUpdateRegistersSaved(NumCallee);
1570 if (!PreservedSRegs.empty())
1571 _push(PreservedSRegs);
1572 if (!PreservedGPRs.empty())
1573 _push(PreservedGPRs);
1574
1575 // Generate "mov FP, SP" if needed.
1576 if (UsesFramePointer) {
1577 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
1578 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
1579 _mov(FP, SP);
1580 // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode).
1581 Context.insert<InstFakeUse>(FP);
1582 }
1583
1584 // Align the variables area. SpillAreaPaddingBytes is the size of the region
1585 // after the preserved registers and before the spill areas.
1586 // LocalsSlotsPaddingBytes is the amount of padding between the globals and
1587 // locals area if they are separate.
1588 assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES);
1589 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
1590 uint32_t SpillAreaPaddingBytes = 0;
1591 uint32_t LocalsSlotsPaddingBytes = 0;
1592 alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
1593 GlobalsSize, LocalsSlotsAlignmentBytes,
1594 &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
1595 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
1596 uint32_t GlobalsAndSubsequentPaddingSize =
1597 GlobalsSize + LocalsSlotsPaddingBytes;
1598
1599 // Add the out args space to the stack, and align SP if necessary.
1600 if (!NeedsStackAlignment) {
1601 SpillAreaSizeBytes += MaxOutArgsSizeBytes;
1602 } else {
1603 uint32_t StackOffset = PreservedRegsSizeBytes;
1604 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
1605 StackSize = applyStackAlignment(StackSize + MaxOutArgsSizeBytes);
1606 SpillAreaSizeBytes = StackSize - StackOffset;
1607 }
1608
1609 // Combine fixed alloca with SpillAreaSize.
1610 SpillAreaSizeBytes += FixedAllocaSizeBytes;
1611
1612 // Generate "sub sp, SpillAreaSizeBytes"
1613 if (SpillAreaSizeBytes) {
1614 // Use the scratch register if needed to legalize the immediate.
1615 Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
1616 Legal_Reg | Legal_Flex, getReservedTmpReg());
1617 Sandboxer(this).sub_sp(SubAmount);
1618 if (FixedAllocaAlignBytes > ARM32_STACK_ALIGNMENT_BYTES) {
1619 Sandboxer(this).align_sp(FixedAllocaAlignBytes);
1620 }
1621 }
1622
1623 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
1624
1625 // Fill in stack offsets for stack args, and copy args into registers for
1626 // those that were register-allocated. Args are pushed right to left, so
1627 // Arg[0] is closest to the stack/frame pointer.
1628 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
1629 size_t BasicFrameOffset = PreservedRegsSizeBytes;
1630 if (!UsesFramePointer)
1631 BasicFrameOffset += SpillAreaSizeBytes;
1632
1633 materializeGotAddr(Node);
1634
1635 const VarList &Args = Func->getArgs();
1636 size_t InArgsSizeBytes = 0;
1637 TargetARM32::CallingConv CC;
1638 for (Variable *Arg : Args) {
1639 RegNumT DummyReg;
1640 const Type Ty = Arg->getType();
1641
1642 // Skip arguments passed in registers.
1643 if (isScalarIntegerType(Ty)) {
1644 if (CC.argInGPR(Ty, &DummyReg)) {
1645 continue;
1646 }
1647 } else {
1648 if (CC.argInVFP(Ty, &DummyReg)) {
1649 continue;
1650 }
1651 }
1652 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, &InArgsSizeBytes);
1653 }
1654
1655 // Fill in stack offsets for locals.
1656 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
1657 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
1658 UsesFramePointer);
1659 this->HasComputedFrame = true;
1660
1661 if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
1662 OstreamLocker _(Func->getContext());
1663 Ostream &Str = Func->getContext()->getStrDump();
1664
1665 Str << "Stack layout:\n";
1666 uint32_t SPAdjustmentPaddingSize =
1667 SpillAreaSizeBytes - LocalsSpillAreaSize -
1668 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes -
1669 MaxOutArgsSizeBytes;
1670 Str << " in-args = " << InArgsSizeBytes << " bytes\n"
1671 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
1672 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
1673 << " globals spill area = " << GlobalsSize << " bytes\n"
1674 << " globals-locals spill areas intermediate padding = "
1675 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
1676 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
1677 << " SP alignment padding = " << SPAdjustmentPaddingSize << " bytes\n";
1678
1679 Str << "Stack details:\n"
1680 << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n"
1681 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
1682 << " outgoing args size = " << MaxOutArgsSizeBytes << " bytes\n"
1683 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
1684 << " bytes\n"
1685 << " is FP based = " << UsesFramePointer << "\n";
1686 }
1687 }
1688
1689 void TargetARM32::addEpilog(CfgNode *Node) {
1690 InstList &Insts = Node->getInsts();
1691 InstList::reverse_iterator RI, E;
1692 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
1693 if (llvm::isa<InstARM32Ret>(*RI))
1694 break;
1695 }
1696 if (RI == E)
1697 return;
1698
1699 // Convert the reverse_iterator position into its corresponding (forward)
1700 // iterator position.
1701 InstList::iterator InsertPoint = reverseToForwardIterator(RI);
1702 --InsertPoint;
1703 Context.init(Node);
1704 Context.setInsertPoint(InsertPoint);
1705
1706 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
1707 if (UsesFramePointer) {
1708 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
1709 // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake
1710 // use of SP before the assignment of SP=FP keeps previous SP adjustments
1711 // from being dead-code eliminated.
1712 Context.insert<InstFakeUse>(SP);
1713 Sandboxer(this).reset_sp(FP);
1714 } else {
1715 // add SP, SpillAreaSizeBytes
1716 if (SpillAreaSizeBytes) {
1717 // Use the scratch register if needed to legalize the immediate.
1718 Operand *AddAmount =
1719 legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
1720 Legal_Reg | Legal_Flex, getReservedTmpReg());
1721 Sandboxer(this).add_sp(AddAmount);
1722 }
1723 }
1724
1725 if (!PreservedGPRs.empty())
1726 _pop(PreservedGPRs);
1727 if (!PreservedSRegs.empty())
1728 _pop(PreservedSRegs);
1729
1730 if (!getFlags().getUseSandboxing())
1731 return;
1732
1733 // Change the original ret instruction into a sandboxed return sequence.
1734 //
1735 // bundle_lock
1736 // bic lr, #0xc000000f
1737 // bx lr
1738 // bundle_unlock
1739 //
1740 // This isn't just aligning to the getBundleAlignLog2Bytes(). It needs to
1741 // restrict to the lower 1GB as well.
1742 Variable *LR = getPhysicalRegister(RegARM32::Reg_lr);
1743 Variable *RetValue = nullptr;
1744 if (RI->getSrcSize())
1745 RetValue = llvm::cast<Variable>(RI->getSrc(0));
1746
1747 Sandboxer(this).ret(LR, RetValue);
1748
1749 RI->setDeleted();
1750 }
1751
1752 bool TargetARM32::isLegalMemOffset(Type Ty, int32_t Offset) const {
1753 constexpr bool ZeroExt = false;
1754 return OperandARM32Mem::canHoldOffset(Ty, ZeroExt, Offset);
1755 }
1756
1757 Variable *TargetARM32::PostLoweringLegalizer::newBaseRegister(
1758 Variable *Base, int32_t Offset, RegNumT ScratchRegNum) {
1759 // Legalize will likely need a movw/movt combination, but if the top bits are
1760 // all 0 from negating the offset and subtracting, we could use that instead.
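// For example, with Offset == -4, -Offset == 4 has no bits set above bit 15,
// so a single "sub ScratchReg, Base, #4" suffices instead of a movw/movt pair.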
1761 const bool ShouldSub = Offset != 0 && (-Offset & 0xFFFF0000) == 0;
1762 Variable *ScratchReg = Target->makeReg(IceType_i32, ScratchRegNum);
1763 if (ShouldSub) {
1764 Operand *OffsetVal =
1765 Target->legalize(Target->Ctx->getConstantInt32(-Offset),
1766 Legal_Reg | Legal_Flex, ScratchRegNum);
1767 Target->_sub(ScratchReg, Base, OffsetVal);
1768 } else {
1769 Operand *OffsetVal =
1770 Target->legalize(Target->Ctx->getConstantInt32(Offset),
1771 Legal_Reg | Legal_Flex, ScratchRegNum);
1772 Target->_add(ScratchReg, Base, OffsetVal);
1773 }
1774
1775 if (ScratchRegNum == Target->getReservedTmpReg()) {
1776 const bool BaseIsStackOrFramePtr =
1777 Base->getRegNum() == Target->getFrameOrStackReg();
1778 // There is currently no code path that would trigger this assertion, so we
1779 // leave this assertion here in case it is ever violated. This is not a
1780 // fatal error (thus the use of assert() and not llvm::report_fatal_error)
1781 // as the program compiled by subzero will still work correctly.
1782 assert(BaseIsStackOrFramePtr);
1783 // Side-effect: updates TempBase to reflect the new Temporary.
1784 if (BaseIsStackOrFramePtr) {
1785 TempBaseReg = ScratchReg;
1786 TempBaseOffset = Offset;
1787 } else {
1788 TempBaseReg = nullptr;
1789 TempBaseOffset = 0;
1790 }
1791 }
1792
1793 return ScratchReg;
1794 }
1795
1796 OperandARM32Mem *TargetARM32::PostLoweringLegalizer::createMemOperand(
1797 Type Ty, Variable *Base, int32_t Offset, bool AllowOffsets) {
1798 assert(!Base->isRematerializable());
1799 if (Offset == 0 || (AllowOffsets && Target->isLegalMemOffset(Ty, Offset))) {
1800 return OperandARM32Mem::create(
1801 Target->Func, Ty, Base,
1802 llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset)),
1803 OperandARM32Mem::Offset);
1804 }
1805
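// The offset does not fit the addressing mode: materialize (or reuse) a
// temporary base register and express the access relative to it, with
// OffsetDiff holding the remaining displacement from that temporary base.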
1806 if (!AllowOffsets || TempBaseReg == nullptr) {
1807 newBaseRegister(Base, Offset, Target->getReservedTmpReg());
1808 }
1809
1810 int32_t OffsetDiff = Offset - TempBaseOffset;
1811 assert(AllowOffsets || OffsetDiff == 0);
1812
1813 if (!Target->isLegalMemOffset(Ty, OffsetDiff)) {
1814 newBaseRegister(Base, Offset, Target->getReservedTmpReg());
1815 OffsetDiff = 0;
1816 }
1817
1818 assert(!TempBaseReg->isRematerializable());
1819 return OperandARM32Mem::create(
1820 Target->Func, Ty, TempBaseReg,
1821 llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(OffsetDiff)),
1822 OperandARM32Mem::Offset);
1823 }
1824
1825 void TargetARM32::PostLoweringLegalizer::resetTempBaseIfClobberedBy(
1826 const Inst *Instr) {
1827 bool ClobbersTempBase = false;
1828 if (TempBaseReg != nullptr) {
1829 Variable *Dest = Instr->getDest();
1830 if (llvm::isa<InstARM32Call>(Instr)) {
1831 // The following assertion is an invariant, so we remove it from the if
1832 // test. If the invariant is ever broken/invalidated/changed, remember
1833 // to add it back to the if condition.
1834 assert(TempBaseReg->getRegNum() == Target->getReservedTmpReg());
1835 // The linker may need to clobber IP if the call is too far from PC. Thus,
1836 // we assume IP will be overwritten.
1837 ClobbersTempBase = true;
1838 } else if (Dest != nullptr &&
1839 Dest->getRegNum() == TempBaseReg->getRegNum()) {
1840 // Register redefinition.
1841 ClobbersTempBase = true;
1842 }
1843 }
1844
1845 if (ClobbersTempBase) {
1846 TempBaseReg = nullptr;
1847 TempBaseOffset = 0;
1848 }
1849 }
1850
1851 void TargetARM32::PostLoweringLegalizer::legalizeMov(InstARM32Mov *MovInstr) {
1852 Variable *Dest = MovInstr->getDest();
1853 assert(Dest != nullptr);
1854 Type DestTy = Dest->getType();
1855 assert(DestTy != IceType_i64);
1856
1857 Operand *Src = MovInstr->getSrc(0);
1858 Type SrcTy = Src->getType();
1859 (void)SrcTy;
1860 assert(SrcTy != IceType_i64);
1861
1862 if (MovInstr->isMultiDest() || MovInstr->isMultiSource())
1863 return;
1864
1865 bool Legalized = false;
1866 if (!Dest->hasReg()) {
1867 auto *SrcR = llvm::cast<Variable>(Src);
1868 assert(SrcR->hasReg());
1869 assert(!SrcR->isRematerializable());
1870 const int32_t Offset = Dest->getStackOffset();
1871 // This is a _mov(Mem(), Variable), i.e., a store.
1872 TargetARM32::Sandboxer(Target).str(
1873 SrcR, createMemOperand(DestTy, StackOrFrameReg, Offset),
1874 MovInstr->getPredicate());
1875 // _str() does not have a Dest, so we add a fake-def(Dest).
1876 Target->Context.insert<InstFakeDef>(Dest);
1877 Legalized = true;
1878 } else if (auto *Var = llvm::dyn_cast<Variable>(Src)) {
1879 if (Var->isRematerializable()) {
1880 // This is equivalent to an x86 _lea(RematOffset(%esp/%ebp), Variable).
1881
1882 // ExtraOffset is only needed for frame-pointer based frames as we have
1883 // to account for spill storage.
1884 const int32_t ExtraOffset = (Var->getRegNum() == Target->getFrameReg())
1885 ? Target->getFrameFixedAllocaOffset()
1886 : 0;
1887
1888 const int32_t Offset = Var->getStackOffset() + ExtraOffset;
1889 Variable *Base = Target->getPhysicalRegister(Var->getRegNum());
1890 Variable *T = newBaseRegister(Base, Offset, Dest->getRegNum());
1891 Target->_mov(Dest, T);
1892 Legalized = true;
1893 } else {
1894 if (!Var->hasReg()) {
1895 // This is a _mov(Variable, Mem()), i.e., a load.
1896 const int32_t Offset = Var->getStackOffset();
1897 TargetARM32::Sandboxer(Target).ldr(
1898 Dest, createMemOperand(DestTy, StackOrFrameReg, Offset),
1899 MovInstr->getPredicate());
1900 Legalized = true;
1901 }
1902 }
1903 }
1904
1905 if (Legalized) {
1906 if (MovInstr->isDestRedefined()) {
1907 Target->_set_dest_redefined();
1908 }
1909 MovInstr->setDeleted();
1910 }
1911 }
1912
1913 // ARM32 address modes:
1914 // ld/st i[8|16|32]: [reg], [reg +/- imm12], [pc +/- imm12],
1915 // [reg +/- reg << shamt5]
1916 // ld/st f[32|64] : [reg], [reg +/- imm8] , [pc +/- imm8]
1917 // ld/st vectors : [reg]
1918 //
1919 // For now, we don't handle address modes with Relocatables.
1920 namespace {
1921 // MemTraits contains per-type valid address mode information.
1922 #define X(tag, elementty, int_width, fp_width, uvec_width, svec_width, sbits, \
1923 ubits, rraddr, shaddr) \
1924 static_assert(!(shaddr) || rraddr, "Check ICETYPEARM32_TABLE::" #tag);
1925 ICETYPEARM32_TABLE
1926 #undef X
1927
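// Each entry is derived from its ICETYPEARM32_TABLE row: ValidImmMask is the
// largest unsigned immediate offset (ubits wide) the ld/st form accepts, and
// rraddr/shaddr indicate whether a register index (possibly shifted) is legal.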
1928 static const struct {
1929 int32_t ValidImmMask;
1930 bool CanHaveImm;
1931 bool CanHaveIndex;
1932 bool CanHaveShiftedIndex;
1933 } MemTraits[] = {
1934 #define X(tag, elementty, int_width, fp_width, uvec_width, svec_width, sbits, \
1935 ubits, rraddr, shaddr) \
1936 { \
1937 (1 << ubits) - 1, \
1938 (ubits) > 0, \
1939 rraddr, \
1940 shaddr, \
1941 },
1942 ICETYPEARM32_TABLE
1943 #undef X
1944 };
1945 static constexpr SizeT MemTraitsSize = llvm::array_lengthof(MemTraits);
1946 } // end of anonymous namespace
1947
1948 OperandARM32Mem *
1949 TargetARM32::PostLoweringLegalizer::legalizeMemOperand(OperandARM32Mem *Mem,
1950 bool AllowOffsets) {
1951 assert(!Mem->isRegReg() || !Mem->getIndex()->isRematerializable());
1952 assert(Mem->isRegReg() || Target->isLegalMemOffset(
1953 Mem->getType(), Mem->getOffset()->getValue()));
1954
1955 bool Legalized = false;
1956 Variable *Base = Mem->getBase();
1957 int32_t Offset = Mem->isRegReg() ? 0 : Mem->getOffset()->getValue();
1958 if (Base->isRematerializable()) {
1959 const int32_t ExtraOffset = (Base->getRegNum() == Target->getFrameReg())
1960 ? Target->getFrameFixedAllocaOffset()
1961 : 0;
1962 Offset += Base->getStackOffset() + ExtraOffset;
1963 Base = Target->getPhysicalRegister(Base->getRegNum());
1964 assert(!Base->isRematerializable());
1965 Legalized = true;
1966 }
1967
1968 if (!Legalized && !Target->NeedSandboxing) {
1969 return nullptr;
1970 }
1971
1972 if (!Mem->isRegReg()) {
1973 return createMemOperand(Mem->getType(), Base, Offset, AllowOffsets);
1974 }
1975
1976 if (Target->NeedSandboxing) {
1977 llvm::report_fatal_error("Reg-Reg address mode is not allowed.");
1978 }
1979
1980 assert(MemTraits[Mem->getType()].CanHaveIndex);
1981
1982 if (Offset != 0) {
1983 if (TempBaseReg == nullptr) {
1984 Base = newBaseRegister(Base, Offset, Target->getReservedTmpReg());
1985 } else {
1986 uint32_t Imm8, Rotate;
1987 const int32_t OffsetDiff = Offset - TempBaseOffset;
1988 if (OffsetDiff == 0) {
1989 Base = TempBaseReg;
1990 } else if (OperandARM32FlexImm::canHoldImm(OffsetDiff, &Rotate, &Imm8)) {
1991 auto *OffsetDiffF = OperandARM32FlexImm::create(
1992 Target->Func, IceType_i32, Imm8, Rotate);
1993 Target->_add(TempBaseReg, TempBaseReg, OffsetDiffF);
1994 TempBaseOffset += OffsetDiff;
1995 Base = TempBaseReg;
1996 } else if (OperandARM32FlexImm::canHoldImm(-OffsetDiff, &Rotate, &Imm8)) {
1997 auto *OffsetDiffF = OperandARM32FlexImm::create(
1998 Target->Func, IceType_i32, Imm8, Rotate);
1999 Target->_sub(TempBaseReg, TempBaseReg, OffsetDiffF);
2000 TempBaseOffset += OffsetDiff;
2001 Base = TempBaseReg;
2002 } else {
2003 Base = newBaseRegister(Base, Offset, Target->getReservedTmpReg());
2004 }
2005 }
2006 }
2007
2008 return OperandARM32Mem::create(Target->Func, Mem->getType(), Base,
2009 Mem->getIndex(), Mem->getShiftOp(),
2010 Mem->getShiftAmt(), Mem->getAddrMode());
2011 }
2012
2013 void TargetARM32::postLowerLegalization() {
2014 // If a stack variable's frame offset doesn't fit, convert from:
2015 // ldr X, OFF[SP]
2016 // to:
2017 // movw/movt TMP, OFF_PART
2018 // add TMP, TMP, SP
2019 // ldr X, OFF_MORE[TMP]
2020 //
2021 // This is safe because we have reserved TMP, and add for ARM does not
2022 // clobber the flags register.
2023 Func->dump("Before postLowerLegalization");
2024 assert(hasComputedFrame());
2025 // Do a fairly naive greedy clustering for now. Pick the first stack slot
2026 // that's out of bounds and make a new base reg using the architecture's temp
2027 // register. If that works for the next slot, then great. Otherwise, create a
2028 // new base register, clobbering the previous base register. Never share a
2029 // base reg across different basic blocks. This isn't ideal if local and
2030 // multi-block variables are far apart and their references are interspersed.
2031 // It may help to be more coordinated about assigning stack slot numbers and may
2032 // help to assign smaller offsets to higher-weight variables so that they
2033 // don't depend on this legalization.
2034 for (CfgNode *Node : Func->getNodes()) {
2035 Context.init(Node);
2036 // One legalizer per basic block, otherwise we would share the Temporary
2037 // Base Register between basic blocks.
2038 PostLoweringLegalizer Legalizer(this);
2039 while (!Context.atEnd()) {
2040 PostIncrLoweringContext PostIncrement(Context);
2041 Inst *CurInstr = iteratorToInst(Context.getCur());
2042
2043 // Check if the previous TempBaseReg is clobbered, and reset if needed.
2044 Legalizer.resetTempBaseIfClobberedBy(CurInstr);
2045
2046 if (auto *MovInstr = llvm::dyn_cast<InstARM32Mov>(CurInstr)) {
2047 Legalizer.legalizeMov(MovInstr);
2048 } else if (auto *LdrInstr = llvm::dyn_cast<InstARM32Ldr>(CurInstr)) {
2049 if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
2050 llvm::cast<OperandARM32Mem>(LdrInstr->getSrc(0)))) {
2051 Sandboxer(this).ldr(CurInstr->getDest(), LegalMem,
2052 LdrInstr->getPredicate());
2053 CurInstr->setDeleted();
2054 }
2055 } else if (auto *LdrexInstr = llvm::dyn_cast<InstARM32Ldrex>(CurInstr)) {
2056 constexpr bool DisallowOffsetsBecauseLdrex = false;
2057 if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
2058 llvm::cast<OperandARM32Mem>(LdrexInstr->getSrc(0)),
2059 DisallowOffsetsBecauseLdrex)) {
2060 Sandboxer(this).ldrex(CurInstr->getDest(), LegalMem,
2061 LdrexInstr->getPredicate());
2062 CurInstr->setDeleted();
2063 }
2064 } else if (auto *StrInstr = llvm::dyn_cast<InstARM32Str>(CurInstr)) {
2065 if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
2066 llvm::cast<OperandARM32Mem>(StrInstr->getSrc(1)))) {
2067 Sandboxer(this).str(llvm::cast<Variable>(CurInstr->getSrc(0)),
2068 LegalMem, StrInstr->getPredicate());
2069 CurInstr->setDeleted();
2070 }
2071 } else if (auto *StrexInstr = llvm::dyn_cast<InstARM32Strex>(CurInstr)) {
2072 constexpr bool DisallowOffsetsBecauseStrex = false;
2073 if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
2074 llvm::cast<OperandARM32Mem>(StrexInstr->getSrc(1)),
2075 DisallowOffsetsBecauseStrex)) {
2076 Sandboxer(this).strex(CurInstr->getDest(),
2077 llvm::cast<Variable>(CurInstr->getSrc(0)),
2078 LegalMem, StrexInstr->getPredicate());
2079 CurInstr->setDeleted();
2080 }
2081 }
2082
2083 // Sanity-check: the Legalizer will either have no Temp, or it will be
2084 // bound to IP.
2085 Legalizer.assertNoTempOrAssignedToIP();
2086 }
2087 }
2088 }
2089
2090 Operand *TargetARM32::loOperand(Operand *Operand) {
2091 assert(Operand->getType() == IceType_i64);
2092 if (Operand->getType() != IceType_i64)
2093 return Operand;
2094 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
2095 return Var64On32->getLo();
2096 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand))
2097 return Ctx->getConstantInt32(static_cast<uint32_t>(Const->getValue()));
2098 if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
2099 // Conservatively disallow memory operands with side-effects (pre/post
2100 // increment) in case of duplication.
2101 assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
2102 Mem->getAddrMode() == OperandARM32Mem::NegOffset);
2103 if (Mem->isRegReg()) {
2104 Variable *IndexR = legalizeToReg(Mem->getIndex());
2105 return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(), IndexR,
2106 Mem->getShiftOp(), Mem->getShiftAmt(),
2107 Mem->getAddrMode());
2108 } else {
2109 return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(),
2110 Mem->getOffset(), Mem->getAddrMode());
2111 }
2112 }
2113 llvm::report_fatal_error("Unsupported operand type");
2114 return nullptr;
2115 }
2116
2117 Operand *TargetARM32::hiOperand(Operand *Operand) {
2118 assert(Operand->getType() == IceType_i64);
2119 if (Operand->getType() != IceType_i64)
2120 return Operand;
2121 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
2122 return Var64On32->getHi();
2123 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
2124 return Ctx->getConstantInt32(
2125 static_cast<uint32_t>(Const->getValue() >> 32));
2126 }
2127 if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
2128 // Conservatively disallow memory operands with side-effects in case of
2129 // duplication.
2130 assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
2131 Mem->getAddrMode() == OperandARM32Mem::NegOffset);
2132 const Type SplitType = IceType_i32;
2133 if (Mem->isRegReg()) {
2134 // We have to make a temp variable T, and add 4 to either Base or Index.
2135 // The Index may be shifted, so adding 4 can mean something else. Thus,
2136 // prefer T := Base + 4, and use T as the new Base.
2137 Variable *Base = Mem->getBase();
2138 Constant *Four = Ctx->getConstantInt32(4);
2139 Variable *NewBase = Func->makeVariable(Base->getType());
2140 lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase,
2141 Base, Four));
2142 Variable *BaseR = legalizeToReg(NewBase);
2143 Variable *IndexR = legalizeToReg(Mem->getIndex());
2144 return OperandARM32Mem::create(Func, SplitType, BaseR, IndexR,
2145 Mem->getShiftOp(), Mem->getShiftAmt(),
2146 Mem->getAddrMode());
2147 } else {
2148 Variable *Base = Mem->getBase();
2149 ConstantInteger32 *Offset = Mem->getOffset();
2150 assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4));
2151 int32_t NextOffsetVal = Offset->getValue() + 4;
2152 constexpr bool ZeroExt = false;
2153 if (!OperandARM32Mem::canHoldOffset(SplitType, ZeroExt, NextOffsetVal)) {
2154 // We have to make a temp variable and add 4 to either Base or Offset.
2155 // If we add 4 to Offset, this will convert a non-RegReg addressing
2156 // mode into a RegReg addressing mode. Since NaCl sandboxing disallows
2157 // RegReg addressing modes, prefer adding to base and replacing
2158 // instead. Thus we leave the old offset alone.
2159 Constant *_4 = Ctx->getConstantInt32(4);
2160 Variable *NewBase = Func->makeVariable(Base->getType());
2161 lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add,
2162 NewBase, Base, _4));
2163 Base = NewBase;
2164 } else {
2165 Offset =
2166 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal));
2167 }
2168 Variable *BaseR = legalizeToReg(Base);
2169 return OperandARM32Mem::create(Func, SplitType, BaseR, Offset,
2170 Mem->getAddrMode());
2171 }
2172 }
2173 llvm::report_fatal_error("Unsupported operand type");
2174 return nullptr;
2175 }
2176
2177 SmallBitVector TargetARM32::getRegisterSet(RegSetMask Include,
2178 RegSetMask Exclude) const {
2179 SmallBitVector Registers(RegARM32::Reg_NUM);
2180
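// Walk the register table once; Include sets a register when any of its
// attributes matches, and Exclude clears it afterwards, so Exclude wins when
// both masks name the same attribute.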
2181 for (uint32_t i = 0; i < RegARM32::Reg_NUM; ++i) {
2182 const auto &Entry = RegARM32::RegTable[i];
2183 if (Entry.Scratch && (Include & RegSet_CallerSave))
2184 Registers[i] = true;
2185 if (Entry.Preserved && (Include & RegSet_CalleeSave))
2186 Registers[i] = true;
2187 if (Entry.StackPtr && (Include & RegSet_StackPointer))
2188 Registers[i] = true;
2189 if (Entry.FramePtr && (Include & RegSet_FramePointer))
2190 Registers[i] = true;
2191 if (Entry.Scratch && (Exclude & RegSet_CallerSave))
2192 Registers[i] = false;
2193 if (Entry.Preserved && (Exclude & RegSet_CalleeSave))
2194 Registers[i] = false;
2195 if (Entry.StackPtr && (Exclude & RegSet_StackPointer))
2196 Registers[i] = false;
2197 if (Entry.FramePtr && (Exclude & RegSet_FramePointer))
2198 Registers[i] = false;
2199 }
2200
2201 return Registers;
2202 }
2203
2204 void TargetARM32::lowerAlloca(const InstAlloca *Instr) {
2205 // Conservatively require the stack to be aligned. Some stack adjustment
2206 // operations implemented below assume that the stack is aligned before the
2207 // alloca. All the alloca code ensures that the stack alignment is preserved
2208 // after the alloca. The stack alignment restriction can be relaxed in some
2209 // cases.
2210 NeedsStackAlignment = true;
2211
2212 // For default align=0, set it to the real value 1, to avoid any
2213 // bit-manipulation problems below.
2214 const uint32_t AlignmentParam = std::max(1u, Instr->getAlignInBytes());
2215
2216 // LLVM enforces power of 2 alignment.
2217 assert(llvm::isPowerOf2_32(AlignmentParam));
2218 assert(llvm::isPowerOf2_32(ARM32_STACK_ALIGNMENT_BYTES));
2219
2220 const uint32_t Alignment =
2221 std::max(AlignmentParam, ARM32_STACK_ALIGNMENT_BYTES);
2222 const bool OverAligned = Alignment > ARM32_STACK_ALIGNMENT_BYTES;
2223 const bool OptM1 = Func->getOptLevel() == Opt_m1;
2224 const bool AllocaWithKnownOffset = Instr->getKnownFrameOffset();
2225 const bool UseFramePointer =
2226 hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1;
2227
2228 if (UseFramePointer)
2229 setHasFramePointer();
2230
2231 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
2232 if (OverAligned) {
2233 Sandboxer(this).align_sp(Alignment);
2234 }
2235
2236 Variable *Dest = Instr->getDest();
2237 Operand *TotalSize = Instr->getSizeInBytes();
2238
2239 if (const auto *ConstantTotalSize =
2240 llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
2241 const uint32_t Value =
2242 Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment);
2243 // Constant size alloca.
2244 if (!UseFramePointer) {
2245 // If we don't need a Frame Pointer, this alloca has a known offset to the
2246 // stack pointer. We don't need to adjust the stack pointer, nor assign any
2247 // value to Dest, as Dest is rematerializable.
2248 assert(Dest->isRematerializable());
2249 FixedAllocaSizeBytes += Value;
2250 Context.insert<InstFakeDef>(Dest);
2251 return;
2252 }
2253
2254 // If a frame pointer is required, then we need to store the alloca'd result
2255 // in Dest.
2256 Operand *SubAmountRF =
2257 legalize(Ctx->getConstantInt32(Value), Legal_Reg | Legal_Flex);
2258 Sandboxer(this).sub_sp(SubAmountRF);
2259 } else {
2260 // Non-constant sizes need to be adjusted to the next highest multiple of
2261 // the required alignment at runtime.
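// That is, round T up to the next multiple of Alignment by adding
// (Alignment - 1) and then aligning the result down with alignRegisterPow2.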
2262 TotalSize = legalize(TotalSize, Legal_Reg | Legal_Flex);
2263 Variable *T = makeReg(IceType_i32);
2264 _mov(T, TotalSize);
2265 Operand *AddAmount = legalize(Ctx->getConstantInt32(Alignment - 1));
2266 _add(T, T, AddAmount);
2267 alignRegisterPow2(T, Alignment);
2268 Sandboxer(this).sub_sp(T);
2269 }
2270
2271 // Add the out args area size back to SP when computing Dest, so the alloca'd memory sits above the out args area.
2272 Variable *T = SP;
2273 if (MaxOutArgsSizeBytes != 0) {
2274 T = makeReg(getPointerType());
2275 Operand *OutArgsSizeRF = legalize(
2276 Ctx->getConstantInt32(MaxOutArgsSizeBytes), Legal_Reg | Legal_Flex);
2277 _add(T, SP, OutArgsSizeRF);
2278 }
2279
2280 _mov(Dest, T);
2281 }
2282
div0Check(Type Ty,Operand * SrcLo,Operand * SrcHi)2283 void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) {
2284 if (isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi))
2285 return;
2286 Variable *SrcLoReg = legalizeToReg(SrcLo);
2287 switch (Ty) {
2288 default:
2289 llvm_unreachable(
2290 ("Unexpected type in div0Check: " + typeStdString(Ty)).c_str());
2291 case IceType_i8:
2292 case IceType_i16: {
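// Shift the narrow operand into the top bits so the flags reflect only the
// low 8/16 bits of the divisor (e.g., a 24-bit left shift for i8).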
2293 Operand *ShAmtImm = shAmtImm(32 - getScalarIntBitWidth(Ty));
2294 Variable *T = makeReg(IceType_i32);
2295 _lsls(T, SrcLoReg, ShAmtImm);
2296 Context.insert<InstFakeUse>(T);
2297 } break;
2298 case IceType_i32: {
2299 _tst(SrcLoReg, SrcLoReg);
2300 break;
2301 }
2302 case IceType_i64: {
2303 Variable *T = makeReg(IceType_i32);
2304 _orrs(T, SrcLoReg, legalize(SrcHi, Legal_Reg | Legal_Flex));
2305 // T isn't going to be used, but we need the side-effect of setting flags
2306 // from this operation.
2307 Context.insert<InstFakeUse>(T);
2308 }
2309 }
2310 auto *Label = InstARM32Label::create(Func, this);
2311 _br(Label, CondARM32::NE);
2312 _trap();
2313 Context.insert(Label);
2314 }
2315
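// Lowers an integer div/rem: emit the divide-by-zero check, widen i8/i16
// operands to i32 via ExtFunc, divide with DivFunc, and, for remainders,
// compute Dest = Src0 - Quotient * Src1 with mls.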
2316 void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R,
2317 Operand *Src1, ExtInstr ExtFunc,
2318 DivInstr DivFunc, bool IsRemainder) {
2319 div0Check(Dest->getType(), Src1, nullptr);
2320 Variable *Src1R = legalizeToReg(Src1);
2321 Variable *T0R = Src0R;
2322 Variable *T1R = Src1R;
2323 if (Dest->getType() != IceType_i32) {
2324 T0R = makeReg(IceType_i32);
2325 (this->*ExtFunc)(T0R, Src0R, CondARM32::AL);
2326 T1R = makeReg(IceType_i32);
2327 (this->*ExtFunc)(T1R, Src1R, CondARM32::AL);
2328 }
2329 if (hasCPUFeature(TargetARM32Features::HWDivArm)) {
2330 (this->*DivFunc)(T, T0R, T1R, CondARM32::AL);
2331 if (IsRemainder) {
2332 Variable *T2 = makeReg(IceType_i32);
2333 _mls(T2, T, T1R, T0R);
2334 T = T2;
2335 }
2336 _mov(Dest, T);
2337 } else {
2338 llvm::report_fatal_error("div should have already been turned into a call");
2339 }
2340 }
2341
2342 TargetARM32::SafeBoolChain
2343 TargetARM32::lowerInt1Arithmetic(const InstArithmetic *Instr) {
2344 Variable *Dest = Instr->getDest();
2345 assert(Dest->getType() == IceType_i1);
2346
2347 // So folding didn't work for Instr. Not a problem: We just need to
2348 // materialize the Sources, and perform the operation. We create regular
2349 // Variables (and not infinite-weight ones) because this call might recurse a
2350 // lot, and we might end up with tons of infinite weight temporaries.
2351 assert(Instr->getSrcSize() == 2);
2352 Variable *Src0 = Func->makeVariable(IceType_i1);
2353 SafeBoolChain Src0Safe = lowerInt1(Src0, Instr->getSrc(0));
2354
2355 Operand *Src1 = Instr->getSrc(1);
2356 SafeBoolChain Src1Safe = SBC_Yes;
2357
2358 if (!llvm::isa<Constant>(Src1)) {
2359 Variable *Src1V = Func->makeVariable(IceType_i1);
2360 Src1Safe = lowerInt1(Src1V, Src1);
2361 Src1 = Src1V;
2362 }
2363
2364 Variable *T = makeReg(IceType_i1);
2365 Src0 = legalizeToReg(Src0);
2366 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
2367 switch (Instr->getOp()) {
2368 default:
2369 // If this Unreachable is ever executed, add the offending operation to
2370 // the list of valid consumers.
2371 llvm::report_fatal_error("Unhandled i1 Op");
2372 case InstArithmetic::And:
2373 _and(T, Src0, Src1RF);
2374 break;
2375 case InstArithmetic::Or:
2376 _orr(T, Src0, Src1RF);
2377 break;
2378 case InstArithmetic::Xor:
2379 _eor(T, Src0, Src1RF);
2380 break;
2381 }
2382 _mov(Dest, T);
2383 return Src0Safe == SBC_Yes && Src1Safe == SBC_Yes ? SBC_Yes : SBC_No;
2384 }
2385
2386 namespace {
2387 // NumericOperands is used during arithmetic/icmp lowering for constant folding.
2388 // It holds the two source operands, and maintains some state as to whether one
2389 // of them is a constant. If one of the operands is a constant, then it will be
2390 // stored as the operation's second source, with a bit indicating whether the
2391 // operands were swapped.
2392 //
2393 // The class is split into a base class with operand type-independent methods,
2394 // and a derived, templated class, for each type of operand we want to fold
2395 // constants for:
2396 //
2397 // NumericOperandsBase --> NumericOperands<ConstantFloat>
2398 // --> NumericOperands<ConstantDouble>
2399 // --> NumericOperands<ConstantInt32>
2400 //
2401 // NumericOperands<ConstantInt32> also exposes helper methods for emitting
2402 // inverted/negated immediates.
2403 class NumericOperandsBase {
2404 NumericOperandsBase() = delete;
2405 NumericOperandsBase(const NumericOperandsBase &) = delete;
2406 NumericOperandsBase &operator=(const NumericOperandsBase &) = delete;
2407
2408 public:
2409 NumericOperandsBase(Operand *S0, Operand *S1)
2410 : Src0(NonConstOperand(S0, S1)), Src1(ConstOperand(S0, S1)),
2411 Swapped(Src0 == S1 && S0 != S1) {
2412 assert(Src0 != nullptr);
2413 assert(Src1 != nullptr);
2414 assert(Src0 != Src1 || S0 == S1);
2415 }
2416
2417 bool hasConstOperand() const {
2418 return llvm::isa<Constant>(Src1) && !llvm::isa<ConstantRelocatable>(Src1);
2419 }
2420
2421 bool swappedOperands() const { return Swapped; }
2422
2423 Variable *src0R(TargetARM32 *Target) const {
2424 return legalizeToReg(Target, Src0);
2425 }
2426
2427 Variable *unswappedSrc0R(TargetARM32 *Target) const {
2428 return legalizeToReg(Target, Swapped ? Src1 : Src0);
2429 }
2430
2431 Operand *src1RF(TargetARM32 *Target) const {
2432 return legalizeToRegOrFlex(Target, Src1);
2433 }
2434
2435 Variable *unswappedSrc1R(TargetARM32 *Target) const {
2436 return legalizeToReg(Target, Swapped ? Src0 : Src1);
2437 }
2438
2439 Operand *src1() const { return Src1; }
2440
2441 protected:
2442 Operand *const Src0;
2443 Operand *const Src1;
2444 const bool Swapped;
2445
2446 static Variable *legalizeToReg(TargetARM32 *Target, Operand *Src) {
2447 return Target->legalizeToReg(Src);
2448 }
2449
2450 static Operand *legalizeToRegOrFlex(TargetARM32 *Target, Operand *Src) {
2451 return Target->legalize(Src,
2452 TargetARM32::Legal_Reg | TargetARM32::Legal_Flex);
2453 }
2454
2455 private:
2456 static Operand *NonConstOperand(Operand *S0, Operand *S1) {
2457 if (!llvm::isa<Constant>(S0))
2458 return S0;
2459 if (!llvm::isa<Constant>(S1))
2460 return S1;
2461 if (llvm::isa<ConstantRelocatable>(S1) &&
2462 !llvm::isa<ConstantRelocatable>(S0))
2463 return S1;
2464 return S0;
2465 }
2466
2467 static Operand *ConstOperand(Operand *S0, Operand *S1) {
2468 if (!llvm::isa<Constant>(S0))
2469 return S1;
2470 if (!llvm::isa<Constant>(S1))
2471 return S0;
2472 if (llvm::isa<ConstantRelocatable>(S1) &&
2473 !llvm::isa<ConstantRelocatable>(S0))
2474 return S0;
2475 return S1;
2476 }
2477 };
2478
2479 template <typename C> class NumericOperands : public NumericOperandsBase {
2480 NumericOperands() = delete;
2481 NumericOperands(const NumericOperands &) = delete;
2482 NumericOperands &operator=(const NumericOperands &) = delete;
2483
2484 public:
2485 NumericOperands(Operand *S0, Operand *S1) : NumericOperandsBase(S0, S1) {
2486 assert(!hasConstOperand() || llvm::isa<C>(this->Src1));
2487 }
2488
2489 typename C::PrimType getConstantValue() const {
2490 return llvm::cast<C>(Src1)->getValue();
2491 }
2492 };
2493
2494 using FloatOperands = NumericOperands<ConstantFloat>;
2495 using DoubleOperands = NumericOperands<ConstantDouble>;
2496
2497 class Int32Operands : public NumericOperands<ConstantInteger32> {
2498 Int32Operands() = delete;
2499 Int32Operands(const Int32Operands &) = delete;
2500 Int32Operands &operator=(const Int32Operands &) = delete;
2501
2502 public:
2503 Int32Operands(Operand *S0, Operand *S1) : NumericOperands(S0, S1) {}
2504
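// Returns Src1 as a shift-amount immediate (masked to [0, 31]) when it is an
// unswapped constant; otherwise legalizes the unswapped Src1 to a register.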
2505 Operand *unswappedSrc1RShAmtImm(TargetARM32 *Target) const {
2506 if (!swappedOperands() && hasConstOperand()) {
2507 return Target->shAmtImm(getConstantValue() & 0x1F);
2508 }
2509 return legalizeToReg(Target, Swapped ? Src0 : Src1);
2510 }
2511
2512 bool isSrc1ImmediateZero() const {
2513 if (!swappedOperands() && hasConstOperand()) {
2514 return getConstantValue() == 0;
2515 }
2516 return false;
2517 }
2518
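// The negated/inverted helpers below let a caller switch to the complementary
// instruction (e.g., sub instead of add, or bic instead of and) when only the
// negated or inverted immediate is flex-encodable.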
2519 bool immediateIsFlexEncodable() const {
2520 uint32_t Rotate, Imm8;
2521 return OperandARM32FlexImm::canHoldImm(getConstantValue(), &Rotate, &Imm8);
2522 }
2523
2524 bool negatedImmediateIsFlexEncodable() const {
2525 uint32_t Rotate, Imm8;
2526 return OperandARM32FlexImm::canHoldImm(
2527 -static_cast<int32_t>(getConstantValue()), &Rotate, &Imm8);
2528 }
2529
2530 Operand *negatedSrc1F(TargetARM32 *Target) const {
2531 return legalizeToRegOrFlex(Target,
2532 Target->getCtx()->getConstantInt32(
2533 -static_cast<int32_t>(getConstantValue())));
2534 }
2535
2536 bool invertedImmediateIsFlexEncodable() const {
2537 uint32_t Rotate, Imm8;
2538 return OperandARM32FlexImm::canHoldImm(
2539 ~static_cast<uint32_t>(getConstantValue()), &Rotate, &Imm8);
2540 }
2541
2542 Operand *invertedSrc1F(TargetARM32 *Target) const {
2543 return legalizeToRegOrFlex(Target,
2544 Target->getCtx()->getConstantInt32(
2545 ~static_cast<uint32_t>(getConstantValue())));
2546 }
2547 };
2548 } // end of anonymous namespace
2549
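// Emits the divide-by-zero guard for a div/rem that is lowered as a helper
// call: trap immediately when the divisor is a known zero constant, otherwise
// emit a runtime div0Check on the (possibly split i64) divisor.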
2550 void TargetARM32::preambleDivRem(const InstCall *Instr) {
2551 Operand *Src1 = Instr->getArg(1);
2552
2553 switch (Src1->getType()) {
2554 default:
2555 llvm::report_fatal_error("Invalid type for idiv.");
2556 case IceType_i64: {
2557 if (auto *C = llvm::dyn_cast<ConstantInteger64>(Src1)) {
2558 if (C->getValue() == 0) {
2559 _trap();
2560 return;
2561 }
2562 }
2563 div0Check(IceType_i64, loOperand(Src1), hiOperand(Src1));
2564 return;
2565 }
2566 case IceType_i32: {
2567 // Src0 and Src1 have already been appropriately extended to an i32, so we
2568 // don't check for i8 and i16.
2569 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
2570 if (C->getValue() == 0) {
2571 _trap();
2572 return;
2573 }
2574 }
2575 div0Check(IceType_i32, Src1, nullptr);
2576 return;
2577 }
2578 }
2579 }
2580
2581 void TargetARM32::lowerInt64Arithmetic(InstArithmetic::OpKind Op,
2582 Variable *Dest, Operand *Src0,
2583 Operand *Src1) {
2584 Int32Operands SrcsLo(loOperand(Src0), loOperand(Src1));
2585 Int32Operands SrcsHi(hiOperand(Src0), hiOperand(Src1));
2586 assert(SrcsLo.swappedOperands() == SrcsHi.swappedOperands());
2587 assert(SrcsLo.hasConstOperand() == SrcsHi.hasConstOperand());
2588
2589 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
2590 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2591 Variable *T_Lo = makeReg(DestLo->getType());
2592 Variable *T_Hi = makeReg(DestHi->getType());
2593
2594 switch (Op) {
2595 case InstArithmetic::_num:
2596 llvm::report_fatal_error("Unknown arithmetic operator");
2597 return;
2598 case InstArithmetic::Add: {
2599 Variable *Src0LoR = SrcsLo.src0R(this);
2600 Operand *Src1LoRF = SrcsLo.src1RF(this);
2601 Variable *Src0HiR = SrcsHi.src0R(this);
2602 Operand *Src1HiRF = SrcsHi.src1RF(this);
2603 _adds(T_Lo, Src0LoR, Src1LoRF);
2604 _mov(DestLo, T_Lo);
2605 _adc(T_Hi, Src0HiR, Src1HiRF);
2606 _mov(DestHi, T_Hi);
2607 return;
2608 }
2609 case InstArithmetic::And: {
2610 Variable *Src0LoR = SrcsLo.src0R(this);
2611 Operand *Src1LoRF = SrcsLo.src1RF(this);
2612 Variable *Src0HiR = SrcsHi.src0R(this);
2613 Operand *Src1HiRF = SrcsHi.src1RF(this);
2614 _and(T_Lo, Src0LoR, Src1LoRF);
2615 _mov(DestLo, T_Lo);
2616 _and(T_Hi, Src0HiR, Src1HiRF);
2617 _mov(DestHi, T_Hi);
2618 return;
2619 }
2620 case InstArithmetic::Or: {
2621 Variable *Src0LoR = SrcsLo.src0R(this);
2622 Operand *Src1LoRF = SrcsLo.src1RF(this);
2623 Variable *Src0HiR = SrcsHi.src0R(this);
2624 Operand *Src1HiRF = SrcsHi.src1RF(this);
2625 _orr(T_Lo, Src0LoR, Src1LoRF);
2626 _mov(DestLo, T_Lo);
2627 _orr(T_Hi, Src0HiR, Src1HiRF);
2628 _mov(DestHi, T_Hi);
2629 return;
2630 }
2631 case InstArithmetic::Xor: {
2632 Variable *Src0LoR = SrcsLo.src0R(this);
2633 Operand *Src1LoRF = SrcsLo.src1RF(this);
2634 Variable *Src0HiR = SrcsHi.src0R(this);
2635 Operand *Src1HiRF = SrcsHi.src1RF(this);
2636 _eor(T_Lo, Src0LoR, Src1LoRF);
2637 _mov(DestLo, T_Lo);
2638 _eor(T_Hi, Src0HiR, Src1HiRF);
2639 _mov(DestHi, T_Hi);
2640 return;
2641 }
2642 case InstArithmetic::Sub: {
2643 Variable *Src0LoR = SrcsLo.src0R(this);
2644 Operand *Src1LoRF = SrcsLo.src1RF(this);
2645 Variable *Src0HiR = SrcsHi.src0R(this);
2646 Operand *Src1HiRF = SrcsHi.src1RF(this);
2647 if (SrcsLo.swappedOperands()) {
2648 _rsbs(T_Lo, Src0LoR, Src1LoRF);
2649 _mov(DestLo, T_Lo);
2650 _rsc(T_Hi, Src0HiR, Src1HiRF);
2651 _mov(DestHi, T_Hi);
2652 } else {
2653 _subs(T_Lo, Src0LoR, Src1LoRF);
2654 _mov(DestLo, T_Lo);
2655 _sbc(T_Hi, Src0HiR, Src1HiRF);
2656 _mov(DestHi, T_Hi);
2657 }
2658 return;
2659 }
2660 case InstArithmetic::Mul: {
2661 // GCC 4.8 does:
2662 // a=b*c ==>
2663 // t_acc =(mul) (b.lo * c.hi)
2664 // t_acc =(mla) (c.lo * b.hi) + t_acc
2665 // t.hi,t.lo =(umull) b.lo * c.lo
2666 // t.hi += t_acc
2667 // a.lo = t.lo
2668 // a.hi = t.hi
2669 //
2670 // LLVM does:
2671 // t.hi,t.lo =(umull) b.lo * c.lo
2672 // t.hi =(mla) (b.lo * c.hi) + t.hi
2673 // t.hi =(mla) (b.hi * c.lo) + t.hi
2674 // a.lo = t.lo
2675 // a.hi = t.hi
2676 //
2677 // LLVM's lowering has fewer instructions, but more register pressure:
2678 // t.lo is live from beginning to end, while GCC delays the two-dest
2679 // instruction till the end, and kills c.hi immediately.
2680 Variable *T_Acc = makeReg(IceType_i32);
2681 Variable *T_Acc1 = makeReg(IceType_i32);
2682 Variable *T_Hi1 = makeReg(IceType_i32);
2683 Variable *Src0RLo = SrcsLo.unswappedSrc0R(this);
2684 Variable *Src0RHi = SrcsHi.unswappedSrc0R(this);
2685 Variable *Src1RLo = SrcsLo.unswappedSrc1R(this);
2686 Variable *Src1RHi = SrcsHi.unswappedSrc1R(this);
2687 _mul(T_Acc, Src0RLo, Src1RHi);
2688 _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc);
2689 _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo);
2690 _add(T_Hi, T_Hi1, T_Acc1);
2691 _mov(DestLo, T_Lo);
2692 _mov(DestHi, T_Hi);
2693 return;
2694 }
2695 case InstArithmetic::Shl: {
2696 if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) {
2697 Variable *Src0RLo = SrcsLo.src0R(this);
2698 // Truncating the ShAmt to [0, 63] because that's what ARM does anyway.
2699 const int32_t ShAmtImm = SrcsLo.getConstantValue() & 0x3F;
2700 if (ShAmtImm == 0) {
2701 _mov(DestLo, Src0RLo);
2702 _mov(DestHi, SrcsHi.src0R(this));
2703 return;
2704 }
2705
2706 if (ShAmtImm >= 32) {
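// A shift by 32 or more moves the low word into the high word and zeroes the
// low word, e.g., b << 40 produces DestHi = b.lo << 8 and DestLo = 0.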
2707 if (ShAmtImm == 32) {
2708 _mov(DestHi, Src0RLo);
2709 } else {
2710 Operand *ShAmtOp = shAmtImm(ShAmtImm - 32);
2711 _lsl(T_Hi, Src0RLo, ShAmtOp);
2712 _mov(DestHi, T_Hi);
2713 }
2714
2715 Operand *_0 =
2716 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
2717 _mov(T_Lo, _0);
2718 _mov(DestLo, T_Lo);
2719 return;
2720 }
2721
2722 Variable *Src0RHi = SrcsHi.src0R(this);
2723 Operand *ShAmtOp = shAmtImm(ShAmtImm);
2724 Operand *ComplShAmtOp = shAmtImm(32 - ShAmtImm);
2725 _lsl(T_Hi, Src0RHi, ShAmtOp);
2726 _orr(T_Hi, T_Hi,
2727 OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
2728 OperandARM32::LSR, ComplShAmtOp));
2729 _mov(DestHi, T_Hi);
2730
2731 _lsl(T_Lo, Src0RLo, ShAmtOp);
2732 _mov(DestLo, T_Lo);
2733 return;
2734 }
2735
2736 // a=b<<c ==>
2737 // pnacl-llc does:
2738 // mov t_b.lo, b.lo
2739 // mov t_b.hi, b.hi
2740 // mov t_c.lo, c.lo
2741 // rsb T0, t_c.lo, #32
2742 // lsr T1, t_b.lo, T0
2743 // orr t_a.hi, T1, t_b.hi, lsl t_c.lo
2744 // sub T2, t_c.lo, #32
2745 // cmp T2, #0
2746 // lslge t_a.hi, t_b.lo, T2
2747 // lsl t_a.lo, t_b.lo, t_c.lo
2748 // mov a.lo, t_a.lo
2749 // mov a.hi, t_a.hi
2750 //
2751 // GCC 4.8 does:
2752 // sub t_c1, c.lo, #32
2753 // lsl t_hi, b.hi, c.lo
2754 // orr t_hi, t_hi, b.lo, lsl t_c1
2755 // rsb t_c2, c.lo, #32
2756 // orr t_hi, t_hi, b.lo, lsr t_c2
2757 // lsl t_lo, b.lo, c.lo
2758 // a.lo = t_lo
2759 // a.hi = t_hi
2760 //
2761 // These are incompatible, therefore we mimic pnacl-llc.
2762 // Can be strength-reduced for constant-shifts, but we don't do that for
2763 // now.
2764 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On
2765 // ARM, shifts only take the lower 8 bits of the shift register, and
2766 // saturate to the range 0-32, so the negative value will saturate to 32.
2767 Operand *_32 = legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
2768 Operand *_0 =
2769 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
2770 Variable *T0 = makeReg(IceType_i32);
2771 Variable *T1 = makeReg(IceType_i32);
2772 Variable *T2 = makeReg(IceType_i32);
2773 Variable *TA_Hi = makeReg(IceType_i32);
2774 Variable *TA_Lo = makeReg(IceType_i32);
2775 Variable *Src0RLo = SrcsLo.unswappedSrc0R(this);
2776 Variable *Src0RHi = SrcsHi.unswappedSrc0R(this);
2777 Variable *Src1RLo = SrcsLo.unswappedSrc1R(this);
2778 _rsb(T0, Src1RLo, _32);
2779 _lsr(T1, Src0RLo, T0);
2780 _orr(TA_Hi, T1,
2781 OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
2782 OperandARM32::LSL, Src1RLo));
2783 _sub(T2, Src1RLo, _32);
2784 _cmp(T2, _0);
2785 _lsl(TA_Hi, Src0RLo, T2, CondARM32::GE);
2786 _set_dest_redefined();
2787 _lsl(TA_Lo, Src0RLo, Src1RLo);
2788 _mov(DestLo, TA_Lo);
2789 _mov(DestHi, TA_Hi);
2790 return;
2791 }
2792 case InstArithmetic::Lshr:
2793 case InstArithmetic::Ashr: {
2794 const bool ASR = Op == InstArithmetic::Ashr;
2795 if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) {
2796 Variable *Src0RHi = SrcsHi.src0R(this);
2797 // Truncating the ShAmt to [0, 63] because that's what ARM does anyway.
2798 const int32_t ShAmt = SrcsLo.getConstantValue() & 0x3F;
2799 if (ShAmt == 0) {
2800 _mov(DestHi, Src0RHi);
2801 _mov(DestLo, SrcsLo.src0R(this));
2802 return;
2803 }
2804
2805 if (ShAmt >= 32) {
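// A shift by 32 or more moves the high word into the low word, e.g., b >> 40
// yields DestLo = b.hi >> 8; DestHi becomes 0 for lshr, or the sign bits
// (b.hi >> 31, arithmetic) for ashr.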
2806 if (ShAmt == 32) {
2807 _mov(DestLo, Src0RHi);
2808 } else {
2809 Operand *ShAmtImm = shAmtImm(ShAmt - 32);
2810 if (ASR) {
2811 _asr(T_Lo, Src0RHi, ShAmtImm);
2812 } else {
2813 _lsr(T_Lo, Src0RHi, ShAmtImm);
2814 }
2815 _mov(DestLo, T_Lo);
2816 }
2817
2818 if (ASR) {
2819 Operand *_31 = shAmtImm(31);
2820 _asr(T_Hi, Src0RHi, _31);
2821 } else {
2822 Operand *_0 = legalize(Ctx->getConstantZero(IceType_i32),
2823 Legal_Reg | Legal_Flex);
2824 _mov(T_Hi, _0);
2825 }
2826 _mov(DestHi, T_Hi);
2827 return;
2828 }
2829
2830 Variable *Src0RLo = SrcsLo.src0R(this);
2831 Operand *ShAmtImm = shAmtImm(ShAmt);
2832 Operand *ComplShAmtImm = shAmtImm(32 - ShAmt);
2833 _lsr(T_Lo, Src0RLo, ShAmtImm);
2834 _orr(T_Lo, T_Lo,
2835 OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
2836 OperandARM32::LSL, ComplShAmtImm));
2837 _mov(DestLo, T_Lo);
2838
2839 if (ASR) {
2840 _asr(T_Hi, Src0RHi, ShAmtImm);
2841 } else {
2842 _lsr(T_Hi, Src0RHi, ShAmtImm);
2843 }
2844 _mov(DestHi, T_Hi);
2845 return;
2846 }
2847
2848 // a=b>>c
2849 // pnacl-llc does:
2850 // mov t_b.lo, b.lo
2851 // mov t_b.hi, b.hi
2852 // mov t_c.lo, c.lo
2853 // lsr T0, t_b.lo, t_c.lo
2854 // rsb T1, t_c.lo, #32
2855 // orr t_a.lo, T0, t_b.hi, lsl T1
2856 // sub T2, t_c.lo, #32
2857 // cmp T2, #0
2858 // [al]srge t_a.lo, t_b.hi, T2
2859 // [al]sr t_a.hi, t_b.hi, t_c.lo
2860 // mov a.lo, t_a.lo
2861 // mov a.hi, t_a.hi
2862 //
2863 // GCC 4.8 does (lsr):
2864 // rsb t_c1, c.lo, #32
2865 // lsr t_lo, b.lo, c.lo
2866 // orr t_lo, t_lo, b.hi, lsl t_c1
2867 // sub t_c2, c.lo, #32
2868 // orr t_lo, t_lo, b.hi, lsr t_c2
2869 // lsr t_hi, b.hi, c.lo
2870 // mov a.lo, t_lo
2871 // mov a.hi, t_hi
2872 //
2873 // These are incompatible, therefore we mimic pnacl-llc.
2874 Operand *_32 = legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
2875 Operand *_0 =
2876 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
2877 Variable *T0 = makeReg(IceType_i32);
2878 Variable *T1 = makeReg(IceType_i32);
2879 Variable *T2 = makeReg(IceType_i32);
2880 Variable *TA_Lo = makeReg(IceType_i32);
2881 Variable *TA_Hi = makeReg(IceType_i32);
2882 Variable *Src0RLo = SrcsLo.unswappedSrc0R(this);
2883 Variable *Src0RHi = SrcsHi.unswappedSrc0R(this);
2884 Variable *Src1RLo = SrcsLo.unswappedSrc1R(this);
2885 _lsr(T0, Src0RLo, Src1RLo);
2886 _rsb(T1, Src1RLo, _32);
2887 _orr(TA_Lo, T0,
2888 OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
2889 OperandARM32::LSL, T1));
2890 _sub(T2, Src1RLo, _32);
2891 _cmp(T2, _0);
2892 if (ASR) {
2893 _asr(TA_Lo, Src0RHi, T2, CondARM32::GE);
2894 _set_dest_redefined();
2895 _asr(TA_Hi, Src0RHi, Src1RLo);
2896 } else {
2897 _lsr(TA_Lo, Src0RHi, T2, CondARM32::GE);
2898 _set_dest_redefined();
2899 _lsr(TA_Hi, Src0RHi, Src1RLo);
2900 }
2901 _mov(DestLo, TA_Lo);
2902 _mov(DestHi, TA_Hi);
2903 return;
2904 }
2905 case InstArithmetic::Fadd:
2906 case InstArithmetic::Fsub:
2907 case InstArithmetic::Fmul:
2908 case InstArithmetic::Fdiv:
2909 case InstArithmetic::Frem:
2910 llvm::report_fatal_error("FP instruction with i64 type");
2911 return;
2912 case InstArithmetic::Udiv:
2913 case InstArithmetic::Sdiv:
2914 case InstArithmetic::Urem:
2915 case InstArithmetic::Srem:
2916 llvm::report_fatal_error("Call-helper-involved instruction for i64 type "
2917 "should have already been handled before");
2918 return;
2919 }
2920 }
2921
2922 namespace {
2923 // StrengthReduction is a namespace with the strength reduction machinery. The
2924 // entry point is the StrengthReduction::tryToOptimize method. It returns true
2925 // if the optimization can be performed, and false otherwise.
2926 //
2927 // If the optimization can be performed, tryToOptimize sets its NumOperations
2928 // parameter to the number of shifts that are needed to perform the
2929 // multiplication; and it sets the Operations parameter with <ShAmt, AddOrSub>
2930 // tuples that describe how to materialize the multiplication.
2931 //
2932 // The algorithm finds contiguous 1s in the Multiplication source, and uses one
2933 // or two shifts to materialize it. A sequence of 1s, e.g.,
2934 //
2935 // M N
2936 // ...00000000000011111...111110000000...
2937 //
2938 // is materializable with (1 << (M + 1)) - (1 << N):
2939 //
2940 // ...00000000000100000...000000000000... [1 << (M + 1)]
2941 // ...00000000000000000...000010000000... (-) [1 << N]
2942 // --------------------------------------
2943 // ...00000000000011111...111110000000...
2944 //
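// For instance, Src = 0x38 (bits 3..5 set, M = 5, N = 3) is materialized as
// (1 << 6) - (1 << 3), i.e., one shifted add followed by one shifted subtract.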
2945 // And a single bit set, which is just a left shift.
2946 namespace StrengthReduction {
2947 enum AggregationOperation {
2948 AO_Invalid,
2949 AO_Add,
2950 AO_Sub,
2951 };
2952
2953 // AggregateElement is a glorified <ShAmt, AddOrSub> tuple.
2954 class AggregationElement {
2955 AggregationElement(const AggregationElement &) = delete;
2956
2957 public:
2958 AggregationElement() = default;
2959 AggregationElement &operator=(const AggregationElement &) = default;
2960 AggregationElement(AggregationOperation Op, uint32_t ShAmt)
2961 : Op(Op), ShAmt(ShAmt) {}
2962
2963 Operand *createShiftedOperand(Cfg *Func, Variable *OpR) const {
2964 assert(OpR->mustHaveReg());
2965 if (ShAmt == 0) {
2966 return OpR;
2967 }
2968 return OperandARM32FlexReg::create(
2969 Func, IceType_i32, OpR, OperandARM32::LSL,
2970 OperandARM32ShAmtImm::create(
2971 Func, llvm::cast<ConstantInteger32>(
2972 Func->getContext()->getConstantInt32(ShAmt))));
2973 }
2974
2975 bool aggregateWithAdd() const {
2976 switch (Op) {
2977 case AO_Invalid:
2978 llvm::report_fatal_error("Invalid Strength Reduction Operations.");
2979 case AO_Add:
2980 return true;
2981 case AO_Sub:
2982 return false;
2983 }
2984 llvm_unreachable("(silence g++ warning)");
2985 }
2986
2987 uint32_t shAmt() const { return ShAmt; }
2988
2989 private:
2990 AggregationOperation Op = AO_Invalid;
2991 uint32_t ShAmt;
2992 };
2993
2994 // [RangeStart, RangeEnd] is a range of 1s in Src.
2995 template <std::size_t N>
2996 bool addOperations(uint32_t RangeStart, uint32_t RangeEnd, SizeT *NumOperations,
2997 std::array<AggregationElement, N> *Operations) {
2998 assert(*NumOperations < N);
2999 if (RangeStart == RangeEnd) {
3000 // Single bit set:
3001 // Src : 0...00010...
3002 // RangeStart : ^
3003 // RangeEnd : ^
3004 // NegSrc : 0...00001...
3005 (*Operations)[*NumOperations] = AggregationElement(AO_Add, RangeStart);
3006 ++(*NumOperations);
3007 return true;
3008 }
3009
3010 // Sequence of 1s: (two operations required.)
3011 // Src : 0...00011...110...
3012 // RangeStart : ^
3013 // RangeEnd : ^
3014 // NegSrc : 0...00000...001...
3015 if (*NumOperations + 1 >= N) {
3016 return false;
3017 }
3018 (*Operations)[*NumOperations] = AggregationElement(AO_Add, RangeStart + 1);
3019 ++(*NumOperations);
3020 (*Operations)[*NumOperations] = AggregationElement(AO_Sub, RangeEnd);
3021 ++(*NumOperations);
3022 return true;
3023 }
3024
3025 // tryToOptimize scans Src looking for sequences of 1s (including the unitary bit
3026 // 1 surrounded by zeroes).
3027 template <std::size_t N>
3028 bool tryToOptimize(uint32_t Src, SizeT *NumOperations,
3029 std::array<AggregationElement, N> *Operations) {
3030 constexpr uint32_t SrcSizeBits = sizeof(Src) * CHAR_BIT;
3031 uint32_t NegSrc = ~Src;
3032
3033 *NumOperations = 0;
3034 while (Src != 0 && *NumOperations < N) {
3035 // Each step of the algorithm:
3036 // * finds L, the last bit set in Src;
3037 // * clears all the upper bits in NegSrc up to bit L;
3038 // * finds nL, the last bit set in NegSrc;
3039 // * clears all the upper bits in Src up to bit nL;
3040 //
3041 // if L == nL + 1, then a unitary 1 was found in Src. Otherwise, a sequence
3042 // of 1s starting at L, and ending at nL + 1, was found.
3043 const uint32_t SrcLastBitSet = llvm::findLastSet(Src);
3044 const uint32_t NegSrcClearMask =
3045 (SrcLastBitSet == 0) ? 0
3046 : (0xFFFFFFFFu) >> (SrcSizeBits - SrcLastBitSet);
3047 NegSrc &= NegSrcClearMask;
3048 if (NegSrc == 0) {
3049 if (addOperations(SrcLastBitSet, 0, NumOperations, Operations)) {
3050 return true;
3051 }
3052 return false;
3053 }
3054 const uint32_t NegSrcLastBitSet = llvm::findLastSet(NegSrc);
3055 assert(NegSrcLastBitSet < SrcLastBitSet);
3056 const uint32_t SrcClearMask =
3057 (NegSrcLastBitSet == 0)
3058 ? 0
3059 : (0xFFFFFFFFu) >> (SrcSizeBits - NegSrcLastBitSet);
3060 Src &= SrcClearMask;
3061 if (!addOperations(SrcLastBitSet, NegSrcLastBitSet + 1, NumOperations,
3062 Operations)) {
3063 return false;
3064 }
3065 }
3066
3067 return Src == 0;
3068 }
3069 } // end of namespace StrengthReduction
3070 } // end of anonymous namespace
3071
3072 void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) {
3073 Variable *Dest = Instr->getDest();
3074
3075 if (Dest->isRematerializable()) {
3076 Context.insert<InstFakeDef>(Dest);
3077 return;
3078 }
3079
3080 Type DestTy = Dest->getType();
3081 if (DestTy == IceType_i1) {
3082 lowerInt1Arithmetic(Instr);
3083 return;
3084 }
3085
3086 Operand *Src0 = legalizeUndef(Instr->getSrc(0));
3087 Operand *Src1 = legalizeUndef(Instr->getSrc(1));
3088 if (DestTy == IceType_i64) {
3089 lowerInt64Arithmetic(Instr->getOp(), Instr->getDest(), Src0, Src1);
3090 return;
3091 }
3092
3093 if (isVectorType(DestTy)) {
3094 switch (Instr->getOp()) {
3095 default:
3096 UnimplementedLoweringError(this, Instr);
3097 return;
3098 // Explicitly allow vector instructions we have implemented/enabled.
3099 case InstArithmetic::Add:
3100 case InstArithmetic::And:
3101 case InstArithmetic::Ashr:
3102 case InstArithmetic::Fadd:
3103 case InstArithmetic::Fmul:
3104 case InstArithmetic::Fsub:
3105 case InstArithmetic::Lshr:
3106 case InstArithmetic::Mul:
3107 case InstArithmetic::Or:
3108 case InstArithmetic::Shl:
3109 case InstArithmetic::Sub:
3110 case InstArithmetic::Xor:
3111 break;
3112 }
3113 }
3114
3115 Variable *T = makeReg(DestTy);
3116
3117 // * Handle div/rem separately. They require a non-legalized Src1 to inspect
3118 // whether or not Src1 is a non-zero constant. Once legalized it is more
3119 // difficult to determine (constant may be moved to a register).
3120 // * Handle floating point arithmetic separately: they require Src1 to be
3121 // legalized to a register.
3122 switch (Instr->getOp()) {
3123 default:
3124 break;
3125 case InstArithmetic::Udiv: {
3126 constexpr bool NotRemainder = false;
3127 Variable *Src0R = legalizeToReg(Src0);
3128 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
3129 NotRemainder);
3130 return;
3131 }
3132 case InstArithmetic::Sdiv: {
3133 constexpr bool NotRemainder = false;
3134 Variable *Src0R = legalizeToReg(Src0);
3135 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv,
3136 NotRemainder);
3137 return;
3138 }
3139 case InstArithmetic::Urem: {
3140 constexpr bool IsRemainder = true;
3141 Variable *Src0R = legalizeToReg(Src0);
3142 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
3143 IsRemainder);
3144 return;
3145 }
3146 case InstArithmetic::Srem: {
3147 constexpr bool IsRemainder = true;
3148 Variable *Src0R = legalizeToReg(Src0);
3149 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv,
3150 IsRemainder);
3151 return;
3152 }
3153 case InstArithmetic::Frem: {
3154 if (!isScalarFloatingType(DestTy)) {
3155 llvm::report_fatal_error("Unexpected type when lowering frem.");
3156 }
3157 llvm::report_fatal_error("Frem should have already been lowered.");
3158 }
3159 case InstArithmetic::Fadd: {
3160 Variable *Src0R = legalizeToReg(Src0);
3161 if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
3162 Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
3163 Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
3164 _vmla(Src0R, Src1R, Src2R);
3165 _mov(Dest, Src0R);
3166 return;
3167 }
3168
3169 Variable *Src1R = legalizeToReg(Src1);
3170 _vadd(T, Src0R, Src1R);
3171 _mov(Dest, T);
3172 return;
3173 }
3174 case InstArithmetic::Fsub: {
3175 Variable *Src0R = legalizeToReg(Src0);
3176 if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
3177 Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
3178 Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
3179 _vmls(Src0R, Src1R, Src2R);
3180 _mov(Dest, Src0R);
3181 return;
3182 }
3183 Variable *Src1R = legalizeToReg(Src1);
3184 _vsub(T, Src0R, Src1R);
3185 _mov(Dest, T);
3186 return;
3187 }
3188 case InstArithmetic::Fmul: {
3189 Variable *Src0R = legalizeToReg(Src0);
3190 Variable *Src1R = legalizeToReg(Src1);
3191 _vmul(T, Src0R, Src1R);
3192 _mov(Dest, T);
3193 return;
3194 }
3195 case InstArithmetic::Fdiv: {
3196 Variable *Src0R = legalizeToReg(Src0);
3197 Variable *Src1R = legalizeToReg(Src1);
3198 _vdiv(T, Src0R, Src1R);
3199 _mov(Dest, T);
3200 return;
3201 }
3202 }
3203
3204 // Handle everything else here.
3205 Int32Operands Srcs(Src0, Src1);
3206 switch (Instr->getOp()) {
3207 case InstArithmetic::_num:
3208 llvm::report_fatal_error("Unknown arithmetic operator");
3209 return;
3210 case InstArithmetic::Add: {
3211 if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
3212 assert(!isVectorType(DestTy));
3213 Variable *Src0R = legalizeToReg(Src0);
3214 Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
3215 Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
3216 _mla(T, Src1R, Src2R, Src0R);
3217 _mov(Dest, T);
3218 return;
3219 }
3220
3221 if (Srcs.hasConstOperand()) {
3222 if (!Srcs.immediateIsFlexEncodable() &&
3223 Srcs.negatedImmediateIsFlexEncodable()) {
3224 assert(!isVectorType(DestTy));
3225 Variable *Src0R = Srcs.src0R(this);
3226 Operand *Src1F = Srcs.negatedSrc1F(this);
3227 if (!Srcs.swappedOperands()) {
3228 _sub(T, Src0R, Src1F);
3229 } else {
3230 _rsb(T, Src0R, Src1F);
3231 }
3232 _mov(Dest, T);
3233 return;
3234 }
3235 }
3236 Variable *Src0R = Srcs.src0R(this);
3237 if (isVectorType(DestTy)) {
3238 Variable *Src1R = legalizeToReg(Src1);
3239 _vadd(T, Src0R, Src1R);
3240 } else {
3241 Operand *Src1RF = Srcs.src1RF(this);
3242 _add(T, Src0R, Src1RF);
3243 }
3244 _mov(Dest, T);
3245 return;
3246 }
3247 case InstArithmetic::And: {
3248 if (Srcs.hasConstOperand()) {
3249 if (!Srcs.immediateIsFlexEncodable() &&
3250 Srcs.invertedImmediateIsFlexEncodable()) {
3251 Variable *Src0R = Srcs.src0R(this);
3252 Operand *Src1F = Srcs.invertedSrc1F(this);
3253 _bic(T, Src0R, Src1F);
3254 _mov(Dest, T);
3255 return;
3256 }
3257 }
3258 assert(isIntegerType(DestTy));
3259 Variable *Src0R = Srcs.src0R(this);
3260 if (isVectorType(DestTy)) {
3261 Variable *Src1R = legalizeToReg(Src1);
3262 _vand(T, Src0R, Src1R);
3263 } else {
3264 Operand *Src1RF = Srcs.src1RF(this);
3265 _and(T, Src0R, Src1RF);
3266 }
3267 _mov(Dest, T);
3268 return;
3269 }
3270 case InstArithmetic::Or: {
3271 Variable *Src0R = Srcs.src0R(this);
3272 assert(isIntegerType(DestTy));
3273 if (isVectorType(DestTy)) {
3274 Variable *Src1R = legalizeToReg(Src1);
3275 _vorr(T, Src0R, Src1R);
3276 } else {
3277 Operand *Src1RF = Srcs.src1RF(this);
3278 _orr(T, Src0R, Src1RF);
3279 }
3280 _mov(Dest, T);
3281 return;
3282 }
3283 case InstArithmetic::Xor: {
3284 Variable *Src0R = Srcs.src0R(this);
3285 assert(isIntegerType(DestTy));
3286 if (isVectorType(DestTy)) {
3287 Variable *Src1R = legalizeToReg(Src1);
3288 _veor(T, Src0R, Src1R);
3289 } else {
3290 Operand *Src1RF = Srcs.src1RF(this);
3291 _eor(T, Src0R, Src1RF);
3292 }
3293 _mov(Dest, T);
3294 return;
3295 }
3296 case InstArithmetic::Sub: {
3297 if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
3298 assert(!isVectorType(DestTy));
3299 Variable *Src0R = legalizeToReg(Src0);
3300 Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
3301 Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
3302 _mls(T, Src1R, Src2R, Src0R);
3303 _mov(Dest, T);
3304 return;
3305 }
3306
3307 if (Srcs.hasConstOperand()) {
3308 assert(!isVectorType(DestTy));
3309 if (Srcs.immediateIsFlexEncodable()) {
3310 Variable *Src0R = Srcs.src0R(this);
3311 Operand *Src1RF = Srcs.src1RF(this);
3312 if (Srcs.swappedOperands()) {
3313 _rsb(T, Src0R, Src1RF);
3314 } else {
3315 _sub(T, Src0R, Src1RF);
3316 }
3317 _mov(Dest, T);
3318 return;
3319 }
3320 if (!Srcs.swappedOperands() && Srcs.negatedImmediateIsFlexEncodable()) {
3321 Variable *Src0R = Srcs.src0R(this);
3322 Operand *Src1F = Srcs.negatedSrc1F(this);
3323 _add(T, Src0R, Src1F);
3324 _mov(Dest, T);
3325 return;
3326 }
3327 }
3328 Variable *Src0R = Srcs.unswappedSrc0R(this);
3329 Variable *Src1R = Srcs.unswappedSrc1R(this);
3330 if (isVectorType(DestTy)) {
3331 _vsub(T, Src0R, Src1R);
3332 } else {
3333 _sub(T, Src0R, Src1R);
3334 }
3335 _mov(Dest, T);
3336 return;
3337 }
3338 case InstArithmetic::Mul: {
3339 const bool OptM1 = Func->getOptLevel() == Opt_m1;
3340 if (!OptM1 && Srcs.hasConstOperand()) {
3341 constexpr std::size_t MaxShifts = 4;
3342 std::array<StrengthReduction::AggregationElement, MaxShifts> Shifts;
3343 SizeT NumOperations;
3344 int32_t Const = Srcs.getConstantValue();
3345 const bool Invert = Const < 0;
3346 const bool MultiplyByZero = Const == 0;
3347 Operand *_0 =
3348 legalize(Ctx->getConstantZero(DestTy), Legal_Reg | Legal_Flex);
3349
3350 if (MultiplyByZero) {
3351 _mov(T, _0);
3352 _mov(Dest, T);
3353 return;
3354 }
3355
3356 if (Invert) {
3357 Const = -Const;
3358 }
3359
3360 if (StrengthReduction::tryToOptimize(Const, &NumOperations, &Shifts)) {
3361 assert(NumOperations >= 1);
3362 Variable *Src0R = Srcs.src0R(this);
3363 int32_t Start;
3364 int32_t End;
3365 if (NumOperations == 1 || Shifts[NumOperations - 1].shAmt() != 0) {
3366 // Multiplication by a power of 2 (NumOperations == 1); or
3367         // Multiplication by an even number that is not a power of 2.
3368 Start = 1;
3369 End = NumOperations;
3370 assert(Shifts[0].aggregateWithAdd());
3371 _lsl(T, Src0R, shAmtImm(Shifts[0].shAmt()));
3372 } else {
3373         // Multiplication by an odd number. Put the free barrel shifter to
3374         // good use.
3375 Start = 0;
3376 End = NumOperations - 2;
3377 const StrengthReduction::AggregationElement &Last =
3378 Shifts[NumOperations - 1];
3379 const StrengthReduction::AggregationElement &SecondToLast =
3380 Shifts[NumOperations - 2];
3381 if (!Last.aggregateWithAdd()) {
3382 assert(SecondToLast.aggregateWithAdd());
3383 _rsb(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R));
3384 } else if (!SecondToLast.aggregateWithAdd()) {
3385 assert(Last.aggregateWithAdd());
3386 _sub(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R));
3387 } else {
3388 _add(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R));
3389 }
3390 }
3391
3392 // Odd numbers : S E I I
3393 // +---+---+---+---+---+---+ ... +---+---+---+---+
3394 // Shifts = | | | | | | | ... | | | | |
3395 // +---+---+---+---+---+---+ ... +---+---+---+---+
3396 // Even numbers: I S E
3397 //
3398 // S: Start; E: End; I: Init
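      // Illustrative traces (assumed, not part of the original comment):
      // multiplying by 10 yields Shifts = {Add(3), Add(1)}; the last shAmt is
      // non-zero, so the init is _lsl(T, Src0R, 3) and the loop adds
      // Src0R lsl 1, computing (x << 3) + (x << 1). Multiplying by 7 yields
      // {Add(3), Sub(0)}; the last shAmt is zero (odd number), so the init is
      // _rsb(T, Src0R, Src0R lsl 3), i.e. (x << 3) - x, and the loop body is
      // empty.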
3399 for (int32_t I = Start; I < End; ++I) {
3400 const StrengthReduction::AggregationElement &Current = Shifts[I];
3401 Operand *SrcF = Current.createShiftedOperand(Func, Src0R);
3402 if (Current.aggregateWithAdd()) {
3403 _add(T, T, SrcF);
3404 } else {
3405 _sub(T, T, SrcF);
3406 }
3407 }
3408
3409 if (Invert) {
3410 // T = 0 - T.
3411 _rsb(T, T, _0);
3412 }
3413
3414 _mov(Dest, T);
3415 return;
3416 }
3417 }
3418 Variable *Src0R = Srcs.unswappedSrc0R(this);
3419 Variable *Src1R = Srcs.unswappedSrc1R(this);
3420 if (isVectorType(DestTy)) {
3421 _vmul(T, Src0R, Src1R);
3422 } else {
3423 _mul(T, Src0R, Src1R);
3424 }
3425 _mov(Dest, T);
3426 return;
3427 }
3428 case InstArithmetic::Shl: {
3429 Variable *Src0R = Srcs.unswappedSrc0R(this);
3430 if (!isVectorType(T->getType())) {
3431 if (Srcs.isSrc1ImmediateZero()) {
3432 _mov(T, Src0R);
3433 } else {
3434 Operand *Src1R = Srcs.unswappedSrc1RShAmtImm(this);
3435 _lsl(T, Src0R, Src1R);
3436 }
3437 } else {
3438 if (Srcs.hasConstOperand()) {
3439 ConstantInteger32 *ShAmt = llvm::cast<ConstantInteger32>(Srcs.src1());
3440 _vshl(T, Src0R, ShAmt);
3441 } else {
3442 auto *Src1R = Srcs.unswappedSrc1R(this);
3443 _vshl(T, Src0R, Src1R)->setSignType(InstARM32::FS_Unsigned);
3444 }
3445 }
3446 _mov(Dest, T);
3447 return;
3448 }
3449 case InstArithmetic::Lshr: {
3450 Variable *Src0R = Srcs.unswappedSrc0R(this);
3451 if (!isVectorType(T->getType())) {
3452 if (DestTy != IceType_i32) {
3453 _uxt(Src0R, Src0R);
3454 }
3455 if (Srcs.isSrc1ImmediateZero()) {
3456 _mov(T, Src0R);
3457 } else {
3458 Operand *Src1R = Srcs.unswappedSrc1RShAmtImm(this);
3459 _lsr(T, Src0R, Src1R);
3460 }
3461 } else {
3462 if (Srcs.hasConstOperand()) {
3463 ConstantInteger32 *ShAmt = llvm::cast<ConstantInteger32>(Srcs.src1());
3464 _vshr(T, Src0R, ShAmt)->setSignType(InstARM32::FS_Unsigned);
3465 } else {
3466 auto *Src1R = Srcs.unswappedSrc1R(this);
3467 auto *Src1RNeg = makeReg(Src1R->getType());
3468 _vneg(Src1RNeg, Src1R);
3469 _vshl(T, Src0R, Src1RNeg)->setSignType(InstARM32::FS_Unsigned);
3470 }
3471 }
3472 _mov(Dest, T);
3473 return;
3474 }
3475 case InstArithmetic::Ashr: {
3476 Variable *Src0R = Srcs.unswappedSrc0R(this);
3477 if (!isVectorType(T->getType())) {
3478 if (DestTy != IceType_i32) {
3479 _sxt(Src0R, Src0R);
3480 }
3481 if (Srcs.isSrc1ImmediateZero()) {
3482 _mov(T, Src0R);
3483 } else {
3484 _asr(T, Src0R, Srcs.unswappedSrc1RShAmtImm(this));
3485 }
3486 } else {
3487 if (Srcs.hasConstOperand()) {
3488 ConstantInteger32 *ShAmt = llvm::cast<ConstantInteger32>(Srcs.src1());
3489 _vshr(T, Src0R, ShAmt)->setSignType(InstARM32::FS_Signed);
3490 } else {
3491 auto *Src1R = Srcs.unswappedSrc1R(this);
3492 auto *Src1RNeg = makeReg(Src1R->getType());
3493 _vneg(Src1RNeg, Src1R);
3494 _vshl(T, Src0R, Src1RNeg)->setSignType(InstARM32::FS_Signed);
3495 }
3496 }
3497 _mov(Dest, T);
3498 return;
3499 }
3500 case InstArithmetic::Udiv:
3501 case InstArithmetic::Sdiv:
3502 case InstArithmetic::Urem:
3503 case InstArithmetic::Srem:
3504 llvm::report_fatal_error(
3505 "Integer div/rem should have been handled earlier.");
3506 return;
3507 case InstArithmetic::Fadd:
3508 case InstArithmetic::Fsub:
3509 case InstArithmetic::Fmul:
3510 case InstArithmetic::Fdiv:
3511 case InstArithmetic::Frem:
3512 llvm::report_fatal_error(
3513 "Floating point arith should have been handled earlier.");
3514 return;
3515 }
3516 }
3517
3518 void TargetARM32::lowerAssign(const InstAssign *Instr) {
3519 Variable *Dest = Instr->getDest();
3520
3521 if (Dest->isRematerializable()) {
3522 Context.insert<InstFakeDef>(Dest);
3523 return;
3524 }
3525
3526 Operand *Src0 = Instr->getSrc(0);
3527 assert(Dest->getType() == Src0->getType());
3528 if (Dest->getType() == IceType_i64) {
3529 Src0 = legalizeUndef(Src0);
3530
3531 Variable *T_Lo = makeReg(IceType_i32);
3532 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3533 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
3534 _mov(T_Lo, Src0Lo);
3535 _mov(DestLo, T_Lo);
3536
3537 Variable *T_Hi = makeReg(IceType_i32);
3538 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3539 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
3540 _mov(T_Hi, Src0Hi);
3541 _mov(DestHi, T_Hi);
3542
3543 return;
3544 }
3545
3546 Operand *NewSrc;
3547 if (Dest->hasReg()) {
3548 // If Dest already has a physical register, then legalize the Src operand
3549 // into a Variable with the same register assignment. This especially
3550 // helps allow the use of Flex operands.
3551 NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum());
3552 } else {
3553 // Dest could be a stack operand. Since we could potentially need to do a
3554 // Store (and store can only have Register operands), legalize this to a
3555 // register.
3556 NewSrc = legalize(Src0, Legal_Reg);
3557 }
3558
3559 if (isVectorType(Dest->getType()) || isScalarFloatingType(Dest->getType())) {
3560 NewSrc = legalize(NewSrc, Legal_Reg | Legal_Mem);
3561 }
3562 _mov(Dest, NewSrc);
3563 }
3564
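// lowerInt1ForBranch recursively lowers a chain of boolean producers into
// compares and conditional branches, short-circuiting And/Or producers. Rough
// sketch (assumed shape, not verbatim output) for "br (a && b), T, F":
//   <lower a>                ; yields a condition
//   b<!cond(a)> F            ; short-circuit to the false target
//   <lower b>                ; its condition is returned to lowerBr,
//                            ; which emits the final branches to T and F
// The Or case is symmetric, short-circuiting to the true target instead.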
3565 TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch(
3566 Operand *Boolean, const LowerInt1BranchTarget &TargetTrue,
3567 const LowerInt1BranchTarget &TargetFalse, uint32_t ShortCircuitable) {
3568 InstARM32Label *NewShortCircuitLabel = nullptr;
3569 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
3570
3571 const Inst *Producer = Computations.getProducerOf(Boolean);
3572
3573 if (Producer == nullptr) {
3574     // No producer, no problem: just emit code to perform (Boolean & 1) and
3575 // set the flags register. The branch should be taken if the resulting flags
3576 // indicate a non-zero result.
3577 _tst(legalizeToReg(Boolean), _1);
3578 return ShortCircuitCondAndLabel(CondWhenTrue(CondARM32::NE));
3579 }
3580
3581 switch (Producer->getKind()) {
3582 default:
3583 llvm::report_fatal_error("Unexpected producer.");
3584 case Inst::Icmp: {
3585 return ShortCircuitCondAndLabel(
3586 lowerIcmpCond(llvm::cast<InstIcmp>(Producer)));
3587 } break;
3588 case Inst::Fcmp: {
3589 return ShortCircuitCondAndLabel(
3590 lowerFcmpCond(llvm::cast<InstFcmp>(Producer)));
3591 } break;
3592 case Inst::Cast: {
3593 const auto *CastProducer = llvm::cast<InstCast>(Producer);
3594 assert(CastProducer->getCastKind() == InstCast::Trunc);
3595 Operand *Src = CastProducer->getSrc(0);
3596 if (Src->getType() == IceType_i64)
3597 Src = loOperand(Src);
3598 _tst(legalizeToReg(Src), _1);
3599 return ShortCircuitCondAndLabel(CondWhenTrue(CondARM32::NE));
3600 } break;
3601 case Inst::Arithmetic: {
3602 const auto *ArithProducer = llvm::cast<InstArithmetic>(Producer);
3603 switch (ArithProducer->getOp()) {
3604 default:
3605 llvm::report_fatal_error("Unhandled Arithmetic Producer.");
3606 case InstArithmetic::And: {
3607 if (!(ShortCircuitable & SC_And)) {
3608 NewShortCircuitLabel = InstARM32Label::create(Func, this);
3609 }
3610
3611 LowerInt1BranchTarget NewTarget =
3612 TargetFalse.createForLabelOrDuplicate(NewShortCircuitLabel);
3613
3614 ShortCircuitCondAndLabel CondAndLabel = lowerInt1ForBranch(
3615 Producer->getSrc(0), TargetTrue, NewTarget, SC_And);
3616 const CondWhenTrue &Cond = CondAndLabel.Cond;
3617
3618 _br_short_circuit(NewTarget, Cond.invert());
3619
3620 InstARM32Label *const ShortCircuitLabel = CondAndLabel.ShortCircuitTarget;
3621 if (ShortCircuitLabel != nullptr)
3622 Context.insert(ShortCircuitLabel);
3623
3624 return ShortCircuitCondAndLabel(
3625 lowerInt1ForBranch(Producer->getSrc(1), TargetTrue, NewTarget, SC_All)
3626 .assertNoLabelAndReturnCond(),
3627 NewShortCircuitLabel);
3628 } break;
3629 case InstArithmetic::Or: {
3630 if (!(ShortCircuitable & SC_Or)) {
3631 NewShortCircuitLabel = InstARM32Label::create(Func, this);
3632 }
3633
3634 LowerInt1BranchTarget NewTarget =
3635 TargetTrue.createForLabelOrDuplicate(NewShortCircuitLabel);
3636
3637 ShortCircuitCondAndLabel CondAndLabel = lowerInt1ForBranch(
3638 Producer->getSrc(0), NewTarget, TargetFalse, SC_Or);
3639 const CondWhenTrue &Cond = CondAndLabel.Cond;
3640
3641 _br_short_circuit(NewTarget, Cond);
3642
3643 InstARM32Label *const ShortCircuitLabel = CondAndLabel.ShortCircuitTarget;
3644 if (ShortCircuitLabel != nullptr)
3645 Context.insert(ShortCircuitLabel);
3646
3647 return ShortCircuitCondAndLabel(lowerInt1ForBranch(Producer->getSrc(1),
3648 NewTarget, TargetFalse,
3649 SC_All)
3650 .assertNoLabelAndReturnCond(),
3651 NewShortCircuitLabel);
3652 } break;
3653 }
3654 }
3655 }
3656 }
3657
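// lowerBr may emit two conditional branches to the true target because an
// unordered fcmp can map to a pair of ARM condition codes (for instance, an
// unordered-equal compare holds when either EQ or VS holds); WhenTrue1 carries
// the second code when one exists.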
3658 void TargetARM32::lowerBr(const InstBr *Instr) {
3659 if (Instr->isUnconditional()) {
3660 _br(Instr->getTargetUnconditional());
3661 return;
3662 }
3663
3664 CfgNode *TargetTrue = Instr->getTargetTrue();
3665 CfgNode *TargetFalse = Instr->getTargetFalse();
3666 ShortCircuitCondAndLabel CondAndLabel = lowerInt1ForBranch(
3667 Instr->getCondition(), LowerInt1BranchTarget(TargetTrue),
3668 LowerInt1BranchTarget(TargetFalse), SC_All);
3669 assert(CondAndLabel.ShortCircuitTarget == nullptr);
3670
3671 const CondWhenTrue &Cond = CondAndLabel.Cond;
3672 if (Cond.WhenTrue1 != CondARM32::kNone) {
3673 assert(Cond.WhenTrue0 != CondARM32::AL);
3674 _br(TargetTrue, Cond.WhenTrue1);
3675 }
3676
3677 switch (Cond.WhenTrue0) {
3678 default:
3679 _br(TargetTrue, TargetFalse, Cond.WhenTrue0);
3680 break;
3681 case CondARM32::kNone:
3682 _br(TargetFalse);
3683 break;
3684 case CondARM32::AL:
3685 _br(TargetTrue);
3686 break;
3687 }
3688 }
3689
3690 void TargetARM32::lowerCall(const InstCall *Instr) {
3691 Operand *CallTarget = Instr->getCallTarget();
3692 if (Instr->isTargetHelperCall()) {
3693 auto TargetHelperPreamble = ARM32HelpersPreamble.find(CallTarget);
3694 if (TargetHelperPreamble != ARM32HelpersPreamble.end()) {
3695 (this->*TargetHelperPreamble->second)(Instr);
3696 }
3697 }
3698 MaybeLeafFunc = false;
3699 NeedsStackAlignment = true;
3700
3701 // Assign arguments to registers and stack. Also reserve stack.
3702 TargetARM32::CallingConv CC;
3703 // Pair of Arg Operand -> GPR number assignments.
3704 llvm::SmallVector<std::pair<Operand *, RegNumT>, NumGPRArgs> GPRArgs;
3705 llvm::SmallVector<std::pair<Operand *, RegNumT>, NumFP32Args> FPArgs;
3706 // Pair of Arg Operand -> stack offset.
3707 llvm::SmallVector<std::pair<Operand *, int32_t>, 8> StackArgs;
3708 size_t ParameterAreaSizeBytes = 0;
3709
3710 // Classify each argument operand according to the location where the
3711 // argument is passed.
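  // For example (assuming the AAPCS-style convention modeled by
  // TargetARM32::CallingConv): in a call f(i32 a, i64 b, float c), a would land
  // in r0, b in an aligned GPR pair such as r2:r3 (split into lo/hi below), and
  // c in s0; arguments that do not fit in registers fall through to StackArgs
  // with an aligned stack offset.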
3712 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
3713 Operand *Arg = legalizeUndef(Instr->getArg(i));
3714 const Type Ty = Arg->getType();
3715 bool InReg = false;
3716 RegNumT Reg;
3717 if (isScalarIntegerType(Ty)) {
3718 InReg = CC.argInGPR(Ty, &Reg);
3719 } else {
3720 InReg = CC.argInVFP(Ty, &Reg);
3721 }
3722
3723 if (!InReg) {
3724 ParameterAreaSizeBytes =
3725 applyStackAlignmentTy(ParameterAreaSizeBytes, Ty);
3726 StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes));
3727 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Ty);
3728 continue;
3729 }
3730
3731 if (Ty == IceType_i64) {
3732 Operand *Lo = loOperand(Arg);
3733 Operand *Hi = hiOperand(Arg);
3734 GPRArgs.push_back(std::make_pair(
3735 Lo, RegNumT::fixme(RegARM32::getI64PairFirstGPRNum(Reg))));
3736 GPRArgs.push_back(std::make_pair(
3737 Hi, RegNumT::fixme(RegARM32::getI64PairSecondGPRNum(Reg))));
3738 } else if (isScalarIntegerType(Ty)) {
3739 GPRArgs.push_back(std::make_pair(Arg, Reg));
3740 } else {
3741 FPArgs.push_back(std::make_pair(Arg, Reg));
3742 }
3743 }
3744
3745 // Adjust the parameter area so that the stack is aligned. It is assumed that
3746 // the stack is already aligned at the start of the calling sequence.
3747 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
3748
3749 if (ParameterAreaSizeBytes > MaxOutArgsSizeBytes) {
3750 llvm::report_fatal_error("MaxOutArgsSizeBytes is not really a max.");
3751 }
3752
3753 // Copy arguments that are passed on the stack to the appropriate stack
3754 // locations.
3755 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
3756 for (auto &StackArg : StackArgs) {
3757 ConstantInteger32 *Loc =
3758 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second));
3759 Type Ty = StackArg.first->getType();
3760 OperandARM32Mem *Addr;
3761 constexpr bool SignExt = false;
3762 if (OperandARM32Mem::canHoldOffset(Ty, SignExt, StackArg.second)) {
3763 Addr = OperandARM32Mem::create(Func, Ty, SP, Loc);
3764 } else {
3765 Variable *NewBase = Func->makeVariable(SP->getType());
3766 lowerArithmetic(
3767 InstArithmetic::create(Func, InstArithmetic::Add, NewBase, SP, Loc));
3768 Addr = formMemoryOperand(NewBase, Ty);
3769 }
3770 lowerStore(InstStore::create(Func, StackArg.first, Addr));
3771 }
3772
3773 // Generate the call instruction. Assign its result to a temporary with high
3774 // register allocation weight.
3775 Variable *Dest = Instr->getDest();
3776 // ReturnReg doubles as ReturnRegLo as necessary.
3777 Variable *ReturnReg = nullptr;
3778 Variable *ReturnRegHi = nullptr;
3779 if (Dest) {
3780 switch (Dest->getType()) {
3781 case IceType_NUM:
3782 llvm::report_fatal_error("Invalid Call dest type");
3783 break;
3784 case IceType_void:
3785 break;
3786 case IceType_i1:
3787 assert(Computations.getProducerOf(Dest) == nullptr);
3788 // Fall-through intended.
3789 case IceType_i8:
3790 case IceType_i16:
3791 case IceType_i32:
3792 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_r0);
3793 break;
3794 case IceType_i64:
3795 ReturnReg = makeReg(IceType_i32, RegARM32::Reg_r0);
3796 ReturnRegHi = makeReg(IceType_i32, RegARM32::Reg_r1);
3797 break;
3798 case IceType_f32:
3799 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_s0);
3800 break;
3801 case IceType_f64:
3802 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_d0);
3803 break;
3804 case IceType_v4i1:
3805 case IceType_v8i1:
3806 case IceType_v16i1:
3807 case IceType_v16i8:
3808 case IceType_v8i16:
3809 case IceType_v4i32:
3810 case IceType_v4f32:
3811 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_q0);
3812 break;
3813 }
3814 }
3815
3816 // Allow ConstantRelocatable to be left alone as a direct call, but force
3817 // other constants like ConstantInteger32 to be in a register and make it an
3818 // indirect call.
3819 if (!llvm::isa<ConstantRelocatable>(CallTarget)) {
3820 CallTarget = legalize(CallTarget, Legal_Reg);
3821 }
3822
3823 // Copy arguments to be passed in registers to the appropriate registers.
3824 CfgVector<Variable *> RegArgs;
3825 for (auto &FPArg : FPArgs) {
3826 RegArgs.emplace_back(legalizeToReg(FPArg.first, FPArg.second));
3827 }
3828 for (auto &GPRArg : GPRArgs) {
3829 RegArgs.emplace_back(legalizeToReg(GPRArg.first, GPRArg.second));
3830 }
3831
3832 // Generate a FakeUse of register arguments so that they do not get dead code
3833 // eliminated as a result of the FakeKill of scratch registers after the call.
3834   // These fake-uses need to be placed here to prevent argument registers from
3835   // being used during the legalizeToReg() calls above.
3836 for (auto *RegArg : RegArgs) {
3837 Context.insert<InstFakeUse>(RegArg);
3838 }
3839
3840 InstARM32Call *NewCall =
3841 Sandboxer(this, InstBundleLock::Opt_AlignToEnd).bl(ReturnReg, CallTarget);
3842
3843 if (ReturnRegHi)
3844 Context.insert<InstFakeDef>(ReturnRegHi);
3845
3846 // Insert a register-kill pseudo instruction.
3847 Context.insert<InstFakeKill>(NewCall);
3848
3849 // Generate a FakeUse to keep the call live if necessary.
3850 if (Instr->hasSideEffects() && ReturnReg) {
3851 Context.insert<InstFakeUse>(ReturnReg);
3852 }
3853
3854 if (Dest != nullptr) {
3855 // Assign the result of the call to Dest.
3856 if (ReturnReg != nullptr) {
3857 if (ReturnRegHi) {
3858 auto *Dest64On32 = llvm::cast<Variable64On32>(Dest);
3859 Variable *DestLo = Dest64On32->getLo();
3860 Variable *DestHi = Dest64On32->getHi();
3861 _mov(DestLo, ReturnReg);
3862 _mov(DestHi, ReturnRegHi);
3863 } else {
3864 if (isFloatingType(Dest->getType()) || isVectorType(Dest->getType())) {
3865 _mov(Dest, ReturnReg);
3866 } else {
3867 assert(isIntegerType(Dest->getType()) &&
3868 typeWidthInBytes(Dest->getType()) <= 4);
3869 _mov(Dest, ReturnReg);
3870 }
3871 }
3872 }
3873 }
3874
3875 if (Instr->isTargetHelperCall()) {
3876 auto TargetHelpersPostamble = ARM32HelpersPostamble.find(CallTarget);
3877 if (TargetHelpersPostamble != ARM32HelpersPostamble.end()) {
3878 (this->*TargetHelpersPostamble->second)(Instr);
3879 }
3880 }
3881 }
3882
3883 namespace {
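// The 64-bit temporary used by the i64<->f64 bitcasts below must not be given a
// register itself, but its lo/hi halves must be, because the core<->VFP moves
// (assumed here to be the paired vmov forms) need both 32-bit halves in GPRs.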
3884 void configureBitcastTemporary(Variable64On32 *Var) {
3885 Var->setMustNotHaveReg();
3886 Var->getHi()->setMustHaveReg();
3887 Var->getLo()->setMustHaveReg();
3888 }
3889 } // end of anonymous namespace
3890
3891 void TargetARM32::lowerCast(const InstCast *Instr) {
3892 InstCast::OpKind CastKind = Instr->getCastKind();
3893 Variable *Dest = Instr->getDest();
3894 const Type DestTy = Dest->getType();
3895 Operand *Src0 = legalizeUndef(Instr->getSrc(0));
3896 switch (CastKind) {
3897 default:
3898 Func->setError("Cast type not supported");
3899 return;
3900 case InstCast::Sext: {
3901 if (isVectorType(DestTy)) {
3902 Variable *T0 = makeReg(DestTy);
3903 Variable *T1 = makeReg(DestTy);
3904 ConstantInteger32 *ShAmt = nullptr;
3905 switch (DestTy) {
3906 default:
3907 llvm::report_fatal_error("Unexpected type in vector sext.");
3908 case IceType_v16i8:
3909 ShAmt = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(7));
3910 break;
3911 case IceType_v8i16:
3912 ShAmt = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(15));
3913 break;
3914 case IceType_v4i32:
3915 ShAmt = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(31));
3916 break;
3917 }
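      // E.g., for v4i32 a lane holding 1 becomes 0x80000000 after the shl by
      // 31, and the arithmetic shr by 31 smears the sign bit, producing
      // all-ones (-1); a lane holding 0 stays 0.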
3918 auto *Src0R = legalizeToReg(Src0);
3919 _vshl(T0, Src0R, ShAmt);
3920 _vshr(T1, T0, ShAmt)->setSignType(InstARM32::FS_Signed);
3921 _mov(Dest, T1);
3922 } else if (DestTy == IceType_i64) {
3923       // t1 = sxtb src; t2 = t1 asr #31; dst.lo = t1; dst.hi = t2
3924 Constant *ShiftAmt = Ctx->getConstantInt32(31);
3925 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3926 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3927 Variable *T_Lo = makeReg(DestLo->getType());
3928 if (Src0->getType() == IceType_i32) {
3929 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
3930 _mov(T_Lo, Src0RF);
3931 } else if (Src0->getType() != IceType_i1) {
3932 Variable *Src0R = legalizeToReg(Src0);
3933 _sxt(T_Lo, Src0R);
3934 } else {
3935 Operand *_0 = Ctx->getConstantZero(IceType_i32);
3936 Operand *_m1 = Ctx->getConstantInt32(-1);
3937 lowerInt1ForSelect(T_Lo, Src0, _m1, _0);
3938 }
3939 _mov(DestLo, T_Lo);
3940 Variable *T_Hi = makeReg(DestHi->getType());
3941 if (Src0->getType() != IceType_i1) {
3942 _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, T_Lo,
3943 OperandARM32::ASR, ShiftAmt));
3944 } else {
3945 // For i1, the asr instruction is already done above.
3946 _mov(T_Hi, T_Lo);
3947 }
3948 _mov(DestHi, T_Hi);
3949 } else if (Src0->getType() != IceType_i1) {
3950 // t1 = sxt src; dst = t1
3951 Variable *Src0R = legalizeToReg(Src0);
3952 Variable *T = makeReg(DestTy);
3953 _sxt(T, Src0R);
3954 _mov(Dest, T);
3955 } else {
3956 Constant *_0 = Ctx->getConstantZero(IceType_i32);
3957 Operand *_m1 = Ctx->getConstantInt(DestTy, -1);
3958 Variable *T = makeReg(DestTy);
3959 lowerInt1ForSelect(T, Src0, _m1, _0);
3960 _mov(Dest, T);
3961 }
3962 break;
3963 }
3964 case InstCast::Zext: {
3965 if (isVectorType(DestTy)) {
3966 auto *Mask = makeReg(DestTy);
3967 auto *_1 = Ctx->getConstantInt32(1);
3968 auto *T = makeReg(DestTy);
3969 auto *Src0R = legalizeToReg(Src0);
3970 _mov(Mask, _1);
3971 _vand(T, Src0R, Mask);
3972 _mov(Dest, T);
3973 } else if (DestTy == IceType_i64) {
3974 // t1=uxtb src; dst.lo=t1; dst.hi=0
3975 Operand *_0 =
3976 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
3977 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3978 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3979 Variable *T_Lo = makeReg(DestLo->getType());
3980
3981 switch (Src0->getType()) {
3982 default: {
3983 assert(Src0->getType() != IceType_i64);
3984 _uxt(T_Lo, legalizeToReg(Src0));
3985 } break;
3986 case IceType_i32: {
3987 _mov(T_Lo, legalize(Src0, Legal_Reg | Legal_Flex));
3988 } break;
3989 case IceType_i1: {
3990 SafeBoolChain Safe = lowerInt1(T_Lo, Src0);
3991 if (Safe == SBC_No) {
3992 Operand *_1 =
3993 legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
3994 _and(T_Lo, T_Lo, _1);
3995 }
3996 } break;
3997 }
3998
3999 _mov(DestLo, T_Lo);
4000
4001 Variable *T_Hi = makeReg(DestLo->getType());
4002 _mov(T_Hi, _0);
4003 _mov(DestHi, T_Hi);
4004 } else if (Src0->getType() == IceType_i1) {
4005 Variable *T = makeReg(DestTy);
4006
4007 SafeBoolChain Safe = lowerInt1(T, Src0);
4008 if (Safe == SBC_No) {
4009 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
4010 _and(T, T, _1);
4011 }
4012
4013 _mov(Dest, T);
4014 } else {
4015 // t1 = uxt src; dst = t1
4016 Variable *Src0R = legalizeToReg(Src0);
4017 Variable *T = makeReg(DestTy);
4018 _uxt(T, Src0R);
4019 _mov(Dest, T);
4020 }
4021 break;
4022 }
4023 case InstCast::Trunc: {
4024 if (isVectorType(DestTy)) {
4025 auto *T = makeReg(DestTy);
4026 auto *Src0R = legalizeToReg(Src0);
4027 _mov(T, Src0R);
4028 _mov(Dest, T);
4029 } else {
4030 if (Src0->getType() == IceType_i64)
4031 Src0 = loOperand(Src0);
4032 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
4033 // t1 = trunc Src0RF; Dest = t1
4034 Variable *T = makeReg(DestTy);
4035 _mov(T, Src0RF);
4036 if (DestTy == IceType_i1)
4037 _and(T, T, Ctx->getConstantInt1(1));
4038 _mov(Dest, T);
4039 }
4040 break;
4041 }
4042 case InstCast::Fptrunc:
4043 case InstCast::Fpext: {
4044     // fptrunc: dest.f32 = fptrunc src0.f64
4045     // fpext:   dest.f64 = fpext src0.f32
4046 const bool IsTrunc = CastKind == InstCast::Fptrunc;
4047 assert(!isVectorType(DestTy));
4048 assert(DestTy == (IsTrunc ? IceType_f32 : IceType_f64));
4049 assert(Src0->getType() == (IsTrunc ? IceType_f64 : IceType_f32));
4050 Variable *Src0R = legalizeToReg(Src0);
4051 Variable *T = makeReg(DestTy);
4052 _vcvt(T, Src0R, IsTrunc ? InstARM32Vcvt::D2s : InstARM32Vcvt::S2d);
4053 _mov(Dest, T);
4054 break;
4055 }
4056 case InstCast::Fptosi:
4057 case InstCast::Fptoui: {
4058 const bool DestIsSigned = CastKind == InstCast::Fptosi;
4059 Variable *Src0R = legalizeToReg(Src0);
4060
4061 if (isVectorType(DestTy)) {
4062 assert(typeElementType(Src0->getType()) == IceType_f32);
4063 auto *T = makeReg(DestTy);
4064 _vcvt(T, Src0R,
4065 DestIsSigned ? InstARM32Vcvt::Vs2si : InstARM32Vcvt::Vs2ui);
4066 _mov(Dest, T);
4067 break;
4068 }
4069
4070 const bool Src0IsF32 = isFloat32Asserting32Or64(Src0->getType());
4071 if (llvm::isa<Variable64On32>(Dest)) {
4072 llvm::report_fatal_error("fp-to-i64 should have been pre-lowered.");
4073 }
4074 // fptosi:
4075 // t1.fp = vcvt src0.fp
4076 // t2.i32 = vmov t1.fp
4077 // dest.int = conv t2.i32 @ Truncates the result if needed.
4078 // fptoui:
4079 // t1.fp = vcvt src0.fp
4080 // t2.u32 = vmov t1.fp
4081 // dest.uint = conv t2.u32 @ Truncates the result if needed.
4082 Variable *T_fp = makeReg(IceType_f32);
4083 const InstARM32Vcvt::VcvtVariant Conversion =
4084 Src0IsF32 ? (DestIsSigned ? InstARM32Vcvt::S2si : InstARM32Vcvt::S2ui)
4085 : (DestIsSigned ? InstARM32Vcvt::D2si : InstARM32Vcvt::D2ui);
4086 _vcvt(T_fp, Src0R, Conversion);
4087 Variable *T = makeReg(IceType_i32);
4088 _mov(T, T_fp);
4089 if (DestTy != IceType_i32) {
4090 Variable *T_1 = makeReg(DestTy);
4091 lowerCast(InstCast::create(Func, InstCast::Trunc, T_1, T));
4092 T = T_1;
4093 }
4094 _mov(Dest, T);
4095 break;
4096 }
4097 case InstCast::Sitofp:
4098 case InstCast::Uitofp: {
4099 const bool SourceIsSigned = CastKind == InstCast::Sitofp;
4100
4101 if (isVectorType(DestTy)) {
4102 assert(typeElementType(DestTy) == IceType_f32);
4103 auto *T = makeReg(DestTy);
4104 Variable *Src0R = legalizeToReg(Src0);
4105 _vcvt(T, Src0R,
4106 SourceIsSigned ? InstARM32Vcvt::Vsi2s : InstARM32Vcvt::Vui2s);
4107 _mov(Dest, T);
4108 break;
4109 }
4110
4111 const bool DestIsF32 = isFloat32Asserting32Or64(DestTy);
4112 if (Src0->getType() == IceType_i64) {
4113 llvm::report_fatal_error("i64-to-fp should have been pre-lowered.");
4114 }
4115 // sitofp:
4116 // t1.i32 = sext src.int @ sign-extends src0 if needed.
4117 // t2.fp32 = vmov t1.i32
4118 // t3.fp = vcvt.{fp}.s32 @ fp is either f32 or f64
4119 // uitofp:
4120 // t1.i32 = zext src.int @ zero-extends src0 if needed.
4121 // t2.fp32 = vmov t1.i32
4122     //   t3.fp = vcvt.{fp}.u32 @ fp is either f32 or f64
4123 if (Src0->getType() != IceType_i32) {
4124 Variable *Src0R_32 = makeReg(IceType_i32);
4125 lowerCast(InstCast::create(
4126 Func, SourceIsSigned ? InstCast::Sext : InstCast::Zext, Src0R_32,
4127 Src0));
4128 Src0 = Src0R_32;
4129 }
4130 Variable *Src0R = legalizeToReg(Src0);
4131 Variable *Src0R_f32 = makeReg(IceType_f32);
4132 _mov(Src0R_f32, Src0R);
4133 Src0R = Src0R_f32;
4134 Variable *T = makeReg(DestTy);
4135 const InstARM32Vcvt::VcvtVariant Conversion =
4136 DestIsF32
4137 ? (SourceIsSigned ? InstARM32Vcvt::Si2s : InstARM32Vcvt::Ui2s)
4138 : (SourceIsSigned ? InstARM32Vcvt::Si2d : InstARM32Vcvt::Ui2d);
4139 _vcvt(T, Src0R, Conversion);
4140 _mov(Dest, T);
4141 break;
4142 }
4143 case InstCast::Bitcast: {
4144 Operand *Src0 = Instr->getSrc(0);
4145 if (DestTy == Src0->getType()) {
4146 auto *Assign = InstAssign::create(Func, Dest, Src0);
4147 lowerAssign(Assign);
4148 return;
4149 }
4150 switch (DestTy) {
4151 case IceType_NUM:
4152 case IceType_void:
4153 llvm::report_fatal_error("Unexpected bitcast.");
4154 case IceType_i1:
4155 UnimplementedLoweringError(this, Instr);
4156 break;
4157 case IceType_i8:
4158 assert(Src0->getType() == IceType_v8i1);
4159 llvm::report_fatal_error(
4160 "i8 to v8i1 conversion should have been prelowered.");
4161 break;
4162 case IceType_i16:
4163 assert(Src0->getType() == IceType_v16i1);
4164 llvm::report_fatal_error(
4165 "i16 to v16i1 conversion should have been prelowered.");
4166 break;
4167 case IceType_i32:
4168 case IceType_f32: {
4169 Variable *Src0R = legalizeToReg(Src0);
4170 Variable *T = makeReg(DestTy);
4171 _mov(T, Src0R);
4172 lowerAssign(InstAssign::create(Func, Dest, T));
4173 break;
4174 }
4175 case IceType_i64: {
4176 // t0, t1 <- src0
4177 // dest[31..0] = t0
4178 // dest[63..32] = t1
4179 assert(Src0->getType() == IceType_f64);
4180 auto *T = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
4181 T->initHiLo(Func);
4182 configureBitcastTemporary(T);
4183 Variable *Src0R = legalizeToReg(Src0);
4184 _mov(T, Src0R);
4185 Context.insert<InstFakeUse>(T->getHi());
4186 Context.insert<InstFakeUse>(T->getLo());
4187 lowerAssign(InstAssign::create(Func, Dest, T));
4188 break;
4189 }
4190 case IceType_f64: {
4191 // T0 <- lo(src)
4192 // T1 <- hi(src)
4193 // vmov T2, T0, T1
4194 // Dest <- T2
4195 assert(Src0->getType() == IceType_i64);
4196 Variable *T = makeReg(DestTy);
4197 auto *Src64 = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
4198 Src64->initHiLo(Func);
4199 configureBitcastTemporary(Src64);
4200 lowerAssign(InstAssign::create(Func, Src64, Src0));
4201 _mov(T, Src64);
4202 lowerAssign(InstAssign::create(Func, Dest, T));
4203 break;
4204 }
4205 case IceType_v8i1:
4206 assert(Src0->getType() == IceType_i8);
4207 llvm::report_fatal_error(
4208 "v8i1 to i8 conversion should have been prelowered.");
4209 break;
4210 case IceType_v16i1:
4211 assert(Src0->getType() == IceType_i16);
4212 llvm::report_fatal_error(
4213 "v16i1 to i16 conversion should have been prelowered.");
4214 break;
4215 case IceType_v4i1:
4216 case IceType_v8i16:
4217 case IceType_v16i8:
4218 case IceType_v4f32:
4219 case IceType_v4i32: {
4220 assert(typeWidthInBytes(DestTy) == typeWidthInBytes(Src0->getType()));
4221 assert(isVectorType(DestTy) == isVectorType(Src0->getType()));
4222 Variable *T = makeReg(DestTy);
4223 _mov(T, Src0);
4224 _mov(Dest, T);
4225 break;
4226 }
4227 }
4228 break;
4229 }
4230 }
4231 }
4232
4233 void TargetARM32::lowerExtractElement(const InstExtractElement *Instr) {
4234 Variable *Dest = Instr->getDest();
4235 Type DestTy = Dest->getType();
4236
4237 Variable *Src0 = legalizeToReg(Instr->getSrc(0));
4238 Operand *Src1 = Instr->getSrc(1);
4239
4240 if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src1)) {
4241 const uint32_t Index = Imm->getValue();
4242 Variable *T = makeReg(DestTy);
4243 Variable *TSrc0 = makeReg(Src0->getType());
4244
4245 if (isFloatingType(DestTy)) {
4246 // We need to make sure the source is in a suitable register.
4247 TSrc0->setRegClass(RegARM32::RCARM32_QtoS);
4248 }
4249
4250 _mov(TSrc0, Src0);
4251 _extractelement(T, TSrc0, Index);
4252 _mov(Dest, T);
4253 return;
4254 }
4255 assert(false && "extractelement requires a constant index");
4256 }
4257
4258 namespace {
4259 // Validates FCMPARM32_TABLE's declaration w.r.t. InstFcmp::FCondition ordering
4260 // (and naming).
4261 enum {
4262 #define X(val, CC0, CC1, CC0_V, CC1_V, INV_V, NEG_V) _fcmp_ll_##val,
4263 FCMPARM32_TABLE
4264 #undef X
4265 _fcmp_ll_NUM
4266 };
4267
4268 enum {
4269 #define X(tag, str) _fcmp_hl_##tag = InstFcmp::tag,
4270 ICEINSTFCMP_TABLE
4271 #undef X
4272 _fcmp_hl_NUM
4273 };
4274
4275 static_assert((uint32_t)_fcmp_hl_NUM == (uint32_t)_fcmp_ll_NUM,
4276 "Inconsistency between high-level and low-level fcmp tags.");
4277 #define X(tag, str) \
4278 static_assert( \
4279 (uint32_t)_fcmp_hl_##tag == (uint32_t)_fcmp_ll_##tag, \
4280 "Inconsistency between high-level and low-level fcmp tag " #tag);
4281 ICEINSTFCMP_TABLE
4282 #undef X
4283
4284 struct {
4285 CondARM32::Cond CC0;
4286 CondARM32::Cond CC1;
4287 } TableFcmp[] = {
4288 #define X(val, CC0, CC1, CC0_V, CC1_V, INV_V, NEG_V) \
4289 {CondARM32::CC0, CondARM32::CC1},
4290 FCMPARM32_TABLE
4291 #undef X
4292 };
4293
4294 bool isFloatingPointZero(const Operand *Src) {
4295 if (const auto *F32 = llvm::dyn_cast<const ConstantFloat>(Src)) {
4296 return Utils::isPositiveZero(F32->getValue());
4297 }
4298
4299 if (const auto *F64 = llvm::dyn_cast<const ConstantDouble>(Src)) {
4300 return Utils::isPositiveZero(F64->getValue());
4301 }
4302
4303 return false;
4304 }
4305 } // end of anonymous namespace
4306
4307 TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) {
4308 InstFcmp::FCond Condition = Instr->getCondition();
4309 switch (Condition) {
4310 case InstFcmp::False:
4311 return CondWhenTrue(CondARM32::kNone);
4312 case InstFcmp::True:
4313 return CondWhenTrue(CondARM32::AL);
4314 break;
4315 default: {
4316 Variable *Src0R = legalizeToReg(Instr->getSrc(0));
4317 Operand *Src1 = Instr->getSrc(1);
4318 if (isFloatingPointZero(Src1)) {
4319 _vcmp(Src0R, OperandARM32FlexFpZero::create(Func, Src0R->getType()));
4320 } else {
4321 _vcmp(Src0R, legalizeToReg(Src1));
4322 }
4323 _vmrs();
4324 assert(Condition < llvm::array_lengthof(TableFcmp));
4325 return CondWhenTrue(TableFcmp[Condition].CC0, TableFcmp[Condition].CC1);
4326 }
4327 }
4328 }
4329
4330 void TargetARM32::lowerFcmp(const InstFcmp *Instr) {
4331 Variable *Dest = Instr->getDest();
4332 const Type DestTy = Dest->getType();
4333
4334 if (isVectorType(DestTy)) {
4335 if (Instr->getCondition() == InstFcmp::False) {
4336 constexpr Type SafeTypeForMovingConstant = IceType_v4i32;
4337 auto *T = makeReg(SafeTypeForMovingConstant);
4338 _mov(T, llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(0)));
4339 _mov(Dest, T);
4340 return;
4341 }
4342
4343 if (Instr->getCondition() == InstFcmp::True) {
4344 constexpr Type SafeTypeForMovingConstant = IceType_v4i32;
4345 auto *T = makeReg(SafeTypeForMovingConstant);
4346 _mov(T, llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(1)));
4347 _mov(Dest, T);
4348 return;
4349 }
4350
4351 Variable *T0;
4352 Variable *T1;
4353 bool Negate = false;
4354 auto *Src0 = legalizeToReg(Instr->getSrc(0));
4355 auto *Src1 = legalizeToReg(Instr->getSrc(1));
4356
4357 switch (Instr->getCondition()) {
4358 default:
4359 llvm::report_fatal_error("Unhandled fp comparison.");
4360 #define _Vcnone(Tptr, S0, S1) \
4361 do { \
4362 *(Tptr) = nullptr; \
4363 } while (0)
4364 #define _Vceq(Tptr, S0, S1) \
4365 do { \
4366 *(Tptr) = makeReg(DestTy); \
4367 _vceq(*(Tptr), S0, S1); \
4368 } while (0)
4369 #define _Vcge(Tptr, S0, S1) \
4370 do { \
4371 *(Tptr) = makeReg(DestTy); \
4372 _vcge(*(Tptr), S0, S1)->setSignType(InstARM32::FS_Signed); \
4373 } while (0)
4374 #define _Vcgt(Tptr, S0, S1) \
4375 do { \
4376 *(Tptr) = makeReg(DestTy); \
4377 _vcgt(*(Tptr), S0, S1)->setSignType(InstARM32::FS_Signed); \
4378 } while (0)
4379 #define X(val, CC0, CC1, CC0_V, CC1_V, INV_V, NEG_V) \
4380 case InstFcmp::val: { \
4381 _Vc##CC0_V(&T0, (INV_V) ? Src1 : Src0, (INV_V) ? Src0 : Src1); \
4382 _Vc##CC1_V(&T1, (INV_V) ? Src0 : Src1, (INV_V) ? Src1 : Src0); \
4383 Negate = NEG_V; \
4384 } break;
4385 FCMPARM32_TABLE
4386 #undef X
4387 #undef _Vcgt
4388 #undef _Vcge
4389 #undef _Vceq
4390 #undef _Vcnone
4391 }
4392 assert(T0 != nullptr);
4393 Variable *T = T0;
4394 if (T1 != nullptr) {
4395 T = makeReg(DestTy);
4396 _vorr(T, T0, T1);
4397 }
4398
4399 if (Negate) {
4400 auto *TNeg = makeReg(DestTy);
4401 _vmvn(TNeg, T);
4402 T = TNeg;
4403 }
4404
4405 _mov(Dest, T);
4406 return;
4407 }
4408
4409 Variable *T = makeReg(IceType_i1);
4410 Operand *_1 = legalize(Ctx->getConstantInt32(1), Legal_Reg | Legal_Flex);
4411 Operand *_0 =
4412 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
4413
4414 CondWhenTrue Cond = lowerFcmpCond(Instr);
4415
4416 bool RedefineT = false;
4417 if (Cond.WhenTrue0 != CondARM32::AL) {
4418 _mov(T, _0);
4419 RedefineT = true;
4420 }
4421
4422 if (Cond.WhenTrue0 == CondARM32::kNone) {
4423 _mov(Dest, T);
4424 return;
4425 }
4426
4427 if (RedefineT) {
4428 _mov_redefined(T, _1, Cond.WhenTrue0);
4429 } else {
4430 _mov(T, _1, Cond.WhenTrue0);
4431 }
4432
4433 if (Cond.WhenTrue1 != CondARM32::kNone) {
4434 _mov_redefined(T, _1, Cond.WhenTrue1);
4435 }
4436
4437 _mov(Dest, T);
4438 }
4439
4440 TargetARM32::CondWhenTrue
4441 TargetARM32::lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
4442 Operand *Src1) {
4443 assert(Condition < llvm::array_lengthof(TableIcmp64));
4444
4445 Int32Operands SrcsLo(loOperand(Src0), loOperand(Src1));
4446 Int32Operands SrcsHi(hiOperand(Src0), hiOperand(Src1));
4447 assert(SrcsLo.hasConstOperand() == SrcsHi.hasConstOperand());
4448 assert(SrcsLo.swappedOperands() == SrcsHi.swappedOperands());
4449
4450 if (SrcsLo.hasConstOperand()) {
4451 const uint32_t ValueLo = SrcsLo.getConstantValue();
4452 const uint32_t ValueHi = SrcsHi.getConstantValue();
4453 const uint64_t Value = (static_cast<uint64_t>(ValueHi) << 32) | ValueLo;
4454 if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) &&
4455 Value == 0) {
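      // Comparing a 64-bit value against zero for eq/ne: orrs of the two halves
      // sets Z exactly when both words are zero, so the condition can be read
      // straight from the flags.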
4456 Variable *T = makeReg(IceType_i32);
4457 Variable *Src0LoR = SrcsLo.src0R(this);
4458 Variable *Src0HiR = SrcsHi.src0R(this);
4459 _orrs(T, Src0LoR, Src0HiR);
4460 Context.insert<InstFakeUse>(T);
4461 return CondWhenTrue(TableIcmp64[Condition].C1);
4462 }
4463
4464 Variable *Src0RLo = SrcsLo.src0R(this);
4465 Variable *Src0RHi = SrcsHi.src0R(this);
4466 Operand *Src1RFLo = SrcsLo.src1RF(this);
4467 Operand *Src1RFHi = ValueLo == ValueHi ? Src1RFLo : SrcsHi.src1RF(this);
4468
4469 const bool UseRsb =
4470 TableIcmp64[Condition].Swapped != SrcsLo.swappedOperands();
4471
4472 if (UseRsb) {
4473 if (TableIcmp64[Condition].IsSigned) {
4474 Variable *T = makeReg(IceType_i32);
4475 _rsbs(T, Src0RLo, Src1RFLo);
4476 Context.insert<InstFakeUse>(T);
4477
4478 T = makeReg(IceType_i32);
4479 _rscs(T, Src0RHi, Src1RFHi);
4480 // We need to add a FakeUse here because liveness gets mad at us (Def
4481 // without Use.) Note that flag-setting instructions are considered to
4482 // have side effects and, therefore, are not DCE'ed.
4483 Context.insert<InstFakeUse>(T);
4484 } else {
4485 Variable *T = makeReg(IceType_i32);
4486 _rsbs(T, Src0RHi, Src1RFHi);
4487 Context.insert<InstFakeUse>(T);
4488
4489 T = makeReg(IceType_i32);
4490 _rsbs(T, Src0RLo, Src1RFLo, CondARM32::EQ);
4491 Context.insert<InstFakeUse>(T);
4492 }
4493 } else {
4494 if (TableIcmp64[Condition].IsSigned) {
4495 _cmp(Src0RLo, Src1RFLo);
4496 Variable *T = makeReg(IceType_i32);
4497 _sbcs(T, Src0RHi, Src1RFHi);
4498 Context.insert<InstFakeUse>(T);
4499 } else {
4500 _cmp(Src0RHi, Src1RFHi);
4501 _cmp(Src0RLo, Src1RFLo, CondARM32::EQ);
4502 }
4503 }
4504
4505 return CondWhenTrue(TableIcmp64[Condition].C1);
4506 }
4507
4508 Variable *Src0RLo, *Src0RHi;
4509 Operand *Src1RFLo, *Src1RFHi;
4510 if (TableIcmp64[Condition].Swapped) {
4511 Src0RLo = legalizeToReg(loOperand(Src1));
4512 Src0RHi = legalizeToReg(hiOperand(Src1));
4513 Src1RFLo = legalizeToReg(loOperand(Src0));
4514 Src1RFHi = legalizeToReg(hiOperand(Src0));
4515 } else {
4516 Src0RLo = legalizeToReg(loOperand(Src0));
4517 Src0RHi = legalizeToReg(hiOperand(Src0));
4518 Src1RFLo = legalizeToReg(loOperand(Src1));
4519 Src1RFHi = legalizeToReg(hiOperand(Src1));
4520 }
4521
4522 // a=icmp cond, b, c ==>
4523 // GCC does:
4524 // cmp b.hi, c.hi or cmp b.lo, c.lo
4525 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi
4526 // mov.<C1> t, #1 mov.<C1> t, #1
4527 // mov.<C2> t, #0 mov.<C2> t, #0
4528 // mov a, t mov a, t
4529 // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi"
4530 // is used for signed compares. In some cases, b and c need to be swapped as
4531 // well.
4532 //
4533 // LLVM does:
4534 // for EQ and NE:
4535 // eor t1, b.hi, c.hi
4536   //   eor t2, b.lo, c.lo
4537 // orrs t, t1, t2
4538 // mov.<C> t, #1
4539 // mov a, t
4540 //
4541 // that's nice in that it's just as short but has fewer dependencies for
4542 // better ILP at the cost of more registers.
4543 //
4544 // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two
4545 // unconditional mov #0, two cmps, two conditional mov #1, and one
4546 // conditional reg mov. That has few dependencies for good ILP, but is a
4547 // longer sequence.
4548 //
4549 // So, we are going with the GCC version since it's usually better (except
4550 // perhaps for eq/ne). We could revisit special-casing eq/ne later.
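  // For instance (unsigned case, illustrative): "icmp ult i64 b, c" emits
  // "cmp b.hi, c.hi" followed by "cmp.eq b.lo, c.lo", so the flags from the low
  // words only take effect when the high words are equal, and the returned
  // condition reads the 64-bit unsigned less-than result from those flags.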
4551 if (TableIcmp64[Condition].IsSigned) {
4552 Variable *ScratchReg = makeReg(IceType_i32);
4553 _cmp(Src0RLo, Src1RFLo);
4554 _sbcs(ScratchReg, Src0RHi, Src1RFHi);
4555 // ScratchReg isn't going to be used, but we need the side-effect of
4556 // setting flags from this operation.
4557 Context.insert<InstFakeUse>(ScratchReg);
4558 } else {
4559 _cmp(Src0RHi, Src1RFHi);
4560 _cmp(Src0RLo, Src1RFLo, CondARM32::EQ);
4561 }
4562 return CondWhenTrue(TableIcmp64[Condition].C1);
4563 }
4564
4565 TargetARM32::CondWhenTrue
4566 TargetARM32::lowerInt32IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
4567 Operand *Src1) {
4568 Int32Operands Srcs(Src0, Src1);
4569 if (!Srcs.hasConstOperand()) {
4570
4571 Variable *Src0R = Srcs.src0R(this);
4572 Operand *Src1RF = Srcs.src1RF(this);
4573 _cmp(Src0R, Src1RF);
4574 return CondWhenTrue(getIcmp32Mapping(Condition));
4575 }
4576
4577 Variable *Src0R = Srcs.src0R(this);
4578 const int32_t Value = Srcs.getConstantValue();
4579 if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && Value == 0) {
4580 _tst(Src0R, Src0R);
4581 return CondWhenTrue(getIcmp32Mapping(Condition));
4582 }
4583
4584 if (!Srcs.swappedOperands() && !Srcs.immediateIsFlexEncodable() &&
4585 Srcs.negatedImmediateIsFlexEncodable()) {
4586 Operand *Src1F = Srcs.negatedSrc1F(this);
4587 _cmn(Src0R, Src1F);
4588 return CondWhenTrue(getIcmp32Mapping(Condition));
4589 }
4590
4591 Operand *Src1RF = Srcs.src1RF(this);
4592 if (!Srcs.swappedOperands()) {
4593 _cmp(Src0R, Src1RF);
4594 } else {
4595 Variable *T = makeReg(IceType_i32);
4596 _rsbs(T, Src0R, Src1RF);
4597 Context.insert<InstFakeUse>(T);
4598 }
4599 return CondWhenTrue(getIcmp32Mapping(Condition));
4600 }
4601
4602 TargetARM32::CondWhenTrue
4603 TargetARM32::lowerInt8AndInt16IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
4604 Operand *Src1) {
4605 Int32Operands Srcs(Src0, Src1);
4606 const int32_t ShAmt = 32 - getScalarIntBitWidth(Src0->getType());
4607 assert(ShAmt >= 0);
4608
4609 if (!Srcs.hasConstOperand()) {
4610 Variable *Src0R = makeReg(IceType_i32);
4611 Operand *ShAmtImm = shAmtImm(ShAmt);
4612 _lsl(Src0R, legalizeToReg(Src0), ShAmtImm);
4613
4614 Variable *Src1R = legalizeToReg(Src1);
4615 auto *Src1F = OperandARM32FlexReg::create(Func, IceType_i32, Src1R,
4616 OperandARM32::LSL, ShAmtImm);
4617 _cmp(Src0R, Src1F);
4618 return CondWhenTrue(getIcmp32Mapping(Condition));
4619 }
4620
4621 const int32_t Value = Srcs.getConstantValue();
4622 if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && Value == 0) {
4623 Operand *ShAmtImm = shAmtImm(ShAmt);
4624 Variable *T = makeReg(IceType_i32);
4625 _lsls(T, Srcs.src0R(this), ShAmtImm);
4626 Context.insert<InstFakeUse>(T);
4627 return CondWhenTrue(getIcmp32Mapping(Condition));
4628 }
4629
4630 Variable *ConstR = makeReg(IceType_i32);
4631 _mov(ConstR,
4632 legalize(Ctx->getConstantInt32(Value << ShAmt), Legal_Reg | Legal_Flex));
4633 Operand *NonConstF = OperandARM32FlexReg::create(
4634 Func, IceType_i32, Srcs.src0R(this), OperandARM32::LSL,
4635 Ctx->getConstantInt32(ShAmt));
4636
4637 if (Srcs.swappedOperands()) {
4638 _cmp(ConstR, NonConstF);
4639 } else {
4640 Variable *T = makeReg(IceType_i32);
4641 _rsbs(T, ConstR, NonConstF);
4642 Context.insert<InstFakeUse>(T);
4643 }
4644 return CondWhenTrue(getIcmp32Mapping(Condition));
4645 }
4646
4647 TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Instr) {
4648 return lowerIcmpCond(Instr->getCondition(), Instr->getSrc(0),
4649 Instr->getSrc(1));
4650 }
4651
4652 TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(InstIcmp::ICond Condition,
4653 Operand *Src0,
4654 Operand *Src1) {
4655 Src0 = legalizeUndef(Src0);
4656 Src1 = legalizeUndef(Src1);
4657
4658 // a=icmp cond b, c ==>
4659 // GCC does:
4660 // <u/s>xtb tb, b
4661 // <u/s>xtb tc, c
4662 // cmp tb, tc
4663 // mov.C1 t, #0
4664 // mov.C2 t, #1
4665 // mov a, t
4666 // where the unsigned/sign extension is not needed for 32-bit. They also have
4667 // special cases for EQ and NE. E.g., for NE:
4668 // <extend to tb, tc>
4669 // subs t, tb, tc
4670 // movne t, #1
4671 // mov a, t
4672 //
4673 // LLVM does:
4674 // lsl tb, b, #<N>
4675 // mov t, #0
4676 // cmp tb, c, lsl #<N>
4677 // mov.<C> t, #1
4678 // mov a, t
4679 //
4680 // the left shift is by 0, 16, or 24, which allows the comparison to focus on
4681 // the digits that actually matter (for 16-bit or 8-bit signed/unsigned). For
4682   // the unsigned case, for some reason it does something similar to GCC and
4683   // does a uxtb first. It's not clear to me why that special-casing is needed.
4684 //
4685 // We'll go with the LLVM way for now, since it's shorter and has just as few
4686 // dependencies.
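  // Concretely (illustrative): for an i8 compare ShAmt is 24, so both operands
  // end up with their meaningful bits in bits 31..24 and a plain 32-bit cmp
  // yields the correct signed and unsigned flags for the narrow type.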
4687 switch (Src0->getType()) {
4688 default:
4689 llvm::report_fatal_error("Unhandled type in lowerIcmpCond");
4690 case IceType_i1:
4691 case IceType_i8:
4692 case IceType_i16:
4693 return lowerInt8AndInt16IcmpCond(Condition, Src0, Src1);
4694 case IceType_i32:
4695 return lowerInt32IcmpCond(Condition, Src0, Src1);
4696 case IceType_i64:
4697 return lowerInt64IcmpCond(Condition, Src0, Src1);
4698 }
4699 }
4700
4701 void TargetARM32::lowerIcmp(const InstIcmp *Instr) {
4702 Variable *Dest = Instr->getDest();
4703 const Type DestTy = Dest->getType();
4704
4705 if (isVectorType(DestTy)) {
4706 auto *T = makeReg(DestTy);
4707 auto *Src0 = legalizeToReg(Instr->getSrc(0));
4708 auto *Src1 = legalizeToReg(Instr->getSrc(1));
4709 const Type SrcTy = Src0->getType();
4710
4711 bool NeedsShl = false;
4712 Type NewTypeAfterShl;
4713 SizeT ShAmt;
4714 switch (SrcTy) {
4715 default:
4716 break;
4717 case IceType_v16i1:
4718 NeedsShl = true;
4719 NewTypeAfterShl = IceType_v16i8;
4720 ShAmt = 7;
4721 break;
4722 case IceType_v8i1:
4723 NeedsShl = true;
4724 NewTypeAfterShl = IceType_v8i16;
4725 ShAmt = 15;
4726 break;
4727 case IceType_v4i1:
4728 NeedsShl = true;
4729 NewTypeAfterShl = IceType_v4i32;
4730 ShAmt = 31;
4731 break;
4732 }
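    // E.g., for v4i1 operands each lane is shifted left by 31, so a true lane
    // becomes 0x80000000 and a false lane stays 0; the widened lanes then
    // compare (signed or unsigned) the same way the original i1 lanes should.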
4733
4734 if (NeedsShl) {
4735 auto *Imm = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(ShAmt));
4736 auto *Src0T = makeReg(NewTypeAfterShl);
4737 auto *Src0Shl = makeReg(NewTypeAfterShl);
4738 _mov(Src0T, Src0);
4739 _vshl(Src0Shl, Src0T, Imm);
4740 Src0 = Src0Shl;
4741
4742 auto *Src1T = makeReg(NewTypeAfterShl);
4743 auto *Src1Shl = makeReg(NewTypeAfterShl);
4744 _mov(Src1T, Src1);
4745 _vshl(Src1Shl, Src1T, Imm);
4746 Src1 = Src1Shl;
4747 }
4748
4749 switch (Instr->getCondition()) {
4750 default:
4751 llvm::report_fatal_error("Unhandled integer comparison.");
4752 #define _Vceq(T, S0, S1, Signed) _vceq(T, S0, S1)
4753 #define _Vcge(T, S0, S1, Signed) \
4754 _vcge(T, S0, S1)->setSignType(Signed ? InstARM32::FS_Signed \
4755 : InstARM32::FS_Unsigned)
4756 #define _Vcgt(T, S0, S1, Signed) \
4757 _vcgt(T, S0, S1)->setSignType(Signed ? InstARM32::FS_Signed \
4758 : InstARM32::FS_Unsigned)
4759 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V) \
4760 case InstIcmp::val: { \
4761 _Vc##C_V(T, (INV_V) ? Src1 : Src0, (INV_V) ? Src0 : Src1, is_signed); \
4762 if (NEG_V) { \
4763 auto *TInv = makeReg(DestTy); \
4764 _vmvn(TInv, T); \
4765 T = TInv; \
4766 } \
4767 } break;
4768 ICMPARM32_TABLE
4769 #undef X
4770 #undef _Vcgt
4771 #undef _Vcge
4772 #undef _Vceq
4773 }
4774 _mov(Dest, T);
4775 return;
4776 }
4777
4778 Operand *_0 =
4779 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
4780 Operand *_1 = legalize(Ctx->getConstantInt32(1), Legal_Reg | Legal_Flex);
4781 Variable *T = makeReg(IceType_i1);
4782
4783 _mov(T, _0);
4784 CondWhenTrue Cond = lowerIcmpCond(Instr);
4785 _mov_redefined(T, _1, Cond.WhenTrue0);
4786 _mov(Dest, T);
4787
4788 assert(Cond.WhenTrue1 == CondARM32::kNone);
4789
4790 return;
4791 }
4792
4793 void TargetARM32::lowerInsertElement(const InstInsertElement *Instr) {
4794 Variable *Dest = Instr->getDest();
4795 Type DestTy = Dest->getType();
4796
4797 Variable *Src0 = legalizeToReg(Instr->getSrc(0));
4798 Variable *Src1 = legalizeToReg(Instr->getSrc(1));
4799 Operand *Src2 = Instr->getSrc(2);
4800
4801 if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src2)) {
4802 const uint32_t Index = Imm->getValue();
4803 Variable *T = makeReg(DestTy);
4804
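// For floating-point element types the insert goes through an S/D register
// view of the vector, so T is presumably restricted to the Q registers whose
// lanes alias S registers (the RCARM32_QtoS register class set below).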
4805 if (isFloatingType(DestTy)) {
4806 T->setRegClass(RegARM32::RCARM32_QtoS);
4807 }
4808
4809 _mov(T, Src0);
4810 _insertelement(T, Src1, Index);
4811 _set_dest_redefined();
4812 _mov(Dest, T);
4813 return;
4814 }
4815 assert(false && "insertelement requires a constant index");
4816 }
4817
4818 namespace {
4819 inline uint64_t getConstantMemoryOrder(Operand *Opnd) {
4820 if (auto *Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
4821 return Integer->getValue();
4822 return Intrinsics::MemoryOrderInvalid;
4823 }
4824 } // end of anonymous namespace
4825
4826 void TargetARM32::lowerLoadLinkedStoreExclusive(
4827 Type Ty, Operand *Addr, std::function<Variable *(Variable *)> Operation,
4828 CondARM32::Cond Cond) {
4829
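// Emits the generic exclusive-access retry loop used by the atomic lowerings:
//
// retry:
//   ldrex tmp, [addr]
//   <StoreValue = Operation(tmp)>
//   mov<!Cond> success, #0        ; only emitted when Cond != AL
//   strex<Cond> success, StoreValue, [addr]
//   cmp success, #0
//   bne retry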
4830 auto *Retry = Context.insert<InstARM32Label>(this);
4831
4832 { // scoping for loop highlighting.
4833 Variable *Success = makeReg(IceType_i32);
4834 Variable *Tmp = (Ty == IceType_i64) ? makeI64RegPair() : makeReg(Ty);
4835 auto *_0 = Ctx->getConstantZero(IceType_i32);
4836
4837 Context.insert<InstFakeDef>(Tmp);
4838 Context.insert<InstFakeUse>(Tmp);
4839 Variable *AddrR = legalizeToReg(Addr);
4840 _ldrex(Tmp, formMemoryOperand(AddrR, Ty))->setDestRedefined();
4841 auto *StoreValue = Operation(Tmp);
4842 assert(StoreValue->mustHaveReg());
4843 // strex requires Dest to be a register other than Value or Addr. This
4844 // restriction is cleanly represented by adding an "early" definition of
4845 // Dest (or a later use of all the sources).
4846 Context.insert<InstFakeDef>(Success);
4847 if (Cond != CondARM32::AL) {
4848 _mov_redefined(Success, legalize(_0, Legal_Reg | Legal_Flex),
4849 InstARM32::getOppositeCondition(Cond));
4850 }
4851 _strex(Success, StoreValue, formMemoryOperand(AddrR, Ty), Cond)
4852 ->setDestRedefined();
4853 _cmp(Success, _0);
4854 }
4855
4856 _br(Retry, CondARM32::NE);
4857 }
4858
4859 namespace {
4860 InstArithmetic *createArithInst(Cfg *Func, uint32_t Operation, Variable *Dest,
4861 Variable *Src0, Operand *Src1) {
4862 InstArithmetic::OpKind Oper;
4863 switch (Operation) {
4864 default:
4865 llvm::report_fatal_error("Unknown AtomicRMW operation");
4866 case Intrinsics::AtomicExchange:
4867 llvm::report_fatal_error("Can't handle Atomic xchg operation");
4868 case Intrinsics::AtomicAdd:
4869 Oper = InstArithmetic::Add;
4870 break;
4871 case Intrinsics::AtomicAnd:
4872 Oper = InstArithmetic::And;
4873 break;
4874 case Intrinsics::AtomicSub:
4875 Oper = InstArithmetic::Sub;
4876 break;
4877 case Intrinsics::AtomicOr:
4878 Oper = InstArithmetic::Or;
4879 break;
4880 case Intrinsics::AtomicXor:
4881 Oper = InstArithmetic::Xor;
4882 break;
4883 }
4884 return InstArithmetic::create(Func, Oper, Dest, Src0, Src1);
4885 }
4886 } // end of anonymous namespace
4887
4888 void TargetARM32::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
4889 Operand *Addr, Operand *Val) {
4890 // retry:
4891 // ldrex tmp, [addr]
4892 // mov contents, tmp
4893 // op result, contents, Val
4894 // strex success, result, [addr]
4895 // cmp success, 0
4896 // bne retry
4897 // fake-use(addr, operand) @ prevents undesirable clobbering.
4898 // mov dest, contents
4899 auto DestTy = Dest->getType();
4900
4901 if (DestTy == IceType_i64) {
4902 lowerInt64AtomicRMW(Dest, Operation, Addr, Val);
4903 return;
4904 }
4905
4906 Operand *ValRF = nullptr;
4907 if (llvm::isa<ConstantInteger32>(Val)) {
4908 ValRF = Val;
4909 } else {
4910 ValRF = legalizeToReg(Val);
4911 }
4912 auto *ContentsR = makeReg(DestTy);
4913 auto *ResultR = makeReg(DestTy);
4914
4915 _dmb();
4916 lowerLoadLinkedStoreExclusive(
4917 DestTy, Addr,
4918 [this, Operation, ResultR, ContentsR, ValRF](Variable *Tmp) {
4919 lowerAssign(InstAssign::create(Func, ContentsR, Tmp));
4920 if (Operation == Intrinsics::AtomicExchange) {
4921 lowerAssign(InstAssign::create(Func, ResultR, ValRF));
4922 } else {
4923 lowerArithmetic(
4924 createArithInst(Func, Operation, ResultR, ContentsR, ValRF));
4925 }
4926 return ResultR;
4927 });
4928 _dmb();
4929 if (auto *ValR = llvm::dyn_cast<Variable>(ValRF)) {
4930 Context.insert<InstFakeUse>(ValR);
4931 }
4932 // Can't dce ContentsR.
4933 Context.insert<InstFakeUse>(ContentsR);
4934 lowerAssign(InstAssign::create(Func, Dest, ContentsR));
4935 }
4936
4937 void TargetARM32::lowerInt64AtomicRMW(Variable *Dest, uint32_t Operation,
4938 Operand *Addr, Operand *Val) {
4939 assert(Dest->getType() == IceType_i64);
4940
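// 64-bit exclusive accesses use ldrexd/strexd, which operate on a register
// pair, so the result and any non-constant value operand are modeled as
// explicit lo/hi pairs whose halves must live in registers.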
4941 auto *ResultR = makeI64RegPair();
4942
4943 Context.insert<InstFakeDef>(ResultR);
4944
4945 Operand *ValRF = nullptr;
4946 if (llvm::dyn_cast<ConstantInteger64>(Val)) {
4947 ValRF = Val;
4948 } else {
4949 auto *ValR64 = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
4950 ValR64->initHiLo(Func);
4951 ValR64->setMustNotHaveReg();
4952 ValR64->getLo()->setMustHaveReg();
4953 ValR64->getHi()->setMustHaveReg();
4954 lowerAssign(InstAssign::create(Func, ValR64, Val));
4955 ValRF = ValR64;
4956 }
4957
4958 auto *ContentsR = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
4959 ContentsR->initHiLo(Func);
4960 ContentsR->setMustNotHaveReg();
4961 ContentsR->getLo()->setMustHaveReg();
4962 ContentsR->getHi()->setMustHaveReg();
4963
4964 _dmb();
4965 lowerLoadLinkedStoreExclusive(
4966 IceType_i64, Addr,
4967 [this, Operation, ResultR, ContentsR, ValRF](Variable *Tmp) {
4968 lowerAssign(InstAssign::create(Func, ContentsR, Tmp));
4969 Context.insert<InstFakeUse>(Tmp);
4970 if (Operation == Intrinsics::AtomicExchange) {
4971 lowerAssign(InstAssign::create(Func, ResultR, ValRF));
4972 } else {
4973 lowerArithmetic(
4974 createArithInst(Func, Operation, ResultR, ContentsR, ValRF));
4975 }
4976 Context.insert<InstFakeUse>(ResultR->getHi());
4977 Context.insert<InstFakeDef>(ResultR, ResultR->getLo())
4978 ->setDestRedefined();
4979 return ResultR;
4980 });
4981 _dmb();
4982 if (auto *ValR64 = llvm::dyn_cast<Variable64On32>(ValRF)) {
4983 Context.insert<InstFakeUse>(ValR64->getLo());
4984 Context.insert<InstFakeUse>(ValR64->getHi());
4985 }
4986 lowerAssign(InstAssign::create(Func, Dest, ContentsR));
4987 }
4988
4989 void TargetARM32::postambleCtpop64(const InstCall *Instr) {
4990 Operand *Arg0 = Instr->getArg(0);
4991 if (isInt32Asserting32Or64(Arg0->getType())) {
4992 return;
4993 }
4994 // The popcount helpers always return 32-bit values, while the intrinsic's
4995 // signature matches some 64-bit platforms' native instructions and expects
4996 // to fill a 64-bit reg. Thus, clear the upper bits of the dest in case the
4997 // user doesn't do that in the IR or doesn't discard them via truncate.
4998 auto *DestHi = llvm::cast<Variable>(hiOperand(Instr->getDest()));
4999 Variable *T = makeReg(IceType_i32);
5000 Operand *_0 =
5001 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
5002 _mov(T, _0);
5003 _mov(DestHi, T);
5004 }
5005
5006 void TargetARM32::lowerIntrinsic(const InstIntrinsic *Instr) {
5007 Variable *Dest = Instr->getDest();
5008 Type DestTy = (Dest != nullptr) ? Dest->getType() : IceType_void;
5009 Intrinsics::IntrinsicID ID = Instr->getIntrinsicID();
5010 switch (ID) {
5011 case Intrinsics::AtomicFence:
5012 case Intrinsics::AtomicFenceAll:
5013 assert(Dest == nullptr);
5014 _dmb();
5015 return;
5016 case Intrinsics::AtomicIsLockFree: {
5017 Operand *ByteSize = Instr->getArg(0);
5018 auto *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize);
5019 if (CI == nullptr) {
5020 // The PNaCl ABI requires the byte size to be a compile-time constant.
5021 Func->setError("AtomicIsLockFree byte size should be compile-time const");
5022 return;
5023 }
5024 static constexpr int32_t NotLockFree = 0;
5025 static constexpr int32_t LockFree = 1;
5026 int32_t Result = NotLockFree;
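// ldrexb/ldrexh/ldrex/ldrexd (and the matching strex forms) cover 1-, 2-, 4-,
// and 8-byte accesses, so those sizes are reported as lock-free.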
5027 switch (CI->getValue()) {
5028 case 1:
5029 case 2:
5030 case 4:
5031 case 8:
5032 Result = LockFree;
5033 break;
5034 }
5035 _mov(Dest, legalizeToReg(Ctx->getConstantInt32(Result)));
5036 return;
5037 }
5038 case Intrinsics::AtomicLoad: {
5039 assert(isScalarIntegerType(DestTy));
5040 // We require the memory address to be naturally aligned, and given that,
5041 // normal loads are atomic.
5042 if (!Intrinsics::isMemoryOrderValid(
5043 ID, getConstantMemoryOrder(Instr->getArg(1)))) {
5044 Func->setError("Unexpected memory ordering for AtomicLoad");
5045 return;
5046 }
5047 Variable *T;
5048
5049 if (DestTy == IceType_i64) {
5050 // An exclusive load (ldrexd) is the only ARM instruction guaranteed to load
5051 // a 64-bit integer atomically. Everything else works with a regular ldr.
5052 T = makeI64RegPair();
5053 _ldrex(T, formMemoryOperand(Instr->getArg(0), IceType_i64));
5054 } else {
5055 T = makeReg(DestTy);
5056 _ldr(T, formMemoryOperand(Instr->getArg(0), DestTy));
5057 }
5058 _dmb();
5059 lowerAssign(InstAssign::create(Func, Dest, T));
5060 // Add a fake use of T to ensure the atomic load is not removed if Dest is
5061 // unused.
5062 Context.insert<InstFakeUse>(T);
5063 return;
5064 }
5065 case Intrinsics::AtomicStore: {
5066 // We require the memory address to be naturally aligned, and given that,
5067 // normal stores are atomic.
5068 if (!Intrinsics::isMemoryOrderValid(
5069 ID, getConstantMemoryOrder(Instr->getArg(2)))) {
5070 Func->setError("Unexpected memory ordering for AtomicStore");
5071 return;
5072 }
5073
5074 auto *Value = Instr->getArg(0);
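// A plain 64-bit store is not guaranteed to be single-copy atomic on ARMv7,
// so i64 atomic stores are lowered to a ldrex/strex loop that simply writes
// the new value.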
5075 if (Value->getType() == IceType_i64) {
5076 auto *ValueR = makeI64RegPair();
5077 Context.insert<InstFakeDef>(ValueR);
5078 lowerAssign(InstAssign::create(Func, ValueR, Value));
5079 _dmb();
5080 lowerLoadLinkedStoreExclusive(
5081 IceType_i64, Instr->getArg(1), [this, ValueR](Variable *Tmp) {
5082 // The following fake-use prevents the ldrex instruction from being
5083 // dead code eliminated.
5084 Context.insert<InstFakeUse>(llvm::cast<Variable>(loOperand(Tmp)));
5085 Context.insert<InstFakeUse>(llvm::cast<Variable>(hiOperand(Tmp)));
5086 Context.insert<InstFakeUse>(Tmp);
5087 return ValueR;
5088 });
5089 Context.insert<InstFakeUse>(ValueR);
5090 _dmb();
5091 return;
5092 }
5093
5094 auto *ValueR = legalizeToReg(Instr->getArg(0));
5095 const auto ValueTy = ValueR->getType();
5096 assert(isScalarIntegerType(ValueTy));
5097 auto *Addr = legalizeToReg(Instr->getArg(1));
5098
5099 // Non-64-bit stores are atomic as long as the address is aligned, and this
5100 // is PNaCl, so addresses are aligned.
5101 _dmb();
5102 _str(ValueR, formMemoryOperand(Addr, ValueTy));
5103 _dmb();
5104 return;
5105 }
5106 case Intrinsics::AtomicCmpxchg: {
5107 // retry:
5108 // ldrex tmp, [addr]
5109 // cmp tmp, expected
5110 // mov expected, tmp
5111 // strexeq success, new, [addr]
5112 // cmpeq success, #0
5113 // bne retry
5114 // mov dest, expected
5115 assert(isScalarIntegerType(DestTy));
5116 // We require the memory address to be naturally aligned, and given that,
5117 // the individual memory accesses are atomic.
5118 if (!Intrinsics::isMemoryOrderValid(
5119 ID, getConstantMemoryOrder(Instr->getArg(3)),
5120 getConstantMemoryOrder(Instr->getArg(4)))) {
5121 Func->setError("Unexpected memory ordering for AtomicCmpxchg");
5122 return;
5123 }
5124
5125 if (DestTy == IceType_i64) {
5126 Variable *LoadedValue = nullptr;
5127
5128 auto *New = makeI64RegPair();
5129 Context.insert<InstFakeDef>(New);
5130 lowerAssign(InstAssign::create(Func, New, Instr->getArg(2)));
5131
5132 auto *Expected = makeI64RegPair();
5133 Context.insert<InstFakeDef>(Expected);
5134 lowerAssign(InstAssign::create(Func, Expected, Instr->getArg(1)));
5135
5136 _dmb();
5137 lowerLoadLinkedStoreExclusive(
5138 DestTy, Instr->getArg(0),
5139 [this, Expected, New, &LoadedValue](Variable *Tmp) {
5140 auto *ExpectedLoR = llvm::cast<Variable>(loOperand(Expected));
5141 auto *ExpectedHiR = llvm::cast<Variable>(hiOperand(Expected));
5142 auto *TmpLoR = llvm::cast<Variable>(loOperand(Tmp));
5143 auto *TmpHiR = llvm::cast<Variable>(hiOperand(Tmp));
5144 _cmp(TmpLoR, ExpectedLoR);
5145 _cmp(TmpHiR, ExpectedHiR, CondARM32::EQ);
5146 LoadedValue = Tmp;
5147 return New;
5148 },
5149 CondARM32::EQ);
5150 _dmb();
5151
5152 Context.insert<InstFakeUse>(LoadedValue);
5153 lowerAssign(InstAssign::create(Func, Dest, LoadedValue));
5154 // The fake-use Expected prevents the assignments to Expected (above)
5155 // from being removed if Dest is not used.
5156 Context.insert<InstFakeUse>(Expected);
5157 // New needs to be alive here, or its live range will end in the
5158 // strex instruction.
5159 Context.insert<InstFakeUse>(New);
5160 return;
5161 }
5162
5163 auto *New = legalizeToReg(Instr->getArg(2));
5164 auto *Expected = legalizeToReg(Instr->getArg(1));
5165 Variable *LoadedValue = nullptr;
5166
5167 _dmb();
5168 lowerLoadLinkedStoreExclusive(
5169 DestTy, Instr->getArg(0),
5170 [this, Expected, New, &LoadedValue](Variable *Tmp) {
5171 lowerIcmpCond(InstIcmp::Eq, Tmp, Expected);
5172 LoadedValue = Tmp;
5173 return New;
5174 },
5175 CondARM32::EQ);
5176 _dmb();
5177
5178 lowerAssign(InstAssign::create(Func, Dest, LoadedValue));
5179 Context.insert<InstFakeUse>(Expected);
5180 Context.insert<InstFakeUse>(New);
5181 return;
5182 }
5183 case Intrinsics::AtomicRMW: {
5184 if (!Intrinsics::isMemoryOrderValid(
5185 ID, getConstantMemoryOrder(Instr->getArg(3)))) {
5186 Func->setError("Unexpected memory ordering for AtomicRMW");
5187 return;
5188 }
5189 lowerAtomicRMW(
5190 Dest,
5191 static_cast<uint32_t>(
5192 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()),
5193 Instr->getArg(1), Instr->getArg(2));
5194 return;
5195 }
5196 case Intrinsics::Bswap: {
5197 Operand *Val = Instr->getArg(0);
5198 Type Ty = Val->getType();
5199 if (Ty == IceType_i64) {
5200 Val = legalizeUndef(Val);
5201 Variable *Val_Lo = legalizeToReg(loOperand(Val));
5202 Variable *Val_Hi = legalizeToReg(hiOperand(Val));
5203 Variable *T_Lo = makeReg(IceType_i32);
5204 Variable *T_Hi = makeReg(IceType_i32);
5205 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
5206 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
5207 _rev(T_Lo, Val_Lo);
5208 _rev(T_Hi, Val_Hi);
5209 _mov(DestLo, T_Hi);
5210 _mov(DestHi, T_Lo);
5211 } else {
5212 assert(Ty == IceType_i32 || Ty == IceType_i16);
5213 Variable *ValR = legalizeToReg(Val);
5214 Variable *T = makeReg(Ty);
5215 _rev(T, ValR);
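// rev byte-reverses the full 32-bit register, which leaves a 16-bit value in
// the upper half word, so shift it back down for i16.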
5216 if (Val->getType() == IceType_i16) {
5217 Operand *_16 = shAmtImm(16);
5218 _lsr(T, T, _16);
5219 }
5220 _mov(Dest, T);
5221 }
5222 return;
5223 }
5224 case Intrinsics::Ctpop: {
5225 llvm::report_fatal_error("Ctpop should have been prelowered.");
5226 }
5227 case Intrinsics::Ctlz: {
5228 // The "is zero undef" parameter is ignored and we always return a
5229 // well-defined value.
5230 Operand *Val = Instr->getArg(0);
5231 Variable *ValLoR;
5232 Variable *ValHiR = nullptr;
5233 if (Val->getType() == IceType_i64) {
5234 Val = legalizeUndef(Val);
5235 ValLoR = legalizeToReg(loOperand(Val));
5236 ValHiR = legalizeToReg(hiOperand(Val));
5237 } else {
5238 ValLoR = legalizeToReg(Val);
5239 }
5240 lowerCLZ(Dest, ValLoR, ValHiR);
5241 return;
5242 }
5243 case Intrinsics::Cttz: {
5244 // Essentially like Clz, but reverse the bits first.
5245 Operand *Val = Instr->getArg(0);
5246 Variable *ValLoR;
5247 Variable *ValHiR = nullptr;
5248 if (Val->getType() == IceType_i64) {
5249 Val = legalizeUndef(Val);
5250 ValLoR = legalizeToReg(loOperand(Val));
5251 ValHiR = legalizeToReg(hiOperand(Val));
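// cttz(x) == clz(bit-reverse(x)). Bit-reversing a 64-bit value also swaps its
// two words, so the reversed original high word becomes the new low word and
// the reversed original low word becomes the new high word.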
5252 Variable *TLo = makeReg(IceType_i32);
5253 Variable *THi = makeReg(IceType_i32);
5254 _rbit(TLo, ValLoR);
5255 _rbit(THi, ValHiR);
5256 ValLoR = THi;
5257 ValHiR = TLo;
5258 } else {
5259 ValLoR = legalizeToReg(Val);
5260 Variable *T = makeReg(IceType_i32);
5261 _rbit(T, ValLoR);
5262 ValLoR = T;
5263 }
5264 lowerCLZ(Dest, ValLoR, ValHiR);
5265 return;
5266 }
5267 case Intrinsics::Fabs: {
5268 Variable *T = makeReg(DestTy);
5269 _vabs(T, legalizeToReg(Instr->getArg(0)));
5270 _mov(Dest, T);
5271 return;
5272 }
5273 case Intrinsics::Longjmp: {
5274 llvm::report_fatal_error("longjmp should have been prelowered.");
5275 }
5276 case Intrinsics::Memcpy: {
5277 llvm::report_fatal_error("memcpy should have been prelowered.");
5278 }
5279 case Intrinsics::Memmove: {
5280 llvm::report_fatal_error("memmove should have been prelowered.");
5281 }
5282 case Intrinsics::Memset: {
5283 llvm::report_fatal_error("memmove should have been prelowered.");
5284 }
5285 case Intrinsics::NaClReadTP: {
5286 if (SandboxingType != ST_NaCl) {
5287 llvm::report_fatal_error("nacl-read-tp should have been prelowered.");
5288 }
5289 Variable *TP = legalizeToReg(OperandARM32Mem::create(
5290 Func, getPointerType(), getPhysicalRegister(RegARM32::Reg_r9),
5291 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32))));
5292 _mov(Dest, TP);
5293 return;
5294 }
5295 case Intrinsics::Setjmp: {
5296 llvm::report_fatal_error("setjmp should have been prelowered.");
5297 }
5298 case Intrinsics::Sqrt: {
5299 assert(isScalarFloatingType(Dest->getType()) ||
5300 getFlags().getApplicationBinaryInterface() != ::Ice::ABI_PNaCl);
5301 Variable *Src = legalizeToReg(Instr->getArg(0));
5302 Variable *T = makeReg(DestTy);
5303 _vsqrt(T, Src);
5304 _mov(Dest, T);
5305 return;
5306 }
5307 case Intrinsics::Stacksave: {
5308 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
5309 _mov(Dest, SP);
5310 return;
5311 }
5312 case Intrinsics::Stackrestore: {
5313 Variable *Val = legalizeToReg(Instr->getArg(0));
5314 Sandboxer(this).reset_sp(Val);
5315 return;
5316 }
5317 case Intrinsics::Trap:
5318 _trap();
5319 return;
5320 case Intrinsics::AddSaturateSigned:
5321 case Intrinsics::AddSaturateUnsigned: {
5322 bool Unsigned = (ID == Intrinsics::AddSaturateUnsigned);
5323 Variable *Src0 = legalizeToReg(Instr->getArg(0));
5324 Variable *Src1 = legalizeToReg(Instr->getArg(1));
5325 Variable *T = makeReg(DestTy);
5326 _vqadd(T, Src0, Src1, Unsigned);
5327 _mov(Dest, T);
5328 return;
5329 }
5330 case Intrinsics::LoadSubVector: {
5331 assert(llvm::isa<ConstantInteger32>(Instr->getArg(1)) &&
5332 "LoadSubVector second argument must be a constant");
5333 Variable *Dest = Instr->getDest();
5334 Type Ty = Dest->getType();
5335 auto *SubVectorSize = llvm::cast<ConstantInteger32>(Instr->getArg(1));
5336 Operand *Addr = Instr->getArg(0);
5337 OperandARM32Mem *Src = formMemoryOperand(Addr, Ty);
5338 doMockBoundsCheck(Src);
5339
5340 if (Dest->isRematerializable()) {
5341 Context.insert<InstFakeDef>(Dest);
5342 return;
5343 }
5344
5345 auto *T = makeReg(Ty);
5346 switch (SubVectorSize->getValue()) {
5347 case 4:
5348 _vldr1d(T, Src);
5349 break;
5350 case 8:
5351 _vldr1q(T, Src);
5352 break;
5353 default:
5354 Func->setError("Unexpected size for LoadSubVector");
5355 return;
5356 }
5357 _mov(Dest, T);
5358 return;
5359 }
5360 case Intrinsics::StoreSubVector: {
5361 assert(llvm::isa<ConstantInteger32>(Instr->getArg(2)) &&
5362 "StoreSubVector third argument must be a constant");
5363 auto *SubVectorSize = llvm::cast<ConstantInteger32>(Instr->getArg(2));
5364 Variable *Value = legalizeToReg(Instr->getArg(0));
5365 Operand *Addr = Instr->getArg(1);
5366 OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
5367 doMockBoundsCheck(NewAddr);
5368
5369 Value = legalizeToReg(Value);
5370
5371 switch (SubVectorSize->getValue()) {
5372 case 4:
5373 _vstr1d(Value, NewAddr);
5374 break;
5375 case 8:
5376 _vstr1q(Value, NewAddr);
5377 break;
5378 default:
5379 Func->setError("Unexpected size for StoreSubVector");
5380 return;
5381 }
5382 return;
5383 }
5384 case Intrinsics::MultiplyAddPairs: {
5385 Variable *Src0 = legalizeToReg(Instr->getArg(0));
5386 Variable *Src1 = legalizeToReg(Instr->getArg(1));
5387 Variable *T = makeReg(DestTy);
5388 _vmlap(T, Src0, Src1);
5389 _mov(Dest, T);
5390 return;
5391 }
5392 case Intrinsics::MultiplyHighSigned:
5393 case Intrinsics::MultiplyHighUnsigned: {
5394 bool Unsigned = (ID == Intrinsics::MultiplyHighUnsigned);
5395 Variable *Src0 = legalizeToReg(Instr->getArg(0));
5396 Variable *Src1 = legalizeToReg(Instr->getArg(1));
5397 Variable *T = makeReg(DestTy);
5398 _vmulh(T, Src0, Src1, Unsigned);
5399 _mov(Dest, T);
5400 return;
5401 }
5402 case Intrinsics::Nearbyint: {
5403 UnimplementedLoweringError(this, Instr);
5404 return;
5405 }
5406 case Intrinsics::Round: {
5407 UnimplementedLoweringError(this, Instr);
5408 return;
5409 }
5410 case Intrinsics::SignMask: {
5411 UnimplementedLoweringError(this, Instr);
5412 return;
5413 }
5414 case Intrinsics::SubtractSaturateSigned:
5415 case Intrinsics::SubtractSaturateUnsigned: {
5416 bool Unsigned = (ID == Intrinsics::SubtractSaturateUnsigned);
5417 Variable *Src0 = legalizeToReg(Instr->getArg(0));
5418 Variable *Src1 = legalizeToReg(Instr->getArg(1));
5419 Variable *T = makeReg(DestTy);
5420 _vqsub(T, Src0, Src1, Unsigned);
5421 _mov(Dest, T);
5422 return;
5423 }
5424 case Intrinsics::VectorPackSigned:
5425 case Intrinsics::VectorPackUnsigned: {
5426 bool Unsigned = (ID == Intrinsics::VectorPackUnsigned);
5427 bool Saturating = true;
5428 Variable *Src0 = legalizeToReg(Instr->getArg(0));
5429 Variable *Src1 = legalizeToReg(Instr->getArg(1));
5430 Variable *T = makeReg(DestTy);
5431 _vqmovn2(T, Src0, Src1, Unsigned, Saturating);
5432 _mov(Dest, T);
5433 return;
5434 }
5435 default: // UnknownIntrinsic
5436 Func->setError("Unexpected intrinsic");
5437 return;
5438 }
5439 return;
5440 }
5441
5442 void TargetARM32::lowerCLZ(Variable *Dest, Variable *ValLoR, Variable *ValHiR) {
5443 Type Ty = Dest->getType();
5444 assert(Ty == IceType_i32 || Ty == IceType_i64);
5445 Variable *T = makeReg(IceType_i32);
5446 _clz(T, ValLoR);
5447 if (Ty == IceType_i64) {
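// For i64 the count is clz(hi) when the upper word is non-zero, and
// 32 + clz(lo) otherwise: compute 32 + clz(lo) unconditionally, then
// conditionally overwrite it with clz(hi).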
5448 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
5449 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
5450 Operand *Zero =
5451 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
5452 Operand *ThirtyTwo =
5453 legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
5454 _cmp(ValHiR, Zero);
5455 Variable *T2 = makeReg(IceType_i32);
5456 _add(T2, T, ThirtyTwo);
5457 _clz(T2, ValHiR, CondARM32::NE);
5458 // T2 is actually a source as well when the predicate is not AL (since it
5459 // may leave T2 alone). We use _set_dest_redefined to prolong the liveness
5460 // of T2 as if it was used as a source.
5461 _set_dest_redefined();
5462 _mov(DestLo, T2);
5463 Variable *T3 = makeReg(Zero->getType());
5464 _mov(T3, Zero);
5465 _mov(DestHi, T3);
5466 return;
5467 }
5468 _mov(Dest, T);
5469 return;
5470 }
5471
5472 void TargetARM32::lowerLoad(const InstLoad *Load) {
5473 // A Load instruction can be treated the same as an Assign instruction, after
5474 // the source operand is transformed into an OperandARM32Mem operand.
5475 Type Ty = Load->getDest()->getType();
5476 Operand *Src0 = formMemoryOperand(Load->getLoadAddress(), Ty);
5477 Variable *DestLoad = Load->getDest();
5478
5479 // TODO(jvoung): handle folding opportunities. Sign and zero extension can
5480 // be folded into a load.
5481 auto *Assign = InstAssign::create(Func, DestLoad, Src0);
5482 lowerAssign(Assign);
5483 }
5484
5485 namespace {
5486 void dumpAddressOpt(const Cfg *Func, const Variable *Base, int32_t Offset,
5487 const Variable *OffsetReg, int16_t OffsetRegShAmt,
5488 const Inst *Reason) {
5489 if (!BuildDefs::dump())
5490 return;
5491 if (!Func->isVerbose(IceV_AddrOpt))
5492 return;
5493 OstreamLocker _(Func->getContext());
5494 Ostream &Str = Func->getContext()->getStrDump();
5495 Str << "Instruction: ";
5496 Reason->dumpDecorated(Func);
5497 Str << " results in Base=";
5498 if (Base)
5499 Base->dump(Func);
5500 else
5501 Str << "<null>";
5502 Str << ", OffsetReg=";
5503 if (OffsetReg)
5504 OffsetReg->dump(Func);
5505 else
5506 Str << "<null>";
5507 Str << ", Shift=" << OffsetRegShAmt << ", Offset=" << Offset << "\n";
5508 }
5509
5510 bool matchAssign(const VariablesMetadata *VMetadata, Variable **Var,
5511 int32_t *Offset, const Inst **Reason) {
5512 // Var originates from Var=SrcVar ==> set Var:=SrcVar
5513 if (*Var == nullptr)
5514 return false;
5515 const Inst *VarAssign = VMetadata->getSingleDefinition(*Var);
5516 if (!VarAssign)
5517 return false;
5518 assert(!VMetadata->isMultiDef(*Var));
5519 if (!llvm::isa<InstAssign>(VarAssign))
5520 return false;
5521
5522 Operand *SrcOp = VarAssign->getSrc(0);
5523 bool Optimized = false;
5524 if (auto *SrcVar = llvm::dyn_cast<Variable>(SrcOp)) {
5525 if (!VMetadata->isMultiDef(SrcVar) ||
5526 // TODO: ensure SrcVar stays single-BB
5527 false) {
5528 Optimized = true;
5529 *Var = SrcVar;
5530 }
5531 } else if (auto *Const = llvm::dyn_cast<ConstantInteger32>(SrcOp)) {
5532 // Var originates from Var=Const ==> fold the constant into the offset.
5533 const int32_t MoreOffset = Const->getValue();
5534 if (Utils::WouldOverflowAdd(*Offset, MoreOffset))
5535 return false;
5536 *Var = nullptr;
5537 *Offset += MoreOffset;
5538 Optimized = true;
5539 }
5540
5541 if (Optimized) {
5542 *Reason = VarAssign;
5543 }
5544
5545 return Optimized;
5546 }
5547
5548 bool isAddOrSub(const Inst *Instr, InstArithmetic::OpKind *Kind) {
5549 if (const auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) {
5550 switch (Arith->getOp()) {
5551 default:
5552 return false;
5553 case InstArithmetic::Add:
5554 case InstArithmetic::Sub:
5555 *Kind = Arith->getOp();
5556 return true;
5557 }
5558 }
5559 return false;
5560 }
5561
5562 bool matchCombinedBaseIndex(const VariablesMetadata *VMetadata, Variable **Base,
5563 Variable **OffsetReg, int32_t OffsetRegShamt,
5564 const Inst **Reason) {
5565 // OffsetReg==nullptr && Base is Base=Var1+Var2 ==>
5566 // set Base=Var1, OffsetReg=Var2, Shift=0
5567 if (*Base == nullptr)
5568 return false;
5569 if (*OffsetReg != nullptr)
5570 return false;
5571 (void)OffsetRegShamt;
5572 assert(OffsetRegShamt == 0);
5573 const Inst *BaseInst = VMetadata->getSingleDefinition(*Base);
5574 if (BaseInst == nullptr)
5575 return false;
5576 assert(!VMetadata->isMultiDef(*Base));
5577 if (BaseInst->getSrcSize() < 2)
5578 return false;
5579 auto *Var1 = llvm::dyn_cast<Variable>(BaseInst->getSrc(0));
5580 if (!Var1)
5581 return false;
5582 if (VMetadata->isMultiDef(Var1))
5583 return false;
5584 auto *Var2 = llvm::dyn_cast<Variable>(BaseInst->getSrc(1));
5585 if (!Var2)
5586 return false;
5587 if (VMetadata->isMultiDef(Var2))
5588 return false;
5589 InstArithmetic::OpKind _;
5590 if (!isAddOrSub(BaseInst, &_) ||
5591 // TODO: ensure Var1 and Var2 stay single-BB
5592 false)
5593 return false;
5594 *Base = Var1;
5595 *OffsetReg = Var2;
5596 // OffsetRegShamt is already 0.
5597 *Reason = BaseInst;
5598 return true;
5599 }
5600
5601 bool matchShiftedOffsetReg(const VariablesMetadata *VMetadata,
5602 Variable **OffsetReg, OperandARM32::ShiftKind *Kind,
5603 int32_t *OffsetRegShamt, const Inst **Reason) {
5604 // OffsetReg is OffsetReg=Var*Const && log2(Const)+Shift<=32 ==>
5605 // OffsetReg=Var, Shift+=log2(Const)
5606 // OffsetReg is OffsetReg=Var<<Const && Const+Shift<=32 ==>
5607 // OffsetReg=Var, Shift+=Const
5608 // OffsetReg is OffsetReg=Var>>Const && Const-Shift>=-32 ==>
5609 // OffsetReg=Var, Shift-=Const
5610 OperandARM32::ShiftKind NewShiftKind = OperandARM32::kNoShift;
5611 if (*OffsetReg == nullptr)
5612 return false;
5613 auto *IndexInst = VMetadata->getSingleDefinition(*OffsetReg);
5614 if (IndexInst == nullptr)
5615 return false;
5616 assert(!VMetadata->isMultiDef(*OffsetReg));
5617 if (IndexInst->getSrcSize() < 2)
5618 return false;
5619 auto *ArithInst = llvm::dyn_cast<InstArithmetic>(IndexInst);
5620 if (ArithInst == nullptr)
5621 return false;
5622 auto *Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(0));
5623 if (Var == nullptr)
5624 return false;
5625 auto *Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1));
5626 if (Const == nullptr) {
5627 assert(!llvm::isa<ConstantInteger32>(ArithInst->getSrc(0)));
5628 return false;
5629 }
5630 if (VMetadata->isMultiDef(Var) || Const->getType() != IceType_i32)
5631 return false;
5632
5633 uint32_t NewShamt = -1;
5634 switch (ArithInst->getOp()) {
5635 default:
5636 return false;
5637 case InstArithmetic::Shl: {
5638 NewShiftKind = OperandARM32::LSL;
5639 NewShamt = Const->getValue();
5640 if (NewShamt > 31)
5641 return false;
5642 } break;
5643 case InstArithmetic::Lshr: {
5644 NewShiftKind = OperandARM32::LSR;
5645 NewShamt = Const->getValue();
5646 if (NewShamt > 31)
5647 return false;
5648 } break;
5649 case InstArithmetic::Ashr: {
5650 NewShiftKind = OperandARM32::ASR;
5651 NewShamt = Const->getValue();
5652 if (NewShamt > 31)
5653 return false;
5654 } break;
5655 case InstArithmetic::Udiv:
5656 case InstArithmetic::Mul: {
5657 const uint32_t UnsignedConst = Const->getValue();
5658 NewShamt = llvm::findFirstSet(UnsignedConst);
5659 if (NewShamt != llvm::findLastSet(UnsignedConst)) {
5660 // First bit set is not the same as the last bit set, so Const is not
5661 // a power of 2.
5662 return false;
5663 }
5664 NewShiftKind = ArithInst->getOp() == InstArithmetic::Udiv
5665 ? OperandARM32::LSR
5666 : OperandARM32::LSL;
5667 } break;
5668 }
5669 // Allowed "transitions":
5670 //   kNoShift -> * iff NewShamt <= 31
5671 //   LSL -> LSL iff NewShamt + OffsetRegShamt <= 31
5672 //   LSR -> LSR iff NewShamt + OffsetRegShamt <= 31
5673 //   ASR -> ASR iff NewShamt + OffsetRegShamt <= 31
5674 if (*Kind != OperandARM32::kNoShift && *Kind != NewShiftKind) {
5675 return false;
5676 }
5677 const int32_t NewOffsetRegShamt = *OffsetRegShamt + NewShamt;
5678 if (NewOffsetRegShamt > 31)
5679 return false;
5680 *OffsetReg = Var;
5681 *OffsetRegShamt = NewOffsetRegShamt;
5682 *Kind = NewShiftKind;
5683 *Reason = IndexInst;
5684 return true;
5685 }
5686
5687 bool matchOffsetBase(const VariablesMetadata *VMetadata, Variable **Base,
5688 int32_t *Offset, const Inst **Reason) {
5689 // Base is Base=Var+Const || Base is Base=Const+Var ==>
5690 // set Base=Var, Offset+=Const
5691 // Base is Base=Var-Const ==>
5692 // set Base=Var, Offset-=Const
5693 if (*Base == nullptr)
5694 return false;
5695 const Inst *BaseInst = VMetadata->getSingleDefinition(*Base);
5696 if (BaseInst == nullptr) {
5697 return false;
5698 }
5699 assert(!VMetadata->isMultiDef(*Base));
5700
5701 auto *ArithInst = llvm::dyn_cast<const InstArithmetic>(BaseInst);
5702 if (ArithInst == nullptr)
5703 return false;
5704 InstArithmetic::OpKind Kind;
5705 if (!isAddOrSub(ArithInst, &Kind))
5706 return false;
5707 bool IsAdd = Kind == InstArithmetic::Add;
5708 Operand *Src0 = ArithInst->getSrc(0);
5709 Operand *Src1 = ArithInst->getSrc(1);
5710 auto *Var0 = llvm::dyn_cast<Variable>(Src0);
5711 auto *Var1 = llvm::dyn_cast<Variable>(Src1);
5712 auto *Const0 = llvm::dyn_cast<ConstantInteger32>(Src0);
5713 auto *Const1 = llvm::dyn_cast<ConstantInteger32>(Src1);
5714 Variable *NewBase = nullptr;
5715 int32_t NewOffset = *Offset;
5716
5717 if (Var0 == nullptr && Const0 == nullptr) {
5718 assert(llvm::isa<ConstantRelocatable>(Src0));
5719 return false;
5720 }
5721
5722 if (Var1 == nullptr && Const1 == nullptr) {
5723 assert(llvm::isa<ConstantRelocatable>(Src1));
5724 return false;
5725 }
5726
5727 if (Var0 && Var1)
5728 // TODO(jpp): merge base/index splitting into here.
5729 return false;
5730 if (!IsAdd && Var1)
5731 return false;
5732 if (Var0)
5733 NewBase = Var0;
5734 else if (Var1)
5735 NewBase = Var1;
5736 // Compute the updated constant offset.
5737 if (Const0) {
5738 int32_t MoreOffset = IsAdd ? Const0->getValue() : -Const0->getValue();
5739 if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
5740 return false;
5741 NewOffset += MoreOffset;
5742 }
5743 if (Const1) {
5744 int32_t MoreOffset = IsAdd ? Const1->getValue() : -Const1->getValue();
5745 if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
5746 return false;
5747 NewOffset += MoreOffset;
5748 }
5749
5750 // Update the computed address parameters once we are sure optimization
5751 // is valid.
5752 *Base = NewBase;
5753 *Offset = NewOffset;
5754 *Reason = BaseInst;
5755 return true;
5756 }
5757 } // end of anonymous namespace
5758
5759 OperandARM32Mem *TargetARM32::formAddressingMode(Type Ty, Cfg *Func,
5760 const Inst *LdSt,
5761 Operand *Base) {
5762 assert(Base != nullptr);
5763 int32_t OffsetImm = 0;
5764 Variable *OffsetReg = nullptr;
5765 int32_t OffsetRegShamt = 0;
5766 OperandARM32::ShiftKind ShiftKind = OperandARM32::kNoShift;
5767
5768 Func->resetCurrentNode();
5769 if (Func->isVerbose(IceV_AddrOpt)) {
5770 OstreamLocker _(Func->getContext());
5771 Ostream &Str = Func->getContext()->getStrDump();
5772 Str << "\nAddress mode formation:\t";
5773 LdSt->dumpDecorated(Func);
5774 }
5775
5776 if (isVectorType(Ty))
5777 // Vector loads and stores do not allow offsets, and only support the
5778 // "[reg]" addressing mode (the other supported modes are write-back).
5779 return nullptr;
5780
5781 auto *BaseVar = llvm::dyn_cast<Variable>(Base);
5782 if (BaseVar == nullptr)
5783 return nullptr;
5784
5785 (void)MemTraitsSize;
5786 assert(Ty < MemTraitsSize);
5787 auto *TypeTraits = &MemTraits[Ty];
5788 const bool CanHaveIndex = !NeedSandboxing && TypeTraits->CanHaveIndex;
5789 const bool CanHaveShiftedIndex =
5790 !NeedSandboxing && TypeTraits->CanHaveShiftedIndex;
5791 const bool CanHaveImm = TypeTraits->CanHaveImm;
5792 const int32_t ValidImmMask = TypeTraits->ValidImmMask;
5793 (void)ValidImmMask;
5794 assert(!CanHaveImm || ValidImmMask >= 0);
5795
5796 const VariablesMetadata *VMetadata = Func->getVMetadata();
5797 const Inst *Reason = nullptr;
5798
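// Iterate to a fixed point: each match* helper below folds one defining
// instruction into the address expression and records it as Reason; the loop
// stops once no rule makes progress.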
5799 do {
5800 if (Reason != nullptr) {
5801 dumpAddressOpt(Func, BaseVar, OffsetImm, OffsetReg, OffsetRegShamt,
5802 Reason);
5803 Reason = nullptr;
5804 }
5805
5806 if (matchAssign(VMetadata, &BaseVar, &OffsetImm, &Reason)) {
5807 continue;
5808 }
5809
5810 if (CanHaveIndex &&
5811 matchAssign(VMetadata, &OffsetReg, &OffsetImm, &Reason)) {
5812 continue;
5813 }
5814
5815 if (CanHaveIndex && matchCombinedBaseIndex(VMetadata, &BaseVar, &OffsetReg,
5816 OffsetRegShamt, &Reason)) {
5817 continue;
5818 }
5819
5820 if (CanHaveShiftedIndex) {
5821 if (matchShiftedOffsetReg(VMetadata, &OffsetReg, &ShiftKind,
5822 &OffsetRegShamt, &Reason)) {
5823 continue;
5824 }
5825
5826 if ((OffsetRegShamt == 0) &&
5827 matchShiftedOffsetReg(VMetadata, &BaseVar, &ShiftKind,
5828 &OffsetRegShamt, &Reason)) {
5829 std::swap(BaseVar, OffsetReg);
5830 continue;
5831 }
5832 }
5833
5834 if (matchOffsetBase(VMetadata, &BaseVar, &OffsetImm, &Reason)) {
5835 continue;
5836 }
5837 } while (Reason);
5838
5839 if (BaseVar == nullptr) {
5840 // [OffsetReg{, LSL Shamt}{, #OffsetImm}] is not legal in ARM, so we have to
5841 // legalize the addressing mode to [BaseReg, OffsetReg{, LSL Shamt}].
5842 // Instead of a zeroed BaseReg, we initialize it with OffsetImm:
5843 //
5844 // [OffsetReg{, LSL Shamt}{, #OffsetImm}] ->
5845 // mov BaseReg, #OffsetImm
5846 // use of [BaseReg, OffsetReg{, LSL Shamt}]
5847 //
5848 const Type PointerType = getPointerType();
5849 BaseVar = makeReg(PointerType);
5850 Context.insert<InstAssign>(BaseVar, Ctx->getConstantInt32(OffsetImm));
5851 OffsetImm = 0;
5852 } else if (OffsetImm != 0) {
5853 // ARM ldr/str instructions have limited-range immediates. The formation
5854 // loop above materialized an immediate carelessly, so we ensure the
5855 // generated offset is sane.
5856 const int32_t PositiveOffset = OffsetImm > 0 ? OffsetImm : -OffsetImm;
5857 const InstArithmetic::OpKind Op =
5858 OffsetImm > 0 ? InstArithmetic::Add : InstArithmetic::Sub;
5859
5860 if (!CanHaveImm || !isLegalMemOffset(Ty, OffsetImm) ||
5861 OffsetReg != nullptr) {
5862 if (OffsetReg == nullptr) {
5863 // We formed a [Base, #const] addressing mode which is not encodable in
5864 // ARM. There is little point in forming an address mode now if we don't
5865 // have an offset. Effectively, we would end up with something like
5866 //
5867 // [Base, #const] -> add T, Base, #const
5868 // use of [T]
5869 //
5870 // Which is exactly what we already have. So we just bite the bullet
5871 // here and don't form any address mode.
5872 return nullptr;
5873 }
5874 // We formed [Base, Offset {, LSL Amnt}, #const]. Oops. Legalize it to
5875 //
5876 // [Base, Offset, {LSL amount}, #const] ->
5877 // add T, Base, #const
5878 // use of [T, Offset {, LSL amount}]
5879 const Type PointerType = getPointerType();
5880 Variable *T = makeReg(PointerType);
5881 Context.insert<InstArithmetic>(Op, T, BaseVar,
5882 Ctx->getConstantInt32(PositiveOffset));
5883 BaseVar = T;
5884 OffsetImm = 0;
5885 }
5886 }
5887
5888 assert(BaseVar != nullptr);
5889 assert(OffsetImm == 0 || OffsetReg == nullptr);
5890 assert(OffsetReg == nullptr || CanHaveIndex);
5891 assert(OffsetImm < 0 ? (ValidImmMask & -OffsetImm) == -OffsetImm
5892 : (ValidImmMask & OffsetImm) == OffsetImm);
5893
5894 if (OffsetReg != nullptr) {
5895 Variable *OffsetR = makeReg(getPointerType());
5896 Context.insert<InstAssign>(OffsetR, OffsetReg);
5897 return OperandARM32Mem::create(Func, Ty, BaseVar, OffsetR, ShiftKind,
5898 OffsetRegShamt);
5899 }
5900
5901 return OperandARM32Mem::create(
5902 Func, Ty, BaseVar,
5903 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(OffsetImm)));
5904 }
5905
5906 void TargetARM32::doAddressOptLoad() {
5907 Inst *Instr = iteratorToInst(Context.getCur());
5908 assert(llvm::isa<InstLoad>(Instr));
5909 Variable *Dest = Instr->getDest();
5910 Operand *Addr = Instr->getSrc(0);
5911 if (OperandARM32Mem *Mem =
5912 formAddressingMode(Dest->getType(), Func, Instr, Addr)) {
5913 Instr->setDeleted();
5914 Context.insert<InstLoad>(Dest, Mem);
5915 }
5916 }
5917
5918 void TargetARM32::lowerPhi(const InstPhi * /*Instr*/) {
5919 Func->setError("Phi found in regular instruction list");
5920 }
5921
5922 void TargetARM32::lowerRet(const InstRet *Instr) {
5923 Variable *Reg = nullptr;
5924 if (Instr->hasRetValue()) {
5925 Operand *Src0 = Instr->getRetValue();
5926 Type Ty = Src0->getType();
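// Return values use the conventional registers: r0 (and r1) for i64, s0 for
// f32, d0 for f64, q0 for vectors, and r0 for the remaining scalar types.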
5927 if (Ty == IceType_i64) {
5928 Src0 = legalizeUndef(Src0);
5929 Variable *R0 = legalizeToReg(loOperand(Src0), RegARM32::Reg_r0);
5930 Variable *R1 = legalizeToReg(hiOperand(Src0), RegARM32::Reg_r1);
5931 Reg = R0;
5932 Context.insert<InstFakeUse>(R1);
5933 } else if (Ty == IceType_f32) {
5934 Variable *S0 = legalizeToReg(Src0, RegARM32::Reg_s0);
5935 Reg = S0;
5936 } else if (Ty == IceType_f64) {
5937 Variable *D0 = legalizeToReg(Src0, RegARM32::Reg_d0);
5938 Reg = D0;
5939 } else if (isVectorType(Src0->getType())) {
5940 Variable *Q0 = legalizeToReg(Src0, RegARM32::Reg_q0);
5941 Reg = Q0;
5942 } else {
5943 Operand *Src0F = legalize(Src0, Legal_Reg | Legal_Flex);
5944 Reg = makeReg(Src0F->getType(), RegARM32::Reg_r0);
5945 _mov(Reg, Src0F, CondARM32::AL);
5946 }
5947 }
5948 // Add a ret instruction even if sandboxing is enabled, because addEpilog
5949 // explicitly looks for a ret instruction as a marker for where to insert the
5950 // frame removal instructions. addEpilog is responsible for restoring the
5951 // "lr" register as needed prior to this ret instruction.
5952 _ret(getPhysicalRegister(RegARM32::Reg_lr), Reg);
5953
5954 // Add a fake use of sp to make sure sp stays alive for the entire function.
5955 // Otherwise post-call sp adjustments get dead-code eliminated.
5956 // TODO: Are there more places where the fake use should be inserted? E.g.
5957 // "void f(int n){while(1) g(n);}" may not have a ret instruction.
5958 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
5959 Context.insert<InstFakeUse>(SP);
5960 }
5961
5962 void TargetARM32::lowerShuffleVector(const InstShuffleVector *Instr) {
5963 auto *Dest = Instr->getDest();
5964 const Type DestTy = Dest->getType();
5965
5966 auto *T = makeReg(DestTy);
5967 auto *Src0 = Instr->getSrc(0);
5968 auto *Src1 = Instr->getSrc(1);
5969 const SizeT NumElements = typeNumElements(DestTy);
5970 const Type ElementType = typeElementType(DestTy);
5971
5972 bool Replicate = true;
5973 for (SizeT I = 1; Replicate && I < Instr->getNumIndexes(); ++I) {
5974 if (Instr->getIndexValue(I) != Instr->getIndexValue(0)) {
5975 Replicate = false;
5976 }
5977 }
5978
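// If every index selects the same source lane, the shuffle is a lane
// broadcast, which vdup handles directly.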
5979 if (Replicate) {
5980 Variable *Src0Var = legalizeToReg(Src0);
5981 _vdup(T, Src0Var, Instr->getIndexValue(0));
5982 _mov(Dest, T);
5983 return;
5984 }
5985
5986 switch (DestTy) {
5987 case IceType_v8i1:
5988 case IceType_v8i16: {
5989 static constexpr SizeT ExpectedNumElements = 8;
5990 assert(ExpectedNumElements == Instr->getNumIndexes());
5991 (void)ExpectedNumElements;
5992
5993 if (Instr->indexesAre(0, 0, 1, 1, 2, 2, 3, 3)) {
5994 Variable *Src0R = legalizeToReg(Src0);
5995 _vzip(T, Src0R, Src0R);
5996 _mov(Dest, T);
5997 return;
5998 }
5999
6000 if (Instr->indexesAre(0, 8, 1, 9, 2, 10, 3, 11)) {
6001 Variable *Src0R = legalizeToReg(Src0);
6002 Variable *Src1R = legalizeToReg(Src1);
6003 _vzip(T, Src0R, Src1R);
6004 _mov(Dest, T);
6005 return;
6006 }
6007
6008 if (Instr->indexesAre(0, 2, 4, 6, 0, 2, 4, 6)) {
6009 Variable *Src0R = legalizeToReg(Src0);
6010 _vqmovn2(T, Src0R, Src0R, false, false);
6011 _mov(Dest, T);
6012 return;
6013 }
6014 } break;
6015 case IceType_v16i1:
6016 case IceType_v16i8: {
6017 static constexpr SizeT ExpectedNumElements = 16;
6018 assert(ExpectedNumElements == Instr->getNumIndexes());
6019 (void)ExpectedNumElements;
6020
6021 if (Instr->indexesAre(0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7)) {
6022 Variable *Src0R = legalizeToReg(Src0);
6023 _vzip(T, Src0R, Src0R);
6024 _mov(Dest, T);
6025 return;
6026 }
6027
6028 if (Instr->indexesAre(0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7,
6029 23)) {
6030 Variable *Src0R = legalizeToReg(Src0);
6031 Variable *Src1R = legalizeToReg(Src1);
6032 _vzip(T, Src0R, Src1R);
6033 _mov(Dest, T);
6034 return;
6035 }
6036 } break;
6037 case IceType_v4i1:
6038 case IceType_v4i32:
6039 case IceType_v4f32: {
6040 static constexpr SizeT ExpectedNumElements = 4;
6041 assert(ExpectedNumElements == Instr->getNumIndexes());
6042 (void)ExpectedNumElements;
6043
6044 if (Instr->indexesAre(0, 0, 1, 1)) {
6045 Variable *Src0R = legalizeToReg(Src0);
6046 _vzip(T, Src0R, Src0R);
6047 _mov(Dest, T);
6048 return;
6049 }
6050
6051 if (Instr->indexesAre(0, 4, 1, 5)) {
6052 Variable *Src0R = legalizeToReg(Src0);
6053 Variable *Src1R = legalizeToReg(Src1);
6054 _vzip(T, Src0R, Src1R);
6055 _mov(Dest, T);
6056 return;
6057 }
6058
6059 if (Instr->indexesAre(0, 1, 4, 5)) {
6060 Variable *Src0R = legalizeToReg(Src0);
6061 Variable *Src1R = legalizeToReg(Src1);
6062 _vmovlh(T, Src0R, Src1R);
6063 _mov(Dest, T);
6064 return;
6065 }
6066
6067 if (Instr->indexesAre(2, 3, 2, 3)) {
6068 Variable *Src0R = legalizeToReg(Src0);
6069 _vmovhl(T, Src0R, Src0R);
6070 _mov(Dest, T);
6071 return;
6072 }
6073
6074 if (Instr->indexesAre(2, 3, 6, 7)) {
6075 Variable *Src0R = legalizeToReg(Src0);
6076 Variable *Src1R = legalizeToReg(Src1);
6077 _vmovhl(T, Src1R, Src0R);
6078 _mov(Dest, T);
6079 return;
6080 }
6081 } break;
6082 default:
6083 // TODO(jpp): figure out how to properly lower this without scalarization.
6084 break;
6085 }
6086
6087 // Unoptimized shuffle. Perform a series of inserts and extracts.
6088 Context.insert<InstFakeDef>(T);
6089 for (SizeT I = 0; I < Instr->getNumIndexes(); ++I) {
6090 auto *Index = Instr->getIndex(I);
6091 const SizeT Elem = Index->getValue();
6092 auto *ExtElmt = makeReg(ElementType);
6093 if (Elem < NumElements) {
6094 lowerExtractElement(
6095 InstExtractElement::create(Func, ExtElmt, Src0, Index));
6096 } else {
6097 lowerExtractElement(InstExtractElement::create(
6098 Func, ExtElmt, Src1,
6099 Ctx->getConstantInt32(Index->getValue() - NumElements)));
6100 }
6101 auto *NewT = makeReg(DestTy);
6102 lowerInsertElement(InstInsertElement::create(Func, NewT, T, ExtElmt,
6103 Ctx->getConstantInt32(I)));
6104 T = NewT;
6105 }
6106 _mov(Dest, T);
6107 }
6108
6109 void TargetARM32::lowerSelect(const InstSelect *Instr) {
6110 Variable *Dest = Instr->getDest();
6111 Type DestTy = Dest->getType();
6112 Operand *SrcT = Instr->getTrueOperand();
6113 Operand *SrcF = Instr->getFalseOperand();
6114 Operand *Condition = Instr->getCondition();
6115
6116 if (!isVectorType(DestTy)) {
6117 lowerInt1ForSelect(Dest, Condition, legalizeUndef(SrcT),
6118 legalizeUndef(SrcF));
6119 return;
6120 }
6121
6122 Type TType = DestTy;
6123 switch (DestTy) {
6124 default:
6125 llvm::report_fatal_error("Unexpected type for vector select.");
6126 case IceType_v4i1:
6127 TType = IceType_v4i32;
6128 break;
6129 case IceType_v8i1:
6130 TType = IceType_v8i16;
6131 break;
6132 case IceType_v16i1:
6133 TType = IceType_v16i8;
6134 break;
6135 case IceType_v4f32:
6136 TType = IceType_v4i32;
6137 break;
6138 case IceType_v4i32:
6139 case IceType_v8i16:
6140 case IceType_v16i8:
6141 break;
6142 }
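// Sign-extend the i1 condition lanes into all-ones/all-zeros masks, then use
// vbsl to select between the true and false operands lane by lane.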
6143 auto *T = makeReg(TType);
6144 lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));
6145 auto *SrcTR = legalizeToReg(SrcT);
6146 auto *SrcFR = legalizeToReg(SrcF);
6147 _vbsl(T, SrcTR, SrcFR)->setDestRedefined();
6148 _mov(Dest, T);
6149 }
6150
6151 void TargetARM32::lowerStore(const InstStore *Instr) {
6152 Operand *Value = Instr->getData();
6153 Operand *Addr = Instr->getStoreAddress();
6154 OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
6155 Type Ty = NewAddr->getType();
6156
6157 if (Ty == IceType_i64) {
6158 Value = legalizeUndef(Value);
6159 Variable *ValueHi = legalizeToReg(hiOperand(Value));
6160 Variable *ValueLo = legalizeToReg(loOperand(Value));
6161 _str(ValueHi, llvm::cast<OperandARM32Mem>(hiOperand(NewAddr)));
6162 _str(ValueLo, llvm::cast<OperandARM32Mem>(loOperand(NewAddr)));
6163 } else {
6164 Variable *ValueR = legalizeToReg(Value);
6165 _str(ValueR, NewAddr);
6166 }
6167 }
6168
6169 void TargetARM32::doAddressOptStore() {
6170 Inst *Instr = iteratorToInst(Context.getCur());
6171 assert(llvm::isa<InstStore>(Instr));
6172 Operand *Src = Instr->getSrc(0);
6173 Operand *Addr = Instr->getSrc(1);
6174 if (OperandARM32Mem *Mem =
6175 formAddressingMode(Src->getType(), Func, Instr, Addr)) {
6176 Instr->setDeleted();
6177 Context.insert<InstStore>(Src, Mem);
6178 }
6179 }
6180
6181 void TargetARM32::lowerSwitch(const InstSwitch *Instr) {
6182 // This implements the most naive possible lowering.
6183 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default
6184 Operand *Src0 = Instr->getComparison();
6185 SizeT NumCases = Instr->getNumCases();
6186 if (Src0->getType() == IceType_i64) {
6187 Src0 = legalizeUndef(Src0);
6188 Variable *Src0Lo = legalizeToReg(loOperand(Src0));
6189 Variable *Src0Hi = legalizeToReg(hiOperand(Src0));
6190 for (SizeT I = 0; I < NumCases; ++I) {
6191 Operand *ValueLo = Ctx->getConstantInt32(Instr->getValue(I));
6192 Operand *ValueHi = Ctx->getConstantInt32(Instr->getValue(I) >> 32);
6193 ValueLo = legalize(ValueLo, Legal_Reg | Legal_Flex);
6194 ValueHi = legalize(ValueHi, Legal_Reg | Legal_Flex);
6195 _cmp(Src0Lo, ValueLo);
6196 _cmp(Src0Hi, ValueHi, CondARM32::EQ);
6197 _br(Instr->getLabel(I), CondARM32::EQ);
6198 }
6199 _br(Instr->getLabelDefault());
6200 return;
6201 }
6202
6203 Variable *Src0Var = legalizeToReg(Src0);
6204 // If Src0 is not an i32, we left shift it -- see the icmp lowering for the
6205 // reason.
6206 assert(Src0Var->mustHaveReg());
6207 const size_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType());
6208 assert(ShiftAmt < 32);
6209 if (ShiftAmt > 0) {
6210 Operand *ShAmtImm = shAmtImm(ShiftAmt);
6211 Variable *T = makeReg(IceType_i32);
6212 _lsl(T, Src0Var, ShAmtImm);
6213 Src0Var = T;
6214 }
6215
6216 for (SizeT I = 0; I < NumCases; ++I) {
6217 Operand *Value = Ctx->getConstantInt32(Instr->getValue(I) << ShiftAmt);
6218 Value = legalize(Value, Legal_Reg | Legal_Flex);
6219 _cmp(Src0Var, Value);
6220 _br(Instr->getLabel(I), CondARM32::EQ);
6221 }
6222 _br(Instr->getLabelDefault());
6223 }
6224
6225 void TargetARM32::lowerBreakpoint(const InstBreakpoint *Instr) {
6226 UnimplementedLoweringError(this, Instr);
6227 }
6228
6229 void TargetARM32::lowerUnreachable(const InstUnreachable * /*Instr*/) {
6230 _trap();
6231 }
6232
6233 namespace {
6234 // Returns whether Opnd needs the GOT address. Currently, ConstantRelocatables
6235 // and some fp constants need access to the GOT address.
6236 bool operandNeedsGot(const Operand *Opnd) {
6237 if (llvm::isa<ConstantRelocatable>(Opnd)) {
6238 return true;
6239 }
6240
6241 if (llvm::isa<ConstantFloat>(Opnd)) {
6242 uint32_t _;
6243 return !OperandARM32FlexFpImm::canHoldImm(Opnd, &_);
6244 }
6245
6246 const auto *F64 = llvm::dyn_cast<ConstantDouble>(Opnd);
6247 if (F64 != nullptr) {
6248 uint32_t _;
6249 return !OperandARM32FlexFpImm::canHoldImm(Opnd, &_) &&
6250 !isFloatingPointZero(F64);
6251 }
6252
6253 return false;
6254 }
6255
6256 // Returns whether Phi needs the GOT address (which it does if any of its
6257 // operands needs the GOT address.)
6258 bool phiNeedsGot(const InstPhi *Phi) {
6259 if (Phi->isDeleted()) {
6260 return false;
6261 }
6262
6263 for (SizeT I = 0; I < Phi->getSrcSize(); ++I) {
6264 if (operandNeedsGot(Phi->getSrc(I))) {
6265 return true;
6266 }
6267 }
6268
6269 return false;
6270 }
6271
6272 // Returns whether **any** phi in Node needs the GOT address.
6273 bool anyPhiInNodeNeedsGot(CfgNode *Node) {
6274 for (auto &Inst : Node->getPhis()) {
6275 if (phiNeedsGot(llvm::cast<InstPhi>(&Inst))) {
6276 return true;
6277 }
6278 }
6279 return false;
6280 }
6281
6282 } // end of anonymous namespace
6283
6284 void TargetARM32::prelowerPhis() {
6285 CfgNode *Node = Context.getNode();
6286
6287 if (SandboxingType == ST_Nonsfi) {
6288 assert(GotPtr != nullptr);
6289 if (anyPhiInNodeNeedsGot(Node)) {
6290 // If any phi instruction needs the GOT address, we place a fake use of
6291 // GotPtr in Node to prevent the GotPtr's initialization from being
6292 // dead-code eliminated.
6294 Node->getInsts().push_front(InstFakeUse::create(Func, GotPtr));
6295 }
6296 }
6297
6298 PhiLowering::prelowerPhis32Bit(this, Node, Func);
6299 }
6300
6301 Variable *TargetARM32::makeVectorOfZeros(Type Ty, RegNumT RegNum) {
6302 Variable *Reg = makeReg(Ty, RegNum);
6303 Context.insert<InstFakeDef>(Reg);
6304 assert(isVectorType(Ty));
6305 _veor(Reg, Reg, Reg);
6306 return Reg;
6307 }
6308
6309 // Helper for legalize() to emit the right code to lower an operand to a
6310 // register of the appropriate type.
6311 Variable *TargetARM32::copyToReg(Operand *Src, RegNumT RegNum) {
6312 Type Ty = Src->getType();
6313 Variable *Reg = makeReg(Ty, RegNum);
6314 if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Src)) {
6315 _ldr(Reg, Mem);
6316 } else {
6317 _mov(Reg, Src);
6318 }
6319 return Reg;
6320 }
6321
6322 // TODO(jpp): remove unneeded else clauses in legalize.
6323 Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
6324 RegNumT RegNum) {
6325 Type Ty = From->getType();
6326 // Assert that a physical register is allowed. To date, all calls to
6327 // legalize() allow a physical register. Legal_Flex converts registers to the
6328 // right type OperandARM32FlexReg as needed.
6329 assert(Allowed & Legal_Reg);
6330
6331 // Copied verbatim from TargetX86Base<Machine>.
6332 if (RegNum.hasNoValue()) {
6333 if (Variable *Subst = getContext().availabilityGet(From)) {
6334 // At this point we know there is a potential substitution available.
6335 if (!Subst->isRematerializable() && Subst->mustHaveReg() &&
6336 !Subst->hasReg()) {
6337 // At this point we know the substitution will have a register.
6338 if (From->getType() == Subst->getType()) {
6339 // At this point we know the substitution's register is compatible.
6340 return Subst;
6341 }
6342 }
6343 }
6344 }
6345
6346 // Go through the various types of operands: OperandARM32Mem,
6347 // OperandARM32Flex, Constant, and Variable. Given the above assertion, if
6348 // type of operand is not legal (e.g., OperandARM32Mem and !Legal_Mem), we
6349 // can always copy to a register.
6350 if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(From)) {
6351 // Before doing anything with a Mem operand, we need to ensure that the
6352 // Base and Index components are in physical registers.
6353 Variable *Base = Mem->getBase();
6354 Variable *Index = Mem->getIndex();
6355 ConstantInteger32 *Offset = Mem->getOffset();
6356 assert(Index == nullptr || Offset == nullptr);
6357 Variable *RegBase = nullptr;
6358 Variable *RegIndex = nullptr;
6359 assert(Base);
6360 RegBase = llvm::cast<Variable>(
6361 legalize(Base, Legal_Reg | Legal_Rematerializable));
6362 assert(Ty < MemTraitsSize);
6363 if (Index) {
6364 assert(Offset == nullptr);
6365 assert(MemTraits[Ty].CanHaveIndex);
6366 RegIndex = legalizeToReg(Index);
6367 }
6368 if (Offset && Offset->getValue() != 0) {
6369 assert(Index == nullptr);
6370 static constexpr bool ZeroExt = false;
6371 assert(MemTraits[Ty].CanHaveImm);
6372 if (!OperandARM32Mem::canHoldOffset(Ty, ZeroExt, Offset->getValue())) {
6373 llvm::report_fatal_error("Invalid memory offset.");
6374 }
6375 }
6376
6377 // Create a new operand if there was a change.
6378 if (Base != RegBase || Index != RegIndex) {
6379 // There is only a reg +/- reg or reg + imm form.
6380 // Figure out which to re-create.
6381 if (RegIndex) {
6382 Mem = OperandARM32Mem::create(Func, Ty, RegBase, RegIndex,
6383 Mem->getShiftOp(), Mem->getShiftAmt(),
6384 Mem->getAddrMode());
6385 } else {
6386 Mem = OperandARM32Mem::create(Func, Ty, RegBase, Offset,
6387 Mem->getAddrMode());
6388 }
6389 }
6390 if (Allowed & Legal_Mem) {
6391 From = Mem;
6392 } else {
6393 Variable *Reg = makeReg(Ty, RegNum);
6394 _ldr(Reg, Mem);
6395 From = Reg;
6396 }
6397 return From;
6398 }
6399
6400 if (auto *Flex = llvm::dyn_cast<OperandARM32Flex>(From)) {
6401 if (!(Allowed & Legal_Flex)) {
6402 if (auto *FlexReg = llvm::dyn_cast<OperandARM32FlexReg>(Flex)) {
6403 if (FlexReg->getShiftOp() == OperandARM32::kNoShift) {
6404 From = FlexReg->getReg();
6405 // Fall through and let From be checked as a Variable below, where it
6406 // may or may not need a register.
6407 } else {
6408 return copyToReg(Flex, RegNum);
6409 }
6410 } else {
6411 return copyToReg(Flex, RegNum);
6412 }
6413 } else {
6414 return From;
6415 }
6416 }
6417
6418 if (llvm::isa<Constant>(From)) {
6419 if (llvm::isa<ConstantUndef>(From)) {
6420 From = legalizeUndef(From, RegNum);
6421 if (isVectorType(Ty))
6422 return From;
6423 }
6424 // There should be no constants of vector type (other than undef).
6425 assert(!isVectorType(Ty));
6426 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) {
6427 uint32_t RotateAmt;
6428 uint32_t Immed_8;
6429 uint32_t Value = static_cast<uint32_t>(C32->getValue());
6430 if (OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) {
6431 // The immediate can be encoded as a Flex immediate. We may return the
6432 // Flex operand if the caller has Allow'ed it.
6433 auto *OpF = OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
6434 const bool CanBeFlex = Allowed & Legal_Flex;
6435 if (CanBeFlex)
6436 return OpF;
6437 return copyToReg(OpF, RegNum);
6438 } else if (OperandARM32FlexImm::canHoldImm(~Value, &RotateAmt,
6439 &Immed_8)) {
6440 // Even though the immediate can't be encoded as a Flex operand, its
6441 // inverted bit pattern can, thus we use ARM's mvn to load the 32-bit
6442 // constant with a single instruction.
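// For example (illustrative): 0xFFFFFF00 is not a valid flexible immediate,
// but its complement 0x000000FF is, so the constant can be materialized as
//   mvn r0, #0xFF   @ r0 = ~0x000000FF = 0xFFFFFF00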
6443 auto *InvOpF =
6444 OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
6445 Variable *Reg = makeReg(Ty, RegNum);
6446 _mvn(Reg, InvOpF);
6447 return Reg;
6448 } else {
6449 // Do a movw/movt to a register.
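// For example (illustrative): to materialize 0x12345678,
//   movw r0, #0x5678   @ r0 = 0x00005678
//   movt r0, #0x1234   @ r0 = 0x12345678
// and when the upper half is zero (e.g. 0x00001234) the movt is skipped.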
6450 Variable *Reg = makeReg(Ty, RegNum);
6451 uint32_t UpperBits = (Value >> 16) & 0xFFFF;
6452 _movw(Reg,
6453 UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32);
6454 if (UpperBits != 0) {
6455 _movt(Reg, Ctx->getConstantInt32(UpperBits));
6456 }
6457 return Reg;
6458 }
6459 } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) {
6460 Variable *Reg = makeReg(Ty, RegNum);
6461 if (SandboxingType != ST_Nonsfi) {
6462 _movw(Reg, C);
6463 _movt(Reg, C);
6464 } else {
6465 auto *GotAddr = legalizeToReg(GotPtr);
6466 GlobalString CGotoffName = createGotoffRelocation(C);
6467 loadNamedConstantRelocatablePIC(
6468 CGotoffName, Reg, [this, Reg](Variable *PC) {
6469 _ldr(Reg, OperandARM32Mem::create(Func, IceType_i32, PC, Reg));
6470 });
6471 _add(Reg, GotAddr, Reg);
6472 }
6473 return Reg;
6474 } else {
6475 assert(isScalarFloatingType(Ty));
6476 uint32_t ModifiedImm;
6477 if (OperandARM32FlexFpImm::canHoldImm(From, &ModifiedImm)) {
6478 Variable *T = makeReg(Ty, RegNum);
6479 _mov(T,
6480 OperandARM32FlexFpImm::create(Func, From->getType(), ModifiedImm));
6481 return T;
6482 }
6483
6484 if (Ty == IceType_f64 && isFloatingPointZero(From)) {
6485 // Use T = T ^ T to load a 64-bit fp zero. This does not work for f32
6486 // because ARM does not have a veor instruction with S registers.
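// The emitted sequence is roughly (illustrative register name): a fake-def of
// the destination D register followed by
//   veor d7, d7, d7   @ d7 ^ d7 == 0, i.e. +0.0 as an f64 bit pattern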
6487 Variable *T = makeReg(IceType_f64, RegNum);
6488 Context.insert<InstFakeDef>(T);
6489 _veor(T, T, T);
6490 return T;
6491 }
6492
6493 // Load floats/doubles from literal pool.
6494 auto *CFrom = llvm::cast<Constant>(From);
6495 assert(CFrom->getShouldBePooled());
6496 Constant *Offset = Ctx->getConstantSym(0, CFrom->getLabelName());
6497 Variable *BaseReg = nullptr;
6498 if (SandboxingType == ST_Nonsfi) {
6499 // vldr does not support the [base, index] addressing mode, so we need
6500 // to legalize Offset to a register. Otherwise, we could simply
6501 // vldr dest, [got, reg(Offset)]
6502 BaseReg = legalizeToReg(Offset);
6503 } else {
6504 BaseReg = makeReg(getPointerType());
6505 _movw(BaseReg, Offset);
6506 _movt(BaseReg, Offset);
6507 }
6508 From = formMemoryOperand(BaseReg, Ty);
6509 return copyToReg(From, RegNum);
6510 }
6511 }
6512
6513 if (auto *Var = llvm::dyn_cast<Variable>(From)) {
6514 if (Var->isRematerializable()) {
6515 if (Allowed & Legal_Rematerializable) {
6516 return From;
6517 }
6518
6519 Variable *T = makeReg(Var->getType(), RegNum);
6520 _mov(T, Var);
6521 return T;
6522 }
6523 // Check if the variable is guaranteed a physical register. This can happen
6524 // either when the variable is pre-colored or when it is assigned infinite
6525 // weight.
6526 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());
6527 // We need a new physical register for the operand if:
6528 //   (1) Mem is not allowed and Var isn't guaranteed a physical register,
6529 //       or
6530 //   (2) RegNum is required and Var->getRegNum() doesn't match.
6531 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
6532 (RegNum.hasValue() && (RegNum != Var->getRegNum()))) {
6533 From = copyToReg(From, RegNum);
6534 }
6535 return From;
6536 }
6537 llvm::report_fatal_error("Unhandled operand kind in legalize()");
6538
6539 return From;
6540 }
6541
6542 /// Provide a trivial wrapper to legalize() for this common usage.
6543 Variable *TargetARM32::legalizeToReg(Operand *From, RegNumT RegNum) {
6544 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
6545 }
6546
6547 /// Legalize undef values to concrete values.
6548 Operand *TargetARM32::legalizeUndef(Operand *From, RegNumT RegNum) {
6549 Type Ty = From->getType();
6550 if (llvm::isa<ConstantUndef>(From)) {
6551 // Lower undefs to zero. Another option is to lower undefs to an
6552 // uninitialized register; however, using an uninitialized register results
6553 // in less predictable code.
6554 //
6555 // If in the future the implementation is changed to lower undef values to
6556 // uninitialized registers, a FakeDef will be needed:
6557 // Context.insert(InstFakeDef::create(Func, Reg)); this ensures that the
6558 // live range of Reg is not overestimated. If the constant being lowered
6559 // is a 64-bit value, then the result should be split and the
6560 // lo and hi components will need to go in uninitialized registers.
6561 if (isVectorType(Ty))
6562 return makeVectorOfZeros(Ty, RegNum);
6563 return Ctx->getConstantZero(Ty);
6564 }
6565 return From;
6566 }
6567
6568 OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) {
6569 auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand);
6570 // Address mode optimization may already have created an OperandARM32Mem,
6571 // in which case no further transformation is needed here beyond
6572 // legalizing it.
6573 if (Mem) {
6574 return llvm::cast<OperandARM32Mem>(legalize(Mem));
6575 }
6576 // If we didn't do address mode optimization, then we only have a
6577 // base/offset to work with. ARM always requires a base register, so
6578 // just use that to hold the operand.
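// For example (illustrative): a raw pointer already sitting in r1 simply
// becomes the memory operand [r1, #0].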
6579 auto *Base = llvm::cast<Variable>(
6580 legalize(Operand, Legal_Reg | Legal_Rematerializable));
6581 return OperandARM32Mem::create(
6582 Func, Ty, Base,
6583 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)));
6584 }
6585
6586 Variable64On32 *TargetARM32::makeI64RegPair() {
6587 Variable64On32 *Reg =
6588 llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
6589 Reg->setMustHaveReg();
6590 Reg->initHiLo(Func);
6591 Reg->getLo()->setMustNotHaveReg();
6592 Reg->getHi()->setMustNotHaveReg();
6593 return Reg;
6594 }
6595
6596 Variable *TargetARM32::makeReg(Type Type, RegNumT RegNum) {
6597 // There aren't any 64-bit integer registers for ARM32.
6598 assert(Type != IceType_i64);
6599 assert(AllowTemporaryWithNoReg || RegNum.hasValue());
6600 Variable *Reg = Func->makeVariable(Type);
6601 if (RegNum.hasValue())
6602 Reg->setRegNum(RegNum);
6603 else
6604 Reg->setMustHaveReg();
6605 return Reg;
6606 }
6607
6608 void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align,
6609 RegNumT TmpRegNum) {
6610 assert(llvm::isPowerOf2_32(Align));
6611 uint32_t RotateAmt;
6612 uint32_t Immed_8;
6613 Operand *Mask;
6614 // Use AND or BIC to mask off the bits, depending on which immediate fits (if
6615 // it fits at all). Assume Align is usually small, in which case BIC works
6616 // better. Thus, this rounds down to the alignment.
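// For example (illustrative): with Align == 8, Align - 1 == 7 encodes as a
// flexible immediate, so we emit
//   bic reg, reg, #7            @ reg &= ~7
// whereas with Align == 0x40000000 only -Align == 0xC0000000 encodes, so we
// emit
//   and reg, reg, #0xC0000000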
6617 if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) {
6618 Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex,
6619 TmpRegNum);
6620 _bic(Reg, Reg, Mask);
6621 } else {
6622 Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex,
6623 TmpRegNum);
6624 _and(Reg, Reg, Mask);
6625 }
6626 }
6627
6628 void TargetARM32::postLower() {
6629 if (Func->getOptLevel() == Opt_m1)
6630 return;
6631 markRedefinitions();
6632 Context.availabilityUpdate();
6633 }
6634
6635 void TargetARM32::emit(const ConstantInteger32 *C) const {
6636 if (!BuildDefs::dump())
6637 return;
6638 Ostream &Str = Ctx->getStrEmit();
6639 Str << "#" << C->getValue();
6640 }
6641
6642 void TargetARM32::emit(const ConstantInteger64 *) const {
6643 llvm::report_fatal_error("Not expecting to emit 64-bit integers");
6644 }
6645
6646 void TargetARM32::emit(const ConstantFloat *C) const {
6647 (void)C;
6648 UnimplementedError(getFlags());
6649 }
6650
6651 void TargetARM32::emit(const ConstantDouble *C) const {
6652 (void)C;
6653 UnimplementedError(getFlags());
6654 }
6655
6656 void TargetARM32::emit(const ConstantUndef *) const {
6657 llvm::report_fatal_error("undef value encountered by emitter.");
6658 }
6659
6660 void TargetARM32::emit(const ConstantRelocatable *C) const {
6661 if (!BuildDefs::dump())
6662 return;
6663 Ostream &Str = Ctx->getStrEmit();
6664 Str << "#";
6665 emitWithoutPrefix(C);
6666 }
6667
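// A rough sketch of the lowering below (illustrative register names): for
//   %d = select i1 %b, i32 %t, i32 %f
// where %b comes from a foldable icmp, we emit approximately
//   cmp ...             @ flags set by the folded icmp producer
//   mov r0, rF          @ start with the false value
//   mov<cc> r0, rT      @ conditionally overwrite with the true value
//   mov rD, r0
// If %b cannot be folded, `tst %b, #1` sets the flags and the predicated move
// uses NE; i64 operands get the same treatment applied to their lo/hi halves,
// and an extra `and` with 1 truncates i1 results that are not provably 0 or 1.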
6668 void TargetARM32::lowerInt1ForSelect(Variable *Dest, Operand *Boolean,
6669 Operand *TrueValue, Operand *FalseValue) {
6670 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
6671
6672 assert(Boolean->getType() == IceType_i1);
6673
6674 bool NeedsAnd1 = false;
6675 if (TrueValue->getType() == IceType_i1) {
6676 assert(FalseValue->getType() == IceType_i1);
6677
6678 Variable *TrueValueV = Func->makeVariable(IceType_i1);
6679 SafeBoolChain Src0Safe = lowerInt1(TrueValueV, TrueValue);
6680 TrueValue = TrueValueV;
6681
6682 Variable *FalseValueV = Func->makeVariable(IceType_i1);
6683 SafeBoolChain Src1Safe = lowerInt1(FalseValueV, FalseValue);
6684 FalseValue = FalseValueV;
6685
6686 NeedsAnd1 = Src0Safe == SBC_No || Src1Safe == SBC_No;
6687 }
6688
6689 Variable *DestLo = (Dest->getType() == IceType_i64)
6690 ? llvm::cast<Variable>(loOperand(Dest))
6691 : Dest;
6692 Variable *DestHi = (Dest->getType() == IceType_i64)
6693 ? llvm::cast<Variable>(hiOperand(Dest))
6694 : nullptr;
6695 Operand *FalseValueLo = (FalseValue->getType() == IceType_i64)
6696 ? loOperand(FalseValue)
6697 : FalseValue;
6698 Operand *FalseValueHi =
6699 (FalseValue->getType() == IceType_i64) ? hiOperand(FalseValue) : nullptr;
6700
6701 Operand *TrueValueLo =
6702 (TrueValue->getType() == IceType_i64) ? loOperand(TrueValue) : TrueValue;
6703 Operand *TrueValueHi =
6704 (TrueValue->getType() == IceType_i64) ? hiOperand(TrueValue) : nullptr;
6705
6706 Variable *T_Lo = makeReg(DestLo->getType());
6707 Variable *T_Hi = (DestHi == nullptr) ? nullptr : makeReg(DestHi->getType());
6708
6709 _mov(T_Lo, legalize(FalseValueLo, Legal_Reg | Legal_Flex));
6710 if (DestHi) {
6711 _mov(T_Hi, legalize(FalseValueHi, Legal_Reg | Legal_Flex));
6712 }
6713
6714 CondWhenTrue Cond(CondARM32::kNone);
6715 // FlagsWereSet is used to determine whether Boolean was folded or not. If not,
6716 // add an explicit _tst instruction below.
6717 bool FlagsWereSet = false;
6718 if (const Inst *Producer = Computations.getProducerOf(Boolean)) {
6719 switch (Producer->getKind()) {
6720 default:
6721 llvm::report_fatal_error("Unexpected producer.");
6722 case Inst::Icmp: {
6723 Cond = lowerIcmpCond(llvm::cast<InstIcmp>(Producer));
6724 FlagsWereSet = true;
6725 } break;
6726 case Inst::Fcmp: {
6727 Cond = lowerFcmpCond(llvm::cast<InstFcmp>(Producer));
6728 FlagsWereSet = true;
6729 } break;
6730 case Inst::Cast: {
6731 const auto *CastProducer = llvm::cast<InstCast>(Producer);
6732 assert(CastProducer->getCastKind() == InstCast::Trunc);
6733 Boolean = CastProducer->getSrc(0);
6734 // No flags were set, so a _tst(Src, 1) will be emitted below. Don't
6735 // bother legalizing Src to a Reg because it will be legalized before
6736 // emitting the tst instruction.
6737 FlagsWereSet = false;
6738 } break;
6739 case Inst::Arithmetic: {
6740 // This is a special case: we eagerly assumed Producer could be folded,
6741 // but in reality, it can't. No reason to panic: we just lower it using
6742 // the regular lowerArithmetic helper.
6743 const auto *ArithProducer = llvm::cast<InstArithmetic>(Producer);
6744 lowerArithmetic(ArithProducer);
6745 Boolean = ArithProducer->getDest();
6746 // No flags were set, so a _tst(Dest, 1) will be emitted below. Don't
6747 // bother legalizing Dest to a Reg because it will be legalized before
6748 // emitting the tst instruction.
6749 FlagsWereSet = false;
6750 } break;
6751 }
6752 }
6753
6754 if (!FlagsWereSet) {
6755 // No flags have been set, so emit a tst Boolean, 1.
6756 Variable *Src = legalizeToReg(Boolean);
6757 _tst(Src, _1);
6758 Cond = CondWhenTrue(CondARM32::NE); // i.e., CondARM32::NotZero.
6759 }
6760
6761 if (Cond.WhenTrue0 == CondARM32::kNone) {
6762 assert(Cond.WhenTrue1 == CondARM32::kNone);
6763 } else {
6764 _mov_redefined(T_Lo, legalize(TrueValueLo, Legal_Reg | Legal_Flex),
6765 Cond.WhenTrue0);
6766 if (DestHi) {
6767 _mov_redefined(T_Hi, legalize(TrueValueHi, Legal_Reg | Legal_Flex),
6768 Cond.WhenTrue0);
6769 }
6770 }
6771
6772 if (Cond.WhenTrue1 != CondARM32::kNone) {
6773 _mov_redefined(T_Lo, legalize(TrueValueLo, Legal_Reg | Legal_Flex),
6774 Cond.WhenTrue1);
6775 if (DestHi) {
6776 _mov_redefined(T_Hi, legalize(TrueValueHi, Legal_Reg | Legal_Flex),
6777 Cond.WhenTrue1);
6778 }
6779 }
6780
6781 if (NeedsAnd1) {
6782 // We lowered something that is unsafe (i.e., can't provably be zero or
6783 // one). Truncate the result.
6784 _and(T_Lo, T_Lo, _1);
6785 }
6786
6787 _mov(DestLo, T_Lo);
6788 if (DestHi) {
6789 _mov(DestHi, T_Hi);
6790 }
6791 }
6792
6793 TargetARM32::SafeBoolChain TargetARM32::lowerInt1(Variable *Dest,
6794 Operand *Boolean) {
6795 assert(Boolean->getType() == IceType_i1);
6796 Variable *T = makeReg(IceType_i1);
6797 Operand *_0 =
6798 legalize(Ctx->getConstantZero(IceType_i1), Legal_Reg | Legal_Flex);
6799 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
6800
6801 SafeBoolChain Safe = SBC_Yes;
6802 if (const Inst *Producer = Computations.getProducerOf(Boolean)) {
6803 switch (Producer->getKind()) {
6804 default:
6805 llvm::report_fatal_error("Unexpected producer.");
6806 case Inst::Icmp: {
6807 _mov(T, _0);
6808 CondWhenTrue Cond = lowerIcmpCond(llvm::cast<InstIcmp>(Producer));
6809 assert(Cond.WhenTrue0 != CondARM32::AL);
6810 assert(Cond.WhenTrue0 != CondARM32::kNone);
6811 assert(Cond.WhenTrue1 == CondARM32::kNone);
6812 _mov_redefined(T, _1, Cond.WhenTrue0);
6813 } break;
6814 case Inst::Fcmp: {
6815 _mov(T, _0);
6816 Inst *MovZero = Context.getLastInserted();
6817 CondWhenTrue Cond = lowerFcmpCond(llvm::cast<InstFcmp>(Producer));
6818 if (Cond.WhenTrue0 == CondARM32::AL) {
6819 assert(Cond.WhenTrue1 == CondARM32::kNone);
6820 MovZero->setDeleted();
6821 _mov(T, _1);
6822 } else if (Cond.WhenTrue0 != CondARM32::kNone) {
6823 _mov_redefined(T, _1, Cond.WhenTrue0);
6824 }
6825 if (Cond.WhenTrue1 != CondARM32::kNone) {
6826 assert(Cond.WhenTrue0 != CondARM32::kNone);
6827 assert(Cond.WhenTrue0 != CondARM32::AL);
6828 _mov_redefined(T, _1, Cond.WhenTrue1);
6829 }
6830 } break;
6831 case Inst::Cast: {
6832 const auto *CastProducer = llvm::cast<InstCast>(Producer);
6833 assert(CastProducer->getCastKind() == InstCast::Trunc);
6834 Operand *Src = CastProducer->getSrc(0);
6835 if (Src->getType() == IceType_i64)
6836 Src = loOperand(Src);
6837 _mov(T, legalize(Src, Legal_Reg | Legal_Flex));
6838 Safe = SBC_No;
6839 } break;
6840 case Inst::Arithmetic: {
6841 const auto *ArithProducer = llvm::cast<InstArithmetic>(Producer);
6842 Safe = lowerInt1Arithmetic(ArithProducer);
6843 _mov(T, ArithProducer->getDest());
6844 } break;
6845 }
6846 } else {
6847 _mov(T, legalize(Boolean, Legal_Reg | Legal_Flex));
6848 }
6849
6850 _mov(Dest, T);
6851 return Safe;
6852 }
6853
6854 namespace {
6855 namespace BoolFolding {
6856 bool shouldTrackProducer(const Inst &Instr) {
6857 switch (Instr.getKind()) {
6858 default:
6859 return false;
6860 case Inst::Icmp:
6861 case Inst::Fcmp:
6862 return true;
6863 case Inst::Cast: {
6864 switch (llvm::cast<InstCast>(&Instr)->getCastKind()) {
6865 default:
6866 return false;
6867 case InstCast::Trunc:
6868 return true;
6869 }
6870 }
6871 case Inst::Arithmetic: {
6872 switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6873 default:
6874 return false;
6875 case InstArithmetic::And:
6876 case InstArithmetic::Or:
6877 return true;
6878 }
6879 }
6880 }
6881 }
6882
6883 bool isValidConsumer(const Inst &Instr) {
6884 switch (Instr.getKind()) {
6885 default:
6886 return false;
6887 case Inst::Br:
6888 return true;
6889 case Inst::Select:
6890 return !isVectorType(Instr.getDest()->getType());
6891 case Inst::Cast: {
6892 switch (llvm::cast<InstCast>(&Instr)->getCastKind()) {
6893 default:
6894 return false;
6895 case InstCast::Sext:
6896 return !isVectorType(Instr.getDest()->getType());
6897 case InstCast::Zext:
6898 return !isVectorType(Instr.getDest()->getType());
6899 }
6900 }
6901 case Inst::Arithmetic: {
6902 switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6903 default:
6904 return false;
6905 case InstArithmetic::And:
6906 return !isVectorType(Instr.getDest()->getType());
6907 case InstArithmetic::Or:
6908 return !isVectorType(Instr.getDest()->getType());
6909 }
6910 }
6911 }
6912 }
6913 } // end of namespace BoolFolding
6914
6915 namespace FpFolding {
6916 bool shouldTrackProducer(const Inst &Instr) {
6917 switch (Instr.getKind()) {
6918 default:
6919 return false;
6920 case Inst::Arithmetic: {
6921 switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6922 default:
6923 return false;
6924 case InstArithmetic::Fmul:
6925 return true;
6926 }
6927 }
6928 }
6929 }
6930
6931 bool isValidConsumer(const Inst &Instr) {
6932 switch (Instr.getKind()) {
6933 default:
6934 return false;
6935 case Inst::Arithmetic: {
6936 switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6937 default:
6938 return false;
6939 case InstArithmetic::Fadd:
6940 case InstArithmetic::Fsub:
6941 return true;
6942 }
6943 }
6944 }
6945 }
6946 } // end of namespace FpFolding
6947
6948 namespace IntFolding {
6949 bool shouldTrackProducer(const Inst &Instr) {
6950 switch (Instr.getKind()) {
6951 default:
6952 return false;
6953 case Inst::Arithmetic: {
6954 switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6955 default:
6956 return false;
6957 case InstArithmetic::Mul:
6958 return true;
6959 }
6960 }
6961 }
6962 }
6963
6964 bool isValidConsumer(const Inst &Instr) {
6965 switch (Instr.getKind()) {
6966 default:
6967 return false;
6968 case Inst::Arithmetic: {
6969 switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6970 default:
6971 return false;
6972 case InstArithmetic::Add:
6973 case InstArithmetic::Sub:
6974 return true;
6975 }
6976 }
6977 }
6978 }
6979 } // namespace IntFolding
6980 } // end of anonymous namespace
6981
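// A rough example of what this tracking enables (illustrative): in
//   %c = icmp slt i32 %a, %b
//   br i1 %c, label %t, label %e
// the icmp is recorded as a producer of %c; because %c has a single use in
// the same block and is not live-out, the branch lowering can consume the
// producer directly (cmp + conditional branch) instead of first materializing
// %c in a register. The Fp/Int folding namespaces above enable analogous
// multiply-accumulate style combines (e.g. mla / vmla).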
6982 void TargetARM32::ComputationTracker::recordProducers(CfgNode *Node) {
6983 for (Inst &Instr : Node->getInsts()) {
6984 // Check whether Instr is a valid producer.
6985 Variable *Dest = Instr.getDest();
6986 if (!Instr.isDeleted() // only consider non-deleted instructions; and
6987 && Dest // only instructions with an actual dest var; and
6988 && Dest->getType() == IceType_i1 // only bool-type dest vars; and
6989 && BoolFolding::shouldTrackProducer(Instr)) { // white-listed instr.
6990 KnownComputations.emplace(Dest->getIndex(),
6991 ComputationEntry(&Instr, IceType_i1));
6992 }
6993 if (!Instr.isDeleted() // only consider non-deleted instructions; and
6994 && Dest // only instructions with an actual dest var; and
6995 && isScalarFloatingType(Dest->getType()) // fp-type only dest vars; and
6996 && FpFolding::shouldTrackProducer(Instr)) { // white-listed instr.
6997 KnownComputations.emplace(Dest->getIndex(),
6998 ComputationEntry(&Instr, Dest->getType()));
6999 }
7000 if (!Instr.isDeleted() // only consider non-deleted instructions; and
7001 && Dest // only instructions with an actual dest var; and
7002 && Dest->getType() == IceType_i32 // i32 only dest vars; and
7003 && IntFolding::shouldTrackProducer(Instr)) { // white-listed instr.
7004 KnownComputations.emplace(Dest->getIndex(),
7005 ComputationEntry(&Instr, IceType_i32));
7006 }
7007 // Check each src variable against the map.
7008 FOREACH_VAR_IN_INST(Var, Instr) {
7009 SizeT VarNum = Var->getIndex();
7010 auto ComputationIter = KnownComputations.find(VarNum);
7011 if (ComputationIter == KnownComputations.end()) {
7012 continue;
7013 }
7014
7015 ++ComputationIter->second.NumUses;
7016 switch (ComputationIter->second.ComputationType) {
7017 default:
7018 KnownComputations.erase(VarNum);
7019 continue;
7020 case IceType_i1:
7021 if (!BoolFolding::isValidConsumer(Instr)) {
7022 KnownComputations.erase(VarNum);
7023 continue;
7024 }
7025 break;
7026 case IceType_i32:
7027 if (IndexOfVarInInst(Var) != 1 || !IntFolding::isValidConsumer(Instr)) {
7028 KnownComputations.erase(VarNum);
7029 continue;
7030 }
7031 break;
7032 case IceType_f32:
7033 case IceType_f64:
7034 if (IndexOfVarInInst(Var) != 1 || !FpFolding::isValidConsumer(Instr)) {
7035 KnownComputations.erase(VarNum);
7036 continue;
7037 }
7038 break;
7039 }
7040
7041 if (Instr.isLastUse(Var)) {
7042 ComputationIter->second.IsLiveOut = false;
7043 }
7044 }
7045 }
7046
7047 for (auto Iter = KnownComputations.begin(), End = KnownComputations.end();
7048 Iter != End;) {
7049 // Disable the folding if its dest may be live beyond this block.
7050 if (Iter->second.IsLiveOut || Iter->second.NumUses > 1) {
7051 Iter = KnownComputations.erase(Iter);
7052 continue;
7053 }
7054
7055 // Mark as "dead" rather than outright deleting. This is so that other
7056 // peephole style optimizations during or before lowering have access to
7057 // this instruction in undeleted form. See for example
7058 // tryOptimizedCmpxchgCmpBr().
7059 Iter->second.Instr->setDead();
7060 ++Iter;
7061 }
7062 }
7063
7064 TargetARM32::Sandboxer::Sandboxer(TargetARM32 *Target,
7065 InstBundleLock::Option BundleOption)
7066 : Target(Target), BundleOption(BundleOption) {}
7067
7068 TargetARM32::Sandboxer::~Sandboxer() {}
7069
7070 namespace {
7071 OperandARM32FlexImm *indirectBranchBicMask(Cfg *Func) {
7072 constexpr uint32_t Imm8 = 0xFC; // 0xC000000F
7073 constexpr uint32_t RotateAmt = 2;
7074 return OperandARM32FlexImm::create(Func, IceType_i32, Imm8, RotateAmt);
7075 }
7076
7077 OperandARM32FlexImm *memOpBicMask(Cfg *Func) {
7078 constexpr uint32_t Imm8 = 0x0C; // 0xC0000000
7079 constexpr uint32_t RotateAmt = 2;
7080 return OperandARM32FlexImm::create(Func, IceType_i32, Imm8, RotateAmt);
7081 }
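// For reference (decoding the flexible immediates above, matching their
// comments): 0xFC rotated right by 4 is 0xC000000F, and 0x0C rotated right by
// 4 is 0xC0000000 (the rotate field counts units of 2 bits). Roughly, the
// first mask bundle-aligns indirect branch targets and keeps them inside the
// untrusted address range, while the second only clears the top two address
// bits for memory bases.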
7082
7083 static bool baseNeedsBic(Variable *Base) {
7084 return Base->getRegNum() != RegARM32::Reg_r9 &&
7085 Base->getRegNum() != RegARM32::Reg_sp;
7086 }
7087 } // end of anonymous namespace
7088
7089 void TargetARM32::Sandboxer::createAutoBundle() {
7090 Bundler = makeUnique<AutoBundle>(Target, BundleOption);
7091 }
7092
7093 void TargetARM32::Sandboxer::add_sp(Operand *AddAmount) {
7094 Variable *SP = Target->getPhysicalRegister(RegARM32::Reg_sp);
7095 if (!Target->NeedSandboxing) {
7096 Target->_add(SP, SP, AddAmount);
7097 return;
7098 }
7099 createAutoBundle();
7100 Target->_add(SP, SP, AddAmount);
7101 Target->_bic(SP, SP, memOpBicMask(Target->Func));
7102 }
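// The sandboxed stack adjustment above therefore looks roughly like
// (illustrative):
//   @ bundle-locked region
//   add sp, sp, #Amount
//   bic sp, sp, #0xC0000000   @ keep sp inside the untrusted address space
// so sp cannot escape the sandbox between the add and its next use.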
7103
7104 void TargetARM32::Sandboxer::align_sp(size_t Alignment) {
7105 Variable *SP = Target->getPhysicalRegister(RegARM32::Reg_sp);
7106 if (!Target->NeedSandboxing) {
7107 Target->alignRegisterPow2(SP, Alignment);
7108 return;
7109 }
7110 createAutoBundle();
7111 Target->alignRegisterPow2(SP, Alignment);
7112 Target->_bic(SP, SP, memOpBicMask(Target->Func));
7113 }
7114
7115 InstARM32Call *TargetARM32::Sandboxer::bl(Variable *ReturnReg,
7116 Operand *CallTarget) {
7117 if (Target->NeedSandboxing) {
7118 createAutoBundle();
7119 if (auto *CallTargetR = llvm::dyn_cast<Variable>(CallTarget)) {
7120 Target->_bic(CallTargetR, CallTargetR,
7121 indirectBranchBicMask(Target->Func));
7122 }
7123 }
7124 return Target->Context.insert<InstARM32Call>(ReturnReg, CallTarget);
7125 }
7126
7127 void TargetARM32::Sandboxer::ldr(Variable *Dest, OperandARM32Mem *Mem,
7128 CondARM32::Cond Pred) {
7129 Variable *MemBase = Mem->getBase();
7130 if (Target->NeedSandboxing && baseNeedsBic(MemBase)) {
7131 createAutoBundle();
7132 assert(!Mem->isRegReg());
7133 Target->_bic(MemBase, MemBase, memOpBicMask(Target->Func), Pred);
7134 }
7135 Target->_ldr(Dest, Mem, Pred);
7136 }
7137
7138 void TargetARM32::Sandboxer::ldrex(Variable *Dest, OperandARM32Mem *Mem,
7139 CondARM32::Cond Pred) {
7140 Variable *MemBase = Mem->getBase();
7141 if (Target->NeedSandboxing && baseNeedsBic(MemBase)) {
7142 createAutoBundle();
7143 assert(!Mem->isRegReg());
7144 Target->_bic(MemBase, MemBase, memOpBicMask(Target->Func), Pred);
7145 }
7146 Target->_ldrex(Dest, Mem, Pred);
7147 }
7148
7149 void TargetARM32::Sandboxer::reset_sp(Variable *Src) {
7150 Variable *SP = Target->getPhysicalRegister(RegARM32::Reg_sp);
7151 if (!Target->NeedSandboxing) {
7152 Target->_mov_redefined(SP, Src);
7153 return;
7154 }
7155 createAutoBundle();
7156 Target->_mov_redefined(SP, Src);
7157 Target->_bic(SP, SP, memOpBicMask(Target->Func));
7158 }
7159
7160 void TargetARM32::Sandboxer::ret(Variable *RetAddr, Variable *RetValue) {
7161 if (Target->NeedSandboxing) {
7162 createAutoBundle();
7163 Target->_bic(RetAddr, RetAddr, indirectBranchBicMask(Target->Func));
7164 }
7165 Target->_ret(RetAddr, RetValue);
7166 }
7167
7168 void TargetARM32::Sandboxer::str(Variable *Src, OperandARM32Mem *Mem,
7169 CondARM32::Cond Pred) {
7170 Variable *MemBase = Mem->getBase();
7171 if (Target->NeedSandboxing && baseNeedsBic(MemBase)) {
7172 createAutoBundle();
7173 assert(!Mem->isRegReg());
7174 Target->_bic(MemBase, MemBase, memOpBicMask(Target->Func), Pred);
7175 }
7176 Target->_str(Src, Mem, Pred);
7177 }
7178
7179 void TargetARM32::Sandboxer::strex(Variable *Dest, Variable *Src,
7180 OperandARM32Mem *Mem, CondARM32::Cond Pred) {
7181 Variable *MemBase = Mem->getBase();
7182 if (Target->NeedSandboxing && baseNeedsBic(MemBase)) {
7183 createAutoBundle();
7184 assert(!Mem->isRegReg());
7185 Target->_bic(MemBase, MemBase, memOpBicMask(Target->Func), Pred);
7186 }
7187 Target->_strex(Dest, Src, Mem, Pred);
7188 }
7189
7190 void TargetARM32::Sandboxer::sub_sp(Operand *SubAmount) {
7191 Variable *SP = Target->getPhysicalRegister(RegARM32::Reg_sp);
7192 if (!Target->NeedSandboxing) {
7193 Target->_sub(SP, SP, SubAmount);
7194 return;
7195 }
7196 createAutoBundle();
7197 Target->_sub(SP, SP, SubAmount);
7198 Target->_bic(SP, SP, memOpBicMask(Target->Func));
7199 }
7200
7201 TargetDataARM32::TargetDataARM32(GlobalContext *Ctx)
7202 : TargetDataLowering(Ctx) {}
7203
7204 void TargetDataARM32::lowerGlobals(const VariableDeclarationList &Vars,
7205 const std::string &SectionSuffix) {
7206 const bool IsPIC = getFlags().getUseNonsfi();
7207 switch (getFlags().getOutFileType()) {
7208 case FT_Elf: {
7209 ELFObjectWriter *Writer = Ctx->getObjectWriter();
7210 Writer->writeDataSection(Vars, llvm::ELF::R_ARM_ABS32, SectionSuffix,
7211 IsPIC);
7212 } break;
7213 case FT_Asm:
7214 case FT_Iasm: {
7215 OstreamLocker _(Ctx);
7216 for (const VariableDeclaration *Var : Vars) {
7217 if (getFlags().matchTranslateOnly(Var->getName(), 0)) {
7218 emitGlobal(*Var, SectionSuffix);
7219 }
7220 }
7221 } break;
7222 }
7223 }
7224
7225 namespace {
7226 template <typename T> struct ConstantPoolEmitterTraits;
7227
7228 static_assert(sizeof(uint64_t) == 8,
7229 "uint64_t is supposed to be 8 bytes wide.");
7230
7231 // TODO(jpp): implement the following when implementing constant randomization:
7232 // * template <> struct ConstantPoolEmitterTraits<uint8_t>
7233 // * template <> struct ConstantPoolEmitterTraits<uint16_t>
7234 // * template <> struct ConstantPoolEmitterTraits<uint32_t>
7235 template <> struct ConstantPoolEmitterTraits<float> {
7236 using ConstantType = ConstantFloat;
7237 static constexpr Type IceType = IceType_f32;
7238 // AsmTag and TypeName can't be constexpr because llvm::StringRef is unhappy
7239 // about them being constexpr.
7240 static const char AsmTag[];
7241 static const char TypeName[];
7242 static uint64_t bitcastToUint64(float Value) {
7243 static_assert(sizeof(Value) == sizeof(uint32_t),
7244 "Float should be 4 bytes.");
7245 const uint32_t IntValue = Utils::bitCopy<uint32_t>(Value);
7246 return static_cast<uint64_t>(IntValue);
7247 }
7248 };
7249 const char ConstantPoolEmitterTraits<float>::AsmTag[] = ".long";
7250 const char ConstantPoolEmitterTraits<float>::TypeName[] = "f32";
7251
7252 template <> struct ConstantPoolEmitterTraits<double> {
7253 using ConstantType = ConstantDouble;
7254 static constexpr Type IceType = IceType_f64;
7255 static const char AsmTag[];
7256 static const char TypeName[];
7257 static uint64_t bitcastToUint64(double Value) {
7258 static_assert(sizeof(double) == sizeof(uint64_t),
7259 "Double should be 8 bytes.");
7260 return Utils::bitCopy<uint64_t>(Value);
7261 }
7262 };
7263 const char ConstantPoolEmitterTraits<double>::AsmTag[] = ".quad";
7264 const char ConstantPoolEmitterTraits<double>::TypeName[] = "f64";
7265
7266 template <typename T>
7267 void emitConstant(
7268 Ostream &Str,
7269 const typename ConstantPoolEmitterTraits<T>::ConstantType *Const) {
7270 using Traits = ConstantPoolEmitterTraits<T>;
7271 Str << Const->getLabelName();
7272 Str << ":\n\t" << Traits::AsmTag << "\t0x";
7273 T Value = Const->getValue();
7274 Str.write_hex(Traits::bitcastToUint64(Value));
7275 Str << "\t/* " << Traits::TypeName << " " << Value << " */\n";
7276 }
7277
7278 template <typename T> void emitConstantPool(GlobalContext *Ctx) {
7279 if (!BuildDefs::dump()) {
7280 return;
7281 }
7282
7283 using Traits = ConstantPoolEmitterTraits<T>;
7284 static constexpr size_t MinimumAlignment = 4;
7285 SizeT Align = std::max(MinimumAlignment, typeAlignInBytes(Traits::IceType));
7286 assert((Align % 4) == 0 && "Constants should be aligned");
7287 Ostream &Str = Ctx->getStrEmit();
7288 ConstantList Pool = Ctx->getConstantPool(Traits::IceType);
7289
7290 Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",%progbits," << Align
7291 << "\n"
7292 << "\t.align\t" << Align << "\n";
7293
7294 for (Constant *C : Pool) {
7295 if (!C->getShouldBePooled()) {
7296 continue;
7297 }
7298
7299 emitConstant<T>(Str, llvm::dyn_cast<typename Traits::ConstantType>(C));
7300 }
7301 }
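// With the traits above, the emitted f32 pool looks roughly like
// (illustrative label name and value):
//   .section .rodata.cst4,"aM",%progbits,4
//   .align 4
//   .L$float$0:
//           .long 0x3fc00000   /* f32 1.5 */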
7302 } // end of anonymous namespace
7303
7304 void TargetDataARM32::lowerConstants() {
7305 if (getFlags().getDisableTranslation())
7306 return;
7307 switch (getFlags().getOutFileType()) {
7308 case FT_Elf: {
7309 ELFObjectWriter *Writer = Ctx->getObjectWriter();
7310 Writer->writeConstantPool<ConstantFloat>(IceType_f32);
7311 Writer->writeConstantPool<ConstantDouble>(IceType_f64);
7312 } break;
7313 case FT_Asm:
7314 case FT_Iasm: {
7315 OstreamLocker _(Ctx);
7316 emitConstantPool<float>(Ctx);
7317 emitConstantPool<double>(Ctx);
7318 break;
7319 }
7320 }
7321 }
7322
7323 void TargetDataARM32::lowerJumpTables() {
7324 if (getFlags().getDisableTranslation())
7325 return;
7326 switch (getFlags().getOutFileType()) {
7327 case FT_Elf:
7328 if (!Ctx->getJumpTables().empty()) {
7329 llvm::report_fatal_error("ARM32 does not support jump tables yet.");
7330 }
7331 break;
7332 case FT_Asm:
7333 // Already emitted from Cfg
7334 break;
7335 case FT_Iasm: {
7336 // TODO(kschimpf): Fill this in when we get more information.
7337 break;
7338 }
7339 }
7340 }
7341
7342 TargetHeaderARM32::TargetHeaderARM32(GlobalContext *Ctx)
7343 : TargetHeaderLowering(Ctx), CPUFeatures(getFlags()) {}
7344
7345 void TargetHeaderARM32::lower() {
7346 OstreamLocker _(Ctx);
7347 Ostream &Str = Ctx->getStrEmit();
7348 Str << ".syntax unified\n";
7349 // Emit build attributes in format: .eabi_attribute TAG, VALUE. See Sec. 2 of
7350 // "Addenda to, and Errata in the ABI for the ARM architecture"
7351 // http://infocenter.arm.com
7352 // /help/topic/com.arm.doc.ihi0045d/IHI0045D_ABI_addenda.pdf
7353 //
7354 // Tag_conformance should be emitted first in a file-scope sub-subsection
7355 // of the first public subsection of the attributes.
7356 Str << ".eabi_attribute 67, \"2.09\" @ Tag_conformance\n";
7357 // Chromebooks are at least Cortex-A15, but we target Cortex-A9 for broader
7358 // compatibility. For some reason, the LLVM ARM asm parser has the .cpu
7359 // directive override the mattr specified on the command line, so to test
7360 // hwdiv we need to set the .cpu directive higher (can't just rely on --mattr=...).
7361 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
7362 Str << ".cpu cortex-a15\n";
7363 } else {
7364 Str << ".cpu cortex-a9\n";
7365 }
7366 Str << ".eabi_attribute 6, 10 @ Tag_CPU_arch: ARMv7\n"
7367 << ".eabi_attribute 7, 65 @ Tag_CPU_arch_profile: App profile\n";
7368 Str << ".eabi_attribute 8, 1 @ Tag_ARM_ISA_use: Yes\n"
7369 << ".eabi_attribute 9, 2 @ Tag_THUMB_ISA_use: Thumb-2\n";
7370 Str << ".fpu neon\n"
7371 << ".eabi_attribute 17, 1 @ Tag_ABI_PCS_GOT_use: permit directly\n"
7372 << ".eabi_attribute 20, 1 @ Tag_ABI_FP_denormal\n"
7373 << ".eabi_attribute 21, 1 @ Tag_ABI_FP_exceptions\n"
7374 << ".eabi_attribute 23, 3 @ Tag_ABI_FP_number_model: IEEE 754\n"
7375 << ".eabi_attribute 34, 1 @ Tag_CPU_unaligned_access\n"
7376 << ".eabi_attribute 24, 1 @ Tag_ABI_align_needed: 8-byte\n"
7377 << ".eabi_attribute 25, 1 @ Tag_ABI_align_preserved: 8-byte\n"
7378 << ".eabi_attribute 28, 1 @ Tag_ABI_VFP_args\n"
7379 << ".eabi_attribute 36, 1 @ Tag_FP_HP_extension\n"
7380 << ".eabi_attribute 38, 1 @ Tag_ABI_FP_16bit_format\n"
7381 << ".eabi_attribute 42, 1 @ Tag_MPextension_use\n"
7382 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n";
7383 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
7384 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n";
7385 }
7386 // Technically R9 is used for TLS with Sandboxing, and we reserve it.
7387 // However, for compatibility with current NaCl LLVM, don't claim that.
7388 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
7389 }
7390
7391 SmallBitVector TargetARM32::TypeToRegisterSet[RegARM32::RCARM32_NUM];
7392 SmallBitVector TargetARM32::TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM];
7393 SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM];
7394
7395 } // end of namespace ARM32
7396 } // end of namespace Ice
7397