//===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Implements the TargetLoweringX8632 class, which consists almost
/// entirely of the lowering sequence for each high-level instruction.
///
//===----------------------------------------------------------------------===//

#include "IceTargetLoweringX8632.h"

#include "IceTargetLoweringX8632Traits.h"

#if defined(SUBZERO_USE_MICROSOFT_ABI)
extern "C" void _chkstk();
#endif

namespace X8632 {
std::unique_ptr<::Ice::TargetLowering> createTargetLowering(::Ice::Cfg *Func) {
  return ::Ice::X8632::TargetX8632::create(Func);
}

std::unique_ptr<::Ice::TargetDataLowering>
createTargetDataLowering(::Ice::GlobalContext *Ctx) {
  return ::Ice::X8632::TargetDataX86<::Ice::X8632::TargetX8632Traits>::create(
      Ctx);
}

std::unique_ptr<::Ice::TargetHeaderLowering>
createTargetHeaderLowering(::Ice::GlobalContext *Ctx) {
  return ::Ice::X8632::TargetHeaderX86::create(Ctx);
}

void staticInit(::Ice::GlobalContext *Ctx) {
  ::Ice::X8632::TargetX8632::staticInit(Ctx);
  if (Ice::getFlags().getUseNonsfi()) {
    // In nonsfi, we need to reference the _GLOBAL_OFFSET_TABLE_ for accessing
    // globals. The GOT is an external symbol (i.e., it is not defined in the
    // pexe) so we need to register it as such so that ELF emission won't barf
    // on an "unknown" symbol. The GOT is added to the External symbols list
    // here because staticInit() is invoked in a single-thread context.
    Ctx->getConstantExternSym(Ctx->getGlobalString(::Ice::GlobalOffsetTable));
  }
}

bool shouldBePooled(const class ::Ice::Constant *C) {
  return ::Ice::X8632::TargetX8632::shouldBePooled(C);
}

::Ice::Type getPointerType() {
  return ::Ice::X8632::TargetX8632::getPointerType();
}

} // end of namespace X8632

namespace Ice {
namespace X8632 {

//------------------------------------------------------------------------------
// ______ ______ ______ __ ______ ______
// /\__ _\ /\ == \ /\ __ \ /\ \ /\__ _\ /\ ___\
// \/_/\ \/ \ \ __< \ \ __ \ \ \ \ \/_/\ \/ \ \___ \
// \ \_\ \ \_\ \_\ \ \_\ \_\ \ \_\ \ \_\ \/\_____\
// \/_/ \/_/ /_/ \/_/\/_/ \/_/ \/_/ \/_____/
//
//------------------------------------------------------------------------------
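// TableFcmp maps each fcmp condition (one row per FCMPX8632_TABLE entry) to
// its lowering data: the default boolean result, whether to swap the scalar
// operands, the two condition codes used for the scalar form, whether to swap
// the vector operands, and the SSE compare predicate used for the vector
// form. (Descriptive summary inferred from the X-macro columns below.)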
const TargetX8632Traits::TableFcmpType TargetX8632Traits::TableFcmp[] = {
#define X(val, dflt, swapS, C1, C2, swapV, pred) \
  {dflt, \
   swapS, \
   X8632::Traits::Cond::C1, \
   X8632::Traits::Cond::C2, \
   swapV, \
   X8632::Traits::Cond::pred},
    FCMPX8632_TABLE
#undef X
};

const size_t TargetX8632Traits::TableFcmpSize = llvm::array_lengthof(TableFcmp);

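// TableIcmp32 maps each icmp condition to the single x86 condition code used
// when comparing i32 or narrower operands (one row per ICMPX8632_TABLE
// entry). (Descriptive summary inferred from the X-macro column below.)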
const TargetX8632Traits::TableIcmp32Type TargetX8632Traits::TableIcmp32[] = {
#define X(val, C_32, C1_64, C2_64, C3_64) {X8632::Traits::Cond::C_32},
    ICMPX8632_TABLE
#undef X
};

const size_t TargetX8632Traits::TableIcmp32Size =
    llvm::array_lengthof(TableIcmp32);

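// TableIcmp64 gives the three condition codes used when lowering an icmp of
// i64 operands on x86-32, where the comparison is decomposed into compares of
// the high and low 32-bit halves. (Descriptive summary inferred from the
// X-macro columns below.)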
const TargetX8632Traits::TableIcmp64Type TargetX8632Traits::TableIcmp64[] = {
#define X(val, C_32, C1_64, C2_64, C3_64) \
  {X8632::Traits::Cond::C1_64, X8632::Traits::Cond::C2_64, \
   X8632::Traits::Cond::C3_64},
    ICMPX8632_TABLE
#undef X
};

const size_t TargetX8632Traits::TableIcmp64Size =
    llvm::array_lengthof(TableIcmp64);

const TargetX8632Traits::TableTypeX8632AttributesType
    TargetX8632Traits::TableTypeX8632Attributes[] = {
#define X(tag, elty, cvt, sdss, pdps, spsd, int_, unpack, pack, width, fld) \
  {IceType_##elty},
        ICETYPEX8632_TABLE
#undef X
};

const size_t TargetX8632Traits::TableTypeX8632AttributesSize =
    llvm::array_lengthof(TableTypeX8632Attributes);

#if defined(SUBZERO_USE_MICROSOFT_ABI)
// Windows 32-bit only guarantees 4-byte stack alignment.
const uint32_t TargetX8632Traits::X86_STACK_ALIGNMENT_BYTES = 4;
#else
const uint32_t TargetX8632Traits::X86_STACK_ALIGNMENT_BYTES = 16;
#endif
const char *TargetX8632Traits::TargetName = "X8632";

template <>
std::array<SmallBitVector, RCX86_NUM>
    TargetX86Base<X8632::Traits>::TypeToRegisterSet = {{}};

template <>
std::array<SmallBitVector, RCX86_NUM>
    TargetX86Base<X8632::Traits>::TypeToRegisterSetUnfiltered = {{}};

template <>
std::array<SmallBitVector,
           TargetX86Base<X8632::Traits>::Traits::RegisterSet::Reg_NUM>
    TargetX86Base<X8632::Traits>::RegisterAliases = {{}};

template <>
FixupKind TargetX86Base<X8632::Traits>::PcRelFixup =
    TargetX86Base<X8632::Traits>::Traits::FK_PcRel;

template <>
FixupKind TargetX86Base<X8632::Traits>::AbsFixup =
    TargetX86Base<X8632::Traits>::Traits::FK_Abs;

//------------------------------------------------------------------------------
// __ ______ __ __ ______ ______ __ __ __ ______
// /\ \ /\ __ \/\ \ _ \ \/\ ___\/\ == \/\ \/\ "-.\ \/\ ___\
// \ \ \___\ \ \/\ \ \ \/ ".\ \ \ __\\ \ __<\ \ \ \ \-. \ \ \__ \
// \ \_____\ \_____\ \__/".~\_\ \_____\ \_\ \_\ \_\ \_\\"\_\ \_____\
// \/_____/\/_____/\/_/ \/_/\/_____/\/_/ /_/\/_/\/_/ \/_/\/_____/
//
//------------------------------------------------------------------------------
void TargetX8632::_add_sp(Operand *Adjustment) {
  Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
  _add(esp, Adjustment);
}

void TargetX8632::_mov_sp(Operand *NewValue) {
  Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
  _redefined(_mov(esp, NewValue));
}

Traits::X86OperandMem *TargetX8632::_sandbox_mem_reference(X86OperandMem *Mem) {
  switch (SandboxingType) {
  case ST_None:
  case ST_NaCl:
    return Mem;
  case ST_Nonsfi: {
    if (Mem->getIsRebased()) {
      return Mem;
    }
    // For Non-SFI mode, if the Offset field is a ConstantRelocatable, we
    // replace either Base or Index with a legalized RebasePtr. At emission
    // time, the ConstantRelocatable will be emitted with the @GOTOFF
    // relocation.
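    // For example (illustrative, not literal emitted text), an access to a
    // global g at [Base + g] becomes [RebasePtrR + Base + g@GOTOFF], i.e. the
    // GOT address plus the GOT-relative offset of g.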
    if (llvm::dyn_cast_or_null<ConstantRelocatable>(Mem->getOffset()) ==
        nullptr) {
      return Mem;
    }
    Variable *T;
    uint16_t Shift = 0;
    if (Mem->getIndex() == nullptr) {
      T = Mem->getBase();
    } else if (Mem->getBase() == nullptr) {
      T = Mem->getIndex();
      Shift = Mem->getShift();
    } else {
      llvm::report_fatal_error(
          "Either Base or Index must be unused in Non-SFI mode");
    }
    Variable *RebasePtrR = legalizeToReg(RebasePtr);
    static constexpr bool IsRebased = true;
    return Traits::X86OperandMem::create(
        Func, Mem->getType(), RebasePtrR, Mem->getOffset(), T, Shift,
        Traits::X86OperandMem::DefaultSegment, IsRebased);
  }
  }
  llvm::report_fatal_error("Unhandled sandboxing type: " +
                           std::to_string(SandboxingType));
}

void TargetX8632::_sub_sp(Operand *Adjustment) {
  Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
  _sub(esp, Adjustment);
  // Add a fake use of the stack pointer, to prevent the stack pointer
  // adjustment from being dead-code eliminated in a function that doesn't
  // return.
  Context.insert<InstFakeUse>(esp);
}

void TargetX8632::_link_bp() {
  Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);
  Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
  _push(ebp);
  _mov(ebp, esp);
  // Keep ebp live for late-stage liveness analysis (e.g. asm-verbose mode).
  Context.insert<InstFakeUse>(ebp);
}

void TargetX8632::_unlink_bp() {
  Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
  Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);
  // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake
  // use of esp before the assignment of esp=ebp keeps previous esp
  // adjustments from being dead-code eliminated.
  Context.insert<InstFakeUse>(esp);
  _mov(esp, ebp);
  _pop(ebp);
}

void TargetX8632::_push_reg(RegNumT RegNum) {
  _push(getPhysicalRegister(RegNum, Traits::WordType));
}

void TargetX8632::_pop_reg(RegNumT RegNum) {
  _pop(getPhysicalRegister(RegNum, Traits::WordType));
}

void TargetX8632::emitGetIP(CfgNode *Node) {
  // If there is a non-deleted InstX86GetIP instruction, we need to move it to
  // the point after the stack frame has stabilized but before
  // register-allocated in-args are copied into their home registers. It would
  // be slightly faster to search for the GetIP instruction before other prolog
  // instructions are inserted, but it's clearer to do the whole transformation
  // in a single place.
  Traits::Insts::GetIP *GetIPInst = nullptr;
  if (getFlags().getUseNonsfi()) {
    for (Inst &Instr : Node->getInsts()) {
      if (auto *GetIP = llvm::dyn_cast<Traits::Insts::GetIP>(&Instr)) {
        if (!Instr.isDeleted())
          GetIPInst = GetIP;
        break;
      }
    }
  }
  // Delete any existing InstX86GetIP instruction and reinsert it here. Also,
  // insert the call to the helper function and the spill to the stack, to
  // simplify emission.
  if (GetIPInst) {
    GetIPInst->setDeleted();
    Variable *Dest = GetIPInst->getDest();
    Variable *CallDest =
        Dest->hasReg() ? Dest
                       : getPhysicalRegister(Traits::RegisterSet::Reg_eax);
    auto *BeforeAddReloc = RelocOffset::create(Ctx);
    BeforeAddReloc->setSubtract(true);
    auto *BeforeAdd = InstX86Label::create(Func, this);
    BeforeAdd->setRelocOffset(BeforeAddReloc);

    auto *AfterAddReloc = RelocOffset::create(Ctx);
    auto *AfterAdd = InstX86Label::create(Func, this);
    AfterAdd->setRelocOffset(AfterAddReloc);

    const RelocOffsetT ImmSize = -typeWidthInBytes(IceType_i32);

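    // GotFromPc is the immediate operand of the add below: a relocatable
    // constant built from _GLOBAL_OFFSET_TABLE_ and the AfterAdd/BeforeAdd
    // labels (with a -4 byte adjustment for the immediate field), arranged so
    // that adding it to the PC value produced by GetIP leaves the absolute
    // address of the GOT in CallDest. (Descriptive sketch of the code below.)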
    auto *GotFromPc =
        llvm::cast<ConstantRelocatable>(Ctx->getConstantSymWithEmitString(
            ImmSize, {AfterAddReloc, BeforeAddReloc},
            Ctx->getGlobalString(GlobalOffsetTable), GlobalOffsetTable));

    // Insert a new version of InstX86GetIP.
    Context.insert<Traits::Insts::GetIP>(CallDest);

    Context.insert(BeforeAdd);
    _add(CallDest, GotFromPc);
    Context.insert(AfterAdd);

    // Spill the register to its home stack location if necessary.
    if (Dest != CallDest) {
      _mov(Dest, CallDest);
    }
  }
}

void TargetX8632::lowerIndirectJump(Variable *JumpTarget) {
  AutoBundle _(this);

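  // Under NaCl sandboxing, indirect control transfers may only target
  // bundle-aligned addresses, so the target register is masked down to a
  // bundle boundary (e.g. with the usual 32-byte bundles the mask is ~31)
  // inside the bundle lock established above.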
  if (NeedSandboxing) {
    const SizeT BundleSize =
        1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
    _and(JumpTarget, Ctx->getConstantInt32(~(BundleSize - 1)));
  }

  _jmp(JumpTarget);
}

void TargetX8632::initRebasePtr() {
  if (SandboxingType == ST_Nonsfi) {
    RebasePtr = Func->makeVariable(IceType_i32);
  }
}

void TargetX8632::initSandbox() {
  if (SandboxingType != ST_Nonsfi) {
    return;
  }
  // Insert the RebasePtr assignment as the very first lowered instruction.
  // Later, it will be moved into the right place - after the stack frame is
  // set up but before in-args are copied into registers.
  Context.init(Func->getEntryNode());
  Context.setInsertPoint(Context.getCur());
  Context.insert<Traits::Insts::GetIP>(RebasePtr);
}

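// Tries to make an address containing a relocatable offset legal for Non-SFI
// mode by folding RebasePtr into an unused Base or Index slot (yielding, for
// example, [RebasePtr + sym] or [Base + RebasePtr + sym]). Returns true when
// no folding is needed or the fold succeeds, and false when both slots are
// already occupied and the caller must legalize the address another way.
// (Descriptive summary of the function below.)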
bool TargetX8632::legalizeOptAddrForSandbox(OptAddr *Addr) {
  if (Addr->Relocatable == nullptr || SandboxingType != ST_Nonsfi) {
    return true;
  }

  if (Addr->Base == RebasePtr || Addr->Index == RebasePtr) {
    return true;
  }

  if (Addr->Base == nullptr) {
    Addr->Base = RebasePtr;
    return true;
  }

  if (Addr->Index == nullptr) {
    Addr->Index = RebasePtr;
    Addr->Shift = 0;
    return true;
  }

  return false;
}

Inst *TargetX8632::emitCallToTarget(Operand *CallTarget, Variable *ReturnReg,
                                    size_t NumVariadicFpArgs) {
  (void)NumVariadicFpArgs;
  // Note that NumVariadicFpArgs is only used for System V x86-64 variadic
  // calls, because floating point arguments are passed via vector registers,
  // whereas for x86-32, all args are passed via the stack.

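  // Under NaCl sandboxing, the call is wrapped in a bundle lock with the
  // align-to-end option so that the call instruction ends exactly on a bundle
  // boundary, leaving a bundle-aligned return address as the sandbox
  // requires. Indirect call targets are additionally masked to a bundle
  // boundary, like indirect jumps.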
  std::unique_ptr<AutoBundle> Bundle;
  if (NeedSandboxing) {
    if (llvm::isa<Constant>(CallTarget)) {
      Bundle = makeUnique<AutoBundle>(this, InstBundleLock::Opt_AlignToEnd);
    } else {
      Variable *CallTargetVar = nullptr;
      _mov(CallTargetVar, CallTarget);
      Bundle = makeUnique<AutoBundle>(this, InstBundleLock::Opt_AlignToEnd);
      const SizeT BundleSize =
          1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
      _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
      CallTarget = CallTargetVar;
    }
  }
  return Context.insert<Traits::Insts::Call>(ReturnReg, CallTarget);
}

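// Moves the return value into the location dictated by the x86-32 calling
// convention used here: vectors are returned in xmm0, scalar floating-point
// values are pushed onto the x87 stack with fld, i64 is returned in edx:eax,
// and i32 in eax.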
Variable *TargetX8632::moveReturnValueToRegister(Operand *Value,
                                                 Type ReturnType) {
  if (isVectorType(ReturnType)) {
    return legalizeToReg(Value, Traits::RegisterSet::Reg_xmm0);
  } else if (isScalarFloatingType(ReturnType)) {
    _fld(Value);
    return nullptr;
  } else {
    assert(ReturnType == IceType_i32 || ReturnType == IceType_i64);
    if (ReturnType == IceType_i64) {
      Variable *eax =
          legalizeToReg(loOperand(Value), Traits::RegisterSet::Reg_eax);
      Variable *edx =
          legalizeToReg(hiOperand(Value), Traits::RegisterSet::Reg_edx);
      Context.insert<InstFakeUse>(edx);
      return eax;
    } else {
      Variable *Reg = nullptr;
      _mov(Reg, Value, Traits::RegisterSet::Reg_eax);
      return Reg;
    }
  }
}

void TargetX8632::emitSandboxedReturn() {
  // Change the original ret instruction into a sandboxed return sequence.
  // t:ecx = pop
  // bundle_lock
  // and t, ~31
  // jmp *t
  // bundle_unlock
  // FakeUse <original_ret_operand>
  Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
  _pop(T_ecx);
  lowerIndirectJump(T_ecx);
}

void TargetX8632::emitStackProbe(size_t StackSizeBytes) {
#if defined(SUBZERO_USE_MICROSOFT_ABI)
  if (StackSizeBytes >= 4096) {
    // _chkstk on Win32 is actually __alloca_probe, which adjusts ESP by the
    // stack amount specified in EAX, so we save ESP in ECX, and restore them
    // both after the call.

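    // Background: Windows grows the stack one guard page at a time, so a
    // frame of 4 KiB or more must touch each page in order; _chkstk performs
    // that page-by-page probing on our behalf.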
    Variable *EAX = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
    Variable *ESP = makeReg(IceType_i32, Traits::RegisterSet::Reg_esp);
    Variable *ECX = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);

    _push_reg(ECX->getRegNum());
    _mov(ECX, ESP);

    _mov(EAX, Ctx->getConstantInt32(StackSizeBytes));

    auto *CallTarget =
        Ctx->getConstantInt32(reinterpret_cast<int32_t>(&_chkstk));
    emitCallToTarget(CallTarget, nullptr);

    _mov(ESP, ECX);
    _pop_reg(ECX->getRegNum());
  }
#endif
}

// In some cases, there are x-macros tables for both high-level and low-level
// instructions/operands that use the same enum key value. The tables are kept
// separate to maintain a proper separation between abstraction layers. There
// is a risk that the tables could get out of sync if enum values are reordered
// or if entries are added or deleted. The following dummy namespaces use
// static_asserts to ensure everything is kept in sync.

namespace {
// Validate the enum values in FCMPX8632_TABLE.
namespace dummy1 {
// Define a temporary set of enum values based on low-level table entries.
enum _tmp_enum {
#define X(val, dflt, swapS, C1, C2, swapV, pred) _tmp_##val,
  FCMPX8632_TABLE
#undef X
  _num
};
// Define a set of constants based on high-level table entries.
#define X(tag, str) static const int _table1_##tag = InstFcmp::tag;
ICEINSTFCMP_TABLE
#undef X
// Define a set of constants based on low-level table entries, and ensure the
// table entry keys are consistent.
#define X(val, dflt, swapS, C1, C2, swapV, pred) \
  static const int _table2_##val = _tmp_##val; \
  static_assert( \
      _table1_##val == _table2_##val, \
      "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE");
FCMPX8632_TABLE
#undef X
// Repeat the static asserts with respect to the high-level table entries in
// case the high-level table has extra entries.
#define X(tag, str) \
  static_assert( \
      _table1_##tag == _table2_##tag, \
      "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE");
ICEINSTFCMP_TABLE
#undef X
} // end of namespace dummy1

// Validate the enum values in ICMPX8632_TABLE.
namespace dummy2 {
// Define a temporary set of enum values based on low-level table entries.
enum _tmp_enum {
#define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val,
  ICMPX8632_TABLE
#undef X
  _num
};
// Define a set of constants based on high-level table entries.
#define X(tag, reverse, str) static const int _table1_##tag = InstIcmp::tag;
ICEINSTICMP_TABLE
#undef X
// Define a set of constants based on low-level table entries, and ensure the
// table entry keys are consistent.
#define X(val, C_32, C1_64, C2_64, C3_64) \
  static const int _table2_##val = _tmp_##val; \
  static_assert( \
      _table1_##val == _table2_##val, \
      "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE");
ICMPX8632_TABLE
#undef X
// Repeat the static asserts with respect to the high-level table entries in
// case the high-level table has extra entries.
#define X(tag, reverse, str) \
  static_assert( \
      _table1_##tag == _table2_##tag, \
      "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE");
ICEINSTICMP_TABLE
#undef X
} // end of namespace dummy2

// Validate the enum values in ICETYPEX8632_TABLE.
namespace dummy3 {
// Define a temporary set of enum values based on low-level table entries.
enum _tmp_enum {
#define X(tag, elty, cvt, sdss, pdps, spsd, int_, unpack, pack, width, fld) \
  _tmp_##tag,
  ICETYPEX8632_TABLE
#undef X
  _num
};
// Define a set of constants based on high-level table entries.
#define X(tag, sizeLog2, align, elts, elty, str, rcstr) \
  static const int _table1_##tag = IceType_##tag;
ICETYPE_TABLE
#undef X
// Define a set of constants based on low-level table entries, and ensure the
// table entry keys are consistent.
#define X(tag, elty, cvt, sdss, pdps, spsd, int_, unpack, pack, width, fld) \
  static const int _table2_##tag = _tmp_##tag; \
  static_assert(_table1_##tag == _table2_##tag, \
                "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
ICETYPEX8632_TABLE
#undef X
// Repeat the static asserts with respect to the high-level table entries in
// case the high-level table has extra entries.
#define X(tag, sizeLog2, align, elts, elty, str, rcstr) \
  static_assert(_table1_##tag == _table2_##tag, \
                "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
ICETYPE_TABLE
#undef X
} // end of namespace dummy3
} // end of anonymous namespace

} // end of namespace X8632
} // end of namespace Ice