1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2 //
3 // The Subzero Code Generator
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief Implements the TargetLoweringX8632 class, which consists almost
12 /// entirely of the lowering sequence for each high-level instruction.
13 ///
14 //===----------------------------------------------------------------------===//
15
16 #include "IceTargetLoweringX8632.h"
17
18 #include "IceTargetLoweringX8632Traits.h"
19
20 #if defined(SUBZERO_USE_MICROSOFT_ABI)
21 extern "C" void _chkstk();
22 #endif
23
24 namespace X8632 {
createTargetLowering(::Ice::Cfg * Func)25 std::unique_ptr<::Ice::TargetLowering> createTargetLowering(::Ice::Cfg *Func) {
26 return ::Ice::X8632::TargetX8632::create(Func);
27 }
28
29 std::unique_ptr<::Ice::TargetDataLowering>
createTargetDataLowering(::Ice::GlobalContext * Ctx)30 createTargetDataLowering(::Ice::GlobalContext *Ctx) {
31 return ::Ice::X8632::TargetDataX86<::Ice::X8632::TargetX8632Traits>::create(
32 Ctx);
33 }
34
35 std::unique_ptr<::Ice::TargetHeaderLowering>
createTargetHeaderLowering(::Ice::GlobalContext * Ctx)36 createTargetHeaderLowering(::Ice::GlobalContext *Ctx) {
37 return ::Ice::X8632::TargetHeaderX86::create(Ctx);
38 }
39
staticInit(::Ice::GlobalContext * Ctx)40 void staticInit(::Ice::GlobalContext *Ctx) {
41 ::Ice::X8632::TargetX8632::staticInit(Ctx);
42 if (Ice::getFlags().getUseNonsfi()) {
43 // In nonsfi, we need to reference the _GLOBAL_OFFSET_TABLE_ for accessing
44 // globals. The GOT is an external symbol (i.e., it is not defined in the
45 // pexe) so we need to register it as such so that ELF emission won't barf
46 // on an "unknown" symbol. The GOT is added to the External symbols list
47 // here because staticInit() is invoked in a single-thread context.
48 Ctx->getConstantExternSym(Ctx->getGlobalString(::Ice::GlobalOffsetTable));
49 }
50 }
51
shouldBePooled(const class::Ice::Constant * C)52 bool shouldBePooled(const class ::Ice::Constant *C) {
53 return ::Ice::X8632::TargetX8632::shouldBePooled(C);
54 }
55
getPointerType()56 ::Ice::Type getPointerType() {
57 return ::Ice::X8632::TargetX8632::getPointerType();
58 }
59
60 } // end of namespace X8632
61
62 namespace Ice {
63 namespace X8632 {
64
65 //------------------------------------------------------------------------------
66 // ______ ______ ______ __ ______ ______
67 // /\__ _\ /\ == \ /\ __ \ /\ \ /\__ _\ /\ ___\
68 // \/_/\ \/ \ \ __< \ \ __ \ \ \ \ \/_/\ \/ \ \___ \
69 // \ \_\ \ \_\ \_\ \ \_\ \_\ \ \_\ \ \_\ \/\_____\
70 // \/_/ \/_/ /_/ \/_/\/_/ \/_/ \/_/ \/_____/
71 //
72 //------------------------------------------------------------------------------
73 const TargetX8632Traits::TableFcmpType TargetX8632Traits::TableFcmp[] = {
74 #define X(val, dflt, swapS, C1, C2, swapV, pred) \
75 { \
76 dflt, swapS, X8632::Traits::Cond::C1, X8632::Traits::Cond::C2, swapV, \
77 X8632::Traits::Cond::pred \
78 } \
79 ,
80 FCMPX8632_TABLE
81 #undef X
82 };
83
84 const size_t TargetX8632Traits::TableFcmpSize = llvm::array_lengthof(TableFcmp);
85
86 const TargetX8632Traits::TableIcmp32Type TargetX8632Traits::TableIcmp32[] = {
87 #define X(val, C_32, C1_64, C2_64, C3_64) \
88 { X8632::Traits::Cond::C_32 } \
89 ,
90 ICMPX8632_TABLE
91 #undef X
92 };
93
94 const size_t TargetX8632Traits::TableIcmp32Size =
95 llvm::array_lengthof(TableIcmp32);
96
97 const TargetX8632Traits::TableIcmp64Type TargetX8632Traits::TableIcmp64[] = {
98 #define X(val, C_32, C1_64, C2_64, C3_64) \
99 { \
100 X8632::Traits::Cond::C1_64, X8632::Traits::Cond::C2_64, \
101 X8632::Traits::Cond::C3_64 \
102 } \
103 ,
104 ICMPX8632_TABLE
105 #undef X
106 };
107
108 const size_t TargetX8632Traits::TableIcmp64Size =
109 llvm::array_lengthof(TableIcmp64);
110
111 const TargetX8632Traits::TableTypeX8632AttributesType
112 TargetX8632Traits::TableTypeX8632Attributes[] = {
113 #define X(tag, elty, cvt, sdss, pdps, spsd, int_, unpack, pack, width, fld) \
114 { IceType_##elty } \
115 ,
116 ICETYPEX8632_TABLE
117 #undef X
118 };
119
120 const size_t TargetX8632Traits::TableTypeX8632AttributesSize =
121 llvm::array_lengthof(TableTypeX8632Attributes);
122
123 #if defined(SUBZERO_USE_MICROSOFT_ABI)
124 // Windows 32-bit only guarantees 4 byte stack alignment
125 const uint32_t TargetX8632Traits::X86_STACK_ALIGNMENT_BYTES = 4;
126 #else
127 const uint32_t TargetX8632Traits::X86_STACK_ALIGNMENT_BYTES = 16;
128 #endif
129 const char *TargetX8632Traits::TargetName = "X8632";
130
131 template <>
132 std::array<SmallBitVector, RCX86_NUM>
133 TargetX86Base<X8632::Traits>::TypeToRegisterSet = {{}};
134
135 template <>
136 std::array<SmallBitVector, RCX86_NUM>
137 TargetX86Base<X8632::Traits>::TypeToRegisterSetUnfiltered = {{}};
138
139 template <>
140 std::array<SmallBitVector,
141 TargetX86Base<X8632::Traits>::Traits::RegisterSet::Reg_NUM>
142 TargetX86Base<X8632::Traits>::RegisterAliases = {{}};
143
144 template <>
145 FixupKind TargetX86Base<X8632::Traits>::PcRelFixup =
146 TargetX86Base<X8632::Traits>::Traits::FK_PcRel;
147
148 template <>
149 FixupKind TargetX86Base<X8632::Traits>::AbsFixup =
150 TargetX86Base<X8632::Traits>::Traits::FK_Abs;
151
152 //------------------------------------------------------------------------------
153 // __ ______ __ __ ______ ______ __ __ __ ______
154 // /\ \ /\ __ \/\ \ _ \ \/\ ___\/\ == \/\ \/\ "-.\ \/\ ___\
155 // \ \ \___\ \ \/\ \ \ \/ ".\ \ \ __\\ \ __<\ \ \ \ \-. \ \ \__ \
156 // \ \_____\ \_____\ \__/".~\_\ \_____\ \_\ \_\ \_\ \_\\"\_\ \_____\
157 // \/_____/\/_____/\/_/ \/_/\/_____/\/_/ /_/\/_/\/_/ \/_/\/_____/
158 //
159 //------------------------------------------------------------------------------
_add_sp(Operand * Adjustment)160 void TargetX8632::_add_sp(Operand *Adjustment) {
161 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
162 _add(esp, Adjustment);
163 }
164
_mov_sp(Operand * NewValue)165 void TargetX8632::_mov_sp(Operand *NewValue) {
166 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
167 _redefined(_mov(esp, NewValue));
168 }
169
_sandbox_mem_reference(X86OperandMem * Mem)170 Traits::X86OperandMem *TargetX8632::_sandbox_mem_reference(X86OperandMem *Mem) {
171 switch (SandboxingType) {
172 case ST_None:
173 case ST_NaCl:
174 return Mem;
175 case ST_Nonsfi: {
176 if (Mem->getIsRebased()) {
177 return Mem;
178 }
179 // For Non-SFI mode, if the Offset field is a ConstantRelocatable, we
180 // replace either Base or Index with a legalized RebasePtr. At emission
181 // time, the ConstantRelocatable will be emitted with the @GOTOFF
182 // relocation.
183 if (llvm::dyn_cast_or_null<ConstantRelocatable>(Mem->getOffset()) ==
184 nullptr) {
185 return Mem;
186 }
187 Variable *T;
188 uint16_t Shift = 0;
189 if (Mem->getIndex() == nullptr) {
190 T = Mem->getBase();
191 } else if (Mem->getBase() == nullptr) {
192 T = Mem->getIndex();
193 Shift = Mem->getShift();
194 } else {
195 llvm::report_fatal_error(
196 "Either Base or Index must be unused in Non-SFI mode");
197 }
198 Variable *RebasePtrR = legalizeToReg(RebasePtr);
199 static constexpr bool IsRebased = true;
200 return Traits::X86OperandMem::create(
201 Func, Mem->getType(), RebasePtrR, Mem->getOffset(), T, Shift,
202 Traits::X86OperandMem::DefaultSegment, IsRebased);
203 }
204 }
205 llvm::report_fatal_error("Unhandled sandboxing type: " +
206 std::to_string(SandboxingType));
207 }
208
_sub_sp(Operand * Adjustment)209 void TargetX8632::_sub_sp(Operand *Adjustment) {
210 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
211 _sub(esp, Adjustment);
212 // Add a fake use of the stack pointer, to prevent the stack pointer adustment
213 // from being dead-code eliminated in a function that doesn't return.
214 Context.insert<InstFakeUse>(esp);
215 }
216
_link_bp()217 void TargetX8632::_link_bp() {
218 Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);
219 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
220 _push(ebp);
221 _mov(ebp, esp);
222 // Keep ebp live for late-stage liveness analysis (e.g. asm-verbose mode).
223 Context.insert<InstFakeUse>(ebp);
224 }
225
_unlink_bp()226 void TargetX8632::_unlink_bp() {
227 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
228 Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);
229 // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake
230 // use of esp before the assignment of esp=ebp keeps previous esp
231 // adjustments from being dead-code eliminated.
232 Context.insert<InstFakeUse>(esp);
233 _mov(esp, ebp);
234 _pop(ebp);
235 }
236
_push_reg(RegNumT RegNum)237 void TargetX8632::_push_reg(RegNumT RegNum) {
238 _push(getPhysicalRegister(RegNum, Traits::WordType));
239 }
240
_pop_reg(RegNumT RegNum)241 void TargetX8632::_pop_reg(RegNumT RegNum) {
242 _pop(getPhysicalRegister(RegNum, Traits::WordType));
243 }
244
emitGetIP(CfgNode * Node)245 void TargetX8632::emitGetIP(CfgNode *Node) {
246 // If there is a non-deleted InstX86GetIP instruction, we need to move it to
247 // the point after the stack frame has stabilized but before
248 // register-allocated in-args are copied into their home registers. It would
249 // be slightly faster to search for the GetIP instruction before other prolog
250 // instructions are inserted, but it's more clear to do the whole
251 // transformation in a single place.
252 Traits::Insts::GetIP *GetIPInst = nullptr;
253 if (getFlags().getUseNonsfi()) {
254 for (Inst &Instr : Node->getInsts()) {
255 if (auto *GetIP = llvm::dyn_cast<Traits::Insts::GetIP>(&Instr)) {
256 if (!Instr.isDeleted())
257 GetIPInst = GetIP;
258 break;
259 }
260 }
261 }
262 // Delete any existing InstX86GetIP instruction and reinsert it here. Also,
263 // insert the call to the helper function and the spill to the stack, to
264 // simplify emission.
265 if (GetIPInst) {
266 GetIPInst->setDeleted();
267 Variable *Dest = GetIPInst->getDest();
268 Variable *CallDest =
269 Dest->hasReg() ? Dest
270 : getPhysicalRegister(Traits::RegisterSet::Reg_eax);
271 auto *BeforeAddReloc = RelocOffset::create(Ctx);
272 BeforeAddReloc->setSubtract(true);
273 auto *BeforeAdd = InstX86Label::create(Func, this);
274 BeforeAdd->setRelocOffset(BeforeAddReloc);
275
276 auto *AfterAddReloc = RelocOffset::create(Ctx);
277 auto *AfterAdd = InstX86Label::create(Func, this);
278 AfterAdd->setRelocOffset(AfterAddReloc);
279
280 const RelocOffsetT ImmSize = -typeWidthInBytes(IceType_i32);
281
282 auto *GotFromPc =
283 llvm::cast<ConstantRelocatable>(Ctx->getConstantSymWithEmitString(
284 ImmSize, {AfterAddReloc, BeforeAddReloc},
285 Ctx->getGlobalString(GlobalOffsetTable), GlobalOffsetTable));
286
287 // Insert a new version of InstX86GetIP.
288 Context.insert<Traits::Insts::GetIP>(CallDest);
289
290 Context.insert(BeforeAdd);
291 _add(CallDest, GotFromPc);
292 Context.insert(AfterAdd);
293
294 // Spill the register to its home stack location if necessary.
295 if (Dest != CallDest) {
296 _mov(Dest, CallDest);
297 }
298 }
299 }
300
lowerIndirectJump(Variable * JumpTarget)301 void TargetX8632::lowerIndirectJump(Variable *JumpTarget) {
302 AutoBundle _(this);
303
304 if (NeedSandboxing) {
305 const SizeT BundleSize =
306 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
307 _and(JumpTarget, Ctx->getConstantInt32(~(BundleSize - 1)));
308 }
309
310 _jmp(JumpTarget);
311 }
312
initRebasePtr()313 void TargetX8632::initRebasePtr() {
314 if (SandboxingType == ST_Nonsfi) {
315 RebasePtr = Func->makeVariable(IceType_i32);
316 }
317 }
318
initSandbox()319 void TargetX8632::initSandbox() {
320 if (SandboxingType != ST_Nonsfi) {
321 return;
322 }
323 // Insert the RebasePtr assignment as the very first lowered instruction.
324 // Later, it will be moved into the right place - after the stack frame is set
325 // up but before in-args are copied into registers.
326 Context.init(Func->getEntryNode());
327 Context.setInsertPoint(Context.getCur());
328 Context.insert<Traits::Insts::GetIP>(RebasePtr);
329 }
330
legalizeOptAddrForSandbox(OptAddr * Addr)331 bool TargetX8632::legalizeOptAddrForSandbox(OptAddr *Addr) {
332 if (Addr->Relocatable == nullptr || SandboxingType != ST_Nonsfi) {
333 return true;
334 }
335
336 if (Addr->Base == RebasePtr || Addr->Index == RebasePtr) {
337 return true;
338 }
339
340 if (Addr->Base == nullptr) {
341 Addr->Base = RebasePtr;
342 return true;
343 }
344
345 if (Addr->Index == nullptr) {
346 Addr->Index = RebasePtr;
347 Addr->Shift = 0;
348 return true;
349 }
350
351 return false;
352 }
353
emitCallToTarget(Operand * CallTarget,Variable * ReturnReg,size_t NumVariadicFpArgs)354 Inst *TargetX8632::emitCallToTarget(Operand *CallTarget, Variable *ReturnReg,
355 size_t NumVariadicFpArgs) {
356 (void)NumVariadicFpArgs;
357 // Note that NumVariadicFpArgs is only used for System V x86-64 variadic
358 // calls, because floating point arguments are passed via vector registers,
359 // whereas for x86-32, all args are passed via the stack.
360
361 std::unique_ptr<AutoBundle> Bundle;
362 if (NeedSandboxing) {
363 if (llvm::isa<Constant>(CallTarget)) {
364 Bundle = makeUnique<AutoBundle>(this, InstBundleLock::Opt_AlignToEnd);
365 } else {
366 Variable *CallTargetVar = nullptr;
367 _mov(CallTargetVar, CallTarget);
368 Bundle = makeUnique<AutoBundle>(this, InstBundleLock::Opt_AlignToEnd);
369 const SizeT BundleSize =
370 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
371 _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
372 CallTarget = CallTargetVar;
373 }
374 }
375 return Context.insert<Traits::Insts::Call>(ReturnReg, CallTarget);
376 }
377
moveReturnValueToRegister(Operand * Value,Type ReturnType)378 Variable *TargetX8632::moveReturnValueToRegister(Operand *Value,
379 Type ReturnType) {
380 if (isVectorType(ReturnType)) {
381 return legalizeToReg(Value, Traits::RegisterSet::Reg_xmm0);
382 } else if (isScalarFloatingType(ReturnType)) {
383 _fld(Value);
384 return nullptr;
385 } else {
386 assert(ReturnType == IceType_i32 || ReturnType == IceType_i64);
387 if (ReturnType == IceType_i64) {
388 Variable *eax =
389 legalizeToReg(loOperand(Value), Traits::RegisterSet::Reg_eax);
390 Variable *edx =
391 legalizeToReg(hiOperand(Value), Traits::RegisterSet::Reg_edx);
392 Context.insert<InstFakeUse>(edx);
393 return eax;
394 } else {
395 Variable *Reg = nullptr;
396 _mov(Reg, Value, Traits::RegisterSet::Reg_eax);
397 return Reg;
398 }
399 }
400 }
401
emitSandboxedReturn()402 void TargetX8632::emitSandboxedReturn() {
403 // Change the original ret instruction into a sandboxed return sequence.
404 // t:ecx = pop
405 // bundle_lock
406 // and t, ~31
407 // jmp *t
408 // bundle_unlock
409 // FakeUse <original_ret_operand>
410 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
411 _pop(T_ecx);
412 lowerIndirectJump(T_ecx);
413 }
414
emitStackProbe(size_t StackSizeBytes)415 void TargetX8632::emitStackProbe(size_t StackSizeBytes) {
416 #if defined(SUBZERO_USE_MICROSOFT_ABI)
417 if (StackSizeBytes >= 4096) {
418 // _chkstk on Win32 is actually __alloca_probe, which adjusts ESP by the
419 // stack amount specified in EAX, so we save ESP in ECX, and restore them
420 // both after the call.
421
422 Variable *EAX = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
423 Variable *ESP = makeReg(IceType_i32, Traits::RegisterSet::Reg_esp);
424 Variable *ECX = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
425
426 _push_reg(ECX->getRegNum());
427 _mov(ECX, ESP);
428
429 _mov(EAX, Ctx->getConstantInt32(StackSizeBytes));
430
431 auto *CallTarget =
432 Ctx->getConstantInt32(reinterpret_cast<int32_t>(&_chkstk));
433 emitCallToTarget(CallTarget, nullptr);
434
435 _mov(ESP, ECX);
436 _pop_reg(ECX->getRegNum());
437 }
438 #endif
439 }
440
441 // In some cases, there are x-macros tables for both high-level and low-level
442 // instructions/operands that use the same enum key value. The tables are kept
443 // separate to maintain a proper separation between abstraction layers. There
444 // is a risk that the tables could get out of sync if enum values are reordered
445 // or if entries are added or deleted. The following dummy namespaces use
446 // static_asserts to ensure everything is kept in sync.
447
448 namespace {
449 // Validate the enum values in FCMPX8632_TABLE.
450 namespace dummy1 {
451 // Define a temporary set of enum values based on low-level table entries.
452 enum _tmp_enum {
453 #define X(val, dflt, swapS, C1, C2, swapV, pred) _tmp_##val,
454 FCMPX8632_TABLE
455 #undef X
456 _num
457 };
458 // Define a set of constants based on high-level table entries.
459 #define X(tag, str) static const int _table1_##tag = InstFcmp::tag;
460 ICEINSTFCMP_TABLE
461 #undef X
462 // Define a set of constants based on low-level table entries, and ensure the
463 // table entry keys are consistent.
464 #define X(val, dflt, swapS, C1, C2, swapV, pred) \
465 static const int _table2_##val = _tmp_##val; \
466 static_assert( \
467 _table1_##val == _table2_##val, \
468 "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE");
469 FCMPX8632_TABLE
470 #undef X
471 // Repeat the static asserts with respect to the high-level table entries in
472 // case the high-level table has extra entries.
473 #define X(tag, str) \
474 static_assert( \
475 _table1_##tag == _table2_##tag, \
476 "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE");
477 ICEINSTFCMP_TABLE
478 #undef X
479 } // end of namespace dummy1
480
481 // Validate the enum values in ICMPX8632_TABLE.
482 namespace dummy2 {
483 // Define a temporary set of enum values based on low-level table entries.
484 enum _tmp_enum {
485 #define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val,
486 ICMPX8632_TABLE
487 #undef X
488 _num
489 };
490 // Define a set of constants based on high-level table entries.
491 #define X(tag, reverse, str) static const int _table1_##tag = InstIcmp::tag;
492 ICEINSTICMP_TABLE
493 #undef X
494 // Define a set of constants based on low-level table entries, and ensure the
495 // table entry keys are consistent.
496 #define X(val, C_32, C1_64, C2_64, C3_64) \
497 static const int _table2_##val = _tmp_##val; \
498 static_assert( \
499 _table1_##val == _table2_##val, \
500 "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE");
501 ICMPX8632_TABLE
502 #undef X
503 // Repeat the static asserts with respect to the high-level table entries in
504 // case the high-level table has extra entries.
505 #define X(tag, reverse, str) \
506 static_assert( \
507 _table1_##tag == _table2_##tag, \
508 "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE");
509 ICEINSTICMP_TABLE
510 #undef X
511 } // end of namespace dummy2
512
513 // Validate the enum values in ICETYPEX8632_TABLE.
514 namespace dummy3 {
515 // Define a temporary set of enum values based on low-level table entries.
516 enum _tmp_enum {
517 #define X(tag, elty, cvt, sdss, pdps, spsd, int_, unpack, pack, width, fld) \
518 _tmp_##tag,
519 ICETYPEX8632_TABLE
520 #undef X
521 _num
522 };
523 // Define a set of constants based on high-level table entries.
524 #define X(tag, sizeLog2, align, elts, elty, str, rcstr) \
525 static const int _table1_##tag = IceType_##tag;
526 ICETYPE_TABLE
527 #undef X
528 // Define a set of constants based on low-level table entries, and ensure the
529 // table entry keys are consistent.
530 #define X(tag, elty, cvt, sdss, pdps, spsd, int_, unpack, pack, width, fld) \
531 static const int _table2_##tag = _tmp_##tag; \
532 static_assert(_table1_##tag == _table2_##tag, \
533 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
534 ICETYPEX8632_TABLE
535 #undef X
536 // Repeat the static asserts with respect to the high-level table entries in
537 // case the high-level table has extra entries.
538 #define X(tag, sizeLog2, align, elts, elty, str, rcstr) \
539 static_assert(_table1_##tag == _table2_##tag, \
540 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
541 ICETYPE_TABLE
542 #undef X
543 } // end of namespace dummy3
544 } // end of anonymous namespace
545
546 } // end of namespace X8632
547 } // end of namespace Ice
548