//===- subzero/src/IceTargetLoweringX8664.cpp - x86-64 lowering -----------===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Implements the TargetLoweringX8664 class, which consists almost
/// entirely of the lowering sequence for each high-level instruction.
///
//===----------------------------------------------------------------------===//

#include "IceTargetLoweringX8664.h"

#include "IceDefs.h"
#include "IceTargetLoweringX8664Traits.h"

#if defined(SUBZERO_USE_MICROSOFT_ABI)
extern "C" void __chkstk();
#endif
namespace X8664 {
std::unique_ptr<::Ice::TargetLowering> createTargetLowering(::Ice::Cfg *Func) {
  return ::Ice::X8664::TargetX8664::create(Func);
}

std::unique_ptr<::Ice::TargetDataLowering>
createTargetDataLowering(::Ice::GlobalContext *Ctx) {
  return ::Ice::X8664::TargetDataX86<::Ice::X8664::TargetX8664Traits>::create(
      Ctx);
}

std::unique_ptr<::Ice::TargetHeaderLowering>
createTargetHeaderLowering(::Ice::GlobalContext *Ctx) {
  return ::Ice::X8664::TargetHeaderX86::create(Ctx);
}

void staticInit(::Ice::GlobalContext *Ctx) {
  ::Ice::X8664::TargetX8664::staticInit(Ctx);
}

bool shouldBePooled(const class ::Ice::Constant *C) {
  return ::Ice::X8664::TargetX8664::shouldBePooled(C);
}

::Ice::Type getPointerType() {
  return ::Ice::X8664::TargetX8664::getPointerType();
}

} // end of namespace X8664

namespace Ice {
namespace X8664 {

//------------------------------------------------------------------------------
// ______ ______ ______ __ ______ ______
// /\__ _\ /\ == \ /\ __ \ /\ \ /\__ _\ /\ ___\
// \/_/\ \/ \ \ __< \ \ __ \ \ \ \ \/_/\ \/ \ \___ \
// \ \_\ \ \_\ \_\ \ \_\ \_\ \ \_\ \ \_\ \/\_____\
// \/_/ \/_/ /_/ \/_/\/_/ \/_/ \/_/ \/_____/
//
//------------------------------------------------------------------------------
const TargetX8664Traits::TableFcmpType TargetX8664Traits::TableFcmp[] = {
#define X(val, dflt, swapS, C1, C2, swapV, pred)                               \
  {dflt,                                                                       \
   swapS,                                                                      \
   X8664::Traits::Cond::C1,                                                    \
   X8664::Traits::Cond::C2,                                                    \
   swapV,                                                                      \
   X8664::Traits::Cond::pred},
    FCMPX8664_TABLE
#undef X
};

const size_t TargetX8664Traits::TableFcmpSize = llvm::array_lengthof(TableFcmp);

const TargetX8664Traits::TableIcmp32Type TargetX8664Traits::TableIcmp32[] = {
#define X(val, C_32, C1_64, C2_64, C3_64) {X8664::Traits::Cond::C_32},
    ICMPX8664_TABLE
#undef X
};

const size_t TargetX8664Traits::TableIcmp32Size =
    llvm::array_lengthof(TableIcmp32);

const TargetX8664Traits::TableIcmp64Type TargetX8664Traits::TableIcmp64[] = {
#define X(val, C_32, C1_64, C2_64, C3_64)                                      \
  {X8664::Traits::Cond::C1_64, X8664::Traits::Cond::C2_64,                     \
   X8664::Traits::Cond::C3_64},
    ICMPX8664_TABLE
#undef X
};

const size_t TargetX8664Traits::TableIcmp64Size =
    llvm::array_lengthof(TableIcmp64);

const TargetX8664Traits::TableTypeX8664AttributesType
    TargetX8664Traits::TableTypeX8664Attributes[] = {
#define X(tag, elty, cvt, sdss, pdps, spsd, int_, unpack, pack, width, fld)    \
  {IceType_##elty},
        ICETYPEX8664_TABLE
#undef X
};

const size_t TargetX8664Traits::TableTypeX8664AttributesSize =
    llvm::array_lengthof(TableTypeX8664Attributes);

const uint32_t TargetX8664Traits::X86_STACK_ALIGNMENT_BYTES = 16;
const char *TargetX8664Traits::TargetName = "X8664";

template <>
std::array<SmallBitVector, RCX86_NUM>
    TargetX86Base<X8664::Traits>::TypeToRegisterSet = {{}};

template <>
std::array<SmallBitVector, RCX86_NUM>
    TargetX86Base<X8664::Traits>::TypeToRegisterSetUnfiltered = {{}};

template <>
std::array<SmallBitVector,
           TargetX86Base<X8664::Traits>::Traits::RegisterSet::Reg_NUM>
    TargetX86Base<X8664::Traits>::RegisterAliases = {{}};

template <>
FixupKind TargetX86Base<X8664::Traits>::PcRelFixup =
    TargetX86Base<X8664::Traits>::Traits::FK_PcRel;

template <>
FixupKind TargetX86Base<X8664::Traits>::AbsFixup =
    TargetX86Base<X8664::Traits>::Traits::FK_Abs;

//------------------------------------------------------------------------------
// __ ______ __ __ ______ ______ __ __ __ ______
// /\ \ /\ __ \/\ \ _ \ \/\ ___\/\ == \/\ \/\ "-.\ \/\ ___\
// \ \ \___\ \ \/\ \ \ \/ ".\ \ \ __\\ \ __<\ \ \ \ \-. \ \ \__ \
// \ \_____\ \_____\ \__/".~\_\ \_____\ \_\ \_\ \_\ \_\\"\_\ \_____\
// \/_____/\/_____/\/_/ \/_/\/_____/\/_/ /_/\/_/\/_/ \/_/\/_____/
//
//------------------------------------------------------------------------------
void TargetX8664::_add_sp(Operand *Adjustment) {
  Variable *rsp =
      getPhysicalRegister(Traits::RegisterSet::Reg_rsp, IceType_i64);
  if (!NeedSandboxing) {
    _add(rsp, Adjustment);
    return;
  }

  Variable *esp =
      getPhysicalRegister(Traits::RegisterSet::Reg_esp, IceType_i32);
  Variable *r15 =
      getPhysicalRegister(Traits::RegisterSet::Reg_r15, IceType_i64);

  // When incrementing rsp, NaCl sandboxing requires the following sequence
  //
  // .bundle_start
  // add Adjustment, %esp
  // add %r15, %rsp
  // .bundle_end
  //
  // In Subzero, even though rsp and esp alias each other, defining one does
  // not define the other. Therefore, we must emit
  //
  // .bundle_start
  // %esp = fake-def %rsp
  // add Adjustment, %esp
  // %rsp = fake-def %esp
  // add %r15, %rsp
  // .bundle_end
  //
  // The fake-defs ensure that the
  //
  // add Adjustment, %esp
  //
  // instruction is not DCE'd.
  AutoBundle _(this);
  _redefined(Context.insert<InstFakeDef>(esp, rsp));
  _add(esp, Adjustment);
  _redefined(Context.insert<InstFakeDef>(rsp, esp));
  _add(rsp, r15);
}

void TargetX8664::_mov_sp(Operand *NewValue) {
  assert(NewValue->getType() == IceType_i32);

  Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
  Variable *rsp =
      getPhysicalRegister(Traits::RegisterSet::Reg_rsp, IceType_i64);

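  // In the sandboxed case, the bundle-locked sequence below is, roughly:
  //
  // .bundle_start
  // mov NewValue, %esp
  // add %r15, %rsp
  // .bundle_end
  //
  // with fake-defs tying esp and rsp together, as in _add_sp() above.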
  AutoBundle _(this);

  _redefined(Context.insert<InstFakeDef>(esp, rsp));
  _redefined(_mov(esp, NewValue));
  _redefined(Context.insert<InstFakeDef>(rsp, esp));

  if (!NeedSandboxing) {
    return;
  }

  Variable *r15 =
      getPhysicalRegister(Traits::RegisterSet::Reg_r15, IceType_i64);
  _add(rsp, r15);
}

void TargetX8664::_push_rbp() {
  assert(NeedSandboxing);

  Constant *_0 = Ctx->getConstantZero(IceType_i32);
  Variable *ebp =
      getPhysicalRegister(Traits::RegisterSet::Reg_ebp, IceType_i32);
  Variable *rsp =
      getPhysicalRegister(Traits::RegisterSet::Reg_rsp, IceType_i64);
  auto *TopOfStack = llvm::cast<X86OperandMem>(
      legalize(X86OperandMem::create(Func, IceType_i32, rsp, _0),
               Legal_Reg | Legal_Mem));

  // Emits a sequence:
  //
  // .bundle_start
  // push 0
  // mov %ebp, %(rsp)
  // .bundle_end
  //
  // to avoid leaking the upper 32-bits (i.e., the sandbox address.)
  AutoBundle _(this);
  _push(_0);
  Context.insert<typename Traits::Insts::Store>(ebp, TopOfStack);
}

void TargetX8664::_link_bp() {
  Variable *esp =
      getPhysicalRegister(Traits::RegisterSet::Reg_esp, IceType_i32);
  Variable *rsp =
      getPhysicalRegister(Traits::RegisterSet::Reg_rsp, Traits::WordType);
  Variable *ebp =
      getPhysicalRegister(Traits::RegisterSet::Reg_ebp, IceType_i32);
  Variable *rbp =
      getPhysicalRegister(Traits::RegisterSet::Reg_rbp, Traits::WordType);
  Variable *r15 =
      getPhysicalRegister(Traits::RegisterSet::Reg_r15, Traits::WordType);

  if (!NeedSandboxing) {
    _push(rbp);
    _mov(rbp, rsp);
  } else {
    _push_rbp();

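    // The bundle-locked sequence below is, roughly:
    //
    // .bundle_start
    // mov %esp, %ebp
    // add %r15, %rbp
    // .bundle_end
    //
    // so that rbp is rebased before it is ever used as a frame pointer.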
    AutoBundle _(this);
    _redefined(Context.insert<InstFakeDef>(ebp, rbp));
    _redefined(Context.insert<InstFakeDef>(esp, rsp));
    _mov(ebp, esp);
    _redefined(Context.insert<InstFakeDef>(rsp, esp));
    _add(rbp, r15);
  }
  // Keep rbp live for late-stage liveness analysis (e.g. asm-verbose mode).
  Context.insert<InstFakeUse>(rbp);
}

void TargetX8664::_unlink_bp() {
  Variable *rsp =
      getPhysicalRegister(Traits::RegisterSet::Reg_rsp, IceType_i64);
  Variable *rbp =
      getPhysicalRegister(Traits::RegisterSet::Reg_rbp, IceType_i64);
  Variable *ebp =
      getPhysicalRegister(Traits::RegisterSet::Reg_ebp, IceType_i32);
  // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake
  // use of rsp before the assignment of rsp=rbp keeps previous rsp
  // adjustments from being dead-code eliminated.
  Context.insert<InstFakeUse>(rsp);
  if (!NeedSandboxing) {
    _mov(rsp, rbp);
    _pop(rbp);
  } else {
    _mov_sp(ebp);

    Variable *r15 =
        getPhysicalRegister(Traits::RegisterSet::Reg_r15, IceType_i64);
    Variable *rcx =
        getPhysicalRegister(Traits::RegisterSet::Reg_rcx, IceType_i64);
    Variable *ecx =
        getPhysicalRegister(Traits::RegisterSet::Reg_ecx, IceType_i32);

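    // The saved frame pointer is popped into rcx rather than directly into
    // rbp, then moved and rebased inside a bundle, so that rbp never
    // transiently holds an unrebased value.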
    _pop(rcx);
    Context.insert<InstFakeDef>(ecx, rcx);
    AutoBundle _(this);
    _mov(ebp, ecx);

    _redefined(Context.insert<InstFakeDef>(rbp, ebp));
    _add(rbp, r15);
  }
}

void TargetX8664::_push_reg(RegNumT RegNum) {
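  // xmm registers cannot be pushed or popped directly, so they are saved by
  // adjusting the stack pointer and issuing a vector store (and restored with
  // a vector load in _pop_reg()).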
  if (Traits::isXmm(RegNum)) {
    Variable *reg = getPhysicalRegister(RegNum, IceType_v4f32);
    Variable *rsp =
        getPhysicalRegister(Traits::RegisterSet::Reg_rsp, Traits::WordType);
    auto *address =
        Traits::X86OperandMem::create(Func, reg->getType(), rsp, nullptr);
    _sub_sp(
        Ctx->getConstantInt32(16)); // TODO(capn): accumulate all the offsets
                                    // and adjust the stack pointer once.
    _storep(reg, address);
  } else if (RegNum != Traits::RegisterSet::Reg_rbp || !NeedSandboxing) {
    _push(getPhysicalRegister(RegNum, Traits::WordType));
  } else {
    _push_rbp();
  }
}

void TargetX8664::_pop_reg(RegNumT RegNum) {
  if (Traits::isXmm(RegNum)) {
    Variable *reg = getPhysicalRegister(RegNum, IceType_v4f32);
    Variable *rsp =
        getPhysicalRegister(Traits::RegisterSet::Reg_rsp, Traits::WordType);
    auto *address =
        Traits::X86OperandMem::create(Func, reg->getType(), rsp, nullptr);
    _movp(reg, address);
    _add_sp(
        Ctx->getConstantInt32(16)); // TODO(capn): accumulate all the offsets
                                    // and adjust the stack pointer once.
  } else {
    _pop(getPhysicalRegister(RegNum, Traits::WordType));
  }
}

void TargetX8664::emitGetIP(CfgNode *Node) {
  // No IP base register is needed on X86-64.
  (void)Node;
}

namespace {
bool isAssignedToRspOrRbp(const Variable *Var) {
  if (Var == nullptr) {
    return false;
  }

  if (Var->isRematerializable()) {
    return true;
  }

  if (!Var->hasReg()) {
    return false;
  }

  const auto RegNum = Var->getRegNum();
  if ((RegNum == Traits::RegisterSet::Reg_rsp) ||
      (RegNum == Traits::RegisterSet::Reg_rbp)) {
    return true;
  }

  return false;
}
} // end of anonymous namespace

Traits::X86OperandMem *TargetX8664::_sandbox_mem_reference(X86OperandMem *Mem) {
  if (SandboxingType == ST_None) {
    return Mem;
  }

  if (SandboxingType == ST_Nonsfi) {
    llvm::report_fatal_error(
        "_sandbox_mem_reference not implemented for nonsfi");
  }

  // In x86_64-nacl, all memory references are relative to a base register
  // (%r15, %rsp, %rbp, or %rip).
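  //
  // For example (illustrative only; register names are arbitrary), a
  // user-level reference such as 12(%eax) is rewritten so that only the low
  // 32 bits of the address participate:
  //
  //   lea 12(%eax), %Td   // 32-bit result, implicitly zero-extended
  //   ... 0(%r15,%T,1)    // rebased against the sandbox base in %r15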

  Variable *Base = Mem->getBase();
  Variable *Index = Mem->getIndex();
  uint16_t Shift = 0;
  Variable *ZeroReg = RebasePtr;
  Constant *Offset = Mem->getOffset();
  Variable *T = nullptr;

  bool AbsoluteAddress = false;
  if (Base == nullptr && Index == nullptr) {
    if (llvm::isa<ConstantRelocatable>(Offset)) {
      // Mem is RIP-relative. There's no need to rebase it.
      return Mem;
    }
    // Offset is an absolute address, so we need to emit
    // Offset(%r15)
    AbsoluteAddress = true;
  }

  if (Mem->getIsRebased()) {
    // If Mem.IsRebased, then we don't need to update Mem, as it's already been
    // updated to contain a reference to one of %rsp, %rbp, or %r15.
    // We don't return early because we still need to zero extend Index.
    assert(ZeroReg == Base || AbsoluteAddress || isAssignedToRspOrRbp(Base));
    if (!AbsoluteAddress) {
      // If Mem is an absolute address, no need to update ZeroReg (which is
      // already set to %r15.)
      ZeroReg = Base;
    }
    if (Index != nullptr) {
      T = makeReg(IceType_i32);
      _mov(T, Index);
      Shift = Mem->getShift();
    }
  } else {
    if (Base != nullptr) {
      // If Base is a valid base pointer we don't need to use the RebasePtr,
      // which may save us the need to zero-extend the memory operand.
      if (isAssignedToRspOrRbp(Base)) {
        ZeroReg = Base;
      } else {
        T = Base;
      }
    }

    if (Index != nullptr) {
      assert(!Index->isRematerializable());
      // If Index is not nullptr, then T must be nullptr. Otherwise, the
      // lowering generated a memory operand with two registers. Note that
      // Base might still be non-nullptr, but it must be a valid base
      // register.
      if (T != nullptr) {
        llvm::report_fatal_error("memory reference contains base and index.");
      }
      // If the Index is not shifted, and it is a valid base, and the ZeroReg
      // is still RebasePtr, then we do ZeroReg = Index, and hopefully prevent
      // the need to zero-extend the memory operand (which may still happen --
      // see NeedsLea below.)
      if (Shift == 0 && isAssignedToRspOrRbp(Index) && ZeroReg == RebasePtr) {
        ZeroReg = Index;
      } else {
        T = Index;
        Shift = Mem->getShift();
      }
    }
  }

  // NeedsLea is a flag indicating whether Mem needs to be materialized to a
  // GPR prior to being used. A LEA is needed if Mem.Offset is a constant
  // relocatable with a nonzero offset, or if Mem.Offset is a nonzero
  // immediate; but only when the address mode contains a "user" register
  // other than the rsp/rbp/r15 base. In both cases, the LEA is needed to
  // ensure the sandboxed memory operand will only use the lower 32-bits of
  // T+Offset.
  bool NeedsLea = false;
  if (!Mem->getIsRebased()) {
    bool IsOffsetZero = false;
    if (Offset == nullptr) {
      IsOffsetZero = true;
    } else if (const auto *CR = llvm::dyn_cast<ConstantRelocatable>(Offset)) {
      IsOffsetZero = (CR->getOffset() == 0);
    } else if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Offset)) {
      IsOffsetZero = (Imm->getValue() == 0);
    } else {
      llvm::report_fatal_error("Unexpected Offset type.");
    }
    if (!IsOffsetZero) {
      if (Base != nullptr && Base != ZeroReg)
        NeedsLea = true;
      if (Index != nullptr && Index != ZeroReg)
        NeedsLea = true;
    }
  }

  RegNumT RegNum, RegNum32;
  if (T != nullptr) {
    if (T->hasReg()) {
      RegNum = Traits::getGprForType(IceType_i64, T->getRegNum());
      RegNum32 = Traits::getGprForType(IceType_i32, RegNum);
      // At this point, if T was assigned to rsp/rbp, then we would have
      // already made it the ZeroReg.
      assert(RegNum != Traits::RegisterSet::Reg_rsp);
      assert(RegNum != Traits::RegisterSet::Reg_rbp);
    }

    switch (T->getType()) {
    default:
      llvm::report_fatal_error("Mem pointer should be a 32-bit GPR.");
    case IceType_i64:
      // Even though "default:" would also catch T.Type == IceType_i64, an
      // explicit 'case IceType_i64' shows that memory operands are always
      // supposed to be 32-bits.
      llvm::report_fatal_error("Mem pointer should not be a 64-bit GPR.");
    case IceType_i32: {
      Variable *T64 = makeReg(IceType_i64, RegNum);
      auto *Movzx = _movzx(T64, T);
      if (!NeedsLea) {
        // This movzx is only needed when Mem does not need to be lea'd into a
        // temporary. If an lea is going to be emitted, then eliding this
        // movzx is safe because the emitted lea will write a 32-bit result --
        // implicitly zero-extended to 64-bit.
        Movzx->setMustKeep();
      }
      T = T64;
    } break;
    }
  }

  if (NeedsLea) {
    Variable *NewT = makeReg(IceType_i32, RegNum32);
    Variable *Base = T;
    Variable *Index = T;
    static constexpr bool NotRebased = false;
    if (Shift == 0) {
      Index = nullptr;
    } else {
      Base = nullptr;
    }
    _lea(NewT, Traits::X86OperandMem::create(
                   Func, Mem->getType(), Base, Offset, Index, Shift,
                   Traits::X86OperandMem::DefaultSegment, NotRebased));

    T = makeReg(IceType_i64, RegNum);
    _movzx(T, NewT);
    Shift = 0;
    Offset = nullptr;
  }

  static constexpr bool IsRebased = true;
  return Traits::X86OperandMem::create(
      Func, Mem->getType(), ZeroReg, Offset, T, Shift,
      Traits::X86OperandMem::DefaultSegment, IsRebased);
}

void TargetX8664::_sub_sp(Operand *Adjustment) {
  Variable *rsp =
      getPhysicalRegister(Traits::RegisterSet::Reg_rsp, Traits::WordType);

  if (NeedSandboxing) {
    Variable *esp =
        getPhysicalRegister(Traits::RegisterSet::Reg_esp, IceType_i32);
    Variable *r15 =
        getPhysicalRegister(Traits::RegisterSet::Reg_r15, IceType_i64);

    // .bundle_start
    // sub Adjustment, %esp
    // add %r15, %rsp
    // .bundle_end
    AutoBundle _(this);
    _redefined(Context.insert<InstFakeDef>(esp, rsp));
    _sub(esp, Adjustment);
    _redefined(Context.insert<InstFakeDef>(rsp, esp));
    _add(rsp, r15);
  } else {
    _sub(rsp, Adjustment);
  }

  // Add a fake use of the stack pointer, to prevent the stack pointer
  // adjustment from being dead-code eliminated in a function that doesn't
  // return.
  Context.insert<InstFakeUse>(rsp);
}

void TargetX8664::initRebasePtr() {
  switch (SandboxingType) {
  case ST_Nonsfi:
    // Probably no implementation is needed, but error to be safe for now.
    llvm::report_fatal_error(
        "initRebasePtr() is not yet implemented on x32-nonsfi.");
  case ST_NaCl:
    RebasePtr = getPhysicalRegister(Traits::RegisterSet::Reg_r15, IceType_i64);
    break;
  case ST_None:
    // nothing.
    break;
  }
}

void TargetX8664::initSandbox() {
  assert(SandboxingType == ST_NaCl);
  Context.init(Func->getEntryNode());
  Context.setInsertPoint(Context.getCur());
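  // Pin r15 (the sandbox base pointer) with a fake def/use pair at function
  // entry so liveness analysis treats it as live for the whole function.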
  Variable *r15 =
      getPhysicalRegister(Traits::RegisterSet::Reg_r15, IceType_i64);
  Context.insert<InstFakeDef>(r15);
  Context.insert<InstFakeUse>(r15);
}

namespace {
bool isRematerializable(const Variable *Var) {
  return Var != nullptr && Var->isRematerializable();
}
} // end of anonymous namespace

bool TargetX8664::legalizeOptAddrForSandbox(OptAddr *Addr) {
  if (SandboxingType == ST_Nonsfi) {
    llvm::report_fatal_error("Nonsfi not yet implemented for x8664.");
  }

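  // A sandboxable address may contain at most one "user" register: of Base
  // and Index, the other must be null, the RebasePtr, or a rematerializable
  // (rsp/rbp-derived) variable.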
  if (isRematerializable(Addr->Base)) {
    if (Addr->Index == RebasePtr) {
      Addr->Index = nullptr;
      Addr->Shift = 0;
    }
    return true;
  }

  if (isRematerializable(Addr->Index)) {
    if (Addr->Base == RebasePtr) {
      Addr->Base = nullptr;
    }
    return true;
  }

  assert(Addr->Base != RebasePtr && Addr->Index != RebasePtr);

  if (Addr->Base == nullptr) {
    return true;
  }

  if (Addr->Index == nullptr) {
    return true;
  }

  return false;
}

void TargetX8664::lowerIndirectJump(Variable *JumpTarget) {
  std::unique_ptr<AutoBundle> Bundler;

  if (!NeedSandboxing) {
    if (JumpTarget->getType() != IceType_i64) {
      Variable *T = makeReg(IceType_i64);
      _movzx(T, JumpTarget);
      JumpTarget = T;
    }
  } else {
    Variable *T = makeReg(IceType_i32);
    Variable *T64 = makeReg(IceType_i64);
    Variable *r15 =
        getPhysicalRegister(Traits::RegisterSet::Reg_r15, IceType_i64);

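    // This emits the standard NaCl indirect-branch sequence; assuming 32-byte
    // bundles, it is roughly:
    //
    //   mov JumpTarget, %T
    //   .bundle_lock
    //   and $-32, %T     // clear the low bits (bundle alignment)
    //   movzx %T, %T64   // keep only the low 32 bits
    //   add %r15, %T64   // rebase into the sandbox
    //   jmp *%T64
    //   .bundle_unlock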
    _mov(T, JumpTarget);
    Bundler = makeUnique<AutoBundle>(this);
    const SizeT BundleSize =
        1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
    _and(T, Ctx->getConstantInt32(~(BundleSize - 1)));
    _movzx(T64, T);
    _add(T64, r15);
    JumpTarget = T64;
  }

  _jmp(JumpTarget);
}

Inst *TargetX8664::emitCallToTarget(Operand *CallTarget, Variable *ReturnReg,
                                    size_t NumVariadicFpArgs) {
  Inst *NewCall = nullptr;
  auto *CallTargetR = llvm::dyn_cast<Variable>(CallTarget);
  if (NeedSandboxing) {
    // In NaCl sandbox, calls are replaced by a push/jmp pair:
    //
    // push .after_call
    // jmp CallTarget
    // .align bundle_size
    // after_call:
    //
    // In order to emit this sequence, we need a temporary label ("after_call",
    // in this example.)
    //
    // The operand to push is a ConstantRelocatable. The easy way to implement
    // this sequence is to create a ConstantRelocatable(0, "after_call"), but
    // this ends up creating more relocations for the linker to resolve.
    // Therefore, we create a ConstantRelocatable from the name of the function
    // being compiled (i.e., ConstantRelocatable(after_call - Func, Func)).
    //
    // By default, ConstantRelocatables are emitted (in textual output) as
    //
    // ConstantName + Offset
    //
    // ReturnReloc has an offset that is only known during binary emission.
    // Therefore, we set a custom emit string for ReturnReloc that will be
    // used instead. In this particular case, the code will be emitted as
    //
    // push .after_call
    InstX86Label *ReturnAddress = InstX86Label::create(Func, this);
    auto *ReturnRelocOffset = RelocOffset::create(Func->getAssembler());
    ReturnAddress->setRelocOffset(ReturnRelocOffset);
    constexpr RelocOffsetT NoFixedOffset = 0;
    const std::string EmitString =
        BuildDefs::dump() ? ReturnAddress->getLabelName().toString() : "";
    auto *ReturnReloc = ConstantRelocatable::create(
        Func->getAssembler(), IceType_i32,
        RelocatableTuple(NoFixedOffset, {ReturnRelocOffset},
                         Func->getFunctionName(), EmitString));
    /* AutoBundle scoping */ {
      std::unique_ptr<AutoBundle> Bundler;
      if (CallTargetR == nullptr) {
        Bundler = makeUnique<AutoBundle>(this, InstBundleLock::Opt_PadToEnd);
        _push(ReturnReloc);
      } else {
        Variable *T = makeReg(IceType_i32);
        Variable *T64 = makeReg(IceType_i64);
        Variable *r15 =
            getPhysicalRegister(Traits::RegisterSet::Reg_r15, IceType_i64);

        _mov(T, CallTargetR);
        Bundler = makeUnique<AutoBundle>(this, InstBundleLock::Opt_PadToEnd);
        _push(ReturnReloc);
        const SizeT BundleSize =
            1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
        _and(T, Ctx->getConstantInt32(~(BundleSize - 1)));
        _movzx(T64, T);
        _add(T64, r15);
        CallTarget = T64;
      }
      NewCall = Context.insert<Traits::Insts::Jmp>(CallTarget);
    }
    if (ReturnReg != nullptr) {
      Context.insert<InstFakeDef>(ReturnReg);
    }

    Context.insert(ReturnAddress);
  } else {
    if (CallTargetR != nullptr && CallTarget->getType() == IceType_i32) {
      // x86-64 in PNaCl is ILP32. Therefore, CallTarget is i32, but the
      // emitted call needs an i64 register (for textual asm.)
      Variable *T = makeReg(IceType_i64);
      _movzx(T, CallTargetR);
      CallTarget = T;

    } else if (CallTarget->getType() == IceType_i64) {
      // x86-64 does not support 64-bit direct calls, so write the value to a
      // register and make an indirect call for Constant call targets.
      RegNumT TargetReg = {};

      // System V: force r11 when calling a variadic function so that rax
      // isn't used, since rax stores the number of FP args (see
      // NumVariadicFpArgs usage below).
#if !defined(SUBZERO_USE_MICROSOFT_ABI)
      if (NumVariadicFpArgs > 0)
        TargetReg = Traits::RegisterSet::Reg_r11;
#endif

      if (llvm::isa<Constant>(CallTarget)) {
        Variable *T = makeReg(IceType_i64, TargetReg);
        _mov(T, CallTarget);
        CallTarget = T;
      } else if (llvm::isa<Variable>(CallTarget)) {
        Operand *T = legalizeToReg(CallTarget, TargetReg);
        CallTarget = T;
      }
    }

    // System V: store number of FP args in RAX for variadic calls
#if !defined(SUBZERO_USE_MICROSOFT_ABI)
    if (NumVariadicFpArgs > 0) {
      // Store number of FP args (stored in XMM registers) in RAX for variadic
      // calls
      auto *NumFpArgs = Ctx->getConstantInt64(NumVariadicFpArgs);
      Variable *NumFpArgsReg =
          legalizeToReg(NumFpArgs, Traits::RegisterSet::Reg_rax);
      Context.insert<InstFakeUse>(NumFpArgsReg);
    }
#endif

    NewCall = Context.insert<Traits::Insts::Call>(ReturnReg, CallTarget);
  }
  return NewCall;
}

Variable *TargetX8664::moveReturnValueToRegister(Operand *Value,
                                                 Type ReturnType) {
  if (isVectorType(ReturnType) || isScalarFloatingType(ReturnType)) {
    return legalizeToReg(Value, Traits::RegisterSet::Reg_xmm0);
  } else {
    assert(ReturnType == IceType_i32 || ReturnType == IceType_i64);
    Variable *Reg = nullptr;
    _mov(Reg, Value,
         Traits::getGprForType(ReturnType, Traits::RegisterSet::Reg_rax));
    return Reg;
  }
}

void TargetX8664::emitSandboxedReturn() {
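  // A sandboxed function cannot simply "ret": the return address is popped,
  // masked to the bundle boundary, rebased against %r15, and then jumped to
  // (cf. lowerIndirectJump()).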
  Variable *T_rcx = makeReg(IceType_i64, Traits::RegisterSet::Reg_rcx);
  Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
  _pop(T_rcx);
  _mov(T_ecx, T_rcx);
  // lowerIndirectJump(T_ecx);
  Variable *r15 =
      getPhysicalRegister(Traits::RegisterSet::Reg_r15, IceType_i64);

  /* AutoBundle scoping */ {
    AutoBundle _(this);
    const SizeT BundleSize =
        1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
    _and(T_ecx, Ctx->getConstantInt32(~(BundleSize - 1)));
    Context.insert<InstFakeDef>(T_rcx, T_ecx);
    _add(T_rcx, r15);

    _jmp(T_rcx);
  }
}

void TargetX8664::emitStackProbe(size_t StackSizeBytes) {
#if defined(SUBZERO_USE_MICROSOFT_ABI)
  // Mirroring the behavior of MSVC here, which emits a _chkstk when locals
  // are >= 4KB, rather than the 8KB claimed by the docs.
  if (StackSizeBytes >= 4096) {
    // __chkstk on Win64 probes the stack up to RSP - EAX, but does not
    // clobber RSP, so we don't need to save and restore it.

    Variable *EAX = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
    _mov(EAX, Ctx->getConstantInt32(StackSizeBytes));

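    // r11 is a volatile (caller-saved) scratch register in the Win64 ABI, so
    // it is safe to clobber for this helper call.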
    auto *CallTarget =
        Ctx->getConstantInt64(reinterpret_cast<int64_t>(&__chkstk));
    Operand *CallTargetReg =
        legalizeToReg(CallTarget, Traits::RegisterSet::Reg_r11);
    emitCallToTarget(CallTargetReg, nullptr);
  }
#endif
}

// In some cases, there are x-macros tables for both high-level and low-level
// instructions/operands that use the same enum key value. The tables are kept
// separate to maintain a proper separation between abstraction layers. There
// is a risk that the tables could get out of sync if enum values are reordered
// or if entries are added or deleted. The following dummy namespaces use
// static_asserts to ensure everything is kept in sync.

namespace {
// Validate the enum values in FCMPX8664_TABLE.
namespace dummy1 {
// Define a temporary set of enum values based on low-level table entries.
enum _tmp_enum {
#define X(val, dflt, swapS, C1, C2, swapV, pred) _tmp_##val,
  FCMPX8664_TABLE
#undef X
      _num
};
// Define a set of constants based on high-level table entries.
#define X(tag, str) static const int _table1_##tag = InstFcmp::tag;
ICEINSTFCMP_TABLE
#undef X
// Define a set of constants based on low-level table entries, and ensure the
// table entry keys are consistent.
#define X(val, dflt, swapS, C1, C2, swapV, pred)                               \
  static const int _table2_##val = _tmp_##val;                                 \
  static_assert(                                                               \
      _table1_##val == _table2_##val,                                          \
      "Inconsistency between FCMPX8664_TABLE and ICEINSTFCMP_TABLE");
FCMPX8664_TABLE
#undef X
// Repeat the static asserts with respect to the high-level table entries in
// case the high-level table has extra entries.
#define X(tag, str)                                                            \
  static_assert(                                                               \
      _table1_##tag == _table2_##tag,                                          \
      "Inconsistency between FCMPX8664_TABLE and ICEINSTFCMP_TABLE");
ICEINSTFCMP_TABLE
#undef X
} // end of namespace dummy1

// Validate the enum values in ICMPX8664_TABLE.
namespace dummy2 {
// Define a temporary set of enum values based on low-level table entries.
enum _tmp_enum {
#define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val,
  ICMPX8664_TABLE
#undef X
      _num
};
// Define a set of constants based on high-level table entries.
#define X(tag, reverse, str) static const int _table1_##tag = InstIcmp::tag;
ICEINSTICMP_TABLE
#undef X
// Define a set of constants based on low-level table entries, and ensure the
// table entry keys are consistent.
#define X(val, C_32, C1_64, C2_64, C3_64)                                      \
  static const int _table2_##val = _tmp_##val;                                 \
  static_assert(                                                               \
      _table1_##val == _table2_##val,                                          \
      "Inconsistency between ICMPX8664_TABLE and ICEINSTICMP_TABLE");
ICMPX8664_TABLE
#undef X
// Repeat the static asserts with respect to the high-level table entries in
// case the high-level table has extra entries.
#define X(tag, reverse, str)                                                   \
  static_assert(                                                               \
      _table1_##tag == _table2_##tag,                                          \
      "Inconsistency between ICMPX8664_TABLE and ICEINSTICMP_TABLE");
ICEINSTICMP_TABLE
#undef X
} // end of namespace dummy2

// Validate the enum values in ICETYPEX8664_TABLE.
namespace dummy3 {
// Define a temporary set of enum values based on low-level table entries.
enum _tmp_enum {
#define X(tag, elty, cvt, sdss, pdps, spsd, int_, unpack, pack, width, fld)    \
  _tmp_##tag,
  ICETYPEX8664_TABLE
#undef X
      _num
};
// Define a set of constants based on high-level table entries.
#define X(tag, sizeLog2, align, elts, elty, str, rcstr)                        \
  static const int _table1_##tag = IceType_##tag;
ICETYPE_TABLE
#undef X
// Define a set of constants based on low-level table entries, and ensure the
// table entry keys are consistent.
#define X(tag, elty, cvt, sdss, pdps, spsd, int_, unpack, pack, width, fld)    \
  static const int _table2_##tag = _tmp_##tag;                                 \
  static_assert(_table1_##tag == _table2_##tag,                                \
                "Inconsistency between ICETYPEX8664_TABLE and ICETYPE_TABLE");
ICETYPEX8664_TABLE
#undef X
// Repeat the static asserts with respect to the high-level table entries in
// case the high-level table has extra entries.
#define X(tag, sizeLog2, align, elts, elty, str, rcstr)                        \
  static_assert(_table1_##tag == _table2_##tag,                                \
                "Inconsistency between ICETYPEX8664_TABLE and ICETYPE_TABLE");
ICETYPE_TABLE
#undef X
} // end of namespace dummy3
} // end of anonymous namespace

} // end of namespace X8664
} // end of namespace Ice