1 /*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "code_generator_x86_64.h"
18
19 #include "art_method.h"
20 #include "code_generator_utils.h"
21 #include "compiled_method.h"
22 #include "entrypoints/quick/quick_entrypoints.h"
23 #include "gc/accounting/card_table.h"
24 #include "intrinsics.h"
25 #include "intrinsics_x86_64.h"
26 #include "mirror/array-inl.h"
27 #include "mirror/class-inl.h"
28 #include "mirror/object_reference.h"
29 #include "thread.h"
30 #include "utils/assembler.h"
31 #include "utils/stack_checks.h"
32 #include "utils/x86_64/assembler_x86_64.h"
33 #include "utils/x86_64/managed_register_x86_64.h"
34
35 namespace art {
36
37 template<class MirrorType>
38 class GcRoot;
39
40 namespace x86_64 {
41
42 static constexpr int kCurrentMethodStackOffset = 0;
43 static constexpr Register kMethodRegisterArgument = RDI;
44 // The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
45 // table version generates 7 instructions and num_entries literals. Compare/jump sequence will
46 // generates less code/data with a small num_entries.
47 static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
48
49 static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
50 static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };
51
52 static constexpr int kC2ConditionMask = 0x400;
53
54 #define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())->
55 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, x).Int32Value()
56
57 class NullCheckSlowPathX86_64 : public SlowPathCode {
58 public:
NullCheckSlowPathX86_64(HNullCheck * instruction)59 explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : SlowPathCode(instruction) {}
60
EmitNativeCode(CodeGenerator * codegen)61 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
62 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
63 __ Bind(GetEntryLabel());
64 if (instruction_->CanThrowIntoCatchBlock()) {
65 // Live registers will be restored in the catch block if caught.
66 SaveLiveRegisters(codegen, instruction_->GetLocations());
67 }
68 x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowNullPointer),
69 instruction_,
70 instruction_->GetDexPc(),
71 this);
72 CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
73 }
74
IsFatal() const75 bool IsFatal() const OVERRIDE { return true; }
76
GetDescription() const77 const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathX86_64"; }
78
79 private:
80 DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64);
81 };
82
83 class DivZeroCheckSlowPathX86_64 : public SlowPathCode {
84 public:
DivZeroCheckSlowPathX86_64(HDivZeroCheck * instruction)85 explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}
86
EmitNativeCode(CodeGenerator * codegen)87 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
88 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
89 __ Bind(GetEntryLabel());
90 if (instruction_->CanThrowIntoCatchBlock()) {
91 // Live registers will be restored in the catch block if caught.
92 SaveLiveRegisters(codegen, instruction_->GetLocations());
93 }
94 x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowDivZero),
95 instruction_,
96 instruction_->GetDexPc(),
97 this);
98 CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
99 }
100
IsFatal() const101 bool IsFatal() const OVERRIDE { return true; }
102
GetDescription() const103 const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathX86_64"; }
104
105 private:
106 DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64);
107 };
108
109 class DivRemMinusOneSlowPathX86_64 : public SlowPathCode {
110 public:
DivRemMinusOneSlowPathX86_64(HInstruction * at,Register reg,Primitive::Type type,bool is_div)111 DivRemMinusOneSlowPathX86_64(HInstruction* at, Register reg, Primitive::Type type, bool is_div)
112 : SlowPathCode(at), cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {}
113
EmitNativeCode(CodeGenerator * codegen)114 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
115 __ Bind(GetEntryLabel());
116 if (type_ == Primitive::kPrimInt) {
117 if (is_div_) {
118 __ negl(cpu_reg_);
119 } else {
120 __ xorl(cpu_reg_, cpu_reg_);
121 }
122
123 } else {
124 DCHECK_EQ(Primitive::kPrimLong, type_);
125 if (is_div_) {
126 __ negq(cpu_reg_);
127 } else {
128 __ xorl(cpu_reg_, cpu_reg_);
129 }
130 }
131 __ jmp(GetExitLabel());
132 }
133
GetDescription() const134 const char* GetDescription() const OVERRIDE { return "DivRemMinusOneSlowPathX86_64"; }
135
136 private:
137 const CpuRegister cpu_reg_;
138 const Primitive::Type type_;
139 const bool is_div_;
140 DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86_64);
141 };
142
143 class SuspendCheckSlowPathX86_64 : public SlowPathCode {
144 public:
SuspendCheckSlowPathX86_64(HSuspendCheck * instruction,HBasicBlock * successor)145 SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor)
146 : SlowPathCode(instruction), successor_(successor) {}
147
EmitNativeCode(CodeGenerator * codegen)148 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
149 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
150 __ Bind(GetEntryLabel());
151 SaveLiveRegisters(codegen, instruction_->GetLocations());
152 x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend),
153 instruction_,
154 instruction_->GetDexPc(),
155 this);
156 CheckEntrypointTypes<kQuickTestSuspend, void, void>();
157 RestoreLiveRegisters(codegen, instruction_->GetLocations());
158 if (successor_ == nullptr) {
159 __ jmp(GetReturnLabel());
160 } else {
161 __ jmp(x86_64_codegen->GetLabelOf(successor_));
162 }
163 }
164
GetReturnLabel()165 Label* GetReturnLabel() {
166 DCHECK(successor_ == nullptr);
167 return &return_label_;
168 }
169
GetSuccessor() const170 HBasicBlock* GetSuccessor() const {
171 return successor_;
172 }
173
GetDescription() const174 const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathX86_64"; }
175
176 private:
177 HBasicBlock* const successor_;
178 Label return_label_;
179
180 DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86_64);
181 };
182
183 class BoundsCheckSlowPathX86_64 : public SlowPathCode {
184 public:
BoundsCheckSlowPathX86_64(HBoundsCheck * instruction)185 explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction)
186 : SlowPathCode(instruction) {}
187
EmitNativeCode(CodeGenerator * codegen)188 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
189 LocationSummary* locations = instruction_->GetLocations();
190 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
191 __ Bind(GetEntryLabel());
192 if (instruction_->CanThrowIntoCatchBlock()) {
193 // Live registers will be restored in the catch block if caught.
194 SaveLiveRegisters(codegen, instruction_->GetLocations());
195 }
196 // We're moving two locations to locations that could overlap, so we need a parallel
197 // move resolver.
198 InvokeRuntimeCallingConvention calling_convention;
199 codegen->EmitParallelMoves(
200 locations->InAt(0),
201 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
202 Primitive::kPrimInt,
203 locations->InAt(1),
204 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
205 Primitive::kPrimInt);
206 x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds),
207 instruction_,
208 instruction_->GetDexPc(),
209 this);
210 CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
211 }
212
IsFatal() const213 bool IsFatal() const OVERRIDE { return true; }
214
GetDescription() const215 const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathX86_64"; }
216
217 private:
218 DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64);
219 };
220
221 class LoadClassSlowPathX86_64 : public SlowPathCode {
222 public:
LoadClassSlowPathX86_64(HLoadClass * cls,HInstruction * at,uint32_t dex_pc,bool do_clinit)223 LoadClassSlowPathX86_64(HLoadClass* cls,
224 HInstruction* at,
225 uint32_t dex_pc,
226 bool do_clinit)
227 : SlowPathCode(at), cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) {
228 DCHECK(at->IsLoadClass() || at->IsClinitCheck());
229 }
230
EmitNativeCode(CodeGenerator * codegen)231 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
232 LocationSummary* locations = at_->GetLocations();
233 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
234 __ Bind(GetEntryLabel());
235
236 SaveLiveRegisters(codegen, locations);
237
238 InvokeRuntimeCallingConvention calling_convention;
239 __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(cls_->GetTypeIndex()));
240 x86_64_codegen->InvokeRuntime(do_clinit_ ?
241 QUICK_ENTRY_POINT(pInitializeStaticStorage) :
242 QUICK_ENTRY_POINT(pInitializeType),
243 at_,
244 dex_pc_,
245 this);
246 if (do_clinit_) {
247 CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
248 } else {
249 CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
250 }
251
252 Location out = locations->Out();
253 // Move the class to the desired location.
254 if (out.IsValid()) {
255 DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
256 x86_64_codegen->Move(out, Location::RegisterLocation(RAX));
257 }
258
259 RestoreLiveRegisters(codegen, locations);
260 __ jmp(GetExitLabel());
261 }
262
GetDescription() const263 const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathX86_64"; }
264
265 private:
266 // The class this slow path will load.
267 HLoadClass* const cls_;
268
269 // The instruction where this slow path is happening.
270 // (Might be the load class or an initialization check).
271 HInstruction* const at_;
272
273 // The dex PC of `at_`.
274 const uint32_t dex_pc_;
275
276 // Whether to initialize the class.
277 const bool do_clinit_;
278
279 DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86_64);
280 };
281
282 class LoadStringSlowPathX86_64 : public SlowPathCode {
283 public:
LoadStringSlowPathX86_64(HLoadString * instruction)284 explicit LoadStringSlowPathX86_64(HLoadString* instruction) : SlowPathCode(instruction) {}
285
EmitNativeCode(CodeGenerator * codegen)286 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
287 LocationSummary* locations = instruction_->GetLocations();
288 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
289
290 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
291 __ Bind(GetEntryLabel());
292 SaveLiveRegisters(codegen, locations);
293
294 InvokeRuntimeCallingConvention calling_convention;
295 const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex();
296 __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(string_index));
297 x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString),
298 instruction_,
299 instruction_->GetDexPc(),
300 this);
301 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
302 x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
303 RestoreLiveRegisters(codegen, locations);
304 __ jmp(GetExitLabel());
305 }
306
GetDescription() const307 const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathX86_64"; }
308
309 private:
310 DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64);
311 };
312
313 class TypeCheckSlowPathX86_64 : public SlowPathCode {
314 public:
TypeCheckSlowPathX86_64(HInstruction * instruction,bool is_fatal)315 TypeCheckSlowPathX86_64(HInstruction* instruction, bool is_fatal)
316 : SlowPathCode(instruction), is_fatal_(is_fatal) {}
317
EmitNativeCode(CodeGenerator * codegen)318 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
319 LocationSummary* locations = instruction_->GetLocations();
320 Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0)
321 : locations->Out();
322 uint32_t dex_pc = instruction_->GetDexPc();
323 DCHECK(instruction_->IsCheckCast()
324 || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
325
326 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
327 __ Bind(GetEntryLabel());
328
329 if (!is_fatal_) {
330 SaveLiveRegisters(codegen, locations);
331 }
332
333 // We're moving two locations to locations that could overlap, so we need a parallel
334 // move resolver.
335 InvokeRuntimeCallingConvention calling_convention;
336 codegen->EmitParallelMoves(
337 locations->InAt(1),
338 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
339 Primitive::kPrimNot,
340 object_class,
341 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
342 Primitive::kPrimNot);
343
344 if (instruction_->IsInstanceOf()) {
345 x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial),
346 instruction_,
347 dex_pc,
348 this);
349 CheckEntrypointTypes<
350 kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>();
351 } else {
352 DCHECK(instruction_->IsCheckCast());
353 x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
354 instruction_,
355 dex_pc,
356 this);
357 CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
358 }
359
360 if (!is_fatal_) {
361 if (instruction_->IsInstanceOf()) {
362 x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
363 }
364
365 RestoreLiveRegisters(codegen, locations);
366 __ jmp(GetExitLabel());
367 }
368 }
369
GetDescription() const370 const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathX86_64"; }
371
IsFatal() const372 bool IsFatal() const OVERRIDE { return is_fatal_; }
373
374 private:
375 const bool is_fatal_;
376
377 DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64);
378 };
379
380 class DeoptimizationSlowPathX86_64 : public SlowPathCode {
381 public:
DeoptimizationSlowPathX86_64(HDeoptimize * instruction)382 explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction)
383 : SlowPathCode(instruction) {}
384
EmitNativeCode(CodeGenerator * codegen)385 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
386 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
387 __ Bind(GetEntryLabel());
388 SaveLiveRegisters(codegen, instruction_->GetLocations());
389 x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
390 instruction_,
391 instruction_->GetDexPc(),
392 this);
393 CheckEntrypointTypes<kQuickDeoptimize, void, void>();
394 }
395
GetDescription() const396 const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; }
397
398 private:
399 DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64);
400 };
401
402 class ArraySetSlowPathX86_64 : public SlowPathCode {
403 public:
ArraySetSlowPathX86_64(HInstruction * instruction)404 explicit ArraySetSlowPathX86_64(HInstruction* instruction) : SlowPathCode(instruction) {}
405
EmitNativeCode(CodeGenerator * codegen)406 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
407 LocationSummary* locations = instruction_->GetLocations();
408 __ Bind(GetEntryLabel());
409 SaveLiveRegisters(codegen, locations);
410
411 InvokeRuntimeCallingConvention calling_convention;
412 HParallelMove parallel_move(codegen->GetGraph()->GetArena());
413 parallel_move.AddMove(
414 locations->InAt(0),
415 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
416 Primitive::kPrimNot,
417 nullptr);
418 parallel_move.AddMove(
419 locations->InAt(1),
420 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
421 Primitive::kPrimInt,
422 nullptr);
423 parallel_move.AddMove(
424 locations->InAt(2),
425 Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
426 Primitive::kPrimNot,
427 nullptr);
428 codegen->GetMoveResolver()->EmitNativeCode(¶llel_move);
429
430 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
431 x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject),
432 instruction_,
433 instruction_->GetDexPc(),
434 this);
435 CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
436 RestoreLiveRegisters(codegen, locations);
437 __ jmp(GetExitLabel());
438 }
439
GetDescription() const440 const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathX86_64"; }
441
442 private:
443 DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
444 };
445
446 // Slow path marking an object during a read barrier.
447 class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
448 public:
ReadBarrierMarkSlowPathX86_64(HInstruction * instruction,Location out,Location obj)449 ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location out, Location obj)
450 : SlowPathCode(instruction), out_(out), obj_(obj) {
451 DCHECK(kEmitCompilerReadBarrier);
452 }
453
GetDescription() const454 const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathX86_64"; }
455
EmitNativeCode(CodeGenerator * codegen)456 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
457 LocationSummary* locations = instruction_->GetLocations();
458 Register reg_out = out_.AsRegister<Register>();
459 DCHECK(locations->CanCall());
460 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
461 DCHECK(instruction_->IsInstanceFieldGet() ||
462 instruction_->IsStaticFieldGet() ||
463 instruction_->IsArrayGet() ||
464 instruction_->IsLoadClass() ||
465 instruction_->IsLoadString() ||
466 instruction_->IsInstanceOf() ||
467 instruction_->IsCheckCast())
468 << "Unexpected instruction in read barrier marking slow path: "
469 << instruction_->DebugName();
470
471 __ Bind(GetEntryLabel());
472 SaveLiveRegisters(codegen, locations);
473
474 InvokeRuntimeCallingConvention calling_convention;
475 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
476 x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), obj_);
477 x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark),
478 instruction_,
479 instruction_->GetDexPc(),
480 this);
481 CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>();
482 x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
483
484 RestoreLiveRegisters(codegen, locations);
485 __ jmp(GetExitLabel());
486 }
487
488 private:
489 const Location out_;
490 const Location obj_;
491
492 DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
493 };
494
495 // Slow path generating a read barrier for a heap reference.
496 class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
497 public:
ReadBarrierForHeapReferenceSlowPathX86_64(HInstruction * instruction,Location out,Location ref,Location obj,uint32_t offset,Location index)498 ReadBarrierForHeapReferenceSlowPathX86_64(HInstruction* instruction,
499 Location out,
500 Location ref,
501 Location obj,
502 uint32_t offset,
503 Location index)
504 : SlowPathCode(instruction),
505 out_(out),
506 ref_(ref),
507 obj_(obj),
508 offset_(offset),
509 index_(index) {
510 DCHECK(kEmitCompilerReadBarrier);
511 // If `obj` is equal to `out` or `ref`, it means the initial
512 // object has been overwritten by (or after) the heap object
513 // reference load to be instrumented, e.g.:
514 //
515 // __ movl(out, Address(out, offset));
516 // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
517 //
518 // In that case, we have lost the information about the original
519 // object, and the emitted read barrier cannot work properly.
520 DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
521 DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
522 }
523
EmitNativeCode(CodeGenerator * codegen)524 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
525 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
526 LocationSummary* locations = instruction_->GetLocations();
527 CpuRegister reg_out = out_.AsRegister<CpuRegister>();
528 DCHECK(locations->CanCall());
529 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
530 DCHECK(!instruction_->IsInvoke() ||
531 (instruction_->IsInvokeStaticOrDirect() &&
532 instruction_->GetLocations()->Intrinsified()))
533 << "Unexpected instruction in read barrier for heap reference slow path: "
534 << instruction_->DebugName();
535
536 __ Bind(GetEntryLabel());
537 SaveLiveRegisters(codegen, locations);
538
539 // We may have to change the index's value, but as `index_` is a
540 // constant member (like other "inputs" of this slow path),
541 // introduce a copy of it, `index`.
542 Location index = index_;
543 if (index_.IsValid()) {
544 // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject.
545 if (instruction_->IsArrayGet()) {
546 // Compute real offset and store it in index_.
547 Register index_reg = index_.AsRegister<CpuRegister>().AsRegister();
548 DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
549 if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
550 // We are about to change the value of `index_reg` (see the
551 // calls to art::x86_64::X86_64Assembler::shll and
552 // art::x86_64::X86_64Assembler::AddImmediate below), but it
553 // has not been saved by the previous call to
554 // art::SlowPathCode::SaveLiveRegisters, as it is a
555 // callee-save register --
556 // art::SlowPathCode::SaveLiveRegisters does not consider
557 // callee-save registers, as it has been designed with the
558 // assumption that callee-save registers are supposed to be
559 // handled by the called function. So, as a callee-save
560 // register, `index_reg` _would_ eventually be saved onto
561 // the stack, but it would be too late: we would have
562 // changed its value earlier. Therefore, we manually save
563 // it here into another freely available register,
564 // `free_reg`, chosen of course among the caller-save
565 // registers (as a callee-save `free_reg` register would
566 // exhibit the same problem).
567 //
568 // Note we could have requested a temporary register from
569 // the register allocator instead; but we prefer not to, as
570 // this is a slow path, and we know we can find a
571 // caller-save register that is available.
572 Register free_reg = FindAvailableCallerSaveRegister(codegen).AsRegister();
573 __ movl(CpuRegister(free_reg), CpuRegister(index_reg));
574 index_reg = free_reg;
575 index = Location::RegisterLocation(index_reg);
576 } else {
577 // The initial register stored in `index_` has already been
578 // saved in the call to art::SlowPathCode::SaveLiveRegisters
579 // (as it is not a callee-save register), so we can freely
580 // use it.
581 }
582 // Shifting the index value contained in `index_reg` by the
583 // scale factor (2) cannot overflow in practice, as the
584 // runtime is unable to allocate object arrays with a size
585 // larger than 2^26 - 1 (that is, 2^28 - 4 bytes).
586 __ shll(CpuRegister(index_reg), Immediate(TIMES_4));
587 static_assert(
588 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
589 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
590 __ AddImmediate(CpuRegister(index_reg), Immediate(offset_));
591 } else {
592 DCHECK(instruction_->IsInvoke());
593 DCHECK(instruction_->GetLocations()->Intrinsified());
594 DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
595 (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
596 << instruction_->AsInvoke()->GetIntrinsic();
597 DCHECK_EQ(offset_, 0U);
598 DCHECK(index_.IsRegister());
599 }
600 }
601
602 // We're moving two or three locations to locations that could
603 // overlap, so we need a parallel move resolver.
604 InvokeRuntimeCallingConvention calling_convention;
605 HParallelMove parallel_move(codegen->GetGraph()->GetArena());
606 parallel_move.AddMove(ref_,
607 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
608 Primitive::kPrimNot,
609 nullptr);
610 parallel_move.AddMove(obj_,
611 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
612 Primitive::kPrimNot,
613 nullptr);
614 if (index.IsValid()) {
615 parallel_move.AddMove(index,
616 Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
617 Primitive::kPrimInt,
618 nullptr);
619 codegen->GetMoveResolver()->EmitNativeCode(¶llel_move);
620 } else {
621 codegen->GetMoveResolver()->EmitNativeCode(¶llel_move);
622 __ movl(CpuRegister(calling_convention.GetRegisterAt(2)), Immediate(offset_));
623 }
624 x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow),
625 instruction_,
626 instruction_->GetDexPc(),
627 this);
628 CheckEntrypointTypes<
629 kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
630 x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
631
632 RestoreLiveRegisters(codegen, locations);
633 __ jmp(GetExitLabel());
634 }
635
GetDescription() const636 const char* GetDescription() const OVERRIDE {
637 return "ReadBarrierForHeapReferenceSlowPathX86_64";
638 }
639
640 private:
FindAvailableCallerSaveRegister(CodeGenerator * codegen)641 CpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
642 size_t ref = static_cast<int>(ref_.AsRegister<CpuRegister>().AsRegister());
643 size_t obj = static_cast<int>(obj_.AsRegister<CpuRegister>().AsRegister());
644 for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
645 if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
646 return static_cast<CpuRegister>(i);
647 }
648 }
649 // We shall never fail to find a free caller-save register, as
650 // there are more than two core caller-save registers on x86-64
651 // (meaning it is possible to find one which is different from
652 // `ref` and `obj`).
653 DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
654 LOG(FATAL) << "Could not find a free caller-save register";
655 UNREACHABLE();
656 }
657
658 const Location out_;
659 const Location ref_;
660 const Location obj_;
661 const uint32_t offset_;
662 // An additional location containing an index to an array.
663 // Only used for HArrayGet and the UnsafeGetObject &
664 // UnsafeGetObjectVolatile intrinsics.
665 const Location index_;
666
667 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86_64);
668 };
669
670 // Slow path generating a read barrier for a GC root.
671 class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
672 public:
ReadBarrierForRootSlowPathX86_64(HInstruction * instruction,Location out,Location root)673 ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root)
674 : SlowPathCode(instruction), out_(out), root_(root) {
675 DCHECK(kEmitCompilerReadBarrier);
676 }
677
EmitNativeCode(CodeGenerator * codegen)678 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
679 LocationSummary* locations = instruction_->GetLocations();
680 DCHECK(locations->CanCall());
681 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
682 DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
683 << "Unexpected instruction in read barrier for GC root slow path: "
684 << instruction_->DebugName();
685
686 __ Bind(GetEntryLabel());
687 SaveLiveRegisters(codegen, locations);
688
689 InvokeRuntimeCallingConvention calling_convention;
690 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
691 x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
692 x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow),
693 instruction_,
694 instruction_->GetDexPc(),
695 this);
696 CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
697 x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
698
699 RestoreLiveRegisters(codegen, locations);
700 __ jmp(GetExitLabel());
701 }
702
GetDescription() const703 const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathX86_64"; }
704
705 private:
706 const Location out_;
707 const Location root_;
708
709 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86_64);
710 };
711
712 #undef __
713 #define __ down_cast<X86_64Assembler*>(GetAssembler())->
714
X86_64IntegerCondition(IfCondition cond)715 inline Condition X86_64IntegerCondition(IfCondition cond) {
716 switch (cond) {
717 case kCondEQ: return kEqual;
718 case kCondNE: return kNotEqual;
719 case kCondLT: return kLess;
720 case kCondLE: return kLessEqual;
721 case kCondGT: return kGreater;
722 case kCondGE: return kGreaterEqual;
723 case kCondB: return kBelow;
724 case kCondBE: return kBelowEqual;
725 case kCondA: return kAbove;
726 case kCondAE: return kAboveEqual;
727 }
728 LOG(FATAL) << "Unreachable";
729 UNREACHABLE();
730 }
731
732 // Maps FP condition to x86_64 name.
X86_64FPCondition(IfCondition cond)733 inline Condition X86_64FPCondition(IfCondition cond) {
734 switch (cond) {
735 case kCondEQ: return kEqual;
736 case kCondNE: return kNotEqual;
737 case kCondLT: return kBelow;
738 case kCondLE: return kBelowEqual;
739 case kCondGT: return kAbove;
740 case kCondGE: return kAboveEqual;
741 default: break; // should not happen
742 };
743 LOG(FATAL) << "Unreachable";
744 UNREACHABLE();
745 }
746
GetSupportedInvokeStaticOrDirectDispatch(const HInvokeStaticOrDirect::DispatchInfo & desired_dispatch_info,MethodReference target_method ATTRIBUTE_UNUSED)747 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch(
748 const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
749 MethodReference target_method ATTRIBUTE_UNUSED) {
750 switch (desired_dispatch_info.code_ptr_location) {
751 case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
752 case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
753 // For direct code, we actually prefer to call via the code pointer from ArtMethod*.
754 return HInvokeStaticOrDirect::DispatchInfo {
755 desired_dispatch_info.method_load_kind,
756 HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
757 desired_dispatch_info.method_load_data,
758 0u
759 };
760 default:
761 return desired_dispatch_info;
762 }
763 }
764
GenerateStaticOrDirectCall(HInvokeStaticOrDirect * invoke,Location temp)765 void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
766 Location temp) {
767 // All registers are assumed to be correctly set up.
768
769 Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
770 switch (invoke->GetMethodLoadKind()) {
771 case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
772 // temp = thread->string_init_entrypoint
773 __ gs()->movq(temp.AsRegister<CpuRegister>(),
774 Address::Absolute(invoke->GetStringInitOffset(), /* no_rip */ true));
775 break;
776 case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
777 callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
778 break;
779 case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
780 __ movq(temp.AsRegister<CpuRegister>(), Immediate(invoke->GetMethodAddress()));
781 break;
782 case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
783 __ movl(temp.AsRegister<CpuRegister>(), Immediate(0)); // Placeholder.
784 method_patches_.emplace_back(invoke->GetTargetMethod());
785 __ Bind(&method_patches_.back().label); // Bind the label at the end of the "movl" insn.
786 break;
787 case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
788 __ movq(temp.AsRegister<CpuRegister>(),
789 Address::Absolute(kDummy32BitOffset, /* no_rip */ false));
790 // Bind a new fixup label at the end of the "movl" insn.
791 uint32_t offset = invoke->GetDexCacheArrayOffset();
792 __ Bind(NewPcRelativeDexCacheArrayPatch(*invoke->GetTargetMethod().dex_file, offset));
793 break;
794 }
795 case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
796 Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
797 Register method_reg;
798 CpuRegister reg = temp.AsRegister<CpuRegister>();
799 if (current_method.IsRegister()) {
800 method_reg = current_method.AsRegister<Register>();
801 } else {
802 DCHECK(invoke->GetLocations()->Intrinsified());
803 DCHECK(!current_method.IsValid());
804 method_reg = reg.AsRegister();
805 __ movq(reg, Address(CpuRegister(RSP), kCurrentMethodStackOffset));
806 }
807 // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
808 __ movq(reg,
809 Address(CpuRegister(method_reg),
810 ArtMethod::DexCacheResolvedMethodsOffset(kX86_64PointerSize).SizeValue()));
811 // temp = temp[index_in_cache];
812 // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
813 uint32_t index_in_cache = invoke->GetDexMethodIndex();
814 __ movq(reg, Address(reg, CodeGenerator::GetCachePointerOffset(index_in_cache)));
815 break;
816 }
817 }
818
819 switch (invoke->GetCodePtrLocation()) {
820 case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
821 __ call(&frame_entry_label_);
822 break;
823 case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: {
824 relative_call_patches_.emplace_back(invoke->GetTargetMethod());
825 Label* label = &relative_call_patches_.back().label;
826 __ call(label); // Bind to the patch label, override at link time.
827 __ Bind(label); // Bind the label at the end of the "call" insn.
828 break;
829 }
830 case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
831 case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
832 // Filtered out by GetSupportedInvokeStaticOrDirectDispatch().
833 LOG(FATAL) << "Unsupported";
834 UNREACHABLE();
835 case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
836 // (callee_method + offset_of_quick_compiled_code)()
837 __ call(Address(callee_method.AsRegister<CpuRegister>(),
838 ArtMethod::EntryPointFromQuickCompiledCodeOffset(
839 kX86_64WordSize).SizeValue()));
840 break;
841 }
842
843 DCHECK(!IsLeafMethod());
844 }
845
GenerateVirtualCall(HInvokeVirtual * invoke,Location temp_in)846 void CodeGeneratorX86_64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_in) {
847 CpuRegister temp = temp_in.AsRegister<CpuRegister>();
848 size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
849 invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue();
850
851 // Use the calling convention instead of the location of the receiver, as
852 // intrinsics may have put the receiver in a different register. In the intrinsics
853 // slow path, the arguments have been moved to the right place, so here we are
854 // guaranteed that the receiver is the first register of the calling convention.
855 InvokeDexCallingConvention calling_convention;
856 Register receiver = calling_convention.GetRegisterAt(0);
857
858 size_t class_offset = mirror::Object::ClassOffset().SizeValue();
859 // /* HeapReference<Class> */ temp = receiver->klass_
860 __ movl(temp, Address(CpuRegister(receiver), class_offset));
861 MaybeRecordImplicitNullCheck(invoke);
862 // Instead of simply (possibly) unpoisoning `temp` here, we should
863 // emit a read barrier for the previous class reference load.
864 // However this is not required in practice, as this is an
865 // intermediate/temporary reference and because the current
866 // concurrent copying collector keeps the from-space memory
867 // intact/accessible until the end of the marking phase (the
868 // concurrent copying collector may not in the future).
869 __ MaybeUnpoisonHeapReference(temp);
870 // temp = temp->GetMethodAt(method_offset);
871 __ movq(temp, Address(temp, method_offset));
872 // call temp->GetEntryPoint();
873 __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
874 kX86_64WordSize).SizeValue()));
875 }
876
RecordSimplePatch()877 void CodeGeneratorX86_64::RecordSimplePatch() {
878 if (GetCompilerOptions().GetIncludePatchInformation()) {
879 simple_patches_.emplace_back();
880 __ Bind(&simple_patches_.back());
881 }
882 }
883
RecordStringPatch(HLoadString * load_string)884 void CodeGeneratorX86_64::RecordStringPatch(HLoadString* load_string) {
885 string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex());
886 __ Bind(&string_patches_.back().label);
887 }
888
NewPcRelativeDexCacheArrayPatch(const DexFile & dex_file,uint32_t element_offset)889 Label* CodeGeneratorX86_64::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
890 uint32_t element_offset) {
891 // Add a patch entry and return the label.
892 pc_relative_dex_cache_patches_.emplace_back(dex_file, element_offset);
893 return &pc_relative_dex_cache_patches_.back().label;
894 }
895
EmitLinkerPatches(ArenaVector<LinkerPatch> * linker_patches)896 void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
897 DCHECK(linker_patches->empty());
898 size_t size =
899 method_patches_.size() +
900 relative_call_patches_.size() +
901 pc_relative_dex_cache_patches_.size() +
902 simple_patches_.size() +
903 string_patches_.size();
904 linker_patches->reserve(size);
905 // The label points to the end of the "movl" insn but the literal offset for method
906 // patch needs to point to the embedded constant which occupies the last 4 bytes.
907 constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
908 for (const MethodPatchInfo<Label>& info : method_patches_) {
909 uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
910 linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset,
911 info.target_method.dex_file,
912 info.target_method.dex_method_index));
913 }
914 for (const MethodPatchInfo<Label>& info : relative_call_patches_) {
915 uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
916 linker_patches->push_back(LinkerPatch::RelativeCodePatch(literal_offset,
917 info.target_method.dex_file,
918 info.target_method.dex_method_index));
919 }
920 for (const PcRelativeDexCacheAccessInfo& info : pc_relative_dex_cache_patches_) {
921 uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
922 linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(literal_offset,
923 &info.target_dex_file,
924 info.label.Position(),
925 info.element_offset));
926 }
927 for (const Label& label : simple_patches_) {
928 uint32_t literal_offset = label.Position() - kLabelPositionToLiteralOffsetAdjustment;
929 linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
930 }
931 for (const StringPatchInfo<Label>& info : string_patches_) {
932 // These are always PC-relative, see GetSupportedLoadStringKind().
933 uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
934 linker_patches->push_back(LinkerPatch::RelativeStringPatch(literal_offset,
935 &info.dex_file,
936 info.label.Position(),
937 info.string_index));
938 }
939 }
940
DumpCoreRegister(std::ostream & stream,int reg) const941 void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
942 stream << Register(reg);
943 }
944
DumpFloatingPointRegister(std::ostream & stream,int reg) const945 void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
946 stream << FloatRegister(reg);
947 }
948
SaveCoreRegister(size_t stack_index,uint32_t reg_id)949 size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
950 __ movq(Address(CpuRegister(RSP), stack_index), CpuRegister(reg_id));
951 return kX86_64WordSize;
952 }
953
RestoreCoreRegister(size_t stack_index,uint32_t reg_id)954 size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
955 __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_index));
956 return kX86_64WordSize;
957 }
958
SaveFloatingPointRegister(size_t stack_index,uint32_t reg_id)959 size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
960 __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
961 return kX86_64WordSize;
962 }
963
RestoreFloatingPointRegister(size_t stack_index,uint32_t reg_id)964 size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
965 __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
966 return kX86_64WordSize;
967 }
968
InvokeRuntime(QuickEntrypointEnum entrypoint,HInstruction * instruction,uint32_t dex_pc,SlowPathCode * slow_path)969 void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint,
970 HInstruction* instruction,
971 uint32_t dex_pc,
972 SlowPathCode* slow_path) {
973 InvokeRuntime(GetThreadOffset<kX86_64WordSize>(entrypoint).Int32Value(),
974 instruction,
975 dex_pc,
976 slow_path);
977 }
978
InvokeRuntime(int32_t entry_point_offset,HInstruction * instruction,uint32_t dex_pc,SlowPathCode * slow_path)979 void CodeGeneratorX86_64::InvokeRuntime(int32_t entry_point_offset,
980 HInstruction* instruction,
981 uint32_t dex_pc,
982 SlowPathCode* slow_path) {
983 ValidateInvokeRuntime(instruction, slow_path);
984 __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip */ true));
985 RecordPcInfo(instruction, dex_pc, slow_path);
986 }
987
988 static constexpr int kNumberOfCpuRegisterPairs = 0;
989 // Use a fake return address register to mimic Quick.
990 static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
CodeGeneratorX86_64(HGraph * graph,const X86_64InstructionSetFeatures & isa_features,const CompilerOptions & compiler_options,OptimizingCompilerStats * stats)991 CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
992 const X86_64InstructionSetFeatures& isa_features,
993 const CompilerOptions& compiler_options,
994 OptimizingCompilerStats* stats)
995 : CodeGenerator(graph,
996 kNumberOfCpuRegisters,
997 kNumberOfFloatRegisters,
998 kNumberOfCpuRegisterPairs,
999 ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
1000 arraysize(kCoreCalleeSaves))
1001 | (1 << kFakeReturnRegister),
1002 ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
1003 arraysize(kFpuCalleeSaves)),
1004 compiler_options,
1005 stats),
1006 block_labels_(nullptr),
1007 location_builder_(graph, this),
1008 instruction_visitor_(graph, this),
1009 move_resolver_(graph->GetArena(), this),
1010 assembler_(graph->GetArena()),
1011 isa_features_(isa_features),
1012 constant_area_start_(0),
1013 method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1014 relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1015 pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1016 simple_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1017 string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1018 fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
1019 AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
1020 }
1021
InstructionCodeGeneratorX86_64(HGraph * graph,CodeGeneratorX86_64 * codegen)1022 InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
1023 CodeGeneratorX86_64* codegen)
1024 : InstructionCodeGenerator(graph, codegen),
1025 assembler_(codegen->GetAssembler()),
1026 codegen_(codegen) {}
1027
SetupBlockedRegisters() const1028 void CodeGeneratorX86_64::SetupBlockedRegisters() const {
1029 // Stack register is always reserved.
1030 blocked_core_registers_[RSP] = true;
1031
1032 // Block the register used as TMP.
1033 blocked_core_registers_[TMP] = true;
1034 }
1035
DWARFReg(Register reg)1036 static dwarf::Reg DWARFReg(Register reg) {
1037 return dwarf::Reg::X86_64Core(static_cast<int>(reg));
1038 }
1039
DWARFReg(FloatRegister reg)1040 static dwarf::Reg DWARFReg(FloatRegister reg) {
1041 return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
1042 }
1043
GenerateFrameEntry()1044 void CodeGeneratorX86_64::GenerateFrameEntry() {
1045 __ cfi().SetCurrentCFAOffset(kX86_64WordSize); // return address
1046 __ Bind(&frame_entry_label_);
1047 bool skip_overflow_check = IsLeafMethod()
1048 && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
1049 DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1050
1051 if (!skip_overflow_check) {
1052 __ testq(CpuRegister(RAX), Address(
1053 CpuRegister(RSP), -static_cast<int32_t>(GetStackOverflowReservedBytes(kX86_64))));
1054 RecordPcInfo(nullptr, 0);
1055 }
1056
1057 if (HasEmptyFrame()) {
1058 return;
1059 }
1060
1061 for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
1062 Register reg = kCoreCalleeSaves[i];
1063 if (allocated_registers_.ContainsCoreRegister(reg)) {
1064 __ pushq(CpuRegister(reg));
1065 __ cfi().AdjustCFAOffset(kX86_64WordSize);
1066 __ cfi().RelOffset(DWARFReg(reg), 0);
1067 }
1068 }
1069
1070 int adjust = GetFrameSize() - GetCoreSpillSize();
1071 __ subq(CpuRegister(RSP), Immediate(adjust));
1072 __ cfi().AdjustCFAOffset(adjust);
1073 uint32_t xmm_spill_location = GetFpuSpillStart();
1074 size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
1075
1076 for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
1077 if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1078 int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1079 __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i]));
1080 __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset);
1081 }
1082 }
1083
1084 __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset),
1085 CpuRegister(kMethodRegisterArgument));
1086 }
1087
GenerateFrameExit()1088 void CodeGeneratorX86_64::GenerateFrameExit() {
1089 __ cfi().RememberState();
1090 if (!HasEmptyFrame()) {
1091 uint32_t xmm_spill_location = GetFpuSpillStart();
1092 size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
1093 for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
1094 if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1095 int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1096 __ movsd(XmmRegister(kFpuCalleeSaves[i]), Address(CpuRegister(RSP), offset));
1097 __ cfi().Restore(DWARFReg(kFpuCalleeSaves[i]));
1098 }
1099 }
1100
1101 int adjust = GetFrameSize() - GetCoreSpillSize();
1102 __ addq(CpuRegister(RSP), Immediate(adjust));
1103 __ cfi().AdjustCFAOffset(-adjust);
1104
1105 for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
1106 Register reg = kCoreCalleeSaves[i];
1107 if (allocated_registers_.ContainsCoreRegister(reg)) {
1108 __ popq(CpuRegister(reg));
1109 __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
1110 __ cfi().Restore(DWARFReg(reg));
1111 }
1112 }
1113 }
1114 __ ret();
1115 __ cfi().RestoreState();
1116 __ cfi().DefCFAOffset(GetFrameSize());
1117 }
1118
Bind(HBasicBlock * block)1119 void CodeGeneratorX86_64::Bind(HBasicBlock* block) {
1120 __ Bind(GetLabelOf(block));
1121 }
1122
Move(Location destination,Location source)1123 void CodeGeneratorX86_64::Move(Location destination, Location source) {
1124 if (source.Equals(destination)) {
1125 return;
1126 }
1127 if (destination.IsRegister()) {
1128 CpuRegister dest = destination.AsRegister<CpuRegister>();
1129 if (source.IsRegister()) {
1130 __ movq(dest, source.AsRegister<CpuRegister>());
1131 } else if (source.IsFpuRegister()) {
1132 __ movd(dest, source.AsFpuRegister<XmmRegister>());
1133 } else if (source.IsStackSlot()) {
1134 __ movl(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1135 } else if (source.IsConstant()) {
1136 HConstant* constant = source.GetConstant();
1137 if (constant->IsLongConstant()) {
1138 Load64BitValue(dest, constant->AsLongConstant()->GetValue());
1139 } else {
1140 Load32BitValue(dest, GetInt32ValueOf(constant));
1141 }
1142 } else {
1143 DCHECK(source.IsDoubleStackSlot());
1144 __ movq(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1145 }
1146 } else if (destination.IsFpuRegister()) {
1147 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
1148 if (source.IsRegister()) {
1149 __ movd(dest, source.AsRegister<CpuRegister>());
1150 } else if (source.IsFpuRegister()) {
1151 __ movaps(dest, source.AsFpuRegister<XmmRegister>());
1152 } else if (source.IsConstant()) {
1153 HConstant* constant = source.GetConstant();
1154 int64_t value = CodeGenerator::GetInt64ValueOf(constant);
1155 if (constant->IsFloatConstant()) {
1156 Load32BitValue(dest, static_cast<int32_t>(value));
1157 } else {
1158 Load64BitValue(dest, value);
1159 }
1160 } else if (source.IsStackSlot()) {
1161 __ movss(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1162 } else {
1163 DCHECK(source.IsDoubleStackSlot());
1164 __ movsd(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1165 }
1166 } else if (destination.IsStackSlot()) {
1167 if (source.IsRegister()) {
1168 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
1169 source.AsRegister<CpuRegister>());
1170 } else if (source.IsFpuRegister()) {
1171 __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
1172 source.AsFpuRegister<XmmRegister>());
1173 } else if (source.IsConstant()) {
1174 HConstant* constant = source.GetConstant();
1175 int32_t value = GetInt32ValueOf(constant);
1176 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
1177 } else {
1178 DCHECK(source.IsStackSlot()) << source;
1179 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
1180 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
1181 }
1182 } else {
1183 DCHECK(destination.IsDoubleStackSlot());
1184 if (source.IsRegister()) {
1185 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
1186 source.AsRegister<CpuRegister>());
1187 } else if (source.IsFpuRegister()) {
1188 __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
1189 source.AsFpuRegister<XmmRegister>());
1190 } else if (source.IsConstant()) {
1191 HConstant* constant = source.GetConstant();
1192 int64_t value;
1193 if (constant->IsDoubleConstant()) {
1194 value = bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
1195 } else {
1196 DCHECK(constant->IsLongConstant());
1197 value = constant->AsLongConstant()->GetValue();
1198 }
1199 Store64BitValueToStack(destination, value);
1200 } else {
1201 DCHECK(source.IsDoubleStackSlot());
1202 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
1203 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
1204 }
1205 }
1206 }
1207
MoveConstant(Location location,int32_t value)1208 void CodeGeneratorX86_64::MoveConstant(Location location, int32_t value) {
1209 DCHECK(location.IsRegister());
1210 Load64BitValue(location.AsRegister<CpuRegister>(), static_cast<int64_t>(value));
1211 }
1212
MoveLocation(Location dst,Location src,Primitive::Type dst_type ATTRIBUTE_UNUSED)1213 void CodeGeneratorX86_64::MoveLocation(
1214 Location dst, Location src, Primitive::Type dst_type ATTRIBUTE_UNUSED) {
1215 Move(dst, src);
1216 }
1217
AddLocationAsTemp(Location location,LocationSummary * locations)1218 void CodeGeneratorX86_64::AddLocationAsTemp(Location location, LocationSummary* locations) {
1219 if (location.IsRegister()) {
1220 locations->AddTemp(location);
1221 } else {
1222 UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1223 }
1224 }
1225
HandleGoto(HInstruction * got,HBasicBlock * successor)1226 void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
1227 DCHECK(!successor->IsExitBlock());
1228
1229 HBasicBlock* block = got->GetBlock();
1230 HInstruction* previous = got->GetPrevious();
1231
1232 HLoopInformation* info = block->GetLoopInformation();
1233 if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
1234 GenerateSuspendCheck(info->GetSuspendCheck(), successor);
1235 return;
1236 }
1237
1238 if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
1239 GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
1240 }
1241 if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
1242 __ jmp(codegen_->GetLabelOf(successor));
1243 }
1244 }
1245
VisitGoto(HGoto * got)1246 void LocationsBuilderX86_64::VisitGoto(HGoto* got) {
1247 got->SetLocations(nullptr);
1248 }
1249
VisitGoto(HGoto * got)1250 void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) {
1251 HandleGoto(got, got->GetSuccessor());
1252 }
1253
VisitTryBoundary(HTryBoundary * try_boundary)1254 void LocationsBuilderX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
1255 try_boundary->SetLocations(nullptr);
1256 }
1257
VisitTryBoundary(HTryBoundary * try_boundary)1258 void InstructionCodeGeneratorX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
1259 HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
1260 if (!successor->IsExitBlock()) {
1261 HandleGoto(try_boundary, successor);
1262 }
1263 }
1264
VisitExit(HExit * exit)1265 void LocationsBuilderX86_64::VisitExit(HExit* exit) {
1266 exit->SetLocations(nullptr);
1267 }
1268
VisitExit(HExit * exit ATTRIBUTE_UNUSED)1269 void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
1270 }
1271
1272 template<class LabelType>
GenerateFPJumps(HCondition * cond,LabelType * true_label,LabelType * false_label)1273 void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond,
1274 LabelType* true_label,
1275 LabelType* false_label) {
1276 if (cond->IsFPConditionTrueIfNaN()) {
1277 __ j(kUnordered, true_label);
1278 } else if (cond->IsFPConditionFalseIfNaN()) {
1279 __ j(kUnordered, false_label);
1280 }
1281 __ j(X86_64FPCondition(cond->GetCondition()), true_label);
1282 }
1283
GenerateCompareTest(HCondition * condition)1284 void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) {
1285 LocationSummary* locations = condition->GetLocations();
1286
1287 Location left = locations->InAt(0);
1288 Location right = locations->InAt(1);
1289 Primitive::Type type = condition->InputAt(0)->GetType();
1290 switch (type) {
1291 case Primitive::kPrimBoolean:
1292 case Primitive::kPrimByte:
1293 case Primitive::kPrimChar:
1294 case Primitive::kPrimShort:
1295 case Primitive::kPrimInt:
1296 case Primitive::kPrimNot: {
1297 CpuRegister left_reg = left.AsRegister<CpuRegister>();
1298 if (right.IsConstant()) {
1299 int32_t value = CodeGenerator::GetInt32ValueOf(right.GetConstant());
1300 if (value == 0) {
1301 __ testl(left_reg, left_reg);
1302 } else {
1303 __ cmpl(left_reg, Immediate(value));
1304 }
1305 } else if (right.IsStackSlot()) {
1306 __ cmpl(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
1307 } else {
1308 __ cmpl(left_reg, right.AsRegister<CpuRegister>());
1309 }
1310 break;
1311 }
1312 case Primitive::kPrimLong: {
1313 CpuRegister left_reg = left.AsRegister<CpuRegister>();
1314 if (right.IsConstant()) {
1315 int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
1316 codegen_->Compare64BitValue(left_reg, value);
1317 } else if (right.IsDoubleStackSlot()) {
1318 __ cmpq(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
1319 } else {
1320 __ cmpq(left_reg, right.AsRegister<CpuRegister>());
1321 }
1322 break;
1323 }
1324 case Primitive::kPrimFloat: {
1325 if (right.IsFpuRegister()) {
1326 __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
1327 } else if (right.IsConstant()) {
1328 __ ucomiss(left.AsFpuRegister<XmmRegister>(),
1329 codegen_->LiteralFloatAddress(
1330 right.GetConstant()->AsFloatConstant()->GetValue()));
1331 } else {
1332 DCHECK(right.IsStackSlot());
1333 __ ucomiss(left.AsFpuRegister<XmmRegister>(),
1334 Address(CpuRegister(RSP), right.GetStackIndex()));
1335 }
1336 break;
1337 }
1338 case Primitive::kPrimDouble: {
1339 if (right.IsFpuRegister()) {
1340 __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
1341 } else if (right.IsConstant()) {
1342 __ ucomisd(left.AsFpuRegister<XmmRegister>(),
1343 codegen_->LiteralDoubleAddress(
1344 right.GetConstant()->AsDoubleConstant()->GetValue()));
1345 } else {
1346 DCHECK(right.IsDoubleStackSlot());
1347 __ ucomisd(left.AsFpuRegister<XmmRegister>(),
1348 Address(CpuRegister(RSP), right.GetStackIndex()));
1349 }
1350 break;
1351 }
1352 default:
1353 LOG(FATAL) << "Unexpected condition type " << type;
1354 }
1355 }
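// Note on the integer path above: a comparison against the constant 0 is
// emitted as `testl reg, reg` rather than `cmpl reg, $0`, which sets the same
// flags with a shorter encoding. A hypothetical HEqual(x, 0) feeding an HIf
// would therefore come out roughly as:
//
//   testl edi, edi
//   je    true_target
//
// This is a sketch of the emitted pattern, not literal assembler output.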
1356
1357 template<class LabelType>
1358 void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition,
1359 LabelType* true_target_in,
1360 LabelType* false_target_in) {
1361 // Generated branching requires both targets to be explicit. If either of the
1362 // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead.
1363 LabelType fallthrough_target;
1364 LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
1365 LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
1366
1367 // Generate the comparison to set the CC.
1368 GenerateCompareTest(condition);
1369
1370 // Now generate the correct jump(s).
1371 Primitive::Type type = condition->InputAt(0)->GetType();
1372 switch (type) {
1373 case Primitive::kPrimLong: {
1374 __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
1375 break;
1376 }
1377 case Primitive::kPrimFloat: {
1378 GenerateFPJumps(condition, true_target, false_target);
1379 break;
1380 }
1381 case Primitive::kPrimDouble: {
1382 GenerateFPJumps(condition, true_target, false_target);
1383 break;
1384 }
1385 default:
1386 LOG(FATAL) << "Unexpected condition type " << type;
1387 }
1388
1389 if (false_target != &fallthrough_target) {
1390 __ jmp(false_target);
1391 }
1392
1393 if (fallthrough_target.IsLinked()) {
1394 __ Bind(&fallthrough_target);
1395 }
1396 }
1397
1398 static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
1399 // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS
1400 // are set only strictly before `branch`. We can't use the eflags on FP
1401 // conditions if they are materialized due to the complex branching.
1402 return cond->IsCondition() &&
1403 cond->GetNext() == branch &&
1404 !Primitive::IsFloatingPointType(cond->InputAt(0)->GetType());
1405 }
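// Example of the reuse this enables: if the graph contains
//
//   c = HLessThan(a, b)   // materialized; the cmpl that computes it sets eflags
//   HIf(c)                // immediately follows `c` in the block
//
// then GenerateTestAndBranch() below can branch on the flags left behind by the
// materializing compare instead of re-testing the register, provided nothing
// (in particular no flag-clobbering move) was scheduled in between.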
1406
1407 template<class LabelType>
1408 void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction,
1409 size_t condition_input_index,
1410 LabelType* true_target,
1411 LabelType* false_target) {
1412 HInstruction* cond = instruction->InputAt(condition_input_index);
1413
1414 if (true_target == nullptr && false_target == nullptr) {
1415 // Nothing to do. The code always falls through.
1416 return;
1417 } else if (cond->IsIntConstant()) {
1418 // Constant condition, statically compared against "true" (integer value 1).
1419 if (cond->AsIntConstant()->IsTrue()) {
1420 if (true_target != nullptr) {
1421 __ jmp(true_target);
1422 }
1423 } else {
1424 DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
1425 if (false_target != nullptr) {
1426 __ jmp(false_target);
1427 }
1428 }
1429 return;
1430 }
1431
1432 // The following code generates these patterns:
1433 // (1) true_target == nullptr && false_target != nullptr
1434 // - opposite condition true => branch to false_target
1435 // (2) true_target != nullptr && false_target == nullptr
1436 // - condition true => branch to true_target
1437 // (3) true_target != nullptr && false_target != nullptr
1438 // - condition true => branch to true_target
1439 // - branch to false_target
1440 if (IsBooleanValueOrMaterializedCondition(cond)) {
1441 if (AreEflagsSetFrom(cond, instruction)) {
1442 if (true_target == nullptr) {
1443 __ j(X86_64IntegerCondition(cond->AsCondition()->GetOppositeCondition()), false_target);
1444 } else {
1445 __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target);
1446 }
1447 } else {
1448 // Materialized condition, compare against 0.
1449 Location lhs = instruction->GetLocations()->InAt(condition_input_index);
1450 if (lhs.IsRegister()) {
1451 __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
1452 } else {
1453 __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0));
1454 }
1455 if (true_target == nullptr) {
1456 __ j(kEqual, false_target);
1457 } else {
1458 __ j(kNotEqual, true_target);
1459 }
1460 }
1461 } else {
1462 // Condition has not been materialized, use its inputs as the
1463 // comparison and its condition as the branch condition.
1464 HCondition* condition = cond->AsCondition();
1465
1466 // If this is a long or FP comparison that has been folded into
1467 // the HCondition, generate the comparison directly.
1468 Primitive::Type type = condition->InputAt(0)->GetType();
1469 if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) {
1470 GenerateCompareTestAndBranch(condition, true_target, false_target);
1471 return;
1472 }
1473
1474 Location lhs = condition->GetLocations()->InAt(0);
1475 Location rhs = condition->GetLocations()->InAt(1);
1476 if (rhs.IsRegister()) {
1477 __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
1478 } else if (rhs.IsConstant()) {
1479 int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
1480 codegen_->Compare32BitValue(lhs.AsRegister<CpuRegister>(), constant);
1481 } else {
1482 __ cmpl(lhs.AsRegister<CpuRegister>(),
1483 Address(CpuRegister(RSP), rhs.GetStackIndex()));
1484 }
1485 if (true_target == nullptr) {
1486 __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target);
1487 } else {
1488 __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
1489 }
1490 }
1491
1492 // If neither branch falls through (case 3), the conditional branch to `true_target`
1493 // was already emitted (case 2) and we need to emit a jump to `false_target`.
1494 if (true_target != nullptr && false_target != nullptr) {
1495 __ jmp(false_target);
1496 }
1497 }
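// Rough shape of the three cases documented above, for a non-materialized
// integer condition `a < b` with both operands in registers (register names
// are placeholders, not the allocator's choices):
//
//   (1) only false_target:   cmpl eax, ecx ; jge false_target
//   (2) only true_target:    cmpl eax, ecx ; jl  true_target
//   (3) both targets:        cmpl eax, ecx ; jl  true_target ; jmp false_target
//
// Case (3) corresponds to the trailing unconditional jmp emitted just above.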
1498
1499 void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
1500 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
1501 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
1502 locations->SetInAt(0, Location::Any());
1503 }
1504 }
1505
1506 void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
1507 HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
1508 HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
1509 Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
1510 nullptr : codegen_->GetLabelOf(true_successor);
1511 Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
1512 nullptr : codegen_->GetLabelOf(false_successor);
1513 GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
1514 }
1515
1516 void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
1517 LocationSummary* locations = new (GetGraph()->GetArena())
1518 LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
1519 if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
1520 locations->SetInAt(0, Location::Any());
1521 }
1522 }
1523
1524 void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
1525 SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize);
1526 GenerateTestAndBranch<Label>(deoptimize,
1527 /* condition_input_index */ 0,
1528 slow_path->GetEntryLabel(),
1529 /* false_target */ nullptr);
1530 }
1531
1532 static bool SelectCanUseCMOV(HSelect* select) {
1533 // There are no conditional move instructions for XMMs.
1534 if (Primitive::IsFloatingPointType(select->GetType())) {
1535 return false;
1536 }
1537
1538 // An FP condition doesn't generate the single CC that we need.
1539 HInstruction* condition = select->GetCondition();
1540 if (condition->IsCondition() &&
1541 Primitive::IsFloatingPointType(condition->InputAt(0)->GetType())) {
1542 return false;
1543 }
1544
1545 // We can generate a CMOV for this Select.
1546 return true;
1547 }
1548
1549 void LocationsBuilderX86_64::VisitSelect(HSelect* select) {
1550 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
1551 if (Primitive::IsFloatingPointType(select->GetType())) {
1552 locations->SetInAt(0, Location::RequiresFpuRegister());
1553 locations->SetInAt(1, Location::Any());
1554 } else {
1555 locations->SetInAt(0, Location::RequiresRegister());
1556 if (SelectCanUseCMOV(select)) {
1557 if (select->InputAt(1)->IsConstant()) {
1558 locations->SetInAt(1, Location::RequiresRegister());
1559 } else {
1560 locations->SetInAt(1, Location::Any());
1561 }
1562 } else {
1563 locations->SetInAt(1, Location::Any());
1564 }
1565 }
1566 if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
1567 locations->SetInAt(2, Location::RequiresRegister());
1568 }
1569 locations->SetOut(Location::SameAsFirstInput());
1570 }
1571
1572 void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) {
1573 LocationSummary* locations = select->GetLocations();
1574 if (SelectCanUseCMOV(select)) {
1575 // If both the condition and the source types are integer, we can generate
1576 // a CMOV to implement Select.
1577 CpuRegister value_false = locations->InAt(0).AsRegister<CpuRegister>();
1578 Location value_true_loc = locations->InAt(1);
1579 DCHECK(locations->InAt(0).Equals(locations->Out()));
1580
1581 HInstruction* select_condition = select->GetCondition();
1582 Condition cond = kNotEqual;
1583
1584 // Figure out how to test the 'condition'.
1585 if (select_condition->IsCondition()) {
1586 HCondition* condition = select_condition->AsCondition();
1587 if (!condition->IsEmittedAtUseSite()) {
1588 // This was a previously materialized condition.
1589 // Can we use the existing condition code?
1590 if (AreEflagsSetFrom(condition, select)) {
1591 // Materialization was the previous instruction. Condition codes are right.
1592 cond = X86_64IntegerCondition(condition->GetCondition());
1593 } else {
1594 // No, we have to recreate the condition code.
1595 CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
1596 __ testl(cond_reg, cond_reg);
1597 }
1598 } else {
1599 GenerateCompareTest(condition);
1600 cond = X86_64IntegerCondition(condition->GetCondition());
1601 }
1602 } else {
1603 // Must be a boolean condition, which needs to be compared to 0.
1604 CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
1605 __ testl(cond_reg, cond_reg);
1606 }
1607
1608 // If the condition is true, overwrite the output, which already contains false.
1609 // Generate the correct sized CMOV.
1610 bool is_64_bit = Primitive::Is64BitType(select->GetType());
1611 if (value_true_loc.IsRegister()) {
1612 __ cmov(cond, value_false, value_true_loc.AsRegister<CpuRegister>(), is_64_bit);
1613 } else {
1614 __ cmov(cond,
1615 value_false,
1616 Address(CpuRegister(RSP), value_true_loc.GetStackIndex()), is_64_bit);
1617 }
1618 } else {
1619 NearLabel false_target;
1620 GenerateTestAndBranch<NearLabel>(select,
1621 /* condition_input_index */ 2,
1622 /* true_target */ nullptr,
1623 &false_target);
1624 codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
1625 __ Bind(&false_target);
1626 }
1627 }
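// Sketch of the CMOV path for an int Select with a materialized condition in
// `cond_reg` (register names are placeholders, not the allocator's choices):
//
//   movl   eax, <false_value>     // out starts as the false input (SameAsFirstInput)
//   testl  ecx, ecx               // re-test cond_reg against zero if flags are stale
//   cmovne eax, <true_value>      // overwrite only when the condition is true
//
// The 64-bit variant uses the quadword cmov; the branch-and-move fallback below
// is taken whenever SelectCanUseCMOV() returns false.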
1628
1629 void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
1630 new (GetGraph()->GetArena()) LocationSummary(info);
1631 }
1632
1633 void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo*) {
1634 // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
1635 }
1636
1637 void CodeGeneratorX86_64::GenerateNop() {
1638 __ nop();
1639 }
1640
1641 void LocationsBuilderX86_64::HandleCondition(HCondition* cond) {
1642 LocationSummary* locations =
1643 new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall);
1644 // Handle the long/FP comparisons made in instruction simplification.
1645 switch (cond->InputAt(0)->GetType()) {
1646 case Primitive::kPrimLong:
1647 locations->SetInAt(0, Location::RequiresRegister());
1648 locations->SetInAt(1, Location::Any());
1649 break;
1650 case Primitive::kPrimFloat:
1651 case Primitive::kPrimDouble:
1652 locations->SetInAt(0, Location::RequiresFpuRegister());
1653 locations->SetInAt(1, Location::Any());
1654 break;
1655 default:
1656 locations->SetInAt(0, Location::RequiresRegister());
1657 locations->SetInAt(1, Location::Any());
1658 break;
1659 }
1660 if (!cond->IsEmittedAtUseSite()) {
1661 locations->SetOut(Location::RequiresRegister());
1662 }
1663 }
1664
1665 void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) {
1666 if (cond->IsEmittedAtUseSite()) {
1667 return;
1668 }
1669
1670 LocationSummary* locations = cond->GetLocations();
1671 Location lhs = locations->InAt(0);
1672 Location rhs = locations->InAt(1);
1673 CpuRegister reg = locations->Out().AsRegister<CpuRegister>();
1674 NearLabel true_label, false_label;
1675
1676 switch (cond->InputAt(0)->GetType()) {
1677 default:
1678 // Integer case.
1679
1680 // Clear output register: setcc only sets the low byte.
1681 __ xorl(reg, reg);
1682
1683 if (rhs.IsRegister()) {
1684 __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
1685 } else if (rhs.IsConstant()) {
1686 int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
1687 codegen_->Compare32BitValue(lhs.AsRegister<CpuRegister>(), constant);
1688 } else {
1689 __ cmpl(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex()));
1690 }
1691 __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
1692 return;
1693 case Primitive::kPrimLong:
1694 // Clear output register: setcc only sets the low byte.
1695 __ xorl(reg, reg);
1696
1697 if (rhs.IsRegister()) {
1698 __ cmpq(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
1699 } else if (rhs.IsConstant()) {
1700 int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue();
1701 codegen_->Compare64BitValue(lhs.AsRegister<CpuRegister>(), value);
1702 } else {
1703 __ cmpq(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex()));
1704 }
1705 __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
1706 return;
1707 case Primitive::kPrimFloat: {
1708 XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
1709 if (rhs.IsConstant()) {
1710 float value = rhs.GetConstant()->AsFloatConstant()->GetValue();
1711 __ ucomiss(lhs_reg, codegen_->LiteralFloatAddress(value));
1712 } else if (rhs.IsStackSlot()) {
1713 __ ucomiss(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
1714 } else {
1715 __ ucomiss(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
1716 }
1717 GenerateFPJumps(cond, &true_label, &false_label);
1718 break;
1719 }
1720 case Primitive::kPrimDouble: {
1721 XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
1722 if (rhs.IsConstant()) {
1723 double value = rhs.GetConstant()->AsDoubleConstant()->GetValue();
1724 __ ucomisd(lhs_reg, codegen_->LiteralDoubleAddress(value));
1725 } else if (rhs.IsDoubleStackSlot()) {
1726 __ ucomisd(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
1727 } else {
1728 __ ucomisd(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
1729 }
1730 GenerateFPJumps(cond, &true_label, &false_label);
1731 break;
1732 }
1733 }
1734
1735 // Convert the jumps into the result.
1736 NearLabel done_label;
1737
1738 // False case: result = 0.
1739 __ Bind(&false_label);
1740 __ xorl(reg, reg);
1741 __ jmp(&done_label);
1742
1743 // True case: result = 1.
1744 __ Bind(&true_label);
1745 __ movl(reg, Immediate(1));
1746 __ Bind(&done_label);
1747 }
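// Worked example (illustrative only): materializing `x <= 7` for an int `x`
// held in esi, with the result wanted in eax, comes out as approximately:
//
//   xorl  eax, eax        // clear first: setcc writes only the low byte
//   cmpl  esi, 7
//   setle al
//
// The FP cases cannot use a single setcc because of the unordered (NaN) flag
// state, which is why they fall through to the jump/label sequence above.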
1748
1749 void LocationsBuilderX86_64::VisitEqual(HEqual* comp) {
1750 HandleCondition(comp);
1751 }
1752
1753 void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* comp) {
1754 HandleCondition(comp);
1755 }
1756
1757 void LocationsBuilderX86_64::VisitNotEqual(HNotEqual* comp) {
1758 HandleCondition(comp);
1759 }
1760
1761 void InstructionCodeGeneratorX86_64::VisitNotEqual(HNotEqual* comp) {
1762 HandleCondition(comp);
1763 }
1764
1765 void LocationsBuilderX86_64::VisitLessThan(HLessThan* comp) {
1766 HandleCondition(comp);
1767 }
1768
1769 void InstructionCodeGeneratorX86_64::VisitLessThan(HLessThan* comp) {
1770 HandleCondition(comp);
1771 }
1772
1773 void LocationsBuilderX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
1774 HandleCondition(comp);
1775 }
1776
1777 void InstructionCodeGeneratorX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
1778 HandleCondition(comp);
1779 }
1780
1781 void LocationsBuilderX86_64::VisitGreaterThan(HGreaterThan* comp) {
1782 HandleCondition(comp);
1783 }
1784
1785 void InstructionCodeGeneratorX86_64::VisitGreaterThan(HGreaterThan* comp) {
1786 HandleCondition(comp);
1787 }
1788
1789 void LocationsBuilderX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
1790 HandleCondition(comp);
1791 }
1792
1793 void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
1794 HandleCondition(comp);
1795 }
1796
1797 void LocationsBuilderX86_64::VisitBelow(HBelow* comp) {
1798 HandleCondition(comp);
1799 }
1800
1801 void InstructionCodeGeneratorX86_64::VisitBelow(HBelow* comp) {
1802 HandleCondition(comp);
1803 }
1804
1805 void LocationsBuilderX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
1806 HandleCondition(comp);
1807 }
1808
1809 void InstructionCodeGeneratorX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
1810 HandleCondition(comp);
1811 }
1812
1813 void LocationsBuilderX86_64::VisitAbove(HAbove* comp) {
1814 HandleCondition(comp);
1815 }
1816
1817 void InstructionCodeGeneratorX86_64::VisitAbove(HAbove* comp) {
1818 HandleCondition(comp);
1819 }
1820
1821 void LocationsBuilderX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
1822 HandleCondition(comp);
1823 }
1824
1825 void InstructionCodeGeneratorX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
1826 HandleCondition(comp);
1827 }
1828
1829 void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
1830 LocationSummary* locations =
1831 new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
1832 switch (compare->InputAt(0)->GetType()) {
1833 case Primitive::kPrimBoolean:
1834 case Primitive::kPrimByte:
1835 case Primitive::kPrimShort:
1836 case Primitive::kPrimChar:
1837 case Primitive::kPrimInt:
1838 case Primitive::kPrimLong: {
1839 locations->SetInAt(0, Location::RequiresRegister());
1840 locations->SetInAt(1, Location::Any());
1841 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
1842 break;
1843 }
1844 case Primitive::kPrimFloat:
1845 case Primitive::kPrimDouble: {
1846 locations->SetInAt(0, Location::RequiresFpuRegister());
1847 locations->SetInAt(1, Location::Any());
1848 locations->SetOut(Location::RequiresRegister());
1849 break;
1850 }
1851 default:
1852 LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
1853 }
1854 }
1855
1856 void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) {
1857 LocationSummary* locations = compare->GetLocations();
1858 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
1859 Location left = locations->InAt(0);
1860 Location right = locations->InAt(1);
1861
1862 NearLabel less, greater, done;
1863 Primitive::Type type = compare->InputAt(0)->GetType();
1864 Condition less_cond = kLess;
1865
1866 switch (type) {
1867 case Primitive::kPrimBoolean:
1868 case Primitive::kPrimByte:
1869 case Primitive::kPrimShort:
1870 case Primitive::kPrimChar:
1871 case Primitive::kPrimInt: {
1872 CpuRegister left_reg = left.AsRegister<CpuRegister>();
1873 if (right.IsConstant()) {
1874 int32_t value = right.GetConstant()->AsIntConstant()->GetValue();
1875 codegen_->Compare32BitValue(left_reg, value);
1876 } else if (right.IsStackSlot()) {
1877 __ cmpl(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
1878 } else {
1879 __ cmpl(left_reg, right.AsRegister<CpuRegister>());
1880 }
1881 break;
1882 }
1883 case Primitive::kPrimLong: {
1884 CpuRegister left_reg = left.AsRegister<CpuRegister>();
1885 if (right.IsConstant()) {
1886 int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
1887 codegen_->Compare64BitValue(left_reg, value);
1888 } else if (right.IsDoubleStackSlot()) {
1889 __ cmpq(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
1890 } else {
1891 __ cmpq(left_reg, right.AsRegister<CpuRegister>());
1892 }
1893 break;
1894 }
1895 case Primitive::kPrimFloat: {
1896 XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
1897 if (right.IsConstant()) {
1898 float value = right.GetConstant()->AsFloatConstant()->GetValue();
1899 __ ucomiss(left_reg, codegen_->LiteralFloatAddress(value));
1900 } else if (right.IsStackSlot()) {
1901 __ ucomiss(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
1902 } else {
1903 __ ucomiss(left_reg, right.AsFpuRegister<XmmRegister>());
1904 }
1905 __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
1906 less_cond = kBelow; // ucomis{s,d} sets CF
1907 break;
1908 }
1909 case Primitive::kPrimDouble: {
1910 XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
1911 if (right.IsConstant()) {
1912 double value = right.GetConstant()->AsDoubleConstant()->GetValue();
1913 __ ucomisd(left_reg, codegen_->LiteralDoubleAddress(value));
1914 } else if (right.IsDoubleStackSlot()) {
1915 __ ucomisd(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
1916 } else {
1917 __ ucomisd(left_reg, right.AsFpuRegister<XmmRegister>());
1918 }
1919 __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
1920 less_cond = kBelow; // ucomis{s,d} sets CF
1921 break;
1922 }
1923 default:
1924 LOG(FATAL) << "Unexpected compare type " << type;
1925 }
1926
1927 __ movl(out, Immediate(0));
1928 __ j(kEqual, &done);
1929 __ j(less_cond, &less);
1930
1931 __ Bind(&greater);
1932 __ movl(out, Immediate(1));
1933 __ jmp(&done);
1934
1935 __ Bind(&less);
1936 __ movl(out, Immediate(-1));
1937
1938 __ Bind(&done);
1939 }
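// Behavioral sketch of the flag-to-result conversion above, assuming a float
// compare with gt bias (Float.compare-style semantics):
//
//   result =  0  if left == right          (kEqual taken right after ucomiss)
//   result = -1  if left <  right          (kBelow; ucomis{s,d} sets CF on less)
//   result = +1  otherwise, including NaN  (the unordered jump targets `greater`)
//
// With lt bias the unordered jump targets `less`, so NaN yields -1 instead.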
1940
1941 void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) {
1942 LocationSummary* locations =
1943 new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
1944 locations->SetOut(Location::ConstantLocation(constant));
1945 }
1946
1947 void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
1948 // Will be generated at use site.
1949 }
1950
1951 void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) {
1952 LocationSummary* locations =
1953 new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
1954 locations->SetOut(Location::ConstantLocation(constant));
1955 }
1956
1957 void InstructionCodeGeneratorX86_64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
1958 // Will be generated at use site.
1959 }
1960
1961 void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) {
1962 LocationSummary* locations =
1963 new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
1964 locations->SetOut(Location::ConstantLocation(constant));
1965 }
1966
1967 void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
1968 // Will be generated at use site.
1969 }
1970
1971 void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) {
1972 LocationSummary* locations =
1973 new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
1974 locations->SetOut(Location::ConstantLocation(constant));
1975 }
1976
1977 void InstructionCodeGeneratorX86_64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
1978 // Will be generated at use site.
1979 }
1980
1981 void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) {
1982 LocationSummary* locations =
1983 new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
1984 locations->SetOut(Location::ConstantLocation(constant));
1985 }
1986
1987 void InstructionCodeGeneratorX86_64::VisitDoubleConstant(
1988 HDoubleConstant* constant ATTRIBUTE_UNUSED) {
1989 // Will be generated at use site.
1990 }
1991
1992 void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
1993 memory_barrier->SetLocations(nullptr);
1994 }
1995
1996 void InstructionCodeGeneratorX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
1997 codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
1998 }
1999
2000 void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
2001 ret->SetLocations(nullptr);
2002 }
2003
2004 void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
2005 codegen_->GenerateFrameExit();
2006 }
2007
2008 void LocationsBuilderX86_64::VisitReturn(HReturn* ret) {
2009 LocationSummary* locations =
2010 new (GetGraph()->GetArena()) LocationSummary(ret, LocationSummary::kNoCall);
2011 switch (ret->InputAt(0)->GetType()) {
2012 case Primitive::kPrimBoolean:
2013 case Primitive::kPrimByte:
2014 case Primitive::kPrimChar:
2015 case Primitive::kPrimShort:
2016 case Primitive::kPrimInt:
2017 case Primitive::kPrimNot:
2018 case Primitive::kPrimLong:
2019 locations->SetInAt(0, Location::RegisterLocation(RAX));
2020 break;
2021
2022 case Primitive::kPrimFloat:
2023 case Primitive::kPrimDouble:
2024 locations->SetInAt(0, Location::FpuRegisterLocation(XMM0));
2025 break;
2026
2027 default:
2028 LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
2029 }
2030 }
2031
2032 void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) {
2033 if (kIsDebugBuild) {
2034 switch (ret->InputAt(0)->GetType()) {
2035 case Primitive::kPrimBoolean:
2036 case Primitive::kPrimByte:
2037 case Primitive::kPrimChar:
2038 case Primitive::kPrimShort:
2039 case Primitive::kPrimInt:
2040 case Primitive::kPrimNot:
2041 case Primitive::kPrimLong:
2042 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<CpuRegister>().AsRegister(), RAX);
2043 break;
2044
2045 case Primitive::kPrimFloat:
2046 case Primitive::kPrimDouble:
2047 DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
2048 XMM0);
2049 break;
2050
2051 default:
2052 LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
2053 }
2054 }
2055 codegen_->GenerateFrameExit();
2056 }
2057
2058 Location InvokeDexCallingConventionVisitorX86_64::GetReturnLocation(Primitive::Type type) const {
2059 switch (type) {
2060 case Primitive::kPrimBoolean:
2061 case Primitive::kPrimByte:
2062 case Primitive::kPrimChar:
2063 case Primitive::kPrimShort:
2064 case Primitive::kPrimInt:
2065 case Primitive::kPrimNot:
2066 case Primitive::kPrimLong:
2067 return Location::RegisterLocation(RAX);
2068
2069 case Primitive::kPrimVoid:
2070 return Location::NoLocation();
2071
2072 case Primitive::kPrimDouble:
2073 case Primitive::kPrimFloat:
2074 return Location::FpuRegisterLocation(XMM0);
2075 }
2076
2077 UNREACHABLE();
2078 }
2079
2080 Location InvokeDexCallingConventionVisitorX86_64::GetMethodLocation() const {
2081 return Location::RegisterLocation(kMethodRegisterArgument);
2082 }
2083
2084 Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(Primitive::Type type) {
2085 switch (type) {
2086 case Primitive::kPrimBoolean:
2087 case Primitive::kPrimByte:
2088 case Primitive::kPrimChar:
2089 case Primitive::kPrimShort:
2090 case Primitive::kPrimInt:
2091 case Primitive::kPrimNot: {
2092 uint32_t index = gp_index_++;
2093 stack_index_++;
2094 if (index < calling_convention.GetNumberOfRegisters()) {
2095 return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2096 } else {
2097 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2098 }
2099 }
2100
2101 case Primitive::kPrimLong: {
2102 uint32_t index = gp_index_;
2103 stack_index_ += 2;
2104 if (index < calling_convention.GetNumberOfRegisters()) {
2105 gp_index_ += 1;
2106 return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2107 } else {
2108 gp_index_ += 2;
2109 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
2110 }
2111 }
2112
2113 case Primitive::kPrimFloat: {
2114 uint32_t index = float_index_++;
2115 stack_index_++;
2116 if (index < calling_convention.GetNumberOfFpuRegisters()) {
2117 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
2118 } else {
2119 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2120 }
2121 }
2122
2123 case Primitive::kPrimDouble: {
2124 uint32_t index = float_index_++;
2125 stack_index_ += 2;
2126 if (index < calling_convention.GetNumberOfFpuRegisters()) {
2127 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
2128 } else {
2129 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
2130 }
2131 }
2132
2133 case Primitive::kPrimVoid:
2134 LOG(FATAL) << "Unexpected parameter type " << type;
2135 break;
2136 }
2137 return Location::NoLocation();
2138 }
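// Example of how these counters assign locations. Assuming the usual ART x86-64
// managed convention (RDI reserved for the ArtMethod*, integer arguments in
// RSI/RDX/RCX/R8/R9, FP arguments in XMM0..XMM7; the exact lists live in the
// InvokeDexCallingConvention tables, so treat them as an assumption here):
//
//   foo(int a, long b, float c)  ->  a: RSI,  b: RDX,  c: XMM0
//
// A long consumes one GP register but two stack_index_ slots, so an argument
// that spills past the registers still lands at the correct stack offset.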
2139
2140 void LocationsBuilderX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2141 // The trampoline uses the same calling convention as dex calling conventions,
2142 // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
2143 // the method_idx.
2144 HandleInvoke(invoke);
2145 }
2146
2147 void InstructionCodeGeneratorX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2148 codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
2149 }
2150
2151 void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2152 // Explicit clinit checks triggered by static invokes must have been pruned by
2153 // art::PrepareForRegisterAllocation.
2154 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2155
2156 IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2157 if (intrinsic.TryDispatch(invoke)) {
2158 return;
2159 }
2160
2161 HandleInvoke(invoke);
2162 }
2163
2164 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
2165 if (invoke->GetLocations()->Intrinsified()) {
2166 IntrinsicCodeGeneratorX86_64 intrinsic(codegen);
2167 intrinsic.Dispatch(invoke);
2168 return true;
2169 }
2170 return false;
2171 }
2172
2173 void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2174 // Explicit clinit checks triggered by static invokes must have been pruned by
2175 // art::PrepareForRegisterAllocation.
2176 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2177
2178 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2179 return;
2180 }
2181
2182 LocationSummary* locations = invoke->GetLocations();
2183 codegen_->GenerateStaticOrDirectCall(
2184 invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
2185 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2186 }
2187
2188 void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
2189 InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
2190 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2191 }
2192
2193 void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2194 IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2195 if (intrinsic.TryDispatch(invoke)) {
2196 return;
2197 }
2198
2199 HandleInvoke(invoke);
2200 }
2201
2202 void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2203 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2204 return;
2205 }
2206
2207 codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
2208 DCHECK(!codegen_->IsLeafMethod());
2209 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2210 }
2211
2212 void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
2213 HandleInvoke(invoke);
2214 // Add the hidden argument.
2215 invoke->GetLocations()->AddTemp(Location::RegisterLocation(RAX));
2216 }
2217
2218 void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
2219 // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
2220 LocationSummary* locations = invoke->GetLocations();
2221 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2222 CpuRegister hidden_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
2223 Location receiver = locations->InAt(0);
2224 size_t class_offset = mirror::Object::ClassOffset().SizeValue();
2225
2226 // Set the hidden argument. It is safe to do this here, as RAX
2227 // won't be modified thereafter, before the `call` instruction.
2228 DCHECK_EQ(RAX, hidden_reg.AsRegister());
2229 codegen_->Load64BitValue(hidden_reg, invoke->GetDexMethodIndex());
2230
2231 if (receiver.IsStackSlot()) {
2232 __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex()));
2233 // /* HeapReference<Class> */ temp = temp->klass_
2234 __ movl(temp, Address(temp, class_offset));
2235 } else {
2236 // /* HeapReference<Class> */ temp = receiver->klass_
2237 __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
2238 }
2239 codegen_->MaybeRecordImplicitNullCheck(invoke);
2240 // Instead of simply (possibly) unpoisoning `temp` here, we should
2241 // emit a read barrier for the previous class reference load.
2242 // However this is not required in practice, as this is an
2243 // intermediate/temporary reference and because the current
2244 // concurrent copying collector keeps the from-space memory
2245 // intact/accessible until the end of the marking phase (the
2246 // concurrent copying collector may not do so in the future).
2247 __ MaybeUnpoisonHeapReference(temp);
2248 // temp = temp->GetAddressOfIMT()
2249 __ movq(temp,
2250 Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
2251 // Compute the offset of this interface method's entry in the IMT.
2252 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
2253 invoke->GetImtIndex() % ImTable::kSize, kX86_64PointerSize));
2254 // temp = temp->GetImtEntryAt(method_offset);
2255 __ movq(temp, Address(temp, method_offset));
2256 // call temp->GetEntryPoint();
2257 __ call(Address(temp,
2258 ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64WordSize).SizeValue()));
2259
2260 DCHECK(!codegen_->IsLeafMethod());
2261 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2262 }
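// Summary of the dispatch sequence emitted above (pseudo-assembly; offsets are
// symbolic and the receiver is assumed to already be in a register):
//
//   movl  temp, [receiver + class_offset]   // load the Class*
//   movq  temp, [temp + imt_ptr_offset]     // load the IMT
//   movq  temp, [temp + method_offset]      // pick the ImTable slot for this method
//   call  [temp + entry_point_offset]       // enter the resolved ArtMethod's code
//
// RAX carries the dex method index as the hidden argument so that the IMT
// conflict trampoline can disambiguate interface methods sharing a slot.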
2263
2264 void LocationsBuilderX86_64::VisitNeg(HNeg* neg) {
2265 LocationSummary* locations =
2266 new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall);
2267 switch (neg->GetResultType()) {
2268 case Primitive::kPrimInt:
2269 case Primitive::kPrimLong:
2270 locations->SetInAt(0, Location::RequiresRegister());
2271 locations->SetOut(Location::SameAsFirstInput());
2272 break;
2273
2274 case Primitive::kPrimFloat:
2275 case Primitive::kPrimDouble:
2276 locations->SetInAt(0, Location::RequiresFpuRegister());
2277 locations->SetOut(Location::SameAsFirstInput());
2278 locations->AddTemp(Location::RequiresFpuRegister());
2279 break;
2280
2281 default:
2282 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2283 }
2284 }
2285
2286 void InstructionCodeGeneratorX86_64::VisitNeg(HNeg* neg) {
2287 LocationSummary* locations = neg->GetLocations();
2288 Location out = locations->Out();
2289 Location in = locations->InAt(0);
2290 switch (neg->GetResultType()) {
2291 case Primitive::kPrimInt:
2292 DCHECK(in.IsRegister());
2293 DCHECK(in.Equals(out));
2294 __ negl(out.AsRegister<CpuRegister>());
2295 break;
2296
2297 case Primitive::kPrimLong:
2298 DCHECK(in.IsRegister());
2299 DCHECK(in.Equals(out));
2300 __ negq(out.AsRegister<CpuRegister>());
2301 break;
2302
2303 case Primitive::kPrimFloat: {
2304 DCHECK(in.Equals(out));
2305 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2306 // Implement float negation with an exclusive or with value
2307 // 0x80000000 (mask for bit 31, representing the sign of a
2308 // single-precision floating-point number).
2309 __ movss(mask, codegen_->LiteralInt32Address(0x80000000));
2310 __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2311 break;
2312 }
2313
2314 case Primitive::kPrimDouble: {
2315 DCHECK(in.Equals(out));
2316 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2317 // Implement double negation with an exclusive or with value
2318 // 0x8000000000000000 (mask for bit 63, representing the sign of
2319 // a double-precision floating-point number).
2320 __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000)));
2321 __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
2322 break;
2323 }
2324
2325 default:
2326 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2327 }
2328 }
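// Worked example for the float case above: negation is a sign-bit flip, so for
// an input of 1.0f (bit pattern 0x3F800000) the xorps with the 0x80000000 mask
// yields 0xBF800000, i.e. -1.0f. The same flip turns 0.0f into -0.0f and keeps
// NaN as NaN, matching the semantics of Java's unary minus on floating-point
// values.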
2329
2330 void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) {
2331 LocationSummary* locations =
2332 new (GetGraph()->GetArena()) LocationSummary(conversion, LocationSummary::kNoCall);
2333 Primitive::Type result_type = conversion->GetResultType();
2334 Primitive::Type input_type = conversion->GetInputType();
2335 DCHECK_NE(result_type, input_type);
2336
2337 // The Java language does not allow treating boolean as an integral type but
2338 // our bit representation makes it safe.
2339
2340 switch (result_type) {
2341 case Primitive::kPrimByte:
2342 switch (input_type) {
2343 case Primitive::kPrimLong:
2344 // Type conversion from long to byte is a result of code transformations.
2345 case Primitive::kPrimBoolean:
2346 // Boolean input is a result of code transformations.
2347 case Primitive::kPrimShort:
2348 case Primitive::kPrimInt:
2349 case Primitive::kPrimChar:
2350 // Processing a Dex `int-to-byte' instruction.
2351 locations->SetInAt(0, Location::Any());
2352 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2353 break;
2354
2355 default:
2356 LOG(FATAL) << "Unexpected type conversion from " << input_type
2357 << " to " << result_type;
2358 }
2359 break;
2360
2361 case Primitive::kPrimShort:
2362 switch (input_type) {
2363 case Primitive::kPrimLong:
2364 // Type conversion from long to short is a result of code transformations.
2365 case Primitive::kPrimBoolean:
2366 // Boolean input is a result of code transformations.
2367 case Primitive::kPrimByte:
2368 case Primitive::kPrimInt:
2369 case Primitive::kPrimChar:
2370 // Processing a Dex `int-to-short' instruction.
2371 locations->SetInAt(0, Location::Any());
2372 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2373 break;
2374
2375 default:
2376 LOG(FATAL) << "Unexpected type conversion from " << input_type
2377 << " to " << result_type;
2378 }
2379 break;
2380
2381 case Primitive::kPrimInt:
2382 switch (input_type) {
2383 case Primitive::kPrimLong:
2384 // Processing a Dex `long-to-int' instruction.
2385 locations->SetInAt(0, Location::Any());
2386 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2387 break;
2388
2389 case Primitive::kPrimFloat:
2390 // Processing a Dex `float-to-int' instruction.
2391 locations->SetInAt(0, Location::RequiresFpuRegister());
2392 locations->SetOut(Location::RequiresRegister());
2393 break;
2394
2395 case Primitive::kPrimDouble:
2396 // Processing a Dex `double-to-int' instruction.
2397 locations->SetInAt(0, Location::RequiresFpuRegister());
2398 locations->SetOut(Location::RequiresRegister());
2399 break;
2400
2401 default:
2402 LOG(FATAL) << "Unexpected type conversion from " << input_type
2403 << " to " << result_type;
2404 }
2405 break;
2406
2407 case Primitive::kPrimLong:
2408 switch (input_type) {
2409 case Primitive::kPrimBoolean:
2410 // Boolean input is a result of code transformations.
2411 case Primitive::kPrimByte:
2412 case Primitive::kPrimShort:
2413 case Primitive::kPrimInt:
2414 case Primitive::kPrimChar:
2415 // Processing a Dex `int-to-long' instruction.
2416 // TODO: We would benefit from a (to-be-implemented)
2417 // Location::RegisterOrStackSlot requirement for this input.
2418 locations->SetInAt(0, Location::RequiresRegister());
2419 locations->SetOut(Location::RequiresRegister());
2420 break;
2421
2422 case Primitive::kPrimFloat:
2423 // Processing a Dex `float-to-long' instruction.
2424 locations->SetInAt(0, Location::RequiresFpuRegister());
2425 locations->SetOut(Location::RequiresRegister());
2426 break;
2427
2428 case Primitive::kPrimDouble:
2429 // Processing a Dex `double-to-long' instruction.
2430 locations->SetInAt(0, Location::RequiresFpuRegister());
2431 locations->SetOut(Location::RequiresRegister());
2432 break;
2433
2434 default:
2435 LOG(FATAL) << "Unexpected type conversion from " << input_type
2436 << " to " << result_type;
2437 }
2438 break;
2439
2440 case Primitive::kPrimChar:
2441 switch (input_type) {
2442 case Primitive::kPrimLong:
2443 // Type conversion from long to char is a result of code transformations.
2444 case Primitive::kPrimBoolean:
2445 // Boolean input is a result of code transformations.
2446 case Primitive::kPrimByte:
2447 case Primitive::kPrimShort:
2448 case Primitive::kPrimInt:
2449 // Processing a Dex `int-to-char' instruction.
2450 locations->SetInAt(0, Location::Any());
2451 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2452 break;
2453
2454 default:
2455 LOG(FATAL) << "Unexpected type conversion from " << input_type
2456 << " to " << result_type;
2457 }
2458 break;
2459
2460 case Primitive::kPrimFloat:
2461 switch (input_type) {
2462 case Primitive::kPrimBoolean:
2463 // Boolean input is a result of code transformations.
2464 case Primitive::kPrimByte:
2465 case Primitive::kPrimShort:
2466 case Primitive::kPrimInt:
2467 case Primitive::kPrimChar:
2468 // Processing a Dex `int-to-float' instruction.
2469 locations->SetInAt(0, Location::Any());
2470 locations->SetOut(Location::RequiresFpuRegister());
2471 break;
2472
2473 case Primitive::kPrimLong:
2474 // Processing a Dex `long-to-float' instruction.
2475 locations->SetInAt(0, Location::Any());
2476 locations->SetOut(Location::RequiresFpuRegister());
2477 break;
2478
2479 case Primitive::kPrimDouble:
2480 // Processing a Dex `double-to-float' instruction.
2481 locations->SetInAt(0, Location::Any());
2482 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2483 break;
2484
2485 default:
2486 LOG(FATAL) << "Unexpected type conversion from " << input_type
2487 << " to " << result_type;
2488 };
2489 break;
2490
2491 case Primitive::kPrimDouble:
2492 switch (input_type) {
2493 case Primitive::kPrimBoolean:
2494 // Boolean input is a result of code transformations.
2495 case Primitive::kPrimByte:
2496 case Primitive::kPrimShort:
2497 case Primitive::kPrimInt:
2498 case Primitive::kPrimChar:
2499 // Processing a Dex `int-to-double' instruction.
2500 locations->SetInAt(0, Location::Any());
2501 locations->SetOut(Location::RequiresFpuRegister());
2502 break;
2503
2504 case Primitive::kPrimLong:
2505 // Processing a Dex `long-to-double' instruction.
2506 locations->SetInAt(0, Location::Any());
2507 locations->SetOut(Location::RequiresFpuRegister());
2508 break;
2509
2510 case Primitive::kPrimFloat:
2511 // Processing a Dex `float-to-double' instruction.
2512 locations->SetInAt(0, Location::Any());
2513 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2514 break;
2515
2516 default:
2517 LOG(FATAL) << "Unexpected type conversion from " << input_type
2518 << " to " << result_type;
2519 }
2520 break;
2521
2522 default:
2523 LOG(FATAL) << "Unexpected type conversion from " << input_type
2524 << " to " << result_type;
2525 }
2526 }
2527
2528 void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conversion) {
2529 LocationSummary* locations = conversion->GetLocations();
2530 Location out = locations->Out();
2531 Location in = locations->InAt(0);
2532 Primitive::Type result_type = conversion->GetResultType();
2533 Primitive::Type input_type = conversion->GetInputType();
2534 DCHECK_NE(result_type, input_type);
2535 switch (result_type) {
2536 case Primitive::kPrimByte:
2537 switch (input_type) {
2538 case Primitive::kPrimLong:
2539 // Type conversion from long to byte is a result of code transformations.
2540 case Primitive::kPrimBoolean:
2541 // Boolean input is a result of code transformations.
2542 case Primitive::kPrimShort:
2543 case Primitive::kPrimInt:
2544 case Primitive::kPrimChar:
2545 // Processing a Dex `int-to-byte' instruction.
2546 if (in.IsRegister()) {
2547 __ movsxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2548 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2549 __ movsxb(out.AsRegister<CpuRegister>(),
2550 Address(CpuRegister(RSP), in.GetStackIndex()));
2551 } else {
2552 __ movl(out.AsRegister<CpuRegister>(),
2553 Immediate(static_cast<int8_t>(Int64FromConstant(in.GetConstant()))));
2554 }
2555 break;
2556
2557 default:
2558 LOG(FATAL) << "Unexpected type conversion from " << input_type
2559 << " to " << result_type;
2560 }
2561 break;
2562
2563 case Primitive::kPrimShort:
2564 switch (input_type) {
2565 case Primitive::kPrimLong:
2566 // Type conversion from long to short is a result of code transformations.
2567 case Primitive::kPrimBoolean:
2568 // Boolean input is a result of code transformations.
2569 case Primitive::kPrimByte:
2570 case Primitive::kPrimInt:
2571 case Primitive::kPrimChar:
2572 // Processing a Dex `int-to-short' instruction.
2573 if (in.IsRegister()) {
2574 __ movsxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2575 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2576 __ movsxw(out.AsRegister<CpuRegister>(),
2577 Address(CpuRegister(RSP), in.GetStackIndex()));
2578 } else {
2579 __ movl(out.AsRegister<CpuRegister>(),
2580 Immediate(static_cast<int16_t>(Int64FromConstant(in.GetConstant()))));
2581 }
2582 break;
2583
2584 default:
2585 LOG(FATAL) << "Unexpected type conversion from " << input_type
2586 << " to " << result_type;
2587 }
2588 break;
2589
2590 case Primitive::kPrimInt:
2591 switch (input_type) {
2592 case Primitive::kPrimLong:
2593 // Processing a Dex `long-to-int' instruction.
2594 if (in.IsRegister()) {
2595 __ movl(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2596 } else if (in.IsDoubleStackSlot()) {
2597 __ movl(out.AsRegister<CpuRegister>(),
2598 Address(CpuRegister(RSP), in.GetStackIndex()));
2599 } else {
2600 DCHECK(in.IsConstant());
2601 DCHECK(in.GetConstant()->IsLongConstant());
2602 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2603 __ movl(out.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
2604 }
2605 break;
2606
2607 case Primitive::kPrimFloat: {
2608 // Processing a Dex `float-to-int' instruction.
2609 XmmRegister input = in.AsFpuRegister<XmmRegister>();
2610 CpuRegister output = out.AsRegister<CpuRegister>();
2611 NearLabel done, nan;
2612
2613 __ movl(output, Immediate(kPrimIntMax));
2614 // if input >= (float)INT_MAX goto done
2615 __ comiss(input, codegen_->LiteralFloatAddress(kPrimIntMax));
2616 __ j(kAboveEqual, &done);
2617 // if input == NaN goto nan
2618 __ j(kUnordered, &nan);
2619 // output = float-to-int-truncate(input)
2620 __ cvttss2si(output, input, false);
2621 __ jmp(&done);
2622 __ Bind(&nan);
2623 // output = 0
2624 __ xorl(output, output);
2625 __ Bind(&done);
2626 break;
2627 }
2628
2629 case Primitive::kPrimDouble: {
2630 // Processing a Dex `double-to-int' instruction.
2631 XmmRegister input = in.AsFpuRegister<XmmRegister>();
2632 CpuRegister output = out.AsRegister<CpuRegister>();
2633 NearLabel done, nan;
2634
2635 __ movl(output, Immediate(kPrimIntMax));
2636 // if input >= (double)INT_MAX goto done
2637 __ comisd(input, codegen_->LiteralDoubleAddress(kPrimIntMax));
2638 __ j(kAboveEqual, &done);
2639 // if input == NaN goto nan
2640 __ j(kUnordered, &nan);
2641 // output = double-to-int-truncate(input)
2642 __ cvttsd2si(output, input);
2643 __ jmp(&done);
2644 __ Bind(&nan);
2645 // output = 0
2646 __ xorl(output, output);
2647 __ Bind(&done);
2648 break;
2649 }
2650
2651 default:
2652 LOG(FATAL) << "Unexpected type conversion from " << input_type
2653 << " to " << result_type;
2654 }
2655 break;
2656
2657 case Primitive::kPrimLong:
2658 DCHECK(out.IsRegister());
2659 switch (input_type) {
2660 case Primitive::kPrimBoolean:
2661 // Boolean input is a result of code transformations.
2662 case Primitive::kPrimByte:
2663 case Primitive::kPrimShort:
2664 case Primitive::kPrimInt:
2665 case Primitive::kPrimChar:
2666 // Processing a Dex `int-to-long' instruction.
2667 DCHECK(in.IsRegister());
2668 __ movsxd(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2669 break;
2670
2671 case Primitive::kPrimFloat: {
2672 // Processing a Dex `float-to-long' instruction.
2673 XmmRegister input = in.AsFpuRegister<XmmRegister>();
2674 CpuRegister output = out.AsRegister<CpuRegister>();
2675 NearLabel done, nan;
2676
2677 codegen_->Load64BitValue(output, kPrimLongMax);
2678 // if input >= (float)LONG_MAX goto done
2679 __ comiss(input, codegen_->LiteralFloatAddress(kPrimLongMax));
2680 __ j(kAboveEqual, &done);
2681 // if input == NaN goto nan
2682 __ j(kUnordered, &nan);
2683 // output = float-to-long-truncate(input)
2684 __ cvttss2si(output, input, true);
2685 __ jmp(&done);
2686 __ Bind(&nan);
2687 // output = 0
2688 __ xorl(output, output);
2689 __ Bind(&done);
2690 break;
2691 }
2692
2693 case Primitive::kPrimDouble: {
2694 // Processing a Dex `double-to-long' instruction.
2695 XmmRegister input = in.AsFpuRegister<XmmRegister>();
2696 CpuRegister output = out.AsRegister<CpuRegister>();
2697 NearLabel done, nan;
2698
2699 codegen_->Load64BitValue(output, kPrimLongMax);
2700 // if input >= (double)LONG_MAX goto done
2701 __ comisd(input, codegen_->LiteralDoubleAddress(kPrimLongMax));
2702 __ j(kAboveEqual, &done);
2703 // if input == NaN goto nan
2704 __ j(kUnordered, &nan);
2705 // output = double-to-long-truncate(input)
2706 __ cvttsd2si(output, input, true);
2707 __ jmp(&done);
2708 __ Bind(&nan);
2709 // output = 0
2710 __ xorl(output, output);
2711 __ Bind(&done);
2712 break;
2713 }
2714
2715 default:
2716 LOG(FATAL) << "Unexpected type conversion from " << input_type
2717 << " to " << result_type;
2718 }
2719 break;
2720
2721 case Primitive::kPrimChar:
2722 switch (input_type) {
2723 case Primitive::kPrimLong:
2724 // Type conversion from long to char is a result of code transformations.
2725 case Primitive::kPrimBoolean:
2726 // Boolean input is a result of code transformations.
2727 case Primitive::kPrimByte:
2728 case Primitive::kPrimShort:
2729 case Primitive::kPrimInt:
2730 // Processing a Dex `int-to-char' instruction.
2731 if (in.IsRegister()) {
2732 __ movzxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2733 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2734 __ movzxw(out.AsRegister<CpuRegister>(),
2735 Address(CpuRegister(RSP), in.GetStackIndex()));
2736 } else {
2737 __ movl(out.AsRegister<CpuRegister>(),
2738 Immediate(static_cast<uint16_t>(Int64FromConstant(in.GetConstant()))));
2739 }
2740 break;
2741
2742 default:
2743 LOG(FATAL) << "Unexpected type conversion from " << input_type
2744 << " to " << result_type;
2745 }
2746 break;
2747
2748 case Primitive::kPrimFloat:
2749 switch (input_type) {
2750 case Primitive::kPrimBoolean:
2751 // Boolean input is a result of code transformations.
2752 case Primitive::kPrimByte:
2753 case Primitive::kPrimShort:
2754 case Primitive::kPrimInt:
2755 case Primitive::kPrimChar:
2756 // Processing a Dex `int-to-float' instruction.
2757 if (in.IsRegister()) {
2758 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
2759 } else if (in.IsConstant()) {
2760 int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
2761 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2762 codegen_->Load32BitValue(dest, static_cast<float>(v));
2763 } else {
2764 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
2765 Address(CpuRegister(RSP), in.GetStackIndex()), false);
2766 }
2767 break;
2768
2769 case Primitive::kPrimLong:
2770 // Processing a Dex `long-to-float' instruction.
2771 if (in.IsRegister()) {
2772 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
2773 } else if (in.IsConstant()) {
2774 int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
2775 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2776 codegen_->Load32BitValue(dest, static_cast<float>(v));
2777 } else {
2778 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
2779 Address(CpuRegister(RSP), in.GetStackIndex()), true);
2780 }
2781 break;
2782
2783 case Primitive::kPrimDouble:
2784 // Processing a Dex `double-to-float' instruction.
2785 if (in.IsFpuRegister()) {
2786 __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
2787 } else if (in.IsConstant()) {
2788 double v = in.GetConstant()->AsDoubleConstant()->GetValue();
2789 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2790 codegen_->Load32BitValue(dest, static_cast<float>(v));
2791 } else {
2792 __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(),
2793 Address(CpuRegister(RSP), in.GetStackIndex()));
2794 }
2795 break;
2796
2797 default:
2798 LOG(FATAL) << "Unexpected type conversion from " << input_type
2799 << " to " << result_type;
2800 };
2801 break;
2802
2803 case Primitive::kPrimDouble:
2804 switch (input_type) {
2805 case Primitive::kPrimBoolean:
2806 // Boolean input is a result of code transformations.
2807 case Primitive::kPrimByte:
2808 case Primitive::kPrimShort:
2809 case Primitive::kPrimInt:
2810 case Primitive::kPrimChar:
2811 // Processing a Dex `int-to-double' instruction.
2812 if (in.IsRegister()) {
2813 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
2814 } else if (in.IsConstant()) {
2815 int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
2816 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2817 codegen_->Load64BitValue(dest, static_cast<double>(v));
2818 } else {
2819 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
2820 Address(CpuRegister(RSP), in.GetStackIndex()), false);
2821 }
2822 break;
2823
2824 case Primitive::kPrimLong:
2825 // Processing a Dex `long-to-double' instruction.
2826 if (in.IsRegister()) {
2827 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
2828 } else if (in.IsConstant()) {
2829 int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
2830 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2831 codegen_->Load64BitValue(dest, static_cast<double>(v));
2832 } else {
2833 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
2834 Address(CpuRegister(RSP), in.GetStackIndex()), true);
2835 }
2836 break;
2837
2838 case Primitive::kPrimFloat:
2839 // Processing a Dex `float-to-double' instruction.
2840 if (in.IsFpuRegister()) {
2841 __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
2842 } else if (in.IsConstant()) {
2843 float v = in.GetConstant()->AsFloatConstant()->GetValue();
2844 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2845 codegen_->Load64BitValue(dest, static_cast<double>(v));
2846 } else {
2847 __ cvtss2sd(out.AsFpuRegister<XmmRegister>(),
2848 Address(CpuRegister(RSP), in.GetStackIndex()));
2849 }
2850 break;
2851
2852 default:
2853 LOG(FATAL) << "Unexpected type conversion from " << input_type
2854 << " to " << result_type;
2855 }
2856 break;
2857
2858 default:
2859 LOG(FATAL) << "Unexpected type conversion from " << input_type
2860 << " to " << result_type;
2861 }
2862 }
2863
2864 void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
2865 LocationSummary* locations =
2866 new (GetGraph()->GetArena()) LocationSummary(add, LocationSummary::kNoCall);
2867 switch (add->GetResultType()) {
2868 case Primitive::kPrimInt: {
2869 locations->SetInAt(0, Location::RequiresRegister());
2870 locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
2871 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2872 break;
2873 }
2874
2875 case Primitive::kPrimLong: {
2876 locations->SetInAt(0, Location::RequiresRegister());
2877 // We can use a leaq or addq if the constant can fit in an immediate.
2878 locations->SetInAt(1, Location::RegisterOrInt32Constant(add->InputAt(1)));
2879 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2880 break;
2881 }
2882
2883 case Primitive::kPrimDouble:
2884 case Primitive::kPrimFloat: {
2885 locations->SetInAt(0, Location::RequiresFpuRegister());
2886 locations->SetInAt(1, Location::Any());
2887 locations->SetOut(Location::SameAsFirstInput());
2888 break;
2889 }
2890
2891 default:
2892 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
2893 }
2894 }
2895
2896 void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
2897 LocationSummary* locations = add->GetLocations();
2898 Location first = locations->InAt(0);
2899 Location second = locations->InAt(1);
2900 Location out = locations->Out();
2901
2902 switch (add->GetResultType()) {
2903 case Primitive::kPrimInt: {
2904 if (second.IsRegister()) {
2905 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
2906 __ addl(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
2907 } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
2908 __ addl(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
2909 } else {
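            // Neither input is also the output register: use leal as a
            // non-destructive three-operand add, avoiding an extra move.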
2910 __ leal(out.AsRegister<CpuRegister>(), Address(
2911 first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
2912 }
2913 } else if (second.IsConstant()) {
2914 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
2915 __ addl(out.AsRegister<CpuRegister>(),
2916 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
2917 } else {
2918 __ leal(out.AsRegister<CpuRegister>(), Address(
2919 first.AsRegister<CpuRegister>(), second.GetConstant()->AsIntConstant()->GetValue()));
2920 }
2921 } else {
2922 DCHECK(first.Equals(locations->Out()));
2923 __ addl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
2924 }
2925 break;
2926 }
2927
2928 case Primitive::kPrimLong: {
2929 if (second.IsRegister()) {
2930 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
2931 __ addq(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
2932 } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
2933 __ addq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
2934 } else {
2935 __ leaq(out.AsRegister<CpuRegister>(), Address(
2936 first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
2937 }
2938 } else {
2939 DCHECK(second.IsConstant());
2940 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
2941 int32_t int32_value = Low32Bits(value);
2942 DCHECK_EQ(int32_value, value);
2943 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
2944 __ addq(out.AsRegister<CpuRegister>(), Immediate(int32_value));
2945 } else {
2946 __ leaq(out.AsRegister<CpuRegister>(), Address(
2947 first.AsRegister<CpuRegister>(), int32_value));
2948 }
2949 }
2950 break;
2951 }
2952
2953 case Primitive::kPrimFloat: {
2954 if (second.IsFpuRegister()) {
2955 __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
2956 } else if (second.IsConstant()) {
2957 __ addss(first.AsFpuRegister<XmmRegister>(),
2958 codegen_->LiteralFloatAddress(
2959 second.GetConstant()->AsFloatConstant()->GetValue()));
2960 } else {
2961 DCHECK(second.IsStackSlot());
2962 __ addss(first.AsFpuRegister<XmmRegister>(),
2963 Address(CpuRegister(RSP), second.GetStackIndex()));
2964 }
2965 break;
2966 }
2967
2968 case Primitive::kPrimDouble: {
2969 if (second.IsFpuRegister()) {
2970 __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
2971 } else if (second.IsConstant()) {
2972 __ addsd(first.AsFpuRegister<XmmRegister>(),
2973 codegen_->LiteralDoubleAddress(
2974 second.GetConstant()->AsDoubleConstant()->GetValue()));
2975 } else {
2976 DCHECK(second.IsDoubleStackSlot());
2977 __ addsd(first.AsFpuRegister<XmmRegister>(),
2978 Address(CpuRegister(RSP), second.GetStackIndex()));
2979 }
2980 break;
2981 }
2982
2983 default:
2984 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
2985 }
2986 }
2987
2988 void LocationsBuilderX86_64::VisitSub(HSub* sub) {
2989 LocationSummary* locations =
2990 new (GetGraph()->GetArena()) LocationSummary(sub, LocationSummary::kNoCall);
2991 switch (sub->GetResultType()) {
2992 case Primitive::kPrimInt: {
2993 locations->SetInAt(0, Location::RequiresRegister());
2994 locations->SetInAt(1, Location::Any());
2995 locations->SetOut(Location::SameAsFirstInput());
2996 break;
2997 }
2998 case Primitive::kPrimLong: {
2999 locations->SetInAt(0, Location::RequiresRegister());
3000 locations->SetInAt(1, Location::RegisterOrInt32Constant(sub->InputAt(1)));
3001 locations->SetOut(Location::SameAsFirstInput());
3002 break;
3003 }
3004 case Primitive::kPrimFloat:
3005 case Primitive::kPrimDouble: {
3006 locations->SetInAt(0, Location::RequiresFpuRegister());
3007 locations->SetInAt(1, Location::Any());
3008 locations->SetOut(Location::SameAsFirstInput());
3009 break;
3010 }
3011 default:
3012 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3013 }
3014 }
3015
3016 void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) {
3017 LocationSummary* locations = sub->GetLocations();
3018 Location first = locations->InAt(0);
3019 Location second = locations->InAt(1);
3020 DCHECK(first.Equals(locations->Out()));
3021 switch (sub->GetResultType()) {
3022 case Primitive::kPrimInt: {
3023 if (second.IsRegister()) {
3024 __ subl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3025 } else if (second.IsConstant()) {
3026 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
3027 __ subl(first.AsRegister<CpuRegister>(), imm);
3028 } else {
3029 __ subl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3030 }
3031 break;
3032 }
3033 case Primitive::kPrimLong: {
3034 if (second.IsConstant()) {
3035 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3036 DCHECK(IsInt<32>(value));
3037 __ subq(first.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
3038 } else {
3039 __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3040 }
3041 break;
3042 }
3043
3044 case Primitive::kPrimFloat: {
3045 if (second.IsFpuRegister()) {
3046 __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3047 } else if (second.IsConstant()) {
3048 __ subss(first.AsFpuRegister<XmmRegister>(),
3049 codegen_->LiteralFloatAddress(
3050 second.GetConstant()->AsFloatConstant()->GetValue()));
3051 } else {
3052 DCHECK(second.IsStackSlot());
3053 __ subss(first.AsFpuRegister<XmmRegister>(),
3054 Address(CpuRegister(RSP), second.GetStackIndex()));
3055 }
3056 break;
3057 }
3058
3059 case Primitive::kPrimDouble: {
3060 if (second.IsFpuRegister()) {
3061 __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3062 } else if (second.IsConstant()) {
3063 __ subsd(first.AsFpuRegister<XmmRegister>(),
3064 codegen_->LiteralDoubleAddress(
3065 second.GetConstant()->AsDoubleConstant()->GetValue()));
3066 } else {
3067 DCHECK(second.IsDoubleStackSlot());
3068 __ subsd(first.AsFpuRegister<XmmRegister>(),
3069 Address(CpuRegister(RSP), second.GetStackIndex()));
3070 }
3071 break;
3072 }
3073
3074 default:
3075 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3076 }
3077 }
3078
3079 void LocationsBuilderX86_64::VisitMul(HMul* mul) {
3080 LocationSummary* locations =
3081 new (GetGraph()->GetArena()) LocationSummary(mul, LocationSummary::kNoCall);
3082 switch (mul->GetResultType()) {
3083 case Primitive::kPrimInt: {
3084 locations->SetInAt(0, Location::RequiresRegister());
3085 locations->SetInAt(1, Location::Any());
3086 if (mul->InputAt(1)->IsIntConstant()) {
3087 // Can use 3 operand multiply.
3088 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3089 } else {
3090 locations->SetOut(Location::SameAsFirstInput());
3091 }
3092 break;
3093 }
3094 case Primitive::kPrimLong: {
3095 locations->SetInAt(0, Location::RequiresRegister());
3096 locations->SetInAt(1, Location::Any());
3097 if (mul->InputAt(1)->IsLongConstant() &&
3098 IsInt<32>(mul->InputAt(1)->AsLongConstant()->GetValue())) {
3099 // Can use 3 operand multiply.
3100 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3101 } else {
3102 locations->SetOut(Location::SameAsFirstInput());
3103 }
3104 break;
3105 }
3106 case Primitive::kPrimFloat:
3107 case Primitive::kPrimDouble: {
3108 locations->SetInAt(0, Location::RequiresFpuRegister());
3109 locations->SetInAt(1, Location::Any());
3110 locations->SetOut(Location::SameAsFirstInput());
3111 break;
3112 }
3113
3114 default:
3115 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3116 }
3117 }
3118
3119 void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
3120 LocationSummary* locations = mul->GetLocations();
3121 Location first = locations->InAt(0);
3122 Location second = locations->InAt(1);
3123 Location out = locations->Out();
3124 switch (mul->GetResultType()) {
3125 case Primitive::kPrimInt:
3126 // The constant may have ended up in a register, so test explicitly to avoid
3127 // problems where the output may not be the same as the first operand.
3128 if (mul->InputAt(1)->IsIntConstant()) {
3129 Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
3130 __ imull(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), imm);
3131 } else if (second.IsRegister()) {
3132 DCHECK(first.Equals(out));
3133 __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3134 } else {
3135 DCHECK(first.Equals(out));
3136 DCHECK(second.IsStackSlot());
3137 __ imull(first.AsRegister<CpuRegister>(),
3138 Address(CpuRegister(RSP), second.GetStackIndex()));
3139 }
3140 break;
3141 case Primitive::kPrimLong: {
3142 // The constant may have ended up in a register, so test explicitly to avoid
3143 // problems where the output may not be the same as the first operand.
3144 if (mul->InputAt(1)->IsLongConstant()) {
3145 int64_t value = mul->InputAt(1)->AsLongConstant()->GetValue();
3146 if (IsInt<32>(value)) {
3147 __ imulq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(),
3148 Immediate(static_cast<int32_t>(value)));
3149 } else {
3150 // Have to use the constant area.
3151 DCHECK(first.Equals(out));
3152 __ imulq(first.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value));
3153 }
3154 } else if (second.IsRegister()) {
3155 DCHECK(first.Equals(out));
3156 __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3157 } else {
3158 DCHECK(second.IsDoubleStackSlot());
3159 DCHECK(first.Equals(out));
3160 __ imulq(first.AsRegister<CpuRegister>(),
3161 Address(CpuRegister(RSP), second.GetStackIndex()));
3162 }
3163 break;
3164 }
3165
3166 case Primitive::kPrimFloat: {
3167 DCHECK(first.Equals(out));
3168 if (second.IsFpuRegister()) {
3169 __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3170 } else if (second.IsConstant()) {
3171 __ mulss(first.AsFpuRegister<XmmRegister>(),
3172 codegen_->LiteralFloatAddress(
3173 second.GetConstant()->AsFloatConstant()->GetValue()));
3174 } else {
3175 DCHECK(second.IsStackSlot());
3176 __ mulss(first.AsFpuRegister<XmmRegister>(),
3177 Address(CpuRegister(RSP), second.GetStackIndex()));
3178 }
3179 break;
3180 }
3181
3182 case Primitive::kPrimDouble: {
3183 DCHECK(first.Equals(out));
3184 if (second.IsFpuRegister()) {
3185 __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3186 } else if (second.IsConstant()) {
3187 __ mulsd(first.AsFpuRegister<XmmRegister>(),
3188 codegen_->LiteralDoubleAddress(
3189 second.GetConstant()->AsDoubleConstant()->GetValue()));
3190 } else {
3191 DCHECK(second.IsDoubleStackSlot());
3192 __ mulsd(first.AsFpuRegister<XmmRegister>(),
3193 Address(CpuRegister(RSP), second.GetStackIndex()));
3194 }
3195 break;
3196 }
3197
3198 default:
3199 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3200 }
3201 }
3202
3203 void InstructionCodeGeneratorX86_64::PushOntoFPStack(Location source, uint32_t temp_offset,
3204 uint32_t stack_adjustment, bool is_float) {
3205 if (source.IsStackSlot()) {
3206 DCHECK(is_float);
3207 __ flds(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
3208 } else if (source.IsDoubleStackSlot()) {
3209 DCHECK(!is_float);
3210 __ fldl(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
3211 } else {
3212 // Write the value to the temporary location on the stack and load to FP stack.
3213 if (is_float) {
3214 Location stack_temp = Location::StackSlot(temp_offset);
3215 codegen_->Move(stack_temp, source);
3216 __ flds(Address(CpuRegister(RSP), temp_offset));
3217 } else {
3218 Location stack_temp = Location::DoubleStackSlot(temp_offset);
3219 codegen_->Move(stack_temp, source);
3220 __ fldl(Address(CpuRegister(RSP), temp_offset));
3221 }
3222 }
3223 }
3224
3225 void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem *rem) {
3226 Primitive::Type type = rem->GetResultType();
3227 bool is_float = type == Primitive::kPrimFloat;
3228 size_t elem_size = Primitive::ComponentSize(type);
3229 LocationSummary* locations = rem->GetLocations();
3230 Location first = locations->InAt(0);
3231 Location second = locations->InAt(1);
3232 Location out = locations->Out();
3233
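  // SSE has no remainder instruction, so fall back to the x87 FPU: push both
  // operands on the FP stack and iterate fprem, which only computes a partial
  // remainder when the operands' exponents are far apart.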
3234 // Create stack space for 2 elements.
3235 // TODO: enhance register allocator to ask for stack temporaries.
3236 __ subq(CpuRegister(RSP), Immediate(2 * elem_size));
3237
3238 // Load the values to the FP stack in reverse order, using temporaries if needed.
3239 PushOntoFPStack(second, elem_size, 2 * elem_size, is_float);
3240 PushOntoFPStack(first, 0, 2 * elem_size, is_float);
3241
3242 // Loop doing FPREM until we stabilize.
3243 NearLabel retry;
3244 __ Bind(&retry);
3245 __ fprem();
3246
3247 // Move FP status to AX.
3248 __ fstsw();
3249
3250 // And see if the argument reduction is complete. This is signaled by the
3251 // C2 FPU flag bit set to 0.
3252 __ andl(CpuRegister(RAX), Immediate(kC2ConditionMask));
3253 __ j(kNotEqual, &retry);
3254
3255 // We have settled on the final value. Retrieve it into an XMM register.
3256 // Store FP top of stack to real stack.
3257 if (is_float) {
3258 __ fsts(Address(CpuRegister(RSP), 0));
3259 } else {
3260 __ fstl(Address(CpuRegister(RSP), 0));
3261 }
3262
3263 // Pop the 2 items from the FP stack.
3264 __ fucompp();
3265
3266 // Load the value from the stack into an XMM register.
3267 DCHECK(out.IsFpuRegister()) << out;
3268 if (is_float) {
3269 __ movss(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
3270 } else {
3271 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
3272 }
3273
3274 // And remove the temporary stack space we allocated.
3275 __ addq(CpuRegister(RSP), Immediate(2 * elem_size));
3276 }
3277
3278 void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
3279 DCHECK(instruction->IsDiv() || instruction->IsRem());
3280
3281 LocationSummary* locations = instruction->GetLocations();
3282 Location second = locations->InAt(1);
3283 DCHECK(second.IsConstant());
3284
3285 CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
3286 CpuRegister input_register = locations->InAt(0).AsRegister<CpuRegister>();
3287 int64_t imm = Int64FromConstant(second.GetConstant());
3288
3289 DCHECK(imm == 1 || imm == -1);
3290
3291 switch (instruction->GetResultType()) {
3292 case Primitive::kPrimInt: {
3293 if (instruction->IsRem()) {
3294 __ xorl(output_register, output_register);
3295 } else {
3296 __ movl(output_register, input_register);
3297 if (imm == -1) {
3298 __ negl(output_register);
3299 }
3300 }
3301 break;
3302 }
3303
3304 case Primitive::kPrimLong: {
3305 if (instruction->IsRem()) {
3306 __ xorl(output_register, output_register);
3307 } else {
3308 __ movq(output_register, input_register);
3309 if (imm == -1) {
3310 __ negq(output_register);
3311 }
3312 }
3313 break;
3314 }
3315
3316 default:
3317 LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType();
3318 }
3319 }
3320
3321 void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
3322 LocationSummary* locations = instruction->GetLocations();
3323 Location second = locations->InAt(1);
3324
3325 CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
3326 CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
3327
3328 int64_t imm = Int64FromConstant(second.GetConstant());
3329 DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3330 uint64_t abs_imm = AbsOrMin(imm);
3331
3332 CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
3333
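  // Signed division by 2^k via an arithmetic shift rounds toward negative
  // infinity, while Java division rounds toward zero. Adding (2^k - 1) to
  // negative numerators first (selected branchlessly with cmov) corrects this,
  // e.g. -7 / 4: (-7 + 3) >> 2 == -1 rather than -2.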
3334 if (instruction->GetResultType() == Primitive::kPrimInt) {
3335 __ leal(tmp, Address(numerator, abs_imm - 1));
3336 __ testl(numerator, numerator);
3337 __ cmov(kGreaterEqual, tmp, numerator);
3338 int shift = CTZ(imm);
3339 __ sarl(tmp, Immediate(shift));
3340
3341 if (imm < 0) {
3342 __ negl(tmp);
3343 }
3344
3345 __ movl(output_register, tmp);
3346 } else {
3347 DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
3348 CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
3349
3350 codegen_->Load64BitValue(rdx, abs_imm - 1);
3351 __ addq(rdx, numerator);
3352 __ testq(numerator, numerator);
3353 __ cmov(kGreaterEqual, rdx, numerator);
3354 int shift = CTZ(imm);
3355 __ sarq(rdx, Immediate(shift));
3356
3357 if (imm < 0) {
3358 __ negq(rdx);
3359 }
3360
3361 __ movq(output_register, rdx);
3362 }
3363 }
3364
3365 void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
3366 DCHECK(instruction->IsDiv() || instruction->IsRem());
3367
3368 LocationSummary* locations = instruction->GetLocations();
3369 Location second = locations->InAt(1);
3370
3371 CpuRegister numerator = instruction->IsDiv() ? locations->GetTemp(1).AsRegister<CpuRegister>()
3372 : locations->GetTemp(0).AsRegister<CpuRegister>();
3373 CpuRegister eax = locations->InAt(0).AsRegister<CpuRegister>();
3374 CpuRegister edx = instruction->IsDiv() ? locations->GetTemp(0).AsRegister<CpuRegister>()
3375 : locations->Out().AsRegister<CpuRegister>();
3376 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3377
3378 DCHECK_EQ(RAX, eax.AsRegister());
3379 DCHECK_EQ(RDX, edx.AsRegister());
3380 if (instruction->IsDiv()) {
3381 DCHECK_EQ(RAX, out.AsRegister());
3382 } else {
3383 DCHECK_EQ(RDX, out.AsRegister());
3384 }
3385
3386 int64_t magic;
3387 int shift;
3388
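  // Division by an arbitrary constant is strength-reduced to a high multiply:
  // CalculateMagicAndShiftForDivRem picks a magic value M and shift s such that
  // taking the high half of (n * M), shifting right by s and adding a sign
  // correction yields n / imm; the remainder is then n - (n / imm) * imm.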
3389 // TODO: can these branches be written as one?
3390 if (instruction->GetResultType() == Primitive::kPrimInt) {
3391 int imm = second.GetConstant()->AsIntConstant()->GetValue();
3392
3393 CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift);
3394
3395 __ movl(numerator, eax);
3396
3397 NearLabel no_div;
3398 NearLabel end;
3399 __ testl(eax, eax);
3400 __ j(kNotEqual, &no_div);
3401
3402 __ xorl(out, out);
3403 __ jmp(&end);
3404
3405 __ Bind(&no_div);
3406
3407 __ movl(eax, Immediate(magic));
3408 __ imull(numerator);
3409
3410 if (imm > 0 && magic < 0) {
3411 __ addl(edx, numerator);
3412 } else if (imm < 0 && magic > 0) {
3413 __ subl(edx, numerator);
3414 }
3415
3416 if (shift != 0) {
3417 __ sarl(edx, Immediate(shift));
3418 }
3419
3420 __ movl(eax, edx);
3421 __ shrl(edx, Immediate(31));
3422 __ addl(edx, eax);
3423
3424 if (instruction->IsRem()) {
3425 __ movl(eax, numerator);
3426 __ imull(edx, Immediate(imm));
3427 __ subl(eax, edx);
3428 __ movl(edx, eax);
3429 } else {
3430 __ movl(eax, edx);
3431 }
3432 __ Bind(&end);
3433 } else {
3434 int64_t imm = second.GetConstant()->AsLongConstant()->GetValue();
3435
3436 DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
3437
3438 CpuRegister rax = eax;
3439 CpuRegister rdx = edx;
3440
3441 CalculateMagicAndShiftForDivRem(imm, true /* is_long */, &magic, &shift);
3442
3443 // Save the numerator.
3444 __ movq(numerator, rax);
3445
3446 // RAX = magic
3447 codegen_->Load64BitValue(rax, magic);
3448
3449 // RDX:RAX = magic * numerator
3450 __ imulq(numerator);
3451
3452 if (imm > 0 && magic < 0) {
3453 // RDX += numerator
3454 __ addq(rdx, numerator);
3455 } else if (imm < 0 && magic > 0) {
3456 // RDX -= numerator
3457 __ subq(rdx, numerator);
3458 }
3459
3460 // Shift if needed.
3461 if (shift != 0) {
3462 __ sarq(rdx, Immediate(shift));
3463 }
3464
3465 // RDX += 1 if RDX < 0
3466 __ movq(rax, rdx);
3467 __ shrq(rdx, Immediate(63));
3468 __ addq(rdx, rax);
3469
3470 if (instruction->IsRem()) {
3471 __ movq(rax, numerator);
3472
3473 if (IsInt<32>(imm)) {
3474 __ imulq(rdx, Immediate(static_cast<int32_t>(imm)));
3475 } else {
3476 __ imulq(rdx, codegen_->LiteralInt64Address(imm));
3477 }
3478
3479 __ subq(rax, rdx);
3480 __ movq(rdx, rax);
3481 } else {
3482 __ movq(rax, rdx);
3483 }
3484 }
3485 }
3486
3487 void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
3488 DCHECK(instruction->IsDiv() || instruction->IsRem());
3489 Primitive::Type type = instruction->GetResultType();
3490 DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
3491
3492 bool is_div = instruction->IsDiv();
3493 LocationSummary* locations = instruction->GetLocations();
3494
3495 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3496 Location second = locations->InAt(1);
3497
3498 DCHECK_EQ(RAX, locations->InAt(0).AsRegister<CpuRegister>().AsRegister());
3499 DCHECK_EQ(is_div ? RAX : RDX, out.AsRegister());
3500
3501 if (second.IsConstant()) {
3502 int64_t imm = Int64FromConstant(second.GetConstant());
3503
3504 if (imm == 0) {
3505 // Do not generate anything; DivZeroCheck prevents this code from ever being executed.
3506 } else if (imm == 1 || imm == -1) {
3507 DivRemOneOrMinusOne(instruction);
3508 } else if (instruction->IsDiv() && IsPowerOfTwo(AbsOrMin(imm))) {
3509 DivByPowerOfTwo(instruction->AsDiv());
3510 } else {
3511 DCHECK(imm <= -2 || imm >= 2);
3512 GenerateDivRemWithAnyConstant(instruction);
3513 }
3514 } else {
3515 SlowPathCode* slow_path =
3516 new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86_64(
3517 instruction, out.AsRegister(), type, is_div);
3518 codegen_->AddSlowPath(slow_path);
3519
3520 CpuRegister second_reg = second.AsRegister<CpuRegister>();
3521 // 0x80000000(00000000)/-1 triggers an arithmetic exception!
3522 // Dividing by -1 is actually negation and -0x80000000(00000000) = 0x80000000(00000000)
3523 // so it's safe to just use negl instead of more complex comparisons.
3524 if (type == Primitive::kPrimInt) {
3525 __ cmpl(second_reg, Immediate(-1));
3526 __ j(kEqual, slow_path->GetEntryLabel());
3527 // edx:eax <- sign-extended of eax
3528 __ cdq();
3529 // eax = quotient, edx = remainder
3530 __ idivl(second_reg);
3531 } else {
3532 __ cmpq(second_reg, Immediate(-1));
3533 __ j(kEqual, slow_path->GetEntryLabel());
3534 // rdx:rax <- sign-extended of rax
3535 __ cqo();
3536 // rax = quotient, rdx = remainder
3537 __ idivq(second_reg);
3538 }
3539 __ Bind(slow_path->GetExitLabel());
3540 }
3541 }
3542
3543 void LocationsBuilderX86_64::VisitDiv(HDiv* div) {
3544 LocationSummary* locations =
3545 new (GetGraph()->GetArena()) LocationSummary(div, LocationSummary::kNoCall);
3546 switch (div->GetResultType()) {
3547 case Primitive::kPrimInt:
3548 case Primitive::kPrimLong: {
3549 locations->SetInAt(0, Location::RegisterLocation(RAX));
3550 locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
3551 locations->SetOut(Location::SameAsFirstInput());
3552 // Intel uses edx:eax as the dividend.
3553 locations->AddTemp(Location::RegisterLocation(RDX));
3554 // We need to save the numerator while we tweak rax and rdx. As we are using imul in a way
3555 // which enforces results to be in RAX and RDX, things are simpler if we use RDX also as
3556 // output and request another temp.
3557 if (div->InputAt(1)->IsConstant()) {
3558 locations->AddTemp(Location::RequiresRegister());
3559 }
3560 break;
3561 }
3562
3563 case Primitive::kPrimFloat:
3564 case Primitive::kPrimDouble: {
3565 locations->SetInAt(0, Location::RequiresFpuRegister());
3566 locations->SetInAt(1, Location::Any());
3567 locations->SetOut(Location::SameAsFirstInput());
3568 break;
3569 }
3570
3571 default:
3572 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3573 }
3574 }
3575
3576 void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) {
3577 LocationSummary* locations = div->GetLocations();
3578 Location first = locations->InAt(0);
3579 Location second = locations->InAt(1);
3580 DCHECK(first.Equals(locations->Out()));
3581
3582 Primitive::Type type = div->GetResultType();
3583 switch (type) {
3584 case Primitive::kPrimInt:
3585 case Primitive::kPrimLong: {
3586 GenerateDivRemIntegral(div);
3587 break;
3588 }
3589
3590 case Primitive::kPrimFloat: {
3591 if (second.IsFpuRegister()) {
3592 __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3593 } else if (second.IsConstant()) {
3594 __ divss(first.AsFpuRegister<XmmRegister>(),
3595 codegen_->LiteralFloatAddress(
3596 second.GetConstant()->AsFloatConstant()->GetValue()));
3597 } else {
3598 DCHECK(second.IsStackSlot());
3599 __ divss(first.AsFpuRegister<XmmRegister>(),
3600 Address(CpuRegister(RSP), second.GetStackIndex()));
3601 }
3602 break;
3603 }
3604
3605 case Primitive::kPrimDouble: {
3606 if (second.IsFpuRegister()) {
3607 __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3608 } else if (second.IsConstant()) {
3609 __ divsd(first.AsFpuRegister<XmmRegister>(),
3610 codegen_->LiteralDoubleAddress(
3611 second.GetConstant()->AsDoubleConstant()->GetValue()));
3612 } else {
3613 DCHECK(second.IsDoubleStackSlot());
3614 __ divsd(first.AsFpuRegister<XmmRegister>(),
3615 Address(CpuRegister(RSP), second.GetStackIndex()));
3616 }
3617 break;
3618 }
3619
3620 default:
3621 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3622 }
3623 }
3624
3625 void LocationsBuilderX86_64::VisitRem(HRem* rem) {
3626 Primitive::Type type = rem->GetResultType();
3627 LocationSummary* locations =
3628 new (GetGraph()->GetArena()) LocationSummary(rem, LocationSummary::kNoCall);
3629
3630 switch (type) {
3631 case Primitive::kPrimInt:
3632 case Primitive::kPrimLong: {
3633 locations->SetInAt(0, Location::RegisterLocation(RAX));
3634 locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
3635 // Intel uses rdx:rax as the dividend and puts the remainder in rdx
3636 locations->SetOut(Location::RegisterLocation(RDX));
3637 // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
3638 // which enforces results to be in RAX and RDX, things are simpler if we use EAX also as
3639 // output and request another temp.
3640 if (rem->InputAt(1)->IsConstant()) {
3641 locations->AddTemp(Location::RequiresRegister());
3642 }
3643 break;
3644 }
3645
3646 case Primitive::kPrimFloat:
3647 case Primitive::kPrimDouble: {
3648 locations->SetInAt(0, Location::Any());
3649 locations->SetInAt(1, Location::Any());
3650 locations->SetOut(Location::RequiresFpuRegister());
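      // GenerateRemFP reads the FPU status word with fstsw, which writes AX,
      // so RAX must be reserved as a temporary.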
3651 locations->AddTemp(Location::RegisterLocation(RAX));
3652 break;
3653 }
3654
3655 default:
3656 LOG(FATAL) << "Unexpected rem type " << type;
3657 }
3658 }
3659
3660 void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) {
3661 Primitive::Type type = rem->GetResultType();
3662 switch (type) {
3663 case Primitive::kPrimInt:
3664 case Primitive::kPrimLong: {
3665 GenerateDivRemIntegral(rem);
3666 break;
3667 }
3668 case Primitive::kPrimFloat:
3669 case Primitive::kPrimDouble: {
3670 GenerateRemFP(rem);
3671 break;
3672 }
3673 default:
3674 LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
3675 }
3676 }
3677
3678 void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3679 LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
3680 ? LocationSummary::kCallOnSlowPath
3681 : LocationSummary::kNoCall;
3682 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
3683 locations->SetInAt(0, Location::Any());
3684 if (instruction->HasUses()) {
3685 locations->SetOut(Location::SameAsFirstInput());
3686 }
3687 }
3688
3689 void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3690 SlowPathCode* slow_path =
3691 new (GetGraph()->GetArena()) DivZeroCheckSlowPathX86_64(instruction);
3692 codegen_->AddSlowPath(slow_path);
3693
3694 LocationSummary* locations = instruction->GetLocations();
3695 Location value = locations->InAt(0);
3696
3697 switch (instruction->GetType()) {
3698 case Primitive::kPrimBoolean:
3699 case Primitive::kPrimByte:
3700 case Primitive::kPrimChar:
3701 case Primitive::kPrimShort:
3702 case Primitive::kPrimInt: {
3703 if (value.IsRegister()) {
3704 __ testl(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
3705 __ j(kEqual, slow_path->GetEntryLabel());
3706 } else if (value.IsStackSlot()) {
3707 __ cmpl(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
3708 __ j(kEqual, slow_path->GetEntryLabel());
3709 } else {
3710 DCHECK(value.IsConstant()) << value;
3711 if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
3712 __ jmp(slow_path->GetEntryLabel());
3713 }
3714 }
3715 break;
3716 }
3717 case Primitive::kPrimLong: {
3718 if (value.IsRegister()) {
3719 __ testq(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
3720 __ j(kEqual, slow_path->GetEntryLabel());
3721 } else if (value.IsDoubleStackSlot()) {
3722 __ cmpq(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
3723 __ j(kEqual, slow_path->GetEntryLabel());
3724 } else {
3725 DCHECK(value.IsConstant()) << value;
3726 if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
3727 __ jmp(slow_path->GetEntryLabel());
3728 }
3729 }
3730 break;
3731 }
3732 default:
3733 LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
3734 }
3735 }
3736
3737 void LocationsBuilderX86_64::HandleShift(HBinaryOperation* op) {
3738 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
3739
3740 LocationSummary* locations =
3741 new (GetGraph()->GetArena()) LocationSummary(op, LocationSummary::kNoCall);
3742
3743 switch (op->GetResultType()) {
3744 case Primitive::kPrimInt:
3745 case Primitive::kPrimLong: {
3746 locations->SetInAt(0, Location::RequiresRegister());
3747 // The shift count needs to be in CL.
3748 locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, op->InputAt(1)));
3749 locations->SetOut(Location::SameAsFirstInput());
3750 break;
3751 }
3752 default:
3753 LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
3754 }
3755 }
3756
3757 void InstructionCodeGeneratorX86_64::HandleShift(HBinaryOperation* op) {
3758 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
3759
3760 LocationSummary* locations = op->GetLocations();
3761 CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
3762 Location second = locations->InAt(1);
3763
3764 switch (op->GetResultType()) {
3765 case Primitive::kPrimInt: {
3766 if (second.IsRegister()) {
3767 CpuRegister second_reg = second.AsRegister<CpuRegister>();
3768 if (op->IsShl()) {
3769 __ shll(first_reg, second_reg);
3770 } else if (op->IsShr()) {
3771 __ sarl(first_reg, second_reg);
3772 } else {
3773 __ shrl(first_reg, second_reg);
3774 }
3775 } else {
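            // Constant shift counts are masked to the low 5 bits here (6 bits in
            // the long case below), matching Java shift semantics; the CL-register
            // forms get the same masking from the hardware.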
3776 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
3777 if (op->IsShl()) {
3778 __ shll(first_reg, imm);
3779 } else if (op->IsShr()) {
3780 __ sarl(first_reg, imm);
3781 } else {
3782 __ shrl(first_reg, imm);
3783 }
3784 }
3785 break;
3786 }
3787 case Primitive::kPrimLong: {
3788 if (second.IsRegister()) {
3789 CpuRegister second_reg = second.AsRegister<CpuRegister>();
3790 if (op->IsShl()) {
3791 __ shlq(first_reg, second_reg);
3792 } else if (op->IsShr()) {
3793 __ sarq(first_reg, second_reg);
3794 } else {
3795 __ shrq(first_reg, second_reg);
3796 }
3797 } else {
3798 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
3799 if (op->IsShl()) {
3800 __ shlq(first_reg, imm);
3801 } else if (op->IsShr()) {
3802 __ sarq(first_reg, imm);
3803 } else {
3804 __ shrq(first_reg, imm);
3805 }
3806 }
3807 break;
3808 }
3809 default:
3810 LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
3811 UNREACHABLE();
3812 }
3813 }
3814
3815 void LocationsBuilderX86_64::VisitRor(HRor* ror) {
3816 LocationSummary* locations =
3817 new (GetGraph()->GetArena()) LocationSummary(ror, LocationSummary::kNoCall);
3818
3819 switch (ror->GetResultType()) {
3820 case Primitive::kPrimInt:
3821 case Primitive::kPrimLong: {
3822 locations->SetInAt(0, Location::RequiresRegister());
3823 // The shift count needs to be in CL (unless it is a constant).
3824 locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, ror->InputAt(1)));
3825 locations->SetOut(Location::SameAsFirstInput());
3826 break;
3827 }
3828 default:
3829 LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
3830 UNREACHABLE();
3831 }
3832 }
3833
3834 void InstructionCodeGeneratorX86_64::VisitRor(HRor* ror) {
3835 LocationSummary* locations = ror->GetLocations();
3836 CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
3837 Location second = locations->InAt(1);
3838
3839 switch (ror->GetResultType()) {
3840 case Primitive::kPrimInt:
3841 if (second.IsRegister()) {
3842 CpuRegister second_reg = second.AsRegister<CpuRegister>();
3843 __ rorl(first_reg, second_reg);
3844 } else {
3845 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
3846 __ rorl(first_reg, imm);
3847 }
3848 break;
3849 case Primitive::kPrimLong:
3850 if (second.IsRegister()) {
3851 CpuRegister second_reg = second.AsRegister<CpuRegister>();
3852 __ rorq(first_reg, second_reg);
3853 } else {
3854 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
3855 __ rorq(first_reg, imm);
3856 }
3857 break;
3858 default:
3859 LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
3860 UNREACHABLE();
3861 }
3862 }
3863
3864 void LocationsBuilderX86_64::VisitShl(HShl* shl) {
3865 HandleShift(shl);
3866 }
3867
3868 void InstructionCodeGeneratorX86_64::VisitShl(HShl* shl) {
3869 HandleShift(shl);
3870 }
3871
3872 void LocationsBuilderX86_64::VisitShr(HShr* shr) {
3873 HandleShift(shr);
3874 }
3875
3876 void InstructionCodeGeneratorX86_64::VisitShr(HShr* shr) {
3877 HandleShift(shr);
3878 }
3879
3880 void LocationsBuilderX86_64::VisitUShr(HUShr* ushr) {
3881 HandleShift(ushr);
3882 }
3883
3884 void InstructionCodeGeneratorX86_64::VisitUShr(HUShr* ushr) {
3885 HandleShift(ushr);
3886 }
3887
3888 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
3889 LocationSummary* locations =
3890 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
3891 InvokeRuntimeCallingConvention calling_convention;
3892 if (instruction->IsStringAlloc()) {
3893 locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
3894 } else {
3895 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
3896 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
3897 }
3898 locations->SetOut(Location::RegisterLocation(RAX));
3899 }
3900
3901 void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
3902 // Note: if heap poisoning is enabled, the entry point takes care
3903 // of poisoning the reference.
3904 if (instruction->IsStringAlloc()) {
3905 // String is allocated through StringFactory. Call NewEmptyString entry point.
3906 CpuRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
3907 MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64WordSize);
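    // Load the ArtMethod* stored in the current Thread's pNewEmptyString
    // entrypoint slot (read GS-relative on x86-64), then call through its
    // quick-compiled-code pointer.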
3908 __ gs()->movq(temp, Address::Absolute(QUICK_ENTRY_POINT(pNewEmptyString), /* no_rip */ true));
3909 __ call(Address(temp, code_offset.SizeValue()));
3910 codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
3911 } else {
3912 codegen_->InvokeRuntime(instruction->GetEntrypoint(),
3913 instruction,
3914 instruction->GetDexPc(),
3915 nullptr);
3916 CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
3917 DCHECK(!codegen_->IsLeafMethod());
3918 }
3919 }
3920
3921 void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
3922 LocationSummary* locations =
3923 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
3924 InvokeRuntimeCallingConvention calling_convention;
3925 locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
3926 locations->SetOut(Location::RegisterLocation(RAX));
3927 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
3928 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
3929 }
3930
3931 void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) {
3932 InvokeRuntimeCallingConvention calling_convention;
3933 codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)),
3934 instruction->GetTypeIndex());
3935 // Note: if heap poisoning is enabled, the entry point takes care
3936 // of poisoning the reference.
3937 codegen_->InvokeRuntime(instruction->GetEntrypoint(),
3938 instruction,
3939 instruction->GetDexPc(),
3940 nullptr);
3941 CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>();
3942
3943 DCHECK(!codegen_->IsLeafMethod());
3944 }
3945
3946 void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) {
3947 LocationSummary* locations =
3948 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
3949 Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
3950 if (location.IsStackSlot()) {
3951 location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
3952 } else if (location.IsDoubleStackSlot()) {
3953 location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
3954 }
3955 locations->SetOut(location);
3956 }
3957
3958 void InstructionCodeGeneratorX86_64::VisitParameterValue(
3959 HParameterValue* instruction ATTRIBUTE_UNUSED) {
3960 // Nothing to do, the parameter is already at its location.
3961 }
3962
3963 void LocationsBuilderX86_64::VisitCurrentMethod(HCurrentMethod* instruction) {
3964 LocationSummary* locations =
3965 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
3966 locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
3967 }
3968
3969 void InstructionCodeGeneratorX86_64::VisitCurrentMethod(
3970 HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
3971 // Nothing to do, the method is already at its location.
3972 }
3973
3974 void LocationsBuilderX86_64::VisitClassTableGet(HClassTableGet* instruction) {
3975 LocationSummary* locations =
3976 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
3977 locations->SetInAt(0, Location::RequiresRegister());
3978 locations->SetOut(Location::RequiresRegister());
3979 }
3980
3981 void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruction) {
3982 LocationSummary* locations = instruction->GetLocations();
3983 if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
3984 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
3985 instruction->GetIndex(), kX86_64PointerSize).SizeValue();
3986 __ movq(locations->Out().AsRegister<CpuRegister>(),
3987 Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
3988 } else {
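    // Interface method table lookup: load the ImTable pointer from the class,
    // then the ArtMethod* at slot (index % ImTable::kSize).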
3989 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
3990 instruction->GetIndex() % ImTable::kSize, kX86_64PointerSize));
3991 __ movq(locations->Out().AsRegister<CpuRegister>(),
3992 Address(locations->InAt(0).AsRegister<CpuRegister>(),
3993 mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
3994 __ movq(locations->Out().AsRegister<CpuRegister>(),
3995 Address(locations->Out().AsRegister<CpuRegister>(), method_offset));
3996 }
3997 }
3998
3999 void LocationsBuilderX86_64::VisitNot(HNot* not_) {
4000 LocationSummary* locations =
4001 new (GetGraph()->GetArena()) LocationSummary(not_, LocationSummary::kNoCall);
4002 locations->SetInAt(0, Location::RequiresRegister());
4003 locations->SetOut(Location::SameAsFirstInput());
4004 }
4005
4006 void InstructionCodeGeneratorX86_64::VisitNot(HNot* not_) {
4007 LocationSummary* locations = not_->GetLocations();
4008 DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
4009 locations->Out().AsRegister<CpuRegister>().AsRegister());
4010 Location out = locations->Out();
4011 switch (not_->GetResultType()) {
4012 case Primitive::kPrimInt:
4013 __ notl(out.AsRegister<CpuRegister>());
4014 break;
4015
4016 case Primitive::kPrimLong:
4017 __ notq(out.AsRegister<CpuRegister>());
4018 break;
4019
4020 default:
4021 LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
4022 }
4023 }
4024
4025 void LocationsBuilderX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
4026 LocationSummary* locations =
4027 new (GetGraph()->GetArena()) LocationSummary(bool_not, LocationSummary::kNoCall);
4028 locations->SetInAt(0, Location::RequiresRegister());
4029 locations->SetOut(Location::SameAsFirstInput());
4030 }
4031
4032 void InstructionCodeGeneratorX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
4033 LocationSummary* locations = bool_not->GetLocations();
4034 DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
4035 locations->Out().AsRegister<CpuRegister>().AsRegister());
4036 Location out = locations->Out();
4037 __ xorl(out.AsRegister<CpuRegister>(), Immediate(1));
4038 }
4039
4040 void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) {
4041 LocationSummary* locations =
4042 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
4043 for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
4044 locations->SetInAt(i, Location::Any());
4045 }
4046 locations->SetOut(Location::Any());
4047 }
4048
4049 void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
4050 LOG(FATAL) << "Unimplemented";
4051 }
4052
4053 void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
4054 /*
4055 * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need a memory fence.
4056 * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86-64 memory model.
4057 * For those cases, all we need to ensure is that there is a scheduling barrier in place.
4058 */
4059 switch (kind) {
4060 case MemBarrierKind::kAnyAny: {
4061 MemoryFence();
4062 break;
4063 }
4064 case MemBarrierKind::kAnyStore:
4065 case MemBarrierKind::kLoadAny:
4066 case MemBarrierKind::kStoreStore: {
4067 // nop
4068 break;
4069 }
4070 default:
4071 LOG(FATAL) << "Unexpected memory barrier " << kind;
4072 }
4073 }
4074
4075 void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
4076 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
4077
4078 bool object_field_get_with_read_barrier =
4079 kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
4080 LocationSummary* locations =
4081 new (GetGraph()->GetArena()) LocationSummary(instruction,
4082 object_field_get_with_read_barrier ?
4083 LocationSummary::kCallOnSlowPath :
4084 LocationSummary::kNoCall);
4085 locations->SetInAt(0, Location::RequiresRegister());
4086 if (Primitive::IsFloatingPointType(instruction->GetType())) {
4087 locations->SetOut(Location::RequiresFpuRegister());
4088 } else {
4089 // The output overlaps for an object field get when read barriers
4090 // are enabled: we do not want the move to overwrite the object's
4091 // location, as we need it to emit the read barrier.
4092 locations->SetOut(
4093 Location::RequiresRegister(),
4094 object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
4095 }
4096 if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
4097 // We need a temporary register for the read barrier marking slow
4098 // path in CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier.
4099 locations->AddTemp(Location::RequiresRegister());
4100 }
4101 }
4102
4103 void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
4104 const FieldInfo& field_info) {
4105 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
4106
4107 LocationSummary* locations = instruction->GetLocations();
4108 Location base_loc = locations->InAt(0);
4109 CpuRegister base = base_loc.AsRegister<CpuRegister>();
4110 Location out = locations->Out();
4111 bool is_volatile = field_info.IsVolatile();
4112 Primitive::Type field_type = field_info.GetFieldType();
4113 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
4114
4115 switch (field_type) {
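    // Sub-word loads pick the extension that matches the Java type: boolean and
    // char are zero-extended (movzx), byte and short are sign-extended (movsx).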
4116 case Primitive::kPrimBoolean: {
4117 __ movzxb(out.AsRegister<CpuRegister>(), Address(base, offset));
4118 break;
4119 }
4120
4121 case Primitive::kPrimByte: {
4122 __ movsxb(out.AsRegister<CpuRegister>(), Address(base, offset));
4123 break;
4124 }
4125
4126 case Primitive::kPrimShort: {
4127 __ movsxw(out.AsRegister<CpuRegister>(), Address(base, offset));
4128 break;
4129 }
4130
4131 case Primitive::kPrimChar: {
4132 __ movzxw(out.AsRegister<CpuRegister>(), Address(base, offset));
4133 break;
4134 }
4135
4136 case Primitive::kPrimInt: {
4137 __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
4138 break;
4139 }
4140
4141 case Primitive::kPrimNot: {
4142 // /* HeapReference<Object> */ out = *(base + offset)
4143 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
4144 Location temp_loc = locations->GetTemp(0);
4145 // Note that a potential implicit null check is handled in this
4146 // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
4147 codegen_->GenerateFieldLoadWithBakerReadBarrier(
4148 instruction, out, base, offset, temp_loc, /* needs_null_check */ true);
4149 if (is_volatile) {
4150 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4151 }
4152 } else {
4153 __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
4154 codegen_->MaybeRecordImplicitNullCheck(instruction);
4155 if (is_volatile) {
4156 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4157 }
4158 // If read barriers are enabled, emit read barriers other than
4159 // Baker's using a slow path (and also unpoison the loaded
4160 // reference, if heap poisoning is enabled).
4161 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
4162 }
4163 break;
4164 }
4165
4166 case Primitive::kPrimLong: {
4167 __ movq(out.AsRegister<CpuRegister>(), Address(base, offset));
4168 break;
4169 }
4170
4171 case Primitive::kPrimFloat: {
4172 __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
4173 break;
4174 }
4175
4176 case Primitive::kPrimDouble: {
4177 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
4178 break;
4179 }
4180
4181 case Primitive::kPrimVoid:
4182 LOG(FATAL) << "Unreachable type " << field_type;
4183 UNREACHABLE();
4184 }
4185
4186 if (field_type == Primitive::kPrimNot) {
4187 // Potential implicit null checks, in the case of reference
4188 // fields, are handled in the previous switch statement.
4189 } else {
4190 codegen_->MaybeRecordImplicitNullCheck(instruction);
4191 }
4192
4193 if (is_volatile) {
4194 if (field_type == Primitive::kPrimNot) {
4195 // Memory barriers, in the case of references, are also handled
4196 // in the previous switch statement.
4197 } else {
4198 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4199 }
4200 }
4201 }
4202
4203 void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction,
4204 const FieldInfo& field_info) {
4205 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
4206
4207 LocationSummary* locations =
4208 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
4209 Primitive::Type field_type = field_info.GetFieldType();
4210 bool is_volatile = field_info.IsVolatile();
4211 bool needs_write_barrier =
4212 CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
4213
4214 locations->SetInAt(0, Location::RequiresRegister());
4215 if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
4216 if (is_volatile) {
4217 // In order to satisfy the semantics of volatile, this must be a single instruction store.
4218 locations->SetInAt(1, Location::FpuRegisterOrInt32Constant(instruction->InputAt(1)));
4219 } else {
4220 locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
4221 }
4222 } else {
4223 if (is_volatile) {
4224 // In order to satisfy the semantics of volatile, this must be a single instruction store.
4225 locations->SetInAt(1, Location::RegisterOrInt32Constant(instruction->InputAt(1)));
4226 } else {
4227 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
4228 }
4229 }
4230 if (needs_write_barrier) {
4231 // Temporary registers for the write barrier.
4232 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
4233 locations->AddTemp(Location::RequiresRegister());
4234 } else if (kPoisonHeapReferences && field_type == Primitive::kPrimNot) {
4235 // Temporary register for the reference poisoning.
4236 locations->AddTemp(Location::RequiresRegister());
4237 }
4238 }
4239
4240 void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
4241 const FieldInfo& field_info,
4242 bool value_can_be_null) {
4243 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
4244
4245 LocationSummary* locations = instruction->GetLocations();
4246 CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>();
4247 Location value = locations->InAt(1);
4248 bool is_volatile = field_info.IsVolatile();
4249 Primitive::Type field_type = field_info.GetFieldType();
4250 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
4251
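  // Volatile stores are bracketed by an AnyStore barrier before and an AnyAny
  // barrier after (emitted at the end of this method); on x86-64 only the
  // trailing AnyAny barrier produces an actual fence.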
4252 if (is_volatile) {
4253 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
4254 }
4255
4256 bool maybe_record_implicit_null_check_done = false;
4257
4258 switch (field_type) {
4259 case Primitive::kPrimBoolean:
4260 case Primitive::kPrimByte: {
4261 if (value.IsConstant()) {
4262 int8_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4263 __ movb(Address(base, offset), Immediate(v));
4264 } else {
4265 __ movb(Address(base, offset), value.AsRegister<CpuRegister>());
4266 }
4267 break;
4268 }
4269
4270 case Primitive::kPrimShort:
4271 case Primitive::kPrimChar: {
4272 if (value.IsConstant()) {
4273 int16_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4274 __ movw(Address(base, offset), Immediate(v));
4275 } else {
4276 __ movw(Address(base, offset), value.AsRegister<CpuRegister>());
4277 }
4278 break;
4279 }
4280
4281 case Primitive::kPrimInt:
4282 case Primitive::kPrimNot: {
4283 if (value.IsConstant()) {
4284 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4285 // `field_type == Primitive::kPrimNot` implies `v == 0`.
4286 DCHECK((field_type != Primitive::kPrimNot) || (v == 0));
4287 // Note: if heap poisoning is enabled, no need to poison
4288 // (negate) `v` if it is a reference, as it would be null.
4289 __ movl(Address(base, offset), Immediate(v));
4290 } else {
4291 if (kPoisonHeapReferences && field_type == Primitive::kPrimNot) {
4292 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4293 __ movl(temp, value.AsRegister<CpuRegister>());
4294 __ PoisonHeapReference(temp);
4295 __ movl(Address(base, offset), temp);
4296 } else {
4297 __ movl(Address(base, offset), value.AsRegister<CpuRegister>());
4298 }
4299 }
4300 break;
4301 }
4302
4303 case Primitive::kPrimLong: {
4304 if (value.IsConstant()) {
4305 int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
4306 codegen_->MoveInt64ToAddress(Address(base, offset),
4307 Address(base, offset + sizeof(int32_t)),
4308 v,
4309 instruction);
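        // MoveInt64ToAddress may split the store into two 32-bit moves and is
        // responsible for recording the implicit null check itself, so skip the
        // generic recording below.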
4310 maybe_record_implicit_null_check_done = true;
4311 } else {
4312 __ movq(Address(base, offset), value.AsRegister<CpuRegister>());
4313 }
4314 break;
4315 }
4316
4317 case Primitive::kPrimFloat: {
4318 if (value.IsConstant()) {
4319 int32_t v =
4320 bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
4321 __ movl(Address(base, offset), Immediate(v));
4322 } else {
4323 __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
4324 }
4325 break;
4326 }
4327
4328 case Primitive::kPrimDouble: {
4329 if (value.IsConstant()) {
4330 int64_t v =
4331 bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
4332 codegen_->MoveInt64ToAddress(Address(base, offset),
4333 Address(base, offset + sizeof(int32_t)),
4334 v,
4335 instruction);
4336 maybe_record_implicit_null_check_done = true;
4337 } else {
4338 __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
4339 }
4340 break;
4341 }
4342
4343 case Primitive::kPrimVoid:
4344 LOG(FATAL) << "Unreachable type " << field_type;
4345 UNREACHABLE();
4346 }
4347
4348 if (!maybe_record_implicit_null_check_done) {
4349 codegen_->MaybeRecordImplicitNullCheck(instruction);
4350 }
4351
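  // Card-table write barrier: storing a reference may create a pointer the GC must trace, so
  // dirty the card covering `base`. StoreNeedsWriteBarrier filters out non-reference fields
  // and stores of the null constant.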
4352 if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
4353 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4354 CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
4355 codegen_->MarkGCCard(temp, card, base, value.AsRegister<CpuRegister>(), value_can_be_null);
4356 }
4357
4358 if (is_volatile) {
4359 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
4360 }
4361 }
4362
4363 void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
4364 HandleFieldSet(instruction, instruction->GetFieldInfo());
4365 }
4366
4367 void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
4368 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
4369 }
4370
4371 void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
4372 HandleFieldGet(instruction);
4373 }
4374
4375 void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
4376 HandleFieldGet(instruction, instruction->GetFieldInfo());
4377 }
4378
4379 void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
4380 HandleFieldGet(instruction);
4381 }
4382
4383 void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
4384 HandleFieldGet(instruction, instruction->GetFieldInfo());
4385 }
4386
4387 void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
4388 HandleFieldSet(instruction, instruction->GetFieldInfo());
4389 }
4390
4391 void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
4392 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
4393 }
4394
4395 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldGet(
4396 HUnresolvedInstanceFieldGet* instruction) {
4397 FieldAccessCallingConventionX86_64 calling_convention;
4398 codegen_->CreateUnresolvedFieldLocationSummary(
4399 instruction, instruction->GetFieldType(), calling_convention);
4400 }
4401
4402 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldGet(
4403 HUnresolvedInstanceFieldGet* instruction) {
4404 FieldAccessCallingConventionX86_64 calling_convention;
4405 codegen_->GenerateUnresolvedFieldAccess(instruction,
4406 instruction->GetFieldType(),
4407 instruction->GetFieldIndex(),
4408 instruction->GetDexPc(),
4409 calling_convention);
4410 }
4411
4412 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldSet(
4413 HUnresolvedInstanceFieldSet* instruction) {
4414 FieldAccessCallingConventionX86_64 calling_convention;
4415 codegen_->CreateUnresolvedFieldLocationSummary(
4416 instruction, instruction->GetFieldType(), calling_convention);
4417 }
4418
4419 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldSet(
4420 HUnresolvedInstanceFieldSet* instruction) {
4421 FieldAccessCallingConventionX86_64 calling_convention;
4422 codegen_->GenerateUnresolvedFieldAccess(instruction,
4423 instruction->GetFieldType(),
4424 instruction->GetFieldIndex(),
4425 instruction->GetDexPc(),
4426 calling_convention);
4427 }
4428
4429 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldGet(
4430 HUnresolvedStaticFieldGet* instruction) {
4431 FieldAccessCallingConventionX86_64 calling_convention;
4432 codegen_->CreateUnresolvedFieldLocationSummary(
4433 instruction, instruction->GetFieldType(), calling_convention);
4434 }
4435
4436 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldGet(
4437 HUnresolvedStaticFieldGet* instruction) {
4438 FieldAccessCallingConventionX86_64 calling_convention;
4439 codegen_->GenerateUnresolvedFieldAccess(instruction,
4440 instruction->GetFieldType(),
4441 instruction->GetFieldIndex(),
4442 instruction->GetDexPc(),
4443 calling_convention);
4444 }
4445
4446 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldSet(
4447 HUnresolvedStaticFieldSet* instruction) {
4448 FieldAccessCallingConventionX86_64 calling_convention;
4449 codegen_->CreateUnresolvedFieldLocationSummary(
4450 instruction, instruction->GetFieldType(), calling_convention);
4451 }
4452
4453 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldSet(
4454 HUnresolvedStaticFieldSet* instruction) {
4455 FieldAccessCallingConventionX86_64 calling_convention;
4456 codegen_->GenerateUnresolvedFieldAccess(instruction,
4457 instruction->GetFieldType(),
4458 instruction->GetFieldIndex(),
4459 instruction->GetDexPc(),
4460 calling_convention);
4461 }
4462
4463 void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) {
4464 LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
4465 ? LocationSummary::kCallOnSlowPath
4466 : LocationSummary::kNoCall;
4467 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
4468 Location loc = codegen_->IsImplicitNullCheckAllowed(instruction)
4469 ? Location::RequiresRegister()
4470 : Location::Any();
4471 locations->SetInAt(0, loc);
4472 if (instruction->HasUses()) {
4473 locations->SetOut(Location::SameAsFirstInput());
4474 }
4475 }
4476
4477 void CodeGeneratorX86_64::GenerateImplicitNullCheck(HNullCheck* instruction) {
4478 if (CanMoveNullCheckToUser(instruction)) {
4479 return;
4480 }
4481 LocationSummary* locations = instruction->GetLocations();
4482 Location obj = locations->InAt(0);
4483
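  // Touch the memory at `obj` + 0: if `obj` is null the load faults, and the fault handler
  // turns the signal into a NullPointerException using the PC recorded below. testl is used
  // because it reads memory without clobbering RAX.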
4484 __ testl(CpuRegister(RAX), Address(obj.AsRegister<CpuRegister>(), 0));
4485 RecordPcInfo(instruction, instruction->GetDexPc());
4486 }
4487
4488 void CodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instruction) {
4489 SlowPathCode* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathX86_64(instruction);
4490 AddSlowPath(slow_path);
4491
4492 LocationSummary* locations = instruction->GetLocations();
4493 Location obj = locations->InAt(0);
4494
4495 if (obj.IsRegister()) {
4496 __ testl(obj.AsRegister<CpuRegister>(), obj.AsRegister<CpuRegister>());
4497 } else if (obj.IsStackSlot()) {
4498 __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0));
4499 } else {
4500 DCHECK(obj.IsConstant()) << obj;
4501 DCHECK(obj.GetConstant()->IsNullConstant());
4502 __ jmp(slow_path->GetEntryLabel());
4503 return;
4504 }
4505 __ j(kEqual, slow_path->GetEntryLabel());
4506 }
4507
4508 void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) {
4509 codegen_->GenerateNullCheck(instruction);
4510 }
4511
4512 void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
4513 bool object_array_get_with_read_barrier =
4514 kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
4515 LocationSummary* locations =
4516 new (GetGraph()->GetArena()) LocationSummary(instruction,
4517 object_array_get_with_read_barrier ?
4518 LocationSummary::kCallOnSlowPath :
4519 LocationSummary::kNoCall);
4520 locations->SetInAt(0, Location::RequiresRegister());
4521 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
4522 if (Primitive::IsFloatingPointType(instruction->GetType())) {
4523 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4524 } else {
4525 // The output overlaps for an object array get when read barriers
4526 // are enabled: we do not want the move to overwrite the array's
4527 // location, as we need it to emit the read barrier.
4528 locations->SetOut(
4529 Location::RequiresRegister(),
4530 object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
4531 }
4532 // We need a temporary register for the read barrier marking slow
4533 // path in CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier.
4534 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
4535 locations->AddTemp(Location::RequiresRegister());
4536 }
4537 }
4538
4539 void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
4540 LocationSummary* locations = instruction->GetLocations();
4541 Location obj_loc = locations->InAt(0);
4542 CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
4543 Location index = locations->InAt(1);
4544 Location out_loc = locations->Out();
4545
4546 Primitive::Type type = instruction->GetType();
4547 switch (type) {
4548 case Primitive::kPrimBoolean: {
4549 uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
4550 CpuRegister out = out_loc.AsRegister<CpuRegister>();
4551 if (index.IsConstant()) {
4552 __ movzxb(out, Address(obj,
4553 (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
4554 } else {
4555 __ movzxb(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_1, data_offset));
4556 }
4557 break;
4558 }
4559
4560 case Primitive::kPrimByte: {
4561 uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value();
4562 CpuRegister out = out_loc.AsRegister<CpuRegister>();
4563 if (index.IsConstant()) {
4564 __ movsxb(out, Address(obj,
4565 (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
4566 } else {
4567 __ movsxb(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_1, data_offset));
4568 }
4569 break;
4570 }
4571
4572 case Primitive::kPrimShort: {
4573 uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value();
4574 CpuRegister out = out_loc.AsRegister<CpuRegister>();
4575 if (index.IsConstant()) {
4576 __ movsxw(out, Address(obj,
4577 (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
4578 } else {
4579 __ movsxw(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_2, data_offset));
4580 }
4581 break;
4582 }
4583
4584 case Primitive::kPrimChar: {
4585 uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
4586 CpuRegister out = out_loc.AsRegister<CpuRegister>();
4587 if (index.IsConstant()) {
4588 __ movzxw(out, Address(obj,
4589 (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
4590 } else {
4591 __ movzxw(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_2, data_offset));
4592 }
4593 break;
4594 }
4595
4596 case Primitive::kPrimInt: {
4597 uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
4598 CpuRegister out = out_loc.AsRegister<CpuRegister>();
4599 if (index.IsConstant()) {
4600 __ movl(out, Address(obj,
4601 (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
4602 } else {
4603 __ movl(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset));
4604 }
4605 break;
4606 }
4607
4608 case Primitive::kPrimNot: {
4609 static_assert(
4610 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
4611 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
4612 uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
4613 // /* HeapReference<Object> */ out =
4614 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
4615 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
4616 Location temp = locations->GetTemp(0);
4617 // Note that a potential implicit null check is handled in this
4618 // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
4619 codegen_->GenerateArrayLoadWithBakerReadBarrier(
4620 instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
4621 } else {
4622 CpuRegister out = out_loc.AsRegister<CpuRegister>();
4623 if (index.IsConstant()) {
4624 uint32_t offset =
4625 (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
4626 __ movl(out, Address(obj, offset));
4627 codegen_->MaybeRecordImplicitNullCheck(instruction);
4628 // If read barriers are enabled, emit read barriers other than
4629 // Baker's using a slow path (and also unpoison the loaded
4630 // reference, if heap poisoning is enabled).
4631 codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
4632 } else {
4633 __ movl(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset));
4634 codegen_->MaybeRecordImplicitNullCheck(instruction);
4635 // If read barriers are enabled, emit read barriers other than
4636 // Baker's using a slow path (and also unpoison the loaded
4637 // reference, if heap poisoning is enabled).
4638 codegen_->MaybeGenerateReadBarrierSlow(
4639 instruction, out_loc, out_loc, obj_loc, data_offset, index);
4640 }
4641 }
4642 break;
4643 }
4644
4645 case Primitive::kPrimLong: {
4646 uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
4647 CpuRegister out = out_loc.AsRegister<CpuRegister>();
4648 if (index.IsConstant()) {
4649 __ movq(out, Address(obj,
4650 (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset));
4651 } else {
4652 __ movq(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset));
4653 }
4654 break;
4655 }
4656
4657 case Primitive::kPrimFloat: {
4658 uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
4659 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4660 if (index.IsConstant()) {
4661 __ movss(out, Address(obj,
4662 (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
4663 } else {
4664 __ movss(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset));
4665 }
4666 break;
4667 }
4668
4669 case Primitive::kPrimDouble: {
4670 uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
4671 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4672 if (index.IsConstant()) {
4673 __ movsd(out, Address(obj,
4674 (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset));
4675 } else {
4676 __ movsd(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset));
4677 }
4678 break;
4679 }
4680
4681 case Primitive::kPrimVoid:
4682 LOG(FATAL) << "Unreachable type " << type;
4683 UNREACHABLE();
4684 }
4685
4686 if (type == Primitive::kPrimNot) {
4687 // Potential implicit null checks, in the case of reference
4688 // arrays, are handled in the previous switch statement.
4689 } else {
4690 codegen_->MaybeRecordImplicitNullCheck(instruction);
4691 }
4692 }
4693
4694 void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) {
4695 Primitive::Type value_type = instruction->GetComponentType();
4696
4697 bool needs_write_barrier =
4698 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
4699 bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
4700 bool object_array_set_with_read_barrier =
4701 kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot);
4702
4703 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
4704 instruction,
4705 (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ?
4706 LocationSummary::kCallOnSlowPath :
4707 LocationSummary::kNoCall);
4708
4709 locations->SetInAt(0, Location::RequiresRegister());
4710 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
4711 if (Primitive::IsFloatingPointType(value_type)) {
4712 locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
4713 } else {
4714 locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
4715 }
4716
4717 if (needs_write_barrier) {
4718 // Temporary registers for the write barrier.
4719
4720 // This first temporary register is possibly used for heap
4721 // reference poisoning and/or read barrier emission too.
4722 locations->AddTemp(Location::RequiresRegister());
4723 locations->AddTemp(Location::RequiresRegister());
4724 }
4725 }
4726
4727 void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
4728 LocationSummary* locations = instruction->GetLocations();
4729 Location array_loc = locations->InAt(0);
4730 CpuRegister array = array_loc.AsRegister<CpuRegister>();
4731 Location index = locations->InAt(1);
4732 Location value = locations->InAt(2);
4733 Primitive::Type value_type = instruction->GetComponentType();
4734 bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
4735 bool needs_write_barrier =
4736 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
4737 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
4738 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
4739 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
4740
4741 switch (value_type) {
4742 case Primitive::kPrimBoolean:
4743 case Primitive::kPrimByte: {
4744 uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
4745 Address address = index.IsConstant()
4746 ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + offset)
4747 : Address(array, index.AsRegister<CpuRegister>(), TIMES_1, offset);
4748 if (value.IsRegister()) {
4749 __ movb(address, value.AsRegister<CpuRegister>());
4750 } else {
4751 __ movb(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
4752 }
4753 codegen_->MaybeRecordImplicitNullCheck(instruction);
4754 break;
4755 }
4756
4757 case Primitive::kPrimShort:
4758 case Primitive::kPrimChar: {
4759 uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
4760 Address address = index.IsConstant()
4761 ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + offset)
4762 : Address(array, index.AsRegister<CpuRegister>(), TIMES_2, offset);
4763 if (value.IsRegister()) {
4764 __ movw(address, value.AsRegister<CpuRegister>());
4765 } else {
4766 DCHECK(value.IsConstant()) << value;
4767 __ movw(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
4768 }
4769 codegen_->MaybeRecordImplicitNullCheck(instruction);
4770 break;
4771 }
4772
4773 case Primitive::kPrimNot: {
4774 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
4775 Address address = index.IsConstant()
4776 ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
4777 : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset);
4778
4779 if (!value.IsRegister()) {
4780 // Just setting null.
4781 DCHECK(instruction->InputAt(2)->IsNullConstant());
4782 DCHECK(value.IsConstant()) << value;
4783 __ movl(address, Immediate(0));
4784 codegen_->MaybeRecordImplicitNullCheck(instruction);
4785 DCHECK(!needs_write_barrier);
4786 DCHECK(!may_need_runtime_call_for_type_check);
4787 break;
4788 }
4789
4790 DCHECK(needs_write_barrier);
4791 CpuRegister register_value = value.AsRegister<CpuRegister>();
4792 NearLabel done, not_null, do_put;
4793 SlowPathCode* slow_path = nullptr;
4794 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4795 if (may_need_runtime_call_for_type_check) {
4796 slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86_64(instruction);
4797 codegen_->AddSlowPath(slow_path);
4798 if (instruction->GetValueCanBeNull()) {
4799 __ testl(register_value, register_value);
4800 __ j(kNotEqual, &not_null);
4801 __ movl(address, Immediate(0));
4802 codegen_->MaybeRecordImplicitNullCheck(instruction);
4803 __ jmp(&done);
4804 __ Bind(&not_null);
4805 }
4806
4807 if (kEmitCompilerReadBarrier) {
4808 // When read barriers are enabled, the type checking
4809 // instrumentation requires two read barriers:
4810 //
4811 // __ movl(temp2, temp);
4812 // // /* HeapReference<Class> */ temp = temp->component_type_
4813 // __ movl(temp, Address(temp, component_offset));
4814 // codegen_->GenerateReadBarrierSlow(
4815 // instruction, temp_loc, temp_loc, temp2_loc, component_offset);
4816 //
4817 // // /* HeapReference<Class> */ temp2 = register_value->klass_
4818 // __ movl(temp2, Address(register_value, class_offset));
4819 // codegen_->GenerateReadBarrierSlow(
4820 // instruction, temp2_loc, temp2_loc, value, class_offset, temp_loc);
4821 //
4822 // __ cmpl(temp, temp2);
4823 //
4824 // However, the second read barrier may trash `temp`, as it
4825 // is a temporary register, and as such would not be saved
4826 // along with live registers before calling the runtime (nor
4827 // restored afterwards). So in this case, we bail out and
4828 // delegate the work to the array set slow path.
4829 //
4830 // TODO: Extend the register allocator to support a new
4831 // "(locally) live temp" location so as to avoid always
4832 // going into the slow path when read barriers are enabled.
4833 __ jmp(slow_path->GetEntryLabel());
4834 } else {
4835 // /* HeapReference<Class> */ temp = array->klass_
4836 __ movl(temp, Address(array, class_offset));
4837 codegen_->MaybeRecordImplicitNullCheck(instruction);
4838 __ MaybeUnpoisonHeapReference(temp);
4839
4840 // /* HeapReference<Class> */ temp = temp->component_type_
4841 __ movl(temp, Address(temp, component_offset));
4842 // If heap poisoning is enabled, no need to unpoison `temp`
4843 // nor the object reference in `register_value->klass`, as
4844 // we are comparing two poisoned references.
4845 __ cmpl(temp, Address(register_value, class_offset));
4846
4847 if (instruction->StaticTypeOfArrayIsObjectArray()) {
4848 __ j(kEqual, &do_put);
4849 // If heap poisoning is enabled, the `temp` reference has
4850 // not been unpoisoned yet; unpoison it now.
4851 __ MaybeUnpoisonHeapReference(temp);
4852
4853 // /* HeapReference<Class> */ temp = temp->super_class_
4854 __ movl(temp, Address(temp, super_offset));
4855 // If heap poisoning is enabled, no need to unpoison
4856 // `temp`, as we are comparing against null below.
4857 __ testl(temp, temp);
4858 __ j(kNotEqual, slow_path->GetEntryLabel());
4859 __ Bind(&do_put);
4860 } else {
4861 __ j(kNotEqual, slow_path->GetEntryLabel());
4862 }
4863 }
4864 }
4865
4866 if (kPoisonHeapReferences) {
4867 __ movl(temp, register_value);
4868 __ PoisonHeapReference(temp);
4869 __ movl(address, temp);
4870 } else {
4871 __ movl(address, register_value);
4872 }
4873 if (!may_need_runtime_call_for_type_check) {
4874 codegen_->MaybeRecordImplicitNullCheck(instruction);
4875 }
4876
4877 CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
4878 codegen_->MarkGCCard(
4879 temp, card, array, value.AsRegister<CpuRegister>(), instruction->GetValueCanBeNull());
4880 __ Bind(&done);
4881
4882 if (slow_path != nullptr) {
4883 __ Bind(slow_path->GetExitLabel());
4884 }
4885
4886 break;
4887 }
4888
4889 case Primitive::kPrimInt: {
4890 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
4891 Address address = index.IsConstant()
4892 ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
4893 : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset);
4894 if (value.IsRegister()) {
4895 __ movl(address, value.AsRegister<CpuRegister>());
4896 } else {
4897 DCHECK(value.IsConstant()) << value;
4898 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4899 __ movl(address, Immediate(v));
4900 }
4901 codegen_->MaybeRecordImplicitNullCheck(instruction);
4902 break;
4903 }
4904
4905 case Primitive::kPrimLong: {
4906 uint32_t offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
4907 Address address = index.IsConstant()
4908 ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset)
4909 : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset);
4910 if (value.IsRegister()) {
4911 __ movq(address, value.AsRegister<CpuRegister>());
4912 codegen_->MaybeRecordImplicitNullCheck(instruction);
4913 } else {
4914 int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
4915 Address address_high = index.IsConstant()
4916 ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) +
4917 offset + sizeof(int32_t))
4918 : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset + sizeof(int32_t));
4919 codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
4920 }
4921 break;
4922 }
4923
4924 case Primitive::kPrimFloat: {
4925 uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
4926 Address address = index.IsConstant()
4927 ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
4928 : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset);
4929 if (value.IsFpuRegister()) {
4930 __ movss(address, value.AsFpuRegister<XmmRegister>());
4931 } else {
4932 DCHECK(value.IsConstant());
4933 int32_t v =
4934 bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
4935 __ movl(address, Immediate(v));
4936 }
4937 codegen_->MaybeRecordImplicitNullCheck(instruction);
4938 break;
4939 }
4940
4941 case Primitive::kPrimDouble: {
4942 uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
4943 Address address = index.IsConstant()
4944 ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset)
4945 : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset);
4946 if (value.IsFpuRegister()) {
4947 __ movsd(address, value.AsFpuRegister<XmmRegister>());
4948 codegen_->MaybeRecordImplicitNullCheck(instruction);
4949 } else {
4950 int64_t v =
4951 bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
4952 Address address_high = index.IsConstant()
4953 ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) +
4954 offset + sizeof(int32_t))
4955 : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset + sizeof(int32_t));
4956 codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
4957 }
4958 break;
4959 }
4960
4961 case Primitive::kPrimVoid:
4962 LOG(FATAL) << "Unreachable type " << instruction->GetType();
4963 UNREACHABLE();
4964 }
4965 }
4966
4967 void LocationsBuilderX86_64::VisitArrayLength(HArrayLength* instruction) {
4968 LocationSummary* locations =
4969 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
4970 locations->SetInAt(0, Location::RequiresRegister());
4971 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4972 }
4973
4974 void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) {
4975 LocationSummary* locations = instruction->GetLocations();
4976 uint32_t offset = mirror::Array::LengthOffset().Uint32Value();
4977 CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
4978 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4979 __ movl(out, Address(obj, offset));
4980 codegen_->MaybeRecordImplicitNullCheck(instruction);
4981 }
4982
4983 void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
4984 LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
4985 ? LocationSummary::kCallOnSlowPath
4986 : LocationSummary::kNoCall;
4987 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
4988 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
4989 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
4990 if (instruction->HasUses()) {
4991 locations->SetOut(Location::SameAsFirstInput());
4992 }
4993 }
4994
4995 void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
4996 LocationSummary* locations = instruction->GetLocations();
4997 Location index_loc = locations->InAt(0);
4998 Location length_loc = locations->InAt(1);
4999 SlowPathCode* slow_path =
5000 new (GetGraph()->GetArena()) BoundsCheckSlowPathX86_64(instruction);
5001
5002 if (length_loc.IsConstant()) {
5003 int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
5004 if (index_loc.IsConstant()) {
5005 // BCE will remove the bounds check if we are guaranteed to pass.
5006 int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
5007 if (index < 0 || index >= length) {
5008 codegen_->AddSlowPath(slow_path);
5009 __ jmp(slow_path->GetEntryLabel());
5010 } else {
5011 // Some optimization after BCE may have generated this, and we should not
5012 // generate a bounds check if it is a valid range.
5013 }
5014 return;
5015 }
5016
5017 // We have to reverse the jump condition because the length is the constant.
5018 CpuRegister index_reg = index_loc.AsRegister<CpuRegister>();
5019 __ cmpl(index_reg, Immediate(length));
5020 codegen_->AddSlowPath(slow_path);
5021 __ j(kAboveEqual, slow_path->GetEntryLabel());
5022 } else {
5023 CpuRegister length = length_loc.AsRegister<CpuRegister>();
5024 if (index_loc.IsConstant()) {
5025 int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
5026 __ cmpl(length, Immediate(value));
5027 } else {
5028 __ cmpl(length, index_loc.AsRegister<CpuRegister>());
5029 }
5030 codegen_->AddSlowPath(slow_path);
5031 __ j(kBelowEqual, slow_path->GetEntryLabel());
5032 }
5033 }
5034
5035 void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp,
5036 CpuRegister card,
5037 CpuRegister object,
5038 CpuRegister value,
5039 bool value_can_be_null) {
5040 NearLabel is_null;
5041 if (value_can_be_null) {
5042 __ testl(value, value);
5043 __ j(kEqual, &is_null);
5044 }
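  // `card` holds the biased card-table base loaded from thread-local storage and `temp` the
  // object address shifted right by kCardShift. Storing the low byte of `card` at
  // card + (object >> kCardShift) marks the card dirty (the base is biased so that its least
  // significant byte equals the dirty-card value).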
5045 __ gs()->movq(card, Address::Absolute(Thread::CardTableOffset<kX86_64WordSize>().Int32Value(),
5046 /* no_rip */ true));
5047 __ movq(temp, object);
5048 __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
5049 __ movb(Address(temp, card, TIMES_1, 0), card);
5050 if (value_can_be_null) {
5051 __ Bind(&is_null);
5052 }
5053 }
5054
5055 void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
5056 LOG(FATAL) << "Unimplemented";
5057 }
5058
5059 void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instruction) {
5060 codegen_->GetMoveResolver()->EmitNativeCode(instruction);
5061 }
5062
5063 void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
5064 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
5065 }
5066
5067 void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
5068 HBasicBlock* block = instruction->GetBlock();
5069 if (block->GetLoopInformation() != nullptr) {
5070 DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
5071 // The back edge will generate the suspend check.
5072 return;
5073 }
5074 if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
5075 // The goto will generate the suspend check.
5076 return;
5077 }
5078 GenerateSuspendCheck(instruction, nullptr);
5079 }
5080
5081 void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction,
5082 HBasicBlock* successor) {
5083 SuspendCheckSlowPathX86_64* slow_path =
5084 down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath());
5085 if (slow_path == nullptr) {
5086 slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathX86_64(instruction, successor);
5087 instruction->SetSlowPath(slow_path);
5088 codegen_->AddSlowPath(slow_path);
5089 if (successor != nullptr) {
5090 DCHECK(successor->IsLoopHeader());
5091 codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction);
5092 }
5093 } else {
5094 DCHECK_EQ(slow_path->GetSuccessor(), successor);
5095 }
5096
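  // Test the thread's flag bits in thread-local storage; a pending suspend or checkpoint
  // request makes them non-zero and routes execution to the suspend-check slow path.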
5097 __ gs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64WordSize>().Int32Value(),
5098 /* no_rip */ true),
5099 Immediate(0));
5100 if (successor == nullptr) {
5101 __ j(kNotEqual, slow_path->GetEntryLabel());
5102 __ Bind(slow_path->GetReturnLabel());
5103 } else {
5104 __ j(kEqual, codegen_->GetLabelOf(successor));
5105 __ jmp(slow_path->GetEntryLabel());
5106 }
5107 }
5108
5109 X86_64Assembler* ParallelMoveResolverX86_64::GetAssembler() const {
5110 return codegen_->GetAssembler();
5111 }
5112
5113 void ParallelMoveResolverX86_64::EmitMove(size_t index) {
5114 MoveOperands* move = moves_[index];
5115 Location source = move->GetSource();
5116 Location destination = move->GetDestination();
5117
5118 if (source.IsRegister()) {
5119 if (destination.IsRegister()) {
5120 __ movq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
5121 } else if (destination.IsStackSlot()) {
5122 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
5123 source.AsRegister<CpuRegister>());
5124 } else {
5125 DCHECK(destination.IsDoubleStackSlot());
5126 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
5127 source.AsRegister<CpuRegister>());
5128 }
5129 } else if (source.IsStackSlot()) {
5130 if (destination.IsRegister()) {
5131 __ movl(destination.AsRegister<CpuRegister>(),
5132 Address(CpuRegister(RSP), source.GetStackIndex()));
5133 } else if (destination.IsFpuRegister()) {
5134 __ movss(destination.AsFpuRegister<XmmRegister>(),
5135 Address(CpuRegister(RSP), source.GetStackIndex()));
5136 } else {
5137 DCHECK(destination.IsStackSlot());
5138 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
5139 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
5140 }
5141 } else if (source.IsDoubleStackSlot()) {
5142 if (destination.IsRegister()) {
5143 __ movq(destination.AsRegister<CpuRegister>(),
5144 Address(CpuRegister(RSP), source.GetStackIndex()));
5145 } else if (destination.IsFpuRegister()) {
5146 __ movsd(destination.AsFpuRegister<XmmRegister>(),
5147 Address(CpuRegister(RSP), source.GetStackIndex()));
5148 } else {
5149 DCHECK(destination.IsDoubleStackSlot()) << destination;
5150 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
5151 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
5152 }
5153 } else if (source.IsConstant()) {
5154 HConstant* constant = source.GetConstant();
5155 if (constant->IsIntConstant() || constant->IsNullConstant()) {
5156 int32_t value = CodeGenerator::GetInt32ValueOf(constant);
5157 if (destination.IsRegister()) {
5158 if (value == 0) {
5159 __ xorl(destination.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
5160 } else {
5161 __ movl(destination.AsRegister<CpuRegister>(), Immediate(value));
5162 }
5163 } else {
5164 DCHECK(destination.IsStackSlot()) << destination;
5165 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
5166 }
5167 } else if (constant->IsLongConstant()) {
5168 int64_t value = constant->AsLongConstant()->GetValue();
5169 if (destination.IsRegister()) {
5170 codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value);
5171 } else {
5172 DCHECK(destination.IsDoubleStackSlot()) << destination;
5173 codegen_->Store64BitValueToStack(destination, value);
5174 }
5175 } else if (constant->IsFloatConstant()) {
5176 float fp_value = constant->AsFloatConstant()->GetValue();
5177 if (destination.IsFpuRegister()) {
5178 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
5179 codegen_->Load32BitValue(dest, fp_value);
5180 } else {
5181 DCHECK(destination.IsStackSlot()) << destination;
5182 Immediate imm(bit_cast<int32_t, float>(fp_value));
5183 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
5184 }
5185 } else {
5186 DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
5187 double fp_value = constant->AsDoubleConstant()->GetValue();
5188 int64_t value = bit_cast<int64_t, double>(fp_value);
5189 if (destination.IsFpuRegister()) {
5190 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
5191 codegen_->Load64BitValue(dest, fp_value);
5192 } else {
5193 DCHECK(destination.IsDoubleStackSlot()) << destination;
5194 codegen_->Store64BitValueToStack(destination, value);
5195 }
5196 }
5197 } else if (source.IsFpuRegister()) {
5198 if (destination.IsFpuRegister()) {
5199 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
5200 } else if (destination.IsStackSlot()) {
5201 __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
5202 source.AsFpuRegister<XmmRegister>());
5203 } else {
5204 DCHECK(destination.IsDoubleStackSlot()) << destination;
5205 __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
5206 source.AsFpuRegister<XmmRegister>());
5207 }
5208 }
5209 }
5210
5211 void ParallelMoveResolverX86_64::Exchange32(CpuRegister reg, int mem) {
5212 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5213 __ movl(Address(CpuRegister(RSP), mem), reg);
5214 __ movl(reg, CpuRegister(TMP));
5215 }
5216
5217 void ParallelMoveResolverX86_64::Exchange32(int mem1, int mem2) {
5218 ScratchRegisterScope ensure_scratch(
5219 this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
5220
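  // If the scope had to spill RAX to obtain a scratch register, the push moved RSP down by
  // one word; compensate for it when addressing the two stack slots.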
5221 int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
5222 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
5223 __ movl(CpuRegister(ensure_scratch.GetRegister()),
5224 Address(CpuRegister(RSP), mem2 + stack_offset));
5225 __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
5226 __ movl(Address(CpuRegister(RSP), mem1 + stack_offset),
5227 CpuRegister(ensure_scratch.GetRegister()));
5228 }
5229
5230 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) {
5231 __ movq(CpuRegister(TMP), reg1);
5232 __ movq(reg1, reg2);
5233 __ movq(reg2, CpuRegister(TMP));
5234 }
5235
5236 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) {
5237 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5238 __ movq(Address(CpuRegister(RSP), mem), reg);
5239 __ movq(reg, CpuRegister(TMP));
5240 }
5241
5242 void ParallelMoveResolverX86_64::Exchange64(int mem1, int mem2) {
5243 ScratchRegisterScope ensure_scratch(
5244 this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
5245
5246 int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
5247 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
5248 __ movq(CpuRegister(ensure_scratch.GetRegister()),
5249 Address(CpuRegister(RSP), mem2 + stack_offset));
5250 __ movq(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
5251 __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
5252 CpuRegister(ensure_scratch.GetRegister()));
5253 }
5254
5255 void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
5256 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5257 __ movss(Address(CpuRegister(RSP), mem), reg);
5258 __ movd(reg, CpuRegister(TMP));
5259 }
5260
5261 void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) {
5262 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5263 __ movsd(Address(CpuRegister(RSP), mem), reg);
5264 __ movd(reg, CpuRegister(TMP));
5265 }
5266
5267 void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
5268 MoveOperands* move = moves_[index];
5269 Location source = move->GetSource();
5270 Location destination = move->GetDestination();
5271
5272 if (source.IsRegister() && destination.IsRegister()) {
5273 Exchange64(source.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
5274 } else if (source.IsRegister() && destination.IsStackSlot()) {
5275 Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
5276 } else if (source.IsStackSlot() && destination.IsRegister()) {
5277 Exchange32(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
5278 } else if (source.IsStackSlot() && destination.IsStackSlot()) {
5279 Exchange32(destination.GetStackIndex(), source.GetStackIndex());
5280 } else if (source.IsRegister() && destination.IsDoubleStackSlot()) {
5281 Exchange64(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
5282 } else if (source.IsDoubleStackSlot() && destination.IsRegister()) {
5283 Exchange64(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
5284 } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
5285 Exchange64(destination.GetStackIndex(), source.GetStackIndex());
5286 } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
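    // Swap two XMM registers through the core scratch register TMP; there is no direct
    // xmm<->xmm exchange instruction.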
5287 __ movd(CpuRegister(TMP), source.AsFpuRegister<XmmRegister>());
5288 __ movaps(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
5289 __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
5290 } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
5291 Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
5292 } else if (source.IsStackSlot() && destination.IsFpuRegister()) {
5293 Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
5294 } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
5295 Exchange64(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
5296 } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) {
5297 Exchange64(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
5298 } else {
5299 LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination;
5300 }
5301 }
5302
5303
5304 void ParallelMoveResolverX86_64::SpillScratch(int reg) {
5305 __ pushq(CpuRegister(reg));
5306 }
5307
5308
5309 void ParallelMoveResolverX86_64::RestoreScratch(int reg) {
5310 __ popq(CpuRegister(reg));
5311 }
5312
5313 void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
5314 SlowPathCode* slow_path, CpuRegister class_reg) {
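  // Compare the class status word against kStatusInitialized; anything below means the class
  // is not yet initialized and we must take the slow path to initialize it.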
5315 __ cmpl(Address(class_reg, mirror::Class::StatusOffset().Int32Value()),
5316 Immediate(mirror::Class::kStatusInitialized));
5317 __ j(kLess, slow_path->GetEntryLabel());
5318 __ Bind(slow_path->GetExitLabel());
5319 // No need for memory fence, thanks to the x86-64 memory model.
5320 }
5321
5322 void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
5323 InvokeRuntimeCallingConvention calling_convention;
5324 CodeGenerator::CreateLoadClassLocationSummary(
5325 cls,
5326 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
5327 Location::RegisterLocation(RAX),
5328 /* code_generator_supports_read_barrier */ true);
5329 }
5330
5331 void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) {
5332 LocationSummary* locations = cls->GetLocations();
5333 if (cls->NeedsAccessCheck()) {
5334 codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex());
5335 codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInitializeTypeAndVerifyAccess),
5336 cls,
5337 cls->GetDexPc(),
5338 nullptr);
5339 CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
5340 return;
5341 }
5342
5343 Location out_loc = locations->Out();
5344 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5345 CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
5346
5347 if (cls->IsReferrersClass()) {
5348 DCHECK(!cls->CanCallRuntime());
5349 DCHECK(!cls->MustGenerateClinitCheck());
5350 // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
5351 GenerateGcRootFieldLoad(
5352 cls, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
5353 } else {
5354 // /* GcRoot<mirror::Class>[] */ out =
5355 // current_method.ptr_sized_fields_->dex_cache_resolved_types_
5356 __ movq(out, Address(current_method,
5357 ArtMethod::DexCacheResolvedTypesOffset(kX86_64PointerSize).Int32Value()));
5358 // /* GcRoot<mirror::Class> */ out = out[type_index]
5359 GenerateGcRootFieldLoad(
5360 cls, out_loc, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
5361
5362 if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
5363 DCHECK(cls->CanCallRuntime());
5364 SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64(
5365 cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
5366 codegen_->AddSlowPath(slow_path);
5367 if (!cls->IsInDexCache()) {
5368 __ testl(out, out);
5369 __ j(kEqual, slow_path->GetEntryLabel());
5370 }
5371 if (cls->MustGenerateClinitCheck()) {
5372 GenerateClassInitializationCheck(slow_path, out);
5373 } else {
5374 __ Bind(slow_path->GetExitLabel());
5375 }
5376 }
5377 }
5378 }
5379
5380 void LocationsBuilderX86_64::VisitClinitCheck(HClinitCheck* check) {
5381 LocationSummary* locations =
5382 new (GetGraph()->GetArena()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
5383 locations->SetInAt(0, Location::RequiresRegister());
5384 if (check->HasUses()) {
5385 locations->SetOut(Location::SameAsFirstInput());
5386 }
5387 }
5388
5389 void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) {
5390 // We assume the class is not null.
5391 SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64(
5392 check->GetLoadClass(), check, check->GetDexPc(), true);
5393 codegen_->AddSlowPath(slow_path);
5394 GenerateClassInitializationCheck(slow_path,
5395 check->GetLocations()->InAt(0).AsRegister<CpuRegister>());
5396 }
5397
5398 HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
5399 HLoadString::LoadKind desired_string_load_kind) {
5400 if (kEmitCompilerReadBarrier) {
5401 switch (desired_string_load_kind) {
5402 case HLoadString::LoadKind::kBootImageLinkTimeAddress:
5403 case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
5404 case HLoadString::LoadKind::kBootImageAddress:
5405 // TODO: Implement for read barrier.
5406 return HLoadString::LoadKind::kDexCacheViaMethod;
5407 default:
5408 break;
5409 }
5410 }
5411 switch (desired_string_load_kind) {
5412 case HLoadString::LoadKind::kBootImageLinkTimeAddress:
5413 DCHECK(!GetCompilerOptions().GetCompilePic());
5414 // We prefer the always-available RIP-relative address for the x86-64 boot image.
5415 return HLoadString::LoadKind::kBootImageLinkTimePcRelative;
5416 case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
5417 DCHECK(GetCompilerOptions().GetCompilePic());
5418 break;
5419 case HLoadString::LoadKind::kBootImageAddress:
5420 break;
5421 case HLoadString::LoadKind::kDexCacheAddress:
5422 DCHECK(Runtime::Current()->UseJitCompilation());
5423 break;
5424 case HLoadString::LoadKind::kDexCachePcRelative:
5425 DCHECK(!Runtime::Current()->UseJitCompilation());
5426 break;
5427 case HLoadString::LoadKind::kDexCacheViaMethod:
5428 break;
5429 }
5430 return desired_string_load_kind;
5431 }
5432
5433 void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
5434 LocationSummary::CallKind call_kind = (load->NeedsEnvironment() || kEmitCompilerReadBarrier)
5435 ? LocationSummary::kCallOnSlowPath
5436 : LocationSummary::kNoCall;
5437 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
5438 if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) {
5439 locations->SetInAt(0, Location::RequiresRegister());
5440 }
5441 locations->SetOut(Location::RequiresRegister());
5442 }
5443
5444 void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) {
5445 LocationSummary* locations = load->GetLocations();
5446 Location out_loc = locations->Out();
5447 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5448
5449 switch (load->GetLoadKind()) {
5450 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
5451 DCHECK(!kEmitCompilerReadBarrier);
5452 __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
5453 codegen_->RecordStringPatch(load);
5454 return; // No dex cache slow path.
5455 }
5456 case HLoadString::LoadKind::kBootImageAddress: {
5457 DCHECK(!kEmitCompilerReadBarrier);
5458 DCHECK_NE(load->GetAddress(), 0u);
5459 uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress());
5460 __ movl(out, Immediate(address)); // Zero-extended.
5461 codegen_->RecordSimplePatch();
5462 return; // No dex cache slow path.
5463 }
5464 case HLoadString::LoadKind::kDexCacheAddress: {
5465 DCHECK_NE(load->GetAddress(), 0u);
5466 if (IsUint<32>(load->GetAddress())) {
5467 Address address = Address::Absolute(load->GetAddress(), /* no_rip */ true);
5468 GenerateGcRootFieldLoad(load, out_loc, address);
5469 } else {
5470 // TODO: Consider using opcode A1, i.e. movl eax, moff32 (with 64-bit address).
5471 __ movq(out, Immediate(load->GetAddress()));
5472 GenerateGcRootFieldLoad(load, out_loc, Address(out, 0));
5473 }
5474 break;
5475 }
5476 case HLoadString::LoadKind::kDexCachePcRelative: {
5477 uint32_t offset = load->GetDexCacheElementOffset();
5478 Label* fixup_label = codegen_->NewPcRelativeDexCacheArrayPatch(load->GetDexFile(), offset);
5479 Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
5480 /* no_rip */ false);
5481 GenerateGcRootFieldLoad(load, out_loc, address, fixup_label);
5482 break;
5483 }
5484 case HLoadString::LoadKind::kDexCacheViaMethod: {
5485 CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
5486
5487 // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
5488 GenerateGcRootFieldLoad(
5489 load, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
5490 // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
5491 __ movq(out, Address(out, mirror::Class::DexCacheStringsOffset().Uint32Value()));
5492 // /* GcRoot<mirror::String> */ out = out[string_index]
5493 GenerateGcRootFieldLoad(
5494 load, out_loc, Address(out, CodeGenerator::GetCacheOffset(load->GetStringIndex())));
5495 break;
5496 }
5497 default:
5498 LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind();
5499 UNREACHABLE();
5500 }
5501
5502 if (!load->IsInDexCache()) {
5503 SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load);
5504 codegen_->AddSlowPath(slow_path);
5505 __ testl(out, out);
5506 __ j(kEqual, slow_path->GetEntryLabel());
5507 __ Bind(slow_path->GetExitLabel());
5508 }
5509 }
5510
5511 static Address GetExceptionTlsAddress() {
5512 return Address::Absolute(Thread::ExceptionOffset<kX86_64WordSize>().Int32Value(),
5513 /* no_rip */ true);
5514 }
5515
5516 void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) {
5517 LocationSummary* locations =
5518 new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kNoCall);
5519 locations->SetOut(Location::RequiresRegister());
5520 }
5521
5522 void InstructionCodeGeneratorX86_64::VisitLoadException(HLoadException* load) {
5523 __ gs()->movl(load->GetLocations()->Out().AsRegister<CpuRegister>(), GetExceptionTlsAddress());
5524 }
5525
5526 void LocationsBuilderX86_64::VisitClearException(HClearException* clear) {
5527 new (GetGraph()->GetArena()) LocationSummary(clear, LocationSummary::kNoCall);
5528 }
5529
5530 void InstructionCodeGeneratorX86_64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
5531 __ gs()->movl(GetExceptionTlsAddress(), Immediate(0));
5532 }
5533
5534 void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) {
5535 LocationSummary* locations =
5536 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
5537 InvokeRuntimeCallingConvention calling_convention;
5538 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5539 }
5540
5541 void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) {
5542 codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pDeliverException),
5543 instruction,
5544 instruction->GetDexPc(),
5545 nullptr);
5546 CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
5547 }
5548
5549 static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
5550 return kEmitCompilerReadBarrier &&
5551 (kUseBakerReadBarrier ||
5552 type_check_kind == TypeCheckKind::kAbstractClassCheck ||
5553 type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
5554 type_check_kind == TypeCheckKind::kArrayObjectCheck);
5555 }
5556
5557 void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
5558 LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
5559 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
5560 switch (type_check_kind) {
5561 case TypeCheckKind::kExactCheck:
5562 case TypeCheckKind::kAbstractClassCheck:
5563 case TypeCheckKind::kClassHierarchyCheck:
5564 case TypeCheckKind::kArrayObjectCheck:
5565 call_kind =
5566 kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
5567 break;
5568 case TypeCheckKind::kArrayCheck:
5569 case TypeCheckKind::kUnresolvedCheck:
5570 case TypeCheckKind::kInterfaceCheck:
5571 call_kind = LocationSummary::kCallOnSlowPath;
5572 break;
5573 }
5574
5575 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
5576 locations->SetInAt(0, Location::RequiresRegister());
5577 locations->SetInAt(1, Location::Any());
5578 // Note that TypeCheckSlowPathX86_64 uses this "out" register too.
5579 locations->SetOut(Location::RequiresRegister());
5580 // When read barriers are enabled, we need a temporary register for
5581 // some cases.
5582 if (TypeCheckNeedsATemporary(type_check_kind)) {
5583 locations->AddTemp(Location::RequiresRegister());
5584 }
5585 }
5586
5587 void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
5588 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
5589 LocationSummary* locations = instruction->GetLocations();
5590 Location obj_loc = locations->InAt(0);
5591 CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
5592 Location cls = locations->InAt(1);
5593 Location out_loc = locations->Out();
5594 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5595 Location maybe_temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
5596 locations->GetTemp(0) :
5597 Location::NoLocation();
5598 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
5599 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
5600 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
5601 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
5602 SlowPathCode* slow_path = nullptr;
5603 NearLabel done, zero;
5604
5605 // Return 0 if `obj` is null.
5606 // Avoid null check if we know obj is not null.
5607 if (instruction->MustDoNullCheck()) {
5608 __ testl(obj, obj);
5609 __ j(kEqual, &zero);
5610 }
5611
5612 // /* HeapReference<Class> */ out = obj->klass_
5613 GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc);
5614
5615 switch (type_check_kind) {
5616 case TypeCheckKind::kExactCheck: {
5617 if (cls.IsRegister()) {
5618 __ cmpl(out, cls.AsRegister<CpuRegister>());
5619 } else {
5620 DCHECK(cls.IsStackSlot()) << cls;
5621 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5622 }
5623 if (zero.IsLinked()) {
5624 // Classes must be equal for the instanceof to succeed.
5625 __ j(kNotEqual, &zero);
5626 __ movl(out, Immediate(1));
5627 __ jmp(&done);
5628 } else {
5629 __ setcc(kEqual, out);
5630 // setcc only sets the low byte.
5631 __ andl(out, Immediate(1));
5632 }
5633 break;
5634 }
5635
5636 case TypeCheckKind::kAbstractClassCheck: {
5637 // If the class is abstract, we eagerly fetch the super class of the
5638 // object to avoid doing a comparison we know will fail.
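// For example (purely illustrative, `SomeAbstractClass` is hypothetical): for
// `obj instanceof SomeAbstractClass`, obj's exact class can never equal the
// abstract class, so the loop below starts from obj's superclass and walks up
// the hierarchy until it finds the target class (result 1) or reaches null
// at the root (result 0).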
5639 NearLabel loop, success;
5640 __ Bind(&loop);
5641 // /* HeapReference<Class> */ out = out->super_class_
5642 GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
5643 __ testl(out, out);
5644 // If `out` is null, we use it for the result, and jump to `done`.
5645 __ j(kEqual, &done);
5646 if (cls.IsRegister()) {
5647 __ cmpl(out, cls.AsRegister<CpuRegister>());
5648 } else {
5649 DCHECK(cls.IsStackSlot()) << cls;
5650 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5651 }
5652 __ j(kNotEqual, &loop);
5653 __ movl(out, Immediate(1));
5654 if (zero.IsLinked()) {
5655 __ jmp(&done);
5656 }
5657 break;
5658 }
5659
5660 case TypeCheckKind::kClassHierarchyCheck: {
5661 // Walk over the class hierarchy to find a match.
5662 NearLabel loop, success;
5663 __ Bind(&loop);
5664 if (cls.IsRegister()) {
5665 __ cmpl(out, cls.AsRegister<CpuRegister>());
5666 } else {
5667 DCHECK(cls.IsStackSlot()) << cls;
5668 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5669 }
5670 __ j(kEqual, &success);
5671 // /* HeapReference<Class> */ out = out->super_class_
5672 GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
5673 __ testl(out, out);
5674 __ j(kNotEqual, &loop);
5675 // If `out` is null, we use it for the result, and jump to `done`.
5676 __ jmp(&done);
5677 __ Bind(&success);
5678 __ movl(out, Immediate(1));
5679 if (zero.IsLinked()) {
5680 __ jmp(&done);
5681 }
5682 break;
5683 }
5684
5685 case TypeCheckKind::kArrayObjectCheck: {
5686 // Do an exact check.
5687 NearLabel exact_check;
5688 if (cls.IsRegister()) {
5689 __ cmpl(out, cls.AsRegister<CpuRegister>());
5690 } else {
5691 DCHECK(cls.IsStackSlot()) << cls;
5692 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5693 }
5694 __ j(kEqual, &exact_check);
5695 // Otherwise, we need to check that the object's class is a non-primitive array.
5696 // /* HeapReference<Class> */ out = out->component_type_
5697 GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, maybe_temp_loc);
5698 __ testl(out, out);
5699 // If `out` is null, we use it for the result, and jump to `done`.
5700 __ j(kEqual, &done);
5701 __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
5702 __ j(kNotEqual, &zero);
5703 __ Bind(&exact_check);
5704 __ movl(out, Immediate(1));
5705 __ jmp(&done);
5706 break;
5707 }
5708
5709 case TypeCheckKind::kArrayCheck: {
5710 if (cls.IsRegister()) {
5711 __ cmpl(out, cls.AsRegister<CpuRegister>());
5712 } else {
5713 DCHECK(cls.IsStackSlot()) << cls;
5714 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5715 }
5716 DCHECK(locations->OnlyCallsOnSlowPath());
5717 slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
5718 /* is_fatal */ false);
5719 codegen_->AddSlowPath(slow_path);
5720 __ j(kNotEqual, slow_path->GetEntryLabel());
5721 __ movl(out, Immediate(1));
5722 if (zero.IsLinked()) {
5723 __ jmp(&done);
5724 }
5725 break;
5726 }
5727
5728 case TypeCheckKind::kUnresolvedCheck:
5729 case TypeCheckKind::kInterfaceCheck: {
5730 // Note that we indeed only call on slow path, but we always go
5731 // into the slow path for the unresolved and interface check
5732 // cases.
5733 //
5734 // We cannot directly call the InstanceofNonTrivial runtime
5735 // entry point without resorting to a type checking slow path
5736 // here (i.e. by calling InvokeRuntime directly), as it would
5737 // require us to assign fixed registers for the inputs of this
5738 // HInstanceOf instruction (following the runtime calling
5739 // convention), which might be cluttered by the potential first
5740 // read barrier emission at the beginning of this method.
5741 //
5742 // TODO: Introduce a new runtime entry point taking the object
5743 // to test (instead of its class) as argument, and let it deal
5744 // with the read barrier issues. This will let us refactor this
5745 // case of the `switch` code as it was previously (with a direct
5746 // call to the runtime not using a type checking slow path).
5747 // This should also be beneficial for the other cases above.
5748 DCHECK(locations->OnlyCallsOnSlowPath());
5749 slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
5750 /* is_fatal */ false);
5751 codegen_->AddSlowPath(slow_path);
5752 __ jmp(slow_path->GetEntryLabel());
5753 if (zero.IsLinked()) {
5754 __ jmp(&done);
5755 }
5756 break;
5757 }
5758 }
5759
5760 if (zero.IsLinked()) {
5761 __ Bind(&zero);
5762 __ xorl(out, out);
5763 }
5764
5765 if (done.IsLinked()) {
5766 __ Bind(&done);
5767 }
5768
5769 if (slow_path != nullptr) {
5770 __ Bind(slow_path->GetExitLabel());
5771 }
5772 }
5773
5774 void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
5775 LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
5776 bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
5777 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
5778 switch (type_check_kind) {
5779 case TypeCheckKind::kExactCheck:
5780 case TypeCheckKind::kAbstractClassCheck:
5781 case TypeCheckKind::kClassHierarchyCheck:
5782 case TypeCheckKind::kArrayObjectCheck:
5783 call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ?
5784 LocationSummary::kCallOnSlowPath :
5785 LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path.
5786 break;
5787 case TypeCheckKind::kArrayCheck:
5788 case TypeCheckKind::kUnresolvedCheck:
5789 case TypeCheckKind::kInterfaceCheck:
5790 call_kind = LocationSummary::kCallOnSlowPath;
5791 break;
5792 }
5793 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
5794 locations->SetInAt(0, Location::RequiresRegister());
5795 locations->SetInAt(1, Location::Any());
5796 // Note that TypeCheckSlowPathX86_64 uses this "temp" register too.
5797 locations->AddTemp(Location::RequiresRegister());
5798 // When read barriers are enabled, we need an additional temporary
5799 // register for some cases.
5800 if (TypeCheckNeedsATemporary(type_check_kind)) {
5801 locations->AddTemp(Location::RequiresRegister());
5802 }
5803 }
5804
5805 void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
5806 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
5807 LocationSummary* locations = instruction->GetLocations();
5808 Location obj_loc = locations->InAt(0);
5809 CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
5810 Location cls = locations->InAt(1);
5811 Location temp_loc = locations->GetTemp(0);
5812 CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
5813 Location maybe_temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
5814 locations->GetTemp(1) :
5815 Location::NoLocation();
5816 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
5817 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
5818 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
5819 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
5820
5821 bool is_type_check_slow_path_fatal =
5822 (type_check_kind == TypeCheckKind::kExactCheck ||
5823 type_check_kind == TypeCheckKind::kAbstractClassCheck ||
5824 type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
5825 type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
5826 !instruction->CanThrowIntoCatchBlock();
5827 SlowPathCode* type_check_slow_path =
5828 new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
5829 is_type_check_slow_path_fatal);
5830 codegen_->AddSlowPath(type_check_slow_path);
5831
5832 switch (type_check_kind) {
5833 case TypeCheckKind::kExactCheck:
5834 case TypeCheckKind::kArrayCheck: {
5835 NearLabel done;
5836 // Avoid null check if we know obj is not null.
5837 if (instruction->MustDoNullCheck()) {
5838 __ testl(obj, obj);
5839 __ j(kEqual, &done);
5840 }
5841
5842 // /* HeapReference<Class> */ temp = obj->klass_
5843 GenerateReferenceLoadTwoRegisters(
5844 instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5845
5846 if (cls.IsRegister()) {
5847 __ cmpl(temp, cls.AsRegister<CpuRegister>());
5848 } else {
5849 DCHECK(cls.IsStackSlot()) << cls;
5850 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
5851 }
5852 // Jump to slow path for throwing the exception or doing a
5853 // more involved array check.
5854 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
5855 __ Bind(&done);
5856 break;
5857 }
5858
5859 case TypeCheckKind::kAbstractClassCheck: {
5860 NearLabel done;
5861 // Avoid null check if we know obj is not null.
5862 if (instruction->MustDoNullCheck()) {
5863 __ testl(obj, obj);
5864 __ j(kEqual, &done);
5865 }
5866
5867 // /* HeapReference<Class> */ temp = obj->klass_
5868 GenerateReferenceLoadTwoRegisters(
5869 instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5870
5871 // If the class is abstract, we eagerly fetch the super class of the
5872 // object to avoid doing a comparison we know will fail.
5873 NearLabel loop, compare_classes;
5874 __ Bind(&loop);
5875 // /* HeapReference<Class> */ temp = temp->super_class_
5876 GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
5877
5878 // If the class reference currently in `temp` is not null, jump
5879 // to the `compare_classes` label to compare it with the checked
5880 // class.
5881 __ testl(temp, temp);
5882 __ j(kNotEqual, &compare_classes);
5883 // Otherwise, jump to the slow path to throw the exception.
5884 //
5885 // But before, move back the object's class into `temp` before
5886 // going into the slow path, as it has been overwritten in the
5887 // meantime.
5888 // /* HeapReference<Class> */ temp = obj->klass_
5889 GenerateReferenceLoadTwoRegisters(
5890 instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5891 __ jmp(type_check_slow_path->GetEntryLabel());
5892
5893 __ Bind(&compare_classes);
5894 if (cls.IsRegister()) {
5895 __ cmpl(temp, cls.AsRegister<CpuRegister>());
5896 } else {
5897 DCHECK(cls.IsStackSlot()) << cls;
5898 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
5899 }
5900 __ j(kNotEqual, &loop);
5901 __ Bind(&done);
5902 break;
5903 }
5904
5905 case TypeCheckKind::kClassHierarchyCheck: {
5906 NearLabel done;
5907 // Avoid null check if we know obj is not null.
5908 if (instruction->MustDoNullCheck()) {
5909 __ testl(obj, obj);
5910 __ j(kEqual, &done);
5911 }
5912
5913 // /* HeapReference<Class> */ temp = obj->klass_
5914 GenerateReferenceLoadTwoRegisters(
5915 instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5916
5917 // Walk over the class hierarchy to find a match.
5918 NearLabel loop;
5919 __ Bind(&loop);
5920 if (cls.IsRegister()) {
5921 __ cmpl(temp, cls.AsRegister<CpuRegister>());
5922 } else {
5923 DCHECK(cls.IsStackSlot()) << cls;
5924 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
5925 }
5926 __ j(kEqual, &done);
5927
5928 // /* HeapReference<Class> */ temp = temp->super_class_
5929 GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
5930
5931 // If the class reference currently in `temp` is not null, jump
5932 // back to the beginning of the loop.
5933 __ testl(temp, temp);
5934 __ j(kNotEqual, &loop);
5935 // Otherwise, jump to the slow path to throw the exception.
5936 //
5937 // But before, move back the object's class into `temp` before
5938 // going into the slow path, as it has been overwritten in the
5939 // meantime.
5940 // /* HeapReference<Class> */ temp = obj->klass_
5941 GenerateReferenceLoadTwoRegisters(
5942 instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5943 __ jmp(type_check_slow_path->GetEntryLabel());
5944 __ Bind(&done);
5945 break;
5946 }
5947
5948 case TypeCheckKind::kArrayObjectCheck: {
5949 // We cannot use a NearLabel here, as its range might be too
5950 // short in some cases when read barriers are enabled. This has
5951 // been observed for instance when the code emitted for this
5952 // case uses high x86-64 registers (R8-R15).
5953 Label done;
5954 // Avoid null check if we know obj is not null.
5955 if (instruction->MustDoNullCheck()) {
5956 __ testl(obj, obj);
5957 __ j(kEqual, &done);
5958 }
5959
5960 // /* HeapReference<Class> */ temp = obj->klass_
5961 GenerateReferenceLoadTwoRegisters(
5962 instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5963
5964 // Do an exact check.
5965 NearLabel check_non_primitive_component_type;
5966 if (cls.IsRegister()) {
5967 __ cmpl(temp, cls.AsRegister<CpuRegister>());
5968 } else {
5969 DCHECK(cls.IsStackSlot()) << cls;
5970 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
5971 }
5972 __ j(kEqual, &done);
5973
5974 // Otherwise, we need to check that the object's class is a non-primitive array.
5975 // /* HeapReference<Class> */ temp = temp->component_type_
5976 GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, maybe_temp2_loc);
5977
5978 // If the component type is not null (i.e. the object is indeed
5979 // an array), jump to label `check_non_primitive_component_type`
5980 // to further check that this component type is not a primitive
5981 // type.
5982 __ testl(temp, temp);
5983 __ j(kNotEqual, &check_non_primitive_component_type);
5984 // Otherwise, jump to the slow path to throw the exception.
5985 //
5986 // But before, move back the object's class into `temp` before
5987 // going into the slow path, as it has been overwritten in the
5988 // meantime.
5989 // /* HeapReference<Class> */ temp = obj->klass_
5990 GenerateReferenceLoadTwoRegisters(
5991 instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5992 __ jmp(type_check_slow_path->GetEntryLabel());
5993
5994 __ Bind(&check_non_primitive_component_type);
5995 __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
5996 __ j(kEqual, &done);
5997 // Same comment as above regarding `temp` and the slow path.
5998 // /* HeapReference<Class> */ temp = obj->klass_
5999 GenerateReferenceLoadTwoRegisters(
6000 instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
6001 __ jmp(type_check_slow_path->GetEntryLabel());
6002 __ Bind(&done);
6003 break;
6004 }
6005
6006 case TypeCheckKind::kUnresolvedCheck:
6007 case TypeCheckKind::kInterfaceCheck:
6008 NearLabel done;
6009 // Avoid null check if we know obj is not null.
6010 if (instruction->MustDoNullCheck()) {
6011 __ testl(obj, obj);
6012 __ j(kEqual, &done);
6013 }
6014
6015 // /* HeapReference<Class> */ temp = obj->klass_
6016 GenerateReferenceLoadTwoRegisters(
6017 instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
6018
6019 // We always go into the type check slow path for the unresolved
6020 // and interface check cases.
6021 //
6022 // We cannot directly call the CheckCast runtime entry point
6023 // without resorting to a type checking slow path here (i.e. by
6024 // calling InvokeRuntime directly), as it would require us to
6025 // assign fixed registers for the inputs of this HCheckCast
6026 // instruction (following the runtime calling convention), which
6027 // might be cluttered by the potential first read barrier
6028 // emission at the beginning of this method.
6029 //
6030 // TODO: Introduce a new runtime entry point taking the object
6031 // to test (instead of its class) as argument, and let it deal
6032 // with the read barrier issues. This will let us refactor this
6033 // case of the `switch` code as it was previously (with a direct
6034 // call to the runtime not using a type checking slow path).
6035 // This should also be beneficial for the other cases above.
6036 __ jmp(type_check_slow_path->GetEntryLabel());
6037 __ Bind(&done);
6038 break;
6039 }
6040
6041 __ Bind(type_check_slow_path->GetExitLabel());
6042 }
6043
6044 void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
6045 LocationSummary* locations =
6046 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
6047 InvokeRuntimeCallingConvention calling_convention;
6048 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6049 }
6050
6051 void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
6052 codegen_->InvokeRuntime(instruction->IsEnter() ? QUICK_ENTRY_POINT(pLockObject)
6053 : QUICK_ENTRY_POINT(pUnlockObject),
6054 instruction,
6055 instruction->GetDexPc(),
6056 nullptr);
6057 if (instruction->IsEnter()) {
6058 CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
6059 } else {
6060 CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
6061 }
6062 }
6063
6064 void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
6065 void LocationsBuilderX86_64::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
6066 void LocationsBuilderX86_64::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
6067
6068 void LocationsBuilderX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
6069 LocationSummary* locations =
6070 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
6071 DCHECK(instruction->GetResultType() == Primitive::kPrimInt
6072 || instruction->GetResultType() == Primitive::kPrimLong);
6073 locations->SetInAt(0, Location::RequiresRegister());
6074 locations->SetInAt(1, Location::Any());
6075 locations->SetOut(Location::SameAsFirstInput());
6076 }
6077
6078 void InstructionCodeGeneratorX86_64::VisitAnd(HAnd* instruction) {
6079 HandleBitwiseOperation(instruction);
6080 }
6081
6082 void InstructionCodeGeneratorX86_64::VisitOr(HOr* instruction) {
6083 HandleBitwiseOperation(instruction);
6084 }
6085
6086 void InstructionCodeGeneratorX86_64::VisitXor(HXor* instruction) {
6087 HandleBitwiseOperation(instruction);
6088 }
6089
6090 void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
6091 LocationSummary* locations = instruction->GetLocations();
6092 Location first = locations->InAt(0);
6093 Location second = locations->InAt(1);
6094 DCHECK(first.Equals(locations->Out()));
6095
6096 if (instruction->GetResultType() == Primitive::kPrimInt) {
6097 if (second.IsRegister()) {
6098 if (instruction->IsAnd()) {
6099 __ andl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6100 } else if (instruction->IsOr()) {
6101 __ orl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6102 } else {
6103 DCHECK(instruction->IsXor());
6104 __ xorl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6105 }
6106 } else if (second.IsConstant()) {
6107 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
6108 if (instruction->IsAnd()) {
6109 __ andl(first.AsRegister<CpuRegister>(), imm);
6110 } else if (instruction->IsOr()) {
6111 __ orl(first.AsRegister<CpuRegister>(), imm);
6112 } else {
6113 DCHECK(instruction->IsXor());
6114 __ xorl(first.AsRegister<CpuRegister>(), imm);
6115 }
6116 } else {
6117 Address address(CpuRegister(RSP), second.GetStackIndex());
6118 if (instruction->IsAnd()) {
6119 __ andl(first.AsRegister<CpuRegister>(), address);
6120 } else if (instruction->IsOr()) {
6121 __ orl(first.AsRegister<CpuRegister>(), address);
6122 } else {
6123 DCHECK(instruction->IsXor());
6124 __ xorl(first.AsRegister<CpuRegister>(), address);
6125 }
6126 }
6127 } else {
6128 DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
6129 CpuRegister first_reg = first.AsRegister<CpuRegister>();
6130 bool second_is_constant = false;
6131 int64_t value = 0;
6132 if (second.IsConstant()) {
6133 second_is_constant = true;
6134 value = second.GetConstant()->AsLongConstant()->GetValue();
6135 }
6136 bool is_int32_value = IsInt<32>(value);
6137
6138 if (instruction->IsAnd()) {
6139 if (second_is_constant) {
6140 if (is_int32_value) {
6141 __ andq(first_reg, Immediate(static_cast<int32_t>(value)));
6142 } else {
6143 __ andq(first_reg, codegen_->LiteralInt64Address(value));
6144 }
6145 } else if (second.IsDoubleStackSlot()) {
6146 __ andq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
6147 } else {
6148 __ andq(first_reg, second.AsRegister<CpuRegister>());
6149 }
6150 } else if (instruction->IsOr()) {
6151 if (second_is_constant) {
6152 if (is_int32_value) {
6153 __ orq(first_reg, Immediate(static_cast<int32_t>(value)));
6154 } else {
6155 __ orq(first_reg, codegen_->LiteralInt64Address(value));
6156 }
6157 } else if (second.IsDoubleStackSlot()) {
6158 __ orq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
6159 } else {
6160 __ orq(first_reg, second.AsRegister<CpuRegister>());
6161 }
6162 } else {
6163 DCHECK(instruction->IsXor());
6164 if (second_is_constant) {
6165 if (is_int32_value) {
6166 __ xorq(first_reg, Immediate(static_cast<int32_t>(value)));
6167 } else {
6168 __ xorq(first_reg, codegen_->LiteralInt64Address(value));
6169 }
6170 } else if (second.IsDoubleStackSlot()) {
6171 __ xorq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
6172 } else {
6173 __ xorq(first_reg, second.AsRegister<CpuRegister>());
6174 }
6175 }
6176 }
6177 }
6178
6179 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(HInstruction* instruction,
6180 Location out,
6181 uint32_t offset,
6182 Location maybe_temp) {
6183 CpuRegister out_reg = out.AsRegister<CpuRegister>();
6184 if (kEmitCompilerReadBarrier) {
6185 DCHECK(maybe_temp.IsRegister()) << maybe_temp;
6186 if (kUseBakerReadBarrier) {
6187 // Load with fast path based Baker's read barrier.
6188 // /* HeapReference<Object> */ out = *(out + offset)
6189 codegen_->GenerateFieldLoadWithBakerReadBarrier(
6190 instruction, out, out_reg, offset, maybe_temp, /* needs_null_check */ false);
6191 } else {
6192 // Load with slow path based read barrier.
6193 // Save the value of `out` into `maybe_temp` before overwriting it
6194 // in the following move operation, as we will need it for the
6195 // read barrier below.
6196 __ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg);
6197 // /* HeapReference<Object> */ out = *(out + offset)
6198 __ movl(out_reg, Address(out_reg, offset));
6199 codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
6200 }
6201 } else {
6202 // Plain load with no read barrier.
6203 // /* HeapReference<Object> */ out = *(out + offset)
6204 __ movl(out_reg, Address(out_reg, offset));
6205 __ MaybeUnpoisonHeapReference(out_reg);
6206 }
6207 }
6208
6209 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
6210 Location out,
6211 Location obj,
6212 uint32_t offset,
6213 Location maybe_temp) {
6214 CpuRegister out_reg = out.AsRegister<CpuRegister>();
6215 CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
6216 if (kEmitCompilerReadBarrier) {
6217 if (kUseBakerReadBarrier) {
6218 DCHECK(maybe_temp.IsRegister()) << maybe_temp;
6219 // Load with fast path based Baker's read barrier.
6220 // /* HeapReference<Object> */ out = *(obj + offset)
6221 codegen_->GenerateFieldLoadWithBakerReadBarrier(
6222 instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check */ false);
6223 } else {
6224 // Load with slow path based read barrier.
6225 // /* HeapReference<Object> */ out = *(obj + offset)
6226 __ movl(out_reg, Address(obj_reg, offset));
6227 codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
6228 }
6229 } else {
6230 // Plain load with no read barrier.
6231 // /* HeapReference<Object> */ out = *(obj + offset)
6232 __ movl(out_reg, Address(obj_reg, offset));
6233 __ MaybeUnpoisonHeapReference(out_reg);
6234 }
6235 }
6236
6237 void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(HInstruction* instruction,
6238 Location root,
6239 const Address& address,
6240 Label* fixup_label) {
6241 CpuRegister root_reg = root.AsRegister<CpuRegister>();
6242 if (kEmitCompilerReadBarrier) {
6243 if (kUseBakerReadBarrier) {
6244 // Fast path implementation of art::ReadBarrier::BarrierForRoot when
6245 // Baker's read barriers are used:
6246 //
6247 // root = *address;
6248 // if (Thread::Current()->GetIsGcMarking()) {
6249 // root = ReadBarrier::Mark(root)
6250 // }
6251
6252 // /* GcRoot<mirror::Object> */ root = *address
6253 __ movl(root_reg, address);
6254 if (fixup_label != nullptr) {
6255 __ Bind(fixup_label);
6256 }
6257 static_assert(
6258 sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
6259 "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
6260 "have different sizes.");
6261 static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
6262 "art::mirror::CompressedReference<mirror::Object> and int32_t "
6263 "have different sizes.");
6264
6265 // Slow path used to mark the GC root `root`.
6266 SlowPathCode* slow_path =
6267 new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, root, root);
6268 codegen_->AddSlowPath(slow_path);
6269
6270 __ gs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86_64WordSize>().Int32Value(),
6271 /* no_rip */ true),
6272 Immediate(0));
6273 __ j(kNotEqual, slow_path->GetEntryLabel());
6274 __ Bind(slow_path->GetExitLabel());
6275 } else {
6276 // GC root loaded through a slow path for read barriers other
6277 // than Baker's.
6278 // /* GcRoot<mirror::Object>* */ root = address
6279 __ leaq(root_reg, address);
6280 if (fixup_label != nullptr) {
6281 __ Bind(fixup_label);
6282 }
6283 // /* mirror::Object* */ root = root->Read()
6284 codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
6285 }
6286 } else {
6287 // Plain GC root load with no read barrier.
6288 // /* GcRoot<mirror::Object> */ root = *address
6289 __ movl(root_reg, address);
6290 if (fixup_label != nullptr) {
6291 __ Bind(fixup_label);
6292 }
6293 // Note that GC roots are not affected by heap poisoning, thus we
6294 // do not have to unpoison `root_reg` here.
6295 }
6296 }
6297
6298 void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
6299 Location ref,
6300 CpuRegister obj,
6301 uint32_t offset,
6302 Location temp,
6303 bool needs_null_check) {
6304 DCHECK(kEmitCompilerReadBarrier);
6305 DCHECK(kUseBakerReadBarrier);
6306
6307 // /* HeapReference<Object> */ ref = *(obj + offset)
6308 Address src(obj, offset);
6309 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
6310 }
6311
6312 void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
6313 Location ref,
6314 CpuRegister obj,
6315 uint32_t data_offset,
6316 Location index,
6317 Location temp,
6318 bool needs_null_check) {
6319 DCHECK(kEmitCompilerReadBarrier);
6320 DCHECK(kUseBakerReadBarrier);
6321
6322 // /* HeapReference<Object> */ ref =
6323 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
6324 Address src = index.IsConstant() ?
6325 Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset) :
6326 Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset);
6327 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
6328 }
6329
6330 void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
6331 Location ref,
6332 CpuRegister obj,
6333 const Address& src,
6334 Location temp,
6335 bool needs_null_check) {
6336 DCHECK(kEmitCompilerReadBarrier);
6337 DCHECK(kUseBakerReadBarrier);
6338
6339 // In slow path based read barriers, the read barrier call is
6340 // inserted after the original load. However, in fast path based
6341 // Baker's read barriers, we need to perform the load of
6342 // mirror::Object::monitor_ *before* the original reference load.
6343 // This load-load ordering is required by the read barrier.
6344 // The fast path/slow path (for Baker's algorithm) should look like:
6345 //
6346 // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
6347 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
6348 // HeapReference<Object> ref = *src; // Original reference load.
6349 // bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
6350 // if (is_gray) {
6351 // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path.
6352 // }
6353 //
6354 // Note: the original implementation in ReadBarrier::Barrier is
6355 // slightly more complex as:
6356 // - it implements the load-load fence using a data dependency on
6357 // the high-bits of rb_state, which are expected to be all zeroes
6358 // (we use CodeGeneratorX86_64::GenerateMemoryBarrier instead
6359 // here, which is a no-op thanks to the x86-64 memory model);
6360 // - it performs additional checks that we do not do here for
6361 // performance reasons.
6362
6363 CpuRegister ref_reg = ref.AsRegister<CpuRegister>();
6364 CpuRegister temp_reg = temp.AsRegister<CpuRegister>();
6365 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
6366
6367 // /* int32_t */ monitor = obj->monitor_
6368 __ movl(temp_reg, Address(obj, monitor_offset));
6369 if (needs_null_check) {
6370 MaybeRecordImplicitNullCheck(instruction);
6371 }
6372 // /* LockWord */ lock_word = LockWord(monitor)
6373 static_assert(sizeof(LockWord) == sizeof(int32_t),
6374 "art::LockWord and int32_t have different sizes.");
6375 // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
6376 __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift));
6377 __ andl(temp_reg, Immediate(LockWord::kReadBarrierStateMask));
6378 static_assert(
6379 LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
6380 "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");
6381
6382 // Load fence to prevent load-load reordering.
6383 // Note that this is a no-op, thanks to the x86-64 memory model.
6384 GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6385
6386 // The actual reference load.
6387 // /* HeapReference<Object> */ ref = *src
6388 __ movl(ref_reg, src);
6389
6390 // Object* ref = ref_addr->AsMirrorPtr()
6391 __ MaybeUnpoisonHeapReference(ref_reg);
6392
6393 // Slow path used to mark the object `ref` when it is gray.
6394 SlowPathCode* slow_path =
6395 new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, ref, ref);
6396 AddSlowPath(slow_path);
6397
6398 // if (rb_state == ReadBarrier::gray_ptr_)
6399 // ref = ReadBarrier::Mark(ref);
6400 __ cmpl(temp_reg, Immediate(ReadBarrier::gray_ptr_));
6401 __ j(kEqual, slow_path->GetEntryLabel());
6402 __ Bind(slow_path->GetExitLabel());
6403 }
6404
6405 void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction,
6406 Location out,
6407 Location ref,
6408 Location obj,
6409 uint32_t offset,
6410 Location index) {
6411 DCHECK(kEmitCompilerReadBarrier);
6412
6413 // Insert a slow path based read barrier *after* the reference load.
6414 //
6415 // If heap poisoning is enabled, the unpoisoning of the loaded
6416 // reference will be carried out by the runtime within the slow
6417 // path.
6418 //
6419 // Note that `ref` currently does not get unpoisoned (when heap
6420 // poisoning is enabled), which is alright as the `ref` argument is
6421 // not used by the artReadBarrierSlow entry point.
6422 //
6423 // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
6424 SlowPathCode* slow_path = new (GetGraph()->GetArena())
6425 ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index);
6426 AddSlowPath(slow_path);
6427
6428 __ jmp(slow_path->GetEntryLabel());
6429 __ Bind(slow_path->GetExitLabel());
6430 }
6431
6432 void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
6433 Location out,
6434 Location ref,
6435 Location obj,
6436 uint32_t offset,
6437 Location index) {
6438 if (kEmitCompilerReadBarrier) {
6439 // Baker's read barriers shall be handled by the fast path
6440 // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier).
6441 DCHECK(!kUseBakerReadBarrier);
6442 // If heap poisoning is enabled, unpoisoning will be taken care of
6443 // by the runtime within the slow path.
6444 GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
6445 } else if (kPoisonHeapReferences) {
6446 __ UnpoisonHeapReference(out.AsRegister<CpuRegister>());
6447 }
6448 }
6449
6450 void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
6451 Location out,
6452 Location root) {
6453 DCHECK(kEmitCompilerReadBarrier);
6454
6455 // Insert a slow path based read barrier *after* the GC root load.
6456 //
6457 // Note that GC roots are not affected by heap poisoning, so we do
6458 // not need to do anything special for this here.
6459 SlowPathCode* slow_path =
6460 new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathX86_64(instruction, out, root);
6461 AddSlowPath(slow_path);
6462
6463 __ jmp(slow_path->GetEntryLabel());
6464 __ Bind(slow_path->GetExitLabel());
6465 }
6466
6467 void LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
6468 // Nothing to do, this should be removed during prepare for register allocator.
6469 LOG(FATAL) << "Unreachable";
6470 }
6471
6472 void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
6473 // Nothing to do, this should be removed during prepare for register allocator.
6474 LOG(FATAL) << "Unreachable";
6475 }
6476
6477 // Simple implementation of packed switch - generate cascaded compare/jumps.
6478 void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
6479 LocationSummary* locations =
6480 new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
6481 locations->SetInAt(0, Location::RequiresRegister());
6482 locations->AddTemp(Location::RequiresRegister());
6483 locations->AddTemp(Location::RequiresRegister());
6484 }
6485
6486 void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
6487 int32_t lower_bound = switch_instr->GetStartValue();
6488 uint32_t num_entries = switch_instr->GetNumEntries();
6489 LocationSummary* locations = switch_instr->GetLocations();
6490 CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>();
6491 CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>();
6492 CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
6493 HBasicBlock* default_block = switch_instr->GetDefaultBlock();
6494
6495 // Should we generate smaller inline compare/jumps?
6496 if (num_entries <= kPackedSwitchJumpTableThreshold) {
6497 // Figure out the correct compare values and jump conditions.
6498 // Handle the first compare/branch as a special case because it might
6499 // jump to the default case.
6500 DCHECK_GT(num_entries, 2u);
6501 Condition first_condition;
6502 uint32_t index;
6503 const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
6504 if (lower_bound != 0) {
6505 first_condition = kLess;
6506 __ cmpl(value_reg_in, Immediate(lower_bound));
6507 __ j(first_condition, codegen_->GetLabelOf(default_block));
6508 __ j(kEqual, codegen_->GetLabelOf(successors[0]));
6509
6510 index = 1;
6511 } else {
6512 // Handle all the compare/jumps below.
6513 first_condition = kBelow;
6514 index = 0;
6515 }
6516
6517 // Handle the rest of the compare/jumps.
6518 for (; index + 1 < num_entries; index += 2) {
6519 int32_t compare_to_value = lower_bound + index + 1;
6520 __ cmpl(value_reg_in, Immediate(compare_to_value));
6521 // Jump to successors[index] if value < case_value[index].
6522 __ j(first_condition, codegen_->GetLabelOf(successors[index]));
6523 // Jump to successors[index + 1] if value == case_value[index + 1].
6524 __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
6525 }
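// As an illustration (not emitted verbatim): with lower_bound == 0 and five
// entries, the loop above emits `cmp 1; jb case0; je case1`, then
// `cmp 3; jb case2; je case3`; the odd trailing entry handled below adds
// `cmp 4; je case4`, and any other value falls through to the default block.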
6526
6527 if (index != num_entries) {
6528 // There are an odd number of entries. Handle the last one.
6529 DCHECK_EQ(index + 1, num_entries);
6530 __ cmpl(value_reg_in, Immediate(static_cast<int32_t>(lower_bound + index)));
6531 __ j(kEqual, codegen_->GetLabelOf(successors[index]));
6532 }
6533
6534 // And the default for any other value.
6535 if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
6536 __ jmp(codegen_->GetLabelOf(default_block));
6537 }
6538 return;
6539 }
6540
6541 // Remove the bias, if needed.
6542 Register value_reg_out = value_reg_in.AsRegister();
6543 if (lower_bound != 0) {
6544 __ leal(temp_reg, Address(value_reg_in, -lower_bound));
6545 value_reg_out = temp_reg.AsRegister();
6546 }
6547 CpuRegister value_reg(value_reg_out);
6548
6549 // Is the value in range?
6550 __ cmpl(value_reg, Immediate(num_entries - 1));
6551 __ j(kAbove, codegen_->GetLabelOf(default_block));
6552
6553 // We are in the range of the table.
6554 // Load the address of the jump table in the constant area.
6555 __ leaq(base_reg, codegen_->LiteralCaseTable(switch_instr));
6556
6557 // Load the (signed) offset from the jump table.
6558 __ movsxd(temp_reg, Address(base_reg, value_reg, TIMES_4, 0));
6559
6560 // Add the offset to the address of the table base.
6561 __ addq(temp_reg, base_reg);
6562
6563 // And jump.
6564 __ jmp(temp_reg);
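// Taken together, the table dispatch above emits a sequence like the following
// sketch (register names are examples only, not the actual allocation):
//   leaq   table(%rip), base      // address of the jump table in the constant area
//   movsxd (base, value, 4), tmp  // sign-extended 32-bit offset for this case
//   addq   base, tmp              // absolute address of the case block
//   jmp    *tmp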
6565 }
6566
6567 void CodeGeneratorX86_64::Load32BitValue(CpuRegister dest, int32_t value) {
6568 if (value == 0) {
6569 __ xorl(dest, dest);
6570 } else {
6571 __ movl(dest, Immediate(value));
6572 }
6573 }
6574
6575 void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
6576 if (value == 0) {
6577 // Clears upper bits too.
6578 __ xorl(dest, dest);
6579 } else if (IsUint<32>(value)) {
6580 // We can use a 32 bit move, as it will zero-extend and is shorter.
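// For example, 0x00000000FFFFFFFF takes this branch (movl zero-extends to the
// full 64 bits), whereas -1 does not satisfy IsUint<32> and falls through to
// the movq form below.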
6581 __ movl(dest, Immediate(static_cast<int32_t>(value)));
6582 } else {
6583 __ movq(dest, Immediate(value));
6584 }
6585 }
6586
6587 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, int32_t value) {
6588 if (value == 0) {
6589 __ xorps(dest, dest);
6590 } else {
6591 __ movss(dest, LiteralInt32Address(value));
6592 }
6593 }
6594
6595 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, int64_t value) {
6596 if (value == 0) {
6597 __ xorpd(dest, dest);
6598 } else {
6599 __ movsd(dest, LiteralInt64Address(value));
6600 }
6601 }
6602
6603 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, float value) {
6604 Load32BitValue(dest, bit_cast<int32_t, float>(value));
6605 }
6606
6607 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, double value) {
6608 Load64BitValue(dest, bit_cast<int64_t, double>(value));
6609 }
6610
6611 void CodeGeneratorX86_64::Compare32BitValue(CpuRegister dest, int32_t value) {
6612 if (value == 0) {
6613 __ testl(dest, dest);
6614 } else {
6615 __ cmpl(dest, Immediate(value));
6616 }
6617 }
6618
6619 void CodeGeneratorX86_64::Compare64BitValue(CpuRegister dest, int64_t value) {
6620 if (IsInt<32>(value)) {
6621 if (value == 0) {
6622 __ testq(dest, dest);
6623 } else {
6624 __ cmpq(dest, Immediate(static_cast<int32_t>(value)));
6625 }
6626 } else {
6627 // Value won't fit in a 32-bit immediate.
6628 __ cmpq(dest, LiteralInt64Address(value));
6629 }
6630 }
6631
6632 void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) {
6633 DCHECK(dest.IsDoubleStackSlot());
6634 if (IsInt<32>(value)) {
6635 // Can move directly as an int32 constant.
6636 __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()),
6637 Immediate(static_cast<int32_t>(value)));
6638 } else {
6639 Load64BitValue(CpuRegister(TMP), value);
6640 __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()), CpuRegister(TMP));
6641 }
6642 }
6643
6644 /**
6645 * Class to handle late fixup of offsets into constant area.
6646 */
6647 class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
6648 public:
6649 RIPFixup(CodeGeneratorX86_64& codegen, size_t offset)
6650 : codegen_(&codegen), offset_into_constant_area_(offset) {}
6651
6652 protected:
6653 void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }
6654
6655 CodeGeneratorX86_64* codegen_;
6656
6657 private:
6658 void Process(const MemoryRegion& region, int pos) OVERRIDE {
6659 // Patch the correct offset for the instruction. We use the address of the
6660 // 'next' instruction, which is 'pos' (patch the 4 bytes before).
6661 int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
6662 int32_t relative_position = constant_offset - pos;
6663
6664 // Patch in the right value.
6665 region.StoreUnaligned<int32_t>(pos - 4, relative_position);
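// Worked example (illustrative numbers only): if the constant area begins at
// code offset 0x100, this literal sits 8 bytes into it, and `pos` is 0x40,
// then relative_position = 0x108 - 0x40 = 0xC8, written at offset 0x3C.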
6666 }
6667
6668 // Location in constant area that the fixup refers to.
6669 size_t offset_into_constant_area_;
6670 };
6671
6672 /**
6673 * Class to handle late fixup of offsets to a jump table that will be created in the
6674 * constant area.
6675 */
6676 class JumpTableRIPFixup : public RIPFixup {
6677 public:
6678 JumpTableRIPFixup(CodeGeneratorX86_64& codegen, HPackedSwitch* switch_instr)
6679 : RIPFixup(codegen, -1), switch_instr_(switch_instr) {}
6680
6681 void CreateJumpTable() {
6682 X86_64Assembler* assembler = codegen_->GetAssembler();
6683
6684 // Ensure that the reference to the jump table has the correct offset.
6685 const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
6686 SetOffset(offset_in_constant_table);
6687
6688 // Compute the offset from the start of the function to this jump table.
6689 const int32_t current_table_offset = assembler->CodeSize() + offset_in_constant_table;
6690
6691 // Populate the jump table with the offset to each case's target block.
6692 int32_t num_entries = switch_instr_->GetNumEntries();
6693 HBasicBlock* block = switch_instr_->GetBlock();
6694 const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
6695 // The value that we want is the target offset - the position of the table.
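// For instance (purely illustrative offsets), a case block bound at code
// offset 0x80 with the table located at offset 0x200 stores
// 0x80 - 0x200 = -0x180 as its 32-bit entry.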
6696 for (int32_t i = 0; i < num_entries; i++) {
6697 HBasicBlock* b = successors[i];
6698 Label* l = codegen_->GetLabelOf(b);
6699 DCHECK(l->IsBound());
6700 int32_t offset_to_block = l->Position() - current_table_offset;
6701 assembler->AppendInt32(offset_to_block);
6702 }
6703 }
6704
6705 private:
6706 const HPackedSwitch* switch_instr_;
6707 };
6708
6709 void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
6710 // Generate the constant area if needed.
6711 X86_64Assembler* assembler = GetAssembler();
6712 if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
6713 // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 byte values.
6714 assembler->Align(4, 0);
6715 constant_area_start_ = assembler->CodeSize();
6716
6717 // Populate any jump tables.
6718 for (auto jump_table : fixups_to_jump_tables_) {
6719 jump_table->CreateJumpTable();
6720 }
6721
6722 // And now add the constant area to the generated code.
6723 assembler->AddConstantArea();
6724 }
6725
6726 // And finish up.
6727 CodeGenerator::Finalize(allocator);
6728 }
6729
6730 Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
6731 AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddDouble(v));
6732 return Address::RIP(fixup);
6733 }
6734
6735 Address CodeGeneratorX86_64::LiteralFloatAddress(float v) {
6736 AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddFloat(v));
6737 return Address::RIP(fixup);
6738 }
6739
6740 Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) {
6741 AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt32(v));
6742 return Address::RIP(fixup);
6743 }
6744
6745 Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) {
6746 AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt64(v));
6747 return Address::RIP(fixup);
6748 }
6749
6750 // TODO: trg as memory.
6751 void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, Primitive::Type type) {
6752 if (!trg.IsValid()) {
6753 DCHECK_EQ(type, Primitive::kPrimVoid);
6754 return;
6755 }
6756
6757 DCHECK_NE(type, Primitive::kPrimVoid);
6758
6759 Location return_loc = InvokeDexCallingConventionVisitorX86_64().GetReturnLocation(type);
6760 if (trg.Equals(return_loc)) {
6761 return;
6762 }
6763
6764 // Let the parallel move resolver take care of all of this.
6765 HParallelMove parallel_move(GetGraph()->GetArena());
6766 parallel_move.AddMove(return_loc, trg, type, nullptr);
6767 GetMoveResolver()->EmitNativeCode(&parallel_move);
6768 }
6769
6770 Address CodeGeneratorX86_64::LiteralCaseTable(HPackedSwitch* switch_instr) {
6771 // Create a fixup to be used to create and address the jump table.
6772 JumpTableRIPFixup* table_fixup =
6773 new (GetGraph()->GetArena()) JumpTableRIPFixup(*this, switch_instr);
6774
6775 // We have to populate the jump tables.
6776 fixups_to_jump_tables_.push_back(table_fixup);
6777 return Address::RIP(table_fixup);
6778 }
6779
6780 void CodeGeneratorX86_64::MoveInt64ToAddress(const Address& addr_low,
6781 const Address& addr_high,
6782 int64_t v,
6783 HInstruction* instruction) {
6784 if (IsInt<32>(v)) {
6785 int32_t v_32 = v;
6786 __ movq(addr_low, Immediate(v_32));
6787 MaybeRecordImplicitNullCheck(instruction);
6788 } else {
6789 // The value doesn't fit in a 32-bit immediate. Store it as two 32-bit halves.
6790 int32_t low_v = Low32Bits(v);
6791 int32_t high_v = High32Bits(v);
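// For example, v == 0x0000000100000000 stores 0x00000000 to addr_low and
// 0x00000001 to addr_high.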
6792 __ movl(addr_low, Immediate(low_v));
6793 MaybeRecordImplicitNullCheck(instruction);
6794 __ movl(addr_high, Immediate(high_v));
6795 }
6796 }
6797
6798 #undef __
6799
6800 } // namespace x86_64
6801 } // namespace art
6802